LLVM 19.0.0git
SimplifyCFG.cpp
Go to the documentation of this file.
1//===- SimplifyCFG.cpp - Code to perform CFG simplification ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Peephole optimize the CFG.
10//
11//===----------------------------------------------------------------------===//
12
13#include "llvm/ADT/APInt.h"
14#include "llvm/ADT/ArrayRef.h"
15#include "llvm/ADT/DenseMap.h"
16#include "llvm/ADT/MapVector.h"
17#include "llvm/ADT/STLExtras.h"
18#include "llvm/ADT/Sequence.h"
20#include "llvm/ADT/SetVector.h"
23#include "llvm/ADT/Statistic.h"
24#include "llvm/ADT/StringRef.h"
35#include "llvm/IR/Attributes.h"
36#include "llvm/IR/BasicBlock.h"
37#include "llvm/IR/CFG.h"
38#include "llvm/IR/Constant.h"
40#include "llvm/IR/Constants.h"
41#include "llvm/IR/DataLayout.h"
42#include "llvm/IR/DebugInfo.h"
44#include "llvm/IR/Function.h"
45#include "llvm/IR/GlobalValue.h"
47#include "llvm/IR/IRBuilder.h"
48#include "llvm/IR/InstrTypes.h"
49#include "llvm/IR/Instruction.h"
52#include "llvm/IR/LLVMContext.h"
53#include "llvm/IR/MDBuilder.h"
54#include "llvm/IR/Metadata.h"
55#include "llvm/IR/Module.h"
56#include "llvm/IR/NoFolder.h"
57#include "llvm/IR/Operator.h"
60#include "llvm/IR/Type.h"
61#include "llvm/IR/Use.h"
62#include "llvm/IR/User.h"
63#include "llvm/IR/Value.h"
64#include "llvm/IR/ValueHandle.h"
68#include "llvm/Support/Debug.h"
76#include <algorithm>
77#include <cassert>
78#include <climits>
79#include <cstddef>
80#include <cstdint>
81#include <iterator>
82#include <map>
83#include <optional>
84#include <set>
85#include <tuple>
86#include <utility>
87#include <vector>
88
89using namespace llvm;
90using namespace PatternMatch;
91
92#define DEBUG_TYPE "simplifycfg"
93
95 "simplifycfg-require-and-preserve-domtree", cl::Hidden,
96
97 cl::desc("Temorary development switch used to gradually uplift SimplifyCFG "
98 "into preserving DomTree,"));
99
100// Chosen as 2 so as to be cheap, but still to have enough power to fold
101// a select, so the "clamp" idiom (of a min followed by a max) will be caught.
102// To catch this, we need to fold a compare and a select, hence '2' being the
103// minimum reasonable default.
105 "phi-node-folding-threshold", cl::Hidden, cl::init(2),
106 cl::desc(
107 "Control the amount of phi node folding to perform (default = 2)"));
108
110 "two-entry-phi-node-folding-threshold", cl::Hidden, cl::init(4),
111 cl::desc("Control the maximal total instruction cost that we are willing "
112 "to speculatively execute to fold a 2-entry PHI node into a "
113 "select (default = 4)"));
114
115static cl::opt<bool>
116 HoistCommon("simplifycfg-hoist-common", cl::Hidden, cl::init(true),
117 cl::desc("Hoist common instructions up to the parent block"));
118
120 HoistCommonSkipLimit("simplifycfg-hoist-common-skip-limit", cl::Hidden,
121 cl::init(20),
122 cl::desc("Allow reordering across at most this many "
123 "instructions when hoisting"));
124
125static cl::opt<bool>
126 SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true),
127 cl::desc("Sink common instructions down to the end block"));
128
130 "simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true),
131 cl::desc("Hoist conditional stores if an unconditional store precedes"));
132
134 "simplifycfg-merge-cond-stores", cl::Hidden, cl::init(true),
135 cl::desc("Hoist conditional stores even if an unconditional store does not "
136 "precede - hoist multiple conditional stores into a single "
137 "predicated store"));
138
140 "simplifycfg-merge-cond-stores-aggressively", cl::Hidden, cl::init(false),
141 cl::desc("When merging conditional stores, do so even if the resultant "
142 "basic blocks are unlikely to be if-converted as a result"));
143
145 "speculate-one-expensive-inst", cl::Hidden, cl::init(true),
146 cl::desc("Allow exactly one expensive instruction to be speculatively "
147 "executed"));
148
150 "max-speculation-depth", cl::Hidden, cl::init(10),
151 cl::desc("Limit maximum recursion depth when calculating costs of "
152 "speculatively executed instructions"));
153
154static cl::opt<int>
155 MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden,
156 cl::init(10),
157 cl::desc("Max size of a block which is still considered "
158 "small enough to thread through"));
159
160// Two is chosen to allow one negation and a logical combine.
162 BranchFoldThreshold("simplifycfg-branch-fold-threshold", cl::Hidden,
163 cl::init(2),
164 cl::desc("Maximum cost of combining conditions when "
165 "folding branches"));
166
168 "simplifycfg-branch-fold-common-dest-vector-multiplier", cl::Hidden,
169 cl::init(2),
170 cl::desc("Multiplier to apply to threshold when determining whether or not "
171 "to fold branch to common destination when vector operations are "
172 "present"));
173
175 "simplifycfg-merge-compatible-invokes", cl::Hidden, cl::init(true),
176 cl::desc("Allow SimplifyCFG to merge invokes together when appropriate"));
177
179 "max-switch-cases-per-result", cl::Hidden, cl::init(16),
180 cl::desc("Limit cases to analyze when converting a switch to select"));
181
182STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
183STATISTIC(NumLinearMaps,
184 "Number of switch instructions turned into linear mapping");
185STATISTIC(NumLookupTables,
186 "Number of switch instructions turned into lookup tables");
188 NumLookupTablesHoles,
189 "Number of switch instructions turned into lookup tables (holes checked)");
190STATISTIC(NumTableCmpReuses, "Number of reused switch table lookup compares");
191STATISTIC(NumFoldValueComparisonIntoPredecessors,
192 "Number of value comparisons folded into predecessor basic blocks");
193STATISTIC(NumFoldBranchToCommonDest,
194 "Number of branches folded into predecessor basic block");
196 NumHoistCommonCode,
197 "Number of common instruction 'blocks' hoisted up to the begin block");
198STATISTIC(NumHoistCommonInstrs,
199 "Number of common instructions hoisted up to the begin block");
200STATISTIC(NumSinkCommonCode,
201 "Number of common instruction 'blocks' sunk down to the end block");
202STATISTIC(NumSinkCommonInstrs,
203 "Number of common instructions sunk down to the end block");
204STATISTIC(NumSpeculations, "Number of speculative executed instructions");
205STATISTIC(NumInvokes,
206 "Number of invokes with empty resume blocks simplified into calls");
207STATISTIC(NumInvokesMerged, "Number of invokes that were merged together");
208STATISTIC(NumInvokeSetsFormed, "Number of invoke sets that were formed");
209
210namespace {
211
212// The first field contains the value that the switch produces when a certain
213// case group is selected, and the second field is a vector containing the
214// cases composing the case group.
215using SwitchCaseResultVectorTy =
217
218// The first field contains the phi node that generates a result of the switch
219// and the second field contains the value generated for a certain case in the
220// switch for that PHI.
221using SwitchCaseResultsTy = SmallVector<std::pair<PHINode *, Constant *>, 4>;
222
223/// ValueEqualityComparisonCase - Represents a case of a switch.
224struct ValueEqualityComparisonCase {
226 BasicBlock *Dest;
227
228 ValueEqualityComparisonCase(ConstantInt *Value, BasicBlock *Dest)
229 : Value(Value), Dest(Dest) {}
230
231 bool operator<(ValueEqualityComparisonCase RHS) const {
232 // Comparing pointers is ok as we only rely on the order for uniquing.
233 return Value < RHS.Value;
234 }
235
236 bool operator==(BasicBlock *RHSDest) const { return Dest == RHSDest; }
237};
238
239class SimplifyCFGOpt {
241 DomTreeUpdater *DTU;
242 const DataLayout &DL;
243 ArrayRef<WeakVH> LoopHeaders;
245 bool Resimplify;
246
247 Value *isValueEqualityComparison(Instruction *TI);
248 BasicBlock *GetValueEqualityComparisonCases(
249 Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases);
250 bool SimplifyEqualityComparisonWithOnlyPredecessor(Instruction *TI,
251 BasicBlock *Pred,
252 IRBuilder<> &Builder);
253 bool PerformValueComparisonIntoPredecessorFolding(Instruction *TI, Value *&CV,
254 Instruction *PTI,
255 IRBuilder<> &Builder);
256 bool FoldValueComparisonIntoPredecessors(Instruction *TI,
257 IRBuilder<> &Builder);
258
259 bool simplifyResume(ResumeInst *RI, IRBuilder<> &Builder);
260 bool simplifySingleResume(ResumeInst *RI);
261 bool simplifyCommonResume(ResumeInst *RI);
262 bool simplifyCleanupReturn(CleanupReturnInst *RI);
263 bool simplifyUnreachable(UnreachableInst *UI);
264 bool simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder);
265 bool simplifyIndirectBr(IndirectBrInst *IBI);
266 bool simplifyBranch(BranchInst *Branch, IRBuilder<> &Builder);
267 bool simplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder);
268 bool simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder);
269
270 bool tryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
271 IRBuilder<> &Builder);
272
273 bool hoistCommonCodeFromSuccessors(BasicBlock *BB, bool EqTermsOnly);
274 bool hoistSuccIdenticalTerminatorToSwitchOrIf(
275 Instruction *TI, Instruction *I1,
276 SmallVectorImpl<Instruction *> &OtherSuccTIs);
277 bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB);
278 bool SimplifyTerminatorOnSelect(Instruction *OldTerm, Value *Cond,
279 BasicBlock *TrueBB, BasicBlock *FalseBB,
280 uint32_t TrueWeight, uint32_t FalseWeight);
281 bool SimplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder,
282 const DataLayout &DL);
283 bool SimplifySwitchOnSelect(SwitchInst *SI, SelectInst *Select);
284 bool SimplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI);
285 bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder);
286
287public:
288 SimplifyCFGOpt(const TargetTransformInfo &TTI, DomTreeUpdater *DTU,
289 const DataLayout &DL, ArrayRef<WeakVH> LoopHeaders,
290 const SimplifyCFGOptions &Opts)
291 : TTI(TTI), DTU(DTU), DL(DL), LoopHeaders(LoopHeaders), Options(Opts) {
292 assert((!DTU || !DTU->hasPostDomTree()) &&
293 "SimplifyCFG is not yet capable of maintaining validity of a "
294 "PostDomTree, so don't ask for it.");
295 }
296
297 bool simplifyOnce(BasicBlock *BB);
298 bool run(BasicBlock *BB);
299
300 // Helper to set Resimplify and return change indication.
301 bool requestResimplify() {
302 Resimplify = true;
303 return true;
304 }
305};
306
307} // end anonymous namespace
308
309/// Return true if all the PHI nodes in the basic block \p BB
310/// receive compatible (identical) incoming values when coming from
311/// all of the predecessor blocks that are specified in \p IncomingBlocks.
312///
313/// Note that if the values aren't exactly identical, but \p EquivalenceSet
314/// is provided, and *both* of the values are present in the set,
315/// then they are considered equal.
317 BasicBlock *BB, ArrayRef<BasicBlock *> IncomingBlocks,
318 SmallPtrSetImpl<Value *> *EquivalenceSet = nullptr) {
319 assert(IncomingBlocks.size() == 2 &&
320 "Only for a pair of incoming blocks at the time!");
321
322 // FIXME: it is okay if one of the incoming values is an `undef` value,
323 // iff the other incoming value is guaranteed to be a non-poison value.
324 // FIXME: it is okay if one of the incoming values is a `poison` value.
325 return all_of(BB->phis(), [IncomingBlocks, EquivalenceSet](PHINode &PN) {
326 Value *IV0 = PN.getIncomingValueForBlock(IncomingBlocks[0]);
327 Value *IV1 = PN.getIncomingValueForBlock(IncomingBlocks[1]);
328 if (IV0 == IV1)
329 return true;
330 if (EquivalenceSet && EquivalenceSet->contains(IV0) &&
331 EquivalenceSet->contains(IV1))
332 return true;
333 return false;
334 });
335}
336
337/// Return true if it is safe to merge these two
338/// terminator instructions together.
339static bool
341 SmallSetVector<BasicBlock *, 4> *FailBlocks = nullptr) {
342 if (SI1 == SI2)
343 return false; // Can't merge with self!
344
345 // It is not safe to merge these two switch instructions if they have a common
346 // successor, and if that successor has a PHI node, and if *that* PHI node has
347 // conflicting incoming values from the two switch blocks.
348 BasicBlock *SI1BB = SI1->getParent();
349 BasicBlock *SI2BB = SI2->getParent();
350
351 SmallPtrSet<BasicBlock *, 16> SI1Succs(succ_begin(SI1BB), succ_end(SI1BB));
352 bool Fail = false;
353 for (BasicBlock *Succ : successors(SI2BB)) {
354 if (!SI1Succs.count(Succ))
355 continue;
356 if (IncomingValuesAreCompatible(Succ, {SI1BB, SI2BB}))
357 continue;
358 Fail = true;
359 if (FailBlocks)
360 FailBlocks->insert(Succ);
361 else
362 break;
363 }
364
365 return !Fail;
366}
367
368/// Update PHI nodes in Succ to indicate that there will now be entries in it
369/// from the 'NewPred' block. The values that will be flowing into the PHI nodes
370/// will be the same as those coming in from ExistPred, an existing predecessor
371/// of Succ.
372static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred,
373 BasicBlock *ExistPred,
374 MemorySSAUpdater *MSSAU = nullptr) {
375 for (PHINode &PN : Succ->phis())
376 PN.addIncoming(PN.getIncomingValueForBlock(ExistPred), NewPred);
377 if (MSSAU)
378 if (auto *MPhi = MSSAU->getMemorySSA()->getMemoryAccess(Succ))
379 MPhi->addIncoming(MPhi->getIncomingValueForBlock(ExistPred), NewPred);
380}
381
382/// Compute an abstract "cost" of speculating the given instruction,
383/// which is assumed to be safe to speculate. TCC_Free means cheap,
384/// TCC_Basic means less cheap, and TCC_Expensive means prohibitively
385/// expensive.
387 const TargetTransformInfo &TTI) {
388 assert((!isa<Instruction>(I) ||
389 isSafeToSpeculativelyExecute(cast<Instruction>(I))) &&
390 "Instruction is not safe to speculatively execute!");
392}
393
394/// If we have a merge point of an "if condition" as accepted above,
395/// return true if the specified value dominates the block. We
396/// don't handle the true generality of domination here, just a special case
397/// which works well enough for us.
398///
399/// If AggressiveInsts is non-null, and if V does not dominate BB, we check to
400/// see if V (which must be an instruction) and its recursive operands
401/// that do not dominate BB have a combined cost lower than Budget and
402/// are non-trapping. If both are true, the instruction is inserted into the
403/// set and true is returned.
404///
405/// The cost for most non-trapping instructions is defined as 1 except for
406/// Select whose cost is 2.
407///
408/// After this function returns, Cost is increased by the cost of
409/// V plus its non-dominating operands. If that cost is greater than
410/// Budget, false is returned and Cost is undefined.
412 SmallPtrSetImpl<Instruction *> &AggressiveInsts,
414 InstructionCost Budget,
416 unsigned Depth = 0) {
417 // It is possible to hit a zero-cost cycle (phi/gep instructions for example),
418 // so limit the recursion depth.
419 // TODO: While this recursion limit does prevent pathological behavior, it
420 // would be better to track visited instructions to avoid cycles.
422 return false;
423
424 Instruction *I = dyn_cast<Instruction>(V);
425 if (!I) {
426 // Non-instructions dominate all instructions and can be executed
427 // unconditionally.
428 return true;
429 }
430 BasicBlock *PBB = I->getParent();
431
432 // We don't want to allow weird loops that might have the "if condition" in
433 // the bottom of this block.
434 if (PBB == BB)
435 return false;
436
437 // If this instruction is defined in a block that contains an unconditional
438 // branch to BB, then it must be in the 'conditional' part of the "if
439 // statement". If not, it definitely dominates the region.
440 BranchInst *BI = dyn_cast<BranchInst>(PBB->getTerminator());
441 if (!BI || BI->isConditional() || BI->getSuccessor(0) != BB)
442 return true;
443
444 // If we have seen this instruction before, don't count it again.
445 if (AggressiveInsts.count(I))
446 return true;
447
448 // Okay, it looks like the instruction IS in the "condition". Check to
449 // see if it's a cheap instruction to unconditionally compute, and if it
450 // only uses stuff defined outside of the condition. If so, hoist it out.
452 return false;
453
455
456 // Allow exactly one instruction to be speculated regardless of its cost
457 // (as long as it is safe to do so).
458 // This is intended to flatten the CFG even if the instruction is a division
459 // or other expensive operation. The speculation of an expensive instruction
460 // is expected to be undone in CodeGenPrepare if the speculation has not
461 // enabled further IR optimizations.
462 if (Cost > Budget &&
463 (!SpeculateOneExpensiveInst || !AggressiveInsts.empty() || Depth > 0 ||
464 !Cost.isValid()))
465 return false;
466
467 // Okay, we can only really hoist these out if their operands do
468 // not take us over the cost threshold.
469 for (Use &Op : I->operands())
470 if (!dominatesMergePoint(Op, BB, AggressiveInsts, Cost, Budget, TTI,
471 Depth + 1))
472 return false;
473 // Okay, it's safe to do this! Remember this instruction.
474 AggressiveInsts.insert(I);
475 return true;
476}
477
478/// Extract ConstantInt from value, looking through IntToPtr
479/// and PointerNullValue. Return NULL if value is not a constant int.
481 // Normal constant int.
482 ConstantInt *CI = dyn_cast<ConstantInt>(V);
483 if (CI || !isa<Constant>(V) || !V->getType()->isPointerTy() ||
484 DL.isNonIntegralPointerType(V->getType()))
485 return CI;
486
487 // This is some kind of pointer constant. Turn it into a pointer-sized
488 // ConstantInt if possible.
489 IntegerType *PtrTy = cast<IntegerType>(DL.getIntPtrType(V->getType()));
490
491 // Null pointer means 0, see SelectionDAGBuilder::getValue(const Value*).
492 if (isa<ConstantPointerNull>(V))
493 return ConstantInt::get(PtrTy, 0);
494
495 // IntToPtr const int.
496 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
497 if (CE->getOpcode() == Instruction::IntToPtr)
498 if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(0))) {
499 // The constant is very likely to have the right type already.
500 if (CI->getType() == PtrTy)
501 return CI;
502 else
503 return cast<ConstantInt>(
504 ConstantFoldIntegerCast(CI, PtrTy, /*isSigned=*/false, DL));
505 }
506 return nullptr;
507}
508
509namespace {
510
511/// Given a chain of or (||) or and (&&) comparison of a value against a
512/// constant, this will try to recover the information required for a switch
513/// structure.
514/// It will depth-first traverse the chain of comparison, seeking for patterns
515/// like %a == 12 or %a < 4 and combine them to produce a set of integer
516/// representing the different cases for the switch.
517/// Note that if the chain is composed of '||' it will build the set of elements
518/// that matches the comparisons (i.e. any of this value validate the chain)
519/// while for a chain of '&&' it will build the set elements that make the test
520/// fail.
521struct ConstantComparesGatherer {
522 const DataLayout &DL;
523
524 /// Value found for the switch comparison
525 Value *CompValue = nullptr;
526
527 /// Extra clause to be checked before the switch
528 Value *Extra = nullptr;
529
530 /// Set of integers to match in switch
532
533 /// Number of comparisons matched in the and/or chain
534 unsigned UsedICmps = 0;
535
536 /// Construct and compute the result for the comparison instruction Cond
537 ConstantComparesGatherer(Instruction *Cond, const DataLayout &DL) : DL(DL) {
538 gather(Cond);
539 }
540
541 ConstantComparesGatherer(const ConstantComparesGatherer &) = delete;
542 ConstantComparesGatherer &
543 operator=(const ConstantComparesGatherer &) = delete;
544
545private:
546 /// Try to set the current value used for the comparison, it succeeds only if
547 /// it wasn't set before or if the new value is the same as the old one
548 bool setValueOnce(Value *NewVal) {
549 if (CompValue && CompValue != NewVal)
550 return false;
551 CompValue = NewVal;
552 return (CompValue != nullptr);
553 }
554
555 /// Try to match Instruction "I" as a comparison against a constant and
556 /// populates the array Vals with the set of values that match (or do not
557 /// match depending on isEQ).
558 /// Return false on failure. On success, the Value the comparison matched
559 /// against is placed in CompValue.
560 /// If CompValue is already set, the function is expected to fail if a match
561 /// is found but the value compared to is different.
562 bool matchInstruction(Instruction *I, bool isEQ) {
563 // If this is an icmp against a constant, handle this as one of the cases.
564 ICmpInst *ICI;
565 ConstantInt *C;
566 if (!((ICI = dyn_cast<ICmpInst>(I)) &&
567 (C = GetConstantInt(I->getOperand(1), DL)))) {
568 return false;
569 }
570
571 Value *RHSVal;
572 const APInt *RHSC;
573
574 // Pattern match a special case
575 // (x & ~2^z) == y --> x == y || x == y|2^z
576 // This undoes a transformation done by instcombine to fuse 2 compares.
577 if (ICI->getPredicate() == (isEQ ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE)) {
578 // It's a little bit hard to see why the following transformations are
579 // correct. Here is a CVC3 program to verify them for 64-bit values:
580
581 /*
582 ONE : BITVECTOR(64) = BVZEROEXTEND(0bin1, 63);
583 x : BITVECTOR(64);
584 y : BITVECTOR(64);
585 z : BITVECTOR(64);
586 mask : BITVECTOR(64) = BVSHL(ONE, z);
587 QUERY( (y & ~mask = y) =>
588 ((x & ~mask = y) <=> (x = y OR x = (y | mask)))
589 );
590 QUERY( (y | mask = y) =>
591 ((x | mask = y) <=> (x = y OR x = (y & ~mask)))
592 );
593 */
594
595 // Please note that each pattern must be a dual implication (<--> or
596 // iff). One directional implication can create spurious matches. If the
597 // implication is only one-way, an unsatisfiable condition on the left
598 // side can imply a satisfiable condition on the right side. Dual
599 // implication ensures that satisfiable conditions are transformed to
600 // other satisfiable conditions and unsatisfiable conditions are
601 // transformed to other unsatisfiable conditions.
602
603 // Here is a concrete example of a unsatisfiable condition on the left
604 // implying a satisfiable condition on the right:
605 //
606 // mask = (1 << z)
607 // (x & ~mask) == y --> (x == y || x == (y | mask))
608 //
609 // Substituting y = 3, z = 0 yields:
610 // (x & -2) == 3 --> (x == 3 || x == 2)
611
612 // Pattern match a special case:
613 /*
614 QUERY( (y & ~mask = y) =>
615 ((x & ~mask = y) <=> (x = y OR x = (y | mask)))
616 );
617 */
618 if (match(ICI->getOperand(0),
619 m_And(m_Value(RHSVal), m_APInt(RHSC)))) {
620 APInt Mask = ~*RHSC;
621 if (Mask.isPowerOf2() && (C->getValue() & ~Mask) == C->getValue()) {
622 // If we already have a value for the switch, it has to match!
623 if (!setValueOnce(RHSVal))
624 return false;
625
626 Vals.push_back(C);
627 Vals.push_back(
628 ConstantInt::get(C->getContext(),
629 C->getValue() | Mask));
630 UsedICmps++;
631 return true;
632 }
633 }
634
635 // Pattern match a special case:
636 /*
637 QUERY( (y | mask = y) =>
638 ((x | mask = y) <=> (x = y OR x = (y & ~mask)))
639 );
640 */
641 if (match(ICI->getOperand(0),
642 m_Or(m_Value(RHSVal), m_APInt(RHSC)))) {
643 APInt Mask = *RHSC;
644 if (Mask.isPowerOf2() && (C->getValue() | Mask) == C->getValue()) {
645 // If we already have a value for the switch, it has to match!
646 if (!setValueOnce(RHSVal))
647 return false;
648
649 Vals.push_back(C);
650 Vals.push_back(ConstantInt::get(C->getContext(),
651 C->getValue() & ~Mask));
652 UsedICmps++;
653 return true;
654 }
655 }
656
657 // If we already have a value for the switch, it has to match!
658 if (!setValueOnce(ICI->getOperand(0)))
659 return false;
660
661 UsedICmps++;
662 Vals.push_back(C);
663 return ICI->getOperand(0);
664 }
665
666 // If we have "x ult 3", for example, then we can add 0,1,2 to the set.
667 ConstantRange Span =
669
670 // Shift the range if the compare is fed by an add. This is the range
671 // compare idiom as emitted by instcombine.
672 Value *CandidateVal = I->getOperand(0);
673 if (match(I->getOperand(0), m_Add(m_Value(RHSVal), m_APInt(RHSC)))) {
674 Span = Span.subtract(*RHSC);
675 CandidateVal = RHSVal;
676 }
677
678 // If this is an and/!= check, then we are looking to build the set of
679 // value that *don't* pass the and chain. I.e. to turn "x ugt 2" into
680 // x != 0 && x != 1.
681 if (!isEQ)
682 Span = Span.inverse();
683
684 // If there are a ton of values, we don't want to make a ginormous switch.
685 if (Span.isSizeLargerThan(8) || Span.isEmptySet()) {
686 return false;
687 }
688
689 // If we already have a value for the switch, it has to match!
690 if (!setValueOnce(CandidateVal))
691 return false;
692
693 // Add all values from the range to the set
694 for (APInt Tmp = Span.getLower(); Tmp != Span.getUpper(); ++Tmp)
695 Vals.push_back(ConstantInt::get(I->getContext(), Tmp));
696
697 UsedICmps++;
698 return true;
699 }
700
701 /// Given a potentially 'or'd or 'and'd together collection of icmp
702 /// eq/ne/lt/gt instructions that compare a value against a constant, extract
703 /// the value being compared, and stick the list constants into the Vals
704 /// vector.
705 /// One "Extra" case is allowed to differ from the other.
706 void gather(Value *V) {
707 bool isEQ = match(V, m_LogicalOr(m_Value(), m_Value()));
708
709 // Keep a stack (SmallVector for efficiency) for depth-first traversal
712
713 // Initialize
714 Visited.insert(V);
715 DFT.push_back(V);
716
717 while (!DFT.empty()) {
718 V = DFT.pop_back_val();
719
720 if (Instruction *I = dyn_cast<Instruction>(V)) {
721 // If it is a || (or && depending on isEQ), process the operands.
722 Value *Op0, *Op1;
723 if (isEQ ? match(I, m_LogicalOr(m_Value(Op0), m_Value(Op1)))
724 : match(I, m_LogicalAnd(m_Value(Op0), m_Value(Op1)))) {
725 if (Visited.insert(Op1).second)
726 DFT.push_back(Op1);
727 if (Visited.insert(Op0).second)
728 DFT.push_back(Op0);
729
730 continue;
731 }
732
733 // Try to match the current instruction
734 if (matchInstruction(I, isEQ))
735 // Match succeed, continue the loop
736 continue;
737 }
738
739 // One element of the sequence of || (or &&) could not be match as a
740 // comparison against the same value as the others.
741 // We allow only one "Extra" case to be checked before the switch
742 if (!Extra) {
743 Extra = V;
744 continue;
745 }
746 // Failed to parse a proper sequence, abort now
747 CompValue = nullptr;
748 break;
749 }
750 }
751};
752
753} // end anonymous namespace
754
756 MemorySSAUpdater *MSSAU = nullptr) {
757 Instruction *Cond = nullptr;
758 if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
759 Cond = dyn_cast<Instruction>(SI->getCondition());
760 } else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
761 if (BI->isConditional())
762 Cond = dyn_cast<Instruction>(BI->getCondition());
763 } else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(TI)) {
764 Cond = dyn_cast<Instruction>(IBI->getAddress());
765 }
766
767 TI->eraseFromParent();
768 if (Cond)
770}
771
772/// Return true if the specified terminator checks
773/// to see if a value is equal to constant integer value.
774Value *SimplifyCFGOpt::isValueEqualityComparison(Instruction *TI) {
775 Value *CV = nullptr;
776 if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
777 // Do not permit merging of large switch instructions into their
778 // predecessors unless there is only one predecessor.
779 if (!SI->getParent()->hasNPredecessorsOrMore(128 / SI->getNumSuccessors()))
780 CV = SI->getCondition();
781 } else if (BranchInst *BI = dyn_cast<BranchInst>(TI))
782 if (BI->isConditional() && BI->getCondition()->hasOneUse())
783 if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition())) {
784 if (ICI->isEquality() && GetConstantInt(ICI->getOperand(1), DL))
785 CV = ICI->getOperand(0);
786 }
787
788 // Unwrap any lossless ptrtoint cast.
789 if (CV) {
790 if (PtrToIntInst *PTII = dyn_cast<PtrToIntInst>(CV)) {
791 Value *Ptr = PTII->getPointerOperand();
792 if (PTII->getType() == DL.getIntPtrType(Ptr->getType()))
793 CV = Ptr;
794 }
795 }
796 return CV;
797}
798
799/// Given a value comparison instruction,
800/// decode all of the 'cases' that it represents and return the 'default' block.
801BasicBlock *SimplifyCFGOpt::GetValueEqualityComparisonCases(
802 Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases) {
803 if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
804 Cases.reserve(SI->getNumCases());
805 for (auto Case : SI->cases())
806 Cases.push_back(ValueEqualityComparisonCase(Case.getCaseValue(),
807 Case.getCaseSuccessor()));
808 return SI->getDefaultDest();
809 }
810
811 BranchInst *BI = cast<BranchInst>(TI);
812 ICmpInst *ICI = cast<ICmpInst>(BI->getCondition());
813 BasicBlock *Succ = BI->getSuccessor(ICI->getPredicate() == ICmpInst::ICMP_NE);
814 Cases.push_back(ValueEqualityComparisonCase(
815 GetConstantInt(ICI->getOperand(1), DL), Succ));
816 return BI->getSuccessor(ICI->getPredicate() == ICmpInst::ICMP_EQ);
817}
818
819/// Given a vector of bb/value pairs, remove any entries
820/// in the list that match the specified block.
821static void
823 std::vector<ValueEqualityComparisonCase> &Cases) {
824 llvm::erase(Cases, BB);
825}
826
827/// Return true if there are any keys in C1 that exist in C2 as well.
828static bool ValuesOverlap(std::vector<ValueEqualityComparisonCase> &C1,
829 std::vector<ValueEqualityComparisonCase> &C2) {
830 std::vector<ValueEqualityComparisonCase> *V1 = &C1, *V2 = &C2;
831
832 // Make V1 be smaller than V2.
833 if (V1->size() > V2->size())
834 std::swap(V1, V2);
835
836 if (V1->empty())
837 return false;
838 if (V1->size() == 1) {
839 // Just scan V2.
840 ConstantInt *TheVal = (*V1)[0].Value;
841 for (const ValueEqualityComparisonCase &VECC : *V2)
842 if (TheVal == VECC.Value)
843 return true;
844 }
845
846 // Otherwise, just sort both lists and compare element by element.
847 array_pod_sort(V1->begin(), V1->end());
848 array_pod_sort(V2->begin(), V2->end());
849 unsigned i1 = 0, i2 = 0, e1 = V1->size(), e2 = V2->size();
850 while (i1 != e1 && i2 != e2) {
851 if ((*V1)[i1].Value == (*V2)[i2].Value)
852 return true;
853 if ((*V1)[i1].Value < (*V2)[i2].Value)
854 ++i1;
855 else
856 ++i2;
857 }
858 return false;
859}
860
861// Set branch weights on SwitchInst. This sets the metadata if there is at
862// least one non-zero weight.
864 // Check that there is at least one non-zero weight. Otherwise, pass
865 // nullptr to setMetadata which will erase the existing metadata.
866 MDNode *N = nullptr;
867 if (llvm::any_of(Weights, [](uint32_t W) { return W != 0; }))
868 N = MDBuilder(SI->getParent()->getContext()).createBranchWeights(Weights);
869 SI->setMetadata(LLVMContext::MD_prof, N);
870}
871
872// Similar to the above, but for branch and select instructions that take
873// exactly 2 weights.
874static void setBranchWeights(Instruction *I, uint32_t TrueWeight,
875 uint32_t FalseWeight) {
876 assert(isa<BranchInst>(I) || isa<SelectInst>(I));
877 // Check that there is at least one non-zero weight. Otherwise, pass
878 // nullptr to setMetadata which will erase the existing metadata.
879 MDNode *N = nullptr;
880 if (TrueWeight || FalseWeight)
881 N = MDBuilder(I->getParent()->getContext())
882 .createBranchWeights(TrueWeight, FalseWeight);
883 I->setMetadata(LLVMContext::MD_prof, N);
884}
885
886/// If TI is known to be a terminator instruction and its block is known to
887/// only have a single predecessor block, check to see if that predecessor is
888/// also a value comparison with the same value, and if that comparison
889/// determines the outcome of this comparison. If so, simplify TI. This does a
890/// very limited form of jump threading.
891bool SimplifyCFGOpt::SimplifyEqualityComparisonWithOnlyPredecessor(
892 Instruction *TI, BasicBlock *Pred, IRBuilder<> &Builder) {
893 Value *PredVal = isValueEqualityComparison(Pred->getTerminator());
894 if (!PredVal)
895 return false; // Not a value comparison in predecessor.
896
897 Value *ThisVal = isValueEqualityComparison(TI);
898 assert(ThisVal && "This isn't a value comparison!!");
899 if (ThisVal != PredVal)
900 return false; // Different predicates.
901
902 // TODO: Preserve branch weight metadata, similarly to how
903 // FoldValueComparisonIntoPredecessors preserves it.
904
905 // Find out information about when control will move from Pred to TI's block.
906 std::vector<ValueEqualityComparisonCase> PredCases;
907 BasicBlock *PredDef =
908 GetValueEqualityComparisonCases(Pred->getTerminator(), PredCases);
909 EliminateBlockCases(PredDef, PredCases); // Remove default from cases.
910
911 // Find information about how control leaves this block.
912 std::vector<ValueEqualityComparisonCase> ThisCases;
913 BasicBlock *ThisDef = GetValueEqualityComparisonCases(TI, ThisCases);
914 EliminateBlockCases(ThisDef, ThisCases); // Remove default from cases.
915
916 // If TI's block is the default block from Pred's comparison, potentially
917 // simplify TI based on this knowledge.
918 if (PredDef == TI->getParent()) {
919 // If we are here, we know that the value is none of those cases listed in
920 // PredCases. If there are any cases in ThisCases that are in PredCases, we
921 // can simplify TI.
922 if (!ValuesOverlap(PredCases, ThisCases))
923 return false;
924
925 if (isa<BranchInst>(TI)) {
926 // Okay, one of the successors of this condbr is dead. Convert it to a
927 // uncond br.
928 assert(ThisCases.size() == 1 && "Branch can only have one case!");
929 // Insert the new branch.
930 Instruction *NI = Builder.CreateBr(ThisDef);
931 (void)NI;
932
933 // Remove PHI node entries for the dead edge.
934 ThisCases[0].Dest->removePredecessor(PredDef);
935
936 LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
937 << "Through successor TI: " << *TI << "Leaving: " << *NI
938 << "\n");
939
941
942 if (DTU)
943 DTU->applyUpdates(
944 {{DominatorTree::Delete, PredDef, ThisCases[0].Dest}});
945
946 return true;
947 }
948
949 SwitchInstProfUpdateWrapper SI = *cast<SwitchInst>(TI);
950 // Okay, TI has cases that are statically dead, prune them away.
952 for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
953 DeadCases.insert(PredCases[i].Value);
954
955 LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
956 << "Through successor TI: " << *TI);
957
958 SmallDenseMap<BasicBlock *, int, 8> NumPerSuccessorCases;
959 for (SwitchInst::CaseIt i = SI->case_end(), e = SI->case_begin(); i != e;) {
960 --i;
961 auto *Successor = i->getCaseSuccessor();
962 if (DTU)
963 ++NumPerSuccessorCases[Successor];
964 if (DeadCases.count(i->getCaseValue())) {
965 Successor->removePredecessor(PredDef);
966 SI.removeCase(i);
967 if (DTU)
968 --NumPerSuccessorCases[Successor];
969 }
970 }
971
972 if (DTU) {
973 std::vector<DominatorTree::UpdateType> Updates;
974 for (const std::pair<BasicBlock *, int> &I : NumPerSuccessorCases)
975 if (I.second == 0)
976 Updates.push_back({DominatorTree::Delete, PredDef, I.first});
977 DTU->applyUpdates(Updates);
978 }
979
980 LLVM_DEBUG(dbgs() << "Leaving: " << *TI << "\n");
981 return true;
982 }
983
984 // Otherwise, TI's block must correspond to some matched value. Find out
985 // which value (or set of values) this is.
986 ConstantInt *TIV = nullptr;
987 BasicBlock *TIBB = TI->getParent();
988 for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
989 if (PredCases[i].Dest == TIBB) {
990 if (TIV)
991 return false; // Cannot handle multiple values coming to this block.
992 TIV = PredCases[i].Value;
993 }
994 assert(TIV && "No edge from pred to succ?");
995
996 // Okay, we found the one constant that our value can be if we get into TI's
997 // BB. Find out which successor will unconditionally be branched to.
998 BasicBlock *TheRealDest = nullptr;
999 for (unsigned i = 0, e = ThisCases.size(); i != e; ++i)
1000 if (ThisCases[i].Value == TIV) {
1001 TheRealDest = ThisCases[i].Dest;
1002 break;
1003 }
1004
1005 // If not handled by any explicit cases, it is handled by the default case.
1006 if (!TheRealDest)
1007 TheRealDest = ThisDef;
1008
1009 SmallPtrSet<BasicBlock *, 2> RemovedSuccs;
1010
1011 // Remove PHI node entries for dead edges.
1012 BasicBlock *CheckEdge = TheRealDest;
1013 for (BasicBlock *Succ : successors(TIBB))
1014 if (Succ != CheckEdge) {
1015 if (Succ != TheRealDest)
1016 RemovedSuccs.insert(Succ);
1017 Succ->removePredecessor(TIBB);
1018 } else
1019 CheckEdge = nullptr;
1020
1021 // Insert the new branch.
1022 Instruction *NI = Builder.CreateBr(TheRealDest);
1023 (void)NI;
1024
1025 LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
1026 << "Through successor TI: " << *TI << "Leaving: " << *NI
1027 << "\n");
1028
1030 if (DTU) {
1032 Updates.reserve(RemovedSuccs.size());
1033 for (auto *RemovedSucc : RemovedSuccs)
1034 Updates.push_back({DominatorTree::Delete, TIBB, RemovedSucc});
1035 DTU->applyUpdates(Updates);
1036 }
1037 return true;
1038}
1039
1040namespace {
1041
1042/// This class implements a stable ordering of constant
1043/// integers that does not depend on their address. This is important for
1044/// applications that sort ConstantInt's to ensure uniqueness.
1045struct ConstantIntOrdering {
1046 bool operator()(const ConstantInt *LHS, const ConstantInt *RHS) const {
1047 return LHS->getValue().ult(RHS->getValue());
1048 }
1049};
1050
1051} // end anonymous namespace
1052
1054 ConstantInt *const *P2) {
1055 const ConstantInt *LHS = *P1;
1056 const ConstantInt *RHS = *P2;
1057 if (LHS == RHS)
1058 return 0;
1059 return LHS->getValue().ult(RHS->getValue()) ? 1 : -1;
1060}
1061
1062/// Get Weights of a given terminator, the default weight is at the front
1063/// of the vector. If TI is a conditional eq, we need to swap the branch-weight
1064/// metadata.
1066 SmallVectorImpl<uint64_t> &Weights) {
1067 MDNode *MD = TI->getMetadata(LLVMContext::MD_prof);
1068 assert(MD);
1069 for (unsigned i = 1, e = MD->getNumOperands(); i < e; ++i) {
1070 ConstantInt *CI = mdconst::extract<ConstantInt>(MD->getOperand(i));
1071 Weights.push_back(CI->getValue().getZExtValue());
1072 }
1073
1074 // If TI is a conditional eq, the default case is the false case,
1075 // and the corresponding branch-weight data is at index 2. We swap the
1076 // default weight to be the first entry.
1077 if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
1078 assert(Weights.size() == 2);
1079 ICmpInst *ICI = cast<ICmpInst>(BI->getCondition());
1080 if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
1081 std::swap(Weights.front(), Weights.back());
1082 }
1083}
1084
1085/// Keep halving the weights until all can fit in uint32_t.
1087 uint64_t Max = *llvm::max_element(Weights);
1088 if (Max > UINT_MAX) {
1089 unsigned Offset = 32 - llvm::countl_zero(Max);
1090 for (uint64_t &I : Weights)
1091 I >>= Offset;
1092 }
1093}
1094
1096 BasicBlock *BB, BasicBlock *PredBlock, ValueToValueMapTy &VMap) {
1097 Instruction *PTI = PredBlock->getTerminator();
1098
1099 // If we have bonus instructions, clone them into the predecessor block.
1100 // Note that there may be multiple predecessor blocks, so we cannot move
1101 // bonus instructions to a predecessor block.
1102 for (Instruction &BonusInst : *BB) {
1103 if (BonusInst.isTerminator())
1104 continue;
1105
1106 Instruction *NewBonusInst = BonusInst.clone();
1107
1108 if (!isa<DbgInfoIntrinsic>(BonusInst) &&
1109 PTI->getDebugLoc() != NewBonusInst->getDebugLoc()) {
1110 // Unless the instruction has the same !dbg location as the original
1111 // branch, drop it. When we fold the bonus instructions we want to make
1112 // sure we reset their debug locations in order to avoid stepping on
1113 // dead code caused by folding dead branches.
1114 NewBonusInst->setDebugLoc(DebugLoc());
1115 }
1116
1117 RemapInstruction(NewBonusInst, VMap,
1119
1120 // If we speculated an instruction, we need to drop any metadata that may
1121 // result in undefined behavior, as the metadata might have been valid
1122 // only given the branch precondition.
1123 // Similarly strip attributes on call parameters that may cause UB in
1124 // location the call is moved to.
1125 NewBonusInst->dropUBImplyingAttrsAndMetadata();
1126
1127 NewBonusInst->insertInto(PredBlock, PTI->getIterator());
1128 auto Range = NewBonusInst->cloneDebugInfoFrom(&BonusInst);
1129 RemapDPValueRange(NewBonusInst->getModule(), Range, VMap,
1131
1132 if (isa<DbgInfoIntrinsic>(BonusInst))
1133 continue;
1134
1135 NewBonusInst->takeName(&BonusInst);
1136 BonusInst.setName(NewBonusInst->getName() + ".old");
1137 VMap[&BonusInst] = NewBonusInst;
1138
1139 // Update (liveout) uses of bonus instructions,
1140 // now that the bonus instruction has been cloned into predecessor.
1141 // Note that we expect to be in a block-closed SSA form for this to work!
1142 for (Use &U : make_early_inc_range(BonusInst.uses())) {
1143 auto *UI = cast<Instruction>(U.getUser());
1144 auto *PN = dyn_cast<PHINode>(UI);
1145 if (!PN) {
1146 assert(UI->getParent() == BB && BonusInst.comesBefore(UI) &&
1147 "If the user is not a PHI node, then it should be in the same "
1148 "block as, and come after, the original bonus instruction.");
1149 continue; // Keep using the original bonus instruction.
1150 }
1151 // Is this the block-closed SSA form PHI node?
1152 if (PN->getIncomingBlock(U) == BB)
1153 continue; // Great, keep using the original bonus instruction.
1154 // The only other alternative is an "use" when coming from
1155 // the predecessor block - here we should refer to the cloned bonus instr.
1156 assert(PN->getIncomingBlock(U) == PredBlock &&
1157 "Not in block-closed SSA form?");
1158 U.set(NewBonusInst);
1159 }
1160 }
1161}
1162
1163bool SimplifyCFGOpt::PerformValueComparisonIntoPredecessorFolding(
1164 Instruction *TI, Value *&CV, Instruction *PTI, IRBuilder<> &Builder) {
1165 BasicBlock *BB = TI->getParent();
1166 BasicBlock *Pred = PTI->getParent();
1167
1169
1170 // Figure out which 'cases' to copy from SI to PSI.
1171 std::vector<ValueEqualityComparisonCase> BBCases;
1172 BasicBlock *BBDefault = GetValueEqualityComparisonCases(TI, BBCases);
1173
1174 std::vector<ValueEqualityComparisonCase> PredCases;
1175 BasicBlock *PredDefault = GetValueEqualityComparisonCases(PTI, PredCases);
1176
1177 // Based on whether the default edge from PTI goes to BB or not, fill in
1178 // PredCases and PredDefault with the new switch cases we would like to
1179 // build.
1181
1182 // Update the branch weight metadata along the way
1184 bool PredHasWeights = hasBranchWeightMD(*PTI);
1185 bool SuccHasWeights = hasBranchWeightMD(*TI);
1186
1187 if (PredHasWeights) {
1188 GetBranchWeights(PTI, Weights);
1189 // branch-weight metadata is inconsistent here.
1190 if (Weights.size() != 1 + PredCases.size())
1191 PredHasWeights = SuccHasWeights = false;
1192 } else if (SuccHasWeights)
1193 // If there are no predecessor weights but there are successor weights,
1194 // populate Weights with 1, which will later be scaled to the sum of
1195 // successor's weights
1196 Weights.assign(1 + PredCases.size(), 1);
1197
1198 SmallVector<uint64_t, 8> SuccWeights;
1199 if (SuccHasWeights) {
1200 GetBranchWeights(TI, SuccWeights);
1201 // branch-weight metadata is inconsistent here.
1202 if (SuccWeights.size() != 1 + BBCases.size())
1203 PredHasWeights = SuccHasWeights = false;
1204 } else if (PredHasWeights)
1205 SuccWeights.assign(1 + BBCases.size(), 1);
1206
1207 if (PredDefault == BB) {
1208 // If this is the default destination from PTI, only the edges in TI
1209 // that don't occur in PTI, or that branch to BB will be activated.
1210 std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
1211 for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
1212 if (PredCases[i].Dest != BB)
1213 PTIHandled.insert(PredCases[i].Value);
1214 else {
1215 // The default destination is BB, we don't need explicit targets.
1216 std::swap(PredCases[i], PredCases.back());
1217
1218 if (PredHasWeights || SuccHasWeights) {
1219 // Increase weight for the default case.
1220 Weights[0] += Weights[i + 1];
1221 std::swap(Weights[i + 1], Weights.back());
1222 Weights.pop_back();
1223 }
1224
1225 PredCases.pop_back();
1226 --i;
1227 --e;
1228 }
1229
1230 // Reconstruct the new switch statement we will be building.
1231 if (PredDefault != BBDefault) {
1232 PredDefault->removePredecessor(Pred);
1233 if (DTU && PredDefault != BB)
1234 Updates.push_back({DominatorTree::Delete, Pred, PredDefault});
1235 PredDefault = BBDefault;
1236 ++NewSuccessors[BBDefault];
1237 }
1238
1239 unsigned CasesFromPred = Weights.size();
1240 uint64_t ValidTotalSuccWeight = 0;
1241 for (unsigned i = 0, e = BBCases.size(); i != e; ++i)
1242 if (!PTIHandled.count(BBCases[i].Value) && BBCases[i].Dest != BBDefault) {
1243 PredCases.push_back(BBCases[i]);
1244 ++NewSuccessors[BBCases[i].Dest];
1245 if (SuccHasWeights || PredHasWeights) {
1246 // The default weight is at index 0, so weight for the ith case
1247 // should be at index i+1. Scale the cases from successor by
1248 // PredDefaultWeight (Weights[0]).
1249 Weights.push_back(Weights[0] * SuccWeights[i + 1]);
1250 ValidTotalSuccWeight += SuccWeights[i + 1];
1251 }
1252 }
1253
1254 if (SuccHasWeights || PredHasWeights) {
1255 ValidTotalSuccWeight += SuccWeights[0];
1256 // Scale the cases from predecessor by ValidTotalSuccWeight.
1257 for (unsigned i = 1; i < CasesFromPred; ++i)
1258 Weights[i] *= ValidTotalSuccWeight;
1259 // Scale the default weight by SuccDefaultWeight (SuccWeights[0]).
1260 Weights[0] *= SuccWeights[0];
1261 }
1262 } else {
1263 // If this is not the default destination from PSI, only the edges
1264 // in SI that occur in PSI with a destination of BB will be
1265 // activated.
1266 std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
1267 std::map<ConstantInt *, uint64_t> WeightsForHandled;
1268 for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
1269 if (PredCases[i].Dest == BB) {
1270 PTIHandled.insert(PredCases[i].Value);
1271
1272 if (PredHasWeights || SuccHasWeights) {
1273 WeightsForHandled[PredCases[i].Value] = Weights[i + 1];
1274 std::swap(Weights[i + 1], Weights.back());
1275 Weights.pop_back();
1276 }
1277
1278 std::swap(PredCases[i], PredCases.back());
1279 PredCases.pop_back();
1280 --i;
1281 --e;
1282 }
1283
1284 // Okay, now we know which constants were sent to BB from the
1285 // predecessor. Figure out where they will all go now.
1286 for (unsigned i = 0, e = BBCases.size(); i != e; ++i)
1287 if (PTIHandled.count(BBCases[i].Value)) {
1288 // If this is one we are capable of getting...
1289 if (PredHasWeights || SuccHasWeights)
1290 Weights.push_back(WeightsForHandled[BBCases[i].Value]);
1291 PredCases.push_back(BBCases[i]);
1292 ++NewSuccessors[BBCases[i].Dest];
1293 PTIHandled.erase(BBCases[i].Value); // This constant is taken care of
1294 }
1295
1296 // If there are any constants vectored to BB that TI doesn't handle,
1297 // they must go to the default destination of TI.
1298 for (ConstantInt *I : PTIHandled) {
1299 if (PredHasWeights || SuccHasWeights)
1300 Weights.push_back(WeightsForHandled[I]);
1301 PredCases.push_back(ValueEqualityComparisonCase(I, BBDefault));
1302 ++NewSuccessors[BBDefault];
1303 }
1304 }
1305
1306 // Okay, at this point, we know which new successor Pred will get. Make
1307 // sure we update the number of entries in the PHI nodes for these
1308 // successors.
1309 SmallPtrSet<BasicBlock *, 2> SuccsOfPred;
1310 if (DTU) {
1311 SuccsOfPred = {succ_begin(Pred), succ_end(Pred)};
1312 Updates.reserve(Updates.size() + NewSuccessors.size());
1313 }
1314 for (const std::pair<BasicBlock *, int /*Num*/> &NewSuccessor :
1315 NewSuccessors) {
1316 for (auto I : seq(NewSuccessor.second)) {
1317 (void)I;
1318 AddPredecessorToBlock(NewSuccessor.first, Pred, BB);
1319 }
1320 if (DTU && !SuccsOfPred.contains(NewSuccessor.first))
1321 Updates.push_back({DominatorTree::Insert, Pred, NewSuccessor.first});
1322 }
1323
1324 Builder.SetInsertPoint(PTI);
1325 // Convert pointer to int before we switch.
1326 if (CV->getType()->isPointerTy()) {
1327 CV =
1328 Builder.CreatePtrToInt(CV, DL.getIntPtrType(CV->getType()), "magicptr");
1329 }
1330
1331 // Now that the successors are updated, create the new Switch instruction.
1332 SwitchInst *NewSI = Builder.CreateSwitch(CV, PredDefault, PredCases.size());
1333 NewSI->setDebugLoc(PTI->getDebugLoc());
1334 for (ValueEqualityComparisonCase &V : PredCases)
1335 NewSI->addCase(V.Value, V.Dest);
1336
1337 if (PredHasWeights || SuccHasWeights) {
1338 // Halve the weights if any of them cannot fit in an uint32_t
1339 FitWeights(Weights);
1340
1341 SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end());
1342
1343 setBranchWeights(NewSI, MDWeights);
1344 }
1345
1347
1348 // Okay, last check. If BB is still a successor of PSI, then we must
1349 // have an infinite loop case. If so, add an infinitely looping block
1350 // to handle the case to preserve the behavior of the code.
1351 BasicBlock *InfLoopBlock = nullptr;
1352 for (unsigned i = 0, e = NewSI->getNumSuccessors(); i != e; ++i)
1353 if (NewSI->getSuccessor(i) == BB) {
1354 if (!InfLoopBlock) {
1355 // Insert it at the end of the function, because it's either code,
1356 // or it won't matter if it's hot. :)
1357 InfLoopBlock =
1358 BasicBlock::Create(BB->getContext(), "infloop", BB->getParent());
1359 BranchInst::Create(InfLoopBlock, InfLoopBlock);
1360 if (DTU)
1361 Updates.push_back(
1362 {DominatorTree::Insert, InfLoopBlock, InfLoopBlock});
1363 }
1364 NewSI->setSuccessor(i, InfLoopBlock);
1365 }
1366
1367 if (DTU) {
1368 if (InfLoopBlock)
1369 Updates.push_back({DominatorTree::Insert, Pred, InfLoopBlock});
1370
1371 Updates.push_back({DominatorTree::Delete, Pred, BB});
1372
1373 DTU->applyUpdates(Updates);
1374 }
1375
1376 ++NumFoldValueComparisonIntoPredecessors;
1377 return true;
1378}
1379
1380/// The specified terminator is a value equality comparison instruction
1381/// (either a switch or a branch on "X == c").
1382/// See if any of the predecessors of the terminator block are value comparisons
1383/// on the same value. If so, and if safe to do so, fold them together.
1384bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(Instruction *TI,
1385 IRBuilder<> &Builder) {
1386 BasicBlock *BB = TI->getParent();
1387 Value *CV = isValueEqualityComparison(TI); // CondVal
1388 assert(CV && "Not a comparison?");
1389
1390 bool Changed = false;
1391
1393 while (!Preds.empty()) {
1394 BasicBlock *Pred = Preds.pop_back_val();
1395 Instruction *PTI = Pred->getTerminator();
1396
1397 // Don't try to fold into itself.
1398 if (Pred == BB)
1399 continue;
1400
1401 // See if the predecessor is a comparison with the same value.
1402 Value *PCV = isValueEqualityComparison(PTI); // PredCondVal
1403 if (PCV != CV)
1404 continue;
1405
1407 if (!SafeToMergeTerminators(TI, PTI, &FailBlocks)) {
1408 for (auto *Succ : FailBlocks) {
1409 if (!SplitBlockPredecessors(Succ, TI->getParent(), ".fold.split", DTU))
1410 return false;
1411 }
1412 }
1413
1414 PerformValueComparisonIntoPredecessorFolding(TI, CV, PTI, Builder);
1415 Changed = true;
1416 }
1417 return Changed;
1418}
1419
1420// If we would need to insert a select that uses the value of this invoke
1421// (comments in hoistSuccIdenticalTerminatorToSwitchOrIf explain why we would
1422// need to do this), we can't hoist the invoke, as there is nowhere to put the
1423// select in this case.
1425 Instruction *I1, Instruction *I2) {
1426 for (BasicBlock *Succ : successors(BB1)) {
1427 for (const PHINode &PN : Succ->phis()) {
1428 Value *BB1V = PN.getIncomingValueForBlock(BB1);
1429 Value *BB2V = PN.getIncomingValueForBlock(BB2);
1430 if (BB1V != BB2V && (BB1V == I1 || BB2V == I2)) {
1431 return false;
1432 }
1433 }
1434 }
1435 return true;
1436}
1437
1438// Get interesting characteristics of instructions that
1439// `hoistCommonCodeFromSuccessors` didn't hoist. They restrict what kind of
1440// instructions can be reordered across.
1446
1448 unsigned Flags = 0;
1449 if (I->mayReadFromMemory())
1450 Flags |= SkipReadMem;
1451 // We can't arbitrarily move around allocas, e.g. moving allocas (especially
1452 // inalloca) across stacksave/stackrestore boundaries.
1453 if (I->mayHaveSideEffects() || isa<AllocaInst>(I))
1454 Flags |= SkipSideEffect;
1456 Flags |= SkipImplicitControlFlow;
1457 return Flags;
1458}
1459
1460// Returns true if it is safe to reorder an instruction across preceding
1461// instructions in a basic block.
1462static bool isSafeToHoistInstr(Instruction *I, unsigned Flags) {
1463 // Don't reorder a store over a load.
1464 if ((Flags & SkipReadMem) && I->mayWriteToMemory())
1465 return false;
1466
1467 // If we have seen an instruction with side effects, it's unsafe to reorder an
1468 // instruction which reads memory or itself has side effects.
1469 if ((Flags & SkipSideEffect) &&
1470 (I->mayReadFromMemory() || I->mayHaveSideEffects() || isa<AllocaInst>(I)))
1471 return false;
1472
1473 // Reordering across an instruction which does not necessarily transfer
1474 // control to the next instruction is speculation.
1476 return false;
1477
1478 // Hoisting of llvm.deoptimize is only legal together with the next return
1479 // instruction, which this pass is not always able to do.
1480 if (auto *CB = dyn_cast<CallBase>(I))
1481 if (CB->getIntrinsicID() == Intrinsic::experimental_deoptimize)
1482 return false;
1483
1484 // It's also unsafe/illegal to hoist an instruction above its instruction
1485 // operands
1486 BasicBlock *BB = I->getParent();
1487 for (Value *Op : I->operands()) {
1488 if (auto *J = dyn_cast<Instruction>(Op))
1489 if (J->getParent() == BB)
1490 return false;
1491 }
1492
1493 return true;
1494}
1495
1496static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified = false);
1497
1498/// Helper function for hoistCommonCodeFromSuccessors. Return true if identical
1499/// instructions \p I1 and \p I2 can and should be hoisted.
1501 const TargetTransformInfo &TTI) {
1502 // If we're going to hoist a call, make sure that the two instructions
1503 // we're commoning/hoisting are both marked with musttail, or neither of
1504 // them is marked as such. Otherwise, we might end up in a situation where
1505 // we hoist from a block where the terminator is a `ret` to a block where
1506 // the terminator is a `br`, and `musttail` calls expect to be followed by
1507 // a return.
1508 auto *C1 = dyn_cast<CallInst>(I1);
1509 auto *C2 = dyn_cast<CallInst>(I2);
1510 if (C1 && C2)
1511 if (C1->isMustTailCall() != C2->isMustTailCall())
1512 return false;
1513
1515 return false;
1516
1517 // If any of the two call sites has nomerge or convergent attribute, stop
1518 // hoisting.
1519 if (const auto *CB1 = dyn_cast<CallBase>(I1))
1520 if (CB1->cannotMerge() || CB1->isConvergent())
1521 return false;
1522 if (const auto *CB2 = dyn_cast<CallBase>(I2))
1523 if (CB2->cannotMerge() || CB2->isConvergent())
1524 return false;
1525
1526 return true;
1527}
1528
1529/// Hoists DPValues from \p I1 and \p OtherInstrs that are identical in
1530/// lock-step to \p TI. This matches how dbg.* intrinsics are hoisting in
1531/// hoistCommonCodeFromSuccessors. e.g. The input:
1532/// I1 DPVs: { x, z },
1533/// OtherInsts: { I2 DPVs: { x, y, z } }
1534/// would result in hoisting only DPValue x.
1535static void
1537 SmallVectorImpl<Instruction *> &OtherInsts) {
1538 if (!I1->hasDbgRecords())
1539 return;
1540 using CurrentAndEndIt =
1541 std::pair<DbgRecord::self_iterator, DbgRecord::self_iterator>;
1542 // Vector of {Current, End} iterators.
1544 Itrs.reserve(OtherInsts.size() + 1);
1545 // Helper lambdas for lock-step checks:
1546 // Return true if this Current == End.
1547 auto atEnd = [](const CurrentAndEndIt &Pair) {
1548 return Pair.first == Pair.second;
1549 };
1550 // Return true if all Current are identical.
1551 auto allIdentical = [](const SmallVector<CurrentAndEndIt> &Itrs) {
1552 return all_of(make_first_range(ArrayRef(Itrs).drop_front()),
1554 return Itrs[0].first->isIdenticalToWhenDefined(*I);
1555 });
1556 };
1557
1558 // Collect the iterators.
1559 Itrs.push_back(
1560 {I1->getDbgRecordRange().begin(), I1->getDbgRecordRange().end()});
1561 for (Instruction *Other : OtherInsts) {
1562 if (!Other->hasDbgRecords())
1563 return;
1564 Itrs.push_back(
1565 {Other->getDbgRecordRange().begin(), Other->getDbgRecordRange().end()});
1566 }
1567
1568 // Iterate in lock-step until any of the DbgRecord lists are exausted. If
1569 // the lock-step DbgRecord are identical, hoist all of them to TI.
1570 // This replicates the dbg.* intrinsic behaviour in
1571 // hoistCommonCodeFromSuccessors.
1572 while (none_of(Itrs, atEnd)) {
1573 bool HoistDPVs = allIdentical(Itrs);
1574 for (CurrentAndEndIt &Pair : Itrs) {
1575 // Increment Current iterator now as we may be about to move the
1576 // DbgRecord.
1577 DbgRecord &DR = *Pair.first++;
1578 if (HoistDPVs) {
1579 DR.removeFromParent();
1580 TI->getParent()->insertDbgRecordBefore(&DR, TI->getIterator());
1581 }
1582 }
1583 }
1584}
1585
1586/// Hoist any common code in the successor blocks up into the block. This
1587/// function guarantees that BB dominates all successors. If EqTermsOnly is
1588/// given, only perform hoisting in case both blocks only contain a terminator.
1589/// In that case, only the original BI will be replaced and selects for PHIs are
1590/// added.
1591bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(BasicBlock *BB,
1592 bool EqTermsOnly) {
1593 // This does very trivial matching, with limited scanning, to find identical
1594 // instructions in the two blocks. In particular, we don't want to get into
1595 // O(N1*N2*...) situations here where Ni are the sizes of these successors. As
1596 // such, we currently just scan for obviously identical instructions in an
1597 // identical order, possibly separated by the same number of non-identical
1598 // instructions.
1599 unsigned int SuccSize = succ_size(BB);
1600 if (SuccSize < 2)
1601 return false;
1602
1603 // If either of the blocks has it's address taken, then we can't do this fold,
1604 // because the code we'd hoist would no longer run when we jump into the block
1605 // by it's address.
1606 for (auto *Succ : successors(BB))
1607 if (Succ->hasAddressTaken() || !Succ->getSinglePredecessor())
1608 return false;
1609
1610 auto *TI = BB->getTerminator();
1611
1612 // The second of pair is a SkipFlags bitmask.
1613 using SuccIterPair = std::pair<BasicBlock::iterator, unsigned>;
1614 SmallVector<SuccIterPair, 8> SuccIterPairs;
1615 for (auto *Succ : successors(BB)) {
1616 BasicBlock::iterator SuccItr = Succ->begin();
1617 if (isa<PHINode>(*SuccItr))
1618 return false;
1619 SuccIterPairs.push_back(SuccIterPair(SuccItr, 0));
1620 }
1621
1622 // Check if only hoisting terminators is allowed. This does not add new
1623 // instructions to the hoist location.
1624 if (EqTermsOnly) {
1625 // Skip any debug intrinsics, as they are free to hoist.
1626 for (auto &SuccIter : make_first_range(SuccIterPairs)) {
1627 auto *INonDbg = &*skipDebugIntrinsics(SuccIter);
1628 if (!INonDbg->isTerminator())
1629 return false;
1630 }
1631 // Now we know that we only need to hoist debug intrinsics and the
1632 // terminator. Let the loop below handle those 2 cases.
1633 }
1634
1635 // Count how many instructions were not hoisted so far. There's a limit on how
1636 // many instructions we skip, serving as a compilation time control as well as
1637 // preventing excessive increase of life ranges.
1638 unsigned NumSkipped = 0;
1639 // If we find an unreachable instruction at the beginning of a basic block, we
1640 // can still hoist instructions from the rest of the basic blocks.
1641 if (SuccIterPairs.size() > 2) {
1642 erase_if(SuccIterPairs,
1643 [](const auto &Pair) { return isa<UnreachableInst>(Pair.first); });
1644 if (SuccIterPairs.size() < 2)
1645 return false;
1646 }
1647
1648 bool Changed = false;
1649
1650 for (;;) {
1651 auto *SuccIterPairBegin = SuccIterPairs.begin();
1652 auto &BB1ItrPair = *SuccIterPairBegin++;
1653 auto OtherSuccIterPairRange =
1654 iterator_range(SuccIterPairBegin, SuccIterPairs.end());
1655 auto OtherSuccIterRange = make_first_range(OtherSuccIterPairRange);
1656
1657 Instruction *I1 = &*BB1ItrPair.first;
1658
1659 // Skip debug info if it is not identical.
1660 bool AllDbgInstsAreIdentical = all_of(OtherSuccIterRange, [I1](auto &Iter) {
1661 Instruction *I2 = &*Iter;
1662 return I1->isIdenticalToWhenDefined(I2);
1663 });
1664 if (!AllDbgInstsAreIdentical) {
1665 while (isa<DbgInfoIntrinsic>(I1))
1666 I1 = &*++BB1ItrPair.first;
1667 for (auto &SuccIter : OtherSuccIterRange) {
1668 Instruction *I2 = &*SuccIter;
1669 while (isa<DbgInfoIntrinsic>(I2))
1670 I2 = &*++SuccIter;
1671 }
1672 }
1673
1674 bool AllInstsAreIdentical = true;
1675 bool HasTerminator = I1->isTerminator();
1676 for (auto &SuccIter : OtherSuccIterRange) {
1677 Instruction *I2 = &*SuccIter;
1678 HasTerminator |= I2->isTerminator();
1679 if (AllInstsAreIdentical && !I1->isIdenticalToWhenDefined(I2))
1680 AllInstsAreIdentical = false;
1681 }
1682
1684 for (auto &SuccIter : OtherSuccIterRange)
1685 OtherInsts.push_back(&*SuccIter);
1686
1687 // If we are hoisting the terminator instruction, don't move one (making a
1688 // broken BB), instead clone it, and remove BI.
1689 if (HasTerminator) {
1690 // Even if BB, which contains only one unreachable instruction, is ignored
1691 // at the beginning of the loop, we can hoist the terminator instruction.
1692 // If any instructions remain in the block, we cannot hoist terminators.
1693 if (NumSkipped || !AllInstsAreIdentical) {
1694 hoistLockstepIdenticalDPValues(TI, I1, OtherInsts);
1695 return Changed;
1696 }
1697
1698 return hoistSuccIdenticalTerminatorToSwitchOrIf(TI, I1, OtherInsts) ||
1699 Changed;
1700 }
1701
1702 if (AllInstsAreIdentical) {
1703 unsigned SkipFlagsBB1 = BB1ItrPair.second;
1704 AllInstsAreIdentical =
1705 isSafeToHoistInstr(I1, SkipFlagsBB1) &&
1706 all_of(OtherSuccIterPairRange, [=](const auto &Pair) {
1707 Instruction *I2 = &*Pair.first;
1708 unsigned SkipFlagsBB2 = Pair.second;
1709 // Even if the instructions are identical, it may not
1710 // be safe to hoist them if we have skipped over
1711 // instructions with side effects or their operands
1712 // weren't hoisted.
1713 return isSafeToHoistInstr(I2, SkipFlagsBB2) &&
1715 });
1716 }
1717
1718 if (AllInstsAreIdentical) {
1719 BB1ItrPair.first++;
1720 if (isa<DbgInfoIntrinsic>(I1)) {
1721 // The debug location is an integral part of a debug info intrinsic
1722 // and can't be separated from it or replaced. Instead of attempting
1723 // to merge locations, simply hoist both copies of the intrinsic.
1724 hoistLockstepIdenticalDPValues(TI, I1, OtherInsts);
1725 // We've just hoisted DPValues; move I1 after them (before TI) and
1726 // leave any that were not hoisted behind (by calling moveBefore
1727 // rather than moveBeforePreserving).
1728 I1->moveBefore(TI);
1729 for (auto &SuccIter : OtherSuccIterRange) {
1730 auto *I2 = &*SuccIter++;
1731 assert(isa<DbgInfoIntrinsic>(I2));
1732 I2->moveBefore(TI);
1733 }
1734 } else {
1735 // For a normal instruction, we just move one to right before the
1736 // branch, then replace all uses of the other with the first. Finally,
1737 // we remove the now redundant second instruction.
1738 hoistLockstepIdenticalDPValues(TI, I1, OtherInsts);
1739 // We've just hoisted DPValues; move I1 after them (before TI) and
1740 // leave any that were not hoisted behind (by calling moveBefore
1741 // rather than moveBeforePreserving).
1742 I1->moveBefore(TI);
1743 for (auto &SuccIter : OtherSuccIterRange) {
1744 Instruction *I2 = &*SuccIter++;
1745 assert(I2 != I1);
1746 if (!I2->use_empty())
1747 I2->replaceAllUsesWith(I1);
1748 I1->andIRFlags(I2);
1749 combineMetadataForCSE(I1, I2, true);
1750 // I1 and I2 are being combined into a single instruction. Its debug
1751 // location is the merged locations of the original instructions.
1752 I1->applyMergedLocation(I1->getDebugLoc(), I2->getDebugLoc());
1753 I2->eraseFromParent();
1754 }
1755 }
1756 if (!Changed)
1757 NumHoistCommonCode += SuccIterPairs.size();
1758 Changed = true;
1759 NumHoistCommonInstrs += SuccIterPairs.size();
1760 } else {
1761 if (NumSkipped >= HoistCommonSkipLimit) {
1762 hoistLockstepIdenticalDPValues(TI, I1, OtherInsts);
1763 return Changed;
1764 }
1765 // We are about to skip over a pair of non-identical instructions. Record
1766 // if any have characteristics that would prevent reordering instructions
1767 // across them.
1768 for (auto &SuccIterPair : SuccIterPairs) {
1769 Instruction *I = &*SuccIterPair.first++;
1770 SuccIterPair.second |= skippedInstrFlags(I);
1771 }
1772 ++NumSkipped;
1773 }
1774 }
1775}
1776
1777bool SimplifyCFGOpt::hoistSuccIdenticalTerminatorToSwitchOrIf(
1778 Instruction *TI, Instruction *I1,
1779 SmallVectorImpl<Instruction *> &OtherSuccTIs) {
1780
1781 auto *BI = dyn_cast<BranchInst>(TI);
1782
1783 bool Changed = false;
1784 BasicBlock *TIParent = TI->getParent();
1785 BasicBlock *BB1 = I1->getParent();
1786
1787 // Use only for an if statement.
1788 auto *I2 = *OtherSuccTIs.begin();
1789 auto *BB2 = I2->getParent();
1790 if (BI) {
1791 assert(OtherSuccTIs.size() == 1);
1792 assert(BI->getSuccessor(0) == I1->getParent());
1793 assert(BI->getSuccessor(1) == I2->getParent());
1794 }
1795
1796 // In the case of an if statement, we try to hoist an invoke.
1797 // FIXME: Can we define a safety predicate for CallBr?
1798 // FIXME: Test case llvm/test/Transforms/SimplifyCFG/2009-06-15-InvokeCrash.ll
1799 // removed in 4c923b3b3fd0ac1edebf0603265ca3ba51724937 commit?
1800 if (isa<InvokeInst>(I1) && (!BI || !isSafeToHoistInvoke(BB1, BB2, I1, I2)))
1801 return false;
1802
1803 // TODO: callbr hoisting currently disabled pending further study.
1804 if (isa<CallBrInst>(I1))
1805 return false;
1806
1807 for (BasicBlock *Succ : successors(BB1)) {
1808 for (PHINode &PN : Succ->phis()) {
1809 Value *BB1V = PN.getIncomingValueForBlock(BB1);
1810 for (Instruction *OtherSuccTI : OtherSuccTIs) {
1811 Value *BB2V = PN.getIncomingValueForBlock(OtherSuccTI->getParent());
1812 if (BB1V == BB2V)
1813 continue;
1814
1815 // In the case of an if statement, check for
1816 // passingValueIsAlwaysUndefined here because we would rather eliminate
1817 // undefined control flow then converting it to a select.
1818 if (!BI || passingValueIsAlwaysUndefined(BB1V, &PN) ||
1820 return false;
1821 }
1822 }
1823 }
1824
1825 // Hoist DPValues attached to the terminator to match dbg.* intrinsic hoisting
1826 // behaviour in hoistCommonCodeFromSuccessors.
1827 hoistLockstepIdenticalDPValues(TI, I1, OtherSuccTIs);
1828 // Clone the terminator and hoist it into the pred, without any debug info.
1829 Instruction *NT = I1->clone();
1830 NT->insertInto(TIParent, TI->getIterator());
1831 if (!NT->getType()->isVoidTy()) {
1832 I1->replaceAllUsesWith(NT);
1833 for (Instruction *OtherSuccTI : OtherSuccTIs)
1834 OtherSuccTI->replaceAllUsesWith(NT);
1835 NT->takeName(I1);
1836 }
1837 Changed = true;
1838 NumHoistCommonInstrs += OtherSuccTIs.size() + 1;
1839
1840 // Ensure terminator gets a debug location, even an unknown one, in case
1841 // it involves inlinable calls.
1843 Locs.push_back(I1->getDebugLoc());
1844 for (auto *OtherSuccTI : OtherSuccTIs)
1845 Locs.push_back(OtherSuccTI->getDebugLoc());
1846 NT->setDebugLoc(DILocation::getMergedLocations(Locs));
1847
1848 // PHIs created below will adopt NT's merged DebugLoc.
1849 IRBuilder<NoFolder> Builder(NT);
1850
1851 // In the case of an if statement, hoisting one of the terminators from our
1852 // successor is a great thing. Unfortunately, the successors of the if/else
1853 // blocks may have PHI nodes in them. If they do, all PHI entries for BB1/BB2
1854 // must agree for all PHI nodes, so we insert select instruction to compute
1855 // the final result.
1856 if (BI) {
1857 std::map<std::pair<Value *, Value *>, SelectInst *> InsertedSelects;
1858 for (BasicBlock *Succ : successors(BB1)) {
1859 for (PHINode &PN : Succ->phis()) {
1860 Value *BB1V = PN.getIncomingValueForBlock(BB1);
1861 Value *BB2V = PN.getIncomingValueForBlock(BB2);
1862 if (BB1V == BB2V)
1863 continue;
1864
1865 // These values do not agree. Insert a select instruction before NT
1866 // that determines the right value.
1867 SelectInst *&SI = InsertedSelects[std::make_pair(BB1V, BB2V)];
1868 if (!SI) {
1869 // Propagate fast-math-flags from phi node to its replacement select.
1870 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
1871 if (isa<FPMathOperator>(PN))
1872 Builder.setFastMathFlags(PN.getFastMathFlags());
1873
1874 SI = cast<SelectInst>(Builder.CreateSelect(
1875 BI->getCondition(), BB1V, BB2V,
1876 BB1V->getName() + "." + BB2V->getName(), BI));
1877 }
1878
1879 // Make the PHI node use the select for all incoming values for BB1/BB2
1880 for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
1881 if (PN.getIncomingBlock(i) == BB1 || PN.getIncomingBlock(i) == BB2)
1882 PN.setIncomingValue(i, SI);
1883 }
1884 }
1885 }
1886
1888
1889 // Update any PHI nodes in our new successors.
1890 for (BasicBlock *Succ : successors(BB1)) {
1891 AddPredecessorToBlock(Succ, TIParent, BB1);
1892 if (DTU)
1893 Updates.push_back({DominatorTree::Insert, TIParent, Succ});
1894 }
1895
1896 if (DTU)
1897 for (BasicBlock *Succ : successors(TI))
1898 Updates.push_back({DominatorTree::Delete, TIParent, Succ});
1899
1901 if (DTU)
1902 DTU->applyUpdates(Updates);
1903 return Changed;
1904}
1905
1906// Check lifetime markers.
1907static bool isLifeTimeMarker(const Instruction *I) {
1908 if (auto II = dyn_cast<IntrinsicInst>(I)) {
1909 switch (II->getIntrinsicID()) {
1910 default:
1911 break;
1912 case Intrinsic::lifetime_start:
1913 case Intrinsic::lifetime_end:
1914 return true;
1915 }
1916 }
1917 return false;
1918}
1919
1920// TODO: Refine this. This should avoid cases like turning constant memcpy sizes
1921// into variables.
1923 int OpIdx) {
1924 return !isa<IntrinsicInst>(I);
1925}
1926
1927// All instructions in Insts belong to different blocks that all unconditionally
1928// branch to a common successor. Analyze each instruction and return true if it
1929// would be possible to sink them into their successor, creating one common
1930// instruction instead. For every value that would be required to be provided by
1931// PHI node (because an operand varies in each input block), add to PHIOperands.
1934 DenseMap<Instruction *, SmallVector<Value *, 4>> &PHIOperands) {
1935 // Prune out obviously bad instructions to move. Each instruction must have
1936 // exactly zero or one use, and we check later that use is by a single, common
1937 // PHI instruction in the successor.
1938 bool HasUse = !Insts.front()->user_empty();
1939 for (auto *I : Insts) {
1940 // These instructions may change or break semantics if moved.
1941 if (isa<PHINode>(I) || I->isEHPad() || isa<AllocaInst>(I) ||
1942 I->getType()->isTokenTy())
1943 return false;
1944
1945 // Do not try to sink an instruction in an infinite loop - it can cause
1946 // this algorithm to infinite loop.
1947 if (I->getParent()->getSingleSuccessor() == I->getParent())
1948 return false;
1949
1950 // Conservatively return false if I is an inline-asm instruction. Sinking
1951 // and merging inline-asm instructions can potentially create arguments
1952 // that cannot satisfy the inline-asm constraints.
1953 // If the instruction has nomerge or convergent attribute, return false.
1954 if (const auto *C = dyn_cast<CallBase>(I))
1955 if (C->isInlineAsm() || C->cannotMerge() || C->isConvergent())
1956 return false;
1957
1958 // Each instruction must have zero or one use.
1959 if (HasUse && !I->hasOneUse())
1960 return false;
1961 if (!HasUse && !I->user_empty())
1962 return false;
1963 }
1964
1965 const Instruction *I0 = Insts.front();
1966 for (auto *I : Insts) {
1967 if (!I->isSameOperationAs(I0))
1968 return false;
1969
1970 // swifterror pointers can only be used by a load or store; sinking a load
1971 // or store would require introducing a select for the pointer operand,
1972 // which isn't allowed for swifterror pointers.
1973 if (isa<StoreInst>(I) && I->getOperand(1)->isSwiftError())
1974 return false;
1975 if (isa<LoadInst>(I) && I->getOperand(0)->isSwiftError())
1976 return false;
1977 }
1978
1979 // All instructions in Insts are known to be the same opcode. If they have a
1980 // use, check that the only user is a PHI or in the same block as the
1981 // instruction, because if a user is in the same block as an instruction we're
1982 // contemplating sinking, it must already be determined to be sinkable.
1983 if (HasUse) {
1984 auto *PNUse = dyn_cast<PHINode>(*I0->user_begin());
1985 auto *Succ = I0->getParent()->getTerminator()->getSuccessor(0);
1986 if (!all_of(Insts, [&PNUse,&Succ](const Instruction *I) -> bool {
1987 auto *U = cast<Instruction>(*I->user_begin());
1988 return (PNUse &&
1989 PNUse->getParent() == Succ &&
1990 PNUse->getIncomingValueForBlock(I->getParent()) == I) ||
1991 U->getParent() == I->getParent();
1992 }))
1993 return false;
1994 }
1995
1996 // Because SROA can't handle speculating stores of selects, try not to sink
1997 // loads, stores or lifetime markers of allocas when we'd have to create a
1998 // PHI for the address operand. Also, because it is likely that loads or
1999 // stores of allocas will disappear when Mem2Reg/SROA is run, don't sink
2000 // them.
2001 // This can cause code churn which can have unintended consequences down
2002 // the line - see https://llvm.org/bugs/show_bug.cgi?id=30244.
2003 // FIXME: This is a workaround for a deficiency in SROA - see
2004 // https://llvm.org/bugs/show_bug.cgi?id=30188
2005 if (isa<StoreInst>(I0) && any_of(Insts, [](const Instruction *I) {
2006 return isa<AllocaInst>(I->getOperand(1)->stripPointerCasts());
2007 }))
2008 return false;
2009 if (isa<LoadInst>(I0) && any_of(Insts, [](const Instruction *I) {
2010 return isa<AllocaInst>(I->getOperand(0)->stripPointerCasts());
2011 }))
2012 return false;
2013 if (isLifeTimeMarker(I0) && any_of(Insts, [](const Instruction *I) {
2014 return isa<AllocaInst>(I->getOperand(1)->stripPointerCasts());
2015 }))
2016 return false;
2017
2018 // For calls to be sinkable, they must all be indirect, or have same callee.
2019 // I.e. if we have two direct calls to different callees, we don't want to
2020 // turn that into an indirect call. Likewise, if we have an indirect call,
2021 // and a direct call, we don't actually want to have a single indirect call.
2022 if (isa<CallBase>(I0)) {
2023 auto IsIndirectCall = [](const Instruction *I) {
2024 return cast<CallBase>(I)->isIndirectCall();
2025 };
2026 bool HaveIndirectCalls = any_of(Insts, IsIndirectCall);
2027 bool AllCallsAreIndirect = all_of(Insts, IsIndirectCall);
2028 if (HaveIndirectCalls) {
2029 if (!AllCallsAreIndirect)
2030 return false;
2031 } else {
2032 // All callees must be identical.
2033 Value *Callee = nullptr;
2034 for (const Instruction *I : Insts) {
2035 Value *CurrCallee = cast<CallBase>(I)->getCalledOperand();
2036 if (!Callee)
2037 Callee = CurrCallee;
2038 else if (Callee != CurrCallee)
2039 return false;
2040 }
2041 }
2042 }
2043
2044 for (unsigned OI = 0, OE = I0->getNumOperands(); OI != OE; ++OI) {
2045 Value *Op = I0->getOperand(OI);
2046 if (Op->getType()->isTokenTy())
2047 // Don't touch any operand of token type.
2048 return false;
2049
2050 auto SameAsI0 = [&I0, OI](const Instruction *I) {
2051 assert(I->getNumOperands() == I0->getNumOperands());
2052 return I->getOperand(OI) == I0->getOperand(OI);
2053 };
2054 if (!all_of(Insts, SameAsI0)) {
2055 if ((isa<Constant>(Op) && !replacingOperandWithVariableIsCheap(I0, OI)) ||
2057 // We can't create a PHI from this GEP.
2058 return false;
2059 for (auto *I : Insts)
2060 PHIOperands[I].push_back(I->getOperand(OI));
2061 }
2062 }
2063 return true;
2064}
2065
2066// Assuming canSinkInstructions(Blocks) has returned true, sink the last
2067// instruction of every block in Blocks to their common successor, commoning
2068// into one instruction.
2070 auto *BBEnd = Blocks[0]->getTerminator()->getSuccessor(0);
2071
2072 // canSinkInstructions returning true guarantees that every block has at
2073 // least one non-terminator instruction.
2075 for (auto *BB : Blocks) {
2076 Instruction *I = BB->getTerminator();
2077 do {
2078 I = I->getPrevNode();
2079 } while (isa<DbgInfoIntrinsic>(I) && I != &BB->front());
2080 if (!isa<DbgInfoIntrinsic>(I))
2081 Insts.push_back(I);
2082 }
2083
2084 // The only checking we need to do now is that all users of all instructions
2085 // are the same PHI node. canSinkInstructions should have checked this but
2086 // it is slightly over-aggressive - it gets confused by commutative
2087 // instructions so double-check it here.
2088 Instruction *I0 = Insts.front();
2089 if (!I0->user_empty()) {
2090 auto *PNUse = dyn_cast<PHINode>(*I0->user_begin());
2091 if (!all_of(Insts, [&PNUse](const Instruction *I) -> bool {
2092 auto *U = cast<Instruction>(*I->user_begin());
2093 return U == PNUse;
2094 }))
2095 return false;
2096 }
2097
2098 // We don't need to do any more checking here; canSinkInstructions should
2099 // have done it all for us.
2100 SmallVector<Value*, 4> NewOperands;
2101 for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) {
2102 // This check is different to that in canSinkInstructions. There, we
2103 // cared about the global view once simplifycfg (and instcombine) have
2104 // completed - it takes into account PHIs that become trivially
2105 // simplifiable. However here we need a more local view; if an operand
2106 // differs we create a PHI and rely on instcombine to clean up the very
2107 // small mess we may make.
2108 bool NeedPHI = any_of(Insts, [&I0, O](const Instruction *I) {
2109 return I->getOperand(O) != I0->getOperand(O);
2110 });
2111 if (!NeedPHI) {
2112 NewOperands.push_back(I0->getOperand(O));
2113 continue;
2114 }
2115
2116 // Create a new PHI in the successor block and populate it.
2117 auto *Op = I0->getOperand(O);
2118 assert(!Op->getType()->isTokenTy() && "Can't PHI tokens!");
2119 auto *PN =
2120 PHINode::Create(Op->getType(), Insts.size(), Op->getName() + ".sink");
2121 PN->insertBefore(BBEnd->begin());
2122 for (auto *I : Insts)
2123 PN->addIncoming(I->getOperand(O), I->getParent());
2124 NewOperands.push_back(PN);
2125 }
2126
2127 // Arbitrarily use I0 as the new "common" instruction; remap its operands
2128 // and move it to the start of the successor block.
2129 for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O)
2130 I0->getOperandUse(O).set(NewOperands[O]);
2131
2132 I0->moveBefore(*BBEnd, BBEnd->getFirstInsertionPt());
2133
2134 // Update metadata and IR flags, and merge debug locations.
2135 for (auto *I : Insts)
2136 if (I != I0) {
2137 // The debug location for the "common" instruction is the merged locations
2138 // of all the commoned instructions. We start with the original location
2139 // of the "common" instruction and iteratively merge each location in the
2140 // loop below.
2141 // This is an N-way merge, which will be inefficient if I0 is a CallInst.
2142 // However, as N-way merge for CallInst is rare, so we use simplified API
2143 // instead of using complex API for N-way merge.
2144 I0->applyMergedLocation(I0->getDebugLoc(), I->getDebugLoc());
2145 combineMetadataForCSE(I0, I, true);
2146 I0->andIRFlags(I);
2147 }
2148
2149 if (!I0->user_empty()) {
2150 // canSinkLastInstruction checked that all instructions were used by
2151 // one and only one PHI node. Find that now, RAUW it to our common
2152 // instruction and nuke it.
2153 auto *PN = cast<PHINode>(*I0->user_begin());
2154 PN->replaceAllUsesWith(I0);
2155 PN->eraseFromParent();
2156 }
2157
2158 // Finally nuke all instructions apart from the common instruction.
2159 for (auto *I : Insts) {
2160 if (I == I0)
2161 continue;
2162 // The remaining uses are debug users, replace those with the common inst.
2163 // In most (all?) cases this just introduces a use-before-def.
2164 assert(I->user_empty() && "Inst unexpectedly still has non-dbg users");
2165 I->replaceAllUsesWith(I0);
2166 I->eraseFromParent();
2167 }
2168
2169 return true;
2170}
2171
2172namespace {
2173
2174 // LockstepReverseIterator - Iterates through instructions
2175 // in a set of blocks in reverse order from the first non-terminator.
2176 // For example (assume all blocks have size n):
2177 // LockstepReverseIterator I([B1, B2, B3]);
2178 // *I-- = [B1[n], B2[n], B3[n]];
2179 // *I-- = [B1[n-1], B2[n-1], B3[n-1]];
2180 // *I-- = [B1[n-2], B2[n-2], B3[n-2]];
2181 // ...
2182 class LockstepReverseIterator {
2185 bool Fail;
2186
2187 public:
2188 LockstepReverseIterator(ArrayRef<BasicBlock*> Blocks) : Blocks(Blocks) {
2189 reset();
2190 }
2191
2192 void reset() {
2193 Fail = false;
2194 Insts.clear();
2195 for (auto *BB : Blocks) {
2196 Instruction *Inst = BB->getTerminator();
2197 for (Inst = Inst->getPrevNode(); Inst && isa<DbgInfoIntrinsic>(Inst);)
2198 Inst = Inst->getPrevNode();
2199 if (!Inst) {
2200 // Block wasn't big enough.
2201 Fail = true;
2202 return;
2203 }
2204 Insts.push_back(Inst);
2205 }
2206 }
2207
2208 bool isValid() const {
2209 return !Fail;
2210 }
2211
2212 void operator--() {
2213 if (Fail)
2214 return;
2215 for (auto *&Inst : Insts) {
2216 for (Inst = Inst->getPrevNode(); Inst && isa<DbgInfoIntrinsic>(Inst);)
2217 Inst = Inst->getPrevNode();
2218 // Already at beginning of block.
2219 if (!Inst) {
2220 Fail = true;
2221 return;
2222 }
2223 }
2224 }
2225
2226 void operator++() {
2227 if (Fail)
2228 return;
2229 for (auto *&Inst : Insts) {
2230 for (Inst = Inst->getNextNode(); Inst && isa<DbgInfoIntrinsic>(Inst);)
2231 Inst = Inst->getNextNode();
2232 // Already at end of block.
2233 if (!Inst) {
2234 Fail = true;
2235 return;
2236 }
2237 }
2238 }
2239
2241 return Insts;
2242 }
2243 };
2244
2245} // end anonymous namespace
2246
2247/// Check whether BB's predecessors end with unconditional branches. If it is
2248/// true, sink any common code from the predecessors to BB.
2250 DomTreeUpdater *DTU) {
2251 // We support two situations:
2252 // (1) all incoming arcs are unconditional
2253 // (2) there are non-unconditional incoming arcs
2254 //
2255 // (2) is very common in switch defaults and
2256 // else-if patterns;
2257 //
2258 // if (a) f(1);
2259 // else if (b) f(2);
2260 //
2261 // produces:
2262 //
2263 // [if]
2264 // / \
2265 // [f(1)] [if]
2266 // | | \
2267 // | | |
2268 // | [f(2)]|
2269 // \ | /
2270 // [ end ]
2271 //
2272 // [end] has two unconditional predecessor arcs and one conditional. The
2273 // conditional refers to the implicit empty 'else' arc. This conditional
2274 // arc can also be caused by an empty default block in a switch.
2275 //
2276 // In this case, we attempt to sink code from all *unconditional* arcs.
2277 // If we can sink instructions from these arcs (determined during the scan
2278 // phase below) we insert a common successor for all unconditional arcs and
2279 // connect that to [end], to enable sinking:
2280 //
2281 // [if]
2282 // / \
2283 // [x(1)] [if]
2284 // | | \
2285 // | | \
2286 // | [x(2)] |
2287 // \ / |
2288 // [sink.split] |
2289 // \ /
2290 // [ end ]
2291 //
2292 SmallVector<BasicBlock*,4> UnconditionalPreds;
2293 bool HaveNonUnconditionalPredecessors = false;
2294 for (auto *PredBB : predecessors(BB)) {
2295 auto *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator());
2296 if (PredBr && PredBr->isUnconditional())
2297 UnconditionalPreds.push_back(PredBB);
2298 else
2299 HaveNonUnconditionalPredecessors = true;
2300 }
2301 if (UnconditionalPreds.size() < 2)
2302 return false;
2303
2304 // We take a two-step approach to tail sinking. First we scan from the end of
2305 // each block upwards in lockstep. If the n'th instruction from the end of each
2306 // block can be sunk, those instructions are added to ValuesToSink and we
2307 // carry on. If we can sink an instruction but need to PHI-merge some operands
2308 // (because they're not identical in each instruction) we add these to
2309 // PHIOperands.
2310 int ScanIdx = 0;
2311 SmallPtrSet<Value*,4> InstructionsToSink;
2313 LockstepReverseIterator LRI(UnconditionalPreds);
2314 while (LRI.isValid() &&
2315 canSinkInstructions(*LRI, PHIOperands)) {
2316 LLVM_DEBUG(dbgs() << "SINK: instruction can be sunk: " << *(*LRI)[0]
2317 << "\n");
2318 InstructionsToSink.insert((*LRI).begin(), (*LRI).end());
2319 ++ScanIdx;
2320 --LRI;
2321 }
2322
2323 // If no instructions can be sunk, early-return.
2324 if (ScanIdx == 0)
2325 return false;
2326
2327 bool followedByDeoptOrUnreachable = IsBlockFollowedByDeoptOrUnreachable(BB);
2328
2329 if (!followedByDeoptOrUnreachable) {
2330 // Okay, we *could* sink last ScanIdx instructions. But how many can we
2331 // actually sink before encountering instruction that is unprofitable to
2332 // sink?
2333 auto ProfitableToSinkInstruction = [&](LockstepReverseIterator &LRI) {
2334 unsigned NumPHIdValues = 0;
2335 for (auto *I : *LRI)
2336 for (auto *V : PHIOperands[I]) {
2337 if (!InstructionsToSink.contains(V))
2338 ++NumPHIdValues;
2339 // FIXME: this check is overly optimistic. We may end up not sinking
2340 // said instruction, due to the very same profitability check.
2341 // See @creating_too_many_phis in sink-common-code.ll.
2342 }
2343 LLVM_DEBUG(dbgs() << "SINK: #phid values: " << NumPHIdValues << "\n");
2344 unsigned NumPHIInsts = NumPHIdValues / UnconditionalPreds.size();
2345 if ((NumPHIdValues % UnconditionalPreds.size()) != 0)
2346 NumPHIInsts++;
2347
2348 return NumPHIInsts <= 1;
2349 };
2350
2351 // We've determined that we are going to sink last ScanIdx instructions,
2352 // and recorded them in InstructionsToSink. Now, some instructions may be
2353 // unprofitable to sink. But that determination depends on the instructions
2354 // that we are going to sink.
2355
2356 // First, forward scan: find the first instruction unprofitable to sink,
2357 // recording all the ones that are profitable to sink.
2358 // FIXME: would it be better, after we detect that not all are profitable.
2359 // to either record the profitable ones, or erase the unprofitable ones?
2360 // Maybe we need to choose (at runtime) the one that will touch least
2361 // instrs?
2362 LRI.reset();
2363 int Idx = 0;
2364 SmallPtrSet<Value *, 4> InstructionsProfitableToSink;
2365 while (Idx < ScanIdx) {
2366 if (!ProfitableToSinkInstruction(LRI)) {
2367 // Too many PHIs would be created.
2368 LLVM_DEBUG(
2369 dbgs() << "SINK: stopping here, too many PHIs would be created!\n");
2370 break;
2371 }
2372 InstructionsProfitableToSink.insert((*LRI).begin(), (*LRI).end());
2373 --LRI;
2374 ++Idx;
2375 }
2376
2377 // If no instructions can be sunk, early-return.
2378 if (Idx == 0)
2379 return false;
2380
2381 // Did we determine that (only) some instructions are unprofitable to sink?
2382 if (Idx < ScanIdx) {
2383 // Okay, some instructions are unprofitable.
2384 ScanIdx = Idx;
2385 InstructionsToSink = InstructionsProfitableToSink;
2386
2387 // But, that may make other instructions unprofitable, too.
2388 // So, do a backward scan, do any earlier instructions become
2389 // unprofitable?
2390 assert(
2391 !ProfitableToSinkInstruction(LRI) &&
2392 "We already know that the last instruction is unprofitable to sink");
2393 ++LRI;
2394 --Idx;
2395 while (Idx >= 0) {
2396 // If we detect that an instruction becomes unprofitable to sink,
2397 // all earlier instructions won't be sunk either,
2398 // so preemptively keep InstructionsProfitableToSink in sync.
2399 // FIXME: is this the most performant approach?
2400 for (auto *I : *LRI)
2401 InstructionsProfitableToSink.erase(I);
2402 if (!ProfitableToSinkInstruction(LRI)) {
2403 // Everything starting with this instruction won't be sunk.
2404 ScanIdx = Idx;
2405 InstructionsToSink = InstructionsProfitableToSink;
2406 }
2407 ++LRI;
2408 --Idx;
2409 }
2410 }
2411
2412 // If no instructions can be sunk, early-return.
2413 if (ScanIdx == 0)
2414 return false;
2415 }
2416
2417 bool Changed = false;
2418
2419 if (HaveNonUnconditionalPredecessors) {
2420 if (!followedByDeoptOrUnreachable) {
2421 // It is always legal to sink common instructions from unconditional
2422 // predecessors. However, if not all predecessors are unconditional,
2423 // this transformation might be pessimizing. So as a rule of thumb,
2424 // don't do it unless we'd sink at least one non-speculatable instruction.
2425 // See https://bugs.llvm.org/show_bug.cgi?id=30244
2426 LRI.reset();
2427 int Idx = 0;
2428 bool Profitable = false;
2429 while (Idx < ScanIdx) {
2430 if (!isSafeToSpeculativelyExecute((*LRI)[0])) {
2431 Profitable = true;
2432 break;
2433 }
2434 --LRI;
2435 ++Idx;
2436 }
2437 if (!Profitable)
2438 return false;
2439 }
2440
2441 LLVM_DEBUG(dbgs() << "SINK: Splitting edge\n");
2442 // We have a conditional edge and we're going to sink some instructions.
2443 // Insert a new block postdominating all blocks we're going to sink from.
2444 if (!SplitBlockPredecessors(BB, UnconditionalPreds, ".sink.split", DTU))
2445 // Edges couldn't be split.
2446 return false;
2447 Changed = true;
2448 }
2449
2450 // Now that we've analyzed all potential sinking candidates, perform the
2451 // actual sink. We iteratively sink the last non-terminator of the source
2452 // blocks into their common successor unless doing so would require too
2453 // many PHI instructions to be generated (currently only one PHI is allowed
2454 // per sunk instruction).
2455 //
2456 // We can use InstructionsToSink to discount values needing PHI-merging that will
2457 // actually be sunk in a later iteration. This allows us to be more
2458 // aggressive in what we sink. This does allow a false positive where we
2459 // sink presuming a later value will also be sunk, but stop half way through
2460 // and never actually sink it which means we produce more PHIs than intended.
2461 // This is unlikely in practice though.
2462 int SinkIdx = 0;
2463 for (; SinkIdx != ScanIdx; ++SinkIdx) {
2464 LLVM_DEBUG(dbgs() << "SINK: Sink: "
2465 << *UnconditionalPreds[0]->getTerminator()->getPrevNode()
2466 << "\n");
2467
2468 // Because we've sunk every instruction in turn, the current instruction to
2469 // sink is always at index 0.
2470 LRI.reset();
2471
2472 if (!sinkLastInstruction(UnconditionalPreds)) {
2473 LLVM_DEBUG(
2474 dbgs()
2475 << "SINK: stopping here, failed to actually sink instruction!\n");
2476 break;
2477 }
2478
2479 NumSinkCommonInstrs++;
2480 Changed = true;
2481 }
2482 if (SinkIdx != 0)
2483 ++NumSinkCommonCode;
2484 return Changed;
2485}
2486
2487namespace {
2488
2489struct CompatibleSets {
2490 using SetTy = SmallVector<InvokeInst *, 2>;
2491
2493
2494 static bool shouldBelongToSameSet(ArrayRef<InvokeInst *> Invokes);
2495
2496 SetTy &getCompatibleSet(InvokeInst *II);
2497
2498 void insert(InvokeInst *II);
2499};
2500
2501CompatibleSets::SetTy &CompatibleSets::getCompatibleSet(InvokeInst *II) {
2502 // Perform a linear scan over all the existing sets, see if the new `invoke`
2503 // is compatible with any particular set. Since we know that all the `invokes`
2504 // within a set are compatible, only check the first `invoke` in each set.
2505 // WARNING: at worst, this has quadratic complexity.
2506 for (CompatibleSets::SetTy &Set : Sets) {
2507 if (CompatibleSets::shouldBelongToSameSet({Set.front(), II}))
2508 return Set;
2509 }
2510
2511 // Otherwise, we either had no sets yet, or this invoke forms a new set.
2512 return Sets.emplace_back();
2513}
2514
2515void CompatibleSets::insert(InvokeInst *II) {
2516 getCompatibleSet(II).emplace_back(II);
2517}
2518
2519bool CompatibleSets::shouldBelongToSameSet(ArrayRef<InvokeInst *> Invokes) {
2520 assert(Invokes.size() == 2 && "Always called with exactly two candidates.");
2521
2522 // Can we theoretically merge these `invoke`s?
2523 auto IsIllegalToMerge = [](InvokeInst *II) {
2524 return II->cannotMerge() || II->isInlineAsm();
2525 };
2526 if (any_of(Invokes, IsIllegalToMerge))
2527 return false;
2528
2529 // Either both `invoke`s must be direct,
2530 // or both `invoke`s must be indirect.
2531 auto IsIndirectCall = [](InvokeInst *II) { return II->isIndirectCall(); };
2532 bool HaveIndirectCalls = any_of(Invokes, IsIndirectCall);
2533 bool AllCallsAreIndirect = all_of(Invokes, IsIndirectCall);
2534 if (HaveIndirectCalls) {
2535 if (!AllCallsAreIndirect)
2536 return false;
2537 } else {
2538 // All callees must be identical.
2539 Value *Callee = nullptr;
2540 for (InvokeInst *II : Invokes) {
2541 Value *CurrCallee = II->getCalledOperand();
2542 assert(CurrCallee && "There is always a called operand.");
2543 if (!Callee)
2544 Callee = CurrCallee;
2545 else if (Callee != CurrCallee)
2546 return false;
2547 }
2548 }
2549
2550 // Either both `invoke`s must not have a normal destination,
2551 // or both `invoke`s must have a normal destination,
2552 auto HasNormalDest = [](InvokeInst *II) {
2553 return !isa<UnreachableInst>(II->getNormalDest()->getFirstNonPHIOrDbg());
2554 };
2555 if (any_of(Invokes, HasNormalDest)) {
2556 // Do not merge `invoke` that does not have a normal destination with one
2557 // that does have a normal destination, even though doing so would be legal.
2558 if (!all_of(Invokes, HasNormalDest))
2559 return false;
2560
2561 // All normal destinations must be identical.
2562 BasicBlock *NormalBB = nullptr;
2563 for (InvokeInst *II : Invokes) {
2564 BasicBlock *CurrNormalBB = II->getNormalDest();
2565 assert(CurrNormalBB && "There is always a 'continue to' basic block.");
2566 if (!NormalBB)
2567 NormalBB = CurrNormalBB;
2568 else if (NormalBB != CurrNormalBB)
2569 return false;
2570 }
2571
2572 // In the normal destination, the incoming values for these two `invoke`s
2573 // must be compatible.
2574 SmallPtrSet<Value *, 16> EquivalenceSet(Invokes.begin(), Invokes.end());
2576 NormalBB, {Invokes[0]->getParent(), Invokes[1]->getParent()},
2577 &EquivalenceSet))
2578 return false;
2579 }
2580
2581#ifndef NDEBUG
2582 // All unwind destinations must be identical.
2583 // We know that because we have started from said unwind destination.
2584 BasicBlock *UnwindBB = nullptr;
2585 for (InvokeInst *II : Invokes) {
2586 BasicBlock *CurrUnwindBB = II->getUnwindDest();
2587 assert(CurrUnwindBB && "There is always an 'unwind to' basic block.");
2588 if (!UnwindBB)
2589 UnwindBB = CurrUnwindBB;
2590 else
2591 assert(UnwindBB == CurrUnwindBB && "Unexpected unwind destination.");
2592 }
2593#endif
2594
2595 // In the unwind destination, the incoming values for these two `invoke`s
2596 // must be compatible.
2598 Invokes.front()->getUnwindDest(),
2599 {Invokes[0]->getParent(), Invokes[1]->getParent()}))
2600 return false;
2601
2602 // Ignoring arguments, these `invoke`s must be identical,
2603 // including operand bundles.
2604 const InvokeInst *II0 = Invokes.front();
2605 for (auto *II : Invokes.drop_front())
2606 if (!II->isSameOperationAs(II0))
2607 return false;
2608
2609 // Can we theoretically form the data operands for the merged `invoke`?
2610 auto IsIllegalToMergeArguments = [](auto Ops) {
2611 Use &U0 = std::get<0>(Ops);
2612 Use &U1 = std::get<1>(Ops);
2613 if (U0 == U1)
2614 return false;
2615 return U0->getType()->isTokenTy() ||
2616 !canReplaceOperandWithVariable(cast<Instruction>(U0.getUser()),
2617 U0.getOperandNo());
2618 };
2619 assert(Invokes.size() == 2 && "Always called with exactly two candidates.");
2620 if (any_of(zip(Invokes[0]->data_ops(), Invokes[1]->data_ops()),
2621 IsIllegalToMergeArguments))
2622 return false;
2623
2624 return true;
2625}
2626
2627} // namespace
2628
2629// Merge all invokes in the provided set, all of which are compatible
2630// as per the `CompatibleSets::shouldBelongToSameSet()`.
2632 DomTreeUpdater *DTU) {
2633 assert(Invokes.size() >= 2 && "Must have at least two invokes to merge.");
2634
 // Each original invoke contributes at most 3 CFG-edge updates, plus 2 for
 // the edges out of the new merged-invoke block.
2636 if (DTU)
2637 Updates.reserve(2 + 3 * Invokes.size());
2638
 // All invokes in the set are compatible, so checking only the first one's
 // normal destination is sufficient to decide whether the set falls through.
2639 bool HasNormalDest =
2640 !isa<UnreachableInst>(Invokes[0]->getNormalDest()->getFirstNonPHIOrDbg());
2641
2642 // Clone one of the invokes into a new basic block.
2643 // Since they are all compatible, it doesn't matter which invoke is cloned.
2644 InvokeInst *MergedInvoke = [&Invokes, HasNormalDest]() {
2645 InvokeInst *II0 = Invokes.front();
2646 BasicBlock *II0BB = II0->getParent();
2647 BasicBlock *InsertBeforeBlock =
2648 II0->getParent()->getIterator()->getNextNode();
2649 Function *Func = II0BB->getParent();
2650 LLVMContext &Ctx = II0->getContext();
2651
2652 BasicBlock *MergedInvokeBB = BasicBlock::Create(
2653 Ctx, II0BB->getName() + ".invoke", Func, InsertBeforeBlock);
2654
2655 auto *MergedInvoke = cast<InvokeInst>(II0->clone());
2656 // NOTE: all invokes have the same attributes, so no handling needed.
2657 MergedInvoke->insertInto(MergedInvokeBB, MergedInvokeBB->end());
2658
2659 if (!HasNormalDest) {
2660 // This set does not have a normal destination,
2661 // so just form a new block with unreachable terminator.
2662 BasicBlock *MergedNormalDest = BasicBlock::Create(
2663 Ctx, II0BB->getName() + ".cont", Func, InsertBeforeBlock);
2664 new UnreachableInst(Ctx, MergedNormalDest);
2665 MergedInvoke->setNormalDest(MergedNormalDest);
2666 }
2667
2668 // The unwind destination, however, remains identical for all invokes here.
2669
2670 return MergedInvoke;
2671 }();
2672
2673 if (DTU) {
2674 // Predecessor blocks that contained these invokes will now branch to
2675 // the new block that contains the merged invoke, ...
2676 for (InvokeInst *II : Invokes)
2677 Updates.push_back(
2678 {DominatorTree::Insert, II->getParent(), MergedInvoke->getParent()});
2679
2680 // ... which has the new `unreachable` block as normal destination,
2681 // or unwinds to the (same for all `invoke`s in this set) `landingpad`,
2682 for (BasicBlock *SuccBBOfMergedInvoke : successors(MergedInvoke))
2683 Updates.push_back({DominatorTree::Insert, MergedInvoke->getParent(),
2684 SuccBBOfMergedInvoke});
2685
2686 // Since predecessor blocks now unconditionally branch to a new block,
2687 // they no longer branch to their original successors.
2688 for (InvokeInst *II : Invokes)
2689 for (BasicBlock *SuccOfPredBB : successors(II->getParent()))
2690 Updates.push_back(
2691 {DominatorTree::Delete, II->getParent(), SuccOfPredBB});
2692 }
2693
2694 bool IsIndirectCall = Invokes[0]->isIndirectCall();
2695
2696 // Form the merged operands for the merged invoke.
2697 for (Use &U : MergedInvoke->operands()) {
2698 // Only PHI together the indirect callees and data operands.
2699 if (MergedInvoke->isCallee(&U)) {
2700 if (!IsIndirectCall)
2701 continue;
2702 } else if (!MergedInvoke->isDataOperand(&U))
2703 continue;
2704
2705 // Don't create trivial PHI's with all-identical incoming values.
2706 bool NeedPHI = any_of(Invokes, [&U](InvokeInst *II) {
2707 return II->getOperand(U.getOperandNo()) != U.get();
2708 });
2709 if (!NeedPHI)
2710 continue;
2711
2712 // Form a PHI out of all the data ops under this index.
2714 U->getType(), /*NumReservedValues=*/Invokes.size(), "", MergedInvoke->getIterator());
2715 for (InvokeInst *II : Invokes)
2716 PN->addIncoming(II->getOperand(U.getOperandNo()), II->getParent());
2717
2718 U.set(PN);
2719 }
2720
2721 // We've ensured that each PHI node has compatible (identical) incoming values
2722 // when coming from each of the `invoke`s in the current merge set,
2723 // so update the PHI nodes accordingly.
2724 for (BasicBlock *Succ : successors(MergedInvoke))
2725 AddPredecessorToBlock(Succ, /*NewPred=*/MergedInvoke->getParent(),
2726 /*ExistPred=*/Invokes.front()->getParent());
2727
2728 // And finally, replace the original `invoke`s with an unconditional branch
2729 // to the block with the merged `invoke`. Also, give that merged `invoke`
2730 // the merged debugloc of all the original `invoke`s.
2731 DILocation *MergedDebugLoc = nullptr;
2732 for (InvokeInst *II : Invokes) {
2733 // Compute the debug location common to all the original `invoke`s.
2734 if (!MergedDebugLoc)
2735 MergedDebugLoc = II->getDebugLoc();
2736 else
2737 MergedDebugLoc =
2738 DILocation::getMergedLocation(MergedDebugLoc, II->getDebugLoc());
2739
2740 // And replace the old `invoke` with an unconditional branch
2741 // to the block with the merged `invoke`.
2742 for (BasicBlock *OrigSuccBB : successors(II->getParent()))
2743 OrigSuccBB->removePredecessor(II->getParent());
2744 BranchInst::Create(MergedInvoke->getParent(), II->getParent());
2745 II->replaceAllUsesWith(MergedInvoke);
2746 II->eraseFromParent();
2747 ++NumInvokesMerged;
2748 }
2749 MergedInvoke->setDebugLoc(MergedDebugLoc);
2750 ++NumInvokeSetsFormed;
2751
2752 if (DTU)
2753 DTU->applyUpdates(Updates);
2754}
2755
2756/// If this block is a `landingpad` exception handling block, categorize all
2757/// the predecessor `invoke`s into sets, with all `invoke`s in each set
2758/// being "mergeable" together, and then merge invokes in each set together.
2759///
2760/// This is a weird mix of hoisting and sinking. Visually, it goes from:
2761/// [...] [...]
2762/// | |
2763/// [invoke0] [invoke1]
2764/// / \ / \
2765/// [cont0] [landingpad] [cont1]
2766/// to:
2767/// [...] [...]
2768/// \ /
2769/// [invoke]
2770/// / \
2771/// [cont] [landingpad]
2772///
2773/// But of course we can only do that if the invokes share the `landingpad`,
2774/// edges invoke0->cont0 and invoke1->cont1 are "compatible",
2775/// and the invoked functions are "compatible".
2778 return false;
2779
2780 bool Changed = false;
2781
2782 // FIXME: generalize to all exception handling blocks?
2783 if (!BB->isLandingPad())
2784 return Changed;
2785
2786 CompatibleSets Grouper;
2787
2788 // Record all the predecessors of this `landingpad`. As per verifier,
2789 // the only allowed predecessor is the unwind edge of an `invoke`.
2790 // We want to group "compatible" `invokes` into the same set to be merged.
2791 for (BasicBlock *PredBB : predecessors(BB))
2792 Grouper.insert(cast<InvokeInst>(PredBB->getTerminator()));
2793
2794 // And now, merge `invoke`s that were grouped together.
2795 for (ArrayRef<InvokeInst *> Invokes : Grouper.Sets) {
 // Singleton sets have nothing to merge with; skip them.
2796 if (Invokes.size() < 2)
2797 continue;
2798 Changed = true;
2799 MergeCompatibleInvokesImpl(Invokes, DTU);
2800 }
2801
2802 return Changed;
2803}
2804
2805namespace {
2806/// Track ephemeral values, which should be ignored for cost-modelling
2807/// purposes. Requires walking instructions in reverse order.
2808class EphemeralValueTracker {
2810
  /// An instruction is "ephemeral" if it is an llvm.assume, or if it has no
  /// side effects, is not a terminator, and all of its users have already
  /// been classified as ephemeral — which is why callers must walk the block
  /// in reverse order (users are visited before their operands).
2811 bool isEphemeral(const Instruction *I) {
2812 if (isa<AssumeInst>(I))
2813 return true;
2814 return !I->mayHaveSideEffects() && !I->isTerminator() &&
2815 all_of(I->users(), [&](const User *U) {
2816 return EphValues.count(cast<Instruction>(U));
2817 });
2818 }
2819
2820public:
  /// Classify \p I, recording it when ephemeral. Returns true iff \p I is
  /// ephemeral (and thus should be ignored for cost modelling).
2821 bool track(const Instruction *I) {
2822 if (isEphemeral(I)) {
2823 EphValues.insert(I);
2824 return true;
2825 }
2826 return false;
2827 }
2828
  /// Whether \p I was recorded as ephemeral by an earlier track() call.
2829 bool contains(const Instruction *I) const { return EphValues.contains(I); }
2830};
2831} // namespace
2832
2833/// Determine if we can hoist or sink a sole store instruction out of a
2834/// conditional block.
2835///
2836/// We are looking for code like the following:
2837/// BrBB:
2838/// store i32 %add, i32* %arrayidx2
2839/// ... // No other stores or function calls (we could be calling a memory
2840/// ... // function).
2841/// %cmp = icmp ult %x, %y
2842/// br i1 %cmp, label %EndBB, label %ThenBB
2843/// ThenBB:
2844/// store i32 %add5, i32* %arrayidx2
2845/// br label EndBB
2846/// EndBB:
2847/// ...
2848/// We are going to transform this into:
2849/// BrBB:
2850/// store i32 %add, i32* %arrayidx2
2851/// ... //
2852/// %cmp = icmp ult %x, %y
2853/// %add.add5 = select i1 %cmp, i32 %add, %add5
2854/// store i32 %add.add5, i32* %arrayidx2
2855/// ...
2856///
2857/// \return The pointer to the value of the previous store if the store can be
2858/// hoisted into the predecessor block. nullptr otherwise.
2860 BasicBlock *StoreBB, BasicBlock *EndBB) {
2861 StoreInst *StoreToHoist = dyn_cast<StoreInst>(I);
2862 if (!StoreToHoist)
2863 return nullptr;
2864
2865 // Volatile or atomic.
2866 if (!StoreToHoist->isSimple())
2867 return nullptr;
2868
2869 Value *StorePtr = StoreToHoist->getPointerOperand();
2870 Type *StoreTy = StoreToHoist->getValueOperand()->getType();
2871
2872 // Look for a store to the same pointer in BrBB.
2873 unsigned MaxNumInstToLookAt = 9;
2874 // Skip pseudo probe intrinsic calls which are not really killing any memory
2875 // accesses.
2876 for (Instruction &CurI : reverse(BrBB->instructionsWithoutDebug(true))) {
 // Bail out once the fixed scan budget is exhausted.
2877 if (!MaxNumInstToLookAt)
2878 break;
2879 --MaxNumInstToLookAt;
2880
2881 // Could be calling an instruction that affects memory like free().
2882 if (CurI.mayWriteToMemory() && !isa<StoreInst>(CurI))
2883 return nullptr;
2884
2885 if (auto *SI = dyn_cast<StoreInst>(&CurI)) {
2886 // Found the previous store to same location and type. Make sure it is
2887 // simple, to avoid introducing a spurious non-atomic write after an
2888 // atomic write.
2889 if (SI->getPointerOperand() == StorePtr &&
2890 SI->getValueOperand()->getType() == StoreTy && SI->isSimple())
2891 // Found the previous store, return its value operand.
2892 return SI->getValueOperand();
2893 return nullptr; // Unknown store.
2894 }
2895
2896 if (auto *LI = dyn_cast<LoadInst>(&CurI)) {
2897 if (LI->getPointerOperand() == StorePtr && LI->getType() == StoreTy &&
2898 LI->isSimple()) {
2899 // Local objects (created by an `alloca` instruction) are always
2900 // writable, so once we are past a read from a location it is valid to
2901 // also write to that same location.
2902 // If the address of the local object never escapes the function, that
2903 // means it's never concurrently read or written, hence moving the store
2904 // from under the condition will not introduce a data race.
2905 auto *AI = dyn_cast<AllocaInst>(getUnderlyingObject(StorePtr));
2906 if (AI && !PointerMayBeCaptured(AI, /*ReturnCaptures=*/false, /*StoreCaptures=*/true))
2907 // Found a previous load, return it.
2908 return LI;
2909 }
2910 // The load didn't work out, but we may still find a store.
2911 }
2912 }
2913
 // Scan budget exhausted (or block too short) without finding a matching
 // prior access.
2914 return nullptr;
2915}
2916
2917/// Estimate the cost of the insertion(s) and check that the PHI nodes can be
2918/// converted to selects.
2919/// Returns true only if at least one non-trivial PHI would be rewritten into
2920/// a select; accumulates the select costs and bumps SpeculatedInstructions
2921/// for each unfolded ConstantExpr incoming value.
2920 BasicBlock *EndBB,
2921 unsigned &SpeculatedInstructions,
2923 const TargetTransformInfo &TTI) {
2925 BB->getParent()->hasMinSize()
2928
2929 bool HaveRewritablePHIs = false;
2930 for (PHINode &PN : EndBB->phis()) {
2931 Value *OrigV = PN.getIncomingValueForBlock(BB);
2932 Value *ThenV = PN.getIncomingValueForBlock(ThenBB);
2933
2934 // FIXME: Try to remove some of the duplication with
2935 // hoistCommonCodeFromSuccessors. Skip PHIs which are trivial.
2936 if (ThenV == OrigV)
2937 continue;
2938
2939 Cost += TTI.getCmpSelInstrCost(Instruction::Select, PN.getType(), nullptr,
2941
2942 // Don't convert to selects if we could remove undefined behavior instead.
2943 if (passingValueIsAlwaysUndefined(OrigV, &PN) ||
2945 return false;
2946
2947 HaveRewritablePHIs = true;
2948 ConstantExpr *OrigCE = dyn_cast<ConstantExpr>(OrigV);
2949 ConstantExpr *ThenCE = dyn_cast<ConstantExpr>(ThenV);
2950 if (!OrigCE && !ThenCE)
2951 continue; // Known cheap (FIXME: Maybe not true for aggregates).
2952
2953 InstructionCost OrigCost = OrigCE ? computeSpeculationCost(OrigCE, TTI) : 0;
2954 InstructionCost ThenCost = ThenCE ? computeSpeculationCost(ThenCE, TTI) : 0;
2955 InstructionCost MaxCost =
2957 if (OrigCost + ThenCost > MaxCost)
2958 return false;
2959
2960 // Account for the cost of an unfolded ConstantExpr which could end up
2961 // getting expanded into Instructions.
2962 // FIXME: This doesn't account for how many operations are combined in the
2963 // constant expression.
2964 ++SpeculatedInstructions;
2965 if (SpeculatedInstructions > 1)
2966 return false;
2967 }
2968
2969 return HaveRewritablePHIs;
2970}
2971
2972/// Speculate a conditional basic block flattening the CFG.
2973///
2974/// Note that this is a very risky transform currently. Speculating
2975/// instructions like this is most often not desirable. Instead, there is an MI
2976/// pass which can do it with full awareness of the resource constraints.
2977/// However, some cases are "obvious" and we should do it directly. An example
2978/// of this is speculating a single, reasonably cheap instruction.
2979///
2980/// There is only one distinct advantage to flattening the CFG at the IR level:
2981/// it makes very common but simplistic optimizations such as are common in
2982/// instcombine and the DAG combiner more powerful by removing CFG edges and
2983/// modeling their effects with easier to reason about SSA value graphs.
2984///
2985///
2986/// An illustration of this transform is turning this IR:
2987/// \code
2988/// BB:
2989/// %cmp = icmp ult %x, %y
2990/// br i1 %cmp, label %EndBB, label %ThenBB
2991/// ThenBB:
2992/// %sub = sub %x, %y
2993/// br label EndBB
2994/// EndBB:
2995/// %phi = phi [ %sub, %ThenBB ], [ 0, %BB ]
2996/// ...
2997/// \endcode
2998///
2999/// Into this IR:
3000/// \code
3001/// BB:
3002/// %cmp = icmp ult %x, %y
3003/// %sub = sub %x, %y
3004/// %cond = select i1 %cmp, 0, %sub
3005/// ...
3006/// \endcode
3007///
3008/// \returns true if the conditional block is removed.
3009bool SimplifyCFGOpt::SpeculativelyExecuteBB(BranchInst *BI,
3010 BasicBlock *ThenBB) {
3011 if (!Options.SpeculateBlocks)
3012 return false;
3013
3014 // Be conservative for now. FP select instruction can often be expensive.
3015 Value *BrCond = BI->getCondition();
3016 if (isa<FCmpInst>(BrCond))
3017 return false;
3018
3019 BasicBlock *BB = BI->getParent();
3020 BasicBlock *EndBB = ThenBB->getTerminator()->getSuccessor(0);
3021 InstructionCost Budget =
3023
3024 // If ThenBB is actually on the false edge of the conditional branch, remember
3025 // to swap the select operands later.
3026 bool Invert = false;
3027 if (ThenBB != BI->getSuccessor(0)) {
3028 assert(ThenBB == BI->getSuccessor(1) && "No edge from 'if' block?");
3029 Invert = true;
3030 }
3031 assert(EndBB == BI->getSuccessor(!Invert) && "No edge from to end block");
3032
3033 // If the branch is non-unpredictable, and is predicted to *not* branch to
3034 // the `then` block, then avoid speculating it.
3035 if (!BI->getMetadata(LLVMContext::MD_unpredictable)) {
3036 uint64_t TWeight, FWeight;
3037 if (extractBranchWeights(*BI, TWeight, FWeight) &&
3038 (TWeight + FWeight) != 0) {
3039 uint64_t EndWeight = Invert ? TWeight : FWeight;
3040 BranchProbability BIEndProb =
3041 BranchProbability::getBranchProbability(EndWeight, TWeight + FWeight);
3043 if (BIEndProb >= Likely)
3044 return false;
3045 }
3046 }
3047
3048 // Keep a count of how many times instructions are used within ThenBB when
3049 // they are candidates for sinking into ThenBB. Specifically:
3050 // - They are defined in BB, and
3051 // - They have no side effects, and
3052 // - All of their uses are in ThenBB.
3053 SmallDenseMap<Instruction *, unsigned, 4> SinkCandidateUseCounts;
3054
3055 SmallVector<Instruction *, 4> SpeculatedDbgIntrinsics;
3056
3057 unsigned SpeculatedInstructions = 0;
3058 Value *SpeculatedStoreValue = nullptr;
3059 StoreInst *SpeculatedStore = nullptr;
3060 EphemeralValueTracker EphTracker;
3061 for (Instruction &I : reverse(drop_end(*ThenBB))) {
3062 // Skip debug info.
3063 if (isa<DbgInfoIntrinsic>(I)) {
3064 SpeculatedDbgIntrinsics.push_back(&I);
3065 continue;
3066 }
3067
3068 // Skip pseudo probes. The consequence is we lose track of the branch
3069 // probability for ThenBB, which is fine since the optimization here takes
3070 // place regardless of the branch probability.
3071 if (isa<PseudoProbeInst>(I)) {
3072 // The probe should be deleted so that it will not be over-counted when
3073 // the samples collected on the non-conditional path are counted towards
3074 // the conditional path. We leave it for the counts inference algorithm to
3075 // figure out a proper count for an unknown probe.
3076 SpeculatedDbgIntrinsics.push_back(&I);
3077 continue;
3078 }
3079
3080 // Ignore ephemeral values, they will be dropped by the transform.
3081 if (EphTracker.track(&I))
3082 continue;
3083
3084 // Only speculatively execute a single instruction (not counting the
3085 // terminator) for now.
3086 ++SpeculatedInstructions;
3087 if (SpeculatedInstructions > 1)
3088 return false;
3089
3090 // Don't hoist the instruction if it's unsafe or expensive.
3092 !(HoistCondStores && (SpeculatedStoreValue = isSafeToSpeculateStore(
3093 &I, BB, ThenBB, EndBB))))
3094 return false;
3095 if (!SpeculatedStoreValue &&
3098 return false;
3099
3100 // Store the store speculation candidate.
3101 if (SpeculatedStoreValue)
3102 SpeculatedStore = cast<StoreInst>(&I);
3103
3104 // Do not hoist the instruction if any of its operands are defined but not
3105 // used in BB. The transformation will prevent the operand from
3106 // being sunk into the use block.
3107 for (Use &Op : I.operands()) {
3108 Instruction *OpI = dyn_cast<Instruction>(Op);
3109 if (!OpI || OpI->getParent() != BB || OpI->mayHaveSideEffects())
3110 continue; // Not a candidate for sinking.
3111
3112 ++SinkCandidateUseCounts[OpI];
3113 }
3114 }
3115
3116 // Consider any sink candidates which are only used in ThenBB as costs for
3117 // speculation. Note, while we iterate over a DenseMap here, we are summing
3118 // and so iteration order isn't significant.
3119 for (const auto &[Inst, Count] : SinkCandidateUseCounts)
3120 if (Inst->hasNUses(Count)) {
3121 ++SpeculatedInstructions;
3122 if (SpeculatedInstructions > 1)
3123 return false;
3124 }
3125
3126 // Check that we can insert the selects and that it's not too expensive to do
3127 // so.
3128 bool Convert = SpeculatedStore != nullptr;
3130 Convert |= validateAndCostRequiredSelects(BB, ThenBB, EndBB,
3131 SpeculatedInstructions,
3132 Cost, TTI);
3133 if (!Convert || Cost > Budget)
3134 return false;
3135
3136 // If we get here, we can hoist the instruction and if-convert.
3137 LLVM_DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *ThenBB << "\n";);
3138
3139 // Insert a select of the value of the speculated store.
3140 if (SpeculatedStoreValue) {
3141 IRBuilder<NoFolder> Builder(BI);
3142 Value *OrigV = SpeculatedStore->getValueOperand();
3143 Value *TrueV = SpeculatedStore->getValueOperand();
3144 Value *FalseV = SpeculatedStoreValue;
3145 if (Invert)
3146 std::swap(TrueV, FalseV);
3147 Value *S = Builder.CreateSelect(
3148 BrCond, TrueV, FalseV, "spec.store.select", BI);
3149 SpeculatedStore->setOperand(0, S);
3150 SpeculatedStore->applyMergedLocation(BI->getDebugLoc(),
3151 SpeculatedStore->getDebugLoc());
3152 // The value stored is still conditional, but the store itself is now
3153 // unconditionally executed, so we must be sure that any linked dbg.assign
3154 // intrinsics are tracking the new stored value (the result of the
3155 // select). If we don't, and the store were to be removed by another pass
3156 // (e.g. DSE), then we'd eventually end up emitting a location describing
3157 // the conditional value, unconditionally.
3158 //
3159 // === Before this transformation ===
3160 // pred:
3161 // store %one, %x.dest, !DIAssignID !1
3162 // dbg.assign %one, "x", ..., !1, ...
3163 // br %cond if.then
3164 //
3165 // if.then:
3166 // store %two, %x.dest, !DIAssignID !2
3167 // dbg.assign %two, "x", ..., !2, ...
3168 //
3169 // === After this transformation ===
3170 // pred:
3171 // store %one, %x.dest, !DIAssignID !1
3172 // dbg.assign %one, "x", ..., !1
3173 // ...
3174 // %merge = select %cond, %two, %one
3175 // store %merge, %x.dest, !DIAssignID !2
3176 // dbg.assign %merge, "x", ..., !2
3177 auto replaceVariable = [OrigV, S](auto *DbgAssign) {
3178 if (llvm::is_contained(DbgAssign->location_ops(), OrigV))
3179 DbgAssign->replaceVariableLocationOp(OrigV, S);
3180 };
3181 for_each(at::getAssignmentMarkers(SpeculatedStore), replaceVariable);
3182 for_each(at::getDPVAssignmentMarkers(SpeculatedStore), replaceVariable);
3183 }
3184
3185 // Metadata can be dependent on the condition we are hoisting above.
3186 // Strip all UB-implying metadata on the instruction. Drop the debug loc
3187 // to avoid making it appear as if the condition is a constant, which would
3188 // be misleading while debugging.
3189 // Similarly strip attributes that maybe dependent on condition we are
3190 // hoisting above.
3191 for (auto &I : make_early_inc_range(*ThenBB)) {
3192 if (!SpeculatedStoreValue || &I != SpeculatedStore) {
3193 // Don't update the DILocation of dbg.assign intrinsics.
3194 if (!isa<DbgAssignIntrinsic>(&I))
3195 I.setDebugLoc(DebugLoc());
3196 }
3197 I.dropUBImplyingAttrsAndMetadata();
3198
3199 // Drop ephemeral values.
3200 if (EphTracker.contains(&I)) {
3201 I.replaceAllUsesWith(PoisonValue::get(I.getType()));
3202 I.eraseFromParent();
3203 }
3204 }
3205
3206 // Hoist the instructions.
3207 // In "RemoveDIs" non-instr debug-info mode, drop DPValues attached to these
3208 // instructions, in the same way that dbg.value intrinsics are dropped at the
3209 // end of this block.
3210 for (auto &It : make_range(ThenBB->begin(), ThenBB->end()))
3211 for (DbgRecord &DR : make_early_inc_range(It.getDbgRecordRange()))
3212 // Drop all records except assign-kind DPValues (dbg.assign equivalent).
3213 if (DPValue *DPV = dyn_cast<DPValue>(&DR); !DPV || !DPV->isDbgAssign())
3214 It.dropOneDbgRecord(&DR);
3215 BB->splice(BI->getIterator(), ThenBB, ThenBB->begin(),
3216 std::prev(ThenBB->end()));
3217
3218 // Insert selects and rewrite the PHI operands.
3219 IRBuilder<NoFolder> Builder(BI);
3220 for (PHINode &PN : EndBB->phis()) {
3221 unsigned OrigI = PN.getBasicBlockIndex(BB);
3222 unsigned ThenI = PN.getBasicBlockIndex(ThenBB);
3223 Value *OrigV = PN.getIncomingValue(OrigI);
3224 Value *ThenV = PN.getIncomingValue(ThenI);
3225
3226 // Skip PHIs which are trivial.
3227 if (OrigV == ThenV)
3228 continue;
3229
3230 // Create a select whose true value is the speculatively executed value and
3231 // false value is the pre-existing value. Swap them if the branch
3232 // destinations were inverted.
3233 Value *TrueV = ThenV, *FalseV = OrigV;
3234 if (Invert)
3235 std::swap(TrueV, FalseV);
3236 Value *V = Builder.CreateSelect(BrCond, TrueV, FalseV, "spec.select", BI);
3237 PN.setIncomingValue(OrigI, V);
3238 PN.setIncomingValue(ThenI, V);
3239 }
3240
3241 // Remove speculated dbg intrinsics.
3242 // FIXME: Is it possible to do this in a more elegant way? Moving/merging the
3243 // dbg value for the different flows and inserting it after the select.
3244 for (Instruction *I : SpeculatedDbgIntrinsics) {
3245 // We still want to know that an assignment took place so don't remove
3246 // dbg.assign intrinsics.
3247 if (!isa<DbgAssignIntrinsic>(I))
3248 I->eraseFromParent();
3249 }
3250
3251 ++NumSpeculations;
3252 return true;
3253}
3254
3255/// Return true if we can thread a branch across this block.
3256/// The block must be small (bounded by MaxSmallBlockSize, not counting PHIs
3257/// and ephemeral values), contain no noduplicate/convergent calls, and define
3258/// no values that are live outside of it.
3257 int Size = 0;
3258 EphemeralValueTracker EphTracker;
3259
3260 // Walk the loop in reverse so that we can identify ephemeral values properly
3261 // (values only feeding assumes).
3262 for (Instruction &I : reverse(BB->instructionsWithoutDebug(false))) {
3263 // Can't fold blocks that contain noduplicate or convergent calls.
3264 if (CallInst *CI = dyn_cast<CallInst>(&I))
3265 if (CI->cannotDuplicate() || CI->isConvergent())
3266 return false;
3267
3268 // Ignore ephemeral values which are deleted during codegen.
3269 // We will delete Phis while threading, so Phis should not be accounted in
3270 // block's size.
3271 if (!EphTracker.track(&I) && !isa<PHINode>(I)) {
3272 if (Size++ > MaxSmallBlockSize)
3273 return false; // Don't clone large BB's.
3274 }
3275
3276 // We can only support instructions that do not define values that are
3277 // live outside of the current basic block.
3278 for (User *U : I.users()) {
3279 Instruction *UI = cast<Instruction>(U);
3280 if (UI->getParent() != BB || isa<PHINode>(UI))
3281 return false;
3282 }
3283
3284 // Looks ok, continue checking.
3285 }
3286
3287 return true;
3288}
3289
// Return the constant boolean that \p V is known to have on the CFG edge
// From->To (because From's conditional branch on V selects that edge), or
// nullptr if the value cannot be deduced from the edge alone.
3291 BasicBlock *To) {
3292 // Don't look past the block defining the value, we might get the value from
3293 // a previous loop iteration.
3294 auto *I = dyn_cast<Instruction>(V);
3295 if (I && I->getParent() == To)
3296 return nullptr;
3297
3298 // We know the value if the From block branches on it.
3299 auto *BI = dyn_cast<BranchInst>(From->getTerminator());
3300 if (BI && BI->isConditional() && BI->getCondition() == V &&
3301 BI->getSuccessor(0) != BI->getSuccessor(1))
3302 return BI->getSuccessor(0) == To ? ConstantInt::getTrue(BI->getContext())
3304
3305 return nullptr;
3306}
3307
3308/// If we have a conditional branch on something for which we know the constant
3309/// value in predecessors (e.g. a phi node in the current block), thread edges
3310/// from the predecessor to their ultimate destination.
3311/// Returns std::nullopt to signal "changed; call again to look for more
3312/// foldable constants", true/false for "changed"/"no change" respectively.
3311static std::optional<bool>
3313 const DataLayout &DL,
3314 AssumptionCache *AC) {
3316 BasicBlock *BB = BI->getParent();
3317 Value *Cond = BI->getCondition();
3318 PHINode *PN = dyn_cast<PHINode>(Cond);
3319 if (PN && PN->getParent() == BB) {
3320 // Degenerate case of a single entry PHI.
3321 if (PN->getNumIncomingValues() == 1) {
3323 return true;
3324 }
3325
3326 for (Use &U : PN->incoming_values())
3327 if (auto *CB = dyn_cast<ConstantInt>(U))
3328 KnownValues[CB].insert(PN->getIncomingBlock(U));
3329 } else {
3330 for (BasicBlock *Pred : predecessors(BB)) {
3331 if (ConstantInt *CB = getKnownValueOnEdge(Cond, Pred, BB))
3332 KnownValues[CB].insert(Pred);
3333 }
3334 }
3335
3336 if (KnownValues.empty())
3337 return false;
3338
3339 // Now we know that this block has multiple preds and two succs.
3340 // Check that the block is small enough and values defined in the block are
3341 // not used outside of it.
3343 return false;
3344
3345 for (const auto &Pair : KnownValues) {
3346 // Okay, we now know that all edges from PredBB should be revectored to
3347 // branch to RealDest.
3348 ConstantInt *CB = Pair.first;
3349 ArrayRef<BasicBlock *> PredBBs = Pair.second.getArrayRef();
3350 BasicBlock *RealDest = BI->getSuccessor(!CB->getZExtValue());
3351
3352 if (RealDest == BB)
3353 continue; // Skip self loops.
3354
3355 // Skip if the predecessor's terminator is an indirect branch.
3356 if (any_of(PredBBs, [](BasicBlock *PredBB) {
3357 return isa<IndirectBrInst>(PredBB->getTerminator());
3358 }))
3359 continue;
3360
3361 LLVM_DEBUG({
3362 dbgs() << "Condition " << *Cond << " in " << BB->getName()
3363 << " has value " << *Pair.first << " in predecessors:\n";
3364 for (const BasicBlock *PredBB : Pair.second)
3365 dbgs() << " " << PredBB->getName() << "\n";
3366 dbgs() << "Threading to destination " << RealDest->getName() << ".\n";
3367 });
3368
3369 // Split the predecessors we are threading into a new edge block. We'll
3370 // clone the instructions into this block, and then redirect it to RealDest.
3371 BasicBlock *EdgeBB = SplitBlockPredecessors(BB, PredBBs, ".critedge", DTU);
3372
3373 // TODO: These just exist to reduce test diff, we can drop them if we like.
3374 EdgeBB->setName(RealDest->getName() + ".critedge");
3375 EdgeBB->moveBefore(RealDest);
3376
3377 // Update PHI nodes.
3378 AddPredecessorToBlock(RealDest, EdgeBB, BB);
3379
3380 // BB may have instructions that are being threaded over. Clone these
3381 // instructions into EdgeBB. We know that there will be no uses of the
3382 // cloned instructions outside of EdgeBB.
3383 BasicBlock::iterator InsertPt = EdgeBB->getFirstInsertionPt();
3384 DenseMap<Value *, Value *> TranslateMap; // Track translated values.
 // Within EdgeBB the branch condition is known to be the constant CB.
3385 TranslateMap[Cond] = CB;
3386
3387 // RemoveDIs: track instructions that we optimise away while folding, so
3388 // that we can copy DPValues from them later.
3389 BasicBlock::iterator SrcDbgCursor = BB->begin();
3390 for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) {
3391 if (PHINode *PN = dyn_cast<PHINode>(BBI)) {
3392 TranslateMap[PN] = PN->getIncomingValueForBlock(EdgeBB);
3393 continue;
3394 }
3395 // Clone the instruction.
3396 Instruction *N = BBI->clone();
3397 // Insert the new instruction into its new home.
3398 N->insertInto(EdgeBB, InsertPt);
3399
3400 if (BBI->hasName())
3401 N->setName(BBI->getName() + ".c");
3402
3403 // Update operands due to translation.
3404 for (Use &Op : N->operands()) {
3405 DenseMap<Value *, Value *>::iterator PI = TranslateMap.find(Op);
3406 if (PI != TranslateMap.end())
3407 Op = PI->second;
3408 }
3409
3410 // Check for trivial simplification.
3411 if (Value *V = simplifyInstruction(N, {DL, nullptr, nullptr, AC})) {
3412 if (!BBI->use_empty())
3413 TranslateMap[&*BBI] = V;
3414 if (!N->mayHaveSideEffects()) {
3415 N->eraseFromParent(); // Instruction folded away, don't need actual
3416 // inst
3417 N = nullptr;
3418 }
3419 } else {
3420 if (!BBI->use_empty())
3421 TranslateMap[&*BBI] = N;
3422 }
3423 if (N) {
3424 // Copy all debug-info attached to instructions from the last we
3425 // successfully clone, up to this instruction (they might have been
3426 // folded away).
3427 for (; SrcDbgCursor != BBI; ++SrcDbgCursor)
3428 N->cloneDebugInfoFrom(&*SrcDbgCursor);
3429 SrcDbgCursor = std::next(BBI);
3430 // Clone debug-info on this instruction too.
3431 N->cloneDebugInfoFrom(&*BBI);
3432
3433 // Register the new instruction with the assumption cache if necessary.
3434 if (auto *Assume = dyn_cast<AssumeInst>(N))
3435 if (AC)
3436 AC->registerAssumption(Assume);
3437 }
3438 }
3439
3440 for (; &*SrcDbgCursor != BI; ++SrcDbgCursor)
3441 InsertPt->cloneDebugInfoFrom(&*SrcDbgCursor);
3442 InsertPt->cloneDebugInfoFrom(BI);
3443
3444 BB->removePredecessor(EdgeBB);
3445 BranchInst *EdgeBI = cast<BranchInst>(EdgeBB->getTerminator());
3446 EdgeBI->setSuccessor(0, RealDest);
3447 EdgeBI->setDebugLoc(BI->getDebugLoc());
3448
3449 if (DTU) {
3451 Updates.push_back({DominatorTree::Delete, EdgeBB, BB});
3452 Updates.push_back({DominatorTree::Insert, EdgeBB, RealDest});
3453 DTU->applyUpdates(Updates);
3454 }
3455
3456 // For simplicity, we created a separate basic block for the edge. Merge
3457 // it back into the predecessor if possible. This not only avoids
3458 // unnecessary SimplifyCFG iterations, but also makes sure that we don't
3459 // bypass the check for trivial cycles above.
3460 MergeBlockIntoPredecessor(EdgeBB, DTU);
3461
3462 // Signal repeat, simplifying any other constants.
3463 return std::nullopt;
3464 }
3465
3466 return false;
3467}
3468
// Driver: repeatedly run the Impl until it stops signalling "changed, retry"
// (std::nullopt); returns whether any change was ever made.
3470 DomTreeUpdater *DTU,
3471 const DataLayout &DL,
3472 AssumptionCache *AC) {
3473 std::optional<bool> Result;
3474 bool EverChanged = false;
3475 do {
3476 // Note that None means "we changed things, but recurse further."
3477 Result = FoldCondBranchOnValueKnownInPredecessorImpl(BI, DTU, DL, AC);
3478 EverChanged |= Result == std::nullopt || *Result;
3479 } while (Result == std::nullopt);
3480 return EverChanged;
3481}
3482
3483/// Given a BB that starts with the specified two-entry PHI node,
3484/// see if we can eliminate it. On success every two-entry PHI in the block
3485/// is turned into a select on the dominating branch condition and the
3486/// diamond/triangle is flattened into a single unconditional edge.
3486                               DomTreeUpdater *DTU, const DataLayout &DL) {
3487  // Ok, this is a two entry PHI node.  Check to see if this is a simple "if
3488  // statement", which has a very simple dominance structure.  Basically, we
3489  // are trying to find the condition that is being branched on, which
3490  // subsequently causes this merge to happen.  We really want control
3491  // dependence information for this check, but simplifycfg can't keep it up
3492  // to date, and this catches most of the cases we care about anyway.
3493  BasicBlock *BB = PN->getParent();
3494
3495  BasicBlock *IfTrue, *IfFalse;
3496  BranchInst *DomBI = GetIfCondition(BB, IfTrue, IfFalse);
3497  if (!DomBI)
3498    return false;
3499  Value *IfCond = DomBI->getCondition();
3500  // Don't bother if the branch will be constant folded trivially.
3501  if (isa<ConstantInt>(IfCond))
3502    return false;
3503
3504  BasicBlock *DomBlock = DomBI->getParent();
  // Collect the side blocks of the diamond/triangle (those ending in an
  // unconditional branch into BB); a triangle yields one, a diamond two.
3507      PN->blocks(), std::back_inserter(IfBlocks), [](BasicBlock *IfBlock) {
3508        return cast<BranchInst>(IfBlock->getTerminator())->isUnconditional();
3509      });
3510  assert((IfBlocks.size() == 1 || IfBlocks.size() == 2) &&
3511         "Will have either one or two blocks to speculate.");
3512
3513  // If the branch is non-unpredictable, see if we either predictably jump to
3514  // the merge bb (if we have only a single 'then' block), or if we predictably
3515  // jump to one specific 'then' block (if we have two of them).
3516  // It isn't beneficial to speculatively execute the code
3517  // from the block that we know is predictably not entered.
3518  if (!DomBI->getMetadata(LLVMContext::MD_unpredictable)) {
3519    uint64_t TWeight, FWeight;
3520    if (extractBranchWeights(*DomBI, TWeight, FWeight) &&
3521        (TWeight + FWeight) != 0) {
3522      BranchProbability BITrueProb =
3523          BranchProbability::getBranchProbability(TWeight, TWeight + FWeight);
3525      BranchProbability BIFalseProb = BITrueProb.getCompl();
3526      if (IfBlocks.size() == 1) {
3527        BranchProbability BIBBProb =
3528            DomBI->getSuccessor(0) == BB ? BITrueProb : BIFalseProb;
3529        if (BIBBProb >= Likely)
3530          return false;
3531      } else {
3532        if (BITrueProb >= Likely || BIFalseProb >= Likely)
3533          return false;
3534      }
3535    }
3536  }
3537
3538  // Don't try to fold an unreachable block. For example, the phi node itself
3539  // can't be the candidate if-condition for a select that we want to form.
3540  if (auto *IfCondPhiInst = dyn_cast<PHINode>(IfCond))
3541    if (IfCondPhiInst->getParent() == BB)
3542      return false;
3543
3544  // Okay, we found that we can merge this two-entry phi node into a select.
3545  // Doing so would require us to fold *all* two entry phi nodes in this block.
3546  // At some point this becomes non-profitable (particularly if the target
3547  // doesn't support cmov's).  Only do this transformation if there are two or
3548  // fewer PHI nodes in this block.
  // NOTE(review): the `NumPhis > 2` check runs before the increment in the
  // for-update clause, so a fourth PHI is what triggers the bail-out — this
  // tolerates up to three PHIs, not "two or fewer" as stated above. Confirm
  // whether the comment or the bound is the intended behavior.
3549  unsigned NumPhis = 0;
3550  for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++NumPhis, ++I)
3551    if (NumPhis > 2)
3552      return false;
3553
3554  // Loop over the PHI's seeing if we can promote them all to select
3555  // instructions.  While we are at it, keep track of the instructions
3556  // that need to be moved to the dominating block.
3557  SmallPtrSet<Instruction *, 4> AggressiveInsts;
3559  InstructionCost Budget =
3561
3562  bool Changed = false;
3563  for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(II);) {
3564    PHINode *PN = cast<PHINode>(II++);
    // Trivially simplifiable PHIs (e.g. both incoming values equal) are
    // folded away immediately rather than turned into selects.
3565    if (Value *V = simplifyInstruction(PN, {DL, PN})) {
3566      PN->replaceAllUsesWith(V);
3567      PN->eraseFromParent();
3568      Changed = true;
3569      continue;
3570    }
3571
    // Both incoming values must be computable above the merge point within
    // the speculation budget, otherwise we cannot remove the control flow.
3572    if (!dominatesMergePoint(PN->getIncomingValue(0), BB, AggressiveInsts,
3573                             Cost, Budget, TTI) ||
3574        !dominatesMergePoint(PN->getIncomingValue(1), BB, AggressiveInsts,
3575                             Cost, Budget, TTI))
3576      return Changed;
3577  }
3578
3579  // If we folded the first phi, PN dangles at this point. Refresh it. If
3580  // we ran out of PHIs then we simplified them all.
3581  PN = dyn_cast<PHINode>(BB->begin());
3582  if (!PN)
3583    return true;
3584
3585  // Return true if at least one of these is a 'not', and another is either
3586  // a 'not' too, or a constant.
3587  auto CanHoistNotFromBothValues = [](Value *V0, Value *V1) {
3588    if (!match(V0, m_Not(m_Value())))
3589      std::swap(V0, V1);
3590    auto Invertible = m_CombineOr(m_Not(m_Value()), m_AnyIntegralConstant());
3591    return match(V0, m_Not(m_Value())) && match(V1, Invertible);
3592  };
3593
3594  // Don't fold i1 branches on PHIs which contain binary operators or
3595  // (possibly inverted) select form of or/ands, unless one of
3596  // the incoming values is an 'not' and another one is freely invertible.
3597  // These can often be turned into switches and other things.
3598  auto IsBinOpOrAnd = [](Value *V) {
3599    return match(
3600        V, m_CombineOr(
3601               m_BinOp(),
3604  };
3605  if (PN->getType()->isIntegerTy(1) &&
3606      (IsBinOpOrAnd(PN->getIncomingValue(0)) ||
3607       IsBinOpOrAnd(PN->getIncomingValue(1)) || IsBinOpOrAnd(IfCond)) &&
3608      !CanHoistNotFromBothValues(PN->getIncomingValue(0),
3609                                 PN->getIncomingValue(1)))
3610    return Changed;
3611
3612  // If all PHI nodes are promotable, check to make sure that all instructions
3613  // in the predecessor blocks can be promoted as well. If not, we won't be able
3614  // to get rid of the control flow, so it's not worth promoting to select
3615  // instructions.
3616  for (BasicBlock *IfBlock : IfBlocks)
3617    for (BasicBlock::iterator I = IfBlock->begin(); !I->isTerminator(); ++I)
3618      if (!AggressiveInsts.count(&*I) && !I->isDebugOrPseudoInst()) {
3619        // This is not an aggressive instruction that we can promote.
3620        // Because of this, we won't be able to get rid of the control flow, so
3621        // the xform is not worth it.
3622        return Changed;
3623      }
3624
3625  // If either of the blocks has it's address taken, we can't do this fold.
3626  if (any_of(IfBlocks,
3627             [](BasicBlock *IfBlock) { return IfBlock->hasAddressTaken(); }))
3628    return Changed;
3629
3630  LLVM_DEBUG(dbgs() << "FOUND IF CONDITION!  " << *IfCond
3631                    << "  T: " << IfTrue->getName()
3632                    << "  F: " << IfFalse->getName() << "\n");
3633
3634  // If we can still promote the PHI nodes after this gauntlet of tests,
3635  // do all of the PHI's now.
3636
3637  // Move all 'aggressive' instructions, which are defined in the
3638  // conditional parts of the if's up to the dominating block.
3639  for (BasicBlock *IfBlock : IfBlocks)
3640    hoistAllInstructionsInto(DomBlock, DomBI, IfBlock);
3641
3642  IRBuilder<NoFolder> Builder(DomBI);
3643  // Propagate fast-math-flags from phi nodes to replacement selects.
3644  IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
3645  while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
3646    if (isa<FPMathOperator>(PN))
3647      Builder.setFastMathFlags(PN->getFastMathFlags());
3648
3649    // Change the PHI node into a select instruction.
3650    Value *TrueVal = PN->getIncomingValueForBlock(IfTrue);
3651    Value *FalseVal = PN->getIncomingValueForBlock(IfFalse);
3652
3653    Value *Sel = Builder.CreateSelect(IfCond, TrueVal, FalseVal, "", DomBI);
3654    PN->replaceAllUsesWith(Sel);
3655    Sel->takeName(PN);
3656    PN->eraseFromParent();
3657  }
3658
3659  // At this point, all IfBlocks are empty, so our if statement
3660  // has been flattened.  Change DomBlock to jump directly to our new block to
3661  // avoid other simplifycfg's kicking in on the diamond.
3662  Builder.CreateBr(BB);
3663
3665  if (DTU) {
3666    Updates.push_back({DominatorTree::Insert, DomBlock, BB});
3667    for (auto *Successor : successors(DomBlock))
3668      Updates.push_back({DominatorTree::Delete, DomBlock, Successor});
3669  }
3670
  // Remove the now-dead conditional branch before applying the DT updates so
  // the CFG and the recorded edge changes agree.
3671  DomBI->eraseFromParent();
3672  if (DTU)
3673    DTU->applyUpdates(Updates);
3674
3675  return true;
3676}
3677
// Create `LHS Opc RHS` for i1 operands. Prefers the plain bitwise binary op,
// which is only legal when evaluating RHS cannot introduce poison that LHS
// did not already imply; otherwise falls back to the short-circuiting
// select-based "logical" form (select i1 LHS, RHS, false / select LHS, true,
// RHS), which does not propagate poison from the unevaluated arm.
3679                                Instruction::BinaryOps Opc, Value *LHS,
3680                                Value *RHS, const Twine &Name = "") {
3681  // Try to relax logical op to binary op.
3682  if (impliesPoison(RHS, LHS))
3683    return Builder.CreateBinOp(Opc, LHS, RHS, Name);
3684  if (Opc == Instruction::And)
3685    return Builder.CreateLogicalAnd(LHS, RHS, Name);
3686  if (Opc == Instruction::Or)
3687    return Builder.CreateLogicalOr(LHS, RHS, Name);
  // Only And/Or are meaningful glue opcodes here; anything else is a caller
  // bug.
3688  llvm_unreachable("Invalid logical opcode");
3689}
3690
3691/// Return true if either PBI or BI has branch weight available, and store
3692/// the weights in {Pred|Succ}{True|False}Weight. If one of PBI and BI does
3693/// not have branch weight, use 1:1 as its weight.
3695                                  uint64_t &PredTrueWeight,
3696                                  uint64_t &PredFalseWeight,
3697                                  uint64_t &SuccTrueWeight,
3698                                  uint64_t &SuccFalseWeight) {
3699  bool PredHasWeights =
3700      extractBranchWeights(*PBI, PredTrueWeight, PredFalseWeight);
3701  bool SuccHasWeights =
3702      extractBranchWeights(*BI, SuccTrueWeight, SuccFalseWeight);
3703  if (PredHasWeights || SuccHasWeights) {
    // Substitute a neutral 1:1 ratio for whichever branch lacks real
    // profile data so the caller can combine the four weights uniformly.
3704    if (!PredHasWeights)
3705      PredTrueWeight = PredFalseWeight = 1;
3706    if (!SuccHasWeights)
3707      SuccTrueWeight = SuccFalseWeight = 1;
3708    return true;
3709  } else {
3710    return false;
3711  }
3712}
3713
3714/// Determine if the two branches share a common destination and deduce a glue
3715/// that joins the branches' conditions to arrive at the common destination if
3716/// that would be profitable. On success the returned tuple is
3717/// (common successor, And/Or opcode gluing the two conditions, whether PBI's
3718/// condition must be inverted first); std::nullopt means "don't fold".
3717static std::optional<std::tuple<BasicBlock *, Instruction::BinaryOps, bool>>
3719                                          const TargetTransformInfo *TTI) {
3720  assert(BI && PBI && BI->isConditional() && PBI->isConditional() &&
3721         "Both blocks must end with a conditional branches.");
3723         "PredBB must be a predecessor of BB.");
3724
3725  // We have the potential to fold the conditions together, but if the
3726  // predecessor branch is predictable, we may not want to merge them.
3727  uint64_t PTWeight, PFWeight;
3728  BranchProbability PBITrueProb, Likely;
3729  if (TTI && !PBI->getMetadata(LLVMContext::MD_unpredictable) &&
3730      extractBranchWeights(*PBI, PTWeight, PFWeight) &&
3731      (PTWeight + PFWeight) != 0) {
3732    PBITrueProb =
3733        BranchProbability::getBranchProbability(PTWeight, PTWeight + PFWeight);
3735  }
3736
  // Four cases, one per pairing of PBI's and BI's successors. In each, only
  // fold when the second condition is actually likely to be evaluated
  // (i.e. the predecessor branch is not already strongly biased toward the
  // common destination). Unknown probability (no weights) permits the fold.
3737  if (PBI->getSuccessor(0) == BI->getSuccessor(0)) {
3738    // Speculate the 2nd condition unless the 1st is probably true.
3739    if (PBITrueProb.isUnknown() || PBITrueProb < Likely)
3740      return {{BI->getSuccessor(0), Instruction::Or, false}};
3741  } else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) {
3742    // Speculate the 2nd condition unless the 1st is probably false.
3743    if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely)
3744      return {{BI->getSuccessor(1), Instruction::And, false}};
3745  } else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) {
3746    // Speculate the 2nd condition unless the 1st is probably true.
3747    if (PBITrueProb.isUnknown() || PBITrueProb < Likely)
3748      return {{BI->getSuccessor(1), Instruction::And, true}};
3749  } else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) {
3750    // Speculate the 2nd condition unless the 1st is probably false.
3751    if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely)
3752      return {{BI->getSuccessor(0), Instruction::Or, true}};
3753  }
  // No shared successor (or the shared edge is too predictable): don't fold.
3754  return std::nullopt;
3755}
3756
// Fold BI (the conditional branch ending BB) into its predecessor's branch
// PBI, gluing the two conditions with And/Or as decided by
// shouldFoldCondBranchesToCommonDestination, cloning BB's "bonus"
// instructions into the predecessor, and rewiring PBI around BB.
3758                                            DomTreeUpdater *DTU,
3759                                            MemorySSAUpdater *MSSAU,
3760                                            const TargetTransformInfo *TTI) {
3761  BasicBlock *BB = BI->getParent();
3762  BasicBlock *PredBlock = PBI->getParent();
3763
3764  // Determine if the two branches share a common destination.
3765  BasicBlock *CommonSucc;
3767  bool InvertPredCond;
3768  std::tie(CommonSucc, Opc, InvertPredCond) =
3770
3771  LLVM_DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB);
3772
3773  IRBuilder<> Builder(PBI);
3774  // The builder is used to create instructions to eliminate the branch in BB.
3775  // If BB's terminator has !annotation metadata, add it to the new
3776  // instructions.
3778                             {LLVMContext::MD_annotation});
3779
3780  // If we need to invert the condition in the pred block to match, do so now.
3781  if (InvertPredCond) {
3782    InvertBranch(PBI, Builder);
3783  }
3784
  // The successor of BB that PredBlock did NOT already branch to; after the
  // fold PBI will target it on the appropriate edge.
3785  BasicBlock *UniqueSucc =
3786      PBI->getSuccessor(0) == BB ? BI->getSuccessor(0) : BI->getSuccessor(1);
3787
3788  // Before cloning instructions, notify the successor basic block that it
3789  // is about to have a new predecessor. This will update PHI nodes,
3790  // which will allow us to update live-out uses of bonus instructions.
3791  AddPredecessorToBlock(UniqueSucc, PredBlock, BB, MSSAU);
3792
3793  // Try to update branch weights.
3794  uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
3795  if (extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
3796                             SuccTrueWeight, SuccFalseWeight)) {
3797    SmallVector<uint64_t, 8> NewWeights;
3798
3799    if (PBI->getSuccessor(0) == BB) {
3800      // PBI: br i1 %x, BB, FalseDest
3801      // BI:  br i1 %y, UniqueSucc, FalseDest
3802      // TrueWeight is TrueWeight for PBI * TrueWeight for BI.
3803      NewWeights.push_back(PredTrueWeight * SuccTrueWeight);
3804      // FalseWeight is FalseWeight for PBI * TotalWeight for BI +
3805      //               TrueWeight for PBI * FalseWeight for BI.
3806      // We assume that total weights of a BranchInst can fit into 32 bits.
3807      // Therefore, we will not have overflow using 64-bit arithmetic.
3808      NewWeights.push_back(PredFalseWeight *
3809                               (SuccFalseWeight + SuccTrueWeight) +
3810                           PredTrueWeight * SuccFalseWeight);
3811    } else {
3812      // PBI: br i1 %x, TrueDest, BB
3813      // BI:  br i1 %y, TrueDest, UniqueSucc
3814      // TrueWeight is TrueWeight for PBI * TotalWeight for BI +
3815      //              FalseWeight for PBI * TrueWeight for BI.
3816      NewWeights.push_back(PredTrueWeight * (SuccFalseWeight + SuccTrueWeight) +
3817                           PredFalseWeight * SuccTrueWeight);
3818      // FalseWeight is FalseWeight for PBI * FalseWeight for BI.
3819      NewWeights.push_back(PredFalseWeight * SuccFalseWeight);
3820    }
3821
3822    // Halve the weights if any of them cannot fit in an uint32_t
3823    FitWeights(NewWeights);
3824
3825    SmallVector<uint32_t, 8> MDWeights(NewWeights.begin(), NewWeights.end());
3826    setBranchWeights(PBI, MDWeights[0], MDWeights[1]);
3827
3828    // TODO: If BB is reachable from all paths through PredBlock, then we
3829    // could replace PBI's branch probabilities with BI's.
3830  } else
    // Neither branch has weights: drop any stale profile metadata on PBI so
    // it doesn't describe the pre-fold CFG.
3831    PBI->setMetadata(LLVMContext::MD_prof, nullptr);
3832
3833  // Now, update the CFG.
3834  PBI->setSuccessor(PBI->getSuccessor(0) != BB, UniqueSucc);
3835
3836  if (DTU)
3837    DTU->applyUpdates({{DominatorTree::Insert, PredBlock, UniqueSucc},
3838                       {DominatorTree::Delete, PredBlock, BB}});
3839
3840  // If BI was a loop latch, it may have had associated loop metadata.
3841  // We need to copy it to the new latch, that is, PBI.
3842  if (MDNode *LoopMD = BI->getMetadata(LLVMContext::MD_loop))
3843    PBI->setMetadata(LLVMContext::MD_loop, LoopMD);
3844
3845  ValueToValueMapTy VMap; // maps original values to cloned values
3847
3848  Module *M = BB->getModule();
3849
3850  if (PredBlock->IsNewDbgInfoFormat) {
3851    PredBlock->getTerminator()->cloneDebugInfoFrom(BB->getTerminator());
3852    for (DPValue &DPV :
      // Remap the cloned non-instruction debug records so they refer to the
      // predecessor's copies of the bonus instructions.
3854      RemapDPValue(M, &DPV, VMap,
3856    }
3857  }
3858
3859  // Now that the Cond was cloned into the predecessor basic block,
3860  // or/and the two conditions together.
3861  Value *BICond = VMap[BI->getCondition()];
3862  PBI->setCondition(
3863      createLogicalOp(Builder, Opc, PBI->getCondition(), BICond, "or.cond"));
3864
3865  ++NumFoldBranchToCommonDest;
3866  return true;
3867}
3868
3869/// Return if an instruction's type or any of its operands' types are a vector
3870/// type.
3871static bool isVectorOp(Instruction &I) {
3872 return I.getType()->isVectorTy() || any_of(I.operands(), [](Use &U) {
3873 return U->getType()->isVectorTy();
3874 });
3875}
3876
3877/// If this basic block is simple enough, and if a predecessor branches to us
3878/// and one of our successors, fold the block into the predecessor and use
3879/// logical operations to pick the right destination.
3881                                  MemorySSAUpdater *MSSAU,
3882                                  const TargetTransformInfo *TTI,
3883                                  unsigned BonusInstThreshold) {
3884  // If this block ends with an unconditional branch,
3885  // let SpeculativelyExecuteBB() deal with it.
3886  if (!BI->isConditional())
3887    return false;
3888
3889  BasicBlock *BB = BI->getParent();
3893
3894  Instruction *Cond = dyn_cast<Instruction>(BI->getCondition());
3895
  // Only fold when the condition is a single-use cmp/binop/select defined in
  // this block — that's the shape createLogicalOp can glue into the
  // predecessor's condition.
3896  if (!Cond ||
3897      (!isa<CmpInst>(Cond) && !isa<BinaryOperator>(Cond) &&
3898       !isa<SelectInst>(Cond)) ||
3899      Cond->getParent() != BB || !Cond->hasOneUse())
3900    return false;
3901
3902  // Finally, don't infinitely unroll conditional loops.
3903  if (is_contained(successors(BB), BB))
3904    return false;
3905
3906  // With which predecessors will we want to deal with?
3908  for (BasicBlock *PredBlock : predecessors(BB)) {
3909    BranchInst *PBI = dyn_cast<BranchInst>(PredBlock->getTerminator());
3910
3911    // Check that we have two conditional branches.  If there is a PHI node in
3912    // the common successor, verify that the same value flows in from both
3913    // blocks.
3914    if (!PBI || PBI->isUnconditional() || !SafeToMergeTerminators(BI, PBI))
3915      continue;
3916
3917    // Determine if the two branches share a common destination.
3918    BasicBlock *CommonSucc;
3920    bool InvertPredCond;
3921    if (auto Recipe = shouldFoldCondBranchesToCommonDestination(BI, PBI, TTI))
3922      std::tie(CommonSucc, Opc, InvertPredCond) = *Recipe;
3923    else
3924      continue;
3925
3926    // Check the cost of inserting the necessary logic before performing the
3927    // transformation.
3928    if (TTI) {
3929      Type *Ty = BI->getCondition()->getType();
      // Inverting the predecessor condition may require materializing an
      // extra 'xor' unless the compare itself can be flipped in place.
3931      if (InvertPredCond && (!PBI->getCondition()->hasOneUse() ||
3932                             !isa<CmpInst>(PBI->getCondition())))
3933        Cost += TTI->getArithmeticInstrCost(Instruction::Xor, Ty, CostKind);
3934
3936        continue;
3937    }
3938
3939    // Ok, we do want to deal with this predecessor. Record it.
3940    Preds.emplace_back(PredBlock);
3941  }
3942
3943  // If there aren't any predecessors into which we can fold,
3944  // don't bother checking the cost.
3945  if (Preds.empty())
3946    return false;
3947
3948  // Only allow this transformation if computing the condition doesn't involve
3949  // too many instructions and these involved instructions can be executed
3950  // unconditionally. We denote all involved instructions except the condition
3951  // as "bonus instructions", and only allow this transformation when the
3952  // number of the bonus instructions we'll need to create when cloning into
3953  // each predecessor does not exceed a certain threshold.
3954  unsigned NumBonusInsts = 0;
3955  bool SawVectorOp = false;
3956  const unsigned PredCount = Preds.size();
3957  for (Instruction &I : *BB) {
3958    // Don't check the branch condition comparison itself.
3959    if (&I == Cond)
3960      continue;
3961    // Ignore dbg intrinsics, and the terminator.
3962    if (isa<DbgInfoIntrinsic>(I) || isa<BranchInst>(I))
3963      continue;
3964    // I must be safe to execute unconditionally.
3966      return false;
3967    SawVectorOp |= isVectorOp(I);
3968
3969    // Account for the cost of duplicating this instruction into each
3970    // predecessor. Ignore free instructions.
3971    if (!TTI || TTI->getInstructionCost(&I, CostKind) !=
3973      NumBonusInsts += PredCount;
3974
3975      // Early exits once we reach the limit.
3976      if (NumBonusInsts >
3977          BonusInstThreshold * BranchFoldToCommonDestVectorMultiplier)
3978        return false;
3979    }
3980
    // Bonus instructions must be in "block-closed SSA form": every use is
    // either a later instruction in BB or a PHI incoming on an edge from BB,
    // so that cloning + PHI rewiring can fix up all uses.
3981    auto IsBCSSAUse = [BB, &I](Use &U) {
3982      auto *UI = cast<Instruction>(U.getUser());
3983      if (auto *PN = dyn_cast<PHINode>(UI))
3984        return PN->getIncomingBlock(U) == BB;
3985      return UI->getParent() == BB && I.comesBefore(UI);
3986    };
3987
3988    // Does this instruction require rewriting of uses?
3989    if (!all_of(I.uses(), IsBCSSAUse))
3990      return false;
3991  }
3992  if (NumBonusInsts >
3993      BonusInstThreshold *
3994          (SawVectorOp ? BranchFoldToCommonDestVectorMultiplier : 1))
3995    return false;
3996
3997  // Ok, we have the budget. Perform the transformation.
  // Deliberately folds into only the first recorded predecessor and returns;
  // SimplifyCFG re-runs this function, so remaining predecessors are handled
  // on subsequent iterations.
3998  for (BasicBlock *PredBlock : Preds) {
3999    auto *PBI = cast<BranchInst>(PredBlock->getTerminator());
4000    return performBranchToCommonDestFolding(BI, PBI, DTU, MSSAU, TTI);
4001  }
4002  return false;
4003}
4004
4005// If there is only one store in BB1 and BB2, return it, otherwise return
4006// nullptr. Either block may be null (a triangle's missing arm), in which
4007// case it is simply skipped; "unique" means one store across BOTH blocks
4008// combined.
4008  StoreInst *S = nullptr;
4009  for (auto *BB : {BB1, BB2}) {
4010    if (!BB)
4011      continue;
4012    for (auto &I : *BB)
4013      if (auto *SI = dyn_cast<StoreInst>(&I)) {
4014        if (S)
4015          // Multiple stores seen.
4016          return nullptr;
4017        else
4018          S = SI;
4019      }
4020  }
4021  return S;
4022}
4023
4025                                             Value *AlternativeV = nullptr) {
4026  // PHI is going to be a PHI node that allows the value V that is defined in
4027  // BB to be referenced in BB's only successor.
4028  //
4029  // If AlternativeV is nullptr, the only value we care about in PHI is V. It
4030  // doesn't matter to us what the other operand is (it'll never get used). We
4031  // could just create a new PHI with an undef incoming value, but that could
4032  // increase register pressure if EarlyCSE/InstCombine can't fold it with some
4033  // other PHI. So here we directly look for some PHI in BB's successor with V
4034  // as an incoming operand. If we find one, we use it, else we create a new
4035  // one.
4036  //
4037  // If AlternativeV is not nullptr, we care about both incoming values in PHI.
4038  // PHI must be exactly: phi <ty> [ %BB, %V ], [ %OtherBB, %AlternativeV]
4039  // where OtherBB is the single other predecessor of BB's only successor.
4040  PHINode *PHI = nullptr;
4041  BasicBlock *Succ = BB->getSingleSuccessor();
4042
  // First, try to reuse an existing PHI in Succ that already has the right
  // incoming value(s); creating a duplicate would just add register pressure.
4043  for (auto I = Succ->begin(); isa<PHINode>(I); ++I)
4044    if (cast<PHINode>(I)->getIncomingValueForBlock(BB) == V) {
4045      PHI = cast<PHINode>(I);
4046      if (!AlternativeV)
4047        break;
4048
4049      assert(Succ->hasNPredecessors(2));
4050      auto PredI = pred_begin(Succ);
4051      BasicBlock *OtherPredBB = *PredI == BB ? *++PredI : *PredI;
4052      if (PHI->getIncomingValueForBlock(OtherPredBB) == AlternativeV)
4053        break;
      // Right value from BB but wrong alternative: keep scanning.
4054      PHI = nullptr;
4055    }
4056  if (PHI)
4057    return PHI;
4058
4059  // If V is not an instruction defined in BB, just return it.
4060  if (!AlternativeV &&
4061      (!isa<Instruction>(V) || cast<Instruction>(V)->getParent() != BB))
4062    return V;
4063
  // No reusable PHI: create one. If no alternative was requested, the other
  // incoming value is never read, so poison is fine there.
4064  PHI = PHINode::Create(V->getType(), 2, "simplifycfg.merge");
4065  PHI->insertBefore(Succ->begin());
4066  PHI->addIncoming(V, BB);
4067  for (BasicBlock *PredBB : predecessors(Succ))
4068    if (PredBB != BB)
4069      PHI->addIncoming(
4070          AlternativeV ? AlternativeV : PoisonValue::get(V->getType()), PredBB);
4071  return PHI;
4072}
4073
// Given the P and Q diamonds/triangles described in mergeConditionalStores,
// try to sink the single conditional store to Address out of both into one
// store in PostBB predicated on the OR of the two branch conditions.
4075    BasicBlock *PTB, BasicBlock *PFB, BasicBlock *QTB, BasicBlock *QFB,
4076    BasicBlock *PostBB, Value *Address, bool InvertPCond, bool InvertQCond,
4077    DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI) {
4078  // For every pointer, there must be exactly two stores, one coming from
4079  // PTB or PFB, and the other from QTB or QFB. We don't support more than one
4080  // store (to any address) in PTB,PFB or QTB,QFB.
4081  // FIXME: We could relax this restriction with a bit more work and performance
4082  // testing.
4083  StoreInst *PStore = findUniqueStoreInBlocks(PTB, PFB);
4084  StoreInst *QStore = findUniqueStoreInBlocks(QTB, QFB);
4085  if (!PStore || !QStore)
4086    return false;
4087
4088  // Now check the stores are compatible.
4089  if (!QStore->isUnordered() || !PStore->isUnordered() ||
4090      PStore->getValueOperand()->getType() !=
4091          QStore->getValueOperand()->getType())
4092    return false;
4093
4094  // Check that sinking the store won't cause program behavior changes. Sinking
4095  // the store out of the Q blocks won't change any behavior as we're sinking
4096  // from a block to its unconditional successor. But we're moving a store from
4097  // the P blocks down through the middle block (QBI) and past both QFB and QTB.
4098  // So we need to check that there are no aliasing loads or stores in
4099  // QBI, QTB and QFB. We also need to check there are no conflicting memory
4100  // operations between PStore and the end of its parent block.
4101  //
4102  // The ideal way to do this is to query AliasAnalysis, but we don't
4103  // preserve AA currently so that is dangerous. Be super safe and just
4104  // check there are no other memory operations at all.
4105  for (auto &I : *QFB->getSinglePredecessor())
4106    if (I.mayReadOrWriteMemory())
4107      return false;
4108  for (auto &I : *QFB)
4109    if (&I != QStore && I.mayReadOrWriteMemory())
4110      return false;
4111  if (QTB)
4112    for (auto &I : *QTB)
4113      if (&I != QStore && I.mayReadOrWriteMemory())
4114        return false;
4115  for (auto I = BasicBlock::iterator(PStore), E = PStore->getParent()->end();
4116       I != E; ++I)
4117    if (&*I != PStore && I->mayReadOrWriteMemory())
4118      return false;
4119
4120  // If we're not in aggressive mode, we only optimize if we have some
4121  // confidence that by optimizing we'll allow P and/or Q to be if-converted.
4122  auto IsWorthwhile = [&](BasicBlock *BB, ArrayRef<StoreInst *> FreeStores) {
4123    if (!BB)
4124      return true;
4125    // Heuristic: if the block can be if-converted/phi-folded and the
4126    // instructions inside are all cheap (arithmetic/GEPs), it's worthwhile to
4127    // thread this store.
4129    InstructionCost Budget =
4131    for (auto &I : BB->instructionsWithoutDebug(false)) {
4132      // Consider terminator instruction to be free.
4133      if (I.isTerminator())
4134        continue;
4135      // If this is one the stores that we want to speculate out of this BB,
4136      // then don't count it's cost, consider it to be free.
      // FIXME(review): llvm::find returns an iterator, which converts to
      // true even when it equals end(); this looks like it was meant to be
      // is_contained(FreeStores, S). As written, *every* store in the block
      // is treated as free — confirm intent before relying on this cost
      // model.
4137      if (auto *S = dyn_cast<StoreInst>(&I))
4138        if (llvm::find(FreeStores, S))
4139          continue;
4140      // Else, we have a white-list of instructions that we are okay
4141      // speculating.
4141      if (!isa<BinaryOperator>(I) && !isa<GetElementPtrInst>(I))
4142        return false; // Not in white-list - not worthwhile folding.
4143      // And finally, if this is a non-free instruction that we are okay
4144      // speculating, ensure that we consider the speculation budget.
4145      Cost +=
4147      if (Cost > Budget)
4148        return false; // Eagerly refuse to fold as soon as we're out of budget.
4149    }
4150    assert(Cost <= Budget &&
4151           "When we run out of budget we will eagerly return from within the "
4152           "per-instruction loop.");
4153    return true;
4154  };
4155
4156  const std::array<StoreInst *, 2> FreeStores = {PStore, QStore};
4158      (!IsWorthwhile(PTB, FreeStores) || !IsWorthwhile(PFB, FreeStores) ||
4159       !IsWorthwhile(QTB, FreeStores) || !IsWorthwhile(QFB, FreeStores)))
4160    return false;
4161
4162  // If PostBB has more than two predecessors, we need to split it so we can
4163  // sink the store.
4164  if (std::next(pred_begin(PostBB), 2) != pred_end(PostBB)) {
4165    // We know that QFB's only successor is PostBB. And QFB has a single
4166    // predecessor. If QTB exists, then its only successor is also PostBB.
4167    // If QTB does not exist, then QFB's only predecessor has a conditional
4168    // branch to QFB and PostBB.
4169    BasicBlock *TruePred = QTB ? QTB : QFB->getSinglePredecessor();
4170    BasicBlock *NewBB =
4171        SplitBlockPredecessors(PostBB, {QFB, TruePred}, "condstore.split", DTU);
4172    if (!NewBB)
4173      return false;
4174    PostBB = NewBB;
4175  }
4176
4177  // OK, we're going to sink the stores to PostBB. The store has to be
4178  // conditional though, so first create the predicate.
4179  Value *PCond = cast<BranchInst>(PFB->getSinglePredecessor()->getTerminator())
4180                     ->getCondition();
4181  Value *QCond = cast<BranchInst>(QFB->getSinglePredecessor()->getTerminator())
4182                     ->getCondition();
4183
  // Make the stored values available in PostBB via PHIs (reusing existing
  // PHIs when possible).
4185                                                PStore->getParent());
4187                                                QStore->getParent(), PPHI);
4188
4189  BasicBlock::iterator PostBBFirst = PostBB->getFirstInsertionPt();
4190  IRBuilder<> QB(PostBB, PostBBFirst);
4191  QB.SetCurrentDebugLocation(PostBBFirst->getStableDebugLoc());
4192
  // Normalize each condition so that "true" means "its store executed",
  // accounting for which arm held the store and for canonicalization
  // inversions requested by the caller.
4193  Value *PPred = PStore->getParent() == PTB ? PCond : QB.CreateNot(PCond);
4194  Value *QPred = QStore->getParent() == QTB ? QCond : QB.CreateNot(QCond);
4195
4196  if (InvertPCond)
4197    PPred = QB.CreateNot(PPred);
4198  if (InvertQCond)
4199    QPred = QB.CreateNot(QPred);
4200  Value *CombinedPred = QB.CreateOr(PPred, QPred);
4201
4202  BasicBlock::iterator InsertPt = QB.GetInsertPoint();
4203  auto *T = SplitBlockAndInsertIfThen(CombinedPred, InsertPt,
4204                                      /*Unreachable=*/false,
4205                                      /*BranchWeights=*/nullptr, DTU);
4206
4207  QB.SetInsertPoint(T);
4208  StoreInst *SI = cast<StoreInst>(QB.CreateStore(QPHI, Address));
4209  SI->setAAMetadata(PStore->getAAMetadata().merge(QStore->getAAMetadata()));
4210  // Choose the minimum alignment. If we could prove both stores execute, we
4211  // could use biggest one. In this case, though, we only know that one of the
4212  // stores executes.  And we don't know it's safe to take the alignment from a
4213  // store that doesn't execute.
4214  SI->setAlignment(std::min(PStore->getAlign(), QStore->getAlign()));
4215
4216  QStore->eraseFromParent();
4217  PStore->eraseFromParent();
4218
4219  return true;
4220}
4221
// Recognize two stacked diamonds/triangles (PBI then QBI) whose arms store
// to common addresses, and merge each such pair of conditional stores into
// one predicated store after the second diamond.
4223                                   DomTreeUpdater *DTU, const DataLayout &DL,
4224                                   const TargetTransformInfo &TTI) {
4225  // The intention here is to find diamonds or triangles (see below) where each
4226  // conditional block contains a store to the same address. Both of these
4227  // stores are conditional, so they can't be unconditionally sunk. But it may
4228  // be profitable to speculatively sink the stores into one merged store at the
4229  // end, and predicate the merged store on the union of the two conditions of
4230  // PBI and QBI.
4231  //
4232  // This can reduce the number of stores executed if both of the conditions are
4233  // true, and can allow the blocks to become small enough to be if-converted.
4234  // This optimization will also chain, so that ladders of test-and-set
4235  // sequences can be if-converted away.
4236  //
4237  // We only deal with simple diamonds or triangles:
4238  //
4239  //     PBI       or      PBI        or a combination of the two
4240  //    /   \               | \
4241  //   PTB  PFB             |  PFB
4242  //    \   /               | /
4243  //     QBI                QBI
4244  //    /  \                | \
4245  //   QTB  QFB             |  QFB
4246  //    \  /                | /
4247  //    PostBB            PostBB
4248  //
4249  // We model triangles as a type of diamond with a nullptr "true" block.
4250  // Triangles are canonicalized so that the fallthrough edge is represented by
4251  // a true condition, as in the diagram above.
4252  BasicBlock *PTB = PBI->getSuccessor(0);
4253  BasicBlock *PFB = PBI->getSuccessor(1);
4254  BasicBlock *QTB = QBI->getSuccessor(0);
4255  BasicBlock *QFB = QBI->getSuccessor(1);
4256  BasicBlock *PostBB = QFB->getSingleSuccessor();
4257
4258  // Make sure we have a good guess for PostBB. If QTB's only successor is
4259  // QFB, then QFB is a better PostBB.
4260  if (QTB->getSingleSuccessor() == QFB)
4261    PostBB = QFB;
4262
4263  // If we couldn't find a good PostBB, stop.
4264  if (!PostBB)
4265    return false;
4266
4267  bool InvertPCond = false, InvertQCond = false;
4268  // Canonicalize fallthroughs to the true branches.
4269  if (PFB == QBI->getParent()) {
4270    std::swap(PFB, PTB);
4271    InvertPCond = true;
4272  }
4273  if (QFB == PostBB) {
4274    std::swap(QFB, QTB);
4275    InvertQCond = true;
4276  }
4277
4278  // From this point on we can assume PTB or QTB may be fallthroughs but PFB
4279  // and QFB may not. Model fallthroughs as a nullptr block.
4280  if (PTB == QBI->getParent())
4281    PTB = nullptr;
4282  if (QTB == PostBB)
4283    QTB = nullptr;
4284
4285  // Legality bailouts. We must have at least the non-fallthrough blocks and
4286  // the post-dominating block, and the non-fallthroughs must only have one
4287  // predecessor.
4288  auto HasOnePredAndOneSucc = [](BasicBlock *BB, BasicBlock *P, BasicBlock *S) {
4289    return BB->getSinglePredecessor() == P && BB->getSingleSuccessor() == S;
4290  };
4291  if (!HasOnePredAndOneSucc(PFB, PBI->getParent(), QBI->getParent()) ||
4292      !HasOnePredAndOneSucc(QFB, QBI->getParent(), PostBB))
4293    return false;
4294  if ((PTB && !HasOnePredAndOneSucc(PTB, PBI->getParent(), QBI->getParent())) ||
4295      (QTB && !HasOnePredAndOneSucc(QTB, QBI->getParent(), PostBB)))
4296    return false;
4297  if (!QBI->getParent()->hasNUses(2))
4298    return false;
4299
4300  // OK, this is a sequence of two diamonds or triangles.
4301  // Check if there are stores in PTB or PFB that are repeated in QTB or QFB.
4302  SmallPtrSet<Value *, 4> PStoreAddresses, QStoreAddresses;
4303  for (auto *BB : {PTB, PFB}) {
4304    if (!BB)
4305      continue;
4306    for (auto &I : *BB)
4307      if (StoreInst *SI = dyn_cast<StoreInst>(&I))
4308        PStoreAddresses.insert(SI->getPointerOperand());
4309  }
4310  for (auto *BB : {QTB, QFB}) {
4311    if (!BB)
4312      continue;
4313    for (auto &I : *BB)
4314      if (StoreInst *SI = dyn_cast<StoreInst>(&I))
4315        QStoreAddresses.insert(SI->getPointerOperand());
4316  }
4317
4318  set_intersect(PStoreAddresses, QStoreAddresses);
4319  // set_intersect mutates PStoreAddresses in place. Rename it here to make it
4320  // clear what it contains.
4321  auto &CommonAddresses = PStoreAddresses;
4322
  // Attempt the merge once per address stored on both sides; each attempt is
  // independent and any success makes this call report a change.
4323  bool Changed = false;
4324  for (auto *Address : CommonAddresses)
4325    Changed |=
4326        mergeConditionalStoreToAddress(PTB, PFB, QTB, QFB, PostBB, Address,
4327                                       InvertPCond, InvertQCond, DTU, DL, TTI);
4328  return Changed;
4329}
4330
4331/// If the previous block ended with a widenable branch, determine if reusing
4332/// the target block is profitable and legal.  This will have the effect of
4333/// "widening" PBI, but doesn't require us to reason about hosting safety.
4335                                           DomTreeUpdater *DTU) {
4336  // TODO: This can be generalized in two important ways:
4337  // 1) We can allow phi nodes in IfFalseBB and simply reuse all the input
4338  //    values from the PBI edge.
4339  // 2) We can sink side effecting instructions into BI's fallthrough
4340  //    successor provided they doesn't contribute to computation of
4341  //    BI's condition.
4342  BasicBlock *IfTrueBB = PBI->getSuccessor(0);
4343  BasicBlock *IfFalseBB = PBI->getSuccessor(1);
4344  if (!isWidenableBranch(PBI) || IfTrueBB != BI->getParent() ||
4346    return false;
4347  if (!IfFalseBB->phis().empty())
4348    return false; // TODO
4349  // This helps avoid infinite loop with SimplifyCondBranchToCondBranch which
4350  // may undo the transform done here.
4351  // TODO: There might be a more fine-grained solution to this.
4352  if (!llvm::succ_empty(IfFalseBB))
4353    return false;
4354  // Use lambda to lazily compute expensive condition after cheap ones.
4355  auto NoSideEffects = [](BasicBlock &BB) {
4356    return llvm::none_of(BB, [](const Instruction &I) {
4357      return I.mayWriteToMemory() || I.mayHaveSideEffects();
4358    });
4359  };
  // Two symmetric cases: if either successor of BI is a deopt block, it is
  // profitable to redirect that edge to the widenable branch's false block,
  // provided BI's block has no side effects that would then be skipped.
4360  if (BI->getSuccessor(1) != IfFalseBB && // no inf looping
4361      BI->getSuccessor(1)->getTerminatingDeoptimizeCall() && // profitability
4362      NoSideEffects(*BI->getParent())) {
4363    auto *OldSuccessor = BI->getSuccessor(1);
4364    OldSuccessor->removePredecessor(BI->getParent());
4365    BI->setSuccessor(1, IfFalseBB);
4366    if (DTU)
4367      DTU->applyUpdates(
4368          {{DominatorTree::Insert, BI->getParent(), IfFalseBB},
4369           {DominatorTree::Delete, BI->getParent(), OldSuccessor}});
4370    return true;
4371  }
4372  if (BI->getSuccessor(0) != IfFalseBB && // no inf looping
4373      BI->getSuccessor(0)->getTerminatingDeoptimizeCall() && // profitability
4374      NoSideEffects(*BI->getParent())) {
4375    auto *OldSuccessor = BI->getSuccessor(0);
4376    OldSuccessor->removePredecessor(BI->getParent());
4377    BI->setSuccessor(0, IfFalseBB);
4378    if (DTU)
4379      DTU->applyUpdates(
4380          {{DominatorTree::Insert, BI->getParent(), IfFalseBB},
4381           {DominatorTree::Delete, BI->getParent(), OldSuccessor}});
4382    return true;
4383  }
4384  return false;
4385}
4386
4387/// If we have a conditional branch as a predecessor of another block,
4388/// this function tries to simplify it. We know
4389/// that PBI and BI are both conditional branches, and BI is in one of the
4390/// successor blocks of PBI - PBI branches to BI.
4392 DomTreeUpdater *DTU,
4393 const DataLayout &DL,
4394 const TargetTransformInfo &TTI) {
4395 assert(PBI->isConditional() && BI->isConditional());
4396 BasicBlock *BB = BI->getParent();
4397
4398 // If this block ends with a branch instruction, and if there is a
4399 // predecessor that ends on a branch of the same condition, make
4400 // this conditional branch redundant.
4401 if (PBI->getCondition() == BI->getCondition() &&
4402 PBI->getSuccessor(0) != PBI->getSuccessor(1)) {
4403 // Okay, the outcome of this conditional branch is statically
4404 // knowable. If this block had a single pred, handle specially, otherwise
4405 // FoldCondBranchOnValueKnownInPredecessor() will handle it.
4406 if (BB->getSinglePredecessor()) {
4407 // Turn this into a branch on constant.
4408 bool CondIsTrue = PBI->getSuccessor(0) == BB;
4409 BI->setCondition(
4410 ConstantInt::get(Type::getInt1Ty(BB->getContext()), CondIsTrue));
4411 return true; // Nuke the branch on constant.
4412 }
4413 }
4414
4415 // If the previous block ended with a widenable branch, determine if reusing
4416 // the target block is profitable and legal. This will have the effect of
4417 // "widening" PBI, but doesn't require us to reason about hosting safety.
4418 if (tryWidenCondBranchToCondBranch(PBI, BI, DTU))
4419 return true;
4420
4421 // If both branches are conditional and both contain stores to the same
4422 // address, remove the stores from the conditionals and create a conditional
4423 // merged store at the end.
4424 if (MergeCondStores && mergeConditionalStores(PBI, BI, DTU, DL, TTI))
4425 return true;
4426
4427 // If this is a conditional branch in an empty block, and if any
4428 // predecessors are a conditional branch to one of our destinations,
4429 // fold the conditions into logical ops and one cond br.
4430
4431 // Ignore dbg intrinsics.
4432 if (&*BB->instructionsWithoutDebug(false).begin() != BI)
4433 return false;
4434
4435 int PBIOp, BIOp;
4436 if (PBI->getSuccessor(0) == BI->getSuccessor(0)) {
4437 PBIOp = 0;
4438 BIOp = 0;
4439 } else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) {
4440 PBIOp = 0;
4441 BIOp = 1;
4442 } else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) {
4443 PBIOp = 1;
4444 BIOp = 0;
4445 } else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) {
4446 PBIOp = 1;
4447 BIOp = 1;
4448 } else {
4449 return false;
4450 }
4451
4452 // Check to make sure that the other destination of this branch
4453 // isn't BB itself. If so, this is an infinite loop that will
4454 // keep getting unwound.
4455 if (PBI->getSuccessor(PBIOp) == BB)
4456 return false;
4457
4458 // If predecessor's branch probability to BB is too low don't merge branches.
4459 SmallVector<uint32_t, 2> PredWeights;
4460 if (!PBI->getMetadata(LLVMContext::MD_unpredictable) &&
4461 extractBranchWeights(*PBI, PredWeights) &&
4462 (static_cast<uint64_t>(PredWeights[0]) + PredWeights[1]) != 0) {
4463
4465 PredWeights[PBIOp],
4466 static_cast<uint64_t>(PredWeights[0]) + PredWeights[1]);
4467
4469 if (CommonDestProb >= Likely)
4470 return false;
4471 }
4472
4473 // Do not perform this transformation if it would require
4474 // insertion of a large number of select instructions. For targets
4475 // without predication/cmovs, this is a big pessimization.
4476
4477 BasicBlock *CommonDest = PBI->getSuccessor(PBIOp);
4478 BasicBlock *RemovedDest = PBI->getSuccessor(PBIOp ^ 1);
4479 unsigned NumPhis = 0;
4480 for (BasicBlock::iterator II = CommonDest->begin(); isa<PHINode>(II);
4481 ++II, ++NumPhis) {
4482 if (NumPhis > 2) // Disable this xform.
4483 return false;
4484 }
4485
4486 // Finally, if everything is ok, fold the branches to logical ops.
4487 BasicBlock *OtherDest = BI->getSuccessor(BIOp ^ 1);
4488
4489 LLVM_DEBUG(dbgs() << "FOLDING BRs:" << *PBI->getParent()
4490 << "AND: " << *BI->getParent());
4491
4493
4494 // If OtherDest *is* BB, then BB is a basic block with a single conditional
4495 // branch in it, where one edge (OtherDest) goes back to itself but the other
4496 // exits. We don't *know* that the program avoids the infinite loop
4497 // (even though that seems likely). If we do this xform naively, we'll end up
4498 // recursively unpeeling the loop. Since we know that (after the xform is
4499 // done) that the block *is* infinite if reached, we just make it an obviously
4500 // infinite loop with no cond branch.
4501 if (OtherDest == BB) {
4502 // Insert it at the end of the function, because it's either code,
4503 // or it won't matter if it's hot. :)
4504 BasicBlock *InfLoopBlock =
4505 BasicBlock::Create(BB->getContext(), "infloop", BB->getParent());
4506 BranchInst::Create(InfLoopBlock, InfLoopBlock);
4507 if (DTU)
4508 Updates.push_back({DominatorTree::Insert, InfLoopBlock, InfLoopBlock});
4509 OtherDest = InfLoopBlock;
4510 }
4511
4512 LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent());
4513
4514 // BI may have other predecessors. Because of this, we leave
4515 // it alone, but modify PBI.
4516
4517 // Make sure we get to CommonDest on True&True directions.
4518 Value *PBICond = PBI->getCondition();
4519 IRBuilder<NoFolder> Builder(PBI);
4520 if (PBIOp)
4521 PBICond = Builder.CreateNot(PBICond, PBICond->getName() + ".not");
4522
4523 Value *BICond = BI->getCondition();
4524 if (BIOp)
4525 BICond = Builder.CreateNot(BICond, BICond->getName() + ".not");
4526
4527 // Merge the conditions.
4528 Value *Cond =
4529 createLogicalOp(Builder, Instruction::Or, PBICond, BICond, "brmerge");
4530
4531 // Modify PBI to branch on the new condition to the new dests.
4532 PBI->setCondition(Cond);
4533 PBI->setSuccessor(0, CommonDest);
4534 PBI->setSuccessor(1, OtherDest);
4535
4536 if (DTU) {
4537 Updates.push_back({DominatorTree::Insert, PBI->getParent(), OtherDest});
4538 Updates.push_back({DominatorTree::Delete, PBI->getParent(), RemovedDest});
4539
4540 DTU->applyUpdates(Updates);
4541 }
4542
4543 // Update branch weight for PBI.
4544 uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
4545 uint64_t PredCommon, PredOther, SuccCommon, SuccOther;
4546 bool HasWeights =
4547 extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
4548 SuccTrueWeight, SuccFalseWeight);
4549 if (HasWeights) {
4550 PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight;
4551 PredOther = PBIOp ? PredTrueWeight : PredFalseWeight;
4552 SuccCommon = BIOp ? SuccFalseWeight : SuccTrueWeight;
4553 SuccOther = BIOp ? SuccTrueWeight : SuccFalseWeight;
4554 // The weight to CommonDest should be PredCommon * SuccTotal +
4555 // PredOther * SuccCommon.
4556 // The weight to OtherDest should be PredOther * SuccOther.
4557 uint64_t NewWeights[2] = {PredCommon * (SuccCommon + SuccOther) +
4558 PredOther * SuccCommon,
4559 PredOther * SuccOther};
4560 // Halve the weights if any of them cannot fit in an uint32_t
4561 FitWeights(NewWeights);
4562
4563 setBranchWeights(PBI, NewWeights[0], NewWeights[1]);
4564 }
4565
4566 // OtherDest may have phi nodes. If so, add an entry from PBI's
4567 // block that are identical to the entries for BI's block.
4568 AddPredecessorToBlock(OtherDest, PBI->getParent(), BB);
4569
4570 // We know that the CommonDest already had an edge from PBI to
4571 // it. If it has PHIs though, the PHIs may have different
4572 // entries for BB and PBI's BB. If so, insert a select to make
4573 // them agree.
4574 for (PHINode &PN : CommonDest->phis()) {
4575 Value *BIV = PN.getIncomingValueForBlock(BB);
4576 unsigned PBBIdx = PN.getBasicBlockIndex(PBI->getParent());
4577 Value *PBIV = PN.getIncomingValue(PBBIdx);
4578 if (BIV != PBIV) {
4579 // Insert a select in PBI to pick the right value.
4580 SelectInst *NV = cast<SelectInst>(
4581 Builder.CreateSelect(PBICond, PBIV, BIV, PBIV->getName() + ".mux"));
4582 PN.setIncomingValue(PBBIdx, NV);
4583 // Although the select has the same condition as PBI, the original branch
4584 // weights for PBI do not apply to the new select because the select's
4585 // 'logical' edges are incoming edges of the phi that is eliminated, not
4586 // the outgoing edges of PBI.
4587 if (HasWeights) {
4588 uint64_t PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight;
4589 uint64_t PredOther = PBIOp ? PredTrueWeight : PredFalseWeight;
4590 uint64_t SuccCommon = BIOp ? SuccFalseWeight : SuccTrueWeight;
4591 uint64_t SuccOther = BIOp ? SuccTrueWeight : SuccFalseWeight;
4592 // The weight to PredCommonDest should be PredCommon * SuccTotal.
4593 // The weight to PredOtherDest should be PredOther * SuccCommon.
4594 uint64_t NewWeights[2] = {PredCommon * (SuccCommon + SuccOther),
4595 PredOther * SuccCommon};
4596
4597 FitWeights(NewWeights);
4598
4599 setBranchWeights(NV, NewWeights[0], NewWeights[1]);
4600 }
4601 }
4602 }
4603
4604 LLVM_DEBUG(dbgs() << "INTO: " << *PBI->getParent());
4605 LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent());
4606
4607 // This basic block is probably dead. We know it has at least
4608 // one fewer predecessor.
4609 return true;
4610}
4611
4612// Simplifies a terminator by replacing it with a branch to TrueBB if Cond is
4613// true or to FalseBB if Cond is false.
4614// Takes care of updating the successors and removing the old terminator.
4615// Also makes sure not to introduce new successors by assuming that edges to
4616// non-successor TrueBBs and FalseBBs aren't reachable.
4617bool SimplifyCFGOpt::SimplifyTerminatorOnSelect(Instruction *OldTerm,
4618 Value *Cond, BasicBlock *TrueBB,
4619 BasicBlock *FalseBB,
4620 uint32_t TrueWeight,
4621 uint32_t FalseWeight) {
4622 auto *BB = OldTerm->getParent();
4623 // Remove any superfluous successor edges from the CFG.
4624 // First, figure out which successors to preserve.
4625 // If TrueBB and FalseBB are equal, only try to preserve one copy of that
4626 // successor.
4627 BasicBlock *KeepEdge1 = TrueBB;
4628 BasicBlock *KeepEdge2 = TrueBB != FalseBB ? FalseBB : nullptr;
4629
4630 SmallSetVector<BasicBlock *, 2> RemovedSuccessors;
4631
4632 // Then remove the rest.
4633 for (BasicBlock *Succ : successors(OldTerm)) {
4634 // Make sure only to keep exactly one copy of each edge.
4635 if (Succ == KeepEdge1)
4636 KeepEdge1 = nullptr;
4637 else if (Succ == KeepEdge2)
4638 KeepEdge2 = nullptr;
4639 else {
4640 Succ->removePredecessor(BB,
4641 /*KeepOneInputPHIs=*/true);
4642
4643 if (Succ != TrueBB && Succ != FalseBB)
4644 RemovedSuccessors.insert(Succ);
4645 }
4646 }
4647
4648 IRBuilder<> Builder(OldTerm);
4649 Builder.SetCurrentDebugLocation(OldTerm->getDebugLoc());
4650
4651 // Insert an appropriate new terminator.
4652 if (!KeepEdge1 && !KeepEdge2) {
4653 if (TrueBB == FalseBB) {
4654 // We were only looking for one successor, and it was present.
4655 // Create an unconditional branch to it.
4656 Builder.CreateBr(TrueBB);
4657 } else {
4658 // We found both of the successors we were looking for.
4659 // Create a conditional branch sharing the condition of the select.
4660 BranchInst *NewBI = Builder.CreateCondBr(Cond, TrueBB, FalseBB);
4661 if (TrueWeight != FalseWeight)
4662 setBranchWeights(NewBI, TrueWeight, FalseWeight);
4663 }
4664 } else if (KeepEdge1 && (KeepEdge2 || TrueBB == FalseBB)) {
4665 // Neither of the selected blocks were successors, so this
4666 // terminator must be unreachable.
4667 new UnreachableInst(OldTerm->getContext(), OldTerm->getIterator());
4668 } else {
4669 // One of the selected values was a successor, but the other wasn't.
4670 // Insert an unconditional branch to the one that was found;
4671 // the edge to the one that wasn't must be unreachable.
4672 if (!KeepEdge1) {
4673 // Only TrueBB was found.
4674 Builder.CreateBr(TrueBB);
4675 } else {
4676 // Only FalseBB was found.
4677 Builder.CreateBr(FalseBB);
4678 }
4679 }
4680
4682
4683 if (DTU) {
4685 Updates.reserve(RemovedSuccessors.size());
4686 for (auto *RemovedSuccessor : RemovedSuccessors)
4687 Updates.push_back({DominatorTree::Delete, BB, RemovedSuccessor});
4688 DTU->applyUpdates(Updates);
4689 }
4690
4691 return true;
4692}
4693
4694// Replaces
4695// (switch (select cond, X, Y)) on constant X, Y
4696// with a branch - conditional if X and Y lead to distinct BBs,
4697// unconditional otherwise.
4698bool SimplifyCFGOpt::SimplifySwitchOnSelect(SwitchInst *SI,
4699 SelectInst *Select) {
4700 // Check for constant integer values in the select.
4701 ConstantInt *TrueVal = dyn_cast<ConstantInt>(Select->getTrueValue());
4702 ConstantInt *FalseVal = dyn_cast<ConstantInt>(Select->getFalseValue());
4703 if (!TrueVal || !FalseVal)
4704 return false;
4705
4706 // Find the relevant condition and destinations.
4707 Value *Condition = Select->getCondition();
4708 BasicBlock *TrueBB = SI->findCaseValue(TrueVal)->getCaseSuccessor();
4709 BasicBlock *FalseBB = SI->findCaseValue(FalseVal)->getCaseSuccessor();
4710
4711 // Get weight for TrueBB and FalseBB.
4712 uint32_t TrueWeight = 0, FalseWeight = 0;
4714 bool HasWeights = hasBranchWeightMD(*SI);
4715 if (HasWeights) {
4716 GetBranchWeights(SI, Weights);
4717 if (Weights.size() == 1 + SI->getNumCases()) {
4718 TrueWeight =
4719 (uint32_t)Weights[SI->findCaseValue(TrueVal)->getSuccessorIndex()];
4720 FalseWeight =
4721 (uint32_t)Weights[SI->findCaseValue(FalseVal)->getSuccessorIndex()];
4722 }
4723 }
4724
4725 // Perform the actual simplification.
4726 return SimplifyTerminatorOnSelect(SI, Condition, TrueBB, FalseBB, TrueWeight,
4727 FalseWeight);
4728}
4729
4730// Replaces
4731// (indirectbr (select cond, blockaddress(@fn, BlockA),
4732// blockaddress(@fn, BlockB)))
4733// with
4734// (br cond, BlockA, BlockB).
4735bool SimplifyCFGOpt::SimplifyIndirectBrOnSelect(IndirectBrInst *IBI,
4736 SelectInst *SI) {
4737 // Check that both operands of the select are block addresses.
4738 BlockAddress *TBA = dyn_cast<BlockAddress>(SI->getTrueValue());
4739 BlockAddress *FBA = dyn_cast<BlockAddress>(SI->getFalseValue());
4740 if (!TBA || !FBA)
4741 return false;
4742
4743 // Extract the actual blocks.
4744 BasicBlock *TrueBB = TBA->getBasicBlock();
4745 BasicBlock *FalseBB = FBA->getBasicBlock();
4746
4747 // Perform the actual simplification.
4748 return SimplifyTerminatorOnSelect(IBI, SI->getCondition(), TrueBB, FalseBB, 0,
4749 0);
4750}
4751
4752/// This is called when we find an icmp instruction
4753/// (a seteq/setne with a constant) as the only instruction in a
4754/// block that ends with an uncond branch. We are looking for a very specific
4755/// pattern that occurs when "A == 1 || A == 2 || A == 3" gets simplified. In
4756/// this case, we merge the first two "or's of icmp" into a switch, but then the
4757/// default value goes to an uncond block with a seteq in it, we get something
4758/// like:
4759///
4760/// switch i8 %A, label %DEFAULT [ i8 1, label %end i8 2, label %end ]
4761/// DEFAULT:
4762/// %tmp = icmp eq i8 %A, 92
4763/// br label %end
4764/// end:
4765/// ... = phi i1 [ true, %entry ], [ %tmp, %DEFAULT ], [ true, %entry ]
4766///
4767/// We prefer to split the edge to 'end' so that there is a true/false entry to
4768/// the PHI, merging the third icmp into the switch.
4769bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpInIt(
4770 ICmpInst *ICI, IRBuilder<> &Builder) {
4771 BasicBlock *BB = ICI->getParent();
4772
4773 // If the block has any PHIs in it or the icmp has multiple uses, it is too
4774 // complex.
4775 if (isa<PHINode>(BB->begin()) || !ICI->hasOneUse())
4776 return false;
4777
4778 Value *V = ICI->getOperand(0);
4779 ConstantInt *Cst = cast<ConstantInt>(ICI->getOperand(1));
4780
4781 // The pattern we're looking for is where our only predecessor is a switch on
4782 // 'V' and this block is the default case for the switch. In this case we can
4783 // fold the compared value into the switch to simplify things.
4784 BasicBlock *Pred = BB->getSinglePredecessor();
4785 if (!Pred || !isa<SwitchInst>(Pred->getTerminator()))
4786 return false;
4787
4788 SwitchInst *SI = cast<SwitchInst>(Pred->getTerminator());
4789 if (SI->getCondition() != V)
4790 return false;
4791
4792 // If BB is reachable on a non-default case, then we simply know the value of
4793 // V in this block. Substitute it and constant fold the icmp instruction
4794 // away.
4795 if (SI->getDefaultDest() != BB) {
4796 ConstantInt *VVal = SI->findCaseDest(BB);
4797 assert(VVal && "Should have a unique destination value");
4798 ICI->setOperand(0, VVal);
4799
4800 if (Value *V = simplifyInstruction(ICI, {DL, ICI})) {
4801 ICI->replaceAllUsesWith(V);
4802 ICI->eraseFromParent();
4803 }
4804 // BB is now empty, so it is likely to simplify away.
4805 return requestResimplify();
4806 }
4807
4808 // Ok, the block is reachable from the default dest. If the constant we're
4809 // comparing exists in one of the other edges, then we can constant fold ICI
4810 // and zap it.
4811 if (SI->findCaseValue(Cst) != SI->case_default()) {
4812 Value *V;
4813 if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
4815 else
4817
4818 ICI->replaceAllUsesWith(V);
4819 ICI->eraseFromParent();
4820 // BB is now empty, so it is likely to simplify away.
4821 return requestResimplify();
4822 }
4823
4824 // The use of the icmp has to be in the 'end' block, by the only PHI node in
4825 // the block.
4826 BasicBlock *SuccBlock = BB->getTerminator()->getSuccessor(0);
4827 PHINode *PHIUse = dyn_cast<PHINode>(ICI->user_back());
4828 if (PHIUse == nullptr || PHIUse != &SuccBlock->front() ||
4829 isa<PHINode>(++BasicBlock::iterator(PHIUse)))
4830 return false;
4831
4832 // If the icmp is a SETEQ, then the default dest gets false, the new edge gets
4833 // true in the PHI.
4834 Constant *DefaultCst = ConstantInt::getTrue(BB->getContext());
4835 Constant *NewCst = ConstantInt::getFalse(BB->getContext());
4836
4837 if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
4838 std::swap(DefaultCst, NewCst);
4839
4840 // Replace ICI (which is used by the PHI for the default value) with true or
4841 // false depending on if it is EQ or NE.
4842 ICI->replaceAllUsesWith(DefaultCst);
4843 ICI->eraseFromParent();
4844
4846
4847 // Okay, the switch goes to this block on a default value. Add an edge from
4848 // the switch to the merge point on the compared value.
4849 BasicBlock *NewBB =
4850 BasicBlock::Create(BB->getContext(), "switch.edge", BB->getParent(), BB);
4851 {
4853 auto W0 = SIW.getSuccessorWeight(0);
4855 if (W0) {
4856 NewW = ((uint64_t(*W0) + 1) >> 1);
4857 SIW.setSuccessorWeight(0, *NewW);
4858 }
4859 SIW.addCase(Cst, NewBB, NewW);
4860 if (DTU)
4861 Updates.push_back({DominatorTree::Insert, Pred, NewBB});
4862 }
4863
4864 // NewBB branches to the phi block, add the uncond branch and the phi entry.
4865 Builder.SetInsertPoint(NewBB);
4866 Builder.SetCurrentDebugLocation(SI->getDebugLoc());
4867 Builder.CreateBr(SuccBlock);
4868 PHIUse->addIncoming(NewCst, NewBB);
4869 if (DTU) {
4870 Updates.push_back({DominatorTree::Insert, NewBB, SuccBlock});
4871 DTU->applyUpdates(Updates);
4872 }
4873 return true;
4874}
4875
4876/// The specified branch is a conditional branch.
4877/// Check to see if it is branching on an or/and chain of icmp instructions, and
4878/// fold it into a switch instruction if so.
4879bool SimplifyCFGOpt::SimplifyBranchOnICmpChain(BranchInst *BI,
4880 IRBuilder<> &Builder,
4881 const DataLayout &DL) {
4882 Instruction *Cond = dyn_cast<Instruction>(BI->getCondition());
4883 if (!Cond)
4884 return false;
4885
4886 // Change br (X == 0 | X == 1), T, F into a switch instruction.
4887 // If this is a bunch of seteq's or'd together, or if it's a bunch of
4888 // 'setne's and'ed together, collect them.
4889
4890 // Try to gather values from a chain of and/or to be turned into a switch
4891 ConstantComparesGatherer ConstantCompare(Cond, DL);
4892 // Unpack the result
4893 SmallVectorImpl<ConstantInt *> &Values = ConstantCompare.Vals;
4894 Value *CompVal = ConstantCompare.CompValue;
4895 unsigned UsedICmps = ConstantCompare.UsedICmps;
4896 Value *ExtraCase = ConstantCompare.Extra;
4897
4898 // If we didn't have a multiply compared value, fail.
4899 if (!CompVal)
4900 return false;
4901
4902 // Avoid turning single icmps into a switch.
4903 if (UsedICmps <= 1)
4904 return false;
4905
4906 bool TrueWhenEqual = match(Cond, m_LogicalOr(m_Value(), m_Value()));
4907
4908 // There might be duplicate constants in the list, which the switch
4909 // instruction can't handle, remove them now.
4910 array_pod_sort(Values.begin(), Values.end(), ConstantIntSortPredicate);
4911 Values.erase(std::unique(Values.begin(), Values.end()), Values.end());
4912
4913 // If Extra was used, we require at least two switch values to do the
4914 // transformation. A switch with one value is just a conditional branch.
4915 if (ExtraCase && Values.size() < 2)
4916 return false;
4917
4918 // TODO: Preserve branch weight metadata, similarly to how
4919 // FoldValueComparisonIntoPredecessors preserves it.
4920
4921 // Figure out which block is which destination.
4922 BasicBlock *DefaultBB = BI->getSuccessor(1);
4923 BasicBlock *EdgeBB = BI->getSuccessor(0);
4924 if (!TrueWhenEqual)
4925 std::swap(DefaultBB, EdgeBB);
4926
4927 BasicBlock *BB = BI->getParent();
4928
4929 LLVM_DEBUG(dbgs() << "Converting 'icmp' chain with " << Values.size()
4930 << " cases into SWITCH. BB is:\n"
4931 << *BB);
4932
4934
4935 // If there are any extra values that couldn't be folded into the switch
4936 // then we evaluate them with an explicit branch first. Split the block
4937 // right before the condbr to handle it.
4938 if (ExtraCase) {
4939 BasicBlock *NewBB = SplitBlock(BB, BI, DTU, /*LI=*/nullptr,
4940 /*MSSAU=*/nullptr, "switch.early.test");
4941
4942 // Remove the uncond branch added to the old block.
4943 Instruction *OldTI = BB->getTerminator();
4944 Builder.SetInsertPoint(OldTI);
4945
4946 // There can be an unintended UB if extra values are Poison. Before the
4947 // transformation, extra values may not be evaluated according to the
4948 // condition, and it will not raise UB. But after transformation, we are
4949 // evaluating extra values before checking the condition, and it will raise
4950 // UB. It can be solved by adding freeze instruction to extra values.
4951 AssumptionCache *AC = Options.AC;
4952
4953 if (!isGuaranteedNotToBeUndefOrPoison(ExtraCase, AC, BI, nullptr))
4954 ExtraCase = Builder.CreateFreeze(ExtraCase);
4955
4956 if (TrueWhenEqual)
4957 Builder.CreateCondBr(ExtraCase, EdgeBB, NewBB);
4958 else
4959 Builder.CreateCondBr(ExtraCase, NewBB, EdgeBB);
4960
4961 OldTI->eraseFromParent();
4962
4963 if (DTU)
4964 Updates.push_back({DominatorTree::Insert, BB, EdgeBB});
4965
4966 // If there are PHI nodes in EdgeBB, then we need to add a new entry to them
4967 // for the edge we just added.
4968 AddPredecessorToBlock(EdgeBB, BB, NewBB);
4969
4970 LLVM_DEBUG(dbgs() << " ** 'icmp' chain unhandled condition: " << *ExtraCase
4971 << "\nEXTRABB = " << *BB);
4972 BB = NewBB;
4973 }
4974
4975 Builder.SetInsertPoint(BI);
4976 // Convert pointer to int before we switch.
4977 if (CompVal->getType()->isPointerTy()) {
4978 CompVal = Builder.CreatePtrToInt(
4979 CompVal, DL.getIntPtrType(CompVal->getType()), "magicptr");
4980 }
4981
4982 // Create the new switch instruction now.
4983 SwitchInst *New = Builder.CreateSwitch(CompVal, DefaultBB, Values.size());
4984
4985 // Add all of the 'cases' to the switch instruction.
4986 for (unsigned i = 0, e = Values.size(); i != e; ++i)
4987 New->addCase(Values[i], EdgeBB);
4988
4989 // We added edges from PI to the EdgeBB. As such, if there were any
4990 // PHI nodes in EdgeBB, they need entries to be added corresponding to
4991 // the number of edges added.
4992 for (BasicBlock::iterator BBI = EdgeBB->begin(); isa<PHINode>(BBI); ++BBI) {
4993 PHINode *PN = cast<PHINode>(BBI);
4994 Value *InVal = PN->getIncomingValueForBlock(BB);
4995 for (unsigned i = 0, e = Values.size() - 1; i != e; ++i)
4996 PN->addIncoming(InVal, BB);
4997 }
4998
4999 // Erase the old branch instruction.
5001 if (DTU)
5002 DTU->applyUpdates(Updates);
5003
5004 LLVM_DEBUG(dbgs() << " ** 'icmp' chain result is:\n" << *BB << '\n');
5005 return true;
5006}
5007
5008bool SimplifyCFGOpt::simplifyResume(ResumeInst *RI, IRBuilder<> &Builder) {
5009 if (isa<PHINode>(RI->getValue()))
5010 return simplifyCommonResume(RI);
5011 else if (isa<LandingPadInst>(RI->getParent()->getFirstNonPHI()) &&
5012 RI->getValue() == RI->getParent()->getFirstNonPHI())
5013 // The resume must unwind the exception that caused control to branch here.
5014 return simplifySingleResume(RI);
5015
5016 return false;
5017}
5018
5019// Check if cleanup block is empty
5021 for (Instruction &I : R) {
5022 auto *II = dyn_cast<IntrinsicInst>(&I);
5023 if (!II)
5024 return false;
5025
5026 Intrinsic::ID IntrinsicID = II->getIntrinsicID();
5027 switch (IntrinsicID) {
5028 case Intrinsic::dbg_declare:
5029 case Intrinsic::dbg_value:
5030 case Intrinsic::dbg_label:
5031 case Intrinsic::lifetime_end:
5032 break;
5033 default:
5034 return false;
5035 }
5036 }
5037 return true;
5038}
5039
5040// Simplify resume that is shared by several landing pads (phi of landing pad).
5041bool SimplifyCFGOpt::simplifyCommonResume(ResumeInst *RI) {
5042 BasicBlock *BB = RI->getParent();
5043
5044 // Check that there are no other instructions except for debug and lifetime
5045 // intrinsics between the phi's and resume instruction.
5048 return false;
5049
5050 SmallSetVector<BasicBlock *, 4> TrivialUnwindBlocks;
5051 auto *PhiLPInst = cast<PHINode>(RI->getValue());
5052
5053 // Check incoming blocks to see if any of them are trivial.
5054 for (unsigned Idx = 0, End = PhiLPInst->getNumIncomingValues(); Idx != End;
5055 Idx++) {
5056 auto *IncomingBB = PhiLPInst->getIncomingBlock(Idx);
5057 auto *IncomingValue = PhiLPInst->getIncomingValue(Idx);
5058
5059 // If the block has other successors, we can not delete it because
5060 // it has other dependents.
5061 if (IncomingBB->getUniqueSuccessor() != BB)
5062 continue;
5063
5064 auto *LandingPad = dyn_cast<LandingPadInst>(IncomingBB->getFirstNonPHI());
5065 // Not the landing pad that caused the control to branch here.
5066 if (IncomingValue != LandingPad)
5067 continue;
5068
5070 make_range(LandingPad->getNextNode(), IncomingBB->getTerminator())))
5071 TrivialUnwindBlocks.insert(IncomingBB);
5072 }
5073
5074 // If no trivial unwind blocks, don't do any simplifications.
5075 if (TrivialUnwindBlocks.empty())
5076 return false;
5077
5078 // Turn all invokes that unwind here into calls.
5079 for (auto *TrivialBB : TrivialUnwindBlocks) {
5080 // Blocks that will be simplified should be removed from the phi node.
5081 // Note there could be multiple edges to the resume block, and we need
5082 // to remove them all.
5083 while (PhiLPInst->getBasicBlockIndex(TrivialBB) != -1)
5084 BB->removePredecessor(TrivialBB, true);
5085
5086 for (BasicBlock *Pred :
5088 removeUnwindEdge(Pred, DTU);
5089 ++NumInvokes;
5090 }
5091
5092 // In each SimplifyCFG run, only the current processed block can be erased.
5093 // Otherwise, it will break the iteration of SimplifyCFG pass. So instead
5094 // of erasing TrivialBB, we only remove the branch to the common resume
5095 // block so that we can later erase the resume block since it has no
5096 // predecessors.
5097 TrivialBB->getTerminator()->eraseFromParent();
5098 new UnreachableInst(RI->getContext(), TrivialBB);
5099 if (DTU)
5100 DTU->applyUpdates({{DominatorTree::Delete, TrivialBB, BB}});
5101 }
5102
5103 // Delete the resume block if all its predecessors have been removed.
5104 if (pred_empty(BB))
5105 DeleteDeadBlock(BB, DTU);
5106
5107 return !TrivialUnwindBlocks.empty();
5108}
5109
5110// Simplify resume that is only used by a single (non-phi) landing pad.
5111bool SimplifyCFGOpt::simplifySingleResume(ResumeInst *RI) {
5112 BasicBlock *BB = RI->getParent();
5113 auto *LPInst = cast<LandingPadInst>(BB->getFirstNonPHI());
5114 assert(RI->getValue() == LPInst &&
5115 "Resume must unwind the exception that caused control to here");
5116
5117 // Check that there are no other instructions except for debug intrinsics.
5119 make_range<Instruction *>(LPInst->getNextNode(), RI)))
5120 return false;
5121
5122 // Turn all invokes that unwind here into calls and delete the basic block.
5124 removeUnwindEdge(Pred, DTU);
5125 ++NumInvokes;
5126 }
5127
5128 // The landingpad is now unreachable. Zap it.
5129 DeleteDeadBlock(BB, DTU);
5130 return true;
5131}
5132
  // If this is a trivial cleanup pad that executes no instructions, it can be
  // eliminated. If the cleanup pad continues to the caller, any predecessor
  // that is an EH pad will be updated to continue to the caller and any
  // predecessor that terminates with an invoke instruction will have its invoke
  // instruction converted to a call instruction. If the cleanup pad being
  // simplified does not continue to the caller, each predecessor will be
  // updated to continue to the unwind destination of the cleanup pad being
  // simplified.
  BasicBlock *BB = RI->getParent();
  CleanupPadInst *CPInst = RI->getCleanupPad();
  if (CPInst->getParent() != BB)
    // This isn't an empty cleanup.
    return false;

  // We cannot kill the pad if it has multiple uses. This typically arises
  // from unreachable basic blocks.
  if (!CPInst->hasOneUse())
    return false;

  // Check that there are no other instructions except for benign intrinsics
  // between the cleanuppad and the cleanupret.
          make_range<Instruction *>(CPInst->getNextNode(), RI)))
    return false;

  // If the cleanup return we are simplifying unwinds to the caller, this will
  // set UnwindDest to nullptr.
  BasicBlock *UnwindDest = RI->getUnwindDest();
  Instruction *DestEHPad = UnwindDest ? UnwindDest->getFirstNonPHI() : nullptr;

  // We're about to remove BB from the control flow. Before we do, sink any
  // PHINodes into the unwind destination. Doing this before changing the
  // control flow avoids some potentially slow checks, since we can currently
  // be certain that UnwindDest and BB have no common predecessors (since they
  // are both EH pads).
  if (UnwindDest) {
    // First, go through the PHI nodes in UnwindDest and update any nodes that
    // reference the block we are removing
    for (PHINode &DestPN : UnwindDest->phis()) {
      int Idx = DestPN.getBasicBlockIndex(BB);
      // Since BB unwinds to UnwindDest, it has to be in the PHI node.
      assert(Idx != -1);
      // This PHI node has an incoming value that corresponds to a control
      // path through the cleanup pad we are removing. If the incoming
      // value is in the cleanup pad, it must be a PHINode (because we
      // verified above that the block is otherwise empty). Otherwise, the
      // value is either a constant or a value that dominates the cleanup
      // pad being removed.
      //
      // Because BB and UnwindDest are both EH pads, all of their
      // predecessors must unwind to these blocks, and since no instruction
      // can have multiple unwind destinations, there will be no overlap in
      // incoming blocks between SrcPN and DestPN.
      Value *SrcVal = DestPN.getIncomingValue(Idx);
      PHINode *SrcPN = dyn_cast<PHINode>(SrcVal);

      // If the incoming value lives in BB as a PHI, translate it per
      // predecessor; otherwise the same value flows in along every edge.
      bool NeedPHITranslation = SrcPN && SrcPN->getParent() == BB;
      for (auto *Pred : predecessors(BB)) {
        Value *Incoming =
            NeedPHITranslation ? SrcPN->getIncomingValueForBlock(Pred) : SrcVal;
        DestPN.addIncoming(Incoming, Pred);
      }
    }

    // Sink any remaining PHI nodes directly into UnwindDest.
    Instruction *InsertPt = DestEHPad;
    for (PHINode &PN : make_early_inc_range(BB->phis())) {
      if (PN.use_empty() || !PN.isUsedOutsideOfBlock(BB))
        // If the PHI node has no uses or all of its uses are in this basic
        // block (meaning they are debug or lifetime intrinsics), just leave
        // it. It will be erased when we erase BB below.
        continue;

      // Otherwise, sink this PHI node into UnwindDest.
      // Any predecessors to UnwindDest which are not already represented
      // must be back edges which inherit the value from the path through
      // BB. In this case, the PHI value must reference itself.
      for (auto *pred : predecessors(UnwindDest))
        if (pred != BB)
          PN.addIncoming(&PN, pred);
      PN.moveBefore(InsertPt);
      // Also, add a dummy incoming value for the original BB itself,
      // so that the PHI is well-formed until we drop said predecessor.
      PN.addIncoming(PoisonValue::get(PN.getType()), BB);
    }
  }

  std::vector<DominatorTree::UpdateType> Updates;

  // We use make_early_inc_range here because we will remove all predecessors.
    if (UnwindDest == nullptr) {
      // Flush any queued updates before removeUnwindEdge mutates the CFG so
      // the DomTreeUpdater never observes an inconsistent snapshot.
      if (DTU) {
        DTU->applyUpdates(Updates);
        Updates.clear();
      }
      removeUnwindEdge(PredBB, DTU);
      ++NumInvokes;
    } else {
      // Redirect the predecessor straight to the unwind destination.
      BB->removePredecessor(PredBB);
      Instruction *TI = PredBB->getTerminator();
      TI->replaceUsesOfWith(BB, UnwindDest);
      if (DTU) {
        Updates.push_back({DominatorTree::Insert, PredBB, UnwindDest});
        Updates.push_back({DominatorTree::Delete, PredBB, BB});
      }
    }
  }

  if (DTU)
    DTU->applyUpdates(Updates);

  // No predecessors remain; the cleanup block is dead.
  DeleteDeadBlock(BB, DTU);

  return true;
}
5249
// Try to merge two cleanuppads together.
  // Skip any cleanuprets which unwind to caller, there is nothing to merge
  // with.
  BasicBlock *UnwindDest = RI->getUnwindDest();
  if (!UnwindDest)
    return false;

  // This cleanupret isn't the only predecessor of this cleanuppad, it wouldn't
  // be safe to merge without code duplication.
  if (UnwindDest->getSinglePredecessor() != RI->getParent())
    return false;

  // Verify that our cleanuppad's unwind destination is another cleanuppad.
  auto *SuccessorCleanupPad = dyn_cast<CleanupPadInst>(&UnwindDest->front());
  if (!SuccessorCleanupPad)
    return false;

  CleanupPadInst *PredecessorCleanupPad = RI->getCleanupPad();
  // Replace any uses of the successor cleanuppad with the predecessor pad.
  // The only cleanuppad uses should be this cleanupret, its cleanupret and
  // funclet bundle operands.
  SuccessorCleanupPad->replaceAllUsesWith(PredecessorCleanupPad);
  // Remove the old cleanuppad.
  SuccessorCleanupPad->eraseFromParent();
  // Now, we simply replace the cleanupret with a branch to the unwind
  // destination.
  BranchInst::Create(UnwindDest, RI->getParent());
  RI->eraseFromParent();

  return true;
}
5282
5283bool SimplifyCFGOpt::simplifyCleanupReturn(CleanupReturnInst *RI) {
5284 // It is possible to transiantly have an undef cleanuppad operand because we
5285 // have deleted some, but not all, dead blocks.
5286 // Eventually, this block will be deleted.
5287 if (isa<UndefValue>(RI->getOperand(0)))
5288 return false;
5289
5290 if (mergeCleanupPad(RI))
5291 return true;
5292
5293 if (removeEmptyCleanup(RI, DTU))
5294 return true;
5295
5296 return false;
5297}
5298
// WARNING: keep in sync with InstCombinerImpl::visitUnreachableInst()!
bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) {
  BasicBlock *BB = UI->getParent();

  bool Changed = false;

  // Ensure that any debug-info records that used to occur after the Unreachable
  // are moved to in front of it -- otherwise they'll "dangle" at the end of
  // the block.

  // Debug-info records on the unreachable inst itself should be deleted, as
  // below we delete everything past the final executable instruction.
  UI->dropDbgRecords();

  // If there are any instructions immediately before the unreachable that can
  // be removed, do so.
  while (UI->getIterator() != BB->begin()) {
    --BBI;

      break; // Can not drop any more instructions. We're done here.
    // Otherwise, this instruction can be freely erased,
    // even if it is not side-effect free.

    // Note that deleting EH's here is in fact okay, although it involves a bit
    // of subtle reasoning. If this inst is an EH, all the predecessors of this
    // block will be the unwind edges of Invoke/CatchSwitch/CleanupReturn,
    // and we can therefore guarantee this block will be erased.

    // If we're deleting this, we're deleting any subsequent debug info, so
    // delete DbgRecords.
    BBI->dropDbgRecords();

    // Delete this instruction (any uses are guaranteed to be dead)
    BBI->replaceAllUsesWith(PoisonValue::get(BBI->getType()));
    BBI->eraseFromParent();
    Changed = true;
  }

  // If the unreachable instruction is the first in the block, take a gander
  // at all of the predecessors of this instruction, and simplify them.
  if (&BB->front() != UI)
    return Changed;

  std::vector<DominatorTree::UpdateType> Updates;

  for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
    auto *Predecessor = Preds[i];
    Instruction *TI = Predecessor->getTerminator();
    IRBuilder<> Builder(TI);
    if (auto *BI = dyn_cast<BranchInst>(TI)) {
      // We could either have a proper unconditional branch,
      // or a degenerate conditional branch with matching destinations.
      if (all_of(BI->successors(),
                 [BB](auto *Successor) { return Successor == BB; })) {
        // Every successor is BB, so the predecessor itself is unreachable.
        new UnreachableInst(TI->getContext(), TI->getIterator());
        TI->eraseFromParent();
        Changed = true;
      } else {
        assert(BI->isConditional() && "Can't get here with an uncond branch.");
        Value* Cond = BI->getCondition();
        assert(BI->getSuccessor(0) != BI->getSuccessor(1) &&
               "The destinations are guaranteed to be different here.");
        CallInst *Assumption;
        // The edge into BB can never be taken; record the implied condition
        // value as an assumption and branch unconditionally the other way.
        if (BI->getSuccessor(0) == BB) {
          Assumption = Builder.CreateAssumption(Builder.CreateNot(Cond));
          Builder.CreateBr(BI->getSuccessor(1));
        } else {
          assert(BI->getSuccessor(1) == BB && "Incorrect CFG");
          Assumption = Builder.CreateAssumption(Cond);
          Builder.CreateBr(BI->getSuccessor(0));
        }
        if (Options.AC)
          Options.AC->registerAssumption(cast<AssumeInst>(Assumption));

        Changed = true;
      }
      if (DTU)
        Updates.push_back({DominatorTree::Delete, Predecessor, BB});
    } else if (auto *SI = dyn_cast<SwitchInst>(TI)) {
      // Drop every case that targets BB (profile metadata is kept in sync
      // through the SU wrapper).
      for (auto i = SU->case_begin(), e = SU->case_end(); i != e;) {
        if (i->getCaseSuccessor() != BB) {
          ++i;
          continue;
        }
        BB->removePredecessor(SU->getParent());
        i = SU.removeCase(i);
        e = SU->case_end();
        Changed = true;
      }
      // Note that the default destination can't be removed!
      if (DTU && SI->getDefaultDest() != BB)
        Updates.push_back({DominatorTree::Delete, Predecessor, BB});
    } else if (auto *II = dyn_cast<InvokeInst>(TI)) {
      if (II->getUnwindDest() == BB) {
        // Flush pending updates before removeUnwindEdge mutates the CFG.
        if (DTU) {
          DTU->applyUpdates(Updates);
          Updates.clear();
        }
        // The unwind edge is dead; the invoke becomes a nounwind call.
        auto *CI = cast<CallInst>(removeUnwindEdge(TI->getParent(), DTU));
        if (!CI->doesNotThrow())
          CI->setDoesNotThrow();
        Changed = true;
      }
    } else if (auto *CSI = dyn_cast<CatchSwitchInst>(TI)) {
      if (CSI->getUnwindDest() == BB) {
        if (DTU) {
          DTU->applyUpdates(Updates);
          Updates.clear();
        }
        removeUnwindEdge(TI->getParent(), DTU);
        Changed = true;
        continue;
      }

      // BB is one of the handlers: remove it from the handler list.
      for (CatchSwitchInst::handler_iterator I = CSI->handler_begin(),
                                             E = CSI->handler_end();
           I != E; ++I) {
        if (*I == BB) {
          CSI->removeHandler(I);
          --I;
          --E;
          Changed = true;
        }
      }
      if (DTU)
        Updates.push_back({DominatorTree::Delete, Predecessor, BB});
      if (CSI->getNumHandlers() == 0) {
        if (CSI->hasUnwindDest()) {
          // Redirect all predecessors of the block containing CatchSwitchInst
          // to instead branch to the CatchSwitchInst's unwind destination.
          if (DTU) {
            for (auto *PredecessorOfPredecessor : predecessors(Predecessor)) {
              Updates.push_back({DominatorTree::Insert,
                                 PredecessorOfPredecessor,
                                 CSI->getUnwindDest()});
              Updates.push_back({DominatorTree::Delete,
                                 PredecessorOfPredecessor, Predecessor});
            }
          }
          Predecessor->replaceAllUsesWith(CSI->getUnwindDest());
        } else {
          // Rewrite all preds to unwind to caller (or from invoke to call).
          if (DTU) {
            DTU->applyUpdates(Updates);
            Updates.clear();
          }
          SmallVector<BasicBlock *, 8> EHPreds(predecessors(Predecessor));
          for (BasicBlock *EHPred : EHPreds)
            removeUnwindEdge(EHPred, DTU);
        }
        // The catchswitch is no longer reachable.
        new UnreachableInst(CSI->getContext(), CSI->getIterator());
        CSI->eraseFromParent();
        Changed = true;
      }
    } else if (auto *CRI = dyn_cast<CleanupReturnInst>(TI)) {
      (void)CRI;
      assert(CRI->hasUnwindDest() && CRI->getUnwindDest() == BB &&
             "Expected to always have an unwind to BB.");
      if (DTU)
        Updates.push_back({DominatorTree::Delete, Predecessor, BB});
      new UnreachableInst(TI->getContext(), TI->getIterator());
      TI->eraseFromParent();
      Changed = true;
    }
  }

  if (DTU)
    DTU->applyUpdates(Updates);

  // If this block is now dead, remove it.
  if (pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) {
    DeleteDeadBlock(BB, DTU);
    return true;
  }

  return Changed;
}
5483
  assert(Cases.size() >= 1);

  // Walking front-to-back, a contiguous run must decrease by exactly one at
  // every step (i.e. the values form a descending, gap-free range).
  for (size_t I = 1, E = Cases.size(); I != E; ++I) {
    if (Cases[I - 1]->getValue() != Cases[I]->getValue() + 1)
      return false;
  }
  return true;
}
5494
                                           DomTreeUpdater *DTU) {
  LLVM_DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n");
  auto *BB = Switch->getParent();
  // Detach the switch block from the old (dead) default destination...
  auto *OrigDefaultBlock = Switch->getDefaultDest();
  OrigDefaultBlock->removePredecessor(BB);
  // ...and point the switch at a fresh block holding only `unreachable`.
  BasicBlock *NewDefaultBlock = BasicBlock::Create(
      BB->getContext(), BB->getName() + ".unreachabledefault", BB->getParent(),
      OrigDefaultBlock);
  new UnreachableInst(Switch->getContext(), NewDefaultBlock);
  Switch->setDefaultDest(&*NewDefaultBlock);
  if (DTU) {
    Updates.push_back({DominatorTree::Insert, BB, &*NewDefaultBlock});
    // Only delete the old edge if no remaining case still targets the block.
    if (!is_contained(successors(BB), OrigDefaultBlock))
      Updates.push_back({DominatorTree::Delete, BB, &*OrigDefaultBlock});
    DTU->applyUpdates(Updates);
  }
}
5514
/// Turn a switch into an integer range comparison and branch.
/// Switches with more than 2 destinations are ignored.
/// Switches with 1 destination are also ignored.
bool SimplifyCFGOpt::TurnSwitchRangeIntoICmp(SwitchInst *SI,
                                             IRBuilder<> &Builder) {
  assert(SI->getNumCases() > 1 && "Degenerate switch?");

  // The default destination only counts as live if it is reachable.
  bool HasDefault =
      !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg());

  auto *BB = SI->getParent();

  // Partition the cases into two sets with different destinations.
  BasicBlock *DestA = HasDefault ? SI->getDefaultDest() : nullptr;
  BasicBlock *DestB = nullptr;

  for (auto Case : SI->cases()) {
    BasicBlock *Dest = Case.getCaseSuccessor();
    if (!DestA)
      DestA = Dest;
    if (Dest == DestA) {
      CasesA.push_back(Case.getCaseValue());
      continue;
    }
    if (!DestB)
      DestB = Dest;
    if (Dest == DestB) {
      CasesB.push_back(Case.getCaseValue());
      continue;
    }
    return false; // More than two destinations.
  }
  if (!DestB)
    return false; // All destinations are the same and the default is unreachable

  assert(DestA && DestB &&
         "Single-destination switch should have been folded.");
  assert(DestA != DestB);
  assert(DestB != SI->getDefaultDest());
  assert(!CasesB.empty() && "There must be non-default cases.");
  assert(!CasesA.empty() || HasDefault);

  // Figure out if one of the sets of cases form a contiguous range.
  SmallVectorImpl<ConstantInt *> *ContiguousCases = nullptr;
  BasicBlock *ContiguousDest = nullptr;
  BasicBlock *OtherDest = nullptr;
  if (!CasesA.empty() && CasesAreContiguous(CasesA)) {
    ContiguousCases = &CasesA;
    ContiguousDest = DestA;
    OtherDest = DestB;
  } else if (CasesAreContiguous(CasesB)) {
    ContiguousCases = &CasesB;
    ContiguousDest = DestB;
    OtherDest = DestA;
  } else
    return false;

  // Start building the compare and branch.

  // Bias the condition so the contiguous range starts at zero; a single
  // unsigned compare against the range size then selects the destination.
  Constant *Offset = ConstantExpr::getNeg(ContiguousCases->back());
  Constant *NumCases =
      ConstantInt::get(Offset->getType(), ContiguousCases->size());

  Value *Sub = SI->getCondition();
  if (!Offset->isNullValue())
    Sub = Builder.CreateAdd(Sub, Offset, Sub->getName() + ".off");

  Value *Cmp;
  // If NumCases overflowed, then all possible values jump to the successor.
  if (NumCases->isNullValue() && !ContiguousCases->empty())
    Cmp = ConstantInt::getTrue(SI->getContext());
  else
    Cmp = Builder.CreateICmpULT(Sub, NumCases, "switch");
  BranchInst *NewBI = Builder.CreateCondBr(Cmp, ContiguousDest, OtherDest);

  // Update weight for the newly-created conditional branch.
  if (hasBranchWeightMD(*SI)) {
    GetBranchWeights(SI, Weights);
    if (Weights.size() == 1 + SI->getNumCases()) {
      // Sum the per-successor weights into the two sides of the new branch.
      uint64_t TrueWeight = 0;
      uint64_t FalseWeight = 0;
      for (size_t I = 0, E = Weights.size(); I != E; ++I) {
        if (SI->getSuccessor(I) == ContiguousDest)
          TrueWeight += Weights[I];
        else
          FalseWeight += Weights[I];
      }
      // Scale both weights down proportionally until they fit in 32 bits.
      while (TrueWeight > UINT32_MAX || FalseWeight > UINT32_MAX) {
        TrueWeight /= 2;
        FalseWeight /= 2;
      }
      setBranchWeights(NewBI, TrueWeight, FalseWeight);
    }
  }

  // Prune obsolete incoming values off the successors' PHI nodes.
  // Each successor keeps exactly one edge from BB (the new branch).
  for (auto BBI = ContiguousDest->begin(); isa<PHINode>(BBI); ++BBI) {
    unsigned PreviousEdges = ContiguousCases->size();
    if (ContiguousDest == SI->getDefaultDest())
      ++PreviousEdges;
    for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I)
      cast<PHINode>(BBI)->removeIncomingValue(SI->getParent());
  }
  for (auto BBI = OtherDest->begin(); isa<PHINode>(BBI); ++BBI) {
    unsigned PreviousEdges = SI->getNumCases() - ContiguousCases->size();
    if (OtherDest == SI->getDefaultDest())
      ++PreviousEdges;
    for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I)
      cast<PHINode>(BBI)->removeIncomingValue(SI->getParent());
  }

  // Clean up the default block - it may have phis or other instructions before
  // the unreachable terminator.
  if (!HasDefault)

  auto *UnreachableDefault = SI->getDefaultDest();

  // Drop the switch.
  SI->eraseFromParent();

  if (!HasDefault && DTU)
    DTU->applyUpdates({{DominatorTree::Delete, BB, UnreachableDefault}});

  return true;
}
5644
/// Compute masked bits for the condition of a switch
/// and use it to remove dead cases.
                                     AssumptionCache *AC,
                                     const DataLayout &DL) {
  Value *Cond = SI->getCondition();
  KnownBits Known = computeKnownBits(Cond, DL, 0, AC, SI);

  // We can also eliminate cases by determining that their values are outside of
  // the limited range of the condition based on how many significant (non-sign)
  // bits are in the condition value.
  unsigned MaxSignificantBitsInCond =
      ComputeMaxSignificantBits(Cond, DL, 0, AC, SI);

  // Gather dead cases.
  SmallDenseMap<BasicBlock *, int, 8> NumPerSuccessorCases;
  SmallVector<BasicBlock *, 8> UniqueSuccessors;
  for (const auto &Case : SI->cases()) {
    auto *Successor = Case.getCaseSuccessor();
    if (DTU) {
      // Count cases per successor so we can tell which successor blocks lose
      // their last switch edge below.
      if (!NumPerSuccessorCases.count(Successor))
        UniqueSuccessors.push_back(Successor);
      ++NumPerSuccessorCases[Successor];
    }
    // A case is dead if it sets a bit known to be zero, misses a bit known to
    // be one, or needs more significant bits than the condition can carry.
    const APInt &CaseVal = Case.getCaseValue()->getValue();
    if (Known.Zero.intersects(CaseVal) || !Known.One.isSubsetOf(CaseVal) ||
        (CaseVal.getSignificantBits() > MaxSignificantBitsInCond)) {
      DeadCases.push_back(Case.getCaseValue());
      if (DTU)
        --NumPerSuccessorCases[Successor];
      LLVM_DEBUG(dbgs() << "SimplifyCFG: switch case " << CaseVal
                        << " is dead.\n");
    }
  }

  // If we can prove that the cases must cover all possible values, the
  // default destination becomes dead and we can remove it. If we know some
  // of the bits in the value, we can use that to more precisely compute the
  // number of possible unique case values.
  bool HasDefault =
      !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg());
  const unsigned NumUnknownBits =
      Known.getBitWidth() - (Known.Zero | Known.One).popcount();
  assert(NumUnknownBits <= Known.getBitWidth());
  if (HasDefault && DeadCases.empty() &&
      NumUnknownBits < 64 /* avoid overflow */ &&
      SI->getNumCases() == (1ULL << NumUnknownBits)) {
    return true;
  }

  if (DeadCases.empty())
    return false;

  for (ConstantInt *DeadCase : DeadCases) {
    SwitchInst::CaseIt CaseI = SI->findCaseValue(DeadCase);
    assert(CaseI != SI->case_default() &&
           "Case was not found. Probably mistake in DeadCases forming.");
    // Prune unused values from PHI nodes.
    CaseI->getCaseSuccessor()->removePredecessor(SI->getParent());
    SIW.removeCase(CaseI);
  }

  if (DTU) {
    // Delete the CFG edge to any successor whose last switch case was removed.
    std::vector<DominatorTree::UpdateType> Updates;
    for (auto *Successor : UniqueSuccessors)
      if (NumPerSuccessorCases[Successor] == 0)
        Updates.push_back({DominatorTree::Delete, SI->getParent(), Successor});
    DTU->applyUpdates(Updates);
  }

  return true;
}
5720
/// If BB would be eligible for simplification by
/// TryToSimplifyUncondBranchFromEmptyBlock (i.e. it is empty and terminated
/// by an unconditional branch), look at the phi node for BB in the successor
/// block and see if the incoming value is equal to CaseValue. If so, return
/// the phi node, and set PhiIndex to BB's index in the phi node.
                                              BasicBlock *BB, int *PhiIndex) {
  if (BB->getFirstNonPHIOrDbg() != BB->getTerminator())
    return nullptr; // BB must be empty to be a candidate for simplification.
  if (!BB->getSinglePredecessor())
    return nullptr; // BB must be dominated by the switch.

  BranchInst *Branch = dyn_cast<BranchInst>(BB->getTerminator());
  if (!Branch || !Branch->isUnconditional())
    return nullptr; // Terminator must be unconditional branch.

  BasicBlock *Succ = Branch->getSuccessor(0);

  // Scan the successor's PHIs for one whose incoming value along the edge
  // from BB is exactly the case constant we would like to forward.
  for (PHINode &PHI : Succ->phis()) {
    int Idx = PHI.getBasicBlockIndex(BB);
    assert(Idx >= 0 && "PHI has no entry for predecessor?");

    Value *InValue = PHI.getIncomingValue(Idx);
    if (InValue != CaseValue)
      continue;

    *PhiIndex = Idx;
    return &PHI;
  }

  return nullptr;
}
5753
/// Try to forward the condition of a switch instruction to a phi node
/// dominated by the switch, if that would mean that some of the destination
/// blocks of the switch can be folded away. Return true if a change is made.
  using ForwardingNodesMap = DenseMap<PHINode *, SmallVector<int, 4>>;

  ForwardingNodesMap ForwardingNodes;
  BasicBlock *SwitchBlock = SI->getParent();
  bool Changed = false;
  for (const auto &Case : SI->cases()) {
    ConstantInt *CaseValue = Case.getCaseValue();
    BasicBlock *CaseDest = Case.getCaseSuccessor();

    // Replace phi operands in successor blocks that are using the constant case
    // value rather than the switch condition variable:
    // switchbb:
    // switch i32 %x, label %default [
    //   i32 17, label %succ
    // ...
    // succ:
    //   %r = phi i32 ... [ 17, %switchbb ] ...
    // -->
    //   %r = phi i32 ... [ %x, %switchbb ] ...

    for (PHINode &Phi : CaseDest->phis()) {
      // This only works if there is exactly 1 incoming edge from the switch to
      // a phi. If there is >1, that means multiple cases of the switch map to 1
      // value in the phi, and that phi value is not the switch condition. Thus,
      // this transform would not make sense (the phi would be invalid because
      // a phi can't have different incoming values from the same block).
      int SwitchBBIdx = Phi.getBasicBlockIndex(SwitchBlock);
      if (Phi.getIncomingValue(SwitchBBIdx) == CaseValue &&
          count(Phi.blocks(), SwitchBlock) == 1) {
        Phi.setIncomingValue(SwitchBBIdx, SI->getCondition());
        Changed = true;
      }
    }

    // Collect phi nodes that are indirectly using this switch's case constants.
    int PhiIdx;
    if (auto *Phi = FindPHIForConditionForwarding(CaseValue, CaseDest, &PhiIdx))
      ForwardingNodes[Phi].push_back(PhiIdx);
  }

  for (auto &ForwardingNode : ForwardingNodes) {
    PHINode *Phi = ForwardingNode.first;
    SmallVectorImpl<int> &Indexes = ForwardingNode.second;
    // Only forward when at least two incoming slots would be rewritten.
    if (Indexes.size() < 2)
      continue;

    for (int Index : Indexes)
      Phi->setIncomingValue(Index, SI->getCondition());
    Changed = true;
  }

  return Changed;
}
5811
/// Return true if the backend will be able to handle
/// initializing an array of constants like C.
  if (C->isThreadDependent())
    return false;
  if (C->isDLLImportDependent())
    return false;

  // Only simple scalar-like constants are allowed as table entries.
  if (!isa<ConstantFP>(C) && !isa<ConstantInt>(C) &&
      !isa<ConstantPointerNull>(C) && !isa<GlobalValue>(C) &&
      !isa<UndefValue>(C) && !isa<ConstantExpr>(C))
    return false;

  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
    // Pointer casts and in-bounds GEPs will not prohibit the backend from
    // materializing the array of constants.
    Constant *StrippedC = cast<Constant>(CE->stripInBoundsConstantOffsets());
    if (StrippedC == C || !ValidLookupTableConstant(StrippedC, TTI))
      return false;
  }

    return false;

  return true;
}
5838
/// If V is a Constant, return it. Otherwise, try to look up
/// its constant value in ConstantPool, returning 0 if it's not there.
static Constant *
  if (Constant *C = dyn_cast<Constant>(V))
    return C;
  // Not a literal constant: fall back to the propagated-constant pool
  // (returns nullptr when V has no entry).
  return ConstantPool.lookup(V);
}
5848
/// Try to fold instruction I into a constant. This works for
/// simple instructions such as binary operations where both operands are
/// constant or can be replaced by constants from the ConstantPool. Returns the
/// resulting constant on success, 0 otherwise.
static Constant *
  if (SelectInst *Select = dyn_cast<SelectInst>(I)) {
    // A select folds only when its condition is known; pick the matching arm.
    Constant *A = LookupConstant(Select->getCondition(), ConstantPool);
    if (!A)
      return nullptr;
    if (A->isAllOnesValue())
      return LookupConstant(Select->getTrueValue(), ConstantPool);
    if (A->isNullValue())
      return LookupConstant(Select->getFalseValue(), ConstantPool);
    return nullptr;
  }

  // For everything else, every operand must resolve to a constant.
  for (unsigned N = 0, E = I->getNumOperands(); N != E; ++N) {
    if (Constant *A = LookupConstant(I->getOperand(N), ConstantPool))
      COps.push_back(A);
    else
      return nullptr;
  }

  return ConstantFoldInstOperands(I, COps, DL);
}
5877
/// Try to determine the resulting constant values in phi nodes
/// at the common destination basic block, *CommonDest, for one of the case
/// destinations CaseDest corresponding to value CaseVal (0 for the default
/// case), of a switch instruction SI.
static bool
                        BasicBlock **CommonDest,
                        SmallVectorImpl<std::pair<PHINode *, Constant *>> &Res,
                        const DataLayout &DL, const TargetTransformInfo &TTI) {
  // The block from which we enter the common destination.
  BasicBlock *Pred = SI->getParent();

  // If CaseDest is empty except for some side-effect free instructions through
  // which we can constant-propagate the CaseVal, continue to its successor.
  ConstantPool.insert(std::make_pair(SI->getCondition(), CaseVal));
  for (Instruction &I : CaseDest->instructionsWithoutDebug(false)) {
    if (I.isTerminator()) {
      // If the terminator is a simple branch, continue to the next block.
      if (I.getNumSuccessors() != 1 || I.isSpecialTerminator())
        return false;
      Pred = CaseDest;
      CaseDest = I.getSuccessor(0);
    } else if (Constant *C = ConstantFold(&I, DL, ConstantPool)) {
      // Instruction is side-effect free and constant.

      // If the instruction has uses outside this block or a phi node slot for
      // the block, it is not safe to bypass the instruction since it would then
      // no longer dominate all its uses.
      for (auto &Use : I.uses()) {
        User *User = Use.getUser();
        if (Instruction *I = dyn_cast<Instruction>(User))
          if (I->getParent() == CaseDest)
            continue;
        if (PHINode *Phi = dyn_cast<PHINode>(User))
          if (Phi->getIncomingBlock(Use) == CaseDest)
            continue;
        return false;
      }

      // Remember the folded value for later lookups in this pool.
      ConstantPool.insert(std::make_pair(&I, C));
    } else {
      // Instruction is not constant-foldable; stop scanning this block.
      break;
    }
  }

  // If we did not have a CommonDest before, use the current one.
  if (!*CommonDest)
    *CommonDest = CaseDest;
  // If the destination isn't the common one, abort.
  if (CaseDest != *CommonDest)
    return false;

  // Get the values for this case from phi nodes in the destination block.
  for (PHINode &PHI : (*CommonDest)->phis()) {
    int Idx = PHI.getBasicBlockIndex(Pred);
    if (Idx == -1)
      continue;

    Constant *ConstVal =
        LookupConstant(PHI.getIncomingValue(Idx), ConstantPool);
    if (!ConstVal)
      return false;

    // Be conservative about which kinds of constants we support.
    if (!ValidLookupTableConstant(ConstVal, TTI))
      return false;

    Res.push_back(std::make_pair(&PHI, ConstVal));
  }

  return Res.size() > 0;
}
5951
5952// Helper function used to add CaseVal to the list of cases that generate
5953// Result. Returns the updated number of cases that generate this result.
5954static size_t mapCaseToResult(ConstantInt *CaseVal,
5955 SwitchCaseResultVectorTy &UniqueResults,
5956 Constant *Result) {
5957 for (auto &I : UniqueResults) {
5958 if (I.first == Result) {
5959 I.second.push_back(CaseVal);
5960 return I.second.size();
5961 }
5962 }
5963 UniqueResults.push_back(
5964 std::make_pair(Result, SmallVector<ConstantInt *, 4>(1, CaseVal)));
5965 return 1;
5966}
5967
// Helper function that initializes a map containing
// results for the PHI node of the common destination block for a switch
// instruction. Returns false if multiple PHI nodes have been found or if
// there is not a common destination block for the switch.
                                  BasicBlock *&CommonDest,
                                  SwitchCaseResultVectorTy &UniqueResults,
                                  Constant *&DefaultResult,
                                  const DataLayout &DL,
                                  const TargetTransformInfo &TTI,
                                  uintptr_t MaxUniqueResults) {
  for (const auto &I : SI->cases()) {
    ConstantInt *CaseVal = I.getCaseValue();

    // Resulting value at phi nodes for this case value.
    SwitchCaseResultsTy Results;
    if (!getCaseResults(SI, CaseVal, I.getCaseSuccessor(), &CommonDest, Results,
                        DL, TTI))
      return false;

    // Only one value per case is permitted.
    if (Results.size() > 1)
      return false;

    // Add the case->result mapping to UniqueResults.
    const size_t NumCasesForResult =
        mapCaseToResult(CaseVal, UniqueResults, Results.begin()->second);

    // Early out if there are too many cases for this result.
    if (NumCasesForResult > MaxSwitchCasesPerResult)
      return false;

    // Early out if there are too many unique results.
    if (UniqueResults.size() > MaxUniqueResults)
      return false;

    // Check the PHI consistency: every case must feed the same PHI node.
    if (!PHI)
      PHI = Results[0].first;
    else if (PHI != Results[0].first)
      return false;
  }
  // Find the default result value.
  BasicBlock *DefaultDest = SI->getDefaultDest();
  getCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest, DefaultResults,
                 DL, TTI);
  // If the default value is not found abort unless the default destination
  // is unreachable.
  DefaultResult =
      DefaultResults.size() == 1 ? DefaultResults.begin()->second : nullptr;
  if ((!DefaultResult &&
       !isa<UnreachableInst>(DefaultDest->getFirstNonPHIOrDbg())))
    return false;

  return true;
}
6025
// Helper function that checks if it is possible to transform a switch with only
// two cases (or two cases + default) that produces a result into a select.
// Returns the resulting select value, or nullptr if no fold applies.
// TODO: Handle switches with more than 2 cases that map to the same result.
static Value *foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector,
                                 Constant *DefaultResult, Value *Condition,
                                 IRBuilder<> &Builder) {
  // If we are selecting between only two cases transform into a simple
  // select or a two-way select if default is possible.
  // Example:
  // switch (a) {                  %0 = icmp eq i32 %a, 10
  // case 10: return 42;           %1 = select i1 %0, i32 42, i32 4
  // case 20: return 2;   ---->    %2 = icmp eq i32 %a, 20
  // default: return 4;            %3 = select i1 %2, i32 2, i32 %1
  // }
  if (ResultVector.size() == 2 && ResultVector[0].second.size() == 1 &&
      ResultVector[1].second.size() == 1) {
    ConstantInt *FirstCase = ResultVector[0].second[0];
    ConstantInt *SecondCase = ResultVector[1].second[0];
    // Without a default result the second case's result is the fallthrough
    // value of the outer select.
    Value *SelectValue = ResultVector[1].first;
    if (DefaultResult) {
      Value *ValueCompare =
          Builder.CreateICmpEQ(Condition, SecondCase, "switch.selectcmp");
      SelectValue = Builder.CreateSelect(ValueCompare, ResultVector[1].first,
                                         DefaultResult, "switch.select");
    }
    Value *ValueCompare =
        Builder.CreateICmpEQ(Condition, FirstCase, "switch.selectcmp");
    return Builder.CreateSelect(ValueCompare, ResultVector[0].first,
                                SelectValue, "switch.select");
  }

  // Handle the degenerate case where all cases produce a single result value
  // and only the default differs.
  if (ResultVector.size() == 1 && DefaultResult) {
    ArrayRef<ConstantInt *> CaseValues = ResultVector[0].second;
    unsigned CaseCount = CaseValues.size();
    // n bits group cases map to the same result:
    // case 0,4      -> Cond & 0b1..1011 == 0 ? result : default
    // case 0,2,4,6  -> Cond & 0b1..1001 == 0 ? result : default
    // case 0,2,8,10 -> Cond & 0b1..0101 == 0 ? result : default
    if (isPowerOf2_32(CaseCount)) {
      ConstantInt *MinCaseVal = CaseValues[0];
      // Find the minimal case value.
      for (auto *Case : CaseValues)
        if (Case->getValue().slt(MinCaseVal->getValue()))
          MinCaseVal = Case;

      // Mark the bits case number touched.
      APInt BitMask = APInt::getZero(MinCaseVal->getBitWidth());
      for (auto *Case : CaseValues)
        BitMask |= (Case->getValue() - MinCaseVal->getValue());

      // Check if cases with the same result can cover all number
      // in touched bits. If so, masking off the touched bits and comparing
      // with zero distinguishes the case set from everything else.
      if (BitMask.popcount() == Log2_32(CaseCount)) {
        if (!MinCaseVal->isNullValue())
          Condition = Builder.CreateSub(Condition, MinCaseVal);
        Value *And = Builder.CreateAnd(Condition, ~BitMask, "switch.and");
        Value *Cmp = Builder.CreateICmpEQ(
            And, Constant::getNullValue(And->getType()), "switch.selectcmp");
        return Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult);
      }
    }

    // Handle the degenerate case where exactly two case values map to the
    // same result: fold to (Cond == C1 || Cond == C2) ? result : default.
    if (CaseValues.size() == 2) {
      Value *Cmp1 = Builder.CreateICmpEQ(Condition, CaseValues[0],
                                         "switch.selectcmp.case1");
      Value *Cmp2 = Builder.CreateICmpEQ(Condition, CaseValues[1],
                                         "switch.selectcmp.case2");
      Value *Cmp = Builder.CreateOr(Cmp1, Cmp2, "switch.selectcmp");
      return Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult);
    }
  }

  return nullptr;
}
6102
6103// Helper function to cleanup a switch instruction that has been converted into
6104// a select, fixing up PHI nodes and basic blocks.
6106 Value *SelectValue,
6107 IRBuilder<> &Builder,
6108 DomTreeUpdater *DTU) {
6109 std::vector<DominatorTree::UpdateType> Updates;
6110
6111 BasicBlock *SelectBB = SI->getParent();
6112 BasicBlock *DestBB = PHI->getParent();
6113
6114 if (DTU && !is_contained(predecessors(DestBB), SelectBB))
6115 Updates.push_back({DominatorTree::Insert, SelectBB, DestBB});
6116 Builder.CreateBr(DestBB);
6117
6118 // Remove the switch.
6119
6120 PHI->removeIncomingValueIf(
6121 [&](unsigned Idx) { return PHI->getIncomingBlock(Idx) == SelectBB; });
6122 PHI->addIncoming(SelectValue, SelectBB);
6123
6124 SmallPtrSet<BasicBlock *, 4> RemovedSuccessors;
6125 for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) {
6126 BasicBlock *Succ = SI->getSuccessor(i);
6127
6128 if (Succ == DestBB)
6129 continue;
6130 Succ->removePredecessor(SelectBB);
6131 if (DTU && RemovedSuccessors.insert(Succ).second)
6132 Updates.push_back({DominatorTree::Delete, SelectBB, Succ});
6133 }
6134 SI->eraseFromParent();
6135 if (DTU)
6136 DTU->applyUpdates(Updates);
6137}
6138
6139/// If a switch is only used to initialize one or more phi nodes in a common
6140/// successor block with only two different constant values, try to replace the
6141/// switch with a select. Returns true if the fold was made.
6142static bool trySwitchToSelect(SwitchInst *SI, IRBuilder<> &Builder,
6143 DomTreeUpdater *DTU, const DataLayout &DL,
6144 const TargetTransformInfo &TTI) {
6145 Value *const Cond = SI->getCondition();
6146 PHINode *PHI = nullptr;
6147 BasicBlock *CommonDest = nullptr;
6148 Constant *DefaultResult;
6149 SwitchCaseResultVectorTy UniqueResults;
6150 // Collect all the cases that will deliver the same value from the switch.
6151 if (!initializeUniqueCases(SI, PHI, CommonDest, UniqueResults, DefaultResult,
6152 DL, TTI, /*MaxUniqueResults*/ 2))
6153 return false;
6154
6155 assert(PHI != nullptr && "PHI for value select not found");
6156 Builder.SetInsertPoint(SI);
6157 Value *SelectValue =
6158 foldSwitchToSelect(UniqueResults, DefaultResult, Cond, Builder);
6159 if (!SelectValue)
6160 return false;
6161
6162 removeSwitchAfterSelectFold(SI, PHI, SelectValue, Builder, DTU);
6163 return true;
6164}
6165
namespace {

/// This class represents a lookup table that can be used to replace a switch.
class SwitchLookupTable {
public:
  /// Create a lookup table to use as a switch replacement with the contents
  /// of Values, using DefaultValue to fill any holes in the table.
  SwitchLookupTable(
      Module &M, uint64_t TableSize, ConstantInt *Offset,
      const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
      Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName);

  /// Build instructions with Builder to retrieve the value at
  /// the position given by Index in the lookup table.
  Value *BuildLookup(Value *Index, IRBuilder<> &Builder);

  /// Return true if a table with TableSize elements of
  /// type ElementType would fit in a target-legal register.
  static bool WouldFitInRegister(const DataLayout &DL, uint64_t TableSize,
                                 Type *ElementType);

private:
  // Depending on the contents of the table, it can be represented in
  // different ways. The constructor picks the cheapest representation that
  // applies, checked in the order listed below.
  enum {
    // For tables where each element contains the same value, we just have to
    // store that single value and return it for each lookup.
    SingleValueKind,

    // For tables where there is a linear relationship between table index
    // and values. We calculate the result with a simple multiplication
    // and addition instead of a table lookup.
    LinearMapKind,

    // For small tables with integer elements, we can pack them into a bitmap
    // that fits into a target-legal register. Values are retrieved by
    // shift and mask operations.
    BitMapKind,

    // The table is stored as an array of values. Values are retrieved by load
    // instructions from the table.
    ArrayKind
  } Kind;

  // For SingleValueKind, this is the single value.
  Constant *SingleValue = nullptr;

  // For BitMapKind, this is the bitmap.
  ConstantInt *BitMap = nullptr;
  IntegerType *BitMapElementTy = nullptr;

  // For LinearMapKind, these are the constants used to derive the value.
  ConstantInt *LinearOffset = nullptr;
  ConstantInt *LinearMultiplier = nullptr;
  // True when the linear map may wrap, in which case nsw must not be attached.
  bool LinearMapValWrapped = false;

  // For ArrayKind, this is the array.
  GlobalVariable *Array = nullptr;
};

} // end anonymous namespace
6227
6228SwitchLookupTable::SwitchLookupTable(
6229 Module &M, uint64_t TableSize, ConstantInt *Offset,
6230 const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
6231 Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName) {
6232 assert(Values.size() && "Can't build lookup table without values!");
6233 assert(TableSize >= Values.size() && "Can't fit values in table!");
6234
6235 // If all values in the table are equal, this is that value.
6236 SingleValue = Values.begin()->second;
6237
6238 Type *ValueType = Values.begin()->second->getType();
6239
6240 // Build up the table contents.
6241 SmallVector<Constant *, 64> TableContents(TableSize);
6242 for (size_t I = 0, E = Values.size(); I != E; ++I) {
6243 ConstantInt *CaseVal = Values[I].first;
6244 Constant *CaseRes = Values[I].second;
6245 assert(CaseRes->getType() == ValueType);
6246
6247 uint64_t Idx = (CaseVal->getValue() - Offset->getValue()).getLimitedValue();
6248 TableContents[Idx] = CaseRes;
6249
6250 if (CaseRes != SingleValue)
6251 SingleValue = nullptr;
6252 }
6253
6254 // Fill in any holes in the table with the default result.
6255 if (Values.size() < TableSize) {
6256 assert(DefaultValue &&
6257 "Need a default value to fill the lookup table holes.");
6258 assert(DefaultValue->getType() == ValueType);
6259 for (uint64_t I = 0; I < TableSize; ++I) {
6260 if (!TableContents[I])
6261 TableContents[I] = DefaultValue;
6262 }
6263
6264 if (DefaultValue != SingleValue)
6265 SingleValue = nullptr;
6266 }
6267
6268 // If each element in the table contains the same value, we only need to store
6269 // that single value.
6270 if (SingleValue) {
6271 Kind = SingleValueKind;
6272 return;
6273 }
6274
6275 // Check if we can derive the value with a linear transformation from the
6276 // table index.
6277 if (isa<IntegerType>(ValueType)) {
6278 bool LinearMappingPossible = true;
6279 APInt PrevVal;
6280 APInt DistToPrev;
6281 // When linear map is monotonic and signed overflow doesn't happen on
6282 // maximum index, we can attach nsw on Add and Mul.
6283 bool NonMonotonic = false;
6284 assert(TableSize >= 2 && "Should be a SingleValue table.");
6285 // Check if there is the same distance between two consecutive values.
6286 for (uint64_t I = 0; I < TableSize; ++I) {
6287 ConstantInt *ConstVal = dyn_cast<ConstantInt>(TableContents[I]);
6288 if (!ConstVal) {
6289 // This is an undef. We could deal with it, but undefs in lookup tables
6290 // are very seldom. It's probably not worth the additional complexity.
6291 LinearMappingPossible = false;
6292 break;
6293 }
6294 const APInt &Val = ConstVal->getValue();
6295 if (I != 0) {
6296 APInt Dist = Val - PrevVal;
6297 if (I == 1) {
6298 DistToPrev = Dist;
6299 } else if (Dist != DistToPrev) {
6300 LinearMappingPossible = false;
6301 break;
6302 }
6303 NonMonotonic |=
6304 Dist.isStrictlyPositive() ? Val.sle(PrevVal) : Val.sgt(PrevVal);
6305 }
6306 PrevVal = Val;
6307 }
6308 if (LinearMappingPossible) {
6309 LinearOffset = cast<ConstantInt>(TableContents[0]);
6310 LinearMultiplier = ConstantInt::get(M.getContext(), DistToPrev);
6311 bool MayWrap = false;
6312 APInt M = LinearMultiplier->getValue();
6313 (void)M.smul_ov(APInt(M.getBitWidth(), TableSize - 1), MayWrap);
6314 LinearMapValWrapped = NonMonotonic || MayWrap;
6315 Kind = LinearMapKind;
6316 ++NumLinearMaps;
6317 return;
6318 }
6319 }
6320
6321 // If the type is integer and the table fits in a register, build a bitmap.
6322 if (WouldFitInRegister(DL, TableSize, ValueType)) {
6323 IntegerType *IT = cast<IntegerType>(ValueType);
6324 APInt TableInt(TableSize * IT->getBitWidth(), 0);
6325 for (uint64_t I = TableSize; I > 0; --I) {
6326 TableInt <<= IT->getBitWidth();
6327 // Insert values into the bitmap. Undef values are set to zero.
6328 if (!isa<UndefValue>(TableContents[I - 1])) {
6329 ConstantInt *Val = cast<ConstantInt>(TableContents[I - 1]);
6330 TableInt |= Val->getValue().zext(TableInt.getBitWidth());
6331 }
6332 }
6333 BitMap = ConstantInt::get(M.getContext(), TableInt);
6334 BitMapElementTy = IT;
6335 Kind = BitMapKind;
6336 ++NumBitMaps;
6337 return;
6338 }
6339
6340 // Store the table in an array.
6341 ArrayType *ArrayTy = ArrayType::get(ValueType, TableSize);
6342 Constant *Initializer = ConstantArray::get(ArrayTy, TableContents);
6343
6344 Array = new GlobalVariable(M, ArrayTy, /*isConstant=*/true,
6345 GlobalVariable::PrivateLinkage, Initializer,
6346 "switch.table." + FuncName);
6347 Array->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
6348 // Set the alignment to that of an array items. We will be only loading one
6349 // value out of it.
6350 Array->setAlignment(DL.getPrefTypeAlign(ValueType));
6351 Kind = ArrayKind;
6352}
6353
// Emit the IR that retrieves the table entry for Index, according to the
// representation chosen by the constructor.
Value *SwitchLookupTable::BuildLookup(Value *Index, IRBuilder<> &Builder) {
  switch (Kind) {
  case SingleValueKind:
    // Every entry is the same constant; no instructions needed.
    return SingleValue;
  case LinearMapKind: {
    // Derive the result value from the input value:
    // result = LinearOffset + Index * LinearMultiplier.
    Value *Result = Builder.CreateIntCast(Index, LinearMultiplier->getType(),
                                          false, "switch.idx.cast");
    if (!LinearMultiplier->isOne())
      Result = Builder.CreateMul(Result, LinearMultiplier, "switch.idx.mult",
                                 /*HasNUW = */ false,
                                 /*HasNSW = */ !LinearMapValWrapped);

    if (!LinearOffset->isZero())
      Result = Builder.CreateAdd(Result, LinearOffset, "switch.offset",
                                 /*HasNUW = */ false,
                                 /*HasNSW = */ !LinearMapValWrapped);
    return Result;
  }
  case BitMapKind: {
    // Type of the bitmap (e.g. i59).
    IntegerType *MapTy = BitMap->getIntegerType();

    // Cast Index to the same type as the bitmap.
    // Note: The Index is <= the number of elements in the table, so
    // truncating it to the width of the bitmask is safe.
    Value *ShiftAmt = Builder.CreateZExtOrTrunc(Index, MapTy, "switch.cast");

    // Multiply the shift amount by the element width. NUW/NSW can always be
    // set, because WouldFitInRegister guarantees Index * ShiftAmt is in
    // BitMap's bit width.
    ShiftAmt = Builder.CreateMul(
        ShiftAmt, ConstantInt::get(MapTy, BitMapElementTy->getBitWidth()),
        "switch.shiftamt",/*HasNUW =*/true,/*HasNSW =*/true);

    // Shift down.
    Value *DownShifted =
        Builder.CreateLShr(BitMap, ShiftAmt, "switch.downshift");
    // Mask off the high bits by truncating to the element type.
    return Builder.CreateTrunc(DownShifted, BitMapElementTy, "switch.masked");
  }
  case ArrayKind: {
    // Make sure the table index will not overflow when treated as signed:
    // a GEP index is interpreted as signed, so widen Index by one bit if
    // TableSize could exceed the signed range of its type.
    IntegerType *IT = cast<IntegerType>(Index->getType());
    uint64_t TableSize =
        Array->getInitializer()->getType()->getArrayNumElements();
    if (TableSize > (1ULL << std::min(IT->getBitWidth() - 1, 63u)))
      Index = Builder.CreateZExt(
          Index, IntegerType::get(IT->getContext(), IT->getBitWidth() + 1),
          "switch.tableidx.zext");

    Value *GEPIndices[] = {Builder.getInt32(0), Index};
    Value *GEP = Builder.CreateInBoundsGEP(Array->getValueType(), Array,
                                           GEPIndices, "switch.gep");
    return Builder.CreateLoad(
        cast<ArrayType>(Array->getValueType())->getElementType(), GEP,
        "switch.load");
  }
  }
  llvm_unreachable("Unknown lookup table kind!");
}
6415
6416bool SwitchLookupTable::WouldFitInRegister(const DataLayout &DL,
6417 uint64_t TableSize,
6418 Type *ElementType) {
6419 auto *IT = dyn_cast<IntegerType>(ElementType);
6420 if (!IT)
6421 return false;
6422 // FIXME: If the type is wider than it needs to be, e.g. i8 but all values
6423 // are <= 15, we could try to narrow the type.
6424
6425 // Avoid overflow, fitsInLegalInteger uses unsigned int for the width.
6426 if (TableSize >= UINT_MAX / IT->getBitWidth())
6427 return false;
6428 return DL.fitsInLegalInteger(TableSize * IT->getBitWidth());
6429}
6430
6432 const DataLayout &DL) {
6433 // Allow any legal type.
6434 if (TTI.isTypeLegal(Ty))
6435 return true;
6436
6437 auto *IT = dyn_cast<IntegerType>(Ty);
6438 if (!IT)
6439 return false;
6440
6441 // Also allow power of 2 integer types that have at least 8 bits and fit in
6442 // a register. These types are common in frontend languages and targets
6443 // usually support loads of these types.
6444 // TODO: We could relax this to any integer that fits in a register and rely
6445 // on ABI alignment and padding in the table to allow the load to be widened.
6446 // Or we could widen the constants and truncate the load.
6447 unsigned BitWidth = IT->getBitWidth();
6448 return BitWidth >= 8 && isPowerOf2_32(BitWidth) &&
6449 DL.fitsInLegalInteger(IT->getBitWidth());
6450}
6451
// Return true if NumCases cases spanning a range of CaseRange values is dense
// enough to be worth a jump/lookup table.
static bool isSwitchDense(uint64_t NumCases, uint64_t CaseRange) {
  // 40% is the default density for building a jump table in optsize/minsize
  // mode. See also TargetLoweringBase::isSuitableForJumpTable(), which this
  // function was based on.
  constexpr uint64_t MinDensity = 40;

  // Ranges this large would overflow the scaled comparison below; treat them
  // as sparse.
  if (CaseRange >= UINT64_MAX / 100)
    return false;

  // Dense iff NumCases / CaseRange >= MinDensity%, evaluated in integer
  // arithmetic as NumCases * 100 >= CaseRange * MinDensity.
  const uint64_t ScaledCases = NumCases * 100;
  const uint64_t RequiredCases = CaseRange * MinDensity;
  return ScaledCases >= RequiredCases;
}
6463
6465 uint64_t Diff = (uint64_t)Values.back() - (uint64_t)Values.front();
6466 uint64_t Range = Diff + 1;
6467 if (Range < Diff)
6468 return false; // Overflow.
6469
6470 return isSwitchDense(Values.size(), Range);
6471}
6472
6473/// Determine whether a lookup table should be built for this switch, based on
6474/// the number of cases, size of the table, and the types of the results.
6475// TODO: We could support larger than legal types by limiting based on the
6476// number of loads required and/or table size. If the constants are small we
6477// could use smaller table entries and extend after the load.
6478static bool
6480 const TargetTransformInfo &TTI, const DataLayout &DL,
6481 const SmallDenseMap<PHINode *, Type *> &ResultTypes) {
6482 if (SI->getNumCases() > TableSize)
6483 return false; // TableSize overflowed.
6484
6485 bool AllTablesFitInRegister = true;
6486 bool HasIllegalType = false;
6487 for (const auto &I : ResultTypes) {
6488 Type *Ty = I.second;
6489
6490 // Saturate this flag to true.
6491 HasIllegalType = HasIllegalType || !isTypeLegalForLookupTable(Ty, TTI, DL);
6492
6493 // Saturate this flag to false.
6494 AllTablesFitInRegister =
6495 AllTablesFitInRegister &&
6496 SwitchLookupTable::WouldFitInRegister(DL, TableSize, Ty);
6497
6498 // If both flags saturate, we're done. NOTE: This *only* works with
6499 // saturating flags, and all flags have to saturate first due to the
6500 // non-deterministic behavior of iterating over a dense map.
6501 if (HasIllegalType && !AllTablesFitInRegister)
6502 break;
6503 }
6504
6505 // If each table would fit in a register, we should build it anyway.
6506 if (AllTablesFitInRegister)
6507 return true;
6508
6509 // Don't build a table that doesn't fit in-register if it has illegal types.
6510 if (HasIllegalType)
6511 return false;
6512
6513 return isSwitchDense(SI->getNumCases(), TableSize);
6514}
6515
6517 ConstantInt &MinCaseVal, const ConstantInt &MaxCaseVal,
6518 bool HasDefaultResults, const SmallDenseMap<PHINode *, Type *> &ResultTypes,
6519 const DataLayout &DL, const TargetTransformInfo &TTI) {
6520 if (MinCaseVal.isNullValue())
6521 return true;
6522 if (MinCaseVal.isNegative() ||
6523 MaxCaseVal.getLimitedValue() == std::numeric_limits<uint64_t>::max() ||
6524 !HasDefaultResults)
6525 return false;
6526 return all_of(ResultTypes, [&](const auto &KV) {
6527 return SwitchLookupTable::WouldFitInRegister(
6528 DL, MaxCaseVal.getLimitedValue() + 1 /* TableSize */,
6529 KV.second /* ResultType */);
6530 });
6531}
6532
6533/// Try to reuse the switch table index compare. Following pattern:
6534/// \code
6535/// if (idx < tablesize)
6536/// r = table[idx]; // table does not contain default_value
6537/// else
6538/// r = default_value;
6539/// if (r != default_value)
6540/// ...
6541/// \endcode
6542/// Is optimized to:
6543/// \code
6544/// cond = idx < tablesize;
6545/// if (cond)
6546/// r = table[idx];
6547/// else
6548/// r = default_value;
6549/// if (cond)
6550/// ...
6551/// \endcode
6552/// Jump threading will then eliminate the second if(cond).
6554 User *PhiUser, BasicBlock *PhiBlock, BranchInst *RangeCheckBranch,
6555 Constant *DefaultValue,
6556 const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values) {
6557 ICmpInst *CmpInst = dyn_cast<ICmpInst>(PhiUser);
6558 if (!CmpInst)
6559 return;
6560
6561 // We require that the compare is in the same block as the phi so that jump
6562 // threading can do its work afterwards.
6563 if (CmpInst->getParent() != PhiBlock)
6564 return;
6565
6566 Constant *CmpOp1 = dyn_cast<Constant>(CmpInst->getOperand(1));
6567 if (!CmpOp1)
6568 return;
6569
6570 Value *RangeCmp = RangeCheckBranch->getCondition();
6571 Constant *TrueConst = ConstantInt::getTrue(RangeCmp->getType());
6572 Constant *FalseConst = ConstantInt::getFalse(RangeCmp->getType());
6573
6574 // Check if the compare with the default value is constant true or false.
6576 DefaultValue, CmpOp1, true);
6577 if (DefaultConst != TrueConst && DefaultConst != FalseConst)
6578 return;
6579
6580 // Check if the compare with the case values is distinct from the default
6581 // compare result.
6582 for (auto ValuePair : Values) {
6584 ValuePair.second, CmpOp1, true);
6585 if (!CaseConst || CaseConst == DefaultConst ||
6586 (CaseConst != TrueConst && CaseConst != FalseConst))
6587 return;
6588 }
6589
6590 // Check if the branch instruction dominates the phi node. It's a simple
6591 // dominance check, but sufficient for our needs.
6592 // Although this check is invariant in the calling loops, it's better to do it
6593 // at this late stage. Practically we do it at most once for a switch.
6594 BasicBlock *BranchBlock = RangeCheckBranch->getParent();
6595 for (BasicBlock *Pred : predecessors(PhiBlock)) {
6596 if (Pred != BranchBlock && Pred->getUniquePredecessor() != BranchBlock)
6597 return;
6598 }
6599
6600 if (DefaultConst == FalseConst) {
6601 // The compare yields the same result. We can replace it.
6602 CmpInst->replaceAllUsesWith(RangeCmp);
6603 ++NumTableCmpReuses;
6604 } else {
6605 // The compare yields the same result, just inverted. We can replace it.
6606 Value *InvertedTableCmp = BinaryOperator::CreateXor(
6607 RangeCmp, ConstantInt::get(RangeCmp->getType(), 1), "inverted.cmp",
6608 RangeCheckBranch->getIterator());
6609 CmpInst->replaceAllUsesWith(InvertedTableCmp);
6610 ++NumTableCmpReuses;
6611 }
6612}
6613
6614/// If the switch is only used to initialize one or more phi nodes in a common
6615/// successor block with different constant values, replace the switch with
6616/// lookup tables.
6618 DomTreeUpdater *DTU, const DataLayout &DL,
6619 const TargetTransformInfo &TTI) {
6620 assert(SI->getNumCases() > 1 && "Degenerate switch?");
6621
6622 BasicBlock *BB = SI->getParent();
6623 Function *Fn = BB->getParent();
6624 // Only build lookup table when we have a target that supports it or the
6625 // attribute is not set.
6627 (Fn->getFnAttribute("no-jump-tables").getValueAsBool()))
6628 return false;
6629
6630 // FIXME: If the switch is too sparse for a lookup table, perhaps we could
6631 // split off a dense part and build a lookup table for that.
6632
6633 // FIXME: This creates arrays of GEPs to constant strings, which means each
6634 // GEP needs a runtime relocation in PIC code. We should just build one big
6635 // string and lookup indices into that.
6636
6637 // Ignore switches with less than three cases. Lookup tables will not make
6638 // them faster, so we don't analyze them.
6639 if (SI->getNumCases() < 3)
6640 return false;
6641
6642 // Figure out the corresponding result for each case value and phi node in the
6643 // common destination, as well as the min and max case values.
6644 assert(!SI->cases().empty());
6645 SwitchInst::CaseIt CI = SI->case_begin();
6646 ConstantInt *MinCaseVal = CI->getCaseValue();
6647 ConstantInt *MaxCaseVal = CI->getCaseValue();
6648
6649 BasicBlock *CommonDest = nullptr;
6650
6651 using ResultListTy = SmallVector<std::pair<ConstantInt *, Constant *>, 4>;
6653
6657
6658 for (SwitchInst::CaseIt E = SI->case_end(); CI != E; ++CI) {
6659 ConstantInt *CaseVal = CI->getCaseValue();
6660 if (CaseVal->getValue().slt(MinCaseVal->getValue()))
6661 MinCaseVal = CaseVal;
6662 if (CaseVal->getValue().sgt(MaxCaseVal->getValue()))
6663 MaxCaseVal = CaseVal;
6664
6665 // Resulting value at phi nodes for this case value.
6667 ResultsTy Results;
6668 if (!getCaseResults(SI, CaseVal, CI->getCaseSuccessor(), &CommonDest,
6669 Results, DL, TTI))
6670 return false;
6671
6672 // Append the result from this case to the list for each phi.
6673 for (const auto &I : Results) {
6674 PHINode *PHI = I.first;
6675 Constant *Value = I.second;
6676 if (!ResultLists.count(PHI))
6677 PHIs.push_back(PHI);
6678 ResultLists[PHI].push_back(std::make_pair(CaseVal, Value));
6679 }
6680 }
6681
6682 // Keep track of the result types.
6683 for (PHINode *PHI : PHIs) {
6684 ResultTypes[PHI] = ResultLists[PHI][0].second->getType();
6685 }
6686
6687 uint64_t NumResults = ResultLists[PHIs[0]].size();
6688
6689 // If the table has holes, we need a constant result for the default case
6690 // or a bitmask that fits in a register.
6691 SmallVector<std::pair<PHINode *, Constant *>, 4> DefaultResultsList;
6692 bool HasDefaultResults =
6693 getCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest,
6694 DefaultResultsList, DL, TTI);
6695
6696 for (const auto &I : DefaultResultsList) {
6697 PHINode *PHI = I.first;
6698 Constant *Result = I.second;
6699 DefaultResults[PHI] = Result;
6700 }
6701
6702 bool UseSwitchConditionAsTableIndex = ShouldUseSwitchConditionAsTableIndex(
6703 *MinCaseVal, *MaxCaseVal, HasDefaultResults, ResultTypes, DL, TTI);
6704 uint64_t TableSize;
6705 if (UseSwitchConditionAsTableIndex)
6706 TableSize = MaxCaseVal->getLimitedValue() + 1;
6707 else
6708 TableSize =
6709 (MaxCaseVal->getValue() - MinCaseVal->getValue()).getLimitedValue() + 1;
6710
6711 bool TableHasHoles = (NumResults < TableSize);
6712 bool NeedMask = (TableHasHoles && !HasDefaultResults);
6713 if (NeedMask) {
6714 // As an extra penalty for the validity test we require more cases.
6715 if (SI->getNumCases() < 4) // FIXME: Find best threshold value (benchmark).
6716 return false;
6717 if (!DL.fitsInLegalInteger(TableSize))
6718 return false;
6719 }
6720
6721 if (!ShouldBuildLookupTable(SI, TableSize, TTI, DL, ResultTypes))
6722 return false;
6723
6724 std::vector<DominatorTree::UpdateType> Updates;
6725
6726 // Compute the maximum table size representable by the integer type we are
6727 // switching upon.
6728 unsigned CaseSize = MinCaseVal->getType()->getPrimitiveSizeInBits();
6729 uint64_t MaxTableSize = CaseSize > 63 ? UINT64_MAX : 1ULL << CaseSize;
6730 assert(MaxTableSize >= TableSize &&
6731 "It is impossible for a switch to have more entries than the max "
6732 "representable value of its input integer type's size.");
6733
6734 // If the default destination is unreachable, or if the lookup table covers
6735 // all values of the conditional variable, branch directly to the lookup table
6736 // BB. Otherwise, check that the condition is within the case range.
6737 bool DefaultIsReachable =
6738 !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg());
6739
6740 // Create the BB that does the lookups.
6741 Module &Mod = *CommonDest->getParent()->getParent();
6742 BasicBlock *LookupBB = BasicBlock::Create(
6743 Mod.getContext(), "switch.lookup", CommonDest->getParent(), CommonDest);
6744
6745 // Compute the table index value.
6746 Builder.SetInsertPoint(SI);
6747 Value *TableIndex;
6748 ConstantInt *TableIndexOffset;
6749 if (UseSwitchConditionAsTableIndex) {
6750 TableIndexOffset = ConstantInt::get(MaxCaseVal->getIntegerType(), 0);
6751 TableIndex = SI->getCondition();
6752 } else {
6753 TableIndexOffset = MinCaseVal;
6754 // If the default is unreachable, all case values are s>= MinCaseVal. Then
6755 // we can try to attach nsw.
6756 bool MayWrap = true;
6757 if (!DefaultIsReachable) {
6758 APInt Res = MaxCaseVal->getValue().ssub_ov(MinCaseVal->getValue(), MayWrap);
6759 (void)Res;
6760 }
6761
6762 TableIndex = Builder.CreateSub(SI->getCondition(), TableIndexOffset,
6763 "switch.tableidx", /*HasNUW =*/false,
6764 /*HasNSW =*/!MayWrap);
6765 }
6766
6767 BranchInst *RangeCheckBranch = nullptr;
6768
6769 // Grow the table to cover all possible index values to avoid the range check.
6770 // It will use the default result to fill in the table hole later, so make
6771 // sure it exist.
6772 if (UseSwitchConditionAsTableIndex && HasDefaultResults) {
6773 ConstantRange CR = computeConstantRange(TableIndex, /* ForSigned */ false);
6774 // Grow the table shouldn't have any size impact by checking
6775 // WouldFitInRegister.
6776 // TODO: Consider growing the table also when it doesn't fit in a register
6777 // if no optsize is specified.
6778 const uint64_t UpperBound = CR.getUpper().getLimitedValue();
6779 if (!CR.isUpperWrapped() && all_of(ResultTypes, [&](const auto &KV) {
6780 return SwitchLookupTable::WouldFitInRegister(
6781 DL, UpperBound, KV.second /* ResultType */);
6782 })) {
6783 // The default branch is unreachable after we enlarge the lookup table.
6784 // Adjust DefaultIsReachable to reuse code path.
6785 TableSize = UpperBound;
6786 DefaultIsReachable = false;
6787 }
6788 }
6789
6790 const bool GeneratingCoveredLookupTable = (MaxTableSize == TableSize);
6791 if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
6792 Builder.CreateBr(LookupBB);
6793 if (DTU)
6794 Updates.push_back({DominatorTree::Insert, BB, LookupBB});
6795 // Note: We call removeProdecessor later since we need to be able to get the
6796 // PHI value for the default case in case we're using a bit mask.
6797 } else {
6798 Value *Cmp = Builder.CreateICmpULT(
6799 TableIndex, ConstantInt::get(MinCaseVal->getType(), TableSize));
6800 RangeCheckBranch =
6801 Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest());
6802 if (DTU)
6803 Updates.push_back({DominatorTree::Insert, BB, LookupBB});
6804 }
6805
6806 // Populate the BB that does the lookups.
6807 Builder.SetInsertPoint(LookupBB);
6808
6809 if (NeedMask) {
6810 // Before doing the lookup, we do the hole check. The LookupBB is therefore
6811 // re-purposed to do the hole check, and we create a new LookupBB.
6812 BasicBlock *MaskBB = LookupBB;
6813 MaskBB->setName("switch.hole_check");
6814 LookupBB = BasicBlock::Create(Mod.getContext(), "switch.lookup",
6815 CommonDest->getParent(), CommonDest);
6816
6817 // Make the mask's bitwidth at least 8-bit and a power-of-2 to avoid
6818 // unnecessary illegal types.
6819 uint64_t TableSizePowOf2 = NextPowerOf2(std::max(7ULL, TableSize - 1ULL));
6820 APInt MaskInt(TableSizePowOf2, 0);
6821 APInt One(TableSizePowOf2, 1);
6822 // Build bitmask; fill in a 1 bit for every case.
6823 const ResultListTy &ResultList = ResultLists[PHIs[0]];
6824 for (size_t I = 0, E = ResultList.size(); I != E; ++I) {
6825 uint64_t Idx = (ResultList[I].first->getValue() - TableIndexOffset->getValue())
6826 .getLimitedValue();
6827 MaskInt |= One << Idx;
6828 }
6829 ConstantInt *TableMask = ConstantInt::get(Mod.getContext(), MaskInt);
6830
6831 // Get the TableIndex'th bit of the bitmask.
6832 // If this bit is 0 (meaning hole) jump to the default destination,
6833 // else continue with table lookup.
6834 IntegerType *MapTy = TableMask->getIntegerType();
6835 Value *MaskIndex =
6836 Builder.CreateZExtOrTrunc(TableIndex, MapTy, "switch.maskindex");
6837 Value *Shifted = Builder.CreateLShr(TableMask, MaskIndex, "switch.shifted");
6838 Value *LoBit = Builder.CreateTrunc(
6839 Shifted, Type::getInt1Ty(Mod.getContext()), "switch.lobit");
6840 Builder.CreateCondBr(LoBit, LookupBB, SI->getDefaultDest());
6841 if (DTU) {
6842 Updates.push_back({DominatorTree::Insert, MaskBB, LookupBB});
6843 Updates.push_back({DominatorTree::Insert, MaskBB, SI->getDefaultDest()});
6844 }
6845 Builder.SetInsertPoint(LookupBB);
6846 AddPredecessorToBlock(SI->getDefaultDest(), MaskBB, BB);
6847 }
6848
6849 if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
6850 // We cached PHINodes in PHIs. To avoid accessing deleted PHINodes later,
6851 // do not delete PHINodes here.
6852 SI->getDefaultDest()->removePredecessor(BB,
6853 /*KeepOneInputPHIs=*/true);
6854 if (DTU)
6855 Updates.push_back({DominatorTree::Delete, BB, SI->getDefaultDest()});
6856 }
6857
6858 for (PHINode *PHI : PHIs) {
6859 const ResultListTy &ResultList = ResultLists[PHI];
6860
6861 // If using a bitmask, use any value to fill the lookup table holes.
6862 Constant *DV = NeedMask ? ResultLists[PHI][0].second : DefaultResults[PHI];
6863 StringRef FuncName = Fn->getName();
6864 SwitchLookupTable Table(Mod, TableSize, TableIndexOffset, ResultList, DV,
6865 DL, FuncName);
6866
6867 Value *Result = Table.BuildLookup(TableIndex, Builder);
6868
6869 // Do a small peephole optimization: re-use the switch table compare if
6870 // possible.
6871 if (!TableHasHoles && HasDefaultResults && RangeCheckBranch) {
6872 BasicBlock *PhiBlock = PHI->getParent();
6873 // Search for compare instructions which use the phi.
6874 for (auto *User : PHI->users()) {
6875 reuseTableCompare(User, PhiBlock, RangeCheckBranch, DV, ResultList);
6876 }
6877 }
6878
6879 PHI->addIncoming(Result, LookupBB);
6880 }
6881
6882 Builder.CreateBr(CommonDest);
6883 if (DTU)
6884 Updates.push_back({DominatorTree::Insert, LookupBB, CommonDest});
6885
6886 // Remove the switch.
6887 SmallPtrSet<BasicBlock *, 8> RemovedSuccessors;
6888 for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) {
6889 BasicBlock *Succ = SI->getSuccessor(i);
6890
6891 if (Succ == SI->getDefaultDest())
6892 continue;
6893 Succ->removePredecessor(BB);
6894 if (DTU && RemovedSuccessors.insert(Succ).second)
6895 Updates.push_back({DominatorTree::Delete, BB, Succ});
6896 }
6897 SI->eraseFromParent();
6898
6899 if (DTU)
6900 DTU->applyUpdates(Updates);
6901
6902 ++NumLookupTables;
6903 if (NeedMask)
6904 ++NumLookupTablesHoles;
6905 return true;
6906}
6907
6908/// Try to transform a switch that has "holes" in it to a contiguous sequence
6909/// of cases.
6910///
6911/// A switch such as: switch(i) {case 5: case 9: case 13: case 17:} can be
6912/// range-reduced to: switch ((i-5) / 4) {case 0: case 1: case 2: case 3:}.
6913///
6914/// This converts a sparse switch into a dense switch which allows better
6915/// lowering and could also allow transforming into a lookup table.
6916static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder,
6917 const DataLayout &DL,
6918 const TargetTransformInfo &TTI) {
6919 auto *CondTy = cast<IntegerType>(SI->getCondition()->getType());
6920 if (CondTy->getIntegerBitWidth() > 64 ||
6921 !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
6922 return false;
6923 // Only bother with this optimization if there are more than 3 switch cases;
6924 // SDAG will only bother creating jump tables for 4 or more cases.
6925 if (SI->getNumCases() < 4)
6926 return false;
6927
6928 // This transform is agnostic to the signedness of the input or case values. We
6929 // can treat the case values as signed or unsigned. We can optimize more common
6930 // cases such as a sequence crossing zero {-4,0,4,8} if we interpret case values
6931 // as signed.
6933 for (const auto &C : SI->cases())
6934 Values.push_back(C.getCaseValue()->getValue().getSExtValue());
6935 llvm::sort(Values);
6936
6937 // If the switch is already dense, there's nothing useful to do here.
6938 if (isSwitchDense(Values))
6939 return false;
6940
6941 // First, transform the values such that they start at zero and ascend.
6942 int64_t Base = Values[0];
6943 for (auto &V : Values)
6944 V -= (uint64_t)(Base);
6945
6946 // Now we have signed numbers that have been shifted so that, given enough
6947 // precision, there are no negative values. Since the rest of the transform
6948 // is bitwise only, we switch now to an unsigned representation.
6949
6950 // This transform can be done speculatively because it is so cheap - it
6951 // results in a single rotate operation being inserted.
6952
6953 // countTrailingZeros(0) returns 64. As Values is guaranteed to have more than
6954 // one element and LLVM disallows duplicate cases, Shift is guaranteed to be
6955 // less than 64.
6956 unsigned Shift = 64;
6957 for (auto &V : Values)
6958 Shift = std::min(Shift, (unsigned)llvm::countr_zero((uint64_t)V));
6959 assert(Shift < 64);
6960 if (Shift > 0)
6961 for (auto &V : Values)
6962 V = (int64_t)((uint64_t)V >> Shift);
6963
6964 if (!isSwitchDense(Values))
6965 // Transform didn't create a dense switch.
6966 return false;
6967
6968 // The obvious transform is to shift the switch condition right and emit a
6969 // check that the condition actually cleanly divided by GCD, i.e.
6970 // C & (1 << Shift - 1) == 0
6971 // inserting a new CFG edge to handle the case where it didn't divide cleanly.
6972 //
6973 // A cheaper way of doing this is a simple ROTR(C, Shift). This performs the
6974 // shift and puts the shifted-off bits in the uppermost bits. If any of these
6975 // are nonzero then the switch condition will be very large and will hit the
6976 // default case.
6977
6978 auto *Ty = cast<IntegerType>(SI->getCondition()->getType());
6979 Builder.SetInsertPoint(SI);
6980 Value *Sub =
6981 Builder.CreateSub(SI->getCondition(), ConstantInt::get(Ty, Base));
6982 Value *Rot = Builder.CreateIntrinsic(
6983 Ty, Intrinsic::fshl,
6984 {Sub, Sub, ConstantInt::get(Ty, Ty->getBitWidth() - Shift)});
6985 SI->replaceUsesOfWith(SI->getCondition(), Rot);
6986
6987 for (auto Case : SI->cases()) {
6988 auto *Orig = Case.getCaseValue();
6989 auto Sub = Orig->getValue() - APInt(Ty->getBitWidth(), Base);
6990 Case.setValue(cast<ConstantInt>(ConstantInt::get(Ty, Sub.lshr(Shift))));
6991 }
6992 return true;
6993}
6994
6995/// Tries to transform switch of powers of two to reduce switch range.
6996/// For example, switch like:
6997/// switch (C) { case 1: case 2: case 64: case 128: }
6998/// will be transformed to:
6999/// switch (count_trailing_zeros(C)) { case 0: case 1: case 6: case 7: }
7000///
7001/// This transformation allows better lowering and could allow transforming into
7002/// a lookup table.
7004 const DataLayout &DL,
7005 const TargetTransformInfo &TTI) {
7006 Value *Condition = SI->getCondition();
7007 LLVMContext &Context = SI->getContext();
7008 auto *CondTy = cast<IntegerType>(Condition->getType());
7009
7010 if (CondTy->getIntegerBitWidth() > 64 ||
7011 !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
7012 return false;
7013
7014 const auto CttzIntrinsicCost = TTI.getIntrinsicInstrCost(
7015 IntrinsicCostAttributes(Intrinsic::cttz, CondTy,
7016 {Condition, ConstantInt::getTrue(Context)}),
7018
7019 if (CttzIntrinsicCost > TTI::TCC_Basic)
7020 // Inserting intrinsic is too expensive.
7021 return false;
7022
7023 // Only bother with this optimization if there are more than 3 switch cases.
7024 // SDAG will only bother creating jump tables for 4 or more cases.
7025 if (SI->getNumCases() < 4)
7026 return false;
7027
7028 // We perform this optimization only for switches with
7029 // unreachable default case.
7030 // This assumtion will save us from checking if `Condition` is a power of two.
7031 if (!isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg()))
7032 return false;
7033
7034 // Check that switch cases are powers of two.
7036 for (const auto &Case : SI->cases()) {
7037 uint64_t CaseValue = Case.getCaseValue()->getValue().getZExtValue();
7038 if (llvm::has_single_bit(CaseValue))
7039 Values.push_back(CaseValue);
7040 else
7041 return false;
7042 }
7043
7044 // isSwichDense requires case values to be sorted.
7045 llvm::sort(Values);
7046 if (!isSwitchDense(Values.size(), llvm::countr_zero(Values.back()) -
7047 llvm::countr_zero(Values.front()) + 1))
7048 // Transform is unable to generate dense switch.
7049 return false;
7050
7051 Builder.SetInsertPoint(SI);
7052
7053 // Replace each case with its trailing zeros number.
7054 for (auto &Case : SI->cases()) {
7055 auto *OrigValue = Case.getCaseValue();
7056 Case.setValue(ConstantInt::get(OrigValue->getIntegerType(),
7057 OrigValue->getValue().countr_zero()));
7058 }
7059
7060 // Replace condition with its trailing zeros number.
7061 auto *ConditionTrailingZeros = Builder.CreateIntrinsic(
7062 Intrinsic::cttz, {CondTy}, {Condition, ConstantInt::getTrue(Context)});
7063
7064 SI->setCondition(ConditionTrailingZeros);
7065
7066 return true;
7067}
7068
7069bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
7070 BasicBlock *BB = SI->getParent();
7071
7072 if (isValueEqualityComparison(SI)) {
7073 // If we only have one predecessor, and if it is a branch on this value,
7074 // see if that predecessor totally determines the outcome of this switch.
7075 if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
7076 if (SimplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred, Builder))
7077 return requestResimplify();
7078
7079 Value *Cond = SI->getCondition();
7080 if (SelectInst *Select = dyn_cast<SelectInst>(Cond))
7081 if (SimplifySwitchOnSelect(SI, Select))
7082 return requestResimplify();
7083
7084 // If the block only contains the switch, see if we can fold the block
7085 // away into any preds.
7086 if (SI == &*BB->instructionsWithoutDebug(false).begin())
7087 if (FoldValueComparisonIntoPredecessors(SI, Builder))
7088 return requestResimplify();
7089 }
7090
7091 // Try to transform the switch into an icmp and a branch.
7092 // The conversion from switch to comparison may lose information on
7093 // impossible switch values, so disable it early in the pipeline.
7094 if (Options.ConvertSwitchRangeToICmp && TurnSwitchRangeIntoICmp(SI, Builder))
7095 return requestResimplify();
7096
7097 // Remove unreachable cases.
7098 if (eliminateDeadSwitchCases(SI, DTU, Options.AC, DL))
7099 return requestResimplify();
7100
7101 if (trySwitchToSelect(SI, Builder, DTU, DL, TTI))
7102 return requestResimplify();
7103
7104 if (Options.ForwardSwitchCondToPhi && ForwardSwitchConditionToPHI(SI))
7105 return requestResimplify();
7106
7107 // The conversion from switch to lookup tables results in difficult-to-analyze
7108 // code and makes pruning branches much harder. This is a problem if the
7109 // switch expression itself can still be restricted as a result of inlining or
7110 // CVP. Therefore, only apply this transformation during late stages of the
7111 // optimisation pipeline.
7112 if (Options.ConvertSwitchToLookupTable &&
7113 SwitchToLookupTable(SI, Builder, DTU, DL, TTI))
7114 return requestResimplify();
7115
7116 if (simplifySwitchOfPowersOfTwo(SI, Builder, DL, TTI))
7117 return requestResimplify();
7118
7119 if (ReduceSwitchRange(SI, Builder, DL, TTI))
7120 return requestResimplify();
7121
7122 if (HoistCommon &&
7123 hoistCommonCodeFromSuccessors(SI->getParent(), !Options.HoistCommonInsts))
7124 return requestResimplify();
7125
7126 return false;
7127}
7128
7129bool SimplifyCFGOpt::simplifyIndirectBr(IndirectBrInst *IBI) {
7130 BasicBlock *BB = IBI->getParent();
7131 bool Changed = false;
7132
7133 // Eliminate redundant destinations.
7136 for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) {
7137 BasicBlock *Dest = IBI->getDestination(i);
7138 if (!Dest->hasAddressTaken() || !Succs.insert(Dest).second) {
7139 if (!Dest->hasAddressTaken())
7140 RemovedSuccs.insert(Dest);
7141 Dest->removePredecessor(BB);
7142 IBI->removeDestination(i);
7143 --i;
7144 --e;
7145 Changed = true;
7146 }
7147 }
7148
7149 if (DTU) {
7150 std::vector<DominatorTree::UpdateType> Updates;
7151 Updates.reserve(RemovedSuccs.size());
7152 for (auto *RemovedSucc : RemovedSuccs)
7153 Updates.push_back({DominatorTree::Delete, BB, RemovedSucc});
7154 DTU->applyUpdates(Updates);
7155 }
7156
7157 if (IBI->getNumDestinations() == 0) {
7158 // If the indirectbr has no successors, change it to unreachable.
7159 new UnreachableInst(IBI->getContext(), IBI->getIterator());
7161 return true;
7162 }
7163
7164 if (IBI->getNumDestinations() == 1) {
7165 // If the indirectbr has one successor, change it to a direct branch.
7168 return true;
7169 }
7170
7171 if (SelectInst *SI = dyn_cast<SelectInst>(IBI->getAddress())) {
7172 if (SimplifyIndirectBrOnSelect(IBI, SI))
7173 return requestResimplify();
7174 }
7175 return Changed;
7176}
7177
7178/// Given an block with only a single landing pad and a unconditional branch
7179/// try to find another basic block which this one can be merged with. This
7180/// handles cases where we have multiple invokes with unique landing pads, but
7181/// a shared handler.
7182///
7183/// We specifically choose to not worry about merging non-empty blocks
7184/// here. That is a PRE/scheduling problem and is best solved elsewhere. In
7185/// practice, the optimizer produces empty landing pad blocks quite frequently
7186/// when dealing with exception dense code. (see: instcombine, gvn, if-else
7187/// sinking in this file)
7188///
7189/// This is primarily a code size optimization. We need to avoid performing
7190/// any transform which might inhibit optimization (such as our ability to
7191/// specialize a particular handler via tail commoning). We do this by not
7192/// merging any blocks which require us to introduce a phi. Since the same
7193/// values are flowing through both blocks, we don't lose any ability to
7194/// specialize. If anything, we make such specialization more likely.
7195///
7196/// TODO - This transformation could remove entries from a phi in the target
7197/// block when the inputs in the phi are the same for the two blocks being
7198/// merged. In some cases, this could result in removal of the PHI entirely.
7200 BasicBlock *BB, DomTreeUpdater *DTU) {
7201 auto Succ = BB->getUniqueSuccessor();
7202 assert(Succ);
7203 // If there's a phi in the successor block, we'd likely have to introduce
7204 // a phi into the merged landing pad block.
7205 if (isa<PHINode>(*Succ->begin()))
7206 return false;
7207
7208 for (BasicBlock *OtherPred : predecessors(Succ)) {
7209 if (BB == OtherPred)
7210 continue;
7211 BasicBlock::iterator I = OtherPred->begin();
7212 LandingPadInst *LPad2 = dyn_cast<LandingPadInst>(I);
7213 if (!LPad2 || !LPad2->isIdenticalTo(LPad))
7214 continue;
7215 for (++I; isa<DbgInfoIntrinsic>(I); ++I)
7216 ;
7217 BranchInst *BI2 = dyn_cast<BranchInst>(I);
7218 if (!BI2 || !BI2->isIdenticalTo(BI))
7219 continue;
7220
7221 std::vector<DominatorTree::UpdateType> Updates;
7222
7223 // We've found an identical block. Update our predecessors to take that
7224 // path instead and make ourselves dead.
7226 for (BasicBlock *Pred : UniquePreds) {
7227 InvokeInst *II = cast<InvokeInst>(Pred->getTerminator());
7228 assert(II->getNormalDest() != BB && II->getUnwindDest() == BB &&
7229 "unexpected successor");
7230 II->setUnwindDest(OtherPred);
7231 if (DTU) {
7232 Updates.push_back({DominatorTree::Insert, Pred, OtherPred});
7233 Updates.push_back({DominatorTree::Delete, Pred, BB});
7234 }
7235 }
7236
7237 // The debug info in OtherPred doesn't cover the merged control flow that
7238 // used to go through BB. We need to delete it or update it.
7239 for (Instruction &Inst : llvm::make_early_inc_range(*OtherPred))
7240 if (isa<DbgInfoIntrinsic>(Inst))
7241 Inst.eraseFromParent();
7242
7244 for (BasicBlock *Succ : UniqueSuccs) {
7245 Succ->removePredecessor(BB);
7246 if (DTU)
7247 Updates.push_back({DominatorTree::Delete, BB, Succ});
7248 }
7249
7250 IRBuilder<> Builder(BI);
7251 Builder.CreateUnreachable();
7252 BI->eraseFromParent();
7253 if (DTU)
7254 DTU->applyUpdates(Updates);
7255 return true;
7256 }
7257 return false;
7258}
7259
7260bool SimplifyCFGOpt::simplifyBranch(BranchInst *Branch, IRBuilder<> &Builder) {
7261 return Branch->isUnconditional() ? simplifyUncondBranch(Branch, Builder)
7262 : simplifyCondBranch(Branch, Builder);
7263}
7264
7265bool SimplifyCFGOpt::simplifyUncondBranch(BranchInst *BI,
7266 IRBuilder<> &Builder) {
7267 BasicBlock *BB = BI->getParent();
7268 BasicBlock *Succ = BI->getSuccessor(0);
7269
7270 // If the Terminator is the only non-phi instruction, simplify the block.
7271 // If LoopHeader is provided, check if the block or its successor is a loop
7272 // header. (This is for early invocations before loop simplify and
7273 // vectorization to keep canonical loop forms for nested loops. These blocks
7274 // can be eliminated when the pass is invoked later in the back-end.)
7275 // Note that if BB has only one predecessor then we do not introduce new
7276 // backedge, so we can eliminate BB.
7277 bool NeedCanonicalLoop =
7278 Options.NeedCanonicalLoop &&
7279 (!LoopHeaders.empty() && BB->hasNPredecessorsOrMore(2) &&
7280 (is_contained(LoopHeaders, BB) || is_contained(LoopHeaders, Succ)));
7282 if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() &&
7283 !NeedCanonicalLoop && TryToSimplifyUncondBranchFromEmptyBlock(BB, DTU))
7284 return true;
7285
7286 // If the only instruction in the block is a seteq/setne comparison against a
7287 // constant, try to simplify the block.
7288 if (ICmpInst *ICI = dyn_cast<ICmpInst>(I))
7289 if (ICI->isEquality() && isa<ConstantInt>(ICI->getOperand(1))) {
7290 for (++I; isa<DbgInfoIntrinsic>(I); ++I)
7291 ;
7292 if (I->isTerminator() &&
7293 tryToSimplifyUncondBranchWithICmpInIt(ICI, Builder))
7294 return true;
7295 }
7296
7297 // See if we can merge an empty landing pad block with another which is
7298 // equivalent.
7299 if (LandingPadInst *LPad = dyn_cast<LandingPadInst>(I)) {
7300 for (++I; isa<DbgInfoIntrinsic>(I); ++I)
7301 ;
7302 if (I->isTerminator() && TryToMergeLandingPad(LPad, BI, BB, DTU))
7303 return true;
7304 }
7305
7306 // If this basic block is ONLY a compare and a branch, and if a predecessor
7307 // branches to us and our successor, fold the comparison into the
7308 // predecessor and use logical operations to update the incoming value
7309 // for PHI nodes in common successor.
7310 if (Options.SpeculateBlocks &&
7311 FoldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI,
7312 Options.BonusInstThreshold))
7313 return requestResimplify();
7314 return false;
7315}
7316
7318 BasicBlock *PredPred = nullptr;
7319 for (auto *P : predecessors(BB)) {
7320 BasicBlock *PPred = P->getSinglePredecessor();
7321 if (!PPred || (PredPred && PredPred != PPred))
7322 return nullptr;
7323 PredPred = PPred;
7324 }
7325 return PredPred;
7326}
7327
7328bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
7329 assert(
7330 !isa<ConstantInt>(BI->getCondition()) &&
7331 BI->getSuccessor(0) != BI->getSuccessor(1) &&
7332 "Tautological conditional branch should have been eliminated already.");
7333
7334 BasicBlock *BB = BI->getParent();
7335 if (!Options.SimplifyCondBranch ||
7336 BI->getFunction()->hasFnAttribute(Attribute::OptForFuzzing))
7337 return false;
7338
7339 // Conditional branch
7340 if (isValueEqualityComparison(BI)) {
7341 // If we only have one predecessor, and if it is a branch on this value,
7342 // see if that predecessor totally determines the outcome of this
7343 // switch.
7344 if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
7345 if (SimplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred, Builder))
7346 return requestResimplify();
7347
7348 // This block must be empty, except for the setcond inst, if it exists.
7349 // Ignore dbg and pseudo intrinsics.
7350 auto I = BB->instructionsWithoutDebug(true).begin();
7351 if (&*I == BI) {
7352 if (FoldValueComparisonIntoPredecessors(BI, Builder))
7353 return requestResimplify();
7354 } else if (&*I == cast<Instruction>(BI->getCondition())) {
7355 ++I;
7356 if (&*I == BI && FoldValueComparisonIntoPredecessors(BI, Builder))
7357 return requestResimplify();
7358 }
7359 }
7360
7361 // Try to turn "br (X == 0 | X == 1), T, F" into a switch instruction.
7362 if (SimplifyBranchOnICmpChain(BI, Builder, DL))
7363 return true;
7364
7365 // If this basic block has dominating predecessor blocks and the dominating
7366 // blocks' conditions imply BI's condition, we know the direction of BI.
7367 std::optional<bool> Imp = isImpliedByDomCondition(BI->getCondition(), BI, DL);
7368 if (Imp) {
7369 // Turn this into a branch on constant.
7370 auto *OldCond = BI->getCondition();
7371 ConstantInt *TorF = *Imp ? ConstantInt::getTrue(BB->getContext())
7372 : ConstantInt::getFalse(BB->getContext());
7373 BI->setCondition(TorF);
7375 return requestResimplify();
7376 }
7377
7378 // If this basic block is ONLY a compare and a branch, and if a predecessor
7379 // branches to us and one of our successors, fold the comparison into the
7380 // predecessor and use logical operations to pick the right destination.
7381 if (Options.SpeculateBlocks &&
7382 FoldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI,
7383 Options.BonusInstThreshold))
7384 return requestResimplify();
7385
7386 // We have a conditional branch to two blocks that are only reachable
7387 // from BI. We know that the condbr dominates the two blocks, so see if
7388 // there is any identical code in the "then" and "else" blocks. If so, we
7389 // can hoist it up to the branching block.
7390 if (BI->getSuccessor(0)->getSinglePredecessor()) {
7391 if (BI->getSuccessor(1)->getSinglePredecessor()) {
7392 if (HoistCommon && hoistCommonCodeFromSuccessors(
7393 BI->getParent(), !Options.HoistCommonInsts))
7394 return requestResimplify();
7395 } else {
7396 // If Successor #1 has multiple preds, we may be able to conditionally
7397 // execute Successor #0 if it branches to Successor #1.
7398 Instruction *Succ0TI = BI->getSuccessor(0)->getTerminator();
7399 if (Succ0TI->getNumSuccessors() == 1 &&
7400 Succ0TI->getSuccessor(0) == BI->getSuccessor(1))
7401 if (SpeculativelyExecuteBB(BI, BI->getSuccessor(0)))
7402 return requestResimplify();
7403 }
7404 } else if (BI->getSuccessor(1)->getSinglePredecessor()) {
7405 // If Successor #0 has multiple preds, we may be able to conditionally
7406 // execute Successor #1 if it branches to Successor #0.
7407 Instruction *Succ1TI = BI->getSuccessor(1)->getTerminator();
7408 if (Succ1TI->getNumSuccessors() == 1 &&
7409 Succ1TI->getSuccessor(0) == BI->getSuccessor(0))
7410 if (SpeculativelyExecuteBB(BI, BI->getSuccessor(1)))
7411 return requestResimplify();
7412 }
7413
7414 // If this is a branch on something for which we know the constant value in
7415 // predecessors (e.g. a phi node in the current block), thread control
7416 // through this block.
7418 return requestResimplify();
7419
7420 // Scan predecessor blocks for conditional branches.
7421 for (BasicBlock *Pred : predecessors(BB))
7422 if (BranchInst *PBI = dyn_cast<BranchInst>(Pred->getTerminator()))
7423 if (PBI != BI && PBI->isConditional())
7424 if (SimplifyCondBranchToCondBranch(PBI, BI, DTU, DL, TTI))
7425 return requestResimplify();
7426
7427 // Look for diamond patterns.
7428 if (MergeCondStores)
7430 if (BranchInst *PBI = dyn_cast<BranchInst>(PrevBB->getTerminator()))
7431 if (PBI != BI && PBI->isConditional())
7432 if (mergeConditionalStores(PBI, BI, DTU, DL, TTI))
7433 return requestResimplify();
7434
7435 return false;
7436}
7437
7438/// Check if passing a value to an instruction will cause undefined behavior.
7439static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified) {
7440 Constant *C = dyn_cast<Constant>(V);
7441 if (!C)
7442 return false;
7443
7444 if (I->use_empty())
7445 return false;
7446
7447 if (C->isNullValue() || isa<UndefValue>(C)) {
7448 // Only look at the first use, avoid hurting compile time with long uselists
7449 auto *Use = cast<Instruction>(*I->user_begin());
7450 // Bail out if Use is not in the same BB as I or Use == I or Use comes
7451 // before I in the block. The latter two can be the case if Use is a PHI
7452 // node.
7453 if (Use->getParent() != I->getParent() || Use == I || Use->comesBefore(I))
7454 return false;
7455
7456 // Now make sure that there are no instructions in between that can alter
7457 // control flow (eg. calls)
7458 auto InstrRange =
7459 make_range(std::next(I->getIterator()), Use->getIterator());
7460 if (any_of(InstrRange, [](Instruction &I) {
7462 }))
7463 return false;
7464
7465 // Look through GEPs. A load from a GEP derived from NULL is still undefined
7466 if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Use))
7467 if (GEP->getPointerOperand() == I) {
7468 // The current base address is null, there are four cases to consider:
7469 // getelementptr (TY, null, 0) -> null
7470 // getelementptr (TY, null, not zero) -> may be modified
7471 // getelementptr inbounds (TY, null, 0) -> null
7472 // getelementptr inbounds (TY, null, not zero) -> poison iff null is
7473 // undefined?
7474 if (!GEP->hasAllZeroIndices() &&
7475 (!GEP->isInBounds() ||
7476 NullPointerIsDefined(GEP->getFunction(),
7477 GEP->getPointerAddressSpace())))
7478 PtrValueMayBeModified = true;
7479 return passingValueIsAlwaysUndefined(V, GEP, PtrValueMayBeModified);
7480 }
7481
7482 // Look through return.
7483 if (ReturnInst *Ret = dyn_cast<ReturnInst>(Use)) {
7484 bool HasNoUndefAttr =
7485 Ret->getFunction()->hasRetAttribute(Attribute::NoUndef);
7486 // Return undefined to a noundef return value is undefined.
7487 if (isa<UndefValue>(C) && HasNoUndefAttr)
7488 return true;
7489 // Return null to a nonnull+noundef return value is undefined.
7490 if (C->isNullValue() && HasNoUndefAttr &&
7491 Ret->getFunction()->hasRetAttribute(Attribute::NonNull)) {
7492 return !PtrValueMayBeModified;
7493 }
7494 }
7495
7496 // Look through bitcasts.
7497 if (BitCastInst *BC = dyn_cast<BitCastInst>(Use))
7498 return passingValueIsAlwaysUndefined(V, BC, PtrValueMayBeModified);
7499
7500 // Load from null is undefined.
7501 if (LoadInst *LI = dyn_cast<LoadInst>(Use))
7502 if (!LI->isVolatile())
7503 return !NullPointerIsDefined(LI->getFunction(),
7504 LI->getPointerAddressSpace());
7505
7506 // Store to null is undefined.
7507 if (StoreInst *SI = dyn_cast<StoreInst>(Use))
7508 if (!SI->isVolatile())
7509 return (!NullPointerIsDefined(SI->getFunction(),
7510 SI->getPointerAddressSpace())) &&
7511 SI->getPointerOperand() == I;
7512
7513 if (auto *CB = dyn_cast<CallBase>(Use)) {
7514 if (C->isNullValue() && NullPointerIsDefined(CB->getFunction()))
7515 return false;
7516 // A call to null is undefined.
7517 if (CB->getCalledOperand() == I)
7518 return true;
7519
7520 if (C->isNullValue()) {
7521 for (const llvm::Use &Arg : CB->args())
7522 if (Arg == I) {
7523 unsigned ArgIdx = CB->getArgOperandNo(&Arg);
7524 if (CB->isPassingUndefUB(ArgIdx) &&
7525 CB->paramHasAttr(ArgIdx, Attribute::NonNull)) {
7526 // Passing null to a nonnnull+noundef argument is undefined.
7527 return !PtrValueMayBeModified;
7528 }
7529 }
7530 } else if (isa<UndefValue>(C)) {
7531 // Passing undef to a noundef argument is undefined.
7532 for (const llvm::Use &Arg : CB->args())
7533 if (Arg == I) {
7534 unsigned ArgIdx = CB->getArgOperandNo(&Arg);
7535 if (CB->isPassingUndefUB(ArgIdx)) {
7536 // Passing undef to a noundef argument is undefined.
7537 return true;
7538 }
7539 }
7540 }
7541 }
7542 }
7543 return false;
7544}
7545
7546/// If BB has an incoming value that will always trigger undefined behavior
7547/// (eg. null pointer dereference), remove the branch leading here.
7549 DomTreeUpdater *DTU,
7550 AssumptionCache *AC) {
7551 for (PHINode &PHI : BB->phis())
7552 for (unsigned i = 0, e = PHI.getNumIncomingValues(); i != e; ++i)
7553 if (passingValueIsAlwaysUndefined(PHI.getIncomingValue(i), &PHI)) {
7554 BasicBlock *Predecessor = PHI.getIncomingBlock(i);
7555 Instruction *T = Predecessor->getTerminator();
7556 IRBuilder<> Builder(T);
7557 if (BranchInst *BI = dyn_cast<BranchInst>(T)) {
7558 BB->removePredecessor(Predecessor);
7559 // Turn unconditional branches into unreachables and remove the dead
7560 // destination from conditional branches.
7561 if (BI->isUnconditional())
7562 Builder.CreateUnreachable();
7563 else {
7564 // Preserve guarding condition in assume, because it might not be
7565 // inferrable from any dominating condition.
7566 Value *Cond = BI->getCondition();
7567 CallInst *Assumption;
7568 if (BI->getSuccessor(0) == BB)
7569 Assumption = Builder.CreateAssumption(Builder.CreateNot(Cond));
7570 else
7571 Assumption = Builder.CreateAssumption(Cond);
7572 if (AC)
7573 AC->registerAssumption(cast<AssumeInst>(Assumption));
7574 Builder.CreateBr(BI->getSuccessor(0) == BB ? BI->getSuccessor(1)
7575 : BI->getSuccessor(0));
7576 }
7577 BI->eraseFromParent();
7578 if (DTU)
7579 DTU->applyUpdates({{DominatorTree::Delete, Predecessor, BB}});
7580 return true;
7581 } else if (SwitchInst *SI = dyn_cast<SwitchInst>(T)) {
7582 // Redirect all branches leading to UB into
7583 // a newly created unreachable block.
7584 BasicBlock *Unreachable = BasicBlock::Create(
7585 Predecessor->getContext(), "unreachable", BB->getParent(), BB);
7586 Builder.SetInsertPoint(Unreachable);
7587 // The new block contains only one instruction: Unreachable
7588 Builder.CreateUnreachable();
7589 for (const auto &Case : SI->cases())
7590 if (Case.getCaseSuccessor() == BB) {
7591 BB->removePredecessor(Predecessor);
7592 Case.setSuccessor(Unreachable);
7593 }
7594 if (SI->getDefaultDest() == BB) {
7595 BB->removePredecessor(Predecessor);
7596 SI->setDefaultDest(Unreachable);
7597 }
7598
7599 if (DTU)
7600 DTU->applyUpdates(
7601 { { DominatorTree::Insert, Predecessor, Unreachable },
7602 { DominatorTree::Delete, Predecessor, BB } });
7603 return true;
7604 }
7605 }
7606
7607 return false;
7608}
7609
7610bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) {
7611 bool Changed = false;
7612
7613 assert(BB && BB->getParent() && "Block not embedded in function!");
7614 assert(BB->getTerminator() && "Degenerate basic block encountered!");
7615
7616 // Remove basic blocks that have no predecessors (except the entry block)...
7617 // or that just have themself as a predecessor. These are unreachable.
7618 if ((pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) ||
7619 BB->getSinglePredecessor() == BB) {
7620 LLVM_DEBUG(dbgs() << "Removing BB: \n" << *BB);
7621 DeleteDeadBlock(BB, DTU);
7622 return true;
7623 }
7624
7625 // Check to see if we can constant propagate this terminator instruction
7626 // away...
7627 Changed |= ConstantFoldTerminator(BB, /*DeleteDeadConditions=*/true,
7628 /*TLI=*/nullptr, DTU);
7629
7630 // Check for and eliminate duplicate PHI nodes in this block.
7631 Changed |= EliminateDuplicatePHINodes(BB);
7632
7633 // Check for and remove branches that will always cause undefined behavior.
7635 return requestResimplify();
7636
7637 // Merge basic blocks into their predecessor if there is only one distinct
7638 // pred, and if there is only one distinct successor of the predecessor, and
7639 // if there are no PHI nodes.
7640 if (MergeBlockIntoPredecessor(BB, DTU))
7641 return true;
7642
7643 if (SinkCommon && Options.SinkCommonInsts)
7644 if (SinkCommonCodeFromPredecessors(BB, DTU) ||
7645 MergeCompatibleInvokes(BB, DTU)) {
7646 // SinkCommonCodeFromPredecessors() does not automatically CSE PHI's,
7647 // so we may now how duplicate PHI's.
7648 // Let's rerun EliminateDuplicatePHINodes() first,
7649 // before FoldTwoEntryPHINode() potentially converts them into select's,
7650 // after which we'd need a whole EarlyCSE pass run to cleanup them.
7651 return true;
7652 }
7653
7654 IRBuilder<> Builder(BB);
7655
7656 if (Options.SpeculateBlocks &&
7657 !BB->getParent()->hasFnAttribute(Attribute::OptForFuzzing)) {
7658 // If there is a trivial two-entry PHI node in this basic block, and we can
7659 // eliminate it, do so now.
7660 if (auto *PN = dyn_cast<PHINode>(BB->begin()))
7661 if (PN->getNumIncomingValues() == 2)
7662 if (FoldTwoEntryPHINode(PN, TTI, DTU, DL))
7663 return true;
7664 }
7665
7667 Builder.SetInsertPoint(Terminator);
7668 switch (Terminator->getOpcode()) {
7669 case Instruction::Br:
7670 Changed |= simplifyBranch(cast<BranchInst>(Terminator), Builder);
7671 break;
7672 case Instruction::Resume:
7673 Changed |= simplifyResume(cast<ResumeInst>(Terminator), Builder);
7674 break;
7675 case Instruction::CleanupRet:
7676 Changed |= simplifyCleanupReturn(cast<CleanupReturnInst>(Terminator));
7677 break;
7678 case Instruction::Switch:
7679 Changed |= simplifySwitch(cast<SwitchInst>(Terminator), Builder);
7680 break;
7681 case Instruction::Unreachable:
7682 Changed |= simplifyUnreachable(cast<UnreachableInst>(Terminator));
7683 break;
7684 case Instruction::IndirectBr:
7685 Changed |= simplifyIndirectBr(cast<IndirectBrInst>(Terminator));
7686 break;
7687 }
7688
7689 return Changed;
7690}
7691
7692bool SimplifyCFGOpt::run(BasicBlock *BB) {
7693 bool Changed = false;
7694
7695 // Repeated simplify BB as long as resimplification is requested.
7696 do {
7697 Resimplify = false;
7698
7699 // Perform one round of simplifcation. Resimplify flag will be set if
7700 // another iteration is requested.
7701 Changed |= simplifyOnce(BB);
7702 } while (Resimplify);
7703
7704 return Changed;
7705}
7706
7709 ArrayRef<WeakVH> LoopHeaders) {
7710 return SimplifyCFGOpt(TTI, DTU, BB->getModule()->getDataLayout(), LoopHeaders,
7711 Options)
7712 .run(BB);
7713}
#define Fail
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
amdgpu AMDGPU Register Bank Select
Rewrite undef for PHI
This file implements a class to represent arbitrary precision integral constant values and operations...
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate any type of IT block"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow complex IT blocks")))
Function Alias Analysis Results
This file contains the simple types necessary to represent the attributes associated with functions a...
static const Function * getParent(const Value *V)
BlockVerifier::State From
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
Returns the sub type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file defines the DenseMap class.
std::string Name
uint64_t Size
std::optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1290
bool End
Definition: ELF_riscv.cpp:480
DenseMap< Block *, BlockRelaxAux > Blocks
Definition: ELF_riscv.cpp:507
Hexagon Common GEP
hexagon gen pred
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
static Constant * getFalse(Type *Ty)
For a boolean type or a vector of boolean type, return false or a vector with every element false.
static LVOptions Options
Definition: LVOptions.cpp:25
#define I(x, y, z)
Definition: MD5.cpp:58
This file implements a map that provides insertion order iteration.
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
This file contains the declarations for metadata subclasses.
Module.h This file contains the declarations for the Module class.
LLVMContext & Context
#define P(N)
const char LLVMTargetMachineRef LLVMPassBuilderOptionsRef Options
Module * Mod
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
Provides some synthesis utilities to produce sequences of values.
This file defines generic set operations that may be used on set's of different types,...
This file implements a set that has insertion order iteration characteristics.
static bool ValidLookupTableConstant(Constant *C, const TargetTransformInfo &TTI)
Return true if the backend will be able to handle initializing an array of constants like C.
static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB)
Return true if we can thread a branch across this block.
static StoreInst * findUniqueStoreInBlocks(BasicBlock *BB1, BasicBlock *BB2)
static Constant * ConstantFold(Instruction *I, const DataLayout &DL, const SmallDenseMap< Value *, Constant * > &ConstantPool)
Try to fold instruction I into a constant.
static Constant * LookupConstant(Value *V, const SmallDenseMap< Value *, Constant * > &ConstantPool)
If V is a Constant, return it.
static bool validateAndCostRequiredSelects(BasicBlock *BB, BasicBlock *ThenBB, BasicBlock *EndBB, unsigned &SpeculatedInstructions, InstructionCost &Cost, const TargetTransformInfo &TTI)
Estimate the cost of the insertion(s) and check that the PHI nodes can be converted to selects.
static cl::opt< bool > SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true), cl::desc("Sink common instructions down to the end block"))
static void removeSwitchAfterSelectFold(SwitchInst *SI, PHINode *PHI, Value *SelectValue, IRBuilder<> &Builder, DomTreeUpdater *DTU)
static bool SafeToMergeTerminators(Instruction *SI1, Instruction *SI2, SmallSetVector< BasicBlock *, 4 > *FailBlocks=nullptr)
Return true if it is safe to merge these two terminator instructions together.
static void CloneInstructionsIntoPredecessorBlockAndUpdateSSAUses(BasicBlock *BB, BasicBlock *PredBlock, ValueToValueMapTy &VMap)
static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB, BasicBlock *QTB, BasicBlock *QFB, BasicBlock *PostBB, Value *Address, bool InvertPCond, bool InvertQCond, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
static cl::opt< unsigned > MaxSpeculationDepth("max-speculation-depth", cl::Hidden, cl::init(10), cl::desc("Limit maximum recursion depth when calculating costs of " "speculatively executed instructions"))
static std::optional< std::tuple< BasicBlock *, Instruction::BinaryOps, bool > > shouldFoldCondBranchesToCommonDestination(BranchInst *BI, BranchInst *PBI, const TargetTransformInfo *TTI)
Determine if the two branches share a common destination and deduce a glue that joins the branches' c...
static bool mergeCleanupPad(CleanupReturnInst *RI)
static bool isVectorOp(Instruction &I)
Return if an instruction's type or any of its operands' types are a vector type.
static void GetBranchWeights(Instruction *TI, SmallVectorImpl< uint64_t > &Weights)
Get Weights of a given terminator, the default weight is at the front of the vector.
static cl::opt< unsigned > MaxSwitchCasesPerResult("max-switch-cases-per-result", cl::Hidden, cl::init(16), cl::desc("Limit cases to analyze when converting a switch to select"))
static BasicBlock * allPredecessorsComeFromSameSource(BasicBlock *BB)
static ConstantInt * GetConstantInt(Value *V, const DataLayout &DL)
Extract ConstantInt from value, looking through IntToPtr and PointerNullValue.
static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder, const DataLayout &DL, const TargetTransformInfo &TTI)
Try to transform a switch that has "holes" in it to a contiguous sequence of cases.
static bool getCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest, BasicBlock **CommonDest, SmallVectorImpl< std::pair< PHINode *, Constant * > > &Res, const DataLayout &DL, const TargetTransformInfo &TTI)
Try to determine the resulting constant values in phi nodes at the common destination basic block,...
static bool performBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI, DomTreeUpdater *DTU, MemorySSAUpdater *MSSAU, const TargetTransformInfo *TTI)
static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified=false)
Check if passing a value to an instruction will cause undefined behavior.
static void EliminateBlockCases(BasicBlock *BB, std::vector< ValueEqualityComparisonCase > &Cases)
Given a vector of bb/value pairs, remove any entries in the list that match the specified block.
static bool isSafeToHoistInstr(Instruction *I, unsigned Flags)
static std::optional< bool > FoldCondBranchOnValueKnownInPredecessorImpl(BranchInst *BI, DomTreeUpdater *DTU, const DataLayout &DL, AssumptionCache *AC)
If we have a conditional branch on something for which we know the constant value in predecessors (e....
static bool isSafeToHoistInvoke(BasicBlock *BB1, BasicBlock *BB2, Instruction *I1, Instruction *I2)
static cl::opt< bool > MergeCondStoresAggressively("simplifycfg-merge-cond-stores-aggressively", cl::Hidden, cl::init(false), cl::desc("When merging conditional stores, do so even if the resultant " "basic blocks are unlikely to be if-converted as a result"))
static PHINode * FindPHIForConditionForwarding(ConstantInt *CaseValue, BasicBlock *BB, int *PhiIndex)
If BB would be eligible for simplification by TryToSimplifyUncondBranchFromEmptyBlock (i....
static bool extractPredSuccWeights(BranchInst *PBI, BranchInst *BI, uint64_t &PredTrueWeight, uint64_t &PredFalseWeight, uint64_t &SuccTrueWeight, uint64_t &SuccFalseWeight)
Return true if either PBI or BI has branch weight available, and store the weights in {Pred|Succ}{Tru...
static cl::opt< unsigned > TwoEntryPHINodeFoldingThreshold("two-entry-phi-node-folding-threshold", cl::Hidden, cl::init(4), cl::desc("Control the maximal total instruction cost that we are willing " "to speculatively execute to fold a 2-entry PHI node into a " "select (default = 4)"))
static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
If we have a conditional branch as a predecessor of another block, this function tries to simplify it...
static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred, BasicBlock *ExistPred, MemorySSAUpdater *MSSAU=nullptr)
Update PHI nodes in Succ to indicate that there will now be entries in it from the 'NewPred' block.
static cl::opt< bool > SpeculateOneExpensiveInst("speculate-one-expensive-inst", cl::Hidden, cl::init(true), cl::desc("Allow exactly one expensive instruction to be speculatively " "executed"))
static cl::opt< int > MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden, cl::init(10), cl::desc("Max size of a block which is still considered " "small enough to thread through"))
static bool SinkCommonCodeFromPredecessors(BasicBlock *BB, DomTreeUpdater *DTU)
Check whether BB's predecessors end with unconditional branches.
static void setBranchWeights(SwitchInst *SI, ArrayRef< uint32_t > Weights)
static Value * foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector, Constant *DefaultResult, Value *Condition, IRBuilder<> &Builder)
static bool isCleanupBlockEmpty(iterator_range< BasicBlock::iterator > R)
static Value * ensureValueAvailableInSuccessor(Value *V, BasicBlock *BB, Value *AlternativeV=nullptr)
static bool ShouldBuildLookupTable(SwitchInst *SI, uint64_t TableSize, const TargetTransformInfo &TTI, const DataLayout &DL, const SmallDenseMap< PHINode *, Type * > &ResultTypes)
Determine whether a lookup table should be built for this switch, based on the number of cases,...
static Value * createLogicalOp(IRBuilderBase &Builder, Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="")
static bool shouldHoistCommonInstructions(Instruction *I1, Instruction *I2, const TargetTransformInfo &TTI)
Helper function for hoistCommonCodeFromSuccessors.
static bool IncomingValuesAreCompatible(BasicBlock *BB, ArrayRef< BasicBlock * > IncomingBlocks, SmallPtrSetImpl< Value * > *EquivalenceSet=nullptr)
Return true if all the PHI nodes in the basic block BB receive compatible (identical) incoming values...
static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
SkipFlags
@ SkipReadMem
@ SkipSideEffect
@ SkipImplicitControlFlow
static cl::opt< bool > EnableMergeCompatibleInvokes("simplifycfg-merge-compatible-invokes", cl::Hidden, cl::init(true), cl::desc("Allow SimplifyCFG to merge invokes together when appropriate"))
static bool FoldCondBranchOnValueKnownInPredecessor(BranchInst *BI, DomTreeUpdater *DTU, const DataLayout &DL, AssumptionCache *AC)
static bool trySwitchToSelect(SwitchInst *SI, IRBuilder<> &Builder, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
If a switch is only used to initialize one or more phi nodes in a common successor block with only tw...
static cl::opt< unsigned > BranchFoldThreshold("simplifycfg-branch-fold-threshold", cl::Hidden, cl::init(2), cl::desc("Maximum cost of combining conditions when " "folding branches"))
static bool isSwitchDense(uint64_t NumCases, uint64_t CaseRange)
static bool isTypeLegalForLookupTable(Type *Ty, const TargetTransformInfo &TTI, const DataLayout &DL)
static bool ForwardSwitchConditionToPHI(SwitchInst *SI)
Try to forward the condition of a switch instruction to a phi node dominated by the switch,...
static bool eliminateDeadSwitchCases(SwitchInst *SI, DomTreeUpdater *DTU, AssumptionCache *AC, const DataLayout &DL)
Compute masked bits for the condition of a switch and use it to remove dead cases.
static int ConstantIntSortPredicate(ConstantInt *const *P1, ConstantInt *const *P2)
static Value * isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB, BasicBlock *StoreBB, BasicBlock *EndBB)
Determine if we can hoist or sink a sole store instruction out of a conditional block.
static cl::opt< bool > HoistCommon("simplifycfg-hoist-common", cl::Hidden, cl::init(true), cl::desc("Hoist common instructions up to the parent block"))
static void createUnreachableSwitchDefault(SwitchInst *Switch, DomTreeUpdater *DTU)
static bool initializeUniqueCases(SwitchInst *SI, PHINode *&PHI, BasicBlock *&CommonDest, SwitchCaseResultVectorTy &UniqueResults, Constant *&DefaultResult, const DataLayout &DL, const TargetTransformInfo &TTI, uintptr_t MaxUniqueResults)
static void FitWeights(MutableArrayRef< uint64_t > Weights)
Keep halving the weights until all can fit in uint32_t.
static cl::opt< bool > HoistCondStores("simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true), cl::desc("Hoist conditional stores if an unconditional store precedes"))
static InstructionCost computeSpeculationCost(const User *I, const TargetTransformInfo &TTI)
Compute an abstract "cost" of speculating the given instruction, which is assumed to be safe to specu...
static void EraseTerminatorAndDCECond(Instruction *TI, MemorySSAUpdater *MSSAU=nullptr)
static unsigned skippedInstrFlags(Instruction *I)
static bool replacingOperandWithVariableIsCheap(const Instruction *I, int OpIdx)
static bool ValuesOverlap(std::vector< ValueEqualityComparisonCase > &C1, std::vector< ValueEqualityComparisonCase > &C2)
Return true if there are any keys in C1 that exist in C2 as well.
static bool canSinkInstructions(ArrayRef< Instruction * > Insts, DenseMap< Instruction *, SmallVector< Value *, 4 > > &PHIOperands)
static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
If the switch is only used to initialize one or more phi nodes in a common successor block with diffe...
static size_t mapCaseToResult(ConstantInt *CaseVal, SwitchCaseResultVectorTy &UniqueResults, Constant *Result)
static bool sinkLastInstruction(ArrayRef< BasicBlock * > Blocks)
static void MergeCompatibleInvokesImpl(ArrayRef< InvokeInst * > Invokes, DomTreeUpdater *DTU)
static bool ShouldUseSwitchConditionAsTableIndex(ConstantInt &MinCaseVal, const ConstantInt &MaxCaseVal, bool HasDefaultResults, const SmallDenseMap< PHINode *, Type * > &ResultTypes, const DataLayout &DL, const TargetTransformInfo &TTI)
static void reuseTableCompare(User *PhiUser, BasicBlock *PhiBlock, BranchInst *RangeCheckBranch, Constant *DefaultValue, const SmallVectorImpl< std::pair< ConstantInt *, Constant * > > &Values)
Try to reuse the switch table index compare.
static bool tryWidenCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, DomTreeUpdater *DTU)
If the previous block ended with a widenable branch, determine if reusing the target block is profita...
static void hoistLockstepIdenticalDPValues(Instruction *TI, Instruction *I1, SmallVectorImpl< Instruction * > &OtherInsts)
Hoists DPValues from I1 and OtherInstrs that are identical in lock-step to TI.
static bool simplifySwitchOfPowersOfTwo(SwitchInst *SI, IRBuilder<> &Builder, const DataLayout &DL, const TargetTransformInfo &TTI)
Tries to transform switch of powers of two to reduce switch range.
static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, DomTreeUpdater *DTU, const DataLayout &DL)
Given a BB that starts with the specified two-entry PHI node, see if we can eliminate it.
static bool CasesAreContiguous(SmallVectorImpl< ConstantInt * > &Cases)
static bool TryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI, BasicBlock *BB, DomTreeUpdater *DTU)
Given a block with only a single landing pad and an unconditional branch try to find another basic bl...
static cl::opt< bool > MergeCondStores("simplifycfg-merge-cond-stores", cl::Hidden, cl::init(true), cl::desc("Hoist conditional stores even if an unconditional store does not " "precede - hoist multiple conditional stores into a single " "predicated store"))
static bool isLifeTimeMarker(const Instruction *I)
static bool MergeCompatibleInvokes(BasicBlock *BB, DomTreeUpdater *DTU)
If this block is a landingpad exception handling block, categorize all the predecessor invokes into s...
static cl::opt< unsigned > BranchFoldToCommonDestVectorMultiplier("simplifycfg-branch-fold-common-dest-vector-multiplier", cl::Hidden, cl::init(2), cl::desc("Multiplier to apply to threshold when determining whether or not " "to fold branch to common destination when vector operations are " "present"))
static cl::opt< unsigned > HoistCommonSkipLimit("simplifycfg-hoist-common-skip-limit", cl::Hidden, cl::init(20), cl::desc("Allow reordering across at most this many " "instructions when hoisting"))
static bool removeEmptyCleanup(CleanupReturnInst *RI, DomTreeUpdater *DTU)
static cl::opt< unsigned > PHINodeFoldingThreshold("phi-node-folding-threshold", cl::Hidden, cl::init(2), cl::desc("Control the amount of phi node folding to perform (default = 2)"))
static bool removeUndefIntroducingPredecessor(BasicBlock *BB, DomTreeUpdater *DTU, AssumptionCache *AC)
If BB has an incoming value that will always trigger undefined behavior (eg.
static ConstantInt * getKnownValueOnEdge(Value *V, BasicBlock *From, BasicBlock *To)
static bool dominatesMergePoint(Value *V, BasicBlock *BB, SmallPtrSetImpl< Instruction * > &AggressiveInsts, InstructionCost &Cost, InstructionCost Budget, const TargetTransformInfo &TTI, unsigned Depth=0)
If we have a merge point of an "if condition" as accepted above, return true if the specified value d...
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
This pass exposes codegen information to IR-level passes.
This defines the Use class.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition: Value.cpp:469
Value * RHS
Value * LHS
Class for arbitrary precision integers.
Definition: APInt.h:76
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:981
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1491
unsigned popcount() const
Count the number of bits set.
Definition: APInt.h:1620
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition: APInt.h:1179
bool intersects(const APInt &RHS) const
This operation tests if there are any pairs of corresponding bits between this APInt and RHS that are...
Definition: APInt.h:1227
bool sle(const APInt &RHS) const
Signed less or equal comparison.
Definition: APInt.h:1144
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition: APInt.h:1482
bool isStrictlyPositive() const
Determine if this APInt Value is positive.
Definition: APInt.h:334
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
Definition: APInt.h:453
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition: APInt.h:1235
bool slt(const APInt &RHS) const
Signed less than comparison.
Definition: APInt.h:1108
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition: APInt.h:178
APInt ssub_ov(const APInt &RHS, bool &Overflow) const
Definition: APInt.cpp:1947
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
const T & back() const
back - Get the last element.
Definition: ArrayRef.h:174
const T & front() const
front - Get the first element.
Definition: ArrayRef.h:168
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
A cache of @llvm.assume calls within a function.
void registerAssumption(AssumeInst *CI)
Add an @llvm.assume intrinsic to this function's cache.
bool getValueAsBool() const
Return the attribute's value as a boolean.
Definition: Attributes.cpp:335
LLVM Basic Block Representation.
Definition: BasicBlock.h:60
iterator end()
Definition: BasicBlock.h:442
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:429
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:498
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:396
iterator_range< filter_iterator< BasicBlock::const_iterator, std::function< bool(const Instruction &)> > > instructionsWithoutDebug(bool SkipPseudoOp=true) const
Return a const iterator range over the instructions in the block, skipping any debug instructions.
Definition: BasicBlock.cpp:234
bool hasAddressTaken() const
Returns true if there are any uses of this basic block other than direct branches,...
Definition: BasicBlock.h:639
const Instruction * getFirstNonPHI() const
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
Definition: BasicBlock.cpp:347
const Instruction & front() const
Definition: BasicBlock.h:452
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:198
bool hasNPredecessors(unsigned N) const
Return true if this block has exactly N predecessors.
Definition: BasicBlock.cpp:461
const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
Definition: BasicBlock.cpp:477
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:439
const CallInst * getTerminatingDeoptimizeCall() const
Returns the call instruction calling @llvm.experimental.deoptimize prior to the terminating return in...
Definition: BasicBlock.cpp:311
const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
Definition: BasicBlock.cpp:447
const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
Definition: BasicBlock.cpp:469
void flushTerminatorDbgRecords()
Eject any debug-info trailing at the end of a block.
Definition: BasicBlock.cpp:699
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:205
const Instruction * getFirstNonPHIOrDbg(bool SkipPseudoOp=true) const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic,...
Definition: BasicBlock.cpp:366
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:164
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:155
void insertDbgRecordBefore(DbgRecord *DPV, InstListType::iterator Here)
Insert a DbgRecord into a block at the position given by Here.
bool IsNewDbgInfoFormat
Flag recording whether or not this block stores debug-info in the form of intrinsic instructions (fal...
Definition: BasicBlock.h:65
bool isLandingPad() const
Return true if this basic block is a landing pad.
Definition: BasicBlock.cpp:659
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:220
bool hasNPredecessorsOrMore(unsigned N) const
Return true if this block has N predecessors or more.
Definition: BasicBlock.cpp:465
void splice(BasicBlock::iterator ToIt, BasicBlock *FromBB)
Transfer all instructions from FromBB to this basic block at ToIt.
Definition: BasicBlock.h:612
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:276
void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
Definition: BasicBlock.cpp:496
This class represents a no-op cast from one type to another.
The address of a basic block.
Definition: Constants.h:888
BasicBlock * getBasicBlock() const
Definition: Constants.h:917
Conditional or Unconditional Branch instruction.
iterator_range< succ_op_iterator > successors()
void setCondition(Value *V)
static BranchInst * Create(BasicBlock *IfTrue, BasicBlock::iterator InsertBefore)
bool isConditional() const
BasicBlock * getSuccessor(unsigned i) const
bool isUnconditional() const
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
Value * getCondition() const
static BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
BranchProbability getCompl() const
bool isInlineAsm() const
Check if this call is an inline asm statement.
Definition: InstrTypes.h:1770
bool cannotMerge() const
Determine if the call cannot be tail merged.
Definition: InstrTypes.h:2237
bool isIndirectCall() const
Return true if the callsite is an indirect call.
Value * getCalledOperand() const
Definition: InstrTypes.h:1696
Intrinsic::ID getIntrinsicID() const
Returns the intrinsic ID of the intrinsic called or Intrinsic::not_intrinsic if the called function i...
This class represents a function call, abstracting a target machine's calling convention.
CleanupPadInst * getCleanupPad() const
Convenience accessor.
BasicBlock * getUnwindDest() const
This class is the base class for the comparison instructions.
Definition: InstrTypes.h:955
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:1066
static Constant * get(ArrayType *T, ArrayRef< Constant * > V)
Definition: Constants.cpp:1291
A constant value that is initialized with an expression using other constant values.
Definition: Constants.h:1016
static Constant * getICmp(unsigned short pred, Constant *LHS, Constant *RHS, bool OnlyIfReduced=false)
get* - Return some common constants without having to specify the full Instruction::OPCODE identifier...
Definition: Constants.cpp:2404
static Constant * getNeg(Constant *C, bool HasNUW=false, bool HasNSW=false)
Definition: Constants.cpp:2525
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
bool isNegative() const
Definition: Constants.h:199
uint64_t getLimitedValue(uint64_t Limit=~0ULL) const
getLimitedValue - If the value is smaller than the specified limit, return it, otherwise return the l...
Definition: Constants.h:254
IntegerType * getIntegerType() const
Variant of the getType() method to always return an IntegerType, which reduces the amount of casting ...
Definition: Constants.h:183
static ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:849
static ConstantInt * getFalse(LLVMContext &Context)
Definition: Constants.cpp:856
unsigned getBitWidth() const
getBitWidth - Return the scalar bitwidth of this constant.
Definition: Constants.h:147
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:153
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:144
This class represents a range of values.
Definition: ConstantRange.h:47
ConstantRange subtract(const APInt &CI) const
Subtract the specified constant from the endpoints of this constant range.
const APInt & getLower() const
Return the lower value for this range.
bool isEmptySet() const
Return true if this set contains no members.
bool isSizeLargerThan(uint64_t MaxSize) const
Compare set size of this range with Value.
const APInt & getUpper() const
Return the upper value for this range.
bool isUpperWrapped() const
Return true if the exclusive upper bound wraps around the unsigned domain.
static ConstantRange makeExactICmpRegion(CmpInst::Predicate Pred, const APInt &Other)
Produce the exact range such that all values in the returned range satisfy the given predicate with a...
ConstantRange inverse() const
Return a new range that is the logical not of the current set.
This is an important base class in LLVM.
Definition: Constant.h:41
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:370
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition: Constants.cpp:90
Debug location.
static DILocation * getMergedLocations(ArrayRef< DILocation * > Locs)
Try to combine the vector of locations passed as input in a single one.
static DILocation * getMergedLocation(DILocation *LocA, DILocation *LocB)
When two instructions are combined into a single instruction we also need to combine the original loc...
Record of a variable value-assignment, aka a non instruction representation of the dbg....
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
Base class for non-instruction debug metadata records that have positions within IR.
simple_ilist< DbgRecord >::iterator self_iterator
A debug info location.
Definition: DebugLoc.h:33
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
unsigned size() const
Definition: DenseMap.h:99
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:151
iterator end()
Definition: DenseMap.h:84
bool hasPostDomTree() const
Returns true if it holds a PostDominatorTree.
void applyUpdates(ArrayRef< DominatorTree::UpdateType > Updates)
Submit updates to all available trees.
const BasicBlock & getEntryBlock() const
Definition: Function.h:782
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.cpp:695
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:677
iterator begin()
Definition: Function.h:798
size_t size() const
Definition: Function.h:803
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:669
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition: Instructions.h:973
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:655
This instruction compares its operands according to the predicate given to the constructor.
static bool isEquality(Predicate P)
Return true if this predicate is either EQ or NE.
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:94
Value * CreateICmpULT(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2240
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2006
Value * CreateZExtOrTrunc(Value *V, Type *DestTy, const Twine &Name="")
Create a ZExt or Trunc from the integer value V to DestTy.
Definition: IRBuilder.h:2028
UnreachableInst * CreateUnreachable()
Definition: IRBuilder.h:1257
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, Instruction *FMFSource=nullptr, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Definition: IRBuilder.cpp:932
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Definition: IRBuilder.cpp:1110
BasicBlock::iterator GetInsertPoint() const
Definition: IRBuilder.h:175
Value * CreateFreeze(Value *V, const Twine &Name="")
Definition: IRBuilder.h:2518
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1431
void setFastMathFlags(FastMathFlags NewFMF)
Set the fast-math flags to be used with generated fp-math operators.
Definition: IRBuilder.h:305
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Definition: IRBuilder.h:220
Value * CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="")
Definition: IRBuilder.h:1875
void CollectMetadataToCopy(Instruction *Src, ArrayRef< unsigned > MetadataKinds)
Collect metadata with IDs MetadataKinds from Src which should be added to all created instructions.
Definition: IRBuilder.h:227
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition: IRBuilder.h:480
Value * CreateNot(Value *V, const Twine &Name="")
Definition: IRBuilder.h:1748
SwitchInst * CreateSwitch(Value *V, BasicBlock *Dest, unsigned NumCases=10, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a switch instruction with the specified value, default dest, and with a hint for the number of...
Definition: IRBuilder.h:1137
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2224
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1338
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Definition: IRBuilder.h:1114
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Definition: IRBuilder.h:1789
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2010
CallInst * CreateAssumption(Value *Cond, ArrayRef< OperandBundleDef > OpBundles=std::nullopt)
Create an assume intrinsic call that allows the optimizer to assume that the provided condition will ...
Definition: IRBuilder.cpp:551
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1469
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
Definition: IRBuilder.h:1802
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1321
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2100
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1491
Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:1660
BranchInst * CreateBr(BasicBlock *Dest)
Create an unconditional 'br label X' instruction.
Definition: IRBuilder.h:1108
Value * CreateLogicalAnd(Value *Cond1, Value *Cond2, const Twine &Name="")
Definition: IRBuilder.h:1670
Value * CreateIntCast(Value *V, Type *DestTy, bool isSigned, const Twine &Name="")
Definition: IRBuilder.h:2179
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition: IRBuilder.h:180
Value * CreateLogicalOr(Value *Cond1, Value *Cond2, const Twine &Name="")
Definition: IRBuilder.h:1676
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1355
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2649
Indirect Branch Instruction.
BasicBlock * getDestination(unsigned i)
Return the specified destination.
unsigned getNumDestinations() const
return the number of possible destinations in this indirectbr instruction.
void removeDestination(unsigned i)
This method removes the specified successor from the indirectbr instruction.
Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
bool isSameOperationAs(const Instruction *I, unsigned flags=0) const LLVM_READONLY
This function determines if the specified instruction executes the same operation as the current one.
iterator_range< simple_ilist< DbgRecord >::iterator > cloneDebugInfoFrom(const Instruction *From, std::optional< simple_ilist< DbgRecord >::iterator > FromHere=std::nullopt, bool InsertAtHead=false)
Clone any debug-info attached to From onto this instruction.
unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
iterator_range< simple_ilist< DbgRecord >::iterator > getDbgRecordRange() const
Return a range over the DbgRecords attached to this instruction.
Definition: Instruction.h:83
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:453
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
Definition: Instruction.cpp:80
void andIRFlags(const Value *V)
Logical 'and' of any supported wrapping, exact, and fast-math flags of V and this instruction.
const BasicBlock * getParent() const
Definition: Instruction.h:151
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
Definition: Instruction.h:148
const Function * getFunction() const
Return the function this instruction belongs to.
Definition: Instruction.cpp:84
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
Definition: Instruction.h:358
BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
bool mayHaveSideEffects() const LLVM_READONLY
Return true if the instruction may have side effects.
bool isTerminator() const
Definition: Instruction.h:254
void dropUBImplyingAttrsAndMetadata()
Drop any attributes or metadata that can cause immediate undefined behavior.
bool isUsedOutsideOfBlock(const BasicBlock *BB) const LLVM_READONLY
Return true if there are any uses of this instruction in blocks other than the specified block.
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1633
FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
AAMDNodes getAAMetadata() const
Returns the AA metadata for this instruction.
Definition: Metadata.cpp:1704
bool isIdenticalTo(const Instruction *I) const LLVM_READONLY
Return true if the specified instruction is exactly identical to the current one.
void applyMergedLocation(DILocation *LocA, DILocation *LocB)
Merge 2 debug locations and apply it to the Instruction.
Definition: DebugInfo.cpp:929
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:450
void dropDbgRecords()
Erase any DbgRecords attached to this instruction.
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
InstListType::iterator insertInto(BasicBlock *ParentBB, InstListType::iterator It)
Inserts an unlinked instruction into ParentBB at position It and returns the iterator of the inserted...
Class to represent integer types.
Definition: DerivedTypes.h:40
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:278
Invoke instruction.
BasicBlock * getUnwindDest() const
void setNormalDest(BasicBlock *B)
void setUnwindDest(BasicBlock *B)
BasicBlock * getNormalDest() const
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
The landingpad instruction holds all of the information necessary to generate correct exception handl...
An instruction for reading from memory.
Definition: Instructions.h:184
MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight)
Return metadata containing two branch weights.
Definition: MDBuilder.cpp:37
Metadata node.
Definition: Metadata.h:1067
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1428
unsigned getNumOperands() const
Return number of MDNode operands.
Definition: Metadata.h:1434
VectorType::iterator erase(typename VectorType::iterator Iterator)
Remove the element given by Iterator.
Definition: MapVector.h:193
bool empty() const
Definition: MapVector.h:79
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: MapVector.h:141
size_type size() const
Definition: MapVector.h:60
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
LLVMContext & getContext() const
Get the global data context.
Definition: Module.h:295
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.h:287
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:307
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
iterator_range< const_block_iterator > blocks() const
op_range incoming_values()
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr, BasicBlock::iterator InsertBefore)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
void setIncomingValue(unsigned i, Value *V)
Value * getIncomingValueForBlock(const BasicBlock *BB) const
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1827
This class represents a cast from a pointer to an integer.
Resume the propagation of an exception.
Value * getValue() const
Convenience accessor.
Return a value (possibly void), from a function.
This class represents the LLVM 'select' instruction.
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:98
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:93
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
size_type size() const
Definition: SmallPtrSet.h:94
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:321
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false.
Definition: SmallPtrSet.h:356
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:360
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:342
bool contains(ConstPtrType Ptr) const
Definition: SmallPtrSet.h:366
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:427
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:370
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
void assign(size_type NumElts, ValueParamT Elt)
Definition: SmallVector.h:717
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
void reserve(size_type N)
Definition: SmallVector.h:676
iterator erase(const_iterator CI)
Definition: SmallVector.h:750
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:818
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
An instruction for storing to memory.
Definition: Instructions.h:317
Align getAlign() const
Definition: Instructions.h:369
bool isSimple() const
Definition: Instructions.h:406
Value * getValueOperand()
Definition: Instructions.h:414
bool isUnordered() const
Definition: Instructions.h:408
Value * getPointerOperand()
Definition: Instructions.h:417
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
A wrapper class to simplify modification of SwitchInst cases along with their prof branch_weights met...
std::optional< uint32_t > CaseWeightOpt
SwitchInst::CaseIt removeCase(SwitchInst::CaseIt I)
Delegate the call to the underlying SwitchInst::removeCase() and remove correspondent branch weight.
Multiway switch.
BasicBlock * getSuccessor(unsigned idx) const
void addCase(ConstantInt *OnVal, BasicBlock *Dest)
Add an entry to the switch instruction.
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
unsigned getNumSuccessors() const
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
bool shouldBuildLookupTables() const
Return true if switches should be turned into lookup tables for the target.
bool shouldBuildLookupTablesForConstant(Constant *C) const
Return true if switches should be turned into lookup tables containing this constant value for the ta...
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const
TargetCostKind
The kind of cost model.
@ TCK_CodeSize
Instruction code size.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
bool isTypeLegal(Type *Ty) const
Return true if this type is legal.
BranchProbability getPredictableBranchThreshold() const
If a branch or a select condition is skewed in one direction by more than this factor,...
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args=ArrayRef< const Value * >(), const Instruction *CxtI=nullptr, const TargetLibraryInfo *TLibInfo=nullptr) const
This is an approximation of reciprocal throughput of a math/logic op.
bool isProfitableToHoist(Instruction *I) const
Return true if it is profitable to hoist instruction in the then/else to before if.
@ TCC_Free
Expected to fold away in lowering.
@ TCC_Basic
The cost of a typical 'add' instruction.
InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TargetCostKind CostKind) const
Estimate the cost of a given IR user when lowered.
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, const Instruction *I=nullptr) const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:255
static IntegerType * getInt1Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:228
bool isTokenTy() const
Return true if this is 'token'.
Definition: Type.h:225
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
This function has undefined behavior.
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
void set(Value *Val)
Definition: Value.h:882
User * getUser() const
Returns the User that contains this Use.
Definition: Use.h:72
unsigned getOperandNo() const
Return the operand # of this use in its User.
Definition: Use.cpp:31
bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Definition: User.cpp:21
const Use & getOperandUse(unsigned i) const
Definition: User.h:182
void setOperand(unsigned i, Value *Val)
Definition: User.h:174
Value * getOperand(unsigned i) const
Definition: User.h:169
unsigned getNumOperands() const
Definition: User.h:191
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
user_iterator user_begin()
Definition: Value.h:397
Value(Type *Ty, unsigned scid)
Definition: Value.cpp:53
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:377
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
bool hasNUses(unsigned N) const
Return true if this Value has exactly N uses.
Definition: Value.cpp:149
bool use_empty() const
Definition: Value.h:344
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1074
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:383
bool user_empty() const
Definition: Value.h:385
self_iterator getIterator()
Definition: ilist_node.h:109
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition: ilist_node.h:316
A range adaptor for a pair of iterators.
#define UINT64_MAX
Definition: DataTypes.h:77
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
ArchKind & operator--(ArchKind &Kind)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
Definition: PatternMatch.h:100
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
cst_pred_ty< is_any_apint > m_AnyIntegralConstant()
Match an integer or vector with any integral constant.
Definition: PatternMatch.h:460
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
match_combine_and< class_match< Constant >, match_unless< constantexpr_match > > m_ImmConstant()
Match an arbitrary immediate Constant and ignore it.
Definition: PatternMatch.h:800
apint_match m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
Definition: PatternMatch.h:294
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
BinaryOp_match< cst_pred_ty< is_all_ones >, ValTy, Instruction::Xor, true > m_Not(const ValTy &V)
Matches a 'Not' as 'xor V, -1' or 'xor -1, V'.
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
Definition: PatternMatch.h:234
AssignmentMarkerRange getAssignmentMarkers(DIAssignID *ID)
Return a range of dbg.assign intrinsics which use ID as an operand.
Definition: DebugInfo.cpp:1787
SmallVector< DPValue * > getDPVAssignmentMarkers(const Instruction *Inst)
Definition: DebugInfo.h:234
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
constexpr double e
Definition: MathExtras.h:31
@ FalseVal
Definition: TGLexer.h:59
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:456
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
Definition: STLExtras.h:862
bool operator<(int64_t V1, const APSInt &V2)
Definition: APSInt.h:361
Interval::succ_iterator succ_end(Interval *I)
Definition: Interval.h:102
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1751
void RemapDPValue(Module *M, DPValue *V, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr)
Remap the Values used in the DPValue V using the value map VM.
Definition: ValueMapper.h:271
UnaryFunction for_each(R &&Range, UnaryFunction F)
Provide wrappers to std::for_each which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1724
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1731
int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition: bit.h:385
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition: Local.cpp:533
bool succ_empty(const Instruction *I)
Definition: CFG.h:255
bool IsBlockFollowedByDeoptOrUnreachable(const BasicBlock *BB)
Check if we can prove that all paths starting from this block converge to a block that either has a @...
bool ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions=false, const TargetLibraryInfo *TLI=nullptr, DomTreeUpdater *DTU=nullptr)
If a terminator instruction is predicated on a constant value, convert it into an unconditional branc...
Definition: Local.cpp:129
BranchInst * GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue, BasicBlock *&IfFalse)
Check whether BB is the merge point of a if-region.
void set_intersect(S1Ty &S1, const S2Ty &S2)
set_intersect(A, B) - Compute A := A ^ B Identical to set_intersection, except that it works on set<>...
Definition: SetOperations.h:40
APInt operator*(APInt a, uint64_t RHS)
Definition: APInt.h:2165
auto successors(const MachineBasicBlock *BB)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Interval::succ_iterator succ_begin(Interval *I)
succ_begin/succ_end - define methods so that Intervals may be used just like BasicBlocks can with the...
Definition: Interval.h:99
const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=6)
This method strips off any GEP address adjustments and pointer casts from the specified value,...
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:665
void DeleteDeadBlock(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified block, which must have no predecessors.
OutputIt copy_if(R &&Range, OutputIt Out, UnaryPredicate P)
Provide wrappers to std::copy_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1777
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
ConstantRange computeConstantRange(const Value *V, bool ForSigned, bool UseInstrInfo=true, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Determine the possible constant range of an integer or vector of integer value.
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition: bit.h:215
Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
Interval::pred_iterator pred_end(Interval *I)
Definition: Interval.h:112
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container:
Definition: STLExtras.h:2068
constexpr bool has_single_bit(T Value) noexcept
Definition: bit.h:146
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1738
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:313
BasicBlock::iterator skipDebugIntrinsics(BasicBlock::iterator It)
Advance It while it points to a debug instruction and return the result.
Definition: BasicBlock.cpp:680
int countl_zero(T Val)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition: bit.h:281
bool FoldBranchToCommonDest(BranchInst *BI, llvm::DomTreeUpdater *DTU=nullptr, MemorySSAUpdater *MSSAU=nullptr, const TargetTransformInfo *TTI=nullptr, unsigned BonusInstThreshold=1)
If this basic block is ONLY a setcc and a branch, and if a predecessor branches to us and one of our ...
bool TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB, DomTreeUpdater *DTU=nullptr)
BB is known to contain an unconditional branch, and contains no instructions other than PHI nodes,...
Definition: Local.cpp:1113
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:428
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:264
void InvertBranch(BranchInst *PBI, IRBuilderBase &Builder)
bool impliesPoison(const Value *ValAssumedPoison, const Value *V)
Return true if V is poison given that ValAssumedPoison is already poison.
Constant * ConstantFoldInstOperands(Instruction *I, ArrayRef< Constant * > Ops, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldInstOperands - Attempt to constant fold an instruction with the specified operands.
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1656
@ RF_IgnoreMissingLocals
If this flag is set, the remapper ignores missing function-local entries (Argument,...
Definition: ValueMapper.h:94
@ RF_NoModuleLevelChanges
If this flag is set, the remapper knows that only local values within a function (such as an instruct...
Definition: ValueMapper.h:76
bool PointerMayBeCaptured(const Value *V, bool ReturnCaptures, bool StoreCaptures, unsigned MaxUsesToExplore=0)
PointerMayBeCaptured - Return true if this pointer value may be captured by the enclosing function (w...
bool NullPointerIsDefined(const Function *F, unsigned AS=0)
Check whether null pointer dereferencing is considered undefined behavior for a given function or an ...
Definition: Function.cpp:2014
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1745
Interval::pred_iterator pred_begin(Interval *I)
pred_begin/pred_end - define methods so that Intervals may be used just like BasicBlocks can with the...
Definition: Interval.h:109
auto make_first_range(ContainerTy &&c)
Given a container of pairs, return a range over the first elements.
Definition: STLExtras.h:1431
Instruction * removeUnwindEdge(BasicBlock *BB, DomTreeUpdater *DTU=nullptr)
Replace 'BB's terminator with one that does not have an unwind successor block.
Definition: Local.cpp:3142
void RemapInstruction(Instruction *I, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr)
Convert the instruction operands from referencing the current values into those specified by VM.
Definition: ValueMapper.h:263
cl::opt< bool > RequireAndPreserveDomTree
This function is used to do simplification of a CFG.
void combineMetadataForCSE(Instruction *K, const Instruction *J, bool DoesKMove)
Combine the metadata of two instructions so that K can replace J.
Definition: Local.cpp:3310
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition: STLExtras.h:336
BasicBlock * SplitBlockPredecessors(BasicBlock *BB, ArrayRef< BasicBlock * > Preds, const char *Suffix, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method introduces at least one new basic block into the function and moves some of the predecess...
bool isWidenableBranch(const User *U)
Returns true iff U is a widenable branch (that is, extractWidenableCondition returns widenable condit...
Definition: GuardUtils.cpp:26
bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false, DominatorTree *DT=nullptr)
Attempts to merge a block into its predecessor, if possible.
void hoistAllInstructionsInto(BasicBlock *DomBlock, Instruction *InsertPt, BasicBlock *BB)
Hoist all of the instructions in the IfBlock to the dominant block DomBlock, by moving its instructio...
Definition: Local.cpp:3555
@ And
Bitwise or logical AND of integers.
void RemapDPValueRange(Module *M, iterator_range< DbgRecordIterator > Range, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr)
Remap the Values used in the DPValue V using the value map VM.
Definition: ValueMapper.h:279
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition: STLExtras.h:1923
void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
bool canReplaceOperandWithVariable(const Instruction *I, unsigned OpIdx)
Given an instruction, is it legal to set operand OpIdx to a non-constant value?
Definition: Local.cpp:4031
auto max_element(R &&Range)
Definition: STLExtras.h:1995
bool FoldSingleEntryPHINodes(BasicBlock *BB, MemoryDependenceResults *MemDep=nullptr)
We know that BB has one predecessor.
bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if the instruction does not have any effects besides calculating the result and does not ...
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
bool isGuaranteedToTransferExecutionToSuccessor(const Instruction *I)
Return true if this function can prove that the instruction I will always transfer execution to one o...
bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
bool simplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI, DomTreeUpdater *DTU=nullptr, const SimplifyCFGOptions &Options={}, ArrayRef< WeakVH > LoopHeaders={})
BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent to: C.erase(remove_if(C.begin(), C.end(), pred), C.end());
Definition: STLExtras.h:2060
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1888
bool pred_empty(const BasicBlock *BB)
Definition: CFG.h:118
Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the old basic block, and the rest of the instructions in the BB are moved to a new block.
std::optional< bool > isImpliedByDomCondition(const Value *Cond, const Instruction *ContextI, const DataLayout &DL)
Return the boolean condition value in the context of the given instruction if it is known based on do...
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition: Sequence.h:305
unsigned succ_size(const MachineBasicBlock *BB)
void array_pod_sort(IteratorTy Start, IteratorTy End)
array_pod_sort - This sorts an array with the specified start and end extent.
Definition: STLExtras.h:1616
bool hasBranchWeightMD(const Instruction &I)
Checks if an instructions has Branch Weight Metadata.
Constant * ConstantFoldIntegerCast(Constant *C, Type *DestTy, bool IsSigned, const DataLayout &DL)
Constant fold a zext, sext or trunc, depending on IsSigned and whether the DestTy is wider or narrower than C's type.
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DPValue types only and downcast.
unsigned ComputeMaxSignificantBits(const Value *Op, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr)
Get the upper bound on bit size for this Value Op as a signed integer.
bool EliminateDuplicatePHINodes(BasicBlock *BB)
Check for and eliminate duplicate PHI nodes in this block.
Definition: Local.cpp:1480
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition: MathExtras.h:349
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
AAMDNodes merge(const AAMDNodes &Other) const
Given two sets of AAMDNodes applying to potentially different locations, determine the best AAMDNodes...
Incoming for lane mask phi as machine instruction; incoming register Reg and incoming block Block are taken from the machine instruction.
unsigned getBitWidth() const
Get the bit width of this value.
Definition: KnownBits.h:40
A MapVector that performs no allocations if smaller than a certain size.
Definition: MapVector.h:254