// Extracted from LLVM 18.0.0git — ScalarEvolutionExpander.cpp (doxygen listing).
1//===- ScalarEvolutionExpander.cpp - Scalar Evolution Analysis ------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the implementation of the scalar evolution expander,
10// which is used to generate the code corresponding to a given scalar evolution
11// expression.
12//
13//===----------------------------------------------------------------------===//
14
16#include "llvm/ADT/STLExtras.h"
17#include "llvm/ADT/ScopeExit.h"
18#include "llvm/ADT/SmallSet.h"
23#include "llvm/IR/DataLayout.h"
24#include "llvm/IR/Dominators.h"
30
31#ifdef LLVM_ENABLE_ABI_BREAKING_CHECKS
32#define SCEV_DEBUG_WITH_TYPE(TYPE, X) DEBUG_WITH_TYPE(TYPE, X)
33#else
34#define SCEV_DEBUG_WITH_TYPE(TYPE, X)
35#endif
36
37using namespace llvm;
38
40 "scev-cheap-expansion-budget", cl::Hidden, cl::init(4),
41 cl::desc("When performing SCEV expansion only if it is cheap to do, this "
42 "controls the budget that is considered cheap (default = 4)"));
43
44using namespace PatternMatch;
45
46/// ReuseOrCreateCast - Arrange for there to be a cast of V to Ty at IP,
47/// reusing an existing cast if a suitable one (= dominating IP) exists, or
48/// creating a new one.
49Value *SCEVExpander::ReuseOrCreateCast(Value *V, Type *Ty,
52 // This function must be called with the builder having a valid insertion
53 // point. It doesn't need to be the actual IP where the uses of the returned
54 // cast will be added, but it must dominate such IP.
55 // We use this precondition to produce a cast that will dominate all its
56 // uses. In particular, this is crucial for the case where the builder's
57 // insertion point *is* the point where we were asked to put the cast.
58 // Since we don't know the builder's insertion point is actually
59 // where the uses will be added (only that it dominates it), we are
60 // not allowed to move it.
62
63 Value *Ret = nullptr;
64
65 // Check to see if there is already a cast!
66 for (User *U : V->users()) {
67 if (U->getType() != Ty)
68 continue;
69 CastInst *CI = dyn_cast<CastInst>(U);
70 if (!CI || CI->getOpcode() != Op)
71 continue;
72
73 // Found a suitable cast that is at IP or comes before IP. Use it. Note that
74 // the cast must also properly dominate the Builder's insertion point.
75 if (IP->getParent() == CI->getParent() && &*BIP != CI &&
76 (&*IP == CI || CI->comesBefore(&*IP))) {
77 Ret = CI;
78 break;
79 }
80 }
81
82 // Create a new cast.
83 if (!Ret) {
84 SCEVInsertPointGuard Guard(Builder, this);
85 Builder.SetInsertPoint(&*IP);
86 Ret = Builder.CreateCast(Op, V, Ty, V->getName());
87 }
88
89 // We assert at the end of the function since IP might point to an
90 // instruction with different dominance properties than a cast
91 // (an invoke for example) and not dominate BIP (but the cast does).
92 assert(!isa<Instruction>(Ret) ||
93 SE.DT.dominates(cast<Instruction>(Ret), &*BIP));
94
95 return Ret;
96}
97
100 Instruction *MustDominate) const {
101 BasicBlock::iterator IP = ++I->getIterator();
102 if (auto *II = dyn_cast<InvokeInst>(I))
103 IP = II->getNormalDest()->begin();
104
105 while (isa<PHINode>(IP))
106 ++IP;
107
108 if (isa<FuncletPadInst>(IP) || isa<LandingPadInst>(IP)) {
109 ++IP;
110 } else if (isa<CatchSwitchInst>(IP)) {
111 IP = MustDominate->getParent()->getFirstInsertionPt();
112 } else {
113 assert(!IP->isEHPad() && "unexpected eh pad!");
114 }
115
116 // Adjust insert point to be after instructions inserted by the expander, so
117 // we can re-use already inserted instructions. Avoid skipping past the
118 // original \p MustDominate, in case it is an inserted instruction.
119 while (isInsertedInstruction(&*IP) && &*IP != MustDominate)
120 ++IP;
121
122 return IP;
123}
124
126SCEVExpander::GetOptimalInsertionPointForCastOf(Value *V) const {
127 // Cast the argument at the beginning of the entry block, after
128 // any bitcasts of other arguments.
129 if (Argument *A = dyn_cast<Argument>(V)) {
130 BasicBlock::iterator IP = A->getParent()->getEntryBlock().begin();
131 while ((isa<BitCastInst>(IP) &&
132 isa<Argument>(cast<BitCastInst>(IP)->getOperand(0)) &&
133 cast<BitCastInst>(IP)->getOperand(0) != A) ||
134 isa<DbgInfoIntrinsic>(IP))
135 ++IP;
136 return IP;
137 }
138
139 // Cast the instruction immediately after the instruction.
140 if (Instruction *I = dyn_cast<Instruction>(V))
141 return findInsertPointAfter(I, &*Builder.GetInsertPoint());
142
143 // Otherwise, this must be some kind of a constant,
144 // so let's plop this cast into the function's entry block.
145 assert(isa<Constant>(V) &&
146 "Expected the cast argument to be a global/constant");
147 return Builder.GetInsertBlock()
148 ->getParent()
149 ->getEntryBlock()
151}
152
153/// InsertNoopCastOfTo - Insert a cast of V to the specified type,
154/// which must be possible with a noop cast, doing what we can to share
155/// the casts.
156Value *SCEVExpander::InsertNoopCastOfTo(Value *V, Type *Ty) {
157 Instruction::CastOps Op = CastInst::getCastOpcode(V, false, Ty, false);
158 assert((Op == Instruction::BitCast ||
159 Op == Instruction::PtrToInt ||
160 Op == Instruction::IntToPtr) &&
161 "InsertNoopCastOfTo cannot perform non-noop casts!");
162 assert(SE.getTypeSizeInBits(V->getType()) == SE.getTypeSizeInBits(Ty) &&
163 "InsertNoopCastOfTo cannot change sizes!");
164
165 // inttoptr only works for integral pointers. For non-integral pointers, we
166 // can create a GEP on null with the integral value as index. Note that
167 // it is safe to use GEP of null instead of inttoptr here, because only
168 // expressions already based on a GEP of null should be converted to pointers
169 // during expansion.
170 if (Op == Instruction::IntToPtr) {
171 auto *PtrTy = cast<PointerType>(Ty);
172 if (DL.isNonIntegralPointerType(PtrTy)) {
173 assert(DL.getTypeAllocSize(Builder.getInt8Ty()) == 1 &&
174 "alloc size of i8 must by 1 byte for the GEP to be correct");
175 return Builder.CreateGEP(
176 Builder.getInt8Ty(), Constant::getNullValue(PtrTy), V, "scevgep");
177 }
178 }
179 // Short-circuit unnecessary bitcasts.
180 if (Op == Instruction::BitCast) {
181 if (V->getType() == Ty)
182 return V;
183 if (CastInst *CI = dyn_cast<CastInst>(V)) {
184 if (CI->getOperand(0)->getType() == Ty)
185 return CI->getOperand(0);
186 }
187 }
188 // Short-circuit unnecessary inttoptr<->ptrtoint casts.
189 if ((Op == Instruction::PtrToInt || Op == Instruction::IntToPtr) &&
190 SE.getTypeSizeInBits(Ty) == SE.getTypeSizeInBits(V->getType())) {
191 if (CastInst *CI = dyn_cast<CastInst>(V))
192 if ((CI->getOpcode() == Instruction::PtrToInt ||
193 CI->getOpcode() == Instruction::IntToPtr) &&
194 SE.getTypeSizeInBits(CI->getType()) ==
196 return CI->getOperand(0);
197 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
198 if ((CE->getOpcode() == Instruction::PtrToInt ||
199 CE->getOpcode() == Instruction::IntToPtr) &&
200 SE.getTypeSizeInBits(CE->getType()) ==
201 SE.getTypeSizeInBits(CE->getOperand(0)->getType()))
202 return CE->getOperand(0);
203 }
204
205 // Fold a cast of a constant.
206 if (Constant *C = dyn_cast<Constant>(V))
207 return ConstantExpr::getCast(Op, C, Ty);
208
209 // Try to reuse existing cast, or insert one.
210 return ReuseOrCreateCast(V, Ty, Op, GetOptimalInsertionPointForCastOf(V));
211}
212
213/// InsertBinop - Insert the specified binary operator, doing a small amount
214/// of work to avoid inserting an obviously redundant operation, and hoisting
215/// to an outer loop when the opportunity is there and it is safe.
216Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,
217 Value *LHS, Value *RHS,
218 SCEV::NoWrapFlags Flags, bool IsSafeToHoist) {
219 // Fold a binop with constant operands.
220 if (Constant *CLHS = dyn_cast<Constant>(LHS))
221 if (Constant *CRHS = dyn_cast<Constant>(RHS))
222 if (Constant *Res = ConstantFoldBinaryOpOperands(Opcode, CLHS, CRHS, DL))
223 return Res;
224
225 // Do a quick scan to see if we have this binop nearby. If so, reuse it.
226 unsigned ScanLimit = 6;
227 BasicBlock::iterator BlockBegin = Builder.GetInsertBlock()->begin();
228 // Scanning starts from the last instruction before the insertion point.
230 if (IP != BlockBegin) {
231 --IP;
232 for (; ScanLimit; --IP, --ScanLimit) {
233 // Don't count dbg.value against the ScanLimit, to avoid perturbing the
234 // generated code.
235 if (isa<DbgInfoIntrinsic>(IP))
236 ScanLimit++;
237
238 auto canGenerateIncompatiblePoison = [&Flags](Instruction *I) {
239 // Ensure that no-wrap flags match.
240 if (isa<OverflowingBinaryOperator>(I)) {
241 if (I->hasNoSignedWrap() != (Flags & SCEV::FlagNSW))
242 return true;
243 if (I->hasNoUnsignedWrap() != (Flags & SCEV::FlagNUW))
244 return true;
245 }
246 // Conservatively, do not use any instruction which has any of exact
247 // flags installed.
248 if (isa<PossiblyExactOperator>(I) && I->isExact())
249 return true;
250 return false;
251 };
252 if (IP->getOpcode() == (unsigned)Opcode && IP->getOperand(0) == LHS &&
253 IP->getOperand(1) == RHS && !canGenerateIncompatiblePoison(&*IP))
254 return &*IP;
255 if (IP == BlockBegin) break;
256 }
257 }
258
259 // Save the original insertion point so we can restore it when we're done.
260 DebugLoc Loc = Builder.GetInsertPoint()->getDebugLoc();
261 SCEVInsertPointGuard Guard(Builder, this);
262
263 if (IsSafeToHoist) {
264 // Move the insertion point out of as many loops as we can.
265 while (const Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock())) {
266 if (!L->isLoopInvariant(LHS) || !L->isLoopInvariant(RHS)) break;
267 BasicBlock *Preheader = L->getLoopPreheader();
268 if (!Preheader) break;
269
270 // Ok, move up a level.
271 Builder.SetInsertPoint(Preheader->getTerminator());
272 }
273 }
274
275 // If we haven't found this binop, insert it.
276 // TODO: Use the Builder, which will make CreateBinOp below fold with
277 // InstSimplifyFolder.
278 Instruction *BO = Builder.Insert(BinaryOperator::Create(Opcode, LHS, RHS));
279 BO->setDebugLoc(Loc);
280 if (Flags & SCEV::FlagNUW)
282 if (Flags & SCEV::FlagNSW)
283 BO->setHasNoSignedWrap();
284
285 return BO;
286}
287
288/// expandAddToGEP - Expand an addition expression with a pointer type into
289/// a GEP instead of using ptrtoint+arithmetic+inttoptr. This helps
290/// BasicAliasAnalysis and other passes analyze the result. See the rules
291/// for getelementptr vs. inttoptr in
292/// http://llvm.org/docs/LangRef.html#pointeraliasing
293/// for details.
294///
295/// Design note: The correctness of using getelementptr here depends on
296/// ScalarEvolution not recognizing inttoptr and ptrtoint operators, as
297/// they may introduce pointer arithmetic which may not be safely converted
298/// into getelementptr.
299///
300/// Design note: It might seem desirable for this function to be more
301/// loop-aware. If some of the indices are loop-invariant while others
302/// aren't, it might seem desirable to emit multiple GEPs, keeping the
303/// loop-invariant portions of the overall computation outside the loop.
304/// However, there are a few reasons this is not done here. Hoisting simple
305/// arithmetic is a low-level optimization that often isn't very
306/// important until late in the optimization process. In fact, passes
307/// like InstructionCombining will combine GEPs, even if it means
308/// pushing loop-invariant computation down into loops, so even if the
309/// GEPs were split here, the work would quickly be undone. The
310/// LoopStrengthReduction pass, which is usually run quite late (and
311/// after the last InstructionCombining pass), takes care of hoisting
312/// loop-invariant portions of expressions, after considering what
313/// can be folded using target addressing modes.
314///
315Value *SCEVExpander::expandAddToGEP(const SCEV *Offset, Value *V) {
316 assert(!isa<Instruction>(V) ||
317 SE.DT.dominates(cast<Instruction>(V), &*Builder.GetInsertPoint()));
318
319 Value *Idx = expand(Offset);
320
321 // Fold a GEP with constant operands.
322 if (Constant *CLHS = dyn_cast<Constant>(V))
323 if (Constant *CRHS = dyn_cast<Constant>(Idx))
324 return Builder.CreateGEP(Builder.getInt8Ty(), CLHS, CRHS);
325
326 // Do a quick scan to see if we have this GEP nearby. If so, reuse it.
327 unsigned ScanLimit = 6;
328 BasicBlock::iterator BlockBegin = Builder.GetInsertBlock()->begin();
329 // Scanning starts from the last instruction before the insertion point.
331 if (IP != BlockBegin) {
332 --IP;
333 for (; ScanLimit; --IP, --ScanLimit) {
334 // Don't count dbg.value against the ScanLimit, to avoid perturbing the
335 // generated code.
336 if (isa<DbgInfoIntrinsic>(IP))
337 ScanLimit++;
338 if (IP->getOpcode() == Instruction::GetElementPtr &&
339 IP->getOperand(0) == V && IP->getOperand(1) == Idx &&
340 cast<GEPOperator>(&*IP)->getSourceElementType() ==
341 Builder.getInt8Ty())
342 return &*IP;
343 if (IP == BlockBegin) break;
344 }
345 }
346
347 // Save the original insertion point so we can restore it when we're done.
348 SCEVInsertPointGuard Guard(Builder, this);
349
350 // Move the insertion point out of as many loops as we can.
351 while (const Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock())) {
352 if (!L->isLoopInvariant(V) || !L->isLoopInvariant(Idx)) break;
353 BasicBlock *Preheader = L->getLoopPreheader();
354 if (!Preheader) break;
355
356 // Ok, move up a level.
357 Builder.SetInsertPoint(Preheader->getTerminator());
358 }
359
360 // Emit a GEP.
361 return Builder.CreateGEP(Builder.getInt8Ty(), V, Idx, "scevgep");
362}
363
364/// PickMostRelevantLoop - Given two loops pick the one that's most relevant for
365/// SCEV expansion. If they are nested, this is the most nested. If they are
366/// neighboring, pick the later.
367static const Loop *PickMostRelevantLoop(const Loop *A, const Loop *B,
368 DominatorTree &DT) {
369 if (!A) return B;
370 if (!B) return A;
371 if (A->contains(B)) return B;
372 if (B->contains(A)) return A;
373 if (DT.dominates(A->getHeader(), B->getHeader())) return B;
374 if (DT.dominates(B->getHeader(), A->getHeader())) return A;
375 return A; // Arbitrarily break the tie.
376}
377
378/// getRelevantLoop - Get the most relevant loop associated with the given
379/// expression, according to PickMostRelevantLoop.
380const Loop *SCEVExpander::getRelevantLoop(const SCEV *S) {
381 // Test whether we've already computed the most relevant loop for this SCEV.
382 auto Pair = RelevantLoops.insert(std::make_pair(S, nullptr));
383 if (!Pair.second)
384 return Pair.first->second;
385
386 switch (S->getSCEVType()) {
387 case scConstant:
388 case scVScale:
389 return nullptr; // A constant has no relevant loops.
390 case scTruncate:
391 case scZeroExtend:
392 case scSignExtend:
393 case scPtrToInt:
394 case scAddExpr:
395 case scMulExpr:
396 case scUDivExpr:
397 case scAddRecExpr:
398 case scUMaxExpr:
399 case scSMaxExpr:
400 case scUMinExpr:
401 case scSMinExpr:
403 const Loop *L = nullptr;
404 if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
405 L = AR->getLoop();
406 for (const SCEV *Op : S->operands())
407 L = PickMostRelevantLoop(L, getRelevantLoop(Op), SE.DT);
408 return RelevantLoops[S] = L;
409 }
410 case scUnknown: {
411 const SCEVUnknown *U = cast<SCEVUnknown>(S);
412 if (const Instruction *I = dyn_cast<Instruction>(U->getValue()))
413 return Pair.first->second = SE.LI.getLoopFor(I->getParent());
414 // A non-instruction has no relevant loops.
415 return nullptr;
416 }
418 llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
419 }
420 llvm_unreachable("Unexpected SCEV type!");
421}
422
423namespace {
424
425/// LoopCompare - Compare loops by PickMostRelevantLoop.
426class LoopCompare {
427 DominatorTree &DT;
428public:
429 explicit LoopCompare(DominatorTree &dt) : DT(dt) {}
430
431 bool operator()(std::pair<const Loop *, const SCEV *> LHS,
432 std::pair<const Loop *, const SCEV *> RHS) const {
433 // Keep pointer operands sorted at the end.
434 if (LHS.second->getType()->isPointerTy() !=
435 RHS.second->getType()->isPointerTy())
436 return LHS.second->getType()->isPointerTy();
437
438 // Compare loops with PickMostRelevantLoop.
439 if (LHS.first != RHS.first)
440 return PickMostRelevantLoop(LHS.first, RHS.first, DT) != LHS.first;
441
442 // If one operand is a non-constant negative and the other is not,
443 // put the non-constant negative on the right so that a sub can
444 // be used instead of a negate and add.
445 if (LHS.second->isNonConstantNegative()) {
446 if (!RHS.second->isNonConstantNegative())
447 return false;
448 } else if (RHS.second->isNonConstantNegative())
449 return true;
450
451 // Otherwise they are equivalent according to this comparison.
452 return false;
453 }
454};
455
456}
457
458Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
459 // Collect all the add operands in a loop, along with their associated loops.
460 // Iterate in reverse so that constants are emitted last, all else equal, and
461 // so that pointer operands are inserted first, which the code below relies on
462 // to form more involved GEPs.
464 for (const SCEV *Op : reverse(S->operands()))
465 OpsAndLoops.push_back(std::make_pair(getRelevantLoop(Op), Op));
466
467 // Sort by loop. Use a stable sort so that constants follow non-constants and
468 // pointer operands precede non-pointer operands.
469 llvm::stable_sort(OpsAndLoops, LoopCompare(SE.DT));
470
471 // Emit instructions to add all the operands. Hoist as much as possible
472 // out of loops, and form meaningful getelementptrs where possible.
473 Value *Sum = nullptr;
474 for (auto I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E;) {
475 const Loop *CurLoop = I->first;
476 const SCEV *Op = I->second;
477 if (!Sum) {
478 // This is the first operand. Just expand it.
479 Sum = expand(Op);
480 ++I;
481 continue;
482 }
483
484 assert(!Op->getType()->isPointerTy() && "Only first op can be pointer");
485 if (isa<PointerType>(Sum->getType())) {
486 // The running sum expression is a pointer. Try to form a getelementptr
487 // at this level with that as the base.
489 for (; I != E && I->first == CurLoop; ++I) {
490 // If the operand is SCEVUnknown and not instructions, peek through
491 // it, to enable more of it to be folded into the GEP.
492 const SCEV *X = I->second;
493 if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(X))
494 if (!isa<Instruction>(U->getValue()))
495 X = SE.getSCEV(U->getValue());
496 NewOps.push_back(X);
497 }
498 Sum = expandAddToGEP(SE.getAddExpr(NewOps), Sum);
499 } else if (Op->isNonConstantNegative()) {
500 // Instead of doing a negate and add, just do a subtract.
501 Value *W = expand(SE.getNegativeSCEV(Op));
502 Sum = InsertBinop(Instruction::Sub, Sum, W, SCEV::FlagAnyWrap,
503 /*IsSafeToHoist*/ true);
504 ++I;
505 } else {
506 // A simple add.
507 Value *W = expand(Op);
508 // Canonicalize a constant to the RHS.
509 if (isa<Constant>(Sum))
510 std::swap(Sum, W);
511 Sum = InsertBinop(Instruction::Add, Sum, W, S->getNoWrapFlags(),
512 /*IsSafeToHoist*/ true);
513 ++I;
514 }
515 }
516
517 return Sum;
518}
519
520Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) {
521 Type *Ty = S->getType();
522
523 // Collect all the mul operands in a loop, along with their associated loops.
524 // Iterate in reverse so that constants are emitted last, all else equal.
526 for (const SCEV *Op : reverse(S->operands()))
527 OpsAndLoops.push_back(std::make_pair(getRelevantLoop(Op), Op));
528
529 // Sort by loop. Use a stable sort so that constants follow non-constants.
530 llvm::stable_sort(OpsAndLoops, LoopCompare(SE.DT));
531
532 // Emit instructions to mul all the operands. Hoist as much as possible
533 // out of loops.
534 Value *Prod = nullptr;
535 auto I = OpsAndLoops.begin();
536
537 // Expand the calculation of X pow N in the following manner:
538 // Let N = P1 + P2 + ... + PK, where all P are powers of 2. Then:
539 // X pow N = (X pow P1) * (X pow P2) * ... * (X pow PK).
540 const auto ExpandOpBinPowN = [this, &I, &OpsAndLoops]() {
541 auto E = I;
542 // Calculate how many times the same operand from the same loop is included
543 // into this power.
544 uint64_t Exponent = 0;
545 const uint64_t MaxExponent = UINT64_MAX >> 1;
546 // No one sane will ever try to calculate such huge exponents, but if we
547 // need this, we stop on UINT64_MAX / 2 because we need to exit the loop
548 // below when the power of 2 exceeds our Exponent, and we want it to be
549 // 1u << 31 at most to not deal with unsigned overflow.
550 while (E != OpsAndLoops.end() && *I == *E && Exponent != MaxExponent) {
551 ++Exponent;
552 ++E;
553 }
554 assert(Exponent > 0 && "Trying to calculate a zeroth exponent of operand?");
555
556 // Calculate powers with exponents 1, 2, 4, 8 etc. and include those of them
557 // that are needed into the result.
558 Value *P = expand(I->second);
559 Value *Result = nullptr;
560 if (Exponent & 1)
561 Result = P;
562 for (uint64_t BinExp = 2; BinExp <= Exponent; BinExp <<= 1) {
563 P = InsertBinop(Instruction::Mul, P, P, SCEV::FlagAnyWrap,
564 /*IsSafeToHoist*/ true);
565 if (Exponent & BinExp)
566 Result = Result ? InsertBinop(Instruction::Mul, Result, P,
568 /*IsSafeToHoist*/ true)
569 : P;
570 }
571
572 I = E;
573 assert(Result && "Nothing was expanded?");
574 return Result;
575 };
576
577 while (I != OpsAndLoops.end()) {
578 if (!Prod) {
579 // This is the first operand. Just expand it.
580 Prod = ExpandOpBinPowN();
581 } else if (I->second->isAllOnesValue()) {
582 // Instead of doing a multiply by negative one, just do a negate.
583 Prod = InsertBinop(Instruction::Sub, Constant::getNullValue(Ty), Prod,
584 SCEV::FlagAnyWrap, /*IsSafeToHoist*/ true);
585 ++I;
586 } else {
587 // A simple mul.
588 Value *W = ExpandOpBinPowN();
589 // Canonicalize a constant to the RHS.
590 if (isa<Constant>(Prod)) std::swap(Prod, W);
591 const APInt *RHS;
592 if (match(W, m_Power2(RHS))) {
593 // Canonicalize Prod*(1<<C) to Prod<<C.
594 assert(!Ty->isVectorTy() && "vector types are not SCEVable");
595 auto NWFlags = S->getNoWrapFlags();
596 // clear nsw flag if shl will produce poison value.
597 if (RHS->logBase2() == RHS->getBitWidth() - 1)
598 NWFlags = ScalarEvolution::clearFlags(NWFlags, SCEV::FlagNSW);
599 Prod = InsertBinop(Instruction::Shl, Prod,
600 ConstantInt::get(Ty, RHS->logBase2()), NWFlags,
601 /*IsSafeToHoist*/ true);
602 } else {
603 Prod = InsertBinop(Instruction::Mul, Prod, W, S->getNoWrapFlags(),
604 /*IsSafeToHoist*/ true);
605 }
606 }
607 }
608
609 return Prod;
610}
611
612Value *SCEVExpander::visitUDivExpr(const SCEVUDivExpr *S) {
613 Value *LHS = expand(S->getLHS());
614 if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(S->getRHS())) {
615 const APInt &RHS = SC->getAPInt();
616 if (RHS.isPowerOf2())
617 return InsertBinop(Instruction::LShr, LHS,
618 ConstantInt::get(SC->getType(), RHS.logBase2()),
619 SCEV::FlagAnyWrap, /*IsSafeToHoist*/ true);
620 }
621
622 Value *RHS = expand(S->getRHS());
623 return InsertBinop(Instruction::UDiv, LHS, RHS, SCEV::FlagAnyWrap,
624 /*IsSafeToHoist*/ SE.isKnownNonZero(S->getRHS()));
625}
626
627/// Determine if this is a well-behaved chain of instructions leading back to
628/// the PHI. If so, it may be reused by expanded expressions.
629bool SCEVExpander::isNormalAddRecExprPHI(PHINode *PN, Instruction *IncV,
630 const Loop *L) {
631 if (IncV->getNumOperands() == 0 || isa<PHINode>(IncV) ||
632 (isa<CastInst>(IncV) && !isa<BitCastInst>(IncV)))
633 return false;
634 // If any of the operands don't dominate the insert position, bail.
635 // Addrec operands are always loop-invariant, so this can only happen
636 // if there are instructions which haven't been hoisted.
637 if (L == IVIncInsertLoop) {
638 for (Use &Op : llvm::drop_begin(IncV->operands()))
639 if (Instruction *OInst = dyn_cast<Instruction>(Op))
640 if (!SE.DT.dominates(OInst, IVIncInsertPos))
641 return false;
642 }
643 // Advance to the next instruction.
644 IncV = dyn_cast<Instruction>(IncV->getOperand(0));
645 if (!IncV)
646 return false;
647
648 if (IncV->mayHaveSideEffects())
649 return false;
650
651 if (IncV == PN)
652 return true;
653
654 return isNormalAddRecExprPHI(PN, IncV, L);
655}
656
657/// getIVIncOperand returns an induction variable increment's induction
658/// variable operand.
659///
660/// If allowScale is set, any type of GEP is allowed as long as the nonIV
661/// operands dominate InsertPos.
662///
663/// If allowScale is not set, ensure that a GEP increment conforms to one of the
664/// simple patterns generated by getAddRecExprPHILiterally and
665/// expandAddtoGEP. If the pattern isn't recognized, return NULL.
667 Instruction *InsertPos,
668 bool allowScale) {
669 if (IncV == InsertPos)
670 return nullptr;
671
672 switch (IncV->getOpcode()) {
673 default:
674 return nullptr;
675 // Check for a simple Add/Sub or GEP of a loop invariant step.
676 case Instruction::Add:
677 case Instruction::Sub: {
678 Instruction *OInst = dyn_cast<Instruction>(IncV->getOperand(1));
679 if (!OInst || SE.DT.dominates(OInst, InsertPos))
680 return dyn_cast<Instruction>(IncV->getOperand(0));
681 return nullptr;
682 }
683 case Instruction::BitCast:
684 return dyn_cast<Instruction>(IncV->getOperand(0));
685 case Instruction::GetElementPtr:
686 for (Use &U : llvm::drop_begin(IncV->operands())) {
687 if (isa<Constant>(U))
688 continue;
689 if (Instruction *OInst = dyn_cast<Instruction>(U)) {
690 if (!SE.DT.dominates(OInst, InsertPos))
691 return nullptr;
692 }
693 if (allowScale) {
694 // allow any kind of GEP as long as it can be hoisted.
695 continue;
696 }
697 // GEPs produced by SCEVExpander use i8 element type.
698 if (!cast<GEPOperator>(IncV)->getSourceElementType()->isIntegerTy(8))
699 return nullptr;
700 break;
701 }
702 return dyn_cast<Instruction>(IncV->getOperand(0));
703 }
704}
705
706/// If the insert point of the current builder or any of the builders on the
707/// stack of saved builders has 'I' as its insert point, update it to point to
708/// the instruction after 'I'. This is intended to be used when the instruction
709/// 'I' is being moved. If this fixup is not done and 'I' is moved to a
710/// different block, the inconsistent insert point (with a mismatched
711/// Instruction and Block) can lead to an instruction being inserted in a block
712/// other than its parent.
713void SCEVExpander::fixupInsertPoints(Instruction *I) {
715 BasicBlock::iterator NewInsertPt = std::next(It);
716 if (Builder.GetInsertPoint() == It)
717 Builder.SetInsertPoint(&*NewInsertPt);
718 for (auto *InsertPtGuard : InsertPointGuards)
719 if (InsertPtGuard->GetInsertPoint() == It)
720 InsertPtGuard->SetInsertPoint(NewInsertPt);
721}
722
723/// hoistStep - Attempt to hoist a simple IV increment above InsertPos to make
724/// it available to other uses in this loop. Recursively hoist any operands,
725/// until we reach a value that dominates InsertPos.
727 bool RecomputePoisonFlags) {
728 auto FixupPoisonFlags = [this](Instruction *I) {
729 // Drop flags that are potentially inferred from old context and infer flags
730 // in new context.
731 I->dropPoisonGeneratingFlags();
732 if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(I))
733 if (auto Flags = SE.getStrengthenedNoWrapFlagsFromBinOp(OBO)) {
734 auto *BO = cast<BinaryOperator>(I);
739 }
740 };
741
742 if (SE.DT.dominates(IncV, InsertPos)) {
743 if (RecomputePoisonFlags)
744 FixupPoisonFlags(IncV);
745 return true;
746 }
747
748 // InsertPos must itself dominate IncV so that IncV's new position satisfies
749 // its existing users.
750 if (isa<PHINode>(InsertPos) ||
751 !SE.DT.dominates(InsertPos->getParent(), IncV->getParent()))
752 return false;
753
754 if (!SE.LI.movementPreservesLCSSAForm(IncV, InsertPos))
755 return false;
756
757 // Check that the chain of IV operands leading back to Phi can be hoisted.
759 for(;;) {
760 Instruction *Oper = getIVIncOperand(IncV, InsertPos, /*allowScale*/true);
761 if (!Oper)
762 return false;
763 // IncV is safe to hoist.
764 IVIncs.push_back(IncV);
765 IncV = Oper;
766 if (SE.DT.dominates(IncV, InsertPos))
767 break;
768 }
769 for (Instruction *I : llvm::reverse(IVIncs)) {
770 fixupInsertPoints(I);
771 I->moveBefore(InsertPos);
772 if (RecomputePoisonFlags)
773 FixupPoisonFlags(I);
774 }
775 return true;
776}
777
778/// Determine if this cyclic phi is in a form that would have been generated by
779/// LSR. We don't care if the phi was actually expanded in this pass, as long
780/// as it is in a low-cost form, for example, no implied multiplication. This
781/// should match any patterns generated by getAddRecExprPHILiterally and
782/// expandAddtoGEP.
783bool SCEVExpander::isExpandedAddRecExprPHI(PHINode *PN, Instruction *IncV,
784 const Loop *L) {
785 for(Instruction *IVOper = IncV;
786 (IVOper = getIVIncOperand(IVOper, L->getLoopPreheader()->getTerminator(),
787 /*allowScale=*/false));) {
788 if (IVOper == PN)
789 return true;
790 }
791 return false;
792}
793
794/// expandIVInc - Expand an IV increment at Builder's current InsertPos.
795/// Typically this is the LatchBlock terminator or IVIncInsertPos, but we may
796/// need to materialize IV increments elsewhere to handle difficult situations.
797Value *SCEVExpander::expandIVInc(PHINode *PN, Value *StepV, const Loop *L,
798 bool useSubtract) {
799 Value *IncV;
800 // If the PHI is a pointer, use a GEP, otherwise use an add or sub.
801 if (PN->getType()->isPointerTy()) {
802 IncV = expandAddToGEP(SE.getSCEV(StepV), PN);
803 } else {
804 IncV = useSubtract ?
805 Builder.CreateSub(PN, StepV, Twine(IVName) + ".iv.next") :
806 Builder.CreateAdd(PN, StepV, Twine(IVName) + ".iv.next");
807 }
808 return IncV;
809}
810
811/// Check whether we can cheaply express the requested SCEV in terms of
812/// the available PHI SCEV by truncation and/or inversion of the step.
814 const SCEVAddRecExpr *Phi,
815 const SCEVAddRecExpr *Requested,
816 bool &InvertStep) {
817 // We can't transform to match a pointer PHI.
818 Type *PhiTy = Phi->getType();
819 Type *RequestedTy = Requested->getType();
820 if (PhiTy->isPointerTy() || RequestedTy->isPointerTy())
821 return false;
822
823 if (RequestedTy->getIntegerBitWidth() > PhiTy->getIntegerBitWidth())
824 return false;
825
826 // Try truncate it if necessary.
827 Phi = dyn_cast<SCEVAddRecExpr>(SE.getTruncateOrNoop(Phi, RequestedTy));
828 if (!Phi)
829 return false;
830
831 // Check whether truncation will help.
832 if (Phi == Requested) {
833 InvertStep = false;
834 return true;
835 }
836
837 // Check whether inverting will help: {R,+,-1} == R - {0,+,1}.
838 if (SE.getMinusSCEV(Requested->getStart(), Requested) == Phi) {
839 InvertStep = true;
840 return true;
841 }
842
843 return false;
844}
845
846static bool IsIncrementNSW(ScalarEvolution &SE, const SCEVAddRecExpr *AR) {
847 if (!isa<IntegerType>(AR->getType()))
848 return false;
849
850 unsigned BitWidth = cast<IntegerType>(AR->getType())->getBitWidth();
851 Type *WideTy = IntegerType::get(AR->getType()->getContext(), BitWidth * 2);
852 const SCEV *Step = AR->getStepRecurrence(SE);
853 const SCEV *OpAfterExtend = SE.getAddExpr(SE.getSignExtendExpr(Step, WideTy),
854 SE.getSignExtendExpr(AR, WideTy));
855 const SCEV *ExtendAfterOp =
856 SE.getSignExtendExpr(SE.getAddExpr(AR, Step), WideTy);
857 return ExtendAfterOp == OpAfterExtend;
858}
859
860static bool IsIncrementNUW(ScalarEvolution &SE, const SCEVAddRecExpr *AR) {
861 if (!isa<IntegerType>(AR->getType()))
862 return false;
863
864 unsigned BitWidth = cast<IntegerType>(AR->getType())->getBitWidth();
865 Type *WideTy = IntegerType::get(AR->getType()->getContext(), BitWidth * 2);
866 const SCEV *Step = AR->getStepRecurrence(SE);
867 const SCEV *OpAfterExtend = SE.getAddExpr(SE.getZeroExtendExpr(Step, WideTy),
868 SE.getZeroExtendExpr(AR, WideTy));
869 const SCEV *ExtendAfterOp =
870 SE.getZeroExtendExpr(SE.getAddExpr(AR, Step), WideTy);
871 return ExtendAfterOp == OpAfterExtend;
872}
873
874 /// getAddRecExprPHILiterally - Helper for expandAddRecExprLiterally. Expand
875 /// the base addrec, which is the addrec without any non-loop-dominating
876 /// values, and return the PHI.
877 PHINode *
878 SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
879                                         const Loop *L, Type *&TruncTy,
880                                         bool &InvertStep) {
881   assert((!IVIncInsertLoop || IVIncInsertPos) &&
882          "Uninitialized insert position");
883
884   // Reuse a previously-inserted PHI, if present.
885   BasicBlock *LatchBlock = L->getLoopLatch();
886   if (LatchBlock) {
887     PHINode *AddRecPhiMatch = nullptr;
888     Instruction *IncV = nullptr;
      // TruncTy/InvertStep are out-parameters; reset them before the scan.
889     TruncTy = nullptr;
890     InvertStep = false;
891
892     // Only try partially matching scevs that need truncation and/or
893     // step-inversion if we know this loop is outside the current loop.
894     bool TryNonMatchingSCEV =
895         IVIncInsertLoop &&
896         SE.DT.properlyDominates(LatchBlock, IVIncInsertLoop->getHeader());
897
      // Scan the header PHIs for one whose recurrence matches (exactly, or
      // up to truncation/step inversion).
898     for (PHINode &PN : L->getHeader()->phis()) {
899       if (!SE.isSCEVable(PN.getType()))
900         continue;
901
902       // We should not look for a incomplete PHI. Getting SCEV for a incomplete
903       // PHI has no meaning at all.
904       if (!PN.isComplete()) {
        // NOTE(review): the next line appears to be the tail of a
        // SCEV_DEBUG_WITH_TYPE call whose opening line (orig. 905) is
        // missing from this extraction.
906             DebugType, dbgs() << "One incomplete PHI is found: " << PN << "\n");
907         continue;
908       }
909
910       const SCEVAddRecExpr *PhiSCEV = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(&PN));
911       if (!PhiSCEV)
912         continue;
913
914       bool IsMatchingSCEV = PhiSCEV == Normalized;
915       // We only handle truncation and inversion of phi recurrences for the
916       // expanded expression if the expanded expression's loop dominates the
917       // loop we insert to. Check now, so we can bail out early.
918       if (!IsMatchingSCEV && !TryNonMatchingSCEV)
919         continue;
920
921       // TODO: this possibly can be reworked to avoid this cast at all.
922       Instruction *TempIncV =
923           dyn_cast<Instruction>(PN.getIncomingValueForBlock(LatchBlock));
924       if (!TempIncV)
925         continue;
926
927       // Check whether we can reuse this PHI node.
928       if (LSRMode) {
929         if (!isExpandedAddRecExprPHI(&PN, TempIncV, L))
930           continue;
931       } else {
932         if (!isNormalAddRecExprPHI(&PN, TempIncV, L))
933           continue;
934       }
935
936       // Stop if we have found an exact match SCEV.
937       if (IsMatchingSCEV) {
938         IncV = TempIncV;
939         TruncTy = nullptr;
940         InvertStep = false;
941         AddRecPhiMatch = &PN;
942         break;
943       }
944
945       // Try whether the phi can be translated into the requested form
946       // (truncated and/or offset by a constant).
947       if ((!TruncTy || InvertStep) &&
948           canBeCheaplyTransformed(SE, PhiSCEV, Normalized, InvertStep)) {
949         // Record the phi node. But don't stop we might find an exact match
950         // later.
951         AddRecPhiMatch = &PN;
952         IncV = TempIncV;
953         TruncTy = Normalized->getType();
954       }
955     }
956
957     if (AddRecPhiMatch) {
958       // Ok, the add recurrence looks usable.
959       // Remember this PHI, even in post-inc mode.
960       InsertedValues.insert(AddRecPhiMatch);
961       // Remember the increment.
962       rememberInstruction(IncV);
963       // Those values were not actually inserted but re-used.
964       ReusedValues.insert(AddRecPhiMatch);
965       ReusedValues.insert(IncV);
966       return AddRecPhiMatch;
967     }
968   }
969
970   // Save the original insertion point so we can restore it when we're done.
971   SCEVInsertPointGuard Guard(Builder, this);
972
973   // Another AddRec may need to be recursively expanded below. For example, if
974   // this AddRec is quadratic, the StepV may itself be an AddRec in this
975   // loop. Remove this loop from the PostIncLoops set before expanding such
976   // AddRecs. Otherwise, we cannot find a valid position for the step
977   // (i.e. StepV can never dominate its loop header). Ideally, we could do
978   // SavedIncLoops.swap(PostIncLoops), but we generally have a single element,
979   // so it's not worth implementing SmallPtrSet::swap.
980   PostIncLoopSet SavedPostIncLoops = PostIncLoops;
981   PostIncLoops.clear();
982
983   // Expand code for the start value into the loop preheader.
984   assert(L->getLoopPreheader() &&
985          "Can't expand add recurrences without a loop preheader!");
986   Value *StartV =
987       expand(Normalized->getStart(), L->getLoopPreheader()->getTerminator());
988
989   // StartV must have been be inserted into L's preheader to dominate the new
990   // phi.
991   assert(!isa<Instruction>(StartV) ||
992          SE.DT.properlyDominates(cast<Instruction>(StartV)->getParent(),
993                                  L->getHeader()));
994
995   // Expand code for the step value. Do this before creating the PHI so that PHI
996   // reuse code doesn't see an incomplete PHI.
997   const SCEV *Step = Normalized->getStepRecurrence(SE);
998   Type *ExpandTy = Normalized->getType();
999   // If the stride is negative, insert a sub instead of an add for the increment
1000   // (unless it's a constant, because subtracts of constants are canonicalized
1001   // to adds).
1002   bool useSubtract = !ExpandTy->isPointerTy() && Step->isNonConstantNegative();
1003   if (useSubtract)
1004     Step = SE.getNegativeSCEV(Step);
1005   // Expand the step somewhere that dominates the loop header.
1006   Value *StepV = expand(Step, L->getHeader()->getFirstInsertionPt());
1007
1008   // The no-wrap behavior proved by IsIncrement(NUW|NSW) is only applicable if
1009   // we actually do emit an addition. It does not apply if we emit a
1010   // subtraction.
1011   bool IncrementIsNUW = !useSubtract && IsIncrementNUW(SE, Normalized);
1012   bool IncrementIsNSW = !useSubtract && IsIncrementNSW(SE, Normalized);
1013
1014   // Create the PHI.
1015   BasicBlock *Header = L->getHeader();
1016   Builder.SetInsertPoint(Header, Header->begin());
1017   pred_iterator HPB = pred_begin(Header), HPE = pred_end(Header);
1018   PHINode *PN = Builder.CreatePHI(ExpandTy, std::distance(HPB, HPE),
1019                                   Twine(IVName) + ".iv");
1020
1021   // Create the step instructions and populate the PHI.
1022   for (pred_iterator HPI = HPB; HPI != HPE; ++HPI) {
1023     BasicBlock *Pred = *HPI;
1024
1025     // Add a start value.
1026     if (!L->contains(Pred)) {
1027       PN->addIncoming(StartV, Pred);
1028       continue;
1029     }
1030
1031     // Create a step value and add it to the PHI.
1032     // If IVIncInsertLoop is non-null and equal to the addrec's loop, insert the
1033     // instructions at IVIncInsertPos.
1034     Instruction *InsertPos = L == IVIncInsertLoop ?
1035       IVIncInsertPos : Pred->getTerminator();
1036     Builder.SetInsertPoint(InsertPos);
1037     Value *IncV = expandIVInc(PN, StepV, L, useSubtract);
1038
      // Attach the proven no-wrap flags to the newly created increment.
1039     if (isa<OverflowingBinaryOperator>(IncV)) {
1040       if (IncrementIsNUW)
1041         cast<BinaryOperator>(IncV)->setHasNoUnsignedWrap();
1042       if (IncrementIsNSW)
1043         cast<BinaryOperator>(IncV)->setHasNoSignedWrap();
1044     }
1045     PN->addIncoming(IncV, Pred);
1046   }
1047
1048   // After expanding subexpressions, restore the PostIncLoops set so the caller
1049   // can ensure that IVIncrement dominates the current uses.
1050   PostIncLoops = SavedPostIncLoops;
1051
1052   // Remember this PHI, even in post-inc mode. LSR SCEV-based salvaging is most
1053   // effective when we are able to use an IV inserted here, so record it.
1054   InsertedValues.insert(PN);
1055   InsertedIVs.push_back(PN);
1056   return PN;
1057 }
1058
/// Expand \p S without rewriting it onto a canonical IV: build (or reuse) a
/// PHI for the normalized recurrence, then adjust for post-inc mode and any
/// truncation/step inversion the PHI reuse required.
1059 Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
1060   const Loop *L = S->getLoop();
1061
1062   // Determine a normalized form of this expression, which is the expression
1063   // before any post-inc adjustment is made.
1064   const SCEVAddRecExpr *Normalized = S;
1065   if (PostIncLoops.count(L)) {
    // NOTE(review): the declaration of `Loops` (orig. line 1066) appears to
    // be missing from this extraction; it is used undeclared below.
1067     Loops.insert(L);
1068     Normalized = cast<SCEVAddRecExpr>(
1069         normalizeForPostIncUse(S, Loops, SE, /*CheckInvertible=*/false));
1070   }
1071
1072   [[maybe_unused]] const SCEV *Start = Normalized->getStart();
1073   const SCEV *Step = Normalized->getStepRecurrence(SE);
1074   assert(SE.properlyDominates(Start, L->getHeader()) &&
1075          "Start does not properly dominate loop header");
1076   assert(SE.dominates(Step, L->getHeader()) && "Step not dominate loop header");
1077
1078   // In some cases, we decide to reuse an existing phi node but need to truncate
1079   // it and/or invert the step.
1080   Type *TruncTy = nullptr;
1081   bool InvertStep = false;
1082   PHINode *PN = getAddRecExprPHILiterally(Normalized, L, TruncTy, InvertStep);
1083
1084   // Accommodate post-inc mode, if necessary.
1085   Value *Result;
1086   if (!PostIncLoops.count(L))
1087     Result = PN;
1088   else {
1089     // In PostInc mode, use the post-incremented value.
1090     BasicBlock *LatchBlock = L->getLoopLatch();
1091     assert(LatchBlock && "PostInc mode requires a unique loop latch!");
1092     Result = PN->getIncomingValueForBlock(LatchBlock);
1093
1094     // We might be introducing a new use of the post-inc IV that is not poison
1095     // safe, in which case we should drop poison generating flags. Only keep
1096     // those flags for which SCEV has proven that they always hold.
1097     if (isa<OverflowingBinaryOperator>(Result)) {
1098       auto *I = cast<Instruction>(Result);
1099       if (!S->hasNoUnsignedWrap())
1100         I->setHasNoUnsignedWrap(false);
1101       if (!S->hasNoSignedWrap())
1102         I->setHasNoSignedWrap(false);
1103     }
1104
1105     // For an expansion to use the postinc form, the client must call
1106     // expandCodeFor with an InsertPoint that is either outside the PostIncLoop
1107     // or dominated by IVIncInsertPos.
1108     if (isa<Instruction>(Result) &&
1109         !SE.DT.dominates(cast<Instruction>(Result),
1110                          &*Builder.GetInsertPoint())) {
1111       // The induction variable's postinc expansion does not dominate this use.
1112       // IVUsers tries to prevent this case, so it is rare. However, it can
1113       // happen when an IVUser outside the loop is not dominated by the latch
1114       // block. Adjusting IVIncInsertPos before expansion begins cannot handle
1115       // all cases. Consider a phi outside whose operand is replaced during
1116       // expansion with the value of the postinc user. Without fundamentally
1117       // changing the way postinc users are tracked, the only remedy is
1118       // inserting an extra IV increment. StepV might fold into PostLoopOffset,
1119       // but hopefully expandCodeFor handles that.
1120       bool useSubtract =
1121           !S->getType()->isPointerTy() && Step->isNonConstantNegative();
1122       if (useSubtract)
1123         Step = SE.getNegativeSCEV(Step);
1124       Value *StepV;
1125       {
1126         // Expand the step somewhere that dominates the loop header.
1127         SCEVInsertPointGuard Guard(Builder, this);
1128         StepV = expand(Step, L->getHeader()->getFirstInsertionPt());
1129       }
1130       Result = expandIVInc(PN, StepV, L, useSubtract);
1131     }
1132   }
1133
1134   // We have decided to reuse an induction variable of a dominating loop. Apply
1135   // truncation and/or inversion of the step.
1136   if (TruncTy) {
1137     // Truncate the result.
1138     if (TruncTy != Result->getType())
1139       Result = Builder.CreateTrunc(Result, TruncTy);
1140
1141     // Invert the result.
1142     if (InvertStep)
1143       Result = Builder.CreateSub(expand(Normalized->getStart()), Result);
1144   }
1145
1146   return Result;
1147 }
1148
/// Expand an addrec. In canonical mode, affine addrecs are rewritten in
/// terms of a canonical induction variable; nested or non-canonical cases
/// fall back to literal expansion.
1149 Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
1150   // In canonical mode we compute the addrec as an expression of a canonical IV
1151   // using evaluateAtIteration and expand the resulting SCEV expression. This
1152   // way we avoid introducing new IVs to carry on the computation of the addrec
1153   // throughout the loop.
1154   //
1155   // For nested addrecs evaluateAtIteration might need a canonical IV of a
1156   // type wider than the addrec itself. Emitting a canonical IV of the
1157   // proper type might produce non-legal types, for example expanding an i64
1158   // {0,+,2,+,1} addrec would need an i65 canonical IV. To avoid this just fall
1159   // back to non-canonical mode for nested addrecs.
1160   if (!CanonicalMode || (S->getNumOperands() > 2))
1161     return expandAddRecExprLiterally(S);
1162
1163   Type *Ty = SE.getEffectiveSCEVType(S->getType());
1164   const Loop *L = S->getLoop();
1165
1166   // First check for an existing canonical IV in a suitable type.
1167   PHINode *CanonicalIV = nullptr;
1168   if (PHINode *PN = L->getCanonicalInductionVariable())
1169     if (SE.getTypeSizeInBits(PN->getType()) >= SE.getTypeSizeInBits(Ty))
1170       CanonicalIV = PN;
1171
1172   // Rewrite an AddRec in terms of the canonical induction variable, if
1173   // its type is more narrow.
1174   if (CanonicalIV &&
1175       SE.getTypeSizeInBits(CanonicalIV->getType()) > SE.getTypeSizeInBits(Ty) &&
1176       !S->getType()->isPointerTy()) {
    // NOTE(review): the declaration of `NewOps` (orig. line 1177) appears to
    // be missing from this extraction; it is used undeclared below.
1178     for (unsigned i = 0, e = S->getNumOperands(); i != e; ++i)
1179       NewOps[i] = SE.getAnyExtendExpr(S->getOperand(i), CanonicalIV->getType());
1180     Value *V = expand(SE.getAddRecExpr(NewOps, S->getLoop(),
    // NOTE(review): the continuation of this getAddRecExpr call (orig. line
    // 1181) is missing from this extraction.
1182     BasicBlock::iterator NewInsertPt =
1183         findInsertPointAfter(cast<Instruction>(V), &*Builder.GetInsertPoint());
1184     V = expand(SE.getTruncateExpr(SE.getUnknown(V), Ty), NewInsertPt);
1185     return V;
1186   }
1187
1188   // {X,+,F} --> X + {0,+,F}
1189   if (!S->getStart()->isZero()) {
    // Pointer addrecs are split into base + offset and emitted as a GEP.
1190     if (isa<PointerType>(S->getType())) {
1191       Value *StartV = expand(SE.getPointerBase(S));
1192       return expandAddToGEP(SE.removePointerBase(S), StartV);
1193     }
1194
    // NOTE(review): the declaration of `NewOps` (orig. line 1195) appears to
    // be missing from this extraction; it is used undeclared below.
1196     NewOps[0] = SE.getConstant(Ty, 0);
1197     const SCEV *Rest = SE.getAddRecExpr(NewOps, L,
    // NOTE(review): the continuation of this getAddRecExpr call (orig. lines
    // 1198-1199) is missing from this extraction.
1200     // Just do a normal add. Pre-expand the operands to suppress folding.
1201     //
1202     // The LHS and RHS values are factored out of the expand call to make the
1203     // output independent of the argument evaluation order.
1204     const SCEV *AddExprLHS = SE.getUnknown(expand(S->getStart()));
1205     const SCEV *AddExprRHS = SE.getUnknown(expand(Rest));
1206     return expand(SE.getAddExpr(AddExprLHS, AddExprRHS));
1207   }
1208
1209   // If we don't yet have a canonical IV, create one.
1210   if (!CanonicalIV) {
1211     // Create and insert the PHI node for the induction variable in the
1212     // specified loop.
1213     BasicBlock *Header = L->getHeader();
1214     pred_iterator HPB = pred_begin(Header), HPE = pred_end(Header);
1215     CanonicalIV = PHINode::Create(Ty, std::distance(HPB, HPE), "indvar");
1216     CanonicalIV->insertBefore(Header->begin());
1217     rememberInstruction(CanonicalIV);
1218
    // NOTE(review): the declaration of `PredSeen` (orig. line 1219) appears
    // to be missing from this extraction; it is used undeclared below.
1220     Constant *One = ConstantInt::get(Ty, 1);
1221     for (pred_iterator HPI = HPB; HPI != HPE; ++HPI) {
1222       BasicBlock *HP = *HPI;
1223       if (!PredSeen.insert(HP).second) {
1224         // There must be an incoming value for each predecessor, even the
1225         // duplicates!
1226         CanonicalIV->addIncoming(CanonicalIV->getIncomingValueForBlock(HP), HP);
1227         continue;
1228       }
1229
1230       if (L->contains(HP)) {
1231         // Insert a unit add instruction right before the terminator
1232         // corresponding to the back-edge.
1233         Instruction *Add = BinaryOperator::CreateAdd(CanonicalIV, One,
1234                                                      "indvar.next",
1235                                                      HP->getTerminator());
1236         Add->setDebugLoc(HP->getTerminator()->getDebugLoc());
1237         rememberInstruction(Add);
1238         CanonicalIV->addIncoming(Add, HP);
1239       } else {
        // Entry edges feed the IV a zero start value.
1240         CanonicalIV->addIncoming(Constant::getNullValue(Ty), HP);
1241       }
1242     }
1243   }
1244
1245   // {0,+,1} --> Insert a canonical induction variable into the loop!
1246   if (S->isAffine() && S->getOperand(1)->isOne()) {
1247     assert(Ty == SE.getEffectiveSCEVType(CanonicalIV->getType()) &&
1248            "IVs with types different from the canonical IV should "
1249            "already have been handled!");
1250     return CanonicalIV;
1251   }
1252
1253   // {0,+,F} --> {0,+,1} * F
1254
1255   // If this is a simple linear addrec, emit it now as a special case.
1256   if (S->isAffine()) // {0,+,F} --> i*F
1257     return
1258       expand(SE.getTruncateOrNoop(
1259         SE.getMulExpr(SE.getUnknown(CanonicalIV),
    // NOTE(review): a continuation line of this expression (orig. line 1260)
    // is missing from this extraction.
1261                       CanonicalIV->getType())),
1262         Ty));
1263
1264   // If this is a chain of recurrences, turn it into a closed form, using the
1265   // folders, then expandCodeFor the closed form. This allows the folders to
1266   // simplify the expression without having to build a bunch of special code
1267   // into this folder.
1268   const SCEV *IH = SE.getUnknown(CanonicalIV); // Get I as a "symbolic" SCEV.
1269
1270   // Promote S up to the canonical IV type, if the cast is foldable.
1271   const SCEV *NewS = S;
1272   const SCEV *Ext = SE.getNoopOrAnyExtend(S, CanonicalIV->getType());
1273   if (isa<SCEVAddRecExpr>(Ext))
1274     NewS = Ext;
1275
1276   const SCEV *V = cast<SCEVAddRecExpr>(NewS)->evaluateAtIteration(IH, SE);
1277
1278   // Truncate the result down to the original type, if needed.
1279   const SCEV *T = SE.getTruncateOrNoop(V, Ty);
1280   return expand(T);
1281 }
1282
1283Value *SCEVExpander::visitPtrToIntExpr(const SCEVPtrToIntExpr *S) {
1284 Value *V = expand(S->getOperand());
1285 return ReuseOrCreateCast(V, S->getType(), CastInst::PtrToInt,
1286 GetOptimalInsertionPointForCastOf(V));
1287}
1288
1289Value *SCEVExpander::visitTruncateExpr(const SCEVTruncateExpr *S) {
1290 Value *V = expand(S->getOperand());
1291 return Builder.CreateTrunc(V, S->getType());
1292}
1293
1294Value *SCEVExpander::visitZeroExtendExpr(const SCEVZeroExtendExpr *S) {
1295 Value *V = expand(S->getOperand());
1296 return Builder.CreateZExt(V, S->getType());
1297}
1298
1299Value *SCEVExpander::visitSignExtendExpr(const SCEVSignExtendExpr *S) {
1300 Value *V = expand(S->getOperand());
1301 return Builder.CreateSExt(V, S->getType());
1302}
1303
1304Value *SCEVExpander::expandMinMaxExpr(const SCEVNAryExpr *S,
1305 Intrinsic::ID IntrinID, Twine Name,
1306 bool IsSequential) {
1307 Value *LHS = expand(S->getOperand(S->getNumOperands() - 1));
1308 Type *Ty = LHS->getType();
1309 if (IsSequential)
1310 LHS = Builder.CreateFreeze(LHS);
1311 for (int i = S->getNumOperands() - 2; i >= 0; --i) {
1312 Value *RHS = expand(S->getOperand(i));
1313 if (IsSequential && i != 0)
1314 RHS = Builder.CreateFreeze(RHS);
1315 Value *Sel;
1316 if (Ty->isIntegerTy())
1317 Sel = Builder.CreateIntrinsic(IntrinID, {Ty}, {LHS, RHS},
1318 /*FMFSource=*/nullptr, Name);
1319 else {
1320 Value *ICmp =
1321 Builder.CreateICmp(MinMaxIntrinsic::getPredicate(IntrinID), LHS, RHS);
1322 Sel = Builder.CreateSelect(ICmp, LHS, RHS, Name);
1323 }
1324 LHS = Sel;
1325 }
1326 return LHS;
1327}
1328
1329Value *SCEVExpander::visitSMaxExpr(const SCEVSMaxExpr *S) {
1330 return expandMinMaxExpr(S, Intrinsic::smax, "smax");
1331}
1332
1333Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) {
1334 return expandMinMaxExpr(S, Intrinsic::umax, "umax");
1335}
1336
1337Value *SCEVExpander::visitSMinExpr(const SCEVSMinExpr *S) {
1338 return expandMinMaxExpr(S, Intrinsic::smin, "smin");
1339}
1340
1341Value *SCEVExpander::visitUMinExpr(const SCEVUMinExpr *S) {
1342 return expandMinMaxExpr(S, Intrinsic::umin, "umin");
1343}
1344
1345Value *SCEVExpander::visitSequentialUMinExpr(const SCEVSequentialUMinExpr *S) {
1346 return expandMinMaxExpr(S, Intrinsic::umin, "umin", /*IsSequential*/true);
1347}
1348
1349Value *SCEVExpander::visitVScale(const SCEVVScale *S) {
1350 return Builder.CreateVScale(ConstantInt::get(S->getType(), 1));
1351}
1352
// NOTE(review): the signature of this expandCodeFor overload (orig. lines
// 1353-1354) is missing from this extraction. The visible body sets the
// expander's insertion point and then delegates to the two-argument
// expandCodeFor overload.
1355   setInsertPoint(IP);
1356   Value *V = expandCodeFor(SH, Ty);
1357   return V;
1358 }
1359
// NOTE(review): the signature of this expandCodeFor overload (orig. line
// 1360) is missing from this extraction. The body expands SH and, when a
// target type Ty is supplied, inserts a no-op cast to it.
1361   // Expand the code for this SCEV.
1362   Value *V = expand(SH);
1363
1364   if (Ty) {
    // Ty must have the same bit width as SH's type; only trivial
    // (bit-preserving) casts are performed here.
1365     assert(SE.getTypeSizeInBits(Ty) == SE.getTypeSizeInBits(SH->getType()) &&
1366            "non-trivial casts should be done with the SCEVs directly!");
1367     V = InsertNoopCastOfTo(V, Ty);
1368   }
1369   return V;
1370 }
1371
/// Decide whether instruction \p I (named on the missing signature line) can
/// be reused as the expansion of \p S without introducing extra poison;
/// instructions whose poison-generating flags must be dropped for the reuse
/// to be safe are collected in \p DropPoisonGeneratingInsts.
1372 static bool
// NOTE(review): the first line of this function's parameter list (orig. line
// 1373), carrying its name and leading parameters, is missing from this
// extraction.
1374     SmallVectorImpl<Instruction *> &DropPoisonGeneratingInsts) {
1375   // If the instruction cannot be poison, it's always safe to reuse.
// NOTE(review): the guard condition preceding this early return (orig. line
// 1376) is missing from this extraction.
1377     return true;
1378
1379   // Otherwise, it is possible that I is more poisonous that S. Collect the
1380   // poison-contributors of S, and then check whether I has any additional
1381   // poison-contributors. Poison that is contributed through poison-generating
1382   // flags is handled by dropping those flags instead.
// NOTE(review): the declaration of `PoisonVals` (orig. line 1383) appears to
// be missing from this extraction; it is used undeclared below.
1384   SE.getPoisonGeneratingValues(PoisonVals, S);
1385
1386   SmallVector<Value *> Worklist;
// NOTE(review): the declaration of `Visited` (orig. line 1387) appears to be
// missing from this extraction; it is used undeclared below.
1388   Worklist.push_back(I);
1389   while (!Worklist.empty()) {
1390     Value *V = Worklist.pop_back_val();
1391     if (!Visited.insert(V).second)
1392       continue;
1393
1394     // Avoid walking large instruction graphs.
1395     if (Visited.size() > 16)
1396       return false;
1397
1398     // Either the value can't be poison, or the S would also be poison if it
1399     // is.
1400     if (PoisonVals.contains(V) || isGuaranteedNotToBePoison(V))
1401       continue;
1402
1403     auto *I = dyn_cast<Instruction>(V);
1404     if (!I)
1405       return false;
1406
1407     // FIXME: Ignore vscale, even though it technically could be poison. Do this
1408     // because SCEV currently assumes it can't be poison. Remove this special
1409     // case once we proper model when vscale can be poison.
1410     if (auto *II = dyn_cast<IntrinsicInst>(I);
1411         II && II->getIntrinsicID() == Intrinsic::vscale)
1412       continue;
1413
1414     if (canCreatePoison(cast<Operator>(I), /*ConsiderFlagsAndMetadata*/ false))
1415       return false;
1416
1417     // If the instruction can't create poison, we can recurse to its operands.
1418     if (I->hasPoisonGeneratingFlagsOrMetadata())
1419       DropPoisonGeneratingInsts.push_back(I);
1420
1421     for (Value *Op : I->operands())
1422       Worklist.push_back(Op);
1423   }
1424   return true;
1425 }
1426
1427Value *SCEVExpander::FindValueInExprValueMap(
1428 const SCEV *S, const Instruction *InsertPt,
1429 SmallVectorImpl<Instruction *> &DropPoisonGeneratingInsts) {
1430 // If the expansion is not in CanonicalMode, and the SCEV contains any
1431 // sub scAddRecExpr type SCEV, it is required to expand the SCEV literally.
1432 if (!CanonicalMode && SE.containsAddRecurrence(S))
1433 return nullptr;
1434
1435 // If S is a constant, it may be worse to reuse an existing Value.
1436 if (isa<SCEVConstant>(S))
1437 return nullptr;
1438
1439 for (Value *V : SE.getSCEVValues(S)) {
1440 Instruction *EntInst = dyn_cast<Instruction>(V);
1441 if (!EntInst)
1442 continue;
1443
1444 // Choose a Value from the set which dominates the InsertPt.
1445 // InsertPt should be inside the Value's parent loop so as not to break
1446 // the LCSSA form.
1447 assert(EntInst->getFunction() == InsertPt->getFunction());
1448 if (S->getType() != V->getType() || !SE.DT.dominates(EntInst, InsertPt) ||
1449 !(SE.LI.getLoopFor(EntInst->getParent()) == nullptr ||
1450 SE.LI.getLoopFor(EntInst->getParent())->contains(InsertPt)))
1451 continue;
1452
1453 // Make sure reusing the instruction is poison-safe.
1454 if (canReuseInstruction(SE, S, EntInst, DropPoisonGeneratingInsts))
1455 return V;
1456 DropPoisonGeneratingInsts.clear();
1457 }
1458 return nullptr;
1459}
1460
1461 // The expansion of SCEV will either reuse a previous Value in ExprValueMap,
1462 // or expand the SCEV literally. Specifically, if the expansion is in LSRMode,
1463 // and the SCEV contains any sub scAddRecExpr type SCEV, it will be expanded
1464 // literally, to prevent LSR's transformed SCEV from being reverted. Otherwise,
1465 // the expansion will try to reuse Value from ExprValueMap, and only when it
1466 // fails, expand the SCEV literally.
1467 Value *SCEVExpander::expand(const SCEV *S) {
1468   // Compute an insertion point for this SCEV object. Hoist the instructions
1469   // as far out in the loop nest as possible.
1470   BasicBlock::iterator InsertPt = Builder.GetInsertPoint();
1471
1472   // We can move insertion point only if there is no div or rem operations
1473   // otherwise we are risky to move it over the check for zero denominator.
1474   auto SafeToHoist = [](const SCEV *S) {
1475     return !SCEVExprContains(S, [](const SCEV *S) {
1476       if (const auto *D = dyn_cast<SCEVUDivExpr>(S)) {
1477         if (const auto *SC = dyn_cast<SCEVConstant>(D->getRHS()))
1478           // Division by non-zero constants can be hoisted.
1479           return SC->getValue()->isZero();
1480         // All other divisions should not be moved as they may be
1481         // divisions by zero and should be kept within the
1482         // conditions of the surrounding loops that guard their
1483         // execution (see PR35406).
1484         return true;
1485       }
1486       return false;
1487     });
1488   };
1489   if (SafeToHoist(S)) {
    // Walk outward through the loop nest until S stops being loop-invariant;
    // the insertion point lands just outside the outermost invariant loop.
1490     for (Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock());;
1491          L = L->getParentLoop()) {
1492       if (SE.isLoopInvariant(S, L)) {
1493         if (!L) break;
1494         if (BasicBlock *Preheader = L->getLoopPreheader()) {
1495           InsertPt = Preheader->getTerminator()->getIterator();
1496         } else {
1497           // LSR sets the insertion point for AddRec start/step values to the
1498           // block start to simplify value reuse, even though it's an invalid
1499           // position. SCEVExpander must correct for this in all cases.
1500           InsertPt = L->getHeader()->getFirstInsertionPt();
1501         }
1502       } else {
1503         // If the SCEV is computable at this level, insert it into the header
1504         // after the PHIs (and after any other instructions that we've inserted
1505         // there) so that it is guaranteed to dominate any user inside the loop.
1506         if (L && SE.hasComputableLoopEvolution(S, L) && !PostIncLoops.count(L))
1507           InsertPt = L->getHeader()->getFirstInsertionPt();
1508
        // Skip past instructions this expander already emitted (and debug
        // intrinsics) so new code is placed after them.
1509         while (InsertPt != Builder.GetInsertPoint() &&
1510                (isInsertedInstruction(&*InsertPt) ||
1511                 isa<DbgInfoIntrinsic>(&*InsertPt))) {
1512           InsertPt = std::next(InsertPt);
1513         }
1514         break;
1515       }
1516     }
1517   }
1518
1519   // Check to see if we already expanded this here.
1520   auto I = InsertedExpressions.find(std::make_pair(S, &*InsertPt));
1521   if (I != InsertedExpressions.end())
1522     return I->second;
1523
1524   SCEVInsertPointGuard Guard(Builder, this);
1525   Builder.SetInsertPoint(InsertPt->getParent(), InsertPt);
1526
1527   // Expand the expression into instructions.
1528   SmallVector<Instruction *> DropPoisonGeneratingInsts;
1529   Value *V = FindValueInExprValueMap(S, &*InsertPt, DropPoisonGeneratingInsts);
1530   if (!V) {
1531     V = visit(S);
1532     V = fixupLCSSAFormFor(V);
1533   } else {
    // Reusing an existing value: drop poison-generating flags/metadata the
    // reuse check flagged as unsafe to keep.
1534     for (Instruction *I : DropPoisonGeneratingInsts)
1535       I->dropPoisonGeneratingFlagsAndMetadata();
1536   }
1537   // Remember the expanded value for this SCEV at this location.
1538   //
1539   // This is independent of PostIncLoops. The mapped value simply materializes
1540   // the expression at this insertion point. If the mapped value happened to be
1541   // a postinc expansion, it could be reused by a non-postinc user, but only if
1542   // its insertion point was already at the head of the loop.
1543   InsertedExpressions[std::make_pair(S, &*InsertPt)] = V;
1544   return V;
1545 }
1546
1547void SCEVExpander::rememberInstruction(Value *I) {
1548 auto DoInsert = [this](Value *V) {
1549 if (!PostIncLoops.empty())
1550 InsertedPostIncValues.insert(V);
1551 else
1552 InsertedValues.insert(V);
1553 };
1554 DoInsert(I);
1555}
1556
1557 /// replaceCongruentIVs - Check for congruent phis in this loop header and
1558 /// replace them with their most canonical representative. Return the number of
1559 /// phis eliminated.
1560 ///
1561 /// This does not depend on any SCEVExpander state but should be used in
1562 /// the same context that SCEVExpander is used.
1563 unsigned
// NOTE(review): the start of this signature (orig. lines 1564-1565), which
// carries the function name and leading parameters (including `L`, `DeadInsts`
// and `DL` used below), is missing from this extraction.
1566                                   const TargetTransformInfo *TTI) {
1567   // Find integer phis in order of increasing width.
// NOTE(review): the declaration of `Phis` (orig. line 1568) appears to be
// missing from this extraction; it is used undeclared below.
1569   for (PHINode &PN : L->getHeader()->phis())
1570     Phis.push_back(&PN);
1571
1572   if (TTI)
1573     // Use stable_sort to preserve order of equivalent PHIs, so the order
1574     // of the sorted Phis is the same from run to run on the same loop.
1575     llvm::stable_sort(Phis, [](Value *LHS, Value *RHS) {
1576       // Put pointers at the back and make sure pointer < pointer = false.
1577       if (!LHS->getType()->isIntegerTy() || !RHS->getType()->isIntegerTy())
1578         return RHS->getType()->isIntegerTy() && !LHS->getType()->isIntegerTy();
// NOTE(review): the integer-width comparison forming this lambda's final
// return (orig. lines 1579-1580) is missing from this extraction.
1581     });
1582
1583   unsigned NumElim = 0;
// NOTE(review): the declaration of `ExprToIVMap` (orig. line 1584) appears
// to be missing from this extraction; it is used undeclared below.
1585   // Process phis from wide to narrow. Map wide phis to their truncation
1586   // so narrow phis can reuse them.
1587   for (PHINode *Phi : Phis) {
1588     auto SimplifyPHINode = [&](PHINode *PN) -> Value * {
1589       if (Value *V = simplifyInstruction(PN, {DL, &SE.TLI, &SE.DT, &SE.AC}))
1590         return V;
1591       if (!SE.isSCEVable(PN->getType()))
1592         return nullptr;
1593       auto *Const = dyn_cast<SCEVConstant>(SE.getSCEV(PN));
1594       if (!Const)
1595         return nullptr;
1596       return Const->getValue();
1597     };
1598
1599     // Fold constant phis. They may be congruent to other constant phis and
1600     // would confuse the logic below that expects proper IVs.
1601     if (Value *V = SimplifyPHINode(Phi)) {
1602       if (V->getType() != Phi->getType())
1603         continue;
1604       SE.forgetValue(Phi);
1605       Phi->replaceAllUsesWith(V);
1606       DeadInsts.emplace_back(Phi);
1607       ++NumElim;
1608       SCEV_DEBUG_WITH_TYPE(DebugType,
1609                            dbgs() << "INDVARS: Eliminated constant iv: " << *Phi
1610                                   << '\n');
1611       continue;
1612     }
1613
1614     if (!SE.isSCEVable(Phi->getType()))
1615       continue;
1616
    // First phi seen for a given SCEV becomes the canonical representative.
1617     PHINode *&OrigPhiRef = ExprToIVMap[SE.getSCEV(Phi)];
1618     if (!OrigPhiRef) {
1619       OrigPhiRef = Phi;
1620       if (Phi->getType()->isIntegerTy() && TTI &&
1621           TTI->isTruncateFree(Phi->getType(), Phis.back()->getType())) {
1622         // Make sure we only rewrite using simple induction variables;
1623         // otherwise, we can make the trip count of a loop unanalyzable
1624         // to SCEV.
1625         const SCEV *PhiExpr = SE.getSCEV(Phi);
1626         if (isa<SCEVAddRecExpr>(PhiExpr)) {
1627           // This phi can be freely truncated to the narrowest phi type. Map the
1628           // truncated expression to it so it will be reused for narrow types.
1629           const SCEV *TruncExpr =
1630               SE.getTruncateExpr(PhiExpr, Phis.back()->getType());
1631           ExprToIVMap[TruncExpr] = Phi;
1632         }
1633       }
1634       continue;
1635     }
1636
1637     // Replacing a pointer phi with an integer phi or vice-versa doesn't make
1638     // sense.
1639     if (OrigPhiRef->getType()->isPointerTy() != Phi->getType()->isPointerTy())
1640       continue;
1641
1642     if (BasicBlock *LatchBlock = L->getLoopLatch()) {
1643       Instruction *OrigInc = dyn_cast<Instruction>(
1644           OrigPhiRef->getIncomingValueForBlock(LatchBlock));
1645       Instruction *IsomorphicInc =
1646           dyn_cast<Instruction>(Phi->getIncomingValueForBlock(LatchBlock));
1647
1648       if (OrigInc && IsomorphicInc) {
1649         // If this phi has the same width but is more canonical, replace the
1650         // original with it. As part of the "more canonical" determination,
1651         // respect a prior decision to use an IV chain.
1652         if (OrigPhiRef->getType() == Phi->getType() &&
1653             !(ChainedPhis.count(Phi) ||
1654               isExpandedAddRecExprPHI(OrigPhiRef, OrigInc, L)) &&
1655             (ChainedPhis.count(Phi) ||
1656              isExpandedAddRecExprPHI(Phi, IsomorphicInc, L))) {
1657           std::swap(OrigPhiRef, Phi);
1658           std::swap(OrigInc, IsomorphicInc);
1659         }
1660         // Replacing the congruent phi is sufficient because acyclic
1661         // redundancy elimination, CSE/GVN, should handle the
1662         // rest. However, once SCEV proves that a phi is congruent,
1663         // it's often the head of an IV user cycle that is isomorphic
1664         // with the original phi. It's worth eagerly cleaning up the
1665         // common case of a single IV increment so that DeleteDeadPHIs
1666         // can remove cycles that had postinc uses.
1667         // Because we may potentially introduce a new use of OrigIV that didn't
1668         // exist before at this point, its poison flags need readjustment.
1669         const SCEV *TruncExpr =
1670             SE.getTruncateOrNoop(SE.getSCEV(OrigInc), IsomorphicInc->getType());
1671         if (OrigInc != IsomorphicInc &&
1672             TruncExpr == SE.getSCEV(IsomorphicInc) &&
1673             SE.LI.replacementPreservesLCSSAForm(IsomorphicInc, OrigInc) &&
1674             hoistIVInc(OrigInc, IsomorphicInc, /*RecomputePoisonFlags*/ true)) {
// NOTE(review): the next two lines appear to be the tail of a
// SCEV_DEBUG_WITH_TYPE call whose opening line (orig. 1675) is missing from
// this extraction.
1676               DebugType, dbgs() << "INDVARS: Eliminated congruent iv.inc: "
1677                                 << *IsomorphicInc << '\n');
1678           Value *NewInc = OrigInc;
1679           if (OrigInc->getType() != IsomorphicInc->getType()) {
// NOTE(review): the declaration of the insertion-point iterator `IP` (orig.
// line 1680) appears to be missing from this extraction; it is used
// undeclared below.
1681             if (PHINode *PN = dyn_cast<PHINode>(OrigInc))
1682               IP = PN->getParent()->getFirstInsertionPt();
1683             else
1684               IP = OrigInc->getNextNonDebugInstruction()->getIterator();
1685
1686             IRBuilder<> Builder(IP->getParent(), IP);
1687             Builder.SetCurrentDebugLocation(IsomorphicInc->getDebugLoc());
1688             NewInc = Builder.CreateTruncOrBitCast(
1689                 OrigInc, IsomorphicInc->getType(), IVName);
1690           }
1691           IsomorphicInc->replaceAllUsesWith(NewInc);
1692           DeadInsts.emplace_back(IsomorphicInc);
1693         }
1694       }
1695     }
1696     SCEV_DEBUG_WITH_TYPE(DebugType,
1697                          dbgs() << "INDVARS: Eliminated congruent iv: " << *Phi
1698                                 << '\n');
// NOTE(review): the next line appears to be the tail of another
// SCEV_DEBUG_WITH_TYPE call whose opening line (orig. 1699) is missing from
// this extraction.
1700         DebugType, dbgs() << "INDVARS: Original iv: " << *OrigPhiRef << '\n');
1701     ++NumElim;
1702     Value *NewIV = OrigPhiRef;
1703     if (OrigPhiRef->getType() != Phi->getType()) {
1704       IRBuilder<> Builder(L->getHeader(),
1705                           L->getHeader()->getFirstInsertionPt());
1706       Builder.SetCurrentDebugLocation(Phi->getDebugLoc());
1707       NewIV = Builder.CreateTruncOrBitCast(OrigPhiRef, Phi->getType(), IVName);
1708     }
1709     Phi->replaceAllUsesWith(NewIV);
1710     DeadInsts.emplace_back(Phi);
1711   }
1712   return NumElim;
1713 }
1714
1716 const Instruction *At,
1717 Loop *L) {
1718 using namespace llvm::PatternMatch;
1719
1720 SmallVector<BasicBlock *, 4> ExitingBlocks;
1721 L->getExitingBlocks(ExitingBlocks);
1722
1723 // Look for suitable value in simple conditions at the loop exits.
1724 for (BasicBlock *BB : ExitingBlocks) {
1726 Instruction *LHS, *RHS;
1727
1728 if (!match(BB->getTerminator(),
1731 continue;
1732
1733 if (SE.getSCEV(LHS) == S && SE.DT.dominates(LHS, At))
1734 return true;
1735
1736 if (SE.getSCEV(RHS) == S && SE.DT.dominates(RHS, At))
1737 return true;
1738 }
1739
1740 // Use expand's logic which is used for reusing a previous Value in
1741 // ExprValueMap. Note that we don't currently model the cost of
1742 // needing to drop poison generating flags on the instruction if we
1743 // want to reuse it. We effectively assume that has zero cost.
1744 SmallVector<Instruction *> DropPoisonGeneratingInsts;
1745 return FindValueInExprValueMap(S, At, DropPoisonGeneratingInsts) != nullptr;
1746}
1747
1748template<typename T> static InstructionCost costAndCollectOperands(
1751 SmallVectorImpl<SCEVOperand> &Worklist) {
1752
1753 const T *S = cast<T>(WorkItem.S);
1755 // Object to help map SCEV operands to expanded IR instructions.
1756 struct OperationIndices {
1757 OperationIndices(unsigned Opc, size_t min, size_t max) :
1758 Opcode(Opc), MinIdx(min), MaxIdx(max) { }
1759 unsigned Opcode;
1760 size_t MinIdx;
1761 size_t MaxIdx;
1762 };
1763
1764 // Collect the operations of all the instructions that will be needed to
1765 // expand the SCEVExpr. This is so that when we come to cost the operands,
1766 // we know what the generated user(s) will be.
1768
1769 auto CastCost = [&](unsigned Opcode) -> InstructionCost {
1770 Operations.emplace_back(Opcode, 0, 0);
1771 return TTI.getCastInstrCost(Opcode, S->getType(),
1772 S->getOperand(0)->getType(),
1774 };
1775
1776 auto ArithCost = [&](unsigned Opcode, unsigned NumRequired,
1777 unsigned MinIdx = 0,
1778 unsigned MaxIdx = 1) -> InstructionCost {
1779 Operations.emplace_back(Opcode, MinIdx, MaxIdx);
1780 return NumRequired *
1781 TTI.getArithmeticInstrCost(Opcode, S->getType(), CostKind);
1782 };
1783
1784 auto CmpSelCost = [&](unsigned Opcode, unsigned NumRequired, unsigned MinIdx,
1785 unsigned MaxIdx) -> InstructionCost {
1786 Operations.emplace_back(Opcode, MinIdx, MaxIdx);
1787 Type *OpType = S->getType();
1788 return NumRequired * TTI.getCmpSelInstrCost(
1789 Opcode, OpType, CmpInst::makeCmpResultType(OpType),
1791 };
1792
1793 switch (S->getSCEVType()) {
1794 case scCouldNotCompute:
1795 llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
1796 case scUnknown:
1797 case scConstant:
1798 case scVScale:
1799 return 0;
1800 case scPtrToInt:
1801 Cost = CastCost(Instruction::PtrToInt);
1802 break;
1803 case scTruncate:
1804 Cost = CastCost(Instruction::Trunc);
1805 break;
1806 case scZeroExtend:
1807 Cost = CastCost(Instruction::ZExt);
1808 break;
1809 case scSignExtend:
1810 Cost = CastCost(Instruction::SExt);
1811 break;
1812 case scUDivExpr: {
1813 unsigned Opcode = Instruction::UDiv;
1814 if (auto *SC = dyn_cast<SCEVConstant>(S->getOperand(1)))
1815 if (SC->getAPInt().isPowerOf2())
1816 Opcode = Instruction::LShr;
1817 Cost = ArithCost(Opcode, 1);
1818 break;
1819 }
1820 case scAddExpr:
1821 Cost = ArithCost(Instruction::Add, S->getNumOperands() - 1);
1822 break;
1823 case scMulExpr:
1824 // TODO: this is a very pessimistic cost modelling for Mul,
1825 // because of Bin Pow algorithm actually used by the expander,
1826 // see SCEVExpander::visitMulExpr(), ExpandOpBinPowN().
1827 Cost = ArithCost(Instruction::Mul, S->getNumOperands() - 1);
1828 break;
1829 case scSMaxExpr:
1830 case scUMaxExpr:
1831 case scSMinExpr:
1832 case scUMinExpr:
1833 case scSequentialUMinExpr: {
1834 // FIXME: should this ask the cost for Intrinsic's?
1835 // The reduction tree.
1836 Cost += CmpSelCost(Instruction::ICmp, S->getNumOperands() - 1, 0, 1);
1837 Cost += CmpSelCost(Instruction::Select, S->getNumOperands() - 1, 0, 2);
1838 switch (S->getSCEVType()) {
1839 case scSequentialUMinExpr: {
1840 // The safety net against poison.
1841 // FIXME: this is broken.
1842 Cost += CmpSelCost(Instruction::ICmp, S->getNumOperands() - 1, 0, 0);
1843 Cost += ArithCost(Instruction::Or,
1844 S->getNumOperands() > 2 ? S->getNumOperands() - 2 : 0);
1845 Cost += CmpSelCost(Instruction::Select, 1, 0, 1);
1846 break;
1847 }
1848 default:
1849 assert(!isa<SCEVSequentialMinMaxExpr>(S) &&
1850 "Unhandled SCEV expression type?");
1851 break;
1852 }
1853 break;
1854 }
1855 case scAddRecExpr: {
1856 // In this polynominal, we may have some zero operands, and we shouldn't
1857 // really charge for those. So how many non-zero coefficients are there?
1858 int NumTerms = llvm::count_if(S->operands(), [](const SCEV *Op) {
1859 return !Op->isZero();
1860 });
1861
1862 assert(NumTerms >= 1 && "Polynominal should have at least one term.");
1863 assert(!(*std::prev(S->operands().end()))->isZero() &&
1864 "Last operand should not be zero");
1865
1866 // Ignoring constant term (operand 0), how many of the coefficients are u> 1?
1867 int NumNonZeroDegreeNonOneTerms =
1868 llvm::count_if(S->operands(), [](const SCEV *Op) {
1869 auto *SConst = dyn_cast<SCEVConstant>(Op);
1870 return !SConst || SConst->getAPInt().ugt(1);
1871 });
1872
1873 // Much like with normal add expr, the polynominal will require
1874 // one less addition than the number of it's terms.
1875 InstructionCost AddCost = ArithCost(Instruction::Add, NumTerms - 1,
1876 /*MinIdx*/ 1, /*MaxIdx*/ 1);
1877 // Here, *each* one of those will require a multiplication.
1878 InstructionCost MulCost =
1879 ArithCost(Instruction::Mul, NumNonZeroDegreeNonOneTerms);
1880 Cost = AddCost + MulCost;
1881
1882 // What is the degree of this polynominal?
1883 int PolyDegree = S->getNumOperands() - 1;
1884 assert(PolyDegree >= 1 && "Should be at least affine.");
1885
1886 // The final term will be:
1887 // Op_{PolyDegree} * x ^ {PolyDegree}
1888 // Where x ^ {PolyDegree} will again require PolyDegree-1 mul operations.
1889 // Note that x ^ {PolyDegree} = x * x ^ {PolyDegree-1} so charging for
1890 // x ^ {PolyDegree} will give us x ^ {2} .. x ^ {PolyDegree-1} for free.
1891 // FIXME: this is conservatively correct, but might be overly pessimistic.
1892 Cost += MulCost * (PolyDegree - 1);
1893 break;
1894 }
1895 }
1896
1897 for (auto &CostOp : Operations) {
1898 for (auto SCEVOp : enumerate(S->operands())) {
1899 // Clamp the index to account for multiple IR operations being chained.
1900 size_t MinIdx = std::max(SCEVOp.index(), CostOp.MinIdx);
1901 size_t OpIdx = std::min(MinIdx, CostOp.MaxIdx);
1902 Worklist.emplace_back(CostOp.Opcode, OpIdx, SCEVOp.value());
1903 }
1904 }
1905 return Cost;
1906}
1907
1908bool SCEVExpander::isHighCostExpansionHelper(
1909 const SCEVOperand &WorkItem, Loop *L, const Instruction &At,
1910 InstructionCost &Cost, unsigned Budget, const TargetTransformInfo &TTI,
1912 SmallVectorImpl<SCEVOperand> &Worklist) {
1913 if (Cost > Budget)
1914 return true; // Already run out of budget, give up.
1915
1916 const SCEV *S = WorkItem.S;
1917 // Was the cost of expansion of this expression already accounted for?
1918 if (!isa<SCEVConstant>(S) && !Processed.insert(S).second)
1919 return false; // We have already accounted for this expression.
1920
1921 // If we can find an existing value for this scev available at the point "At"
1922 // then consider the expression cheap.
1923 if (hasRelatedExistingExpansion(S, &At, L))
1924 return false; // Consider the expression to be free.
1925
1927 L->getHeader()->getParent()->hasMinSize()
1930
1931 switch (S->getSCEVType()) {
1932 case scCouldNotCompute:
1933 llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
1934 case scUnknown:
1935 case scVScale:
1936 // Assume to be zero-cost.
1937 return false;
1938 case scConstant: {
1939 // Only evalulate the costs of constants when optimizing for size.
1941 return false;
1942 const APInt &Imm = cast<SCEVConstant>(S)->getAPInt();
1943 Type *Ty = S->getType();
1945 WorkItem.ParentOpcode, WorkItem.OperandIdx, Imm, Ty, CostKind);
1946 return Cost > Budget;
1947 }
1948 case scTruncate:
1949 case scPtrToInt:
1950 case scZeroExtend:
1951 case scSignExtend: {
1952 Cost +=
1953 costAndCollectOperands<SCEVCastExpr>(WorkItem, TTI, CostKind, Worklist);
1954 return false; // Will answer upon next entry into this function.
1955 }
1956 case scUDivExpr: {
1957 // UDivExpr is very likely a UDiv that ScalarEvolution's HowFarToZero or
1958 // HowManyLessThans produced to compute a precise expression, rather than a
1959 // UDiv from the user's code. If we can't find a UDiv in the code with some
1960 // simple searching, we need to account for it's cost.
1961
1962 // At the beginning of this function we already tried to find existing
1963 // value for plain 'S'. Now try to lookup 'S + 1' since it is common
1964 // pattern involving division. This is just a simple search heuristic.
1966 SE.getAddExpr(S, SE.getConstant(S->getType(), 1)), &At, L))
1967 return false; // Consider it to be free.
1968
1969 Cost +=
1970 costAndCollectOperands<SCEVUDivExpr>(WorkItem, TTI, CostKind, Worklist);
1971 return false; // Will answer upon next entry into this function.
1972 }
1973 case scAddExpr:
1974 case scMulExpr:
1975 case scUMaxExpr:
1976 case scSMaxExpr:
1977 case scUMinExpr:
1978 case scSMinExpr:
1979 case scSequentialUMinExpr: {
1980 assert(cast<SCEVNAryExpr>(S)->getNumOperands() > 1 &&
1981 "Nary expr should have more than 1 operand.");
1982 // The simple nary expr will require one less op (or pair of ops)
1983 // than the number of it's terms.
1984 Cost +=
1985 costAndCollectOperands<SCEVNAryExpr>(WorkItem, TTI, CostKind, Worklist);
1986 return Cost > Budget;
1987 }
1988 case scAddRecExpr: {
1989 assert(cast<SCEVAddRecExpr>(S)->getNumOperands() >= 2 &&
1990 "Polynomial should be at least linear");
1991 Cost += costAndCollectOperands<SCEVAddRecExpr>(
1992 WorkItem, TTI, CostKind, Worklist);
1993 return Cost > Budget;
1994 }
1995 }
1996 llvm_unreachable("Unknown SCEV kind!");
1997}
1998
2000 Instruction *IP) {
2001 assert(IP);
2002 switch (Pred->getKind()) {
2004 return expandUnionPredicate(cast<SCEVUnionPredicate>(Pred), IP);
2006 return expandComparePredicate(cast<SCEVComparePredicate>(Pred), IP);
2007 case SCEVPredicate::P_Wrap: {
2008 auto *AddRecPred = cast<SCEVWrapPredicate>(Pred);
2009 return expandWrapPredicate(AddRecPred, IP);
2010 }
2011 }
2012 llvm_unreachable("Unknown SCEV predicate type");
2013}
2014
2016 Instruction *IP) {
2017 Value *Expr0 = expand(Pred->getLHS(), IP);
2018 Value *Expr1 = expand(Pred->getRHS(), IP);
2019
2020 Builder.SetInsertPoint(IP);
2021 auto InvPred = ICmpInst::getInversePredicate(Pred->getPredicate());
2022 auto *I = Builder.CreateICmp(InvPred, Expr0, Expr1, "ident.check");
2023 return I;
2024}
2025
2027 Instruction *Loc, bool Signed) {
2028 assert(AR->isAffine() && "Cannot generate RT check for "
2029 "non-affine expression");
2030
2031 // FIXME: It is highly suspicious that we're ignoring the predicates here.
2033 const SCEV *ExitCount =
2035
2036 assert(!isa<SCEVCouldNotCompute>(ExitCount) && "Invalid loop count");
2037
2038 const SCEV *Step = AR->getStepRecurrence(SE);
2039 const SCEV *Start = AR->getStart();
2040
2041 Type *ARTy = AR->getType();
2042 unsigned SrcBits = SE.getTypeSizeInBits(ExitCount->getType());
2043 unsigned DstBits = SE.getTypeSizeInBits(ARTy);
2044
2045 // The expression {Start,+,Step} has nusw/nssw if
2046 // Step < 0, Start - |Step| * Backedge <= Start
2047 // Step >= 0, Start + |Step| * Backedge > Start
2048 // and |Step| * Backedge doesn't unsigned overflow.
2049
2050 Builder.SetInsertPoint(Loc);
2051 Value *TripCountVal = expand(ExitCount, Loc);
2052
2053 IntegerType *Ty =
2055
2056 Value *StepValue = expand(Step, Loc);
2057 Value *NegStepValue = expand(SE.getNegativeSCEV(Step), Loc);
2058 Value *StartValue = expand(Start, Loc);
2059
2060 ConstantInt *Zero =
2061 ConstantInt::get(Loc->getContext(), APInt::getZero(DstBits));
2062
2063 Builder.SetInsertPoint(Loc);
2064 // Compute |Step|
2065 Value *StepCompare = Builder.CreateICmp(ICmpInst::ICMP_SLT, StepValue, Zero);
2066 Value *AbsStep = Builder.CreateSelect(StepCompare, NegStepValue, StepValue);
2067
2068 // Compute |Step| * Backedge
2069 // Compute:
2070 // 1. Start + |Step| * Backedge < Start
2071 // 2. Start - |Step| * Backedge > Start
2072 //
2073 // And select either 1. or 2. depending on whether step is positive or
2074 // negative. If Step is known to be positive or negative, only create
2075 // either 1. or 2.
2076 auto ComputeEndCheck = [&]() -> Value * {
2077 // Checking <u 0 is always false.
2078 if (!Signed && Start->isZero() && SE.isKnownPositive(Step))
2079 return ConstantInt::getFalse(Loc->getContext());
2080
2081 // Get the backedge taken count and truncate or extended to the AR type.
2082 Value *TruncTripCount = Builder.CreateZExtOrTrunc(TripCountVal, Ty);
2083
2084 Value *MulV, *OfMul;
2085 if (Step->isOne()) {
2086 // Special-case Step of one. Potentially-costly `umul_with_overflow` isn't
2087 // needed, there is never an overflow, so to avoid artificially inflating
2088 // the cost of the check, directly emit the optimized IR.
2089 MulV = TruncTripCount;
2090 OfMul = ConstantInt::getFalse(MulV->getContext());
2091 } else {
2092 auto *MulF = Intrinsic::getDeclaration(Loc->getModule(),
2093 Intrinsic::umul_with_overflow, Ty);
2094 CallInst *Mul =
2095 Builder.CreateCall(MulF, {AbsStep, TruncTripCount}, "mul");
2096 MulV = Builder.CreateExtractValue(Mul, 0, "mul.result");
2097 OfMul = Builder.CreateExtractValue(Mul, 1, "mul.overflow");
2098 }
2099
2100 Value *Add = nullptr, *Sub = nullptr;
2101 bool NeedPosCheck = !SE.isKnownNegative(Step);
2102 bool NeedNegCheck = !SE.isKnownPositive(Step);
2103
2104 if (PointerType *ARPtrTy = dyn_cast<PointerType>(ARTy)) {
2105 Value *NegMulV = Builder.CreateNeg(MulV);
2106 if (NeedPosCheck)
2107 Add = Builder.CreateGEP(Builder.getInt8Ty(), StartValue, MulV);
2108 if (NeedNegCheck)
2109 Sub = Builder.CreateGEP(Builder.getInt8Ty(), StartValue, NegMulV);
2110 } else {
2111 if (NeedPosCheck)
2112 Add = Builder.CreateAdd(StartValue, MulV);
2113 if (NeedNegCheck)
2114 Sub = Builder.CreateSub(StartValue, MulV);
2115 }
2116
2117 Value *EndCompareLT = nullptr;
2118 Value *EndCompareGT = nullptr;
2119 Value *EndCheck = nullptr;
2120 if (NeedPosCheck)
2121 EndCheck = EndCompareLT = Builder.CreateICmp(
2123 if (NeedNegCheck)
2124 EndCheck = EndCompareGT = Builder.CreateICmp(
2125 Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT, Sub, StartValue);
2126 if (NeedPosCheck && NeedNegCheck) {
2127 // Select the answer based on the sign of Step.
2128 EndCheck = Builder.CreateSelect(StepCompare, EndCompareGT, EndCompareLT);
2129 }
2130 return Builder.CreateOr(EndCheck, OfMul);
2131 };
2132 Value *EndCheck = ComputeEndCheck();
2133
2134 // If the backedge taken count type is larger than the AR type,
2135 // check that we don't drop any bits by truncating it. If we are
2136 // dropping bits, then we have overflow (unless the step is zero).
2137 if (SrcBits > DstBits) {
2138 auto MaxVal = APInt::getMaxValue(DstBits).zext(SrcBits);
2139 auto *BackedgeCheck =
2140 Builder.CreateICmp(ICmpInst::ICMP_UGT, TripCountVal,
2141 ConstantInt::get(Loc->getContext(), MaxVal));
2142 BackedgeCheck = Builder.CreateAnd(
2143 BackedgeCheck, Builder.CreateICmp(ICmpInst::ICMP_NE, StepValue, Zero));
2144
2145 EndCheck = Builder.CreateOr(EndCheck, BackedgeCheck);
2146 }
2147
2148 return EndCheck;
2149}
2150
2152 Instruction *IP) {
2153 const auto *A = cast<SCEVAddRecExpr>(Pred->getExpr());
2154 Value *NSSWCheck = nullptr, *NUSWCheck = nullptr;
2155
2156 // Add a check for NUSW
2158 NUSWCheck = generateOverflowCheck(A, IP, false);
2159
2160 // Add a check for NSSW
2162 NSSWCheck = generateOverflowCheck(A, IP, true);
2163
2164 if (NUSWCheck && NSSWCheck)
2165 return Builder.CreateOr(NUSWCheck, NSSWCheck);
2166
2167 if (NUSWCheck)
2168 return NUSWCheck;
2169
2170 if (NSSWCheck)
2171 return NSSWCheck;
2172
2173 return ConstantInt::getFalse(IP->getContext());
2174}
2175
2177 Instruction *IP) {
2178 // Loop over all checks in this set.
2179 SmallVector<Value *> Checks;
2180 for (const auto *Pred : Union->getPredicates()) {
2181 Checks.push_back(expandCodeForPredicate(Pred, IP));
2182 Builder.SetInsertPoint(IP);
2183 }
2184
2185 if (Checks.empty())
2186 return ConstantInt::getFalse(IP->getContext());
2187 return Builder.CreateOr(Checks);
2188}
2189
2190Value *SCEVExpander::fixupLCSSAFormFor(Value *V) {
2191 auto *DefI = dyn_cast<Instruction>(V);
2192 if (!PreserveLCSSA || !DefI)
2193 return V;
2194
2195 Instruction *InsertPt = &*Builder.GetInsertPoint();
2196 Loop *DefLoop = SE.LI.getLoopFor(DefI->getParent());
2197 Loop *UseLoop = SE.LI.getLoopFor(InsertPt->getParent());
2198 if (!DefLoop || UseLoop == DefLoop || DefLoop->contains(UseLoop))
2199 return V;
2200
2201 // Create a temporary instruction to at the current insertion point, so we
2202 // can hand it off to the helper to create LCSSA PHIs if required for the
2203 // new use.
2204 // FIXME: Ideally formLCSSAForInstructions (used in fixupLCSSAFormFor)
2205 // would accept a insertion point and return an LCSSA phi for that
2206 // insertion point, so there is no need to insert & remove the temporary
2207 // instruction.
2208 Type *ToTy;
2209 if (DefI->getType()->isIntegerTy())
2210 ToTy = PointerType::get(DefI->getContext(), 0);
2211 else
2212 ToTy = Type::getInt32Ty(DefI->getContext());
2213 Instruction *User =
2214 CastInst::CreateBitOrPointerCast(DefI, ToTy, "tmp.lcssa.user", InsertPt);
2215 auto RemoveUserOnExit =
2216 make_scope_exit([User]() { User->eraseFromParent(); });
2217
2219 ToUpdate.push_back(DefI);
2220 SmallVector<PHINode *, 16> PHIsToRemove;
2221 SmallVector<PHINode *, 16> InsertedPHIs;
2222 formLCSSAForInstructions(ToUpdate, SE.DT, SE.LI, &SE, &PHIsToRemove,
2223 &InsertedPHIs);
2224 for (PHINode *PN : InsertedPHIs)
2225 rememberInstruction(PN);
2226 for (PHINode *PN : PHIsToRemove) {
2227 if (!PN->use_empty())
2228 continue;
2229 InsertedValues.erase(PN);
2230 InsertedPostIncValues.erase(PN);
2231 PN->eraseFromParent();
2232 }
2233
2234 return User->getOperand(0);
2235}
2236
2237namespace {
2238// Search for a SCEV subexpression that is not safe to expand. Any expression
2239// that may expand to a !isSafeToSpeculativelyExecute value is unsafe, namely
2240// UDiv expressions. We don't know if the UDiv is derived from an IR divide
2241// instruction, but the important thing is that we prove the denominator is
2242// nonzero before expansion.
2243//
2244// IVUsers already checks that IV-derived expressions are safe. So this check is
2245// only needed when the expression includes some subexpression that is not IV
2246// derived.
2247//
2248// Currently, we only allow division by a value provably non-zero here.
2249//
2250// We cannot generally expand recurrences unless the step dominates the loop
2251// header. The expander handles the special case of affine recurrences by
2252// scaling the recurrence outside the loop, but this technique isn't generally
2253// applicable. Expanding a nested recurrence outside a loop requires computing
2254// binomial coefficients. This could be done, but the recurrence has to be in a
2255// perfectly reduced form, which can't be guaranteed.
2256struct SCEVFindUnsafe {
2257 ScalarEvolution &SE;
2258 bool CanonicalMode;
2259 bool IsUnsafe = false;
2260
2261 SCEVFindUnsafe(ScalarEvolution &SE, bool CanonicalMode)
2262 : SE(SE), CanonicalMode(CanonicalMode) {}
2263
2264 bool follow(const SCEV *S) {
2265 if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) {
2266 if (!SE.isKnownNonZero(D->getRHS())) {
2267 IsUnsafe = true;
2268 return false;
2269 }
2270 }
2271 if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
2272 // For non-affine addrecs or in non-canonical mode we need a preheader
2273 // to insert into.
2274 if (!AR->getLoop()->getLoopPreheader() &&
2275 (!CanonicalMode || !AR->isAffine())) {
2276 IsUnsafe = true;
2277 return false;
2278 }
2279 }
2280 return true;
2281 }
2282 bool isDone() const { return IsUnsafe; }
2283};
2284} // namespace
2285
2287 SCEVFindUnsafe Search(SE, CanonicalMode);
2288 visitAll(S, Search);
2289 return !Search.IsUnsafe;
2290}
2291
2293 const Instruction *InsertionPoint) const {
2294 if (!isSafeToExpand(S))
2295 return false;
2296 // We have to prove that the expanded site of S dominates InsertionPoint.
2297 // This is easy when not in the same block, but hard when S is an instruction
2298 // to be expanded somewhere inside the same block as our insertion point.
2299 // What we really need here is something analogous to an OrderedBasicBlock,
2300 // but for the moment, we paper over the problem by handling two common and
2301 // cheap to check cases.
2302 if (SE.properlyDominates(S, InsertionPoint->getParent()))
2303 return true;
2304 if (SE.dominates(S, InsertionPoint->getParent())) {
2305 if (InsertionPoint->getParent()->getTerminator() == InsertionPoint)
2306 return true;
2307 if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S))
2308 if (llvm::is_contained(InsertionPoint->operand_values(), U->getValue()))
2309 return true;
2310 }
2311 return false;
2312}
2313
2315 // Result is used, nothing to remove.
2316 if (ResultUsed)
2317 return;
2318
2319 auto InsertedInstructions = Expander.getAllInsertedInstructions();
2320#ifndef NDEBUG
2321 SmallPtrSet<Instruction *, 8> InsertedSet(InsertedInstructions.begin(),
2322 InsertedInstructions.end());
2323 (void)InsertedSet;
2324#endif
2325 // Remove sets with value handles.
2326 Expander.clear();
2327
2328 // Remove all inserted instructions.
2329 for (Instruction *I : reverse(InsertedInstructions)) {
2330#ifndef NDEBUG
2331 assert(all_of(I->users(),
2332 [&InsertedSet](Value *U) {
2333 return InsertedSet.contains(cast<Instruction>(U));
2334 }) &&
2335 "removed instruction should only be used by instructions inserted "
2336 "during expansion");
2337#endif
2338 assert(!I->getType()->isVoidTy() &&
2339 "inserted instruction should have non-void types");
2340 I->replaceAllUsesWith(PoisonValue::get(I->getType()));
2341 I->eraseFromParent();
2342 }
2343}
assume Assume Builder
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
std::string Name
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
Hexagon Hardware Loops
#define I(x, y, z)
Definition: MD5.cpp:58
#define P(N)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
static bool IsIncrementNUW(ScalarEvolution &SE, const SCEVAddRecExpr *AR)
static const Loop * PickMostRelevantLoop(const Loop *A, const Loop *B, DominatorTree &DT)
PickMostRelevantLoop - Given two loops pick the one that's most relevant for SCEV expansion.
static bool canReuseInstruction(ScalarEvolution &SE, const SCEV *S, Instruction *I, SmallVectorImpl< Instruction * > &DropPoisonGeneratingInsts)
static InstructionCost costAndCollectOperands(const SCEVOperand &WorkItem, const TargetTransformInfo &TTI, TargetTransformInfo::TargetCostKind CostKind, SmallVectorImpl< SCEVOperand > &Worklist)
static bool IsIncrementNSW(ScalarEvolution &SE, const SCEVAddRecExpr *AR)
static bool canBeCheaplyTransformed(ScalarEvolution &SE, const SCEVAddRecExpr *Phi, const SCEVAddRecExpr *Requested, bool &InvertStep)
Check whether we can cheaply express the requested SCEV in terms of the available PHI SCEV by truncat...
#define SCEV_DEBUG_WITH_TYPE(TYPE, X)
This file defines the make_scope_exit function, which executes user-defined cleanup logic at scope ex...
This file defines the SmallSet class.
This pass exposes codegen information to IR-level passes.
Value * RHS
Value * LHS
Class for arbitrary precision integers.
Definition: APInt.h:76
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:981
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition: APInt.h:184
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition: APInt.h:178
This class represents an incoming formal argument to a Function.
Definition: Argument.h:28
LLVM Basic Block Representation.
Definition: BasicBlock.h:56
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:335
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:257
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:112
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:87
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:127
static BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), Instruction *InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
This class represents a function call, abstracting a target machine's calling convention.
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:428
static Instruction::CastOps getCastOpcode(const Value *Val, bool SrcIsSigned, Type *Ty, bool DstIsSigned)
Returns the opcode necessary to cast Val into Ty using usual casting rules.
static CastInst * CreateBitOrPointerCast(Value *S, Type *Ty, const Twine &Name="", Instruction *InsertBefore=nullptr)
Create a BitCast, a PtrToInt, or an IntToPTr cast instruction.
Instruction::CastOps getOpcode() const
Return the opcode of this CastInst.
Definition: InstrTypes.h:668
static Type * makeCmpResultType(Type *opnd_type)
Create a result type for fcmp/icmp.
Definition: InstrTypes.h:1058
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:711
@ ICMP_SLT
signed less than
Definition: InstrTypes.h:740
@ ICMP_UGT
unsigned greater than
Definition: InstrTypes.h:734
@ ICMP_SGT
signed greater than
Definition: InstrTypes.h:738
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:736
@ ICMP_NE
not equal
Definition: InstrTypes.h:733
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition: InstrTypes.h:825
A constant value that is initialized with an expression using other constant values.
Definition: Constants.h:997
static Constant * getCast(unsigned ops, Constant *C, Type *Ty, bool OnlyIfReduced=false)
Convenience function for getting a Cast operation.
Definition: Constants.cpp:1957
This is the shared class of boolean and integer constants.
Definition: Constants.h:78
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:888
static ConstantInt * getFalse(LLVMContext &Context)
Definition: Constants.cpp:840
This is an important base class in LLVM.
Definition: Constant.h:41
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:356
This class represents an Operation in the Expression.
TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:504
bool isNonIntegralPointerType(PointerType *PT) const
Definition: DataLayout.h:398
A debug info location.
Definition: DebugLoc.h:33
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Definition: Dominators.cpp:122
const BasicBlock & getEntryBlock() const
Definition: Function.h:747
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1993
Value * CreateVScale(Constant *Scaling, const Twine &Name="")
Create a call to llvm.vscale, multiplied by Scaling.
Definition: IRBuilder.cpp:88
Value * CreateNeg(Value *V, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1712
Value * CreateZExtOrTrunc(Value *V, Type *DestTy, const Twine &Name="")
Create a ZExt or Trunc from the integer value V to DestTy.
Definition: IRBuilder.h:2007
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2478
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, Instruction *FMFSource=nullptr, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Definition: IRBuilder.cpp:941
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Definition: IRBuilder.cpp:1119
BasicBlock::iterator GetInsertPoint() const
Definition: IRBuilder.h:175
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2001
Value * CreateFreeze(Value *V, const Twine &Name="")
Definition: IRBuilder.h:2497
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:174
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1997
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Definition: IRBuilder.h:2359
InstTy * Insert(InstTy *I, const Twine &Name="") const
Insert and return the specified instruction.
Definition: IRBuilder.h:145
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1335
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1466
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1318
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1488
Value * CreateCast(Instruction::CastOps Op, Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2126
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition: IRBuilder.h:180
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args=std::nullopt, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2374
Value * CreateGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="", bool IsInBounds=false)
Definition: IRBuilder.h:1862
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2313
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Definition: IRBuilder.h:502
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2628
void setHasNoUnsignedWrap(bool b=true)
Set or clear the nuw flag on this instruction, which must be an operator which supports this flag.
void setHasNoSignedWrap(bool b=true)
Set or clear the nsw flag on this instruction, which must be an operator which supports this flag.
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction.
Definition: Instruction.cpp:89
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:392
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:71
const BasicBlock * getParent() const
Definition: Instruction.h:90
const Function * getFunction() const
Return the function this instruction belongs to.
Definition: Instruction.cpp:75
bool mayHaveSideEffects() const LLVM_READONLY
Return true if the instruction may have side effects.
bool comesBefore(const Instruction *Other) const
Given an instruction Other in the same basic block as this instruction, return true if this instructi...
const Instruction * getNextNonDebugInstruction(bool SkipPseudoOp=false) const
Return a pointer to the next non-debug instruction in the same basic block as 'this',...
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:195
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:83
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:389
Class to represent integer types.
Definition: DerivedTypes.h:40
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:279
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
BlockT * getHeader() const
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
bool replacementPreservesLCSSAForm(Instruction *From, Value *To)
Returns true if replacing From with To everywhere is guaranteed to preserve LCSSA form.
Definition: LoopInfo.h:442
bool movementPreservesLCSSAForm(Instruction *Inst, Instruction *NewLoc)
Checks if moving a specific instruction can break LCSSA in any loop.
Definition: LoopInfo.h:468
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:47
ICmpInst::Predicate getPredicate() const
Returns the comparison predicate underlying the intrinsic.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
bool isComplete() const
If the PHI node is complete which means all of its parent's predecessors have incoming value in this ...
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
Value * getIncomingValueForBlock(const BasicBlock *BB) const
Class to represent pointers.
Definition: DerivedTypes.h:643
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1743
This node represents an addition of some number of SCEVs.
This node represents a polynomial recurrence on the trip count of the specified loop.
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
const SCEV * getOperand() const
This class represents an assumption that the expression LHS Pred RHS evaluates to true,...
const SCEV * getRHS() const
Returns the right hand side of the predicate.
ICmpInst::Predicate getPredicate() const
const SCEV * getLHS() const
Returns the left hand side of the predicate.
This class represents a constant integer value.
Value * generateOverflowCheck(const SCEVAddRecExpr *AR, Instruction *Loc, bool Signed)
Generates code that evaluates if the AR expression will overflow.
bool hasRelatedExistingExpansion(const SCEV *S, const Instruction *At, Loop *L)
Determine whether there is an existing expansion of S that can be reused.
SmallVector< Instruction *, 32 > getAllInsertedInstructions() const
Return a vector containing all instructions inserted during expansion.
bool isSafeToExpand(const SCEV *S) const
Return true if the given expression is safe to expand in the sense that all materialized values are s...
bool isSafeToExpandAt(const SCEV *S, const Instruction *InsertionPoint) const
Return true if the given expression is safe to expand in the sense that all materialized values are d...
unsigned replaceCongruentIVs(Loop *L, const DominatorTree *DT, SmallVectorImpl< WeakTrackingVH > &DeadInsts, const TargetTransformInfo *TTI=nullptr)
replace congruent phis with their most canonical representative.
Value * expandUnionPredicate(const SCEVUnionPredicate *Pred, Instruction *Loc)
A specialized variant of expandCodeForPredicate, handling the case when we are expanding code for a S...
bool hoistIVInc(Instruction *IncV, Instruction *InsertPos, bool RecomputePoisonFlags=false)
Utility for hoisting IncV (with all subexpressions requried for its computation) before InsertPos.
void clear()
Erase the contents of the InsertedExpressions map so that users trying to expand the same expression ...
bool isInsertedInstruction(Instruction *I) const
Return true if the specified instruction was inserted by the code rewriter.
Value * expandCodeForPredicate(const SCEVPredicate *Pred, Instruction *Loc)
Generates a code sequence that evaluates this predicate.
Value * expandComparePredicate(const SCEVComparePredicate *Pred, Instruction *Loc)
A specialized variant of expandCodeForPredicate, handling the case when we are expanding code for a S...
Value * expandCodeFor(const SCEV *SH, Type *Ty, BasicBlock::iterator I)
Insert code to directly compute the specified SCEV expression into the program.
Value * expandWrapPredicate(const SCEVWrapPredicate *P, Instruction *Loc)
A specialized variant of expandCodeForPredicate, handling the case when we are expanding code for a S...
Instruction * getIVIncOperand(Instruction *IncV, Instruction *InsertPos, bool allowScale)
Return the induction variable increment's IV operand.
BasicBlock::iterator findInsertPointAfter(Instruction *I, Instruction *MustDominate) const
Returns a suitable insert point after I, that dominates MustDominate.
void setInsertPoint(Instruction *IP)
Set the current insertion point.
This node represents multiplication of some number of SCEVs.
This node is a base class providing common functionality for n'ary operators.
NoWrapFlags getNoWrapFlags(NoWrapFlags Mask=NoWrapMask) const
const SCEV * getOperand(unsigned i) const
ArrayRef< const SCEV * > operands() const
This class represents an assumption made using SCEV expressions which can be checked at run-time.
SCEVPredicateKind getKind() const
This class represents a cast from a pointer to a pointer-sized integer value.
This class represents a signed maximum selection.
This class represents a signed minimum selection.
This class represents a sequential/in-order unsigned minimum selection.
This class represents a sign extension of a small integer value to a larger integer value.
This class represents a truncation of an integer value to a smaller integer value.
This class represents a binary unsigned division operation.
const SCEV * getLHS() const
const SCEV * getRHS() const
This class represents an unsigned maximum selection.
This class represents an unsigned minimum selection.
This class represents a composition of other SCEV predicates, and is the class that most clients will...
This means that we are dealing with an entirely unknown SCEV value, and only represent it as its LLVM...
This class represents the value of vscale, as used when defining the length of a scalable vector or r...
This class represents an assumption made on an AddRec expression.
const SCEVAddRecExpr * getExpr() const
Implementation of the SCEVPredicate interface.
IncrementWrapFlags getFlags() const
Returns the set assumed no overflow flags.
This class represents a zero extension of a small integer value to a larger integer value.
This class represents an analyzed expression in the program.
ArrayRef< const SCEV * > operands() const
Return operands of this SCEV expression.
bool isOne() const
Return true if the expression is a constant one.
bool isZero() const
Return true if the expression is a constant zero.
bool isNonConstantNegative() const
Return true if the specified scev is negated, but not a constant.
SCEVTypes getSCEVType() const
Type * getType() const
Return the LLVM type of this SCEV expression.
NoWrapFlags
NoWrapFlags are bitfield indices into SubclassData.
The main scalar evolution driver.
const SCEV * getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Return the SCEV object corresponding to -V.
bool isKnownNegative(const SCEV *S)
Test if the given expression is known to be negative.
const SCEV * removePointerBase(const SCEV *S)
Compute an expression equivalent to S - getPointerBase(S).
bool isKnownNonZero(const SCEV *S)
Test if the given expression is known to be non-zero.
uint64_t getTypeSizeInBits(Type *Ty) const
Return the size in bits of the specified type, for which isSCEVable must return true.
const SCEV * getConstant(ConstantInt *V)
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
const SCEV * getTruncateOrNoop(const SCEV *V, Type *Ty)
Return a SCEV corresponding to a conversion of the input value to the specified type.
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
bool isKnownPositive(const SCEV *S)
Test if the given expression is known to be positive.
bool containsAddRecurrence(const SCEV *S)
Return true if the SCEV is a scAddRecExpr or it contains scAddRecExpr.
const SCEV * getAddRecExpr(const SCEV *Start, const SCEV *Step, const Loop *L, SCEV::NoWrapFlags Flags)
Get an add recurrence expression for the specified loop.
const SCEV * getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
Type * getEffectiveSCEVType(Type *Ty) const
Return a type with the same bitwidth as the given type and which represents how SCEV will treat the g...
const SCEV * getPredicatedBackedgeTakenCount(const Loop *L, SmallVector< const SCEVPredicate *, 4 > &Predicates)
Similar to getBackedgeTakenCount, except it will add a set of SCEV predicates to Predicates that are ...
static SCEV::NoWrapFlags clearFlags(SCEV::NoWrapFlags Flags, SCEV::NoWrapFlags OffFlags)
void forgetValue(Value *V)
This method should be called by the client when it has changed a value in a way that may effect its v...
const SCEV * getNoopOrAnyExtend(const SCEV *V, Type *Ty)
Return a SCEV corresponding to a conversion of the input value to the specified type.
const SCEV * getTruncateExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
const SCEV * getAnyExtendExpr(const SCEV *Op, Type *Ty)
getAnyExtendExpr - Return a SCEV for the given operand extended with unspecified bits out to the give...
std::optional< SCEV::NoWrapFlags > getStrengthenedNoWrapFlagsFromBinOp(const OverflowingBinaryOperator *OBO)
Parse NSW/NUW flags from add/sub/mul IR binary operation Op into SCEV no-wrap flags,...
const SCEV * getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
void getPoisonGeneratingValues(SmallPtrSetImpl< const Value * > &Result, const SCEV *S)
Return the set of Values that, if poison, will definitively result in S being poison as well.
bool hasComputableLoopEvolution(const SCEV *S, const Loop *L)
Return true if the given SCEV changes value in a known way in the specified loop.
const SCEV * getPointerBase(const SCEV *V)
Transitively follow the chain of pointer-type operands until reaching a SCEV that does not have a sin...
bool dominates(const SCEV *S, const BasicBlock *BB)
Return true if elements that makes up the given SCEV dominate the specified basic block.
const SCEV * getMulExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
const SCEV * getUnknown(Value *V)
static SCEV::NoWrapFlags maskFlags(SCEV::NoWrapFlags Flags, int Mask)
Convenient NoWrapFlags manipulation that hides enum casts and is visible in the ScalarEvolution name ...
bool properlyDominates(const SCEV *S, const BasicBlock *BB)
Return true if elements that makes up the given SCEV properly dominate the specified basic block.
const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
size_type size() const
Definition: SmallPtrSet.h:93
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:345
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:384
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:366
bool contains(ConstPtrType Ptr) const
Definition: SmallPtrSet.h:390
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
bool empty() const
Definition: SmallVector.h:94
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:577
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:941
void push_back(const T &Elt)
Definition: SmallVector.h:416
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind=TTI::TCK_SizeAndLatency, const Instruction *I=nullptr) const
InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty, TargetCostKind CostKind, Instruction *Inst=nullptr) const
Return the expected cost of materialization for the given integer immediate of the specified type for...
TargetCostKind
The kind of cost model.
@ TCK_RecipThroughput
Reciprocal throughput.
@ TCK_CodeSize
Instruction code size.
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args=ArrayRef< const Value * >(), const Instruction *CxtI=nullptr) const
This is an approximation of reciprocal throughput of a math/logic op.
bool isTruncateFree(Type *Ty1, Type *Ty2) const
Return true if it's free to truncate a value of type Ty1 to type Ty2.
@ None
The cast is not used with a load/store of any kind.
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, const Instruction *I=nullptr) const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
unsigned getIntegerBitWidth() const
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:265
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:255
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:129
static IntegerType * getInt32Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:228
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
op_range operands()
Definition: User.h:242
Value * getOperand(unsigned i) const
Definition: User.h:169
unsigned getNumOperands() const
Definition: User.h:191
iterator_range< value_op_iterator > operand_values()
Definition: User.h:266
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:535
bool use_empty() const
Definition: Value.h:344
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1069
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:182
self_iterator getIterator()
Definition: ilist_node.h:82
#define UINT64_MAX
Definition: DataTypes.h:77
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1422
@ SC
CHAIN = SC CHAIN, Imm128 - System call.
cst_pred_ty< is_power2 > m_Power2()
Match an integer or vector power-of-2.
Definition: PatternMatch.h:552
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
Definition: PatternMatch.h:724
CmpClass_match< LHS, RHS, ICmpInst, ICmpInst::Predicate > m_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R)
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
class_match< BasicBlock > m_BasicBlock()
Match an arbitrary basic block value and ignore it.
Definition: PatternMatch.h:168
@ CE
Windows NT (Windows on ARM)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:445
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void visitAll(const SCEV *Root, SV &Visitor)
Use SCEVTraversal to visit all nodes in the given expression tree.
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:330
@ Offset
Definition: DWP.cpp:440
void stable_sort(R &&Range)
Definition: STLExtras.h:1971
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1727
bool canCreatePoison(const Operator *Op, bool ConsiderFlagsAndMetadata=true)
detail::scope_exit< std::decay_t< Callable > > make_scope_exit(Callable &&F)
Definition: ScopeExit.h:59
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are are tuples (A,...
Definition: STLExtras.h:2338
Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
Interval::pred_iterator pred_end(Interval *I)
Definition: Interval.h:112
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:429
bool programUndefinedIfPoison(const Instruction *Inst)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
Interval::pred_iterator pred_begin(Interval *I)
pred_begin/pred_end - define methods so that Intervals may be used just like BasicBlocks can with the...
Definition: Interval.h:109
cl::opt< unsigned > SCEVCheapExpansionBudget
Constant * ConstantFoldBinaryOpOperands(unsigned Opcode, Constant *LHS, Constant *RHS, const DataLayout &DL)
Attempt to constant fold a binary operation with the specified operands.
const SCEV * normalizeForPostIncUse(const SCEV *S, const PostIncLoopSet &Loops, ScalarEvolution &SE, bool CheckInvertible=true)
Normalize S to be post-increment for all loops present in Loops.
@ Mul
Product of integers.
@ Add
Sum of integers.
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:184
bool formLCSSAForInstructions(SmallVectorImpl< Instruction * > &Worklist, const DominatorTree &DT, const LoopInfo &LI, ScalarEvolution *SE, SmallVectorImpl< PHINode * > *PHIsToRemove=nullptr, SmallVectorImpl< PHINode * > *InsertedPHIs=nullptr)
Ensures LCSSA form for every instruction from the Worklist in the scope of innermost containing loop.
Definition: LCSSA.cpp:77
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition: STLExtras.h:1926
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1884
InstructionCost Cost
bool isGuaranteedNotToBePoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
bool SCEVExprContains(const SCEV *Root, PredTy Pred)
Return true if any node in Root satisfies the predicate Pred.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
struct for holding enough information to help calculate the cost of the given SCEV when expanded into...