1//===- ScalarEvolutionExpander.cpp - Scalar Evolution Analysis ------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the implementation of the scalar evolution expander,
10// which is used to generate the code corresponding to a given scalar evolution
11// expression.
12//
13//===----------------------------------------------------------------------===//
14
15#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
16#include "llvm/ADT/STLExtras.h"
17#include "llvm/ADT/ScopeExit.h"
18#include "llvm/ADT/SmallSet.h"
23#include "llvm/IR/DataLayout.h"
24#include "llvm/IR/Dominators.h"
30
31#ifdef LLVM_ENABLE_ABI_BREAKING_CHECKS
32#define SCEV_DEBUG_WITH_TYPE(TYPE, X) DEBUG_WITH_TYPE(TYPE, X)
33#else
34#define SCEV_DEBUG_WITH_TYPE(TYPE, X)
35#endif
36
37using namespace llvm;
38
39cl::opt<unsigned> llvm::SCEVCheapExpansionBudget(
40 "scev-cheap-expansion-budget", cl::Hidden, cl::init(4),
41 cl::desc("When performing SCEV expansion only if it is cheap to do, this "
42 "controls the budget that is considered cheap (default = 4)"));
43
44using namespace PatternMatch;
45
46PoisonFlags::PoisonFlags(const Instruction *I) {
47 NUW = false;
48 NSW = false;
49 Exact = false;
50 Disjoint = false;
51 NNeg = false;
52 if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(I)) {
53 NUW = OBO->hasNoUnsignedWrap();
54 NSW = OBO->hasNoSignedWrap();
55 }
56 if (auto *PEO = dyn_cast<PossiblyExactOperator>(I))
57 Exact = PEO->isExact();
58 if (auto *PDI = dyn_cast<PossiblyDisjointInst>(I))
59 Disjoint = PDI->isDisjoint();
60 if (auto *PNI = dyn_cast<PossiblyNonNegInst>(I))
61 NNeg = PNI->hasNonNeg();
62}
63
64void PoisonFlags::apply(Instruction *I) {
65 if (isa<OverflowingBinaryOperator>(I)) {
66 I->setHasNoUnsignedWrap(NUW);
67 I->setHasNoSignedWrap(NSW);
68 }
69 if (isa<PossiblyExactOperator>(I))
70 I->setIsExact(Exact);
71 if (auto *PDI = dyn_cast<PossiblyDisjointInst>(I))
72 PDI->setIsDisjoint(Disjoint);
73 if (auto *PNI = dyn_cast<PossiblyNonNegInst>(I))
74 PNI->setNonNeg(NNeg);
75}
76
77/// ReuseOrCreateCast - Arrange for there to be a cast of V to Ty at IP,
78/// reusing an existing cast if a suitable one (= dominating IP) exists, or
79/// creating a new one.
80Value *SCEVExpander::ReuseOrCreateCast(Value *V, Type *Ty,
81 Instruction::CastOps Op,
82 BasicBlock::iterator IP) {
83 // This function must be called with the builder having a valid insertion
84 // point. It doesn't need to be the actual IP where the uses of the returned
85 // cast will be added, but it must dominate such IP.
86 // We use this precondition to produce a cast that will dominate all its
87 // uses. In particular, this is crucial for the case where the builder's
88 // insertion point *is* the point where we were asked to put the cast.
89 // Since we don't know the builder's insertion point is actually
90 // where the uses will be added (only that it dominates it), we are
91 // not allowed to move it.
92 BasicBlock::iterator BIP = Builder.GetInsertPoint();
93
94 Value *Ret = nullptr;
95
96 // Check to see if there is already a cast!
97 for (User *U : V->users()) {
98 if (U->getType() != Ty)
99 continue;
100 CastInst *CI = dyn_cast<CastInst>(U);
101 if (!CI || CI->getOpcode() != Op)
102 continue;
103
104 // Found a suitable cast that is at IP or comes before IP. Use it. Note that
105 // the cast must also properly dominate the Builder's insertion point.
106 if (IP->getParent() == CI->getParent() && &*BIP != CI &&
107 (&*IP == CI || CI->comesBefore(&*IP))) {
108 Ret = CI;
109 break;
110 }
111 }
112
113 // Create a new cast.
114 if (!Ret) {
115 SCEVInsertPointGuard Guard(Builder, this);
116 Builder.SetInsertPoint(&*IP);
117 Ret = Builder.CreateCast(Op, V, Ty, V->getName());
118 }
119
120 // We assert at the end of the function since IP might point to an
121 // instruction with different dominance properties than a cast
122 // (an invoke for example) and not dominate BIP (but the cast does).
123 assert(!isa<Instruction>(Ret) ||
124 SE.DT.dominates(cast<Instruction>(Ret), &*BIP));
125
126 return Ret;
127}
128
129BasicBlock::iterator
130SCEVExpander::findInsertPointAfter(Instruction *I,
131 Instruction *MustDominate) const {
132 BasicBlock::iterator IP = ++I->getIterator();
133 if (auto *II = dyn_cast<InvokeInst>(I))
134 IP = II->getNormalDest()->begin();
135
136 while (isa<PHINode>(IP))
137 ++IP;
138
139 if (isa<FuncletPadInst>(IP) || isa<LandingPadInst>(IP)) {
140 ++IP;
141 } else if (isa<CatchSwitchInst>(IP)) {
142 IP = MustDominate->getParent()->getFirstInsertionPt();
143 } else {
144 assert(!IP->isEHPad() && "unexpected eh pad!");
145 }
146
147 // Adjust insert point to be after instructions inserted by the expander, so
148 // we can re-use already inserted instructions. Avoid skipping past the
149 // original \p MustDominate, in case it is an inserted instruction.
150 while (isInsertedInstruction(&*IP) && &*IP != MustDominate)
151 ++IP;
152
153 return IP;
154}
155
156BasicBlock::iterator
157SCEVExpander::GetOptimalInsertionPointForCastOf(Value *V) const {
158 // Cast the argument at the beginning of the entry block, after
159 // any bitcasts of other arguments.
160 if (Argument *A = dyn_cast<Argument>(V)) {
161 BasicBlock::iterator IP = A->getParent()->getEntryBlock().begin();
162 while ((isa<BitCastInst>(IP) &&
163 isa<Argument>(cast<BitCastInst>(IP)->getOperand(0)) &&
164 cast<BitCastInst>(IP)->getOperand(0) != A) ||
165 isa<DbgInfoIntrinsic>(IP))
166 ++IP;
167 return IP;
168 }
169
170 // Cast the instruction immediately after the instruction.
171 if (Instruction *I = dyn_cast<Instruction>(V))
172 return findInsertPointAfter(I, &*Builder.GetInsertPoint());
173
174 // Otherwise, this must be some kind of a constant,
175 // so let's plop this cast into the function's entry block.
176 assert(isa<Constant>(V) &&
177 "Expected the cast argument to be a global/constant");
178 return Builder.GetInsertBlock()
179 ->getParent()
180 ->getEntryBlock()
181 .begin();
182}
183
184/// InsertNoopCastOfTo - Insert a cast of V to the specified type,
185/// which must be possible with a noop cast, doing what we can to share
186/// the casts.
187Value *SCEVExpander::InsertNoopCastOfTo(Value *V, Type *Ty) {
188 Instruction::CastOps Op = CastInst::getCastOpcode(V, false, Ty, false);
189 assert((Op == Instruction::BitCast ||
190 Op == Instruction::PtrToInt ||
191 Op == Instruction::IntToPtr) &&
192 "InsertNoopCastOfTo cannot perform non-noop casts!");
193 assert(SE.getTypeSizeInBits(V->getType()) == SE.getTypeSizeInBits(Ty) &&
194 "InsertNoopCastOfTo cannot change sizes!");
195
196 // inttoptr only works for integral pointers. For non-integral pointers, we
197 // can create a GEP on null with the integral value as index. Note that
198 // it is safe to use GEP of null instead of inttoptr here, because only
199 // expressions already based on a GEP of null should be converted to pointers
200 // during expansion.
201 if (Op == Instruction::IntToPtr) {
202 auto *PtrTy = cast<PointerType>(Ty);
203 if (DL.isNonIntegralPointerType(PtrTy))
204 return Builder.CreatePtrAdd(Constant::getNullValue(PtrTy), V, "scevgep");
205 }
206 // Short-circuit unnecessary bitcasts.
207 if (Op == Instruction::BitCast) {
208 if (V->getType() == Ty)
209 return V;
210 if (CastInst *CI = dyn_cast<CastInst>(V)) {
211 if (CI->getOperand(0)->getType() == Ty)
212 return CI->getOperand(0);
213 }
214 }
215 // Short-circuit unnecessary inttoptr<->ptrtoint casts.
216 if ((Op == Instruction::PtrToInt || Op == Instruction::IntToPtr) &&
217 SE.getTypeSizeInBits(Ty) == SE.getTypeSizeInBits(V->getType())) {
218 if (CastInst *CI = dyn_cast<CastInst>(V))
219 if ((CI->getOpcode() == Instruction::PtrToInt ||
220 CI->getOpcode() == Instruction::IntToPtr) &&
221 SE.getTypeSizeInBits(CI->getType()) ==
222 SE.getTypeSizeInBits(CI->getOperand(0)->getType()))
223 return CI->getOperand(0);
224 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
225 if ((CE->getOpcode() == Instruction::PtrToInt ||
226 CE->getOpcode() == Instruction::IntToPtr) &&
227 SE.getTypeSizeInBits(CE->getType()) ==
228 SE.getTypeSizeInBits(CE->getOperand(0)->getType()))
229 return CE->getOperand(0);
230 }
231
232 // Fold a cast of a constant.
233 if (Constant *C = dyn_cast<Constant>(V))
234 return ConstantExpr::getCast(Op, C, Ty);
235
236 // Try to reuse existing cast, or insert one.
237 return ReuseOrCreateCast(V, Ty, Op, GetOptimalInsertionPointForCastOf(V));
238}
239
240/// InsertBinop - Insert the specified binary operator, doing a small amount
241/// of work to avoid inserting an obviously redundant operation, and hoisting
242/// to an outer loop when the opportunity is there and it is safe.
243Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,
244 Value *LHS, Value *RHS,
245 SCEV::NoWrapFlags Flags, bool IsSafeToHoist) {
246 // Fold a binop with constant operands.
247 if (Constant *CLHS = dyn_cast<Constant>(LHS))
248 if (Constant *CRHS = dyn_cast<Constant>(RHS))
249 if (Constant *Res = ConstantFoldBinaryOpOperands(Opcode, CLHS, CRHS, DL))
250 return Res;
251
252 // Do a quick scan to see if we have this binop nearby. If so, reuse it.
253 unsigned ScanLimit = 6;
254 BasicBlock::iterator BlockBegin = Builder.GetInsertBlock()->begin();
255 // Scanning starts from the last instruction before the insertion point.
256 BasicBlock::iterator IP = Builder.GetInsertPoint();
257 if (IP != BlockBegin) {
258 --IP;
259 for (; ScanLimit; --IP, --ScanLimit) {
260 // Don't count dbg.value against the ScanLimit, to avoid perturbing the
261 // generated code.
262 if (isa<DbgInfoIntrinsic>(IP))
263 ScanLimit++;
264
265 auto canGenerateIncompatiblePoison = [&Flags](Instruction *I) {
266 // Ensure that no-wrap flags match.
267 if (isa<OverflowingBinaryOperator>(I)) {
268 if (I->hasNoSignedWrap() != (Flags & SCEV::FlagNSW))
269 return true;
270 if (I->hasNoUnsignedWrap() != (Flags & SCEV::FlagNUW))
271 return true;
272 }
273 // Conservatively, do not use any instruction which has the exact
274 // flag set.
275 if (isa<PossiblyExactOperator>(I) && I->isExact())
276 return true;
277 return false;
278 };
279 if (IP->getOpcode() == (unsigned)Opcode && IP->getOperand(0) == LHS &&
280 IP->getOperand(1) == RHS && !canGenerateIncompatiblePoison(&*IP))
281 return &*IP;
282 if (IP == BlockBegin) break;
283 }
284 }
285
286 // Save the original insertion point so we can restore it when we're done.
287 DebugLoc Loc = Builder.GetInsertPoint()->getDebugLoc();
288 SCEVInsertPointGuard Guard(Builder, this);
289
290 if (IsSafeToHoist) {
291 // Move the insertion point out of as many loops as we can.
292 while (const Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock())) {
293 if (!L->isLoopInvariant(LHS) || !L->isLoopInvariant(RHS)) break;
294 BasicBlock *Preheader = L->getLoopPreheader();
295 if (!Preheader) break;
296
297 // Ok, move up a level.
298 Builder.SetInsertPoint(Preheader->getTerminator());
299 }
300 }
301
302 // If we haven't found this binop, insert it.
303 // TODO: Use the Builder, which will make CreateBinOp below fold with
304 // InstSimplifyFolder.
305 Instruction *BO = Builder.Insert(BinaryOperator::Create(Opcode, LHS, RHS));
306 BO->setDebugLoc(Loc);
307 if (Flags & SCEV::FlagNUW)
308 BO->setHasNoUnsignedWrap();
309 if (Flags & SCEV::FlagNSW)
310 BO->setHasNoSignedWrap();
311
312 return BO;
313}
314
315/// expandAddToGEP - Expand an addition expression with a pointer type into
316/// a GEP instead of using ptrtoint+arithmetic+inttoptr. This helps
317/// BasicAliasAnalysis and other passes analyze the result. See the rules
318/// for getelementptr vs. inttoptr in
319/// http://llvm.org/docs/LangRef.html#pointeraliasing
320/// for details.
321///
322/// Design note: The correctness of using getelementptr here depends on
323/// ScalarEvolution not recognizing inttoptr and ptrtoint operators, as
324/// they may introduce pointer arithmetic which may not be safely converted
325/// into getelementptr.
326///
327/// Design note: It might seem desirable for this function to be more
328/// loop-aware. If some of the indices are loop-invariant while others
329/// aren't, it might seem desirable to emit multiple GEPs, keeping the
330/// loop-invariant portions of the overall computation outside the loop.
331/// However, there are a few reasons this is not done here. Hoisting simple
332/// arithmetic is a low-level optimization that often isn't very
333/// important until late in the optimization process. In fact, passes
334/// like InstructionCombining will combine GEPs, even if it means
335/// pushing loop-invariant computation down into loops, so even if the
336/// GEPs were split here, the work would quickly be undone. The
337/// LoopStrengthReduction pass, which is usually run quite late (and
338/// after the last InstructionCombining pass), takes care of hoisting
339/// loop-invariant portions of expressions, after considering what
340/// can be folded using target addressing modes.
341///
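/// For example, adding a constant offset of 4 to a pointer %b is emitted
/// below as "getelementptr i8, ptr %b, i64 4" (named "scevgep") rather than a
/// ptrtoint/add/inttoptr sequence, keeping the result transparent to
/// BasicAliasAnalysis.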
342Value *SCEVExpander::expandAddToGEP(const SCEV *Offset, Value *V) {
343 assert(!isa<Instruction>(V) ||
344 SE.DT.dominates(cast<Instruction>(V), &*Builder.GetInsertPoint()));
345
346 Value *Idx = expand(Offset);
347
348 // Fold a GEP with constant operands.
349 if (Constant *CLHS = dyn_cast<Constant>(V))
350 if (Constant *CRHS = dyn_cast<Constant>(Idx))
351 return Builder.CreatePtrAdd(CLHS, CRHS);
352
353 // Do a quick scan to see if we have this GEP nearby. If so, reuse it.
354 unsigned ScanLimit = 6;
355 BasicBlock::iterator BlockBegin = Builder.GetInsertBlock()->begin();
356 // Scanning starts from the last instruction before the insertion point.
357 BasicBlock::iterator IP = Builder.GetInsertPoint();
358 if (IP != BlockBegin) {
359 --IP;
360 for (; ScanLimit; --IP, --ScanLimit) {
361 // Don't count dbg.value against the ScanLimit, to avoid perturbing the
362 // generated code.
363 if (isa<DbgInfoIntrinsic>(IP))
364 ScanLimit++;
365 if (IP->getOpcode() == Instruction::GetElementPtr &&
366 IP->getOperand(0) == V && IP->getOperand(1) == Idx &&
367 cast<GEPOperator>(&*IP)->getSourceElementType() ==
368 Builder.getInt8Ty())
369 return &*IP;
370 if (IP == BlockBegin) break;
371 }
372 }
373
374 // Save the original insertion point so we can restore it when we're done.
375 SCEVInsertPointGuard Guard(Builder, this);
376
377 // Move the insertion point out of as many loops as we can.
378 while (const Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock())) {
379 if (!L->isLoopInvariant(V) || !L->isLoopInvariant(Idx)) break;
380 BasicBlock *Preheader = L->getLoopPreheader();
381 if (!Preheader) break;
382
383 // Ok, move up a level.
384 Builder.SetInsertPoint(Preheader->getTerminator());
385 }
386
387 // Emit a GEP.
388 return Builder.CreatePtrAdd(V, Idx, "scevgep");
389}
390
391/// PickMostRelevantLoop - Given two loops pick the one that's most relevant for
392/// SCEV expansion. If they are nested, this is the most nested. If they are
393/// neighboring, pick the later.
394static const Loop *PickMostRelevantLoop(const Loop *A, const Loop *B,
395 DominatorTree &DT) {
396 if (!A) return B;
397 if (!B) return A;
398 if (A->contains(B)) return B;
399 if (B->contains(A)) return A;
400 if (DT.dominates(A->getHeader(), B->getHeader())) return B;
401 if (DT.dominates(B->getHeader(), A->getHeader())) return A;
402 return A; // Arbitrarily break the tie.
403}
404
405/// getRelevantLoop - Get the most relevant loop associated with the given
406/// expression, according to PickMostRelevantLoop.
407const Loop *SCEVExpander::getRelevantLoop(const SCEV *S) {
408 // Test whether we've already computed the most relevant loop for this SCEV.
409 auto Pair = RelevantLoops.insert(std::make_pair(S, nullptr));
410 if (!Pair.second)
411 return Pair.first->second;
412
413 switch (S->getSCEVType()) {
414 case scConstant:
415 case scVScale:
416 return nullptr; // A constant has no relevant loops.
417 case scTruncate:
418 case scZeroExtend:
419 case scSignExtend:
420 case scPtrToInt:
421 case scAddExpr:
422 case scMulExpr:
423 case scUDivExpr:
424 case scAddRecExpr:
425 case scUMaxExpr:
426 case scSMaxExpr:
427 case scUMinExpr:
428 case scSMinExpr:
429 case scSequentialUMinExpr: {
430 const Loop *L = nullptr;
431 if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
432 L = AR->getLoop();
433 for (const SCEV *Op : S->operands())
434 L = PickMostRelevantLoop(L, getRelevantLoop(Op), SE.DT);
435 return RelevantLoops[S] = L;
436 }
437 case scUnknown: {
438 const SCEVUnknown *U = cast<SCEVUnknown>(S);
439 if (const Instruction *I = dyn_cast<Instruction>(U->getValue()))
440 return Pair.first->second = SE.LI.getLoopFor(I->getParent());
441 // A non-instruction has no relevant loops.
442 return nullptr;
443 }
444 case scCouldNotCompute:
445 llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
446 }
447 llvm_unreachable("Unexpected SCEV type!");
448}
449
450namespace {
451
452/// LoopCompare - Compare loops by PickMostRelevantLoop.
453class LoopCompare {
454 DominatorTree &DT;
455public:
456 explicit LoopCompare(DominatorTree &dt) : DT(dt) {}
457
458 bool operator()(std::pair<const Loop *, const SCEV *> LHS,
459 std::pair<const Loop *, const SCEV *> RHS) const {
460 // Keep pointer operands sorted at the end.
461 if (LHS.second->getType()->isPointerTy() !=
462 RHS.second->getType()->isPointerTy())
463 return LHS.second->getType()->isPointerTy();
464
465 // Compare loops with PickMostRelevantLoop.
466 if (LHS.first != RHS.first)
467 return PickMostRelevantLoop(LHS.first, RHS.first, DT) != LHS.first;
468
469 // If one operand is a non-constant negative and the other is not,
470 // put the non-constant negative on the right so that a sub can
471 // be used instead of a negate and add.
472 if (LHS.second->isNonConstantNegative()) {
473 if (!RHS.second->isNonConstantNegative())
474 return false;
475 } else if (RHS.second->isNonConstantNegative())
476 return true;
477
478 // Otherwise they are equivalent according to this comparison.
479 return false;
480 }
481};
482
483}
484
485Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
486 // Collect all the add operands in a loop, along with their associated loops.
487 // Iterate in reverse so that constants are emitted last, all else equal, and
488 // so that pointer operands are inserted first, which the code below relies on
489 // to form more involved GEPs.
490 SmallVector<std::pair<const Loop *, const SCEV *>, 8> OpsAndLoops;
491 for (const SCEV *Op : reverse(S->operands()))
492 OpsAndLoops.push_back(std::make_pair(getRelevantLoop(Op), Op));
493
494 // Sort by loop. Use a stable sort so that constants follow non-constants and
495 // pointer operands precede non-pointer operands.
496 llvm::stable_sort(OpsAndLoops, LoopCompare(SE.DT));
497
498 // Emit instructions to add all the operands. Hoist as much as possible
499 // out of loops, and form meaningful getelementptrs where possible.
500 Value *Sum = nullptr;
501 for (auto I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E;) {
502 const Loop *CurLoop = I->first;
503 const SCEV *Op = I->second;
504 if (!Sum) {
505 // This is the first operand. Just expand it.
506 Sum = expand(Op);
507 ++I;
508 continue;
509 }
510
511 assert(!Op->getType()->isPointerTy() && "Only first op can be pointer");
512 if (isa<PointerType>(Sum->getType())) {
513 // The running sum expression is a pointer. Try to form a getelementptr
514 // at this level with that as the base.
515 SmallVector<const SCEV *, 4> NewOps;
516 for (; I != E && I->first == CurLoop; ++I) {
517 // If the operand is a SCEVUnknown that is not an instruction, peek
518 // through it, to enable more of it to be folded into the GEP.
519 const SCEV *X = I->second;
520 if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(X))
521 if (!isa<Instruction>(U->getValue()))
522 X = SE.getSCEV(U->getValue());
523 NewOps.push_back(X);
524 }
525 Sum = expandAddToGEP(SE.getAddExpr(NewOps), Sum);
526 } else if (Op->isNonConstantNegative()) {
527 // Instead of doing a negate and add, just do a subtract.
528 Value *W = expand(SE.getNegativeSCEV(Op));
529 Sum = InsertBinop(Instruction::Sub, Sum, W, SCEV::FlagAnyWrap,
530 /*IsSafeToHoist*/ true);
531 ++I;
532 } else {
533 // A simple add.
534 Value *W = expand(Op);
535 // Canonicalize a constant to the RHS.
536 if (isa<Constant>(Sum))
537 std::swap(Sum, W);
538 Sum = InsertBinop(Instruction::Add, Sum, W, S->getNoWrapFlags(),
539 /*IsSafeToHoist*/ true);
540 ++I;
541 }
542 }
543
544 return Sum;
545}
546
547Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) {
548 Type *Ty = S->getType();
549
550 // Collect all the mul operands in a loop, along with their associated loops.
551 // Iterate in reverse so that constants are emitted last, all else equal.
552 SmallVector<std::pair<const Loop *, const SCEV *>, 8> OpsAndLoops;
553 for (const SCEV *Op : reverse(S->operands()))
554 OpsAndLoops.push_back(std::make_pair(getRelevantLoop(Op), Op));
555
556 // Sort by loop. Use a stable sort so that constants follow non-constants.
557 llvm::stable_sort(OpsAndLoops, LoopCompare(SE.DT));
558
559 // Emit instructions to mul all the operands. Hoist as much as possible
560 // out of loops.
561 Value *Prod = nullptr;
562 auto I = OpsAndLoops.begin();
563
564 // Expand the calculation of X pow N in the following manner:
565 // Let N = P1 + P2 + ... + PK, where all P are powers of 2. Then:
566 // X pow N = (X pow P1) * (X pow P2) * ... * (X pow PK).
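 // For example, for N = 11 = 8 + 2 + 1 the loop below forms X, X^2, X^4 and
 // X^8 by repeated squaring and multiplies X^8 * X^2 * X into the result.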
567 const auto ExpandOpBinPowN = [this, &I, &OpsAndLoops]() {
568 auto E = I;
569 // Calculate how many times the same operand from the same loop is included
570 // into this power.
571 uint64_t Exponent = 0;
572 const uint64_t MaxExponent = UINT64_MAX >> 1;
573 // No one sane will ever try to calculate such huge exponents, but if we
574 // need this, we stop on UINT64_MAX / 2 because we need to exit the loop
575 // below when the power of 2 exceeds our Exponent, and we want it to be
576 // 1u << 31 at most to not deal with unsigned overflow.
577 while (E != OpsAndLoops.end() && *I == *E && Exponent != MaxExponent) {
578 ++Exponent;
579 ++E;
580 }
581 assert(Exponent > 0 && "Trying to calculate a zeroth exponent of operand?");
582
583 // Calculate powers with exponents 1, 2, 4, 8, etc. and include those
584 // that are needed in the result.
585 Value *P = expand(I->second);
586 Value *Result = nullptr;
587 if (Exponent & 1)
588 Result = P;
589 for (uint64_t BinExp = 2; BinExp <= Exponent; BinExp <<= 1) {
590 P = InsertBinop(Instruction::Mul, P, P, SCEV::FlagAnyWrap,
591 /*IsSafeToHoist*/ true);
592 if (Exponent & BinExp)
593 Result = Result ? InsertBinop(Instruction::Mul, Result, P,
594 SCEV::FlagAnyWrap,
595 /*IsSafeToHoist*/ true)
596 : P;
597 }
598
599 I = E;
600 assert(Result && "Nothing was expanded?");
601 return Result;
602 };
603
604 while (I != OpsAndLoops.end()) {
605 if (!Prod) {
606 // This is the first operand. Just expand it.
607 Prod = ExpandOpBinPowN();
608 } else if (I->second->isAllOnesValue()) {
609 // Instead of doing a multiply by negative one, just do a negate.
610 Prod = InsertBinop(Instruction::Sub, Constant::getNullValue(Ty), Prod,
611 SCEV::FlagAnyWrap, /*IsSafeToHoist*/ true);
612 ++I;
613 } else {
614 // A simple mul.
615 Value *W = ExpandOpBinPowN();
616 // Canonicalize a constant to the RHS.
617 if (isa<Constant>(Prod)) std::swap(Prod, W);
618 const APInt *RHS;
619 if (match(W, m_Power2(RHS))) {
620 // Canonicalize Prod*(1<<C) to Prod<<C.
621 assert(!Ty->isVectorTy() && "vector types are not SCEVable");
622 auto NWFlags = S->getNoWrapFlags();
623 // clear nsw flag if shl will produce poison value.
624 if (RHS->logBase2() == RHS->getBitWidth() - 1)
625 NWFlags = ScalarEvolution::clearFlags(NWFlags, SCEV::FlagNSW);
626 Prod = InsertBinop(Instruction::Shl, Prod,
627 ConstantInt::get(Ty, RHS->logBase2()), NWFlags,
628 /*IsSafeToHoist*/ true);
629 } else {
630 Prod = InsertBinop(Instruction::Mul, Prod, W, S->getNoWrapFlags(),
631 /*IsSafeToHoist*/ true);
632 }
633 }
634 }
635
636 return Prod;
637}
638
639Value *SCEVExpander::visitUDivExpr(const SCEVUDivExpr *S) {
640 Value *LHS = expand(S->getLHS());
641 if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(S->getRHS())) {
642 const APInt &RHS = SC->getAPInt();
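 // A divisor that is a constant power of two is lowered to a logical shift
 // right, e.g. (%x /u 8) becomes (lshr %x, 3).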
643 if (RHS.isPowerOf2())
644 return InsertBinop(Instruction::LShr, LHS,
645 ConstantInt::get(SC->getType(), RHS.logBase2()),
646 SCEV::FlagAnyWrap, /*IsSafeToHoist*/ true);
647 }
648
649 Value *RHS = expand(S->getRHS());
650 return InsertBinop(Instruction::UDiv, LHS, RHS, SCEV::FlagAnyWrap,
651 /*IsSafeToHoist*/ SE.isKnownNonZero(S->getRHS()));
652}
653
654/// Determine if this is a well-behaved chain of instructions leading back to
655/// the PHI. If so, it may be reused by expanded expressions.
656bool SCEVExpander::isNormalAddRecExprPHI(PHINode *PN, Instruction *IncV,
657 const Loop *L) {
658 if (IncV->getNumOperands() == 0 || isa<PHINode>(IncV) ||
659 (isa<CastInst>(IncV) && !isa<BitCastInst>(IncV)))
660 return false;
661 // If any of the operands don't dominate the insert position, bail.
662 // Addrec operands are always loop-invariant, so this can only happen
663 // if there are instructions which haven't been hoisted.
664 if (L == IVIncInsertLoop) {
665 for (Use &Op : llvm::drop_begin(IncV->operands()))
666 if (Instruction *OInst = dyn_cast<Instruction>(Op))
667 if (!SE.DT.dominates(OInst, IVIncInsertPos))
668 return false;
669 }
670 // Advance to the next instruction.
671 IncV = dyn_cast<Instruction>(IncV->getOperand(0));
672 if (!IncV)
673 return false;
674
675 if (IncV->mayHaveSideEffects())
676 return false;
677
678 if (IncV == PN)
679 return true;
680
681 return isNormalAddRecExprPHI(PN, IncV, L);
682}
683
684/// getIVIncOperand returns an induction variable increment's induction
685/// variable operand.
686///
687/// If allowScale is set, any type of GEP is allowed as long as the nonIV
688/// operands dominate InsertPos.
689///
690/// If allowScale is not set, ensure that a GEP increment conforms to one of the
691/// simple patterns generated by getAddRecExprPHILiterally and
692/// expandAddToGEP. If the pattern isn't recognized, return NULL.
693Instruction *SCEVExpander::getIVIncOperand(Instruction *IncV,
694 Instruction *InsertPos,
695 bool allowScale) {
696 if (IncV == InsertPos)
697 return nullptr;
698
699 switch (IncV->getOpcode()) {
700 default:
701 return nullptr;
702 // Check for a simple Add/Sub or GEP of a loop invariant step.
703 case Instruction::Add:
704 case Instruction::Sub: {
705 Instruction *OInst = dyn_cast<Instruction>(IncV->getOperand(1));
706 if (!OInst || SE.DT.dominates(OInst, InsertPos))
707 return dyn_cast<Instruction>(IncV->getOperand(0));
708 return nullptr;
709 }
710 case Instruction::BitCast:
711 return dyn_cast<Instruction>(IncV->getOperand(0));
712 case Instruction::GetElementPtr:
713 for (Use &U : llvm::drop_begin(IncV->operands())) {
714 if (isa<Constant>(U))
715 continue;
716 if (Instruction *OInst = dyn_cast<Instruction>(U)) {
717 if (!SE.DT.dominates(OInst, InsertPos))
718 return nullptr;
719 }
720 if (allowScale) {
721 // allow any kind of GEP as long as it can be hoisted.
722 continue;
723 }
724 // GEPs produced by SCEVExpander use i8 element type.
725 if (!cast<GEPOperator>(IncV)->getSourceElementType()->isIntegerTy(8))
726 return nullptr;
727 break;
728 }
729 return dyn_cast<Instruction>(IncV->getOperand(0));
730 }
731}
732
733/// If the insert point of the current builder or any of the builders on the
734/// stack of saved builders has 'I' as its insert point, update it to point to
735/// the instruction after 'I'. This is intended to be used when the instruction
736/// 'I' is being moved. If this fixup is not done and 'I' is moved to a
737/// different block, the inconsistent insert point (with a mismatched
738/// Instruction and Block) can lead to an instruction being inserted in a block
739/// other than its parent.
740void SCEVExpander::fixupInsertPoints(Instruction *I) {
741 BasicBlock::iterator It(*I);
742 BasicBlock::iterator NewInsertPt = std::next(It);
743 if (Builder.GetInsertPoint() == It)
744 Builder.SetInsertPoint(&*NewInsertPt);
745 for (auto *InsertPtGuard : InsertPointGuards)
746 if (InsertPtGuard->GetInsertPoint() == It)
747 InsertPtGuard->SetInsertPoint(NewInsertPt);
748}
749
750/// hoistStep - Attempt to hoist a simple IV increment above InsertPos to make
751/// it available to other uses in this loop. Recursively hoist any operands,
752/// until we reach a value that dominates InsertPos.
753bool SCEVExpander::hoistIVInc(Instruction *IncV, Instruction *InsertPos,
754 bool RecomputePoisonFlags) {
755 auto FixupPoisonFlags = [this](Instruction *I) {
756 // Drop flags that are potentially inferred from old context and infer flags
757 // in new context.
758 rememberFlags(I);
759 I->dropPoisonGeneratingFlags();
760 if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(I))
761 if (auto Flags = SE.getStrengthenedNoWrapFlagsFromBinOp(OBO)) {
762 auto *BO = cast<BinaryOperator>(I);
763 BO->setHasNoUnsignedWrap(
764 ScalarEvolution::maskFlags(*Flags, SCEV::FlagNUW) == SCEV::FlagNUW);
765 BO->setHasNoSignedWrap(
766 ScalarEvolution::maskFlags(*Flags, SCEV::FlagNSW) == SCEV::FlagNSW);
767 }
768 };
769
770 if (SE.DT.dominates(IncV, InsertPos)) {
771 if (RecomputePoisonFlags)
772 FixupPoisonFlags(IncV);
773 return true;
774 }
775
776 // InsertPos must itself dominate IncV so that IncV's new position satisfies
777 // its existing users.
778 if (isa<PHINode>(InsertPos) ||
779 !SE.DT.dominates(InsertPos->getParent(), IncV->getParent()))
780 return false;
781
782 if (!SE.LI.movementPreservesLCSSAForm(IncV, InsertPos))
783 return false;
784
785 // Check that the chain of IV operands leading back to Phi can be hoisted.
786 SmallVector<Instruction *, 4> IVIncs;
787 for(;;) {
788 Instruction *Oper = getIVIncOperand(IncV, InsertPos, /*allowScale*/true);
789 if (!Oper)
790 return false;
791 // IncV is safe to hoist.
792 IVIncs.push_back(IncV);
793 IncV = Oper;
794 if (SE.DT.dominates(IncV, InsertPos))
795 break;
796 }
797 for (Instruction *I : llvm::reverse(IVIncs)) {
798 fixupInsertPoints(I);
799 I->moveBefore(InsertPos);
800 if (RecomputePoisonFlags)
801 FixupPoisonFlags(I);
802 }
803 return true;
804}
805
806static bool canReuseFlagsFromOriginalIVInc(PHINode *OrigPhi,
807 PHINode *WidePhi,
808 Instruction *OrigInc,
809 Instruction *WideInc) {
810 return match(OrigInc, m_c_BinOp(m_Specific(OrigPhi), m_Value())) &&
811 match(WideInc, m_c_BinOp(m_Specific(WidePhi), m_Value())) &&
812 OrigInc->getOpcode() == WideInc->getOpcode();
813}
814
815/// Determine if this cyclic phi is in a form that would have been generated by
816/// LSR. We don't care if the phi was actually expanded in this pass, as long
817/// as it is in a low-cost form, for example, no implied multiplication. This
818/// should match any patterns generated by getAddRecExprPHILiterally and
819/// expandAddToGEP.
820bool SCEVExpander::isExpandedAddRecExprPHI(PHINode *PN, Instruction *IncV,
821 const Loop *L) {
822 for(Instruction *IVOper = IncV;
823 (IVOper = getIVIncOperand(IVOper, L->getLoopPreheader()->getTerminator(),
824 /*allowScale=*/false));) {
825 if (IVOper == PN)
826 return true;
827 }
828 return false;
829}
830
831/// expandIVInc - Expand an IV increment at Builder's current InsertPos.
832/// Typically this is the LatchBlock terminator or IVIncInsertPos, but we may
833/// need to materialize IV increments elsewhere to handle difficult situations.
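/// For a pointer-typed PHI the increment is emitted as an i8 GEP named
/// "scevgep"; for an integer PHI it is an add, or a sub of the negated step
/// when useSubtract is set.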
834Value *SCEVExpander::expandIVInc(PHINode *PN, Value *StepV, const Loop *L,
835 bool useSubtract) {
836 Value *IncV;
837 // If the PHI is a pointer, use a GEP, otherwise use an add or sub.
838 if (PN->getType()->isPointerTy()) {
839 // TODO: Change name to IVName.iv.next.
840 IncV = Builder.CreatePtrAdd(PN, StepV, "scevgep");
841 } else {
842 IncV = useSubtract ?
843 Builder.CreateSub(PN, StepV, Twine(IVName) + ".iv.next") :
844 Builder.CreateAdd(PN, StepV, Twine(IVName) + ".iv.next");
845 }
846 return IncV;
847}
848
849/// Check whether we can cheaply express the requested SCEV in terms of
850/// the available PHI SCEV by truncation and/or inversion of the step.
851static bool canBeCheaplyTransformed(ScalarEvolution &SE,
852 const SCEVAddRecExpr *Phi,
853 const SCEVAddRecExpr *Requested,
854 bool &InvertStep) {
855 // We can't transform to match a pointer PHI.
856 Type *PhiTy = Phi->getType();
857 Type *RequestedTy = Requested->getType();
858 if (PhiTy->isPointerTy() || RequestedTy->isPointerTy())
859 return false;
860
861 if (RequestedTy->getIntegerBitWidth() > PhiTy->getIntegerBitWidth())
862 return false;
863
864 // Try truncate it if necessary.
865 Phi = dyn_cast<SCEVAddRecExpr>(SE.getTruncateOrNoop(Phi, RequestedTy));
866 if (!Phi)
867 return false;
868
869 // Check whether truncation will help.
870 if (Phi == Requested) {
871 InvertStep = false;
872 return true;
873 }
874
875 // Check whether inverting will help: {R,+,-1} == R - {0,+,1}.
876 if (SE.getMinusSCEV(Requested->getStart(), Requested) == Phi) {
877 InvertStep = true;
878 return true;
879 }
880
881 return false;
882}
883
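// IsIncrementNSW/IsIncrementNUW check whether the addrec's increment provably
// cannot wrap: adding the step must commute with sign- (respectively zero-)
// extension to a type of twice the bit width.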
884static bool IsIncrementNSW(ScalarEvolution &SE, const SCEVAddRecExpr *AR) {
885 if (!isa<IntegerType>(AR->getType()))
886 return false;
887
888 unsigned BitWidth = cast<IntegerType>(AR->getType())->getBitWidth();
889 Type *WideTy = IntegerType::get(AR->getType()->getContext(), BitWidth * 2);
890 const SCEV *Step = AR->getStepRecurrence(SE);
891 const SCEV *OpAfterExtend = SE.getAddExpr(SE.getSignExtendExpr(Step, WideTy),
892 SE.getSignExtendExpr(AR, WideTy));
893 const SCEV *ExtendAfterOp =
894 SE.getSignExtendExpr(SE.getAddExpr(AR, Step), WideTy);
895 return ExtendAfterOp == OpAfterExtend;
896}
897
898static bool IsIncrementNUW(ScalarEvolution &SE, const SCEVAddRecExpr *AR) {
899 if (!isa<IntegerType>(AR->getType()))
900 return false;
901
902 unsigned BitWidth = cast<IntegerType>(AR->getType())->getBitWidth();
903 Type *WideTy = IntegerType::get(AR->getType()->getContext(), BitWidth * 2);
904 const SCEV *Step = AR->getStepRecurrence(SE);
905 const SCEV *OpAfterExtend = SE.getAddExpr(SE.getZeroExtendExpr(Step, WideTy),
906 SE.getZeroExtendExpr(AR, WideTy));
907 const SCEV *ExtendAfterOp =
908 SE.getZeroExtendExpr(SE.getAddExpr(AR, Step), WideTy);
909 return ExtendAfterOp == OpAfterExtend;
910}
911
912/// getAddRecExprPHILiterally - Helper for expandAddRecExprLiterally. Expand
913/// the base addrec, which is the addrec without any non-loop-dominating
914/// values, and return the PHI.
915PHINode *
916SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
917 const Loop *L, Type *&TruncTy,
918 bool &InvertStep) {
919 assert((!IVIncInsertLoop || IVIncInsertPos) &&
920 "Uninitialized insert position");
921
922 // Reuse a previously-inserted PHI, if present.
923 BasicBlock *LatchBlock = L->getLoopLatch();
924 if (LatchBlock) {
925 PHINode *AddRecPhiMatch = nullptr;
926 Instruction *IncV = nullptr;
927 TruncTy = nullptr;
928 InvertStep = false;
929
930 // Only try partially matching scevs that need truncation and/or
931 // step-inversion if we know this loop is outside the current loop.
932 bool TryNonMatchingSCEV =
933 IVIncInsertLoop &&
934 SE.DT.properlyDominates(LatchBlock, IVIncInsertLoop->getHeader());
935
936 for (PHINode &PN : L->getHeader()->phis()) {
937 if (!SE.isSCEVable(PN.getType()))
938 continue;
939
940 // We should not look for an incomplete PHI. Getting SCEV for an
941 // incomplete PHI has no meaning at all.
942 if (!PN.isComplete()) {
943 SCEV_DEBUG_WITH_TYPE(
944 DebugType, dbgs() << "One incomplete PHI is found: " << PN << "\n");
945 continue;
946 }
947
948 const SCEVAddRecExpr *PhiSCEV = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(&PN));
949 if (!PhiSCEV)
950 continue;
951
952 bool IsMatchingSCEV = PhiSCEV == Normalized;
953 // We only handle truncation and inversion of phi recurrences for the
954 // expanded expression if the expanded expression's loop dominates the
955 // loop we insert to. Check now, so we can bail out early.
956 if (!IsMatchingSCEV && !TryNonMatchingSCEV)
957 continue;
958
959 // TODO: this possibly can be reworked to avoid this cast at all.
960 Instruction *TempIncV =
961 dyn_cast<Instruction>(PN.getIncomingValueForBlock(LatchBlock));
962 if (!TempIncV)
963 continue;
964
965 // Check whether we can reuse this PHI node.
966 if (LSRMode) {
967 if (!isExpandedAddRecExprPHI(&PN, TempIncV, L))
968 continue;
969 } else {
970 if (!isNormalAddRecExprPHI(&PN, TempIncV, L))
971 continue;
972 }
973
974 // Stop if we have found an exact match SCEV.
975 if (IsMatchingSCEV) {
976 IncV = TempIncV;
977 TruncTy = nullptr;
978 InvertStep = false;
979 AddRecPhiMatch = &PN;
980 break;
981 }
982
983 // Try whether the phi can be translated into the requested form
984 // (truncated and/or offset by a constant).
985 if ((!TruncTy || InvertStep) &&
986 canBeCheaplyTransformed(SE, PhiSCEV, Normalized, InvertStep)) {
987 // Record the phi node. But don't stop; we might find an exact match
988 // later.
989 AddRecPhiMatch = &PN;
990 IncV = TempIncV;
991 TruncTy = Normalized->getType();
992 }
993 }
994
995 if (AddRecPhiMatch) {
996 // Ok, the add recurrence looks usable.
997 // Remember this PHI, even in post-inc mode.
998 InsertedValues.insert(AddRecPhiMatch);
999 // Remember the increment.
1000 rememberInstruction(IncV);
1001 // Those values were not actually inserted but re-used.
1002 ReusedValues.insert(AddRecPhiMatch);
1003 ReusedValues.insert(IncV);
1004 return AddRecPhiMatch;
1005 }
1006 }
1007
1008 // Save the original insertion point so we can restore it when we're done.
1009 SCEVInsertPointGuard Guard(Builder, this);
1010
1011 // Another AddRec may need to be recursively expanded below. For example, if
1012 // this AddRec is quadratic, the StepV may itself be an AddRec in this
1013 // loop. Remove this loop from the PostIncLoops set before expanding such
1014 // AddRecs. Otherwise, we cannot find a valid position for the step
1015 // (i.e. StepV can never dominate its loop header). Ideally, we could do
1016 // SavedIncLoops.swap(PostIncLoops), but we generally have a single element,
1017 // so it's not worth implementing SmallPtrSet::swap.
1018 PostIncLoopSet SavedPostIncLoops = PostIncLoops;
1019 PostIncLoops.clear();
1020
1021 // Expand code for the start value into the loop preheader.
1022 assert(L->getLoopPreheader() &&
1023 "Can't expand add recurrences without a loop preheader!");
1024 Value *StartV =
1025 expand(Normalized->getStart(), L->getLoopPreheader()->getTerminator());
1026
1027 // StartV must have been inserted into L's preheader to dominate the new
1028 // phi.
1029 assert(!isa<Instruction>(StartV) ||
1030 SE.DT.properlyDominates(cast<Instruction>(StartV)->getParent(),
1031 L->getHeader()));
1032
1033 // Expand code for the step value. Do this before creating the PHI so that PHI
1034 // reuse code doesn't see an incomplete PHI.
1035 const SCEV *Step = Normalized->getStepRecurrence(SE);
1036 Type *ExpandTy = Normalized->getType();
1037 // If the stride is negative, insert a sub instead of an add for the increment
1038 // (unless it's a constant, because subtracts of constants are canonicalized
1039 // to adds).
1040 bool useSubtract = !ExpandTy->isPointerTy() && Step->isNonConstantNegative();
1041 if (useSubtract)
1042 Step = SE.getNegativeSCEV(Step);
1043 // Expand the step somewhere that dominates the loop header.
1044 Value *StepV = expand(Step, L->getHeader()->getFirstInsertionPt());
1045
1046 // The no-wrap behavior proved by IsIncrement(NUW|NSW) is only applicable if
1047 // we actually do emit an addition. It does not apply if we emit a
1048 // subtraction.
1049 bool IncrementIsNUW = !useSubtract && IsIncrementNUW(SE, Normalized);
1050 bool IncrementIsNSW = !useSubtract && IsIncrementNSW(SE, Normalized);
1051
1052 // Create the PHI.
1053 BasicBlock *Header = L->getHeader();
1054 Builder.SetInsertPoint(Header, Header->begin());
1055 PHINode *PN =
1056 Builder.CreatePHI(ExpandTy, pred_size(Header), Twine(IVName) + ".iv");
1057
1058 // Create the step instructions and populate the PHI.
1059 for (BasicBlock *Pred : predecessors(Header)) {
1060 // Add a start value.
1061 if (!L->contains(Pred)) {
1062 PN->addIncoming(StartV, Pred);
1063 continue;
1064 }
1065
1066 // Create a step value and add it to the PHI.
1067 // If IVIncInsertLoop is non-null and equal to the addrec's loop, insert the
1068 // instructions at IVIncInsertPos.
1069 Instruction *InsertPos = L == IVIncInsertLoop ?
1070 IVIncInsertPos : Pred->getTerminator();
1071 Builder.SetInsertPoint(InsertPos);
1072 Value *IncV = expandIVInc(PN, StepV, L, useSubtract);
1073
1074 if (isa<OverflowingBinaryOperator>(IncV)) {
1075 if (IncrementIsNUW)
1076 cast<BinaryOperator>(IncV)->setHasNoUnsignedWrap();
1077 if (IncrementIsNSW)
1078 cast<BinaryOperator>(IncV)->setHasNoSignedWrap();
1079 }
1080 PN->addIncoming(IncV, Pred);
1081 }
1082
1083 // After expanding subexpressions, restore the PostIncLoops set so the caller
1084 // can ensure that IVIncrement dominates the current uses.
1085 PostIncLoops = SavedPostIncLoops;
1086
1087 // Remember this PHI, even in post-inc mode. LSR SCEV-based salvaging is most
1088 // effective when we are able to use an IV inserted here, so record it.
1089 InsertedValues.insert(PN);
1090 InsertedIVs.push_back(PN);
1091 return PN;
1092}
1093
1094Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
1095 const Loop *L = S->getLoop();
1096
1097 // Determine a normalized form of this expression, which is the expression
1098 // before any post-inc adjustment is made.
1099 const SCEVAddRecExpr *Normalized = S;
1100 if (PostIncLoops.count(L)) {
1101 PostIncLoopSet Loops;
1102 Loops.insert(L);
1103 Normalized = cast<SCEVAddRecExpr>(
1104 normalizeForPostIncUse(S, Loops, SE, /*CheckInvertible=*/false));
1105 }
1106
1107 [[maybe_unused]] const SCEV *Start = Normalized->getStart();
1108 const SCEV *Step = Normalized->getStepRecurrence(SE);
1109 assert(SE.properlyDominates(Start, L->getHeader()) &&
1110 "Start does not properly dominate loop header");
1111 assert(SE.dominates(Step, L->getHeader()) && "Step not dominate loop header");
1112
1113 // In some cases, we decide to reuse an existing phi node but need to truncate
1114 // it and/or invert the step.
1115 Type *TruncTy = nullptr;
1116 bool InvertStep = false;
1117 PHINode *PN = getAddRecExprPHILiterally(Normalized, L, TruncTy, InvertStep);
1118
1119 // Accommodate post-inc mode, if necessary.
1120 Value *Result;
1121 if (!PostIncLoops.count(L))
1122 Result = PN;
1123 else {
1124 // In PostInc mode, use the post-incremented value.
1125 BasicBlock *LatchBlock = L->getLoopLatch();
1126 assert(LatchBlock && "PostInc mode requires a unique loop latch!");
1127 Result = PN->getIncomingValueForBlock(LatchBlock);
1128
1129 // We might be introducing a new use of the post-inc IV that is not poison
1130 // safe, in which case we should drop poison generating flags. Only keep
1131 // those flags for which SCEV has proven that they always hold.
1132 if (isa<OverflowingBinaryOperator>(Result)) {
1133 auto *I = cast<Instruction>(Result);
1134 if (!S->hasNoUnsignedWrap())
1135 I->setHasNoUnsignedWrap(false);
1136 if (!S->hasNoSignedWrap())
1137 I->setHasNoSignedWrap(false);
1138 }
1139
1140 // For an expansion to use the postinc form, the client must call
1141 // expandCodeFor with an InsertPoint that is either outside the PostIncLoop
1142 // or dominated by IVIncInsertPos.
1143 if (isa<Instruction>(Result) &&
1144 !SE.DT.dominates(cast<Instruction>(Result),
1145 &*Builder.GetInsertPoint())) {
1146 // The induction variable's postinc expansion does not dominate this use.
1147 // IVUsers tries to prevent this case, so it is rare. However, it can
1148 // happen when an IVUser outside the loop is not dominated by the latch
1149 // block. Adjusting IVIncInsertPos before expansion begins cannot handle
1150 // all cases. Consider a phi outside whose operand is replaced during
1151 // expansion with the value of the postinc user. Without fundamentally
1152 // changing the way postinc users are tracked, the only remedy is
1153 // inserting an extra IV increment. StepV might fold into PostLoopOffset,
1154 // but hopefully expandCodeFor handles that.
1155 bool useSubtract =
1156 !S->getType()->isPointerTy() && Step->isNonConstantNegative();
1157 if (useSubtract)
1158 Step = SE.getNegativeSCEV(Step);
1159 Value *StepV;
1160 {
1161 // Expand the step somewhere that dominates the loop header.
1162 SCEVInsertPointGuard Guard(Builder, this);
1163 StepV = expand(Step, L->getHeader()->getFirstInsertionPt());
1164 }
1165 Result = expandIVInc(PN, StepV, L, useSubtract);
1166 }
1167 }
1168
1169 // We have decided to reuse an induction variable of a dominating loop. Apply
1170 // truncation and/or inversion of the step.
1171 if (TruncTy) {
1172 // Truncate the result.
1173 if (TruncTy != Result->getType())
1174 Result = Builder.CreateTrunc(Result, TruncTy);
1175
1176 // Invert the result.
1177 if (InvertStep)
1178 Result = Builder.CreateSub(expand(Normalized->getStart()), Result);
1179 }
1180
1181 return Result;
1182}
1183
1184Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
1185 // In canonical mode we compute the addrec as an expression of a canonical IV
1186 // using evaluateAtIteration and expand the resulting SCEV expression. This
1187 // way we avoid introducing new IVs to carry on the computation of the addrec
1188 // throughout the loop.
1189 //
1190 // For nested addrecs evaluateAtIteration might need a canonical IV of a
1191 // type wider than the addrec itself. Emitting a canonical IV of the
1192 // proper type might produce non-legal types, for example expanding an i64
1193 // {0,+,2,+,1} addrec would need an i65 canonical IV. To avoid this just fall
1194 // back to non-canonical mode for nested addrecs.
1195 if (!CanonicalMode || (S->getNumOperands() > 2))
1196 return expandAddRecExprLiterally(S);
1197
1198 Type *Ty = SE.getEffectiveSCEVType(S->getType());
1199 const Loop *L = S->getLoop();
1200
1201 // First check for an existing canonical IV in a suitable type.
1202 PHINode *CanonicalIV = nullptr;
1203 if (PHINode *PN = L->getCanonicalInductionVariable())
1204 if (SE.getTypeSizeInBits(PN->getType()) >= SE.getTypeSizeInBits(Ty))
1205 CanonicalIV = PN;
1206
1207 // Rewrite an AddRec in terms of the canonical induction variable, if
1208 // its type is narrower.
1209 if (CanonicalIV &&
1210 SE.getTypeSizeInBits(CanonicalIV->getType()) > SE.getTypeSizeInBits(Ty) &&
1211 !S->getType()->isPointerTy()) {
1212 SmallVector<const SCEV *, 4> NewOps(S->getNumOperands());
1213 for (unsigned i = 0, e = S->getNumOperands(); i != e; ++i)
1214 NewOps[i] = SE.getAnyExtendExpr(S->getOperand(i), CanonicalIV->getType());
1215 Value *V = expand(SE.getAddRecExpr(NewOps, S->getLoop(),
1216 S->getNoWrapFlags(SCEV::FlagNW)));
1217 BasicBlock::iterator NewInsertPt =
1218 findInsertPointAfter(cast<Instruction>(V), &*Builder.GetInsertPoint());
1219 V = expand(SE.getTruncateExpr(SE.getUnknown(V), Ty), NewInsertPt);
1220 return V;
1221 }
1222
1223 // {X,+,F} --> X + {0,+,F}
1224 if (!S->getStart()->isZero()) {
1225 if (isa<PointerType>(S->getType())) {
1226 Value *StartV = expand(SE.getPointerBase(S));
1227 return expandAddToGEP(SE.removePointerBase(S), StartV);
1228 }
1229
1230 SmallVector<const SCEV *, 4> NewOps(S->operands());
1231 NewOps[0] = SE.getConstant(Ty, 0);
1232 const SCEV *Rest = SE.getAddRecExpr(NewOps, L,
1233 S->getNoWrapFlags(SCEV::FlagNW));
1234
1235 // Just do a normal add. Pre-expand the operands to suppress folding.
1236 //
1237 // The LHS and RHS values are factored out of the expand call to make the
1238 // output independent of the argument evaluation order.
1239 const SCEV *AddExprLHS = SE.getUnknown(expand(S->getStart()));
1240 const SCEV *AddExprRHS = SE.getUnknown(expand(Rest));
1241 return expand(SE.getAddExpr(AddExprLHS, AddExprRHS));
1242 }
1243
1244 // If we don't yet have a canonical IV, create one.
1245 if (!CanonicalIV) {
1246 // Create and insert the PHI node for the induction variable in the
1247 // specified loop.
1248 BasicBlock *Header = L->getHeader();
1249 pred_iterator HPB = pred_begin(Header), HPE = pred_end(Header);
1250 CanonicalIV = PHINode::Create(Ty, std::distance(HPB, HPE), "indvar");
1251 CanonicalIV->insertBefore(Header->begin());
1252 rememberInstruction(CanonicalIV);
1253
1254 SmallPtrSet<BasicBlock *, 4> PredSeen;
1255 Constant *One = ConstantInt::get(Ty, 1);
1256 for (pred_iterator HPI = HPB; HPI != HPE; ++HPI) {
1257 BasicBlock *HP = *HPI;
1258 if (!PredSeen.insert(HP).second) {
1259 // There must be an incoming value for each predecessor, even the
1260 // duplicates!
1261 CanonicalIV->addIncoming(CanonicalIV->getIncomingValueForBlock(HP), HP);
1262 continue;
1263 }
1264
1265 if (L->contains(HP)) {
1266 // Insert a unit add instruction right before the terminator
1267 // corresponding to the back-edge.
1268 Instruction *Add = BinaryOperator::CreateAdd(CanonicalIV, One,
1269 "indvar.next",
1270 HP->getTerminator()->getIterator());
1271 Add->setDebugLoc(HP->getTerminator()->getDebugLoc());
1272 rememberInstruction(Add);
1273 CanonicalIV->addIncoming(Add, HP);
1274 } else {
1275 CanonicalIV->addIncoming(Constant::getNullValue(Ty), HP);
1276 }
1277 }
1278 }
1279
1280 // {0,+,1} --> Insert a canonical induction variable into the loop!
1281 if (S->isAffine() && S->getOperand(1)->isOne()) {
1282 assert(Ty == SE.getEffectiveSCEVType(CanonicalIV->getType()) &&
1283 "IVs with types different from the canonical IV should "
1284 "already have been handled!");
1285 return CanonicalIV;
1286 }
1287
1288 // {0,+,F} --> {0,+,1} * F
1289
1290 // If this is a simple linear addrec, emit it now as a special case.
1291 if (S->isAffine()) // {0,+,F} --> i*F
1292 return
1293 expand(SE.getTruncateOrNoop(
1294 SE.getMulExpr(SE.getUnknown(CanonicalIV),
1295 SE.getNoopOrAnyExtend(S->getOperand(1),
1296 CanonicalIV->getType())),
1297 Ty));
1298
1299 // If this is a chain of recurrences, turn it into a closed form, using the
1300 // folders, then expandCodeFor the closed form. This allows the folders to
1301 // simplify the expression without having to build a bunch of special code
1302 // into this folder.
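 // For example, the chain of recurrences {0,+,1,+,1} evaluated at iteration i
 // of the canonical IV folds to the closed form i + i*(i-1)/2.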
1303 const SCEV *IH = SE.getUnknown(CanonicalIV); // Get I as a "symbolic" SCEV.
1304
1305 // Promote S up to the canonical IV type, if the cast is foldable.
1306 const SCEV *NewS = S;
1307 const SCEV *Ext = SE.getNoopOrAnyExtend(S, CanonicalIV->getType());
1308 if (isa<SCEVAddRecExpr>(Ext))
1309 NewS = Ext;
1310
1311 const SCEV *V = cast<SCEVAddRecExpr>(NewS)->evaluateAtIteration(IH, SE);
1312
1313 // Truncate the result down to the original type, if needed.
1314 const SCEV *T = SE.getTruncateOrNoop(V, Ty);
1315 return expand(T);
1316}
1317
1318Value *SCEVExpander::visitPtrToIntExpr(const SCEVPtrToIntExpr *S) {
1319 Value *V = expand(S->getOperand());
1320 return ReuseOrCreateCast(V, S->getType(), CastInst::PtrToInt,
1321 GetOptimalInsertionPointForCastOf(V));
1322}
1323
1324Value *SCEVExpander::visitTruncateExpr(const SCEVTruncateExpr *S) {
1325 Value *V = expand(S->getOperand());
1326 return Builder.CreateTrunc(V, S->getType());
1327}
1328
1329Value *SCEVExpander::visitZeroExtendExpr(const SCEVZeroExtendExpr *S) {
1330 Value *V = expand(S->getOperand());
1331 return Builder.CreateZExt(V, S->getType(), "",
1332 SE.isKnownNonNegative(S->getOperand()));
1333}
1334
1335Value *SCEVExpander::visitSignExtendExpr(const SCEVSignExtendExpr *S) {
1336 Value *V = expand(S->getOperand());
1337 return Builder.CreateSExt(V, S->getType());
1338}
1339
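// Expand an n-ary min/max as a chain of min/max intrinsics, or icmp+select
// for pointer operands. For the sequential form, operands are frozen so the
// flattened chain cannot propagate poison from operands that the original
// expression would not have evaluated.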
1340Value *SCEVExpander::expandMinMaxExpr(const SCEVNAryExpr *S,
1341 Intrinsic::ID IntrinID, Twine Name,
1342 bool IsSequential) {
1343 Value *LHS = expand(S->getOperand(S->getNumOperands() - 1));
1344 Type *Ty = LHS->getType();
1345 if (IsSequential)
1346 LHS = Builder.CreateFreeze(LHS);
1347 for (int i = S->getNumOperands() - 2; i >= 0; --i) {
1348 Value *RHS = expand(S->getOperand(i));
1349 if (IsSequential && i != 0)
1350 RHS = Builder.CreateFreeze(RHS);
1351 Value *Sel;
1352 if (Ty->isIntegerTy())
1353 Sel = Builder.CreateIntrinsic(IntrinID, {Ty}, {LHS, RHS},
1354 /*FMFSource=*/nullptr, Name);
1355 else {
1356 Value *ICmp =
1357 Builder.CreateICmp(MinMaxIntrinsic::getPredicate(IntrinID), LHS, RHS);
1358 Sel = Builder.CreateSelect(ICmp, LHS, RHS, Name);
1359 }
1360 LHS = Sel;
1361 }
1362 return LHS;
1363}
1364
1365Value *SCEVExpander::visitSMaxExpr(const SCEVSMaxExpr *S) {
1366 return expandMinMaxExpr(S, Intrinsic::smax, "smax");
1367}
1368
1369Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) {
1370 return expandMinMaxExpr(S, Intrinsic::umax, "umax");
1371}
1372
1373Value *SCEVExpander::visitSMinExpr(const SCEVSMinExpr *S) {
1374 return expandMinMaxExpr(S, Intrinsic::smin, "smin");
1375}
1376
1377Value *SCEVExpander::visitUMinExpr(const SCEVUMinExpr *S) {
1378 return expandMinMaxExpr(S, Intrinsic::umin, "umin");
1379}
1380
1381Value *SCEVExpander::visitSequentialUMinExpr(const SCEVSequentialUMinExpr *S) {
1382 return expandMinMaxExpr(S, Intrinsic::umin, "umin", /*IsSequential*/true);
1383}
1384
1385Value *SCEVExpander::visitVScale(const SCEVVScale *S) {
1386 return Builder.CreateVScale(ConstantInt::get(S->getType(), 1));
1387}
1388
1389Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty,
1390 BasicBlock::iterator IP) {
1391 setInsertPoint(IP);
1392 Value *V = expandCodeFor(SH, Ty);
1393 return V;
1394}
1395
1396Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty) {
1397 // Expand the code for this SCEV.
1398 Value *V = expand(SH);
1399
1400 if (Ty) {
1401 assert(SE.getTypeSizeInBits(Ty) == SE.getTypeSizeInBits(SH->getType()) &&
1402 "non-trivial casts should be done with the SCEVs directly!");
1403 V = InsertNoopCastOfTo(V, Ty);
1404 }
1405 return V;
1406}
1407
1408Value *SCEVExpander::FindValueInExprValueMap(
1409 const SCEV *S, const Instruction *InsertPt,
1410 SmallVectorImpl<Instruction *> &DropPoisonGeneratingInsts) {
1411 // If the expansion is not in CanonicalMode, and the SCEV contains any
1412 // sub scAddRecExpr type SCEV, it is required to expand the SCEV literally.
1413 if (!CanonicalMode && SE.containsAddRecurrence(S))
1414 return nullptr;
1415
1416 // If S is a constant, it may be worse to reuse an existing Value.
1417 if (isa<SCEVConstant>(S))
1418 return nullptr;
1419
1420 for (Value *V : SE.getSCEVValues(S)) {
1421 Instruction *EntInst = dyn_cast<Instruction>(V);
1422 if (!EntInst)
1423 continue;
1424
1425 // Choose a Value from the set which dominates the InsertPt.
1426 // InsertPt should be inside the Value's parent loop so as not to break
1427 // the LCSSA form.
1428 assert(EntInst->getFunction() == InsertPt->getFunction());
1429 if (S->getType() != V->getType() || !SE.DT.dominates(EntInst, InsertPt) ||
1430 !(SE.LI.getLoopFor(EntInst->getParent()) == nullptr ||
1431 SE.LI.getLoopFor(EntInst->getParent())->contains(InsertPt)))
1432 continue;
1433
1434 // Make sure reusing the instruction is poison-safe.
1435 if (SE.canReuseInstruction(S, EntInst, DropPoisonGeneratingInsts))
1436 return V;
1437 DropPoisonGeneratingInsts.clear();
1438 }
1439 return nullptr;
1440}
1441
1442// The expansion of SCEV will either reuse a previous Value in ExprValueMap,
1443// or expand the SCEV literally. Specifically, if the expansion is in LSRMode,
1444// and the SCEV contains any sub scAddRecExpr type SCEV, it will be expanded
1445// literally, to prevent LSR's transformed SCEV from being reverted. Otherwise,
1446// the expansion will try to reuse Value from ExprValueMap, and only when it
1447// fails, expand the SCEV literally.
1448Value *SCEVExpander::expand(const SCEV *S) {
1449 // Compute an insertion point for this SCEV object. Hoist the instructions
1450 // as far out in the loop nest as possible.
1451 BasicBlock::iterator InsertPt = Builder.GetInsertPoint();
1452
1453 // We can only move the insertion point if there are no div or rem operations;
1454 // otherwise we risk hoisting it past a check for a zero denominator.
1455 auto SafeToHoist = [](const SCEV *S) {
1456 return !SCEVExprContains(S, [](const SCEV *S) {
1457 if (const auto *D = dyn_cast<SCEVUDivExpr>(S)) {
1458 if (const auto *SC = dyn_cast<SCEVConstant>(D->getRHS()))
1459 // Division by non-zero constants can be hoisted.
1460 return SC->getValue()->isZero();
1461 // All other divisions should not be moved as they may be
1462 // divisions by zero and should be kept within the
1463 // conditions of the surrounding loops that guard their
1464 // execution (see PR35406).
1465 return true;
1466 }
1467 return false;
1468 });
1469 };
1470 if (SafeToHoist(S)) {
1471 for (Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock());;
1472 L = L->getParentLoop()) {
1473 if (SE.isLoopInvariant(S, L)) {
1474 if (!L) break;
1475 if (BasicBlock *Preheader = L->getLoopPreheader()) {
1476 InsertPt = Preheader->getTerminator()->getIterator();
1477 } else {
1478 // LSR sets the insertion point for AddRec start/step values to the
1479 // block start to simplify value reuse, even though it's an invalid
1480 // position. SCEVExpander must correct for this in all cases.
1481 InsertPt = L->getHeader()->getFirstInsertionPt();
1482 }
1483 } else {
1484 // If the SCEV is computable at this level, insert it into the header
1485 // after the PHIs (and after any other instructions that we've inserted
1486 // there) so that it is guaranteed to dominate any user inside the loop.
1487 if (L && SE.hasComputableLoopEvolution(S, L) && !PostIncLoops.count(L))
1488 InsertPt = L->getHeader()->getFirstInsertionPt();
1489
1490 while (InsertPt != Builder.GetInsertPoint() &&
1491 (isInsertedInstruction(&*InsertPt) ||
1492 isa<DbgInfoIntrinsic>(&*InsertPt))) {
1493 InsertPt = std::next(InsertPt);
1494 }
1495 break;
1496 }
1497 }
1498 }
1499
1500 // Check to see if we already expanded this here.
1501 auto I = InsertedExpressions.find(std::make_pair(S, &*InsertPt));
1502 if (I != InsertedExpressions.end())
1503 return I->second;
1504
1505 SCEVInsertPointGuard Guard(Builder, this);
1506 Builder.SetInsertPoint(InsertPt->getParent(), InsertPt);
1507
1508 // Expand the expression into instructions.
1509 SmallVector<Instruction *> DropPoisonGeneratingInsts;
1510 Value *V = FindValueInExprValueMap(S, &*InsertPt, DropPoisonGeneratingInsts);
1511 if (!V) {
1512 V = visit(S);
1513 V = fixupLCSSAFormFor(V);
1514 } else {
1515 for (Instruction *I : DropPoisonGeneratingInsts) {
1516 rememberFlags(I);
1517 I->dropPoisonGeneratingFlagsAndMetadata();
1518 // See if we can re-infer from first principles any of the flags we just
1519 // dropped.
1520 if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(I))
1521 if (auto Flags = SE.getStrengthenedNoWrapFlagsFromBinOp(OBO)) {
1522 auto *BO = cast<BinaryOperator>(I);
1523 BO->setHasNoUnsignedWrap(
1524 ScalarEvolution::maskFlags(*Flags, SCEV::FlagNUW) == SCEV::FlagNUW);
1525 BO->setHasNoSignedWrap(
1526 ScalarEvolution::maskFlags(*Flags, SCEV::FlagNSW) == SCEV::FlagNSW);
1527 }
1528 if (auto *NNI = dyn_cast<PossiblyNonNegInst>(I)) {
1529 auto *Src = NNI->getOperand(0);
1530 if (isImpliedByDomCondition(ICmpInst::ICMP_SGE, Src,
1531 Constant::getNullValue(Src->getType()), I,
1532 DL).value_or(false))
1533 NNI->setNonNeg(true);
1534 }
1535 }
1536 }
1537 // Remember the expanded value for this SCEV at this location.
1538 //
1539 // This is independent of PostIncLoops. The mapped value simply materializes
1540 // the expression at this insertion point. If the mapped value happened to be
1541 // a postinc expansion, it could be reused by a non-postinc user, but only if
1542 // its insertion point was already at the head of the loop.
1543 InsertedExpressions[std::make_pair(S, &*InsertPt)] = V;
1544 return V;
1545}
1546
1547void SCEVExpander::rememberInstruction(Value *I) {
1548 auto DoInsert = [this](Value *V) {
1549 if (!PostIncLoops.empty())
1550 InsertedPostIncValues.insert(V);
1551 else
1552 InsertedValues.insert(V);
1553 };
1554 DoInsert(I);
1555}
1556
1557void SCEVExpander::rememberFlags(Instruction *I) {
1558 // If we already have flags for the instruction, keep the existing ones.
1559 OrigFlags.try_emplace(I, PoisonFlags(I));
1560}
1561
1562void SCEVExpander::replaceCongruentIVInc(
1563 PHINode *&Phi, PHINode *&OrigPhi, Loop *L, const DominatorTree *DT,
1564 SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1565 BasicBlock *LatchBlock = L->getLoopLatch();
1566 if (!LatchBlock)
1567 return;
1568
1569 Instruction *OrigInc =
1570 dyn_cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock));
1571 Instruction *IsomorphicInc =
1572 dyn_cast<Instruction>(Phi->getIncomingValueForBlock(LatchBlock));
1573 if (!OrigInc || !IsomorphicInc)
1574 return;
1575
1576 // If this phi has the same width but is more canonical, replace the
1577 // original with it. As part of the "more canonical" determination,
1578 // respect a prior decision to use an IV chain.
1579 if (OrigPhi->getType() == Phi->getType() &&
1580 !(ChainedPhis.count(Phi) ||
1581 isExpandedAddRecExprPHI(OrigPhi, OrigInc, L)) &&
1582 (ChainedPhis.count(Phi) ||
1583 isExpandedAddRecExprPHI(Phi, IsomorphicInc, L))) {
1584 std::swap(OrigPhi, Phi);
1585 std::swap(OrigInc, IsomorphicInc);
1586 }
1587
1588 // Replacing the congruent phi is sufficient because acyclic
1589 // redundancy elimination, CSE/GVN, should handle the
1590 // rest. However, once SCEV proves that a phi is congruent,
1591 // it's often the head of an IV user cycle that is isomorphic
1592 // with the original phi. It's worth eagerly cleaning up the
1593 // common case of a single IV increment so that DeleteDeadPHIs
1594 // can remove cycles that had postinc uses.
1595 // Because we may potentially introduce a new use of OrigIV that didn't
1596 // exist before at this point, its poison flags need readjustment.
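 // Illustrative example (added for clarity, using hypothetical value names):
 // if %phi1 and %phi2 both compute {0,+,1}<%loop> with increments
 // %inc1 = add %phi1, 1 and %inc2 = add %phi2, 1, then after %phi2 is
 // replaced by %phi1, eagerly replacing %inc2 with %inc1 leaves %phi2/%inc2
 // as a dead cycle that DeleteDeadPHIs can remove, including postinc uses.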
1597 const SCEV *TruncExpr =
1598 SE.getTruncateOrNoop(SE.getSCEV(OrigInc), IsomorphicInc->getType());
1599 if (OrigInc == IsomorphicInc || TruncExpr != SE.getSCEV(IsomorphicInc) ||
1600 !SE.LI.replacementPreservesLCSSAForm(IsomorphicInc, OrigInc))
1601 return;
1602
1603 bool BothHaveNUW = false;
1604 bool BothHaveNSW = false;
1605 auto *OBOIncV = dyn_cast<OverflowingBinaryOperator>(OrigInc);
1606 auto *OBOIsomorphic = dyn_cast<OverflowingBinaryOperator>(IsomorphicInc);
1607 if (OBOIncV && OBOIsomorphic) {
1608 BothHaveNUW =
1609 OBOIncV->hasNoUnsignedWrap() && OBOIsomorphic->hasNoUnsignedWrap();
1610 BothHaveNSW =
1611 OBOIncV->hasNoSignedWrap() && OBOIsomorphic->hasNoSignedWrap();
1612 }
1613
1614 if (!hoistIVInc(OrigInc, IsomorphicInc,
1615 /*RecomputePoisonFlags*/ true))
1616 return;
1617
1618 // We are replacing with a wider increment. If both OrigInc and IsomorphicInc
1619 // are NUW/NSW, then we can preserve them on the wider increment; the narrower
1620 // IsomorphicInc would wrap before the wider OrigInc, so the replacement won't
1621 // make IsomorphicInc's uses more poisonous.
1622 assert(OrigInc->getType()->getScalarSizeInBits() >=
1623 IsomorphicInc->getType()->getScalarSizeInBits() &&
1624 "Should only replace an increment with a wider one.");
1625 if (BothHaveNUW || BothHaveNSW) {
1626 OrigInc->setHasNoUnsignedWrap(OBOIncV->hasNoUnsignedWrap() || BothHaveNUW);
1627 OrigInc->setHasNoSignedWrap(OBOIncV->hasNoSignedWrap() || BothHaveNSW);
1628 }
1629
1630 SCEV_DEBUG_WITH_TYPE(DebugType,
1631 dbgs() << "INDVARS: Eliminated congruent iv.inc: "
1632 << *IsomorphicInc << '\n');
1633 Value *NewInc = OrigInc;
1634 if (OrigInc->getType() != IsomorphicInc->getType()) {
1635 BasicBlock::iterator IP;
1636 if (PHINode *PN = dyn_cast<PHINode>(OrigInc))
1637 IP = PN->getParent()->getFirstInsertionPt();
1638 else
1639 IP = OrigInc->getNextNonDebugInstruction()->getIterator();
1640
1641 IRBuilder<> Builder(IP->getParent(), IP);
1642 Builder.SetCurrentDebugLocation(IsomorphicInc->getDebugLoc());
1643 NewInc =
1644 Builder.CreateTruncOrBitCast(OrigInc, IsomorphicInc->getType(), IVName);
1645 }
1646 IsomorphicInc->replaceAllUsesWith(NewInc);
1647 DeadInsts.emplace_back(IsomorphicInc);
1648}
1649
1650/// replaceCongruentIVs - Check for congruent phis in this loop header and
1651/// replace them with their most canonical representative. Return the number of
1652/// phis eliminated.
1653///
1654/// This does not depend on any SCEVExpander state but should be used in
1655/// the same context that SCEVExpander is used.
1656unsigned
1657SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
1658 SmallVectorImpl<WeakTrackingVH> &DeadInsts,
1659 const TargetTransformInfo *TTI) {
1660 // Find integer phis in order of increasing width.
1661 SmallVector<PHINode *, 8> Phis;
1662 for (PHINode &PN : L->getHeader()->phis())
1663 Phis.push_back(&PN);
1664
1665 if (TTI)
1666 // Use stable_sort to preserve order of equivalent PHIs, so the order
1667 // of the sorted Phis is the same from run to run on the same loop.
1668 llvm::stable_sort(Phis, [](Value *LHS, Value *RHS) {
1669 // Put pointers at the back and make sure pointer < pointer = false.
1670 if (!LHS->getType()->isIntegerTy() || !RHS->getType()->isIntegerTy())
1671 return RHS->getType()->isIntegerTy() && !LHS->getType()->isIntegerTy();
1672 return RHS->getType()->getPrimitiveSizeInBits().getFixedValue() <
1673 LHS->getType()->getPrimitiveSizeInBits().getFixedValue();
1674 });
1675
1676 unsigned NumElim = 0;
1677 DenseMap<const SCEV *, PHINode *> ExprToIVMap;
1678 // Process phis from wide to narrow. Map wide phis to their truncation
1679 // so narrow phis can reuse them.
1680 for (PHINode *Phi : Phis) {
1681 auto SimplifyPHINode = [&](PHINode *PN) -> Value * {
1682 if (Value *V = simplifyInstruction(PN, {DL, &SE.TLI, &SE.DT, &SE.AC}))
1683 return V;
1684 if (!SE.isSCEVable(PN->getType()))
1685 return nullptr;
1686 auto *Const = dyn_cast<SCEVConstant>(SE.getSCEV(PN));
1687 if (!Const)
1688 return nullptr;
1689 return Const->getValue();
1690 };
1691
1692 // Fold constant phis. They may be congruent to other constant phis and
1693 // would confuse the logic below that expects proper IVs.
1694 if (Value *V = SimplifyPHINode(Phi)) {
1695 if (V->getType() != Phi->getType())
1696 continue;
1697 SE.forgetValue(Phi);
1698 Phi->replaceAllUsesWith(V);
1699 DeadInsts.emplace_back(Phi);
1700 ++NumElim;
1701 SCEV_DEBUG_WITH_TYPE(DebugType,
1702 dbgs() << "INDVARS: Eliminated constant iv: " << *Phi
1703 << '\n');
1704 continue;
1705 }
1706
1707 if (!SE.isSCEVable(Phi->getType()))
1708 continue;
1709
1710 PHINode *&OrigPhiRef = ExprToIVMap[SE.getSCEV(Phi)];
1711 if (!OrigPhiRef) {
1712 OrigPhiRef = Phi;
1713 if (Phi->getType()->isIntegerTy() && TTI &&
1714 TTI->isTruncateFree(Phi->getType(), Phis.back()->getType())) {
1715 // Make sure we only rewrite using simple induction variables;
1716 // otherwise, we can make the trip count of a loop unanalyzable
1717 // to SCEV.
1718 const SCEV *PhiExpr = SE.getSCEV(Phi);
1719 if (isa<SCEVAddRecExpr>(PhiExpr)) {
1720 // This phi can be freely truncated to the narrowest phi type. Map the
1721 // truncated expression to it so it will be reused for narrow types.
1722 const SCEV *TruncExpr =
1723 SE.getTruncateExpr(PhiExpr, Phis.back()->getType());
1724 ExprToIVMap[TruncExpr] = Phi;
1725 }
1726 }
1727 continue;
1728 }
1729
1730 // Replacing a pointer phi with an integer phi or vice-versa doesn't make
1731 // sense.
1732 if (OrigPhiRef->getType()->isPointerTy() != Phi->getType()->isPointerTy())
1733 continue;
1734
1735 replaceCongruentIVInc(Phi, OrigPhiRef, L, DT, DeadInsts);
1736 SCEV_DEBUG_WITH_TYPE(DebugType,
1737 dbgs() << "INDVARS: Eliminated congruent iv: " << *Phi
1738 << '\n');
1739 SCEV_DEBUG_WITH_TYPE(
1740 DebugType, dbgs() << "INDVARS: Original iv: " << *OrigPhiRef << '\n');
1741 ++NumElim;
1742 Value *NewIV = OrigPhiRef;
1743 if (OrigPhiRef->getType() != Phi->getType()) {
1744 IRBuilder<> Builder(L->getHeader(),
1745 L->getHeader()->getFirstInsertionPt());
1746 Builder.SetCurrentDebugLocation(Phi->getDebugLoc());
1747 NewIV = Builder.CreateTruncOrBitCast(OrigPhiRef, Phi->getType(), IVName);
1748 }
1749 Phi->replaceAllUsesWith(NewIV);
1750 DeadInsts.emplace_back(Phi);
1751 }
1752 return NumElim;
1753}
1754
1755bool SCEVExpander::hasRelatedExistingExpansion(const SCEV *S,
1756 const Instruction *At,
1757 Loop *L) {
1758 using namespace llvm::PatternMatch;
1759
1760 SmallVector<BasicBlock *, 4> ExitingBlocks;
1761 L->getExitingBlocks(ExitingBlocks);
1762
1763 // Look for suitable value in simple conditions at the loop exits.
1764 for (BasicBlock *BB : ExitingBlocks) {
1765 ICmpInst::Predicate Pred;
1766 Instruction *LHS, *RHS;
1767
1768 if (!match(BB->getTerminator(),
1769 m_Br(m_ICmp(Pred, m_Instruction(LHS), m_Instruction(RHS)),
1770 m_BasicBlock(), m_BasicBlock())))
1771 continue;
1772
1773 if (SE.getSCEV(LHS) == S && SE.DT.dominates(LHS, At))
1774 return true;
1775
1776 if (SE.getSCEV(RHS) == S && SE.DT.dominates(RHS, At))
1777 return true;
1778 }
1779
1780 // Use expand's logic which is used for reusing a previous Value in
1781 // ExprValueMap. Note that we don't currently model the cost of
1782 // needing to drop poison generating flags on the instruction if we
1783 // want to reuse it. We effectively assume that has zero cost.
1784 SmallVector<Instruction *> DropPoisonGeneratingInsts;
1785 return FindValueInExprValueMap(S, At, DropPoisonGeneratingInsts) != nullptr;
1786}
1787
1788template<typename T> static InstructionCost costAndCollectOperands(
1789 const SCEVOperand &WorkItem, const TargetTransformInfo &TTI,
1790 TargetTransformInfo::TargetCostKind CostKind,
1791 SmallVectorImpl<SCEVOperand> &Worklist) {
1792
1793 const T *S = cast<T>(WorkItem.S);
1794 InstructionCost Cost = 0;
1795 // Object to help map SCEV operands to expanded IR instructions.
1796 struct OperationIndices {
1797 OperationIndices(unsigned Opc, size_t min, size_t max) :
1798 Opcode(Opc), MinIdx(min), MaxIdx(max) { }
1799 unsigned Opcode;
1800 size_t MinIdx;
1801 size_t MaxIdx;
1802 };
1803
1804 // Collect the operations of all the instructions that will be needed to
1805 // expand the SCEVExpr. This is so that when we come to cost the operands,
1806 // we know what the generated user(s) will be.
1807 SmallVector<OperationIndices, 2> Operations;
1808
1809 auto CastCost = [&](unsigned Opcode) -> InstructionCost {
1810 Operations.emplace_back(Opcode, 0, 0);
1811 return TTI.getCastInstrCost(Opcode, S->getType(),
1812 S->getOperand(0)->getType(),
1813 TTI::CastContextHint::None, CostKind);
1814 };
1815
1816 auto ArithCost = [&](unsigned Opcode, unsigned NumRequired,
1817 unsigned MinIdx = 0,
1818 unsigned MaxIdx = 1) -> InstructionCost {
1819 Operations.emplace_back(Opcode, MinIdx, MaxIdx);
1820 return NumRequired *
1821 TTI.getArithmeticInstrCost(Opcode, S->getType(), CostKind);
1822 };
1823
1824 auto CmpSelCost = [&](unsigned Opcode, unsigned NumRequired, unsigned MinIdx,
1825 unsigned MaxIdx) -> InstructionCost {
1826 Operations.emplace_back(Opcode, MinIdx, MaxIdx);
1827 Type *OpType = S->getType();
1828 return NumRequired * TTI.getCmpSelInstrCost(
1829 Opcode, OpType, CmpInst::makeCmpResultType(OpType),
1830 CmpInst::BAD_ICMP_PREDICATE, CostKind);
1831 };
1832
1833 switch (S->getSCEVType()) {
1834 case scCouldNotCompute:
1835 llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
1836 case scUnknown:
1837 case scConstant:
1838 case scVScale:
1839 return 0;
1840 case scPtrToInt:
1841 Cost = CastCost(Instruction::PtrToInt);
1842 break;
1843 case scTruncate:
1844 Cost = CastCost(Instruction::Trunc);
1845 break;
1846 case scZeroExtend:
1847 Cost = CastCost(Instruction::ZExt);
1848 break;
1849 case scSignExtend:
1850 Cost = CastCost(Instruction::SExt);
1851 break;
1852 case scUDivExpr: {
1853 unsigned Opcode = Instruction::UDiv;
1854 if (auto *SC = dyn_cast<SCEVConstant>(S->getOperand(1)))
1855 if (SC->getAPInt().isPowerOf2())
1856 Opcode = Instruction::LShr;
1857 Cost = ArithCost(Opcode, 1);
1858 break;
1859 }
1860 case scAddExpr:
1861 Cost = ArithCost(Instruction::Add, S->getNumOperands() - 1);
1862 break;
1863 case scMulExpr:
1864 // TODO: this is a very pessimistic cost modelling for Mul,
1865 // because of Bin Pow algorithm actually used by the expander,
1866 // see SCEVExpander::visitMulExpr(), ExpandOpBinPowN().
1867 Cost = ArithCost(Instruction::Mul, S->getNumOperands() - 1);
1868 break;
1869 case scSMaxExpr:
1870 case scUMaxExpr:
1871 case scSMinExpr:
1872 case scUMinExpr:
1873 case scSequentialUMinExpr: {
1874 // FIXME: should this ask the cost for Intrinsics?
1875 // The reduction tree.
1876 Cost += CmpSelCost(Instruction::ICmp, S->getNumOperands() - 1, 0, 1);
1877 Cost += CmpSelCost(Instruction::Select, S->getNumOperands() - 1, 0, 2);
1878 switch (S->getSCEVType()) {
1879 case scSequentialUMinExpr: {
1880 // The safety net against poison.
1881 // FIXME: this is broken.
1882 Cost += CmpSelCost(Instruction::ICmp, S->getNumOperands() - 1, 0, 0);
1883 Cost += ArithCost(Instruction::Or,
1884 S->getNumOperands() > 2 ? S->getNumOperands() - 2 : 0);
1885 Cost += CmpSelCost(Instruction::Select, 1, 0, 1);
1886 break;
1887 }
1888 default:
1889 assert(!isa<SCEVSequentialMinMaxExpr>(S) &&
1890 "Unhandled SCEV expression type?");
1891 break;
1892 }
1893 break;
1894 }
1895 case scAddRecExpr: {
1896 // In this polynomial, we may have some zero operands, and we shouldn't
1897 // really charge for those. So how many non-zero coefficients are there?
1898 int NumTerms = llvm::count_if(S->operands(), [](const SCEV *Op) {
1899 return !Op->isZero();
1900 });
1901
1902 assert(NumTerms >= 1 && "Polynomial should have at least one term.");
1903 assert(!(*std::prev(S->operands().end()))->isZero() &&
1904 "Last operand should not be zero");
1905
1906 // Ignoring constant term (operand 0), how many of the coefficients are u> 1?
1907 int NumNonZeroDegreeNonOneTerms =
1908 llvm::count_if(S->operands(), [](const SCEV *Op) {
1909 auto *SConst = dyn_cast<SCEVConstant>(Op);
1910 return !SConst || SConst->getAPInt().ugt(1);
1911 });
1912
1913 // Much like with a normal add expr, the polynomial will require
1914 // one less addition than the number of its terms.
1915 InstructionCost AddCost = ArithCost(Instruction::Add, NumTerms - 1,
1916 /*MinIdx*/ 1, /*MaxIdx*/ 1);
1917 // Here, *each* one of those will require a multiplication.
1918 InstructionCost MulCost =
1919 ArithCost(Instruction::Mul, NumNonZeroDegreeNonOneTerms);
1920 Cost = AddCost + MulCost;
1921
1922 // What is the degree of this polynomial?
1923 int PolyDegree = S->getNumOperands() - 1;
1924 assert(PolyDegree >= 1 && "Should be at least affine.");
1925
1926 // The final term will be:
1927 // Op_{PolyDegree} * x ^ {PolyDegree}
1928 // Where x ^ {PolyDegree} will again require PolyDegree-1 mul operations.
1929 // Note that x ^ {PolyDegree} = x * x ^ {PolyDegree-1} so charging for
1930 // x ^ {PolyDegree} will give us x ^ {2} .. x ^ {PolyDegree-1} for free.
1931 // FIXME: this is conservatively correct, but might be overly pessimistic.
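 // Worked example (illustrative, not from the original source): for the
 // degree-2 recurrence {0,+,1,+,2}, two operands are non-zero, so AddCost
 // charges one add; only the coefficient 2 is u> 1, so MulCost charges one
 // multiply; the x^2 term then adds MulCost * (PolyDegree - 1), i.e. one
 // more multiply's worth of cost.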
1932 Cost += MulCost * (PolyDegree - 1);
1933 break;
1934 }
1935 }
1936
1937 for (auto &CostOp : Operations) {
1938 for (auto SCEVOp : enumerate(S->operands())) {
1939 // Clamp the index to account for multiple IR operations being chained.
1940 size_t MinIdx = std::max(SCEVOp.index(), CostOp.MinIdx);
1941 size_t OpIdx = std::min(MinIdx, CostOp.MaxIdx);
1942 Worklist.emplace_back(CostOp.Opcode, OpIdx, SCEVOp.value());
1943 }
1944 }
1945 return Cost;
1946}
1947
1948bool SCEVExpander::isHighCostExpansionHelper(
1949 const SCEVOperand &WorkItem, Loop *L, const Instruction &At,
1950 InstructionCost &Cost, unsigned Budget, const TargetTransformInfo &TTI,
1951 SmallPtrSetImpl<const SCEV *> &Processed,
1952 SmallVectorImpl<SCEVOperand> &Worklist) {
1953 if (Cost > Budget)
1954 return true; // Already run out of budget, give up.
1955
1956 const SCEV *S = WorkItem.S;
1957 // Was the cost of expansion of this expression already accounted for?
1958 if (!isa<SCEVConstant>(S) && !Processed.insert(S).second)
1959 return false; // We have already accounted for this expression.
1960
1961 // If we can find an existing value for this scev available at the point "At"
1962 // then consider the expression cheap.
1963 if (hasRelatedExistingExpansion(S, &At, L))
1964 return false; // Consider the expression to be free.
1965
1966 TargetTransformInfo::TargetCostKind CostKind =
1967 L->getHeader()->getParent()->hasMinSize()
1968 ? TargetTransformInfo::TCK_CodeSize
1969 : TargetTransformInfo::TCK_RecipThroughput;
1970
1971 switch (S->getSCEVType()) {
1972 case scCouldNotCompute:
1973 llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
1974 case scUnknown:
1975 case scVScale:
1976 // Assume to be zero-cost.
1977 return false;
1978 case scConstant: {
1979 // Only evaluate the costs of constants when optimizing for size.
1980 if (CostKind != TargetTransformInfo::TCK_CodeSize)
1981 return false;
1982 const APInt &Imm = cast<SCEVConstant>(S)->getAPInt();
1983 Type *Ty = S->getType();
1984 Cost += TTI.getIntImmCostInst(
1985 WorkItem.ParentOpcode, WorkItem.OperandIdx, Imm, Ty, CostKind);
1986 return Cost > Budget;
1987 }
1988 case scTruncate:
1989 case scPtrToInt:
1990 case scZeroExtend:
1991 case scSignExtend: {
1992 Cost +=
1993 costAndCollectOperands<SCEVCastExpr>(WorkItem, TTI, CostKind, Worklist);
1994 return false; // Will answer upon next entry into this function.
1995 }
1996 case scUDivExpr: {
1997 // UDivExpr is very likely a UDiv that ScalarEvolution's HowFarToZero or
1998 // HowManyLessThans produced to compute a precise expression, rather than a
1999 // UDiv from the user's code. If we can't find a UDiv in the code with some
2000 // simple searching, we need to account for its cost.
2001
2002 // At the beginning of this function we already tried to find existing
2003 // value for plain 'S'. Now try to lookup 'S + 1', since it is a common
2004 // pattern involving division. This is just a simple search heuristic.
2005 if (hasRelatedExistingExpansion(
2006 SE.getAddExpr(S, SE.getConstant(S->getType(), 1)), &At, L))
2007 return false; // Consider it to be free.
2008
2009 Cost +=
2010 costAndCollectOperands<SCEVUDivExpr>(WorkItem, TTI, CostKind, Worklist);
2011 return false; // Will answer upon next entry into this function.
2012 }
2013 case scAddExpr:
2014 case scMulExpr:
2015 case scUMaxExpr:
2016 case scSMaxExpr:
2017 case scUMinExpr:
2018 case scSMinExpr:
2019 case scSequentialUMinExpr: {
2020 assert(cast<SCEVNAryExpr>(S)->getNumOperands() > 1 &&
2021 "Nary expr should have more than 1 operand.");
2022 // The simple nary expr will require one less op (or pair of ops)
2023 // than the number of its terms.
2024 Cost +=
2025 costAndCollectOperands<SCEVNAryExpr>(WorkItem, TTI, CostKind, Worklist);
2026 return Cost > Budget;
2027 }
2028 case scAddRecExpr: {
2029 assert(cast<SCEVAddRecExpr>(S)->getNumOperands() >= 2 &&
2030 "Polynomial should be at least linear");
2031 Cost += costAndCollectOperands<SCEVAddRecExpr>(
2032 WorkItem, TTI, CostKind, Worklist);
2033 return Cost > Budget;
2034 }
2035 }
2036 llvm_unreachable("Unknown SCEV kind!");
2037}
2038
2039Value *SCEVExpander::expandCodeForPredicate(const SCEVPredicate *Pred,
2040 Instruction *IP) {
2041 assert(IP);
2042 switch (Pred->getKind()) {
2043 case SCEVPredicate::P_Union:
2044 return expandUnionPredicate(cast<SCEVUnionPredicate>(Pred), IP);
2045 case SCEVPredicate::P_Compare:
2046 return expandComparePredicate(cast<SCEVComparePredicate>(Pred), IP);
2047 case SCEVPredicate::P_Wrap: {
2048 auto *AddRecPred = cast<SCEVWrapPredicate>(Pred);
2049 return expandWrapPredicate(AddRecPred, IP);
2050 }
2051 }
2052 llvm_unreachable("Unknown SCEV predicate type");
2053}
2054
2055Value *SCEVExpander::expandComparePredicate(const SCEVComparePredicate *Pred,
2056 Instruction *IP) {
2057 Value *Expr0 = expand(Pred->getLHS(), IP);
2058 Value *Expr1 = expand(Pred->getRHS(), IP);
2059
2060 Builder.SetInsertPoint(IP);
2061 auto InvPred = ICmpInst::getInversePredicate(Pred->getPredicate());
2062 auto *I = Builder.CreateICmp(InvPred, Expr0, Expr1, "ident.check");
2063 return I;
2064}
2065
2066Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR,
2067 Instruction *Loc, bool Signed) {
2068 assert(AR->isAffine() && "Cannot generate RT check for "
2069 "non-affine expression");
2070
2071 // FIXME: It is highly suspicious that we're ignoring the predicates here.
2072 SmallVector<const SCEVPredicate *, 4> Pred;
2073 const SCEV *ExitCount =
2074 SE.getPredicatedBackedgeTakenCount(AR->getLoop(), Pred);
2075
2076 assert(!isa<SCEVCouldNotCompute>(ExitCount) && "Invalid loop count");
2077
2078 const SCEV *Step = AR->getStepRecurrence(SE);
2079 const SCEV *Start = AR->getStart();
2080
2081 Type *ARTy = AR->getType();
2082 unsigned SrcBits = SE.getTypeSizeInBits(ExitCount->getType());
2083 unsigned DstBits = SE.getTypeSizeInBits(ARTy);
2084
2085 // The expression {Start,+,Step} has nusw/nssw if
2086 // Step < 0, Start - |Step| * Backedge <= Start
2087 // Step >= 0, Start + |Step| * Backedge > Start
2088 // and |Step| * Backedge doesn't unsigned overflow.
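 // Illustrative example (added for clarity, not in the original source): for
 // an i8 recurrence with Start = 120, Step = 10 and a backedge-taken count
 // of 2, |Step| * Backedge = 20 does not overflow unsigned, but the signed
 // sum 120 + 20 wraps to -116, so "Start + |Step| * Backedge > Start" fails
 // and the generated check reports a possible nssw violation.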
2089
2090 Builder.SetInsertPoint(Loc);
2091 Value *TripCountVal = expand(ExitCount, Loc);
2092
2093 IntegerType *Ty =
2094 IntegerType::get(Loc->getContext(), SE.getTypeSizeInBits(ARTy));
2095
2096 Value *StepValue = expand(Step, Loc);
2097 Value *NegStepValue = expand(SE.getNegativeSCEV(Step), Loc);
2098 Value *StartValue = expand(Start, Loc);
2099
2100 ConstantInt *Zero =
2101 ConstantInt::get(Loc->getContext(), APInt::getZero(DstBits));
2102
2103 Builder.SetInsertPoint(Loc);
2104 // Compute |Step|
2105 Value *StepCompare = Builder.CreateICmp(ICmpInst::ICMP_SLT, StepValue, Zero);
2106 Value *AbsStep = Builder.CreateSelect(StepCompare, NegStepValue, StepValue);
2107
2108 // Compute |Step| * Backedge
2109 // Compute:
2110 // 1. Start + |Step| * Backedge < Start
2111 // 2. Start - |Step| * Backedge > Start
2112 //
2113 // And select either 1. or 2. depending on whether step is positive or
2114 // negative. If Step is known to be positive or negative, only create
2115 // either 1. or 2.
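 // Illustrative example (added for clarity): if Step's sign is unknown at
 // compile time, e.g. Step is a loop-invariant i32 %s, both forms are
 // emitted: Add = Start + |Step|*Backedge and Sub = Start - |Step|*Backedge,
 // and the select on StepCompare picks the Sub-based comparison at run time
 // exactly when %s is negative.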
2116 auto ComputeEndCheck = [&]() -> Value * {
2117 // Checking <u 0 is always false.
2118 if (!Signed && Start->isZero() && SE.isKnownPositive(Step))
2119 return ConstantInt::getFalse(Loc->getContext());
2120
2121 // Get the backedge taken count and truncate or extended to the AR type.
2122 Value *TruncTripCount = Builder.CreateZExtOrTrunc(TripCountVal, Ty);
2123
2124 Value *MulV, *OfMul;
2125 if (Step->isOne()) {
2126 // Special-case Step of one. Potentially-costly `umul_with_overflow` isn't
2127 // needed, there is never an overflow, so to avoid artificially inflating
2128 // the cost of the check, directly emit the optimized IR.
2129 MulV = TruncTripCount;
2130 OfMul = ConstantInt::getFalse(MulV->getContext());
2131 } else {
2132 auto *MulF = Intrinsic::getDeclaration(Loc->getModule(),
2133 Intrinsic::umul_with_overflow, Ty);
2134 CallInst *Mul =
2135 Builder.CreateCall(MulF, {AbsStep, TruncTripCount}, "mul");
2136 MulV = Builder.CreateExtractValue(Mul, 0, "mul.result");
2137 OfMul = Builder.CreateExtractValue(Mul, 1, "mul.overflow");
2138 }
2139
2140 Value *Add = nullptr, *Sub = nullptr;
2141 bool NeedPosCheck = !SE.isKnownNegative(Step);
2142 bool NeedNegCheck = !SE.isKnownPositive(Step);
2143
2144 if (isa<PointerType>(ARTy)) {
2145 Value *NegMulV = Builder.CreateNeg(MulV);
2146 if (NeedPosCheck)
2147 Add = Builder.CreatePtrAdd(StartValue, MulV);
2148 if (NeedNegCheck)
2149 Sub = Builder.CreatePtrAdd(StartValue, NegMulV);
2150 } else {
2151 if (NeedPosCheck)
2152 Add = Builder.CreateAdd(StartValue, MulV);
2153 if (NeedNegCheck)
2154 Sub = Builder.CreateSub(StartValue, MulV);
2155 }
2156
2157 Value *EndCompareLT = nullptr;
2158 Value *EndCompareGT = nullptr;
2159 Value *EndCheck = nullptr;
2160 if (NeedPosCheck)
2161 EndCheck = EndCompareLT = Builder.CreateICmp(
2162 Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, Add, StartValue);
2163 if (NeedNegCheck)
2164 EndCheck = EndCompareGT = Builder.CreateICmp(
2165 Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT, Sub, StartValue);
2166 if (NeedPosCheck && NeedNegCheck) {
2167 // Select the answer based on the sign of Step.
2168 EndCheck = Builder.CreateSelect(StepCompare, EndCompareGT, EndCompareLT);
2169 }
2170 return Builder.CreateOr(EndCheck, OfMul);
2171 };
2172 Value *EndCheck = ComputeEndCheck();
2173
2174 // If the backedge taken count type is larger than the AR type,
2175 // check that we don't drop any bits by truncating it. If we are
2176 // dropping bits, then we have overflow (unless the step is zero).
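 // Illustrative example (added for clarity): with an i64 backedge-taken count
 // of 2^40 and an i32 AR type, the truncation above would silently drop the
 // high bits; the extra compare below catches this by testing TripCountVal
 // against the largest i32 value and reports overflow unless Step is zero.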
2177 if (SrcBits > DstBits) {
2178 auto MaxVal = APInt::getMaxValue(DstBits).zext(SrcBits);
2179 auto *BackedgeCheck =
2180 Builder.CreateICmp(ICmpInst::ICMP_UGT, TripCountVal,
2181 ConstantInt::get(Loc->getContext(), MaxVal));
2182 BackedgeCheck = Builder.CreateAnd(
2183 BackedgeCheck, Builder.CreateICmp(ICmpInst::ICMP_NE, StepValue, Zero));
2184
2185 EndCheck = Builder.CreateOr(EndCheck, BackedgeCheck);
2186 }
2187
2188 return EndCheck;
2189}
2190
2191Value *SCEVExpander::expandWrapPredicate(const SCEVWrapPredicate *Pred,
2192 Instruction *IP) {
2193 const auto *A = cast<SCEVAddRecExpr>(Pred->getExpr());
2194 Value *NSSWCheck = nullptr, *NUSWCheck = nullptr;
2195
2196 // Add a check for NUSW
2197 if (Pred->getFlags() & SCEVWrapPredicate::IncrementNUSW)
2198 NUSWCheck = generateOverflowCheck(A, IP, false);
2199
2200 // Add a check for NSSW
2201 if (Pred->getFlags() & SCEVWrapPredicate::IncrementNSSW)
2202 NSSWCheck = generateOverflowCheck(A, IP, true);
2203
2204 if (NUSWCheck && NSSWCheck)
2205 return Builder.CreateOr(NUSWCheck, NSSWCheck);
2206
2207 if (NUSWCheck)
2208 return NUSWCheck;
2209
2210 if (NSSWCheck)
2211 return NSSWCheck;
2212
2213 return ConstantInt::getFalse(IP->getContext());
2214}
2215
2216Value *SCEVExpander::expandUnionPredicate(const SCEVUnionPredicate *Union,
2217 Instruction *IP) {
2218 // Loop over all checks in this set.
2219 SmallVector<Value *> Checks;
2220 for (const auto *Pred : Union->getPredicates()) {
2221 Checks.push_back(expandCodeForPredicate(Pred, IP));
2222 Builder.SetInsertPoint(IP);
2223 }
2224
2225 if (Checks.empty())
2226 return ConstantInt::getFalse(IP->getContext());
2227 return Builder.CreateOr(Checks);
2228}
2229
2230Value *SCEVExpander::fixupLCSSAFormFor(Value *V) {
2231 auto *DefI = dyn_cast<Instruction>(V);
2232 if (!PreserveLCSSA || !DefI)
2233 return V;
2234
2235 BasicBlock::iterator InsertPt = Builder.GetInsertPoint();
2236 Loop *DefLoop = SE.LI.getLoopFor(DefI->getParent());
2237 Loop *UseLoop = SE.LI.getLoopFor(InsertPt->getParent());
2238 if (!DefLoop || UseLoop == DefLoop || DefLoop->contains(UseLoop))
2239 return V;
2240
2241 // Create a temporary instruction at the current insertion point, so we
2242 // can hand it off to the helper to create LCSSA PHIs if required for the
2243 // new use.
2244 // FIXME: Ideally formLCSSAForInstructions (used in fixupLCSSAFormFor)
2245 // would accept an insertion point and return an LCSSA phi for that
2246 // insertion point, so there is no need to insert & remove the temporary
2247 // instruction.
2248 Type *ToTy;
2249 if (DefI->getType()->isIntegerTy())
2250 ToTy = PointerType::get(DefI->getContext(), 0);
2251 else
2252 ToTy = Type::getInt32Ty(DefI->getContext());
2253 Instruction *User =
2254 CastInst::CreateBitOrPointerCast(DefI, ToTy, "tmp.lcssa.user", InsertPt);
2255 auto RemoveUserOnExit =
2256 make_scope_exit([User]() { User->eraseFromParent(); });
2257
2258 SmallVector<Instruction *, 1> ToUpdate;
2259 ToUpdate.push_back(DefI);
2260 SmallVector<PHINode *, 16> PHIsToRemove;
2261 SmallVector<PHINode *, 16> InsertedPHIs;
2262 formLCSSAForInstructions(ToUpdate, SE.DT, SE.LI, &SE, &PHIsToRemove,
2263 &InsertedPHIs);
2264 for (PHINode *PN : InsertedPHIs)
2265 rememberInstruction(PN);
2266 for (PHINode *PN : PHIsToRemove) {
2267 if (!PN->use_empty())
2268 continue;
2269 InsertedValues.erase(PN);
2270 InsertedPostIncValues.erase(PN);
2271 PN->eraseFromParent();
2272 }
2273
2274 return User->getOperand(0);
2275}
2276
2277namespace {
2278// Search for a SCEV subexpression that is not safe to expand. Any expression
2279// that may expand to a !isSafeToSpeculativelyExecute value is unsafe, namely
2280// UDiv expressions. We don't know if the UDiv is derived from an IR divide
2281// instruction, but the important thing is that we prove the denominator is
2282// nonzero before expansion.
2283//
2284// IVUsers already checks that IV-derived expressions are safe. So this check is
2285// only needed when the expression includes some subexpression that is not IV
2286// derived.
2287//
2288// Currently, we only allow division by a value provably non-zero here.
2289//
2290// We cannot generally expand recurrences unless the step dominates the loop
2291// header. The expander handles the special case of affine recurrences by
2292// scaling the recurrence outside the loop, but this technique isn't generally
2293// applicable. Expanding a nested recurrence outside a loop requires computing
2294// binomial coefficients. This could be done, but the recurrence has to be in a
2295// perfectly reduced form, which can't be guaranteed.
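// Illustrative example (added for clarity): an expression such as
// (%n /u %d) with no information that %d is non-zero is rejected here,
// because expanding it outside the guard that the original code placed
// around the division could introduce a division by zero.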
2296struct SCEVFindUnsafe {
2297 ScalarEvolution &SE;
2298 bool CanonicalMode;
2299 bool IsUnsafe = false;
2300
2301 SCEVFindUnsafe(ScalarEvolution &SE, bool CanonicalMode)
2302 : SE(SE), CanonicalMode(CanonicalMode) {}
2303
2304 bool follow(const SCEV *S) {
2305 if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) {
2306 if (!SE.isKnownNonZero(D->getRHS())) {
2307 IsUnsafe = true;
2308 return false;
2309 }
2310 }
2311 if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
2312 // For non-affine addrecs or in non-canonical mode we need a preheader
2313 // to insert into.
2314 if (!AR->getLoop()->getLoopPreheader() &&
2315 (!CanonicalMode || !AR->isAffine())) {
2316 IsUnsafe = true;
2317 return false;
2318 }
2319 }
2320 return true;
2321 }
2322 bool isDone() const { return IsUnsafe; }
2323};
2324} // namespace
2325
2326bool SCEVExpander::isSafeToExpand(const SCEV *S) const {
2327 SCEVFindUnsafe Search(SE, CanonicalMode);
2328 visitAll(S, Search);
2329 return !Search.IsUnsafe;
2330}
2331
2332bool SCEVExpander::isSafeToExpandAt(const SCEV *S,
2333 const Instruction *InsertionPoint) const {
2334 if (!isSafeToExpand(S))
2335 return false;
2336 // We have to prove that the expanded site of S dominates InsertionPoint.
2337 // This is easy when not in the same block, but hard when S is an instruction
2338 // to be expanded somewhere inside the same block as our insertion point.
2339 // What we really need here is something analogous to an OrderedBasicBlock,
2340 // but for the moment, we paper over the problem by handling two common and
2341 // cheap to check cases.
2342 if (SE.properlyDominates(S, InsertionPoint->getParent()))
2343 return true;
2344 if (SE.dominates(S, InsertionPoint->getParent())) {
2345 if (InsertionPoint->getParent()->getTerminator() == InsertionPoint)
2346 return true;
2347 if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S))
2348 if (llvm::is_contained(InsertionPoint->operand_values(), U->getValue()))
2349 return true;
2350 }
2351 return false;
2352}
2353
2354void SCEVExpanderCleaner::cleanup() {
2355 // Result is used, nothing to remove.
2356 if (ResultUsed)
2357 return;
2358
2359 // Restore original poison flags.
2360 for (auto [I, Flags] : Expander.OrigFlags)
2361 Flags.apply(I);
2362
2363 auto InsertedInstructions = Expander.getAllInsertedInstructions();
2364#ifndef NDEBUG
2365 SmallPtrSet<Instruction *, 8> InsertedSet(InsertedInstructions.begin(),
2366 InsertedInstructions.end());
2367 (void)InsertedSet;
2368#endif
2369 // Remove sets with value handles.
2370 Expander.clear();
2371
2372 // Remove all inserted instructions.
2373 for (Instruction *I : reverse(InsertedInstructions)) {
2374#ifndef NDEBUG
2375 assert(all_of(I->users(),
2376 [&InsertedSet](Value *U) {
2377 return InsertedSet.contains(cast<Instruction>(U));
2378 }) &&
2379 "removed instruction should only be used by instructions inserted "
2380 "during expansion");
2381#endif
2382 assert(!I->getType()->isVoidTy() &&
2383 "inserted instruction should have non-void types");
2384 I->replaceAllUsesWith(PoisonValue::get(I->getType()));
2385 I->eraseFromParent();
2386 }
2387}
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
std::string Name
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
Hexagon Hardware Loops
#define I(x, y, z)
Definition: MD5.cpp:58
#define P(N)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
static bool IsIncrementNUW(ScalarEvolution &SE, const SCEVAddRecExpr *AR)
static const Loop * PickMostRelevantLoop(const Loop *A, const Loop *B, DominatorTree &DT)
PickMostRelevantLoop - Given two loops pick the one that's most relevant for SCEV expansion.
static InstructionCost costAndCollectOperands(const SCEVOperand &WorkItem, const TargetTransformInfo &TTI, TargetTransformInfo::TargetCostKind CostKind, SmallVectorImpl< SCEVOperand > &Worklist)
static bool IsIncrementNSW(ScalarEvolution &SE, const SCEVAddRecExpr *AR)
static bool canBeCheaplyTransformed(ScalarEvolution &SE, const SCEVAddRecExpr *Phi, const SCEVAddRecExpr *Requested, bool &InvertStep)
Check whether we can cheaply express the requested SCEV in terms of the available PHI SCEV by truncat...
#define SCEV_DEBUG_WITH_TYPE(TYPE, X)
This file defines the make_scope_exit function, which executes user-defined cleanup logic at scope ex...
This file defines the SmallSet class.
This pass exposes codegen information to IR-level passes.
Value * RHS
Value * LHS
Class for arbitrary precision integers.
Definition: APInt.h:76
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:981
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition: APInt.h:184
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition: APInt.h:178
This class represents an incoming formal argument to a Function.
Definition: Argument.h:28
LLVM Basic Block Representation.
Definition: BasicBlock.h:60
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:429
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:396
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:205
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:164
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:220
static BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name, BasicBlock::iterator InsertBefore)
Construct a binary instruction, given the opcode and the two operands.
This class represents a function call, abstracting a target machine's calling convention.
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:579
static Instruction::CastOps getCastOpcode(const Value *Val, bool SrcIsSigned, Type *Ty, bool DstIsSigned)
Returns the opcode necessary to cast Val into Ty using usual casting rules.
Instruction::CastOps getOpcode() const
Return the opcode of this CastInst.
Definition: InstrTypes.h:908
static CastInst * CreateBitOrPointerCast(Value *S, Type *Ty, const Twine &Name, BasicBlock::iterator InsertBefore)
Create a BitCast, a PtrToInt, or an IntToPTr cast instruction.
static Type * makeCmpResultType(Type *opnd_type)
Create a result type for fcmp/icmp.
Definition: InstrTypes.h:1323
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:965
@ ICMP_SLT
signed less than
Definition: InstrTypes.h:994
@ ICMP_UGT
unsigned greater than
Definition: InstrTypes.h:988
@ ICMP_SGT
signed greater than
Definition: InstrTypes.h:992
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:990
@ ICMP_NE
not equal
Definition: InstrTypes.h:987
@ ICMP_SGE
signed greater or equal
Definition: InstrTypes.h:993
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition: InstrTypes.h:1090
A constant value that is initialized with an expression using other constant values.
Definition: Constants.h:1016
static Constant * getCast(unsigned ops, Constant *C, Type *Ty, bool OnlyIfReduced=false)
Convenience function for getting a Cast operation.
Definition: Constants.cpp:2041
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
static ConstantInt * getFalse(LLVMContext &Context)
Definition: Constants.cpp:856
This is an important base class in LLVM.
Definition: Constant.h:41
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:370
This class represents an Operation in the Expression.
bool isNonIntegralPointerType(PointerType *PT) const
Definition: DataLayout.h:398
A debug info location.
Definition: DebugLoc.h:33
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Definition: Dominators.cpp:122
const BasicBlock & getEntryBlock() const
Definition: Function.h:782
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2006
Value * CreateVScale(Constant *Scaling, const Twine &Name="")
Create a call to llvm.vscale, multiplied by Scaling.
Definition: IRBuilder.cpp:88
Value * CreateNeg(Value *V, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1715
Value * CreatePtrAdd(Value *Ptr, Value *Offset, const Twine &Name="", bool IsInBounds=false)
Definition: IRBuilder.h:1977
Value * CreateZExtOrTrunc(Value *V, Type *DestTy, const Twine &Name="")
Create a ZExt or Trunc from the integer value V to DestTy.
Definition: IRBuilder.h:2028
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2499
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, Instruction *FMFSource=nullptr, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Definition: IRBuilder.cpp:932
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Definition: IRBuilder.cpp:1110
BasicBlock::iterator GetInsertPoint() const
Definition: IRBuilder.h:175
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2022
Value * CreateFreeze(Value *V, const Twine &Name="")
Definition: IRBuilder.h:2518
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:174
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Definition: IRBuilder.h:220
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Definition: IRBuilder.h:2380
InstTy * Insert(InstTy *I, const Twine &Name="") const
Insert and return the specified instruction.
Definition: IRBuilder.h:145
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1338
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2010
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1469
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1321
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1491
Value * CreateCast(Instruction::CastOps Op, Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2144
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition: IRBuilder.h:180
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args=std::nullopt, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2395
Value * CreateTruncOrBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2136
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2334
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Definition: IRBuilder.h:510
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2649
void setHasNoUnsignedWrap(bool b=true)
Set or clear the nuw flag on this instruction, which must be an operator which supports this flag.
void setHasNoSignedWrap(bool b=true)
Set or clear the nsw flag on this instruction, which must be an operator which supports this flag.
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:453
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:80
const BasicBlock * getParent() const
Definition: Instruction.h:151
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
const Function * getFunction() const
Return the function this instruction belongs to.
Definition: Instruction.cpp:84
bool mayHaveSideEffects() const LLVM_READONLY
Return true if the instruction may have side effects.
bool comesBefore(const Instruction *Other) const
Given an instruction Other in the same basic block as this instruction, return true if this instructi...
const Instruction * getNextNonDebugInstruction(bool SkipPseudoOp=false) const
Return a pointer to the next non-debug instruction in the same basic block as 'this',...
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:251
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:450
Class to represent integer types.
Definition: DerivedTypes.h:40
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:278
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
BlockT * getHeader() const
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
bool replacementPreservesLCSSAForm(Instruction *From, Value *To)
Returns true if replacing From with To everywhere is guaranteed to preserve LCSSA form.
Definition: LoopInfo.h:439
bool movementPreservesLCSSAForm(Instruction *Inst, Instruction *NewLoc)
Checks if moving a specific instruction can break LCSSA in any loop.
Definition: LoopInfo.h:465
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:44
ICmpInst::Predicate getPredicate() const
Returns the comparison predicate underlying the intrinsic.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
bool isComplete() const
If the PHI node is complete which means all of its parent's predecessors have incoming value in this ...
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr, BasicBlock::iterator InsertBefore)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
Value * getIncomingValueForBlock(const BasicBlock *BB) const
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1827
This node represents an addition of some number of SCEVs.
This node represents a polynomial recurrence on the trip count of the specified loop.
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
const SCEV * getOperand() const
This class represents an assumption that the expression LHS Pred RHS evaluates to true,...
const SCEV * getRHS() const
Returns the right hand side of the predicate.
ICmpInst::Predicate getPredicate() const
const SCEV * getLHS() const
Returns the left hand side of the predicate.
This class represents a constant integer value.
Value * generateOverflowCheck(const SCEVAddRecExpr *AR, Instruction *Loc, bool Signed)
Generates code that evaluates if the AR expression will overflow.
bool hasRelatedExistingExpansion(const SCEV *S, const Instruction *At, Loop *L)
Determine whether there is an existing expansion of S that can be reused.
SmallVector< Instruction *, 32 > getAllInsertedInstructions() const
Return a vector containing all instructions inserted during expansion.
bool isSafeToExpand(const SCEV *S) const
Return true if the given expression is safe to expand in the sense that all materialized values are s...
bool isSafeToExpandAt(const SCEV *S, const Instruction *InsertionPoint) const
Return true if the given expression is safe to expand in the sense that all materialized values are d...
unsigned replaceCongruentIVs(Loop *L, const DominatorTree *DT, SmallVectorImpl< WeakTrackingVH > &DeadInsts, const TargetTransformInfo *TTI=nullptr)
replace congruent phis with their most canonical representative.
Value * expandUnionPredicate(const SCEVUnionPredicate *Pred, Instruction *Loc)
A specialized variant of expandCodeForPredicate, handling the case when we are expanding code for a S...
bool hoistIVInc(Instruction *IncV, Instruction *InsertPos, bool RecomputePoisonFlags=false)
Utility for hoisting IncV (with all subexpressions requried for its computation) before InsertPos.
void clear()
Erase the contents of the InsertedExpressions map so that users trying to expand the same expression ...
bool isInsertedInstruction(Instruction *I) const
Return true if the specified instruction was inserted by the code rewriter.
Value * expandCodeForPredicate(const SCEVPredicate *Pred, Instruction *Loc)
Generates a code sequence that evaluates this predicate.
static bool canReuseFlagsFromOriginalIVInc(PHINode *OrigPhi, PHINode *WidePhi, Instruction *OrigInc, Instruction *WideInc)
Return true if both increments directly increment the corresponding IV PHI nodes and have the same op...
Value * expandComparePredicate(const SCEVComparePredicate *Pred, Instruction *Loc)
A specialized variant of expandCodeForPredicate, handling the case when we are expanding code for a S...
Value * expandCodeFor(const SCEV *SH, Type *Ty, BasicBlock::iterator I)
Insert code to directly compute the specified SCEV expression into the program.
Value * expandWrapPredicate(const SCEVWrapPredicate *P, Instruction *Loc)
A specialized variant of expandCodeForPredicate, handling the case when we are expanding code for a S...
Instruction * getIVIncOperand(Instruction *IncV, Instruction *InsertPos, bool allowScale)
Return the induction variable increment's IV operand.
BasicBlock::iterator findInsertPointAfter(Instruction *I, Instruction *MustDominate) const
Returns a suitable insert point after I, that dominates MustDominate.
void setInsertPoint(Instruction *IP)
Set the current insertion point.
This node represents multiplication of some number of SCEVs.
This node is a base class providing common functionality for n'ary operators.
NoWrapFlags getNoWrapFlags(NoWrapFlags Mask=NoWrapMask) const
const SCEV * getOperand(unsigned i) const
ArrayRef< const SCEV * > operands() const
This class represents an assumption made using SCEV expressions which can be checked at run-time.
SCEVPredicateKind getKind() const
This class represents a cast from a pointer to a pointer-sized integer value.
This class represents a signed maximum selection.
This class represents a signed minimum selection.
This class represents a sequential/in-order unsigned minimum selection.
This class represents a sign extension of a small integer value to a larger integer value.
This class represents a truncation of an integer value to a smaller integer value.
This class represents a binary unsigned division operation.
const SCEV * getLHS() const
const SCEV * getRHS() const
This class represents an unsigned maximum selection.
This class represents an unsigned minimum selection.
This class represents a composition of other SCEV predicates, and is the class that most clients will...
This means that we are dealing with an entirely unknown SCEV value, and only represent it as its LLVM...
This class represents the value of vscale, as used when defining the length of a scalable vector or r...
This class represents an assumption made on an AddRec expression.
const SCEVAddRecExpr * getExpr() const
Implementation of the SCEVPredicate interface.
IncrementWrapFlags getFlags() const
Returns the set assumed no overflow flags.
This class represents a zero extension of a small integer value to a larger integer value.
This class represents an analyzed expression in the program.
ArrayRef< const SCEV * > operands() const
Return operands of this SCEV expression.
bool isOne() const
Return true if the expression is a constant one.
bool isZero() const
Return true if the expression is a constant zero.
bool isNonConstantNegative() const
Return true if the specified scev is negated, but not a constant.
SCEVTypes getSCEVType() const
Type * getType() const
Return the LLVM type of this SCEV expression.
NoWrapFlags
NoWrapFlags are bitfield indices into SubclassData.
The main scalar evolution driver.
bool isKnownNonNegative(const SCEV *S)
Test if the given expression is known to be non-negative.
const SCEV * getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Return the SCEV object corresponding to -V.
bool isKnownNegative(const SCEV *S)
Test if the given expression is known to be negative.
const SCEV * removePointerBase(const SCEV *S)
Compute an expression equivalent to S - getPointerBase(S).
bool isKnownNonZero(const SCEV *S)
Test if the given expression is known to be non-zero.
uint64_t getTypeSizeInBits(Type *Ty) const
Return the size in bits of the specified type, for which isSCEVable must return true.
const SCEV * getConstant(ConstantInt *V)
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
const SCEV * getTruncateOrNoop(const SCEV *V, Type *Ty)
Return a SCEV corresponding to a conversion of the input value to the specified type.
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
bool isKnownPositive(const SCEV *S)
Test if the given expression is known to be positive.
bool containsAddRecurrence(const SCEV *S)
Return true if the SCEV is a scAddRecExpr or it contains scAddRecExpr.
const SCEV * getAddRecExpr(const SCEV *Start, const SCEV *Step, const Loop *L, SCEV::NoWrapFlags Flags)
Get an add recurrence expression for the specified loop.
const SCEV * getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
Type * getEffectiveSCEVType(Type *Ty) const
Return a type with the same bitwidth as the given type and which represents how SCEV will treat the g...
const SCEV * getPredicatedBackedgeTakenCount(const Loop *L, SmallVector< const SCEVPredicate *, 4 > &Predicates)
Similar to getBackedgeTakenCount, except it will add a set of SCEV predicates to Predicates that are ...
static SCEV::NoWrapFlags clearFlags(SCEV::NoWrapFlags Flags, SCEV::NoWrapFlags OffFlags)
void forgetValue(Value *V)
This method should be called by the client when it has changed a value in a way that may effect its v...
const SCEV * getNoopOrAnyExtend(const SCEV *V, Type *Ty)
Return a SCEV corresponding to a conversion of the input value to the specified type.
const SCEV * getTruncateExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
const SCEV * getAnyExtendExpr(const SCEV *Op, Type *Ty)
getAnyExtendExpr - Return a SCEV for the given operand extended with unspecified bits out to the give...
std::optional< SCEV::NoWrapFlags > getStrengthenedNoWrapFlagsFromBinOp(const OverflowingBinaryOperator *OBO)
Parse NSW/NUW flags from add/sub/mul IR binary operation Op into SCEV no-wrap flags,...
const SCEV * getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
bool hasComputableLoopEvolution(const SCEV *S, const Loop *L)
Return true if the given SCEV changes value in a known way in the specified loop.
const SCEV * getPointerBase(const SCEV *V)
Transitively follow the chain of pointer-type operands until reaching a SCEV that does not have a sin...
bool dominates(const SCEV *S, const BasicBlock *BB)
Return true if elements that makes up the given SCEV dominate the specified basic block.
const SCEV * getMulExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
const SCEV * getUnknown(Value *V)
static SCEV::NoWrapFlags maskFlags(SCEV::NoWrapFlags Flags, int Mask)
Convenient NoWrapFlags manipulation that hides enum casts and is visible in the ScalarEvolution name ...
bool properlyDominates(const SCEV *S, const BasicBlock *BB)
Return true if elements that makes up the given SCEV properly dominate the specified basic block.
bool canReuseInstruction(const SCEV *S, Instruction *I, SmallVectorImpl< Instruction * > &DropPoisonGeneratingInsts)
Check whether it is poison-safe to represent the expression S using the instruction I.
const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:321
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:360
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:342
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
bool empty() const
Definition: SmallVector.h:94
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind=TTI::TCK_SizeAndLatency, const Instruction *I=nullptr) const
InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty, TargetCostKind CostKind, Instruction *Inst=nullptr) const
Return the expected cost of materialization for the given integer immediate of the specified type for...
TargetCostKind
The kind of cost model.
@ TCK_RecipThroughput
Reciprocal throughput.
@ TCK_CodeSize
Instruction code size.
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args=ArrayRef< const Value * >(), const Instruction *CxtI=nullptr, const TargetLibraryInfo *TLibInfo=nullptr) const
This is an approximation of reciprocal throughput of a math/logic op.
bool isTruncateFree(Type *Ty1, Type *Ty2) const
Return true if it's free to truncate a value of type Ty1 to type Ty2.
@ None
The cast is not used with a load/store of any kind.
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, const Instruction *I=nullptr) const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
unsigned getIntegerBitWidth() const
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:265
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:255
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:129
static IntegerType * getInt32Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:228
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
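A small illustrative helper using the Type predicates and accessors listed above (the name and the i32 fallback are placeholders):
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Type.h"
using namespace llvm;
// Pick a bit width to work in: the exact width for integers, the element
// width for vectors, and i32 as an assumed fallback otherwise.
static unsigned chooseWidth(Type *Ty) {
  if (Ty->isIntegerTy())
    return Ty->getIntegerBitWidth();
  if (Ty->isVectorTy())
    return Ty->getScalarSizeInBits();
  return Type::getInt32Ty(Ty->getContext())->getIntegerBitWidth();
}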
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
op_range operands()
Definition: User.h:242
Value * getOperand(unsigned i) const
Definition: User.h:169
unsigned getNumOperands() const
Definition: User.h:191
iterator_range< value_op_iterator > operand_values()
Definition: User.h:266
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
bool use_empty() const
Definition: Value.h:344
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1074
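A short sketch of the replace-all-uses idiom built from the Value members above (the helper itself is hypothetical):
#include "llvm/IR/Value.h"
#include <cassert>
using namespace llvm;
// Redirect every use of From to To; returns true when From became dead.
static bool replaceAndCheckDead(Value *From, Value *To) {
  assert(From->getType() == To->getType() && "RAUW requires matching types");
  From->replaceAllUsesWith(To);
  return From->use_empty();
}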
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:187
self_iterator getIterator()
Definition: ilist_node.h:109
#define UINT64_MAX
Definition: DataTypes.h:77
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1451
@ SC
CHAIN = SC CHAIN, Imm128 - System call.
cst_pred_ty< is_power2 > m_Power2()
Match an integer or vector power-of-2.
Definition: PatternMatch.h:568
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
Definition: PatternMatch.h:765
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
Definition: PatternMatch.h:821
CmpClass_match< LHS, RHS, ICmpInst, ICmpInst::Predicate > m_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R)
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
AnyBinaryOp_match< LHS, RHS, true > m_c_BinOp(const LHS &L, const RHS &R)
Matches a BinaryOperator with LHS and RHS in either order.
class_match< BasicBlock > m_BasicBlock()
Match an arbitrary basic block value and ignore it.
Definition: PatternMatch.h:184
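The matchers above compose into larger patterns; a hedged sketch of a common shape, matching a conditional branch on an integer compare (the helper name is an assumption):
#include "llvm/IR/Instructions.h"
#include "llvm/IR/PatternMatch.h"
using namespace llvm;
using namespace PatternMatch;
// Match "br (icmp Pred A, B), TrueBB, FalseBB" and capture all of the pieces.
static bool matchCmpBranch(Value *V, ICmpInst::Predicate &Pred, Value *&A,
                           Value *&B, BasicBlock *&TrueBB,
                           BasicBlock *&FalseBB) {
  return match(V, m_Br(m_ICmp(Pred, m_Value(A), m_Value(B)), TrueBB, FalseBB));
}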
@ CE
Windows NT (Windows on ARM)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
NodeAddr< PhiNode * > Phi
Definition: RDFGraph.h:390
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void visitAll(const SCEV *Root, SV &Visitor)
Use SCEVTraversal to visit all nodes in the given expression tree.
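visitAll drives SCEVTraversal with a visitor that exposes follow()/isDone(); a minimal assumed visitor that just counts nodes in the tree:
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
using namespace llvm;
// Illustrative visitor: count every node in a SCEV expression tree.
struct SCEVNodeCounter {
  unsigned Count = 0;
  bool follow(const SCEV *S) { ++Count; return true; } // keep descending
  bool isDone() const { return false; }                // never stop early
};
static unsigned countSCEVNodes(const SCEV *Root) {
  SCEVNodeCounter Counter;
  visitAll(Root, Counter);
  return Counter.Count;
}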
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
@ Offset
Definition: DWP.cpp:456
void stable_sort(R &&Range)
Definition: STLExtras.h:2004
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1731
detail::scope_exit< std::decay_t< Callable > > make_scope_exit(Callable &&F)
Definition: ScopeExit.h:59
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A,...
Definition: STLExtras.h:2415
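A compact sketch of the range helpers listed here (stable_sort, all_of, drop_begin, enumerate) over an assumed integer vector:
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
using namespace llvm;
static bool demoRangeHelpers(SmallVectorImpl<int> &Vals) {
  stable_sort(Vals);                                   // equal elements keep their order
  bool AllPositive = all_of(Vals, [](int V) { return V > 0; });
  for (auto [Idx, V] : enumerate(drop_begin(Vals))) {  // skip the first element
    (void)Idx;                                         // index of V within the sub-range
    if (V < 0)
      return false;
  }
  return AllPositive;
}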
Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
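simplifyInstruction is the usual peephole entry point; a hedged sketch that folds an instruction to an existing value and erases it (richer SimplifyQuery arguments, e.g. a DominatorTree, give better results):
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;
static bool foldIfSimplifiable(Instruction *I, const DataLayout &DL) {
  if (Value *V = simplifyInstruction(I, SimplifyQuery(DL))) {
    I->replaceAllUsesWith(V);   // everything now refers to the simpler value
    I->eraseFromParent();       // the original instruction is dead
    return true;
  }
  return false;
}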
Interval::pred_iterator pred_end(Interval *I)
Definition: Interval.h:112
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:428
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
Interval::pred_iterator pred_begin(Interval *I)
pred_begin/pred_end - define methods so that Intervals may be used just like BasicBlocks can with the...
Definition: Interval.h:109
cl::opt< unsigned > SCEVCheapExpansionBudget
Constant * ConstantFoldBinaryOpOperands(unsigned Opcode, Constant *LHS, Constant *RHS, const DataLayout &DL)
Attempt to constant fold a binary operation with the specified operands.
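ConstantFoldBinaryOpOperands returns null when no fold is possible, so callers guard on the result; a short assumed example:
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
using namespace llvm;
// Can "LHS - RHS" be folded to zero at compile time?
static bool isFoldableToZero(Constant *LHS, Constant *RHS,
                             const DataLayout &DL) {
  Constant *C = ConstantFoldBinaryOpOperands(Instruction::Sub, LHS, RHS, DL);
  return C && C->isNullValue();   // nullptr means "could not fold"
}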
const SCEV * normalizeForPostIncUse(const SCEV *S, const PostIncLoopSet &Loops, ScalarEvolution &SE, bool CheckInvertible=true)
Normalize S to be post-increment for all loops present in Loops.
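normalizeForPostIncUse takes the set of loops for which S should be treated as a post-increment value; a minimal assumed call, with L standing in for the loop of interest:
#include "llvm/Analysis/ScalarEvolutionNormalization.h"
using namespace llvm;
// Assumed context: SE is the analysis, S an expression involving loop L.
static const SCEV *normalizeInLoop(const SCEV *S, const Loop *L,
                                   ScalarEvolution &SE) {
  PostIncLoopSet Loops;
  Loops.insert(L);
  // May return nullptr when the normalization is not invertible.
  return normalizeForPostIncUse(S, Loops, SE);
}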
@ Mul
Product of integers.
@ Add
Sum of integers.
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
bool formLCSSAForInstructions(SmallVectorImpl< Instruction * > &Worklist, const DominatorTree &DT, const LoopInfo &LI, ScalarEvolution *SE, SmallVectorImpl< PHINode * > *PHIsToRemove=nullptr, SmallVectorImpl< PHINode * > *InsertedPHIs=nullptr)
Ensures LCSSA form for every instruction from the Worklist in the scope of the innermost containing loop.
Definition: LCSSA.cpp:77
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition: STLExtras.h:1930
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1888
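count_if and is_contained follow the same range convention as the helpers above; a small assumed example over an ArrayRef:
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
using namespace llvm;
// Count even entries and check whether zero occurs at all.
static bool summarize(ArrayRef<int> Vals, unsigned &NumEven) {
  NumEven = static_cast<unsigned>(
      count_if(Vals, [](int V) { return V % 2 == 0; }));
  return is_contained(Vals, 0);
}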
InstructionCost Cost
std::optional< bool > isImpliedByDomCondition(const Value *Cond, const Instruction *ContextI, const DataLayout &DL)
Return the boolean condition value in the context of the given instruction if it is known based on do...
unsigned pred_size(const MachineBasicBlock *BB)
bool SCEVExprContains(const SCEV *Root, PredTy Pred)
Return true if any node in Root satisfies the predicate Pred.
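SCEVExprContains is normally driven with a lambda predicate; a hedged one-liner sketch (the helper name is an assumption):
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
using namespace llvm;
// Does the expression tree rooted at Root contain an add-recurrence anywhere?
static bool containsAddRec(const SCEV *Root) {
  return SCEVExprContains(Root,
                          [](const SCEV *S) { return isa<SCEVAddRecExpr>(S); });
}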
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
void apply(Instruction *I)
PoisonFlags(const Instruction *I)
struct for holding enough information to help calculate the cost of the given SCEV when expanded into...