1//===- ScalarEvolutionExpander.cpp - Scalar Evolution Analysis ------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the implementation of the scalar evolution expander,
10// which is used to generate the code corresponding to a given scalar evolution
11// expression.
12//
13//===----------------------------------------------------------------------===//
14
15#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
16#include "llvm/ADT/STLExtras.h"
17#include "llvm/ADT/ScopeExit.h"
18#include "llvm/ADT/SmallSet.h"
23#include "llvm/IR/DataLayout.h"
24#include "llvm/IR/Dominators.h"
30
31#ifdef LLVM_ENABLE_ABI_BREAKING_CHECKS
32#define SCEV_DEBUG_WITH_TYPE(TYPE, X) DEBUG_WITH_TYPE(TYPE, X)
33#else
34#define SCEV_DEBUG_WITH_TYPE(TYPE, X)
35#endif
36
37using namespace llvm;
38
40 "scev-cheap-expansion-budget", cl::Hidden, cl::init(4),
41 cl::desc("When performing SCEV expansion only if it is cheap to do, this "
42 "controls the budget that is considered cheap (default = 4)"));
43
44using namespace PatternMatch;
45
46PoisonFlags::PoisonFlags(const Instruction *I) {
47 NUW = false;
48 NSW = false;
49 Exact = false;
50 Disjoint = false;
51 NNeg = false;
52 if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(I)) {
53 NUW = OBO->hasNoUnsignedWrap();
54 NSW = OBO->hasNoSignedWrap();
55 }
56 if (auto *PEO = dyn_cast<PossiblyExactOperator>(I))
57 Exact = PEO->isExact();
58 if (auto *PDI = dyn_cast<PossiblyDisjointInst>(I))
59 Disjoint = PDI->isDisjoint();
60 if (auto *PNI = dyn_cast<PossiblyNonNegInst>(I))
61 NNeg = PNI->hasNonNeg();
62 if (auto *TI = dyn_cast<TruncInst>(I)) {
63 NUW = TI->hasNoUnsignedWrap();
64 NSW = TI->hasNoSignedWrap();
65 }
66}
67
68void PoisonFlags::apply(Instruction *I) {
69 if (isa<OverflowingBinaryOperator>(I)) {
70 I->setHasNoUnsignedWrap(NUW);
71 I->setHasNoSignedWrap(NSW);
72 }
73 if (isa<PossiblyExactOperator>(I))
74 I->setIsExact(Exact);
75 if (auto *PDI = dyn_cast<PossiblyDisjointInst>(I))
76 PDI->setIsDisjoint(Disjoint);
77 if (auto *PNI = dyn_cast<PossiblyNonNegInst>(I))
78 PNI->setNonNeg(NNeg);
79 if (isa<TruncInst>(I)) {
80 I->setHasNoUnsignedWrap(NUW);
81 I->setHasNoSignedWrap(NSW);
82 }
83}
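// Note: together these two helpers let the expander snapshot and later
// reinstate poison-generating flags. A minimal usage sketch (illustrative
// only, not code from this file):
//
//   PoisonFlags Saved(I);             // capture nuw/nsw/exact/disjoint/nneg
//   I->dropPoisonGeneratingFlags();   // make I safe for its new context
//   // ... move or reuse I ...
//   Saved.apply(I);                   // restore the remembered flags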
84
85/// ReuseOrCreateCast - Arrange for there to be a cast of V to Ty at IP,
86/// reusing an existing cast if a suitable one (= dominating IP) exists, or
87/// creating a new one.
88Value *SCEVExpander::ReuseOrCreateCast(Value *V, Type *Ty,
89                                       Instruction::CastOps Op,
90                                       BasicBlock::iterator IP) {
91 // This function must be called with the builder having a valid insertion
92 // point. It doesn't need to be the actual IP where the uses of the returned
93 // cast will be added, but it must dominate such IP.
94 // We use this precondition to produce a cast that will dominate all its
95 // uses. In particular, this is crucial for the case where the builder's
96 // insertion point *is* the point where we were asked to put the cast.
97 // Since we don't know the builder's insertion point is actually
98 // where the uses will be added (only that it dominates it), we are
99 // not allowed to move it.
100 BasicBlock::iterator BIP = Builder.GetInsertPoint();
101
102 Value *Ret = nullptr;
103
104 // Check to see if there is already a cast!
105 for (User *U : V->users()) {
106 if (U->getType() != Ty)
107 continue;
108 CastInst *CI = dyn_cast<CastInst>(U);
109 if (!CI || CI->getOpcode() != Op)
110 continue;
111
112 // Found a suitable cast that is at IP or comes before IP. Use it. Note that
113 // the cast must also properly dominate the Builder's insertion point.
114 if (IP->getParent() == CI->getParent() && &*BIP != CI &&
115 (&*IP == CI || CI->comesBefore(&*IP))) {
116 Ret = CI;
117 break;
118 }
119 }
120
121 // Create a new cast.
122 if (!Ret) {
123 SCEVInsertPointGuard Guard(Builder, this);
124 Builder.SetInsertPoint(&*IP);
125 Ret = Builder.CreateCast(Op, V, Ty, V->getName());
126 }
127
128 // We assert at the end of the function since IP might point to an
129 // instruction with different dominance properties than a cast
130 // (an invoke for example) and not dominate BIP (but the cast does).
131 assert(!isa<Instruction>(Ret) ||
132 SE.DT.dominates(cast<Instruction>(Ret), &*BIP));
133
134 return Ret;
135}
136
137BasicBlock::iterator
138SCEVExpander::findInsertPointAfter(Instruction *I,
139                                   Instruction *MustDominate) const {
140 BasicBlock::iterator IP = ++I->getIterator();
141 if (auto *II = dyn_cast<InvokeInst>(I))
142 IP = II->getNormalDest()->begin();
143
144 while (isa<PHINode>(IP))
145 ++IP;
146
147 if (isa<FuncletPadInst>(IP) || isa<LandingPadInst>(IP)) {
148 ++IP;
149 } else if (isa<CatchSwitchInst>(IP)) {
150 IP = MustDominate->getParent()->getFirstInsertionPt();
151 } else {
152 assert(!IP->isEHPad() && "unexpected eh pad!");
153 }
154
155 // Adjust insert point to be after instructions inserted by the expander, so
156 // we can re-use already inserted instructions. Avoid skipping past the
157 // original \p MustDominate, in case it is an inserted instruction.
158 while (isInsertedInstruction(&*IP) && &*IP != MustDominate)
159 ++IP;
160
161 return IP;
162}
163
164BasicBlock::iterator
165SCEVExpander::GetOptimalInsertionPointForCastOf(Value *V) const {
166 // Cast the argument at the beginning of the entry block, after
167 // any bitcasts of other arguments.
168 if (Argument *A = dyn_cast<Argument>(V)) {
169 BasicBlock::iterator IP = A->getParent()->getEntryBlock().begin();
170 while ((isa<BitCastInst>(IP) &&
171 isa<Argument>(cast<BitCastInst>(IP)->getOperand(0)) &&
172 cast<BitCastInst>(IP)->getOperand(0) != A) ||
173 isa<DbgInfoIntrinsic>(IP))
174 ++IP;
175 return IP;
176 }
177
178 // Cast the instruction immediately after the instruction.
179 if (Instruction *I = dyn_cast<Instruction>(V))
180 return findInsertPointAfter(I, &*Builder.GetInsertPoint());
181
182 // Otherwise, this must be some kind of a constant,
183 // so let's plop this cast into the function's entry block.
184 assert(isa<Constant>(V) &&
185 "Expected the cast argument to be a global/constant");
186 return Builder.GetInsertBlock()
187 ->getParent()
188 ->getEntryBlock()
189 .begin();
190}
191
192/// InsertNoopCastOfTo - Insert a cast of V to the specified type,
193/// which must be possible with a noop cast, doing what we can to share
194/// the casts.
195Value *SCEVExpander::InsertNoopCastOfTo(Value *V, Type *Ty) {
196 Instruction::CastOps Op = CastInst::getCastOpcode(V, false, Ty, false);
197 assert((Op == Instruction::BitCast ||
198 Op == Instruction::PtrToInt ||
199 Op == Instruction::IntToPtr) &&
200 "InsertNoopCastOfTo cannot perform non-noop casts!");
201 assert(SE.getTypeSizeInBits(V->getType()) == SE.getTypeSizeInBits(Ty) &&
202 "InsertNoopCastOfTo cannot change sizes!");
203
204 // inttoptr only works for integral pointers. For non-integral pointers, we
205 // can create a GEP on null with the integral value as index. Note that
206 // it is safe to use GEP of null instead of inttoptr here, because only
207 // expressions already based on a GEP of null should be converted to pointers
208 // during expansion.
209 if (Op == Instruction::IntToPtr) {
210 auto *PtrTy = cast<PointerType>(Ty);
211 if (DL.isNonIntegralPointerType(PtrTy))
212 return Builder.CreatePtrAdd(Constant::getNullValue(PtrTy), V, "scevgep");
213 }
214 // Short-circuit unnecessary bitcasts.
215 if (Op == Instruction::BitCast) {
216 if (V->getType() == Ty)
217 return V;
218 if (CastInst *CI = dyn_cast<CastInst>(V)) {
219 if (CI->getOperand(0)->getType() == Ty)
220 return CI->getOperand(0);
221 }
222 }
223 // Short-circuit unnecessary inttoptr<->ptrtoint casts.
224 if ((Op == Instruction::PtrToInt || Op == Instruction::IntToPtr) &&
225 SE.getTypeSizeInBits(Ty) == SE.getTypeSizeInBits(V->getType())) {
226 if (CastInst *CI = dyn_cast<CastInst>(V))
227 if ((CI->getOpcode() == Instruction::PtrToInt ||
228 CI->getOpcode() == Instruction::IntToPtr) &&
229 SE.getTypeSizeInBits(CI->getType()) ==
230 SE.getTypeSizeInBits(CI->getOperand(0)->getType()))
231 return CI->getOperand(0);
232 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
233 if ((CE->getOpcode() == Instruction::PtrToInt ||
234 CE->getOpcode() == Instruction::IntToPtr) &&
235 SE.getTypeSizeInBits(CE->getType()) ==
236 SE.getTypeSizeInBits(CE->getOperand(0)->getType()))
237 return CE->getOperand(0);
238 }
239
240 // Fold a cast of a constant.
241 if (Constant *C = dyn_cast<Constant>(V))
242 return ConstantExpr::getCast(Op, C, Ty);
243
244 // Try to reuse existing cast, or insert one.
245 return ReuseOrCreateCast(V, Ty, Op, GetOptimalInsertionPointForCastOf(V));
246}
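// Illustrative example (assumed IR) for the non-integral pointer case above:
// instead of
//   %p = inttoptr i64 %v to ptr addrspace(N)
// the expander emits a GEP off the null pointer,
//   %scevgep = getelementptr i8, ptr addrspace(N) null, i64 %v
// which is only reached for expressions that were already based on a GEP of
// null, as the preceding comment explains.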
247
248/// InsertBinop - Insert the specified binary operator, doing a small amount
249/// of work to avoid inserting an obviously redundant operation, and hoisting
250/// to an outer loop when the opportunity is there and it is safe.
251Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,
252 Value *LHS, Value *RHS,
253 SCEV::NoWrapFlags Flags, bool IsSafeToHoist) {
254 // Fold a binop with constant operands.
255 if (Constant *CLHS = dyn_cast<Constant>(LHS))
256 if (Constant *CRHS = dyn_cast<Constant>(RHS))
257 if (Constant *Res = ConstantFoldBinaryOpOperands(Opcode, CLHS, CRHS, DL))
258 return Res;
259
260 // Do a quick scan to see if we have this binop nearby. If so, reuse it.
261 unsigned ScanLimit = 6;
262 BasicBlock::iterator BlockBegin = Builder.GetInsertBlock()->begin();
263 // Scanning starts from the last instruction before the insertion point.
264 BasicBlock::iterator IP = Builder.GetInsertPoint();
265 if (IP != BlockBegin) {
266 --IP;
267 for (; ScanLimit; --IP, --ScanLimit) {
268 // Don't count dbg.value against the ScanLimit, to avoid perturbing the
269 // generated code.
270 if (isa<DbgInfoIntrinsic>(IP))
271 ScanLimit++;
272
273 auto canGenerateIncompatiblePoison = [&Flags](Instruction *I) {
274 // Ensure that no-wrap flags match.
275 if (isa<OverflowingBinaryOperator>(I)) {
276 if (I->hasNoSignedWrap() != (Flags & SCEV::FlagNSW))
277 return true;
278 if (I->hasNoUnsignedWrap() != (Flags & SCEV::FlagNUW))
279 return true;
280 }
281 // Conservatively, do not reuse any instruction that has the exact
282 // flag set.
283 if (isa<PossiblyExactOperator>(I) && I->isExact())
284 return true;
285 return false;
286 };
287 if (IP->getOpcode() == (unsigned)Opcode && IP->getOperand(0) == LHS &&
288 IP->getOperand(1) == RHS && !canGenerateIncompatiblePoison(&*IP))
289 return &*IP;
290 if (IP == BlockBegin) break;
291 }
292 }
293
294 // Save the original insertion point so we can restore it when we're done.
295 DebugLoc Loc = Builder.GetInsertPoint()->getDebugLoc();
296 SCEVInsertPointGuard Guard(Builder, this);
297
298 if (IsSafeToHoist) {
299 // Move the insertion point out of as many loops as we can.
300 while (const Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock())) {
301 if (!L->isLoopInvariant(LHS) || !L->isLoopInvariant(RHS)) break;
302 BasicBlock *Preheader = L->getLoopPreheader();
303 if (!Preheader) break;
304
305 // Ok, move up a level.
306 Builder.SetInsertPoint(Preheader->getTerminator());
307 }
308 }
309
310 // If we haven't found this binop, insert it.
311 // TODO: Use the Builder, which will make CreateBinOp below fold with
312 // InstSimplifyFolder.
313 Instruction *BO = Builder.Insert(BinaryOperator::Create(Opcode, LHS, RHS));
314 BO->setDebugLoc(Loc);
315 if (Flags & SCEV::FlagNUW)
316 BO->setHasNoUnsignedWrap();
317 if (Flags & SCEV::FlagNSW)
318 BO->setHasNoSignedWrap();
319
320 return BO;
321}
322
323/// expandAddToGEP - Expand an addition expression with a pointer type into
324/// a GEP instead of using ptrtoint+arithmetic+inttoptr. This helps
325/// BasicAliasAnalysis and other passes analyze the result. See the rules
326/// for getelementptr vs. inttoptr in
327/// http://llvm.org/docs/LangRef.html#pointeraliasing
328/// for details.
329///
330/// Design note: The correctness of using getelementptr here depends on
331/// ScalarEvolution not recognizing inttoptr and ptrtoint operators, as
332/// they may introduce pointer arithmetic which may not be safely converted
333/// into getelementptr.
334///
335/// Design note: It might seem desirable for this function to be more
336/// loop-aware. If some of the indices are loop-invariant while others
337/// aren't, it might seem desirable to emit multiple GEPs, keeping the
338/// loop-invariant portions of the overall computation outside the loop.
339/// However, there are a few reasons this is not done here. Hoisting simple
340/// arithmetic is a low-level optimization that often isn't very
341/// important until late in the optimization process. In fact, passes
342/// like InstructionCombining will combine GEPs, even if it means
343/// pushing loop-invariant computation down into loops, so even if the
344/// GEPs were split here, the work would quickly be undone. The
345/// LoopStrengthReduction pass, which is usually run quite late (and
346/// after the last InstructionCombining pass), takes care of hoisting
347/// loop-invariant portions of expressions, after considering what
348/// can be folded using target addressing modes.
349///
350Value *SCEVExpander::expandAddToGEP(const SCEV *Offset, Value *V) {
351 assert(!isa<Instruction>(V) ||
352 SE.DT.dominates(cast<Instruction>(V), &*Builder.GetInsertPoint()));
353
354 Value *Idx = expand(Offset);
355
356 // Fold a GEP with constant operands.
357 if (Constant *CLHS = dyn_cast<Constant>(V))
358 if (Constant *CRHS = dyn_cast<Constant>(Idx))
359 return Builder.CreatePtrAdd(CLHS, CRHS);
360
361 // Do a quick scan to see if we have this GEP nearby. If so, reuse it.
362 unsigned ScanLimit = 6;
363 BasicBlock::iterator BlockBegin = Builder.GetInsertBlock()->begin();
364 // Scanning starts from the last instruction before the insertion point.
365 BasicBlock::iterator IP = Builder.GetInsertPoint();
366 if (IP != BlockBegin) {
367 --IP;
368 for (; ScanLimit; --IP, --ScanLimit) {
369 // Don't count dbg.value against the ScanLimit, to avoid perturbing the
370 // generated code.
371 if (isa<DbgInfoIntrinsic>(IP))
372 ScanLimit++;
373 if (IP->getOpcode() == Instruction::GetElementPtr &&
374 IP->getOperand(0) == V && IP->getOperand(1) == Idx &&
375 cast<GEPOperator>(&*IP)->getSourceElementType() ==
376 Builder.getInt8Ty())
377 return &*IP;
378 if (IP == BlockBegin) break;
379 }
380 }
381
382 // Save the original insertion point so we can restore it when we're done.
383 SCEVInsertPointGuard Guard(Builder, this);
384
385 // Move the insertion point out of as many loops as we can.
386 while (const Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock())) {
387 if (!L->isLoopInvariant(V) || !L->isLoopInvariant(Idx)) break;
388 BasicBlock *Preheader = L->getLoopPreheader();
389 if (!Preheader) break;
390
391 // Ok, move up a level.
392 Builder.SetInsertPoint(Preheader->getTerminator());
393 }
394
395 // Emit a GEP.
396 return Builder.CreatePtrAdd(V, Idx, "scevgep");
397}
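// Illustrative example (assumed IR): expanding (%base + %off) for a pointer
// typed SCEV yields a single GEP,
//   %scevgep = getelementptr i8, ptr %base, i64 %off
// rather than the ptrtoint/add/inttoptr sequence
//   %b = ptrtoint ptr %base to i64
//   %s = add i64 %b, %off
//   %p = inttoptr i64 %s to ptr
// which BasicAliasAnalysis and related passes handle far more conservatively.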
398
399/// PickMostRelevantLoop - Given two loops pick the one that's most relevant for
400/// SCEV expansion. If they are nested, this is the most nested. If they are
401/// neighboring, pick the later.
402static const Loop *PickMostRelevantLoop(const Loop *A, const Loop *B,
403 DominatorTree &DT) {
404 if (!A) return B;
405 if (!B) return A;
406 if (A->contains(B)) return B;
407 if (B->contains(A)) return A;
408 if (DT.dominates(A->getHeader(), B->getHeader())) return B;
409 if (DT.dominates(B->getHeader(), A->getHeader())) return A;
410 return A; // Arbitrarily break the tie.
411}
412
413/// getRelevantLoop - Get the most relevant loop associated with the given
414/// expression, according to PickMostRelevantLoop.
415const Loop *SCEVExpander::getRelevantLoop(const SCEV *S) {
416 // Test whether we've already computed the most relevant loop for this SCEV.
417 auto Pair = RelevantLoops.insert(std::make_pair(S, nullptr));
418 if (!Pair.second)
419 return Pair.first->second;
420
421 switch (S->getSCEVType()) {
422 case scConstant:
423 case scVScale:
424 return nullptr; // A constant has no relevant loops.
425 case scTruncate:
426 case scZeroExtend:
427 case scSignExtend:
428 case scPtrToInt:
429 case scAddExpr:
430 case scMulExpr:
431 case scUDivExpr:
432 case scAddRecExpr:
433 case scUMaxExpr:
434 case scSMaxExpr:
435 case scUMinExpr:
436 case scSMinExpr:
437 case scSequentialUMinExpr: {
438 const Loop *L = nullptr;
439 if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
440 L = AR->getLoop();
441 for (const SCEV *Op : S->operands())
442 L = PickMostRelevantLoop(L, getRelevantLoop(Op), SE.DT);
443 return RelevantLoops[S] = L;
444 }
445 case scUnknown: {
446 const SCEVUnknown *U = cast<SCEVUnknown>(S);
447 if (const Instruction *I = dyn_cast<Instruction>(U->getValue()))
448 return Pair.first->second = SE.LI.getLoopFor(I->getParent());
449 // A non-instruction has no relevant loops.
450 return nullptr;
451 }
453 llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
454 }
455 llvm_unreachable("Unexpected SCEV type!");
456}
457
458namespace {
459
460/// LoopCompare - Compare loops by PickMostRelevantLoop.
461class LoopCompare {
462 DominatorTree &DT;
463public:
464 explicit LoopCompare(DominatorTree &dt) : DT(dt) {}
465
466 bool operator()(std::pair<const Loop *, const SCEV *> LHS,
467 std::pair<const Loop *, const SCEV *> RHS) const {
468 // Keep pointer operands sorted at the end.
469 if (LHS.second->getType()->isPointerTy() !=
470 RHS.second->getType()->isPointerTy())
471 return LHS.second->getType()->isPointerTy();
472
473 // Compare loops with PickMostRelevantLoop.
474 if (LHS.first != RHS.first)
475 return PickMostRelevantLoop(LHS.first, RHS.first, DT) != LHS.first;
476
477 // If one operand is a non-constant negative and the other is not,
478 // put the non-constant negative on the right so that a sub can
479 // be used instead of a negate and add.
480 if (LHS.second->isNonConstantNegative()) {
481 if (!RHS.second->isNonConstantNegative())
482 return false;
483 } else if (RHS.second->isNonConstantNegative())
484 return true;
485
486 // Otherwise they are equivalent according to this comparison.
487 return false;
488 }
489};
490
491}
492
493Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
494 // Collect all the add operands in a loop, along with their associated loops.
495 // Iterate in reverse so that constants are emitted last, all else equal, and
496 // so that pointer operands are inserted first, which the code below relies on
497 // to form more involved GEPs.
498 SmallVector<std::pair<const Loop *, const SCEV *>, 8> OpsAndLoops;
499 for (const SCEV *Op : reverse(S->operands()))
500 OpsAndLoops.push_back(std::make_pair(getRelevantLoop(Op), Op));
501
502 // Sort by loop. Use a stable sort so that constants follow non-constants and
503 // pointer operands precede non-pointer operands.
504 llvm::stable_sort(OpsAndLoops, LoopCompare(SE.DT));
505
506 // Emit instructions to add all the operands. Hoist as much as possible
507 // out of loops, and form meaningful getelementptrs where possible.
508 Value *Sum = nullptr;
509 for (auto I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E;) {
510 const Loop *CurLoop = I->first;
511 const SCEV *Op = I->second;
512 if (!Sum) {
513 // This is the first operand. Just expand it.
514 Sum = expand(Op);
515 ++I;
516 continue;
517 }
518
519 assert(!Op->getType()->isPointerTy() && "Only first op can be pointer");
520 if (isa<PointerType>(Sum->getType())) {
521 // The running sum expression is a pointer. Try to form a getelementptr
522 // at this level with that as the base.
523 SmallVector<const SCEV *, 4> NewOps;
524 for (; I != E && I->first == CurLoop; ++I) {
525 // If the operand is a SCEVUnknown that is not an instruction, peek through
526 // it, to enable more of it to be folded into the GEP.
527 const SCEV *X = I->second;
528 if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(X))
529 if (!isa<Instruction>(U->getValue()))
530 X = SE.getSCEV(U->getValue());
531 NewOps.push_back(X);
532 }
533 Sum = expandAddToGEP(SE.getAddExpr(NewOps), Sum);
534 } else if (Op->isNonConstantNegative()) {
535 // Instead of doing a negate and add, just do a subtract.
536 Value *W = expand(SE.getNegativeSCEV(Op));
537 Sum = InsertBinop(Instruction::Sub, Sum, W, SCEV::FlagAnyWrap,
538 /*IsSafeToHoist*/ true);
539 ++I;
540 } else {
541 // A simple add.
542 Value *W = expand(Op);
543 // Canonicalize a constant to the RHS.
544 if (isa<Constant>(Sum))
545 std::swap(Sum, W);
546 Sum = InsertBinop(Instruction::Add, Sum, W, S->getNoWrapFlags(),
547 /*IsSafeToHoist*/ true);
548 ++I;
549 }
550 }
551
552 return Sum;
553}
554
555Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) {
556 Type *Ty = S->getType();
557
558 // Collect all the mul operands in a loop, along with their associated loops.
559 // Iterate in reverse so that constants are emitted last, all else equal.
560 SmallVector<std::pair<const Loop *, const SCEV *>, 8> OpsAndLoops;
561 for (const SCEV *Op : reverse(S->operands()))
562 OpsAndLoops.push_back(std::make_pair(getRelevantLoop(Op), Op));
563
564 // Sort by loop. Use a stable sort so that constants follow non-constants.
565 llvm::stable_sort(OpsAndLoops, LoopCompare(SE.DT));
566
567 // Emit instructions to mul all the operands. Hoist as much as possible
568 // out of loops.
569 Value *Prod = nullptr;
570 auto I = OpsAndLoops.begin();
571
572 // Expand the calculation of X pow N in the following manner:
573 // Let N = P1 + P2 + ... + PK, where all P are powers of 2. Then:
574 // X pow N = (X pow P1) * (X pow P2) * ... * (X pow PK).
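  // Worked example (illustrative): for N = 13 = 8 + 4 + 1,
  //   X pow 13 = (X pow 1) * (X pow 4) * (X pow 8),
  // where X pow 2, X pow 4 and X pow 8 come from repeated squaring, so only
  // O(log N) multiplies are emitted instead of N - 1.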
575 const auto ExpandOpBinPowN = [this, &I, &OpsAndLoops]() {
576 auto E = I;
577 // Calculate how many times the same operand from the same loop is included
578 // into this power.
579 uint64_t Exponent = 0;
580 const uint64_t MaxExponent = UINT64_MAX >> 1;
581 // No one sane will ever try to calculate such huge exponents, but if we
582 // need this, we cap the exponent at UINT64_MAX / 2 so that the loop below,
583 // which keeps doubling BinExp until it exceeds Exponent, can never
584 // overflow the unsigned 64-bit BinExp.
585 while (E != OpsAndLoops.end() && *I == *E && Exponent != MaxExponent) {
586 ++Exponent;
587 ++E;
588 }
589 assert(Exponent > 0 && "Trying to calculate a zeroth exponent of operand?");
590
591 // Calculate powers with exponents 1, 2, 4, 8 etc. and include those of them
592 // that are needed into the result.
593 Value *P = expand(I->second);
594 Value *Result = nullptr;
595 if (Exponent & 1)
596 Result = P;
597 for (uint64_t BinExp = 2; BinExp <= Exponent; BinExp <<= 1) {
598 P = InsertBinop(Instruction::Mul, P, P, SCEV::FlagAnyWrap,
599 /*IsSafeToHoist*/ true);
600 if (Exponent & BinExp)
601 Result = Result ? InsertBinop(Instruction::Mul, Result, P,
602 SCEV::FlagAnyWrap,
603 /*IsSafeToHoist*/ true)
604 : P;
605 }
606
607 I = E;
608 assert(Result && "Nothing was expanded?");
609 return Result;
610 };
611
612 while (I != OpsAndLoops.end()) {
613 if (!Prod) {
614 // This is the first operand. Just expand it.
615 Prod = ExpandOpBinPowN();
616 } else if (I->second->isAllOnesValue()) {
617 // Instead of doing a multiply by negative one, just do a negate.
618 Prod = InsertBinop(Instruction::Sub, Constant::getNullValue(Ty), Prod,
619 SCEV::FlagAnyWrap, /*IsSafeToHoist*/ true);
620 ++I;
621 } else {
622 // A simple mul.
623 Value *W = ExpandOpBinPowN();
624 // Canonicalize a constant to the RHS.
625 if (isa<Constant>(Prod)) std::swap(Prod, W);
626 const APInt *RHS;
627 if (match(W, m_Power2(RHS))) {
628 // Canonicalize Prod*(1<<C) to Prod<<C.
629 assert(!Ty->isVectorTy() && "vector types are not SCEVable");
630 auto NWFlags = S->getNoWrapFlags();
631 // clear nsw flag if shl will produce poison value.
632 if (RHS->logBase2() == RHS->getBitWidth() - 1)
633 NWFlags = ScalarEvolution::clearFlags(NWFlags, SCEV::FlagNSW);
634 Prod = InsertBinop(Instruction::Shl, Prod,
635 ConstantInt::get(Ty, RHS->logBase2()), NWFlags,
636 /*IsSafeToHoist*/ true);
637 } else {
638 Prod = InsertBinop(Instruction::Mul, Prod, W, S->getNoWrapFlags(),
639 /*IsSafeToHoist*/ true);
640 }
641 }
642 }
643
644 return Prod;
645}
646
647Value *SCEVExpander::visitUDivExpr(const SCEVUDivExpr *S) {
648 Value *LHS = expand(S->getLHS());
649 if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(S->getRHS())) {
650 const APInt &RHS = SC->getAPInt();
651 if (RHS.isPowerOf2())
652 return InsertBinop(Instruction::LShr, LHS,
653 ConstantInt::get(SC->getType(), RHS.logBase2()),
654 SCEV::FlagAnyWrap, /*IsSafeToHoist*/ true);
655 }
656
657 Value *RHS = expand(S->getRHS());
658 return InsertBinop(Instruction::UDiv, LHS, RHS, SCEV::FlagAnyWrap,
659 /*IsSafeToHoist*/ SE.isKnownNonZero(S->getRHS()));
660}
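// Illustrative example: a udiv by the constant 8 is emitted as
//   %r = lshr i64 %lhs, 3
// while a udiv whose divisor is not known to be non-zero is expanded with
// IsSafeToHoist == false, so it is never hoisted above the control flow that
// guards against division by zero.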
661
662/// Determine if this is a well-behaved chain of instructions leading back to
663/// the PHI. If so, it may be reused by expanded expressions.
664bool SCEVExpander::isNormalAddRecExprPHI(PHINode *PN, Instruction *IncV,
665 const Loop *L) {
666 if (IncV->getNumOperands() == 0 || isa<PHINode>(IncV) ||
667 (isa<CastInst>(IncV) && !isa<BitCastInst>(IncV)))
668 return false;
669 // If any of the operands don't dominate the insert position, bail.
670 // Addrec operands are always loop-invariant, so this can only happen
671 // if there are instructions which haven't been hoisted.
672 if (L == IVIncInsertLoop) {
673 for (Use &Op : llvm::drop_begin(IncV->operands()))
674 if (Instruction *OInst = dyn_cast<Instruction>(Op))
675 if (!SE.DT.dominates(OInst, IVIncInsertPos))
676 return false;
677 }
678 // Advance to the next instruction.
679 IncV = dyn_cast<Instruction>(IncV->getOperand(0));
680 if (!IncV)
681 return false;
682
683 if (IncV->mayHaveSideEffects())
684 return false;
685
686 if (IncV == PN)
687 return true;
688
689 return isNormalAddRecExprPHI(PN, IncV, L);
690}
691
692/// getIVIncOperand returns an induction variable increment's induction
693/// variable operand.
694///
695/// If allowScale is set, any type of GEP is allowed as long as the nonIV
696/// operands dominate InsertPos.
697///
698/// If allowScale is not set, ensure that a GEP increment conforms to one of the
699/// simple patterns generated by getAddRecExprPHILiterally and
700/// expandAddtoGEP. If the pattern isn't recognized, return NULL.
701Instruction *SCEVExpander::getIVIncOperand(Instruction *IncV,
702 Instruction *InsertPos,
703 bool allowScale) {
704 if (IncV == InsertPos)
705 return nullptr;
706
707 switch (IncV->getOpcode()) {
708 default:
709 return nullptr;
710 // Check for a simple Add/Sub or GEP of a loop invariant step.
711 case Instruction::Add:
712 case Instruction::Sub: {
713 Instruction *OInst = dyn_cast<Instruction>(IncV->getOperand(1));
714 if (!OInst || SE.DT.dominates(OInst, InsertPos))
715 return dyn_cast<Instruction>(IncV->getOperand(0));
716 return nullptr;
717 }
718 case Instruction::BitCast:
719 return dyn_cast<Instruction>(IncV->getOperand(0));
720 case Instruction::GetElementPtr:
721 for (Use &U : llvm::drop_begin(IncV->operands())) {
722 if (isa<Constant>(U))
723 continue;
724 if (Instruction *OInst = dyn_cast<Instruction>(U)) {
725 if (!SE.DT.dominates(OInst, InsertPos))
726 return nullptr;
727 }
728 if (allowScale) {
729 // allow any kind of GEP as long as it can be hoisted.
730 continue;
731 }
732 // GEPs produced by SCEVExpander use i8 element type.
733 if (!cast<GEPOperator>(IncV)->getSourceElementType()->isIntegerTy(8))
734 return nullptr;
735 break;
736 }
737 return dyn_cast<Instruction>(IncV->getOperand(0));
738 }
739}
740
741/// If the insert point of the current builder or any of the builders on the
742/// stack of saved builders has 'I' as its insert point, update it to point to
743/// the instruction after 'I'. This is intended to be used when the instruction
744/// 'I' is being moved. If this fixup is not done and 'I' is moved to a
745/// different block, the inconsistent insert point (with a mismatched
746/// Instruction and Block) can lead to an instruction being inserted in a block
747/// other than its parent.
748void SCEVExpander::fixupInsertPoints(Instruction *I) {
749 BasicBlock::iterator It(*I);
750 BasicBlock::iterator NewInsertPt = std::next(It);
751 if (Builder.GetInsertPoint() == It)
752 Builder.SetInsertPoint(&*NewInsertPt);
753 for (auto *InsertPtGuard : InsertPointGuards)
754 if (InsertPtGuard->GetInsertPoint() == It)
755 InsertPtGuard->SetInsertPoint(NewInsertPt);
756}
757
758/// hoistStep - Attempt to hoist a simple IV increment above InsertPos to make
759/// it available to other uses in this loop. Recursively hoist any operands,
760/// until we reach a value that dominates InsertPos.
761bool SCEVExpander::hoistIVInc(Instruction *IncV, Instruction *InsertPos,
762 bool RecomputePoisonFlags) {
763 auto FixupPoisonFlags = [this](Instruction *I) {
764 // Drop flags that are potentially inferred from old context and infer flags
765 // in new context.
766 rememberFlags(I);
767 I->dropPoisonGeneratingFlags();
768 if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(I))
769 if (auto Flags = SE.getStrengthenedNoWrapFlagsFromBinOp(OBO)) {
770 auto *BO = cast<BinaryOperator>(I);
771 BO->setHasNoUnsignedWrap(
772 ScalarEvolution::maskFlags(*Flags, SCEV::FlagNUW) == SCEV::FlagNUW);
773 BO->setHasNoSignedWrap(
774 ScalarEvolution::maskFlags(*Flags, SCEV::FlagNSW) == SCEV::FlagNSW);
775 }
776 };
777
778 if (SE.DT.dominates(IncV, InsertPos)) {
779 if (RecomputePoisonFlags)
780 FixupPoisonFlags(IncV);
781 return true;
782 }
783
784 // InsertPos must itself dominate IncV so that IncV's new position satisfies
785 // its existing users.
786 if (isa<PHINode>(InsertPos) ||
787 !SE.DT.dominates(InsertPos->getParent(), IncV->getParent()))
788 return false;
789
790 if (!SE.LI.movementPreservesLCSSAForm(IncV, InsertPos))
791 return false;
792
793 // Check that the chain of IV operands leading back to Phi can be hoisted.
794 SmallVector<Instruction*, 4> IVIncs;
795 for(;;) {
796 Instruction *Oper = getIVIncOperand(IncV, InsertPos, /*allowScale*/true);
797 if (!Oper)
798 return false;
799 // IncV is safe to hoist.
800 IVIncs.push_back(IncV);
801 IncV = Oper;
802 if (SE.DT.dominates(IncV, InsertPos))
803 break;
804 }
805 for (Instruction *I : llvm::reverse(IVIncs)) {
806 fixupInsertPoints(I);
807 I->moveBefore(InsertPos);
808 if (RecomputePoisonFlags)
809 FixupPoisonFlags(I);
810 }
811 return true;
812}
813
814bool SCEVExpander::canReuseFlagsFromOriginalIVInc(PHINode *OrigPhi,
815 PHINode *WidePhi,
816 Instruction *OrigInc,
817 Instruction *WideInc) {
818 return match(OrigInc, m_c_BinOp(m_Specific(OrigPhi), m_Value())) &&
819 match(WideInc, m_c_BinOp(m_Specific(WidePhi), m_Value())) &&
820 OrigInc->getOpcode() == WideInc->getOpcode();
821}
822
823/// Determine if this cyclic phi is in a form that would have been generated by
824/// LSR. We don't care if the phi was actually expanded in this pass, as long
825/// as it is in a low-cost form, for example, no implied multiplication. This
826/// should match any patterns generated by getAddRecExprPHILiterally and
827/// expandAddtoGEP.
828bool SCEVExpander::isExpandedAddRecExprPHI(PHINode *PN, Instruction *IncV,
829 const Loop *L) {
830 for(Instruction *IVOper = IncV;
831 (IVOper = getIVIncOperand(IVOper, L->getLoopPreheader()->getTerminator(),
832 /*allowScale=*/false));) {
833 if (IVOper == PN)
834 return true;
835 }
836 return false;
837}
838
839/// expandIVInc - Expand an IV increment at Builder's current InsertPos.
840/// Typically this is the LatchBlock terminator or IVIncInsertPos, but we may
841/// need to materialize IV increments elsewhere to handle difficult situations.
842Value *SCEVExpander::expandIVInc(PHINode *PN, Value *StepV, const Loop *L,
843 bool useSubtract) {
844 Value *IncV;
845 // If the PHI is a pointer, use a GEP, otherwise use an add or sub.
846 if (PN->getType()->isPointerTy()) {
847 // TODO: Change name to IVName.iv.next.
848 IncV = Builder.CreatePtrAdd(PN, StepV, "scevgep");
849 } else {
850 IncV = useSubtract ?
851 Builder.CreateSub(PN, StepV, Twine(IVName) + ".iv.next") :
852 Builder.CreateAdd(PN, StepV, Twine(IVName) + ".iv.next");
853 }
854 return IncV;
855}
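// Illustrative shapes of the increment (assumed IR, names depend on IVName):
//   integer IV:  %x.iv.next = add i64 %x.iv, %step    ; or a sub if useSubtract
//   pointer IV:  %scevgep   = getelementptr i8, ptr %x.iv, i64 %step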
856
857/// Check whether we can cheaply express the requested SCEV in terms of
858/// the available PHI SCEV by truncation and/or inversion of the step.
859static bool canBeCheaplyTransformed(ScalarEvolution &SE,
860 const SCEVAddRecExpr *Phi,
861 const SCEVAddRecExpr *Requested,
862 bool &InvertStep) {
863 // We can't transform to match a pointer PHI.
864 Type *PhiTy = Phi->getType();
865 Type *RequestedTy = Requested->getType();
866 if (PhiTy->isPointerTy() || RequestedTy->isPointerTy())
867 return false;
868
869 if (RequestedTy->getIntegerBitWidth() > PhiTy->getIntegerBitWidth())
870 return false;
871
872 // Try truncate it if necessary.
873 Phi = dyn_cast<SCEVAddRecExpr>(SE.getTruncateOrNoop(Phi, RequestedTy));
874 if (!Phi)
875 return false;
876
877 // Check whether truncation will help.
878 if (Phi == Requested) {
879 InvertStep = false;
880 return true;
881 }
882
883 // Check whether inverting will help: {R,+,-1} == R - {0,+,1}.
884 if (SE.getMinusSCEV(Requested->getStart(), Requested) == Phi) {
885 InvertStep = true;
886 return true;
887 }
888
889 return false;
890}
891
892static bool IsIncrementNSW(ScalarEvolution &SE, const SCEVAddRecExpr *AR) {
893 if (!isa<IntegerType>(AR->getType()))
894 return false;
895
896 unsigned BitWidth = cast<IntegerType>(AR->getType())->getBitWidth();
897 Type *WideTy = IntegerType::get(AR->getType()->getContext(), BitWidth * 2);
898 const SCEV *Step = AR->getStepRecurrence(SE);
899 const SCEV *OpAfterExtend = SE.getAddExpr(SE.getSignExtendExpr(Step, WideTy),
900 SE.getSignExtendExpr(AR, WideTy));
901 const SCEV *ExtendAfterOp =
902 SE.getSignExtendExpr(SE.getAddExpr(AR, Step), WideTy);
903 return ExtendAfterOp == OpAfterExtend;
904}
905
906static bool IsIncrementNUW(ScalarEvolution &SE, const SCEVAddRecExpr *AR) {
907 if (!isa<IntegerType>(AR->getType()))
908 return false;
909
910 unsigned BitWidth = cast<IntegerType>(AR->getType())->getBitWidth();
911 Type *WideTy = IntegerType::get(AR->getType()->getContext(), BitWidth * 2);
912 const SCEV *Step = AR->getStepRecurrence(SE);
913 const SCEV *OpAfterExtend = SE.getAddExpr(SE.getZeroExtendExpr(Step, WideTy),
914 SE.getZeroExtendExpr(AR, WideTy));
915 const SCEV *ExtendAfterOp =
916 SE.getZeroExtendExpr(SE.getAddExpr(AR, Step), WideTy);
917 return ExtendAfterOp == OpAfterExtend;
918}
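// Both helpers above use the same widening argument: the increment of
// {X,+,Step} cannot wrap (signed resp. unsigned) iff extending the narrow add
// agrees with adding the extended operands in a type twice as wide, i.e.
// ext(X + Step) == ext(X) + ext(Step). ScalarEvolution canonicalizes both
// sides, so a simple SCEV comparison answers the question.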
919
920/// getAddRecExprPHILiterally - Helper for expandAddRecExprLiterally. Expand
921/// the base addrec, which is the addrec without any non-loop-dominating
922/// values, and return the PHI.
923PHINode *
924SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
925 const Loop *L, Type *&TruncTy,
926 bool &InvertStep) {
927 assert((!IVIncInsertLoop || IVIncInsertPos) &&
928 "Uninitialized insert position");
929
930 // Reuse a previously-inserted PHI, if present.
931 BasicBlock *LatchBlock = L->getLoopLatch();
932 if (LatchBlock) {
933 PHINode *AddRecPhiMatch = nullptr;
934 Instruction *IncV = nullptr;
935 TruncTy = nullptr;
936 InvertStep = false;
937
938 // Only try partially matching scevs that need truncation and/or
939 // step-inversion if we know this loop is outside the current loop.
940 bool TryNonMatchingSCEV =
941 IVIncInsertLoop &&
942 SE.DT.properlyDominates(LatchBlock, IVIncInsertLoop->getHeader());
943
944 for (PHINode &PN : L->getHeader()->phis()) {
945 if (!SE.isSCEVable(PN.getType()))
946 continue;
947
948 // We should not look for an incomplete PHI; getting the SCEV of an
949 // incomplete PHI is meaningless.
950 if (!PN.isComplete()) {
951 SCEV_DEBUG_WITH_TYPE(
952 DebugType, dbgs() << "One incomplete PHI is found: " << PN << "\n");
953 continue;
954 }
955
956 const SCEVAddRecExpr *PhiSCEV = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(&PN));
957 if (!PhiSCEV)
958 continue;
959
960 bool IsMatchingSCEV = PhiSCEV == Normalized;
961 // We only handle truncation and inversion of phi recurrences for the
962 // expanded expression if the expanded expression's loop dominates the
963 // loop we insert to. Check now, so we can bail out early.
964 if (!IsMatchingSCEV && !TryNonMatchingSCEV)
965 continue;
966
967 // TODO: this can possibly be reworked to avoid the cast entirely.
968 Instruction *TempIncV =
969 dyn_cast<Instruction>(PN.getIncomingValueForBlock(LatchBlock));
970 if (!TempIncV)
971 continue;
972
973 // Check whether we can reuse this PHI node.
974 if (LSRMode) {
975 if (!isExpandedAddRecExprPHI(&PN, TempIncV, L))
976 continue;
977 } else {
978 if (!isNormalAddRecExprPHI(&PN, TempIncV, L))
979 continue;
980 }
981
982 // Stop if we have found an exact match SCEV.
983 if (IsMatchingSCEV) {
984 IncV = TempIncV;
985 TruncTy = nullptr;
986 InvertStep = false;
987 AddRecPhiMatch = &PN;
988 break;
989 }
990
991 // Try whether the phi can be translated into the requested form
992 // (truncated and/or offset by a constant).
993 if ((!TruncTy || InvertStep) &&
994 canBeCheaplyTransformed(SE, PhiSCEV, Normalized, InvertStep)) {
995 // Record the phi node. But don't stop; we might find an exact match
996 // later.
997 AddRecPhiMatch = &PN;
998 IncV = TempIncV;
999 TruncTy = Normalized->getType();
1000 }
1001 }
1002
1003 if (AddRecPhiMatch) {
1004 // Ok, the add recurrence looks usable.
1005 // Remember this PHI, even in post-inc mode.
1006 InsertedValues.insert(AddRecPhiMatch);
1007 // Remember the increment.
1008 rememberInstruction(IncV);
1009 // Those values were not actually inserted but re-used.
1010 ReusedValues.insert(AddRecPhiMatch);
1011 ReusedValues.insert(IncV);
1012 return AddRecPhiMatch;
1013 }
1014 }
1015
1016 // Save the original insertion point so we can restore it when we're done.
1017 SCEVInsertPointGuard Guard(Builder, this);
1018
1019 // Another AddRec may need to be recursively expanded below. For example, if
1020 // this AddRec is quadratic, the StepV may itself be an AddRec in this
1021 // loop. Remove this loop from the PostIncLoops set before expanding such
1022 // AddRecs. Otherwise, we cannot find a valid position for the step
1023 // (i.e. StepV can never dominate its loop header). Ideally, we could do
1024 // SavedIncLoops.swap(PostIncLoops), but we generally have a single element,
1025 // so it's not worth implementing SmallPtrSet::swap.
1026 PostIncLoopSet SavedPostIncLoops = PostIncLoops;
1027 PostIncLoops.clear();
1028
1029 // Expand code for the start value into the loop preheader.
1030 assert(L->getLoopPreheader() &&
1031 "Can't expand add recurrences without a loop preheader!");
1032 Value *StartV =
1033 expand(Normalized->getStart(), L->getLoopPreheader()->getTerminator());
1034
1035 // StartV must have been inserted into L's preheader to dominate the new
1036 // phi.
1037 assert(!isa<Instruction>(StartV) ||
1038 SE.DT.properlyDominates(cast<Instruction>(StartV)->getParent(),
1039 L->getHeader()));
1040
1041 // Expand code for the step value. Do this before creating the PHI so that PHI
1042 // reuse code doesn't see an incomplete PHI.
1043 const SCEV *Step = Normalized->getStepRecurrence(SE);
1044 Type *ExpandTy = Normalized->getType();
1045 // If the stride is negative, insert a sub instead of an add for the increment
1046 // (unless it's a constant, because subtracts of constants are canonicalized
1047 // to adds).
1048 bool useSubtract = !ExpandTy->isPointerTy() && Step->isNonConstantNegative();
1049 if (useSubtract)
1050 Step = SE.getNegativeSCEV(Step);
1051 // Expand the step somewhere that dominates the loop header.
1052 Value *StepV = expand(Step, L->getHeader()->getFirstInsertionPt());
1053
1054 // The no-wrap behavior proved by IsIncrement(NUW|NSW) is only applicable if
1055 // we actually do emit an addition. It does not apply if we emit a
1056 // subtraction.
1057 bool IncrementIsNUW = !useSubtract && IsIncrementNUW(SE, Normalized);
1058 bool IncrementIsNSW = !useSubtract && IsIncrementNSW(SE, Normalized);
1059
1060 // Create the PHI.
1061 BasicBlock *Header = L->getHeader();
1062 Builder.SetInsertPoint(Header, Header->begin());
1063 PHINode *PN =
1064 Builder.CreatePHI(ExpandTy, pred_size(Header), Twine(IVName) + ".iv");
1065
1066 // Create the step instructions and populate the PHI.
1067 for (BasicBlock *Pred : predecessors(Header)) {
1068 // Add a start value.
1069 if (!L->contains(Pred)) {
1070 PN->addIncoming(StartV, Pred);
1071 continue;
1072 }
1073
1074 // Create a step value and add it to the PHI.
1075 // If IVIncInsertLoop is non-null and equal to the addrec's loop, insert the
1076 // instructions at IVIncInsertPos.
1077 Instruction *InsertPos = L == IVIncInsertLoop ?
1078 IVIncInsertPos : Pred->getTerminator();
1079 Builder.SetInsertPoint(InsertPos);
1080 Value *IncV = expandIVInc(PN, StepV, L, useSubtract);
1081
1082 if (isa<OverflowingBinaryOperator>(IncV)) {
1083 if (IncrementIsNUW)
1084 cast<BinaryOperator>(IncV)->setHasNoUnsignedWrap();
1085 if (IncrementIsNSW)
1086 cast<BinaryOperator>(IncV)->setHasNoSignedWrap();
1087 }
1088 PN->addIncoming(IncV, Pred);
1089 }
1090
1091 // After expanding subexpressions, restore the PostIncLoops set so the caller
1092 // can ensure that IVIncrement dominates the current uses.
1093 PostIncLoops = SavedPostIncLoops;
1094
1095 // Remember this PHI, even in post-inc mode. LSR SCEV-based salvaging is most
1096 // effective when we are able to use an IV inserted here, so record it.
1097 InsertedValues.insert(PN);
1098 InsertedIVs.push_back(PN);
1099 return PN;
1100}
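// Illustrative shape of the IR built above for {Start,+,Step}<%loop> when no
// existing PHI can be reused (names are assumed):
//   preheader:
//     %start = ...                        ; expansion of Start
//   header:
//     %x.iv = phi i64 [ %start, %preheader ], [ %x.iv.next, %backedge ]
//     ...
//     %x.iv.next = add i64 %x.iv, %step   ; a sub when the step is negative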
1101
1102Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
1103 const Loop *L = S->getLoop();
1104
1105 // Determine a normalized form of this expression, which is the expression
1106 // before any post-inc adjustment is made.
1107 const SCEVAddRecExpr *Normalized = S;
1108 if (PostIncLoops.count(L)) {
1109 PostIncLoopSet Loops;
1110 Loops.insert(L);
1111 Normalized = cast<SCEVAddRecExpr>(
1112 normalizeForPostIncUse(S, Loops, SE, /*CheckInvertible=*/false));
1113 }
1114
1115 [[maybe_unused]] const SCEV *Start = Normalized->getStart();
1116 const SCEV *Step = Normalized->getStepRecurrence(SE);
1117 assert(SE.properlyDominates(Start, L->getHeader()) &&
1118 "Start does not properly dominate loop header");
1119 assert(SE.dominates(Step, L->getHeader()) && "Step does not dominate loop header");
1120
1121 // In some cases, we decide to reuse an existing phi node but need to truncate
1122 // it and/or invert the step.
1123 Type *TruncTy = nullptr;
1124 bool InvertStep = false;
1125 PHINode *PN = getAddRecExprPHILiterally(Normalized, L, TruncTy, InvertStep);
1126
1127 // Accommodate post-inc mode, if necessary.
1128 Value *Result;
1129 if (!PostIncLoops.count(L))
1130 Result = PN;
1131 else {
1132 // In PostInc mode, use the post-incremented value.
1133 BasicBlock *LatchBlock = L->getLoopLatch();
1134 assert(LatchBlock && "PostInc mode requires a unique loop latch!");
1135 Result = PN->getIncomingValueForBlock(LatchBlock);
1136
1137 // We might be introducing a new use of the post-inc IV that is not poison
1138 // safe, in which case we should drop poison generating flags. Only keep
1139 // those flags for which SCEV has proven that they always hold.
1140 if (isa<OverflowingBinaryOperator>(Result)) {
1141 auto *I = cast<Instruction>(Result);
1142 if (!S->hasNoUnsignedWrap())
1143 I->setHasNoUnsignedWrap(false);
1144 if (!S->hasNoSignedWrap())
1145 I->setHasNoSignedWrap(false);
1146 }
1147
1148 // For an expansion to use the postinc form, the client must call
1149 // expandCodeFor with an InsertPoint that is either outside the PostIncLoop
1150 // or dominated by IVIncInsertPos.
1151 if (isa<Instruction>(Result) &&
1152 !SE.DT.dominates(cast<Instruction>(Result),
1153 &*Builder.GetInsertPoint())) {
1154 // The induction variable's postinc expansion does not dominate this use.
1155 // IVUsers tries to prevent this case, so it is rare. However, it can
1156 // happen when an IVUser outside the loop is not dominated by the latch
1157 // block. Adjusting IVIncInsertPos before expansion begins cannot handle
1158 // all cases. Consider a phi outside whose operand is replaced during
1159 // expansion with the value of the postinc user. Without fundamentally
1160 // changing the way postinc users are tracked, the only remedy is
1161 // inserting an extra IV increment. StepV might fold into PostLoopOffset,
1162 // but hopefully expandCodeFor handles that.
1163 bool useSubtract =
1164 !S->getType()->isPointerTy() && Step->isNonConstantNegative();
1165 if (useSubtract)
1166 Step = SE.getNegativeSCEV(Step);
1167 Value *StepV;
1168 {
1169 // Expand the step somewhere that dominates the loop header.
1170 SCEVInsertPointGuard Guard(Builder, this);
1171 StepV = expand(Step, L->getHeader()->getFirstInsertionPt());
1172 }
1173 Result = expandIVInc(PN, StepV, L, useSubtract);
1174 }
1175 }
1176
1177 // We have decided to reuse an induction variable of a dominating loop. Apply
1178 // truncation and/or inversion of the step.
1179 if (TruncTy) {
1180 // Truncate the result.
1181 if (TruncTy != Result->getType())
1182 Result = Builder.CreateTrunc(Result, TruncTy);
1183
1184 // Invert the result.
1185 if (InvertStep)
1186 Result = Builder.CreateSub(expand(Normalized->getStart()), Result);
1187 }
1188
1189 return Result;
1190}
1191
1192Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
1193 // In canonical mode we compute the addrec as an expression of a canonical IV
1194 // using evaluateAtIteration and expand the resulting SCEV expression. This
1195 // way we avoid introducing new IVs to carry on the computation of the addrec
1196 // throughout the loop.
1197 //
1198 // For nested addrecs evaluateAtIteration might need a canonical IV of a
1199 // type wider than the addrec itself. Emitting a canonical IV of the
1200 // proper type might produce non-legal types, for example expanding an i64
1201 // {0,+,2,+,1} addrec would need an i65 canonical IV. To avoid this just fall
1202 // back to non-canonical mode for nested addrecs.
1203 if (!CanonicalMode || (S->getNumOperands() > 2))
1204 return expandAddRecExprLiterally(S);
1205
1206 Type *Ty = SE.getEffectiveSCEVType(S->getType());
1207 const Loop *L = S->getLoop();
1208
1209 // First check for an existing canonical IV in a suitable type.
1210 PHINode *CanonicalIV = nullptr;
1211 if (PHINode *PN = L->getCanonicalInductionVariable())
1212 if (SE.getTypeSizeInBits(PN->getType()) >= SE.getTypeSizeInBits(Ty))
1213 CanonicalIV = PN;
1214
1215 // Rewrite an AddRec in terms of the canonical induction variable, if
1216 // its type is more narrow.
1217 if (CanonicalIV &&
1218 SE.getTypeSizeInBits(CanonicalIV->getType()) > SE.getTypeSizeInBits(Ty) &&
1219 !S->getType()->isPointerTy()) {
1220 SmallVector<const SCEV *, 4> NewOps(S->getNumOperands());
1221 for (unsigned i = 0, e = S->getNumOperands(); i != e; ++i)
1222 NewOps[i] = SE.getAnyExtendExpr(S->getOperand(i), CanonicalIV->getType());
1223 Value *V = expand(SE.getAddRecExpr(NewOps, S->getLoop(),
1224 S->getNoWrapFlags(SCEV::FlagNW)));
1225 BasicBlock::iterator NewInsertPt =
1226 findInsertPointAfter(cast<Instruction>(V), &*Builder.GetInsertPoint());
1227 V = expand(SE.getTruncateExpr(SE.getUnknown(V), Ty), NewInsertPt);
1228 return V;
1229 }
1230
1231 // {X,+,F} --> X + {0,+,F}
1232 if (!S->getStart()->isZero()) {
1233 if (isa<PointerType>(S->getType())) {
1234 Value *StartV = expand(SE.getPointerBase(S));
1235 return expandAddToGEP(SE.removePointerBase(S), StartV);
1236 }
1237
1238 SmallVector<const SCEV *, 4> NewOps(S->operands());
1239 NewOps[0] = SE.getConstant(Ty, 0);
1240 const SCEV *Rest = SE.getAddRecExpr(NewOps, L,
1241 S->getNoWrapFlags(SCEV::FlagNW));
1242
1243 // Just do a normal add. Pre-expand the operands to suppress folding.
1244 //
1245 // The LHS and RHS values are factored out of the expand call to make the
1246 // output independent of the argument evaluation order.
1247 const SCEV *AddExprLHS = SE.getUnknown(expand(S->getStart()));
1248 const SCEV *AddExprRHS = SE.getUnknown(expand(Rest));
1249 return expand(SE.getAddExpr(AddExprLHS, AddExprRHS));
1250 }
1251
1252 // If we don't yet have a canonical IV, create one.
1253 if (!CanonicalIV) {
1254 // Create and insert the PHI node for the induction variable in the
1255 // specified loop.
1256 BasicBlock *Header = L->getHeader();
1257 pred_iterator HPB = pred_begin(Header), HPE = pred_end(Header);
1258 CanonicalIV = PHINode::Create(Ty, std::distance(HPB, HPE), "indvar");
1259 CanonicalIV->insertBefore(Header->begin());
1260 rememberInstruction(CanonicalIV);
1261
1262 SmallPtrSet<BasicBlock *, 4> PredSeen;
1263 Constant *One = ConstantInt::get(Ty, 1);
1264 for (pred_iterator HPI = HPB; HPI != HPE; ++HPI) {
1265 BasicBlock *HP = *HPI;
1266 if (!PredSeen.insert(HP).second) {
1267 // There must be an incoming value for each predecessor, even the
1268 // duplicates!
1269 CanonicalIV->addIncoming(CanonicalIV->getIncomingValueForBlock(HP), HP);
1270 continue;
1271 }
1272
1273 if (L->contains(HP)) {
1274 // Insert a unit add instruction right before the terminator
1275 // corresponding to the back-edge.
1276 Instruction *Add = BinaryOperator::CreateAdd(CanonicalIV, One,
1277 "indvar.next",
1278 HP->getTerminator()->getIterator());
1279 Add->setDebugLoc(HP->getTerminator()->getDebugLoc());
1280 rememberInstruction(Add);
1281 CanonicalIV->addIncoming(Add, HP);
1282 } else {
1283 CanonicalIV->addIncoming(Constant::getNullValue(Ty), HP);
1284 }
1285 }
1286 }
1287
1288 // {0,+,1} --> Insert a canonical induction variable into the loop!
1289 if (S->isAffine() && S->getOperand(1)->isOne()) {
1290 assert(Ty == SE.getEffectiveSCEVType(CanonicalIV->getType()) &&
1291 "IVs with types different from the canonical IV should "
1292 "already have been handled!");
1293 return CanonicalIV;
1294 }
1295
1296 // {0,+,F} --> {0,+,1} * F
1297
1298 // If this is a simple linear addrec, emit it now as a special case.
1299 if (S->isAffine()) // {0,+,F} --> i*F
1300 return
1301 expand(SE.getTruncateOrNoop(
1302 SE.getMulExpr(SE.getUnknown(CanonicalIV),
1303 SE.getNoopOrAnyExtend(S->getOperand(1),
1304 CanonicalIV->getType())),
1305 Ty));
1306
1307 // If this is a chain of recurrences, turn it into a closed form, using the
1308 // folders, then expandCodeFor the closed form. This allows the folders to
1309 // simplify the expression without having to build a bunch of special code
1310 // into this folder.
1311 const SCEV *IH = SE.getUnknown(CanonicalIV); // Get I as a "symbolic" SCEV.
1312
1313 // Promote S up to the canonical IV type, if the cast is foldable.
1314 const SCEV *NewS = S;
1315 const SCEV *Ext = SE.getNoopOrAnyExtend(S, CanonicalIV->getType());
1316 if (isa<SCEVAddRecExpr>(Ext))
1317 NewS = Ext;
1318
1319 const SCEV *V = cast<SCEVAddRecExpr>(NewS)->evaluateAtIteration(IH, SE);
1320
1321 // Truncate the result down to the original type, if needed.
1322 const SCEV *T = SE.getTruncateOrNoop(V, Ty);
1323 return expand(T);
1324}
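// Worked example (illustrative): in canonical mode an affine addrec such as
// {0,+,4}<%L> gets no PHI of its own; it is rewritten in terms of the
// canonical IV {0,+,1}<%L> and expanded as "4 * %indvar", while {X,+,4}<%L>
// becomes "X + 4 * %indvar". Higher-order recurrences go through
// evaluateAtIteration, which supplies the equivalent closed form.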
1325
1326Value *SCEVExpander::visitPtrToIntExpr(const SCEVPtrToIntExpr *S) {
1327 Value *V = expand(S->getOperand());
1328 return ReuseOrCreateCast(V, S->getType(), CastInst::PtrToInt,
1329 GetOptimalInsertionPointForCastOf(V));
1330}
1331
1332Value *SCEVExpander::visitTruncateExpr(const SCEVTruncateExpr *S) {
1333 Value *V = expand(S->getOperand());
1334 return Builder.CreateTrunc(V, S->getType());
1335}
1336
1337Value *SCEVExpander::visitZeroExtendExpr(const SCEVZeroExtendExpr *S) {
1338 Value *V = expand(S->getOperand());
1339 return Builder.CreateZExt(V, S->getType(), "",
1340 SE.isKnownNonNegative(S->getOperand()));
1341}
1342
1343Value *SCEVExpander::visitSignExtendExpr(const SCEVSignExtendExpr *S) {
1344 Value *V = expand(S->getOperand());
1345 return Builder.CreateSExt(V, S->getType());
1346}
1347
1348Value *SCEVExpander::expandMinMaxExpr(const SCEVNAryExpr *S,
1349 Intrinsic::ID IntrinID, Twine Name,
1350 bool IsSequential) {
1351 Value *LHS = expand(S->getOperand(S->getNumOperands() - 1));
1352 Type *Ty = LHS->getType();
1353 if (IsSequential)
1354 LHS = Builder.CreateFreeze(LHS);
1355 for (int i = S->getNumOperands() - 2; i >= 0; --i) {
1356 Value *RHS = expand(S->getOperand(i));
1357 if (IsSequential && i != 0)
1358 RHS = Builder.CreateFreeze(RHS);
1359 Value *Sel;
1360 if (Ty->isIntegerTy())
1361 Sel = Builder.CreateIntrinsic(IntrinID, {Ty}, {LHS, RHS},
1362 /*FMFSource=*/nullptr, Name);
1363 else {
1364 Value *ICmp =
1365 Builder.CreateICmp(MinMaxIntrinsic::getPredicate(IntrinID), LHS, RHS);
1366 Sel = Builder.CreateSelect(ICmp, LHS, RHS, Name);
1367 }
1368 LHS = Sel;
1369 }
1370 return LHS;
1371}
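// Note on the freeze calls above: umin_seq(a, b) is 0 as soon as a == 0,
// regardless of b, so poison or undef in b must not leak into the result.
// Every operand except the first is therefore frozen. Illustrative result for
// two integer operands (assumed IR):
//   %b.fr = freeze i64 %b
//   %r    = call i64 @llvm.umin.i64(i64 %b.fr, i64 %a)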
1372
1373Value *SCEVExpander::visitSMaxExpr(const SCEVSMaxExpr *S) {
1374 return expandMinMaxExpr(S, Intrinsic::smax, "smax");
1375}
1376
1377Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) {
1378 return expandMinMaxExpr(S, Intrinsic::umax, "umax");
1379}
1380
1381Value *SCEVExpander::visitSMinExpr(const SCEVSMinExpr *S) {
1382 return expandMinMaxExpr(S, Intrinsic::smin, "smin");
1383}
1384
1385Value *SCEVExpander::visitUMinExpr(const SCEVUMinExpr *S) {
1386 return expandMinMaxExpr(S, Intrinsic::umin, "umin");
1387}
1388
1389Value *SCEVExpander::visitSequentialUMinExpr(const SCEVSequentialUMinExpr *S) {
1390 return expandMinMaxExpr(S, Intrinsic::umin, "umin", /*IsSequential*/true);
1391}
1392
1393Value *SCEVExpander::visitVScale(const SCEVVScale *S) {
1394 return Builder.CreateVScale(ConstantInt::get(S->getType(), 1));
1395}
1396
1397Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty,
1398 BasicBlock::iterator IP) {
1399 setInsertPoint(IP);
1400 Value *V = expandCodeFor(SH, Ty);
1401 return V;
1402}
1403
1404Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty) {
1405 // Expand the code for this SCEV.
1406 Value *V = expand(SH);
1407
1408 if (Ty) {
1409 assert(SE.getTypeSizeInBits(Ty) == SE.getTypeSizeInBits(SH->getType()) &&
1410 "non-trivial casts should be done with the SCEVs directly!");
1411 V = InsertNoopCastOfTo(V, Ty);
1412 }
1413 return V;
1414}
1415
1416Value *SCEVExpander::FindValueInExprValueMap(
1417 const SCEV *S, const Instruction *InsertPt,
1418 SmallVectorImpl<Instruction *> &DropPoisonGeneratingInsts) {
1419 // If the expansion is not in CanonicalMode, and the SCEV contains any
1420 // scAddRecExpr subexpression, the SCEV must be expanded literally.
1421 if (!CanonicalMode && SE.containsAddRecurrence(S))
1422 return nullptr;
1423
1424 // If S is a constant, it may be worse to reuse an existing Value.
1425 if (isa<SCEVConstant>(S))
1426 return nullptr;
1427
1428 for (Value *V : SE.getSCEVValues(S)) {
1429 Instruction *EntInst = dyn_cast<Instruction>(V);
1430 if (!EntInst)
1431 continue;
1432
1433 // Choose a Value from the set which dominates the InsertPt.
1434 // InsertPt should be inside the Value's parent loop so as not to break
1435 // the LCSSA form.
1436 assert(EntInst->getFunction() == InsertPt->getFunction());
1437 if (S->getType() != V->getType() || !SE.DT.dominates(EntInst, InsertPt) ||
1438 !(SE.LI.getLoopFor(EntInst->getParent()) == nullptr ||
1439 SE.LI.getLoopFor(EntInst->getParent())->contains(InsertPt)))
1440 continue;
1441
1442 // Make sure reusing the instruction is poison-safe.
1443 if (SE.canReuseInstruction(S, EntInst, DropPoisonGeneratingInsts))
1444 return V;
1445 DropPoisonGeneratingInsts.clear();
1446 }
1447 return nullptr;
1448}
1449
1450// The expansion of SCEV will either reuse a previous Value in ExprValueMap,
1451// or expand the SCEV literally. Specifically, if the expansion is in LSRMode,
1452// and the SCEV contains any sub scAddRecExpr type SCEV, it will be expanded
1453// literally, to prevent LSR's transformed SCEV from being reverted. Otherwise,
1454// the expansion will try to reuse Value from ExprValueMap, and only when it
1455// fails, expand the SCEV literally.
1456Value *SCEVExpander::expand(const SCEV *S) {
1457 // Compute an insertion point for this SCEV object. Hoist the instructions
1458 // as far out in the loop nest as possible.
1459 BasicBlock::iterator InsertPt = Builder.GetInsertPoint();
1460
1461 // We can only move the insertion point if there are no div or rem operations;
1462 // otherwise we risk hoisting it past the check for a zero denominator.
1463 auto SafeToHoist = [](const SCEV *S) {
1464 return !SCEVExprContains(S, [](const SCEV *S) {
1465 if (const auto *D = dyn_cast<SCEVUDivExpr>(S)) {
1466 if (const auto *SC = dyn_cast<SCEVConstant>(D->getRHS()))
1467 // Division by non-zero constants can be hoisted.
1468 return SC->getValue()->isZero();
1469 // All other divisions should not be moved as they may be
1470 // divisions by zero and should be kept within the
1471 // conditions of the surrounding loops that guard their
1472 // execution (see PR35406).
1473 return true;
1474 }
1475 return false;
1476 });
1477 };
1478 if (SafeToHoist(S)) {
1479 for (Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock());;
1480 L = L->getParentLoop()) {
1481 if (SE.isLoopInvariant(S, L)) {
1482 if (!L) break;
1483 if (BasicBlock *Preheader = L->getLoopPreheader()) {
1484 InsertPt = Preheader->getTerminator()->getIterator();
1485 } else {
1486 // LSR sets the insertion point for AddRec start/step values to the
1487 // block start to simplify value reuse, even though it's an invalid
1488 // position. SCEVExpander must correct for this in all cases.
1489 InsertPt = L->getHeader()->getFirstInsertionPt();
1490 }
1491 } else {
1492 // If the SCEV is computable at this level, insert it into the header
1493 // after the PHIs (and after any other instructions that we've inserted
1494 // there) so that it is guaranteed to dominate any user inside the loop.
1495 if (L && SE.hasComputableLoopEvolution(S, L) && !PostIncLoops.count(L))
1496 InsertPt = L->getHeader()->getFirstInsertionPt();
1497
1498 while (InsertPt != Builder.GetInsertPoint() &&
1499 (isInsertedInstruction(&*InsertPt) ||
1500 isa<DbgInfoIntrinsic>(&*InsertPt))) {
1501 InsertPt = std::next(InsertPt);
1502 }
1503 break;
1504 }
1505 }
1506 }
1507
1508 // Check to see if we already expanded this here.
1509 auto I = InsertedExpressions.find(std::make_pair(S, &*InsertPt));
1510 if (I != InsertedExpressions.end())
1511 return I->second;
1512
1513 SCEVInsertPointGuard Guard(Builder, this);
1514 Builder.SetInsertPoint(InsertPt->getParent(), InsertPt);
1515
1516 // Expand the expression into instructions.
1517 SmallVector<Instruction *> DropPoisonGeneratingInsts;
1518 Value *V = FindValueInExprValueMap(S, &*InsertPt, DropPoisonGeneratingInsts);
1519 if (!V) {
1520 V = visit(S);
1521 V = fixupLCSSAFormFor(V);
1522 } else {
1523 for (Instruction *I : DropPoisonGeneratingInsts) {
1524 rememberFlags(I);
1525 I->dropPoisonGeneratingAnnotations();
1526 // See if we can re-infer from first principles any of the flags we just
1527 // dropped.
1528 if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(I))
1529 if (auto Flags = SE.getStrengthenedNoWrapFlagsFromBinOp(OBO)) {
1530 auto *BO = cast<BinaryOperator>(I);
1531 BO->setHasNoUnsignedWrap(
1532 ScalarEvolution::maskFlags(*Flags, SCEV::FlagNUW) == SCEV::FlagNUW);
1533 BO->setHasNoSignedWrap(
1534 ScalarEvolution::maskFlags(*Flags, SCEV::FlagNSW) == SCEV::FlagNSW);
1535 }
1536 if (auto *NNI = dyn_cast<PossiblyNonNegInst>(I)) {
1537 auto *Src = NNI->getOperand(0);
1538 if (isImpliedByDomCondition(ICmpInst::ICMP_SGE, Src,
1539 Constant::getNullValue(Src->getType()), I,
1540 DL).value_or(false))
1541 NNI->setNonNeg(true);
1542 }
1543 }
1544 }
1545 // Remember the expanded value for this SCEV at this location.
1546 //
1547 // This is independent of PostIncLoops. The mapped value simply materializes
1548 // the expression at this insertion point. If the mapped value happened to be
1549 // a postinc expansion, it could be reused by a non-postinc user, but only if
1550 // its insertion point was already at the head of the loop.
1551 InsertedExpressions[std::make_pair(S, &*InsertPt)] = V;
1552 return V;
1553}
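// Illustrative usage sketch (not part of this file; names are invented): client
// passes normally reach expand() through expandCodeFor(). Assuming a SCEV `S`
// that should be materialized before a loop preheader's terminator, the call
// would look roughly like:
//
//   SCEVExpander Expander(SE, DL, "scev.expand");
//   Value *V = Expander.expandCodeFor(
//       S, S->getType(), Preheader->getTerminator()->getIterator());
//
// expand() then hoists the generated instructions to the outermost loop at
// which S is invariant, subject to the SafeToHoist restriction above.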
1554
1555void SCEVExpander::rememberInstruction(Value *I) {
1556 auto DoInsert = [this](Value *V) {
1557 if (!PostIncLoops.empty())
1558 InsertedPostIncValues.insert(V);
1559 else
1560 InsertedValues.insert(V);
1561 };
1562 DoInsert(I);
1563}
1564
1565void SCEVExpander::rememberFlags(Instruction *I) {
1566 // If we already have flags for the instruction, keep the existing ones.
1567 OrigFlags.try_emplace(I, PoisonFlags(I));
1568}
1569
1570void SCEVExpander::replaceCongruentIVInc(
1571 PHINode *&Phi, PHINode *&OrigPhi, Loop *L, const DominatorTree *DT,
1572 SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1573 BasicBlock *LatchBlock = L->getLoopLatch();
1574 if (!LatchBlock)
1575 return;
1576
1577 Instruction *OrigInc =
1578 dyn_cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock));
1579 Instruction *IsomorphicInc =
1580 dyn_cast<Instruction>(Phi->getIncomingValueForBlock(LatchBlock));
1581 if (!OrigInc || !IsomorphicInc)
1582 return;
1583
1584 // If this phi has the same width but is more canonical, replace the
1585 // original with it. As part of the "more canonical" determination,
1586 // respect a prior decision to use an IV chain.
1587 if (OrigPhi->getType() == Phi->getType() &&
1588 !(ChainedPhis.count(Phi) ||
1589 isExpandedAddRecExprPHI(OrigPhi, OrigInc, L)) &&
1590 (ChainedPhis.count(Phi) ||
1591 isExpandedAddRecExprPHI(Phi, IsomorphicInc, L))) {
1592 std::swap(OrigPhi, Phi);
1593 std::swap(OrigInc, IsomorphicInc);
1594 }
1595
1596 // Replacing the congruent phi is sufficient because acyclic
1597 // redundancy elimination, CSE/GVN, should handle the
1598 // rest. However, once SCEV proves that a phi is congruent,
1599 // it's often the head of an IV user cycle that is isomorphic
1600 // with the original phi. It's worth eagerly cleaning up the
1601 // common case of a single IV increment so that DeleteDeadPHIs
1602 // can remove cycles that had postinc uses.
1603 // Because we may potentially introduce a new use of OrigIV that didn't
1604 // exist before at this point, its poison flags need readjustment.
1605 const SCEV *TruncExpr =
1606 SE.getTruncateOrNoop(SE.getSCEV(OrigInc), IsomorphicInc->getType());
1607 if (OrigInc == IsomorphicInc || TruncExpr != SE.getSCEV(IsomorphicInc) ||
1608 !SE.LI.replacementPreservesLCSSAForm(IsomorphicInc, OrigInc))
1609 return;
1610
1611 bool BothHaveNUW = false;
1612 bool BothHaveNSW = false;
1613 auto *OBOIncV = dyn_cast<OverflowingBinaryOperator>(OrigInc);
1614 auto *OBOIsomorphic = dyn_cast<OverflowingBinaryOperator>(IsomorphicInc);
1615 if (OBOIncV && OBOIsomorphic) {
1616 BothHaveNUW =
1617 OBOIncV->hasNoUnsignedWrap() && OBOIsomorphic->hasNoUnsignedWrap();
1618 BothHaveNSW =
1619 OBOIncV->hasNoSignedWrap() && OBOIsomorphic->hasNoSignedWrap();
1620 }
1621
1622 if (!hoistIVInc(OrigInc, IsomorphicInc,
1623 /*RecomputePoisonFlags*/ true))
1624 return;
1625
1626 // We are replacing with a wider increment. If both OrigInc and IsomorphicInc
1627 // are NUW/NSW, then we can preserve them on the wider increment; the narrower
1628 // IsomorphicInc would wrap before the wider OrigInc, so the replacement won't
1629 // make IsomorphicInc's uses more poisonous.
1630 assert(OrigInc->getType()->getScalarSizeInBits() >=
1631 IsomorphicInc->getType()->getScalarSizeInBits() &&
1632 "Should only replace an increment with a wider one.");
1633 if (BothHaveNUW || BothHaveNSW) {
1634 OrigInc->setHasNoUnsignedWrap(OBOIncV->hasNoUnsignedWrap() || BothHaveNUW);
1635 OrigInc->setHasNoSignedWrap(OBOIncV->hasNoSignedWrap() || BothHaveNSW);
1636 }
1637
1638 SCEV_DEBUG_WITH_TYPE(DebugType,
1639 dbgs() << "INDVARS: Eliminated congruent iv.inc: "
1640 << *IsomorphicInc << '\n');
1641 Value *NewInc = OrigInc;
1642 if (OrigInc->getType() != IsomorphicInc->getType()) {
1643 BasicBlock::iterator IP;
1644 if (PHINode *PN = dyn_cast<PHINode>(OrigInc))
1645 IP = PN->getParent()->getFirstInsertionPt();
1646 else
1647 IP = OrigInc->getNextNonDebugInstruction()->getIterator();
1648
1649 IRBuilder<> Builder(IP->getParent(), IP);
1650 Builder.SetCurrentDebugLocation(IsomorphicInc->getDebugLoc());
1651 NewInc =
1652 Builder.CreateTruncOrBitCast(OrigInc, IsomorphicInc->getType(), IVName);
1653 }
1654 IsomorphicInc->replaceAllUsesWith(NewInc);
1655 DeadInsts.emplace_back(IsomorphicInc);
1656}
1657
1658/// replaceCongruentIVs - Check for congruent phis in this loop header and
1659/// replace them with their most canonical representative. Return the number of
1660/// phis eliminated.
1661///
1662/// This does not depend on any SCEVExpander state but should be used in
1663/// the same context that SCEVExpander is used.
1664unsigned
1665SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
1666 SmallVectorImpl<WeakTrackingVH> &DeadInsts,
1667 const TargetTransformInfo *TTI) {
1668 // Find integer phis in order of increasing width.
1669 SmallVector<PHINode *, 8> Phis;
1670 for (PHINode &PN : L->getHeader()->phis())
1671 Phis.push_back(&PN);
1672
1673 if (TTI)
1674 // Use stable_sort to preserve order of equivalent PHIs, so the order
1675 // of the sorted Phis is the same from run to run on the same loop.
1676 llvm::stable_sort(Phis, [](Value *LHS, Value *RHS) {
1677 // Put pointers at the back and make sure pointer < pointer = false.
1678 if (!LHS->getType()->isIntegerTy() || !RHS->getType()->isIntegerTy())
1679 return RHS->getType()->isIntegerTy() && !LHS->getType()->isIntegerTy();
1680 return RHS->getType()->getPrimitiveSizeInBits().getFixedValue() <
1681 LHS->getType()->getPrimitiveSizeInBits().getFixedValue();
1682 });
1683
1684 unsigned NumElim = 0;
1685 DenseMap<const SCEV *, PHINode *> ExprToIVMap;
1686 // Process phis from wide to narrow. Map wide phis to their truncation
1687 // so narrow phis can reuse them.
1688 for (PHINode *Phi : Phis) {
1689 auto SimplifyPHINode = [&](PHINode *PN) -> Value * {
1690 if (Value *V = simplifyInstruction(PN, {DL, &SE.TLI, &SE.DT, &SE.AC}))
1691 return V;
1692 if (!SE.isSCEVable(PN->getType()))
1693 return nullptr;
1694 auto *Const = dyn_cast<SCEVConstant>(SE.getSCEV(PN));
1695 if (!Const)
1696 return nullptr;
1697 return Const->getValue();
1698 };
1699
1700 // Fold constant phis. They may be congruent to other constant phis and
1701 // would confuse the logic below that expects proper IVs.
1702 if (Value *V = SimplifyPHINode(Phi)) {
1703 if (V->getType() != Phi->getType())
1704 continue;
1705 SE.forgetValue(Phi);
1706 Phi->replaceAllUsesWith(V);
1707 DeadInsts.emplace_back(Phi);
1708 ++NumElim;
1709 SCEV_DEBUG_WITH_TYPE(DebugType,
1710 dbgs() << "INDVARS: Eliminated constant iv: " << *Phi
1711 << '\n');
1712 continue;
1713 }
1714
1715 if (!SE.isSCEVable(Phi->getType()))
1716 continue;
1717
1718 PHINode *&OrigPhiRef = ExprToIVMap[SE.getSCEV(Phi)];
1719 if (!OrigPhiRef) {
1720 OrigPhiRef = Phi;
1721 if (Phi->getType()->isIntegerTy() && TTI &&
1722 TTI->isTruncateFree(Phi->getType(), Phis.back()->getType())) {
1723 // Make sure we only rewrite using simple induction variables;
1724 // otherwise, we can make the trip count of a loop unanalyzable
1725 // to SCEV.
1726 const SCEV *PhiExpr = SE.getSCEV(Phi);
1727 if (isa<SCEVAddRecExpr>(PhiExpr)) {
1728 // This phi can be freely truncated to the narrowest phi type. Map the
1729 // truncated expression to it so it will be reused for narrow types.
1730 const SCEV *TruncExpr =
1731 SE.getTruncateExpr(PhiExpr, Phis.back()->getType());
1732 ExprToIVMap[TruncExpr] = Phi;
1733 }
1734 }
1735 continue;
1736 }
1737
1738 // Replacing a pointer phi with an integer phi or vice-versa doesn't make
1739 // sense.
1740 if (OrigPhiRef->getType()->isPointerTy() != Phi->getType()->isPointerTy())
1741 continue;
1742
1743 replaceCongruentIVInc(Phi, OrigPhiRef, L, DT, DeadInsts);
1744 SCEV_DEBUG_WITH_TYPE(DebugType,
1745 dbgs() << "INDVARS: Eliminated congruent iv: " << *Phi
1746 << '\n');
1747 SCEV_DEBUG_WITH_TYPE(
1748 DebugType, dbgs() << "INDVARS: Original iv: " << *OrigPhiRef << '\n');
1749 ++NumElim;
1750 Value *NewIV = OrigPhiRef;
1751 if (OrigPhiRef->getType() != Phi->getType()) {
1752 IRBuilder<> Builder(L->getHeader(),
1753 L->getHeader()->getFirstInsertionPt());
1754 Builder.SetCurrentDebugLocation(Phi->getDebugLoc());
1755 NewIV = Builder.CreateTruncOrBitCast(OrigPhiRef, Phi->getType(), IVName);
1756 }
1757 Phi->replaceAllUsesWith(NewIV);
1758 DeadInsts.emplace_back(Phi);
1759 }
1760 return NumElim;
1761}
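// Illustrative example (IR names invented): given two congruent header phis
//
//   %iv.wide   = phi i64 [ 0, %ph ], [ %iv.wide.next, %latch ]    ; {0,+,1}<i64>
//   %iv.narrow = phi i32 [ 0, %ph ], [ %iv.narrow.next, %latch ]  ; {0,+,1}<i32>
//
// the wide phi is visited first and (assuming the target reports the truncate
// as free) its truncated expression is recorded in ExprToIVMap. The narrow phi
// is then recognized as congruent and replaced by
//
//   %iv.trunc = trunc i64 %iv.wide to i32
//
// inserted at the header's first insertion point, with the dead narrow phi
// queued in DeadInsts for later removal.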
1762
1763bool SCEVExpander::hasRelatedExistingExpansion(const SCEV *S,
1764 const Instruction *At,
1765 Loop *L) {
1766 using namespace llvm::PatternMatch;
1767
1768 SmallVector<BasicBlock *, 4> ExitingBlocks;
1769 L->getExitingBlocks(ExitingBlocks);
1770
1771 // Look for suitable value in simple conditions at the loop exits.
1772 for (BasicBlock *BB : ExitingBlocks) {
1773 ICmpInst::Predicate Pred;
1774 Instruction *LHS, *RHS;
1775
1776 if (!match(BB->getTerminator(),
1777 m_Br(m_ICmp(Pred, m_Instruction(LHS), m_Instruction(RHS)),
1778 m_BasicBlock(), m_BasicBlock())))
1779 continue;
1780
1781 if (SE.getSCEV(LHS) == S && SE.DT.dominates(LHS, At))
1782 return true;
1783
1784 if (SE.getSCEV(RHS) == S && SE.DT.dominates(RHS, At))
1785 return true;
1786 }
1787
1788 // Use expand's logic which is used for reusing a previous Value in
1789 // ExprValueMap. Note that we don't currently model the cost of
1790 // needing to drop poison generating flags on the instruction if we
1791 // want to reuse it. We effectively assume that has zero cost.
1792 SmallVector<Instruction *> DropPoisonGeneratingInsts;
1793 return FindValueInExprValueMap(S, At, DropPoisonGeneratingInsts) != nullptr;
1794}
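// Illustrative example (names invented): for a loop whose exit is guarded by
//
//   %cmp = icmp eq i64 %iv.next, %n
//   br i1 %cmp, label %exit, label %loop
//
// a query for the SCEV of %n (or of %iv.next) at a point `At` that the
// corresponding definition dominates returns true, so expanding that SCEV is
// treated as free by the cost model below.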
1795
1796template<typename T> static InstructionCost costAndCollectOperands(
1797 const SCEVOperand &WorkItem, const TargetTransformInfo &TTI,
1798 TargetTransformInfo::TargetCostKind CostKind,
1799 SmallVectorImpl<SCEVOperand> &Worklist) {
1800
1801 const T *S = cast<T>(WorkItem.S);
1802 InstructionCost Cost = 0;
1803 // Object to help map SCEV operands to expanded IR instructions.
1804 struct OperationIndices {
1805 OperationIndices(unsigned Opc, size_t min, size_t max) :
1806 Opcode(Opc), MinIdx(min), MaxIdx(max) { }
1807 unsigned Opcode;
1808 size_t MinIdx;
1809 size_t MaxIdx;
1810 };
1811
1812 // Collect the operations of all the instructions that will be needed to
1813 // expand the SCEVExpr. This is so that when we come to cost the operands,
1814 // we know what the generated user(s) will be.
1815 SmallVector<OperationIndices, 2> Operations;
1816
1817 auto CastCost = [&](unsigned Opcode) -> InstructionCost {
1818 Operations.emplace_back(Opcode, 0, 0);
1819 return TTI.getCastInstrCost(Opcode, S->getType(),
1820 S->getOperand(0)->getType(),
1821 TTI::CastContextHint::None, CostKind);
1822 };
1823
1824 auto ArithCost = [&](unsigned Opcode, unsigned NumRequired,
1825 unsigned MinIdx = 0,
1826 unsigned MaxIdx = 1) -> InstructionCost {
1827 Operations.emplace_back(Opcode, MinIdx, MaxIdx);
1828 return NumRequired *
1829 TTI.getArithmeticInstrCost(Opcode, S->getType(), CostKind);
1830 };
1831
1832 auto CmpSelCost = [&](unsigned Opcode, unsigned NumRequired, unsigned MinIdx,
1833 unsigned MaxIdx) -> InstructionCost {
1834 Operations.emplace_back(Opcode, MinIdx, MaxIdx);
1835 Type *OpType = S->getType();
1836 return NumRequired * TTI.getCmpSelInstrCost(
1837 Opcode, OpType, CmpInst::makeCmpResultType(OpType),
1838 CmpInst::BAD_ICMP_PREDICATE, CostKind);
1839 };
1840
1841 switch (S->getSCEVType()) {
1842 case scCouldNotCompute:
1843 llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
1844 case scUnknown:
1845 case scConstant:
1846 case scVScale:
1847 return 0;
1848 case scPtrToInt:
1849 Cost = CastCost(Instruction::PtrToInt);
1850 break;
1851 case scTruncate:
1852 Cost = CastCost(Instruction::Trunc);
1853 break;
1854 case scZeroExtend:
1855 Cost = CastCost(Instruction::ZExt);
1856 break;
1857 case scSignExtend:
1858 Cost = CastCost(Instruction::SExt);
1859 break;
1860 case scUDivExpr: {
1861 unsigned Opcode = Instruction::UDiv;
1862 if (auto *SC = dyn_cast<SCEVConstant>(S->getOperand(1)))
1863 if (SC->getAPInt().isPowerOf2())
1864 Opcode = Instruction::LShr;
1865 Cost = ArithCost(Opcode, 1);
1866 break;
1867 }
1868 case scAddExpr:
1869 Cost = ArithCost(Instruction::Add, S->getNumOperands() - 1);
1870 break;
1871 case scMulExpr:
1872 // TODO: this is a very pessimistic cost modelling for Mul, because of the
1873 // binary powering (BinPow) algorithm actually used by the expander,
1874 // see SCEVExpander::visitMulExpr(), ExpandOpBinPowN().
1875 Cost = ArithCost(Instruction::Mul, S->getNumOperands() - 1);
1876 break;
1877 case scSMaxExpr:
1878 case scUMaxExpr:
1879 case scSMinExpr:
1880 case scUMinExpr:
1881 case scSequentialUMinExpr: {
1882 // FIXME: should this ask the cost for Intrinsics?
1883 // The reduction tree.
1884 Cost += CmpSelCost(Instruction::ICmp, S->getNumOperands() - 1, 0, 1);
1885 Cost += CmpSelCost(Instruction::Select, S->getNumOperands() - 1, 0, 2);
1886 switch (S->getSCEVType()) {
1887 case scSequentialUMinExpr: {
1888 // The safety net against poison.
1889 // FIXME: this is broken.
1890 Cost += CmpSelCost(Instruction::ICmp, S->getNumOperands() - 1, 0, 0);
1891 Cost += ArithCost(Instruction::Or,
1892 S->getNumOperands() > 2 ? S->getNumOperands() - 2 : 0);
1893 Cost += CmpSelCost(Instruction::Select, 1, 0, 1);
1894 break;
1895 }
1896 default:
1897 assert(!isa<SCEVSequentialMinMaxExpr>(S) &&
1898 "Unhandled SCEV expression type?");
1899 break;
1900 }
1901 break;
1902 }
1903 case scAddRecExpr: {
1904 // In this polynomial, we may have some zero operands, and we shouldn't
1905 // really charge for those. So how many non-zero coefficients are there?
1906 int NumTerms = llvm::count_if(S->operands(), [](const SCEV *Op) {
1907 return !Op->isZero();
1908 });
1909
1910 assert(NumTerms >= 1 && "Polynomial should have at least one term.");
1911 assert(!(*std::prev(S->operands().end()))->isZero() &&
1912 "Last operand should not be zero");
1913
1914 // Ignoring constant term (operand 0), how many of the coefficients are u> 1?
1915 int NumNonZeroDegreeNonOneTerms =
1916 llvm::count_if(S->operands(), [](const SCEV *Op) {
1917 auto *SConst = dyn_cast<SCEVConstant>(Op);
1918 return !SConst || SConst->getAPInt().ugt(1);
1919 });
1920
1921 // Much like with a normal add expr, the polynomial will require
1922 // one less addition than the number of its terms.
1923 InstructionCost AddCost = ArithCost(Instruction::Add, NumTerms - 1,
1924 /*MinIdx*/ 1, /*MaxIdx*/ 1);
1925 // Here, *each* one of those will require a multiplication.
1926 InstructionCost MulCost =
1927 ArithCost(Instruction::Mul, NumNonZeroDegreeNonOneTerms);
1928 Cost = AddCost + MulCost;
1929
1930 // What is the degree of this polynomial?
1931 int PolyDegree = S->getNumOperands() - 1;
1932 assert(PolyDegree >= 1 && "Should be at least affine.");
1933
1934 // The final term will be:
1935 // Op_{PolyDegree} * x ^ {PolyDegree}
1936 // Where x ^ {PolyDegree} will again require PolyDegree-1 mul operations.
1937 // Note that x ^ {PolyDegree} = x * x ^ {PolyDegree-1} so charging for
1938 // x ^ {PolyDegree} will give us x ^ {2} .. x ^ {PolyDegree-1} for free.
1939 // FIXME: this is conservatively correct, but might be overly pessimistic.
1940 Cost += MulCost * (PolyDegree - 1);
1941 break;
1942 }
1943 }
1944
1945 for (auto &CostOp : Operations) {
1946 for (auto SCEVOp : enumerate(S->operands())) {
1947 // Clamp the index to account for multiple IR operations being chained.
1948 size_t MinIdx = std::max(SCEVOp.index(), CostOp.MinIdx);
1949 size_t OpIdx = std::min(MinIdx, CostOp.MaxIdx);
1950 Worklist.emplace_back(CostOp.Opcode, OpIdx, SCEVOp.value());
1951 }
1952 }
1953 return Cost;
1954}
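// Worked example for the accounting above (illustrative, not target-specific):
// for S = (5 + %a + %b), an scAddExpr with three operands, ArithCost charges
// 2 * getArithmeticInstrCost(Instruction::Add, Ty) and records a single
// (Add, MinIdx=0, MaxIdx=1) entry. The operand loop then pushes the constant 5
// with operand index 0, %a with index 1, and %b with its index clamped to
// MaxIdx=1, so every operand is later costed as an operand of an add.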
1955
1956bool SCEVExpander::isHighCostExpansionHelper(
1957 const SCEVOperand &WorkItem, Loop *L, const Instruction &At,
1958 InstructionCost &Cost, unsigned Budget, const TargetTransformInfo &TTI,
1960 SmallVectorImpl<SCEVOperand> &Worklist) {
1961 if (Cost > Budget)
1962 return true; // Already run out of budget, give up.
1963
1964 const SCEV *S = WorkItem.S;
1965 // Was the cost of expansion of this expression already accounted for?
1966 if (!isa<SCEVConstant>(S) && !Processed.insert(S).second)
1967 return false; // We have already accounted for this expression.
1968
1969 // If we can find an existing value for this scev available at the point "At"
1970 // then consider the expression cheap.
1971 if (hasRelatedExistingExpansion(S, &At, L))
1972 return false; // Consider the expression to be free.
1973
1974 TargetTransformInfo::TargetCostKind CostKind =
1975 L->getHeader()->getParent()->hasMinSize()
1976 ? TargetTransformInfo::TCK_CodeSize
1977 : TargetTransformInfo::TCK_RecipThroughput;
1978
1979 switch (S->getSCEVType()) {
1980 case scCouldNotCompute:
1981 llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
1982 case scUnknown:
1983 case scVScale:
1984 // Assume to be zero-cost.
1985 return false;
1986 case scConstant: {
1987 // Only evaluate the costs of constants when optimizing for size.
1988 if (CostKind != TargetTransformInfo::TCK_CodeSize)
1989 return false;
1990 const APInt &Imm = cast<SCEVConstant>(S)->getAPInt();
1991 Type *Ty = S->getType();
1992 Cost += TTI.getIntImmCostInst(
1993 WorkItem.ParentOpcode, WorkItem.OperandIdx, Imm, Ty, CostKind);
1994 return Cost > Budget;
1995 }
1996 case scTruncate:
1997 case scPtrToInt:
1998 case scZeroExtend:
1999 case scSignExtend: {
2000 Cost +=
2001 costAndCollectOperands<SCEVCastExpr>(WorkItem, TTI, CostKind, Worklist);
2002 return false; // Will answer upon next entry into this function.
2003 }
2004 case scUDivExpr: {
2005 // UDivExpr is very likely a UDiv that ScalarEvolution's HowFarToZero or
2006 // HowManyLessThans produced to compute a precise expression, rather than a
2007 // UDiv from the user's code. If we can't find a UDiv in the code with some
2008 // simple searching, we need to account for its cost.
2009
2010 // At the beginning of this function we already tried to find existing
2011 // value for plain 'S'. Now try to look up 'S + 1', since that is a common
2012 // pattern involving division. This is just a simple search heuristic.
2013 if (hasRelatedExistingExpansion(
2014 SE.getAddExpr(S, SE.getConstant(S->getType(), 1)), &At, L))
2015 return false; // Consider it to be free.
2016
2017 Cost +=
2018 costAndCollectOperands<SCEVUDivExpr>(WorkItem, TTI, CostKind, Worklist);
2019 return false; // Will answer upon next entry into this function.
2020 }
2021 case scAddExpr:
2022 case scMulExpr:
2023 case scUMaxExpr:
2024 case scSMaxExpr:
2025 case scUMinExpr:
2026 case scSMinExpr:
2027 case scSequentialUMinExpr: {
2028 assert(cast<SCEVNAryExpr>(S)->getNumOperands() > 1 &&
2029 "Nary expr should have more than 1 operand.");
2030 // The simple nary expr will require one less op (or pair of ops)
2031 // than the number of its terms.
2032 Cost +=
2033 costAndCollectOperands<SCEVNAryExpr>(WorkItem, TTI, CostKind, Worklist);
2034 return Cost > Budget;
2035 }
2036 case scAddRecExpr: {
2037 assert(cast<SCEVAddRecExpr>(S)->getNumOperands() >= 2 &&
2038 "Polynomial should be at least linear");
2039 Cost += costAndCollectOperands<SCEVAddRecExpr>(
2040 WorkItem, TTI, CostKind, Worklist);
2041 return Cost > Budget;
2042 }
2043 }
2044 llvm_unreachable("Unknown SCEV kind!");
2045}
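// Minimal sketch of the intended driver loop (paraphrased; the real entry
// point is isHighCostExpansion() declared in the header):
//
//   SmallPtrSet<const SCEV *, 8> Processed;
//   SmallVector<SCEVOperand> Worklist;
//   InstructionCost Cost = 0;
//   Worklist.emplace_back(-1, -1, Root);   // no parent opcode/operand index yet
//   while (!Worklist.empty()) {
//     SCEVOperand Item = Worklist.pop_back_val();
//     if (isHighCostExpansionHelper(Item, L, *At, Cost, Budget, TTI, Processed,
//                                   Worklist))
//       return true;                       // budget exceeded, expansion is costly
//   }
//   return false;                          // whole expression fits in the budget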
2046
2047Value *SCEVExpander::expandCodeForPredicate(const SCEVPredicate *Pred,
2048 Instruction *IP) {
2049 assert(IP);
2050 switch (Pred->getKind()) {
2051 case SCEVPredicate::P_Union:
2052 return expandUnionPredicate(cast<SCEVUnionPredicate>(Pred), IP);
2053 case SCEVPredicate::P_Compare:
2054 return expandComparePredicate(cast<SCEVComparePredicate>(Pred), IP);
2055 case SCEVPredicate::P_Wrap: {
2056 auto *AddRecPred = cast<SCEVWrapPredicate>(Pred);
2057 return expandWrapPredicate(AddRecPred, IP);
2058 }
2059 }
2060 llvm_unreachable("Unknown SCEV predicate type");
2061}
2062
2063Value *SCEVExpander::expandComparePredicate(const SCEVComparePredicate *Pred,
2064 Instruction *IP) {
2065 Value *Expr0 = expand(Pred->getLHS(), IP);
2066 Value *Expr1 = expand(Pred->getRHS(), IP);
2067
2068 Builder.SetInsertPoint(IP);
2069 auto InvPred = ICmpInst::getInversePredicate(Pred->getPredicate());
2070 auto *I = Builder.CreateICmp(InvPred, Expr0, Expr1, "ident.check");
2071 return I;
2072}
2073
2074Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR,
2075 Instruction *Loc, bool Signed) {
2076 assert(AR->isAffine() && "Cannot generate RT check for "
2077 "non-affine expression");
2078
2079 // FIXME: It is highly suspicious that we're ignoring the predicates here.
2080 SmallVector<const SCEVPredicate *, 4> Pred;
2081 const SCEV *ExitCount =
2082 SE.getPredicatedBackedgeTakenCount(AR->getLoop(), Pred);
2083
2084 assert(!isa<SCEVCouldNotCompute>(ExitCount) && "Invalid loop count");
2085
2086 const SCEV *Step = AR->getStepRecurrence(SE);
2087 const SCEV *Start = AR->getStart();
2088
2089 Type *ARTy = AR->getType();
2090 unsigned SrcBits = SE.getTypeSizeInBits(ExitCount->getType());
2091 unsigned DstBits = SE.getTypeSizeInBits(ARTy);
2092
2093 // The expression {Start,+,Step} has nusw/nssw if
2094 // Step < 0, Start - |Step| * Backedge <= Start
2095 // Step >= 0, Start + |Step| * Backedge > Start
2096 // and |Step| * Backedge doesn't unsigned overflow.
2097
2098 Builder.SetInsertPoint(Loc);
2099 Value *TripCountVal = expand(ExitCount, Loc);
2100
2101 IntegerType *Ty =
2102 IntegerType::get(Loc->getContext(), SE.getTypeSizeInBits(ARTy));
2103
2104 Value *StepValue = expand(Step, Loc);
2105 Value *NegStepValue = expand(SE.getNegativeSCEV(Step), Loc);
2106 Value *StartValue = expand(Start, Loc);
2107
2108 ConstantInt *Zero =
2109 ConstantInt::get(Loc->getContext(), APInt::getZero(DstBits));
2110
2111 Builder.SetInsertPoint(Loc);
2112 // Compute |Step|
2113 Value *StepCompare = Builder.CreateICmp(ICmpInst::ICMP_SLT, StepValue, Zero);
2114 Value *AbsStep = Builder.CreateSelect(StepCompare, NegStepValue, StepValue);
2115
2116 // Compute |Step| * Backedge
2117 // Compute:
2118 // 1. Start + |Step| * Backedge < Start
2119 // 2. Start - |Step| * Backedge > Start
2120 //
2121 // And select either 1. or 2. depending on whether step is positive or
2122 // negative. If Step is known to be positive or negative, only create
2123 // either 1. or 2.
2124 auto ComputeEndCheck = [&]() -> Value * {
2125 // Checking <u 0 is always false.
2126 if (!Signed && Start->isZero() && SE.isKnownPositive(Step))
2127 return ConstantInt::getFalse(Loc->getContext());
2128
2129 // Get the backedge-taken count and truncate or extend it to the AR type.
2130 Value *TruncTripCount = Builder.CreateZExtOrTrunc(TripCountVal, Ty);
2131
2132 Value *MulV, *OfMul;
2133 if (Step->isOne()) {
2134 // Special-case Step of one. Potentially-costly `umul_with_overflow` isn't
2135 // needed, there is never an overflow, so to avoid artificially inflating
2136 // the cost of the check, directly emit the optimized IR.
2137 MulV = TruncTripCount;
2138 OfMul = ConstantInt::getFalse(MulV->getContext());
2139 } else {
2140 auto *MulF = Intrinsic::getDeclaration(Loc->getModule(),
2141 Intrinsic::umul_with_overflow, Ty);
2142 CallInst *Mul =
2143 Builder.CreateCall(MulF, {AbsStep, TruncTripCount}, "mul");
2144 MulV = Builder.CreateExtractValue(Mul, 0, "mul.result");
2145 OfMul = Builder.CreateExtractValue(Mul, 1, "mul.overflow");
2146 }
2147
2148 Value *Add = nullptr, *Sub = nullptr;
2149 bool NeedPosCheck = !SE.isKnownNegative(Step);
2150 bool NeedNegCheck = !SE.isKnownPositive(Step);
2151
2152 if (isa<PointerType>(ARTy)) {
2153 Value *NegMulV = Builder.CreateNeg(MulV);
2154 if (NeedPosCheck)
2155 Add = Builder.CreatePtrAdd(StartValue, MulV);
2156 if (NeedNegCheck)
2157 Sub = Builder.CreatePtrAdd(StartValue, NegMulV);
2158 } else {
2159 if (NeedPosCheck)
2160 Add = Builder.CreateAdd(StartValue, MulV);
2161 if (NeedNegCheck)
2162 Sub = Builder.CreateSub(StartValue, MulV);
2163 }
2164
2165 Value *EndCompareLT = nullptr;
2166 Value *EndCompareGT = nullptr;
2167 Value *EndCheck = nullptr;
2168 if (NeedPosCheck)
2169 EndCheck = EndCompareLT = Builder.CreateICmp(
2170 Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, Add, StartValue);
2171 if (NeedNegCheck)
2172 EndCheck = EndCompareGT = Builder.CreateICmp(
2173 Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT, Sub, StartValue);
2174 if (NeedPosCheck && NeedNegCheck) {
2175 // Select the answer based on the sign of Step.
2176 EndCheck = Builder.CreateSelect(StepCompare, EndCompareGT, EndCompareLT);
2177 }
2178 return Builder.CreateOr(EndCheck, OfMul);
2179 };
2180 Value *EndCheck = ComputeEndCheck();
2181
2182 // If the backedge taken count type is larger than the AR type,
2183 // check that we don't drop any bits by truncating it. If we are
2184 // dropping bits, then we have overflow (unless the step is zero).
2185 if (SrcBits > DstBits) {
2186 auto MaxVal = APInt::getMaxValue(DstBits).zext(SrcBits);
2187 auto *BackedgeCheck =
2188 Builder.CreateICmp(ICmpInst::ICMP_UGT, TripCountVal,
2189 ConstantInt::get(Loc->getContext(), MaxVal));
2190 BackedgeCheck = Builder.CreateAnd(
2191 BackedgeCheck, Builder.CreateICmp(ICmpInst::ICMP_NE, StepValue, Zero));
2192
2193 EndCheck = Builder.CreateOr(EndCheck, BackedgeCheck);
2194 }
2195
2196 return EndCheck;
2197}
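// Illustrative IR shape (value names invented): for an affine recurrence
// {%start,+,%step}<i64> with a step known to be positive and a 64-bit exit
// count %ec, the emitted sequence is roughly
//
//   %mul  = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %step, i64 %ec)
//   %mulv = extractvalue { i64, i1 } %mul, 0
//   %ofl  = extractvalue { i64, i1 } %mul, 1
//   %add  = add i64 %start, %mulv
//   %cmp  = icmp ult i64 %add, %start        ; slt for the signed (nssw) check
//   %chk  = or i1 %cmp, %ofl
//
// where a true %chk means the assumed no-wrap property may not hold, so the
// caller should fall back to a conservatively correct code path.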
2198
2199Value *SCEVExpander::expandWrapPredicate(const SCEVWrapPredicate *Pred,
2200 Instruction *IP) {
2201 const auto *A = cast<SCEVAddRecExpr>(Pred->getExpr());
2202 Value *NSSWCheck = nullptr, *NUSWCheck = nullptr;
2203
2204 // Add a check for NUSW
2205 if (Pred->getFlags() & SCEVWrapPredicate::IncrementNUSW)
2206 NUSWCheck = generateOverflowCheck(A, IP, false);
2207
2208 // Add a check for NSSW
2209 if (Pred->getFlags() & SCEVWrapPredicate::IncrementNSSW)
2210 NSSWCheck = generateOverflowCheck(A, IP, true);
2211
2212 if (NUSWCheck && NSSWCheck)
2213 return Builder.CreateOr(NUSWCheck, NSSWCheck);
2214
2215 if (NUSWCheck)
2216 return NUSWCheck;
2217
2218 if (NSSWCheck)
2219 return NSSWCheck;
2220
2221 return ConstantInt::getFalse(IP->getContext());
2222}
2223
2224Value *SCEVExpander::expandUnionPredicate(const SCEVUnionPredicate *Union,
2225 Instruction *IP) {
2226 // Loop over all checks in this set.
2227 SmallVector<Value *> Checks;
2228 for (const auto *Pred : Union->getPredicates()) {
2229 Checks.push_back(expandCodeForPredicate(Pred, IP));
2230 Builder.SetInsertPoint(IP);
2231 }
2232
2233 if (Checks.empty())
2234 return ConstantInt::getFalse(IP->getContext());
2235 return Builder.CreateOr(Checks);
2236}
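// Illustrative composition (names invented): for a union {P1, P2} the emitted
// code is the OR of the individual checks,
//
//   %c1  = ...                ; check generated for P1
//   %c2  = ...                ; check generated for P2
//   %bad = or i1 %c1, %c2
//
// so a true result means at least one assumed predicate may not hold at run
// time, matching the semantics of the per-predicate helpers above.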
2237
2238Value *SCEVExpander::fixupLCSSAFormFor(Value *V) {
2239 auto *DefI = dyn_cast<Instruction>(V);
2240 if (!PreserveLCSSA || !DefI)
2241 return V;
2242
2243 BasicBlock::iterator InsertPt = Builder.GetInsertPoint();
2244 Loop *DefLoop = SE.LI.getLoopFor(DefI->getParent());
2245 Loop *UseLoop = SE.LI.getLoopFor(InsertPt->getParent());
2246 if (!DefLoop || UseLoop == DefLoop || DefLoop->contains(UseLoop))
2247 return V;
2248
2249 // Create a temporary instruction at the current insertion point, so we
2250 // can hand it off to the helper to create LCSSA PHIs if required for the
2251 // new use.
2252 // FIXME: Ideally formLCSSAForInstructions (used in fixupLCSSAFormFor)
2253 // would accept an insertion point and return an LCSSA phi for that
2254 // insertion point, so there is no need to insert & remove the temporary
2255 // instruction.
2256 Type *ToTy;
2257 if (DefI->getType()->isIntegerTy())
2258 ToTy = PointerType::get(DefI->getContext(), 0);
2259 else
2260 ToTy = Type::getInt32Ty(DefI->getContext());
2261 Instruction *User =
2262 CastInst::CreateBitOrPointerCast(DefI, ToTy, "tmp.lcssa.user", InsertPt);
2263 auto RemoveUserOnExit =
2264 make_scope_exit([User]() { User->eraseFromParent(); });
2265
2267 ToUpdate.push_back(DefI);
2268 SmallVector<PHINode *, 16> PHIsToRemove;
2269 SmallVector<PHINode *, 16> InsertedPHIs;
2270 formLCSSAForInstructions(ToUpdate, SE.DT, SE.LI, &SE, &PHIsToRemove,
2271 &InsertedPHIs);
2272 for (PHINode *PN : InsertedPHIs)
2273 rememberInstruction(PN);
2274 for (PHINode *PN : PHIsToRemove) {
2275 if (!PN->use_empty())
2276 continue;
2277 InsertedValues.erase(PN);
2278 InsertedPostIncValues.erase(PN);
2279 PN->eraseFromParent();
2280 }
2281
2282 return User->getOperand(0);
2283}
2284
2285namespace {
2286// Search for a SCEV subexpression that is not safe to expand. Any expression
2287// that may expand to a !isSafeToSpeculativelyExecute value is unsafe, namely
2288// UDiv expressions. We don't know if the UDiv is derived from an IR divide
2289// instruction, but the important thing is that we prove the denominator is
2290// nonzero before expansion.
2291//
2292// IVUsers already checks that IV-derived expressions are safe. So this check is
2293// only needed when the expression includes some subexpression that is not IV
2294// derived.
2295//
2296// Currently, we only allow division by a value provably non-zero here.
2297//
2298// We cannot generally expand recurrences unless the step dominates the loop
2299// header. The expander handles the special case of affine recurrences by
2300// scaling the recurrence outside the loop, but this technique isn't generally
2301// applicable. Expanding a nested recurrence outside a loop requires computing
2302// binomial coefficients. This could be done, but the recurrence has to be in a
2303// perfectly reduced form, which can't be guaranteed.
2304struct SCEVFindUnsafe {
2305 ScalarEvolution &SE;
2306 bool CanonicalMode;
2307 bool IsUnsafe = false;
2308
2309 SCEVFindUnsafe(ScalarEvolution &SE, bool CanonicalMode)
2310 : SE(SE), CanonicalMode(CanonicalMode) {}
2311
2312 bool follow(const SCEV *S) {
2313 if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) {
2314 if (!SE.isKnownNonZero(D->getRHS())) {
2315 IsUnsafe = true;
2316 return false;
2317 }
2318 }
2319 if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
2320 // For non-affine addrecs or in non-canonical mode we need a preheader
2321 // to insert into.
2322 if (!AR->getLoop()->getLoopPreheader() &&
2323 (!CanonicalMode || !AR->isAffine())) {
2324 IsUnsafe = true;
2325 return false;
2326 }
2327 }
2328 return true;
2329 }
2330 bool isDone() const { return IsUnsafe; }
2331};
2332} // namespace
2333
2334bool SCEVExpander::isSafeToExpand(const SCEV *S) const {
2335 SCEVFindUnsafe Search(SE, CanonicalMode);
2336 visitAll(S, Search);
2337 return !Search.IsUnsafe;
2338}
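// Example of what SCEVFindUnsafe rejects (names invented): for
// S = (%n /u %d) where ScalarEvolution cannot prove %d != 0, expansion would
// materialize a udiv whose division by zero is immediate undefined behavior,
// outside whatever guard the original code placed around the division, so
// isSafeToExpand returns false for S.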
2339
2340bool SCEVExpander::isSafeToExpandAt(const SCEV *S,
2341 const Instruction *InsertionPoint) const {
2342 if (!isSafeToExpand(S))
2343 return false;
2344 // We have to prove that the expanded site of S dominates InsertionPoint.
2345 // This is easy when not in the same block, but hard when S is an instruction
2346 // to be expanded somewhere inside the same block as our insertion point.
2347 // What we really need here is something analogous to an OrderedBasicBlock,
2348 // but for the moment, we paper over the problem by handling two common and
2349 // cheap to check cases.
2350 if (SE.properlyDominates(S, InsertionPoint->getParent()))
2351 return true;
2352 if (SE.dominates(S, InsertionPoint->getParent())) {
2353 if (InsertionPoint->getParent()->getTerminator() == InsertionPoint)
2354 return true;
2355 if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S))
2356 if (llvm::is_contained(InsertionPoint->operand_values(), U->getValue()))
2357 return true;
2358 }
2359 return false;
2360}
2361
2362void SCEVExpanderCleaner::cleanup() {
2363 // Result is used, nothing to remove.
2364 if (ResultUsed)
2365 return;
2366
2367 // Restore original poison flags.
2368 for (auto [I, Flags] : Expander.OrigFlags)
2369 Flags.apply(I);
2370
2371 auto InsertedInstructions = Expander.getAllInsertedInstructions();
2372#ifndef NDEBUG
2373 SmallPtrSet<Instruction *, 8> InsertedSet(InsertedInstructions.begin(),
2374 InsertedInstructions.end());
2375 (void)InsertedSet;
2376#endif
2377 // Remove sets with value handles.
2378 Expander.clear();
2379
2380 // Remove all inserted instructions.
2381 for (Instruction *I : reverse(InsertedInstructions)) {
2382#ifndef NDEBUG
2383 assert(all_of(I->users(),
2384 [&InsertedSet](Value *U) {
2385 return InsertedSet.contains(cast<Instruction>(U));
2386 }) &&
2387 "removed instruction should only be used by instructions inserted "
2388 "during expansion");
2389#endif
2390 assert(!I->getType()->isVoidTy() &&
2391 "inserted instruction should have non-void types");
2392 I->replaceAllUsesWith(PoisonValue::get(I->getType()));
2393 I->eraseFromParent();
2394 }
2395}
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
std::string Name
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
Hexagon Hardware Loops
#define I(x, y, z)
Definition: MD5.cpp:58
#define P(N)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
static bool IsIncrementNUW(ScalarEvolution &SE, const SCEVAddRecExpr *AR)
static const Loop * PickMostRelevantLoop(const Loop *A, const Loop *B, DominatorTree &DT)
PickMostRelevantLoop - Given two loops pick the one that's most relevant for SCEV expansion.
static InstructionCost costAndCollectOperands(const SCEVOperand &WorkItem, const TargetTransformInfo &TTI, TargetTransformInfo::TargetCostKind CostKind, SmallVectorImpl< SCEVOperand > &Worklist)
static bool IsIncrementNSW(ScalarEvolution &SE, const SCEVAddRecExpr *AR)
static bool canBeCheaplyTransformed(ScalarEvolution &SE, const SCEVAddRecExpr *Phi, const SCEVAddRecExpr *Requested, bool &InvertStep)
Check whether we can cheaply express the requested SCEV in terms of the available PHI SCEV by truncat...
#define SCEV_DEBUG_WITH_TYPE(TYPE, X)
This file defines the make_scope_exit function, which executes user-defined cleanup logic at scope ex...
This file defines the SmallSet class.
This pass exposes codegen information to IR-level passes.
Value * RHS
Value * LHS
Class for arbitrary precision integers.
Definition: APInt.h:76
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:981
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition: APInt.h:184
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition: APInt.h:178
This class represents an incoming formal argument to a Function.
Definition: Argument.h:31
LLVM Basic Block Representation.
Definition: BasicBlock.h:60
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:430
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:409
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:206
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:165
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:221
static BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name, BasicBlock::iterator InsertBefore)
Construct a binary instruction, given the opcode and the two operands.
This class represents a function call, abstracting a target machine's calling convention.
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:601
static Instruction::CastOps getCastOpcode(const Value *Val, bool SrcIsSigned, Type *Ty, bool DstIsSigned)
Returns the opcode necessary to cast Val into Ty using usual casting rules.
Instruction::CastOps getOpcode() const
Return the opcode of this CastInst.
Definition: InstrTypes.h:930
static CastInst * CreateBitOrPointerCast(Value *S, Type *Ty, const Twine &Name, BasicBlock::iterator InsertBefore)
Create a BitCast, a PtrToInt, or an IntToPTr cast instruction.
static Type * makeCmpResultType(Type *opnd_type)
Create a result type for fcmp/icmp.
Definition: InstrTypes.h:1362
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:993
@ ICMP_SLT
signed less than
Definition: InstrTypes.h:1022
@ ICMP_UGT
unsigned greater than
Definition: InstrTypes.h:1016
@ ICMP_SGT
signed greater than
Definition: InstrTypes.h:1020
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:1018
@ ICMP_NE
not equal
Definition: InstrTypes.h:1015
@ ICMP_SGE
signed greater or equal
Definition: InstrTypes.h:1021
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition: InstrTypes.h:1129
A constant value that is initialized with an expression using other constant values.
Definition: Constants.h:1017
static Constant * getCast(unsigned ops, Constant *C, Type *Ty, bool OnlyIfReduced=false)
Convenience function for getting a Cast operation.
Definition: Constants.cpp:2041
This is the shared class of boolean and integer constants.
Definition: Constants.h:80
static ConstantInt * getFalse(LLVMContext &Context)
Definition: Constants.cpp:856
This is an important base class in LLVM.
Definition: Constant.h:41
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:370
This class represents an Operation in the Expression.
bool isNonIntegralPointerType(PointerType *PT) const
Definition: DataLayout.h:398
A debug info location.
Definition: DebugLoc.h:33
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Definition: Dominators.cpp:122
const BasicBlock & getEntryBlock() const
Definition: Function.h:783
Value * CreateVScale(Constant *Scaling, const Twine &Name="")
Create a call to llvm.vscale, multiplied by Scaling.
Definition: IRBuilder.cpp:88
Value * CreatePtrAdd(Value *Ptr, Value *Offset, const Twine &Name="", bool IsInBounds=false)
Definition: IRBuilder.h:1978
Value * CreateZExtOrTrunc(Value *V, Type *DestTy, const Twine &Name="")
Create a ZExt or Trunc from the integer value V to DestTy.
Definition: IRBuilder.h:2039
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2516
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, Instruction *FMFSource=nullptr, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Definition: IRBuilder.cpp:932
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Definition: IRBuilder.cpp:1110
BasicBlock::iterator GetInsertPoint() const
Definition: IRBuilder.h:175
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2033
Value * CreateFreeze(Value *V, const Twine &Name="")
Definition: IRBuilder.h:2535
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:174
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Definition: IRBuilder.h:220
Value * CreateNeg(Value *V, const Twine &Name="", bool HasNSW=false)
Definition: IRBuilder.h:1721
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Definition: IRBuilder.h:2397
InstTy * Insert(InstTy *I, const Twine &Name="") const
Insert and return the specified instruction.
Definition: IRBuilder.h:145
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1344
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2021
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1475
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1327
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition: IRBuilder.h:2007
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1497
Value * CreateCast(Instruction::CastOps Op, Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2161
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition: IRBuilder.h:180
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args=std::nullopt, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2412
Value * CreateTruncOrBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2153
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2351
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Definition: IRBuilder.h:516
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2666
void setHasNoUnsignedWrap(bool b=true)
Set or clear the nuw flag on this instruction, which must be an operator which supports this flag.
void setHasNoSignedWrap(bool b=true)
Set or clear the nsw flag on this instruction, which must be an operator which supports this flag.
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:454
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:83
const BasicBlock * getParent() const
Definition: Instruction.h:152
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
const Function * getFunction() const
Return the function this instruction belongs to.
Definition: Instruction.cpp:87
bool mayHaveSideEffects() const LLVM_READONLY
Return true if the instruction may have side effects.
bool comesBefore(const Instruction *Other) const
Given an instruction Other in the same basic block as this instruction, return true if this instructi...
const Instruction * getNextNonDebugInstruction(bool SkipPseudoOp=false) const
Return a pointer to the next non-debug instruction in the same basic block as 'this',...
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:252
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:451
Class to represent integer types.
Definition: DerivedTypes.h:40
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:278
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
BlockT * getHeader() const
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
bool replacementPreservesLCSSAForm(Instruction *From, Value *To)
Returns true if replacing From with To everywhere is guaranteed to preserve LCSSA form.
Definition: LoopInfo.h:439
bool movementPreservesLCSSAForm(Instruction *Inst, Instruction *NewLoc)
Checks if moving a specific instruction can break LCSSA in any loop.
Definition: LoopInfo.h:465
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:44
ICmpInst::Predicate getPredicate() const
Returns the comparison predicate underlying the intrinsic.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
bool isComplete() const
If the PHI node is complete which means all of its parent's predecessors have incoming value in this ...
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr, BasicBlock::iterator InsertBefore)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
Value * getIncomingValueForBlock(const BasicBlock *BB) const
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1827
This node represents an addition of some number of SCEVs.
This node represents a polynomial recurrence on the trip count of the specified loop.
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
const SCEV * getOperand() const
This class represents an assumption that the expression LHS Pred RHS evaluates to true,...
const SCEV * getRHS() const
Returns the right hand side of the predicate.
ICmpInst::Predicate getPredicate() const
const SCEV * getLHS() const
Returns the left hand side of the predicate.
This class represents a constant integer value.
Value * generateOverflowCheck(const SCEVAddRecExpr *AR, Instruction *Loc, bool Signed)
Generates code that evaluates if the AR expression will overflow.
bool hasRelatedExistingExpansion(const SCEV *S, const Instruction *At, Loop *L)
Determine whether there is an existing expansion of S that can be reused.
SmallVector< Instruction *, 32 > getAllInsertedInstructions() const
Return a vector containing all instructions inserted during expansion.
bool isSafeToExpand(const SCEV *S) const
Return true if the given expression is safe to expand in the sense that all materialized values are s...
bool isSafeToExpandAt(const SCEV *S, const Instruction *InsertionPoint) const
Return true if the given expression is safe to expand in the sense that all materialized values are d...
unsigned replaceCongruentIVs(Loop *L, const DominatorTree *DT, SmallVectorImpl< WeakTrackingVH > &DeadInsts, const TargetTransformInfo *TTI=nullptr)
replace congruent phis with their most canonical representative.
Value * expandUnionPredicate(const SCEVUnionPredicate *Pred, Instruction *Loc)
A specialized variant of expandCodeForPredicate, handling the case when we are expanding code for a S...
bool hoistIVInc(Instruction *IncV, Instruction *InsertPos, bool RecomputePoisonFlags=false)
Utility for hoisting IncV (with all subexpressions requried for its computation) before InsertPos.
void clear()
Erase the contents of the InsertedExpressions map so that users trying to expand the same expression ...
bool isInsertedInstruction(Instruction *I) const
Return true if the specified instruction was inserted by the code rewriter.
Value * expandCodeForPredicate(const SCEVPredicate *Pred, Instruction *Loc)
Generates a code sequence that evaluates this predicate.
static bool canReuseFlagsFromOriginalIVInc(PHINode *OrigPhi, PHINode *WidePhi, Instruction *OrigInc, Instruction *WideInc)
Return true if both increments directly increment the corresponding IV PHI nodes and have the same op...
Value * expandComparePredicate(const SCEVComparePredicate *Pred, Instruction *Loc)
A specialized variant of expandCodeForPredicate, handling the case when we are expanding code for a S...
Value * expandCodeFor(const SCEV *SH, Type *Ty, BasicBlock::iterator I)
Insert code to directly compute the specified SCEV expression into the program.
Value * expandWrapPredicate(const SCEVWrapPredicate *P, Instruction *Loc)
A specialized variant of expandCodeForPredicate, handling the case when we are expanding code for a S...
Instruction * getIVIncOperand(Instruction *IncV, Instruction *InsertPos, bool allowScale)
Return the induction variable increment's IV operand.
BasicBlock::iterator findInsertPointAfter(Instruction *I, Instruction *MustDominate) const
Returns a suitable insert point after I, that dominates MustDominate.
void setInsertPoint(Instruction *IP)
Set the current insertion point.
This node represents multiplication of some number of SCEVs.
This node is a base class providing common functionality for n'ary operators.
NoWrapFlags getNoWrapFlags(NoWrapFlags Mask=NoWrapMask) const
const SCEV * getOperand(unsigned i) const
ArrayRef< const SCEV * > operands() const
This class represents an assumption made using SCEV expressions which can be checked at run-time.
SCEVPredicateKind getKind() const
This class represents a cast from a pointer to a pointer-sized integer value.
This class represents a signed maximum selection.
This class represents a signed minimum selection.
This class represents a sequential/in-order unsigned minimum selection.
This class represents a sign extension of a small integer value to a larger integer value.
This class represents a truncation of an integer value to a smaller integer value.
This class represents a binary unsigned division operation.
const SCEV * getLHS() const
const SCEV * getRHS() const
This class represents an unsigned maximum selection.
This class represents an unsigned minimum selection.
This class represents a composition of other SCEV predicates, and is the class that most clients will...
This means that we are dealing with an entirely unknown SCEV value, and only represent it as its LLVM...
This class represents the value of vscale, as used when defining the length of a scalable vector or r...
This class represents an assumption made on an AddRec expression.
const SCEVAddRecExpr * getExpr() const
Implementation of the SCEVPredicate interface.
IncrementWrapFlags getFlags() const
Returns the set assumed no overflow flags.
This class represents a zero extension of a small integer value to a larger integer value.
This class represents an analyzed expression in the program.
ArrayRef< const SCEV * > operands() const
Return operands of this SCEV expression.
bool isOne() const
Return true if the expression is a constant one.
bool isZero() const
Return true if the expression is a constant zero.
bool isNonConstantNegative() const
Return true if the specified scev is negated, but not a constant.
SCEVTypes getSCEVType() const
Type * getType() const
Return the LLVM type of this SCEV expression.
NoWrapFlags
NoWrapFlags are bitfield indices into SubclassData.
The main scalar evolution driver.
bool isKnownNonNegative(const SCEV *S)
Test if the given expression is known to be non-negative.
const SCEV * getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Return the SCEV object corresponding to -V.
bool isKnownNegative(const SCEV *S)
Test if the given expression is known to be negative.
const SCEV * removePointerBase(const SCEV *S)
Compute an expression equivalent to S - getPointerBase(S).
bool isKnownNonZero(const SCEV *S)
Test if the given expression is known to be non-zero.
uint64_t getTypeSizeInBits(Type *Ty) const
Return the size in bits of the specified type, for which isSCEVable must return true.
const SCEV * getConstant(ConstantInt *V)
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
const SCEV * getTruncateOrNoop(const SCEV *V, Type *Ty)
Return a SCEV corresponding to a conversion of the input value to the specified type.
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
bool isKnownPositive(const SCEV *S)
Test if the given expression is known to be positive.
bool containsAddRecurrence(const SCEV *S)
Return true if the SCEV is a scAddRecExpr or it contains scAddRecExpr.
const SCEV * getAddRecExpr(const SCEV *Start, const SCEV *Step, const Loop *L, SCEV::NoWrapFlags Flags)
Get an add recurrence expression for the specified loop.
const SCEV * getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
Type * getEffectiveSCEVType(Type *Ty) const
Return a type with the same bitwidth as the given type and which represents how SCEV will treat the g...
const SCEV * getPredicatedBackedgeTakenCount(const Loop *L, SmallVector< const SCEVPredicate *, 4 > &Predicates)
Similar to getBackedgeTakenCount, except it will add a set of SCEV predicates to Predicates that are ...
static SCEV::NoWrapFlags clearFlags(SCEV::NoWrapFlags Flags, SCEV::NoWrapFlags OffFlags)
void forgetValue(Value *V)
This method should be called by the client when it has changed a value in a way that may effect its v...
const SCEV * getNoopOrAnyExtend(const SCEV *V, Type *Ty)
Return a SCEV corresponding to a conversion of the input value to the specified type.
const SCEV * getTruncateExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
const SCEV * getAnyExtendExpr(const SCEV *Op, Type *Ty)
getAnyExtendExpr - Return a SCEV for the given operand extended with unspecified bits out to the give...
std::optional< SCEV::NoWrapFlags > getStrengthenedNoWrapFlagsFromBinOp(const OverflowingBinaryOperator *OBO)
Parse NSW/NUW flags from add/sub/mul IR binary operation Op into SCEV no-wrap flags,...
const SCEV * getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
bool hasComputableLoopEvolution(const SCEV *S, const Loop *L)
Return true if the given SCEV changes value in a known way in the specified loop.
const SCEV * getPointerBase(const SCEV *V)
Transitively follow the chain of pointer-type operands until reaching a SCEV that does not have a sin...
bool dominates(const SCEV *S, const BasicBlock *BB)
Return true if elements that makes up the given SCEV dominate the specified basic block.
const SCEV * getMulExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
const SCEV * getUnknown(Value *V)
static SCEV::NoWrapFlags maskFlags(SCEV::NoWrapFlags Flags, int Mask)
Convenient NoWrapFlags manipulation that hides enum casts and is visible in the ScalarEvolution name ...
bool properlyDominates(const SCEV *S, const BasicBlock *BB)
Return true if elements that makes up the given SCEV properly dominate the specified basic block.
bool canReuseInstruction(const SCEV *S, Instruction *I, SmallVectorImpl< Instruction * > &DropPoisonGeneratingInsts)
Check whether it is poison-safe to represent the expression S using the instruction I.
const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:321
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:360
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:342
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
bool empty() const
Definition: SmallVector.h:94
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind=TTI::TCK_SizeAndLatency, const Instruction *I=nullptr) const
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args=std::nullopt, const Instruction *CxtI=nullptr, const TargetLibraryInfo *TLibInfo=nullptr) const
This is an approximation of reciprocal throughput of a math/logic op.
InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty, TargetCostKind CostKind, Instruction *Inst=nullptr) const
Return the expected cost of materialization for the given integer immediate of the specified type for...
TargetCostKind
The kind of cost model.
@ TCK_RecipThroughput
Reciprocal throughput.
@ TCK_CodeSize
Instruction code size.
bool isTruncateFree(Type *Ty1, Type *Ty2) const
Return true if it's free to truncate a value of type Ty1 to type Ty2.
@ None
The cast is not used with a load/store of any kind.
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, const Instruction *I=nullptr) const
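These TargetTransformInfo hooks are how a transformation asks whether the instructions it is about to create are affordable on the target. A minimal sketch, assuming integer types WideTy and NarrowTy are in scope; the helper name isCheapAddAndTrunc and the budget of 4 are illustrative only:

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Type.h"
using namespace llvm;

static bool isCheapAddAndTrunc(const TargetTransformInfo &TTI, Type *WideTy,
                               Type *NarrowTy) {
  TargetTransformInfo::TargetCostKind CostKind =
      TargetTransformInfo::TCK_RecipThroughput;
  // Cost of the wide add itself.
  InstructionCost Cost =
      TTI.getArithmeticInstrCost(Instruction::Add, WideTy, CostKind);
  // Only charge for the truncate if the target does not consider it free.
  if (!TTI.isTruncateFree(WideTy, NarrowTy))
    Cost += TTI.getCastInstrCost(Instruction::Trunc, NarrowTy, WideTy,
                                 TargetTransformInfo::CastContextHint::None,
                                 CostKind);
  return Cost.isValid() && Cost <= 4; // illustrative budget
}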
Twine - A lightweight data structure for efficiently representing the concatenation of temporary values as strings.
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
unsigned getIntegerBitWidth() const
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:265
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:255
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:129
static IntegerType * getInt32Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:228
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
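The Type predicates and size queries above are typically combined when deciding whether a cast between two types is needed at all. A minimal sketch; widerIntType is a hypothetical helper:

#include "llvm/IR/Type.h"
using namespace llvm;

// Return the wider of two integer types, or nullptr if either is not an integer.
static Type *widerIntType(Type *A, Type *B) {
  if (!A->isIntegerTy() || !B->isIntegerTy())
    return nullptr;
  return A->getScalarSizeInBits() >= B->getScalarSizeInBits() ? A : B;
}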
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
op_range operands()
Definition: User.h:242
Value * getOperand(unsigned i) const
Definition: User.h:169
unsigned getNumOperands() const
Definition: User.h:191
iterator_range< value_op_iterator > operand_values()
Definition: User.h:266
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
bool use_empty() const
Definition: Value.h:344
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1074
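replaceAllUsesWith and use_empty are the standard pairing when retiring a redundant instruction. A minimal sketch, assuming New already dominates every use of Old; replaceAndErase is a hypothetical helper:

#include "llvm/IR/Instruction.h"
#include "llvm/IR/Value.h"
using namespace llvm;

static void replaceAndErase(Instruction *Old, Value *New) {
  Old->replaceAllUsesWith(New); // reroute every use edge from Old to New
  if (Old->use_empty())         // always true after RAUW; kept for clarity
    Old->eraseFromParent();     // drop the now-dead instruction
}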
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:187
self_iterator getIterator()
Definition: ilist_node.h:109
#define UINT64_MAX
Definition: DataTypes.h:77
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1465
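Intrinsic::getDeclaration finds or creates the declaration of an intrinsic in a module; overloaded intrinsics also need the concrete types they are instantiated with. A minimal sketch that fetches llvm.umax for i64; getUMax64 is a hypothetical helper:

#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
using namespace llvm;

static Function *getUMax64(Module &M) {
  // llvm.umax is overloaded on its operand type, so pass the i64 type explicitly.
  Type *I64 = Type::getInt64Ty(M.getContext());
  return Intrinsic::getDeclaration(&M, Intrinsic::umax, {I64});
}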
cst_pred_ty< is_power2 > m_Power2()
Match an integer or vector power-of-2.
Definition: PatternMatch.h:580
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
Definition: PatternMatch.h:777
specificval_ty m_Specific(const Value *V)
Match only the specific value V that was passed in; no other value will match.
Definition: PatternMatch.h:836
CmpClass_match< LHS, RHS, ICmpInst, ICmpInst::Predicate > m_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R)
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
AnyBinaryOp_match< LHS, RHS, true > m_c_BinOp(const LHS &L, const RHS &R)
Matches a BinaryOperator with LHS and RHS in either order.
class_match< BasicBlock > m_BasicBlock()
Match an arbitrary basic block value and ignore it.
Definition: PatternMatch.h:189
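The PatternMatch combinators compose into declarative checks over IR. A minimal sketch matching a conditional branch whose condition is an integer compare against a power of two; matchBranchOnPow2Cmp is a hypothetical helper:

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/PatternMatch.h"
using namespace llvm;
using namespace llvm::PatternMatch;

// Matches: br (icmp <pred> %x, <power-of-two>), %TrueBB, %FalseBB
static bool matchBranchOnPow2Cmp(Instruction *Term, BasicBlock *&TrueBB,
                                 BasicBlock *&FalseBB) {
  ICmpInst::Predicate Pred;
  Value *X;
  return match(Term,
               m_Br(m_ICmp(Pred, m_Value(X), m_Power2()), TrueBB, FalseBB));
}

m_Specific and m_c_BinOp slot into the same pattern language when a match must pin down an already-known value or accept a commuted operand order.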
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void visitAll(const SCEV *Root, SV &Visitor)
Use SCEVTraversal to visit all nodes in the given expression tree.
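visitAll drives a caller-supplied visitor over every node of a SCEV expression tree; the visitor only needs follow() and isDone() members. A minimal sketch that counts SCEVUnknown leaves; UnknownCounter and countUnknowns are hypothetical:

#include "llvm/Analysis/ScalarEvolutionExpressions.h"
using namespace llvm;

namespace {
struct UnknownCounter {
  unsigned Count = 0;
  bool follow(const SCEV *S) {
    if (isa<SCEVUnknown>(S))
      ++Count;
    return true;                          // keep descending into operands
  }
  bool isDone() const { return false; }   // never cut the traversal short
};
} // namespace

static unsigned countUnknowns(const SCEV *Expr) {
  UnknownCounter C;
  visitAll(Expr, C);
  return C.Count;
}

For a plain yes/no query, SCEVExprContains(Expr, [](const SCEV *S) { return isa<SCEVUnknown>(S); }) reaches the same answer without a named visitor.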
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
pred_iterator pred_end(BasicBlock *BB)
Definition: CFG.h:114
@ Offset
Definition: DWP.cpp:456
void stable_sort(R &&Range)
Definition: STLExtras.h:1995
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
detail::scope_exit< std::decay_t< Callable > > make_scope_exit(Callable &&F)
Definition: ScopeExit.h:59
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A,...
Definition: STLExtras.h:2406
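The STLExtras helpers listed here are range-based conveniences over the standard algorithms. A minimal sketch combining a few of them; the function name sortAndCheck and the printed labels are illustrative only:

#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

static bool sortAndCheck(SmallVectorImpl<int> &Vals) {
  // Runs when the function returns, on every exit path.
  auto Cleanup = make_scope_exit([] { errs() << "sortAndCheck done\n"; });

  stable_sort(Vals);                      // range form of std::stable_sort
  for (auto [Idx, V] : enumerate(Vals))   // (index, element) pairs
    errs() << Idx << ": " << V << "\n";

  // Skip the first element, then require the rest to be non-negative.
  return all_of(drop_begin(Vals), [](int V) { return V >= 0; });
}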
Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
pred_iterator pred_begin(BasicBlock *BB)
Definition: CFG.h:110
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:419
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
cl::opt< unsigned > SCEVCheapExpansionBudget
Constant * ConstantFoldBinaryOpOperands(unsigned Opcode, Constant *LHS, Constant *RHS, const DataLayout &DL)
Attempt to constant fold a binary operation with the specified operands.
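ConstantFoldBinaryOpOperands asks whether a binary operation over two constants folds without creating an instruction; it returns null when no folding is possible. A minimal sketch; tryFoldAdd is a hypothetical helper:

#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;

static Constant *tryFoldAdd(Constant *LHS, Constant *RHS,
                            const DataLayout &DL) {
  // Folded constant on success, nullptr if the operands do not fold.
  return ConstantFoldBinaryOpOperands(Instruction::Add, LHS, RHS, DL);
}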
const SCEV * normalizeForPostIncUse(const SCEV *S, const PostIncLoopSet &Loops, ScalarEvolution &SE, bool CheckInvertible=true)
Normalize S to be post-increment for all loops present in Loops.
@ Mul
Product of integers.
@ Add
Sum of integers.
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
bool formLCSSAForInstructions(SmallVectorImpl< Instruction * > &Worklist, const DominatorTree &DT, const LoopInfo &LI, ScalarEvolution *SE, SmallVectorImpl< PHINode * > *PHIsToRemove=nullptr, SmallVectorImpl< PHINode * > *InsertedPHIs=nullptr)
Ensures LCSSA form for every instruction from the Worklist in the scope of its innermost containing loop.
Definition: LCSSA.cpp:77
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given predicate occurs in a range.
Definition: STLExtras.h:1921
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1879
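count_if and is_contained are the range forms of their std:: counterparts; combined with the predecessor iterators above they make simple CFG queries read declaratively. A minimal sketch over IR basic blocks; both helper names are hypothetical:

#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
using namespace llvm;

// True if Latch branches (directly) to Header.
static bool hasEdgeInto(BasicBlock *Header, BasicBlock *Latch) {
  return is_contained(predecessors(Header), Latch);
}

// Number of predecessor edges of BB that come from BB itself.
static unsigned countSelfLoops(BasicBlock *BB) {
  return static_cast<unsigned>(count_if(
      predecessors(BB), [BB](BasicBlock *Pred) { return Pred == BB; }));
}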
InstructionCost Cost
std::optional< bool > isImpliedByDomCondition(const Value *Cond, const Instruction *ContextI, const DataLayout &DL)
Return the boolean condition value in the context of the given instruction if it is known based on dominating conditions.
unsigned pred_size(const MachineBasicBlock *BB)
bool SCEVExprContains(const SCEV *Root, PredTy Pred)
Return true if any node in Root satisfies the predicate Pred.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
void apply(Instruction *I)
PoisonFlags(const Instruction *I)
struct for holding enough information to help calculate the cost of the given SCEV when expanded into IR.