LLVM 23.0.0git
IVDescriptors.cpp
Go to the documentation of this file.
1//===- llvm/Analysis/IVDescriptors.cpp - IndVar Descriptors -----*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file "describes" induction and recurrence variables.
10//
11//===----------------------------------------------------------------------===//
12
20#include "llvm/IR/Dominators.h"
23#include "llvm/IR/ValueHandle.h"
24#include "llvm/Support/Debug.h"
26
27using namespace llvm;
28using namespace llvm::PatternMatch;
29using namespace llvm::SCEVPatternMatch;
30
31#define DEBUG_TYPE "iv-descriptors"
32
35 for (const Use &Use : I->operands())
36 if (!Set.count(dyn_cast<Instruction>(Use)))
37 return false;
38 return true;
39}
40
42 switch (Kind) {
43 default:
44 break;
46 case RecurKind::Sub:
47 case RecurKind::Add:
48 case RecurKind::Mul:
49 case RecurKind::Or:
50 case RecurKind::And:
51 case RecurKind::Xor:
52 case RecurKind::SMax:
53 case RecurKind::SMin:
54 case RecurKind::UMax:
55 case RecurKind::UMin:
59 return true;
60 }
61 return false;
62}
63
67
68/// Determines if Phi may have been type-promoted. If Phi has a single user
69/// that ANDs the Phi with a type mask, return the user. RT is updated to
70/// account for the narrower bit width represented by the mask, and the AND
71/// instruction is added to CI.
75 if (!Phi->hasOneUse())
76 return Phi;
77
78 const APInt *M = nullptr;
79 Instruction *I, *J = cast<Instruction>(Phi->use_begin()->getUser());
80
81 // Matches either I & 2^x-1 or 2^x-1 & I. If we find a match, we update RT
82 // with a new integer type of the corresponding bit width.
83 if (match(J, m_And(m_Instruction(I), m_APInt(M)))) {
84 int32_t Bits = (*M + 1).exactLogBase2();
85 if (Bits > 0) {
86 RT = IntegerType::get(Phi->getContext(), Bits);
87 Visited.insert(Phi);
88 CI.insert(J);
89 return J;
90 }
91 }
92 return Phi;
93}
94
98
99/// Compute the minimal bit width needed to represent a reduction whose exit
100/// instruction is given by Exit.
101static std::pair<Type *, bool> computeRecurrenceType(Instruction *Exit,
102 DemandedBits *DB,
103 AssumptionCache *AC,
104 DominatorTree *DT) {
105 bool IsSigned = false;
106 const DataLayout &DL = Exit->getDataLayout();
107 uint64_t MaxBitWidth = DL.getTypeSizeInBits(Exit->getType());
108
109 if (DB) {
110 // Use the demanded bits analysis to determine the bits that are live out
111 // of the exit instruction, rounding up to the nearest power of two. If the
112 // use of demanded bits results in a smaller bit width, we know the value
113 // must be positive (i.e., IsSigned = false), because if this were not the
114 // case, the sign bit would have been demanded.
115 auto Mask = DB->getDemandedBits(Exit);
116 MaxBitWidth = Mask.getBitWidth() - Mask.countl_zero();
117 }
118
119 if (MaxBitWidth == DL.getTypeSizeInBits(Exit->getType()) && AC && DT) {
120 // If demanded bits wasn't able to limit the bit width, we can try to use
121 // value tracking instead. This can be the case, for example, if the value
122 // may be negative.
123 auto NumSignBits = ComputeNumSignBits(Exit, DL, AC, nullptr, DT);
124 auto NumTypeBits = DL.getTypeSizeInBits(Exit->getType());
125 MaxBitWidth = NumTypeBits - NumSignBits;
126 KnownBits Bits = computeKnownBits(Exit, DL);
127 if (!Bits.isNonNegative()) {
128 // If the value is not known to be non-negative, we set IsSigned to true,
129 // meaning that we will use sext instructions instead of zext
130 // instructions to restore the original type.
131 IsSigned = true;
132 // Make sure at least one sign bit is included in the result, so it
133 // will get properly sign-extended.
134 ++MaxBitWidth;
135 }
136 }
137 MaxBitWidth = llvm::bit_ceil(MaxBitWidth);
138
139 return std::make_pair(Type::getIntNTy(Exit->getContext(), MaxBitWidth),
140 IsSigned);
141}
142
143/// Collect cast instructions that can be ignored in the vectorizer's cost
144/// model, given a reduction exit value and the minimal type in which the
145// reduction can be represented. Also search casts to the recurrence type
146// to find the minimum width used by the recurrence.
147static void collectCastInstrs(Loop *TheLoop, Instruction *Exit,
148 Type *RecurrenceType,
150 unsigned &MinWidthCastToRecurTy) {
151
154 Worklist.push_back(Exit);
155 MinWidthCastToRecurTy = -1U;
156
157 while (!Worklist.empty()) {
158 Instruction *Val = Worklist.pop_back_val();
159 Visited.insert(Val);
160 if (auto *Cast = dyn_cast<CastInst>(Val)) {
161 if (Cast->getSrcTy() == RecurrenceType) {
162 // If the source type of a cast instruction is equal to the recurrence
163 // type, it will be eliminated, and should be ignored in the vectorizer
164 // cost model.
165 Casts.insert(Cast);
166 continue;
167 }
168 if (Cast->getDestTy() == RecurrenceType) {
169 // The minimum width used by the recurrence is found by checking for
170 // casts on its operands. The minimum width is used by the vectorizer
171 // when finding the widest type for in-loop reductions without any
172 // loads/stores.
173 MinWidthCastToRecurTy = std::min<unsigned>(
174 MinWidthCastToRecurTy, Cast->getSrcTy()->getScalarSizeInBits());
175 continue;
176 }
177 }
178 // Add all operands to the work list if they are loop-varying values that
179 // we haven't yet visited.
180 for (Value *O : cast<User>(Val)->operands())
181 if (auto *I = dyn_cast<Instruction>(O))
182 if (TheLoop->contains(I) && !Visited.count(I))
183 Worklist.push_back(I);
184 }
185}
186
187// Check if a given Phi node can be recognized as an ordered reduction for
188// vectorizing floating point operations without unsafe math.
189static bool checkOrderedReduction(RecurKind Kind, Instruction *ExactFPMathInst,
190 Instruction *Exit, PHINode *Phi) {
191 // Currently only FAdd and FMulAdd are supported.
192 if (Kind != RecurKind::FAdd && Kind != RecurKind::FMulAdd)
193 return false;
194
195 if (Kind == RecurKind::FAdd && Exit->getOpcode() != Instruction::FAdd)
196 return false;
197
198 if (Kind == RecurKind::FMulAdd &&
200 return false;
201
202 // Ensure the exit instruction has only one user other than the reduction PHI
203 if (Exit != ExactFPMathInst || Exit->hasNUsesOrMore(3))
204 return false;
205
206 // The only pattern accepted is the one in which the reduction PHI
207 // is used as one of the operands of the exit instruction
208 auto *Op0 = Exit->getOperand(0);
209 auto *Op1 = Exit->getOperand(1);
210 if (Kind == RecurKind::FAdd && Op0 != Phi && Op1 != Phi)
211 return false;
212 if (Kind == RecurKind::FMulAdd && Exit->getOperand(2) != Phi)
213 return false;
214
215 LLVM_DEBUG(dbgs() << "LV: Found an ordered reduction: Phi: " << *Phi
216 << ", ExitInst: " << *Exit << "\n");
217
218 return true;
219}
220
221// Collect FMF from a value and its associated fcmp in select patterns
223 FastMathFlags FMF = cast<FPMathOperator>(V)->getFastMathFlags();
224 if (auto *Sel = dyn_cast<SelectInst>(V)) {
225 // Accept FMF from either fcmp or select in a min/max idiom.
226 // TODO: Remove this when FMF propagation is fixed or we standardize on
227 // intrinsics.
228 if (auto *FCmp = dyn_cast<FCmpInst>(Sel->getCondition()))
229 FMF |= FCmp->getFastMathFlags();
230 }
231 return FMF;
232}
233
234static std::optional<FastMathFlags>
236 bool HasRequiredFMF = FPOp && FPOp->hasNoNaNs() && FPOp->hasNoSignedZeros();
237 if (HasRequiredFMF)
238 return collectMinMaxFMF(FPOp);
239
240 switch (RK) {
245 break;
246
247 case RecurKind::FMax:
249 return std::nullopt;
251 break;
252 case RecurKind::FMin:
254 return std::nullopt;
256 break;
257 default:
258 return std::nullopt;
259 }
260 return collectMinMaxFMF(FPOp);
261}
262
264 ScalarEvolution *SE) {
265 Type *Ty = Phi->getType();
266 BasicBlock *Latch = TheLoop->getLoopLatch();
267 if (Phi->getNumIncomingValues() != 2 ||
268 Phi->getParent() != TheLoop->getHeader() ||
269 (!Ty->isIntegerTy() && !Ty->isFloatingPointTy()) || !Latch)
270 return {};
271
272 auto GetMinMaxRK = [](Value *V, Value *&A, Value *&B) -> RecurKind {
273 if (match(V, m_UMin(m_Value(A), m_Value(B))))
274 return RecurKind::UMin;
275 if (match(V, m_UMax(m_Value(A), m_Value(B))))
276 return RecurKind::UMax;
277 if (match(V, m_SMax(m_Value(A), m_Value(B))))
278 return RecurKind::SMax;
279 if (match(V, m_SMin(m_Value(A), m_Value(B))))
280 return RecurKind::SMin;
281 if (match(V, m_OrdOrUnordFMin(m_Value(A), m_Value(B))) ||
283 return RecurKind::FMin;
284 if (match(V, m_OrdOrUnordFMax(m_Value(A), m_Value(B))) ||
286 return RecurKind::FMax;
287 if (match(V, m_FMinimum(m_Value(A), m_Value(B))))
288 return RecurKind::FMinimum;
289 if (match(V, m_FMaximum(m_Value(A), m_Value(B))))
290 return RecurKind::FMaximum;
295 return RecurKind::None;
296 };
297
299 Value *BackedgeValue = Phi->getIncomingValueForBlock(Latch);
301 // Walk def-use chains upwards from BackedgeValue to identify min/max
302 // recurrences.
303 SmallVector<Value *> WorkList({BackedgeValue});
304 SmallPtrSet<Value *, 8> Chain({Phi});
305 while (!WorkList.empty()) {
306 Value *Cur = WorkList.pop_back_val();
307 if (!Chain.insert(Cur).second)
308 continue;
309 auto *I = dyn_cast<Instruction>(Cur);
310 if (!I || !TheLoop->contains(I))
311 return {};
312 if (auto *PN = dyn_cast<PHINode>(I)) {
313 append_range(WorkList, PN->operands());
314 continue;
315 }
316 Value *A, *B;
317 RecurKind CurRK = GetMinMaxRK(Cur, A, B);
318 if (CurRK == RecurKind::None || (RK != RecurKind::None && CurRK != RK))
319 return {};
320
321 RK = CurRK;
322 // Check required fast-math flags for FP recurrences.
324 auto CurFMF = hasRequiredFastMathFlags(cast<FPMathOperator>(Cur), RK);
325 if (!CurFMF)
326 return {};
327 FMF &= *CurFMF;
328 }
329
330 if (auto *SI = dyn_cast<SelectInst>(I))
331 Chain.insert(SI->getCondition());
332
333 if (A == Phi || B == Phi)
334 continue;
335
336 // Add operand to worklist if it matches the pattern (exactly one must
337 // match)
338 Value *X, *Y;
339 auto *IA = dyn_cast<Instruction>(A);
340 auto *IB = dyn_cast<Instruction>(B);
341 bool AMatches = IA && TheLoop->contains(IA) && GetMinMaxRK(A, X, Y) == RK;
342 bool BMatches = IB && TheLoop->contains(IB) && GetMinMaxRK(B, X, Y) == RK;
343 if (AMatches == BMatches) // Both or neither match
344 return {};
345 WorkList.push_back(AMatches ? A : B);
346 }
347
348 // Handle argmin/argmax pattern: PHI has uses outside the reduction chain
349 // that are not intermediate min/max operations (which are handled below).
350 // Requires integer min/max, and single-use BackedgeValue (so vectorizer can
351 // handle both PHIs together).
352 bool PhiHasInvalidUses = any_of(Phi->users(), [&](User *U) {
353 Value *A, *B;
354 return !Chain.contains(U) && TheLoop->contains(cast<Instruction>(U)) &&
355 GetMinMaxRK(U, A, B) == RecurKind::None;
356 });
357 if (PhiHasInvalidUses) {
359 !BackedgeValue->hasOneUse())
360 return {};
362 Phi->getIncomingValueForBlock(TheLoop->getLoopPreheader()),
363 /*Exit=*/nullptr, /*Store=*/nullptr, RK, FastMathFlags(),
364 /*ExactFP=*/nullptr, Phi->getType(), /*IsMultiUse=*/true);
365 }
366
367 // Validate chain entries and collect stores from chain entries and
368 // intermediate ops.
370 for (Value *V : Chain) {
371 for (User *U : V->users()) {
372 if (Chain.contains(U))
373 continue;
374 auto *I = dyn_cast<Instruction>(U);
375 if (!I || (!TheLoop->contains(I) && V != BackedgeValue))
376 return {};
377 if (!TheLoop->contains(I))
378 continue;
379 if (auto *SI = dyn_cast<StoreInst>(I)) {
380 Stores.push_back(SI);
381 continue;
382 }
383 // Must be intermediate min/max of the same kind.
384 Value *A, *B;
385 if (GetMinMaxRK(I, A, B) != RK)
386 return {};
387 for (User *IU : I->users()) {
388 if (auto *SI = dyn_cast<StoreInst>(IU))
389 Stores.push_back(SI);
390 else if (!Chain.contains(IU))
391 return {};
392 }
393 }
394 }
395
396 // Validate all stores go to same invariant address and are in the same block.
397 StoreInst *IntermediateStore = nullptr;
398 const SCEV *StorePtrSCEV = nullptr;
399 for (StoreInst *SI : Stores) {
400 if (!SE)
401 return {};
402 const SCEV *Ptr = SE->getSCEV(SI->getPointerOperand());
403 if (!SE->isLoopInvariant(Ptr, TheLoop) ||
404 (StorePtrSCEV && StorePtrSCEV != Ptr))
405 return {};
406 StorePtrSCEV = Ptr;
407 if (!IntermediateStore)
408 IntermediateStore = SI;
409 else if (IntermediateStore->getParent() != SI->getParent())
410 return {};
411 else if (IntermediateStore->comesBefore(SI))
412 IntermediateStore = SI;
413 }
414
416 Phi->getIncomingValueForBlock(TheLoop->getLoopPreheader()),
417 cast<Instruction>(BackedgeValue), IntermediateStore, RK, FMF, nullptr,
418 Phi->getType());
419}
420
421// This matches a phi that selects between the original value (HeaderPhi) and an
422// arbitrary non-reduction value.
423static bool isFindLastLikePhi(PHINode *Phi, PHINode *HeaderPhi,
424 SmallPtrSetImpl<Instruction *> &ReductionInstrs) {
425 unsigned NumNonReduxInputs = 0;
426 for (const Value *Op : Phi->operands()) {
427 if (!ReductionInstrs.contains(dyn_cast<Instruction>(Op))) {
428 if (++NumNonReduxInputs > 1)
429 return false;
430 } else if (Op != HeaderPhi) {
431 // TODO: Remove this restriction once chained phis are supported.
432 return false;
433 }
434 }
435 return NumNonReduxInputs == 1;
436}
437
439 PHINode *Phi, RecurKind Kind, Loop *TheLoop, RecurrenceDescriptor &RedDes,
441 ScalarEvolution *SE) {
442 if (Phi->getNumIncomingValues() != 2)
443 return false;
444
445 // Reduction variables are only found in the loop header block.
446 if (Phi->getParent() != TheLoop->getHeader())
447 return false;
448
449 // Obtain the reduction start value from the value that comes from the loop
450 // preheader.
451 if (!TheLoop->getLoopPreheader())
452 return false;
453
454 Value *RdxStart = Phi->getIncomingValueForBlock(TheLoop->getLoopPreheader());
455 // ExitInstruction is the single value which is used outside the loop.
456 // We only allow for a single reduction value to be used outside the loop.
457 // This includes users of the reduction, variables (which form a cycle
458 // which ends in the phi node).
459 Instruction *ExitInstruction = nullptr;
460
461 // Variable to keep last visited store instruction. By the end of the
462 // algorithm this variable will be either empty or having intermediate
463 // reduction value stored in invariant address.
464 StoreInst *IntermediateStore = nullptr;
465
466 // Indicates that we found a reduction operation in our scan.
467 bool FoundReduxOp = false;
468
469 // We start with the PHI node and scan for all of the users of this
470 // instruction. All users must be instructions that can be used as reduction
471 // variables (such as ADD). We must have a single out-of-block user. The cycle
472 // must include the original PHI.
473 bool FoundStartPHI = false;
474
475 // To recognize AnyOf patterns formed by a icmp select sequence, we store
476 // the number of instruction we saw to make sure we only see one.
477 unsigned NumCmpSelectPatternInst = 0;
478 InstDesc ReduxDesc(false, nullptr);
479
480 // To recognize find-lasts of conditional operations (such as loads or
481 // divides), that need masking, we track non-phi users and if we've found a
482 // "find-last-like" phi (see isFindLastLikePhi). We currently only support
483 // find-last reduction chains with a single "find-last-like" phi and do not
484 // allow any other operations.
485 [[maybe_unused]] unsigned NumNonPHIUsers = 0;
486 bool FoundFindLastLikePhi = false;
487
488 // Data used for determining if the recurrence has been type-promoted.
489 Type *RecurrenceType = Phi->getType();
491 unsigned MinWidthCastToRecurrenceType;
492 Instruction *Start = Phi;
493 bool IsSigned = false;
494
497
498 // Return early if the recurrence kind does not match the type of Phi. If the
499 // recurrence kind is arithmetic, we attempt to look through AND operations
500 // resulting from the type promotion performed by InstCombine. Vector
501 // operations are not limited to the legal integer widths, so we may be able
502 // to evaluate the reduction in the narrower width.
503 // Check the scalar type to handle both scalar and vector types.
504 Type *ScalarTy = RecurrenceType->getScalarType();
505 if (Kind == RecurKind::FindLast) {
506 // FindLast supports all primitive scalar types.
507 if (!ScalarTy->isFloatingPointTy() && !ScalarTy->isIntegerTy() &&
508 !ScalarTy->isPointerTy())
509 return false;
510 } else if (ScalarTy->isFloatingPointTy()) {
512 return false;
513 } else if (ScalarTy->isIntegerTy()) {
514 if (!isIntegerRecurrenceKind(Kind))
515 return false;
516 Start = lookThroughAnd(Phi, RecurrenceType, VisitedInsts, CastInsts);
517 } else {
518 // Pointer min/max may exist, but it is not supported as a reduction op.
519 return false;
520 }
521
522 Worklist.push_back(Start);
523 VisitedInsts.insert(Start);
524
525 // Start with all flags set because we will intersect this with the reduction
526 // flags from all the reduction operations.
528
529 // The first instruction in the use-def chain of the Phi node that requires
530 // exact floating point operations.
531 Instruction *ExactFPMathInst = nullptr;
532
533 // A value in the reduction can be used:
534 // - By the reduction:
535 // - Reduction operation:
536 // - One use of reduction value (safe).
537 // - Multiple use of reduction value (not safe).
538 // - PHI:
539 // - All uses of the PHI must be the reduction (safe).
540 // - Otherwise, not safe.
541 // - By instructions outside of the loop (safe).
542 // * One value may have several outside users, but all outside
543 // uses must be of the same value.
544 // - By store instructions with a loop invariant address (safe with
545 // the following restrictions):
546 // * If there are several stores, all must have the same address.
547 // * Final value should be stored in that loop invariant address.
548 // - By an instruction that is not part of the reduction (not safe).
549 // This is either:
550 // * An instruction type other than PHI or the reduction operation.
551 // * A PHI in the header other than the initial PHI.
552 while (!Worklist.empty()) {
553 Instruction *Cur = Worklist.pop_back_val();
554
555 // Store instructions are allowed iff it is the store of the reduction
556 // value to the same loop invariant memory location.
557 if (auto *SI = dyn_cast<StoreInst>(Cur)) {
558 if (!SE) {
559 LLVM_DEBUG(dbgs() << "Store instructions are not processed without "
560 << "Scalar Evolution Analysis\n");
561 return false;
562 }
563
564 const SCEV *PtrScev = SE->getSCEV(SI->getPointerOperand());
565 // Check it is the same address as previous stores
566 if (IntermediateStore) {
567 const SCEV *OtherScev =
568 SE->getSCEV(IntermediateStore->getPointerOperand());
569
570 if (OtherScev != PtrScev) {
571 LLVM_DEBUG(dbgs() << "Storing reduction value to different addresses "
572 << "inside the loop: " << *SI->getPointerOperand()
573 << " and "
574 << *IntermediateStore->getPointerOperand() << '\n');
575 return false;
576 }
577 }
578
579 // Check the pointer is loop invariant
580 if (!SE->isLoopInvariant(PtrScev, TheLoop)) {
581 LLVM_DEBUG(dbgs() << "Storing reduction value to non-uniform address "
582 << "inside the loop: " << *SI->getPointerOperand()
583 << '\n');
584 return false;
585 }
586
587 // IntermediateStore is always the last store in the loop.
589 continue;
590 }
591
592 // No Users.
593 // If the instruction has no users then this is a broken chain and can't be
594 // a reduction variable.
595 if (Cur->use_empty())
596 return false;
597
598 bool IsAPhi = isa<PHINode>(Cur);
599 if (!IsAPhi)
600 ++NumNonPHIUsers;
601
602 // A header PHI use other than the original PHI.
603 if (Cur != Phi && IsAPhi && Cur->getParent() == Phi->getParent())
604 return false;
605
606 // Reductions of instructions such as Div, and Sub is only possible if the
607 // LHS is the reduction variable.
608 if (!Cur->isCommutative() && !IsAPhi && !isa<SelectInst>(Cur) &&
609 !isa<ICmpInst>(Cur) && !isa<FCmpInst>(Cur) &&
610 !VisitedInsts.count(dyn_cast<Instruction>(Cur->getOperand(0))))
611 return false;
612
613 // Any reduction instruction must be of one of the allowed kinds. We ignore
614 // the starting value (the Phi or an AND instruction if the Phi has been
615 // type-promoted).
616 if (Cur != Start) {
617 ReduxDesc = isRecurrenceInstr(TheLoop, Phi, Cur, Kind, ReduxDesc, SE);
618 ExactFPMathInst = ExactFPMathInst == nullptr
619 ? ReduxDesc.getExactFPMathInst()
620 : ExactFPMathInst;
621 if (!ReduxDesc.isRecurrence())
622 return false;
623 // FIXME: FMF is allowed on phi, but propagation is not handled correctly.
624 if (isa<FPMathOperator>(ReduxDesc.getPatternInst()) && !IsAPhi)
625 FMF &= collectMinMaxFMF(ReduxDesc.getPatternInst());
626 // Update this reduction kind if we matched a new instruction.
627 // TODO: Can we eliminate the need for a 2nd InstDesc by keeping 'Kind'
628 // state accurate while processing the worklist?
629 if (ReduxDesc.getRecKind() != RecurKind::None)
630 Kind = ReduxDesc.getRecKind();
631 }
632
633 bool IsASelect = isa<SelectInst>(Cur);
634
635 // A conditional reduction operation must only have 2 or less uses in
636 // VisitedInsts.
637 if (IsASelect && (Kind == RecurKind::FAdd || Kind == RecurKind::FMul) &&
638 hasMultipleUsesOf(Cur, VisitedInsts, 2))
639 return false;
640
641 // A reduction operation must only have one use of the reduction value.
642 if (!IsAPhi && !IsASelect && !isAnyOfRecurrenceKind(Kind) &&
643 hasMultipleUsesOf(Cur, VisitedInsts, 1))
644 return false;
645
646 // All inputs to a PHI node must be a reduction value, unless the phi is a
647 // "FindLast-like" phi (described below).
648 if (IsAPhi && Cur != Phi) {
649 if (!areAllUsesIn(Cur, VisitedInsts)) {
650 // A "FindLast-like" phi acts like a conditional select between the
651 // previous reduction value, and an arbitrary value. Note: Multiple
652 // "FindLast-like" phis are not supported see:
653 // IVDescriptorsTest.UnsupportedFindLastPhi.
654 FoundFindLastLikePhi =
655 Kind == RecurKind::FindLast && !FoundFindLastLikePhi &&
656 isFindLastLikePhi(cast<PHINode>(Cur), Phi, VisitedInsts);
657 if (!FoundFindLastLikePhi)
658 return false;
659 }
660 }
661
662 if (isAnyOfRecurrenceKind(Kind) && IsASelect)
663 ++NumCmpSelectPatternInst;
664
665 // Check whether we found a reduction operator.
666 FoundReduxOp |= (!IsAPhi || FoundFindLastLikePhi) && Cur != Start;
667
668 // Process users of current instruction. Push non-PHI nodes after PHI nodes
669 // onto the stack. This way we are going to have seen all inputs to PHI
670 // nodes once we get to them.
673 for (User *U : Cur->users()) {
675
676 // If the user is a call to llvm.fmuladd then the instruction can only be
677 // the final operand.
678 if (isFMulAddIntrinsic(UI))
679 if (Cur == UI->getOperand(0) || Cur == UI->getOperand(1))
680 return false;
681
682 // Check if we found the exit user.
683 BasicBlock *Parent = UI->getParent();
684 if (!TheLoop->contains(Parent)) {
685 // If we already know this instruction is used externally, move on to
686 // the next user.
687 if (ExitInstruction == Cur)
688 continue;
689
690 // Exit if you find multiple values used outside or if the header phi
691 // node is being used. In this case the user uses the value of the
692 // previous iteration, in which case we would loose "VF-1" iterations of
693 // the reduction operation if we vectorize.
694 if (ExitInstruction != nullptr || Cur == Phi)
695 return false;
696
697 // The instruction used by an outside user must be the last instruction
698 // before we feed back to the reduction phi. Otherwise, we loose VF-1
699 // operations on the value.
700 if (!is_contained(Phi->operands(), Cur))
701 return false;
702
703 ExitInstruction = Cur;
704 continue;
705 }
706
707 // Process instructions only once (termination). Each reduction cycle
708 // value must only be used once, except by phi nodes and conditional
709 // reductions which are represented as a cmp followed by a select.
710 InstDesc IgnoredVal(false, nullptr);
711 if (VisitedInsts.insert(UI).second) {
712 if (isa<PHINode>(UI)) {
713 PHIs.push_back(UI);
714 } else {
716 if (SI && SI->getPointerOperand() == Cur) {
717 // Reduction variable chain can only be stored somewhere but it
718 // can't be used as an address.
719 return false;
720 }
721 NonPHIs.push_back(UI);
722 }
723 } else if (!isa<PHINode>(UI) &&
724 ((!isConditionalRdxPattern(UI).isRecurrence() &&
725 !isAnyOfPattern(TheLoop, Phi, UI, IgnoredVal)
726 .isRecurrence())))
727 return false;
728
729 // Remember that we completed the cycle.
730 if (UI == Phi)
731 FoundStartPHI = true;
732 }
733 Worklist.append(PHIs.begin(), PHIs.end());
734 Worklist.append(NonPHIs.begin(), NonPHIs.end());
735 }
736
737 // We only expect to match a single "find-last-like" phi per find-last
738 // reduction, with no non-phi operations in the reduction use chain.
739 assert((!FoundFindLastLikePhi ||
740 (Kind == RecurKind::FindLast && NumNonPHIUsers == 0)) &&
741 "Unexpectedly matched a 'find-last-like' phi");
742
743 if (isAnyOfRecurrenceKind(Kind) && NumCmpSelectPatternInst != 1)
744 return false;
745
746 if (IntermediateStore) {
747 // Check that stored value goes to the phi node again. This way we make sure
748 // that the value stored in IntermediateStore is indeed the final reduction
749 // value.
750 if (!is_contained(Phi->operands(), IntermediateStore->getValueOperand())) {
751 LLVM_DEBUG(dbgs() << "Not a final reduction value stored: "
752 << *IntermediateStore << '\n');
753 return false;
754 }
755
756 // If there is an exit instruction it's value should be stored in
757 // IntermediateStore
758 if (ExitInstruction &&
759 IntermediateStore->getValueOperand() != ExitInstruction) {
760 LLVM_DEBUG(dbgs() << "Last store Instruction of reduction value does not "
761 "store last calculated value of the reduction: "
762 << *IntermediateStore << '\n');
763 return false;
764 }
765
766 // If all uses are inside the loop (intermediate stores), then the
767 // reduction value after the loop will be the one used in the last store.
768 if (!ExitInstruction)
769 ExitInstruction = cast<Instruction>(IntermediateStore->getValueOperand());
770 }
771
772 if (!FoundStartPHI || !FoundReduxOp || !ExitInstruction)
773 return false;
774
775 const bool IsOrdered =
776 checkOrderedReduction(Kind, ExactFPMathInst, ExitInstruction, Phi);
777
778 if (Start != Phi) {
779 // If the starting value is not the same as the phi node, we speculatively
780 // looked through an 'and' instruction when evaluating a potential
781 // arithmetic reduction to determine if it may have been type-promoted.
782 //
783 // We now compute the minimal bit width that is required to represent the
784 // reduction. If this is the same width that was indicated by the 'and', we
785 // can represent the reduction in the smaller type. The 'and' instruction
786 // will be eliminated since it will essentially be a cast instruction that
787 // can be ignore in the cost model. If we compute a different type than we
788 // did when evaluating the 'and', the 'and' will not be eliminated, and we
789 // will end up with different kinds of operations in the recurrence
790 // expression (e.g., IntegerAND, IntegerADD). We give up if this is
791 // the case.
792 //
793 // The vectorizer relies on InstCombine to perform the actual
794 // type-shrinking. It does this by inserting instructions to truncate the
795 // exit value of the reduction to the width indicated by RecurrenceType and
796 // then extend this value back to the original width. If IsSigned is false,
797 // a 'zext' instruction will be generated; otherwise, a 'sext' will be
798 // used.
799 //
800 // TODO: We should not rely on InstCombine to rewrite the reduction in the
801 // smaller type. We should just generate a correctly typed expression
802 // to begin with.
803 Type *ComputedType;
804 std::tie(ComputedType, IsSigned) =
805 computeRecurrenceType(ExitInstruction, DB, AC, DT);
806 if (ComputedType != RecurrenceType)
807 return false;
808 }
809
810 // Collect cast instructions and the minimum width used by the recurrence.
811 // If the starting value is not the same as the phi node and the computed
812 // recurrence type is equal to the recurrence type, the recurrence expression
813 // will be represented in a narrower or wider type. If there are any cast
814 // instructions that will be unnecessary, collect them in CastsFromRecurTy.
815 // Note that the 'and' instruction was already included in this list.
816 //
817 // TODO: A better way to represent this may be to tag in some way all the
818 // instructions that are a part of the reduction. The vectorizer cost
819 // model could then apply the recurrence type to these instructions,
820 // without needing a white list of instructions to ignore.
821 // This may also be useful for the inloop reductions, if it can be
822 // kept simple enough.
823 collectCastInstrs(TheLoop, ExitInstruction, RecurrenceType, CastInsts,
824 MinWidthCastToRecurrenceType);
825
826 // We found a reduction var if we have reached the original phi node and we
827 // only have a single instruction with out-of-loop users.
828
829 // The ExitInstruction(Instruction which is allowed to have out-of-loop users)
830 // is saved as part of the RecurrenceDescriptor.
831
832 // Save the description of this reduction variable.
833 RedDes =
834 RecurrenceDescriptor(RdxStart, ExitInstruction, IntermediateStore, Kind,
835 FMF, ExactFPMathInst, RecurrenceType, IsSigned,
836 IsOrdered, CastInsts, MinWidthCastToRecurrenceType);
837 return true;
838}
839
840// We are looking for loops that do something like this:
841// int r = 0;
842// for (int i = 0; i < n; i++) {
843// if (src[i] > 3)
844// r = 3;
845// }
846// where the reduction value (r) only has two states, in this example 0 or 3.
847// The generated LLVM IR for this type of loop will be like this:
848// for.body:
849// %r = phi i32 [ %spec.select, %for.body ], [ 0, %entry ]
850// ...
851// %cmp = icmp sgt i32 %5, 3
852// %spec.select = select i1 %cmp, i32 3, i32 %r
853// ...
854// In general we can support vectorization of loops where 'r' flips between
855// any two non-constants, provided they are loop invariant. The only thing
856// we actually care about at the end of the loop is whether or not any lane
857// in the selected vector is different from the start value. The final
858// across-vector reduction after the loop simply involves choosing the start
859// value if nothing changed (0 in the example above) or the other selected
860// value (3 in the example above).
863 Instruction *I, InstDesc &Prev) {
864 // We must handle the select(cmp(),x,y) as a single instruction. Advance to
865 // the select.
866 if (match(I, m_OneUse(m_Cmp()))) {
867 if (auto *Select = dyn_cast<SelectInst>(*I->user_begin()))
868 return InstDesc(Select, Prev.getRecKind());
869 }
870
871 if (!match(I, m_Select(m_Cmp(), m_Value(), m_Value())))
872 return InstDesc(false, I);
873
875 Value *NonPhi = nullptr;
876
877 if (OrigPhi == dyn_cast<PHINode>(SI->getTrueValue()))
878 NonPhi = SI->getFalseValue();
879 else if (OrigPhi == dyn_cast<PHINode>(SI->getFalseValue()))
880 NonPhi = SI->getTrueValue();
881 else
882 return InstDesc(false, I);
883
884 // We are looking for selects of the form:
885 // select(cmp(), phi, loop_invariant) or
886 // select(cmp(), loop_invariant, phi)
887 if (!Loop->isLoopInvariant(NonPhi))
888 return InstDesc(false, I);
889
890 return InstDesc(I, RecurKind::AnyOf);
891}
892
893// We are looking for loops that do something like this:
894// int r = 0;
895// for (int i = 0; i < n; i++) {
896// if (src[i] > 3)
897// r = i;
898// }
899// or like this:
900// int r = 0;
901// for (int i = 0; i < n; i++) {
902// if (src[i] > 3)
903// r = <loop-varying value>;
904// }
905// The reduction value (r) is derived from either the values of an induction
906// variable (i) sequence, an arbitrary loop-varying value, or from the start
907// value (0). The LLVM IR generated for such loops would be as follows:
908// for.body:
909// %r = phi i32 [ %spec.select, %for.body ], [ 0, %entry ]
910// %i = phi i32 [ %inc, %for.body ], [ 0, %entry ]
911// ...
912// %cmp = icmp sgt i32 %5, 3
913// %spec.select = select i1 %cmp, i32 %i, i32 %r
914// %inc = add nsw i32 %i, 1
915// ...
916//
917// When searching for an arbitrary loop-varying value, the reduction value will
918// either be the initial value (0) if the condition was never met, or the value
919// of the loop-varying value in the most recent loop iteration where the
920// condition was met.
924 // TODO: Support the vectorization of FindLastIV when the reduction phi is
925 // used by more than one select instruction. This vectorization is only
926 // performed when the SCEV of each increasing induction variable used by the
927 // select instructions is identical.
928 if (!OrigPhi->hasOneUse())
929 return InstDesc(false, I);
930
931 // We are looking for selects of the form:
932 // select(cmp(), phi, value) or
933 // select(cmp(), value, phi)
934 if (!match(I, m_CombineOr(m_Select(m_Cmp(), m_Value(), m_Specific(OrigPhi)),
935 m_Select(m_Cmp(), m_Specific(OrigPhi), m_Value()))))
936 return InstDesc(false, I);
937
939}
940
941/// Returns true if the select instruction has users in the compare-and-add
942/// reduction pattern below. The select instruction argument is the last one
943/// in the sequence.
944///
945/// %sum.1 = phi ...
946/// ...
947/// %cmp = fcmp pred %0, %CFP
948/// %add = fadd %0, %sum.1
949/// %sum.2 = select %cmp, %add, %sum.1
952 Value *TrueVal, *FalseVal;
953 // Only handle single use cases for now.
954 if (!match(I,
955 m_Select(m_OneUse(m_Cmp()), m_Value(TrueVal), m_Value(FalseVal))))
956 return InstDesc(false, I);
957
958 // Handle only when either of operands of select instruction is a PHI
959 // node for now.
960 if ((isa<PHINode>(TrueVal) && isa<PHINode>(FalseVal)) ||
961 (!isa<PHINode>(TrueVal) && !isa<PHINode>(FalseVal)))
962 return InstDesc(false, I);
963
964 Instruction *I1 = isa<PHINode>(TrueVal) ? dyn_cast<Instruction>(FalseVal)
965 : dyn_cast<Instruction>(TrueVal);
966 if (!I1 || !I1->isBinaryOp())
967 return InstDesc(false, I);
968
969 Value *Op1, *Op2;
970 if (!(((m_FAdd(m_Value(Op1), m_Value(Op2)).match(I1) ||
971 m_FSub(m_Value(Op1), m_Value(Op2)).match(I1)) &&
972 I1->isFast()) ||
973 (m_FMul(m_Value(Op1), m_Value(Op2)).match(I1) && (I1->isFast())) ||
974 ((m_Add(m_Value(Op1), m_Value(Op2)).match(I1) ||
975 m_Sub(m_Value(Op1), m_Value(Op2)).match(I1))) ||
976 (m_Mul(m_Value(Op1), m_Value(Op2)).match(I1))))
977 return InstDesc(false, I);
978
981 if (!IPhi || IPhi != FalseVal)
982 return InstDesc(false, I);
983
984 return InstDesc(true, I);
985}
986
989 Instruction *I, RecurKind Kind,
990 InstDesc &Prev, ScalarEvolution *SE) {
991 assert(Prev.getRecKind() == RecurKind::None || Prev.getRecKind() == Kind);
992 switch (I->getOpcode()) {
993 default:
994 return InstDesc(false, I);
995 case Instruction::PHI:
996 return InstDesc(I, Prev.getRecKind(), Prev.getExactFPMathInst());
997 case Instruction::Sub:
998 return InstDesc(
999 Kind == RecurKind::Sub || Kind == RecurKind::AddChainWithSubs, I);
1000 case Instruction::Add:
1001 return InstDesc(
1002 Kind == RecurKind::Add || Kind == RecurKind::AddChainWithSubs, I);
1003 case Instruction::Mul:
1004 return InstDesc(Kind == RecurKind::Mul, I);
1005 case Instruction::And:
1006 return InstDesc(Kind == RecurKind::And, I);
1007 case Instruction::Or:
1008 return InstDesc(Kind == RecurKind::Or, I);
1009 case Instruction::Xor:
1010 return InstDesc(Kind == RecurKind::Xor, I);
1011 case Instruction::FDiv:
1012 case Instruction::FMul:
1013 return InstDesc(Kind == RecurKind::FMul, I,
1014 I->hasAllowReassoc() ? nullptr : I);
1015 case Instruction::FSub:
1016 return InstDesc(Kind == RecurKind::FSub ||
1018 I, I->hasAllowReassoc() ? nullptr : I);
1019 case Instruction::FAdd:
1020 return InstDesc(Kind == RecurKind::FAdd ||
1022 I, I->hasAllowReassoc() ? nullptr : I);
1023 case Instruction::Select:
1024 if (isSubRecurrenceKind(Kind) || Kind == RecurKind::FAdd ||
1025 Kind == RecurKind::FMul || Kind == RecurKind::Add ||
1026 Kind == RecurKind::Mul || Kind == RecurKind::AddChainWithSubs ||
1028 return isConditionalRdxPattern(I);
1029 if (isFindRecurrenceKind(Kind) && SE)
1030 return isFindPattern(L, OrigPhi, I, *SE);
1031 [[fallthrough]];
1032 case Instruction::FCmp:
1033 case Instruction::ICmp:
1034 case Instruction::Call:
1035 if (isAnyOfRecurrenceKind(Kind))
1036 return isAnyOfPattern(L, OrigPhi, I, Prev);
1037 if (isFMulAddIntrinsic(I))
1038 return InstDesc(Kind == RecurKind::FMulAdd, I,
1039 I->hasAllowReassoc() ? nullptr : I);
1040 return InstDesc(false, I);
1041 }
1042}
1043
1046 unsigned MaxNumUses) {
1047 unsigned NumUses = 0;
1048 for (const Use &U : I->operands()) {
1049 if (Insts.count(dyn_cast<Instruction>(U)))
1050 ++NumUses;
1051 if (NumUses > MaxNumUses)
1052 return true;
1053 }
1054
1055 return false;
1056}
1057
1059 RecurrenceDescriptor &RedDes,
1061 DominatorTree *DT,
1062 ScalarEvolution *SE) {
1063 if (AddReductionVar(Phi, RecurKind::Add, TheLoop, RedDes, DB, AC, DT, SE)) {
1064 LLVM_DEBUG(dbgs() << "Found an ADD reduction PHI." << *Phi << "\n");
1065 return true;
1066 }
1067 if (AddReductionVar(Phi, RecurKind::Sub, TheLoop, RedDes, DB, AC, DT, SE)) {
1068 LLVM_DEBUG(dbgs() << "Found a SUB reduction PHI." << *Phi << "\n");
1069 return true;
1070 }
1071 if (AddReductionVar(Phi, RecurKind::AddChainWithSubs, TheLoop, RedDes, DB, AC,
1072 DT, SE)) {
1073 LLVM_DEBUG(dbgs() << "Found a chained ADD-SUB reduction PHI." << *Phi
1074 << "\n");
1075 return true;
1076 }
1077 if (AddReductionVar(Phi, RecurKind::Mul, TheLoop, RedDes, DB, AC, DT, SE)) {
1078 LLVM_DEBUG(dbgs() << "Found a MUL reduction PHI." << *Phi << "\n");
1079 return true;
1080 }
1081 if (AddReductionVar(Phi, RecurKind::Or, TheLoop, RedDes, DB, AC, DT, SE)) {
1082 LLVM_DEBUG(dbgs() << "Found an OR reduction PHI." << *Phi << "\n");
1083 return true;
1084 }
1085 if (AddReductionVar(Phi, RecurKind::And, TheLoop, RedDes, DB, AC, DT, SE)) {
1086 LLVM_DEBUG(dbgs() << "Found an AND reduction PHI." << *Phi << "\n");
1087 return true;
1088 }
1089 if (AddReductionVar(Phi, RecurKind::Xor, TheLoop, RedDes, DB, AC, DT, SE)) {
1090 LLVM_DEBUG(dbgs() << "Found a XOR reduction PHI." << *Phi << "\n");
1091 return true;
1092 }
1093 auto RD = getMinMaxRecurrence(Phi, TheLoop, SE);
1094 if (RD.getRecurrenceKind() != RecurKind::None) {
1095 assert(
1096 RecurrenceDescriptor::isMinMaxRecurrenceKind(RD.getRecurrenceKind()) &&
1097 "Expected a min/max recurrence kind");
1098 LLVM_DEBUG(dbgs() << "Found a min/max reduction PHI." << *Phi << "\n");
1099 RedDes = std::move(RD);
1100 return true;
1101 }
1102 if (AddReductionVar(Phi, RecurKind::AnyOf, TheLoop, RedDes, DB, AC, DT, SE)) {
1103 LLVM_DEBUG(dbgs() << "Found a conditional select reduction PHI." << *Phi
1104 << "\n");
1105 return true;
1106 }
1107 if (AddReductionVar(Phi, RecurKind::FindLast, TheLoop, RedDes, DB, AC, DT,
1108 SE)) {
1109 LLVM_DEBUG(dbgs() << "Found a Find reduction PHI." << *Phi << "\n");
1110 return true;
1111 }
1112 if (AddReductionVar(Phi, RecurKind::FMul, TheLoop, RedDes, DB, AC, DT, SE)) {
1113 LLVM_DEBUG(dbgs() << "Found an FMult reduction PHI." << *Phi << "\n");
1114 return true;
1115 }
1116 if (AddReductionVar(Phi, RecurKind::FSub, TheLoop, RedDes, DB, AC, DT, SE)) {
1117 LLVM_DEBUG(dbgs() << "Found an FSub reduction PHI." << *Phi << "\n");
1118 return true;
1119 }
1120 if (AddReductionVar(Phi, RecurKind::FAdd, TheLoop, RedDes, DB, AC, DT, SE)) {
1121 LLVM_DEBUG(dbgs() << "Found an FAdd reduction PHI." << *Phi << "\n");
1122 return true;
1123 }
1124 if (AddReductionVar(Phi, RecurKind::FAddChainWithSubs, TheLoop, RedDes, DB,
1125 AC, DT, SE)) {
1126 LLVM_DEBUG(dbgs() << "Found a chained FADD-FSUB chained reduction PHI."
1127 << *Phi << "\n");
1128 return true;
1129 }
1130 if (AddReductionVar(Phi, RecurKind::FMulAdd, TheLoop, RedDes, DB, AC, DT,
1131 SE)) {
1132 LLVM_DEBUG(dbgs() << "Found an FMulAdd reduction PHI." << *Phi << "\n");
1133 return true;
1134 }
1135
1136 // Not a reduction of known type.
1137 return false;
1138}
1139
1141 DominatorTree *DT) {
1142
1143 // Ensure the phi node is in the loop header and has two incoming values.
1144 if (Phi->getParent() != TheLoop->getHeader() ||
1145 Phi->getNumIncomingValues() != 2)
1146 return false;
1147
1148 // Ensure the loop has a preheader and a single latch block. The loop
1149 // vectorizer will need the latch to set up the next iteration of the loop.
1150 auto *Preheader = TheLoop->getLoopPreheader();
1151 auto *Latch = TheLoop->getLoopLatch();
1152 if (!Preheader || !Latch)
1153 return false;
1154
1155 // Ensure the phi node's incoming blocks are the loop preheader and latch.
1156 if (Phi->getBasicBlockIndex(Preheader) < 0 ||
1157 Phi->getBasicBlockIndex(Latch) < 0)
1158 return false;
1159
1160 // Get the previous value. The previous value comes from the latch edge while
1161 // the initial value comes from the preheader edge.
1162 auto *Previous = dyn_cast<Instruction>(Phi->getIncomingValueForBlock(Latch));
1163
1164 // If Previous is a phi in the header, go through incoming values from the
1165 // latch until we find a non-phi value. Use this as the new Previous, all uses
1166 // in the header will be dominated by the original phi, but need to be moved
1167 // after the non-phi previous value.
1169 while (auto *PrevPhi = dyn_cast_or_null<PHINode>(Previous)) {
1170 if (PrevPhi->getParent() != Phi->getParent())
1171 return false;
1172 if (!SeenPhis.insert(PrevPhi).second)
1173 return false;
1174 Previous = dyn_cast<Instruction>(PrevPhi->getIncomingValueForBlock(Latch));
1175 }
1176
1177 if (!Previous || !TheLoop->contains(Previous) || isa<PHINode>(Previous))
1178 return false;
1179
1180 // Ensure every user of the phi node (recursively) is dominated by the
1181 // previous value. The dominance requirement ensures the loop vectorizer will
1182 // not need to vectorize the initial value prior to the first iteration of the
1183 // loop.
1184 // TODO: Consider extending this sinking to handle memory instructions.
1185
1187 BasicBlock *PhiBB = Phi->getParent();
1189 auto TryToPushSinkCandidate = [&](Instruction *SinkCandidate) {
1190 // Cyclic dependence.
1191 if (Previous == SinkCandidate)
1192 return false;
1193
1194 if (!Seen.insert(SinkCandidate).second)
1195 return true;
1196 if (DT->dominates(Previous,
1197 SinkCandidate)) // We already are good w/o sinking.
1198 return true;
1199
1200 if (SinkCandidate->getParent() != PhiBB ||
1201 SinkCandidate->mayHaveSideEffects() ||
1202 SinkCandidate->mayReadFromMemory() || SinkCandidate->isTerminator())
1203 return false;
1204
1205 // If we reach a PHI node that is not dominated by Previous, we reached a
1206 // header PHI. No need for sinking.
1207 if (isa<PHINode>(SinkCandidate))
1208 return true;
1209
1210 // Sink User tentatively and check its users
1211 WorkList.push_back(SinkCandidate);
1212 return true;
1213 };
1214
1215 WorkList.push_back(Phi);
1216 // Try to recursively sink instructions and their users after Previous.
1217 while (!WorkList.empty()) {
1218 Instruction *Current = WorkList.pop_back_val();
1219 for (User *User : Current->users()) {
1220 if (!TryToPushSinkCandidate(cast<Instruction>(User)))
1221 return false;
1222 }
1223 }
1224
1225 return true;
1226}
1227
1229 switch (Kind) {
1230 case RecurKind::Sub:
1231 return Instruction::Sub;
1233 case RecurKind::Add:
1234 return Instruction::Add;
1235 case RecurKind::Mul:
1236 return Instruction::Mul;
1237 case RecurKind::Or:
1238 return Instruction::Or;
1239 case RecurKind::And:
1240 return Instruction::And;
1241 case RecurKind::Xor:
1242 return Instruction::Xor;
1243 case RecurKind::FMul:
1244 return Instruction::FMul;
1245 case RecurKind::FMulAdd:
1247 case RecurKind::FAdd:
1248 return Instruction::FAdd;
1249 case RecurKind::FSub:
1250 return Instruction::FSub;
1251 case RecurKind::SMax:
1252 case RecurKind::SMin:
1253 case RecurKind::UMax:
1254 case RecurKind::UMin:
1255 return Instruction::ICmp;
1256 case RecurKind::FMax:
1257 case RecurKind::FMin:
1262 return Instruction::FCmp;
1264 case RecurKind::AnyOf:
1265 case RecurKind::FindIV:
1266 // TODO: Set AnyOf and FindIV to Instruction::Select once in-loop reductions
1267 // are supported.
1268 default:
1269 llvm_unreachable("Unknown recurrence operation");
1270 }
1271}
1272
1275 SmallVector<Instruction *, 4> ReductionOperations;
1276 const bool IsMinMax = isMinMaxRecurrenceKind(Kind);
1277
1278 // Search down from the Phi to the LoopExitInstr, looking for instructions
1279 // with a single user of the correct type for the reduction.
1280
1281 // Note that we check that the type of the operand is correct for each item in
1282 // the chain, including the last (the loop exit value). This can come up from
1283 // sub, which would otherwise be treated as an add reduction. MinMax also need
1284 // to check for a pair of icmp/select, for which we use getNextInstruction and
1285 // isCorrectOpcode functions to step the right number of instruction, and
1286 // check the icmp/select pair.
1287 // FIXME: We also do not attempt to look through Select's yet, which might
1288 // be part of the reduction chain, or attempt to looks through And's to find a
1289 // smaller bitwidth. Subs are also currently not allowed (which are usually
1290 // treated as part of a add reduction) as they are expected to generally be
1291 // more expensive than out-of-loop reductions, and need to be costed more
1292 // carefully.
1293 unsigned ExpectedUses = 1;
1294 if (IsMinMax)
1295 ExpectedUses = 2;
1296
1297 auto getNextInstruction = [&](Instruction *Cur) -> Instruction * {
1298 for (auto *User : Cur->users()) {
1300 if (isa<PHINode>(UI))
1301 continue;
1302 if (IsMinMax) {
1303 // We are expecting a icmp/select pair, which we go to the next select
1304 // instruction if we can. We already know that Cur has 2 uses.
1305 if (isa<SelectInst>(UI))
1306 return UI;
1307 continue;
1308 }
1309 return UI;
1310 }
1311 return nullptr;
1312 };
1313 auto isCorrectOpcode = [&](Instruction *Cur) {
1314 if (IsMinMax) {
1315 Value *LHS, *RHS;
1317 matchSelectPattern(Cur, LHS, RHS).Flavor);
1318 }
1319 // Recognize a call to the llvm.fmuladd intrinsic.
1320 if (isFMulAddIntrinsic(Cur))
1321 return true;
1322
1323 if (Cur->getOpcode() == Instruction::Sub &&
1325 return true;
1326
1327 if (Cur->getOpcode() == Instruction::FSub &&
1329 return true;
1330
1331 return Cur->getOpcode() == getOpcode();
1332 };
1333
1334 // Attempt to look through Phis which are part of the reduction chain
1335 unsigned ExtraPhiUses = 0;
1336 Instruction *RdxInstr = LoopExitInstr;
1337 if (auto ExitPhi = dyn_cast<PHINode>(LoopExitInstr)) {
1338 if (ExitPhi->getNumIncomingValues() != 2)
1339 return {};
1340
1341 Instruction *Inc0 = dyn_cast<Instruction>(ExitPhi->getIncomingValue(0));
1342 Instruction *Inc1 = dyn_cast<Instruction>(ExitPhi->getIncomingValue(1));
1343
1344 Instruction *Chain = nullptr;
1345 if (Inc0 == Phi)
1346 Chain = Inc1;
1347 else if (Inc1 == Phi)
1348 Chain = Inc0;
1349 else
1350 return {};
1351
1352 RdxInstr = Chain;
1353 ExtraPhiUses = 1;
1354 }
1355
1356 // The loop exit instruction we check first (as a quick test) but add last. We
1357 // check the opcode is correct (and dont allow them to be Subs) and that they
1358 // have expected to have the expected number of uses. They will have one use
1359 // from the phi and one from a LCSSA value, no matter the type.
1360 if (!isCorrectOpcode(RdxInstr) || !LoopExitInstr->hasNUses(2))
1361 return {};
1362
1363 // Check that the Phi has one (or two for min/max) uses, plus an extra use
1364 // for conditional reductions.
1365 if (!Phi->hasNUses(ExpectedUses + ExtraPhiUses))
1366 return {};
1367
1368 Instruction *Cur = getNextInstruction(Phi);
1369
1370 // Each other instruction in the chain should have the expected number of uses
1371 // and be the correct opcode.
1372 while (Cur != RdxInstr) {
1373 if (!Cur || !isCorrectOpcode(Cur) || !Cur->hasNUses(ExpectedUses))
1374 return {};
1375
1376 ReductionOperations.push_back(Cur);
1377 Cur = getNextInstruction(Cur);
1378 }
1379
1380 ReductionOperations.push_back(Cur);
1381 return ReductionOperations;
1382}
1383
1384InductionDescriptor::InductionDescriptor(Value *Start, InductionKind K,
1385 const SCEV *Step, BinaryOperator *BOp,
1387 : StartValue(Start), IK(K), Step(Step), InductionBinOp(BOp) {
1388 assert(IK != IK_NoInduction && "Not an induction");
1389
1390 // Start value type should match the induction kind and the value
1391 // itself should not be null.
1392 assert(StartValue && "StartValue is null");
1393 assert((IK != IK_PtrInduction || StartValue->getType()->isPointerTy()) &&
1394 "StartValue is not a pointer for pointer induction");
1395 assert((IK != IK_IntInduction || StartValue->getType()->isIntegerTy()) &&
1396 "StartValue is not an integer for integer induction");
1397
1398 // Check the Step Value. It should be non-zero integer value.
1399 assert((!getConstIntStepValue() || !getConstIntStepValue()->isZero()) &&
1400 "Step value is zero");
1401
1402 assert((IK == IK_FpInduction || Step->getType()->isIntegerTy()) &&
1403 "StepValue is not an integer");
1404
1405 assert((IK != IK_FpInduction || Step->getType()->isFloatingPointTy()) &&
1406 "StepValue is not FP for FpInduction");
1407 assert((IK != IK_FpInduction ||
1408 (InductionBinOp &&
1409 (InductionBinOp->getOpcode() == Instruction::FAdd ||
1410 InductionBinOp->getOpcode() == Instruction::FSub))) &&
1411 "Binary opcode should be specified for FP induction");
1412
1413 if (Casts)
1414 llvm::append_range(RedundantCasts, *Casts);
1415}
1416
1422
1424 if (auto *ConstStep = dyn_cast<SCEVConstant>(Step))
1425 return ConstStep->getValue();
1426 return nullptr;
1427}
1428
1430 ScalarEvolution *SE,
1432
1433 // Here we only handle FP induction variables.
1434 assert(Phi->getType()->isFloatingPointTy() && "Unexpected Phi type");
1435
1436 if (TheLoop->getHeader() != Phi->getParent())
1437 return false;
1438
1439 // The loop may have multiple entrances or multiple exits; we can analyze
1440 // this phi if it has a unique entry value and a unique backedge value.
1441 if (Phi->getNumIncomingValues() != 2)
1442 return false;
1443 Value *BEValue = nullptr, *StartValue = nullptr;
1444 if (TheLoop->contains(Phi->getIncomingBlock(0))) {
1445 BEValue = Phi->getIncomingValue(0);
1446 StartValue = Phi->getIncomingValue(1);
1447 } else {
1448 assert(TheLoop->contains(Phi->getIncomingBlock(1)) &&
1449 "Unexpected Phi node in the loop");
1450 BEValue = Phi->getIncomingValue(1);
1451 StartValue = Phi->getIncomingValue(0);
1452 }
1453
1455 if (!BOp)
1456 return false;
1457
1458 Value *Addend = nullptr;
1459 if (BOp->getOpcode() == Instruction::FAdd) {
1460 if (BOp->getOperand(0) == Phi)
1461 Addend = BOp->getOperand(1);
1462 else if (BOp->getOperand(1) == Phi)
1463 Addend = BOp->getOperand(0);
1464 } else if (BOp->getOpcode() == Instruction::FSub)
1465 if (BOp->getOperand(0) == Phi)
1466 Addend = BOp->getOperand(1);
1467
1468 if (!Addend)
1469 return false;
1470
1471 // The addend should be loop invariant
1472 if (auto *I = dyn_cast<Instruction>(Addend))
1473 if (TheLoop->contains(I))
1474 return false;
1475
1476 // FP Step has unknown SCEV
1477 const SCEV *Step = SE->getUnknown(Addend);
1478 D = InductionDescriptor(StartValue, IK_FpInduction, Step, BOp);
1479 return true;
1480}
1481
1482/// This function is called when we suspect that the update-chain of a phi node
1483/// (whose symbolic SCEV expression sin \p PhiScev) contains redundant casts,
1484/// that can be ignored. (This can happen when the PSCEV rewriter adds a runtime
1485/// predicate P under which the SCEV expression for the phi can be the
1486/// AddRecurrence \p AR; See createAddRecFromPHIWithCast). We want to find the
1487/// cast instructions that are involved in the update-chain of this induction.
1488/// A caller that adds the required runtime predicate can be free to drop these
1489/// cast instructions, and compute the phi using \p AR (instead of some scev
1490/// expression with casts).
1491///
1492/// For example, without a predicate the scev expression can take the following
1493/// form:
1494/// (Ext ix (Trunc iy ( Start + i*Step ) to ix) to iy)
1495///
1496/// It corresponds to the following IR sequence:
1497/// %for.body:
1498/// %x = phi i64 [ 0, %ph ], [ %add, %for.body ]
1499/// %casted_phi = "ExtTrunc i64 %x"
1500/// %add = add i64 %casted_phi, %step
1501///
1502/// where %x is given in \p PN,
1503/// PSE.getSCEV(%x) is equal to PSE.getSCEV(%casted_phi) under a predicate,
1504/// and the IR sequence that "ExtTrunc i64 %x" represents can take one of
1505/// several forms, for example, such as:
1506/// ExtTrunc1: %casted_phi = and %x, 2^n-1
1507/// or:
1508/// ExtTrunc2: %t = shl %x, m
1509/// %casted_phi = ashr %t, m
1510///
1511/// If we are able to find such sequence, we return the instructions
1512/// we found, namely %casted_phi and the instructions on its use-def chain up
1513/// to the phi (not including the phi).
1515 const SCEVUnknown *PhiScev,
1516 const SCEVAddRecExpr *AR,
1517 SmallVectorImpl<Instruction *> &CastInsts) {
1518
1519 assert(CastInsts.empty() && "CastInsts is expected to be empty.");
1520 auto *PN = cast<PHINode>(PhiScev->getValue());
1521 assert(PSE.getSCEV(PN) == AR && "Unexpected phi node SCEV expression");
1522 const Loop *L = AR->getLoop();
1523
1524 // Find any cast instructions that participate in the def-use chain of
1525 // PhiScev in the loop.
1526 // FORNOW/TODO: We currently expect the def-use chain to include only
1527 // two-operand instructions, where one of the operands is an invariant.
1528 // createAddRecFromPHIWithCasts() currently does not support anything more
1529 // involved than that, so we keep the search simple. This can be
1530 // extended/generalized as needed.
1531
1532 auto getDef = [&](const Value *Val) -> Value * {
1533 const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Val);
1534 if (!BinOp)
1535 return nullptr;
1536 Value *Op0 = BinOp->getOperand(0);
1537 Value *Op1 = BinOp->getOperand(1);
1538 Value *Def = nullptr;
1539 if (L->isLoopInvariant(Op0))
1540 Def = Op1;
1541 else if (L->isLoopInvariant(Op1))
1542 Def = Op0;
1543 return Def;
1544 };
1545
1546 // Look for the instruction that defines the induction via the
1547 // loop backedge.
1548 BasicBlock *Latch = L->getLoopLatch();
1549 if (!Latch)
1550 return false;
1551 Value *Val = PN->getIncomingValueForBlock(Latch);
1552 if (!Val)
1553 return false;
1554
1555 // Follow the def-use chain until the induction phi is reached.
1556 // If on the way we encounter a Value that has the same SCEV Expr as the
1557 // phi node, we can consider the instructions we visit from that point
1558 // as part of the cast-sequence that can be ignored.
1559 bool InCastSequence = false;
1560 auto *Inst = dyn_cast<Instruction>(Val);
1561 while (Val != PN) {
1562 // If we encountered a phi node other than PN, or if we left the loop,
1563 // we bail out.
1564 if (!Inst || !L->contains(Inst)) {
1565 return false;
1566 }
1567 auto *AddRec = dyn_cast<SCEVAddRecExpr>(PSE.getSCEV(Val));
1568 if (AddRec && PSE.areAddRecsEqualWithPreds(AddRec, AR))
1569 InCastSequence = true;
1570 if (InCastSequence) {
1571 // Only the last instruction in the cast sequence is expected to have
1572 // uses outside the induction def-use chain.
1573 if (!CastInsts.empty())
1574 if (!Inst->hasOneUse())
1575 return false;
1576 CastInsts.push_back(Inst);
1577 }
1578 Val = getDef(Val);
1579 if (!Val)
1580 return false;
1581 Inst = dyn_cast<Instruction>(Val);
1582 }
1583
1584 return InCastSequence;
1585}
1586
1589 InductionDescriptor &D, bool Assume) {
1590 Type *PhiTy = Phi->getType();
1591
1592 // Handle integer and pointer inductions variables.
1593 // Now we handle also FP induction but not trying to make a
1594 // recurrent expression from the PHI node in-place.
1595
1596 if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy() && !PhiTy->isFloatTy() &&
1597 !PhiTy->isDoubleTy() && !PhiTy->isHalfTy())
1598 return false;
1599
1600 if (PhiTy->isFloatingPointTy())
1601 return isFPInductionPHI(Phi, TheLoop, PSE.getSE(), D);
1602
1603 const SCEV *PhiScev = PSE.getSCEV(Phi);
1604 const auto *AR = dyn_cast<SCEVAddRecExpr>(PhiScev);
1605
1606 // We need this expression to be an AddRecExpr.
1607 if (Assume && !AR)
1608 AR = PSE.getAsAddRec(Phi);
1609
1610 if (!AR) {
1611 LLVM_DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n");
1612 return false;
1613 }
1614
1615 // Record any Cast instructions that participate in the induction update
1616 const auto *SymbolicPhi = dyn_cast<SCEVUnknown>(PhiScev);
1617 // If we started from an UnknownSCEV, and managed to build an addRecurrence
1618 // only after enabling Assume with PSCEV, this means we may have encountered
1619 // cast instructions that required adding a runtime check in order to
1620 // guarantee the correctness of the AddRecurrence respresentation of the
1621 // induction.
1622 if (PhiScev != AR && SymbolicPhi) {
1624 if (getCastsForInductionPHI(PSE, SymbolicPhi, AR, Casts))
1625 return isInductionPHI(Phi, TheLoop, PSE.getSE(), D, AR, &Casts);
1626 }
1627
1628 return isInductionPHI(Phi, TheLoop, PSE.getSE(), D, AR);
1629}
1630
1632 PHINode *Phi, const Loop *TheLoop, ScalarEvolution *SE,
1633 InductionDescriptor &D, const SCEV *Expr,
1634 SmallVectorImpl<Instruction *> *CastsToIgnore) {
1635 Type *PhiTy = Phi->getType();
1636 // isSCEVable returns true for integer and pointer types.
1637 if (!SE->isSCEVable(PhiTy))
1638 return false;
1639
1640 // Check that the PHI is consecutive.
1641 const SCEV *PhiScev = Expr ? Expr : SE->getSCEV(Phi);
1642 const SCEV *Step;
1643
1644 // FIXME: We are currently matching the specific loop TheLoop; if it doesn't
1645 // match, we should treat it as a uniform. Unfortunately, we don't currently
1646 // know how to handled uniform PHIs.
1647 if (!match(PhiScev, m_scev_AffineAddRec(m_SCEV(), m_SCEV(Step),
1648 m_SpecificLoop(TheLoop)))) {
1649 LLVM_DEBUG(
1650 dbgs() << "LV: PHI is not a poly recurrence for requested loop.\n");
1651 return false;
1652 }
1653
1654 // This function assumes that InductionPhi is called only on Phi nodes
1655 // present inside loop headers. Check for the same, and throw an assert if
1656 // the current Phi is not present inside the loop header.
1657 assert(Phi->getParent() == TheLoop->getHeader() &&
1658 "Invalid Phi node, not present in loop header");
1659
1660 if (!TheLoop->getLoopPreheader())
1661 return false;
1662
1663 Value *StartValue =
1664 Phi->getIncomingValueForBlock(TheLoop->getLoopPreheader());
1665
1666 BasicBlock *Latch = TheLoop->getLoopLatch();
1667 if (!Latch)
1668 return false;
1669
1670 if (PhiTy->isIntegerTy()) {
1671 BinaryOperator *BOp =
1672 dyn_cast<BinaryOperator>(Phi->getIncomingValueForBlock(Latch));
1673 D = InductionDescriptor(StartValue, IK_IntInduction, Step, BOp,
1674 CastsToIgnore);
1675 return true;
1676 }
1677
1678 assert(PhiTy->isPointerTy() && "The PHI must be a pointer");
1679
1680 // This allows induction variables w/non-constant steps.
1681 D = InductionDescriptor(StartValue, IK_PtrInduction, Step);
1682 return true;
1683}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
#define X(NUM, ENUM, NAME)
Definition ELF.h:853
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static bool getCastsForInductionPHI(PredicatedScalarEvolution &PSE, const SCEVUnknown *PhiScev, const SCEVAddRecExpr *AR, SmallVectorImpl< Instruction * > &CastInsts)
This function is called when we suspect that the update-chain of a phi node (whose symbolic SCEV expr...
static std::optional< FastMathFlags > hasRequiredFastMathFlags(FPMathOperator *FPOp, RecurKind &RK)
static void collectCastInstrs(Loop *TheLoop, Instruction *Exit, Type *RecurrenceType, SmallPtrSetImpl< Instruction * > &Casts, unsigned &MinWidthCastToRecurTy)
Collect cast instructions that can be ignored in the vectorizer's cost model, given a reduction exit ...
static bool checkOrderedReduction(RecurKind Kind, Instruction *ExactFPMathInst, Instruction *Exit, PHINode *Phi)
static bool isFindLastLikePhi(PHINode *Phi, PHINode *HeaderPhi, SmallPtrSetImpl< Instruction * > &ReductionInstrs)
static Instruction * lookThroughAnd(PHINode *Phi, Type *&RT, SmallPtrSetImpl< Instruction * > &Visited, SmallPtrSetImpl< Instruction * > &CI)
Determines if Phi may have been type-promoted.
static FastMathFlags collectMinMaxFMF(Value *V)
static RecurrenceDescriptor getMinMaxRecurrence(PHINode *Phi, Loop *TheLoop, ScalarEvolution *SE)
static std::pair< Type *, bool > computeRecurrenceType(Instruction *Exit, DemandedBits *DB, AssumptionCache *AC, DominatorTree *DT)
Compute the minimal bit width needed to represent a reduction whose exit instruction is given by Exit...
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition Lint.cpp:539
#define I(x, y, z)
Definition MD5.cpp:57
#define LLVM_DEBUG(...)
Definition Debug.h:119
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
Class for arbitrary precision integers.
Definition APInt.h:78
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
BinaryOps getOpcode() const
Definition InstrTypes.h:374
This is the shared class of boolean and integer constants.
Definition Constants.h:87
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition Constants.h:159
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:159
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Utility class for floating point operations which can have information about relaxed accuracy require...
Definition Operator.h:200
bool hasNoNaNs() const
Test if this operation's arguments and results are assumed not-NaN.
Definition Operator.h:302
bool hasNoSignedZeros() const
Test if this operation can ignore the sign of zero.
Definition Operator.h:312
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:23
static FastMathFlags getFast()
Definition FMF.h:53
A struct for saving information about induction variables.
static LLVM_ABI InductionDescriptor getCanonicalIntInduction(Type *Ty, ScalarEvolution &SE)
Returns the canonical integer induction for type Ty with start = 0 and step = 1.
@ IK_FpInduction
Floating point induction variable.
@ IK_PtrInduction
Pointer induction var. Step = C.
@ IK_IntInduction
Integer induction variable. Step = C.
static LLVM_ABI bool isInductionPHI(PHINode *Phi, const Loop *L, ScalarEvolution *SE, InductionDescriptor &D, const SCEV *Expr=nullptr, SmallVectorImpl< Instruction * > *CastsToIgnore=nullptr)
Returns true if Phi is an induction in the loop L.
static LLVM_ABI bool isFPInductionPHI(PHINode *Phi, const Loop *L, ScalarEvolution *SE, InductionDescriptor &D)
Returns true if Phi is a floating point induction in the loop L.
InductionDescriptor()=default
Default constructor - creates an invalid induction.
LLVM_ABI ConstantInt * getConstIntStepValue() const
LLVM_ABI bool isCommutative() const LLVM_READONLY
Return true if the instruction is commutative:
LLVM_ABI bool comesBefore(const Instruction *Other) const
Given an instruction Other in the same basic block as this instruction, return true if this instructi...
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition Type.cpp:354
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
BlockT * getHeader() const
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition LoopInfo.cpp:67
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
LLVM_ABI bool areAddRecsEqualWithPreds(const SCEVAddRecExpr *AR1, const SCEVAddRecExpr *AR2) const
Check if AR1 and AR2 are equal, while taking into account Equal predicates in Preds.
LLVM_ABI const SCEVAddRecExpr * getAsAddRec(Value *V)
Attempts to produce an AddRecExpr for V by adding additional SCEV predicates.
LLVM_ABI const SCEV * getSCEV(Value *V)
Returns the SCEV expression of V, in the context of the current SCEV predicate.
This POD struct holds information about a potential recurrence operation.
Instruction * getExactFPMathInst() const
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
static bool isFPMinMaxRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is a floating-point min/max kind.
static bool isFMulAddIntrinsic(Instruction *I)
Returns true if the instruction is a call to the llvm.fmuladd intrinsic.
static LLVM_ABI bool isFixedOrderRecurrence(PHINode *Phi, Loop *TheLoop, DominatorTree *DT)
Returns true if Phi is a fixed-order recurrence.
static LLVM_ABI InstDesc isConditionalRdxPattern(Instruction *I)
Returns a struct describing if the instruction is a Select(FCmp(X, Y), (Z = X op PHINode),...
static LLVM_ABI bool hasMultipleUsesOf(Instruction *I, SmallPtrSetImpl< Instruction * > &Insts, unsigned MaxNumUses)
Returns true if instruction I has multiple uses in Insts.
static LLVM_ABI bool isReductionPHI(PHINode *Phi, Loop *TheLoop, RecurrenceDescriptor &RedDes, DemandedBits *DB=nullptr, AssumptionCache *AC=nullptr, DominatorTree *DT=nullptr, ScalarEvolution *SE=nullptr)
Returns true if Phi is a reduction in TheLoop.
static LLVM_ABI bool areAllUsesIn(Instruction *I, SmallPtrSetImpl< Instruction * > &Set)
Returns true if all uses of the instruction I is within the Set.
LLVM_ABI SmallVector< Instruction *, 4 > getReductionOpChain(PHINode *Phi, Loop *L) const
Attempts to find a chain of operations from Phi to LoopExitInst that can be treated as a set of reduc...
static bool isAnyOfRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
static LLVM_ABI InstDesc isAnyOfPattern(Loop *Loop, PHINode *OrigPhi, Instruction *I, InstDesc &Prev)
Returns a struct describing whether the instruction is either a Select(ICmp(A, B),...
static LLVM_ABI bool isSubRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is for a sub operation.
StoreInst * IntermediateStore
Reductions may store temporary or final result to an invariant address.
static bool isFindRecurrenceKind(RecurKind Kind)
static LLVM_ABI bool isFloatingPointRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is a floating point kind.
static LLVM_ABI InstDesc isRecurrenceInstr(Loop *L, PHINode *Phi, Instruction *I, RecurKind Kind, InstDesc &Prev, ScalarEvolution *SE)
Returns a struct describing if the instruction 'I' can be a recurrence variable of type 'Kind' for a ...
static LLVM_ABI bool AddReductionVar(PHINode *Phi, RecurKind Kind, Loop *TheLoop, RecurrenceDescriptor &RedDes, DemandedBits *DB=nullptr, AssumptionCache *AC=nullptr, DominatorTree *DT=nullptr, ScalarEvolution *SE=nullptr)
Returns true if Phi is a reduction of type Kind and adds it to the RecurrenceDescriptor.
static LLVM_ABI InstDesc isFindPattern(Loop *TheLoop, PHINode *OrigPhi, Instruction *I, ScalarEvolution &SE)
Returns a struct describing whether the instruction is either a Select(ICmp(A, B),...
static LLVM_ABI bool isIntegerRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is an integer kind.
static bool isIntMinMaxRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is an integer min/max kind.
static bool isMinMaxRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is any min/max kind.
This node represents a polynomial recurrence on the trip count of the specified loop.
This means that we are dealing with an entirely unknown SCEV value, and only represent it as its LLVM...
This class represents an analyzed expression in the program.
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
The main scalar evolution driver.
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
const SCEV * getOne(Type *Ty)
Return a SCEV for the constant 1 of a specific type.
LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
LLVM_ABI bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
LLVM_ABI const SCEV * getUnknown(Value *V)
This class represents the LLVM 'select' instruction.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:284
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition Type.h:155
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:370
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
Definition Type.h:144
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition Type.h:158
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition Type.h:186
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:257
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:317
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
Value * getOperand(unsigned i) const
Definition User.h:207
LLVM Value Representation.
Definition Value.h:75
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
iterator_range< user_iterator > users()
Definition Value.h:426
LLVM_ABI bool hasNUses(unsigned N) const
Return true if this Value has exactly N uses.
Definition Value.cpp:146
bool use_empty() const
Definition Value.h:346
const ParentTy * getParent() const
Definition ilist_node.h:34
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
match_combine_or< Ty... > m_CombineOr(const Ty &...Ps)
Combine pattern matchers matching any of Ps patterns.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
auto m_Cmp()
Matches any compare instruction and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::FSub > m_FSub(const LHS &L, const RHS &R)
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
BinaryOp_match< LHS, RHS, Instruction::FMul > m_FMul(const LHS &L, const RHS &R)
bool match(Val *V, const Pattern &P)
match_bind< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
m_Intrinsic_Ty< Opnd0, Opnd1 >::Ty m_FMinimum(const Opnd0 &Op0, const Opnd1 &Op1)
match_combine_or< MaxMin_match< FCmpInst, LHS, RHS, ofmin_pred_ty >, MaxMin_match< FCmpInst, LHS, RHS, ufmin_pred_ty > > m_OrdOrUnordFMin(const LHS &L, const RHS &R)
Match an 'ordered' or 'unordered' floating point minimum function.
MaxMin_match< ICmpInst, LHS, RHS, smin_pred_ty > m_SMin(const LHS &L, const RHS &R)
m_Intrinsic_Ty< Opnd0, Opnd1 >::Ty m_FMaximum(const Opnd0 &Op0, const Opnd1 &Op1)
auto m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::FAdd > m_FAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
MaxMin_match< ICmpInst, LHS, RHS, umax_pred_ty > m_UMax(const LHS &L, const RHS &R)
match_combine_or< MaxMin_match< FCmpInst, LHS, RHS, ofmax_pred_ty >, MaxMin_match< FCmpInst, LHS, RHS, ufmax_pred_ty > > m_OrdOrUnordFMax(const LHS &L, const RHS &R)
Match an 'ordered' or 'unordered' floating point maximum function.
MaxMin_match< ICmpInst, LHS, RHS, smax_pred_ty > m_SMax(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
MaxMin_match< ICmpInst, LHS, RHS, umin_pred_ty > m_UMin(const LHS &L, const RHS &R)
specificloop_ty m_SpecificLoop(const Loop *L)
SCEVAffineAddRec_match< Op0_t, Op1_t, match_isa< const Loop > > m_scev_AffineAddRec(const Op0_t &Op0, const Op1_t &Op1)
This is an optimization pass for GlobalISel generic memory operations.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
MachineInstr * getDef(const MachineOperand &MO, const MachineRegisterInfo *MRI)
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2207
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
Definition bit.h:362
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1745
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
LLVM_ABI SelectPatternResult matchSelectPattern(Value *V, Value *&LHS, Value *&RHS, Instruction::CastOps *CastOp=nullptr, unsigned Depth=0)
Pattern match integer [SU]MIN, [SU]MAX and ABS idioms, returning the kind and providing the out param...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
RecurKind
These are the kinds of recurrences that we support.
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
@ FMinimumNum
FP min with llvm.minimumnum semantics.
@ FindIV
FindIV reduction with select(icmp(),x,y) where one of (x,y) is a loop induction variable (increasing ...
@ Or
Bitwise or logical OR of integers.
@ FMinimum
FP min with llvm.minimum semantics.
@ FMaxNum
FP max with llvm.maxnum semantics including NaNs.
@ Mul
Product of integers.
@ FSub
Subtraction of floats.
@ FAddChainWithSubs
A chain of fadds and fsubs.
@ None
Not a recurrence.
@ AnyOf
AnyOf reduction with select(cmp(),x,y) where one of (x,y) is loop invariant, and both x and y are int...
@ Xor
Bitwise or logical XOR of integers.
@ FindLast
FindLast reduction with select(cmp(),x,y) where x and y.
@ FMax
FP max implemented in terms of select(cmp()).
@ FMaximum
FP max with llvm.maximum semantics.
@ FMulAdd
Sum of float products with llvm.fmuladd(a * b + sum).
@ FMul
Product of floats.
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ And
Bitwise or logical AND of integers.
@ SMin
Signed integer min implemented in terms of select(cmp()).
@ FMin
FP min implemented in terms of select(cmp()).
@ FMinNum
FP min with llvm.minnum semantics including NaNs.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
@ AddChainWithSubs
A chain of adds and subs.
@ FAdd
Sum of floats.
@ FMaximumNum
FP max with llvm.maximumnum semantics.
@ UMax
Unsigned integer max implemented in terms of select(cmp()).
DWARFExpression::Operation Op
LLVM_ABI unsigned ComputeNumSignBits(const Value *Op, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Return the number of times the sign bit of the register is replicated into the other bits.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1946
static bool isMinOrMax(SelectPatternFlavor SPF)
When implementing this min/max pattern as fcmp; select, does the fcmp have to be ordered?