LLVM 23.0.0git
VPlanConstruction.cpp
Go to the documentation of this file.
1//===-- VPlanConstruction.cpp - Transforms for initial VPlan construction -===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file implements transforms for initial VPlan construction.
11///
12//===----------------------------------------------------------------------===//
13
15#include "VPlan.h"
16#include "VPlanAnalysis.h"
17#include "VPlanCFG.h"
18#include "VPlanDominatorTree.h"
19#include "VPlanHelpers.h"
20#include "VPlanPatternMatch.h"
21#include "VPlanTransforms.h"
22#include "VPlanUtils.h"
23#include "llvm/Analysis/Loads.h"
30#include "llvm/IR/InstrTypes.h"
31#include "llvm/IR/MDBuilder.h"
32#include "llvm/Support/Debug.h"
35
36#define DEBUG_TYPE "vplan"
37
38using namespace llvm;
39using namespace VPlanPatternMatch;
40
41namespace {
42// Class that is used to build the plain CFG for the incoming IR.
43class PlainCFGBuilder {
44 // The outermost loop of the input loop nest considered for vectorization.
45 Loop *TheLoop;
46
47 // Loop Info analysis.
48 LoopInfo *LI;
49
50 // Loop versioning for alias metadata.
51 LoopVersioning *LVer;
52
53 // Vectorization plan that we are working on.
54 std::unique_ptr<VPlan> Plan;
55
56 // Builder of the VPlan instruction-level representation.
57 VPBuilder VPIRBuilder;
58
59 // NOTE: The following maps are intentionally destroyed after the plain CFG
60 // construction because subsequent VPlan-to-VPlan transformation may
61 // invalidate them.
62 // Map incoming BasicBlocks to their newly-created VPBasicBlocks.
64 // Map incoming Value definitions to their newly-created VPValues.
65 DenseMap<Value *, VPValue *> IRDef2VPValue;
66
67 // Hold phi node's that need to be fixed once the plain CFG has been built.
69
70 // Utility functions.
71 void setVPBBPredsFromBB(VPBasicBlock *VPBB, BasicBlock *BB);
72 void fixHeaderPhis();
73 VPBasicBlock *getOrCreateVPBB(BasicBlock *BB);
74#ifndef NDEBUG
75 bool isExternalDef(Value *Val);
76#endif
77 VPValue *getOrCreateVPOperand(Value *IRVal);
78 void createVPInstructionsForVPBB(VPBasicBlock *VPBB, BasicBlock *BB);
79
80public:
81 PlainCFGBuilder(Loop *Lp, LoopInfo *LI, LoopVersioning *LVer)
82 : TheLoop(Lp), LI(LI), LVer(LVer), Plan(std::make_unique<VPlan>(Lp)) {}
83
84 /// Build plain CFG for TheLoop and connect it to Plan's entry.
85 std::unique_ptr<VPlan> buildPlainCFG();
86};
87} // anonymous namespace
88
89// Set predecessors of \p VPBB in the same order as they are in \p BB. \p VPBB
90// must have no predecessors.
91void PlainCFGBuilder::setVPBBPredsFromBB(VPBasicBlock *VPBB, BasicBlock *BB) {
92 // Collect VPBB predecessors.
94 for (BasicBlock *Pred : predecessors(BB))
95 VPBBPreds.push_back(getOrCreateVPBB(Pred));
96 VPBB->setPredecessors(VPBBPreds);
97}
98
99static bool isHeaderBB(BasicBlock *BB, Loop *L) {
100 return L && BB == L->getHeader();
101}
102
103// Add operands to VPInstructions representing phi nodes from the input IR.
104void PlainCFGBuilder::fixHeaderPhis() {
105 for (auto *Phi : PhisToFix) {
106 assert(IRDef2VPValue.count(Phi) && "Missing VPInstruction for PHINode.");
107 VPValue *VPVal = IRDef2VPValue[Phi];
108 assert(isa<VPPhi>(VPVal) && "Expected VPPhi for phi node.");
109 auto *PhiR = cast<VPPhi>(VPVal);
110 assert(PhiR->getNumOperands() == 0 && "Expected VPPhi with no operands.");
111 assert(isHeaderBB(Phi->getParent(), LI->getLoopFor(Phi->getParent())) &&
112 "Expected Phi in header block.");
113 assert(Phi->getNumOperands() == 2 &&
114 "header phi must have exactly 2 operands");
115 for (BasicBlock *Pred : predecessors(Phi->getParent()))
116 PhiR->addOperand(
117 getOrCreateVPOperand(Phi->getIncomingValueForBlock(Pred)));
118 }
119}
120
121// Create a new empty VPBasicBlock for an incoming BasicBlock or retrieve an
122// existing one if it was already created.
123VPBasicBlock *PlainCFGBuilder::getOrCreateVPBB(BasicBlock *BB) {
124 if (auto *VPBB = BB2VPBB.lookup(BB)) {
125 // Retrieve existing VPBB.
126 return VPBB;
127 }
128
129 // Create new VPBB.
130 StringRef Name = BB->getName();
131 LLVM_DEBUG(dbgs() << "Creating VPBasicBlock for " << Name << "\n");
132 VPBasicBlock *VPBB = Plan->createVPBasicBlock(Name);
133 BB2VPBB[BB] = VPBB;
134 return VPBB;
135}
136
137#ifndef NDEBUG
138// Return true if \p Val is considered an external definition. An external
139// definition is either:
140// 1. A Value that is not an Instruction. This will be refined in the future.
141// 2. An Instruction that is outside of the IR region represented in VPlan,
142// i.e., is not part of the loop nest.
143bool PlainCFGBuilder::isExternalDef(Value *Val) {
144 // All the Values that are not Instructions are considered external
145 // definitions for now.
147 if (!Inst)
148 return true;
149
150 // Check whether Instruction definition is in loop body.
151 return !TheLoop->contains(Inst);
152}
153#endif
154
155// Create a new VPValue or retrieve an existing one for the Instruction's
156// operand \p IRVal. This function must only be used to create/retrieve VPValues
157// for *Instruction's operands* and not to create regular VPInstruction's. For
158// the latter, please, look at 'createVPInstructionsForVPBB'.
159VPValue *PlainCFGBuilder::getOrCreateVPOperand(Value *IRVal) {
160 auto VPValIt = IRDef2VPValue.find(IRVal);
161 if (VPValIt != IRDef2VPValue.end())
162 // Operand has an associated VPInstruction or VPValue that was previously
163 // created.
164 return VPValIt->second;
165
166 // Operand doesn't have a previously created VPInstruction/VPValue. This
167 // means that operand is:
168 // A) a definition external to VPlan,
169 // B) any other Value without specific representation in VPlan.
170 // For now, we use VPValue to represent A and B and classify both as external
171 // definitions. We may introduce specific VPValue subclasses for them in the
172 // future.
173 assert(isExternalDef(IRVal) && "Expected external definition as operand.");
174
175 // A and B: Create VPValue and add it to the pool of external definitions and
176 // to the Value->VPValue map.
177 VPValue *NewVPVal = Plan->getOrAddLiveIn(IRVal);
178 IRDef2VPValue[IRVal] = NewVPVal;
179 return NewVPVal;
180}
181
182// Create new VPInstructions in a VPBasicBlock, given its BasicBlock
183// counterpart. This function must be invoked in RPO so that the operands of a
184// VPInstruction in \p BB have been visited before (except for Phi nodes).
185void PlainCFGBuilder::createVPInstructionsForVPBB(VPBasicBlock *VPBB,
186 BasicBlock *BB) {
187 VPIRBuilder.setInsertPoint(VPBB);
188 // TODO: Model and preserve debug intrinsics in VPlan.
189 for (Instruction &InstRef : *BB) {
190 Instruction *Inst = &InstRef;
191
192 // There shouldn't be any VPValue for Inst at this point. Otherwise, we
193 // visited Inst when we shouldn't, breaking the RPO traversal order.
194 assert(!IRDef2VPValue.count(Inst) &&
195 "Instruction shouldn't have been visited.");
196
197 if (isa<UncondBrInst>(Inst))
198 // Skip the rest of the Instruction processing for Branch instructions.
199 continue;
200
201 if (auto *Br = dyn_cast<CondBrInst>(Inst)) {
202 // Conditional branch instruction are represented using BranchOnCond
203 // recipes.
204 VPValue *Cond = getOrCreateVPOperand(Br->getCondition());
205 VPIRBuilder.createNaryOp(VPInstruction::BranchOnCond, {Cond}, Inst, {},
206 VPIRMetadata(*Inst), Inst->getDebugLoc());
207 continue;
208 }
209
210 if (auto *SI = dyn_cast<SwitchInst>(Inst)) {
211 // Don't emit recipes for unconditional switch instructions.
212 if (SI->getNumCases() == 0)
213 continue;
214 SmallVector<VPValue *> Ops = {getOrCreateVPOperand(SI->getCondition())};
215 for (auto Case : SI->cases())
216 Ops.push_back(getOrCreateVPOperand(Case.getCaseValue()));
217 VPIRBuilder.createNaryOp(Instruction::Switch, Ops, Inst, {},
218 VPIRMetadata(*Inst), Inst->getDebugLoc());
219 continue;
220 }
221
222 VPSingleDefRecipe *NewR;
223 if (auto *Phi = dyn_cast<PHINode>(Inst)) {
224 // Phi node's operands may not have been visited at this point. We create
225 // an empty VPInstruction that we will fix once the whole plain CFG has
226 // been built.
227 NewR =
228 VPIRBuilder.createScalarPhi({}, Phi->getDebugLoc(), "vec.phi", *Phi);
229 NewR->setUnderlyingValue(Phi);
230 if (isHeaderBB(Phi->getParent(), LI->getLoopFor(Phi->getParent()))) {
231 // Header phis need to be fixed after the VPBB for the latch has been
232 // created.
233 PhisToFix.push_back(Phi);
234 } else {
235 // Add operands for VPPhi in the order matching its predecessors in
236 // VPlan.
237 DenseMap<const VPBasicBlock *, VPValue *> VPPredToIncomingValue;
238 for (unsigned I = 0; I != Phi->getNumOperands(); ++I) {
239 VPPredToIncomingValue[BB2VPBB[Phi->getIncomingBlock(I)]] =
240 getOrCreateVPOperand(Phi->getIncomingValue(I));
241 }
242 for (VPBlockBase *Pred : VPBB->getPredecessors())
243 NewR->addOperand(
244 VPPredToIncomingValue.lookup(Pred->getExitingBasicBlock()));
245 }
246 } else {
247 // Build VPIRMetadata from the instruction and add loop versioning
248 // metadata for loads and stores.
249 VPIRMetadata MD(*Inst);
250 if (isa<LoadInst, StoreInst>(Inst) && LVer) {
251 const auto &[AliasScopeMD, NoAliasMD] =
252 LVer->getNoAliasMetadataFor(Inst);
253 if (AliasScopeMD)
254 MD.setMetadata(LLVMContext::MD_alias_scope, AliasScopeMD);
255 if (NoAliasMD)
256 MD.setMetadata(LLVMContext::MD_noalias, NoAliasMD);
257 }
258
259 // Translate LLVM-IR operands into VPValue operands and set them in the
260 // new VPInstruction.
261 SmallVector<VPValue *, 4> VPOperands;
262 for (Value *Op : Inst->operands())
263 VPOperands.push_back(getOrCreateVPOperand(Op));
264
265 if (auto *CI = dyn_cast<CastInst>(Inst)) {
266 NewR = VPIRBuilder.createScalarCast(CI->getOpcode(), VPOperands[0],
267 CI->getType(), CI->getDebugLoc(),
268 VPIRFlags(*CI), MD);
269 NewR->setUnderlyingValue(CI);
270 } else if (auto *LI = dyn_cast<LoadInst>(Inst)) {
271 NewR = VPIRBuilder.createScalarLoad(LI->getType(), VPOperands[0],
272 LI->getDebugLoc(), MD);
273 NewR->setUnderlyingValue(LI);
274 } else {
275 // Build VPInstruction for any arbitrary Instruction without specific
276 // representation in VPlan.
277 NewR =
278 VPIRBuilder.createNaryOp(Inst->getOpcode(), VPOperands, Inst,
279 VPIRFlags(*Inst), MD, Inst->getDebugLoc());
280 }
281 }
282
283 IRDef2VPValue[Inst] = NewR;
284 }
285}
286
287// Main interface to build the plain CFG.
288std::unique_ptr<VPlan> PlainCFGBuilder::buildPlainCFG() {
289 VPIRBasicBlock *Entry = cast<VPIRBasicBlock>(Plan->getEntry());
290 BB2VPBB[Entry->getIRBasicBlock()] = Entry;
291 for (VPIRBasicBlock *ExitVPBB : Plan->getExitBlocks())
292 BB2VPBB[ExitVPBB->getIRBasicBlock()] = ExitVPBB;
293
294 // 1. Scan the body of the loop in a topological order to visit each basic
295 // block after having visited its predecessor basic blocks. Create a VPBB for
296 // each BB and link it to its successor and predecessor VPBBs. Note that
297 // predecessors must be set in the same order as they are in the incomming IR.
298 // Otherwise, there might be problems with existing phi nodes and algorithm
299 // based on predecessors traversal.
300
301 // Loop PH needs to be explicitly visited since it's not taken into account by
302 // LoopBlocksDFS.
303 BasicBlock *ThePreheaderBB = TheLoop->getLoopPreheader();
304 assert((ThePreheaderBB->getTerminator()->getNumSuccessors() == 1) &&
305 "Unexpected loop preheader");
306 for (auto &I : *ThePreheaderBB) {
307 if (I.getType()->isVoidTy())
308 continue;
309 IRDef2VPValue[&I] = Plan->getOrAddLiveIn(&I);
310 }
311
312 LoopBlocksRPO RPO(TheLoop);
313 RPO.perform(LI);
314
315 for (BasicBlock *BB : RPO) {
316 // Create or retrieve the VPBasicBlock for this BB.
317 VPBasicBlock *VPBB = getOrCreateVPBB(BB);
318 // Set VPBB predecessors in the same order as they are in the incoming BB.
319 setVPBBPredsFromBB(VPBB, BB);
320
321 // Create VPInstructions for BB.
322 createVPInstructionsForVPBB(VPBB, BB);
323
324 // Set VPBB successors. We create empty VPBBs for successors if they don't
325 // exist already. Recipes will be created when the successor is visited
326 // during the RPO traversal.
327 if (auto *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
329 getOrCreateVPBB(SI->getDefaultDest())};
330 for (auto Case : SI->cases())
331 Succs.push_back(getOrCreateVPBB(Case.getCaseSuccessor()));
332 VPBB->setSuccessors(Succs);
333 continue;
334 }
335 if (auto *BI = dyn_cast<UncondBrInst>(BB->getTerminator())) {
336 VPBB->setOneSuccessor(getOrCreateVPBB(BI->getSuccessor()));
337 continue;
338 }
339 auto *BI = cast<CondBrInst>(BB->getTerminator());
340 BasicBlock *IRSucc0 = BI->getSuccessor(0);
341 BasicBlock *IRSucc1 = BI->getSuccessor(1);
342 VPBasicBlock *Successor0 = getOrCreateVPBB(IRSucc0);
343 VPBasicBlock *Successor1 = getOrCreateVPBB(IRSucc1);
344 VPBB->setTwoSuccessors(Successor0, Successor1);
345 }
346
347 for (auto *EB : Plan->getExitBlocks())
348 setVPBBPredsFromBB(EB, EB->getIRBasicBlock());
349
350 // 2. The whole CFG has been built at this point so all the input Values must
351 // have a VPlan counterpart. Fix VPlan header phi by adding their
352 // corresponding VPlan operands.
353 fixHeaderPhis();
354
355 Plan->getEntry()->setOneSuccessor(getOrCreateVPBB(TheLoop->getHeader()));
356 Plan->getEntry()->setPlan(&*Plan);
357
358 // Fix VPlan loop-closed-ssa exit phi's by adding incoming operands to the
359 // VPIRInstructions wrapping them.
360 // // Note that the operand order corresponds to IR predecessor order, and may
361 // need adjusting when VPlan predecessors are added, if an exit block has
362 // multiple predecessor.
363 for (auto *EB : Plan->getExitBlocks()) {
364 for (VPRecipeBase &R : EB->phis()) {
365 auto *PhiR = cast<VPIRPhi>(&R);
366 PHINode &Phi = PhiR->getIRPhi();
367 assert(PhiR->getNumOperands() == 0 &&
368 "no phi operands should be added yet");
369 for (BasicBlock *Pred : predecessors(EB->getIRBasicBlock()))
370 PhiR->addOperand(
371 getOrCreateVPOperand(Phi.getIncomingValueForBlock(Pred)));
372 }
373 }
374
375 LLVM_DEBUG(Plan->setName("Plain CFG\n"); dbgs() << *Plan);
376 return std::move(Plan);
377}
378
379/// Checks if \p HeaderVPB is a loop header block in the plain CFG; that is, it
380/// has exactly 2 predecessors (preheader and latch), where the block
381/// dominates the latch and the preheader dominates the block. If it is a
382/// header block return true and canonicalize the predecessors of the header
383/// (making sure the preheader appears first and the latch second) and the
384/// successors of the latch (making sure the loop exit comes first). Otherwise
385/// return false.
387 const VPDominatorTree &VPDT) {
388 ArrayRef<VPBlockBase *> Preds = HeaderVPB->getPredecessors();
389 if (Preds.size() != 2)
390 return false;
391
392 auto *PreheaderVPBB = Preds[0];
393 auto *LatchVPBB = Preds[1];
394 if (!VPDT.dominates(PreheaderVPBB, HeaderVPB) ||
395 !VPDT.dominates(HeaderVPB, LatchVPBB)) {
396 std::swap(PreheaderVPBB, LatchVPBB);
397
398 if (!VPDT.dominates(PreheaderVPBB, HeaderVPB) ||
399 !VPDT.dominates(HeaderVPB, LatchVPBB))
400 return false;
401
402 // Canonicalize predecessors of header so that preheader is first and
403 // latch second.
404 HeaderVPB->swapPredecessors();
405 for (VPRecipeBase &R : cast<VPBasicBlock>(HeaderVPB)->phis())
406 R.swapOperands();
407 }
408
409 // The two successors of conditional branch match the condition, with the
410 // first successor corresponding to true and the second to false. We
411 // canonicalize the successors of the latch when introducing the region, such
412 // that the latch exits the region when its condition is true; invert the
413 // original condition if the original CFG branches to the header on true.
414 // Note that the exit edge is not yet connected for top-level loops.
415 if (LatchVPBB->getSingleSuccessor() ||
416 LatchVPBB->getSuccessors()[0] != HeaderVPB)
417 return true;
418
419 assert(LatchVPBB->getNumSuccessors() == 2 && "Must have 2 successors");
420 auto *Term = cast<VPBasicBlock>(LatchVPBB)->getTerminator();
423 "terminator must be a BranchOnCond");
424 auto *Not = new VPInstruction(VPInstruction::Not, {Term->getOperand(0)});
425 Not->insertBefore(Term);
426 Term->setOperand(0, Not);
427 LatchVPBB->swapSuccessors();
428
429 return true;
430}
431
432/// Create a new VPRegionBlock for the loop starting at \p HeaderVPB.
433static void createLoopRegion(VPlan &Plan, VPBlockBase *HeaderVPB) {
434 // Get type info and debug location from the scalar phi corresponding to the
435 // canonical IV of the outermost (to be vectorized) loop. Only the outermost
436 // header will have a canonical IV. Other, nested loops are assigned a
437 // canonical IV of null type and debug location.
438 Type *CanIVTy = nullptr;
440 auto *OutermostHeaderVPBB = cast<VPBasicBlock>(
441 Plan.getEntry()->getSuccessors()[1]->getSingleSuccessor());
442 VPPhi *OutermostVPPhi = nullptr;
443 if (HeaderVPB == OutermostHeaderVPBB) {
444 OutermostVPPhi = cast<VPPhi>(&OutermostHeaderVPBB->front());
445 CanIVTy = OutermostVPPhi->getOperand(0)->getLiveInIRValue()->getType();
446 DL = OutermostVPPhi->getDebugLoc();
447 }
448
449 auto *PreheaderVPBB = HeaderVPB->getPredecessors()[0];
450 auto *LatchVPBB = HeaderVPB->getPredecessors()[1];
451
452 VPBlockUtils::disconnectBlocks(PreheaderVPBB, HeaderVPB);
453 VPBlockUtils::disconnectBlocks(LatchVPBB, HeaderVPB);
454
455 // Create an empty region first and insert it between PreheaderVPBB and
456 // the exit blocks, taking care to preserve the original predecessor &
457 // successor order of blocks. Set region entry and exiting after both
458 // HeaderVPB and LatchVPBB have been disconnected from their
459 // predecessors/successors.
460 auto *R = Plan.createLoopRegion(CanIVTy, DL);
461
462 // Transfer latch's successors to the region.
464
465 VPBlockUtils::connectBlocks(PreheaderVPBB, R);
466 R->setEntry(HeaderVPB);
467 R->setExiting(LatchVPBB);
468
469 // Update canonical IV users for the outermost loop only.
470 if (OutermostVPPhi) {
471 OutermostVPPhi->replaceAllUsesWith(R->getCanonicalIV());
472 OutermostVPPhi->eraseFromParent();
473 }
474
475 // All VPBB's reachable shallowly from HeaderVPB belong to the current region.
476 for (VPBlockBase *VPBB : vp_depth_first_shallow(HeaderVPB))
477 VPBB->setParent(R);
478}
479
480// Add the necessary canonical IV and branch recipes required to control the
481// loop.
482static void addCanonicalIVRecipes(VPlan &Plan, VPBasicBlock *HeaderVPBB,
483 VPBasicBlock *LatchVPBB, Type *IdxTy,
484 DebugLoc DL) {
485 // Add a VPPhi for the canonical IV starting at 0 as first recipe in header.
486 auto *CanonicalIVPHI = new VPPhi(Plan.getConstantInt(IdxTy, 0), {}, DL);
487 HeaderVPBB->insert(CanonicalIVPHI, HeaderVPBB->begin());
488
489 // We are about to replace the branch to exit the region. Remove the original
490 // BranchOnCond, if there is any.
491 DebugLoc LatchDL = DL;
492 if (!LatchVPBB->empty() && match(&LatchVPBB->back(), m_BranchOnCond())) {
493 LatchDL = LatchVPBB->getTerminator()->getDebugLoc();
494 LatchVPBB->getTerminator()->eraseFromParent();
495 }
496
497 VPBuilder Builder(LatchVPBB);
498 // Add a VPInstruction to increment the scalar canonical IV by VF * UF.
499 // Initially the induction increment is guaranteed to not wrap, but that may
500 // change later, e.g. when tail-folding, when the flags need to be dropped.
501 auto *CanonicalIVIncrement = Builder.createAdd(
502 CanonicalIVPHI, &Plan.getVFxUF(), DL, "index.next", {true, false});
503 CanonicalIVPHI->addOperand(CanonicalIVIncrement);
504
505 // Add the BranchOnCount VPInstruction to the latch.
506 Builder.createNaryOp(VPInstruction::BranchOnCount,
507 {CanonicalIVIncrement, &Plan.getVectorTripCount()},
508 LatchDL);
509}
510
511/// Creates extracts for values in \p Plan defined in a loop region and used
512/// outside a loop region.
513static void createExtractsForLiveOuts(VPlan &Plan, VPBasicBlock *MiddleVPBB) {
514 VPBuilder B(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
515 for (VPBasicBlock *EB : Plan.getExitBlocks()) {
516 if (!is_contained(EB->predecessors(), MiddleVPBB))
517 continue;
518
519 for (VPRecipeBase &R : EB->phis()) {
520 auto *ExitIRI = cast<VPIRPhi>(&R);
521 VPValue *Exiting = ExitIRI->getIncomingValueForBlock(MiddleVPBB);
522 if (isa<VPIRValue>(Exiting))
523 continue;
524 Exiting = B.createNaryOp(VPInstruction::ExtractLastPart, Exiting);
525 Exiting = B.createNaryOp(VPInstruction::ExtractLastLane, Exiting);
526 ExitIRI->setIncomingValueForBlock(MiddleVPBB, Exiting);
527 }
528 }
529}
530
531/// Return an iterator range to iterate over pairs of matching phi nodes in
532/// \p Header and \p ScalarHeader, skipping the canonical IV in the former.
534 VPBasicBlock *ScalarHeader) {
535 return zip_equal(drop_begin(Header->phis()), ScalarHeader->phis());
536}
537
538static void addInitialSkeleton(VPlan &Plan, Type *InductionTy, DebugLoc IVDL,
539 PredicatedScalarEvolution &PSE, Loop *TheLoop) {
540 VPDominatorTree VPDT(Plan);
541
542 auto *HeaderVPBB = cast<VPBasicBlock>(Plan.getEntry()->getSingleSuccessor());
543 canonicalHeaderAndLatch(HeaderVPBB, VPDT);
544 auto *LatchVPBB = cast<VPBasicBlock>(HeaderVPBB->getPredecessors()[1]);
545
546 VPBasicBlock *VecPreheader = Plan.createVPBasicBlock("vector.ph");
547 VPBlockUtils::insertBlockAfter(VecPreheader, Plan.getEntry());
548
549 VPBasicBlock *MiddleVPBB = Plan.createVPBasicBlock("middle.block");
550 // The canonical LatchVPBB has the header block as last successor. If it has
551 // another successor, this successor is an exit block - insert middle block on
552 // its edge. Otherwise, add middle block as another successor retaining header
553 // as last.
554 if (LatchVPBB->getNumSuccessors() == 2) {
555 VPBlockBase *LatchExitVPB = LatchVPBB->getSuccessors()[0];
556 VPBlockUtils::insertOnEdge(LatchVPBB, LatchExitVPB, MiddleVPBB);
557 } else {
558 VPBlockUtils::connectBlocks(LatchVPBB, MiddleVPBB);
559 LatchVPBB->swapSuccessors();
560 }
561
562 addCanonicalIVRecipes(Plan, HeaderVPBB, LatchVPBB, InductionTy, IVDL);
563
564 // Create SCEV and VPValue for the trip count.
565 // We use the symbolic max backedge-taken-count, which works also when
566 // vectorizing loops with uncountable early exits.
567 const SCEV *BackedgeTakenCountSCEV = PSE.getSymbolicMaxBackedgeTakenCount();
568 assert(!isa<SCEVCouldNotCompute>(BackedgeTakenCountSCEV) &&
569 "Invalid backedge-taken count");
570 ScalarEvolution &SE = *PSE.getSE();
571 const SCEV *TripCount = SE.getTripCountFromExitCount(BackedgeTakenCountSCEV,
572 InductionTy, TheLoop);
574
575 VPBasicBlock *ScalarPH = Plan.createVPBasicBlock("scalar.ph");
577
578 // The connection order corresponds to the operands of the conditional branch,
579 // with the middle block already connected to the exit block.
580 VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH);
581 // Also connect the entry block to the scalar preheader.
582 // TODO: Also introduce a branch recipe together with the minimum trip count
583 // check.
584 VPBlockUtils::connectBlocks(Plan.getEntry(), ScalarPH);
585 Plan.getEntry()->swapSuccessors();
586
587 createExtractsForLiveOuts(Plan, MiddleVPBB);
588
589 // Create resume phis in the scalar preheader for each phi in the scalar loop.
590 // Their incoming value from the vector loop will be the last lane of the
591 // corresponding vector loop header phi.
592 VPBuilder MiddleBuilder(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
593 VPBuilder ScalarPHBuilder(ScalarPH);
594 assert(equal(ScalarPH->getPredecessors(),
595 ArrayRef<VPBlockBase *>({MiddleVPBB, Plan.getEntry()})) &&
596 "unexpected predecessor order of scalar ph");
597 for (const auto &[PhiR, ScalarPhiR] :
598 getMatchingPhisForScalarLoop(HeaderVPBB, Plan.getScalarHeader())) {
599 auto *VectorPhiR = cast<VPPhi>(&PhiR);
600 VPValue *BackedgeVal = VectorPhiR->getOperand(1);
601 VPValue *ResumeFromVectorLoop =
602 MiddleBuilder.createNaryOp(VPInstruction::ExtractLastPart, BackedgeVal);
603 ResumeFromVectorLoop = MiddleBuilder.createNaryOp(
604 VPInstruction::ExtractLastLane, ResumeFromVectorLoop);
605 // Create scalar resume phi, with the first operand being the incoming value
606 // from the middle block and the second operand coming from the entry block.
607 auto *ResumePhiR = ScalarPHBuilder.createScalarPhi(
608 {ResumeFromVectorLoop, VectorPhiR->getOperand(0)},
609 VectorPhiR->getDebugLoc());
610 cast<VPIRPhi>(&ScalarPhiR)->addOperand(ResumePhiR);
611 }
612}
613
614/// Check \p Plan's live-in and replace them with constants, if they can be
615/// simplified via SCEV.
618 auto GetSimplifiedLiveInViaSCEV = [&](VPValue *VPV) -> VPValue * {
619 const SCEV *Expr = vputils::getSCEVExprForVPValue(VPV, PSE);
620 if (auto *C = dyn_cast<SCEVConstant>(Expr))
621 return Plan.getOrAddLiveIn(C->getValue());
622 return nullptr;
623 };
624
625 for (VPValue *LiveIn : to_vector(Plan.getLiveIns())) {
626 if (VPValue *SimplifiedLiveIn = GetSimplifiedLiveInViaSCEV(LiveIn))
627 LiveIn->replaceAllUsesWith(SimplifiedLiveIn);
628 }
629}
630
631/// To make RUN_VPLAN_PASS print initial VPlan.
633
634std::unique_ptr<VPlan>
635VPlanTransforms::buildVPlan0(Loop *TheLoop, LoopInfo &LI, Type *InductionTy,
637 LoopVersioning *LVer) {
638 PlainCFGBuilder Builder(TheLoop, &LI, LVer);
639 std::unique_ptr<VPlan> VPlan0 = Builder.buildPlainCFG();
640 addInitialSkeleton(*VPlan0, InductionTy, IVDL, PSE, TheLoop);
641 simplifyLiveInsWithSCEV(*VPlan0, PSE);
642
644 return VPlan0;
645}
646
647/// Creates a VPWidenIntOrFpInductionRecipe or VPWidenPointerInductionRecipe
648/// for \p Phi based on \p IndDesc.
649static VPHeaderPHIRecipe *
651 const InductionDescriptor &IndDesc, VPlan &Plan,
652 PredicatedScalarEvolution &PSE, Loop &OrigLoop,
653 DebugLoc DL) {
654 [[maybe_unused]] ScalarEvolution &SE = *PSE.getSE();
655 assert(SE.isLoopInvariant(IndDesc.getStep(), &OrigLoop) &&
656 "step must be loop invariant");
657 assert((Plan.getLiveIn(IndDesc.getStartValue()) == Start ||
658 (SE.isSCEVable(IndDesc.getStartValue()->getType()) &&
659 SE.getSCEV(IndDesc.getStartValue()) ==
660 vputils::getSCEVExprForVPValue(Start, PSE))) &&
661 "Start VPValue must match IndDesc's start value");
662
663 VPValue *Step =
665
666 VPValue *BackedgeVal = PhiR->getOperand(1);
667 // Replace live-out extracts of WideIV's backedge value by ExitingIVValue
668 // recipes. optimizeInductionLiveOutUsers will later compute the proper
669 // DerivedIV.
670 auto ReplaceExtractsWithExitingIVValue = [&](VPHeaderPHIRecipe *WideIV) {
671 for (VPUser *U : to_vector(BackedgeVal->users())) {
673 continue;
674 auto *ExtractLastPart = cast<VPInstruction>(U);
675 VPUser *ExtractLastPartUser = ExtractLastPart->getSingleUser();
676 assert(ExtractLastPartUser && "must have a single user");
677 if (!match(ExtractLastPartUser, m_ExtractLastLane(m_VPValue())))
678 continue;
679 auto *ExtractLastLane = cast<VPInstruction>(ExtractLastPartUser);
680 assert(is_contained(ExtractLastLane->getParent()->successors(),
681 Plan.getScalarPreheader()) &&
682 "last lane must be extracted in the middle block");
683 VPBuilder Builder(ExtractLastLane);
684 ExtractLastLane->replaceAllUsesWith(
685 Builder.createNaryOp(VPInstruction::ExitingIVValue, {WideIV}));
686 ExtractLastLane->eraseFromParent();
687 ExtractLastPart->eraseFromParent();
688 }
689 };
690
692 auto *WideIV = new VPWidenPointerInductionRecipe(
693 Phi, Start, Step, &Plan.getVFxUF(), IndDesc, DL);
694 ReplaceExtractsWithExitingIVValue(WideIV);
695 return WideIV;
696 }
697
700 "must have an integer or float induction at this point");
701
702 // Update wide induction increments to use the same step as the corresponding
703 // wide induction. This enables detecting induction increments directly in
704 // VPlan and removes redundant splats.
705 if (match(BackedgeVal, m_Add(m_Specific(PhiR), m_VPValue())))
706 BackedgeVal->getDefiningRecipe()->setOperand(1, Step);
707
708 // It is always safe to copy over the NoWrap and FastMath flags. In
709 // particular, when folding tail by masking, the masked-off lanes are never
710 // used, so it is safe.
712
713 auto *WideIV = new VPWidenIntOrFpInductionRecipe(
714 Phi, Start, Step, &Plan.getVF(), IndDesc, Flags, DL);
715
716 ReplaceExtractsWithExitingIVValue(WideIV);
717 return WideIV;
718}
719
720/// Try to sink users of \p FOR after \p Previous. \returns true if sinking
721/// succeeded or was not necessary, and false otherwise.
722static bool
724 VPRecipeBase *Previous,
725 VPDominatorTree &VPDT) {
726 // Collect recipes that need sinking.
729 Seen.insert(Previous);
730 auto TryToPushSinkCandidate = [&](VPRecipeBase *SinkCandidate) {
731 // The previous value must not depend on the users of the recurrence phi.
732 // In that case, FOR is not a fixed order recurrence.
733 if (SinkCandidate == Previous)
734 return false;
735
736 if (isa<VPHeaderPHIRecipe>(SinkCandidate) ||
737 !Seen.insert(SinkCandidate).second ||
738 VPDT.properlyDominates(Previous, SinkCandidate))
739 return true;
740
741 if (vputils::cannotHoistOrSinkRecipe(*SinkCandidate, /*Sinking=*/true))
742 return false;
743
744 WorkList.push_back(SinkCandidate);
745 return true;
746 };
747
748 // Recursively sink users of FOR after Previous.
749 WorkList.push_back(FOR);
750 for (unsigned I = 0; I != WorkList.size(); ++I) {
751 VPRecipeBase *Current = WorkList[I];
752 assert(Current->getNumDefinedValues() == 1 &&
753 "only recipes with a single defined value expected");
754
755 for (VPUser *User : Current->getVPSingleValue()->users()) {
756 if (!TryToPushSinkCandidate(cast<VPRecipeBase>(User)))
757 return false;
758 }
759 }
760
761 // Keep recipes to sink ordered by dominance so earlier instructions are
762 // processed first.
763 sort(WorkList, [&VPDT](const VPRecipeBase *A, const VPRecipeBase *B) {
764 return VPDT.properlyDominates(A, B);
765 });
766
767 for (VPRecipeBase *SinkCandidate : WorkList) {
768 if (SinkCandidate == FOR)
769 continue;
770
771 SinkCandidate->moveAfter(Previous);
772 Previous = SinkCandidate;
773 }
774 return true;
775}
776
777/// Try to hoist \p Previous and its operands before all users of \p FOR.
778/// \returns true if hoisting succeeded or was not necessary, and false
779/// otherwise.
781 VPRecipeBase *Previous,
782 VPDominatorTree &VPDT) {
784 return false;
785
786 // Collect recipes that need hoisting.
787 SmallVector<VPRecipeBase *> HoistCandidates;
789 // Find the closest hoist point by looking at all users of FOR and selecting
790 // the recipe dominating all other users.
791 VPRecipeBase *HoistPoint = nullptr;
792 for (VPUser *U : FOR->users()) {
793 auto *R = cast<VPRecipeBase>(U);
794 if (!HoistPoint || VPDT.properlyDominates(R, HoistPoint))
795 HoistPoint = R;
796 }
797 assert(all_of(FOR->users(),
798 [&VPDT, HoistPoint](VPUser *U) {
799 auto *R = cast<VPRecipeBase>(U);
800 return HoistPoint == R ||
801 VPDT.properlyDominates(HoistPoint, R);
802 }) &&
803 "HoistPoint must dominate all users of FOR");
804
805 auto NeedsHoisting = [HoistPoint, &VPDT,
806 &Visited](VPValue *HoistCandidateV) -> VPRecipeBase * {
807 VPRecipeBase *HoistCandidate = HoistCandidateV->getDefiningRecipe();
808 if (!HoistCandidate)
809 return nullptr;
810 // Hoist candidate was already visited, no need to hoist.
811 if (!Visited.insert(HoistCandidate).second)
812 return nullptr;
813 // If we reached a recipe that dominates HoistPoint, we don't need to
814 // hoist the recipe.
815 if (VPDT.properlyDominates(HoistCandidate, HoistPoint))
816 return nullptr;
817 return HoistCandidate;
818 };
819
820 if (!NeedsHoisting(Previous->getVPSingleValue()))
821 return true;
822
823 // Recursively try to hoist Previous and its operands before all users of
824 // FOR.
825 HoistCandidates.push_back(Previous);
826
827 for (unsigned I = 0; I != HoistCandidates.size(); ++I) {
828 VPRecipeBase *Current = HoistCandidates[I];
829 assert(Current->getNumDefinedValues() == 1 &&
830 "only recipes with a single defined value expected");
832 return false;
833
834 for (VPValue *Op : Current->operands()) {
835 // If we reach FOR, it means the original Previous depends on some other
836 // recurrence that in turn depends on FOR. If that is the case, we would
837 // also need to hoist recipes involving the other FOR, which may break
838 // dependencies.
839 if (Op == FOR)
840 return false;
841
842 if (auto *R = NeedsHoisting(Op)) {
843 // Bail out if the recipe defines multiple values.
844 // TODO: Hoisting such recipes requires additional handling.
845 if (R->getNumDefinedValues() != 1)
846 return false;
847 HoistCandidates.push_back(R);
848 }
849 }
850 }
851
852 // Order recipes to hoist by dominance so earlier instructions are processed
853 // first.
854 sort(HoistCandidates, [&VPDT](const VPRecipeBase *A, const VPRecipeBase *B) {
855 return VPDT.properlyDominates(A, B);
856 });
857
858 for (VPRecipeBase *HoistCandidate : HoistCandidates) {
859 HoistCandidate->moveBefore(*HoistPoint->getParent(),
860 HoistPoint->getIterator());
861 }
862
863 return true;
864}
865
866/// Sink users of fixed-order recurrences past or hoist before the recipe
867/// defining the previous value, introduce FirstOrderRecurrenceSplice
868/// VPInstructions, and replace FOR uses. Returns false if hoisting or sinking
869/// fails.
871 VPDominatorTree &VPDT) {
872 for (VPRecipeBase &R : HeaderVPBB->phis()) {
874 if (!FOR)
875 continue;
876
877 // Follow through FOR phi chains to find the actual Previous recipe.
878 // Fixed-order recurrences do not contain cycles, so this loop is
879 // guaranteed to terminate.
881 VPRecipeBase *Previous = FOR->getBackedgeValue()->getDefiningRecipe();
882 while (auto *PrevPhi =
884 assert(PrevPhi->getParent() == FOR->getParent() &&
885 "PrevPhi must be in same block as FOR");
886 assert(SeenPhis.insert(PrevPhi).second &&
887 "PrevPhi must not be visited multiple times");
888 Previous = PrevPhi->getBackedgeValue()->getDefiningRecipe();
889 }
890
891 assert(Previous && "Previous must be a recipe");
892 // Sink FOR users after Previous or hoist Previous before FOR users.
893 if (!sinkRecurrenceUsersAfterPrevious(FOR, Previous, VPDT) &&
894 !hoistPreviousBeforeFORUsers(FOR, Previous, VPDT))
895 return false;
896
897 // Create FirstOrderRecurrenceSplice and replace FOR uses.
898 VPBasicBlock *InsertBlock = Previous->getParent();
899 auto InsertPt = isa<VPHeaderPHIRecipe>(Previous)
900 ? InsertBlock->getFirstNonPhi()
901 : std::next(Previous->getIterator());
902 VPBuilder LoopBuilder(InsertBlock, InsertPt);
903 auto *RecurSplice =
905 {FOR, FOR->getBackedgeValue()});
906 FOR->replaceUsesWithIf(RecurSplice, [RecurSplice](VPUser &U, unsigned) {
907 return &U != RecurSplice;
908 });
909 }
910
911 return true;
912}
913
915 VPlan &Plan, PredicatedScalarEvolution &PSE, Loop &OrigLoop,
918 const SmallPtrSetImpl<const PHINode *> &FixedOrderRecurrences,
919 const SmallPtrSetImpl<PHINode *> &InLoopReductions, bool AllowReordering) {
920 // Retrieve the header manually from the intial plain-CFG VPlan.
921 VPBasicBlock *HeaderVPBB = cast<VPBasicBlock>(
922 Plan.getEntry()->getSuccessors()[1]->getSingleSuccessor());
923 VPDominatorTree VPDT(Plan);
924 assert(VPDT.dominates(HeaderVPBB, HeaderVPBB->getPredecessors()[1]) &&
925 "header must dominate its latch");
926
927 auto CreateHeaderPhiRecipe = [&](VPPhi *PhiR) -> VPHeaderPHIRecipe * {
928 // TODO: Gradually replace uses of underlying instruction by analyses on
929 // VPlan.
930 auto *Phi = cast<PHINode>(PhiR->getUnderlyingInstr());
931 assert(PhiR->getNumOperands() == 2 &&
932 "Must have 2 operands for header phis");
933
934 // Extract common values once.
935 VPIRValue *Start = cast<VPIRValue>(PhiR->getOperand(0));
936 VPValue *BackedgeValue = PhiR->getOperand(1);
937
938 if (FixedOrderRecurrences.contains(Phi)) {
939 // TODO: Currently fixed-order recurrences are modeled as chains of
940 // first-order recurrences. If there are no users of the intermediate
941 // recurrences in the chain, the fixed order recurrence should be
942 // modeled directly, enabling more efficient codegen.
943 return new VPFirstOrderRecurrencePHIRecipe(Phi, *Start, *BackedgeValue);
944 }
945
946 auto InductionIt = Inductions.find(Phi);
947 if (InductionIt != Inductions.end())
948 return createWidenInductionRecipe(Phi, PhiR, Start, InductionIt->second,
949 Plan, PSE, OrigLoop,
950 PhiR->getDebugLoc());
951
952 assert(Reductions.contains(Phi) && "only reductions are expected now");
953 const RecurrenceDescriptor &RdxDesc = Reductions.lookup(Phi);
955 Phi->getIncomingValueForBlock(OrigLoop.getLoopPreheader()) &&
956 "incoming value must match start value");
957 // Will be updated later to >1 if reduction is partial.
958 unsigned ScaleFactor = 1;
959 bool UseOrderedReductions = !AllowReordering && RdxDesc.isOrdered();
960 return new VPReductionPHIRecipe(
961 Phi, RdxDesc.getRecurrenceKind(), *Start, *BackedgeValue,
962 getReductionStyle(InLoopReductions.contains(Phi), UseOrderedReductions,
963 ScaleFactor),
964 Phi->getType()->isFloatingPointTy() ? RdxDesc.getFastMathFlags()
965 : VPIRFlags(),
967 };
968
969 for (VPRecipeBase &R : make_early_inc_range(drop_begin(HeaderVPBB->phis()))) {
970 auto *PhiR = cast<VPPhi>(&R);
971 VPHeaderPHIRecipe *HeaderPhiR = CreateHeaderPhiRecipe(PhiR);
972 HeaderPhiR->insertBefore(PhiR);
973 PhiR->replaceAllUsesWith(HeaderPhiR);
974 PhiR->eraseFromParent();
975 }
976
977 if (!tryToSinkOrHoistRecurrenceUsers(HeaderVPBB, VPDT))
978 return false;
979
980 for (const auto &[HeaderPhiR, ScalarPhiR] :
982 auto *ResumePhiR = cast<VPPhi>(&ScalarPhiR);
983 if (isa<VPFirstOrderRecurrencePHIRecipe>(&HeaderPhiR)) {
984 ResumePhiR->setName("scalar.recur.init");
985 auto *ExtractLastLane = cast<VPInstruction>(ResumePhiR->getOperand(0));
986 ExtractLastLane->setName("vector.recur.extract");
987 continue;
988 }
989 ResumePhiR->setName(isa<VPWidenInductionRecipe>(HeaderPhiR)
990 ? "bc.resume.val"
991 : "bc.merge.rdx");
992 }
993 return true;
994}
995
997 VPlan &Plan, const DenseSet<BasicBlock *> &BlocksNeedingPredication,
998 ElementCount MinVF) {
999 VPTypeAnalysis TypeInfo(Plan);
1002
1003 for (VPRecipeBase &R : Header->phis()) {
1004 auto *PhiR = dyn_cast<VPReductionPHIRecipe>(&R);
1005 if (!PhiR || !PhiR->isInLoop() || (MinVF.isScalar() && !PhiR->isOrdered()))
1006 continue;
1007
1008 RecurKind Kind = PhiR->getRecurrenceKind();
1012 "AnyOf and Find reductions are not allowed for in-loop reductions");
1013
1014 bool IsFPRecurrence =
1016 FastMathFlags FMFs =
1017 IsFPRecurrence ? FastMathFlags::getFast() : FastMathFlags();
1018
1019 // Collect the chain of "link" recipes for the reduction starting at PhiR.
1021 Worklist.insert(PhiR);
1022 for (unsigned I = 0; I != Worklist.size(); ++I) {
1023 VPSingleDefRecipe *Cur = Worklist[I];
1024 for (VPUser *U : Cur->users()) {
1025 auto *UserRecipe = cast<VPSingleDefRecipe>(U);
1026 if (!UserRecipe->getParent()->getEnclosingLoopRegion()) {
1027 assert((UserRecipe->getParent() == Plan.getMiddleBlock() ||
1028 UserRecipe->getParent() == Plan.getScalarPreheader()) &&
1029 "U must be either in the loop region, the middle block or the "
1030 "scalar preheader.");
1031 continue;
1032 }
1033
1034 // Stores using instructions will be sunk later.
1035 if (match(UserRecipe, m_VPInstruction<Instruction::Store>()))
1036 continue;
1037 Worklist.insert(UserRecipe);
1038 }
1039 }
1040
1041 // Visit operation "Links" along the reduction chain top-down starting from
1042 // the phi until LoopExitValue. We keep track of the previous item
1043 // (PreviousLink) to tell which of the two operands of a Link will remain
1044 // scalar and which will be reduced. For minmax by select(cmp), Link will be
1045 // the select instructions. Blend recipes of in-loop reduction phi's will
1046 // get folded to their non-phi operand, as the reduction recipe handles the
1047 // condition directly.
1048 VPSingleDefRecipe *PreviousLink = PhiR; // Aka Worklist[0].
1049 for (VPSingleDefRecipe *CurrentLink : drop_begin(Worklist)) {
1050 if (auto *Blend = dyn_cast<VPBlendRecipe>(CurrentLink)) {
1051 assert(Blend->getNumIncomingValues() == 2 &&
1052 "Blend must have 2 incoming values");
1053 unsigned PhiRIdx = Blend->getIncomingValue(0) == PhiR ? 0 : 1;
1054 assert(Blend->getIncomingValue(PhiRIdx) == PhiR &&
1055 "PhiR must be an operand of the blend");
1056 Blend->replaceAllUsesWith(Blend->getIncomingValue(1 - PhiRIdx));
1057 continue;
1058 }
1059
1060 if (IsFPRecurrence) {
1061 FastMathFlags CurFMF =
1062 cast<VPRecipeWithIRFlags>(CurrentLink)->getFastMathFlags();
1063 if (match(CurrentLink, m_Select(m_VPValue(), m_VPValue(), m_VPValue())))
1064 CurFMF |= cast<VPRecipeWithIRFlags>(CurrentLink->getOperand(0))
1065 ->getFastMathFlags();
1066 FMFs &= CurFMF;
1067 }
1068
1069 Instruction *CurrentLinkI = CurrentLink->getUnderlyingInstr();
1070
1071 // Recognize a call to the llvm.fmuladd intrinsic.
1072 bool IsFMulAdd = Kind == RecurKind::FMulAdd;
1073 VPValue *VecOp;
1074 VPBasicBlock *LinkVPBB = CurrentLink->getParent();
1075 if (IsFMulAdd) {
1077 "Expected current VPInstruction to be a call to the "
1078 "llvm.fmuladd intrinsic");
1079 assert(CurrentLink->getOperand(2) == PreviousLink &&
1080 "expected a call where the previous link is the added operand");
1081
1082 // If the instruction is a call to the llvm.fmuladd intrinsic then we
1083 // need to create an fmul recipe (multiplying the first two operands of
1084 // the fmuladd together) to use as the vector operand for the fadd
1085 // reduction.
1086 auto *FMulRecipe = new VPInstruction(
1087 Instruction::FMul,
1088 {CurrentLink->getOperand(0), CurrentLink->getOperand(1)},
1089 CurrentLinkI->getFastMathFlags());
1090 LinkVPBB->insert(FMulRecipe, CurrentLink->getIterator());
1091 VecOp = FMulRecipe;
1092 } else if (Kind == RecurKind::AddChainWithSubs &&
1093 match(CurrentLink, m_Sub(m_VPValue(), m_VPValue()))) {
1094 Type *PhiTy = TypeInfo.inferScalarType(PhiR);
1095 auto *Zero = Plan.getConstantInt(PhiTy, 0);
1096 VPBuilder Builder(LinkVPBB, CurrentLink->getIterator());
1097 auto *Sub = Builder.createSub(Zero, CurrentLink->getOperand(1),
1098 CurrentLinkI->getDebugLoc());
1099 Sub->setUnderlyingValue(CurrentLinkI);
1100 VecOp = Sub;
1101 } else {
1102 // Index of the first operand which holds a non-mask vector operand.
1103 unsigned IndexOfFirstOperand = 0;
1105 if (match(CurrentLink, m_Cmp(m_VPValue(), m_VPValue())))
1106 continue;
1107 assert(match(CurrentLink,
1109 "must be a select recipe");
1110 IndexOfFirstOperand = 1;
1111 }
1112 // Note that for non-commutable operands (cmp-selects), the semantics of
1113 // the cmp-select are captured in the recurrence kind.
1114 unsigned VecOpId =
1115 CurrentLink->getOperand(IndexOfFirstOperand) == PreviousLink
1116 ? IndexOfFirstOperand + 1
1117 : IndexOfFirstOperand;
1118 VecOp = CurrentLink->getOperand(VecOpId);
1119 assert(
1120 VecOp != PreviousLink &&
1121 CurrentLink->getOperand(
1122 cast<VPInstruction>(CurrentLink)->getNumOperandsWithoutMask() -
1123 1 - (VecOpId - IndexOfFirstOperand)) == PreviousLink &&
1124 "PreviousLink must be the operand other than VecOp");
1125 }
1126
1127 // Get block mask from CurrentLink, if it needs predication.
1128 VPValue *CondOp = nullptr;
1129 if (BlocksNeedingPredication.contains(CurrentLinkI->getParent()))
1130 CondOp = cast<VPInstruction>(CurrentLink)->getMask();
1131
1132 assert(PhiR->getVFScaleFactor() == 1 &&
1133 "inloop reductions must be unscaled");
1134 auto *RedRecipe = new VPReductionRecipe(
1135 Kind, FMFs, CurrentLinkI, PreviousLink, VecOp, CondOp,
1136 getReductionStyle(/*IsInLoop=*/true, PhiR->isOrdered(), 1),
1137 CurrentLinkI->getDebugLoc());
1138 // Append the recipe to the end of the VPBasicBlock because we need to
1139 // ensure that it comes after all of it's inputs, including CondOp.
1140 // Delete CurrentLink as it will be invalid if its operand is replaced
1141 // with a reduction defined at the bottom of the block in the next link.
1142 if (LinkVPBB->getNumSuccessors() == 0)
1143 RedRecipe->insertBefore(&*std::prev(std::prev(LinkVPBB->end())));
1144 else
1145 LinkVPBB->appendRecipe(RedRecipe);
1146
1147 CurrentLink->replaceAllUsesWith(RedRecipe);
1148 // Move any store recipes using the RedRecipe that appear before it in the
1149 // same block to just after the RedRecipe.
1150 for (VPUser *U : make_early_inc_range(RedRecipe->users())) {
1151 auto *UserR = dyn_cast<VPRecipeBase>(U);
1152 if (!UserR || UserR->getParent() != LinkVPBB)
1153 continue;
1155 continue;
1156 UserR->moveAfter(RedRecipe);
1157 }
1158 ToDelete.push_back(CurrentLink);
1159 PreviousLink = RedRecipe;
1160 }
1161 }
1162
1163 for (VPRecipeBase *R : ToDelete)
1164 R->eraseFromParent();
1165}
1166
1167/// Check if all loads in the loop are dereferenceable. Iterates over all blocks
1168/// reachable from \p HeaderVPBB, skipping \p MiddleVPBB. Returns false if any
1169/// non-dereferenceable load is found.
1171 VPBasicBlock *MiddleVPBB, Loop *TheLoop,
1173 DominatorTree &DT, AssumptionCache *AC) {
1174 ScalarEvolution &SE = *PSE.getSE();
1175 const DataLayout &DL = TheLoop->getHeader()->getDataLayout();
1177 vp_depth_first_shallow(HeaderVPBB))) {
1178 // Skip blocks outside the loop (exit blocks and their successors).
1179 if (VPBB == MiddleVPBB || isa<VPIRBasicBlock>(VPBB))
1180 continue;
1181 for (VPRecipeBase &R : *VPBB) {
1182 auto *VPI = dyn_cast<VPInstructionWithType>(&R);
1183 if (!VPI || VPI->getOpcode() != Instruction::Load) {
1184 assert(!R.mayReadFromMemory() && "unexpected recipe reading memory");
1185 continue;
1186 }
1187
1188 // Get the pointer SCEV for dereferenceability checking.
1189 VPValue *Ptr = VPI->getOperand(0);
1190 const SCEV *PtrSCEV = vputils::getSCEVExprForVPValue(Ptr, PSE, TheLoop);
1191 if (isa<SCEVCouldNotCompute>(PtrSCEV)) {
1192 LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Found non-dereferenceable "
1193 "load with SCEVCouldNotCompute pointer\n");
1194 return false;
1195 }
1196
1197 // Check dereferenceability using the SCEV-based version.
1198 Type *LoadTy = VPI->getResultType();
1199 const SCEV *SizeSCEV =
1200 SE.getStoreSizeOfExpr(DL.getIndexType(PtrSCEV->getType()), LoadTy);
1201 auto *Load = cast<LoadInst>(VPI->getUnderlyingValue());
1203 if (isDereferenceableAndAlignedInLoop(PtrSCEV, Load->getAlign(), SizeSCEV,
1204 TheLoop, SE, DT, AC, &Preds))
1205 continue;
1206
1207 LLVM_DEBUG(
1208 dbgs() << "LV: Not vectorizing: Auto-vectorization of loops with "
1209 "potentially faulting load is not supported.\n");
1210 return false;
1211 }
1212 }
1213 return true;
1214}
1215
1217 Loop *TheLoop,
1219 DominatorTree &DT, AssumptionCache *AC) {
1220 auto *MiddleVPBB = cast<VPBasicBlock>(
1222 auto *LatchVPBB = cast<VPBasicBlock>(MiddleVPBB->getSinglePredecessor());
1223 VPBasicBlock *HeaderVPBB = cast<VPBasicBlock>(LatchVPBB->getSuccessors()[1]);
1224
1225 // TODO: We would like to detect uncountable exits and stores within loops
1226 // with such exits from the VPlan alone. Exit detection can be moved
1227 // here from handleUncountableEarlyExits, but we need to improve
1228 // detection of recipes which may write to memory.
1230 if (!areAllLoadsDereferenceable(HeaderVPBB, MiddleVPBB, TheLoop, PSE, DT,
1231 AC))
1232 return false;
1233 // TODO: Check target preference for style.
1234 handleUncountableEarlyExits(Plan, HeaderVPBB, LatchVPBB, MiddleVPBB, Style);
1235 return true;
1236 }
1237
1238 // Disconnect countable early exits from the loop, leaving it with a single
1239 // exit from the latch. Countable early exits are left for a scalar epilog.
1240 for (VPIRBasicBlock *EB : Plan.getExitBlocks()) {
1241 for (VPBlockBase *Pred : to_vector(EB->getPredecessors())) {
1242 if (Pred == MiddleVPBB)
1243 continue;
1244
1245 // Remove phi operands for the early exiting block.
1246 for (VPRecipeBase &R : EB->phis())
1247 cast<VPIRPhi>(&R)->removeIncomingValueFor(Pred);
1248 auto *EarlyExitingVPBB = cast<VPBasicBlock>(Pred);
1249 EarlyExitingVPBB->getTerminator()->eraseFromParent();
1251 }
1252 }
1253 return true;
1254}
1255
1256void VPlanTransforms::addMiddleCheck(VPlan &Plan, bool TailFolded) {
1257 auto *MiddleVPBB = cast<VPBasicBlock>(
1259 // If MiddleVPBB has a single successor then the original loop does not exit
1260 // via the latch and the single successor must be the scalar preheader.
1261 // There's no need to add a runtime check to MiddleVPBB.
1262 if (MiddleVPBB->getNumSuccessors() == 1) {
1263 assert(MiddleVPBB->getSingleSuccessor() == Plan.getScalarPreheader() &&
1264 "must have ScalarPH as single successor");
1265 return;
1266 }
1267
1268 assert(MiddleVPBB->getNumSuccessors() == 2 && "must have 2 successors");
1269
1270 // Add a check in the middle block to see if we have completed all of the
1271 // iterations in the first vector loop.
1272 //
1273 // Three cases:
1274 // 1) If we require a scalar epilogue, the scalar ph must execute. Set the
1275 // condition to false.
1276 // 2) If (N - N%VF) == N, then we *don't* need to run the
1277 // remainder. Thus if tail is to be folded, we know we don't need to run
1278 // the remainder and we can set the condition to true.
1279 // 3) Otherwise, construct a runtime check.
1280
1281 // We use the same DebugLoc as the scalar loop latch terminator instead of
1282 // the corresponding compare because they may have ended up with different
1283 // line numbers and we want to avoid awkward line stepping while debugging.
1284 // E.g., if the compare has got a line number inside the loop.
1285 auto *LatchVPBB = cast<VPBasicBlock>(MiddleVPBB->getSinglePredecessor());
1286 DebugLoc LatchDL = LatchVPBB->getTerminator()->getDebugLoc();
1287 VPBuilder Builder(MiddleVPBB);
1288 VPValue *Cmp;
1289 if (TailFolded)
1290 Cmp = Plan.getTrue();
1291 else
1292 Cmp = Builder.createICmp(CmpInst::ICMP_EQ, Plan.getTripCount(),
1293 &Plan.getVectorTripCount(), LatchDL, "cmp.n");
1294 Builder.createNaryOp(VPInstruction::BranchOnCond, {Cmp}, LatchDL);
1295}
1296
1298 VPDominatorTree VPDT(Plan);
1300 Plan.getEntry());
1301 for (VPBlockBase *HeaderVPB : POT)
1302 if (canonicalHeaderAndLatch(HeaderVPB, VPDT))
1303 createLoopRegion(Plan, HeaderVPB);
1304
1305 VPRegionBlock *TopRegion = Plan.getVectorLoopRegion();
1306 TopRegion->setName("vector loop");
1307 TopRegion->getEntryBasicBlock()->setName("vector.body");
1308}
1309
1311 assert(Plan.getExitBlocks().size() == 1 &&
1312 "only a single-exit block is supported currently");
1313 assert(Plan.getExitBlocks().front()->getSinglePredecessor() ==
1314 Plan.getMiddleBlock() &&
1315 "the exit block must have middle block as single predecessor");
1316
1317 VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
1318 assert(LoopRegion->getSingleSuccessor() == Plan.getMiddleBlock() &&
1319 "The vector loop region must have the middle block as its single "
1320 "successor for now");
1321 VPBasicBlock *Header = LoopRegion->getEntryBasicBlock();
1322
1323 Header->splitAt(Header->getFirstNonPhi());
1324
1325 // Create the header mask, insert it in the header and branch on it.
1326 auto *IV =
1327 new VPWidenCanonicalIVRecipe(Header->getParent()->getCanonicalIV());
1328 VPBuilder Builder(Header, Header->getFirstNonPhi());
1329 Builder.insert(IV);
1331 VPValue *HeaderMask = Builder.createICmp(CmpInst::ICMP_ULE, IV, BTC);
1332 Builder.createNaryOp(VPInstruction::BranchOnCond, HeaderMask);
1333
1334 VPBasicBlock *OrigLatch = LoopRegion->getExitingBasicBlock();
1335 VPValue *IVInc;
1336 [[maybe_unused]] bool TermBranchOnCount =
1337 match(OrigLatch->getTerminator(),
1339 m_Specific(&Plan.getVectorTripCount())));
1340 assert(TermBranchOnCount &&
1341 match(IVInc, m_Add(m_Specific(LoopRegion->getCanonicalIV()),
1342 m_Specific(&Plan.getVFxUF()))) &&
1343 std::next(IVInc->getDefiningRecipe()->getIterator()) ==
1344 OrigLatch->getTerminator()->getIterator() &&
1345 "Unexpected canonical iv increment");
1346
1347 // Split the latch at the IV update, and branch to it from the header mask.
1348 VPBasicBlock *Latch =
1349 OrigLatch->splitAt(IVInc->getDefiningRecipe()->getIterator());
1350 Latch->setName("vector.latch");
1351 VPBlockUtils::connectBlocks(Header, Latch);
1352
1353 // Collect any values defined in the loop that need a phi. Currently this
1354 // includes header phi backedges and live-outs extracted in the middle block.
1355 // TODO: Handle early exits via Plan.getExitBlocks()
1357 for (VPRecipeBase &R : Header->phis())
1359 NeedsPhi[cast<VPHeaderPHIRecipe>(R).getBackedgeValue()].push_back(&R);
1360
1361 VPValue *V;
1362 for (VPRecipeBase &R : *Plan.getMiddleBlock())
1363 if (match(&R, m_ExtractLastPart(m_VPValue(V))))
1364 NeedsPhi[V].push_back(&R);
1365
1366 // Insert phis with a poison incoming value for past the end of the tail.
1367 Builder.setInsertPoint(Latch, Latch->begin());
1368 VPTypeAnalysis TypeInfo(Plan);
1369 for (const auto &[V, Users] : NeedsPhi) {
1370 if (isa<VPIRValue>(V))
1371 continue;
1372 // TODO: For reduction phis, use phi value instead of poison so we can
1373 // remove the special casing for tail folding in
1374 // LoopVectorizationPlanner::addReductionResultComputation
1375 VPValue *Poison =
1377 VPInstruction *Phi = Builder.createScalarPhi({V, Poison});
1378 for (VPUser *U : Users)
1379 U->replaceUsesOfWith(V, Phi);
1380 }
1381
1382 // Any extract of the last element must be updated to extract from the last
1383 // active lane of the header mask instead (i.e., the lane corresponding to the
1384 // last active iteration).
1385 Builder.setInsertPoint(Plan.getMiddleBlock()->getTerminator());
1386 for (VPRecipeBase &R : *Plan.getMiddleBlock()) {
1387 VPValue *Op;
1389 continue;
1390
1391 // Compute the index of the last active lane.
1392 VPValue *LastActiveLane =
1393 Builder.createNaryOp(VPInstruction::LastActiveLane, HeaderMask);
1394 auto *Ext =
1395 Builder.createNaryOp(VPInstruction::ExtractLane, {LastActiveLane, Op});
1396 R.getVPSingleValue()->replaceAllUsesWith(Ext);
1397 }
1398}
1399
1400/// Insert \p CheckBlockVPBB on the edge leading to the vector preheader,
1401/// connecting it to both vector and scalar preheaders. Updates scalar
1402/// preheader phis to account for the new predecessor.
1404 VPBasicBlock *CheckBlockVPBB) {
1405 VPBlockBase *VectorPH = Plan.getVectorPreheader();
1406 auto *ScalarPH = cast<VPBasicBlock>(Plan.getScalarPreheader());
1407 VPBlockBase *PreVectorPH = VectorPH->getSinglePredecessor();
1408 VPBlockUtils::insertOnEdge(PreVectorPH, VectorPH, CheckBlockVPBB);
1409 VPBlockUtils::connectBlocks(CheckBlockVPBB, ScalarPH);
1410 CheckBlockVPBB->swapSuccessors();
1411 unsigned NumPreds = ScalarPH->getNumPredecessors();
1412 for (VPRecipeBase &R : ScalarPH->phis()) {
1413 auto *Phi = cast<VPPhi>(&R);
1414 assert(Phi->getNumIncoming() == NumPreds - 1 &&
1415 "must have incoming values for all predecessors");
1416 Phi->addOperand(Phi->getOperand(NumPreds - 2));
1417 }
1418}
1419
1420// Likelyhood of bypassing the vectorized loop due to a runtime check block,
1421// including memory overlap checks block and wrapping/unit-stride checks block.
1422static constexpr uint32_t CheckBypassWeights[] = {1, 127};
1423
1424/// Create a BranchOnCond terminator in \p CheckBlockVPBB. Optionally adds
1425/// branch weights.
1426static void addBypassBranch(VPlan &Plan, VPBasicBlock *CheckBlockVPBB,
1427 VPValue *Cond, bool AddBranchWeights) {
1429 auto *Term = VPBuilder(CheckBlockVPBB)
1431 if (AddBranchWeights) {
1432 MDBuilder MDB(Plan.getContext());
1433 MDNode *BranchWeights =
1434 MDB.createBranchWeights(CheckBypassWeights, /*IsExpected=*/false);
1435 Term->setMetadata(LLVMContext::MD_prof, BranchWeights);
1436 }
1437}
1438
1440 BasicBlock *CheckBlock,
1441 bool AddBranchWeights) {
1442 VPValue *CondVPV = Plan.getOrAddLiveIn(Cond);
1443 VPBasicBlock *CheckBlockVPBB = Plan.createVPIRBasicBlock(CheckBlock);
1444 insertCheckBlockBeforeVectorLoop(Plan, CheckBlockVPBB);
1445 addBypassBranch(Plan, CheckBlockVPBB, CondVPV, AddBranchWeights);
1446}
1447
1448/// Return an insert point in \p EntryVPBB after existing VPIRPhi,
1449/// VPIRInstruction and VPExpandSCEVRecipe recipes.
1451 auto InsertPt = EntryVPBB->begin();
1452 while (InsertPt != EntryVPBB->end() &&
1454 ++InsertPt;
1455 return InsertPt;
1456}
1457
1459 VPlan &Plan, ElementCount VF, unsigned UF,
1460 ElementCount MinProfitableTripCount, bool RequiresScalarEpilogue,
1461 bool TailFolded, Loop *OrigLoop, const uint32_t *MinItersBypassWeights,
1463 // Generate code to check if the loop's trip count is less than VF * UF, or
1464 // equal to it in case a scalar epilogue is required; this implies that the
1465 // vector trip count is zero. This check also covers the case where adding one
1466 // to the backedge-taken count overflowed leading to an incorrect trip count
1467 // of zero. In this case we will also jump to the scalar loop.
1468 CmpInst::Predicate CmpPred =
1469 RequiresScalarEpilogue ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_ULT;
1470 // If the tail is folded, the vector loop handles all iterations.
1471 VPValue *TripCountVPV = Plan.getTripCount();
1472 const SCEV *TripCount = vputils::getSCEVExprForVPValue(TripCountVPV, PSE);
1473 Type *TripCountTy = TripCount->getType();
1474 ScalarEvolution &SE = *PSE.getSE();
1475 auto GetMinTripCount = [&]() -> const SCEV * {
1476 // Compute max(MinProfitableTripCount, UF * VF) and return it.
1477 const SCEV *VFxUF =
1478 SE.getElementCount(TripCountTy, (VF * UF), SCEV::FlagNUW);
1479 if (UF * VF.getKnownMinValue() >=
1480 MinProfitableTripCount.getKnownMinValue()) {
1481 // TODO: SCEV should be able to simplify test.
1482 return VFxUF;
1483 }
1484 const SCEV *MinProfitableTripCountSCEV =
1485 SE.getElementCount(TripCountTy, MinProfitableTripCount, SCEV::FlagNUW);
1486 return SE.getUMaxExpr(MinProfitableTripCountSCEV, VFxUF);
1487 };
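// E.g., with VF=4, UF=2, and MinProfitableTripCount=16 the lambda returns
// umax(8, 16) = 16; with MinProfitableTripCount=4 it simplifies the bound to
// just VF * UF = 8.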
1488
1489 VPBasicBlock *EntryVPBB = Plan.getEntry();
1490 // Place compare and branch in CheckBlock if given, ExpandSCEVs in Entry.
1491 VPBasicBlock *CheckVPBB = CheckBlock ? CheckBlock : EntryVPBB;
1492 VPBuilder Builder(CheckVPBB);
1493 VPValue *TripCountCheck = Plan.getFalse();
1494 const SCEV *Step = GetMinTripCount();
1495 // TripCountCheck defaults to false: folding the tail implies a positive
1496 // vector trip count.
1497 if (!TailFolded) {
1498 // TODO: Emit unconditional branch to vector preheader instead of
1499 // conditional branch with known condition.
1500 TripCount = SE.applyLoopGuards(TripCount, OrigLoop);
1501 // Check if the trip count is < the step.
1502 if (SE.isKnownPredicate(CmpPred, TripCount, Step)) {
1503 // TODO: Ensure step is at most the trip count when determining max VF and
1504 // UF, w/o tail folding.
1505 TripCountCheck = Plan.getTrue();
1506 } else if (!SE.isKnownPredicate(CmpInst::getInversePredicate(CmpPred),
1507 TripCount, Step)) {
1508 // Generate the minimum iteration check only if we can prove neither that
1509 // the check is known to be true nor that it is known to be false.
1510 // ExpandSCEV must be placed in Entry.
1511 VPBuilder SCEVBuilder(EntryVPBB, getExpandSCEVInsertPt(EntryVPBB));
1512 VPValue *MinTripCountVPV = SCEVBuilder.createExpandSCEV(Step);
1513 TripCountCheck = Builder.createICmp(
1514 CmpPred, TripCountVPV, MinTripCountVPV, DL, "min.iters.check");
1515 } // else step known to be < trip count, use TripCountCheck preset to false.
1516 }
1517 VPInstruction *Term =
1518 Builder.createNaryOp(VPInstruction::BranchOnCond, {TripCountCheck}, DL);
1519 if (MinItersBypassWeights) {
1520 MDBuilder MDB(Plan.getContext());
1521 MDNode *BranchWeights = MDB.createBranchWeights(
1522 ArrayRef(MinItersBypassWeights, 2), /*IsExpected=*/false);
1523 Term->setMetadata(LLVMContext::MD_prof, BranchWeights);
1524 }
1525}
1526
1528 VPlan &Plan, ElementCount VF, unsigned UF, bool RequiresScalarEpilogue,
1529 Loop *OrigLoop, const uint32_t *MinItersBypassWeights, DebugLoc DL,
1530 PredicatedScalarEvolution &PSE) {
1531 auto *CheckBlock = Plan.createVPBasicBlock("vector.main.loop.iter.check");
1532 insertCheckBlockBeforeVectorLoop(Plan, CheckBlock);
1533 addMinimumIterationCheck(Plan, VF, UF, ElementCount::getFixed(0),
1534 RequiresScalarEpilogue, /*TailFolded=*/false,
1535 OrigLoop, MinItersBypassWeights, DL, PSE,
1536 CheckBlock);
1537}
1538
1539 void VPlanTransforms::addMinimumVectorEpilogueIterationCheck(
1540 VPlan &Plan, Value *VectorTripCount, bool RequiresScalarEpilogue,
1541 ElementCount EpilogueVF, unsigned EpilogueUF, unsigned MainLoopStep,
1542 unsigned EpilogueLoopStep, ScalarEvolution &SE) {
1543 // Add the minimum iteration check for the epilogue vector loop.
1544 VPValue *TC = Plan.getTripCount();
1545 Value *TripCount = TC->getLiveInIRValue();
1546 VPBuilder Builder(cast<VPBasicBlock>(Plan.getEntry()));
1547 VPValue *VFxUF = Builder.createExpandSCEV(SE.getElementCount(
1548 TripCount->getType(), (EpilogueVF * EpilogueUF), SCEV::FlagNUW));
1549 VPValue *Count = Builder.createSub(TC, Plan.getOrAddLiveIn(VectorTripCount),
1550 DebugLoc::getUnknown(), "n.vec.remaining");
1551
1552 // Generate code to check if the remaining iteration count is less than the
1553 // VF * UF of the vector epilogue loop.
1554 auto P = RequiresScalarEpilogue ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_ULT;
1555 auto *CheckMinIters = Builder.createICmp(
1556 P, Count, VFxUF, DebugLoc::getUnknown(), "min.epilog.iters.check");
1557 VPInstruction *Branch =
1558 Builder.createNaryOp(VPInstruction::BranchOnCond, CheckMinIters);
1559
1560 // We assume the remaining `Count` is uniformly distributed in
1561 // [0, MainLoopStep), so the probability of
1562 // `Count < EpilogueLoopStep` is
1563 // min(MainLoopStep, EpilogueLoopStep) / MainLoopStep.
1564 // TODO: Improve the estimate by taking the estimated trip count into
1565 // consideration.
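// E.g., MainLoopStep=16 and EpilogueLoopStep=4 give weights {4, 12}, i.e.
// an estimated 4/16 = 25% probability of skipping the epilogue loop.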
1566 unsigned EstimatedSkipCount = std::min(MainLoopStep, EpilogueLoopStep);
1567 const uint32_t Weights[] = {EstimatedSkipCount,
1568 MainLoopStep - EstimatedSkipCount};
1569 MDBuilder MDB(Plan.getContext());
1570 MDNode *BranchWeights =
1571 MDB.createBranchWeights(Weights, /*IsExpected=*/false);
1572 Branch->setMetadata(LLVMContext::MD_prof, BranchWeights);
1573}
1574
1575/// Find and return the final select instruction of the FindIV result pattern
1576/// for the given \p BackedgeVal:
1577/// select(icmp ne ComputeReductionResult(ReducedIV), Sentinel),
1578/// ComputeReductionResult(ReducedIV), Start.
1579 static VPInstruction *findFindIVSelect(VPValue *BackedgeVal) {
1580 return cast<VPInstruction>(
1581 vputils::findRecipe(BackedgeVal, [BackedgeVal](VPRecipeBase *R) {
1582 auto *VPI = dyn_cast<VPInstruction>(R);
1583 return VPI &&
1584 matchFindIVResult(VPI, m_Specific(BackedgeVal), m_VPValue());
1585 }));
1586}
1587
1588 bool VPlanTransforms::handleMaxMinNumReductions(VPlan &Plan) {
1589 auto GetMinOrMaxCompareValue =
1590 [](VPReductionPHIRecipe *RedPhiR) -> VPValue * {
1591 auto *MinOrMaxR =
1592 dyn_cast_or_null<VPRecipeWithIRFlags>(RedPhiR->getBackedgeValue());
1593 if (!MinOrMaxR)
1594 return nullptr;
1595
1596 // Check that MinOrMaxR is a VPWidenIntrinsicRecipe or VPReplicateRecipe
1597 // with an intrinsic that matches the reduction kind.
1598 Intrinsic::ID ExpectedIntrinsicID =
1599 getMinMaxReductionIntrinsicOp(RedPhiR->getRecurrenceKind());
1600 if (!match(MinOrMaxR, m_Intrinsic(ExpectedIntrinsicID)))
1601 return nullptr;
1602
1603 if (MinOrMaxR->getOperand(0) == RedPhiR)
1604 return MinOrMaxR->getOperand(1);
1605
1606 assert(MinOrMaxR->getOperand(1) == RedPhiR &&
1607 "Reduction phi operand expected");
1608 return MinOrMaxR->getOperand(0);
1609 };
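// E.g., for an FMaxNum reduction whose backedge value is
//   call llvm.maxnum(%red.phi, %x)
// the lambda above returns %x, the value the reduction phi is combined with.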
1610
1611 VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
1612 SmallVector<std::pair<VPReductionPHIRecipe *, VPValue *>, 2>
1613 MinOrMaxNumReductionsToHandle;
1614 bool HasUnsupportedPhi = false;
1615 for (auto &R : LoopRegion->getEntryBasicBlock()->phis()) {
1616 if (isa<VPWidenIntOrFpInductionRecipe>(&R))
1617 continue;
1618 auto *Cur = dyn_cast<VPReductionPHIRecipe>(&R);
1619 if (!Cur) {
1620 // TODO: Also support fixed-order recurrence phis.
1621 HasUnsupportedPhi = true;
1622 continue;
1623 }
1624 if (!RecurrenceDescriptor::isFPMinMaxNumRecurrenceKind(
1625 Cur->getRecurrenceKind())) {
1626 HasUnsupportedPhi = true;
1627 continue;
1628 }
1629
1630 VPValue *MinOrMaxOp = GetMinOrMaxCompareValue(Cur);
1631 if (!MinOrMaxOp)
1632 return false;
1633
1634 MinOrMaxNumReductionsToHandle.emplace_back(Cur, MinOrMaxOp);
1635 }
1636
1637 if (MinOrMaxNumReductionsToHandle.empty())
1638 return true;
1639
1640 // We won't be able to resume execution in the scalar tail if there are
1641 // unsupported header phis, or if there is no scalar tail at all due to
1642 // tail-folding.
1643 if (HasUnsupportedPhi || !Plan.hasScalarTail())
1644 return false;
1645
1646 /// Check if the vector loop of \p Plan can exit early and restart
1647 /// execution of the last vector iteration in the scalar loop. This requires
1648 /// all recipes up to the early exit point to be side-effect-free, as they
1649 /// are re-executed. Currently we check that the loop is free of any recipe that
1650 /// may write to memory. Expected to operate on an early VPlan w/o nested
1651 /// regions.
1652 for (VPBlockBase *VPB :
1653 vp_depth_first_shallow(LoopRegion->getEntry())) {
1654 auto *VPBB = cast<VPBasicBlock>(VPB);
1655 for (auto &R : *VPBB) {
1656 if (R.mayWriteToMemory() && !match(&R, m_BranchOnCount()))
1657 return false;
1658 }
1659 }
1660
1661 VPBasicBlock *LatchVPBB = LoopRegion->getExitingBasicBlock();
1662 VPBuilder LatchBuilder(LatchVPBB->getTerminator());
1663 VPValue *AllNaNLanes = nullptr;
1664 SmallPtrSet<VPValue *, 2> RdxResults;
1665 for (const auto &[_, MinOrMaxOp] : MinOrMaxNumReductionsToHandle) {
1666 VPValue *RedNaNLanes =
1667 LatchBuilder.createFCmp(CmpInst::FCMP_UNO, MinOrMaxOp, MinOrMaxOp);
1668 AllNaNLanes = AllNaNLanes ? LatchBuilder.createOr(AllNaNLanes, RedNaNLanes)
1669 : RedNaNLanes;
1670 }
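// E.g., with two such reductions over %a and %b, the latch now computes
//   (fcmp uno %a, %a) | (fcmp uno %b, %b)
// i.e. the lanes in which any reduction input is NaN.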
1671
1672 VPValue *AnyNaNLane =
1673 LatchBuilder.createNaryOp(VPInstruction::AnyOf, {AllNaNLanes});
1674 VPBasicBlock *MiddleVPBB = Plan.getMiddleBlock();
1675 VPBuilder MiddleBuilder(MiddleVPBB, MiddleVPBB->begin());
1676 for (const auto &[RedPhiR, _] : MinOrMaxNumReductionsToHandle) {
1677 assert(RecurrenceDescriptor::isFPMinMaxNumRecurrenceKind(
1678 RedPhiR->getRecurrenceKind()) &&
1679 "unsupported reduction");
1680
1681 // If we exit early due to NaNs, compute the final reduction result based on
1682 // the reduction phi at the beginning of the last vector iteration.
1683 auto *RdxResult = vputils::findComputeReductionResult(RedPhiR);
1684 assert(RdxResult && "must find a ComputeReductionResult");
1685
1686 auto *NewSel = MiddleBuilder.createSelect(AnyNaNLane, RedPhiR,
1687 RdxResult->getOperand(0));
1688 RdxResult->setOperand(0, NewSel);
1689 assert(!RdxResults.contains(RdxResult) && "RdxResult already used");
1690 RdxResults.insert(RdxResult);
1691 }
1692
1693 auto *LatchExitingBranch = LatchVPBB->getTerminator();
1694 assert(match(LatchExitingBranch, m_BranchOnCount(m_VPValue(), m_VPValue())) &&
1695 "Unexpected terminator");
1696 auto *IsLatchExitTaken = LatchBuilder.createICmp(
1697 CmpInst::ICMP_EQ, LatchExitingBranch->getOperand(0),
1698 LatchExitingBranch->getOperand(1));
1699 auto *AnyExitTaken = LatchBuilder.createOr(AnyNaNLane, IsLatchExitTaken);
1700 LatchBuilder.createNaryOp(VPInstruction::BranchOnCond, AnyExitTaken);
1701 LatchExitingBranch->eraseFromParent();
1702
1703 // Update resume phis for inductions in the scalar preheader. If AnyNaNLane
1704 // is true, they resume from the start of the last vector iteration via the
1705 // canonical IV; otherwise they resume from the original value.
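// E.g., a resume phi whose incoming value from the middle block is the
// vector trip count is rewired to select(%any.nan, %canonical.iv,
// %vector.tc).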
1706 auto IsTC = [&Plan](VPValue *V) {
1707 return V == &Plan.getVectorTripCount() || V == Plan.getTripCount();
1708 };
1709 for (auto &R : Plan.getScalarPreheader()->phis()) {
1710 auto *ResumeR = cast<VPPhi>(&R);
1711 VPValue *VecV = ResumeR->getOperand(0);
1712 if (RdxResults.contains(VecV))
1713 continue;
1714 if (auto *DerivedIV = dyn_cast<VPDerivedIVRecipe>(VecV)) {
1715 VPValue *DIVTC = DerivedIV->getOperand(1);
1716 if (DerivedIV->getNumUsers() == 1 && IsTC(DIVTC)) {
1717 auto *NewSel = MiddleBuilder.createSelect(
1718 AnyNaNLane, LoopRegion->getCanonicalIV(), DIVTC);
1719 DerivedIV->moveAfter(&*MiddleBuilder.getInsertPoint());
1720 DerivedIV->setOperand(1, NewSel);
1721 continue;
1722 }
1723 }
1724 // Bail out and abandon the current, partially modified, VPlan if we
1725 // encounter a resume phi that cannot be updated yet.
1726 if (!IsTC(VecV)) {
1727 LLVM_DEBUG(dbgs() << "Found resume phi we cannot update for VPlan with "
1728 "FMaxNum/FMinNum reduction.\n");
1729 return false;
1730 }
1731 auto *NewSel = MiddleBuilder.createSelect(
1732 AnyNaNLane, LoopRegion->getCanonicalIV(), VecV);
1733 ResumeR->setOperand(0, NewSel);
1734 }
1735
1736 auto *MiddleTerm = MiddleVPBB->getTerminator();
1737 MiddleBuilder.setInsertPoint(MiddleTerm);
1738 VPValue *MiddleCond = MiddleTerm->getOperand(0);
1739 VPValue *NewCond =
1740 MiddleBuilder.createAnd(MiddleCond, MiddleBuilder.createNot(AnyNaNLane));
1741 MiddleTerm->setOperand(0, NewCond);
1742 return true;
1743}
1744
1746 if (Plan.hasScalarVFOnly())
1747 return false;
1748
1749 // We want to create the following nodes:
1750 // vector.body:
1751 // ...new WidenPHI recipe introduced to keep the mask value for the latest
1752 // iteration where any lane was active.
1753 // mask.phi = phi [ ir<false>, vector.ph ], [ vp<new.mask>, vector.body ]
1754 // ...data.phi (a VPReductionPHIRecipe for a FindLast reduction) already
1755 // exists, but needs updating to use 'new.data' for the backedge value.
1756 // data.phi = phi ir<default.val>, vp<new.data>
1757 //
1758 // ...'data' and 'compare' created by existing nodes...
1759 //
1760 // ...new recipes introduced to determine whether to update the reduction
1761 // values or keep the current one.
1762 // any.active = i1 any-of ir<compare>
1763 // new.mask = select vp<any.active>, ir<compare>, vp<mask.phi>
1764 // new.data = select vp<any.active>, ir<data>, ir<data.phi>
1765 //
1766 // middle.block:
1767 // ...extract-last-active replaces compute-reduction-result.
1768 // result = extract-last-active vp<new.data>, vp<new.mask>, ir<default.val>
1769
1770 SmallVector<VPReductionPHIRecipe *, 2> Phis;
1771 for (VPRecipeBase &Phi :
1772 Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
1773 auto *PhiR = dyn_cast<VPReductionPHIRecipe>(&Phi);
1774 if (PhiR && RecurrenceDescriptor::isFindLastRecurrenceKind(
1775 PhiR->getRecurrenceKind()))
1776 Phis.push_back(PhiR);
1777 }
1778
1779 if (Phis.empty())
1780 return true;
1781
1782 VPValue *HeaderMask = vputils::findHeaderMask(Plan);
1783 for (VPReductionPHIRecipe *PhiR : Phis) {
1784 // Find the condition for the select/blend.
1785 VPValue *BackedgeSelect = PhiR->getBackedgeValue();
1786 VPValue *CondSelect = BackedgeSelect;
1787
1788 // If there's a header mask, the backedge select will not be the find-last
1789 // select.
1790 if (HeaderMask && !match(BackedgeSelect,
1791 m_Select(m_Specific(HeaderMask),
1792 m_VPValue(CondSelect), m_Specific(PhiR))))
1793 llvm_unreachable("expected header mask select");
1794
1795 VPValue *Cond = nullptr, *Op1 = nullptr, *Op2 = nullptr;
1796
1797 // If we're matching a blend rather than a select, there should be one
1798 // incoming value which is the data, then all other incoming values should
1799 // be the phi.
1800 auto MatchBlend = [&](VPRecipeBase *R) {
1801 auto *Blend = dyn_cast<VPBlendRecipe>(R);
1802 if (!Blend)
1803 return false;
1804 assert(!Blend->isNormalized() && "must run before blend normalization");
1805 unsigned NumIncomingDataValues = 0;
1806 for (unsigned I = 0; I < Blend->getNumIncomingValues(); ++I) {
1807 VPValue *Incoming = Blend->getIncomingValue(I);
1808 if (Incoming != PhiR) {
1809 ++NumIncomingDataValues;
1810 Cond = Blend->getMask(I);
1811 Op1 = Incoming;
1812 Op2 = PhiR;
1813 }
1814 }
1815 return NumIncomingDataValues == 1;
1816 };
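// E.g., for a (non-normalized) blend with a single non-phi incoming value,
//   BLEND %res = %phi %data/%m
// MatchBlend sets Cond = %m, Op1 = %data, and Op2 = %phi.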
1817
1818 VPSingleDefRecipe *SelectR =
1819 cast<VPSingleDefRecipe>(CondSelect->getDefiningRecipe());
1820 if (!match(SelectR,
1821 m_Select(m_VPValue(Cond), m_VPValue(Op1), m_VPValue(Op2))) &&
1822 !MatchBlend(SelectR))
1823 return false;
1824
1825 assert(Cond != HeaderMask && "Cond must not be HeaderMask");
1826
1827 // Find final reduction computation and replace it with an
1828 // extract.last.active intrinsic.
1829 auto *RdxResult =
1830 vputils::findComputeReductionResult(
1831 BackedgeSelect);
1832 assert(RdxResult && "Could not find reduction result");
1833
1834 // Add mask phi.
1835 VPBuilder Builder = VPBuilder::getToInsertAfter(PhiR);
1836 auto *MaskPHI = Builder.createWidenPhi(Plan.getFalse());
1837
1838 // Add select for mask.
1839 Builder.setInsertPoint(SelectR);
1840
1841 if (Op1 == PhiR) {
1842 // Normalize to selecting the data operand when the condition is true by
1843 // swapping operands and negating the condition.
1844 std::swap(Op1, Op2);
1845 Cond = Builder.createNot(Cond);
1846 }
1847 assert(Op2 == PhiR && "data value must be selected if Cond is true");
1848
1849 if (HeaderMask)
1850 Cond = Builder.createLogicalAnd(HeaderMask, Cond);
1851
1852 VPValue *AnyOf = Builder.createNaryOp(VPInstruction::AnyOf, {Cond});
1853 VPValue *MaskSelect = Builder.createSelect(AnyOf, Cond, MaskPHI);
1854 MaskPHI->addOperand(MaskSelect);
1855
1856 // Replace select for data.
1857 VPValue *DataSelect =
1858 Builder.createSelect(AnyOf, Op1, Op2, SelectR->getDebugLoc());
1859 SelectR->replaceAllUsesWith(DataSelect);
1860 PhiR->setBackedgeValue(DataSelect);
1861 SelectR->eraseFromParent();
1862
1863 Builder.setInsertPoint(RdxResult);
1864 auto *ExtractLastActive =
1865 Builder.createNaryOp(VPInstruction::ExtractLastActive,
1866 {PhiR->getStartValue(), DataSelect, MaskSelect},
1867 RdxResult->getDebugLoc());
1868 RdxResult->replaceAllUsesWith(ExtractLastActive);
1869 RdxResult->eraseFromParent();
1870 }
1871
1872 return true;
1873}
1874
1875/// Given a first argmin/argmax pattern with strict predicate consisting of
1876/// 1) a MinOrMax reduction \p MinOrMaxPhiR producing \p MinOrMaxResult,
1877/// 2) a wide induction \p WideIV,
1878/// 3) a FindLastIV reduction \p FindLastIVPhiR using \p WideIV,
1879/// return the smallest index of the FindLastIV reduction result using UMin,
1880/// unless \p MinOrMaxResult equals the start value of its MinOrMax reduction.
1881/// In that case, return the start value of the FindLastIV reduction instead.
1882/// If \p WideIV is not canonical, a new canonical wide IV is added, and the
1883/// final result is scaled back to the non-canonical \p WideIV.
1884/// The final value of the FindLastIV reduction is originally computed using
1885/// \p FindIVSelect, \p FindIVCmp, and \p FindIVRdxResult, which are replaced
1886/// and removed.
1887/// Returns true if the pattern was handled successfully, false otherwise.
1888 static bool handleFirstArgMinOrMax(
1889 VPlan &Plan, VPReductionPHIRecipe *MinOrMaxPhiR,
1890 VPReductionPHIRecipe *FindLastIVPhiR, VPWidenIntOrFpInductionRecipe *WideIV,
1891 VPInstruction *MinOrMaxResult, VPInstruction *FindIVSelect,
1892 VPRecipeBase *FindIVCmp, VPInstruction *FindIVRdxResult) {
1893 assert(!FindLastIVPhiR->isInLoop() && !FindLastIVPhiR->isOrdered() &&
1894 "inloop and ordered reductions not supported");
1895 assert(FindLastIVPhiR->getVFScaleFactor() == 1 &&
1896 "FindIV reduction must not be scaled");
1897 Type *Ty = Plan.getVectorLoopRegion()->getCanonicalIVType();
1898
1899 // TODO: Support non-canonical (i.e., narrower-than-canonical) IV types.
1900 // TODO: Emit remarks for failed transformations.
1901 if (Ty != VPTypeAnalysis(Plan).inferScalarType(WideIV))
1902 return false;
1903
1904 auto *FindIVSelectR = cast<VPSingleDefRecipe>(
1905 FindLastIVPhiR->getBackedgeValue()->getDefiningRecipe());
1906 assert(
1907 match(FindIVSelectR, m_Select(m_VPValue(), m_VPValue(), m_VPValue())) &&
1908 "backedge value must be a select");
1909 if (FindIVSelectR->getOperand(1) != WideIV &&
1910 FindIVSelectR->getOperand(2) != WideIV)
1911 return false;
1912
1913 // If the original wide IV is not canonical, create a new one. The canonical
1914 // wide IV is guaranteed to not wrap for all lanes that are active in the
1915 // vector loop.
1916 if (!WideIV->isCanonical()) {
1917 VPIRValue *Zero = Plan.getConstantInt(Ty, 0);
1918 VPIRValue *One = Plan.getConstantInt(Ty, 1);
1919 auto *WidenCanIV = new VPWidenIntOrFpInductionRecipe(
1920 nullptr, Zero, One, WideIV->getVFValue(),
1921 WideIV->getInductionDescriptor(),
1922 VPIRFlags::WrapFlagsTy(/*HasNUW=*/true, /*HasNSW=*/false),
1923 WideIV->getDebugLoc());
1924 WidenCanIV->insertBefore(WideIV);
1925
1926 // Update the select to use the wide canonical IV.
1927 FindIVSelectR->setOperand(FindIVSelectR->getOperand(1) == WideIV ? 1 : 2,
1928 WidenCanIV);
1929 }
1930 FindLastIVPhiR->setOperand(0, Plan.getOrAddLiveIn(PoisonValue::get(Ty)));
1931
1932 // The reduction using MinOrMaxPhiR needs adjusting to compute the correct
1933 // result:
1934 // 1. Find the first canonical indices corresponding to partial min/max
1935 // values, using loop reductions.
1936 // 2. Find which of the partial min/max values are equal to the overall
1937 // min/max value.
1938 // 3. Select among the canonical indices those corresponding to the overall
1939 // min/max value.
1940 // 4. Find the first canonical index of overall min/max and scale it back to
1941 // the original IV using VPDerivedIVRecipe.
1942 // 5. If the overall min/max equals the starting min/max, the condition in
1943 // the loop was always false, due to being strict; return the start value
1944 // of FindLastIVPhiR in that case.
1945 //
1946 // For example, we transform two independent reduction result computations
1947 // for
1948 //
1949 // <x1> vector loop: {
1950 // vector.body:
1951 // ...
1952 // ir<%iv> = WIDEN-INDUCTION nuw nsw ir<10>, ir<1>, vp<%0>
1953 // WIDEN-REDUCTION-PHI ir<%min.idx> = phi ir<sentinel.min.start>,
1954 // ir<%min.idx.next>
1955 // WIDEN-REDUCTION-PHI ir<%min.val> = phi ir<100>, ir<%min.val.next>
1956 // ....
1957 // WIDEN-INTRINSIC ir<%min.val.next> = call llvm.umin(ir<%min.val>, ir<%l>)
1958 // WIDEN ir<%min.idx.next> = select ir<%cmp>, ir<%iv>, ir<%min.idx>
1959 // ...
1960 // }
1961 // Successor(s): middle.block
1962 //
1963 // middle.block:
1964 // vp<%iv.rdx> = compute-reduction-result (smax) vp<%min.idx.next>
1965 // vp<%min.result> = compute-reduction-result (umin) ir<%min.val.next>
1966 // vp<%cmp> = icmp ne vp<%iv.rdx>, ir<sentinel.min.start>
1967 // vp<%find.iv.result> = select vp<%cmp>, vp<%iv.rdx>, ir<10>
1968 //
1969 //
1970 // Into:
1971 //
1972 // vp<%reduced.min> = compute-reduction-result (umin) ir<%min.val.next>
1973 // vp<%reduced.mins.mask> = icmp eq ir<%min.val.next>, vp<%reduced.min>
1974 // vp<%idxs2reduce> = select vp<%reduced.mins.mask>, ir<%min.idx.next>,
1975 // ir<MaxUInt>
1976 // vp<%reduced.idx> = compute-reduction-result (umin) vp<%idxs2reduce>
1977 // vp<%scaled.idx> = DERIVED-IV ir<20> + vp<%reduced.idx> * ir<1>
1978 // vp<%always.false> = icmp eq vp<%reduced.min>, ir<100>
1979 // vp<%final.idx> = select vp<%always.false>, ir<10>,
1980 // vp<%scaled.idx>
1981
1982 VPBuilder Builder(FindIVRdxResult);
1983 VPValue *MinOrMaxExiting = MinOrMaxResult->getOperand(0);
1984 auto *FinalMinOrMaxCmp =
1985 Builder.createICmp(CmpInst::ICMP_EQ, MinOrMaxExiting, MinOrMaxResult);
1986 VPValue *LastIVExiting = FindIVRdxResult->getOperand(0);
1987 VPValue *MaxIV =
1988 Plan.getConstantInt(APInt::getMaxValue(Ty->getIntegerBitWidth()));
1989 auto *FinalIVSelect =
1990 Builder.createSelect(FinalMinOrMaxCmp, LastIVExiting, MaxIV);
1991 VPIRFlags RdxFlags(RecurKind::UMin, false, false, FastMathFlags());
1992 VPSingleDefRecipe *FinalCanIV = Builder.createNaryOp(
1993 VPInstruction::ComputeReductionResult, {FinalIVSelect}, RdxFlags,
1994 FindIVRdxResult->getDebugLoc());
1995
1996 // If we used a new wide canonical IV, convert the reduction result back to
1997 // the original IV scale before the final select.
1998 if (!WideIV->isCanonical()) {
1999 auto *DerivedIVRecipe =
2000 new VPDerivedIVRecipe(InductionDescriptor::IK_IntInduction,
2001 nullptr, // No FPBinOp for integer induction
2002 WideIV->getStartValue(), FinalCanIV,
2003 WideIV->getStepValue(), "derived.iv.result");
2004 DerivedIVRecipe->insertBefore(&*Builder.getInsertPoint());
2005 FinalCanIV = DerivedIVRecipe;
2006 }
2007
2008 // If the final min/max value matches its start value, the condition in the
2009 // loop was always false, i.e. no induction value has been selected. If that's
2010 // the case, set the result of the IV reduction to its start value.
2011 VPValue *AlwaysFalse = Builder.createICmp(CmpInst::ICMP_EQ, MinOrMaxResult,
2012 MinOrMaxPhiR->getStartValue());
2013 VPValue *FinalIV = Builder.createSelect(
2014 AlwaysFalse, FindIVSelect->getOperand(2), FinalCanIV);
2015 FindIVSelect->replaceAllUsesWith(FinalIV);
2016
2017 // Erase the old FindIV result pattern which is now dead.
2018 FindIVSelect->eraseFromParent();
2019 FindIVCmp->eraseFromParent();
2020 FindIVRdxResult->eraseFromParent();
2021 return true;
2022}
2023
2026 Loop *TheLoop) {
2027 for (auto &PhiR : make_early_inc_range(
2028 Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis())) {
2029 auto *MinOrMaxPhiR = dyn_cast<VPReductionPHIRecipe>(&PhiR);
2030 // TODO: check for multi-uses in VPlan directly.
2031 if (!MinOrMaxPhiR || !MinOrMaxPhiR->hasUsesOutsideReductionChain())
2032 continue;
2033
2034 // MinOrMaxPhiR has users outside the reduction cycle in the loop. Check if
2035 // the only other user is a FindLastIV reduction. MinOrMaxPhiR must have
2036 // exactly 2 users:
2037 // 1) the min/max operation of the reduction cycle, and
2038 // 2) the compare of a FindLastIV reduction cycle. This compare must match
2039 // the min/max operation - comparing MinOrMaxPhiR with the operand of the
2040 // min/max operation, and be used only by the select of the FindLastIV
2041 // reduction cycle.
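// E.g., for a source loop computing
//   if (a[i] < min) { min = a[i]; min_idx = i; }
// %min.phi is used both by umin(%min.phi, %a) and by the compare
// icmp ult %a, %min.phi that feeds the select updating %min_idx.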
2042 RecurKind RdxKind = MinOrMaxPhiR->getRecurrenceKind();
2043 assert(
2044 RecurrenceDescriptor::isMinMaxRecurrenceKind(RdxKind) &&
2045 "only min/max recurrences support users outside the reduction chain");
2046
2047 auto *MinOrMaxOp =
2048 dyn_cast<VPRecipeWithIRFlags>(MinOrMaxPhiR->getBackedgeValue());
2049 if (!MinOrMaxOp)
2050 return false;
2051
2052 // Check that MinOrMaxOp is a VPWidenIntrinsicRecipe or VPReplicateRecipe
2053 // with an intrinsic that matches the reduction kind.
2054 Intrinsic::ID ExpectedIntrinsicID = getMinMaxReductionIntrinsicOp(RdxKind);
2055 if (!match(MinOrMaxOp, m_Intrinsic(ExpectedIntrinsicID)))
2056 return false;
2057
2058 // MinOrMaxOp must have 2 users: 1) MinOrMaxPhiR and 2)
2059 // ComputeReductionResult.
2060 assert(MinOrMaxOp->getNumUsers() == 2 &&
2061 "MinOrMaxOp must have exactly 2 users");
2062 VPValue *MinOrMaxOpValue = MinOrMaxOp->getOperand(0);
2063 if (MinOrMaxOpValue == MinOrMaxPhiR)
2064 MinOrMaxOpValue = MinOrMaxOp->getOperand(1);
2065
2066 VPValue *CmpOpA;
2067 VPValue *CmpOpB;
2068 CmpPredicate Pred;
2070 MinOrMaxPhiR, m_Cmp(Pred, m_VPValue(CmpOpA), m_VPValue(CmpOpB))));
2071 if (!Cmp || Cmp->getNumUsers() != 1 ||
2072 (CmpOpA != MinOrMaxOpValue && CmpOpB != MinOrMaxOpValue))
2073 return false;
2074
2075 if (MinOrMaxOpValue != CmpOpB)
2076 Pred = CmpInst::getSwappedPredicate(Pred);
2077
2078 // MinOrMaxPhiR must have exactly 2 users:
2079 // * MinOrMaxOp,
2080 // * Cmp (that's part of a FindLastIV chain).
2081 if (MinOrMaxPhiR->getNumUsers() != 2)
2082 return false;
2083
2084 VPInstruction *MinOrMaxResult =
2085 vputils::findComputeReductionResult(MinOrMaxPhiR);
2086 assert(is_contained(MinOrMaxPhiR->users(), MinOrMaxOp) &&
2087 "one user must be MinOrMaxOp");
2088 assert(MinOrMaxResult && "MinOrMaxResult must be a user of MinOrMaxOp");
2089
2090 // Cmp must be used by the select of a FindLastIV chain.
2091 VPValue *Sel = dyn_cast<VPSingleDefRecipe>(Cmp->getSingleUser());
2092 VPValue *IVOp, *FindIV;
2093 if (!Sel || Sel->getNumUsers() != 2 ||
2094 !match(Sel,
2095 m_Select(m_Specific(Cmp), m_VPValue(IVOp), m_VPValue(FindIV))))
2096 return false;
2097
2098 if (!isa<VPReductionPHIRecipe>(FindIV)) {
2099 std::swap(FindIV, IVOp);
2100 Pred = CmpInst::getInversePredicate(Pred);
2101 }
2102
2103 auto *FindIVPhiR = dyn_cast<VPReductionPHIRecipe>(FindIV);
2104 if (!FindIVPhiR || !RecurrenceDescriptor::isFindIVRecurrenceKind(
2105 FindIVPhiR->getRecurrenceKind()))
2106 return false;
2107
2108 assert(!FindIVPhiR->isInLoop() && !FindIVPhiR->isOrdered() &&
2109 "cannot handle inloop/ordered reductions yet");
2110
2111 // Check if FindIVPhiR is a FindLast pattern by checking the MinMaxKind
2112 // on its ComputeReductionResult. SMax/UMax indicates FindLast.
2113 VPInstruction *FindIVResult =
2114 vputils::findComputeReductionResult(
2115 FindIVPhiR->getBackedgeValue());
2116 assert(FindIVResult &&
2117 "must be able to retrieve the FindIVResult VPInstruction");
2118 RecurKind FindIVMinMaxKind = FindIVResult->getRecurKind();
2119 if (FindIVMinMaxKind != RecurKind::SMax &&
2120 FindIVMinMaxKind != RecurKind::UMax)
2121 return false;
2122
2123 // TODO: Support cases where IVOp is the IV increment.
2124 if (!match(IVOp, m_TruncOrSelf(m_VPValue(IVOp))) ||
2125 !isa<VPWidenIntOrFpInductionRecipe>(IVOp))
2126 return false;
2127
2128 // Check if the predicate is compatible with the reduction kind.
2129 bool IsValidKindPred = [RdxKind, Pred]() {
2130 switch (RdxKind) {
2131 case RecurKind::UMin:
2132 return Pred == CmpInst::ICMP_UGE || Pred == CmpInst::ICMP_UGT;
2133 case RecurKind::UMax:
2134 return Pred == CmpInst::ICMP_ULE || Pred == CmpInst::ICMP_ULT;
2135 case RecurKind::SMax:
2136 return Pred == CmpInst::ICMP_SLE || Pred == CmpInst::ICMP_SLT;
2137 case RecurKind::SMin:
2138 return Pred == CmpInst::ICMP_SGE || Pred == CmpInst::ICMP_SGT;
2139 default:
2140 llvm_unreachable("unhandled recurrence kind");
2141 }
2142 }();
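// E.g., for a UMin reduction %min.next = umin(%min.phi, %x), a compatible
// (normalized) condition is icmp uge %min.phi, %x: the index is updated
// whenever %x is a new minimum or ties the current one.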
2143 if (!IsValidKindPred) {
2144 ORE->emit([&]() {
2145 return OptimizationRemarkMissed(
2146 DEBUG_TYPE, "VectorizationMultiUseReductionPredicate",
2147 TheLoop->getStartLoc(), TheLoop->getHeader())
2148 << "Multi-use reduction with predicate "
2149 << CmpInst::getPredicateName(Pred)
2150 << " incompatible with reduction kind";
2151 });
2152 return false;
2153 }
2154
2155 auto *FindIVSelect = findFindIVSelect(FindIVPhiR->getBackedgeValue());
2156 auto *FindIVCmp = FindIVSelect->getOperand(0)->getDefiningRecipe();
2157 auto *FindIVRdxResult = cast<VPInstruction>(FindIVCmp->getOperand(0));
2158 assert(FindIVSelect->getParent() == MinOrMaxResult->getParent() &&
2159 "both results must be computed in the same block");
2160 // The reduction to the scalar min/max value is placed right before the
2161 // reduction to its scalar iteration index, so that instructions using
2162 // both of their operands can be generated.
2163 MinOrMaxResult->moveBefore(*FindIVRdxResult->getParent(),
2164 FindIVRdxResult->getIterator());
2165
2166 bool IsStrictPredicate = ICmpInst::isLT(Pred) || ICmpInst::isGT(Pred);
2167 if (IsStrictPredicate) {
2168 if (!handleFirstArgMinOrMax(Plan, MinOrMaxPhiR, FindIVPhiR,
2169 cast<VPWidenIntOrFpInductionRecipe>(IVOp),
2170 MinOrMaxResult, FindIVSelect, FindIVCmp,
2171 FindIVRdxResult))
2172 return false;
2173 continue;
2174 }
2175
2176 // The reduction using MinOrMaxPhiR needs adjusting to compute the correct
2177 // result:
2178 // 1. We need to find the last IV for which the condition based on the
2179 // min/max recurrence is true,
2180 // 2. Compare the partial min/max reduction result to its final value and,
2181 // 3. Select the lanes of the partial FindLastIV reductions which
2182 // correspond to the lanes matching the min/max reduction result.
2183 //
2184 // For example, this transforms
2185 // vp<%min.result> = compute-reduction-result ir<%min.val.next>
2186 // vp<%iv.rdx> = compute-reduction-result (smax) vp<%min.idx.next>
2187 // vp<%cmp> = icmp ne vp<%iv.rdx>, SENTINEL
2188 // vp<%find.iv.result> = select vp<%cmp>, vp<%iv.rdx>, ir<0>
2189 //
2190 // into:
2191 //
2192 // vp<min.result> = compute-reduction-result ir<%min.val.next>
2193 // vp<%final.min.cmp> = icmp eq ir<%min.val.next>, vp<min.result>
2194 // vp<%final.iv> = select vp<%final.min.cmp>, vp<%min.idx.next>, SENTINEL
2195 // vp<%iv.rdx> = compute-reduction-result (smax) vp<%final.iv>
2196 // vp<%cmp> = icmp ne vp<%iv.rdx>, SENTINEL
2197 // vp<%find.iv.result> = select vp<%cmp>, vp<%iv.rdx>, ir<0>
2198 //
2199 VPBuilder B(FindIVRdxResult);
2200 VPValue *MinOrMaxExiting = MinOrMaxResult->getOperand(0);
2201 auto *FinalMinOrMaxCmp =
2202 B.createICmp(CmpInst::ICMP_EQ, MinOrMaxExiting, MinOrMaxResult);
2203 VPValue *Sentinel = FindIVCmp->getOperand(1);
2204 VPValue *LastIVExiting = FindIVRdxResult->getOperand(0);
2205 auto *FinalIVSelect =
2206 B.createSelect(FinalMinOrMaxCmp, LastIVExiting, Sentinel);
2207 FindIVRdxResult->setOperand(0, FinalIVSelect);
2208 }
2209 return true;
2210}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define DEBUG_TYPE
#define _
iv Induction Variable Users
Definition IVUsers.cpp:48
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
This file provides a LoopVectorizationPlanner class.
static constexpr uint32_t MinItersBypassWeights[]
#define I(x, y, z)
Definition MD5.cpp:57
#define P(N)
const SmallVectorImpl< MachineOperand > & Cond
#define LLVM_DEBUG(...)
Definition Debug.h:114
This pass exposes codegen information to IR-level passes.
static void createLoopRegion(VPlan &Plan, VPBlockBase *HeaderVPB)
Create a new VPRegionBlock for the loop starting at HeaderVPB.
static bool isHeaderBB(BasicBlock *BB, Loop *L)
static bool handleFirstArgMinOrMax(VPlan &Plan, VPReductionPHIRecipe *MinOrMaxPhiR, VPReductionPHIRecipe *FindLastIVPhiR, VPWidenIntOrFpInductionRecipe *WideIV, VPInstruction *MinOrMaxResult, VPInstruction *FindIVSelect, VPRecipeBase *FindIVCmp, VPInstruction *FindIVRdxResult)
Given a first argmin/argmax pattern with strict predicate consisting of 1) a MinOrMax reduction MinOr...
static VPHeaderPHIRecipe * createWidenInductionRecipe(PHINode *Phi, VPPhi *PhiR, VPIRValue *Start, const InductionDescriptor &IndDesc, VPlan &Plan, PredicatedScalarEvolution &PSE, Loop &OrigLoop, DebugLoc DL)
Creates a VPWidenIntOrFpInductionRecipe or VPWidenPointerInductionRecipe for Phi based on IndDesc.
static void insertCheckBlockBeforeVectorLoop(VPlan &Plan, VPBasicBlock *CheckBlockVPBB)
Insert CheckBlockVPBB on the edge leading to the vector preheader, connecting it to both vector and s...
static void simplifyLiveInsWithSCEV(VPlan &Plan, PredicatedScalarEvolution &PSE)
Check Plan's live-in and replace them with constants, if they can be simplified via SCEV.
static bool sinkRecurrenceUsersAfterPrevious(VPFirstOrderRecurrencePHIRecipe *FOR, VPRecipeBase *Previous, VPDominatorTree &VPDT)
Try to sink users of FOR after Previous.
static void addInitialSkeleton(VPlan &Plan, Type *InductionTy, DebugLoc IVDL, PredicatedScalarEvolution &PSE, Loop *TheLoop)
static bool tryToSinkOrHoistRecurrenceUsers(VPBasicBlock *HeaderVPBB, VPDominatorTree &VPDT)
Sink users of fixed-order recurrences past or hoist before the recipe defining the previous value,...
static void addBypassBranch(VPlan &Plan, VPBasicBlock *CheckBlockVPBB, VPValue *Cond, bool AddBranchWeights)
Create a BranchOnCond terminator in CheckBlockVPBB.
static void addCanonicalIVRecipes(VPlan &Plan, VPBasicBlock *HeaderVPBB, VPBasicBlock *LatchVPBB, Type *IdxTy, DebugLoc DL)
static bool canonicalHeaderAndLatch(VPBlockBase *HeaderVPB, const VPDominatorTree &VPDT)
Checks if HeaderVPB is a loop header block in the plain CFG; that is, it has exactly 2 predecessors (...
static bool hoistPreviousBeforeFORUsers(VPFirstOrderRecurrencePHIRecipe *FOR, VPRecipeBase *Previous, VPDominatorTree &VPDT)
Try to hoist Previous and its operands before all users of FOR.
static VPInstruction * findFindIVSelect(VPValue *BackedgeVal)
Find and return the final select instruction of the FindIV result pattern for the given BackedgeVal: ...
static constexpr uint32_t CheckBypassWeights[]
static bool areAllLoadsDereferenceable(VPBasicBlock *HeaderVPBB, VPBasicBlock *MiddleVPBB, Loop *TheLoop, PredicatedScalarEvolution &PSE, DominatorTree &DT, AssumptionCache *AC)
Check if all loads in the loop are dereferenceable.
static void printAfterInitialConstruction(VPlan &)
To make RUN_VPLAN_PASS print initial VPlan.
static VPBasicBlock::iterator getExpandSCEVInsertPt(VPBasicBlock *EntryVPBB)
Return an insert point in EntryVPBB after existing VPIRPhi, VPIRInstruction and VPExpandSCEVRecipe re...
static auto getMatchingPhisForScalarLoop(VPBasicBlock *Header, VPBasicBlock *ScalarHeader)
Return an iterator range to iterate over pairs of matching phi nodes in Header and ScalarHeader,...
static void createExtractsForLiveOuts(VPlan &Plan, VPBasicBlock *MiddleVPBB)
Creates extracts for values in Plan defined in a loop region and used outside a loop region.
This file implements dominator tree analysis for a single level of a VPlan's H-CFG.
This file contains the declarations of different VPlan-related auxiliary helpers.
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition VPlanSLP.cpp:247
This file provides utility VPlan to VPlan transformations.
#define RUN_VPLAN_PASS_NO_VERIFY(PASS,...)
This file contains the declarations of the Vectorization Plan base classes:
static const uint32_t IV[8]
Definition blake3_impl.h:83
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition APInt.h:207
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this basic block belongs to.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
Definition BasicBlock.h:237
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
@ ICMP_SLT
signed less than
Definition InstrTypes.h:705
@ ICMP_SLE
signed less or equal
Definition InstrTypes.h:706
@ ICMP_UGE
unsigned greater or equal
Definition InstrTypes.h:700
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:699
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:703
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:701
@ ICMP_SGE
signed greater or equal
Definition InstrTypes.h:704
@ ICMP_ULE
unsigned less or equal
Definition InstrTypes.h:702
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Definition InstrTypes.h:686
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition InstrTypes.h:827
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition InstrTypes.h:789
static LLVM_ABI StringRef getPredicateName(Predicate P)
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
A debug info location.
Definition DebugLoc.h:123
static DebugLoc getUnknown()
Definition DebugLoc.h:161
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition DenseMap.h:205
Implements a dense probed hash-table based set.
Definition DenseSet.h:279
bool dominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
dominates - Returns true iff A dominates B.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:159
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition TypeSize.h:309
constexpr bool isScalar() const
Exactly one element.
Definition TypeSize.h:320
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:23
static FastMathFlags getFast()
Definition FMF.h:53
static bool isLT(Predicate P)
Return true if the predicate is SLT or ULT.
static bool isGT(Predicate P)
Return true if the predicate is SGT or UGT.
A struct for saving information about induction variables.
InductionKind getKind() const
const SCEV * getStep() const
@ IK_FpInduction
Floating point induction variable.
@ IK_PtrInduction
Pointer induction var. Step = C.
@ IK_IntInduction
Integer induction variable. Step = C.
Value * getStartValue() const
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
BlockT * getHeader() const
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
This class emits a version of the loop where run-time checks ensure that may-alias pointers can't ove...
std::pair< MDNode *, MDNode * > getNoAliasMetadataFor(const Instruction *OrigInst) const
Returns a pair containing the alias_scope and noalias metadata nodes for OrigInst,...
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
Definition LoopInfo.cpp:659
LLVM_ABI MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight, bool IsExpected=false)
Return metadata containing two branch weights.
Definition MDBuilder.cpp:38
Metadata node.
Definition Metadata.h:1080
This class implements a map that also provides access to all stored values in a deterministic order.
Definition MapVector.h:36
iterator end()
Definition MapVector.h:67
iterator find(const KeyT &Key)
Definition MapVector.h:154
The optimization diagnostic interface.
LLVM_ABI void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Diagnostic information for missed-optimization remarks.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Post-order traversal of a graph.
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
LLVM_ABI const SCEV * getSymbolicMaxBackedgeTakenCount()
Get the (predicated) symbolic max backedge count for the analyzed loop.
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
static bool isFMulAddIntrinsic(Instruction *I)
Returns true if the instruction is a call to the llvm.fmuladd intrinsic.
FastMathFlags getFastMathFlags() const
static bool isFPMinMaxNumRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is a floating-point minnum/maxnum kind.
bool hasUsesOutsideReductionChain() const
Returns true if the reduction PHI has any uses outside the reduction chain.
static bool isFindLastRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
TrackingVH< Value > getRecurrenceStartValue() const
static bool isAnyOfRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
RecurKind getRecurrenceKind() const
bool isOrdered() const
Expose an ordered FP reduction to the instance users.
static LLVM_ABI bool isFloatingPointRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is a floating point kind.
static bool isFindIVRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
static bool isIntMinMaxRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is an integer min/max kind.
static bool isMinMaxRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is any min/max kind.
This class represents an analyzed expression in the program.
static constexpr auto FlagNUW
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
The main scalar evolution driver.
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI const SCEV * getTripCountFromExitCount(const SCEV *ExitCount)
A version of getTripCountFromExitCount below which always picks an evaluation type which can not resu...
LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
LLVM_ABI bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
LLVM_ABI const SCEV * getElementCount(Type *Ty, ElementCount EC, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
LLVM_ABI const SCEV * getUMaxExpr(SCEVUse LHS, SCEVUse RHS)
LLVM_ABI const SCEV * getStoreSizeOfExpr(Type *IntTy, Type *StoreTy)
Return an expression for the store size of StoreTy that is type IntTy.
LLVM_ABI bool isKnownPredicate(CmpPredicate Pred, SCEVUse LHS, SCEVUse RHS)
Test if the given expression is known to satisfy the condition described by Pred, LHS,...
LLVM_ABI const SCEV * applyLoopGuards(const SCEV *Expr, const Loop *L)
Try to apply information from loop guards for L to Expr.
A vector that has set insertion semantics.
Definition SetVector.h:57
size_type size() const
Determine the number of elements in the SetVector.
Definition SetVector.h:103
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:151
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
op_range operands()
Definition User.h:267
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition VPlan.h:4160
void appendRecipe(VPRecipeBase *Recipe)
Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.
Definition VPlan.h:4235
RecipeListTy::iterator iterator
Instruction iterators...
Definition VPlan.h:4187
iterator end()
Definition VPlan.h:4197
iterator begin()
Recipe iterator methods.
Definition VPlan.h:4195
iterator_range< iterator > phis()
Returns an iterator range over the PHI-like recipes in the block.
Definition VPlan.h:4248
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
Definition VPlan.cpp:232
VPBasicBlock * splitAt(iterator SplitAt)
Split current block at SplitAt by inserting a new block between the current block and its successors ...
Definition VPlan.cpp:565
VPRecipeBase * getTerminator()
If the block has multiple successors, return the branch recipe terminating the block.
Definition VPlan.cpp:644
const VPRecipeBase & back() const
Definition VPlan.h:4209
void insert(VPRecipeBase *Recipe, iterator InsertPt)
Definition VPlan.h:4226
bool empty() const
Definition VPlan.h:4206
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
Definition VPlan.h:98
void setSuccessors(ArrayRef< VPBlockBase * > NewSuccs)
Set each VPBasicBlock in NewSuccss as successor of this VPBlockBase.
Definition VPlan.h:319
VPRegionBlock * getParent()
Definition VPlan.h:190
const VPBasicBlock * getExitingBasicBlock() const
Definition VPlan.cpp:202
void setName(const Twine &newName)
Definition VPlan.h:183
size_t getNumSuccessors() const
Definition VPlan.h:241
void swapSuccessors()
Swap successors of the block. The block must have exactly 2 successors.
Definition VPlan.h:341
void setPredecessors(ArrayRef< VPBlockBase * > NewPreds)
Set each VPBasicBlock in NewPreds as predecessor of this VPBlockBase.
Definition VPlan.h:310
const VPBlocksTy & getPredecessors() const
Definition VPlan.h:226
void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse)
Set two given VPBlockBases IfTrue and IfFalse to be the two successors of this VPBlockBase.
Definition VPlan.h:301
VPBlockBase * getSinglePredecessor() const
Definition VPlan.h:237
void swapPredecessors()
Swap predecessors of the block.
Definition VPlan.h:333
const VPBasicBlock * getEntryBasicBlock() const
Definition VPlan.cpp:182
void setOneSuccessor(VPBlockBase *Successor)
Set a given VPBlockBase Successor as the single successor of this VPBlockBase.
Definition VPlan.h:290
void setParent(VPRegionBlock *P)
Definition VPlan.h:201
VPBlockBase * getSingleSuccessor() const
Definition VPlan.h:231
const VPBlocksTy & getSuccessors() const
Definition VPlan.h:215
static void insertBlockAfter(VPBlockBase *NewBlock, VPBlockBase *BlockPtr)
Insert disconnected VPBlockBase NewBlock after BlockPtr.
Definition VPlanUtils.h:184
static void insertOnEdge(VPBlockBase *From, VPBlockBase *To, VPBlockBase *BlockPtr)
Inserts BlockPtr on the edge between From and To.
Definition VPlanUtils.h:313
static void connectBlocks(VPBlockBase *From, VPBlockBase *To, unsigned PredIdx=-1u, unsigned SuccIdx=-1u)
Connect VPBlockBases From and To bi-directionally.
Definition VPlanUtils.h:232
static void disconnectBlocks(VPBlockBase *From, VPBlockBase *To)
Disconnect VPBlockBases From and To bi-directionally.
Definition VPlanUtils.h:250
static auto blocksOnly(T &&Range)
Return an iterator range over Range which only includes BlockTy blocks.
Definition VPlanUtils.h:286
static void transferSuccessors(VPBlockBase *Old, VPBlockBase *New)
Transfer successors from Old to New. New must have no successors.
Definition VPlanUtils.h:270
VPlan-based builder utility analogous to IRBuilder.
VPInstruction * createOr(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstruction * createNot(VPValue *Operand, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPBasicBlock::iterator getInsertPoint() const
VPInstruction * createScalarCast(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, DebugLoc DL, const VPIRMetadata &Metadata={})
VPInstruction * createFCmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create a new FCmp VPInstruction with predicate Pred and operands A and B.
VPInstructionWithType * createScalarLoad(Type *ResultTy, VPValue *Addr, DebugLoc DL, const VPIRMetadata &Metadata={})
static VPBuilder getToInsertAfter(VPRecipeBase *R)
Create a VPBuilder to insert after R.
VPPhi * createScalarPhi(ArrayRef< VPValue * > IncomingValues, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", const VPIRFlags &Flags={})
VPInstruction * createICmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create a new ICmp VPInstruction with predicate Pred and operands A and B.
VPInstruction * createAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstruction * createSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", const VPIRFlags &Flags={})
VPExpandSCEVRecipe * createExpandSCEV(const SCEV *Expr)
void setInsertPoint(VPBasicBlock *TheBB)
This specifies that created VPInstructions should be appended to the end of the specified block.
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, Instruction *Inst=nullptr, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create an N-ary operation with Opcode, Operands and set Inst as its underlying Instruction.
unsigned getNumDefinedValues() const
Returns the number of values defined by the VPDef.
Definition VPlanValue.h:498
VPValue * getVPSingleValue()
Returns the only VPValue defined by the VPDef.
Definition VPlanValue.h:471
A recipe for converting the input value IV value to the corresponding value of an IV with different s...
Definition VPlan.h:3909
Template specialization of the standard LLVM dominator tree utility for VPBlockBases.
bool properlyDominates(const VPRecipeBase *A, const VPRecipeBase *B)
A pure virtual base class for all recipes modeling header phis, including phis for first order recurr...
Definition VPlan.h:2300
virtual VPValue * getBackedgeValue()
Returns the incoming value from the loop backedge.
Definition VPlan.h:2342
VPValue * getStartValue()
Returns the start value of the phi, if one is set.
Definition VPlan.h:2331
A special type of VPBasicBlock that wraps an existing IR basic block.
Definition VPlan.h:4313
Class to record and manage LLVM IR flags.
Definition VPlan.h:688
RecurKind getRecurKind() const
Definition VPlan.h:1051
This is a concrete Recipe that models a single VPlan-level instruction.
Definition VPlan.h:1223
@ ExtractLastActive
Extracts the last active lane from a set of vectors.
Definition VPlan.h:1333
@ ExtractLane
Extracts a single lane (first operand) from a set of vector operands.
Definition VPlan.h:1324
@ ExitingIVValue
Compute the exiting value of a wide induction after vectorization, that is the value of the last lane...
Definition VPlan.h:1340
@ ComputeReductionResult
Reduce the operands to the final reduction result using the operation specified via the operation's V...
Definition VPlan.h:1270
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition VPlan.h:406
VPBasicBlock * getParent()
Definition VPlan.h:480
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
Definition VPlan.h:554
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
void moveAfter(VPRecipeBase *MovePos)
Unlink this recipe from its current VPBasicBlock and insert it into the VPBasicBlock that MovePos liv...
A recipe for handling reduction phis.
Definition VPlan.h:2686
bool isOrdered() const
Returns true, if the phi is part of an ordered reduction.
Definition VPlan.h:2747
unsigned getVFScaleFactor() const
Get the factor that the VF of this recipe's output should be scaled by, or 1 if it isn't scaled.
Definition VPlan.h:2726
bool isInLoop() const
Returns true if the phi is part of an in-loop reduction.
Definition VPlan.h:2750
A recipe to represent inloop, ordered or partial reduction operations.
Definition VPlan.h:3042
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
Definition VPlan.h:4370
Type * getCanonicalIVType() const
Return the type of the canonical IV for loop regions.
Definition VPlan.h:4490
VPRegionValue * getCanonicalIV()
Return the canonical induction variable of the region, null for replicating regions.
Definition VPlan.h:4482
DebugLoc getDebugLoc() const
Returns the debug location of the VPRegionValue.
Definition VPlanValue.h:228
VPSingleDef is a base class for recipes for modeling a sequence of one or more output IR that define ...
Definition VPlan.h:606
An analysis for type-inference for VPValues.
Type * inferScalarType(const VPValue *V)
Infer the type of V. Returns the scalar type of V.
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
Definition VPlanValue.h:329
operand_range operands()
Definition VPlanValue.h:397
void setOperand(unsigned I, VPValue *New)
Definition VPlanValue.h:373
VPValue * getOperand(unsigned N) const
Definition VPlanValue.h:368
void addOperand(VPValue *Operand)
Definition VPlanValue.h:362
This is the base class of the VPlan Def/Use graph, used for modeling the data flow into,...
Definition VPlanValue.h:49
Value * getLiveInIRValue() const
Return the underlying IR value for a VPIRValue.
Definition VPlan.cpp:137
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Definition VPlan.cpp:127
void setUnderlyingValue(Value *Val)
Definition VPlanValue.h:202
void replaceAllUsesWith(VPValue *New)
Definition VPlan.cpp:1498
unsigned getNumUsers() const
Definition VPlanValue.h:113
void replaceUsesWithIf(VPValue *New, llvm::function_ref< bool(VPUser &U, unsigned Idx)> ShouldReplace)
Go through the uses list for this VPValue and make each use point to New if the callback ShouldReplac...
Definition VPlan.cpp:1504
user_range users()
Definition VPlanValue.h:155
A Recipe for widening the canonical induction variable of the vector loop.
Definition VPlan.h:3868
VPValue * getStepValue()
Returns the step value of the induction.
Definition VPlan.h:2397
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
Definition VPlan.h:2417
A recipe for handling phi nodes of integer and floating-point inductions, producing their vector valu...
Definition VPlan.h:2448
VPIRValue * getStartValue() const
Returns the start value of the induction.
Definition VPlan.h:2495
bool isCanonical() const
Returns true if the induction is canonical, i.e.
VPlan models a candidate for vectorization, encoding various decisions take to produce efficient outp...
Definition VPlan.h:4518
VPIRValue * getLiveIn(Value *V) const
Return the live-in VPIRValue for V, if there is one or nullptr otherwise.
Definition VPlan.h:4834
LLVMContext & getContext() const
Definition VPlan.h:4710
VPBasicBlock * getEntry()
Definition VPlan.h:4610
VPValue * getTripCount() const
The trip count of the original loop.
Definition VPlan.h:4669
VPValue * getOrCreateBackedgeTakenCount()
The backedge taken count of the original loop.
Definition VPlan.h:4690
VPIRValue * getFalse()
Return a VPIRValue wrapping i1 false.
Definition VPlan.h:4805
VPSymbolicValue & getVFxUF()
Returns VF * UF of the vector loop region.
Definition VPlan.h:4708
auto getLiveIns() const
Return the list of live-in VPValues available in the VPlan.
Definition VPlan.h:4837
ArrayRef< VPIRBasicBlock * > getExitBlocks() const
Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of the original scalar loop.
Definition VPlan.h:4659
VPSymbolicValue & getVectorTripCount()
The vector trip count.
Definition VPlan.h:4698
VPIRValue * getOrAddLiveIn(Value *V)
Gets the live-in VPIRValue for V or adds a new live-in (if none exists yet) for V.
Definition VPlan.h:4782
VPRegionBlock * createLoopRegion(Type *CanIVTy, DebugLoc DL, const std::string &Name="", VPBlockBase *Entry=nullptr, VPBlockBase *Exiting=nullptr)
Create a new loop region with a canonical IV using CanIVTy and DL.
Definition VPlan.h:4871
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
Definition VPlan.cpp:1095
void setTripCount(VPValue *NewTripCount)
Set the trip count assuming it is currently null; if it is not, use resetTripCount().
Definition VPlan.h:4676
VPBasicBlock * getMiddleBlock()
Returns the 'middle' block of the plan, that is the block that selects whether to execute the scalar ...
Definition VPlan.h:4635
VPBasicBlock * createVPBasicBlock(const Twine &Name, VPRecipeBase *Recipe=nullptr)
Create a new VPBasicBlock with Name and containing Recipe if present.
Definition VPlan.h:4860
LLVM_ABI_FOR_TEST VPIRBasicBlock * createVPIRBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock from IRBB containing VPIRInstructions for all instructions in IRBB,...
Definition VPlan.cpp:1335
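Sketch of the block factory methods above (CheckBlock is an assumed incoming IR BasicBlock *; the block name is illustrative):
  VPBasicBlock *Bypass = Plan.createVPBasicBlock("vector.check");
  VPIRBasicBlock *Wrapped = Plan.createVPIRBasicBlock(CheckBlock);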
VPIRValue * getTrue()
Return a VPIRValue wrapping i1 true.
Definition VPlan.h:4802
bool hasScalarVFOnly() const
Definition VPlan.h:4750
VPBasicBlock * getScalarPreheader() const
Return the VPBasicBlock for the preheader of the scalar loop.
Definition VPlan.h:4649
VPIRBasicBlock * getScalarHeader() const
Return the VPIRBasicBlock wrapping the header of the scalar loop.
Definition VPlan.h:4655
VPBasicBlock * getVectorPreheader()
Returns the preheader of the vector loop region, if one exists, or null otherwise.
Definition VPlan.h:4615
VPSymbolicValue & getVF()
Returns the VF of the vector loop region.
Definition VPlan.h:4701
bool hasScalarTail() const
Returns true if the scalar tail may execute after the vector loop, i.e.
Definition VPlan.h:4915
VPIRValue * getConstantInt(Type *Ty, uint64_t Val, bool IsSigned=false)
Return a VPIRValue wrapping a ConstantInt with the given type and value.
Definition VPlan.h:4816
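For example, materializing live-ins and constants through the plan (sketch; SomeIRValue is an assumed incoming IR Value *):
  VPIRValue *True = Plan.getTrue(); // shared i1 true
  VPIRValue *C0 = Plan.getConstantInt(
      Type::getInt64Ty(Plan.getContext()), 0);
  VPIRValue *LiveIn = Plan.getOrAddLiveIn(SomeIRValue);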
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:318
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
Definition DenseSet.h:175
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:165
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Entry
Definition COFF.h:862
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
auto m_Cmp()
Matches any compare instruction, ignoring it.
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, TruncInst >, OpTy > m_TruncOrSelf(const OpTy &Op)
bool match(Val *V, const Pattern &P)
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
VPInstruction_match< VPInstruction::ExtractLastLane, VPInstruction_match< VPInstruction::ExtractLastPart, Op0_t > > m_ExtractLastLaneOfLastPart(const Op0_t &Op0)
bool matchFindIVResult(VPInstruction *VPI, Op0_t ReducedIV, Op1_t Start)
Match FindIV result pattern: select(icmp ne ComputeReductionResult(ReducedIV), Sentinel),...
VPInstruction_match< VPInstruction::ExtractLastLane, Op0_t > m_ExtractLastLane(const Op0_t &Op0)
VPInstruction_match< VPInstruction::BranchOnCount > m_BranchOnCount()
auto m_VPValue()
Match an arbitrary VPValue and ignore it.
VPInstruction_match< VPInstruction::ExtractLastPart, Op0_t > m_ExtractLastPart(const Op0_t &Op0)
match_bind< VPInstruction > m_VPInstruction(VPInstruction *&V)
Match a VPInstruction, capturing if we match.
VPInstruction_match< VPInstruction::BranchOnCond > m_BranchOnCond()
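A sketch of how these matchers compose (VPI is an assumed VPInstruction *; only matchers listed above are used):
  using namespace VPlanPatternMatch;
  if (match(VPI, m_BranchOnCount())) {
    // VPI is the latch branch on the canonical IV count.
  }
  if (match(VPI, m_ExtractLastLaneOfLastPart(m_VPValue()))) {
    // VPI extracts the final scalar lane of some vector value.
  }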
NodeAddr< PhiNode * > Phi
Definition RDFGraph.h:390
friend class Instruction
Iterator for Instructions in a BasicBlock.
Definition BasicBlock.h:73
VPValue * getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr)
Get or create a VPValue that corresponds to the expansion of Expr.
bool cannotHoistOrSinkRecipe(const VPRecipeBase &R, bool Sinking=false)
Return true if we do not know how to (mechanically) hoist or sink R.
VPInstruction * findComputeReductionResult(VPReductionPHIRecipe *PhiR)
Find the ComputeReductionResult recipe for PhiR, looking through selects inserted for predicated redu...
VPIRFlags getFlagsFromIndDesc(const InductionDescriptor &ID)
Extracts and returns NoWrap and FastMath flags from the induction binop in ID.
Definition VPlanUtils.h:98
VPRecipeBase * findRecipe(VPValue *Start, PredT Pred)
Search Start's users for a recipe satisfying Pred, looking through recipes with definitions.
Definition VPlanUtils.h:115
VPSingleDefRecipe * findHeaderMask(VPlan &Plan)
Collect the header mask with the pattern: (ICMP_ULE, WideCanonicalIV, backedge-taken-count) TODO: Int...
static VPRecipeBase * findUserOf(VPValue *V, const MatchT &P)
If V is used by a recipe matching pattern P, return it.
Definition VPlanUtils.h:136
const SCEV * getSCEVExprForVPValue(const VPValue *V, PredicatedScalarEvolution &PSE, const Loop *L=nullptr)
Return the SCEV expression for V.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:315
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1738
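These range helpers combine naturally with the VPUser operand API above; for instance (sketch; isInvariant is a placeholder predicate, not a VPlan API):
  bool TailInvariant = all_of(drop_begin(R->operands()), [&](VPValue *Op) {
    return isInvariant(Op); // placeholder check
  });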
LLVM_ABI Intrinsic::ID getMinMaxReductionIntrinsicOp(Intrinsic::ID RdxID)
Returns the min/max intrinsic used when expanding a min/max reduction.
detail::zippy< detail::zip_first, T, U, Args... > zip_equal(T &&t, U &&u, Args &&...args)
zip iterator that assumes that all iteratees have the same length.
Definition STLExtras.h:840
ReductionStyle getReductionStyle(bool InLoop, bool Ordered, unsigned ScaleFactor)
Definition VPlan.h:2673
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:633
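make_early_inc_range is what allows erasing recipes while iterating over a block (sketch; shouldErase is a placeholder predicate):
  for (VPRecipeBase &R : make_early_inc_range(*VPBB))
    if (shouldErase(R))
      R.eraseFromParent(); // safe: the iterator has already advanced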
iterator_range< df_iterator< VPBlockShallowTraversalWrapper< VPBlockBase * > > > vp_depth_first_shallow(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order.
Definition VPlanCFG.h:253
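Typical shallow traversal of a plan's CFG, which visits blocks at the current region level without descending into nested regions (sketch):
  for (VPBlockBase *VPB : vp_depth_first_shallow(Plan.getEntry()))
    if (auto *VPBB = dyn_cast<VPBasicBlock>(VPB))
      for (VPRecipeBase &R : *VPBB) {
        // Visit each recipe at this level.
      }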
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1635
UncountableExitStyle
Different methods of handling early exits.
Definition VPlan.h:83
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
RecurKind
These are the kinds of recurrences that we support.
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
@ FindIV
FindIV reduction with select(icmp(),x,y) where one of (x,y) is a loop induction variable (increasing ...
@ AnyOf
AnyOf reduction with select(cmp(),x,y) where one of (x,y) is loop invariant, and both x and y are int...
@ FMulAdd
Sum of float products with llvm.fmuladd(a * b + sum).
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ SMin
Signed integer min implemented in terms of select(cmp()).
@ Sub
Subtraction of integers.
@ AddChainWithSubs
A chain of adds and subs.
@ UMax
Unsigned integer max implemented in terms of select(cmp()).
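The integer min/max kinds above are frequently handled together; a dispatch sketch (handleIntMinMax is a placeholder):
  switch (Kind) {
  case RecurKind::SMin:
  case RecurKind::SMax:
  case RecurKind::UMin:
  case RecurKind::UMax:
    handleIntMinMax(); // all four lower to a select(cmp()) pattern
    break;
  default:
    break;
  }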
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1946
bool equal(L &&LRange, R &&RRange)
Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.
Definition STLExtras.h:2145
LLVM_ABI bool isDereferenceableAndAlignedInLoop(LoadInst *LI, Loop *L, ScalarEvolution &SE, DominatorTree &DT, AssumptionCache *AC=nullptr, SmallVectorImpl< const SCEVPredicate * > *Predicates=nullptr)
Return true if we can prove that the given load (which is assumed to be within the specified loop) wo...
Definition Loads.cpp:289
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
A recipe for handling first-order recurrence phis.
Definition VPlan.h:2624
A VPValue representing a live-in from the input IR or a constant.
Definition VPlanValue.h:240
static void handleUncountableEarlyExits(VPlan &Plan, VPBasicBlock *HeaderVPBB, VPBasicBlock *LatchVPBB, VPBasicBlock *MiddleVPBB, UncountableExitStyle Style)
Update Plan to account for uncountable early exits by introducing appropriate branching logic in the ...
static void addMinimumIterationCheck(VPlan &Plan, ElementCount VF, unsigned UF, ElementCount MinProfitableTripCount, bool RequiresScalarEpilogue, bool TailFolded, Loop *OrigLoop, const uint32_t *MinItersBypassWeights, DebugLoc DL, PredicatedScalarEvolution &PSE, VPBasicBlock *CheckBlock=nullptr)
static bool createHeaderPhiRecipes(VPlan &Plan, PredicatedScalarEvolution &PSE, Loop &OrigLoop, const MapVector< PHINode *, InductionDescriptor > &Inductions, const MapVector< PHINode *, RecurrenceDescriptor > &Reductions, const SmallPtrSetImpl< const PHINode * > &FixedOrderRecurrences, const SmallPtrSetImpl< PHINode * > &InLoopReductions, bool AllowReordering)
Replace VPPhi recipes in Plan's header with corresponding VPHeaderPHIRecipe subclasses for inductions...
static LLVM_ABI_FOR_TEST std::unique_ptr< VPlan > buildVPlan0(Loop *TheLoop, LoopInfo &LI, Type *InductionTy, DebugLoc IVDL, PredicatedScalarEvolution &PSE, LoopVersioning *LVer=nullptr)
Create a base VPlan0, serving as the common starting point for all later candidates.
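A sketch of the construction pipeline these entry points imply, with arguments abbreviated (the ordering follows the descriptions here, not a guaranteed contract):
  std::unique_ptr<VPlan> Plan = VPlanTransforms::buildVPlan0(
      TheLoop, *LI, InductionTy, IVDL, PSE, LVer);
  VPlanTransforms::createLoopRegions(*Plan); // introduce VPRegionBlocks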
static void createInLoopReductionRecipes(VPlan &Plan, const DenseSet< BasicBlock * > &BlocksNeedingPredication, ElementCount MinVF)
Create VPReductionRecipes for in-loop reductions.
static void foldTailByMasking(VPlan &Plan)
Adapts the vector loop region for tail folding by introducing a header mask and conditionally executi...
static void addMinimumVectorEpilogueIterationCheck(VPlan &Plan, Value *VectorTripCount, bool RequiresScalarEpilogue, ElementCount EpilogueVF, unsigned EpilogueUF, unsigned MainLoopStep, unsigned EpilogueLoopStep, ScalarEvolution &SE)
Add a check to Plan to see if the epilogue vector loop should be executed.
static bool handleMultiUseReductions(VPlan &Plan, OptimizationRemarkEmitter *ORE, Loop *TheLoop)
Try to legalize reductions with multiple in-loop uses.
static bool handleFindLastReductions(VPlan &Plan)
Check if Plan contains any FindLast reductions.
static LLVM_ABI_FOR_TEST bool handleEarlyExits(VPlan &Plan, UncountableExitStyle Style, Loop *TheLoop, PredicatedScalarEvolution &PSE, DominatorTree &DT, AssumptionCache *AC)
Update Plan to account for all early exits.
static bool handleMaxMinNumReductions(VPlan &Plan)
Check if Plan contains any FMaxNum or FMinNum reductions.
static LLVM_ABI_FOR_TEST void createLoopRegions(VPlan &Plan)
Replace loops in Plan's flat CFG with VPRegionBlocks, turning Plan's flat CFG into a hierarchical CFG...
static void attachCheckBlock(VPlan &Plan, Value *Cond, BasicBlock *CheckBlock, bool AddBranchWeights)
Wrap runtime check block CheckBlock in a VPIRBB and Cond in a VPValue and connect the block to Plan,...
static void addIterationCountCheckBlock(VPlan &Plan, ElementCount VF, unsigned UF, bool RequiresScalarEpilogue, Loop *OrigLoop, const uint32_t *MinItersBypassWeights, DebugLoc DL, PredicatedScalarEvolution &PSE)
Add a new check block before the vector preheader to Plan to check if the main vector loop should be ...
static LLVM_ABI_FOR_TEST void addMiddleCheck(VPlan &Plan, bool TailFolded)
If a check is needed to guard executing the scalar epilogue loop, it will be added to the middle bloc...