LLVM 23.0.0git
VPlan.cpp
Go to the documentation of this file.
1//===- VPlan.cpp - Vectorizer Plan ----------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This is the LLVM vectorization plan. It represents a candidate for
11/// vectorization, allowing to plan and optimize how to vectorize a given loop
12/// before generating LLVM-IR.
13/// The vectorizer uses vectorization plans to estimate the costs of potential
14/// candidates and if profitable to execute the desired plan, generating vector
15/// LLVM-IR code.
16///
17//===----------------------------------------------------------------------===//
18
19#include "VPlan.h"
21#include "VPlanCFG.h"
22#include "VPlanDominatorTree.h"
23#include "VPlanHelpers.h"
24#include "VPlanPatternMatch.h"
25#include "VPlanTransforms.h"
26#include "VPlanUtils.h"
28#include "llvm/ADT/STLExtras.h"
31#include "llvm/ADT/Twine.h"
34#include "llvm/IR/BasicBlock.h"
35#include "llvm/IR/CFG.h"
36#include "llvm/IR/IRBuilder.h"
37#include "llvm/IR/Instruction.h"
39#include "llvm/IR/Type.h"
40#include "llvm/IR/Value.h"
43#include "llvm/Support/Debug.h"
49#include <cassert>
50#include <string>
51
52using namespace llvm;
53using namespace llvm::VPlanPatternMatch;
54
55namespace llvm {
57} // namespace llvm
58
59/// @{
60/// Metadata attribute names
61const char LLVMLoopVectorizeFollowupAll[] = "llvm.loop.vectorize.followup_all";
63 "llvm.loop.vectorize.followup_vectorized";
65 "llvm.loop.vectorize.followup_epilogue";
66/// @}
67
69
71
73 "vplan-print-in-dot-format", cl::Hidden,
74 cl::desc("Use dot format instead of plain text when dumping VPlans"));
75
76#define DEBUG_TYPE "loop-vectorize"
77
78#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
80 const VPBasicBlock *Parent = R.getParent();
81 VPSlotTracker SlotTracker(Parent ? Parent->getPlan() : nullptr);
82 R.print(OS, "", SlotTracker);
83 return OS;
84}
85#endif
86
88 const ElementCount &VF) const {
89 switch (LaneKind) {
91 // Lane = RuntimeVF - VF.getKnownMinValue() + Lane
92 return Builder.CreateSub(getRuntimeVF(Builder, Builder.getInt32Ty(), VF),
93 Builder.getInt32(VF.getKnownMinValue() - Lane));
95 return Builder.getInt64(Lane);
96 }
97 llvm_unreachable("Unknown lane kind");
98}
99
100#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
102 if (const VPRecipeBase *R = getDefiningRecipe())
103 R->print(OS, "", SlotTracker);
104 else
106}
107
108void VPValue::dump() const {
109 const VPRecipeBase *Instr = getDefiningRecipe();
111 (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() : nullptr);
113 dbgs() << "\n";
114}
115
116void VPRecipeBase::dump() const {
117 VPSlotTracker SlotTracker(getParent() ? getParent()->getPlan() : nullptr);
118 print(dbgs(), "", SlotTracker);
119 dbgs() << "\n";
120}
121#endif
122
123#if !defined(NDEBUG)
124bool VPRecipeValue::isDefinedBy(const VPDef *D) const { return Def == D; }
125#endif
126
128 auto *DefValue = dyn_cast<VPRecipeValue>(this);
129 return DefValue ? DefValue->Def : nullptr;
130}
131
133 auto *DefValue = dyn_cast<VPRecipeValue>(this);
134 return DefValue ? DefValue->Def : nullptr;
135}
136
138 return cast<VPIRValue>(this)->getValue();
139}
140
142
144 : VPValue(VPVRecipeValueSC, UV), Def(Def) {
145 assert(Def && "VPRecipeValue requires a defining recipe");
146 Def->addDefinedValue(this);
147}
148
150 assert(Users.empty() &&
151 "trying to delete a VPRecipeValue with remaining users");
152 Def->removeDefinedValue(this);
153}
154
155// Get the top-most entry block of \p Start. This is the entry block of the
156// containing VPlan. This function is templated to support both const and non-const blocks
157template <typename T> static T *getPlanEntry(T *Start) {
158 T *Next = Start;
159 T *Current = Start;
160 while ((Next = Next->getParent()))
161 Current = Next;
162
163 SmallSetVector<T *, 8> WorkList;
164 WorkList.insert(Current);
165
166 for (unsigned i = 0; i < WorkList.size(); i++) {
167 T *Current = WorkList[i];
168 if (!Current->hasPredecessors())
169 return Current;
170 auto &Predecessors = Current->getPredecessors();
171 WorkList.insert_range(Predecessors);
172 }
173
174 llvm_unreachable("VPlan without any entry node without predecessors");
175}
176
177VPlan *VPBlockBase::getPlan() { return getPlanEntry(this)->Plan; }
178
179const VPlan *VPBlockBase::getPlan() const { return getPlanEntry(this)->Plan; }
180
181/// \return the VPBasicBlock that is the entry of Block, possibly indirectly.
188
195
196void VPBlockBase::setPlan(VPlan *ParentPlan) {
197 assert(ParentPlan->getEntry() == this && "Can only set plan on its entry.");
198 Plan = ParentPlan;
199}
200
201/// \return the VPBasicBlock that is the exit of Block, possibly indirectly.
203 const VPBlockBase *Block = this;
205 Block = Region->getExiting();
207}
208
215
217 if (!Successors.empty() || !Parent)
218 return this;
219 assert(Parent->getExiting() == this &&
220 "Block w/o successors not the exiting block of its parent.");
221 return Parent->getEnclosingBlockWithSuccessors();
222}
223
225 if (!Predecessors.empty() || !Parent)
226 return this;
227 assert(Parent->getEntry() == this &&
228 "Block w/o predecessors not the entry of its parent.");
229 return Parent->getEnclosingBlockWithPredecessors();
230}
231
233 iterator It = begin();
234 while (It != end() && It->isPhi())
235 It++;
236 return It;
237}
238
246
249 return Def->getUnderlyingValue();
250
251 if (hasScalarValue(Def, Lane))
252 return Data.VPV2Scalars[Def][Lane.mapToCacheIndex(VF)];
253
254 if (!Lane.isFirstLane() && vputils::isSingleScalar(Def) &&
256 return Data.VPV2Scalars[Def][0];
257 }
258
259 // Look through BuildVector to avoid redundant extracts.
260 // TODO: Remove once replicate regions are unrolled explicitly.
261 if (Lane.getKind() == VPLane::Kind::First && match(Def, m_BuildVector())) {
262 auto *BuildVector = cast<VPInstruction>(Def);
263 return get(BuildVector->getOperand(Lane.getKnownLane()), true);
264 }
265
267 auto *VecPart = Data.VPV2Vector[Def];
268 if (!VecPart->getType()->isVectorTy()) {
269 assert(Lane.isFirstLane() && "cannot get lane > 0 for scalar");
270 return VecPart;
271 }
272 // TODO: Cache created scalar values.
273 Value *LaneV = Lane.getAsRuntimeExpr(Builder, VF);
274 auto *Extract = Builder.CreateExtractElement(VecPart, LaneV);
275 // set(Def, Extract, Instance);
276 return Extract;
277}
278
279Value *VPTransformState::get(const VPValue *Def, bool NeedsScalar) {
280 if (NeedsScalar) {
281 assert((VF.isScalar() || isa<VPIRValue, VPSymbolicValue>(Def) ||
283 (hasScalarValue(Def, VPLane(0)) &&
284 Data.VPV2Scalars[Def].size() == 1)) &&
285 "Trying to access a single scalar per part but has multiple scalars "
286 "per part.");
287 return get(Def, VPLane(0));
288 }
289
290 // If Values have been set for this Def return the one relevant for \p Part.
291 if (hasVectorValue(Def))
292 return Data.VPV2Vector[Def];
293
294 auto GetBroadcastInstrs = [this](Value *V) {
295 if (VF.isScalar())
296 return V;
297 // Broadcast the scalar into all locations in the vector.
298 Value *Shuf = Builder.CreateVectorSplat(VF, V, "broadcast");
299 return Shuf;
300 };
301
302 if (!hasScalarValue(Def, {0})) {
303 Value *IRV = Def->getLiveInIRValue();
304 Value *B = GetBroadcastInstrs(IRV);
305 set(Def, B);
306 return B;
307 }
308
309 Value *ScalarValue = get(Def, VPLane(0));
310 // If we aren't vectorizing, we can just copy the scalar map values over
311 // to the vector map.
312 if (VF.isScalar()) {
313 set(Def, ScalarValue);
314 return ScalarValue;
315 }
316
317 bool IsSingleScalar = vputils::isSingleScalar(Def);
318 VPLane LastLane(IsSingleScalar ? 0 : VF.getFixedValue() - 1);
319
320 // We need to construct the vector value for a single-scalar value by
321 // broadcasting the scalar to all lanes.
322 // TODO: Replace by introducing Broadcast VPInstructions.
323 assert(IsSingleScalar && "must be a single-scalar at this point");
324 // Set the insert point after the last scalarized instruction or after the
325 // last PHI, if LastInst is a PHI. This ensures the insertelement sequence
326 // will directly follow the scalar definitions.
327 auto OldIP = Builder.saveIP();
328 auto *LastInst = cast<Instruction>(get(Def, LastLane));
329 auto NewIP = isa<PHINode>(LastInst)
330 ? LastInst->getParent()->getFirstNonPHIIt()
331 : std::next(BasicBlock::iterator(LastInst));
332 Builder.SetInsertPoint(&*NewIP);
333 Value *VectorValue = GetBroadcastInstrs(ScalarValue);
334 set(Def, VectorValue);
335 Builder.restoreIP(OldIP);
336 return VectorValue;
337}
338
340 const DILocation *DIL = DL;
341 // When a FSDiscriminator is enabled, we don't need to add the multiply
342 // factors to the discriminators.
343 if (DIL &&
344 Builder.GetInsertBlock()
345 ->getParent()
346 ->shouldEmitDebugInfoForProfiling() &&
348 // FIXME: For scalable vectors, assume vscale=1.
349 unsigned UF = Plan->getConcreteUF();
350 auto NewDIL =
351 DIL->cloneByMultiplyingDuplicationFactor(UF * VF.getKnownMinValue());
352 if (NewDIL)
353 Builder.SetCurrentDebugLocation(*NewDIL);
354 else
355 LLVM_DEBUG(dbgs() << "Failed to create new discriminator: "
356 << DIL->getFilename() << " Line: " << DIL->getLine());
357 } else
358 Builder.SetCurrentDebugLocation(DL);
359}
360
362 Value *WideValue,
363 const VPLane &Lane) {
364 Value *ScalarInst = get(Def, Lane);
365 Value *LaneExpr = Lane.getAsRuntimeExpr(Builder, VF);
366 if (auto *StructTy = dyn_cast<StructType>(WideValue->getType())) {
367 // We must handle each element of a vectorized struct type.
368 for (unsigned I = 0, E = StructTy->getNumElements(); I != E; I++) {
369 Value *ScalarValue = Builder.CreateExtractValue(ScalarInst, I);
370 Value *VectorValue = Builder.CreateExtractValue(WideValue, I);
371 VectorValue =
372 Builder.CreateInsertElement(VectorValue, ScalarValue, LaneExpr);
373 WideValue = Builder.CreateInsertValue(WideValue, VectorValue, I);
374 }
375 } else {
376 WideValue = Builder.CreateInsertElement(WideValue, ScalarInst, LaneExpr);
377 }
378 return WideValue;
379}
380
381BasicBlock *VPBasicBlock::createEmptyBasicBlock(VPTransformState &State) {
382 auto &CFG = State.CFG;
383 // BB stands for IR BasicBlocks. VPBB stands for VPlan VPBasicBlocks.
384 // Pred stands for Predessor. Prev stands for Previous - last visited/created.
385 BasicBlock *PrevBB = CFG.PrevBB;
386 BasicBlock *NewBB = BasicBlock::Create(PrevBB->getContext(), getName(),
387 PrevBB->getParent(), CFG.ExitBB);
388 LLVM_DEBUG(dbgs() << "LV: created " << NewBB->getName() << '\n');
389
390 return NewBB;
391}
392
394 auto &CFG = State.CFG;
395 BasicBlock *NewBB = CFG.VPBB2IRBB[this];
396
397 // Register NewBB in its loop. In innermost loops its the same for all
398 // BB's.
399 Loop *ParentLoop = State.CurrentParentLoop;
400 // If this block has a sole successor that is an exit block or is an exit
401 // block itself then it needs adding to the same parent loop as the exit
402 // block.
403 VPBlockBase *SuccOrExitVPB = getSingleSuccessor();
404 SuccOrExitVPB = SuccOrExitVPB ? SuccOrExitVPB : this;
405 if (State.Plan->isExitBlock(SuccOrExitVPB)) {
406 ParentLoop = State.LI->getLoopFor(
407 cast<VPIRBasicBlock>(SuccOrExitVPB)->getIRBasicBlock());
408 }
409
410 if (ParentLoop && !State.LI->getLoopFor(NewBB))
411 ParentLoop->addBasicBlockToLoop(NewBB, *State.LI);
412
414 if (VPBlockUtils::isHeader(this, State.VPDT)) {
415 // There's no block for the latch yet, connect to the preheader only.
416 Preds = {getPredecessors()[0]};
417 } else {
418 Preds = to_vector(getPredecessors());
419 }
420
421 // Hook up the new basic block to its predecessors.
422 for (VPBlockBase *PredVPBlock : Preds) {
423 VPBasicBlock *PredVPBB = PredVPBlock->getExitingBasicBlock();
424 auto &PredVPSuccessors = PredVPBB->getHierarchicalSuccessors();
425 assert(CFG.VPBB2IRBB.contains(PredVPBB) &&
426 "Predecessor basic-block not found building successor.");
427 BasicBlock *PredBB = CFG.VPBB2IRBB[PredVPBB];
428 auto *PredBBTerminator = PredBB->getTerminator();
429 LLVM_DEBUG(dbgs() << "LV: draw edge from " << PredBB->getName() << '\n');
430
431 if (isa<UnreachableInst>(PredBBTerminator)) {
432 assert(PredVPSuccessors.size() == 1 &&
433 "Predecessor ending w/o branch must have single successor.");
434 DebugLoc DL = PredBBTerminator->getDebugLoc();
435 PredBBTerminator->eraseFromParent();
436 auto *Br = UncondBrInst::Create(NewBB, PredBB);
437 Br->setDebugLoc(DL);
438 } else if (auto *UBI = dyn_cast<UncondBrInst>(PredBBTerminator)) {
439 UBI->setSuccessor(NewBB);
440 } else {
441 // Set each forward successor here when it is created, excluding
442 // backedges. A backward successor is set when the branch is created.
443 // Branches to VPIRBasicBlocks must have the same successors in VPlan as
444 // in the original IR, except when the predecessor is the entry block.
445 // This enables including SCEV and memory runtime check blocks in VPlan.
446 // TODO: Remove exception by modeling the terminator of entry block using
447 // BranchOnCond.
448 unsigned idx = PredVPSuccessors.front() == this ? 0 : 1;
449 auto *TermBr = cast<CondBrInst>(PredBBTerminator);
450 assert((!TermBr->getSuccessor(idx) ||
451 (isa<VPIRBasicBlock>(this) &&
452 (TermBr->getSuccessor(idx) == NewBB ||
453 PredVPBlock == getPlan()->getEntry()))) &&
454 "Trying to reset an existing successor block.");
455 TermBr->setSuccessor(idx, NewBB);
456 }
457 CFG.DTU.applyUpdates({{DominatorTree::Insert, PredBB, NewBB}});
458 }
459}
460
463 "VPIRBasicBlock can have at most two successors at the moment!");
464 // Move completely disconnected blocks to their final position.
465 if (IRBB->hasNPredecessors(0) && succ_begin(IRBB) == succ_end(IRBB))
466 IRBB->moveAfter(State->CFG.PrevBB);
467 State->Builder.SetInsertPoint(IRBB->getTerminator());
468 State->CFG.PrevBB = IRBB;
469 State->CFG.VPBB2IRBB[this] = IRBB;
470 executeRecipes(State, IRBB);
471 // Create a branch instruction to terminate IRBB if one was not created yet
472 // and is needed.
473 if (getSingleSuccessor() && isa<UnreachableInst>(IRBB->getTerminator())) {
474 auto *Br = State->Builder.CreateBr(IRBB);
475 Br->setOperand(0, nullptr);
476 IRBB->getTerminator()->eraseFromParent();
477 } else {
478 assert((getNumSuccessors() == 0 ||
479 isa<UncondBrInst, CondBrInst>(IRBB->getTerminator())) &&
480 "other blocks must be terminated by a branch");
481 }
482
483 connectToPredecessors(*State);
484}
485
486VPIRBasicBlock *VPIRBasicBlock::clone() {
487 auto *NewBlock = getPlan()->createEmptyVPIRBasicBlock(IRBB);
488 for (VPRecipeBase &R : Recipes)
489 NewBlock->appendRecipe(R.clone());
490 return NewBlock;
491}
492
494 bool Replica = bool(State->Lane);
495 BasicBlock *NewBB = State->CFG.PrevBB; // Reuse it if possible.
496
497 if (VPBlockUtils::isHeader(this, State->VPDT)) {
498 // Create and register the new vector loop.
499 Loop *PrevParentLoop = State->CurrentParentLoop;
500 State->CurrentParentLoop = State->LI->AllocateLoop();
501
502 // Insert the new loop into the loop nest and register the new basic blocks
503 // before calling any utilities such as SCEV that require valid LoopInfo.
504 if (PrevParentLoop)
505 PrevParentLoop->addChildLoop(State->CurrentParentLoop);
506 else
507 State->LI->addTopLevelLoop(State->CurrentParentLoop);
508 }
509
510 auto IsReplicateRegion = [](VPBlockBase *BB) {
512 assert((!R || R->isReplicator()) &&
513 "only replicate region blocks should remain");
514 return R;
515 };
516 // 1. Create an IR basic block.
517 if ((Replica && this == getParent()->getEntry()) ||
518 IsReplicateRegion(getSingleHierarchicalPredecessor())) {
519 // Reuse the previous basic block if the current VPBB is either
520 // * the entry to a replicate region, or
521 // * the exit of a replicate region.
522 State->CFG.VPBB2IRBB[this] = NewBB;
523 } else {
524 NewBB = createEmptyBasicBlock(*State);
525
526 State->Builder.SetInsertPoint(NewBB);
527 // Temporarily terminate with unreachable until CFG is rewired.
528 UnreachableInst *Terminator = State->Builder.CreateUnreachable();
529 State->Builder.SetInsertPoint(Terminator);
530
531 State->CFG.PrevBB = NewBB;
532 State->CFG.VPBB2IRBB[this] = NewBB;
533 connectToPredecessors(*State);
534 }
535
536 // 2. Fill the IR basic block with IR instructions.
537 executeRecipes(State, NewBB);
538
539 // If this block is a latch, update CurrentParentLoop.
540 if (VPBlockUtils::isLatch(this, State->VPDT))
541 State->CurrentParentLoop = State->CurrentParentLoop->getParentLoop();
542}
543
544VPBasicBlock *VPBasicBlock::clone() {
545 auto *NewBlock = getPlan()->createVPBasicBlock(getName());
546 for (VPRecipeBase &R : *this)
547 NewBlock->appendRecipe(R.clone());
548 return NewBlock;
549}
550
552 LLVM_DEBUG(dbgs() << "LV: vectorizing VPBB: " << getName()
553 << " in BB: " << BB->getName() << '\n');
554
555 State->CFG.PrevVPBB = this;
556
557 for (VPRecipeBase &Recipe : Recipes) {
558 State->setDebugLocFrom(Recipe.getDebugLoc());
559 Recipe.execute(*State);
560 }
561
562 LLVM_DEBUG(dbgs() << "LV: filled BB: " << *BB);
563}
564
565VPBasicBlock *VPBasicBlock::splitAt(iterator SplitAt) {
566 assert((SplitAt == end() || SplitAt->getParent() == this) &&
567 "can only split at a position in the same block");
568
569 // Create new empty block after the block to split.
570 auto *SplitBlock = getPlan()->createVPBasicBlock(getName() + ".split");
572
573 // If this is the exiting block, make the split the new exiting block.
574 auto *ParentRegion = getParent();
575 if (ParentRegion && ParentRegion->getExiting() == this)
576 ParentRegion->setExiting(SplitBlock);
577
578 // Finally, move the recipes starting at SplitAt to new block.
579 for (VPRecipeBase &ToMove :
580 make_early_inc_range(make_range(SplitAt, this->end())))
581 ToMove.moveBefore(*SplitBlock, SplitBlock->end());
582
583 return SplitBlock;
584}
585
586/// Return the enclosing loop region for region \p P. The templated version is
587/// used to support both const and non-const block arguments.
588template <typename T> static T *getEnclosingLoopRegionForRegion(T *P) {
589 if (P && P->isReplicator()) {
590 P = P->getParent();
591 // Multiple loop regions can be nested, but replicate regions can only be
592 // nested inside a loop region or must be outside any other region.
593 assert((!P || !P->isReplicator()) && "unexpected nested replicate regions");
594 }
595 return P;
596}
597
601
605
606static bool hasConditionalTerminator(const VPBasicBlock *VPBB) {
607 if (VPBB->empty()) {
608 assert(
609 VPBB->getNumSuccessors() < 2 &&
610 "block with multiple successors doesn't have a recipe as terminator");
611 return false;
612 }
613
614 const VPRecipeBase *R = &VPBB->back();
615 [[maybe_unused]] bool IsSwitch =
617 cast<VPInstruction>(R)->getOpcode() == Instruction::Switch;
618 [[maybe_unused]] bool IsBranchOnTwoConds = match(R, m_BranchOnTwoConds());
619 [[maybe_unused]] bool IsCondBranch =
622 if (VPBB->getNumSuccessors() == 2 ||
623 (VPBB->isExiting() && !VPBB->getParent()->isReplicator())) {
624 assert((IsCondBranch || IsSwitch || IsBranchOnTwoConds) &&
625 "block with multiple successors not terminated by "
626 "conditional branch nor switch recipe");
627
628 return true;
629 }
630
631 if (VPBB->getNumSuccessors() > 2) {
632 assert((IsSwitch || IsBranchOnTwoConds) &&
633 "block with more than 2 successors not terminated by a switch or "
634 "branch-on-two-conds recipe");
635 return true;
636 }
637
638 assert(
639 !IsCondBranch && !IsBranchOnTwoConds &&
640 "block with 0 or 1 successors terminated by conditional branch recipe");
641 return false;
642}
643
645 if (hasConditionalTerminator(this))
646 return &back();
647 return nullptr;
648}
649
651 if (hasConditionalTerminator(this))
652 return &back();
653 return nullptr;
654}
655
657 return getParent() && getParent()->getExitingBasicBlock() == this;
658}
659
660#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
665
666void VPBlockBase::printSuccessors(raw_ostream &O, const Twine &Indent) const {
667 if (!hasSuccessors()) {
668 O << Indent << "No successors\n";
669 } else {
670 O << Indent << "Successor(s): ";
671 ListSeparator LS;
672 for (auto *Succ : getSuccessors())
673 O << LS << Succ->getName();
674 O << '\n';
675 }
676}
677
678void VPBasicBlock::print(raw_ostream &O, const Twine &Indent,
679 VPSlotTracker &SlotTracker) const {
680 O << Indent << getName() << ":\n";
681
682 auto RecipeIndent = Indent + " ";
683 for (const VPRecipeBase &Recipe : *this) {
684 Recipe.print(O, RecipeIndent, SlotTracker);
685 O << '\n';
686 }
687
688 printSuccessors(O, Indent);
689}
690#endif
691
692std::pair<VPBlockBase *, VPBlockBase *>
695 VPBlockBase *Exiting = nullptr;
696 bool InRegion = Entry->getParent();
697 // First, clone blocks reachable from Entry.
698 for (VPBlockBase *BB : vp_depth_first_shallow(Entry)) {
699 VPBlockBase *NewBB = BB->clone();
700 Old2NewVPBlocks[BB] = NewBB;
701 if (InRegion && BB->getNumSuccessors() == 0) {
702 assert(!Exiting && "Multiple exiting blocks?");
703 Exiting = BB;
704 }
705 }
706 assert((!InRegion || Exiting) && "regions must have a single exiting block");
707
708 // Second, update the predecessors & successors of the cloned blocks.
709 for (VPBlockBase *BB : vp_depth_first_shallow(Entry)) {
710 VPBlockBase *NewBB = Old2NewVPBlocks[BB];
712 for (VPBlockBase *Pred : BB->getPredecessors()) {
713 NewPreds.push_back(Old2NewVPBlocks[Pred]);
714 }
715 NewBB->setPredecessors(NewPreds);
717 for (VPBlockBase *Succ : BB->successors()) {
718 NewSuccs.push_back(Old2NewVPBlocks[Succ]);
719 }
720 NewBB->setSuccessors(NewSuccs);
721 }
722
723#if !defined(NDEBUG)
724 // Verify that the order of predecessors and successors matches in the cloned
725 // version.
726 for (const auto &[OldBB, NewBB] :
728 vp_depth_first_shallow(Old2NewVPBlocks[Entry]))) {
729 for (const auto &[OldPred, NewPred] :
730 zip(OldBB->getPredecessors(), NewBB->getPredecessors()))
731 assert(NewPred == Old2NewVPBlocks[OldPred] && "Different predecessors");
732
733 for (const auto &[OldSucc, NewSucc] :
734 zip(OldBB->successors(), NewBB->successors()))
735 assert(NewSucc == Old2NewVPBlocks[OldSucc] && "Different successors");
736 }
737#endif
738
739 return std::make_pair(Old2NewVPBlocks[Entry],
740 Exiting ? Old2NewVPBlocks[Exiting] : nullptr);
741}
742
743VPRegionBlock *VPRegionBlock::clone() {
744 const auto &[NewEntry, NewExiting] = VPBlockUtils::cloneFrom(getEntry());
745 VPlan &Plan = *getPlan();
746 VPRegionValue *CanIV = getCanonicalIV();
747 VPRegionBlock *NewRegion =
748 CanIV ? Plan.createLoopRegion(CanIV->getType(), CanIV->getDebugLoc(),
749 getName(), NewEntry, NewExiting)
750 : Plan.createReplicateRegion(NewEntry, NewExiting, getName());
751
752 for (VPBlockBase *Block : vp_depth_first_shallow(NewEntry))
753 Block->setParent(NewRegion);
754 return NewRegion;
755}
756
759 "Loop regions should have been lowered to plain CFG");
760 assert(!State->Lane && "Replicating a Region with non-null instance.");
761 assert(!State->VF.isScalable() && "VF is assumed to be non scalable.");
762
764 Entry);
765 State->Lane = VPLane(0);
766 for (unsigned Lane = 0, VF = State->VF.getFixedValue(); Lane < VF; ++Lane) {
767 State->Lane = VPLane(Lane, VPLane::Kind::First);
768 // Visit the VPBlocks connected to \p this, starting from it.
769 for (VPBlockBase *Block : RPOT) {
770 LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n');
771 Block->execute(State);
772 }
773 }
774
775 // Exit replicating mode.
776 State->Lane.reset();
777}
778
781 for (VPRecipeBase &R : Recipes)
782 Cost += R.cost(VF, Ctx);
783 return Cost;
784}
785
786const VPBasicBlock *VPBasicBlock::getCFGPredecessor(unsigned Idx) const {
787 const VPBlockBase *Pred = nullptr;
788 if (hasPredecessors()) {
789 Pred = getPredecessors()[Idx];
790 } else {
791 auto *Region = getParent();
792 assert(Region && !Region->isReplicator() && Region->getEntry() == this &&
793 "must be in the entry block of a non-replicate region");
794 assert(Idx < 2 && Region->getNumPredecessors() == 1 &&
795 "loop region has a single predecessor (preheader), its entry block "
796 "has 2 incoming blocks");
797
798 // Idx == 0 selects the predecessor of the region, Idx == 1 selects the
799 // region itself whose exiting block feeds the phi across the backedge.
800 Pred = Idx == 0 ? Region->getSinglePredecessor() : Region;
801 }
802 return Pred->getExitingBasicBlock();
803}
804
806 if (!isReplicator()) {
807 // Neglect the cost of canonical IV, matching the legacy cost model.
810 Cost += Block->cost(VF, Ctx);
811 InstructionCost BackedgeCost =
812 ForceTargetInstructionCost.getNumOccurrences()
814 : Ctx.TTI.getCFInstrCost(Instruction::UncondBr, Ctx.CostKind);
815 LLVM_DEBUG(dbgs() << "Cost of " << BackedgeCost << " for VF " << VF
816 << ": vector loop backedge\n");
817 Cost += BackedgeCost;
818 return Cost;
819 }
820
821 // Compute the cost of a replicate region. Replicating isn't supported for
822 // scalable vectors, return an invalid cost for them.
823 // TODO: Discard scalable VPlans with replicate recipes earlier after
824 // construction.
825 if (VF.isScalable())
827
828 // Compute and return the cost of the conditionally executed recipes.
829 assert(VF.isVector() && "Can only compute vector cost at the moment.");
831 return Then->cost(VF, Ctx);
832}
833
834#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
836 VPSlotTracker &SlotTracker) const {
837 O << Indent << (isReplicator() ? "<xVFxUF> " : "<x1> ") << getName() << ": {";
838 auto NewIndent = Indent + " ";
839 if (auto *CanIV = getCanonicalIV()) {
840 O << '\n';
841 CanIV->print(O, SlotTracker);
842 O << " = CANONICAL-IV\n";
843 }
844 for (auto *BlockBase : vp_depth_first_shallow(Entry)) {
845 O << '\n';
846 BlockBase->print(O, NewIndent, SlotTracker);
847 }
848 O << Indent << "}\n";
849
850 printSuccessors(O, Indent);
851}
852#endif
853
855 auto *Header = cast<VPBasicBlock>(getEntry());
856 auto *ExitingLatch = cast<VPBasicBlock>(getExiting());
857 auto *CanIV = getCanonicalIV();
858 if (CanIV->getNumUsers() > 0) {
859 VPlan &Plan = *getPlan();
860 auto *Zero = Plan.getZero(CanIV->getType());
861 DebugLoc DL = CanIV->getDebugLoc();
863 VPBuilder HeaderBuilder(Header, Header->begin());
864 auto *ScalarR =
865 HeaderBuilder.createScalarPhi({Zero, CanIVInc}, DL, "index");
866 CanIV->replaceAllUsesWith(ScalarR);
867 }
868
869 VPBlockBase *Preheader = getSinglePredecessor();
870 VPBlockUtils::disconnectBlocks(Preheader, this);
871
872 for (VPBlockBase *VPB : vp_depth_first_shallow(Entry))
873 VPB->setParent(getParent());
874
875 VPBlockUtils::connectBlocks(Preheader, Header);
876 VPBlockUtils::transferSuccessors(this, ExitingLatch);
877 VPBlockUtils::connectBlocks(ExitingLatch, Header);
878}
879
881 // TODO: Represent the increment as VPRegionValue as well.
882 VPRegionValue *CanIV = getCanonicalIV();
883 assert(CanIV && "Expected a canonical IV");
884
885 if (auto *Inc = vputils::findCanonicalIVIncrement(*getPlan()))
886 return Inc;
887
888 assert(!getPlan()->getVFxUF().isMaterialized() &&
889 "VFxUF can be used only before it is materialized.");
890 auto *ExitingLatch = cast<VPBasicBlock>(getExiting());
891 return VPBuilder(ExitingLatch->getTerminator())
892 .createOverflowingOp(Instruction::Add, {CanIV, &getPlan()->getVFxUF()},
893 {hasCanonicalIVNUW(), /* HasNSW */ false},
894 CanIV->getDebugLoc(), "index.next");
895}
896
897VPlan::VPlan(Loop *L) {
898 setEntry(createVPIRBasicBlock(L->getLoopPreheader()));
899 ScalarHeader = createVPIRBasicBlock(L->getHeader());
900
901 SmallVector<BasicBlock *> IRExitBlocks;
902 L->getUniqueExitBlocks(IRExitBlocks);
903 for (BasicBlock *EB : IRExitBlocks)
904 ExitBlocks.push_back(createVPIRBasicBlock(EB));
905}
906
908 VPSymbolicValue DummyValue;
909
910 for (auto *VPB : CreatedBlocks) {
911 if (auto *VPBB = dyn_cast<VPBasicBlock>(VPB)) {
912 // Replace all operands of recipes and all VPValues defined in VPBB with
913 // DummyValue so the block can be deleted.
914 for (VPRecipeBase &R : *VPBB) {
915 for (auto *Def : R.definedValues())
916 Def->replaceAllUsesWith(&DummyValue);
917
918 for (unsigned I = 0, E = R.getNumOperands(); I != E; I++)
919 R.setOperand(I, &DummyValue);
920 }
921 } else if (auto *CanIV = cast<VPRegionBlock>(VPB)->getCanonicalIV()) {
922 CanIV->replaceAllUsesWith(&DummyValue);
923 }
924
925 delete VPB;
926 }
927 for (VPValue *VPV : getLiveIns())
928 delete VPV;
929 delete BackedgeTakenCount;
930}
931
933 auto Iter = find_if(getExitBlocks(), [IRBB](const VPIRBasicBlock *VPIRBB) {
934 return VPIRBB->getIRBasicBlock() == IRBB;
935 });
936 assert(Iter != getExitBlocks().end() && "no exit block found");
937 return *Iter;
938}
939
941 return is_contained(ExitBlocks, VPBB);
942}
943
944/// To make RUN_VPLAN_PASS print final VPlan.
945static void printFinalVPlan(VPlan &) {}
946
947/// Generate the code inside the preheader and body of the vectorized loop.
948/// Assumes a single pre-header basic-block was created for this. Introduce
949/// additional basic-blocks as needed, and fill them all.
951 // Initialize CFG state.
952 State->CFG.PrevVPBB = nullptr;
953 State->CFG.ExitBB = State->CFG.PrevBB->getSingleSuccessor();
954
955 // Update VPDominatorTree since VPBasicBlock may be removed after State was
956 // constructed.
957 State->VPDT.recalculate(*this);
958
959 // Disconnect VectorPreHeader from ExitBB in both the CFG and DT.
960 BasicBlock *VectorPreHeader = State->CFG.PrevBB;
961 cast<UncondBrInst>(VectorPreHeader->getTerminator())->setSuccessor(nullptr);
962 State->CFG.DTU.applyUpdates(
963 {{DominatorTree::Delete, VectorPreHeader, State->CFG.ExitBB}});
964
965 LLVM_DEBUG(dbgs() << "Executing best plan with VF=" << State->VF
966 << ", UF=" << getConcreteUF() << '\n');
967 setName("Final VPlan");
968 // TODO: RUN_VPLAN_PASS/VPlanTransforms::runPass should automatically dump
969 // VPlans after some specific stages when "-debug" is specified, but that
970 // hasn't been implemented yet. For now, just do both:
971 LLVM_DEBUG(dump());
973
974 BasicBlock *ScalarPh = State->CFG.ExitBB;
975 VPBasicBlock *ScalarPhVPBB = getScalarPreheader();
976 if (ScalarPhVPBB) {
977 // Disconnect scalar preheader and scalar header, as the dominator tree edge
978 // will be updated as part of VPlan execution. This allows keeping the DTU
979 // logic generic during VPlan execution.
980 State->CFG.DTU.applyUpdates(
981 {{DominatorTree::Delete, ScalarPh, ScalarPh->getSingleSuccessor()}});
982 }
984 Entry);
985 // Generate code for the VPlan, in parts of the vector skeleton, loop body and
986 // successor blocks including the middle, exit and scalar preheader blocks.
987 for (VPBlockBase *Block : RPOT)
988 Block->execute(State);
989
990 if (hasEarlyExit()) {
991 // Fix up LoopInfo for extra dispatch blocks when vectorizing loops with
992 // early exits. For dispatch blocks, we need to find the smallest common
993 // loop of all successors that are in a loop. Note: we only need to update
994 // loop info for blocks after the middle block, but there is no easy way to
995 // get those at this point.
996 for (VPBlockBase *VPB : reverse(RPOT)) {
997 auto *VPBB = dyn_cast<VPBasicBlock>(VPB);
998 if (!VPBB || isa<VPIRBasicBlock>(VPBB))
999 continue;
1000 BasicBlock *BB = State->CFG.VPBB2IRBB[VPBB];
1001 Loop *L = State->LI->getLoopFor(BB);
1002 if (!L || any_of(successors(BB),
1003 [L](BasicBlock *Succ) { return L->contains(Succ); }))
1004 continue;
1005 // Find the innermost loop containing all successors that are in a loop.
1006 // Successors not in any loop don't constrain the target loop.
1007 Loop *Target = nullptr;
1008 for (BasicBlock *Succ : successors(BB)) {
1009 Loop *SuccLoop = State->LI->getLoopFor(Succ);
1010 if (!SuccLoop)
1011 continue;
1012 if (!Target)
1013 Target = SuccLoop;
1014 else
1015 Target = State->LI->getSmallestCommonLoop(Target, SuccLoop);
1016 }
1017 State->LI->removeBlock(BB);
1018 if (Target)
1019 Target->addBasicBlockToLoop(BB, *State->LI);
1020 }
1021 }
1022
1023 // If the original loop is unreachable, delete it and all its blocks.
1024 if (!ScalarPhVPBB) {
1025 // DeleteDeadBlocks will remove single-entry phis. Remove them from the exit
1026 // VPIRBBs in VPlan as well, otherwise we would retain references to deleted
1027 // IR instructions.
1028 for (VPIRBasicBlock *EB : getExitBlocks()) {
1029 for (VPRecipeBase &R : make_early_inc_range(EB->phis())) {
1030 if (R.getNumOperands() == 1)
1031 R.eraseFromParent();
1032 }
1033 }
1034
1035 Loop *OrigLoop =
1036 State->LI->getLoopFor(getScalarHeader()->getIRBasicBlock());
1037 auto Blocks = OrigLoop->getBlocksVector();
1038 Blocks.push_back(ScalarPh);
1039 while (!OrigLoop->isInnermost())
1040 State->LI->erase(*OrigLoop->begin());
1041 State->LI->erase(OrigLoop);
1042 for (auto *BB : Blocks)
1043 State->LI->removeBlock(BB);
1044 DeleteDeadBlocks(Blocks, &State->CFG.DTU);
1045 }
1046
1047 State->CFG.DTU.flush();
1048
1049 VPBasicBlock *Header = vputils::getFirstLoopHeader(*this, State->VPDT);
1050 if (!Header)
1051 return;
1052
1053 auto *LatchVPBB = cast<VPBasicBlock>(Header->getPredecessors()[1]);
1054 BasicBlock *VectorLatchBB = State->CFG.VPBB2IRBB[LatchVPBB];
1055
1056 // Fix the latch value of canonical, reduction and first-order recurrences
1057 // phis in the vector loop.
1058 for (VPRecipeBase &R : Header->phis()) {
1059 // Skip phi-like recipes that generate their backedege values themselves.
1060 if (isa<VPWidenPHIRecipe>(&R))
1061 continue;
1062
1063 auto *PhiR = cast<VPSingleDefRecipe>(&R);
1064 // VPInstructions currently model scalar Phis only.
1065 bool NeedsScalar = isa<VPInstruction>(PhiR) ||
1067 cast<VPReductionPHIRecipe>(PhiR)->isInLoop());
1068
1069 Value *Phi = State->get(PhiR, NeedsScalar);
1070 // VPHeaderPHIRecipe supports getBackedgeValue() but VPInstruction does
1071 // not.
1072 Value *Val = State->get(PhiR->getOperand(1), NeedsScalar);
1073 cast<PHINode>(Phi)->addIncoming(Val, VectorLatchBB);
1074 }
1075}
1076
1078 // For now only return the cost of the vector loop region, ignoring any other
1079 // blocks, like the preheader or middle blocks, expect for checking them for
1080 // recipes with invalid costs.
1082
1083 // If the cost of the loop region is invalid or any recipe in the skeleton
1084 // outside loop regions are invalid return an invalid cost.
1087 [&VF, &Ctx](VPBasicBlock *VPBB) {
1088 return !VPBB->cost(VF, Ctx).isValid();
1089 }))
1091
1092 return Cost;
1093}
1094
1096 // TODO: Cache if possible.
1098 if (auto *R = dyn_cast<VPRegionBlock>(B))
1099 return R->isReplicator() ? nullptr : R;
1100 return nullptr;
1101}
1102
1105 if (auto *R = dyn_cast<VPRegionBlock>(B))
1106 return R->isReplicator() ? nullptr : R;
1107 return nullptr;
1108}
1109
1110#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1113
1114 if (VF.getNumUsers() > 0) {
1115 O << "\nLive-in ";
1116 VF.printAsOperand(O, SlotTracker);
1117 O << " = VF";
1118 }
1119
1120 if (UF.getNumUsers() > 0) {
1121 O << "\nLive-in ";
1122 UF.printAsOperand(O, SlotTracker);
1123 O << " = UF";
1124 }
1125
1126 if (VFxUF.getNumUsers() > 0) {
1127 O << "\nLive-in ";
1128 VFxUF.printAsOperand(O, SlotTracker);
1129 O << " = VF * UF";
1130 }
1131
1132 if (VectorTripCount.getNumUsers() > 0) {
1133 O << "\nLive-in ";
1134 VectorTripCount.printAsOperand(O, SlotTracker);
1135 O << " = vector-trip-count";
1136 }
1137
1138 if (BackedgeTakenCount && BackedgeTakenCount->getNumUsers()) {
1139 O << "\nLive-in ";
1140 BackedgeTakenCount->printAsOperand(O, SlotTracker);
1141 O << " = backedge-taken count";
1142 }
1143
1144 O << "\n";
1145 if (TripCount) {
1146 if (isa<VPIRValue>(TripCount))
1147 O << "Live-in ";
1148 TripCount->printAsOperand(O, SlotTracker);
1149 O << " = original trip-count";
1150 O << "\n";
1151 }
1152}
1153
1157
1158 O << "VPlan '" << getName() << "' {";
1159
1160 printLiveIns(O);
1161
1163 RPOT(getEntry());
1164 for (const VPBlockBase *Block : RPOT) {
1165 O << '\n';
1166 Block->print(O, "", SlotTracker);
1167 }
1168
1169 O << "}\n";
1170}
1171
1172std::string VPlan::getName() const {
1173 std::string Out;
1174 raw_string_ostream RSO(Out);
1175 RSO << Name << " for ";
1176 if (!VFs.empty()) {
1177 RSO << "VF={" << VFs[0];
1178 for (ElementCount VF : drop_begin(VFs))
1179 RSO << "," << VF;
1180 RSO << "},";
1181 }
1182
1183 if (UFs.empty()) {
1184 RSO << "UF>=1";
1185 } else {
1186 RSO << "UF={" << UFs[0];
1187 for (unsigned UF : drop_begin(UFs))
1188 RSO << "," << UF;
1189 RSO << "}";
1190 }
1191
1192 return Out;
1193}
1194
1197 VPlanPrinter Printer(O, *this);
1198 Printer.dump();
1199}
1200
1202void VPlan::dump() const { print(dbgs()); }
1203#endif
1204
1205static void remapOperands(VPBlockBase *Entry, VPBlockBase *NewEntry,
1206 DenseMap<VPValue *, VPValue *> &Old2NewVPValues) {
1207 // Update the operands of all cloned recipes starting at NewEntry. This
1208 // traverses all reachable blocks. This is done in two steps, to handle cycles
1209 // in PHI recipes.
1211 OldDeepRPOT(Entry);
1213 NewDeepRPOT(NewEntry);
1214 // First, collect all mappings from old to new VPValues defined by cloned
1215 // recipes.
1216 for (const auto &[OldBB, NewBB] :
1219 assert(OldBB->getRecipeList().size() == NewBB->getRecipeList().size() &&
1220 "blocks must have the same number of recipes");
1221 for (const auto &[OldR, NewR] : zip(*OldBB, *NewBB)) {
1222 assert(OldR.getNumOperands() == NewR.getNumOperands() &&
1223 "recipes must have the same number of operands");
1224 assert(OldR.getNumDefinedValues() == NewR.getNumDefinedValues() &&
1225 "recipes must define the same number of operands");
1226 for (const auto &[OldV, NewV] :
1227 zip(OldR.definedValues(), NewR.definedValues()))
1228 Old2NewVPValues[OldV] = NewV;
1229 }
1230 }
1231
1232 // Update all operands to use cloned VPValues.
1233 for (VPBasicBlock *NewBB :
1235 for (VPRecipeBase &NewR : *NewBB)
1236 for (unsigned I = 0, E = NewR.getNumOperands(); I != E; ++I) {
1237 VPValue *NewOp = Old2NewVPValues.lookup(NewR.getOperand(I));
1238 NewR.setOperand(I, NewOp);
1239 }
1240 }
1241}
1242
1244 unsigned NumBlocksBeforeCloning = CreatedBlocks.size();
1245 // Clone blocks.
1246 const auto &[NewEntry, __] = VPBlockUtils::cloneFrom(Entry);
1247
1248 BasicBlock *ScalarHeaderIRBB = getScalarHeader()->getIRBasicBlock();
1249 VPIRBasicBlock *NewScalarHeader = nullptr;
1250 if (getScalarHeader()->hasPredecessors()) {
1251 NewScalarHeader = cast<VPIRBasicBlock>(*find_if(
1252 vp_depth_first_shallow(NewEntry), [ScalarHeaderIRBB](VPBlockBase *VPB) {
1253 auto *VPIRBB = dyn_cast<VPIRBasicBlock>(VPB);
1254 return VPIRBB && VPIRBB->getIRBasicBlock() == ScalarHeaderIRBB;
1255 }));
1256 } else {
1257 NewScalarHeader = createVPIRBasicBlock(ScalarHeaderIRBB);
1258 }
1259 // Create VPlan, clone live-ins and remap operands in the cloned blocks.
1260 auto *NewPlan = new VPlan(cast<VPBasicBlock>(NewEntry), NewScalarHeader);
1261 DenseMap<VPValue *, VPValue *> Old2NewVPValues;
1262 for (VPIRValue *OldLiveIn : getLiveIns())
1263 Old2NewVPValues[OldLiveIn] = NewPlan->getOrAddLiveIn(OldLiveIn);
1264
1265 if (auto *TripCountIRV = dyn_cast_or_null<VPIRValue>(TripCount))
1266 Old2NewVPValues[TripCountIRV] = NewPlan->getOrAddLiveIn(TripCountIRV);
1267 // else NewTripCount will be created and inserted into Old2NewVPValues when
1268 // TripCount is cloned. In any case NewPlan->TripCount is updated below.
1269
1270 if (auto *LoopRegion = getVectorLoopRegion()) {
1271 auto *OldCanIV = LoopRegion->getCanonicalIV();
1272 auto *NewCanIV = NewPlan->getVectorLoopRegion()->getCanonicalIV();
1273 assert(OldCanIV && NewCanIV &&
1274 "Loop regions of both plans must have canonical IVs.");
1275 Old2NewVPValues[OldCanIV] = NewCanIV;
1276 }
1277
1278 assert(none_of(Old2NewVPValues.keys(), IsaPred<VPSymbolicValue>) &&
1279 "All VPSymbolicValues must be handled below");
1280
1281 if (BackedgeTakenCount)
1282 NewPlan->BackedgeTakenCount = new VPSymbolicValue();
1283
1284 // Map and propagate materialized state for symbolic values.
1285 for (auto [OldSV, NewSV] :
1286 {std::pair{&VectorTripCount, &NewPlan->VectorTripCount},
1287 {&VF, &NewPlan->VF},
1288 {&UF, &NewPlan->UF},
1289 {&VFxUF, &NewPlan->VFxUF},
1290 {BackedgeTakenCount, NewPlan->BackedgeTakenCount}}) {
1291 if (!OldSV)
1292 continue;
1293 Old2NewVPValues[OldSV] = NewSV;
1294 if (OldSV->isMaterialized())
1295 NewSV->markMaterialized();
1296 }
1297
1298 remapOperands(Entry, NewEntry, Old2NewVPValues);
1299
1300 // Initialize remaining fields of cloned VPlan.
1301 NewPlan->VFs = VFs;
1302 NewPlan->UFs = UFs;
1303 // TODO: Adjust names.
1304 NewPlan->Name = Name;
1305 if (TripCount) {
1306 assert(Old2NewVPValues.contains(TripCount) &&
1307 "TripCount must have been added to Old2NewVPValues");
1308 NewPlan->TripCount = Old2NewVPValues[TripCount];
1309 }
1310
1311 // Transfer all cloned blocks (the second half of all current blocks) from
1312 // current to new VPlan.
1313 unsigned NumBlocksAfterCloning = CreatedBlocks.size();
1314 for (unsigned I :
1315 seq<unsigned>(NumBlocksBeforeCloning, NumBlocksAfterCloning))
1316 NewPlan->CreatedBlocks.push_back(this->CreatedBlocks[I]);
1317 CreatedBlocks.truncate(NumBlocksBeforeCloning);
1318
1319 // Update ExitBlocks of the new plan.
1320 for (VPBlockBase *VPB : NewPlan->CreatedBlocks) {
1321 if (VPB->getNumSuccessors() == 0 && isa<VPIRBasicBlock>(VPB) &&
1322 VPB != NewScalarHeader)
1323 NewPlan->ExitBlocks.push_back(cast<VPIRBasicBlock>(VPB));
1324 }
1325
1326 return NewPlan;
1327}
1328
1330 auto *VPIRBB = new VPIRBasicBlock(IRBB);
1331 CreatedBlocks.push_back(VPIRBB);
1332 return VPIRBB;
1333}
1334
1336 auto *VPIRBB = createEmptyVPIRBasicBlock(IRBB);
1337 for (Instruction &I :
1338 make_range(IRBB->begin(), IRBB->getTerminator()->getIterator()))
1339 VPIRBB->appendRecipe(VPIRInstruction::create(I));
1340 return VPIRBB;
1341}
1342
1343#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1344
1345Twine VPlanPrinter::getUID(const VPBlockBase *Block) {
1346 return (isa<VPRegionBlock>(Block) ? "cluster_N" : "N") +
1347 Twine(getOrCreateBID(Block));
1348}
1349
1350Twine VPlanPrinter::getOrCreateName(const VPBlockBase *Block) {
1351 const std::string &Name = Block->getName();
1352 if (!Name.empty())
1353 return Name;
1354 return "VPB" + Twine(getOrCreateBID(Block));
1355}
1356
1358 Depth = 1;
1359 bumpIndent(0);
1360 OS << "digraph VPlan {\n";
1361 OS << "graph [labelloc=t, fontsize=30; label=\"Vectorization Plan";
1362 if (!Plan.getName().empty())
1363 OS << "\\n" << DOT::EscapeString(Plan.getName());
1364
1365 {
1366 // Print live-ins.
1367 std::string Str;
1368 raw_string_ostream SS(Str);
1369 Plan.printLiveIns(SS);
1371 StringRef(Str).rtrim('\n').split(Lines, "\n");
1372 for (auto Line : Lines)
1373 OS << DOT::EscapeString(Line.str()) << "\\n";
1374 }
1375
1376 OS << "\"]\n";
1377 OS << "node [shape=rect, fontname=Courier, fontsize=30]\n";
1378 OS << "edge [fontname=Courier, fontsize=30]\n";
1379 OS << "compound=true\n";
1380
1381 for (const VPBlockBase *Block : vp_depth_first_shallow(Plan.getEntry()))
1382 dumpBlock(Block);
1383
1384 OS << "}\n";
1385}
1386
1387void VPlanPrinter::dumpBlock(const VPBlockBase *Block) {
1389 dumpBasicBlock(BasicBlock);
1391 dumpRegion(Region);
1392 else
1393 llvm_unreachable("Unsupported kind of VPBlock.");
1394}
1395
1396void VPlanPrinter::drawEdge(const VPBlockBase *From, const VPBlockBase *To,
1397 bool Hidden, const Twine &Label) {
1398 // Due to "dot" we print an edge between two regions as an edge between the
1399 // exiting basic block and the entry basic of the respective regions.
1400 const VPBlockBase *Tail = From->getExitingBasicBlock();
1401 const VPBlockBase *Head = To->getEntryBasicBlock();
1402 OS << Indent << getUID(Tail) << " -> " << getUID(Head);
1403 OS << " [ label=\"" << Label << '\"';
1404 if (Tail != From)
1405 OS << " ltail=" << getUID(From);
1406 if (Head != To)
1407 OS << " lhead=" << getUID(To);
1408 if (Hidden)
1409 OS << "; splines=none";
1410 OS << "]\n";
1411}
1412
1413void VPlanPrinter::dumpEdges(const VPBlockBase *Block) {
1414 auto &Successors = Block->getSuccessors();
1415 if (Successors.size() == 1)
1416 drawEdge(Block, Successors.front(), false, "");
1417 else if (Successors.size() == 2) {
1418 drawEdge(Block, Successors.front(), false, "T");
1419 drawEdge(Block, Successors.back(), false, "F");
1420 } else {
1421 unsigned SuccessorNumber = 0;
1422 for (auto *Successor : Successors)
1423 drawEdge(Block, Successor, false, Twine(SuccessorNumber++));
1424 }
1425}
1426
1427void VPlanPrinter::dumpBasicBlock(const VPBasicBlock *BasicBlock) {
1428 // Implement dot-formatted dump by performing plain-text dump into the
1429 // temporary storage followed by some post-processing.
1430 OS << Indent << getUID(BasicBlock) << " [label =\n";
1431 bumpIndent(1);
1432 std::string Str;
1433 raw_string_ostream SS(Str);
1434 // Use no indentation as we need to wrap the lines into quotes ourselves.
1435 BasicBlock->print(SS, "", SlotTracker);
1436
1437 // We need to process each line of the output separately, so split
1438 // single-string plain-text dump.
1440 StringRef(Str).rtrim('\n').split(Lines, "\n");
1441
1442 auto EmitLine = [&](StringRef Line, StringRef Suffix) {
1443 OS << Indent << '"' << DOT::EscapeString(Line.str()) << "\\l\"" << Suffix;
1444 };
1445
1446 // Don't need the "+" after the last line.
1447 for (auto Line : make_range(Lines.begin(), Lines.end() - 1))
1448 EmitLine(Line, " +\n");
1449 EmitLine(Lines.back(), "\n");
1450
1451 bumpIndent(-1);
1452 OS << Indent << "]\n";
1453
1454 dumpEdges(BasicBlock);
1455}
1456
1457void VPlanPrinter::dumpRegion(const VPRegionBlock *Region) {
1458 OS << Indent << "subgraph " << getUID(Region) << " {\n";
1459 bumpIndent(1);
1460 OS << Indent << "fontname=Courier\n"
1461 << Indent << "label=\""
1462 << DOT::EscapeString(Region->isReplicator() ? "<xVFxUF> " : "<x1> ")
1463 << DOT::EscapeString(Region->getName()) << "\"\n";
1464
1465 if (auto *CanIV = Region->getCanonicalIV()) {
1466 OS << Indent << "\"";
1467 std::string Op;
1468 raw_string_ostream S(Op);
1469 CanIV->printAsOperand(S, SlotTracker);
1470 OS << DOT::EscapeString(Op);
1471 OS << " = CANONICAL-IV\"\n";
1472 }
1473
1474 // Dump the blocks of the region.
1475 assert(Region->getEntry() && "Region contains no inner blocks.");
1476 for (const VPBlockBase *Block : vp_depth_first_shallow(Region->getEntry()))
1477 dumpBlock(Block);
1478 bumpIndent(-1);
1479 OS << Indent << "}\n";
1480 dumpEdges(Region);
1481}
1482
1483#endif
1484
1485/// Returns true if there is a vector loop region and \p VPV is defined in a
1486/// loop region.
1487static bool isDefinedInsideLoopRegions(const VPValue *VPV) {
1488 if (isa<VPRegionValue>(VPV))
1489 return true;
1490 const VPRecipeBase *DefR = VPV->getDefiningRecipe();
1491 return DefR && (!DefR->getParent()->getPlan()->getVectorLoopRegion() ||
1493}
1494
1499 replaceUsesWithIf(New, [](VPUser &, unsigned) { return true; });
1500 if (auto *SV = dyn_cast<VPSymbolicValue>(this))
1501 SV->markMaterialized();
1502}
1503
1505 VPValue *New,
1506 llvm::function_ref<bool(VPUser &U, unsigned Idx)> ShouldReplace) {
1508 // Note that this early exit is required for correctness; the implementation
1509 // below relies on the number of users for this VPValue to decrease, which
1510 // isn't the case if this == New.
1511 if (this == New)
1512 return;
1513
1514 for (unsigned J = 0; J < getNumUsers();) {
1515 VPUser *User = Users[J];
1516 bool RemovedUser = false;
1517 for (unsigned I = 0, E = User->getNumOperands(); I < E; ++I) {
1518 if (User->getOperand(I) != this || !ShouldReplace(*User, I))
1519 continue;
1520
1521 RemovedUser = true;
1522 User->setOperand(I, New);
1523 }
1524 // If a user got removed after updating the current user, the next user to
1525 // update will be moved to the current position, so we only need to
1526 // increment the index if the number of users did not change.
1527 if (!RemovedUser)
1528 J++;
1529 }
1530}
1531
1533 for (unsigned Idx = 0; Idx != getNumOperands(); ++Idx) {
1534 if (getOperand(Idx) == From)
1535 setOperand(Idx, To);
1536 }
1537}
1538
1539#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1541 OS << Tracker.getOrCreateName(this);
1542}
1543
1546 Op->printAsOperand(O, SlotTracker);
1547 });
1548}
1549#endif
1550
1551void VPSlotTracker::assignName(const VPValue *V) {
1552 assert(!VPValue2Name.contains(V) && "VPValue already has a name!");
1553 auto *UV = V->getUnderlyingValue();
1554 auto *VPI = dyn_cast_or_null<VPInstruction>(V);
1555 if (!UV && !(VPI && !VPI->getName().empty())) {
1556 VPValue2Name[V] = (Twine("vp<%") + Twine(NextSlot) + ">").str();
1557 NextSlot++;
1558 return;
1559 }
1560
1561 // Use the name of the underlying Value, wrapped in "ir<>", and versioned by
1562 // appending ".Number" to the name if there are multiple uses.
1563 std::string Name;
1564 if (UV)
1565 Name = getName(UV);
1566 else
1567 Name = VPI->getName();
1568
1569 assert(!Name.empty() && "Name cannot be empty.");
1570 StringRef Prefix = UV ? "ir<" : "vp<%";
1571 std::string BaseName = (Twine(Prefix) + Name + Twine(">")).str();
1572
1573 // First assign the base name for V.
1574 const auto &[A, _] = VPValue2Name.try_emplace(V, BaseName);
1575 // Integer or FP constants with different types will result in the same string
1576 // due to stripping types.
1578 return;
1579
1580 // If it is already used by C > 0 other VPValues, increase the version counter
1581 // C and use it for V.
1582 const auto &[C, UseInserted] = BaseName2Version.try_emplace(BaseName, 0);
1583 if (!UseInserted) {
1584 C->second++;
1585 A->second = (BaseName + Twine(".") + Twine(C->second)).str();
1586 }
1587}
1588
1589void VPSlotTracker::assignNames(const VPlan &Plan) {
1590 if (Plan.VF.getNumUsers() > 0)
1591 assignName(&Plan.VF);
1592 if (Plan.UF.getNumUsers() > 0)
1593 assignName(&Plan.UF);
1594 if (Plan.VFxUF.getNumUsers() > 0)
1595 assignName(&Plan.VFxUF);
1596 assignName(&Plan.VectorTripCount);
1597 if (Plan.BackedgeTakenCount)
1598 assignName(Plan.BackedgeTakenCount);
1599 for (VPValue *LI : Plan.getLiveIns())
1600 assignName(LI);
1601
1602 ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<const VPBlockBase *>>
1603 RPOT(VPBlockDeepTraversalWrapper<const VPBlockBase *>(Plan.getEntry()));
1604 for (const VPBlockBase *VPB : RPOT) {
1605 if (auto *VPBB = dyn_cast<VPBasicBlock>(VPB))
1606 assignNames(VPBB);
1607 else if (auto *CanIV = cast<VPRegionBlock>(VPB)->getCanonicalIV())
1608 assignName(CanIV);
1609 }
1610}
1611
1612void VPSlotTracker::assignNames(const VPBasicBlock *VPBB) {
1613 for (const VPRecipeBase &Recipe : *VPBB)
1614 for (VPValue *Def : Recipe.definedValues())
1615 assignName(Def);
1616}
1617
1618std::string VPSlotTracker::getName(const Value *V) {
1619 std::string Name;
1620 raw_string_ostream S(Name);
1621 if (V->hasName() || !isa<Instruction>(V)) {
1622 V->printAsOperand(S, false);
1623 return Name;
1624 }
1625
1626 if (!MST) {
1627 // Lazily create the ModuleSlotTracker when we first hit an unnamed
1628 // instruction.
1629 auto *I = cast<Instruction>(V);
1630 // This check is required to support unit tests with incomplete IR.
1631 if (I->getParent()) {
1632 MST = std::make_unique<ModuleSlotTracker>(I->getModule());
1633 MST->incorporateFunction(*I->getFunction());
1634 } else {
1635 MST = std::make_unique<ModuleSlotTracker>(nullptr);
1636 }
1637 }
1638 V->printAsOperand(S, false, *MST);
1639 return Name;
1640}
1641
1642std::string VPSlotTracker::getOrCreateName(const VPValue *V) const {
1643 std::string Name = VPValue2Name.lookup(V);
1644 if (!Name.empty())
1645 return Name;
1646
1647 // If no name was assigned, no VPlan was provided when creating the slot
1648 // tracker or it is not reachable from the provided VPlan. This can happen,
1649 // e.g. when trying to print a recipe that has not been inserted into a VPlan
1650 // in a debugger.
1651 // TODO: Update VPSlotTracker constructor to assign names to recipes &
1652 // VPValues not associated with a VPlan, instead of constructing names ad-hoc
1653 // here.
1654 const VPRecipeBase *DefR = V->getDefiningRecipe();
1655 (void)DefR;
1656 assert((!DefR || !DefR->getParent() || !DefR->getParent()->getPlan()) &&
1657 "VPValue defined by a recipe in a VPlan?");
1658
1659 // Use the underlying value's name, if there is one.
1660 if (auto *UV = V->getUnderlyingValue()) {
1661 std::string Name;
1662 raw_string_ostream S(Name);
1663 UV->printAsOperand(S, false);
1664 return (Twine("ir<") + Name + ">").str();
1665 }
1666
1667 return "<badref>";
1668}
1669
1671 VPValue *TrueVal,
1672 VPValue *FalseVal, DebugLoc DL) {
1673 assert(VPTypeAnalysis(*getInsertBlock()->getPlan())
1674 .inferScalarType(ChainOp)
1675 ->isIntegerTy(1) &&
1676 "ChainOp must be i1 for AnyOf reduction");
1677 VPIRFlags Flags(RecurKind::Or, /*IsOrdered=*/false, /*IsInLoop=*/false,
1678 FastMathFlags());
1679 auto *OrReduce =
1681 auto *Freeze = createNaryOp(Instruction::Freeze, {OrReduce}, DL);
1682 return createSelect(Freeze, TrueVal, FalseVal, DL, "rdx.select");
1683}
1684
1686 const std::function<bool(ElementCount)> &Predicate, VFRange &Range) {
1687 assert(!Range.isEmpty() && "Trying to test an empty VF range.");
1688 bool PredicateAtRangeStart = Predicate(Range.Start);
1689
1690 for (ElementCount TmpVF : VFRange(Range.Start * 2, Range.End))
1691 if (Predicate(TmpVF) != PredicateAtRangeStart) {
1692 Range.End = TmpVF;
1693 break;
1694 }
1695
1696 return PredicateAtRangeStart;
1697}
1698
1699/// Build VPlans for the full range of feasible VF's = {\p MinVF, 2 * \p MinVF,
1700/// 4 * \p MinVF, ..., \p MaxVF} by repeatedly building a VPlan for a sub-range
1701/// of VF's starting at a given VF and extending it as much as possible. Each
1702/// vectorization decision can potentially shorten this sub-range during
1703/// buildVPlan().
1705 ElementCount MaxVF) {
1706 auto MaxVFTimes2 = MaxVF * 2;
1707 for (ElementCount VF = MinVF; ElementCount::isKnownLT(VF, MaxVFTimes2);) {
1708 VFRange SubRange = {VF, MaxVFTimes2};
1709 if (auto Plan = tryToBuildVPlan(SubRange)) {
1711 // Update the name of the latch of the top-level vector loop region region
1712 // after optimizations which includes block folding.
1713 Plan->getVectorLoopRegion()->getExiting()->setName("vector.latch");
1714 VPlans.push_back(std::move(Plan));
1715 }
1716 VF = SubRange.End;
1717 }
1718}
1719
1721 assert(count_if(VPlans,
1722 [VF](const VPlanPtr &Plan) { return Plan->hasVF(VF); }) ==
1723 1 &&
1724 "Multiple VPlans for VF.");
1725
1726 for (const VPlanPtr &Plan : VPlans) {
1727 if (Plan->hasVF(VF))
1728 return *Plan.get();
1729 }
1730 llvm_unreachable("No plan found!");
1731}
1732
1735 // Reserve first location for self reference to the LoopID metadata node.
1736 MDs.push_back(nullptr);
1737 bool IsUnrollMetadata = false;
1738 MDNode *LoopID = L->getLoopID();
1739 if (LoopID) {
1740 // First find existing loop unrolling disable metadata.
1741 for (unsigned I = 1, IE = LoopID->getNumOperands(); I < IE; ++I) {
1742 auto *MD = dyn_cast<MDNode>(LoopID->getOperand(I));
1743 if (MD) {
1744 const auto *S = dyn_cast<MDString>(MD->getOperand(0));
1745 if (!S)
1746 continue;
1747 if (S->getString().starts_with("llvm.loop.unroll.runtime.disable"))
1748 continue;
1749 IsUnrollMetadata =
1750 S->getString().starts_with("llvm.loop.unroll.disable");
1751 }
1752 MDs.push_back(LoopID->getOperand(I));
1753 }
1754 }
1755
1756 if (!IsUnrollMetadata) {
1757 // Add runtime unroll disable metadata.
1758 LLVMContext &Context = L->getHeader()->getContext();
1759 SmallVector<Metadata *, 1> DisableOperands;
1760 DisableOperands.push_back(
1761 MDString::get(Context, "llvm.loop.unroll.runtime.disable"));
1762 MDNode *DisableNode = MDNode::get(Context, DisableOperands);
1763 MDs.push_back(DisableNode);
1764 MDNode *NewLoopID = MDNode::get(Context, MDs);
1765 // Set operand 0 to refer to the loop id itself.
1766 NewLoopID->replaceOperandWith(0, NewLoopID);
1767 L->setLoopID(NewLoopID);
1768 }
1769}
1770
1772 Loop *VectorLoop, VPBasicBlock *HeaderVPBB, const VPlan &Plan,
1773 bool VectorizingEpilogue, MDNode *OrigLoopID,
1774 std::optional<unsigned> OrigAverageTripCount,
1775 unsigned OrigLoopInvocationWeight, unsigned EstimatedVFxUF,
1776 bool DisableRuntimeUnroll) {
1777 // Update the metadata of the scalar loop. Skip the update when vectorizing
1778 // the epilogue loop to ensure it is updated only once. Also skip the update
1779 // when the scalar loop became unreachable.
1780 auto *ScalarPH = Plan.getScalarPreheader();
1781 if (ScalarPH && !VectorizingEpilogue) {
1782 std::optional<MDNode *> RemainderLoopID =
1785 if (RemainderLoopID) {
1786 OrigLoop->setLoopID(*RemainderLoopID);
1787 } else {
1788 if (DisableRuntimeUnroll)
1790
1791 LoopVectorizeHints Hints(OrigLoop, true, *ORE);
1792 Hints.setAlreadyVectorized();
1793 }
1794 }
1795
1796 if (!VectorLoop)
1797 return;
1798
1799 if (std::optional<MDNode *> VectorizedLoopID = makeFollowupLoopID(
1800 OrigLoopID, {LLVMLoopVectorizeFollowupAll,
1802 VectorLoop->setLoopID(*VectorizedLoopID);
1803 } else {
1804 // Keep all loop hints from the original loop on the vector loop (we'll
1805 // replace the vectorizer-specific hints below).
1806 if (OrigLoopID)
1807 VectorLoop->setLoopID(OrigLoopID);
1808
1809 if (!VectorizingEpilogue) {
1810 LoopVectorizeHints Hints(VectorLoop, true, *ORE);
1811 Hints.setAlreadyVectorized();
1812 }
1813 }
1815 TTI.getUnrollingPreferences(VectorLoop, *PSE.getSE(), UP, ORE);
1816 if (!UP.UnrollVectorizedLoop || VectorizingEpilogue)
1818
1819 // Set/update profile weights for the vector and remainder loops as original
1820 // loop iterations are now distributed among them. Note that original loop
1821 // becomes the scalar remainder loop after vectorization.
1822 //
1823 // For cases like foldTailByMasking() and requiresScalarEpiloque() we may
1824 // end up getting slightly roughened result but that should be OK since
1825 // profile is not inherently precise anyway. Note also possible bypass of
1826 // vector code caused by legality checks is ignored, assigning all the weight
1827 // to the vector loop, optimistically.
1828 //
1829 // For scalable vectorization we can't know at compile time how many
1830 // iterations of the loop are handled in one vector iteration, so instead
1831 // use the value of vscale used for tuning.
1832 unsigned AverageVectorTripCount = 0;
1833 unsigned RemainderAverageTripCount = 0;
1834 auto EC = VectorLoop->getLoopPreheader()->getParent()->getEntryCount();
1835 auto IsProfiled = EC && EC->getCount();
1836 if (!OrigAverageTripCount) {
1837 if (!IsProfiled)
1838 return;
1839 auto &SE = *PSE.getSE();
1840 AverageVectorTripCount = SE.getSmallConstantTripCount(VectorLoop);
1841 if (ProfcheckDisableMetadataFixes || !AverageVectorTripCount)
1842 return;
1843 if (ScalarPH)
1844 RemainderAverageTripCount =
1845 SE.getSmallConstantTripCount(OrigLoop) % EstimatedVFxUF;
1846 // Setting to 1 should be sufficient to generate the correct branch weights.
1847 OrigLoopInvocationWeight = 1;
1848 } else {
1849 // Calculate number of iterations in unrolled loop.
1850 AverageVectorTripCount = *OrigAverageTripCount / EstimatedVFxUF;
1851 // Calculate number of iterations for remainder loop.
1852 RemainderAverageTripCount = *OrigAverageTripCount % EstimatedVFxUF;
1853 }
1854 if (HeaderVPBB) {
1855 setLoopEstimatedTripCount(VectorLoop, AverageVectorTripCount,
1856 OrigLoopInvocationWeight);
1857 }
1858
1859 if (ScalarPH) {
1860 setLoopEstimatedTripCount(OrigLoop, RemainderAverageTripCount,
1861 OrigLoopInvocationWeight);
1862 }
1863}
1864
1865#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1867 if (VPlans.empty()) {
1868 O << "LV: No VPlans built.\n";
1869 return;
1870 }
1871 for (const auto &Plan : VPlans)
1873 Plan->printDOT(O);
1874 else
1875 Plan->print(O);
1876}
1877#endif
1878
1879bool llvm::canConstantBeExtended(const APInt *C, Type *NarrowType,
1881 APInt TruncatedVal = C->trunc(NarrowType->getScalarSizeInBits());
1882 unsigned WideSize = C->getBitWidth();
1883 APInt ExtendedVal = ExtKind == TTI::PR_SignExtend
1884 ? TruncatedVal.sext(WideSize)
1885 : TruncatedVal.zext(WideSize);
1886 return ExtendedVal == *C;
1887}
1888
1891 if (auto *IRV = dyn_cast<VPIRValue>(V))
1892 return TTI::getOperandInfo(IRV->getValue());
1893
1894 return {};
1895}
1896
1898 Type *ResultTy, ArrayRef<const VPValue *> Operands, ElementCount VF,
1899 TTI::VectorInstrContext VIC, bool AlwaysIncludeReplicatingR) {
1900 if (VF.isScalar())
1901 return 0;
1902
1903 assert(!VF.isScalable() &&
1904 "Scalarization overhead not supported for scalable vectors");
1905
1906 InstructionCost ScalarizationCost = 0;
1907 // Compute the cost of scalarizing the result if needed.
1908 if (!ResultTy->isVoidTy()) {
1909 for (Type *VectorTy :
1910 to_vector(getContainedTypes(toVectorizedTy(ResultTy, VF)))) {
1911 ScalarizationCost += TTI.getScalarizationOverhead(
1913 /*Insert=*/true, /*Extract=*/false, CostKind,
1914 /*ForPoisonSrc=*/true, {}, VIC);
1915 }
1916 }
1917 // Compute the cost of scalarizing the operands, skipping ones that do not
1918 // require extraction/scalarization and do not incur any overhead.
1919 SmallPtrSet<const VPValue *, 4> UniqueOperands;
1921 for (auto *Op : Operands) {
1922 if (isa<VPIRValue>(Op) ||
1923 (!AlwaysIncludeReplicatingR &&
1926 cast<VPReplicateRecipe>(Op)->getOpcode() == Instruction::Load) ||
1927 !UniqueOperands.insert(Op).second)
1928 continue;
1929 Tys.push_back(toVectorizedTy(Types.inferScalarType(Op), VF));
1930 }
1931 return ScalarizationCost +
1932 TTI.getOperandsScalarizationOverhead(Tys, CostKind, VIC);
1933}
1934
1936 ElementCount VF) {
1937 const Instruction *UI = R->getUnderlyingInstr();
1938 if (isa<LoadInst>(UI))
1939 return true;
1940 assert(isa<StoreInst>(UI) && "R must either be a load or store");
1941
1942 if (!NumPredStores) {
1943 // Count the number of predicated stores in the VPlan, caching the result.
1944 // Only stores where scatter is not legal are counted, matching the legacy
1945 // cost model behavior.
1946 const VPlan &Plan = *R->getParent()->getPlan();
1947 NumPredStores = 0;
1948 for (const VPRegionBlock *VPRB :
1951 assert(VPRB->isReplicator() && "must only contain replicate regions");
1952 for (const VPBasicBlock *VPBB :
1954 vp_depth_first_shallow(VPRB->getEntry()))) {
1955 for (const VPRecipeBase &Recipe : *VPBB) {
1956 auto *RepR = dyn_cast<VPReplicateRecipe>(&Recipe);
1957 if (!RepR)
1958 continue;
1959 if (!isa<StoreInst>(RepR->getUnderlyingInstr()))
1960 continue;
1961 // Check if scatter is legal for this store. If so, don't count it.
1962 Type *Ty = Types.inferScalarType(RepR->getOperand(0));
1963 auto *VTy = VectorType::get(Ty, VF);
1964 const Align Alignment =
1965 getLoadStoreAlignment(RepR->getUnderlyingInstr());
1966 if (!TTI.isLegalMaskedScatter(VTy, Alignment))
1967 ++(*NumPredStores);
1968 }
1969 }
1970 }
1971 }
1973}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
amdgpu next use AMDGPU Next Use Analysis Printer
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition Compiler.h:661
Flatten the CFG
#define _
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
This file defines the LoopVectorizationLegality class.
This file provides a LoopVectorizationPlanner class.
cl::opt< unsigned > NumberOfStoresToPredicate("vectorize-num-stores-pred", cl::init(1), cl::Hidden, cl::desc("Max number of stores to be predicated behind an if."))
The number of stores in a loop that are allowed to need predication.
#define I(x, y, z)
Definition MD5.cpp:57
#define T
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
#define P(N)
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
static StringRef getName(Value *V)
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallVector class.
This file contains some functions that are useful when dealing with strings.
#define LLVM_DEBUG(...)
Definition Debug.h:114
This file implements dominator tree analysis for a single level of a VPlan's H-CFG.
This file contains the declarations of different VPlan-related auxiliary helpers.
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition VPlanSLP.cpp:247
This file provides utility VPlan to VPlan transformations.
#define RUN_VPLAN_PASS(PASS,...)
static void addRuntimeUnrollDisableMetaData(Loop *L)
Definition VPlan.cpp:1733
static T * getPlanEntry(T *Start)
Definition VPlan.cpp:157
static void printFinalVPlan(VPlan &)
To make RUN_VPLAN_PASS print final VPlan.
Definition VPlan.cpp:945
static T * getEnclosingLoopRegionForRegion(T *P)
Return the enclosing loop region for region P.
Definition VPlan.cpp:588
const char LLVMLoopVectorizeFollowupAll[]
Definition VPlan.cpp:61
static bool isDefinedInsideLoopRegions(const VPValue *VPV)
Returns true if there is a vector loop region and VPV is defined in a loop region.
Definition VPlan.cpp:1487
static bool hasConditionalTerminator(const VPBasicBlock *VPBB)
Definition VPlan.cpp:606
const char LLVMLoopVectorizeFollowupVectorized[]
Definition VPlan.cpp:62
static void remapOperands(VPBlockBase *Entry, VPBlockBase *NewEntry, DenseMap< VPValue *, VPValue * > &Old2NewVPValues)
Definition VPlan.cpp:1205
const char LLVMLoopVectorizeFollowupEpilogue[]
Definition VPlan.cpp:64
static cl::opt< bool > PrintVPlansInDotFormat("vplan-print-in-dot-format", cl::Hidden, cl::desc("Use dot format instead of plain text when dumping VPlans"))
This file contains the declarations of the Vectorization Plan base classes:
static bool IsCondBranch(unsigned BrOpc)
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1054
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
Definition APInt.cpp:1027
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
iterator begin()
Instruction iterator methods.
Definition BasicBlock.h:461
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition BasicBlock.h:206
LLVM_ABI const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
size_t size() const
Definition BasicBlock.h:482
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
Definition BasicBlock.h:237
std::optional< const DILocation * > cloneByMultiplyingDuplicationFactor(unsigned DF) const
Returns a new DILocation with duplication factor DF * current duplication factor encoded in the discr...
A debug info location.
Definition DebugLoc.h:123
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition DenseMap.h:205
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Definition DenseMap.h:169
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:159
constexpr bool isVector() const
One or more elements.
Definition TypeSize.h:324
constexpr bool isScalar() const
Exactly one element.
Definition TypeSize.h:320
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:23
std::optional< ProfileCount > getEntryCount(bool AllowSynthetic=false) const
Get the entry count for this function.
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
static InstructionCost getInvalid(CostType Val=0)
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
A helper class to return the specified delimiter string after the first invocation of operator String...
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase< BlockT, LoopT > &LI)
This method is used by other analyses to update loop information.
std::vector< BlockT * > & getBlocksVector()
Return a direct, mutable handle to the blocks vector so that we can mutate it efficiently with techni...
void addChildLoop(LoopT *NewChild)
Add the specified loop to be a child of this loop.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
iterator begin() const
VPlan & getPlanFor(ElementCount VF) const
Return the VPlan for VF.
Definition VPlan.cpp:1720
void updateLoopMetadataAndProfileInfo(Loop *VectorLoop, VPBasicBlock *HeaderVPBB, const VPlan &Plan, bool VectorizingEpilogue, MDNode *OrigLoopID, std::optional< unsigned > OrigAverageTripCount, unsigned OrigLoopInvocationWeight, unsigned EstimatedVFxUF, bool DisableRuntimeUnroll)
Update loop metadata and profile info for both the scalar remainder loop and VectorLoop,...
Definition VPlan.cpp:1771
void buildVPlans(ElementCount MinVF, ElementCount MaxVF)
Build VPlans for power-of-2 VF's between MinVF and MaxVF inclusive, according to the information gath...
Definition VPlan.cpp:1704
static bool getDecisionAndClampRange(const std::function< bool(ElementCount)> &Predicate, VFRange &Range)
Test a Predicate on a Range of VF's.
Definition VPlan.cpp:1685
void printPlans(raw_ostream &O)
Definition VPlan.cpp:1866
Utility class for getting and setting loop vectorizer hints in the form of loop metadata.
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
void setLoopID(MDNode *LoopID) const
Set the llvm.loop loop id metadata for this loop.
Definition LoopInfo.cpp:547
Metadata node.
Definition Metadata.h:1080
LLVM_ABI void replaceOperandWith(unsigned I, Metadata *New)
Replace a specific operand.
const MDOperand & getOperand(unsigned I) const
Definition Metadata.h:1444
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1572
unsigned getNumOperands() const
Return number of MDNode operands.
Definition Metadata.h:1450
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
Definition Metadata.cpp:614
BlockT * getEntry() const
Get the entry BasicBlock of the Region.
Definition RegionInfo.h:320
size_type size() const
Determine the number of elements in the SetVector.
Definition SetVector.h:103
void insert_range(Range &&R)
Definition SetVector.h:176
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:151
This class provides computation of slot numbers for LLVM Assembly writing.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:339
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
std::pair< iterator, bool > try_emplace(StringRef Key, ArgsTy &&...Args)
Emplace a new element for the specified key into the map if the key isn't already in the map.
Definition StringMap.h:381
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition StringRef.h:730
StringRef rtrim(char Char) const
Return string with consecutive Char characters starting from the right removed.
Definition StringRef.h:832
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
VectorInstrContext
Represents a hint about the context in which an insert/extract is used.
static LLVM_ABI OperandValueInfo getOperandInfo(const Value *V)
Collect properties of V used in cost analysis, e.g. OP_PowerOf2.
Target - Wrapper for Target specific information.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:236
bool isVoidTy() const
Return true if this is 'void'.
Definition Type.h:141
static UncondBrInst * Create(BasicBlock *Target, InsertPosition InsertBefore=nullptr)
This function has undefined behavior.
void setOperand(unsigned i, Value *Val)
Definition User.h:212
Value * getOperand(unsigned i) const
Definition User.h:207
unsigned getNumOperands() const
Definition User.h:229
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition VPlan.h:4160
void appendRecipe(VPRecipeBase *Recipe)
Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.
Definition VPlan.h:4235
RecipeListTy::iterator iterator
Instruction iterators...
Definition VPlan.h:4187
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPBasicBlock,...
Definition VPlan.cpp:493
iterator end()
Definition VPlan.h:4197
iterator begin()
Recipe iterator methods.
Definition VPlan.h:4195
VPBasicBlock * clone() override
Clone the current block and it's recipes, without updating the operands of the cloned recipes.
Definition VPlan.cpp:544
InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override
Return the cost of this VPBasicBlock.
Definition VPlan.cpp:779
const VPBasicBlock * getCFGPredecessor(unsigned Idx) const
Returns the predecessor block at index Idx with the predecessors as per the corresponding plain CFG.
Definition VPlan.cpp:786
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
Definition VPlan.cpp:232
void connectToPredecessors(VPTransformState &State)
Connect the VPBBs predecessors' in the VPlan CFG to the IR basic block generated for this VPBB.
Definition VPlan.cpp:393
VPRegionBlock * getEnclosingLoopRegion()
Definition VPlan.cpp:598
VPBasicBlock * splitAt(iterator SplitAt)
Split current block at SplitAt by inserting a new block between the current block and its successors ...
Definition VPlan.cpp:565
RecipeListTy Recipes
The VPRecipes held in the order of output instructions to generate.
Definition VPlan.h:4175
void executeRecipes(VPTransformState *State, BasicBlock *BB)
Execute the recipes in the IR basic block BB.
Definition VPlan.cpp:551
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print this VPBsicBlock to O, prefixing all lines with Indent.
Definition VPlan.cpp:678
bool isExiting() const
Returns true if the block is exiting it's parent region.
Definition VPlan.cpp:656
VPRecipeBase * getTerminator()
If the block has multiple successors, return the branch recipe terminating the block.
Definition VPlan.cpp:644
const VPRecipeBase & back() const
Definition VPlan.h:4209
bool empty() const
Definition VPlan.h:4206
size_t size() const
Definition VPlan.h:4205
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
Definition VPlan.h:98
void setSuccessors(ArrayRef< VPBlockBase * > NewSuccs)
Set each VPBasicBlock in NewSuccss as successor of this VPBlockBase.
Definition VPlan.h:319
VPRegionBlock * getParent()
Definition VPlan.h:190
const VPBasicBlock * getExitingBasicBlock() const
Definition VPlan.cpp:202
void setName(const Twine &newName)
Definition VPlan.h:183
size_t getNumSuccessors() const
Definition VPlan.h:241
iterator_range< VPBlockBase ** > successors()
Definition VPlan.h:223
virtual void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const =0
Print plain-text dump of this VPBlockBase to O, prefixing all lines with Indent.
bool hasPredecessors() const
Returns true if this block has any predecessors.
Definition VPlan.h:221
void printSuccessors(raw_ostream &O, const Twine &Indent) const
Print the successors of this block to O, prefixing all lines with Indent.
Definition VPlan.cpp:666
size_t getNumPredecessors() const
Definition VPlan.h:242
void setPredecessors(ArrayRef< VPBlockBase * > NewPreds)
Set each VPBasicBlock in NewPreds as predecessor of this VPBlockBase.
Definition VPlan.h:310
VPBlockBase * getEnclosingBlockWithPredecessors()
Definition VPlan.cpp:224
bool hasSuccessors() const
Returns true if this block has any successors.
Definition VPlan.h:219
const VPBlocksTy & getPredecessors() const
Definition VPlan.h:226
VPlan * getPlan()
Definition VPlan.cpp:177
void setPlan(VPlan *ParentPlan)
Sets the pointer of the plan containing the block.
Definition VPlan.cpp:196
const std::string & getName() const
Definition VPlan.h:181
VPBlockBase * getSinglePredecessor() const
Definition VPlan.h:237
const VPBlocksTy & getHierarchicalSuccessors()
Definition VPlan.h:261
VPBlockBase(const unsigned char SC, const std::string &N)
Definition VPlan.h:167
VPBlockBase * getEnclosingBlockWithSuccessors()
An Enclosing Block of a block B is any block containing B, including B itself.
Definition VPlan.cpp:216
const VPBasicBlock * getEntryBasicBlock() const
Definition VPlan.cpp:182
VPBlockBase * getSingleHierarchicalPredecessor()
Definition VPlan.h:283
VPBlockBase * getSingleSuccessor() const
Definition VPlan.h:231
const VPBlocksTy & getSuccessors() const
Definition VPlan.h:215
static void insertBlockAfter(VPBlockBase *NewBlock, VPBlockBase *BlockPtr)
Insert disconnected VPBlockBase NewBlock after BlockPtr.
Definition VPlanUtils.h:174
static bool isLatch(const VPBlockBase *VPB, const VPDominatorTree &VPDT)
Returns true if VPB is a loop latch, using isHeader().
static bool isHeader(const VPBlockBase *VPB, const VPDominatorTree &VPDT)
Returns true if VPB is a loop header, based on regions or VPDT in their absence.
static void connectBlocks(VPBlockBase *From, VPBlockBase *To, unsigned PredIdx=-1u, unsigned SuccIdx=-1u)
Connect VPBlockBases From and To bi-directionally.
Definition VPlanUtils.h:222
static void disconnectBlocks(VPBlockBase *From, VPBlockBase *To)
Disconnect VPBlockBases From and To bi-directionally.
Definition VPlanUtils.h:240
static auto blocksOnly(T &&Range)
Return an iterator range over Range which only includes BlockTy blocks.
Definition VPlanUtils.h:276
static void transferSuccessors(VPBlockBase *Old, VPBlockBase *New)
Transfer successors from Old to New. New must have no successors.
Definition VPlanUtils.h:260
static std::pair< VPBlockBase *, VPBlockBase * > cloneFrom(VPBlockBase *Entry)
Clone the CFG for all nodes reachable from Entry, including cloning the blocks and their recipes.
Definition VPlan.cpp:693
VPlan-based builder utility analogous to IRBuilder.
VPInstruction * createAnyOfReduction(VPValue *ChainOp, VPValue *TrueVal, VPValue *FalseVal, DebugLoc DL=DebugLoc::getUnknown())
Create an AnyOf reduction pattern: or-reduce ChainOp, freeze the result, then select between TrueVal ...
Definition VPlan.cpp:1670
VPBasicBlock * getInsertBlock() const
VPInstruction * createOverflowingOp(unsigned Opcode, ArrayRef< VPValue * > Operands, VPRecipeWithIRFlags::WrapFlagsTy WrapFlags={false, false}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPPhi * createScalarPhi(ArrayRef< VPValue * > IncomingValues, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", const VPIRFlags &Flags={})
VPInstruction * createSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", const VPIRFlags &Flags={})
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, Instruction *Inst=nullptr, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create an N-ary operation with Opcode, Operands and set Inst as its underlying Instruction.
This class augments a recipe with a set of VPValues defined by the recipe.
Definition VPlanValue.h:432
A special type of VPBasicBlock that wraps an existing IR basic block.
Definition VPlan.h:4313
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPBasicBlock,...
Definition VPlan.cpp:461
BasicBlock * getIRBasicBlock() const
Definition VPlan.h:4337
VPIRBasicBlock * clone() override
Clone the current block and it's recipes, without updating the operands of the cloned recipes.
Definition VPlan.cpp:486
Class to record and manage LLVM IR flags.
Definition VPlan.h:688
static LLVM_ABI_FOR_TEST VPIRInstruction * create(Instruction &I)
Create a new VPIRPhi for \I , if it is a PHINode, otherwise create a VPIRInstruction.
This is a concrete Recipe that models a single VPlan-level instruction.
Definition VPlan.h:1223
@ ComputeReductionResult
Reduce the operands to the final reduction result using the operation specified via the operation's V...
Definition VPlan.h:1270
In what follows, the term "input IR" refers to code that is fed into the vectorizer whereas the term ...
Value * getAsRuntimeExpr(IRBuilderBase &Builder, const ElementCount &VF) const
Returns an expression describing the lane index that can be used at runtime.
Definition VPlan.cpp:87
static VPLane getFirstLane()
@ ScalableLast
For ScalableLast, Lane is the offset from the start of the last N-element subvector in a scalable vec...
@ First
For First, Lane is the index into the first N elements of a fixed-vector <N x <ElTy>> or a scalable v...
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition VPlan.h:406
LLVM_ABI_FOR_TEST void dump() const
Dump the recipe to stderr (for debugging).
Definition VPlan.cpp:116
VPBasicBlock * getParent()
Definition VPlan.h:480
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const
Print the recipe, delegating to printRecipe().
virtual LLVM_ABI_FOR_TEST ~VPRecipeValue()
Definition VPlan.cpp:149
friend class VPValue
Definition VPlanValue.h:304
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
Definition VPlan.h:4370
VPRegionBlock * clone() override
Clone all blocks in the single-entry single-exit region of the block and their recipes without updati...
Definition VPlan.cpp:743
const VPBlockBase * getEntry() const
Definition VPlan.h:4414
void dissolveToCFGLoop()
Remove the current region from its VPlan, connecting its predecessor to its entry,...
Definition VPlan.cpp:854
bool isReplicator() const
An indicator whether this region is to generate multiple replicated instances of output IR correspond...
Definition VPlan.h:4446
VPInstruction * getOrCreateCanonicalIVIncrement()
Get the canonical IV increment instruction if it exists.
Definition VPlan.cpp:880
InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override
Return the cost of the block.
Definition VPlan.cpp:805
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print this VPRegionBlock to O (recursively), prefixing all lines with Indent.
Definition VPlan.cpp:835
bool hasCanonicalIVNUW() const
Indicates if NUW is set for the canonical IV increment, for loop regions.
Definition VPlan.h:4495
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPRegionBlock,...
Definition VPlan.cpp:757
VPRegionValue * getCanonicalIV()
Return the canonical induction variable of the region, null for replicating regions.
Definition VPlan.h:4482
const VPBlockBase * getExiting() const
Definition VPlan.h:4426
friend class VPlan
Definition VPlan.h:4371
VPValues defined by a VPRegionBlock, like the canonical IV.
Definition VPlanValue.h:209
Type * getType() const
Returns the type of the VPRegionValue.
Definition VPlanValue.h:225
DebugLoc getDebugLoc() const
Returns the debug location of the VPRegionValue.
Definition VPlanValue.h:228
VPReplicateRecipe replicates a given instruction producing multiple scalar copies of the original sca...
Definition VPlan.h:3196
This class can be used to assign names to VPValues.
std::string getOrCreateName(const VPValue *V) const
Returns the name assigned to V, if there is one, otherwise try to construct one from the underlying v...
Definition VPlan.cpp:1642
An analysis for type-inference for VPValues.
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
Definition VPlanValue.h:329
void replaceUsesOfWith(VPValue *From, VPValue *To)
Replaces all uses of From in the VPUser with To.
Definition VPlan.cpp:1532
void printOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the operands to O.
Definition VPlan.cpp:1544
operand_range operands()
Definition VPlanValue.h:397
void setOperand(unsigned I, VPValue *New)
Definition VPlanValue.h:373
unsigned getNumOperands() const
Definition VPlanValue.h:367
VPValue * getOperand(unsigned N) const
Definition VPlanValue.h:368
This is the base class of the VPlan Def/Use graph, used for modeling the data flow into,...
Definition VPlanValue.h:49
Value * getLiveInIRValue() const
Return the underlying IR value for a VPIRValue.
Definition VPlan.cpp:137
bool isDefinedOutsideLoopRegions() const
Returns true if the VPValue is defined outside any loop.
Definition VPlan.cpp:1495
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Definition VPlan.cpp:127
void printAsOperand(raw_ostream &OS, VPSlotTracker &Tracker) const
Definition VPlan.cpp:1540
friend class VPRecipeValue
Definition VPlanValue.h:52
void assertNotMaterialized() const
Assert that this VPValue has not been materialized, if it is a VPSymbolicValue.
Definition VPlanValue.h:501
Value * getUnderlyingValue() const
Return the underlying Value attached to this VPValue.
Definition VPlanValue.h:74
@ VPVRecipeValueSC
A symbolic live-in VPValue without IR backing.
Definition VPlanValue.h:84
void dump() const
Dump the value to stderr (for debugging).
Definition VPlan.cpp:108
void print(raw_ostream &OS, VPSlotTracker &Tracker) const
Definition VPlan.cpp:101
void replaceAllUsesWith(VPValue *New)
Definition VPlan.cpp:1498
unsigned getNumUsers() const
Definition VPlanValue.h:113
void replaceUsesWithIf(VPValue *New, llvm::function_ref< bool(VPUser &U, unsigned Idx)> ShouldReplace)
Go through the uses list for this VPValue and make each use point to New if the callback ShouldReplac...
Definition VPlan.cpp:1504
LLVM_DUMP_METHOD void dump()
Definition VPlan.cpp:1357
VPlan models a candidate for vectorization, encoding various decisions take to produce efficient outp...
Definition VPlan.h:4518
LLVM_ABI_FOR_TEST void printDOT(raw_ostream &O) const
Print this VPlan in DOT format to O.
Definition VPlan.cpp:1196
friend class VPSlotTracker
Definition VPlan.h:4520
std::string getName() const
Return a string with the name of the plan and the applicable VFs and UFs.
Definition VPlan.cpp:1172
VPBasicBlock * getEntry()
Definition VPlan.h:4610
void setName(const Twine &newName)
Definition VPlan.h:4778
VPIRBasicBlock * getExitBlock(BasicBlock *IRBB) const
Return the VPIRBasicBlock corresponding to IRBB.
Definition VPlan.cpp:932
LLVM_ABI_FOR_TEST ~VPlan()
Definition VPlan.cpp:907
bool isExitBlock(VPBlockBase *VPBB)
Returns true if VPBB is an exit block.
Definition VPlan.cpp:940
friend class VPlanPrinter
Definition VPlan.h:4519
VPSymbolicValue & getVFxUF()
Returns VF * UF of the vector loop region.
Definition VPlan.h:4708
VPIRBasicBlock * createEmptyVPIRBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock wrapping IRBB, but do not create VPIRInstructions wrapping the instructions i...
Definition VPlan.cpp:1329
auto getLiveIns() const
Return the list of live-in VPValues available in the VPlan.
Definition VPlan.h:4837
ArrayRef< VPIRBasicBlock * > getExitBlocks() const
Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of the original scalar loop.
Definition VPlan.h:4659
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
Definition VPlan.cpp:1095
bool hasEarlyExit() const
Returns true if the VPlan is based on a loop with an early exit.
Definition VPlan.h:4905
InstructionCost cost(ElementCount VF, VPCostContext &Ctx)
Return the cost of this plan.
Definition VPlan.cpp:1077
unsigned getConcreteUF() const
Returns the concrete UF of the plan, after unrolling.
Definition VPlan.h:4760
void setEntry(VPBasicBlock *VPBB)
Definition VPlan.h:4599
VPBasicBlock * createVPBasicBlock(const Twine &Name, VPRecipeBase *Recipe=nullptr)
Create a new VPBasicBlock with Name and containing Recipe if present.
Definition VPlan.h:4860
LLVM_ABI_FOR_TEST VPIRBasicBlock * createVPIRBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock from IRBB containing VPIRInstructions for all instructions in IRBB,...
Definition VPlan.cpp:1335
LLVM_DUMP_METHOD void dump() const
Dump the plan to stderr (for debugging).
Definition VPlan.cpp:1202
VPBasicBlock * getScalarPreheader() const
Return the VPBasicBlock for the preheader of the scalar loop.
Definition VPlan.h:4649
void execute(VPTransformState *State)
Generate the IR code for this VPlan.
Definition VPlan.cpp:950
LLVM_ABI_FOR_TEST void print(raw_ostream &O) const
Print this VPlan to O.
Definition VPlan.cpp:1155
VPIRBasicBlock * getScalarHeader() const
Return the VPIRBasicBlock wrapping the header of the scalar loop.
Definition VPlan.h:4655
void printLiveIns(raw_ostream &O) const
Print the live-ins of this VPlan to O.
Definition VPlan.cpp:1111
LLVM_ABI_FOR_TEST VPlan * duplicate()
Clone the current VPlan, update all VPValues of the new VPlan and cloned recipes to refer to the clon...
Definition VPlan.cpp:1243
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:318
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
static constexpr bool isKnownLT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition TypeSize.h:216
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition TypeSize.h:168
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:165
An efficient, type-erasing, non-owning reference to a callable.
self_iterator getIterator()
Definition ilist_node.h:123
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
A raw_ostream that writes to an std::string.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Tail
Attemps to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition CallingConv.h:76
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
LLVM_ABI std::string EscapeString(const std::string &Label)
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
match_combine_or< Ty... > m_CombineOr(const Ty &...Ps)
Combine pattern matchers matching any of Ps patterns.
bool match(Val *V, const Pattern &P)
VPInstruction_match< VPInstruction::BranchOnTwoConds > m_BranchOnTwoConds()
VPInstruction_match< VPInstruction::BranchOnCount > m_BranchOnCount()
VPInstruction_match< VPInstruction::BuildVector > m_BuildVector()
BuildVector is matches only its opcode, w/o matching its operands as the number of operands is not fi...
VPInstruction_match< VPInstruction::BranchOnCond > m_BranchOnCond()
bool isSingleScalar(const VPValue *VPV)
Returns true if VPV is a single scalar, either because it produces the same value for all lanes or on...
VPBasicBlock * getFirstLoopHeader(VPlan &Plan, VPDominatorTree &VPDT)
Returns the header block of the first, top-level loop, or null if none exist.
VPInstruction * findCanonicalIVIncrement(VPlan &Plan)
Find the canonical IV increment of Plan's vector loop region.
bool onlyFirstLaneUsed(const VPValue *Def)
Returns true if only the first lane of Def is used.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
Definition STLExtras.h:831
cl::opt< bool > ProfcheckDisableMetadataFixes
Definition LoopInfo.cpp:60
InstructionCost Cost
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
auto successors(const MachineBasicBlock *BB)
LLVM_ABI cl::opt< bool > EnableFSDiscriminator
Value * getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF)
Return the runtime value for VF.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ABI std::optional< MDNode * > makeFollowupLoopID(MDNode *OrigLoopID, ArrayRef< StringRef > FollowupAttrs, const char *InheritOptionsAttrsPrefix="", bool AlwaysNew=false)
Create a new loop identifier for a loop created from a loop transformation.
void interleaveComma(const Container &c, StreamT &os, UnaryFunctor each_fn)
Definition STLExtras.h:2313
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:634
Align getLoadStoreAlignment(const Value *I)
A helper function that returns the alignment of load or store instruction.
iterator_range< df_iterator< VPBlockShallowTraversalWrapper< VPBlockBase * > > > vp_depth_first_shallow(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order.
Definition VPlanCFG.h:253
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1746
auto reverse(ContainerTy &&C)
Definition STLExtras.h:408
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1753
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
Type * toVectorizedTy(Type *Ty, ElementCount EC)
A helper for converting to vectorized types.
bool canConstantBeExtended(const APInt *C, Type *NarrowType, TTI::PartialReductionExtendKind ExtKind)
Check if a constant CI can be safely treated as having been extended from a narrower type with the gi...
Definition VPlan.cpp:1879
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
cl::opt< unsigned > ForceTargetInstructionCost
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
RNSuccIterator< NodeRef, BlockT, RegionT > succ_begin(NodeRef Node)
RNSuccIterator< NodeRef, BlockT, RegionT > succ_end(NodeRef Node)
@ Or
Bitwise or logical OR of integers.
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the specified block at the specified instruction.
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
LLVM_ABI bool setLoopEstimatedTripCount(Loop *L, unsigned EstimatedTripCount, std::optional< unsigned > EstimatedLoopInvocationWeight=std::nullopt)
Set llvm.loop.estimated_trip_count with the value EstimatedTripCount in the loop metadata of L.
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition STLExtras.h:2019
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1772
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1947
ArrayRef< Type * > getContainedTypes(Type *const &Ty)
Returns the types contained in Ty.
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305
LLVM_ABI void DeleteDeadBlocks(ArrayRef< BasicBlock * > BBs, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified blocks from BB.
std::unique_ptr< VPlan > VPlanPtr
Definition VPlan.h:78
constexpr detail::IsaCheckPredicate< Types... > IsaPred
Function object wrapper for the llvm::isa type check.
Definition Casting.h:866
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Parameters that control the generic loop unrolling transformation.
bool UnrollVectorizedLoop
Disable runtime unrolling by default for vectorized loops.
A range of powers-of-2 vectorization factors with fixed start and adjustable end.
ElementCount End
Struct to hold various analysis needed for cost computations.
TargetTransformInfo::OperandValueInfo getOperandInfo(VPValue *V) const
Returns the OperandInfo for V, if it is a live-in.
Definition VPlan.cpp:1890
std::optional< unsigned > NumPredStores
Number of predicated stores in the VPlan, computed on demand.
InstructionCost getScalarizationOverhead(Type *ResultTy, ArrayRef< const VPValue * > Operands, ElementCount VF, TTI::VectorInstrContext VIC=TTI::VectorInstrContext::None, bool AlwaysIncludeReplicatingR=false)
Estimate the overhead of scalarizing a recipe with result type ResultTy and Operands with VF.
Definition VPlan.cpp:1897
TargetTransformInfo::TargetCostKind CostKind
VPTypeAnalysis Types
const TargetTransformInfo & TTI
bool useEmulatedMaskMemRefHack(const VPReplicateRecipe *R, ElementCount VF)
Returns true if an artificially high cost for emulated masked memrefs should be used.
Definition VPlan.cpp:1935
A VPValue representing a live-in from the input IR or a constant.
Definition VPlanValue.h:240
Type * getType() const
Returns the type of the underlying IR value.
Definition VPlan.cpp:141
A symbolic live-in VPValue, used for values like vector trip count, VF, and VFxUF.
Definition VPlanValue.h:280
VPTransformState holds information passed down when "executing" a VPlan, needed for generating the ou...
LoopInfo * LI
Hold a pointer to LoopInfo to register new basic blocks in the loop.
VPTypeAnalysis TypeAnalysis
VPlan-based type analysis.
struct llvm::VPTransformState::DataState Data
struct llvm::VPTransformState::CFGState CFG
Value * get(const VPValue *Def, bool IsScalar=false)
Get the generated vector Value for a given VPValue Def if IsScalar is false, otherwise return the gen...
Definition VPlan.cpp:279
VPTransformState(const TargetTransformInfo *TTI, ElementCount VF, LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC, IRBuilderBase &Builder, VPlan *Plan, Loop *CurrentParentLoop, Type *CanonicalIVTy)
Definition VPlan.cpp:239
std::optional< VPLane > Lane
Hold the index to generate specific scalar instructions.
IRBuilderBase & Builder
Hold a reference to the IRBuilder used to generate output IR code.
bool hasScalarValue(const VPValue *Def, VPLane Lane)
const TargetTransformInfo * TTI
Target Transform Info.
VPlan * Plan
Pointer to the VPlan code is generated for.
void set(const VPValue *Def, Value *V, bool IsScalar=false)
Set the generated vector Value for a given VPValue, if IsScalar is false.
bool hasVectorValue(const VPValue *Def)
VPDominatorTree VPDT
VPlan-based dominator tree.
ElementCount VF
The chosen Vectorization Factor of the loop being vectorized.
Value * packScalarIntoVectorizedValue(const VPValue *Def, Value *WideValue, const VPLane &Lane)
Insert the scalar value of Def at Lane into Lane of WideValue and return the resulting value.
Definition VPlan.cpp:361
AssumptionCache * AC
Hold a pointer to AssumptionCache to register new assumptions after replicating assume calls.
void setDebugLocFrom(DebugLoc DL)
Set the debug location in the builder using the debug location DL.
Definition VPlan.cpp:339
Loop * CurrentParentLoop
The parent loop object for the current scope, or nullptr.
static LLVM_ABI_FOR_TEST void optimize(VPlan &Plan)
Apply VPlan-to-VPlan optimizations to Plan, including induction recipe optimizations,...