LLVM 22.0.0git
VPlan.cpp
Go to the documentation of this file.
1//===- VPlan.cpp - Vectorizer Plan ----------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This is the LLVM vectorization plan. It represents a candidate for
11/// vectorization, allowing to plan and optimize how to vectorize a given loop
12/// before generating LLVM-IR.
13/// The vectorizer uses vectorization plans to estimate the costs of potential
14/// candidates and if profitable to execute the desired plan, generating vector
15/// LLVM-IR code.
16///
17//===----------------------------------------------------------------------===//
18
19#include "VPlan.h"
21#include "VPlanCFG.h"
22#include "VPlanDominatorTree.h"
23#include "VPlanHelpers.h"
24#include "VPlanPatternMatch.h"
25#include "VPlanTransforms.h"
26#include "VPlanUtils.h"
28#include "llvm/ADT/STLExtras.h"
31#include "llvm/ADT/Twine.h"
34#include "llvm/IR/BasicBlock.h"
35#include "llvm/IR/CFG.h"
36#include "llvm/IR/IRBuilder.h"
37#include "llvm/IR/Instruction.h"
39#include "llvm/IR/Type.h"
40#include "llvm/IR/Value.h"
43#include "llvm/Support/Debug.h"
49#include <cassert>
50#include <string>
51
52using namespace llvm;
53using namespace llvm::VPlanPatternMatch;
54
55/// @{
56/// Metadata attribute names
57const char LLVMLoopVectorizeFollowupAll[] = "llvm.loop.vectorize.followup_all";
59 "llvm.loop.vectorize.followup_vectorized";
61 "llvm.loop.vectorize.followup_epilogue";
62/// @}
63
65
67 "vplan-print-in-dot-format", cl::Hidden,
68 cl::desc("Use dot format instead of plain text when dumping VPlans"));
69
70#define DEBUG_TYPE "loop-vectorize"
71
72#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
74 const VPBasicBlock *Parent = R.getParent();
75 VPSlotTracker SlotTracker(Parent ? Parent->getPlan() : nullptr);
76 R.print(OS, "", SlotTracker);
77 return OS;
78}
79#endif
80
82 const ElementCount &VF) const {
83 switch (LaneKind) {
85 // Lane = RuntimeVF - VF.getKnownMinValue() + Lane
86 return Builder.CreateSub(getRuntimeVF(Builder, Builder.getInt32Ty(), VF),
87 Builder.getInt32(VF.getKnownMinValue() - Lane));
89 return Builder.getInt32(Lane);
90 }
91 llvm_unreachable("Unknown lane kind");
92}
93
94#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
96 if (const VPRecipeBase *R = getDefiningRecipe())
97 R->print(OS, "", SlotTracker);
98 else
100}
101
102void VPValue::dump() const {
103 const VPRecipeBase *Instr = getDefiningRecipe();
105 (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() : nullptr);
107 dbgs() << "\n";
108}
109
110void VPDef::dump() const {
111 const VPRecipeBase *Instr = dyn_cast_or_null<VPRecipeBase>(this);
113 (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() : nullptr);
114 print(dbgs(), "", SlotTracker);
115 dbgs() << "\n";
116}
117#endif
118
120 auto *DefValue = dyn_cast<VPRecipeValue>(this);
121 if (!DefValue)
122 return nullptr;
123 return cast<VPRecipeBase>(DefValue->Def);
124}
125
127 auto *DefValue = dyn_cast<VPRecipeValue>(this);
128 if (!DefValue)
129 return nullptr;
130 return cast<VPRecipeBase>(DefValue->Def);
131}
132
134 return cast<VPIRValue>(this)->getValue();
135}
136
138
140 : VPValue(VPVRecipeValueSC, UV), Def(Def) {
141 assert(Def && "VPRecipeValue requires a defining recipe");
142 Def->addDefinedValue(this);
143}
144
146 assert(Users.empty() &&
147 "trying to delete a VPRecipeValue with remaining users");
148 Def->removeDefinedValue(this);
149}
150
151// Get the top-most entry block of \p Start. This is the entry block of the
152// containing VPlan. This function is templated to support both const and non-const blocks
153template <typename T> static T *getPlanEntry(T *Start) {
154 T *Next = Start;
155 T *Current = Start;
156 while ((Next = Next->getParent()))
157 Current = Next;
158
159 SmallSetVector<T *, 8> WorkList;
160 WorkList.insert(Current);
161
162 for (unsigned i = 0; i < WorkList.size(); i++) {
163 T *Current = WorkList[i];
164 if (!Current->hasPredecessors())
165 return Current;
166 auto &Predecessors = Current->getPredecessors();
167 WorkList.insert_range(Predecessors);
168 }
169
170 llvm_unreachable("VPlan without any entry node without predecessors");
171}
172
173VPlan *VPBlockBase::getPlan() { return getPlanEntry(this)->Plan; }
174
175const VPlan *VPBlockBase::getPlan() const { return getPlanEntry(this)->Plan; }
176
177/// \return the VPBasicBlock that is the entry of Block, possibly indirectly.
184
191
192void VPBlockBase::setPlan(VPlan *ParentPlan) {
193 assert(ParentPlan->getEntry() == this && "Can only set plan on its entry.");
194 Plan = ParentPlan;
195}
196
197/// \return the VPBasicBlock that is the exit of Block, possibly indirectly.
199 const VPBlockBase *Block = this;
201 Block = Region->getExiting();
203}
204
211
213 if (!Successors.empty() || !Parent)
214 return this;
215 assert(Parent->getExiting() == this &&
216 "Block w/o successors not the exiting block of its parent.");
217 return Parent->getEnclosingBlockWithSuccessors();
218}
219
221 if (!Predecessors.empty() || !Parent)
222 return this;
223 assert(Parent->getEntry() == this &&
224 "Block w/o predecessors not the entry of its parent.");
225 return Parent->getEnclosingBlockWithPredecessors();
226}
227
229 iterator It = begin();
230 while (It != end() && It->isPhi())
231 It++;
232 return It;
233}
234
242
245 return Def->getUnderlyingValue();
246
247 if (hasScalarValue(Def, Lane))
248 return Data.VPV2Scalars[Def][Lane.mapToCacheIndex(VF)];
249
250 if (!Lane.isFirstLane() && vputils::isSingleScalar(Def) &&
252 return Data.VPV2Scalars[Def][0];
253 }
254
255 // Look through BuildVector to avoid redundant extracts.
256 // TODO: Remove once replicate regions are unrolled explicitly.
257 if (Lane.getKind() == VPLane::Kind::First && match(Def, m_BuildVector())) {
258 auto *BuildVector = cast<VPInstruction>(Def);
259 return get(BuildVector->getOperand(Lane.getKnownLane()), true);
260 }
261
263 auto *VecPart = Data.VPV2Vector[Def];
264 if (!VecPart->getType()->isVectorTy()) {
265 assert(Lane.isFirstLane() && "cannot get lane > 0 for scalar");
266 return VecPart;
267 }
268 // TODO: Cache created scalar values.
269 Value *LaneV = Lane.getAsRuntimeExpr(Builder, VF);
270 auto *Extract = Builder.CreateExtractElement(VecPart, LaneV);
271 // set(Def, Extract, Instance);
272 return Extract;
273}
274
275Value *VPTransformState::get(const VPValue *Def, bool NeedsScalar) {
276 if (NeedsScalar) {
277 assert((VF.isScalar() || isa<VPIRValue, VPSymbolicValue>(Def) ||
279 (hasScalarValue(Def, VPLane(0)) &&
280 Data.VPV2Scalars[Def].size() == 1)) &&
281 "Trying to access a single scalar per part but has multiple scalars "
282 "per part.");
283 return get(Def, VPLane(0));
284 }
285
286 // If Values have been set for this Def return the one relevant for \p Part.
287 if (hasVectorValue(Def))
288 return Data.VPV2Vector[Def];
289
290 auto GetBroadcastInstrs = [this](Value *V) {
291 if (VF.isScalar())
292 return V;
293 // Broadcast the scalar into all locations in the vector.
294 Value *Shuf = Builder.CreateVectorSplat(VF, V, "broadcast");
295 return Shuf;
296 };
297
298 if (!hasScalarValue(Def, {0})) {
299 Value *IRV = Def->getLiveInIRValue();
300 Value *B = GetBroadcastInstrs(IRV);
301 set(Def, B);
302 return B;
303 }
304
305 Value *ScalarValue = get(Def, VPLane(0));
306 // If we aren't vectorizing, we can just copy the scalar map values over
307 // to the vector map.
308 if (VF.isScalar()) {
309 set(Def, ScalarValue);
310 return ScalarValue;
311 }
312
313 bool IsSingleScalar = vputils::isSingleScalar(Def);
314 VPLane LastLane(IsSingleScalar ? 0 : VF.getFixedValue() - 1);
315
316 // We need to construct the vector value for a single-scalar value by
317 // broadcasting the scalar to all lanes.
318 // TODO: Replace by introducing Broadcast VPInstructions.
319 assert(IsSingleScalar && "must be a single-scalar at this point");
320 // Set the insert point after the last scalarized instruction or after the
321 // last PHI, if LastInst is a PHI. This ensures the insertelement sequence
322 // will directly follow the scalar definitions.
323 auto OldIP = Builder.saveIP();
324 auto *LastInst = cast<Instruction>(get(Def, LastLane));
325 auto NewIP = isa<PHINode>(LastInst)
326 ? LastInst->getParent()->getFirstNonPHIIt()
327 : std::next(BasicBlock::iterator(LastInst));
328 Builder.SetInsertPoint(&*NewIP);
329 Value *VectorValue = GetBroadcastInstrs(ScalarValue);
330 set(Def, VectorValue);
331 Builder.restoreIP(OldIP);
332 return VectorValue;
333}
334
336 const DILocation *DIL = DL;
337 // When a FSDiscriminator is enabled, we don't need to add the multiply
338 // factors to the discriminators.
339 if (DIL &&
340 Builder.GetInsertBlock()
341 ->getParent()
342 ->shouldEmitDebugInfoForProfiling() &&
344 // FIXME: For scalable vectors, assume vscale=1.
345 unsigned UF = Plan->getUF();
346 auto NewDIL =
347 DIL->cloneByMultiplyingDuplicationFactor(UF * VF.getKnownMinValue());
348 if (NewDIL)
349 Builder.SetCurrentDebugLocation(*NewDIL);
350 else
351 LLVM_DEBUG(dbgs() << "Failed to create new discriminator: "
352 << DIL->getFilename() << " Line: " << DIL->getLine());
353 } else
354 Builder.SetCurrentDebugLocation(DL);
355}
356
358 Value *WideValue,
359 const VPLane &Lane) {
360 Value *ScalarInst = get(Def, Lane);
361 Value *LaneExpr = Lane.getAsRuntimeExpr(Builder, VF);
362 if (auto *StructTy = dyn_cast<StructType>(WideValue->getType())) {
363 // We must handle each element of a vectorized struct type.
364 for (unsigned I = 0, E = StructTy->getNumElements(); I != E; I++) {
365 Value *ScalarValue = Builder.CreateExtractValue(ScalarInst, I);
366 Value *VectorValue = Builder.CreateExtractValue(WideValue, I);
367 VectorValue =
368 Builder.CreateInsertElement(VectorValue, ScalarValue, LaneExpr);
369 WideValue = Builder.CreateInsertValue(WideValue, VectorValue, I);
370 }
371 } else {
372 WideValue = Builder.CreateInsertElement(WideValue, ScalarInst, LaneExpr);
373 }
374 return WideValue;
375}
376
377BasicBlock *VPBasicBlock::createEmptyBasicBlock(VPTransformState &State) {
378 auto &CFG = State.CFG;
379 // BB stands for IR BasicBlocks. VPBB stands for VPlan VPBasicBlocks.
380 // Pred stands for Predessor. Prev stands for Previous - last visited/created.
381 BasicBlock *PrevBB = CFG.PrevBB;
382 BasicBlock *NewBB = BasicBlock::Create(PrevBB->getContext(), getName(),
383 PrevBB->getParent(), CFG.ExitBB);
384 LLVM_DEBUG(dbgs() << "LV: created " << NewBB->getName() << '\n');
385
386 return NewBB;
387}
388
390 auto &CFG = State.CFG;
391 BasicBlock *NewBB = CFG.VPBB2IRBB[this];
392
393 // Register NewBB in its loop. In innermost loops its the same for all
394 // BB's.
395 Loop *ParentLoop = State.CurrentParentLoop;
396 // If this block has a sole successor that is an exit block or is an exit
397 // block itself then it needs adding to the same parent loop as the exit
398 // block.
399 VPBlockBase *SuccOrExitVPB = getSingleSuccessor();
400 SuccOrExitVPB = SuccOrExitVPB ? SuccOrExitVPB : this;
401 if (State.Plan->isExitBlock(SuccOrExitVPB)) {
402 ParentLoop = State.LI->getLoopFor(
403 cast<VPIRBasicBlock>(SuccOrExitVPB)->getIRBasicBlock());
404 }
405
406 if (ParentLoop && !State.LI->getLoopFor(NewBB))
407 ParentLoop->addBasicBlockToLoop(NewBB, *State.LI);
408
410 if (VPBlockUtils::isHeader(this, State.VPDT)) {
411 // There's no block for the latch yet, connect to the preheader only.
412 Preds = {getPredecessors()[0]};
413 } else {
414 Preds = to_vector(getPredecessors());
415 }
416
417 // Hook up the new basic block to its predecessors.
418 for (VPBlockBase *PredVPBlock : Preds) {
419 VPBasicBlock *PredVPBB = PredVPBlock->getExitingBasicBlock();
420 auto &PredVPSuccessors = PredVPBB->getHierarchicalSuccessors();
421 assert(CFG.VPBB2IRBB.contains(PredVPBB) &&
422 "Predecessor basic-block not found building successor.");
423 BasicBlock *PredBB = CFG.VPBB2IRBB[PredVPBB];
424 auto *PredBBTerminator = PredBB->getTerminator();
425 LLVM_DEBUG(dbgs() << "LV: draw edge from " << PredBB->getName() << '\n');
426
427 auto *TermBr = dyn_cast<BranchInst>(PredBBTerminator);
428 if (isa<UnreachableInst>(PredBBTerminator)) {
429 assert(PredVPSuccessors.size() == 1 &&
430 "Predecessor ending w/o branch must have single successor.");
431 DebugLoc DL = PredBBTerminator->getDebugLoc();
432 PredBBTerminator->eraseFromParent();
433 auto *Br = BranchInst::Create(NewBB, PredBB);
434 Br->setDebugLoc(DL);
435 } else if (TermBr && !TermBr->isConditional()) {
436 TermBr->setSuccessor(0, NewBB);
437 } else {
438 // Set each forward successor here when it is created, excluding
439 // backedges. A backward successor is set when the branch is created.
440 // Branches to VPIRBasicBlocks must have the same successors in VPlan as
441 // in the original IR, except when the predecessor is the entry block.
442 // This enables including SCEV and memory runtime check blocks in VPlan.
443 // TODO: Remove exception by modeling the terminator of entry block using
444 // BranchOnCond.
445 unsigned idx = PredVPSuccessors.front() == this ? 0 : 1;
446 assert((TermBr && (!TermBr->getSuccessor(idx) ||
447 (isa<VPIRBasicBlock>(this) &&
448 (TermBr->getSuccessor(idx) == NewBB ||
449 PredVPBlock == getPlan()->getEntry())))) &&
450 "Trying to reset an existing successor block.");
451 TermBr->setSuccessor(idx, NewBB);
452 }
453 CFG.DTU.applyUpdates({{DominatorTree::Insert, PredBB, NewBB}});
454 }
455}
456
459 "VPIRBasicBlock can have at most two successors at the moment!");
460 // Move completely disconnected blocks to their final position.
461 if (IRBB->hasNPredecessors(0) && succ_begin(IRBB) == succ_end(IRBB))
462 IRBB->moveAfter(State->CFG.PrevBB);
463 State->Builder.SetInsertPoint(IRBB->getTerminator());
464 State->CFG.PrevBB = IRBB;
465 State->CFG.VPBB2IRBB[this] = IRBB;
466 executeRecipes(State, IRBB);
467 // Create a branch instruction to terminate IRBB if one was not created yet
468 // and is needed.
469 if (getSingleSuccessor() && isa<UnreachableInst>(IRBB->getTerminator())) {
470 auto *Br = State->Builder.CreateBr(IRBB);
471 Br->setOperand(0, nullptr);
472 IRBB->getTerminator()->eraseFromParent();
473 } else {
474 assert(
475 (getNumSuccessors() == 0 || isa<BranchInst>(IRBB->getTerminator())) &&
476 "other blocks must be terminated by a branch");
477 }
478
479 connectToPredecessors(*State);
480}
481
482VPIRBasicBlock *VPIRBasicBlock::clone() {
483 auto *NewBlock = getPlan()->createEmptyVPIRBasicBlock(IRBB);
484 for (VPRecipeBase &R : Recipes)
485 NewBlock->appendRecipe(R.clone());
486 return NewBlock;
487}
488
490 bool Replica = bool(State->Lane);
491 BasicBlock *NewBB = State->CFG.PrevBB; // Reuse it if possible.
492
493 if (VPBlockUtils::isHeader(this, State->VPDT)) {
494 // Create and register the new vector loop.
495 Loop *PrevParentLoop = State->CurrentParentLoop;
496 State->CurrentParentLoop = State->LI->AllocateLoop();
497
498 // Insert the new loop into the loop nest and register the new basic blocks
499 // before calling any utilities such as SCEV that require valid LoopInfo.
500 if (PrevParentLoop)
501 PrevParentLoop->addChildLoop(State->CurrentParentLoop);
502 else
503 State->LI->addTopLevelLoop(State->CurrentParentLoop);
504 }
505
506 auto IsReplicateRegion = [](VPBlockBase *BB) {
508 assert((!R || R->isReplicator()) &&
509 "only replicate region blocks should remain");
510 return R;
511 };
512 // 1. Create an IR basic block.
513 if ((Replica && this == getParent()->getEntry()) ||
514 IsReplicateRegion(getSingleHierarchicalPredecessor())) {
515 // Reuse the previous basic block if the current VPBB is either
516 // * the entry to a replicate region, or
517 // * the exit of a replicate region.
518 State->CFG.VPBB2IRBB[this] = NewBB;
519 } else {
520 NewBB = createEmptyBasicBlock(*State);
521
522 State->Builder.SetInsertPoint(NewBB);
523 // Temporarily terminate with unreachable until CFG is rewired.
524 UnreachableInst *Terminator = State->Builder.CreateUnreachable();
525 State->Builder.SetInsertPoint(Terminator);
526
527 State->CFG.PrevBB = NewBB;
528 State->CFG.VPBB2IRBB[this] = NewBB;
529 connectToPredecessors(*State);
530 }
531
532 // 2. Fill the IR basic block with IR instructions.
533 executeRecipes(State, NewBB);
534
535 // If this block is a latch, update CurrentParentLoop.
536 if (VPBlockUtils::isLatch(this, State->VPDT))
537 State->CurrentParentLoop = State->CurrentParentLoop->getParentLoop();
538}
539
540VPBasicBlock *VPBasicBlock::clone() {
541 auto *NewBlock = getPlan()->createVPBasicBlock(getName());
542 for (VPRecipeBase &R : *this)
543 NewBlock->appendRecipe(R.clone());
544 return NewBlock;
545}
546
548 LLVM_DEBUG(dbgs() << "LV: vectorizing VPBB: " << getName()
549 << " in BB: " << BB->getName() << '\n');
550
551 State->CFG.PrevVPBB = this;
552
553 for (VPRecipeBase &Recipe : Recipes) {
554 State->setDebugLocFrom(Recipe.getDebugLoc());
555 Recipe.execute(*State);
556 }
557
558 LLVM_DEBUG(dbgs() << "LV: filled BB: " << *BB);
559}
560
561VPBasicBlock *VPBasicBlock::splitAt(iterator SplitAt) {
562 assert((SplitAt == end() || SplitAt->getParent() == this) &&
563 "can only split at a position in the same block");
564
565 // Create new empty block after the block to split.
566 auto *SplitBlock = getPlan()->createVPBasicBlock(getName() + ".split");
568
569 // Finally, move the recipes starting at SplitAt to new block.
570 for (VPRecipeBase &ToMove :
571 make_early_inc_range(make_range(SplitAt, this->end())))
572 ToMove.moveBefore(*SplitBlock, SplitBlock->end());
573
574 return SplitBlock;
575}
576
577/// Return the enclosing loop region for region \p P. The templated version is
578/// used to support both const and non-const block arguments.
579template <typename T> static T *getEnclosingLoopRegionForRegion(T *P) {
580 if (P && P->isReplicator()) {
581 P = P->getParent();
582 // Multiple loop regions can be nested, but replicate regions can only be
583 // nested inside a loop region or must be outside any other region.
584 assert((!P || !P->isReplicator()) && "unexpected nested replicate regions");
585 }
586 return P;
587}
588
592
596
597static bool hasConditionalTerminator(const VPBasicBlock *VPBB) {
598 if (VPBB->empty()) {
599 assert(
600 VPBB->getNumSuccessors() < 2 &&
601 "block with multiple successors doesn't have a recipe as terminator");
602 return false;
603 }
604
605 const VPRecipeBase *R = &VPBB->back();
606 [[maybe_unused]] bool IsSwitch =
608 cast<VPInstruction>(R)->getOpcode() == Instruction::Switch;
609 [[maybe_unused]] bool IsBranchOnTwoConds = match(R, m_BranchOnTwoConds());
610 [[maybe_unused]] bool IsCondBranch =
613 if (VPBB->getNumSuccessors() == 2 ||
614 (VPBB->isExiting() && !VPBB->getParent()->isReplicator())) {
615 assert((IsCondBranch || IsSwitch || IsBranchOnTwoConds) &&
616 "block with multiple successors not terminated by "
617 "conditional branch nor switch recipe");
618
619 return true;
620 }
621
622 if (VPBB->getNumSuccessors() > 2) {
623 assert((IsSwitch || IsBranchOnTwoConds) &&
624 "block with more than 2 successors not terminated by a switch or "
625 "branch-on-two-conds recipe");
626 return true;
627 }
628
629 assert(
630 !IsCondBranch && !IsBranchOnTwoConds &&
631 "block with 0 or 1 successors terminated by conditional branch recipe");
632 return false;
633}
634
636 if (hasConditionalTerminator(this))
637 return &back();
638 return nullptr;
639}
640
642 if (hasConditionalTerminator(this))
643 return &back();
644 return nullptr;
645}
646
648 return getParent() && getParent()->getExitingBasicBlock() == this;
649}
650
651#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
656
657void VPBlockBase::printSuccessors(raw_ostream &O, const Twine &Indent) const {
658 if (getSuccessors().empty()) {
659 O << Indent << "No successors\n";
660 } else {
661 O << Indent << "Successor(s): ";
662 ListSeparator LS;
663 for (auto *Succ : getSuccessors())
664 O << LS << Succ->getName();
665 O << '\n';
666 }
667}
668
669void VPBasicBlock::print(raw_ostream &O, const Twine &Indent,
670 VPSlotTracker &SlotTracker) const {
671 O << Indent << getName() << ":\n";
672
673 auto RecipeIndent = Indent + " ";
674 for (const VPRecipeBase &Recipe : *this) {
675 Recipe.print(O, RecipeIndent, SlotTracker);
676 O << '\n';
677 }
678
679 printSuccessors(O, Indent);
680}
681#endif
682
683static std::pair<VPBlockBase *, VPBlockBase *> cloneFrom(VPBlockBase *Entry);
684
685// Clone the CFG for all nodes reachable from \p Entry, this includes cloning
686// the blocks and their recipes. Operands of cloned recipes will NOT be updated.
687// Remapping of operands must be done separately. Returns a pair with the new
688// entry and exiting blocks of the cloned region. If \p Entry isn't part of a
689// region, return nullptr for the exiting block.
690static std::pair<VPBlockBase *, VPBlockBase *> cloneFrom(VPBlockBase *Entry) {
692 VPBlockBase *Exiting = nullptr;
693 bool InRegion = Entry->getParent();
694 // First, clone blocks reachable from Entry.
695 for (VPBlockBase *BB : vp_depth_first_shallow(Entry)) {
696 VPBlockBase *NewBB = BB->clone();
697 Old2NewVPBlocks[BB] = NewBB;
698 if (InRegion && BB->getNumSuccessors() == 0) {
699 assert(!Exiting && "Multiple exiting blocks?");
700 Exiting = BB;
701 }
702 }
703 assert((!InRegion || Exiting) && "regions must have a single exiting block");
704
705 // Second, update the predecessors & successors of the cloned blocks.
706 for (VPBlockBase *BB : vp_depth_first_shallow(Entry)) {
707 VPBlockBase *NewBB = Old2NewVPBlocks[BB];
709 for (VPBlockBase *Pred : BB->getPredecessors()) {
710 NewPreds.push_back(Old2NewVPBlocks[Pred]);
711 }
712 NewBB->setPredecessors(NewPreds);
714 for (VPBlockBase *Succ : BB->successors()) {
715 NewSuccs.push_back(Old2NewVPBlocks[Succ]);
716 }
717 NewBB->setSuccessors(NewSuccs);
718 }
719
720#if !defined(NDEBUG)
721 // Verify that the order of predecessors and successors matches in the cloned
722 // version.
723 for (const auto &[OldBB, NewBB] :
725 vp_depth_first_shallow(Old2NewVPBlocks[Entry]))) {
726 for (const auto &[OldPred, NewPred] :
727 zip(OldBB->getPredecessors(), NewBB->getPredecessors()))
728 assert(NewPred == Old2NewVPBlocks[OldPred] && "Different predecessors");
729
730 for (const auto &[OldSucc, NewSucc] :
731 zip(OldBB->successors(), NewBB->successors()))
732 assert(NewSucc == Old2NewVPBlocks[OldSucc] && "Different successors");
733 }
734#endif
735
736 return std::make_pair(Old2NewVPBlocks[Entry],
737 Exiting ? Old2NewVPBlocks[Exiting] : nullptr);
738}
739
740VPRegionBlock *VPRegionBlock::clone() {
741 const auto &[NewEntry, NewExiting] = cloneFrom(getEntry());
742 VPlan &Plan = *getPlan();
743 VPRegionBlock *NewRegion =
745 ? Plan.createReplicateRegion(NewEntry, NewExiting, getName())
746 : Plan.createLoopRegion(getName(), NewEntry, NewExiting);
747
748 for (VPBlockBase *Block : vp_depth_first_shallow(NewEntry))
749 Block->setParent(NewRegion);
750 return NewRegion;
751}
752
755 "Loop regions should have been lowered to plain CFG");
756 assert(!State->Lane && "Replicating a Region with non-null instance.");
757 assert(!State->VF.isScalable() && "VF is assumed to be non scalable.");
758
760 Entry);
761 State->Lane = VPLane(0);
762 for (unsigned Lane = 0, VF = State->VF.getFixedValue(); Lane < VF; ++Lane) {
763 State->Lane = VPLane(Lane, VPLane::Kind::First);
764 // Visit the VPBlocks connected to \p this, starting from it.
765 for (VPBlockBase *Block : RPOT) {
766 LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n');
767 Block->execute(State);
768 }
769 }
770
771 // Exit replicating mode.
772 State->Lane.reset();
773}
774
777 for (VPRecipeBase &R : Recipes)
778 Cost += R.cost(VF, Ctx);
779 return Cost;
780}
781
782const VPBasicBlock *VPBasicBlock::getCFGPredecessor(unsigned Idx) const {
783 const VPBlockBase *Pred = nullptr;
784 if (hasPredecessors()) {
785 Pred = getPredecessors()[Idx];
786 } else {
787 auto *Region = getParent();
788 assert(Region && !Region->isReplicator() && Region->getEntry() == this &&
789 "must be in the entry block of a non-replicate region");
790 assert(Idx < 2 && Region->getNumPredecessors() == 1 &&
791 "loop region has a single predecessor (preheader), its entry block "
792 "has 2 incoming blocks");
793
794 // Idx == 0 selects the predecessor of the region, Idx == 1 selects the
795 // region itself whose exiting block feeds the phi across the backedge.
796 Pred = Idx == 0 ? Region->getSinglePredecessor() : Region;
797 }
798 return Pred->getExitingBasicBlock();
799}
800
802 if (!isReplicator()) {
805 Cost += Block->cost(VF, Ctx);
806 InstructionCost BackedgeCost =
807 ForceTargetInstructionCost.getNumOccurrences()
808 ? InstructionCost(ForceTargetInstructionCost.getNumOccurrences())
809 : Ctx.TTI.getCFInstrCost(Instruction::Br, Ctx.CostKind);
810 LLVM_DEBUG(dbgs() << "Cost of " << BackedgeCost << " for VF " << VF
811 << ": vector loop backedge\n");
812 Cost += BackedgeCost;
813 return Cost;
814 }
815
816 // Compute the cost of a replicate region. Replicating isn't supported for
817 // scalable vectors, return an invalid cost for them.
818 // TODO: Discard scalable VPlans with replicate recipes earlier after
819 // construction.
820 if (VF.isScalable())
822
823 // Compute and return the cost of the conditionally executed recipes.
824 assert(VF.isVector() && "Can only compute vector cost at the moment.");
826 return Then->cost(VF, Ctx);
827}
828
829#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
831 VPSlotTracker &SlotTracker) const {
832 O << Indent << (isReplicator() ? "<xVFxUF> " : "<x1> ") << getName() << ": {";
833 auto NewIndent = Indent + " ";
834 for (auto *BlockBase : vp_depth_first_shallow(Entry)) {
835 O << '\n';
836 BlockBase->print(O, NewIndent, SlotTracker);
837 }
838 O << Indent << "}\n";
839
840 printSuccessors(O, Indent);
841}
842#endif
843
845 auto *Header = cast<VPBasicBlock>(getEntry());
846 if (auto *CanIV = dyn_cast<VPCanonicalIVPHIRecipe>(&Header->front())) {
847 assert(this == getPlan()->getVectorLoopRegion() &&
848 "Canonical IV must be in the entry of the top-level loop region");
849 auto *ScalarR = VPBuilder(CanIV).createScalarPhi(
850 {CanIV->getStartValue(), CanIV->getBackedgeValue()},
851 CanIV->getDebugLoc(), "index");
852 CanIV->replaceAllUsesWith(ScalarR);
853 CanIV->eraseFromParent();
854 }
855
856 VPBlockBase *Preheader = getSinglePredecessor();
857 auto *ExitingLatch = cast<VPBasicBlock>(getExiting());
858
859 VPBlockUtils::disconnectBlocks(Preheader, this);
860
861 for (VPBlockBase *VPB : vp_depth_first_shallow(Entry))
862 VPB->setParent(getParent());
863
864 VPBlockUtils::connectBlocks(Preheader, Header);
865 VPBlockUtils::transferSuccessors(this, ExitingLatch);
866 VPBlockUtils::connectBlocks(ExitingLatch, Header);
867}
868
869VPlan::VPlan(Loop *L) {
870 setEntry(createVPIRBasicBlock(L->getLoopPreheader()));
871 ScalarHeader = createVPIRBasicBlock(L->getHeader());
872
873 SmallVector<BasicBlock *> IRExitBlocks;
874 L->getUniqueExitBlocks(IRExitBlocks);
875 for (BasicBlock *EB : IRExitBlocks)
876 ExitBlocks.push_back(createVPIRBasicBlock(EB));
877}
878
880 VPSymbolicValue DummyValue;
881
882 for (auto *VPB : CreatedBlocks) {
883 if (auto *VPBB = dyn_cast<VPBasicBlock>(VPB)) {
884 // Replace all operands of recipes and all VPValues defined in VPBB with
885 // DummyValue so the block can be deleted.
886 for (VPRecipeBase &R : *VPBB) {
887 for (auto *Def : R.definedValues())
888 Def->replaceAllUsesWith(&DummyValue);
889
890 for (unsigned I = 0, E = R.getNumOperands(); I != E; I++)
891 R.setOperand(I, &DummyValue);
892 }
893 }
894 delete VPB;
895 }
896 for (VPValue *VPV : getLiveIns())
897 delete VPV;
898 delete BackedgeTakenCount;
899}
900
902 auto Iter = find_if(getExitBlocks(), [IRBB](const VPIRBasicBlock *VPIRBB) {
903 return VPIRBB->getIRBasicBlock() == IRBB;
904 });
905 assert(Iter != getExitBlocks().end() && "no exit block found");
906 return *Iter;
907}
908
910 return is_contained(ExitBlocks, VPBB);
911}
912
913/// Generate the code inside the preheader and body of the vectorized loop.
914/// Assumes a single pre-header basic-block was created for this. Introduce
915/// additional basic-blocks as needed, and fill them all.
917 // Initialize CFG state.
918 State->CFG.PrevVPBB = nullptr;
919 State->CFG.ExitBB = State->CFG.PrevBB->getSingleSuccessor();
920
921 // Update VPDominatorTree since VPBasicBlock may be removed after State was
922 // constructed.
923 State->VPDT.recalculate(*this);
924
925 // Disconnect VectorPreHeader from ExitBB in both the CFG and DT.
926 BasicBlock *VectorPreHeader = State->CFG.PrevBB;
927 cast<BranchInst>(VectorPreHeader->getTerminator())->setSuccessor(0, nullptr);
928 State->CFG.DTU.applyUpdates(
929 {{DominatorTree::Delete, VectorPreHeader, State->CFG.ExitBB}});
930
931 LLVM_DEBUG(dbgs() << "Executing best plan with VF=" << State->VF
932 << ", UF=" << getUF() << '\n');
933 setName("Final VPlan");
934 LLVM_DEBUG(dump());
935
936 BasicBlock *ScalarPh = State->CFG.ExitBB;
937 VPBasicBlock *ScalarPhVPBB = getScalarPreheader();
938 if (ScalarPhVPBB->hasPredecessors()) {
939 // Disconnect scalar preheader and scalar header, as the dominator tree edge
940 // will be updated as part of VPlan execution. This allows keeping the DTU
941 // logic generic during VPlan execution.
942 State->CFG.DTU.applyUpdates(
943 {{DominatorTree::Delete, ScalarPh, ScalarPh->getSingleSuccessor()}});
944 }
946 Entry);
947 // Generate code for the VPlan, in parts of the vector skeleton, loop body and
948 // successor blocks including the middle, exit and scalar preheader blocks.
949 for (VPBlockBase *Block : RPOT)
950 Block->execute(State);
951
952 // If the original loop is unreachable, delete it and all its blocks.
953 if (!ScalarPhVPBB->hasPredecessors()) {
954 // DeleteDeadBlocks will remove single-entry phis. Remove them from the exit
955 // VPIRBBs in VPlan as well, otherwise we would retain references to deleted
956 // IR instructions.
957 for (VPIRBasicBlock *EB : getExitBlocks()) {
958 for (VPRecipeBase &R : make_early_inc_range(EB->phis())) {
959 if (R.getNumOperands() == 1)
960 R.eraseFromParent();
961 }
962 }
963
964 Loop *OrigLoop =
965 State->LI->getLoopFor(getScalarHeader()->getIRBasicBlock());
966 auto Blocks = OrigLoop->getBlocksVector();
967 Blocks.push_back(cast<VPIRBasicBlock>(ScalarPhVPBB)->getIRBasicBlock());
968 for (auto *BB : Blocks)
969 State->LI->removeBlock(BB);
970 DeleteDeadBlocks(Blocks, &State->CFG.DTU);
971 State->LI->erase(OrigLoop);
972 }
973
974 State->CFG.DTU.flush();
975
976 VPBasicBlock *Header = vputils::getFirstLoopHeader(*this, State->VPDT);
977 if (!Header)
978 return;
979
980 auto *LatchVPBB = cast<VPBasicBlock>(Header->getPredecessors()[1]);
981 BasicBlock *VectorLatchBB = State->CFG.VPBB2IRBB[LatchVPBB];
982
983 // Fix the latch value of canonical, reduction and first-order recurrences
984 // phis in the vector loop.
985 for (VPRecipeBase &R : Header->phis()) {
986 // Skip phi-like recipes that generate their backedege values themselves.
987 if (isa<VPWidenPHIRecipe>(&R))
988 continue;
989
990 auto *PhiR = cast<VPSingleDefRecipe>(&R);
991 // VPInstructions currently model scalar Phis only.
992 bool NeedsScalar = isa<VPInstruction>(PhiR) ||
994 cast<VPReductionPHIRecipe>(PhiR)->isInLoop());
995
996 Value *Phi = State->get(PhiR, NeedsScalar);
997 // VPHeaderPHIRecipe supports getBackedgeValue() but VPInstruction does
998 // not.
999 Value *Val = State->get(PhiR->getOperand(1), NeedsScalar);
1000 cast<PHINode>(Phi)->addIncoming(Val, VectorLatchBB);
1001 }
1002}
1003
1005 // For now only return the cost of the vector loop region, ignoring any other
1006 // blocks, like the preheader or middle blocks, expect for checking them for
1007 // recipes with invalid costs.
1009
1010 // If the cost of the loop region is invalid or any recipe in the skeleton
1011 // outside loop regions are invalid return an invalid cost.
1014 [&VF, &Ctx](VPBasicBlock *VPBB) {
1015 return !VPBB->cost(VF, Ctx).isValid();
1016 }))
1018
1019 return Cost;
1020}
1021
1023 // TODO: Cache if possible.
1025 if (auto *R = dyn_cast<VPRegionBlock>(B))
1026 return R->isReplicator() ? nullptr : R;
1027 return nullptr;
1028}
1029
1032 if (auto *R = dyn_cast<VPRegionBlock>(B))
1033 return R->isReplicator() ? nullptr : R;
1034 return nullptr;
1035}
1036
1037#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1040
1041 if (VF.getNumUsers() > 0) {
1042 O << "\nLive-in ";
1043 VF.printAsOperand(O, SlotTracker);
1044 O << " = VF";
1045 }
1046
1047 if (VFxUF.getNumUsers() > 0) {
1048 O << "\nLive-in ";
1049 VFxUF.printAsOperand(O, SlotTracker);
1050 O << " = VF * UF";
1051 }
1052
1053 if (VectorTripCount.getNumUsers() > 0) {
1054 O << "\nLive-in ";
1055 VectorTripCount.printAsOperand(O, SlotTracker);
1056 O << " = vector-trip-count";
1057 }
1058
1059 if (BackedgeTakenCount && BackedgeTakenCount->getNumUsers()) {
1060 O << "\nLive-in ";
1061 BackedgeTakenCount->printAsOperand(O, SlotTracker);
1062 O << " = backedge-taken count";
1063 }
1064
1065 O << "\n";
1066 if (TripCount) {
1067 if (isa<VPIRValue>(TripCount))
1068 O << "Live-in ";
1069 TripCount->printAsOperand(O, SlotTracker);
1070 O << " = original trip-count";
1071 O << "\n";
1072 }
1073}
1074
1078
1079 O << "VPlan '" << getName() << "' {";
1080
1081 printLiveIns(O);
1082
1084 RPOT(getEntry());
1085 for (const VPBlockBase *Block : RPOT) {
1086 O << '\n';
1087 Block->print(O, "", SlotTracker);
1088 }
1089
1090 O << "}\n";
1091}
1092
1093std::string VPlan::getName() const {
1094 std::string Out;
1095 raw_string_ostream RSO(Out);
1096 RSO << Name << " for ";
1097 if (!VFs.empty()) {
1098 RSO << "VF={" << VFs[0];
1099 for (ElementCount VF : drop_begin(VFs))
1100 RSO << "," << VF;
1101 RSO << "},";
1102 }
1103
1104 if (UFs.empty()) {
1105 RSO << "UF>=1";
1106 } else {
1107 RSO << "UF={" << UFs[0];
1108 for (unsigned UF : drop_begin(UFs))
1109 RSO << "," << UF;
1110 RSO << "}";
1111 }
1112
1113 return Out;
1114}
1115
1118 VPlanPrinter Printer(O, *this);
1119 Printer.dump();
1120}
1121
1123void VPlan::dump() const { print(dbgs()); }
1124#endif
1125
1126static void remapOperands(VPBlockBase *Entry, VPBlockBase *NewEntry,
1127 DenseMap<VPValue *, VPValue *> &Old2NewVPValues) {
1128 // Update the operands of all cloned recipes starting at NewEntry. This
1129 // traverses all reachable blocks. This is done in two steps, to handle cycles
1130 // in PHI recipes.
1132 OldDeepRPOT(Entry);
1134 NewDeepRPOT(NewEntry);
1135 // First, collect all mappings from old to new VPValues defined by cloned
1136 // recipes.
1137 for (const auto &[OldBB, NewBB] :
1140 assert(OldBB->getRecipeList().size() == NewBB->getRecipeList().size() &&
1141 "blocks must have the same number of recipes");
1142 for (const auto &[OldR, NewR] : zip(*OldBB, *NewBB)) {
1143 assert(OldR.getNumOperands() == NewR.getNumOperands() &&
1144 "recipes must have the same number of operands");
1145 assert(OldR.getNumDefinedValues() == NewR.getNumDefinedValues() &&
1146 "recipes must define the same number of operands");
1147 for (const auto &[OldV, NewV] :
1148 zip(OldR.definedValues(), NewR.definedValues()))
1149 Old2NewVPValues[OldV] = NewV;
1150 }
1151 }
1152
1153 // Update all operands to use cloned VPValues.
1154 for (VPBasicBlock *NewBB :
1156 for (VPRecipeBase &NewR : *NewBB)
1157 for (unsigned I = 0, E = NewR.getNumOperands(); I != E; ++I) {
1158 VPValue *NewOp = Old2NewVPValues.lookup(NewR.getOperand(I));
1159 NewR.setOperand(I, NewOp);
1160 }
1161 }
1162}
1163
1165 unsigned NumBlocksBeforeCloning = CreatedBlocks.size();
1166 // Clone blocks.
1167 const auto &[NewEntry, __] = cloneFrom(Entry);
1168
1169 BasicBlock *ScalarHeaderIRBB = getScalarHeader()->getIRBasicBlock();
1170 VPIRBasicBlock *NewScalarHeader = nullptr;
1171 if (getScalarHeader()->hasPredecessors()) {
1172 NewScalarHeader = cast<VPIRBasicBlock>(*find_if(
1173 vp_depth_first_shallow(NewEntry), [ScalarHeaderIRBB](VPBlockBase *VPB) {
1174 auto *VPIRBB = dyn_cast<VPIRBasicBlock>(VPB);
1175 return VPIRBB && VPIRBB->getIRBasicBlock() == ScalarHeaderIRBB;
1176 }));
1177 } else {
1178 NewScalarHeader = createVPIRBasicBlock(ScalarHeaderIRBB);
1179 }
1180 // Create VPlan, clone live-ins and remap operands in the cloned blocks.
1181 auto *NewPlan = new VPlan(cast<VPBasicBlock>(NewEntry), NewScalarHeader);
1182 DenseMap<VPValue *, VPValue *> Old2NewVPValues;
1183 for (VPIRValue *OldLiveIn : getLiveIns())
1184 Old2NewVPValues[OldLiveIn] = NewPlan->getOrAddLiveIn(OldLiveIn);
1185 Old2NewVPValues[&VectorTripCount] = &NewPlan->VectorTripCount;
1186 Old2NewVPValues[&VF] = &NewPlan->VF;
1187 Old2NewVPValues[&VFxUF] = &NewPlan->VFxUF;
1188 if (BackedgeTakenCount) {
1189 NewPlan->BackedgeTakenCount = new VPSymbolicValue();
1190 Old2NewVPValues[BackedgeTakenCount] = NewPlan->BackedgeTakenCount;
1191 }
1192 if (auto *TripCountIRV = dyn_cast_or_null<VPIRValue>(TripCount))
1193 Old2NewVPValues[TripCountIRV] = NewPlan->getOrAddLiveIn(TripCountIRV);
1194 // else NewTripCount will be created and inserted into Old2NewVPValues when
1195 // TripCount is cloned. In any case NewPlan->TripCount is updated below.
1196
1197 remapOperands(Entry, NewEntry, Old2NewVPValues);
1198
1199 // Initialize remaining fields of cloned VPlan.
1200 NewPlan->VFs = VFs;
1201 NewPlan->UFs = UFs;
1202 // TODO: Adjust names.
1203 NewPlan->Name = Name;
1204 if (TripCount) {
1205 assert(Old2NewVPValues.contains(TripCount) &&
1206 "TripCount must have been added to Old2NewVPValues");
1207 NewPlan->TripCount = Old2NewVPValues[TripCount];
1208 }
1209
1210 // Transfer all cloned blocks (the second half of all current blocks) from
1211 // current to new VPlan.
1212 unsigned NumBlocksAfterCloning = CreatedBlocks.size();
1213 for (unsigned I :
1214 seq<unsigned>(NumBlocksBeforeCloning, NumBlocksAfterCloning))
1215 NewPlan->CreatedBlocks.push_back(this->CreatedBlocks[I]);
1216 CreatedBlocks.truncate(NumBlocksBeforeCloning);
1217
1218 // Update ExitBlocks of the new plan.
1219 for (VPBlockBase *VPB : NewPlan->CreatedBlocks) {
1220 if (VPB->getNumSuccessors() == 0 && isa<VPIRBasicBlock>(VPB) &&
1221 VPB != NewScalarHeader)
1222 NewPlan->ExitBlocks.push_back(cast<VPIRBasicBlock>(VPB));
1223 }
1224
1225 return NewPlan;
1226}
1227
1229 auto *VPIRBB = new VPIRBasicBlock(IRBB);
1230 CreatedBlocks.push_back(VPIRBB);
1231 return VPIRBB;
1232}
1233
1235 auto *VPIRBB = createEmptyVPIRBasicBlock(IRBB);
1236 for (Instruction &I :
1237 make_range(IRBB->begin(), IRBB->getTerminator()->getIterator()))
1238 VPIRBB->appendRecipe(VPIRInstruction::create(I));
1239 return VPIRBB;
1240}
1241
1242#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1243
1244Twine VPlanPrinter::getUID(const VPBlockBase *Block) {
1245 return (isa<VPRegionBlock>(Block) ? "cluster_N" : "N") +
1246 Twine(getOrCreateBID(Block));
1247}
1248
1249Twine VPlanPrinter::getOrCreateName(const VPBlockBase *Block) {
1250 const std::string &Name = Block->getName();
1251 if (!Name.empty())
1252 return Name;
1253 return "VPB" + Twine(getOrCreateBID(Block));
1254}
1255
1257 Depth = 1;
1258 bumpIndent(0);
1259 OS << "digraph VPlan {\n";
1260 OS << "graph [labelloc=t, fontsize=30; label=\"Vectorization Plan";
1261 if (!Plan.getName().empty())
1262 OS << "\\n" << DOT::EscapeString(Plan.getName());
1263
1264 {
1265 // Print live-ins.
1266 std::string Str;
1267 raw_string_ostream SS(Str);
1268 Plan.printLiveIns(SS);
1270 StringRef(Str).rtrim('\n').split(Lines, "\n");
1271 for (auto Line : Lines)
1272 OS << DOT::EscapeString(Line.str()) << "\\n";
1273 }
1274
1275 OS << "\"]\n";
1276 OS << "node [shape=rect, fontname=Courier, fontsize=30]\n";
1277 OS << "edge [fontname=Courier, fontsize=30]\n";
1278 OS << "compound=true\n";
1279
1280 for (const VPBlockBase *Block : vp_depth_first_shallow(Plan.getEntry()))
1281 dumpBlock(Block);
1282
1283 OS << "}\n";
1284}
1285
1286void VPlanPrinter::dumpBlock(const VPBlockBase *Block) {
1288 dumpBasicBlock(BasicBlock);
1290 dumpRegion(Region);
1291 else
1292 llvm_unreachable("Unsupported kind of VPBlock.");
1293}
1294
1295void VPlanPrinter::drawEdge(const VPBlockBase *From, const VPBlockBase *To,
1296 bool Hidden, const Twine &Label) {
1297 // Due to "dot" we print an edge between two regions as an edge between the
1298 // exiting basic block and the entry basic of the respective regions.
1299 const VPBlockBase *Tail = From->getExitingBasicBlock();
1300 const VPBlockBase *Head = To->getEntryBasicBlock();
1301 OS << Indent << getUID(Tail) << " -> " << getUID(Head);
1302 OS << " [ label=\"" << Label << '\"';
1303 if (Tail != From)
1304 OS << " ltail=" << getUID(From);
1305 if (Head != To)
1306 OS << " lhead=" << getUID(To);
1307 if (Hidden)
1308 OS << "; splines=none";
1309 OS << "]\n";
1310}
1311
1312void VPlanPrinter::dumpEdges(const VPBlockBase *Block) {
1313 auto &Successors = Block->getSuccessors();
1314 if (Successors.size() == 1)
1315 drawEdge(Block, Successors.front(), false, "");
1316 else if (Successors.size() == 2) {
1317 drawEdge(Block, Successors.front(), false, "T");
1318 drawEdge(Block, Successors.back(), false, "F");
1319 } else {
1320 unsigned SuccessorNumber = 0;
1321 for (auto *Successor : Successors)
1322 drawEdge(Block, Successor, false, Twine(SuccessorNumber++));
1323 }
1324}
1325
1326void VPlanPrinter::dumpBasicBlock(const VPBasicBlock *BasicBlock) {
1327 // Implement dot-formatted dump by performing plain-text dump into the
1328 // temporary storage followed by some post-processing.
1329 OS << Indent << getUID(BasicBlock) << " [label =\n";
1330 bumpIndent(1);
1331 std::string Str;
1332 raw_string_ostream SS(Str);
1333 // Use no indentation as we need to wrap the lines into quotes ourselves.
1334 BasicBlock->print(SS, "", SlotTracker);
1335
1336 // We need to process each line of the output separately, so split
1337 // single-string plain-text dump.
1339 StringRef(Str).rtrim('\n').split(Lines, "\n");
1340
1341 auto EmitLine = [&](StringRef Line, StringRef Suffix) {
1342 OS << Indent << '"' << DOT::EscapeString(Line.str()) << "\\l\"" << Suffix;
1343 };
1344
1345 // Don't need the "+" after the last line.
1346 for (auto Line : make_range(Lines.begin(), Lines.end() - 1))
1347 EmitLine(Line, " +\n");
1348 EmitLine(Lines.back(), "\n");
1349
1350 bumpIndent(-1);
1351 OS << Indent << "]\n";
1352
1353 dumpEdges(BasicBlock);
1354}
1355
1356void VPlanPrinter::dumpRegion(const VPRegionBlock *Region) {
1357 OS << Indent << "subgraph " << getUID(Region) << " {\n";
1358 bumpIndent(1);
1359 OS << Indent << "fontname=Courier\n"
1360 << Indent << "label=\""
1361 << DOT::EscapeString(Region->isReplicator() ? "<xVFxUF> " : "<x1> ")
1362 << DOT::EscapeString(Region->getName()) << "\"\n";
1363 // Dump the blocks of the region.
1364 assert(Region->getEntry() && "Region contains no inner blocks.");
1365 for (const VPBlockBase *Block : vp_depth_first_shallow(Region->getEntry()))
1366 dumpBlock(Block);
1367 bumpIndent(-1);
1368 OS << Indent << "}\n";
1369 dumpEdges(Region);
1370}
1371
1372#endif
1373
1374/// Returns true if there is a vector loop region and \p VPV is defined in a
1375/// loop region.
1376static bool isDefinedInsideLoopRegions(const VPValue *VPV) {
1377 const VPRecipeBase *DefR = VPV->getDefiningRecipe();
1378 return DefR && (!DefR->getParent()->getPlan()->getVectorLoopRegion() ||
1380}
1381
1386 replaceUsesWithIf(New, [](VPUser &, unsigned) { return true; });
1387}
1388
1390 VPValue *New,
1391 llvm::function_ref<bool(VPUser &U, unsigned Idx)> ShouldReplace) {
1392 // Note that this early exit is required for correctness; the implementation
1393 // below relies on the number of users for this VPValue to decrease, which
1394 // isn't the case if this == New.
1395 if (this == New)
1396 return;
1397
1398 for (unsigned J = 0; J < getNumUsers();) {
1399 VPUser *User = Users[J];
1400 bool RemovedUser = false;
1401 for (unsigned I = 0, E = User->getNumOperands(); I < E; ++I) {
1402 if (User->getOperand(I) != this || !ShouldReplace(*User, I))
1403 continue;
1404
1405 RemovedUser = true;
1406 User->setOperand(I, New);
1407 }
1408 // If a user got removed after updating the current user, the next user to
1409 // update will be moved to the current position, so we only need to
1410 // increment the index if the number of users did not change.
1411 if (!RemovedUser)
1412 J++;
1413 }
1414}
1415
1417 for (unsigned Idx = 0; Idx != getNumOperands(); ++Idx) {
1418 if (getOperand(Idx) == From)
1419 setOperand(Idx, To);
1420 }
1421}
1422
1423#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1425 OS << Tracker.getOrCreateName(this);
1426}
1427
1430 Op->printAsOperand(O, SlotTracker);
1431 });
1432}
1433#endif
1434
1435void VPSlotTracker::assignName(const VPValue *V) {
1436 assert(!VPValue2Name.contains(V) && "VPValue already has a name!");
1437 auto *UV = V->getUnderlyingValue();
1438 auto *VPI = dyn_cast_or_null<VPInstruction>(V);
1439 if (!UV && !(VPI && !VPI->getName().empty())) {
1440 VPValue2Name[V] = (Twine("vp<%") + Twine(NextSlot) + ">").str();
1441 NextSlot++;
1442 return;
1443 }
1444
1445 // Use the name of the underlying Value, wrapped in "ir<>", and versioned by
1446 // appending ".Number" to the name if there are multiple uses.
1447 std::string Name;
1448 if (UV)
1449 Name = getName(UV);
1450 else
1451 Name = VPI->getName();
1452
1453 assert(!Name.empty() && "Name cannot be empty.");
1454 StringRef Prefix = UV ? "ir<" : "vp<%";
1455 std::string BaseName = (Twine(Prefix) + Name + Twine(">")).str();
1456
1457 // First assign the base name for V.
1458 const auto &[A, _] = VPValue2Name.try_emplace(V, BaseName);
1459 // Integer or FP constants with different types will result in the same string
1460 // due to stripping types.
1462 return;
1463
1464 // If it is already used by C > 0 other VPValues, increase the version counter
1465 // C and use it for V.
1466 const auto &[C, UseInserted] = BaseName2Version.try_emplace(BaseName, 0);
1467 if (!UseInserted) {
1468 C->second++;
1469 A->second = (BaseName + Twine(".") + Twine(C->second)).str();
1470 }
1471}
1472
1473void VPSlotTracker::assignNames(const VPlan &Plan) {
1474 if (Plan.VF.getNumUsers() > 0)
1475 assignName(&Plan.VF);
1476 if (Plan.VFxUF.getNumUsers() > 0)
1477 assignName(&Plan.VFxUF);
1478 assignName(&Plan.VectorTripCount);
1479 if (Plan.BackedgeTakenCount)
1480 assignName(Plan.BackedgeTakenCount);
1481 for (VPValue *LI : Plan.getLiveIns())
1482 assignName(LI);
1483
1484 ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<const VPBlockBase *>>
1485 RPOT(VPBlockDeepTraversalWrapper<const VPBlockBase *>(Plan.getEntry()));
1486 for (const VPBasicBlock *VPBB :
1488 assignNames(VPBB);
1489}
1490
1491void VPSlotTracker::assignNames(const VPBasicBlock *VPBB) {
1492 for (const VPRecipeBase &Recipe : *VPBB)
1493 for (VPValue *Def : Recipe.definedValues())
1494 assignName(Def);
1495}
1496
1497std::string VPSlotTracker::getName(const Value *V) {
1498 std::string Name;
1499 raw_string_ostream S(Name);
1500 if (V->hasName() || !isa<Instruction>(V)) {
1501 V->printAsOperand(S, false);
1502 return Name;
1503 }
1504
1505 if (!MST) {
1506 // Lazily create the ModuleSlotTracker when we first hit an unnamed
1507 // instruction.
1508 auto *I = cast<Instruction>(V);
1509 // This check is required to support unit tests with incomplete IR.
1510 if (I->getParent()) {
1511 MST = std::make_unique<ModuleSlotTracker>(I->getModule());
1512 MST->incorporateFunction(*I->getFunction());
1513 } else {
1514 MST = std::make_unique<ModuleSlotTracker>(nullptr);
1515 }
1516 }
1517 V->printAsOperand(S, false, *MST);
1518 return Name;
1519}
1520
1521std::string VPSlotTracker::getOrCreateName(const VPValue *V) const {
1522 std::string Name = VPValue2Name.lookup(V);
1523 if (!Name.empty())
1524 return Name;
1525
1526 // If no name was assigned, no VPlan was provided when creating the slot
1527 // tracker or it is not reachable from the provided VPlan. This can happen,
1528 // e.g. when trying to print a recipe that has not been inserted into a VPlan
1529 // in a debugger.
1530 // TODO: Update VPSlotTracker constructor to assign names to recipes &
1531 // VPValues not associated with a VPlan, instead of constructing names ad-hoc
1532 // here.
1533 const VPRecipeBase *DefR = V->getDefiningRecipe();
1534 (void)DefR;
1535 assert((!DefR || !DefR->getParent() || !DefR->getParent()->getPlan()) &&
1536 "VPValue defined by a recipe in a VPlan?");
1537
1538 // Use the underlying value's name, if there is one.
1539 if (auto *UV = V->getUnderlyingValue()) {
1540 std::string Name;
1541 raw_string_ostream S(Name);
1542 UV->printAsOperand(S, false);
1543 return (Twine("ir<") + Name + ">").str();
1544 }
1545
1546 return "<badref>";
1547}
1548
1550 const std::function<bool(ElementCount)> &Predicate, VFRange &Range) {
1551 assert(!Range.isEmpty() && "Trying to test an empty VF range.");
1552 bool PredicateAtRangeStart = Predicate(Range.Start);
1553
1554 for (ElementCount TmpVF : VFRange(Range.Start * 2, Range.End))
1555 if (Predicate(TmpVF) != PredicateAtRangeStart) {
1556 Range.End = TmpVF;
1557 break;
1558 }
1559
1560 return PredicateAtRangeStart;
1561}
1562
1563/// Build VPlans for the full range of feasible VF's = {\p MinVF, 2 * \p MinVF,
1564/// 4 * \p MinVF, ..., \p MaxVF} by repeatedly building a VPlan for a sub-range
1565/// of VF's starting at a given VF and extending it as much as possible. Each
1566/// vectorization decision can potentially shorten this sub-range during
1567/// buildVPlan().
1569 ElementCount MaxVF) {
1570 auto MaxVFTimes2 = MaxVF * 2;
1571 for (ElementCount VF = MinVF; ElementCount::isKnownLT(VF, MaxVFTimes2);) {
1572 VFRange SubRange = {VF, MaxVFTimes2};
1573 if (auto Plan = tryToBuildVPlan(SubRange)) {
1575 // Update the name of the latch of the top-level vector loop region region
1576 // after optimizations which includes block folding.
1577 Plan->getVectorLoopRegion()->getExiting()->setName("vector.latch");
1578 VPlans.push_back(std::move(Plan));
1579 }
1580 VF = SubRange.End;
1581 }
1582}
1583
1585 assert(count_if(VPlans,
1586 [VF](const VPlanPtr &Plan) { return Plan->hasVF(VF); }) ==
1587 1 &&
1588 "Multiple VPlans for VF.");
1589
1590 for (const VPlanPtr &Plan : VPlans) {
1591 if (Plan->hasVF(VF))
1592 return *Plan.get();
1593 }
1594 llvm_unreachable("No plan found!");
1595}
1596
1599 // Reserve first location for self reference to the LoopID metadata node.
1600 MDs.push_back(nullptr);
1601 bool IsUnrollMetadata = false;
1602 MDNode *LoopID = L->getLoopID();
1603 if (LoopID) {
1604 // First find existing loop unrolling disable metadata.
1605 for (unsigned I = 1, IE = LoopID->getNumOperands(); I < IE; ++I) {
1606 auto *MD = dyn_cast<MDNode>(LoopID->getOperand(I));
1607 if (MD) {
1608 const auto *S = dyn_cast<MDString>(MD->getOperand(0));
1609 if (!S)
1610 continue;
1611 if (S->getString().starts_with("llvm.loop.unroll.runtime.disable"))
1612 continue;
1613 IsUnrollMetadata =
1614 S->getString().starts_with("llvm.loop.unroll.disable");
1615 }
1616 MDs.push_back(LoopID->getOperand(I));
1617 }
1618 }
1619
1620 if (!IsUnrollMetadata) {
1621 // Add runtime unroll disable metadata.
1622 LLVMContext &Context = L->getHeader()->getContext();
1623 SmallVector<Metadata *, 1> DisableOperands;
1624 DisableOperands.push_back(
1625 MDString::get(Context, "llvm.loop.unroll.runtime.disable"));
1626 MDNode *DisableNode = MDNode::get(Context, DisableOperands);
1627 MDs.push_back(DisableNode);
1628 MDNode *NewLoopID = MDNode::get(Context, MDs);
1629 // Set operand 0 to refer to the loop id itself.
1630 NewLoopID->replaceOperandWith(0, NewLoopID);
1631 L->setLoopID(NewLoopID);
1632 }
1633}
1634
1636 Loop *VectorLoop, VPBasicBlock *HeaderVPBB, const VPlan &Plan,
1637 bool VectorizingEpilogue, MDNode *OrigLoopID,
1638 std::optional<unsigned> OrigAverageTripCount,
1639 unsigned OrigLoopInvocationWeight, unsigned EstimatedVFxUF,
1640 bool DisableRuntimeUnroll) {
1641 // Update the metadata of the scalar loop. Skip the update when vectorizing
1642 // the epilogue loop to ensure it is updated only once. Also skip the update
1643 // when the scalar loop became unreachable.
1644 if (Plan.getScalarPreheader()->hasPredecessors() && !VectorizingEpilogue) {
1645 std::optional<MDNode *> RemainderLoopID =
1648 if (RemainderLoopID) {
1649 OrigLoop->setLoopID(*RemainderLoopID);
1650 } else {
1651 if (DisableRuntimeUnroll)
1653
1654 LoopVectorizeHints Hints(OrigLoop, true, *ORE);
1655 Hints.setAlreadyVectorized();
1656 }
1657 }
1658
1659 if (!VectorLoop)
1660 return;
1661
1662 if (std::optional<MDNode *> VectorizedLoopID = makeFollowupLoopID(
1663 OrigLoopID, {LLVMLoopVectorizeFollowupAll,
1665 VectorLoop->setLoopID(*VectorizedLoopID);
1666 } else {
1667 // Keep all loop hints from the original loop on the vector loop (we'll
1668 // replace the vectorizer-specific hints below).
1669 if (OrigLoopID)
1670 VectorLoop->setLoopID(OrigLoopID);
1671
1672 if (!VectorizingEpilogue) {
1673 LoopVectorizeHints Hints(VectorLoop, true, *ORE);
1674 Hints.setAlreadyVectorized();
1675 }
1676 }
1678 TTI.getUnrollingPreferences(VectorLoop, *PSE.getSE(), UP, ORE);
1679 if (!UP.UnrollVectorizedLoop || VectorizingEpilogue)
1681
1682 // Set/update profile weights for the vector and remainder loops as original
1683 // loop iterations are now distributed among them. Note that original loop
1684 // becomes the scalar remainder loop after vectorization.
1685 //
1686 // For cases like foldTailByMasking() and requiresScalarEpiloque() we may
1687 // end up getting slightly roughened result but that should be OK since
1688 // profile is not inherently precise anyway. Note also possible bypass of
1689 // vector code caused by legality checks is ignored, assigning all the weight
1690 // to the vector loop, optimistically.
1691 //
1692 // For scalable vectorization we can't know at compile time how many
1693 // iterations of the loop are handled in one vector iteration, so instead
1694 // use the value of vscale used for tuning.
1695 if (!OrigAverageTripCount)
1696 return;
1697 // Calculate number of iterations in unrolled loop.
1698 unsigned AverageVectorTripCount = *OrigAverageTripCount / EstimatedVFxUF;
1699 // Calculate number of iterations for remainder loop.
1700 unsigned RemainderAverageTripCount = *OrigAverageTripCount % EstimatedVFxUF;
1701
1702 if (HeaderVPBB) {
1703 setLoopEstimatedTripCount(VectorLoop, AverageVectorTripCount,
1704 OrigLoopInvocationWeight);
1705 }
1706 if (Plan.getScalarPreheader()->hasPredecessors()) {
1707 setLoopEstimatedTripCount(OrigLoop, RemainderAverageTripCount,
1708 OrigLoopInvocationWeight);
1709 }
1710}
1711
1712#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1714 if (VPlans.empty()) {
1715 O << "LV: No VPlans built.\n";
1716 return;
1717 }
1718 for (const auto &Plan : VPlans)
1720 Plan->printDOT(O);
1721 else
1722 Plan->print(O);
1723}
1724#endif
1725
1726bool llvm::canConstantBeExtended(const APInt *C, Type *NarrowType,
1728 APInt TruncatedVal = C->trunc(NarrowType->getScalarSizeInBits());
1729 unsigned WideSize = C->getBitWidth();
1730 APInt ExtendedVal = ExtKind == TTI::PR_SignExtend
1731 ? TruncatedVal.sext(WideSize)
1732 : TruncatedVal.zext(WideSize);
1733 return ExtendedVal == *C;
1734}
1735
1738 if (auto *IRV = dyn_cast<VPIRValue>(V))
1739 return TTI::getOperandInfo(IRV->getValue());
1740
1741 return {};
1742}
1743
1745 Type *ResultTy, ArrayRef<const VPValue *> Operands, ElementCount VF,
1746 bool AlwaysIncludeReplicatingR) {
1747 if (VF.isScalar())
1748 return 0;
1749
1750 assert(!VF.isScalable() &&
1751 "Scalarization overhead not supported for scalable vectors");
1752
1753 InstructionCost ScalarizationCost = 0;
1754 // Compute the cost of scalarizing the result if needed.
1755 if (!ResultTy->isVoidTy()) {
1756 for (Type *VectorTy :
1757 to_vector(getContainedTypes(toVectorizedTy(ResultTy, VF)))) {
1758 ScalarizationCost += TTI.getScalarizationOverhead(
1760 /*Insert=*/true,
1761 /*Extract=*/false, CostKind);
1762 }
1763 }
1764 // Compute the cost of scalarizing the operands, skipping ones that do not
1765 // require extraction/scalarization and do not incur any overhead.
1766 SmallPtrSet<const VPValue *, 4> UniqueOperands;
1768 for (auto *Op : Operands) {
1769 if (isa<VPIRValue>(Op) ||
1770 (!AlwaysIncludeReplicatingR &&
1773 cast<VPReplicateRecipe>(Op)->getOpcode() == Instruction::Load) ||
1774 !UniqueOperands.insert(Op).second)
1775 continue;
1776 Tys.push_back(toVectorizedTy(Types.inferScalarType(Op), VF));
1777 }
1778 return ScalarizationCost +
1779 TTI.getOperandsScalarizationOverhead(Tys, CostKind);
1780}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition Compiler.h:638
dxil pretty DXIL Metadata Pretty Printer
Flatten the CFG
#define _
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
This file defines the LoopVectorizationLegality class.
This file provides a LoopVectorizationPlanner class.
#define I(x, y, z)
Definition MD5.cpp:57
#define T
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
#define P(N)
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
static StringRef getName(Value *V)
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallVector class.
This file contains some functions that are useful when dealing with strings.
#define LLVM_DEBUG(...)
Definition Debug.h:114
This file implements dominator tree analysis for a single level of a VPlan's H-CFG.
This file contains the declarations of different VPlan-related auxiliary helpers.
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition VPlanSLP.cpp:247
This file provides utility VPlan to VPlan transformations.
static void addRuntimeUnrollDisableMetaData(Loop *L)
Definition VPlan.cpp:1597
static T * getPlanEntry(T *Start)
Definition VPlan.cpp:153
static T * getEnclosingLoopRegionForRegion(T *P)
Return the enclosing loop region for region P.
Definition VPlan.cpp:579
const char LLVMLoopVectorizeFollowupAll[]
Definition VPlan.cpp:57
static bool isDefinedInsideLoopRegions(const VPValue *VPV)
Returns true if there is a vector loop region and VPV is defined in a loop region.
Definition VPlan.cpp:1376
static bool hasConditionalTerminator(const VPBasicBlock *VPBB)
Definition VPlan.cpp:597
const char LLVMLoopVectorizeFollowupVectorized[]
Definition VPlan.cpp:58
static void remapOperands(VPBlockBase *Entry, VPBlockBase *NewEntry, DenseMap< VPValue *, VPValue * > &Old2NewVPValues)
Definition VPlan.cpp:1126
const char LLVMLoopVectorizeFollowupEpilogue[]
Definition VPlan.cpp:60
static std::pair< VPBlockBase *, VPBlockBase * > cloneFrom(VPBlockBase *Entry)
Definition VPlan.cpp:690
static cl::opt< bool > PrintVPlansInDotFormat("vplan-print-in-dot-format", cl::Hidden, cl::desc("Use dot format instead of plain text when dumping VPlans"))
This file contains the declarations of the Vectorization Plan base classes:
static bool IsCondBranch(unsigned BrOpc)
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1012
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
Definition APInt.cpp:985
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
iterator begin()
Instruction iterator methods.
Definition BasicBlock.h:459
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition BasicBlock.h:206
LLVM_ABI const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
size_t size() const
Definition BasicBlock.h:480
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition BasicBlock.h:233
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
std::optional< const DILocation * > cloneByMultiplyingDuplicationFactor(unsigned DF) const
Returns a new DILocation with duplication factor DF * current duplication factor encoded in the discr...
A debug info location.
Definition DebugLoc.h:123
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition DenseMap.h:205
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Definition DenseMap.h:169
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:164
constexpr bool isVector() const
One or more elements.
Definition TypeSize.h:324
constexpr bool isScalar() const
Exactly one element.
Definition TypeSize.h:320
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
static InstructionCost getInvalid(CostType Val=0)
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
A helper class to return the specified delimiter string after the first invocation of operator String...
void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase< BlockT, LoopT > &LI)
This method is used by other analyses to update loop information.
std::vector< BlockT * > & getBlocksVector()
Return a direct, mutable handle to the blocks vector so that we can mutate it efficiently with techni...
void addChildLoop(LoopT *NewChild)
Add the specified loop to be a child of this loop.
VPlan & getPlanFor(ElementCount VF) const
Return the VPlan for VF.
Definition VPlan.cpp:1584
void updateLoopMetadataAndProfileInfo(Loop *VectorLoop, VPBasicBlock *HeaderVPBB, const VPlan &Plan, bool VectorizingEpilogue, MDNode *OrigLoopID, std::optional< unsigned > OrigAverageTripCount, unsigned OrigLoopInvocationWeight, unsigned EstimatedVFxUF, bool DisableRuntimeUnroll)
Update loop metadata and profile info for both the scalar remainder loop and VectorLoop,...
Definition VPlan.cpp:1635
void buildVPlans(ElementCount MinVF, ElementCount MaxVF)
Build VPlans for power-of-2 VF's between MinVF and MaxVF inclusive, according to the information gath...
Definition VPlan.cpp:1568
static bool getDecisionAndClampRange(const std::function< bool(ElementCount)> &Predicate, VFRange &Range)
Test a Predicate on a Range of VF's.
Definition VPlan.cpp:1549
void printPlans(raw_ostream &O)
Definition VPlan.cpp:1713
Utility class for getting and setting loop vectorizer hints in the form of loop metadata.
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
void setLoopID(MDNode *LoopID) const
Set the llvm.loop loop id metadata for this loop.
Definition LoopInfo.cpp:526
Metadata node.
Definition Metadata.h:1078
LLVM_ABI void replaceOperandWith(unsigned I, Metadata *New)
Replace a specific operand.
const MDOperand & getOperand(unsigned I) const
Definition Metadata.h:1442
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1569
unsigned getNumOperands() const
Return number of MDNode operands.
Definition Metadata.h:1448
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
Definition Metadata.cpp:608
BlockT * getEntry() const
Get the entry BasicBlock of the Region.
Definition RegionInfo.h:320
size_type size() const
Determine the number of elements in the SetVector.
Definition SetVector.h:103
void insert_range(Range &&R)
Definition SetVector.h:176
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:151
This class provides computation of slot numbers for LLVM Assembly writing.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:339
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
std::pair< iterator, bool > try_emplace(StringRef Key, ArgsTy &&...Args)
Emplace a new element for the specified key into the map if the key isn't already in the map.
Definition StringMap.h:381
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition StringRef.h:712
StringRef rtrim(char Char) const
Return string with consecutive Char characters starting from the right removed.
Definition StringRef.h:814
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
static LLVM_ABI OperandValueInfo getOperandInfo(const Value *V)
Collect properties of V used in cost analysis, e.g. OP_PowerOf2.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:230
bool isVoidTy() const
Return true if this is 'void'.
Definition Type.h:139
This function has undefined behavior.
void setOperand(unsigned i, Value *Val)
Definition User.h:238
Value * getOperand(unsigned i) const
Definition User.h:233
unsigned getNumOperands() const
Definition User.h:255
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition VPlan.h:3949
void appendRecipe(VPRecipeBase *Recipe)
Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.
Definition VPlan.h:4024
RecipeListTy::iterator iterator
Instruction iterators...
Definition VPlan.h:3976
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPBasicBlock,...
Definition VPlan.cpp:489
iterator end()
Definition VPlan.h:3986
iterator begin()
Recipe iterator methods.
Definition VPlan.h:3984
VPBasicBlock * clone() override
Clone the current block and it's recipes, without updating the operands of the cloned recipes.
Definition VPlan.cpp:540
InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override
Return the cost of this VPBasicBlock.
Definition VPlan.cpp:775
const VPBasicBlock * getCFGPredecessor(unsigned Idx) const
Returns the predecessor block at index Idx with the predecessors as per the corresponding plain CFG.
Definition VPlan.cpp:782
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
Definition VPlan.cpp:228
void connectToPredecessors(VPTransformState &State)
Connect the VPBBs predecessors' in the VPlan CFG to the IR basic block generated for this VPBB.
Definition VPlan.cpp:389
VPRegionBlock * getEnclosingLoopRegion()
Definition VPlan.cpp:589
VPBasicBlock * splitAt(iterator SplitAt)
Split current block at SplitAt by inserting a new block between the current block and its successors ...
Definition VPlan.cpp:561
RecipeListTy Recipes
The VPRecipes held in the order of output instructions to generate.
Definition VPlan.h:3964
void executeRecipes(VPTransformState *State, BasicBlock *BB)
Execute the recipes in the IR basic block BB.
Definition VPlan.cpp:547
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print this VPBsicBlock to O, prefixing all lines with Indent.
Definition VPlan.cpp:669
bool isExiting() const
Returns true if the block is exiting it's parent region.
Definition VPlan.cpp:647
VPRecipeBase * getTerminator()
If the block has multiple successors, return the branch recipe terminating the block.
Definition VPlan.cpp:635
const VPRecipeBase & back() const
Definition VPlan.h:3998
bool empty() const
Definition VPlan.h:3995
size_t size() const
Definition VPlan.h:3994
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
Definition VPlan.h:81
void setSuccessors(ArrayRef< VPBlockBase * > NewSuccs)
Set each VPBasicBlock in NewSuccss as successor of this VPBlockBase.
Definition VPlan.h:300
VPRegionBlock * getParent()
Definition VPlan.h:173
const VPBasicBlock * getExitingBasicBlock() const
Definition VPlan.cpp:198
void setName(const Twine &newName)
Definition VPlan.h:166
size_t getNumSuccessors() const
Definition VPlan.h:219
iterator_range< VPBlockBase ** > successors()
Definition VPlan.h:201
virtual void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const =0
Print plain-text dump of this VPBlockBase to O, prefixing all lines with Indent.
bool hasPredecessors() const
Returns true if this block has any predecessors.
Definition VPlan.h:223
void printSuccessors(raw_ostream &O, const Twine &Indent) const
Print the successors of this block to O, prefixing all lines with Indent.
Definition VPlan.cpp:657
size_t getNumPredecessors() const
Definition VPlan.h:220
void setPredecessors(ArrayRef< VPBlockBase * > NewPreds)
Set each VPBasicBlock in NewPreds as predecessor of this VPBlockBase.
Definition VPlan.h:291
VPBlockBase * getEnclosingBlockWithPredecessors()
Definition VPlan.cpp:220
const VPBlocksTy & getPredecessors() const
Definition VPlan.h:204
VPlan * getPlan()
Definition VPlan.cpp:173
void setPlan(VPlan *ParentPlan)
Sets the pointer of the plan containing the block.
Definition VPlan.cpp:192
const std::string & getName() const
Definition VPlan.h:164
VPBlockBase * getSinglePredecessor() const
Definition VPlan.h:215
const VPBlocksTy & getHierarchicalSuccessors()
Definition VPlan.h:242
VPBlockBase(const unsigned char SC, const std::string &N)
Definition VPlan.h:150
VPBlockBase * getEnclosingBlockWithSuccessors()
An Enclosing Block of a block B is any block containing B, including B itself.
Definition VPlan.cpp:212
const VPBasicBlock * getEntryBasicBlock() const
Definition VPlan.cpp:178
VPBlockBase * getSingleHierarchicalPredecessor()
Definition VPlan.h:264
VPBlockBase * getSingleSuccessor() const
Definition VPlan.h:209
const VPBlocksTy & getSuccessors() const
Definition VPlan.h:198
static auto blocksOnly(const T &Range)
Return an iterator range over Range which only includes BlockTy blocks.
Definition VPlanUtils.h:221
static void insertBlockAfter(VPBlockBase *NewBlock, VPBlockBase *BlockPtr)
Insert disconnected VPBlockBase NewBlock after BlockPtr.
Definition VPlanUtils.h:122
static bool isLatch(const VPBlockBase *VPB, const VPDominatorTree &VPDT)
Returns true if VPB is a loop latch, using isHeader().
static bool isHeader(const VPBlockBase *VPB, const VPDominatorTree &VPDT)
Returns true if VPB is a loop header, based on regions or VPDT in their absence.
static void connectBlocks(VPBlockBase *From, VPBlockBase *To, unsigned PredIdx=-1u, unsigned SuccIdx=-1u)
Connect VPBlockBases From and To bi-directionally.
Definition VPlanUtils.h:173
static void disconnectBlocks(VPBlockBase *From, VPBlockBase *To)
Disconnect VPBlockBases From and To bi-directionally.
Definition VPlanUtils.h:191
static void transferSuccessors(VPBlockBase *Old, VPBlockBase *New)
Transfer successors from Old to New. New must have no successors.
Definition VPlanUtils.h:211
VPlan-based builder utility analogous to IRBuilder.
VPPhi * createScalarPhi(ArrayRef< VPValue * > IncomingValues, DebugLoc DL, const Twine &Name="")
LLVM_ABI_FOR_TEST void dump() const
Dump the VPDef to stderr (for debugging).
Definition VPlan.cpp:110
virtual void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const =0
Each concrete VPDef prints itself.
A special type of VPBasicBlock that wraps an existing IR basic block.
Definition VPlan.h:4102
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPBasicBlock,...
Definition VPlan.cpp:457
BasicBlock * getIRBasicBlock() const
Definition VPlan.h:4126
VPIRBasicBlock * clone() override
Clone the current block and it's recipes, without updating the operands of the cloned recipes.
Definition VPlan.cpp:482
static LLVM_ABI_FOR_TEST VPIRInstruction * create(Instruction &I)
Create a new VPIRPhi for \I , if it is a PHINode, otherwise create a VPIRInstruction.
In what follows, the term "input IR" refers to code that is fed into the vectorizer whereas the term ...
Value * getAsRuntimeExpr(IRBuilderBase &Builder, const ElementCount &VF) const
Returns an expression describing the lane index that can be used at runtime.
Definition VPlan.cpp:81
static VPLane getFirstLane()
@ ScalableLast
For ScalableLast, Lane is the offset from the start of the last N-element subvector in a scalable vec...
@ First
For First, Lane is the index into the first N elements of a fixed-vector <N x <ElTy>> or a scalable v...
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition VPlan.h:387
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override final
Print the recipe, delegating to printRecipe().
VPBasicBlock * getParent()
Definition VPlan.h:408
virtual ~VPRecipeValue()
Definition VPlan.cpp:145
friend class VPDef
Definition VPlanValue.h:213
friend class VPValue
Definition VPlanValue.h:212
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
Definition VPlan.h:4137
VPRegionBlock * clone() override
Clone all blocks in the single-entry single-exit region of the block and their recipes without updati...
Definition VPlan.cpp:740
const VPBlockBase * getEntry() const
Definition VPlan.h:4173
void dissolveToCFGLoop()
Remove the current region from its VPlan, connecting its predecessor to its entry,...
Definition VPlan.cpp:844
bool isReplicator() const
An indicator whether this region is to generate multiple replicated instances of output IR correspond...
Definition VPlan.h:4205
InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override
Return the cost of the block.
Definition VPlan.cpp:801
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print this VPRegionBlock to O (recursively), prefixing all lines with Indent.
Definition VPlan.cpp:830
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPRegionBlock,...
Definition VPlan.cpp:753
const VPBlockBase * getExiting() const
Definition VPlan.h:4185
friend class VPlan
Definition VPlan.h:4138
This class can be used to assign names to VPValues.
std::string getOrCreateName(const VPValue *V) const
Returns the name assigned to V, if there is one, otherwise try to construct one from the underlying v...
Definition VPlan.cpp:1521
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
Definition VPlanValue.h:229
void replaceUsesOfWith(VPValue *From, VPValue *To)
Replaces all uses of From in the VPUser with To.
Definition VPlan.cpp:1416
void printOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the operands to O.
Definition VPlan.cpp:1428
operand_range operands()
Definition VPlanValue.h:297
void setOperand(unsigned I, VPValue *New)
Definition VPlanValue.h:273
unsigned getNumOperands() const
Definition VPlanValue.h:267
VPValue * getOperand(unsigned N) const
Definition VPlanValue.h:268
This is the base class of the VPlan Def/Use graph, used for modeling the data flow into,...
Definition VPlanValue.h:45
Value * getLiveInIRValue() const
Return the underlying IR value for a VPIRValue.
Definition VPlan.cpp:133
bool isDefinedOutsideLoopRegions() const
Returns true if the VPValue is defined outside any loop.
Definition VPlan.cpp:1382
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Definition VPlan.cpp:119
void printAsOperand(raw_ostream &OS, VPSlotTracker &Tracker) const
Definition VPlan.cpp:1424
friend class VPRecipeValue
Definition VPlanValue.h:51
Value * getUnderlyingValue() const
Return the underlying Value attached to this VPValue.
Definition VPlanValue.h:72
@ VPVRecipeValueSC
A symbolic live-in VPValue without IR backing.
Definition VPlanValue.h:82
void dump() const
Dump the value to stderr (for debugging).
Definition VPlan.cpp:102
void print(raw_ostream &OS, VPSlotTracker &Tracker) const
Definition VPlan.cpp:95
void replaceAllUsesWith(VPValue *New)
Definition VPlan.cpp:1385
unsigned getNumUsers() const
Definition VPlanValue.h:105
void replaceUsesWithIf(VPValue *New, llvm::function_ref< bool(VPUser &U, unsigned Idx)> ShouldReplace)
Go through the uses list for this VPValue and make each use point to New if the callback ShouldReplac...
Definition VPlan.cpp:1389
LLVM_DUMP_METHOD void dump()
Definition VPlan.cpp:1256
VPlan models a candidate for vectorization, encoding various decisions take to produce efficient outp...
Definition VPlan.h:4267
LLVM_ABI_FOR_TEST void printDOT(raw_ostream &O) const
Print this VPlan in DOT format to O.
Definition VPlan.cpp:1117
friend class VPSlotTracker
Definition VPlan.h:4269
std::string getName() const
Return a string with the name of the plan and the applicable VFs and UFs.
Definition VPlan.cpp:1093
VPBasicBlock * getEntry()
Definition VPlan.h:4356
void setName(const Twine &newName)
Definition VPlan.h:4502
VPIRBasicBlock * getExitBlock(BasicBlock *IRBB) const
Return the VPIRBasicBlock corresponding to IRBB.
Definition VPlan.cpp:901
LLVM_ABI_FOR_TEST ~VPlan()
Definition VPlan.cpp:879
bool isExitBlock(VPBlockBase *VPBB)
Returns true if VPBB is an exit block.
Definition VPlan.cpp:909
friend class VPlanPrinter
Definition VPlan.h:4268
unsigned getUF() const
Definition VPlan.h:4484
VPIRBasicBlock * createEmptyVPIRBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock wrapping IRBB, but do not create VPIRInstructions wrapping the instructions i...
Definition VPlan.cpp:1228
auto getLiveIns() const
Return the list of live-in VPValues available in the VPlan.
Definition VPlan.h:4549
ArrayRef< VPIRBasicBlock * > getExitBlocks() const
Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of the original scalar loop.
Definition VPlan.h:4404
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
Definition VPlan.cpp:1022
InstructionCost cost(ElementCount VF, VPCostContext &Ctx)
Return the cost of this plan.
Definition VPlan.cpp:1004
void setEntry(VPBasicBlock *VPBB)
Definition VPlan.h:4345
VPBasicBlock * createVPBasicBlock(const Twine &Name, VPRecipeBase *Recipe=nullptr)
Create a new VPBasicBlock with Name and containing Recipe if present.
Definition VPlan.h:4572
LLVM_ABI_FOR_TEST VPIRBasicBlock * createVPIRBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock from IRBB containing VPIRInstructions for all instructions in IRBB,...
Definition VPlan.cpp:1234
LLVM_DUMP_METHOD void dump() const
Dump the plan to stderr (for debugging).
Definition VPlan.cpp:1123
VPBasicBlock * getScalarPreheader() const
Return the VPBasicBlock for the preheader of the scalar loop.
Definition VPlan.h:4395
void execute(VPTransformState *State)
Generate the IR code for this VPlan.
Definition VPlan.cpp:916
LLVM_ABI_FOR_TEST void print(raw_ostream &O) const
Print this VPlan to O.
Definition VPlan.cpp:1076
VPIRBasicBlock * getScalarHeader() const
Return the VPIRBasicBlock wrapping the header of the scalar loop.
Definition VPlan.h:4400
void printLiveIns(raw_ostream &O) const
Print the live-ins of this VPlan to O.
Definition VPlan.cpp:1038
LLVM_ABI_FOR_TEST VPlan * duplicate()
Clone the current VPlan, update all VPValues of the new VPlan and cloned recipes to refer to the clon...
Definition VPlan.cpp:1164
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
static constexpr bool isKnownLT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition TypeSize.h:216
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition TypeSize.h:168
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:165
An efficient, type-erasing, non-owning reference to a callable.
self_iterator getIterator()
Definition ilist_node.h:123
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
A raw_ostream that writes to an std::string.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Tail
Attemps to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition CallingConv.h:76
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
LLVM_ABI std::string EscapeString(const std::string &Label)
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
bool match(Val *V, const Pattern &P)
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
VPInstruction_match< VPInstruction::BranchOnTwoConds > m_BranchOnTwoConds()
VPInstruction_match< VPInstruction::BranchOnCount > m_BranchOnCount()
VPInstruction_match< VPInstruction::BuildVector > m_BuildVector()
BuildVector is matches only its opcode, w/o matching its operands as the number of operands is not fi...
VPInstruction_match< VPInstruction::BranchOnCond > m_BranchOnCond()
bool isSingleScalar(const VPValue *VPV)
Returns true if VPV is a single scalar, either because it produces the same value for all lanes or on...
VPBasicBlock * getFirstLoopHeader(VPlan &Plan, VPDominatorTree &VPDT)
Returns the header block of the first, top-level loop, or null if none exist.
bool onlyFirstLaneUsed(const VPValue *Def)
Returns true if only the first lane of Def is used.
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
Definition STLExtras.h:829
InstructionCost Cost
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI cl::opt< bool > EnableFSDiscriminator
Value * getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF)
Return the runtime value for VF.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ABI std::optional< MDNode * > makeFollowupLoopID(MDNode *OrigLoopID, ArrayRef< StringRef > FollowupAttrs, const char *InheritOptionsAttrsPrefix="", bool AlwaysNew=false)
Create a new loop identifier for a loop created from a loop transformation.
void interleaveComma(const Container &c, StreamT &os, UnaryFunctor each_fn)
Definition STLExtras.h:2289
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:632
iterator_range< df_iterator< VPBlockShallowTraversalWrapper< VPBlockBase * > > > vp_depth_first_shallow(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order.
Definition VPlanCFG.h:216
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1744
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
Type * toVectorizedTy(Type *Ty, ElementCount EC)
A helper for converting to vectorized types.
bool canConstantBeExtended(const APInt *C, Type *NarrowType, TTI::PartialReductionExtendKind ExtKind)
Check if a constant CI can be safely treated as having been extended from a narrower type with the gi...
Definition VPlan.cpp:1726
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
cl::opt< unsigned > ForceTargetInstructionCost
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
RNSuccIterator< NodeRef, BlockT, RegionT > succ_begin(NodeRef Node)
RNSuccIterator< NodeRef, BlockT, RegionT > succ_end(NodeRef Node)
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
LLVM_ABI bool setLoopEstimatedTripCount(Loop *L, unsigned EstimatedTripCount, std::optional< unsigned > EstimatedLoopInvocationWeight=std::nullopt)
Set llvm.loop.estimated_trip_count with the value EstimatedTripCount in the loop metadata of L.
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition STLExtras.h:2009
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1770
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1945
ArrayRef< Type * > getContainedTypes(Type *const &Ty)
Returns the types contained in Ty.
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305
LLVM_ABI void DeleteDeadBlocks(ArrayRef< BasicBlock * > BBs, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified blocks from BB.
std::unique_ptr< VPlan > VPlanPtr
Definition VPlan.h:77
Parameters that control the generic loop unrolling transformation.
bool UnrollVectorizedLoop
Don't disable runtime unroll for the loops which were vectorized.
A range of powers-of-2 vectorization factors with fixed start and adjustable end.
ElementCount End
Struct to hold various analysis needed for cost computations.
TargetTransformInfo::OperandValueInfo getOperandInfo(VPValue *V) const
Returns the OperandInfo for V, if it is a live-in.
Definition VPlan.cpp:1737
InstructionCost getScalarizationOverhead(Type *ResultTy, ArrayRef< const VPValue * > Operands, ElementCount VF, bool AlwaysIncludeReplicatingR=false)
Estimate the overhead of scalarizing a recipe with result type ResultTy and Operands with VF.
Definition VPlan.cpp:1744
TargetTransformInfo::TargetCostKind CostKind
VPTypeAnalysis Types
const TargetTransformInfo & TTI
A VPValue representing a live-in from the input IR or a constant.
Definition VPlanValue.h:184
Type * getType() const
Returns the type of the underlying IR value.
Definition VPlan.cpp:137
A symbolic live-in VPValue, used for values like vector trip count, VF, and VFxUF.
Definition VPlanValue.h:202
VPTransformState holds information passed down when "executing" a VPlan, needed for generating the ou...
LoopInfo * LI
Hold a pointer to LoopInfo to register new basic blocks in the loop.
VPTypeAnalysis TypeAnalysis
VPlan-based type analysis.
struct llvm::VPTransformState::DataState Data
struct llvm::VPTransformState::CFGState CFG
Value * get(const VPValue *Def, bool IsScalar=false)
Get the generated vector Value for a given VPValue Def if IsScalar is false, otherwise return the gen...
Definition VPlan.cpp:275
VPTransformState(const TargetTransformInfo *TTI, ElementCount VF, LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC, IRBuilderBase &Builder, VPlan *Plan, Loop *CurrentParentLoop, Type *CanonicalIVTy)
Definition VPlan.cpp:235
std::optional< VPLane > Lane
Hold the index to generate specific scalar instructions.
IRBuilderBase & Builder
Hold a reference to the IRBuilder used to generate output IR code.
bool hasScalarValue(const VPValue *Def, VPLane Lane)
const TargetTransformInfo * TTI
Target Transform Info.
VPlan * Plan
Pointer to the VPlan code is generated for.
void set(const VPValue *Def, Value *V, bool IsScalar=false)
Set the generated vector Value for a given VPValue, if IsScalar is false.
bool hasVectorValue(const VPValue *Def)
VPDominatorTree VPDT
VPlan-based dominator tree.
ElementCount VF
The chosen Vectorization Factor of the loop being vectorized.
Value * packScalarIntoVectorizedValue(const VPValue *Def, Value *WideValue, const VPLane &Lane)
Insert the scalar value of Def at Lane into Lane of WideValue and return the resulting value.
Definition VPlan.cpp:357
AssumptionCache * AC
Hold a pointer to AssumptionCache to register new assumptions after replicating assume calls.
void setDebugLocFrom(DebugLoc DL)
Set the debug location in the builder using the debug location DL.
Definition VPlan.cpp:335
Loop * CurrentParentLoop
The parent loop object for the current scope, or nullptr.
static LLVM_ABI_FOR_TEST void optimize(VPlan &Plan)
Apply VPlan-to-VPlan optimizations to Plan, including induction recipe optimizations,...