LLVM 22.0.0git
VPlan.cpp
Go to the documentation of this file.
1//===- VPlan.cpp - Vectorizer Plan ----------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This is the LLVM vectorization plan. It represents a candidate for
11/// vectorization, allowing to plan and optimize how to vectorize a given loop
12/// before generating LLVM-IR.
13/// The vectorizer uses vectorization plans to estimate the costs of potential
14/// candidates and if profitable to execute the desired plan, generating vector
15/// LLVM-IR code.
16///
17//===----------------------------------------------------------------------===//
18
19#include "VPlan.h"
21#include "VPlanCFG.h"
22#include "VPlanDominatorTree.h"
23#include "VPlanHelpers.h"
24#include "VPlanPatternMatch.h"
25#include "VPlanTransforms.h"
26#include "VPlanUtils.h"
28#include "llvm/ADT/STLExtras.h"
31#include "llvm/ADT/Twine.h"
34#include "llvm/IR/BasicBlock.h"
35#include "llvm/IR/CFG.h"
36#include "llvm/IR/IRBuilder.h"
37#include "llvm/IR/Instruction.h"
39#include "llvm/IR/Type.h"
40#include "llvm/IR/Value.h"
43#include "llvm/Support/Debug.h"
49#include <cassert>
50#include <string>
51
52using namespace llvm;
53using namespace llvm::VPlanPatternMatch;
54
55/// @{
56/// Metadata attribute names
57const char LLVMLoopVectorizeFollowupAll[] = "llvm.loop.vectorize.followup_all";
59 "llvm.loop.vectorize.followup_vectorized";
61 "llvm.loop.vectorize.followup_epilogue";
62/// @}
63
65
67 "vplan-print-in-dot-format", cl::Hidden,
68 cl::desc("Use dot format instead of plain text when dumping VPlans"));
69
70#define DEBUG_TYPE "loop-vectorize"
71
72#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
74 const VPBasicBlock *Parent = R.getParent();
75 VPSlotTracker SlotTracker(Parent ? Parent->getPlan() : nullptr);
76 R.print(OS, "", SlotTracker);
77 return OS;
78}
79#endif
80
82 const ElementCount &VF) const {
83 switch (LaneKind) {
85 // Lane = RuntimeVF - VF.getKnownMinValue() + Lane
86 return Builder.CreateSub(getRuntimeVF(Builder, Builder.getInt32Ty(), VF),
87 Builder.getInt32(VF.getKnownMinValue() - Lane));
89 return Builder.getInt32(Lane);
90 }
91 llvm_unreachable("Unknown lane kind");
92}
93
94VPValue::VPValue(const unsigned char SC, Value *UV, VPDef *Def)
95 : SubclassID(SC), UnderlyingVal(UV), Def(Def) {
96 if (Def)
97 Def->addDefinedValue(this);
98}
99
101 assert(Users.empty() && "trying to delete a VPValue with remaining users");
102 if (VPDef *Def = getDefiningRecipe())
103 Def->removeDefinedValue(this);
104}
105
106#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
108 if (const VPRecipeBase *R = getDefiningRecipe())
109 R->print(OS, "", SlotTracker);
110 else
112}
113
114void VPValue::dump() const {
115 const VPRecipeBase *Instr = getDefiningRecipe();
117 (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() : nullptr);
119 dbgs() << "\n";
120}
121
122void VPDef::dump() const {
123 const VPRecipeBase *Instr = dyn_cast_or_null<VPRecipeBase>(this);
125 (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() : nullptr);
126 print(dbgs(), "", SlotTracker);
127 dbgs() << "\n";
128}
129#endif
130
134
138
139// Get the top-most entry block of \p Start. This is the entry block of the
140// containing VPlan. This function is templated to support both const and non-const blocks
141template <typename T> static T *getPlanEntry(T *Start) {
142 T *Next = Start;
143 T *Current = Start;
144 while ((Next = Next->getParent()))
145 Current = Next;
146
147 SmallSetVector<T *, 8> WorkList;
148 WorkList.insert(Current);
149
150 for (unsigned i = 0; i < WorkList.size(); i++) {
151 T *Current = WorkList[i];
152 if (!Current->hasPredecessors())
153 return Current;
154 auto &Predecessors = Current->getPredecessors();
155 WorkList.insert_range(Predecessors);
156 }
157
158 llvm_unreachable("VPlan without any entry node without predecessors");
159}
160
161VPlan *VPBlockBase::getPlan() { return getPlanEntry(this)->Plan; }
162
163const VPlan *VPBlockBase::getPlan() const { return getPlanEntry(this)->Plan; }
164
165/// \return the VPBasicBlock that is the entry of Block, possibly indirectly.
172
179
180void VPBlockBase::setPlan(VPlan *ParentPlan) {
181 assert(ParentPlan->getEntry() == this && "Can only set plan on its entry.");
182 Plan = ParentPlan;
183}
184
185/// \return the VPBasicBlock that is the exit of Block, possibly indirectly.
187 const VPBlockBase *Block = this;
189 Block = Region->getExiting();
191}
192
199
201 if (!Successors.empty() || !Parent)
202 return this;
203 assert(Parent->getExiting() == this &&
204 "Block w/o successors not the exiting block of its parent.");
205 return Parent->getEnclosingBlockWithSuccessors();
206}
207
209 if (!Predecessors.empty() || !Parent)
210 return this;
211 assert(Parent->getEntry() == this &&
212 "Block w/o predecessors not the entry of its parent.");
213 return Parent->getEnclosingBlockWithPredecessors();
214}
215
217 iterator It = begin();
218 while (It != end() && It->isPhi())
219 It++;
220 return It;
221}
222
230
232 if (Def->isLiveIn())
233 return Def->getLiveInIRValue();
234
235 if (hasScalarValue(Def, Lane))
236 return Data.VPV2Scalars[Def][Lane.mapToCacheIndex(VF)];
237
238 if (!Lane.isFirstLane() && vputils::isSingleScalar(Def) &&
240 return Data.VPV2Scalars[Def][0];
241 }
242
243 // Look through BuildVector to avoid redundant extracts.
244 // TODO: Remove once replicate regions are unrolled explicitly.
245 if (Lane.getKind() == VPLane::Kind::First && match(Def, m_BuildVector())) {
246 auto *BuildVector = cast<VPInstruction>(Def);
247 return get(BuildVector->getOperand(Lane.getKnownLane()), true);
248 }
249
251 auto *VecPart = Data.VPV2Vector[Def];
252 if (!VecPart->getType()->isVectorTy()) {
253 assert(Lane.isFirstLane() && "cannot get lane > 0 for scalar");
254 return VecPart;
255 }
256 // TODO: Cache created scalar values.
257 Value *LaneV = Lane.getAsRuntimeExpr(Builder, VF);
258 auto *Extract = Builder.CreateExtractElement(VecPart, LaneV);
259 // set(Def, Extract, Instance);
260 return Extract;
261}
262
263Value *VPTransformState::get(const VPValue *Def, bool NeedsScalar) {
264 if (NeedsScalar) {
265 assert((VF.isScalar() || Def->isLiveIn() || hasVectorValue(Def) ||
267 (hasScalarValue(Def, VPLane(0)) &&
268 Data.VPV2Scalars[Def].size() == 1)) &&
269 "Trying to access a single scalar per part but has multiple scalars "
270 "per part.");
271 return get(Def, VPLane(0));
272 }
273
274 // If Values have been set for this Def return the one relevant for \p Part.
275 if (hasVectorValue(Def))
276 return Data.VPV2Vector[Def];
277
278 auto GetBroadcastInstrs = [this](Value *V) {
279 if (VF.isScalar())
280 return V;
281 // Broadcast the scalar into all locations in the vector.
282 Value *Shuf = Builder.CreateVectorSplat(VF, V, "broadcast");
283 return Shuf;
284 };
285
286 if (!hasScalarValue(Def, {0})) {
287 assert(Def->isLiveIn() && "expected a live-in");
288 Value *IRV = Def->getLiveInIRValue();
289 Value *B = GetBroadcastInstrs(IRV);
290 set(Def, B);
291 return B;
292 }
293
294 Value *ScalarValue = get(Def, VPLane(0));
295 // If we aren't vectorizing, we can just copy the scalar map values over
296 // to the vector map.
297 if (VF.isScalar()) {
298 set(Def, ScalarValue);
299 return ScalarValue;
300 }
301
302 bool IsSingleScalar = vputils::isSingleScalar(Def);
303 VPLane LastLane(IsSingleScalar ? 0 : VF.getFixedValue() - 1);
304
305 // We need to construct the vector value for a single-scalar value by
306 // broadcasting the scalar to all lanes.
307 // TODO: Replace by introducing Broadcast VPInstructions.
308 assert(IsSingleScalar && "must be a single-scalar at this point");
309 // Set the insert point after the last scalarized instruction or after the
310 // last PHI, if LastInst is a PHI. This ensures the insertelement sequence
311 // will directly follow the scalar definitions.
312 auto OldIP = Builder.saveIP();
313 auto *LastInst = cast<Instruction>(get(Def, LastLane));
314 auto NewIP = isa<PHINode>(LastInst)
315 ? LastInst->getParent()->getFirstNonPHIIt()
316 : std::next(BasicBlock::iterator(LastInst));
317 Builder.SetInsertPoint(&*NewIP);
318 Value *VectorValue = GetBroadcastInstrs(ScalarValue);
319 set(Def, VectorValue);
320 Builder.restoreIP(OldIP);
321 return VectorValue;
322}
323
325 const DILocation *DIL = DL;
326 // When a FSDiscriminator is enabled, we don't need to add the multiply
327 // factors to the discriminators.
328 if (DIL &&
329 Builder.GetInsertBlock()
330 ->getParent()
331 ->shouldEmitDebugInfoForProfiling() &&
333 // FIXME: For scalable vectors, assume vscale=1.
334 unsigned UF = Plan->getUF();
335 auto NewDIL =
336 DIL->cloneByMultiplyingDuplicationFactor(UF * VF.getKnownMinValue());
337 if (NewDIL)
338 Builder.SetCurrentDebugLocation(*NewDIL);
339 else
340 LLVM_DEBUG(dbgs() << "Failed to create new discriminator: "
341 << DIL->getFilename() << " Line: " << DIL->getLine());
342 } else
343 Builder.SetCurrentDebugLocation(DL);
344}
345
347 Value *WideValue,
348 const VPLane &Lane) {
349 Value *ScalarInst = get(Def, Lane);
350 Value *LaneExpr = Lane.getAsRuntimeExpr(Builder, VF);
351 if (auto *StructTy = dyn_cast<StructType>(WideValue->getType())) {
352 // We must handle each element of a vectorized struct type.
353 for (unsigned I = 0, E = StructTy->getNumElements(); I != E; I++) {
354 Value *ScalarValue = Builder.CreateExtractValue(ScalarInst, I);
355 Value *VectorValue = Builder.CreateExtractValue(WideValue, I);
356 VectorValue =
357 Builder.CreateInsertElement(VectorValue, ScalarValue, LaneExpr);
358 WideValue = Builder.CreateInsertValue(WideValue, VectorValue, I);
359 }
360 } else {
361 WideValue = Builder.CreateInsertElement(WideValue, ScalarInst, LaneExpr);
362 }
363 return WideValue;
364}
365
366BasicBlock *VPBasicBlock::createEmptyBasicBlock(VPTransformState &State) {
367 auto &CFG = State.CFG;
368 // BB stands for IR BasicBlocks. VPBB stands for VPlan VPBasicBlocks.
369 // Pred stands for Predessor. Prev stands for Previous - last visited/created.
370 BasicBlock *PrevBB = CFG.PrevBB;
371 BasicBlock *NewBB = BasicBlock::Create(PrevBB->getContext(), getName(),
372 PrevBB->getParent(), CFG.ExitBB);
373 LLVM_DEBUG(dbgs() << "LV: created " << NewBB->getName() << '\n');
374
375 return NewBB;
376}
377
379 auto &CFG = State.CFG;
380 BasicBlock *NewBB = CFG.VPBB2IRBB[this];
381
382 // Register NewBB in its loop. In innermost loops its the same for all
383 // BB's.
384 Loop *ParentLoop = State.CurrentParentLoop;
385 // If this block has a sole successor that is an exit block or is an exit
386 // block itself then it needs adding to the same parent loop as the exit
387 // block.
388 VPBlockBase *SuccOrExitVPB = getSingleSuccessor();
389 SuccOrExitVPB = SuccOrExitVPB ? SuccOrExitVPB : this;
390 if (State.Plan->isExitBlock(SuccOrExitVPB)) {
391 ParentLoop = State.LI->getLoopFor(
392 cast<VPIRBasicBlock>(SuccOrExitVPB)->getIRBasicBlock());
393 }
394
395 if (ParentLoop && !State.LI->getLoopFor(NewBB))
396 ParentLoop->addBasicBlockToLoop(NewBB, *State.LI);
397
399 if (VPBlockUtils::isHeader(this, State.VPDT)) {
400 // There's no block for the latch yet, connect to the preheader only.
401 Preds = {getPredecessors()[0]};
402 } else {
403 Preds = to_vector(getPredecessors());
404 }
405
406 // Hook up the new basic block to its predecessors.
407 for (VPBlockBase *PredVPBlock : Preds) {
408 VPBasicBlock *PredVPBB = PredVPBlock->getExitingBasicBlock();
409 auto &PredVPSuccessors = PredVPBB->getHierarchicalSuccessors();
410 assert(CFG.VPBB2IRBB.contains(PredVPBB) &&
411 "Predecessor basic-block not found building successor.");
412 BasicBlock *PredBB = CFG.VPBB2IRBB[PredVPBB];
413 auto *PredBBTerminator = PredBB->getTerminator();
414 LLVM_DEBUG(dbgs() << "LV: draw edge from " << PredBB->getName() << '\n');
415
416 auto *TermBr = dyn_cast<BranchInst>(PredBBTerminator);
417 if (isa<UnreachableInst>(PredBBTerminator)) {
418 assert(PredVPSuccessors.size() == 1 &&
419 "Predecessor ending w/o branch must have single successor.");
420 DebugLoc DL = PredBBTerminator->getDebugLoc();
421 PredBBTerminator->eraseFromParent();
422 auto *Br = BranchInst::Create(NewBB, PredBB);
423 Br->setDebugLoc(DL);
424 } else if (TermBr && !TermBr->isConditional()) {
425 TermBr->setSuccessor(0, NewBB);
426 } else {
427 // Set each forward successor here when it is created, excluding
428 // backedges. A backward successor is set when the branch is created.
429 // Branches to VPIRBasicBlocks must have the same successors in VPlan as
430 // in the original IR, except when the predecessor is the entry block.
431 // This enables including SCEV and memory runtime check blocks in VPlan.
432 // TODO: Remove exception by modeling the terminator of entry block using
433 // BranchOnCond.
434 unsigned idx = PredVPSuccessors.front() == this ? 0 : 1;
435 assert((TermBr && (!TermBr->getSuccessor(idx) ||
436 (isa<VPIRBasicBlock>(this) &&
437 (TermBr->getSuccessor(idx) == NewBB ||
438 PredVPBlock == getPlan()->getEntry())))) &&
439 "Trying to reset an existing successor block.");
440 TermBr->setSuccessor(idx, NewBB);
441 }
442 CFG.DTU.applyUpdates({{DominatorTree::Insert, PredBB, NewBB}});
443 }
444}
445
448 "VPIRBasicBlock can have at most two successors at the moment!");
449 // Move completely disconnected blocks to their final position.
450 if (IRBB->hasNPredecessors(0) && succ_begin(IRBB) == succ_end(IRBB))
451 IRBB->moveAfter(State->CFG.PrevBB);
452 State->Builder.SetInsertPoint(IRBB->getTerminator());
453 State->CFG.PrevBB = IRBB;
454 State->CFG.VPBB2IRBB[this] = IRBB;
455 executeRecipes(State, IRBB);
456 // Create a branch instruction to terminate IRBB if one was not created yet
457 // and is needed.
458 if (getSingleSuccessor() && isa<UnreachableInst>(IRBB->getTerminator())) {
459 auto *Br = State->Builder.CreateBr(IRBB);
460 Br->setOperand(0, nullptr);
461 IRBB->getTerminator()->eraseFromParent();
462 } else {
463 assert(
464 (getNumSuccessors() == 0 || isa<BranchInst>(IRBB->getTerminator())) &&
465 "other blocks must be terminated by a branch");
466 }
467
468 connectToPredecessors(*State);
469}
470
471VPIRBasicBlock *VPIRBasicBlock::clone() {
472 auto *NewBlock = getPlan()->createEmptyVPIRBasicBlock(IRBB);
473 for (VPRecipeBase &R : Recipes)
474 NewBlock->appendRecipe(R.clone());
475 return NewBlock;
476}
477
479 bool Replica = bool(State->Lane);
480 BasicBlock *NewBB = State->CFG.PrevBB; // Reuse it if possible.
481
482 if (VPBlockUtils::isHeader(this, State->VPDT)) {
483 // Create and register the new vector loop.
484 Loop *PrevParentLoop = State->CurrentParentLoop;
485 State->CurrentParentLoop = State->LI->AllocateLoop();
486
487 // Insert the new loop into the loop nest and register the new basic blocks
488 // before calling any utilities such as SCEV that require valid LoopInfo.
489 if (PrevParentLoop)
490 PrevParentLoop->addChildLoop(State->CurrentParentLoop);
491 else
492 State->LI->addTopLevelLoop(State->CurrentParentLoop);
493 }
494
495 auto IsReplicateRegion = [](VPBlockBase *BB) {
497 assert((!R || R->isReplicator()) &&
498 "only replicate region blocks should remain");
499 return R;
500 };
501 // 1. Create an IR basic block.
502 if ((Replica && this == getParent()->getEntry()) ||
503 IsReplicateRegion(getSingleHierarchicalPredecessor())) {
504 // Reuse the previous basic block if the current VPBB is either
505 // * the entry to a replicate region, or
506 // * the exit of a replicate region.
507 State->CFG.VPBB2IRBB[this] = NewBB;
508 } else {
509 NewBB = createEmptyBasicBlock(*State);
510
511 State->Builder.SetInsertPoint(NewBB);
512 // Temporarily terminate with unreachable until CFG is rewired.
513 UnreachableInst *Terminator = State->Builder.CreateUnreachable();
514 State->Builder.SetInsertPoint(Terminator);
515
516 State->CFG.PrevBB = NewBB;
517 State->CFG.VPBB2IRBB[this] = NewBB;
518 connectToPredecessors(*State);
519 }
520
521 // 2. Fill the IR basic block with IR instructions.
522 executeRecipes(State, NewBB);
523
524 // If this block is a latch, update CurrentParentLoop.
525 if (VPBlockUtils::isLatch(this, State->VPDT))
526 State->CurrentParentLoop = State->CurrentParentLoop->getParentLoop();
527}
528
529VPBasicBlock *VPBasicBlock::clone() {
530 auto *NewBlock = getPlan()->createVPBasicBlock(getName());
531 for (VPRecipeBase &R : *this)
532 NewBlock->appendRecipe(R.clone());
533 return NewBlock;
534}
535
537 LLVM_DEBUG(dbgs() << "LV: vectorizing VPBB: " << getName()
538 << " in BB: " << BB->getName() << '\n');
539
540 State->CFG.PrevVPBB = this;
541
542 for (VPRecipeBase &Recipe : Recipes) {
543 State->setDebugLocFrom(Recipe.getDebugLoc());
544 Recipe.execute(*State);
545 }
546
547 LLVM_DEBUG(dbgs() << "LV: filled BB: " << *BB);
548}
549
550VPBasicBlock *VPBasicBlock::splitAt(iterator SplitAt) {
551 assert((SplitAt == end() || SplitAt->getParent() == this) &&
552 "can only split at a position in the same block");
553
554 // Create new empty block after the block to split.
555 auto *SplitBlock = getPlan()->createVPBasicBlock(getName() + ".split");
557
558 // Finally, move the recipes starting at SplitAt to new block.
559 for (VPRecipeBase &ToMove :
560 make_early_inc_range(make_range(SplitAt, this->end())))
561 ToMove.moveBefore(*SplitBlock, SplitBlock->end());
562
563 return SplitBlock;
564}
565
566/// Return the enclosing loop region for region \p P. The templated version is
567/// used to support both const and non-const block arguments.
568template <typename T> static T *getEnclosingLoopRegionForRegion(T *P) {
569 if (P && P->isReplicator()) {
570 P = P->getParent();
571 // Multiple loop regions can be nested, but replicate regions can only be
572 // nested inside a loop region or must be outside any other region.
573 assert((!P || !P->isReplicator()) && "unexpected nested replicate regions");
574 }
575 return P;
576}
577
581
585
586static bool hasConditionalTerminator(const VPBasicBlock *VPBB) {
587 if (VPBB->empty()) {
588 assert(
589 VPBB->getNumSuccessors() < 2 &&
590 "block with multiple successors doesn't have a recipe as terminator");
591 return false;
592 }
593
594 const VPRecipeBase *R = &VPBB->back();
595 bool IsSwitch = isa<VPInstruction>(R) &&
596 cast<VPInstruction>(R)->getOpcode() == Instruction::Switch;
597 bool IsCondBranch =
600 (void)IsCondBranch;
601 (void)IsSwitch;
602 if (VPBB->getNumSuccessors() == 2 ||
603 (VPBB->isExiting() && !VPBB->getParent()->isReplicator())) {
604 assert((IsCondBranch || IsSwitch) &&
605 "block with multiple successors not terminated by "
606 "conditional branch nor switch recipe");
607
608 return true;
609 }
610
611 if (VPBB->getNumSuccessors() > 2) {
612 assert(IsSwitch && "block with more than 2 successors not terminated by "
613 "a switch recipe");
614 return true;
615 }
616
617 assert(
618 !IsCondBranch &&
619 "block with 0 or 1 successors terminated by conditional branch recipe");
620 return false;
621}
622
624 if (hasConditionalTerminator(this))
625 return &back();
626 return nullptr;
627}
628
630 if (hasConditionalTerminator(this))
631 return &back();
632 return nullptr;
633}
634
636 return getParent() && getParent()->getExitingBasicBlock() == this;
637}
638
639#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
644
645void VPBlockBase::printSuccessors(raw_ostream &O, const Twine &Indent) const {
646 if (getSuccessors().empty()) {
647 O << Indent << "No successors\n";
648 } else {
649 O << Indent << "Successor(s): ";
650 ListSeparator LS;
651 for (auto *Succ : getSuccessors())
652 O << LS << Succ->getName();
653 O << '\n';
654 }
655}
656
657void VPBasicBlock::print(raw_ostream &O, const Twine &Indent,
658 VPSlotTracker &SlotTracker) const {
659 O << Indent << getName() << ":\n";
660
661 auto RecipeIndent = Indent + " ";
662 for (const VPRecipeBase &Recipe : *this) {
663 Recipe.print(O, RecipeIndent, SlotTracker);
664 O << '\n';
665 }
666
667 printSuccessors(O, Indent);
668}
669#endif
670
671static std::pair<VPBlockBase *, VPBlockBase *> cloneFrom(VPBlockBase *Entry);
672
673// Clone the CFG for all nodes reachable from \p Entry, this includes cloning
674// the blocks and their recipes. Operands of cloned recipes will NOT be updated.
675// Remapping of operands must be done separately. Returns a pair with the new
676// entry and exiting blocks of the cloned region. If \p Entry isn't part of a
677// region, return nullptr for the exiting block.
678static std::pair<VPBlockBase *, VPBlockBase *> cloneFrom(VPBlockBase *Entry) {
680 VPBlockBase *Exiting = nullptr;
681 bool InRegion = Entry->getParent();
682 // First, clone blocks reachable from Entry.
683 for (VPBlockBase *BB : vp_depth_first_shallow(Entry)) {
684 VPBlockBase *NewBB = BB->clone();
685 Old2NewVPBlocks[BB] = NewBB;
686 if (InRegion && BB->getNumSuccessors() == 0) {
687 assert(!Exiting && "Multiple exiting blocks?");
688 Exiting = BB;
689 }
690 }
691 assert((!InRegion || Exiting) && "regions must have a single exiting block");
692
693 // Second, update the predecessors & successors of the cloned blocks.
694 for (VPBlockBase *BB : vp_depth_first_shallow(Entry)) {
695 VPBlockBase *NewBB = Old2NewVPBlocks[BB];
697 for (VPBlockBase *Pred : BB->getPredecessors()) {
698 NewPreds.push_back(Old2NewVPBlocks[Pred]);
699 }
700 NewBB->setPredecessors(NewPreds);
702 for (VPBlockBase *Succ : BB->successors()) {
703 NewSuccs.push_back(Old2NewVPBlocks[Succ]);
704 }
705 NewBB->setSuccessors(NewSuccs);
706 }
707
708#if !defined(NDEBUG)
709 // Verify that the order of predecessors and successors matches in the cloned
710 // version.
711 for (const auto &[OldBB, NewBB] :
713 vp_depth_first_shallow(Old2NewVPBlocks[Entry]))) {
714 for (const auto &[OldPred, NewPred] :
715 zip(OldBB->getPredecessors(), NewBB->getPredecessors()))
716 assert(NewPred == Old2NewVPBlocks[OldPred] && "Different predecessors");
717
718 for (const auto &[OldSucc, NewSucc] :
719 zip(OldBB->successors(), NewBB->successors()))
720 assert(NewSucc == Old2NewVPBlocks[OldSucc] && "Different successors");
721 }
722#endif
723
724 return std::make_pair(Old2NewVPBlocks[Entry],
725 Exiting ? Old2NewVPBlocks[Exiting] : nullptr);
726}
727
728VPRegionBlock *VPRegionBlock::clone() {
729 const auto &[NewEntry, NewExiting] = cloneFrom(getEntry());
730 VPlan &Plan = *getPlan();
731 VPRegionBlock *NewRegion =
733 ? Plan.createReplicateRegion(NewEntry, NewExiting, getName())
734 : Plan.createLoopRegion(getName(), NewEntry, NewExiting);
735
736 for (VPBlockBase *Block : vp_depth_first_shallow(NewEntry))
737 Block->setParent(NewRegion);
738 return NewRegion;
739}
740
743 "Loop regions should have been lowered to plain CFG");
744 assert(!State->Lane && "Replicating a Region with non-null instance.");
745 assert(!State->VF.isScalable() && "VF is assumed to be non scalable.");
746
748 Entry);
749 State->Lane = VPLane(0);
750 for (unsigned Lane = 0, VF = State->VF.getFixedValue(); Lane < VF; ++Lane) {
751 State->Lane = VPLane(Lane, VPLane::Kind::First);
752 // Visit the VPBlocks connected to \p this, starting from it.
753 for (VPBlockBase *Block : RPOT) {
754 LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n');
755 Block->execute(State);
756 }
757 }
758
759 // Exit replicating mode.
760 State->Lane.reset();
761}
762
765 for (VPRecipeBase &R : Recipes)
766 Cost += R.cost(VF, Ctx);
767 return Cost;
768}
769
770const VPBasicBlock *VPBasicBlock::getCFGPredecessor(unsigned Idx) const {
771 const VPBlockBase *Pred = nullptr;
772 if (hasPredecessors()) {
773 Pred = getPredecessors()[Idx];
774 } else {
775 auto *Region = getParent();
776 assert(Region && !Region->isReplicator() && Region->getEntry() == this &&
777 "must be in the entry block of a non-replicate region");
778 assert(Idx < 2 && Region->getNumPredecessors() == 1 &&
779 "loop region has a single predecessor (preheader), its entry block "
780 "has 2 incoming blocks");
781
782 // Idx == 0 selects the predecessor of the region, Idx == 1 selects the
783 // region itself whose exiting block feeds the phi across the backedge.
784 Pred = Idx == 0 ? Region->getSinglePredecessor() : Region;
785 }
786 return Pred->getExitingBasicBlock();
787}
788
790 if (!isReplicator()) {
793 Cost += Block->cost(VF, Ctx);
794 InstructionCost BackedgeCost =
795 ForceTargetInstructionCost.getNumOccurrences()
796 ? InstructionCost(ForceTargetInstructionCost.getNumOccurrences())
797 : Ctx.TTI.getCFInstrCost(Instruction::Br, Ctx.CostKind);
798 LLVM_DEBUG(dbgs() << "Cost of " << BackedgeCost << " for VF " << VF
799 << ": vector loop backedge\n");
800 Cost += BackedgeCost;
801 return Cost;
802 }
803
804 // Compute the cost of a replicate region. Replicating isn't supported for
805 // scalable vectors, return an invalid cost for them.
806 // TODO: Discard scalable VPlans with replicate recipes earlier after
807 // construction.
808 if (VF.isScalable())
810
811 // Compute and return the cost of the conditionally executed recipes.
812 assert(VF.isVector() && "Can only compute vector cost at the moment.");
814 return Then->cost(VF, Ctx);
815}
816
817#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
819 VPSlotTracker &SlotTracker) const {
820 O << Indent << (isReplicator() ? "<xVFxUF> " : "<x1> ") << getName() << ": {";
821 auto NewIndent = Indent + " ";
822 for (auto *BlockBase : vp_depth_first_shallow(Entry)) {
823 O << '\n';
824 BlockBase->print(O, NewIndent, SlotTracker);
825 }
826 O << Indent << "}\n";
827
828 printSuccessors(O, Indent);
829}
830#endif
831
833 auto *Header = cast<VPBasicBlock>(getEntry());
834 if (auto *CanIV = dyn_cast<VPCanonicalIVPHIRecipe>(&Header->front())) {
835 assert(this == getPlan()->getVectorLoopRegion() &&
836 "Canonical IV must be in the entry of the top-level loop region");
837 auto *ScalarR = VPBuilder(CanIV).createScalarPhi(
838 {CanIV->getStartValue(), CanIV->getBackedgeValue()},
839 CanIV->getDebugLoc(), "index");
840 CanIV->replaceAllUsesWith(ScalarR);
841 CanIV->eraseFromParent();
842 }
843
844 VPBlockBase *Preheader = getSinglePredecessor();
845 auto *ExitingLatch = cast<VPBasicBlock>(getExiting());
846
847 VPBlockUtils::disconnectBlocks(Preheader, this);
848
849 for (VPBlockBase *VPB : vp_depth_first_shallow(Entry))
850 VPB->setParent(getParent());
851
852 VPBlockUtils::connectBlocks(Preheader, Header);
853 VPBlockUtils::transferSuccessors(this, ExitingLatch);
854 VPBlockUtils::connectBlocks(ExitingLatch, Header);
855}
856
857VPlan::VPlan(Loop *L) {
858 setEntry(createVPIRBasicBlock(L->getLoopPreheader()));
859 ScalarHeader = createVPIRBasicBlock(L->getHeader());
860
861 SmallVector<BasicBlock *> IRExitBlocks;
862 L->getUniqueExitBlocks(IRExitBlocks);
863 for (BasicBlock *EB : IRExitBlocks)
864 ExitBlocks.push_back(createVPIRBasicBlock(EB));
865}
866
868 VPValue DummyValue;
869
870 for (auto *VPB : CreatedBlocks) {
871 if (auto *VPBB = dyn_cast<VPBasicBlock>(VPB)) {
872 // Replace all operands of recipes and all VPValues defined in VPBB with
873 // DummyValue so the block can be deleted.
874 for (VPRecipeBase &R : *VPBB) {
875 for (auto *Def : R.definedValues())
876 Def->replaceAllUsesWith(&DummyValue);
877
878 for (unsigned I = 0, E = R.getNumOperands(); I != E; I++)
879 R.setOperand(I, &DummyValue);
880 }
881 }
882 delete VPB;
883 }
884 for (VPValue *VPV : getLiveIns())
885 delete VPV;
886 delete BackedgeTakenCount;
887}
888
890 auto Iter = find_if(getExitBlocks(), [IRBB](const VPIRBasicBlock *VPIRBB) {
891 return VPIRBB->getIRBasicBlock() == IRBB;
892 });
893 assert(Iter != getExitBlocks().end() && "no exit block found");
894 return *Iter;
895}
896
898 return is_contained(ExitBlocks, VPBB);
899}
900
901/// Generate the code inside the preheader and body of the vectorized loop.
902/// Assumes a single pre-header basic-block was created for this. Introduce
903/// additional basic-blocks as needed, and fill them all.
905 // Initialize CFG state.
906 State->CFG.PrevVPBB = nullptr;
907 State->CFG.ExitBB = State->CFG.PrevBB->getSingleSuccessor();
908
909 // Update VPDominatorTree since VPBasicBlock may be removed after State was
910 // constructed.
911 State->VPDT.recalculate(*this);
912
913 // Disconnect VectorPreHeader from ExitBB in both the CFG and DT.
914 BasicBlock *VectorPreHeader = State->CFG.PrevBB;
915 cast<BranchInst>(VectorPreHeader->getTerminator())->setSuccessor(0, nullptr);
916 State->CFG.DTU.applyUpdates(
917 {{DominatorTree::Delete, VectorPreHeader, State->CFG.ExitBB}});
918
919 LLVM_DEBUG(dbgs() << "Executing best plan with VF=" << State->VF
920 << ", UF=" << getUF() << '\n');
921 setName("Final VPlan");
922 LLVM_DEBUG(dump());
923
924 BasicBlock *ScalarPh = State->CFG.ExitBB;
925 VPBasicBlock *ScalarPhVPBB = getScalarPreheader();
926 if (ScalarPhVPBB->hasPredecessors()) {
927 // Disconnect scalar preheader and scalar header, as the dominator tree edge
928 // will be updated as part of VPlan execution. This allows keeping the DTU
929 // logic generic during VPlan execution.
930 State->CFG.DTU.applyUpdates(
931 {{DominatorTree::Delete, ScalarPh, ScalarPh->getSingleSuccessor()}});
932 }
934 Entry);
935 // Generate code for the VPlan, in parts of the vector skeleton, loop body and
936 // successor blocks including the middle, exit and scalar preheader blocks.
937 for (VPBlockBase *Block : RPOT)
938 Block->execute(State);
939
940 // If the original loop is unreachable, delete it and all its blocks.
941 if (!ScalarPhVPBB->hasPredecessors()) {
942 // DeleteDeadBlocks will remove single-entry phis. Remove them from the exit
943 // VPIRBBs in VPlan as well, otherwise we would retain references to deleted
944 // IR instructions.
945 for (VPIRBasicBlock *EB : getExitBlocks()) {
946 for (VPRecipeBase &R : make_early_inc_range(EB->phis())) {
947 if (R.getNumOperands() == 1)
948 R.eraseFromParent();
949 }
950 }
951
952 Loop *OrigLoop =
953 State->LI->getLoopFor(getScalarHeader()->getIRBasicBlock());
954 auto Blocks = OrigLoop->getBlocksVector();
955 Blocks.push_back(cast<VPIRBasicBlock>(ScalarPhVPBB)->getIRBasicBlock());
956 for (auto *BB : Blocks)
957 State->LI->removeBlock(BB);
958 DeleteDeadBlocks(Blocks, &State->CFG.DTU);
959 State->LI->erase(OrigLoop);
960 }
961
962 State->CFG.DTU.flush();
963
964 VPBasicBlock *Header = vputils::getFirstLoopHeader(*this, State->VPDT);
965 if (!Header)
966 return;
967
968 auto *LatchVPBB = cast<VPBasicBlock>(Header->getPredecessors()[1]);
969 BasicBlock *VectorLatchBB = State->CFG.VPBB2IRBB[LatchVPBB];
970
971 // Fix the latch value of canonical, reduction and first-order recurrences
972 // phis in the vector loop.
973 for (VPRecipeBase &R : Header->phis()) {
974 // Skip phi-like recipes that generate their backedege values themselves.
975 if (isa<VPWidenPHIRecipe>(&R))
976 continue;
977
978 auto *PhiR = cast<VPSingleDefRecipe>(&R);
979 // VPInstructions currently model scalar Phis only.
980 bool NeedsScalar = isa<VPInstruction>(PhiR) ||
982 cast<VPReductionPHIRecipe>(PhiR)->isInLoop());
983
984 Value *Phi = State->get(PhiR, NeedsScalar);
985 // VPHeaderPHIRecipe supports getBackedgeValue() but VPInstruction does
986 // not.
987 Value *Val = State->get(PhiR->getOperand(1), NeedsScalar);
988 cast<PHINode>(Phi)->addIncoming(Val, VectorLatchBB);
989 }
990}
991
993 // For now only return the cost of the vector loop region, ignoring any other
994 // blocks, like the preheader or middle blocks, expect for checking them for
995 // recipes with invalid costs.
997
998 // If the cost of the loop region is invalid or any recipe in the skeleton
999 // outside loop regions are invalid return an invalid cost.
1002 [&VF, &Ctx](VPBasicBlock *VPBB) {
1003 return !VPBB->cost(VF, Ctx).isValid();
1004 }))
1006
1007 return Cost;
1008}
1009
1011 // TODO: Cache if possible.
1013 if (auto *R = dyn_cast<VPRegionBlock>(B))
1014 return R->isReplicator() ? nullptr : R;
1015 return nullptr;
1016}
1017
1020 if (auto *R = dyn_cast<VPRegionBlock>(B))
1021 return R->isReplicator() ? nullptr : R;
1022 return nullptr;
1023}
1024
1025#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1028
1029 if (VF.getNumUsers() > 0) {
1030 O << "\nLive-in ";
1031 VF.printAsOperand(O, SlotTracker);
1032 O << " = VF";
1033 }
1034
1035 if (VFxUF.getNumUsers() > 0) {
1036 O << "\nLive-in ";
1037 VFxUF.printAsOperand(O, SlotTracker);
1038 O << " = VF * UF";
1039 }
1040
1041 if (VectorTripCount.getNumUsers() > 0) {
1042 O << "\nLive-in ";
1043 VectorTripCount.printAsOperand(O, SlotTracker);
1044 O << " = vector-trip-count";
1045 }
1046
1047 if (BackedgeTakenCount && BackedgeTakenCount->getNumUsers()) {
1048 O << "\nLive-in ";
1049 BackedgeTakenCount->printAsOperand(O, SlotTracker);
1050 O << " = backedge-taken count";
1051 }
1052
1053 O << "\n";
1054 if (TripCount) {
1055 if (TripCount->isLiveIn())
1056 O << "Live-in ";
1057 TripCount->printAsOperand(O, SlotTracker);
1058 O << " = original trip-count";
1059 O << "\n";
1060 }
1061}
1062
1066
1067 O << "VPlan '" << getName() << "' {";
1068
1069 printLiveIns(O);
1070
1072 RPOT(getEntry());
1073 for (const VPBlockBase *Block : RPOT) {
1074 O << '\n';
1075 Block->print(O, "", SlotTracker);
1076 }
1077
1078 O << "}\n";
1079}
1080
1081std::string VPlan::getName() const {
1082 std::string Out;
1083 raw_string_ostream RSO(Out);
1084 RSO << Name << " for ";
1085 if (!VFs.empty()) {
1086 RSO << "VF={" << VFs[0];
1087 for (ElementCount VF : drop_begin(VFs))
1088 RSO << "," << VF;
1089 RSO << "},";
1090 }
1091
1092 if (UFs.empty()) {
1093 RSO << "UF>=1";
1094 } else {
1095 RSO << "UF={" << UFs[0];
1096 for (unsigned UF : drop_begin(UFs))
1097 RSO << "," << UF;
1098 RSO << "}";
1099 }
1100
1101 return Out;
1102}
1103
1106 VPlanPrinter Printer(O, *this);
1107 Printer.dump();
1108}
1109
1111void VPlan::dump() const { print(dbgs()); }
1112#endif
1113
1114static void remapOperands(VPBlockBase *Entry, VPBlockBase *NewEntry,
1115 DenseMap<VPValue *, VPValue *> &Old2NewVPValues) {
1116 // Update the operands of all cloned recipes starting at NewEntry. This
1117 // traverses all reachable blocks. This is done in two steps, to handle cycles
1118 // in PHI recipes.
1120 OldDeepRPOT(Entry);
1122 NewDeepRPOT(NewEntry);
1123 // First, collect all mappings from old to new VPValues defined by cloned
1124 // recipes.
1125 for (const auto &[OldBB, NewBB] :
1128 assert(OldBB->getRecipeList().size() == NewBB->getRecipeList().size() &&
1129 "blocks must have the same number of recipes");
1130 for (const auto &[OldR, NewR] : zip(*OldBB, *NewBB)) {
1131 assert(OldR.getNumOperands() == NewR.getNumOperands() &&
1132 "recipes must have the same number of operands");
1133 assert(OldR.getNumDefinedValues() == NewR.getNumDefinedValues() &&
1134 "recipes must define the same number of operands");
1135 for (const auto &[OldV, NewV] :
1136 zip(OldR.definedValues(), NewR.definedValues()))
1137 Old2NewVPValues[OldV] = NewV;
1138 }
1139 }
1140
1141 // Update all operands to use cloned VPValues.
1142 for (VPBasicBlock *NewBB :
1144 for (VPRecipeBase &NewR : *NewBB)
1145 for (unsigned I = 0, E = NewR.getNumOperands(); I != E; ++I) {
1146 VPValue *NewOp = Old2NewVPValues.lookup(NewR.getOperand(I));
1147 NewR.setOperand(I, NewOp);
1148 }
1149 }
1150}
1151
1153 unsigned NumBlocksBeforeCloning = CreatedBlocks.size();
1154 // Clone blocks.
1155 const auto &[NewEntry, __] = cloneFrom(Entry);
1156
1157 BasicBlock *ScalarHeaderIRBB = getScalarHeader()->getIRBasicBlock();
1158 VPIRBasicBlock *NewScalarHeader = nullptr;
1159 if (getScalarHeader()->hasPredecessors()) {
1160 NewScalarHeader = cast<VPIRBasicBlock>(*find_if(
1161 vp_depth_first_shallow(NewEntry), [ScalarHeaderIRBB](VPBlockBase *VPB) {
1162 auto *VPIRBB = dyn_cast<VPIRBasicBlock>(VPB);
1163 return VPIRBB && VPIRBB->getIRBasicBlock() == ScalarHeaderIRBB;
1164 }));
1165 } else {
1166 NewScalarHeader = createVPIRBasicBlock(ScalarHeaderIRBB);
1167 }
1168 // Create VPlan, clone live-ins and remap operands in the cloned blocks.
1169 auto *NewPlan = new VPlan(cast<VPBasicBlock>(NewEntry), NewScalarHeader);
1170 DenseMap<VPValue *, VPValue *> Old2NewVPValues;
1171 for (VPValue *OldLiveIn : getLiveIns()) {
1172 Old2NewVPValues[OldLiveIn] =
1173 NewPlan->getOrAddLiveIn(OldLiveIn->getLiveInIRValue());
1174 }
1175 Old2NewVPValues[&VectorTripCount] = &NewPlan->VectorTripCount;
1176 Old2NewVPValues[&VF] = &NewPlan->VF;
1177 Old2NewVPValues[&VFxUF] = &NewPlan->VFxUF;
1178 if (BackedgeTakenCount) {
1179 NewPlan->BackedgeTakenCount = new VPValue();
1180 Old2NewVPValues[BackedgeTakenCount] = NewPlan->BackedgeTakenCount;
1181 }
1182 if (TripCount && TripCount->isLiveIn())
1183 Old2NewVPValues[TripCount] =
1184 NewPlan->getOrAddLiveIn(TripCount->getLiveInIRValue());
1185 // else NewTripCount will be created and inserted into Old2NewVPValues when
1186 // TripCount is cloned. In any case NewPlan->TripCount is updated below.
1187
1188 remapOperands(Entry, NewEntry, Old2NewVPValues);
1189
1190 // Initialize remaining fields of cloned VPlan.
1191 NewPlan->VFs = VFs;
1192 NewPlan->UFs = UFs;
1193 // TODO: Adjust names.
1194 NewPlan->Name = Name;
1195 if (TripCount) {
1196 assert(Old2NewVPValues.contains(TripCount) &&
1197 "TripCount must have been added to Old2NewVPValues");
1198 NewPlan->TripCount = Old2NewVPValues[TripCount];
1199 }
1200
1201 // Transfer all cloned blocks (the second half of all current blocks) from
1202 // current to new VPlan.
1203 unsigned NumBlocksAfterCloning = CreatedBlocks.size();
1204 for (unsigned I :
1205 seq<unsigned>(NumBlocksBeforeCloning, NumBlocksAfterCloning))
1206 NewPlan->CreatedBlocks.push_back(this->CreatedBlocks[I]);
1207 CreatedBlocks.truncate(NumBlocksBeforeCloning);
1208
1209 // Update ExitBlocks of the new plan.
1210 for (VPBlockBase *VPB : NewPlan->CreatedBlocks) {
1211 if (VPB->getNumSuccessors() == 0 && isa<VPIRBasicBlock>(VPB) &&
1212 VPB != NewScalarHeader)
1213 NewPlan->ExitBlocks.push_back(cast<VPIRBasicBlock>(VPB));
1214 }
1215
1216 return NewPlan;
1217}
1218
1220 auto *VPIRBB = new VPIRBasicBlock(IRBB);
1221 CreatedBlocks.push_back(VPIRBB);
1222 return VPIRBB;
1223}
1224
1226 auto *VPIRBB = createEmptyVPIRBasicBlock(IRBB);
1227 for (Instruction &I :
1228 make_range(IRBB->begin(), IRBB->getTerminator()->getIterator()))
1229 VPIRBB->appendRecipe(VPIRInstruction::create(I));
1230 return VPIRBB;
1231}
1232
1233#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1234
1235Twine VPlanPrinter::getUID(const VPBlockBase *Block) {
1236 return (isa<VPRegionBlock>(Block) ? "cluster_N" : "N") +
1237 Twine(getOrCreateBID(Block));
1238}
1239
1240Twine VPlanPrinter::getOrCreateName(const VPBlockBase *Block) {
1241 const std::string &Name = Block->getName();
1242 if (!Name.empty())
1243 return Name;
1244 return "VPB" + Twine(getOrCreateBID(Block));
1245}
1246
1248 Depth = 1;
1249 bumpIndent(0);
1250 OS << "digraph VPlan {\n";
1251 OS << "graph [labelloc=t, fontsize=30; label=\"Vectorization Plan";
1252 if (!Plan.getName().empty())
1253 OS << "\\n" << DOT::EscapeString(Plan.getName());
1254
1255 {
1256 // Print live-ins.
1257 std::string Str;
1258 raw_string_ostream SS(Str);
1259 Plan.printLiveIns(SS);
1261 StringRef(Str).rtrim('\n').split(Lines, "\n");
1262 for (auto Line : Lines)
1263 OS << DOT::EscapeString(Line.str()) << "\\n";
1264 }
1265
1266 OS << "\"]\n";
1267 OS << "node [shape=rect, fontname=Courier, fontsize=30]\n";
1268 OS << "edge [fontname=Courier, fontsize=30]\n";
1269 OS << "compound=true\n";
1270
1271 for (const VPBlockBase *Block : vp_depth_first_shallow(Plan.getEntry()))
1272 dumpBlock(Block);
1273
1274 OS << "}\n";
1275}
1276
1277void VPlanPrinter::dumpBlock(const VPBlockBase *Block) {
1279 dumpBasicBlock(BasicBlock);
1281 dumpRegion(Region);
1282 else
1283 llvm_unreachable("Unsupported kind of VPBlock.");
1284}
1285
1286void VPlanPrinter::drawEdge(const VPBlockBase *From, const VPBlockBase *To,
1287 bool Hidden, const Twine &Label) {
1288 // Due to "dot" we print an edge between two regions as an edge between the
1289 // exiting basic block and the entry basic of the respective regions.
1290 const VPBlockBase *Tail = From->getExitingBasicBlock();
1291 const VPBlockBase *Head = To->getEntryBasicBlock();
1292 OS << Indent << getUID(Tail) << " -> " << getUID(Head);
1293 OS << " [ label=\"" << Label << '\"';
1294 if (Tail != From)
1295 OS << " ltail=" << getUID(From);
1296 if (Head != To)
1297 OS << " lhead=" << getUID(To);
1298 if (Hidden)
1299 OS << "; splines=none";
1300 OS << "]\n";
1301}
1302
1303void VPlanPrinter::dumpEdges(const VPBlockBase *Block) {
1304 auto &Successors = Block->getSuccessors();
1305 if (Successors.size() == 1)
1306 drawEdge(Block, Successors.front(), false, "");
1307 else if (Successors.size() == 2) {
1308 drawEdge(Block, Successors.front(), false, "T");
1309 drawEdge(Block, Successors.back(), false, "F");
1310 } else {
1311 unsigned SuccessorNumber = 0;
1312 for (auto *Successor : Successors)
1313 drawEdge(Block, Successor, false, Twine(SuccessorNumber++));
1314 }
1315}
1316
1317void VPlanPrinter::dumpBasicBlock(const VPBasicBlock *BasicBlock) {
1318 // Implement dot-formatted dump by performing plain-text dump into the
1319 // temporary storage followed by some post-processing.
1320 OS << Indent << getUID(BasicBlock) << " [label =\n";
1321 bumpIndent(1);
1322 std::string Str;
1323 raw_string_ostream SS(Str);
1324 // Use no indentation as we need to wrap the lines into quotes ourselves.
1325 BasicBlock->print(SS, "", SlotTracker);
1326
1327 // We need to process each line of the output separately, so split
1328 // single-string plain-text dump.
1330 StringRef(Str).rtrim('\n').split(Lines, "\n");
1331
1332 auto EmitLine = [&](StringRef Line, StringRef Suffix) {
1333 OS << Indent << '"' << DOT::EscapeString(Line.str()) << "\\l\"" << Suffix;
1334 };
1335
1336 // Don't need the "+" after the last line.
1337 for (auto Line : make_range(Lines.begin(), Lines.end() - 1))
1338 EmitLine(Line, " +\n");
1339 EmitLine(Lines.back(), "\n");
1340
1341 bumpIndent(-1);
1342 OS << Indent << "]\n";
1343
1344 dumpEdges(BasicBlock);
1345}
1346
1347void VPlanPrinter::dumpRegion(const VPRegionBlock *Region) {
1348 OS << Indent << "subgraph " << getUID(Region) << " {\n";
1349 bumpIndent(1);
1350 OS << Indent << "fontname=Courier\n"
1351 << Indent << "label=\""
1352 << DOT::EscapeString(Region->isReplicator() ? "<xVFxUF> " : "<x1> ")
1353 << DOT::EscapeString(Region->getName()) << "\"\n";
1354 // Dump the blocks of the region.
1355 assert(Region->getEntry() && "Region contains no inner blocks.");
1356 for (const VPBlockBase *Block : vp_depth_first_shallow(Region->getEntry()))
1357 dumpBlock(Block);
1358 bumpIndent(-1);
1359 OS << Indent << "}\n";
1360 dumpEdges(Region);
1361}
1362
1363#endif
1364
1365/// Returns true if there is a vector loop region and \p VPV is defined in a
1366/// loop region.
1367static bool isDefinedInsideLoopRegions(const VPValue *VPV) {
1368 const VPRecipeBase *DefR = VPV->getDefiningRecipe();
1369 return DefR && (!DefR->getParent()->getPlan()->getVectorLoopRegion() ||
1371}
1372
1377 replaceUsesWithIf(New, [](VPUser &, unsigned) { return true; });
1378}
1379
1381 VPValue *New,
1382 llvm::function_ref<bool(VPUser &U, unsigned Idx)> ShouldReplace) {
1383 // Note that this early exit is required for correctness; the implementation
1384 // below relies on the number of users for this VPValue to decrease, which
1385 // isn't the case if this == New.
1386 if (this == New)
1387 return;
1388
1389 for (unsigned J = 0; J < getNumUsers();) {
1390 VPUser *User = Users[J];
1391 bool RemovedUser = false;
1392 for (unsigned I = 0, E = User->getNumOperands(); I < E; ++I) {
1393 if (User->getOperand(I) != this || !ShouldReplace(*User, I))
1394 continue;
1395
1396 RemovedUser = true;
1397 User->setOperand(I, New);
1398 }
1399 // If a user got removed after updating the current user, the next user to
1400 // update will be moved to the current position, so we only need to
1401 // increment the index if the number of users did not change.
1402 if (!RemovedUser)
1403 J++;
1404 }
1405}
1406
1408 for (unsigned Idx = 0; Idx != getNumOperands(); ++Idx) {
1409 if (getOperand(Idx) == From)
1410 setOperand(Idx, To);
1411 }
1412}
1413
1414#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1416 OS << Tracker.getOrCreateName(this);
1417}
1418
1421 Op->printAsOperand(O, SlotTracker);
1422 });
1423}
1424#endif
1425
1426void VPSlotTracker::assignName(const VPValue *V) {
1427 assert(!VPValue2Name.contains(V) && "VPValue already has a name!");
1428 auto *UV = V->getUnderlyingValue();
1429 auto *VPI = dyn_cast_or_null<VPInstruction>(V);
1430 if (!UV && !(VPI && !VPI->getName().empty())) {
1431 VPValue2Name[V] = (Twine("vp<%") + Twine(NextSlot) + ">").str();
1432 NextSlot++;
1433 return;
1434 }
1435
1436 // Use the name of the underlying Value, wrapped in "ir<>", and versioned by
1437 // appending ".Number" to the name if there are multiple uses.
1438 std::string Name;
1439 if (UV)
1440 Name = getName(UV);
1441 else
1442 Name = VPI->getName();
1443
1444 assert(!Name.empty() && "Name cannot be empty.");
1445 StringRef Prefix = UV ? "ir<" : "vp<%";
1446 std::string BaseName = (Twine(Prefix) + Name + Twine(">")).str();
1447
1448 // First assign the base name for V.
1449 const auto &[A, _] = VPValue2Name.try_emplace(V, BaseName);
1450 // Integer or FP constants with different types will result in he same string
1451 // due to stripping types.
1452 if (V->isLiveIn() && isa<ConstantInt, ConstantFP>(UV))
1453 return;
1454
1455 // If it is already used by C > 0 other VPValues, increase the version counter
1456 // C and use it for V.
1457 const auto &[C, UseInserted] = BaseName2Version.try_emplace(BaseName, 0);
1458 if (!UseInserted) {
1459 C->second++;
1460 A->second = (BaseName + Twine(".") + Twine(C->second)).str();
1461 }
1462}
1463
1464void VPSlotTracker::assignNames(const VPlan &Plan) {
1465 if (Plan.VF.getNumUsers() > 0)
1466 assignName(&Plan.VF);
1467 if (Plan.VFxUF.getNumUsers() > 0)
1468 assignName(&Plan.VFxUF);
1469 assignName(&Plan.VectorTripCount);
1470 if (Plan.BackedgeTakenCount)
1471 assignName(Plan.BackedgeTakenCount);
1472 for (VPValue *LI : Plan.getLiveIns())
1473 assignName(LI);
1474
1475 ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<const VPBlockBase *>>
1476 RPOT(VPBlockDeepTraversalWrapper<const VPBlockBase *>(Plan.getEntry()));
1477 for (const VPBasicBlock *VPBB :
1479 assignNames(VPBB);
1480}
1481
1482void VPSlotTracker::assignNames(const VPBasicBlock *VPBB) {
1483 for (const VPRecipeBase &Recipe : *VPBB)
1484 for (VPValue *Def : Recipe.definedValues())
1485 assignName(Def);
1486}
1487
1488std::string VPSlotTracker::getName(const Value *V) {
1489 std::string Name;
1490 raw_string_ostream S(Name);
1491 if (V->hasName() || !isa<Instruction>(V)) {
1492 V->printAsOperand(S, false);
1493 return Name;
1494 }
1495
1496 if (!MST) {
1497 // Lazily create the ModuleSlotTracker when we first hit an unnamed
1498 // instruction.
1499 auto *I = cast<Instruction>(V);
1500 // This check is required to support unit tests with incomplete IR.
1501 if (I->getParent()) {
1502 MST = std::make_unique<ModuleSlotTracker>(I->getModule());
1503 MST->incorporateFunction(*I->getFunction());
1504 } else {
1505 MST = std::make_unique<ModuleSlotTracker>(nullptr);
1506 }
1507 }
1508 V->printAsOperand(S, false, *MST);
1509 return Name;
1510}
1511
1512std::string VPSlotTracker::getOrCreateName(const VPValue *V) const {
1513 std::string Name = VPValue2Name.lookup(V);
1514 if (!Name.empty())
1515 return Name;
1516
1517 // If no name was assigned, no VPlan was provided when creating the slot
1518 // tracker or it is not reachable from the provided VPlan. This can happen,
1519 // e.g. when trying to print a recipe that has not been inserted into a VPlan
1520 // in a debugger.
1521 // TODO: Update VPSlotTracker constructor to assign names to recipes &
1522 // VPValues not associated with a VPlan, instead of constructing names ad-hoc
1523 // here.
1524 const VPRecipeBase *DefR = V->getDefiningRecipe();
1525 (void)DefR;
1526 assert((!DefR || !DefR->getParent() || !DefR->getParent()->getPlan()) &&
1527 "VPValue defined by a recipe in a VPlan?");
1528
1529 // Use the underlying value's name, if there is one.
1530 if (auto *UV = V->getUnderlyingValue()) {
1531 std::string Name;
1532 raw_string_ostream S(Name);
1533 UV->printAsOperand(S, false);
1534 return (Twine("ir<") + Name + ">").str();
1535 }
1536
1537 return "<badref>";
1538}
1539
1541 const std::function<bool(ElementCount)> &Predicate, VFRange &Range) {
1542 assert(!Range.isEmpty() && "Trying to test an empty VF range.");
1543 bool PredicateAtRangeStart = Predicate(Range.Start);
1544
1545 for (ElementCount TmpVF : VFRange(Range.Start * 2, Range.End))
1546 if (Predicate(TmpVF) != PredicateAtRangeStart) {
1547 Range.End = TmpVF;
1548 break;
1549 }
1550
1551 return PredicateAtRangeStart;
1552}
1553
1554/// Build VPlans for the full range of feasible VF's = {\p MinVF, 2 * \p MinVF,
1555/// 4 * \p MinVF, ..., \p MaxVF} by repeatedly building a VPlan for a sub-range
1556/// of VF's starting at a given VF and extending it as much as possible. Each
1557/// vectorization decision can potentially shorten this sub-range during
1558/// buildVPlan().
1560 ElementCount MaxVF) {
1561 auto MaxVFTimes2 = MaxVF * 2;
1562 for (ElementCount VF = MinVF; ElementCount::isKnownLT(VF, MaxVFTimes2);) {
1563 VFRange SubRange = {VF, MaxVFTimes2};
1564 if (auto Plan = tryToBuildVPlan(SubRange)) {
1566 // Update the name of the latch of the top-level vector loop region region
1567 // after optimizations which includes block folding.
1568 Plan->getVectorLoopRegion()->getExiting()->setName("vector.latch");
1569 VPlans.push_back(std::move(Plan));
1570 }
1571 VF = SubRange.End;
1572 }
1573}
1574
1576 assert(count_if(VPlans,
1577 [VF](const VPlanPtr &Plan) { return Plan->hasVF(VF); }) ==
1578 1 &&
1579 "Multiple VPlans for VF.");
1580
1581 for (const VPlanPtr &Plan : VPlans) {
1582 if (Plan->hasVF(VF))
1583 return *Plan.get();
1584 }
1585 llvm_unreachable("No plan found!");
1586}
1587
1590 // Reserve first location for self reference to the LoopID metadata node.
1591 MDs.push_back(nullptr);
1592 bool IsUnrollMetadata = false;
1593 MDNode *LoopID = L->getLoopID();
1594 if (LoopID) {
1595 // First find existing loop unrolling disable metadata.
1596 for (unsigned I = 1, IE = LoopID->getNumOperands(); I < IE; ++I) {
1597 auto *MD = dyn_cast<MDNode>(LoopID->getOperand(I));
1598 if (MD) {
1599 const auto *S = dyn_cast<MDString>(MD->getOperand(0));
1600 if (!S)
1601 continue;
1602 if (S->getString().starts_with("llvm.loop.unroll.runtime.disable"))
1603 continue;
1604 IsUnrollMetadata =
1605 S->getString().starts_with("llvm.loop.unroll.disable");
1606 }
1607 MDs.push_back(LoopID->getOperand(I));
1608 }
1609 }
1610
1611 if (!IsUnrollMetadata) {
1612 // Add runtime unroll disable metadata.
1613 LLVMContext &Context = L->getHeader()->getContext();
1614 SmallVector<Metadata *, 1> DisableOperands;
1615 DisableOperands.push_back(
1616 MDString::get(Context, "llvm.loop.unroll.runtime.disable"));
1617 MDNode *DisableNode = MDNode::get(Context, DisableOperands);
1618 MDs.push_back(DisableNode);
1619 MDNode *NewLoopID = MDNode::get(Context, MDs);
1620 // Set operand 0 to refer to the loop id itself.
1621 NewLoopID->replaceOperandWith(0, NewLoopID);
1622 L->setLoopID(NewLoopID);
1623 }
1624}
1625
1627 Loop *VectorLoop, VPBasicBlock *HeaderVPBB, const VPlan &Plan,
1628 bool VectorizingEpilogue, MDNode *OrigLoopID,
1629 std::optional<unsigned> OrigAverageTripCount,
1630 unsigned OrigLoopInvocationWeight, unsigned EstimatedVFxUF,
1631 bool DisableRuntimeUnroll) {
1632 // Update the metadata of the scalar loop. Skip the update when vectorizing
1633 // the epilogue loop to ensure it is updated only once. Also skip the update
1634 // when the scalar loop became unreachable.
1635 if (Plan.getScalarPreheader()->hasPredecessors() && !VectorizingEpilogue) {
1636 std::optional<MDNode *> RemainderLoopID =
1639 if (RemainderLoopID) {
1640 OrigLoop->setLoopID(*RemainderLoopID);
1641 } else {
1642 if (DisableRuntimeUnroll)
1644
1645 LoopVectorizeHints Hints(OrigLoop, true, *ORE);
1646 Hints.setAlreadyVectorized();
1647 }
1648 }
1649
1650 if (!VectorLoop)
1651 return;
1652
1653 if (std::optional<MDNode *> VectorizedLoopID = makeFollowupLoopID(
1654 OrigLoopID, {LLVMLoopVectorizeFollowupAll,
1656 VectorLoop->setLoopID(*VectorizedLoopID);
1657 } else {
1658 // Keep all loop hints from the original loop on the vector loop (we'll
1659 // replace the vectorizer-specific hints below).
1660 if (OrigLoopID)
1661 VectorLoop->setLoopID(OrigLoopID);
1662
1663 if (!VectorizingEpilogue) {
1664 LoopVectorizeHints Hints(VectorLoop, true, *ORE);
1665 Hints.setAlreadyVectorized();
1666 }
1667 }
1669 TTI.getUnrollingPreferences(VectorLoop, *PSE.getSE(), UP, ORE);
1670 if (!UP.UnrollVectorizedLoop || VectorizingEpilogue)
1672
1673 // Set/update profile weights for the vector and remainder loops as original
1674 // loop iterations are now distributed among them. Note that original loop
1675 // becomes the scalar remainder loop after vectorization.
1676 //
1677 // For cases like foldTailByMasking() and requiresScalarEpiloque() we may
1678 // end up getting slightly roughened result but that should be OK since
1679 // profile is not inherently precise anyway. Note also possible bypass of
1680 // vector code caused by legality checks is ignored, assigning all the weight
1681 // to the vector loop, optimistically.
1682 //
1683 // For scalable vectorization we can't know at compile time how many
1684 // iterations of the loop are handled in one vector iteration, so instead
1685 // use the value of vscale used for tuning.
1686 if (!OrigAverageTripCount)
1687 return;
1688 // Calculate number of iterations in unrolled loop.
1689 unsigned AverageVectorTripCount = *OrigAverageTripCount / EstimatedVFxUF;
1690 // Calculate number of iterations for remainder loop.
1691 unsigned RemainderAverageTripCount = *OrigAverageTripCount % EstimatedVFxUF;
1692
1693 if (HeaderVPBB) {
1694 setLoopEstimatedTripCount(VectorLoop, AverageVectorTripCount,
1695 OrigLoopInvocationWeight);
1696 }
1697 if (Plan.getScalarPreheader()->hasPredecessors()) {
1698 setLoopEstimatedTripCount(OrigLoop, RemainderAverageTripCount,
1699 OrigLoopInvocationWeight);
1700 }
1701}
1702
1703#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1705 if (VPlans.empty()) {
1706 O << "LV: No VPlans built.\n";
1707 return;
1708 }
1709 for (const auto &Plan : VPlans)
1711 Plan->printDOT(O);
1712 else
1713 Plan->print(O);
1714}
1715#endif
1716
1717bool llvm::canConstantBeExtended(const APInt *C, Type *NarrowType,
1719 APInt TruncatedVal = C->trunc(NarrowType->getScalarSizeInBits());
1720 unsigned WideSize = C->getBitWidth();
1721 APInt ExtendedVal = ExtKind == TTI::PR_SignExtend
1722 ? TruncatedVal.sext(WideSize)
1723 : TruncatedVal.zext(WideSize);
1724 return ExtendedVal == *C;
1725}
1726
1729 if (!V->isLiveIn())
1730 return {};
1731
1732 return TTI::getOperandInfo(V->getLiveInIRValue());
1733}
1734
1736 Type *ResultTy, ArrayRef<const VPValue *> Operands, ElementCount VF,
1737 bool AlwaysIncludeReplicatingR) {
1738 if (VF.isScalar())
1739 return 0;
1740
1741 assert(!VF.isScalable() &&
1742 "Scalarization overhead not supported for scalable vectors");
1743
1744 InstructionCost ScalarizationCost = 0;
1745 // Compute the cost of scalarizing the result if needed.
1746 if (!ResultTy->isVoidTy()) {
1747 for (Type *VectorTy :
1748 to_vector(getContainedTypes(toVectorizedTy(ResultTy, VF)))) {
1749 ScalarizationCost += TTI.getScalarizationOverhead(
1751 /*Insert=*/true,
1752 /*Extract=*/false, CostKind);
1753 }
1754 }
1755 // Compute the cost of scalarizing the operands, skipping ones that do not
1756 // require extraction/scalarization and do not incur any overhead.
1757 SmallPtrSet<const VPValue *, 4> UniqueOperands;
1759 for (auto *Op : Operands) {
1760 if (Op->isLiveIn() ||
1761 (!AlwaysIncludeReplicatingR &&
1764 cast<VPReplicateRecipe>(Op)->getOpcode() == Instruction::Load) ||
1765 !UniqueOperands.insert(Op).second)
1766 continue;
1767 Tys.push_back(toVectorizedTy(Types.inferScalarType(Op), VF));
1768 }
1769 return ScalarizationCost +
1770 TTI.getOperandsScalarizationOverhead(Tys, CostKind);
1771}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition Compiler.h:638
dxil pretty DXIL Metadata Pretty Printer
Flatten the CFG
#define _
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
This file defines the LoopVectorizationLegality class.
This file provides a LoopVectorizationPlanner class.
#define I(x, y, z)
Definition MD5.cpp:57
#define T
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
#define P(N)
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
static StringRef getName(Value *V)
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallVector class.
This file contains some functions that are useful when dealing with strings.
#define LLVM_DEBUG(...)
Definition Debug.h:114
This file implements dominator tree analysis for a single level of a VPlan's H-CFG.
This file contains the declarations of different VPlan-related auxiliary helpers.
This file provides utility VPlan to VPlan transformations.
static void addRuntimeUnrollDisableMetaData(Loop *L)
Definition VPlan.cpp:1588
static T * getPlanEntry(T *Start)
Definition VPlan.cpp:141
static T * getEnclosingLoopRegionForRegion(T *P)
Return the enclosing loop region for region P.
Definition VPlan.cpp:568
const char LLVMLoopVectorizeFollowupAll[]
Definition VPlan.cpp:57
static bool isDefinedInsideLoopRegions(const VPValue *VPV)
Returns true if there is a vector loop region and VPV is defined in a loop region.
Definition VPlan.cpp:1367
static bool hasConditionalTerminator(const VPBasicBlock *VPBB)
Definition VPlan.cpp:586
const char LLVMLoopVectorizeFollowupVectorized[]
Definition VPlan.cpp:58
static void remapOperands(VPBlockBase *Entry, VPBlockBase *NewEntry, DenseMap< VPValue *, VPValue * > &Old2NewVPValues)
Definition VPlan.cpp:1114
const char LLVMLoopVectorizeFollowupEpilogue[]
Definition VPlan.cpp:60
static std::pair< VPBlockBase *, VPBlockBase * > cloneFrom(VPBlockBase *Entry)
Definition VPlan.cpp:678
static cl::opt< bool > PrintVPlansInDotFormat("vplan-print-in-dot-format", cl::Hidden, cl::desc("Use dot format instead of plain text when dumping VPlans"))
This file contains the declarations of the Vectorization Plan base classes:
static bool IsCondBranch(unsigned BrOpc)
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1012
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
Definition APInt.cpp:985
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
iterator begin()
Instruction iterator methods.
Definition BasicBlock.h:459
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition BasicBlock.h:206
LLVM_ABI const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition BasicBlock.h:233
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
std::optional< const DILocation * > cloneByMultiplyingDuplicationFactor(unsigned DF) const
Returns a new DILocation with duplication factor DF * current duplication factor encoded in the discr...
A debug info location.
Definition DebugLoc.h:123
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition DenseMap.h:205
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Definition DenseMap.h:169
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:164
constexpr bool isVector() const
One or more elements.
Definition TypeSize.h:324
constexpr bool isScalar() const
Exactly one element.
Definition TypeSize.h:320
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
static InstructionCost getInvalid(CostType Val=0)
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
A helper class to return the specified delimiter string after the first invocation of operator String...
void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase< BlockT, LoopT > &LI)
This method is used by other analyses to update loop information.
std::vector< BlockT * > & getBlocksVector()
Return a direct, mutable handle to the blocks vector so that we can mutate it efficiently with techni...
void addChildLoop(LoopT *NewChild)
Add the specified loop to be a child of this loop.
VPlan & getPlanFor(ElementCount VF) const
Return the VPlan for VF.
Definition VPlan.cpp:1575
void updateLoopMetadataAndProfileInfo(Loop *VectorLoop, VPBasicBlock *HeaderVPBB, const VPlan &Plan, bool VectorizingEpilogue, MDNode *OrigLoopID, std::optional< unsigned > OrigAverageTripCount, unsigned OrigLoopInvocationWeight, unsigned EstimatedVFxUF, bool DisableRuntimeUnroll)
Update loop metadata and profile info for both the scalar remainder loop and VectorLoop,...
Definition VPlan.cpp:1626
void buildVPlans(ElementCount MinVF, ElementCount MaxVF)
Build VPlans for power-of-2 VF's between MinVF and MaxVF inclusive, according to the information gath...
Definition VPlan.cpp:1559
static bool getDecisionAndClampRange(const std::function< bool(ElementCount)> &Predicate, VFRange &Range)
Test a Predicate on a Range of VF's.
Definition VPlan.cpp:1540
void printPlans(raw_ostream &O)
Definition VPlan.cpp:1704
Utility class for getting and setting loop vectorizer hints in the form of loop metadata.
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
void setLoopID(MDNode *LoopID) const
Set the llvm.loop loop id metadata for this loop.
Definition LoopInfo.cpp:526
Metadata node.
Definition Metadata.h:1078
LLVM_ABI void replaceOperandWith(unsigned I, Metadata *New)
Replace a specific operand.
const MDOperand & getOperand(unsigned I) const
Definition Metadata.h:1442
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1569
unsigned getNumOperands() const
Return number of MDNode operands.
Definition Metadata.h:1448
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
Definition Metadata.cpp:608
BlockT * getEntry() const
Get the entry BasicBlock of the Region.
Definition RegionInfo.h:320
size_type size() const
Determine the number of elements in the SetVector.
Definition SetVector.h:103
void insert_range(Range &&R)
Definition SetVector.h:176
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:151
This class provides computation of slot numbers for LLVM Assembly writing.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:339
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
std::pair< iterator, bool > try_emplace(StringRef Key, ArgsTy &&...Args)
Emplace a new element for the specified key into the map if the key isn't already in the map.
Definition StringMap.h:381
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition StringRef.h:702
StringRef rtrim(char Char) const
Return string with consecutive Char characters starting from the right removed.
Definition StringRef.h:804
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
static LLVM_ABI OperandValueInfo getOperandInfo(const Value *V)
Collect properties of V used in cost analysis, e.g. OP_PowerOf2.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:230
bool isVoidTy() const
Return true if this is 'void'.
Definition Type.h:139
This function has undefined behavior.
void setOperand(unsigned i, Value *Val)
Definition User.h:237
Value * getOperand(unsigned i) const
Definition User.h:232
unsigned getNumOperands() const
Definition User.h:254
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition VPlan.h:3980
void appendRecipe(VPRecipeBase *Recipe)
Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.
Definition VPlan.h:4055
RecipeListTy::iterator iterator
Instruction iterators...
Definition VPlan.h:4007
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPBasicBlock,...
Definition VPlan.cpp:478
iterator end()
Definition VPlan.h:4017
iterator begin()
Recipe iterator methods.
Definition VPlan.h:4015
VPBasicBlock * clone() override
Clone the current block and it's recipes, without updating the operands of the cloned recipes.
Definition VPlan.cpp:529
InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override
Return the cost of this VPBasicBlock.
Definition VPlan.cpp:763
const VPBasicBlock * getCFGPredecessor(unsigned Idx) const
Returns the predecessor block at index Idx with the predecessors as per the corresponding plain CFG.
Definition VPlan.cpp:770
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
Definition VPlan.cpp:216
void connectToPredecessors(VPTransformState &State)
Connect the VPBBs predecessors' in the VPlan CFG to the IR basic block generated for this VPBB.
Definition VPlan.cpp:378
VPRegionBlock * getEnclosingLoopRegion()
Definition VPlan.cpp:578
VPBasicBlock * splitAt(iterator SplitAt)
Split current block at SplitAt by inserting a new block between the current block and its successors ...
Definition VPlan.cpp:550
RecipeListTy Recipes
The VPRecipes held in the order of output instructions to generate.
Definition VPlan.h:3995
void executeRecipes(VPTransformState *State, BasicBlock *BB)
Execute the recipes in the IR basic block BB.
Definition VPlan.cpp:536
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print this VPBsicBlock to O, prefixing all lines with Indent.
Definition VPlan.cpp:657
bool isExiting() const
Returns true if the block is exiting it's parent region.
Definition VPlan.cpp:635
VPRecipeBase * getTerminator()
If the block has multiple successors, return the branch recipe terminating the block.
Definition VPlan.cpp:623
const VPRecipeBase & back() const
Definition VPlan.h:4029
bool empty() const
Definition VPlan.h:4026
size_t size() const
Definition VPlan.h:4025
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
Definition VPlan.h:81
void setSuccessors(ArrayRef< VPBlockBase * > NewSuccs)
Set each VPBasicBlock in NewSuccss as successor of this VPBlockBase.
Definition VPlan.h:300
VPRegionBlock * getParent()
Definition VPlan.h:173
const VPBasicBlock * getExitingBasicBlock() const
Definition VPlan.cpp:186
void setName(const Twine &newName)
Definition VPlan.h:166
size_t getNumSuccessors() const
Definition VPlan.h:219
iterator_range< VPBlockBase ** > successors()
Definition VPlan.h:201
virtual void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const =0
Print plain-text dump of this VPBlockBase to O, prefixing all lines with Indent.
bool hasPredecessors() const
Returns true if this block has any predecessors.
Definition VPlan.h:223
void printSuccessors(raw_ostream &O, const Twine &Indent) const
Print the successors of this block to O, prefixing all lines with Indent.
Definition VPlan.cpp:645
size_t getNumPredecessors() const
Definition VPlan.h:220
void setPredecessors(ArrayRef< VPBlockBase * > NewPreds)
Set each VPBasicBlock in NewPreds as predecessor of this VPBlockBase.
Definition VPlan.h:291
VPBlockBase * getEnclosingBlockWithPredecessors()
Definition VPlan.cpp:208
const VPBlocksTy & getPredecessors() const
Definition VPlan.h:204
virtual VPBlockBase * clone()=0
Clone the current block and it's recipes without updating the operands of the cloned recipes,...
VPlan * getPlan()
Definition VPlan.cpp:161
void setPlan(VPlan *ParentPlan)
Sets the pointer of the plan containing the block.
Definition VPlan.cpp:180
const std::string & getName() const
Definition VPlan.h:164
VPBlockBase * getSinglePredecessor() const
Definition VPlan.h:215
const VPBlocksTy & getHierarchicalSuccessors()
Definition VPlan.h:242
VPBlockBase(const unsigned char SC, const std::string &N)
Definition VPlan.h:150
VPBlockBase * getEnclosingBlockWithSuccessors()
An Enclosing Block of a block B is any block containing B, including B itself.
Definition VPlan.cpp:200
const VPBasicBlock * getEntryBasicBlock() const
Definition VPlan.cpp:166
VPBlockBase * getSingleHierarchicalPredecessor()
Definition VPlan.h:264
VPBlockBase * getSingleSuccessor() const
Definition VPlan.h:209
const VPBlocksTy & getSuccessors() const
Definition VPlan.h:198
static auto blocksOnly(const T &Range)
Return an iterator range over Range which only includes BlockTy blocks.
Definition VPlanUtils.h:214
static void insertBlockAfter(VPBlockBase *NewBlock, VPBlockBase *BlockPtr)
Insert disconnected VPBlockBase NewBlock after BlockPtr.
Definition VPlanUtils.h:114
static bool isLatch(const VPBlockBase *VPB, const VPDominatorTree &VPDT)
Returns true if VPB is a loop latch, using isHeader().
static bool isHeader(const VPBlockBase *VPB, const VPDominatorTree &VPDT)
Returns true if VPB is a loop header, based on regions or VPDT in their absence.
static void connectBlocks(VPBlockBase *From, VPBlockBase *To, unsigned PredIdx=-1u, unsigned SuccIdx=-1u)
Connect VPBlockBases From and To bi-directionally.
Definition VPlanUtils.h:165
static void disconnectBlocks(VPBlockBase *From, VPBlockBase *To)
Disconnect VPBlockBases From and To bi-directionally.
Definition VPlanUtils.h:184
static void transferSuccessors(VPBlockBase *Old, VPBlockBase *New)
Transfer successors from Old to New. New must have no successors.
Definition VPlanUtils.h:204
VPlan-based builder utility analogous to IRBuilder.
VPPhi * createScalarPhi(ArrayRef< VPValue * > IncomingValues, DebugLoc DL, const Twine &Name="")
LLVM_ABI_FOR_TEST void dump() const
Dump the VPDef to stderr (for debugging).
Definition VPlan.cpp:122
virtual void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const =0
Each concrete VPDef prints itself.
A special type of VPBasicBlock that wraps an existing IR basic block.
Definition VPlan.h:4133
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPBasicBlock,...
Definition VPlan.cpp:446
BasicBlock * getIRBasicBlock() const
Definition VPlan.h:4157
VPIRBasicBlock * clone() override
Clone the current block and it's recipes, without updating the operands of the cloned recipes.
Definition VPlan.cpp:471
static LLVM_ABI_FOR_TEST VPIRInstruction * create(Instruction &I)
Create a new VPIRPhi for \I , if it is a PHINode, otherwise create a VPIRInstruction.
In what follows, the term "input IR" refers to code that is fed into the vectorizer whereas the term ...
Value * getAsRuntimeExpr(IRBuilderBase &Builder, const ElementCount &VF) const
Returns an expression describing the lane index that can be used at runtime.
Definition VPlan.cpp:81
static VPLane getFirstLane()
@ ScalableLast
For ScalableLast, Lane is the offset from the start of the last N-element subvector in a scalable vec...
@ First
For First, Lane is the index into the first N elements of a fixed-vector <N x <ElTy>> or a scalable v...
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition VPlan.h:387
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override final
Print the recipe, delegating to printRecipe().
VPBasicBlock * getParent()
Definition VPlan.h:408
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
Definition VPlan.h:4168
VPRegionBlock * clone() override
Clone all blocks in the single-entry single-exit region of the block and their recipes without updati...
Definition VPlan.cpp:728
const VPBlockBase * getEntry() const
Definition VPlan.h:4204
void dissolveToCFGLoop()
Remove the current region from its VPlan, connecting its predecessor to its entry,...
Definition VPlan.cpp:832
bool isReplicator() const
An indicator whether this region is to generate multiple replicated instances of output IR correspond...
Definition VPlan.h:4236
InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override
Return the cost of the block.
Definition VPlan.cpp:789
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print this VPRegionBlock to O (recursively), prefixing all lines with Indent.
Definition VPlan.cpp:818
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPRegionBlock,...
Definition VPlan.cpp:741
const VPBlockBase * getExiting() const
Definition VPlan.h:4216
friend class VPlan
Definition VPlan.h:4169
This class can be used to assign names to VPValues.
std::string getOrCreateName(const VPValue *V) const
Returns the name assigned to V, if there is one, otherwise try to construct one from the underlying v...
Definition VPlan.cpp:1512
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
Definition VPlanValue.h:202
void replaceUsesOfWith(VPValue *From, VPValue *To)
Replaces all uses of From in the VPUser with To.
Definition VPlan.cpp:1407
void printOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the operands to O.
Definition VPlan.cpp:1419
operand_range operands()
Definition VPlanValue.h:270
void setOperand(unsigned I, VPValue *New)
Definition VPlanValue.h:246
unsigned getNumOperands() const
Definition VPlanValue.h:240
VPValue * getOperand(unsigned N) const
Definition VPlanValue.h:241
This is the base class of the VPlan Def/Use graph, used for modeling the data flow into,...
Definition VPlanValue.h:46
bool isDefinedOutsideLoopRegions() const
Returns true if the VPValue is defined outside any loop.
Definition VPlan.cpp:1373
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Definition VPlan.cpp:131
void printAsOperand(raw_ostream &OS, VPSlotTracker &Tracker) const
Definition VPlan.cpp:1415
friend class VPDef
Definition VPlanValue.h:47
Value * UnderlyingVal
Hold the underlying Value, if any, attached to this VPValue.
Definition VPlanValue.h:59
void dump() const
Dump the value to stderr (for debugging).
Definition VPlan.cpp:114
VPValue(const unsigned char SC, Value *UV=nullptr, VPDef *Def=nullptr)
Definition VPlan.cpp:94
virtual ~VPValue()
Definition VPlan.cpp:100
void print(raw_ostream &OS, VPSlotTracker &Tracker) const
Definition VPlan.cpp:107
void replaceAllUsesWith(VPValue *New)
Definition VPlan.cpp:1376
unsigned getNumUsers() const
Definition VPlanValue.h:111
void replaceUsesWithIf(VPValue *New, llvm::function_ref< bool(VPUser &U, unsigned Idx)> ShouldReplace)
Go through the uses list for this VPValue and make each use point to New if the callback ShouldReplac...
Definition VPlan.cpp:1380
VPDef * Def
Pointer to the VPDef that defines this VPValue.
Definition VPlanValue.h:63
LLVM_DUMP_METHOD void dump()
Definition VPlan.cpp:1247
VPlan models a candidate for vectorization, encoding various decisions take to produce efficient outp...
Definition VPlan.h:4298
LLVM_ABI_FOR_TEST void printDOT(raw_ostream &O) const
Print this VPlan in DOT format to O.
Definition VPlan.cpp:1105
friend class VPSlotTracker
Definition VPlan.h:4300
std::string getName() const
Return a string with the name of the plan and the applicable VFs and UFs.
Definition VPlan.cpp:1081
VPBasicBlock * getEntry()
Definition VPlan.h:4387
void setName(const Twine &newName)
Definition VPlan.h:4537
VPIRBasicBlock * getExitBlock(BasicBlock *IRBB) const
Return the VPIRBasicBlock corresponding to IRBB.
Definition VPlan.cpp:889
LLVM_ABI_FOR_TEST ~VPlan()
Definition VPlan.cpp:867
bool isExitBlock(VPBlockBase *VPBB)
Returns true if VPBB is an exit block.
Definition VPlan.cpp:897
friend class VPlanPrinter
Definition VPlan.h:4299
unsigned getUF() const
Definition VPlan.h:4519
VPIRBasicBlock * createEmptyVPIRBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock wrapping IRBB, but do not create VPIRInstructions wrapping the instructions i...
Definition VPlan.cpp:1219
auto getLiveIns() const
Return the list of live-in VPValues available in the VPlan.
Definition VPlan.h:4580
ArrayRef< VPIRBasicBlock * > getExitBlocks() const
Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of the original scalar loop.
Definition VPlan.h:4439
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
Definition VPlan.cpp:1010
InstructionCost cost(ElementCount VF, VPCostContext &Ctx)
Return the cost of this plan.
Definition VPlan.cpp:992
void setEntry(VPBasicBlock *VPBB)
Definition VPlan.h:4376
VPBasicBlock * createVPBasicBlock(const Twine &Name, VPRecipeBase *Recipe=nullptr)
Create a new VPBasicBlock with Name and containing Recipe if present.
Definition VPlan.h:4603
LLVM_ABI_FOR_TEST VPIRBasicBlock * createVPIRBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock from IRBB containing VPIRInstructions for all instructions in IRBB,...
Definition VPlan.cpp:1225
LLVM_DUMP_METHOD void dump() const
Dump the plan to stderr (for debugging).
Definition VPlan.cpp:1111
VPBasicBlock * getScalarPreheader() const
Return the VPBasicBlock for the preheader of the scalar loop.
Definition VPlan.h:4430
void execute(VPTransformState *State)
Generate the IR code for this VPlan.
Definition VPlan.cpp:904
LLVM_ABI_FOR_TEST void print(raw_ostream &O) const
Print this VPlan to O.
Definition VPlan.cpp:1064
VPIRBasicBlock * getScalarHeader() const
Return the VPIRBasicBlock wrapping the header of the scalar loop.
Definition VPlan.h:4435
void printLiveIns(raw_ostream &O) const
Print the live-ins of this VPlan to O.
Definition VPlan.cpp:1026
LLVM_ABI_FOR_TEST VPlan * duplicate()
Clone the current VPlan, update all VPValues of the new VPlan and cloned recipes to refer to the clon...
Definition VPlan.cpp:1152
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
static constexpr bool isKnownLT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition TypeSize.h:216
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition TypeSize.h:168
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:165
An efficient, type-erasing, non-owning reference to a callable.
self_iterator getIterator()
Definition ilist_node.h:123
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
A raw_ostream that writes to an std::string.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Tail
Attemps to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition CallingConv.h:76
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
LLVM_ABI std::string EscapeString(const std::string &Label)
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
bool match(Val *V, const Pattern &P)
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
VPInstruction_match< VPInstruction::BranchOnCount > m_BranchOnCount()
VPInstruction_match< VPInstruction::BuildVector > m_BuildVector()
BuildVector is matches only its opcode, w/o matching its operands as the number of operands is not fi...
VPInstruction_match< VPInstruction::BranchOnCond > m_BranchOnCond()
bool isSingleScalar(const VPValue *VPV)
Returns true if VPV is a single scalar, either because it produces the same value for all lanes or on...
VPBasicBlock * getFirstLoopHeader(VPlan &Plan, VPDominatorTree &VPDT)
Returns the header block of the first, top-level loop, or null if none exist.
bool onlyFirstLaneUsed(const VPValue *Def)
Returns true if only the first lane of Def is used.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
Definition STLExtras.h:829
InstructionCost Cost
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI cl::opt< bool > EnableFSDiscriminator
Value * getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF)
Return the runtime value for VF.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ABI std::optional< MDNode * > makeFollowupLoopID(MDNode *OrigLoopID, ArrayRef< StringRef > FollowupAttrs, const char *InheritOptionsAttrsPrefix="", bool AlwaysNew=false)
Create a new loop identifier for a loop created from a loop transformation.
void interleaveComma(const Container &c, StreamT &os, UnaryFunctor each_fn)
Definition STLExtras.h:2262
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:632
auto cast_or_null(const Y &Val)
Definition Casting.h:714
iterator_range< df_iterator< VPBlockShallowTraversalWrapper< VPBlockBase * > > > vp_depth_first_shallow(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order.
Definition VPlanCFG.h:216
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
Type * toVectorizedTy(Type *Ty, ElementCount EC)
A helper for converting to vectorized types.
bool canConstantBeExtended(const APInt *C, Type *NarrowType, TTI::PartialReductionExtendKind ExtKind)
Check if a constant CI can be safely treated as having been extended from a narrower type with the gi...
Definition VPlan.cpp:1717
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
cl::opt< unsigned > ForceTargetInstructionCost
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
RNSuccIterator< NodeRef, BlockT, RegionT > succ_begin(NodeRef Node)
RNSuccIterator< NodeRef, BlockT, RegionT > succ_end(NodeRef Node)
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
LLVM_ABI bool setLoopEstimatedTripCount(Loop *L, unsigned EstimatedTripCount, std::optional< unsigned > EstimatedLoopInvocationWeight=std::nullopt)
Set llvm.loop.estimated_trip_count with the value EstimatedTripCount in the loop metadata of L.
constexpr bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1748
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition STLExtras.h:1982
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1779
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1918
ArrayRef< Type * > getContainedTypes(Type *const &Ty)
Returns the types contained in Ty.
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305
LLVM_ABI void DeleteDeadBlocks(ArrayRef< BasicBlock * > BBs, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified blocks from BB.
std::unique_ptr< VPlan > VPlanPtr
Definition VPlan.h:77
Parameters that control the generic loop unrolling transformation.
bool UnrollVectorizedLoop
Don't disable runtime unroll for the loops which were vectorized.
A range of powers-of-2 vectorization factors with fixed start and adjustable end.
ElementCount End
Struct to hold various analysis needed for cost computations.
TargetTransformInfo::OperandValueInfo getOperandInfo(VPValue *V) const
Returns the OperandInfo for V, if it is a live-in.
Definition VPlan.cpp:1728
InstructionCost getScalarizationOverhead(Type *ResultTy, ArrayRef< const VPValue * > Operands, ElementCount VF, bool AlwaysIncludeReplicatingR=false)
Estimate the overhead of scalarizing a recipe with result type ResultTy and Operands with VF.
Definition VPlan.cpp:1735
TargetTransformInfo::TargetCostKind CostKind
VPTypeAnalysis Types
const TargetTransformInfo & TTI
VPTransformState holds information passed down when "executing" a VPlan, needed for generating the ou...
LoopInfo * LI
Hold a pointer to LoopInfo to register new basic blocks in the loop.
VPTypeAnalysis TypeAnalysis
VPlan-based type analysis.
struct llvm::VPTransformState::DataState Data
struct llvm::VPTransformState::CFGState CFG
Value * get(const VPValue *Def, bool IsScalar=false)
Get the generated vector Value for a given VPValue Def if IsScalar is false, otherwise return the gen...
Definition VPlan.cpp:263
VPTransformState(const TargetTransformInfo *TTI, ElementCount VF, LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC, IRBuilderBase &Builder, VPlan *Plan, Loop *CurrentParentLoop, Type *CanonicalIVTy)
Definition VPlan.cpp:223
std::optional< VPLane > Lane
Hold the index to generate specific scalar instructions.
IRBuilderBase & Builder
Hold a reference to the IRBuilder used to generate output IR code.
bool hasScalarValue(const VPValue *Def, VPLane Lane)
const TargetTransformInfo * TTI
Target Transform Info.
VPlan * Plan
Pointer to the VPlan code is generated for.
void set(const VPValue *Def, Value *V, bool IsScalar=false)
Set the generated vector Value for a given VPValue, if IsScalar is false.
bool hasVectorValue(const VPValue *Def)
VPDominatorTree VPDT
VPlan-based dominator tree.
ElementCount VF
The chosen Vectorization Factor of the loop being vectorized.
Value * packScalarIntoVectorizedValue(const VPValue *Def, Value *WideValue, const VPLane &Lane)
Insert the scalar value of Def at Lane into Lane of WideValue and return the resulting value.
Definition VPlan.cpp:346
AssumptionCache * AC
Hold a pointer to AssumptionCache to register new assumptions after replicating assume calls.
void setDebugLocFrom(DebugLoc DL)
Set the debug location in the builder using the debug location DL.
Definition VPlan.cpp:324
Loop * CurrentParentLoop
The parent loop object for the current scope, or nullptr.
static LLVM_ABI_FOR_TEST void optimize(VPlan &Plan)
Apply VPlan-to-VPlan optimizations to Plan, including induction recipe optimizations,...