LLVM 22.0.0git
VPlan.cpp
Go to the documentation of this file.
1//===- VPlan.cpp - Vectorizer Plan ----------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This is the LLVM vectorization plan. It represents a candidate for
11/// vectorization, allowing to plan and optimize how to vectorize a given loop
12/// before generating LLVM-IR.
13/// The vectorizer uses vectorization plans to estimate the costs of potential
14/// candidates and if profitable to execute the desired plan, generating vector
15/// LLVM-IR code.
16///
17//===----------------------------------------------------------------------===//
18
19#include "VPlan.h"
21#include "VPlanCFG.h"
22#include "VPlanDominatorTree.h"
23#include "VPlanHelpers.h"
24#include "VPlanPatternMatch.h"
25#include "VPlanTransforms.h"
26#include "VPlanUtils.h"
28#include "llvm/ADT/STLExtras.h"
31#include "llvm/ADT/Twine.h"
34#include "llvm/IR/BasicBlock.h"
35#include "llvm/IR/CFG.h"
36#include "llvm/IR/IRBuilder.h"
37#include "llvm/IR/Instruction.h"
39#include "llvm/IR/Type.h"
40#include "llvm/IR/Value.h"
43#include "llvm/Support/Debug.h"
49#include <cassert>
50#include <string>
51
52using namespace llvm;
53using namespace llvm::VPlanPatternMatch;
54
55namespace llvm {
57}
58
59/// @{
60/// Metadata attribute names
61const char LLVMLoopVectorizeFollowupAll[] = "llvm.loop.vectorize.followup_all";
63 "llvm.loop.vectorize.followup_vectorized";
65 "llvm.loop.vectorize.followup_epilogue";
66/// @}
67
69
71 "vplan-print-in-dot-format", cl::Hidden,
72 cl::desc("Use dot format instead of plain text when dumping VPlans"));
73
74#define DEBUG_TYPE "loop-vectorize"
75
76#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
78 const VPBasicBlock *Parent = R.getParent();
79 VPSlotTracker SlotTracker(Parent ? Parent->getPlan() : nullptr);
80 R.print(OS, "", SlotTracker);
81 return OS;
82}
83#endif
84
86 const ElementCount &VF) const {
87 switch (LaneKind) {
89 // Lane = RuntimeVF - VF.getKnownMinValue() + Lane
90 return Builder.CreateSub(getRuntimeVF(Builder, Builder.getInt32Ty(), VF),
91 Builder.getInt32(VF.getKnownMinValue() - Lane));
93 return Builder.getInt32(Lane);
94 }
95 llvm_unreachable("Unknown lane kind");
96}
97
98VPValue::VPValue(const unsigned char SC, Value *UV, VPDef *Def)
99 : SubclassID(SC), UnderlyingVal(UV), Def(Def) {
100 if (Def)
101 Def->addDefinedValue(this);
102}
103
105 assert(Users.empty() && "trying to delete a VPValue with remaining users");
106 if (Def)
107 Def->removeDefinedValue(this);
108}
109
110#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
113 R->print(OS, "", SlotTracker);
114 else
116}
117
118void VPValue::dump() const {
119 const VPRecipeBase *Instr = dyn_cast_or_null<VPRecipeBase>(this->Def);
121 (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() : nullptr);
123 dbgs() << "\n";
124}
125
126void VPDef::dump() const {
127 const VPRecipeBase *Instr = dyn_cast_or_null<VPRecipeBase>(this);
129 (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() : nullptr);
130 print(dbgs(), "", SlotTracker);
131 dbgs() << "\n";
132}
133#endif
134
138
142
143// Get the top-most entry block of \p Start. This is the entry block of the
144// containing VPlan. This function is templated to support both const and non-const blocks
145template <typename T> static T *getPlanEntry(T *Start) {
146 T *Next = Start;
147 T *Current = Start;
148 while ((Next = Next->getParent()))
149 Current = Next;
150
151 SmallSetVector<T *, 8> WorkList;
152 WorkList.insert(Current);
153
154 for (unsigned i = 0; i < WorkList.size(); i++) {
155 T *Current = WorkList[i];
156 if (!Current->hasPredecessors())
157 return Current;
158 auto &Predecessors = Current->getPredecessors();
159 WorkList.insert_range(Predecessors);
160 }
161
162 llvm_unreachable("VPlan without any entry node without predecessors");
163}
164
165VPlan *VPBlockBase::getPlan() { return getPlanEntry(this)->Plan; }
166
167const VPlan *VPBlockBase::getPlan() const { return getPlanEntry(this)->Plan; }
168
169/// \return the VPBasicBlock that is the entry of Block, possibly indirectly.
176
183
184void VPBlockBase::setPlan(VPlan *ParentPlan) {
185 assert(ParentPlan->getEntry() == this && "Can only set plan on its entry.");
186 Plan = ParentPlan;
187}
188
189/// \return the VPBasicBlock that is the exit of Block, possibly indirectly.
191 const VPBlockBase *Block = this;
193 Block = Region->getExiting();
195}
196
203
205 if (!Successors.empty() || !Parent)
206 return this;
207 assert(Parent->getExiting() == this &&
208 "Block w/o successors not the exiting block of its parent.");
209 return Parent->getEnclosingBlockWithSuccessors();
210}
211
213 if (!Predecessors.empty() || !Parent)
214 return this;
215 assert(Parent->getEntry() == this &&
216 "Block w/o predecessors not the entry of its parent.");
217 return Parent->getEnclosingBlockWithPredecessors();
218}
219
221 iterator It = begin();
222 while (It != end() && It->isPhi())
223 It++;
224 return It;
225}
226
234
236 if (Def->isLiveIn())
237 return Def->getLiveInIRValue();
238
239 if (hasScalarValue(Def, Lane))
240 return Data.VPV2Scalars[Def][Lane.mapToCacheIndex(VF)];
241
242 if (!Lane.isFirstLane() && vputils::isSingleScalar(Def) &&
244 return Data.VPV2Scalars[Def][0];
245 }
246
247 // Look through BuildVector to avoid redundant extracts.
248 // TODO: Remove once replicate regions are unrolled explicitly.
249 if (Lane.getKind() == VPLane::Kind::First && match(Def, m_BuildVector())) {
250 auto *BuildVector = cast<VPInstruction>(Def);
251 return get(BuildVector->getOperand(Lane.getKnownLane()), true);
252 }
253
255 auto *VecPart = Data.VPV2Vector[Def];
256 if (!VecPart->getType()->isVectorTy()) {
257 assert(Lane.isFirstLane() && "cannot get lane > 0 for scalar");
258 return VecPart;
259 }
260 // TODO: Cache created scalar values.
261 Value *LaneV = Lane.getAsRuntimeExpr(Builder, VF);
262 auto *Extract = Builder.CreateExtractElement(VecPart, LaneV);
263 // set(Def, Extract, Instance);
264 return Extract;
265}
266
267Value *VPTransformState::get(const VPValue *Def, bool NeedsScalar) {
268 if (NeedsScalar) {
269 assert((VF.isScalar() || Def->isLiveIn() || hasVectorValue(Def) ||
271 (hasScalarValue(Def, VPLane(0)) &&
272 Data.VPV2Scalars[Def].size() == 1)) &&
273 "Trying to access a single scalar per part but has multiple scalars "
274 "per part.");
275 return get(Def, VPLane(0));
276 }
277
278 // If Values have been set for this Def return the one relevant for \p Part.
279 if (hasVectorValue(Def))
280 return Data.VPV2Vector[Def];
281
282 auto GetBroadcastInstrs = [this](Value *V) {
283 if (VF.isScalar())
284 return V;
285 // Broadcast the scalar into all locations in the vector.
286 Value *Shuf = Builder.CreateVectorSplat(VF, V, "broadcast");
287 return Shuf;
288 };
289
290 if (!hasScalarValue(Def, {0})) {
291 assert(Def->isLiveIn() && "expected a live-in");
292 Value *IRV = Def->getLiveInIRValue();
293 Value *B = GetBroadcastInstrs(IRV);
294 set(Def, B);
295 return B;
296 }
297
298 Value *ScalarValue = get(Def, VPLane(0));
299 // If we aren't vectorizing, we can just copy the scalar map values over
300 // to the vector map.
301 if (VF.isScalar()) {
302 set(Def, ScalarValue);
303 return ScalarValue;
304 }
305
306 bool IsSingleScalar = vputils::isSingleScalar(Def);
307
308 VPLane LastLane(IsSingleScalar ? 0 : VF.getFixedValue() - 1);
309 // Check if there is a scalar value for the selected lane.
310 if (!hasScalarValue(Def, LastLane)) {
311 // At the moment, VPWidenIntOrFpInductionRecipes, VPScalarIVStepsRecipes and
312 // VPExpandSCEVRecipes can also be a single scalar.
314 VPExpandSCEVRecipe>(Def->getDefiningRecipe())) &&
315 "unexpected recipe found to be invariant");
316 IsSingleScalar = true;
317 LastLane = 0;
318 }
319
320 // We need to construct the vector value for a single-scalar value by
321 // broadcasting the scalar to all lanes.
322 // TODO: Replace by introducing Broadcast VPInstructions.
323 assert(IsSingleScalar && "must be a single-scalar at this point");
324 // Set the insert point after the last scalarized instruction or after the
325 // last PHI, if LastInst is a PHI. This ensures the insertelement sequence
326 // will directly follow the scalar definitions.
327 auto OldIP = Builder.saveIP();
328 auto *LastInst = cast<Instruction>(get(Def, LastLane));
329 auto NewIP = isa<PHINode>(LastInst)
330 ? LastInst->getParent()->getFirstNonPHIIt()
331 : std::next(BasicBlock::iterator(LastInst));
332 Builder.SetInsertPoint(&*NewIP);
333 Value *VectorValue = GetBroadcastInstrs(ScalarValue);
334 set(Def, VectorValue);
335 Builder.restoreIP(OldIP);
336 return VectorValue;
337}
338
340 const DILocation *DIL = DL;
341 // When a FSDiscriminator is enabled, we don't need to add the multiply
342 // factors to the discriminators.
343 if (DIL &&
344 Builder.GetInsertBlock()
345 ->getParent()
346 ->shouldEmitDebugInfoForProfiling() &&
348 // FIXME: For scalable vectors, assume vscale=1.
349 unsigned UF = Plan->getUF();
350 auto NewDIL =
351 DIL->cloneByMultiplyingDuplicationFactor(UF * VF.getKnownMinValue());
352 if (NewDIL)
353 Builder.SetCurrentDebugLocation(*NewDIL);
354 else
355 LLVM_DEBUG(dbgs() << "Failed to create new discriminator: "
356 << DIL->getFilename() << " Line: " << DIL->getLine());
357 } else
358 Builder.SetCurrentDebugLocation(DL);
359}
360
362 Value *WideValue,
363 const VPLane &Lane) {
364 Value *ScalarInst = get(Def, Lane);
365 Value *LaneExpr = Lane.getAsRuntimeExpr(Builder, VF);
366 if (auto *StructTy = dyn_cast<StructType>(WideValue->getType())) {
367 // We must handle each element of a vectorized struct type.
368 for (unsigned I = 0, E = StructTy->getNumElements(); I != E; I++) {
369 Value *ScalarValue = Builder.CreateExtractValue(ScalarInst, I);
370 Value *VectorValue = Builder.CreateExtractValue(WideValue, I);
371 VectorValue =
372 Builder.CreateInsertElement(VectorValue, ScalarValue, LaneExpr);
373 WideValue = Builder.CreateInsertValue(WideValue, VectorValue, I);
374 }
375 } else {
376 WideValue = Builder.CreateInsertElement(WideValue, ScalarInst, LaneExpr);
377 }
378 return WideValue;
379}
380
381BasicBlock *VPBasicBlock::createEmptyBasicBlock(VPTransformState &State) {
382 auto &CFG = State.CFG;
383 // BB stands for IR BasicBlocks. VPBB stands for VPlan VPBasicBlocks.
384 // Pred stands for Predessor. Prev stands for Previous - last visited/created.
385 BasicBlock *PrevBB = CFG.PrevBB;
386 BasicBlock *NewBB = BasicBlock::Create(PrevBB->getContext(), getName(),
387 PrevBB->getParent(), CFG.ExitBB);
388 LLVM_DEBUG(dbgs() << "LV: created " << NewBB->getName() << '\n');
389
390 return NewBB;
391}
392
394 auto &CFG = State.CFG;
395 BasicBlock *NewBB = CFG.VPBB2IRBB[this];
396
397 // Register NewBB in its loop. In innermost loops its the same for all
398 // BB's.
399 Loop *ParentLoop = State.CurrentParentLoop;
400 // If this block has a sole successor that is an exit block or is an exit
401 // block itself then it needs adding to the same parent loop as the exit
402 // block.
403 VPBlockBase *SuccOrExitVPB = getSingleSuccessor();
404 SuccOrExitVPB = SuccOrExitVPB ? SuccOrExitVPB : this;
405 if (State.Plan->isExitBlock(SuccOrExitVPB)) {
406 ParentLoop = State.LI->getLoopFor(
407 cast<VPIRBasicBlock>(SuccOrExitVPB)->getIRBasicBlock());
408 }
409
410 if (ParentLoop && !State.LI->getLoopFor(NewBB))
411 ParentLoop->addBasicBlockToLoop(NewBB, *State.LI);
412
414 if (VPBlockUtils::isHeader(this, State.VPDT)) {
415 // There's no block for the latch yet, connect to the preheader only.
416 Preds = {getPredecessors()[0]};
417 } else {
418 Preds = to_vector(getPredecessors());
419 }
420
421 // Hook up the new basic block to its predecessors.
422 for (VPBlockBase *PredVPBlock : Preds) {
423 VPBasicBlock *PredVPBB = PredVPBlock->getExitingBasicBlock();
424 auto &PredVPSuccessors = PredVPBB->getHierarchicalSuccessors();
425 assert(CFG.VPBB2IRBB.contains(PredVPBB) &&
426 "Predecessor basic-block not found building successor.");
427 BasicBlock *PredBB = CFG.VPBB2IRBB[PredVPBB];
428 auto *PredBBTerminator = PredBB->getTerminator();
429 LLVM_DEBUG(dbgs() << "LV: draw edge from " << PredBB->getName() << '\n');
430
431 auto *TermBr = dyn_cast<BranchInst>(PredBBTerminator);
432 if (isa<UnreachableInst>(PredBBTerminator)) {
433 assert(PredVPSuccessors.size() == 1 &&
434 "Predecessor ending w/o branch must have single successor.");
435 DebugLoc DL = PredBBTerminator->getDebugLoc();
436 PredBBTerminator->eraseFromParent();
437 auto *Br = BranchInst::Create(NewBB, PredBB);
438 Br->setDebugLoc(DL);
439 } else if (TermBr && !TermBr->isConditional()) {
440 TermBr->setSuccessor(0, NewBB);
441 } else {
442 // Set each forward successor here when it is created, excluding
443 // backedges. A backward successor is set when the branch is created.
444 // Branches to VPIRBasicBlocks must have the same successors in VPlan as
445 // in the original IR, except when the predecessor is the entry block.
446 // This enables including SCEV and memory runtime check blocks in VPlan.
447 // TODO: Remove exception by modeling the terminator of entry block using
448 // BranchOnCond.
449 unsigned idx = PredVPSuccessors.front() == this ? 0 : 1;
450 assert((TermBr && (!TermBr->getSuccessor(idx) ||
451 (isa<VPIRBasicBlock>(this) &&
452 (TermBr->getSuccessor(idx) == NewBB ||
453 PredVPBlock == getPlan()->getEntry())))) &&
454 "Trying to reset an existing successor block.");
455 TermBr->setSuccessor(idx, NewBB);
456 }
457 CFG.DTU.applyUpdates({{DominatorTree::Insert, PredBB, NewBB}});
458 }
459}
460
463 "VPIRBasicBlock can have at most two successors at the moment!");
464 // Move completely disconnected blocks to their final position.
465 if (IRBB->hasNPredecessors(0) && succ_begin(IRBB) == succ_end(IRBB))
466 IRBB->moveAfter(State->CFG.PrevBB);
467 State->Builder.SetInsertPoint(IRBB->getTerminator());
468 State->CFG.PrevBB = IRBB;
469 State->CFG.VPBB2IRBB[this] = IRBB;
470 executeRecipes(State, IRBB);
471 // Create a branch instruction to terminate IRBB if one was not created yet
472 // and is needed.
473 if (getSingleSuccessor() && isa<UnreachableInst>(IRBB->getTerminator())) {
474 auto *Br = State->Builder.CreateBr(IRBB);
475 Br->setOperand(0, nullptr);
476 IRBB->getTerminator()->eraseFromParent();
477 } else {
478 assert(
479 (getNumSuccessors() == 0 || isa<BranchInst>(IRBB->getTerminator())) &&
480 "other blocks must be terminated by a branch");
481 }
482
483 connectToPredecessors(*State);
484}
485
486VPIRBasicBlock *VPIRBasicBlock::clone() {
487 auto *NewBlock = getPlan()->createEmptyVPIRBasicBlock(IRBB);
488 for (VPRecipeBase &R : Recipes)
489 NewBlock->appendRecipe(R.clone());
490 return NewBlock;
491}
492
494 bool Replica = bool(State->Lane);
495 BasicBlock *NewBB = State->CFG.PrevBB; // Reuse it if possible.
496
497 if (VPBlockUtils::isHeader(this, State->VPDT)) {
498 // Create and register the new vector loop.
499 Loop *PrevParentLoop = State->CurrentParentLoop;
500 State->CurrentParentLoop = State->LI->AllocateLoop();
501
502 // Insert the new loop into the loop nest and register the new basic blocks
503 // before calling any utilities such as SCEV that require valid LoopInfo.
504 if (PrevParentLoop)
505 PrevParentLoop->addChildLoop(State->CurrentParentLoop);
506 else
507 State->LI->addTopLevelLoop(State->CurrentParentLoop);
508 }
509
510 auto IsReplicateRegion = [](VPBlockBase *BB) {
512 assert((!R || R->isReplicator()) &&
513 "only replicate region blocks should remain");
514 return R;
515 };
516 // 1. Create an IR basic block.
517 if ((Replica && this == getParent()->getEntry()) ||
518 IsReplicateRegion(getSingleHierarchicalPredecessor())) {
519 // Reuse the previous basic block if the current VPBB is either
520 // * the entry to a replicate region, or
521 // * the exit of a replicate region.
522 State->CFG.VPBB2IRBB[this] = NewBB;
523 } else {
524 NewBB = createEmptyBasicBlock(*State);
525
526 State->Builder.SetInsertPoint(NewBB);
527 // Temporarily terminate with unreachable until CFG is rewired.
528 UnreachableInst *Terminator = State->Builder.CreateUnreachable();
529 State->Builder.SetInsertPoint(Terminator);
530
531 State->CFG.PrevBB = NewBB;
532 State->CFG.VPBB2IRBB[this] = NewBB;
533 connectToPredecessors(*State);
534 }
535
536 // 2. Fill the IR basic block with IR instructions.
537 executeRecipes(State, NewBB);
538
539 // If this block is a latch, update CurrentParentLoop.
540 if (VPBlockUtils::isLatch(this, State->VPDT))
541 State->CurrentParentLoop = State->CurrentParentLoop->getParentLoop();
542}
543
544VPBasicBlock *VPBasicBlock::clone() {
545 auto *NewBlock = getPlan()->createVPBasicBlock(getName());
546 for (VPRecipeBase &R : *this)
547 NewBlock->appendRecipe(R.clone());
548 return NewBlock;
549}
550
552 LLVM_DEBUG(dbgs() << "LV: vectorizing VPBB: " << getName()
553 << " in BB: " << BB->getName() << '\n');
554
555 State->CFG.PrevVPBB = this;
556
557 for (VPRecipeBase &Recipe : Recipes) {
558 State->setDebugLocFrom(Recipe.getDebugLoc());
559 Recipe.execute(*State);
560 }
561
562 LLVM_DEBUG(dbgs() << "LV: filled BB: " << *BB);
563}
564
565VPBasicBlock *VPBasicBlock::splitAt(iterator SplitAt) {
566 assert((SplitAt == end() || SplitAt->getParent() == this) &&
567 "can only split at a position in the same block");
568
569 // Create new empty block after the block to split.
570 auto *SplitBlock = getPlan()->createVPBasicBlock(getName() + ".split");
572
573 // Finally, move the recipes starting at SplitAt to new block.
574 for (VPRecipeBase &ToMove :
575 make_early_inc_range(make_range(SplitAt, this->end())))
576 ToMove.moveBefore(*SplitBlock, SplitBlock->end());
577
578 return SplitBlock;
579}
580
581/// Return the enclosing loop region for region \p P. The templated version is
582/// used to support both const and non-const block arguments.
583template <typename T> static T *getEnclosingLoopRegionForRegion(T *P) {
584 if (P && P->isReplicator()) {
585 P = P->getParent();
586 // Multiple loop regions can be nested, but replicate regions can only be
587 // nested inside a loop region or must be outside any other region.
588 assert((!P || !P->isReplicator()) && "unexpected nested replicate regions");
589 }
590 return P;
591}
592
596
600
601static bool hasConditionalTerminator(const VPBasicBlock *VPBB) {
602 if (VPBB->empty()) {
603 assert(
604 VPBB->getNumSuccessors() < 2 &&
605 "block with multiple successors doesn't have a recipe as terminator");
606 return false;
607 }
608
609 const VPRecipeBase *R = &VPBB->back();
610 bool IsSwitch = isa<VPInstruction>(R) &&
611 cast<VPInstruction>(R)->getOpcode() == Instruction::Switch;
612 bool IsCondBranch =
615 (void)IsCondBranch;
616 (void)IsSwitch;
617 if (VPBB->getNumSuccessors() == 2 ||
618 (VPBB->isExiting() && !VPBB->getParent()->isReplicator())) {
619 assert((IsCondBranch || IsSwitch) &&
620 "block with multiple successors not terminated by "
621 "conditional branch nor switch recipe");
622
623 return true;
624 }
625
626 if (VPBB->getNumSuccessors() > 2) {
627 assert(IsSwitch && "block with more than 2 successors not terminated by "
628 "a switch recipe");
629 return true;
630 }
631
632 assert(
633 !IsCondBranch &&
634 "block with 0 or 1 successors terminated by conditional branch recipe");
635 return false;
636}
637
639 if (hasConditionalTerminator(this))
640 return &back();
641 return nullptr;
642}
643
645 if (hasConditionalTerminator(this))
646 return &back();
647 return nullptr;
648}
649
651 return getParent() && getParent()->getExitingBasicBlock() == this;
652}
653
654#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
659
660void VPBlockBase::printSuccessors(raw_ostream &O, const Twine &Indent) const {
661 if (getSuccessors().empty()) {
662 O << Indent << "No successors\n";
663 } else {
664 O << Indent << "Successor(s): ";
665 ListSeparator LS;
666 for (auto *Succ : getSuccessors())
667 O << LS << Succ->getName();
668 O << '\n';
669 }
670}
671
672void VPBasicBlock::print(raw_ostream &O, const Twine &Indent,
673 VPSlotTracker &SlotTracker) const {
674 O << Indent << getName() << ":\n";
675
676 auto RecipeIndent = Indent + " ";
677 for (const VPRecipeBase &Recipe : *this) {
678 Recipe.print(O, RecipeIndent, SlotTracker);
679 O << '\n';
680 }
681
682 printSuccessors(O, Indent);
683}
684#endif
685
686static std::pair<VPBlockBase *, VPBlockBase *> cloneFrom(VPBlockBase *Entry);
687
688// Clone the CFG for all nodes reachable from \p Entry, this includes cloning
689// the blocks and their recipes. Operands of cloned recipes will NOT be updated.
690// Remapping of operands must be done separately. Returns a pair with the new
691// entry and exiting blocks of the cloned region. If \p Entry isn't part of a
692// region, return nullptr for the exiting block.
693static std::pair<VPBlockBase *, VPBlockBase *> cloneFrom(VPBlockBase *Entry) {
695 VPBlockBase *Exiting = nullptr;
696 bool InRegion = Entry->getParent();
697 // First, clone blocks reachable from Entry.
698 for (VPBlockBase *BB : vp_depth_first_shallow(Entry)) {
699 VPBlockBase *NewBB = BB->clone();
700 Old2NewVPBlocks[BB] = NewBB;
701 if (InRegion && BB->getNumSuccessors() == 0) {
702 assert(!Exiting && "Multiple exiting blocks?");
703 Exiting = BB;
704 }
705 }
706 assert((!InRegion || Exiting) && "regions must have a single exiting block");
707
708 // Second, update the predecessors & successors of the cloned blocks.
709 for (VPBlockBase *BB : vp_depth_first_shallow(Entry)) {
710 VPBlockBase *NewBB = Old2NewVPBlocks[BB];
712 for (VPBlockBase *Pred : BB->getPredecessors()) {
713 NewPreds.push_back(Old2NewVPBlocks[Pred]);
714 }
715 NewBB->setPredecessors(NewPreds);
717 for (VPBlockBase *Succ : BB->successors()) {
718 NewSuccs.push_back(Old2NewVPBlocks[Succ]);
719 }
720 NewBB->setSuccessors(NewSuccs);
721 }
722
723#if !defined(NDEBUG)
724 // Verify that the order of predecessors and successors matches in the cloned
725 // version.
726 for (const auto &[OldBB, NewBB] :
728 vp_depth_first_shallow(Old2NewVPBlocks[Entry]))) {
729 for (const auto &[OldPred, NewPred] :
730 zip(OldBB->getPredecessors(), NewBB->getPredecessors()))
731 assert(NewPred == Old2NewVPBlocks[OldPred] && "Different predecessors");
732
733 for (const auto &[OldSucc, NewSucc] :
734 zip(OldBB->successors(), NewBB->successors()))
735 assert(NewSucc == Old2NewVPBlocks[OldSucc] && "Different successors");
736 }
737#endif
738
739 return std::make_pair(Old2NewVPBlocks[Entry],
740 Exiting ? Old2NewVPBlocks[Exiting] : nullptr);
741}
742
743VPRegionBlock *VPRegionBlock::clone() {
744 const auto &[NewEntry, NewExiting] = cloneFrom(getEntry());
745 VPlan &Plan = *getPlan();
746 VPRegionBlock *NewRegion =
748 ? Plan.createReplicateRegion(NewEntry, NewExiting, getName())
749 : Plan.createLoopRegion(getName(), NewEntry, NewExiting);
750
751 for (VPBlockBase *Block : vp_depth_first_shallow(NewEntry))
752 Block->setParent(NewRegion);
753 return NewRegion;
754}
755
758 "Loop regions should have been lowered to plain CFG");
759 assert(!State->Lane && "Replicating a Region with non-null instance.");
760 assert(!State->VF.isScalable() && "VF is assumed to be non scalable.");
761
763 Entry);
764 State->Lane = VPLane(0);
765 for (unsigned Lane = 0, VF = State->VF.getFixedValue(); Lane < VF; ++Lane) {
766 State->Lane = VPLane(Lane, VPLane::Kind::First);
767 // Visit the VPBlocks connected to \p this, starting from it.
768 for (VPBlockBase *Block : RPOT) {
769 LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n');
770 Block->execute(State);
771 }
772 }
773
774 // Exit replicating mode.
775 State->Lane.reset();
776}
777
780 for (VPRecipeBase &R : Recipes)
781 Cost += R.cost(VF, Ctx);
782 return Cost;
783}
784
785const VPBasicBlock *VPBasicBlock::getCFGPredecessor(unsigned Idx) const {
786 const VPBlockBase *Pred = nullptr;
787 if (hasPredecessors()) {
788 Pred = getPredecessors()[Idx];
789 } else {
790 auto *Region = getParent();
791 assert(Region && !Region->isReplicator() && Region->getEntry() == this &&
792 "must be in the entry block of a non-replicate region");
793 assert(Idx < 2 && Region->getNumPredecessors() == 1 &&
794 "loop region has a single predecessor (preheader), its entry block "
795 "has 2 incoming blocks");
796
797 // Idx == 0 selects the predecessor of the region, Idx == 1 selects the
798 // region itself whose exiting block feeds the phi across the backedge.
799 Pred = Idx == 0 ? Region->getSinglePredecessor() : Region;
800 }
801 return Pred->getExitingBasicBlock();
802}
803
805 if (!isReplicator()) {
808 Cost += Block->cost(VF, Ctx);
809 InstructionCost BackedgeCost =
810 ForceTargetInstructionCost.getNumOccurrences()
811 ? InstructionCost(ForceTargetInstructionCost.getNumOccurrences())
812 : Ctx.TTI.getCFInstrCost(Instruction::Br, Ctx.CostKind);
813 LLVM_DEBUG(dbgs() << "Cost of " << BackedgeCost << " for VF " << VF
814 << ": vector loop backedge\n");
815 Cost += BackedgeCost;
816 return Cost;
817 }
818
819 // Compute the cost of a replicate region. Replicating isn't supported for
820 // scalable vectors, return an invalid cost for them.
821 // TODO: Discard scalable VPlans with replicate recipes earlier after
822 // construction.
823 if (VF.isScalable())
825
826 // Compute and return the cost of the conditionally executed recipes.
827 assert(VF.isVector() && "Can only compute vector cost at the moment.");
829 return Then->cost(VF, Ctx);
830}
831
832#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
834 VPSlotTracker &SlotTracker) const {
835 O << Indent << (isReplicator() ? "<xVFxUF> " : "<x1> ") << getName() << ": {";
836 auto NewIndent = Indent + " ";
837 for (auto *BlockBase : vp_depth_first_shallow(Entry)) {
838 O << '\n';
839 BlockBase->print(O, NewIndent, SlotTracker);
840 }
841 O << Indent << "}\n";
842
843 printSuccessors(O, Indent);
844}
845#endif
846
848 auto *Header = cast<VPBasicBlock>(getEntry());
849 if (auto *CanIV = dyn_cast<VPCanonicalIVPHIRecipe>(&Header->front())) {
850 assert(this == getPlan()->getVectorLoopRegion() &&
851 "Canonical IV must be in the entry of the top-level loop region");
852 auto *ScalarR = VPBuilder(CanIV).createScalarPhi(
853 {CanIV->getStartValue(), CanIV->getBackedgeValue()},
854 CanIV->getDebugLoc(), "index");
855 CanIV->replaceAllUsesWith(ScalarR);
856 CanIV->eraseFromParent();
857 }
858
859 VPBlockBase *Preheader = getSinglePredecessor();
860 auto *ExitingLatch = cast<VPBasicBlock>(getExiting());
862 VPBlockUtils::disconnectBlocks(Preheader, this);
863 VPBlockUtils::disconnectBlocks(this, Middle);
864
865 for (VPBlockBase *VPB : vp_depth_first_shallow(Entry))
866 VPB->setParent(getParent());
867
868 VPBlockUtils::connectBlocks(Preheader, Header);
869 VPBlockUtils::connectBlocks(ExitingLatch, Middle);
870 VPBlockUtils::connectBlocks(ExitingLatch, Header);
871}
872
873VPlan::VPlan(Loop *L) {
874 setEntry(createVPIRBasicBlock(L->getLoopPreheader()));
875 ScalarHeader = createVPIRBasicBlock(L->getHeader());
876
877 SmallVector<BasicBlock *> IRExitBlocks;
878 L->getUniqueExitBlocks(IRExitBlocks);
879 for (BasicBlock *EB : IRExitBlocks)
880 ExitBlocks.push_back(createVPIRBasicBlock(EB));
881}
882
884 VPValue DummyValue;
885
886 for (auto *VPB : CreatedBlocks) {
887 if (auto *VPBB = dyn_cast<VPBasicBlock>(VPB)) {
888 // Replace all operands of recipes and all VPValues defined in VPBB with
889 // DummyValue so the block can be deleted.
890 for (VPRecipeBase &R : *VPBB) {
891 for (auto *Def : R.definedValues())
892 Def->replaceAllUsesWith(&DummyValue);
893
894 for (unsigned I = 0, E = R.getNumOperands(); I != E; I++)
895 R.setOperand(I, &DummyValue);
896 }
897 }
898 delete VPB;
899 }
900 for (VPValue *VPV : getLiveIns())
901 delete VPV;
902 if (BackedgeTakenCount)
903 delete BackedgeTakenCount;
904}
905
907 auto Iter = find_if(getExitBlocks(), [IRBB](const VPIRBasicBlock *VPIRBB) {
908 return VPIRBB->getIRBasicBlock() == IRBB;
909 });
910 assert(Iter != getExitBlocks().end() && "no exit block found");
911 return *Iter;
912}
913
915 return is_contained(ExitBlocks, VPBB);
916}
917
918/// Generate the code inside the preheader and body of the vectorized loop.
919/// Assumes a single pre-header basic-block was created for this. Introduce
920/// additional basic-blocks as needed, and fill them all.
922 // Initialize CFG state.
923 State->CFG.PrevVPBB = nullptr;
924 State->CFG.ExitBB = State->CFG.PrevBB->getSingleSuccessor();
925
926 // Update VPDominatorTree since VPBasicBlock may be removed after State was
927 // constructed.
928 State->VPDT.recalculate(*this);
929
930 // Disconnect VectorPreHeader from ExitBB in both the CFG and DT.
931 BasicBlock *VectorPreHeader = State->CFG.PrevBB;
932 cast<BranchInst>(VectorPreHeader->getTerminator())->setSuccessor(0, nullptr);
933 State->CFG.DTU.applyUpdates(
934 {{DominatorTree::Delete, VectorPreHeader, State->CFG.ExitBB}});
935
936 LLVM_DEBUG(dbgs() << "Executing best plan with VF=" << State->VF
937 << ", UF=" << getUF() << '\n');
938 setName("Final VPlan");
939 LLVM_DEBUG(dump());
940
941 BasicBlock *ScalarPh = State->CFG.ExitBB;
942 VPBasicBlock *ScalarPhVPBB = getScalarPreheader();
943 if (ScalarPhVPBB->hasPredecessors()) {
944 // Disconnect scalar preheader and scalar header, as the dominator tree edge
945 // will be updated as part of VPlan execution. This allows keeping the DTU
946 // logic generic during VPlan execution.
947 State->CFG.DTU.applyUpdates(
948 {{DominatorTree::Delete, ScalarPh, ScalarPh->getSingleSuccessor()}});
949 }
951 Entry);
952 // Generate code for the VPlan, in parts of the vector skeleton, loop body and
953 // successor blocks including the middle, exit and scalar preheader blocks.
954 for (VPBlockBase *Block : RPOT)
955 Block->execute(State);
956
957 // If the original loop is unreachable, delete it and all its blocks.
958 if (!ScalarPhVPBB->hasPredecessors()) {
959 // DeleteDeadBlocks will remove single-entry phis. Remove them from the exit
960 // VPIRBBs in VPlan as well, otherwise we would retain references to deleted
961 // IR instructions.
962 for (VPIRBasicBlock *EB : getExitBlocks()) {
963 for (VPRecipeBase &R : make_early_inc_range(EB->phis())) {
964 if (R.getNumOperands() == 1)
965 R.eraseFromParent();
966 }
967 }
968
969 Loop *OrigLoop =
970 State->LI->getLoopFor(getScalarHeader()->getIRBasicBlock());
971 auto Blocks = OrigLoop->getBlocksVector();
972 Blocks.push_back(cast<VPIRBasicBlock>(ScalarPhVPBB)->getIRBasicBlock());
973 for (auto *BB : Blocks)
974 State->LI->removeBlock(BB);
975 DeleteDeadBlocks(Blocks, &State->CFG.DTU);
976 State->LI->erase(OrigLoop);
977 }
978
979 State->CFG.DTU.flush();
980
981 VPBasicBlock *Header = vputils::getFirstLoopHeader(*this, State->VPDT);
982 if (!Header)
983 return;
984
985 auto *LatchVPBB = cast<VPBasicBlock>(Header->getPredecessors()[1]);
986 BasicBlock *VectorLatchBB = State->CFG.VPBB2IRBB[LatchVPBB];
987
988 // Fix the latch value of canonical, reduction and first-order recurrences
989 // phis in the vector loop.
990 for (VPRecipeBase &R : Header->phis()) {
991 // Skip phi-like recipes that generate their backedege values themselves.
992 if (isa<VPWidenPHIRecipe>(&R))
993 continue;
994
995 auto *PhiR = cast<VPSingleDefRecipe>(&R);
996 // VPInstructions currently model scalar Phis only.
997 bool NeedsScalar = isa<VPInstruction>(PhiR) ||
999 cast<VPReductionPHIRecipe>(PhiR)->isInLoop());
1000
1001 Value *Phi = State->get(PhiR, NeedsScalar);
1002 // VPHeaderPHIRecipe supports getBackedgeValue() but VPInstruction does
1003 // not.
1004 Value *Val = State->get(PhiR->getOperand(1), NeedsScalar);
1005 cast<PHINode>(Phi)->addIncoming(Val, VectorLatchBB);
1006 }
1007}
1008
1010 // For now only return the cost of the vector loop region, ignoring any other
1011 // blocks, like the preheader or middle blocks, expect for checking them for
1012 // recipes with invalid costs.
1014
1015 // If the cost of the loop region is invalid or any recipe in the skeleton
1016 // outside loop regions are invalid return an invalid cost.
1019 [&VF, &Ctx](VPBasicBlock *VPBB) {
1020 return !VPBB->cost(VF, Ctx).isValid();
1021 }))
1023
1024 return Cost;
1025}
1026
1028 // TODO: Cache if possible.
1030 if (auto *R = dyn_cast<VPRegionBlock>(B))
1031 return R->isReplicator() ? nullptr : R;
1032 return nullptr;
1033}
1034
1037 if (auto *R = dyn_cast<VPRegionBlock>(B))
1038 return R->isReplicator() ? nullptr : R;
1039 return nullptr;
1040}
1041
1042#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1045
1046 if (VF.getNumUsers() > 0) {
1047 O << "\nLive-in ";
1048 VF.printAsOperand(O, SlotTracker);
1049 O << " = VF";
1050 }
1051
1052 if (VFxUF.getNumUsers() > 0) {
1053 O << "\nLive-in ";
1054 VFxUF.printAsOperand(O, SlotTracker);
1055 O << " = VF * UF";
1056 }
1057
1058 if (VectorTripCount.getNumUsers() > 0) {
1059 O << "\nLive-in ";
1060 VectorTripCount.printAsOperand(O, SlotTracker);
1061 O << " = vector-trip-count";
1062 }
1063
1064 if (BackedgeTakenCount && BackedgeTakenCount->getNumUsers()) {
1065 O << "\nLive-in ";
1066 BackedgeTakenCount->printAsOperand(O, SlotTracker);
1067 O << " = backedge-taken count";
1068 }
1069
1070 O << "\n";
1071 if (TripCount) {
1072 if (TripCount->isLiveIn())
1073 O << "Live-in ";
1074 TripCount->printAsOperand(O, SlotTracker);
1075 O << " = original trip-count";
1076 O << "\n";
1077 }
1078}
1079
1083
1084 O << "VPlan '" << getName() << "' {";
1085
1086 printLiveIns(O);
1087
1089 RPOT(getEntry());
1090 for (const VPBlockBase *Block : RPOT) {
1091 O << '\n';
1092 Block->print(O, "", SlotTracker);
1093 }
1094
1095 O << "}\n";
1096}
1097
1098std::string VPlan::getName() const {
1099 std::string Out;
1100 raw_string_ostream RSO(Out);
1101 RSO << Name << " for ";
1102 if (!VFs.empty()) {
1103 RSO << "VF={" << VFs[0];
1104 for (ElementCount VF : drop_begin(VFs))
1105 RSO << "," << VF;
1106 RSO << "},";
1107 }
1108
1109 if (UFs.empty()) {
1110 RSO << "UF>=1";
1111 } else {
1112 RSO << "UF={" << UFs[0];
1113 for (unsigned UF : drop_begin(UFs))
1114 RSO << "," << UF;
1115 RSO << "}";
1116 }
1117
1118 return Out;
1119}
1120
1123 VPlanPrinter Printer(O, *this);
1124 Printer.dump();
1125}
1126
1128void VPlan::dump() const { print(dbgs()); }
1129#endif
1130
1131static void remapOperands(VPBlockBase *Entry, VPBlockBase *NewEntry,
1132 DenseMap<VPValue *, VPValue *> &Old2NewVPValues) {
1133 // Update the operands of all cloned recipes starting at NewEntry. This
1134 // traverses all reachable blocks. This is done in two steps, to handle cycles
1135 // in PHI recipes.
1137 OldDeepRPOT(Entry);
1139 NewDeepRPOT(NewEntry);
1140 // First, collect all mappings from old to new VPValues defined by cloned
1141 // recipes.
1142 for (const auto &[OldBB, NewBB] :
1145 assert(OldBB->getRecipeList().size() == NewBB->getRecipeList().size() &&
1146 "blocks must have the same number of recipes");
1147 for (const auto &[OldR, NewR] : zip(*OldBB, *NewBB)) {
1148 assert(OldR.getNumOperands() == NewR.getNumOperands() &&
1149 "recipes must have the same number of operands");
1150 assert(OldR.getNumDefinedValues() == NewR.getNumDefinedValues() &&
1151 "recipes must define the same number of operands");
1152 for (const auto &[OldV, NewV] :
1153 zip(OldR.definedValues(), NewR.definedValues()))
1154 Old2NewVPValues[OldV] = NewV;
1155 }
1156 }
1157
1158 // Update all operands to use cloned VPValues.
1159 for (VPBasicBlock *NewBB :
1161 for (VPRecipeBase &NewR : *NewBB)
1162 for (unsigned I = 0, E = NewR.getNumOperands(); I != E; ++I) {
1163 VPValue *NewOp = Old2NewVPValues.lookup(NewR.getOperand(I));
1164 NewR.setOperand(I, NewOp);
1165 }
1166 }
1167}
1168
1170 unsigned NumBlocksBeforeCloning = CreatedBlocks.size();
1171 // Clone blocks.
1172 const auto &[NewEntry, __] = cloneFrom(Entry);
1173
1174 BasicBlock *ScalarHeaderIRBB = getScalarHeader()->getIRBasicBlock();
1175 VPIRBasicBlock *NewScalarHeader = nullptr;
1176 if (getScalarHeader()->hasPredecessors()) {
1177 NewScalarHeader = cast<VPIRBasicBlock>(*find_if(
1178 vp_depth_first_shallow(NewEntry), [ScalarHeaderIRBB](VPBlockBase *VPB) {
1179 auto *VPIRBB = dyn_cast<VPIRBasicBlock>(VPB);
1180 return VPIRBB && VPIRBB->getIRBasicBlock() == ScalarHeaderIRBB;
1181 }));
1182 } else {
1183 NewScalarHeader = createVPIRBasicBlock(ScalarHeaderIRBB);
1184 }
1185 // Create VPlan, clone live-ins and remap operands in the cloned blocks.
1186 auto *NewPlan = new VPlan(cast<VPBasicBlock>(NewEntry), NewScalarHeader);
1187 DenseMap<VPValue *, VPValue *> Old2NewVPValues;
1188 for (VPValue *OldLiveIn : getLiveIns()) {
1189 Old2NewVPValues[OldLiveIn] =
1190 NewPlan->getOrAddLiveIn(OldLiveIn->getLiveInIRValue());
1191 }
1192 Old2NewVPValues[&VectorTripCount] = &NewPlan->VectorTripCount;
1193 Old2NewVPValues[&VF] = &NewPlan->VF;
1194 Old2NewVPValues[&VFxUF] = &NewPlan->VFxUF;
1195 if (BackedgeTakenCount) {
1196 NewPlan->BackedgeTakenCount = new VPValue();
1197 Old2NewVPValues[BackedgeTakenCount] = NewPlan->BackedgeTakenCount;
1198 }
1199 if (TripCount && TripCount->isLiveIn())
1200 Old2NewVPValues[TripCount] =
1201 NewPlan->getOrAddLiveIn(TripCount->getLiveInIRValue());
1202 // else NewTripCount will be created and inserted into Old2NewVPValues when
1203 // TripCount is cloned. In any case NewPlan->TripCount is updated below.
1204
1205 remapOperands(Entry, NewEntry, Old2NewVPValues);
1206
1207 // Initialize remaining fields of cloned VPlan.
1208 NewPlan->VFs = VFs;
1209 NewPlan->UFs = UFs;
1210 // TODO: Adjust names.
1211 NewPlan->Name = Name;
1212 if (TripCount) {
1213 assert(Old2NewVPValues.contains(TripCount) &&
1214 "TripCount must have been added to Old2NewVPValues");
1215 NewPlan->TripCount = Old2NewVPValues[TripCount];
1216 }
1217
1218 // Transfer all cloned blocks (the second half of all current blocks) from
1219 // current to new VPlan.
1220 unsigned NumBlocksAfterCloning = CreatedBlocks.size();
1221 for (unsigned I :
1222 seq<unsigned>(NumBlocksBeforeCloning, NumBlocksAfterCloning))
1223 NewPlan->CreatedBlocks.push_back(this->CreatedBlocks[I]);
1224 CreatedBlocks.truncate(NumBlocksBeforeCloning);
1225
1226 // Update ExitBlocks of the new plan.
1227 for (VPBlockBase *VPB : NewPlan->CreatedBlocks) {
1228 if (VPB->getNumSuccessors() == 0 && isa<VPIRBasicBlock>(VPB) &&
1229 VPB != NewScalarHeader)
1230 NewPlan->ExitBlocks.push_back(cast<VPIRBasicBlock>(VPB));
1231 }
1232
1233 return NewPlan;
1234}
1235
1237 auto *VPIRBB = new VPIRBasicBlock(IRBB);
1238 CreatedBlocks.push_back(VPIRBB);
1239 return VPIRBB;
1240}
1241
1243 auto *VPIRBB = createEmptyVPIRBasicBlock(IRBB);
1244 for (Instruction &I :
1245 make_range(IRBB->begin(), IRBB->getTerminator()->getIterator()))
1246 VPIRBB->appendRecipe(VPIRInstruction::create(I));
1247 return VPIRBB;
1248}
1249
1250#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1251
1252Twine VPlanPrinter::getUID(const VPBlockBase *Block) {
1253 return (isa<VPRegionBlock>(Block) ? "cluster_N" : "N") +
1254 Twine(getOrCreateBID(Block));
1255}
1256
1257Twine VPlanPrinter::getOrCreateName(const VPBlockBase *Block) {
1258 const std::string &Name = Block->getName();
1259 if (!Name.empty())
1260 return Name;
1261 return "VPB" + Twine(getOrCreateBID(Block));
1262}
1263
1265 Depth = 1;
1266 bumpIndent(0);
1267 OS << "digraph VPlan {\n";
1268 OS << "graph [labelloc=t, fontsize=30; label=\"Vectorization Plan";
1269 if (!Plan.getName().empty())
1270 OS << "\\n" << DOT::EscapeString(Plan.getName());
1271
1272 {
1273 // Print live-ins.
1274 std::string Str;
1275 raw_string_ostream SS(Str);
1276 Plan.printLiveIns(SS);
1278 StringRef(Str).rtrim('\n').split(Lines, "\n");
1279 for (auto Line : Lines)
1280 OS << DOT::EscapeString(Line.str()) << "\\n";
1281 }
1282
1283 OS << "\"]\n";
1284 OS << "node [shape=rect, fontname=Courier, fontsize=30]\n";
1285 OS << "edge [fontname=Courier, fontsize=30]\n";
1286 OS << "compound=true\n";
1287
1288 for (const VPBlockBase *Block : vp_depth_first_shallow(Plan.getEntry()))
1289 dumpBlock(Block);
1290
1291 OS << "}\n";
1292}
1293
1294void VPlanPrinter::dumpBlock(const VPBlockBase *Block) {
1296 dumpBasicBlock(BasicBlock);
1298 dumpRegion(Region);
1299 else
1300 llvm_unreachable("Unsupported kind of VPBlock.");
1301}
1302
1303void VPlanPrinter::drawEdge(const VPBlockBase *From, const VPBlockBase *To,
1304 bool Hidden, const Twine &Label) {
1305 // Due to "dot" we print an edge between two regions as an edge between the
1306 // exiting basic block and the entry basic of the respective regions.
1307 const VPBlockBase *Tail = From->getExitingBasicBlock();
1308 const VPBlockBase *Head = To->getEntryBasicBlock();
1309 OS << Indent << getUID(Tail) << " -> " << getUID(Head);
1310 OS << " [ label=\"" << Label << '\"';
1311 if (Tail != From)
1312 OS << " ltail=" << getUID(From);
1313 if (Head != To)
1314 OS << " lhead=" << getUID(To);
1315 if (Hidden)
1316 OS << "; splines=none";
1317 OS << "]\n";
1318}
1319
1320void VPlanPrinter::dumpEdges(const VPBlockBase *Block) {
1321 auto &Successors = Block->getSuccessors();
1322 if (Successors.size() == 1)
1323 drawEdge(Block, Successors.front(), false, "");
1324 else if (Successors.size() == 2) {
1325 drawEdge(Block, Successors.front(), false, "T");
1326 drawEdge(Block, Successors.back(), false, "F");
1327 } else {
1328 unsigned SuccessorNumber = 0;
1329 for (auto *Successor : Successors)
1330 drawEdge(Block, Successor, false, Twine(SuccessorNumber++));
1331 }
1332}
1333
1334void VPlanPrinter::dumpBasicBlock(const VPBasicBlock *BasicBlock) {
1335 // Implement dot-formatted dump by performing plain-text dump into the
1336 // temporary storage followed by some post-processing.
1337 OS << Indent << getUID(BasicBlock) << " [label =\n";
1338 bumpIndent(1);
1339 std::string Str;
1340 raw_string_ostream SS(Str);
1341 // Use no indentation as we need to wrap the lines into quotes ourselves.
1342 BasicBlock->print(SS, "", SlotTracker);
1343
1344 // We need to process each line of the output separately, so split
1345 // single-string plain-text dump.
1347 StringRef(Str).rtrim('\n').split(Lines, "\n");
1348
1349 auto EmitLine = [&](StringRef Line, StringRef Suffix) {
1350 OS << Indent << '"' << DOT::EscapeString(Line.str()) << "\\l\"" << Suffix;
1351 };
1352
1353 // Don't need the "+" after the last line.
1354 for (auto Line : make_range(Lines.begin(), Lines.end() - 1))
1355 EmitLine(Line, " +\n");
1356 EmitLine(Lines.back(), "\n");
1357
1358 bumpIndent(-1);
1359 OS << Indent << "]\n";
1360
1361 dumpEdges(BasicBlock);
1362}
1363
1364void VPlanPrinter::dumpRegion(const VPRegionBlock *Region) {
1365 OS << Indent << "subgraph " << getUID(Region) << " {\n";
1366 bumpIndent(1);
1367 OS << Indent << "fontname=Courier\n"
1368 << Indent << "label=\""
1369 << DOT::EscapeString(Region->isReplicator() ? "<xVFxUF> " : "<x1> ")
1370 << DOT::EscapeString(Region->getName()) << "\"\n";
1371 // Dump the blocks of the region.
1372 assert(Region->getEntry() && "Region contains no inner blocks.");
1373 for (const VPBlockBase *Block : vp_depth_first_shallow(Region->getEntry()))
1374 dumpBlock(Block);
1375 bumpIndent(-1);
1376 OS << Indent << "}\n";
1377 dumpEdges(Region);
1378}
1379
1380#endif
1381
1382/// Returns true if there is a vector loop region and \p VPV is defined in a
1383/// loop region.
1384static bool isDefinedInsideLoopRegions(const VPValue *VPV) {
1385 const VPRecipeBase *DefR = VPV->getDefiningRecipe();
1386 return DefR && (!DefR->getParent()->getPlan()->getVectorLoopRegion() ||
1388}
1389
1394 replaceUsesWithIf(New, [](VPUser &, unsigned) { return true; });
1395}
1396
1398 VPValue *New,
1399 llvm::function_ref<bool(VPUser &U, unsigned Idx)> ShouldReplace) {
1400 // Note that this early exit is required for correctness; the implementation
1401 // below relies on the number of users for this VPValue to decrease, which
1402 // isn't the case if this == New.
1403 if (this == New)
1404 return;
1405
1406 for (unsigned J = 0; J < getNumUsers();) {
1407 VPUser *User = Users[J];
1408 bool RemovedUser = false;
1409 for (unsigned I = 0, E = User->getNumOperands(); I < E; ++I) {
1410 if (User->getOperand(I) != this || !ShouldReplace(*User, I))
1411 continue;
1412
1413 RemovedUser = true;
1414 User->setOperand(I, New);
1415 }
1416 // If a user got removed after updating the current user, the next user to
1417 // update will be moved to the current position, so we only need to
1418 // increment the index if the number of users did not change.
1419 if (!RemovedUser)
1420 J++;
1421 }
1422}
1423
1425 for (unsigned Idx = 0; Idx != getNumOperands(); ++Idx) {
1426 if (getOperand(Idx) == From)
1427 setOperand(Idx, To);
1428 }
1429}
1430
1431#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1433 OS << Tracker.getOrCreateName(this);
1434}
1435
1438 Op->printAsOperand(O, SlotTracker);
1439 });
1440}
1441#endif
1442
1443void VPSlotTracker::assignName(const VPValue *V) {
1444 assert(!VPValue2Name.contains(V) && "VPValue already has a name!");
1445 auto *UV = V->getUnderlyingValue();
1446 auto *VPI = dyn_cast_or_null<VPInstruction>(V->getDefiningRecipe());
1447 if (!UV && !(VPI && !VPI->getName().empty())) {
1448 VPValue2Name[V] = (Twine("vp<%") + Twine(NextSlot) + ">").str();
1449 NextSlot++;
1450 return;
1451 }
1452
1453 // Use the name of the underlying Value, wrapped in "ir<>", and versioned by
1454 // appending ".Number" to the name if there are multiple uses.
1455 std::string Name;
1456 if (UV)
1457 Name = getName(UV);
1458 else
1459 Name = VPI->getName();
1460
1461 assert(!Name.empty() && "Name cannot be empty.");
1462 StringRef Prefix = UV ? "ir<" : "vp<%";
1463 std::string BaseName = (Twine(Prefix) + Name + Twine(">")).str();
1464
1465 // First assign the base name for V.
1466 const auto &[A, _] = VPValue2Name.try_emplace(V, BaseName);
1467 // Integer or FP constants with different types will result in he same string
1468 // due to stripping types.
1469 if (V->isLiveIn() && isa<ConstantInt, ConstantFP>(UV))
1470 return;
1471
1472 // If it is already used by C > 0 other VPValues, increase the version counter
1473 // C and use it for V.
1474 const auto &[C, UseInserted] = BaseName2Version.try_emplace(BaseName, 0);
1475 if (!UseInserted) {
1476 C->second++;
1477 A->second = (BaseName + Twine(".") + Twine(C->second)).str();
1478 }
1479}
1480
1481void VPSlotTracker::assignNames(const VPlan &Plan) {
1482 if (Plan.VF.getNumUsers() > 0)
1483 assignName(&Plan.VF);
1484 if (Plan.VFxUF.getNumUsers() > 0)
1485 assignName(&Plan.VFxUF);
1486 assignName(&Plan.VectorTripCount);
1487 if (Plan.BackedgeTakenCount)
1488 assignName(Plan.BackedgeTakenCount);
1489 for (VPValue *LI : Plan.getLiveIns())
1490 assignName(LI);
1491
1492 ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<const VPBlockBase *>>
1493 RPOT(VPBlockDeepTraversalWrapper<const VPBlockBase *>(Plan.getEntry()));
1494 for (const VPBasicBlock *VPBB :
1496 assignNames(VPBB);
1497}
1498
1499void VPSlotTracker::assignNames(const VPBasicBlock *VPBB) {
1500 for (const VPRecipeBase &Recipe : *VPBB)
1501 for (VPValue *Def : Recipe.definedValues())
1502 assignName(Def);
1503}
1504
1505std::string VPSlotTracker::getName(const Value *V) {
1506 std::string Name;
1507 raw_string_ostream S(Name);
1508 if (V->hasName() || !isa<Instruction>(V)) {
1509 V->printAsOperand(S, false);
1510 return Name;
1511 }
1512
1513 if (!MST) {
1514 // Lazily create the ModuleSlotTracker when we first hit an unnamed
1515 // instruction.
1516 auto *I = cast<Instruction>(V);
1517 // This check is required to support unit tests with incomplete IR.
1518 if (I->getParent()) {
1519 MST = std::make_unique<ModuleSlotTracker>(I->getModule());
1520 MST->incorporateFunction(*I->getFunction());
1521 } else {
1522 MST = std::make_unique<ModuleSlotTracker>(nullptr);
1523 }
1524 }
1525 V->printAsOperand(S, false, *MST);
1526 return Name;
1527}
1528
1529std::string VPSlotTracker::getOrCreateName(const VPValue *V) const {
1530 std::string Name = VPValue2Name.lookup(V);
1531 if (!Name.empty())
1532 return Name;
1533
1534 // If no name was assigned, no VPlan was provided when creating the slot
1535 // tracker or it is not reachable from the provided VPlan. This can happen,
1536 // e.g. when trying to print a recipe that has not been inserted into a VPlan
1537 // in a debugger.
1538 // TODO: Update VPSlotTracker constructor to assign names to recipes &
1539 // VPValues not associated with a VPlan, instead of constructing names ad-hoc
1540 // here.
1541 const VPRecipeBase *DefR = V->getDefiningRecipe();
1542 (void)DefR;
1543 assert((!DefR || !DefR->getParent() || !DefR->getParent()->getPlan()) &&
1544 "VPValue defined by a recipe in a VPlan?");
1545
1546 // Use the underlying value's name, if there is one.
1547 if (auto *UV = V->getUnderlyingValue()) {
1548 std::string Name;
1549 raw_string_ostream S(Name);
1550 UV->printAsOperand(S, false);
1551 return (Twine("ir<") + Name + ">").str();
1552 }
1553
1554 return "<badref>";
1555}
1556
1558 const std::function<bool(ElementCount)> &Predicate, VFRange &Range) {
1559 assert(!Range.isEmpty() && "Trying to test an empty VF range.");
1560 bool PredicateAtRangeStart = Predicate(Range.Start);
1561
1562 for (ElementCount TmpVF : VFRange(Range.Start * 2, Range.End))
1563 if (Predicate(TmpVF) != PredicateAtRangeStart) {
1564 Range.End = TmpVF;
1565 break;
1566 }
1567
1568 return PredicateAtRangeStart;
1569}
1570
1571/// Build VPlans for the full range of feasible VF's = {\p MinVF, 2 * \p MinVF,
1572/// 4 * \p MinVF, ..., \p MaxVF} by repeatedly building a VPlan for a sub-range
1573/// of VF's starting at a given VF and extending it as much as possible. Each
1574/// vectorization decision can potentially shorten this sub-range during
1575/// buildVPlan().
1577 ElementCount MaxVF) {
1578 auto MaxVFTimes2 = MaxVF * 2;
1579 for (ElementCount VF = MinVF; ElementCount::isKnownLT(VF, MaxVFTimes2);) {
1580 VFRange SubRange = {VF, MaxVFTimes2};
1581 if (auto Plan = tryToBuildVPlan(SubRange)) {
1583 // Update the name of the latch of the top-level vector loop region region
1584 // after optimizations which includes block folding.
1585 Plan->getVectorLoopRegion()->getExiting()->setName("vector.latch");
1586 VPlans.push_back(std::move(Plan));
1587 }
1588 VF = SubRange.End;
1589 }
1590}
1591
1593 assert(count_if(VPlans,
1594 [VF](const VPlanPtr &Plan) { return Plan->hasVF(VF); }) ==
1595 1 &&
1596 "Multiple VPlans for VF.");
1597
1598 for (const VPlanPtr &Plan : VPlans) {
1599 if (Plan->hasVF(VF))
1600 return *Plan.get();
1601 }
1602 llvm_unreachable("No plan found!");
1603}
1604
1607 // Reserve first location for self reference to the LoopID metadata node.
1608 MDs.push_back(nullptr);
1609 bool IsUnrollMetadata = false;
1610 MDNode *LoopID = L->getLoopID();
1611 if (LoopID) {
1612 // First find existing loop unrolling disable metadata.
1613 for (unsigned I = 1, IE = LoopID->getNumOperands(); I < IE; ++I) {
1614 auto *MD = dyn_cast<MDNode>(LoopID->getOperand(I));
1615 if (MD) {
1616 const auto *S = dyn_cast<MDString>(MD->getOperand(0));
1617 if (!S)
1618 continue;
1619 if (S->getString().starts_with("llvm.loop.unroll.runtime.disable"))
1620 continue;
1621 IsUnrollMetadata =
1622 S->getString().starts_with("llvm.loop.unroll.disable");
1623 }
1624 MDs.push_back(LoopID->getOperand(I));
1625 }
1626 }
1627
1628 if (!IsUnrollMetadata) {
1629 // Add runtime unroll disable metadata.
1630 LLVMContext &Context = L->getHeader()->getContext();
1631 SmallVector<Metadata *, 1> DisableOperands;
1632 DisableOperands.push_back(
1633 MDString::get(Context, "llvm.loop.unroll.runtime.disable"));
1634 MDNode *DisableNode = MDNode::get(Context, DisableOperands);
1635 MDs.push_back(DisableNode);
1636 MDNode *NewLoopID = MDNode::get(Context, MDs);
1637 // Set operand 0 to refer to the loop id itself.
1638 NewLoopID->replaceOperandWith(0, NewLoopID);
1639 L->setLoopID(NewLoopID);
1640 }
1641}
1642
1644 Loop *VectorLoop, VPBasicBlock *HeaderVPBB, const VPlan &Plan,
1645 bool VectorizingEpilogue, MDNode *OrigLoopID,
1646 std::optional<unsigned> OrigAverageTripCount,
1647 unsigned OrigLoopInvocationWeight, unsigned EstimatedVFxUF,
1648 bool DisableRuntimeUnroll) {
1649 // Update the metadata of the scalar loop. Skip the update when vectorizing
1650 // the epilogue loop to ensure it is updated only once. Also skip the update
1651 // when the scalar loop became unreachable.
1652 if (Plan.getScalarPreheader()->hasPredecessors() && !VectorizingEpilogue) {
1653 std::optional<MDNode *> RemainderLoopID =
1656 if (RemainderLoopID) {
1657 OrigLoop->setLoopID(*RemainderLoopID);
1658 } else {
1659 if (DisableRuntimeUnroll)
1661
1662 LoopVectorizeHints Hints(OrigLoop, true, *ORE);
1663 Hints.setAlreadyVectorized();
1664 }
1665 }
1666
1667 if (!VectorLoop)
1668 return;
1669
1670 if (std::optional<MDNode *> VectorizedLoopID = makeFollowupLoopID(
1671 OrigLoopID, {LLVMLoopVectorizeFollowupAll,
1673 VectorLoop->setLoopID(*VectorizedLoopID);
1674 } else {
1675 // Keep all loop hints from the original loop on the vector loop (we'll
1676 // replace the vectorizer-specific hints below).
1677 if (OrigLoopID)
1678 VectorLoop->setLoopID(OrigLoopID);
1679
1680 if (!VectorizingEpilogue) {
1681 LoopVectorizeHints Hints(VectorLoop, true, *ORE);
1682 Hints.setAlreadyVectorized();
1683 }
1684 }
1686 TTI.getUnrollingPreferences(VectorLoop, *PSE.getSE(), UP, ORE);
1687 if (!UP.UnrollVectorizedLoop || VectorizingEpilogue)
1689
1690 // Set/update profile weights for the vector and remainder loops as original
1691 // loop iterations are now distributed among them. Note that original loop
1692 // becomes the scalar remainder loop after vectorization.
1693 //
1694 // For cases like foldTailByMasking() and requiresScalarEpiloque() we may
1695 // end up getting slightly roughened result but that should be OK since
1696 // profile is not inherently precise anyway. Note also possible bypass of
1697 // vector code caused by legality checks is ignored, assigning all the weight
1698 // to the vector loop, optimistically.
1699 //
1700 // For scalable vectorization we can't know at compile time how many
1701 // iterations of the loop are handled in one vector iteration, so instead
1702 // use the value of vscale used for tuning.
1703 if (!OrigAverageTripCount)
1704 return;
1705 // Calculate number of iterations in unrolled loop.
1706 unsigned AverageVectorTripCount = *OrigAverageTripCount / EstimatedVFxUF;
1707 // Calculate number of iterations for remainder loop.
1708 unsigned RemainderAverageTripCount = *OrigAverageTripCount % EstimatedVFxUF;
1709
1710 if (HeaderVPBB) {
1711 setLoopEstimatedTripCount(VectorLoop, AverageVectorTripCount,
1712 OrigLoopInvocationWeight);
1713 }
1714 if (Plan.getScalarPreheader()->hasPredecessors()) {
1715 setLoopEstimatedTripCount(OrigLoop, RemainderAverageTripCount,
1716 OrigLoopInvocationWeight);
1717 }
1718}
1719
1720#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1722 if (VPlans.empty()) {
1723 O << "LV: No VPlans built.\n";
1724 return;
1725 }
1726 for (const auto &Plan : VPlans)
1728 Plan->printDOT(O);
1729 else
1730 Plan->print(O);
1731}
1732#endif
1733
1734bool llvm::canConstantBeExtended(const APInt *C, Type *NarrowType,
1736 APInt TruncatedVal = C->trunc(NarrowType->getScalarSizeInBits());
1737 unsigned WideSize = C->getBitWidth();
1738 APInt ExtendedVal = ExtKind == TTI::PR_SignExtend
1739 ? TruncatedVal.sext(WideSize)
1740 : TruncatedVal.zext(WideSize);
1741 return ExtendedVal == *C;
1742}
1743
1746 if (!V->isLiveIn())
1747 return {};
1748
1749 return TTI::getOperandInfo(V->getLiveInIRValue());
1750}
1751
1753 Type *ResultTy, ArrayRef<const VPValue *> Operands, ElementCount VF,
1754 bool AlwaysIncludeReplicatingR) {
1755 if (VF.isScalar())
1756 return 0;
1757
1758 assert(!VF.isScalable() &&
1759 "Scalarization overhead not supported for scalable vectors");
1760
1761 InstructionCost ScalarizationCost = 0;
1762 // Compute the cost of scalarizing the result if needed.
1763 if (!ResultTy->isVoidTy()) {
1764 for (Type *VectorTy :
1765 to_vector(getContainedTypes(toVectorizedTy(ResultTy, VF)))) {
1766 ScalarizationCost += TTI.getScalarizationOverhead(
1768 /*Insert=*/true,
1769 /*Extract=*/false, CostKind);
1770 }
1771 }
1772 // Compute the cost of scalarizing the operands, skipping ones that do not
1773 // require extraction/scalarization and do not incur any overhead.
1774 SmallPtrSet<const VPValue *, 4> UniqueOperands;
1776 for (auto *Op : Operands) {
1777 if (Op->isLiveIn() ||
1778 (!AlwaysIncludeReplicatingR &&
1781 cast<VPReplicateRecipe>(Op)->getOpcode() == Instruction::Load) ||
1782 !UniqueOperands.insert(Op).second)
1783 continue;
1784 Tys.push_back(toVectorizedTy(Types.inferScalarType(Op), VF));
1785 }
1786 return ScalarizationCost +
1787 TTI.getOperandsScalarizationOverhead(Tys, CostKind);
1788}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition Compiler.h:638
dxil pretty DXIL Metadata Pretty Printer
Flatten the CFG
#define _
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
This file defines the LoopVectorizationLegality class.
This file provides a LoopVectorizationPlanner class.
#define I(x, y, z)
Definition MD5.cpp:58
#define T
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
#define P(N)
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
static StringRef getName(Value *V)
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallVector class.
This file contains some functions that are useful when dealing with strings.
#define LLVM_DEBUG(...)
Definition Debug.h:114
This file implements dominator tree analysis for a single level of a VPlan's H-CFG.
This file contains the declarations of different VPlan-related auxiliary helpers.
This file provides utility VPlan to VPlan transformations.
static void addRuntimeUnrollDisableMetaData(Loop *L)
Definition VPlan.cpp:1605
static T * getPlanEntry(T *Start)
Definition VPlan.cpp:145
static T * getEnclosingLoopRegionForRegion(T *P)
Return the enclosing loop region for region P.
Definition VPlan.cpp:583
const char LLVMLoopVectorizeFollowupAll[]
Definition VPlan.cpp:61
static bool isDefinedInsideLoopRegions(const VPValue *VPV)
Returns true if there is a vector loop region and VPV is defined in a loop region.
Definition VPlan.cpp:1384
static bool hasConditionalTerminator(const VPBasicBlock *VPBB)
Definition VPlan.cpp:601
const char LLVMLoopVectorizeFollowupVectorized[]
Definition VPlan.cpp:62
static void remapOperands(VPBlockBase *Entry, VPBlockBase *NewEntry, DenseMap< VPValue *, VPValue * > &Old2NewVPValues)
Definition VPlan.cpp:1131
const char LLVMLoopVectorizeFollowupEpilogue[]
Definition VPlan.cpp:64
static std::pair< VPBlockBase *, VPBlockBase * > cloneFrom(VPBlockBase *Entry)
Definition VPlan.cpp:693
static cl::opt< bool > PrintVPlansInDotFormat("vplan-print-in-dot-format", cl::Hidden, cl::desc("Use dot format instead of plain text when dumping VPlans"))
This file contains the declarations of the Vectorization Plan base classes:
static bool IsCondBranch(unsigned BrOpc)
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:234
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1012
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
Definition APInt.cpp:985
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
iterator begin()
Instruction iterator methods.
Definition BasicBlock.h:459
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition BasicBlock.h:206
LLVM_ABI const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
size_t size() const
Definition BasicBlock.h:480
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition BasicBlock.h:233
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
std::optional< const DILocation * > cloneByMultiplyingDuplicationFactor(unsigned DF) const
Returns a new DILocation with duplication factor DF * current duplication factor encoded in the discr...
A debug info location.
Definition DebugLoc.h:124
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition DenseMap.h:205
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Definition DenseMap.h:169
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:165
constexpr bool isVector() const
One or more elements.
Definition TypeSize.h:325
constexpr bool isScalar() const
Exactly one element.
Definition TypeSize.h:321
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
static InstructionCost getInvalid(CostType Val=0)
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
A helper class to return the specified delimiter string after the first invocation of operator String...
void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase< BlockT, LoopT > &LI)
This method is used by other analyses to update loop information.
std::vector< BlockT * > & getBlocksVector()
Return a direct, mutable handle to the blocks vector so that we can mutate it efficiently with techni...
void addChildLoop(LoopT *NewChild)
Add the specified loop to be a child of this loop.
VPlan & getPlanFor(ElementCount VF) const
Return the VPlan for VF.
Definition VPlan.cpp:1592
void updateLoopMetadataAndProfileInfo(Loop *VectorLoop, VPBasicBlock *HeaderVPBB, const VPlan &Plan, bool VectorizingEpilogue, MDNode *OrigLoopID, std::optional< unsigned > OrigAverageTripCount, unsigned OrigLoopInvocationWeight, unsigned EstimatedVFxUF, bool DisableRuntimeUnroll)
Update loop metadata and profile info for both the scalar remainder loop and VectorLoop,...
Definition VPlan.cpp:1643
void buildVPlans(ElementCount MinVF, ElementCount MaxVF)
Build VPlans for power-of-2 VF's between MinVF and MaxVF inclusive, according to the information gath...
Definition VPlan.cpp:1576
static bool getDecisionAndClampRange(const std::function< bool(ElementCount)> &Predicate, VFRange &Range)
Test a Predicate on a Range of VF's.
Definition VPlan.cpp:1557
void printPlans(raw_ostream &O)
Definition VPlan.cpp:1721
Utility class for getting and setting loop vectorizer hints in the form of loop metadata.
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
void setLoopID(MDNode *LoopID) const
Set the llvm.loop loop id metadata for this loop.
Definition LoopInfo.cpp:526
Metadata node.
Definition Metadata.h:1078
LLVM_ABI void replaceOperandWith(unsigned I, Metadata *New)
Replace a specific operand.
const MDOperand & getOperand(unsigned I) const
Definition Metadata.h:1442
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1569
unsigned getNumOperands() const
Return number of MDNode operands.
Definition Metadata.h:1448
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
Definition Metadata.cpp:608
BlockT * getEntry() const
Get the entry BasicBlock of the Region.
Definition RegionInfo.h:320
size_type size() const
Determine the number of elements in the SetVector.
Definition SetVector.h:102
void insert_range(Range &&R)
Definition SetVector.h:175
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:150
This class provides computation of slot numbers for LLVM Assembly writing.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:338
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
std::pair< iterator, bool > try_emplace(StringRef Key, ArgsTy &&...Args)
Emplace a new element for the specified key into the map if the key isn't already in the map.
Definition StringMap.h:381
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition StringRef.h:702
StringRef rtrim(char Char) const
Return string with consecutive Char characters starting from the right removed.
Definition StringRef.h:804
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
static LLVM_ABI OperandValueInfo getOperandInfo(const Value *V)
Collect properties of V used in cost analysis, e.g. OP_PowerOf2.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:231
bool isVoidTy() const
Return true if this is 'void'.
Definition Type.h:139
This function has undefined behavior.
void setOperand(unsigned i, Value *Val)
Definition User.h:237
Value * getOperand(unsigned i) const
Definition User.h:232
unsigned getNumOperands() const
Definition User.h:254
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition VPlan.h:3823
void appendRecipe(VPRecipeBase *Recipe)
Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.
Definition VPlan.h:3898
RecipeListTy::iterator iterator
Instruction iterators...
Definition VPlan.h:3850
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPBasicBlock,...
Definition VPlan.cpp:493
iterator end()
Definition VPlan.h:3860
iterator begin()
Recipe iterator methods.
Definition VPlan.h:3858
VPBasicBlock * clone() override
Clone the current block and it's recipes, without updating the operands of the cloned recipes.
Definition VPlan.cpp:544
InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override
Return the cost of this VPBasicBlock.
Definition VPlan.cpp:778
const VPBasicBlock * getCFGPredecessor(unsigned Idx) const
Returns the predecessor block at index Idx with the predecessors as per the corresponding plain CFG.
Definition VPlan.cpp:785
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
Definition VPlan.cpp:220
void connectToPredecessors(VPTransformState &State)
Connect the VPBBs predecessors' in the VPlan CFG to the IR basic block generated for this VPBB.
Definition VPlan.cpp:393
VPRegionBlock * getEnclosingLoopRegion()
Definition VPlan.cpp:593
VPBasicBlock * splitAt(iterator SplitAt)
Split current block at SplitAt by inserting a new block between the current block and its successors ...
Definition VPlan.cpp:565
RecipeListTy Recipes
The VPRecipes held in the order of output instructions to generate.
Definition VPlan.h:3838
void executeRecipes(VPTransformState *State, BasicBlock *BB)
Execute the recipes in the IR basic block BB.
Definition VPlan.cpp:551
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print this VPBsicBlock to O, prefixing all lines with Indent.
Definition VPlan.cpp:672
bool isExiting() const
Returns true if the block is exiting it's parent region.
Definition VPlan.cpp:650
VPRecipeBase * getTerminator()
If the block has multiple successors, return the branch recipe terminating the block.
Definition VPlan.cpp:638
const VPRecipeBase & back() const
Definition VPlan.h:3872
bool empty() const
Definition VPlan.h:3869
size_t size() const
Definition VPlan.h:3868
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
Definition VPlan.h:80
void setSuccessors(ArrayRef< VPBlockBase * > NewSuccs)
Set each VPBasicBlock in NewSuccss as successor of this VPBlockBase.
Definition VPlan.h:299
VPRegionBlock * getParent()
Definition VPlan.h:172
const VPBasicBlock * getExitingBasicBlock() const
Definition VPlan.cpp:190
void setName(const Twine &newName)
Definition VPlan.h:165
size_t getNumSuccessors() const
Definition VPlan.h:218
iterator_range< VPBlockBase ** > successors()
Definition VPlan.h:200
virtual void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const =0
Print plain-text dump of this VPBlockBase to O, prefixing all lines with Indent.
bool hasPredecessors() const
Returns true if this block has any predecessors.
Definition VPlan.h:222
void printSuccessors(raw_ostream &O, const Twine &Indent) const
Print the successors of this block to O, prefixing all lines with Indent.
Definition VPlan.cpp:660
size_t getNumPredecessors() const
Definition VPlan.h:219
void setPredecessors(ArrayRef< VPBlockBase * > NewPreds)
Set each VPBasicBlock in NewPreds as predecessor of this VPBlockBase.
Definition VPlan.h:290
VPBlockBase * getEnclosingBlockWithPredecessors()
Definition VPlan.cpp:212
const VPBlocksTy & getPredecessors() const
Definition VPlan.h:203
VPlan * getPlan()
Definition VPlan.cpp:165
void setPlan(VPlan *ParentPlan)
Sets the pointer of the plan containing the block.
Definition VPlan.cpp:184
const std::string & getName() const
Definition VPlan.h:163
VPBlockBase * getSinglePredecessor() const
Definition VPlan.h:214
const VPBlocksTy & getHierarchicalSuccessors()
Definition VPlan.h:241
VPBlockBase(const unsigned char SC, const std::string &N)
Definition VPlan.h:149
VPBlockBase * getEnclosingBlockWithSuccessors()
An Enclosing Block of a block B is any block containing B, including B itself.
Definition VPlan.cpp:204
const VPBasicBlock * getEntryBasicBlock() const
Definition VPlan.cpp:170
VPBlockBase * getSingleHierarchicalPredecessor()
Definition VPlan.h:263
VPBlockBase * getSingleSuccessor() const
Definition VPlan.h:208
const VPBlocksTy & getSuccessors() const
Definition VPlan.h:197
static auto blocksOnly(const T &Range)
Return an iterator range over Range which only includes BlockTy blocks.
Definition VPlanUtils.h:187
static void insertBlockAfter(VPBlockBase *NewBlock, VPBlockBase *BlockPtr)
Insert disconnected VPBlockBase NewBlock after BlockPtr.
Definition VPlanUtils.h:90
static bool isLatch(const VPBlockBase *VPB, const VPDominatorTree &VPDT)
Returns true if VPB is a loop latch, using isHeader().
static bool isHeader(const VPBlockBase *VPB, const VPDominatorTree &VPDT)
Returns true if VPB is a loop header, based on regions or VPDT in their absence.
static void connectBlocks(VPBlockBase *From, VPBlockBase *To, unsigned PredIdx=-1u, unsigned SuccIdx=-1u)
Connect VPBlockBases From and To bi-directionally.
Definition VPlanUtils.h:146
static void disconnectBlocks(VPBlockBase *From, VPBlockBase *To)
Disconnect VPBlockBases From and To bi-directionally.
Definition VPlanUtils.h:165
VPlan-based builder utility analogous to IRBuilder.
VPPhi * createScalarPhi(ArrayRef< VPValue * > IncomingValues, DebugLoc DL, const Twine &Name="")
void dump() const
Dump the VPDef to stderr (for debugging).
Definition VPlan.cpp:126
virtual void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const =0
Each concrete VPDef prints itself.
Recipe to expand a SCEV expression.
Definition VPlan.h:3442
A special type of VPBasicBlock that wraps an existing IR basic block.
Definition VPlan.h:3976
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPBasicBlock,...
Definition VPlan.cpp:461
BasicBlock * getIRBasicBlock() const
Definition VPlan.h:4000
VPIRBasicBlock * clone() override
Clone the current block and it's recipes, without updating the operands of the cloned recipes.
Definition VPlan.cpp:486
static LLVM_ABI_FOR_TEST VPIRInstruction * create(Instruction &I)
Create a new VPIRPhi for \I , if it is a PHINode, otherwise create a VPIRInstruction.
In what follows, the term "input IR" refers to code that is fed into the vectorizer whereas the term ...
Value * getAsRuntimeExpr(IRBuilderBase &Builder, const ElementCount &VF) const
Returns an expression describing the lane index that can be used at runtime.
Definition VPlan.cpp:85
static VPLane getFirstLane()
@ ScalableLast
For ScalableLast, Lane is the offset from the start of the last N-element subvector in a scalable vec...
@ First
For First, Lane is the index into the first N elements of a fixed-vector <N x <ElTy>> or a scalable v...
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition VPlan.h:386
VPBasicBlock * getParent()
Definition VPlan.h:407
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
Definition VPlan.h:4011
VPRegionBlock * clone() override
Clone all blocks in the single-entry single-exit region of the block and their recipes without updati...
Definition VPlan.cpp:743
const VPBlockBase * getEntry() const
Definition VPlan.h:4047
void dissolveToCFGLoop()
Remove the current region from its VPlan, connecting its predecessor to its entry,...
Definition VPlan.cpp:847
bool isReplicator() const
An indicator whether this region is to generate multiple replicated instances of output IR correspond...
Definition VPlan.h:4079
InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override
Return the cost of the block.
Definition VPlan.cpp:804
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print this VPRegionBlock to O (recursively), prefixing all lines with Indent.
Definition VPlan.cpp:833
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPRegionBlock,...
Definition VPlan.cpp:756
const VPBlockBase * getExiting() const
Definition VPlan.h:4059
friend class VPlan
Definition VPlan.h:4012
A recipe for handling phi nodes of integer and floating-point inductions, producing their scalar valu...
Definition VPlan.h:3713
This class can be used to assign names to VPValues.
std::string getOrCreateName(const VPValue *V) const
Returns the name assigned to V, if there is one, otherwise try to construct one from the underlying v...
Definition VPlan.cpp:1529
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
Definition VPlanValue.h:199
void replaceUsesOfWith(VPValue *From, VPValue *To)
Replaces all uses of From in the VPUser with To.
Definition VPlan.cpp:1424
void printOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the operands to O.
Definition VPlan.cpp:1436
operand_range operands()
Definition VPlanValue.h:267
void setOperand(unsigned I, VPValue *New)
Definition VPlanValue.h:243
unsigned getNumOperands() const
Definition VPlanValue.h:237
VPValue * getOperand(unsigned N) const
Definition VPlanValue.h:238
This is the base class of the VPlan Def/Use graph, used for modeling the data flow into,...
Definition VPlanValue.h:48
bool isDefinedOutsideLoopRegions() const
Returns true if the VPValue is defined outside any loop.
Definition VPlan.cpp:1390
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Definition VPlan.cpp:135
void printAsOperand(raw_ostream &OS, VPSlotTracker &Tracker) const
Definition VPlan.cpp:1432
friend class VPDef
Definition VPlanValue.h:49
Value * UnderlyingVal
Hold the underlying Value, if any, attached to this VPValue.
Definition VPlanValue.h:61
void dump() const
Dump the value to stderr (for debugging).
Definition VPlan.cpp:118
VPValue(const unsigned char SC, Value *UV=nullptr, VPDef *Def=nullptr)
Definition VPlan.cpp:98
virtual ~VPValue()
Definition VPlan.cpp:104
void print(raw_ostream &OS, VPSlotTracker &Tracker) const
Definition VPlan.cpp:111
void replaceAllUsesWith(VPValue *New)
Definition VPlan.cpp:1393
unsigned getNumUsers() const
Definition VPlanValue.h:113
void replaceUsesWithIf(VPValue *New, llvm::function_ref< bool(VPUser &U, unsigned Idx)> ShouldReplace)
Go through the uses list for this VPValue and make each use point to New if the callback ShouldReplac...
Definition VPlan.cpp:1397
VPDef * Def
Pointer to the VPDef that defines this VPValue.
Definition VPlanValue.h:65
A recipe for handling phi nodes of integer and floating-point inductions, producing their vector valu...
Definition VPlan.h:2129
LLVM_DUMP_METHOD void dump()
Definition VPlan.cpp:1264
VPlan models a candidate for vectorization, encoding various decisions take to produce efficient outp...
Definition VPlan.h:4141
void printDOT(raw_ostream &O) const
Print this VPlan in DOT format to O.
Definition VPlan.cpp:1122
friend class VPSlotTracker
Definition VPlan.h:4143
std::string getName() const
Return a string with the name of the plan and the applicable VFs and UFs.
Definition VPlan.cpp:1098
VPBasicBlock * getEntry()
Definition VPlan.h:4235
void setName(const Twine &newName)
Definition VPlan.h:4385
VPIRBasicBlock * getExitBlock(BasicBlock *IRBB) const
Return the VPIRBasicBlock corresponding to IRBB.
Definition VPlan.cpp:906
LLVM_ABI_FOR_TEST ~VPlan()
Definition VPlan.cpp:883
bool isExitBlock(VPBlockBase *VPBB)
Returns true if VPBB is an exit block.
Definition VPlan.cpp:914
friend class VPlanPrinter
Definition VPlan.h:4142
unsigned getUF() const
Definition VPlan.h:4367
VPIRBasicBlock * createEmptyVPIRBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock wrapping IRBB, but do not create VPIRInstructions wrapping the instructions i...
Definition VPlan.cpp:1236
ArrayRef< VPIRBasicBlock * > getExitBlocks() const
Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of the original scalar loop.
Definition VPlan.h:4287
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
Definition VPlan.cpp:1027
InstructionCost cost(ElementCount VF, VPCostContext &Ctx)
Return the cost of this plan.
Definition VPlan.cpp:1009
void setEntry(VPBasicBlock *VPBB)
Definition VPlan.h:4224
VPBasicBlock * createVPBasicBlock(const Twine &Name, VPRecipeBase *Recipe=nullptr)
Create a new VPBasicBlock with Name and containing Recipe if present.
Definition VPlan.h:4459
LLVM_ABI_FOR_TEST VPIRBasicBlock * createVPIRBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock from IRBB containing VPIRInstructions for all instructions in IRBB,...
Definition VPlan.cpp:1242
LLVM_DUMP_METHOD void dump() const
Dump the plan to stderr (for debugging).
Definition VPlan.cpp:1128
VPBasicBlock * getScalarPreheader() const
Return the VPBasicBlock for the preheader of the scalar loop.
Definition VPlan.h:4278
void execute(VPTransformState *State)
Generate the IR code for this VPlan.
Definition VPlan.cpp:921
ArrayRef< VPValue * > getLiveIns() const
Return the list of live-in VPValues available in the VPlan.
Definition VPlan.h:4429
void print(raw_ostream &O) const
Print this VPlan to O.
Definition VPlan.cpp:1081
VPIRBasicBlock * getScalarHeader() const
Return the VPIRBasicBlock wrapping the header of the scalar loop.
Definition VPlan.h:4283
void printLiveIns(raw_ostream &O) const
Print the live-ins of this VPlan to O.
Definition VPlan.cpp:1043
VPlan * duplicate()
Clone the current VPlan, update all VPValues of the new VPlan and cloned recipes to refer to the clon...
Definition VPlan.cpp:1169
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:201
static constexpr bool isKnownLT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition TypeSize.h:217
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition TypeSize.h:169
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:166
An efficient, type-erasing, non-owning reference to a callable.
self_iterator getIterator()
Definition ilist_node.h:123
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
A raw_ostream that writes to an std::string.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Tail
Attemps to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition CallingConv.h:76
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
LLVM_ABI std::string EscapeString(const std::string &Label)
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
bool match(Val *V, const Pattern &P)
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
VPInstruction_match< VPInstruction::BranchOnCount > m_BranchOnCount()
VPInstruction_match< VPInstruction::BuildVector > m_BuildVector()
BuildVector is matches only its opcode, w/o matching its operands as the number of operands is not fi...
VPInstruction_match< VPInstruction::BranchOnCond > m_BranchOnCond()
bool isSingleScalar(const VPValue *VPV)
Returns true if VPV is a single scalar, either because it produces the same value for all lanes or on...
VPBasicBlock * getFirstLoopHeader(VPlan &Plan, VPDominatorTree &VPDT)
Returns the header block of the first, top-level loop, or null if none exist.
bool onlyFirstLaneUsed(const VPValue *Def)
Returns true if only the first lane of Def is used.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
Definition STLExtras.h:829
InstructionCost Cost
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
Value * getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF)
Return the runtime value for VF.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ABI std::optional< MDNode * > makeFollowupLoopID(MDNode *OrigLoopID, ArrayRef< StringRef > FollowupAttrs, const char *InheritOptionsAttrsPrefix="", bool AlwaysNew=false)
Create a new loop identifier for a loop created from a loop transformation.
void interleaveComma(const Container &c, StreamT &os, UnaryFunctor each_fn)
Definition STLExtras.h:2231
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:632
auto cast_or_null(const Y &Val)
Definition Casting.h:714
iterator_range< df_iterator< VPBlockShallowTraversalWrapper< VPBlockBase * > > > vp_depth_first_shallow(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order.
Definition VPlanCFG.h:216
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
LLVM_ABI cl::opt< bool > EnableFSDiscriminator
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1732
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
Type * toVectorizedTy(Type *Ty, ElementCount EC)
A helper for converting to vectorized types.
bool canConstantBeExtended(const APInt *C, Type *NarrowType, TTI::PartialReductionExtendKind ExtKind)
Check if a constant CI can be safely treated as having been extended from a narrower type with the gi...
Definition VPlan.cpp:1734
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
cl::opt< unsigned > ForceTargetInstructionCost
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
RNSuccIterator< NodeRef, BlockT, RegionT > succ_begin(NodeRef Node)
RNSuccIterator< NodeRef, BlockT, RegionT > succ_end(NodeRef Node)
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
LLVM_ABI bool setLoopEstimatedTripCount(Loop *L, unsigned EstimatedTripCount, std::optional< unsigned > EstimatedLoopInvocationWeight=std::nullopt)
Set llvm.loop.estimated_trip_count with the value EstimatedTripCount in the loop metadata of L.
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition STLExtras.h:1961
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1758
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1897
cl::opt< bool > EnableVPlanNativePath
Definition VPlan.cpp:56
ArrayRef< Type * > getContainedTypes(Type *const &Ty)
Returns the types contained in Ty.
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305
LLVM_ABI void DeleteDeadBlocks(ArrayRef< BasicBlock * > BBs, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified blocks from BB.
std::unique_ptr< VPlan > VPlanPtr
Definition VPlan.h:76
Parameters that control the generic loop unrolling transformation.
bool UnrollVectorizedLoop
Don't disable runtime unroll for the loops which were vectorized.
A range of powers-of-2 vectorization factors with fixed start and adjustable end.
ElementCount End
Struct to hold various analysis needed for cost computations.
TargetTransformInfo::OperandValueInfo getOperandInfo(VPValue *V) const
Returns the OperandInfo for V, if it is a live-in.
Definition VPlan.cpp:1745
InstructionCost getScalarizationOverhead(Type *ResultTy, ArrayRef< const VPValue * > Operands, ElementCount VF, bool AlwaysIncludeReplicatingR=false)
Estimate the overhead of scalarizing a recipe with result type ResultTy and Operands with VF.
Definition VPlan.cpp:1752
TargetTransformInfo::TargetCostKind CostKind
VPTypeAnalysis Types
const TargetTransformInfo & TTI
VPTransformState holds information passed down when "executing" a VPlan, needed for generating the ou...
LoopInfo * LI
Hold a pointer to LoopInfo to register new basic blocks in the loop.
VPTypeAnalysis TypeAnalysis
VPlan-based type analysis.
struct llvm::VPTransformState::DataState Data
struct llvm::VPTransformState::CFGState CFG
Value * get(const VPValue *Def, bool IsScalar=false)
Get the generated vector Value for a given VPValue Def if IsScalar is false, otherwise return the gen...
Definition VPlan.cpp:267
VPTransformState(const TargetTransformInfo *TTI, ElementCount VF, LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC, IRBuilderBase &Builder, VPlan *Plan, Loop *CurrentParentLoop, Type *CanonicalIVTy)
Definition VPlan.cpp:227
std::optional< VPLane > Lane
Hold the index to generate specific scalar instructions.
IRBuilderBase & Builder
Hold a reference to the IRBuilder used to generate output IR code.
bool hasScalarValue(const VPValue *Def, VPLane Lane)
const TargetTransformInfo * TTI
Target Transform Info.
VPlan * Plan
Pointer to the VPlan code is generated for.
void set(const VPValue *Def, Value *V, bool IsScalar=false)
Set the generated vector Value for a given VPValue, if IsScalar is false.
bool hasVectorValue(const VPValue *Def)
VPDominatorTree VPDT
VPlan-based dominator tree.
ElementCount VF
The chosen Vectorization Factor of the loop being vectorized.
Value * packScalarIntoVectorizedValue(const VPValue *Def, Value *WideValue, const VPLane &Lane)
Insert the scalar value of Def at Lane into Lane of WideValue and return the resulting value.
Definition VPlan.cpp:361
AssumptionCache * AC
Hold a pointer to AssumptionCache to register new assumptions after replicating assume calls.
void setDebugLocFrom(DebugLoc DL)
Set the debug location in the builder using the debug location DL.
Definition VPlan.cpp:339
Loop * CurrentParentLoop
The parent loop object for the current scope, or nullptr.
static void optimize(VPlan &Plan)
Apply VPlan-to-VPlan optimizations to Plan, including induction recipe optimizations,...