LLVM 23.0.0git
VPlan.cpp
Go to the documentation of this file.
1//===- VPlan.cpp - Vectorizer Plan ----------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This is the LLVM vectorization plan. It represents a candidate for
11/// vectorization, allowing to plan and optimize how to vectorize a given loop
12/// before generating LLVM-IR.
13/// The vectorizer uses vectorization plans to estimate the costs of potential
14/// candidates and if profitable to execute the desired plan, generating vector
15/// LLVM-IR code.
16///
17//===----------------------------------------------------------------------===//
18
19#include "VPlan.h"
21#include "VPlanCFG.h"
22#include "VPlanDominatorTree.h"
23#include "VPlanHelpers.h"
24#include "VPlanPatternMatch.h"
25#include "VPlanTransforms.h"
26#include "VPlanUtils.h"
28#include "llvm/ADT/STLExtras.h"
31#include "llvm/ADT/Twine.h"
34#include "llvm/IR/BasicBlock.h"
35#include "llvm/IR/CFG.h"
36#include "llvm/IR/IRBuilder.h"
37#include "llvm/IR/Instruction.h"
39#include "llvm/IR/Type.h"
40#include "llvm/IR/Value.h"
43#include "llvm/Support/Debug.h"
49#include <cassert>
50#include <string>
51
52using namespace llvm;
53using namespace llvm::VPlanPatternMatch;
54
55namespace llvm {
57} // namespace llvm
58
59/// @{
60/// Metadata attribute names
61const char LLVMLoopVectorizeFollowupAll[] = "llvm.loop.vectorize.followup_all";
63 "llvm.loop.vectorize.followup_vectorized";
65 "llvm.loop.vectorize.followup_epilogue";
66/// @}
67
69
71 "vplan-print-in-dot-format", cl::Hidden,
72 cl::desc("Use dot format instead of plain text when dumping VPlans"));
73
74#define DEBUG_TYPE "loop-vectorize"
75
76#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
78 const VPBasicBlock *Parent = R.getParent();
79 VPSlotTracker SlotTracker(Parent ? Parent->getPlan() : nullptr);
80 R.print(OS, "", SlotTracker);
81 return OS;
82}
83#endif
84
86 const ElementCount &VF) const {
87 switch (LaneKind) {
89 // Lane = RuntimeVF - VF.getKnownMinValue() + Lane
90 return Builder.CreateSub(getRuntimeVF(Builder, Builder.getInt32Ty(), VF),
91 Builder.getInt32(VF.getKnownMinValue() - Lane));
93 return Builder.getInt32(Lane);
94 }
95 llvm_unreachable("Unknown lane kind");
96}
97
98#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
100 if (const VPRecipeBase *R = getDefiningRecipe())
101 R->print(OS, "", SlotTracker);
102 else
104}
105
106void VPValue::dump() const {
107 const VPRecipeBase *Instr = getDefiningRecipe();
109 (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() : nullptr);
111 dbgs() << "\n";
112}
113
114void VPRecipeBase::dump() const {
115 VPSlotTracker SlotTracker(getParent() ? getParent()->getPlan() : nullptr);
116 print(dbgs(), "", SlotTracker);
117 dbgs() << "\n";
118}
119#endif
120
121#if !defined(NDEBUG)
122bool VPRecipeValue::isDefinedBy(const VPDef *D) const { return Def == D; }
123#endif
124
126 auto *DefValue = dyn_cast<VPRecipeValue>(this);
127 return DefValue ? DefValue->Def : nullptr;
128}
129
131 auto *DefValue = dyn_cast<VPRecipeValue>(this);
132 return DefValue ? DefValue->Def : nullptr;
133}
134
136 return cast<VPIRValue>(this)->getValue();
137}
138
140
142 : VPValue(VPVRecipeValueSC, UV), Def(Def) {
143 assert(Def && "VPRecipeValue requires a defining recipe");
144 Def->addDefinedValue(this);
145}
146
148 assert(Users.empty() &&
149 "trying to delete a VPRecipeValue with remaining users");
150 Def->removeDefinedValue(this);
151}
152
153// Get the top-most entry block of \p Start. This is the entry block of the
154// containing VPlan. This function is templated to support both const and non-const blocks
155template <typename T> static T *getPlanEntry(T *Start) {
156 T *Next = Start;
157 T *Current = Start;
158 while ((Next = Next->getParent()))
159 Current = Next;
160
161 SmallSetVector<T *, 8> WorkList;
162 WorkList.insert(Current);
163
164 for (unsigned i = 0; i < WorkList.size(); i++) {
165 T *Current = WorkList[i];
166 if (!Current->hasPredecessors())
167 return Current;
168 auto &Predecessors = Current->getPredecessors();
169 WorkList.insert_range(Predecessors);
170 }
171
172 llvm_unreachable("VPlan without any entry node without predecessors");
173}
174
175VPlan *VPBlockBase::getPlan() { return getPlanEntry(this)->Plan; }
176
177const VPlan *VPBlockBase::getPlan() const { return getPlanEntry(this)->Plan; }
178
179/// \return the VPBasicBlock that is the entry of Block, possibly indirectly.
186
193
194void VPBlockBase::setPlan(VPlan *ParentPlan) {
195 assert(ParentPlan->getEntry() == this && "Can only set plan on its entry.");
196 Plan = ParentPlan;
197}
198
199/// \return the VPBasicBlock that is the exit of Block, possibly indirectly.
201 const VPBlockBase *Block = this;
203 Block = Region->getExiting();
205}
206
213
215 if (!Successors.empty() || !Parent)
216 return this;
217 assert(Parent->getExiting() == this &&
218 "Block w/o successors not the exiting block of its parent.");
219 return Parent->getEnclosingBlockWithSuccessors();
220}
221
223 if (!Predecessors.empty() || !Parent)
224 return this;
225 assert(Parent->getEntry() == this &&
226 "Block w/o predecessors not the entry of its parent.");
227 return Parent->getEnclosingBlockWithPredecessors();
228}
229
231 iterator It = begin();
232 while (It != end() && It->isPhi())
233 It++;
234 return It;
235}
236
244
247 return Def->getUnderlyingValue();
248
249 if (hasScalarValue(Def, Lane))
250 return Data.VPV2Scalars[Def][Lane.mapToCacheIndex(VF)];
251
252 if (!Lane.isFirstLane() && vputils::isSingleScalar(Def) &&
254 return Data.VPV2Scalars[Def][0];
255 }
256
257 // Look through BuildVector to avoid redundant extracts.
258 // TODO: Remove once replicate regions are unrolled explicitly.
259 if (Lane.getKind() == VPLane::Kind::First && match(Def, m_BuildVector())) {
260 auto *BuildVector = cast<VPInstruction>(Def);
261 return get(BuildVector->getOperand(Lane.getKnownLane()), true);
262 }
263
265 auto *VecPart = Data.VPV2Vector[Def];
266 if (!VecPart->getType()->isVectorTy()) {
267 assert(Lane.isFirstLane() && "cannot get lane > 0 for scalar");
268 return VecPart;
269 }
270 // TODO: Cache created scalar values.
271 Value *LaneV = Lane.getAsRuntimeExpr(Builder, VF);
272 auto *Extract = Builder.CreateExtractElement(VecPart, LaneV);
273 // set(Def, Extract, Instance);
274 return Extract;
275}
276
277Value *VPTransformState::get(const VPValue *Def, bool NeedsScalar) {
278 if (NeedsScalar) {
279 assert((VF.isScalar() || isa<VPIRValue, VPSymbolicValue>(Def) ||
281 (hasScalarValue(Def, VPLane(0)) &&
282 Data.VPV2Scalars[Def].size() == 1)) &&
283 "Trying to access a single scalar per part but has multiple scalars "
284 "per part.");
285 return get(Def, VPLane(0));
286 }
287
288 // If Values have been set for this Def return the one relevant for \p Part.
289 if (hasVectorValue(Def))
290 return Data.VPV2Vector[Def];
291
292 auto GetBroadcastInstrs = [this](Value *V) {
293 if (VF.isScalar())
294 return V;
295 // Broadcast the scalar into all locations in the vector.
296 Value *Shuf = Builder.CreateVectorSplat(VF, V, "broadcast");
297 return Shuf;
298 };
299
300 if (!hasScalarValue(Def, {0})) {
301 Value *IRV = Def->getLiveInIRValue();
302 Value *B = GetBroadcastInstrs(IRV);
303 set(Def, B);
304 return B;
305 }
306
307 Value *ScalarValue = get(Def, VPLane(0));
308 // If we aren't vectorizing, we can just copy the scalar map values over
309 // to the vector map.
310 if (VF.isScalar()) {
311 set(Def, ScalarValue);
312 return ScalarValue;
313 }
314
315 bool IsSingleScalar = vputils::isSingleScalar(Def);
316 VPLane LastLane(IsSingleScalar ? 0 : VF.getFixedValue() - 1);
317
318 // We need to construct the vector value for a single-scalar value by
319 // broadcasting the scalar to all lanes.
320 // TODO: Replace by introducing Broadcast VPInstructions.
321 assert(IsSingleScalar && "must be a single-scalar at this point");
322 // Set the insert point after the last scalarized instruction or after the
323 // last PHI, if LastInst is a PHI. This ensures the insertelement sequence
324 // will directly follow the scalar definitions.
325 auto OldIP = Builder.saveIP();
326 auto *LastInst = cast<Instruction>(get(Def, LastLane));
327 auto NewIP = isa<PHINode>(LastInst)
328 ? LastInst->getParent()->getFirstNonPHIIt()
329 : std::next(BasicBlock::iterator(LastInst));
330 Builder.SetInsertPoint(&*NewIP);
331 Value *VectorValue = GetBroadcastInstrs(ScalarValue);
332 set(Def, VectorValue);
333 Builder.restoreIP(OldIP);
334 return VectorValue;
335}
336
338 const DILocation *DIL = DL;
339 // When a FSDiscriminator is enabled, we don't need to add the multiply
340 // factors to the discriminators.
341 if (DIL &&
342 Builder.GetInsertBlock()
343 ->getParent()
344 ->shouldEmitDebugInfoForProfiling() &&
346 // FIXME: For scalable vectors, assume vscale=1.
347 unsigned UF = Plan->getUF();
348 auto NewDIL =
349 DIL->cloneByMultiplyingDuplicationFactor(UF * VF.getKnownMinValue());
350 if (NewDIL)
351 Builder.SetCurrentDebugLocation(*NewDIL);
352 else
353 LLVM_DEBUG(dbgs() << "Failed to create new discriminator: "
354 << DIL->getFilename() << " Line: " << DIL->getLine());
355 } else
356 Builder.SetCurrentDebugLocation(DL);
357}
358
360 Value *WideValue,
361 const VPLane &Lane) {
362 Value *ScalarInst = get(Def, Lane);
363 Value *LaneExpr = Lane.getAsRuntimeExpr(Builder, VF);
364 if (auto *StructTy = dyn_cast<StructType>(WideValue->getType())) {
365 // We must handle each element of a vectorized struct type.
366 for (unsigned I = 0, E = StructTy->getNumElements(); I != E; I++) {
367 Value *ScalarValue = Builder.CreateExtractValue(ScalarInst, I);
368 Value *VectorValue = Builder.CreateExtractValue(WideValue, I);
369 VectorValue =
370 Builder.CreateInsertElement(VectorValue, ScalarValue, LaneExpr);
371 WideValue = Builder.CreateInsertValue(WideValue, VectorValue, I);
372 }
373 } else {
374 WideValue = Builder.CreateInsertElement(WideValue, ScalarInst, LaneExpr);
375 }
376 return WideValue;
377}
378
379BasicBlock *VPBasicBlock::createEmptyBasicBlock(VPTransformState &State) {
380 auto &CFG = State.CFG;
381 // BB stands for IR BasicBlocks. VPBB stands for VPlan VPBasicBlocks.
382 // Pred stands for Predessor. Prev stands for Previous - last visited/created.
383 BasicBlock *PrevBB = CFG.PrevBB;
384 BasicBlock *NewBB = BasicBlock::Create(PrevBB->getContext(), getName(),
385 PrevBB->getParent(), CFG.ExitBB);
386 LLVM_DEBUG(dbgs() << "LV: created " << NewBB->getName() << '\n');
387
388 return NewBB;
389}
390
392 auto &CFG = State.CFG;
393 BasicBlock *NewBB = CFG.VPBB2IRBB[this];
394
395 // Register NewBB in its loop. In innermost loops its the same for all
396 // BB's.
397 Loop *ParentLoop = State.CurrentParentLoop;
398 // If this block has a sole successor that is an exit block or is an exit
399 // block itself then it needs adding to the same parent loop as the exit
400 // block.
401 VPBlockBase *SuccOrExitVPB = getSingleSuccessor();
402 SuccOrExitVPB = SuccOrExitVPB ? SuccOrExitVPB : this;
403 if (State.Plan->isExitBlock(SuccOrExitVPB)) {
404 ParentLoop = State.LI->getLoopFor(
405 cast<VPIRBasicBlock>(SuccOrExitVPB)->getIRBasicBlock());
406 }
407
408 if (ParentLoop && !State.LI->getLoopFor(NewBB))
409 ParentLoop->addBasicBlockToLoop(NewBB, *State.LI);
410
412 if (VPBlockUtils::isHeader(this, State.VPDT)) {
413 // There's no block for the latch yet, connect to the preheader only.
414 Preds = {getPredecessors()[0]};
415 } else {
416 Preds = to_vector(getPredecessors());
417 }
418
419 // Hook up the new basic block to its predecessors.
420 for (VPBlockBase *PredVPBlock : Preds) {
421 VPBasicBlock *PredVPBB = PredVPBlock->getExitingBasicBlock();
422 auto &PredVPSuccessors = PredVPBB->getHierarchicalSuccessors();
423 assert(CFG.VPBB2IRBB.contains(PredVPBB) &&
424 "Predecessor basic-block not found building successor.");
425 BasicBlock *PredBB = CFG.VPBB2IRBB[PredVPBB];
426 auto *PredBBTerminator = PredBB->getTerminator();
427 LLVM_DEBUG(dbgs() << "LV: draw edge from " << PredBB->getName() << '\n');
428
429 auto *TermBr = dyn_cast<BranchInst>(PredBBTerminator);
430 if (isa<UnreachableInst>(PredBBTerminator)) {
431 assert(PredVPSuccessors.size() == 1 &&
432 "Predecessor ending w/o branch must have single successor.");
433 DebugLoc DL = PredBBTerminator->getDebugLoc();
434 PredBBTerminator->eraseFromParent();
435 auto *Br = BranchInst::Create(NewBB, PredBB);
436 Br->setDebugLoc(DL);
437 } else if (TermBr && !TermBr->isConditional()) {
438 TermBr->setSuccessor(0, NewBB);
439 } else {
440 // Set each forward successor here when it is created, excluding
441 // backedges. A backward successor is set when the branch is created.
442 // Branches to VPIRBasicBlocks must have the same successors in VPlan as
443 // in the original IR, except when the predecessor is the entry block.
444 // This enables including SCEV and memory runtime check blocks in VPlan.
445 // TODO: Remove exception by modeling the terminator of entry block using
446 // BranchOnCond.
447 unsigned idx = PredVPSuccessors.front() == this ? 0 : 1;
448 assert((TermBr && (!TermBr->getSuccessor(idx) ||
449 (isa<VPIRBasicBlock>(this) &&
450 (TermBr->getSuccessor(idx) == NewBB ||
451 PredVPBlock == getPlan()->getEntry())))) &&
452 "Trying to reset an existing successor block.");
453 TermBr->setSuccessor(idx, NewBB);
454 }
455 CFG.DTU.applyUpdates({{DominatorTree::Insert, PredBB, NewBB}});
456 }
457}
458
461 "VPIRBasicBlock can have at most two successors at the moment!");
462 // Move completely disconnected blocks to their final position.
463 if (IRBB->hasNPredecessors(0) && succ_begin(IRBB) == succ_end(IRBB))
464 IRBB->moveAfter(State->CFG.PrevBB);
465 State->Builder.SetInsertPoint(IRBB->getTerminator());
466 State->CFG.PrevBB = IRBB;
467 State->CFG.VPBB2IRBB[this] = IRBB;
468 executeRecipes(State, IRBB);
469 // Create a branch instruction to terminate IRBB if one was not created yet
470 // and is needed.
471 if (getSingleSuccessor() && isa<UnreachableInst>(IRBB->getTerminator())) {
472 auto *Br = State->Builder.CreateBr(IRBB);
473 Br->setOperand(0, nullptr);
474 IRBB->getTerminator()->eraseFromParent();
475 } else {
476 assert(
477 (getNumSuccessors() == 0 || isa<BranchInst>(IRBB->getTerminator())) &&
478 "other blocks must be terminated by a branch");
479 }
480
481 connectToPredecessors(*State);
482}
483
484VPIRBasicBlock *VPIRBasicBlock::clone() {
485 auto *NewBlock = getPlan()->createEmptyVPIRBasicBlock(IRBB);
486 for (VPRecipeBase &R : Recipes)
487 NewBlock->appendRecipe(R.clone());
488 return NewBlock;
489}
490
492 bool Replica = bool(State->Lane);
493 BasicBlock *NewBB = State->CFG.PrevBB; // Reuse it if possible.
494
495 if (VPBlockUtils::isHeader(this, State->VPDT)) {
496 // Create and register the new vector loop.
497 Loop *PrevParentLoop = State->CurrentParentLoop;
498 State->CurrentParentLoop = State->LI->AllocateLoop();
499
500 // Insert the new loop into the loop nest and register the new basic blocks
501 // before calling any utilities such as SCEV that require valid LoopInfo.
502 if (PrevParentLoop)
503 PrevParentLoop->addChildLoop(State->CurrentParentLoop);
504 else
505 State->LI->addTopLevelLoop(State->CurrentParentLoop);
506 }
507
508 auto IsReplicateRegion = [](VPBlockBase *BB) {
510 assert((!R || R->isReplicator()) &&
511 "only replicate region blocks should remain");
512 return R;
513 };
514 // 1. Create an IR basic block.
515 if ((Replica && this == getParent()->getEntry()) ||
516 IsReplicateRegion(getSingleHierarchicalPredecessor())) {
517 // Reuse the previous basic block if the current VPBB is either
518 // * the entry to a replicate region, or
519 // * the exit of a replicate region.
520 State->CFG.VPBB2IRBB[this] = NewBB;
521 } else {
522 NewBB = createEmptyBasicBlock(*State);
523
524 State->Builder.SetInsertPoint(NewBB);
525 // Temporarily terminate with unreachable until CFG is rewired.
526 UnreachableInst *Terminator = State->Builder.CreateUnreachable();
527 State->Builder.SetInsertPoint(Terminator);
528
529 State->CFG.PrevBB = NewBB;
530 State->CFG.VPBB2IRBB[this] = NewBB;
531 connectToPredecessors(*State);
532 }
533
534 // 2. Fill the IR basic block with IR instructions.
535 executeRecipes(State, NewBB);
536
537 // If this block is a latch, update CurrentParentLoop.
538 if (VPBlockUtils::isLatch(this, State->VPDT))
539 State->CurrentParentLoop = State->CurrentParentLoop->getParentLoop();
540}
541
542VPBasicBlock *VPBasicBlock::clone() {
543 auto *NewBlock = getPlan()->createVPBasicBlock(getName());
544 for (VPRecipeBase &R : *this)
545 NewBlock->appendRecipe(R.clone());
546 return NewBlock;
547}
548
550 LLVM_DEBUG(dbgs() << "LV: vectorizing VPBB: " << getName()
551 << " in BB: " << BB->getName() << '\n');
552
553 State->CFG.PrevVPBB = this;
554
555 for (VPRecipeBase &Recipe : Recipes) {
556 State->setDebugLocFrom(Recipe.getDebugLoc());
557 Recipe.execute(*State);
558 }
559
560 LLVM_DEBUG(dbgs() << "LV: filled BB: " << *BB);
561}
562
563VPBasicBlock *VPBasicBlock::splitAt(iterator SplitAt) {
564 assert((SplitAt == end() || SplitAt->getParent() == this) &&
565 "can only split at a position in the same block");
566
567 // Create new empty block after the block to split.
568 auto *SplitBlock = getPlan()->createVPBasicBlock(getName() + ".split");
570
571 // Finally, move the recipes starting at SplitAt to new block.
572 for (VPRecipeBase &ToMove :
573 make_early_inc_range(make_range(SplitAt, this->end())))
574 ToMove.moveBefore(*SplitBlock, SplitBlock->end());
575
576 return SplitBlock;
577}
578
579/// Return the enclosing loop region for region \p P. The templated version is
580/// used to support both const and non-const block arguments.
581template <typename T> static T *getEnclosingLoopRegionForRegion(T *P) {
582 if (P && P->isReplicator()) {
583 P = P->getParent();
584 // Multiple loop regions can be nested, but replicate regions can only be
585 // nested inside a loop region or must be outside any other region.
586 assert((!P || !P->isReplicator()) && "unexpected nested replicate regions");
587 }
588 return P;
589}
590
594
598
599static bool hasConditionalTerminator(const VPBasicBlock *VPBB) {
600 if (VPBB->empty()) {
601 assert(
602 VPBB->getNumSuccessors() < 2 &&
603 "block with multiple successors doesn't have a recipe as terminator");
604 return false;
605 }
606
607 const VPRecipeBase *R = &VPBB->back();
608 [[maybe_unused]] bool IsSwitch =
610 cast<VPInstruction>(R)->getOpcode() == Instruction::Switch;
611 [[maybe_unused]] bool IsBranchOnTwoConds = match(R, m_BranchOnTwoConds());
612 [[maybe_unused]] bool IsCondBranch =
615 if (VPBB->getNumSuccessors() == 2 ||
616 (VPBB->isExiting() && !VPBB->getParent()->isReplicator())) {
617 assert((IsCondBranch || IsSwitch || IsBranchOnTwoConds) &&
618 "block with multiple successors not terminated by "
619 "conditional branch nor switch recipe");
620
621 return true;
622 }
623
624 if (VPBB->getNumSuccessors() > 2) {
625 assert((IsSwitch || IsBranchOnTwoConds) &&
626 "block with more than 2 successors not terminated by a switch or "
627 "branch-on-two-conds recipe");
628 return true;
629 }
630
631 assert(
632 !IsCondBranch && !IsBranchOnTwoConds &&
633 "block with 0 or 1 successors terminated by conditional branch recipe");
634 return false;
635}
636
638 if (hasConditionalTerminator(this))
639 return &back();
640 return nullptr;
641}
642
644 if (hasConditionalTerminator(this))
645 return &back();
646 return nullptr;
647}
648
650 return getParent() && getParent()->getExitingBasicBlock() == this;
651}
652
653#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
658
659void VPBlockBase::printSuccessors(raw_ostream &O, const Twine &Indent) const {
660 if (getSuccessors().empty()) {
661 O << Indent << "No successors\n";
662 } else {
663 O << Indent << "Successor(s): ";
664 ListSeparator LS;
665 for (auto *Succ : getSuccessors())
666 O << LS << Succ->getName();
667 O << '\n';
668 }
669}
670
671void VPBasicBlock::print(raw_ostream &O, const Twine &Indent,
672 VPSlotTracker &SlotTracker) const {
673 O << Indent << getName() << ":\n";
674
675 auto RecipeIndent = Indent + " ";
676 for (const VPRecipeBase &Recipe : *this) {
677 Recipe.print(O, RecipeIndent, SlotTracker);
678 O << '\n';
679 }
680
681 printSuccessors(O, Indent);
682}
683#endif
684
685static std::pair<VPBlockBase *, VPBlockBase *> cloneFrom(VPBlockBase *Entry);
686
687// Clone the CFG for all nodes reachable from \p Entry, this includes cloning
688// the blocks and their recipes. Operands of cloned recipes will NOT be updated.
689// Remapping of operands must be done separately. Returns a pair with the new
690// entry and exiting blocks of the cloned region. If \p Entry isn't part of a
691// region, return nullptr for the exiting block.
692static std::pair<VPBlockBase *, VPBlockBase *> cloneFrom(VPBlockBase *Entry) {
694 VPBlockBase *Exiting = nullptr;
695 bool InRegion = Entry->getParent();
696 // First, clone blocks reachable from Entry.
697 for (VPBlockBase *BB : vp_depth_first_shallow(Entry)) {
698 VPBlockBase *NewBB = BB->clone();
699 Old2NewVPBlocks[BB] = NewBB;
700 if (InRegion && BB->getNumSuccessors() == 0) {
701 assert(!Exiting && "Multiple exiting blocks?");
702 Exiting = BB;
703 }
704 }
705 assert((!InRegion || Exiting) && "regions must have a single exiting block");
706
707 // Second, update the predecessors & successors of the cloned blocks.
708 for (VPBlockBase *BB : vp_depth_first_shallow(Entry)) {
709 VPBlockBase *NewBB = Old2NewVPBlocks[BB];
711 for (VPBlockBase *Pred : BB->getPredecessors()) {
712 NewPreds.push_back(Old2NewVPBlocks[Pred]);
713 }
714 NewBB->setPredecessors(NewPreds);
716 for (VPBlockBase *Succ : BB->successors()) {
717 NewSuccs.push_back(Old2NewVPBlocks[Succ]);
718 }
719 NewBB->setSuccessors(NewSuccs);
720 }
721
722#if !defined(NDEBUG)
723 // Verify that the order of predecessors and successors matches in the cloned
724 // version.
725 for (const auto &[OldBB, NewBB] :
727 vp_depth_first_shallow(Old2NewVPBlocks[Entry]))) {
728 for (const auto &[OldPred, NewPred] :
729 zip(OldBB->getPredecessors(), NewBB->getPredecessors()))
730 assert(NewPred == Old2NewVPBlocks[OldPred] && "Different predecessors");
731
732 for (const auto &[OldSucc, NewSucc] :
733 zip(OldBB->successors(), NewBB->successors()))
734 assert(NewSucc == Old2NewVPBlocks[OldSucc] && "Different successors");
735 }
736#endif
737
738 return std::make_pair(Old2NewVPBlocks[Entry],
739 Exiting ? Old2NewVPBlocks[Exiting] : nullptr);
740}
741
742VPRegionBlock *VPRegionBlock::clone() {
743 const auto &[NewEntry, NewExiting] = cloneFrom(getEntry());
744 VPlan &Plan = *getPlan();
745 VPRegionBlock *NewRegion =
747 ? Plan.createReplicateRegion(NewEntry, NewExiting, getName())
748 : Plan.createLoopRegion(getName(), NewEntry, NewExiting);
749
750 for (VPBlockBase *Block : vp_depth_first_shallow(NewEntry))
751 Block->setParent(NewRegion);
752 return NewRegion;
753}
754
757 "Loop regions should have been lowered to plain CFG");
758 assert(!State->Lane && "Replicating a Region with non-null instance.");
759 assert(!State->VF.isScalable() && "VF is assumed to be non scalable.");
760
762 Entry);
763 State->Lane = VPLane(0);
764 for (unsigned Lane = 0, VF = State->VF.getFixedValue(); Lane < VF; ++Lane) {
765 State->Lane = VPLane(Lane, VPLane::Kind::First);
766 // Visit the VPBlocks connected to \p this, starting from it.
767 for (VPBlockBase *Block : RPOT) {
768 LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n');
769 Block->execute(State);
770 }
771 }
772
773 // Exit replicating mode.
774 State->Lane.reset();
775}
776
779 for (VPRecipeBase &R : Recipes)
780 Cost += R.cost(VF, Ctx);
781 return Cost;
782}
783
784const VPBasicBlock *VPBasicBlock::getCFGPredecessor(unsigned Idx) const {
785 const VPBlockBase *Pred = nullptr;
786 if (hasPredecessors()) {
787 Pred = getPredecessors()[Idx];
788 } else {
789 auto *Region = getParent();
790 assert(Region && !Region->isReplicator() && Region->getEntry() == this &&
791 "must be in the entry block of a non-replicate region");
792 assert(Idx < 2 && Region->getNumPredecessors() == 1 &&
793 "loop region has a single predecessor (preheader), its entry block "
794 "has 2 incoming blocks");
795
796 // Idx == 0 selects the predecessor of the region, Idx == 1 selects the
797 // region itself whose exiting block feeds the phi across the backedge.
798 Pred = Idx == 0 ? Region->getSinglePredecessor() : Region;
799 }
800 return Pred->getExitingBasicBlock();
801}
802
804 if (!isReplicator()) {
807 Cost += Block->cost(VF, Ctx);
808 InstructionCost BackedgeCost =
809 ForceTargetInstructionCost.getNumOccurrences()
810 ? InstructionCost(ForceTargetInstructionCost.getNumOccurrences())
811 : Ctx.TTI.getCFInstrCost(Instruction::Br, Ctx.CostKind);
812 LLVM_DEBUG(dbgs() << "Cost of " << BackedgeCost << " for VF " << VF
813 << ": vector loop backedge\n");
814 Cost += BackedgeCost;
815 return Cost;
816 }
817
818 // Compute the cost of a replicate region. Replicating isn't supported for
819 // scalable vectors, return an invalid cost for them.
820 // TODO: Discard scalable VPlans with replicate recipes earlier after
821 // construction.
822 if (VF.isScalable())
824
825 // Compute and return the cost of the conditionally executed recipes.
826 assert(VF.isVector() && "Can only compute vector cost at the moment.");
828 return Then->cost(VF, Ctx);
829}
830
831#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
833 VPSlotTracker &SlotTracker) const {
834 O << Indent << (isReplicator() ? "<xVFxUF> " : "<x1> ") << getName() << ": {";
835 auto NewIndent = Indent + " ";
836 for (auto *BlockBase : vp_depth_first_shallow(Entry)) {
837 O << '\n';
838 BlockBase->print(O, NewIndent, SlotTracker);
839 }
840 O << Indent << "}\n";
841
842 printSuccessors(O, Indent);
843}
844#endif
845
847 auto *Header = cast<VPBasicBlock>(getEntry());
848 if (auto *CanIV = dyn_cast<VPCanonicalIVPHIRecipe>(&Header->front())) {
849 assert(this == getPlan()->getVectorLoopRegion() &&
850 "Canonical IV must be in the entry of the top-level loop region");
851 auto *ScalarR = VPBuilder(CanIV).createScalarPhi(
852 {CanIV->getStartValue(), CanIV->getBackedgeValue()},
853 CanIV->getDebugLoc(), "index");
854 CanIV->replaceAllUsesWith(ScalarR);
855 CanIV->eraseFromParent();
856 }
857
858 VPBlockBase *Preheader = getSinglePredecessor();
859 auto *ExitingLatch = cast<VPBasicBlock>(getExiting());
860
861 VPBlockUtils::disconnectBlocks(Preheader, this);
862
863 for (VPBlockBase *VPB : vp_depth_first_shallow(Entry))
864 VPB->setParent(getParent());
865
866 VPBlockUtils::connectBlocks(Preheader, Header);
867 VPBlockUtils::transferSuccessors(this, ExitingLatch);
868 VPBlockUtils::connectBlocks(ExitingLatch, Header);
869}
870
871VPlan::VPlan(Loop *L) {
872 setEntry(createVPIRBasicBlock(L->getLoopPreheader()));
873 ScalarHeader = createVPIRBasicBlock(L->getHeader());
874
875 SmallVector<BasicBlock *> IRExitBlocks;
876 L->getUniqueExitBlocks(IRExitBlocks);
877 for (BasicBlock *EB : IRExitBlocks)
878 ExitBlocks.push_back(createVPIRBasicBlock(EB));
879}
880
882 VPSymbolicValue DummyValue;
883
884 for (auto *VPB : CreatedBlocks) {
885 if (auto *VPBB = dyn_cast<VPBasicBlock>(VPB)) {
886 // Replace all operands of recipes and all VPValues defined in VPBB with
887 // DummyValue so the block can be deleted.
888 for (VPRecipeBase &R : *VPBB) {
889 for (auto *Def : R.definedValues())
890 Def->replaceAllUsesWith(&DummyValue);
891
892 for (unsigned I = 0, E = R.getNumOperands(); I != E; I++)
893 R.setOperand(I, &DummyValue);
894 }
895 }
896 delete VPB;
897 }
898 for (VPValue *VPV : getLiveIns())
899 delete VPV;
900 delete BackedgeTakenCount;
901}
902
904 auto Iter = find_if(getExitBlocks(), [IRBB](const VPIRBasicBlock *VPIRBB) {
905 return VPIRBB->getIRBasicBlock() == IRBB;
906 });
907 assert(Iter != getExitBlocks().end() && "no exit block found");
908 return *Iter;
909}
910
912 return is_contained(ExitBlocks, VPBB);
913}
914
915/// To make RUN_VPLAN_PASS print final VPlan.
916static void printFinalVPlan(VPlan &) {}
917
918/// Generate the code inside the preheader and body of the vectorized loop.
919/// Assumes a single pre-header basic-block was created for this. Introduce
920/// additional basic-blocks as needed, and fill them all.
922 // Initialize CFG state.
923 State->CFG.PrevVPBB = nullptr;
924 State->CFG.ExitBB = State->CFG.PrevBB->getSingleSuccessor();
925
926 // Update VPDominatorTree since VPBasicBlock may be removed after State was
927 // constructed.
928 State->VPDT.recalculate(*this);
929
930 // Disconnect VectorPreHeader from ExitBB in both the CFG and DT.
931 BasicBlock *VectorPreHeader = State->CFG.PrevBB;
932 cast<BranchInst>(VectorPreHeader->getTerminator())->setSuccessor(0, nullptr);
933 State->CFG.DTU.applyUpdates(
934 {{DominatorTree::Delete, VectorPreHeader, State->CFG.ExitBB}});
935
936 LLVM_DEBUG(dbgs() << "Executing best plan with VF=" << State->VF
937 << ", UF=" << getUF() << '\n');
938 setName("Final VPlan");
939 // TODO: RUN_VPLAN_PASS/VPlanTransforms::runPass should automatically dump
940 // VPlans after some specific stages when "-debug" is specified, but that
941 // hasn't been implemented yet. For now, just do both:
942 LLVM_DEBUG(dump());
944
945 BasicBlock *ScalarPh = State->CFG.ExitBB;
946 VPBasicBlock *ScalarPhVPBB = getScalarPreheader();
947 if (ScalarPhVPBB->hasPredecessors()) {
948 // Disconnect scalar preheader and scalar header, as the dominator tree edge
949 // will be updated as part of VPlan execution. This allows keeping the DTU
950 // logic generic during VPlan execution.
951 State->CFG.DTU.applyUpdates(
952 {{DominatorTree::Delete, ScalarPh, ScalarPh->getSingleSuccessor()}});
953 }
955 Entry);
956 // Generate code for the VPlan, in parts of the vector skeleton, loop body and
957 // successor blocks including the middle, exit and scalar preheader blocks.
958 for (VPBlockBase *Block : RPOT)
959 Block->execute(State);
960
961 // If the original loop is unreachable, delete it and all its blocks.
962 if (!ScalarPhVPBB->hasPredecessors()) {
963 // DeleteDeadBlocks will remove single-entry phis. Remove them from the exit
964 // VPIRBBs in VPlan as well, otherwise we would retain references to deleted
965 // IR instructions.
966 for (VPIRBasicBlock *EB : getExitBlocks()) {
967 for (VPRecipeBase &R : make_early_inc_range(EB->phis())) {
968 if (R.getNumOperands() == 1)
969 R.eraseFromParent();
970 }
971 }
972
973 Loop *OrigLoop =
974 State->LI->getLoopFor(getScalarHeader()->getIRBasicBlock());
975 auto Blocks = OrigLoop->getBlocksVector();
976 Blocks.push_back(cast<VPIRBasicBlock>(ScalarPhVPBB)->getIRBasicBlock());
977 for (auto *BB : Blocks)
978 State->LI->removeBlock(BB);
979 DeleteDeadBlocks(Blocks, &State->CFG.DTU);
980 State->LI->erase(OrigLoop);
981 }
982
983 State->CFG.DTU.flush();
984
985 VPBasicBlock *Header = vputils::getFirstLoopHeader(*this, State->VPDT);
986 if (!Header)
987 return;
988
989 auto *LatchVPBB = cast<VPBasicBlock>(Header->getPredecessors()[1]);
990 BasicBlock *VectorLatchBB = State->CFG.VPBB2IRBB[LatchVPBB];
991
992 // Fix the latch value of canonical, reduction and first-order recurrences
993 // phis in the vector loop.
994 for (VPRecipeBase &R : Header->phis()) {
995 // Skip phi-like recipes that generate their backedege values themselves.
996 if (isa<VPWidenPHIRecipe>(&R))
997 continue;
998
999 auto *PhiR = cast<VPSingleDefRecipe>(&R);
1000 // VPInstructions currently model scalar Phis only.
1001 bool NeedsScalar = isa<VPInstruction>(PhiR) ||
1003 cast<VPReductionPHIRecipe>(PhiR)->isInLoop());
1004
1005 Value *Phi = State->get(PhiR, NeedsScalar);
1006 // VPHeaderPHIRecipe supports getBackedgeValue() but VPInstruction does
1007 // not.
1008 Value *Val = State->get(PhiR->getOperand(1), NeedsScalar);
1009 cast<PHINode>(Phi)->addIncoming(Val, VectorLatchBB);
1010 }
1011}
1012
1014 // For now only return the cost of the vector loop region, ignoring any other
1015 // blocks, like the preheader or middle blocks, expect for checking them for
1016 // recipes with invalid costs.
1018
1019 // If the cost of the loop region is invalid or any recipe in the skeleton
1020 // outside loop regions are invalid return an invalid cost.
1023 [&VF, &Ctx](VPBasicBlock *VPBB) {
1024 return !VPBB->cost(VF, Ctx).isValid();
1025 }))
1027
1028 return Cost;
1029}
1030
1032 // TODO: Cache if possible.
1034 if (auto *R = dyn_cast<VPRegionBlock>(B))
1035 return R->isReplicator() ? nullptr : R;
1036 return nullptr;
1037}
1038
1041 if (auto *R = dyn_cast<VPRegionBlock>(B))
1042 return R->isReplicator() ? nullptr : R;
1043 return nullptr;
1044}
1045
1046#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1049
1050 if (VF.getNumUsers() > 0) {
1051 O << "\nLive-in ";
1052 VF.printAsOperand(O, SlotTracker);
1053 O << " = VF";
1054 }
1055
1056 if (VFxUF.getNumUsers() > 0) {
1057 O << "\nLive-in ";
1058 VFxUF.printAsOperand(O, SlotTracker);
1059 O << " = VF * UF";
1060 }
1061
1062 if (VectorTripCount.getNumUsers() > 0) {
1063 O << "\nLive-in ";
1064 VectorTripCount.printAsOperand(O, SlotTracker);
1065 O << " = vector-trip-count";
1066 }
1067
1068 if (BackedgeTakenCount && BackedgeTakenCount->getNumUsers()) {
1069 O << "\nLive-in ";
1070 BackedgeTakenCount->printAsOperand(O, SlotTracker);
1071 O << " = backedge-taken count";
1072 }
1073
1074 O << "\n";
1075 if (TripCount) {
1076 if (isa<VPIRValue>(TripCount))
1077 O << "Live-in ";
1078 TripCount->printAsOperand(O, SlotTracker);
1079 O << " = original trip-count";
1080 O << "\n";
1081 }
1082}
1083
1087
1088 O << "VPlan '" << getName() << "' {";
1089
1090 printLiveIns(O);
1091
1093 RPOT(getEntry());
1094 for (const VPBlockBase *Block : RPOT) {
1095 O << '\n';
1096 Block->print(O, "", SlotTracker);
1097 }
1098
1099 O << "}\n";
1100}
1101
1102std::string VPlan::getName() const {
1103 std::string Out;
1104 raw_string_ostream RSO(Out);
1105 RSO << Name << " for ";
1106 if (!VFs.empty()) {
1107 RSO << "VF={" << VFs[0];
1108 for (ElementCount VF : drop_begin(VFs))
1109 RSO << "," << VF;
1110 RSO << "},";
1111 }
1112
1113 if (UFs.empty()) {
1114 RSO << "UF>=1";
1115 } else {
1116 RSO << "UF={" << UFs[0];
1117 for (unsigned UF : drop_begin(UFs))
1118 RSO << "," << UF;
1119 RSO << "}";
1120 }
1121
1122 return Out;
1123}
1124
1127 VPlanPrinter Printer(O, *this);
1128 Printer.dump();
1129}
1130
1132void VPlan::dump() const { print(dbgs()); }
1133#endif
1134
1135static void remapOperands(VPBlockBase *Entry, VPBlockBase *NewEntry,
1136 DenseMap<VPValue *, VPValue *> &Old2NewVPValues) {
1137 // Update the operands of all cloned recipes starting at NewEntry. This
1138 // traverses all reachable blocks. This is done in two steps, to handle cycles
1139 // in PHI recipes.
1141 OldDeepRPOT(Entry);
1143 NewDeepRPOT(NewEntry);
1144 // First, collect all mappings from old to new VPValues defined by cloned
1145 // recipes.
1146 for (const auto &[OldBB, NewBB] :
1149 assert(OldBB->getRecipeList().size() == NewBB->getRecipeList().size() &&
1150 "blocks must have the same number of recipes");
1151 for (const auto &[OldR, NewR] : zip(*OldBB, *NewBB)) {
1152 assert(OldR.getNumOperands() == NewR.getNumOperands() &&
1153 "recipes must have the same number of operands");
1154 assert(OldR.getNumDefinedValues() == NewR.getNumDefinedValues() &&
1155 "recipes must define the same number of operands");
1156 for (const auto &[OldV, NewV] :
1157 zip(OldR.definedValues(), NewR.definedValues()))
1158 Old2NewVPValues[OldV] = NewV;
1159 }
1160 }
1161
1162 // Update all operands to use cloned VPValues.
1163 for (VPBasicBlock *NewBB :
1165 for (VPRecipeBase &NewR : *NewBB)
1166 for (unsigned I = 0, E = NewR.getNumOperands(); I != E; ++I) {
1167 VPValue *NewOp = Old2NewVPValues.lookup(NewR.getOperand(I));
1168 NewR.setOperand(I, NewOp);
1169 }
1170 }
1171}
1172
1174 unsigned NumBlocksBeforeCloning = CreatedBlocks.size();
1175 // Clone blocks.
1176 const auto &[NewEntry, __] = cloneFrom(Entry);
1177
1178 BasicBlock *ScalarHeaderIRBB = getScalarHeader()->getIRBasicBlock();
1179 VPIRBasicBlock *NewScalarHeader = nullptr;
1180 if (getScalarHeader()->hasPredecessors()) {
1181 NewScalarHeader = cast<VPIRBasicBlock>(*find_if(
1182 vp_depth_first_shallow(NewEntry), [ScalarHeaderIRBB](VPBlockBase *VPB) {
1183 auto *VPIRBB = dyn_cast<VPIRBasicBlock>(VPB);
1184 return VPIRBB && VPIRBB->getIRBasicBlock() == ScalarHeaderIRBB;
1185 }));
1186 } else {
1187 NewScalarHeader = createVPIRBasicBlock(ScalarHeaderIRBB);
1188 }
1189 // Create VPlan, clone live-ins and remap operands in the cloned blocks.
1190 auto *NewPlan = new VPlan(cast<VPBasicBlock>(NewEntry), NewScalarHeader);
1191 DenseMap<VPValue *, VPValue *> Old2NewVPValues;
1192 for (VPIRValue *OldLiveIn : getLiveIns())
1193 Old2NewVPValues[OldLiveIn] = NewPlan->getOrAddLiveIn(OldLiveIn);
1194 Old2NewVPValues[&VectorTripCount] = &NewPlan->VectorTripCount;
1195 Old2NewVPValues[&VF] = &NewPlan->VF;
1196 Old2NewVPValues[&VFxUF] = &NewPlan->VFxUF;
1197 if (BackedgeTakenCount) {
1198 NewPlan->BackedgeTakenCount = new VPSymbolicValue();
1199 Old2NewVPValues[BackedgeTakenCount] = NewPlan->BackedgeTakenCount;
1200 }
1201 if (auto *TripCountIRV = dyn_cast_or_null<VPIRValue>(TripCount))
1202 Old2NewVPValues[TripCountIRV] = NewPlan->getOrAddLiveIn(TripCountIRV);
1203 // else NewTripCount will be created and inserted into Old2NewVPValues when
1204 // TripCount is cloned. In any case NewPlan->TripCount is updated below.
1205
1206 remapOperands(Entry, NewEntry, Old2NewVPValues);
1207
1208 // Initialize remaining fields of cloned VPlan.
1209 NewPlan->VFs = VFs;
1210 NewPlan->UFs = UFs;
1211 // TODO: Adjust names.
1212 NewPlan->Name = Name;
1213 if (TripCount) {
1214 assert(Old2NewVPValues.contains(TripCount) &&
1215 "TripCount must have been added to Old2NewVPValues");
1216 NewPlan->TripCount = Old2NewVPValues[TripCount];
1217 }
1218
1219 // Transfer all cloned blocks (the second half of all current blocks) from
1220 // current to new VPlan.
1221 unsigned NumBlocksAfterCloning = CreatedBlocks.size();
1222 for (unsigned I :
1223 seq<unsigned>(NumBlocksBeforeCloning, NumBlocksAfterCloning))
1224 NewPlan->CreatedBlocks.push_back(this->CreatedBlocks[I]);
1225 CreatedBlocks.truncate(NumBlocksBeforeCloning);
1226
1227 // Update ExitBlocks of the new plan.
1228 for (VPBlockBase *VPB : NewPlan->CreatedBlocks) {
1229 if (VPB->getNumSuccessors() == 0 && isa<VPIRBasicBlock>(VPB) &&
1230 VPB != NewScalarHeader)
1231 NewPlan->ExitBlocks.push_back(cast<VPIRBasicBlock>(VPB));
1232 }
1233
1234 return NewPlan;
1235}
1236
1238 auto *VPIRBB = new VPIRBasicBlock(IRBB);
1239 CreatedBlocks.push_back(VPIRBB);
1240 return VPIRBB;
1241}
1242
1244 auto *VPIRBB = createEmptyVPIRBasicBlock(IRBB);
1245 for (Instruction &I :
1246 make_range(IRBB->begin(), IRBB->getTerminator()->getIterator()))
1247 VPIRBB->appendRecipe(VPIRInstruction::create(I));
1248 return VPIRBB;
1249}
1250
1251#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1252
1253Twine VPlanPrinter::getUID(const VPBlockBase *Block) {
1254 return (isa<VPRegionBlock>(Block) ? "cluster_N" : "N") +
1255 Twine(getOrCreateBID(Block));
1256}
1257
1258Twine VPlanPrinter::getOrCreateName(const VPBlockBase *Block) {
1259 const std::string &Name = Block->getName();
1260 if (!Name.empty())
1261 return Name;
1262 return "VPB" + Twine(getOrCreateBID(Block));
1263}
1264
1266 Depth = 1;
1267 bumpIndent(0);
1268 OS << "digraph VPlan {\n";
1269 OS << "graph [labelloc=t, fontsize=30; label=\"Vectorization Plan";
1270 if (!Plan.getName().empty())
1271 OS << "\\n" << DOT::EscapeString(Plan.getName());
1272
1273 {
1274 // Print live-ins.
1275 std::string Str;
1276 raw_string_ostream SS(Str);
1277 Plan.printLiveIns(SS);
1279 StringRef(Str).rtrim('\n').split(Lines, "\n");
1280 for (auto Line : Lines)
1281 OS << DOT::EscapeString(Line.str()) << "\\n";
1282 }
1283
1284 OS << "\"]\n";
1285 OS << "node [shape=rect, fontname=Courier, fontsize=30]\n";
1286 OS << "edge [fontname=Courier, fontsize=30]\n";
1287 OS << "compound=true\n";
1288
1289 for (const VPBlockBase *Block : vp_depth_first_shallow(Plan.getEntry()))
1290 dumpBlock(Block);
1291
1292 OS << "}\n";
1293}
1294
1295void VPlanPrinter::dumpBlock(const VPBlockBase *Block) {
1297 dumpBasicBlock(BasicBlock);
1299 dumpRegion(Region);
1300 else
1301 llvm_unreachable("Unsupported kind of VPBlock.");
1302}
1303
1304void VPlanPrinter::drawEdge(const VPBlockBase *From, const VPBlockBase *To,
1305 bool Hidden, const Twine &Label) {
1306 // Due to "dot" we print an edge between two regions as an edge between the
1307 // exiting basic block and the entry basic of the respective regions.
1308 const VPBlockBase *Tail = From->getExitingBasicBlock();
1309 const VPBlockBase *Head = To->getEntryBasicBlock();
1310 OS << Indent << getUID(Tail) << " -> " << getUID(Head);
1311 OS << " [ label=\"" << Label << '\"';
1312 if (Tail != From)
1313 OS << " ltail=" << getUID(From);
1314 if (Head != To)
1315 OS << " lhead=" << getUID(To);
1316 if (Hidden)
1317 OS << "; splines=none";
1318 OS << "]\n";
1319}
1320
1321void VPlanPrinter::dumpEdges(const VPBlockBase *Block) {
1322 auto &Successors = Block->getSuccessors();
1323 if (Successors.size() == 1)
1324 drawEdge(Block, Successors.front(), false, "");
1325 else if (Successors.size() == 2) {
1326 drawEdge(Block, Successors.front(), false, "T");
1327 drawEdge(Block, Successors.back(), false, "F");
1328 } else {
1329 unsigned SuccessorNumber = 0;
1330 for (auto *Successor : Successors)
1331 drawEdge(Block, Successor, false, Twine(SuccessorNumber++));
1332 }
1333}
1334
1335void VPlanPrinter::dumpBasicBlock(const VPBasicBlock *BasicBlock) {
1336 // Implement dot-formatted dump by performing plain-text dump into the
1337 // temporary storage followed by some post-processing.
1338 OS << Indent << getUID(BasicBlock) << " [label =\n";
1339 bumpIndent(1);
1340 std::string Str;
1341 raw_string_ostream SS(Str);
1342 // Use no indentation as we need to wrap the lines into quotes ourselves.
1343 BasicBlock->print(SS, "", SlotTracker);
1344
1345 // We need to process each line of the output separately, so split
1346 // single-string plain-text dump.
1348 StringRef(Str).rtrim('\n').split(Lines, "\n");
1349
1350 auto EmitLine = [&](StringRef Line, StringRef Suffix) {
1351 OS << Indent << '"' << DOT::EscapeString(Line.str()) << "\\l\"" << Suffix;
1352 };
1353
1354 // Don't need the "+" after the last line.
1355 for (auto Line : make_range(Lines.begin(), Lines.end() - 1))
1356 EmitLine(Line, " +\n");
1357 EmitLine(Lines.back(), "\n");
1358
1359 bumpIndent(-1);
1360 OS << Indent << "]\n";
1361
1362 dumpEdges(BasicBlock);
1363}
1364
1365void VPlanPrinter::dumpRegion(const VPRegionBlock *Region) {
1366 OS << Indent << "subgraph " << getUID(Region) << " {\n";
1367 bumpIndent(1);
1368 OS << Indent << "fontname=Courier\n"
1369 << Indent << "label=\""
1370 << DOT::EscapeString(Region->isReplicator() ? "<xVFxUF> " : "<x1> ")
1371 << DOT::EscapeString(Region->getName()) << "\"\n";
1372 // Dump the blocks of the region.
1373 assert(Region->getEntry() && "Region contains no inner blocks.");
1374 for (const VPBlockBase *Block : vp_depth_first_shallow(Region->getEntry()))
1375 dumpBlock(Block);
1376 bumpIndent(-1);
1377 OS << Indent << "}\n";
1378 dumpEdges(Region);
1379}
1380
1381#endif
1382
1383/// Returns true if there is a vector loop region and \p VPV is defined in a
1384/// loop region.
1385static bool isDefinedInsideLoopRegions(const VPValue *VPV) {
1386 const VPRecipeBase *DefR = VPV->getDefiningRecipe();
1387 return DefR && (!DefR->getParent()->getPlan()->getVectorLoopRegion() ||
1389}
1390
1395 replaceUsesWithIf(New, [](VPUser &, unsigned) { return true; });
1396}
1397
1399 VPValue *New,
1400 llvm::function_ref<bool(VPUser &U, unsigned Idx)> ShouldReplace) {
1401 // Note that this early exit is required for correctness; the implementation
1402 // below relies on the number of users for this VPValue to decrease, which
1403 // isn't the case if this == New.
1404 if (this == New)
1405 return;
1406
1407 for (unsigned J = 0; J < getNumUsers();) {
1408 VPUser *User = Users[J];
1409 bool RemovedUser = false;
1410 for (unsigned I = 0, E = User->getNumOperands(); I < E; ++I) {
1411 if (User->getOperand(I) != this || !ShouldReplace(*User, I))
1412 continue;
1413
1414 RemovedUser = true;
1415 User->setOperand(I, New);
1416 }
1417 // If a user got removed after updating the current user, the next user to
1418 // update will be moved to the current position, so we only need to
1419 // increment the index if the number of users did not change.
1420 if (!RemovedUser)
1421 J++;
1422 }
1423}
1424
1426 for (unsigned Idx = 0; Idx != getNumOperands(); ++Idx) {
1427 if (getOperand(Idx) == From)
1428 setOperand(Idx, To);
1429 }
1430}
1431
1432#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1434 OS << Tracker.getOrCreateName(this);
1435}
1436
1439 Op->printAsOperand(O, SlotTracker);
1440 });
1441}
1442#endif
1443
1444void VPSlotTracker::assignName(const VPValue *V) {
1445 assert(!VPValue2Name.contains(V) && "VPValue already has a name!");
1446 auto *UV = V->getUnderlyingValue();
1447 auto *VPI = dyn_cast_or_null<VPInstruction>(V);
1448 if (!UV && !(VPI && !VPI->getName().empty())) {
1449 VPValue2Name[V] = (Twine("vp<%") + Twine(NextSlot) + ">").str();
1450 NextSlot++;
1451 return;
1452 }
1453
1454 // Use the name of the underlying Value, wrapped in "ir<>", and versioned by
1455 // appending ".Number" to the name if there are multiple uses.
1456 std::string Name;
1457 if (UV)
1458 Name = getName(UV);
1459 else
1460 Name = VPI->getName();
1461
1462 assert(!Name.empty() && "Name cannot be empty.");
1463 StringRef Prefix = UV ? "ir<" : "vp<%";
1464 std::string BaseName = (Twine(Prefix) + Name + Twine(">")).str();
1465
1466 // First assign the base name for V.
1467 const auto &[A, _] = VPValue2Name.try_emplace(V, BaseName);
1468 // Integer or FP constants with different types will result in the same string
1469 // due to stripping types.
1471 return;
1472
1473 // If it is already used by C > 0 other VPValues, increase the version counter
1474 // C and use it for V.
1475 const auto &[C, UseInserted] = BaseName2Version.try_emplace(BaseName, 0);
1476 if (!UseInserted) {
1477 C->second++;
1478 A->second = (BaseName + Twine(".") + Twine(C->second)).str();
1479 }
1480}
1481
1482void VPSlotTracker::assignNames(const VPlan &Plan) {
1483 if (Plan.VF.getNumUsers() > 0)
1484 assignName(&Plan.VF);
1485 if (Plan.VFxUF.getNumUsers() > 0)
1486 assignName(&Plan.VFxUF);
1487 assignName(&Plan.VectorTripCount);
1488 if (Plan.BackedgeTakenCount)
1489 assignName(Plan.BackedgeTakenCount);
1490 for (VPValue *LI : Plan.getLiveIns())
1491 assignName(LI);
1492
1493 ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<const VPBlockBase *>>
1494 RPOT(VPBlockDeepTraversalWrapper<const VPBlockBase *>(Plan.getEntry()));
1495 for (const VPBasicBlock *VPBB :
1497 assignNames(VPBB);
1498}
1499
1500void VPSlotTracker::assignNames(const VPBasicBlock *VPBB) {
1501 for (const VPRecipeBase &Recipe : *VPBB)
1502 for (VPValue *Def : Recipe.definedValues())
1503 assignName(Def);
1504}
1505
1506std::string VPSlotTracker::getName(const Value *V) {
1507 std::string Name;
1508 raw_string_ostream S(Name);
1509 if (V->hasName() || !isa<Instruction>(V)) {
1510 V->printAsOperand(S, false);
1511 return Name;
1512 }
1513
1514 if (!MST) {
1515 // Lazily create the ModuleSlotTracker when we first hit an unnamed
1516 // instruction.
1517 auto *I = cast<Instruction>(V);
1518 // This check is required to support unit tests with incomplete IR.
1519 if (I->getParent()) {
1520 MST = std::make_unique<ModuleSlotTracker>(I->getModule());
1521 MST->incorporateFunction(*I->getFunction());
1522 } else {
1523 MST = std::make_unique<ModuleSlotTracker>(nullptr);
1524 }
1525 }
1526 V->printAsOperand(S, false, *MST);
1527 return Name;
1528}
1529
1530std::string VPSlotTracker::getOrCreateName(const VPValue *V) const {
1531 std::string Name = VPValue2Name.lookup(V);
1532 if (!Name.empty())
1533 return Name;
1534
1535 // If no name was assigned, no VPlan was provided when creating the slot
1536 // tracker or it is not reachable from the provided VPlan. This can happen,
1537 // e.g. when trying to print a recipe that has not been inserted into a VPlan
1538 // in a debugger.
1539 // TODO: Update VPSlotTracker constructor to assign names to recipes &
1540 // VPValues not associated with a VPlan, instead of constructing names ad-hoc
1541 // here.
1542 const VPRecipeBase *DefR = V->getDefiningRecipe();
1543 (void)DefR;
1544 assert((!DefR || !DefR->getParent() || !DefR->getParent()->getPlan()) &&
1545 "VPValue defined by a recipe in a VPlan?");
1546
1547 // Use the underlying value's name, if there is one.
1548 if (auto *UV = V->getUnderlyingValue()) {
1549 std::string Name;
1550 raw_string_ostream S(Name);
1551 UV->printAsOperand(S, false);
1552 return (Twine("ir<") + Name + ">").str();
1553 }
1554
1555 return "<badref>";
1556}
1557
1559 const std::function<bool(ElementCount)> &Predicate, VFRange &Range) {
1560 assert(!Range.isEmpty() && "Trying to test an empty VF range.");
1561 bool PredicateAtRangeStart = Predicate(Range.Start);
1562
1563 for (ElementCount TmpVF : VFRange(Range.Start * 2, Range.End))
1564 if (Predicate(TmpVF) != PredicateAtRangeStart) {
1565 Range.End = TmpVF;
1566 break;
1567 }
1568
1569 return PredicateAtRangeStart;
1570}
1571
1572/// Build VPlans for the full range of feasible VF's = {\p MinVF, 2 * \p MinVF,
1573/// 4 * \p MinVF, ..., \p MaxVF} by repeatedly building a VPlan for a sub-range
1574/// of VF's starting at a given VF and extending it as much as possible. Each
1575/// vectorization decision can potentially shorten this sub-range during
1576/// buildVPlan().
1578 ElementCount MaxVF) {
1579 auto MaxVFTimes2 = MaxVF * 2;
1580 for (ElementCount VF = MinVF; ElementCount::isKnownLT(VF, MaxVFTimes2);) {
1581 VFRange SubRange = {VF, MaxVFTimes2};
1582 if (auto Plan = tryToBuildVPlan(SubRange)) {
1584 // Update the name of the latch of the top-level vector loop region region
1585 // after optimizations which includes block folding.
1586 Plan->getVectorLoopRegion()->getExiting()->setName("vector.latch");
1587 VPlans.push_back(std::move(Plan));
1588 }
1589 VF = SubRange.End;
1590 }
1591}
1592
1594 assert(count_if(VPlans,
1595 [VF](const VPlanPtr &Plan) { return Plan->hasVF(VF); }) ==
1596 1 &&
1597 "Multiple VPlans for VF.");
1598
1599 for (const VPlanPtr &Plan : VPlans) {
1600 if (Plan->hasVF(VF))
1601 return *Plan.get();
1602 }
1603 llvm_unreachable("No plan found!");
1604}
1605
1608 // Reserve first location for self reference to the LoopID metadata node.
1609 MDs.push_back(nullptr);
1610 bool IsUnrollMetadata = false;
1611 MDNode *LoopID = L->getLoopID();
1612 if (LoopID) {
1613 // First find existing loop unrolling disable metadata.
1614 for (unsigned I = 1, IE = LoopID->getNumOperands(); I < IE; ++I) {
1615 auto *MD = dyn_cast<MDNode>(LoopID->getOperand(I));
1616 if (MD) {
1617 const auto *S = dyn_cast<MDString>(MD->getOperand(0));
1618 if (!S)
1619 continue;
1620 if (S->getString().starts_with("llvm.loop.unroll.runtime.disable"))
1621 continue;
1622 IsUnrollMetadata =
1623 S->getString().starts_with("llvm.loop.unroll.disable");
1624 }
1625 MDs.push_back(LoopID->getOperand(I));
1626 }
1627 }
1628
1629 if (!IsUnrollMetadata) {
1630 // Add runtime unroll disable metadata.
1631 LLVMContext &Context = L->getHeader()->getContext();
1632 SmallVector<Metadata *, 1> DisableOperands;
1633 DisableOperands.push_back(
1634 MDString::get(Context, "llvm.loop.unroll.runtime.disable"));
1635 MDNode *DisableNode = MDNode::get(Context, DisableOperands);
1636 MDs.push_back(DisableNode);
1637 MDNode *NewLoopID = MDNode::get(Context, MDs);
1638 // Set operand 0 to refer to the loop id itself.
1639 NewLoopID->replaceOperandWith(0, NewLoopID);
1640 L->setLoopID(NewLoopID);
1641 }
1642}
1643
1645 Loop *VectorLoop, VPBasicBlock *HeaderVPBB, const VPlan &Plan,
1646 bool VectorizingEpilogue, MDNode *OrigLoopID,
1647 std::optional<unsigned> OrigAverageTripCount,
1648 unsigned OrigLoopInvocationWeight, unsigned EstimatedVFxUF,
1649 bool DisableRuntimeUnroll) {
1650 // Update the metadata of the scalar loop. Skip the update when vectorizing
1651 // the epilogue loop to ensure it is updated only once. Also skip the update
1652 // when the scalar loop became unreachable.
1653 if (Plan.getScalarPreheader()->hasPredecessors() && !VectorizingEpilogue) {
1654 std::optional<MDNode *> RemainderLoopID =
1657 if (RemainderLoopID) {
1658 OrigLoop->setLoopID(*RemainderLoopID);
1659 } else {
1660 if (DisableRuntimeUnroll)
1662
1663 LoopVectorizeHints Hints(OrigLoop, true, *ORE);
1664 Hints.setAlreadyVectorized();
1665 }
1666 }
1667
1668 if (!VectorLoop)
1669 return;
1670
1671 if (std::optional<MDNode *> VectorizedLoopID = makeFollowupLoopID(
1672 OrigLoopID, {LLVMLoopVectorizeFollowupAll,
1674 VectorLoop->setLoopID(*VectorizedLoopID);
1675 } else {
1676 // Keep all loop hints from the original loop on the vector loop (we'll
1677 // replace the vectorizer-specific hints below).
1678 if (OrigLoopID)
1679 VectorLoop->setLoopID(OrigLoopID);
1680
1681 if (!VectorizingEpilogue) {
1682 LoopVectorizeHints Hints(VectorLoop, true, *ORE);
1683 Hints.setAlreadyVectorized();
1684 }
1685 }
1687 TTI.getUnrollingPreferences(VectorLoop, *PSE.getSE(), UP, ORE);
1688 if (!UP.UnrollVectorizedLoop || VectorizingEpilogue)
1690
1691 // Set/update profile weights for the vector and remainder loops as original
1692 // loop iterations are now distributed among them. Note that original loop
1693 // becomes the scalar remainder loop after vectorization.
1694 //
1695 // For cases like foldTailByMasking() and requiresScalarEpiloque() we may
1696 // end up getting slightly roughened result but that should be OK since
1697 // profile is not inherently precise anyway. Note also possible bypass of
1698 // vector code caused by legality checks is ignored, assigning all the weight
1699 // to the vector loop, optimistically.
1700 //
1701 // For scalable vectorization we can't know at compile time how many
1702 // iterations of the loop are handled in one vector iteration, so instead
1703 // use the value of vscale used for tuning.
1704 unsigned AverageVectorTripCount = 0;
1705 unsigned RemainderAverageTripCount = 0;
1706 auto EC = VectorLoop->getLoopPreheader()->getParent()->getEntryCount();
1707 auto IsProfiled = EC && EC->getCount();
1708 if (!OrigAverageTripCount) {
1709 if (!IsProfiled)
1710 return;
1711 auto &SE = *PSE.getSE();
1712 AverageVectorTripCount = SE.getSmallConstantTripCount(VectorLoop);
1713 if (ProfcheckDisableMetadataFixes || !AverageVectorTripCount)
1714 return;
1715 if (Plan.getScalarPreheader()->hasPredecessors())
1716 RemainderAverageTripCount =
1717 SE.getSmallConstantTripCount(OrigLoop) % EstimatedVFxUF;
1718 // Setting to 1 should be sufficient to generate the correct branch weights.
1719 OrigLoopInvocationWeight = 1;
1720 } else {
1721 // Calculate number of iterations in unrolled loop.
1722 AverageVectorTripCount = *OrigAverageTripCount / EstimatedVFxUF;
1723 // Calculate number of iterations for remainder loop.
1724 RemainderAverageTripCount = *OrigAverageTripCount % EstimatedVFxUF;
1725 }
1726 if (HeaderVPBB) {
1727 setLoopEstimatedTripCount(VectorLoop, AverageVectorTripCount,
1728 OrigLoopInvocationWeight);
1729 }
1730
1731 if (Plan.getScalarPreheader()->hasPredecessors()) {
1732 setLoopEstimatedTripCount(OrigLoop, RemainderAverageTripCount,
1733 OrigLoopInvocationWeight);
1734 }
1735}
1736
1737#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1739 if (VPlans.empty()) {
1740 O << "LV: No VPlans built.\n";
1741 return;
1742 }
1743 for (const auto &Plan : VPlans)
1745 Plan->printDOT(O);
1746 else
1747 Plan->print(O);
1748}
1749#endif
1750
1751bool llvm::canConstantBeExtended(const APInt *C, Type *NarrowType,
1753 APInt TruncatedVal = C->trunc(NarrowType->getScalarSizeInBits());
1754 unsigned WideSize = C->getBitWidth();
1755 APInt ExtendedVal = ExtKind == TTI::PR_SignExtend
1756 ? TruncatedVal.sext(WideSize)
1757 : TruncatedVal.zext(WideSize);
1758 return ExtendedVal == *C;
1759}
1760
1763 if (auto *IRV = dyn_cast<VPIRValue>(V))
1764 return TTI::getOperandInfo(IRV->getValue());
1765
1766 return {};
1767}
1768
1770 Type *ResultTy, ArrayRef<const VPValue *> Operands, ElementCount VF,
1771 TTI::VectorInstrContext VIC, bool AlwaysIncludeReplicatingR) {
1772 if (VF.isScalar())
1773 return 0;
1774
1775 assert(!VF.isScalable() &&
1776 "Scalarization overhead not supported for scalable vectors");
1777
1778 InstructionCost ScalarizationCost = 0;
1779 // Compute the cost of scalarizing the result if needed.
1780 if (!ResultTy->isVoidTy()) {
1781 for (Type *VectorTy :
1782 to_vector(getContainedTypes(toVectorizedTy(ResultTy, VF)))) {
1783 ScalarizationCost += TTI.getScalarizationOverhead(
1785 /*Insert=*/true, /*Extract=*/false, CostKind,
1786 /*ForPoisonSrc=*/true, {}, VIC);
1787 }
1788 }
1789 // Compute the cost of scalarizing the operands, skipping ones that do not
1790 // require extraction/scalarization and do not incur any overhead.
1791 SmallPtrSet<const VPValue *, 4> UniqueOperands;
1793 for (auto *Op : Operands) {
1794 if (isa<VPIRValue>(Op) ||
1795 (!AlwaysIncludeReplicatingR &&
1798 cast<VPReplicateRecipe>(Op)->getOpcode() == Instruction::Load) ||
1799 !UniqueOperands.insert(Op).second)
1800 continue;
1801 Tys.push_back(toVectorizedTy(Types.inferScalarType(Op), VF));
1802 }
1803 return ScalarizationCost +
1804 TTI.getOperandsScalarizationOverhead(Tys, CostKind, VIC);
1805}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition Compiler.h:661
dxil pretty DXIL Metadata Pretty Printer
Flatten the CFG
#define _
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
This file defines the LoopVectorizationLegality class.
This file provides a LoopVectorizationPlanner class.
#define I(x, y, z)
Definition MD5.cpp:57
#define T
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
#define P(N)
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
static StringRef getName(Value *V)
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallVector class.
This file contains some functions that are useful when dealing with strings.
#define LLVM_DEBUG(...)
Definition Debug.h:114
This file implements dominator tree analysis for a single level of a VPlan's H-CFG.
This file contains the declarations of different VPlan-related auxiliary helpers.
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition VPlanSLP.cpp:247
This file provides utility VPlan to VPlan transformations.
#define RUN_VPLAN_PASS(PASS,...)
static void addRuntimeUnrollDisableMetaData(Loop *L)
Definition VPlan.cpp:1606
static T * getPlanEntry(T *Start)
Definition VPlan.cpp:155
static void printFinalVPlan(VPlan &)
To make RUN_VPLAN_PASS print final VPlan.
Definition VPlan.cpp:916
static T * getEnclosingLoopRegionForRegion(T *P)
Return the enclosing loop region for region P.
Definition VPlan.cpp:581
const char LLVMLoopVectorizeFollowupAll[]
Definition VPlan.cpp:61
static bool isDefinedInsideLoopRegions(const VPValue *VPV)
Returns true if there is a vector loop region and VPV is defined in a loop region.
Definition VPlan.cpp:1385
static bool hasConditionalTerminator(const VPBasicBlock *VPBB)
Definition VPlan.cpp:599
const char LLVMLoopVectorizeFollowupVectorized[]
Definition VPlan.cpp:62
static void remapOperands(VPBlockBase *Entry, VPBlockBase *NewEntry, DenseMap< VPValue *, VPValue * > &Old2NewVPValues)
Definition VPlan.cpp:1135
const char LLVMLoopVectorizeFollowupEpilogue[]
Definition VPlan.cpp:64
static std::pair< VPBlockBase *, VPBlockBase * > cloneFrom(VPBlockBase *Entry)
Definition VPlan.cpp:692
static cl::opt< bool > PrintVPlansInDotFormat("vplan-print-in-dot-format", cl::Hidden, cl::desc("Use dot format instead of plain text when dumping VPlans"))
This file contains the declarations of the Vectorization Plan base classes:
static bool IsCondBranch(unsigned BrOpc)
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1023
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
Definition APInt.cpp:996
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
iterator begin()
Instruction iterator methods.
Definition BasicBlock.h:470
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition BasicBlock.h:206
LLVM_ABI const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
size_t size() const
Definition BasicBlock.h:491
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition BasicBlock.h:233
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
std::optional< const DILocation * > cloneByMultiplyingDuplicationFactor(unsigned DF) const
Returns a new DILocation with duplication factor DF * current duplication factor encoded in the discr...
A debug info location.
Definition DebugLoc.h:123
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition DenseMap.h:205
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Definition DenseMap.h:169
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:164
constexpr bool isVector() const
One or more elements.
Definition TypeSize.h:324
constexpr bool isScalar() const
Exactly one element.
Definition TypeSize.h:320
std::optional< ProfileCount > getEntryCount(bool AllowSynthetic=false) const
Get the entry count for this function.
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
static InstructionCost getInvalid(CostType Val=0)
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
A helper class to return the specified delimiter string after the first invocation of operator String...
void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase< BlockT, LoopT > &LI)
This method is used by other analyses to update loop information.
std::vector< BlockT * > & getBlocksVector()
Return a direct, mutable handle to the blocks vector so that we can mutate it efficiently with techni...
void addChildLoop(LoopT *NewChild)
Add the specified loop to be a child of this loop.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
VPlan & getPlanFor(ElementCount VF) const
Return the VPlan for VF.
Definition VPlan.cpp:1593
void updateLoopMetadataAndProfileInfo(Loop *VectorLoop, VPBasicBlock *HeaderVPBB, const VPlan &Plan, bool VectorizingEpilogue, MDNode *OrigLoopID, std::optional< unsigned > OrigAverageTripCount, unsigned OrigLoopInvocationWeight, unsigned EstimatedVFxUF, bool DisableRuntimeUnroll)
Update loop metadata and profile info for both the scalar remainder loop and VectorLoop,...
Definition VPlan.cpp:1644
void buildVPlans(ElementCount MinVF, ElementCount MaxVF)
Build VPlans for power-of-2 VF's between MinVF and MaxVF inclusive, according to the information gath...
Definition VPlan.cpp:1577
static bool getDecisionAndClampRange(const std::function< bool(ElementCount)> &Predicate, VFRange &Range)
Test a Predicate on a Range of VF's.
Definition VPlan.cpp:1558
void printPlans(raw_ostream &O)
Definition VPlan.cpp:1738
Utility class for getting and setting loop vectorizer hints in the form of loop metadata.
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
void setLoopID(MDNode *LoopID) const
Set the llvm.loop loop id metadata for this loop.
Definition LoopInfo.cpp:526
Metadata node.
Definition Metadata.h:1080
LLVM_ABI void replaceOperandWith(unsigned I, Metadata *New)
Replace a specific operand.
const MDOperand & getOperand(unsigned I) const
Definition Metadata.h:1444
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1572
unsigned getNumOperands() const
Return number of MDNode operands.
Definition Metadata.h:1450
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
Definition Metadata.cpp:614
BlockT * getEntry() const
Get the entry BasicBlock of the Region.
Definition RegionInfo.h:320
size_type size() const
Determine the number of elements in the SetVector.
Definition SetVector.h:103
void insert_range(Range &&R)
Definition SetVector.h:176
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:151
This class provides computation of slot numbers for LLVM Assembly writing.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:339
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
std::pair< iterator, bool > try_emplace(StringRef Key, ArgsTy &&...Args)
Emplace a new element for the specified key into the map if the key isn't already in the map.
Definition StringMap.h:381
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition StringRef.h:712
StringRef rtrim(char Char) const
Return string with consecutive Char characters starting from the right removed.
Definition StringRef.h:814
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
VectorInstrContext
Represents a hint about the context in which an insert/extract is used.
static LLVM_ABI OperandValueInfo getOperandInfo(const Value *V)
Collect properties of V used in cost analysis, e.g. OP_PowerOf2.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:230
bool isVoidTy() const
Return true if this is 'void'.
Definition Type.h:139
This function has undefined behavior.
void setOperand(unsigned i, Value *Val)
Definition User.h:212
Value * getOperand(unsigned i) const
Definition User.h:207
unsigned getNumOperands() const
Definition User.h:229
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition VPlan.h:4081
void appendRecipe(VPRecipeBase *Recipe)
Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.
Definition VPlan.h:4156
RecipeListTy::iterator iterator
Instruction iterators...
Definition VPlan.h:4108
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPBasicBlock,...
Definition VPlan.cpp:491
iterator end()
Definition VPlan.h:4118
iterator begin()
Recipe iterator methods.
Definition VPlan.h:4116
VPBasicBlock * clone() override
Clone the current block and it's recipes, without updating the operands of the cloned recipes.
Definition VPlan.cpp:542
InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override
Return the cost of this VPBasicBlock.
Definition VPlan.cpp:777
const VPBasicBlock * getCFGPredecessor(unsigned Idx) const
Returns the predecessor block at index Idx with the predecessors as per the corresponding plain CFG.
Definition VPlan.cpp:784
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
Definition VPlan.cpp:230
void connectToPredecessors(VPTransformState &State)
Connect the VPBBs predecessors' in the VPlan CFG to the IR basic block generated for this VPBB.
Definition VPlan.cpp:391
VPRegionBlock * getEnclosingLoopRegion()
Definition VPlan.cpp:591
VPBasicBlock * splitAt(iterator SplitAt)
Split current block at SplitAt by inserting a new block between the current block and its successors ...
Definition VPlan.cpp:563
RecipeListTy Recipes
The VPRecipes held in the order of output instructions to generate.
Definition VPlan.h:4096
void executeRecipes(VPTransformState *State, BasicBlock *BB)
Execute the recipes in the IR basic block BB.
Definition VPlan.cpp:549
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print this VPBsicBlock to O, prefixing all lines with Indent.
Definition VPlan.cpp:671
bool isExiting() const
Returns true if the block is exiting it's parent region.
Definition VPlan.cpp:649
VPRecipeBase * getTerminator()
If the block has multiple successors, return the branch recipe terminating the block.
Definition VPlan.cpp:637
const VPRecipeBase & back() const
Definition VPlan.h:4130
bool empty() const
Definition VPlan.h:4127
size_t size() const
Definition VPlan.h:4126
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
Definition VPlan.h:81
void setSuccessors(ArrayRef< VPBlockBase * > NewSuccs)
Set each VPBasicBlock in NewSuccss as successor of this VPBlockBase.
Definition VPlan.h:300
VPRegionBlock * getParent()
Definition VPlan.h:173
const VPBasicBlock * getExitingBasicBlock() const
Definition VPlan.cpp:200
void setName(const Twine &newName)
Definition VPlan.h:166
size_t getNumSuccessors() const
Definition VPlan.h:219
iterator_range< VPBlockBase ** > successors()
Definition VPlan.h:201
virtual void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const =0
Print plain-text dump of this VPBlockBase to O, prefixing all lines with Indent.
bool hasPredecessors() const
Returns true if this block has any predecessors.
Definition VPlan.h:223
void printSuccessors(raw_ostream &O, const Twine &Indent) const
Print the successors of this block to O, prefixing all lines with Indent.
Definition VPlan.cpp:659
size_t getNumPredecessors() const
Definition VPlan.h:220
void setPredecessors(ArrayRef< VPBlockBase * > NewPreds)
Set each VPBasicBlock in NewPreds as predecessor of this VPBlockBase.
Definition VPlan.h:291
VPBlockBase * getEnclosingBlockWithPredecessors()
Definition VPlan.cpp:222
const VPBlocksTy & getPredecessors() const
Definition VPlan.h:204
VPlan * getPlan()
Definition VPlan.cpp:175
void setPlan(VPlan *ParentPlan)
Sets the pointer of the plan containing the block.
Definition VPlan.cpp:194
const std::string & getName() const
Definition VPlan.h:164
VPBlockBase * getSinglePredecessor() const
Definition VPlan.h:215
const VPBlocksTy & getHierarchicalSuccessors()
Definition VPlan.h:242
VPBlockBase(const unsigned char SC, const std::string &N)
Definition VPlan.h:150
VPBlockBase * getEnclosingBlockWithSuccessors()
An Enclosing Block of a block B is any block containing B, including B itself.
Definition VPlan.cpp:214
const VPBasicBlock * getEntryBasicBlock() const
Definition VPlan.cpp:180
VPBlockBase * getSingleHierarchicalPredecessor()
Definition VPlan.h:264
VPBlockBase * getSingleSuccessor() const
Definition VPlan.h:209
const VPBlocksTy & getSuccessors() const
Definition VPlan.h:198
static auto blocksOnly(const T &Range)
Return an iterator range over Range which only includes BlockTy blocks.
Definition VPlanUtils.h:243
static void insertBlockAfter(VPBlockBase *NewBlock, VPBlockBase *BlockPtr)
Insert disconnected VPBlockBase NewBlock after BlockPtr.
Definition VPlanUtils.h:144
static bool isLatch(const VPBlockBase *VPB, const VPDominatorTree &VPDT)
Returns true if VPB is a loop latch, using isHeader().
static bool isHeader(const VPBlockBase *VPB, const VPDominatorTree &VPDT)
Returns true if VPB is a loop header, based on regions or VPDT in their absence.
static void connectBlocks(VPBlockBase *From, VPBlockBase *To, unsigned PredIdx=-1u, unsigned SuccIdx=-1u)
Connect VPBlockBases From and To bi-directionally.
Definition VPlanUtils.h:195
static void disconnectBlocks(VPBlockBase *From, VPBlockBase *To)
Disconnect VPBlockBases From and To bi-directionally.
Definition VPlanUtils.h:213
static void transferSuccessors(VPBlockBase *Old, VPBlockBase *New)
Transfer successors from Old to New. New must have no successors.
Definition VPlanUtils.h:233
VPlan-based builder utility analogous to IRBuilder.
VPPhi * createScalarPhi(ArrayRef< VPValue * > IncomingValues, DebugLoc DL, const Twine &Name="")
This class augments a recipe with a set of VPValues defined by the recipe.
Definition VPlanValue.h:361
A special type of VPBasicBlock that wraps an existing IR basic block.
Definition VPlan.h:4234
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPBasicBlock,...
Definition VPlan.cpp:459
BasicBlock * getIRBasicBlock() const
Definition VPlan.h:4258
VPIRBasicBlock * clone() override
Clone the current block and it's recipes, without updating the operands of the cloned recipes.
Definition VPlan.cpp:484
static LLVM_ABI_FOR_TEST VPIRInstruction * create(Instruction &I)
Create a new VPIRPhi for \I , if it is a PHINode, otherwise create a VPIRInstruction.
In what follows, the term "input IR" refers to code that is fed into the vectorizer whereas the term ...
Value * getAsRuntimeExpr(IRBuilderBase &Builder, const ElementCount &VF) const
Returns an expression describing the lane index that can be used at runtime.
Definition VPlan.cpp:85
static VPLane getFirstLane()
@ ScalableLast
For ScalableLast, Lane is the offset from the start of the last N-element subvector in a scalable vec...
@ First
For First, Lane is the index into the first N elements of a fixed-vector <N x <ElTy>> or a scalable v...
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition VPlan.h:387
LLVM_ABI_FOR_TEST void dump() const
Dump the recipe to stderr (for debugging).
Definition VPlan.cpp:114
VPBasicBlock * getParent()
Definition VPlan.h:462
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const
Print the recipe, delegating to printRecipe().
virtual LLVM_ABI_FOR_TEST ~VPRecipeValue()
Definition VPlan.cpp:147
friend class VPValue
Definition VPlanValue.h:233
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
Definition VPlan.h:4269
VPRegionBlock * clone() override
Clone all blocks in the single-entry single-exit region of the block and their recipes without updati...
Definition VPlan.cpp:742
const VPBlockBase * getEntry() const
Definition VPlan.h:4305
void dissolveToCFGLoop()
Remove the current region from its VPlan, connecting its predecessor to its entry,...
Definition VPlan.cpp:846
bool isReplicator() const
An indicator whether this region is to generate multiple replicated instances of output IR correspond...
Definition VPlan.h:4337
InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override
Return the cost of the block.
Definition VPlan.cpp:803
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print this VPRegionBlock to O (recursively), prefixing all lines with Indent.
Definition VPlan.cpp:832
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPRegionBlock,...
Definition VPlan.cpp:755
const VPBlockBase * getExiting() const
Definition VPlan.h:4317
friend class VPlan
Definition VPlan.h:4270
This class can be used to assign names to VPValues.
std::string getOrCreateName(const VPValue *V) const
Returns the name assigned to V, if there is one, otherwise try to construct one from the underlying v...
Definition VPlan.cpp:1530
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
Definition VPlanValue.h:258
void replaceUsesOfWith(VPValue *From, VPValue *To)
Replaces all uses of From in the VPUser with To.
Definition VPlan.cpp:1425
void printOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the operands to O.
Definition VPlan.cpp:1437
operand_range operands()
Definition VPlanValue.h:326
void setOperand(unsigned I, VPValue *New)
Definition VPlanValue.h:302
unsigned getNumOperands() const
Definition VPlanValue.h:296
VPValue * getOperand(unsigned N) const
Definition VPlanValue.h:297
This is the base class of the VPlan Def/Use graph, used for modeling the data flow into,...
Definition VPlanValue.h:46
Value * getLiveInIRValue() const
Return the underlying IR value for a VPIRValue.
Definition VPlan.cpp:135
bool isDefinedOutsideLoopRegions() const
Returns true if the VPValue is defined outside any loop.
Definition VPlan.cpp:1391
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Definition VPlan.cpp:125
void printAsOperand(raw_ostream &OS, VPSlotTracker &Tracker) const
Definition VPlan.cpp:1433
friend class VPRecipeValue
Definition VPlanValue.h:50
Value * getUnderlyingValue() const
Return the underlying Value attached to this VPValue.
Definition VPlanValue.h:71
@ VPVRecipeValueSC
A symbolic live-in VPValue without IR backing.
Definition VPlanValue.h:81
void dump() const
Dump the value to stderr (for debugging).
Definition VPlan.cpp:106
void print(raw_ostream &OS, VPSlotTracker &Tracker) const
Definition VPlan.cpp:99
void replaceAllUsesWith(VPValue *New)
Definition VPlan.cpp:1394
unsigned getNumUsers() const
Definition VPlanValue.h:104
void replaceUsesWithIf(VPValue *New, llvm::function_ref< bool(VPUser &U, unsigned Idx)> ShouldReplace)
Go through the uses list for this VPValue and make each use point to New if the callback ShouldReplac...
Definition VPlan.cpp:1398
LLVM_DUMP_METHOD void dump()
Definition VPlan.cpp:1265
VPlan models a candidate for vectorization, encoding various decisions take to produce efficient outp...
Definition VPlan.h:4399
LLVM_ABI_FOR_TEST void printDOT(raw_ostream &O) const
Print this VPlan in DOT format to O.
Definition VPlan.cpp:1126
friend class VPSlotTracker
Definition VPlan.h:4401
std::string getName() const
Return a string with the name of the plan and the applicable VFs and UFs.
Definition VPlan.cpp:1102
VPBasicBlock * getEntry()
Definition VPlan.h:4488
void setName(const Twine &newName)
Definition VPlan.h:4634
VPIRBasicBlock * getExitBlock(BasicBlock *IRBB) const
Return the VPIRBasicBlock corresponding to IRBB.
Definition VPlan.cpp:903
LLVM_ABI_FOR_TEST ~VPlan()
Definition VPlan.cpp:881
bool isExitBlock(VPBlockBase *VPBB)
Returns true if VPBB is an exit block.
Definition VPlan.cpp:911
friend class VPlanPrinter
Definition VPlan.h:4400
unsigned getUF() const
Definition VPlan.h:4616
VPIRBasicBlock * createEmptyVPIRBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock wrapping IRBB, but do not create VPIRInstructions wrapping the instructions i...
Definition VPlan.cpp:1237
auto getLiveIns() const
Return the list of live-in VPValues available in the VPlan.
Definition VPlan.h:4685
ArrayRef< VPIRBasicBlock * > getExitBlocks() const
Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of the original scalar loop.
Definition VPlan.h:4536
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
Definition VPlan.cpp:1031
InstructionCost cost(ElementCount VF, VPCostContext &Ctx)
Return the cost of this plan.
Definition VPlan.cpp:1013
void setEntry(VPBasicBlock *VPBB)
Definition VPlan.h:4477
VPBasicBlock * createVPBasicBlock(const Twine &Name, VPRecipeBase *Recipe=nullptr)
Create a new VPBasicBlock with Name and containing Recipe if present.
Definition VPlan.h:4708
LLVM_ABI_FOR_TEST VPIRBasicBlock * createVPIRBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock from IRBB containing VPIRInstructions for all instructions in IRBB,...
Definition VPlan.cpp:1243
LLVM_DUMP_METHOD void dump() const
Dump the plan to stderr (for debugging).
Definition VPlan.cpp:1132
VPBasicBlock * getScalarPreheader() const
Return the VPBasicBlock for the preheader of the scalar loop.
Definition VPlan.h:4527
void execute(VPTransformState *State)
Generate the IR code for this VPlan.
Definition VPlan.cpp:921
LLVM_ABI_FOR_TEST void print(raw_ostream &O) const
Print this VPlan to O.
Definition VPlan.cpp:1085
VPIRBasicBlock * getScalarHeader() const
Return the VPIRBasicBlock wrapping the header of the scalar loop.
Definition VPlan.h:4532
void printLiveIns(raw_ostream &O) const
Print the live-ins of this VPlan to O.
Definition VPlan.cpp:1047
LLVM_ABI_FOR_TEST VPlan * duplicate()
Clone the current VPlan, update all VPValues of the new VPlan and cloned recipes to refer to the clon...
Definition VPlan.cpp:1173
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
static constexpr bool isKnownLT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition TypeSize.h:216
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition TypeSize.h:168
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:165
An efficient, type-erasing, non-owning reference to a callable.
self_iterator getIterator()
Definition ilist_node.h:123
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
A raw_ostream that writes to an std::string.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Tail
Attemps to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition CallingConv.h:76
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
LLVM_ABI std::string EscapeString(const std::string &Label)
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
bool match(Val *V, const Pattern &P)
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
VPInstruction_match< VPInstruction::BranchOnTwoConds > m_BranchOnTwoConds()
VPInstruction_match< VPInstruction::BranchOnCount > m_BranchOnCount()
VPInstruction_match< VPInstruction::BuildVector > m_BuildVector()
BuildVector is matches only its opcode, w/o matching its operands as the number of operands is not fi...
VPInstruction_match< VPInstruction::BranchOnCond > m_BranchOnCond()
bool isSingleScalar(const VPValue *VPV)
Returns true if VPV is a single scalar, either because it produces the same value for all lanes or on...
VPBasicBlock * getFirstLoopHeader(VPlan &Plan, VPDominatorTree &VPDT)
Returns the header block of the first, top-level loop, or null if none exist.
bool onlyFirstLaneUsed(const VPValue *Def)
Returns true if only the first lane of Def is used.
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
Definition STLExtras.h:829
InstructionCost Cost
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI cl::opt< bool > EnableFSDiscriminator
Value * getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF)
Return the runtime value for VF.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ABI std::optional< MDNode * > makeFollowupLoopID(MDNode *OrigLoopID, ArrayRef< StringRef > FollowupAttrs, const char *InheritOptionsAttrsPrefix="", bool AlwaysNew=false)
Create a new loop identifier for a loop created from a loop transformation.
void interleaveComma(const Container &c, StreamT &os, UnaryFunctor each_fn)
Definition STLExtras.h:2303
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:632
iterator_range< df_iterator< VPBlockShallowTraversalWrapper< VPBlockBase * > > > vp_depth_first_shallow(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order.
Definition VPlanCFG.h:216
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1744
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
Type * toVectorizedTy(Type *Ty, ElementCount EC)
A helper for converting to vectorized types.
bool canConstantBeExtended(const APInt *C, Type *NarrowType, TTI::PartialReductionExtendKind ExtKind)
Check if a constant CI can be safely treated as having been extended from a narrower type with the gi...
Definition VPlan.cpp:1751
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
cl::opt< unsigned > ForceTargetInstructionCost
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
RNSuccIterator< NodeRef, BlockT, RegionT > succ_begin(NodeRef Node)
RNSuccIterator< NodeRef, BlockT, RegionT > succ_end(NodeRef Node)
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
LLVM_ABI bool setLoopEstimatedTripCount(Loop *L, unsigned EstimatedTripCount, std::optional< unsigned > EstimatedLoopInvocationWeight=std::nullopt)
Set llvm.loop.estimated_trip_count with the value EstimatedTripCount in the loop metadata of L.
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition STLExtras.h:2009
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1770
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1945
ArrayRef< Type * > getContainedTypes(Type *const &Ty)
Returns the types contained in Ty.
cl::opt< bool > ProfcheckDisableMetadataFixes("profcheck-disable-metadata-fixes", cl::Hidden, cl::init(false), cl::desc("Disable metadata propagation fixes discovered through Issue #147390"))
Definition Metadata.cpp:64
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305
LLVM_ABI void DeleteDeadBlocks(ArrayRef< BasicBlock * > BBs, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified blocks from BB.
std::unique_ptr< VPlan > VPlanPtr
Definition VPlan.h:77
Parameters that control the generic loop unrolling transformation.
bool UnrollVectorizedLoop
Disable runtime unrolling by default for vectorized loops.
A range of powers-of-2 vectorization factors with fixed start and adjustable end.
ElementCount End
Struct to hold various analysis needed for cost computations.
TargetTransformInfo::OperandValueInfo getOperandInfo(VPValue *V) const
Returns the OperandInfo for V, if it is a live-in.
Definition VPlan.cpp:1762
InstructionCost getScalarizationOverhead(Type *ResultTy, ArrayRef< const VPValue * > Operands, ElementCount VF, TTI::VectorInstrContext VIC=TTI::VectorInstrContext::None, bool AlwaysIncludeReplicatingR=false)
Estimate the overhead of scalarizing a recipe with result type ResultTy and Operands with VF.
Definition VPlan.cpp:1769
TargetTransformInfo::TargetCostKind CostKind
VPTypeAnalysis Types
const TargetTransformInfo & TTI
A VPValue representing a live-in from the input IR or a constant.
Definition VPlanValue.h:183
Type * getType() const
Returns the type of the underlying IR value.
Definition VPlan.cpp:139
A symbolic live-in VPValue, used for values like vector trip count, VF, and VFxUF.
Definition VPlanValue.h:223
VPTransformState holds information passed down when "executing" a VPlan, needed for generating the ou...
LoopInfo * LI
Hold a pointer to LoopInfo to register new basic blocks in the loop.
VPTypeAnalysis TypeAnalysis
VPlan-based type analysis.
struct llvm::VPTransformState::DataState Data
struct llvm::VPTransformState::CFGState CFG
Value * get(const VPValue *Def, bool IsScalar=false)
Get the generated vector Value for a given VPValue Def if IsScalar is false, otherwise return the gen...
Definition VPlan.cpp:277
VPTransformState(const TargetTransformInfo *TTI, ElementCount VF, LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC, IRBuilderBase &Builder, VPlan *Plan, Loop *CurrentParentLoop, Type *CanonicalIVTy)
Definition VPlan.cpp:237
std::optional< VPLane > Lane
Hold the index to generate specific scalar instructions.
IRBuilderBase & Builder
Hold a reference to the IRBuilder used to generate output IR code.
bool hasScalarValue(const VPValue *Def, VPLane Lane)
const TargetTransformInfo * TTI
Target Transform Info.
VPlan * Plan
Pointer to the VPlan code is generated for.
void set(const VPValue *Def, Value *V, bool IsScalar=false)
Set the generated vector Value for a given VPValue, if IsScalar is false.
bool hasVectorValue(const VPValue *Def)
VPDominatorTree VPDT
VPlan-based dominator tree.
ElementCount VF
The chosen Vectorization Factor of the loop being vectorized.
Value * packScalarIntoVectorizedValue(const VPValue *Def, Value *WideValue, const VPLane &Lane)
Insert the scalar value of Def at Lane into Lane of WideValue and return the resulting value.
Definition VPlan.cpp:359
AssumptionCache * AC
Hold a pointer to AssumptionCache to register new assumptions after replicating assume calls.
void setDebugLocFrom(DebugLoc DL)
Set the debug location in the builder using the debug location DL.
Definition VPlan.cpp:337
Loop * CurrentParentLoop
The parent loop object for the current scope, or nullptr.
static LLVM_ABI_FOR_TEST void optimize(VPlan &Plan)
Apply VPlan-to-VPlan optimizations to Plan, including induction recipe optimizations,...