LLVM 17.0.0git
VPlan.cpp
Go to the documentation of this file.
1//===- VPlan.cpp - Vectorizer Plan ----------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This is the LLVM vectorization plan. It represents a candidate for
11/// vectorization, allowing to plan and optimize how to vectorize a given loop
12/// before generating LLVM-IR.
13/// The vectorizer uses vectorization plans to estimate the costs of potential
14/// candidates and if profitable to execute the desired plan, generating vector
15/// LLVM-IR code.
16///
17//===----------------------------------------------------------------------===//
18
19#include "VPlan.h"
20#include "VPlanCFG.h"
21#include "VPlanDominatorTree.h"
24#include "llvm/ADT/STLExtras.h"
26#include "llvm/ADT/Twine.h"
28#include "llvm/IR/BasicBlock.h"
29#include "llvm/IR/CFG.h"
30#include "llvm/IR/IRBuilder.h"
31#include "llvm/IR/Instruction.h"
33#include "llvm/IR/Type.h"
34#include "llvm/IR/Value.h"
37#include "llvm/Support/Debug.h"
44#include <cassert>
45#include <string>
46#include <vector>
47
48using namespace llvm;
49
50namespace llvm {
52}
53
54#define DEBUG_TYPE "vplan"
55
56#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
58 const VPInstruction *Instr = dyn_cast<VPInstruction>(&V);
60 (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() : nullptr);
61 V.print(OS, SlotTracker);
62 return OS;
63}
64#endif
65
67 const ElementCount &VF) const {
68 switch (LaneKind) {
70 // Lane = RuntimeVF - VF.getKnownMinValue() + Lane
71 return Builder.CreateSub(getRuntimeVF(Builder, Builder.getInt32Ty(), VF),
72 Builder.getInt32(VF.getKnownMinValue() - Lane));
74 return Builder.getInt32(Lane);
75 }
76 llvm_unreachable("Unknown lane kind");
77}
78
79VPValue::VPValue(const unsigned char SC, Value *UV, VPDef *Def)
80 : SubclassID(SC), UnderlyingVal(UV), Def(Def) {
81 if (Def)
82 Def->addDefinedValue(this);
83}
84
86 assert(Users.empty() && "trying to delete a VPValue with remaining users");
87 if (Def)
88 Def->removeDefinedValue(this);
89}
90
91#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
93 if (const VPRecipeBase *R = dyn_cast_or_null<VPRecipeBase>(Def))
94 R->print(OS, "", SlotTracker);
95 else
97}
98
99void VPValue::dump() const {
100 const VPRecipeBase *Instr = dyn_cast_or_null<VPRecipeBase>(this->Def);
102 (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() : nullptr);
104 dbgs() << "\n";
105}
106
107void VPDef::dump() const {
108 const VPRecipeBase *Instr = dyn_cast_or_null<VPRecipeBase>(this);
110 (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() : nullptr);
111 print(dbgs(), "", SlotTracker);
112 dbgs() << "\n";
113}
114#endif
115
117 return cast_or_null<VPRecipeBase>(Def);
118}
119
121 return cast_or_null<VPRecipeBase>(Def);
122}
123
124// Get the top-most entry block of \p Start. This is the entry block of the
125// containing VPlan. This function is templated to support both const and non-const blocks
126template <typename T> static T *getPlanEntry(T *Start) {
127 T *Next = Start;
128 T *Current = Start;
129 while ((Next = Next->getParent()))
130 Current = Next;
131
132 SmallSetVector<T *, 8> WorkList;
133 WorkList.insert(Current);
134
135 for (unsigned i = 0; i < WorkList.size(); i++) {
136 T *Current = WorkList[i];
137 if (Current->getNumPredecessors() == 0)
138 return Current;
139 auto &Predecessors = Current->getPredecessors();
140 WorkList.insert(Predecessors.begin(), Predecessors.end());
141 }
142
143 llvm_unreachable("VPlan without any entry node without predecessors");
144}
145
146VPlan *VPBlockBase::getPlan() { return getPlanEntry(this)->Plan; }
147
148const VPlan *VPBlockBase::getPlan() const { return getPlanEntry(this)->Plan; }
149
150/// \return the VPBasicBlock that is the entry of Block, possibly indirectly.
152 const VPBlockBase *Block = this;
153 while (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
154 Block = Region->getEntry();
155 return cast<VPBasicBlock>(Block);
156}
157
159 VPBlockBase *Block = this;
160 while (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
161 Block = Region->getEntry();
162 return cast<VPBasicBlock>(Block);
163}
164
165void VPBlockBase::setPlan(VPlan *ParentPlan) {
166 assert(ParentPlan->getEntry() == this &&
167 "Can only set plan on its entry block.");
168 Plan = ParentPlan;
169}
170
171/// \return the VPBasicBlock that is the exit of Block, possibly indirectly.
173 const VPBlockBase *Block = this;
174 while (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
175 Block = Region->getExiting();
176 return cast<VPBasicBlock>(Block);
177}
178
180 VPBlockBase *Block = this;
181 while (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
182 Block = Region->getExiting();
183 return cast<VPBasicBlock>(Block);
184}
185
187 if (!Successors.empty() || !Parent)
188 return this;
189 assert(Parent->getExiting() == this &&
190 "Block w/o successors not the exiting block of its parent.");
191 return Parent->getEnclosingBlockWithSuccessors();
192}
193
195 if (!Predecessors.empty() || !Parent)
196 return this;
197 assert(Parent->getEntry() == this &&
198 "Block w/o predecessors not the entry of its parent.");
199 return Parent->getEnclosingBlockWithPredecessors();
200}
201
204 delete Block;
205}
206
208 iterator It = begin();
209 while (It != end() && It->isPhi())
210 It++;
211 return It;
212}
213
215 if (!Def->hasDefiningRecipe())
216 return Def->getLiveInIRValue();
217
218 if (hasScalarValue(Def, Instance)) {
219 return Data
220 .PerPartScalars[Def][Instance.Part][Instance.Lane.mapToCacheIndex(VF)];
221 }
222
223 assert(hasVectorValue(Def, Instance.Part));
224 auto *VecPart = Data.PerPartOutput[Def][Instance.Part];
225 if (!VecPart->getType()->isVectorTy()) {
226 assert(Instance.Lane.isFirstLane() && "cannot get lane > 0 for scalar");
227 return VecPart;
228 }
229 // TODO: Cache created scalar values.
230 Value *Lane = Instance.Lane.getAsRuntimeExpr(Builder, VF);
231 auto *Extract = Builder.CreateExtractElement(VecPart, Lane);
232 // set(Def, Extract, Instance);
233 return Extract;
234}
236 VPRegionBlock *LoopRegion = R->getParent()->getEnclosingLoopRegion();
237 return VPBB2IRBB[LoopRegion->getPreheaderVPBB()];
238}
239
241 const Instruction *Orig) {
242 // If the loop was versioned with memchecks, add the corresponding no-alias
243 // metadata.
244 if (LVer && (isa<LoadInst>(Orig) || isa<StoreInst>(Orig)))
245 LVer->annotateInstWithNoAlias(To, Orig);
246}
247
250 addNewMetadata(To, From);
251}
252
254 for (Value *V : To) {
255 if (Instruction *I = dyn_cast<Instruction>(V))
257 }
258}
259
261 const Instruction *Inst = dyn_cast<Instruction>(V);
262 if (!Inst) {
264 return;
265 }
266
267 const DILocation *DIL = Inst->getDebugLoc();
268 // When a FSDiscriminator is enabled, we don't need to add the multiply
269 // factors to the discriminators.
270 if (DIL && Inst->getFunction()->shouldEmitDebugInfoForProfiling() &&
271 !isa<DbgInfoIntrinsic>(Inst) && !EnableFSDiscriminator) {
272 // FIXME: For scalable vectors, assume vscale=1.
273 auto NewDIL =
275 if (NewDIL)
277 else
278 LLVM_DEBUG(dbgs() << "Failed to create new discriminator: "
279 << DIL->getFilename() << " Line: " << DIL->getLine());
280 } else
282}
283
285VPBasicBlock::createEmptyBasicBlock(VPTransformState::CFGState &CFG) {
286 // BB stands for IR BasicBlocks. VPBB stands for VPlan VPBasicBlocks.
287 // Pred stands for Predessor. Prev stands for Previous - last visited/created.
288 BasicBlock *PrevBB = CFG.PrevBB;
289 BasicBlock *NewBB = BasicBlock::Create(PrevBB->getContext(), getName(),
290 PrevBB->getParent(), CFG.ExitBB);
291 LLVM_DEBUG(dbgs() << "LV: created " << NewBB->getName() << '\n');
292
293 // Hook up the new basic block to its predecessors.
294 for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors()) {
295 VPBasicBlock *PredVPBB = PredVPBlock->getExitingBasicBlock();
296 auto &PredVPSuccessors = PredVPBB->getHierarchicalSuccessors();
297 BasicBlock *PredBB = CFG.VPBB2IRBB[PredVPBB];
298
299 assert(PredBB && "Predecessor basic-block not found building successor.");
300 auto *PredBBTerminator = PredBB->getTerminator();
301 LLVM_DEBUG(dbgs() << "LV: draw edge from" << PredBB->getName() << '\n');
302
303 auto *TermBr = dyn_cast<BranchInst>(PredBBTerminator);
304 if (isa<UnreachableInst>(PredBBTerminator)) {
305 assert(PredVPSuccessors.size() == 1 &&
306 "Predecessor ending w/o branch must have single successor.");
307 DebugLoc DL = PredBBTerminator->getDebugLoc();
308 PredBBTerminator->eraseFromParent();
309 auto *Br = BranchInst::Create(NewBB, PredBB);
310 Br->setDebugLoc(DL);
311 } else if (TermBr && !TermBr->isConditional()) {
312 TermBr->setSuccessor(0, NewBB);
313 } else {
314 // Set each forward successor here when it is created, excluding
315 // backedges. A backward successor is set when the branch is created.
316 unsigned idx = PredVPSuccessors.front() == this ? 0 : 1;
317 assert(!TermBr->getSuccessor(idx) &&
318 "Trying to reset an existing successor block.");
319 TermBr->setSuccessor(idx, NewBB);
320 }
321 }
322 return NewBB;
323}
324
326 bool Replica = State->Instance && !State->Instance->isFirstIteration();
327 VPBasicBlock *PrevVPBB = State->CFG.PrevVPBB;
328 VPBlockBase *SingleHPred = nullptr;
329 BasicBlock *NewBB = State->CFG.PrevBB; // Reuse it if possible.
330
331 auto IsLoopRegion = [](VPBlockBase *BB) {
332 auto *R = dyn_cast<VPRegionBlock>(BB);
333 return R && !R->isReplicator();
334 };
335
336 // 1. Create an IR basic block, or reuse the last one or ExitBB if possible.
337 if (getPlan()->getVectorLoopRegion()->getSingleSuccessor() == this) {
338 // ExitBB can be re-used for the exit block of the Plan.
339 NewBB = State->CFG.ExitBB;
340 State->CFG.PrevBB = NewBB;
341
342 // Update the branch instruction in the predecessor to branch to ExitBB.
343 VPBlockBase *PredVPB = getSingleHierarchicalPredecessor();
344 VPBasicBlock *ExitingVPBB = PredVPB->getExitingBasicBlock();
345 assert(PredVPB->getSingleSuccessor() == this &&
346 "predecessor must have the current block as only successor");
347 BasicBlock *ExitingBB = State->CFG.VPBB2IRBB[ExitingVPBB];
348 // The Exit block of a loop is always set to be successor 0 of the Exiting
349 // block.
350 cast<BranchInst>(ExitingBB->getTerminator())->setSuccessor(0, NewBB);
351 } else if (PrevVPBB && /* A */
352 !((SingleHPred = getSingleHierarchicalPredecessor()) &&
353 SingleHPred->getExitingBasicBlock() == PrevVPBB &&
354 PrevVPBB->getSingleHierarchicalSuccessor() &&
355 (SingleHPred->getParent() == getEnclosingLoopRegion() &&
356 !IsLoopRegion(SingleHPred))) && /* B */
357 !(Replica && getPredecessors().empty())) { /* C */
358 // The last IR basic block is reused, as an optimization, in three cases:
359 // A. the first VPBB reuses the loop pre-header BB - when PrevVPBB is null;
360 // B. when the current VPBB has a single (hierarchical) predecessor which
361 // is PrevVPBB and the latter has a single (hierarchical) successor which
362 // both are in the same non-replicator region; and
363 // C. when the current VPBB is an entry of a region replica - where PrevVPBB
364 // is the exiting VPBB of this region from a previous instance, or the
365 // predecessor of this region.
366
367 NewBB = createEmptyBasicBlock(State->CFG);
368 State->Builder.SetInsertPoint(NewBB);
369 // Temporarily terminate with unreachable until CFG is rewired.
370 UnreachableInst *Terminator = State->Builder.CreateUnreachable();
371 // Register NewBB in its loop. In innermost loops its the same for all
372 // BB's.
373 if (State->CurrentVectorLoop)
374 State->CurrentVectorLoop->addBasicBlockToLoop(NewBB, *State->LI);
375 State->Builder.SetInsertPoint(Terminator);
376 State->CFG.PrevBB = NewBB;
377 }
378
379 // 2. Fill the IR basic block with IR instructions.
380 LLVM_DEBUG(dbgs() << "LV: vectorizing VPBB:" << getName()
381 << " in BB:" << NewBB->getName() << '\n');
382
383 State->CFG.VPBB2IRBB[this] = NewBB;
384 State->CFG.PrevVPBB = this;
385
386 for (VPRecipeBase &Recipe : Recipes)
387 Recipe.execute(*State);
388
389 LLVM_DEBUG(dbgs() << "LV: filled BB:" << *NewBB);
390}
391
393 for (VPRecipeBase &R : Recipes) {
394 for (auto *Def : R.definedValues())
395 Def->replaceAllUsesWith(NewValue);
396
397 for (unsigned I = 0, E = R.getNumOperands(); I != E; I++)
398 R.setOperand(I, NewValue);
399 }
400}
401
403 assert((SplitAt == end() || SplitAt->getParent() == this) &&
404 "can only split at a position in the same block");
405
407 // First, disconnect the current block from its successors.
408 for (VPBlockBase *Succ : Succs)
410
411 // Create new empty block after the block to split.
412 auto *SplitBlock = new VPBasicBlock(getName() + ".split");
414
415 // Add successors for block to split to new block.
416 for (VPBlockBase *Succ : Succs)
418
419 // Finally, move the recipes starting at SplitAt to new block.
420 for (VPRecipeBase &ToMove :
421 make_early_inc_range(make_range(SplitAt, this->end())))
422 ToMove.moveBefore(*SplitBlock, SplitBlock->end());
423
424 return SplitBlock;
425}
426
429 if (P && P->isReplicator()) {
430 P = P->getParent();
431 assert(!cast<VPRegionBlock>(P)->isReplicator() &&
432 "unexpected nested replicate regions");
433 }
434 return P;
435}
436
437static bool hasConditionalTerminator(const VPBasicBlock *VPBB) {
438 if (VPBB->empty()) {
439 assert(
440 VPBB->getNumSuccessors() < 2 &&
441 "block with multiple successors doesn't have a recipe as terminator");
442 return false;
443 }
444
445 const VPRecipeBase *R = &VPBB->back();
446 auto *VPI = dyn_cast<VPInstruction>(R);
447 bool IsCondBranch =
448 isa<VPBranchOnMaskRecipe>(R) ||
449 (VPI && (VPI->getOpcode() == VPInstruction::BranchOnCond ||
450 VPI->getOpcode() == VPInstruction::BranchOnCount));
451 (void)IsCondBranch;
452
453 if (VPBB->getNumSuccessors() >= 2 || VPBB->isExiting()) {
454 assert(IsCondBranch && "block with multiple successors not terminated by "
455 "conditional branch recipe");
456
457 return true;
458 }
459
460 assert(
461 !IsCondBranch &&
462 "block with 0 or 1 successors terminated by conditional branch recipe");
463 return false;
464}
465
467 if (hasConditionalTerminator(this))
468 return &back();
469 return nullptr;
470}
471
473 if (hasConditionalTerminator(this))
474 return &back();
475 return nullptr;
476}
477
479 return getParent()->getExitingBasicBlock() == this;
480}
481
482#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
483void VPBlockBase::printSuccessors(raw_ostream &O, const Twine &Indent) const {
484 if (getSuccessors().empty()) {
485 O << Indent << "No successors\n";
486 } else {
487 O << Indent << "Successor(s): ";
488 ListSeparator LS;
489 for (auto *Succ : getSuccessors())
490 O << LS << Succ->getName();
491 O << '\n';
492 }
493}
494
495void VPBasicBlock::print(raw_ostream &O, const Twine &Indent,
496 VPSlotTracker &SlotTracker) const {
497 O << Indent << getName() << ":\n";
498
499 auto RecipeIndent = Indent + " ";
500 for (const VPRecipeBase &Recipe : *this) {
501 Recipe.print(O, RecipeIndent, SlotTracker);
502 O << '\n';
503 }
504
505 printSuccessors(O, Indent);
506}
507#endif
508
511 // Drop all references in VPBasicBlocks and replace all uses with
512 // DummyValue.
513 Block->dropAllReferences(NewValue);
514}
515
518 RPOT(Entry);
519
520 if (!isReplicator()) {
521 // Create and register the new vector loop.
522 Loop *PrevLoop = State->CurrentVectorLoop;
523 State->CurrentVectorLoop = State->LI->AllocateLoop();
524 BasicBlock *VectorPH = State->CFG.VPBB2IRBB[getPreheaderVPBB()];
525 Loop *ParentLoop = State->LI->getLoopFor(VectorPH);
526
527 // Insert the new loop into the loop nest and register the new basic blocks
528 // before calling any utilities such as SCEV that require valid LoopInfo.
529 if (ParentLoop)
530 ParentLoop->addChildLoop(State->CurrentVectorLoop);
531 else
532 State->LI->addTopLevelLoop(State->CurrentVectorLoop);
533
534 // Visit the VPBlocks connected to "this", starting from it.
535 for (VPBlockBase *Block : RPOT) {
536 LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n');
537 Block->execute(State);
538 }
539
540 State->CurrentVectorLoop = PrevLoop;
541 return;
542 }
543
544 assert(!State->Instance && "Replicating a Region with non-null instance.");
545
546 // Enter replicating mode.
547 State->Instance = VPIteration(0, 0);
548
549 for (unsigned Part = 0, UF = State->UF; Part < UF; ++Part) {
550 State->Instance->Part = Part;
551 assert(!State->VF.isScalable() && "VF is assumed to be non scalable.");
552 for (unsigned Lane = 0, VF = State->VF.getKnownMinValue(); Lane < VF;
553 ++Lane) {
554 State->Instance->Lane = VPLane(Lane, VPLane::Kind::First);
555 // Visit the VPBlocks connected to \p this, starting from it.
556 for (VPBlockBase *Block : RPOT) {
557 LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n');
558 Block->execute(State);
559 }
560 }
561 }
562
563 // Exit replicating mode.
564 State->Instance.reset();
565}
566
567#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
569 VPSlotTracker &SlotTracker) const {
570 O << Indent << (isReplicator() ? "<xVFxUF> " : "<x1> ") << getName() << ": {";
571 auto NewIndent = Indent + " ";
572 for (auto *BlockBase : vp_depth_first_shallow(Entry)) {
573 O << '\n';
574 BlockBase->print(O, NewIndent, SlotTracker);
575 }
576 O << Indent << "}\n";
577
578 printSuccessors(O, Indent);
579}
580#endif
581
583 clearLiveOuts();
584
585 if (Entry) {
586 VPValue DummyValue;
588 Block->dropAllReferences(&DummyValue);
589
591 }
592 for (VPValue *VPV : VPValuesToFree)
593 delete VPV;
594 if (TripCount)
595 delete TripCount;
596 if (BackedgeTakenCount)
597 delete BackedgeTakenCount;
598 for (auto &P : VPExternalDefs)
599 delete P.second;
600}
601
603 VPBasicBlock *Header = getVectorLoopRegion()->getEntryBasicBlock();
604 for (VPRecipeBase &R : Header->phis()) {
605 if (isa<VPActiveLaneMaskPHIRecipe>(&R))
606 return cast<VPActiveLaneMaskPHIRecipe>(&R);
607 }
608 return nullptr;
609}
610
611void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV,
612 Value *CanonicalIVStartValue,
613 VPTransformState &State,
614 bool IsEpilogueVectorization) {
615
616 // Check if the trip count is needed, and if so build it.
617 if (TripCount && TripCount->getNumUsers()) {
618 for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
619 State.set(TripCount, TripCountV, Part);
620 }
621
622 // Check if the backedge taken count is needed, and if so build it.
623 if (BackedgeTakenCount && BackedgeTakenCount->getNumUsers()) {
625 auto *TCMO = Builder.CreateSub(TripCountV,
626 ConstantInt::get(TripCountV->getType(), 1),
627 "trip.count.minus.1");
628 auto VF = State.VF;
629 Value *VTCMO =
630 VF.isScalar() ? TCMO : Builder.CreateVectorSplat(VF, TCMO, "broadcast");
631 for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
632 State.set(BackedgeTakenCount, VTCMO, Part);
633 }
634
635 for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
636 State.set(&VectorTripCount, VectorTripCountV, Part);
637
638 // When vectorizing the epilogue loop, the canonical induction start value
639 // needs to be changed from zero to the value after the main vector loop.
640 // FIXME: Improve modeling for canonical IV start values in the epilogue loop.
641 if (CanonicalIVStartValue) {
642 VPValue *VPV = getOrAddExternalDef(CanonicalIVStartValue);
643 auto *IV = getCanonicalIV();
644 assert(all_of(IV->users(),
645 [](const VPUser *U) {
646 if (isa<VPScalarIVStepsRecipe>(U) ||
647 isa<VPDerivedIVRecipe>(U))
648 return true;
649 auto *VPI = cast<VPInstruction>(U);
650 return VPI->getOpcode() ==
651 VPInstruction::CanonicalIVIncrement ||
652 VPI->getOpcode() ==
653 VPInstruction::CanonicalIVIncrementNUW;
654 }) &&
655 "the canonical IV should only be used by its increments or "
656 "ScalarIVSteps when "
657 "resetting the start value");
658 IV->setOperand(0, VPV);
659 }
660}
661
662/// Generate the code inside the preheader and body of the vectorized loop.
663/// Assumes a single pre-header basic-block was created for this. Introduce
664/// additional basic-blocks as needed, and fill them all.
666 // Set the reverse mapping from VPValues to Values for code generation.
667 for (auto &Entry : Value2VPValue)
668 State->VPValue2Value[Entry.second] = Entry.first;
669
670 // Initialize CFG state.
671 State->CFG.PrevVPBB = nullptr;
672 State->CFG.ExitBB = State->CFG.PrevBB->getSingleSuccessor();
673 BasicBlock *VectorPreHeader = State->CFG.PrevBB;
674 State->Builder.SetInsertPoint(VectorPreHeader->getTerminator());
675
676 // Generate code in the loop pre-header and body.
678 Block->execute(State);
679
680 VPBasicBlock *LatchVPBB = getVectorLoopRegion()->getExitingBasicBlock();
681 BasicBlock *VectorLatchBB = State->CFG.VPBB2IRBB[LatchVPBB];
682
683 // Fix the latch value of canonical, reduction and first-order recurrences
684 // phis in the vector loop.
685 VPBasicBlock *Header = getVectorLoopRegion()->getEntryBasicBlock();
686 for (VPRecipeBase &R : Header->phis()) {
687 // Skip phi-like recipes that generate their backedege values themselves.
688 if (isa<VPWidenPHIRecipe>(&R))
689 continue;
690
691 if (isa<VPWidenPointerInductionRecipe>(&R) ||
692 isa<VPWidenIntOrFpInductionRecipe>(&R)) {
693 PHINode *Phi = nullptr;
694 if (isa<VPWidenIntOrFpInductionRecipe>(&R)) {
695 Phi = cast<PHINode>(State->get(R.getVPSingleValue(), 0));
696 } else {
697 auto *WidenPhi = cast<VPWidenPointerInductionRecipe>(&R);
698 // TODO: Split off the case that all users of a pointer phi are scalar
699 // from the VPWidenPointerInductionRecipe.
700 if (WidenPhi->onlyScalarsGenerated(State->VF))
701 continue;
702
703 auto *GEP = cast<GetElementPtrInst>(State->get(WidenPhi, 0));
704 Phi = cast<PHINode>(GEP->getPointerOperand());
705 }
706
707 Phi->setIncomingBlock(1, VectorLatchBB);
708
709 // Move the last step to the end of the latch block. This ensures
710 // consistent placement of all induction updates.
711 Instruction *Inc = cast<Instruction>(Phi->getIncomingValue(1));
712 Inc->moveBefore(VectorLatchBB->getTerminator()->getPrevNode());
713 continue;
714 }
715
716 auto *PhiR = cast<VPHeaderPHIRecipe>(&R);
717 // For canonical IV, first-order recurrences and in-order reduction phis,
718 // only a single part is generated, which provides the last part from the
719 // previous iteration. For non-ordered reductions all UF parts are
720 // generated.
721 bool SinglePartNeeded = isa<VPCanonicalIVPHIRecipe>(PhiR) ||
722 isa<VPFirstOrderRecurrencePHIRecipe>(PhiR) ||
723 (isa<VPReductionPHIRecipe>(PhiR) &&
724 cast<VPReductionPHIRecipe>(PhiR)->isOrdered());
725 unsigned LastPartForNewPhi = SinglePartNeeded ? 1 : State->UF;
726
727 for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
728 Value *Phi = State->get(PhiR, Part);
729 Value *Val = State->get(PhiR->getBackedgeValue(),
730 SinglePartNeeded ? State->UF - 1 : Part);
731 cast<PHINode>(Phi)->addIncoming(Val, VectorLatchBB);
732 }
733 }
734
735 // We do not attempt to preserve DT for outer loop vectorization currently.
737 BasicBlock *VectorHeaderBB = State->CFG.VPBB2IRBB[Header];
738 State->DT->addNewBlock(VectorHeaderBB, VectorPreHeader);
739 updateDominatorTree(State->DT, VectorHeaderBB, VectorLatchBB,
740 State->CFG.ExitBB);
741 }
742}
743
744#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
746void VPlan::print(raw_ostream &O) const {
748
749 O << "VPlan '" << getName() << "' {";
750
751 bool AnyLiveIn = false;
752 if (VectorTripCount.getNumUsers() > 0) {
753 O << "\nLive-in ";
754 VectorTripCount.printAsOperand(O, SlotTracker);
755 O << " = vector-trip-count";
756 AnyLiveIn = true;
757 }
758
759 if (TripCount && TripCount->getNumUsers() > 0) {
760 O << "\nLive-in ";
761 TripCount->printAsOperand(O, SlotTracker);
762 O << " = original trip-count";
763 AnyLiveIn = true;
764 }
765
766 if (BackedgeTakenCount && BackedgeTakenCount->getNumUsers()) {
767 O << "\nLive-in ";
768 BackedgeTakenCount->printAsOperand(O, SlotTracker);
769 O << " = backedge-taken count";
770 AnyLiveIn = true;
771 }
772
773 if (AnyLiveIn)
774 O << "\n";
775
776 for (const VPBlockBase *Block : vp_depth_first_shallow(getEntry())) {
777 O << '\n';
778 Block->print(O, "", SlotTracker);
779 }
780
781 if (!LiveOuts.empty())
782 O << "\n";
783 for (const auto &KV : LiveOuts) {
784 O << "Live-out ";
785 KV.second->getPhi()->printAsOperand(O);
786 O << " = ";
787 KV.second->getOperand(0)->printAsOperand(O, SlotTracker);
788 O << "\n";
789 }
790
791 O << "}\n";
792}
793
794std::string VPlan::getName() const {
795 std::string Out;
796 raw_string_ostream RSO(Out);
797 RSO << Name << " for ";
798 if (!VFs.empty()) {
799 RSO << "VF={" << VFs[0];
800 for (ElementCount VF : drop_begin(VFs))
801 RSO << "," << VF;
802 RSO << "},";
803 }
804
805 if (UFs.empty()) {
806 RSO << "UF>=1";
807 } else {
808 RSO << "UF={" << UFs[0];
809 for (unsigned UF : drop_begin(UFs))
810 RSO << "," << UF;
811 RSO << "}";
812 }
813
814 return Out;
815}
816
819 VPlanPrinter Printer(O, *this);
820 Printer.dump();
821}
822
824void VPlan::dump() const { print(dbgs()); }
825#endif
826
828 assert(LiveOuts.count(PN) == 0 && "an exit value for PN already exists");
829 LiveOuts.insert({PN, new VPLiveOut(PN, V)});
830}
831
832void VPlan::updateDominatorTree(DominatorTree *DT, BasicBlock *LoopHeaderBB,
833 BasicBlock *LoopLatchBB,
834 BasicBlock *LoopExitBB) {
835 // The vector body may be more than a single basic-block by this point.
836 // Update the dominator tree information inside the vector body by propagating
837 // it from header to latch, expecting only triangular control-flow, if any.
838 BasicBlock *PostDomSucc = nullptr;
839 for (auto *BB = LoopHeaderBB; BB != LoopLatchBB; BB = PostDomSucc) {
840 // Get the list of successors of this block.
841 std::vector<BasicBlock *> Succs(succ_begin(BB), succ_end(BB));
842 assert(Succs.size() <= 2 &&
843 "Basic block in vector loop has more than 2 successors.");
844 PostDomSucc = Succs[0];
845 if (Succs.size() == 1) {
846 assert(PostDomSucc->getSinglePredecessor() &&
847 "PostDom successor has more than one predecessor.");
848 DT->addNewBlock(PostDomSucc, BB);
849 continue;
850 }
851 BasicBlock *InterimSucc = Succs[1];
852 if (PostDomSucc->getSingleSuccessor() == InterimSucc) {
853 PostDomSucc = Succs[1];
854 InterimSucc = Succs[0];
855 }
856 assert(InterimSucc->getSingleSuccessor() == PostDomSucc &&
857 "One successor of a basic block does not lead to the other.");
858 assert(InterimSucc->getSinglePredecessor() &&
859 "Interim successor has more than one predecessor.");
860 assert(PostDomSucc->hasNPredecessors(2) &&
861 "PostDom successor has more than two predecessors.");
862 DT->addNewBlock(InterimSucc, BB);
863 DT->addNewBlock(PostDomSucc, BB);
864 }
865 // Latch block is a new dominator for the loop exit.
866 DT->changeImmediateDominator(LoopExitBB, LoopLatchBB);
867 assert(DT->verify(DominatorTree::VerificationLevel::Fast));
868}
869
870#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
871
872Twine VPlanPrinter::getUID(const VPBlockBase *Block) {
873 return (isa<VPRegionBlock>(Block) ? "cluster_N" : "N") +
874 Twine(getOrCreateBID(Block));
875}
876
877Twine VPlanPrinter::getOrCreateName(const VPBlockBase *Block) {
878 const std::string &Name = Block->getName();
879 if (!Name.empty())
880 return Name;
881 return "VPB" + Twine(getOrCreateBID(Block));
882}
883
885 Depth = 1;
886 bumpIndent(0);
887 OS << "digraph VPlan {\n";
888 OS << "graph [labelloc=t, fontsize=30; label=\"Vectorization Plan";
889 if (!Plan.getName().empty())
890 OS << "\\n" << DOT::EscapeString(Plan.getName());
891 if (Plan.BackedgeTakenCount) {
892 OS << ", where:\\n";
893 Plan.BackedgeTakenCount->print(OS, SlotTracker);
894 OS << " := BackedgeTakenCount";
895 }
896 OS << "\"]\n";
897 OS << "node [shape=rect, fontname=Courier, fontsize=30]\n";
898 OS << "edge [fontname=Courier, fontsize=30]\n";
899 OS << "compound=true\n";
900
902 dumpBlock(Block);
903
904 OS << "}\n";
905}
906
907void VPlanPrinter::dumpBlock(const VPBlockBase *Block) {
908 if (const VPBasicBlock *BasicBlock = dyn_cast<VPBasicBlock>(Block))
909 dumpBasicBlock(BasicBlock);
910 else if (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
911 dumpRegion(Region);
912 else
913 llvm_unreachable("Unsupported kind of VPBlock.");
914}
915
916void VPlanPrinter::drawEdge(const VPBlockBase *From, const VPBlockBase *To,
917 bool Hidden, const Twine &Label) {
918 // Due to "dot" we print an edge between two regions as an edge between the
919 // exiting basic block and the entry basic of the respective regions.
920 const VPBlockBase *Tail = From->getExitingBasicBlock();
921 const VPBlockBase *Head = To->getEntryBasicBlock();
922 OS << Indent << getUID(Tail) << " -> " << getUID(Head);
923 OS << " [ label=\"" << Label << '\"';
924 if (Tail != From)
925 OS << " ltail=" << getUID(From);
926 if (Head != To)
927 OS << " lhead=" << getUID(To);
928 if (Hidden)
929 OS << "; splines=none";
930 OS << "]\n";
931}
932
933void VPlanPrinter::dumpEdges(const VPBlockBase *Block) {
934 auto &Successors = Block->getSuccessors();
935 if (Successors.size() == 1)
936 drawEdge(Block, Successors.front(), false, "");
937 else if (Successors.size() == 2) {
938 drawEdge(Block, Successors.front(), false, "T");
939 drawEdge(Block, Successors.back(), false, "F");
940 } else {
941 unsigned SuccessorNumber = 0;
942 for (auto *Successor : Successors)
943 drawEdge(Block, Successor, false, Twine(SuccessorNumber++));
944 }
945}
946
947void VPlanPrinter::dumpBasicBlock(const VPBasicBlock *BasicBlock) {
948 // Implement dot-formatted dump by performing plain-text dump into the
949 // temporary storage followed by some post-processing.
950 OS << Indent << getUID(BasicBlock) << " [label =\n";
951 bumpIndent(1);
952 std::string Str;
954 // Use no indentation as we need to wrap the lines into quotes ourselves.
955 BasicBlock->print(SS, "", SlotTracker);
956
957 // We need to process each line of the output separately, so split
958 // single-string plain-text dump.
960 StringRef(Str).rtrim('\n').split(Lines, "\n");
961
962 auto EmitLine = [&](StringRef Line, StringRef Suffix) {
963 OS << Indent << '"' << DOT::EscapeString(Line.str()) << "\\l\"" << Suffix;
964 };
965
966 // Don't need the "+" after the last line.
967 for (auto Line : make_range(Lines.begin(), Lines.end() - 1))
968 EmitLine(Line, " +\n");
969 EmitLine(Lines.back(), "\n");
970
971 bumpIndent(-1);
972 OS << Indent << "]\n";
973
975}
976
977void VPlanPrinter::dumpRegion(const VPRegionBlock *Region) {
978 OS << Indent << "subgraph " << getUID(Region) << " {\n";
979 bumpIndent(1);
980 OS << Indent << "fontname=Courier\n"
981 << Indent << "label=\""
982 << DOT::EscapeString(Region->isReplicator() ? "<xVFxUF> " : "<x1> ")
983 << DOT::EscapeString(Region->getName()) << "\"\n";
984 // Dump the blocks of the region.
985 assert(Region->getEntry() && "Region contains no inner blocks.");
987 dumpBlock(Block);
988 bumpIndent(-1);
989 OS << Indent << "}\n";
991}
992
994 if (auto *Inst = dyn_cast<Instruction>(V)) {
995 if (!Inst->getType()->isVoidTy()) {
996 Inst->printAsOperand(O, false);
997 O << " = ";
998 }
999 O << Inst->getOpcodeName() << " ";
1000 unsigned E = Inst->getNumOperands();
1001 if (E > 0) {
1002 Inst->getOperand(0)->printAsOperand(O, false);
1003 for (unsigned I = 1; I < E; ++I)
1004 Inst->getOperand(I)->printAsOperand(O << ", ", false);
1005 }
1006 } else // !Inst
1007 V->printAsOperand(O, false);
1008}
1009
1010#endif
1011
1012template void DomTreeBuilder::Calculate<VPDominatorTree>(VPDominatorTree &DT);
1013
1015 for (unsigned J = 0; J < getNumUsers();) {
1016 VPUser *User = Users[J];
1017 unsigned NumUsers = getNumUsers();
1018 for (unsigned I = 0, E = User->getNumOperands(); I < E; ++I)
1019 if (User->getOperand(I) == this)
1020 User->setOperand(I, New);
1021 // If a user got removed after updating the current user, the next user to
1022 // update will be moved to the current position, so we only need to
1023 // increment the index if the number of users did not change.
1024 if (NumUsers == getNumUsers())
1025 J++;
1026 }
1027}
1028
1029#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1031 if (const Value *UV = getUnderlyingValue()) {
1032 OS << "ir<";
1033 UV->printAsOperand(OS, false);
1034 OS << ">";
1035 return;
1036 }
1037
1038 unsigned Slot = Tracker.getSlot(this);
1039 if (Slot == unsigned(-1))
1040 OS << "<badref>";
1041 else
1042 OS << "vp<%" << Tracker.getSlot(this) << ">";
1043}
1044
1046 interleaveComma(operands(), O, [&O, &SlotTracker](VPValue *Op) {
1047 Op->printAsOperand(O, SlotTracker);
1048 });
1049}
1050#endif
1051
1052void VPInterleavedAccessInfo::visitRegion(VPRegionBlock *Region,
1053 Old2NewTy &Old2New,
1054 InterleavedAccessInfo &IAI) {
1056 RPOT(Region->getEntry());
1057 for (VPBlockBase *Base : RPOT) {
1058 visitBlock(Base, Old2New, IAI);
1059 }
1060}
1061
1062void VPInterleavedAccessInfo::visitBlock(VPBlockBase *Block, Old2NewTy &Old2New,
1063 InterleavedAccessInfo &IAI) {
1064 if (VPBasicBlock *VPBB = dyn_cast<VPBasicBlock>(Block)) {
1065 for (VPRecipeBase &VPI : *VPBB) {
1066 if (isa<VPHeaderPHIRecipe>(&VPI))
1067 continue;
1068 assert(isa<VPInstruction>(&VPI) && "Can only handle VPInstructions");
1069 auto *VPInst = cast<VPInstruction>(&VPI);
1070
1071 auto *Inst = dyn_cast_or_null<Instruction>(VPInst->getUnderlyingValue());
1072 if (!Inst)
1073 continue;
1074 auto *IG = IAI.getInterleaveGroup(Inst);
1075 if (!IG)
1076 continue;
1077
1078 auto NewIGIter = Old2New.find(IG);
1079 if (NewIGIter == Old2New.end())
1080 Old2New[IG] = new InterleaveGroup<VPInstruction>(
1081 IG->getFactor(), IG->isReverse(), IG->getAlign());
1082
1083 if (Inst == IG->getInsertPos())
1084 Old2New[IG]->setInsertPos(VPInst);
1085
1086 InterleaveGroupMap[VPInst] = Old2New[IG];
1087 InterleaveGroupMap[VPInst]->insertMember(
1088 VPInst, IG->getIndex(Inst),
1089 Align(IG->isReverse() ? (-1) * int(IG->getFactor())
1090 : IG->getFactor()));
1091 }
1092 } else if (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
1093 visitRegion(Region, Old2New, IAI);
1094 else
1095 llvm_unreachable("Unsupported kind of VPBlock.");
1096}
1097
1099 InterleavedAccessInfo &IAI) {
1100 Old2NewTy Old2New;
1101 visitRegion(Plan.getVectorLoopRegion(), Old2New, IAI);
1102}
1103
1104void VPSlotTracker::assignSlot(const VPValue *V) {
1105 assert(!Slots.contains(V) && "VPValue already has a slot!");
1106 Slots[V] = NextSlot++;
1107}
1108
1109void VPSlotTracker::assignSlots(const VPlan &Plan) {
1110
1111 for (const auto &P : Plan.VPExternalDefs)
1112 assignSlot(P.second);
1113
1114 assignSlot(&Plan.VectorTripCount);
1115 if (Plan.BackedgeTakenCount)
1116 assignSlot(Plan.BackedgeTakenCount);
1117 if (Plan.TripCount)
1118 assignSlot(Plan.TripCount);
1119
1122 for (const VPBasicBlock *VPBB :
1123 VPBlockUtils::blocksOnly<const VPBasicBlock>(RPOT))
1124 for (const VPRecipeBase &Recipe : *VPBB)
1125 for (VPValue *Def : Recipe.definedValues())
1126 assignSlot(Def);
1127}
1128
1130 return all_of(Def->users(),
1131 [Def](VPUser *U) { return U->onlyFirstLaneUsed(Def); });
1132}
1133
1135 ScalarEvolution &SE) {
1136 if (auto *E = dyn_cast<SCEVConstant>(Expr))
1137 return Plan.getOrAddExternalDef(E->getValue());
1138 if (auto *E = dyn_cast<SCEVUnknown>(Expr))
1139 return Plan.getOrAddExternalDef(E->getValue());
1140
1141 VPBasicBlock *Preheader = Plan.getEntry()->getEntryBasicBlock();
1142 VPExpandSCEVRecipe *Step = new VPExpandSCEVRecipe(Expr, SE);
1143 Preheader->appendRecipe(Step);
1144 return Step;
1145}
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
assume Assume Builder
static const Function * getParent(const Value *V)
BlockVerifier::State From
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition: Compiler.h:492
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file builds on the ADT/GraphTraits.h file to build generic depth first graph iterator.
std::string Name
static void dumpEdges(CFGMST< Edge, BBInfo > &MST, GCOVFunction &GF)
Generic dominator tree construction - this file provides routines to construct immediate dominator in...
Hexagon Common GEP
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
iv Induction Variable Users
Definition: IVUsers.cpp:48
#define I(x, y, z)
Definition: MD5.cpp:58
Memory true print Memory SSA Printer
Definition: MemorySSA.cpp:78
#define P(N)
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
static StringRef getName(Value *V)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
raw_pwrite_stream & OS
This file defines the SmallVector class.
This file implements dominator tree analysis for a single level of a VPlan's H-CFG.
static T * getPlanEntry(T *Start)
Definition: VPlan.cpp:126
static bool hasConditionalTerminator(const VPBasicBlock *VPBB)
Definition: VPlan.cpp:437
This file contains the declarations of the Vectorization Plan base classes:
static bool IsCondBranch(unsigned BrOpc)
static const uint32_t IV[8]
Definition: blake3_impl.h:77
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
LLVM Basic Block Representation.
Definition: BasicBlock.h:56
iterator end()
Definition: BasicBlock.h:325
void print(raw_ostream &OS, AssemblyAnnotationWriter *AAW=nullptr, bool ShouldPreserveUseListOrder=false, bool IsForDebug=false) const
Print the basic block to an output stream with an optional AssemblyAnnotationWriter.
Definition: AsmWriter.cpp:4604
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:105
bool hasNPredecessors(unsigned N) const
Return true if this block has exactly N predecessors.
Definition: BasicBlock.cpp:315
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:293
const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
Definition: BasicBlock.cpp:323
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:112
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:35
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:127
static BranchInst * Create(BasicBlock *IfTrue, Instruction *InsertBefore=nullptr)
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:888
Debug location.
std::optional< const DILocation * > cloneByMultiplyingDuplicationFactor(unsigned DF) const
Returns a new DILocation with duplication factor DF * current duplication factor encoded in the discr...
A debug info location.
Definition: DebugLoc.h:33
Core dominator tree base class.
bool verify(VerificationLevel VL=VerificationLevel::Full) const
verify - checks if the tree is correct.
void changeImmediateDominator(DomTreeNodeBase< NodeT > *N, DomTreeNodeBase< NodeT > *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node's...
DomTreeNodeBase< NodeT > * addNewBlock(NodeT *BB, NodeT *DomBB)
Add a new node to the dominator tree information.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
constexpr bool isScalar() const
Exactly one element.
Definition: TypeSize.h:302
bool shouldEmitDebugInfoForProfiling() const
Returns true if we should emit debug info for profiling.
Definition: Metadata.cpp:1629
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:94
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2355
UnreachableInst * CreateUnreachable()
Definition: IRBuilder.h:1192
Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains.
Definition: IRBuilder.cpp:1257
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Definition: IRBuilder.h:212
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1273
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition: IRBuilder.h:180
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2564
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:358
const Function * getFunction() const
Return the function this instruction belongs to.
Definition: Instruction.cpp:74
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
The group of interleaved loads/stores sharing the same stride and close to each other.
Definition: VectorUtils.h:639
Drive the analysis of interleaved memory accesses in the loop.
Definition: VectorUtils.h:781
InterleaveGroup< Instruction > * getInterleaveGroup(const Instruction *Instr) const
Get the interleave group that Instr belongs to.
Definition: VectorUtils.h:826
void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase< BlockT, LoopT > &LI)
This method is used by other analyses to update loop information.
Definition: LoopInfoImpl.h:258
void addChildLoop(LoopT *NewChild)
Add the specified loop to be a child of this loop.
Definition: LoopInfo.h:412
LoopT * AllocateLoop(ArgsTy &&... Args)
Definition: LoopInfo.h:956
void addTopLevelLoop(LoopT *New)
This adds the specified loop to the collection of top-level loops.
Definition: LoopInfo.h:1049
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Definition: LoopInfo.h:992
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:547
void eraseFromParent()
This method unlinks 'this' from the containing function and deletes it.
void setIncomingBlock(unsigned i, BasicBlock *BB)
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
BlockT * getEntry() const
Get the entry BasicBlock of the Region.
Definition: RegionInfo.h:322
This class represents an analyzed expression in the program.
The main scalar evolution driver.
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:88
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:152
This class provides computation of slot numbers for LLVM Assembly writing.
Definition: AsmWriter.cpp:678
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:312
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition: StringRef.h:687
StringRef rtrim(char Char) const
Return string with consecutive Char characters starting from the right removed.
Definition: StringRef.h:790
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
This function has undefined behavior.
void setOperand(unsigned i, Value *Val)
Definition: User.h:174
Value * getOperand(unsigned i) const
Definition: User.h:169
unsigned getNumOperands() const
Definition: User.h:191
A recipe for generating the active lane mask for the vector loop that is used to predicate the vector...
Definition: VPlan.h:1820
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition: VPlan.h:1956
void appendRecipe(VPRecipeBase *Recipe)
Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.
Definition: VPlan.h:2024
RecipeListTy::iterator iterator
Instruction iterators...
Definition: VPlan.h:1977
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPBasicBlock,...
Definition: VPlan.cpp:325
iterator end()
Definition: VPlan.h:1987
iterator begin()
Recipe iterator methods.
Definition: VPlan.h:1985
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
Definition: VPlan.cpp:207
VPRegionBlock * getEnclosingLoopRegion()
Definition: VPlan.cpp:427
void dropAllReferences(VPValue *NewValue) override
Replace all operands of VPUsers in the block with NewValue and also replaces all uses of VPValues def...
Definition: VPlan.cpp:392
VPBasicBlock * splitAt(iterator SplitAt)
Split current block at SplitAt by inserting a new block between the current block and its successors ...
Definition: VPlan.cpp:402
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print this VPBsicBlock to O, prefixing all lines with Indent.
Definition: VPlan.cpp:495
bool isExiting() const
Returns true if the block is exiting it's parent region.
Definition: VPlan.cpp:478
VPRecipeBase * getTerminator()
If the block has multiple successors, return the branch recipe terminating the block.
Definition: VPlan.cpp:466
const VPRecipeBase & back() const
Definition: VPlan.h:1999
bool empty() const
Definition: VPlan.h:1996
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
Definition: VPlan.h:390
VPRegionBlock * getParent()
Definition: VPlan.h:462
const VPBasicBlock * getExitingBasicBlock() const
Definition: VPlan.cpp:172
size_t getNumSuccessors() const
Definition: VPlan.h:507
void printSuccessors(raw_ostream &O, const Twine &Indent) const
Print the successors of this block to O, prefixing all lines with Indent.
Definition: VPlan.cpp:483
VPBlockBase * getEnclosingBlockWithPredecessors()
Definition: VPlan.cpp:194
static void deleteCFG(VPBlockBase *Entry)
Delete all blocks reachable from a given VPBlockBase, inclusive.
Definition: VPlan.cpp:202
VPlan * getPlan()
Definition: VPlan.cpp:146
void setPlan(VPlan *ParentPlan)
Sets the pointer of the plan containing the block.
Definition: VPlan.cpp:165
VPBlockBase * getSingleHierarchicalSuccessor()
Definition: VPlan.h:533
const VPBlocksTy & getHierarchicalSuccessors()
Definition: VPlan.h:527
VPBlockBase * getEnclosingBlockWithSuccessors()
An Enclosing Block of a block B is any block containing B, including B itself.
Definition: VPlan.cpp:186
const VPBasicBlock * getEntryBasicBlock() const
Definition: VPlan.cpp:151
VPBlockBase * getSingleSuccessor() const
Definition: VPlan.h:497
Helper for GraphTraits specialization that traverses through VPRegionBlocks.
Definition: VPlanCFG.h:114
static void insertBlockAfter(VPBlockBase *NewBlock, VPBlockBase *BlockPtr)
Insert disconnected VPBlockBase NewBlock after BlockPtr.
Definition: VPlan.h:2499
static void disconnectBlocks(VPBlockBase *From, VPBlockBase *To)
Disconnect VPBlockBases From and To bi-directionally.
Definition: VPlan.h:2546
static void connectBlocks(VPBlockBase *From, VPBlockBase *To)
Connect VPBlockBases From and To bi-directionally.
Definition: VPlan.h:2535
This class augments a recipe with a set of VPValues defined by the recipe.
Definition: VPlanValue.h:303
void dump() const
Dump the VPDef to stderr (for debugging).
Definition: VPlan.cpp:107
virtual void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const =0
Each concrete VPDef prints itself.
Recipe to expand a SCEV expression.
Definition: VPlan.h:1746
This is a concrete Recipe that models a single VPlan-level instruction.
Definition: VPlan.h:779
VPInterleavedAccessInfo(VPlan &Plan, InterleavedAccessInfo &IAI)
Definition: VPlan.cpp:1098
In what follows, the term "input IR" refers to code that is fed into the vectorizer whereas the term ...
Definition: VPlan.h:113
Value * getAsRuntimeExpr(IRBuilderBase &Builder, const ElementCount &VF) const
Returns an expression describing the lane index that can be used at runtime.
Definition: VPlan.cpp:66
@ ScalableLast
For ScalableLast, Lane is the offset from the start of the last N-element subvector in a scalable vec...
@ First
For First, Lane is the index into the first N elements of a fixed-vector <N x <ElTy>> or a scalable v...
A value that is used outside the VPlan.
Definition: VPlan.h:635
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition: VPlan.h:666
VPBasicBlock * getParent()
Definition: VPlan.h:683
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
Definition: VPlan.h:2080
const VPBlockBase * getEntry() const
Definition: VPlan.h:2119
void dropAllReferences(VPValue *NewValue) override
Replace all operands of VPUsers in the block with NewValue and also replaces all uses of VPValues def...
Definition: VPlan.cpp:509
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print this VPRegionBlock to O (recursively), prefixing all lines with Indent.
Definition: VPlan.cpp:568
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPRegionBlock,...
Definition: VPlan.cpp:516
const VPBlockBase * getExiting() const
Definition: VPlan.h:2131
VPBasicBlock * getPreheaderVPBB()
Returns the pre-header VPBasicBlock of the loop region.
Definition: VPlan.h:2144
This class can be used to assign consecutive numbers to all VPValues in a VPlan and allows querying t...
Definition: VPlanValue.h:434
unsigned getSlot(const VPValue *V) const
Definition: VPlanValue.h:447
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
Definition: VPlanValue.h:201
void printOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the operands to O.
Definition: VPlan.cpp:1045
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Definition: VPlan.cpp:116
void printAsOperand(raw_ostream &OS, VPSlotTracker &Tracker) const
Definition: VPlan.cpp:1030
void dump() const
Dump the value to stderr (for debugging).
Definition: VPlan.cpp:99
VPValue(const unsigned char SC, Value *UV=nullptr, VPDef *Def=nullptr)
Definition: VPlan.cpp:79
virtual ~VPValue()
Definition: VPlan.cpp:85
void print(raw_ostream &OS, VPSlotTracker &Tracker) const
Definition: VPlan.cpp:92
void replaceAllUsesWith(VPValue *New)
Definition: VPlan.cpp:1014
VPDef * Def
Pointer to the VPDef that defines this VPValue.
Definition: VPlanValue.h:65
VPlanPrinter prints a given VPlan to a given output stream.
Definition: VPlan.h:2419
LLVM_DUMP_METHOD void dump()
Definition: VPlan.cpp:884
VPlan models a candidate for vectorization, encoding various decisions take to produce efficient outp...
Definition: VPlan.h:2177
void printDOT(raw_ostream &O) const
Print this VPlan in DOT format to O.
Definition: VPlan.cpp:818
std::string getName() const
Return a string with the name of the plan and the applicable VFs and UFs.
Definition: VPlan.cpp:794
VPBlockBase * getEntry()
Definition: VPlan.h:2240
void addLiveOut(PHINode *PN, VPValue *V)
Definition: VPlan.cpp:827
void prepareToExecute(Value *TripCount, Value *VectorTripCount, Value *CanonicalIVStartValue, VPTransformState &State, bool IsEpilogueVectorization)
Prepare the plan for execution, setting up the required live-in values.
Definition: VPlan.cpp:611
VPValue * getOrAddExternalDef(Value *V)
Get the existing or add a new external definition for V.
Definition: VPlan.h:2296
VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
Definition: VPlan.h:2370
LLVM_DUMP_METHOD void dump() const
Dump the plan to stderr (for debugging).
Definition: VPlan.cpp:824
void execute(VPTransformState *State)
Generate the IR code for this VPlan.
Definition: VPlan.cpp:665
VPActiveLaneMaskPHIRecipe * getActiveLaneMaskPhi()
Find and return the VPActiveLaneMaskPHIRecipe from the header - there be only one at most.
Definition: VPlan.cpp:602
void print(raw_ostream &O) const
Print this VPlan to O.
Definition: VPlan.cpp:746
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:308
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:166
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:163
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:642
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Tail
Attemps to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition: CallingConv.h:76
std::string EscapeString(const std::string &Label)
Definition: GraphWriter.cpp:55
@ SS
Definition: X86.h:209
VPValue * getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr, ScalarEvolution &SE)
Get or create a VPValue that corresponds to the expansion of Expr.
Definition: VPlan.cpp:1134
bool onlyFirstLaneUsed(VPValue *Def)
Returns true if only the first lane of Def is used.
Definition: VPlan.cpp:1129
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:413
Interval::succ_iterator succ_end(Interval *I)
Definition: Interval.h:102
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1819
auto successors(const MachineBasicBlock *BB)
Value * getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF)
Return the runtime value for VF.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Interval::succ_iterator succ_begin(Interval *I)
succ_begin/succ_end - define methods so that Intervals may be used just like BasicBlocks can with the...
Definition: Interval.h:99
void interleaveComma(const Container &c, StreamT &os, UnaryFunctor each_fn)
Definition: STLExtras.h:2213
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:748
iterator_range< df_iterator< VPBlockShallowTraversalWrapper< VPBlockBase * > > > vp_depth_first_shallow(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order.
Definition: VPlanCFG.h:213
Instruction * propagateMetadata(Instruction *I, ArrayRef< Value * > VL)
Specifically, let Kinds = [MD_tbaa, MD_alias_scope, MD_noalias, MD_fpmath, MD_nontemporal,...
cl::opt< bool > EnableFSDiscriminator
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr)
cl::opt< bool > EnableVPlanNativePath("enable-vplan-native-path", cl::Hidden, cl::desc("Enable VPlan-native vectorization path with " "support for outer loop vectorization."))
Definition: VPlan.cpp:51
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
Definition: SmallVector.h:1298
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Definition: APFixedPoint.h:292
BasicBlock * SplitBlock(BasicBlock *Old, Instruction *SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
VPIteration represents a single point in the iteration space of the output (vectorized and/or unrolle...
Definition: VPlan.h:189
Hold state information used when constructing the CFG of the output IR, traversing the VPBasicBlocks ...
Definition: VPlan.h:330
BasicBlock * PrevBB
The previous IR BasicBlock created or used.
Definition: VPlan.h:336
SmallDenseMap< VPBasicBlock *, BasicBlock * > VPBB2IRBB
A mapping of each VPBasicBlock to the corresponding BasicBlock.
Definition: VPlan.h:344
VPBasicBlock * PrevVPBB
The previous VPBasicBlock visited. Initially set to null.
Definition: VPlan.h:332
BasicBlock * ExitBB
The last IR BasicBlock in the output IR.
Definition: VPlan.h:340
BasicBlock * getPreheaderBBFor(VPRecipeBase *R)
Returns the BasicBlock* mapped to the pre-header of the loop region containing R.
Definition: VPlan.cpp:235
DenseMap< VPValue *, ScalarsPerPartValuesTy > PerPartScalars
Definition: VPlan.h:231
DenseMap< VPValue *, PerPartValuesTy > PerPartOutput
Definition: VPlan.h:228
VPTransformState holds information passed down when "executing" a VPlan, needed for generating the ou...
Definition: VPlan.h:206
VPValue2ValueTy VPValue2Value
Definition: VPlan.h:362
LoopInfo * LI
Hold a pointer to LoopInfo to register new basic blocks in the loop.
Definition: VPlan.h:354
void addMetadata(Instruction *To, Instruction *From)
Add metadata from one instruction to another.
Definition: VPlan.cpp:248
Value * get(VPValue *Def, unsigned Part)
Get the generated Value for a given VPValue and a given Part.
struct llvm::VPTransformState::DataState Data
void setDebugLocFromInst(const Value *V)
Set the debug location in the builder using the debug location in V.
Definition: VPlan.cpp:260
struct llvm::VPTransformState::CFGState CFG
void addNewMetadata(Instruction *To, const Instruction *Orig)
Add additional metadata to To that was not present on Orig.
Definition: VPlan.cpp:240
std::optional< VPIteration > Instance
Hold the indices to generate specific scalar instructions.
Definition: VPlan.h:220
IRBuilderBase & Builder
Hold a reference to the IRBuilder used to generate output IR code.
Definition: VPlan.h:360
DominatorTree * DT
Hold a pointer to Dominator Tree to register new basic blocks in the loop.
Definition: VPlan.h:357
bool hasScalarValue(VPValue *Def, VPIteration Instance)
Definition: VPlan.h:254
VPlan * Plan
Pointer to the VPlan code is generated for.
Definition: VPlan.h:371
std::unique_ptr< LoopVersioning > LVer
LoopVersioning.
Definition: VPlan.h:385
bool hasVectorValue(VPValue *Def, unsigned Part)
Definition: VPlan.h:244
ElementCount VF
The chosen Vectorization and Unroll Factors of the loop being vectorized.
Definition: VPlan.h:214
Loop * CurrentVectorLoop
The loop object for the current parent region, or nullptr.
Definition: VPlan.h:378
void set(VPValue *Def, Value *V, unsigned Part)
Set the generated Value for a given VPValue and a given Part.
Definition: VPlan.h:265
void print(raw_ostream &O) const
Definition: VPlan.cpp:993