LLVM  15.0.0git
VPlanHCFGBuilder.cpp
Go to the documentation of this file.
1 //===-- VPlanHCFGBuilder.cpp ----------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file implements the construction of a VPlan-based Hierarchical CFG
11 /// (H-CFG) for an incoming IR. This construction comprises the following
12 /// components and steps:
13 //
14 /// 1. PlainCFGBuilder class: builds a plain VPBasicBlock-based CFG that
15 /// faithfully represents the CFG in the incoming IR. A VPRegionBlock (Top
16 /// Region) is created to enclose and serve as parent of all the VPBasicBlocks
17 /// in the plain CFG.
18 /// NOTE: At this point, there is a direct correspondence between all the
19 /// VPBasicBlocks created for the initial plain CFG and the incoming
20 /// BasicBlocks. However, this might change in the future.
21 ///
22 //===----------------------------------------------------------------------===//
23 
24 #include "VPlanHCFGBuilder.h"
27 
28 #define DEBUG_TYPE "loop-vectorize"
29 
30 using namespace llvm;
31 
32 namespace {
33 // Class that is used to build the plain CFG for the incoming IR.
34 class PlainCFGBuilder {
35 private:
36  // The outermost loop of the input loop nest considered for vectorization.
37  Loop *TheLoop;
38 
39  // Loop Info analysis.
40  LoopInfo *LI;
41 
42  // Vectorization plan that we are working on.
43  VPlan &Plan;
44 
45  // Output Top Region.
46  VPRegionBlock *TopRegion = nullptr;
47 
48  // Builder of the VPlan instruction-level representation.
49  VPBuilder VPIRBuilder;
50 
51  // NOTE: The following maps are intentionally destroyed after the plain CFG
52  // construction because subsequent VPlan-to-VPlan transformation may
53  // invalidate them.
54  // Map incoming BasicBlocks to their newly-created VPBasicBlocks.
56  // Map incoming Value definitions to their newly-created VPValues.
57  DenseMap<Value *, VPValue *> IRDef2VPValue;
58 
59  // Hold phi node's that need to be fixed once the plain CFG has been built.
60  SmallVector<PHINode *, 8> PhisToFix;
61 
62  // Utility functions.
63  void setVPBBPredsFromBB(VPBasicBlock *VPBB, BasicBlock *BB);
64  void fixPhiNodes();
65  VPBasicBlock *getOrCreateVPBB(BasicBlock *BB);
66 #ifndef NDEBUG
67  bool isExternalDef(Value *Val);
68 #endif
69  VPValue *getOrCreateVPOperand(Value *IRVal);
70  void createVPInstructionsForVPBB(VPBasicBlock *VPBB, BasicBlock *BB);
71 
72 public:
73  PlainCFGBuilder(Loop *Lp, LoopInfo *LI, VPlan &P)
74  : TheLoop(Lp), LI(LI), Plan(P) {}
75 
76  // Build the plain CFG and return its Top Region.
77  VPRegionBlock *buildPlainCFG();
78 };
79 } // anonymous namespace
80 
81 // Set predecessors of \p VPBB in the same order as they are in \p BB. \p VPBB
82 // must have no predecessors.
83 void PlainCFGBuilder::setVPBBPredsFromBB(VPBasicBlock *VPBB, BasicBlock *BB) {
85  // Collect VPBB predecessors.
86  for (BasicBlock *Pred : predecessors(BB))
87  VPBBPreds.push_back(getOrCreateVPBB(Pred));
88 
89  VPBB->setPredecessors(VPBBPreds);
90 }
91 
92 // Add operands to VPInstructions representing phi nodes from the input IR.
93 void PlainCFGBuilder::fixPhiNodes() {
94  for (auto *Phi : PhisToFix) {
95  assert(IRDef2VPValue.count(Phi) && "Missing VPInstruction for PHINode.");
96  VPValue *VPVal = IRDef2VPValue[Phi];
97  assert(isa<VPWidenPHIRecipe>(VPVal) &&
98  "Expected WidenPHIRecipe for phi node.");
99  auto *VPPhi = cast<VPWidenPHIRecipe>(VPVal);
100  assert(VPPhi->getNumOperands() == 0 &&
101  "Expected VPInstruction with no operands.");
102 
103  for (unsigned I = 0; I != Phi->getNumOperands(); ++I)
104  VPPhi->addIncoming(getOrCreateVPOperand(Phi->getIncomingValue(I)),
105  BB2VPBB[Phi->getIncomingBlock(I)]);
106  }
107 }
108 
109 // Create a new empty VPBasicBlock for an incoming BasicBlock or retrieve an
110 // existing one if it was already created.
111 VPBasicBlock *PlainCFGBuilder::getOrCreateVPBB(BasicBlock *BB) {
112  auto BlockIt = BB2VPBB.find(BB);
113  if (BlockIt != BB2VPBB.end())
114  // Retrieve existing VPBB.
115  return BlockIt->second;
116 
117  // Create new VPBB.
118  LLVM_DEBUG(dbgs() << "Creating VPBasicBlock for " << BB->getName() << "\n");
119  VPBasicBlock *VPBB = new VPBasicBlock(BB->getName());
120  BB2VPBB[BB] = VPBB;
121  VPBB->setParent(TopRegion);
122  return VPBB;
123 }
124 
125 #ifndef NDEBUG
126 // Return true if \p Val is considered an external definition. An external
127 // definition is either:
128 // 1. A Value that is not an Instruction. This will be refined in the future.
129 // 2. An Instruction that is outside of the CFG snippet represented in VPlan,
130 // i.e., is not part of: a) the loop nest, b) outermost loop PH and, c)
131 // outermost loop exits.
132 bool PlainCFGBuilder::isExternalDef(Value *Val) {
133  // All the Values that are not Instructions are considered external
134  // definitions for now.
135  Instruction *Inst = dyn_cast<Instruction>(Val);
136  if (!Inst)
137  return true;
138 
139  BasicBlock *InstParent = Inst->getParent();
140  assert(InstParent && "Expected instruction parent.");
141 
142  // Check whether Instruction definition is in loop PH.
143  BasicBlock *PH = TheLoop->getLoopPreheader();
144  assert(PH && "Expected loop pre-header.");
145 
146  if (InstParent == PH)
147  // Instruction definition is in outermost loop PH.
148  return false;
149 
150  // Check whether Instruction definition is in the loop exit.
151  BasicBlock *Exit = TheLoop->getUniqueExitBlock();
152  assert(Exit && "Expected loop with single exit.");
153  if (InstParent == Exit) {
154  // Instruction definition is in outermost loop exit.
155  return false;
156  }
157 
158  // Check whether Instruction definition is in loop body.
159  return !TheLoop->contains(Inst);
160 }
161 #endif
162 
163 // Create a new VPValue or retrieve an existing one for the Instruction's
164 // operand \p IRVal. This function must only be used to create/retrieve VPValues
165 // for *Instruction's operands* and not to create regular VPInstruction's. For
166 // the latter, please, look at 'createVPInstructionsForVPBB'.
167 VPValue *PlainCFGBuilder::getOrCreateVPOperand(Value *IRVal) {
168  auto VPValIt = IRDef2VPValue.find(IRVal);
169  if (VPValIt != IRDef2VPValue.end())
170  // Operand has an associated VPInstruction or VPValue that was previously
171  // created.
172  return VPValIt->second;
173 
174  // Operand doesn't have a previously created VPInstruction/VPValue. This
175  // means that operand is:
176  // A) a definition external to VPlan,
177  // B) any other Value without specific representation in VPlan.
178  // For now, we use VPValue to represent A and B and classify both as external
179  // definitions. We may introduce specific VPValue subclasses for them in the
180  // future.
181  assert(isExternalDef(IRVal) && "Expected external definition as operand.");
182 
183  // A and B: Create VPValue and add it to the pool of external definitions and
184  // to the Value->VPValue map.
185  VPValue *NewVPVal = Plan.getOrAddExternalDef(IRVal);
186  IRDef2VPValue[IRVal] = NewVPVal;
187  return NewVPVal;
188 }
189 
190 // Create new VPInstructions in a VPBasicBlock, given its BasicBlock
191 // counterpart. This function must be invoked in RPO so that the operands of a
192 // VPInstruction in \p BB have been visited before (except for Phi nodes).
193 void PlainCFGBuilder::createVPInstructionsForVPBB(VPBasicBlock *VPBB,
194  BasicBlock *BB) {
195  VPIRBuilder.setInsertPoint(VPBB);
196  for (Instruction &InstRef : *BB) {
197  Instruction *Inst = &InstRef;
198 
199  // There shouldn't be any VPValue for Inst at this point. Otherwise, we
200  // visited Inst when we shouldn't, breaking the RPO traversal order.
201  assert(!IRDef2VPValue.count(Inst) &&
202  "Instruction shouldn't have been visited.");
203 
204  if (auto *Br = dyn_cast<BranchInst>(Inst)) {
205  // Branch instruction is not explicitly represented in VPlan but we need
206  // to represent its condition bit when it's conditional.
207  if (Br->isConditional())
208  getOrCreateVPOperand(Br->getCondition());
209 
210  // Skip the rest of the Instruction processing for Branch instructions.
211  continue;
212  }
213 
214  VPValue *NewVPV;
215  if (auto *Phi = dyn_cast<PHINode>(Inst)) {
216  // Phi node's operands may have not been visited at this point. We create
217  // an empty VPInstruction that we will fix once the whole plain CFG has
218  // been built.
219  NewVPV = new VPWidenPHIRecipe(Phi);
220  VPBB->appendRecipe(cast<VPWidenPHIRecipe>(NewVPV));
221  PhisToFix.push_back(Phi);
222  } else {
223  // Translate LLVM-IR operands into VPValue operands and set them in the
224  // new VPInstruction.
225  SmallVector<VPValue *, 4> VPOperands;
226  for (Value *Op : Inst->operands())
227  VPOperands.push_back(getOrCreateVPOperand(Op));
228 
229  // Build VPInstruction for any arbitraty Instruction without specific
230  // representation in VPlan.
231  NewVPV = cast<VPInstruction>(
232  VPIRBuilder.createNaryOp(Inst->getOpcode(), VPOperands, Inst));
233  }
234 
235  IRDef2VPValue[Inst] = NewVPV;
236  }
237 }
238 
239 // Main interface to build the plain CFG.
240 VPRegionBlock *PlainCFGBuilder::buildPlainCFG() {
241  // 1. Create the Top Region. It will be the parent of all VPBBs.
242  TopRegion = new VPRegionBlock("TopRegion", false /*isReplicator*/);
243 
244  // 2. Scan the body of the loop in a topological order to visit each basic
245  // block after having visited its predecessor basic blocks. Create a VPBB for
246  // each BB and link it to its successor and predecessor VPBBs. Note that
247  // predecessors must be set in the same order as they are in the incomming IR.
248  // Otherwise, there might be problems with existing phi nodes and algorithm
249  // based on predecessors traversal.
250 
251  // Loop PH needs to be explicitly visited since it's not taken into account by
252  // LoopBlocksDFS.
253  BasicBlock *PreheaderBB = TheLoop->getLoopPreheader();
254  assert((PreheaderBB->getTerminator()->getNumSuccessors() == 1) &&
255  "Unexpected loop preheader");
256  VPBasicBlock *PreheaderVPBB = getOrCreateVPBB(PreheaderBB);
257  for (auto &I : *PreheaderBB) {
258  if (I.getType()->isVoidTy())
259  continue;
260  IRDef2VPValue[&I] = Plan.getOrAddExternalDef(&I);
261  }
262  // Create empty VPBB for Loop H so that we can link PH->H.
263  VPBlockBase *HeaderVPBB = getOrCreateVPBB(TheLoop->getHeader());
264  HeaderVPBB->setName("vector.body");
265  // Preheader's predecessors will be set during the loop RPO traversal below.
266  PreheaderVPBB->setOneSuccessor(HeaderVPBB);
267 
268  LoopBlocksRPO RPO(TheLoop);
269  RPO.perform(LI);
270 
271  for (BasicBlock *BB : RPO) {
272  // Create or retrieve the VPBasicBlock for this BB and create its
273  // VPInstructions.
274  VPBasicBlock *VPBB = getOrCreateVPBB(BB);
275  createVPInstructionsForVPBB(VPBB, BB);
276 
277  // Set VPBB successors. We create empty VPBBs for successors if they don't
278  // exist already. Recipes will be created when the successor is visited
279  // during the RPO traversal.
280  Instruction *TI = BB->getTerminator();
281  assert(TI && "Terminator expected.");
282  unsigned NumSuccs = TI->getNumSuccessors();
283 
284  if (NumSuccs == 1) {
285  VPBasicBlock *SuccVPBB = getOrCreateVPBB(TI->getSuccessor(0));
286  assert(SuccVPBB && "VPBB Successor not found.");
287  VPBB->setOneSuccessor(SuccVPBB);
288  } else if (NumSuccs == 2) {
289  VPBasicBlock *SuccVPBB0 = getOrCreateVPBB(TI->getSuccessor(0));
290  assert(SuccVPBB0 && "Successor 0 not found.");
291  VPBasicBlock *SuccVPBB1 = getOrCreateVPBB(TI->getSuccessor(1));
292  assert(SuccVPBB1 && "Successor 1 not found.");
293 
294  // Get VPBB's condition bit.
295  assert(isa<BranchInst>(TI) && "Unsupported terminator!");
296  auto *Br = cast<BranchInst>(TI);
297  Value *BrCond = Br->getCondition();
298  // Look up the branch condition to get the corresponding VPValue
299  // representing the condition bit in VPlan (which may be in another VPBB).
300  assert(IRDef2VPValue.count(BrCond) &&
301  "Missing condition bit in IRDef2VPValue!");
302  VPValue *VPCondBit = IRDef2VPValue[BrCond];
303 
304  // Link successors using condition bit.
305  VPBB->setTwoSuccessors(SuccVPBB0, SuccVPBB1, VPCondBit);
306  } else
307  llvm_unreachable("Number of successors not supported.");
308 
309  // Set VPBB predecessors in the same order as they are in the incoming BB.
310  setVPBBPredsFromBB(VPBB, BB);
311  }
312 
313  // 3. Process outermost loop exit. We created an empty VPBB for the loop
314  // single exit BB during the RPO traversal of the loop body but Instructions
315  // weren't visited because it's not part of the the loop.
316  BasicBlock *LoopExitBB = TheLoop->getUniqueExitBlock();
317  assert(LoopExitBB && "Loops with multiple exits are not supported.");
318  VPBasicBlock *LoopExitVPBB = BB2VPBB[LoopExitBB];
319  createVPInstructionsForVPBB(LoopExitVPBB, LoopExitBB);
320  // Loop exit was already set as successor of the loop exiting BB.
321  // We only set its predecessor VPBB now.
322  setVPBBPredsFromBB(LoopExitVPBB, LoopExitBB);
323 
324  // 4. The whole CFG has been built at this point so all the input Values must
325  // have a VPlan couterpart. Fix VPlan phi nodes by adding their corresponding
326  // VPlan operands.
327  fixPhiNodes();
328 
329  // 5. Final Top Region setup. Set outermost loop pre-header and single exit as
330  // Top Region entry and exit.
331  TopRegion->setEntry(PreheaderVPBB);
332  TopRegion->setExit(LoopExitVPBB);
333  return TopRegion;
334 }
335 
336 VPRegionBlock *VPlanHCFGBuilder::buildPlainCFG() {
337  PlainCFGBuilder PCFGBuilder(TheLoop, LI, Plan);
338  return PCFGBuilder.buildPlainCFG();
339 }
340 
341 // Public interface to build a H-CFG.
343  // Build Top Region enclosing the plain CFG and set it as VPlan entry.
344  VPRegionBlock *TopRegion = buildPlainCFG();
345  Plan.setEntry(TopRegion);
346  LLVM_DEBUG(Plan.setName("HCFGBuilder: Plain CFG\n"); dbgs() << Plan);
347 
348  Verifier.verifyHierarchicalCFG(TopRegion);
349 
350  // Compute plain CFG dom tree for VPLInfo.
351  VPDomTree.recalculate(*TopRegion);
352  LLVM_DEBUG(dbgs() << "Dominator Tree after building the plain CFG.\n";
353  VPDomTree.print(dbgs()));
354 
355  // Compute VPLInfo and keep it in Plan.
356  VPLoopInfo &VPLInfo = Plan.getVPLoopInfo();
357  VPLInfo.analyze(VPDomTree);
358  LLVM_DEBUG(dbgs() << "VPLoop Info After buildPlainCFG:\n";
359  VPLInfo.print(dbgs()));
360 }
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:17
llvm::DominatorTreeBase::print
void print(raw_ostream &O) const
print - Convert to human readable form
Definition: GenericDomTree.h:709
llvm::User::operands
op_range operands()
Definition: User.h:242
llvm::VPBasicBlock
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition: VPlan.h:2002
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:530
P
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
Definition: README-SSE.txt:411
llvm::VPWidenPHIRecipe
A recipe for handling header phis that are widened in the vector loop.
Definition: VPlan.h:1261
VPlanHCFGBuilder.h
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1185
RPO
rpo function Deduce function attributes in RPO
Definition: FunctionAttrs.cpp:1992
llvm::LoopInfoBase::print
void print(raw_ostream &OS) const
Definition: LoopInfoImpl.h:620
llvm::VPlanHCFGBuilder::buildHierarchicalCFG
void buildHierarchicalCFG()
Build H-CFG for TheLoop and update Plan accordingly.
Definition: VPlanHCFGBuilder.cpp:342
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
llvm::LoopBlocksRPO
Wrapper class to LoopBlocksDFS that provides a standard begin()/end() interface for the DFS reverse p...
Definition: LoopIterator.h:172
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::VPBlockBase
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
Definition: VPlan.h:394
llvm::Instruction::getNumSuccessors
unsigned getNumSuccessors() const
Return the number of successors that this instruction has.
Definition: Instruction.cpp:777
llvm::VPBasicBlock::appendRecipe
void appendRecipe(VPRecipeBase *Recipe)
Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.
Definition: VPlan.h:2070
llvm::Instruction::getOpcode
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:157
llvm::LoopInfoBase::analyze
void analyze(const DominatorTreeBase< BlockT, false > &DomTree)
Create the loop forest using a stable algorithm.
Definition: LoopInfoImpl.h:547
llvm::VPBuilder
VPlan-based builder utility analogous to IRBuilder.
Definition: LoopVectorizationPlanner.h:44
llvm::VPBlockBase::setPredecessors
void setPredecessors(ArrayRef< VPBlockBase * > NewPreds)
Set each VPBasicBlock in NewPreds as predecessor of this VPBlockBase.
Definition: VPlan.h:605
llvm::Instruction
Definition: Instruction.h:42
llvm::predecessors
auto predecessors(MachineBasicBlock *BB)
Definition: MachineSSAContext.h:30
llvm::VPValue
Definition: VPlanValue.h:44
llvm::Instruction::getSuccessor
BasicBlock * getSuccessor(unsigned Idx) const
Return the specified successor. This instruction must be a terminator.
Definition: Instruction.cpp:789
LoopIterator.h
llvm::VPRegionBlock
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
Definition: VPlan.h:2118
llvm::VPlan::setName
void setName(const Twine &newName)
Definition: VPlan.h:2587
llvm::DenseMap
Definition: DenseMap.h:716
I
#define I(x, y, z)
Definition: MD5.cpp:58
LoopVectorizationPlanner.h
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::DominatorTreeBase::recalculate
void recalculate(ParentType &Func)
recalculate - compute a dominator tree for the given function
Definition: GenericDomTree.h:778
llvm::LoopInfo
Definition: LoopInfo.h:1086
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:143
llvm::VPBlockBase::setOneSuccessor
void setOneSuccessor(VPBlockBase *Successor)
Set a given VPBlockBase Successor as the single successor of this VPBlockBase.
Definition: VPlan.h:584
llvm::VPBlockBase::setParent
void setParent(VPRegionBlock *P)
Definition: VPlan.h:487
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:341
llvm::LoopInfoBase< VPBlockBase, VPLoop >
llvm::VPlan::setEntry
VPBlockBase * setEntry(VPBlockBase *Block)
Definition: VPlan.h:2554
llvm::VPBlockBase::setName
void setName(const Twine &newName)
Definition: VPlan.h:469
llvm::VPlan::getVPLoopInfo
VPLoopInfo & getVPLoopInfo()
Return the VPLoopInfo analysis for this VPlan.
Definition: VPlan.h:2643
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:91
llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:119
llvm::VPlanVerifier::verifyHierarchicalCFG
void verifyHierarchicalCFG(const VPRegionBlock *TopRegion) const
Verify the invariants of the H-CFG starting from TopRegion.
Definition: VPlanVerifier.cpp:122
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::VPlan
VPlan models a candidate for vectorization, encoding various decisions take to produce efficient outp...
Definition: VPlan.h:2472
llvm::VPBlockBase::setTwoSuccessors
void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse, VPValue *Condition)
Set two given VPBlockBases IfTrue and IfFalse to be the two successors of this VPBlockBase.
Definition: VPlan.h:593
llvm::Value
LLVM Value Representation.
Definition: Value.h:74