LLVM  14.0.0git
VPlanHCFGBuilder.cpp
Go to the documentation of this file.
1 //===-- VPlanHCFGBuilder.cpp ----------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file implements the construction of a VPlan-based Hierarchical CFG
11 /// (H-CFG) for an incoming IR. This construction comprises the following
12 /// components and steps:
13 //
14 /// 1. PlainCFGBuilder class: builds a plain VPBasicBlock-based CFG that
15 /// faithfully represents the CFG in the incoming IR. A VPRegionBlock (Top
16 /// Region) is created to enclose and serve as parent of all the VPBasicBlocks
17 /// in the plain CFG.
18 /// NOTE: At this point, there is a direct correspondence between all the
19 /// VPBasicBlocks created for the initial plain CFG and the incoming
20 /// BasicBlocks. However, this might change in the future.
21 ///
22 //===----------------------------------------------------------------------===//
23 
24 #include "VPlanHCFGBuilder.h"
27 
28 #define DEBUG_TYPE "loop-vectorize"
29 
30 using namespace llvm;
31 
32 namespace {
33 // Class that is used to build the plain CFG for the incoming IR.
34 class PlainCFGBuilder {
35 private:
36  // The outermost loop of the input loop nest considered for vectorization.
37  Loop *TheLoop;
38 
39  // Loop Info analysis.
40  LoopInfo *LI;
41 
42  // Vectorization plan that we are working on.
43  VPlan &Plan;
44 
45  // Output Top Region.
46  VPRegionBlock *TopRegion = nullptr;
47 
48  // Builder of the VPlan instruction-level representation.
49  VPBuilder VPIRBuilder;
50 
51  // NOTE: The following maps are intentionally destroyed after the plain CFG
52  // construction because subsequent VPlan-to-VPlan transformation may
53  // invalidate them.
54  // Map incoming BasicBlocks to their newly-created VPBasicBlocks.
56  // Map incoming Value definitions to their newly-created VPValues.
57  DenseMap<Value *, VPValue *> IRDef2VPValue;
58 
59  // Hold phi node's that need to be fixed once the plain CFG has been built.
60  SmallVector<PHINode *, 8> PhisToFix;
61 
62  // Utility functions.
63  void setVPBBPredsFromBB(VPBasicBlock *VPBB, BasicBlock *BB);
64  void fixPhiNodes();
65  VPBasicBlock *getOrCreateVPBB(BasicBlock *BB);
66 #ifndef NDEBUG
67  bool isExternalDef(Value *Val);
68 #endif
69  VPValue *getOrCreateVPOperand(Value *IRVal);
70  void createVPInstructionsForVPBB(VPBasicBlock *VPBB, BasicBlock *BB);
71 
72 public:
73  PlainCFGBuilder(Loop *Lp, LoopInfo *LI, VPlan &P)
74  : TheLoop(Lp), LI(LI), Plan(P) {}
75 
76  // Build the plain CFG and return its Top Region.
77  VPRegionBlock *buildPlainCFG();
78 };
79 } // anonymous namespace
80 
81 // Set predecessors of \p VPBB in the same order as they are in \p BB. \p VPBB
82 // must have no predecessors.
83 void PlainCFGBuilder::setVPBBPredsFromBB(VPBasicBlock *VPBB, BasicBlock *BB) {
85  // Collect VPBB predecessors.
86  for (BasicBlock *Pred : predecessors(BB))
87  VPBBPreds.push_back(getOrCreateVPBB(Pred));
88 
89  VPBB->setPredecessors(VPBBPreds);
90 }
91 
92 // Add operands to VPInstructions representing phi nodes from the input IR.
93 void PlainCFGBuilder::fixPhiNodes() {
94  for (auto *Phi : PhisToFix) {
95  assert(IRDef2VPValue.count(Phi) && "Missing VPInstruction for PHINode.");
96  VPValue *VPVal = IRDef2VPValue[Phi];
97  assert(isa<VPWidenPHIRecipe>(VPVal) &&
98  "Expected WidenPHIRecipe for phi node.");
99  auto *VPPhi = cast<VPWidenPHIRecipe>(VPVal);
100  assert(VPPhi->getNumOperands() == 0 &&
101  "Expected VPInstruction with no operands.");
102 
103  for (unsigned I = 0; I != Phi->getNumOperands(); ++I)
104  VPPhi->addIncoming(getOrCreateVPOperand(Phi->getIncomingValue(I)),
105  BB2VPBB[Phi->getIncomingBlock(I)]);
106  }
107 }
108 
109 // Create a new empty VPBasicBlock for an incoming BasicBlock or retrieve an
110 // existing one if it was already created.
111 VPBasicBlock *PlainCFGBuilder::getOrCreateVPBB(BasicBlock *BB) {
112  auto BlockIt = BB2VPBB.find(BB);
113  if (BlockIt != BB2VPBB.end())
114  // Retrieve existing VPBB.
115  return BlockIt->second;
116 
117  // Create new VPBB.
118  LLVM_DEBUG(dbgs() << "Creating VPBasicBlock for " << BB->getName() << "\n");
119  VPBasicBlock *VPBB = new VPBasicBlock(BB->getName());
120  BB2VPBB[BB] = VPBB;
121  VPBB->setParent(TopRegion);
122  return VPBB;
123 }
124 
125 #ifndef NDEBUG
126 // Return true if \p Val is considered an external definition. An external
127 // definition is either:
128 // 1. A Value that is not an Instruction. This will be refined in the future.
129 // 2. An Instruction that is outside of the CFG snippet represented in VPlan,
130 // i.e., is not part of: a) the loop nest, b) outermost loop PH and, c)
131 // outermost loop exits.
132 bool PlainCFGBuilder::isExternalDef(Value *Val) {
133  // All the Values that are not Instructions are considered external
134  // definitions for now.
135  Instruction *Inst = dyn_cast<Instruction>(Val);
136  if (!Inst)
137  return true;
138 
139  BasicBlock *InstParent = Inst->getParent();
140  assert(InstParent && "Expected instruction parent.");
141 
142  // Check whether Instruction definition is in loop PH.
143  BasicBlock *PH = TheLoop->getLoopPreheader();
144  assert(PH && "Expected loop pre-header.");
145 
146  if (InstParent == PH)
147  // Instruction definition is in outermost loop PH.
148  return false;
149 
150  // Check whether Instruction definition is in the loop exit.
151  BasicBlock *Exit = TheLoop->getUniqueExitBlock();
152  assert(Exit && "Expected loop with single exit.");
153  if (InstParent == Exit) {
154  // Instruction definition is in outermost loop exit.
155  return false;
156  }
157 
158  // Check whether Instruction definition is in loop body.
159  return !TheLoop->contains(Inst);
160 }
161 #endif
162 
163 // Create a new VPValue or retrieve an existing one for the Instruction's
164 // operand \p IRVal. This function must only be used to create/retrieve VPValues
165 // for *Instruction's operands* and not to create regular VPInstruction's. For
166 // the latter, please, look at 'createVPInstructionsForVPBB'.
167 VPValue *PlainCFGBuilder::getOrCreateVPOperand(Value *IRVal) {
168  auto VPValIt = IRDef2VPValue.find(IRVal);
169  if (VPValIt != IRDef2VPValue.end())
170  // Operand has an associated VPInstruction or VPValue that was previously
171  // created.
172  return VPValIt->second;
173 
174  // Operand doesn't have a previously created VPInstruction/VPValue. This
175  // means that operand is:
176  // A) a definition external to VPlan,
177  // B) any other Value without specific representation in VPlan.
178  // For now, we use VPValue to represent A and B and classify both as external
179  // definitions. We may introduce specific VPValue subclasses for them in the
180  // future.
181  assert(isExternalDef(IRVal) && "Expected external definition as operand.");
182 
183  // A and B: Create VPValue and add it to the pool of external definitions and
184  // to the Value->VPValue map.
185  VPValue *NewVPVal = new VPValue(IRVal);
186  Plan.addExternalDef(NewVPVal);
187  IRDef2VPValue[IRVal] = NewVPVal;
188  return NewVPVal;
189 }
190 
191 // Create new VPInstructions in a VPBasicBlock, given its BasicBlock
192 // counterpart. This function must be invoked in RPO so that the operands of a
193 // VPInstruction in \p BB have been visited before (except for Phi nodes).
194 void PlainCFGBuilder::createVPInstructionsForVPBB(VPBasicBlock *VPBB,
195  BasicBlock *BB) {
196  VPIRBuilder.setInsertPoint(VPBB);
197  for (Instruction &InstRef : *BB) {
198  Instruction *Inst = &InstRef;
199 
200  // There shouldn't be any VPValue for Inst at this point. Otherwise, we
201  // visited Inst when we shouldn't, breaking the RPO traversal order.
202  assert(!IRDef2VPValue.count(Inst) &&
203  "Instruction shouldn't have been visited.");
204 
205  if (auto *Br = dyn_cast<BranchInst>(Inst)) {
206  // Branch instruction is not explicitly represented in VPlan but we need
207  // to represent its condition bit when it's conditional.
208  if (Br->isConditional())
209  getOrCreateVPOperand(Br->getCondition());
210 
211  // Skip the rest of the Instruction processing for Branch instructions.
212  continue;
213  }
214 
215  VPValue *NewVPV;
216  if (auto *Phi = dyn_cast<PHINode>(Inst)) {
217  // Phi node's operands may have not been visited at this point. We create
218  // an empty VPInstruction that we will fix once the whole plain CFG has
219  // been built.
220  NewVPV = new VPWidenPHIRecipe(Phi);
221  VPBB->appendRecipe(cast<VPWidenPHIRecipe>(NewVPV));
222  PhisToFix.push_back(Phi);
223  } else {
224  // Translate LLVM-IR operands into VPValue operands and set them in the
225  // new VPInstruction.
226  SmallVector<VPValue *, 4> VPOperands;
227  for (Value *Op : Inst->operands())
228  VPOperands.push_back(getOrCreateVPOperand(Op));
229 
230  // Build VPInstruction for any arbitraty Instruction without specific
231  // representation in VPlan.
232  NewVPV = cast<VPInstruction>(
233  VPIRBuilder.createNaryOp(Inst->getOpcode(), VPOperands, Inst));
234  }
235 
236  IRDef2VPValue[Inst] = NewVPV;
237  }
238 }
239 
240 // Main interface to build the plain CFG.
241 VPRegionBlock *PlainCFGBuilder::buildPlainCFG() {
242  // 1. Create the Top Region. It will be the parent of all VPBBs.
243  TopRegion = new VPRegionBlock("TopRegion", false /*isReplicator*/);
244 
245  // 2. Scan the body of the loop in a topological order to visit each basic
246  // block after having visited its predecessor basic blocks. Create a VPBB for
247  // each BB and link it to its successor and predecessor VPBBs. Note that
248  // predecessors must be set in the same order as they are in the incomming IR.
249  // Otherwise, there might be problems with existing phi nodes and algorithm
250  // based on predecessors traversal.
251 
252  // Loop PH needs to be explicitly visited since it's not taken into account by
253  // LoopBlocksDFS.
254  BasicBlock *PreheaderBB = TheLoop->getLoopPreheader();
255  assert((PreheaderBB->getTerminator()->getNumSuccessors() == 1) &&
256  "Unexpected loop preheader");
257  VPBasicBlock *PreheaderVPBB = getOrCreateVPBB(PreheaderBB);
258  for (auto &I : *PreheaderBB) {
259  if (I.getType()->isVoidTy())
260  continue;
261  VPValue *VPV = new VPValue(&I);
262  Plan.addExternalDef(VPV);
263  IRDef2VPValue[&I] = VPV;
264  }
265  // Create empty VPBB for Loop H so that we can link PH->H.
266  VPBlockBase *HeaderVPBB = getOrCreateVPBB(TheLoop->getHeader());
267  // Preheader's predecessors will be set during the loop RPO traversal below.
268  PreheaderVPBB->setOneSuccessor(HeaderVPBB);
269 
270  LoopBlocksRPO RPO(TheLoop);
271  RPO.perform(LI);
272 
273  for (BasicBlock *BB : RPO) {
274  // Create or retrieve the VPBasicBlock for this BB and create its
275  // VPInstructions.
276  VPBasicBlock *VPBB = getOrCreateVPBB(BB);
277  createVPInstructionsForVPBB(VPBB, BB);
278 
279  // Set VPBB successors. We create empty VPBBs for successors if they don't
280  // exist already. Recipes will be created when the successor is visited
281  // during the RPO traversal.
282  Instruction *TI = BB->getTerminator();
283  assert(TI && "Terminator expected.");
284  unsigned NumSuccs = TI->getNumSuccessors();
285 
286  if (NumSuccs == 1) {
287  VPBasicBlock *SuccVPBB = getOrCreateVPBB(TI->getSuccessor(0));
288  assert(SuccVPBB && "VPBB Successor not found.");
289  VPBB->setOneSuccessor(SuccVPBB);
290  } else if (NumSuccs == 2) {
291  VPBasicBlock *SuccVPBB0 = getOrCreateVPBB(TI->getSuccessor(0));
292  assert(SuccVPBB0 && "Successor 0 not found.");
293  VPBasicBlock *SuccVPBB1 = getOrCreateVPBB(TI->getSuccessor(1));
294  assert(SuccVPBB1 && "Successor 1 not found.");
295 
296  // Get VPBB's condition bit.
297  assert(isa<BranchInst>(TI) && "Unsupported terminator!");
298  auto *Br = cast<BranchInst>(TI);
299  Value *BrCond = Br->getCondition();
300  // Look up the branch condition to get the corresponding VPValue
301  // representing the condition bit in VPlan (which may be in another VPBB).
302  assert(IRDef2VPValue.count(BrCond) &&
303  "Missing condition bit in IRDef2VPValue!");
304  VPValue *VPCondBit = IRDef2VPValue[BrCond];
305 
306  // Link successors using condition bit.
307  VPBB->setTwoSuccessors(SuccVPBB0, SuccVPBB1, VPCondBit);
308  } else
309  llvm_unreachable("Number of successors not supported.");
310 
311  // Set VPBB predecessors in the same order as they are in the incoming BB.
312  setVPBBPredsFromBB(VPBB, BB);
313  }
314 
315  // 3. Process outermost loop exit. We created an empty VPBB for the loop
316  // single exit BB during the RPO traversal of the loop body but Instructions
317  // weren't visited because it's not part of the the loop.
318  BasicBlock *LoopExitBB = TheLoop->getUniqueExitBlock();
319  assert(LoopExitBB && "Loops with multiple exits are not supported.");
320  VPBasicBlock *LoopExitVPBB = BB2VPBB[LoopExitBB];
321  createVPInstructionsForVPBB(LoopExitVPBB, LoopExitBB);
322  // Loop exit was already set as successor of the loop exiting BB.
323  // We only set its predecessor VPBB now.
324  setVPBBPredsFromBB(LoopExitVPBB, LoopExitBB);
325 
326  // 4. The whole CFG has been built at this point so all the input Values must
327  // have a VPlan couterpart. Fix VPlan phi nodes by adding their corresponding
328  // VPlan operands.
329  fixPhiNodes();
330 
331  // 5. Final Top Region setup. Set outermost loop pre-header and single exit as
332  // Top Region entry and exit.
333  TopRegion->setEntry(PreheaderVPBB);
334  TopRegion->setExit(LoopExitVPBB);
335  return TopRegion;
336 }
337 
338 VPRegionBlock *VPlanHCFGBuilder::buildPlainCFG() {
339  PlainCFGBuilder PCFGBuilder(TheLoop, LI, Plan);
340  return PCFGBuilder.buildPlainCFG();
341 }
342 
343 // Public interface to build a H-CFG.
345  // Build Top Region enclosing the plain CFG and set it as VPlan entry.
346  VPRegionBlock *TopRegion = buildPlainCFG();
347  Plan.setEntry(TopRegion);
348  LLVM_DEBUG(Plan.setName("HCFGBuilder: Plain CFG\n"); dbgs() << Plan);
349 
350  Verifier.verifyHierarchicalCFG(TopRegion);
351 
352  // Compute plain CFG dom tree for VPLInfo.
353  VPDomTree.recalculate(*TopRegion);
354  LLVM_DEBUG(dbgs() << "Dominator Tree after building the plain CFG.\n";
355  VPDomTree.print(dbgs()));
356 
357  // Compute VPLInfo and keep it in Plan.
358  VPLoopInfo &VPLInfo = Plan.getVPLoopInfo();
359  VPLInfo.analyze(VPDomTree);
360  LLVM_DEBUG(dbgs() << "VPLoop Info After buildPlainCFG:\n";
361  VPLInfo.print(dbgs()));
362 }
llvm::predecessors
pred_range predecessors(BasicBlock *BB)
Definition: CFG.h:127
llvm
This file implements support for optimizing divisions by a constant.
Definition: AllocatorList.h:23
llvm::DominatorTreeBase::print
void print(raw_ostream &O) const
print - Convert to human readable form
Definition: GenericDomTree.h:709
llvm::User::operands
op_range operands()
Definition: User.h:242
llvm::VPBasicBlock
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition: VPlan.h:1624
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:530
P
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
Definition: README-SSE.txt:411
llvm::VPWidenPHIRecipe
A recipe for handling first order recurrences and pointer inductions.
Definition: VPlan.h:1061
VPlanHCFGBuilder.h
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
RPO
rpo function Deduce function attributes in RPO
Definition: FunctionAttrs.cpp:1927
llvm::LoopInfoBase::print
void print(raw_ostream &OS) const
Definition: LoopInfoImpl.h:621
llvm::VPlanHCFGBuilder::buildHierarchicalCFG
void buildHierarchicalCFG()
Build H-CFG for TheLoop and update Plan accordingly.
Definition: VPlanHCFGBuilder.cpp:344
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
llvm::LoopBlocksRPO
Wrapper class to LoopBlocksDFS that provides a standard begin()/end() interface for the DFS reverse p...
Definition: LoopIterator.h:172
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::VPBlockBase
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
Definition: VPlan.h:388
llvm::Instruction::getNumSuccessors
unsigned getNumSuccessors() const
Return the number of successors that this instruction has.
Definition: Instruction.cpp:765
llvm::VPBasicBlock::appendRecipe
void appendRecipe(VPRecipeBase *Recipe)
Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.
Definition: VPlan.h:1692
llvm::Instruction::getOpcode
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:160
llvm::LoopInfoBase::analyze
void analyze(const DominatorTreeBase< BlockT, false > &DomTree)
Create the loop forest using a stable algorithm.
Definition: LoopInfoImpl.h:548
llvm::VPBuilder
VPlan-based builder utility analogous to IRBuilder.
Definition: LoopVectorizationPlanner.h:43
llvm::VPBlockBase::setPredecessors
void setPredecessors(ArrayRef< VPBlockBase * > NewPreds)
Set each VPBasicBlock in NewPreds as predecessor of this VPBlockBase.
Definition: VPlan.h:597
llvm::Instruction
Definition: Instruction.h:45
llvm::VPValue
Definition: VPlanValue.h:44
llvm::Instruction::getSuccessor
BasicBlock * getSuccessor(unsigned Idx) const
Return the specified successor. This instruction must be a terminator.
Definition: Instruction.cpp:777
LoopIterator.h
llvm::VPRegionBlock
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
Definition: VPlan.h:1738
llvm::VPlan::setName
void setName(const Twine &newName)
Definition: VPlan.h:2176
llvm::DenseMap
Definition: DenseMap.h:714
I
#define I(x, y, z)
Definition: MD5.cpp:59
LoopVectorizationPlanner.h
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::DominatorTreeBase::recalculate
void recalculate(ParentType &Func)
recalculate - compute a dominator tree for the given function
Definition: GenericDomTree.h:778
llvm::LoopInfo
Definition: LoopInfo.h:1083
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:134
llvm::VPBlockBase::setOneSuccessor
void setOneSuccessor(VPBlockBase *Successor)
Set a given VPBlockBase Successor as the single successor of this VPBlockBase.
Definition: VPlan.h:576
llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:152
llvm::VPBlockBase::setParent
void setParent(VPRegionBlock *P)
Definition: VPlan.h:481
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:324
llvm::LoopInfoBase< VPBlockBase, VPLoop >
llvm::VPlan::setEntry
VPBlockBase * setEntry(VPBlockBase *Block)
Definition: VPlan.h:2153
llvm::VPlan::getVPLoopInfo
VPLoopInfo & getVPLoopInfo()
Return the VPLoopInfo analysis for this VPlan.
Definition: VPlan.h:2228
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:94
llvm::VPlanVerifier::verifyHierarchicalCFG
void verifyHierarchicalCFG(const VPRegionBlock *TopRegion) const
Verify the invariants of the H-CFG starting from TopRegion.
Definition: VPlanVerifier.cpp:122
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::VPlan
VPlan models a candidate for vectorization, encoding various decisions take to produce efficient outp...
Definition: VPlan.h:2086
llvm::VPBlockBase::setTwoSuccessors
void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse, VPValue *Condition)
Set two given VPBlockBases IfTrue and IfFalse to be the two successors of this VPBlockBase.
Definition: VPlan.h:585
llvm::Value
LLVM Value Representation.
Definition: Value.h:74