LLVM  10.0.0svn
VPlanSLP.cpp
Go to the documentation of this file.
1 //===- VPlanSLP.cpp - SLP Analysis based on VPlan -------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// This file implements SLP analysis based on VPlan. The analysis is based on
9 /// the ideas described in
10 ///
11 /// Look-ahead SLP: auto-vectorization in the presence of commutative
12 /// operations, CGO 2018 by Vasileios Porpodas, Rodrigo C. O. Rocha,
13 /// Luís F. W. Góes
14 ///
15 //===----------------------------------------------------------------------===//
16 
17 #include "VPlan.h"
20 #include "llvm/ADT/SmallVector.h"
21 #include "llvm/ADT/Twine.h"
22 #include "llvm/Analysis/LoopInfo.h"
24 #include "llvm/IR/BasicBlock.h"
25 #include "llvm/IR/CFG.h"
26 #include "llvm/IR/Dominators.h"
27 #include "llvm/IR/InstrTypes.h"
28 #include "llvm/IR/Instruction.h"
29 #include "llvm/IR/Instructions.h"
30 #include "llvm/IR/Type.h"
31 #include "llvm/IR/Value.h"
32 #include "llvm/Support/Casting.h"
33 #include "llvm/Support/Debug.h"
38 #include <cassert>
39 #include <iterator>
40 #include <string>
41 #include <vector>
42 
43 using namespace llvm;
44 
45 #define DEBUG_TYPE "vplan-slp"
46 
47 // Number of levels to look ahead when re-ordering multi node operands.
48 static unsigned LookaheadMaxDepth = 5;
49 
50 VPInstruction *VPlanSlp::markFailed() {
51  // FIXME: Currently this is used to signal we hit instructions we cannot
52  // trivially SLP'ize.
53  CompletelySLP = false;
54  return nullptr;
55 }
56 
57 void VPlanSlp::addCombined(ArrayRef<VPValue *> Operands, VPInstruction *New) {
58  if (all_of(Operands, [](VPValue *V) {
59  return cast<VPInstruction>(V)->getUnderlyingInstr();
60  })) {
61  unsigned BundleSize = 0;
62  for (VPValue *V : Operands) {
63  Type *T = cast<VPInstruction>(V)->getUnderlyingInstr()->getType();
64  assert(!T->isVectorTy() && "Only scalar types supported for now");
65  BundleSize += T->getScalarSizeInBits();
66  }
67  WidestBundleBits = std::max(WidestBundleBits, BundleSize);
68  }
69 
70  auto Res = BundleToCombined.try_emplace(to_vector<4>(Operands), New);
71  assert(Res.second &&
72  "Already created a combined instruction for the operand bundle");
73  (void)Res;
74 }
75 
76 bool VPlanSlp::areVectorizable(ArrayRef<VPValue *> Operands) const {
77  // Currently we only support VPInstructions.
78  if (!all_of(Operands, [](VPValue *Op) {
79  return Op && isa<VPInstruction>(Op) &&
80  cast<VPInstruction>(Op)->getUnderlyingInstr();
81  })) {
82  LLVM_DEBUG(dbgs() << "VPSLP: not all operands are VPInstructions\n");
83  return false;
84  }
85 
86  // Check if opcodes and type width agree for all instructions in the bundle.
87  // FIXME: Differing widths/opcodes can be handled by inserting additional
88  // instructions.
89  // FIXME: Deal with non-primitive types.
90  const Instruction *OriginalInstr =
91  cast<VPInstruction>(Operands[0])->getUnderlyingInstr();
92  unsigned Opcode = OriginalInstr->getOpcode();
93  unsigned Width = OriginalInstr->getType()->getPrimitiveSizeInBits();
94  if (!all_of(Operands, [Opcode, Width](VPValue *Op) {
95  const Instruction *I = cast<VPInstruction>(Op)->getUnderlyingInstr();
96  return I->getOpcode() == Opcode &&
98  })) {
99  LLVM_DEBUG(dbgs() << "VPSLP: Opcodes do not agree \n");
100  return false;
101  }
102 
103  // For now, all operands must be defined in the same BB.
104  if (any_of(Operands, [this](VPValue *Op) {
105  return cast<VPInstruction>(Op)->getParent() != &this->BB;
106  })) {
107  LLVM_DEBUG(dbgs() << "VPSLP: operands in different BBs\n");
108  return false;
109  }
110 
111  if (any_of(Operands,
112  [](VPValue *Op) { return Op->hasMoreThanOneUniqueUser(); })) {
113  LLVM_DEBUG(dbgs() << "VPSLP: Some operands have multiple users.\n");
114  return false;
115  }
116 
117  // For loads, check that there are no instructions writing to memory in
118  // between them.
119  // TODO: we only have to forbid instructions writing to memory that could
120  // interfere with any of the loads in the bundle
121  if (Opcode == Instruction::Load) {
122  unsigned LoadsSeen = 0;
123  VPBasicBlock *Parent = cast<VPInstruction>(Operands[0])->getParent();
124  for (auto &I : *Parent) {
125  auto *VPI = cast<VPInstruction>(&I);
126  if (VPI->getOpcode() == Instruction::Load &&
127  std::find(Operands.begin(), Operands.end(), VPI) != Operands.end())
128  LoadsSeen++;
129 
130  if (LoadsSeen == Operands.size())
131  break;
132  if (LoadsSeen > 0 && VPI->mayWriteToMemory()) {
133  LLVM_DEBUG(
134  dbgs() << "VPSLP: instruction modifying memory between loads\n");
135  return false;
136  }
137  }
138 
139  if (!all_of(Operands, [](VPValue *Op) {
140  return cast<LoadInst>(cast<VPInstruction>(Op)->getUnderlyingInstr())
141  ->isSimple();
142  })) {
143  LLVM_DEBUG(dbgs() << "VPSLP: only simple loads are supported.\n");
144  return false;
145  }
146  }
147 
148  if (Opcode == Instruction::Store)
149  if (!all_of(Operands, [](VPValue *Op) {
150  return cast<StoreInst>(cast<VPInstruction>(Op)->getUnderlyingInstr())
151  ->isSimple();
152  })) {
153  LLVM_DEBUG(dbgs() << "VPSLP: only simple stores are supported.\n");
154  return false;
155  }
156 
157  return true;
158 }
159 
161  unsigned OperandIndex) {
163  for (VPValue *V : Values) {
164  auto *U = cast<VPUser>(V);
165  Operands.push_back(U->getOperand(OperandIndex));
166  }
167  return Operands;
168 }
169 
170 static bool areCommutative(ArrayRef<VPValue *> Values) {
172  cast<VPInstruction>(Values[0])->getOpcode());
173 }
174 
178  auto *VPI = cast<VPInstruction>(Values[0]);
179 
180  switch (VPI->getOpcode()) {
181  case Instruction::Load:
182  llvm_unreachable("Loads terminate a tree, no need to get operands");
183  case Instruction::Store:
184  Result.push_back(getOperands(Values, 0));
185  break;
186  default:
187  for (unsigned I = 0, NumOps = VPI->getNumOperands(); I < NumOps; ++I)
188  Result.push_back(getOperands(Values, I));
189  break;
190  }
191 
192  return Result;
193 }
194 
195 /// Returns the opcode of Values or ~0 if they do not all agree.
197  unsigned Opcode = cast<VPInstruction>(Values[0])->getOpcode();
198  if (any_of(Values, [Opcode](VPValue *V) {
199  return cast<VPInstruction>(V)->getOpcode() != Opcode;
200  }))
201  return None;
202  return {Opcode};
203 }
204 
205 /// Returns true if A and B access sequential memory if they are loads or
206 /// stores or if they have identical opcodes otherwise.
209  if (A->getOpcode() != B->getOpcode())
210  return false;
211 
212  if (A->getOpcode() != Instruction::Load &&
214  return true;
215  auto *GA = IAI.getInterleaveGroup(A);
216  auto *GB = IAI.getInterleaveGroup(B);
217 
218  return GA && GB && GA == GB && GA->getIndex(A) + 1 == GB->getIndex(B);
219 }
220 
221 /// Implements getLAScore from Listing 7 in the paper.
222 /// Traverses and compares operands of V1 and V2 to MaxLevel.
223 static unsigned getLAScore(VPValue *V1, VPValue *V2, unsigned MaxLevel,
225  if (!isa<VPInstruction>(V1) || !isa<VPInstruction>(V2))
226  return 0;
227 
228  if (MaxLevel == 0)
229  return (unsigned)areConsecutiveOrMatch(cast<VPInstruction>(V1),
230  cast<VPInstruction>(V2), IAI);
231 
232  unsigned Score = 0;
233  for (unsigned I = 0, EV1 = cast<VPUser>(V1)->getNumOperands(); I < EV1; ++I)
234  for (unsigned J = 0, EV2 = cast<VPUser>(V2)->getNumOperands(); J < EV2; ++J)
235  Score += getLAScore(cast<VPUser>(V1)->getOperand(I),
236  cast<VPUser>(V2)->getOperand(J), MaxLevel - 1, IAI);
237  return Score;
238 }
239 
240 std::pair<VPlanSlp::OpMode, VPValue *>
241 VPlanSlp::getBest(OpMode Mode, VPValue *Last,
242  SmallPtrSetImpl<VPValue *> &Candidates,
244  assert((Mode == OpMode::Load || Mode == OpMode::Opcode) &&
245  "Currently we only handle load and commutative opcodes");
246  LLVM_DEBUG(dbgs() << " getBest\n");
247 
248  SmallVector<VPValue *, 4> BestCandidates;
249  LLVM_DEBUG(dbgs() << " Candidates for "
250  << *cast<VPInstruction>(Last)->getUnderlyingInstr() << " ");
251  for (auto *Candidate : Candidates) {
252  auto *LastI = cast<VPInstruction>(Last);
253  auto *CandidateI = cast<VPInstruction>(Candidate);
254  if (areConsecutiveOrMatch(LastI, CandidateI, IAI)) {
255  LLVM_DEBUG(dbgs() << *cast<VPInstruction>(Candidate)->getUnderlyingInstr()
256  << " ");
257  BestCandidates.push_back(Candidate);
258  }
259  }
260  LLVM_DEBUG(dbgs() << "\n");
261 
262  if (BestCandidates.empty())
263  return {OpMode::Failed, nullptr};
264 
265  if (BestCandidates.size() == 1)
266  return {Mode, BestCandidates[0]};
267 
268  VPValue *Best = nullptr;
269  unsigned BestScore = 0;
270  for (unsigned Depth = 1; Depth < LookaheadMaxDepth; Depth++) {
271  unsigned PrevScore = ~0u;
272  bool AllSame = true;
273 
274  // FIXME: Avoid visiting the same operands multiple times.
275  for (auto *Candidate : BestCandidates) {
276  unsigned Score = getLAScore(Last, Candidate, Depth, IAI);
277  if (PrevScore == ~0u)
278  PrevScore = Score;
279  if (PrevScore != Score)
280  AllSame = false;
281  PrevScore = Score;
282 
283  if (Score > BestScore) {
284  BestScore = Score;
285  Best = Candidate;
286  }
287  }
288  if (!AllSame)
289  break;
290  }
291  LLVM_DEBUG(dbgs() << "Found best "
292  << *cast<VPInstruction>(Best)->getUnderlyingInstr()
293  << "\n");
294  Candidates.erase(Best);
295 
296  return {Mode, Best};
297 }
298 
299 SmallVector<VPlanSlp::MultiNodeOpTy, 4> VPlanSlp::reorderMultiNodeOps() {
302  FinalOrder.reserve(MultiNodeOps.size());
303  Mode.reserve(MultiNodeOps.size());
304 
305  LLVM_DEBUG(dbgs() << "Reordering multinode\n");
306 
307  for (auto &Operands : MultiNodeOps) {
308  FinalOrder.push_back({Operands.first, {Operands.second[0]}});
309  if (cast<VPInstruction>(Operands.second[0])->getOpcode() ==
311  Mode.push_back(OpMode::Load);
312  else
313  Mode.push_back(OpMode::Opcode);
314  }
315 
316  for (unsigned Lane = 1, E = MultiNodeOps[0].second.size(); Lane < E; ++Lane) {
317  LLVM_DEBUG(dbgs() << " Finding best value for lane " << Lane << "\n");
318  SmallPtrSet<VPValue *, 4> Candidates;
319  LLVM_DEBUG(dbgs() << " Candidates ");
320  for (auto Ops : MultiNodeOps) {
321  LLVM_DEBUG(
322  dbgs() << *cast<VPInstruction>(Ops.second[Lane])->getUnderlyingInstr()
323  << " ");
324  Candidates.insert(Ops.second[Lane]);
325  }
326  LLVM_DEBUG(dbgs() << "\n");
327 
328  for (unsigned Op = 0, E = MultiNodeOps.size(); Op < E; ++Op) {
329  LLVM_DEBUG(dbgs() << " Checking " << Op << "\n");
330  if (Mode[Op] == OpMode::Failed)
331  continue;
332 
333  VPValue *Last = FinalOrder[Op].second[Lane - 1];
334  std::pair<OpMode, VPValue *> Res =
335  getBest(Mode[Op], Last, Candidates, IAI);
336  if (Res.second)
337  FinalOrder[Op].second.push_back(Res.second);
338  else
339  // TODO: handle this case
340  FinalOrder[Op].second.push_back(markFailed());
341  }
342  }
343 
344  return FinalOrder;
345 }
346 
347 void VPlanSlp::dumpBundle(ArrayRef<VPValue *> Values) {
348  dbgs() << " Ops: ";
349  for (auto Op : Values) {
350  if (auto *VPInstr = cast_or_null<VPInstruction>(Op))
351  if (auto *Instr = VPInstr->getUnderlyingInstr()) {
352  dbgs() << *Instr << " | ";
353  continue;
354  }
355  dbgs() << " nullptr | ";
356  }
357  dbgs() << "\n";
358 }
359 
361  assert(!Values.empty() && "Need some operands!");
362 
363  // If we already visited this instruction bundle, re-use the existing node
364  auto I = BundleToCombined.find(to_vector<4>(Values));
365  if (I != BundleToCombined.end()) {
366 #ifndef NDEBUG
367  // Check that the resulting graph is a tree. If we re-use a node, this means
368  // its values have multiple users. We only allow this, if all users of each
369  // value are the same instruction.
370  for (auto *V : Values) {
371  auto UI = V->user_begin();
372  auto *FirstUser = *UI++;
373  while (UI != V->user_end()) {
374  assert(*UI == FirstUser && "Currently we only support SLP trees.");
375  UI++;
376  }
377  }
378 #endif
379  return I->second;
380  }
381 
382  // Dump inputs
383  LLVM_DEBUG({
384  dbgs() << "buildGraph: ";
385  dumpBundle(Values);
386  });
387 
388  if (!areVectorizable(Values))
389  return markFailed();
390 
391  assert(getOpcode(Values) && "Opcodes for all values must match");
392  unsigned ValuesOpcode = getOpcode(Values).getValue();
393 
394  SmallVector<VPValue *, 4> CombinedOperands;
395  if (areCommutative(Values)) {
396  bool MultiNodeRoot = !MultiNodeActive;
397  MultiNodeActive = true;
398  for (auto &Operands : getOperands(Values)) {
399  LLVM_DEBUG({
400  dbgs() << " Visiting Commutative";
401  dumpBundle(Operands);
402  });
403 
404  auto OperandsOpcode = getOpcode(Operands);
405  if (OperandsOpcode && OperandsOpcode == getOpcode(Values)) {
406  LLVM_DEBUG(dbgs() << " Same opcode, continue building\n");
407  CombinedOperands.push_back(buildGraph(Operands));
408  } else {
409  LLVM_DEBUG(dbgs() << " Adding multinode Ops\n");
410  // Create dummy VPInstruction, which will we replace later by the
411  // re-ordered operand.
412  VPInstruction *Op = new VPInstruction(0, {});
413  CombinedOperands.push_back(Op);
414  MultiNodeOps.emplace_back(Op, Operands);
415  }
416  }
417 
418  if (MultiNodeRoot) {
419  LLVM_DEBUG(dbgs() << "Reorder \n");
420  MultiNodeActive = false;
421 
422  auto FinalOrder = reorderMultiNodeOps();
423 
424  MultiNodeOps.clear();
425  for (auto &Ops : FinalOrder) {
426  VPInstruction *NewOp = buildGraph(Ops.second);
427  Ops.first->replaceAllUsesWith(NewOp);
428  for (unsigned i = 0; i < CombinedOperands.size(); i++)
429  if (CombinedOperands[i] == Ops.first)
430  CombinedOperands[i] = NewOp;
431  delete Ops.first;
432  Ops.first = NewOp;
433  }
434  LLVM_DEBUG(dbgs() << "Found final order\n");
435  }
436  } else {
437  LLVM_DEBUG(dbgs() << " NonCommuntative\n");
438  if (ValuesOpcode == Instruction::Load)
439  for (VPValue *V : Values)
440  CombinedOperands.push_back(cast<VPInstruction>(V)->getOperand(0));
441  else
442  for (auto &Operands : getOperands(Values))
443  CombinedOperands.push_back(buildGraph(Operands));
444  }
445 
446  unsigned Opcode;
447  switch (ValuesOpcode) {
448  case Instruction::Load:
449  Opcode = VPInstruction::SLPLoad;
450  break;
451  case Instruction::Store:
452  Opcode = VPInstruction::SLPStore;
453  break;
454  default:
455  Opcode = ValuesOpcode;
456  break;
457  }
458 
459  if (!CompletelySLP)
460  return markFailed();
461 
462  assert(CombinedOperands.size() > 0 && "Need more some operands");
463  auto *VPI = new VPInstruction(Opcode, CombinedOperands);
464  VPI->setUnderlyingInstr(cast<VPInstruction>(Values[0])->getUnderlyingInstr());
465 
466  LLVM_DEBUG(dbgs() << "Create VPInstruction "; VPI->print(dbgs());
467  cast<VPInstruction>(Values[0])->print(dbgs()); dbgs() << "\n");
468  addCombined(Values, VPI);
469  return VPI;
470 }
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:641
SI Whole Quad Mode
This class represents lattice values for constants.
Definition: AllocatorList.h:23
iterator begin() const
Definition: ArrayRef.h:136
bool hasMoreThanOneUniqueUser()
Returns true if the value has more than one unique user.
Definition: VPlanValue.h:110
unsigned second
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1165
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:230
void reserve(size_type N)
Definition: SmallVector.h:369
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:343
static bool areCommutative(ArrayRef< VPValue *> Values)
Definition: VPlanSLP.cpp:170
static Optional< unsigned > getOpcode(ArrayRef< VPValue *> Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition: VPlanSLP.cpp:196
mir Rename Register Operands
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:246
const T & getValue() const LLVM_LVALUE_FUNCTION
Definition: Optional.h:255
InterleaveGroup< VPInstruction > * getInterleaveGroup(VPInstruction *Instr) const
Get the interleave group that Instr belongs to.
Definition: VPlan.h:1587
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:32
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:125
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition: Type.cpp:115
This file contains the declarations of the Vectorization Plan base classes:
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:46
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:148
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:370
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1172
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
size_t size() const
Definition: SmallVector.h:52
auto find(R &&Range, const T &Val) -> decltype(adl_begin(Range))
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1186
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition: VPlan.h:986
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:417
VPInstruction * buildGraph(ArrayRef< VPValue *> Operands)
Tries to build an SLP tree rooted at Operands and returns a VPInstruction combining Operands...
Definition: VPlanSLP.cpp:360
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:390
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type...
Definition: Type.cpp:134
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:837
iterator end() const
Definition: ArrayRef.h:137
static unsigned getLAScore(VPValue *V1, VPValue *V2, unsigned MaxLevel, VPInterleavedAccessInfo &IAI)
Implements getLAScore from Listing 7 in the paper.
Definition: VPlanSLP.cpp:223
bool isCommutative() const
Return true if the instruction is commutative:
Definition: Instruction.h:498
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
static unsigned LookaheadMaxDepth
Definition: VPlanSLP.cpp:48
static bool areConsecutiveOrMatch(VPInstruction *A, VPInstruction *B, VPInterleavedAccessInfo &IAI)
Returns true if A and B access sequential memory if they are loads or stores or if they have identica...
Definition: VPlanSLP.cpp:207
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:55
#define I(x, y, z)
Definition: MD5.cpp:58
unsigned getOpcode() const
Definition: VPlan.h:683
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static const Function * getParent(const Value *V)
static SmallVector< VPValue *, 4 > getOperands(ArrayRef< VPValue *> Values, unsigned OperandIndex)
Definition: VPlanSLP.cpp:160
#define LLVM_DEBUG(X)
Definition: Debug.h:122
This is a concrete Recipe that models a single VPlan-level instruction.
Definition: VPlan.h:632
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:143