LLVM  13.0.0git
SVEIntrinsicOpts.cpp
Go to the documentation of this file.
//===----- SVEIntrinsicOpts - SVE ACLE Intrinsics Opts --------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM
// Exceptions. See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
9 //
10 // Performs general IR level optimizations on SVE intrinsics.
11 //
12 // This pass performs the following optimizations:
13 //
14 // - removes unnecessary ptrue intrinsics (llvm.aarch64.sve.ptrue), e.g:
15 // %1 = @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
16 // %2 = @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
17 // ; (%1 can be replaced with a reinterpret of %2)
18 //
19 // - optimizes ptest intrinsics where the operands are being needlessly
20 // converted to and from svbool_t.
21 //
22 //===----------------------------------------------------------------------===//
23 
#include "AArch64.h"
#include "Utils/AArch64BaseInfo.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"
38 
39 using namespace llvm;
40 using namespace llvm::PatternMatch;
41 
42 #define DEBUG_TYPE "aarch64-sve-intrinsic-opts"
43 
44 namespace llvm {
46 }
47 
48 namespace {
49 struct SVEIntrinsicOpts : public ModulePass {
50  static char ID; // Pass identification, replacement for typeid
51  SVEIntrinsicOpts() : ModulePass(ID) {
53  }
54 
55  bool runOnModule(Module &M) override;
56  void getAnalysisUsage(AnalysisUsage &AU) const override;
57 
58 private:
59  bool coalescePTrueIntrinsicCalls(BasicBlock &BB,
61  bool optimizePTrueIntrinsicCalls(SmallSetVector<Function *, 4> &Functions);
62 
63  /// Operates at the instruction-scope. I.e., optimizations are applied local
64  /// to individual instructions.
65  static bool optimizeIntrinsic(Instruction *I);
66  bool optimizeIntrinsicCalls(SmallSetVector<Function *, 4> &Functions);
67 
68  /// Operates at the function-scope. I.e., optimizations are applied local to
69  /// the functions themselves.
70  bool optimizeFunctions(SmallSetVector<Function *, 4> &Functions);
71 
72  static bool optimizePTest(IntrinsicInst *I);
73  static bool optimizeVectorMul(IntrinsicInst *I);
74  static bool optimizeTBL(IntrinsicInst *I);
75 };
76 } // end anonymous namespace
77 
78 void SVEIntrinsicOpts::getAnalysisUsage(AnalysisUsage &AU) const {
80  AU.setPreservesCFG();
81 }
82 
83 char SVEIntrinsicOpts::ID = 0;
84 static const char *name = "SVE intrinsics optimizations";
85 INITIALIZE_PASS_BEGIN(SVEIntrinsicOpts, DEBUG_TYPE, name, false, false)
87 INITIALIZE_PASS_END(SVEIntrinsicOpts, DEBUG_TYPE, name, false, false)
88 
90  return new SVEIntrinsicOpts();
91 }
92 
93 /// Checks if a ptrue intrinsic call is promoted. The act of promoting a
94 /// ptrue will introduce zeroing. For example:
95 ///
96 /// %1 = <vscale x 4 x i1> call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
97 /// %2 = <vscale x 16 x i1> call @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %1)
98 /// %3 = <vscale x 8 x i1> call @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %2)
99 ///
100 /// %1 is promoted, because it is converted:
101 ///
102 /// <vscale x 4 x i1> => <vscale x 16 x i1> => <vscale x 8 x i1>
103 ///
104 /// via a sequence of the SVE reinterpret intrinsics convert.{to,from}.svbool.
105 static bool isPTruePromoted(IntrinsicInst *PTrue) {
106  // Find all users of this intrinsic that are calls to convert-to-svbool
107  // reinterpret intrinsics.
108  SmallVector<IntrinsicInst *, 4> ConvertToUses;
109  for (User *User : PTrue->users()) {
110  if (match(User, m_Intrinsic<Intrinsic::aarch64_sve_convert_to_svbool>())) {
111  ConvertToUses.push_back(cast<IntrinsicInst>(User));
112  }
113  }
114 
115  // If no such calls were found, this is ptrue is not promoted.
116  if (ConvertToUses.empty())
117  return false;
118 
119  // Otherwise, try to find users of the convert-to-svbool intrinsics that are
120  // calls to the convert-from-svbool intrinsic, and would result in some lanes
121  // being zeroed.
122  const auto *PTrueVTy = cast<ScalableVectorType>(PTrue->getType());
123  for (IntrinsicInst *ConvertToUse : ConvertToUses) {
124  for (User *User : ConvertToUse->users()) {
125  auto *IntrUser = dyn_cast<IntrinsicInst>(User);
126  if (IntrUser && IntrUser->getIntrinsicID() ==
127  Intrinsic::aarch64_sve_convert_from_svbool) {
128  const auto *IntrUserVTy = cast<ScalableVectorType>(IntrUser->getType());
129 
130  // Would some lanes become zeroed by the conversion?
131  if (IntrUserVTy->getElementCount().getKnownMinValue() >
132  PTrueVTy->getElementCount().getKnownMinValue())
133  // This is a promoted ptrue.
134  return true;
135  }
136  }
137  }
138 
139  // If no matching calls were found, this is not a promoted ptrue.
140  return false;
141 }
142 
143 /// Attempts to coalesce ptrues in a basic block.
144 bool SVEIntrinsicOpts::coalescePTrueIntrinsicCalls(
146  if (PTrues.size() <= 1)
147  return false;
148 
149  // Find the ptrue with the most lanes.
150  auto *MostEncompassingPTrue = *std::max_element(
151  PTrues.begin(), PTrues.end(), [](auto *PTrue1, auto *PTrue2) {
152  auto *PTrue1VTy = cast<ScalableVectorType>(PTrue1->getType());
153  auto *PTrue2VTy = cast<ScalableVectorType>(PTrue2->getType());
154  return PTrue1VTy->getElementCount().getKnownMinValue() <
155  PTrue2VTy->getElementCount().getKnownMinValue();
156  });
157 
158  // Remove the most encompassing ptrue, as well as any promoted ptrues, leaving
159  // behind only the ptrues to be coalesced.
160  PTrues.remove(MostEncompassingPTrue);
161  PTrues.remove_if([](auto *PTrue) { return isPTruePromoted(PTrue); });
162 
163  // Hoist MostEncompassingPTrue to the start of the basic block. It is always
164  // safe to do this, since ptrue intrinsic calls are guaranteed to have no
165  // predecessors.
166  MostEncompassingPTrue->moveBefore(BB, BB.getFirstInsertionPt());
167 
168  LLVMContext &Ctx = BB.getContext();
169  IRBuilder<> Builder(Ctx);
170  Builder.SetInsertPoint(&BB, ++MostEncompassingPTrue->getIterator());
171 
172  auto *MostEncompassingPTrueVTy =
173  cast<VectorType>(MostEncompassingPTrue->getType());
174  auto *ConvertToSVBool = Builder.CreateIntrinsic(
175  Intrinsic::aarch64_sve_convert_to_svbool, {MostEncompassingPTrueVTy},
176  {MostEncompassingPTrue});
177 
178  bool ConvertFromCreated = false;
179  for (auto *PTrue : PTrues) {
180  auto *PTrueVTy = cast<VectorType>(PTrue->getType());
181 
182  // Only create the converts if the types are not already the same, otherwise
183  // just use the most encompassing ptrue.
184  if (MostEncompassingPTrueVTy != PTrueVTy) {
185  ConvertFromCreated = true;
186 
187  Builder.SetInsertPoint(&BB, ++ConvertToSVBool->getIterator());
188  auto *ConvertFromSVBool =
189  Builder.CreateIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool,
190  {PTrueVTy}, {ConvertToSVBool});
191  PTrue->replaceAllUsesWith(ConvertFromSVBool);
192  } else
193  PTrue->replaceAllUsesWith(MostEncompassingPTrue);
194 
195  PTrue->eraseFromParent();
196  }
197 
198  // We never used the ConvertTo so remove it
199  if (!ConvertFromCreated)
200  ConvertToSVBool->eraseFromParent();
201 
202  return true;
203 }
204 
205 /// The goal of this function is to remove redundant calls to the SVE ptrue
206 /// intrinsic in each basic block within the given functions.
207 ///
208 /// SVE ptrues have two representations in LLVM IR:
209 /// - a logical representation -- an arbitrary-width scalable vector of i1s,
210 /// i.e. <vscale x N x i1>.
211 /// - a physical representation (svbool, <vscale x 16 x i1>) -- a 16-element
212 /// scalable vector of i1s, i.e. <vscale x 16 x i1>.
213 ///
214 /// The SVE ptrue intrinsic is used to create a logical representation of an SVE
215 /// predicate. Suppose that we have two SVE ptrue intrinsic calls: P1 and P2. If
216 /// P1 creates a logical SVE predicate that is at least as wide as the logical
217 /// SVE predicate created by P2, then all of the bits that are true in the
218 /// physical representation of P2 are necessarily also true in the physical
219 /// representation of P1. P1 'encompasses' P2, therefore, the intrinsic call to
220 /// P2 is redundant and can be replaced by an SVE reinterpret of P1 via
221 /// convert.{to,from}.svbool.
222 ///
223 /// Currently, this pass only coalesces calls to SVE ptrue intrinsics
224 /// if they match the following conditions:
225 ///
226 /// - the call to the intrinsic uses either the SV_ALL or SV_POW2 patterns.
227 /// SV_ALL indicates that all bits of the predicate vector are to be set to
228 /// true. SV_POW2 indicates that all bits of the predicate vector up to the
229 /// largest power-of-two are to be set to true.
230 /// - the result of the call to the intrinsic is not promoted to a wider
231 /// predicate. In this case, keeping the extra ptrue leads to better codegen
232 /// -- coalescing here would create an irreducible chain of SVE reinterprets
233 /// via convert.{to,from}.svbool.
234 ///
235 /// EXAMPLE:
236 ///
237 /// %1 = <vscale x 8 x i1> ptrue(i32 SV_ALL)
238 /// ; Logical: <1, 1, 1, 1, 1, 1, 1, 1>
239 /// ; Physical: <1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0>
240 /// ...
241 ///
242 /// %2 = <vscale x 4 x i1> ptrue(i32 SV_ALL)
243 /// ; Logical: <1, 1, 1, 1>
244 /// ; Physical: <1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0>
245 /// ...
246 ///
247 /// Here, %2 can be replaced by an SVE reinterpret of %1, giving, for instance:
248 ///
249 /// %1 = <vscale x 8 x i1> ptrue(i32 i31)
250 /// %2 = <vscale x 16 x i1> convert.to.svbool(<vscale x 8 x i1> %1)
251 /// %3 = <vscale x 4 x i1> convert.from.svbool(<vscale x 16 x i1> %2)
252 ///
253 bool SVEIntrinsicOpts::optimizePTrueIntrinsicCalls(
254  SmallSetVector<Function *, 4> &Functions) {
255  bool Changed = false;
256 
257  for (auto *F : Functions) {
258  for (auto &BB : *F) {
261 
262  // For each basic block, collect the used ptrues and try to coalesce them.
263  for (Instruction &I : BB) {
264  if (I.use_empty())
265  continue;
266 
267  auto *IntrI = dyn_cast<IntrinsicInst>(&I);
268  if (!IntrI || IntrI->getIntrinsicID() != Intrinsic::aarch64_sve_ptrue)
269  continue;
270 
271  const auto PTruePattern =
272  cast<ConstantInt>(IntrI->getOperand(0))->getZExtValue();
273 
274  if (PTruePattern == AArch64SVEPredPattern::all)
275  SVAllPTrues.insert(IntrI);
276  if (PTruePattern == AArch64SVEPredPattern::pow2)
277  SVPow2PTrues.insert(IntrI);
278  }
279 
280  Changed |= coalescePTrueIntrinsicCalls(BB, SVAllPTrues);
281  Changed |= coalescePTrueIntrinsicCalls(BB, SVPow2PTrues);
282  }
283  }
284 
285  return Changed;
286 }
287 
288 bool SVEIntrinsicOpts::optimizePTest(IntrinsicInst *I) {
289  IntrinsicInst *Op1 = dyn_cast<IntrinsicInst>(I->getArgOperand(0));
290  IntrinsicInst *Op2 = dyn_cast<IntrinsicInst>(I->getArgOperand(1));
291 
292  if (Op1 && Op2 &&
293  Op1->getIntrinsicID() == Intrinsic::aarch64_sve_convert_to_svbool &&
294  Op2->getIntrinsicID() == Intrinsic::aarch64_sve_convert_to_svbool &&
295  Op1->getArgOperand(0)->getType() == Op2->getArgOperand(0)->getType()) {
296 
297  Value *Ops[] = {Op1->getArgOperand(0), Op2->getArgOperand(0)};
298  Type *Tys[] = {Op1->getArgOperand(0)->getType()};
299  Module *M = I->getParent()->getParent()->getParent();
300 
301  auto Fn = Intrinsic::getDeclaration(M, I->getIntrinsicID(), Tys);
302  auto CI = CallInst::Create(Fn, Ops, I->getName(), I);
303 
304  I->replaceAllUsesWith(CI);
305  I->eraseFromParent();
306  if (Op1->use_empty())
307  Op1->eraseFromParent();
308  if (Op1 != Op2 && Op2->use_empty())
309  Op2->eraseFromParent();
310 
311  return true;
312  }
313 
314  return false;
315 }
316 
317 bool SVEIntrinsicOpts::optimizeVectorMul(IntrinsicInst *I) {
318  assert((I->getIntrinsicID() == Intrinsic::aarch64_sve_mul ||
319  I->getIntrinsicID() == Intrinsic::aarch64_sve_fmul) &&
320  "Unexpected opcode");
321 
322  auto *OpPredicate = I->getOperand(0);
323  auto *OpMultiplicand = I->getOperand(1);
324  auto *OpMultiplier = I->getOperand(2);
325 
326  // Return true if a given instruction is an aarch64_sve_dup_x intrinsic call
327  // with a unit splat value, false otherwise.
328  auto IsUnitDupX = [](auto *I) {
329  auto *IntrI = dyn_cast<IntrinsicInst>(I);
330  if (!IntrI || IntrI->getIntrinsicID() != Intrinsic::aarch64_sve_dup_x)
331  return false;
332 
333  auto *SplatValue = IntrI->getOperand(0);
334  return match(SplatValue, m_FPOne()) || match(SplatValue, m_One());
335  };
336 
337  // Return true if a given instruction is an aarch64_sve_dup intrinsic call
338  // with a unit splat value, false otherwise.
339  auto IsUnitDup = [](auto *I) {
340  auto *IntrI = dyn_cast<IntrinsicInst>(I);
341  if (!IntrI || IntrI->getIntrinsicID() != Intrinsic::aarch64_sve_dup)
342  return false;
343 
344  auto *SplatValue = IntrI->getOperand(2);
345  return match(SplatValue, m_FPOne()) || match(SplatValue, m_One());
346  };
347 
348  bool Changed = true;
349 
350  // The OpMultiplier variable should always point to the dup (if any), so
351  // swap if necessary.
352  if (IsUnitDup(OpMultiplicand) || IsUnitDupX(OpMultiplicand))
353  std::swap(OpMultiplier, OpMultiplicand);
354 
355  if (IsUnitDupX(OpMultiplier)) {
356  // [f]mul pg (dupx 1) %n => %n
357  I->replaceAllUsesWith(OpMultiplicand);
358  I->eraseFromParent();
359  Changed = true;
360  } else if (IsUnitDup(OpMultiplier)) {
361  // [f]mul pg (dup pg 1) %n => %n
362  auto *DupInst = cast<IntrinsicInst>(OpMultiplier);
363  auto *DupPg = DupInst->getOperand(1);
364  // TODO: this is naive. The optimization is still valid if DupPg
365  // 'encompasses' OpPredicate, not only if they're the same predicate.
366  if (OpPredicate == DupPg) {
367  I->replaceAllUsesWith(OpMultiplicand);
368  I->eraseFromParent();
369  Changed = true;
370  }
371  }
372 
373  // If an instruction was optimized out then it is possible that some dangling
374  // instructions are left.
375  if (Changed) {
376  auto *OpPredicateInst = dyn_cast<Instruction>(OpPredicate);
377  auto *OpMultiplierInst = dyn_cast<Instruction>(OpMultiplier);
378  if (OpMultiplierInst && OpMultiplierInst->use_empty())
379  OpMultiplierInst->eraseFromParent();
380  if (OpPredicateInst && OpPredicateInst->use_empty())
381  OpPredicateInst->eraseFromParent();
382  }
383 
384  return Changed;
385 }
386 
387 bool SVEIntrinsicOpts::optimizeTBL(IntrinsicInst *I) {
388  assert(I->getIntrinsicID() == Intrinsic::aarch64_sve_tbl &&
389  "Unexpected opcode");
390 
391  auto *OpVal = I->getOperand(0);
392  auto *OpIndices = I->getOperand(1);
393  VectorType *VTy = cast<VectorType>(I->getType());
394 
395  // Check whether OpIndices is an aarch64_sve_dup_x intrinsic call with
396  // constant splat value < minimal element count of result.
397  auto *DupXIntrI = dyn_cast<IntrinsicInst>(OpIndices);
398  if (!DupXIntrI || DupXIntrI->getIntrinsicID() != Intrinsic::aarch64_sve_dup_x)
399  return false;
400 
401  auto *SplatValue = dyn_cast<ConstantInt>(DupXIntrI->getOperand(0));
402  if (!SplatValue ||
403  SplatValue->getValue().uge(VTy->getElementCount().getKnownMinValue()))
404  return false;
405 
406  // Convert sve_tbl(OpVal sve_dup_x(SplatValue)) to
407  // splat_vector(extractelement(OpVal, SplatValue)) for further optimization.
408  LLVMContext &Ctx = I->getContext();
409  IRBuilder<> Builder(Ctx);
410  Builder.SetInsertPoint(I);
411  auto *Extract = Builder.CreateExtractElement(OpVal, SplatValue);
412  auto *VectorSplat =
413  Builder.CreateVectorSplat(VTy->getElementCount(), Extract);
414 
415  I->replaceAllUsesWith(VectorSplat);
416  I->eraseFromParent();
417  if (DupXIntrI->use_empty())
418  DupXIntrI->eraseFromParent();
419  return true;
420 }
421 
422 bool SVEIntrinsicOpts::optimizeIntrinsic(Instruction *I) {
423  IntrinsicInst *IntrI = dyn_cast<IntrinsicInst>(I);
424  if (!IntrI)
425  return false;
426 
427  switch (IntrI->getIntrinsicID()) {
428  case Intrinsic::aarch64_sve_fmul:
429  case Intrinsic::aarch64_sve_mul:
430  return optimizeVectorMul(IntrI);
431  case Intrinsic::aarch64_sve_ptest_any:
432  case Intrinsic::aarch64_sve_ptest_first:
433  case Intrinsic::aarch64_sve_ptest_last:
434  return optimizePTest(IntrI);
435  case Intrinsic::aarch64_sve_tbl:
436  return optimizeTBL(IntrI);
437  default:
438  return false;
439  }
440 
441  return true;
442 }
443 
444 bool SVEIntrinsicOpts::optimizeIntrinsicCalls(
445  SmallSetVector<Function *, 4> &Functions) {
446  bool Changed = false;
447  for (auto *F : Functions) {
448  DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>(*F).getDomTree();
449 
450  // Traverse the DT with an rpo walk so we see defs before uses, allowing
451  // simplification to be done incrementally.
452  BasicBlock *Root = DT->getRoot();
454  for (auto *BB : RPOT)
456  Changed |= optimizeIntrinsic(&I);
457  }
458  return Changed;
459 }
460 
461 bool SVEIntrinsicOpts::optimizeFunctions(
462  SmallSetVector<Function *, 4> &Functions) {
463  bool Changed = false;
464 
465  Changed |= optimizePTrueIntrinsicCalls(Functions);
466  Changed |= optimizeIntrinsicCalls(Functions);
467 
468  return Changed;
469 }
470 
471 bool SVEIntrinsicOpts::runOnModule(Module &M) {
472  bool Changed = false;
474 
475  // Check for SVE intrinsic declarations first so that we only iterate over
476  // relevant functions. Where an appropriate declaration is found, store the
477  // function(s) where it is used so we can target these only.
478  for (auto &F : M.getFunctionList()) {
479  if (!F.isDeclaration())
480  continue;
481 
482  switch (F.getIntrinsicID()) {
483  case Intrinsic::aarch64_sve_ptest_any:
484  case Intrinsic::aarch64_sve_ptest_first:
485  case Intrinsic::aarch64_sve_ptest_last:
486  case Intrinsic::aarch64_sve_ptrue:
487  case Intrinsic::aarch64_sve_mul:
488  case Intrinsic::aarch64_sve_fmul:
489  case Intrinsic::aarch64_sve_tbl:
490  for (User *U : F.users())
491  Functions.insert(cast<Instruction>(U)->getFunction());
492  break;
493  default:
494  break;
495  }
496  }
497 
498  if (!Functions.empty())
499  Changed |= optimizeFunctions(Functions);
500 
501  return Changed;
502 }
isPTruePromoted
static bool isPTruePromoted(IntrinsicInst *PTrue)
Checks if a ptrue intrinsic call is promoted.
Definition: SVEIntrinsicOpts.cpp:105
llvm
Definition: AllocatorList.h:23
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
AArch64.h
llvm::Intrinsic::getDeclaration
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1329
llvm::ModulePass
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
Definition: Pass.h:238
IntrinsicInst.h
getFunction
static Function * getFunction(Constant *C)
Definition: Evaluator.cpp:253
llvm::IntrinsicInst::getIntrinsicID
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Definition: IntrinsicInst.h:52
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::size
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:77
llvm::PatternMatch::m_FPOne
specific_fpval m_FPOne()
Match a float 1.0 or vector with all elements equal to 1.0.
Definition: PatternMatch.h:848
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1167
llvm::IRBuilder<>
AArch64BaseInfo.h
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:151
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:46
llvm::createSVEIntrinsicOptsPass
ModulePass * createSVEIntrinsicOptsPass()
Definition: SVEIntrinsicOpts.cpp:89
DEBUG_TYPE
#define DEBUG_TYPE
Definition: SVEIntrinsicOpts.cpp:42
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::remove_if
bool remove_if(UnaryPredicate P)
Remove items from the set vector based on a predicate function.
Definition: SetVector.h:199
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::remove
bool remove(const value_type &X)
Remove an item from the set vector.
Definition: SetVector.h:157
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::begin
iterator begin()
Get an iterator to the beginning of the SetVector.
Definition: SetVector.h:82
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
Constants.h
llvm::PatternMatch::match
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
llvm::User
Definition: User.h:44
llvm::CallInst::Create
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Definition: Instructions.h:1521
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
false
Definition: StackSlotColoring.cpp:142
llvm::VectorType::getElementCount
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
Definition: DerivedTypes.h:629
llvm::Instruction
Definition: Instruction.h:45
llvm::DominatorTreeWrapperPass
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:281
llvm::PassRegistry
PassRegistry - This class manages the registration and intitialization of the pass subsystem as appli...
Definition: PassRegistry.h:38
PatternMatch.h
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::empty
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:72
llvm::Value::use_empty
bool use_empty() const
Definition: Value.h:345
llvm::PatternMatch::m_One
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
Definition: PatternMatch.h:513
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
llvm::DominatorTreeBase::getRoot
NodeT * getRoot() const
Definition: GenericDomTree.h:461
llvm::VectorType
Base class of all SIMD vector types.
Definition: DerivedTypes.h:391
llvm::Instruction::eraseFromParent
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:78
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::LegalityPredicates::all
Predicate all(Predicate P0, Predicate P1)
True iff P0 and P1 are true.
Definition: LegalizerInfo.h:219
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::make_early_inc_range
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:572
IRBuilder.h
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:840
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:67
llvm::LinearPolySize::getKnownMinValue
ScalarTy getKnownMinValue() const
Returns the minimum value this size can represent.
Definition: TypeSize.h:297
Builder
assume Assume Builder
Definition: AssumeBundleBuilder.cpp:651
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::insert
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:141
llvm::AnalysisUsage::setPreservesCFG
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:253
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:256
name
static const char * name
Definition: SVEIntrinsicOpts.cpp:84
llvm::initializeSVEIntrinsicOptsPass
void initializeSVEIntrinsicOptsPass(PassRegistry &)
llvm::ReversePostOrderTraversal
Definition: PostOrderIterator.h:290
llvm::IntrinsicInst
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:45
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::end
iterator end()
Get an iterator to the end of the SetVector.
Definition: SetVector.h:92
Instructions.h
PostOrderIterator.h
INITIALIZE_PASS_BEGIN
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:51
Dominators.h
llvm::CallBase::getArgOperand
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1341
llvm::PatternMatch
Definition: PatternMatch.h:47
llvm::SmallSetVector
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:307
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
LLVMContext.h
InitializePasses.h
llvm::Value
LLVM Value Representation.
Definition: Value.h:75
Debug.h
llvm::Value::users
iterator_range< user_iterator > users()
Definition: Value.h:422
SetVector.h
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38