//===----- SVEIntrinsicOpts - SVE ACLE Intrinsics Opts --------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Performs general IR level optimizations on SVE intrinsics.
//
// This pass performs the following optimizations:
//
// - removes unnecessary ptrue intrinsics (llvm.aarch64.sve.ptrue), e.g.:
//     %1 = @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
//     %2 = @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
//     ; (%1 can be replaced with a reinterpret of %2)
//
// - optimizes ptest intrinsics where the operands are being needlessly
//   converted to and from svbool_t.
//
//===----------------------------------------------------------------------===//

#include "AArch64.h"
#include "Utils/AArch64BaseInfo.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"

using namespace llvm;
using namespace llvm::PatternMatch;

#define DEBUG_TYPE "aarch64-sve-intrinsic-opts"

namespace {
struct SVEIntrinsicOpts : public ModulePass {
  static char ID; // Pass identification, replacement for typeid
  SVEIntrinsicOpts() : ModulePass(ID) {
    initializeSVEIntrinsicOptsPass(*PassRegistry::getPassRegistry());
  }

  bool runOnModule(Module &M) override;
  void getAnalysisUsage(AnalysisUsage &AU) const override;

private:
  bool coalescePTrueIntrinsicCalls(BasicBlock &BB,
                                   SmallSetVector<IntrinsicInst *, 4> &PTrues);
  bool optimizePTrueIntrinsicCalls(SmallSetVector<Function *, 4> &Functions);
  bool optimizePredicateStore(Instruction *I);
  bool optimizePredicateLoad(Instruction *I);

  bool optimizeInstructions(SmallSetVector<Function *, 4> &Functions);

  /// Operates at function scope, i.e. optimizations are applied locally,
  /// within the functions themselves.
  bool optimizeFunctions(SmallSetVector<Function *, 4> &Functions);
};
} // end anonymous namespace

void SVEIntrinsicOpts::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<DominatorTreeWrapperPass>();
  AU.setPreservesCFG();
}

char SVEIntrinsicOpts::ID = 0;
static const char *name = "SVE intrinsics optimizations";
INITIALIZE_PASS_BEGIN(SVEIntrinsicOpts, DEBUG_TYPE, name, false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(SVEIntrinsicOpts, DEBUG_TYPE, name, false, false)

ModulePass *llvm::createSVEIntrinsicOptsPass() {
  return new SVEIntrinsicOpts();
}
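//
// A minimal usage sketch (hypothetical client code, not part of this file):
// a legacy pass-manager pipeline can schedule this pass via the factory above.
//
//   llvm::legacy::PassManager PM;
//   PM.add(createSVEIntrinsicOptsPass());
//   PM.run(M); // M is an llvm::Module containing calls to SVE intrinsics.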

/// Checks if a ptrue intrinsic call is promoted. The act of promoting a
/// ptrue will introduce zeroing. For example:
///
///   %1 = <vscale x 4 x i1> call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
///   %2 = <vscale x 16 x i1> call @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %1)
///   %3 = <vscale x 8 x i1> call @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %2)
///
/// %1 is promoted, because it is converted:
///
///   <vscale x 4 x i1> => <vscale x 16 x i1> => <vscale x 8 x i1>
///
/// via a sequence of the SVE reinterpret intrinsics convert.{to,from}.svbool.
static bool isPTruePromoted(IntrinsicInst *PTrue) {
  // Find all users of this intrinsic that are calls to convert-to-svbool
  // reinterpret intrinsics.
  SmallVector<IntrinsicInst *, 4> ConvertToUses;
  for (User *User : PTrue->users()) {
    if (match(User, m_Intrinsic<Intrinsic::aarch64_sve_convert_to_svbool>())) {
      ConvertToUses.push_back(cast<IntrinsicInst>(User));
    }
  }

  // If no such calls were found, this ptrue is not promoted.
  if (ConvertToUses.empty())
    return false;

  // Otherwise, try to find users of the convert-to-svbool intrinsics that are
  // calls to the convert-from-svbool intrinsic, and would result in some lanes
  // being zeroed.
  const auto *PTrueVTy = cast<ScalableVectorType>(PTrue->getType());
  for (IntrinsicInst *ConvertToUse : ConvertToUses) {
    for (User *User : ConvertToUse->users()) {
      auto *IntrUser = dyn_cast<IntrinsicInst>(User);
      if (IntrUser && IntrUser->getIntrinsicID() ==
                          Intrinsic::aarch64_sve_convert_from_svbool) {
        const auto *IntrUserVTy = cast<ScalableVectorType>(IntrUser->getType());

        // Would some lanes become zeroed by the conversion?
        if (IntrUserVTy->getElementCount().getKnownMinValue() >
            PTrueVTy->getElementCount().getKnownMinValue())
          // This is a promoted ptrue.
          return true;
      }
    }
  }

  // If no matching calls were found, this is not a promoted ptrue.
  return false;
}

/// Attempts to coalesce ptrues in a basic block.
bool SVEIntrinsicOpts::coalescePTrueIntrinsicCalls(
    BasicBlock &BB, SmallSetVector<IntrinsicInst *, 4> &PTrues) {
  if (PTrues.size() <= 1)
    return false;

  // Find the ptrue with the most lanes.
  auto *MostEncompassingPTrue = *std::max_element(
      PTrues.begin(), PTrues.end(), [](auto *PTrue1, auto *PTrue2) {
        auto *PTrue1VTy = cast<ScalableVectorType>(PTrue1->getType());
        auto *PTrue2VTy = cast<ScalableVectorType>(PTrue2->getType());
        return PTrue1VTy->getElementCount().getKnownMinValue() <
               PTrue2VTy->getElementCount().getKnownMinValue();
      });

  // Remove the most encompassing ptrue, as well as any promoted ptrues,
  // leaving behind only the ptrues to be coalesced.
  PTrues.remove(MostEncompassingPTrue);
  PTrues.remove_if(isPTruePromoted);

  // Hoist MostEncompassingPTrue to the start of the basic block. It is always
  // safe to do this, since ptrue intrinsic calls are guaranteed to have no
  // predecessors.
  MostEncompassingPTrue->moveBefore(BB, BB.getFirstInsertionPt());

  LLVMContext &Ctx = BB.getContext();
  IRBuilder<> Builder(Ctx);
  Builder.SetInsertPoint(&BB, ++MostEncompassingPTrue->getIterator());

  auto *MostEncompassingPTrueVTy =
      cast<VectorType>(MostEncompassingPTrue->getType());
  auto *ConvertToSVBool = Builder.CreateIntrinsic(
      Intrinsic::aarch64_sve_convert_to_svbool, {MostEncompassingPTrueVTy},
      {MostEncompassingPTrue});

  bool ConvertFromCreated = false;
  for (auto *PTrue : PTrues) {
    auto *PTrueVTy = cast<VectorType>(PTrue->getType());

    // Only create the converts if the types are not already the same,
    // otherwise just use the most encompassing ptrue.
    if (MostEncompassingPTrueVTy != PTrueVTy) {
      ConvertFromCreated = true;

      Builder.SetInsertPoint(&BB, ++ConvertToSVBool->getIterator());
      auto *ConvertFromSVBool =
          Builder.CreateIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool,
                                  {PTrueVTy}, {ConvertToSVBool});
      PTrue->replaceAllUsesWith(ConvertFromSVBool);
    } else
      PTrue->replaceAllUsesWith(MostEncompassingPTrue);

    PTrue->eraseFromParent();
  }

  // We never used the ConvertTo, so remove it.
  if (!ConvertFromCreated)
    ConvertToSVBool->eraseFromParent();

  return true;
}

/// The goal of this function is to remove redundant calls to the SVE ptrue
/// intrinsic in each basic block within the given functions.
///
/// SVE ptrues have two representations in LLVM IR:
/// - a logical representation -- an arbitrary-width scalable vector of i1s,
///   i.e. <vscale x N x i1>.
/// - a physical representation (svbool, <vscale x 16 x i1>) -- a 16-element
///   scalable vector of i1s, i.e. <vscale x 16 x i1>.
///
/// The SVE ptrue intrinsic is used to create a logical representation of an
/// SVE predicate. Suppose that we have two SVE ptrue intrinsic calls: P1 and
/// P2. If P1 creates a logical SVE predicate that is at least as wide as the
/// logical SVE predicate created by P2, then all of the bits that are true in
/// the physical representation of P2 are necessarily also true in the
/// physical representation of P1. P1 'encompasses' P2; therefore, the
/// intrinsic call to P2 is redundant and can be replaced by an SVE
/// reinterpret of P1 via convert.{to,from}.svbool.
///
/// Currently, this pass only coalesces calls to SVE ptrue intrinsics
/// if they match the following conditions:
///
/// - the call to the intrinsic uses either the SV_ALL or SV_POW2 patterns.
///   SV_ALL indicates that all bits of the predicate vector are to be set to
///   true. SV_POW2 indicates that all bits of the predicate vector up to the
///   largest power-of-two are to be set to true.
/// - the result of the call to the intrinsic is not promoted to a wider
///   predicate. In this case, keeping the extra ptrue leads to better codegen
///   -- coalescing here would create an irreducible chain of SVE reinterprets
///   via convert.{to,from}.svbool.
///
/// EXAMPLE:
///
///     %1 = <vscale x 8 x i1> ptrue(i32 SV_ALL)
///     ; Logical:  <1, 1, 1, 1, 1, 1, 1, 1>
///     ; Physical: <1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0>
///     ...
///
///     %2 = <vscale x 4 x i1> ptrue(i32 SV_ALL)
///     ; Logical:  <1, 1, 1, 1>
///     ; Physical: <1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0>
///     ...
///
/// Here, %2 can be replaced by an SVE reinterpret of %1, giving, for instance:
///
///     %1 = <vscale x 8 x i1> ptrue(i32 31)
///     %2 = <vscale x 16 x i1> convert.to.svbool(<vscale x 8 x i1> %1)
///     %3 = <vscale x 4 x i1> convert.from.svbool(<vscale x 16 x i1> %2)
///
bool SVEIntrinsicOpts::optimizePTrueIntrinsicCalls(
    SmallSetVector<Function *, 4> &Functions) {
  bool Changed = false;

  for (auto *F : Functions) {
    for (auto &BB : *F) {
      SmallSetVector<IntrinsicInst *, 4> SVAllPTrues;
      SmallSetVector<IntrinsicInst *, 4> SVPow2PTrues;

      // For each basic block, collect the used ptrues and try to coalesce
      // them.
      for (Instruction &I : BB) {
        if (I.use_empty())
          continue;

        auto *IntrI = dyn_cast<IntrinsicInst>(&I);
        if (!IntrI || IntrI->getIntrinsicID() != Intrinsic::aarch64_sve_ptrue)
          continue;

        const auto PTruePattern =
            cast<ConstantInt>(IntrI->getOperand(0))->getZExtValue();

        if (PTruePattern == AArch64SVEPredPattern::all)
          SVAllPTrues.insert(IntrI);
        if (PTruePattern == AArch64SVEPredPattern::pow2)
          SVPow2PTrues.insert(IntrI);
      }

      Changed |= coalescePTrueIntrinsicCalls(BB, SVAllPTrues);
      Changed |= coalescePTrueIntrinsicCalls(BB, SVPow2PTrues);
    }
  }

  return Changed;
}

// This is done in SVEIntrinsicOpts rather than InstCombine so that we
// introduce scalable stores as late as possible.
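//
// As an illustrative sketch of what gets matched and rewritten (assuming the
// enclosing function is attributed vscale_range(2,2), so FixedPredType below
// is <4 x i8>; value names and intrinsic manglings here are hypothetical):
//
//   %bc  = bitcast <vscale x 16 x i1> %pred to <vscale x 2 x i8>
//   %ext = call <4 x i8> @llvm.experimental.vector.extract.v4i8.nxv2i8(
//              <vscale x 2 x i8> %bc, i64 0)
//   store <4 x i8> %ext, <4 x i8>* %addr
//
// is replaced by a direct store of the predicate through a bitcast pointer:
//
//   %pred.addr = bitcast <4 x i8>* %addr to <vscale x 16 x i1>*
//   store <vscale x 16 x i1> %pred, <vscale x 16 x i1>* %pred.addr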
bool SVEIntrinsicOpts::optimizePredicateStore(Instruction *I) {
  auto *F = I->getFunction();
  auto Attr = F->getFnAttribute(Attribute::VScaleRange);
  if (!Attr.isValid())
    return false;

  unsigned MinVScale = Attr.getVScaleRangeMin();
  Optional<unsigned> MaxVScale = Attr.getVScaleRangeMax();
  // The transform needs to know the exact runtime length of scalable vectors.
  if (!MaxVScale || MinVScale != MaxVScale)
    return false;

  auto *PredType =
      ScalableVectorType::get(Type::getInt1Ty(I->getContext()), 16);
  auto *FixedPredType =
      FixedVectorType::get(Type::getInt8Ty(I->getContext()), MinVScale * 2);

  // If we have a store..
  auto *Store = dyn_cast<StoreInst>(I);
  if (!Store || !Store->isSimple())
    return false;

  // ..that is storing a predicate vector sized worth of bits..
  if (Store->getOperand(0)->getType() != FixedPredType)
    return false;

  // ..where the value stored comes from a vector extract..
  auto *IntrI = dyn_cast<IntrinsicInst>(Store->getOperand(0));
  if (!IntrI ||
      IntrI->getIntrinsicID() != Intrinsic::experimental_vector_extract)
    return false;

  // ..that is extracting from index 0..
  if (!cast<ConstantInt>(IntrI->getOperand(1))->isZero())
    return false;

  // ..where the value being extracted from comes from a bitcast..
  auto *BitCast = dyn_cast<BitCastInst>(IntrI->getOperand(0));
  if (!BitCast)
    return false;

  // ..and the bitcast is casting from predicate type.
  if (BitCast->getOperand(0)->getType() != PredType)
    return false;

  IRBuilder<> Builder(I->getContext());
  Builder.SetInsertPoint(I);

  auto *PtrBitCast = Builder.CreateBitCast(
      Store->getPointerOperand(),
      PredType->getPointerTo(Store->getPointerAddressSpace()));
  Builder.CreateStore(BitCast->getOperand(0), PtrBitCast);

  Store->eraseFromParent();
  if (IntrI->getNumUses() == 0)
    IntrI->eraseFromParent();
  if (BitCast->getNumUses() == 0)
    BitCast->eraseFromParent();

  return true;
}

// This is done in SVEIntrinsicOpts rather than InstCombine so that we
// introduce scalable loads as late as possible.
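//
// This is the mirror image of the store case above (again assuming
// vscale_range(2,2), with hypothetical value names and manglings): a
// fixed-width load that is widened into a scalable vector and reinterpreted
// as a predicate,
//
//   %ld  = load <4 x i8>, <4 x i8>* %addr
//   %ins = call <vscale x 2 x i8> @llvm.experimental.vector.insert.nxv2i8.v4i8(
//              <vscale x 2 x i8> undef, <4 x i8> %ld, i64 0)
//   %bc  = bitcast <vscale x 2 x i8> %ins to <vscale x 16 x i1>
//
// is replaced by a direct predicate load:
//
//   %pred.addr = bitcast <4 x i8>* %addr to <vscale x 16 x i1>*
//   %pred      = load <vscale x 16 x i1>, <vscale x 16 x i1>* %pred.addr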
bool SVEIntrinsicOpts::optimizePredicateLoad(Instruction *I) {
  auto *F = I->getFunction();
  auto Attr = F->getFnAttribute(Attribute::VScaleRange);
  if (!Attr.isValid())
    return false;

  unsigned MinVScale = Attr.getVScaleRangeMin();
  Optional<unsigned> MaxVScale = Attr.getVScaleRangeMax();
  // The transform needs to know the exact runtime length of scalable vectors.
  if (!MaxVScale || MinVScale != MaxVScale)
    return false;

  auto *PredType =
      ScalableVectorType::get(Type::getInt1Ty(I->getContext()), 16);
  auto *FixedPredType =
      FixedVectorType::get(Type::getInt8Ty(I->getContext()), MinVScale * 2);

  // If we have a bitcast..
  auto *BitCast = dyn_cast<BitCastInst>(I);
  if (!BitCast || BitCast->getType() != PredType)
    return false;

  // ..whose operand is a vector_insert..
  auto *IntrI = dyn_cast<IntrinsicInst>(BitCast->getOperand(0));
  if (!IntrI ||
      IntrI->getIntrinsicID() != Intrinsic::experimental_vector_insert)
    return false;

  // ..that is inserting into index zero of an undef vector..
  if (!isa<UndefValue>(IntrI->getOperand(0)) ||
      !cast<ConstantInt>(IntrI->getOperand(2))->isZero())
    return false;

  // ..where the value inserted comes from a load..
  auto *Load = dyn_cast<LoadInst>(IntrI->getOperand(1));
  if (!Load || !Load->isSimple())
    return false;

  // ..that is loading a predicate vector sized worth of bits.
  if (Load->getType() != FixedPredType)
    return false;

  IRBuilder<> Builder(I->getContext());
  Builder.SetInsertPoint(Load);

  auto *PtrBitCast = Builder.CreateBitCast(
      Load->getPointerOperand(),
      PredType->getPointerTo(Load->getPointerAddressSpace()));
  auto *LoadPred = Builder.CreateLoad(PredType, PtrBitCast);

  BitCast->replaceAllUsesWith(LoadPred);
  BitCast->eraseFromParent();
  if (IntrI->getNumUses() == 0)
    IntrI->eraseFromParent();
  if (Load->getNumUses() == 0)
    Load->eraseFromParent();

  return true;
}

bool SVEIntrinsicOpts::optimizeInstructions(
    SmallSetVector<Function *, 4> &Functions) {
  bool Changed = false;

  for (auto *F : Functions) {
    DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>(*F).getDomTree();

    // Traverse the DT with an rpo walk so we see defs before uses, allowing
    // simplification to be done incrementally.
    BasicBlock *Root = DT->getRoot();
    ReversePostOrderTraversal<BasicBlock *> RPOT(Root);
    for (auto *BB : RPOT) {
      for (Instruction &I : make_early_inc_range(*BB)) {
        switch (I.getOpcode()) {
        case Instruction::Store:
          Changed |= optimizePredicateStore(&I);
          break;
        case Instruction::BitCast:
          Changed |= optimizePredicateLoad(&I);
          break;
        }
      }
    }
  }

  return Changed;
}

bool SVEIntrinsicOpts::optimizeFunctions(
    SmallSetVector<Function *, 4> &Functions) {
  bool Changed = false;

  Changed |= optimizePTrueIntrinsicCalls(Functions);
  Changed |= optimizeInstructions(Functions);

  return Changed;
}

bool SVEIntrinsicOpts::runOnModule(Module &M) {
  bool Changed = false;
  SmallSetVector<Function *, 4> Functions;

  // Check for SVE intrinsic declarations first so that we only iterate over
  // relevant functions. Where an appropriate declaration is found, store the
  // function(s) where it is used so we can target these only.
  for (auto &F : M.getFunctionList()) {
    if (!F.isDeclaration())
      continue;

    switch (F.getIntrinsicID()) {
    case Intrinsic::experimental_vector_extract:
    case Intrinsic::experimental_vector_insert:
    case Intrinsic::aarch64_sve_ptrue:
      for (User *U : F.users())
        Functions.insert(cast<Instruction>(U)->getFunction());
      break;
    default:
      break;
    }
  }

  if (!Functions.empty())
    Changed |= optimizeFunctions(Functions);

  return Changed;
}