LLVM  11.0.0git
SVEIntrinsicOpts.cpp
Go to the documentation of this file.
1 //===----- SVEIntrinsicOpts - SVE ACLE Intrinsics Opts --------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // Performs general IR level optimizations on SVE intrinsics.
11 //
12 // The main goal of this pass is to remove unnecessary reinterpret
13 // intrinsics (llvm.aarch64.sve.convert.[to|from].svbool), e.g:
14 //
15 // %1 = @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %a)
16 // %2 = @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %1)
17 //
18 // This pass also looks for ptest intrinsics & phi instructions where the
19 // operands are being needlessly converted to and from svbool_t.
20 //
21 //===----------------------------------------------------------------------===//
22 
#include "Utils/AArch64BaseInfo.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"
36 
37 using namespace llvm;
38 using namespace llvm::PatternMatch;
39 
40 #define DEBUG_TYPE "sve-intrinsic-opts"
41 
42 namespace llvm {
44 }
45 
46 namespace {
47 struct SVEIntrinsicOpts : public ModulePass {
48  static char ID; // Pass identification, replacement for typeid
49  SVEIntrinsicOpts() : ModulePass(ID) {
51  }
52 
53  bool runOnModule(Module &M) override;
54  void getAnalysisUsage(AnalysisUsage &AU) const override;
55 
56 private:
57  static IntrinsicInst *isReinterpretToSVBool(Value *V);
58 
59  static bool optimizeIntrinsic(Instruction *I);
60 
61  bool optimizeFunctions(SmallSetVector<Function *, 4> &Functions);
62 
63  static bool optimizeConvertFromSVBool(IntrinsicInst *I);
64  static bool optimizePTest(IntrinsicInst *I);
65 
66  static bool processPhiNode(IntrinsicInst *I);
67 };
68 } // end anonymous namespace
69 
70 void SVEIntrinsicOpts::getAnalysisUsage(AnalysisUsage &AU) const {
72  AU.setPreservesCFG();
73 }
74 
75 char SVEIntrinsicOpts::ID = 0;
76 static const char *name = "SVE intrinsics optimizations";
77 INITIALIZE_PASS_BEGIN(SVEIntrinsicOpts, DEBUG_TYPE, name, false, false)
79 INITIALIZE_PASS_END(SVEIntrinsicOpts, DEBUG_TYPE, name, false, false)
80 
81 namespace llvm {
82 ModulePass *createSVEIntrinsicOptsPass() { return new SVEIntrinsicOpts(); }
83 } // namespace llvm
84 
85 /// Returns V if it's a cast from <n x 16 x i1> (aka svbool_t), nullptr
86 /// otherwise.
87 IntrinsicInst *SVEIntrinsicOpts::isReinterpretToSVBool(Value *V) {
89  if (!I)
90  return nullptr;
91 
92  if (I->getIntrinsicID() != Intrinsic::aarch64_sve_convert_to_svbool)
93  return nullptr;
94 
95  return I;
96 }
97 
98 /// The function will remove redundant reinterprets casting in the presence
99 /// of the control flow
100 bool SVEIntrinsicOpts::processPhiNode(IntrinsicInst *X) {
101 
103  auto RequiredType = X->getType();
104 
105  auto *PN = dyn_cast<PHINode>(X->getArgOperand(0));
106  assert(PN && "Expected Phi Node!");
107 
108  // Don't create a new Phi unless we can remove the old one.
109  if (!PN->hasOneUse())
110  return false;
111 
112  for (Value *IncValPhi : PN->incoming_values()) {
113  auto *Reinterpret = isReinterpretToSVBool(IncValPhi);
114  if (!Reinterpret ||
115  RequiredType != Reinterpret->getArgOperand(0)->getType())
116  return false;
117  }
118 
119  // Create the new Phi
120  LLVMContext &Ctx = PN->getContext();
121  IRBuilder<> Builder(Ctx);
122  Builder.SetInsertPoint(PN);
123  PHINode *NPN = Builder.CreatePHI(RequiredType, PN->getNumIncomingValues());
124  Worklist.push_back(PN);
125 
126  for (unsigned I = 0; I < PN->getNumIncomingValues(); I++) {
127  auto *Reinterpret = cast<Instruction>(PN->getIncomingValue(I));
128  NPN->addIncoming(Reinterpret->getOperand(0), PN->getIncomingBlock(I));
129  Worklist.push_back(Reinterpret);
130  }
131 
132  // Cleanup Phi Node and reinterprets
133  X->replaceAllUsesWith(NPN);
134  X->eraseFromParent();
135 
136  for (auto &I : Worklist)
137  if (I->use_empty())
138  I->eraseFromParent();
139 
140  return true;
141 }
142 
143 bool SVEIntrinsicOpts::optimizePTest(IntrinsicInst *I) {
146 
147  if (Op1 && Op2 &&
148  Op1->getIntrinsicID() == Intrinsic::aarch64_sve_convert_to_svbool &&
149  Op2->getIntrinsicID() == Intrinsic::aarch64_sve_convert_to_svbool &&
150  Op1->getArgOperand(0)->getType() == Op2->getArgOperand(0)->getType()) {
151 
152  Value *Ops[] = {Op1->getArgOperand(0), Op2->getArgOperand(0)};
153  Type *Tys[] = {Op1->getArgOperand(0)->getType()};
154  Module *M = I->getParent()->getParent()->getParent();
155 
156  auto Fn = Intrinsic::getDeclaration(M, I->getIntrinsicID(), Tys);
157  auto CI = CallInst::Create(Fn, Ops, I->getName(), I);
158 
159  I->replaceAllUsesWith(CI);
160  I->eraseFromParent();
161  if (Op1->use_empty())
162  Op1->eraseFromParent();
163  if (Op2->use_empty())
164  Op2->eraseFromParent();
165 
166  return true;
167  }
168 
169  return false;
170 }
171 
172 bool SVEIntrinsicOpts::optimizeConvertFromSVBool(IntrinsicInst *I) {
173  assert(I->getIntrinsicID() == Intrinsic::aarch64_sve_convert_from_svbool &&
174  "Unexpected opcode");
175 
176  // If the reinterpret instruction operand is a PHI Node
177  if (isa<PHINode>(I->getArgOperand(0)))
178  return processPhiNode(I);
179 
180  // If we have a reinterpret intrinsic I of type A which is converting from
181  // another reinterpret Y of type B, and the source type of Y is A, then we can
182  // elide away both reinterprets if there are no other users of Y.
183  auto *Y = isReinterpretToSVBool(I->getArgOperand(0));
184  if (!Y)
185  return false;
186 
187  Value *SourceVal = Y->getArgOperand(0);
188  if (I->getType() != SourceVal->getType())
189  return false;
190 
191  I->replaceAllUsesWith(SourceVal);
192  I->eraseFromParent();
193  if (Y->use_empty())
194  Y->eraseFromParent();
195 
196  return true;
197 }
198 
199 bool SVEIntrinsicOpts::optimizeIntrinsic(Instruction *I) {
201  if (!IntrI)
202  return false;
203 
204  switch (IntrI->getIntrinsicID()) {
205  case Intrinsic::aarch64_sve_convert_from_svbool:
206  return optimizeConvertFromSVBool(IntrI);
207  case Intrinsic::aarch64_sve_ptest_any:
208  case Intrinsic::aarch64_sve_ptest_first:
209  case Intrinsic::aarch64_sve_ptest_last:
210  return optimizePTest(IntrI);
211  default:
212  return false;
213  }
214 
215  return true;
216 }
217 
218 bool SVEIntrinsicOpts::optimizeFunctions(
219  SmallSetVector<Function *, 4> &Functions) {
220  bool Changed = false;
221  for (auto *F : Functions) {
222  DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>(*F).getDomTree();
223 
224  // Traverse the DT with an rpo walk so we see defs before uses, allowing
225  // simplification to be done incrementally.
226  BasicBlock *Root = DT->getRoot();
228  for (auto *BB : RPOT)
229  for (Instruction &I : make_early_inc_range(*BB))
230  Changed |= optimizeIntrinsic(&I);
231  }
232  return Changed;
233 }
234 
235 bool SVEIntrinsicOpts::runOnModule(Module &M) {
236  bool Changed = false;
238 
239  // Check for SVE intrinsic declarations first so that we only iterate over
240  // relevant functions. Where an appropriate declaration is found, store the
241  // function(s) where it is used so we can target these only.
242  for (auto &F : M.getFunctionList()) {
243  if (!F.isDeclaration())
244  continue;
245 
246  switch (F.getIntrinsicID()) {
247  case Intrinsic::aarch64_sve_convert_from_svbool:
248  case Intrinsic::aarch64_sve_ptest_any:
249  case Intrinsic::aarch64_sve_ptest_first:
250  case Intrinsic::aarch64_sve_ptest_last:
251  for (auto I = F.user_begin(), E = F.user_end(); I != E;) {
252  auto *Inst = dyn_cast<Instruction>(*I++);
253  Functions.insert(Inst->getFunction());
254  }
255  break;
256  default:
257  break;
258  }
259  }
260 
261  if (!Functions.empty())
262  Changed |= optimizeFunctions(Functions);
263 
264  return Changed;
265 }
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:80
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
LLVM_NODISCARD std::enable_if_t< !is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type > dyn_cast(const Y &Val)
Definition: Casting.h:334
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
This class represents lattice values for constants.
Definition: AllocatorList.h:23
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:67
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:584
void initializeSVEIntrinsicOptsPass(PassRegistry &)
F(f)
static const char * name
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1253
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
AnalysisUsage & addRequired()
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:244
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:141
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:486
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:144
Function * getDeclaration(Module *M, ID id, ArrayRef< Type *> Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1139
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block...
Definition: IRBuilder.h:161
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Definition: IRBuilder.h:2314
const FunctionListType & getFunctionList() const
Get the Module's list of functions (constant).
Definition: Module.h:544
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:46
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Represent the analysis usage information of a pass.
assume Assume Builder
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Definition: IntrinsicInst.h:51
ModulePass * createSVEIntrinsicOptsPass()
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:297
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:883
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:51
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:253
#define DEBUG_TYPE
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:270
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:107
#define I(x, y, z)
Definition: MD5.cpp:59
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:72
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
Definition: Pass.h:224
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:572
LLVM Value Representation.
Definition: Value.h:74
NodeT * getRoot() const
PassRegistry - This class manages the registration and initialization of the pass subsystem as appli...
Definition: PassRegistry.h:38
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:261
bool use_empty() const
Definition: Value.h:341
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:44
const BasicBlock * getParent() const
Definition: Instruction.h:85
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)