LLVM  15.0.0git
AMDGPULowerIntrinsics.cpp
Go to the documentation of this file.
1 //===-- AMDGPULowerIntrinsics.cpp -----------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Utils/LowerMemIntrinsics.h"
21 
22 #define DEBUG_TYPE "amdgpu-lower-intrinsics"
23 
24 using namespace llvm;
25 
26 namespace {
27 
28 static int MaxStaticSize;
29 
30 static cl::opt<int, true> MemIntrinsicExpandSizeThresholdOpt(
31  "amdgpu-mem-intrinsic-expand-size",
32  cl::desc("Set minimum mem intrinsic size to expand in IR"),
33  cl::location(MaxStaticSize),
34  cl::init(1024),
35  cl::Hidden);
36 
37 
38 class AMDGPULowerIntrinsics : public ModulePass {
39 private:
40  bool makeLIDRangeMetadata(Function &F) const;
41 
42 public:
43  static char ID;
44 
45  AMDGPULowerIntrinsics() : ModulePass(ID) {}
46 
47  bool runOnModule(Module &M) override;
48  bool expandMemIntrinsicUses(Function &F);
49  StringRef getPassName() const override {
50  return "AMDGPU Lower Intrinsics";
51  }
52 
53  void getAnalysisUsage(AnalysisUsage &AU) const override {
55  }
56 };
57 
58 }
59 
61 
63 
64 INITIALIZE_PASS(AMDGPULowerIntrinsics, DEBUG_TYPE, "Lower intrinsics", false,
65  false)
66 
67 // TODO: Should refine based on estimated number of accesses (e.g. does it
68 // require splitting based on alignment)
69 static bool shouldExpandOperationWithSize(Value *Size) {
70  ConstantInt *CI = dyn_cast<ConstantInt>(Size);
71  return !CI || (CI->getSExtValue() > MaxStaticSize);
72 }
73 
74 bool AMDGPULowerIntrinsics::expandMemIntrinsicUses(Function &F) {
75  Intrinsic::ID ID = F.getIntrinsicID();
76  bool Changed = false;
77 
78  for (User *U : llvm::make_early_inc_range(F.users())) {
79  Instruction *Inst = cast<Instruction>(U);
80 
81  switch (ID) {
82  case Intrinsic::memcpy: {
83  auto *Memcpy = cast<MemCpyInst>(Inst);
84  if (shouldExpandOperationWithSize(Memcpy->getLength())) {
85  Function *ParentFunc = Memcpy->getParent()->getParent();
86  const TargetTransformInfo &TTI =
87  getAnalysis<TargetTransformInfoWrapperPass>().getTTI(*ParentFunc);
88  expandMemCpyAsLoop(Memcpy, TTI);
89  Changed = true;
90  Memcpy->eraseFromParent();
91  }
92 
93  break;
94  }
95  case Intrinsic::memmove: {
96  auto *Memmove = cast<MemMoveInst>(Inst);
97  if (shouldExpandOperationWithSize(Memmove->getLength())) {
98  expandMemMoveAsLoop(Memmove);
99  Changed = true;
100  Memmove->eraseFromParent();
101  }
102 
103  break;
104  }
105  case Intrinsic::memset: {
106  auto *Memset = cast<MemSetInst>(Inst);
107  if (shouldExpandOperationWithSize(Memset->getLength())) {
108  expandMemSetAsLoop(Memset);
109  Changed = true;
110  Memset->eraseFromParent();
111  }
112 
113  break;
114  }
115  default:
116  break;
117  }
118  }
119 
120  return Changed;
121 }
122 
123 bool AMDGPULowerIntrinsics::makeLIDRangeMetadata(Function &F) const {
124  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
125  if (!TPC)
126  return false;
127 
128  const TargetMachine &TM = TPC->getTM<TargetMachine>();
129  bool Changed = false;
130 
131  for (auto *U : F.users()) {
132  auto *CI = dyn_cast<CallInst>(U);
133  if (!CI)
134  continue;
135 
136  Function *Caller = CI->getParent()->getParent();
137  const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(TM, *Caller);
138  Changed |= ST.makeLIDRangeMetadata(CI);
139  }
140  return Changed;
141 }
142 
143 bool AMDGPULowerIntrinsics::runOnModule(Module &M) {
144  bool Changed = false;
145 
146  for (Function &F : M) {
147  if (!F.isDeclaration())
148  continue;
149 
150  switch (F.getIntrinsicID()) {
151  case Intrinsic::memcpy:
152  case Intrinsic::memmove:
153  case Intrinsic::memset:
154  if (expandMemIntrinsicUses(F))
155  Changed = true;
156  break;
157 
158  case Intrinsic::r600_read_tidig_x:
159  case Intrinsic::r600_read_tidig_y:
160  case Intrinsic::r600_read_tidig_z:
161  case Intrinsic::r600_read_local_size_x:
162  case Intrinsic::r600_read_local_size_y:
163  case Intrinsic::r600_read_local_size_z:
164  Changed |= makeLIDRangeMetadata(F);
165  break;
166 
167  default:
168  break;
169  }
170  }
171 
172  return Changed;
173 }
174 
176  return new AMDGPULowerIntrinsics();
177 }
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:17
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::ModulePass
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
Definition: Pass.h:248
IntrinsicInst.h
llvm::Function
Definition: Function.h:60
llvm::cl::location
LocationClass< Ty > location(Ty &L)
Definition: CommandLine.h:447
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:168
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:139
Module.h
F
#define F(x, y, z)
Definition: MD5.cpp:55
CommandLine.h
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
INITIALIZE_PASS
INITIALIZE_PASS(AMDGPULowerIntrinsics, DEBUG_TYPE, "Lower intrinsics", false, false) static bool shouldExpandOperationWithSize(Value *Size)
Definition: AMDGPULowerIntrinsics.cpp:64
llvm::AMDGPUSubtarget::get
static const AMDGPUSubtarget & get(const MachineFunction &MF)
Definition: AMDGPUSubtarget.cpp:972
TargetMachine.h
Constants.h
llvm::User
Definition: User.h:44
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
AMDGPUSubtarget.h
llvm::pdb::PDB_SymType::Caller
@ Caller
llvm::Instruction
Definition: Instruction.h:42
llvm::createAMDGPULowerIntrinsicsPass
ModulePass * createAMDGPULowerIntrinsicsPass()
Definition: AMDGPULowerIntrinsics.cpp:175
llvm::CallingConv::ID
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition: CallingConv.h:24
llvm::cl::opt
Definition: CommandLine.h:1392
llvm::AMDGPUSubtarget
Definition: AMDGPUSubtarget.h:29
llvm::TargetTransformInfoWrapperPass
Wrapper pass for TargetTransformInfo.
Definition: TargetTransformInfo.h:2535
llvm::GlobalValue::getParent
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:620
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
llvm::make_early_inc_range
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:618
TargetPassConfig.h
llvm::TargetMachine
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
memcpy
<%struct.s * > cast struct s *S to sbyte *< sbyte * > sbyte uint cast struct s *agg result to sbyte *< sbyte * > sbyte uint cast struct s *memtmp to sbyte *< sbyte * > sbyte uint ret void llc ends up issuing two memcpy or custom lower memcpy(of small size) to be ldmia/stmia. I think option 2 is better but the current register allocator cannot allocate a chunk of registers at a time. A feasible temporary solution is to use specific physical registers at the lowering time for small(<
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
llvm::expandMemSetAsLoop
void expandMemSetAsLoop(MemSetInst *MemSet)
Expand MemSet as a loop. MemSet is not deleted.
Definition: LowerMemIntrinsics.cpp:569
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
AMDGPU.h
llvm::ConstantInt::getSExtValue
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
Definition: Constants.h:148
llvm::expandMemMoveAsLoop
void expandMemMoveAsLoop(MemMoveInst *MemMove)
Expand MemMove as a loop. MemMove is not deleted.
Definition: LowerMemIntrinsics.cpp:558
DEBUG_TYPE
#define DEBUG_TYPE
Definition: AMDGPULowerIntrinsics.cpp:22
Instructions.h
TargetTransformInfo.h
TM
const char LLVMTargetMachineRef TM
Definition: PassBuilderBindings.cpp:47
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::cl::desc
Definition: CommandLine.h:405
llvm::expandMemCpyAsLoop
void expandMemCpyAsLoop(MemCpyInst *MemCpy, const TargetTransformInfo &TTI, ScalarEvolution *SE=nullptr)
Expand MemCpy as a loop. MemCpy is not deleted.
Definition: LowerMemIntrinsics.cpp:527
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
LowerMemIntrinsics.h
llvm::AMDGPULowerIntrinsicsID
char & AMDGPULowerIntrinsicsID
Definition: AMDGPULowerIntrinsics.cpp:62
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38