LLVM  14.0.0git
AMDGPULowerIntrinsics.cpp
Go to the documentation of this file.
1 //===-- AMDGPULowerIntrinsics.cpp -----------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Utils/LowerMemIntrinsics.h"
22 
23 #define DEBUG_TYPE "amdgpu-lower-intrinsics"
24 
25 using namespace llvm;
26 
27 namespace {
28 
29 static int MaxStaticSize;
30 
31 static cl::opt<int, true> MemIntrinsicExpandSizeThresholdOpt(
32  "amdgpu-mem-intrinsic-expand-size",
33  cl::desc("Set minimum mem intrinsic size to expand in IR"),
34  cl::location(MaxStaticSize),
35  cl::init(1024),
36  cl::Hidden);
37 
38 
39 class AMDGPULowerIntrinsics : public ModulePass {
40 private:
41  bool makeLIDRangeMetadata(Function &F) const;
42 
43 public:
44  static char ID;
45 
46  AMDGPULowerIntrinsics() : ModulePass(ID) {}
47 
48  bool runOnModule(Module &M) override;
49  bool expandMemIntrinsicUses(Function &F);
50  StringRef getPassName() const override {
51  return "AMDGPU Lower Intrinsics";
52  }
53 
54  void getAnalysisUsage(AnalysisUsage &AU) const override {
56  }
57 };
58 
59 }
60 
62 
64 
65 INITIALIZE_PASS(AMDGPULowerIntrinsics, DEBUG_TYPE, "Lower intrinsics", false,
66  false)
67 
68 // TODO: Should refine based on estimated number of accesses (e.g. does it
69 // require splitting based on alignment)
70 static bool shouldExpandOperationWithSize(Value *Size) {
71  ConstantInt *CI = dyn_cast<ConstantInt>(Size);
72  return !CI || (CI->getSExtValue() > MaxStaticSize);
73 }
74 
75 bool AMDGPULowerIntrinsics::expandMemIntrinsicUses(Function &F) {
76  Intrinsic::ID ID = F.getIntrinsicID();
77  bool Changed = false;
78 
79  for (User *U : llvm::make_early_inc_range(F.users())) {
80  Instruction *Inst = cast<Instruction>(U);
81 
82  switch (ID) {
83  case Intrinsic::memcpy: {
84  auto *Memcpy = cast<MemCpyInst>(Inst);
85  if (shouldExpandOperationWithSize(Memcpy->getLength())) {
86  Function *ParentFunc = Memcpy->getParent()->getParent();
87  const TargetTransformInfo &TTI =
88  getAnalysis<TargetTransformInfoWrapperPass>().getTTI(*ParentFunc);
89  expandMemCpyAsLoop(Memcpy, TTI);
90  Changed = true;
91  Memcpy->eraseFromParent();
92  }
93 
94  break;
95  }
96  case Intrinsic::memmove: {
97  auto *Memmove = cast<MemMoveInst>(Inst);
98  if (shouldExpandOperationWithSize(Memmove->getLength())) {
99  expandMemMoveAsLoop(Memmove);
100  Changed = true;
101  Memmove->eraseFromParent();
102  }
103 
104  break;
105  }
106  case Intrinsic::memset: {
107  auto *Memset = cast<MemSetInst>(Inst);
108  if (shouldExpandOperationWithSize(Memset->getLength())) {
109  expandMemSetAsLoop(Memset);
110  Changed = true;
111  Memset->eraseFromParent();
112  }
113 
114  break;
115  }
116  default:
117  break;
118  }
119  }
120 
121  return Changed;
122 }
123 
124 bool AMDGPULowerIntrinsics::makeLIDRangeMetadata(Function &F) const {
125  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
126  if (!TPC)
127  return false;
128 
129  const TargetMachine &TM = TPC->getTM<TargetMachine>();
130  bool Changed = false;
131 
132  for (auto *U : F.users()) {
133  auto *CI = dyn_cast<CallInst>(U);
134  if (!CI)
135  continue;
136 
137  Function *Caller = CI->getParent()->getParent();
138  const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(TM, *Caller);
139  Changed |= ST.makeLIDRangeMetadata(CI);
140  }
141  return Changed;
142 }
143 
144 bool AMDGPULowerIntrinsics::runOnModule(Module &M) {
145  bool Changed = false;
146 
147  for (Function &F : M) {
148  if (!F.isDeclaration())
149  continue;
150 
151  switch (F.getIntrinsicID()) {
152  case Intrinsic::memcpy:
153  case Intrinsic::memmove:
154  case Intrinsic::memset:
155  if (expandMemIntrinsicUses(F))
156  Changed = true;
157  break;
158 
159  case Intrinsic::amdgcn_workitem_id_x:
160  case Intrinsic::r600_read_tidig_x:
161  case Intrinsic::amdgcn_workitem_id_y:
162  case Intrinsic::r600_read_tidig_y:
163  case Intrinsic::amdgcn_workitem_id_z:
164  case Intrinsic::r600_read_tidig_z:
165  case Intrinsic::r600_read_local_size_x:
166  case Intrinsic::r600_read_local_size_y:
167  case Intrinsic::r600_read_local_size_z:
168  Changed |= makeLIDRangeMetadata(F);
169  break;
170 
171  default:
172  break;
173  }
174  }
175 
176  return Changed;
177 }
178 
180  return new AMDGPULowerIntrinsics();
181 }
llvm::Check::Size
@ Size
Definition: FileCheck.h:73
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AllocatorList.h:23
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::ModulePass
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
Definition: Pass.h:238
IntrinsicInst.h
llvm::Function
Definition: Function.h:62
llvm::cl::location
LocationClass< Ty > location(Ty &L)
Definition: CommandLine.h:457
llvm::expandMemCpyAsLoop
void expandMemCpyAsLoop(MemCpyInst *MemCpy, const TargetTransformInfo &TTI)
Expand MemCpy as a loop. MemCpy is not deleted.
Definition: LowerMemIntrinsics.cpp:422
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:169
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:143
Module.h
F
#define F(x, y, z)
Definition: MD5.cpp:56
CommandLine.h
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
INITIALIZE_PASS
INITIALIZE_PASS(AMDGPULowerIntrinsics, DEBUG_TYPE, "Lower intrinsics", false, false) static bool shouldExpandOperationWithSize(Value *Size)
Definition: AMDGPULowerIntrinsics.cpp:65
llvm::AMDGPUSubtarget::get
static const AMDGPUSubtarget & get(const MachineFunction &MF)
Definition: AMDGPUSubtarget.cpp:1113
TargetMachine.h
Constants.h
llvm::User
Definition: User.h:44
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
AMDGPUSubtarget.h
llvm::pdb::PDB_SymType::Caller
@ Caller
llvm::Instruction
Definition: Instruction.h:45
llvm::createAMDGPULowerIntrinsicsPass
ModulePass * createAMDGPULowerIntrinsicsPass()
Definition: AMDGPULowerIntrinsics.cpp:179
llvm::cl::opt
Definition: CommandLine.h:1432
llvm::AMDGPUSubtarget
Definition: AMDGPUSubtarget.h:29
llvm::TargetTransformInfoWrapperPass
Wrapper pass for TargetTransformInfo.
Definition: TargetTransformInfo.h:2428
llvm::GlobalValue::getParent
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:578
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:441
llvm::make_early_inc_range
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:593
TargetPassConfig.h
llvm::TargetMachine
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:80
memcpy
<%struct.s * > cast struct s *S to sbyte *< sbyte * > sbyte uint cast struct s *agg result to sbyte *< sbyte * > sbyte uint cast struct s *memtmp to sbyte *< sbyte * > sbyte uint ret void llc ends up issuing two memcpy or custom lower memcpy(of small size) to be ldmia/stmia. I think option 2 is better but the current register allocator cannot allocate a chunk of registers at a time. A feasible temporary solution is to use specific physical registers at the lowering time for small(<
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:67
llvm::expandMemSetAsLoop
void expandMemSetAsLoop(MemSetInst *MemSet)
Expand MemSet as a loop. MemSet is not deleted.
Definition: LowerMemIntrinsics.cpp:460
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:57
AMDGPU.h
llvm::ConstantInt::getSExtValue
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
Definition: Constants.h:148
llvm::expandMemMoveAsLoop
void expandMemMoveAsLoop(MemMoveInst *MemMove)
Expand MemMove as a loop. MemMove is not deleted.
Definition: LowerMemIntrinsics.cpp:449
DEBUG_TYPE
#define DEBUG_TYPE
Definition: AMDGPULowerIntrinsics.cpp:23
Instructions.h
TargetTransformInfo.h
TM
const char LLVMTargetMachineRef TM
Definition: PassBuilderBindings.cpp:47
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::cl::desc
Definition: CommandLine.h:412
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
LowerMemIntrinsics.h
llvm::AMDGPULowerIntrinsicsID
char & AMDGPULowerIntrinsicsID
Definition: AMDGPULowerIntrinsics.cpp:63
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38