LLVM  14.0.0git
AMDGPULowerIntrinsics.cpp
Go to the documentation of this file.
1 //===-- AMDGPULowerIntrinsics.cpp -----------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Utils/LowerMemIntrinsics.h"
22 
23 #define DEBUG_TYPE "amdgpu-lower-intrinsics"
24 
25 using namespace llvm;
26 
27 namespace {
28 
29 static int MaxStaticSize;
30 
31 static cl::opt<int, true> MemIntrinsicExpandSizeThresholdOpt(
32  "amdgpu-mem-intrinsic-expand-size",
33  cl::desc("Set minimum mem intrinsic size to expand in IR"),
34  cl::location(MaxStaticSize),
35  cl::init(1024),
36  cl::Hidden);
37 
38 
39 class AMDGPULowerIntrinsics : public ModulePass {
40 private:
41  bool makeLIDRangeMetadata(Function &F) const;
42 
43 public:
44  static char ID;
45 
46  AMDGPULowerIntrinsics() : ModulePass(ID) {}
47 
48  bool runOnModule(Module &M) override;
49  bool expandMemIntrinsicUses(Function &F);
50  StringRef getPassName() const override {
51  return "AMDGPU Lower Intrinsics";
52  }
53 
54  void getAnalysisUsage(AnalysisUsage &AU) const override {
56  }
57 };
58 
59 }
60 
62 
64 
65 INITIALIZE_PASS(AMDGPULowerIntrinsics, DEBUG_TYPE, "Lower intrinsics", false,
66  false)
67 
68 // TODO: Should refine based on estimated number of accesses (e.g. does it
69 // require splitting based on alignment)
70 static bool shouldExpandOperationWithSize(Value *Size) {
71  ConstantInt *CI = dyn_cast<ConstantInt>(Size);
72  return !CI || (CI->getSExtValue() > MaxStaticSize);
73 }
74 
75 bool AMDGPULowerIntrinsics::expandMemIntrinsicUses(Function &F) {
76  Intrinsic::ID ID = F.getIntrinsicID();
77  bool Changed = false;
78 
79  for (auto I = F.user_begin(), E = F.user_end(); I != E;) {
80  Instruction *Inst = cast<Instruction>(*I);
81  ++I;
82 
83  switch (ID) {
84  case Intrinsic::memcpy: {
85  auto *Memcpy = cast<MemCpyInst>(Inst);
86  if (shouldExpandOperationWithSize(Memcpy->getLength())) {
87  Function *ParentFunc = Memcpy->getParent()->getParent();
88  const TargetTransformInfo &TTI =
89  getAnalysis<TargetTransformInfoWrapperPass>().getTTI(*ParentFunc);
90  expandMemCpyAsLoop(Memcpy, TTI);
91  Changed = true;
92  Memcpy->eraseFromParent();
93  }
94 
95  break;
96  }
97  case Intrinsic::memmove: {
98  auto *Memmove = cast<MemMoveInst>(Inst);
99  if (shouldExpandOperationWithSize(Memmove->getLength())) {
100  expandMemMoveAsLoop(Memmove);
101  Changed = true;
102  Memmove->eraseFromParent();
103  }
104 
105  break;
106  }
107  case Intrinsic::memset: {
108  auto *Memset = cast<MemSetInst>(Inst);
109  if (shouldExpandOperationWithSize(Memset->getLength())) {
110  expandMemSetAsLoop(Memset);
111  Changed = true;
112  Memset->eraseFromParent();
113  }
114 
115  break;
116  }
117  default:
118  break;
119  }
120  }
121 
122  return Changed;
123 }
124 
125 bool AMDGPULowerIntrinsics::makeLIDRangeMetadata(Function &F) const {
126  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
127  if (!TPC)
128  return false;
129 
130  const TargetMachine &TM = TPC->getTM<TargetMachine>();
131  bool Changed = false;
132 
133  for (auto *U : F.users()) {
134  auto *CI = dyn_cast<CallInst>(U);
135  if (!CI)
136  continue;
137 
138  Function *Caller = CI->getParent()->getParent();
139  const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(TM, *Caller);
140  Changed |= ST.makeLIDRangeMetadata(CI);
141  }
142  return Changed;
143 }
144 
145 bool AMDGPULowerIntrinsics::runOnModule(Module &M) {
146  bool Changed = false;
147 
148  for (Function &F : M) {
149  if (!F.isDeclaration())
150  continue;
151 
152  switch (F.getIntrinsicID()) {
153  case Intrinsic::memcpy:
154  case Intrinsic::memmove:
155  case Intrinsic::memset:
156  if (expandMemIntrinsicUses(F))
157  Changed = true;
158  break;
159 
160  case Intrinsic::amdgcn_workitem_id_x:
161  case Intrinsic::r600_read_tidig_x:
162  case Intrinsic::amdgcn_workitem_id_y:
163  case Intrinsic::r600_read_tidig_y:
164  case Intrinsic::amdgcn_workitem_id_z:
165  case Intrinsic::r600_read_tidig_z:
166  case Intrinsic::r600_read_local_size_x:
167  case Intrinsic::r600_read_local_size_y:
168  case Intrinsic::r600_read_local_size_z:
169  Changed |= makeLIDRangeMetadata(F);
170  break;
171 
172  default:
173  break;
174  }
175  }
176 
177  return Changed;
178 }
179 
181  return new AMDGPULowerIntrinsics();
182 }
llvm::Check::Size
@ Size
Definition: FileCheck.h:73
llvm
---------------------— PointerInfo ------------------------------------—
Definition: AllocatorList.h:23
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::ModulePass
ModulePass class - This class is used to implement unstructured interprocedural optimizations and analyses.
Definition: Pass.h:238
IntrinsicInst.h
llvm::Function
Definition: Function.h:61
llvm::cl::location
LocationClass< Ty > location(Ty &L)
Definition: CommandLine.h:459
llvm::expandMemCpyAsLoop
void expandMemCpyAsLoop(MemCpyInst *MemCpy, const TargetTransformInfo &TTI)
Expand MemCpy as a loop. MemCpy is not deleted.
Definition: LowerMemIntrinsics.cpp:422
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:168
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:143
Module.h
F
#define F(x, y, z)
Definition: MD5.cpp:56
CommandLine.h
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
INITIALIZE_PASS
INITIALIZE_PASS(AMDGPULowerIntrinsics, DEBUG_TYPE, "Lower intrinsics", false, false) static bool shouldExpandOperationWithSize(Value *Size)
Definition: AMDGPULowerIntrinsics.cpp:65
llvm::AMDGPUSubtarget::get
static const AMDGPUSubtarget & get(const MachineFunction &MF)
Definition: AMDGPUSubtarget.cpp:1111
TargetMachine.h
Constants.h
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
AMDGPUSubtarget.h
llvm::pdb::PDB_SymType::Caller
@ Caller
llvm::Instruction
Definition: Instruction.h:45
llvm::createAMDGPULowerIntrinsicsPass
ModulePass * createAMDGPULowerIntrinsicsPass()
Definition: AMDGPULowerIntrinsics.cpp:180
llvm::cl::opt
Definition: CommandLine.h:1434
llvm::AMDGPUSubtarget
Definition: AMDGPUSubtarget.h:29
llvm::TargetTransformInfoWrapperPass
Wrapper pass for TargetTransformInfo.
Definition: TargetTransformInfo.h:2387
llvm::GlobalValue::getParent
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:572
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
TargetPassConfig.h
llvm::TargetMachine
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:79
memcpy
<%struct.s * > cast struct s *S to sbyte *< sbyte * > sbyte uint cast struct s *agg result to sbyte *< sbyte * > sbyte uint cast struct s *memtmp to sbyte *< sbyte * > sbyte uint ret void llc ends up issuing two memcpy or custom lower memcpy(of small size) to be ldmia/stmia. I think option 2 is better but the current register allocator cannot allocate a chunk of registers at a time. A feasible temporary solution is to use specific physical registers at the lowering time for small(<
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:67
llvm::expandMemSetAsLoop
void expandMemSetAsLoop(MemSetInst *MemSet)
Expand MemSet as a loop. MemSet is not deleted.
Definition: LowerMemIntrinsics.cpp:460
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
AMDGPU.h
llvm::ConstantInt::getSExtValue
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
Definition: Constants.h:148
llvm::expandMemMoveAsLoop
void expandMemMoveAsLoop(MemMoveInst *MemMove)
Expand MemMove as a loop. MemMove is not deleted.
Definition: LowerMemIntrinsics.cpp:449
DEBUG_TYPE
#define DEBUG_TYPE
Definition: AMDGPULowerIntrinsics.cpp:23
Instructions.h
TargetTransformInfo.h
TM
const char LLVMTargetMachineRef TM
Definition: PassBuilderBindings.cpp:47
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::cl::desc
Definition: CommandLine.h:414
llvm::Value
LLVM Value Representation.
Definition: Value.h:75
LowerMemIntrinsics.h
llvm::AMDGPULowerIntrinsicsID
char & AMDGPULowerIntrinsicsID
Definition: AMDGPULowerIntrinsics.cpp:63
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:37