LLVM  13.0.0git
NVPTXUtilities.cpp
Go to the documentation of this file.
1 //===- NVPTXUtilities.cpp - Utility Functions -----------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains miscellaneous utility functions
10 //
11 //===----------------------------------------------------------------------===//
12 
#include "NVPTXUtilities.h"
#include "NVPTX.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/Mutex.h"
#include <algorithm>
#include <cstring>
#include <map>
#include <mutex>
#include <string>
#include <vector>
29 
30 namespace llvm {
31 
32 namespace {
33 typedef std::map<std::string, std::vector<unsigned> > key_val_pair_t;
34 typedef std::map<const GlobalValue *, key_val_pair_t> global_val_annot_t;
35 typedef std::map<const Module *, global_val_annot_t> per_module_annot_t;
36 } // anonymous namespace
37 
40 
41 void clearAnnotationCache(const Module *Mod) {
42  std::lock_guard<sys::Mutex> Guard(Lock);
43  annotationCache->erase(Mod);
44 }
45 
46 static void cacheAnnotationFromMD(const MDNode *md, key_val_pair_t &retval) {
47  std::lock_guard<sys::Mutex> Guard(Lock);
48  assert(md && "Invalid mdnode for annotation");
49  assert((md->getNumOperands() % 2) == 1 && "Invalid number of operands");
50  // start index = 1, to skip the global variable key
51  // increment = 2, to skip the value for each property-value pairs
52  for (unsigned i = 1, e = md->getNumOperands(); i != e; i += 2) {
53  // property
54  const MDString *prop = dyn_cast<MDString>(md->getOperand(i));
55  assert(prop && "Annotation property not a string");
56 
57  // value
58  ConstantInt *Val = mdconst::dyn_extract<ConstantInt>(md->getOperand(i + 1));
59  assert(Val && "Value operand not a constant int");
60 
61  std::string keyname = prop->getString().str();
62  if (retval.find(keyname) != retval.end())
63  retval[keyname].push_back(Val->getZExtValue());
64  else {
65  std::vector<unsigned> tmp;
66  tmp.push_back(Val->getZExtValue());
67  retval[keyname] = tmp;
68  }
69  }
70 }
71 
72 static void cacheAnnotationFromMD(const Module *m, const GlobalValue *gv) {
73  std::lock_guard<sys::Mutex> Guard(Lock);
74  NamedMDNode *NMD = m->getNamedMetadata("nvvm.annotations");
75  if (!NMD)
76  return;
77  key_val_pair_t tmp;
78  for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
79  const MDNode *elem = NMD->getOperand(i);
80 
81  GlobalValue *entity =
82  mdconst::dyn_extract_or_null<GlobalValue>(elem->getOperand(0));
83  // entity may be null due to DCE
84  if (!entity)
85  continue;
86  if (entity != gv)
87  continue;
88 
89  // accumulate annotations for entity in tmp
91  }
92 
93  if (tmp.empty()) // no annotations for this gv
94  return;
95 
96  if ((*annotationCache).find(m) != (*annotationCache).end())
97  (*annotationCache)[m][gv] = std::move(tmp);
98  else {
99  global_val_annot_t tmp1;
100  tmp1[gv] = std::move(tmp);
101  (*annotationCache)[m] = std::move(tmp1);
102  }
103 }
104 
105 bool findOneNVVMAnnotation(const GlobalValue *gv, const std::string &prop,
106  unsigned &retval) {
107  std::lock_guard<sys::Mutex> Guard(Lock);
108  const Module *m = gv->getParent();
109  if ((*annotationCache).find(m) == (*annotationCache).end())
110  cacheAnnotationFromMD(m, gv);
111  else if ((*annotationCache)[m].find(gv) == (*annotationCache)[m].end())
112  cacheAnnotationFromMD(m, gv);
113  if ((*annotationCache)[m][gv].find(prop) == (*annotationCache)[m][gv].end())
114  return false;
115  retval = (*annotationCache)[m][gv][prop][0];
116  return true;
117 }
118 
119 bool findAllNVVMAnnotation(const GlobalValue *gv, const std::string &prop,
120  std::vector<unsigned> &retval) {
121  std::lock_guard<sys::Mutex> Guard(Lock);
122  const Module *m = gv->getParent();
123  if ((*annotationCache).find(m) == (*annotationCache).end())
124  cacheAnnotationFromMD(m, gv);
125  else if ((*annotationCache)[m].find(gv) == (*annotationCache)[m].end())
126  cacheAnnotationFromMD(m, gv);
127  if ((*annotationCache)[m][gv].find(prop) == (*annotationCache)[m][gv].end())
128  return false;
129  retval = (*annotationCache)[m][gv][prop];
130  return true;
131 }
132 
133 bool isTexture(const Value &val) {
134  if (const GlobalValue *gv = dyn_cast<GlobalValue>(&val)) {
135  unsigned annot;
136  if (findOneNVVMAnnotation(gv, "texture", annot)) {
137  assert((annot == 1) && "Unexpected annotation on a texture symbol");
138  return true;
139  }
140  }
141  return false;
142 }
143 
144 bool isSurface(const Value &val) {
145  if (const GlobalValue *gv = dyn_cast<GlobalValue>(&val)) {
146  unsigned annot;
147  if (findOneNVVMAnnotation(gv, "surface", annot)) {
148  assert((annot == 1) && "Unexpected annotation on a surface symbol");
149  return true;
150  }
151  }
152  return false;
153 }
154 
155 bool isSampler(const Value &val) {
156  const char *AnnotationName = "sampler";
157 
158  if (const GlobalValue *gv = dyn_cast<GlobalValue>(&val)) {
159  unsigned annot;
160  if (findOneNVVMAnnotation(gv, AnnotationName, annot)) {
161  assert((annot == 1) && "Unexpected annotation on a sampler symbol");
162  return true;
163  }
164  }
165  if (const Argument *arg = dyn_cast<Argument>(&val)) {
166  const Function *func = arg->getParent();
167  std::vector<unsigned> annot;
168  if (findAllNVVMAnnotation(func, AnnotationName, annot)) {
169  if (is_contained(annot, arg->getArgNo()))
170  return true;
171  }
172  }
173  return false;
174 }
175 
176 bool isImageReadOnly(const Value &val) {
177  if (const Argument *arg = dyn_cast<Argument>(&val)) {
178  const Function *func = arg->getParent();
179  std::vector<unsigned> annot;
180  if (findAllNVVMAnnotation(func, "rdoimage", annot)) {
181  if (is_contained(annot, arg->getArgNo()))
182  return true;
183  }
184  }
185  return false;
186 }
187 
188 bool isImageWriteOnly(const Value &val) {
189  if (const Argument *arg = dyn_cast<Argument>(&val)) {
190  const Function *func = arg->getParent();
191  std::vector<unsigned> annot;
192  if (findAllNVVMAnnotation(func, "wroimage", annot)) {
193  if (is_contained(annot, arg->getArgNo()))
194  return true;
195  }
196  }
197  return false;
198 }
199 
200 bool isImageReadWrite(const Value &val) {
201  if (const Argument *arg = dyn_cast<Argument>(&val)) {
202  const Function *func = arg->getParent();
203  std::vector<unsigned> annot;
204  if (findAllNVVMAnnotation(func, "rdwrimage", annot)) {
205  if (is_contained(annot, arg->getArgNo()))
206  return true;
207  }
208  }
209  return false;
210 }
211 
212 bool isImage(const Value &val) {
214 }
215 
216 bool isManaged(const Value &val) {
217  if(const GlobalValue *gv = dyn_cast<GlobalValue>(&val)) {
218  unsigned annot;
219  if (findOneNVVMAnnotation(gv, "managed", annot)) {
220  assert((annot == 1) && "Unexpected annotation on a managed symbol");
221  return true;
222  }
223  }
224  return false;
225 }
226 
227 std::string getTextureName(const Value &val) {
228  assert(val.hasName() && "Found texture variable with no name");
229  return std::string(val.getName());
230 }
231 
232 std::string getSurfaceName(const Value &val) {
233  assert(val.hasName() && "Found surface variable with no name");
234  return std::string(val.getName());
235 }
236 
237 std::string getSamplerName(const Value &val) {
238  assert(val.hasName() && "Found sampler variable with no name");
239  return std::string(val.getName());
240 }
241 
242 bool getMaxNTIDx(const Function &F, unsigned &x) {
243  return findOneNVVMAnnotation(&F, "maxntidx", x);
244 }
245 
246 bool getMaxNTIDy(const Function &F, unsigned &y) {
247  return findOneNVVMAnnotation(&F, "maxntidy", y);
248 }
249 
250 bool getMaxNTIDz(const Function &F, unsigned &z) {
251  return findOneNVVMAnnotation(&F, "maxntidz", z);
252 }
253 
254 bool getReqNTIDx(const Function &F, unsigned &x) {
255  return findOneNVVMAnnotation(&F, "reqntidx", x);
256 }
257 
258 bool getReqNTIDy(const Function &F, unsigned &y) {
259  return findOneNVVMAnnotation(&F, "reqntidy", y);
260 }
261 
262 bool getReqNTIDz(const Function &F, unsigned &z) {
263  return findOneNVVMAnnotation(&F, "reqntidz", z);
264 }
265 
266 bool getMinCTASm(const Function &F, unsigned &x) {
267  return findOneNVVMAnnotation(&F, "minctasm", x);
268 }
269 
270 bool getMaxNReg(const Function &F, unsigned &x) {
271  return findOneNVVMAnnotation(&F, "maxnreg", x);
272 }
273 
275  unsigned x = 0;
276  bool retval = findOneNVVMAnnotation(&F, "kernel", x);
277  if (!retval) {
278  // There is no NVVM metadata, check the calling convention
279  return F.getCallingConv() == CallingConv::PTX_Kernel;
280  }
281  return (x == 1);
282 }
283 
284 bool getAlign(const Function &F, unsigned index, unsigned &align) {
285  std::vector<unsigned> Vs;
286  bool retval = findAllNVVMAnnotation(&F, "align", Vs);
287  if (!retval)
288  return false;
289  for (int i = 0, e = Vs.size(); i < e; i++) {
290  unsigned v = Vs[i];
291  if ((v >> 16) == index) {
292  align = v & 0xFFFF;
293  return true;
294  }
295  }
296  return false;
297 }
298 
299 bool getAlign(const CallInst &I, unsigned index, unsigned &align) {
300  if (MDNode *alignNode = I.getMetadata("callalign")) {
301  for (int i = 0, n = alignNode->getNumOperands(); i < n; i++) {
302  if (const ConstantInt *CI =
303  mdconst::dyn_extract<ConstantInt>(alignNode->getOperand(i))) {
304  unsigned v = CI->getZExtValue();
305  if ((v >> 16) == index) {
306  align = v & 0xFFFF;
307  return true;
308  }
309  if ((v >> 16) > index) {
310  return false;
311  }
312  }
313  }
314  }
315  return false;
316 }
317 
318 } // namespace llvm
z
return z
Definition: README.txt:14
llvm::cacheAnnotationFromMD
static void cacheAnnotationFromMD(const MDNode *md, key_val_pair_t &retval)
Definition: NVPTXUtilities.cpp:46
i
i
Definition: README.txt:29
llvm::Argument
This class represents an incoming formal argument to a Function.
Definition: Argument.h:29
llvm::getReqNTIDx
bool getReqNTIDx(const Function &F, unsigned &x)
Definition: NVPTXUtilities.cpp:254
llvm
Definition: AllocatorList.h:23
llvm::NamedMDNode
A tuple of MDNodes.
Definition: Metadata.h:1386
llvm::NamedMDNode::getNumOperands
unsigned getNumOperands() const
Definition: Metadata.cpp:1114
InstIterator.h
llvm::Function
Definition: Function.h:61
llvm::isImageReadWrite
bool isImageReadWrite(const Value &val)
Definition: NVPTXUtilities.cpp:200
ManagedStatic.h
llvm::getMinCTASm
bool getMinCTASm(const Function &F, unsigned &x)
Definition: NVPTXUtilities.cpp:266
Module.h
llvm::sys::path::end
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:233
llvm::isImage
bool isImage(const Value &val)
Definition: NVPTXUtilities.cpp:212
Operator.h
tmp1
urem i32 %X, 255 ret i32 %tmp1 } Currently it compiles to:... movl $2155905153, %ecx movl 8(%esp), %esi movl %esi, %eax mull %ecx ... This could be "reassociated" into:movl $2155905153, %eax movl 8(%esp), %ecx mull %ecx to avoid the copy. In fact, the existing two-address stuff would do this except that mul isn 't a commutative 2-addr instruction. I guess this has to be done at isel time based on the #uses to mul? Make sure the instruction which starts a loop does not cross a cacheline boundary. This requires knowning the exact length of each machine instruction. That is somewhat complicated, but doable. Example 256.bzip2:In the new trace, the hot loop has an instruction which crosses a cacheline boundary. In addition to potential cache misses, this can 't help decoding as I imagine there has to be some kind of complicated decoder reset and realignment to grab the bytes from the next cacheline. 532 532 0x3cfc movb(1809(%esp, %esi), %bl<<<--- spans 2 64 byte lines 942 942 0x3d03 movl %dh,(1809(%esp, %esi) 937 937 0x3d0a incl %esi 3 3 0x3d0b cmpb %bl, %dl 27 27 0x3d0d jnz 0x000062db< main+11707 > In c99 mode, the preprocessor doesn 't like assembly comments like #TRUNCATE. This could be a single 16-bit load. int f(char *p) { if((p[0]==1) &(p[1]==2)) return 1 tmp1
Definition: README.txt:375
tmp
alloca< 16 x float >, align 16 %tmp2=alloca< 16 x float >, align 16 store< 16 x float > %A,< 16 x float > *%tmp %s=bitcast< 16 x float > *%tmp to i8 *%s2=bitcast< 16 x float > *%tmp2 to i8 *call void @llvm.memcpy.i64(i8 *%s, i8 *%s2, i64 64, i32 16) %R=load< 16 x float > *%tmp2 ret< 16 x float > %R } declare void @llvm.memcpy.i64(i8 *nocapture, i8 *nocapture, i64, i32) nounwind which compiles to:_foo:subl $140, %esp movaps %xmm3, 112(%esp) movaps %xmm2, 96(%esp) movaps %xmm1, 80(%esp) movaps %xmm0, 64(%esp) movl 60(%esp), %eax movl %eax, 124(%esp) movl 56(%esp), %eax movl %eax, 120(%esp) movl 52(%esp), %eax< many many more 32-bit copies > movaps(%esp), %xmm0 movaps 16(%esp), %xmm1 movaps 32(%esp), %xmm2 movaps 48(%esp), %xmm3 addl $140, %esp ret On Nehalem, it may even be cheaper to just use movups when unaligned than to fall back to lower-granularity chunks. Implement processor-specific optimizations for parity with GCC on these processors. GCC does two optimizations:1. ix86_pad_returns inserts a noop before ret instructions if immediately preceded by a conditional branch or is the target of a jump. 2. ix86_avoid_jump_misspredicts inserts noops in cases where a 16-byte block of code contains more than 3 branches. The first one is done for all AMDs, Core2, and "Generic" The second one is done for:Atom, Pentium Pro, all AMDs, Pentium 4, Nocona, Core 2, and "Generic" Testcase:int x(int a) { return(a &0xf0)> >4 tmp
Definition: README.txt:1347
llvm::isTexture
bool isTexture(const Value &val)
Definition: NVPTXUtilities.cpp:133
F
#define F(x, y, z)
Definition: MD5.cpp:56
NVPTX.h
llvm::isSampler
bool isSampler(const Value &val)
Definition: NVPTXUtilities.cpp:155
NVPTXUtilities.h
llvm::MDNode::getNumOperands
unsigned getNumOperands() const
Return number of MDNode operands.
Definition: Metadata.h:1108
llvm::Lock
static sys::Mutex Lock
Definition: NVPTXUtilities.cpp:39
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:77
llvm::getReqNTIDy
bool getReqNTIDy(const Function &F, unsigned &y)
Definition: NVPTXUtilities.cpp:258
Constants.h
llvm::annotationCache
static ManagedStatic< per_module_annot_t > annotationCache
Definition: NVPTXUtilities.cpp:38
llvm::ManagedStatic
ManagedStatic - This transparently changes the behavior of global statics to be lazily constructed on...
Definition: ManagedStatic.h:83
llvm::getSamplerName
std::string getSamplerName(const Value &val)
Definition: NVPTXUtilities.cpp:237
llvm::getReqNTIDz
bool getReqNTIDz(const Function &F, unsigned &z)
Definition: NVPTXUtilities.cpp:262
llvm::sys::SmartMutex< false >
llvm::StringRef::str
LLVM_NODISCARD std::string str() const
str - Get the contents as an std::string.
Definition: StringRef.h:244
llvm::MDNode::getOperand
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1102
llvm::isKernelFunction
bool isKernelFunction(const Function &F)
Definition: NVPTXUtilities.cpp:274
val
The initial backend is deliberately restricted to z10 We should add support for later architectures at some point If an asm ties an i32 r result to an i64 the input will be treated as an leaving the upper bits uninitialised For i64 store i32 val
Definition: README.txt:15
llvm::GlobalValue
Definition: GlobalValue.h:44
llvm::findOneNVVMAnnotation
bool findOneNVVMAnnotation(const GlobalValue *gv, const std::string &prop, unsigned &retval)
Definition: NVPTXUtilities.cpp:105
index
splat index
Definition: README_ALTIVEC.txt:181
llvm::GlobalValue::getParent
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:572
llvm::find
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1502
llvm::isImageReadOnly
bool isImageReadOnly(const Value &val)
Definition: NVPTXUtilities.cpp:176
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:57
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::is_contained
bool is_contained(R &&Range, const E &Element)
Wrapper function around std::find to detect if an element exists in a container.
Definition: STLExtras.h:1547
llvm::NamedMDNode::getOperand
MDNode * getOperand(unsigned i) const
Definition: Metadata.cpp:1118
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:67
llvm::MDNode
Metadata node.
Definition: Metadata.h:897
Mutex.h
llvm::ConstantInt::getZExtValue
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:140
llvm::getMaxNTIDz
bool getMaxNTIDz(const Function &F, unsigned &z)
Definition: NVPTXUtilities.cpp:250
GlobalVariable.h
llvm::isSurface
bool isSurface(const Value &val)
Definition: NVPTXUtilities.cpp:144
Function.h
x
TODO unsigned x
Definition: README.txt:10
y
into llvm powi allowing the code generator to produce balanced multiplication trees the intrinsic needs to be extended to support and second the code generator needs to be enhanced to lower these to multiplication trees Interesting testcase for add shift mul int y
Definition: README.txt:61
llvm::getAlign
bool getAlign(const Function &F, unsigned index, unsigned &align)
Definition: NVPTXUtilities.cpp:284
llvm::getSurfaceName
std::string getSurfaceName(const Value &val)
Definition: NVPTXUtilities.cpp:232
llvm::getMaxNTIDy
bool getMaxNTIDy(const Function &F, unsigned &y)
Definition: NVPTXUtilities.cpp:246
llvm::isImageWriteOnly
bool isImageWriteOnly(const Value &val)
Definition: NVPTXUtilities.cpp:188
align
mov r0 ldr L5 sub r0 lr needed for prologue ldmia ip add bx lr r2 The last stmia stores r2 into the address passed in there is one additional stmia that stores and r2 to some stack location The store is dead The llvm gcc generated code looks like align
Definition: README.txt:236
llvm::clearAnnotationCache
void clearAnnotationCache(const Module *Mod)
Definition: NVPTXUtilities.cpp:41
llvm::Module::getNamedMetadata
NamedMDNode * getNamedMetadata(const Twine &Name) const
Return the first NamedMDNode in the module with the specified name.
Definition: Module.cpp:250
llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition: Instructions.h:1450
llvm::CallingConv::PTX_Kernel
@ PTX_Kernel
PTX_Kernel - Call to a PTX kernel.
Definition: CallingConv.h:130
Mod
Module * Mod
Definition: PassBuilderBindings.cpp:54
llvm::getMaxNTIDx
bool getMaxNTIDx(const Function &F, unsigned &x)
Definition: NVPTXUtilities.cpp:242
llvm::MDString::getString
StringRef getString() const
Definition: Metadata.cpp:477
llvm::getTextureName
std::string getTextureName(const Value &val)
Definition: NVPTXUtilities.cpp:227
llvm::getMaxNReg
bool getMaxNReg(const Function &F, unsigned &x)
Definition: NVPTXUtilities.cpp:270
n
The same transformation can work with an even modulo with the addition of a and shrink the compare RHS by the same amount Unless the target supports that transformation probably isn t worthwhile The transformation can also easily be made to work with non zero equality for n
Definition: README.txt:685
llvm::MDString
A single uniqued string.
Definition: Metadata.h:611
llvm::isManaged
bool isManaged(const Value &val)
Definition: NVPTXUtilities.cpp:216
llvm::Value
LLVM Value Representation.
Definition: Value.h:75
llvm::findAllNVVMAnnotation
bool findAllNVVMAnnotation(const GlobalValue *gv, const std::string &prop, std::vector< unsigned > &retval)
Definition: NVPTXUtilities.cpp:119