LLVM  16.0.0git
NVPTXUtilities.cpp
Go to the documentation of this file.
1 //===- NVPTXUtilities.cpp - Utility Functions -----------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains miscellaneous utility functions
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "NVPTXUtilities.h"
14 #include "NVPTX.h"
15 #include "llvm/IR/Constants.h"
16 #include "llvm/IR/Function.h"
17 #include "llvm/IR/GlobalVariable.h"
18 #include "llvm/IR/InstIterator.h"
19 #include "llvm/IR/Module.h"
20 #include "llvm/IR/Operator.h"
21 #include "llvm/Support/Mutex.h"
22 #include <algorithm>
23 #include <cstring>
24 #include <map>
25 #include <mutex>
26 #include <string>
27 #include <vector>
28 
29 namespace llvm {
30 
31 namespace {
32 typedef std::map<std::string, std::vector<unsigned> > key_val_pair_t;
33 typedef std::map<const GlobalValue *, key_val_pair_t> global_val_annot_t;
34 
35 struct AnnotationCache {
36  sys::Mutex Lock;
37  std::map<const Module *, global_val_annot_t> Cache;
38 };
39 
40 AnnotationCache &getAnnotationCache() {
41  static AnnotationCache AC;
42  return AC;
43 }
44 } // anonymous namespace
45 
46 void clearAnnotationCache(const Module *Mod) {
47  auto &AC = getAnnotationCache();
48  std::lock_guard<sys::Mutex> Guard(AC.Lock);
49  AC.Cache.erase(Mod);
50 }
51 
52 static void cacheAnnotationFromMD(const MDNode *md, key_val_pair_t &retval) {
53  auto &AC = getAnnotationCache();
54  std::lock_guard<sys::Mutex> Guard(AC.Lock);
55  assert(md && "Invalid mdnode for annotation");
56  assert((md->getNumOperands() % 2) == 1 && "Invalid number of operands");
57  // start index = 1, to skip the global variable key
58  // increment = 2, to skip the value for each property-value pairs
59  for (unsigned i = 1, e = md->getNumOperands(); i != e; i += 2) {
60  // property
61  const MDString *prop = dyn_cast<MDString>(md->getOperand(i));
62  assert(prop && "Annotation property not a string");
63 
64  // value
65  ConstantInt *Val = mdconst::dyn_extract<ConstantInt>(md->getOperand(i + 1));
66  assert(Val && "Value operand not a constant int");
67 
68  std::string keyname = prop->getString().str();
69  if (retval.find(keyname) != retval.end())
70  retval[keyname].push_back(Val->getZExtValue());
71  else {
72  std::vector<unsigned> tmp;
73  tmp.push_back(Val->getZExtValue());
74  retval[keyname] = tmp;
75  }
76  }
77 }
78 
79 static void cacheAnnotationFromMD(const Module *m, const GlobalValue *gv) {
80  auto &AC = getAnnotationCache();
81  std::lock_guard<sys::Mutex> Guard(AC.Lock);
82  NamedMDNode *NMD = m->getNamedMetadata("nvvm.annotations");
83  if (!NMD)
84  return;
85  key_val_pair_t tmp;
86  for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
87  const MDNode *elem = NMD->getOperand(i);
88 
89  GlobalValue *entity =
90  mdconst::dyn_extract_or_null<GlobalValue>(elem->getOperand(0));
91  // entity may be null due to DCE
92  if (!entity)
93  continue;
94  if (entity != gv)
95  continue;
96 
97  // accumulate annotations for entity in tmp
99  }
100 
101  if (tmp.empty()) // no annotations for this gv
102  return;
103 
104  if (AC.Cache.find(m) != AC.Cache.end())
105  AC.Cache[m][gv] = std::move(tmp);
106  else {
107  global_val_annot_t tmp1;
108  tmp1[gv] = std::move(tmp);
109  AC.Cache[m] = std::move(tmp1);
110  }
111 }
112 
113 bool findOneNVVMAnnotation(const GlobalValue *gv, const std::string &prop,
114  unsigned &retval) {
115  auto &AC = getAnnotationCache();
116  std::lock_guard<sys::Mutex> Guard(AC.Lock);
117  const Module *m = gv->getParent();
118  if (AC.Cache.find(m) == AC.Cache.end())
119  cacheAnnotationFromMD(m, gv);
120  else if (AC.Cache[m].find(gv) == AC.Cache[m].end())
121  cacheAnnotationFromMD(m, gv);
122  if (AC.Cache[m][gv].find(prop) == AC.Cache[m][gv].end())
123  return false;
124  retval = AC.Cache[m][gv][prop][0];
125  return true;
126 }
127 
128 bool findAllNVVMAnnotation(const GlobalValue *gv, const std::string &prop,
129  std::vector<unsigned> &retval) {
130  auto &AC = getAnnotationCache();
131  std::lock_guard<sys::Mutex> Guard(AC.Lock);
132  const Module *m = gv->getParent();
133  if (AC.Cache.find(m) == AC.Cache.end())
134  cacheAnnotationFromMD(m, gv);
135  else if (AC.Cache[m].find(gv) == AC.Cache[m].end())
136  cacheAnnotationFromMD(m, gv);
137  if (AC.Cache[m][gv].find(prop) == AC.Cache[m][gv].end())
138  return false;
139  retval = AC.Cache[m][gv][prop];
140  return true;
141 }
142 
143 bool isTexture(const Value &val) {
144  if (const GlobalValue *gv = dyn_cast<GlobalValue>(&val)) {
145  unsigned annot;
146  if (findOneNVVMAnnotation(gv, "texture", annot)) {
147  assert((annot == 1) && "Unexpected annotation on a texture symbol");
148  return true;
149  }
150  }
151  return false;
152 }
153 
154 bool isSurface(const Value &val) {
155  if (const GlobalValue *gv = dyn_cast<GlobalValue>(&val)) {
156  unsigned annot;
157  if (findOneNVVMAnnotation(gv, "surface", annot)) {
158  assert((annot == 1) && "Unexpected annotation on a surface symbol");
159  return true;
160  }
161  }
162  return false;
163 }
164 
165 bool isSampler(const Value &val) {
166  const char *AnnotationName = "sampler";
167 
168  if (const GlobalValue *gv = dyn_cast<GlobalValue>(&val)) {
169  unsigned annot;
170  if (findOneNVVMAnnotation(gv, AnnotationName, annot)) {
171  assert((annot == 1) && "Unexpected annotation on a sampler symbol");
172  return true;
173  }
174  }
175  if (const Argument *arg = dyn_cast<Argument>(&val)) {
176  const Function *func = arg->getParent();
177  std::vector<unsigned> annot;
178  if (findAllNVVMAnnotation(func, AnnotationName, annot)) {
179  if (is_contained(annot, arg->getArgNo()))
180  return true;
181  }
182  }
183  return false;
184 }
185 
186 bool isImageReadOnly(const Value &val) {
187  if (const Argument *arg = dyn_cast<Argument>(&val)) {
188  const Function *func = arg->getParent();
189  std::vector<unsigned> annot;
190  if (findAllNVVMAnnotation(func, "rdoimage", annot)) {
191  if (is_contained(annot, arg->getArgNo()))
192  return true;
193  }
194  }
195  return false;
196 }
197 
198 bool isImageWriteOnly(const Value &val) {
199  if (const Argument *arg = dyn_cast<Argument>(&val)) {
200  const Function *func = arg->getParent();
201  std::vector<unsigned> annot;
202  if (findAllNVVMAnnotation(func, "wroimage", annot)) {
203  if (is_contained(annot, arg->getArgNo()))
204  return true;
205  }
206  }
207  return false;
208 }
209 
210 bool isImageReadWrite(const Value &val) {
211  if (const Argument *arg = dyn_cast<Argument>(&val)) {
212  const Function *func = arg->getParent();
213  std::vector<unsigned> annot;
214  if (findAllNVVMAnnotation(func, "rdwrimage", annot)) {
215  if (is_contained(annot, arg->getArgNo()))
216  return true;
217  }
218  }
219  return false;
220 }
221 
222 bool isImage(const Value &val) {
224 }
225 
226 bool isManaged(const Value &val) {
227  if(const GlobalValue *gv = dyn_cast<GlobalValue>(&val)) {
228  unsigned annot;
229  if (findOneNVVMAnnotation(gv, "managed", annot)) {
230  assert((annot == 1) && "Unexpected annotation on a managed symbol");
231  return true;
232  }
233  }
234  return false;
235 }
236 
237 std::string getTextureName(const Value &val) {
238  assert(val.hasName() && "Found texture variable with no name");
239  return std::string(val.getName());
240 }
241 
242 std::string getSurfaceName(const Value &val) {
243  assert(val.hasName() && "Found surface variable with no name");
244  return std::string(val.getName());
245 }
246 
247 std::string getSamplerName(const Value &val) {
248  assert(val.hasName() && "Found sampler variable with no name");
249  return std::string(val.getName());
250 }
251 
252 bool getMaxNTIDx(const Function &F, unsigned &x) {
253  return findOneNVVMAnnotation(&F, "maxntidx", x);
254 }
255 
256 bool getMaxNTIDy(const Function &F, unsigned &y) {
257  return findOneNVVMAnnotation(&F, "maxntidy", y);
258 }
259 
260 bool getMaxNTIDz(const Function &F, unsigned &z) {
261  return findOneNVVMAnnotation(&F, "maxntidz", z);
262 }
263 
264 bool getReqNTIDx(const Function &F, unsigned &x) {
265  return findOneNVVMAnnotation(&F, "reqntidx", x);
266 }
267 
268 bool getReqNTIDy(const Function &F, unsigned &y) {
269  return findOneNVVMAnnotation(&F, "reqntidy", y);
270 }
271 
272 bool getReqNTIDz(const Function &F, unsigned &z) {
273  return findOneNVVMAnnotation(&F, "reqntidz", z);
274 }
275 
276 bool getMinCTASm(const Function &F, unsigned &x) {
277  return findOneNVVMAnnotation(&F, "minctasm", x);
278 }
279 
280 bool getMaxNReg(const Function &F, unsigned &x) {
281  return findOneNVVMAnnotation(&F, "maxnreg", x);
282 }
283 
285  unsigned x = 0;
286  bool retval = findOneNVVMAnnotation(&F, "kernel", x);
287  if (!retval) {
288  // There is no NVVM metadata, check the calling convention
289  return F.getCallingConv() == CallingConv::PTX_Kernel;
290  }
291  return (x == 1);
292 }
293 
294 bool getAlign(const Function &F, unsigned index, unsigned &align) {
295  std::vector<unsigned> Vs;
296  bool retval = findAllNVVMAnnotation(&F, "align", Vs);
297  if (!retval)
298  return false;
299  for (unsigned v : Vs) {
300  if ((v >> 16) == index) {
301  align = v & 0xFFFF;
302  return true;
303  }
304  }
305  return false;
306 }
307 
308 bool getAlign(const CallInst &I, unsigned index, unsigned &align) {
309  if (MDNode *alignNode = I.getMetadata("callalign")) {
310  for (int i = 0, n = alignNode->getNumOperands(); i < n; i++) {
311  if (const ConstantInt *CI =
312  mdconst::dyn_extract<ConstantInt>(alignNode->getOperand(i))) {
313  unsigned v = CI->getZExtValue();
314  if ((v >> 16) == index) {
315  align = v & 0xFFFF;
316  return true;
317  }
318  if ((v >> 16) > index) {
319  return false;
320  }
321  }
322  }
323  }
324  return false;
325 }
326 
328  return dyn_cast<Function>(CB->getCalledOperand()->stripPointerCasts());
329 }
330 
331 } // namespace llvm
z
return z
Definition: README.txt:14
llvm::cacheAnnotationFromMD
static void cacheAnnotationFromMD(const MDNode *md, key_val_pair_t &retval)
Definition: NVPTXUtilities.cpp:52
i
i
Definition: README.txt:29
llvm::Argument
This class represents an incoming formal argument to a Function.
Definition: Argument.h:28
llvm::getReqNTIDx
bool getReqNTIDx(const Function &F, unsigned &x)
Definition: NVPTXUtilities.cpp:264
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
llvm::NamedMDNode
A tuple of MDNodes.
Definition: Metadata.h:1588
llvm::NamedMDNode::getNumOperands
unsigned getNumOperands() const
Definition: Metadata.cpp:1212
InstIterator.h
llvm::Function
Definition: Function.h:60
llvm::CallingConv::PTX_Kernel
@ PTX_Kernel
Call to a PTX kernel. Passes all arguments in parameter space.
Definition: CallingConv.h:122
llvm::isImageReadWrite
bool isImageReadWrite(const Value &val)
Definition: NVPTXUtilities.cpp:210
llvm::getMinCTASm
bool getMinCTASm(const Function &F, unsigned &x)
Definition: NVPTXUtilities.cpp:276
Module.h
llvm::isImage
bool isImage(const Value &val)
Definition: NVPTXUtilities.cpp:222
Operator.h
tmp1
urem i32 %X, 255 ret i32 %tmp1 } Currently it compiles to:... movl $2155905153, %ecx movl 8(%esp), %esi movl %esi, %eax mull %ecx ... This could be "reassociated" into:movl $2155905153, %eax movl 8(%esp), %ecx mull %ecx to avoid the copy. In fact, the existing two-address stuff would do this except that mul isn 't a commutative 2-addr instruction. I guess this has to be done at isel time based on the #uses to mul? Make sure the instruction which starts a loop does not cross a cacheline boundary. This requires knowning the exact length of each machine instruction. That is somewhat complicated, but doable. Example 256.bzip2:In the new trace, the hot loop has an instruction which crosses a cacheline boundary. In addition to potential cache misses, this can 't help decoding as I imagine there has to be some kind of complicated decoder reset and realignment to grab the bytes from the next cacheline. 532 532 0x3cfc movb(1809(%esp, %esi), %bl<<<--- spans 2 64 byte lines 942 942 0x3d03 movl %dh,(1809(%esp, %esi) 937 937 0x3d0a incl %esi 3 3 0x3d0b cmpb %bl, %dl 27 27 0x3d0d jnz 0x000062db< main+11707 > In c99 mode, the preprocessor doesn 't like assembly comments like #TRUNCATE. This could be a single 16-bit load. int f(char *p) { if((p[0]==1) &(p[1]==2)) return 1 tmp1
Definition: README.txt:375
tmp
alloca< 16 x float >, align 16 %tmp2=alloca< 16 x float >, align 16 store< 16 x float > %A,< 16 x float > *%tmp %s=bitcast< 16 x float > *%tmp to i8 *%s2=bitcast< 16 x float > *%tmp2 to i8 *call void @llvm.memcpy.i64(i8 *%s, i8 *%s2, i64 64, i32 16) %R=load< 16 x float > *%tmp2 ret< 16 x float > %R } declare void @llvm.memcpy.i64(i8 *nocapture, i8 *nocapture, i64, i32) nounwind which compiles to:_foo:subl $140, %esp movaps %xmm3, 112(%esp) movaps %xmm2, 96(%esp) movaps %xmm1, 80(%esp) movaps %xmm0, 64(%esp) movl 60(%esp), %eax movl %eax, 124(%esp) movl 56(%esp), %eax movl %eax, 120(%esp) movl 52(%esp), %eax< many many more 32-bit copies > movaps(%esp), %xmm0 movaps 16(%esp), %xmm1 movaps 32(%esp), %xmm2 movaps 48(%esp), %xmm3 addl $140, %esp ret On Nehalem, it may even be cheaper to just use movups when unaligned than to fall back to lower-granularity chunks. Implement processor-specific optimizations for parity with GCC on these processors. GCC does two optimizations:1. ix86_pad_returns inserts a noop before ret instructions if immediately preceded by a conditional branch or is the target of a jump. 2. ix86_avoid_jump_misspredicts inserts noops in cases where a 16-byte block of code contains more than 3 branches. The first one is done for all AMDs, Core2, and "Generic" The second one is done for:Atom, Pentium Pro, all AMDs, Pentium 4, Nocona, Core 2, and "Generic" Testcase:int x(int a) { return(a &0xf0)> >4 tmp
Definition: README.txt:1347
llvm::isTexture
bool isTexture(const Value &val)
Definition: NVPTXUtilities.cpp:143
F
#define F(x, y, z)
Definition: MD5.cpp:55
NVPTX.h
llvm::isSampler
bool isSampler(const Value &val)
Definition: NVPTXUtilities.cpp:165
NVPTXUtilities.h
llvm::MDNode::getNumOperands
unsigned getNumOperands() const
Return number of MDNode operands.
Definition: Metadata.h:1298
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
llvm::getReqNTIDy
bool getReqNTIDy(const Function &F, unsigned &y)
Definition: NVPTXUtilities.cpp:268
Constants.h
llvm::getMaybeBitcastedCallee
Function * getMaybeBitcastedCallee(const CallBase *CB)
Definition: NVPTXUtilities.cpp:327
llvm::getSamplerName
std::string getSamplerName(const Value &val)
Definition: NVPTXUtilities.cpp:247
llvm::getReqNTIDz
bool getReqNTIDz(const Function &F, unsigned &z)
Definition: NVPTXUtilities.cpp:272
llvm::MDNode::getOperand
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1292
llvm::isKernelFunction
bool isKernelFunction(const Function &F)
Definition: NVPTXUtilities.cpp:284
val
The initial backend is deliberately restricted to z10 We should add support for later architectures at some point If an asm ties an i32 r result to an i64 the input will be treated as an leaving the upper bits uninitialised For i64 store i32 val
Definition: README.txt:15
llvm::GlobalValue
Definition: GlobalValue.h:44
llvm::findOneNVVMAnnotation
bool findOneNVVMAnnotation(const GlobalValue *gv, const std::string &prop, unsigned &retval)
Definition: NVPTXUtilities.cpp:113
index
splat index
Definition: README_ALTIVEC.txt:181
llvm::GlobalValue::getParent
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:650
llvm::isImageReadOnly
bool isImageReadOnly(const Value &val)
Definition: NVPTXUtilities.cpp:186
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:53
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::is_contained
bool is_contained(R &&Range, const E &Element)
Wrapper function around std::find to detect if an element exists in a container.
Definition: STLExtras.h:1843
llvm::NamedMDNode::getOperand
MDNode * getOperand(unsigned i) const
Definition: Metadata.cpp:1216
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
llvm::MDNode
Metadata node.
Definition: Metadata.h:944
Mutex.h
llvm::sys::Mutex
SmartMutex< false > Mutex
Mutex - A standard, always enforced mutex.
Definition: Mutex.h:66
llvm::Value::stripPointerCasts
const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition: Value.cpp:685
llvm::ConstantInt::getZExtValue
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:142
llvm::getMaxNTIDz
bool getMaxNTIDz(const Function &F, unsigned &z)
Definition: NVPTXUtilities.cpp:260
GlobalVariable.h
llvm::isSurface
bool isSurface(const Value &val)
Definition: NVPTXUtilities.cpp:154
Function.h
x
TODO unsigned x
Definition: README.txt:10
y
into llvm powi allowing the code generator to produce balanced multiplication trees the intrinsic needs to be extended to support and second the code generator needs to be enhanced to lower these to multiplication trees Interesting testcase for add shift mul int y
Definition: README.txt:61
llvm::CallBase::getCalledOperand
Value * getCalledOperand() const
Definition: InstrTypes.h:1389
llvm::getAlign
bool getAlign(const Function &F, unsigned index, unsigned &align)
Definition: NVPTXUtilities.cpp:294
llvm::getSurfaceName
std::string getSurfaceName(const Value &val)
Definition: NVPTXUtilities.cpp:242
llvm::getMaxNTIDy
bool getMaxNTIDy(const Function &F, unsigned &y)
Definition: NVPTXUtilities.cpp:256
llvm::isImageWriteOnly
bool isImageWriteOnly(const Value &val)
Definition: NVPTXUtilities.cpp:198
align
mov r0 ldr L5 sub r0 lr needed for prologue ldmia ip add bx lr r2 The last stmia stores r2 into the address passed in there is one additional stmia that stores and r2 to some stack location The store is dead The llvm gcc generated code looks like align
Definition: README.txt:236
llvm::clearAnnotationCache
void clearAnnotationCache(const Module *Mod)
Definition: NVPTXUtilities.cpp:46
llvm::Module::getNamedMetadata
NamedMDNode * getNamedMetadata(const Twine &Name) const
Return the first NamedMDNode in the module with the specified name.
Definition: Module.cpp:251
llvm::CallBase
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1174
llvm::StringRef::str
std::string str() const
str - Get the contents as an std::string.
Definition: StringRef.h:221
llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition: Instructions.h:1473
Mod
Module * Mod
Definition: PassBuilderBindings.cpp:54
llvm::getMaxNTIDx
bool getMaxNTIDx(const Function &F, unsigned &x)
Definition: NVPTXUtilities.cpp:252
llvm::MDString::getString
StringRef getString() const
Definition: Metadata.cpp:508
llvm::getTextureName
std::string getTextureName(const Value &val)
Definition: NVPTXUtilities.cpp:237
llvm::getMaxNReg
bool getMaxNReg(const Function &F, unsigned &x)
Definition: NVPTXUtilities.cpp:280
n
The same transformation can work with an even modulo with the addition of a and shrink the compare RHS by the same amount Unless the target supports that transformation probably isn t worthwhile The transformation can also easily be made to work with non zero equality for n
Definition: README.txt:685
llvm::MDString
A single uniqued string.
Definition: Metadata.h:612
llvm::isManaged
bool isManaged(const Value &val)
Definition: NVPTXUtilities.cpp:226
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
llvm::findAllNVVMAnnotation
bool findAllNVVMAnnotation(const GlobalValue *gv, const std::string &prop, std::vector< unsigned > &retval)
Definition: NVPTXUtilities.cpp:128