Go to the documentation of this file.
33 typedef std::map<std::string, std::vector<unsigned> > key_val_pair_t;
34 typedef std::map<const GlobalValue *, key_val_pair_t> global_val_annot_t;
35 typedef std::map<const Module *, global_val_annot_t> per_module_annot_t;
42 std::lock_guard<sys::Mutex> Guard(
Lock);
47 std::lock_guard<sys::Mutex> Guard(
Lock);
48 assert(md &&
"Invalid mdnode for annotation");
55 assert(prop &&
"Annotation property not a string");
59 assert(Val &&
"Value operand not a constant int");
62 if (retval.find(keyname) != retval.end())
65 std::vector<unsigned>
tmp;
67 retval[keyname] =
tmp;
73 std::lock_guard<sys::Mutex> Guard(
Lock);
82 mdconst::dyn_extract_or_null<GlobalValue>(elem->
getOperand(0));
96 if ((*annotationCache).find(m) != (*annotationCache).end())
99 global_val_annot_t
tmp1;
107 std::lock_guard<sys::Mutex> Guard(
Lock);
109 if ((*annotationCache).find(m) == (*annotationCache).end())
115 retval = (*annotationCache)[m][gv][prop][0];
120 std::vector<unsigned> &retval) {
121 std::lock_guard<sys::Mutex> Guard(
Lock);
123 if ((*annotationCache).find(m) == (*annotationCache).end())
129 retval = (*annotationCache)[m][gv][prop];
137 assert((annot == 1) &&
"Unexpected annotation on a texture symbol");
148 assert((annot == 1) &&
"Unexpected annotation on a surface symbol");
156 const char *AnnotationName =
"sampler";
161 assert((annot == 1) &&
"Unexpected annotation on a sampler symbol");
165 if (
const Argument *arg = dyn_cast<Argument>(&
val)) {
167 std::vector<unsigned> annot;
177 if (
const Argument *arg = dyn_cast<Argument>(&
val)) {
179 std::vector<unsigned> annot;
189 if (
const Argument *arg = dyn_cast<Argument>(&
val)) {
191 std::vector<unsigned> annot;
201 if (
const Argument *arg = dyn_cast<Argument>(&
val)) {
203 std::vector<unsigned> annot;
220 assert((annot == 1) &&
"Unexpected annotation on a managed symbol");
228 assert(
val.hasName() &&
"Found texture variable with no name");
229 return std::string(
val.getName());
233 assert(
val.hasName() &&
"Found surface variable with no name");
234 return std::string(
val.getName());
238 assert(
val.hasName() &&
"Found sampler variable with no name");
239 return std::string(
val.getName());
285 std::vector<unsigned> Vs;
289 for (
unsigned v : Vs) {
290 if ((v >> 16) ==
index) {
299 if (
MDNode *alignNode =
I.getMetadata(
"callalign")) {
300 for (
int i = 0,
n = alignNode->getNumOperands();
i <
n;
i++) {
302 mdconst::dyn_extract<ConstantInt>(alignNode->getOperand(
i))) {
303 unsigned v = CI->getZExtValue();
304 if ((v >> 16) ==
index) {
308 if ((v >> 16) >
index) {
static void cacheAnnotationFromMD(const MDNode *md, key_val_pair_t &retval)
This class represents an incoming formal argument to a Function.
bool getReqNTIDx(const Function &F, unsigned &x)
This is an optimization pass for GlobalISel generic memory operations.
unsigned getNumOperands() const
bool isImageReadWrite(const Value &val)
bool getMinCTASm(const Function &F, unsigned &x)
const_iterator end(StringRef path)
Get end iterator over path.
bool isImage(const Value &val)
urem i32 %X, 255 ret i32 %tmp1 } Currently it compiles to:... movl $2155905153, %ecx movl 8(%esp), %esi movl %esi, %eax mull %ecx ... This could be "reassociated" into:movl $2155905153, %eax movl 8(%esp), %ecx mull %ecx to avoid the copy. In fact, the existing two-address stuff would do this except that mul isn't a commutative 2-addr instruction. I guess this has to be done at isel time based on the #uses to mul? Make sure the instruction which starts a loop does not cross a cacheline boundary. This requires knowing the exact length of each machine instruction. That is somewhat complicated, but doable. Example 256.bzip2:In the new trace, the hot loop has an instruction which crosses a cacheline boundary. In addition to potential cache misses, this can't help decoding as I imagine there has to be some kind of complicated decoder reset and realignment to grab the bytes from the next cacheline. 532 532 0x3cfc movb(1809(%esp, %esi), %bl<<<--- spans 2 64 byte lines 942 942 0x3d03 movl %dh,(1809(%esp, %esi) 937 937 0x3d0a incl %esi 3 3 0x3d0b cmpb %bl, %dl 27 27 0x3d0d jnz 0x000062db< main+11707 > In c99 mode, the preprocessor doesn't like assembly comments like #TRUNCATE. This could be a single 16-bit load. int f(char *p) { if((p[0]==1) &(p[1]==2)) return 1 tmp1
alloca< 16 x float >, align 16 %tmp2=alloca< 16 x float >, align 16 store< 16 x float > %A,< 16 x float > *%tmp %s=bitcast< 16 x float > *%tmp to i8 *%s2=bitcast< 16 x float > *%tmp2 to i8 *call void @llvm.memcpy.i64(i8 *%s, i8 *%s2, i64 64, i32 16) %R=load< 16 x float > *%tmp2 ret< 16 x float > %R } declare void @llvm.memcpy.i64(i8 *nocapture, i8 *nocapture, i64, i32) nounwind which compiles to:_foo:subl $140, %esp movaps %xmm3, 112(%esp) movaps %xmm2, 96(%esp) movaps %xmm1, 80(%esp) movaps %xmm0, 64(%esp) movl 60(%esp), %eax movl %eax, 124(%esp) movl 56(%esp), %eax movl %eax, 120(%esp) movl 52(%esp), %eax< many many more 32-bit copies > movaps(%esp), %xmm0 movaps 16(%esp), %xmm1 movaps 32(%esp), %xmm2 movaps 48(%esp), %xmm3 addl $140, %esp ret On Nehalem, it may even be cheaper to just use movups when unaligned than to fall back to lower-granularity chunks. Implement processor-specific optimizations for parity with GCC on these processors. GCC does two optimizations:1. ix86_pad_returns inserts a noop before ret instructions if immediately preceded by a conditional branch or is the target of a jump. 2. ix86_avoid_jump_misspredicts inserts noops in cases where a 16-byte block of code contains more than 3 branches. The first one is done for all AMDs, Core2, and "Generic" The second one is done for:Atom, Pentium Pro, all AMDs, Pentium 4, Nocona, Core 2, and "Generic" Testcase:int x(int a) { return(a &0xf0)> >4 tmp
bool isTexture(const Value &val)
bool isSampler(const Value &val)
unsigned getNumOperands() const
Return number of MDNode operands.
This is the shared class of boolean and integer constants.
bool getReqNTIDy(const Function &F, unsigned &y)
static ManagedStatic< per_module_annot_t > annotationCache
ManagedStatic - This transparently changes the behavior of global statics to be lazily constructed on...
std::string getSamplerName(const Value &val)
bool getReqNTIDz(const Function &F, unsigned &z)
LLVM_NODISCARD std::string str() const
str - Get the contents as an std::string.
const MDOperand & getOperand(unsigned I) const
bool isKernelFunction(const Function &F)
The initial backend is deliberately restricted to z10 We should add support for later architectures at some point If an asm ties an i32 r result to an i64 the input will be treated as an leaving the upper bits uninitialised For i64 store i32 val
bool findOneNVVMAnnotation(const GlobalValue *gv, const std::string &prop, unsigned &retval)
Module * getParent()
Get the module that this global value is contained inside of...
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
bool isImageReadOnly(const Value &val)
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
bool is_contained(R &&Range, const E &Element)
Wrapper function around std::find to detect if an element exists in a container.
MDNode * getOperand(unsigned i) const
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
A Module instance is used to store all the information related to an LLVM module.
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
bool getMaxNTIDz(const Function &F, unsigned &z)
bool isSurface(const Value &val)
@ PTX_Kernel
PTX_Kernel - Call to a PTX kernel.
into llvm powi allowing the code generator to produce balanced multiplication trees the intrinsic needs to be extended to support and second the code generator needs to be enhanced to lower these to multiplication trees Interesting testcase for add shift mul int y
bool getAlign(const Function &F, unsigned index, unsigned &align)
std::string getSurfaceName(const Value &val)
bool getMaxNTIDy(const Function &F, unsigned &y)
bool isImageWriteOnly(const Value &val)
mov r0 ldr L5 sub r0 lr needed for prologue ldmia ip add bx lr r2 The last stmia stores r2 into the address passed in there is one additional stmia that stores and r2 to some stack location The store is dead The llvm gcc generated code looks like align
void clearAnnotationCache(const Module *Mod)
NamedMDNode * getNamedMetadata(const Twine &Name) const
Return the first NamedMDNode in the module with the specified name.
This class represents a function call, abstracting a target machine's calling convention.
bool getMaxNTIDx(const Function &F, unsigned &x)
StringRef getString() const
std::string getTextureName(const Value &val)
bool getMaxNReg(const Function &F, unsigned &x)
The same transformation can work with an even modulo with the addition of a and shrink the compare RHS by the same amount Unless the target supports that transformation probably isn't worthwhile The transformation can also easily be made to work with non zero equality for n
bool isManaged(const Value &val)
LLVM Value Representation.
bool findAllNVVMAnnotation(const GlobalValue *gv, const std::string &prop, std::vector< unsigned > &retval)