Go to the documentation of this file.
29 template <
class ArgIt>
31 ArgIt ArgBegin, ArgIt ArgEnd,
37 std::vector<Type *> ParamTys;
38 for (ArgIt
I = ArgBegin;
I != ArgEnd; ++
I)
39 ParamTys.push_back((*I)->getType());
67 V =
Builder.CreateOr(Tmp1, Tmp2,
"bswap.i16");
85 Tmp4 =
Builder.CreateOr(Tmp4, Tmp3,
"bswap.or1");
86 Tmp2 =
Builder.CreateOr(Tmp2, Tmp1,
"bswap.or2");
87 V =
Builder.CreateOr(Tmp4, Tmp2,
"bswap.i32");
112 0xFF000000000000ULL),
134 Tmp8 =
Builder.CreateOr(Tmp8, Tmp7,
"bswap.or1");
135 Tmp6 =
Builder.CreateOr(Tmp6, Tmp5,
"bswap.or2");
136 Tmp4 =
Builder.CreateOr(Tmp4, Tmp3,
"bswap.or3");
137 Tmp2 =
Builder.CreateOr(Tmp2, Tmp1,
"bswap.or4");
138 Tmp8 =
Builder.CreateOr(Tmp8, Tmp6,
"bswap.or5");
139 Tmp4 =
Builder.CreateOr(Tmp4, Tmp2,
"bswap.or6");
140 V =
Builder.CreateOr(Tmp8, Tmp4,
"bswap.i64");
151 static const uint64_t MaskValues[6] = {
152 0x5555555555555555ULL, 0x3333333333333333ULL,
153 0x0F0F0F0F0F0F0F0FULL, 0x00FF00FF00FF00FFULL,
154 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL
160 unsigned WordSize = (BitSize + 63) / 64;
163 for (
unsigned n = 0;
n < WordSize; ++
n) {
164 Value *PartValue = V;
165 for (
unsigned i = 1, ct = 0;
i < (BitSize>64 ? 64 : BitSize);
175 Count =
Builder.CreateAdd(PartValue, Count,
"ctpop.part");
192 for (
unsigned i = 1;
i < BitSize;
i <<= 1) {
194 ShVal =
Builder.CreateLShr(V, ShVal,
"ctlz.sh");
195 V =
Builder.CreateOr(V, ShVal,
"ctlz.step");
204 const char *LDname) {
231 switch (
Callee->getIntrinsicID()) {
234 Callee->getName() +
"'!");
239 case Intrinsic::expect: {
246 case Intrinsic::ctpop:
250 case Intrinsic::bswap:
254 case Intrinsic::ctlz:
258 case Intrinsic::cttz: {
262 NotSrc->
setName(Src->getName() +
".not");
264 SrcM1 =
Builder.CreateSub(Src, SrcM1);
270 case Intrinsic::stacksave:
271 case Intrinsic::stackrestore: {
273 errs() <<
"WARNING: this target does not support the llvm.stack"
274 << (
Callee->getIntrinsicID() == Intrinsic::stacksave ?
275 "save" :
"restore") <<
" intrinsic.\n";
277 if (
Callee->getIntrinsicID() == Intrinsic::stacksave)
282 case Intrinsic::get_dynamic_area_offset:
283 errs() <<
"WARNING: this target does not support the custom llvm.get."
284 "dynamic.area.offset. It is being lowered to a constant 0\n";
289 case Intrinsic::returnaddress:
290 case Intrinsic::frameaddress:
291 errs() <<
"WARNING: this target does not support the llvm."
292 << (
Callee->getIntrinsicID() == Intrinsic::returnaddress ?
293 "return" :
"frame") <<
"address intrinsic.\n";
297 case Intrinsic::addressofreturnaddress:
298 errs() <<
"WARNING: this target does not support the "
299 "llvm.addressofreturnaddress intrinsic.\n";
307 case Intrinsic::pcmarker:
309 case Intrinsic::readcyclecounter: {
310 errs() <<
"WARNING: this target does not support the llvm.readcyclecoun"
311 <<
"ter intrinsic. It is being lowered to a constant 0\n";
316 case Intrinsic::dbg_declare:
317 case Intrinsic::dbg_label:
320 case Intrinsic::eh_typeid_for:
325 case Intrinsic::annotation:
326 case Intrinsic::ptr_annotation:
331 case Intrinsic::assume:
332 case Intrinsic::experimental_noalias_scope_decl:
333 case Intrinsic::var_annotation:
347 case Intrinsic::memmove: {
358 case Intrinsic::memset: {
373 case Intrinsic::sqrt: {
377 case Intrinsic::log: {
385 case Intrinsic::log10: {
389 case Intrinsic::exp: {
393 case Intrinsic::exp2: {
397 case Intrinsic::pow: {
401 case Intrinsic::sin: {
405 case Intrinsic::cos: {
425 case Intrinsic::roundeven: {
429 case Intrinsic::copysign: {
433 case Intrinsic::flt_rounds:
438 case Intrinsic::invariant_start:
439 case Intrinsic::lifetime_start:
443 case Intrinsic::invariant_end:
444 case Intrinsic::lifetime_end:
450 "Lowering should have eliminated any uses of the intrinsic call!");
@ FloatTyID
32-bit floating point type
@ DoubleTyID
64-bit floating point type
This is an optimization pass for GlobalISel generic memory operations.
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not...
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
We have fiadd patterns now but the followings have the same cost and complexity We need a way to specify the later is more profitable def def The FP stackifier should handle simple permutates to reduce number of shuffle e g ceil
static Value * LowerCTPOP(LLVMContext &Context, Value *V, Instruction *IP)
Emit the code to lower ctpop of V before the specified instruction IP.
static bool LowerToByteSwap(CallInst *CI)
Try to replace a call instruction with a call to a bswap intrinsic.
static FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
TypeID getTypeID() const
Return the type id for the type.
The instances of the Type class are immutable: once they are created, they are never changed.
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
static IntegerType * getInt32Ty(LLVMContext &C)
We have fiadd patterns now but the followings have the same cost and complexity We need a way to specify the later is more profitable def def The FP stackifier should handle simple permutates to reduce number of shuffle e g floor
static Type * getDoubleTy(LLVMContext &C)
static uint64_t round(uint64_t Acc, uint64_t Input)
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Class to represent integer types.
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
void setName(const Twine &Name)
Change the name of the value.
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
static Value * LowerCTLZ(LLVMContext &Context, Value *V, Instruction *IP)
Emit the code to lower ctlz of V before the specified instruction IP.
bool isIntegerTy() const
True if this is an instance of IntegerType.
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
static ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
This is an important class for using LLVM in a threaded context.
static CallInst * ReplaceCallWith(const char *NewFn, CallInst *CI, ArgIt ArgBegin, ArgIt ArgEnd, Type *RetTy)
This function is used when we want to lower an intrinsic call to a call of an external function.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
<%struct.s * > cast struct s *S to sbyte *< sbyte * > sbyte uint cast struct s *agg result to sbyte *< sbyte * > sbyte uint cast struct s *memtmp to sbyte *< sbyte * > sbyte uint ret void llc ends up issuing two memcpy or custom lower memcpy(of small size) to be ldmia/stmia. I think option 2 is better but the current register allocator cannot allocate a chunk of registers at a time. A feasible temporary solution is to use specific physical registers at the lowering time for small(<
bool isVoidTy() const
Return true if this is 'void'.
A Module instance is used to store all the information related to an LLVM module.
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
@ FP128TyID
128-bit floating point type (112-bit significand)
@ PPC_FP128TyID
128-bit floating point type (two 64-bits, PowerPC)
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Type * getType() const
All values are typed, get the type of this value.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
LLVMContext & getContext() const
All values hold a context through their type.
self_iterator getIterator()
We have fiadd patterns now but the followings have the same cost and complexity We need a way to specify the later is more profitable def def The FP stackifier should handle simple permutates to reduce number of shuffle e g trunc
StringRef getName() const
Return a constant reference to the value's name.
amdgpu Simplify well known AMD library false FunctionCallee Callee
IntegerType * getIntPtrType(LLVMContext &C, unsigned AddressSpace=0) const
Returns an integer type with size at least as big as that of a pointer in the given address space.
static IntegerType * getInt64Ty(LLVMContext &C)
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
unsigned arg_size() const
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
static double log2(double V)
static Value * LowerBSWAP(LLVMContext &Context, Value *V, Instruction *IP)
Emit the code to lower bswap of V before the specified instruction IP.
Value * getArgOperand(unsigned i) const
const BasicBlock * getParent() const
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
This class represents a function call, abstracting a target machine's calling convention.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
Value * getOperand(unsigned i) const
The same transformation can work with an even modulo with the addition of a and shrink the compare RHS by the same amount Unless the target supports that transformation probably isn t worthwhile The transformation can also easily be made to work with non zero equality for n
void LowerIntrinsicCall(CallInst *CI)
Replace a call to the specified intrinsic function.
LLVM Value Representation.
@ X86_FP80TyID
80-bit floating point type (X87)
static Type * getFloatTy(LLVMContext &C)
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
static void ReplaceFPIntrinsicWithCall(CallInst *CI, const char *Fname, const char *Dname, const char *LDname)