#define DEBUG_TYPE "systemztti"

// Fragments of the static helper that reports whether a value is used only as
// the source of non-volatile memcpy calls (looking through bitcasts and GEPs);
// elided lines are marked "...".
bool UsedAsMemCpySource = false;
// ...
    if (isa<BitCastInst>(User) || isa<GetElementPtrInst>(User)) {
      // ...
    }
    // ...
    if (Memcpy->getOperand(1) == V && !Memcpy->isVolatile()) {
      UsedAsMemCpySource = true;
      // ...
    }
// ...
return UsedAsMemCpySource;
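
// Illustration (mine, not part of this file): the pattern the helper above is
// looking for is a callee that only copies from an argument, e.g. a by-value
// aggregate parameter. All names below are hypothetical.
struct Buf { char Data[256]; };
void storeBuf(Buf B, Buf *Dst) { *Dst = B; }             // B only feeds a memcpy
void copyOut(Buf *Dst) { Buf B = {}; storeBuf(B, Dst); } // the extra copy can
                                                         // vanish once storeBuf
                                                         // is inlined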

// Fragments of SystemZTTIImpl::adjustInliningThreshold(), which adds an
// inlining bonus when a callee argument is used only as a memcpy source
// (the LLVM_DEBUG wrapper around the trailing dbgs() call is inferred from
// the dangling ");").
bool OtherUse = false;
// ...
LLVM_DEBUG(dbgs() << "++ SZTTI Adding inlining bonus: " << Bonus << "\n";);

// ...
case Instruction::GetElementPtr:
// ...
case Instruction::ICmp:
// ...
case Instruction::Sub:
// ...
case Instruction::Or:
case Instruction::Xor:
// ...
case Instruction::And:
// ...
case Instruction::Shl:
case Instruction::LShr:
case Instruction::AShr:
// ...
case Instruction::UDiv:
case Instruction::SDiv:
case Instruction::URem:
case Instruction::SRem:
case Instruction::Trunc:
case Instruction::ZExt:
case Instruction::SExt:
case Instruction::IntToPtr:
case Instruction::PtrToInt:
case Instruction::BitCast:
case Instruction::PHI:
// ...

// ...
case Intrinsic::sadd_with_overflow:
case Intrinsic::uadd_with_overflow:
case Intrinsic::ssub_with_overflow:
case Intrinsic::usub_with_overflow:
// ...
case Intrinsic::smul_with_overflow:
case Intrinsic::umul_with_overflow:
// ...
case Intrinsic::experimental_stackmap:
// ...
case Intrinsic::experimental_patchpoint_void:
case Intrinsic::experimental_patchpoint_i64:

// Fragments of SystemZTTIImpl::getUnrollingPreferences(): scan the loop body
// for calls and stores, then cap the unroll count based on the store count.
bool HasCall = false;
// ...
for (auto &I : *BB) {
  if (isa<CallInst>(&I) || isa<InvokeInst>(&I)) {
    // ...
        F->getIntrinsicID() == Intrinsic::memset)
    // ...
  }
  if (isa<StoreInst>(&I)) {
    Type *MemAccessTy = I.getOperand(0)->getType();
    // ...
  }
}
// ...
unsigned const NumStoresVal = *NumStores.getValue();
unsigned const Max = (NumStoresVal ? (12 / NumStoresVal) : UINT_MAX);
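
// Worked reading of the clamp above (my summary, not a source comment): the
// unroll factor is limited so that the unrolled body contains roughly a dozen
// stores at most, presumably to avoid saturating store buffering resources.
unsigned maxUnrollForStores(unsigned NumStores) {   // hypothetical helper
  return NumStores ? (12 / NumStores) : UINT_MAX;   // UINT_MAX from <climits>
}
// maxUnrollForStores(1) == 12, maxUnrollForStores(3) == 4,
// maxUnrollForStores(5) == 2, maxUnrollForStores(0) == UINT_MAX (no limit).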

// SystemZTTIImpl::isLSRCostLess(): compare LSR solutions field by field
// (ImmCost is not part of the comparison); the second std::tie over C2 is
// reconstructed from the elided continuation.
return std::tie(C1.Insns, C1.NumRegs, C1.AddRecCost, C1.NumIVMuls,
                C1.NumBaseAdds, C1.ScaleCost, C1.SetupCost) <
       std::tie(C2.Insns, C2.NumRegs, C2.AddRecCost, C2.NumIVMuls,
                C2.NumBaseAdds, C2.ScaleCost, C2.SetupCost);
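
// Standalone illustration of the std::tie idiom above: tuples of references
// compare lexicographically, so the first differing field decides the order
// and later fields only break ties.
#include <tuple>
struct DemoCost { unsigned Insns, NumRegs; };   // hypothetical, demo only
bool demoLess(const DemoCost &A, const DemoCost &B) {
  return std::tie(A.Insns, A.NumRegs) < std::tie(B.Insns, B.NumRegs);
}
// demoLess({3, 9}, {4, 1}) is true: fewer instructions wins despite more regs.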

// SystemZTTIImpl::getNumberOfRegisters(): ClassID 1 selects the vector
// register class.
bool Vector = (ClassID == 1);

// Fragments of SystemZTTIImpl::getMinPrefetchStride(): software prefetching is
// throttled when a loop already needs many prefetches, and extended to small
// strides when nearly all accesses are strided and there is no call.
                                        unsigned NumStridedMemAccesses,
                                        unsigned NumPrefetches,
                                        bool HasCall) const {
  if (NumPrefetches > 16)
    // ...
  if (NumStridedMemAccesses > 32 && !HasCall &&
      (NumMemAccesses - NumStridedMemAccesses) * 32 <= NumStridedMemAccesses)
    // ...

// Fragments of the helper that computes how many 128-bit vector registers a
// vector type occupies.
assert(Size > 0 && "Element must have non-zero size.");
// ...
auto *VTy = cast<FixedVectorType>(Ty);
// ...
assert(WideBits > 0 && "Could not compute size of vector");
return ((WideBits % 128U) ? ((WideBits / 128U) + 1) : (WideBits / 128U));
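
// The return expression above is simply a ceiling division by the 128-bit
// vector register width. Equivalent standalone sketch (not part of this file):
unsigned numVectorRegsFor(unsigned WideBits) {   // hypothetical name
  return (WideBits + 127U) / 128U;               // ceil(WideBits / 128)
}
// numVectorRegsFor(256) == 2 (e.g. <4 x i64>), numVectorRegsFor(96) == 1.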

// ...
    Op2Info, Opd1PropInfo, Opd2PropInfo, Args, CxtI);

// SystemZTTIImpl::getArithmeticInstrCost(): relative costs of the ways a
// division or remainder can be lowered.
const unsigned DivInstrCost = 20;
const unsigned DivMulSeqCost = 10;
const unsigned SDivPow2Cost = 4;
// ...
bool SignedDivRem =
    Opcode == Instruction::SDiv || Opcode == Instruction::SRem;
bool UnsignedDivRem =
    Opcode == Instruction::UDiv || Opcode == Instruction::URem;
// ...
// Check for a constant divisor.
bool DivRemConst = false;
bool DivRemConstPow2 = false;
if ((SignedDivRem || UnsignedDivRem) && Args.size() == 2) {
  // ...
      (C->getType()->isVectorTy()
           ? dyn_cast_or_null<const ConstantInt>(C->getSplatValue())
           : dyn_cast<const ConstantInt>(C));
  // ...
  DivRemConstPow2 = true;
// ...
if (Opcode == Instruction::FAdd || Opcode == Instruction::FSub ||
    Opcode == Instruction::FMul || Opcode == Instruction::FDiv)
  // ...
if (Opcode == Instruction::FRem)
  // ...

if (Opcode == Instruction::Xor) {
  // ...
    if (I->hasOneUse() &&
        (I->getOpcode() == Instruction::And ||
         I->getOpcode() == Instruction::Or ||
         I->getOpcode() == Instruction::Xor))
      // ...
}
else if (Opcode == Instruction::Or || Opcode == Instruction::And) {
  // ...
    if (I->hasOneUse() && I->getOpcode() == Instruction::Xor)
      // ...
}
// ...
if (Opcode == Instruction::Or)
  // ...
if (Opcode == Instruction::Xor && ScalarBits == 1) {
  // ...
}
// ...
  return (SignedDivRem ? SDivPow2Cost : 1);
// ...
  return DivMulSeqCost;
if (SignedDivRem || UnsignedDivRem)
  // ...

auto *VTy = cast<FixedVectorType>(Ty);
unsigned VF = VTy->getNumElements();
// ...
if (Opcode == Instruction::Shl || Opcode == Instruction::LShr ||
    Opcode == Instruction::AShr) {
  // ...
}
// ...
  return (NumVectors * (SignedDivRem ? SDivPow2Cost : 1));
// ...
if ((SignedDivRem || UnsignedDivRem) && VF > 4)
  // ...
if (Opcode == Instruction::FAdd || Opcode == Instruction::FSub ||
    Opcode == Instruction::FMul || Opcode == Instruction::FDiv) {
  switch (ScalarBits) {
  // ...
  }
}
// ...
if (Opcode == Instruction::FRem) {
  // ...
  if (VF == 2 && ScalarBits == 32)
    // ...
}
// ...
    Opd1PropInfo, Opd2PropInfo, Args, CxtI);
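
// Why a power-of-two divisor is cheap (illustrative, not from this file): it
// lowers to shifts rather than a divide instruction; unsigned needs one shift,
// signed needs a small round-toward-zero fixup first.
unsigned udivBy8(unsigned X) { return X >> 3; }            // X / 8
int sdivBy8(int X) { return (X + (X < 0 ? 7 : 0)) >> 3; }  // assumes arithmetic >>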

// SystemZTTIImpl::getShuffleCost() fragments: extracting a subvector that
// starts at element 0 is free, and a splat across NumVectors registers costs
// NumVectors - 1.
return (Index == 0 ? 0 : NumVectors);
// ...
return NumVectors - 1;

// ...
       "Packing must reduce size of vector type.");
assert(cast<FixedVectorType>(SrcTy)->getNumElements() ==
           cast<FixedVectorType>(DstTy)->getNumElements() &&
       "Packing should not change number of elements.");
// ...
unsigned VF = cast<FixedVectorType>(SrcTy)->getNumElements();
// ...
for (unsigned P = 0; P < Log2Diff; ++P) {

// ...
       "Should only be called with vector types.");
// ...
unsigned PackCost = 0;
// ...
if (SrcScalarBits > DstScalarBits)
  // ...
else if (SrcScalarBits < DstScalarBits) {
  // ...
  PackCost = Log2Diff * DstNumParts;
  // ...
  PackCost += DstNumParts - 1;
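
// How the pack/unpack counting above works (my sketch, not part of this file):
// each vector pack or unpack step halves or doubles the element width, so
// moving between lane widths takes log2(WideBits / NarrowBits) steps per part.
unsigned elementWidthSteps(unsigned WideBits, unsigned NarrowBits) { // hypothetical
  unsigned Steps = 0;
  for (unsigned B = WideBits; B > NarrowBits; B /= 2)
    ++Steps;
  return Steps;   // elementWidthSteps(32, 8) == 2 (i32 lanes -> i8 lanes)
}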

// Fragments of the static helper that finds the scalar type compared by the
// condition of a select: either a single compare, or two compares combined by
// a two-operand logical instruction.
Type *OpTy = nullptr;
if (CmpInst *CI = dyn_cast<CmpInst>(I->getOperand(0)))
  OpTy = CI->getOperand(0)->getType();
else if (Instruction *LogicI = dyn_cast<Instruction>(I->getOperand(0)))
  if (LogicI->getNumOperands() == 2)
    if (CmpInst *CI0 = dyn_cast<CmpInst>(LogicI->getOperand(0)))
      if (isa<CmpInst>(LogicI->getOperand(1)))
        OpTy = CI0->getOperand(0)->getType();
// ...
if (OpTy != nullptr) {

// SystemZTTIImpl::getBoolVecToIntConversionCost() fragments.
auto *DstVTy = cast<FixedVectorType>(Dst);
unsigned VF = DstVTy->getNumElements();
// ...
if (CmpOpTy != nullptr)
  // ...
if (Opcode == Instruction::ZExt || Opcode == Instruction::UIToFP)
  // ...

// SystemZTTIImpl::getCastInstrCost() fragments.
return BaseCost == 0 ? BaseCost : 1;
// ...
unsigned DstScalarBits = Dst->getScalarSizeInBits();
unsigned SrcScalarBits = Src->getScalarSizeInBits();

if (!Src->isVectorTy()) {
  assert(!Dst->isVectorTy());
  // ...
  if (Opcode == Instruction::SIToFP || Opcode == Instruction::UIToFP) {
    if (SrcScalarBits >= 32 ||
        (I != nullptr && isa<LoadInst>(I->getOperand(0))))
      // ...
    return SrcScalarBits > 1 ? 2 : 5;
  }
  // ...
  if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt) &&
      Src->isIntegerTy(1)) {
    // ...
    if (Opcode == Instruction::SExt)
      Cost = (DstScalarBits < 64 ? 3 : 4);
    if (Opcode == Instruction::ZExt)
      // ...
  }
  // ...
}

auto *SrcVecTy = cast<FixedVectorType>(Src);
auto *DstVecTy = dyn_cast<FixedVectorType>(Dst);
// ...
unsigned VF = SrcVecTy->getNumElements();
// ...
if (Opcode == Instruction::Trunc) {
  if (Src->getScalarSizeInBits() == Dst->getScalarSizeInBits())
    // ...
}
// ...
if (Opcode == Instruction::ZExt || Opcode == Instruction::SExt) {
  if (SrcScalarBits >= 8) {
    // ...
    unsigned NumSrcVectorOps =
        (NumUnpacks > 1 ? (NumDstVectors - NumSrcVectors)
                        : (NumDstVectors / 2));
    // ...
    return (NumUnpacks * NumDstVectors) + NumSrcVectorOps;
  }
  else if (SrcScalarBits == 1)
    // ...
}
// ...
if (Opcode == Instruction::SIToFP || Opcode == Instruction::UIToFP ||
    Opcode == Instruction::FPToSI || Opcode == Instruction::FPToUI) {
  // ...
  if (SrcScalarBits == DstScalarBits)
    return NumDstVectors;
  // ...
  if (SrcScalarBits == 1)
    // ...
        Opcode, Dst->getScalarType(), Src->getScalarType(), CCH, CostKind);
  // ...
  bool NeedsInserts = true, NeedsExtracts = true;
  // ...
  if (DstScalarBits == 128 &&
      (Opcode == Instruction::SIToFP || Opcode == Instruction::UIToFP))
    NeedsInserts = false;
  if (SrcScalarBits == 128 &&
      (Opcode == Instruction::FPToSI || Opcode == Instruction::FPToUI))
    NeedsExtracts = false;
  // ...
  if (VF == 2 && SrcScalarBits == 32 && DstScalarBits == 32)
    // ...
}
// ...
if (Opcode == Instruction::FPTrunc) {
  if (SrcScalarBits == 128)
    // ...
  return VF / 2 + std::max(1U, VF / 4);
}
// ...
if (Opcode == Instruction::FPExt) {
  if (SrcScalarBits == 32 && DstScalarBits == 64) {
    // ...
  }
}

// Fragments of the static helper that counts how many operands of a scalar
// instruction need an explicit extension (loads and constants do not).
unsigned ExtCost = 0;
// ...
if (!isa<LoadInst>(Op) && !isa<ConstantInt>(Op))
  // ...

// SystemZTTIImpl::getCmpSelInstrCost() fragments.
case Instruction::ICmp: {
  // ...
  if (I != nullptr && ScalarBits >= 32)
    if (LoadInst *Ld = dyn_cast<LoadInst>(I->getOperand(0)))
      if (const ConstantInt *C = dyn_cast<ConstantInt>(I->getOperand(1)))
        if (!Ld->hasOneUse() && Ld->getParent() == I->getParent() &&
            // ...
  // ...
}
// ...
unsigned VF = cast<FixedVectorType>(ValTy)->getNumElements();
// ...
if (Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) {
  unsigned PredicateExtraCost = 0;
  // ...
  case CmpInst::Predicate::ICMP_NE:
  case CmpInst::Predicate::ICMP_UGE:
  case CmpInst::Predicate::ICMP_ULE:
  case CmpInst::Predicate::ICMP_SGE:
  case CmpInst::Predicate::ICMP_SLE:
    PredicateExtraCost = 1;
    break;
  case CmpInst::Predicate::FCMP_ONE:
  case CmpInst::Predicate::FCMP_ORD:
  case CmpInst::Predicate::FCMP_UEQ:
  case CmpInst::Predicate::FCMP_UNO:
    PredicateExtraCost = 2;
    break;
  // ...
  unsigned Cost = (NumVecs_cmp * (CmpCostPerVector + PredicateExtraCost));
  // ...
}
// ...
unsigned PackCost = 0;
// ...
if (CmpOpTy != nullptr)
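
// Assumed rationale for PredicateExtraCost above (my reading, not stated in
// the fragment): the vector compares produce "equal"/"greater" masks directly,
// so the remaining predicates are derived with extra operations, conceptually:
bool notEqual(int A, int B) { return !(A == B); }   // NE  = EQ  + complement
bool sgeOf(int A, int B) { return !(A < B); }       // SGE = SLT + complement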

// ...
return ((Index % 2 == 0) ? 1 : 0);
// ...
if (Opcode == Instruction::ExtractElement) {

// SystemZTTIImpl::isFoldableLoad() fragments: decide whether a single-use load
// can be folded into its (possibly truncating or extending) user instruction.
unsigned TruncBits = 0;
unsigned SExtBits = 0;
unsigned ZExtBits = 0;
// ...
if (isa<TruncInst>(UserI))
  TruncBits = UserBits;
else if (isa<SExtInst>(UserI))
  SExtBits = UserBits;
else if (isa<ZExtInst>(UserI))
  ZExtBits = UserBits;
// ...
if (TruncBits || SExtBits || ZExtBits) {
  FoldedValue = UserI;
  UserI = cast<Instruction>(*UserI->user_begin());
  // ...
}
if ((UserI->getOpcode() == Instruction::Sub ||
     UserI->getOpcode() == Instruction::SDiv ||
     UserI->getOpcode() == Instruction::UDiv) &&
    // ...
unsigned LoadOrTruncBits =
    ((SExtBits || ZExtBits) ? 0 : (TruncBits ? TruncBits : LoadedBits));
// ...
case Instruction::Sub:
case Instruction::ICmp:
  if (LoadedBits == 32 && ZExtBits == 64)
    // ...
  if (UserI->getOpcode() != Instruction::ICmp) {
    if (LoadedBits == 16 &&
        // ...
    if (LoadOrTruncBits == 16)
      // ...
  }
  // ...
case Instruction::SDiv:
  if (LoadedBits == 32 && SExtBits == 64)
    // ...
case Instruction::UDiv:
case Instruction::And:
case Instruction::Or:
case Instruction::Xor:
  // ...
// ...
if (UserI->getOpcode() == Instruction::ICmp)
  // ...
  if (CI->getValue().isIntN(16))
    // ...
return (LoadOrTruncBits == 32 || LoadOrTruncBits == 64);
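
// Background for the checks above (my summary, not a comment from this file):
// SystemZ has many register-memory instruction forms, so a load with a single
// user can often be folded into that user and costed as free, e.g. in IR:
//
//   %v = load i32, ptr %p
//   %r = sub i32 %x, %v     ; can become a single register-memory subtract
//
// The width tests reflect which operand sizes such register-memory forms cover.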

// Fragments of the static helper that detects a call to the llvm.bswap
// intrinsic (used when costing loads/stores whose value is byte-swapped).
if (auto *CI = dyn_cast<CallInst>(I))
  if (auto *F = CI->getCalledFunction())
    if (F->getIntrinsicID() == Intrinsic::bswap)

// SystemZTTIImpl::getMemoryOpCost() fragments.
assert(!Src->isVoidTy() && "Invalid type");
// ...
for (unsigned i = 0; i < 2; ++i) {
  // ...
  LoadInst *OtherLoad = dyn_cast<LoadInst>(OtherOp);
  // ...
      (isa<TruncInst>(OtherOp) || isa<SExtInst>(OtherOp) ||
       isa<ZExtInst>(OtherOp)))
    OtherLoad = dyn_cast<LoadInst>(OtherOp->getOperand(0));
  // ...
}
// ...
const Instruction *LdUser = cast<Instruction>(*I->user_begin());
// ...
else if (const StoreInst *SI = dyn_cast<StoreInst>(I)) {
  const Value *StoredVal = SI->getValueOperand();
  // ...
}
// ...
if (Src->getScalarSizeInBits() == 128)

// SystemZTTIImpl::getInterleavedMemoryOpCost(): count the vector memory
// operations and the permutes needed to (de)interleave the accessed members
// (signature and fallback call completed from the elided lines).
InstructionCost SystemZTTIImpl::getInterleavedMemoryOpCost(
    unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
    Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
    bool UseMaskForCond, bool UseMaskForGaps) {
  if (UseMaskForCond || UseMaskForGaps)
    return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
                                             Alignment, AddressSpace, CostKind,
                                             UseMaskForCond, UseMaskForGaps);
  assert(isa<VectorType>(VecTy) &&
         "Expect a vector type for interleaved memory op");
  // ...
  unsigned NumElts = cast<FixedVectorType>(VecTy)->getNumElements();
  assert(Factor > 1 && NumElts % Factor == 0 && "Invalid interleave factor");
  unsigned VF = NumElts / Factor;
  // ...
  unsigned NumPermutes = 0;
  // ...
  BitVector UsedInsts(NumVectorMemOps, false);
  std::vector<BitVector> ValueVecs(Factor, BitVector(NumVectorMemOps, false));
  for (unsigned Index : Indices)
    for (unsigned Elt = 0; Elt < VF; ++Elt) {
      unsigned Vec = (Index + Elt * Factor) / NumEltsPerVecReg;
      // ...
      ValueVecs[Index].set(Vec);
    }
  NumVectorMemOps = UsedInsts.count();
  // ...
  for (unsigned Index : Indices) {
    // ...
    unsigned NumSrcVecs = ValueVecs[Index].count();
    // ...
    assert(NumSrcVecs >= NumDstVecs && "Expected at least as many sources");
    NumPermutes += std::max(1U, NumSrcVecs - NumDstVecs);
  }
  // ...
  unsigned NumSrcVecs = std::min(NumEltsPerVecReg, Factor);
  unsigned NumDstVecs = NumVectorMemOps;
  assert(NumSrcVecs > 1 && "Expected at least two source vectors.");
  NumPermutes += (NumDstVecs * NumSrcVecs) - NumDstVecs;
  // ...
  return NumVectorMemOps + NumPermutes;
}
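
// Worked example with my own numbers (not from the source): an interleaved
// load of <16 x i32> with Factor = 4 and VF = 4 spans 4 vector registers
// (NumEltsPerVecReg = 4), so NumVectorMemOps = 4. Each of the 4 extracted
// members pulls one element from each of those registers (NumSrcVecs = 4) and
// fits in a single destination register (NumDstVecs = 1), giving 3 permutes
// per member: total cost = 4 + 4 * 3 = 16.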