24 #include "llvm/IR/IntrinsicsAMDGPU.h"
31 #include "AMDGPUGenCallingConv.inc"
34 "amdgpu-bypass-slow-div",
35 cl::desc(
"Skip 64-bit divide for dynamic 32-bit values"),
44 assert(StoreSize % 32 == 0 &&
"Store size not a multiple of 32");
332 for (
MVT VT : ScalarIntVTs) {
374 for (
MVT VT : VectorIntTypes) {
394 for (
MVT VT : FloatVectorTypes) {
429 for (
int I = 0;
I < RTLIB::UNKNOWN_LIBCALL; ++
I)
484 const auto Flags =
Op.getNode()->getFlags();
485 if (Flags.hasNoSignedZeros())
532 return N->getNumOperands() > 2 || VT ==
MVT::f64;
539 if (isa<MemSDNode>(
N))
542 switch (
N->getOpcode()) {
558 switch (cast<ConstantSDNode>(
N->getOperand(0))->getZExtValue()) {
559 case Intrinsic::amdgcn_interp_p1:
560 case Intrinsic::amdgcn_interp_p2:
561 case Intrinsic::amdgcn_interp_mov:
562 case Intrinsic::amdgcn_interp_p1_f16:
563 case Intrinsic::amdgcn_interp_p2_f16:
581 unsigned NumMayIncreaseSize = 0;
582 MVT VT =
N->getValueType(0).getScalarType().getSimpleVT();
585 for (
const SDNode *U :
N->uses()) {
620 bool ForCodeSize)
const {
646 EVT OldVT =
N->getValueType(0);
653 if (OldSize >= 32 && NewSize < 32 && MN->
getAlign() >=
Align(4) &&
668 return (OldSize < 32);
683 if ((LScalarSize >= CastScalarSize) && (CastScalarSize < 32))
688 CastTy, MMO, &Fast) &&
705 switch (
N->getOpcode()) {
710 unsigned IntrID = cast<ConstantSDNode>(
N->getOperand(0))->getZExtValue();
712 case Intrinsic::amdgcn_readfirstlane:
713 case Intrinsic::amdgcn_readlane:
733 switch (
Op.getOpcode()) {
746 ForCodeSize, Cost,
Depth);
789 unsigned SrcSize =
Source.getSizeInBits();
792 return DestSize < SrcSize && DestSize % 32 == 0 ;
798 unsigned SrcSize =
Source->getScalarSizeInBits();
802 return SrcSize >= 32;
804 return DestSize < SrcSize && DestSize % 32 == 0;
808 unsigned SrcSize = Src->getScalarSizeInBits();
812 return DestSize >= 32;
814 return SrcSize == 32 && DestSize == 64;
861 return CC_AMDGPU_Func;
884 return RetCC_SI_Shader;
890 return RetCC_AMDGPU_Func;
929 const unsigned ExplicitOffset =
ST.getExplicitKernelArgOffset(Fn);
936 unsigned InIndex = 0;
939 const bool IsByRef =
Arg.hasByRefAttr();
940 Type *BaseArgTy =
Arg.getType();
941 Type *MemArgTy = IsByRef ?
Arg.getParamByRefType() : BaseArgTy;
942 Align Alignment =
DL.getValueOrABITypeAlignment(
943 IsByRef ?
Arg.getParamAlign() :
None, MemArgTy);
944 MaxAlign =
std::max(Alignment, MaxAlign);
945 uint64_t AllocSize =
DL.getTypeAllocSize(MemArgTy);
947 uint64_t ArgOffset =
alignTo(ExplicitArgOffset, Alignment) + ExplicitOffset;
948 ExplicitArgOffset =
alignTo(ExplicitArgOffset, Alignment) + AllocSize;
961 for (
unsigned Value = 0, NumValues = ValueVTs.size();
1002 assert(MemoryBits % NumElements == 0);
1006 MemoryBits / NumElements);
1026 unsigned PartOffset = 0;
1027 for (
unsigned i = 0;
i != NumRegs; ++
i) {
1029 BasePartOffset + PartOffset,
1068 int ClobberedFI)
const {
1071 int64_t LastByte = FirstByte + MFI.
getObjectSize(ClobberedFI) - 1;
1076 ArgChains.push_back(Chain);
1080 if (
LoadSDNode *L = dyn_cast<LoadSDNode>(U)) {
1082 if (FI->getIndex() < 0) {
1084 int64_t InLastByte = InFirstByte;
1087 if ((InFirstByte <= FirstByte && FirstByte <= InLastByte) ||
1088 (FirstByte <= InFirstByte && InFirstByte <= LastByte))
1089 ArgChains.push_back(
SDValue(L, 1));
1110 FuncName =
G->getSymbol();
1112 FuncName =
G->getGlobal()->getName();
1119 for (
unsigned I = 0,
E = CLI.
Ins.size();
I !=
E; ++
I)
1144 switch (
Op.getOpcode()) {
1148 "instruction is not implemented yet!");
1187 switch (
N->getOpcode()) {
1216 Fn,
"local memory global used by non-kernel function",
1234 "Do not know what to do with an non-zero offset");
1249 EVT VT =
Op.getValueType();
1259 for (
const SDUse &U :
Op->ops())
1269 unsigned Start = cast<ConstantSDNode>(
Op.getOperand(1))->getZExtValue();
1270 EVT VT =
Op.getValueType();
1271 EVT SrcVT =
Op.getOperand(0).getValueType();
1281 (Start == 0 || Start == 4))
1296 if (!(
LHS == True &&
RHS == False) && !(
LHS == False &&
RHS == True))
1364 std::pair<SDValue, SDValue>
1376 return std::make_pair(Lo, Hi);
1405 HiVT = NumElts - LoNumElts == 1
1408 return std::make_pair(LoVT, HiVT);
1413 std::pair<SDValue, SDValue>
1415 const EVT &LoVT,
const EVT &HiVT,
1419 N.getValueType().getVectorNumElements() &&
1420 "More vector elements requested than available!");
1426 return std::make_pair(Lo, Hi);
1432 EVT VT =
Op.getValueType();
1445 EVT MemVT =
Load->getMemoryVT();
1450 EVT LoMemVT, HiMemVT;
1462 Load->getChain(), BasePtr, SrcValue, LoMemVT,
1463 BaseAlign,
Load->getMemOperand()->getFlags());
1468 HiMemVT, HiAlign,
Load->getMemOperand()->getFlags());
1492 EVT VT =
Op.getValueType();
1494 EVT MemVT =
Load->getMemoryVT();
1502 if (NumElements != 3 ||
1503 (BaseAlign <
Align(8) &&
1507 assert(NumElements == 3);
1514 Load->getExtensionType(), SL, WideVT,
Load->getChain(), BasePtr, SrcValue,
1515 WideMemVT, BaseAlign,
Load->getMemOperand()->getFlags());
1540 EVT LoMemVT, HiMemVT;
1545 std::tie(Lo, Hi) =
splitVector(Val, SL, LoVT, HiVT, DAG);
1555 DAG.
getTruncStore(Chain, SL, Lo, BasePtr, SrcValue, LoMemVT, BaseAlign,
1556 Store->getMemOperand()->getFlags());
1559 HiMemVT, HiAlign,
Store->getMemOperand()->getFlags());
1570 EVT VT =
Op.getValueType();
1577 if (LHSSignBits < 9)
1581 if (RHSSignBits < 9)
1585 unsigned SignBits =
std::min(LHSSignBits, RHSSignBits);
1586 unsigned DivBits = BitSize - SignBits;
1682 EVT VT =
Op.getValueType();
1778 SDValue Add2 = DAG.getBitcast(VT,
1779 DAG.getBuildVector(
MVT::v2i32,
DL, {Add2_Lo, Add2_Hi}));
1792 SDValue Sub1 = DAG.getBitcast(VT,
1793 DAG.getBuildVector(
MVT::v2i32,
DL, {Sub1_Lo, Sub1_Hi}));
1795 SDValue MinusOne = DAG.getConstant(0xffffffffu,
DL, HalfVT);
1796 SDValue C1 = DAG.getSelectCC(
DL, Sub1_Hi, RHS_Hi, MinusOne, Zero,
1798 SDValue C2 = DAG.getSelectCC(
DL, Sub1_Lo, RHS_Lo, MinusOne, Zero,
1813 SDValue Sub2 = DAG.getBitcast(VT,
1814 DAG.getBuildVector(
MVT::v2i32,
DL, {Sub2_Lo, Sub2_Hi}));
1818 SDValue C4 = DAG.getSelectCC(
DL, Sub2_Hi, RHS_Hi, MinusOne, Zero,
1820 SDValue C5 = DAG.getSelectCC(
DL, Sub2_Lo, RHS_Lo, MinusOne, Zero,
1833 SDValue Sub3 = DAG.getBitcast(VT,
1834 DAG.getBuildVector(
MVT::v2i32,
DL, {Sub3_Lo, Sub3_Hi}));
1863 const unsigned halfBitWidth = HalfVT.getSizeInBits();
1865 for (
unsigned i = 0;
i < halfBitWidth; ++
i) {
1866 const unsigned bitPos = halfBitWidth -
i - 1;
1867 SDValue POS = DAG.getConstant(bitPos,
DL, HalfVT);
1878 SDValue BIT = DAG.getConstant(1ULL << bitPos,
DL, HalfVT);
1897 EVT VT =
Op.getValueType();
1952 EVT VT =
Op.getValueType();
2012 EVT VT =
Op.getValueType();
2013 auto Flags =
Op->getFlags();
2051 const unsigned FractBits = 52;
2052 const unsigned ExpBits = 11;
2078 const unsigned FractBits = 52;
2152 EVT VT =
Op.getValueType();
2204 double Log2BaseInverted)
const {
2205 EVT VT =
Op.getValueType();
2212 return DAG.
getNode(
ISD::FMUL, SL, VT, Log2Operand, Log2BaseInvertedOperand);
2217 EVT VT =
Op.getValueType();
2245 if (Src.getValueType() ==
MVT::i32) {
2391 if (Subtarget->
isGCN())
2434 EVT DestVT =
Op.getValueType();
2436 EVT SrcVT = Src.getValueType();
2470 EVT DestVT =
Op.getValueType();
2473 EVT SrcVT = Src.getValueType();
2513 EVT SrcVT = Src.getValueType();
2597 const unsigned ExpMask = 0x7ff;
2598 const unsigned ExpBiasf64 = 1023;
2599 const unsigned ExpBiasf16 = 15;
2683 unsigned OpOpcode =
Op.getOpcode();
2684 EVT SrcVT = Src.getValueType();
2685 EVT DestVT =
Op.getValueType();
2717 EVT ExtraVT = cast<VTSDNode>(
Op.getOperand(1))->getVT();
2718 MVT VT =
Op.getSimpleValueType();
2732 for (
unsigned I = 0;
I < NElts; ++
I)
2747 EVT VT =
Op.getValueType();
2761 unsigned NewOpcode = Node24->
getOpcode();
2763 unsigned IID = cast<ConstantSDNode>(Node24->
getOperand(0))->getZExtValue();
2765 case Intrinsic::amdgcn_mul_i24:
2768 case Intrinsic::amdgcn_mul_u24:
2771 case Intrinsic::amdgcn_mulhi_i24:
2774 case Intrinsic::amdgcn_mulhi_u24:
2789 if (DemandedLHS || DemandedRHS)
2791 DemandedLHS ? DemandedLHS :
LHS,
2792 DemandedRHS ? DemandedRHS :
RHS);
2804 template <
typename IntTy>
2807 if (
Width + Offset < 32) {
2809 IntTy Result =
static_cast<IntTy
>(Shl) >> (32 -
Width);
2819 if (
M->isVolatile())
2837 if ((Size == 1 || Size == 2 || Size == 4) && !VT.
isVector())
2840 if (Size == 3 || (Size > 4 && (Size % 4 != 0)))
2967 EVT ExtVT = cast<VTSDNode>(N1)->getVT();
2971 EVT SrcVT = Src.getValueType();
2972 if (SrcVT.
bitsGE(ExtVT)) {
2983 unsigned IID = cast<ConstantSDNode>(
N->getOperand(0))->getZExtValue();
2985 case Intrinsic::amdgcn_mul_i24:
2986 case Intrinsic::amdgcn_mul_u24:
2987 case Intrinsic::amdgcn_mulhi_i24:
2988 case Intrinsic::amdgcn_mulhi_u24:
2990 case Intrinsic::amdgcn_fract:
2991 case Intrinsic::amdgcn_rsq:
2992 case Intrinsic::amdgcn_rcp_legacy:
2993 case Intrinsic::amdgcn_rsq_legacy:
2994 case Intrinsic::amdgcn_rsq_clamp:
2995 case Intrinsic::amdgcn_ldexp: {
2998 return Src.isUndef() ? Src :
SDValue();
3032 EVT VT =
N->getValueType(0);
3039 unsigned RHSVal =
RHS->getZExtValue();
3046 switch (
LHS->getOpcode()) {
3070 EVT XVT =
X.getValueType();
3109 unsigned RHSVal =
RHS->getZExtValue();
3135 auto *
RHS = dyn_cast<ConstantSDNode>(
N->getOperand(1));
3139 EVT VT =
N->getValueType(0);
3141 unsigned ShiftAmt =
RHS->getZExtValue();
3148 if (
auto *
Mask = dyn_cast<ConstantSDNode>(
LHS.getOperand(1))) {
3149 unsigned MaskIdx, MaskLen;
3150 if (
Mask->getAPIntValue().isShiftedMask(MaskIdx, MaskLen) &&
3151 MaskIdx == ShiftAmt) {
3185 EVT VT =
N->getValueType(0);
3190 SDValue Vec = Src.getOperand(0);
3210 if (2 * K->getZExtValue() == Src.getValueType().getScalarSizeInBits()) {
3232 EVT SrcVT = Src.getValueType();
3237 SDValue Amt = Src.getOperand(1);
3288 EVT VT =
N->getValueType(0);
3294 if (!
N->isDivergent())
3363 unsigned LoOpcode, HiOpcode;
3386 EVT VT =
N->getValueType(0);
3397 if (Subtarget->
hasSMulHi() && !
N->isDivergent())
3419 EVT VT =
N->getValueType(0);
3430 if (Subtarget->
hasSMulHi() && !
N->isDivergent())
3452 return C->isAllOnes();
3459 unsigned Opc)
const {
3460 EVT VT =
Op.getValueType();
3487 if (!CmpRhs || !CmpRhs->
isZero())
3501 return getFFBX_U32(DAG, CmpLHS, SL, Opc);
3512 return getFFBX_U32(DAG, CmpLHS, SL, Opc);
3530 return DAG.
getNode(
Op, SL, VT, NewSelect);
3547 EVT VT =
N.getValueType();
3570 bool ShouldFoldNeg =
true;
3575 ShouldFoldNeg =
false;
3577 ShouldFoldNeg =
false;
3580 if (ShouldFoldNeg) {
3590 Cond, NewLHS, NewRHS);
3592 return DAG.
getNode(
LHS.getOpcode(), SL, VT, NewSelect);
3609 EVT VT =
N->getValueType(0);
3617 if (
Cond.hasOneUse()) {
3660 if (
C->isZero() && !
C->isNegative())
3693 EVT VT =
N->getValueType(0);
3822 for (
unsigned I = 0;
I < 3; ++
I)
3887 EVT SrcVT = Src.getValueType();