#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "AMDGPUGenCallingConv.inc"
34 "amdgpu-bypass-slow-div",
35 cl::desc(
"Skip 64-bit divide for dynamic 32-bit values"),
  assert(StoreSize % 32 == 0 && "Store size not a multiple of 32");
  for (MVT VT : ScalarIntVTs) {
  for (MVT VT : VectorIntTypes) {
  for (MVT VT : FloatVectorTypes) {
  for (int I = 0; I < RTLIB::UNKNOWN_LIBCALL; ++I)
  const auto Flags = Op.getNode()->getFlags();
  if (Flags.hasNoSignedZeros())
  return (N->getNumOperands() > 2 && N->getOpcode() != ISD::SELECT) ||
  if (isa<MemSDNode>(N))
  switch (N->getOpcode()) {
  switch (cast<ConstantSDNode>(N->getOperand(0))->getZExtValue()) {
  case Intrinsic::amdgcn_interp_p1:
  case Intrinsic::amdgcn_interp_p2:
  case Intrinsic::amdgcn_interp_mov:
  case Intrinsic::amdgcn_interp_p1_f16:
  case Intrinsic::amdgcn_interp_p2_f16:
  unsigned NumMayIncreaseSize = 0;
  MVT VT = N->getValueType(0).getScalarType().getSimpleVT();
  for (const SDNode *U : N->uses()) {
                                        bool ForCodeSize) const {
  EVT OldVT = N->getValueType(0);
  if (OldSize >= 32 && NewSize < 32 && MN->getAlign() >= Align(4) &&
  return (OldSize < 32);
  if ((LScalarSize >= CastScalarSize) && (CastScalarSize < 32))
                             CastTy, MMO, &Fast) &&
  switch (N->getOpcode()) {
    unsigned IntrID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
    case Intrinsic::amdgcn_readfirstlane:
    case Intrinsic::amdgcn_readlane:
  switch (Op.getOpcode()) {
  SDValue Src = Op.getOperand(0);
  EVT VT = Op.getValueType();
  unsigned SrcSize = Source.getSizeInBits();
  return DestSize < SrcSize && DestSize % 32 == 0;
  unsigned SrcSize = Source->getScalarSizeInBits();
  return SrcSize >= 32;
  return DestSize < SrcSize && DestSize % 32 == 0;
  unsigned SrcSize = Src->getScalarSizeInBits();
  return DestSize >= 32;
  return SrcSize == 32 && DestSize == 64;
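// These free-truncate/extend predicates reflect the register file: 64-bit
// values live in pairs of 32-bit registers, so truncating to a multiple of
// 32 bits just drops registers, and zext i32 -> i64 only writes a zero into
// the high half, conceptually:
//   lo = x;  hi = 0;   // one move, no extension instruction needed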
929 "Expected shift op");
937 if (
N->getValueType(0) ==
MVT::i32 &&
N->use_size() == 1 &&
938 (
N->use_begin()->getOpcode() ==
ISD::SRA ||
939 N->use_begin()->getOpcode() ==
ISD::SRL))
    auto *RHSLd = dyn_cast<LoadSDNode>(RHS);
    auto *LHS0 = dyn_cast<LoadSDNode>(LHS.getOperand(0));
    auto *LHS1 = dyn_cast<ConstantSDNode>(LHS.getOperand(1));
    return LHS0 && LHS1 && RHSLd && LHS0->getExtensionType() == ISD::ZEXTLOAD &&
           LHS1->getAPIntValue() == LHS0->getMemoryVT().getScalarSizeInBits() &&
  return !(IsShiftAndLoad(LHS, RHS) || IsShiftAndLoad(RHS, LHS));
  return CC_AMDGPU_Func;
  return RetCC_SI_Shader;
  return RetCC_SI_Gfx;
  return RetCC_AMDGPU_Func;
  const unsigned ExplicitOffset = ST.getExplicitKernelArgOffset(Fn);
  unsigned InIndex = 0;
    const bool IsByRef = Arg.hasByRefAttr();
    Type *BaseArgTy = Arg.getType();
    Type *MemArgTy = IsByRef ? Arg.getParamByRefType() : BaseArgTy;
    Align Alignment = DL.getValueOrABITypeAlignment(
        IsByRef ? Arg.getParamAlign() : std::nullopt, MemArgTy);
    MaxAlign = std::max(Alignment, MaxAlign);
    uint64_t AllocSize = DL.getTypeAllocSize(MemArgTy);
    uint64_t ArgOffset = alignTo(ExplicitArgOffset, Alignment) + ExplicitOffset;
    ExplicitArgOffset = alignTo(ExplicitArgOffset, Alignment) + AllocSize;
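    // Worked example for the two running offsets above, assuming arguments
    // (i32, i64, i16) and ExplicitOffset == 0 for simplicity:
    //   i32: align 4 -> ArgOffset = 0,  ExplicitArgOffset = 4
    //   i64: align 8 -> ArgOffset = 8,  ExplicitArgOffset = 16 (4 padded to 8)
    //   i16: align 2 -> ArgOffset = 16, ExplicitArgOffset = 18
    // A nonzero ExplicitOffset shifts every ArgOffset by the implicit
    // kernarg header size without changing the alignment arithmetic.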
    for (unsigned Value = 0, NumValues = ValueVTs.size();
      } else if (RegisterVT.isVector()) {
        assert(MemoryBits % NumElements == 0);
                                   MemoryBits / NumElements);
      unsigned PartOffset = 0;
      for (unsigned i = 0; i != NumRegs; ++i) {
                                     BasePartOffset + PartOffset,
                                                  int ClobberedFI) const {
  int64_t LastByte = FirstByte + MFI.getObjectSize(ClobberedFI) - 1;
    if (LoadSDNode *L = dyn_cast<LoadSDNode>(U)) {
      if (FI->getIndex() < 0) {
        int64_t InLastByte = InFirstByte;
        if ((InFirstByte <= FirstByte && FirstByte <= InLastByte) ||
            (FirstByte <= InFirstByte && InFirstByte <= LastByte))
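        // Standard closed-interval overlap test: [FirstByte, LastByte] and
        // [InFirstByte, InLastByte] intersect iff one interval's start lies
        // inside the other, which is equivalent to the compact form
        //   InFirstByte <= LastByte && FirstByte <= InLastByte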
    FuncName = G->getSymbol();
    FuncName = G->getGlobal()->getName();
  for (unsigned I = 0, E = CLI.Ins.size(); I != E; ++I)
  auto Ops = {DAG.getConstant(0, SDLoc(), Op.getValueType()), Op.getOperand(0)};
  switch (Op.getOpcode()) {
    Op->print(errs(), &DAG);
                       "instruction is not implemented yet!");
  switch (N->getOpcode()) {
    if (std::optional<uint32_t> Address =
        Fn, "local memory global used by non-kernel function",
  return DAG.getUNDEF(Op.getValueType());
         "Do not know what to do with a non-zero offset");
  EVT VT = Op.getValueType();
  unsigned OpBitSize = Op.getOperand(0).getValueType().getSizeInBits();
  if (OpBitSize >= 32 && OpBitSize % 32 == 0) {
    unsigned NewNumElt = OpBitSize / 32;
    for (const SDUse &U : Op->ops()) {
      Args.push_back(NewIn);
                               NewNumElt * Op.getNumOperands());
  for (const SDUse &U : Op->ops())
  unsigned Start = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
  EVT VT = Op.getValueType();
  EVT SrcVT = Op.getOperand(0).getValueType();
      (Start == 0 || Start == 4))
      (Start == 0 || Start == 8))
  if ((LHS == True && RHS == False) || (LHS == False && RHS == True))
  if (LHS == NegTrue && CFalse && CRHS) {
std::pair<SDValue, SDValue>
  return std::pair(Lo, Hi);
  HiVT = NumElts - LoNumElts == 1
  return std::pair(LoVT, HiVT);
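// Example of the split rule above: a v3f32 with LoNumElts = 2 yields
// LoVT = v2f32, and because only one element remains, HiVT collapses to the
// scalar f32 rather than a v1f32.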
std::pair<SDValue, SDValue>
                                  const EVT &LoVT, const EVT &HiVT,
             N.getValueType().getVectorNumElements() &&
         "More vector elements requested than available!");
  return std::pair(Lo, Hi);
  EVT VT = Op.getValueType();
  SDValue BasePtr = Load->getBasePtr();
  EVT MemVT = Load->getMemoryVT();
  EVT LoMemVT, HiMemVT;
  Align BaseAlign = Load->getAlign();
      Load->getChain(), BasePtr, SrcValue, LoMemVT,
      BaseAlign, Load->getMemOperand()->getFlags());
      DAG.getExtLoad(Load->getExtensionType(), SL, HiVT, Load->getChain(),
                     HiMemVT, HiAlign, Load->getMemOperand()->getFlags());
  EVT VT = Op.getValueType();
  SDValue BasePtr = Load->getBasePtr();
  EVT MemVT = Load->getMemoryVT();
  Align BaseAlign = Load->getAlign();
  if (NumElements != 3 ||
      (BaseAlign < Align(8) &&
  assert(NumElements == 3);
      Load->getExtensionType(), SL, WideVT, Load->getChain(), BasePtr, SrcValue,
      WideMemVT, BaseAlign, Load->getMemOperand()->getFlags());
  SDValue Val = Store->getValue();
  EVT MemVT = Store->getMemoryVT();
  SDValue Chain = Store->getChain();
  SDValue BasePtr = Store->getBasePtr();
  EVT LoMemVT, HiMemVT;
  Align BaseAlign = Store->getAlign();
      DAG.getTruncStore(Chain, SL, Lo, BasePtr, SrcValue, LoMemVT, BaseAlign,
                        Store->getMemOperand()->getFlags());
                        HiMemVT, HiAlign, Store->getMemOperand()->getFlags());
  EVT VT = Op.getValueType();
  if (LHSSignBits < 9)
  if (RHSSignBits < 9)
  unsigned SignBits = std::min(LHSSignBits, RHSSignBits);
  unsigned DivBits = BitSize - SignBits;
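  // With at least 9 sign bits on each side, at most 24 significant bits
  // remain (DivBits <= 24 here), which f32's 24-bit significand represents
  // exactly, so the divide can run through the fast f32 pipeline. A scalar
  // sketch of that idea with an off-by-one fix-up (not the fma-based DAG
  // expansion emitted below):
  //
  //   uint32_t udiv24(uint32_t a, uint32_t b) {  // a, b < 2^24, b != 0
  //     uint32_t q = (uint32_t)((float)a / (float)b); // within 1 of a / b
  //     int32_t r = (int32_t)(a - q * b);
  //     if (r < 0)                --q;            // estimate one too high
  //     else if (r >= (int32_t)b) ++q;            // estimate one too low
  //     return q;
  //   }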
  bool UseFmadFtz = false;
  if (Subtarget->isGCN()) {
  EVT VT = Op.getValueType();
  SDValue Add2 = DAG.getBitcast(VT,
      DAG.getBuildVector(MVT::v2i32, DL, {Add2_Lo, Add2_Hi}));
  SDValue Sub1 = DAG.getBitcast(VT,
      DAG.getBuildVector(MVT::v2i32, DL, {Sub1_Lo, Sub1_Hi}));
  SDValue MinusOne = DAG.getConstant(0xffffffffu, DL, HalfVT);
  SDValue C1 = DAG.getSelectCC(DL, Sub1_Hi, RHS_Hi, MinusOne, Zero,
  SDValue C2 = DAG.getSelectCC(DL, Sub1_Lo, RHS_Lo, MinusOne, Zero,
  SDValue Sub2 = DAG.getBitcast(VT,
      DAG.getBuildVector(MVT::v2i32, DL, {Sub2_Lo, Sub2_Hi}));
  SDValue C4 = DAG.getSelectCC(DL, Sub2_Hi, RHS_Hi, MinusOne, Zero,
  SDValue C5 = DAG.getSelectCC(DL, Sub2_Lo, RHS_Lo, MinusOne, Zero,
  SDValue Sub3 = DAG.getBitcast(VT,
      DAG.getBuildVector(MVT::v2i32, DL, {Sub3_Lo, Sub3_Hi}));
  const unsigned halfBitWidth = HalfVT.getSizeInBits();
  for (unsigned i = 0; i < halfBitWidth; ++i) {
    const unsigned bitPos = halfBitWidth - i - 1;
    SDValue POS = DAG.getConstant(bitPos, DL, HalfVT);
    SDValue BIT = DAG.getConstant(1ULL << bitPos, DL, HalfVT);
    REM = DAG.getSelectCC(DL, REM, RHS, REM_sub, REM, ISD::SETUGE);
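    // The loop above is classic restoring long division, one quotient bit
    // per iteration from MSB to LSB. Scalar equivalent (shown over a full
    // 64-bit dividend; the DAG form runs over the 32-bit half):
    //
    //   uint64_t REM = 0, DIV = 0;
    //   for (int bitPos = 63; bitPos >= 0; --bitPos) {
    //     REM = (REM << 1) | ((LHS >> bitPos) & 1); // shift in next bit
    //     if (REM >= RHS) {                         // the SETUGE select
    //       REM -= RHS;
    //       DIV |= 1ULL << bitPos;                  // set quotient bit
    //     }
    //   }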
  EVT VT = Op.getValueType();
  EVT VT = Op.getValueType();
  EVT VT = Op.getValueType();
  auto Flags = Op->getFlags();
  SDValue Src = Op.getOperand(0);
  const unsigned FractBits = 52;
  const unsigned ExpBits = 11;
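  // IEEE-754 binary64 layout assumed by these constants:
  //   bit 63      sign
  //   bits 62..52 11-bit biased exponent (bias 1023)
  //   bits 51..0  52-bit fraction
  // so the unbiased exponent of a double d is recovered as
  //   int exp = (int)((bit_cast<uint64_t>(d) >> 52) & 0x7ff) - 1023;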
  SDValue Src = Op.getOperand(0);
  const unsigned FractBits = 52;
  SDValue Src = Op.getOperand(0);
  auto VT = Op.getValueType();
  auto Arg = Op.getOperand(0u);
  EVT VT = Op.getValueType();
  SDValue Src = Op.getOperand(0);
                                                 double Log2BaseInverted) const {
  EVT VT = Op.getValueType();
  SDValue Operand = Op.getOperand(0);
  return DAG.getNode(ISD::FMUL, SL, VT, Log2Operand, Log2BaseInvertedOperand);
  EVT VT = Op.getValueType();
  SDValue Src = Op.getOperand(0);
  SDValue Src = Op.getOperand(0);
  if (Src.getValueType() == MVT::i32) {
  SDValue Src = Op.getOperand(0);
  if (Subtarget->isGCN())
  SDValue Src = Op.getOperand(0);
  EVT DestVT = Op.getValueType();
  SDValue Src = Op.getOperand(0);
  EVT SrcVT = Src.getValueType();
  EVT DestVT = Op.getValueType();
  SDValue Src = Op.getOperand(0);
  EVT SrcVT = Src.getValueType();
  SDValue Src = Op.getOperand(0);
  SDValue Src = Op.getOperand(0);
  EVT SrcVT = Src.getValueType();
      llvm::bit_cast<double>(UINT64_C(0x3df0000000000000)), SL,
      llvm::bit_cast<double>(UINT64_C(0xc1f0000000000000)), SL,
      llvm::bit_cast<float>(UINT32_C(0x2f800000)), SL, SrcVT);
      llvm::bit_cast<float>(UINT32_C(0xcf800000)), SL, SrcVT);
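  // The four bit patterns above are +/- powers of two used to split a
  // float -> i64 conversion into two 32-bit halves:
  //   0x3df0000000000000 = 0x1p-32 (f64)    0x2f800000 = 0x1p-32f
  //   0xc1f0000000000000 = -0x1p32 (f64)    0xcf800000 = -0x1p32f
  // Scalar sketch, assuming x is already truncated and in i64 range:
  //   double hi_f = floor(x * 0x1p-32);     // upper 32 bits
  //   double lo_f = fma(hi_f, -0x1p32, x);  // x - hi_f * 2^32, exact
  //   uint64_t r = ((uint64_t)(uint32_t)hi_f << 32) | (uint32_t)lo_f;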
  SDValue N0 = Op.getOperand(0);
  const unsigned ExpMask = 0x7ff;
  const unsigned ExpBiasf64 = 1023;
  const unsigned ExpBiasf16 = 15;
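  // Software f64 -> f16 conversion: the 11-bit f64 exponent (mask 0x7ff,
  // bias 1023) is rebiased to f16's 5-bit field (bias 15), i.e.
  //   E16 = E64 - 1023 + 15
  // with out-of-range exponents becoming f16 denormals, infinities, or NaNs
  // in the packed 16-bit result.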
  SDValue Src = Op.getOperand(0);
  unsigned OpOpcode = Op.getOpcode();
  EVT SrcVT = Src.getValueType();
  EVT DestVT = Op.getValueType();
  EVT ExtraVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
  MVT VT = Op.getSimpleValueType();
  SDValue Src = Op.getOperand(0);
  for (unsigned I = 0; I < NElts; ++I)
  EVT VT = Op.getValueType();
  unsigned NewOpcode = Node24->getOpcode();
  unsigned IID = cast<ConstantSDNode>(Node24->getOperand(0))->getZExtValue();
  case Intrinsic::amdgcn_mul_i24:
  case Intrinsic::amdgcn_mul_u24:
  case Intrinsic::amdgcn_mulhi_i24:
  case Intrinsic::amdgcn_mulhi_u24:
  if (DemandedLHS || DemandedRHS)
                     DemandedLHS ? DemandedLHS : LHS,
                     DemandedRHS ? DemandedRHS : RHS);
template <typename IntTy>
  if (Width + Offset < 32) {
    IntTy Result = static_cast<IntTy>(Shl) >> (32 - Width);
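    // Bitfield-extract constant fold: shift the field up so its top bit
    // lands at bit 31, then shift back down; with IntTy = int32_t the
    // second shift is arithmetic and sign-extends the field, with uint32_t
    // it zero-extends. Example (assumed values) with Offset = 12,
    // Width = 12, Src0 = 0x00ABC000:
    //   uint32_t Shl = 0x00ABC000u << (32 - 12 - 12); // 0xABC00000
    //   int32_t Res = (int32_t)Shl >> (32 - 12);      // 0xFFFFFABC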
  if (MemSDNode *M = dyn_cast<MemSDNode>(U)) {
    if (M->isVolatile())
  EVT ExtVT = cast<VTSDNode>(N1)->getVT();
  EVT SrcVT = Src.getValueType();
  if (SrcVT.bitsGE(ExtVT)) {
  unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
  case Intrinsic::amdgcn_mul_i24:
  case Intrinsic::amdgcn_mul_u24:
  case Intrinsic::amdgcn_mulhi_i24:
  case Intrinsic::amdgcn_mulhi_u24:
  case Intrinsic::amdgcn_fract:
  case Intrinsic::amdgcn_rsq:
  case Intrinsic::amdgcn_rcp_legacy:
  case Intrinsic::amdgcn_rsq_legacy:
  case Intrinsic::amdgcn_rsq_clamp:
  case Intrinsic::amdgcn_ldexp: {
    return Src.isUndef() ? Src : SDValue();
  EVT VT = N->getValueType(0);
  unsigned RHSVal = RHS->getZExtValue();
  switch (LHS->getOpcode()) {
  EVT XVT = X.getValueType();
  unsigned RHSVal = RHS->getZExtValue();
  auto *RHS = dyn_cast<ConstantSDNode>(N->getOperand(1));
  EVT VT = N->getValueType(0);
  unsigned ShiftAmt = RHS->getZExtValue();
  if (auto *Mask = dyn_cast<ConstantSDNode>(LHS.getOperand(1))) {
    unsigned MaskIdx, MaskLen;
    if (Mask->getAPIntValue().isShiftedMask(MaskIdx, MaskLen) &&
        MaskIdx == ShiftAmt) {
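      // Pattern matched here: (srl (and x, mask), ShiftAmt) where mask is a
      // contiguous run of MaskLen ones starting at bit MaskIdx == ShiftAmt,
      // e.g.
      //   (srl (and x, 0x00ff0000), 16)  // MaskIdx = 16, MaskLen = 8
      // which extracts an 8-bit field and can become a single
      // bitfield-extract instead of an AND plus a shift.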
  EVT VT = N->getValueType(0);
  SDValue Vec = Src.getOperand(0);
  if (2 * K->getZExtValue() == Src.getValueType().getScalarSizeInBits()) {
  EVT SrcVT = Src.getValueType();
  SDValue Amt = Src.getOperand(1);
  const unsigned MaxCstSize =
  EVT VT = N->getValueType(0);
  if (!N->isDivergent())
  unsigned LoOpcode, HiOpcode;
  EVT VT = N->getValueType(0);