30#include "llvm/IR/IntrinsicsAMDGPU.h"
34#ifdef EXPENSIVE_CHECKS
39#define DEBUG_TYPE "amdgpu-isel"
54 In = stripBitcast(In);
60 Out = In.getOperand(0);
71 if (ShiftAmt->getZExtValue() == 16) {
87 return In.getOperand(0);
92 if (Src.getValueType().getSizeInBits() == 32)
93 return stripBitcast(Src);
102 "AMDGPU DAG->DAG Pattern Instruction Selection",
false,
false)
106#ifdef EXPENSIVE_CHECKS
111 "AMDGPU DAG->DAG Pattern Instruction Selection",
false,
false)
127#ifdef EXPENSIVE_CHECKS
128 DominatorTree & DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
129 LoopInfo * LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
131 assert(L->isLCSSAForm(DT));
140bool AMDGPUDAGToDAGISel::fp16SrcZerosHighBits(
unsigned Opc)
const {
206#ifdef EXPENSIVE_CHECKS
215 MVT VT =
N->getValueType(0).getSimpleVT();
216 if (VT != MVT::v2i16 && VT != MVT::v2f16)
222 LoadSDNode *LdHi = dyn_cast<LoadSDNode>(stripBitcast(
Hi));
259 LoadSDNode *LdLo = dyn_cast<LoadSDNode>(stripBitcast(
Lo));
260 if (LdLo &&
Lo.hasOneUse()) {
299 bool MadeChange =
false;
305 switch (
N->getOpcode()) {
322bool AMDGPUDAGToDAGISel::isInlineImmediate(
const SDNode *
N)
const {
328 return TII->isInlineConstant(
C->getAPIntValue());
331 return TII->isInlineConstant(
C->getValueAPF());
341 unsigned OpNo)
const {
342 if (!
N->isMachineOpcode()) {
344 Register Reg = cast<RegisterSDNode>(
N->getOperand(1))->getReg();
345 if (
Reg.isVirtual()) {
347 return MRI.getRegClass(Reg);
351 =
static_cast<const GCNSubtarget *
>(Subtarget)->getRegisterInfo();
352 return TRI->getPhysRegBaseClass(Reg);
358 switch (
N->getMachineOpcode()) {
362 unsigned OpIdx =
Desc.getNumDefs() + OpNo;
363 if (OpIdx >=
Desc.getNumOperands())
365 int RegClass =
Desc.operands()[OpIdx].RegClass;
371 case AMDGPU::REG_SEQUENCE: {
372 unsigned RCID =
N->getConstantOperandVal(0);
376 SDValue SubRegOp =
N->getOperand(OpNo + 1);
386 SmallVector <SDValue, 8> Ops;
388 for (
unsigned i = 1, e =
N->getNumOperands(); i != e; ++i)
399 assert(
N->getOperand(0).getValueType() == MVT::Other &&
"Expected chain");
402 return glueCopyToOp(
N,
M0,
M0.getValue(1));
405SDNode *AMDGPUDAGToDAGISel::glueCopyToM0LDSInit(
SDNode *
N)
const {
406 unsigned AS = cast<MemSDNode>(
N)->getAddressSpace();
422 AMDGPU::S_MOV_B32,
DL, MVT::i32,
436 EVT VT =
N->getValueType(0);
442 if (NumVectorElts == 1) {
448 assert(NumVectorElts <= 32 &&
"Vectors with more than 32 elements not "
458 bool IsRegSeq =
true;
459 unsigned NOps =
N->getNumOperands();
460 for (
unsigned i = 0; i < NOps; i++) {
462 if (isa<RegisterSDNode>(
N->getOperand(i))) {
468 RegSeqArgs[1 + (2 * i)] =
N->getOperand(i);
471 if (NOps != NumVectorElts) {
476 for (
unsigned i = NOps; i < NumVectorElts; ++i) {
479 RegSeqArgs[1 + (2 * i)] =
SDValue(ImpDef, 0);
480 RegSeqArgs[1 + (2 * i) + 1] =
491 unsigned int Opc =
N->getOpcode();
492 if (
N->isMachineOpcode()) {
502 N = glueCopyToM0LDSInit(
N);
517 if (
N->getValueType(0) != MVT::i64)
520 SelectADD_SUB_I64(
N);
525 if (
N->getValueType(0) != MVT::i32)
532 SelectUADDO_USUBO(
N);
536 SelectFMUL_W_CHAIN(
N);
540 SelectFMA_W_CHAIN(
N);
546 EVT VT =
N->getValueType(0);
560 unsigned RegClassID =
568 if (
N->getValueType(0) == MVT::i128) {
572 }
else if (
N->getValueType(0) == MVT::i64) {
579 const SDValue Ops[] = { RC,
N->getOperand(0), SubReg0,
580 N->getOperand(1), SubReg1 };
582 N->getValueType(0), Ops));
588 if (
N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(
N))
593 Imm =
FP->getValueAPF().bitcastToAPInt().getZExtValue();
598 Imm =
C->getZExtValue();
645 return SelectMUL_LOHI(
N);
656 if (
N->getValueType(0) != MVT::i32)
673 if (
N->getValueType(0) == MVT::i32) {
676 { N->getOperand(0), N->getOperand(1) });
684 SelectINTRINSIC_W_CHAIN(
N);
688 SelectINTRINSIC_WO_CHAIN(
N);
692 SelectINTRINSIC_VOID(
N);
696 SelectWAVE_ADDRESS(
N);
700 SelectSTACKRESTORE(
N);
708bool AMDGPUDAGToDAGISel::isUniformBr(
const SDNode *
N)
const {
711 return Term->getMetadata(
"amdgpu.uniform") ||
712 Term->getMetadata(
"structurizecfg.uniform");
715bool AMDGPUDAGToDAGISel::isUnneededShiftMask(
const SDNode *
N,
716 unsigned ShAmtBits)
const {
719 const APInt &
RHS =
N->getConstantOperandAPInt(1);
720 if (
RHS.countr_one() >= ShAmtBits)
724 return (LHSKnownZeros | RHS).
countr_one() >= ShAmtBits;
750 N1 =
Lo.getOperand(1);
767 assert(LHS && RHS && isa<ConstantSDNode>(RHS));
775 return "AMDGPU DAG->DAG Pattern Instruction Selection";
792 if ((
C = dyn_cast<ConstantSDNode>(
Addr))) {
796 (
C = dyn_cast<ConstantSDNode>(
Addr.getOperand(0)))) {
800 (
C = dyn_cast<ConstantSDNode>(
Addr.getOperand(1)))) {
811SDValue AMDGPUDAGToDAGISel::getMaterializedScalarImm32(int64_t Val,
814 AMDGPU::S_MOV_B32,
DL, MVT::i32,
820void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(
SDNode *
N) {
825 unsigned Opcode =
N->getOpcode();
835 DL, MVT::i32, LHS, Sub0);
837 DL, MVT::i32, LHS, Sub1);
840 DL, MVT::i32, RHS, Sub0);
842 DL, MVT::i32, RHS, Sub1);
846 static const unsigned OpcMap[2][2][2] = {
847 {{AMDGPU::S_SUB_U32, AMDGPU::S_ADD_U32},
848 {AMDGPU::V_SUB_CO_U32_e32, AMDGPU::V_ADD_CO_U32_e32}},
849 {{AMDGPU::S_SUBB_U32, AMDGPU::S_ADDC_U32},
850 {AMDGPU::V_SUBB_U32_e32, AMDGPU::V_ADDC_U32_e32}}};
852 unsigned Opc = OpcMap[0][
N->isDivergent()][IsAdd];
853 unsigned CarryOpc = OpcMap[1][
N->isDivergent()][IsAdd];
878 MVT::i64, RegSequenceArgs);
889void AMDGPUDAGToDAGISel::SelectAddcSubb(
SDNode *
N) {
895 if (
N->isDivergent()) {
897 : AMDGPU::V_SUBB_U32_e64;
899 N, Opc,
N->getVTList(),
901 CurDAG->getTargetConstant(0, {}, MVT::i1) });
904 : AMDGPU::S_SUB_CO_PSEUDO;
905 CurDAG->SelectNodeTo(
N, Opc,
N->getVTList(), {
LHS,
RHS, CI});
909void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(
SDNode *
N) {
914 bool IsVALU =
N->isDivergent();
918 if (UI.getUse().getResNo() == 1) {
927 unsigned Opc = IsAdd ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_SUB_CO_U32_e64;
930 N, Opc,
N->getVTList(),
931 {N->getOperand(0), N->getOperand(1),
932 CurDAG->getTargetConstant(0, {}, MVT::i1) });
934 unsigned Opc =
N->getOpcode() ==
ISD::UADDO ? AMDGPU::S_UADDO_PSEUDO
935 : AMDGPU::S_USUBO_PSEUDO;
937 CurDAG->SelectNodeTo(
N, Opc,
N->getVTList(),
938 {
N->getOperand(0),
N->getOperand(1)});
942void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(
SDNode *
N) {
947 SelectVOP3Mods0(
N->getOperand(1), Ops[1], Ops[0], Ops[6], Ops[7]);
948 SelectVOP3Mods(
N->getOperand(2), Ops[3], Ops[2]);
949 SelectVOP3Mods(
N->getOperand(3), Ops[5], Ops[4]);
950 Ops[8] =
N->getOperand(0);
951 Ops[9] =
N->getOperand(4);
956 cast<ConstantSDNode>(Ops[0])->isZero() &&
957 cast<ConstantSDNode>(Ops[2])->isZero() &&
958 cast<ConstantSDNode>(Ops[4])->isZero();
959 unsigned Opcode = UseFMAC ? AMDGPU::V_FMAC_F32_e64 : AMDGPU::V_FMA_F32_e64;
963void AMDGPUDAGToDAGISel::SelectFMUL_W_CHAIN(
SDNode *
N) {
968 SelectVOP3Mods0(
N->getOperand(1), Ops[1], Ops[0], Ops[4], Ops[5]);
969 SelectVOP3Mods(
N->getOperand(2), Ops[3], Ops[2]);
970 Ops[6] =
N->getOperand(0);
971 Ops[7] =
N->getOperand(3);
978void AMDGPUDAGToDAGISel::SelectDIV_SCALE(
SDNode *
N) {
980 EVT VT =
N->getValueType(0);
982 assert(VT == MVT::f32 || VT == MVT::f64);
985 = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64_e64 : AMDGPU::V_DIV_SCALE_F32_e64;
990 SelectVOP3BMods0(
N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
991 SelectVOP3BMods(
N->getOperand(1), Ops[3], Ops[2]);
992 SelectVOP3BMods(
N->getOperand(2), Ops[5], Ops[4]);
998void AMDGPUDAGToDAGISel::SelectMAD_64_32(
SDNode *
N) {
1003 Opc =
Signed ? AMDGPU::V_MAD_I64_I32_gfx11_e64
1004 : AMDGPU::V_MAD_U64_U32_gfx11_e64;
1006 Opc =
Signed ? AMDGPU::V_MAD_I64_I32_e64 : AMDGPU::V_MAD_U64_U32_e64;
1009 SDValue Ops[] = {
N->getOperand(0),
N->getOperand(1),
N->getOperand(2),
1016void AMDGPUDAGToDAGISel::SelectMUL_LOHI(
SDNode *
N) {
1021 Opc =
Signed ? AMDGPU::V_MAD_I64_I32_gfx11_e64
1022 : AMDGPU::V_MAD_U64_U32_gfx11_e64;
1024 Opc =
Signed ? AMDGPU::V_MAD_I64_I32_e64 : AMDGPU::V_MAD_U64_U32_e64;
1028 SDValue Ops[] = {
N->getOperand(0),
N->getOperand(1),
Zero, Clamp};
1033 MVT::i32,
SDValue(Mad, 0), Sub0);
1039 MVT::i32,
SDValue(Mad, 0), Sub1);
1074 int64_t ByteOffset =
C->getSExtValue();
1075 if (isDSOffsetLegal(
SDValue(), ByteOffset)) {
1082 Zero,
Addr.getOperand(1));
1084 if (isDSOffsetLegal(Sub, ByteOffset)) {
1090 unsigned SubOp = AMDGPU::V_SUB_CO_U32_e32;
1092 SubOp = AMDGPU::V_SUB_U32_e64;
1114 if (isDSOffsetLegal(
SDValue(), CAddr->getZExtValue())) {
1117 DL, MVT::i32, Zero);
1130bool AMDGPUDAGToDAGISel::isDSOffset2Legal(
SDValue Base,
unsigned Offset0,
1132 unsigned Size)
const {
1133 if (Offset0 %
Size != 0 || Offset1 %
Size != 0)
1135 if (!isUInt<8>(Offset0 /
Size) || !isUInt<8>(Offset1 /
Size))
1150 Addr->getFlags().hasNoUnsignedWrap()) ||
1157bool AMDGPUDAGToDAGISel::isFlatScratchBaseLegal(
SDValue Addr)
const {
1166 auto LHS =
Addr.getOperand(0);
1167 auto RHS =
Addr.getOperand(1);
1174 if (
Addr.getOpcode() ==
ISD::ADD && (ImmOp = dyn_cast<ConstantSDNode>(RHS))) {
1184bool AMDGPUDAGToDAGISel::isFlatScratchBaseLegalSV(
SDValue Addr)
const {
1193 auto LHS =
Addr.getOperand(0);
1194 auto RHS =
Addr.getOperand(1);
1200bool AMDGPUDAGToDAGISel::isFlatScratchBaseLegalSVImm(
SDValue Addr)
const {
1207 auto *RHSImm = cast<ConstantSDNode>(
Addr.getOperand(1));
1214 (RHSImm->getSExtValue() < 0 && RHSImm->getSExtValue() > -0x40000000)))
1217 auto LHS =
Base.getOperand(0);
1218 auto RHS =
Base.getOperand(1);
1226 return SelectDSReadWrite2(
Addr,
Base, Offset0, Offset1, 4);
1232 return SelectDSReadWrite2(
Addr,
Base, Offset0, Offset1, 8);
1237 unsigned Size)
const {
1245 unsigned OffsetValue1 = OffsetValue0 +
Size;
1248 if (isDSOffset2Legal(N0, OffsetValue0, OffsetValue1,
Size)) {
1257 dyn_cast<ConstantSDNode>(
Addr.getOperand(0))) {
1258 unsigned OffsetValue0 =
C->getZExtValue();
1259 unsigned OffsetValue1 = OffsetValue0 +
Size;
1261 if (isDSOffset2Legal(
SDValue(), OffsetValue0, OffsetValue1,
Size)) {
1271 if (isDSOffset2Legal(Sub, OffsetValue0, OffsetValue1,
Size)) {
1275 unsigned SubOp = AMDGPU::V_SUB_CO_U32_e32;
1277 SubOp = AMDGPU::V_SUB_U32_e64;
1293 unsigned OffsetValue0 = CAddr->getZExtValue();
1294 unsigned OffsetValue1 = OffsetValue0 +
Size;
1296 if (isDSOffset2Legal(
SDValue(), OffsetValue0, OffsetValue1,
Size)) {
1336 C1 = cast<ConstantSDNode>(
Addr.getOperand(1));
1338 N0 =
Addr.getOperand(0);
1397 AMDGPU::S_MOV_B32,
DL, MVT::i32,
1413 if (!SelectMUBUF(
Addr,
Ptr, VAddr, SOffset,
Offset, Offen, Idxen, Addr64))
1417 if (
C->getSExtValue()) {
1430std::pair<SDValue, SDValue> AMDGPUDAGToDAGISel::foldFrameIndex(
SDValue N)
const {
1433 auto *FI = dyn_cast<FrameIndexSDNode>(
N);
1444bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(
SDNode *Parent,
1456 int64_t
Imm = CAddr->getSExtValue();
1457 const int64_t NullPtr =
1460 if (Imm != NullPtr) {
1465 AMDGPU::V_MOV_B32_e32,
DL, MVT::i32, HighBits);
1466 VAddr =
SDValue(MovHighBits, 0);
1496 if (
TII->isLegalMUBUFImmOffset(C1) &&
1499 std::tie(VAddr, SOffset) = foldFrameIndex(N0);
1506 std::tie(VAddr, SOffset) = foldFrameIndex(
Addr);
1514 auto Reg = cast<RegisterSDNode>(Val.
getOperand(1))->getReg();
1515 if (!Reg.isPhysical())
1517 auto RC =
TRI.getPhysRegBaseClass(Reg);
1518 return RC &&
TRI.isSGPRClass(RC);
1521bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(
SDNode *Parent,
1544 CAddr = dyn_cast<ConstantSDNode>(
Addr.getOperand(1));
1550 SOffset =
Addr.getOperand(0);
1551 }
else if ((CAddr = dyn_cast<ConstantSDNode>(
Addr)) &&
1571 if (!SelectMUBUF(
Addr,
Ptr, VAddr, SOffset,
Offset, Offen, Idxen, Addr64))
1574 if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
1575 !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
1576 !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
1590bool AMDGPUDAGToDAGISel::SelectBUFSOffset(
SDValue ByteOffsetNode,
1597 SOffset = ByteOffsetNode;
1607 assert(isa<BuildVectorSDNode>(
N));
1618 int64_t OffsetVal = 0;
1622 bool CanHaveFlatSegmentOffsetBug =
1629 if (isBaseWithConstantOffset64(
Addr, N0, N1) &&
1631 isFlatScratchBaseLegal(
Addr))) {
1632 int64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue();
1635 if (
TII->isLegalFLATOffset(COffsetVal, AS, FlatVariant)) {
1637 OffsetVal = COffsetVal;
1652 std::tie(OffsetVal, RemainderOffset) =
1653 TII->splitFlatOffset(COffsetVal, AS, FlatVariant);
1656 getMaterializedScalarImm32(
Lo_32(RemainderOffset),
DL);
1659 if (
Addr.getValueType().getSizeInBits() == 32) {
1663 unsigned AddOp = AMDGPU::V_ADD_CO_U32_e32;
1665 AddOp = AMDGPU::V_ADD_U32_e64;
1676 DL, MVT::i32, N0, Sub0);
1678 DL, MVT::i32, N0, Sub1);
1681 getMaterializedScalarImm32(
Hi_32(RemainderOffset),
DL);
1687 {AddOffsetLo,
SDValue(N0Lo, 0), Clamp});
1690 AMDGPU::V_ADDC_U32_e64,
DL, VTs,
1698 MVT::i64, RegSequenceArgs),
1725 return SelectFlatOffsetImpl(
N,
Addr, VAddr,
Offset,
1739bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(
SDNode *
N,
1744 int64_t ImmOffset = 0;
1750 if (isBaseWithConstantOffset64(
Addr, LHS, RHS)) {
1751 int64_t COffsetVal = cast<ConstantSDNode>(RHS)->getSExtValue();
1757 ImmOffset = COffsetVal;
1758 }
else if (!
LHS->isDivergent()) {
1759 if (COffsetVal > 0) {
1764 int64_t SplitImmOffset, RemainderOffset;
1765 std::tie(SplitImmOffset, RemainderOffset) =
TII->splitFlatOffset(
1768 if (isUInt<32>(RemainderOffset)) {
1770 AMDGPU::V_MOV_B32_e32, SL, MVT::i32,
1784 unsigned NumLiterals =
1785 !
TII->isInlineConstant(
APInt(32, COffsetVal & 0xffffffff)) +
1786 !
TII->isInlineConstant(
APInt(32, COffsetVal >> 32));
1797 if (!
LHS->isDivergent()) {
1805 if (!SAddr && !
RHS->isDivergent()) {
1820 isa<ConstantSDNode>(
Addr))
1835 if (
auto FI = dyn_cast<FrameIndexSDNode>(SAddr)) {
1838 isa<FrameIndexSDNode>(SAddr.
getOperand(0))) {
1841 auto FI = cast<FrameIndexSDNode>(SAddr.
getOperand(0));
1843 FI->getValueType(0));
1856 if (
Addr->isDivergent())
1861 int64_t COffsetVal = 0;
1864 COffsetVal = cast<ConstantSDNode>(
Addr.getOperand(1))->getSExtValue();
1865 SAddr =
Addr.getOperand(0);
1876 int64_t SplitImmOffset, RemainderOffset;
1877 std::tie(SplitImmOffset, RemainderOffset) =
TII->splitFlatOffset(
1880 COffsetVal = SplitImmOffset;
1884 ? getMaterializedScalarImm32(
Lo_32(RemainderOffset),
DL)
1885 :
CurDAG->getTargetConstant(RemainderOffset,
DL,
MVT::i32);
1897bool AMDGPUDAGToDAGISel::checkFlatScratchSVSSwizzleBug(
1912 return (VMax & 3) + (
SMax & 3) >= 4;
1918 int64_t ImmOffset = 0;
1922 if (isBaseWithConstantOffset64(
Addr, LHS, RHS)) {
1923 int64_t COffsetVal = cast<ConstantSDNode>(RHS)->getSExtValue();
1928 ImmOffset = COffsetVal;
1929 }
else if (!
LHS->isDivergent() && COffsetVal > 0) {
1933 int64_t SplitImmOffset, RemainderOffset;
1934 std::tie(SplitImmOffset, RemainderOffset)
1937 if (isUInt<32>(RemainderOffset)) {
1939 AMDGPU::V_MOV_B32_e32, SL, MVT::i32,
1943 if (!isFlatScratchBaseLegal(
Addr))
1945 if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, SplitImmOffset))
1959 if (!
LHS->isDivergent() &&
RHS->isDivergent()) {
1962 }
else if (!
RHS->isDivergent() &&
LHS->isDivergent()) {
1969 if (OrigAddr !=
Addr) {
1970 if (!isFlatScratchBaseLegalSVImm(OrigAddr))
1973 if (!isFlatScratchBaseLegalSV(OrigAddr))
1977 if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, ImmOffset))
1987bool AMDGPUDAGToDAGISel::SelectSMRDOffset(
SDValue ByteOffsetNode,
1989 bool Imm32Only,
bool IsBuffer)
const {
1991 "Cannot match both soffset and offset at the same time!");
1999 *SOffset = ByteOffsetNode;
2011 SDLoc SL(ByteOffsetNode);
2015 int64_t ByteOffset = IsBuffer ?
C->getZExtValue() :
C->getSExtValue();
2016 std::optional<int64_t> EncodedOffset =
2018 if (EncodedOffset &&
Offset && !Imm32Only) {
2028 if (EncodedOffset &&
Offset && Imm32Only) {
2033 if (!isUInt<32>(ByteOffset) && !isInt<32>(ByteOffset))
2047 if (
Addr.getValueType() != MVT::i32)
2055 unsigned AddrHiVal =
Info->get32BitAddressHighBits();
2077 bool IsBuffer)
const {
2079 assert(!Imm32Only && !IsBuffer);
2081 return SelectSMRDBaseOffset(
Addr,
B,
nullptr,
Offset) &&
2082 SelectSMRDBaseOffset(
B, SBase, SOffset,
nullptr);
2088 !
Addr->getFlags().hasNoUnsignedWrap())
2094 N0 =
Addr.getOperand(0);
2095 N1 =
Addr.getOperand(1);
2097 assert(N0 && N1 && isa<ConstantSDNode>(N1));
2101 if (SelectSMRDOffset(N1, SOffset,
Offset, Imm32Only, IsBuffer)) {
2105 if (SelectSMRDOffset(N0, SOffset,
Offset, Imm32Only, IsBuffer)) {
2114 bool Imm32Only)
const {
2115 if (SelectSMRDBaseOffset(
Addr, SBase, SOffset,
Offset, Imm32Only)) {
2116 SBase = Expand32BitAddress(SBase);
2120 if (
Addr.getValueType() == MVT::i32 &&
Offset && !SOffset) {
2131 return SelectSMRD(
Addr, SBase,
nullptr, &
Offset);
2137 return SelectSMRD(
Addr, SBase,
nullptr, &
Offset,
2143 return SelectSMRD(
Addr, SBase, &SOffset,
nullptr);
2149 return SelectSMRD(
Addr, SBase, &SOffset, &
Offset);
2153 return SelectSMRDOffset(
N,
nullptr, &
Offset,
2157bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(
SDValue N,
2160 return SelectSMRDOffset(
N,
nullptr, &
Offset,
2164bool AMDGPUDAGToDAGISel::SelectSMRDBufferSgprImm(
SDValue N,
SDValue &SOffset,
2168 return N.getValueType() == MVT::i32 &&
2169 SelectSMRDBaseOffset(
N, SOffset,
nullptr,
2174bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(
SDValue Index,
2196 if (isa<ConstantSDNode>(
Index))
2204SDNode *AMDGPUDAGToDAGISel::getBFE32(
bool IsSigned,
const SDLoc &
DL,
2208 unsigned Opcode = IsSigned ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;
2214 unsigned Opcode = IsSigned ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
2224void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(
SDNode *
N) {
2229 const SDValue &Shl =
N->getOperand(0);
2237 if (0 < BVal && BVal <= CVal && CVal < 32) {
2247void AMDGPUDAGToDAGISel::SelectS_BFE(
SDNode *
N) {
2248 switch (
N->getOpcode()) {
2250 if (
N->getOperand(0).getOpcode() ==
ISD::SRL) {
2253 const SDValue &Srl =
N->getOperand(0);
2257 if (Shift && Mask) {
2271 if (
N->getOperand(0).getOpcode() ==
ISD::AND) {
2278 if (Shift && Mask) {
2289 }
else if (
N->getOperand(0).getOpcode() ==
ISD::SHL) {
2290 SelectS_BFEFromShifts(
N);
2295 if (
N->getOperand(0).getOpcode() ==
ISD::SHL) {
2296 SelectS_BFEFromShifts(
N);
2307 const ConstantSDNode *Amt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
2311 unsigned Width = cast<VTSDNode>(
N->getOperand(1))->getVT().getSizeInBits();
2321bool AMDGPUDAGToDAGISel::isCBranchSCC(
const SDNode *
N)
const {
2323 if (!
N->hasOneUse())
2333 MVT VT =
Cond.getOperand(0).getSimpleValueType();
2337 if (VT == MVT::i64) {
2358 auto VCMP_CC = cast<CondCodeSDNode>(VCMP.getOperand(2))->get();
2362 auto Cond = VCMP.getOperand(0);
2374void AMDGPUDAGToDAGISel::SelectBRCOND(
SDNode *
N) {
2377 if (
Cond.isUndef()) {
2379 N->getOperand(2),
N->getOperand(0));
2386 bool UseSCCBr = isCBranchSCC(
N) && isUniformBr(
N);
2387 bool AndExec = !UseSCCBr;
2388 bool Negate =
false;
2393 auto CC = cast<CondCodeSDNode>(
Cond->getOperand(2))->get();
2397 VCMP.getValueType().getSizeInBits() ==
ST->getWavefrontSize()) {
2406 bool NegatedBallot =
false;
2409 UseSCCBr = !BallotCond->isDivergent();
2410 Negate = Negate ^ NegatedBallot;
2425 UseSCCBr ? (Negate ? AMDGPU::S_CBRANCH_SCC0 : AMDGPU::S_CBRANCH_SCC1)
2426 : (Negate ? AMDGPU::S_CBRANCH_VCCZ : AMDGPU::S_CBRANCH_VCCNZ);
2427 Register CondReg = UseSCCBr ? AMDGPU::SCC :
TRI->getVCC();
2445 : AMDGPU::S_AND_B64,
2460void AMDGPUDAGToDAGISel::SelectFP_EXTEND(
SDNode *
N) {
2462 !
N->isDivergent()) {
2464 if (Src.getValueType() == MVT::f16) {
2476void AMDGPUDAGToDAGISel::SelectDSAppendConsume(
SDNode *
N,
unsigned IntrID) {
2479 unsigned Opc = IntrID == Intrinsic::amdgcn_ds_append ?
2480 AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME;
2494 if (isDSOffsetLegal(PtrBase, OffsetVal.
getZExtValue())) {
2495 N = glueCopyToM0(
N, PtrBase);
2501 N = glueCopyToM0(
N,
Ptr);
2509 N->getOperand(
N->getNumOperands() - 1)
2518void AMDGPUDAGToDAGISel::SelectDSBvhStackIntrinsic(
SDNode *
N) {
2519 unsigned Opc = AMDGPU::DS_BVH_STACK_RTN_B32;
2520 SDValue Ops[] = {
N->getOperand(2),
N->getOperand(3),
N->getOperand(4),
2521 N->getOperand(5),
N->getOperand(0)};
2531 case Intrinsic::amdgcn_ds_gws_init:
2532 return AMDGPU::DS_GWS_INIT;
2533 case Intrinsic::amdgcn_ds_gws_barrier:
2534 return AMDGPU::DS_GWS_BARRIER;
2535 case Intrinsic::amdgcn_ds_gws_sema_v:
2536 return AMDGPU::DS_GWS_SEMA_V;
2537 case Intrinsic::amdgcn_ds_gws_sema_br:
2538 return AMDGPU::DS_GWS_SEMA_BR;
2539 case Intrinsic::amdgcn_ds_gws_sema_p:
2540 return AMDGPU::DS_GWS_SEMA_P;
2541 case Intrinsic::amdgcn_ds_gws_sema_release_all:
2542 return AMDGPU::DS_GWS_SEMA_RELEASE_ALL;
2548void AMDGPUDAGToDAGISel::SelectDS_GWS(
SDNode *
N,
unsigned IntrID) {
2549 if (!Subtarget->
hasGWS() ||
2550 (IntrID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
2558 const bool HasVSrc =
N->getNumOperands() == 4;
2559 assert(HasVSrc ||
N->getNumOperands() == 3);
2562 SDValue BaseOffset =
N->getOperand(HasVSrc ? 3 : 2);
2573 if (
ConstantSDNode *ConstOffset = dyn_cast<ConstantSDNode>(BaseOffset)) {
2579 ImmOffset = ConstOffset->getZExtValue();
2597 glueCopyToM0(
N,
SDValue(M0Base, 0));
2614void AMDGPUDAGToDAGISel::SelectInterpP1F16(
SDNode *
N) {
2672void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(
SDNode *
N) {
2673 unsigned IntrID =
N->getConstantOperandVal(1);
2675 case Intrinsic::amdgcn_ds_append:
2676 case Intrinsic::amdgcn_ds_consume: {
2677 if (
N->getValueType(0) != MVT::i32)
2679 SelectDSAppendConsume(
N, IntrID);
2682 case Intrinsic::amdgcn_ds_bvh_stack_rtn:
2683 SelectDSBvhStackIntrinsic(
N);
2690void AMDGPUDAGToDAGISel::SelectINTRINSIC_WO_CHAIN(
SDNode *
N) {
2691 unsigned IntrID =
N->getConstantOperandVal(0);
2692 unsigned Opcode = AMDGPU::INSTRUCTION_LIST_END;
2693 SDNode *ConvGlueNode =
N->getGluedNode();
2700 MVT::Glue,
SDValue(ConvGlueNode, 0));
2702 ConvGlueNode =
nullptr;
2705 case Intrinsic::amdgcn_wqm:
2706 Opcode = AMDGPU::WQM;
2708 case Intrinsic::amdgcn_softwqm:
2709 Opcode = AMDGPU::SOFT_WQM;
2711 case Intrinsic::amdgcn_wwm:
2712 case Intrinsic::amdgcn_strict_wwm:
2713 Opcode = AMDGPU::STRICT_WWM;
2715 case Intrinsic::amdgcn_strict_wqm:
2716 Opcode = AMDGPU::STRICT_WQM;
2718 case Intrinsic::amdgcn_interp_p1_f16:
2719 SelectInterpP1F16(
N);
2721 case Intrinsic::amdgcn_inverse_ballot:
2722 switch (
N->getOperand(1).getValueSizeInBits()) {
2724 Opcode = AMDGPU::S_INVERSE_BALLOT_U32;
2727 Opcode = AMDGPU::S_INVERSE_BALLOT_U64;
2738 if (Opcode != AMDGPU::INSTRUCTION_LIST_END) {
2745 NewOps.push_back(
SDValue(ConvGlueNode, 0));
2750void AMDGPUDAGToDAGISel::SelectINTRINSIC_VOID(
SDNode *
N) {
2751 unsigned IntrID =
N->getConstantOperandVal(1);
2753 case Intrinsic::amdgcn_ds_gws_init:
2754 case Intrinsic::amdgcn_ds_gws_barrier:
2755 case Intrinsic::amdgcn_ds_gws_sema_v:
2756 case Intrinsic::amdgcn_ds_gws_sema_br:
2757 case Intrinsic::amdgcn_ds_gws_sema_p:
2758 case Intrinsic::amdgcn_ds_gws_sema_release_all:
2759 SelectDS_GWS(
N, IntrID);
2768void AMDGPUDAGToDAGISel::SelectWAVE_ADDRESS(
SDNode *
N) {
2772 {N->getOperand(0), Log2WaveSize});
2775void AMDGPUDAGToDAGISel::SelectSTACKRESTORE(
SDNode *
N) {
2792 if (
N->isDivergent()) {
2799 {SrcVal, Log2WaveSize}),
2807bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(
SDValue In,
SDValue &Src,
2809 bool IsCanonicalizing,
2810 bool AllowAbs)
const {
2816 Src = Src.getOperand(0);
2817 }
else if (Src.getOpcode() ==
ISD::FSUB && IsCanonicalizing) {
2820 auto *
LHS = dyn_cast<ConstantFPSDNode>(Src.getOperand(0));
2821 if (LHS &&
LHS->isZero()) {
2823 Src = Src.getOperand(1);
2827 if (AllowAbs && Src.getOpcode() ==
ISD::FABS) {
2829 Src = Src.getOperand(0);
2838 if (SelectVOP3ModsImpl(In, Src, Mods,
true,
2847bool AMDGPUDAGToDAGISel::SelectVOP3ModsNonCanonicalizing(
2850 if (SelectVOP3ModsImpl(In, Src, Mods,
false,
2859bool AMDGPUDAGToDAGISel::SelectVOP3BMods(
SDValue In,
SDValue &Src,
2862 if (SelectVOP3ModsImpl(In, Src, Mods,
2872bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(
SDValue In,
SDValue &Src)
const {
2880bool AMDGPUDAGToDAGISel::SelectVINTERPModsImpl(
SDValue In,
SDValue &Src,
2884 if (SelectVOP3ModsImpl(In, Src, Mods,
2896bool AMDGPUDAGToDAGISel::SelectVINTERPMods(
SDValue In,
SDValue &Src,
2898 return SelectVINTERPModsImpl(In, Src, SrcMods,
false);
2901bool AMDGPUDAGToDAGISel::SelectVINTERPModsHi(
SDValue In,
SDValue &Src,
2903 return SelectVINTERPModsImpl(In, Src, SrcMods,
true);
2906bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(
SDValue In,
SDValue &Src,
2913 return SelectVOP3Mods(In, Src, SrcMods);
2916bool AMDGPUDAGToDAGISel::SelectVOP3BMods0(
SDValue In,
SDValue &Src,
2923 return SelectVOP3BMods(In, Src, SrcMods);
2926bool AMDGPUDAGToDAGISel::SelectVOP3OMods(
SDValue In,
SDValue &Src,
2937bool AMDGPUDAGToDAGISel::SelectVOP3PMods(
SDValue In,
SDValue &Src,
2938 SDValue &SrcMods,
bool IsDOT)
const {
2945 Src = Src.getOperand(0);
2950 unsigned VecMods = Mods;
2952 SDValue Lo = stripBitcast(Src.getOperand(0));
2953 SDValue Hi = stripBitcast(Src.getOperand(1));
2956 Lo = stripBitcast(
Lo.getOperand(0));
2961 Hi = stripBitcast(
Hi.getOperand(0));
2971 unsigned VecSize = Src.getValueSizeInBits();
2972 Lo = stripExtractLoElt(
Lo);
2973 Hi = stripExtractLoElt(
Hi);
2975 if (
Lo.getValueSizeInBits() > VecSize) {
2977 (VecSize > 32) ? AMDGPU::sub0_sub1 : AMDGPU::sub0,
SDLoc(In),
2981 if (
Hi.getValueSizeInBits() > VecSize) {
2983 (VecSize > 32) ? AMDGPU::sub0_sub1 : AMDGPU::sub0,
SDLoc(In),
2987 assert(
Lo.getValueSizeInBits() <= VecSize &&
2988 Hi.getValueSizeInBits() <= VecSize);
2990 if (
Lo ==
Hi && !isInlineImmediate(
Lo.getNode())) {
2994 if (VecSize == 32 || VecSize ==
Lo.getValueSizeInBits()) {
2997 assert(
Lo.getValueSizeInBits() == 32 && VecSize == 64);
3002 Lo.getValueType()), 0);
3003 auto RC =
Lo->isDivergent() ? AMDGPU::VReg_64RegClassID
3004 : AMDGPU::SReg_64RegClassID;
3011 Src.getValueType(), Ops), 0);
3017 if (VecSize == 64 &&
Lo ==
Hi && isa<ConstantFPSDNode>(
Lo)) {
3018 uint64_t Lit = cast<ConstantFPSDNode>(
Lo)->getValueAPF()
3019 .bitcastToAPInt().getZExtValue();
3037bool AMDGPUDAGToDAGISel::SelectVOP3PModsDOT(
SDValue In,
SDValue &Src,
3039 return SelectVOP3PMods(In, Src, SrcMods,
true);
3042bool AMDGPUDAGToDAGISel::SelectVOP3PModsNeg(
SDValue In,
SDValue &Src)
const {
3046 assert(
C->getAPIntValue().getBitWidth() == 1 &&
"expected i1 value");
3049 unsigned SrcSign =
C->getZExtValue();
3057bool AMDGPUDAGToDAGISel::SelectWMMAOpSelVOP3PMods(
SDValue In,
3060 assert(
C->getAPIntValue().getBitWidth() == 1 &&
"expected i1 value");
3063 unsigned SrcVal =
C->getZExtValue();
3074 unsigned DstRegClass;
3076 switch (Elts.
size()) {
3078 DstRegClass = AMDGPU::VReg_256RegClassID;
3082 DstRegClass = AMDGPU::VReg_128RegClassID;
3086 DstRegClass = AMDGPU::VReg_64RegClassID;
3095 for (
unsigned i = 0; i < Elts.
size(); ++i) {
3107 assert(
"unhandled Reg sequence size" &&
3108 (Elts.
size() == 8 || Elts.
size() == 16));
3112 for (
unsigned i = 0; i < Elts.
size(); i += 2) {
3113 SDValue LoSrc = stripExtractLoElt(stripBitcast(Elts[i]));
3121 {Elts[i + 1], Elts[i], PackLoLo});
3131 const SDLoc &
DL,
unsigned ElementSize) {
3132 if (ElementSize == 16)
3134 if (ElementSize == 32)
3142 unsigned ElementSize) {
3147 for (
auto El : Elts) {
3150 NegAbsElts.
push_back(El->getOperand(0));
3152 if (Elts.size() != NegAbsElts.
size()) {
3172 std::function<
bool(
SDValue)> ModifierCheck) {
3175 dyn_cast<BuildVectorSDNode>(stripBitcast(BV->
getOperand(i)))) {
3176 for (
unsigned i = 0; i < F16Pair->getNumOperands(); ++i) {
3177 SDValue ElF16 = stripBitcast(F16Pair->getOperand(i));
3178 if (!ModifierCheck(ElF16))
3185bool AMDGPUDAGToDAGISel::SelectWMMAModsF16Neg(
SDValue In,
SDValue &Src,
3191 if (
auto *BV = dyn_cast<BuildVectorSDNode>(stripBitcast(In))) {
3210 if (
auto *BV = dyn_cast<BuildVectorSDNode>(stripBitcast(In))) {
3232bool AMDGPUDAGToDAGISel::SelectWMMAModsF16NegAbs(
SDValue In,
SDValue &Src,
3239 if (
auto *BV = dyn_cast<BuildVectorSDNode>(stripBitcast(In))) {
3243 if (EltsF16.
empty())
3258 if (
auto *BV = dyn_cast<BuildVectorSDNode>(stripBitcast(In))) {
3264 if (EltsV2F16.
empty())
3281bool AMDGPUDAGToDAGISel::SelectWMMAModsF32NegAbs(
SDValue In,
SDValue &Src,
3287 if (
auto *BV = dyn_cast<BuildVectorSDNode>(stripBitcast(In))) {
3291 unsigned ModOpcode =
3310bool AMDGPUDAGToDAGISel::SelectWMMAVISrc(
SDValue In,
SDValue &Src)
const {
3311 if (
auto *BV = dyn_cast<BuildVectorSDNode>(In)) {
3314 if (isInlineImmediate(
Splat.getNode())) {
3316 unsigned Imm =
C->getAPIntValue().getSExtValue();
3321 unsigned Imm =
C->getValueAPF().bitcastToAPInt().getSExtValue();
3330 SDValue SplatSrc32 = stripBitcast(In);
3331 if (
auto *SplatSrc32BV = dyn_cast<BuildVectorSDNode>(SplatSrc32))
3332 if (
SDValue Splat32 = SplatSrc32BV->getSplatValue()) {
3333 SDValue SplatSrc16 = stripBitcast(Splat32);
3334 if (
auto *SplatSrc16BV = dyn_cast<BuildVectorSDNode>(SplatSrc16))
3337 std::optional<APInt> RawValue;
3339 RawValue =
C->getValueAPF().bitcastToAPInt();
3341 RawValue =
C->getAPIntValue();
3343 if (RawValue.has_value()) {
3344 EVT VT =
In.getValueType().getScalarType();
3350 if (
TII->isInlineConstant(FloatVal)) {
3356 if (
TII->isInlineConstant(RawValue.value())) {
3370bool AMDGPUDAGToDAGISel::SelectSWMMACIndex8(
SDValue In,
SDValue &Src,
3389bool AMDGPUDAGToDAGISel::SelectSWMMACIndex16(
SDValue In,
SDValue &Src,
3408bool AMDGPUDAGToDAGISel::SelectVOP3OpSel(
SDValue In,
SDValue &Src,
3416bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods(
SDValue In,
SDValue &Src,
3419 return SelectVOP3Mods(In, Src, SrcMods);
3424bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsImpl(
SDValue In,
SDValue &Src,
3425 unsigned &Mods)
const {
3427 SelectVOP3ModsImpl(In, Src, Mods);
3430 Src = Src.getOperand(0);
3431 assert(Src.getValueType() == MVT::f16);
3432 Src = stripBitcast(Src);
3438 SelectVOP3ModsImpl(Src, Src, ModsTmp);
3465bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsExt(
SDValue In,
SDValue &Src,
3468 if (!SelectVOP3PMadMixModsImpl(In, Src, Mods))
3474bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixMods(
SDValue In,
SDValue &Src,
3477 SelectVOP3PMadMixModsImpl(In, Src, Mods);
3494 C->getValueAPF().bitcastToAPInt().getZExtValue() << 16, SL, MVT::i32);
3504bool AMDGPUDAGToDAGISel::isVGPRImm(
const SDNode *
N)
const {
3513 bool AllUsesAcceptSReg =
true;
3515 Limit < 10 &&
U != E; ++
U, ++Limit) {
3524 if (RC != &AMDGPU::VS_32RegClass && RC != &AMDGPU::VS_64RegClass) {
3525 AllUsesAcceptSReg =
false;
3527 if (
User->isMachineOpcode()) {
3528 unsigned Opc =
User->getMachineOpcode();
3530 if (
Desc.isCommutable()) {
3531 unsigned OpIdx =
Desc.getNumDefs() +
U.getOperandNo();
3534 unsigned CommutedOpNo = CommuteIdx1 -
Desc.getNumDefs();
3536 if (CommutedRC == &AMDGPU::VS_32RegClass ||
3537 CommutedRC == &AMDGPU::VS_64RegClass)
3538 AllUsesAcceptSReg =
true;
3546 if (!AllUsesAcceptSReg)
3550 return !AllUsesAcceptSReg && (Limit < 10);
3553bool AMDGPUDAGToDAGISel::isUniformLoad(
const SDNode *
N)
const {
3554 auto Ld = cast<LoadSDNode>(
N);
3570 ->isMemOpHasNoClobberedMemOperand(
N)));
3576 bool IsModified =
false;
3583 SDNode *Node = &*Position++;
3589 if (ResNode != Node) {
3596 }
while (IsModified);
unsigned const MachineRegisterInfo * MRI
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static bool getBaseWithOffsetUsingSplitOR(SelectionDAG &DAG, SDValue Addr, SDValue &N0, SDValue &N1)
static MachineSDNode * buildRegSequence32(SmallVectorImpl< SDValue > &Elts, llvm::SelectionDAG *CurDAG, const SDLoc &DL)
static SDValue matchZExtFromI32(SDValue Op)
static unsigned gwsIntrinToOpcode(unsigned IntrID)
static MachineSDNode * buildRegSequence(SmallVectorImpl< SDValue > &Elts, llvm::SelectionDAG *CurDAG, const SDLoc &DL, unsigned ElementSize)
static SDValue SelectSAddrFI(SelectionDAG *CurDAG, SDValue SAddr)
static MemSDNode * findMemSDNode(SDNode *N)
static bool isNoUnsignedWrap(SDValue Addr)
static MachineSDNode * buildRegSequence16(SmallVectorImpl< SDValue > &Elts, llvm::SelectionDAG *CurDAG, const SDLoc &DL)
static void selectWMMAModsNegAbs(unsigned ModOpcode, unsigned &Mods, SmallVectorImpl< SDValue > &Elts, SDValue &Src, llvm::SelectionDAG *CurDAG, const SDLoc &DL, unsigned ElementSize)
static bool IsCopyFromSGPR(const SIRegisterInfo &TRI, SDValue Val)
static SDValue combineBallotPattern(SDValue VCMP, bool &Negate)
static void checkWMMAElementsModifiersF16(BuildVectorSDNode *BV, std::function< bool(SDValue)> ModifierCheck)
Defines an instruction selector for the AMDGPU target.
Contains the definition of a TargetInstrInfo class that is common to all AMD GPUs.
static bool isNoUnsignedWrap(MachineInstr *Addr)
static bool isExtractHiElt(MachineRegisterInfo &MRI, Register In, Register &Out)
static unsigned gwsIntrinToOpcode(unsigned IntrID)
static void selectWMMAModsNegAbs(unsigned ModOpcode, unsigned &Mods, SmallVectorImpl< Register > &Elts, Register &Src, MachineInstr *InsertPt, MachineRegisterInfo &MRI)
Provides AMDGPU specific target descriptions.
Base class for AMDGPU specific classes of TargetSubtarget.
The AMDGPU TargetMachine interface definition for hw codegen targets.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Analysis containing CSE Info
Returns the sub type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx.
unsigned const TargetRegisterInfo * TRI
const char LLVMTargetMachineRef TM
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
pre isel intrinsic Pre ISel Intrinsic Lowering
Provides R600 specific target descriptions.
Interface definition for R600RegisterInfo.
const SmallVectorImpl< MachineOperand > & Cond
SI DAG Lowering interface definition.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
support::ulittle16_t & Lo
support::ulittle16_t & Hi
AMDGPU specific code to select AMDGPU machine instructions for SelectionDAG operations.
void SelectBuildVector(SDNode *N, unsigned RegClassID)
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
void Select(SDNode *N) override
Main hook for targets to transform nodes into machine nodes.
bool runOnMachineFunction(MachineFunction &MF) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
void PreprocessISelDAG() override
PreprocessISelDAG - This hook allows targets to hack on the graph before instruction selection starts...
void PostprocessISelDAG() override
PostprocessISelDAG() - This hook allows the target to hack on the graph right after selection.
StringRef getPassName() const override
getPassName - Return a nice clean name for a pass.
AMDGPUDAGToDAGISel()=delete
bool matchLoadD16FromBuildVector(SDNode *N) const
static bool isUniformMMO(const MachineMemOperand *MMO)
unsigned getWavefrontSizeLog2() const
bool hasInv2PiInlineImm() const
static SDValue stripBitcast(SDValue Val)
static int64_t getNullPointerValue(unsigned AddrSpace)
Get the integer value of a null pointer in the given address space.
static bool EnableLateStructurizeCFG
Class for arbitrary precision integers.
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
uint64_t getZExtValue() const
Get zero extended value.
unsigned countr_one() const
Count the number of trailing one bits.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
LLVM Basic Block Representation.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
A "pseudo-class" with methods for operating on BUILD_VECTORs.
SDValue getSplatValue(const APInt &DemandedElts, BitVector *UndefElements=nullptr) const
Returns the demanded splatted value or a null value if this is not a splat.
uint64_t getZExtValue() const
int64_t getSExtValue() const
This class represents an Operation in the Expression.
Legacy analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
FunctionPass class - This class is used to implement most global optimizations.
int getLDSBankCount() const
bool hasUsableDSOffset() const
True if the offset field of DS instructions works as expected.
bool unsafeDSOffsetFoldingEnabled() const
bool hasFlatInstOffsets() const
const SIInstrInfo * getInstrInfo() const override
unsigned getConstantBusLimit(unsigned Opcode) const
bool hasMADIntraFwdBug() const
bool privateMemoryResourceIsRangeChecked() const
bool hasSignedScratchOffsets() const
const SIRegisterInfo * getRegisterInfo() const override
bool hasDOTOpSelHazard() const
bool d16PreservesUnusedBits() const
bool hasRestrictedSOffset() const
bool hasFlatSegmentOffsetBug() const
bool getScalarizeGlobalBehavior() const
bool ldsRequiresM0Init() const
Return true if most LDS instructions have an m0 use that requires m0 to be initialized.
bool hasFlatScratchSVSSwizzleBug() const
bool useFlatForGlobal() const
Generation getGeneration() const
bool hasGWSSemaReleaseAll() const
bool hasAddNoCarry() const
bool hasSALUFloatInsts() const
void checkSubtargetFeatures(const Function &F) const
Diagnose inconsistent subtarget features before attempting to codegen function F.
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
TypeSize getValue() const
SmallVector< LoopT *, 4 > getLoopsInPreorder() const
Return all of the loops in the function in preorder across the loop nests, with siblings in forward p...
The legacy pass manager's analysis pass to compute loop information.
Describe properties that are true of each instruction in the target description file.
const Triple & getTargetTriple() const
static MVT getIntegerVT(unsigned BitWidth)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
A description of a memory reference used in the backend.
LocationSize getSize() const
Return the size in bytes of the memory reference.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
An SDNode that represents everything that will be needed to construct a MachineInstr.
This SDNode is used for target intrinsics that touch memory and need an associated MachineMemOperand.
This is an abstract virtual class for memory operations.
unsigned getAddressSpace() const
Return the address space for the associated pointer.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
Wrapper class representing virtual and physical registers.
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
This class provides iterator support for SDUse operands that use a specific SDNode.
Represents one node in the SelectionDAG.
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumOperands() const
Return the number of values used by this operation.
const SDValue & getOperand(unsigned Num) const
bool isPredecessorOf(const SDNode *N) const
Return true if this node is a predecessor of N.
static use_iterator use_end()
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
const SDValue & getOperand(unsigned i) const
uint64_t getConstantOperandVal(unsigned i) const
unsigned getOpcode() const
static unsigned getMaxMUBUFImmOffset(const GCNSubtarget &ST)
bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx0, unsigned &SrcOpIdx1) const override
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
const TargetRegisterClass * getRegClass(unsigned RCID) const
static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs=1)
static LLVM_READONLY const TargetRegisterClass * getSGPRClassForBitWidth(unsigned BitWidth)
static bool isSGPRClass(const TargetRegisterClass *RC)
SelectionDAGISel - This is the common base class used for SelectionDAG-based pattern-matching instruc...
std::unique_ptr< FunctionLoweringInfo > FuncInfo
const TargetLowering * TLI
const TargetInstrInfo * TII
void ReplaceUses(SDValue F, SDValue T)
ReplaceUses - replace all uses of the old node F with the use of the new node T.
bool runOnMachineFunction(MachineFunction &MF) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
void ReplaceNode(SDNode *F, SDNode *T)
Replace all uses of F with T, then remove F from the DAG.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetLowering * getTargetLowering() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
const TargetSubtargetInfo & getSubtarget() const
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDNode * SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT)
These are used for target selectors to mutate the specified node to have the specified return type,...
SDNode * MorphNodeTo(SDNode *N, unsigned Opc, SDVTList VTs, ArrayRef< SDValue > Ops)
This mutates the specified node to have the specified return type, opcode, and operands.
allnodes_const_iterator allnodes_begin() const
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
allnodes_const_iterator allnodes_end() const
void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
SDValue getTargetFrameIndex(int FI, EVT VT)
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
SDValue getRegister(unsigned Reg, EVT VT)
void RemoveDeadNodes()
This method deletes all unreachable nodes in the SelectionDAG.
void RemoveDeadNode(SDNode *N)
Remove the specified node from the system.
SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
const TargetMachine & getTarget() const
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, unsigned Reg, SDValue N)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
bool isBaseWithConstantOffset(SDValue Op) const
Return true if the specified operand is an ISD::ADD with a ConstantSDNode on the right-hand side,...
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
ilist< SDNode >::iterator allnodes_iterator
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
static const unsigned CommuteAnyOperandIndex
Register getStackPointerRegisterToSaveRestore() const
If a physical register, this specifies the register that llvm.savestack/llvm.restorestack should save...
Primary interface to the complete machine description for the target machine.
const Triple & getTargetTriple() const
unsigned getID() const
Return the register class ID number.
ArchType getArch() const
Get the parsed architecture type of this triple.
LLVM Value Representation.
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ CONSTANT_ADDRESS_32BIT
Address space for 32-bit constant memory.
@ REGION_ADDRESS
Address space for region memory. (GDS)
@ LOCAL_ADDRESS
Address space for local memory.
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
@ FLAT_ADDRESS
Address space for flat memory.
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
@ PRIVATE_ADDRESS
Address space for private memory.
@ CLAMP
CLAMP value between 0.0 and 1.0.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
std::optional< int64_t > getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST, int64_t ByteOffset)
bool isGFX12Plus(const MCSubtargetInfo &STI)
bool isValid32BitLiteral(uint64_t Val, bool IsFP64)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
std::optional< int64_t > getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset, bool IsBuffer)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to; it returns an output chain.
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
@ FMAD
FMAD - Perform a * b + c, while getting the same result as the separately rounded operations.
@ ADD
Simple integer binary arithmetic operators.
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
@ FADD
Simple binary floating point operators.
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
@ FLDEXP
FLDEXP - ldexp, inspired by libm (op0 * 2**op1).
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
@ UNDEF
UNDEF - An undefined node.
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
@ SHL
Shift and rotation operations.
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
@ AND
Bitwise operators - logical and, logical or, logical xor.
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
@ BRCOND
BRCOND - Conditional branch.
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
bool isExtOpcode(unsigned Opcode)
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
@ Undef
Value of the register doesn't matter.
Reg
All possible values of the reg field in the ModR/M byte.
constexpr const char32_t SBase
This is an optimization pass for GlobalISel generic memory operations.
int popcount(T Value) noexcept
Count the number of set bits in a value.
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
constexpr bool isMask_32(uint32_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
bool isBoolSGPR(SDValue V)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
CodeGenOptLevel
Code generation optimization level.
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
FunctionPass * createAMDGPUISelDag(TargetMachine &TM, CodeGenOptLevel OptLevel)
This pass converts a legalized DAG into an AMDGPU-specific DAG.
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ And
Bitwise or logical AND of integers.
unsigned M0(unsigned Val)
static const fltSemantics & IEEEhalf() LLVM_READNONE
static const fltSemantics & BFloat() LLVM_READNONE
This struct is a compact representation of a valid (non-zero power of two) alignment.
Description of the encoding of one expression Op.
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
uint64_t getScalarSizeInBits() const
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
bool bitsEq(EVT VT) const
Return true if this has the same number of bits as VT.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
static KnownBits makeConstant(const APInt &C)
Create known bits from a known constant.
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
static KnownBits computeForAddSub(bool Add, bool NSW, bool NUW, const KnownBits &LHS, const KnownBits &RHS)
Compute known bits resulting from adding LHS and RHS.
static unsigned getSubRegFromChannel(unsigned Channel)
This represents a list of ValueType's that has been intern'd by a SelectionDAG.