#include "llvm/IR/IntrinsicsAMDGPU.h"

#ifdef EXPENSIVE_CHECKS

#define DEBUG_TYPE "isel"
  In = stripBitcast(In);

  Out = In.getOperand(0);

  if (ShiftAmt->getZExtValue() == 16) {

  if (Idx->isZero() && In.getValueSizeInBits() <= 32)
    return In.getOperand(0);

  if (Src.getValueType().getSizeInBits() == 32)
    return stripBitcast(Src);
                      "AMDGPU DAG->DAG Pattern Instruction Selection", false,
                      false)

#ifdef EXPENSIVE_CHECKS

                      "AMDGPU DAG->DAG Pattern Instruction Selection", false,
                      false)

                                        CodeGenOpt::Level OptLevel) {
  EnableLateStructurizeCFG = AMDGPUTargetMachine::EnableLateStructurizeCFG;

#ifdef EXPENSIVE_CHECKS
  DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
    assert(L->isLCSSAForm(DT));

  return SelectionDAGISel::runOnMachineFunction(MF);

bool AMDGPUDAGToDAGISel::fp16SrcZerosHighBits(unsigned Opc) const {

    return Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS;

    return Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS;
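    // Added commentary (not in the original source): fp16SrcZerosHighBits
    // reports, per opcode, whether an f16 operation writes zeros to the high
    // 16 bits of its 32-bit result register. The checks above appear to limit
    // that assumption to GFX8 (Volcanic Islands), where the relevant f16
    // instructions are known to zero the high half.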
#ifdef EXPENSIVE_CHECKS

  SelectionDAGISel::getAnalysisUsage(AU);

  MVT VT = N->getValueType(0).getSimpleVT();
  if (VT != MVT::v2i16 && VT != MVT::v2f16)

  LoadSDNode *LdHi = dyn_cast<LoadSDNode>(stripBitcast(Hi));

  LoadSDNode *LdLo = dyn_cast<LoadSDNode>(stripBitcast(Lo));
  if (LdLo && Lo.hasOneUse()) {

  bool MadeChange = false;

    switch (N->getOpcode()) {
bool AMDGPUDAGToDAGISel::isNoNanSrc(SDValue N) const {

  if (N->getFlags().hasNoNaNs())

bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N,
                                           bool Negated) const {

      return TII->isInlineConstant(-C->getAPIntValue());

      return TII->isInlineConstant(-C->getValueAPF().bitcastToAPInt());

      return TII->isInlineConstant(C->getAPIntValue());

      return TII->isInlineConstant(C->getValueAPF().bitcastToAPInt());
                                                           unsigned OpNo) const {
  if (!N->isMachineOpcode()) {
      Register Reg = cast<RegisterSDNode>(N->getOperand(1))->getReg();
      if (Reg.isVirtual()) {

          = static_cast<const GCNSubtarget *>(Subtarget)->getRegisterInfo();
      return TRI->getPhysRegClass(Reg);

  switch (N->getMachineOpcode()) {

  case AMDGPU::REG_SEQUENCE: {
    unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();

    SDValue SubRegOp = N->getOperand(OpNo + 1);
    unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue();
  Ops.push_back(NewChain);
  for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i)
    Ops.push_back(N->getOperand(i));

  return glueCopyToOp(N, M0, M0.getValue(1));
SDNode *AMDGPUDAGToDAGISel::glueCopyToM0LDSInit(SDNode *N) const {
  unsigned AS = cast<MemSDNode>(N)->getAddressSpace();

  EVT VT = N->getValueType(0);

  if (NumVectorElts == 1) {

  assert(NumVectorElts <= 32 && "Vectors with more than 32 elements not "

  bool IsRegSeq = true;
  unsigned NOps = N->getNumOperands();
  for (unsigned i = 0; i < NOps; i++) {
    if (isa<RegisterSDNode>(N->getOperand(i))) {

    unsigned Sub = IsGCN ? SIRegisterInfo::getSubRegFromChannel(i)
                         : R600RegisterInfo::getSubRegFromChannel(i);
    RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
  if (NOps != NumVectorElts) {

    for (unsigned i = NOps; i < NumVectorElts; ++i) {
      unsigned Sub = IsGCN ? SIRegisterInfo::getSubRegFromChannel(i)
                           : R600RegisterInfo::getSubRegFromChannel(i);
      RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
      RegSeqArgs[1 + (2 * i) + 1] =
  unsigned int Opc = N->getOpcode();
  if (N->isMachineOpcode()) {

  N = glueCopyToM0LDSInit(N);

    SelectADD_SUB_I64(N);

    SelectUADDO_USUBO(N);

    SelectFMUL_W_CHAIN(N);

    SelectFMA_W_CHAIN(N);

    EVT VT = N->getValueType(0);
    unsigned RegClassID =
        SIRegisterInfo::getSGPRClassForBitWidth(NumVectorElts * 32)->getID();

    if (N->getValueType(0) == MVT::i128) {

    } else if (N->getValueType(0) == MVT::i64) {

    const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
                                N->getOperand(1), SubReg1 };
                                          N->getValueType(0), Ops));

    if (N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))

      Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();

      Imm = C->getZExtValue();

    return SelectMUL_LOHI(N);

                       { N->getOperand(0), N->getOperand(1) });
    SelectINTRINSIC_W_CHAIN(N);

    SelectINTRINSIC_WO_CHAIN(N);

    SelectINTRINSIC_VOID(N);

bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const {

  return Term->getMetadata("amdgpu.uniform") ||
         Term->getMetadata("structurizecfg.uniform");
bool AMDGPUDAGToDAGISel::isUnneededShiftMask(const SDNode *N,
                                             unsigned ShAmtBits) const {

  const APInt &RHS = cast<ConstantSDNode>(N->getOperand(1))->getAPIntValue();
  if (RHS.countTrailingOnes() >= ShAmtBits)
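    // Added commentary (not in the original source): the AND mask on the
    // shift amount is redundant when it has at least ShAmtBits trailing ones,
    // since a shift of this width only reads the low ShAmtBits bits of its
    // amount operand and such a mask leaves those bits unchanged.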
    N1 = Lo.getOperand(1);

  return "AMDGPU DAG->DAG Pattern Instruction Selection";

  if ((C = dyn_cast<ConstantSDNode>(Addr))) {

             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {

             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {

SDValue AMDGPUDAGToDAGISel::getMaterializedScalarImm32(int64_t Val,
void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {

  unsigned Opcode = N->getOpcode();

  static const unsigned OpcMap[2][2][2] = {
      {{AMDGPU::S_SUB_U32, AMDGPU::S_ADD_U32},
       {AMDGPU::V_SUB_CO_U32_e32, AMDGPU::V_ADD_CO_U32_e32}},
      {{AMDGPU::S_SUBB_U32, AMDGPU::S_ADDC_U32},
       {AMDGPU::V_SUBB_U32_e32, AMDGPU::V_ADDC_U32_e32}}};
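  // Added commentary (not in the original source): the table is indexed as
  // OpcMap[IsCarryOp][IsDivergent][IsAdd]. For example, a divergent 64-bit
  // add selects OpcMap[0][1][1] == V_ADD_CO_U32_e32 for the low half and
  // OpcMap[1][1][1] == V_ADDC_U32_e32 to add the high half plus the carry,
  // while a uniform add uses S_ADD_U32 / S_ADDC_U32.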
  unsigned Opc = OpcMap[0][N->isDivergent()][IsAdd];
  unsigned CarryOpc = OpcMap[1][N->isDivergent()][IsAdd];

void AMDGPUDAGToDAGISel::SelectAddcSubb(SDNode *N) {

  if (N->isDivergent()) {
    unsigned Opc = N->getOpcode() == ISD::ADDCARRY ? AMDGPU::V_ADDC_U32_e64
                                                   : AMDGPU::V_SUBB_U32_e64;
        N, Opc, N->getVTList(),
         CurDAG->getTargetConstant(0, {}, MVT::i1)});
    unsigned Opc = N->getOpcode() == ISD::ADDCARRY ? AMDGPU::S_ADD_CO_PSEUDO
                                                   : AMDGPU::S_SUB_CO_PSEUDO;
    CurDAG->SelectNodeTo(N, Opc, N->getVTList(), {LHS, RHS, CI});
void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(SDNode *N) {

  bool IsVALU = N->isDivergent();

    if (UI.getUse().getResNo() == 1) {

    unsigned Opc = IsAdd ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_SUB_CO_U32_e64;
        N, Opc, N->getVTList(),
        {N->getOperand(0), N->getOperand(1),
         CurDAG->getTargetConstant(0, {}, MVT::i1)});
    unsigned Opc = N->getOpcode() == ISD::UADDO ? AMDGPU::S_UADDO_PSEUDO
                                                : AMDGPU::S_USUBO_PSEUDO;
    CurDAG->SelectNodeTo(N, Opc, N->getVTList(),
                         {N->getOperand(0), N->getOperand(1)});
void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) {

  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[6], Ops[7]);
  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
  SelectVOP3Mods(N->getOperand(3), Ops[5], Ops[4]);
  Ops[8] = N->getOperand(0);
  Ops[9] = N->getOperand(4);

                 cast<ConstantSDNode>(Ops[0])->isZero() &&
                 cast<ConstantSDNode>(Ops[2])->isZero() &&
                 cast<ConstantSDNode>(Ops[4])->isZero();
  unsigned Opcode = UseFMAC ? AMDGPU::V_FMAC_F32_e64 : AMDGPU::V_FMA_F32_e64;
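  // Added commentary (not in the original source): Ops[0], Ops[2] and Ops[4]
  // hold the source-modifier operands produced by the SelectVOP3Mods0 /
  // SelectVOP3Mods calls above, so the FMAC form is only chosen when every
  // source is modifier-free; otherwise the full V_FMA_F32_e64 encoding is used.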
void AMDGPUDAGToDAGISel::SelectFMUL_W_CHAIN(SDNode *N) {

  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[4], Ops[5]);
  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
  Ops[6] = N->getOperand(0);
  Ops[7] = N->getOperand(3);

void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {

  EVT VT = N->getValueType(0);

      = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64_e64 : AMDGPU::V_DIV_SCALE_F32_e64;

  SelectVOP3BMods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
  SelectVOP3BMods(N->getOperand(1), Ops[3], Ops[2]);
  SelectVOP3BMods(N->getOperand(2), Ops[5], Ops[4]);
void AMDGPUDAGToDAGISel::SelectMAD_64_32(SDNode *N) {

    Opc = Signed ? AMDGPU::V_MAD_I64_I32_gfx11_e64
                 : AMDGPU::V_MAD_U64_U32_gfx11_e64;

    Opc = Signed ? AMDGPU::V_MAD_I64_I32_e64 : AMDGPU::V_MAD_U64_U32_e64;

  SDValue Ops[] = {N->getOperand(0), N->getOperand(1), N->getOperand(2),

void AMDGPUDAGToDAGISel::SelectMUL_LOHI(SDNode *N) {

    Opc = Signed ? AMDGPU::V_MAD_I64_I32_gfx11_e64
                 : AMDGPU::V_MAD_U64_U32_gfx11_e64;

    Opc = Signed ? AMDGPU::V_MAD_I64_I32_e64 : AMDGPU::V_MAD_U64_U32_e64;

  SDValue Ops[] = {N->getOperand(0), N->getOperand(1), Zero, Clamp};
    if (isDSOffsetLegal(N0, C1->getSExtValue())) {

      int64_t ByteOffset = C->getSExtValue();
      if (isDSOffsetLegal(SDValue(), ByteOffset)) {

                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, ByteOffset)) {
          Opnds.push_back(Zero);
          Opnds.push_back(Addr.getOperand(1));
          unsigned SubOp = AMDGPU::V_SUB_CO_U32_e32;
            SubOp = AMDGPU::V_SUB_U32_e64;

    if (isDSOffsetLegal(SDValue(), CAddr->getZExtValue())) {
bool AMDGPUDAGToDAGISel::isDSOffset2Legal(SDValue Base, unsigned Offset0,
                                          unsigned Offset1,
                                          unsigned Size) const {
  if (Offset0 % Size != 0 || Offset1 % Size != 0)

  return SelectDSReadWrite2(Addr, Base, Offset0, Offset1, 4);

  return SelectDSReadWrite2(Addr, Base, Offset0, Offset1, 8);
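  // Added commentary (not in the original source): the trailing Size argument
  // is the element size in bytes, so the two calls above correspond to the
  // 32-bit and 64-bit paired DS forms (ds_read2/write2_b32 vs. _b64).
  // isDSOffset2Legal requires both offsets to be multiples of Size because
  // the encoded offset fields are scaled by the element size.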
                                              unsigned Size) const {

    unsigned OffsetValue0 = C1->getZExtValue();
    unsigned OffsetValue1 = OffsetValue0 + Size;

    if (isDSOffset2Legal(N0, OffsetValue0, OffsetValue1, Size)) {

                 dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
    unsigned OffsetValue0 = C->getZExtValue();
    unsigned OffsetValue1 = OffsetValue0 + Size;

    if (isDSOffset2Legal(SDValue(), OffsetValue0, OffsetValue1, Size)) {

      if (isDSOffset2Legal(Sub, OffsetValue0, OffsetValue1, Size)) {
        Opnds.push_back(Zero);
        Opnds.push_back(Addr.getOperand(1));
        unsigned SubOp = AMDGPU::V_SUB_CO_U32_e32;
          SubOp = AMDGPU::V_SUB_U32_e64;

            SubOp, DL, MVT::getIntegerVT(Size * 8), Opnds);
    unsigned OffsetValue0 = CAddr->getZExtValue();
    unsigned OffsetValue1 = OffsetValue0 + Size;

    if (isDSOffset2Legal(SDValue(), OffsetValue0, OffsetValue1, Size)) {

      C1 = cast<ConstantSDNode>(Addr.getOperand(1));
      N0 = Addr.getOperand(0);

    if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue())) {

  SDValue Ptr, Offen, Idxen, Addr64;

  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64))

    if (C->getSExtValue()) {
std::pair<SDValue, SDValue> AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const {

  auto *FI = dyn_cast<FrameIndexSDNode>(N);

bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent,

    int64_t Imm = CAddr->getSExtValue();
    const int64_t NullPtr =

    if (Imm != NullPtr) {

          AMDGPU::V_MOV_B32_e32, DL, MVT::i32, HighBits);
      VAddr = SDValue(MovHighBits, 0);

    if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue()) &&

      std::tie(VAddr, SOffset) = foldFrameIndex(N0);

  std::tie(VAddr, SOffset) = foldFrameIndex(Addr);

      TRI.getPhysRegClass(cast<RegisterSDNode>(Val.getOperand(1))->getReg());
  return RC && TRI.isSGPRClass(RC);
bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent,

    CAddr = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
    if (!CAddr || !SIInstrInfo::isLegalMUBUFImmOffset(CAddr->getZExtValue()))

    SOffset = Addr.getOperand(0);
  } else if ((CAddr = dyn_cast<ConstantSDNode>(Addr)) &&
             SIInstrInfo::isLegalMUBUFImmOffset(CAddr->getZExtValue())) {

  SDValue Ptr, VAddr, Offen, Idxen, Addr64;

  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64))

  if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
      !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
      !cast<ConstantSDNode>(Addr64)->getSExtValue()) {

        APInt::getAllOnes(32).getZExtValue();
  N = AMDGPUTargetLowering::stripBitcast(SDValue(N, 0)).getNode();

  assert(isa<BuildVectorSDNode>(N));

          dyn_cast<MemSDNode>(AMDGPUTargetLowering::stripBitcast(V)))

  int64_t OffsetVal = 0;

  bool CanHaveFlatSegmentOffsetBug =

    if (isBaseWithConstantOffset64(Addr, N0, N1)) {
      int64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue();

      if (TII->isLegalFLATOffset(COffsetVal, AS, FlatVariant)) {
        OffsetVal = COffsetVal;

        std::tie(OffsetVal, RemainderOffset) =
            TII->splitFlatOffset(COffsetVal, AS, FlatVariant);

            getMaterializedScalarImm32(Lo_32(RemainderOffset), DL);

        if (Addr.getValueType().getSizeInBits() == 32) {
          Opnds.push_back(N0);
          Opnds.push_back(AddOffsetLo);
          unsigned AddOp = AMDGPU::V_ADD_CO_U32_e32;
            AddOp = AMDGPU::V_ADD_U32_e64;
            Opnds.push_back(Clamp);

              getMaterializedScalarImm32(Hi_32(RemainderOffset), DL);

              AMDGPU::V_ADDC_U32_e64, DL, VTs,
  return SelectFlatOffsetImpl(N, Addr, VAddr, Offset,

bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(SDNode *N,

  int64_t ImmOffset = 0;

  if (isBaseWithConstantOffset64(Addr, LHS, RHS)) {
    int64_t COffsetVal = cast<ConstantSDNode>(RHS)->getSExtValue();

      ImmOffset = COffsetVal;
    } else if (!LHS->isDivergent()) {
      if (COffsetVal > 0) {

        int64_t SplitImmOffset, RemainderOffset;
        std::tie(SplitImmOffset, RemainderOffset) = TII->splitFlatOffset(

              AMDGPU::V_MOV_B32_e32, SL, MVT::i32,

      unsigned NumLiterals =
          !TII->isInlineConstant(APInt(32, COffsetVal & 0xffffffff)) +
          !TII->isInlineConstant(APInt(32, COffsetVal >> 32));
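      // Added commentary (not in the original source): NumLiterals counts how
      // many of the two 32-bit halves of the remaining offset would need a
      // literal constant rather than an inline immediate; the surrounding
      // code presumably uses this to judge whether folding the offset into
      // the address computation is worth the extra literal dwords.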
    if (!LHS->isDivergent()) {

    if (!SAddr && !RHS->isDivergent()) {

      isa<ConstantSDNode>(Addr))

  if (auto FI = dyn_cast<FrameIndexSDNode>(SAddr)) {
             isa<FrameIndexSDNode>(SAddr.getOperand(0))) {
    auto FI = cast<FrameIndexSDNode>(SAddr.getOperand(0));
                                       FI->getValueType(0));

  if (Addr->isDivergent())

  int64_t COffsetVal = 0;

    COffsetVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
    SAddr = Addr.getOperand(0);

      int64_t SplitImmOffset, RemainderOffset;
      std::tie(SplitImmOffset, RemainderOffset) = TII->splitFlatOffset(

      COffsetVal = SplitImmOffset;

              ? getMaterializedScalarImm32(Lo_32(RemainderOffset), DL)

bool AMDGPUDAGToDAGISel::checkFlatScratchSVSSwizzleBug(

  KnownBits SKnown = KnownBits::computeForAddSub(
      KnownBits::makeConstant(APInt(32, ImmOffset)));

  return (VMax & 3) + (SMax & 3) >= 4;
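// Added commentary (not in the original source): the check conservatively
// uses the known-bits maxima of the scalar and vector address components; if
// the low two bits of SAddr and VAddr can sum to 4 or more, the add may carry
// out of the low dword-offset bits, which appears to be the condition under
// which the flat-scratch SVS swizzle hardware bug can trigger.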
  int64_t ImmOffset = 0;

  if (isBaseWithConstantOffset64(Addr, LHS, RHS)) {
    int64_t COffsetVal = cast<ConstantSDNode>(RHS)->getSExtValue();

      ImmOffset = COffsetVal;
    } else if (!LHS->isDivergent() && COffsetVal > 0) {

      int64_t SplitImmOffset, RemainderOffset;
      std::tie(SplitImmOffset, RemainderOffset)

            AMDGPU::V_MOV_B32_e32, SL, MVT::i32,

        if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, SplitImmOffset))

  if (!LHS->isDivergent() && RHS->isDivergent()) {
  } else if (!RHS->isDivergent() && LHS->isDivergent()) {

  if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, ImmOffset))
bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,

  SDLoc SL(ByteOffsetNode);

  int64_t ByteOffset = C->getSExtValue();

    if (EncodedOffset) {

  if (EncodedOffset) {

  unsigned AddrHiVal = Info->get32BitAddressHighBits();

       Addr->getFlags().hasNoUnsignedWrap())) {

    N0 = Addr.getOperand(0);
    N1 = Addr.getOperand(1);

    assert(N0 && N1 && isa<ConstantSDNode>(N1));

    if (SelectSMRDOffset(N1, Offset, Imm)) {
      SBase = Expand32BitAddress(N0);

  return !Imm && isa<ConstantSDNode>(Offset);

         !isa<ConstantSDNode>(Offset);
bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr,

bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue Addr,

          C->getZExtValue())) {

bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index,

  if (isa<ConstantSDNode>(Index))

SDNode *AMDGPUDAGToDAGISel::getBFE32(bool IsSigned, const SDLoc &DL,

    unsigned Opcode = IsSigned ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;

  unsigned Opcode = IsSigned ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;

void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {

  const SDValue &Shl = N->getOperand(0);

  if (0 < BVal && BVal <= CVal && CVal < 32) {
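    // Added commentary (not in the original source): this matches the pattern
    // (srl/sra (shl X, B), C). Under the bounds checked above, the shift pair
    // is equivalent to a bitfield extract of X with offset C - B and width
    // 32 - C, which is presumably what the getBFE32 helper above is given.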
void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
  switch (N->getOpcode()) {

    if (N->getOperand(0).getOpcode() == ISD::SRL) {

      const SDValue &Srl = N->getOperand(0);

    if (N->getOperand(0).getOpcode() == ISD::AND) {

    } else if (N->getOperand(0).getOpcode() == ISD::SHL) {
      SelectS_BFEFromShifts(N);

    if (N->getOperand(0).getOpcode() == ISD::SHL) {
      SelectS_BFEFromShifts(N);

    const ConstantSDNode *Amt = dyn_cast<ConstantSDNode>(Src.getOperand(1));

    unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
bool AMDGPUDAGToDAGISel::isCBranchSCC(const SDNode *N) const {

  if (!N->hasOneUse())

    MVT VT = Cond.getOperand(0).getSimpleValueType();

void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {

  if (Cond.isUndef()) {
                         N->getOperand(2), N->getOperand(0));

  bool UseSCCBr = isCBranchSCC(N) && isUniformBr(N);
  unsigned BrOp = UseSCCBr ? AMDGPU::S_CBRANCH_SCC1 : AMDGPU::S_CBRANCH_VCCNZ;

                           : AMDGPU::S_AND_B64,
void AMDGPUDAGToDAGISel::SelectFMAD_FMA(SDNode *N) {
  MVT VT = N->getSimpleValueType(0);
  bool IsFMA = N->getOpcode() == ISD::FMA;

  unsigned Src0Mods, Src1Mods, Src2Mods;

  bool Sel0 = SelectVOP3PMadMixModsImpl(Src0, Src0, Src0Mods);
  bool Sel1 = SelectVOP3PMadMixModsImpl(Src1, Src1, Src1Mods);
  bool Sel2 = SelectVOP3PMadMixModsImpl(Src2, Src2, Src2Mods);

  assert((IsFMA || !Mode.allFP32Denormals()) &&
         "fmad selected with denormals enabled");

  if (Sel0 || Sel1 || Sel2) {

        IsFMA ? AMDGPU::V_FMA_MIX_F32 : AMDGPU::V_MAD_MIX_F32,
void AMDGPUDAGToDAGISel::SelectDSAppendConsume(SDNode *N, unsigned IntrID) {

  unsigned Opc = IntrID == Intrinsic::amdgcn_ds_append ?
    AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME;

    const APInt &OffsetVal = cast<ConstantSDNode>(PtrOffset)->getAPIntValue();
    if (isDSOffsetLegal(PtrBase, OffsetVal.getZExtValue())) {
      N = glueCopyToM0(N, PtrBase);

    N = glueCopyToM0(N, Ptr);

    N->getOperand(N->getNumOperands() - 1)

  case Intrinsic::amdgcn_ds_gws_init:
    return AMDGPU::DS_GWS_INIT;
  case Intrinsic::amdgcn_ds_gws_barrier:
    return AMDGPU::DS_GWS_BARRIER;
  case Intrinsic::amdgcn_ds_gws_sema_v:
    return AMDGPU::DS_GWS_SEMA_V;
  case Intrinsic::amdgcn_ds_gws_sema_br:
    return AMDGPU::DS_GWS_SEMA_BR;
  case Intrinsic::amdgcn_ds_gws_sema_p:
    return AMDGPU::DS_GWS_SEMA_P;
  case Intrinsic::amdgcn_ds_gws_sema_release_all:
    return AMDGPU::DS_GWS_SEMA_RELEASE_ALL;
void AMDGPUDAGToDAGISel::SelectDS_GWS(SDNode *N, unsigned IntrID) {
  if (IntrID == Intrinsic::amdgcn_ds_gws_sema_release_all &&

  const bool HasVSrc = N->getNumOperands() == 4;
  assert(HasVSrc || N->getNumOperands() == 3);

  SDValue BaseOffset = N->getOperand(HasVSrc ? 3 : 2);

  if (ConstantSDNode *ConstOffset = dyn_cast<ConstantSDNode>(BaseOffset)) {

  glueCopyToM0(N, SDValue(M0Base, 0));

    Ops.push_back(N->getOperand(2));
  Ops.push_back(OffsetField);
  Ops.push_back(Chain);
void AMDGPUDAGToDAGISel::SelectInterpP1F16(SDNode *N) {

void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *N) {
  unsigned IntrID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();

  case Intrinsic::amdgcn_ds_append:
  case Intrinsic::amdgcn_ds_consume: {

    SelectDSAppendConsume(N, IntrID);

void AMDGPUDAGToDAGISel::SelectINTRINSIC_WO_CHAIN(SDNode *N) {
  unsigned IntrID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();

  case Intrinsic::amdgcn_wqm:

  case Intrinsic::amdgcn_softwqm:
    Opcode = AMDGPU::SOFT_WQM;

  case Intrinsic::amdgcn_wwm:
  case Intrinsic::amdgcn_strict_wwm:
    Opcode = AMDGPU::STRICT_WWM;

  case Intrinsic::amdgcn_strict_wqm:
    Opcode = AMDGPU::STRICT_WQM;

  case Intrinsic::amdgcn_interp_p1_f16:
    SelectInterpP1F16(N);
void AMDGPUDAGToDAGISel::SelectINTRINSIC_VOID(SDNode *N) {
  unsigned IntrID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();

  case Intrinsic::amdgcn_ds_gws_init:
  case Intrinsic::amdgcn_ds_gws_barrier:
  case Intrinsic::amdgcn_ds_gws_sema_v:
  case Intrinsic::amdgcn_ds_gws_sema_br:
  case Intrinsic::amdgcn_ds_gws_sema_p:
  case Intrinsic::amdgcn_ds_gws_sema_release_all:
    SelectDS_GWS(N, IntrID);

                                      bool AllowAbs) const {

    Src = Src.getOperand(0);

  if (AllowAbs && Src.getOpcode() == ISD::FABS) {
    Src = Src.getOperand(0);

  if (SelectVOP3ModsImpl(In, Src, Mods)) {

  if (SelectVOP3ModsImpl(In, Src, Mods, false)) {

  SelectVOP3Mods(In, Src, SrcMods);
  return isNoNanSrc(Src);
bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src) const {

  if (SelectVOP3ModsImpl(In, Src, Mods, false)) {

  return SelectVINTERPModsImpl(In, Src, SrcMods, false);

  return SelectVINTERPModsImpl(In, Src, SrcMods, true);

  return SelectVOP3Mods(In, Src, SrcMods);

  return SelectVOP3BMods(In, Src, SrcMods);
    Src = Src.getOperand(0);

  unsigned VecMods = Mods;

    SDValue Lo = stripBitcast(Src.getOperand(0));
    SDValue Hi = stripBitcast(Src.getOperand(1));

      Lo = stripBitcast(Lo.getOperand(0));

      Hi = stripBitcast(Hi.getOperand(0));

    if (isExtractHiElt(Lo, Lo))

    if (isExtractHiElt(Hi, Hi))

    unsigned VecSize = Src.getValueSizeInBits();
    Lo = stripExtractLoElt(Lo);
    Hi = stripExtractLoElt(Hi);

    if (Lo.getValueSizeInBits() > VecSize) {

    if (Hi.getValueSizeInBits() > VecSize) {

    if (Lo == Hi && !isInlineImmediate(Lo.getNode())) {
                                         Lo.getValueType()), 0);
      auto RC = Lo->isDivergent() ? AMDGPU::VReg_64RegClassID
                                  : AMDGPU::SReg_64RegClassID;

                             Src.getValueType(), Ops), 0);

  if (VecSize == 64 && Lo == Hi && isa<ConstantFPSDNode>(Lo)) {
    uint64_t Lit = cast<ConstantFPSDNode>(Lo)->getValueAPF()
                       .bitcastToAPInt().getZExtValue();
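    // Added commentary (not in the original source): when both 32-bit halves
    // of a 64-bit packed source are the same floating-point constant, the
    // splat value can presumably be emitted as a single 32-bit literal
    // instead of materializing a 64-bit register pair for the operand.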
  return SelectVOP3PMods(In, Src, SrcMods, true);

bool AMDGPUDAGToDAGISel::SelectDotIUVOP3PMods(SDValue In, SDValue &Src) const {

  assert(C->getAPIntValue().getBitWidth() == 1 && "expected i1 value");

  unsigned SrcSign = C->getAPIntValue().getZExtValue();

  return SelectVOP3Mods(In, Src, SrcMods);

bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src,
                                                   unsigned &Mods) const {

  SelectVOP3ModsImpl(In, Src, Mods);

    Src = Src.getOperand(0);
    assert(Src.getValueType() == MVT::f16);
    Src = stripBitcast(Src);

    SelectVOP3ModsImpl(Src, Src, ModsTmp);

    if (isExtractHiElt(Src, Src)) {

  SelectVOP3PMadMixModsImpl(In, Src, Mods);

        C->getValueAPF().bitcastToAPInt().getZExtValue() << 16, SL, MVT::i32);

  if (isExtractHiElt(In, Src))
bool AMDGPUDAGToDAGISel::isVGPRImm(const SDNode *N) const {

  bool AllUsesAcceptSReg = true;
       Limit < 10 && U != E; ++U, ++Limit) {

    if (RC != &AMDGPU::VS_32RegClass) {
      AllUsesAcceptSReg = false;

      if (User->isMachineOpcode()) {
        unsigned Opc = User->getMachineOpcode();

          unsigned OpIdx = Desc.getNumDefs() + U.getOperandNo();
          unsigned CommuteIdx1 = TargetInstrInfo::CommuteAnyOperandIndex;

            unsigned CommutedOpNo = CommuteIdx1 - Desc.getNumDefs();

            if (CommutedRC == &AMDGPU::VS_32RegClass)
              AllUsesAcceptSReg = true;

    if (!AllUsesAcceptSReg)

  return !AllUsesAcceptSReg && (Limit < 10);
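// Added commentary (not in the original source): the loop walks at most ten
// uses of the immediate. The result is true only when some use cannot accept
// an SGPR operand (even after considering the commuted operand slot) and the
// walk terminated before reaching the cap of ten uses.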
bool AMDGPUDAGToDAGISel::isUniformLoad(const SDNode *N) const {
  auto Ld = cast<LoadSDNode>(N);

  return Ld->getAlign() >= Align(4) &&

            !N->isDivergent()) ||

           Ld->isSimple() && !N->isDivergent() &&

               ->isMemOpHasNoClobberedMemOperand(N)));

  bool IsModified = false;

      SDNode *Node = &*Position++;

      if (ResNode != Node) {

  } while (IsModified);