#include "llvm/IR/IntrinsicsAMDGPU.h"

#ifdef EXPENSIVE_CHECKS

#define DEBUG_TYPE "amdgpu-isel"

  In = stripBitcast(In);
  Out = In.getOperand(0);
  if (ShiftAmt->getZExtValue() == 16) {
  return In.getOperand(0);
  if (Src.getValueType().getSizeInBits() == 32)
    return stripBitcast(Src);

"AMDGPU DAG->DAG Pattern Instruction Selection", false, false)
#ifdef EXPENSIVE_CHECKS
"AMDGPU DAG->DAG Pattern Instruction Selection", false, false)

#ifdef EXPENSIVE_CHECKS
  DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  assert(L->isLCSSAForm(DT));
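// fp16SrcZerosHighBits: reports whether an f16 result produced by the given
// opcode is known to leave zeros in the high 16 bits of the 32-bit register.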
bool AMDGPUDAGToDAGISel::fp16SrcZerosHighBits(unsigned Opc) const {

#ifdef EXPENSIVE_CHECKS

  MVT VT = N->getValueType(0).getSimpleVT();
  if (VT != MVT::v2i16 && VT != MVT::v2f16)

  LoadSDNode *LdHi = dyn_cast<LoadSDNode>(stripBitcast(Hi));

  LoadSDNode *LdLo = dyn_cast<LoadSDNode>(stripBitcast(Lo));
  if (LdLo && Lo.hasOneUse()) {

  bool MadeChange = false;
  switch (N->getOpcode()) {
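// isInlineImmediate: true if the constant node N (integer or FP) can be
// encoded as an inline immediate; with Negated set, the negated value is
// tested instead.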
bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N,
                                           bool Negated) const {
  return TII->isInlineConstant(-C->getAPIntValue());
  return TII->isInlineConstant(-C->getValueAPF().bitcastToAPInt());
  return TII->isInlineConstant(C->getAPIntValue());
  return TII->isInlineConstant(C->getValueAPF().bitcastToAPInt());

    unsigned OpNo) const {
  if (!N->isMachineOpcode()) {
    Register Reg = cast<RegisterSDNode>(N->getOperand(1))->getReg();
    if (Reg.isVirtual()) {
      return MRI.getRegClass(Reg);

        = static_cast<const GCNSubtarget *>(Subtarget)->getRegisterInfo();
    return TRI->getPhysRegBaseClass(Reg);
  switch (N->getMachineOpcode()) {
    unsigned OpIdx = Desc.getNumDefs() + OpNo;
    if (OpIdx >= Desc.getNumOperands())
    int RegClass = Desc.operands()[OpIdx].RegClass;
  case AMDGPU::REG_SEQUENCE: {
    unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
    SDValue SubRegOp = N->getOperand(OpNo + 1);
    unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue();

  SmallVector<SDValue, 8> Ops;
  for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i)

  assert(N->getOperand(0).getValueType() == MVT::Other && "Expected chain");
  return glueCopyToOp(N, M0, M0.getValue(1));
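// glueCopyToM0LDSInit: for LDS/region memory nodes on subtargets that require
// M0 to be initialized, glue an M0 initialization ahead of the node.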
SDNode *AMDGPUDAGToDAGISel::glueCopyToM0LDSInit(SDNode *N) const {
  unsigned AS = cast<MemSDNode>(N)->getAddressSpace();

      AMDGPU::S_MOV_B32, DL, MVT::i32,

  EVT VT = N->getValueType(0);
  if (NumVectorElts == 1) {
  assert(NumVectorElts <= 32 && "Vectors with more than 32 elements not "

  bool IsRegSeq = true;
  unsigned NOps = N->getNumOperands();
  for (unsigned i = 0; i < NOps; i++) {
    if (isa<RegisterSDNode>(N->getOperand(i))) {
    RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
  if (NOps != NumVectorElts) {
    for (unsigned i = NOps; i < NumVectorElts; ++i) {
      RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
      RegSeqArgs[1 + (2 * i) + 1] =
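// Select(): main dispatch. Machine nodes are already selected; LDS memory
// nodes get an M0 init glued on, then the node is routed by opcode to the
// Select* helpers below.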
  unsigned int Opc = N->getOpcode();
  if (N->isMachineOpcode()) {
  N = glueCopyToM0LDSInit(N);
    if (N->getValueType(0) != MVT::i64)
    SelectADD_SUB_I64(N);
    if (N->getValueType(0) != MVT::i32)
    SelectUADDO_USUBO(N);
    SelectFMUL_W_CHAIN(N);
    SelectFMA_W_CHAIN(N);
    EVT VT = N->getValueType(0);
    unsigned RegClassID =
    if (N->getValueType(0) == MVT::i128) {
    } else if (N->getValueType(0) == MVT::i64) {
    const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
                            N->getOperand(1), SubReg1 };
                                  N->getValueType(0), Ops));
    if (N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
      Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
      Imm = C->getZExtValue();
    uint32_t WidthVal = Width->getZExtValue();
    return SelectMUL_LOHI(N);
    if (N->getValueType(0) != MVT::i32)
    if (N->getValueType(0) == MVT::i32) {
                           { N->getOperand(0), N->getOperand(1) });
    SelectINTRINSIC_W_CHAIN(N);
    SelectINTRINSIC_WO_CHAIN(N);
    SelectINTRINSIC_VOID(N);
    SelectWAVE_ADDRESS(N);
    SelectSTACKRESTORE(N);
bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const {
  return Term->getMetadata("amdgpu.uniform") ||
         Term->getMetadata("structurizecfg.uniform");

bool AMDGPUDAGToDAGISel::isUnneededShiftMask(const SDNode *N,
                                             unsigned ShAmtBits) const {
  const APInt &RHS = cast<ConstantSDNode>(N->getOperand(1))->getAPIntValue();
  if (RHS.countr_one() >= ShAmtBits)
  return (LHSKnownZeros | RHS).countr_one() >= ShAmtBits;

    N1 = Lo.getOperand(1);
  assert(LHS && RHS && isa<ConstantSDNode>(RHS));

  return "AMDGPU DAG->DAG Pattern Instruction Selection";

  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
      (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
      (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {

SDValue AMDGPUDAGToDAGISel::getMaterializedScalarImm32(int64_t Val,
      AMDGPU::S_MOV_B32, DL, MVT::i32,
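// SelectADD_SUB_I64: splits a 64-bit add/sub into low and high 32-bit halves,
// picking scalar or vector carry opcodes from OpcMap based on divergence, and
// rebuilds the 64-bit result with a REG_SEQUENCE.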
void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
  unsigned Opcode = N->getOpcode();
                                       DL, MVT::i32, LHS, Sub0);
                                       DL, MVT::i32, LHS, Sub1);
                                       DL, MVT::i32, RHS, Sub0);
                                       DL, MVT::i32, RHS, Sub1);

  static const unsigned OpcMap[2][2][2] = {
      {{AMDGPU::S_SUB_U32, AMDGPU::S_ADD_U32},
       {AMDGPU::V_SUB_CO_U32_e32, AMDGPU::V_ADD_CO_U32_e32}},
      {{AMDGPU::S_SUBB_U32, AMDGPU::S_ADDC_U32},
       {AMDGPU::V_SUBB_U32_e32, AMDGPU::V_ADDC_U32_e32}}};

  unsigned Opc = OpcMap[0][N->isDivergent()][IsAdd];
  unsigned CarryOpc = OpcMap[1][N->isDivergent()][IsAdd];
                                       MVT::i64, RegSequenceArgs);

void AMDGPUDAGToDAGISel::SelectAddcSubb(SDNode *N) {
  if (N->isDivergent()) {
                       : AMDGPU::V_SUBB_U32_e64;
        N, Opc, N->getVTList(),
         CurDAG->getTargetConstant(0, {}, MVT::i1)});
                       : AMDGPU::S_SUB_CO_PSEUDO;
    CurDAG->SelectNodeTo(N, Opc, N->getVTList(), {LHS, RHS, CI});
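// SelectUADDO_USUBO: divergent overflow add/sub uses V_ADD_CO/V_SUB_CO with a
// zero clamp operand; uniform nodes use the S_UADDO/S_USUBO pseudos.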
void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(SDNode *N) {
  bool IsVALU = N->isDivergent();
    if (UI.getUse().getResNo() == 1) {

    unsigned Opc = IsAdd ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_SUB_CO_U32_e64;
        N, Opc, N->getVTList(),
        {N->getOperand(0), N->getOperand(1),
         CurDAG->getTargetConstant(0, {}, MVT::i1)});
    unsigned Opc = N->getOpcode() == ISD::UADDO ? AMDGPU::S_UADDO_PSEUDO
                                                : AMDGPU::S_USUBO_PSEUDO;
    CurDAG->SelectNodeTo(N, Opc, N->getVTList(),
                         {N->getOperand(0), N->getOperand(1)});
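// SelectFMA_W_CHAIN: gathers per-source modifiers plus clamp/omod and the
// chain into the operand list, then uses V_FMAC_F32 when all modifiers are
// zero and V_FMA_F32 otherwise.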
void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) {
  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[6], Ops[7]);
  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
  SelectVOP3Mods(N->getOperand(3), Ops[5], Ops[4]);
  Ops[8] = N->getOperand(0);
  Ops[9] = N->getOperand(4);

      cast<ConstantSDNode>(Ops[0])->isZero() &&
      cast<ConstantSDNode>(Ops[2])->isZero() &&
      cast<ConstantSDNode>(Ops[4])->isZero();
  unsigned Opcode = UseFMAC ? AMDGPU::V_FMAC_F32_e64 : AMDGPU::V_FMA_F32_e64;

void AMDGPUDAGToDAGISel::SelectFMUL_W_CHAIN(SDNode *N) {
  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[4], Ops[5]);
  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
  Ops[6] = N->getOperand(0);
  Ops[7] = N->getOperand(3);

void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
  EVT VT = N->getValueType(0);
  assert(VT == MVT::f32 || VT == MVT::f64);
      = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64_e64
                         : AMDGPU::V_DIV_SCALE_F32_e64;
  SelectVOP3BMods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
  SelectVOP3BMods(N->getOperand(1), Ops[3], Ops[2]);
  SelectVOP3BMods(N->getOperand(2), Ops[5], Ops[4]);

void AMDGPUDAGToDAGISel::SelectMAD_64_32(SDNode *N) {
    Opc = Signed ? AMDGPU::V_MAD_I64_I32_gfx11_e64
                 : AMDGPU::V_MAD_U64_U32_gfx11_e64;
    Opc = Signed ? AMDGPU::V_MAD_I64_I32_e64 : AMDGPU::V_MAD_U64_U32_e64;
  SDValue Ops[] = {N->getOperand(0), N->getOperand(1), N->getOperand(2),
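// SelectMUL_LOHI: lowers a 32x32->64 mul_lohi to V_MAD_I64_I32/V_MAD_U64_U32
// with a zero addend, then extracts sub0/sub1 of the 64-bit result for the
// low and high halves.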
void AMDGPUDAGToDAGISel::SelectMUL_LOHI(SDNode *N) {
    Opc = Signed ? AMDGPU::V_MAD_I64_I32_gfx11_e64
                 : AMDGPU::V_MAD_U64_U32_gfx11_e64;
    Opc = Signed ? AMDGPU::V_MAD_I64_I32_e64 : AMDGPU::V_MAD_U64_U32_e64;
  SDValue Ops[] = {N->getOperand(0), N->getOperand(1), Zero, Clamp};
                                        MVT::i32, SDValue(Mad, 0), Sub0);
                                        MVT::i32, SDValue(Mad, 0), Sub1);

    int64_t ByteOffset = C->getSExtValue();
    if (isDSOffsetLegal(SDValue(), ByteOffset)) {
                                 Zero, Addr.getOperand(1));
      if (isDSOffsetLegal(Sub, ByteOffset)) {
        unsigned SubOp = AMDGPU::V_SUB_CO_U32_e32;
          SubOp = AMDGPU::V_SUB_U32_e64;
    if (isDSOffsetLegal(SDValue(), CAddr->getZExtValue())) {
                                         DL, MVT::i32, Zero);
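// isDSOffset2Legal: both offsets of a DS read2/write2 must be multiples of
// the element size and fit in 8 bits once divided by it.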
bool AMDGPUDAGToDAGISel::isDSOffset2Legal(SDValue Base, unsigned Offset0,
                                          unsigned Offset1,
                                          unsigned Size) const {
  if (Offset0 % Size != 0 || Offset1 % Size != 0)
  if (!isUInt<8>(Offset0 / Size) || !isUInt<8>(Offset1 / Size))

bool AMDGPUDAGToDAGISel::isFlatScratchBaseLegal(SDValue Base,

  return SelectDSReadWrite2(Addr, Base, Offset0, Offset1, 4);
  return SelectDSReadWrite2(Addr, Base, Offset0, Offset1, 8);

                                              unsigned Size) const {
    unsigned OffsetValue1 = OffsetValue0 + Size;
    if (isDSOffset2Legal(N0, OffsetValue0, OffsetValue1, Size)) {
          dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
    unsigned OffsetValue0 = C->getZExtValue();
    unsigned OffsetValue1 = OffsetValue0 + Size;
    if (isDSOffset2Legal(SDValue(), OffsetValue0, OffsetValue1, Size)) {
      if (isDSOffset2Legal(Sub, OffsetValue0, OffsetValue1, Size)) {
        unsigned SubOp = AMDGPU::V_SUB_CO_U32_e32;
          SubOp = AMDGPU::V_SUB_U32_e64;
    unsigned OffsetValue0 = CAddr->getZExtValue();
    unsigned OffsetValue1 = OffsetValue0 + Size;
    if (isDSOffset2Legal(SDValue(), OffsetValue0, OffsetValue1, Size)) {

    C1 = cast<ConstantSDNode>(Addr.getOperand(1));
    N0 = Addr.getOperand(0);

      AMDGPU::S_MOV_B32, DL, MVT::i32,

  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64))
    if (C->getSExtValue()) {

std::pair<SDValue, SDValue>
AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const {
  auto *FI = dyn_cast<FrameIndexSDNode>(N);
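// SelectMUBUFScratchOffen: matches a private (scratch) address for MUBUF
// "offen" addressing; constant addresses materialize their high bits into a
// VGPR with V_MOV_B32, frame indices are folded through foldFrameIndex.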
bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent,
    int64_t Imm = CAddr->getSExtValue();
    const int64_t NullPtr =
    if (Imm != NullPtr) {
          AMDGPU::V_MOV_B32_e32, DL, MVT::i32, HighBits);
      VAddr = SDValue(MovHighBits, 0);

      std::tie(VAddr, SOffset) = foldFrameIndex(N0);
  std::tie(VAddr, SOffset) = foldFrameIndex(Addr);

  auto Reg = cast<RegisterSDNode>(Val.getOperand(1))->getReg();
  if (!Reg.isPhysical())
  auto RC = TRI.getPhysRegBaseClass(Reg);
  return RC && TRI.isSGPRClass(RC);

bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent,
    CAddr = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
    SOffset = Addr.getOperand(0);
  } else if ((CAddr = dyn_cast<ConstantSDNode>(Addr)) &&

  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64))
  if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
      !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
      !cast<ConstantSDNode>(Addr64)->getSExtValue()) {

  assert(isa<BuildVectorSDNode>(N));

  int64_t OffsetVal = 0;
  bool CanHaveFlatSegmentOffsetBug =
  if (isBaseWithConstantOffset64(Addr, N0, N1) &&
      isFlatScratchBaseLegal(N0, FlatVariant)) {
    int64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue();
    if (TII->isLegalFLATOffset(COffsetVal, AS, FlatVariant)) {
      OffsetVal = COffsetVal;
      std::tie(OffsetVal, RemainderOffset) =
          TII->splitFlatOffset(COffsetVal, AS, FlatVariant);
          getMaterializedScalarImm32(Lo_32(RemainderOffset), DL);
      if (Addr.getValueType().getSizeInBits() == 32) {
        unsigned AddOp = AMDGPU::V_ADD_CO_U32_e32;
          AddOp = AMDGPU::V_ADD_U32_e64;
                                         DL, MVT::i32, N0, Sub0);
                                         DL, MVT::i32, N0, Sub1);
            getMaterializedScalarImm32(Hi_32(RemainderOffset), DL);
            {AddOffsetLo, SDValue(N0Lo, 0), Clamp});
            AMDGPU::V_ADDC_U32_e64, DL, VTs,
                                         MVT::i64, RegSequenceArgs),

  return SelectFlatOffsetImpl(N, Addr, VAddr, Offset,
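// SelectGlobalSAddr: matches a global address as SGPR base + VGPR offset +
// immediate, splitting an out-of-range immediate with splitFlatOffset.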
bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(SDNode *N,
  int64_t ImmOffset = 0;

  if (isBaseWithConstantOffset64(Addr, LHS, RHS)) {
    int64_t COffsetVal = cast<ConstantSDNode>(RHS)->getSExtValue();
      ImmOffset = COffsetVal;
    } else if (!LHS->isDivergent()) {
      if (COffsetVal > 0) {
        int64_t SplitImmOffset, RemainderOffset;
        std::tie(SplitImmOffset, RemainderOffset) = TII->splitFlatOffset(
        if (isUInt<32>(RemainderOffset)) {
              AMDGPU::V_MOV_B32_e32, SL, MVT::i32,

    unsigned NumLiterals =
        !TII->isInlineConstant(APInt(32, COffsetVal & 0xffffffff)) +
        !TII->isInlineConstant(APInt(32, COffsetVal >> 32));

  if (!LHS->isDivergent()) {
  if (!SAddr && !RHS->isDivergent()) {

      isa<ConstantSDNode>(Addr))

  if (auto FI = dyn_cast<FrameIndexSDNode>(SAddr)) {
             isa<FrameIndexSDNode>(SAddr.getOperand(0))) {
    auto FI = cast<FrameIndexSDNode>(SAddr.getOperand(0));
                                        FI->getValueType(0));

  if (Addr->isDivergent())
  int64_t COffsetVal = 0;
      isFlatScratchBaseLegal(Addr.getOperand(0))) {
    COffsetVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
    SAddr = Addr.getOperand(0);

      int64_t SplitImmOffset, RemainderOffset;
      std::tie(SplitImmOffset, RemainderOffset) = TII->splitFlatOffset(
      COffsetVal = SplitImmOffset;
              ? getMaterializedScalarImm32(Lo_32(RemainderOffset), DL)
              : CurDAG->getTargetConstant(RemainderOffset, DL, MVT::i32);

bool AMDGPUDAGToDAGISel::checkFlatScratchSVSSwizzleBug(
  return (VMax & 3) + (SMax & 3) >= 4;

  int64_t ImmOffset = 0;
  if (isBaseWithConstantOffset64(Addr, LHS, RHS)) {
    int64_t COffsetVal = cast<ConstantSDNode>(RHS)->getSExtValue();
      ImmOffset = COffsetVal;
    } else if (!LHS->isDivergent() && COffsetVal > 0) {
      int64_t SplitImmOffset, RemainderOffset;
      std::tie(SplitImmOffset, RemainderOffset)
      if (isUInt<32>(RemainderOffset)) {
            AMDGPU::V_MOV_B32_e32, SL, MVT::i32,
        if (!isFlatScratchBaseLegal(SAddr) || !isFlatScratchBaseLegal(VAddr))
        if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, SplitImmOffset))

  if (!LHS->isDivergent() && RHS->isDivergent()) {
  } else if (!RHS->isDivergent() && LHS->isDivergent()) {
  if (!isFlatScratchBaseLegal(SAddr) || !isFlatScratchBaseLegal(VAddr))
  if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, ImmOffset))
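// SelectSMRDOffset: matches the offset operand of scalar memory (SMRD)
// instructions, either as an encodable immediate or as an SGPR soffset.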
bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
                                          bool Imm32Only, bool IsBuffer) const {
         "Cannot match both soffset and offset at the same time!");
    *SOffset = ByteOffsetNode;

  SDLoc SL(ByteOffsetNode);
  int64_t ByteOffset = IsBuffer ? C->getZExtValue() : C->getSExtValue();
  std::optional<int64_t> EncodedOffset =
  if (EncodedOffset && Offset && !Imm32Only) {
  if (EncodedOffset && Offset && Imm32Only) {
  if (!isUInt<32>(ByteOffset) && !isInt<32>(ByteOffset))

  if (Addr.getValueType() != MVT::i32)
  unsigned AddrHiVal = Info->get32BitAddressHighBits();

                                           bool IsBuffer) const {
    assert(!Imm32Only && !IsBuffer);
    return SelectSMRDBaseOffset(Addr, B, nullptr, Offset) &&
           SelectSMRDBaseOffset(B, SBase, SOffset, nullptr);

      !Addr->getFlags().hasNoUnsignedWrap())
    N0 = Addr.getOperand(0);
    N1 = Addr.getOperand(1);
  assert(N0 && N1 && isa<ConstantSDNode>(N1));
  if (SelectSMRDOffset(N1, SOffset, Offset, Imm32Only, IsBuffer)) {
  if (SelectSMRDOffset(N0, SOffset, Offset, Imm32Only, IsBuffer)) {

                                bool Imm32Only) const {
  if (SelectSMRDBaseOffset(Addr, SBase, SOffset, Offset, Imm32Only)) {
    SBase = Expand32BitAddress(SBase);
  if (Addr.getValueType() == MVT::i32 && Offset && !SOffset) {

  return SelectSMRD(Addr, SBase, nullptr, &Offset);
  return SelectSMRD(Addr, SBase, nullptr, &Offset,
  return SelectSMRD(Addr, SBase, &SOffset, nullptr);
  return SelectSMRD(Addr, SBase, &SOffset, &Offset);
  return SelectSMRDOffset(N, nullptr, &Offset,

bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue N,
  return SelectSMRDOffset(N, nullptr, &Offset,

bool AMDGPUDAGToDAGISel::SelectSMRDBufferSgprImm(SDValue N, SDValue &SOffset,
  return N.getValueType() == MVT::i32 &&
         SelectSMRDBaseOffset(N, SOffset, nullptr,

bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index,
  if (isa<ConstantSDNode>(Index))
SDNode *AMDGPUDAGToDAGISel::getBFE32(bool IsSigned, const SDLoc &DL,
    unsigned Opcode = IsSigned ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;
  unsigned Opcode = IsSigned ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;

void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
  const SDValue &Shl = N->getOperand(0);
    if (0 < BVal && BVal <= CVal && CVal < 32) {

void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
  switch (N->getOpcode()) {
    if (N->getOperand(0).getOpcode() == ISD::SRL) {
      const SDValue &Srl = N->getOperand(0);
      if (Shift && Mask) {
    if (N->getOperand(0).getOpcode() == ISD::AND) {
      if (Shift && Mask) {
    } else if (N->getOperand(0).getOpcode() == ISD::SHL) {
      SelectS_BFEFromShifts(N);
    if (N->getOperand(0).getOpcode() == ISD::SHL) {
      SelectS_BFEFromShifts(N);
    const ConstantSDNode *Amt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
    unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();

bool AMDGPUDAGToDAGISel::isCBranchSCC(const SDNode *N) const {
  if (!N->hasOneUse())
  MVT VT = Cond.getOperand(0).getSimpleValueType();
  if (VT == MVT::i64) {

void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
  if (Cond.isUndef()) {
                         N->getOperand(2), N->getOperand(0));

  bool UseSCCBr = isCBranchSCC(N) && isUniformBr(N);
  unsigned BrOp = UseSCCBr ? AMDGPU::S_CBRANCH_SCC1 : AMDGPU::S_CBRANCH_VCCNZ;
  Register CondReg = UseSCCBr ? AMDGPU::SCC : TRI->getVCC();
                     : AMDGPU::S_AND_B64,

void AMDGPUDAGToDAGISel::SelectFP_EXTEND(SDNode *N) {
      !N->isDivergent()) {
    if (Src.getValueType() == MVT::f16) {

void AMDGPUDAGToDAGISel::SelectDSAppendConsume(SDNode *N, unsigned IntrID) {
  unsigned Opc = IntrID == Intrinsic::amdgcn_ds_append ?
    AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME;
    const APInt &OffsetVal = cast<ConstantSDNode>(PtrOffset)->getAPIntValue();
    if (isDSOffsetLegal(PtrBase, OffsetVal.getZExtValue())) {
      N = glueCopyToM0(N, PtrBase);
    N = glueCopyToM0(N, Ptr);
    N->getOperand(N->getNumOperands() - 1)

void AMDGPUDAGToDAGISel::SelectDSBvhStackIntrinsic(SDNode *N) {
  unsigned Opc = AMDGPU::DS_BVH_STACK_RTN_B32;
  SDValue Ops[] = {N->getOperand(2), N->getOperand(3), N->getOperand(4),
                   N->getOperand(5), N->getOperand(0)};

  case Intrinsic::amdgcn_ds_gws_init:
    return AMDGPU::DS_GWS_INIT;
  case Intrinsic::amdgcn_ds_gws_barrier:
    return AMDGPU::DS_GWS_BARRIER;
  case Intrinsic::amdgcn_ds_gws_sema_v:
    return AMDGPU::DS_GWS_SEMA_V;
  case Intrinsic::amdgcn_ds_gws_sema_br:
    return AMDGPU::DS_GWS_SEMA_BR;
  case Intrinsic::amdgcn_ds_gws_sema_p:
    return AMDGPU::DS_GWS_SEMA_P;
  case Intrinsic::amdgcn_ds_gws_sema_release_all:
    return AMDGPU::DS_GWS_SEMA_RELEASE_ALL;
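// SelectDS_GWS: selects the ds_gws_* intrinsics; bails out when GWS (or
// ds_gws_sema_release_all) is unsupported, copies the base offset into M0 and
// encodes any constant offset as an immediate.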
void AMDGPUDAGToDAGISel::SelectDS_GWS(SDNode *N, unsigned IntrID) {
  if (!Subtarget->hasGWS() ||
      (IntrID == Intrinsic::amdgcn_ds_gws_sema_release_all &&

  const bool HasVSrc = N->getNumOperands() == 4;
  assert(HasVSrc || N->getNumOperands() == 3);

  SDValue BaseOffset = N->getOperand(HasVSrc ? 3 : 2);
  if (ConstantSDNode *ConstOffset = dyn_cast<ConstantSDNode>(BaseOffset)) {
    ImmOffset = ConstOffset->getZExtValue();

  glueCopyToM0(N, SDValue(M0Base, 0));

void AMDGPUDAGToDAGISel::SelectInterpP1F16(SDNode *N) {

void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *N) {
  unsigned IntrID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
  case Intrinsic::amdgcn_ds_append:
  case Intrinsic::amdgcn_ds_consume: {
    if (N->getValueType(0) != MVT::i32)
    SelectDSAppendConsume(N, IntrID);
  case Intrinsic::amdgcn_ds_bvh_stack_rtn:
    SelectDSBvhStackIntrinsic(N);

void AMDGPUDAGToDAGISel::SelectINTRINSIC_WO_CHAIN(SDNode *N) {
  unsigned IntrID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
  case Intrinsic::amdgcn_wqm:
    Opcode = AMDGPU::WQM;
  case Intrinsic::amdgcn_softwqm:
    Opcode = AMDGPU::SOFT_WQM;
  case Intrinsic::amdgcn_wwm:
  case Intrinsic::amdgcn_strict_wwm:
    Opcode = AMDGPU::STRICT_WWM;
  case Intrinsic::amdgcn_strict_wqm:
    Opcode = AMDGPU::STRICT_WQM;
  case Intrinsic::amdgcn_interp_p1_f16:
    SelectInterpP1F16(N);
  case Intrinsic::amdgcn_inverse_ballot:
    switch (N->getOperand(1).getValueSizeInBits()) {
      Opcode = AMDGPU::S_INVERSE_BALLOT_U32;
      Opcode = AMDGPU::S_INVERSE_BALLOT_U64;

void AMDGPUDAGToDAGISel::SelectINTRINSIC_VOID(SDNode *N) {
  unsigned IntrID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
  case Intrinsic::amdgcn_ds_gws_init:
  case Intrinsic::amdgcn_ds_gws_barrier:
  case Intrinsic::amdgcn_ds_gws_sema_v:
  case Intrinsic::amdgcn_ds_gws_sema_br:
  case Intrinsic::amdgcn_ds_gws_sema_p:
  case Intrinsic::amdgcn_ds_gws_sema_release_all:
    SelectDS_GWS(N, IntrID);

void AMDGPUDAGToDAGISel::SelectWAVE_ADDRESS(SDNode *N) {
                       {N->getOperand(0), Log2WaveSize});

void AMDGPUDAGToDAGISel::SelectSTACKRESTORE(SDNode *N) {
  if (N->isDivergent()) {
                            {SrcVal, Log2WaveSize}),
bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(SDValue In, SDValue &Src,
                                            bool IsCanonicalizing,
                                            bool AllowAbs) const {
    Src = Src.getOperand(0);
  } else if (Src.getOpcode() == ISD::FSUB && IsCanonicalizing) {
    auto *LHS = dyn_cast<ConstantFPSDNode>(Src.getOperand(0));
    if (LHS && LHS->isZero()) {
      Src = Src.getOperand(1);
  if (AllowAbs && Src.getOpcode() == ISD::FABS) {
    Src = Src.getOperand(0);

  if (SelectVOP3ModsImpl(In, Src, Mods, true,

bool AMDGPUDAGToDAGISel::SelectVOP3ModsNonCanonicalizing(
  if (SelectVOP3ModsImpl(In, Src, Mods, false,

bool AMDGPUDAGToDAGISel::SelectVOP3BMods(SDValue In, SDValue &Src,
  if (SelectVOP3ModsImpl(In, Src, Mods,

bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src) const {

bool AMDGPUDAGToDAGISel::SelectVINTERPModsImpl(SDValue In, SDValue &Src,
  if (SelectVOP3ModsImpl(In, Src, Mods,

bool AMDGPUDAGToDAGISel::SelectVINTERPMods(SDValue In, SDValue &Src,
  return SelectVINTERPModsImpl(In, Src, SrcMods, false);

bool AMDGPUDAGToDAGISel::SelectVINTERPModsHi(SDValue In, SDValue &Src,
  return SelectVINTERPModsImpl(In, Src, SrcMods, true);

bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
  return SelectVOP3Mods(In, Src, SrcMods);

bool AMDGPUDAGToDAGISel::SelectVOP3BMods0(SDValue In, SDValue &Src,
  return SelectVOP3BMods(In, Src, SrcMods);

bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src,

bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src,
                                         SDValue &SrcMods, bool IsDOT) const {
    Src = Src.getOperand(0);
  unsigned VecMods = Mods;
    SDValue Lo = stripBitcast(Src.getOperand(0));
    SDValue Hi = stripBitcast(Src.getOperand(1));
      Lo = stripBitcast(Lo.getOperand(0));
      Hi = stripBitcast(Hi.getOperand(0));
    unsigned VecSize = Src.getValueSizeInBits();
    Lo = stripExtractLoElt(Lo);
    Hi = stripExtractLoElt(Hi);
    if (Lo.getValueSizeInBits() > VecSize) {
          (VecSize > 32) ? AMDGPU::sub0_sub1 : AMDGPU::sub0, SDLoc(In),
    if (Hi.getValueSizeInBits() > VecSize) {
          (VecSize > 32) ? AMDGPU::sub0_sub1 : AMDGPU::sub0, SDLoc(In),
    assert(Lo.getValueSizeInBits() <= VecSize &&
           Hi.getValueSizeInBits() <= VecSize);
    if (Lo == Hi && !isInlineImmediate(Lo.getNode())) {
      if (VecSize == 32 || VecSize == Lo.getValueSizeInBits()) {
        assert(Lo.getValueSizeInBits() == 32 && VecSize == 64);
                                       Lo.getValueType()), 0);
        auto RC = Lo->isDivergent() ? AMDGPU::VReg_64RegClassID
                                    : AMDGPU::SReg_64RegClassID;
                                       Src.getValueType(), Ops), 0);
    if (VecSize == 64 && Lo == Hi && isa<ConstantFPSDNode>(Lo)) {
      uint64_t Lit = cast<ConstantFPSDNode>(Lo)->getValueAPF()
                         .bitcastToAPInt().getZExtValue();

bool AMDGPUDAGToDAGISel::SelectVOP3PModsDOT(SDValue In, SDValue &Src,
  return SelectVOP3PMods(In, Src, SrcMods, true);

bool AMDGPUDAGToDAGISel::SelectDotIUVOP3PMods(SDValue In, SDValue &Src) const {
  assert(C->getAPIntValue().getBitWidth() == 1 && "expected i1 value");
  unsigned SrcSign = C->getZExtValue();

bool AMDGPUDAGToDAGISel::SelectWMMAOpSelVOP3PMods(SDValue In,
  assert(C->getAPIntValue().getBitWidth() == 1 && "expected i1 value");
  unsigned SrcVal = C->getZExtValue();

bool AMDGPUDAGToDAGISel::SelectVOP3OpSel(SDValue In, SDValue &Src,

bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods(SDValue In, SDValue &Src,
  return SelectVOP3Mods(In, Src, SrcMods);

bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src,
                                                   unsigned &Mods) const {
  SelectVOP3ModsImpl(In, Src, Mods);
    Src = Src.getOperand(0);
    assert(Src.getValueType() == MVT::f16);
    Src = stripBitcast(Src);
    SelectVOP3ModsImpl(Src, Src, ModsTmp);

bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsExt(SDValue In, SDValue &Src,
  if (!SelectVOP3PMadMixModsImpl(In, Src, Mods))

bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixMods(SDValue In, SDValue &Src,
  SelectVOP3PMadMixModsImpl(In, Src, Mods);

      C->getValueAPF().bitcastToAPInt().getZExtValue() << 16, SL, MVT::i32);
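// isVGPRImm: decides whether an immediate should be materialized in a VGPR by
// scanning up to ten uses and checking whether every use (allowing operand
// commutation) could instead accept an SGPR.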
bool AMDGPUDAGToDAGISel::isVGPRImm(const SDNode *N) const {
  bool AllUsesAcceptSReg = true;
       Limit < 10 && U != E; ++U, ++Limit) {
    if (RC != &AMDGPU::VS_32RegClass) {
      AllUsesAcceptSReg = false;
      if (User->isMachineOpcode()) {
        unsigned Opc = User->getMachineOpcode();
        if (Desc.isCommutable()) {
          unsigned OpIdx = Desc.getNumDefs() + U.getOperandNo();
            unsigned CommutedOpNo = CommuteIdx1 - Desc.getNumDefs();
            if (CommutedRC == &AMDGPU::VS_32RegClass)
              AllUsesAcceptSReg = true;
    if (!AllUsesAcceptSReg)
  return !AllUsesAcceptSReg && (Limit < 10);

bool AMDGPUDAGToDAGISel::isUniformLoad(const SDNode *N) const {
  auto Ld = cast<LoadSDNode>(N);

  return Ld->getAlign() >= Align(4) &&
              ->isMemOpHasNoClobberedMemOperand(N)));
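// PostprocessISelDAG: repeatedly walks the selected nodes and applies the
// target's post-isel folding until a full pass makes no more changes.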
  bool IsModified = false;
    SDNode *Node = &*Position++;
      if (ResNode != Node) {
  } while (IsModified);