#include "llvm/IR/IntrinsicsAMDGPU.h"
// ...
#ifdef EXPENSIVE_CHECKS
// ...
#define DEBUG_TYPE "amdgpu-isel"
  In = stripBitcast(In);
  // ...
  Out = In.getOperand(0);
  // ...
  if (ShiftAmt->getZExtValue() == 16) {
    // ...

  if (Lo->isDivergent()) {
    // ...
                               SL, Lo.getValueType()),
    // ...
                             Src.getValueType(), Ops),
    // ...

  SDValue Idx = In.getOperand(1);
  // ...
    return In.getOperand(0);
  // ...
    SDValue Src = In.getOperand(0);
    if (Src.getValueType().getSizeInBits() == 32)
      return stripBitcast(Src);
INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISelLegacy, "amdgpu-isel",
                      "AMDGPU DAG->DAG Pattern Instruction Selection", false,
                      false)
// ...
#ifdef EXPENSIVE_CHECKS
// ...
INITIALIZE_PASS_END(AMDGPUDAGToDAGISelLegacy, "amdgpu-isel",
                    "AMDGPU DAG->DAG Pattern Instruction Selection", false,
                    false)
bool AMDGPUDAGToDAGISel::fp16SrcZerosHighBits(unsigned Opc) const {
  // ...
  case ISD::FNEARBYINT:
  case ISD::FROUNDEVEN:
  // ...
  case AMDGPUISD::FRACT:
  case AMDGPUISD::CLAMP:
  case AMDGPUISD::COS_HW:
  case AMDGPUISD::SIN_HW:
  case AMDGPUISD::FMIN3:
  case AMDGPUISD::FMAX3:
  case AMDGPUISD::FMED3:
  case AMDGPUISD::FMAD_FTZ:
  // ...
  case AMDGPUISD::RCP_IFLAG:
  // ...
  case AMDGPUISD::DIV_FIXUP:
  // ...

#ifdef EXPENSIVE_CHECKS
  // ...
    assert(L->isLCSSAForm(DT));
#ifdef EXPENSIVE_CHECKS
// ...

  assert(Subtarget->d16PreservesUnusedBits());
  MVT VT = N->getValueType(0).getSimpleVT();
  if (VT != MVT::v2i16 && VT != MVT::v2f16)
    // ...

  unsigned LoadOp = AMDGPUISD::LOAD_D16_HI;
  // ...
      AMDGPUISD::LOAD_D16_HI_I8 : AMDGPUISD::LOAD_D16_HI_U8;
  // ...
      CurDAG->getMemIntrinsicNode(LoadOp, SDLoc(LdHi), VTList,
                                  /*...*/);
  // ...

  if (LdLo && Lo.hasOneUse()) {
    // ...
    unsigned LoadOp = AMDGPUISD::LOAD_D16_LO;
    // ...
        AMDGPUISD::LOAD_D16_LO_I8 : AMDGPUISD::LOAD_D16_LO_U8;
    // ...
    TiedIn = CurDAG->getNode(ISD::BITCAST, SDLoc(N), VT, TiedIn);
    // ...
        CurDAG->getMemIntrinsicNode(LoadOp, SDLoc(LdLo), VTList,
                                    /*...*/);
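// Note (explanatory comment, not in the original source): d16 loads write
// only one 16-bit half of a 32-bit register and preserve the other half.
// The code above folds a build_vector of two loaded halves into a pair of
// tied LOAD_D16_LO/LOAD_D16_HI nodes instead of a load plus shift/or merge.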
  if (!Subtarget->d16PreservesUnusedBits())
    return;
  // ...
  bool MadeChange = false;
  while (Position != CurDAG->allnodes_begin()) {
    // ...
    switch (N->getOpcode()) {
    // ...
    }
  }
  // ...
    CurDAG->RemoveDeadNodes();
}

bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N) const {
  // ...
    return TII->isInlineConstant(C->getAPIntValue());
  // ...
    return TII->isInlineConstant(C->getValueAPF());
                                                          unsigned OpNo) const {
  if (!N->isMachineOpcode()) {
    // ...
    if (Reg.isVirtual()) {
      // ...
      return MRI.getRegClass(Reg);
    }
    // ...
    const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
    return TRI->getPhysRegBaseClass(Reg);
  }
  // ...
  switch (N->getMachineOpcode()) {
  default: {
    const SIInstrInfo *TII = Subtarget->getInstrInfo();
    const MCInstrDesc &Desc = TII->get(N->getMachineOpcode());
    unsigned OpIdx = Desc.getNumDefs() + OpNo;
    // ...
    int16_t RegClass = TII->getOpRegClassID(Desc.operands()[OpIdx]);
    // ...
    return Subtarget->getRegisterInfo()->getRegClass(RegClass);
  }
  case AMDGPU::REG_SEQUENCE: {
    unsigned RCID = N->getConstantOperandVal(0);
    const TargetRegisterClass *SuperRC =
        Subtarget->getRegisterInfo()->getRegClass(RCID);
    // ...
    SDValue SubRegOp = N->getOperand(OpNo + 1);
    // ...
    return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC,
                                                               /*...*/);
  Ops.push_back(NewChain);
  for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i)
    Ops.push_back(N->getOperand(i));
  // ...
  return CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);
}

// ...
  assert(N->getOperand(0).getValueType() == MVT::Other && "Expected chain");
  // ...
  return glueCopyToOp(N, M0, M0.getValue(1));
SDNode *AMDGPUDAGToDAGISel::glueCopyToM0LDSInit(SDNode *N) const {
  // ...
  if (Subtarget->ldsRequiresM0Init())
    return glueCopyToM0(
        N, CurDAG->getSignedTargetConstant(-1, SDLoc(N), MVT::i32));
  // ...
  MachineFunction &MF = CurDAG->getMachineFunction();
  unsigned Value = MF.getInfo<SIMachineFunctionInfo>()->getGDSSize();
  // ...
      glueCopyToM0(N, CurDAG->getTargetConstant(Value, SDLoc(N), MVT::i32));
  SDNode *Lo = CurDAG->getMachineNode(
      AMDGPU::S_MOV_B32, DL, MVT::i32, /*...*/);
  SDNode *Hi = CurDAG->getMachineNode(
      AMDGPU::S_MOV_B32, DL, MVT::i32, /*...*/);
  const SDValue Ops[] = {
      CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
      /*...*/};

  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, VT, Ops);
SDNode *AMDGPUDAGToDAGISel::packConstantV2I16(const SDNode *N,
                                              /*...*/) {
  // ...
  uint32_t LHSVal, RHSVal;
  // ...
  uint32_t K = (LHSVal & 0xffff) | (RHSVal << 16);
  // ...
      isVGPRImm(N) ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32, SL,
      /*...*/);
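// Worked example (illustrative, not from the original source): for a v2i16
// build_vector of constants LHSVal = 0x1234 (element 0) and RHSVal = 0xABCD
// (element 1), the packed literal is
//   K = (0x1234 & 0xffff) | (0xABCD << 16) = 0xABCD1234,
// moved with a single V_MOV_B32 or S_MOV_B32 depending on isVGPRImm(N).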
void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) {
  EVT VT = N->getValueType(0);
  // ...
  SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
  // ...
  if (NumVectorElts == 1) {
    CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT, N->getOperand(0),
                         /*...*/);
    // ...
  }

  bool IsGCN = CurDAG->getSubtarget().getTargetTriple().isAMDGCN();
  if (IsGCN && Subtarget->has64BitLiterals() && VT.getSizeInBits() == 64 &&
      /*...*/) {
    // ...
    bool AllConst = true;
    // ...
    for (unsigned I = 0; I < NumVectorElts; ++I) {
      // ...
        Val = CF->getValueAPF().bitcastToAPInt().getZExtValue();
      // ...
      C |= Val << (EltSize * I);
    }
    // ...
        CurDAG->getMachineNode(AMDGPU::S_MOV_B64_IMM_PSEUDO, DL, VT, CV);
    CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, VT, SDValue(Copy, 0),
                         /*...*/);
    // ...
  }

  assert(NumVectorElts <= 32 && "Vectors with more than 32 elements not "
                                /*...*/);
  // ...
  RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
  bool IsRegSeq = true;
  unsigned NOps = N->getNumOperands();
  for (unsigned i = 0; i < NOps; i++) {
    // ...
    RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
    RegSeqArgs[1 + (2 * i) + 1] = CurDAG->getTargetConstant(Sub, DL, MVT::i32);
  }
  if (NOps != NumVectorElts) {
    // ...
    for (unsigned i = NOps; i < NumVectorElts; ++i) {
      // ...
      RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
      RegSeqArgs[1 + (2 * i) + 1] =
          /*...*/;
    }
  }
  // ...
  CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs);
}
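// Note (explanatory comment, not in the original source): REG_SEQUENCE
// operands are the register-class ID followed by (value, subregister-index)
// pairs, so RegSeqArgs is laid out as {RCID, Elt0, sub0, Elt1, sub1, ...};
// missing trailing elements are padded with IMPLICIT_DEF above.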
void AMDGPUDAGToDAGISel::SelectVectorShuffle(SDNode *N) {
  EVT VT = N->getValueType(0);
  // ...
  if (!Subtarget->hasPkMovB32() || !EltVT.bitsEq(MVT::i32) ||
      /*...*/)
    // ...
         Mask[0] < 4 && Mask[1] < 4);
  // ...
  SDValue VSrc0 = Mask[0] < 2 ? Src0 : Src1;
  SDValue VSrc1 = Mask[1] < 2 ? Src0 : Src1;
  unsigned Src0SubReg = Mask[0] & 1 ? AMDGPU::sub1 : AMDGPU::sub0;
  unsigned Src1SubReg = Mask[1] & 1 ? AMDGPU::sub1 : AMDGPU::sub0;
  // ...
    Src0SubReg = Src1SubReg;
    // ...
        CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT);
  // ...
    Src1SubReg = Src0SubReg;
    // ...
        CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT);
  // ...
  if (N->isDivergent() && Src0SubReg == AMDGPU::sub1 &&
      Src1SubReg == AMDGPU::sub0) {
    // ...
    SDValue Src0OpSelVal = CurDAG->getTargetConstant(Src0OpSel, DL, MVT::i32);
    SDValue Src1OpSelVal = CurDAG->getTargetConstant(Src1OpSel, DL, MVT::i32);
    // ...
    CurDAG->SelectNodeTo(N, AMDGPU::V_PK_MOV_B32, N->getVTList(),
                         {Src0OpSelVal, VSrc0, Src1OpSelVal, VSrc1,
                          /*...*/});
    // ...
  }

  // ...
      CurDAG->getTargetExtractSubreg(Src0SubReg, DL, EltVT, VSrc0);
  // ...
      CurDAG->getTargetExtractSubreg(Src1SubReg, DL, EltVT, VSrc1);
  const SDValue Ops[] = {
      CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
      ResultElt0, CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
      ResultElt1, CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)};
  CurDAG->SelectNodeTo(N, TargetOpcode::REG_SEQUENCE, VT, Ops);
}
void AMDGPUDAGToDAGISel::Select(SDNode *N) {
  unsigned int Opc = N->getOpcode();
  if (N->isMachineOpcode()) {
    // ...
  }
  // ...
  N = glueCopyToM0LDSInit(N);
  // ...
  switch (Opc) {
  // ...
    if (N->getValueType(0) != MVT::i64)
      break;
    // ...
    SelectADD_SUB_I64(N);
    return;
  // ...
    if (N->getValueType(0) != MVT::i32)
      break;
    // ...
    SelectUADDO_USUBO(N);
    return;
  case AMDGPUISD::FMUL_W_CHAIN: {
    SelectFMUL_W_CHAIN(N);
    return;
  }
  case AMDGPUISD::FMA_W_CHAIN: {
    SelectFMA_W_CHAIN(N);
    return;
  }
  // ...
    EVT VT = N->getValueType(0);
    // ...
        ? TRI->getDefaultVectorSuperClassForBitWidth(NumVectorElts * 32)
        // ...
    if (N->getValueType(0) == MVT::i128) {
      RC = CurDAG->getTargetConstant(AMDGPU::SGPR_128RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32);
    } else if (N->getValueType(0) == MVT::i64) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
    }
    // ...
    const SDValue Ops[] = {RC, N->getOperand(0), SubReg0,
                           N->getOperand(1), SubReg1};
    // ...
                                  N->getValueType(0), Ops));
    // ...
    if (N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N) ||
        Subtarget->has64BitLiterals())
      break;
    // ...
      Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
    // ...
      Imm = C->getZExtValue();
    // ...
  case AMDGPUISD::BFE_I32:
  case AMDGPUISD::BFE_U32: {
    // ...
  }
  case AMDGPUISD::DIV_SCALE: {
    // ...
  }
  // ...
    return SelectMUL_LOHI(N);
  // ...
    if (N->getValueType(0) != MVT::i32)
      break;
    // ...
  case AMDGPUISD::CVT_PKRTZ_F16_F32:
  case AMDGPUISD::CVT_PKNORM_I16_F32:
  case AMDGPUISD::CVT_PKNORM_U16_F32:
  case AMDGPUISD::CVT_PK_U16_U32:
  case AMDGPUISD::CVT_PK_I16_I32: {
    // ...
    if (N->getValueType(0) == MVT::i32) {
      MVT NewVT = Opc == AMDGPUISD::CVT_PKRTZ_F16_F32 ? MVT::v2f16 : MVT::v2i16;
      // ...
                     {N->getOperand(0), N->getOperand(1)});
      // ...
    }
    // ...
  }
  // ...
    SelectINTRINSIC_W_CHAIN(N);
    return;
  // ...
    SelectINTRINSIC_WO_CHAIN(N);
    return;
  // ...
    SelectINTRINSIC_VOID(N);
    return;
  // ...
    SelectWAVE_ADDRESS(N);
    return;
  case ISD::STACKRESTORE: {
    SelectSTACKRESTORE(N);
    return;
  }
bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const {
  // ...
  return Term->getMetadata("amdgpu.uniform") ||
         Term->getMetadata("structurizecfg.uniform");
}

bool AMDGPUDAGToDAGISel::isUnneededShiftMask(const SDNode *N,
                                             unsigned ShAmtBits) const {
  // ...
  const APInt &RHS = N->getConstantOperandAPInt(1);
  if (RHS.countr_one() >= ShAmtBits)
    // ...
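// Worked example (illustrative, not from the original source): a 32-bit
// shift only consumes the low 5 bits of its amount (ShAmtBits == 5), so for
// (shl x, (and amt, 31)) the mask has countr_one(31) == 5 >= 5 trailing ones
// and is unneeded.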
    N1 = Lo.getOperand(1);
    // ...
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    // ...

  return "AMDGPU DAG->DAG Pattern Instruction Selection";
// ...

#ifdef EXPENSIVE_CHECKS
  // ...
  for (auto &L : LI.getLoopsInPreorder())
    assert(L->isLCSSAForm(DT) && "Loop is not in LCSSA form!");
#endif
  // ...

  } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
             /*...*/) {
    Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
    // ...

SDValue AMDGPUDAGToDAGISel::getMaterializedScalarImm32(int64_t Val,
                                                       const SDLoc &DL) const {
  SDNode *Mov = CurDAG->getMachineNode(
      AMDGPU::S_MOV_B32, DL, MVT::i32,
      CurDAG->getTargetConstant(Val, DL, MVT::i32));
  // ...
}
void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
  // ...
  unsigned Opcode = N->getOpcode();
  // ...
  SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub0);
  SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub1);
  SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub0);
  SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub1);

  SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);

  static const unsigned OpcMap[2][2][2] = {
      {{AMDGPU::S_SUB_U32, AMDGPU::S_ADD_U32},
       {AMDGPU::V_SUB_CO_U32_e32, AMDGPU::V_ADD_CO_U32_e32}},
      {{AMDGPU::S_SUBB_U32, AMDGPU::S_ADDC_U32},
       {AMDGPU::V_SUBB_U32_e32, AMDGPU::V_ADDC_U32_e32}}};

  unsigned Opc = OpcMap[0][N->isDivergent()][IsAdd];
  unsigned CarryOpc = OpcMap[1][N->isDivergent()][IsAdd];
  // ...
  if (!ConsumeCarry) {
    // ...
    AddLo = CurDAG->getMachineNode(Opc, DL, VTList, Args);
  } else {
    // ...
    AddLo = CurDAG->getMachineNode(CarryOpc, DL, VTList, Args);
  }
  // ...
  SDNode *AddHi = CurDAG->getMachineNode(CarryOpc, DL, VTList, AddHiArgs);
  // ...
      CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
      /*...*/
  // ...
                                MVT::i64, RegSequenceArgs);
}
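// Note (explanatory comment, not in the original source): the 64-bit add/sub
// is split into 32-bit halves. The low half uses the carry-defining opcode
// (S_ADD_U32 / V_ADD_CO_U32) and the high half the carry-consuming one
// (S_ADDC_U32 / V_ADDC_U32); the two 32-bit results are recombined into an
// i64 with REG_SEQUENCE.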
void AMDGPUDAGToDAGISel::SelectAddcSubb(SDNode *N) {
  // ...
  if (N->isDivergent()) {
    // ...
                       : AMDGPU::V_SUBB_U32_e64;
    CurDAG->SelectNodeTo(N, Opc, N->getVTList(),
                         {/*...*/,
                          CurDAG->getTargetConstant(0, {}, MVT::i1)});
  } else {
    // ...
                       : AMDGPU::S_SUB_CO_PSEUDO;
    CurDAG->SelectNodeTo(N, Opc, N->getVTList(), {LHS, RHS, CI});
  }
}
void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(SDNode *N) {
  // ...
  bool IsVALU = N->isDivergent();

  for (SDNode::user_iterator UI = N->user_begin(), E = N->user_end(); UI != E;
       /*...*/)
    if (UI.getUse().getResNo() == 1) {
      if (UI->isMachineOpcode()) {
        if (UI->getMachineOpcode() !=
            (IsAdd ? AMDGPU::S_ADD_CO_PSEUDO : AMDGPU::S_SUB_CO_PSEUDO)) {
          // ...

  if (IsVALU) {
    unsigned Opc = IsAdd ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_SUB_CO_U32_e64;
    CurDAG->SelectNodeTo(
        N, Opc, N->getVTList(),
        {N->getOperand(0), N->getOperand(1),
         CurDAG->getTargetConstant(0, {}, MVT::i1)});
  } else {
    unsigned Opc = IsAdd ? AMDGPU::S_UADDO_PSEUDO : AMDGPU::S_USUBO_PSEUDO;
    CurDAG->SelectNodeTo(N, Opc, N->getVTList(),
                         {N->getOperand(0), N->getOperand(1)});
  }
}
void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) {
  // ...
  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[6], Ops[7]);
  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
  SelectVOP3Mods(N->getOperand(3), Ops[5], Ops[4]);
  Ops[8] = N->getOperand(0);
  Ops[9] = N->getOperand(4);
  // ...
  bool UseFMAC = Subtarget->hasDLInsts() &&
                 /*...*/;
  // ...
  unsigned Opcode = UseFMAC ? AMDGPU::V_FMAC_F32_e64 : AMDGPU::V_FMA_F32_e64;
  CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), Ops);
}

void AMDGPUDAGToDAGISel::SelectFMUL_W_CHAIN(SDNode *N) {
  // ...
  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[4], Ops[5]);
  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
  Ops[6] = N->getOperand(0);
  Ops[7] = N->getOperand(3);
  // ...
  CurDAG->SelectNodeTo(N, AMDGPU::V_MUL_F32_e64, N->getVTList(), Ops);
}
void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
  EVT VT = N->getValueType(0);
  // ...
  assert(VT == MVT::f32 || VT == MVT::f64);

  unsigned Opc = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64_e64
                                  : AMDGPU::V_DIV_SCALE_F32_e64;
  // ...
  SelectVOP3BMods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
  SelectVOP3BMods(N->getOperand(1), Ops[3], Ops[2]);
  SelectVOP3BMods(N->getOperand(2), Ops[5], Ops[4]);
  // ...
}

void AMDGPUDAGToDAGISel::SelectMAD_64_32(SDNode *N) {
  // ...
  bool UseNoCarry = Subtarget->hasMadU64U32NoCarry() && !N->hasAnyUseOfValue(1);
  if (Subtarget->hasMADIntraFwdBug())
    Opc = Signed ? AMDGPU::V_MAD_I64_I32_gfx11_e64
                 : AMDGPU::V_MAD_U64_U32_gfx11_e64;
  else if (UseNoCarry)
    Opc = Signed ? AMDGPU::V_MAD_NC_I64_I32_e64 : AMDGPU::V_MAD_NC_U64_U32_e64;
  else
    Opc = Signed ? AMDGPU::V_MAD_I64_I32_e64 : AMDGPU::V_MAD_U64_U32_e64;
  // ...
  SDValue Ops[] = {N->getOperand(0), N->getOperand(1), N->getOperand(2),
                   /*...*/};
  // ...
  MachineSDNode *Mad = CurDAG->getMachineNode(Opc, SL, MVT::i64, Ops);
void AMDGPUDAGToDAGISel::SelectMUL_LOHI(SDNode *N) {
  // ...
  if (Subtarget->hasMadU64U32NoCarry()) {
    VTList = CurDAG->getVTList(MVT::i64);
    Opc = Signed ? AMDGPU::V_MAD_NC_I64_I32_e64 : AMDGPU::V_MAD_NC_U64_U32_e64;
  } else {
    VTList = CurDAG->getVTList(MVT::i64, MVT::i1);
    if (Subtarget->hasMADIntraFwdBug()) {
      Opc = Signed ? AMDGPU::V_MAD_I64_I32_gfx11_e64
                   : AMDGPU::V_MAD_U64_U32_gfx11_e64;
    } else {
      Opc = Signed ? AMDGPU::V_MAD_I64_I32_e64 : AMDGPU::V_MAD_U64_U32_e64;
    }
  }
  // ...
  SDNode *Mad = CurDAG->getMachineNode(Opc, SL, VTList, Ops);
  // ...
  SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32);
  SDNode *Lo = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, SL,
                                      MVT::i32, SDValue(Mad, 0), Sub0);
  // ...
  SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32);
  SDNode *Hi = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, SL,
                                      MVT::i32, SDValue(Mad, 0), Sub1);
  if (!Base || Subtarget->hasUsableDSOffset() ||
      Subtarget->unsafeDSOffsetFoldingEnabled())
    // ...

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    // ...
      int64_t ByteOffset = C->getSExtValue();
      if (isDSOffsetLegal(SDValue(), ByteOffset)) {
        // ...
        if (isDSOffsetLegal(Sub, ByteOffset)) {
          // ...
          unsigned SubOp = AMDGPU::V_SUB_CO_U32_e32;
          if (Subtarget->hasAddNoCarry()) {
            SubOp = AMDGPU::V_SUB_U32_e64;
            // ...
                CurDAG->getTargetConstant(0, {}, MVT::i1));
          }
          MachineSDNode *MachineSub =
              CurDAG->getMachineNode(SubOp, DL, MVT::i32, Opnds);
          // ...
    if (isDSOffsetLegal(SDValue(), CAddr->getZExtValue())) {
      // ...
      MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                                      DL, MVT::i32, Zero);
      // ...
      Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
      // ...
  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16);
bool AMDGPUDAGToDAGISel::isDSOffset2Legal(SDValue Base, unsigned Offset0,
                                          unsigned Offset1,
                                          unsigned Size) const {
  if (Offset0 % Size != 0 || Offset1 % Size != 0)
    // ...
  if (!Base || Subtarget->hasUsableDSOffset() ||
      Subtarget->unsafeDSOffsetFoldingEnabled())
    // ...
}

bool AMDGPUDAGToDAGISel::isFlatScratchBaseLegal(SDValue Addr) const {
  // ...
  if (Subtarget->hasSignedScratchOffsets())
    // ...
  ConstantSDNode *ImmOp = nullptr;
  // ...
}

bool AMDGPUDAGToDAGISel::isFlatScratchBaseLegalSV(SDValue Addr) const {
  // ...
  if (Subtarget->hasSignedScratchOffsets())
    // ...
}

bool AMDGPUDAGToDAGISel::isFlatScratchBaseLegalSVImm(SDValue Addr) const {
  // ...
       (RHSImm->getSExtValue() < 0 && RHSImm->getSExtValue() > -0x40000000)))
    // ...
  auto LHS = Base.getOperand(0);
  auto RHS = Base.getOperand(1);
  // ...
}

// ...
  return SelectDSReadWrite2(Addr, Base, Offset0, Offset1, 4);
// ...
  return SelectDSReadWrite2(Addr, Base, Offset0, Offset1, 8);
bool AMDGPUDAGToDAGISel::SelectDSReadWrite2(SDValue Addr, SDValue &Base,
                                            SDValue &Offset0, SDValue &Offset1,
                                            unsigned Size) const {
  // ...
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    // ...
    unsigned OffsetValue1 = OffsetValue0 + Size;
    // ...
    if (isDSOffset2Legal(N0, OffsetValue0, OffsetValue1, Size)) {
      // ...
      Offset0 = CurDAG->getTargetConstant(OffsetValue0 / Size, DL, MVT::i32);
      Offset1 = CurDAG->getTargetConstant(OffsetValue1 / Size, DL, MVT::i32);
      // ...
    if (const ConstantSDNode *C =
            /*...*/) {
      unsigned OffsetValue0 = C->getZExtValue();
      unsigned OffsetValue1 = OffsetValue0 + Size;

      if (isDSOffset2Legal(SDValue(), OffsetValue0, OffsetValue1, Size)) {
        // ...
        if (isDSOffset2Legal(Sub, OffsetValue0, OffsetValue1, Size)) {
          // ...
          unsigned SubOp = AMDGPU::V_SUB_CO_U32_e32;
          if (Subtarget->hasAddNoCarry()) {
            SubOp = AMDGPU::V_SUB_U32_e64;
            // ...
                CurDAG->getTargetConstant(0, {}, MVT::i1));
          }
          MachineSDNode *MachineSub = CurDAG->getMachineNode(
              /*...*/);
          // ...
              CurDAG->getTargetConstant(OffsetValue0 / Size, DL, MVT::i32);
          // ...
              CurDAG->getTargetConstant(OffsetValue1 / Size, DL, MVT::i32);
          // ...
    unsigned OffsetValue0 = CAddr->getZExtValue();
    unsigned OffsetValue1 = OffsetValue0 + Size;

    if (isDSOffset2Legal(SDValue(), OffsetValue0, OffsetValue1, Size)) {
      // ...
      MachineSDNode *MovZero =
          CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, DL, MVT::i32, Zero);
      // ...
      Offset0 = CurDAG->getTargetConstant(OffsetValue0 / Size, DL, MVT::i32);
      Offset1 = CurDAG->getTargetConstant(OffsetValue1 / Size, DL, MVT::i32);
      // ...
  Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i32);
  Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i32);
  // ...
}
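// Note (explanatory comment, not in the original source): ds_read2/ds_write2
// encode offset0/offset1 in units of the element size (hence the divisions
// by Size above); with 8-bit offset fields this reaches 255 * Size bytes
// from the base.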
  if (Subtarget->useFlatForGlobal())
    // ...
  Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
  SOffset = Subtarget->hasRestrictedSOffset()
                ? CurDAG->getRegister(AMDGPU::SGPR_NULL, MVT::i32)
                : CurDAG->getTargetConstant(0, DL, MVT::i32);

  ConstantSDNode *C1 = nullptr;
  // ...
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    // ...
      Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
      // ...
      Ptr = SDValue(buildSMovImm64(DL, 0, MVT::v2i32), 0);
      // ...
    Ptr = SDValue(buildSMovImm64(DL, 0, MVT::v2i32), 0);
    Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
    // ...
  VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
  // ...
  const SIInstrInfo *TII = Subtarget->getInstrInfo();
  // ...
      AMDGPU::S_MOV_B32, DL, MVT::i32,
      /*...*/
bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                           /*...*/) {
  SDValue Ptr, Offen, Idxen, Addr64;
  // ...
  if (!Subtarget->hasAddr64())
    // ...
  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64))
    // ...
    if (C->getSExtValue()) {
    // ...

std::pair<SDValue, SDValue>
AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const {
  // ...
      FI ? CurDAG->getTargetFrameIndex(FI->getIndex(), FI->getValueType(0)) : N;
  // ...
  return std::pair(TFI, CurDAG->getTargetConstant(0, DL, MVT::i32));
}
bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent,
                                                 /*...*/) {
  // ...
  MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
  // ...
    int64_t Imm = CAddr->getSExtValue();
    const int64_t NullPtr =
        /*...*/;
    if (Imm != NullPtr) {
      // ...
          CurDAG->getTargetConstant(Imm & ~MaxOffset, DL, MVT::i32);
      MachineSDNode *MovHighBits = CurDAG->getMachineNode(
          AMDGPU::V_MOV_B32_e32, DL, MVT::i32, HighBits);
      VAddr = SDValue(MovHighBits, 0);
      // ...
      SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
      ImmOffset = CurDAG->getTargetConstant(Imm & MaxOffset, DL, MVT::i32);
      // ...

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    // ...
    const SIInstrInfo *TII = Subtarget->getInstrInfo();
    if (TII->isLegalMUBUFImmOffset(C1) &&
        (!Subtarget->privateMemoryResourceIsRangeChecked() ||
         CurDAG->SignBitIsZero(N0))) {
      std::tie(VAddr, SOffset) = foldFrameIndex(N0);
      ImmOffset = CurDAG->getTargetConstant(C1, DL, MVT::i32);
      // ...

  std::tie(VAddr, SOffset) = foldFrameIndex(Addr);
  ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  // ...
}

// ...
  if (!Reg.isPhysical())
    // ...
  const auto *RC = TRI.getPhysRegBaseClass(Reg);
  return RC && TRI.isSGPRClass(RC);
bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent,
                                                  /*...*/) {
  // ...
  const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
  const SIInstrInfo *TII = Subtarget->getInstrInfo();
  MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
  // ...
    SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
    // ...
  ConstantSDNode *CAddr;
  // ...
    SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
    // ...
  SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
  // ...
}

bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           /*...*/) {
  SDValue Ptr, VAddr, Offen, Idxen, Addr64;
  const SIInstrInfo *TII = Subtarget->getInstrInfo();

  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64))
    // ...
  uint64_t Rsrc = TII->getDefaultRsrcDataFormat() |
                  /*...*/;
  // ...
}

bool AMDGPUDAGToDAGISel::SelectBUFSOffset(SDValue ByteOffsetNode,
                                          /*...*/) const {
  if (Subtarget->hasRestrictedSOffset() && isNullConstant(ByteOffsetNode)) {
    SOffset = CurDAG->getRegister(AMDGPU::SGPR_NULL, MVT::i32);
    // ...
  }
  SOffset = ByteOffsetNode;
  // ...
}
bool AMDGPUDAGToDAGISel::SelectFlatOffsetImpl(SDNode *N, SDValue Addr,
                                              /*...*/,
                                              uint64_t FlatVariant) const {
  int64_t OffsetVal = 0;
  // ...
  bool CanHaveFlatSegmentOffsetBug =
      Subtarget->hasFlatSegmentOffsetBug() &&
      /*...*/;

  if (Subtarget->hasFlatInstOffsets() && !CanHaveFlatSegmentOffsetBug) {
    // ...
    if (isBaseWithConstantOffset64(Addr, N0, N1) &&
        /*...*/
         isFlatScratchBaseLegal(Addr))) {
      // ...
      const SIInstrInfo *TII = Subtarget->getInstrInfo();
      if (TII->isLegalFLATOffset(COffsetVal, AS, FlatVariant)) {
        // ...
        OffsetVal = COffsetVal;
        // ...
        uint64_t RemainderOffset;
        std::tie(OffsetVal, RemainderOffset) =
            TII->splitFlatOffset(COffsetVal, AS, FlatVariant);
        // ...
            getMaterializedScalarImm32(Lo_32(RemainderOffset), DL);
        // ...
          unsigned AddOp = AMDGPU::V_ADD_CO_U32_e32;
          if (Subtarget->hasAddNoCarry()) {
            AddOp = AMDGPU::V_ADD_U32_e64;
            // ...
          }
          // ...
              CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
          // ...
              CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);

          SDNode *N0Lo = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                                DL, MVT::i32, N0, Sub0);
          SDNode *N0Hi = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                                DL, MVT::i32, N0, Sub1);
          // ...
              getMaterializedScalarImm32(Hi_32(RemainderOffset), DL);

          SDVTList VTs = CurDAG->getVTList(MVT::i32, MVT::i1);
          // ...
              CurDAG->getMachineNode(AMDGPU::V_ADD_CO_U32_e64, DL, VTs,
                                     {AddOffsetLo, SDValue(N0Lo, 0), Clamp});
          SDNode *Addc = CurDAG->getMachineNode(
              AMDGPU::V_ADDC_U32_e64, DL, VTs,
              /*...*/);
          // ...
              CurDAG->getTargetConstant(AMDGPU::VReg_64RegClassID, DL,
                                        /*...*/),
          // ...
                                         MVT::i64, RegSequenceArgs),
  // ...
  Offset = CurDAG->getSignedTargetConstant(OffsetVal, SDLoc(), MVT::i32);
  // ...
}
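// Note (explanatory comment, not in the original source): when the constant
// offset does not fit the FLAT immediate field, splitFlatOffset() returns a
// legal immediate plus a remainder; the remainder is materialized and added
// to the 64-bit pointer with a V_ADD_CO_U32 / V_ADDC_U32 pair, and only the
// legal part is kept as the instruction offset.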
bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDNode *N, SDValue Addr,
                                          /*...*/
// ...
bool AMDGPUDAGToDAGISel::SelectGlobalOffset(SDNode *N, SDValue Addr,
                                            /*...*/
// ...
bool AMDGPUDAGToDAGISel::SelectScratchOffset(SDNode *N, SDValue Addr,
                                             /*...*/) const {
  return SelectFlatOffsetImpl(N, Addr, VAddr, Offset,
                              /*...*/);
}

// ...
  if (Op.getValueType() == MVT::i32)
    // ...
bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(SDNode *N, SDValue Addr,
                                           /*...*/,
                                           bool NeedIOffset) const {
  int64_t ImmOffset = 0;
  ScaleOffset = false;
  // ...
  if (isBaseWithConstantOffset64(Addr, LHS, RHS)) {
    // ...
    const SIInstrInfo *TII = Subtarget->getInstrInfo();
    // ...
      ImmOffset = COffsetVal;
    } else if (!LHS->isDivergent()) {
      if (COffsetVal > 0) {
        // ...
        int64_t SplitImmOffset = 0, RemainderOffset = COffsetVal;
        // ...
        std::tie(SplitImmOffset, RemainderOffset) = TII->splitFlatOffset(
            /*...*/);
        // ...
        if (Subtarget->hasSignedGVSOffset() ? isInt<32>(RemainderOffset)
                                            : /*...*/) {
          SDNode *VMov = CurDAG->getMachineNode(
              AMDGPU::V_MOV_B32_e32, SL, MVT::i32,
              CurDAG->getTargetConstant(RemainderOffset, SDLoc(), MVT::i32));
          // ...
          Offset = CurDAG->getTargetConstant(SplitImmOffset, SDLoc(), MVT::i32);
          // ...
      unsigned NumLiterals =
          !TII->isInlineConstant(APInt(32, Lo_32(COffsetVal))) +
          !TII->isInlineConstant(APInt(32, Hi_32(COffsetVal)));
      if (Subtarget->getConstantBusLimit(AMDGPU::V_ADD_U32_e64) > NumLiterals)
        // ...

    if (!LHS->isDivergent()) {
      // ...
      ScaleOffset = SelectScaleOffset(N, RHS, Subtarget->hasSignedGVSOffset());
      // ...
                    RHS, Subtarget->hasSignedGVSOffset(), CurDAG)) {
      // ...
    if (!SAddr && !RHS->isDivergent()) {
      // ...
      ScaleOffset = SelectScaleOffset(N, LHS, Subtarget->hasSignedGVSOffset());
      // ...
                    LHS, Subtarget->hasSignedGVSOffset(), CurDAG)) {
      // ...
      Offset = CurDAG->getSignedTargetConstant(ImmOffset, SDLoc(), MVT::i32);
      // ...

  if (Subtarget->hasScaleOffset() &&
      (Addr.getOpcode() == (Subtarget->hasSignedGVSOffset()
                            /*...*/
    // ...
    Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i32);
    // ...
      CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, SDLoc(Addr), MVT::i32,
                             CurDAG->getTargetConstant(0, SDLoc(), MVT::i32));
  // ...
  Offset = CurDAG->getSignedTargetConstant(ImmOffset, SDLoc(), MVT::i32);
  // ...
}
bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(SDNode *N, SDValue Addr,
                                           /*...*/) const {
  // ...
  if (!SelectGlobalSAddr(N, Addr, SAddr, VOffset, Offset, ScaleOffset))
    // ...
}

bool AMDGPUDAGToDAGISel::SelectGlobalSAddrCPol(SDNode *N, SDValue Addr,
                                               /*...*/) const {
  // ...
  if (!SelectGlobalSAddr(N, Addr, SAddr, VOffset, Offset, ScaleOffset))
    // ...
      N->getConstantOperandVal(N->getNumOperands() - 1) & ~AMDGPU::CPol::SCAL;
  // ...
}

bool AMDGPUDAGToDAGISel::SelectGlobalSAddrCPolM0(SDNode *N, SDValue Addr,
                                                 /*...*/) const {
  // ...
  if (!SelectGlobalSAddr(N, Addr, SAddr, VOffset, Offset, ScaleOffset))
    // ...
      N->getConstantOperandVal(N->getNumOperands() - 2) & ~AMDGPU::CPol::SCAL;
  // ...
}

bool AMDGPUDAGToDAGISel::SelectGlobalSAddrGLC(SDNode *N, SDValue Addr,
                                              /*...*/) const {
  // ...
  if (!SelectGlobalSAddr(N, Addr, SAddr, VOffset, Offset, ScaleOffset))
    // ...
  CPol = CurDAG->getTargetConstant(CPolVal, SDLoc(), MVT::i32);
  // ...
}

bool AMDGPUDAGToDAGISel::SelectGlobalSAddrNoIOffset(SDNode *N, SDValue Addr,
                                                    /*...*/) const {
  // ...
  if (!SelectGlobalSAddr(N, Addr, SAddr, VOffset, DummyOffset, ScaleOffset,
                         /*...*/))
    // ...
      N->getConstantOperandVal(N->getNumOperands() - 1) & ~AMDGPU::CPol::SCAL;
  // ...
}

bool AMDGPUDAGToDAGISel::SelectGlobalSAddrNoIOffsetM0(SDNode *N, SDValue Addr,
                                                      /*...*/) const {
  // ...
  if (!SelectGlobalSAddr(N, Addr, SAddr, VOffset, DummyOffset, ScaleOffset,
                         /*...*/))
    // ...
}

static SDValue SelectSAddrFI(SelectionDAG *CurDAG, SDValue SAddr) {
  // ...
                                    FI->getValueType(0));
  // ...
}
bool AMDGPUDAGToDAGISel::SelectScratchSAddr(SDNode *Parent, SDValue Addr,
                                            /*...*/) const {
  // ...
  int64_t COffsetVal = 0;

  if (CurDAG->isBaseWithConstantOffset(Addr) && isFlatScratchBaseLegal(Addr)) {
    // ...
  const SIInstrInfo *TII = Subtarget->getInstrInfo();
  // ...
    int64_t SplitImmOffset, RemainderOffset;
    std::tie(SplitImmOffset, RemainderOffset) = TII->splitFlatOffset(
        /*...*/);

    COffsetVal = SplitImmOffset;
    // ...
            ? getMaterializedScalarImm32(Lo_32(RemainderOffset), DL)
            : CurDAG->getSignedTargetConstant(RemainderOffset, DL, MVT::i32);
    SAddr = SDValue(CurDAG->getMachineNode(AMDGPU::S_ADD_I32, DL, MVT::i32,
                                           /*...*/),
                    0);
  // ...
  Offset = CurDAG->getSignedTargetConstant(COffsetVal, DL, MVT::i32);
  // ...
}

bool AMDGPUDAGToDAGISel::checkFlatScratchSVSSwizzleBug(
    /*...*/) const {
  if (!Subtarget->hasFlatScratchSVSSwizzleBug())
    // ...
  KnownBits VKnown = CurDAG->computeKnownBits(VAddr);
  // ...
  return (VMax & 3) + (SMax & 3) >= 4;
}
bool AMDGPUDAGToDAGISel::SelectScratchSVAddr(SDNode *N, SDValue Addr,
                                             /*...*/) const {
  int64_t ImmOffset = 0;
  // ...
  if (isBaseWithConstantOffset64(Addr, LHS, RHS)) {
    // ...
    const SIInstrInfo *TII = Subtarget->getInstrInfo();
    // ...
      ImmOffset = COffsetVal;
    } else if (!LHS->isDivergent() && COffsetVal > 0) {
      // ...
      int64_t SplitImmOffset, RemainderOffset;
      std::tie(SplitImmOffset, RemainderOffset) = TII->splitFlatOffset(
          /*...*/);
      // ...
        SDNode *VMov = CurDAG->getMachineNode(
            AMDGPU::V_MOV_B32_e32, SL, MVT::i32,
            CurDAG->getTargetConstant(RemainderOffset, SDLoc(), MVT::i32));
        // ...
        if (!isFlatScratchBaseLegal(Addr))
          // ...
        if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, SplitImmOffset))
          // ...
        Offset = CurDAG->getTargetConstant(SplitImmOffset, SDLoc(), MVT::i32);
        CPol = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
        // ...

  if (!LHS->isDivergent() && RHS->isDivergent()) {
    // ...
  } else if (!RHS->isDivergent() && LHS->isDivergent()) {
    // ...
  if (OrigAddr != Addr) {
    if (!isFlatScratchBaseLegalSVImm(OrigAddr))
      // ...
    if (!isFlatScratchBaseLegalSV(OrigAddr))
      // ...
  if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, ImmOffset))
    // ...
  Offset = CurDAG->getSignedTargetConstant(ImmOffset, SDLoc(), MVT::i32);
  // ...
  bool ScaleOffset = SelectScaleOffset(N, VAddr, /*IsSigned=*/true);
bool AMDGPUDAGToDAGISel::isSOffsetLegalWithImmOffset(SDValue *SOffset,
                                                     /*...*/,
                                                     int64_t ImmOffset) const {
  if (!IsBuffer && !Imm32Only && ImmOffset < 0 &&
      /*...*/) {
    KnownBits SKnown = CurDAG->computeKnownBits(*SOffset);
    // ...
}

bool AMDGPUDAGToDAGISel::SelectScaleOffset(SDNode *N, SDValue Offset,
                                           bool IsSigned) const {
  bool ScaleOffset = false;
  if (!Subtarget->hasScaleOffset() || !Offset)
    // ...
      (IsSigned && Offset.getOpcode() == AMDGPUISD::MUL_I24) ||
      Offset.getOpcode() == AMDGPUISD::MUL_U24 ||
      (Offset.isMachineOpcode() &&
       Offset.getMachineOpcode() ==
           (IsSigned ? AMDGPU::S_MUL_I64_I32_PSEUDO
                     : AMDGPU::S_MUL_U64_U32_PSEUDO))) {
    // ...
    ScaleOffset = C->getZExtValue() == Size;
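// Note (explanatory comment, not in the original source): SelectScaleOffset()
// recognizes offsets of the form index * access-size (a mul/mul24, or the
// 64-bit mul pseudos above, by a constant equal to Size), so the multiply can
// be folded into the instruction's scale-offset addressing mode instead of
// being emitted separately.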
bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDNode *N, SDValue ByteOffsetNode,
                                          /*...*/,
                                          bool Imm32Only, bool IsBuffer,
                                          bool HasSOffset, int64_t ImmOffset,
                                          bool *ScaleOffset) const {
  // ...
         "Cannot match both soffset and offset at the same time!");
  // ...
    *ScaleOffset = SelectScaleOffset(N, ByteOffsetNode, /*IsSigned=*/false);
  // ...
      *SOffset = ByteOffsetNode;
      return isSOffsetLegalWithImmOffset(SOffset, Imm32Only, IsBuffer,
                                         /*...*/);
    // ...
      return isSOffsetLegalWithImmOffset(SOffset, Imm32Only, IsBuffer,
                                         /*...*/);
  // ...
  SDLoc SL(ByteOffsetNode);
  // ...
  int64_t ByteOffset = IsBuffer ? C->getZExtValue() : C->getSExtValue();
  // ...
      *Subtarget, ByteOffset, IsBuffer, HasSOffset);
  if (EncodedOffset && Offset && !Imm32Only) {
    *Offset = CurDAG->getSignedTargetConstant(*EncodedOffset, SL, MVT::i32);
    // ...
  if (EncodedOffset && Offset && Imm32Only) {
    *Offset = CurDAG->getTargetConstant(*EncodedOffset, SL, MVT::i32);
    // ...
  SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
  // ...
      CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, C32Bit), 0);
SDValue AMDGPUDAGToDAGISel::Expand32BitAddress(SDValue Addr) const {
  // ...
  const MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
  unsigned AddrHiVal = Info->get32BitAddressHighBits();
  SDValue AddrHi = CurDAG->getTargetConstant(AddrHiVal, SL, MVT::i32);

  const SDValue Ops[] = {
      CurDAG->getTargetConstant(AMDGPU::SReg_64_XEXECRegClassID, SL, MVT::i32),
      /*...*/,
      CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32),
      SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, AddrHi),
              0),
      CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32),
  };

  return SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, SL, MVT::i64,
                                        /*...*/),
                 0);
}
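// Note (explanatory comment, not in the original source): for 32-bit SMRD
// addresses the known high half (Info->get32BitAddressHighBits()) is
// materialized with S_MOV_B32 and combined with the low half through
// REG_SEQUENCE {RCID, lo, sub0, hi, sub1}, producing the full 64-bit base
// the hardware expects.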
bool AMDGPUDAGToDAGISel::SelectSMRDBaseOffset(SDNode *N, SDValue Addr,
                                              /*...*/,
                                              bool IsBuffer, bool HasSOffset,
                                              /*...*/,
                                              bool *ScaleOffset) const {
  // ...
    assert(!Imm32Only && !IsBuffer);
    // ...
    if (!SelectSMRDBaseOffset(N, Addr, B, nullptr, Offset, false, false, true))
      // ...
      ImmOff = C->getSExtValue();
    return SelectSMRDBaseOffset(N, B, SBase, SOffset, nullptr, false, false,
                                true, ImmOff, ScaleOffset);
  // ...
  if (SelectSMRDOffset(N, N1, SOffset, Offset, Imm32Only, IsBuffer, HasSOffset,
                       ImmOffset, ScaleOffset)) {
    // ...
  if (SelectSMRDOffset(N, N0, SOffset, Offset, Imm32Only, IsBuffer, HasSOffset,
                       ImmOffset, ScaleOffset)) {
    // ...
}

bool AMDGPUDAGToDAGISel::SelectSMRD(SDNode *N, SDValue Addr, SDValue &SBase,
                                    SDValue *SOffset, SDValue *Offset,
                                    bool Imm32Only, bool *ScaleOffset) const {
  if (SelectSMRDBaseOffset(N, Addr, SBase, SOffset, Offset, Imm32Only,
                           /*...*/)) {
    SBase = Expand32BitAddress(SBase);
    // ...
  SBase = Expand32BitAddress(Addr);
  *Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
  // ...
}
bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase,
                                       /*...*/) const {
  return SelectSMRD(/*N=*/nullptr, Addr, SBase, /*SOffset=*/nullptr,
                    /*...*/);
}

bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase,
                                         /*...*/) const {
  return SelectSMRD(/*N=*/nullptr, Addr, SBase, /*SOffset=*/nullptr,
                    /*...*/);
}

// ...
  if (!SelectSMRD(N, Addr, SBase, &SOffset, /*Offset=*/nullptr,
                  /*Imm32Only=*/false, &ScaleOffset))
    // ...
                                   SDLoc(N), MVT::i32);
// ...

bool AMDGPUDAGToDAGISel::SelectSMRDSgprImm(SDNode *N, SDValue Addr,
                                           /*...*/) const {
  // ...
  if (!SelectSMRD(N, Addr, SBase, &SOffset, &Offset, /*Imm32Only=*/false,
                  &ScaleOffset))
    // ...
                                   SDLoc(N), MVT::i32);
// ...
  return SelectSMRDOffset(/*N=*/nullptr, N, /*SOffset=*/nullptr, &Offset,
                          /*...*/);
// ...

bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue N,
                                               /*...*/) const {
  return SelectSMRDOffset(/*N=*/nullptr, N, /*SOffset=*/nullptr, &Offset,
                          /*...*/);
}

bool AMDGPUDAGToDAGISel::SelectSMRDBufferSgprImm(SDValue N, SDValue &SOffset,
                                                 /*...*/) const {
  return N.getValueType() == MVT::i32 &&
         SelectSMRDBaseOffset(/*N=*/nullptr, N, SOffset,
                              /*...*/);
}

bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index,
                                            /*...*/) const {
  // ...
  if (CurDAG->isBaseWithConstantOffset(Index)) {
SDNode *AMDGPUDAGToDAGISel::getBFE32(bool IsSigned, const SDLoc &DL,
                                     /*...*/) {
  // ...
    unsigned Opcode = IsSigned ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;
    // ...
    return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, Off, W);
  // ...
  unsigned Opcode = IsSigned ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
  // ...
  uint32_t PackedVal = Offset | (Width << 16);
  SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32);

  return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst);
}
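// Worked example (illustrative, not from the original source): S_BFE packs
// offset and width into one operand as Offset | (Width << 16); extracting 4
// bits starting at bit 8 gives PackedVal = 8 | (4 << 16) = 0x00040008.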
void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
  // ...
  const SDValue &Shl = N->getOperand(0);
  // ...
    uint32_t BVal = B->getZExtValue();
    uint32_t CVal = C->getZExtValue();

    if (0 < BVal && BVal <= CVal && CVal < 32) {
      // ...
}

void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
  switch (N->getOpcode()) {
  case ISD::AND:
    if (N->getOperand(0).getOpcode() == ISD::SRL) {
      // ...
      const SDValue &Srl = N->getOperand(0);
      // ...
      if (Shift && Mask) {
        // ...
        uint32_t MaskVal = Mask->getZExtValue();
        // ...
  case ISD::SRL:
    if (N->getOperand(0).getOpcode() == ISD::AND) {
      // ...
      if (Shift && Mask) {
        // ...
        uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal;
        // ...
    } else if (N->getOperand(0).getOpcode() == ISD::SHL) {
      SelectS_BFEFromShifts(N);
      return;
    }
    // ...
  case ISD::SRA:
    if (N->getOperand(0).getOpcode() == ISD::SHL) {
      SelectS_BFEFromShifts(N);
      return;
    }
    // ...
  case ISD::SIGN_EXTEND_INREG: {
    // ...
    unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
bool AMDGPUDAGToDAGISel::isCBranchSCC(const SDNode *N) const {
  assert(N->getOpcode() == ISD::BRCOND);
  if (!N->hasOneUse())
    // ...
  MVT VT = Cond.getOperand(0).getSimpleValueType();
  // ...
  if (VT == MVT::i64) {
    // ...
           Subtarget->hasScalarCompareEq64();
  }

  if ((VT == MVT::f16 || VT == MVT::f32) && Subtarget->hasSALUFloatInsts())
void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
  // ...
  if (Cond.isUndef()) {
    CurDAG->SelectNodeTo(N, AMDGPU::SI_BR_UNDEF, MVT::Other,
                         N->getOperand(2), N->getOperand(0));
    return;
  }

  const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
  // ...
  bool UseSCCBr = isCBranchSCC(N) && isUniformBr(N);
  bool AndExec = !UseSCCBr;
  bool Negate = false;
  // ...
      Cond->getOperand(0)->getOpcode() == AMDGPUISD::SETCC) {
    // ...
    bool NegatedBallot = false;
    // ...
      UseSCCBr = !BallotCond->isDivergent();
      Negate = Negate ^ NegatedBallot;
      // ...
      UseSCCBr ? (Negate ? AMDGPU::S_CBRANCH_SCC0 : AMDGPU::S_CBRANCH_SCC1)
               : (Negate ? AMDGPU::S_CBRANCH_VCCZ : AMDGPU::S_CBRANCH_VCCNZ);
  Register CondReg = UseSCCBr ? AMDGPU::SCC : TRI->getVCC();
  // ...
          Subtarget->isWave32() ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64, SL,
          /*...*/
          CurDAG->getRegister(Subtarget->isWave32() ? AMDGPU::EXEC_LO
                                                    /*...*/
  // ...
  CurDAG->SelectNodeTo(N, BrOp, MVT::Other,
                       /*...*/);
}
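// Note (explanatory comment, not in the original source): uniform conditions
// branch on SCC (S_CBRANCH_SCC0/1); divergent ones go through VCC, and the
// AndExec path first ANDs the condition with EXEC (S_AND_B32/B64) so that
// inactive lanes cannot influence the branch.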
void AMDGPUDAGToDAGISel::SelectFP_EXTEND(SDNode *N) {
  if (Subtarget->hasSALUFloatInsts() && N->getValueType(0) == MVT::f32 &&
      !N->isDivergent()) {
    // ...
    if (Src.getValueType() == MVT::f16) {
      // ...
      CurDAG->SelectNodeTo(N, AMDGPU::S_CVT_HI_F32_F16, N->getVTList(),
                           /*...*/);
      // ...
}

void AMDGPUDAGToDAGISel::SelectDSAppendConsume(SDNode *N, unsigned IntrID) {
  // ...
  unsigned Opc = IntrID == Intrinsic::amdgcn_ds_append ? AMDGPU::DS_APPEND
                                                       : AMDGPU::DS_CONSUME;
  // ...
  MachineMemOperand *MMO = M->getMemOperand();
  // ...
  if (CurDAG->isBaseWithConstantOffset(Ptr)) {
    // ...
    if (isDSOffsetLegal(PtrBase, OffsetVal.getZExtValue())) {
      N = glueCopyToM0(N, PtrBase);
      Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i32);
    }
  }
  // ...
    N = glueCopyToM0(N, Ptr);
    Offset = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
  // ...
      CurDAG->getTargetConstant(IsGDS, SDLoc(), MVT::i32),
      /*...*/
  // ...
  SDNode *Selected = CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
  // ...
}
void AMDGPUDAGToDAGISel::SelectDSBvhStackIntrinsic(SDNode *N, unsigned IntrID) {
  // ...
  case Intrinsic::amdgcn_ds_bvh_stack_rtn:
  case Intrinsic::amdgcn_ds_bvh_stack_push4_pop1_rtn:
    Opc = AMDGPU::DS_BVH_STACK_RTN_B32;
    break;
  case Intrinsic::amdgcn_ds_bvh_stack_push8_pop1_rtn:
    Opc = AMDGPU::DS_BVH_STACK_PUSH8_POP1_RTN_B32;
    break;
  case Intrinsic::amdgcn_ds_bvh_stack_push8_pop2_rtn:
    Opc = AMDGPU::DS_BVH_STACK_PUSH8_POP2_RTN_B64;
    break;
  // ...
  SDValue Ops[] = {N->getOperand(2), N->getOperand(3), N->getOperand(4),
                   N->getOperand(5), N->getOperand(0)};
  // ...
  MachineMemOperand *MMO = M->getMemOperand();
  SDNode *Selected = CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
  // ...
}

static unsigned gwsIntrinToOpcode(unsigned IntrID) {
  switch (IntrID) {
  case Intrinsic::amdgcn_ds_gws_init:
    return AMDGPU::DS_GWS_INIT;
  case Intrinsic::amdgcn_ds_gws_barrier:
    return AMDGPU::DS_GWS_BARRIER;
  case Intrinsic::amdgcn_ds_gws_sema_v:
    return AMDGPU::DS_GWS_SEMA_V;
  case Intrinsic::amdgcn_ds_gws_sema_br:
    return AMDGPU::DS_GWS_SEMA_BR;
  case Intrinsic::amdgcn_ds_gws_sema_p:
    return AMDGPU::DS_GWS_SEMA_P;
  case Intrinsic::amdgcn_ds_gws_sema_release_all:
    return AMDGPU::DS_GWS_SEMA_RELEASE_ALL;
  // ...
}
void AMDGPUDAGToDAGISel::SelectDS_GWS(SDNode *N, unsigned IntrID) {
  if (!Subtarget->hasGWS() ||
      (IntrID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
       !Subtarget->hasGWSSemaReleaseAll())) {
    // ...
  }

  // ...
  const bool HasVSrc = N->getNumOperands() == 4;
  assert(HasVSrc || N->getNumOperands() == 3);
  // ...
  SDValue BaseOffset = N->getOperand(HasVSrc ? 3 : 2);
  // ...
  MachineMemOperand *MMO = M->getMemOperand();
  // ...
    glueCopyToM0(N, CurDAG->getTargetConstant(0, SL, MVT::i32));
    ImmOffset = ConstOffset->getZExtValue();
  // ...
    if (CurDAG->isBaseWithConstantOffset(BaseOffset)) {
      // ...
        = CurDAG->getMachineNode(AMDGPU::V_READFIRSTLANE_B32, SL, MVT::i32,
                                 /*...*/);
    // ...
        = CurDAG->getMachineNode(AMDGPU::S_LSHL_B32, SL, MVT::i32,
                                 /*...*/,
                                 CurDAG->getTargetConstant(16, SL, MVT::i32));
    glueCopyToM0(N, SDValue(M0Base, 0));
  // ...
  SDValue OffsetField = CurDAG->getTargetConstant(ImmOffset, SL, MVT::i32);
  // ...
  const MCInstrDesc &InstrDesc = TII->get(Opc);
  int Data0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);
  // ...
  const TargetRegisterClass *DataRC = TII->getRegClass(InstrDesc, Data0Idx);
  // ...
  const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
  // ...
  MVT DataVT = Data.getValueType().getSimpleVT();
  if (TRI->isTypeLegalForClass(*DataRC, DataVT)) {
    // ...
    Ops.push_back(N->getOperand(2));
    // ...
        CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32),
        /*...*/
        CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, SL, MVT::i32),
        /*...*/
        CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32)};
    // ...
                                   SL, MVT::v2i32, RegSeqOps),
    // ...
  }

  Ops.push_back(OffsetField);
  Ops.push_back(Chain);

  SDNode *Selected = CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
  // ...
}
void AMDGPUDAGToDAGISel::SelectInterpP1F16(SDNode *N) {
  if (Subtarget->getLDSBankCount() != 16) {
    // ...
  }
  // ...
  SDVTList VTs = CurDAG->getVTList(MVT::f32, MVT::Other);
  // ...
      CurDAG->getMachineNode(AMDGPU::V_INTERP_MOV_F32, DL, VTs,
                             {CurDAG->getTargetConstant(2, DL, MVT::i32),
                              /*...*/});
  // ...
  SDNode *InterpP1LV =
      CurDAG->getMachineNode(AMDGPU::V_INTERP_P1LV_F16, DL, MVT::f32,
                             {CurDAG->getTargetConstant(0, DL, MVT::i32),
                              /*...*/
                              CurDAG->getTargetConstant(0, DL, MVT::i32),
                              /*...*/
                              CurDAG->getTargetConstant(0, DL, MVT::i1),
                              CurDAG->getTargetConstant(0, DL, MVT::i32),
                              /*...*/});
void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *N) {
  unsigned IntrID = N->getConstantOperandVal(1);
  switch (IntrID) {
  case Intrinsic::amdgcn_ds_append:
  case Intrinsic::amdgcn_ds_consume: {
    if (N->getValueType(0) != MVT::i32)
      break;
    SelectDSAppendConsume(N, IntrID);
    return;
  }
  case Intrinsic::amdgcn_ds_bvh_stack_rtn:
  case Intrinsic::amdgcn_ds_bvh_stack_push4_pop1_rtn:
  case Intrinsic::amdgcn_ds_bvh_stack_push8_pop1_rtn:
  case Intrinsic::amdgcn_ds_bvh_stack_push8_pop2_rtn:
    SelectDSBvhStackIntrinsic(N, IntrID);
    return;
  case Intrinsic::amdgcn_init_whole_wave:
    CurDAG->getMachineFunction()
        .getInfo<SIMachineFunctionInfo>()
        ->setInitWholeWave();
    break;
  }
  // ...
}
void AMDGPUDAGToDAGISel::SelectINTRINSIC_WO_CHAIN(SDNode *N) {
  unsigned IntrID = N->getConstantOperandVal(0);
  unsigned Opcode = AMDGPU::INSTRUCTION_LIST_END;
  SDNode *ConvGlueNode = N->getGluedNode();
  // ...
        CurDAG->getMachineNode(TargetOpcode::CONVERGENCECTRL_GLUE, {},
                               MVT::Glue, SDValue(ConvGlueNode, 0));
  // ...
    ConvGlueNode = nullptr;
  // ...
  case Intrinsic::amdgcn_wqm:
    Opcode = AMDGPU::WQM;
    break;
  case Intrinsic::amdgcn_softwqm:
    Opcode = AMDGPU::SOFT_WQM;
    break;
  case Intrinsic::amdgcn_wwm:
  case Intrinsic::amdgcn_strict_wwm:
    Opcode = AMDGPU::STRICT_WWM;
    break;
  case Intrinsic::amdgcn_strict_wqm:
    Opcode = AMDGPU::STRICT_WQM;
    break;
  case Intrinsic::amdgcn_interp_p1_f16:
    SelectInterpP1F16(N);
    return;
  case Intrinsic::amdgcn_permlane16_swap:
  case Intrinsic::amdgcn_permlane32_swap: {
    if ((IntrID == Intrinsic::amdgcn_permlane16_swap &&
         !Subtarget->hasPermlane16Swap()) ||
        (IntrID == Intrinsic::amdgcn_permlane32_swap &&
         !Subtarget->hasPermlane32Swap())) {
      // ...
    }
    Opcode = IntrID == Intrinsic::amdgcn_permlane16_swap
                 ? AMDGPU::V_PERMLANE16_SWAP_B32_e64
                 : AMDGPU::V_PERMLANE32_SWAP_B32_e64;
    // ...
      NewOps.push_back(SDValue(ConvGlueNode, 0));
    // ...
    bool FI = N->getConstantOperandVal(3);
    NewOps[2] = CurDAG->getTargetConstant(
        /*...*/);
    // ...
    CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), NewOps);
    return;
  }
  // ...
  if (Opcode != AMDGPU::INSTRUCTION_LIST_END) {
    // ...
    CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), {Src});
    // ...
    NewOps.push_back(SDValue(ConvGlueNode, 0));
    CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), NewOps);
  }
}
void AMDGPUDAGToDAGISel::SelectINTRINSIC_VOID(SDNode *N) {
  unsigned IntrID = N->getConstantOperandVal(1);
  switch (IntrID) {
  case Intrinsic::amdgcn_ds_gws_init:
  case Intrinsic::amdgcn_ds_gws_barrier:
  case Intrinsic::amdgcn_ds_gws_sema_v:
  case Intrinsic::amdgcn_ds_gws_sema_br:
  case Intrinsic::amdgcn_ds_gws_sema_p:
  case Intrinsic::amdgcn_ds_gws_sema_release_all:
    SelectDS_GWS(N, IntrID);
    return;
  // ...
}

void AMDGPUDAGToDAGISel::SelectWAVE_ADDRESS(SDNode *N) {
  // ...
      CurDAG->getTargetConstant(Subtarget->getWavefrontSizeLog2(), SDLoc(N),
                                MVT::i32);
  CurDAG->SelectNodeTo(N, AMDGPU::S_LSHR_B32, N->getVTList(),
                       {N->getOperand(0), Log2WaveSize});
}
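// Note (explanatory comment, not in the original source): a "wave address"
// is the scratch byte address divided by the wavefront size (scratch memory
// is swizzled per lane), which is why the byte value is shifted right by
// log2(wavesize) here and shifted left again in SelectSTACKRESTORE below.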
void AMDGPUDAGToDAGISel::SelectSTACKRESTORE(SDNode *N) {
  // ...
      Subtarget->getWavefrontSizeLog2(), SL, MVT::i32);
  // ...
  if (N->isDivergent()) {
    SrcVal = SDValue(CurDAG->getMachineNode(AMDGPU::V_READFIRSTLANE_B32, SL,
                                            /*...*/),
                     0);
  }
  // ...
  CopyVal = SDValue(CurDAG->getMachineNode(AMDGPU::S_LSHL_B32, SL, MVT::i32,
                                           {SrcVal, Log2WaveSize}),
                    0);
  // ...
  SDValue CopyToSP = CurDAG->getCopyToReg(N->getOperand(0), SL, SP, CopyVal);
bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(SDValue In, SDValue &Src,
                                            /*...*/,
                                            bool IsCanonicalizing,
                                            bool AllowAbs) const {
  // ...
  if (Src.getOpcode() == ISD::FNEG) {
    // ...
    Src = Src.getOperand(0);
  } else if (Src.getOpcode() == ISD::FSUB && IsCanonicalizing) {
    // ...
    if (LHS && LHS->isZero()) {
      // ...
      Src = Src.getOperand(1);
    }
  }

  if (AllowAbs && Src.getOpcode() == ISD::FABS) {
    // ...
    Src = Src.getOperand(0);
  }
  // ...
  if (IsCanonicalizing)
    // ...
  EVT VT = Src.getValueType();
  if (/*...*/
      (VT != MVT::i32 && VT != MVT::v2i32 && VT != MVT::i64))
    // ...
  auto ReplaceSrc = [&]() -> SDValue {
    // ...
      return Src.getOperand(0);
    // ...
                          Src.getValueType(), LHS, Index);
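// Note (explanatory comment, not in the original source): the VOP3 modifier
// matchers fold fneg/fabs wrappers into the instruction's neg/abs modifier
// bits and return the unwrapped value as Src; an fsub from zero is folded to
// a neg modifier only in the canonicalizing mode checked above.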
  if (SelectVOP3ModsImpl(In, Src, Mods, /*IsCanonicalizing=*/true,
                         /*...*/)) {
    SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
    // ...

bool AMDGPUDAGToDAGISel::SelectVOP3ModsNonCanonicalizing(
    /*...*/) const {
  // ...
  if (SelectVOP3ModsImpl(In, Src, Mods, /*IsCanonicalizing=*/false,
                         /*...*/)) {
    SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
    // ...

bool AMDGPUDAGToDAGISel::SelectVOP3BMods(SDValue In, SDValue &Src,
                                         /*...*/) const {
  if (SelectVOP3ModsImpl(In, Src, Mods,
                         /*...*/)) {
    SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
    // ...

bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src) const {
  if (In.getOpcode() == ISD::FABS || In.getOpcode() == ISD::FNEG)
    // ...

bool AMDGPUDAGToDAGISel::SelectVINTERPModsImpl(SDValue In, SDValue &Src,
                                               /*...*/) const {
  // ...
  if (SelectVOP3ModsImpl(In, Src, Mods,
                         /*...*/)) {
    SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
    // ...

bool AMDGPUDAGToDAGISel::SelectVINTERPMods(SDValue In, SDValue &Src,
                                           /*...*/) const {
  return SelectVINTERPModsImpl(In, Src, SrcMods, /*OpSel=*/false);
}

bool AMDGPUDAGToDAGISel::SelectVINTERPModsHi(SDValue In, SDValue &Src,
                                             /*...*/) const {
  return SelectVINTERPModsImpl(In, Src, SrcMods, /*OpSel=*/true);
}

bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
                                         /*...*/) const {
  // ...
  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);

  return SelectVOP3Mods(In, Src, SrcMods);
}

bool AMDGPUDAGToDAGISel::SelectVOP3BMods0(SDValue In, SDValue &Src,
                                          /*...*/) const {
  // ...
  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);

  return SelectVOP3BMods(In, Src, SrcMods);
}

bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src,
                                         /*...*/) const {
  // ...
  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
  // ...
}
bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src,
                                         SDValue &SrcMods, bool IsDOT) const {
  // ...
  if (Src.getOpcode() == ISD::FNEG) {
    // ...
    Src = Src.getOperand(0);
  }

  if (/*...*/ &&
      (!IsDOT || !Subtarget->hasDOTOpSelHazard())) {
    unsigned VecMods = Mods;

    SDValue Lo = stripBitcast(Src.getOperand(0));
    SDValue Hi = stripBitcast(Src.getOperand(1));

    if (Lo.getOpcode() == ISD::FNEG) {
      Lo = stripBitcast(Lo.getOperand(0));
      // ...
    }

    if (Hi.getOpcode() == ISD::FNEG) {
      Hi = stripBitcast(Hi.getOperand(0));
      // ...
    }
    // ...
    unsigned VecSize = Src.getValueSizeInBits();
    Lo = stripExtractLoElt(Lo);
    Hi = stripExtractLoElt(Hi);

    if (Lo.getValueSizeInBits() > VecSize) {
      Lo = CurDAG->getTargetExtractSubreg(
          (VecSize > 32) ? AMDGPU::sub0_sub1 : AMDGPU::sub0, SDLoc(In),
          /*...*/);
    }

    if (Hi.getValueSizeInBits() > VecSize) {
      Hi = CurDAG->getTargetExtractSubreg(
          (VecSize > 32) ? AMDGPU::sub0_sub1 : AMDGPU::sub0, SDLoc(In),
          /*...*/);
    }

    assert(Lo.getValueSizeInBits() <= VecSize &&
           Hi.getValueSizeInBits() <= VecSize);

    if (Lo == Hi && !isInlineImmediate(Lo.getNode())) {
      // ...
      if (VecSize == Lo.getValueSizeInBits()) {
        // ...
      } else if (VecSize == 32) {
        Src = createVOP3PSrc32FromLo16(Lo, Src, CurDAG, Subtarget);
      } else {
        assert(Lo.getValueSizeInBits() == 32 && VecSize == 64);
        // ...
            CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, SL,
                                   Lo.getValueType()),
            0);
        auto RC = Lo->isDivergent() ? AMDGPU::VReg_64RegClassID
                                    : AMDGPU::SReg_64RegClassID;
        const SDValue Ops[] = {
            CurDAG->getTargetConstant(RC, SL, MVT::i32),
            Lo, CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32),
            Undef, CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32)};

        Src = SDValue(CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, SL,
                                             Src.getValueType(), Ops),
                      0);
      }

      SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
      // ...
          .bitcastToAPInt().getZExtValue();
      // ...
      Src = CurDAG->getTargetConstant(Lit, SDLoc(In), MVT::i64);
      SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
      // ...
  }

  if (/*...*/ &&
      Src.getNumOperands() == 2) {
    // ...
    ArrayRef<int> Mask = SVN->getMask();

    if (Mask[0] < 2 && Mask[1] < 2) {
      // ...
      SDValue ShuffleSrc = SVN->getOperand(0);
      // ...
      if (ShuffleSrc.getOpcode() == ISD::FNEG) {
        // ...
      }
      // ...
      SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
      // ...
  }
  // ...
  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
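// Note (explanatory comment, not in the original source): for packed (VOP3P)
// operands the Mods word carries per-half op_sel/op_sel_hi and neg/neg_hi
// bits, so a build_vector whose halves are the same scalar, or a lo/lo
// shuffle, can be re-expressed as one register plus modifier bits instead of
// an actual shuffle.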
bool AMDGPUDAGToDAGISel::SelectVOP3PModsDOT(SDValue In, SDValue &Src,
                                            /*...*/) const {
  return SelectVOP3PMods(In, Src, SrcMods, /*IsDOT=*/true);
}

bool AMDGPUDAGToDAGISel::SelectWMMAOpSelVOP3PMods(SDValue In,
                                                  /*...*/) const {
  // ...
  assert(C->getAPIntValue().getBitWidth() == 1 && "expected i1 value");
  // ...
  unsigned SrcVal = C->getZExtValue();
  // ...
  Src = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
  // ...
}

static MachineSDNode *buildRegSequence32(SmallVectorImpl<SDValue> &Elts,
                                         llvm::SelectionDAG *CurDAG,
                                         const SDLoc &DL) {
  unsigned DstRegClass;
  // ...
  switch (Elts.size()) {
  case 8:
    DstRegClass = AMDGPU::VReg_256RegClassID;
    break;
  case 4:
    DstRegClass = AMDGPU::VReg_128RegClassID;
    break;
  case 2:
    DstRegClass = AMDGPU::VReg_64RegClassID;
    break;
  // ...
  }
  // ...
  for (unsigned i = 0; i < Elts.size(); ++i) {
    Ops.push_back(Elts[i]);
    // ...
  }
  // ...
}

static MachineSDNode *buildRegSequence16(SmallVectorImpl<SDValue> &Elts,
                                         llvm::SelectionDAG *CurDAG,
                                         const SDLoc &DL) {
  assert("unhandled Reg sequence size" &&
         (Elts.size() == 8 || Elts.size() == 16));
  // ...
  for (unsigned i = 0; i < Elts.size(); i += 2) {
    SDValue LoSrc = stripExtractLoElt(stripBitcast(Elts[i]));
    // ...
                      {Elts[i + 1], Elts[i], PackLoLo});
    // ...
  }
  // ...
}

static MachineSDNode *buildRegSequence(SmallVectorImpl<SDValue> &Elts,
                                       llvm::SelectionDAG *CurDAG,
                                       const SDLoc &DL, unsigned ElementSize) {
  if (ElementSize == 16)
    return buildRegSequence16(Elts, CurDAG, DL);
  if (ElementSize == 32)
    return buildRegSequence32(Elts, CurDAG, DL);
  // ...
}

static void selectWMMAModsNegAbs(unsigned ModOpcode, unsigned &Mods,
                                 /*...*/,
                                 unsigned ElementSize) {
  if (ModOpcode == ISD::FNEG) {
    // ...
    for (auto El : Elts) {
      if (El.getOpcode() != ISD::FABS)
        // ...
      NegAbsElts.push_back(El->getOperand(0));
    }
    if (Elts.size() != NegAbsElts.size()) {
      // ...
    }
    // ...
  } else {
    assert(ModOpcode == ISD::FABS);
    // ...
static void checkWMMAElementsModifiersF16(
    BuildVectorSDNode *BV, std::function<bool(SDValue)> ModifierCheck) {
  // ...
  for (unsigned i = 0; i < F16Pair->getNumOperands(); ++i) {
    SDValue ElF16 = stripBitcast(F16Pair->getOperand(i));
    if (!ModifierCheck(ElF16))
      // ...
  }
}

bool AMDGPUDAGToDAGISel::SelectWMMAModsF16Neg(SDValue In, SDValue &Src,
                                              /*...*/) const {
  // ...
  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
  // ...
}

bool AMDGPUDAGToDAGISel::SelectWMMAModsF16NegAbs(SDValue In, SDValue &Src,
                                                 /*...*/) const {
  // ...
      if (EltsF16.empty())
        ModOpcode = (ElF16.getOpcode() == ISD::FNEG) ? ISD::FNEG : ISD::FABS;
      // ...
    if (EltsV2F16.empty())
      ModOpcode = (ElV2f16.getOpcode() == ISD::FNEG) ? ISD::FNEG : ISD::FABS;
    // ...
  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
  // ...
}

bool AMDGPUDAGToDAGISel::SelectWMMAModsF32NegAbs(SDValue In, SDValue &Src,
                                                 /*...*/) const {
  // ...
      unsigned ModOpcode =
          (ElF32.getOpcode() == ISD::FNEG) ? ISD::FNEG : ISD::FABS;
      // ...
  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
  // ...
}
bool AMDGPUDAGToDAGISel::SelectWMMAVISrc(SDValue In, SDValue &Src) const {
  // ...
  BitVector UndefElements;
  // ...
    if (isInlineImmediate(Splat.getNode())) {
      // ...
        unsigned Imm = C->getAPIntValue().getSExtValue();
        Src = CurDAG->getTargetConstant(Imm, SDLoc(In), MVT::i32);
        // ...
        unsigned Imm = C->getValueAPF().bitcastToAPInt().getSExtValue();
        Src = CurDAG->getTargetConstant(Imm, SDLoc(In), MVT::i32);
        // ...

  // ...
  SDValue SplatSrc32 = stripBitcast(In);
  // ...
    if (SDValue Splat32 = SplatSrc32BV->getSplatValue()) {
      SDValue SplatSrc16 = stripBitcast(Splat32);
      // ...
        const SIInstrInfo *TII = Subtarget->getInstrInfo();
        std::optional<APInt> RawValue;
        // ...
          RawValue = C->getValueAPF().bitcastToAPInt();
        // ...
          RawValue = C->getAPIntValue();

        if (RawValue.has_value()) {
          EVT VT = In.getValueType().getScalarType();
          // ...
            if (TII->isInlineConstant(FloatVal)) {
              Src = CurDAG->getTargetConstant(RawValue.value(), SDLoc(In),
                                              /*...*/);
              // ...
            if (TII->isInlineConstant(RawValue.value())) {
              Src = CurDAG->getTargetConstant(RawValue.value(), SDLoc(In),
                                              /*...*/);
              // ...
}

bool AMDGPUDAGToDAGISel::SelectSWMMACIndex8(SDValue In, SDValue &Src,
                                            /*...*/) const {
  // ...
  const llvm::SDValue &ShiftSrc = In.getOperand(0);
  // ...
  IndexKey = CurDAG->getTargetConstant(Key, SDLoc(In), MVT::i32);
  // ...
}

bool AMDGPUDAGToDAGISel::SelectSWMMACIndex16(SDValue In, SDValue &Src,
                                             /*...*/) const {
  // ...
  const llvm::SDValue &ShiftSrc = In.getOperand(0);
  // ...
  IndexKey = CurDAG->getTargetConstant(Key, SDLoc(In), MVT::i32);
  // ...
}

bool AMDGPUDAGToDAGISel::SelectSWMMACIndex32(SDValue In, SDValue &Src,
                                             /*...*/) const {
  // ...
    const SDValue &ExtendSrc = In.getOperand(0);
    // ...
  } else if (In->getOpcode() == ISD::BITCAST) {
    const SDValue &CastSrc = In.getOperand(0);
    // ...
      if (Zero && Zero->getZExtValue() == 0)
        // ...
    Src = ExtractVecEltSrc;
    // ...
  IndexKey = CurDAG->getTargetConstant(Key, SDLoc(In), MVT::i32);
  // ...
}

bool AMDGPUDAGToDAGISel::SelectVOP3OpSel(SDValue In, SDValue &Src,
                                         /*...*/) const {
  // ...
  SrcMods = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
  // ...
}

bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods(SDValue In, SDValue &Src,
                                             /*...*/) const {
  // ...
  return SelectVOP3Mods(In, Src, SrcMods);
}
  if (Op.getValueType() != MVT::f32 || Op.getOpcode() != ISD::BITCAST)
    // ...
  Op = Op.getOperand(0);
  // ...
  IsExtractHigh = false;
  // ...
    if (!Low16 || !Low16->isZero())
      // ...
    Op = stripBitcast(Op.getOperand(1));
    if (Op.getValueType() != MVT::bf16)
      // ...

  if (Op.getValueType() != MVT::i32)
    // ...
    if (Mask->getZExtValue() == 0xffff0000) {
      IsExtractHigh = true;
      return Op.getOperand(0);
    }
  // ...
  return Op.getOperand(0);
// ...

bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src,
                                                   /*...*/) const {
  // ...
  SelectVOP3ModsImpl(In, Src, Mods);

  bool IsExtractHigh = false;
  if (Src.getOpcode() == ISD::FP_EXTEND) {
    Src = Src.getOperand(0);
  } else if (VT == MVT::bf16) {
    // ...
  }
  // ...
  if (Src.getValueType() != VT &&
      (VT != MVT::bf16 || Src.getValueType() != MVT::i32))
    // ...
  Src = stripBitcast(Src);
  // ...
  SelectVOP3ModsImpl(Src, Src, ModsTmp);
  // ...
  if (Src.getValueSizeInBits() == 16) {
    // ...
        Src.getOperand(0).getValueType() == MVT::i32) {
      Src = Src.getOperand(0);
      // ...
    }
    if (Subtarget->useRealTrue16Insts())
      // ...
    Src = createVOP3PSrc32FromLo16(Src, In, CurDAG, Subtarget);
  } else if (IsExtractHigh)
    // ...
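// Note (explanatory comment, not in the original source): mad-mix sources
// are f16/bf16 values living in one half of a 32-bit register; the code
// above folds away the fp_extend, and IsExtractHigh / op_sel decide whether
// the low or high half is read (high-half bf16 extracts come from the
// 0xffff0000 mask match in matchBF16FPExtendLike).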
bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsExt(SDValue In, SDValue &Src,
                                                  /*...*/) const {
  // ...
  if (!SelectVOP3PMadMixModsImpl(In, Src, Mods, MVT::f16))
    // ...
  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
  // ...
}

bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixMods(SDValue In, SDValue &Src,
                                               /*...*/) const {
  SelectVOP3PMadMixModsImpl(In, Src, Mods, MVT::f16);
  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
  // ...
}

bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixBF16ModsExt(SDValue In, SDValue &Src,
                                                      /*...*/) const {
  // ...
  if (!SelectVOP3PMadMixModsImpl(In, Src, Mods, MVT::bf16))
    // ...
  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
  // ...
}

bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixBF16Mods(SDValue In, SDValue &Src,
                                                   /*...*/) const {
  SelectVOP3PMadMixModsImpl(In, Src, Mods, MVT::bf16);
  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
  // ...
}
  unsigned NumOpcodes = 0;
  // ...
  const uint8_t SrcBits[3] = {0xf0, 0xcc, 0xaa};
  // ...
    if (C->isAllOnes()) {
      // ...
    for (unsigned I = 0; I < Src.size(); ++I) {
      // ...
    if (Src.size() == 3) {
      // ...
      if (C->isAllOnes()) {
        // ...
        for (unsigned I = 0; I < Src.size(); ++I) {
          if (Src[I] == LHS) {
            // ...
    Bits = SrcBits[Src.size()];
    // ...

  switch (In.getOpcode()) {
  // ...
    if (!getOperandBits(LHS, LHSBits) ||
        !getOperandBits(RHS, RHSBits)) {
      // ...
      return std::make_pair(0, 0);
    }
    // ...
    NumOpcodes += Op.first;
    LHSBits = Op.second;
    // ...
    NumOpcodes += Op.first;
    RHSBits = Op.second;
    // ...
    return std::make_pair(0, 0);
  }

  switch (In.getOpcode()) {
  case ISD::AND:
    TTbl = LHSBits & RHSBits;
    break;
  case ISD::OR:
    TTbl = LHSBits | RHSBits;
    break;
  case ISD::XOR:
    TTbl = LHSBits ^ RHSBits;
    break;
  // ...
  }

  return std::make_pair(NumOpcodes + 1, TTbl);
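// Worked example (illustrative, not from the original source): each
// discovered source is assigned a base LUT byte from SrcBits (first source
// 0xf0, second 0xcc, third 0xaa), and the bitwise operators combine LUT
// bytes the same way they combine values, so (a & b) | c yields
// TTbl = (0xf0 & 0xcc) | 0xaa = 0xc0 | 0xaa = 0xea for the BITOP3 table.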
  unsigned NumOpcodes;
  // ...
  std::tie(NumOpcodes, TTbl) = BitOp3_Op(In, Src);
  // ...
  if (NumOpcodes < 2 || Src.empty())
    // ...
  if (NumOpcodes < 4 && !In->isDivergent())
    // ...
  if (NumOpcodes == 2 && In.getValueType() == MVT::i32) {
    // ...
        (In.getOperand(0).getOpcode() == In.getOpcode() ||
         In.getOperand(1).getOpcode() == In.getOpcode()))
      // ...
  while (Src.size() < 3)
    Src.push_back(Src[0]);
  // ...
  Tbl = CurDAG->getTargetConstant(TTbl, SDLoc(In), MVT::i32);
  // ...

    return CurDAG->getUNDEF(MVT::i32);
  // ...
    return CurDAG->getConstant(C->getZExtValue() << 16, SL, MVT::i32);
  // ...
    return CurDAG->getConstant(
        C->getValueAPF().bitcastToAPInt().getZExtValue() << 16, SL, MVT::i32);
bool AMDGPUDAGToDAGISel::isVGPRImm(const SDNode *N) const {
  assert(CurDAG->getTarget().getTargetTriple().isAMDGCN());

  const SIRegisterInfo *SIRI = Subtarget->getRegisterInfo();
  const SIInstrInfo *SII = Subtarget->getInstrInfo();

  unsigned Limit = 0;
  bool AllUsesAcceptSReg = true;
  for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end();
       Limit < 10 && U != E; ++U, ++Limit) {
    const TargetRegisterClass *RC =
        getOperandRegClass(U->getUser(), U->getOperandNo());

    // An unknown register class (e.g. an inline asm constraint) may need an
    // SGPR, so be conservative.
    if (!RC || SIRI->isSGPRClass(RC))
      return false;

    if (RC != &AMDGPU::VS_32RegClass && RC != &AMDGPU::VS_64RegClass &&
        RC != &AMDGPU::VS_64_Align2RegClass) {
      AllUsesAcceptSReg = false;
      SDNode *User = U->getUser();
      if (User->isMachineOpcode()) {
        unsigned Opc = User->getMachineOpcode();
        const MCInstrDesc &Desc = SII->get(Opc);
        if (Desc.isCommutable()) {
          unsigned OpIdx = Desc.getNumDefs() + U->getOperandNo();
          unsigned CommuteIdx1 = TargetInstrInfo::CommuteAnyOperandIndex;
          if (SII->findCommutedOpIndices(Desc, OpIdx, CommuteIdx1)) {
            unsigned CommutedOpNo = CommuteIdx1 - Desc.getNumDefs();
            const TargetRegisterClass *CommutedRC =
                getOperandRegClass(U->getUser(), CommutedOpNo);
            if (CommutedRC == &AMDGPU::VS_32RegClass ||
                CommutedRC == &AMDGPU::VS_64RegClass ||
                CommutedRC == &AMDGPU::VS_64_Align2RegClass)
              AllUsesAcceptSReg = true;
          }
        }
      }
      // If commuting this use did not help, the remaining uses cannot
      // change the answer; terminate early.
      if (!AllUsesAcceptSReg)
        break;
    }
  }
  return !AllUsesAcceptSReg && (Limit < 10);
}
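The commuting fallback above can be summarized as: a use rejects a scalar operand only if neither its own slot nor the commuted slot of a commutable instruction accepts one. A toy standalone model of that rule (not LLVM API):

#include <array>
#include <cassert>

// Toy model of the commuting fallback (hypothetical types, not LLVM API).
struct ToyDesc {
  bool Commutable;
  std::array<bool, 2> SlotTakesSGPR; // per source operand
};

static bool useAcceptsSReg(const ToyDesc &Desc, unsigned OpNo) {
  if (Desc.SlotTakesSGPR[OpNo])
    return true;
  // Mirrors the findCommutedOpIndices fallback in spirit.
  return Desc.Commutable && Desc.SlotTakesSGPR[1 - OpNo];
}

int main() {
  ToyDesc VAddF32{/*Commutable=*/true, {/*src0=*/true, /*src1=*/false}};
  assert(useAcceptsSReg(VAddF32, 1)); // src1 can commute into src0
  ToyDesc NonCommutable{false, {true, false}};
  assert(!useAcceptsSReg(NonCommutable, 1));
  return 0;
}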
bool AMDGPUDAGToDAGISel::isUniformLoad(const SDNode *N) const {
  const auto *Ld = cast<LoadSDNode>(N);

  const MachineMemOperand *MMO = Ld->getMemOperand();
  if (Ld->isDivergent()) {
    // ...
  }

  // ... (size and alignment checks elided)
  return ((Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
           Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT) ||
          (Subtarget->getScalarizeGlobalBehavior() &&
           Ld->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
           Ld->isSimple() &&
           static_cast<const SITargetLowering *>(getTargetLowering())
               ->isMemOpHasNoClobberedMemOperand(N)));
}
void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
  const AMDGPUTargetLowering *Lowering =
      static_cast<const AMDGPUTargetLowering *>(getTargetLowering());
  bool IsModified = false;
  do {
    IsModified = false;
    // Walk all selected nodes and try to fold them a bit more.
    SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_begin();
    while (Position != CurDAG->allnodes_end()) {
      SDNode *Node = &*Position++;
      auto *MachineNode = dyn_cast<MachineSDNode>(Node);
      if (!MachineNode)
        continue;
      SDNode *ResNode = Lowering->PostISelFolding(MachineNode, *CurDAG);
      if (ResNode != Node) {
        if (ResNode)
          ReplaceUses(Node, ResNode);
        IsModified = true;
      }
    }
    CurDAG->RemoveDeadNodes();
  } while (IsModified);
}
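The do/while above is a standard fixed-point pass: re-walk the node list until one full sweep makes no change. A standalone sketch of the shape:

#include <cstddef>
#include <vector>

// Standalone sketch of the fixed-point shape: fold in sweeps until a full
// sweep changes nothing (here, "folding" just deletes zero-valued nodes).
static bool foldOnce(std::vector<int> &Nodes) {
  bool Changed = false;
  for (std::size_t I = 0; I < Nodes.size(); ++I) {
    if (Nodes[I] == 0) {              // toy foldable pattern
      Nodes.erase(Nodes.begin() + I); // like ReplaceUses + RemoveDeadNodes
      Changed = true;
    }
  }
  return Changed;
}

int main() {
  std::vector<int> Nodes = {1, 0, 2, 0, 3};
  bool IsModified;
  do {
    IsModified = foldOnce(Nodes); // iterate until nothing changes
  } while (IsModified);
  return Nodes.size() == 3 ? 0 : 1;
}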