29#include "llvm/IR/IntrinsicsAMDGPU.h"
33#ifdef EXPENSIVE_CHECKS
38#define DEBUG_TYPE "amdgpu-isel"
53 In = stripBitcast(In);
59 Out = In.getOperand(0);
70 if (ShiftAmt->getZExtValue() == 16) {
90 if (
Lo->isDivergent()) {
92 SL,
Lo.getValueType()),
100 Src.getValueType(),
Ops),
118 SDValue Idx = In.getOperand(1);
120 return In.getOperand(0);
124 SDValue Src = In.getOperand(0);
125 if (Src.getValueType().getSizeInBits() == 32)
126 return stripBitcast(Src);
136 assert(Elts.
size() == SubRegClass.
size() &&
"array size mismatch");
137 unsigned NumElts = Elts.
size();
140 for (
unsigned i = 0; i < NumElts; ++i) {
141 Ops[2 * i + 1] = Elts[i];
151 "AMDGPU DAG->DAG Pattern Instruction Selection",
false,
155#ifdef EXPENSIVE_CHECKS
160 "AMDGPU DAG->DAG Pattern Instruction Selection",
false,
181bool AMDGPUDAGToDAGISel::fp16SrcZerosHighBits(
unsigned Opc)
const {
215 case AMDGPUISD::FRACT:
216 case AMDGPUISD::CLAMP:
217 case AMDGPUISD::COS_HW:
218 case AMDGPUISD::SIN_HW:
219 case AMDGPUISD::FMIN3:
220 case AMDGPUISD::FMAX3:
221 case AMDGPUISD::FMED3:
222 case AMDGPUISD::FMAD_FTZ:
225 case AMDGPUISD::RCP_IFLAG:
235 case AMDGPUISD::DIV_FIXUP:
245#ifdef EXPENSIVE_CHECKS
249 assert(L->isLCSSAForm(DT));
257#ifdef EXPENSIVE_CHECKS
265 assert(Subtarget->d16PreservesUnusedBits());
266 MVT VT =
N->getValueType(0).getSimpleVT();
267 if (VT != MVT::v2i16 && VT != MVT::v2f16)
289 unsigned LoadOp = AMDGPUISD::LOAD_D16_HI;
292 AMDGPUISD::LOAD_D16_HI_I8 : AMDGPUISD::LOAD_D16_HI_U8;
298 CurDAG->getMemIntrinsicNode(LoadOp,
SDLoc(LdHi), VTList,
311 if (LdLo &&
Lo.hasOneUse()) {
317 unsigned LoadOp = AMDGPUISD::LOAD_D16_LO;
320 AMDGPUISD::LOAD_D16_LO_I8 : AMDGPUISD::LOAD_D16_LO_U8;
332 CurDAG->getMemIntrinsicNode(LoadOp,
SDLoc(LdLo), VTList,
345 if (!Subtarget->d16PreservesUnusedBits())
350 bool MadeChange =
false;
351 while (Position !=
CurDAG->allnodes_begin()) {
356 switch (
N->getOpcode()) {
367 CurDAG->RemoveDeadNodes();
373bool AMDGPUDAGToDAGISel::isInlineImmediate(
const SDNode *
N)
const {
379 return TII->isInlineConstant(
C->getAPIntValue());
382 return TII->isInlineConstant(
C->getValueAPF());
392 unsigned OpNo)
const {
393 if (!
N->isMachineOpcode()) {
396 if (
Reg.isVirtual()) {
401 const SIRegisterInfo *
TRI = Subtarget->getRegisterInfo();
402 return TRI->getPhysRegBaseClass(
Reg);
408 switch (
N->getMachineOpcode()) {
410 const SIInstrInfo *
TII = Subtarget->getInstrInfo();
411 const MCInstrDesc &
Desc =
TII->get(
N->getMachineOpcode());
412 unsigned OpIdx =
Desc.getNumDefs() + OpNo;
416 int16_t RegClass =
TII->getOpRegClassID(
Desc.operands()[
OpIdx]);
420 return Subtarget->getRegisterInfo()->getRegClass(RegClass);
422 case AMDGPU::REG_SEQUENCE: {
423 unsigned RCID =
N->getConstantOperandVal(0);
424 const TargetRegisterClass *SuperRC =
425 Subtarget->getRegisterInfo()->getRegClass(RCID);
427 SDValue SubRegOp =
N->getOperand(OpNo + 1);
429 return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC,
438 Ops.push_back(NewChain);
439 for (
unsigned i = 1, e =
N->getNumOperands(); i != e; ++i)
440 Ops.push_back(
N->getOperand(i));
443 return CurDAG->MorphNodeTo(
N,
N->getOpcode(),
N->getVTList(),
Ops);
450 assert(
N->getOperand(0).getValueType() == MVT::Other &&
"Expected chain");
453 return glueCopyToOp(
N,
M0,
M0.getValue(1));
456SDNode *AMDGPUDAGToDAGISel::glueCopyToM0LDSInit(
SDNode *
N)
const {
459 if (Subtarget->ldsRequiresM0Init())
461 N,
CurDAG->getSignedTargetConstant(-1, SDLoc(
N), MVT::i32));
463 MachineFunction &
MF =
CurDAG->getMachineFunction();
464 unsigned Value =
MF.getInfo<SIMachineFunctionInfo>()->getGDSSize();
466 glueCopyToM0(
N,
CurDAG->getTargetConstant(
Value, SDLoc(
N), MVT::i32));
473 SDNode *
Lo =
CurDAG->getMachineNode(
474 AMDGPU::S_MOV_B32,
DL, MVT::i32,
476 SDNode *
Hi =
CurDAG->getMachineNode(
477 AMDGPU::S_MOV_B32,
DL, MVT::i32,
480 CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID,
DL, MVT::i32),
484 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
DL, VT,
Ops);
487SDNode *AMDGPUDAGToDAGISel::packConstantV2I16(
const SDNode *
N,
492 uint32_t LHSVal, RHSVal;
496 uint32_t
K = (LHSVal & 0xffff) | (RHSVal << 16);
498 isVGPRImm(
N) ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32, SL,
506 EVT VT =
N->getValueType(0);
510 SDValue RegClass =
CurDAG->getTargetConstant(RegClassID,
DL, MVT::i32);
512 if (NumVectorElts == 1) {
513 CurDAG->SelectNodeTo(
N, AMDGPU::COPY_TO_REGCLASS, EltVT,
N->getOperand(0),
518 bool IsGCN =
CurDAG->getSubtarget().getTargetTriple().isAMDGCN();
519 if (IsGCN && Subtarget->has64BitLiterals() && VT.
getSizeInBits() == 64 &&
522 bool AllConst =
true;
524 for (
unsigned I = 0;
I < NumVectorElts; ++
I) {
532 Val = CF->getValueAPF().bitcastToAPInt().getZExtValue();
535 C |= Val << (EltSize *
I);
540 CurDAG->getMachineNode(AMDGPU::S_MOV_B64_IMM_PSEUDO,
DL, VT, CV);
541 CurDAG->SelectNodeTo(
N, AMDGPU::COPY_TO_REGCLASS, VT,
SDValue(Copy, 0),
547 assert(NumVectorElts <= 32 &&
"Vectors with more than 32 elements not "
554 RegSeqArgs[0] =
CurDAG->getTargetConstant(RegClassID,
DL, MVT::i32);
555 bool IsRegSeq =
true;
556 unsigned NOps =
N->getNumOperands();
557 for (
unsigned i = 0; i < NOps; i++) {
565 RegSeqArgs[1 + (2 * i)] =
N->getOperand(i);
566 RegSeqArgs[1 + (2 * i) + 1] =
CurDAG->getTargetConstant(
Sub,
DL, MVT::i32);
568 if (NOps != NumVectorElts) {
573 for (
unsigned i = NOps; i < NumVectorElts; ++i) {
576 RegSeqArgs[1 + (2 * i)] =
SDValue(ImpDef, 0);
577 RegSeqArgs[1 + (2 * i) + 1] =
584 CurDAG->SelectNodeTo(
N, AMDGPU::REG_SEQUENCE,
N->getVTList(), RegSeqArgs);
588 EVT VT =
N->getValueType(0);
592 if (!Subtarget->hasPkMovB32() || !EltVT.
bitsEq(MVT::i32) ||
606 Mask[0] < 4 && Mask[1] < 4);
608 SDValue VSrc0 = Mask[0] < 2 ? Src0 : Src1;
609 SDValue VSrc1 = Mask[1] < 2 ? Src0 : Src1;
610 unsigned Src0SubReg = Mask[0] & 1 ? AMDGPU::sub1 : AMDGPU::sub0;
611 unsigned Src1SubReg = Mask[1] & 1 ? AMDGPU::sub1 : AMDGPU::sub0;
614 Src0SubReg = Src1SubReg;
616 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
DL, VT);
621 Src1SubReg = Src0SubReg;
623 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
DL, VT);
633 if (
N->isDivergent() && Src0SubReg == AMDGPU::sub1 &&
634 Src1SubReg == AMDGPU::sub0) {
650 SDValue Src0OpSelVal =
CurDAG->getTargetConstant(Src0OpSel,
DL, MVT::i32);
651 SDValue Src1OpSelVal =
CurDAG->getTargetConstant(Src1OpSel,
DL, MVT::i32);
654 CurDAG->SelectNodeTo(
N, AMDGPU::V_PK_MOV_B32,
N->getVTList(),
655 {Src0OpSelVal, VSrc0, Src1OpSelVal, VSrc1,
665 CurDAG->getTargetExtractSubreg(Src0SubReg,
DL, EltVT, VSrc0);
667 CurDAG->getTargetExtractSubreg(Src1SubReg,
DL, EltVT, VSrc1);
670 CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID,
DL, MVT::i32),
671 ResultElt0,
CurDAG->getTargetConstant(AMDGPU::sub0,
DL, MVT::i32),
672 ResultElt1,
CurDAG->getTargetConstant(AMDGPU::sub1,
DL, MVT::i32)};
673 CurDAG->SelectNodeTo(
N, TargetOpcode::REG_SEQUENCE, VT,
Ops);
677 unsigned int Opc =
N->getOpcode();
678 if (
N->isMachineOpcode()) {
686 N = glueCopyToM0LDSInit(
N);
696 if (
N->getValueType(0) == MVT::i64) {
697 SelectAddcSubbI64(
N);
701 if (
N->getValueType(0) != MVT::i32)
708 if (
N->getValueType(0) == MVT::i64) {
709 SelectAddcSubbI64(
N);
713 SelectUADDO_USUBO(
N);
716 case AMDGPUISD::FMUL_W_CHAIN: {
717 SelectFMUL_W_CHAIN(
N);
720 case AMDGPUISD::FMA_W_CHAIN: {
721 SelectFMA_W_CHAIN(
N);
727 EVT VT =
N->getValueType(0);
744 ?
TRI->getDefaultVectorSuperClassForBitWidth(NumVectorElts * 32)
756 if (
N->getValueType(0) == MVT::i128) {
757 RC =
CurDAG->getTargetConstant(AMDGPU::SGPR_128RegClassID,
DL, MVT::i32);
758 SubReg0 =
CurDAG->getTargetConstant(AMDGPU::sub0_sub1,
DL, MVT::i32);
759 SubReg1 =
CurDAG->getTargetConstant(AMDGPU::sub2_sub3,
DL, MVT::i32);
760 }
else if (
N->getValueType(0) == MVT::i64) {
761 RC =
CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID,
DL, MVT::i32);
762 SubReg0 =
CurDAG->getTargetConstant(AMDGPU::sub0,
DL, MVT::i32);
763 SubReg1 =
CurDAG->getTargetConstant(AMDGPU::sub1,
DL, MVT::i32);
767 const SDValue Ops[] = { RC,
N->getOperand(0), SubReg0,
768 N->getOperand(1), SubReg1 };
770 N->getValueType(0),
Ops));
776 if (
N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(
N) ||
777 Subtarget->has64BitLiterals())
782 Imm =
FP->getValueAPF().bitcastToAPInt().getZExtValue();
787 Imm =
C->getZExtValue();
796 case AMDGPUISD::BFE_I32:
797 case AMDGPUISD::BFE_U32: {
823 case AMDGPUISD::DIV_SCALE: {
834 return SelectMUL_LOHI(
N);
845 if (
N->getValueType(0) != MVT::i32)
856 case AMDGPUISD::CVT_PKRTZ_F16_F32:
857 case AMDGPUISD::CVT_PKNORM_I16_F32:
858 case AMDGPUISD::CVT_PKNORM_U16_F32:
859 case AMDGPUISD::CVT_PK_U16_U32:
860 case AMDGPUISD::CVT_PK_I16_I32: {
862 if (
N->getValueType(0) == MVT::i32) {
863 MVT NewVT =
Opc == AMDGPUISD::CVT_PKRTZ_F16_F32 ? MVT::v2f16 : MVT::v2i16;
865 { N->getOperand(0), N->getOperand(1) });
873 SelectINTRINSIC_W_CHAIN(
N);
877 SelectINTRINSIC_WO_CHAIN(
N);
881 SelectINTRINSIC_VOID(
N);
885 SelectWAVE_ADDRESS(
N);
889 SelectSTACKRESTORE(
N);
898 if (!Subtarget->hasSDWA())
908 return RHS->getZExtValue() == 0xFF || RHS->getZExtValue() == 0xFFFF;
912 return (RHS->getZExtValue() % 8) == 0;
917bool AMDGPUDAGToDAGISel::isUniformBr(
const SDNode *
N)
const {
920 return Term->getMetadata(
"amdgpu.uniform") ||
921 Term->getMetadata(
"structurizecfg.uniform");
924bool AMDGPUDAGToDAGISel::isUnneededShiftMask(
const SDNode *
N,
925 unsigned ShAmtBits)
const {
928 const APInt &
RHS =
N->getConstantOperandAPInt(1);
929 if (
RHS.countr_one() >= ShAmtBits)
959 N1 =
Lo.getOperand(1);
969 if (
CurDAG->isBaseWithConstantOffset(Addr)) {
984 return "AMDGPU DAG->DAG Pattern Instruction Selection";
994#ifdef EXPENSIVE_CHECKS
1000 for (
auto &L : LI.getLoopsInPreorder())
1001 assert(L->isLCSSAForm(DT) &&
"Loop is not in LCSSA form!");
1023 }
else if ((Addr.
getOpcode() == AMDGPUISD::DWORDADDR) &&
1025 Base =
CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
1039SDValue AMDGPUDAGToDAGISel::getMaterializedScalarImm32(int64_t Val,
1041 SDNode *Mov =
CurDAG->getMachineNode(
1042 AMDGPU::S_MOV_B32,
DL, MVT::i32,
1043 CurDAG->getTargetConstant(Val,
DL, MVT::i32));
1047void AMDGPUDAGToDAGISel::SelectAddcSubb(
SDNode *
N) {
1052 if (
N->isDivergent()) {
1054 : AMDGPU::V_SUBB_U32_e64;
1056 N,
Opc,
N->getVTList(),
1058 CurDAG->getTargetConstant(0, {}, MVT::i1) });
1061 : AMDGPU::S_SUB_CO_PSEUDO;
1062 CurDAG->SelectNodeTo(
N,
Opc,
N->getVTList(), {LHS, RHS, CI});
1066void AMDGPUDAGToDAGISel::SelectAddcSubbI64(
SDNode *
N) {
1071 unsigned Opcode =
N->getOpcode();
1078 SDNode *Lo0 =
CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
DL,
1079 MVT::i32,
LHS, Sub0);
1080 SDNode *Hi0 =
CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
DL,
1081 MVT::i32,
LHS, Sub1);
1083 SDNode *Lo1 =
CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
DL,
1084 MVT::i32,
RHS, Sub0);
1085 SDNode *Hi1 =
CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
DL,
1086 MVT::i32,
RHS, Sub1);
1088 SDVTList VTList =
CurDAG->getVTList(MVT::i32,
N->getValueType(1));
1090 static const unsigned NoCarryOpcMap[2][2] = {
1091 {AMDGPU::S_USUBO_PSEUDO, AMDGPU::S_UADDO_PSEUDO},
1092 {AMDGPU::V_SUB_CO_U32_e64, AMDGPU::V_ADD_CO_U32_e64}};
1093 static const unsigned CarryOpcMap[2][2] = {
1094 {AMDGPU::S_SUB_CO_PSEUDO, AMDGPU::S_ADD_CO_PSEUDO},
1095 {AMDGPU::V_SUBB_U32_e64, AMDGPU::V_ADDC_U32_e64}};
1097 bool IsVALU =
N->isDivergent();
1099 unsigned NoCarryOpc = NoCarryOpcMap[IsVALU][IsAdd];
1100 unsigned CarryOpc = CarryOpcMap[IsVALU][IsAdd];
1104 if (!ConsumeCarry) {
1107 AddLo =
CurDAG->getMachineNode(NoCarryOpc,
DL, VTList, Args);
1110 AddLo =
CurDAG->getMachineNode(NoCarryOpc,
DL, VTList, Args);
1116 AddLo =
CurDAG->getMachineNode(CarryOpc,
DL, VTList, Args);
1119 AddLo =
CurDAG->getMachineNode(CarryOpc,
DL, VTList, Args);
1127 AddHi =
CurDAG->getMachineNode(CarryOpc,
DL, VTList, Args);
1130 AddHi =
CurDAG->getMachineNode(CarryOpc,
DL, VTList, Args);
1133 unsigned RC = IsVALU ? AMDGPU::VReg_64RegClassID : AMDGPU::SReg_64RegClassID;
1134 SDValue RegSequenceArgs[] = {
CurDAG->getTargetConstant(RC,
DL, MVT::i32),
1138 MVT::i64, RegSequenceArgs);
1144void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(
SDNode *
N) {
1149 bool IsVALU =
N->isDivergent();
1151 for (SDNode::user_iterator UI =
N->user_begin(),
E =
N->user_end(); UI !=
E;
1153 if (UI.getUse().getResNo() == 1) {
1154 if (UI->isMachineOpcode()) {
1155 if (UI->getMachineOpcode() !=
1156 (IsAdd ? AMDGPU::S_ADD_CO_PSEUDO : AMDGPU::S_SUB_CO_PSEUDO)) {
1169 unsigned Opc = IsAdd ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_SUB_CO_U32_e64;
1172 N,
Opc,
N->getVTList(),
1173 {N->getOperand(0), N->getOperand(1),
1174 CurDAG->getTargetConstant(0, {}, MVT::i1) });
1176 unsigned Opc = IsAdd ? AMDGPU::S_UADDO_PSEUDO : AMDGPU::S_USUBO_PSEUDO;
1178 CurDAG->SelectNodeTo(
N,
Opc,
N->getVTList(),
1179 {N->getOperand(0), N->getOperand(1)});
1183void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(
SDNode *
N) {
1187 SelectVOP3Mods0(
N->getOperand(1),
Ops[1],
Ops[0],
Ops[6],
Ops[7]);
1188 SelectVOP3Mods(
N->getOperand(2),
Ops[3],
Ops[2]);
1189 SelectVOP3Mods(
N->getOperand(3),
Ops[5],
Ops[4]);
1190 Ops[8] =
N->getOperand(0);
1191 Ops[9] =
N->getOperand(4);
1195 bool UseFMAC = Subtarget->hasDLInsts() &&
1199 unsigned Opcode = UseFMAC ? AMDGPU::V_FMAC_F32_e64 : AMDGPU::V_FMA_F32_e64;
1200 CurDAG->SelectNodeTo(
N, Opcode,
N->getVTList(),
Ops);
1203void AMDGPUDAGToDAGISel::SelectFMUL_W_CHAIN(
SDNode *
N) {
1207 SelectVOP3Mods0(
N->getOperand(1),
Ops[1],
Ops[0],
Ops[4],
Ops[5]);
1208 SelectVOP3Mods(
N->getOperand(2),
Ops[3],
Ops[2]);
1209 Ops[6] =
N->getOperand(0);
1210 Ops[7] =
N->getOperand(3);
1212 CurDAG->SelectNodeTo(
N, AMDGPU::V_MUL_F32_e64,
N->getVTList(),
Ops);
1217void AMDGPUDAGToDAGISel::SelectDIV_SCALE(
SDNode *
N) {
1218 EVT VT =
N->getValueType(0);
1220 assert(VT == MVT::f32 || VT == MVT::f64);
1223 = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64_e64 : AMDGPU::V_DIV_SCALE_F32_e64;
1228 SelectVOP3BMods0(
N->getOperand(0),
Ops[1],
Ops[0],
Ops[6],
Ops[7]);
1229 SelectVOP3BMods(
N->getOperand(1),
Ops[3],
Ops[2]);
1230 SelectVOP3BMods(
N->getOperand(2),
Ops[5],
Ops[4]);
1236void AMDGPUDAGToDAGISel::SelectMAD_64_32(
SDNode *
N) {
1240 bool UseNoCarry = Subtarget->hasMadNC64_32Insts() && !
N->hasAnyUseOfValue(1);
1241 if (Subtarget->hasMADIntraFwdBug())
1242 Opc =
Signed ? AMDGPU::V_MAD_I64_I32_gfx11_e64
1243 : AMDGPU::V_MAD_U64_U32_gfx11_e64;
1244 else if (UseNoCarry)
1245 Opc =
Signed ? AMDGPU::V_MAD_NC_I64_I32_e64 : AMDGPU::V_MAD_NC_U64_U32_e64;
1247 Opc =
Signed ? AMDGPU::V_MAD_I64_I32_e64 : AMDGPU::V_MAD_U64_U32_e64;
1250 SDValue Ops[] = {
N->getOperand(0),
N->getOperand(1),
N->getOperand(2),
1254 MachineSDNode *Mad =
CurDAG->getMachineNode(
Opc, SL, MVT::i64,
Ops);
1265void AMDGPUDAGToDAGISel::SelectMUL_LOHI(
SDNode *
N) {
1270 if (Subtarget->hasMadNC64_32Insts()) {
1271 VTList =
CurDAG->getVTList(MVT::i64);
1272 Opc =
Signed ? AMDGPU::V_MAD_NC_I64_I32_e64 : AMDGPU::V_MAD_NC_U64_U32_e64;
1274 VTList =
CurDAG->getVTList(MVT::i64, MVT::i1);
1275 if (Subtarget->hasMADIntraFwdBug()) {
1276 Opc =
Signed ? AMDGPU::V_MAD_I64_I32_gfx11_e64
1277 : AMDGPU::V_MAD_U64_U32_gfx11_e64;
1279 Opc =
Signed ? AMDGPU::V_MAD_I64_I32_e64 : AMDGPU::V_MAD_U64_U32_e64;
1286 SDNode *Mad =
CurDAG->getMachineNode(
Opc, SL, VTList,
Ops);
1288 SDValue Sub0 =
CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32);
1289 SDNode *
Lo =
CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, SL,
1290 MVT::i32,
SDValue(Mad, 0), Sub0);
1294 SDValue Sub1 =
CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32);
1295 SDNode *
Hi =
CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, SL,
1296 MVT::i32,
SDValue(Mad, 0), Sub1);
1306 if (!
Base || Subtarget->hasUsableDSOffset() ||
1307 Subtarget->unsafeDSOffsetFoldingEnabled())
1318 if (
CurDAG->isBaseWithConstantOffset(Addr)) {
1331 int64_t ByteOffset =
C->getSExtValue();
1332 if (isDSOffsetLegal(
SDValue(), ByteOffset)) {
1341 if (isDSOffsetLegal(
Sub, ByteOffset)) {
1347 unsigned SubOp = AMDGPU::V_SUB_CO_U32_e32;
1348 if (Subtarget->hasAddNoCarryInsts()) {
1349 SubOp = AMDGPU::V_SUB_U32_e64;
1351 CurDAG->getTargetConstant(0, {}, MVT::i1));
1354 MachineSDNode *MachineSub =
1355 CurDAG->getMachineNode(SubOp,
DL, MVT::i32, Opnds);
1371 if (isDSOffsetLegal(
SDValue(), CAddr->getZExtValue())) {
1373 MachineSDNode *MovZero =
CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
1374 DL, MVT::i32, Zero);
1376 Offset =
CurDAG->getTargetConstant(CAddr->getZExtValue(),
DL, MVT::i16);
1383 Offset =
CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16);
1387bool AMDGPUDAGToDAGISel::isDSOffset2Legal(
SDValue Base,
unsigned Offset0,
1389 unsigned Size)
const {
1390 if (Offset0 %
Size != 0 || Offset1 %
Size != 0)
1395 if (!
Base || Subtarget->hasUsableDSOffset() ||
1396 Subtarget->unsafeDSOffsetFoldingEnabled())
1414bool AMDGPUDAGToDAGISel::isFlatScratchBaseLegal(
SDValue Addr)
const {
1420 if (Subtarget->hasSignedScratchOffsets())
1430 ConstantSDNode *ImmOp =
nullptr;
1441bool AMDGPUDAGToDAGISel::isFlatScratchBaseLegalSV(
SDValue Addr)
const {
1447 if (Subtarget->hasSignedScratchOffsets())
1457bool AMDGPUDAGToDAGISel::isFlatScratchBaseLegalSVImm(
SDValue Addr)
const {
1471 (RHSImm->getSExtValue() < 0 && RHSImm->getSExtValue() > -0x40000000)))
1474 auto LHS =
Base.getOperand(0);
1475 auto RHS =
Base.getOperand(1);
1483 return SelectDSReadWrite2(Addr,
Base, Offset0, Offset1, 4);
1489 return SelectDSReadWrite2(Addr,
Base, Offset0, Offset1, 8);
1494 unsigned Size)
const {
1497 if (
CurDAG->isBaseWithConstantOffset(Addr)) {
1502 unsigned OffsetValue1 = OffsetValue0 +
Size;
1505 if (isDSOffset2Legal(N0, OffsetValue0, OffsetValue1,
Size)) {
1507 Offset0 =
CurDAG->getTargetConstant(OffsetValue0 /
Size,
DL, MVT::i32);
1508 Offset1 =
CurDAG->getTargetConstant(OffsetValue1 /
Size,
DL, MVT::i32);
1513 if (
const ConstantSDNode *
C =
1515 unsigned OffsetValue0 =
C->getZExtValue();
1516 unsigned OffsetValue1 = OffsetValue0 +
Size;
1518 if (isDSOffset2Legal(
SDValue(), OffsetValue0, OffsetValue1,
Size)) {
1528 if (isDSOffset2Legal(
Sub, OffsetValue0, OffsetValue1,
Size)) {
1532 unsigned SubOp = AMDGPU::V_SUB_CO_U32_e32;
1533 if (Subtarget->hasAddNoCarryInsts()) {
1534 SubOp = AMDGPU::V_SUB_U32_e64;
1536 CurDAG->getTargetConstant(0, {}, MVT::i1));
1539 MachineSDNode *MachineSub =
CurDAG->getMachineNode(
1544 CurDAG->getTargetConstant(OffsetValue0 /
Size,
DL, MVT::i32);
1546 CurDAG->getTargetConstant(OffsetValue1 /
Size,
DL, MVT::i32);
1552 unsigned OffsetValue0 = CAddr->getZExtValue();
1553 unsigned OffsetValue1 = OffsetValue0 +
Size;
1555 if (isDSOffset2Legal(
SDValue(), OffsetValue0, OffsetValue1,
Size)) {
1557 MachineSDNode *MovZero =
1558 CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
DL, MVT::i32, Zero);
1560 Offset0 =
CurDAG->getTargetConstant(OffsetValue0 /
Size,
DL, MVT::i32);
1561 Offset1 =
CurDAG->getTargetConstant(OffsetValue1 /
Size,
DL, MVT::i32);
1569 Offset0 =
CurDAG->getTargetConstant(0,
DL, MVT::i32);
1570 Offset1 =
CurDAG->getTargetConstant(1,
DL, MVT::i32);
1580 if (Subtarget->useFlatForGlobal())
1585 Idxen =
CurDAG->getTargetConstant(0,
DL, MVT::i1);
1586 Offen =
CurDAG->getTargetConstant(0,
DL, MVT::i1);
1587 Addr64 =
CurDAG->getTargetConstant(0,
DL, MVT::i1);
1588 SOffset = Subtarget->hasRestrictedSOffset()
1589 ?
CurDAG->getRegister(AMDGPU::SGPR_NULL, MVT::i32)
1590 :
CurDAG->getTargetConstant(0,
DL, MVT::i32);
1592 ConstantSDNode *C1 =
nullptr;
1594 if (
CurDAG->isBaseWithConstantOffset(Addr)) {
1607 Addr64 =
CurDAG->getTargetConstant(1,
DL, MVT::i1);
1613 Ptr =
SDValue(buildSMovImm64(
DL, 0, MVT::v2i32), 0);
1629 Ptr =
SDValue(buildSMovImm64(
DL, 0, MVT::v2i32), 0);
1631 Addr64 =
CurDAG->getTargetConstant(1,
DL, MVT::i1);
1635 VAddr =
CurDAG->getTargetConstant(0,
DL, MVT::i32);
1645 const SIInstrInfo *
TII = Subtarget->getInstrInfo();
1656 AMDGPU::S_MOV_B32,
DL, MVT::i32,
1662bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(
SDValue Addr,
SDValue &SRsrc,
1665 SDValue Ptr, Offen, Idxen, Addr64;
1669 if (!Subtarget->hasAddr64())
1672 if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset,
Offset, Offen, Idxen, Addr64))
1676 if (
C->getSExtValue()) {
1689std::pair<SDValue, SDValue> AMDGPUDAGToDAGISel::foldFrameIndex(
SDValue N)
const {
1694 FI ?
CurDAG->getTargetFrameIndex(FI->getIndex(), FI->getValueType(0)) :
N;
1700 return std::pair(TFI,
CurDAG->getTargetConstant(0,
DL, MVT::i32));
1703bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(
SDNode *Parent,
1709 MachineFunction &
MF =
CurDAG->getMachineFunction();
1710 const SIMachineFunctionInfo *
Info =
MF.getInfo<SIMachineFunctionInfo>();
1712 Rsrc =
CurDAG->getRegister(
Info->getScratchRSrcReg(), MVT::v4i32);
1715 int64_t
Imm = CAddr->getSExtValue();
1716 const int64_t NullPtr =
1719 if (Imm != NullPtr) {
1722 CurDAG->getTargetConstant(Imm & ~MaxOffset,
DL, MVT::i32);
1723 MachineSDNode *MovHighBits =
CurDAG->getMachineNode(
1724 AMDGPU::V_MOV_B32_e32,
DL, MVT::i32, HighBits);
1725 VAddr =
SDValue(MovHighBits, 0);
1727 SOffset =
CurDAG->getTargetConstant(0,
DL, MVT::i32);
1728 ImmOffset =
CurDAG->getTargetConstant(Imm & MaxOffset,
DL, MVT::i32);
1733 if (
CurDAG->isBaseWithConstantOffset(Addr)) {
1754 const SIInstrInfo *
TII = Subtarget->getInstrInfo();
1755 if (
TII->isLegalMUBUFImmOffset(C1) &&
1756 (!Subtarget->privateMemoryResourceIsRangeChecked() ||
1757 CurDAG->SignBitIsZero(N0))) {
1758 std::tie(VAddr, SOffset) = foldFrameIndex(N0);
1759 ImmOffset =
CurDAG->getTargetConstant(C1,
DL, MVT::i32);
1765 std::tie(VAddr, SOffset) = foldFrameIndex(Addr);
1766 ImmOffset =
CurDAG->getTargetConstant(0,
DL, MVT::i32);
1774 if (!
Reg.isPhysical())
1776 const auto *RC =
TRI.getPhysRegBaseClass(
Reg);
1777 return RC &&
TRI.isSGPRClass(RC);
1780bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(
SDNode *Parent,
1785 const SIRegisterInfo *
TRI = Subtarget->getRegisterInfo();
1786 const SIInstrInfo *
TII = Subtarget->getInstrInfo();
1787 MachineFunction &
MF =
CurDAG->getMachineFunction();
1788 const SIMachineFunctionInfo *
Info =
MF.getInfo<SIMachineFunctionInfo>();
1793 SRsrc =
CurDAG->getRegister(
Info->getScratchRSrcReg(), MVT::v4i32);
1799 ConstantSDNode *CAddr;
1812 SOffset =
CurDAG->getTargetConstant(0,
DL, MVT::i32);
1817 SRsrc =
CurDAG->getRegister(
Info->getScratchRSrcReg(), MVT::v4i32);
1823bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(
SDValue Addr,
SDValue &SRsrc,
1826 SDValue Ptr, VAddr, Offen, Idxen, Addr64;
1827 const SIInstrInfo *
TII = Subtarget->getInstrInfo();
1829 if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset,
Offset, Offen, Idxen, Addr64))
1835 uint64_t Rsrc =
TII->getDefaultRsrcDataFormat() |
1848bool AMDGPUDAGToDAGISel::SelectBUFSOffset(
SDValue ByteOffsetNode,
1850 if (Subtarget->hasRestrictedSOffset() &&
isNullConstant(ByteOffsetNode)) {
1851 SOffset =
CurDAG->getRegister(AMDGPU::SGPR_NULL, MVT::i32);
1855 SOffset = ByteOffsetNode;
1873bool AMDGPUDAGToDAGISel::SelectFlatOffsetImpl(
1877 int64_t OffsetVal = 0;
1881 bool CanHaveFlatSegmentOffsetBug =
1882 Subtarget->hasFlatSegmentOffsetBug() &&
1883 FlatVariant == FlatAddrSpace::FLAT &&
1886 if (Subtarget->hasFlatInstOffsets() && !CanHaveFlatSegmentOffsetBug) {
1888 if (isBaseWithConstantOffset64(Addr, N0, N1) &&
1889 (FlatVariant != FlatAddrSpace::FlatScratch ||
1890 isFlatScratchBaseLegal(Addr))) {
1898 if (COffsetVal == 0 || FlatVariant != FlatAddrSpace::FLAT || IsInBounds) {
1899 const SIInstrInfo *
TII = Subtarget->getInstrInfo();
1900 if (
TII->isLegalFLATOffset(COffsetVal, AS, FlatVariant)) {
1902 OffsetVal = COffsetVal;
1915 uint64_t RemainderOffset;
1917 std::tie(OffsetVal, RemainderOffset) =
1918 TII->splitFlatOffset(COffsetVal, AS, FlatVariant);
1921 getMaterializedScalarImm32(
Lo_32(RemainderOffset),
DL);
1928 unsigned AddOp = AMDGPU::V_ADD_CO_U32_e32;
1929 if (Subtarget->hasAddNoCarryInsts()) {
1930 AddOp = AMDGPU::V_ADD_U32_e64;
1939 CurDAG->getTargetConstant(AMDGPU::sub0,
DL, MVT::i32);
1941 CurDAG->getTargetConstant(AMDGPU::sub1,
DL, MVT::i32);
1943 SDNode *N0Lo =
CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
1944 DL, MVT::i32, N0, Sub0);
1945 SDNode *N0Hi =
CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
1946 DL, MVT::i32, N0, Sub1);
1949 getMaterializedScalarImm32(
Hi_32(RemainderOffset),
DL);
1951 SDVTList VTs =
CurDAG->getVTList(MVT::i32, MVT::i1);
1954 CurDAG->getMachineNode(AMDGPU::V_ADD_CO_U32_e64,
DL, VTs,
1955 {AddOffsetLo,
SDValue(N0Lo, 0), Clamp});
1957 SDNode *Addc =
CurDAG->getMachineNode(
1958 AMDGPU::V_ADDC_U32_e64,
DL, VTs,
1962 CurDAG->getTargetConstant(AMDGPU::VReg_64RegClassID,
DL,
1967 MVT::i64, RegSequenceArgs),
1976 Offset =
CurDAG->getSignedTargetConstant(OffsetVal, SDLoc(), MVT::i32);
1980bool AMDGPUDAGToDAGISel::SelectFlatOffset(
SDNode *
N,
SDValue Addr,
1983 return SelectFlatOffsetImpl(
N, Addr, VAddr,
Offset,
1987bool AMDGPUDAGToDAGISel::SelectGlobalOffset(
SDNode *
N,
SDValue Addr,
1990 return SelectFlatOffsetImpl(
N, Addr, VAddr,
Offset,
1994bool AMDGPUDAGToDAGISel::SelectScratchOffset(
SDNode *
N,
SDValue Addr,
1997 return SelectFlatOffsetImpl(
N, Addr, VAddr,
Offset,
2005 if (
Op.getValueType() == MVT::i32)
2020bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(
SDNode *
N,
SDValue Addr,
2023 bool NeedIOffset)
const {
2025 int64_t ImmOffset = 0;
2026 ScaleOffset =
false;
2032 if (isBaseWithConstantOffset64(Addr,
LHS,
RHS)) {
2034 const SIInstrInfo *
TII = Subtarget->getInstrInfo();
2038 FlatAddrSpace::FlatGlobal)) {
2040 ImmOffset = COffsetVal;
2041 }
else if (!
LHS->isDivergent()) {
2042 if (COffsetVal > 0) {
2047 int64_t SplitImmOffset = 0, RemainderOffset = COffsetVal;
2049 std::tie(SplitImmOffset, RemainderOffset) =
TII->splitFlatOffset(
2053 if (Subtarget->hasSignedGVSOffset() ?
isInt<32>(RemainderOffset)
2055 SDNode *VMov =
CurDAG->getMachineNode(
2056 AMDGPU::V_MOV_B32_e32, SL, MVT::i32,
2057 CurDAG->getTargetConstant(RemainderOffset, SDLoc(), MVT::i32));
2060 Offset =
CurDAG->getTargetConstant(SplitImmOffset, SDLoc(), MVT::i32);
2070 unsigned NumLiterals =
2071 !
TII->isInlineConstant(APInt(32,
Lo_32(COffsetVal))) +
2072 !
TII->isInlineConstant(APInt(32,
Hi_32(COffsetVal)));
2073 if (Subtarget->getConstantBusLimit(AMDGPU::V_ADD_U32_e64) > NumLiterals)
2082 if (!
LHS->isDivergent()) {
2085 ScaleOffset = SelectScaleOffset(
N,
RHS, Subtarget->hasSignedGVSOffset());
2087 RHS, Subtarget->hasSignedGVSOffset(),
CurDAG)) {
2094 if (!SAddr && !
RHS->isDivergent()) {
2096 ScaleOffset = SelectScaleOffset(
N,
LHS, Subtarget->hasSignedGVSOffset());
2098 LHS, Subtarget->hasSignedGVSOffset(),
CurDAG)) {
2105 Offset =
CurDAG->getSignedTargetConstant(ImmOffset, SDLoc(), MVT::i32);
2110 if (Subtarget->hasScaleOffset() &&
2111 (Addr.
getOpcode() == (Subtarget->hasSignedGVSOffset()
2126 Offset =
CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i32);
2139 CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, SDLoc(Addr), MVT::i32,
2140 CurDAG->getTargetConstant(0, SDLoc(), MVT::i32));
2142 Offset =
CurDAG->getSignedTargetConstant(ImmOffset, SDLoc(), MVT::i32);
2146bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(
SDNode *
N,
SDValue Addr,
2151 if (!SelectGlobalSAddr(
N, Addr, SAddr, VOffset,
Offset, ScaleOffset))
2159bool AMDGPUDAGToDAGISel::SelectGlobalSAddrCPol(
SDNode *
N,
SDValue Addr,
2164 if (!SelectGlobalSAddr(
N, Addr, SAddr, VOffset,
Offset, ScaleOffset))
2169 N->getConstantOperandVal(
N->getNumOperands() - 1) & ~AMDGPU::CPol::SCAL;
2175bool AMDGPUDAGToDAGISel::SelectGlobalSAddrCPolM0(
SDNode *
N,
SDValue Addr,
2181 if (!SelectGlobalSAddr(
N, Addr, SAddr, VOffset,
Offset, ScaleOffset))
2186 N->getConstantOperandVal(
N->getNumOperands() - 2) & ~AMDGPU::CPol::SCAL;
2192bool AMDGPUDAGToDAGISel::SelectGlobalSAddrGLC(
SDNode *
N,
SDValue Addr,
2197 if (!SelectGlobalSAddr(
N, Addr, SAddr, VOffset,
Offset, ScaleOffset))
2201 CPol =
CurDAG->getTargetConstant(CPolVal, SDLoc(), MVT::i32);
2205bool AMDGPUDAGToDAGISel::SelectGlobalSAddrNoIOffset(
SDNode *
N,
SDValue Addr,
2211 if (!SelectGlobalSAddr(
N, Addr, SAddr, VOffset, DummyOffset, ScaleOffset,
2217 N->getConstantOperandVal(
N->getNumOperands() - 1) & ~AMDGPU::CPol::SCAL;
2223bool AMDGPUDAGToDAGISel::SelectGlobalSAddrNoIOffsetM0(
SDNode *
N,
SDValue Addr,
2229 if (!SelectGlobalSAddr(
N, Addr, SAddr, VOffset, DummyOffset, ScaleOffset,
2250 FI->getValueType(0));
2260bool AMDGPUDAGToDAGISel::SelectScratchSAddr(
SDNode *Parent,
SDValue Addr,
2269 int64_t COffsetVal = 0;
2271 if (
CurDAG->isBaseWithConstantOffset(Addr) && isFlatScratchBaseLegal(Addr)) {
2280 const SIInstrInfo *
TII = Subtarget->getInstrInfo();
2283 FlatAddrSpace::FlatScratch)) {
2284 int64_t SplitImmOffset, RemainderOffset;
2285 std::tie(SplitImmOffset, RemainderOffset) =
TII->splitFlatOffset(
2288 COffsetVal = SplitImmOffset;
2292 ? getMaterializedScalarImm32(
Lo_32(RemainderOffset),
DL)
2293 :
CurDAG->getSignedTargetConstant(RemainderOffset,
DL, MVT::i32);
2294 SAddr =
SDValue(
CurDAG->getMachineNode(AMDGPU::S_ADD_I32,
DL, MVT::i32,
2299 Offset =
CurDAG->getSignedTargetConstant(COffsetVal,
DL, MVT::i32);
2305bool AMDGPUDAGToDAGISel::checkFlatScratchSVSSwizzleBug(
2307 if (!Subtarget->hasFlatScratchSVSSwizzleBug())
2313 KnownBits VKnown =
CurDAG->computeKnownBits(VAddr);
2320 return (VMax & 3) + (
SMax & 3) >= 4;
2323bool AMDGPUDAGToDAGISel::SelectScratchSVAddr(
SDNode *
N,
SDValue Addr,
2327 int64_t ImmOffset = 0;
2331 if (isBaseWithConstantOffset64(Addr,
LHS,
RHS)) {
2333 const SIInstrInfo *
TII = Subtarget->getInstrInfo();
2338 ImmOffset = COffsetVal;
2339 }
else if (!
LHS->isDivergent() && COffsetVal > 0) {
2343 int64_t SplitImmOffset, RemainderOffset;
2344 std::tie(SplitImmOffset, RemainderOffset) =
2349 SDNode *VMov =
CurDAG->getMachineNode(
2350 AMDGPU::V_MOV_B32_e32, SL, MVT::i32,
2351 CurDAG->getTargetConstant(RemainderOffset, SDLoc(), MVT::i32));
2354 if (!isFlatScratchBaseLegal(Addr))
2356 if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, SplitImmOffset))
2358 Offset =
CurDAG->getTargetConstant(SplitImmOffset, SDLoc(), MVT::i32);
2359 CPol =
CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
2371 if (!
LHS->isDivergent() &&
RHS->isDivergent()) {
2374 }
else if (!
RHS->isDivergent() &&
LHS->isDivergent()) {
2381 if (OrigAddr != Addr) {
2382 if (!isFlatScratchBaseLegalSVImm(OrigAddr))
2385 if (!isFlatScratchBaseLegalSV(OrigAddr))
2389 if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, ImmOffset))
2392 Offset =
CurDAG->getSignedTargetConstant(ImmOffset, SDLoc(), MVT::i32);
2394 bool ScaleOffset = SelectScaleOffset(
N, VAddr,
true );
2403bool AMDGPUDAGToDAGISel::isSOffsetLegalWithImmOffset(
SDValue *SOffset,
2406 int64_t ImmOffset)
const {
2407 if (!IsBuffer && !Imm32Only && ImmOffset < 0 &&
2409 KnownBits SKnown =
CurDAG->computeKnownBits(*SOffset);
2421 bool IsSigned)
const {
2422 bool ScaleOffset =
false;
2423 if (!Subtarget->hasScaleOffset() || !
Offset)
2437 (IsSigned &&
Offset.getOpcode() == AMDGPUISD::MUL_I24) ||
2438 Offset.getOpcode() == AMDGPUISD::MUL_U24 ||
2439 (
Offset.isMachineOpcode() &&
2440 Offset.getMachineOpcode() ==
2441 (IsSigned ? AMDGPU::S_MUL_I64_I32_PSEUDO
2442 : AMDGPU::S_MUL_U64_U32_PSEUDO))) {
2444 ScaleOffset =
C->getZExtValue() ==
Size;
2456bool AMDGPUDAGToDAGISel::SelectSMRDOffset(
SDNode *
N,
SDValue ByteOffsetNode,
2458 bool Imm32Only,
bool IsBuffer,
2459 bool HasSOffset, int64_t ImmOffset,
2460 bool *ScaleOffset)
const {
2462 "Cannot match both soffset and offset at the same time!");
2467 *ScaleOffset = SelectScaleOffset(
N, ByteOffsetNode,
false );
2477 *SOffset = ByteOffsetNode;
2478 return isSOffsetLegalWithImmOffset(SOffset, Imm32Only, IsBuffer,
2484 return isSOffsetLegalWithImmOffset(SOffset, Imm32Only, IsBuffer,
2491 SDLoc SL(ByteOffsetNode);
2495 int64_t ByteOffset = IsBuffer ?
C->getZExtValue() :
C->getSExtValue();
2497 *Subtarget, ByteOffset, IsBuffer, HasSOffset);
2498 if (EncodedOffset &&
Offset && !Imm32Only) {
2499 *
Offset =
CurDAG->getSignedTargetConstant(*EncodedOffset, SL, MVT::i32);
2508 if (EncodedOffset &&
Offset && Imm32Only) {
2509 *
Offset =
CurDAG->getTargetConstant(*EncodedOffset, SL, MVT::i32);
2517 SDValue C32Bit =
CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
2519 CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, C32Bit), 0);
2526SDValue AMDGPUDAGToDAGISel::Expand32BitAddress(
SDValue Addr)
const {
2533 const MachineFunction &
MF =
CurDAG->getMachineFunction();
2534 const SIMachineFunctionInfo *
Info =
MF.getInfo<SIMachineFunctionInfo>();
2535 unsigned AddrHiVal =
Info->get32BitAddressHighBits();
2536 SDValue AddrHi =
CurDAG->getTargetConstant(AddrHiVal, SL, MVT::i32);
2539 CurDAG->getTargetConstant(AMDGPU::SReg_64_XEXECRegClassID, SL, MVT::i32),
2541 CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32),
2542 SDValue(
CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, AddrHi),
2544 CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32),
2547 return SDValue(
CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, SL, MVT::i64,
2554bool AMDGPUDAGToDAGISel::SelectSMRDBaseOffset(
SDNode *
N,
SDValue Addr,
2557 bool IsBuffer,
bool HasSOffset,
2559 bool *ScaleOffset)
const {
2561 assert(!Imm32Only && !IsBuffer);
2564 if (!SelectSMRDBaseOffset(
N, Addr,
B,
nullptr,
Offset,
false,
false,
true))
2569 ImmOff =
C->getSExtValue();
2571 return SelectSMRDBaseOffset(
N,
B, SBase, SOffset,
nullptr,
false,
false,
2572 true, ImmOff, ScaleOffset);
2592 if (SelectSMRDOffset(
N, N1, SOffset,
Offset, Imm32Only, IsBuffer, HasSOffset,
2593 ImmOffset, ScaleOffset)) {
2597 if (SelectSMRDOffset(
N, N0, SOffset,
Offset, Imm32Only, IsBuffer, HasSOffset,
2598 ImmOffset, ScaleOffset)) {
2607 bool Imm32Only,
bool *ScaleOffset)
const {
2608 if (SelectSMRDBaseOffset(
N, Addr, SBase, SOffset,
Offset, Imm32Only,
2611 SBase = Expand32BitAddress(SBase);
2616 SBase = Expand32BitAddress(Addr);
2617 *
Offset =
CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
2624bool AMDGPUDAGToDAGISel::SelectSMRDImm(
SDValue Addr,
SDValue &SBase,
2626 return SelectSMRD(
nullptr, Addr, SBase,
nullptr,
2630bool AMDGPUDAGToDAGISel::SelectSMRDImm32(
SDValue Addr,
SDValue &SBase,
2633 return SelectSMRD(
nullptr, Addr, SBase,
nullptr,
2640 if (!SelectSMRD(
N, Addr, SBase, &SOffset,
nullptr,
2641 false, &ScaleOffset))
2645 SDLoc(
N), MVT::i32);
2649bool AMDGPUDAGToDAGISel::SelectSMRDSgprImm(
SDNode *
N,
SDValue Addr,
2654 if (!SelectSMRD(
N, Addr, SBase, &SOffset, &
Offset,
false, &ScaleOffset))
2658 SDLoc(
N), MVT::i32);
2663 return SelectSMRDOffset(
nullptr,
N,
nullptr, &
Offset,
2667bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(
SDValue N,
2670 return SelectSMRDOffset(
nullptr,
N,
nullptr, &
Offset,
2674bool AMDGPUDAGToDAGISel::SelectSMRDBufferSgprImm(
SDValue N,
SDValue &SOffset,
2678 return N.getValueType() == MVT::i32 &&
2679 SelectSMRDBaseOffset(
nullptr,
N, SOffset,
2684bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(
SDValue Index,
2689 if (
CurDAG->isBaseWithConstantOffset(Index)) {
2714SDNode *AMDGPUDAGToDAGISel::getBFE32(
bool IsSigned,
const SDLoc &
DL,
2718 unsigned Opcode = IsSigned ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;
2722 return CurDAG->getMachineNode(Opcode,
DL, MVT::i32, Val, Off, W);
2724 unsigned Opcode = IsSigned ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
2728 uint32_t PackedVal =
Offset | (Width << 16);
2729 SDValue PackedConst =
CurDAG->getTargetConstant(PackedVal,
DL, MVT::i32);
2731 return CurDAG->getMachineNode(Opcode,
DL, MVT::i32, Val, PackedConst);
2734void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(
SDNode *
N) {
2739 const SDValue &Shl =
N->getOperand(0);
2744 uint32_t BVal =
B->getZExtValue();
2745 uint32_t CVal =
C->getZExtValue();
2747 if (0 < BVal && BVal <= CVal && CVal < 32) {
2757void AMDGPUDAGToDAGISel::SelectS_BFE(
SDNode *
N) {
2758 switch (
N->getOpcode()) {
2760 if (
N->getOperand(0).getOpcode() ==
ISD::SRL) {
2763 const SDValue &Srl =
N->getOperand(0);
2767 if (Shift && Mask) {
2769 uint32_t MaskVal =
Mask->getZExtValue();
2781 if (
N->getOperand(0).getOpcode() ==
ISD::AND) {
2788 if (Shift && Mask) {
2790 uint32_t MaskVal =
Mask->getZExtValue() >> ShiftVal;
2799 }
else if (
N->getOperand(0).getOpcode() ==
ISD::SHL) {
2800 SelectS_BFEFromShifts(
N);
2805 if (
N->getOperand(0).getOpcode() ==
ISD::SHL) {
2806 SelectS_BFEFromShifts(
N);
2821 unsigned Width =
cast<VTSDNode>(
N->getOperand(1))->getVT().getSizeInBits();
2831bool AMDGPUDAGToDAGISel::isCBranchSCC(
const SDNode *
N)
const {
2833 if (!
N->hasOneUse())
2843 MVT VT =
Cond.getOperand(0).getSimpleValueType();
2847 if (VT == MVT::i64) {
2850 Subtarget->hasScalarCompareEq64();
2853 if ((VT == MVT::f16 || VT == MVT::f32) && Subtarget->hasSALUFloatInsts())
2886void AMDGPUDAGToDAGISel::SelectBRCOND(
SDNode *
N) {
2889 if (
Cond.isUndef()) {
2890 CurDAG->SelectNodeTo(
N, AMDGPU::SI_BR_UNDEF, MVT::Other,
2891 N->getOperand(2),
N->getOperand(0));
2895 const SIRegisterInfo *
TRI = Subtarget->getRegisterInfo();
2897 bool UseSCCBr = isCBranchSCC(
N) && isUniformBr(
N);
2898 bool AndExec = !UseSCCBr;
2899 bool Negate =
false;
2902 Cond->getOperand(0)->getOpcode() == AMDGPUISD::SETCC) {
2917 bool NegatedBallot =
false;
2920 UseSCCBr = !BallotCond->isDivergent();
2921 Negate = Negate ^ NegatedBallot;
2936 UseSCCBr ? (Negate ? AMDGPU::S_CBRANCH_SCC0 : AMDGPU::S_CBRANCH_SCC1)
2937 : (Negate ? AMDGPU::S_CBRANCH_VCCZ : AMDGPU::S_CBRANCH_VCCNZ);
2938 Register CondReg = UseSCCBr ? AMDGPU::SCC :
TRI->getVCC();
2957 Subtarget->isWave32() ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64, SL,
2959 CurDAG->getRegister(Subtarget->isWave32() ? AMDGPU::EXEC_LO
2967 CurDAG->SelectNodeTo(
N, BrOp, MVT::Other,
2972void AMDGPUDAGToDAGISel::SelectFP_EXTEND(
SDNode *
N) {
2973 if (Subtarget->hasSALUFloatInsts() &&
N->getValueType(0) == MVT::f32 &&
2974 !
N->isDivergent()) {
2976 if (Src.getValueType() == MVT::f16) {
2978 CurDAG->SelectNodeTo(
N, AMDGPU::S_CVT_HI_F32_F16,
N->getVTList(),
2988void AMDGPUDAGToDAGISel::SelectDSAppendConsume(
SDNode *
N,
unsigned IntrID) {
2991 unsigned Opc = IntrID == Intrinsic::amdgcn_ds_append ?
2992 AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME;
2997 MachineMemOperand *MMO =
M->getMemOperand();
3001 if (
CurDAG->isBaseWithConstantOffset(Ptr)) {
3006 if (isDSOffsetLegal(PtrBase, OffsetVal.
getZExtValue())) {
3007 N = glueCopyToM0(
N, PtrBase);
3008 Offset =
CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i32);
3013 N = glueCopyToM0(
N, Ptr);
3014 Offset =
CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
3019 CurDAG->getTargetConstant(IsGDS, SDLoc(), MVT::i32),
3024 SDNode *Selected =
CurDAG->SelectNodeTo(
N,
Opc,
N->getVTList(),
Ops);
3030void AMDGPUDAGToDAGISel::SelectDSBvhStackIntrinsic(
SDNode *
N,
unsigned IntrID) {
3033 case Intrinsic::amdgcn_ds_bvh_stack_rtn:
3034 case Intrinsic::amdgcn_ds_bvh_stack_push4_pop1_rtn:
3035 Opc = AMDGPU::DS_BVH_STACK_RTN_B32;
3037 case Intrinsic::amdgcn_ds_bvh_stack_push8_pop1_rtn:
3038 Opc = AMDGPU::DS_BVH_STACK_PUSH8_POP1_RTN_B32;
3040 case Intrinsic::amdgcn_ds_bvh_stack_push8_pop2_rtn:
3041 Opc = AMDGPU::DS_BVH_STACK_PUSH8_POP2_RTN_B64;
3044 SDValue Ops[] = {
N->getOperand(2),
N->getOperand(3),
N->getOperand(4),
3045 N->getOperand(5),
N->getOperand(0)};
3048 MachineMemOperand *MMO =
M->getMemOperand();
3049 SDNode *Selected =
CurDAG->SelectNodeTo(
N,
Opc,
N->getVTList(),
Ops);
3053void AMDGPUDAGToDAGISel::SelectTensorLoadStore(
SDNode *
N,
unsigned IntrID) {
3054 bool IsLoad = IntrID == Intrinsic::amdgcn_tensor_load_to_lds;
3056 IsLoad ? AMDGPU::TENSOR_LOAD_TO_LDS_d4 : AMDGPU::TENSOR_STORE_FROM_LDS_d4;
3068 Opc = IsLoad ? AMDGPU::TENSOR_LOAD_TO_LDS_d2
3069 : AMDGPU::TENSOR_STORE_FROM_LDS_d2;
3081 (void)
CurDAG->SelectNodeTo(
N,
Opc, MVT::Other, TensorOps);
3086 case Intrinsic::amdgcn_ds_gws_init:
3087 return AMDGPU::DS_GWS_INIT;
3088 case Intrinsic::amdgcn_ds_gws_barrier:
3089 return AMDGPU::DS_GWS_BARRIER;
3090 case Intrinsic::amdgcn_ds_gws_sema_v:
3091 return AMDGPU::DS_GWS_SEMA_V;
3092 case Intrinsic::amdgcn_ds_gws_sema_br:
3093 return AMDGPU::DS_GWS_SEMA_BR;
3094 case Intrinsic::amdgcn_ds_gws_sema_p:
3095 return AMDGPU::DS_GWS_SEMA_P;
3096 case Intrinsic::amdgcn_ds_gws_sema_release_all:
3097 return AMDGPU::DS_GWS_SEMA_RELEASE_ALL;
3103void AMDGPUDAGToDAGISel::SelectDS_GWS(
SDNode *
N,
unsigned IntrID) {
3104 if (!Subtarget->hasGWS() ||
3105 (IntrID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
3106 !Subtarget->hasGWSSemaReleaseAll())) {
3113 const bool HasVSrc =
N->getNumOperands() == 4;
3114 assert(HasVSrc ||
N->getNumOperands() == 3);
3117 SDValue BaseOffset =
N->getOperand(HasVSrc ? 3 : 2);
3120 MachineMemOperand *MMO =
M->getMemOperand();
3133 glueCopyToM0(
N,
CurDAG->getTargetConstant(0, SL, MVT::i32));
3134 ImmOffset = ConstOffset->getZExtValue();
3136 if (
CurDAG->isBaseWithConstantOffset(BaseOffset)) {
3145 =
CurDAG->getMachineNode(AMDGPU::V_READFIRSTLANE_B32, SL, MVT::i32,
3149 =
CurDAG->getMachineNode(AMDGPU::S_LSHL_B32, SL, MVT::i32,
3151 CurDAG->getTargetConstant(16, SL, MVT::i32));
3152 glueCopyToM0(
N,
SDValue(M0Base, 0));
3156 SDValue OffsetField =
CurDAG->getTargetConstant(ImmOffset, SL, MVT::i32);
3160 const MCInstrDesc &InstrDesc =
TII->get(
Opc);
3161 int Data0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::data0);
3163 const TargetRegisterClass *DataRC =
TII->getRegClass(InstrDesc, Data0Idx);
3167 const SIRegisterInfo *
TRI = Subtarget->getRegisterInfo();
3170 MVT DataVT =
Data.getValueType().getSimpleVT();
3171 if (
TRI->isTypeLegalForClass(*DataRC, DataVT)) {
3173 Ops.push_back(
N->getOperand(2));
3179 CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32),
3181 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, SL, MVT::i32),
3183 CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32)};
3186 SL, MVT::v2i32, RegSeqOps),
3191 Ops.push_back(OffsetField);
3192 Ops.push_back(Chain);
3194 SDNode *Selected =
CurDAG->SelectNodeTo(
N,
Opc,
N->getVTList(),
Ops);
3198void AMDGPUDAGToDAGISel::SelectInterpP1F16(
SDNode *
N) {
3199 if (Subtarget->getLDSBankCount() != 16) {
3229 SDVTList VTs =
CurDAG->getVTList(MVT::f32, MVT::Other);
3232 CurDAG->getMachineNode(AMDGPU::V_INTERP_MOV_F32,
DL, VTs, {
3233 CurDAG->getTargetConstant(2,
DL, MVT::i32),
3239 SDNode *InterpP1LV =
3240 CurDAG->getMachineNode(AMDGPU::V_INTERP_P1LV_F16,
DL, MVT::f32, {
3241 CurDAG->getTargetConstant(0,
DL, MVT::i32),
3245 CurDAG->getTargetConstant(0,
DL, MVT::i32),
3248 CurDAG->getTargetConstant(0,
DL, MVT::i1),
3249 CurDAG->getTargetConstant(0,
DL, MVT::i32),
3256void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(
SDNode *
N) {
3257 unsigned IntrID =
N->getConstantOperandVal(1);
3259 case Intrinsic::amdgcn_ds_append:
3260 case Intrinsic::amdgcn_ds_consume: {
3261 if (
N->getValueType(0) != MVT::i32)
3263 SelectDSAppendConsume(
N, IntrID);
3266 case Intrinsic::amdgcn_ds_bvh_stack_rtn:
3267 case Intrinsic::amdgcn_ds_bvh_stack_push4_pop1_rtn:
3268 case Intrinsic::amdgcn_ds_bvh_stack_push8_pop1_rtn:
3269 case Intrinsic::amdgcn_ds_bvh_stack_push8_pop2_rtn:
3270 SelectDSBvhStackIntrinsic(
N, IntrID);
3272 case Intrinsic::amdgcn_init_whole_wave:
3273 CurDAG->getMachineFunction()
3274 .getInfo<SIMachineFunctionInfo>()
3275 ->setInitWholeWave();
3282void AMDGPUDAGToDAGISel::SelectINTRINSIC_WO_CHAIN(
SDNode *
N) {
3283 unsigned IntrID =
N->getConstantOperandVal(0);
3284 unsigned Opcode = AMDGPU::INSTRUCTION_LIST_END;
3285 SDNode *ConvGlueNode =
N->getGluedNode();
3291 CurDAG->getMachineNode(TargetOpcode::CONVERGENCECTRL_GLUE, {},
3292 MVT::Glue,
SDValue(ConvGlueNode, 0));
3294 ConvGlueNode =
nullptr;
3297 case Intrinsic::amdgcn_wqm:
3298 Opcode = AMDGPU::WQM;
3300 case Intrinsic::amdgcn_softwqm:
3301 Opcode = AMDGPU::SOFT_WQM;
3303 case Intrinsic::amdgcn_wwm:
3304 case Intrinsic::amdgcn_strict_wwm:
3305 Opcode = AMDGPU::STRICT_WWM;
3307 case Intrinsic::amdgcn_strict_wqm:
3308 Opcode = AMDGPU::STRICT_WQM;
3310 case Intrinsic::amdgcn_interp_p1_f16:
3311 SelectInterpP1F16(
N);
3313 case Intrinsic::amdgcn_permlane16_swap:
3314 case Intrinsic::amdgcn_permlane32_swap: {
3315 if ((IntrID == Intrinsic::amdgcn_permlane16_swap &&
3316 !Subtarget->hasPermlane16Swap()) ||
3317 (IntrID == Intrinsic::amdgcn_permlane32_swap &&
3318 !Subtarget->hasPermlane32Swap())) {
3323 Opcode = IntrID == Intrinsic::amdgcn_permlane16_swap
3324 ? AMDGPU::V_PERMLANE16_SWAP_B32_e64
3325 : AMDGPU::V_PERMLANE32_SWAP_B32_e64;
3329 NewOps.push_back(
SDValue(ConvGlueNode, 0));
3331 bool FI =
N->getConstantOperandVal(3);
3332 NewOps[2] =
CurDAG->getTargetConstant(
3335 CurDAG->SelectNodeTo(
N, Opcode,
N->getVTList(), NewOps);
3343 if (Opcode != AMDGPU::INSTRUCTION_LIST_END) {
3345 CurDAG->SelectNodeTo(
N, Opcode,
N->getVTList(), {Src});
3350 NewOps.push_back(
SDValue(ConvGlueNode, 0));
3351 CurDAG->MorphNodeTo(
N,
N->getOpcode(),
N->getVTList(), NewOps);
3355void AMDGPUDAGToDAGISel::SelectINTRINSIC_VOID(
SDNode *
N) {
3356 unsigned IntrID =
N->getConstantOperandVal(1);
3358 case Intrinsic::amdgcn_ds_gws_init:
3359 case Intrinsic::amdgcn_ds_gws_barrier:
3360 case Intrinsic::amdgcn_ds_gws_sema_v:
3361 case Intrinsic::amdgcn_ds_gws_sema_br:
3362 case Intrinsic::amdgcn_ds_gws_sema_p:
3363 case Intrinsic::amdgcn_ds_gws_sema_release_all:
3364 SelectDS_GWS(
N, IntrID);
3366 case Intrinsic::amdgcn_tensor_load_to_lds:
3367 case Intrinsic::amdgcn_tensor_store_from_lds:
3368 SelectTensorLoadStore(
N, IntrID);
3377void AMDGPUDAGToDAGISel::SelectWAVE_ADDRESS(
SDNode *
N) {
3379 CurDAG->getTargetConstant(Subtarget->getWavefrontSizeLog2(), SDLoc(
N), MVT::i32);
3380 CurDAG->SelectNodeTo(
N, AMDGPU::S_LSHR_B32,
N->getVTList(),
3381 {N->getOperand(0), Log2WaveSize});
3384void AMDGPUDAGToDAGISel::SelectSTACKRESTORE(
SDNode *
N) {
3399 Subtarget->getWavefrontSizeLog2(), SL, MVT::i32);
3401 if (
N->isDivergent()) {
3402 SrcVal =
SDValue(
CurDAG->getMachineNode(AMDGPU::V_READFIRSTLANE_B32, SL,
3407 CopyVal =
SDValue(
CurDAG->getMachineNode(AMDGPU::S_LSHL_B32, SL, MVT::i32,
3408 {SrcVal, Log2WaveSize}),
3412 SDValue CopyToSP =
CurDAG->getCopyToReg(
N->getOperand(0), SL,
SP, CopyVal);
3416bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(
SDValue In,
SDValue &Src,
3418 bool IsCanonicalizing,
3419 bool AllowAbs)
const {
3425 Src = Src.getOperand(0);
3426 }
else if (Src.getOpcode() ==
ISD::FSUB && IsCanonicalizing) {
3430 if (
LHS &&
LHS->isZero()) {
3432 Src = Src.getOperand(1);
3436 if (AllowAbs && Src.getOpcode() ==
ISD::FABS) {
3438 Src = Src.getOperand(0);
3451 if (IsCanonicalizing)
3466 EVT VT = Src.getValueType();
3468 (VT != MVT::i32 && VT != MVT::v2i32 && VT != MVT::i64))
3475 auto ReplaceSrc = [&]() ->
SDValue {
3477 return Src.getOperand(0);
3482 Src.getValueType(),
LHS, Index);
3508 if (SelectVOP3ModsImpl(In, Src, Mods,
true,
3510 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3517bool AMDGPUDAGToDAGISel::SelectVOP3ModsNonCanonicalizing(
3520 if (SelectVOP3ModsImpl(In, Src, Mods,
false,
3522 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3529bool AMDGPUDAGToDAGISel::SelectVOP3BMods(
SDValue In,
SDValue &Src,
3532 if (SelectVOP3ModsImpl(In, Src, Mods,
3535 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3542bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(
SDValue In,
SDValue &Src)
const {
3550bool AMDGPUDAGToDAGISel::SelectVINTERPModsImpl(
SDValue In,
SDValue &Src,
3554 if (SelectVOP3ModsImpl(In, Src, Mods,
3559 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3566bool AMDGPUDAGToDAGISel::SelectVINTERPMods(
SDValue In,
SDValue &Src,
3568 return SelectVINTERPModsImpl(In, Src, SrcMods,
false);
3571bool AMDGPUDAGToDAGISel::SelectVINTERPModsHi(
SDValue In,
SDValue &Src,
3573 return SelectVINTERPModsImpl(In, Src, SrcMods,
true);
3576bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(
SDValue In,
SDValue &Src,
3580 Clamp =
CurDAG->getTargetConstant(0,
DL, MVT::i1);
3581 Omod =
CurDAG->getTargetConstant(0,
DL, MVT::i1);
3583 return SelectVOP3Mods(In, Src, SrcMods);
3586bool AMDGPUDAGToDAGISel::SelectVOP3BMods0(
SDValue In,
SDValue &Src,
3590 Clamp =
CurDAG->getTargetConstant(0,
DL, MVT::i1);
3591 Omod =
CurDAG->getTargetConstant(0,
DL, MVT::i1);
3593 return SelectVOP3BMods(In, Src, SrcMods);
3596bool AMDGPUDAGToDAGISel::SelectVOP3OMods(
SDValue In,
SDValue &Src,
3601 Clamp =
CurDAG->getTargetConstant(0,
DL, MVT::i1);
3602 Omod =
CurDAG->getTargetConstant(0,
DL, MVT::i1);
3607bool AMDGPUDAGToDAGISel::SelectVOP3PMods(
SDValue In,
SDValue &Src,
3608 SDValue &SrcMods,
bool IsDOT)
const {
3615 Src = Src.getOperand(0);
3620 if (Src.getValueSizeInBits() == 128) {
3622 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3627 (!IsDOT || !Subtarget->hasDOTOpSelHazard())) {
3628 unsigned VecMods = Mods;
3630 SDValue Lo = stripBitcast(Src.getOperand(0));
3631 SDValue Hi = stripBitcast(Src.getOperand(1));
3634 Lo = stripBitcast(
Lo.getOperand(0));
3639 Hi = stripBitcast(
Hi.getOperand(0));
3649 unsigned VecSize = Src.getValueSizeInBits();
3650 Lo = stripExtractLoElt(
Lo);
3651 Hi = stripExtractLoElt(
Hi);
3653 if (
Lo.getValueSizeInBits() > VecSize) {
3654 Lo =
CurDAG->getTargetExtractSubreg(
3655 (VecSize > 32) ? AMDGPU::sub0_sub1 : AMDGPU::sub0, SDLoc(In),
3659 if (
Hi.getValueSizeInBits() > VecSize) {
3660 Hi =
CurDAG->getTargetExtractSubreg(
3661 (VecSize > 32) ? AMDGPU::sub0_sub1 : AMDGPU::sub0, SDLoc(In),
3665 assert(
Lo.getValueSizeInBits() <= VecSize &&
3666 Hi.getValueSizeInBits() <= VecSize);
3668 if (
Lo ==
Hi && !isInlineImmediate(
Lo.getNode())) {
3672 if (VecSize ==
Lo.getValueSizeInBits()) {
3674 }
else if (VecSize == 32) {
3675 Src = createVOP3PSrc32FromLo16(
Lo, Src,
CurDAG, Subtarget);
3677 assert(
Lo.getValueSizeInBits() == 32 && VecSize == 64);
3681 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, SL,
3682 Lo.getValueType()), 0);
3683 auto RC =
Lo->isDivergent() ? AMDGPU::VReg_64RegClassID
3684 : AMDGPU::SReg_64RegClassID;
3686 CurDAG->getTargetConstant(RC, SL, MVT::i32),
3687 Lo,
CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32),
3688 Undef,
CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32) };
3690 Src =
SDValue(
CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, SL,
3691 Src.getValueType(),
Ops), 0);
3693 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3699 .bitcastToAPInt().getZExtValue();
3701 Src =
CurDAG->getTargetConstant(
Lit, SDLoc(In), MVT::i64);
3702 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3709 Src.getNumOperands() == 2) {
3715 ArrayRef<int>
Mask = SVN->getMask();
3717 if (Mask[0] < 2 && Mask[1] < 2) {
3719 SDValue ShuffleSrc = SVN->getOperand(0);
3732 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3740 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3744bool AMDGPUDAGToDAGISel::SelectVOP3PModsDOT(
SDValue In,
SDValue &Src,
3746 return SelectVOP3PMods(In, Src, SrcMods,
true);
3749bool AMDGPUDAGToDAGISel::SelectVOP3PNoModsDOT(
SDValue In,
SDValue &Src)
const {
3751 SelectVOP3PMods(In, SrcTmp, SrcModsTmp,
true);
3760bool AMDGPUDAGToDAGISel::SelectVOP3PModsF32(
SDValue In,
SDValue &Src,
3762 SelectVOP3Mods(In, Src, SrcMods);
3765 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3769bool AMDGPUDAGToDAGISel::SelectVOP3PNoModsF32(
SDValue In,
SDValue &Src)
const {
3771 SelectVOP3PModsF32(In, SrcTmp, SrcModsTmp);
3780bool AMDGPUDAGToDAGISel::SelectWMMAOpSelVOP3PMods(
SDValue In,
3783 assert(
C->getAPIntValue().getBitWidth() == 1 &&
"expected i1 value");
3786 unsigned SrcVal =
C->getZExtValue();
3790 Src =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3797 unsigned DstRegClass;
3799 switch (Elts.
size()) {
3801 DstRegClass = AMDGPU::VReg_256RegClassID;
3805 DstRegClass = AMDGPU::VReg_128RegClassID;
3809 DstRegClass = AMDGPU::VReg_64RegClassID;
3817 Ops.push_back(
CurDAG->getTargetConstant(DstRegClass,
DL, MVT::i32));
3818 for (
unsigned i = 0; i < Elts.
size(); ++i) {
3819 Ops.push_back(Elts[i]);
3820 Ops.push_back(
CurDAG->getTargetConstant(
3823 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
DL, DstTy,
Ops);
3830 assert(
"unhandled Reg sequence size" &&
3831 (Elts.
size() == 8 || Elts.
size() == 16));
3835 for (
unsigned i = 0; i < Elts.
size(); i += 2) {
3836 SDValue LoSrc = stripExtractLoElt(stripBitcast(Elts[i]));
3841 if (Subtarget->useRealTrue16Insts()) {
3846 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
DL, MVT::i16),
3849 emitRegSequence(*
CurDAG, AMDGPU::VGPR_32RegClassID, MVT::i32,
3850 {Elts[i],
Undef}, {AMDGPU::lo16, AMDGPU::hi16},
DL);
3851 Elts[i + 1] = emitRegSequence(*
CurDAG, AMDGPU::VGPR_32RegClassID,
3852 MVT::i32, {Elts[i + 1],
Undef},
3853 {AMDGPU::lo16, AMDGPU::hi16},
DL);
3855 SDValue PackLoLo =
CurDAG->getTargetConstant(0x05040100,
DL, MVT::i32);
3857 CurDAG->getMachineNode(AMDGPU::V_PERM_B32_e64,
DL, MVT::i32,
3858 {Elts[i + 1], Elts[i], PackLoLo});
3862 return buildRegSequence32(PackedElts,
DL);
3868 unsigned ElementSize)
const {
3869 if (ElementSize == 16)
3870 return buildRegSequence16(Elts,
DL);
3871 if (ElementSize == 32)
3872 return buildRegSequence32(Elts,
DL);
3876void AMDGPUDAGToDAGISel::selectWMMAModsNegAbs(
unsigned ModOpcode,
3880 unsigned ElementSize)
const {
3885 for (
auto El : Elts) {
3888 NegAbsElts.
push_back(El->getOperand(0));
3890 if (Elts.size() != NegAbsElts.
size()) {
3892 Src =
SDValue(buildRegSequence(Elts,
DL, ElementSize), 0);
3896 Src =
SDValue(buildRegSequence(NegAbsElts,
DL, ElementSize), 0);
3902 Src =
SDValue(buildRegSequence(Elts,
DL, ElementSize), 0);
3910 std::function<
bool(
SDValue)> ModifierCheck) {
3914 for (
unsigned i = 0; i < F16Pair->getNumOperands(); ++i) {
3915 SDValue ElF16 = stripBitcast(F16Pair->getOperand(i));
3916 if (!ModifierCheck(ElF16))
3923bool AMDGPUDAGToDAGISel::SelectWMMAModsF16Neg(
SDValue In,
SDValue &Src,
3941 Src =
SDValue(buildRegSequence16(EltsF16, SDLoc(In)), 0);
3960 Src =
SDValue(buildRegSequence32(EltsV2F16, SDLoc(In)), 0);
3966 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3970bool AMDGPUDAGToDAGISel::SelectWMMAModsF16NegAbs(
SDValue In,
SDValue &Src,
3981 if (EltsF16.
empty())
3991 selectWMMAModsNegAbs(ModOpcode, Mods, EltsF16, Src, SDLoc(In), 16);
4001 if (EltsV2F16.
empty())
4010 selectWMMAModsNegAbs(ModOpcode, Mods, EltsV2F16, Src, SDLoc(In), 32);
4013 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
4017bool AMDGPUDAGToDAGISel::SelectWMMAModsF32NegAbs(
SDValue In,
SDValue &Src,
4027 unsigned ModOpcode =
4038 selectWMMAModsNegAbs(ModOpcode, Mods, EltsF32, Src, SDLoc(In), 32);
4041 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
4045bool AMDGPUDAGToDAGISel::SelectWMMAVISrc(
SDValue In,
SDValue &Src)
const {
4047 BitVector UndefElements;
4049 if (isInlineImmediate(
Splat.getNode())) {
4051 unsigned Imm =
C->getAPIntValue().getSExtValue();
4052 Src =
CurDAG->getTargetConstant(Imm, SDLoc(In), MVT::i32);
4056 unsigned Imm =
C->getValueAPF().bitcastToAPInt().getSExtValue();
4057 Src =
CurDAG->getTargetConstant(Imm, SDLoc(In), MVT::i32);
4065 SDValue SplatSrc32 = stripBitcast(In);
4067 if (
SDValue Splat32 = SplatSrc32BV->getSplatValue()) {
4068 SDValue SplatSrc16 = stripBitcast(Splat32);
4071 const SIInstrInfo *
TII = Subtarget->getInstrInfo();
4072 std::optional<APInt> RawValue;
4074 RawValue =
C->getValueAPF().bitcastToAPInt();
4076 RawValue =
C->getAPIntValue();
4078 if (RawValue.has_value()) {
4079 EVT VT =
In.getValueType().getScalarType();
4085 if (
TII->isInlineConstant(FloatVal)) {
4086 Src =
CurDAG->getTargetConstant(RawValue.value(), SDLoc(In),
4091 if (
TII->isInlineConstant(RawValue.value())) {
4092 Src =
CurDAG->getTargetConstant(RawValue.value(), SDLoc(In),
4105 if (
CurDAG->isConstantIntBuildVectorOrConstantInt(SplatSrc32)) {
4110 int64_t LoImm = Lo32->getAPIntValue().getSExtValue();
4111 int64_t HiImm = Hi32->getAPIntValue().getSExtValue();
4112 int64_t Imm64I = (HiImm << 32) + LoImm;
4114 if (!isInlineImmediate(APInt(64, Imm64I)))
4117 }
else if (Imm64I != Imm64)
4121 Src =
CurDAG->getTargetConstant(Imm64, SDLoc(In), MVT::i64);
4128bool AMDGPUDAGToDAGISel::SelectSWMMACIndex8(
SDValue In,
SDValue &Src,
4134 const llvm::SDValue &ShiftSrc =
In.getOperand(0);
4143 IndexKey =
CurDAG->getTargetConstant(
Key, SDLoc(In), MVT::i32);
4147bool AMDGPUDAGToDAGISel::SelectSWMMACIndex16(
SDValue In,
SDValue &Src,
4153 const llvm::SDValue &ShiftSrc =
In.getOperand(0);
4162 IndexKey =
CurDAG->getTargetConstant(
Key, SDLoc(In), MVT::i32);
4166bool AMDGPUDAGToDAGISel::SelectSWMMACIndex32(
SDValue In,
SDValue &Src,
4174 const SDValue &ExtendSrc =
In.getOperand(0);
4178 const SDValue &CastSrc =
In.getOperand(0);
4182 if (Zero &&
Zero->getZExtValue() == 0)
4193 Src = ExtractVecEltSrc;
4197 IndexKey =
CurDAG->getTargetConstant(
Key, SDLoc(In), MVT::i32);
4201bool AMDGPUDAGToDAGISel::SelectVOP3OpSel(
SDValue In,
SDValue &Src,
4205 SrcMods =
CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
4209bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods(
SDValue In,
SDValue &Src,
4212 return SelectVOP3Mods(In, Src, SrcMods);
4224 Op =
Op.getOperand(0);
4226 IsExtractHigh =
false;
4229 if (!Low16 || !Low16->isZero())
4231 Op = stripBitcast(
Op.getOperand(1));
4232 if (
Op.getValueType() != MVT::bf16)
4237 if (
Op.getValueType() != MVT::i32)
4242 if (Mask->getZExtValue() == 0xffff0000) {
4243 IsExtractHigh =
true;
4244 return Op.getOperand(0);
4253 return Op.getOperand(0);
4262bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsImpl(
SDValue In,
SDValue &Src,
4266 SelectVOP3ModsImpl(In, Src, Mods);
4268 bool IsExtractHigh =
false;
4270 Src = Src.getOperand(0);
4271 }
else if (VT == MVT::bf16) {
4279 if (Src.getValueType() != VT &&
4280 (VT != MVT::bf16 || Src.getValueType() != MVT::i32))
4283 Src = stripBitcast(Src);
4289 SelectVOP3ModsImpl(Src, Src, ModsTmp);
4304 if (Src.getValueSizeInBits() == 16) {
4313 Src.getOperand(0).getValueType() == MVT::i32) {
4314 Src = Src.getOperand(0);
4318 if (Subtarget->useRealTrue16Insts())
4320 Src = createVOP3PSrc32FromLo16(Src, In,
CurDAG, Subtarget);
4321 }
else if (IsExtractHigh)
4327bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsExt(
SDValue In,
SDValue &Src,
4330 if (!SelectVOP3PMadMixModsImpl(In, Src, Mods, MVT::f16))
4332 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
4336bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixMods(
SDValue In,
SDValue &Src,
4339 SelectVOP3PMadMixModsImpl(In, Src, Mods, MVT::f16);
4340 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
4344bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixBF16ModsExt(
SDValue In,
SDValue &Src,
4347 if (!SelectVOP3PMadMixModsImpl(In, Src, Mods, MVT::bf16))
4349 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
4353bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixBF16Mods(
SDValue In,
SDValue &Src,
4356 SelectVOP3PMadMixModsImpl(In, Src, Mods, MVT::bf16);
4357 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
4365 unsigned NumOpcodes = 0;
4378 const uint8_t SrcBits[3] = { 0xf0, 0xcc, 0xaa };
4381 if (
C->isAllOnes()) {
4391 for (
unsigned I = 0;
I < Src.size(); ++
I) {
4405 if (Src.size() == 3) {
4411 if (
C->isAllOnes()) {
4413 for (
unsigned I = 0;
I < Src.size(); ++
I) {
4414 if (Src[
I] ==
LHS) {
4426 Bits = SrcBits[Src.size()];
4431 switch (In.getOpcode()) {
4439 if (!getOperandBits(
LHS, LHSBits) ||
4440 !getOperandBits(
RHS, RHSBits)) {
4441 Src = std::move(Backup);
4442 return std::make_pair(0, 0);
4463 uint8_t LHSBitsOrig = LHSBits;
4464 uint8_t RHSBitsOrig = RHSBits;
4468 NumOpcodes += LHSOp.first;
4469 LHSBits = LHSOp.second;
4476 NumOpcodes += RHSOp.first;
4477 RHSBits = RHSOp.second;
4481 auto dependsOnSlot = [](
uint8_t TT,
int Slot) ->
bool {
4482 if (Slot < 0 || Slot > 2)
4484 const uint8_t Masks[3] = {0x0f, 0x33, 0x55};
4485 const int Shifts[3] = {4, 2, 1};
4486 return ((TT ^ (TT >> Shifts[Slot])) & Masks[Slot]) != 0;
4492 const uint8_t SrcBitsConst[3] = {0xf0, 0xcc, 0xaa};
4499 NegatedInner =
Op.getOperand(0);
4500 for (
int I = 0;
I < (int)S.size();
I++) {
4501 if (Bits == SrcBitsConst[
I] && S[
I] ==
Op)
4503 if (IsNegationOp && Bits == (
uint8_t)~SrcBitsConst[
I] &&
4504 S[
I] == NegatedInner)
4515 for (
int I = 0;
I < (int)SrcAfterLHS.
size() &&
I < 3;
I++) {
4516 if (
I < (
int)Src.size() && Src[
I] != SrcAfterLHS[
I] &&
4517 dependsOnSlot(LHSBits,
I)) {
4526 if (!Stale && !RHSOp.first) {
4527 int Slot = findSlot(RHSBitsOrig,
RHS, SrcBeforeRecurse);
4529 (Slot >= (
int)Src.size() || Src[Slot] != SrcBeforeRecurse[Slot]))
4535 if (!Stale && !LHSOp.first) {
4536 int Slot = findSlot(LHSBitsOrig,
LHS, SrcBeforeRecurse);
4538 (Slot >= (
int)Src.size() || Src[Slot] != SrcBeforeRecurse[Slot]))
4543 Src = std::move(SrcBeforeRecurse);
4544 LHSBits = LHSBitsOrig;
4545 RHSBits = RHSBitsOrig;
4551 return std::make_pair(0, 0);
4555 switch (In.getOpcode()) {
4557 TTbl = LHSBits & RHSBits;
4560 TTbl = LHSBits | RHSBits;
4563 TTbl = LHSBits ^ RHSBits;
4569 return std::make_pair(NumOpcodes + 1, TTbl);
4576 unsigned NumOpcodes;
4578 std::tie(NumOpcodes, TTbl) =
BitOp3_Op(In, Src);
4582 if (NumOpcodes < 2 || Src.empty())
4588 if (NumOpcodes < 4 && !In->isDivergent())
4591 if (NumOpcodes == 2 &&
In.getValueType() == MVT::i32) {
4596 (
In.getOperand(0).getOpcode() ==
In.getOpcode() ||
4597 In.getOperand(1).getOpcode() ==
In.getOpcode()))
4611 while (Src.size() < 3)
4612 Src.push_back(Src[0]);
4618 Tbl =
CurDAG->getTargetConstant(TTbl, SDLoc(In), MVT::i32);
4624 return CurDAG->getUNDEF(MVT::i32);
4628 return CurDAG->getConstant(
C->getZExtValue() << 16, SL, MVT::i32);
4633 return CurDAG->getConstant(
4634 C->getValueAPF().bitcastToAPInt().getZExtValue() << 16, SL, MVT::i32);
4644bool AMDGPUDAGToDAGISel::isVGPRImm(
const SDNode *
N)
const {
4645 assert(
CurDAG->getTarget().getTargetTriple().isAMDGCN());
4647 const SIRegisterInfo *SIRI = Subtarget->getRegisterInfo();
4648 const SIInstrInfo *SII = Subtarget->getInstrInfo();
4651 bool AllUsesAcceptSReg =
true;
4653 Limit < 10 && U !=
E; ++U, ++Limit) {
4654 const TargetRegisterClass *RC =
4655 getOperandRegClass(
U->getUser(),
U->getOperandNo());
4663 if (RC != &AMDGPU::VS_32RegClass && RC != &AMDGPU::VS_64RegClass &&
4664 RC != &AMDGPU::VS_64_Align2RegClass) {
4665 AllUsesAcceptSReg =
false;
4666 SDNode *
User =
U->getUser();
4667 if (
User->isMachineOpcode()) {
4668 unsigned Opc =
User->getMachineOpcode();
4669 const MCInstrDesc &
Desc = SII->get(
Opc);
4670 if (
Desc.isCommutable()) {
4671 unsigned OpIdx =
Desc.getNumDefs() +
U->getOperandNo();
4674 unsigned CommutedOpNo = CommuteIdx1 -
Desc.getNumDefs();
4675 const TargetRegisterClass *CommutedRC =
4676 getOperandRegClass(
U->getUser(), CommutedOpNo);
4677 if (CommutedRC == &AMDGPU::VS_32RegClass ||
4678 CommutedRC == &AMDGPU::VS_64RegClass ||
4679 CommutedRC == &AMDGPU::VS_64_Align2RegClass)
4680 AllUsesAcceptSReg =
true;
4688 if (!AllUsesAcceptSReg)
4692 return !AllUsesAcceptSReg && (Limit < 10);
4695bool AMDGPUDAGToDAGISel::isUniformLoad(
const SDNode *
N)
const {
4697 const MachineMemOperand *MMO = Ld->getMemOperand();
4715 (Subtarget->getScalarizeGlobalBehavior() &&
4719 ->isMemOpHasNoClobberedMemOperand(
N)));
4725 bool IsModified =
false;
4731 while (Position !=
CurDAG->allnodes_end()) {
4738 if (ResNode !=
Node) {
4744 CurDAG->RemoveDeadNodes();
4745 }
while (IsModified);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static bool getBaseWithOffsetUsingSplitOR(SelectionDAG &DAG, SDValue Addr, SDValue &N0, SDValue &N1)
static SDValue SelectSAddrFI(SelectionDAG *CurDAG, SDValue SAddr)
static SDValue matchExtFromI32orI32(SDValue Op, bool IsSigned, const SelectionDAG *DAG)
static MemSDNode * findMemSDNode(SDNode *N)
static bool IsCopyFromSGPR(const SIRegisterInfo &TRI, SDValue Val)
static SDValue combineBallotPattern(SDValue VCMP, bool &Negate)
static SDValue matchBF16FPExtendLike(SDValue Op, bool &IsExtractHigh)
static void checkWMMAElementsModifiersF16(BuildVectorSDNode *BV, std::function< bool(SDValue)> ModifierCheck)
Defines an instruction selector for the AMDGPU target.
Contains the definition of a TargetInstrInfo class that is common to all AMD GPUs.
static bool isNoUnsignedWrap(MachineInstr *Addr)
static bool isExtractHiElt(MachineRegisterInfo &MRI, Register In, Register &Out)
static std::pair< unsigned, uint8_t > BitOp3_Op(Register R, SmallVectorImpl< Register > &Src, const MachineRegisterInfo &MRI)
static unsigned gwsIntrinToOpcode(unsigned IntrID)
Provides AMDGPU specific target descriptions.
Base class for AMDGPU specific classes of TargetSubtarget.
The AMDGPU TargetMachine interface definition for hw codegen targets.
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
const HexagonInstrInfo * TII
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
Register const TargetRegisterInfo * TRI
Promote Memory to Register
MachineInstr unsigned OpIdx
FunctionAnalysisManager FAM
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Provides R600 specific target descriptions.
Interface definition for R600RegisterInfo.
const SmallVectorImpl< MachineOperand > & Cond
SI DAG Lowering interface definition.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...
AMDGPUDAGToDAGISelLegacy(TargetMachine &TM, CodeGenOptLevel OptLevel)
bool runOnMachineFunction(MachineFunction &MF) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
StringRef getPassName() const override
getPassName - Return a nice clean name for a pass.
AMDGPU specific code to select AMDGPU machine instructions for SelectionDAG operations.
bool isSDWAOperand(const SDNode *N) const
void SelectBuildVector(SDNode *N, unsigned RegClassID)
void Select(SDNode *N) override
Main hook for targets to transform nodes into machine nodes.
bool runOnMachineFunction(MachineFunction &MF) override
void SelectVectorShuffle(SDNode *N)
void PreprocessISelDAG() override
PreprocessISelDAG - This hook allows targets to hack on the graph before instruction selection starts...
AMDGPUDAGToDAGISel()=delete
void PostprocessISelDAG() override
PostprocessISelDAG() - This hook allows the target to hack on the graph right after selection.
bool matchLoadD16FromBuildVector(SDNode *N) const
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
AMDGPUISelDAGToDAGPass(TargetMachine &TM)
static SDValue stripBitcast(SDValue Val)
static const fltSemantics & BFloat()
static const fltSemantics & IEEEhalf()
Class for arbitrary precision integers.
uint64_t getZExtValue() const
Get zero extended value.
bool isSignMask() const
Check if the APInt's value is returned by getSignMask.
bool isMaxSignedValue() const
Determine if this is the largest signed value.
int64_t getSExtValue() const
Get sign extended value.
unsigned countr_one() const
Count the number of trailing one bits.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
Get the array size.
LLVM Basic Block Representation.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
A "pseudo-class" with methods for operating on BUILD_VECTORs.
LLVM_ABI SDValue getSplatValue(const APInt &DemandedElts, BitVector *UndefElements=nullptr) const
Returns the demanded splatted value or a null value if this is not a splat.
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
int64_t getSExtValue() const
Analysis pass which computes a DominatorTree.
Legacy analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
FunctionPass class - This class is used to implement most global optimizations.
const SIInstrInfo * getInstrInfo() const override
bool useRealTrue16Insts() const
Return true if real (non-fake) variants of True16 instructions using 16-bit registers should be code-...
Generation getGeneration() const
void checkSubtargetFeatures(const Function &F) const
Diagnose inconsistent subtarget features before attempting to codegen function F.
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
TypeSize getValue() const
Analysis pass that exposes the LoopInfo for a function.
SmallVector< LoopT *, 4 > getLoopsInPreorder() const
Return all of the loops in the function in preorder across the loop nests, with siblings in forward p...
The legacy pass manager's analysis pass to compute loop information.
static MVT getIntegerVT(unsigned BitWidth)
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
LocationSize getSize() const
Return the size in bytes of the memory reference.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
An SDNode that represents everything that will be needed to construct a MachineInstr.
This is an abstract virtual class for memory operations.
unsigned getAddressSpace() const
Return the address space for the associated pointer.
MachineMemOperand * getMemOperand() const
Return the unique MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
AnalysisType & getAnalysis() const
getAnalysis<AnalysisType>() - This function is used by subclasses to get to the analysis information ...
A set of analyses that are preserved following a run of a transformation pass.
Wrapper class representing virtual and physical registers.
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
SDNodeFlags getFlags() const
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumOperands() const
Return the number of values used by this operation.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
bool isPredecessorOf(const SDNode *N) const
Return true if this node is a predecessor of N.
bool isAnyAdd() const
Returns true if the node type is ADD or PTRADD.
static use_iterator use_end()
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
uint64_t getConstantOperandVal(unsigned i) const
unsigned getOpcode() const
static unsigned getMaxMUBUFImmOffset(const GCNSubtarget &ST)
bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx0, unsigned &SrcOpIdx1) const override
static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs=1)
static LLVM_READONLY const TargetRegisterClass * getSGPRClassForBitWidth(unsigned BitWidth)
static bool isSGPRClass(const TargetRegisterClass *RC)
bool runOnMachineFunction(MachineFunction &MF) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
SelectionDAGISelLegacy(char &ID, std::unique_ptr< SelectionDAGISel > S)
SelectionDAGISelPass(std::unique_ptr< SelectionDAGISel > Selector)
LLVM_ABI PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
std::unique_ptr< FunctionLoweringInfo > FuncInfo
const TargetLowering * TLI
const TargetInstrInfo * TII
void ReplaceUses(SDValue F, SDValue T)
ReplaceUses - replace all uses of the old node F with the use of the new node T.
void ReplaceNode(SDNode *F, SDNode *T)
Replace all uses of F with T, then remove F from the DAG.
SelectionDAGISel(TargetMachine &tm, CodeGenOptLevel OL=CodeGenOptLevel::Default)
virtual bool runOnMachineFunction(MachineFunction &mf)
const TargetLowering * getTargetLowering() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
SDValue getTargetFrameIndex(int FI, EVT VT)
LLVM_ABI bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI bool isBaseWithConstantOffset(SDValue Op) const
Return true if the specified operand is an ISD::ADD with a ConstantSDNode on the right-hand side,...
MachineFunction & getMachineFunction() const
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
ilist< SDNode >::iterator allnodes_iterator
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Represent a constant reference to a string, i.e.
static const unsigned CommuteAnyOperandIndex
Primary interface to the complete machine description for the target machine.
unsigned getID() const
Return the register class ID number.
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ CONSTANT_ADDRESS_32BIT
Address space for 32-bit constant memory.
@ REGION_ADDRESS
Address space for region memory. (GDS)
@ LOCAL_ADDRESS
Address space for local memory.
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
@ FLAT_ADDRESS
Address space for flat memory.
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
@ PRIVATE_ADDRESS
Address space for private memory.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
std::optional< int64_t > getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST, int64_t ByteOffset)
bool isGFX12Plus(const MCSubtargetInfo &STI)
constexpr int64_t getNullPointerValue(unsigned AS)
Get the null pointer value for the given address space.
bool isValid32BitLiteral(uint64_t Val, bool IsFP64)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST)
std::optional< int64_t > getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset, bool IsBuffer, bool HasSOffset)
bool isUniformMMO(const MachineMemOperand *MMO)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to; it returns an output chain.
@ PTRADD
PTRADD represents pointer arithmetic semantics, for targets that opt in using shouldPreservePtrArith(...
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
@ FMAD
FMAD - Perform a * b + c, while getting the same result as the separately rounded operations.
@ ADD
Simple integer binary arithmetic operators.
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
@ FADD
Simple binary floating point operators.
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
@ FLDEXP
FLDEXP - ldexp, inspired by libm (op0 * 2**op1).
@ CONVERGENCECTRL_GLUE
This does not correspond to any convergence control intrinsic.
@ SIGN_EXTEND
Conversion operators.
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
@ UNDEF
UNDEF - An undefined node.
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
@ SHL
Shift and rotation operations.
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum maximum on two values, following IEEE-754 definition...
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
@ AND
Bitwise operators - logical and, logical or, logical xor.
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
@ BRCOND
BRCOND - Conditional branch.
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
bool isExtOpcode(unsigned Opcode)
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
@ User
could "use" a pointer
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
@ Undef
Value of the register doesn't matter.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
constexpr bool isMask_32(uint32_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
bool isBoolSGPR(SDValue V)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
static bool getConstantValue(SDValue N, uint32_t &Out)
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
CodeGenOptLevel
Code generation optimization level.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
FunctionAddr VTableAddr uintptr_t uintptr_t Data
FunctionPass * createAMDGPUISelDag(TargetMachine &TM, CodeGenOptLevel OptLevel)
This pass converts a legalized DAG into an AMDGPU-specific DAG.
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ And
Bitwise or logical AND of integers.
@ Sub
Subtraction of integers.
DWARFExpression::Operation Op
unsigned M0(unsigned Val)
LLVM_ABI ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
Implement std::hash so that hash_code can be used in STL containers.
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
uint64_t getScalarSizeInBits() const
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
bool bitsEq(EVT VT) const
Return true if this has the same number of bits as VT.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
static KnownBits makeConstant(const APInt &C)
Create known bits from a known constant.
static KnownBits add(const KnownBits &LHS, const KnownBits &RHS, bool NSW=false, bool NUW=false, bool SelfAdd=false)
Compute knownbits resulting from addition of LHS and RHS.
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
APInt getMinValue() const
Return the minimal unsigned value possible given these KnownBits.
static unsigned getSubRegFromChannel(unsigned Channel)
bool hasNoUnsignedWrap() const
This represents a list of ValueType's that has been intern'd by a SelectionDAG.