29#include "llvm/IR/IntrinsicsAMDGPU.h"
33#ifdef EXPENSIVE_CHECKS
38#define DEBUG_TYPE "amdgpu-isel"
53 In = stripBitcast(In);
59 Out = In.getOperand(0);
70 if (ShiftAmt->getZExtValue() == 16) {
90 if (
Lo->isDivergent()) {
92 SL,
Lo.getValueType()),
100 Src.getValueType(),
Ops),
118 SDValue Idx = In.getOperand(1);
120 return In.getOperand(0);
124 SDValue Src = In.getOperand(0);
125 if (Src.getValueType().getSizeInBits() == 32)
126 return stripBitcast(Src);
136 assert(Elts.
size() == SubRegClass.
size() &&
"array size mismatch");
137 unsigned NumElts = Elts.
size();
140 for (
unsigned i = 0; i < NumElts; ++i) {
141 Ops[2 * i + 1] = Elts[i];
151 "AMDGPU DAG->DAG Pattern Instruction Selection",
false,
155#ifdef EXPENSIVE_CHECKS
160 "AMDGPU DAG->DAG Pattern Instruction Selection",
false,
181bool AMDGPUDAGToDAGISel::fp16SrcZerosHighBits(
unsigned Opc)
const {
215 case AMDGPUISD::FRACT:
216 case AMDGPUISD::CLAMP:
217 case AMDGPUISD::COS_HW:
218 case AMDGPUISD::SIN_HW:
219 case AMDGPUISD::FMIN3:
220 case AMDGPUISD::FMAX3:
221 case AMDGPUISD::FMED3:
222 case AMDGPUISD::FMAD_FTZ:
225 case AMDGPUISD::RCP_IFLAG:
235 case AMDGPUISD::DIV_FIXUP:
245#ifdef EXPENSIVE_CHECKS
249 assert(L->isLCSSAForm(DT));
257#ifdef EXPENSIVE_CHECKS
265 assert(Subtarget->d16PreservesUnusedBits());
266 MVT VT =
N->getValueType(0).getSimpleVT();
267 if (VT != MVT::v2i16 && VT != MVT::v2f16)
289 unsigned LoadOp = AMDGPUISD::LOAD_D16_HI;
292 AMDGPUISD::LOAD_D16_HI_I8 : AMDGPUISD::LOAD_D16_HI_U8;
298 CurDAG->getMemIntrinsicNode(LoadOp,
SDLoc(LdHi), VTList,
311 if (LdLo &&
Lo.hasOneUse()) {
317 unsigned LoadOp = AMDGPUISD::LOAD_D16_LO;
320 AMDGPUISD::LOAD_D16_LO_I8 : AMDGPUISD::LOAD_D16_LO_U8;
332 CurDAG->getMemIntrinsicNode(LoadOp,
SDLoc(LdLo), VTList,
345 if (!Subtarget->d16PreservesUnusedBits())
350 bool MadeChange =
false;
351 while (Position !=
CurDAG->allnodes_begin()) {
356 switch (
N->getOpcode()) {
367 CurDAG->RemoveDeadNodes();
373bool AMDGPUDAGToDAGISel::isInlineImmediate(
const SDNode *
N)
const {
379 return TII->isInlineConstant(
C->getAPIntValue());
382 return TII->isInlineConstant(
C->getValueAPF());
392 unsigned OpNo)
const {
393 if (!
N->isMachineOpcode()) {
396 if (
Reg.isVirtual()) {
401 const SIRegisterInfo *
TRI = Subtarget->getRegisterInfo();
402 return TRI->getPhysRegBaseClass(
Reg);
408 switch (
N->getMachineOpcode()) {
410 const SIInstrInfo *
TII = Subtarget->getInstrInfo();
411 const MCInstrDesc &
Desc =
TII->get(
N->getMachineOpcode());
412 unsigned OpIdx =
Desc.getNumDefs() + OpNo;
416 int16_t RegClass =
TII->getOpRegClassID(
Desc.operands()[
OpIdx]);
420 return Subtarget->getRegisterInfo()->getRegClass(RegClass);
422 case AMDGPU::REG_SEQUENCE: {
423 unsigned RCID =
N->getConstantOperandVal(0);
424 const TargetRegisterClass *SuperRC =
425 Subtarget->getRegisterInfo()->getRegClass(RCID);
427 SDValue SubRegOp =
N->getOperand(OpNo + 1);
429 return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC,
438 Ops.push_back(NewChain);
439 for (
unsigned i = 1, e =
N->getNumOperands(); i != e; ++i)
440 Ops.push_back(
N->getOperand(i));
443 return CurDAG->MorphNodeTo(
N,
N->getOpcode(),
N->getVTList(),
Ops);
450 assert(
N->getOperand(0).getValueType() == MVT::Other &&
"Expected chain");
453 return glueCopyToOp(
N,
M0,
M0.getValue(1));
456SDNode *AMDGPUDAGToDAGISel::glueCopyToM0LDSInit(
SDNode *
N)
const {
459 if (Subtarget->ldsRequiresM0Init())
461 N,
CurDAG->getSignedTargetConstant(-1, SDLoc(
N), MVT::i32));
463 MachineFunction &
MF =
CurDAG->getMachineFunction();
464 unsigned Value =
MF.getInfo<SIMachineFunctionInfo>()->getGDSSize();
466 glueCopyToM0(
N,
CurDAG->getTargetConstant(
Value, SDLoc(
N), MVT::i32));
473 SDNode *
Lo =
CurDAG->getMachineNode(
474 AMDGPU::S_MOV_B32,
DL, MVT::i32,
476 SDNode *
Hi =
CurDAG->getMachineNode(
477 AMDGPU::S_MOV_B32,
DL, MVT::i32,
480 CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID,
DL, MVT::i32),
484 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
DL, VT,
Ops);
487SDNode *AMDGPUDAGToDAGISel::packConstantV2I16(
const SDNode *
N,
492 uint32_t LHSVal, RHSVal;
496 uint32_t
K = (LHSVal & 0xffff) | (RHSVal << 16);
498 isVGPRImm(
N) ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32, SL,
506 EVT VT =
N->getValueType(0);
510 SDValue RegClass =
CurDAG->getTargetConstant(RegClassID,
DL, MVT::i32);
512 if (NumVectorElts == 1) {
513 CurDAG->SelectNodeTo(
N, AMDGPU::COPY_TO_REGCLASS, EltVT,
N->getOperand(0),
518 bool IsGCN =
CurDAG->getSubtarget().getTargetTriple().isAMDGCN();
519 if (IsGCN && Subtarget->has64BitLiterals() && VT.
getSizeInBits() == 64 &&
522 bool AllConst =
true;
524 for (
unsigned I = 0;
I < NumVectorElts; ++
I) {
532 Val = CF->getValueAPF().bitcastToAPInt().getZExtValue();
535 C |= Val << (EltSize *
I);
540 CurDAG->getMachineNode(AMDGPU::S_MOV_B64_IMM_PSEUDO,
DL, VT, CV);
541 CurDAG->SelectNodeTo(
N, AMDGPU::COPY_TO_REGCLASS, VT,
SDValue(Copy, 0),
547 assert(NumVectorElts <= 32 &&
"Vectors with more than 32 elements not "
554 RegSeqArgs[0] =
CurDAG->getTargetConstant(RegClassID,
DL, MVT::i32);
555 bool IsRegSeq =
true;
556 unsigned NOps =
N->getNumOperands();
557 for (
unsigned i = 0; i < NOps; i++) {
565 RegSeqArgs[1 + (2 * i)] =
N->getOperand(i);
566 RegSeqArgs[1 + (2 * i) + 1] =
CurDAG->getTargetConstant(
Sub,
DL, MVT::i32);
568 if (NOps != NumVectorElts) {
573 for (
unsigned i = NOps; i < NumVectorElts; ++i) {
576 RegSeqArgs[1 + (2 * i)] =
SDValue(ImpDef, 0);
577 RegSeqArgs[1 + (2 * i) + 1] =
584 CurDAG->SelectNodeTo(
N, AMDGPU::REG_SEQUENCE,
N->getVTList(), RegSeqArgs);
588 EVT VT =
N->getValueType(0);
592 if (!Subtarget->hasPkMovB32() || !EltVT.
bitsEq(MVT::i32) ||
606 Mask[0] < 4 && Mask[1] < 4);
608 SDValue VSrc0 = Mask[0] < 2 ? Src0 : Src1;
609 SDValue VSrc1 = Mask[1] < 2 ? Src0 : Src1;
610 unsigned Src0SubReg = Mask[0] & 1 ? AMDGPU::sub1 : AMDGPU::sub0;
611 unsigned Src1SubReg = Mask[1] & 1 ? AMDGPU::sub1 : AMDGPU::sub0;
614 Src0SubReg = Src1SubReg;
616 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
DL, VT);
621 Src1SubReg = Src0SubReg;
623 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
DL, VT);
633 if (
N->isDivergent() && Src0SubReg == AMDGPU::sub1 &&
634 Src1SubReg == AMDGPU::sub0) {
650 SDValue Src0OpSelVal =
CurDAG->getTargetConstant(Src0OpSel,
DL, MVT::i32);
651 SDValue Src1OpSelVal =
CurDAG->getTargetConstant(Src1OpSel,
DL, MVT::i32);
654 CurDAG->SelectNodeTo(
N, AMDGPU::V_PK_MOV_B32,
N->getVTList(),
655 {Src0OpSelVal, VSrc0, Src1OpSelVal, VSrc1,
665 CurDAG->getTargetExtractSubreg(Src0SubReg,
DL, EltVT, VSrc0);
667 CurDAG->getTargetExtractSubreg(Src1SubReg,
DL, EltVT, VSrc1);
670 CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID,
DL, MVT::i32),
671 ResultElt0,
CurDAG->getTargetConstant(AMDGPU::sub0,
DL, MVT::i32),
672 ResultElt1,
CurDAG->getTargetConstant(AMDGPU::sub1,
DL, MVT::i32)};
673 CurDAG->SelectNodeTo(
N, TargetOpcode::REG_SEQUENCE, VT,
Ops);
677 unsigned int Opc =
N->getOpcode();
678 if (
N->isMachineOpcode()) {
686 N = glueCopyToM0LDSInit(
N);
701 if (
N->getValueType(0) != MVT::i64)
704 SelectADD_SUB_I64(
N);
709 if (
N->getValueType(0) == MVT::i64) {
710 SelectAddcSubbI64(
N);
714 if (
N->getValueType(0) != MVT::i32)
721 if (
N->getValueType(0) == MVT::i64) {
722 SelectAddcSubbI64(
N);
726 SelectUADDO_USUBO(
N);
729 case AMDGPUISD::FMUL_W_CHAIN: {
730 SelectFMUL_W_CHAIN(
N);
733 case AMDGPUISD::FMA_W_CHAIN: {
734 SelectFMA_W_CHAIN(
N);
740 EVT VT =
N->getValueType(0);
757 ?
TRI->getDefaultVectorSuperClassForBitWidth(NumVectorElts * 32)
769 if (
N->getValueType(0) == MVT::i128) {
770 RC =
CurDAG->getTargetConstant(AMDGPU::SGPR_128RegClassID,
DL, MVT::i32);
771 SubReg0 =
CurDAG->getTargetConstant(AMDGPU::sub0_sub1,
DL, MVT::i32);
772 SubReg1 =
CurDAG->getTargetConstant(AMDGPU::sub2_sub3,
DL, MVT::i32);
773 }
else if (
N->getValueType(0) == MVT::i64) {
774 RC =
CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID,
DL, MVT::i32);
775 SubReg0 =
CurDAG->getTargetConstant(AMDGPU::sub0,
DL, MVT::i32);
776 SubReg1 =
CurDAG->getTargetConstant(AMDGPU::sub1,
DL, MVT::i32);
780 const SDValue Ops[] = { RC,
N->getOperand(0), SubReg0,
781 N->getOperand(1), SubReg1 };
783 N->getValueType(0),
Ops));
789 if (
N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(
N) ||
790 Subtarget->has64BitLiterals())
795 Imm =
FP->getValueAPF().bitcastToAPInt().getZExtValue();
800 Imm =
C->getZExtValue();
809 case AMDGPUISD::BFE_I32:
810 case AMDGPUISD::BFE_U32: {
836 case AMDGPUISD::DIV_SCALE: {
847 return SelectMUL_LOHI(
N);
858 if (
N->getValueType(0) != MVT::i32)
869 case AMDGPUISD::CVT_PKRTZ_F16_F32:
870 case AMDGPUISD::CVT_PKNORM_I16_F32:
871 case AMDGPUISD::CVT_PKNORM_U16_F32:
872 case AMDGPUISD::CVT_PK_U16_U32:
873 case AMDGPUISD::CVT_PK_I16_I32: {
875 if (
N->getValueType(0) == MVT::i32) {
876 MVT NewVT =
Opc == AMDGPUISD::CVT_PKRTZ_F16_F32 ? MVT::v2f16 : MVT::v2i16;
878 { N->getOperand(0), N->getOperand(1) });
886 SelectINTRINSIC_W_CHAIN(
N);
890 SelectINTRINSIC_WO_CHAIN(
N);
894 SelectINTRINSIC_VOID(
N);
898 SelectWAVE_ADDRESS(
N);
902 SelectSTACKRESTORE(
N);
911 if (!Subtarget->hasSDWA())
921 return RHS->getZExtValue() == 0xFF || RHS->getZExtValue() == 0xFFFF;
925 return (RHS->getZExtValue() % 8) == 0;
930bool AMDGPUDAGToDAGISel::isUniformBr(
const SDNode *
N)
const {
933 return Term->getMetadata(
"amdgpu.uniform") ||
934 Term->getMetadata(
"structurizecfg.uniform");
937bool AMDGPUDAGToDAGISel::isUnneededShiftMask(
const SDNode *
N,
938 unsigned ShAmtBits)
const {
941 const APInt &
RHS =
N->getConstantOperandAPInt(1);
942 if (
RHS.countr_one() >= ShAmtBits)
972 N1 =
Lo.getOperand(1);
982 if (
CurDAG->isBaseWithConstantOffset(Addr)) {
997 return "AMDGPU DAG->DAG Pattern Instruction Selection";
1007#ifdef EXPENSIVE_CHECKS
1013 for (
auto &L : LI.getLoopsInPreorder())
1014 assert(L->isLCSSAForm(DT) &&
"Loop is not in LCSSA form!");
1036 }
else if ((Addr.
getOpcode() == AMDGPUISD::DWORDADDR) &&
1038 Base =
CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
1052SDValue AMDGPUDAGToDAGISel::getMaterializedScalarImm32(int64_t Val,
1054 SDNode *Mov =
CurDAG->getMachineNode(
1055 AMDGPU::S_MOV_B32,
DL, MVT::i32,
1056 CurDAG->getTargetConstant(Val,
DL, MVT::i32));
1063void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(
SDNode *
N) {
1068 unsigned Opcode =
N->getOpcode();
1077 SDNode *Lo0 =
CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
1078 DL, MVT::i32,
LHS, Sub0);
1079 SDNode *Hi0 =
CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
1080 DL, MVT::i32,
LHS, Sub1);
1082 SDNode *Lo1 =
CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
1083 DL, MVT::i32,
RHS, Sub0);
1084 SDNode *Hi1 =
CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
1085 DL, MVT::i32,
RHS, Sub1);
1087 SDVTList VTList =
CurDAG->getVTList(MVT::i32, MVT::Glue);
1089 static const unsigned OpcMap[2][2][2] = {
1090 {{AMDGPU::S_SUB_U32, AMDGPU::S_ADD_U32},
1091 {AMDGPU::V_SUB_CO_U32_e32, AMDGPU::V_ADD_CO_U32_e32}},
1092 {{AMDGPU::S_SUBB_U32, AMDGPU::S_ADDC_U32},
1093 {AMDGPU::V_SUBB_U32_e32, AMDGPU::V_ADDC_U32_e32}}};
1095 unsigned Opc = OpcMap[0][
N->isDivergent()][IsAdd];
1096 unsigned CarryOpc = OpcMap[1][
N->isDivergent()][IsAdd];
1099 if (!ConsumeCarry) {
1101 AddLo =
CurDAG->getMachineNode(
Opc,
DL, VTList, Args);
1104 AddLo =
CurDAG->getMachineNode(CarryOpc,
DL, VTList, Args);
1111 SDNode *AddHi =
CurDAG->getMachineNode(CarryOpc,
DL, VTList, AddHiArgs);
1114 CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID,
DL, MVT::i32),
1121 MVT::i64, RegSequenceArgs);
1132void AMDGPUDAGToDAGISel::SelectAddcSubb(
SDNode *
N) {
1137 if (
N->isDivergent()) {
1139 : AMDGPU::V_SUBB_U32_e64;
1141 N,
Opc,
N->getVTList(),
1143 CurDAG->getTargetConstant(0, {}, MVT::i1) });
1146 : AMDGPU::S_SUB_CO_PSEUDO;
1147 CurDAG->SelectNodeTo(
N,
Opc,
N->getVTList(), {LHS, RHS, CI});
1151void AMDGPUDAGToDAGISel::SelectAddcSubbI64(
SDNode *
N) {
1156 unsigned Opcode =
N->getOpcode();
1163 SDNode *Lo0 =
CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
DL,
1164 MVT::i32,
LHS, Sub0);
1165 SDNode *Hi0 =
CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
DL,
1166 MVT::i32,
LHS, Sub1);
1168 SDNode *Lo1 =
CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
DL,
1169 MVT::i32,
RHS, Sub0);
1170 SDNode *Hi1 =
CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
DL,
1171 MVT::i32,
RHS, Sub1);
1173 SDVTList VTList =
CurDAG->getVTList(MVT::i32,
N->getValueType(1));
1175 static const unsigned NoCarryOpcMap[2][2] = {
1176 {AMDGPU::S_USUBO_PSEUDO, AMDGPU::S_UADDO_PSEUDO},
1177 {AMDGPU::V_SUB_CO_U32_e64, AMDGPU::V_ADD_CO_U32_e64}};
1178 static const unsigned CarryOpcMap[2][2] = {
1179 {AMDGPU::S_SUB_CO_PSEUDO, AMDGPU::S_ADD_CO_PSEUDO},
1180 {AMDGPU::V_SUBB_U32_e64, AMDGPU::V_ADDC_U32_e64}};
1182 bool IsVALU =
N->isDivergent();
1184 unsigned NoCarryOpc = NoCarryOpcMap[IsVALU][IsAdd];
1185 unsigned CarryOpc = CarryOpcMap[IsVALU][IsAdd];
1189 if (!ConsumeCarry) {
1192 AddLo =
CurDAG->getMachineNode(NoCarryOpc,
DL, VTList, Args);
1195 AddLo =
CurDAG->getMachineNode(NoCarryOpc,
DL, VTList, Args);
1201 AddLo =
CurDAG->getMachineNode(CarryOpc,
DL, VTList, Args);
1204 AddLo =
CurDAG->getMachineNode(CarryOpc,
DL, VTList, Args);
1212 AddHi =
CurDAG->getMachineNode(CarryOpc,
DL, VTList, Args);
1215 AddHi =
CurDAG->getMachineNode(CarryOpc,
DL, VTList, Args);
1218 unsigned RC = IsVALU ? AMDGPU::VReg_64RegClassID : AMDGPU::SReg_64RegClassID;
1219 SDValue RegSequenceArgs[] = {
CurDAG->getTargetConstant(RC,
DL, MVT::i32),
1223 MVT::i64, RegSequenceArgs);
1229void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(
SDNode *
N) {
1234 bool IsVALU =
N->isDivergent();
1236 for (SDNode::user_iterator UI =
N->user_begin(),
E =
N->user_end(); UI !=
E;
1238 if (UI.getUse().getResNo() == 1) {
1239 if (UI->isMachineOpcode()) {
1240 if (UI->getMachineOpcode() !=
1241 (IsAdd ? AMDGPU::S_ADD_CO_PSEUDO : AMDGPU::S_SUB_CO_PSEUDO)) {
1254 unsigned Opc = IsAdd ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_SUB_CO_U32_e64;
1257 N,
Opc,
N->getVTList(),
1258 {N->getOperand(0), N->getOperand(1),
1259 CurDAG->getTargetConstant(0, {}, MVT::i1) });
1261 unsigned Opc = IsAdd ? AMDGPU::S_UADDO_PSEUDO : AMDGPU::S_USUBO_PSEUDO;
1263 CurDAG->SelectNodeTo(
N,
Opc,
N->getVTList(),
1264 {N->getOperand(0), N->getOperand(1)});
1268void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(
SDNode *
N) {
1272 SelectVOP3Mods0(
N->getOperand(1),
Ops[1],
Ops[0],
Ops[6],
Ops[7]);
1273 SelectVOP3Mods(
N->getOperand(2),
Ops[3],
Ops[2]);
1274 SelectVOP3Mods(
N->getOperand(3),
Ops[5],
Ops[4]);
1275 Ops[8] =
N->getOperand(0);
1276 Ops[9] =
N->getOperand(4);
1280 bool UseFMAC = Subtarget->hasDLInsts() &&
1284 unsigned Opcode = UseFMAC ? AMDGPU::V_FMAC_F32_e64 : AMDGPU::V_FMA_F32_e64;
1285 CurDAG->SelectNodeTo(
N, Opcode,
N->getVTList(),
Ops);
1288void AMDGPUDAGToDAGISel::SelectFMUL_W_CHAIN(
SDNode *
N) {
1292 SelectVOP3Mods0(
N->getOperand(1),
Ops[1],
Ops[0],
Ops[4],
Ops[5]);
1293 SelectVOP3Mods(
N->getOperand(2),
Ops[3],
Ops[2]);
1294 Ops[6] =
N->getOperand(0);
1295 Ops[7] =
N->getOperand(3);
1297 CurDAG->SelectNodeTo(
N, AMDGPU::V_MUL_F32_e64,
N->getVTList(),
Ops);
1302void AMDGPUDAGToDAGISel::SelectDIV_SCALE(
SDNode *
N) {
1303 EVT VT =
N->getValueType(0);
1305 assert(VT == MVT::f32 || VT == MVT::f64);
1308 = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64_e64 : AMDGPU::V_DIV_SCALE_F32_e64;
1313 SelectVOP3BMods0(
N->getOperand(0),
Ops[1],
Ops[0],
Ops[6],
Ops[7]);
1314 SelectVOP3BMods(
N->getOperand(1),
Ops[3],
Ops[2]);
1315 SelectVOP3BMods(
N->getOperand(2),
Ops[5],
Ops[4]);
1321void AMDGPUDAGToDAGISel::SelectMAD_64_32(
SDNode *
N) {
1325 bool UseNoCarry = Subtarget->hasMadNC64_32Insts() && !
N->hasAnyUseOfValue(1);
1326 if (Subtarget->hasMADIntraFwdBug())
1327 Opc =
Signed ? AMDGPU::V_MAD_I64_I32_gfx11_e64
1328 : AMDGPU::V_MAD_U64_U32_gfx11_e64;
1329 else if (UseNoCarry)
1330 Opc =
Signed ? AMDGPU::V_MAD_NC_I64_I32_e64 : AMDGPU::V_MAD_NC_U64_U32_e64;
1332 Opc =
Signed ? AMDGPU::V_MAD_I64_I32_e64 : AMDGPU::V_MAD_U64_U32_e64;
1335 SDValue Ops[] = {
N->getOperand(0),
N->getOperand(1),
N->getOperand(2),
1339 MachineSDNode *Mad =
CurDAG->getMachineNode(
Opc, SL, MVT::i64,
Ops);
1350void AMDGPUDAGToDAGISel::SelectMUL_LOHI(
SDNode *
N) {
1355 if (Subtarget->hasMadNC64_32Insts()) {
1356 VTList =
CurDAG->getVTList(MVT::i64);
1357 Opc =
Signed ? AMDGPU::V_MAD_NC_I64_I32_e64 : AMDGPU::V_MAD_NC_U64_U32_e64;
1359 VTList =
CurDAG->getVTList(MVT::i64, MVT::i1);
1360 if (Subtarget->hasMADIntraFwdBug()) {
1361 Opc =
Signed ? AMDGPU::V_MAD_I64_I32_gfx11_e64
1362 : AMDGPU::V_MAD_U64_U32_gfx11_e64;
1364 Opc =
Signed ? AMDGPU::V_MAD_I64_I32_e64 : AMDGPU::V_MAD_U64_U32_e64;
1371 SDNode *Mad =
CurDAG->getMachineNode(
Opc, SL, VTList,
Ops);
1373 SDValue Sub0 =
CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32);
1374 SDNode *
Lo =
CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, SL,
1375 MVT::i32,
SDValue(Mad, 0), Sub0);
1379 SDValue Sub1 =
CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32);
1380 SDNode *
Hi =
CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, SL,
1381 MVT::i32,
SDValue(Mad, 0), Sub1);
1391 if (!
Base || Subtarget->hasUsableDSOffset() ||
1392 Subtarget->unsafeDSOffsetFoldingEnabled())
1403 if (
CurDAG->isBaseWithConstantOffset(Addr)) {
1416 int64_t ByteOffset =
C->getSExtValue();
1417 if (isDSOffsetLegal(
SDValue(), ByteOffset)) {
1426 if (isDSOffsetLegal(
Sub, ByteOffset)) {
1432 unsigned SubOp = AMDGPU::V_SUB_CO_U32_e32;
1433 if (Subtarget->hasAddNoCarryInsts()) {
1434 SubOp = AMDGPU::V_SUB_U32_e64;
1436 CurDAG->getTargetConstant(0, {}, MVT::i1));
1439 MachineSDNode *MachineSub =
1440 CurDAG->getMachineNode(SubOp,
DL, MVT::i32, Opnds);
1456 if (isDSOffsetLegal(
SDValue(), CAddr->getZExtValue())) {
1458 MachineSDNode *MovZero =
CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
1459 DL, MVT::i32, Zero);
1461 Offset =
CurDAG->getTargetConstant(CAddr->getZExtValue(),
DL, MVT::i16);
1468 Offset =
CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16);
1472bool AMDGPUDAGToDAGISel::isDSOffset2Legal(
SDValue Base,
unsigned Offset0,
1474 unsigned Size)
const {
1475 if (Offset0 %
Size != 0 || Offset1 %
Size != 0)
1480 if (!
Base || Subtarget->hasUsableDSOffset() ||
1481 Subtarget->unsafeDSOffsetFoldingEnabled())
1499bool AMDGPUDAGToDAGISel::isFlatScratchBaseLegal(
SDValue Addr)
const {
1505 if (Subtarget->hasSignedScratchOffsets())
1515 ConstantSDNode *ImmOp =
nullptr;
1526bool AMDGPUDAGToDAGISel::isFlatScratchBaseLegalSV(
SDValue Addr)
const {
1532 if (Subtarget->hasSignedScratchOffsets())
1542bool AMDGPUDAGToDAGISel::isFlatScratchBaseLegalSVImm(
SDValue Addr)
const {
1556 (RHSImm->getSExtValue() < 0 && RHSImm->getSExtValue() > -0x40000000)))
1559 auto LHS =
Base.getOperand(0);
1560 auto RHS =
Base.getOperand(1);
1568 return SelectDSReadWrite2(Addr,
Base, Offset0, Offset1, 4);
1574 return SelectDSReadWrite2(Addr,
Base, Offset0, Offset1, 8);
1579 unsigned Size)
const {
1582 if (
CurDAG->isBaseWithConstantOffset(Addr)) {
1587 unsigned OffsetValue1 = OffsetValue0 +
Size;
1590 if (isDSOffset2Legal(N0, OffsetValue0, OffsetValue1,
Size)) {
1592 Offset0 =
CurDAG->getTargetConstant(OffsetValue0 /
Size,
DL, MVT::i32);
1593 Offset1 =
CurDAG->getTargetConstant(OffsetValue1 /
Size,
DL, MVT::i32);
1598 if (
const ConstantSDNode *
C =
1600 unsigned OffsetValue0 =
C->getZExtValue();
1601 unsigned OffsetValue1 = OffsetValue0 +
Size;
1603 if (isDSOffset2Legal(
SDValue(), OffsetValue0, OffsetValue1,
Size)) {
1613 if (isDSOffset2Legal(
Sub, OffsetValue0, OffsetValue1,
Size)) {
1617 unsigned SubOp = AMDGPU::V_SUB_CO_U32_e32;
1618 if (Subtarget->hasAddNoCarryInsts()) {
1619 SubOp = AMDGPU::V_SUB_U32_e64;
1621 CurDAG->getTargetConstant(0, {}, MVT::i1));
1624 MachineSDNode *MachineSub =
CurDAG->getMachineNode(
1629 CurDAG->getTargetConstant(OffsetValue0 /
Size,
DL, MVT::i32);
1631 CurDAG->getTargetConstant(OffsetValue1 /
Size,
DL, MVT::i32);
1637 unsigned OffsetValue0 = CAddr->getZExtValue();
1638 unsigned OffsetValue1 = OffsetValue0 +
Size;
1640 if (isDSOffset2Legal(
SDValue(), OffsetValue0, OffsetValue1,
Size)) {
1642 MachineSDNode *MovZero =
1643 CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
DL, MVT::i32, Zero);
1645 Offset0 =
CurDAG->getTargetConstant(OffsetValue0 /
Size,
DL, MVT::i32);
1646 Offset1 =
CurDAG->getTargetConstant(OffsetValue1 /
Size,
DL, MVT::i32);
1654 Offset0 =
CurDAG->getTargetConstant(0,
DL, MVT::i32);
1655 Offset1 =
CurDAG->getTargetConstant(1,
DL, MVT::i32);
1665 if (Subtarget->useFlatForGlobal())
1670 Idxen =
CurDAG->getTargetConstant(0,
DL, MVT::i1);
1671 Offen =
CurDAG->getTargetConstant(0,
DL, MVT::i1);
1672 Addr64 =
CurDAG->getTargetConstant(0,
DL, MVT::i1);
1673 SOffset = Subtarget->hasRestrictedSOffset()
1674 ?
CurDAG->getRegister(AMDGPU::SGPR_NULL, MVT::i32)
1675 :
CurDAG->getTargetConstant(0,
DL, MVT::i32);
1677 ConstantSDNode *C1 =
nullptr;
1679 if (
CurDAG->isBaseWithConstantOffset(Addr)) {
1692 Addr64 =
CurDAG->getTargetConstant(1,
DL, MVT::i1);
1698 Ptr =
SDValue(buildSMovImm64(
DL, 0, MVT::v2i32), 0);
1714 Ptr =
SDValue(buildSMovImm64(
DL, 0, MVT::v2i32), 0);
1716 Addr64 =
CurDAG->getTargetConstant(1,
DL, MVT::i1);
1720 VAddr =
CurDAG->getTargetConstant(0,
DL, MVT::i32);
1730 const SIInstrInfo *
TII = Subtarget->getInstrInfo();
1741 AMDGPU::S_MOV_B32,
DL, MVT::i32,
1747bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(
SDValue Addr,
SDValue &SRsrc,
1750 SDValue Ptr, Offen, Idxen, Addr64;
1754 if (!Subtarget->hasAddr64())
1757 if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset,
Offset, Offen, Idxen, Addr64))
1761 if (
C->getSExtValue()) {
1774std::pair<SDValue, SDValue> AMDGPUDAGToDAGISel::foldFrameIndex(
SDValue N)
const {
1779 FI ?
CurDAG->getTargetFrameIndex(FI->getIndex(), FI->getValueType(0)) :
N;
1785 return std::pair(TFI,
CurDAG->getTargetConstant(0,
DL, MVT::i32));
1788bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(
SDNode *Parent,
1794 MachineFunction &
MF =
CurDAG->getMachineFunction();
1795 const SIMachineFunctionInfo *
Info =
MF.getInfo<SIMachineFunctionInfo>();
1797 Rsrc =
CurDAG->getRegister(
Info->getScratchRSrcReg(), MVT::v4i32);
1800 int64_t
Imm = CAddr->getSExtValue();
1801 const int64_t NullPtr =
1804 if (Imm != NullPtr) {
1807 CurDAG->getTargetConstant(Imm & ~MaxOffset,
DL, MVT::i32);
1808 MachineSDNode *MovHighBits =
CurDAG->getMachineNode(
1809 AMDGPU::V_MOV_B32_e32,
DL, MVT::i32, HighBits);
1810 VAddr =
SDValue(MovHighBits, 0);
1812 SOffset =
CurDAG->getTargetConstant(0,
DL, MVT::i32);
1813 ImmOffset =
CurDAG->getTargetConstant(Imm & MaxOffset,
DL, MVT::i32);
1818 if (
CurDAG->isBaseWithConstantOffset(Addr)) {
1839 const SIInstrInfo *
TII = Subtarget->getInstrInfo();
1840 if (
TII->isLegalMUBUFImmOffset(C1) &&
1841 (!Subtarget->privateMemoryResourceIsRangeChecked() ||
1842 CurDAG->SignBitIsZero(N0))) {
1843 std::tie(VAddr, SOffset) = foldFrameIndex(N0);
1844 ImmOffset =
CurDAG->getTargetConstant(C1,
DL, MVT::i32);
1850 std::tie(VAddr, SOffset) = foldFrameIndex(Addr);
1851 ImmOffset =
CurDAG->getTargetConstant(0,
DL, MVT::i32);
1859 if (!
Reg.isPhysical())
1861 const auto *RC =
TRI.getPhysRegBaseClass(
Reg);
1862 return RC &&
TRI.isSGPRClass(RC);
1865bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(
SDNode *Parent,
1870 const SIRegisterInfo *
TRI = Subtarget->getRegisterInfo();
1871 const SIInstrInfo *
TII = Subtarget->getInstrInfo();
1872 MachineFunction &
MF =
CurDAG->getMachineFunction();
1873 const SIMachineFunctionInfo *
Info =
MF.getInfo<SIMachineFunctionInfo>();
1878 SRsrc =
CurDAG->getRegister(
Info->getScratchRSrcReg(), MVT::v4i32);
1884 ConstantSDNode *CAddr;
1897 SOffset =
CurDAG->getTargetConstant(0,
DL, MVT::i32);
1902 SRsrc =
CurDAG->getRegister(
Info->getScratchRSrcReg(), MVT::v4i32);
1908bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(
SDValue Addr,
SDValue &SRsrc,
1911 SDValue Ptr, VAddr, Offen, Idxen, Addr64;
1912 const SIInstrInfo *
TII = Subtarget->getInstrInfo();
1914 if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset,
Offset, Offen, Idxen, Addr64))
1920 uint64_t Rsrc =
TII->getDefaultRsrcDataFormat() |
1933bool AMDGPUDAGToDAGISel::SelectBUFSOffset(
SDValue ByteOffsetNode,
1935 if (Subtarget->hasRestrictedSOffset() &&
isNullConstant(ByteOffsetNode)) {
1936 SOffset =
CurDAG->getRegister(AMDGPU::SGPR_NULL, MVT::i32);
1940 SOffset = ByteOffsetNode;
1958bool AMDGPUDAGToDAGISel::SelectFlatOffsetImpl(
1962 int64_t OffsetVal = 0;
1966 bool CanHaveFlatSegmentOffsetBug =
1967 Subtarget->hasFlatSegmentOffsetBug() &&
1968 FlatVariant == FlatAddrSpace::FLAT &&
1971 if (Subtarget->hasFlatInstOffsets() && !CanHaveFlatSegmentOffsetBug) {
1973 if (isBaseWithConstantOffset64(Addr, N0, N1) &&
1974 (FlatVariant != FlatAddrSpace::FlatScratch ||
1975 isFlatScratchBaseLegal(Addr))) {
1983 if (COffsetVal == 0 || FlatVariant != FlatAddrSpace::FLAT || IsInBounds) {
1984 const SIInstrInfo *
TII = Subtarget->getInstrInfo();
1985 if (
TII->isLegalFLATOffset(COffsetVal, AS, FlatVariant)) {
1987 OffsetVal = COffsetVal;
2000 uint64_t RemainderOffset;
2002 std::tie(OffsetVal, RemainderOffset) =
2003 TII->splitFlatOffset(COffsetVal, AS, FlatVariant);
2006 getMaterializedScalarImm32(
Lo_32(RemainderOffset),
DL);
2013 unsigned AddOp = AMDGPU::V_ADD_CO_U32_e32;
2014 if (Subtarget->hasAddNoCarryInsts()) {
2015 AddOp = AMDGPU::V_ADD_U32_e64;
2024 CurDAG->getTargetConstant(AMDGPU::sub0,
DL, MVT::i32);
2026 CurDAG->getTargetConstant(AMDGPU::sub1,
DL, MVT::i32);
2028 SDNode *N0Lo =
CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
2029 DL, MVT::i32, N0, Sub0);
2030 SDNode *N0Hi =
CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
2031 DL, MVT::i32, N0, Sub1);
2034 getMaterializedScalarImm32(
Hi_32(RemainderOffset),
DL);
2036 SDVTList VTs =
CurDAG->getVTList(MVT::i32, MVT::i1);
2039 CurDAG->getMachineNode(AMDGPU::V_ADD_CO_U32_e64,
DL, VTs,
2040 {AddOffsetLo,
SDValue(N0Lo, 0), Clamp});
2042 SDNode *Addc =
CurDAG->getMachineNode(
2043 AMDGPU::V_ADDC_U32_e64,
DL, VTs,
2047 CurDAG->getTargetConstant(AMDGPU::VReg_64RegClassID,
DL,
2052 MVT::i64, RegSequenceArgs),
2061 Offset =
CurDAG->getSignedTargetConstant(OffsetVal, SDLoc(), MVT::i32);
2065bool AMDGPUDAGToDAGISel::SelectFlatOffset(
SDNode *
N,
SDValue Addr,
2068 return SelectFlatOffsetImpl(
N, Addr, VAddr,
Offset,
2072bool AMDGPUDAGToDAGISel::SelectGlobalOffset(
SDNode *
N,
SDValue Addr,
2075 return SelectFlatOffsetImpl(
N, Addr, VAddr,
Offset,
2079bool AMDGPUDAGToDAGISel::SelectScratchOffset(
SDNode *
N,
SDValue Addr,
2082 return SelectFlatOffsetImpl(
N, Addr, VAddr,
Offset,
2090 if (
Op.getValueType() == MVT::i32)
2105bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(
SDNode *
N,
SDValue Addr,
2108 bool NeedIOffset)
const {
2110 int64_t ImmOffset = 0;
2111 ScaleOffset =
false;
2117 if (isBaseWithConstantOffset64(Addr,
LHS,
RHS)) {
2119 const SIInstrInfo *
TII = Subtarget->getInstrInfo();
2123 FlatAddrSpace::FlatGlobal)) {
2125 ImmOffset = COffsetVal;
2126 }
else if (!
LHS->isDivergent()) {
2127 if (COffsetVal > 0) {
2132 int64_t SplitImmOffset = 0, RemainderOffset = COffsetVal;
2134 std::tie(SplitImmOffset, RemainderOffset) =
TII->splitFlatOffset(
2138 if (Subtarget->hasSignedGVSOffset() ?
isInt<32>(RemainderOffset)
2140 SDNode *VMov =
CurDAG->getMachineNode(
2141 AMDGPU::V_MOV_B32_e32, SL, MVT::i32,
2142 CurDAG->getTargetConstant(RemainderOffset, SDLoc(), MVT::i32));
2145 Offset =
CurDAG->getTargetConstant(SplitImmOffset, SDLoc(), MVT::i32);
2155 unsigned NumLiterals =
2156 !
TII->isInlineConstant(APInt(32,
Lo_32(COffsetVal))) +
2157 !
TII->isInlineConstant(APInt(32,
Hi_32(COffsetVal)));
2158 if (Subtarget->getConstantBusLimit(AMDGPU::V_ADD_U32_e64) > NumLiterals)
2167 if (!
LHS->isDivergent()) {
2170 ScaleOffset = SelectScaleOffset(
N,
RHS, Subtarget->hasSignedGVSOffset());
2172 RHS, Subtarget->hasSignedGVSOffset(),
CurDAG)) {
2179 if (!SAddr && !
RHS->isDivergent()) {
2181 ScaleOffset = SelectScaleOffset(
N,
LHS, Subtarget->hasSignedGVSOffset());
2183 LHS, Subtarget->hasSignedGVSOffset(),
CurDAG)) {
2190 Offset =
CurDAG->getSignedTargetConstant(ImmOffset, SDLoc(), MVT::i32);
2195 if (Subtarget->hasScaleOffset() &&
2196 (Addr.
getOpcode() == (Subtarget->hasSignedGVSOffset()
2211 Offset =
CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i32);
2224 CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, SDLoc(Addr), MVT::i32,
2225 CurDAG->getTargetConstant(0, SDLoc(), MVT::i32));
2227 Offset =
CurDAG->getSignedTargetConstant(ImmOffset, SDLoc(), MVT::i32);
2231bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(
SDNode *
N,
SDValue Addr,
2236 if (!SelectGlobalSAddr(
N, Addr, SAddr, VOffset,
Offset, ScaleOffset))
2244bool AMDGPUDAGToDAGISel::SelectGlobalSAddrCPol(
SDNode *
N,
SDValue Addr,
2249 if (!SelectGlobalSAddr(
N, Addr, SAddr, VOffset,
Offset, ScaleOffset))
2254 N->getConstantOperandVal(
N->getNumOperands() - 1) & ~AMDGPU::CPol::SCAL;
2260bool AMDGPUDAGToDAGISel::SelectGlobalSAddrCPolM0(
SDNode *
N,
SDValue Addr,
2266 if (!SelectGlobalSAddr(
N, Addr, SAddr, VOffset,
Offset, ScaleOffset))
2271 N->getConstantOperandVal(
N->getNumOperands() - 2) & ~AMDGPU::CPol::SCAL;
2277bool AMDGPUDAGToDAGISel::SelectGlobalSAddrGLC(
SDNode *
N,
SDValue Addr,
2282 if (!SelectGlobalSAddr(
N, Addr, SAddr, VOffset,
Offset, ScaleOffset))
2286 CPol =
CurDAG->getTargetConstant(CPolVal, SDLoc(), MVT::i32);
2290bool AMDGPUDAGToDAGISel::SelectGlobalSAddrNoIOffset(
SDNode *
N,
SDValue Addr,
2296 if (!SelectGlobalSAddr(
N, Addr, SAddr, VOffset, DummyOffset, ScaleOffset,
2302 N->getConstantOperandVal(
N->getNumOperands() - 1) & ~AMDGPU::CPol::SCAL;
2308bool AMDGPUDAGToDAGISel::SelectGlobalSAddrNoIOffsetM0(
SDNode *
N,
SDValue Addr,
2314 if (!SelectGlobalSAddr(
N, Addr, SAddr, VOffset, DummyOffset, ScaleOffset,
2335 FI->getValueType(0));
2345bool AMDGPUDAGToDAGISel::SelectScratchSAddr(
SDNode *Parent,
SDValue Addr,
2354 int64_t COffsetVal = 0;
2356 if (
CurDAG->isBaseWithConstantOffset(Addr) && isFlatScratchBaseLegal(Addr)) {
2365 const SIInstrInfo *
TII = Subtarget->getInstrInfo();
2368 FlatAddrSpace::FlatScratch)) {
2369 int64_t SplitImmOffset, RemainderOffset;
2370 std::tie(SplitImmOffset, RemainderOffset) =
TII->splitFlatOffset(
2373 COffsetVal = SplitImmOffset;
2377 ? getMaterializedScalarImm32(
Lo_32(RemainderOffset),
DL)
2378 :
CurDAG->getSignedTargetConstant(RemainderOffset,
DL, MVT::i32);
2379 SAddr =
SDValue(
CurDAG->getMachineNode(AMDGPU::S_ADD_I32,
DL, MVT::i32,
2384 Offset =
CurDAG->getSignedTargetConstant(COffsetVal,
DL, MVT::i32);
2390bool AMDGPUDAGToDAGISel::checkFlatScratchSVSSwizzleBug(
2392 if (!Subtarget->hasFlatScratchSVSSwizzleBug())
2398 KnownBits VKnown =
CurDAG->computeKnownBits(VAddr);
2405 return (VMax & 3) + (
SMax & 3) >= 4;
2408bool AMDGPUDAGToDAGISel::SelectScratchSVAddr(
SDNode *
N,
SDValue Addr,
2412 int64_t ImmOffset = 0;
2416 if (isBaseWithConstantOffset64(Addr,
LHS,
RHS)) {
2418 const SIInstrInfo *
TII = Subtarget->getInstrInfo();
2423 ImmOffset = COffsetVal;
2424 }
else if (!
LHS->isDivergent() && COffsetVal > 0) {
2428 int64_t SplitImmOffset, RemainderOffset;
2429 std::tie(SplitImmOffset, RemainderOffset) =
2434 SDNode *VMov =
CurDAG->getMachineNode(
2435 AMDGPU::V_MOV_B32_e32, SL, MVT::i32,
2436 CurDAG->getTargetConstant(RemainderOffset, SDLoc(), MVT::i32));
2439 if (!isFlatScratchBaseLegal(Addr))
2441 if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, SplitImmOffset))
2443 Offset =
CurDAG->getTargetConstant(SplitImmOffset, SDLoc(), MVT::i32);
2444 CPol =
CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
2456 if (!
LHS->isDivergent() &&
RHS->isDivergent()) {
2459 }
else if (!
RHS->isDivergent() &&
LHS->isDivergent()) {
2466 if (OrigAddr != Addr) {
2467 if (!isFlatScratchBaseLegalSVImm(OrigAddr))
2470 if (!isFlatScratchBaseLegalSV(OrigAddr))
2474 if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, ImmOffset))
2477 Offset =
CurDAG->getSignedTargetConstant(ImmOffset, SDLoc(), MVT::i32);
2479 bool ScaleOffset = SelectScaleOffset(
N, VAddr,
true );
2488bool AMDGPUDAGToDAGISel::isSOffsetLegalWithImmOffset(
SDValue *SOffset,
2491 int64_t ImmOffset)
const {
2492 if (!IsBuffer && !Imm32Only && ImmOffset < 0 &&
2494 KnownBits SKnown =
CurDAG->computeKnownBits(*SOffset);
2506 bool IsSigned)
const {
2507 bool ScaleOffset =
false;
2508 if (!Subtarget->hasScaleOffset() || !
Offset)
2522 (IsSigned &&
Offset.getOpcode() == AMDGPUISD::MUL_I24) ||
2523 Offset.getOpcode() == AMDGPUISD::MUL_U24 ||
2524 (
Offset.isMachineOpcode() &&
2525 Offset.getMachineOpcode() ==
2526 (IsSigned ? AMDGPU::S_MUL_I64_I32_PSEUDO
2527 : AMDGPU::S_MUL_U64_U32_PSEUDO))) {
2529 ScaleOffset =
C->getZExtValue() ==
Size;
2541bool AMDGPUDAGToDAGISel::SelectSMRDOffset(
SDNode *
N,
SDValue ByteOffsetNode,
2543 bool Imm32Only,
bool IsBuffer,
2544 bool HasSOffset, int64_t ImmOffset,
2545 bool *ScaleOffset)
const {
2547 "Cannot match both soffset and offset at the same time!");
2552 *ScaleOffset = SelectScaleOffset(
N, ByteOffsetNode,
false );
2562 *SOffset = ByteOffsetNode;
2563 return isSOffsetLegalWithImmOffset(SOffset, Imm32Only, IsBuffer,
2569 return isSOffsetLegalWithImmOffset(SOffset, Imm32Only, IsBuffer,
2576 SDLoc SL(ByteOffsetNode);
2580 int64_t ByteOffset = IsBuffer ?
C->getZExtValue() :
C->getSExtValue();
2582 *Subtarget, ByteOffset, IsBuffer, HasSOffset);
2583 if (EncodedOffset &&
Offset && !Imm32Only) {
2584 *
Offset =
CurDAG->getSignedTargetConstant(*EncodedOffset, SL, MVT::i32);
2593 if (EncodedOffset &&
Offset && Imm32Only) {
2594 *
Offset =
CurDAG->getTargetConstant(*EncodedOffset, SL, MVT::i32);
2602 SDValue C32Bit =
CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
2604 CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, C32Bit), 0);
2611SDValue AMDGPUDAGToDAGISel::Expand32BitAddress(
SDValue Addr)
const {
2618 const MachineFunction &
MF =
CurDAG->getMachineFunction();
2619 const SIMachineFunctionInfo *
Info =
MF.getInfo<SIMachineFunctionInfo>();
2620 unsigned AddrHiVal =
Info->get32BitAddressHighBits();
2621 SDValue AddrHi =
CurDAG->getTargetConstant(AddrHiVal, SL, MVT::i32);
2624 CurDAG->getTargetConstant(AMDGPU::SReg_64_XEXECRegClassID, SL, MVT::i32),
2626 CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32),
2627 SDValue(
CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, AddrHi),
2629 CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32),
2632 return SDValue(
CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, SL, MVT::i64,
2639bool AMDGPUDAGToDAGISel::SelectSMRDBaseOffset(
SDNode *
N,
SDValue Addr,
2642 bool IsBuffer,
bool HasSOffset,
2644 bool *ScaleOffset)
const {
2646 assert(!Imm32Only && !IsBuffer);
2649 if (!SelectSMRDBaseOffset(
N, Addr,
B,
nullptr,
Offset,
false,
false,
true))
2654 ImmOff =
C->getSExtValue();
2656 return SelectSMRDBaseOffset(
N,
B, SBase, SOffset,
nullptr,
false,
false,
2657 true, ImmOff, ScaleOffset);
2677 if (SelectSMRDOffset(
N, N1, SOffset,
Offset, Imm32Only, IsBuffer, HasSOffset,
2678 ImmOffset, ScaleOffset)) {
2682 if (SelectSMRDOffset(
N, N0, SOffset,
Offset, Imm32Only, IsBuffer, HasSOffset,
2683 ImmOffset, ScaleOffset)) {
2692 bool Imm32Only,
bool *ScaleOffset)
const {
2693 if (SelectSMRDBaseOffset(
N, Addr, SBase, SOffset,
Offset, Imm32Only,
2696 SBase = Expand32BitAddress(SBase);
2701 SBase = Expand32BitAddress(Addr);
2702 *
Offset =
CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
2709bool AMDGPUDAGToDAGISel::SelectSMRDImm(
SDValue Addr,
SDValue &SBase,
2711 return SelectSMRD(
nullptr, Addr, SBase,
nullptr,
2715bool AMDGPUDAGToDAGISel::SelectSMRDImm32(
SDValue Addr,
SDValue &SBase,
2718 return SelectSMRD(
nullptr, Addr, SBase,
nullptr,
2725 if (!SelectSMRD(
N, Addr, SBase, &SOffset,
nullptr,
2726 false, &ScaleOffset))
2730 SDLoc(
N), MVT::i32);
2734bool AMDGPUDAGToDAGISel::SelectSMRDSgprImm(
SDNode *
N,
SDValue Addr,
2739 if (!SelectSMRD(
N, Addr, SBase, &SOffset, &
Offset,
false, &ScaleOffset))
2743 SDLoc(
N), MVT::i32);
2748 return SelectSMRDOffset(
nullptr,
N,
nullptr, &
Offset,
2752bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(
SDValue N,
2755 return SelectSMRDOffset(
nullptr,
N,
nullptr, &
Offset,
2759bool AMDGPUDAGToDAGISel::SelectSMRDBufferSgprImm(
SDValue N,
SDValue &SOffset,
2763 return N.getValueType() == MVT::i32 &&
2764 SelectSMRDBaseOffset(
nullptr,
N, SOffset,
2769bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(
SDValue Index,
2774 if (
CurDAG->isBaseWithConstantOffset(Index)) {
2799SDNode *AMDGPUDAGToDAGISel::getBFE32(
bool IsSigned,
const SDLoc &
DL,
2803 unsigned Opcode = IsSigned ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;
2807 return CurDAG->getMachineNode(Opcode,
DL, MVT::i32, Val, Off, W);
2809 unsigned Opcode = IsSigned ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
2813 uint32_t PackedVal =
Offset | (Width << 16);
2814 SDValue PackedConst =
CurDAG->getTargetConstant(PackedVal,
DL, MVT::i32);
2816 return CurDAG->getMachineNode(Opcode,
DL, MVT::i32, Val, PackedConst);
2819void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(
SDNode *
N) {
2824 const SDValue &Shl =
N->getOperand(0);
2829 uint32_t BVal =
B->getZExtValue();
2830 uint32_t CVal =
C->getZExtValue();
2832 if (0 < BVal && BVal <= CVal && CVal < 32) {
2842void AMDGPUDAGToDAGISel::SelectS_BFE(
SDNode *
N) {
2843 switch (
N->getOpcode()) {
2845 if (
N->getOperand(0).getOpcode() ==
ISD::SRL) {
2848 const SDValue &Srl =
N->getOperand(0);
2852 if (Shift && Mask) {
2854 uint32_t MaskVal =
Mask->getZExtValue();
2866 if (
N->getOperand(0).getOpcode() ==
ISD::AND) {
2873 if (Shift && Mask) {
2875 uint32_t MaskVal =
Mask->getZExtValue() >> ShiftVal;
2884 }
else if (
N->getOperand(0).getOpcode() ==
ISD::SHL) {
2885 SelectS_BFEFromShifts(
N);
2890 if (
N->getOperand(0).getOpcode() ==
ISD::SHL) {
2891 SelectS_BFEFromShifts(
N);
2906 unsigned Width =
cast<VTSDNode>(
N->getOperand(1))->getVT().getSizeInBits();
2916bool AMDGPUDAGToDAGISel::isCBranchSCC(
const SDNode *
N)
const {
2918 if (!
N->hasOneUse())
2928 MVT VT =
Cond.getOperand(0).getSimpleValueType();
2932 if (VT == MVT::i64) {
2935 Subtarget->hasScalarCompareEq64();
2938 if ((VT == MVT::f16 || VT == MVT::f32) && Subtarget->hasSALUFloatInsts())
2971void AMDGPUDAGToDAGISel::SelectBRCOND(
SDNode *
N) {
2974 if (
Cond.isUndef()) {
2975 CurDAG->SelectNodeTo(
N, AMDGPU::SI_BR_UNDEF, MVT::Other,
2976 N->getOperand(2),
N->getOperand(0));
2980 const SIRegisterInfo *
TRI = Subtarget->getRegisterInfo();
2982 bool UseSCCBr = isCBranchSCC(
N) && isUniformBr(
N);
2983 bool AndExec = !UseSCCBr;
2984 bool Negate =
false;
2987 Cond->getOperand(0)->getOpcode() == AMDGPUISD::SETCC) {
3002 bool NegatedBallot =
false;
3005 UseSCCBr = !BallotCond->isDivergent();
3006 Negate = Negate ^ NegatedBallot;
3021 UseSCCBr ? (Negate ? AMDGPU::S_CBRANCH_SCC0 : AMDGPU::S_CBRANCH_SCC1)
3022 : (Negate ? AMDGPU::S_CBRANCH_VCCZ : AMDGPU::S_CBRANCH_VCCNZ);
3023 Register CondReg = UseSCCBr ? AMDGPU::SCC :
TRI->getVCC();
3042 Subtarget->isWave32() ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64, SL,
3044 CurDAG->getRegister(Subtarget->isWave32() ? AMDGPU::EXEC_LO
3052 CurDAG->SelectNodeTo(
N, BrOp, MVT::Other,
3057void AMDGPUDAGToDAGISel::SelectFP_EXTEND(
SDNode *
N) {
3058 if (Subtarget->hasSALUFloatInsts() &&
N->getValueType(0) == MVT::f32 &&
3059 !
N->isDivergent()) {
3061 if (Src.getValueType() == MVT::f16) {
3063 CurDAG->SelectNodeTo(
N, AMDGPU::S_CVT_HI_F32_F16,
N->getVTList(),
3073void AMDGPUDAGToDAGISel::SelectDSAppendConsume(
SDNode *
N,
unsigned IntrID) {
3076 unsigned Opc = IntrID == Intrinsic::amdgcn_ds_append ?
3077 AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME;
3082 MachineMemOperand *MMO =
M->getMemOperand();
3086 if (
CurDAG->isBaseWithConstantOffset(Ptr)) {
3091 if (isDSOffsetLegal(PtrBase, OffsetVal.
getZExtValue())) {
3092 N = glueCopyToM0(
N, PtrBase);
3093 Offset =
CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i32);
3098 N = glueCopyToM0(
N, Ptr);
3099 Offset =
CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
3104 CurDAG->getTargetConstant(IsGDS, SDLoc(), MVT::i32),
3109 SDNode *Selected =
CurDAG->SelectNodeTo(
N,
Opc,
N->getVTList(),
Ops);
3115void AMDGPUDAGToDAGISel::SelectDSBvhStackIntrinsic(
SDNode *
N,
unsigned IntrID) {
3118 case Intrinsic::amdgcn_ds_bvh_stack_rtn:
3119 case Intrinsic::amdgcn_ds_bvh_stack_push4_pop1_rtn:
3120 Opc = AMDGPU::DS_BVH_STACK_RTN_B32;
3122 case Intrinsic::amdgcn_ds_bvh_stack_push8_pop1_rtn:
3123 Opc = AMDGPU::DS_BVH_STACK_PUSH8_POP1_RTN_B32;
3125 case Intrinsic::amdgcn_ds_bvh_stack_push8_pop2_rtn:
3126 Opc = AMDGPU::DS_BVH_STACK_PUSH8_POP2_RTN_B64;
3129 SDValue Ops[] = {
N->getOperand(2),
N->getOperand(3),
N->getOperand(4),
3130 N->getOperand(5),
N->getOperand(0)};
3133 MachineMemOperand *MMO =
M->getMemOperand();
3134 SDNode *Selected =
CurDAG->SelectNodeTo(
N,
Opc,
N->getVTList(),
Ops);
3138void AMDGPUDAGToDAGISel::SelectTensorLoadStore(
SDNode *
N,
unsigned IntrID) {
3139 bool IsLoad = IntrID == Intrinsic::amdgcn_tensor_load_to_lds;
3141 IsLoad ? AMDGPU::TENSOR_LOAD_TO_LDS_d4 : AMDGPU::TENSOR_STORE_FROM_LDS_d4;
3153 Opc = IsLoad ? AMDGPU::TENSOR_LOAD_TO_LDS_d2
3154 : AMDGPU::TENSOR_STORE_FROM_LDS_d2;
3166 (void)
CurDAG->SelectNodeTo(
N,
Opc, MVT::Other, TensorOps);
3171 case Intrinsic::amdgcn_ds_gws_init:
3172 return AMDGPU::DS_GWS_INIT;
3173 case Intrinsic::amdgcn_ds_gws_barrier:
3174 return AMDGPU::DS_GWS_BARRIER;
3175 case Intrinsic::amdgcn_ds_gws_sema_v:
3176 return AMDGPU::DS_GWS_SEMA_V;
3177 case Intrinsic::amdgcn_ds_gws_sema_br:
3178 return AMDGPU::DS_GWS_SEMA_BR;
3179 case Intrinsic::amdgcn_ds_gws_sema_p:
3180 return AMDGPU::DS_GWS_SEMA_P;
3181 case Intrinsic::amdgcn_ds_gws_sema_release_all:
3182 return AMDGPU::DS_GWS_SEMA_RELEASE_ALL;
3188void AMDGPUDAGToDAGISel::SelectDS_GWS(
SDNode *
N,
unsigned IntrID) {
3189 if (!Subtarget->hasGWS() ||
3190 (IntrID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
3191 !Subtarget->hasGWSSemaReleaseAll())) {
3198 const bool HasVSrc =
N->getNumOperands() == 4;
3199 assert(HasVSrc ||
N->getNumOperands() == 3);
3202 SDValue BaseOffset =
N->getOperand(HasVSrc ? 3 : 2);
3205 MachineMemOperand *MMO =
M->getMemOperand();
3218 glueCopyToM0(
N,
CurDAG->getTargetConstant(0, SL, MVT::i32));
3219 ImmOffset = ConstOffset->getZExtValue();
3221 if (
CurDAG->isBaseWithConstantOffset(BaseOffset)) {
3230 =
CurDAG->getMachineNode(AMDGPU::V_READFIRSTLANE_B32, SL, MVT::i32,
3234 =
CurDAG->getMachineNode(AMDGPU::S_LSHL_B32, SL, MVT::i32,
3236 CurDAG->getTargetConstant(16, SL, MVT::i32));
3237 glueCopyToM0(
N,
SDValue(M0Base, 0));
3241 SDValue OffsetField =
CurDAG->getTargetConstant(ImmOffset, SL, MVT::i32);
3245 const MCInstrDesc &InstrDesc =
TII->get(
Opc);
3246 int Data0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::data0);
3248 const TargetRegisterClass *DataRC =
TII->getRegClass(InstrDesc, Data0Idx);
3252 const SIRegisterInfo *
TRI = Subtarget->getRegisterInfo();
3255 MVT DataVT =
Data.getValueType().getSimpleVT();
3256 if (
TRI->isTypeLegalForClass(*DataRC, DataVT)) {
3258 Ops.push_back(
N->getOperand(2));
3264 CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32),
3266 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, SL, MVT::i32),
3268 CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32)};
3271 SL, MVT::v2i32, RegSeqOps),
3276 Ops.push_back(OffsetField);
3277 Ops.push_back(Chain);
3279 SDNode *Selected =
CurDAG->SelectNodeTo(
N,
Opc,
N->getVTList(),
Ops);
3283void AMDGPUDAGToDAGISel::SelectInterpP1F16(
SDNode *
N) {
3284 if (Subtarget->getLDSBankCount() != 16) {
3314 SDVTList VTs =
CurDAG->getVTList(MVT::f32, MVT::Other);
3317 CurDAG->getMachineNode(AMDGPU::V_INTERP_MOV_F32,
DL, VTs, {
3318 CurDAG->getTargetConstant(2,
DL, MVT::i32),
3324 SDNode *InterpP1LV =
3325 CurDAG->getMachineNode(AMDGPU::V_INTERP_P1LV_F16,
DL, MVT::f32, {
3326 CurDAG->getTargetConstant(0,
DL, MVT::i32),
3330 CurDAG->getTargetConstant(0,
DL, MVT::i32),
3333 CurDAG->getTargetConstant(0,
DL, MVT::i1),
3334 CurDAG->getTargetConstant(0,
DL, MVT::i32),
3341void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(
SDNode *
N) {
3342 unsigned IntrID =
N->getConstantOperandVal(1);
3344 case Intrinsic::amdgcn_ds_append:
3345 case Intrinsic::amdgcn_ds_consume: {
3346 if (
N->getValueType(0) != MVT::i32)
3348 SelectDSAppendConsume(
N, IntrID);
3351 case Intrinsic::amdgcn_ds_bvh_stack_rtn:
3352 case Intrinsic::amdgcn_ds_bvh_stack_push4_pop1_rtn:
3353 case Intrinsic::amdgcn_ds_bvh_stack_push8_pop1_rtn:
3354 case Intrinsic::amdgcn_ds_bvh_stack_push8_pop2_rtn:
3355 SelectDSBvhStackIntrinsic(
N, IntrID);
3357 case Intrinsic::amdgcn_init_whole_wave:
3358 CurDAG->getMachineFunction()
3359 .getInfo<SIMachineFunctionInfo>()
3360 ->setInitWholeWave();
3367void AMDGPUDAGToDAGISel::SelectINTRINSIC_WO_CHAIN(
SDNode *
N) {
3368 unsigned IntrID =
N->getConstantOperandVal(0);
3369 unsigned Opcode = AMDGPU::INSTRUCTION_LIST_END;
3370 SDNode *ConvGlueNode =
N->getGluedNode();
3376 CurDAG->getMachineNode(TargetOpcode::CONVERGENCECTRL_GLUE, {},
3377 MVT::Glue,
SDValue(ConvGlueNode, 0));
3379 ConvGlueNode =
nullptr;
3382 case Intrinsic::amdgcn_wqm:
3383 Opcode = AMDGPU::WQM;
3385 case Intrinsic::amdgcn_softwqm:
3386 Opcode = AMDGPU::SOFT_WQM;
3388 case Intrinsic::amdgcn_wwm:
3389 case Intrinsic::amdgcn_strict_wwm:
3390 Opcode = AMDGPU::STRICT_WWM;
3392 case Intrinsic::amdgcn_strict_wqm:
3393 Opcode = AMDGPU::STRICT_WQM;
3395 case Intrinsic::amdgcn_interp_p1_f16:
3396 SelectInterpP1F16(
N);
3398 case Intrinsic::amdgcn_permlane16_swap:
3399 case Intrinsic::amdgcn_permlane32_swap: {
3400 if ((IntrID == Intrinsic::amdgcn_permlane16_swap &&
3401 !Subtarget->hasPermlane16Swap()) ||
3402 (IntrID == Intrinsic::amdgcn_permlane32_swap &&
3403 !Subtarget->hasPermlane32Swap())) {
3408 Opcode = IntrID == Intrinsic::amdgcn_permlane16_swap
3409 ? AMDGPU::V_PERMLANE16_SWAP_B32_e64
3410 : AMDGPU::V_PERMLANE32_SWAP_B32_e64;
3414 NewOps.push_back(
SDValue(ConvGlueNode, 0));
3416 bool FI =
N->getConstantOperandVal(3);
3417 NewOps[2] =
CurDAG->getTargetConstant(
3420 CurDAG->SelectNodeTo(
N, Opcode,
N->getVTList(), NewOps);
3428 if (Opcode != AMDGPU::INSTRUCTION_LIST_END) {
3430 CurDAG->SelectNodeTo(
N, Opcode,
N->getVTList(), {Src});
3435 NewOps.push_back(
SDValue(ConvGlueNode, 0));
3436 CurDAG->MorphNodeTo(
N,
N->getOpcode(),
N->getVTList(), NewOps);
3440void AMDGPUDAGToDAGISel::SelectINTRINSIC_VOID(
SDNode *
N) {
3441 unsigned IntrID =
N->getConstantOperandVal(1);
3443 case Intrinsic::amdgcn_ds_gws_init:
3444 case Intrinsic::amdgcn_ds_gws_barrier:
3445 case Intrinsic::amdgcn_ds_gws_sema_v:
3446 case Intrinsic::amdgcn_ds_gws_sema_br:
3447 case Intrinsic::amdgcn_ds_gws_sema_p:
3448 case Intrinsic::amdgcn_ds_gws_sema_release_all:
3449 SelectDS_GWS(
N, IntrID);
3451 case Intrinsic::amdgcn_tensor_load_to_lds:
3452 case Intrinsic::amdgcn_tensor_store_from_lds:
3453 SelectTensorLoadStore(
N, IntrID);
3462void AMDGPUDAGToDAGISel::SelectWAVE_ADDRESS(
SDNode *
N) {
3464 CurDAG->getTargetConstant(Subtarget->getWavefrontSizeLog2(), SDLoc(
N), MVT::i32);
3465 CurDAG->SelectNodeTo(
N, AMDGPU::S_LSHR_B32,
N->getVTList(),
3466 {N->getOperand(0), Log2WaveSize});
3469void AMDGPUDAGToDAGISel::SelectSTACKRESTORE(
SDNode *
N) {
3484 Subtarget->getWavefrontSizeLog2(), SL, MVT::i32);
3486 if (
N->isDivergent()) {
3487 SrcVal =
SDValue(
CurDAG->getMachineNode(AMDGPU::V_READFIRSTLANE_B32, SL,
3492 CopyVal =
SDValue(
CurDAG->getMachineNode(AMDGPU::S_LSHL_B32, SL, MVT::i32,
3493 {SrcVal, Log2WaveSize}),
3497 SDValue CopyToSP =
CurDAG->getCopyToReg(
N->getOperand(0), SL,
SP, CopyVal);
3501bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(
SDValue In,
SDValue &Src,
3503 bool IsCanonicalizing,
3504 bool AllowAbs)
const {
3510 Src = Src.getOperand(0);
3511 }
else if (Src.getOpcode() ==
ISD::FSUB && IsCanonicalizing) {
3515 if (
LHS &&
LHS->isZero()) {
3517 Src = Src.getOperand(1);
3521 if (AllowAbs && Src.getOpcode() ==
ISD::FABS) {
3523 Src = Src.getOperand(0);
3536 if (IsCanonicalizing)
3551 EVT VT = Src.getValueType();
3553 (VT != MVT::i32 && VT != MVT::v2i32 && VT != MVT::i64))
3560 auto ReplaceSrc = [&]() ->
SDValue {
3562 return Src.getOperand(0);
3567 Src.getValueType(),
LHS, Index);
3593 if (SelectVOP3ModsImpl(In, Src, Mods,
true,
3595 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3602bool AMDGPUDAGToDAGISel::SelectVOP3ModsNonCanonicalizing(
3605 if (SelectVOP3ModsImpl(In, Src, Mods,
false,
3607 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3614bool AMDGPUDAGToDAGISel::SelectVOP3BMods(
SDValue In,
SDValue &Src,
3617 if (SelectVOP3ModsImpl(In, Src, Mods,
3620 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3627bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(
SDValue In,
SDValue &Src)
const {
3635bool AMDGPUDAGToDAGISel::SelectVINTERPModsImpl(
SDValue In,
SDValue &Src,
3639 if (SelectVOP3ModsImpl(In, Src, Mods,
3644 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3651bool AMDGPUDAGToDAGISel::SelectVINTERPMods(
SDValue In,
SDValue &Src,
3653 return SelectVINTERPModsImpl(In, Src, SrcMods,
false);
3656bool AMDGPUDAGToDAGISel::SelectVINTERPModsHi(
SDValue In,
SDValue &Src,
3658 return SelectVINTERPModsImpl(In, Src, SrcMods,
true);
3661bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(
SDValue In,
SDValue &Src,
3665 Clamp =
CurDAG->getTargetConstant(0,
DL, MVT::i1);
3666 Omod =
CurDAG->getTargetConstant(0,
DL, MVT::i1);
3668 return SelectVOP3Mods(In, Src, SrcMods);
3671bool AMDGPUDAGToDAGISel::SelectVOP3BMods0(
SDValue In,
SDValue &Src,
3675 Clamp =
CurDAG->getTargetConstant(0,
DL, MVT::i1);
3676 Omod =
CurDAG->getTargetConstant(0,
DL, MVT::i1);
3678 return SelectVOP3BMods(In, Src, SrcMods);
3681bool AMDGPUDAGToDAGISel::SelectVOP3OMods(
SDValue In,
SDValue &Src,
3686 Clamp =
CurDAG->getTargetConstant(0,
DL, MVT::i1);
3687 Omod =
CurDAG->getTargetConstant(0,
DL, MVT::i1);
3692bool AMDGPUDAGToDAGISel::SelectVOP3PMods(
SDValue In,
SDValue &Src,
3693 SDValue &SrcMods,
bool IsDOT)
const {
3700 Src = Src.getOperand(0);
3705 if (Src.getValueSizeInBits() == 128) {
3707 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3712 (!IsDOT || !Subtarget->hasDOTOpSelHazard())) {
3713 unsigned VecMods = Mods;
3715 SDValue Lo = stripBitcast(Src.getOperand(0));
3716 SDValue Hi = stripBitcast(Src.getOperand(1));
3719 Lo = stripBitcast(
Lo.getOperand(0));
3724 Hi = stripBitcast(
Hi.getOperand(0));
3734 unsigned VecSize = Src.getValueSizeInBits();
3735 Lo = stripExtractLoElt(
Lo);
3736 Hi = stripExtractLoElt(
Hi);
3738 if (
Lo.getValueSizeInBits() > VecSize) {
3739 Lo =
CurDAG->getTargetExtractSubreg(
3740 (VecSize > 32) ? AMDGPU::sub0_sub1 : AMDGPU::sub0, SDLoc(In),
3744 if (
Hi.getValueSizeInBits() > VecSize) {
3745 Hi =
CurDAG->getTargetExtractSubreg(
3746 (VecSize > 32) ? AMDGPU::sub0_sub1 : AMDGPU::sub0, SDLoc(In),
3750 assert(
Lo.getValueSizeInBits() <= VecSize &&
3751 Hi.getValueSizeInBits() <= VecSize);
3753 if (
Lo ==
Hi && !isInlineImmediate(
Lo.getNode())) {
3757 if (VecSize ==
Lo.getValueSizeInBits()) {
3759 }
else if (VecSize == 32) {
3760 Src = createVOP3PSrc32FromLo16(
Lo, Src,
CurDAG, Subtarget);
3762 assert(
Lo.getValueSizeInBits() == 32 && VecSize == 64);
3766 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, SL,
3767 Lo.getValueType()), 0);
3768 auto RC =
Lo->isDivergent() ? AMDGPU::VReg_64RegClassID
3769 : AMDGPU::SReg_64RegClassID;
3771 CurDAG->getTargetConstant(RC, SL, MVT::i32),
3772 Lo,
CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32),
3773 Undef,
CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32) };
3775 Src =
SDValue(
CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, SL,
3776 Src.getValueType(),
Ops), 0);
3778 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3784 .bitcastToAPInt().getZExtValue();
3786 Src =
CurDAG->getTargetConstant(
Lit, SDLoc(In), MVT::i64);
3787 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3794 Src.getNumOperands() == 2) {
3800 ArrayRef<int>
Mask = SVN->getMask();
3802 if (Mask[0] < 2 && Mask[1] < 2) {
3804 SDValue ShuffleSrc = SVN->getOperand(0);
3817 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3825 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3829bool AMDGPUDAGToDAGISel::SelectVOP3PModsDOT(
SDValue In,
SDValue &Src,
3831 return SelectVOP3PMods(In, Src, SrcMods,
true);
3834bool AMDGPUDAGToDAGISel::SelectVOP3PNoModsDOT(
SDValue In,
SDValue &Src)
const {
3836 SelectVOP3PMods(In, SrcTmp, SrcModsTmp,
true);
3845bool AMDGPUDAGToDAGISel::SelectVOP3PModsF32(
SDValue In,
SDValue &Src,
3847 SelectVOP3Mods(In, Src, SrcMods);
3850 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3854bool AMDGPUDAGToDAGISel::SelectVOP3PNoModsF32(
SDValue In,
SDValue &Src)
const {
3856 SelectVOP3PModsF32(In, SrcTmp, SrcModsTmp);
3865bool AMDGPUDAGToDAGISel::SelectWMMAOpSelVOP3PMods(
SDValue In,
3868 assert(
C->getAPIntValue().getBitWidth() == 1 &&
"expected i1 value");
3871 unsigned SrcVal =
C->getZExtValue();
3875 Src =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3882 unsigned DstRegClass;
3884 switch (Elts.
size()) {
3886 DstRegClass = AMDGPU::VReg_256RegClassID;
3890 DstRegClass = AMDGPU::VReg_128RegClassID;
3894 DstRegClass = AMDGPU::VReg_64RegClassID;
3902 Ops.push_back(
CurDAG->getTargetConstant(DstRegClass,
DL, MVT::i32));
3903 for (
unsigned i = 0; i < Elts.
size(); ++i) {
3904 Ops.push_back(Elts[i]);
3905 Ops.push_back(
CurDAG->getTargetConstant(
3908 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
DL, DstTy,
Ops);
3915 assert(
"unhandled Reg sequence size" &&
3916 (Elts.
size() == 8 || Elts.
size() == 16));
3920 for (
unsigned i = 0; i < Elts.
size(); i += 2) {
3921 SDValue LoSrc = stripExtractLoElt(stripBitcast(Elts[i]));
3926 if (Subtarget->useRealTrue16Insts()) {
3931 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
DL, MVT::i16),
3934 emitRegSequence(*
CurDAG, AMDGPU::VGPR_32RegClassID, MVT::i32,
3935 {Elts[i],
Undef}, {AMDGPU::lo16, AMDGPU::hi16},
DL);
3936 Elts[i + 1] = emitRegSequence(*
CurDAG, AMDGPU::VGPR_32RegClassID,
3937 MVT::i32, {Elts[i + 1],
Undef},
3938 {AMDGPU::lo16, AMDGPU::hi16},
DL);
3940 SDValue PackLoLo =
CurDAG->getTargetConstant(0x05040100,
DL, MVT::i32);
3942 CurDAG->getMachineNode(AMDGPU::V_PERM_B32_e64,
DL, MVT::i32,
3943 {Elts[i + 1], Elts[i], PackLoLo});
3947 return buildRegSequence32(PackedElts,
DL);
3953 unsigned ElementSize)
const {
3954 if (ElementSize == 16)
3955 return buildRegSequence16(Elts,
DL);
3956 if (ElementSize == 32)
3957 return buildRegSequence32(Elts,
DL);
3961void AMDGPUDAGToDAGISel::selectWMMAModsNegAbs(
unsigned ModOpcode,
3965 unsigned ElementSize)
const {
3970 for (
auto El : Elts) {
3973 NegAbsElts.
push_back(El->getOperand(0));
3975 if (Elts.size() != NegAbsElts.
size()) {
3977 Src =
SDValue(buildRegSequence(Elts,
DL, ElementSize), 0);
3981 Src =
SDValue(buildRegSequence(NegAbsElts,
DL, ElementSize), 0);
3987 Src =
SDValue(buildRegSequence(Elts,
DL, ElementSize), 0);
3995 std::function<
bool(
SDValue)> ModifierCheck) {
3999 for (
unsigned i = 0; i < F16Pair->getNumOperands(); ++i) {
4000 SDValue ElF16 = stripBitcast(F16Pair->getOperand(i));
4001 if (!ModifierCheck(ElF16))
4008bool AMDGPUDAGToDAGISel::SelectWMMAModsF16Neg(
SDValue In,
SDValue &Src,
4026 Src =
SDValue(buildRegSequence16(EltsF16, SDLoc(In)), 0);
4045 Src =
SDValue(buildRegSequence32(EltsV2F16, SDLoc(In)), 0);
4051 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
4055bool AMDGPUDAGToDAGISel::SelectWMMAModsF16NegAbs(
SDValue In,
SDValue &Src,
4066 if (EltsF16.
empty())
4076 selectWMMAModsNegAbs(ModOpcode, Mods, EltsF16, Src, SDLoc(In), 16);
4086 if (EltsV2F16.
empty())
4095 selectWMMAModsNegAbs(ModOpcode, Mods, EltsV2F16, Src, SDLoc(In), 32);
4098 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
4102bool AMDGPUDAGToDAGISel::SelectWMMAModsF32NegAbs(
SDValue In,
SDValue &Src,
4112 unsigned ModOpcode =
4123 selectWMMAModsNegAbs(ModOpcode, Mods, EltsF32, Src, SDLoc(In), 32);
4126 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
4130bool AMDGPUDAGToDAGISel::SelectWMMAVISrc(
SDValue In,
SDValue &Src)
const {
4132 BitVector UndefElements;
4134 if (isInlineImmediate(
Splat.getNode())) {
4136 unsigned Imm =
C->getAPIntValue().getSExtValue();
4137 Src =
CurDAG->getTargetConstant(Imm, SDLoc(In), MVT::i32);
4141 unsigned Imm =
C->getValueAPF().bitcastToAPInt().getSExtValue();
4142 Src =
CurDAG->getTargetConstant(Imm, SDLoc(In), MVT::i32);
4150 SDValue SplatSrc32 = stripBitcast(In);
4152 if (
SDValue Splat32 = SplatSrc32BV->getSplatValue()) {
4153 SDValue SplatSrc16 = stripBitcast(Splat32);
4156 const SIInstrInfo *
TII = Subtarget->getInstrInfo();
4157 std::optional<APInt> RawValue;
4159 RawValue =
C->getValueAPF().bitcastToAPInt();
4161 RawValue =
C->getAPIntValue();
4163 if (RawValue.has_value()) {
4164 EVT VT =
In.getValueType().getScalarType();
4170 if (
TII->isInlineConstant(FloatVal)) {
4171 Src =
CurDAG->getTargetConstant(RawValue.value(), SDLoc(In),
4176 if (
TII->isInlineConstant(RawValue.value())) {
4177 Src =
CurDAG->getTargetConstant(RawValue.value(), SDLoc(In),
4190 if (
CurDAG->isConstantIntBuildVectorOrConstantInt(SplatSrc32)) {
4195 int64_t LoImm = Lo32->getAPIntValue().getSExtValue();
4196 int64_t HiImm = Hi32->getAPIntValue().getSExtValue();
4197 int64_t Imm64I = (HiImm << 32) + LoImm;
4199 if (!isInlineImmediate(APInt(64, Imm64I)))
4202 }
else if (Imm64I != Imm64)
4206 Src =
CurDAG->getTargetConstant(Imm64, SDLoc(In), MVT::i64);
4213bool AMDGPUDAGToDAGISel::SelectSWMMACIndex8(
SDValue In,
SDValue &Src,
4219 const llvm::SDValue &ShiftSrc =
In.getOperand(0);
4228 IndexKey =
CurDAG->getTargetConstant(
Key, SDLoc(In), MVT::i32);
4232bool AMDGPUDAGToDAGISel::SelectSWMMACIndex16(
SDValue In,
SDValue &Src,
4238 const llvm::SDValue &ShiftSrc =
In.getOperand(0);
4247 IndexKey =
CurDAG->getTargetConstant(
Key, SDLoc(In), MVT::i32);
4251bool AMDGPUDAGToDAGISel::SelectSWMMACIndex32(
SDValue In,
SDValue &Src,
4259 const SDValue &ExtendSrc =
In.getOperand(0);
4263 const SDValue &CastSrc =
In.getOperand(0);
4267 if (Zero &&
Zero->getZExtValue() == 0)
4278 Src = ExtractVecEltSrc;
4282 IndexKey =
CurDAG->getTargetConstant(
Key, SDLoc(In), MVT::i32);
4286bool AMDGPUDAGToDAGISel::SelectVOP3OpSel(
SDValue In,
SDValue &Src,
4290 SrcMods =
CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
4294bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods(
SDValue In,
SDValue &Src,
4297 return SelectVOP3Mods(In, Src, SrcMods);
4309 Op =
Op.getOperand(0);
4311 IsExtractHigh =
false;
4314 if (!Low16 || !Low16->isZero())
4316 Op = stripBitcast(
Op.getOperand(1));
4317 if (
Op.getValueType() != MVT::bf16)
4322 if (
Op.getValueType() != MVT::i32)
4327 if (Mask->getZExtValue() == 0xffff0000) {
4328 IsExtractHigh =
true;
4329 return Op.getOperand(0);
4338 return Op.getOperand(0);
4347bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsImpl(
SDValue In,
SDValue &Src,
4351 SelectVOP3ModsImpl(In, Src, Mods);
4353 bool IsExtractHigh =
false;
4355 Src = Src.getOperand(0);
4356 }
else if (VT == MVT::bf16) {
4364 if (Src.getValueType() != VT &&
4365 (VT != MVT::bf16 || Src.getValueType() != MVT::i32))
4368 Src = stripBitcast(Src);
4374 SelectVOP3ModsImpl(Src, Src, ModsTmp);
4389 if (Src.getValueSizeInBits() == 16) {
4398 Src.getOperand(0).getValueType() == MVT::i32) {
4399 Src = Src.getOperand(0);
4403 if (Subtarget->useRealTrue16Insts())
4405 Src = createVOP3PSrc32FromLo16(Src, In,
CurDAG, Subtarget);
4406 }
else if (IsExtractHigh)
4412bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsExt(
SDValue In,
SDValue &Src,
4415 if (!SelectVOP3PMadMixModsImpl(In, Src, Mods, MVT::f16))
4417 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
4421bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixMods(
SDValue In,
SDValue &Src,
4424 SelectVOP3PMadMixModsImpl(In, Src, Mods, MVT::f16);
4425 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
4429bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixBF16ModsExt(
SDValue In,
SDValue &Src,
4432 if (!SelectVOP3PMadMixModsImpl(In, Src, Mods, MVT::bf16))
4434 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
4438bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixBF16Mods(
SDValue In,
SDValue &Src,
4441 SelectVOP3PMadMixModsImpl(In, Src, Mods, MVT::bf16);
4442 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
4450 unsigned NumOpcodes = 0;
4463 const uint8_t SrcBits[3] = { 0xf0, 0xcc, 0xaa };
4466 if (
C->isAllOnes()) {
4476 for (
unsigned I = 0;
I < Src.size(); ++
I) {
4490 if (Src.size() == 3) {
4496 if (
C->isAllOnes()) {
4498 for (
unsigned I = 0;
I < Src.size(); ++
I) {
4499 if (Src[
I] ==
LHS) {
4511 Bits = SrcBits[Src.size()];
4516 switch (In.getOpcode()) {
4524 if (!getOperandBits(
LHS, LHSBits) ||
4525 !getOperandBits(
RHS, RHSBits)) {
4526 Src = std::move(Backup);
4527 return std::make_pair(0, 0);
4548 uint8_t LHSBitsOrig = LHSBits;
4549 uint8_t RHSBitsOrig = RHSBits;
4553 NumOpcodes += LHSOp.first;
4554 LHSBits = LHSOp.second;
4561 NumOpcodes += RHSOp.first;
4562 RHSBits = RHSOp.second;
4566 auto dependsOnSlot = [](
uint8_t TT,
int Slot) ->
bool {
4567 if (Slot < 0 || Slot > 2)
4569 const uint8_t Masks[3] = {0x0f, 0x33, 0x55};
4570 const int Shifts[3] = {4, 2, 1};
4571 return ((TT ^ (TT >> Shifts[Slot])) & Masks[Slot]) != 0;
4577 const uint8_t SrcBitsConst[3] = {0xf0, 0xcc, 0xaa};
4584 NegatedInner =
Op.getOperand(0);
4585 for (
int I = 0;
I < (int)S.size();
I++) {
4586 if (Bits == SrcBitsConst[
I] && S[
I] ==
Op)
4588 if (IsNegationOp && Bits == (
uint8_t)~SrcBitsConst[
I] &&
4589 S[
I] == NegatedInner)
4600 for (
int I = 0;
I < (int)SrcAfterLHS.
size() &&
I < 3;
I++) {
4601 if (
I < (
int)Src.size() && Src[
I] != SrcAfterLHS[
I] &&
4602 dependsOnSlot(LHSBits,
I)) {
4611 if (!Stale && !RHSOp.first) {
4612 int Slot = findSlot(RHSBitsOrig,
RHS, SrcBeforeRecurse);
4614 (Slot >= (
int)Src.size() || Src[Slot] != SrcBeforeRecurse[Slot]))
4620 if (!Stale && !LHSOp.first) {
4621 int Slot = findSlot(LHSBitsOrig,
LHS, SrcBeforeRecurse);
4623 (Slot >= (
int)Src.size() || Src[Slot] != SrcBeforeRecurse[Slot]))
4628 Src = std::move(SrcBeforeRecurse);
4629 LHSBits = LHSBitsOrig;
4630 RHSBits = RHSBitsOrig;
4636 return std::make_pair(0, 0);
4640 switch (In.getOpcode()) {
4642 TTbl = LHSBits & RHSBits;
4645 TTbl = LHSBits | RHSBits;
4648 TTbl = LHSBits ^ RHSBits;
4654 return std::make_pair(NumOpcodes + 1, TTbl);
4661 unsigned NumOpcodes;
4663 std::tie(NumOpcodes, TTbl) =
BitOp3_Op(In, Src);
4667 if (NumOpcodes < 2 || Src.empty())
4673 if (NumOpcodes < 4 && !In->isDivergent())
4676 if (NumOpcodes == 2 &&
In.getValueType() == MVT::i32) {
4681 (
In.getOperand(0).getOpcode() ==
In.getOpcode() ||
4682 In.getOperand(1).getOpcode() ==
In.getOpcode()))
4696 while (Src.size() < 3)
4697 Src.push_back(Src[0]);
4703 Tbl =
CurDAG->getTargetConstant(TTbl, SDLoc(In), MVT::i32);
4709 return CurDAG->getUNDEF(MVT::i32);
4713 return CurDAG->getConstant(
C->getZExtValue() << 16, SL, MVT::i32);
4718 return CurDAG->getConstant(
4719 C->getValueAPF().bitcastToAPInt().getZExtValue() << 16, SL, MVT::i32);
4729bool AMDGPUDAGToDAGISel::isVGPRImm(
const SDNode *
N)
const {
4730 assert(
CurDAG->getTarget().getTargetTriple().isAMDGCN());
4732 const SIRegisterInfo *SIRI = Subtarget->getRegisterInfo();
4733 const SIInstrInfo *SII = Subtarget->getInstrInfo();
4736 bool AllUsesAcceptSReg =
true;
4738 Limit < 10 && U !=
E; ++U, ++Limit) {
4739 const TargetRegisterClass *RC =
4740 getOperandRegClass(
U->getUser(),
U->getOperandNo());
4748 if (RC != &AMDGPU::VS_32RegClass && RC != &AMDGPU::VS_64RegClass &&
4749 RC != &AMDGPU::VS_64_Align2RegClass) {
4750 AllUsesAcceptSReg =
false;
4751 SDNode *
User =
U->getUser();
4752 if (
User->isMachineOpcode()) {
4753 unsigned Opc =
User->getMachineOpcode();
4754 const MCInstrDesc &
Desc = SII->get(
Opc);
4755 if (
Desc.isCommutable()) {
4756 unsigned OpIdx =
Desc.getNumDefs() +
U->getOperandNo();
4759 unsigned CommutedOpNo = CommuteIdx1 -
Desc.getNumDefs();
4760 const TargetRegisterClass *CommutedRC =
4761 getOperandRegClass(
U->getUser(), CommutedOpNo);
4762 if (CommutedRC == &AMDGPU::VS_32RegClass ||
4763 CommutedRC == &AMDGPU::VS_64RegClass ||
4764 CommutedRC == &AMDGPU::VS_64_Align2RegClass)
4765 AllUsesAcceptSReg =
true;
4773 if (!AllUsesAcceptSReg)
4777 return !AllUsesAcceptSReg && (Limit < 10);
4780bool AMDGPUDAGToDAGISel::isUniformLoad(
const SDNode *
N)
const {
4782 const MachineMemOperand *MMO = Ld->getMemOperand();
4800 (Subtarget->getScalarizeGlobalBehavior() &&
4804 ->isMemOpHasNoClobberedMemOperand(
N)));
4810 bool IsModified =
false;
4816 while (Position !=
CurDAG->allnodes_end()) {
4823 if (ResNode !=
Node) {
4829 CurDAG->RemoveDeadNodes();
4830 }
while (IsModified);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static bool getBaseWithOffsetUsingSplitOR(SelectionDAG &DAG, SDValue Addr, SDValue &N0, SDValue &N1)
static SDValue SelectSAddrFI(SelectionDAG *CurDAG, SDValue SAddr)
static SDValue matchExtFromI32orI32(SDValue Op, bool IsSigned, const SelectionDAG *DAG)
static MemSDNode * findMemSDNode(SDNode *N)
static bool IsCopyFromSGPR(const SIRegisterInfo &TRI, SDValue Val)
static SDValue combineBallotPattern(SDValue VCMP, bool &Negate)
static SDValue matchBF16FPExtendLike(SDValue Op, bool &IsExtractHigh)
static void checkWMMAElementsModifiersF16(BuildVectorSDNode *BV, std::function< bool(SDValue)> ModifierCheck)
Defines an instruction selector for the AMDGPU target.
Contains the definition of a TargetInstrInfo class that is common to all AMD GPUs.
static bool isNoUnsignedWrap(MachineInstr *Addr)
static bool isExtractHiElt(MachineRegisterInfo &MRI, Register In, Register &Out)
static std::pair< unsigned, uint8_t > BitOp3_Op(Register R, SmallVectorImpl< Register > &Src, const MachineRegisterInfo &MRI)
static unsigned gwsIntrinToOpcode(unsigned IntrID)
Provides AMDGPU specific target descriptions.
Base class for AMDGPU specific classes of TargetSubtarget.
The AMDGPU TargetMachine interface definition for hw codegen targets.
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
const HexagonInstrInfo * TII
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
Register const TargetRegisterInfo * TRI
Promote Memory to Register
MachineInstr unsigned OpIdx
FunctionAnalysisManager FAM
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Provides R600 specific target descriptions.
Interface definition for R600RegisterInfo.
const SmallVectorImpl< MachineOperand > & Cond
SI DAG Lowering interface definition.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...
AMDGPUDAGToDAGISelLegacy(TargetMachine &TM, CodeGenOptLevel OptLevel)
bool runOnMachineFunction(MachineFunction &MF) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
StringRef getPassName() const override
getPassName - Return a nice clean name for a pass.
AMDGPU specific code to select AMDGPU machine instructions for SelectionDAG operations.
bool isSDWAOperand(const SDNode *N) const
void SelectBuildVector(SDNode *N, unsigned RegClassID)
void Select(SDNode *N) override
Main hook for targets to transform nodes into machine nodes.
bool runOnMachineFunction(MachineFunction &MF) override
void SelectVectorShuffle(SDNode *N)
void PreprocessISelDAG() override
PreprocessISelDAG - This hook allows targets to hack on the graph before instruction selection starts...
AMDGPUDAGToDAGISel()=delete
void PostprocessISelDAG() override
PostprocessISelDAG() - This hook allows the target to hack on the graph right after selection.
bool matchLoadD16FromBuildVector(SDNode *N) const
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
AMDGPUISelDAGToDAGPass(TargetMachine &TM)
static SDValue stripBitcast(SDValue Val)
static const fltSemantics & BFloat()
static const fltSemantics & IEEEhalf()
Class for arbitrary precision integers.
uint64_t getZExtValue() const
Get zero extended value.
bool isSignMask() const
Check if the APInt's value is returned by getSignMask.
bool isMaxSignedValue() const
Determine if this is the largest signed value.
int64_t getSExtValue() const
Get sign extended value.
unsigned countr_one() const
Count the number of trailing one bits.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
Get the array size.
LLVM Basic Block Representation.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
A "pseudo-class" with methods for operating on BUILD_VECTORs.
LLVM_ABI SDValue getSplatValue(const APInt &DemandedElts, BitVector *UndefElements=nullptr) const
Returns the demanded splatted value or a null value if this is not a splat.
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
int64_t getSExtValue() const
Analysis pass which computes a DominatorTree.
Legacy analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
FunctionPass class - This class is used to implement most global optimizations.
const SIInstrInfo * getInstrInfo() const override
bool useRealTrue16Insts() const
Return true if real (non-fake) variants of True16 instructions using 16-bit registers should be code-...
Generation getGeneration() const
void checkSubtargetFeatures(const Function &F) const
Diagnose inconsistent subtarget features before attempting to codegen function F.
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
TypeSize getValue() const
Analysis pass that exposes the LoopInfo for a function.
SmallVector< LoopT *, 4 > getLoopsInPreorder() const
Return all of the loops in the function in preorder across the loop nests, with siblings in forward p...
The legacy pass manager's analysis pass to compute loop information.
static MVT getIntegerVT(unsigned BitWidth)
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
LocationSize getSize() const
Return the size in bytes of the memory reference.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
An SDNode that represents everything that will be needed to construct a MachineInstr.
This is an abstract virtual class for memory operations.
unsigned getAddressSpace() const
Return the address space for the associated pointer.
MachineMemOperand * getMemOperand() const
Return the unique MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
AnalysisType & getAnalysis() const
getAnalysis<AnalysisType>() - This function is used by subclasses to get to the analysis information ...
A set of analyses that are preserved following a run of a transformation pass.
Wrapper class representing virtual and physical registers.
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
SDNodeFlags getFlags() const
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumOperands() const
Return the number of values used by this operation.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
bool isPredecessorOf(const SDNode *N) const
Return true if this node is a predecessor of N.
bool isAnyAdd() const
Returns true if the node type is ADD or PTRADD.
static use_iterator use_end()
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
uint64_t getConstantOperandVal(unsigned i) const
unsigned getOpcode() const
static unsigned getMaxMUBUFImmOffset(const GCNSubtarget &ST)
bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx0, unsigned &SrcOpIdx1) const override
static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs=1)
static LLVM_READONLY const TargetRegisterClass * getSGPRClassForBitWidth(unsigned BitWidth)
static bool isSGPRClass(const TargetRegisterClass *RC)
bool runOnMachineFunction(MachineFunction &MF) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
SelectionDAGISelLegacy(char &ID, std::unique_ptr< SelectionDAGISel > S)
SelectionDAGISelPass(std::unique_ptr< SelectionDAGISel > Selector)
LLVM_ABI PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
std::unique_ptr< FunctionLoweringInfo > FuncInfo
const TargetLowering * TLI
const TargetInstrInfo * TII
void ReplaceUses(SDValue F, SDValue T)
ReplaceUses - replace all uses of the old node F with the use of the new node T.
void ReplaceNode(SDNode *F, SDNode *T)
Replace all uses of F with T, then remove F from the DAG.
SelectionDAGISel(TargetMachine &tm, CodeGenOptLevel OL=CodeGenOptLevel::Default)
virtual bool runOnMachineFunction(MachineFunction &mf)
const TargetLowering * getTargetLowering() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
SDValue getTargetFrameIndex(int FI, EVT VT)
LLVM_ABI bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI bool isBaseWithConstantOffset(SDValue Op) const
Return true if the specified operand is an ISD::ADD with a ConstantSDNode on the right-hand side,...
MachineFunction & getMachineFunction() const
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
ilist< SDNode >::iterator allnodes_iterator
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Represent a constant reference to a string, i.e.
static const unsigned CommuteAnyOperandIndex
Primary interface to the complete machine description for the target machine.
unsigned getID() const
Return the register class ID number.
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ CONSTANT_ADDRESS_32BIT
Address space for 32-bit constant memory.
@ REGION_ADDRESS
Address space for region memory. (GDS)
@ LOCAL_ADDRESS
Address space for local memory.
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
@ FLAT_ADDRESS
Address space for flat memory.
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
@ PRIVATE_ADDRESS
Address space for private memory.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
std::optional< int64_t > getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST, int64_t ByteOffset)
bool isGFX12Plus(const MCSubtargetInfo &STI)
constexpr int64_t getNullPointerValue(unsigned AS)
Get the null pointer value for the given address space.
bool isValid32BitLiteral(uint64_t Val, bool IsFP64)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST)
std::optional< int64_t > getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset, bool IsBuffer, bool HasSOffset)
bool isUniformMMO(const MachineMemOperand *MMO)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to; it returns an output chain.
@ PTRADD
PTRADD represents pointer arithmetic semantics, for targets that opt in using shouldPreservePtrArith(...
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
@ FMAD
FMAD - Perform a * b + c, while getting the same result as the separately rounded operations.
@ ADD
Simple integer binary arithmetic operators.
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
@ FADD
Simple binary floating point operators.
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
@ FLDEXP
FLDEXP - ldexp, inspired by libm (op0 * 2**op1).
@ CONVERGENCECTRL_GLUE
This does not correspond to any convergence control intrinsic.
@ SIGN_EXTEND
Conversion operators.
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
@ UNDEF
UNDEF - An undefined node.
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
@ SHL
Shift and rotation operations.
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values, following IEEE-754 definition...
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
@ AND
Bitwise operators - logical and, logical or, logical xor.
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
@ BRCOND
BRCOND - Conditional branch.
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
bool isExtOpcode(unsigned Opcode)
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
@ User
could "use" a pointer
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
@ Undef
Value of the register doesn't matter.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
constexpr bool isMask_32(uint32_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
bool isBoolSGPR(SDValue V)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
static bool getConstantValue(SDValue N, uint32_t &Out)
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
CodeGenOptLevel
Code generation optimization level.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
FunctionAddr VTableAddr uintptr_t uintptr_t Data
FunctionPass * createAMDGPUISelDag(TargetMachine &TM, CodeGenOptLevel OptLevel)
This pass converts a legalized DAG into an AMDGPU-specific DAG.
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ And
Bitwise or logical AND of integers.
@ Sub
Subtraction of integers.
DWARFExpression::Operation Op
unsigned M0(unsigned Val)
LLVM_ABI ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
Implement std::hash so that hash_code can be used in STL containers.
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
uint64_t getScalarSizeInBits() const
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
bool bitsEq(EVT VT) const
Return true if this has the same number of bits as VT.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
static KnownBits makeConstant(const APInt &C)
Create known bits from a known constant.
static KnownBits add(const KnownBits &LHS, const KnownBits &RHS, bool NSW=false, bool NUW=false, bool SelfAdd=false)
Compute knownbits resulting from addition of LHS and RHS.
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
APInt getMinValue() const
Return the minimal unsigned value possible given these KnownBits.
static unsigned getSubRegFromChannel(unsigned Channel)
bool hasNoUnsignedWrap() const
This represents a list of ValueType's that has been intern'd by a SelectionDAG.