29#include "llvm/IR/IntrinsicsAMDGPU.h"
33#ifdef EXPENSIVE_CHECKS
38#define DEBUG_TYPE "amdgpu-isel"
53 In = stripBitcast(In);
59 Out = In.getOperand(0);
70 if (ShiftAmt->getZExtValue() == 16) {
90 if (
Lo->isDivergent()) {
92 SL,
Lo.getValueType()),
100 Src.getValueType(),
Ops),
118 SDValue Idx = In.getOperand(1);
120 return In.getOperand(0);
124 SDValue Src = In.getOperand(0);
125 if (Src.getValueType().getSizeInBits() == 32)
126 return stripBitcast(Src);
136 assert(Elts.
size() == SubRegClass.
size() &&
"array size mismatch");
137 unsigned NumElts = Elts.
size();
140 for (
unsigned i = 0; i < NumElts; ++i) {
141 Ops[2 * i + 1] = Elts[i];
151 "AMDGPU DAG->DAG Pattern Instruction Selection",
false,
155#ifdef EXPENSIVE_CHECKS
160 "AMDGPU DAG->DAG Pattern Instruction Selection",
false,
181bool AMDGPUDAGToDAGISel::fp16SrcZerosHighBits(
unsigned Opc)
const {
215 case AMDGPUISD::FRACT:
216 case AMDGPUISD::CLAMP:
217 case AMDGPUISD::COS_HW:
218 case AMDGPUISD::SIN_HW:
219 case AMDGPUISD::FMIN3:
220 case AMDGPUISD::FMAX3:
221 case AMDGPUISD::FMED3:
222 case AMDGPUISD::FMAD_FTZ:
225 case AMDGPUISD::RCP_IFLAG:
235 case AMDGPUISD::DIV_FIXUP:
245#ifdef EXPENSIVE_CHECKS
249 assert(L->isLCSSAForm(DT));
257#ifdef EXPENSIVE_CHECKS
265 assert(Subtarget->d16PreservesUnusedBits());
266 MVT VT =
N->getValueType(0).getSimpleVT();
267 if (VT != MVT::v2i16 && VT != MVT::v2f16)
289 unsigned LoadOp = AMDGPUISD::LOAD_D16_HI;
292 AMDGPUISD::LOAD_D16_HI_I8 : AMDGPUISD::LOAD_D16_HI_U8;
298 CurDAG->getMemIntrinsicNode(LoadOp,
SDLoc(LdHi), VTList,
311 if (LdLo &&
Lo.hasOneUse()) {
317 unsigned LoadOp = AMDGPUISD::LOAD_D16_LO;
320 AMDGPUISD::LOAD_D16_LO_I8 : AMDGPUISD::LOAD_D16_LO_U8;
332 CurDAG->getMemIntrinsicNode(LoadOp,
SDLoc(LdLo), VTList,
345 if (!Subtarget->d16PreservesUnusedBits())
350 bool MadeChange =
false;
351 while (Position !=
CurDAG->allnodes_begin()) {
356 switch (
N->getOpcode()) {
367 CurDAG->RemoveDeadNodes();
373bool AMDGPUDAGToDAGISel::isInlineImmediate(
const SDNode *
N)
const {
379 return TII->isInlineConstant(
C->getAPIntValue());
382 return TII->isInlineConstant(
C->getValueAPF());
392 unsigned OpNo)
const {
393 if (!
N->isMachineOpcode()) {
396 if (
Reg.isVirtual()) {
401 const SIRegisterInfo *
TRI = Subtarget->getRegisterInfo();
402 return TRI->getPhysRegBaseClass(
Reg);
408 switch (
N->getMachineOpcode()) {
410 const SIInstrInfo *
TII = Subtarget->getInstrInfo();
411 const MCInstrDesc &
Desc =
TII->get(
N->getMachineOpcode());
412 unsigned OpIdx =
Desc.getNumDefs() + OpNo;
416 int16_t RegClass =
TII->getOpRegClassID(
Desc.operands()[
OpIdx]);
420 return Subtarget->getRegisterInfo()->getRegClass(RegClass);
422 case AMDGPU::REG_SEQUENCE: {
423 unsigned RCID =
N->getConstantOperandVal(0);
424 const TargetRegisterClass *SuperRC =
425 Subtarget->getRegisterInfo()->getRegClass(RCID);
427 SDValue SubRegOp =
N->getOperand(OpNo + 1);
429 return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC,
438 Ops.push_back(NewChain);
439 for (
unsigned i = 1, e =
N->getNumOperands(); i != e; ++i)
440 Ops.push_back(
N->getOperand(i));
443 return CurDAG->MorphNodeTo(
N,
N->getOpcode(),
N->getVTList(),
Ops);
450 assert(
N->getOperand(0).getValueType() == MVT::Other &&
"Expected chain");
453 return glueCopyToOp(
N,
M0,
M0.getValue(1));
456SDNode *AMDGPUDAGToDAGISel::glueCopyToM0LDSInit(
SDNode *
N)
const {
459 if (Subtarget->ldsRequiresM0Init())
461 N,
CurDAG->getSignedTargetConstant(-1, SDLoc(
N), MVT::i32));
463 MachineFunction &
MF =
CurDAG->getMachineFunction();
464 unsigned Value =
MF.getInfo<SIMachineFunctionInfo>()->getGDSSize();
466 glueCopyToM0(
N,
CurDAG->getTargetConstant(
Value, SDLoc(
N), MVT::i32));
473 SDNode *
Lo =
CurDAG->getMachineNode(
474 AMDGPU::S_MOV_B32,
DL, MVT::i32,
476 SDNode *
Hi =
CurDAG->getMachineNode(
477 AMDGPU::S_MOV_B32,
DL, MVT::i32,
480 CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID,
DL, MVT::i32),
484 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
DL, VT,
Ops);
487SDNode *AMDGPUDAGToDAGISel::packConstantV2I16(
const SDNode *
N,
492 uint32_t LHSVal, RHSVal;
496 uint32_t
K = (LHSVal & 0xffff) | (RHSVal << 16);
498 isVGPRImm(
N) ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32, SL,
506 EVT VT =
N->getValueType(0);
510 SDValue RegClass =
CurDAG->getTargetConstant(RegClassID,
DL, MVT::i32);
512 if (NumVectorElts == 1) {
513 CurDAG->SelectNodeTo(
N, AMDGPU::COPY_TO_REGCLASS, EltVT,
N->getOperand(0),
518 bool IsGCN =
CurDAG->getSubtarget().getTargetTriple().isAMDGCN();
519 if (IsGCN && Subtarget->has64BitLiterals() && VT.
getSizeInBits() == 64 &&
522 bool AllConst =
true;
524 for (
unsigned I = 0;
I < NumVectorElts; ++
I) {
532 Val = CF->getValueAPF().bitcastToAPInt().getZExtValue();
535 C |= Val << (EltSize *
I);
540 CurDAG->getMachineNode(AMDGPU::S_MOV_B64_IMM_PSEUDO,
DL, VT, CV);
541 CurDAG->SelectNodeTo(
N, AMDGPU::COPY_TO_REGCLASS, VT,
SDValue(Copy, 0),
547 assert(NumVectorElts <= 32 &&
"Vectors with more than 32 elements not "
554 RegSeqArgs[0] =
CurDAG->getTargetConstant(RegClassID,
DL, MVT::i32);
555 bool IsRegSeq =
true;
556 unsigned NOps =
N->getNumOperands();
557 for (
unsigned i = 0; i < NOps; i++) {
565 RegSeqArgs[1 + (2 * i)] =
N->getOperand(i);
566 RegSeqArgs[1 + (2 * i) + 1] =
CurDAG->getTargetConstant(
Sub,
DL, MVT::i32);
568 if (NOps != NumVectorElts) {
573 for (
unsigned i = NOps; i < NumVectorElts; ++i) {
576 RegSeqArgs[1 + (2 * i)] =
SDValue(ImpDef, 0);
577 RegSeqArgs[1 + (2 * i) + 1] =
584 CurDAG->SelectNodeTo(
N, AMDGPU::REG_SEQUENCE,
N->getVTList(), RegSeqArgs);
588 EVT VT =
N->getValueType(0);
592 if (!Subtarget->hasPkMovB32() || !EltVT.
bitsEq(MVT::i32) ||
606 Mask[0] < 4 && Mask[1] < 4);
608 SDValue VSrc0 = Mask[0] < 2 ? Src0 : Src1;
609 SDValue VSrc1 = Mask[1] < 2 ? Src0 : Src1;
610 unsigned Src0SubReg = Mask[0] & 1 ? AMDGPU::sub1 : AMDGPU::sub0;
611 unsigned Src1SubReg = Mask[1] & 1 ? AMDGPU::sub1 : AMDGPU::sub0;
614 Src0SubReg = Src1SubReg;
616 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
DL, VT);
621 Src1SubReg = Src0SubReg;
623 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
DL, VT);
633 if (
N->isDivergent() && Src0SubReg == AMDGPU::sub1 &&
634 Src1SubReg == AMDGPU::sub0) {
650 SDValue Src0OpSelVal =
CurDAG->getTargetConstant(Src0OpSel,
DL, MVT::i32);
651 SDValue Src1OpSelVal =
CurDAG->getTargetConstant(Src1OpSel,
DL, MVT::i32);
654 CurDAG->SelectNodeTo(
N, AMDGPU::V_PK_MOV_B32,
N->getVTList(),
655 {Src0OpSelVal, VSrc0, Src1OpSelVal, VSrc1,
665 CurDAG->getTargetExtractSubreg(Src0SubReg,
DL, EltVT, VSrc0);
667 CurDAG->getTargetExtractSubreg(Src1SubReg,
DL, EltVT, VSrc1);
670 CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID,
DL, MVT::i32),
671 ResultElt0,
CurDAG->getTargetConstant(AMDGPU::sub0,
DL, MVT::i32),
672 ResultElt1,
CurDAG->getTargetConstant(AMDGPU::sub1,
DL, MVT::i32)};
673 CurDAG->SelectNodeTo(
N, TargetOpcode::REG_SEQUENCE, VT,
Ops);
677 unsigned int Opc =
N->getOpcode();
678 if (
N->isMachineOpcode()) {
686 N = glueCopyToM0LDSInit(
N);
701 if (
N->getValueType(0) != MVT::i64)
704 SelectADD_SUB_I64(
N);
709 if (
N->getValueType(0) != MVT::i32)
716 SelectUADDO_USUBO(
N);
719 case AMDGPUISD::FMUL_W_CHAIN: {
720 SelectFMUL_W_CHAIN(
N);
723 case AMDGPUISD::FMA_W_CHAIN: {
724 SelectFMA_W_CHAIN(
N);
730 EVT VT =
N->getValueType(0);
747 ?
TRI->getDefaultVectorSuperClassForBitWidth(NumVectorElts * 32)
759 if (
N->getValueType(0) == MVT::i128) {
760 RC =
CurDAG->getTargetConstant(AMDGPU::SGPR_128RegClassID,
DL, MVT::i32);
761 SubReg0 =
CurDAG->getTargetConstant(AMDGPU::sub0_sub1,
DL, MVT::i32);
762 SubReg1 =
CurDAG->getTargetConstant(AMDGPU::sub2_sub3,
DL, MVT::i32);
763 }
else if (
N->getValueType(0) == MVT::i64) {
764 RC =
CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID,
DL, MVT::i32);
765 SubReg0 =
CurDAG->getTargetConstant(AMDGPU::sub0,
DL, MVT::i32);
766 SubReg1 =
CurDAG->getTargetConstant(AMDGPU::sub1,
DL, MVT::i32);
770 const SDValue Ops[] = { RC,
N->getOperand(0), SubReg0,
771 N->getOperand(1), SubReg1 };
773 N->getValueType(0),
Ops));
779 if (
N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(
N) ||
780 Subtarget->has64BitLiterals())
785 Imm =
FP->getValueAPF().bitcastToAPInt().getZExtValue();
790 Imm =
C->getZExtValue();
799 case AMDGPUISD::BFE_I32:
800 case AMDGPUISD::BFE_U32: {
826 case AMDGPUISD::DIV_SCALE: {
837 return SelectMUL_LOHI(
N);
848 if (
N->getValueType(0) != MVT::i32)
859 case AMDGPUISD::CVT_PKRTZ_F16_F32:
860 case AMDGPUISD::CVT_PKNORM_I16_F32:
861 case AMDGPUISD::CVT_PKNORM_U16_F32:
862 case AMDGPUISD::CVT_PK_U16_U32:
863 case AMDGPUISD::CVT_PK_I16_I32: {
865 if (
N->getValueType(0) == MVT::i32) {
866 MVT NewVT =
Opc == AMDGPUISD::CVT_PKRTZ_F16_F32 ? MVT::v2f16 : MVT::v2i16;
868 { N->getOperand(0), N->getOperand(1) });
876 SelectINTRINSIC_W_CHAIN(
N);
880 SelectINTRINSIC_WO_CHAIN(
N);
884 SelectINTRINSIC_VOID(
N);
888 SelectWAVE_ADDRESS(
N);
892 SelectSTACKRESTORE(
N);
901 if (!Subtarget->hasSDWA())
911 return RHS->getZExtValue() == 0xFF || RHS->getZExtValue() == 0xFFFF;
915 return (RHS->getZExtValue() % 8) == 0;
920bool AMDGPUDAGToDAGISel::isUniformBr(
const SDNode *
N)
const {
923 return Term->getMetadata(
"amdgpu.uniform") ||
924 Term->getMetadata(
"structurizecfg.uniform");
927bool AMDGPUDAGToDAGISel::isUnneededShiftMask(
const SDNode *
N,
928 unsigned ShAmtBits)
const {
931 const APInt &
RHS =
N->getConstantOperandAPInt(1);
932 if (
RHS.countr_one() >= ShAmtBits)
962 N1 =
Lo.getOperand(1);
972 if (
CurDAG->isBaseWithConstantOffset(Addr)) {
987 return "AMDGPU DAG->DAG Pattern Instruction Selection";
997#ifdef EXPENSIVE_CHECKS
1003 for (
auto &L : LI.getLoopsInPreorder())
1004 assert(L->isLCSSAForm(DT) &&
"Loop is not in LCSSA form!");
1026 }
else if ((Addr.
getOpcode() == AMDGPUISD::DWORDADDR) &&
1028 Base =
CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
1042SDValue AMDGPUDAGToDAGISel::getMaterializedScalarImm32(int64_t Val,
1044 SDNode *Mov =
CurDAG->getMachineNode(
1045 AMDGPU::S_MOV_B32,
DL, MVT::i32,
1046 CurDAG->getTargetConstant(Val,
DL, MVT::i32));
1051void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(
SDNode *
N) {
1056 unsigned Opcode =
N->getOpcode();
1065 SDNode *Lo0 =
CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
1066 DL, MVT::i32,
LHS, Sub0);
1067 SDNode *Hi0 =
CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
1068 DL, MVT::i32,
LHS, Sub1);
1070 SDNode *Lo1 =
CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
1071 DL, MVT::i32,
RHS, Sub0);
1072 SDNode *Hi1 =
CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
1073 DL, MVT::i32,
RHS, Sub1);
1075 SDVTList VTList =
CurDAG->getVTList(MVT::i32, MVT::Glue);
1077 static const unsigned OpcMap[2][2][2] = {
1078 {{AMDGPU::S_SUB_U32, AMDGPU::S_ADD_U32},
1079 {AMDGPU::V_SUB_CO_U32_e32, AMDGPU::V_ADD_CO_U32_e32}},
1080 {{AMDGPU::S_SUBB_U32, AMDGPU::S_ADDC_U32},
1081 {AMDGPU::V_SUBB_U32_e32, AMDGPU::V_ADDC_U32_e32}}};
1083 unsigned Opc = OpcMap[0][
N->isDivergent()][IsAdd];
1084 unsigned CarryOpc = OpcMap[1][
N->isDivergent()][IsAdd];
1087 if (!ConsumeCarry) {
1089 AddLo =
CurDAG->getMachineNode(
Opc,
DL, VTList, Args);
1092 AddLo =
CurDAG->getMachineNode(CarryOpc,
DL, VTList, Args);
1099 SDNode *AddHi =
CurDAG->getMachineNode(CarryOpc,
DL, VTList, AddHiArgs);
1102 CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID,
DL, MVT::i32),
1109 MVT::i64, RegSequenceArgs);
1120void AMDGPUDAGToDAGISel::SelectAddcSubb(
SDNode *
N) {
1125 if (
N->isDivergent()) {
1127 : AMDGPU::V_SUBB_U32_e64;
1129 N,
Opc,
N->getVTList(),
1131 CurDAG->getTargetConstant(0, {}, MVT::i1) });
1134 : AMDGPU::S_SUB_CO_PSEUDO;
1135 CurDAG->SelectNodeTo(
N,
Opc,
N->getVTList(), {LHS, RHS, CI});
1139void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(
SDNode *
N) {
1144 bool IsVALU =
N->isDivergent();
1146 for (SDNode::user_iterator UI =
N->user_begin(),
E =
N->user_end(); UI !=
E;
1148 if (UI.getUse().getResNo() == 1) {
1149 if (UI->isMachineOpcode()) {
1150 if (UI->getMachineOpcode() !=
1151 (IsAdd ? AMDGPU::S_ADD_CO_PSEUDO : AMDGPU::S_SUB_CO_PSEUDO)) {
1164 unsigned Opc = IsAdd ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_SUB_CO_U32_e64;
1167 N,
Opc,
N->getVTList(),
1168 {N->getOperand(0), N->getOperand(1),
1169 CurDAG->getTargetConstant(0, {}, MVT::i1) });
1171 unsigned Opc = IsAdd ? AMDGPU::S_UADDO_PSEUDO : AMDGPU::S_USUBO_PSEUDO;
1173 CurDAG->SelectNodeTo(
N,
Opc,
N->getVTList(),
1174 {N->getOperand(0), N->getOperand(1)});
1178void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(
SDNode *
N) {
1182 SelectVOP3Mods0(
N->getOperand(1),
Ops[1],
Ops[0],
Ops[6],
Ops[7]);
1183 SelectVOP3Mods(
N->getOperand(2),
Ops[3],
Ops[2]);
1184 SelectVOP3Mods(
N->getOperand(3),
Ops[5],
Ops[4]);
1185 Ops[8] =
N->getOperand(0);
1186 Ops[9] =
N->getOperand(4);
1190 bool UseFMAC = Subtarget->hasDLInsts() &&
1194 unsigned Opcode = UseFMAC ? AMDGPU::V_FMAC_F32_e64 : AMDGPU::V_FMA_F32_e64;
1195 CurDAG->SelectNodeTo(
N, Opcode,
N->getVTList(),
Ops);
1198void AMDGPUDAGToDAGISel::SelectFMUL_W_CHAIN(
SDNode *
N) {
1202 SelectVOP3Mods0(
N->getOperand(1),
Ops[1],
Ops[0],
Ops[4],
Ops[5]);
1203 SelectVOP3Mods(
N->getOperand(2),
Ops[3],
Ops[2]);
1204 Ops[6] =
N->getOperand(0);
1205 Ops[7] =
N->getOperand(3);
1207 CurDAG->SelectNodeTo(
N, AMDGPU::V_MUL_F32_e64,
N->getVTList(),
Ops);
1212void AMDGPUDAGToDAGISel::SelectDIV_SCALE(
SDNode *
N) {
1213 EVT VT =
N->getValueType(0);
1215 assert(VT == MVT::f32 || VT == MVT::f64);
1218 = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64_e64 : AMDGPU::V_DIV_SCALE_F32_e64;
1223 SelectVOP3BMods0(
N->getOperand(0),
Ops[1],
Ops[0],
Ops[6],
Ops[7]);
1224 SelectVOP3BMods(
N->getOperand(1),
Ops[3],
Ops[2]);
1225 SelectVOP3BMods(
N->getOperand(2),
Ops[5],
Ops[4]);
1231void AMDGPUDAGToDAGISel::SelectMAD_64_32(
SDNode *
N) {
1235 bool UseNoCarry = Subtarget->hasMadU64U32NoCarry() && !
N->hasAnyUseOfValue(1);
1236 if (Subtarget->hasMADIntraFwdBug())
1237 Opc =
Signed ? AMDGPU::V_MAD_I64_I32_gfx11_e64
1238 : AMDGPU::V_MAD_U64_U32_gfx11_e64;
1239 else if (UseNoCarry)
1240 Opc =
Signed ? AMDGPU::V_MAD_NC_I64_I32_e64 : AMDGPU::V_MAD_NC_U64_U32_e64;
1242 Opc =
Signed ? AMDGPU::V_MAD_I64_I32_e64 : AMDGPU::V_MAD_U64_U32_e64;
1245 SDValue Ops[] = {
N->getOperand(0),
N->getOperand(1),
N->getOperand(2),
1249 MachineSDNode *Mad =
CurDAG->getMachineNode(
Opc, SL, MVT::i64,
Ops);
1260void AMDGPUDAGToDAGISel::SelectMUL_LOHI(
SDNode *
N) {
1265 if (Subtarget->hasMadU64U32NoCarry()) {
1266 VTList =
CurDAG->getVTList(MVT::i64);
1267 Opc =
Signed ? AMDGPU::V_MAD_NC_I64_I32_e64 : AMDGPU::V_MAD_NC_U64_U32_e64;
1269 VTList =
CurDAG->getVTList(MVT::i64, MVT::i1);
1270 if (Subtarget->hasMADIntraFwdBug()) {
1271 Opc =
Signed ? AMDGPU::V_MAD_I64_I32_gfx11_e64
1272 : AMDGPU::V_MAD_U64_U32_gfx11_e64;
1274 Opc =
Signed ? AMDGPU::V_MAD_I64_I32_e64 : AMDGPU::V_MAD_U64_U32_e64;
1281 SDNode *Mad =
CurDAG->getMachineNode(
Opc, SL, VTList,
Ops);
1283 SDValue Sub0 =
CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32);
1284 SDNode *
Lo =
CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, SL,
1285 MVT::i32,
SDValue(Mad, 0), Sub0);
1289 SDValue Sub1 =
CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32);
1290 SDNode *
Hi =
CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, SL,
1291 MVT::i32,
SDValue(Mad, 0), Sub1);
1301 if (!
Base || Subtarget->hasUsableDSOffset() ||
1302 Subtarget->unsafeDSOffsetFoldingEnabled())
1313 if (
CurDAG->isBaseWithConstantOffset(Addr)) {
1326 int64_t ByteOffset =
C->getSExtValue();
1327 if (isDSOffsetLegal(
SDValue(), ByteOffset)) {
1336 if (isDSOffsetLegal(
Sub, ByteOffset)) {
1342 unsigned SubOp = AMDGPU::V_SUB_CO_U32_e32;
1343 if (Subtarget->hasAddNoCarryInsts()) {
1344 SubOp = AMDGPU::V_SUB_U32_e64;
1346 CurDAG->getTargetConstant(0, {}, MVT::i1));
1349 MachineSDNode *MachineSub =
1350 CurDAG->getMachineNode(SubOp,
DL, MVT::i32, Opnds);
1366 if (isDSOffsetLegal(
SDValue(), CAddr->getZExtValue())) {
1368 MachineSDNode *MovZero =
CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
1369 DL, MVT::i32, Zero);
1371 Offset =
CurDAG->getTargetConstant(CAddr->getZExtValue(),
DL, MVT::i16);
1378 Offset =
CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16);
1382bool AMDGPUDAGToDAGISel::isDSOffset2Legal(
SDValue Base,
unsigned Offset0,
1384 unsigned Size)
const {
1385 if (Offset0 %
Size != 0 || Offset1 %
Size != 0)
1390 if (!
Base || Subtarget->hasUsableDSOffset() ||
1391 Subtarget->unsafeDSOffsetFoldingEnabled())
1409bool AMDGPUDAGToDAGISel::isFlatScratchBaseLegal(
SDValue Addr)
const {
1415 if (Subtarget->hasSignedScratchOffsets())
1425 ConstantSDNode *ImmOp =
nullptr;
1436bool AMDGPUDAGToDAGISel::isFlatScratchBaseLegalSV(
SDValue Addr)
const {
1442 if (Subtarget->hasSignedScratchOffsets())
1452bool AMDGPUDAGToDAGISel::isFlatScratchBaseLegalSVImm(
SDValue Addr)
const {
1466 (RHSImm->getSExtValue() < 0 && RHSImm->getSExtValue() > -0x40000000)))
1469 auto LHS =
Base.getOperand(0);
1470 auto RHS =
Base.getOperand(1);
1478 return SelectDSReadWrite2(Addr,
Base, Offset0, Offset1, 4);
1484 return SelectDSReadWrite2(Addr,
Base, Offset0, Offset1, 8);
1489 unsigned Size)
const {
1492 if (
CurDAG->isBaseWithConstantOffset(Addr)) {
1497 unsigned OffsetValue1 = OffsetValue0 +
Size;
1500 if (isDSOffset2Legal(N0, OffsetValue0, OffsetValue1,
Size)) {
1502 Offset0 =
CurDAG->getTargetConstant(OffsetValue0 /
Size,
DL, MVT::i32);
1503 Offset1 =
CurDAG->getTargetConstant(OffsetValue1 /
Size,
DL, MVT::i32);
1508 if (
const ConstantSDNode *
C =
1510 unsigned OffsetValue0 =
C->getZExtValue();
1511 unsigned OffsetValue1 = OffsetValue0 +
Size;
1513 if (isDSOffset2Legal(
SDValue(), OffsetValue0, OffsetValue1,
Size)) {
1523 if (isDSOffset2Legal(
Sub, OffsetValue0, OffsetValue1,
Size)) {
1527 unsigned SubOp = AMDGPU::V_SUB_CO_U32_e32;
1528 if (Subtarget->hasAddNoCarryInsts()) {
1529 SubOp = AMDGPU::V_SUB_U32_e64;
1531 CurDAG->getTargetConstant(0, {}, MVT::i1));
1534 MachineSDNode *MachineSub =
CurDAG->getMachineNode(
1539 CurDAG->getTargetConstant(OffsetValue0 /
Size,
DL, MVT::i32);
1541 CurDAG->getTargetConstant(OffsetValue1 /
Size,
DL, MVT::i32);
1547 unsigned OffsetValue0 = CAddr->getZExtValue();
1548 unsigned OffsetValue1 = OffsetValue0 +
Size;
1550 if (isDSOffset2Legal(
SDValue(), OffsetValue0, OffsetValue1,
Size)) {
1552 MachineSDNode *MovZero =
1553 CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
DL, MVT::i32, Zero);
1555 Offset0 =
CurDAG->getTargetConstant(OffsetValue0 /
Size,
DL, MVT::i32);
1556 Offset1 =
CurDAG->getTargetConstant(OffsetValue1 /
Size,
DL, MVT::i32);
1564 Offset0 =
CurDAG->getTargetConstant(0,
DL, MVT::i32);
1565 Offset1 =
CurDAG->getTargetConstant(1,
DL, MVT::i32);
1575 if (Subtarget->useFlatForGlobal())
1580 Idxen =
CurDAG->getTargetConstant(0,
DL, MVT::i1);
1581 Offen =
CurDAG->getTargetConstant(0,
DL, MVT::i1);
1582 Addr64 =
CurDAG->getTargetConstant(0,
DL, MVT::i1);
1583 SOffset = Subtarget->hasRestrictedSOffset()
1584 ?
CurDAG->getRegister(AMDGPU::SGPR_NULL, MVT::i32)
1585 :
CurDAG->getTargetConstant(0,
DL, MVT::i32);
1587 ConstantSDNode *C1 =
nullptr;
1589 if (
CurDAG->isBaseWithConstantOffset(Addr)) {
1602 Addr64 =
CurDAG->getTargetConstant(1,
DL, MVT::i1);
1608 Ptr =
SDValue(buildSMovImm64(
DL, 0, MVT::v2i32), 0);
1624 Ptr =
SDValue(buildSMovImm64(
DL, 0, MVT::v2i32), 0);
1626 Addr64 =
CurDAG->getTargetConstant(1,
DL, MVT::i1);
1630 VAddr =
CurDAG->getTargetConstant(0,
DL, MVT::i32);
1640 const SIInstrInfo *
TII = Subtarget->getInstrInfo();
1651 AMDGPU::S_MOV_B32,
DL, MVT::i32,
1657bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(
SDValue Addr,
SDValue &SRsrc,
1660 SDValue Ptr, Offen, Idxen, Addr64;
1664 if (!Subtarget->hasAddr64())
1667 if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset,
Offset, Offen, Idxen, Addr64))
1671 if (
C->getSExtValue()) {
1684std::pair<SDValue, SDValue> AMDGPUDAGToDAGISel::foldFrameIndex(
SDValue N)
const {
1689 FI ?
CurDAG->getTargetFrameIndex(FI->getIndex(), FI->getValueType(0)) :
N;
1695 return std::pair(TFI,
CurDAG->getTargetConstant(0,
DL, MVT::i32));
1698bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(
SDNode *Parent,
1704 MachineFunction &
MF =
CurDAG->getMachineFunction();
1705 const SIMachineFunctionInfo *
Info =
MF.getInfo<SIMachineFunctionInfo>();
1707 Rsrc =
CurDAG->getRegister(
Info->getScratchRSrcReg(), MVT::v4i32);
1710 int64_t
Imm = CAddr->getSExtValue();
1711 const int64_t NullPtr =
1714 if (Imm != NullPtr) {
1717 CurDAG->getTargetConstant(Imm & ~MaxOffset,
DL, MVT::i32);
1718 MachineSDNode *MovHighBits =
CurDAG->getMachineNode(
1719 AMDGPU::V_MOV_B32_e32,
DL, MVT::i32, HighBits);
1720 VAddr =
SDValue(MovHighBits, 0);
1722 SOffset =
CurDAG->getTargetConstant(0,
DL, MVT::i32);
1723 ImmOffset =
CurDAG->getTargetConstant(Imm & MaxOffset,
DL, MVT::i32);
1728 if (
CurDAG->isBaseWithConstantOffset(Addr)) {
1749 const SIInstrInfo *
TII = Subtarget->getInstrInfo();
1750 if (
TII->isLegalMUBUFImmOffset(C1) &&
1751 (!Subtarget->privateMemoryResourceIsRangeChecked() ||
1752 CurDAG->SignBitIsZero(N0))) {
1753 std::tie(VAddr, SOffset) = foldFrameIndex(N0);
1754 ImmOffset =
CurDAG->getTargetConstant(C1,
DL, MVT::i32);
1760 std::tie(VAddr, SOffset) = foldFrameIndex(Addr);
1761 ImmOffset =
CurDAG->getTargetConstant(0,
DL, MVT::i32);
1769 if (!
Reg.isPhysical())
1771 const auto *RC =
TRI.getPhysRegBaseClass(
Reg);
1772 return RC &&
TRI.isSGPRClass(RC);
1775bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(
SDNode *Parent,
1780 const SIRegisterInfo *
TRI = Subtarget->getRegisterInfo();
1781 const SIInstrInfo *
TII = Subtarget->getInstrInfo();
1782 MachineFunction &
MF =
CurDAG->getMachineFunction();
1783 const SIMachineFunctionInfo *
Info =
MF.getInfo<SIMachineFunctionInfo>();
1788 SRsrc =
CurDAG->getRegister(
Info->getScratchRSrcReg(), MVT::v4i32);
1794 ConstantSDNode *CAddr;
1807 SOffset =
CurDAG->getTargetConstant(0,
DL, MVT::i32);
1812 SRsrc =
CurDAG->getRegister(
Info->getScratchRSrcReg(), MVT::v4i32);
1818bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(
SDValue Addr,
SDValue &SRsrc,
1821 SDValue Ptr, VAddr, Offen, Idxen, Addr64;
1822 const SIInstrInfo *
TII = Subtarget->getInstrInfo();
1824 if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset,
Offset, Offen, Idxen, Addr64))
1830 uint64_t Rsrc =
TII->getDefaultRsrcDataFormat() |
1843bool AMDGPUDAGToDAGISel::SelectBUFSOffset(
SDValue ByteOffsetNode,
1845 if (Subtarget->hasRestrictedSOffset() &&
isNullConstant(ByteOffsetNode)) {
1846 SOffset =
CurDAG->getRegister(AMDGPU::SGPR_NULL, MVT::i32);
1850 SOffset = ByteOffsetNode;
1868bool AMDGPUDAGToDAGISel::SelectFlatOffsetImpl(
SDNode *
N,
SDValue Addr,
1870 uint64_t FlatVariant)
const {
1871 int64_t OffsetVal = 0;
1875 bool CanHaveFlatSegmentOffsetBug =
1876 Subtarget->hasFlatSegmentOffsetBug() &&
1880 if (Subtarget->hasFlatInstOffsets() && !CanHaveFlatSegmentOffsetBug) {
1882 if (isBaseWithConstantOffset64(Addr, N0, N1) &&
1884 isFlatScratchBaseLegal(Addr))) {
1893 const SIInstrInfo *
TII = Subtarget->getInstrInfo();
1894 if (
TII->isLegalFLATOffset(COffsetVal, AS, FlatVariant)) {
1896 OffsetVal = COffsetVal;
1909 uint64_t RemainderOffset;
1911 std::tie(OffsetVal, RemainderOffset) =
1912 TII->splitFlatOffset(COffsetVal, AS, FlatVariant);
1915 getMaterializedScalarImm32(
Lo_32(RemainderOffset),
DL);
1922 unsigned AddOp = AMDGPU::V_ADD_CO_U32_e32;
1923 if (Subtarget->hasAddNoCarryInsts()) {
1924 AddOp = AMDGPU::V_ADD_U32_e64;
1933 CurDAG->getTargetConstant(AMDGPU::sub0,
DL, MVT::i32);
1935 CurDAG->getTargetConstant(AMDGPU::sub1,
DL, MVT::i32);
1937 SDNode *N0Lo =
CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
1938 DL, MVT::i32, N0, Sub0);
1939 SDNode *N0Hi =
CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
1940 DL, MVT::i32, N0, Sub1);
1943 getMaterializedScalarImm32(
Hi_32(RemainderOffset),
DL);
1945 SDVTList VTs =
CurDAG->getVTList(MVT::i32, MVT::i1);
1948 CurDAG->getMachineNode(AMDGPU::V_ADD_CO_U32_e64,
DL, VTs,
1949 {AddOffsetLo,
SDValue(N0Lo, 0), Clamp});
1951 SDNode *Addc =
CurDAG->getMachineNode(
1952 AMDGPU::V_ADDC_U32_e64,
DL, VTs,
1956 CurDAG->getTargetConstant(AMDGPU::VReg_64RegClassID,
DL,
1961 MVT::i64, RegSequenceArgs),
1970 Offset =
CurDAG->getSignedTargetConstant(OffsetVal, SDLoc(), MVT::i32);
1974bool AMDGPUDAGToDAGISel::SelectFlatOffset(
SDNode *
N,
SDValue Addr,
1980bool AMDGPUDAGToDAGISel::SelectGlobalOffset(
SDNode *
N,
SDValue Addr,
1986bool AMDGPUDAGToDAGISel::SelectScratchOffset(
SDNode *
N,
SDValue Addr,
1989 return SelectFlatOffsetImpl(
N, Addr, VAddr,
Offset,
1997 if (
Op.getValueType() == MVT::i32)
2012bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(
SDNode *
N,
SDValue Addr,
2015 bool NeedIOffset)
const {
2016 int64_t ImmOffset = 0;
2017 ScaleOffset =
false;
2023 if (isBaseWithConstantOffset64(Addr,
LHS,
RHS)) {
2025 const SIInstrInfo *
TII = Subtarget->getInstrInfo();
2031 ImmOffset = COffsetVal;
2032 }
else if (!
LHS->isDivergent()) {
2033 if (COffsetVal > 0) {
2038 int64_t SplitImmOffset = 0, RemainderOffset = COffsetVal;
2040 std::tie(SplitImmOffset, RemainderOffset) =
TII->splitFlatOffset(
2044 if (Subtarget->hasSignedGVSOffset() ?
isInt<32>(RemainderOffset)
2046 SDNode *VMov =
CurDAG->getMachineNode(
2047 AMDGPU::V_MOV_B32_e32, SL, MVT::i32,
2048 CurDAG->getTargetConstant(RemainderOffset, SDLoc(), MVT::i32));
2051 Offset =
CurDAG->getTargetConstant(SplitImmOffset, SDLoc(), MVT::i32);
2061 unsigned NumLiterals =
2062 !
TII->isInlineConstant(APInt(32,
Lo_32(COffsetVal))) +
2063 !
TII->isInlineConstant(APInt(32,
Hi_32(COffsetVal)));
2064 if (Subtarget->getConstantBusLimit(AMDGPU::V_ADD_U32_e64) > NumLiterals)
2073 if (!
LHS->isDivergent()) {
2076 ScaleOffset = SelectScaleOffset(
N,
RHS, Subtarget->hasSignedGVSOffset());
2078 RHS, Subtarget->hasSignedGVSOffset(),
CurDAG)) {
2085 if (!SAddr && !
RHS->isDivergent()) {
2087 ScaleOffset = SelectScaleOffset(
N,
LHS, Subtarget->hasSignedGVSOffset());
2089 LHS, Subtarget->hasSignedGVSOffset(),
CurDAG)) {
2096 Offset =
CurDAG->getSignedTargetConstant(ImmOffset, SDLoc(), MVT::i32);
2101 if (Subtarget->hasScaleOffset() &&
2102 (Addr.
getOpcode() == (Subtarget->hasSignedGVSOffset()
2117 Offset =
CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i32);
2130 CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, SDLoc(Addr), MVT::i32,
2131 CurDAG->getTargetConstant(0, SDLoc(), MVT::i32));
2133 Offset =
CurDAG->getSignedTargetConstant(ImmOffset, SDLoc(), MVT::i32);
2137bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(
SDNode *
N,
SDValue Addr,
2142 if (!SelectGlobalSAddr(
N, Addr, SAddr, VOffset,
Offset, ScaleOffset))
2150bool AMDGPUDAGToDAGISel::SelectGlobalSAddrCPol(
SDNode *
N,
SDValue Addr,
2155 if (!SelectGlobalSAddr(
N, Addr, SAddr, VOffset,
Offset, ScaleOffset))
2160 N->getConstantOperandVal(
N->getNumOperands() - 1) & ~AMDGPU::CPol::SCAL;
2166bool AMDGPUDAGToDAGISel::SelectGlobalSAddrCPolM0(
SDNode *
N,
SDValue Addr,
2172 if (!SelectGlobalSAddr(
N, Addr, SAddr, VOffset,
Offset, ScaleOffset))
2177 N->getConstantOperandVal(
N->getNumOperands() - 2) & ~AMDGPU::CPol::SCAL;
2183bool AMDGPUDAGToDAGISel::SelectGlobalSAddrGLC(
SDNode *
N,
SDValue Addr,
2188 if (!SelectGlobalSAddr(
N, Addr, SAddr, VOffset,
Offset, ScaleOffset))
2192 CPol =
CurDAG->getTargetConstant(CPolVal, SDLoc(), MVT::i32);
2196bool AMDGPUDAGToDAGISel::SelectGlobalSAddrNoIOffset(
SDNode *
N,
SDValue Addr,
2202 if (!SelectGlobalSAddr(
N, Addr, SAddr, VOffset, DummyOffset, ScaleOffset,
2208 N->getConstantOperandVal(
N->getNumOperands() - 1) & ~AMDGPU::CPol::SCAL;
2214bool AMDGPUDAGToDAGISel::SelectGlobalSAddrNoIOffsetM0(
SDNode *
N,
SDValue Addr,
2220 if (!SelectGlobalSAddr(
N, Addr, SAddr, VOffset, DummyOffset, ScaleOffset,
2241 FI->getValueType(0));
2251bool AMDGPUDAGToDAGISel::SelectScratchSAddr(
SDNode *Parent,
SDValue Addr,
2259 int64_t COffsetVal = 0;
2261 if (
CurDAG->isBaseWithConstantOffset(Addr) && isFlatScratchBaseLegal(Addr)) {
2270 const SIInstrInfo *
TII = Subtarget->getInstrInfo();
2274 int64_t SplitImmOffset, RemainderOffset;
2275 std::tie(SplitImmOffset, RemainderOffset) =
TII->splitFlatOffset(
2278 COffsetVal = SplitImmOffset;
2282 ? getMaterializedScalarImm32(
Lo_32(RemainderOffset),
DL)
2283 :
CurDAG->getSignedTargetConstant(RemainderOffset,
DL, MVT::i32);
2284 SAddr =
SDValue(
CurDAG->getMachineNode(AMDGPU::S_ADD_I32,
DL, MVT::i32,
2289 Offset =
CurDAG->getSignedTargetConstant(COffsetVal,
DL, MVT::i32);
2295bool AMDGPUDAGToDAGISel::checkFlatScratchSVSSwizzleBug(
2297 if (!Subtarget->hasFlatScratchSVSSwizzleBug())
2303 KnownBits VKnown =
CurDAG->computeKnownBits(VAddr);
2310 return (VMax & 3) + (
SMax & 3) >= 4;
2313bool AMDGPUDAGToDAGISel::SelectScratchSVAddr(
SDNode *
N,
SDValue Addr,
2317 int64_t ImmOffset = 0;
2321 if (isBaseWithConstantOffset64(Addr,
LHS,
RHS)) {
2323 const SIInstrInfo *
TII = Subtarget->getInstrInfo();
2328 ImmOffset = COffsetVal;
2329 }
else if (!
LHS->isDivergent() && COffsetVal > 0) {
2333 int64_t SplitImmOffset, RemainderOffset;
2334 std::tie(SplitImmOffset, RemainderOffset) =
TII->splitFlatOffset(
2338 SDNode *VMov =
CurDAG->getMachineNode(
2339 AMDGPU::V_MOV_B32_e32, SL, MVT::i32,
2340 CurDAG->getTargetConstant(RemainderOffset, SDLoc(), MVT::i32));
2343 if (!isFlatScratchBaseLegal(Addr))
2345 if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, SplitImmOffset))
2347 Offset =
CurDAG->getTargetConstant(SplitImmOffset, SDLoc(), MVT::i32);
2348 CPol =
CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
2360 if (!
LHS->isDivergent() &&
RHS->isDivergent()) {
2363 }
else if (!
RHS->isDivergent() &&
LHS->isDivergent()) {
2370 if (OrigAddr != Addr) {
2371 if (!isFlatScratchBaseLegalSVImm(OrigAddr))
2374 if (!isFlatScratchBaseLegalSV(OrigAddr))
2378 if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, ImmOffset))
2381 Offset =
CurDAG->getSignedTargetConstant(ImmOffset, SDLoc(), MVT::i32);
2383 bool ScaleOffset = SelectScaleOffset(
N, VAddr,
true );
2392bool AMDGPUDAGToDAGISel::isSOffsetLegalWithImmOffset(
SDValue *SOffset,
2395 int64_t ImmOffset)
const {
2396 if (!IsBuffer && !Imm32Only && ImmOffset < 0 &&
2398 KnownBits SKnown =
CurDAG->computeKnownBits(*SOffset);
2410 bool IsSigned)
const {
2411 bool ScaleOffset =
false;
2412 if (!Subtarget->hasScaleOffset() || !
Offset)
2426 (IsSigned &&
Offset.getOpcode() == AMDGPUISD::MUL_I24) ||
2427 Offset.getOpcode() == AMDGPUISD::MUL_U24 ||
2428 (
Offset.isMachineOpcode() &&
2429 Offset.getMachineOpcode() ==
2430 (IsSigned ? AMDGPU::S_MUL_I64_I32_PSEUDO
2431 : AMDGPU::S_MUL_U64_U32_PSEUDO))) {
2433 ScaleOffset =
C->getZExtValue() ==
Size;
2445bool AMDGPUDAGToDAGISel::SelectSMRDOffset(
SDNode *
N,
SDValue ByteOffsetNode,
2447 bool Imm32Only,
bool IsBuffer,
2448 bool HasSOffset, int64_t ImmOffset,
2449 bool *ScaleOffset)
const {
2451 "Cannot match both soffset and offset at the same time!");
2456 *ScaleOffset = SelectScaleOffset(
N, ByteOffsetNode,
false );
2466 *SOffset = ByteOffsetNode;
2467 return isSOffsetLegalWithImmOffset(SOffset, Imm32Only, IsBuffer,
2473 return isSOffsetLegalWithImmOffset(SOffset, Imm32Only, IsBuffer,
2480 SDLoc SL(ByteOffsetNode);
2484 int64_t ByteOffset = IsBuffer ?
C->getZExtValue() :
C->getSExtValue();
2486 *Subtarget, ByteOffset, IsBuffer, HasSOffset);
2487 if (EncodedOffset &&
Offset && !Imm32Only) {
2488 *
Offset =
CurDAG->getSignedTargetConstant(*EncodedOffset, SL, MVT::i32);
2497 if (EncodedOffset &&
Offset && Imm32Only) {
2498 *
Offset =
CurDAG->getTargetConstant(*EncodedOffset, SL, MVT::i32);
2506 SDValue C32Bit =
CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
2508 CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, C32Bit), 0);
2515SDValue AMDGPUDAGToDAGISel::Expand32BitAddress(
SDValue Addr)
const {
2522 const MachineFunction &
MF =
CurDAG->getMachineFunction();
2523 const SIMachineFunctionInfo *
Info =
MF.getInfo<SIMachineFunctionInfo>();
2524 unsigned AddrHiVal =
Info->get32BitAddressHighBits();
2525 SDValue AddrHi =
CurDAG->getTargetConstant(AddrHiVal, SL, MVT::i32);
2528 CurDAG->getTargetConstant(AMDGPU::SReg_64_XEXECRegClassID, SL, MVT::i32),
2530 CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32),
2531 SDValue(
CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, AddrHi),
2533 CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32),
2536 return SDValue(
CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, SL, MVT::i64,
2543bool AMDGPUDAGToDAGISel::SelectSMRDBaseOffset(
SDNode *
N,
SDValue Addr,
2546 bool IsBuffer,
bool HasSOffset,
2548 bool *ScaleOffset)
const {
2550 assert(!Imm32Only && !IsBuffer);
2553 if (!SelectSMRDBaseOffset(
N, Addr,
B,
nullptr,
Offset,
false,
false,
true))
2558 ImmOff =
C->getSExtValue();
2560 return SelectSMRDBaseOffset(
N,
B, SBase, SOffset,
nullptr,
false,
false,
2561 true, ImmOff, ScaleOffset);
2581 if (SelectSMRDOffset(
N, N1, SOffset,
Offset, Imm32Only, IsBuffer, HasSOffset,
2582 ImmOffset, ScaleOffset)) {
2586 if (SelectSMRDOffset(
N, N0, SOffset,
Offset, Imm32Only, IsBuffer, HasSOffset,
2587 ImmOffset, ScaleOffset)) {
2596 bool Imm32Only,
bool *ScaleOffset)
const {
2597 if (SelectSMRDBaseOffset(
N, Addr, SBase, SOffset,
Offset, Imm32Only,
2600 SBase = Expand32BitAddress(SBase);
2605 SBase = Expand32BitAddress(Addr);
2606 *
Offset =
CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
2613bool AMDGPUDAGToDAGISel::SelectSMRDImm(
SDValue Addr,
SDValue &SBase,
2615 return SelectSMRD(
nullptr, Addr, SBase,
nullptr,
2619bool AMDGPUDAGToDAGISel::SelectSMRDImm32(
SDValue Addr,
SDValue &SBase,
2622 return SelectSMRD(
nullptr, Addr, SBase,
nullptr,
2629 if (!SelectSMRD(
N, Addr, SBase, &SOffset,
nullptr,
2630 false, &ScaleOffset))
2634 SDLoc(
N), MVT::i32);
2638bool AMDGPUDAGToDAGISel::SelectSMRDSgprImm(
SDNode *
N,
SDValue Addr,
2643 if (!SelectSMRD(
N, Addr, SBase, &SOffset, &
Offset,
false, &ScaleOffset))
2647 SDLoc(
N), MVT::i32);
2652 return SelectSMRDOffset(
nullptr,
N,
nullptr, &
Offset,
2656bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(
SDValue N,
2659 return SelectSMRDOffset(
nullptr,
N,
nullptr, &
Offset,
2663bool AMDGPUDAGToDAGISel::SelectSMRDBufferSgprImm(
SDValue N,
SDValue &SOffset,
2667 return N.getValueType() == MVT::i32 &&
2668 SelectSMRDBaseOffset(
nullptr,
N, SOffset,
2673bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(
SDValue Index,
2678 if (
CurDAG->isBaseWithConstantOffset(Index)) {
2703SDNode *AMDGPUDAGToDAGISel::getBFE32(
bool IsSigned,
const SDLoc &
DL,
2707 unsigned Opcode = IsSigned ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;
2711 return CurDAG->getMachineNode(Opcode,
DL, MVT::i32, Val, Off, W);
2713 unsigned Opcode = IsSigned ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
2717 uint32_t PackedVal =
Offset | (Width << 16);
2718 SDValue PackedConst =
CurDAG->getTargetConstant(PackedVal,
DL, MVT::i32);
2720 return CurDAG->getMachineNode(Opcode,
DL, MVT::i32, Val, PackedConst);
2723void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(
SDNode *
N) {
2728 const SDValue &Shl =
N->getOperand(0);
2733 uint32_t BVal =
B->getZExtValue();
2734 uint32_t CVal =
C->getZExtValue();
2736 if (0 < BVal && BVal <= CVal && CVal < 32) {
2746void AMDGPUDAGToDAGISel::SelectS_BFE(
SDNode *
N) {
2747 switch (
N->getOpcode()) {
2749 if (
N->getOperand(0).getOpcode() ==
ISD::SRL) {
2752 const SDValue &Srl =
N->getOperand(0);
2756 if (Shift && Mask) {
2758 uint32_t MaskVal =
Mask->getZExtValue();
2770 if (
N->getOperand(0).getOpcode() ==
ISD::AND) {
2777 if (Shift && Mask) {
2779 uint32_t MaskVal =
Mask->getZExtValue() >> ShiftVal;
2788 }
else if (
N->getOperand(0).getOpcode() ==
ISD::SHL) {
2789 SelectS_BFEFromShifts(
N);
2794 if (
N->getOperand(0).getOpcode() ==
ISD::SHL) {
2795 SelectS_BFEFromShifts(
N);
2810 unsigned Width =
cast<VTSDNode>(
N->getOperand(1))->getVT().getSizeInBits();
2820bool AMDGPUDAGToDAGISel::isCBranchSCC(
const SDNode *
N)
const {
2822 if (!
N->hasOneUse())
2832 MVT VT =
Cond.getOperand(0).getSimpleValueType();
2836 if (VT == MVT::i64) {
2839 Subtarget->hasScalarCompareEq64();
2842 if ((VT == MVT::f16 || VT == MVT::f32) && Subtarget->hasSALUFloatInsts())
2875void AMDGPUDAGToDAGISel::SelectBRCOND(
SDNode *
N) {
2878 if (
Cond.isUndef()) {
2879 CurDAG->SelectNodeTo(
N, AMDGPU::SI_BR_UNDEF, MVT::Other,
2880 N->getOperand(2),
N->getOperand(0));
2884 const SIRegisterInfo *
TRI = Subtarget->getRegisterInfo();
2886 bool UseSCCBr = isCBranchSCC(
N) && isUniformBr(
N);
2887 bool AndExec = !UseSCCBr;
2888 bool Negate =
false;
2891 Cond->getOperand(0)->getOpcode() == AMDGPUISD::SETCC) {
2906 bool NegatedBallot =
false;
2909 UseSCCBr = !BallotCond->isDivergent();
2910 Negate = Negate ^ NegatedBallot;
2925 UseSCCBr ? (Negate ? AMDGPU::S_CBRANCH_SCC0 : AMDGPU::S_CBRANCH_SCC1)
2926 : (Negate ? AMDGPU::S_CBRANCH_VCCZ : AMDGPU::S_CBRANCH_VCCNZ);
2927 Register CondReg = UseSCCBr ? AMDGPU::SCC :
TRI->getVCC();
2946 Subtarget->isWave32() ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64, SL,
2948 CurDAG->getRegister(Subtarget->isWave32() ? AMDGPU::EXEC_LO
2956 CurDAG->SelectNodeTo(
N, BrOp, MVT::Other,
2961void AMDGPUDAGToDAGISel::SelectFP_EXTEND(
SDNode *
N) {
2962 if (Subtarget->hasSALUFloatInsts() &&
N->getValueType(0) == MVT::f32 &&
2963 !
N->isDivergent()) {
2965 if (Src.getValueType() == MVT::f16) {
2967 CurDAG->SelectNodeTo(
N, AMDGPU::S_CVT_HI_F32_F16,
N->getVTList(),
2977void AMDGPUDAGToDAGISel::SelectDSAppendConsume(
SDNode *
N,
unsigned IntrID) {
2980 unsigned Opc = IntrID == Intrinsic::amdgcn_ds_append ?
2981 AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME;
2986 MachineMemOperand *MMO =
M->getMemOperand();
2990 if (
CurDAG->isBaseWithConstantOffset(Ptr)) {
2995 if (isDSOffsetLegal(PtrBase, OffsetVal.
getZExtValue())) {
2996 N = glueCopyToM0(
N, PtrBase);
2997 Offset =
CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i32);
3002 N = glueCopyToM0(
N, Ptr);
3003 Offset =
CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
3008 CurDAG->getTargetConstant(IsGDS, SDLoc(), MVT::i32),
3013 SDNode *Selected =
CurDAG->SelectNodeTo(
N,
Opc,
N->getVTList(),
Ops);
3019void AMDGPUDAGToDAGISel::SelectDSBvhStackIntrinsic(
SDNode *
N,
unsigned IntrID) {
3022 case Intrinsic::amdgcn_ds_bvh_stack_rtn:
3023 case Intrinsic::amdgcn_ds_bvh_stack_push4_pop1_rtn:
3024 Opc = AMDGPU::DS_BVH_STACK_RTN_B32;
3026 case Intrinsic::amdgcn_ds_bvh_stack_push8_pop1_rtn:
3027 Opc = AMDGPU::DS_BVH_STACK_PUSH8_POP1_RTN_B32;
3029 case Intrinsic::amdgcn_ds_bvh_stack_push8_pop2_rtn:
3030 Opc = AMDGPU::DS_BVH_STACK_PUSH8_POP2_RTN_B64;
3033 SDValue Ops[] = {
N->getOperand(2),
N->getOperand(3),
N->getOperand(4),
3034 N->getOperand(5),
N->getOperand(0)};
3037 MachineMemOperand *MMO =
M->getMemOperand();
3038 SDNode *Selected =
CurDAG->SelectNodeTo(
N,
Opc,
N->getVTList(),
Ops);
3042void AMDGPUDAGToDAGISel::SelectTensorLoadStore(
SDNode *
N,
unsigned IntrID) {
3043 bool IsLoad = IntrID == Intrinsic::amdgcn_tensor_load_to_lds;
3045 IsLoad ? AMDGPU::TENSOR_LOAD_TO_LDS_d4 : AMDGPU::TENSOR_STORE_FROM_LDS_d4;
3057 Opc = IsLoad ? AMDGPU::TENSOR_LOAD_TO_LDS_d2
3058 : AMDGPU::TENSOR_STORE_FROM_LDS_d2;
3070 (void)
CurDAG->SelectNodeTo(
N,
Opc, MVT::Other, TensorOps);
3075 case Intrinsic::amdgcn_ds_gws_init:
3076 return AMDGPU::DS_GWS_INIT;
3077 case Intrinsic::amdgcn_ds_gws_barrier:
3078 return AMDGPU::DS_GWS_BARRIER;
3079 case Intrinsic::amdgcn_ds_gws_sema_v:
3080 return AMDGPU::DS_GWS_SEMA_V;
3081 case Intrinsic::amdgcn_ds_gws_sema_br:
3082 return AMDGPU::DS_GWS_SEMA_BR;
3083 case Intrinsic::amdgcn_ds_gws_sema_p:
3084 return AMDGPU::DS_GWS_SEMA_P;
3085 case Intrinsic::amdgcn_ds_gws_sema_release_all:
3086 return AMDGPU::DS_GWS_SEMA_RELEASE_ALL;
3092void AMDGPUDAGToDAGISel::SelectDS_GWS(
SDNode *
N,
unsigned IntrID) {
3093 if (!Subtarget->hasGWS() ||
3094 (IntrID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
3095 !Subtarget->hasGWSSemaReleaseAll())) {
3102 const bool HasVSrc =
N->getNumOperands() == 4;
3103 assert(HasVSrc ||
N->getNumOperands() == 3);
3106 SDValue BaseOffset =
N->getOperand(HasVSrc ? 3 : 2);
3109 MachineMemOperand *MMO =
M->getMemOperand();
3122 glueCopyToM0(
N,
CurDAG->getTargetConstant(0, SL, MVT::i32));
3123 ImmOffset = ConstOffset->getZExtValue();
3125 if (
CurDAG->isBaseWithConstantOffset(BaseOffset)) {
3134 =
CurDAG->getMachineNode(AMDGPU::V_READFIRSTLANE_B32, SL, MVT::i32,
3138 =
CurDAG->getMachineNode(AMDGPU::S_LSHL_B32, SL, MVT::i32,
3140 CurDAG->getTargetConstant(16, SL, MVT::i32));
3141 glueCopyToM0(
N,
SDValue(M0Base, 0));
3145 SDValue OffsetField =
CurDAG->getTargetConstant(ImmOffset, SL, MVT::i32);
3149 const MCInstrDesc &InstrDesc =
TII->get(
Opc);
3150 int Data0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::data0);
3152 const TargetRegisterClass *DataRC =
TII->getRegClass(InstrDesc, Data0Idx);
3156 const SIRegisterInfo *
TRI = Subtarget->getRegisterInfo();
3159 MVT DataVT =
Data.getValueType().getSimpleVT();
3160 if (
TRI->isTypeLegalForClass(*DataRC, DataVT)) {
3162 Ops.push_back(
N->getOperand(2));
3168 CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32),
3170 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, SL, MVT::i32),
3172 CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32)};
3175 SL, MVT::v2i32, RegSeqOps),
3180 Ops.push_back(OffsetField);
3181 Ops.push_back(Chain);
3183 SDNode *Selected =
CurDAG->SelectNodeTo(
N,
Opc,
N->getVTList(),
Ops);
3187void AMDGPUDAGToDAGISel::SelectInterpP1F16(
SDNode *
N) {
3188 if (Subtarget->getLDSBankCount() != 16) {
3218 SDVTList VTs =
CurDAG->getVTList(MVT::f32, MVT::Other);
3221 CurDAG->getMachineNode(AMDGPU::V_INTERP_MOV_F32,
DL, VTs, {
3222 CurDAG->getTargetConstant(2,
DL, MVT::i32),
3228 SDNode *InterpP1LV =
3229 CurDAG->getMachineNode(AMDGPU::V_INTERP_P1LV_F16,
DL, MVT::f32, {
3230 CurDAG->getTargetConstant(0,
DL, MVT::i32),
3234 CurDAG->getTargetConstant(0,
DL, MVT::i32),
3237 CurDAG->getTargetConstant(0,
DL, MVT::i1),
3238 CurDAG->getTargetConstant(0,
DL, MVT::i32),
3245void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(
SDNode *
N) {
3246 unsigned IntrID =
N->getConstantOperandVal(1);
3248 case Intrinsic::amdgcn_ds_append:
3249 case Intrinsic::amdgcn_ds_consume: {
3250 if (
N->getValueType(0) != MVT::i32)
3252 SelectDSAppendConsume(
N, IntrID);
3255 case Intrinsic::amdgcn_ds_bvh_stack_rtn:
3256 case Intrinsic::amdgcn_ds_bvh_stack_push4_pop1_rtn:
3257 case Intrinsic::amdgcn_ds_bvh_stack_push8_pop1_rtn:
3258 case Intrinsic::amdgcn_ds_bvh_stack_push8_pop2_rtn:
3259 SelectDSBvhStackIntrinsic(
N, IntrID);
3261 case Intrinsic::amdgcn_init_whole_wave:
3262 CurDAG->getMachineFunction()
3263 .getInfo<SIMachineFunctionInfo>()
3264 ->setInitWholeWave();
3271void AMDGPUDAGToDAGISel::SelectINTRINSIC_WO_CHAIN(
SDNode *
N) {
3272 unsigned IntrID =
N->getConstantOperandVal(0);
3273 unsigned Opcode = AMDGPU::INSTRUCTION_LIST_END;
3274 SDNode *ConvGlueNode =
N->getGluedNode();
3280 CurDAG->getMachineNode(TargetOpcode::CONVERGENCECTRL_GLUE, {},
3281 MVT::Glue,
SDValue(ConvGlueNode, 0));
3283 ConvGlueNode =
nullptr;
3286 case Intrinsic::amdgcn_wqm:
3287 Opcode = AMDGPU::WQM;
3289 case Intrinsic::amdgcn_softwqm:
3290 Opcode = AMDGPU::SOFT_WQM;
3292 case Intrinsic::amdgcn_wwm:
3293 case Intrinsic::amdgcn_strict_wwm:
3294 Opcode = AMDGPU::STRICT_WWM;
3296 case Intrinsic::amdgcn_strict_wqm:
3297 Opcode = AMDGPU::STRICT_WQM;
3299 case Intrinsic::amdgcn_interp_p1_f16:
3300 SelectInterpP1F16(
N);
3302 case Intrinsic::amdgcn_permlane16_swap:
3303 case Intrinsic::amdgcn_permlane32_swap: {
3304 if ((IntrID == Intrinsic::amdgcn_permlane16_swap &&
3305 !Subtarget->hasPermlane16Swap()) ||
3306 (IntrID == Intrinsic::amdgcn_permlane32_swap &&
3307 !Subtarget->hasPermlane32Swap())) {
3312 Opcode = IntrID == Intrinsic::amdgcn_permlane16_swap
3313 ? AMDGPU::V_PERMLANE16_SWAP_B32_e64
3314 : AMDGPU::V_PERMLANE32_SWAP_B32_e64;
3318 NewOps.push_back(
SDValue(ConvGlueNode, 0));
3320 bool FI =
N->getConstantOperandVal(3);
3321 NewOps[2] =
CurDAG->getTargetConstant(
3324 CurDAG->SelectNodeTo(
N, Opcode,
N->getVTList(), NewOps);
3332 if (Opcode != AMDGPU::INSTRUCTION_LIST_END) {
3334 CurDAG->SelectNodeTo(
N, Opcode,
N->getVTList(), {Src});
3339 NewOps.push_back(
SDValue(ConvGlueNode, 0));
3340 CurDAG->MorphNodeTo(
N,
N->getOpcode(),
N->getVTList(), NewOps);
3344void AMDGPUDAGToDAGISel::SelectINTRINSIC_VOID(
SDNode *
N) {
3345 unsigned IntrID =
N->getConstantOperandVal(1);
3347 case Intrinsic::amdgcn_ds_gws_init:
3348 case Intrinsic::amdgcn_ds_gws_barrier:
3349 case Intrinsic::amdgcn_ds_gws_sema_v:
3350 case Intrinsic::amdgcn_ds_gws_sema_br:
3351 case Intrinsic::amdgcn_ds_gws_sema_p:
3352 case Intrinsic::amdgcn_ds_gws_sema_release_all:
3353 SelectDS_GWS(
N, IntrID);
3355 case Intrinsic::amdgcn_tensor_load_to_lds:
3356 case Intrinsic::amdgcn_tensor_store_from_lds:
3357 SelectTensorLoadStore(
N, IntrID);
3366void AMDGPUDAGToDAGISel::SelectWAVE_ADDRESS(
SDNode *
N) {
3368 CurDAG->getTargetConstant(Subtarget->getWavefrontSizeLog2(), SDLoc(
N), MVT::i32);
3369 CurDAG->SelectNodeTo(
N, AMDGPU::S_LSHR_B32,
N->getVTList(),
3370 {N->getOperand(0), Log2WaveSize});
3373void AMDGPUDAGToDAGISel::SelectSTACKRESTORE(
SDNode *
N) {
3388 Subtarget->getWavefrontSizeLog2(), SL, MVT::i32);
3390 if (
N->isDivergent()) {
3391 SrcVal =
SDValue(
CurDAG->getMachineNode(AMDGPU::V_READFIRSTLANE_B32, SL,
3396 CopyVal =
SDValue(
CurDAG->getMachineNode(AMDGPU::S_LSHL_B32, SL, MVT::i32,
3397 {SrcVal, Log2WaveSize}),
3401 SDValue CopyToSP =
CurDAG->getCopyToReg(
N->getOperand(0), SL,
SP, CopyVal);
3405bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(
SDValue In,
SDValue &Src,
3407 bool IsCanonicalizing,
3408 bool AllowAbs)
const {
3414 Src = Src.getOperand(0);
3415 }
else if (Src.getOpcode() ==
ISD::FSUB && IsCanonicalizing) {
3419 if (
LHS &&
LHS->isZero()) {
3421 Src = Src.getOperand(1);
3425 if (AllowAbs && Src.getOpcode() ==
ISD::FABS) {
3427 Src = Src.getOperand(0);
3440 if (IsCanonicalizing)
3455 EVT VT = Src.getValueType();
3457 (VT != MVT::i32 && VT != MVT::v2i32 && VT != MVT::i64))
3464 auto ReplaceSrc = [&]() ->
SDValue {
3466 return Src.getOperand(0);
3471 Src.getValueType(),
LHS, Index);
3497 if (SelectVOP3ModsImpl(In, Src, Mods,
true,
3499 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3506bool AMDGPUDAGToDAGISel::SelectVOP3ModsNonCanonicalizing(
3509 if (SelectVOP3ModsImpl(In, Src, Mods,
false,
3511 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3518bool AMDGPUDAGToDAGISel::SelectVOP3BMods(
SDValue In,
SDValue &Src,
3521 if (SelectVOP3ModsImpl(In, Src, Mods,
3524 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3531bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(
SDValue In,
SDValue &Src)
const {
3539bool AMDGPUDAGToDAGISel::SelectVINTERPModsImpl(
SDValue In,
SDValue &Src,
3543 if (SelectVOP3ModsImpl(In, Src, Mods,
3548 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3555bool AMDGPUDAGToDAGISel::SelectVINTERPMods(
SDValue In,
SDValue &Src,
3557 return SelectVINTERPModsImpl(In, Src, SrcMods,
false);
3560bool AMDGPUDAGToDAGISel::SelectVINTERPModsHi(
SDValue In,
SDValue &Src,
3562 return SelectVINTERPModsImpl(In, Src, SrcMods,
true);
3565bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(
SDValue In,
SDValue &Src,
3569 Clamp =
CurDAG->getTargetConstant(0,
DL, MVT::i1);
3570 Omod =
CurDAG->getTargetConstant(0,
DL, MVT::i1);
3572 return SelectVOP3Mods(In, Src, SrcMods);
3575bool AMDGPUDAGToDAGISel::SelectVOP3BMods0(
SDValue In,
SDValue &Src,
3579 Clamp =
CurDAG->getTargetConstant(0,
DL, MVT::i1);
3580 Omod =
CurDAG->getTargetConstant(0,
DL, MVT::i1);
3582 return SelectVOP3BMods(In, Src, SrcMods);
3585bool AMDGPUDAGToDAGISel::SelectVOP3OMods(
SDValue In,
SDValue &Src,
3590 Clamp =
CurDAG->getTargetConstant(0,
DL, MVT::i1);
3591 Omod =
CurDAG->getTargetConstant(0,
DL, MVT::i1);
3596bool AMDGPUDAGToDAGISel::SelectVOP3PMods(
SDValue In,
SDValue &Src,
3597 SDValue &SrcMods,
bool IsDOT)
const {
3604 Src = Src.getOperand(0);
3608 (!IsDOT || !Subtarget->hasDOTOpSelHazard())) {
3609 unsigned VecMods = Mods;
3611 SDValue Lo = stripBitcast(Src.getOperand(0));
3612 SDValue Hi = stripBitcast(Src.getOperand(1));
3615 Lo = stripBitcast(
Lo.getOperand(0));
3620 Hi = stripBitcast(
Hi.getOperand(0));
3630 unsigned VecSize = Src.getValueSizeInBits();
3631 Lo = stripExtractLoElt(
Lo);
3632 Hi = stripExtractLoElt(
Hi);
3634 if (
Lo.getValueSizeInBits() > VecSize) {
3635 Lo =
CurDAG->getTargetExtractSubreg(
3636 (VecSize > 32) ? AMDGPU::sub0_sub1 : AMDGPU::sub0, SDLoc(In),
3640 if (
Hi.getValueSizeInBits() > VecSize) {
3641 Hi =
CurDAG->getTargetExtractSubreg(
3642 (VecSize > 32) ? AMDGPU::sub0_sub1 : AMDGPU::sub0, SDLoc(In),
3646 assert(
Lo.getValueSizeInBits() <= VecSize &&
3647 Hi.getValueSizeInBits() <= VecSize);
3649 if (
Lo ==
Hi && !isInlineImmediate(
Lo.getNode())) {
3653 if (VecSize ==
Lo.getValueSizeInBits()) {
3655 }
else if (VecSize == 32) {
3656 Src = createVOP3PSrc32FromLo16(
Lo, Src,
CurDAG, Subtarget);
3658 assert(
Lo.getValueSizeInBits() == 32 && VecSize == 64);
3662 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, SL,
3663 Lo.getValueType()), 0);
3664 auto RC =
Lo->isDivergent() ? AMDGPU::VReg_64RegClassID
3665 : AMDGPU::SReg_64RegClassID;
3667 CurDAG->getTargetConstant(RC, SL, MVT::i32),
3668 Lo,
CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32),
3669 Undef,
CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32) };
3671 Src =
SDValue(
CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, SL,
3672 Src.getValueType(),
Ops), 0);
3674 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3680 .bitcastToAPInt().getZExtValue();
3682 Src =
CurDAG->getTargetConstant(
Lit, SDLoc(In), MVT::i64);
3683 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3690 Src.getNumOperands() == 2) {
3696 ArrayRef<int>
Mask = SVN->getMask();
3698 if (Mask[0] < 2 && Mask[1] < 2) {
3700 SDValue ShuffleSrc = SVN->getOperand(0);
3713 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3721 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3725bool AMDGPUDAGToDAGISel::SelectVOP3PModsDOT(
SDValue In,
SDValue &Src,
3727 return SelectVOP3PMods(In, Src, SrcMods,
true);
3730bool AMDGPUDAGToDAGISel::SelectVOP3PNoModsDOT(
SDValue In,
SDValue &Src)
const {
3732 SelectVOP3PMods(In, SrcTmp, SrcModsTmp,
true);
3741bool AMDGPUDAGToDAGISel::SelectVOP3PModsF32(
SDValue In,
SDValue &Src,
3743 SelectVOP3Mods(In, Src, SrcMods);
3746 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3750bool AMDGPUDAGToDAGISel::SelectVOP3PNoModsF32(
SDValue In,
SDValue &Src)
const {
3752 SelectVOP3PModsF32(In, SrcTmp, SrcModsTmp);
3761bool AMDGPUDAGToDAGISel::SelectWMMAOpSelVOP3PMods(
SDValue In,
3764 assert(
C->getAPIntValue().getBitWidth() == 1 &&
"expected i1 value");
3767 unsigned SrcVal =
C->getZExtValue();
3771 Src =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3778 unsigned DstRegClass;
3780 switch (Elts.
size()) {
3782 DstRegClass = AMDGPU::VReg_256RegClassID;
3786 DstRegClass = AMDGPU::VReg_128RegClassID;
3790 DstRegClass = AMDGPU::VReg_64RegClassID;
3798 Ops.push_back(
CurDAG->getTargetConstant(DstRegClass,
DL, MVT::i32));
3799 for (
unsigned i = 0; i < Elts.
size(); ++i) {
3800 Ops.push_back(Elts[i]);
3801 Ops.push_back(
CurDAG->getTargetConstant(
3804 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
DL, DstTy,
Ops);
3811 assert(
"unhandled Reg sequence size" &&
3812 (Elts.
size() == 8 || Elts.
size() == 16));
3816 for (
unsigned i = 0; i < Elts.
size(); i += 2) {
3817 SDValue LoSrc = stripExtractLoElt(stripBitcast(Elts[i]));
3822 if (Subtarget->useRealTrue16Insts()) {
3827 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
DL, MVT::i16),
3830 emitRegSequence(*
CurDAG, AMDGPU::VGPR_32RegClassID, MVT::i32,
3831 {Elts[i],
Undef}, {AMDGPU::lo16, AMDGPU::hi16},
DL);
3832 Elts[i + 1] = emitRegSequence(*
CurDAG, AMDGPU::VGPR_32RegClassID,
3833 MVT::i32, {Elts[i + 1],
Undef},
3834 {AMDGPU::lo16, AMDGPU::hi16},
DL);
3836 SDValue PackLoLo =
CurDAG->getTargetConstant(0x05040100,
DL, MVT::i32);
3838 CurDAG->getMachineNode(AMDGPU::V_PERM_B32_e64,
DL, MVT::i32,
3839 {Elts[i + 1], Elts[i], PackLoLo});
3843 return buildRegSequence32(PackedElts,
DL);
3849 unsigned ElementSize)
const {
3850 if (ElementSize == 16)
3851 return buildRegSequence16(Elts,
DL);
3852 if (ElementSize == 32)
3853 return buildRegSequence32(Elts,
DL);
3857void AMDGPUDAGToDAGISel::selectWMMAModsNegAbs(
unsigned ModOpcode,
3861 unsigned ElementSize)
const {
3866 for (
auto El : Elts) {
3869 NegAbsElts.
push_back(El->getOperand(0));
3871 if (Elts.size() != NegAbsElts.
size()) {
3873 Src =
SDValue(buildRegSequence(Elts,
DL, ElementSize), 0);
3877 Src =
SDValue(buildRegSequence(NegAbsElts,
DL, ElementSize), 0);
3883 Src =
SDValue(buildRegSequence(Elts,
DL, ElementSize), 0);
3891 std::function<
bool(
SDValue)> ModifierCheck) {
3895 for (
unsigned i = 0; i < F16Pair->getNumOperands(); ++i) {
3896 SDValue ElF16 = stripBitcast(F16Pair->getOperand(i));
3897 if (!ModifierCheck(ElF16))
3904bool AMDGPUDAGToDAGISel::SelectWMMAModsF16Neg(
SDValue In,
SDValue &Src,
3922 Src =
SDValue(buildRegSequence16(EltsF16, SDLoc(In)), 0);
3941 Src =
SDValue(buildRegSequence32(EltsV2F16, SDLoc(In)), 0);
3947 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3951bool AMDGPUDAGToDAGISel::SelectWMMAModsF16NegAbs(
SDValue In,
SDValue &Src,
3962 if (EltsF16.
empty())
3972 selectWMMAModsNegAbs(ModOpcode, Mods, EltsF16, Src, SDLoc(In), 16);
3982 if (EltsV2F16.
empty())
3991 selectWMMAModsNegAbs(ModOpcode, Mods, EltsV2F16, Src, SDLoc(In), 32);
3994 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3998bool AMDGPUDAGToDAGISel::SelectWMMAModsF32NegAbs(
SDValue In,
SDValue &Src,
4008 unsigned ModOpcode =
4019 selectWMMAModsNegAbs(ModOpcode, Mods, EltsF32, Src, SDLoc(In), 32);
4022 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
4026bool AMDGPUDAGToDAGISel::SelectWMMAVISrc(
SDValue In,
SDValue &Src)
const {
4028 BitVector UndefElements;
4030 if (isInlineImmediate(
Splat.getNode())) {
4032 unsigned Imm =
C->getAPIntValue().getSExtValue();
4033 Src =
CurDAG->getTargetConstant(Imm, SDLoc(In), MVT::i32);
4037 unsigned Imm =
C->getValueAPF().bitcastToAPInt().getSExtValue();
4038 Src =
CurDAG->getTargetConstant(Imm, SDLoc(In), MVT::i32);
4046 SDValue SplatSrc32 = stripBitcast(In);
4048 if (
SDValue Splat32 = SplatSrc32BV->getSplatValue()) {
4049 SDValue SplatSrc16 = stripBitcast(Splat32);
4052 const SIInstrInfo *
TII = Subtarget->getInstrInfo();
4053 std::optional<APInt> RawValue;
4055 RawValue =
C->getValueAPF().bitcastToAPInt();
4057 RawValue =
C->getAPIntValue();
4059 if (RawValue.has_value()) {
4060 EVT VT =
In.getValueType().getScalarType();
4066 if (
TII->isInlineConstant(FloatVal)) {
4067 Src =
CurDAG->getTargetConstant(RawValue.value(), SDLoc(In),
4072 if (
TII->isInlineConstant(RawValue.value())) {
4073 Src =
CurDAG->getTargetConstant(RawValue.value(), SDLoc(In),
4086bool AMDGPUDAGToDAGISel::SelectSWMMACIndex8(
SDValue In,
SDValue &Src,
4092 const llvm::SDValue &ShiftSrc =
In.getOperand(0);
4101 IndexKey =
CurDAG->getTargetConstant(
Key, SDLoc(In), MVT::i32);
4105bool AMDGPUDAGToDAGISel::SelectSWMMACIndex16(
SDValue In,
SDValue &Src,
4111 const llvm::SDValue &ShiftSrc =
In.getOperand(0);
4120 IndexKey =
CurDAG->getTargetConstant(
Key, SDLoc(In), MVT::i32);
4124bool AMDGPUDAGToDAGISel::SelectSWMMACIndex32(
SDValue In,
SDValue &Src,
4132 const SDValue &ExtendSrc =
In.getOperand(0);
4136 const SDValue &CastSrc =
In.getOperand(0);
4140 if (Zero &&
Zero->getZExtValue() == 0)
4151 Src = ExtractVecEltSrc;
4155 IndexKey =
CurDAG->getTargetConstant(
Key, SDLoc(In), MVT::i32);
4159bool AMDGPUDAGToDAGISel::SelectVOP3OpSel(
SDValue In,
SDValue &Src,
4163 SrcMods =
CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
4167bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods(
SDValue In,
SDValue &Src,
4170 return SelectVOP3Mods(In, Src, SrcMods);
4182 Op =
Op.getOperand(0);
4184 IsExtractHigh =
false;
4187 if (!Low16 || !Low16->isZero())
4189 Op = stripBitcast(
Op.getOperand(1));
4190 if (
Op.getValueType() != MVT::bf16)
4195 if (
Op.getValueType() != MVT::i32)
4200 if (Mask->getZExtValue() == 0xffff0000) {
4201 IsExtractHigh =
true;
4202 return Op.getOperand(0);
4211 return Op.getOperand(0);
4220bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsImpl(
SDValue In,
SDValue &Src,
4224 SelectVOP3ModsImpl(In, Src, Mods);
4226 bool IsExtractHigh =
false;
4228 Src = Src.getOperand(0);
4229 }
else if (VT == MVT::bf16) {
4237 if (Src.getValueType() != VT &&
4238 (VT != MVT::bf16 || Src.getValueType() != MVT::i32))
4241 Src = stripBitcast(Src);
4247 SelectVOP3ModsImpl(Src, Src, ModsTmp);
4262 if (Src.getValueSizeInBits() == 16) {
4271 Src.getOperand(0).getValueType() == MVT::i32) {
4272 Src = Src.getOperand(0);
4276 if (Subtarget->useRealTrue16Insts())
4278 Src = createVOP3PSrc32FromLo16(Src, In,
CurDAG, Subtarget);
4279 }
else if (IsExtractHigh)
4285bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsExt(
SDValue In,
SDValue &Src,
4288 if (!SelectVOP3PMadMixModsImpl(In, Src, Mods, MVT::f16))
4290 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
4294bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixMods(
SDValue In,
SDValue &Src,
4297 SelectVOP3PMadMixModsImpl(In, Src, Mods, MVT::f16);
4298 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
4302bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixBF16ModsExt(
SDValue In,
SDValue &Src,
4305 if (!SelectVOP3PMadMixModsImpl(In, Src, Mods, MVT::bf16))
4307 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
4311bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixBF16Mods(
SDValue In,
SDValue &Src,
4314 SelectVOP3PMadMixModsImpl(In, Src, Mods, MVT::bf16);
4315 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
4323 unsigned NumOpcodes = 0;
4336 const uint8_t SrcBits[3] = { 0xf0, 0xcc, 0xaa };
4339 if (
C->isAllOnes()) {
4349 for (
unsigned I = 0;
I < Src.size(); ++
I) {
4363 if (Src.size() == 3) {
4369 if (
C->isAllOnes()) {
4371 for (
unsigned I = 0;
I < Src.size(); ++
I) {
4372 if (Src[
I] ==
LHS) {
4384 Bits = SrcBits[Src.size()];
4389 switch (In.getOpcode()) {
4397 if (!getOperandBits(
LHS, LHSBits) ||
4398 !getOperandBits(
RHS, RHSBits)) {
4399 Src = std::move(Backup);
4400 return std::make_pair(0, 0);
4406 NumOpcodes +=
Op.first;
4407 LHSBits =
Op.second;
4412 NumOpcodes +=
Op.first;
4413 RHSBits =
Op.second;
4418 return std::make_pair(0, 0);
4422 switch (In.getOpcode()) {
4424 TTbl = LHSBits & RHSBits;
4427 TTbl = LHSBits | RHSBits;
4430 TTbl = LHSBits ^ RHSBits;
4436 return std::make_pair(NumOpcodes + 1, TTbl);
4443 unsigned NumOpcodes;
4445 std::tie(NumOpcodes, TTbl) =
BitOp3_Op(In, Src);
4449 if (NumOpcodes < 2 || Src.empty())
4455 if (NumOpcodes < 4 && !In->isDivergent())
4458 if (NumOpcodes == 2 &&
In.getValueType() == MVT::i32) {
4463 (
In.getOperand(0).getOpcode() ==
In.getOpcode() ||
4464 In.getOperand(1).getOpcode() ==
In.getOpcode()))
4478 while (Src.size() < 3)
4479 Src.push_back(Src[0]);
4485 Tbl =
CurDAG->getTargetConstant(TTbl, SDLoc(In), MVT::i32);
4491 return CurDAG->getUNDEF(MVT::i32);
4495 return CurDAG->getConstant(
C->getZExtValue() << 16, SL, MVT::i32);
4500 return CurDAG->getConstant(
4501 C->getValueAPF().bitcastToAPInt().getZExtValue() << 16, SL, MVT::i32);
4511bool AMDGPUDAGToDAGISel::isVGPRImm(
const SDNode *
N)
const {
4512 assert(
CurDAG->getTarget().getTargetTriple().isAMDGCN());
4514 const SIRegisterInfo *SIRI = Subtarget->getRegisterInfo();
4515 const SIInstrInfo *SII = Subtarget->getInstrInfo();
4518 bool AllUsesAcceptSReg =
true;
4520 Limit < 10 && U !=
E; ++U, ++Limit) {
4521 const TargetRegisterClass *RC =
4522 getOperandRegClass(
U->getUser(),
U->getOperandNo());
4530 if (RC != &AMDGPU::VS_32RegClass && RC != &AMDGPU::VS_64RegClass &&
4531 RC != &AMDGPU::VS_64_Align2RegClass) {
4532 AllUsesAcceptSReg =
false;
4533 SDNode *
User =
U->getUser();
4534 if (
User->isMachineOpcode()) {
4535 unsigned Opc =
User->getMachineOpcode();
4536 const MCInstrDesc &
Desc = SII->get(
Opc);
4537 if (
Desc.isCommutable()) {
4538 unsigned OpIdx =
Desc.getNumDefs() +
U->getOperandNo();
4541 unsigned CommutedOpNo = CommuteIdx1 -
Desc.getNumDefs();
4542 const TargetRegisterClass *CommutedRC =
4543 getOperandRegClass(
U->getUser(), CommutedOpNo);
4544 if (CommutedRC == &AMDGPU::VS_32RegClass ||
4545 CommutedRC == &AMDGPU::VS_64RegClass ||
4546 CommutedRC == &AMDGPU::VS_64_Align2RegClass)
4547 AllUsesAcceptSReg =
true;
4555 if (!AllUsesAcceptSReg)
4559 return !AllUsesAcceptSReg && (Limit < 10);
4562bool AMDGPUDAGToDAGISel::isUniformLoad(
const SDNode *
N)
const {
4564 const MachineMemOperand *MMO = Ld->getMemOperand();
4582 (Subtarget->getScalarizeGlobalBehavior() &&
4586 ->isMemOpHasNoClobberedMemOperand(
N)));
4592 bool IsModified =
false;
4598 while (Position !=
CurDAG->allnodes_end()) {
4605 if (ResNode !=
Node) {
4611 CurDAG->RemoveDeadNodes();
4612 }
while (IsModified);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static bool getBaseWithOffsetUsingSplitOR(SelectionDAG &DAG, SDValue Addr, SDValue &N0, SDValue &N1)
static SDValue SelectSAddrFI(SelectionDAG *CurDAG, SDValue SAddr)
static SDValue matchExtFromI32orI32(SDValue Op, bool IsSigned, const SelectionDAG *DAG)
static MemSDNode * findMemSDNode(SDNode *N)
static bool IsCopyFromSGPR(const SIRegisterInfo &TRI, SDValue Val)
static SDValue combineBallotPattern(SDValue VCMP, bool &Negate)
static SDValue matchBF16FPExtendLike(SDValue Op, bool &IsExtractHigh)
static void checkWMMAElementsModifiersF16(BuildVectorSDNode *BV, std::function< bool(SDValue)> ModifierCheck)
Defines an instruction selector for the AMDGPU target.
Contains the definition of a TargetInstrInfo class that is common to all AMD GPUs.
static bool isNoUnsignedWrap(MachineInstr *Addr)
static bool isExtractHiElt(MachineRegisterInfo &MRI, Register In, Register &Out)
static std::pair< unsigned, uint8_t > BitOp3_Op(Register R, SmallVectorImpl< Register > &Src, const MachineRegisterInfo &MRI)
static unsigned gwsIntrinToOpcode(unsigned IntrID)
Provides AMDGPU specific target descriptions.
Base class for AMDGPU specific classes of TargetSubtarget.
The AMDGPU TargetMachine interface definition for hw codegen targets.
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
const HexagonInstrInfo * TII
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
Register const TargetRegisterInfo * TRI
Promote Memory to Register
MachineInstr unsigned OpIdx
FunctionAnalysisManager FAM
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Provides R600 specific target descriptions.
Interface definition for R600RegisterInfo.
const SmallVectorImpl< MachineOperand > & Cond
SI DAG Lowering interface definition.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
AMDGPUDAGToDAGISelLegacy(TargetMachine &TM, CodeGenOptLevel OptLevel)
bool runOnMachineFunction(MachineFunction &MF) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
StringRef getPassName() const override
getPassName - Return a nice clean name for a pass.
AMDGPU specific code to select AMDGPU machine instructions for SelectionDAG operations.
bool isSDWAOperand(const SDNode *N) const
void SelectBuildVector(SDNode *N, unsigned RegClassID)
void Select(SDNode *N) override
Main hook for targets to transform nodes into machine nodes.
bool runOnMachineFunction(MachineFunction &MF) override
void SelectVectorShuffle(SDNode *N)
void PreprocessISelDAG() override
PreprocessISelDAG - This hook allows targets to hack on the graph before instruction selection starts...
AMDGPUDAGToDAGISel()=delete
void PostprocessISelDAG() override
PostprocessISelDAG() - This hook allows the target to hack on the graph right after selection.
bool matchLoadD16FromBuildVector(SDNode *N) const
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
AMDGPUISelDAGToDAGPass(TargetMachine &TM)
static SDValue stripBitcast(SDValue Val)
static const fltSemantics & BFloat()
static const fltSemantics & IEEEhalf()
Class for arbitrary precision integers.
uint64_t getZExtValue() const
Get zero extended value.
bool isSignMask() const
Check if the APInt's value is returned by getSignMask.
bool isMaxSignedValue() const
Determine if this is the largest signed value.
int64_t getSExtValue() const
Get sign extended value.
unsigned countr_one() const
Count the number of trailing one bits.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
LLVM Basic Block Representation.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
A "pseudo-class" with methods for operating on BUILD_VECTORs.
LLVM_ABI SDValue getSplatValue(const APInt &DemandedElts, BitVector *UndefElements=nullptr) const
Returns the demanded splatted value or a null value if this is not a splat.
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
int64_t getSExtValue() const
Analysis pass which computes a DominatorTree.
Legacy analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
FunctionPass class - This class is used to implement most global optimizations.
const SIInstrInfo * getInstrInfo() const override
bool useRealTrue16Insts() const
Return true if real (non-fake) variants of True16 instructions using 16-bit registers should be code-...
Generation getGeneration() const
void checkSubtargetFeatures(const Function &F) const
Diagnose inconsistent subtarget features before attempting to codegen function F.
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
TypeSize getValue() const
Analysis pass that exposes the LoopInfo for a function.
SmallVector< LoopT *, 4 > getLoopsInPreorder() const
Return all of the loops in the function in preorder across the loop nests, with siblings in forward p...
The legacy pass manager's analysis pass to compute loop information.
static MVT getIntegerVT(unsigned BitWidth)
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
LocationSize getSize() const
Return the size in bytes of the memory reference.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
An SDNode that represents everything that will be needed to construct a MachineInstr.
This is an abstract virtual class for memory operations.
unsigned getAddressSpace() const
Return the address space for the associated pointer.
MachineMemOperand * getMemOperand() const
Return the unique MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
AnalysisType & getAnalysis() const
getAnalysis<AnalysisType>() - This function is used by subclasses to get to the analysis information ...
A set of analyses that are preserved following a run of a transformation pass.
Wrapper class representing virtual and physical registers.
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
SDNodeFlags getFlags() const
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumOperands() const
Return the number of values used by this operation.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
bool isPredecessorOf(const SDNode *N) const
Return true if this node is a predecessor of N.
bool isAnyAdd() const
Returns true if the node type is ADD or PTRADD.
static use_iterator use_end()
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
uint64_t getConstantOperandVal(unsigned i) const
unsigned getOpcode() const
static unsigned getMaxMUBUFImmOffset(const GCNSubtarget &ST)
bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx0, unsigned &SrcOpIdx1) const override
static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs=1)
static LLVM_READONLY const TargetRegisterClass * getSGPRClassForBitWidth(unsigned BitWidth)
static bool isSGPRClass(const TargetRegisterClass *RC)
bool runOnMachineFunction(MachineFunction &MF) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
SelectionDAGISelLegacy(char &ID, std::unique_ptr< SelectionDAGISel > S)
SelectionDAGISelPass(std::unique_ptr< SelectionDAGISel > Selector)
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
std::unique_ptr< FunctionLoweringInfo > FuncInfo
const TargetLowering * TLI
const TargetInstrInfo * TII
void ReplaceUses(SDValue F, SDValue T)
ReplaceUses - replace all uses of the old node F with the use of the new node T.
void ReplaceNode(SDNode *F, SDNode *T)
Replace all uses of F with T, then remove F from the DAG.
SelectionDAGISel(TargetMachine &tm, CodeGenOptLevel OL=CodeGenOptLevel::Default)
virtual bool runOnMachineFunction(MachineFunction &mf)
const TargetLowering * getTargetLowering() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
SDValue getTargetFrameIndex(int FI, EVT VT)
LLVM_ABI bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI bool isBaseWithConstantOffset(SDValue Op) const
Return true if the specified operand is an ISD::ADD with a ConstantSDNode on the right-hand side,...
MachineFunction & getMachineFunction() const
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
ilist< SDNode >::iterator allnodes_iterator
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
static const unsigned CommuteAnyOperandIndex
Primary interface to the complete machine description for the target machine.
unsigned getID() const
Return the register class ID number.
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ CONSTANT_ADDRESS_32BIT
Address space for 32-bit constant memory.
@ REGION_ADDRESS
Address space for region memory. (GDS)
@ LOCAL_ADDRESS
Address space for local memory.
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
@ FLAT_ADDRESS
Address space for flat memory.
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
@ PRIVATE_ADDRESS
Address space for private memory.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
std::optional< int64_t > getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST, int64_t ByteOffset)
bool isGFX12Plus(const MCSubtargetInfo &STI)
constexpr int64_t getNullPointerValue(unsigned AS)
Get the null pointer value for the given address space.
bool isValid32BitLiteral(uint64_t Val, bool IsFP64)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST)
std::optional< int64_t > getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset, bool IsBuffer, bool HasSOffset)
bool isUniformMMO(const MachineMemOperand *MMO)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to; it returns an output chain.
@ PTRADD
PTRADD represents pointer arithmetic semantics, for targets that opt in using shouldPreservePtrArith(...
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
@ FMAD
FMAD - Perform a * b + c, while getting the same result as the separately rounded operations.
@ ADD
Simple integer binary arithmetic operators.
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
@ FADD
Simple binary floating point operators.
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
@ FLDEXP
FLDEXP - ldexp, inspired by libm (op0 * 2**op1).
@ CONVERGENCECTRL_GLUE
This does not correspond to any convergence control intrinsic.
@ SIGN_EXTEND
Conversion operators.
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
@ UNDEF
UNDEF - An undefined node.
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
@ SHL
Shift and rotation operations.
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum maximum on two values, following IEEE-754 definition...
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
@ AND
Bitwise operators - logical and, logical or, logical xor.
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
@ BRCOND
BRCOND - Conditional branch.
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
bool isExtOpcode(unsigned Opcode)
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
@ User
could "use" a pointer
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
@ Undef
Value of the register doesn't matter.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
constexpr bool isMask_32(uint32_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
bool isBoolSGPR(SDValue V)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
static bool getConstantValue(SDValue N, uint32_t &Out)
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
CodeGenOptLevel
Code generation optimization level.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
FunctionAddr VTableAddr uintptr_t uintptr_t Data
FunctionPass * createAMDGPUISelDag(TargetMachine &TM, CodeGenOptLevel OptLevel)
This pass converts a legalized DAG into an AMDGPU-specific DAG, ready for instruction scheduling.
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ And
Bitwise or logical AND of integers.
@ Sub
Subtraction of integers.
DWARFExpression::Operation Op
unsigned M0(unsigned Val)
LLVM_ABI ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
Implement std::hash so that hash_code can be used in STL containers.
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
uint64_t getScalarSizeInBits() const
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
bool bitsEq(EVT VT) const
Return true if this has the same number of bits as VT.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
static KnownBits makeConstant(const APInt &C)
Create known bits from a known constant.
static KnownBits add(const KnownBits &LHS, const KnownBits &RHS, bool NSW=false, bool NUW=false, bool SelfAdd=false)
Compute knownbits resulting from addition of LHS and RHS.
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
APInt getMinValue() const
Return the minimal unsigned value possible given these KnownBits.
static unsigned getSubRegFromChannel(unsigned Channel)
bool hasNoUnsignedWrap() const
This represents a list of ValueType's that has been intern'd by a SelectionDAG.