33#include "llvm/IR/IntrinsicsAMDGPU.h" 
   40#define DEBUG_TYPE "si-instr-info" 
   42#define GET_INSTRINFO_CTOR_DTOR 
   43#include "AMDGPUGenInstrInfo.inc" 
   46#define GET_D16ImageDimIntrinsics_IMPL 
   47#define GET_ImageDimIntrinsicTable_IMPL 
   48#define GET_RsrcIntrinsics_IMPL 
   49#include "AMDGPUGenSearchableTables.inc" 
   57                 cl::desc(
"Restrict range of branch instructions (DEBUG)"));
 
   60  "amdgpu-fix-16-bit-physreg-copies",
 
   61  cl::desc(
"Fix copies between 32 and 16 bit registers by extending to 32 bit"),
 
   76  unsigned N = 
Node->getNumOperands();
 
   77  while (
N && 
Node->getOperand(
N - 1).getValueType() == MVT::Glue)
 
 
   89  int Op0Idx = AMDGPU::getNamedOperandIdx(Opc0, 
OpName);
 
   90  int Op1Idx = AMDGPU::getNamedOperandIdx(Opc1, 
OpName);
 
   92  if (Op0Idx == -1 && Op1Idx == -1)
 
   96  if ((Op0Idx == -1 && Op1Idx != -1) ||
 
   97      (Op1Idx == -1 && Op0Idx != -1))
 
 
  118    return !
MI.memoperands_empty() &&
 
  120             return MMO->isLoad() && MMO->isInvariant();
 
 
    if (!MI.hasImplicitDef() &&
        MI.getNumImplicitOperands() == MI.getDesc().implicit_uses().size() &&
        !MI.mayRaiseFPException())

bool SIInstrInfo::resultDependsOnExec(const MachineInstr &MI) const {
  if (MI.isCompare()) {
      switch (Use.getOpcode()) {
      case AMDGPU::S_AND_SAVEEXEC_B32:
      case AMDGPU::S_AND_SAVEEXEC_B64:
      case AMDGPU::S_AND_B32:
      case AMDGPU::S_AND_B64:
        if (!Use.readsRegister(AMDGPU::EXEC, nullptr))

  switch (MI.getOpcode()) {
  case AMDGPU::V_READFIRSTLANE_B32:

  if (MI.getOpcode() == AMDGPU::SI_IF_BREAK)

  for (auto Op : MI.uses()) {
    if (Op.isReg() && Op.getReg().isVirtual() &&
        RI.isSGPRClass(MRI.getRegClass(Op.getReg()))) {
      if (FromCycle == nullptr)
      while (FromCycle && !FromCycle->contains(ToCycle)) {
 
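// areLoadsFromSameBasePtr: both nodes must be loads that produce a result;
// the base pointers are compared and the named offset operands (adjusted for
// the number of defs) are returned through Offset0/Offset1.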
                                          int64_t &Offset1) const {
  if (!get(Opc0).mayLoad() || !get(Opc1).mayLoad())
  if (!get(Opc0).getNumDefs() || !get(Opc1).getNumDefs())

    int Offset0Idx = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::offset);
    int Offset1Idx = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::offset);
    if (Offset0Idx == -1 || Offset1Idx == -1)
    Offset0Idx -= get(Opc0).NumDefs;
    Offset1Idx -= get(Opc1).NumDefs;

    if (!Load0Offset || !Load1Offset)

    int OffIdx0 = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::offset);
    int OffIdx1 = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::offset);
    if (OffIdx0 == -1 || OffIdx1 == -1)
    OffIdx0 -= get(Opc0).NumDefs;
    OffIdx1 -= get(Opc1).NumDefs;

  case AMDGPU::DS_READ2ST64_B32:
  case AMDGPU::DS_READ2ST64_B64:
  case AMDGPU::DS_WRITE2ST64_B32:
  case AMDGPU::DS_WRITE2ST64_B64:
 
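// getMemOperandsWithOffsetWidth: report the base operand(s), constant offset,
// and data-operand width for DS, MUBUF/MTBUF, MIMG, SMRD, and FLAT memory
// instructions; OffsetIsScalable is always false on this target.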
  OffsetIsScalable = false;

      DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
        DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);
      if (Opc == AMDGPU::DS_ATOMIC_ASYNC_BARRIER_ARRIVE_B64)

      unsigned Offset0 = Offset0Op->getImm() & 0xff;
      unsigned Offset1 = Offset1Op->getImm() & 0xff;
      if (Offset0 + 1 != Offset1)
        int Data0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);
      Offset = EltSize * Offset0;
      DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
      if (DataOpIdx == -1) {
        DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);
        DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data1);

    if (BaseOp && !BaseOp->isFI())
      if (SOffset->isReg())
    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
      DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);

        isMIMG(LdSt) ? AMDGPU::OpName::srsrc : AMDGPU::OpName::rsrc;
    int SRsrcIdx = AMDGPU::getNamedOperandIdx(Opc, RsrcOpName);
    int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
    if (VAddr0Idx >= 0) {
      for (int I = VAddr0Idx; I < SRsrcIdx; ++I)

    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst);
    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
      DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
 
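// memOpsHaveSameBasePtr compares the first base operands (or the underlying
// IR values of the memory operands) of two accesses; shouldClusterMemOps then
// limits clustering by the total number of dwords loaded per cluster.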
  if (BaseOps1.front()->isIdenticalTo(*BaseOps2.front()))

  if (MO1->getAddrSpace() != MO2->getAddrSpace())

  const auto *Base1 = MO1->getValue();
  const auto *Base2 = MO2->getValue();
  if (!Base1 || !Base2)

  return Base1 == Base2;

                                      int64_t Offset1, bool OffsetIsScalable1,
                                      int64_t Offset2, bool OffsetIsScalable2,
                                      unsigned ClusterSize,
                                      unsigned NumBytes) const {
  } else if (!BaseOps1.empty() || !BaseOps2.empty()) {

  const unsigned LoadSize = NumBytes / ClusterSize;
  const unsigned NumDWords = ((LoadSize + 3) / 4) * ClusterSize;
  return NumDWords <= MaxMemoryClusterDWords;

                                          int64_t Offset0, int64_t Offset1,
                                          unsigned NumLoads) const {
  assert(Offset1 > Offset0 &&
         "Second offset should be larger than first offset!");
  return (NumLoads <= 16 && (Offset1 - Offset0) < 64);

                              const char *Msg = "illegal VGPR to SGPR copy") {
 
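// indirectCopyToAGPR: on GFX908 (MAI without the GFX90A AGPR-to-AGPR move)
// an AGPR <- AGPR copy has to bounce through a reserved VGPR; the helper also
// tries to reuse a preceding V_ACCVGPR_WRITE of the source instead of copying.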
  assert((TII.getSubtarget().hasMAIInsts() &&
          !TII.getSubtarget().hasGFX90AInsts()) &&
         "Expected GFX908 subtarget.");
          AMDGPU::AGPR_32RegClass.contains(SrcReg)) &&
         "Source register of the copy should be either an SGPR or an AGPR.");
         "Destination register of the copy should be an AGPR.");

    for (auto Def = MI, E = MBB.begin(); Def != E; ) {
      if (!Def->modifiesRegister(SrcReg, &RI))
      if (Def->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 ||
          Def->getOperand(0).getReg() != SrcReg)
        bool SafeToPropagate = true;
        for (auto I = Def; I != MI && SafeToPropagate; ++I)
          if (I->modifiesRegister(DefOp.getReg(), &RI))
            SafeToPropagate = false;
        if (!SafeToPropagate)
        for (auto I = Def; I != MI; ++I)
          I->clearRegisterKills(DefOp.getReg(), &RI);
      if (ImpUseSuperReg) {
        Builder.addReg(ImpUseSuperReg,

  RS.enterBasicBlockEnd(MBB);
  RS.backward(std::next(MI));
  unsigned RegNo = (DestReg - AMDGPU::AGPR0) % 3;
  assert(MBB.getParent()->getRegInfo().isReserved(Tmp) &&
         "VGPR used for an intermediate copy should have been reserved.");
    Register Tmp2 = RS.scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass, MI,
  unsigned TmpCopyOp = AMDGPU::V_MOV_B32_e32;
  if (AMDGPU::AGPR_32RegClass.contains(SrcReg)) {
    TmpCopyOp = AMDGPU::V_ACCVGPR_READ_B32_e64;
  if (ImpUseSuperReg) {
    UseBuilder.addReg(ImpUseSuperReg,
 
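// expandSGPRCopy: copy an SGPR tuple one 32-bit subregister at a time,
// pairing adjacent even-aligned subregisters into S_MOV_B64 where possible,
// and mark the final instruction with the kill of the source super-register.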
  for (unsigned Idx = 0; Idx < BaseIndices.size(); ++Idx) {
    int16_t SubIdx = BaseIndices[Idx];
    Register DestSubReg = RI.getSubReg(DestReg, SubIdx);
    Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
    assert(DestSubReg && SrcSubReg && "Failed to find subregs!");
    unsigned Opcode = AMDGPU::S_MOV_B32;

    bool AlignedDest = ((DestSubReg - AMDGPU::SGPR0) % 2) == 0;
    bool AlignedSrc = ((SrcSubReg - AMDGPU::SGPR0) % 2) == 0;
    if (AlignedDest && AlignedSrc && (Idx + 1 < BaseIndices.size())) {
      DestSubReg = RI.getSubReg(DestReg, SubIdx);
      SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
      assert(DestSubReg && SrcSubReg && "Failed to find subregs!");
      Opcode = AMDGPU::S_MOV_B64;

  assert(FirstMI && LastMI);
    LastMI->addRegisterKilled(SrcReg, &RI);
 
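// copyPhysReg: the central physical-register copy expansion. It normalizes
// mixed 16/32-bit true16 copies, then dispatches on the destination class
// (VGPR_32, SReg_32/SReg_64, SCC, AGPR_32, 16-bit halves, VGPR64, ...) and
// finally splits wide copies into per-subregister moves, choosing between
// V_MOV_B32, V_MOV_B64, V_PK_MOV_B32, V_ACCVGPR_* and S_MOV_* forms.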
                              Register SrcReg, bool KillSrc, bool RenamableDest,
                              bool RenamableSrc) const {
  unsigned Size = RI.getRegSizeInBits(*RC);
  unsigned SrcSize = RI.getRegSizeInBits(*SrcRC);

    if (((Size == 16) != (SrcSize == 16))) {
      assert(ST.useRealTrue16Insts());
      if (DestReg == SrcReg) {
      RC = RI.getPhysRegBaseClass(DestReg);
      Size = RI.getRegSizeInBits(*RC);
      SrcRC = RI.getPhysRegBaseClass(SrcReg);
      SrcSize = RI.getRegSizeInBits(*SrcRC);

  if (RC == &AMDGPU::VGPR_32RegClass) {
           AMDGPU::SReg_32RegClass.contains(SrcReg) ||
           AMDGPU::AGPR_32RegClass.contains(SrcReg));
    unsigned Opc = AMDGPU::AGPR_32RegClass.contains(SrcReg) ?
                     AMDGPU::V_ACCVGPR_READ_B32_e64 : AMDGPU::V_MOV_B32_e32;

  if (RC == &AMDGPU::SReg_32_XM0RegClass ||
      RC == &AMDGPU::SReg_32RegClass) {
    if (SrcReg == AMDGPU::SCC) {
    if (!AMDGPU::SReg_32RegClass.contains(SrcReg)) {
      if (DestReg == AMDGPU::VCC_LO) {

  if (RC == &AMDGPU::SReg_64RegClass) {
    if (SrcReg == AMDGPU::SCC) {
    if (!AMDGPU::SReg_64_EncodableRegClass.contains(SrcReg)) {
      if (DestReg == AMDGPU::VCC) {

  if (DestReg == AMDGPU::SCC) {
    if (AMDGPU::SReg_64RegClass.contains(SrcReg)) {
      assert(ST.hasScalarCompareEq64());

  if (RC == &AMDGPU::AGPR_32RegClass) {
    if (AMDGPU::VGPR_32RegClass.contains(SrcReg) ||
        (ST.hasGFX90AInsts() && AMDGPU::SReg_32RegClass.contains(SrcReg))) {
    if (AMDGPU::AGPR_32RegClass.contains(SrcReg) && ST.hasGFX90AInsts()) {
    const bool Overlap = RI.regsOverlap(SrcReg, DestReg);

           AMDGPU::SReg_LO16RegClass.contains(SrcReg) ||
           AMDGPU::AGPR_LO16RegClass.contains(SrcReg));
    bool IsSGPRDst = AMDGPU::SReg_LO16RegClass.contains(DestReg);
    bool IsSGPRSrc = AMDGPU::SReg_LO16RegClass.contains(SrcReg);
    bool IsAGPRDst = AMDGPU::AGPR_LO16RegClass.contains(DestReg);
    bool IsAGPRSrc = AMDGPU::AGPR_LO16RegClass.contains(SrcReg);
    MCRegister NewDestReg = RI.get32BitRegister(DestReg);
    MCRegister NewSrcReg = RI.get32BitRegister(SrcReg);
    if (IsAGPRDst || IsAGPRSrc) {
      if (!DstLow || !SrcLow) {
                          "Cannot use hi16 subreg with an AGPR!");
    if (ST.useRealTrue16Insts()) {
      if (AMDGPU::VGPR_16_Lo128RegClass.contains(DestReg) &&
          (IsSGPRSrc || AMDGPU::VGPR_16_Lo128RegClass.contains(SrcReg))) {
    if (IsSGPRSrc && !ST.hasSDWAScalar()) {
      if (!DstLow || !SrcLow) {
                          "Cannot use hi16 subreg on VI!");

  if (RC == RI.getVGPR64Class() && (SrcRC == RC || RI.isSGPRClass(SrcRC))) {
    if (ST.hasMovB64()) {
    if (ST.hasPkMovB32()) {

  const bool Forward = RI.getHWRegIndex(DestReg) <= RI.getHWRegIndex(SrcReg);
  if (RI.isSGPRClass(RC)) {
    if (!RI.isSGPRClass(SrcRC)) {
    const bool CanKillSuperReg = KillSrc && !RI.regsOverlap(SrcReg, DestReg);

  unsigned EltSize = 4;
  unsigned Opcode = AMDGPU::V_MOV_B32_e32;
  if (RI.isAGPRClass(RC)) {
    if (ST.hasGFX90AInsts() && RI.isAGPRClass(SrcRC))
      Opcode = AMDGPU::V_ACCVGPR_MOV_B32;
    else if (RI.hasVGPRs(SrcRC) ||
             (ST.hasGFX90AInsts() && RI.isSGPRClass(SrcRC)))
      Opcode = AMDGPU::V_ACCVGPR_WRITE_B32_e64;
      Opcode = AMDGPU::INSTRUCTION_LIST_END;
  } else if (RI.hasVGPRs(RC) && RI.isAGPRClass(SrcRC)) {
    Opcode = AMDGPU::V_ACCVGPR_READ_B32_e64;
  } else if ((Size % 64 == 0) && RI.hasVGPRs(RC) &&
             (RI.isProperlyAlignedRC(*RC) &&
              (SrcRC == RC || RI.isSGPRClass(SrcRC)))) {
    if (ST.hasMovB64()) {
      Opcode = AMDGPU::V_MOV_B64_e32;
    } else if (ST.hasPkMovB32()) {
      Opcode = AMDGPU::V_PK_MOV_B32;

  std::unique_ptr<RegScavenger> RS;
  if (Opcode == AMDGPU::INSTRUCTION_LIST_END)
    RS = std::make_unique<RegScavenger>();

  const bool Overlap = RI.regsOverlap(SrcReg, DestReg);
  const bool CanKillSuperReg = KillSrc && !Overlap;

  for (unsigned Idx = 0; Idx < SubIndices.size(); ++Idx) {
      SubIdx = SubIndices[Idx];
      SubIdx = SubIndices[SubIndices.size() - Idx - 1];
    Register DestSubReg = RI.getSubReg(DestReg, SubIdx);
    Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
    assert(DestSubReg && SrcSubReg && "Failed to find subregs!");

    bool IsFirstSubreg = Idx == 0;
    bool UseKill = CanKillSuperReg && Idx == SubIndices.size() - 1;
    if (Opcode == AMDGPU::INSTRUCTION_LIST_END) {
                         *RS, Overlap, ImpDefSuper, ImpUseSuper);
    } else if (Opcode == AMDGPU::V_PK_MOV_B32) {

  return &AMDGPU::VGPR_32RegClass;
 
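// The fragment below serves vector selects on a VGPR_32 destination: each
// branch-predicate form (SCC_TRUE/SCC_FALSE, VCCNZ/VCCZ, EXECNZ/EXECZ) is
// first materialized into a lane-mask register (BoolXExecRC) that the select
// can consume.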
  assert(MRI.getRegClass(DstReg) == &AMDGPU::VGPR_32RegClass &&
         "Not a VGPR32 reg");
  if (Cond.size() == 1) {
    Register SReg = MRI.createVirtualRegister(BoolXExecRC);
  } else if (Cond.size() == 2) {
    assert(Cond[0].isImm() && "Cond[0] is not an immediate");
    case SIInstrInfo::SCC_TRUE: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
    case SIInstrInfo::SCC_FALSE: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
    case SIInstrInfo::VCCNZ: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
    case SIInstrInfo::VCCZ: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
    case SIInstrInfo::EXECNZ: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
      Register SReg2 = MRI.createVirtualRegister(RI.getBoolRC());
    case SIInstrInfo::EXECZ: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
      Register SReg2 = MRI.createVirtualRegister(RI.getBoolRC());

  Register Reg = MRI.createVirtualRegister(RI.getBoolRC());

  Register Reg = MRI.createVirtualRegister(RI.getBoolRC());
 
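// getConstValDefinedInReg: recognize the move-immediate opcodes (plain moves,
// S_BREV/V_BFREV reversals, S_NOT/V_NOT inversions) and report the constant
// written into Reg through ImmVal.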
                                          int64_t &ImmVal) const {
  switch (MI.getOpcode()) {
  case AMDGPU::V_MOV_B32_e32:
  case AMDGPU::S_MOV_B32:
  case AMDGPU::S_MOVK_I32:
  case AMDGPU::S_MOV_B64:
  case AMDGPU::V_MOV_B64_e32:
  case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
  case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
  case AMDGPU::AV_MOV_B64_IMM_PSEUDO:
  case AMDGPU::S_MOV_B64_IMM_PSEUDO:
  case AMDGPU::V_MOV_B64_PSEUDO: {
      return MI.getOperand(0).getReg() == Reg;
  case AMDGPU::S_BREV_B32:
  case AMDGPU::V_BFREV_B32_e32:
  case AMDGPU::V_BFREV_B32_e64: {
      return MI.getOperand(0).getReg() == Reg;
  case AMDGPU::S_NOT_B32:
  case AMDGPU::V_NOT_B32_e32:
  case AMDGPU::V_NOT_B32_e64: {
      ImmVal = static_cast<int64_t>(~static_cast<int32_t>(Src0.getImm()));
      return MI.getOperand(0).getReg() == Reg;

  if (RI.isAGPRClass(DstRC))
    return AMDGPU::COPY;
  if (RI.getRegSizeInBits(*DstRC) == 16) {
    return RI.isSGPRClass(DstRC) ? AMDGPU::COPY : AMDGPU::V_MOV_B16_t16_e64;
  if (RI.getRegSizeInBits(*DstRC) == 32)
    return RI.isSGPRClass(DstRC) ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
  if (RI.getRegSizeInBits(*DstRC) == 64 && RI.isSGPRClass(DstRC))
    return AMDGPU::S_MOV_B64;
  if (RI.getRegSizeInBits(*DstRC) == 64 && !RI.isSGPRClass(DstRC))
    return AMDGPU::V_MOV_B64_PSEUDO;
  return AMDGPU::COPY;
 
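// The following tables map a vector size in dwords to the matching indirect
// register read/write pseudo (GPR_IDX form, VGPR MOVREL form, and SGPR
// MOVREL B32/B64 forms); sizes up to 32 dwords (1024 bits) are supported.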
                                     bool IsIndirectSrc) const {
  if (IsIndirectSrc) {
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V1);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V2);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V9);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V10);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V11);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V12);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16);
    if (VecSize <= 1024)
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32);

    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V9);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V10);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V11);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V12);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16);
  if (VecSize <= 1024)
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32);

    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V1;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V2;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V9;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V10;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V11;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V12;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16;
  if (VecSize <= 1024)
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32;

    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V1;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V2;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V3;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V9;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V10;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V11;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V12;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V16;
  if (VecSize <= 1024)
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V32;

    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V1;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V2;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V4;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V8;
  if (VecSize <= 1024)
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V16;

                                             bool IsSGPR) const {
  assert(EltSize == 32 && "invalid reg indexing elt size");
 
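// The spill-save helpers below map a spill size to the matching
// SI_SPILL_{S,V,AV}*_SAVE pseudo (plus the WWM variants); storeRegToStackSlot
// then emits the chosen pseudo, asserting that m0 and exec are never spilled
// and constraining 4-byte virtual SGPR spills to SReg_32_XM0_XEXEC.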
    return AMDGPU::SI_SPILL_S32_SAVE;
    return AMDGPU::SI_SPILL_S64_SAVE;
    return AMDGPU::SI_SPILL_S96_SAVE;
    return AMDGPU::SI_SPILL_S128_SAVE;
    return AMDGPU::SI_SPILL_S160_SAVE;
    return AMDGPU::SI_SPILL_S192_SAVE;
    return AMDGPU::SI_SPILL_S224_SAVE;
    return AMDGPU::SI_SPILL_S256_SAVE;
    return AMDGPU::SI_SPILL_S288_SAVE;
    return AMDGPU::SI_SPILL_S320_SAVE;
    return AMDGPU::SI_SPILL_S352_SAVE;
    return AMDGPU::SI_SPILL_S384_SAVE;
    return AMDGPU::SI_SPILL_S512_SAVE;
    return AMDGPU::SI_SPILL_S1024_SAVE;

    return AMDGPU::SI_SPILL_V16_SAVE;
    return AMDGPU::SI_SPILL_V32_SAVE;
    return AMDGPU::SI_SPILL_V64_SAVE;
    return AMDGPU::SI_SPILL_V96_SAVE;
    return AMDGPU::SI_SPILL_V128_SAVE;
    return AMDGPU::SI_SPILL_V160_SAVE;
    return AMDGPU::SI_SPILL_V192_SAVE;
    return AMDGPU::SI_SPILL_V224_SAVE;
    return AMDGPU::SI_SPILL_V256_SAVE;
    return AMDGPU::SI_SPILL_V288_SAVE;
    return AMDGPU::SI_SPILL_V320_SAVE;
    return AMDGPU::SI_SPILL_V352_SAVE;
    return AMDGPU::SI_SPILL_V384_SAVE;
    return AMDGPU::SI_SPILL_V512_SAVE;
    return AMDGPU::SI_SPILL_V1024_SAVE;

    return AMDGPU::SI_SPILL_AV32_SAVE;
    return AMDGPU::SI_SPILL_AV64_SAVE;
    return AMDGPU::SI_SPILL_AV96_SAVE;
    return AMDGPU::SI_SPILL_AV128_SAVE;
    return AMDGPU::SI_SPILL_AV160_SAVE;
    return AMDGPU::SI_SPILL_AV192_SAVE;
    return AMDGPU::SI_SPILL_AV224_SAVE;
    return AMDGPU::SI_SPILL_AV256_SAVE;
    return AMDGPU::SI_SPILL_AV288_SAVE;
    return AMDGPU::SI_SPILL_AV320_SAVE;
    return AMDGPU::SI_SPILL_AV352_SAVE;
    return AMDGPU::SI_SPILL_AV384_SAVE;
    return AMDGPU::SI_SPILL_AV512_SAVE;
    return AMDGPU::SI_SPILL_AV1024_SAVE;

                                         bool IsVectorSuperClass) {
  if (IsVectorSuperClass)
    return AMDGPU::SI_SPILL_WWM_AV32_SAVE;
  return AMDGPU::SI_SPILL_WWM_V32_SAVE;

  bool IsVectorSuperClass = RI.isVectorSuperClass(RC);
  if (ST.hasMAIInsts())

      FrameInfo.getObjectAlign(FrameIndex));
  unsigned SpillSize = TRI->getSpillSize(*RC);

  if (RI.isSGPRClass(RC)) {
    assert(SrcReg != AMDGPU::M0 && "m0 should not be spilled");
    assert(SrcReg != AMDGPU::EXEC_LO && SrcReg != AMDGPU::EXEC_HI &&
           SrcReg != AMDGPU::EXEC && "exec should not be spilled");
    if (SrcReg.isVirtual() && SpillSize == 4) {
      MRI.constrainRegClass(SrcReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);
    if (RI.spillSGPRToVGPR())
 
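// The matching *_RESTORE opcode tables and loadRegFromStackSlot mirror the
// save path: pick a SI_SPILL_*_RESTORE pseudo by size, with the same m0/exec
// restrictions and SGPR-to-VGPR handling for scalar reloads.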
    return AMDGPU::SI_SPILL_S32_RESTORE;
    return AMDGPU::SI_SPILL_S64_RESTORE;
    return AMDGPU::SI_SPILL_S96_RESTORE;
    return AMDGPU::SI_SPILL_S128_RESTORE;
    return AMDGPU::SI_SPILL_S160_RESTORE;
    return AMDGPU::SI_SPILL_S192_RESTORE;
    return AMDGPU::SI_SPILL_S224_RESTORE;
    return AMDGPU::SI_SPILL_S256_RESTORE;
    return AMDGPU::SI_SPILL_S288_RESTORE;
    return AMDGPU::SI_SPILL_S320_RESTORE;
    return AMDGPU::SI_SPILL_S352_RESTORE;
    return AMDGPU::SI_SPILL_S384_RESTORE;
    return AMDGPU::SI_SPILL_S512_RESTORE;
    return AMDGPU::SI_SPILL_S1024_RESTORE;

    return AMDGPU::SI_SPILL_V16_RESTORE;
    return AMDGPU::SI_SPILL_V32_RESTORE;
    return AMDGPU::SI_SPILL_V64_RESTORE;
    return AMDGPU::SI_SPILL_V96_RESTORE;
    return AMDGPU::SI_SPILL_V128_RESTORE;
    return AMDGPU::SI_SPILL_V160_RESTORE;
    return AMDGPU::SI_SPILL_V192_RESTORE;
    return AMDGPU::SI_SPILL_V224_RESTORE;
    return AMDGPU::SI_SPILL_V256_RESTORE;
    return AMDGPU::SI_SPILL_V288_RESTORE;
    return AMDGPU::SI_SPILL_V320_RESTORE;
    return AMDGPU::SI_SPILL_V352_RESTORE;
    return AMDGPU::SI_SPILL_V384_RESTORE;
    return AMDGPU::SI_SPILL_V512_RESTORE;
    return AMDGPU::SI_SPILL_V1024_RESTORE;

    return AMDGPU::SI_SPILL_AV32_RESTORE;
    return AMDGPU::SI_SPILL_AV64_RESTORE;
    return AMDGPU::SI_SPILL_AV96_RESTORE;
    return AMDGPU::SI_SPILL_AV128_RESTORE;
    return AMDGPU::SI_SPILL_AV160_RESTORE;
    return AMDGPU::SI_SPILL_AV192_RESTORE;
    return AMDGPU::SI_SPILL_AV224_RESTORE;
    return AMDGPU::SI_SPILL_AV256_RESTORE;
    return AMDGPU::SI_SPILL_AV288_RESTORE;
    return AMDGPU::SI_SPILL_AV320_RESTORE;
    return AMDGPU::SI_SPILL_AV352_RESTORE;
    return AMDGPU::SI_SPILL_AV384_RESTORE;
    return AMDGPU::SI_SPILL_AV512_RESTORE;
    return AMDGPU::SI_SPILL_AV1024_RESTORE;

                                            bool IsVectorSuperClass) {
  if (IsVectorSuperClass)
    return AMDGPU::SI_SPILL_WWM_AV32_RESTORE;
  return AMDGPU::SI_SPILL_WWM_V32_RESTORE;

  bool IsVectorSuperClass = RI.isVectorSuperClass(RC);
  if (ST.hasMAIInsts())
  assert(!RI.isAGPRClass(RC));

  unsigned SpillSize = TRI->getSpillSize(*RC);
      FrameInfo.getObjectAlign(FrameIndex));
  if (RI.isSGPRClass(RC)) {
    assert(DestReg != AMDGPU::M0 && "m0 should not be reloaded into");
    assert(DestReg != AMDGPU::EXEC_LO && DestReg != AMDGPU::EXEC_HI &&
           DestReg != AMDGPU::EXEC && "exec should not be spilled");
    if (DestReg.isVirtual() && SpillSize == 4) {
      MRI.constrainRegClass(DestReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);
    if (RI.spillSGPRToVGPR())
 
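// insertNoops emits S_NOP instructions, capping each at the maximum count a
// single S_NOP immediate can encode. The following fragments emit the
// end-of-program sequence for entry functions with no terminator and expand
// the simulated trap: split off a trap block, mask the doorbell ID, OR in the
// queue-wave-abort code, and move the result into m0 (m0 is written again
// afterwards).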
                              unsigned Quantity) const {
  unsigned MaxSNopCount = 1u << ST.getSNopBits();
  while (Quantity > 0) {
    unsigned Arg = std::min(Quantity, MaxSNopCount);

  auto *MF = MBB.getParent();
  assert(Info->isEntryFunction());
  if (MBB.succ_empty()) {
    bool HasNoTerminator = MBB.getFirstTerminator() == MBB.end();
    if (HasNoTerminator) {
      if (Info->returnsVoid()) {

  constexpr unsigned DoorbellIDMask = 0x3ff;
  constexpr unsigned ECQueueWaveAbort = 0x400;
  if (!MBB.succ_empty() || std::next(MI.getIterator()) != MBB.end()) {
    ContBB = MBB.splitAt(MI, false);
    MBB.addSuccessor(TrapBB);
  Register DoorbellReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_MOV_B32), AMDGPU::TTMP2)
      MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_AND_B32), DoorbellRegMasked)
      MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_OR_B32), SetWaveAbortBit)
      .addUse(DoorbellRegMasked)
      .addImm(ECQueueWaveAbort);
  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_MOV_B32), AMDGPU::M0)
      .addUse(SetWaveAbortBit);

  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_MOV_B32), AMDGPU::M0)
 
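// After a helper that returns the wait-state count encoded in an S_NOP
// immediate, the large switch lowers post-RA pseudos: the *_term exec
// manipulations, spill-to-VGPR lane pseudos, 64-bit immediate moves
// (AV/V/S variants), V_SET_INACTIVE_B32, the indirect register read/write
// pseudos, SI_PC_ADD_REL_OFFSET{,64}, strict WWM/WQM enter/exit, SI_RETURN,
// and simple opcode renames such as S_MUL_U64 and S_GETPC_B64.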
  switch (MI.getOpcode()) {
    if (MI.isMetaInstruction())
    return MI.getOperand(0).getImm() + 1;

  switch (MI.getOpcode()) {
  case AMDGPU::S_MOV_B64_term:
    MI.setDesc(get(AMDGPU::S_MOV_B64));
  case AMDGPU::S_MOV_B32_term:
    MI.setDesc(get(AMDGPU::S_MOV_B32));
  case AMDGPU::S_XOR_B64_term:
    MI.setDesc(get(AMDGPU::S_XOR_B64));
  case AMDGPU::S_XOR_B32_term:
    MI.setDesc(get(AMDGPU::S_XOR_B32));
  case AMDGPU::S_OR_B64_term:
    MI.setDesc(get(AMDGPU::S_OR_B64));
  case AMDGPU::S_OR_B32_term:
    MI.setDesc(get(AMDGPU::S_OR_B32));
  case AMDGPU::S_ANDN2_B64_term:
    MI.setDesc(get(AMDGPU::S_ANDN2_B64));
  case AMDGPU::S_ANDN2_B32_term:
    MI.setDesc(get(AMDGPU::S_ANDN2_B32));
  case AMDGPU::S_AND_B64_term:
    MI.setDesc(get(AMDGPU::S_AND_B64));
  case AMDGPU::S_AND_B32_term:
    MI.setDesc(get(AMDGPU::S_AND_B32));
  case AMDGPU::S_AND_SAVEEXEC_B64_term:
    MI.setDesc(get(AMDGPU::S_AND_SAVEEXEC_B64));
  case AMDGPU::S_AND_SAVEEXEC_B32_term:
    MI.setDesc(get(AMDGPU::S_AND_SAVEEXEC_B32));
  case AMDGPU::SI_SPILL_S32_TO_VGPR:
    MI.setDesc(get(AMDGPU::V_WRITELANE_B32));
  case AMDGPU::SI_RESTORE_S32_FROM_VGPR:
    MI.setDesc(get(AMDGPU::V_READLANE_B32));
  case AMDGPU::AV_MOV_B32_IMM_PSEUDO: {
        get(IsAGPR ? AMDGPU::V_ACCVGPR_WRITE_B32_e64 : AMDGPU::V_MOV_B32_e32));
  case AMDGPU::AV_MOV_B64_IMM_PSEUDO: {
      int64_t Imm = MI.getOperand(1).getImm();
      Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
      Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
      MI.eraseFromParent();
 
  case AMDGPU::V_MOV_B64_PSEUDO: {
    Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
    Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
    if (ST.hasMovB64()) {
      MI.setDesc(get(AMDGPU::V_MOV_B64_e32));
    if (SrcOp.isImm()) {
      APInt Lo(32, Imm.getLoBits(32).getZExtValue());
      APInt Hi(32, Imm.getHiBits(32).getZExtValue());
      if (ST.hasPkMovB32() &&
    MI.eraseFromParent();
  case AMDGPU::V_MOV_B64_DPP_PSEUDO: {
  case AMDGPU::S_MOV_B64_IMM_PSEUDO: {
    if (ST.has64BitLiterals()) {
      MI.setDesc(get(AMDGPU::S_MOV_B64));
      MI.setDesc(get(AMDGPU::S_MOV_B64));
    Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
    Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
    APInt Lo(32, Imm.getLoBits(32).getZExtValue());
    APInt Hi(32, Imm.getHiBits(32).getZExtValue());
    MI.eraseFromParent();
  case AMDGPU::V_SET_INACTIVE_B32: {
        .add(MI.getOperand(3))
        .add(MI.getOperand(4))
        .add(MI.getOperand(1))
        .add(MI.getOperand(2))
        .add(MI.getOperand(5));
    MI.eraseFromParent();
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V1:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V2:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V9:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V10:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V11:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V12:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V1:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V2:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V3:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V9:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V10:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V11:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V12:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V16:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V32:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V1:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V2:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V4:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V8:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V16: {
    if (RI.hasVGPRs(EltRC)) {
      Opc = AMDGPU::V_MOVRELD_B32_e32;
      Opc = RI.getRegSizeInBits(*EltRC) == 64 ? AMDGPU::S_MOVRELD_B64
                                              : AMDGPU::S_MOVRELD_B32;
    bool IsUndef = MI.getOperand(1).isUndef();
    unsigned SubReg = MI.getOperand(3).getImm();
    assert(VecReg == MI.getOperand(1).getReg());
        .add(MI.getOperand(2))
    const int ImpDefIdx =
    const int ImpUseIdx = ImpDefIdx + 1;
    MI.eraseFromParent();
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V9:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V10:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V11:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V12:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32: {
    assert(ST.useVGPRIndexMode());
    bool IsUndef = MI.getOperand(1).isUndef();
    const MCInstrDesc &OpDesc = get(AMDGPU::V_MOV_B32_indirect_write);
            .add(MI.getOperand(2))
    const int ImpDefIdx =
    const int ImpUseIdx = ImpDefIdx + 1;
    MI.eraseFromParent();
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V1:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V2:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V9:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V10:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V11:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V12:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32: {
    assert(ST.useVGPRIndexMode());
    bool IsUndef = MI.getOperand(1).isUndef();
    MI.eraseFromParent();
  case AMDGPU::SI_PC_ADD_REL_OFFSET: {
    Register RegLo = RI.getSubReg(Reg, AMDGPU::sub0);
    Register RegHi = RI.getSubReg(Reg, AMDGPU::sub1);
    if (ST.hasGetPCZeroExtension()) {
          BuildMI(MF, DL, get(AMDGPU::S_SEXT_I32_I16), RegHi).addReg(RegHi));
        BuildMI(MF, DL, get(AMDGPU::S_ADD_U32), RegLo).addReg(RegLo).add(OpLo));
    MI.eraseFromParent();
 
  case AMDGPU::SI_PC_ADD_REL_OFFSET64: {
      Op.setOffset(Op.getOffset() + 4);
        BuildMI(MF, DL, get(AMDGPU::S_ADD_U64), Reg).addReg(Reg).add(Op));
    MI.eraseFromParent();
  case AMDGPU::ENTER_STRICT_WWM: {
  case AMDGPU::ENTER_STRICT_WQM: {
    MI.eraseFromParent();
  case AMDGPU::EXIT_STRICT_WWM:
  case AMDGPU::EXIT_STRICT_WQM: {
  case AMDGPU::SI_RETURN: {
    MI.eraseFromParent();
  case AMDGPU::S_MUL_U64_U32_PSEUDO:
  case AMDGPU::S_MUL_I64_I32_PSEUDO:
    MI.setDesc(get(AMDGPU::S_MUL_U64));
  case AMDGPU::S_GETPC_B64_pseudo:
    MI.setDesc(get(AMDGPU::S_GETPC_B64));
    if (ST.hasGetPCZeroExtension()) {
      Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
  case AMDGPU::V_MAX_BF16_PSEUDO_e64:
    assert(ST.hasBF16PackedInsts());
    MI.setDesc(get(AMDGPU::V_PK_MAX_NUM_BF16));

  case AMDGPU::S_LOAD_DWORDX16_IMM:
  case AMDGPU::S_LOAD_DWORDX8_IMM: {
    for (auto &CandMO : I->operands()) {
      if (!CandMO.isReg() || CandMO.getReg() != RegToFind || CandMO.isDef())
    if (!UseMO || UseMO->getSubReg() == AMDGPU::NoSubRegister)
    unsigned SubregSize = RI.getSubRegIdxSize(UseMO->getSubReg());
    assert(MRI.use_nodbg_empty(DestReg) && "DestReg should have no users yet.");
    unsigned NewOpcode = -1;
    if (SubregSize == 256)
      NewOpcode = AMDGPU::S_LOAD_DWORDX8_IMM;
    else if (SubregSize == 128)
      NewOpcode = AMDGPU::S_LOAD_DWORDX4_IMM;
    MRI.setRegClass(DestReg, NewRC);
    UseMO->setSubReg(AMDGPU::NoSubRegister);
    MI->getOperand(0).setReg(DestReg);
    MI->getOperand(0).setSubReg(AMDGPU::NoSubRegister);
      OffsetMO->setImm(FinalOffset);
    MI->setMemRefs(*MF, NewMMOs);
 
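// expandMovDPP64: a V_MOV_B64_DPP_PSEUDO either becomes a single
// V_MOV_B64_dpp (when 64-bit DPP ALU operations are available) or is split
// into two V_MOV_B32_dpp halves over sub0/sub1, returning both instructions.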
std::pair<MachineInstr*, MachineInstr*>
  assert (MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO);
  if (ST.hasMovB64() && ST.hasFeature(AMDGPU::FeatureDPALU_DPP) &&
    MI.setDesc(get(AMDGPU::V_MOV_B64_dpp));
    return std::pair(&MI, nullptr);

  for (auto Sub : { AMDGPU::sub0, AMDGPU::sub1 }) {
    if (Dst.isPhysical()) {
      MovDPP.addDef(RI.getSubReg(Dst, Sub));
      auto Tmp = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    for (unsigned I = 1; I <= 2; ++I) {
      if (SrcOp.isImm()) {
        Imm.ashrInPlace(Part * 32);
        MovDPP.addImm(Imm.getLoBits(32).getZExtValue());
        if (Src.isPhysical())
          MovDPP.addReg(RI.getSubReg(Src, Sub));
      MovDPP.addImm(MO.getImm());
    Split[Part] = MovDPP;

  if (Dst.isVirtual())
  MI.eraseFromParent();
  return std::pair(Split[0], Split[1]);
 
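// isCopyInstrImpl treats WWM_COPY like a copy; swapSourceModifiers and the
// operand-swapping helpers below implement commuteInstructionImpl, which
// exchanges src0/src1 (including their modifier and sel operands) and
// validates the indices against findCommutedOpIndices.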
std::optional<DestSourcePair>
  if (MI.getOpcode() == AMDGPU::WWM_COPY)
  return std::nullopt;

                                      AMDGPU::OpName Src0OpName,
                                      AMDGPU::OpName Src1OpName) const {
         "All commutable instructions have both src0 and src1 modifiers");
  int Src0ModsVal = Src0Mods->getImm();
  int Src1ModsVal = Src1Mods->getImm();
  Src1Mods->setImm(Src0ModsVal);
  Src0Mods->setImm(Src1ModsVal);

  bool IsKill = RegOp.isKill();
  bool IsUndef = RegOp.isUndef();
  bool IsDebug = RegOp.isDebug();
  if (NonRegOp.isImm())
  else if (NonRegOp.isFI())

  int64_t NonRegVal = NonRegOp1.getImm();
  NonRegOp2.setImm(NonRegVal);

                                unsigned OpIdx1) const {
  unsigned Opc = MI.getOpcode();
  int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
    if ((int)OpIdx0 == Src0Idx && !MO0.isReg() &&
    if ((int)OpIdx1 == Src0Idx && !MO1.isReg() &&
  if ((int)OpIdx1 != Src0Idx && MO0.isReg()) {
  if ((int)OpIdx0 != Src0Idx && MO1.isReg()) {

                                                  unsigned Src1Idx) const {
  assert(!NewMI && "this should never be used");
  unsigned Opc = MI.getOpcode();
  if (CommutedOpcode == -1)
  if (Src0Idx > Src1Idx)
  assert(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0) ==
           static_cast<int>(Src0Idx) &&
         AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1) ==
           static_cast<int>(Src1Idx) &&
         "inconsistency with findCommutedOpIndices");
                        Src1, AMDGPU::OpName::src1_modifiers);
                        AMDGPU::OpName::src1_sel);

                                        unsigned &SrcOpIdx0,
                                        unsigned &SrcOpIdx1) const {

                                        unsigned &SrcOpIdx0,
                                        unsigned &SrcOpIdx1) const {
  if (!Desc.isCommutable())
  unsigned Opc = Desc.getOpcode();
  int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
  int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
  return fixCommutedOpIndices(SrcOpIdx0, SrcOpIdx1, Src0Idx, Src1Idx);
 
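// isBranchOffsetInRange and getBranchDestBlock support the long-branch
// expansion below: when the subtarget has a 64-bit add-PC instruction the
// destination is reached via a label-difference offset attached to it;
// otherwise S_GETPC_B64 plus offset_lo/offset_hi symbol adds are used, with
// a scavenged (or reserved) SGPR pair holding the PC and an emergency spill
// to SGPR0_SGPR1 as a last resort.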
                                        int64_t BrOffset) const {

  return MI.getOperand(0).getMBB();

    if (MI.getOpcode() == AMDGPU::SI_IF || MI.getOpcode() == AMDGPU::SI_ELSE ||
        MI.getOpcode() == AMDGPU::SI_LOOP)

         "new block should be inserted for expanding unconditional branch");
         "restore block should be inserted for restoring clobbered registers");

  if (ST.hasAddPC64Inst()) {
        MCCtx.createTempSymbol("offset", true);
        MCCtx.createTempSymbol("post_addpc", true);
    AddPC->setPostInstrSymbol(*MF, PostAddPCLabel);
    Offset->setVariableValue(OffsetExpr);

  assert(RS && "RegScavenger required for long branching");
  Register PCReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
  const bool FlushSGPRWrites = (ST.isWave64() && ST.hasVALUMaskWriteHazard()) ||
                               ST.hasVALUReadSGPRHazard();
  auto ApplyHazardWorkarounds = [this, &MBB, &I, &DL, FlushSGPRWrites]() {
    if (FlushSGPRWrites)
  ApplyHazardWorkarounds();
      MCCtx.createTempSymbol("post_getpc", true);
      MCCtx.createTempSymbol("offset_lo", true);
      MCCtx.createTempSymbol("offset_hi", true);
      .addReg(PCReg, 0, AMDGPU::sub0)
      .addReg(PCReg, 0, AMDGPU::sub1)
  ApplyHazardWorkarounds();

  if (LongBranchReservedReg) {
    RS->enterBasicBlock(MBB);
    Scav = LongBranchReservedReg;
    RS->enterBasicBlockEnd(MBB);
    Scav = RS->scavengeRegisterBackwards(
    RS->setRegUsed(Scav);
    MRI.replaceRegWith(PCReg, Scav);
    MRI.clearVirtRegs();
    TRI->spillEmergencySGPR(GetPC, RestoreBB, AMDGPU::SGPR0_SGPR1, RS);
    MRI.replaceRegWith(PCReg, AMDGPU::SGPR0_SGPR1);
    MRI.clearVirtRegs();
 
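// getBranchOpcode / getBranchPredicate translate between the BranchPredicate
// enum and the S_CBRANCH_* opcodes; analyzeBranchImpl and analyzeBranch walk
// the terminators (skipping the *_term exec manipulations), and removeBranch /
// insertBranch rewrite them, reporting instruction sizes that account for the
// offset3f hardware bug.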
unsigned SIInstrInfo::getBranchOpcode(SIInstrInfo::BranchPredicate Cond) {
  case SIInstrInfo::SCC_TRUE:
    return AMDGPU::S_CBRANCH_SCC1;
  case SIInstrInfo::SCC_FALSE:
    return AMDGPU::S_CBRANCH_SCC0;
  case SIInstrInfo::VCCNZ:
    return AMDGPU::S_CBRANCH_VCCNZ;
  case SIInstrInfo::VCCZ:
    return AMDGPU::S_CBRANCH_VCCZ;
  case SIInstrInfo::EXECNZ:
    return AMDGPU::S_CBRANCH_EXECNZ;
  case SIInstrInfo::EXECZ:
    return AMDGPU::S_CBRANCH_EXECZ;

SIInstrInfo::BranchPredicate SIInstrInfo::getBranchPredicate(unsigned Opcode) {
  case AMDGPU::S_CBRANCH_SCC0:
  case AMDGPU::S_CBRANCH_SCC1:
  case AMDGPU::S_CBRANCH_VCCNZ:
  case AMDGPU::S_CBRANCH_VCCZ:
  case AMDGPU::S_CBRANCH_EXECNZ:
  case AMDGPU::S_CBRANCH_EXECZ:

                                    bool AllowModify) const {
  if (I->getOpcode() == AMDGPU::S_BRANCH) {
    TBB = I->getOperand(0).getMBB();

  BranchPredicate Pred = getBranchPredicate(I->getOpcode());
  if (Pred == INVALID_BR)
  Cond.push_back(I->getOperand(1));
  if (I == MBB.end()) {
  if (I->getOpcode() == AMDGPU::S_BRANCH) {
    FBB = I->getOperand(0).getMBB();

                                bool AllowModify) const {
  while (I != E && !I->isBranch() && !I->isReturn()) {
    switch (I->getOpcode()) {
    case AMDGPU::S_MOV_B64_term:
    case AMDGPU::S_XOR_B64_term:
    case AMDGPU::S_OR_B64_term:
    case AMDGPU::S_ANDN2_B64_term:
    case AMDGPU::S_AND_B64_term:
    case AMDGPU::S_AND_SAVEEXEC_B64_term:
    case AMDGPU::S_MOV_B32_term:
    case AMDGPU::S_XOR_B32_term:
    case AMDGPU::S_OR_B32_term:
    case AMDGPU::S_ANDN2_B32_term:
    case AMDGPU::S_AND_B32_term:
    case AMDGPU::S_AND_SAVEEXEC_B32_term:
    case AMDGPU::SI_ELSE:
    case AMDGPU::SI_KILL_I1_TERMINATOR:
    case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:

                                   int *BytesRemoved) const {
  unsigned RemovedSize = 0;
    if (MI.isBranch() || MI.isReturn()) {
      MI.eraseFromParent();
    *BytesRemoved = RemovedSize;

                                   int *BytesAdded) const {
  if (!FBB && Cond.empty()) {
      *BytesAdded = ST.hasOffset3fBug() ? 8 : 4;
    = getBranchOpcode(static_cast<BranchPredicate>(Cond[0].getImm()));
      *BytesAdded = ST.hasOffset3fBug() ? 8 : 4;
    *BytesAdded = ST.hasOffset3fBug() ? 16 : 8;
 
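// reverseBranchCondition flips the immediate predicate; canInsertSelect and
// insertSelect cost and emit selects, using S_CSELECT for SGPR results when
// SCC is the predicate and otherwise building per-dword V_CNDMASK_B32 (or
// 64-bit S_CSELECT_B64 pieces) wrapped in a REG_SEQUENCE.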
  if (Cond.size() != 2) {
  if (Cond[0].isImm()) {

                                  Register FalseReg, int &CondCycles,
                                  int &TrueCycles, int &FalseCycles) const {
    if (MRI.getRegClass(FalseReg) != RC)
    CondCycles = TrueCycles = FalseCycles = NumInsts;
    return RI.hasVGPRs(RC) && NumInsts <= 6;
    if (MRI.getRegClass(FalseReg) != RC)
    if (NumInsts % 2 == 0)
    CondCycles = TrueCycles = FalseCycles = NumInsts;
    return RI.isSGPRClass(RC);

  BranchPredicate Pred = static_cast<BranchPredicate>(Cond[0].getImm());
  if (Pred == VCCZ || Pred == SCC_FALSE) {
    Pred = static_cast<BranchPredicate>(-Pred);
  unsigned DstSize = RI.getRegSizeInBits(*DstRC);
  if (DstSize == 32) {
    if (Pred == SCC_TRUE) {
  if (DstSize == 64 && Pred == SCC_TRUE) {

  static const int16_t Sub0_15[] = {
    AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
    AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
    AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
    AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,

  static const int16_t Sub0_15_64[] = {
    AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
    AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
    AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
    AMDGPU::sub12_sub13, AMDGPU::sub14_sub15,

  unsigned SelOp = AMDGPU::V_CNDMASK_B32_e32;
  const int16_t *SubIndices = Sub0_15;
  int NElts = DstSize / 32;
  if (Pred == SCC_TRUE) {
      SelOp = AMDGPU::S_CSELECT_B32;
      EltRC = &AMDGPU::SGPR_32RegClass;
      SelOp = AMDGPU::S_CSELECT_B64;
      EltRC = &AMDGPU::SGPR_64RegClass;
      SubIndices = Sub0_15_64;

    MBB, I, DL, get(AMDGPU::REG_SEQUENCE), DstReg);
  for (int Idx = 0; Idx != NElts; ++Idx) {
    Register DstElt = MRI.createVirtualRegister(EltRC);
    unsigned SubIdx = SubIndices[Idx];
    if (SelOp == AMDGPU::V_CNDMASK_B32_e32) {
        .addReg(FalseReg, 0, SubIdx)
        .addReg(TrueReg, 0, SubIdx);
        .addReg(TrueReg, 0, SubIdx)
        .addReg(FalseReg, 0, SubIdx);
 
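// The two switches below classify the move/copy opcodes (isFoldableCopy and a
// companion check) whose single source may be propagated into users by the
// peephole folder; removeModOperands strips VOP3 modifier operands, and a
// small helper maps a 32-bit sub-register index to the immediate bits it
// selects.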
  switch (MI.getOpcode()) {
  case AMDGPU::V_MOV_B16_t16_e32:
  case AMDGPU::V_MOV_B16_t16_e64:
  case AMDGPU::V_MOV_B32_e32:
  case AMDGPU::V_MOV_B32_e64:
  case AMDGPU::V_MOV_B64_PSEUDO:
  case AMDGPU::V_MOV_B64_e32:
  case AMDGPU::V_MOV_B64_e64:
  case AMDGPU::S_MOV_B32:
  case AMDGPU::S_MOV_B64:
  case AMDGPU::S_MOV_B64_IMM_PSEUDO:
  case AMDGPU::WWM_COPY:
  case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
  case AMDGPU::V_ACCVGPR_READ_B32_e64:
  case AMDGPU::V_ACCVGPR_MOV_B32:
  case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
  case AMDGPU::AV_MOV_B64_IMM_PSEUDO:

  switch (MI.getOpcode()) {
  case AMDGPU::V_MOV_B16_t16_e32:
  case AMDGPU::V_MOV_B16_t16_e64:
  case AMDGPU::V_MOV_B32_e32:
  case AMDGPU::V_MOV_B32_e64:
  case AMDGPU::V_MOV_B64_PSEUDO:
  case AMDGPU::V_MOV_B64_e32:
  case AMDGPU::V_MOV_B64_e64:
  case AMDGPU::S_MOV_B32:
  case AMDGPU::S_MOV_B64:
  case AMDGPU::S_MOV_B64_IMM_PSEUDO:
  case AMDGPU::WWM_COPY:
  case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
  case AMDGPU::V_ACCVGPR_READ_B32_e64:
  case AMDGPU::V_ACCVGPR_MOV_B32:
  case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
  case AMDGPU::AV_MOV_B64_IMM_PSEUDO:

    AMDGPU::OpName::src0_modifiers, AMDGPU::OpName::src1_modifiers,
    AMDGPU::OpName::src2_modifiers, AMDGPU::OpName::clamp,
    AMDGPU::OpName::omod,           AMDGPU::OpName::op_sel};

  unsigned Opc = MI.getOpcode();
    int Idx = AMDGPU::getNamedOperandIdx(Opc, Name);
      MI.removeOperand(Idx);

                                                         unsigned SubRegIndex) {
  switch (SubRegIndex) {
  case AMDGPU::NoSubRegister:
  case AMDGPU::sub1_lo16:
  case AMDGPU::sub1_hi16:
    return std::nullopt;
 
  case AMDGPU::V_MAC_F16_e32:
  case AMDGPU::V_MAC_F16_e64:
  case AMDGPU::V_MAD_F16_e64:
    return AMDGPU::V_MADAK_F16;
  case AMDGPU::V_MAC_F32_e32:
  case AMDGPU::V_MAC_F32_e64:
  case AMDGPU::V_MAD_F32_e64:
    return AMDGPU::V_MADAK_F32;
  case AMDGPU::V_FMAC_F32_e32:
  case AMDGPU::V_FMAC_F32_e64:
  case AMDGPU::V_FMA_F32_e64:
    return AMDGPU::V_FMAAK_F32;
  case AMDGPU::V_FMAC_F16_e32:
  case AMDGPU::V_FMAC_F16_e64:
  case AMDGPU::V_FMAC_F16_t16_e64:
  case AMDGPU::V_FMAC_F16_fake16_e64:
  case AMDGPU::V_FMA_F16_e64:
    return ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts()
                                        ? AMDGPU::V_FMAAK_F16_t16
                                        : AMDGPU::V_FMAAK_F16_fake16
                                  : AMDGPU::V_FMAAK_F16;
  case AMDGPU::V_FMAC_F64_e32:
  case AMDGPU::V_FMAC_F64_e64:
  case AMDGPU::V_FMA_F64_e64:
    return AMDGPU::V_FMAAK_F64;

  case AMDGPU::V_MAC_F16_e32:
  case AMDGPU::V_MAC_F16_e64:
  case AMDGPU::V_MAD_F16_e64:
    return AMDGPU::V_MADMK_F16;
  case AMDGPU::V_MAC_F32_e32:
  case AMDGPU::V_MAC_F32_e64:
  case AMDGPU::V_MAD_F32_e64:
    return AMDGPU::V_MADMK_F32;
  case AMDGPU::V_FMAC_F32_e32:
  case AMDGPU::V_FMAC_F32_e64:
  case AMDGPU::V_FMA_F32_e64:
    return AMDGPU::V_FMAMK_F32;
  case AMDGPU::V_FMAC_F16_e32:
  case AMDGPU::V_FMAC_F16_e64:
  case AMDGPU::V_FMAC_F16_t16_e64:
  case AMDGPU::V_FMAC_F16_fake16_e64:
  case AMDGPU::V_FMA_F16_e64:
    return ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts()
                                        ? AMDGPU::V_FMAMK_F16_t16
                                        : AMDGPU::V_FMAMK_F16_fake16
                                  : AMDGPU::V_FMAMK_F16;
  case AMDGPU::V_FMAC_F64_e32:
  case AMDGPU::V_FMAC_F64_e64:
  case AMDGPU::V_FMA_F64_e64:
    return AMDGPU::V_FMAMK_F64;
 
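// foldImmediate: given a materialized constant (DefMI) used by UseMI, either
// rewrite a COPY user into the right S_MOV/V_MOV/V_ACCVGPR_WRITE immediate
// form, or fold the constant into a MAD/FMA-family instruction by switching
// it to the MADMK/MADAK (FMAMK/FMAAK) opcodes from the tables above, then
// delete DefMI once its value has no remaining users.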
  const bool HasMultipleUses = !MRI->hasOneNonDBGUse(Reg);
  assert(!DefMI.getOperand(0).getSubReg() && "Expected SSA form");

  if (Opc == AMDGPU::COPY) {
    assert(!UseMI.getOperand(0).getSubReg() && "Expected SSA form");
    if (HasMultipleUses) {
      unsigned ImmDefSize = RI.getRegSizeInBits(*MRI->getRegClass(Reg));
      if (UseSubReg != AMDGPU::NoSubRegister && ImmDefSize == 64)
      if (ImmDefSize == 32 &&
    bool Is16Bit = UseSubReg != AMDGPU::NoSubRegister &&
                   RI.getSubRegIdxSize(UseSubReg) == 16;
      if (RI.hasVGPRs(DstRC))
      if (DstReg.isVirtual() && UseSubReg != AMDGPU::lo16)
    unsigned NewOpc = AMDGPU::INSTRUCTION_LIST_END;
    for (unsigned MovOp :
         {AMDGPU::S_MOV_B32, AMDGPU::V_MOV_B32_e32, AMDGPU::S_MOV_B64,
          AMDGPU::V_MOV_B64_PSEUDO, AMDGPU::V_ACCVGPR_WRITE_B32_e64}) {
        MovDstRC = RI.getMatchingSuperRegClass(MovDstRC, DstRC, AMDGPU::lo16);
        if (MovDstPhysReg) {
              RI.getMatchingSuperReg(MovDstPhysReg, AMDGPU::lo16, MovDstRC);
      if (MovDstPhysReg) {
        if (!MovDstRC->contains(MovDstPhysReg))
      } else if (!MRI->constrainRegClass(DstReg, MovDstRC)) {
      if (!RI.opCanUseLiteralConstant(OpInfo.OperandType) &&
    if (NewOpc == AMDGPU::INSTRUCTION_LIST_END)
      UseMI.getOperand(0).setSubReg(AMDGPU::NoSubRegister);
        UseMI.getOperand(0).setReg(MovDstPhysReg);
    UseMI.setDesc(NewMCID);
    UseMI.getOperand(1).ChangeToImmediate(*SubRegImm);
    UseMI.addImplicitDefUseOperands(*MF);

  if (HasMultipleUses)

  if (Opc == AMDGPU::V_MAD_F32_e64 || Opc == AMDGPU::V_MAC_F32_e64 ||
      Opc == AMDGPU::V_MAD_F16_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
      Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64 ||
      Opc == AMDGPU::V_FMA_F16_e64 || Opc == AMDGPU::V_FMAC_F16_e64 ||
      Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
      Opc == AMDGPU::V_FMAC_F16_fake16_e64 || Opc == AMDGPU::V_FMA_F64_e64 ||
      Opc == AMDGPU::V_FMAC_F64_e64) {
    int Src0Idx = getNamedOperandIdx(UseMI.getOpcode(), AMDGPU::OpName::src0);
          Src1->isReg() && Src1->getReg() == Reg ? Src0 : Src1;
      if (!RegSrc->isReg())
      if (RI.isSGPRClass(MRI->getRegClass(RegSrc->getReg())) &&
          ST.getConstantBusLimit(Opc) < 2)
      if (!Src2->isReg() || RI.isSGPRClass(MRI->getRegClass(Src2->getReg())))
      if (Def && Def->isMoveImmediate() &&
      if (NewOpc == AMDGPU::V_FMAMK_F16_t16 ||
          NewOpc == AMDGPU::V_FMAMK_F16_fake16)
      unsigned SrcSubReg = RegSrc->getSubReg();
      if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
          Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
          Opc == AMDGPU::V_FMAC_F16_fake16_e64 ||
          Opc == AMDGPU::V_FMAC_F16_e64 || Opc == AMDGPU::V_FMAC_F64_e64)
        UseMI.untieRegOperand(
            AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));
      bool DeleteDef = MRI->use_nodbg_empty(Reg);
        DefMI.eraseFromParent();

      if (ST.getConstantBusLimit(Opc) < 2) {
        bool Src0Inlined = false;
        if (Src0->isReg()) {
          if (Def && Def->isMoveImmediate() &&
          } else if (ST.getConstantBusLimit(Opc) <= 1 &&
        if (Src1->isReg() && !Src0Inlined) {
          if (Def && Def->isMoveImmediate() &&
              MRI->hasOneNonDBGUse(Src1->getReg()) && commuteInstruction(UseMI))
          else if (RI.isSGPRReg(*MRI, Src1->getReg()))
      if (NewOpc == AMDGPU::V_FMAAK_F16_t16 ||
          NewOpc == AMDGPU::V_FMAAK_F16_fake16)
      if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
          Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
          Opc == AMDGPU::V_FMAC_F16_fake16_e64 ||
          Opc == AMDGPU::V_FMAC_F16_e64 || Opc == AMDGPU::V_FMAC_F64_e64)
        UseMI.untieRegOperand(
            AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));
      const std::optional<int64_t> SubRegImm =
      bool DeleteDef = MRI->use_nodbg_empty(Reg);
        DefMI.eraseFromParent();
 
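// The memory-disjointness helpers: compare base operand lists, check that the
// [Offset, Offset + Width) ranges do not overlap, and let
// areMemAccessesTriviallyDisjoint fall back to the offset check for pairs of
// DS / MUBUF / SMRD / FLAT accesses.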
  if (BaseOps1.size() != BaseOps2.size())

  for (size_t I = 0, E = BaseOps1.size(); I < E; ++I) {
    if (!BaseOps1[I]->isIdenticalTo(*BaseOps2[I]))

  int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
  int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
  LocationSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
         LowOffset + (int)LowWidth.getValue() <= HighOffset;
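// Memory-disjointness helpers: two accesses with identical base operands are
// disjoint when the lower access ends at or before the start of the higher
// one (LowOffset + LowWidth <= HighOffset).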
 
 
bool SIInstrInfo::checkInstOffsetsDoNotOverlap(const MachineInstr &MIa,

  int64_t Offset0, Offset1;
  bool Offset0IsScalable, Offset1IsScalable;

  LocationSize Width0 = MIa.memoperands().front()->getSize();
  LocationSize Width1 = MIb.memoperands().front()->getSize();

         "MIa must load from or modify a memory location");
         "MIb must load from or modify a memory location");

      return checkInstOffsetsDoNotOverlap(MIa, MIb);

      return checkInstOffsetsDoNotOverlap(MIa, MIb);

      return checkInstOffsetsDoNotOverlap(MIa, MIb);

      return checkInstOffsetsDoNotOverlap(MIa, MIb);
 
 
  if (Reg.isPhysical())
  auto *Def = MRI.getUniqueVRegDef(Reg);
    Imm = Def->getOperand(1).getImm();

    unsigned NumOps = MI.getNumOperands();
      if (Op.isReg() && Op.isKill())
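// Mapping from two-address MAC/FMAC opcodes to the MAD/FMA opcodes used when
// rewriting to three-address form.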
 
 
  case AMDGPU::V_MAC_F16_e32:
  case AMDGPU::V_MAC_F16_e64:
    return AMDGPU::V_MAD_F16_e64;
  case AMDGPU::V_MAC_F32_e32:
  case AMDGPU::V_MAC_F32_e64:
    return AMDGPU::V_MAD_F32_e64;
  case AMDGPU::V_MAC_LEGACY_F32_e32:
  case AMDGPU::V_MAC_LEGACY_F32_e64:
    return AMDGPU::V_MAD_LEGACY_F32_e64;
  case AMDGPU::V_FMAC_LEGACY_F32_e32:
  case AMDGPU::V_FMAC_LEGACY_F32_e64:
    return AMDGPU::V_FMA_LEGACY_F32_e64;
  case AMDGPU::V_FMAC_F16_e32:
  case AMDGPU::V_FMAC_F16_e64:
  case AMDGPU::V_FMAC_F16_t16_e64:
  case AMDGPU::V_FMAC_F16_fake16_e64:
    return ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts()
                                        ? AMDGPU::V_FMA_F16_gfx9_t16_e64
                                        : AMDGPU::V_FMA_F16_gfx9_fake16_e64
                                  : AMDGPU::V_FMA_F16_gfx9_e64;
  case AMDGPU::V_FMAC_F32_e32:
  case AMDGPU::V_FMAC_F32_e64:
    return AMDGPU::V_FMA_F32_e64;
  case AMDGPU::V_FMAC_F64_e32:
  case AMDGPU::V_FMAC_F64_e64:
    return AMDGPU::V_FMA_F64_e64;
 
 
      if (Def.isEarlyClobber() && Def.isReg() &&

        auto UpdateDefIndex = [&](LiveRange &LR) {
          auto *S = LR.find(OldIndex);
          if (S != LR.end() && S->start == OldIndex) {
            assert(S->valno && S->valno->def == OldIndex);
            S->start = NewIndex;
            S->valno->def = NewIndex;

        for (auto &SR : LI.subranges())

  if (U.RemoveMIUse) {

    Register DefReg = U.RemoveMIUse->getOperand(0).getReg();

    if (MRI.hasOneNonDBGUse(DefReg)) {

      U.RemoveMIUse->setDesc(get(AMDGPU::IMPLICIT_DEF));
      U.RemoveMIUse->getOperand(0).setIsDead(true);
      for (unsigned I = U.RemoveMIUse->getNumOperands() - 1; I != 0; --I)
        U.RemoveMIUse->removeOperand(I);

      Register DummyReg = MRI.cloneVirtualRegister(DefReg);
        if (MIOp.isReg() && MIOp.getReg() == DefReg) {
          MIOp.setIsUndef(true);
          MIOp.setReg(DummyReg);
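  // convertToThreeAddress: rewrite MAC/FMAC (and MFMA) instructions into their
  // three-address MAD/FMA forms, folding an inline-constant source into a
  // V_FMAMK/V_FMAAK encoding when the operand rules permit.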
 
 
                                       ThreeAddressUpdates &U) const {
  unsigned Opc = MI.getOpcode();

  if (NewMFMAOpc != -1) {
    for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I)
      MIB.add(MI.getOperand(I));

    for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I)

  assert(Opc != AMDGPU::V_FMAC_F16_t16_e32 &&
         Opc != AMDGPU::V_FMAC_F16_fake16_e32 &&
         "V_FMAC_F16_t16/fake16_e32 is not supported and not expected to be "

  bool IsF64 = Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64;
  bool IsLegacy = Opc == AMDGPU::V_MAC_LEGACY_F32_e32 ||
                  Opc == AMDGPU::V_MAC_LEGACY_F32_e64 ||
                  Opc == AMDGPU::V_FMAC_LEGACY_F32_e32 ||
                  Opc == AMDGPU::V_FMAC_LEGACY_F32_e64;
  bool Src0Literal = false;

  case AMDGPU::V_MAC_F16_e64:
  case AMDGPU::V_FMAC_F16_e64:
  case AMDGPU::V_FMAC_F16_t16_e64:
  case AMDGPU::V_FMAC_F16_fake16_e64:
  case AMDGPU::V_MAC_F32_e64:
  case AMDGPU::V_MAC_LEGACY_F32_e64:
  case AMDGPU::V_FMAC_F32_e64:
  case AMDGPU::V_FMAC_LEGACY_F32_e64:
  case AMDGPU::V_FMAC_F64_e64:

  case AMDGPU::V_MAC_F16_e32:
  case AMDGPU::V_FMAC_F16_e32:
  case AMDGPU::V_MAC_F32_e32:
  case AMDGPU::V_MAC_LEGACY_F32_e32:
  case AMDGPU::V_FMAC_F32_e32:
  case AMDGPU::V_FMAC_LEGACY_F32_e32:
  case AMDGPU::V_FMAC_F64_e32: {
    int Src0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                             AMDGPU::OpName::src0);
    const MachineOperand *Src0 = &MI.getOperand(Src0Idx);

  MachineInstrBuilder MIB;

  const MachineOperand *Src0Mods =
  const MachineOperand *Src1Mods =
  const MachineOperand *Src2Mods =

  if (!Src0Mods && !Src1Mods && !Src2Mods && !Clamp && !Omod && !IsLegacy &&
      (!IsF64 || ST.hasFmaakFmamkF64Insts()) &&
      (ST.getConstantBusLimit(Opc) > 1 || !Src0->isReg() ||
    MachineInstr *DefMI;

              MI, AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::src0),

  if (Src0Literal && !ST.hasVOP3Literal())
 
  switch (MI.getOpcode()) {
  case AMDGPU::S_SET_GPR_IDX_ON:
  case AMDGPU::S_SET_GPR_IDX_MODE:
  case AMDGPU::S_SET_GPR_IDX_OFF:

  if (MI.isTerminator() || MI.isPosition())

  if (MI.getOpcode() == TargetOpcode::INLINEASM_BR)

  if (MI.getOpcode() == AMDGPU::SCHED_BARRIER && MI.getOperand(0).getImm() == 0)

  return MI.modifiesRegister(AMDGPU::EXEC, &RI) ||
         MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 ||
         MI.getOpcode() == AMDGPU::S_SETREG_B32 ||
         MI.getOpcode() == AMDGPU::S_SETPRIO ||
         MI.getOpcode() == AMDGPU::S_SETPRIO_INC_WG ||

  return Opcode == AMDGPU::DS_ORDERED_COUNT ||
         Opcode == AMDGPU::DS_ADD_GS_REG_RTN ||
         Opcode == AMDGPU::DS_SUB_GS_REG_RTN || isGWS(Opcode);
 
 
  if (MI.getMF()->getFunction().hasFnAttribute("amdgpu-no-flat-scratch-init"))

  if (MI.memoperands_empty())

    unsigned AS = Memop->getAddrSpace();
    if (AS == AMDGPUAS::FLAT_ADDRESS) {
      const MDNode *MD = Memop->getAAInfo().NoAliasAddrSpace;
      return !MD || !AMDGPU::hasValueInRangeLikeMetadata(
                        *MD, AMDGPUAS::PRIVATE_ADDRESS);

  if (MI.memoperands_empty())

    unsigned AS = Memop->getAddrSpace();

  if (ST.isTgSplitEnabled())

  if (MI.memoperands_empty())

    unsigned AS = Memop->getAddrSpace();
 
 
  unsigned Opcode = MI.getOpcode();

  if (Opcode == AMDGPU::S_SENDMSG || Opcode == AMDGPU::S_SENDMSGHALT ||
      isEXP(Opcode) || Opcode == AMDGPU::DS_ORDERED_COUNT ||
      Opcode == AMDGPU::S_TRAP || Opcode == AMDGPU::S_WAIT_EVENT)

  if (MI.isCall() || MI.isInlineAsm())

  if (Opcode == AMDGPU::V_READFIRSTLANE_B32 ||
      Opcode == AMDGPU::V_READLANE_B32 || Opcode == AMDGPU::V_WRITELANE_B32 ||
      Opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR ||
      Opcode == AMDGPU::SI_SPILL_S32_TO_VGPR)

  if (MI.isMetaInstruction())

  if (MI.isCopyLike()) {
    if (!RI.isSGPRReg(MRI, MI.getOperand(0).getReg()))
    return MI.readsRegister(AMDGPU::EXEC, &RI);

  return !isSALU(MI) || MI.readsRegister(AMDGPU::EXEC, &RI);
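// Inline-constant classification: the overloads below decide whether an
// immediate can be encoded as a hardware inline constant for the given operand
// type and subtarget (including the 1/(2*pi) case gated by
// hasInv2PiInlineImm()).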
 
 
  switch (Imm.getBitWidth()) {
                                        ST.hasInv2PiInlineImm());
                                        ST.hasInv2PiInlineImm());
    return ST.has16BitInsts() &&
                                         ST.hasInv2PiInlineImm());

  APInt IntImm = Imm.bitcastToAPInt();
  bool HasInv2Pi = ST.hasInv2PiInlineImm();
    return ST.has16BitInsts() &&
    return ST.has16BitInsts() &&

  switch (OperandType) {
    int32_t Trunc = static_cast<int32_t>(Imm);
      int16_t Trunc = static_cast<int16_t>(Imm);
      return ST.has16BitInsts() &&
      int16_t Trunc = static_cast<int16_t>(Imm);
      return ST.has16BitInsts() &&

  if (!RI.opCanUseLiteralConstant(OpInfo.OperandType))
  return ST.hasVOP3Literal();
 
 
                                    int64_t ImmVal) const {
    if (isMAI(InstDesc) && ST.hasMFMAInlineLiteralBug() &&
        OpNo == (unsigned)AMDGPU::getNamedOperandIdx(InstDesc.getOpcode(),
                                                     AMDGPU::OpName::src2))
    return RI.opCanUseInlineConstant(OpInfo.OperandType);

         "unexpected imm-like operand kind");

  if (Opcode == AMDGPU::V_MUL_LEGACY_F32_e64 && ST.hasGFX90AInsts())

                                  AMDGPU::OpName OpName) const {
  return Mods && Mods->getImm();
 
 
    switch (MI.getOpcode()) {
      default: return false;
      case AMDGPU::V_ADDC_U32_e64:
      case AMDGPU::V_SUBB_U32_e64:
      case AMDGPU::V_SUBBREV_U32_e64: {
      case AMDGPU::V_MAC_F16_e64:
      case AMDGPU::V_MAC_F32_e64:
      case AMDGPU::V_MAC_LEGACY_F32_e64:
      case AMDGPU::V_FMAC_F16_e64:
      case AMDGPU::V_FMAC_F16_t16_e64:
      case AMDGPU::V_FMAC_F16_fake16_e64:
      case AMDGPU::V_FMAC_F32_e64:
      case AMDGPU::V_FMAC_F64_e64:
      case AMDGPU::V_FMAC_LEGACY_F32_e64:
      case AMDGPU::V_CNDMASK_B32_e64:

  if (Src1 && (!Src1->isReg() || !RI.isVGPR(MRI, Src1->getReg()) ||

        (Use.getReg() == AMDGPU::VCC || Use.getReg() == AMDGPU::VCC_LO)) {
 
 
                                           unsigned Op32) const {
    Inst32.add(MI.getOperand(I));

  int Idx = MI.getNumExplicitDefs();
    int OpTy = MI.getDesc().operands()[Idx++].OperandType;
      if (AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::src2) == -1) {

  if (Reg == AMDGPU::SGPR_NULL || Reg == AMDGPU::SGPR_NULL64)
    return Reg == AMDGPU::VCC || Reg == AMDGPU::VCC_LO || Reg == AMDGPU::M0;
  return AMDGPU::SReg_32RegClass.contains(Reg) ||
         AMDGPU::SReg_64RegClass.contains(Reg);
 
 
  return Reg.isVirtual() ? RI.isSGPRClass(MRI.getRegClass(Reg))

  return Reg.isVirtual() ? RI.isSGPRClass(MRI.getRegClass(Reg))

    switch (MO.getReg()) {
    case AMDGPU::VCC_LO:
    case AMDGPU::VCC_HI:
    case AMDGPU::FLAT_SCR:

    switch (MI.getOpcode()) {
    case AMDGPU::V_READLANE_B32:
    case AMDGPU::SI_RESTORE_S32_FROM_VGPR:
    case AMDGPU::V_WRITELANE_B32:
    case AMDGPU::SI_SPILL_S32_TO_VGPR:

  if (MI.isPreISelOpcode() ||
      SIInstrInfo::isGenericOpcode(MI.getOpcode()) ||

  if (SubReg.getReg().isPhysical())
  return SubReg.getSubReg() != AMDGPU::NoSubRegister &&

  if (RI.isVectorRegister(MRI, SrcReg) && RI.isSGPRReg(MRI, DstReg)) {
    ErrInfo = "illegal copy from vector register to SGPR";

  if (!MRI.isSSA() && MI.isCopy())
    return verifyCopy(MI, MRI, ErrInfo);
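  // The remaining verifier checks cover operand counts and register classes,
  // SDWA and DPP encoding restrictions, constant-bus and literal limits,
  // image/MIMG address sizing, and subtarget alignment requirements.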
 
  if (SIInstrInfo::isGenericOpcode(Opcode))

  int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
  int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
  int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);

  if (Src0Idx == -1) {
    Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0X);
    Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vsrc1X);
    Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0Y);
    Src3Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vsrc1Y);

  if (!Desc.isVariadic() &&
      Desc.getNumOperands() != MI.getNumExplicitOperands()) {
    ErrInfo = "Instruction has wrong number of operands.";

  if (MI.isInlineAsm()) {
      if (!Reg.isVirtual() && !RC->contains(Reg)) {
        ErrInfo = "inlineasm operand has incorrect register class.";

  if (isImage(MI) && MI.memoperands_empty() && MI.mayLoadOrStore()) {
    ErrInfo = "missing memory operand from image instruction.";

  for (int i = 0, e = Desc.getNumOperands(); i != e; ++i) {
      ErrInfo = "FPImm Machine Operands are not supported. ISel should bitcast "
                "all fp values to integers.";

    int16_t RegClass = getOpRegClassID(OpInfo);

    switch (OpInfo.OperandType) {
      if (MI.getOperand(i).isImm() || MI.getOperand(i).isGlobal()) {
        ErrInfo = "Illegal immediate value for operand.";
        ErrInfo = "Illegal immediate value for operand.";
        ErrInfo = "Expected inline constant for operand.";
      if (!MI.getOperand(i).isImm() && !MI.getOperand(i).isFI()) {
        ErrInfo = "Expected immediate, but got non-immediate";
      if (OpInfo.isGenericType())

    if (ST.needsAlignedVGPRs() && Opcode != AMDGPU::AV_MOV_B64_IMM_PSEUDO) {
      if (RI.hasVectorRegisters(RC) && MO.getSubReg()) {
                RI.getSubRegisterClass(RC, MO.getSubReg())) {
          RC = RI.getCompatibleSubRegClass(RC, SubRC, MO.getSubReg());
      if (!RC || !RI.isProperlyAlignedRC(*RC)) {
        ErrInfo = "Subtarget requires even aligned vector registers";

    if (RegClass != -1) {
      if (Reg.isVirtual())
        ErrInfo = "Operand has incorrect register class.";
 
    if (!ST.hasSDWA()) {
      ErrInfo = "SDWA is not supported on this target";

    for (auto Op : {AMDGPU::OpName::src0_sel, AMDGPU::OpName::src1_sel,
                    AMDGPU::OpName::dst_sel}) {
      int64_t Imm = MO->getImm();
        ErrInfo = "Invalid SDWA selection";

    int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);

    for (int OpIdx : {DstIdx, Src0Idx, Src1Idx, Src2Idx}) {
      if (!ST.hasSDWAScalar()) {
        if (!MO.isReg() || !RI.hasVGPRs(RI.getRegClassForReg(MRI, MO.getReg()))) {
          ErrInfo = "Only VGPRs allowed as operands in SDWA instructions on VI";
            "Only reg allowed as operands in SDWA instructions on GFX9+";

    if (!ST.hasSDWAOmod()) {
      if (OMod != nullptr &&
        ErrInfo = "OMod not allowed in SDWA instructions on VI";

    if (Opcode == AMDGPU::V_CVT_F32_FP8_sdwa ||
        Opcode == AMDGPU::V_CVT_F32_BF8_sdwa ||
        Opcode == AMDGPU::V_CVT_PK_F32_FP8_sdwa ||
        Opcode == AMDGPU::V_CVT_PK_F32_BF8_sdwa) {
      unsigned Mods = Src0ModsMO->getImm();
        ErrInfo = "sext, abs and neg are not allowed on this instruction";

    if (isVOPC(BasicOpcode)) {
      if (!ST.hasSDWASdst() && DstIdx != -1) {
        if (!Dst.isReg() || Dst.getReg() != AMDGPU::VCC) {
          ErrInfo = "Only VCC allowed as dst in SDWA instructions on VI";
      } else if (!ST.hasSDWAOutModsVOPC()) {
        if (Clamp && (!Clamp->isImm() || Clamp->getImm() != 0)) {
          ErrInfo = "Clamp not allowed in VOPC SDWA instructions on VI";
        if (OMod && (!OMod->isImm() || OMod->getImm() != 0)) {
          ErrInfo = "OMod not allowed in VOPC SDWA instructions on VI";

    if (DstUnused && DstUnused->isImm() &&
      if (!Dst.isReg() || !Dst.isTied()) {
        ErrInfo = "Dst register should have tied register";
          MI.getOperand(MI.findTiedOperandIdx(DstIdx));
            "Dst register should be tied to implicit use of preserved register";
        ErrInfo = "Dst register should use same physical register as preserved";

  if (isImage(Opcode) && !MI.mayStore()) {
      if (D16 && D16->getImm() && !ST.hasUnpackedD16VMem())
          AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdata);
        uint32_t DstSize = RI.getRegSizeInBits(*DstRC) / 32;
        if (RegCount > DstSize) {
          ErrInfo = "Image instruction returns too many registers for dst "

  if (isVALU(MI) && Desc.getOpcode() != AMDGPU::V_WRITELANE_B32) {
    unsigned ConstantBusCount = 0;
    bool UsesLiteral = false;

    int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm);
      LiteralVal = &MI.getOperand(ImmIdx);

    for (int OpIdx : {Src0Idx, Src1Idx, Src2Idx, Src3Idx}) {
        } else if (!MO.isFI()) {
            ErrInfo = "VOP2/VOP3 instruction uses more than one literal";

      if (llvm::all_of(SGPRsUsed, [this, SGPRUsed](unsigned SGPR) {
            return !RI.regsOverlap(SGPRUsed, SGPR);

    if (ConstantBusCount > ST.getConstantBusLimit(Opcode) &&
        Opcode != AMDGPU::V_WRITELANE_B32) {
      ErrInfo = "VOP* instruction violates constant bus restriction";

    if (isVOP3(MI) && UsesLiteral && !ST.hasVOP3Literal()) {
      ErrInfo = "VOP3 instruction uses literal";
 
  if (Desc.getOpcode() == AMDGPU::V_WRITELANE_B32) {
    unsigned SGPRCount = 0;

    for (int OpIdx : {Src0Idx, Src1Idx}) {
          if (MO.getReg() != SGPRUsed)

      if (SGPRCount > ST.getConstantBusLimit(Opcode)) {
        ErrInfo = "WRITELANE instruction violates constant bus restriction";

  if (Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F32_e64 ||
      Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F64_e64) {
        ErrInfo = "v_div_scale_{f32|f64} require src0 = src1 or src2";

      ErrInfo = "ABS not allowed in VOP3B instructions";

      ErrInfo = "SOP2/SOPC instruction requires too many immediate constants";

    if (Desc.isBranch()) {
        ErrInfo = "invalid branch target for SOPK instruction";
          ErrInfo = "invalid immediate for SOPK instruction";
          ErrInfo = "invalid immediate for SOPK instruction";

  if (Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e32 ||
      Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e64 ||
      Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
      Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64) {
    const bool IsDst = Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
                       Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64;

    const unsigned StaticNumOps =
        Desc.getNumOperands() + Desc.implicit_uses().size();
    const unsigned NumImplicitOps = IsDst ? 2 : 1;

    if (MI.getNumOperands() < StaticNumOps + NumImplicitOps) {
      ErrInfo = "missing implicit register operands";

      if (!Dst->isUse()) {
        ErrInfo = "v_movreld_b32 vdst should be a use operand";

      if (!MI.isRegTiedToUseOperand(StaticNumOps, &UseOpIdx) ||
          UseOpIdx != StaticNumOps + 1) {
        ErrInfo = "movrel implicit operands should be tied";

      = MI.getOperand(StaticNumOps + NumImplicitOps - 1);
        !isSubRegOf(RI, ImpUse, IsDst ? *Dst : Src0)) {
      ErrInfo = "src0 should be subreg of implicit vector use";

    if (!MI.hasRegisterImplicitUseOperand(AMDGPU::EXEC)) {
      ErrInfo = "VALU instruction does not implicitly read exec mask";

    if (MI.mayStore() &&
      if (Soff && Soff->getReg() != AMDGPU::M0) {
        ErrInfo = "scalar stores must use m0 as offset register";

  if (isFLAT(MI) && !ST.hasFlatInstOffsets()) {
    if (Offset->getImm() != 0) {
      ErrInfo = "subtarget does not support offsets in flat instructions";

  if (isDS(MI) && !ST.hasGDS()) {
    if (GDSOp && GDSOp->getImm() != 0) {
      ErrInfo = "GDS is not supported on this subtarget";

      int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opcode,
                                                 AMDGPU::OpName::vaddr0);
      AMDGPU::OpName RSrcOpName =
          isMIMG(MI) ? AMDGPU::OpName::srsrc : AMDGPU::OpName::rsrc;
      int RsrcIdx = AMDGPU::getNamedOperandIdx(Opcode, RSrcOpName);

        ErrInfo = "dim is out of range";

      if (ST.hasR128A16()) {
        IsA16 = R128A16->getImm() != 0;
      } else if (ST.hasA16()) {
        IsA16 = A16->getImm() != 0;

      bool IsNSA = RsrcIdx - VAddr0Idx > 1;

      unsigned AddrWords =

      unsigned VAddrWords;
        VAddrWords = RsrcIdx - VAddr0Idx;
        if (ST.hasPartialNSAEncoding() &&
          unsigned LastVAddrIdx = RsrcIdx - 1;
          VAddrWords += getOpSize(MI, LastVAddrIdx) / 4 - 1;

      if (VAddrWords != AddrWords) {
                          << " but got " << VAddrWords << "\n");
        ErrInfo = "bad vaddr size";
 
    unsigned DC = DppCt->getImm();
    if (DC == DppCtrl::DPP_UNUSED1 || DC == DppCtrl::DPP_UNUSED2 ||
        DC == DppCtrl::DPP_UNUSED3 || DC > DppCtrl::DPP_LAST ||
        (DC >= DppCtrl::DPP_UNUSED4_FIRST && DC <= DppCtrl::DPP_UNUSED4_LAST) ||
        (DC >= DppCtrl::DPP_UNUSED5_FIRST && DC <= DppCtrl::DPP_UNUSED5_LAST) ||
        (DC >= DppCtrl::DPP_UNUSED6_FIRST && DC <= DppCtrl::DPP_UNUSED6_LAST) ||
        (DC >= DppCtrl::DPP_UNUSED7_FIRST && DC <= DppCtrl::DPP_UNUSED7_LAST) ||
        (DC >= DppCtrl::DPP_UNUSED8_FIRST && DC <= DppCtrl::DPP_UNUSED8_LAST)) {
      ErrInfo = "Invalid dpp_ctrl value";

    if (DC >= DppCtrl::WAVE_SHL1 && DC <= DppCtrl::WAVE_ROR1 &&
      ErrInfo = "Invalid dpp_ctrl value: "
                "wavefront shifts are not supported on GFX10+";

    if (DC >= DppCtrl::BCAST15 && DC <= DppCtrl::BCAST31 &&
      ErrInfo = "Invalid dpp_ctrl value: "
                "broadcasts are not supported on GFX10+";

    if (DC >= DppCtrl::ROW_SHARE_FIRST && DC <= DppCtrl::ROW_XMASK_LAST &&
      if (DC >= DppCtrl::ROW_NEWBCAST_FIRST &&
          DC <= DppCtrl::ROW_NEWBCAST_LAST &&
          !ST.hasGFX90AInsts()) {
        ErrInfo = "Invalid dpp_ctrl value: "
                  "row_newbroadcast/row_share is not supported before "
      if (DC > DppCtrl::ROW_NEWBCAST_LAST || !ST.hasGFX90AInsts()) {
        ErrInfo = "Invalid dpp_ctrl value: "
                  "row_share and row_xmask are not supported before GFX10";

    if (Opcode != AMDGPU::V_MOV_B64_DPP_PSEUDO &&
      ErrInfo = "Invalid dpp_ctrl value: "
                "DP ALU dpp only support row_newbcast";

    AMDGPU::OpName DataName =
        isDS(Opcode) ? AMDGPU::OpName::data0 : AMDGPU::OpName::vdata;

    if (ST.hasGFX90AInsts()) {
      if (Dst && Data && !Dst->isTied() && !Data->isTied() &&
          (RI.isAGPR(MRI, Dst->getReg()) != RI.isAGPR(MRI, Data->getReg()))) {
        ErrInfo = "Invalid register class: "
                  "vdata and vdst should be both VGPR or AGPR";

      if (Data && Data2 &&
        ErrInfo = "Invalid register class: "
                  "both data operands should be VGPR or AGPR";

      if ((Dst && RI.isAGPR(MRI, Dst->getReg())) ||
          (Data2 && RI.isAGPR(MRI, Data2->getReg()))) {
        ErrInfo = "Invalid register class: "
                  "agpr loads and stores not supported on this GPU";

  if (ST.needsAlignedVGPRs()) {
    const auto isAlignedReg = [&MI, &MRI, this](AMDGPU::OpName OpName) -> bool {
      if (Reg.isPhysical())
        return !(RI.getHWRegIndex(Reg) & 1);
      return RI.getRegSizeInBits(RC) > 32 && RI.isProperlyAlignedRC(RC) &&
             !(RI.getChannelFromSubReg(Op->getSubReg()) & 1);

    if (Opcode == AMDGPU::DS_GWS_INIT || Opcode == AMDGPU::DS_GWS_SEMA_BR ||
        Opcode == AMDGPU::DS_GWS_BARRIER) {
      if (!isAlignedReg(AMDGPU::OpName::data0)) {
        ErrInfo = "Subtarget requires even aligned vector registers "
                  "for DS_GWS instructions";

      if (!isAlignedReg(AMDGPU::OpName::vaddr)) {
        ErrInfo = "Subtarget requires even aligned vector registers "
                  "for vaddr operand of image instructions";

  if (Opcode == AMDGPU::V_ACCVGPR_WRITE_B32_e64 && !ST.hasGFX90AInsts()) {
    if (Src->isReg() && RI.isSGPRReg(MRI, Src->getReg())) {
      ErrInfo = "Invalid register class: "
                "v_accvgpr_write with an SGPR is not supported on this GPU";

  if (Desc.getOpcode() == AMDGPU::G_AMDGPU_WAVE_ADDRESS) {
      ErrInfo = "pseudo expects only physical SGPRs";

      if (!ST.hasScaleOffset()) {
        ErrInfo = "Subtarget does not support offset scaling";
        ErrInfo = "Instruction does not support offset scaling";

    for (unsigned I = 0; I < 3; ++I) {
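// Scalar-to-vector opcode table: each SALU opcode is mapped to the VALU opcode
// used when the instruction has to be moved to vector registers; subtargets
// with real true16 instructions select the _t16 rather than _fake16 variants.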
 
 
  switch (MI.getOpcode()) {
  default: return AMDGPU::INSTRUCTION_LIST_END;
  case AMDGPU::REG_SEQUENCE: return AMDGPU::REG_SEQUENCE;
  case AMDGPU::COPY: return AMDGPU::COPY;
  case AMDGPU::PHI: return AMDGPU::PHI;
  case AMDGPU::INSERT_SUBREG: return AMDGPU::INSERT_SUBREG;
  case AMDGPU::WQM: return AMDGPU::WQM;
  case AMDGPU::SOFT_WQM: return AMDGPU::SOFT_WQM;
  case AMDGPU::STRICT_WWM: return AMDGPU::STRICT_WWM;
  case AMDGPU::STRICT_WQM: return AMDGPU::STRICT_WQM;
  case AMDGPU::S_MOV_B32: {
    return MI.getOperand(1).isReg() ||
           RI.isAGPR(MRI, MI.getOperand(0).getReg()) ?
           AMDGPU::COPY : AMDGPU::V_MOV_B32_e32;
  }
  case AMDGPU::S_ADD_I32:
    return ST.hasAddNoCarry() ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_CO_U32_e32;
  case AMDGPU::S_ADDC_U32:
    return AMDGPU::V_ADDC_U32_e32;
  case AMDGPU::S_SUB_I32:
    return ST.hasAddNoCarry() ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_CO_U32_e32;
  case AMDGPU::S_ADD_U32:
    return AMDGPU::V_ADD_CO_U32_e32;
  case AMDGPU::S_SUB_U32:
    return AMDGPU::V_SUB_CO_U32_e32;
  case AMDGPU::S_ADD_U64_PSEUDO:
    return AMDGPU::V_ADD_U64_PSEUDO;
  case AMDGPU::S_SUB_U64_PSEUDO:
    return AMDGPU::V_SUB_U64_PSEUDO;
  case AMDGPU::S_SUBB_U32: return AMDGPU::V_SUBB_U32_e32;
  case AMDGPU::S_MUL_I32: return AMDGPU::V_MUL_LO_U32_e64;
  case AMDGPU::S_MUL_HI_U32: return AMDGPU::V_MUL_HI_U32_e64;
  case AMDGPU::S_MUL_HI_I32: return AMDGPU::V_MUL_HI_I32_e64;
  case AMDGPU::S_AND_B32: return AMDGPU::V_AND_B32_e64;
  case AMDGPU::S_OR_B32: return AMDGPU::V_OR_B32_e64;
  case AMDGPU::S_XOR_B32: return AMDGPU::V_XOR_B32_e64;
  case AMDGPU::S_XNOR_B32:
    return ST.hasDLInsts() ? AMDGPU::V_XNOR_B32_e64 : AMDGPU::INSTRUCTION_LIST_END;
  case AMDGPU::S_MIN_I32: return AMDGPU::V_MIN_I32_e64;
  case AMDGPU::S_MIN_U32: return AMDGPU::V_MIN_U32_e64;
  case AMDGPU::S_MAX_I32: return AMDGPU::V_MAX_I32_e64;
  case AMDGPU::S_MAX_U32: return AMDGPU::V_MAX_U32_e64;
  case AMDGPU::S_ASHR_I32: return AMDGPU::V_ASHR_I32_e32;
  case AMDGPU::S_ASHR_I64: return AMDGPU::V_ASHR_I64_e64;
  case AMDGPU::S_LSHL_B32: return AMDGPU::V_LSHL_B32_e32;
  case AMDGPU::S_LSHL_B64: return AMDGPU::V_LSHL_B64_e64;
  case AMDGPU::S_LSHR_B32: return AMDGPU::V_LSHR_B32_e32;
  case AMDGPU::S_LSHR_B64: return AMDGPU::V_LSHR_B64_e64;
  case AMDGPU::S_SEXT_I32_I8: return AMDGPU::V_BFE_I32_e64;
  case AMDGPU::S_SEXT_I32_I16: return AMDGPU::V_BFE_I32_e64;
  case AMDGPU::S_BFE_U32: return AMDGPU::V_BFE_U32_e64;
  case AMDGPU::S_BFE_I32: return AMDGPU::V_BFE_I32_e64;
  case AMDGPU::S_BFM_B32: return AMDGPU::V_BFM_B32_e64;
  case AMDGPU::S_BREV_B32: return AMDGPU::V_BFREV_B32_e32;
  case AMDGPU::S_NOT_B32: return AMDGPU::V_NOT_B32_e32;
  case AMDGPU::S_NOT_B64: return AMDGPU::V_NOT_B32_e32;
  case AMDGPU::S_CMP_EQ_I32: return AMDGPU::V_CMP_EQ_I32_e64;
  case AMDGPU::S_CMP_LG_I32: return AMDGPU::V_CMP_NE_I32_e64;
  case AMDGPU::S_CMP_GT_I32: return AMDGPU::V_CMP_GT_I32_e64;
  case AMDGPU::S_CMP_GE_I32: return AMDGPU::V_CMP_GE_I32_e64;
  case AMDGPU::S_CMP_LT_I32: return AMDGPU::V_CMP_LT_I32_e64;
  case AMDGPU::S_CMP_LE_I32: return AMDGPU::V_CMP_LE_I32_e64;
  case AMDGPU::S_CMP_EQ_U32: return AMDGPU::V_CMP_EQ_U32_e64;
  case AMDGPU::S_CMP_LG_U32: return AMDGPU::V_CMP_NE_U32_e64;
  case AMDGPU::S_CMP_GT_U32: return AMDGPU::V_CMP_GT_U32_e64;
  case AMDGPU::S_CMP_GE_U32: return AMDGPU::V_CMP_GE_U32_e64;
  case AMDGPU::S_CMP_LT_U32: return AMDGPU::V_CMP_LT_U32_e64;
  case AMDGPU::S_CMP_LE_U32: return AMDGPU::V_CMP_LE_U32_e64;
  case AMDGPU::S_CMP_EQ_U64: return AMDGPU::V_CMP_EQ_U64_e64;
  case AMDGPU::S_CMP_LG_U64: return AMDGPU::V_CMP_NE_U64_e64;
  case AMDGPU::S_BCNT1_I32_B32: return AMDGPU::V_BCNT_U32_B32_e64;
  case AMDGPU::S_FF1_I32_B32: return AMDGPU::V_FFBL_B32_e32;
  case AMDGPU::S_FLBIT_I32_B32: return AMDGPU::V_FFBH_U32_e32;
  case AMDGPU::S_FLBIT_I32: return AMDGPU::V_FFBH_I32_e64;
  case AMDGPU::S_CBRANCH_SCC0: return AMDGPU::S_CBRANCH_VCCZ;
  case AMDGPU::S_CBRANCH_SCC1: return AMDGPU::S_CBRANCH_VCCNZ;
 
  case AMDGPU::S_CVT_F32_I32: return AMDGPU::V_CVT_F32_I32_e64;
  case AMDGPU::S_CVT_F32_U32: return AMDGPU::V_CVT_F32_U32_e64;
  case AMDGPU::S_CVT_I32_F32: return AMDGPU::V_CVT_I32_F32_e64;
  case AMDGPU::S_CVT_U32_F32: return AMDGPU::V_CVT_U32_F32_e64;
  case AMDGPU::S_CVT_F32_F16:
  case AMDGPU::S_CVT_HI_F32_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CVT_F32_F16_t16_e64
                                   : AMDGPU::V_CVT_F32_F16_fake16_e64;
  case AMDGPU::S_CVT_F16_F32:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CVT_F16_F32_t16_e64
                                   : AMDGPU::V_CVT_F16_F32_fake16_e64;
  case AMDGPU::S_CEIL_F32: return AMDGPU::V_CEIL_F32_e64;
  case AMDGPU::S_FLOOR_F32: return AMDGPU::V_FLOOR_F32_e64;
  case AMDGPU::S_TRUNC_F32: return AMDGPU::V_TRUNC_F32_e64;
  case AMDGPU::S_RNDNE_F32: return AMDGPU::V_RNDNE_F32_e64;
  case AMDGPU::S_CEIL_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CEIL_F16_t16_e64
                                   : AMDGPU::V_CEIL_F16_fake16_e64;
  case AMDGPU::S_FLOOR_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_FLOOR_F16_t16_e64
                                   : AMDGPU::V_FLOOR_F16_fake16_e64;
  case AMDGPU::S_TRUNC_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_TRUNC_F16_t16_e64
                                   : AMDGPU::V_TRUNC_F16_fake16_e64;
  case AMDGPU::S_RNDNE_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_RNDNE_F16_t16_e64
                                   : AMDGPU::V_RNDNE_F16_fake16_e64;
  case AMDGPU::S_ADD_F32: return AMDGPU::V_ADD_F32_e64;
  case AMDGPU::S_SUB_F32: return AMDGPU::V_SUB_F32_e64;
  case AMDGPU::S_MIN_F32: return AMDGPU::V_MIN_F32_e64;
  case AMDGPU::S_MAX_F32: return AMDGPU::V_MAX_F32_e64;
  case AMDGPU::S_MINIMUM_F32: return AMDGPU::V_MINIMUM_F32_e64;
  case AMDGPU::S_MAXIMUM_F32: return AMDGPU::V_MAXIMUM_F32_e64;
  case AMDGPU::S_MUL_F32: return AMDGPU::V_MUL_F32_e64;
  case AMDGPU::S_ADD_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_ADD_F16_t16_e64
                                   : AMDGPU::V_ADD_F16_fake16_e64;
  case AMDGPU::S_SUB_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_SUB_F16_t16_e64
                                   : AMDGPU::V_SUB_F16_fake16_e64;
  case AMDGPU::S_MIN_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_MIN_F16_t16_e64
                                   : AMDGPU::V_MIN_F16_fake16_e64;
  case AMDGPU::S_MAX_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_MAX_F16_t16_e64
                                   : AMDGPU::V_MAX_F16_fake16_e64;
  case AMDGPU::S_MINIMUM_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_MINIMUM_F16_t16_e64
                                   : AMDGPU::V_MINIMUM_F16_fake16_e64;
  case AMDGPU::S_MAXIMUM_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_MAXIMUM_F16_t16_e64
                                   : AMDGPU::V_MAXIMUM_F16_fake16_e64;
  case AMDGPU::S_MUL_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_MUL_F16_t16_e64
                                   : AMDGPU::V_MUL_F16_fake16_e64;
  case AMDGPU::S_CVT_PK_RTZ_F16_F32: return AMDGPU::V_CVT_PKRTZ_F16_F32_e64;
  case AMDGPU::S_FMAC_F32: return AMDGPU::V_FMAC_F32_e64;
  case AMDGPU::S_FMAC_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_FMAC_F16_t16_e64
                                   : AMDGPU::V_FMAC_F16_fake16_e64;
  case AMDGPU::S_FMAMK_F32: return AMDGPU::V_FMAMK_F32;
  case AMDGPU::S_FMAAK_F32: return AMDGPU::V_FMAAK_F32;
  case AMDGPU::S_CMP_LT_F32: return AMDGPU::V_CMP_LT_F32_e64;
  case AMDGPU::S_CMP_EQ_F32: return AMDGPU::V_CMP_EQ_F32_e64;
  case AMDGPU::S_CMP_LE_F32: return AMDGPU::V_CMP_LE_F32_e64;
  case AMDGPU::S_CMP_GT_F32: return AMDGPU::V_CMP_GT_F32_e64;
  case AMDGPU::S_CMP_LG_F32: return AMDGPU::V_CMP_LG_F32_e64;
  case AMDGPU::S_CMP_GE_F32: return AMDGPU::V_CMP_GE_F32_e64;
  case AMDGPU::S_CMP_O_F32: return AMDGPU::V_CMP_O_F32_e64;
  case AMDGPU::S_CMP_U_F32: return AMDGPU::V_CMP_U_F32_e64;
  case AMDGPU::S_CMP_NGE_F32: return AMDGPU::V_CMP_NGE_F32_e64;
  case AMDGPU::S_CMP_NLG_F32: return AMDGPU::V_CMP_NLG_F32_e64;
  case AMDGPU::S_CMP_NGT_F32: return AMDGPU::V_CMP_NGT_F32_e64;
  case AMDGPU::S_CMP_NLE_F32: return AMDGPU::V_CMP_NLE_F32_e64;
  case AMDGPU::S_CMP_NEQ_F32: return AMDGPU::V_CMP_NEQ_F32_e64;
  case AMDGPU::S_CMP_NLT_F32: return AMDGPU::V_CMP_NLT_F32_e64;
  case AMDGPU::S_CMP_LT_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_LT_F16_t16_e64
                                   : AMDGPU::V_CMP_LT_F16_fake16_e64;
  case AMDGPU::S_CMP_EQ_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_EQ_F16_t16_e64
                                   : AMDGPU::V_CMP_EQ_F16_fake16_e64;
  case AMDGPU::S_CMP_LE_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_LE_F16_t16_e64
                                   : AMDGPU::V_CMP_LE_F16_fake16_e64;
  case AMDGPU::S_CMP_GT_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_GT_F16_t16_e64
                                   : AMDGPU::V_CMP_GT_F16_fake16_e64;
  case AMDGPU::S_CMP_LG_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_LG_F16_t16_e64
                                   : AMDGPU::V_CMP_LG_F16_fake16_e64;
  case AMDGPU::S_CMP_GE_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_GE_F16_t16_e64
                                   : AMDGPU::V_CMP_GE_F16_fake16_e64;
  case AMDGPU::S_CMP_O_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_O_F16_t16_e64
                                   : AMDGPU::V_CMP_O_F16_fake16_e64;
  case AMDGPU::S_CMP_U_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_U_F16_t16_e64
                                   : AMDGPU::V_CMP_U_F16_fake16_e64;
  case AMDGPU::S_CMP_NGE_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NGE_F16_t16_e64
                                   : AMDGPU::V_CMP_NGE_F16_fake16_e64;
  case AMDGPU::S_CMP_NLG_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NLG_F16_t16_e64
                                   : AMDGPU::V_CMP_NLG_F16_fake16_e64;
  case AMDGPU::S_CMP_NGT_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NGT_F16_t16_e64
                                   : AMDGPU::V_CMP_NGT_F16_fake16_e64;
  case AMDGPU::S_CMP_NLE_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NLE_F16_t16_e64
                                   : AMDGPU::V_CMP_NLE_F16_fake16_e64;
  case AMDGPU::S_CMP_NEQ_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NEQ_F16_t16_e64
                                   : AMDGPU::V_CMP_NEQ_F16_fake16_e64;
  case AMDGPU::S_CMP_NLT_F16:
    return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NLT_F16_t16_e64
                                   : AMDGPU::V_CMP_NLT_F16_fake16_e64;
  case AMDGPU::V_S_EXP_F32_e64: return AMDGPU::V_EXP_F32_e64;
  case AMDGPU::V_S_EXP_F16_e64:
    return ST.useRealTrue16Insts() ? AMDGPU::V_EXP_F16_t16_e64
                                   : AMDGPU::V_EXP_F16_fake16_e64;
  case AMDGPU::V_S_LOG_F32_e64: return AMDGPU::V_LOG_F32_e64;
  case AMDGPU::V_S_LOG_F16_e64:
    return ST.useRealTrue16Insts() ? AMDGPU::V_LOG_F16_t16_e64
                                   : AMDGPU::V_LOG_F16_fake16_e64;
  case AMDGPU::V_S_RCP_F32_e64: return AMDGPU::V_RCP_F32_e64;
  case AMDGPU::V_S_RCP_F16_e64:
    return ST.useRealTrue16Insts() ? AMDGPU::V_RCP_F16_t16_e64
                                   : AMDGPU::V_RCP_F16_fake16_e64;
  case AMDGPU::V_S_RSQ_F32_e64: return AMDGPU::V_RSQ_F32_e64;
  case AMDGPU::V_S_RSQ_F16_e64:
    return ST.useRealTrue16Insts() ? AMDGPU::V_RSQ_F16_t16_e64
                                   : AMDGPU::V_RSQ_F16_fake16_e64;
  case AMDGPU::V_S_SQRT_F32_e64: return AMDGPU::V_SQRT_F32_e64;
  case AMDGPU::V_S_SQRT_F16_e64:
    return ST.useRealTrue16Insts() ? AMDGPU::V_SQRT_F16_t16_e64
                                   : AMDGPU::V_SQRT_F16_fake16_e64;

      "Unexpected scalar opcode without corresponding vector one!");
 
 
         "Not a whole wave func");

    if (MI.getOpcode() == AMDGPU::SI_WHOLE_WAVE_FUNC_SETUP ||
        MI.getOpcode() == AMDGPU::G_AMDGPU_WHOLE_WAVE_FUNC_SETUP)

  int16_t RegClass = getOpRegClassID(OpInfo);
  return RI.getRegClass(RegClass);
 
 
                                                      unsigned OpNo) const {
  if (MI.isVariadic() || OpNo >= Desc.getNumOperands() ||
      Desc.operands()[OpNo].RegClass == -1) {
    if (Reg.isVirtual()) {
          MI.getParent()->getParent()->getRegInfo();
      return MRI.getRegClass(Reg);
    return RI.getPhysRegBaseClass(Reg);

  return RI.getRegClass(getOpRegClassID(Desc.operands()[OpNo]));
 
 
  unsigned RCID = getOpRegClassID(get(MI.getOpcode()).operands()[OpIdx]);
  unsigned Size = RI.getRegSizeInBits(*RC);
  unsigned Opcode = (Size == 64) ? AMDGPU::V_MOV_B64_PSEUDO
                    : Size == 16 ? AMDGPU::V_MOV_B16_t16_e64
                                 : AMDGPU::V_MOV_B32_e32;
    Opcode = AMDGPU::COPY;
  else if (RI.isSGPRClass(RC))
    Opcode = (Size == 64) ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;

    return RI.getSubReg(SuperReg.getReg(), SubIdx);

  unsigned NewSubIdx = RI.composeSubRegIndices(SuperReg.getSubReg(), SubIdx);

    if (SubIdx == AMDGPU::sub0)
    if (SubIdx == AMDGPU::sub1)

void SIInstrInfo::swapOperands(MachineInstr &Inst) const {

  if (Reg.isPhysical())

    return RI.getMatchingSuperRegClass(SuperRC, DRC, MO.getSubReg()) != nullptr;

  return RI.getCommonSubClass(DRC, RC) != nullptr;
 
 
  unsigned Opc = MI.getOpcode();

    constexpr AMDGPU::OpName OpNames[] = {
        AMDGPU::OpName::src0, AMDGPU::OpName::src1, AMDGPU::OpName::src2};

      int SrcIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OpNames[I]);
      if (static_cast<unsigned>(SrcIdx) == OpIdx &&

  bool IsAGPR = RI.isAGPR(MRI, MO.getReg());
  if (IsAGPR && !ST.hasMAIInsts())
  if (IsAGPR && (!ST.hasGFX90AInsts() || !MRI.reservedRegsFrozen()) &&

  const int VDstIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
  const int DataIdx = AMDGPU::getNamedOperandIdx(
      Opc, isDS(Opc) ? AMDGPU::OpName::data0 : AMDGPU::OpName::vdata);
  if ((int)OpIdx == VDstIdx && DataIdx != -1 &&
      MI.getOperand(DataIdx).isReg() &&
      RI.isAGPR(MRI, MI.getOperand(DataIdx).getReg()) != IsAGPR)

  if ((int)OpIdx == DataIdx) {
    if (VDstIdx != -1 &&
        RI.isAGPR(MRI, MI.getOperand(VDstIdx).getReg()) != IsAGPR)

    const int Data1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data1);
    if (Data1Idx != -1 && MI.getOperand(Data1Idx).isReg() &&
        RI.isAGPR(MRI, MI.getOperand(Data1Idx).getReg()) != IsAGPR)

  if (Opc == AMDGPU::V_ACCVGPR_WRITE_B32_e64 && !ST.hasGFX90AInsts() &&
      (int)OpIdx == AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0) &&
 
 
  constexpr unsigned NumOps = 3;
  constexpr AMDGPU::OpName OpNames[NumOps * 2] = {
      AMDGPU::OpName::src0,           AMDGPU::OpName::src1,
      AMDGPU::OpName::src2,           AMDGPU::OpName::src0_modifiers,
      AMDGPU::OpName::src1_modifiers, AMDGPU::OpName::src2_modifiers};

    int SrcIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OpNames[SrcN]);
    MO = &MI.getOperand(SrcIdx);

      AMDGPU::getNamedOperandIdx(MI.getOpcode(), OpNames[NumOps + SrcN]);

  unsigned Mods = MI.getOperand(ModsIdx).getImm();

  return !OpSel && !OpSelHi;
 
 
  int64_t RegClass = getOpRegClassID(OpInfo);
      RegClass != -1 ? RI.getRegClass(RegClass) : nullptr;

    int ConstantBusLimit = ST.getConstantBusLimit(MI.getOpcode());
    int LiteralLimit = !isVOP3(MI) || ST.hasVOP3Literal() ? 1 : 0;
      if (!LiteralLimit--)

    for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
            if (--ConstantBusLimit <= 0)
        if (!LiteralLimit--)
        if (--ConstantBusLimit <= 0)

    for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
      if (!Op.isReg() && !Op.isFI() && !Op.isRegMask() &&
          !Op.isIdenticalTo(*MO))

  } else if (IsInlineConst && ST.hasNoF16PseudoScalarTransInlineConstants() &&

    bool Is64BitOp = Is64BitFPOp ||
          (!ST.has64BitLiterals() || InstDesc.getSize() != 4))
      if (!Is64BitFPOp && (int32_t)Imm < 0 &&
 
 
  bool IsGFX950Only = ST.hasGFX950Insts();
  bool IsGFX940Only = ST.hasGFX940Insts();

  if (!IsGFX950Only && !IsGFX940Only)

  unsigned Opcode = MI.getOpcode();

  case AMDGPU::V_CVT_PK_BF8_F32_e64:
  case AMDGPU::V_CVT_PK_FP8_F32_e64:
  case AMDGPU::V_MQSAD_PK_U16_U8_e64:
  case AMDGPU::V_MQSAD_U32_U8_e64:
  case AMDGPU::V_PK_ADD_F16:
  case AMDGPU::V_PK_ADD_F32:
  case AMDGPU::V_PK_ADD_I16:
  case AMDGPU::V_PK_ADD_U16:
  case AMDGPU::V_PK_ASHRREV_I16:
  case AMDGPU::V_PK_FMA_F16:
  case AMDGPU::V_PK_FMA_F32:
  case AMDGPU::V_PK_FMAC_F16_e32:
  case AMDGPU::V_PK_FMAC_F16_e64:
  case AMDGPU::V_PK_LSHLREV_B16:
  case AMDGPU::V_PK_LSHRREV_B16:
  case AMDGPU::V_PK_MAD_I16:
  case AMDGPU::V_PK_MAD_U16:
  case AMDGPU::V_PK_MAX_F16:
  case AMDGPU::V_PK_MAX_I16:
  case AMDGPU::V_PK_MAX_U16:
  case AMDGPU::V_PK_MIN_F16:
  case AMDGPU::V_PK_MIN_I16:
  case AMDGPU::V_PK_MIN_U16:
  case AMDGPU::V_PK_MOV_B32:
  case AMDGPU::V_PK_MUL_F16:
  case AMDGPU::V_PK_MUL_F32:
  case AMDGPU::V_PK_MUL_LO_U16:
  case AMDGPU::V_PK_SUB_I16:
  case AMDGPU::V_PK_SUB_U16:
  case AMDGPU::V_QSAD_PK_U16_U8_e64:
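// Legalize VOP2 operands: at most one SGPR or literal may be read, so
// offending sources are copied into new registers (SReg_32_XM0 for the
// read/write-lane instructions) or the instruction is commuted instead.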
 
 
  unsigned Opc = MI.getOpcode();

  int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
  int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);

  if (HasImplicitSGPR && ST.getConstantBusLimit(Opc) <= 1 && Src0.isReg() &&

  if (Opc == AMDGPU::V_WRITELANE_B32) {
      Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
      Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);

  if (Opc == AMDGPU::V_FMAC_F32_e32 || Opc == AMDGPU::V_FMAC_F16_e32) {
    int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
    if (!RI.isVGPR(MRI, MI.getOperand(Src2Idx).getReg()))

  if (Opc == AMDGPU::V_READLANE_B32 && Src1.isReg() &&
    Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);

  if (HasImplicitSGPR || !MI.isCommutable()) {

  if (CommutedOpc == -1) {

  MI.setDesc(get(CommutedOpc));

  bool Src0Kill = Src0.isKill();
  else if (Src1.isReg()) {
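// Legalize VOP3 operands: enforce the constant-bus and literal limits, the
// SGPR requirement on the permlane select operands, and the VGPR requirement
// on src2 of V_FMAC, inserting copies as needed.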
 
 
  unsigned Opc = MI.getOpcode();

    AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0),
    AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1),
    AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)

  if (Opc == AMDGPU::V_PERMLANE16_B32_e64 ||
      Opc == AMDGPU::V_PERMLANEX16_B32_e64 ||
      Opc == AMDGPU::V_PERMLANE_BCAST_B32_e64 ||
      Opc == AMDGPU::V_PERMLANE_UP_B32_e64 ||
      Opc == AMDGPU::V_PERMLANE_DOWN_B32_e64 ||
      Opc == AMDGPU::V_PERMLANE_XOR_B32_e64 ||
      Opc == AMDGPU::V_PERMLANE_IDX_GEN_B32_e64) {
    if (Src1.isReg() && !RI.isSGPRClass(MRI.getRegClass(Src1.getReg()))) {
      Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
    if (VOP3Idx[2] != -1) {
      if (Src2.isReg() && !RI.isSGPRClass(MRI.getRegClass(Src2.getReg()))) {
        Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);

  int ConstantBusLimit = ST.getConstantBusLimit(Opc);
  int LiteralLimit = ST.hasVOP3Literal() ? 1 : 0;

  Register SGPRReg = findUsedSGPR(MI, VOP3Idx);
    SGPRsUsed.insert(SGPRReg);

  for (int Idx : VOP3Idx) {
      if (LiteralLimit > 0 && ConstantBusLimit > 0) {

    if (!RI.isSGPRClass(RI.getRegClassForReg(MRI, MO.getReg())))

    if (ConstantBusLimit > 0) {

  if ((Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_e64) &&
      !RI.isVGPR(MRI, MI.getOperand(VOP3Idx[2]).getReg()))

    for (unsigned I = 0; I < 3; ++I) {
 
 
    SRC = RI.getCommonSubClass(SRC, DstRC);

  unsigned SubRegs = RI.getRegSizeInBits(*VRC) / 32;

  if (RI.hasAGPRs(VRC)) {
    VRC = RI.getEquivalentVGPRClass(VRC);
    Register NewSrcReg = MRI.createVirtualRegister(VRC);
            get(TargetOpcode::COPY), NewSrcReg)
            get(AMDGPU::V_READFIRSTLANE_B32), DstReg)

  for (unsigned i = 0; i < SubRegs; ++i) {
    Register SGPR = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
            get(AMDGPU::V_READFIRSTLANE_B32), SGPR)
        .addReg(SrcReg, 0, RI.getSubRegFromChannel(i));

              get(AMDGPU::REG_SEQUENCE), DstReg);
  for (unsigned i = 0; i < SubRegs; ++i) {
    MIB.addImm(RI.getSubRegFromChannel(i));
 
 
  if (SBase && !RI.isSGPRClass(MRI.getRegClass(SBase->getReg()))) {
    SBase->setReg(SGPR);

  if (SOff && !RI.isSGPRReg(MRI, SOff->getReg())) {

  int OldSAddrIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::saddr);
  if (OldSAddrIdx < 0)

  int NewVAddrIdx = AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vaddr);
  if (NewVAddrIdx < 0)

  int OldVAddrIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr);

  if (OldVAddrIdx >= 0) {
    VAddrDef = MRI.getUniqueVRegDef(VAddr.getReg());

  if (OldVAddrIdx == NewVAddrIdx) {
    MRI.removeRegOperandFromUseList(&NewVAddr);
    MRI.moveOperands(&NewVAddr, &SAddr, 1);
    MRI.removeRegOperandFromUseList(&NewVAddr);
    MRI.addRegOperandToUseList(&NewVAddr);
    assert(OldSAddrIdx == NewVAddrIdx);

    if (OldVAddrIdx >= 0) {
      int NewVDstIn = AMDGPU::getNamedOperandIdx(NewOpc,
                                                 AMDGPU::OpName::vdst_in);
      if (NewVDstIn != -1) {
        int OldVDstIn = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in);
      if (NewVDstIn != -1) {
        int NewVDst = AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vdst);

  if (!SAddr || RI.isSGPRClass(MRI.getRegClass(SAddr->getReg())))
 
 
  unsigned OpSubReg = Op.getSubReg();

      RI.getRegClassForReg(MRI, OpReg), OpSubReg);

  Register DstReg = MRI.createVirtualRegister(DstRC);

  if (Def->isMoveImmediate() && DstRC != &AMDGPU::VReg_1RegClass)

  bool ImpDef = Def->isImplicitDef();
  while (!ImpDef && Def && Def->isCopy()) {
    if (Def->getOperand(1).getReg().isPhysical())
    Def = MRI.getUniqueVRegDef(Def->getOperand(1).getReg());
    ImpDef = Def && Def->isImplicitDef();

  if (!RI.isSGPRClass(DstRC) && !Copy->readsRegister(AMDGPU::EXEC, &RI) &&
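// Waterfall loop body: read the divergent scalar operand back lane by lane
// with V_READFIRSTLANE_B32, compare against the original VGPR value, and
// accumulate an exec mask so each unique value is processed exactly once.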
 
 
  const auto *BoolXExecRC = TRI->getWaveMaskRegClass();

    unsigned RegSize = TRI->getRegSizeInBits(ScalarOp->getReg(), MRI);
    unsigned NumSubRegs = RegSize / 32;
    Register VScalarOp = ScalarOp->getReg();

    if (NumSubRegs == 1) {
      Register CurReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);

      BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), CurReg)

      Register NewCondReg = MRI.createVirtualRegister(BoolXExecRC);

      BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_CMP_EQ_U32_e64), NewCondReg)

        CondReg = NewCondReg;

        Register AndReg = MRI.createVirtualRegister(BoolXExecRC);

      ScalarOp->setReg(CurReg);
      ScalarOp->setIsKill();

      assert(NumSubRegs % 2 == 0 && NumSubRegs <= 32 &&
             "Unhandled register size");

      for (unsigned Idx = 0; Idx < NumSubRegs; Idx += 2) {
            MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
            MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);

        BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), CurRegLo)
            .addReg(VScalarOp, VScalarOpUndef, TRI->getSubRegFromChannel(Idx));

        BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), CurRegHi)
            .addReg(VScalarOp, VScalarOpUndef,
                    TRI->getSubRegFromChannel(Idx + 1));

        Register CurReg = MRI.createVirtualRegister(&AMDGPU::SGPR_64RegClass);
        BuildMI(LoopBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), CurReg)

        Register NewCondReg = MRI.createVirtualRegister(BoolXExecRC);
        auto Cmp = BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_CMP_EQ_U64_e64),
        if (NumSubRegs <= 2)
          Cmp.addReg(VScalarOp);
          Cmp.addReg(VScalarOp, VScalarOpUndef,
                     TRI->getSubRegFromChannel(Idx, 2));

          CondReg = NewCondReg;

          Register AndReg = MRI.createVirtualRegister(BoolXExecRC);

      const auto *SScalarOpRC =
          TRI->getEquivalentSGPRClass(MRI.getRegClass(VScalarOp));
      Register SScalarOp = MRI.createVirtualRegister(SScalarOpRC);

          BuildMI(LoopBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), SScalarOp);
      unsigned Channel = 0;
      for (Register Piece : ReadlanePieces) {
        Merge.addReg(Piece).addImm(TRI->getSubRegFromChannel(Channel++));

      ScalarOp->setReg(SScalarOp);
      ScalarOp->setIsKill();

  Register SaveExec = MRI.createVirtualRegister(BoolXExecRC);
  MRI.setSimpleHint(SaveExec, CondReg);
 
 
  if (!Begin.isValid())
  if (!End.isValid()) {

  const auto *BoolXExecRC = TRI->getWaveMaskRegClass();

      MBB.computeRegisterLiveness(TRI, AMDGPU::SCC, MI,
                                  std::numeric_limits<unsigned>::max()) !=
    SaveSCCReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);

  Register SaveExec = MRI.createVirtualRegister(BoolXExecRC);

  for (auto I = Begin; I != AfterMI; I++) {
    for (auto &MO : I->all_uses())
      MRI.clearKillFlags(MO.getReg());

  MBB.addSuccessor(LoopBB);

    for (auto &Succ : RemainderBB->successors()) {
 
 
static std::tuple<unsigned, unsigned>

      TII.buildExtractSubReg(MI, MRI, Rsrc, &AMDGPU::VReg_128RegClass,
                             AMDGPU::sub0_sub1, &AMDGPU::VReg_64RegClass);

  Register Zero64 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
  Register SRsrcFormatLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
  Register SRsrcFormatHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
  Register NewSRsrc = MRI.createVirtualRegister(&AMDGPU::SGPR_128RegClass);
  uint64_t RsrcDataFormat = TII.getDefaultRsrcDataFormat();

      .addImm(AMDGPU::sub0_sub1)

  return std::tuple(RsrcPtr, NewSRsrc);
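// Operand legalization driver: fix up PHI/REG_SEQUENCE/INSERT_SUBREG inputs,
// SMRD and FLAT address operands, and MUBUF rsrc/soffset operands, falling
// back to an ADDR64 rewrite or a waterfall loop when a buffer resource is not
// uniform.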
 
 
  if (MI.getOpcode() == AMDGPU::PHI) {
    for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) {
      if (!MI.getOperand(i).isReg() || !MI.getOperand(i).getReg().isVirtual())
          MRI.getRegClass(MI.getOperand(i).getReg());
      if (RI.hasVectorRegisters(OpRC)) {
          VRC = &AMDGPU::VReg_1RegClass;
                    ? RI.getEquivalentAGPRClass(SRC)
                    : RI.getEquivalentVGPRClass(SRC);
                  ? RI.getEquivalentAGPRClass(VRC)
                  : RI.getEquivalentVGPRClass(VRC);

    for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
      if (!Op.isReg() || !Op.getReg().isVirtual())

  if (MI.getOpcode() == AMDGPU::REG_SEQUENCE) {
    if (RI.hasVGPRs(DstRC)) {
      for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
        if (!Op.isReg() || !Op.getReg().isVirtual())

  if (MI.getOpcode() == AMDGPU::INSERT_SUBREG) {
    if (DstRC != Src0RC) {

  if (MI.getOpcode() == AMDGPU::SI_INIT_M0) {
    if (Src.isReg() && RI.hasVectorRegisters(MRI.getRegClass(Src.getReg())))

  if (MI.getOpcode() == AMDGPU::S_BITREPLICATE_B64_B32 ||
      MI.getOpcode() == AMDGPU::S_QUADMASK_B32 ||
      MI.getOpcode() == AMDGPU::S_QUADMASK_B64 ||
      MI.getOpcode() == AMDGPU::S_WQM_B32 ||
      MI.getOpcode() == AMDGPU::S_WQM_B64 ||
      MI.getOpcode() == AMDGPU::S_INVERSE_BALLOT_U32 ||
      MI.getOpcode() == AMDGPU::S_INVERSE_BALLOT_U64) {
    if (Src.isReg() && RI.hasVectorRegisters(MRI.getRegClass(Src.getReg())))

                                    ? AMDGPU::OpName::rsrc
                                    : AMDGPU::OpName::srsrc;
    if (SRsrc && !RI.isSGPRClass(MRI.getRegClass(SRsrc->getReg())))

    AMDGPU::OpName SampOpName =
        isMIMG(MI) ? AMDGPU::OpName::ssamp : AMDGPU::OpName::samp;
    if (SSamp && !RI.isSGPRClass(MRI.getRegClass(SSamp->getReg())))

  if (MI.getOpcode() == AMDGPU::SI_CALL_ISEL) {
    if (!RI.isSGPRClass(MRI.getRegClass(Dest->getReg()))) {
      unsigned FrameSetupOpcode = getCallFrameSetupOpcode();
      unsigned FrameDestroyOpcode = getCallFrameDestroyOpcode();
      while (Start->getOpcode() != FrameSetupOpcode)
      while (End->getOpcode() != FrameDestroyOpcode)
      while (End != MBB.end() && End->isCopy() && End->getOperand(1).isReg() &&
             MI.definesRegister(End->getOperand(1).getReg(), nullptr))
 
 7346  if (
MI.getOpcode() == AMDGPU::S_SLEEP_VAR) {
 
 7348    Register Reg = 
MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
 
 7350        AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::src0);
 
 7360  if (
MI.getOpcode() == AMDGPU::TENSOR_LOAD_TO_LDS ||
 
 7361      MI.getOpcode() == AMDGPU::TENSOR_LOAD_TO_LDS_D2 ||
 
 7362      MI.getOpcode() == AMDGPU::TENSOR_STORE_FROM_LDS ||
 
 7363      MI.getOpcode() == AMDGPU::TENSOR_STORE_FROM_LDS_D2) {
 
 7365      if (Src.isReg() && RI.hasVectorRegisters(
MRI.getRegClass(Src.getReg())))
 
 7372  bool isSoffsetLegal = 
true;
 
 7374      AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::soffset);
 
 7375  if (SoffsetIdx != -1) {
 
 7378        !RI.isSGPRClass(
MRI.getRegClass(Soffset->
getReg()))) {
 
 7379      isSoffsetLegal = 
false;
 
 7383  bool isRsrcLegal = 
true;
 
 7385      AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::srsrc);
 
 7386  if (RsrcIdx != -1) {
 
 7389      isRsrcLegal = 
false;
 
 7393  if (isRsrcLegal && isSoffsetLegal)
 
 7417      Register NewVAddrLo = 
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
 
 7418      Register NewVAddrHi = 
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
 
 7419      Register NewVAddr = 
MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
 
 7421      const auto *BoolXExecRC = RI.getWaveMaskRegClass();
 
 7422      Register CondReg0 = 
MRI.createVirtualRegister(BoolXExecRC);
 
 7423      Register CondReg1 = 
MRI.createVirtualRegister(BoolXExecRC);
 
 7425      unsigned RsrcPtr, NewSRsrc;
 
 7432        .
addReg(RsrcPtr, 0, AMDGPU::sub0)
 
 7439        .
addReg(RsrcPtr, 0, AMDGPU::sub1)
 
 7453    } 
else if (!VAddr && ST.hasAddr64()) {
 
 7457             "FIXME: Need to emit flat atomics here");
 
 7459      unsigned RsrcPtr, NewSRsrc;
 
 7462      Register NewVAddr = 
MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
 
 7485          MIB.
addImm(CPol->getImm());
 
 7490          MIB.
addImm(TFE->getImm());
 
 7510      MI.removeFromParent();
 
 7515          .
addReg(RsrcPtr, 0, AMDGPU::sub0)
 
 7517          .
addReg(RsrcPtr, 0, AMDGPU::sub1)
 
 7521      if (!isSoffsetLegal) {
 
 7533  if (!isSoffsetLegal) {
 
 
 7545      AMDGPU::getNamedOperandIdx(
MI->getOpcode(), AMDGPU::OpName::srsrc);
 
 7546  if (RsrcIdx != -1) {
 
 7547    DeferredList.insert(
MI);
 
 
 7552  return DeferredList.contains(
MI);
 
 
 7562  if (!ST.useRealTrue16Insts())
 
 7565  unsigned Opcode = 
MI.getOpcode();
 
 7569      OpIdx >= 
get(Opcode).getNumOperands() ||
 
 7570      get(Opcode).operands()[
OpIdx].RegClass == -1)
 
 7574  if (!
Op.isReg() || !
Op.getReg().isVirtual())
 
 7578  if (!RI.isVGPRClass(CurrRC))
 
 7581  int16_t RCID = getOpRegClassID(
get(Opcode).operands()[
OpIdx]);
 
 7583  if (RI.getMatchingSuperRegClass(CurrRC, ExpectedRC, AMDGPU::lo16)) {
 
 7584    Op.setSubReg(AMDGPU::lo16);
 
 7585  } 
else if (RI.getMatchingSuperRegClass(ExpectedRC, CurrRC, AMDGPU::lo16)) {
 
 7587    Register NewDstReg = 
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
 
 7588    Register Undef = 
MRI.createVirtualRegister(&AMDGPU::VGPR_16RegClass);
 
 7595    Op.setReg(NewDstReg);
 
 
 7607  while (!Worklist.
empty()) {
 
 7621           "Deferred MachineInstr are not supposed to re-populate worklist");
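// moveToVALU: drain the worklist of SALU instructions whose operands or users
// force a VALU form. For each entry, moveToVALUImpl below either selects a
// direct VALU replacement opcode or dispatches to one of the split*/lower*
// helpers further down in this file.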
 
 
 7639  case AMDGPU::S_ADD_I32:
 
 7640  case AMDGPU::S_SUB_I32: {
 
 7644    std::tie(Changed, CreatedBBTmp) = moveScalarAddSub(Worklist, Inst, MDT);
 
 7652  case AMDGPU::S_MUL_U64:
 
 7653    if (ST.hasVectorMulU64()) {
 
 7654      NewOpcode = AMDGPU::V_MUL_U64_e64;
 
 7658    splitScalarSMulU64(Worklist, Inst, MDT);
 
 7662  case AMDGPU::S_MUL_U64_U32_PSEUDO:
 
 7663  case AMDGPU::S_MUL_I64_I32_PSEUDO:
 
 7666    splitScalarSMulPseudo(Worklist, Inst, MDT);
 
 7670  case AMDGPU::S_AND_B64:
 
 7671    splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_AND_B32, MDT);
 
 7675  case AMDGPU::S_OR_B64:
 
 7676    splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_OR_B32, MDT);
 
 7680  case AMDGPU::S_XOR_B64:
 
 7681    splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XOR_B32, MDT);
 
 7685  case AMDGPU::S_NAND_B64:
 
 7686    splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NAND_B32, MDT);
 
 7690  case AMDGPU::S_NOR_B64:
 
 7691    splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NOR_B32, MDT);
 
 7695  case AMDGPU::S_XNOR_B64:
 
 7696    if (ST.hasDLInsts())
 
 7697      splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XNOR_B32, MDT);
 
 7699      splitScalar64BitXnor(Worklist, Inst, MDT);
 
 7703  case AMDGPU::S_ANDN2_B64:
 
 7704    splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ANDN2_B32, MDT);
 
 7708  case AMDGPU::S_ORN2_B64:
 
 7709    splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ORN2_B32, MDT);
 
 7713  case AMDGPU::S_BREV_B64:
 
 7714    splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_BREV_B32, true);
 
 7718  case AMDGPU::S_NOT_B64:
 
 7719    splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_NOT_B32);
 
 7723  case AMDGPU::S_BCNT1_I32_B64:
 
 7724    splitScalar64BitBCNT(Worklist, Inst);
 
 7728  case AMDGPU::S_BFE_I64:
 
 7729    splitScalar64BitBFE(Worklist, Inst);
 
 7733  case AMDGPU::S_FLBIT_I32_B64:
 
 7734    splitScalar64BitCountOp(Worklist, Inst, AMDGPU::V_FFBH_U32_e32);
 
 7737  case AMDGPU::S_FF1_I32_B64:
 
 7738    splitScalar64BitCountOp(Worklist, Inst, AMDGPU::V_FFBL_B32_e32);
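    // The 64-bit scalar bit operations above have no single VALU equivalent,
    // so the splitScalar64Bit* helpers decompose them into two 32-bit VALU
    // halves (or a count/BFE sequence) and recombine the results.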
 
 7742  case AMDGPU::S_LSHL_B32:
 
 7743    if (ST.hasOnlyRevVALUShifts()) {
 
 7744      NewOpcode = AMDGPU::V_LSHLREV_B32_e64;
 
 7748  case AMDGPU::S_ASHR_I32:
 
 7749    if (ST.hasOnlyRevVALUShifts()) {
 
 7750      NewOpcode = AMDGPU::V_ASHRREV_I32_e64;
 
 7754  case AMDGPU::S_LSHR_B32:
 
 7755    if (ST.hasOnlyRevVALUShifts()) {
 
 7756      NewOpcode = AMDGPU::V_LSHRREV_B32_e64;
 
 7760  case AMDGPU::S_LSHL_B64:
 
 7761    if (ST.hasOnlyRevVALUShifts()) {
 
 7763                      ? AMDGPU::V_LSHLREV_B64_pseudo_e64
 
 7764                      : AMDGPU::V_LSHLREV_B64_e64;
 
 7768  case AMDGPU::S_ASHR_I64:
 
 7769    if (ST.hasOnlyRevVALUShifts()) {
 
 7770      NewOpcode = AMDGPU::V_ASHRREV_I64_e64;
 
 7774  case AMDGPU::S_LSHR_B64:
 
 7775    if (ST.hasOnlyRevVALUShifts()) {
 
 7776      NewOpcode = AMDGPU::V_LSHRREV_B64_e64;
 
 7781  case AMDGPU::S_ABS_I32:
 
 7782    lowerScalarAbs(Worklist, Inst);
 
 7786  case AMDGPU::S_CBRANCH_SCC0:
 
 7787  case AMDGPU::S_CBRANCH_SCC1: {
 
 7790    bool IsSCC = CondReg == AMDGPU::SCC;
 
 7798  case AMDGPU::S_BFE_U64:
 
 7799  case AMDGPU::S_BFM_B64:
 
 7802  case AMDGPU::S_PACK_LL_B32_B16:
 
 7803  case AMDGPU::S_PACK_LH_B32_B16:
 
 7804  case AMDGPU::S_PACK_HL_B32_B16:
 
 7805  case AMDGPU::S_PACK_HH_B32_B16:
 
 7806    movePackToVALU(Worklist, MRI, Inst);
 
 7810  case AMDGPU::S_XNOR_B32:
 
 7811    lowerScalarXnor(Worklist, Inst);
 
 7815  case AMDGPU::S_NAND_B32:
 
 7816    splitScalarNotBinop(Worklist, Inst, AMDGPU::S_AND_B32);
 
 7820  case AMDGPU::S_NOR_B32:
 
 7821    splitScalarNotBinop(Worklist, Inst, AMDGPU::S_OR_B32);
 
 7825  case AMDGPU::S_ANDN2_B32:
 
 7826    splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_AND_B32);
 
 7830  case AMDGPU::S_ORN2_B32:
 
 7831    splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_OR_B32);
 
 7839  case AMDGPU::S_ADD_CO_PSEUDO:
 
 7840  case AMDGPU::S_SUB_CO_PSEUDO: {
 
 7841    unsigned Opc = (Inst.getOpcode() == AMDGPU::S_ADD_CO_PSEUDO)
 
 7842                       ? AMDGPU::V_ADDC_U32_e64
 
 7843                       : AMDGPU::V_SUBB_U32_e64;
 
 7844    const auto *CarryRC = RI.getWaveMaskRegClass();
 
 7847    if (!MRI.constrainRegClass(CarryInReg, CarryRC)) {
 7848      Register NewCarryReg = MRI.createVirtualRegister(CarryRC);
 7855    Register DestReg = MRI.createVirtualRegister(RI.getEquivalentVGPRClass(
 7866    addUsersToMoveToVALUWorklist(DestReg, MRI, Worklist);
 
 7870  case AMDGPU::S_UADDO_PSEUDO:
 
 7871  case AMDGPU::S_USUBO_PSEUDO: {
 
 7878    unsigned Opc = (Inst.getOpcode() == AMDGPU::S_UADDO_PSEUDO)
 7879                       ? AMDGPU::V_ADD_CO_U32_e64
 7880                       : AMDGPU::V_SUB_CO_U32_e64;
 7882        RI.getEquivalentVGPRClass(MRI.getRegClass(Dest0.getReg()));
 7883    Register DestReg = MRI.createVirtualRegister(NewRC);
 7891    MRI.replaceRegWith(Dest0.getReg(), DestReg);
 
 7898  case AMDGPU::S_CSELECT_B32:
 
 7899  case AMDGPU::S_CSELECT_B64:
 
 7900    lowerSelect(Worklist, Inst, MDT);
 
 7903  case AMDGPU::S_CMP_EQ_I32:
 
 7904  case AMDGPU::S_CMP_LG_I32:
 
 7905  case AMDGPU::S_CMP_GT_I32:
 
 7906  case AMDGPU::S_CMP_GE_I32:
 
 7907  case AMDGPU::S_CMP_LT_I32:
 
 7908  case AMDGPU::S_CMP_LE_I32:
 
 7909  case AMDGPU::S_CMP_EQ_U32:
 
 7910  case AMDGPU::S_CMP_LG_U32:
 
 7911  case AMDGPU::S_CMP_GT_U32:
 
 7912  case AMDGPU::S_CMP_GE_U32:
 
 7913  case AMDGPU::S_CMP_LT_U32:
 
 7914  case AMDGPU::S_CMP_LE_U32:
 
 7915  case AMDGPU::S_CMP_EQ_U64:
 
 7916  case AMDGPU::S_CMP_LG_U64:
 
 7917  case AMDGPU::S_CMP_LT_F32:
 
 7918  case AMDGPU::S_CMP_EQ_F32:
 
 7919  case AMDGPU::S_CMP_LE_F32:
 
 7920  case AMDGPU::S_CMP_GT_F32:
 
 7921  case AMDGPU::S_CMP_LG_F32:
 
 7922  case AMDGPU::S_CMP_GE_F32:
 
 7923  case AMDGPU::S_CMP_O_F32:
 
 7924  case AMDGPU::S_CMP_U_F32:
 
 7925  case AMDGPU::S_CMP_NGE_F32:
 
 7926  case AMDGPU::S_CMP_NLG_F32:
 
 7927  case AMDGPU::S_CMP_NGT_F32:
 
 7928  case AMDGPU::S_CMP_NLE_F32:
 
 7929  case AMDGPU::S_CMP_NEQ_F32:
 
 7930  case AMDGPU::S_CMP_NLT_F32: {
 
 7931    Register CondReg = MRI.createVirtualRegister(RI.getWaveMaskRegClass());
 
 7935    if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src0_modifiers) >=
 
 7949    addSCCDefUsersToVALUWorklist(SCCOp, Inst, Worklist, CondReg);
 
 7953  case AMDGPU::S_CMP_LT_F16:
 
 7954  case AMDGPU::S_CMP_EQ_F16:
 
 7955  case AMDGPU::S_CMP_LE_F16:
 
 7956  case AMDGPU::S_CMP_GT_F16:
 
 7957  case AMDGPU::S_CMP_LG_F16:
 
 7958  case AMDGPU::S_CMP_GE_F16:
 
 7959  case AMDGPU::S_CMP_O_F16:
 
 7960  case AMDGPU::S_CMP_U_F16:
 
 7961  case AMDGPU::S_CMP_NGE_F16:
 
 7962  case AMDGPU::S_CMP_NLG_F16:
 
 7963  case AMDGPU::S_CMP_NGT_F16:
 
 7964  case AMDGPU::S_CMP_NLE_F16:
 
 7965  case AMDGPU::S_CMP_NEQ_F16:
 
 7966  case AMDGPU::S_CMP_NLT_F16: {
 
 7967    Register CondReg = MRI.createVirtualRegister(RI.getWaveMaskRegClass());
 
 7989    addSCCDefUsersToVALUWorklist(SCCOp, Inst, Worklist, CondReg);
 
 7993  case AMDGPU::S_CVT_HI_F32_F16: {
 
 7995    Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
 7996    Register NewDst = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
 7997    if (ST.useRealTrue16Insts()) {
 8002          .addReg(TmpReg, 0, AMDGPU::hi16)
 8018    addUsersToMoveToVALUWorklist(NewDst, MRI, Worklist);
 
 8022  case AMDGPU::S_MINIMUM_F32:
 
 8023  case AMDGPU::S_MAXIMUM_F32: {
 
 8025    Register NewDst = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
 8036    addUsersToMoveToVALUWorklist(NewDst, MRI, Worklist);
 
 8040  case AMDGPU::S_MINIMUM_F16:
 
 8041  case AMDGPU::S_MAXIMUM_F16: {
 
 8043    Register NewDst = MRI.createVirtualRegister(ST.useRealTrue16Insts()
 8044                                                    ? &AMDGPU::VGPR_16RegClass
 8045                                                    : &AMDGPU::VGPR_32RegClass);
 8057    addUsersToMoveToVALUWorklist(NewDst, MRI, Worklist);
 
 8061  case AMDGPU::V_S_EXP_F16_e64:
 
 8062  case AMDGPU::V_S_LOG_F16_e64:
 
 8063  case AMDGPU::V_S_RCP_F16_e64:
 
 8064  case AMDGPU::V_S_RSQ_F16_e64:
 
 8065  case AMDGPU::V_S_SQRT_F16_e64: {
 
 8067    Register NewDst = MRI.createVirtualRegister(ST.useRealTrue16Insts()
 8068                                                    ? &AMDGPU::VGPR_16RegClass
 8069                                                    : &AMDGPU::VGPR_32RegClass);
 8081    addUsersToMoveToVALUWorklist(NewDst, MRI, Worklist);
 
 8087  if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) {
 
 8095  if (NewOpcode == Opcode) {
 
 8103      Register NewDst = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
 8105              get(AMDGPU::V_READFIRSTLANE_B32), NewDst)
 8123      addUsersToMoveToVALUWorklist(DstReg, MRI, Worklist);
 8125      MRI.replaceRegWith(DstReg, NewDstReg);
 8126      MRI.clearKillFlags(NewDstReg);
 
 8140    if (ST.useRealTrue16Insts() && Inst.isCopy() &&
 8144      if (RI.getMatchingSuperRegClass(NewDstRC, SrcRegRC, AMDGPU::lo16)) {
 8145        Register NewDstReg = MRI.createVirtualRegister(NewDstRC);
 8146        Register Undef = MRI.createVirtualRegister(&AMDGPU::VGPR_16RegClass);
 
 8148                get(AMDGPU::IMPLICIT_DEF), Undef);
 
 8150                get(AMDGPU::REG_SEQUENCE), NewDstReg)
 
 8156        MRI.replaceRegWith(DstReg, NewDstReg);
 
 8157        addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
 8159      } else if (RI.getMatchingSuperRegClass(SrcRegRC, NewDstRC,
 8162        Register NewDstReg = MRI.createVirtualRegister(NewDstRC);
 8163        MRI.replaceRegWith(DstReg, NewDstReg);
 8164        addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
 8169    Register NewDstReg = MRI.createVirtualRegister(NewDstRC);
 8170    MRI.replaceRegWith(DstReg, NewDstReg);
 8172    addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
 
 8182    if (AMDGPU::getNamedOperandIdx(NewOpcode,
 
 8183                                   AMDGPU::OpName::src0_modifiers) >= 0)
 
 8187      NewInstr->addOperand(Src);
 
 8190    if (Opcode == AMDGPU::S_SEXT_I32_I8 || Opcode == AMDGPU::S_SEXT_I32_I16) {
 
 8193      unsigned Size = (Opcode == AMDGPU::S_SEXT_I32_I8) ? 8 : 16;
 
 8195      NewInstr.addImm(Size);
 8196    } else if (Opcode == AMDGPU::S_BCNT1_I32_B32) {
 8200    } else if (Opcode == AMDGPU::S_BFE_I32 || Opcode == AMDGPU::S_BFE_U32) {
 
 8205             "Scalar BFE is only implemented for constant width and offset");
 
 8213      if (AMDGPU::getNamedOperandIdx(NewOpcode,
 
 8214                                     AMDGPU::OpName::src1_modifiers) >= 0)
 
 8216      if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src1) >= 0)
 
 8218      if (AMDGPU::getNamedOperandIdx(NewOpcode,
 
 8219                                     AMDGPU::OpName::src2_modifiers) >= 0)
 
 8221      if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src2) >= 0)
 
 8223      if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::clamp) >= 0)
 
 8225      if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::omod) >= 0)
 
 8227      if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::op_sel) >= 0)
 
 8233      NewInstr->addOperand(Op);
 8240    if (Op.getReg() == AMDGPU::SCC) {
 8242      if (Op.isDef() && !Op.isDead())
 8243        addSCCDefUsersToVALUWorklist(Op, Inst, Worklist);
 
 8245        addSCCDefsToVALUWorklist(NewInstr, Worklist);
 
 8250  if (NewInstr->getOperand(0).isReg() && NewInstr->getOperand(0).isDef()) {
 
 8251    Register DstReg = NewInstr->getOperand(0).getReg();
 
 8256    NewDstReg = MRI.createVirtualRegister(NewDstRC);
 8257    MRI.replaceRegWith(DstReg, NewDstReg);
 8266    addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
 
 
 8270std::pair<bool, MachineBasicBlock *>
 
 8282    Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
 8285    assert(Opc == AMDGPU::S_ADD_I32 || Opc == AMDGPU::S_SUB_I32);
 8287    unsigned NewOpc = Opc == AMDGPU::S_ADD_I32 ?
 8288      AMDGPU::V_ADD_U32_e64 : AMDGPU::V_SUB_U32_e64;
 8296    MRI.replaceRegWith(OldDstReg, ResultReg);
 8299    addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
 8300    return std::pair(true, NewBB);
 8303  return std::pair(false, nullptr);
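// moveScalarAddSub rewrites S_ADD_I32 / S_SUB_I32 in place as V_ADD_U32_e64 /
// V_SUB_U32_e64 producing ResultReg, replaces the old destination, queues its
// users, and reports (true, NewBB); if it cannot do so it returns
// (false, nullptr) so the generic lowering path handles the instruction.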
 
 8320  bool IsSCC = (CondReg == AMDGPU::SCC);
 8328    MRI.replaceRegWith(Dest.getReg(), CondReg);
 8334    const TargetRegisterClass *TC = RI.getWaveMaskRegClass();
 8335    NewCondReg = MRI.createVirtualRegister(TC);
 8339    bool CopyFound = false;
 8340    for (MachineInstr &CandI :
 8343      if (CandI.findRegisterDefOperandIdx(AMDGPU::SCC, &RI, false, false) !=
 8345        if (CandI.isCopy() && CandI.getOperand(0).getReg() == AMDGPU::SCC) {
 8347              .addReg(CandI.getOperand(1).getReg());
 8359          ST.isWave64() ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
 8367      RI.getEquivalentVGPRClass(MRI.getRegClass(Dest.getReg())));
 8368  MachineInstr *NewInst;
 8369  if (Inst.getOpcode() == AMDGPU::S_CSELECT_B32) {
 8370    NewInst = BuildMI(MBB, MII, DL, get(AMDGPU::V_CNDMASK_B32_e64), NewDestReg)
 8383  MRI.replaceRegWith(Dest.getReg(), NewDestReg);
 8385  addUsersToMoveToVALUWorklist(NewDestReg, MRI, Worklist);

 8397  Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
 8398  Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
 8400  unsigned SubOp = ST.hasAddNoCarry() ?
 8401    AMDGPU::V_SUB_U32_e32 : AMDGPU::V_SUB_CO_U32_e32;
 8411  MRI.replaceRegWith(Dest.getReg(), ResultReg);
 8412  addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
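// lowerScalarAbs: S_ABS_I32 is expanded as a VALU subtract-from-zero into
// TmpReg followed by a V_MAX_I32 of the original source against that negated
// value, leaving |x| in ResultReg.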
 
 8426  if (ST.hasDLInsts()) {
 8427    Register NewDest = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
 8435    MRI.replaceRegWith(Dest.getReg(), NewDest);
 8436    addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
 8442    bool Src0IsSGPR = Src0.isReg() &&
 8443                      RI.isSGPRClass(MRI.getRegClass(Src0.getReg()));
 8444    bool Src1IsSGPR = Src1.isReg() &&
 8445                      RI.isSGPRClass(MRI.getRegClass(Src1.getReg()));
 8447    Register Temp = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
 8448    Register NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
 8458    } else if (Src1IsSGPR) {
 8472    MRI.replaceRegWith(Dest.getReg(), NewDest);
 8476    addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);

 8482                                      unsigned Opcode) const {
 8492  Register NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
 8493  Register Interm = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
 8505  MRI.replaceRegWith(Dest.getReg(), NewDest);
 8506  addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);

 8511                                     unsigned Opcode) const {
 8521  Register NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
 8522  Register Interm = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
 8534  MRI.replaceRegWith(Dest.getReg(), NewDest);
 8535  addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
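// lowerScalarXnor keeps XNOR as a single VALU xnor when the target has DL
// instructions; otherwise it rewrites it as a scalar NOT feeding an XOR,
// choosing which operand to invert based on which source is already an SGPR.
// splitScalarNotBinop and splitScalarBinOpN2 perform the analogous
// binop-plus-not expansions for S_NAND/S_NOR and S_ANDN2/S_ORN2 using the
// Interm temporary before the pieces are handed back to the worklist.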
 
 8550  const MCInstrDesc &InstDesc = get(Opcode);
 8551  const TargetRegisterClass *Src0RC = Src0.isReg() ?
 8553    &AMDGPU::SGPR_32RegClass;
 8555  const TargetRegisterClass *Src0SubRC =
 8556      RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
 8559                                                       AMDGPU::sub0, Src0SubRC);
 8561  const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
 8562  const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
 8563  const TargetRegisterClass *NewDestSubRC =
 8564      RI.getSubRegisterClass(NewDestRC, AMDGPU::sub0);
 8566  Register DestSub0 = MRI.createVirtualRegister(NewDestSubRC);
 8567  MachineInstr &LoHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub0).add(SrcReg0Sub0);
 8570                                                       AMDGPU::sub1, Src0SubRC);
 8572  Register DestSub1 = MRI.createVirtualRegister(NewDestSubRC);
 8573  MachineInstr &HiHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub1).add(SrcReg0Sub1);
 8578  Register FullDestReg = MRI.createVirtualRegister(NewDestRC);
 8585  MRI.replaceRegWith(Dest.getReg(), FullDestReg);
 8587  Worklist.insert(&LoHalf);
 8588  Worklist.insert(&HiHalf);
 8594  addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
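// splitScalar64BitUnaryOp: apply the 32-bit opcode separately to sub0 and
// sub1 of the 64-bit source, recombine the halves (via a REG_SEQUENCE) into
// FullDestReg, and queue both halves plus the users of the full result.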
 
 8605  Register FullDestReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
 8606  Register DestSub0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
 8607  Register DestSub1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
 8615  const TargetRegisterClass *Src0RC = MRI.getRegClass(Src0.getReg());
 8616  const TargetRegisterClass *Src1RC = MRI.getRegClass(Src1.getReg());
 8617  const TargetRegisterClass *Src0SubRC =
 8618      RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
 8619  if (RI.isSGPRClass(Src0SubRC))
 8620    Src0SubRC = RI.getEquivalentVGPRClass(Src0SubRC);
 8621  const TargetRegisterClass *Src1SubRC =
 8622      RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
 8623  if (RI.isSGPRClass(Src1SubRC))
 8624    Src1SubRC = RI.getEquivalentVGPRClass(Src1SubRC);
 8628  MachineOperand Op0L =
 8630  MachineOperand Op1L =
 8632  MachineOperand Op0H =
 8634  MachineOperand Op1H =
 8652  Register Op1L_Op0H_Reg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
 8653  MachineInstr *Op1L_Op0H =
 8658  Register Op1H_Op0L_Reg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
 8659  MachineInstr *Op1H_Op0L =
 8664  Register CarryReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
 8665  MachineInstr *Carry =
 8670  MachineInstr *LoHalf =
 8675  Register AddReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
 8680  MachineInstr *HiHalf =
 8691  MRI.replaceRegWith(Dest.getReg(), FullDestReg);
 8703  addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
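// splitScalarSMulU64: the 64 x 64 -> 64 product is built schoolbook-style
// from 32-bit pieces. The low half is Op0L * Op1L, and the high half
// accumulates mul_hi(Op0L, Op1L) with the two cross products held in
// Op1L_Op0H_Reg and Op1H_Op0L_Reg (CarryReg / AddReg carry the partial sums)
// before the halves are packed into FullDestReg.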
 
 8714  Register FullDestReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
 8715  Register DestSub0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
 8716  Register DestSub1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
 8724  const TargetRegisterClass *Src0RC = MRI.getRegClass(Src0.getReg());
 8725  const TargetRegisterClass *Src1RC = MRI.getRegClass(Src1.getReg());
 8726  const TargetRegisterClass *Src0SubRC =
 8727      RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
 8728  if (RI.isSGPRClass(Src0SubRC))
 8729    Src0SubRC = RI.getEquivalentVGPRClass(Src0SubRC);
 8730  const TargetRegisterClass *Src1SubRC =
 8731      RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
 8732  if (RI.isSGPRClass(Src1SubRC))
 8733    Src1SubRC = RI.getEquivalentVGPRClass(Src1SubRC);
 8737  MachineOperand Op0L =
 8739  MachineOperand Op1L =
 8743  unsigned NewOpc = Opc == AMDGPU::S_MUL_U64_U32_PSEUDO
 8744                        ? AMDGPU::V_MUL_HI_U32_e64
 8745                        : AMDGPU::V_MUL_HI_I32_e64;
 8746  MachineInstr *HiHalf =
 8749  MachineInstr *LoHalf =
 8760  MRI.replaceRegWith(Dest.getReg(), FullDestReg);
 8768  addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
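// splitScalarSMulPseudo handles the S_MUL_U64_U32 / S_MUL_I64_I32 pseudos,
// whose operands are known to fit in 32 bits: the high half is a single
// V_MUL_HI_{U32,I32} of the low subregisters, the low half an ordinary 32-bit
// multiply of the same operands, and the two are recombined into FullDestReg.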
 
 8784  const MCInstrDesc &InstDesc = get(Opcode);
 8785  const TargetRegisterClass *Src0RC = Src0.isReg() ?
 8787    &AMDGPU::SGPR_32RegClass;
 8789  const TargetRegisterClass *Src0SubRC =
 8790      RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
 8791  const TargetRegisterClass *Src1RC = Src1.isReg() ?
 8793    &AMDGPU::SGPR_32RegClass;
 8795  const TargetRegisterClass *Src1SubRC =
 8796      RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
 8799                                                       AMDGPU::sub0, Src0SubRC);
 8801                                                       AMDGPU::sub0, Src1SubRC);
 8803                                                       AMDGPU::sub1, Src0SubRC);
 8805                                                       AMDGPU::sub1, Src1SubRC);
 8807  const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
 8808  const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
 8809  const TargetRegisterClass *NewDestSubRC =
 8810      RI.getSubRegisterClass(NewDestRC, AMDGPU::sub0);
 8812  Register DestSub0 = MRI.createVirtualRegister(NewDestSubRC);
 8813  MachineInstr &LoHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub0)
 8817  Register DestSub1 = MRI.createVirtualRegister(NewDestSubRC);
 8818  MachineInstr &HiHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub1)
 8822  Register FullDestReg = MRI.createVirtualRegister(NewDestRC);
 8829  MRI.replaceRegWith(Dest.getReg(), FullDestReg);
 8831  Worklist.insert(&LoHalf);
 8832  Worklist.insert(&HiHalf);
 8835  addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
 
 8851  const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
 8853  Register Interm = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
 8855  MachineOperand* Op0;
 8856  MachineOperand* Op1;
 8869  Register NewDest = MRI.createVirtualRegister(DestRC);
 8875  MRI.replaceRegWith(Dest.getReg(), NewDest);
 
 8891  const MCInstrDesc &InstDesc = get(AMDGPU::V_BCNT_U32_B32_e64);
 8892  const TargetRegisterClass *SrcRC = Src.isReg() ?
 8893    MRI.getRegClass(Src.getReg()) :
 8894    &AMDGPU::SGPR_32RegClass;
 8896  Register MidReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
 8897  Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
 8899  const TargetRegisterClass *SrcSubRC =
 8900      RI.getSubRegisterClass(SrcRC, AMDGPU::sub0);
 8903                                                      AMDGPU::sub0, SrcSubRC);
 8905                                                      AMDGPU::sub1, SrcSubRC);
 8911  MRI.replaceRegWith(Dest.getReg(), ResultReg);
 8915  addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
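// splitScalar64BitBCNT: a 64-bit population count becomes two chained
// V_BCNT_U32_B32 operations, the second accumulating into the result of the
// first (MidReg), so ResultReg holds popcount(sub0) + popcount(sub1).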
 
 8934         Offset == 0 && "Not implemented");
 8937    Register MidRegLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
 8938    Register MidRegHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
 8939    Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
 8956    MRI.replaceRegWith(Dest.getReg(), ResultReg);
 8957    addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
 8962  Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
 8963  Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
 8967    .addReg(Src.getReg(), 0, AMDGPU::sub0);
 8970    .addReg(Src.getReg(), 0, AMDGPU::sub0)
 8975  MRI.replaceRegWith(Dest.getReg(), ResultReg);
 8976  addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
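// splitScalar64BitBFE (S_BFE_I64, offset 0 only): for a field narrower than
// 32 bits, the low 32 bits are sign-extended into MidRegLo and an arithmetic
// shift by 31 replicates the sign into MidRegHi; for a full 32-bit field the
// low half is sub0 itself and TmpReg holds its sign bits, with the pair then
// packed into the 64-bit ResultReg.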
 
 8995  const MCInstrDesc &InstDesc = get(Opcode);
 8997  bool IsCtlz = Opcode == AMDGPU::V_FFBH_U32_e32;
 8998  unsigned OpcodeAdd =
 8999      ST.hasAddNoCarry() ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_CO_U32_e32;
 9001  const TargetRegisterClass *SrcRC =
 9002      Src.isReg() ? MRI.getRegClass(Src.getReg()) : &AMDGPU::SGPR_32RegClass;
 9003  const TargetRegisterClass *SrcSubRC =
 9004      RI.getSubRegisterClass(SrcRC, AMDGPU::sub0);
 9006  MachineOperand SrcRegSub0 =
 9008  MachineOperand SrcRegSub1 =
 9011  Register MidReg1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
 9012  Register MidReg2 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
 9013  Register MidReg3 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
 9014  Register MidReg4 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
 9021      .addReg(IsCtlz ? MidReg1 : MidReg2)
 9027      .addReg(IsCtlz ? MidReg2 : MidReg1);
 9029  MRI.replaceRegWith(Dest.getReg(), MidReg4);
 9031  addUsersToMoveToVALUWorklist(MidReg4, MRI, Worklist);
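// splitScalar64BitCountOp (S_FLBIT_I32_B64 / S_FF1_I32_B64): count each
// 32-bit half with V_FFBH/V_FFBL, add 32 to the half that is searched second,
// then take the unsigned minimum of the two candidates into MidReg4; since
// the 32-bit count ops return all-ones for a zero input, the minimum also
// gives the right answer when one half is entirely zero.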
 
 9034void SIInstrInfo::addUsersToMoveToVALUWorklist(
 
 9038    MachineInstr &UseMI = *MO.getParent();
 9042    switch (UseMI.getOpcode()) {
 9045    case AMDGPU::SOFT_WQM:
 9046    case AMDGPU::STRICT_WWM:
 9047    case AMDGPU::STRICT_WQM:
 9048    case AMDGPU::REG_SEQUENCE:
 9050    case AMDGPU::INSERT_SUBREG:
 9053      OpNo = MO.getOperandNo();
 9058    MRI.constrainRegClass(DstReg, OpRC);
 9060    if (!RI.hasVectorRegisters(OpRC))
 
 9071  Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
 9077  if (ST.useRealTrue16Insts()) {
 9080      SrcReg0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
 9087      SrcReg1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
 9093    bool isSrc0Reg16 = MRI.constrainRegClass(SrcReg0, &AMDGPU::VGPR_16RegClass);
 9094    bool isSrc1Reg16 = MRI.constrainRegClass(SrcReg1, &AMDGPU::VGPR_16RegClass);
 9096    auto NewMI = BuildMI(*MBB, Inst, DL, get(AMDGPU::REG_SEQUENCE), ResultReg);
 9098    case AMDGPU::S_PACK_LL_B32_B16:
 9101                  isSrc0Reg16 ? AMDGPU::NoSubRegister : AMDGPU::lo16)
 9102          .addImm(AMDGPU::lo16)
 9104                  isSrc1Reg16 ? AMDGPU::NoSubRegister : AMDGPU::lo16)
 9105          .addImm(AMDGPU::hi16);
 9107    case AMDGPU::S_PACK_LH_B32_B16:
 9110                  isSrc0Reg16 ? AMDGPU::NoSubRegister : AMDGPU::lo16)
 9111          .addImm(AMDGPU::lo16)
 9112          .addReg(SrcReg1, 0, AMDGPU::hi16)
 9113          .addImm(AMDGPU::hi16);
 9115    case AMDGPU::S_PACK_HL_B32_B16:
 9116      NewMI.addReg(SrcReg0, 0, AMDGPU::hi16)
 9117          .addImm(AMDGPU::lo16)
 9119                  isSrc1Reg16 ? AMDGPU::NoSubRegister : AMDGPU::lo16)
 9120          .addImm(AMDGPU::hi16);
 9122    case AMDGPU::S_PACK_HH_B32_B16:
 9123      NewMI.addReg(SrcReg0, 0, AMDGPU::hi16)
 9124          .addImm(AMDGPU::lo16)
 9125          .addReg(SrcReg1, 0, AMDGPU::hi16)
 9126          .addImm(AMDGPU::hi16);
 9133    MRI.replaceRegWith(Dest.getReg(), ResultReg);
 9134    addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
 9139  case AMDGPU::S_PACK_LL_B32_B16: {
 9140    Register ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
 9141    Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
 9158  case AMDGPU::S_PACK_LH_B32_B16: {
 9159    Register ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
 9168  case AMDGPU::S_PACK_HL_B32_B16: {
 9169    Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
 9179  case AMDGPU::S_PACK_HH_B32_B16: {
 9180    Register ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
 9181    Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
 9198  MRI.replaceRegWith(Dest.getReg(), ResultReg);
 9199  addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
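// movePackToVALU: with real true16 instructions the S_PACK_* forms become a
// REG_SEQUENCE that places the selected lo16/hi16 halves of the (possibly
// copied-to-VGPR) sources directly into the 32-bit ResultReg; on other
// targets each pack is expanded with shift/and style VALU arithmetic using
// the ImmReg/TmpReg temporaries created per case.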
 
 9208  assert(Op.isReg() && Op.getReg() == AMDGPU::SCC && Op.isDef() &&
 9209         !Op.isDead() && Op.getParent() == &SCCDefInst);
 9210  SmallVector<MachineInstr *, 4> CopyToDelete;
 9213  for (MachineInstr &MI :
 9217    int SCCIdx = MI.findRegisterUseOperandIdx(AMDGPU::SCC, &RI, false);
 9220        MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
 9221        Register DestReg = MI.getOperand(0).getReg();
 9223        MRI.replaceRegWith(DestReg, NewCond);
 9228          MI.getOperand(SCCIdx).setReg(NewCond);
 9234    if (MI.findRegisterDefOperandIdx(AMDGPU::SCC, &RI, false, false) != -1)
 9237  for (auto &Copy : CopyToDelete)
 9238    Copy->eraseFromParent();
 
 9246void SIInstrInfo::addSCCDefsToVALUWorklist(MachineInstr *SCCUseInst,
 9252  for (MachineInstr &MI :
 9255    if (MI.modifiesRegister(AMDGPU::VCC, &RI))
 9257    if (MI.definesRegister(AMDGPU::SCC, &RI)) {
 9266  const TargetRegisterClass *NewDstRC = getOpRegClass(Inst, 0);
 
 9274  case AMDGPU::REG_SEQUENCE:
 
 9275  case AMDGPU::INSERT_SUBREG:
 
 9277  case AMDGPU::SOFT_WQM:
 
 9278  case AMDGPU::STRICT_WWM:
 
 9279  case AMDGPU::STRICT_WQM: {
 
 9281    if (RI.isAGPRClass(SrcRC)) {
 
 9282      if (RI.isAGPRClass(NewDstRC))
 
 9287      case AMDGPU::REG_SEQUENCE:
 
 9288      case AMDGPU::INSERT_SUBREG:
 
 9289        NewDstRC = RI.getEquivalentAGPRClass(NewDstRC);
 
 9292        NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
 
 9298      if (RI.isVGPRClass(NewDstRC) || NewDstRC == &AMDGPU::VReg_1RegClass)
 
 9301      NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
 
 9315                                   int OpIndices[3]) const {
 9316  const MCInstrDesc &Desc = MI.getDesc();
 9332  const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
 9334  for (unsigned i = 0; i < 3; ++i) {
 9335    int Idx = OpIndices[i];
 9339    const MachineOperand &MO = MI.getOperand(Idx);
 9345    const TargetRegisterClass *OpRC =
 9346        RI.getRegClass(getOpRegClassID(Desc.operands()[Idx]));
 9347    bool IsRequiredSGPR = RI.isSGPRClass(OpRC);
 9353    const TargetRegisterClass *RegRC = MRI.getRegClass(Reg);
 9354    if (RI.isSGPRClass(RegRC))
 9372    if (UsedSGPRs[0] == UsedSGPRs[1] || UsedSGPRs[0] == UsedSGPRs[2])
 9373      SGPRReg = UsedSGPRs[0];
 9376  if (!SGPRReg && UsedSGPRs[1]) {
 9377    if (UsedSGPRs[1] == UsedSGPRs[2])
 9378      SGPRReg = UsedSGPRs[1];
 9385                                             AMDGPU::OpName OperandName) const {
 9386  if (OperandName == AMDGPU::OpName::NUM_OPERAND_NAMES)
 9389  int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OperandName);
 9393  return &MI.getOperand(Idx);
 
 
 9407  if (ST.isAmdHsaOS()) {
 
 9410      RsrcDataFormat |= (1ULL << 56);
 
 9415      RsrcDataFormat |= (2ULL << 59);
 
 9418  return RsrcDataFormat;
 
 
 9428    uint64_t EltSizeValue = Log2_32(ST.getMaxPrivateElementSize(true)) - 1;
 9433  uint64_t IndexStride = ST.isWave64() ? 3 : 2;
 9440    Rsrc23 &= ~AMDGPU::RSRC_DATA_FORMAT;
 
 
 9446  unsigned Opc = MI.getOpcode();
 9452  return get(Opc).mayLoad() &&

 9457                                    int &FrameIndex) const {
 9459  if (!Addr || !Addr->isFI())

 9470                                        int &FrameIndex) const {

 9478                                          int &FrameIndex) const {

 9492                                         int &FrameIndex) const {

 9509  while (++I != E && I->isInsideBundle()) {
 9510    assert(!I->isBundle() && "No nested bundle!");

 9518  unsigned Opc = MI.getOpcode();
 9520  unsigned DescSize = Desc.getSize();
 9525    unsigned Size = DescSize;
 9529    if (MI.isBranch() && ST.hasOffset3fBug())
 9540    bool HasLiteral = false;
 9541    unsigned LiteralSize = 4;
 9542    for (int I = 0, E = MI.getNumExplicitOperands(); I != E; ++I) {
 9547        if (ST.has64BitLiterals()) {
 9548          switch (OpInfo.OperandType) {
 9564    return HasLiteral ? DescSize + LiteralSize : DescSize;
 9569    int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
 9573    int RSrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
 9574    return 8 + 4 * ((RSrcIdx - VAddr0Idx + 2) / 4);
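    // MIMG/VIMAGE size: an 8-byte base encoding plus one extra dword for
    // every group of up to four address registers beyond the first, which is
    // what the (RSrcIdx - VAddr0Idx + 2) / 4 rounding above computes.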
 
 9578  case TargetOpcode::BUNDLE:
 
 9580  case TargetOpcode::INLINEASM:
 
 9581  case TargetOpcode::INLINEASM_BR: {
 
 9583    const char *AsmStr = MI.getOperand(0).getSymbolName();
 9587    if (MI.isMetaInstruction())
 9591    const auto *D16Info = AMDGPU::getT16D16Helper(Opc);
 9594      unsigned LoInstOpcode = D16Info->LoOp;
 9596      DescSize = Desc.getSize();
 9600    if (Opc == AMDGPU::V_FMA_MIX_F16_t16 || Opc == AMDGPU::V_FMA_MIX_BF16_t16) {
 9603      DescSize = Desc.getSize();

 9614  if (MI.memoperands_empty())
 
 
 9626  static const std::pair<int, const char *> TargetIndices[] = {
 
 
 9664std::pair<unsigned, unsigned>
 
 9671  static const std::pair<unsigned, const char *> TargetFlags[] = {
 
 
 9689  static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
 
 
 9704    return AMDGPU::WWM_COPY;
 
 9706  return AMDGPU::COPY;
 
 
 9718  bool IsNullOrVectorRegister = true;
 9721    IsNullOrVectorRegister = !RI.isSGPRClass(RI.getRegClassForReg(MRI, Reg));
 9726  return IsNullOrVectorRegister &&
 9728          (Opcode == AMDGPU::IMPLICIT_DEF &&
 9730          (!MI.isTerminator() && Opcode != AMDGPU::COPY &&
 9731           MI.modifiesRegister(AMDGPU::EXEC, &RI)));

 9739  if (ST.hasAddNoCarry())
 9743  Register UnusedCarry = MRI.createVirtualRegister(RI.getBoolRC());
 9744  MRI.setRegAllocationHint(UnusedCarry, 0, RI.getVCC());

 9755  if (ST.hasAddNoCarry())
 9759  Register UnusedCarry = !RS.isRegUsed(AMDGPU::VCC)
 9761                             : RS.scavengeRegisterBackwards(
 9762                                   *RI.getBoolRC(), I, false,
 
 
 9775  case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:
 
 9776  case AMDGPU::SI_KILL_I1_TERMINATOR:
 
 
 9785  case AMDGPU::SI_KILL_F32_COND_IMM_PSEUDO:
 
 9786    return get(AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR);
 
 9787  case AMDGPU::SI_KILL_I1_PSEUDO:
 
 9788    return get(AMDGPU::SI_KILL_I1_TERMINATOR);
 
 
 9800  const unsigned OffsetBits =
 
 9802  return (1 << OffsetBits) - 1;
 
 
 9809  if (MI.isInlineAsm())
 9812  for (auto &Op : MI.implicit_operands()) {
 9813    if (Op.isReg() && Op.getReg() == AMDGPU::VCC)
 9814      Op.setReg(AMDGPU::VCC_LO);

 9823  int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sbase);
 9827  const int16_t RCID = getOpRegClassID(MI.getDesc().operands()[Idx]);
 9828  return RI.getRegClass(RCID)->hasSubClassEq(&AMDGPU::SGPR_128RegClass);
 
 
 9845    if (Imm <= MaxImm + 64) {
 
 9847      Overflow = Imm - MaxImm;
 
 9874    if (ST.hasRestrictedSOffset())
 
 
 9917  if (!ST.hasFlatInstOffsets())
 
 9925  if (ST.hasNegativeUnalignedScratchOffsetBug() &&
 
 
 9937std::pair<int64_t, int64_t>
 
 9940  int64_t RemainderOffset = COffsetVal;
 
 9941  int64_t ImmField = 0;
 
 9946  if (AllowNegative) {
 
 9948    int64_t D = 1LL << NumBits;
 9949    RemainderOffset = (COffsetVal / D) * D;
 
 9950    ImmField = COffsetVal - RemainderOffset;
 
 9952    if (ST.hasNegativeUnalignedScratchOffsetBug() &&
 
 9954        (ImmField % 4) != 0) {
 
 9956      RemainderOffset += ImmField % 4;
 
 9957      ImmField -= ImmField % 4;
 
 9959  } else if (COffsetVal >= 0) {
 
 9961    RemainderOffset = COffsetVal - ImmField;
 
 9965  assert(RemainderOffset + ImmField == COffsetVal);
 
 9966  return {ImmField, RemainderOffset};
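// splitFlatOffset: COffsetVal is split into an ImmField that fits the
// instruction's offset field and a RemainderOffset to be materialized
// separately; the negative path rounds toward the containing multiple of
// 2^NumBits, with a fixup for the unaligned-negative-scratch-offset bug.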
 
 
 9970  if (ST.hasNegativeScratchOffsetBug() &&
 
 
 9978  switch (ST.getGeneration()) {
 
 
10004  case AMDGPU::V_MOVRELS_B32_dpp_gfx10:
 
10005  case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
 
10006  case AMDGPU::V_MOVRELD_B32_dpp_gfx10:
 
10007  case AMDGPU::V_MOVRELD_B32_sdwa_gfx10:
 
10008  case AMDGPU::V_MOVRELSD_B32_dpp_gfx10:
 
10009  case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
 
10010  case AMDGPU::V_MOVRELSD_2_B32_dpp_gfx10:
 
10011  case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
 
 
10018#define GENERATE_RENAMED_GFX9_CASES(OPCODE)                                    \ 
10019  case OPCODE##_dpp:                                                           \ 
10020  case OPCODE##_e32:                                                           \ 
10021  case OPCODE##_e64:                                                           \ 
10022  case OPCODE##_e64_dpp:                                                       \ 
10023  case OPCODE##_sdwa: 
 
10037  case AMDGPU::V_DIV_FIXUP_F16_gfx9_e64:
 
10038  case AMDGPU::V_DIV_FIXUP_F16_gfx9_fake16_e64:
 
10039  case AMDGPU::V_FMA_F16_gfx9_e64:
 
10040  case AMDGPU::V_FMA_F16_gfx9_fake16_e64:
 
10041  case AMDGPU::V_INTERP_P2_F16:
 
10042  case AMDGPU::V_MAD_F16_e64:
 
10043  case AMDGPU::V_MAD_U16_e64:
 
10044  case AMDGPU::V_MAD_I16_e64:
 
 
10066    switch (ST.getGeneration()) {
 
10079  if (isMAI(Opcode)) {
10087  if (MCOp == (uint16_t)-1 && ST.hasGFX1250Insts())
10094  if (ST.hasGFX90AInsts()) {
10096    if (ST.hasGFX940Insts())

10127  for (unsigned I = 0, E = (MI.getNumOperands() - 1)/ 2; I < E; ++I)
10128    if (MI.getOperand(1 + 2 * I + 1).getImm() == SubReg) {
10129      auto &RegOp = MI.getOperand(1 + 2 * I);

10141  switch (MI.getOpcode()) {
10143  case AMDGPU::REG_SEQUENCE:
10147  case AMDGPU::INSERT_SUBREG:
10148    if (RSR.SubReg == (unsigned)MI.getOperand(3).getImm())

10165  if (!P.Reg.isVirtual())
10169  auto *DefInst = MRI.getVRegDef(RSR.Reg);
10170  while (auto *MI = DefInst) {
10172    switch (MI->getOpcode()) {
10174    case AMDGPU::V_MOV_B32_e32: {
10175      auto &Op1 = MI->getOperand(1);
10180        DefInst = MRI.getVRegDef(RSR.Reg);
10188        DefInst = MRI.getVRegDef(RSR.Reg);
 
 
10201  assert(MRI.isSSA() && "Must be run on SSA");
10203  auto *TRI = MRI.getTargetRegisterInfo();
10204  auto *DefBB = DefMI.getParent();
10208  if (UseMI.getParent() != DefBB)
10211  const int MaxInstScan = 20;
10215  auto E = UseMI.getIterator();
10216  for (auto I = std::next(DefMI.getIterator()); I != E; ++I) {
10217    if (I->isDebugInstr())
10220    if (++NumInst > MaxInstScan)
10223    if (I->modifiesRegister(AMDGPU::EXEC, TRI))

10233  assert(MRI.isSSA() && "Must be run on SSA");
10235  auto *TRI = MRI.getTargetRegisterInfo();
10236  auto *DefBB = DefMI.getParent();
10238  const int MaxUseScan = 10;
10241  for (auto &Use : MRI.use_nodbg_operands(VReg)) {
10242    auto &UseInst = *Use.getParent();
10245    if (UseInst.getParent() != DefBB || UseInst.isPHI())
10248    if (++NumUse > MaxUseScan)
10255  const int MaxInstScan = 20;
10259  for (auto I = std::next(DefMI.getIterator()); ; ++I) {
10262    if (I->isDebugInstr())
10265    if (++NumInst > MaxInstScan)
10278        if (Reg == VReg && --NumUse == 0)
10280      } else if (TRI->regsOverlap(Reg, AMDGPU::EXEC))

10289  auto Cur = MBB.begin();
10290  if (Cur != MBB.end())
10292      if (!Cur->isPHI() && Cur->readsRegister(Dst, nullptr))
10295    } while (Cur != MBB.end() && Cur != LastPHIIt);
 
 
10304  if (InsPt != MBB.end() &&
10305      (InsPt->getOpcode() == AMDGPU::SI_IF ||
10306       InsPt->getOpcode() == AMDGPU::SI_ELSE ||
10307       InsPt->getOpcode() == AMDGPU::SI_IF_BREAK) &&
10308      InsPt->definesRegister(Src, nullptr)) {
10312        .addReg(Src, 0, SrcSubReg)

10337  if (isFullCopyInstr(MI)) {
10338    Register DstReg = MI.getOperand(0).getReg();
10339    Register SrcReg = MI.getOperand(1).getReg();
10346        MRI.constrainRegClass(VirtReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);
10350        MRI.constrainRegClass(VirtReg, &AMDGPU::SReg_64_XEXECRegClass);

10361                                      unsigned *PredCost) const {
10362  if (MI.isBundle()) {
10365    unsigned Lat = 0, Count = 0;
10366    for (++I; I != E && I->isBundledWithPred(); ++I) {
10368      Lat = std::max(Lat, SchedModel.computeInstrLatency(&*I));
10370    return Lat + Count - 1;
10373  return SchedModel.computeInstrLatency(&MI);
 
 
10379  unsigned Opcode = MI.getOpcode();
10384                                       : MI.getOperand(1).getReg();
10385    LLT DstTy = MRI.getType(Dst);
10386    LLT SrcTy = MRI.getType(Src);
10388    unsigned SrcAS = SrcTy.getAddressSpace();
10391                   ST.hasGloballyAddressableScratch()
10399  if (Opcode == TargetOpcode::G_ADDRSPACE_CAST)
10400    return HandleAddrSpaceCast(MI);
10403    auto IID = GI->getIntrinsicID();
10410    case Intrinsic::amdgcn_addrspacecast_nonnull:
10411      return HandleAddrSpaceCast(MI);
10412    case Intrinsic::amdgcn_if:
10413    case Intrinsic::amdgcn_else:
10427  if (Opcode == AMDGPU::G_LOAD || Opcode == AMDGPU::G_ZEXTLOAD ||
10428      Opcode == AMDGPU::G_SEXTLOAD) {
10429    if (MI.memoperands_empty())
10433          return mmo->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS ||
10434                 mmo->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS;
10442  if (SIInstrInfo::isGenericAtomicRMWOpcode(Opcode) ||
10443      Opcode == AMDGPU::G_ATOMIC_CMPXCHG ||
10444      Opcode == AMDGPU::G_ATOMIC_CMPXCHG_WITH_SUCCESS ||

10457  unsigned opcode = MI.getOpcode();
10458  if (opcode == AMDGPU::V_READLANE_B32 ||
10459      opcode == AMDGPU::V_READFIRSTLANE_B32 ||
10460      opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR)
10463  if (isCopyInstr(MI)) {
10467          RI.getPhysRegBaseClass(srcOp.getReg());
10475  if (MI.isPreISelOpcode())
10490    if (MI.memoperands_empty())
10494          return mmo->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS ||
10495                 mmo->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS;
10510  for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
10512    if (!SrcOp.isReg())
10516    if (!Reg || !SrcOp.readsReg())
10522    if (RegBank && RegBank->getID() != AMDGPU::SGPRRegBankID)
 
 
10549        F, "ds_ordered_count unsupported for this calling conv"));
 
 
10563                                 Register &SrcReg2, int64_t &CmpMask,
 
10564                                 int64_t &CmpValue) const {
10565  if (!MI.getOperand(0).isReg() || MI.getOperand(0).getSubReg())
10568  switch (MI.getOpcode()) {
 
10571  case AMDGPU::S_CMP_EQ_U32:
 
10572  case AMDGPU::S_CMP_EQ_I32:
 
10573  case AMDGPU::S_CMP_LG_U32:
 
10574  case AMDGPU::S_CMP_LG_I32:
 
10575  case AMDGPU::S_CMP_LT_U32:
 
10576  case AMDGPU::S_CMP_LT_I32:
 
10577  case AMDGPU::S_CMP_GT_U32:
 
10578  case AMDGPU::S_CMP_GT_I32:
 
10579  case AMDGPU::S_CMP_LE_U32:
 
10580  case AMDGPU::S_CMP_LE_I32:
 
10581  case AMDGPU::S_CMP_GE_U32:
 
10582  case AMDGPU::S_CMP_GE_I32:
 
10583  case AMDGPU::S_CMP_EQ_U64:
 
10584  case AMDGPU::S_CMP_LG_U64:
 
10585    SrcReg = MI.getOperand(0).getReg();
10586    if (MI.getOperand(1).isReg()) {
10587      if (MI.getOperand(1).getSubReg())
10589      SrcReg2 = MI.getOperand(1).getReg();
10591    } else if (MI.getOperand(1).isImm()) {
10593      CmpValue = MI.getOperand(1).getImm();
 
10599  case AMDGPU::S_CMPK_EQ_U32:
 
10600  case AMDGPU::S_CMPK_EQ_I32:
 
10601  case AMDGPU::S_CMPK_LG_U32:
 
10602  case AMDGPU::S_CMPK_LG_I32:
 
10603  case AMDGPU::S_CMPK_LT_U32:
 
10604  case AMDGPU::S_CMPK_LT_I32:
 
10605  case AMDGPU::S_CMPK_GT_U32:
 
10606  case AMDGPU::S_CMPK_GT_I32:
 
10607  case AMDGPU::S_CMPK_LE_U32:
 
10608  case AMDGPU::S_CMPK_LE_I32:
 
10609  case AMDGPU::S_CMPK_GE_U32:
 
10610  case AMDGPU::S_CMPK_GE_I32:
 
10611    SrcReg = MI.getOperand(0).getReg();
10613    CmpValue = MI.getOperand(1).getImm();
 
 
10630    if (MI.modifiesRegister(AMDGPU::SCC, &RI))
10632    if (MI.killsRegister(AMDGPU::SCC, &RI))
10637    SccDef->setIsDead(false);
 
 
10645  if (Def.getOpcode() != AMDGPU::S_CSELECT_B32 &&
 
10646      Def.getOpcode() != AMDGPU::S_CSELECT_B64)
 
10648  bool Op1IsNonZeroImm =
 
10649      Def.getOperand(1).isImm() && Def.getOperand(1).getImm() != 0;
 
10650  bool Op2IsZeroImm =
 
10651      Def.getOperand(2).isImm() && Def.getOperand(2).getImm() == 0;
 
10652  if (!Op1IsNonZeroImm || !Op2IsZeroImm)
 
 
10658                                       Register SrcReg2, int64_t CmpMask,
 
10667  const auto optimizeCmpSelect = [&CmpInstr, SrcReg, CmpValue, MRI,
10673    if (!Def || Def->getParent() != CmpInstr.getParent())
10695  const auto optimizeCmpAnd = [&CmpInstr, SrcReg, CmpValue, MRI,
10696                               this](int64_t ExpectedValue, unsigned SrcSize,
10697                                     bool IsReversible, bool IsSigned) -> bool {
10722    if (!Def || Def->getParent() != CmpInstr.getParent())
 
10725    if (Def->getOpcode() != AMDGPU::S_AND_B32 &&
 
10726        Def->getOpcode() != AMDGPU::S_AND_B64)
 
10730    const auto isMask = [&Mask, SrcSize](const MachineOperand *MO) -> bool {
 
10741      SrcOp = &Def->getOperand(2);
 
10742    else if (isMask(&Def->getOperand(2)))
 
10743      SrcOp = &Def->getOperand(1);
 
10751    if (IsSigned && BitNo == SrcSize - 1)
 
10754    ExpectedValue <<= BitNo;
 
10756    bool IsReversedCC = false;
 
10757    if (CmpValue != ExpectedValue) {
 
10760      IsReversedCC = CmpValue == (ExpectedValue ^ Mask);
 
10765    Register DefReg = Def->getOperand(0).getReg();
 
10766    if (IsReversedCC && !MRI->hasOneNonDBGUse(DefReg))
10772    if (!MRI->use_nodbg_empty(DefReg)) {
 
10780    unsigned NewOpc = (SrcSize == 32) ? IsReversedCC ? AMDGPU::S_BITCMP0_B32
 
10781                                                     : AMDGPU::S_BITCMP1_B32
 
10782                                      : IsReversedCC ? AMDGPU::S_BITCMP0_B64
 
10783                                                     : AMDGPU::S_BITCMP1_B64;
 
10788    Def->eraseFromParent();
 
10796  case AMDGPU::S_CMP_EQ_U32:
10797  case AMDGPU::S_CMP_EQ_I32:
10798  case AMDGPU::S_CMPK_EQ_U32:
10799  case AMDGPU::S_CMPK_EQ_I32:
10800    return optimizeCmpAnd(1, 32, true, false);
10801  case AMDGPU::S_CMP_GE_U32:
10802  case AMDGPU::S_CMPK_GE_U32:
10803    return optimizeCmpAnd(1, 32, false, false);
10804  case AMDGPU::S_CMP_GE_I32:
10805  case AMDGPU::S_CMPK_GE_I32:
10806    return optimizeCmpAnd(1, 32, false, true);
10807  case AMDGPU::S_CMP_EQ_U64:
10808    return optimizeCmpAnd(1, 64, true, false);
10809  case AMDGPU::S_CMP_LG_U32:
10810  case AMDGPU::S_CMP_LG_I32:
10811  case AMDGPU::S_CMPK_LG_U32:
10812  case AMDGPU::S_CMPK_LG_I32:
10813    return optimizeCmpAnd(0, 32, true, false) || optimizeCmpSelect();
10814  case AMDGPU::S_CMP_GT_U32:
10815  case AMDGPU::S_CMPK_GT_U32:
10816    return optimizeCmpAnd(0, 32, false, false);
10817  case AMDGPU::S_CMP_GT_I32:
10818  case AMDGPU::S_CMPK_GT_I32:
10819    return optimizeCmpAnd(0, 32, false, true);
10820  case AMDGPU::S_CMP_LG_U64:
10821    return optimizeCmpAnd(0, 64, true, false) || optimizeCmpSelect();
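// optimizeCompareInstr: an S_CMP against 0/1 whose source is an S_AND with a
// single-bit mask is rewritten as S_BITCMP0/S_BITCMP1 on that bit (the EQ vs
// LG variants and a reversed condition select the polarity); the LG-with-zero
// compares can alternatively fold a foldable S_CSELECT through
// optimizeCmpSelect so the existing SCC value is reused directly.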
 
 
10828                                            AMDGPU::OpName OpName) const {
10829  if (!ST.needsAlignedVGPRs())
10832  int OpNo = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OpName);
10844  bool IsAGPR = RI.isAGPR(MRI, DataReg);
10846      IsAGPR ? &AMDGPU::AGPR_32RegClass : &AMDGPU::VGPR_32RegClass);
10849      MRI.createVirtualRegister(IsAGPR ? &AMDGPU::AReg_64_Align2RegClass
10850                                       : &AMDGPU::VReg_64_Align2RegClass);
10852      .addReg(DataReg, 0, Op.getSubReg())
10857  Op.setSubReg(AMDGPU::sub0);

10879  unsigned Opcode = MI.getOpcode();
10885      Opcode == AMDGPU::V_ACCVGPR_WRITE_B32_e64 ||
10886      Opcode == AMDGPU::V_ACCVGPR_READ_B32_e64)
10889  if (!ST.hasGFX940Insts())
 
 
unsigned const MachineRegisterInfo * MRI
 
MachineInstrBuilder & UseMI
 
MachineInstrBuilder MachineInstrBuilder & DefMI
 
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
 
Contains the definition of a TargetInstrInfo class that is common to all AMD GPUs.
 
AMDGPU Register Bank Select
 
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
 
MachineBasicBlock MachineBasicBlock::iterator MBBI
 
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
 
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
 
AMD GCN specific subclass of TargetSubtarget.
 
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
 
const HexagonInstrInfo * TII
 
std::pair< Instruction::BinaryOps, Value * > OffsetOp
Find all possible pairs (BinOp, RHS) that BinOp V, RHS can be simplified.
 
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
 
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
 
static bool isUndef(const MachineInstr &MI)
 
TargetInstrInfo::RegSubRegPair RegSubRegPair
 
Register const TargetRegisterInfo * TRI
 
Promote Memory to Register
 
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
 
MachineInstr unsigned OpIdx
 
uint64_t IntrinsicInst * II
 
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
 
const SmallVectorImpl< MachineOperand > & Cond
 
This file declares the machine register scavenger class.
 
static cl::opt< bool > Fix16BitCopies("amdgpu-fix-16-bit-physreg-copies", cl::desc("Fix copies between 32 and 16 bit registers by extending to 32 bit"), cl::init(true), cl::ReallyHidden)
 
static void expandSGPRCopy(const SIInstrInfo &TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, const TargetRegisterClass *RC, bool Forward)
 
static unsigned getNewFMAInst(const GCNSubtarget &ST, unsigned Opc)
 
static void indirectCopyToAGPR(const SIInstrInfo &TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, RegScavenger &RS, bool RegsOverlap, Register ImpDefSuperReg=Register(), Register ImpUseSuperReg=Register())
Handle copying from SGPR to AGPR, or from AGPR to AGPR on GFX908.
 
static unsigned getIndirectSGPRWriteMovRelPseudo32(unsigned VecSize)
 
static bool compareMachineOp(const MachineOperand &Op0, const MachineOperand &Op1)
 
static bool isStride64(unsigned Opc)
 
#define GENERATE_RENAMED_GFX9_CASES(OPCODE)
 
static std::tuple< unsigned, unsigned > extractRsrcPtr(const SIInstrInfo &TII, MachineInstr &MI, MachineOperand &Rsrc)
 
static bool followSubRegDef(MachineInstr &MI, TargetInstrInfo::RegSubRegPair &RSR)
 
static unsigned getIndirectSGPRWriteMovRelPseudo64(unsigned VecSize)
 
static MachineInstr * swapImmOperands(MachineInstr &MI, MachineOperand &NonRegOp1, MachineOperand &NonRegOp2)
 
static void copyFlagsToImplicitVCC(MachineInstr &MI, const MachineOperand &Orig)
 
static void emitLoadScalarOpsFromVGPRLoop(const SIInstrInfo &TII, MachineRegisterInfo &MRI, MachineBasicBlock &LoopBB, MachineBasicBlock &BodyBB, const DebugLoc &DL, ArrayRef< MachineOperand * > ScalarOps)
 
static bool offsetsDoNotOverlap(LocationSize WidthA, int OffsetA, LocationSize WidthB, int OffsetB)
 
static unsigned getWWMRegSpillSaveOpcode(unsigned Size, bool IsVectorSuperClass)
 
static bool memOpsHaveSameBaseOperands(ArrayRef< const MachineOperand * > BaseOps1, ArrayRef< const MachineOperand * > BaseOps2)
 
static bool optimizeSCC(MachineInstr *SCCValid, MachineInstr *SCCRedefine, const SIRegisterInfo &RI)
 
static unsigned getWWMRegSpillRestoreOpcode(unsigned Size, bool IsVectorSuperClass)
 
static bool getFoldableImm(Register Reg, const MachineRegisterInfo &MRI, int64_t &Imm, MachineInstr **DefMI=nullptr)
 
static unsigned getIndirectVGPRWriteMovRelPseudoOpc(unsigned VecSize)
 
static unsigned subtargetEncodingFamily(const GCNSubtarget &ST)
 
static void preserveCondRegFlags(MachineOperand &CondReg, const MachineOperand &OrigCond)
 
static Register findImplicitSGPRRead(const MachineInstr &MI)
 
static unsigned getNewFMAAKInst(const GCNSubtarget &ST, unsigned Opc)
 
static cl::opt< unsigned > BranchOffsetBits("amdgpu-s-branch-bits", cl::ReallyHidden, cl::init(16), cl::desc("Restrict range of branch instructions (DEBUG)"))
 
static void updateLiveVariables(LiveVariables *LV, MachineInstr &MI, MachineInstr &NewMI)
 
static bool memOpsHaveSameBasePtr(const MachineInstr &MI1, ArrayRef< const MachineOperand * > BaseOps1, const MachineInstr &MI2, ArrayRef< const MachineOperand * > BaseOps2)
 
static unsigned getSGPRSpillRestoreOpcode(unsigned Size)
 
static bool isRegOrFI(const MachineOperand &MO)
 
static unsigned getSGPRSpillSaveOpcode(unsigned Size)
 
static constexpr AMDGPU::OpName ModifierOpNames[]
 
static unsigned getVGPRSpillSaveOpcode(unsigned Size)
 
static void reportIllegalCopy(const SIInstrInfo *TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, const char *Msg="illegal VGPR to SGPR copy")
 
static MachineInstr * swapRegAndNonRegOperand(MachineInstr &MI, MachineOperand &RegOp, MachineOperand &NonRegOp)
 
static bool shouldReadExec(const MachineInstr &MI)
 
static unsigned getNewFMAMKInst(const GCNSubtarget &ST, unsigned Opc)
 
static bool isRenamedInGFX9(int Opcode)
 
static TargetInstrInfo::RegSubRegPair getRegOrUndef(const MachineOperand &RegOpnd)
 
static bool changesVGPRIndexingMode(const MachineInstr &MI)
 
static bool isSubRegOf(const SIRegisterInfo &TRI, const MachineOperand &SuperVec, const MachineOperand &SubReg)
 
static bool foldableSelect(const MachineInstr &Def)
 
static bool nodesHaveSameOperandValue(SDNode *N0, SDNode *N1, AMDGPU::OpName OpName)
Returns true if both nodes have the same value for the given operand Op, or if both nodes do not have this operand.
 
static unsigned getAVSpillSaveOpcode(unsigned Size)
 
static unsigned getNumOperandsNoGlue(SDNode *Node)
 
static bool canRemat(const MachineInstr &MI)
 
static MachineBasicBlock * loadMBUFScalarOperandsFromVGPR(const SIInstrInfo &TII, MachineInstr &MI, ArrayRef< MachineOperand * > ScalarOps, MachineDominatorTree *MDT, MachineBasicBlock::iterator Begin=nullptr, MachineBasicBlock::iterator End=nullptr)
 
static unsigned getAVSpillRestoreOpcode(unsigned Size)
 
static unsigned getVGPRSpillRestoreOpcode(unsigned Size)
 
Interface definition for SIInstrInfo.
 
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
 
const unsigned CSelectOpc
 
static const LaneMaskConstants & get(const GCNSubtarget &ST)
 
const unsigned XorTermOpc
 
const unsigned OrSaveExecOpc
 
const unsigned AndSaveExecOpc
 
static LLVM_ABI Semantics SemanticsToEnum(const llvm::fltSemantics &Sem)
 
Class for arbitrary precision integers.
 
int64_t getSExtValue() const
Get sign extended value.
 
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
 
const T & front() const
front - Get the first element.
 
size_t size() const
size - Get the array size.
 
bool empty() const
empty - Check if the array is empty.
 
uint64_t getZExtValue() const
 
Diagnostic information for unsupported feature in backend.
 
void changeImmediateDominator(DomTreeNodeBase< NodeT > *N, DomTreeNodeBase< NodeT > *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node's...
 
DomTreeNodeBase< NodeT > * addNewBlock(NodeT *BB, NodeT *DomBB)
Add a new node to the dominator tree information.
 
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
 
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this functio...
 
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
 
bool hasAddNoCarry() const
 
CycleT * getCycle(const BlockT *Block) const
Find the innermost cycle containing a given block.
 
void getExitingBlocks(SmallVectorImpl< BlockT * > &TmpStorage) const
Return all blocks of this cycle that have successor outside of this cycle.
 
bool contains(const BlockT *Block) const
Return whether Block is contained in the cycle.
 
const GenericCycle * getParentCycle() const
 
Itinerary data supplied by a subtarget to be used by a target.
 
constexpr unsigned getAddressSpace() const
 
This is an important class for using LLVM in a threaded context.
 
LiveInterval - This class represents the liveness of a register, or stack slot.
 
bool hasInterval(Register Reg) const
 
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
 
LiveInterval & getInterval(Register Reg)
 
LLVM_ABI bool shrinkToUses(LiveInterval *li, SmallVectorImpl< MachineInstr * > *dead=nullptr)
After removing some uses of a register, shrink its live range to just the remaining uses.
 
SlotIndex ReplaceMachineInstrInMaps(MachineInstr &MI, MachineInstr &NewMI)
 
This class represents the liveness of a register, stack slot, etc.
 
LLVM_ABI void replaceKillInstruction(Register Reg, MachineInstr &OldMI, MachineInstr &NewMI)
replaceKillInstruction - Update register kill info by replacing a kill instruction with a new one.
 
LLVM_ABI VarInfo & getVarInfo(Register Reg)
getVarInfo - Return the VarInfo structure for the specified VIRTUAL register.
 
static LocationSize precise(uint64_t Value)
 
TypeSize getValue() const
 
static const MCBinaryExpr * createAnd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
 
static const MCBinaryExpr * createAShr(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
 
static const MCBinaryExpr * createSub(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
 
static LLVM_ABI const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
 
Describe properties that are true of each instruction in the target description file.
 
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
 
ArrayRef< MCOperandInfo > operands() const
 
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
 
unsigned getSize() const
Return the number of bytes in the encoding of this instruction, or zero if the encoding size cannot b...
 
ArrayRef< MCPhysReg > implicit_uses() const
Return a list of registers that are potentially read by any instance of this machine instruction.
 
unsigned getOpcode() const
Return the opcode number for this descriptor.
 
This holds information about one operand of a machine instruction, indicating the register class for ...
 
uint8_t OperandType
Information about the type of the operand.
 
int16_t RegClass
This specifies the register class enumeration of the operand if the operand is a register.
 
Wrapper class representing physical registers. Should be passed by value.
 
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
 
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
 
LLVM_ABI void setVariableValue(const MCExpr *Value)
 
Helper class for constructing bundles of MachineInstrs.
 
MachineBasicBlock::instr_iterator begin() const
Return an iterator to the first bundled instruction.
 
MIBundleBuilder & append(MachineInstr *MI)
Insert MI into MBB by appending it to the instructions in the bundle.
 
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
 
LLVM_ABI MCSymbol * getSymbol() const
Return the MCSymbol for this basic block.
 
LLVM_ABI instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
 
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
 
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
 
MachineInstrBundleIterator< MachineInstr, true > reverse_iterator
 
Instructions::const_iterator const_instr_iterator
 
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
 
iterator_range< succ_iterator > successors()
 
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
 
MachineInstrBundleIterator< MachineInstr > iterator
 
@ LQR_Dead
Register is known to be fully dead.
 
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
 
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
 
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
 
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
 
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
 
void push_back(MachineBasicBlock *MBB)
 
MCContext & getContext() const
 
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
 
Function & getFunction()
Return the LLVM function that this machine code represents.
 
BasicBlockListType::iterator iterator
 
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
 
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
 
void insert(iterator MBBI, MachineBasicBlock *MBB)
 
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
 
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
 
const MachineInstrBuilder & add(const MachineOperand &MO) const
 
const MachineInstrBuilder & addSym(MCSymbol *Sym, unsigned char TargetFlags=0) const
 
const MachineInstrBuilder & addFrameIndex(int Idx) const
 
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
 
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
 
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
 
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
 
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
 
const MachineInstrBuilder & copyImplicitOps(const MachineInstr &OtherMI) const
Copy all the implicit operands from OtherMI onto this one.
 
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
 
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
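A minimal sketch of how the MachineInstrBuilder helpers above chain together; MIB, SrcReg, IsKill, Offset, FI, and OrigMI are assumed caller context, not names taken from this listing.

// Hypothetical chain on an existing MachineInstrBuilder; every value is a
// placeholder supplied by the caller.
MIB.addReg(SrcReg, getKillRegState(IsKill)) // register use, kill flag optional
   .addImm(Offset)                          // immediate operand
   .addFrameIndex(FI)                       // frame-index operand
   .cloneMemRefs(OrigMI);                   // reuse OrigMI's memory operands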
 
Representation of each machine instruction.
 
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
 
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
 
const MachineBasicBlock * getParent() const
 
LLVM_ABI void addImplicitDefUseOperands(MachineFunction &MF)
Add all implicit def and use operands to this instruction.
 
LLVM_ABI void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
 
LLVM_ABI unsigned getNumExplicitOperands() const
Returns the number of non-implicit operands.
 
mop_range implicit_operands()
 
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
 
LLVM_ABI bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore,...
 
void untieRegOperand(unsigned OpIdx)
Break any tie involving OpIdx.
 
LLVM_ABI void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
 
bool hasOneMemOperand() const
Return true if this instruction has exactly one MachineMemOperand.
 
mop_range explicit_operands()
 
LLVM_ABI void tieOperands(unsigned DefIdx, unsigned UseIdx)
Add a tie between the register operands at DefIdx and UseIdx.
 
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
 
LLVM_ABI bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
 
LLVM_ABI const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
 
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
 
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
 
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
 
bool isMoveImmediate(QueryType Type=IgnoreBundle) const
Return true if this instruction is a move immediate (including conditional moves) instruction.
 
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
 
LLVM_ABI void removeOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with.
 
LLVM_ABI void setPostInstrSymbol(MachineFunction &MF, MCSymbol *Symbol)
Set a symbol that will be emitted just after the instruction itself.
 
LLVM_ABI void clearRegisterKills(Register Reg, const TargetRegisterInfo *RegInfo)
Clear all kill flags affecting Reg.
 
const MachineOperand & getOperand(unsigned i) const
 
uint32_t getFlags() const
Return the MI flags bitvector.
 
LLVM_ABI int findRegisterDefOperandIdx(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false) const
Returns the operand index that is a def of the specified register or -1 if it is not found.
 
MachineOperand * findRegisterDefOperand(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false)
Wrapper for findRegisterDefOperandIdx, it returns a pointer to the MachineOperand rather than an inde...
 
A description of a memory reference used in the backend.
 
@ MOLoad
The memory access reads data.
 
@ MOStore
The memory access writes data.
 
MachineOperand class - Representation of each machine instruction operand.
 
void setSubReg(unsigned subReg)
 
unsigned getSubReg() const
 
LLVM_ABI unsigned getOperandNo() const
Returns the index of this operand in the instruction that it belongs to.
 
const GlobalValue * getGlobal() const
 
void setImplicit(bool Val=true)
 
LLVM_ABI void ChangeToFrameIndex(int Idx, unsigned TargetFlags=0)
Replace this operand with a frame index.
 
void setImm(int64_t immVal)
 
bool isReg() const
isReg - Tests if this is a MO_Register operand.
 
void setIsDead(bool Val=true)
 
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
 
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
 
LLVM_ABI void ChangeToImmediate(int64_t ImmVal, unsigned TargetFlags=0)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value.
 
LLVM_ABI void ChangeToGA(const GlobalValue *GV, int64_t Offset, unsigned TargetFlags=0)
ChangeToGA - Replace this operand with a new global address operand.
 
void setIsKill(bool Val=true)
 
LLVM_ABI void ChangeToRegister(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isDebug=false)
ChangeToRegister - Replace this operand with a new register operand of the specified value.
 
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
 
void setOffset(int64_t Offset)
 
unsigned getTargetFlags() const
 
static MachineOperand CreateImm(int64_t Val)
 
bool isGlobal() const
isGlobal - Tests if this is a MO_GlobalAddress operand.
 
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
 
void setIsUndef(bool Val=true)
 
Register getReg() const
getReg - Returns the register number.
 
bool isTargetIndex() const
isTargetIndex - Tests if this is a MO_TargetIndex operand.
 
void setTargetFlags(unsigned F)
 
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
 
LLVM_ABI bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
 
@ MO_Immediate
Immediate operand.
 
@ MO_Register
Register operand.
 
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
 
int64_t getOffset() const
Return the offset from the symbol in this operand.
 
bool isFPImm() const
isFPImm - Tests if this is a MO_FPImmediate operand.
 
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
 
const RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
 
This class implements the register bank concept.
 
unsigned getID() const
Get the identifier of this register bank.
 
Wrapper class representing virtual and physical registers.
 
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
 
constexpr bool isValid() const
 
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
 
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
 
Represents one node in the SelectionDAG.
 
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
 
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
 
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
 
const SDValue & getOperand(unsigned Num) const
 
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
 
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
 
bool isLegalMUBUFImmOffset(unsigned Imm) const
 
bool isInlineConstant(const APInt &Imm) const
 
void legalizeOperandsVOP3(MachineRegisterInfo &MRI, MachineInstr &MI) const
Fix operands in MI to satisfy constant bus requirements.
 
static bool isDS(const MachineInstr &MI)
 
MachineBasicBlock * legalizeOperands(MachineInstr &MI, MachineDominatorTree *MDT=nullptr) const
Legalize all operands in this instruction.
 
bool areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1, int64_t &Offset0, int64_t &Offset1) const override
 
unsigned getLiveRangeSplitOpcode(Register Reg, const MachineFunction &MF) const override
 
bool getMemOperandsWithOffsetWidth(const MachineInstr &LdSt, SmallVectorImpl< const MachineOperand * > &BaseOps, int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const final
 
Register isSGPRStackAccess(const MachineInstr &MI, int &FrameIndex) const
 
unsigned getInstSizeInBytes(const MachineInstr &MI) const override
 
static bool isNeverUniform(const MachineInstr &MI)
 
unsigned getOpSize(uint16_t Opcode, unsigned OpNo) const
Return the size in bytes of the operand OpNo on the given instruction opcode.
 
bool isXDLWMMA(const MachineInstr &MI) const
 
bool isBasicBlockPrologue(const MachineInstr &MI, Register Reg=Register()) const override
 
uint64_t getDefaultRsrcDataFormat() const
 
static bool isSOPP(const MachineInstr &MI)
 
InstructionUniformity getGenericInstructionUniformity(const MachineInstr &MI) const
 
bool isIGLP(unsigned Opcode) const
 
static bool isFLATScratch(const MachineInstr &MI)
 
const MCInstrDesc & getIndirectRegWriteMovRelPseudo(unsigned VecSize, unsigned EltSize, bool IsSGPR) const
 
MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DestReg) const
Return a partially built integer add instruction without carry.
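A usage sketch under assumed context (TII, MBB, I, DL, and the registers are placeholders): the returned builder already holds the destination and, on subtargets without a carry-less add, a dead carry-out def, so the caller appends only the sources.

// Sketch: DestReg = RegA + RegB, inserted before I in MBB.
TII->getAddNoCarry(MBB, I, DL, DestReg)
    .addReg(RegA)
    .addReg(RegB)
    .addImm(0); // clamp bit, mirroring existing in-tree callers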
 
bool mayAccessFlatAddressSpace(const MachineInstr &MI) const
 
bool shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, int64_t Offset0, int64_t Offset1, unsigned NumLoads) const override
 
bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset, Align Alignment=Align(4)) const
 
ArrayRef< std::pair< unsigned, const char * > > getSerializableDirectMachineOperandTargetFlags() const override
 
void moveToVALU(SIInstrWorklist &Worklist, MachineDominatorTree *MDT) const
Replace the instruction's opcode with the equivalent VALU opcode.
 
static bool isSMRD(const MachineInstr &MI)
 
void restoreExec(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, Register Reg, SlotIndexes *Indexes=nullptr) const
 
bool usesConstantBus(const MachineRegisterInfo &MRI, const MachineOperand &MO, const MCOperandInfo &OpInfo) const
Returns true if this operand uses the constant bus.
 
static unsigned getMaxMUBUFImmOffset(const GCNSubtarget &ST)
 
Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override
 
static unsigned getFoldableCopySrcIdx(const MachineInstr &MI)
 
bool mayAccessScratchThroughFlat(const MachineInstr &MI) const
 
void legalizeOperandsFLAT(MachineRegisterInfo &MRI, MachineInstr &MI) const
 
bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask, int64_t CmpValue, const MachineRegisterInfo *MRI) const override
 
static std::optional< int64_t > extractSubregFromImm(int64_t ImmVal, unsigned SubRegIndex)
Return the extracted immediate value in a subregister use from a constant materialized in a super reg...
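An illustrative call, assuming AMDGPU::sub0/sub1 are the 32-bit subregister indices of a 64-bit register; the constant is arbitrary.

// Split a materialized 64-bit constant into the halves a sub0/sub1 use
// would observe (sketch only).
int64_t Imm = 0x0000000100000002;
std::optional<int64_t> Lo = SIInstrInfo::extractSubregFromImm(Imm, AMDGPU::sub0);
std::optional<int64_t> Hi = SIInstrInfo::extractSubregFromImm(Imm, AMDGPU::sub1);
// If the extraction succeeds, *Lo == 2 and *Hi == 1 here.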
 
Register isStackAccess(const MachineInstr &MI, int &FrameIndex) const
 
static bool isMTBUF(const MachineInstr &MI)
 
const MCInstrDesc & getIndirectGPRIDXPseudo(unsigned VecSize, bool IsIndirectSrc) const
 
void insertReturn(MachineBasicBlock &MBB) const
 
static bool isDGEMM(unsigned Opcode)
 
static bool isEXP(const MachineInstr &MI)
 
static bool isSALU(const MachineInstr &MI)
 
void legalizeGenericOperand(MachineBasicBlock &InsertMBB, MachineBasicBlock::iterator I, const TargetRegisterClass *DstRC, MachineOperand &Op, MachineRegisterInfo &MRI, const DebugLoc &DL) const
 
MachineInstr * buildShrunkInst(MachineInstr &MI, unsigned NewOpcode) const
 
unsigned getInstBundleSize(const MachineInstr &MI) const
 
static bool isVOP2(const MachineInstr &MI)
 
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const override
 
static bool isSDWA(const MachineInstr &MI)
 
InstructionUniformity getInstructionUniformity(const MachineInstr &MI) const final
 
const MCInstrDesc & getKillTerminatorFromPseudo(unsigned Opcode) const
 
void insertNoops(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned Quantity) const override
 
static bool isGather4(const MachineInstr &MI)
 
MachineInstr * getWholeWaveFunctionSetup(MachineFunction &MF) const
 
bool isLegalVSrcOperand(const MachineRegisterInfo &MRI, const MCOperandInfo &OpInfo, const MachineOperand &MO) const
Check if MO would be a valid operand for the given operand definition OpInfo.
 
static bool isDOT(const MachineInstr &MI)
 
MachineInstr * createPHISourceCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, unsigned SrcSubReg, Register Dst) const override
 
bool hasModifiers(unsigned Opcode) const
Return true if this instruction has any modifiers.
 
bool shouldClusterMemOps(ArrayRef< const MachineOperand * > BaseOps1, int64_t Offset1, bool OffsetIsScalable1, ArrayRef< const MachineOperand * > BaseOps2, int64_t Offset2, bool OffsetIsScalable2, unsigned ClusterSize, unsigned NumBytes) const override
 
static bool isSWMMAC(const MachineInstr &MI)
 
ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *II, const ScheduleDAGMI *DAG) const override
 
bool isHighLatencyDef(int Opc) const override
 
void legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const
Legalize the OpIndex operand of this instruction by inserting a MOV.
 
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
 
static bool isVOPC(const MachineInstr &MI)
 
void removeModOperands(MachineInstr &MI) const
 
std::pair< int64_t, int64_t > splitFlatOffset(int64_t COffsetVal, unsigned AddrSpace, uint64_t FlatVariant) const
Split COffsetVal into {immediate offset field, remainder offset} values.
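A hedged sketch of the split; the address space and FLAT variant arguments are assumptions for illustration.

// Split a byte offset into the encodable immediate field and the remainder
// that must be folded into the address computation (sketch only).
int64_t COffsetVal = 0x12345;
auto [ImmField, Remainder] =
    TII->splitFlatOffset(COffsetVal, AMDGPUAS::GLOBAL_ADDRESS,
                         SIInstrFlags::FlatGlobal);
// ImmField + Remainder == COffsetVal, with ImmField legal for the subtarget
// per isLegalFLATOffset().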
 
unsigned getVectorRegSpillRestoreOpcode(Register Reg, const TargetRegisterClass *RC, unsigned Size, const SIMachineFunctionInfo &MFI) const
 
bool isXDL(const MachineInstr &MI) const
 
static bool isVIMAGE(const MachineInstr &MI)
 
void enforceOperandRCAlignment(MachineInstr &MI, AMDGPU::OpName OpName) const
 
static bool isSOP2(const MachineInstr &MI)
 
static bool isGWS(const MachineInstr &MI)
 
bool isLegalAV64PseudoImm(uint64_t Imm) const
Check if this immediate value can be used for AV_MOV_B64_IMM_PSEUDO.
 
bool isNeverCoissue(MachineInstr &MI) const
 
bool hasModifiersSet(const MachineInstr &MI, AMDGPU::OpName OpName) const
 
const TargetRegisterClass * getPreferredSelectRegClass(unsigned Size) const
 
bool isLegalToSwap(const MachineInstr &MI, unsigned fromIdx, unsigned toIdx) const
 
static bool isFLATGlobal(const MachineInstr &MI)
 
bool isGlobalMemoryObject(const MachineInstr *MI) const override
 
static bool isVSAMPLE(const MachineInstr &MI)
 
bool isBufferSMRD(const MachineInstr &MI) const
 
static bool isKillTerminator(unsigned Opcode)
 
bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx0, unsigned &SrcOpIdx1) const override
 
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
 
void insertScratchExecCopy(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, Register Reg, bool IsSCCLive, SlotIndexes *Indexes=nullptr) const
 
bool hasVALU32BitEncoding(unsigned Opcode) const
Return true if this 64-bit VALU instruction has a 32-bit encoding.
 
unsigned getMovOpcode(const TargetRegisterClass *DstRC) const
 
void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, unsigned SubIdx, const MachineInstr &Orig, const TargetRegisterInfo &TRI) const override
 
unsigned buildExtractSubReg(MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI, const MachineOperand &SuperReg, const TargetRegisterClass *SuperRC, unsigned SubIdx, const TargetRegisterClass *SubRC) const
 
void legalizeOperandsVOP2(MachineRegisterInfo &MRI, MachineInstr &MI) const
Legalize operands in MI by either commuting it or inserting a copy of src1.
 
bool foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg, MachineRegisterInfo *MRI) const final
 
static bool isTRANS(const MachineInstr &MI)
 
static bool isImage(const MachineInstr &MI)
 
static bool isSOPK(const MachineInstr &MI)
 
const TargetRegisterClass * getOpRegClass(const MachineInstr &MI, unsigned OpNo) const
Return the correct register class for OpNo.
 
MachineBasicBlock * insertSimulatedTrap(MachineRegisterInfo &MRI, MachineBasicBlock &MBB, MachineInstr &MI, const DebugLoc &DL) const
Build instructions that simulate the behavior of an s_trap 2 instruction for hardware (namely,...
 
static unsigned getNonSoftWaitcntOpcode(unsigned Opcode)
 
static unsigned getDSShaderTypeValue(const MachineFunction &MF)
 
static bool isFoldableCopy(const MachineInstr &MI)
 
bool mayAccessLDSThroughFlat(const MachineInstr &MI) const
 
bool isIgnorableUse(const MachineOperand &MO) const override
 
static bool isMUBUF(const MachineInstr &MI)
 
bool expandPostRAPseudo(MachineInstr &MI) const override
 
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, Register &SrcReg2, int64_t &CmpMask, int64_t &CmpValue) const override
 
const TargetRegisterClass * getRegClass(const MCInstrDesc &TID, unsigned OpNum, const TargetRegisterInfo *TRI) const override
 
static bool isSegmentSpecificFLAT(const MachineInstr &MI)
 
bool isReMaterializableImpl(const MachineInstr &MI) const override
 
static bool isVOP3(const MCInstrDesc &Desc)
 
bool physRegUsesConstantBus(const MachineOperand &Reg) const
 
static bool isF16PseudoScalarTrans(unsigned Opcode)
 
void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const override
 
bool mayAccessVMEMThroughFlat(const MachineInstr &MI) const
 
static bool isDPP(const MachineInstr &MI)
 
bool analyzeBranchImpl(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const
 
static bool isMFMA(const MachineInstr &MI)
 
bool isLowLatencyInstruction(const MachineInstr &MI) const
 
std::optional< DestSourcePair > isCopyInstrImpl(const MachineInstr &MI) const override
If the specific machine instruction is an instruction that moves/copies a value from one register to ano...
 
bool isAlwaysGDS(uint16_t Opcode) const
 
static bool isMAI(const MCInstrDesc &Desc)
 
static bool usesLGKM_CNT(const MachineInstr &MI)
 
Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
 
void legalizeOperandsVALUt16(MachineInstr &Inst, MachineRegisterInfo &MRI) const
Fix operands in Inst for 16-bit SALU to VALU lowering.
 
void moveToVALUImpl(SIInstrWorklist &Worklist, MachineDominatorTree *MDT, MachineInstr &Inst) const
 
bool isImmOperandLegal(const MCInstrDesc &InstDesc, unsigned OpNo, const MachineOperand &MO) const
 
bool canShrink(const MachineInstr &MI, const MachineRegisterInfo &MRI) const
 
bool isAsmOnlyOpcode(int MCOp) const
Check if this instruction should only be used by assembler.
 
static bool setsSCCifResultIsNonZero(const MachineInstr &MI)
 
static bool isVGPRSpill(const MachineInstr &MI)
 
ScheduleHazardRecognizer * CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, const ScheduleDAG *DAG) const override
This is used by the post-RA scheduler (SchedulePostRAList.cpp).
 
bool verifyInstruction(const MachineInstr &MI, StringRef &ErrInfo) const override
 
bool isLegalFLATOffset(int64_t Offset, unsigned AddrSpace, uint64_t FlatVariant) const
Returns whether Offset is legal for the subtarget as the offset to a FLAT encoded instruction with the giv...
 
static bool isWWMRegSpillOpcode(uint16_t Opcode)
 
unsigned getInstrLatency(const InstrItineraryData *ItinData, const MachineInstr &MI, unsigned *PredCost=nullptr) const override
 
MachineInstr * foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef< unsigned > Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS=nullptr, VirtRegMap *VRM=nullptr) const override
 
int64_t getNamedImmOperand(const MachineInstr &MI, AMDGPU::OpName OperandName) const
Get required immediate operand.
 
ArrayRef< std::pair< int, const char * > > getSerializableTargetIndices() const override
 
bool regUsesConstantBus(const MachineOperand &Reg, const MachineRegisterInfo &MRI) const
 
static bool isMIMG(const MachineInstr &MI)
 
MachineOperand buildExtractSubRegOrImm(MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI, const MachineOperand &SuperReg, const TargetRegisterClass *SuperRC, unsigned SubIdx, const TargetRegisterClass *SubRC) const
 
bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override
 
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
 
bool isLegalRegOperand(const MachineRegisterInfo &MRI, const MCOperandInfo &OpInfo, const MachineOperand &MO) const
Check if MO (a register operand) is a legal register for the given operand description or operand ind...
 
bool allowNegativeFlatOffset(uint64_t FlatVariant) const
Returns true if negative offsets are allowed for the given FlatVariant.
 
static unsigned getNumWaitStates(const MachineInstr &MI)
Return the number of wait states that result from executing this instruction.
 
unsigned getVectorRegSpillSaveOpcode(Register Reg, const TargetRegisterClass *RC, unsigned Size, const SIMachineFunctionInfo &MFI) const
 
unsigned getVALUOp(const MachineInstr &MI) const
 
static bool modifiesModeRegister(const MachineInstr &MI)
Return true if the instruction modifies the mode register.
 
Register readlaneVGPRToSGPR(Register SrcReg, MachineInstr &UseMI, MachineRegisterInfo &MRI, const TargetRegisterClass *DstRC=nullptr) const
Copy a value from a VGPR (SrcReg) to an SGPR.
 
bool hasDivergentBranch(const MachineBasicBlock *MBB) const
Return whether the block terminates with a divergent branch.
 
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
 
void fixImplicitOperands(MachineInstr &MI) const
 
bool moveFlatAddrToVGPR(MachineInstr &Inst) const
Change SADDR form of a FLAT Inst to its VADDR form if saddr operand was moved to VGPR.
 
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, Register DestReg, Register SrcReg, bool KillSrc, bool RenamableDest=false, bool RenamableSrc=false) const override
 
bool swapSourceModifiers(MachineInstr &MI, MachineOperand &Src0, AMDGPU::OpName Src0OpName, MachineOperand &Src1, AMDGPU::OpName Src1OpName) const
 
Register insertNE(MachineBasicBlock *MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register SrcReg, int Value) const
 
MachineBasicBlock * getBranchDestBlock(const MachineInstr &MI) const override
 
bool hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const
This function is used to determine if an instruction can be safely executed under EXEC = 0 without ha...
 
bool getConstValDefinedInReg(const MachineInstr &MI, const Register Reg, int64_t &ImmVal) const override
 
static bool isAtomic(const MachineInstr &MI)
 
bool canInsertSelect(const MachineBasicBlock &MBB, ArrayRef< MachineOperand > Cond, Register DstReg, Register TrueReg, Register FalseReg, int &CondCycles, int &TrueCycles, int &FalseCycles) const override
 
bool isLiteralOperandLegal(const MCInstrDesc &InstDesc, const MCOperandInfo &OpInfo) const
 
static bool sopkIsZext(unsigned Opcode)
 
static bool isSGPRSpill(const MachineInstr &MI)
 
static bool isWMMA(const MachineInstr &MI)
 
ArrayRef< std::pair< MachineMemOperand::Flags, const char * > > getSerializableMachineMemOperandTargetFlags() const override
 
MachineInstr * convertToThreeAddress(MachineInstr &MI, LiveVariables *LV, LiveIntervals *LIS) const override
 
bool mayReadEXEC(const MachineRegisterInfo &MRI, const MachineInstr &MI) const
Returns true if the instruction could potentially depend on the value of exec.
 
void legalizeOperandsSMRD(MachineRegisterInfo &MRI, MachineInstr &MI) const
 
bool isBranchOffsetInRange(unsigned BranchOpc, int64_t BrOffset) const override
 
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
 
void insertVectorSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const
 
void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
 
std::pair< MachineInstr *, MachineInstr * > expandMovDPP64(MachineInstr &MI) const
 
Register insertEQ(MachineBasicBlock *MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register SrcReg, int Value) const
 
static bool isSOPC(const MachineInstr &MI)
 
static bool isFLAT(const MachineInstr &MI)
 
static bool isVALU(const MachineInstr &MI)
 
bool isBarrier(unsigned Opcode) const
 
MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx0, unsigned OpIdx1) const override
 
int pseudoToMCOpcode(int Opcode) const
Return a target-specific opcode if Opcode is a pseudo instruction.
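A minimal sketch of the usual check around this hook; -1 signals that the pseudo has no real encoding on the current subtarget. MI and TII are assumed surrounding context.

int MCOp = TII->pseudoToMCOpcode(MI.getOpcode());
if (MCOp == -1) {
  // No target-specific encoding for this pseudo on this subtarget;
  // the caller must handle or skip it.
  return;
}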
 
const MCInstrDesc & getMCOpcodeFromPseudo(unsigned Opcode) const
Return the descriptor of the target-specific machine instruction that corresponds to the specified ps...
 
bool isLegalGFX12PlusPackedMathFP32Operand(const MachineRegisterInfo &MRI, const MachineInstr &MI, unsigned SrcN, const MachineOperand *MO=nullptr) const
Check if MO would be a legal operand for gfx12+ packed math FP32 instructions.
 
static bool usesVM_CNT(const MachineInstr &MI)
 
MachineInstr * createPHIDestinationCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, Register Dst) const override
 
static bool isFixedSize(const MachineInstr &MI)
 
bool isSafeToSink(MachineInstr &MI, MachineBasicBlock *SuccToSinkTo, MachineCycleInfo *CI) const override
 
LLVM_READONLY int commuteOpcode(unsigned Opc) const
 
uint64_t getScratchRsrcWords23() const
 
LLVM_READONLY MachineOperand * getNamedOperand(MachineInstr &MI, AMDGPU::OpName OperandName) const
Returns the operand named Op.
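A sketch of the typical query pattern; the offset operand name and the surrounding MI/TII are assumed context, and handleByteOffset() is a placeholder.

// Look up a named operand and use it only if present and an immediate.
if (const MachineOperand *Off =
        TII->getNamedOperand(MI, AMDGPU::OpName::offset)) {
  if (Off->isImm())
    handleByteOffset(Off->getImm()); // placeholder callback
}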
 
std::pair< unsigned, unsigned > decomposeMachineOperandsTargetFlags(unsigned TF) const override
 
bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const override
 
bool isOperandLegal(const MachineInstr &MI, unsigned OpIdx, const MachineOperand *MO=nullptr) const
Check if MO is a legal operand if it was the OpIdx Operand for MI.
 
static bool isLDSDMA(const MachineInstr &MI)
 
static bool isVOP1(const MachineInstr &MI)
 
SIInstrInfo(const GCNSubtarget &ST)
 
void insertIndirectBranch(MachineBasicBlock &MBB, MachineBasicBlock &NewDestBB, MachineBasicBlock &RestoreBB, const DebugLoc &DL, int64_t BrOffset, RegScavenger *RS) const override
 
bool hasAnyModifiersSet(const MachineInstr &MI) const
 
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
 
Register getLongBranchReservedReg() const
 
bool isWholeWaveFunction() const
 
Register getStackPtrOffsetReg() const
 
unsigned getMaxMemoryClusterDWords() const
 
void setHasSpilledVGPRs(bool Spill=true)
 
bool isWWMReg(Register Reg) const
 
bool checkFlag(Register Reg, uint8_t Flag) const
 
void setHasSpilledSGPRs(bool Spill=true)
 
static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs=1)
 
ArrayRef< int16_t > getRegSplitParts(const TargetRegisterClass *RC, unsigned EltSize) const
 
unsigned getHWRegIndex(MCRegister Reg) const
 
unsigned getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const override
 
unsigned getChannelFromSubReg(unsigned SubReg) const
 
static bool isAGPRClass(const TargetRegisterClass *RC)
 
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
 
virtual bool hasVRegLiveness() const
Return true if this DAG supports VReg liveness and RegPressure.
 
MachineFunction & MF
Machine function.
 
HazardRecognizer - This determines whether or not an instruction can be issued this cycle,...
 
SlotIndex - An opaque wrapper around machine indexes.
 
SlotIndex getRegSlot(bool EC=false) const
Returns the register use/def slot in the current instruction for a normal or early-clobber def.
 
SlotIndex insertMachineInstrInMaps(MachineInstr &MI, bool Late=false)
Insert the given machine instruction into the mapping.
 
Implements a dense probed hash-table based set with some number of buckets stored inline.
 
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
 
void push_back(const T &Elt)
 
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
 
StringRef - Represent a constant reference to a string, i.e.
 
virtual ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *, const ScheduleDAGMI *DAG) const
Allocate and return a hazard recognizer to use for this target when scheduling the machine instructio...
 
virtual MachineInstr * createPHIDestinationCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, Register Dst) const
During PHI elimination, lets the target make necessary checks and insert the copy to the PHI destinati...
 
virtual bool isReMaterializableImpl(const MachineInstr &MI) const
For instructions with opcodes for which the M_REMATERIALIZABLE flag is set, this hook lets the target...
 
virtual void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, unsigned SubIdx, const MachineInstr &Orig, const TargetRegisterInfo &TRI) const
Re-issue the specified 'original' instruction at the specific location targeting a new destination re...
 
virtual MachineInstr * createPHISourceCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, unsigned SrcSubReg, Register Dst) const
During PHI elimination, lets the target make necessary checks and insert the copy to the PHI destinati...
 
virtual MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const
This method commutes the operands of the given machine instruction MI.
 
virtual bool isGlobalMemoryObject(const MachineInstr *MI) const
Returns true if MI is an instruction we are unable to reason about (like a call or something with unm...
 
virtual bool expandPostRAPseudo(MachineInstr &MI) const
This function is called for all pseudo instructions that remain after register allocation.
 
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
 
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
 
bool hasSuperClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a super-class of or equal to this class.
 
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
 
static constexpr TypeSize getFixed(ScalarTy ExactSize)
 
A Use represents the edge between a Value definition and its users.
 
LLVM Value Representation.
 
std::pair< iterator, bool > insert(const ValueT &V)
 
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
 
self_iterator getIterator()
 
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
 
@ REGION_ADDRESS
Address space for region memory. (GDS)
 
@ LOCAL_ADDRESS
Address space for local memory.
 
@ FLAT_ADDRESS
Address space for flat memory.
 
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
 
@ PRIVATE_ADDRESS
Address space for private memory.
 
unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst)
 
bool isPackedFP32Inst(unsigned Opc)
 
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi)
 
const uint64_t RSRC_DATA_FORMAT
 
LLVM_READONLY int getBasicFromSDWAOp(uint16_t Opcode)
 
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
 
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi)
 
LLVM_READONLY int getVOPe32(uint16_t Opcode)
 
bool getWMMAIsXDL(unsigned Opc)
 
unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc)
 
bool isInlinableLiteralV2I16(uint32_t Literal)
 
bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI)
 
bool isInlinableLiteralV2BF16(uint32_t Literal)
 
LLVM_READONLY int getFlatScratchInstSVfromSS(uint16_t Opcode)
 
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST)
For pre-GFX12 FLAT instructions the offset must be positive; MSB is ignored and forced to zero.
 
bool isGFX12Plus(const MCSubtargetInfo &STI)
 
bool isInlinableLiteralV2F16(uint32_t Literal)
 
LLVM_READONLY int getGlobalVaddrOp(uint16_t Opcode)
 
bool isValid32BitLiteral(uint64_t Val, bool IsFP64)
 
LLVM_READNONE bool isLegalDPALU_DPPControl(const MCSubtargetInfo &ST, unsigned DC)
 
bool getMAIIsGFX940XDL(unsigned Opc)
 
const uint64_t RSRC_ELEMENT_SIZE_SHIFT
 
LLVM_READONLY int getAddr64Inst(uint16_t Opcode)
 
bool isIntrinsicAlwaysUniform(unsigned IntrID)
 
LLVM_READONLY int getMFMAEarlyClobberOp(uint16_t Opcode)
 
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)
 
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByEncoding(uint8_t DimEnc)
 
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
 
const uint64_t RSRC_TID_ENABLE
 
bool isIntrinsicSourceOfDivergence(unsigned IntrID)
 
constexpr bool isSISrcOperand(const MCOperandInfo &OpInfo)
Is this an AMDGPU specific source operand?
 
bool isGenericAtomic(unsigned Opc)
 
LLVM_READNONE bool isInlinableIntLiteral(int64_t Literal)
Is this literal inlinable, and not one of the values intended for floating point values.
 
LLVM_READONLY int getCommuteRev(uint16_t Opcode)
 
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
 
@ OPERAND_KIMM32
Operand with 32-bit immediate that uses the constant bus.
 
@ OPERAND_REG_INLINE_C_FP64
 
@ OPERAND_REG_INLINE_C_BF16
 
@ OPERAND_REG_INLINE_C_V2BF16
 
@ OPERAND_REG_IMM_V2INT16
 
@ OPERAND_REG_IMM_INT32
Operands with register, 32-bit, or 64-bit immediate.
 
@ OPERAND_REG_INLINE_C_INT64
 
@ OPERAND_REG_INLINE_C_INT16
Operands with register or inline constant.
 
@ OPERAND_REG_IMM_NOINLINE_V2FP16
 
@ OPERAND_REG_INLINE_C_V2FP16
 
@ OPERAND_REG_INLINE_AC_INT32
Operands with an AccVGPR register or inline constant.
 
@ OPERAND_REG_INLINE_AC_FP32
 
@ OPERAND_REG_IMM_V2INT32
 
@ OPERAND_REG_INLINE_C_FP32
 
@ OPERAND_REG_INLINE_C_INT32
 
@ OPERAND_REG_INLINE_C_V2INT16
 
@ OPERAND_INLINE_C_AV64_PSEUDO
 
@ OPERAND_REG_INLINE_AC_FP64
 
@ OPERAND_REG_INLINE_C_FP16
 
@ OPERAND_INLINE_SPLIT_BARRIER_INT32
 
bool isDPALU_DPP(const MCInstrDesc &OpDesc, const MCInstrInfo &MII, const MCSubtargetInfo &ST)
 
LLVM_READONLY int getCommuteOrig(uint16_t Opcode)
 
unsigned getRegBitWidth(const TargetRegisterClass &RC)
Get the size in bits of a register from the register class RC.
 
bool isGFX1250(const MCSubtargetInfo &STI)
 
int getMCOpcode(uint16_t Opcode, unsigned Gen)
 
bool supportsScaleOffset(const MCInstrInfo &MII, unsigned Opcode)
 
const uint64_t RSRC_INDEX_STRIDE_SHIFT
 
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
 
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi)
 
LLVM_READNONE constexpr bool isGraphics(CallingConv::ID CC)
 
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable.
 
LLVM_READONLY int getIfAddr64Inst(uint16_t Opcode)
Check if Opcode is an Addr64 opcode.
 
@ AMDGPU_CS
Used for Mesa/AMDPAL compute shaders.
 
@ AMDGPU_VS
Used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (vertex shader if tess...
 
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
 
@ AMDGPU_HS
Used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
 
@ AMDGPU_GS
Used for Mesa/AMDPAL geometry shaders.
 
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
 
@ Fast
Attempts to make calls as fast as possible (e.g.
 
@ AMDGPU_ES
Used for AMDPAL shader stage before geometry shader if geometry is in use.
 
@ AMDGPU_LS
Used for AMDPAL vertex shader if tessellation is in use.
 
@ C
The default llvm calling convention, compatible with C.
 
@ Implicit
Not emitted register (e.g. carry, or temporary result).
 
@ Define
Register definition.
 
@ Kill
The last use of a register.
 
@ Undef
Value of the register doesn't matter.
 
Not(const Pred &P) -> Not< Pred >
 
initializer< Ty > init(const Ty &Val)
 
This is an optimization pass for GlobalISel generic memory operations.
 
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
 
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
 
LLVM_ABI void finalizeBundle(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
finalizeBundle - Finalize a machine instruction bundle which includes a sequence of instructions star...
 
TargetInstrInfo::RegSubRegPair getRegSubRegPair(const MachineOperand &O)
Create RegSubRegPair from a register MachineOperand.
 
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
 
constexpr uint64_t maxUIntN(uint64_t N)
Gets the maximum value for a N-bit unsigned integer.
 
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
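A minimal construction sketch using one of the BuildMI overloads together with the operand helpers listed earlier; the opcode, registers, and insertion point are placeholders.

// DstReg = V_MOV_B32 0, inserted before iterator I in MBB (sketch only).
BuildMI(MBB, I, DL, TII->get(AMDGPU::V_MOV_B32_e32), DstReg)
    .addImm(0);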
 
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
 
bool execMayBeModifiedBeforeUse(const MachineRegisterInfo &MRI, Register VReg, const MachineInstr &DefMI, const MachineInstr &UseMI)
Return false if EXEC is not changed between the def of VReg at DefMI and the use at UseMI.
 
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
 
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
 
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
 
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
 
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer that is less than or equal to Value and is congruent to Skew modulo Align.
 
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
 
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
 
int countr_zero(T Val)
Count the number of 0's from the least significant bit to the most significant, stopping at the first 1.
 
TargetInstrInfo::RegSubRegPair getRegSequenceSubReg(MachineInstr &MI, unsigned SubReg)
Return the SubReg component from REG_SEQUENCE.
 
static const MachineMemOperand::Flags MONoClobber
Mark the MMO of a uniform load if there are no potentially clobbering stores on any path from the sta...
 
constexpr bool has_single_bit(T Value) noexcept
 
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
 
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
 
auto reverse(ContainerTy &&C)
 
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
 
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
 
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
 
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
 
MachineInstr * getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P, MachineRegisterInfo &MRI)
Return the defining instruction for a given reg:subreg pair skipping copy like instructions and subre...
 
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
 
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
 
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
 
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
 
static const MachineMemOperand::Flags MOCooperative
Mark the MMO of cooperative load/store atomics.
 
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
 
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
 
unsigned getUndefRegState(bool B)
 
@ Xor
Bitwise or logical XOR of integers.
 
@ Sub
Subtraction of integers.
 
unsigned getKillRegState(bool B)
 
bool isTargetSpecificOpcode(unsigned Opcode)
Check whether the given Opcode is a target-specific opcode.
 
DWARFExpression::Operation Op
 
ArrayRef(const T &OneElt) -> ArrayRef< T >
 
constexpr unsigned DefaultMemoryClusterDWordsLimit
 
constexpr unsigned BitWidth
 
constexpr bool isIntN(unsigned N, int64_t x)
Checks if an signed integer fits into the given (dynamic) bit width.
 
static const MachineMemOperand::Flags MOLastUse
Mark the MMO of a load as the last use.
 
constexpr T reverseBits(T Val)
Reverse the bits in Val.
 
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
 
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
 
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
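A small worked sketch combining the bit utilities listed here; the values are arbitrary.

uint64_t V = 0x0000000100000002ULL;
uint32_t LoHalf = Lo_32(V);                       // 2
uint32_t HiHalf = Hi_32(V);                       // 1
uint64_t Mask12 = maskTrailingOnes<uint64_t>(12); // 0xFFF
bool FitsU16   = isUInt<16>(LoHalf);              // true
int64_t SExt   = SignExtend64<12>(0xFFF);         // -1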
 
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
 
InstructionUniformity
Enum describing how instructions behave with respect to uniformity and divergence,...
 
@ AlwaysUniform
The result values are always uniform.
 
@ NeverUniform
The result values can never be assumed to be uniform.
 
@ Default
The result values are uniform if and only if all operands are uniform.
 
GenericCycleInfo< MachineSSAContext > MachineCycleInfo
 
MachineCycleInfo::CycleT MachineCycle
 
bool execMayBeModifiedBeforeAnyUse(const MachineRegisterInfo &MRI, Register VReg, const MachineInstr &DefMI)
Return false if EXEC is not changed between the def of VReg at DefMI and all its uses.
 
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
 
Helper struct for the implementation of 3-address conversion to communicate updates made to instructi...
 
MachineInstr * RemoveMIUse
Other instruction whose def is no longer used by the converted instruction.
 
This struct is a compact representation of a valid (non-zero power of two) alignment.
 
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
 
SparseBitVector AliveBlocks
AliveBlocks - Set of blocks in which this value is alive completely through.
 
This class contains a discriminated union of information about pointers in memory operands,...
 
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
 
Utility to store machine instructions worklist.
 
MachineInstr * top() const
 
bool isDeferred(MachineInstr *MI)
 
SetVector< MachineInstr * > & getDeferredList()
 
void insert(MachineInstr *MI)
 
A pair composed of a register and a sub-register index.