doxygen/AMDGPULowerVGPREncoding_8cpp_source.html

//===- AMDGPULowerVGPREncoding.cpp - lower VGPRs above v255 ---------------===//

//

// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

// See https://llvm.org/LICENSE.txt for license information.

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

//

//===----------------------------------------------------------------------===//

//

/// \file

/// Lower VGPRs above first 256 on gfx1250.

///

/// The pass scans used VGPRs and inserts S_SET_VGPR_MSB instructions to switch

/// VGPR addressing mode. The mode change is effective until the next change.

/// This instruction provides high bits of a VGPR address for four of the

/// operands: vdst, src0, src1, and src2, or other 4 operands depending on the

/// instruction encoding. If bits are set they are added as MSB to the

/// corresponding operand VGPR number.

///

/// There is no need to replace actual register operands because encoding of the

/// high and low VGPRs is the same. I.e. v0 has the encoding 0x100, so does

/// v256. v1 has the encoding 0x101 and v257 has the same encoding. So high

/// VGPRs will survive until actual encoding and will result in a same actual

/// bit encoding.

///

/// As a result the pass only inserts S_SET_VGPR_MSB to provide an actual offset

/// to a VGPR address of the subsequent instructions. The InstPrinter will take

/// care of printing a low VGPR instead of a high one. In principle this

/// shall be viable to print actual high VGPR numbers, but that would disagree

/// with a disasm printing and create a situation where asm text is not

/// deterministic.

///

/// This pass creates a convention where non-fall through basic blocks shall

/// start with all 4 MSBs zero. Otherwise a disassembly would not be readable.

/// An optimization here is possible but deemed not desirable because of the

/// readability concerns.

///

/// Consequentially the ABI is set to expect all 4 MSBs to be zero on entry.

/// The pass must run very late in the pipeline to make sure no changes to VGPR

/// operands will be made after it.

//

//===----------------------------------------------------------------------===//


#include "AMDGPULowerVGPREncoding.h"

#include "AMDGPU.h"

#include "GCNSubtarget.h"

#include "SIDefines.h"

#include "SIInstrInfo.h"

#include "llvm/ADT/bit.h"

#include "llvm/Support/Debug.h"

#include "llvm/Support/MathExtras.h"


using namespace llvm;


#define DEBUG_TYPE "amdgpu-lower-vgpr-encoding"


namespace {


/// Implementation of the VGPR MSB lowering. It walks every instruction of a
/// machine function, tracks the VGPR MSB mode that is currently in effect and
/// inserts or updates S_SET_VGPR_MSB / S_SETREG_IMM32_B32 instructions so that
/// each instruction sees the MSBs its VGPR operands require.
class AMDGPULowerVGPREncoding {
  /// Number of operand slots whose VGPR MSBs are tracked per instruction.
  static constexpr unsigned OpNum = 4;
  /// Width in bits of a single MSB field inside the encoded mode.
  static constexpr unsigned BitsPerField = 2;
  /// Number of MSB fields inside the encoded mode.
  static constexpr unsigned NumFields = 4;
  // ModeTy::encode() packs one field per tracked operand, so the two counts
  // have to agree or the encoded value would not fit into ModeWidth bits.
  static_assert(OpNum == NumFields,
                "every tracked operand needs exactly one MSB field");
  /// Total width of the encoded mode (all fields).
  static constexpr unsigned ModeWidth = NumFields * BitsPerField;
  /// Mask covering the encoded mode.
  static constexpr unsigned ModeMask = (1 << ModeWidth) - 1;
  /// Bit position of the lowest VGPR MSB bit in the S_SETREG immediate,
  /// derived from the position of the lowest set bit of DST_VGPR_MSB.
  static constexpr unsigned VGPRMSBShift =
      llvm::countr_zero_constexpr<unsigned>(AMDGPU::Hwreg::DST_VGPR_MSB);

  /// MSB requirement of a single operand slot.
  struct OpMode {
    // No MSBs set means they are not required to be of a particular value.
    std::optional<unsigned> MSBits;

    /// Merge the requirement \p New into this slot. A \p New without a value
    /// leaves the slot untouched.
    /// \returns true if \p New requests MSBs that differ from the value
    /// currently in effect (an unset slot counts as 0). In that case
    /// \p Rewritten is additionally set when the slot already held a value,
    /// i.e. an existing requirement was overwritten.
    bool update(const OpMode &New, bool &Rewritten) {
      bool Updated = false;
      if (New.MSBits) {
        if (*New.MSBits != MSBits.value_or(0)) {
          Updated = true;
          Rewritten |= MSBits.has_value();
        }
        MSBits = New.MSBits;
      }
      return Updated;
    }
  };

  /// MSB requirements of all tracked operand slots.
  struct ModeTy {
    OpMode Ops[OpNum];

    /// Merge \p New slot by slot. \returns true if any slot reported an
    /// update; \p Rewritten accumulates over all slots.
    bool update(const ModeTy &New, bool &Rewritten) {
      bool Updated = false;
      for (unsigned I : seq(OpNum))
        Updated |= Ops[I].update(New.Ops[I], Rewritten);
      return Updated;
    }

    /// Pack all slots into a ModeWidth-bit value; unset slots encode as 0.
    unsigned encode() const {
      // Layout: [src0 msb, src1 msb, src2 msb, dst msb].
      unsigned V = 0;
      for (const auto &[I, Op] : enumerate(Ops))
        V |= Op.MSBits.value_or(0) << (I * BitsPerField);
      return V;
    }

    /// Print the mode as "{src0=N, src1=N, src2=N, dst=N}", using '?' for
    /// slots without a requirement.
    void print(raw_ostream &OS) const {
      static constexpr const char *FieldNames[OpNum] = {"src0", "src1", "src2",
                                                        "dst"};
      OS << '{';
      for (const auto &[I, Op] : enumerate(Ops)) {
        if (I)
          OS << ", ";
        OS << FieldNames[I] << '=';
        if (Op.MSBits)
          OS << *Op.MSBits;
        else
          OS << '?';
      }
      OS << '}';
    }

    // Check if this mode is compatible with required \p NewMode without
    // modification. Slots that \p NewMode leaves unset are ignored; for the
    // others an unset slot of this mode counts as 0.
    bool isCompatible(const ModeTy &NewMode) const {
      for (unsigned I : seq(OpNum)) {
        if (!NewMode.Ops[I].MSBits.has_value())
          continue;
        if (Ops[I].MSBits.value_or(0) != NewMode.Ops[I].MSBits.value_or(0))
          return false;
      }
      return true;
    }
  };

public:
  /// Run the lowering on \p MF. \returns true if the function was changed.
  bool run(MachineFunction &MF);

private:
  const SIInstrInfo *TII = nullptr;
  const SIRegisterInfo *TRI = nullptr;

  // Current basic block.
  MachineBasicBlock *MBB = nullptr;

  /// Most recent s_set_* instruction.
  MachineInstr *MostRecentModeSet = nullptr;

  /// Current mode bits.
  ModeTy CurrentMode;

  /// Number of current hard clause instructions.
  unsigned ClauseLen = 0;

  /// Number of hard clause instructions remaining.
  unsigned ClauseRemaining = 0;

  /// Clause group breaks.
  unsigned ClauseBreaks = 0;

  /// Last hard clause instruction.
  MachineInstr *Clause = nullptr;

  /// S_SET_VGPR_MSB immediately after S_SETREG_IMM32_B32 targeting MODE is
  /// silently dropped on GFX1250. When set, the next S_SET_VGPR_MSB insertion
  /// must be preceded by S_NOP to avoid the hazard.
  bool NeedNopBeforeSetVGPRMSB = false;

  /// Insert mode change before \p I. \returns true if mode was changed.
  bool setMode(ModeTy NewMode, MachineBasicBlock::instr_iterator I);

  /// Reset mode to default, i.e. request MSBs of 0 for every operand slot.
  void resetMode(MachineBasicBlock::instr_iterator I) {
    ModeTy Mode;
    for (OpMode &Op : Mode.Ops)
      Op.MSBits = 0;
    setMode(Mode, I);
  }

  /// If \p MO references VGPRs, return the MSBs. Otherwise, return nullopt.
  std::optional<unsigned> getMSBs(const MachineOperand &MO) const;

  /// Handle single \p MI. \return true if changed.
  bool runOnMachineInstr(MachineInstr &MI);

  /// Compute the mode for a single \p MI given \p Ops operands
  /// bit mapping. Optionally takes second array \p Ops2 for VOPD.
  /// If provided and an operand from \p Ops is not a VGPR, then \p Ops2
  /// is checked.
  void computeMode(ModeTy &NewMode, const MachineInstr &MI,
                   const AMDGPU::OpName Ops[OpNum],
                   const AMDGPU::OpName *Ops2 = nullptr);

  /// Check if an instruction \p I is within a clause and returns a suitable
  /// iterator to insert mode change. It may also modify the S_CLAUSE
  /// instruction to extend it or drop the clause if it cannot be adjusted.
  MachineBasicBlock::instr_iterator
  handleClause(MachineBasicBlock::instr_iterator I);

  /// Check if an instruction \p I is immediately after another program state
  /// instruction which it cannot coissue with. If so, insert before that
  /// instruction to encourage more coissuing.
  MachineBasicBlock::instr_iterator
  handleCoissue(MachineBasicBlock::instr_iterator I);

  /// Handle S_SETREG_IMM32_B32 targeting MODE register. On certain hardware,
  /// this instruction clobbers VGPR MSB bits[12:19], so we need to restore
  /// the current mode. \returns true if the instruction was modified or a
  /// new one was inserted.
  bool handleSetregMode(MachineInstr &MI);

  /// Update bits[12:19] of the imm operand in S_SETREG_IMM32_B32 to contain
  /// the VGPR MSB mode value. \returns true if the immediate was changed.
  bool updateSetregModeImm(MachineInstr &MI, int64_t ModeValue);
};


/// Make \p NewMode effective for the instruction at \p I.
///
/// The requirement is first merged into CurrentMode. If nothing changes no
/// code is touched. If a mode-setting instruction is already known for this
/// block and no previously required field had to be overwritten, that
/// instruction's immediate is updated in place. Otherwise a new
/// S_SET_VGPR_MSB is inserted before \p I (the insertion point may be moved by
/// handleClause() and handleCoissue()).
///
/// \returns true if an instruction was inserted or modified.
bool AMDGPULowerVGPREncoding::setMode(ModeTy NewMode,
                                      MachineBasicBlock::instr_iterator I) {
  LLVM_DEBUG({
    dbgs() << "  setMode: NewMode=";
    NewMode.print(dbgs());
    dbgs() << " CurrentMode=";
    CurrentMode.print(dbgs());
    dbgs() << " MostRecentModeSet=" << (MostRecentModeSet ? "yes" : "null");
    if (I == MBB->instr_end())
      dbgs() << " at end\n";
    else
      dbgs() << " before: " << *I;
  });

  // Snapshot the outgoing mode, already shifted into the high 8 bits of the
  // immediate, before update() below mutates CurrentMode.
  const int64_t PrevModeField = CurrentMode.encode() << ModeWidth;

  bool Rewritten = false;
  const bool NeedsChange = CurrentMode.update(NewMode, Rewritten);
  if (!NeedsChange) {
    LLVM_DEBUG(dbgs() << "    -> no change needed\n");
    return false;
  }

  LLVM_DEBUG(dbgs() << "    Rewritten=" << Rewritten << " after update\n");

  // No reusable mode-setting instruction, or a field some earlier instruction
  // relies on was overwritten: a fresh S_SET_VGPR_MSB is required.
  if (!MostRecentModeSet || Rewritten) {
    I = handleClause(I);
    I = handleCoissue(I);

    // Case 2 match in handleSetregMode: the setreg's imm[12:19] matched
    // current MSBs, but the next VALU needs different MSBs, so this
    // S_SET_VGPR_MSB would land right after the setreg. Insert S_NOP to
    // prevent it from being silently dropped.
    if (NeedNopBeforeSetVGPRMSB) {
      BuildMI(*MBB, I, {}, TII->get(AMDGPU::S_NOP)).addImm(0);
      NeedNopBeforeSetVGPRMSB = false;
    }

    MostRecentModeSet = BuildMI(*MBB, I, {}, TII->get(AMDGPU::S_SET_VGPR_MSB))
                            .addImm(NewMode.encode() | PrevModeField);
    LLVM_DEBUG(dbgs() << "    -> inserted new S_SET_VGPR_MSB: "
                      << *MostRecentModeSet);

    // From here on only the fields requested by NewMode are tracked.
    CurrentMode = NewMode;
    return true;
  }

  // Piggyback onto MostRecentModeSet. It can be either S_SET_VGPR_MSB or
  // S_SETREG_IMM32_B32 (with Size <= 12).
  if (MostRecentModeSet->getOpcode() != AMDGPU::S_SET_VGPR_MSB) {
    assert(MostRecentModeSet->getOpcode() == AMDGPU::S_SETREG_IMM32_B32 &&
           "unexpected MostRecentModeSet opcode");
    updateSetregModeImm(*MostRecentModeSet, CurrentMode.encode());
    LLVM_DEBUG(dbgs() << "    -> piggybacked onto S_SETREG_IMM32_B32: "
                      << *MostRecentModeSet);
    return true;
  }

  MachineOperand &ModeImm = MostRecentModeSet->getOperand(0);
  // Keep the old-mode bits the existing instruction already carries in the
  // high half of its immediate; only the low half is replaced.
  const int64_t CarriedPrevField = ModeImm.getImm() & (ModeMask << ModeWidth);
  ModeImm.setImm(CurrentMode.encode() | CarriedPrevField);
  LLVM_DEBUG(dbgs() << "    -> piggybacked onto S_SET_VGPR_MSB: "
                    << *MostRecentModeSet);
  return true;
}


std::optional<unsigned>

AMDGPULowerVGPREncoding::getMSBs(const MachineOperand &MO) const {

  if (!MO.isReg())

    return std::nullopt;


  MCRegister Reg = MO.getReg();

  const TargetRegisterClass *RC = TRI->getPhysRegBaseClass(Reg);

  if (!RC || !TRI->isVGPRClass(RC))

    return std::nullopt;


  unsigned Idx = TRI->getHWRegIndex(Reg);

  return Idx >> 8;

}


/// Fill \p NewMode with the MSBs that the VGPR operands of \p MI require.
///
/// \p Ops names the operand that feeds each of the OpNum mode fields. When
/// \p Ops2 is given, it names a second operand per field that is consulted if
/// the operand from \p Ops is absent or not a VGPR. Fields for which no VGPR
/// operand is found are left without a requirement.
void AMDGPULowerVGPREncoding::computeMode(ModeTy &NewMode,
                                          const MachineInstr &MI,
                                          const AMDGPU::OpName Ops[OpNum],
                                          const AMDGPU::OpName *Ops2) {
  NewMode = {};

  for (unsigned Idx = 0; Idx != OpNum; ++Idx) {
    const MachineOperand *Op = TII->getNamedOperand(MI, Ops[Idx]);
    std::optional<unsigned> MSBits = Op ? getMSBs(*Op) : std::nullopt;

#ifndef NDEBUG
    // Asserts builds only: if both operands mapped to this field are VGPRs
    // they must agree on their MSBs, since they share the field.
    if (Ops2 && MSBits.has_value()) {
      if (const MachineOperand *PairedOp =
              TII->getNamedOperand(MI, Ops2[Idx])) {
        const std::optional<unsigned> PairedMSBs = getMSBs(*PairedOp);
        if (PairedMSBs.has_value() && MSBits != PairedMSBs)
          llvm_unreachable("Invalid VOPD pair was created");
      }
    }
#endif

    // Fall back to the second table when the first operand yields nothing.
    if (Ops2 && !MSBits.has_value()) {
      Op = TII->getNamedOperand(MI, Ops2[Idx]);
      if (Op)
        MSBits = getMSBs(*Op);
    }

    // No VGPR in this slot: leave the field unconstrained.
    if (!MSBits.has_value())
      continue;

    // A tied, non-def src2 is covered by the vdst field for VOP2 and for VOP3
    // opcodes that also have a 32-bit VALU encoding, so it adds no
    // requirement of its own there. For other VOP3 instructions the tied use
    // is a real use and is recorded even though it must match the vdst.
    const bool IsTiedSrc2Use =
        Ops[Idx] == AMDGPU::OpName::src2 && !Op->isDef() && Op->isTied();
    if (IsTiedSrc2Use) {
      if (SIInstrInfo::isVOP2(MI) ||
          (SIInstrInfo::isVOP3(MI) &&
           TII->hasVALU32BitEncoding(MI.getOpcode())))
        continue;
    }

    NewMode.Ops[Idx].MSBits = MSBits.value();
  }
}


/// Process one instruction: compute the MSB mode its VGPR operands need and
/// make it effective. If the current mode does not fit and \p MI is
/// commutable, the commuted form is tried first, because it may fit without a
/// mode change. \returns true if \p MI or the surrounding code was changed.
bool AMDGPULowerVGPREncoding::runOnMachineInstr(MachineInstr &MI) {
  const auto Tables = AMDGPU::getVGPRLoweringOperandTables(MI.getDesc());

  // Instructions without an operand table are not lowered. They are expected
  // to have no VGPR uses unless they are meta or pseudo instructions.
  if (!Tables.first) {
    assert(!TII->hasVGPRUses(MI) || MI.isMetaInstruction() || MI.isPseudo());
    return false;
  }

  ModeTy NewMode;
  computeMode(NewMode, MI, Tables.first, Tables.second);
  LLVM_DEBUG({
    dbgs() << "  runOnMachineInstr: ";
    MI.print(dbgs());
    dbgs() << "    computed NewMode=";
    NewMode.print(dbgs());
    dbgs() << " compatible=" << CurrentMode.isCompatible(NewMode) << '\n';
  });

  // Either the mode already fits, or commuting is not possible: go straight
  // to setMode(). commuteInstruction() is only attempted when the first two
  // conditions are both false.
  if (CurrentMode.isCompatible(NewMode) || !MI.isCommutable() ||
      !TII->commuteInstruction(MI))
    return setMode(NewMode, MI.getIterator());

  // MI is now in its commuted form; check whether that form fits.
  ModeTy NewModeCommuted;
  computeMode(NewModeCommuted, MI, Tables.first, Tables.second);
  LLVM_DEBUG({
    dbgs() << "    commuted NewMode=";
    NewModeCommuted.print(dbgs());
    dbgs() << " compatible=" << CurrentMode.isCompatible(NewModeCommuted)
           << '\n';
  });

  if (CurrentMode.isCompatible(NewModeCommuted)) {
    // Record the mode bits the commuted instruction relies on. This prevents
    // later instructions from piggybacking and corrupting those bits (e.g.,
    // a nullopt src treated as 0 could be overwritten).
    bool Unused = false;
    CurrentMode.update(NewModeCommuted, Unused);
    // The commute itself already modified MI.
    return true;
  }

  // The commuted form does not fit either: restore the original operand
  // order and set the originally computed mode.
  if (!TII->commuteInstruction(MI))
    llvm_unreachable("Failed to restore commuted instruction.");

  return setMode(NewMode, MI.getIterator());
}


/// Adjust the insertion point \p I of a mode change with respect to the hard
/// clause currently being tracked, and return the iterator to insert at.
/// Outside of a clause \p I is returned unchanged. Inside a clause the
/// S_CLAUSE instruction may be lengthened or erased.
MachineBasicBlock::instr_iterator
AMDGPULowerVGPREncoding::handleClause(MachineBasicBlock::instr_iterator I) {
  // Not inside a clause: nothing to adjust.
  if (ClauseRemaining == 0)
    return I;

  // A clause cannot start with a special instruction, so when no clause
  // instruction has been consumed yet the mode change is placed before the
  // node preceding S_CLAUSE, which is expected to be the bundle header.
  if (ClauseRemaining == ClauseLen) {
    MachineBasicBlock::instr_iterator BundleHead =
        Clause->getPrevNode()->getIterator();
    assert(BundleHead->isBundle());
    return BundleHead;
  }

  // If the clause defines breaks, a group cannot start with a mode change:
  // give up on the clause altogether.
  if (ClauseBreaks != 0) {
    Clause->eraseFromBundle();
    ClauseRemaining = 0;
    return I;
  }

  // Otherwise make room for the extra instruction if it fits; if it does not,
  // the clause simply becomes shorter. The immediate stores the length minus
  // one, so writing the current ClauseLen lengthens the clause by one, and
  // the tracked length is bumped afterwards. Note that SIMM16[5:0] must be
  // 1-62, not 0 or 63.
  if (ClauseLen < 63)
    Clause->getOperand(0).setImm(ClauseLen | (ClauseBreaks << 8));
  ++ClauseLen;

  return I;
}


/// Move the insertion point \p I backwards over any run of "program state"
/// instructions (barriers, waitcnts, S_DELAY_ALU) that directly precedes it,
/// so the mode change is placed before that run. The end iterator is returned
/// unchanged.
MachineBasicBlock::instr_iterator
AMDGPULowerVGPREncoding::handleCoissue(MachineBasicBlock::instr_iterator I) {
  if (I.isEnd())
    return I;

  // "Program State instructions" are instructions which are used to control
  // operation of the GPU rather than performing arithmetic. Such instructions
  // have different coissuing rules w.r.t s_set_vgpr_msb.
  const auto IsProgramState = [this](const MachineInstr &Candidate) {
    const unsigned Opcode = Candidate.getOpcode();
    if (TII->isBarrier(Opcode) || TII->isWaitcnt(Opcode))
      return true;
    return Opcode == AMDGPU::S_DELAY_ALU;
  };

  // I is a valid instruction here and only ever steps to a predecessor, so it
  // never becomes the end iterator inside the loop.
  while (I != I->getParent()->begin()) {
    const auto Before = std::prev(I);
    if (!IsProgramState(*Before))
      break;
    I = Before;
  }

  return I;
}


/// Translate an 8-bit mode from the S_SET_VGPR_MSB field order into the field
/// order used by the MODE register.
///   S_SET_VGPR_MSB: src0 = bits[1:0], src1 = bits[3:2], src2 = bits[5:4],
///                   dst  = bits[7:6]
///   MODE register:  dst  = bits[1:0], src0 = bits[3:2], src1 = bits[5:4],
///                   src2 = bits[7:6]
/// Every source field moves up by one slot and dst wraps around to the
/// bottom, which is a rotate-left by 2 within 8 bits.
static int64_t convertModeToSetregFormat(int64_t Mode) {
  assert(isUInt<8>(Mode) && "Mode expected to be 8-bit");
  const auto MSBFormat = static_cast<uint8_t>(Mode);
  const uint8_t ModeRegFormat = llvm::rotl<uint8_t>(MSBFormat, /*R=*/2);
  return ModeRegFormat;
}


/// Replace the VGPR MSB field (the bits selected by VGPR_MSB_MASK) in the imm
/// operand of the S_SETREG_IMM32_B32 \p MI with \p ModeValue. \p ModeValue is
/// given in S_SET_VGPR_MSB field order and converted to MODE register order
/// before it is written. \returns true if the immediate changed.
bool AMDGPULowerVGPREncoding::updateSetregModeImm(MachineInstr &MI,
                                                  int64_t ModeValue) {
  assert(MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32);

  // Field value in MODE register order, already moved to its bit position.
  const int64_t MSBField = convertModeToSetregFormat(ModeValue)
                           << VGPRMSBShift;

  MachineOperand *ImmOp = TII->getNamedOperand(MI, AMDGPU::OpName::imm);
  const int64_t Before = ImmOp->getImm();
  // Clear the old field, then drop the new one in; all other bits are kept.
  const int64_t After = (Before & ~AMDGPU::Hwreg::VGPR_MSB_MASK) | MSBField;
  ImmOp->setImm(After);
  return After != Before;
}


/// Handle an S_SETREG_IMM32_B32 \p MI. Writes to hardware registers other
/// than MODE are ignored. For MODE writes one of three things happens:
///  - Size <= 12: imm32[12:19] is unused by the instruction; it is cleared,
///    the instruction becomes the piggyback target for the next mode change,
///    and CurrentMode is reset.
///  - Size > 12 and imm32[12:19] already equals the current MSBs: nothing is
///    changed, but a following S_SET_VGPR_MSB will need a preceding S_NOP.
///  - Size > 12 and imm32[12:19] differs: S_NOP + S_SET_VGPR_MSB are inserted
///    after \p MI to restore the current MSBs.
/// \returns true if \p MI was modified or instructions were inserted.
bool AMDGPULowerVGPREncoding::handleSetregMode(MachineInstr &MI) {
  using namespace AMDGPU::Hwreg;

  assert(MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 &&
         "only S_SETREG_IMM32_B32 needs to be handled");

  LLVM_DEBUG(dbgs() << "  handleSetregMode: " << MI);

  MachineOperand *SIMM16Op = TII->getNamedOperand(MI, AMDGPU::OpName::simm16);
  assert(SIMM16Op && "SIMM16Op must be present");

  auto [HwRegId, Offset, Size] = HwregEncoding::decode(SIMM16Op->getImm());
  // Offset is only consumed by the debug output below.
  (void)Offset;
  LLVM_DEBUG(dbgs() << "    HwRegId=" << HwRegId << " Offset=" << Offset
                    << " Size=" << Size << '\n');

  if (HwRegId != ID_MODE) {
    LLVM_DEBUG(dbgs() << "    -> not ID_MODE, skipping\n");
    return false;
  }

  // Current MSBs in S_SET_VGPR_MSB field order.
  const int64_t EncodedMode = CurrentMode.encode();
  LLVM_DEBUG({
    dbgs() << "    CurrentMode=";
    CurrentMode.print(dbgs());
    dbgs() << " encoded=0x" << Twine::utohexstr(EncodedMode)
           << " VGPRMSBShift=" << VGPRMSBShift << '\n';
  });

  // Case 1: Size <= 12 - the original instruction uses imm32[0:Size-1], so
  // imm32[12:19] is unused. Safe to set imm32[12:19] to the correct VGPR
  // MSBs.
  if (Size <= VGPRMSBShift) {
    LLVM_DEBUG(dbgs() << "    Case 1: Size(" << Size << ") <= VGPRMSBShift("
                      << VGPRMSBShift
                      << "), treating as mode scope boundary\n");
    // This instruction marks the end of the old mode's control range. With
    // CurrentMode reset, the next setMode call can piggyback the required
    // mode into bits[12:19] without triggering Rewritten.
    const bool ImmChanged = updateSetregModeImm(MI, 0);
    CurrentMode = {};
    MostRecentModeSet = &MI;
    LLVM_DEBUG(dbgs() << "    -> reset CurrentMode, cleared bits[12:19]: "
                      << MI);
    return ImmChanged;
  }

  // Case 2: Size > 12 - the original instruction uses bits beyond 11, so
  // imm32[12:19] cannot be modified arbitrarily. Check whether it already
  // matches the VGPR MSBs. imm32[12:19] is in MODE register format while
  // EncodedMode is in S_SET_VGPR_MSB format, hence the conversion.
  MachineOperand *ImmOp = TII->getNamedOperand(MI, AMDGPU::OpName::imm);
  assert(ImmOp && "ImmOp must be present");
  const int64_t SetregModeValue = convertModeToSetregFormat(EncodedMode);
  const int64_t ImmBits12To19 =
      (ImmOp->getImm() & VGPR_MSB_MASK) >> VGPRMSBShift;
  LLVM_DEBUG(dbgs() << "    Case 2: Size(" << Size << ") > VGPRMSBShift, "
                    << "ImmBits12To19=0x" << Twine::utohexstr(ImmBits12To19)
                    << " SetregModeValue=0x"
                    << Twine::utohexstr(SetregModeValue) << '\n');

  if (ImmBits12To19 != SetregModeValue) {
    // imm32[12:19] doesn't match VGPR MSBs - insert s_set_vgpr_msb after
    // the original instruction to restore the correct value. The S_NOP in
    // between avoids the GFX1250 hazard where S_SET_VGPR_MSB immediately
    // after S_SETREG_IMM32_B32(MODE) is silently dropped.
    MachineBasicBlock::iterator AfterSetreg = std::next(MI.getIterator());
    BuildMI(*MBB, AfterSetreg, MI.getDebugLoc(), TII->get(AMDGPU::S_NOP))
        .addImm(0);
    MostRecentModeSet = BuildMI(*MBB, AfterSetreg, MI.getDebugLoc(),
                                TII->get(AMDGPU::S_SET_VGPR_MSB))
                            .addImm(EncodedMode);
    LLVM_DEBUG(dbgs() << "    -> inserted S_SET_VGPR_MSB after setreg: "
                      << *MostRecentModeSet);
    return true;
  }

  // Already correct, but MostRecentModeSet must be invalidated because this
  // instruction overwrites mode[12:19]. It cannot serve as a piggyback target
  // (bits[12:19] are meaningful), so if CurrentMode changes, a new
  // s_set_vgpr_msb will be inserted after this instruction - and that one
  // needs a preceding S_NOP.
  NeedNopBeforeSetVGPRMSB = true;
  MostRecentModeSet = nullptr;
  LLVM_DEBUG(dbgs() << "    -> bits[12:19] already correct, "
                       "invalidated MostRecentModeSet\n");
  return false;
}


/// Entry point of the lowering. Does nothing unless the subtarget reports
/// 1024 addressable VGPRs. Otherwise every basic block is scanned in order
/// and each instruction is dispatched by kind; at the end of every block the
/// mode is reset to all-zero MSBs.
/// \returns the accumulated result of the per-instruction handlers.
bool AMDGPULowerVGPREncoding::run(MachineFunction &MF) {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  if (!ST.has1024AddressableVGPRs())
    return false;

  TRI = ST.getRegisterInfo();
  TII = ST.getInstrInfo();

  LLVM_DEBUG(dbgs() << "*** AMDGPULowerVGPREncoding on " << MF.getName()
                    << " ***\n");

  bool Modified = false;
  ClauseRemaining = 0;
  ClauseLen = 0;
  CurrentMode = {};

  for (MachineBasicBlock &Block : MF) {
    // Per-block state: mode-setting instructions are never reused across
    // block boundaries.
    MBB = &Block;
    MostRecentModeSet = nullptr;
    NeedNopBeforeSetVGPRMSB = false;

    LLVM_DEBUG(dbgs() << "BB#" << Block.getNumber() << ' ' << Block.getName()
                      << ":\n");

    // Early-increment iteration: handlers may insert instructions around MI.
    for (MachineInstr &MI : llvm::make_early_inc_range(Block.instrs())) {
      if (MI.isMetaInstruction())
        continue;

      if (MI.isTerminator() || MI.isCall()) {
        LLVM_DEBUG(dbgs() << "  terminator/call: " << MI);
        const unsigned Opcode = MI.getOpcode();
        const bool EndsProgram =
            Opcode == AMDGPU::S_ENDPGM || Opcode == AMDGPU::S_ENDPGM_SAVED;
        // At a program end the mode is merely forgotten; before any other
        // terminator or call it is reset to all-zero MSBs.
        if (EndsProgram)
          CurrentMode = {};
        else
          resetMode(MI.getIterator());
        NeedNopBeforeSetVGPRMSB = false;
        continue;
      }

      if (MI.isInlineAsm()) {
        LLVM_DEBUG(dbgs() << "  inline asm: " << MI);
        // Inline asm with VGPR uses runs with all-zero MSBs.
        if (TII->hasVGPRUses(MI))
          resetMode(MI.getIterator());
        NeedNopBeforeSetVGPRMSB = false;
        continue;
      }

      if (MI.getOpcode() == AMDGPU::S_CLAUSE) {
        assert(!ClauseRemaining && "Nested clauses are not supported");
        // Breaks are taken from bits [11:8] of the immediate; the low six
        // bits hold the clause length minus one.
        const unsigned ClauseImm = MI.getOperand(0).getImm();
        ClauseBreaks = (ClauseImm >> 8) & 15;
        ClauseLen = (ClauseImm & 63) + 1;
        ClauseRemaining = ClauseLen;
        Clause = &MI;
        LLVM_DEBUG(dbgs() << "  clause: len=" << ClauseLen
                          << " breaks=" << ClauseBreaks << '\n');
        continue;
      }

      if (MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 &&
          ST.hasSetregVGPRMSBFixup()) {
        Modified |= handleSetregMode(MI);
        continue;
      }

      Modified |= runOnMachineInstr(MI);
      NeedNopBeforeSetVGPRMSB = false;

      // One more instruction of the current clause (if any) is consumed.
      if (ClauseRemaining != 0)
        --ClauseRemaining;
    }

    // Reset the mode if we are falling through.
    LLVM_DEBUG(dbgs() << "  end of BB, resetting mode\n");
    resetMode(Block.instr_end());
  }

  return Modified;
}


class AMDGPULowerVGPREncodingLegacy : public MachineFunctionPass {

public:

  static char ID;


  AMDGPULowerVGPREncodingLegacy() : MachineFunctionPass(ID) {}


  bool runOnMachineFunction(MachineFunction &MF) override {

    return AMDGPULowerVGPREncoding().run(MF);

  }


  void getAnalysisUsage(AnalysisUsage &AU) const override {

    AU.setPreservesCFG();

    MachineFunctionPass::getAnalysisUsage(AU);

  }

};


} // namespace


// Storage for the legacy pass's static ID member.
char AMDGPULowerVGPREncodingLegacy::ID = 0;


// Exposes the legacy pass ID in namespace llvm as a reference to the static
// member above.
char &llvm::AMDGPULowerVGPREncodingLegacyID = AMDGPULowerVGPREncodingLegacy::ID;


// Registers the legacy pass under DEBUG_TYPE ("amdgpu-lower-vgpr-encoding")
// with the display name below; the two trailing macro arguments (cfg,
// analysis) are both false.
INITIALIZE_PASS(AMDGPULowerVGPREncodingLegacy, DEBUG_TYPE,

                "AMDGPU Lower VGPR Encoding", false, false)


/// New pass manager entry point. Runs the lowering on \p MF; if nothing
/// changed all analyses are preserved, otherwise the minimum machine function
/// pass set plus the CFG analyses are.
PreservedAnalyses
AMDGPULowerVGPREncodingPass::run(MachineFunction &MF,
                                 MachineFunctionAnalysisManager &MFAM) {
  AMDGPULowerVGPREncoding Impl{};
  const bool Modified = Impl.run(MF);
  if (Modified)
    return getMachineFunctionPassPreservedAnalyses()
        .preserveSet<CFGAnalyses>();

  return PreservedAnalyses::all();
}


assert
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")

AMDGPULowerVGPREncoding.h

AMDGPU.h

MBB
MachineBasicBlock & MBB
Definition ARMSLSHardening.cpp:71

GCNSubtarget.h
AMD GCN specific subclass of TargetSubtarget.

DEBUG_TYPE
#define DEBUG_TYPE
Definition GenericCycleImpl.h:31

TII
const HexagonInstrInfo * TII
Definition HexagonCopyToCombine.cpp:118

MI
IRTranslator LLVM IR MI
Definition IRTranslator.cpp:110

InlinePriorityMode::Size
@ Size
Definition InlineOrder.cpp:25

Ops
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
Definition ItaniumDemangle.h:3370

I
#define I(x, y, z)
Definition MD5.cpp:57

Reg
Register Reg
Definition MachineSink.cpp:2119

TRI
Register const TargetRegisterInfo * TRI
Definition MachineSink.cpp:2120

MathExtras.h

INITIALIZE_PASS
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition PassSupport.h:56

Opc
auto Opc
Definition RISCVRedundantCopyElimination.cpp:77

Mode
static cl::opt< RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Development, "development", "for training")))

SIDefines.h

SIInstrInfo.h
Interface definition for SIInstrInfo.

Debug.h

LLVM_DEBUG
#define LLVM_DEBUG(...)
Definition Debug.h:114

bit.h
This file implements the C++20 <bit> header.

llvm::AMDGPULowerVGPREncodingPass
Definition AMDGPULowerVGPREncoding.h:17

llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition PassAnalysisSupport.h:48

llvm::AnalysisUsage::setPreservesCFG
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition Pass.cpp:270

llvm::CFGAnalyses
Represents analyses that only rely on functions' control flow.
Definition Analysis.h:73

llvm::Clause
Definition DirectiveEmitter.h:279

llvm::GCNSubtarget
Definition GCNSubtarget.h:34

llvm::MCRegister
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41

llvm::MachineBasicBlock
Definition MachineBasicBlock.h:122

llvm::MachineBasicBlock::instr_iterator
Instructions::iterator instr_iterator
Definition MachineBasicBlock.h:338

llvm::MachineBasicBlock::iterator
MachineInstrBundleIterator< MachineInstr > iterator
Definition MachineBasicBlock.h:343

llvm::MachineFunctionPass
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Definition MachineFunctionPass.h:31

llvm::MachineFunctionPass::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Definition MachineFunctionPass.cpp:188

llvm::MachineFunction
Definition MachineFunction.h:295

llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition MachineFunction.h:791

llvm::MachineFunction::getName
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
Definition MachineFunction.cpp:653

llvm::MachineInstrBuilder::addImm
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Definition MachineInstrBuilder.h:233

llvm::MachineInstr
Representation of each machine instruction.
Definition MachineInstr.h:72

llvm::MachineInstr::getOpcode
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition MachineInstr.h:599

llvm::MachineInstr::getOperand
const MachineOperand & getOperand(unsigned i) const
Definition MachineInstr.h:607

llvm::MachineOperand
MachineOperand class - Representation of each machine instruction operand.
Definition MachineOperand.h:49

llvm::MachineOperand::setImm
void setImm(int64_t immVal)
Definition MachineOperand.h:694

llvm::MachineOperand::getImm
int64_t getImm() const
Definition MachineOperand.h:560

llvm::MachineOperand::isReg
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Definition MachineOperand.h:331

llvm::MachineOperand::getReg
Register getReg() const
getReg - Returns the register number.
Definition MachineOperand.h:372

llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112

llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118

llvm::PreservedAnalyses::preserveSet
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
Definition Analysis.h:151

llvm::SIInstrInfo
Definition SIInstrInfo.h:95

llvm::SIInstrInfo::isVOP2
static bool isVOP2(const MachineInstr &MI)
Definition SIInstrInfo.h:553

llvm::SIInstrInfo::isVOP3
static bool isVOP3(const MCInstrDesc &Desc)
Definition SIInstrInfo.h:561

llvm::SIRegisterInfo
Definition SIRegisterInfo.h:40

llvm::TargetRegisterClass
Definition TargetRegisterInfo.h:45

llvm::Twine::utohexstr
static Twine utohexstr(uint64_t Val)
Definition Twine.h:385

llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53

uint8_t

Changed
Changed
Definition ObjCARCOpts.cpp:2369

llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition ErrorHandling.h:164

llvm::AMDGPU::Hwreg
Definition SIDefines.h:513

llvm::AMDGPU::Hwreg::DST_VGPR_MSB
@ DST_VGPR_MSB
Definition SIDefines.h:596

llvm::AMDGPU
Definition AMDGPUMetadataVerifier.h:34

llvm::AMDGPU::getVGPRLoweringOperandTables
std::pair< const AMDGPU::OpName *, const AMDGPU::OpName * > getVGPRLoweringOperandTables(const MCInstrDesc &Desc)
Definition AMDGPUBaseInfo.cpp:3627

llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24

llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26

llvm::Offset
@ Offset
Definition DWP.cpp:532

llvm::print
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
Definition GCNRegPressure.cpp:245

llvm::encode
unsigned encode(MaybeAlign A)
Returns a representation of the alignment that encodes undefined as 0.
Definition Alignment.h:206

llvm::BuildMI
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
Definition MachineInstrBuilder.h:449

llvm::enumerate
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2554

llvm::make_early_inc_range
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:634

llvm::MachineFunctionAnalysisManager
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
Definition MachineFunctionAnalysisManager.h:24

llvm::getMachineFunctionPassPreservedAnalyses
LLVM_ABI PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
Definition MachinePassManager.cpp:162

llvm::dbgs
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207

llvm::isUInt
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189

llvm::Op
DWARFExpression::Operation Op
Definition DWARFExpressionPrinter.cpp:23

llvm::countr_zero_constexpr
constexpr int countr_zero_constexpr(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:188

llvm::AMDGPULowerVGPREncodingLegacyID
char & AMDGPULowerVGPREncodingLegacyID
Definition AMDGPULowerVGPREncoding.cpp:641

llvm::seq
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305

llvm::rotl
constexpr T rotl(T V, int R)
Definition bit.h:369