18#include "AMDGPUGenSearchableTables.inc"
41#define DEBUG_TYPE "amdgpu-insert-single-use-vdst"
// Per-instruction bookkeeping that is later packed into an immediate by
// encodeImm().
// NOTE(review): this listing drops several lines of the class (the
// declarations of SkipRegions and ProducerInstr, which the methods below use,
// are not shown) — confirm against the complete file.
47 class SingleUseInstruction {
// Step size used by skip(): the encoded position is advanced by this much per
// loop iteration there (0b111 == 7).
49 static const unsigned MaxSkipRange = 0b111;
// Cap on SkipRegions.size(); tryAddProducer() checks against it before
// calling skip().
50 static const unsigned MaxNumberOfSkipRegions = 2;
// Position up to which producers and skips have been accounted for; set to
// ProducerPosition + 1 after a producer is recorded.
52 unsigned LastEncodedPositionEnd;
// Producer counters, indexed by the number of skip regions recorded so far
// (hence one more slot than MaxNumberOfSkipRegions).
55 std::array<unsigned, MaxNumberOfSkipRegions + 1> SingleUseRegions;
// Moves LastEncodedPositionEnd forward to ProducerPosition and records the
// distance covered in SkipRegions.
// NOTE(review): listing lines 61, 63 and 66 are missing here; presumably the
// loop body also pushes a MaxSkipRange-sized entry and the braces close on
// those lines — confirm against the complete file.
59 void skip(
const unsigned ProducerPosition) {
// While ProducerPosition is more than MaxSkipRange away, step forward in
// MaxSkipRange-sized increments.
60 while (LastEncodedPositionEnd + MaxSkipRange < ProducerPosition) {
62 LastEncodedPositionEnd += MaxSkipRange;
// Record the remaining distance to ProducerPosition as a skip entry
// (presumably after the loop has finished — the closing brace is not shown).
64 SkipRegions.
push_back(ProducerPosition - LastEncodedPositionEnd);
// The encoded position now coincides with the producer's position.
65 LastEncodedPositionEnd = ProducerPosition;
// Returns true while the counter SingleUseRegions[Region] is still below its
// limit: 0b1111 when Region == MaxNumberOfSkipRegions, otherwise 0b111.
// NOTE(review): Region is declared on a line missing from this listing;
// presumably it is SkipRegions.size(), matching the index used by
// tryAddProducer() — confirm.
68 bool currentRegionHasSpace() {
71 return SingleUseRegions[
Region] <
72 ((
Region == MaxNumberOfSkipRegions) ? 0b1111U : 0b111U);
// Packs the SingleUseRegions counters and the SkipRegions distances into one
// unsigned immediate.
// NOTE(review): listing lines 82 and 84 onwards are missing; presumably
// ShiftAmount is advanced after each OR and Imm is returned — confirm.
75 unsigned encodeImm() {
// The lowest bits hold the counter of the most recent region (the one indexed
// by the current number of skip regions).
78 unsigned Imm = SingleUseRegions[SkipRegions.
size()];
// The next field starts at bit 4.
79 unsigned ShiftAmount = 4;
// Walk the skip regions from the most recent back to the first, OR-ing in each
// skip distance and the counter of the region that precedes it.
80 for (
unsigned i = SkipRegions.
size(); i > 0; i--) {
81 Imm |= SkipRegions[i - 1] << ShiftAmount;
83 Imm |= SingleUseRegions[i - 1] << ShiftAmount;
// Starts the bookkeeping with one producer already recorded: the first region
// counter is 1, the others are 0, and the encoded position ends one past
// ProducerPosition.
// NOTE(review): the declaration of the Producer parameter is on a line missing
// from this listing.
90 SingleUseInstruction(
const unsigned ProducerPosition,
92 : LastEncodedPositionEnd(ProducerPosition + 1), ProducerInstr(Producer),
93 SingleUseRegions({1, 0, 0}) {}
// Attempts to record another producer at ProducerPosition in this object.
// NOTE(review): the right-hand side of the first comparison, every return
// statement and the closing braces are missing from this listing; judging by
// the call site in insertSingleUseInstructions(), a false result makes the
// caller start a new SingleUseInstruction — confirm against the complete file.
99 bool tryAddProducer(
const unsigned ProducerPosition,
MachineInstr *
MI) {
// Furthest position still reachable: each skip region not yet used can cover
// at most MaxSkipRange positions.
103 if (LastEncodedPositionEnd +
104 (MaxSkipRange * (MaxNumberOfSkipRegions - SkipRegions.
size())) <
// A skip is required when the producer does not sit exactly at the current
// encoded position, or when the current region's counter is already full.
109 if (LastEncodedPositionEnd != ProducerPosition ||
110 !currentRegionHasSpace()) {
// Check whether the maximum number of skip regions is already in use.
113 if (SkipRegions.
size() == MaxNumberOfSkipRegions)
115 skip(ProducerPosition);
// Count the producer in the current region (indexed by the number of skips
// recorded so far) and move the encoded position one past it.
118 SingleUseRegions[SkipRegions.
size()]++;
119 LastEncodedPositionEnd = ProducerPosition + 1;
// Looks up the AMDGPU::S_SINGLEUSE_VDST opcode via SII->get().
// NOTE(review): the enclosing statement and function are missing from this
// listing; presumably this is an argument of the call that builds the
// instruction — confirm.
126 SII->get(AMDGPU::S_SINGLEUSE_VDST))
// Groups the given (position, instruction) pairs into SingleUseInstruction
// objects, in the order they appear in SingleUseProducers.
// NOTE(review): the declaration of Instructions and whatever follows the loop
// are missing from this listing — confirm how the collected objects are
// consumed.
136 void insertSingleUseInstructions(
137 ArrayRef<std::pair<unsigned, MachineInstr *>> SingleUseProducers)
const {
140 for (
auto &[Position,
MI] : SingleUseProducers) {
// Try to add the producer to the most recent object first; start a new one
// when there is none yet or tryAddProducer() reports failure.
142 if (Instructions.empty() ||
143 !Instructions.back().tryAddProducer(Position,
MI)) {
145 Instructions.push_back(SingleUseInstruction(Position,
MI));
// NOTE(review): this is the body of a function whose signature is missing from
// the listing, and many body lines (loop headers, closing braces, several
// declarations) are missing as well. The comments below describe only the
// visible statements.
// Guard on the subtarget feature hasVGPRSingleUseHintInsts(); the guarded
// statement is not shown.
155 if (!ST.hasVGPRSingleUseHintInsts())
// Cache the instruction info from the subtarget.
158 SII = ST.getInstrInfo();
// Result flag returned at the end; set once a single-use producer is recorded.
160 bool InstructionEmitted =
false;
// For a register unit whose lane mask overlaps the live-out's lane mask, set
// the use count to 2 so it fails the "> 1" single-use test below.
171 const auto [Unit, Mask] = *Units;
172 if ((Mask & Liveout.LaneMask).any())
173 RegisterUseCount[Unit] = 2;
178 SingleUseProducerPositions;
// Position counter recorded alongside each producer (presumably counts VALU
// instructions; the increment is not shown).
180 unsigned VALUInstrCount = 0;
// Assume every defined operand is single-use until a counter says otherwise.
184 bool AllProducerOperandsAreSingleUse =
true;
// Inspect every register defined by MI.
191 for (
const auto &Operand :
MI.all_defs()) {
192 const auto Reg = Operand.getReg();
194 const auto RegUnits =
TRI->regunits(Reg);
// A unit with a recorded use count above one is not single-use.
196 return RegisterUseCount[Unit] > 1;
198 AllProducerOperandsAreSingleUse =
false;
// Drop the recorded count for the unit.
202 RegisterUseCount.
erase(Unit);
// Inspect every register used by MI.
205 for (
const auto &Operand :
MI.all_uses()) {
206 const auto Reg = Operand.getReg();
// Bump the use count of every register unit collected in RegistersUsed.
214 for (
const MCRegUnit Unit : RegistersUsed)
215 RegisterUseCount[Unit]++;
// Special handling when MI modifies EXEC (the rest of the condition and the
// loop body applied to each RegisterUseCount entry are not shown).
218 if (
MI.modifiesRegister(AMDGPU::EXEC,
TRI) ||
220 for (
auto &UsedReg : RegisterUseCount)
// Record MI, with its position, as a single-use producer.
227 if (AllProducerOperandsAreSingleUse) {
228 SingleUseProducerPositions.
push_back({VALUInstrCount, &
MI});
229 InstructionEmitted =
true;
// Hand the collected producers to insertSingleUseInstructions().
233 insertSingleUseInstructions(SingleUseProducerPositions);
235 return InstructionEmitted;
// Definition of the pass's static ID member, initialized to 0. The lines that
// follow are the trailing arguments of a macro invocation whose opening line
// is missing from this listing (presumably INITIALIZE_PASS, passing the
// human-readable pass name and two false flags — confirm).
240char AMDGPUInsertSingleUseVDST::ID = 0;
245 "AMDGPU Insert SingleUseVDST",
false,
false)
This file defines the DenseMap class.
AMD GCN specific subclass of TargetSubtarget.
static bool skip(DataExtractor &Data, uint64_t &Offset, bool SkippedRanges)
Skip an InlineInfo object in the specified data at the specified offset.
unsigned const TargetRegisterInfo * TRI
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Interface definition for SIInstrInfo.
Interface definition for SIRegisterInfo.
This file defines the SmallVector class.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
bool erase(const KeyT &Val)
MCRegUnitMaskIterator enumerates a list of register units and their associated lane masks for Reg.
bool isValid() const
Returns true if this iterator is not yet at the end.
iterator_range< liveout_iterator > liveouts() const
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Representation of each machine instruction.
const MachineBasicBlock * getParent() const
const SIRegisterInfo & getRegisterInfo() const
static bool isVALU(const MachineInstr &MI)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
bool isInvalidSingleUseProducerInst(unsigned Opc)
bool isInvalidSingleUseConsumerInst(unsigned Opc)
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
This is an optimization pass for GlobalISel generic memory operations.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
char & AMDGPUInsertSingleUseVDSTID
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
auto reverse(ContainerTy &&C)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.