LLVM 20.0.0git
AMDGPUInsertSingleUseVDST.cpp
Go to the documentation of this file.
1//===- AMDGPUInsertSingleUseVDST.cpp - Insert s_singleuse_vdst instructions ==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// Insert s_singleuse_vdst instructions on GFX11.5+ to mark regions of VALU
11/// instructions that produce single-use VGPR values. If the value is forwarded
12/// to the consumer instruction prior to VGPR writeback, the hardware can
13/// then skip (kill) the VGPR write.
14//
15//===----------------------------------------------------------------------===//
16
17#include "AMDGPU.h"
18#include "AMDGPUGenSearchableTables.inc"
19#include "GCNSubtarget.h"
20#include "SIInstrInfo.h"
21#include "SIRegisterInfo.h"
22#include "llvm/ADT/DenseMap.h"
23#include "llvm/ADT/STLExtras.h"
25#include "llvm/ADT/StringRef.h"
33#include "llvm/IR/DebugLoc.h"
34#include "llvm/MC/MCRegister.h"
36#include "llvm/Pass.h"
37#include <array>
38
39using namespace llvm;
40
41#define DEBUG_TYPE "amdgpu-insert-single-use-vdst"
42
43namespace {
44class AMDGPUInsertSingleUseVDST : public MachineFunctionPass {
45private:
46 const SIInstrInfo *SII;
47 class SingleUseInstruction {
48 private:
49 static const unsigned MaxSkipRange = 0b111;
50 static const unsigned MaxNumberOfSkipRegions = 2;
51
52 unsigned LastEncodedPositionEnd;
53 MachineInstr *ProducerInstr;
54
55 std::array<unsigned, MaxNumberOfSkipRegions + 1> SingleUseRegions;
57
58 // Adds a skip region into the instruction.
59 void skip(const unsigned ProducerPosition) {
60 while (LastEncodedPositionEnd + MaxSkipRange < ProducerPosition) {
61 SkipRegions.push_back(MaxSkipRange);
62 LastEncodedPositionEnd += MaxSkipRange;
63 }
64 SkipRegions.push_back(ProducerPosition - LastEncodedPositionEnd);
65 LastEncodedPositionEnd = ProducerPosition;
66 }
67
68 bool currentRegionHasSpace() {
69 const auto Region = SkipRegions.size();
70 // The first region has an extra bit of encoding space.
71 return SingleUseRegions[Region] <
72 ((Region == MaxNumberOfSkipRegions) ? 0b1111U : 0b111U);
73 }
74
75 unsigned encodeImm() {
76 // Handle the first Single Use Region separately as it has an extra bit
77 // of encoding space.
78 unsigned Imm = SingleUseRegions[SkipRegions.size()];
79 unsigned ShiftAmount = 4;
80 for (unsigned i = SkipRegions.size(); i > 0; i--) {
81 Imm |= SkipRegions[i - 1] << ShiftAmount;
82 ShiftAmount += 3;
83 Imm |= SingleUseRegions[i - 1] << ShiftAmount;
84 ShiftAmount += 3;
85 }
86 return Imm;
87 }
88
89 public:
90 SingleUseInstruction(const unsigned ProducerPosition,
91 MachineInstr *Producer)
92 : LastEncodedPositionEnd(ProducerPosition + 1), ProducerInstr(Producer),
93 SingleUseRegions({1, 0, 0}) {}
94
95 // Returns false if adding a new single use producer failed. This happens
96 // because it could not be encoded, either because there is no room to
97 // encode another single use producer region or that this single use
98 // producer is too far away to encode the amount of instructions to skip.
99 bool tryAddProducer(const unsigned ProducerPosition, MachineInstr *MI) {
100 // Producer is too far away to encode into this instruction or another
101 // skip region is needed and SkipRegions.size() = 2 so there's no room for
102 // another skip region, therefore a new instruction is needed.
103 if (LastEncodedPositionEnd +
104 (MaxSkipRange * (MaxNumberOfSkipRegions - SkipRegions.size())) <
105 ProducerPosition)
106 return false;
107
108 // If a skip region is needed.
109 if (LastEncodedPositionEnd != ProducerPosition ||
110 !currentRegionHasSpace()) {
111 // If the current region is out of space therefore a skip region would
112 // be needed, but there is no room for another skip region.
113 if (SkipRegions.size() == MaxNumberOfSkipRegions)
114 return false;
115 skip(ProducerPosition);
116 }
117
118 SingleUseRegions[SkipRegions.size()]++;
119 LastEncodedPositionEnd = ProducerPosition + 1;
120 ProducerInstr = MI;
121 return true;
122 }
123
124 auto emit(const SIInstrInfo *SII) {
125 return BuildMI(*ProducerInstr->getParent(), ProducerInstr, DebugLoc(),
126 SII->get(AMDGPU::S_SINGLEUSE_VDST))
127 .addImm(encodeImm());
128 }
129 };
130
131public:
132 static char ID;
133
134 AMDGPUInsertSingleUseVDST() : MachineFunctionPass(ID) {}
135
136 void insertSingleUseInstructions(
137 ArrayRef<std::pair<unsigned, MachineInstr *>> SingleUseProducers) const {
139
140 for (auto &[Position, MI] : SingleUseProducers) {
141 // Encode this position into the last single use instruction if possible.
142 if (Instructions.empty() ||
143 !Instructions.back().tryAddProducer(Position, MI)) {
144 // If not, add a new instruction.
145 Instructions.push_back(SingleUseInstruction(Position, MI));
146 }
147 }
148
149 for (auto &Instruction : Instructions)
150 Instruction.emit(SII);
151 }
152
153 bool runOnMachineFunction(MachineFunction &MF) override {
154 const auto &ST = MF.getSubtarget<GCNSubtarget>();
155 if (!ST.hasVGPRSingleUseHintInsts())
156 return false;
157
158 SII = ST.getInstrInfo();
159 const auto *TRI = &SII->getRegisterInfo();
160 bool InstructionEmitted = false;
161
162 for (MachineBasicBlock &MBB : MF) {
163 DenseMap<MCRegUnit, unsigned> RegisterUseCount;
164
165 // Handle boundaries at the end of basic block separately to avoid
166 // false positives. If they are live at the end of a basic block then
167 // assume it has more uses later on.
168 for (const auto &Liveout : MBB.liveouts()) {
169 for (MCRegUnitMaskIterator Units(Liveout.PhysReg, TRI); Units.isValid();
170 ++Units) {
171 const auto [Unit, Mask] = *Units;
172 if ((Mask & Liveout.LaneMask).any())
173 RegisterUseCount[Unit] = 2;
174 }
175 }
176
178 SingleUseProducerPositions;
179
180 unsigned VALUInstrCount = 0;
181 for (MachineInstr &MI : reverse(MBB.instrs())) {
182 // All registers in all operands need to be single use for an
183 // instruction to be marked as a single use producer.
184 bool AllProducerOperandsAreSingleUse = true;
185
186 // Gather a list of Registers used before updating use counts to avoid
187 // double counting registers that appear multiple times in a single
188 // MachineInstr.
189 SmallVector<MCRegUnit> RegistersUsed;
190
191 for (const auto &Operand : MI.all_defs()) {
192 const auto Reg = Operand.getReg();
193
194 const auto RegUnits = TRI->regunits(Reg);
195 if (any_of(RegUnits, [&RegisterUseCount](const MCRegUnit Unit) {
196 return RegisterUseCount[Unit] > 1;
197 }))
198 AllProducerOperandsAreSingleUse = false;
199
200 // Reset uses count when a register is no longer live.
201 for (const MCRegUnit Unit : RegUnits)
202 RegisterUseCount.erase(Unit);
203 }
204
205 for (const auto &Operand : MI.all_uses()) {
206 const auto Reg = Operand.getReg();
207
208 // Count the number of times each register is read.
209 for (const MCRegUnit Unit : TRI->regunits(Reg)) {
210 if (!is_contained(RegistersUsed, Unit))
211 RegistersUsed.push_back(Unit);
212 }
213 }
214 for (const MCRegUnit Unit : RegistersUsed)
215 RegisterUseCount[Unit]++;
216
217 // Do not attempt to optimise across exec mask changes.
218 if (MI.modifiesRegister(AMDGPU::EXEC, TRI) ||
220 for (auto &UsedReg : RegisterUseCount)
221 UsedReg.second = 2;
222 }
223
224 if (!SIInstrInfo::isVALU(MI) ||
226 continue;
227 if (AllProducerOperandsAreSingleUse) {
228 SingleUseProducerPositions.push_back({VALUInstrCount, &MI});
229 InstructionEmitted = true;
230 }
231 VALUInstrCount++;
232 }
233 insertSingleUseInstructions(SingleUseProducerPositions);
234 }
235 return InstructionEmitted;
236 }
237};
238} // namespace
239
240char AMDGPUInsertSingleUseVDST::ID = 0;
241
242char &llvm::AMDGPUInsertSingleUseVDSTID = AMDGPUInsertSingleUseVDST::ID;
243
244INITIALIZE_PASS(AMDGPUInsertSingleUseVDST, DEBUG_TYPE,
245 "AMDGPU Insert SingleUseVDST", false, false)
#define DEBUG_TYPE
MachineBasicBlock & MBB
This file defines the DenseMap class.
AMD GCN specific subclass of TargetSubtarget.
IRTranslator LLVM IR MI
static bool skip(DataExtractor &Data, uint64_t &Offset, bool SkippedRanges)
Skip an InlineInfo object in the specified data at the specified offset.
Definition: InlineInfo.cpp:77
unsigned const TargetRegisterInfo * TRI
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:38
Interface definition for SIInstrInfo.
Interface definition for SIRegisterInfo.
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallVector class.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
A debug info location.
Definition: DebugLoc.h:33
bool erase(const KeyT &Val)
Definition: DenseMap.h:336
MCRegUnitMaskIterator enumerates a list of register units and their associated lane masks for Reg.
bool isValid() const
Returns true if this iterator is not yet at the end.
iterator_range< liveout_iterator > liveouts() const
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Representation of each machine instruction.
Definition: MachineInstr.h:69
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:346
const SIRegisterInfo & getRegisterInfo() const
Definition: SIInstrInfo.h:222
static bool isVALU(const MachineInstr &MI)
Definition: SIInstrInfo.h:416
size_t size() const
Definition: SmallVector.h:91
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
bool isInvalidSingleUseProducerInst(unsigned Opc)
bool isInvalidSingleUseConsumerInst(unsigned Opc)
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
char & AMDGPUInsertSingleUseVDSTID
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:419
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1886