LLVM 19.0.0git
AMDGPUInsertSingleUseVDST.cpp
Go to the documentation of this file.
1//===- AMDGPUInsertSingleUseVDST.cpp - Insert s_singleuse_vdst instructions ==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// Insert s_singleuse_vdst instructions on GFX11.5+ to mark regions of VALU
11/// instructions that produce single-use VGPR values. If the value is forwarded
12/// to the consumer instruction prior to VGPR writeback, the hardware can
13/// then skip (kill) the VGPR write.
14//
15//===----------------------------------------------------------------------===//
16
17#include "AMDGPU.h"
18#include "GCNSubtarget.h"
20#include "SIInstrInfo.h"
21#include "llvm/ADT/DenseMap.h"
22#include "llvm/ADT/STLExtras.h"
23#include "llvm/ADT/StringRef.h"
32#include "llvm/IR/DebugLoc.h"
33#include "llvm/MC/MCRegister.h"
34#include "llvm/Pass.h"
35
36using namespace llvm;
37
38#define DEBUG_TYPE "amdgpu-insert-single-use-vdst"
39
40namespace {
41class AMDGPUInsertSingleUseVDST : public MachineFunctionPass {
42private:
43 const SIInstrInfo *SII;
44
45public:
46 static char ID;
47
48 AMDGPUInsertSingleUseVDST() : MachineFunctionPass(ID) {}
49
50 void emitSingleUseVDST(MachineInstr &MI) const {
51 // Mark the following instruction as a single-use producer:
52 // s_singleuse_vdst { supr0: 1 }
53 BuildMI(*MI.getParent(), MI, DebugLoc(), SII->get(AMDGPU::S_SINGLEUSE_VDST))
54 .addImm(0x1);
55 }
56
57 bool runOnMachineFunction(MachineFunction &MF) override {
58 const auto &ST = MF.getSubtarget<GCNSubtarget>();
59 if (!ST.hasVGPRSingleUseHintInsts())
60 return false;
61
62 SII = ST.getInstrInfo();
63 const auto *TRI = &SII->getRegisterInfo();
64 bool InstructionEmitted = false;
65
66 for (MachineBasicBlock &MBB : MF) {
67 DenseMap<MCRegUnit, unsigned> RegisterUseCount;
68
69 // Handle boundaries at the end of basic block separately to avoid
70 // false positives. If they are live at the end of a basic block then
71 // assume it has more uses later on.
72 for (const auto &Liveout : MBB.liveouts()) {
73 for (MCRegUnitMaskIterator Units(Liveout.PhysReg, TRI); Units.isValid();
74 ++Units) {
75 const auto [Unit, Mask] = *Units;
76 if ((Mask & Liveout.LaneMask).any())
77 RegisterUseCount[Unit] = 2;
78 }
79 }
80
81 for (MachineInstr &MI : reverse(MBB.instrs())) {
82 // All registers in all operands need to be single use for an
83 // instruction to be marked as a single use producer.
84 bool AllProducerOperandsAreSingleUse = true;
85
86 // Gather a list of Registers used before updating use counts to avoid
87 // double counting registers that appear multiple times in a single
88 // MachineInstr.
89 SmallVector<MCRegUnit> RegistersUsed;
90
91 for (const auto &Operand : MI.all_defs()) {
92 const auto Reg = Operand.getReg();
93
94 const auto RegUnits = TRI->regunits(Reg);
95 if (any_of(RegUnits, [&RegisterUseCount](const MCRegUnit Unit) {
96 return RegisterUseCount[Unit] > 1;
97 }))
98 AllProducerOperandsAreSingleUse = false;
99
100 // Reset uses count when a register is no longer live.
101 for (const MCRegUnit Unit : RegUnits)
102 RegisterUseCount.erase(Unit);
103 }
104
105 for (const auto &Operand : MI.all_uses()) {
106 const auto Reg = Operand.getReg();
107
108 // Count the number of times each register is read.
109 for (const MCRegUnit Unit : TRI->regunits(Reg)) {
110 if (!is_contained(RegistersUsed, Unit))
111 RegistersUsed.push_back(Unit);
112 }
113 }
114 for (const MCRegUnit Unit : RegistersUsed)
115 RegisterUseCount[Unit]++;
116
117 // Do not attempt to optimise across exec mask changes.
118 if (MI.modifiesRegister(AMDGPU::EXEC, TRI)) {
119 for (auto &UsedReg : RegisterUseCount)
120 UsedReg.second = 2;
121 }
122 if (AllProducerOperandsAreSingleUse && SIInstrInfo::isVALU(MI)) {
123 // TODO: Replace with candidate logging for instruction grouping
124 // later.
125 emitSingleUseVDST(MI);
126 InstructionEmitted = true;
127 }
128 }
129 }
130 return InstructionEmitted;
131 }
132};
133} // namespace
134
135char AMDGPUInsertSingleUseVDST::ID = 0;
136
137char &llvm::AMDGPUInsertSingleUseVDSTID = AMDGPUInsertSingleUseVDST::ID;
138
139INITIALIZE_PASS(AMDGPUInsertSingleUseVDST, DEBUG_TYPE,
140 "AMDGPU Insert SingleUseVDST", false, false)
MachineBasicBlock & MBB
#define DEBUG_TYPE
Provides AMDGPU specific target descriptions.
This file defines the DenseMap class.
AMD GCN specific subclass of TargetSubtarget.
IRTranslator LLVM IR MI
unsigned const TargetRegisterInfo * TRI
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:38
Interface definition for SIInstrInfo.
This file contains some templates that are useful if you are working with the STL at all.
A debug info location.
Definition: DebugLoc.h:33
bool erase(const KeyT &Val)
Definition: DenseMap.h:329
MCRegUnitMaskIterator enumerates a list of register units and their associated lane masks for Reg.
bool isValid() const
Returns true if this iterator is not yet at the end.
iterator_range< liveout_iterator > liveouts() const
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of passes that operate on the MachineFunction representation.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformation or analysis.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Representation of each machine instruction.
Definition: MachineInstr.h:69
const SIRegisterInfo & getRegisterInfo() const
Definition: SIInstrInfo.h:222
static bool isVALU(const MachineInstr &MI)
Definition: SIInstrInfo.h:416
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
char & AMDGPUInsertSingleUseVDSTID
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:419
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1879