31#define DEBUG_TYPE "amdgpu-regbanklegalize"
34using namespace AMDGPU;
50 return "AMDGPU Register Bank Legalize";
64 MachineFunctionProperties::Property::NoPHIs);
71 "AMDGPU Register Bank Legalize",
false,
false)
78char AMDGPURegBankLegalize::
ID = 0;
83 return new AMDGPURegBankLegalize();
88 static std::mutex GlobalMutex;
91 std::lock_guard<std::mutex> Lock(GlobalMutex);
92 if (!CacheForRuleSet.
contains(ST.getGeneration())) {
93 auto Rules = std::make_unique<RegBankLegalizeRules>(ST,
MRI);
94 CacheForRuleSet[ST.getGeneration()] = std::move(Rules);
96 CacheForRuleSet[ST.getGeneration()]->refreshRefs(ST,
MRI);
98 return *CacheForRuleSet[ST.getGeneration()];
118 SgprRB(&RBI.getRegBank(AMDGPU::SGPRRegBankID)),
119 VgprRB(&RBI.getRegBank(AMDGPU::VGPRRegBankID)),
120 VccRB(&RBI.getRegBank(AMDGPU::VCCRegBankID)) {};
124 if (RB && RB->
getID() == AMDGPU::VCCRegBankID)
132 MI.eraseFromParent();
148 if (!Dst.isVirtual() || !Src.isVirtual())
157 if (
isLaneMask(Dst) &&
MRI.getRegBankOrNull(Src) == SgprRB) {
158 auto [Trunc, TruncS32Src] =
tryMatch(Src, AMDGPU::G_TRUNC);
160 "sgpr S1 must be result of G_TRUNC of sgpr S32");
164 auto One = B.buildConstant({SgprRB,
S32}, 1);
165 auto BoolSrc = B.buildAnd({SgprRB,
S32}, TruncS32Src, One);
166 B.buildInstr(AMDGPU::G_AMDGPU_COPY_VCC_SCC, {Dst}, {BoolSrc});
175 if (
MRI.getRegBankOrNull(Dst) == VgprRB &&
176 MRI.getRegBankOrNull(Src) == SgprRB) {
177 auto [RAL, RALSrc] =
tryMatch(Src, AMDGPU::G_AMDGPU_READANYLANE);
181 assert(
MRI.getRegBank(RALSrc) == VgprRB);
182 MRI.replaceRegWith(Dst, RALSrc);
195 if (
MRI.getType(Src) !=
S1)
198 auto [Trunc, TruncSrc] =
tryMatch(Src, AMDGPU::G_TRUNC);
202 LLT DstTy =
MRI.getType(Dst);
203 LLT TruncSrcTy =
MRI.getType(TruncSrc);
205 if (DstTy == TruncSrcTy) {
206 MRI.replaceRegWith(Dst, TruncSrc);
213 if (DstTy ==
S32 && TruncSrcTy ==
S64) {
214 auto Unmerge = B.buildUnmerge({SgprRB,
S32}, TruncSrc);
215 MRI.replaceRegWith(Dst, Unmerge.getReg(0));
220 if (DstTy ==
S32 && TruncSrcTy ==
S16) {
221 B.buildAnyExt(Dst, TruncSrc);
226 if (DstTy ==
S16 && TruncSrcTy ==
S32) {
227 B.buildTrunc(Dst, TruncSrc);
239 for (
unsigned i = 0; i <
MRI.getNumVirtRegs(); ++i) {
241 if (
MRI.def_empty(Reg) ||
MRI.getType(Reg) !=
S1)
245 if (RB && RB->
getID() == AMDGPU::SGPRRegBankID) {
257 MachineFunctionProperties::Property::FailedISel))
263 getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper();
269 B.setCSEInfo(&CSEInfo);
270 B.setChangeObserver(Observer);
279 getAnalysis<MachineUniformityAnalysisPass>().getUniformityInfo();
296 if (!
MI->isPreISelOpcode())
299 unsigned Opc =
MI->getOpcode();
301 if (Opc == AMDGPU::G_PHI) {
302 RBLHelper.applyMappingPHI(*
MI);
308 if (Opc == AMDGPU::G_BUILD_VECTOR || Opc == AMDGPU::G_UNMERGE_VALUES ||
309 Opc == AMDGPU::G_MERGE_VALUES) {
310 RBLHelper.applyMappingTrivial(*
MI);
315 if ((Opc == AMDGPU::G_CONSTANT || Opc == AMDGPU::G_FCONSTANT ||
316 Opc == AMDGPU::G_IMPLICIT_DEF)) {
320 assert(
MRI.getRegBank(Dst)->getID() == AMDGPU::SGPRRegBankID);
327 RBLHelper.findRuleAndApplyMapping(*
MI);
357 if (
MI.getOpcode() == AMDGPU::COPY) {
361 if (
MI.getOpcode() == AMDGPU::G_ANYEXT) {
369 "Registers with sgpr reg bank and S1 LLT are not legal after "
370 "AMDGPURegBankLegalize. Should lower to sgpr S32");
unsigned const MachineRegisterInfo * MRI
amdgpu aa AMDGPU Address space based Alias Analysis Wrapper
AMDGPU Register Bank Legalize
static Register getAnySgprS1(const MachineRegisterInfo &MRI)
const RegBankLegalizeRules & getRules(const GCNSubtarget &ST, MachineRegisterInfo &MRI)
#define DEBUG_TYPE
Lower G_ instructions that can't be inst-selected with register bank assignment from AMDGPURegBankSelect based on machine uniformity info.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Provides analysis for continuously CSEing during GISel passes.
This file implements a version of MachineIRBuilder which CSEs insts within a MachineBasicBlock.
AMD GCN specific subclass of TargetSubtarget.
unsigned const TargetRegisterInfo * TRI
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Target-Independent Code Generator Pass Configuration Options pass.
AMDGPURegBankLegalizeCombiner(MachineIRBuilder &B, const SIRegisterInfo &TRI, const RegisterBankInfo &RBI)
bool isLaneMask(Register Reg)
void tryCombineS1AnyExt(MachineInstr &MI)
void cleanUpAfterCombine(MachineInstr &MI, MachineInstr *Optional0)
std::pair< MachineInstr *, Register > tryMatch(Register Src, unsigned Opcode)
void tryCombineCopy(MachineInstr &MI)
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
Defines a builder that does CSE of MachineInstructions using GISelCSEInfo.
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
FunctionPass class - This class is used to implement most global optimizations.
The actual analysis pass wrapper.
Simple wrapper that does the following.
Simple wrapper observer that takes several observers, and calls each one for each event.
void addObserver(GISelChangeObserver *O)
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of passes that operate on the MachineFunction representation.
virtual MachineFunctionProperties getClearedProperties() const
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformation or analysis.
Properties which a MachineFunction may have at a given point in time.
MachineFunctionProperties & set(Property P)
bool hasProperty(Property P) const
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const MachineFunctionProperties & getProperties() const
Get the function properties.
Helper class to build MachineInstr.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at application launch and destroyed by llvm_shutdown.
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
A simple RAII based Delegate installer.
A simple RAII based Observer installer.
Holds all the information related to register banks.
This class implements the register bank concept.
unsigned getID() const
Get the identifier of this register bank.
Wrapper class representing virtual and physical registers.
static Register index2VirtReg(unsigned Index)
Convert a 0-based index to a virtual register number.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Target-Independent Code Generator Pass Configuration Options.
virtual std::unique_ptr< CSEConfigBase > getCSEConfig() const
Returns the CSEConfig object to use for the current optimization level.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
This is an optimization pass for GlobalISel generic memory operations.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting iteration.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
FunctionPass * createAMDGPURegBankLegalizePass()
char & AMDGPURegBankLegalizeID
void initializeAMDGPURegBankLegalizePass(PassRegistry &)
bool isTriviallyDead(const MachineInstr &MI, const MachineRegisterInfo &MRI)
Check whether an instruction MI is dead: it only defines dead virtual registers, and doesn't have other side effects.