#define GET_GICOMBINER_DEPS
#include "AMDGPUGenPreLegalizeGICombiner.inc"
#undef GET_GICOMBINER_DEPS

#define DEBUG_TYPE "amdgpu-regbank-combiner"

#define GET_GICOMBINER_TYPES
#include "AMDGPUGenRegBankGICombiner.inc"
#undef GET_GICOMBINER_TYPES
class AMDGPURegBankCombinerImpl : public Combiner {
protected:
  const AMDGPURegBankCombinerImplRuleConfig &RuleConfig;
  const GCNSubtarget &STI;
  const RegisterBankInfo &RBI;
  const TargetRegisterInfo &TRI;
  const SIInstrInfo &TII;
  const CombinerHelper Helper;

public:
  AMDGPURegBankCombinerImpl(
      MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
      GISelValueTracking &VT, GISelCSEInfo *CSEInfo,
      const AMDGPURegBankCombinerImplRuleConfig &RuleConfig,
      const GCNSubtarget &STI, MachineDominatorTree *MDT,
      const LegalizerInfo *LI);

  static const char *getName() { return "AMDGPURegBankCombinerImpl"; }

  bool tryCombineAll(MachineInstr &I) const override;

  struct MinMaxMedOpc {
    unsigned Min, Max, Med;
  };

  struct Med3MatchInfo {
    unsigned Opc;
    Register Val0, Val1, Val2;
  };

  MinMaxMedOpc getMinMaxPair(unsigned Opc) const;

  template <class m_Cst, typename CstTy>
  bool matchMed(MachineInstr &MI, MachineRegisterInfo &MRI,
                MinMaxMedOpc MMMOpc, Register &Val, CstTy &K0,
                CstTy &K1) const;

  bool matchIntMinMaxToMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo) const;
  bool matchFPMinMaxToMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo) const;

  // ... (remaining match/apply and helper declarations elided)

  bool getDX10Clamp() const;
#define GET_GICOMBINER_CLASS_MEMBERS
#define AMDGPUSubtarget GCNSubtarget
#include "AMDGPUGenRegBankGICombiner.inc"
#undef GET_GICOMBINER_CLASS_MEMBERS
#undef AMDGPUSubtarget
};

#define GET_GICOMBINER_IMPL
#define AMDGPUSubtarget GCNSubtarget
#include "AMDGPUGenRegBankGICombiner.inc"
#undef AMDGPUSubtarget
#undef GET_GICOMBINER_IMPL
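// Note added for this excerpt (not in the original source): the TableGen-
// generated combiner code refers to the subtarget by the name AMDGPUSubtarget,
// and the temporary AMDGPUSubtarget -> GCNSubtarget rebinding above lets those
// references resolve to the GCN subtarget this combiner actually runs on.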
AMDGPURegBankCombinerImpl::AMDGPURegBankCombinerImpl(
    MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
    GISelValueTracking &VT, GISelCSEInfo *CSEInfo,
    const AMDGPURegBankCombinerImplRuleConfig &RuleConfig,
    const GCNSubtarget &STI, MachineDominatorTree *MDT,
    const LegalizerInfo *LI)
    : Combiner(MF, CInfo, TPC, &VT, CSEInfo), RuleConfig(RuleConfig), STI(STI),
      RBI(*STI.getRegBankInfo()), TRI(*STI.getRegisterInfo()),
      TII(*STI.getInstrInfo()),
      Helper(Observer, B, /*IsPreLegalize*/ false, &VT, MDT, LI),
#define GET_GICOMBINER_CONSTRUCTOR_INITS
#include "AMDGPUGenRegBankGICombiner.inc"
#undef GET_GICOMBINER_CONSTRUCTOR_INITS
{
}
bool AMDGPURegBankCombinerImpl::isVgprRegBank(Register Reg) const {
  return RBI.getRegBank(Reg, MRI, TRI)->getID() == AMDGPU::VGPRRegBankID;
}

Register AMDGPURegBankCombinerImpl::getAsVgpr(Register Reg) const {
  if (isVgprRegBank(Reg))
    return Reg;

  // Search for an existing copy of Reg to a vgpr.
  for (MachineInstr &Use : MRI.use_instructions(Reg)) {
    Register Def = Use.getOperand(0).getReg();
    if (Use.getOpcode() == AMDGPU::COPY && isVgprRegBank(Def))
      return Def;
  }

  // No existing copy: build one and assign it to the vgpr bank.
  Register VgprReg = B.buildCopy(MRI.getType(Reg), Reg).getReg(0);
  MRI.setRegBank(VgprReg, RBI.getRegBank(AMDGPU::VGPRRegBankID));
  return VgprReg;
}
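// Illustrative sketch (not from the original source): getAsVgpr is used for
// VALU-only results such as G_AMDGPU_*MED3, whose inputs must live in the
// vgpr bank. For an sgpr value %k it produces (or reuses)
//   %k_v:vgpr(s32) = COPY %k:sgpr(s32)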
AMDGPURegBankCombinerImpl::MinMaxMedOpc
AMDGPURegBankCombinerImpl::getMinMaxPair(unsigned Opc) const {
  switch (Opc) {
  default:
    llvm_unreachable("Unsupported opcode");
  case AMDGPU::G_SMAX:
  case AMDGPU::G_SMIN:
    return {AMDGPU::G_SMIN, AMDGPU::G_SMAX, AMDGPU::G_AMDGPU_SMED3};
  case AMDGPU::G_UMAX:
  case AMDGPU::G_UMIN:
    return {AMDGPU::G_UMIN, AMDGPU::G_UMAX, AMDGPU::G_AMDGPU_UMED3};
  case AMDGPU::G_FMAXNUM:
  case AMDGPU::G_FMINNUM:
    return {AMDGPU::G_FMINNUM, AMDGPU::G_FMAXNUM, AMDGPU::G_AMDGPU_FMED3};
  case AMDGPU::G_FMAXNUM_IEEE:
  case AMDGPU::G_FMINNUM_IEEE:
    return {AMDGPU::G_FMINNUM_IEEE, AMDGPU::G_FMAXNUM_IEEE,
            AMDGPU::G_AMDGPU_FMED3};
  }
}
template <class m_Cst, typename CstTy>
bool AMDGPURegBankCombinerImpl::matchMed(MachineInstr &MI,
                                         MachineRegisterInfo &MRI,
                                         MinMaxMedOpc MMMOpc, Register &Val,
                                         CstTy &K0, CstTy &K1) const {
  // Match the operand commutes of min(max(Val, K0), K1), binding the
  // non-constant operand to Val and the constants to K0 and K1.
  // ... (mi_match body elided)
}
bool AMDGPURegBankCombinerImpl::matchIntMinMaxToMed3(
    MachineInstr &MI, Med3MatchInfo &MatchInfo) const {
  Register Dst = MI.getOperand(0).getReg();
  if (!isVgprRegBank(Dst))
    return false;

  // med3 for i16 is only available on gfx9+, and not available for v2i16.
  LLT Ty = MRI.getType(Dst);
  if ((Ty != LLT::scalar(16) || !STI.hasMed3_16()) && Ty != LLT::scalar(32))
    return false;

  MinMaxMedOpc OpcodeTriple = getMinMaxPair(MI.getOpcode());
  Register Val;
  std::optional<ValueAndVReg> K0, K1;
  // Search for min(max(Val, K0), K1) or max(min(Val, K1), K0).
  if (!matchMed<GCstAndRegMatch>(MI, MRI, OpcodeTriple, Val, K0, K1))
    return false;

  // Not a med3 if the constants are ordered the wrong way around.
  if (OpcodeTriple.Med == AMDGPU::G_AMDGPU_SMED3 && K0->Value.sgt(K1->Value))
    return false;
  if (OpcodeTriple.Med == AMDGPU::G_AMDGPU_UMED3 && K0->Value.ugt(K1->Value))
    return false;

  MatchInfo = {OpcodeTriple.Med, Val, K0->VReg, K1->VReg};
  return true;
}
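// Illustrative MIR sketch (not from the original source), assuming K0 <= K1:
//   %t:vgpr(s32) = G_SMAX %x, %K0
//   %r:vgpr(s32) = G_SMIN %t, %K1
// is replaced by
//   %r:vgpr(s32) = G_AMDGPU_SMED3 %x, %K0, %K1
// which later selects to a single v_med3_i32.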
bool AMDGPURegBankCombinerImpl::matchFPMinMaxToMed3(
    MachineInstr &MI, Med3MatchInfo &MatchInfo) const {
  Register Dst = MI.getOperand(0).getReg();
  LLT Ty = MRI.getType(Dst);

  // med3 for f16 is only available on gfx9+, and not available for v2f16.
  if ((Ty != LLT::scalar(16) || !STI.hasMed3_16()) && Ty != LLT::scalar(32))
    return false;

  auto OpcodeTriple = getMinMaxPair(MI.getOpcode());

  Register Val;
  std::optional<FPValueAndVReg> K0, K1;
  // Search for min(max(Val, K0), K1) or max(min(Val, K1), K0).
  if (!matchMed<GFCstAndRegMatch>(MI, MRI, OpcodeTriple, Val, K0, K1))
    return false;

  if (K0->Value > K1->Value)
    return false;

  // ... (NaN-safety checks elided)

  // Only fold a constant that is not an inline immediate when it is already
  // materialized for other users; otherwise the transform can add constant
  // materialization cost.
  if ((!MRI.hasOneNonDBGUse(K0->VReg) || TII.isInlineConstant(K0->Value)) &&
      (!MRI.hasOneNonDBGUse(K1->VReg) || TII.isInlineConstant(K1->Value))) {
    MatchInfo = {OpcodeTriple.Med, Val, K0->VReg, K1->VReg};
    return true;
  }

  return false;
}
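// Illustrative note (not from the original source): on most subtargets the
// med3 encoding cannot take a literal operand, so e.g. min(max(%x, 2.0), 4.0)
// is only rewritten to G_AMDGPU_FMED3 %x, %k2, %k4 when 2.0 and 4.0 are
// inline immediates or are already in registers for other users.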
bool AMDGPURegBankCombinerImpl::matchFPMinMaxToClamp(MachineInstr &MI,
                                                     Register &Reg) const {
  auto OpcodeTriple = getMinMaxPair(MI.getOpcode());
  Register Val;
  std::optional<FPValueAndVReg> K0, K1;
  // Search for min(max(Val, 0.0), 1.0) in either operand order.
  if (!matchMed<GFCstOrSplatGFCstMatch>(MI, MRI, OpcodeTriple, Val, K0, K1))
    return false;

  if (!K0->Value.isExactlyValue(0.0) || !K1->Value.isExactlyValue(1.0))
    return false;

  // Clamp with DX10_CLAMP flushes NaN to 0.0, and min(max(QNaN, 0.0), 1.0)
  // also evaluates to 0.0, so quiet NaNs are safe under IEEE + DX10Clamp;
  // otherwise require a known-never-NaN input.
  if ((getIEEE() && getDX10Clamp() && isFminnumIeee(MI) &&
       isKnownNeverSNaN(Val, MRI)) ||
      isKnownNeverNaN(Val, MRI)) {
    Reg = Val;
    return true;
  }

  return false;
}
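// Illustrative MIR sketch (not from the original source):
//   %t:vgpr(s32) = G_FMAXNUM_IEEE %x, %zero   ; 0.0
//   %r:vgpr(s32) = G_FMINNUM_IEEE %t, %one    ; 1.0
// becomes
//   %r:vgpr(s32) = G_AMDGPU_CLAMP %x
// which selects to the clamp output modifier instead of two min/max ops.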
bool AMDGPURegBankCombinerImpl::matchFPMed3ToClamp(MachineInstr &MI,
                                                   Register &Reg) const {
  // In llvm-ir, clamp is often represented as a call to the amdgcn.fmed3
  // intrinsic with constants 0.0 and 1.0 in some operand order.
  MachineInstr *Src0 = getDefIgnoringCopies(MI.getOperand(1).getReg(), MRI);
  MachineInstr *Src1 = getDefIgnoringCopies(MI.getOperand(2).getReg(), MRI);
  MachineInstr *Src2 = getDefIgnoringCopies(MI.getOperand(3).getReg(), MRI);

  // Move the non-constant operand into Src0.
  if (isFCst(Src0) && !isFCst(Src1))
    std::swap(Src0, Src1);
  if (isFCst(Src1) && !isFCst(Src2))
    std::swap(Src1, Src2);
  if (isFCst(Src0) && !isFCst(Src1))
    std::swap(Src0, Src1);
  if (!isClampZeroToOne(Src1, Src2))
    return false;

  Register Val = Src0->getOperand(0).getReg();

  auto isOp3Zero = [&]() {
    MachineInstr *Op3 = getDefIgnoringCopies(MI.getOperand(3).getReg(), MRI);
    if (Op3->getOpcode() == TargetOpcode::G_FCONSTANT)
      return Op3->getOperand(1).getFPImm()->isExactlyValue(0.0);
    return false;
  };
  // NaN-safe when Val has no signaling NaNs, or when fmed3's last operand is
  // 0.0 so that its NaN result agrees with DX10Clamp's clamp(NaN) = 0.0.
  if (isKnownNeverSNaN(Val, MRI) ||
      (getIEEE() && getDX10Clamp() && isOp3Zero())) {
    Reg = Val;
    return true;
  }

  return false;
}
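// Illustrative note (not from the original source): frontends usually express
// clamp as @llvm.amdgcn.fmed3(%x, 0.0, 1.0); by this point that appears as a
// G_AMDGPU_FMED3 with two constant operands, and the match above rewrites it
// to G_AMDGPU_CLAMP %x when the NaN behavior is known to agree.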
void AMDGPURegBankCombinerImpl::applyClamp(MachineInstr &MI,
                                           Register &Reg) const {
  B.buildInstr(AMDGPU::G_AMDGPU_CLAMP, {MI.getOperand(0)}, {Reg},
               MI.getFlags());
  MI.eraseFromParent();
}
void AMDGPURegBankCombinerImpl::applyMed3(MachineInstr &MI,
                                          Med3MatchInfo &MatchInfo) const {
  B.buildInstr(MatchInfo.Opc, {MI.getOperand(0)},
               {getAsVgpr(MatchInfo.Val0), getAsVgpr(MatchInfo.Val1),
                getAsVgpr(MatchInfo.Val2)},
               MI.getFlags());
  MI.eraseFromParent();
}
void AMDGPURegBankCombinerImpl::applyCanonicalizeZextShiftAmt(
    MachineInstr &MI, MachineInstr &Ext) const {
  unsigned ShOpc = MI.getOpcode();
  assert(ShOpc == AMDGPU::G_SHL || ShOpc == AMDGPU::G_LSHR ||
         ShOpc == AMDGPU::G_ASHR);
  assert(Ext.getOpcode() == AMDGPU::G_ZEXT);

  Register ShDst = MI.getOperand(0).getReg();
  Register ShSrc = MI.getOperand(1).getReg();
  Register AmtReg = Ext.getOperand(1).getReg();

  LLT ExtAmtTy = MRI.getType(Ext.getOperand(0).getReg());
  LLT AmtTy = MRI.getType(AmtReg);

  auto &RB = *MRI.getRegBank(AmtReg);

  // Recreate the zero-extension as any-extend plus an explicit mask of the
  // low bits, which is easier to fold during selection.
  auto NewExt = B.buildAnyExt(ExtAmtTy, AmtReg);
  auto Mask = B.buildConstant(
      ExtAmtTy, maskTrailingOnes<uint64_t>(AmtTy.getScalarSizeInBits()));
  auto And = B.buildAnd(ExtAmtTy, NewExt, Mask);
  B.buildInstr(ShOpc, {ShDst}, {ShSrc, And});

  MRI.setRegBank(NewExt.getReg(0), RB);
  MRI.setRegBank(Mask.getReg(0), RB);
  MRI.setRegBank(And.getReg(0), RB);
  MI.eraseFromParent();
}
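// Illustrative MIR sketch (not from the original source):
//   %a32:vgpr(s32) = G_ZEXT %a16:vgpr(s16)
//   %d:vgpr(s32) = G_SHL %x, %a32
// becomes
//   %a32:vgpr(s32) = G_ANYEXT %a16
//   %m:vgpr(s32) = G_AND %a32, 65535
//   %d:vgpr(s32) = G_SHL %x, %m
// The explicit mask is cheap to fold later because the hardware shift only
// reads the low bits of the amount.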
bool AMDGPURegBankCombinerImpl::combineD16Load(MachineInstr &MI) const {
  Register Dst;
  MachineInstr *Load, *SextLoad;
  const int64_t CleanLo16 = 0xFFFFFFFFFFFF0000;
  const int64_t CleanHi16 = 0x000000000000FFFF;

  // Load into the low 16 bits of Dst, keeping the high 16 bits (the CleanLo16
  // mask clears the old low half).
  if (/* mi_match of the or/and/load pattern elided */) {
    if (Load->getOpcode() == AMDGPU::G_ZEXTLOAD) {
      const MachineMemOperand *MMO = *Load->memoperands_begin();
      unsigned LoadSize = MMO->getSizeInBits().getValue();
      if (LoadSize == 8)
        return applyD16Load(AMDGPU::G_AMDGPU_LOAD_D16_LO_U8, MI, Load, Dst);
      if (LoadSize == 16)
        return applyD16Load(AMDGPU::G_AMDGPU_LOAD_D16_LO, MI, Load, Dst);
      return false;
    }

    // ... (sign-extended pattern details elided)
    if (SextLoad->getOpcode() != AMDGPU::G_SEXTLOAD)
      return false;
    return applyD16Load(AMDGPU::G_AMDGPU_LOAD_D16_LO_I8, MI, SextLoad, Dst);
  }

  // Load into the high 16 bits of Dst, keeping the low 16 bits (CleanHi16).
  if (/* mi_match of the shl/or/load pattern elided */) {
    if (Load->getOpcode() == AMDGPU::G_ZEXTLOAD) {
      const MachineMemOperand *MMO = *Load->memoperands_begin();
      unsigned LoadSize = MMO->getSizeInBits().getValue();
      if (LoadSize == 8)
        return applyD16Load(AMDGPU::G_AMDGPU_LOAD_D16_HI_U8, MI, Load, Dst);
      if (LoadSize == 16)
        return applyD16Load(AMDGPU::G_AMDGPU_LOAD_D16_HI, MI, Load, Dst);
      return false;
    }

    // ... (sign-extended pattern details elided)
    if (SextLoad->getOpcode() != AMDGPU::G_SEXTLOAD)
      return false;
    return applyD16Load(AMDGPU::G_AMDGPU_LOAD_D16_HI_I8, MI, SextLoad, Dst);
  }

  return false;
}
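// Illustrative MIR sketch (not from the original source): a "load into one
// half of a 32-bit value" pattern such as
//   %half:vgpr(s32) = G_ZEXTLOAD %ptr :: (load (s16))
//   %res:vgpr(s32) = G_OR (G_AND %old, 0xFFFF0000), %half
// collapses into a single d16 load, G_AMDGPU_LOAD_D16_LO, which writes the
// low half of the destination register while preserving the high half.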
bool AMDGPURegBankCombinerImpl::applyD16Load(
    unsigned D16Opc, MachineInstr &DstMI, MachineInstr *SmallLoad,
    Register SrcReg32ToOverwriteD16) const {
  // ... (body elided)
}
SIModeRegisterDefaults AMDGPURegBankCombinerImpl::getMode() const {
  return MF.getInfo<SIMachineFunctionInfo>()->getMode();
}

bool AMDGPURegBankCombinerImpl::getIEEE() const { return getMode().IEEE; }

bool AMDGPURegBankCombinerImpl::getDX10Clamp() const {
  return getMode().DX10Clamp;
}

bool AMDGPURegBankCombinerImpl::isFminnumIeee(const MachineInstr &MI) const {
  return MI.getOpcode() == AMDGPU::G_FMINNUM_IEEE;
}

bool AMDGPURegBankCombinerImpl::isFCst(MachineInstr *MI) const {
  return MI->getOpcode() == AMDGPU::G_FCONSTANT;
}

bool AMDGPURegBankCombinerImpl::isClampZeroToOne(MachineInstr *K0,
                                                 MachineInstr *K1) const {
  if (isFCst(K0) && isFCst(K1)) {
    const ConstantFP *K0Imm = K0->getOperand(1).getFPImm();
    const ConstantFP *K1Imm = K1->getOperand(1).getFPImm();
    // Accept both (0.0, 1.0) and (1.0, 0.0) operand orders.
    return (K0Imm->isExactlyValue(0.0) && K1Imm->isExactlyValue(1.0)) ||
           (K0Imm->isExactlyValue(1.0) && K1Imm->isExactlyValue(0.0));
  }
  return false;
}
class AMDGPURegBankCombiner : public MachineFunctionPass {
public:
  static char ID;

  AMDGPURegBankCombiner(bool IsOptNone = false);

  StringRef getPassName() const override { return "AMDGPURegBankCombiner"; }

  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override;

private:
  bool IsOptNone;
  AMDGPURegBankCombinerImplRuleConfig RuleConfig;
};
void AMDGPURegBankCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<TargetPassConfig>();
  AU.setPreservesCFG();
  getSelectionDAGFallbackAnalysisUsage(AU);
  AU.addRequired<GISelValueTrackingAnalysisLegacy>();
  AU.addPreserved<GISelValueTrackingAnalysisLegacy>();
  MachineFunctionPass::getAnalysisUsage(AU);
}
AMDGPURegBankCombiner::AMDGPURegBankCombiner(bool IsOptNone)
    : MachineFunctionPass(ID), IsOptNone(IsOptNone) {
  if (!RuleConfig.parseCommandLineOption())
    report_fatal_error("Invalid rule identifier");
}
bool AMDGPURegBankCombiner::runOnMachineFunction(MachineFunction &MF) {
  if (MF.getProperties().hasFailedISel())
    return false;
  auto *TPC = &getAnalysis<TargetPassConfig>();
  const Function &F = MF.getFunction();
  bool EnableOpt =
      MF.getTarget().getOptLevel() != CodeGenOptLevel::None && !skipFunction(F);

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  GISelValueTracking *VT =
      &getAnalysis<GISelValueTrackingAnalysisLegacy>().get(MF);

  const auto *LI = ST.getLegalizerInfo();
  MachineDominatorTree *MDT =
      IsOptNone ? nullptr
                : &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();

  CombinerInfo CInfo(/*AllowIllegalOps*/ false, /*ShouldLegalizeIllegal*/ true,
                     LI, EnableOpt, F.hasOptSize(), F.hasMinSize());
  // Disable fixed-point iteration to reduce compile time.
  CInfo.MaxIterations = 1;
  CInfo.ObserverLvl = CombinerInfo::ObserverLevel::SinglePass;
  // RegBankSelect seems not to leave dead instructions, so a full DCE pass is
  // unnecessary.
  CInfo.EnableFullDCE = false;
  AMDGPURegBankCombinerImpl Impl(MF, CInfo, TPC, *VT, /*CSEInfo*/ nullptr,
                                 RuleConfig, ST, MDT, LI);
  return Impl.combineMachineInstrs();
}
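// Pipeline note added for this excerpt (not from this file): the GCN pass
// config creates this pass between RegBankSelect and InstructionSelect,
// roughly as
//   addPass(createAMDGPURegBankCombiner(
//       getOptLevel() == CodeGenOptLevel::None));
// so every rule here can assume register banks are already assigned.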
char AMDGPURegBankCombiner::ID = 0;
INITIALIZE_PASS_BEGIN(AMDGPURegBankCombiner, DEBUG_TYPE,
                      "Combine AMDGPU machine instrs after regbankselect",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_DEPENDENCY(GISelValueTrackingAnalysisLegacy)
INITIALIZE_PASS_END(AMDGPURegBankCombiner, DEBUG_TYPE,
                    "Combine AMDGPU machine instrs after regbankselect", false,
                    false)

FunctionPass *llvm::createAMDGPURegBankCombiner(bool IsOptNone) {
  return new AMDGPURegBankCombiner(IsOptNone);
}