30#define GET_GICOMBINER_DEPS
31#include "AMDGPUGenPreLegalizeGICombiner.inc"
32#undef GET_GICOMBINER_DEPS
34#define DEBUG_TYPE "amdgpu-regbank-combiner"
40#define GET_GICOMBINER_TYPES
41#include "AMDGPUGenRegBankGICombiner.inc"
42#undef GET_GICOMBINER_TYPES
44class AMDGPURegBankCombinerImpl :
public Combiner {
46 const AMDGPURegBankCombinerImplRuleConfig &RuleConfig;
54 AMDGPURegBankCombinerImpl(
57 const AMDGPURegBankCombinerImplRuleConfig &RuleConfig,
// Static accessor that returns the fixed string "AMDGPURegBankCombinerImpl".
// NOTE(review): the leading "61" looks like a listing line number fused into
// the text rather than code -- confirm against the real source.
61 static const char *
getName() {
return "AMDGPURegBankCombinerImpl"; }
69 unsigned Min, Max, Med;
72 struct Med3MatchInfo {
77 struct MinMaxToMinMax3MatchInfo {
82 MinMaxMedOpc getMinMaxPair(
unsigned Opc)
const;
84 template <
class m_Cst,
typename CstTy>
86 Register &Val, CstTy &K0, CstTy &K1)
const;
88 bool matchIntMinMaxToMed3(
MachineInstr &
MI, Med3MatchInfo &MatchInfo)
const;
89 bool matchFPMinMaxToMed3(
MachineInstr &
MI, Med3MatchInfo &MatchInfo)
const;
102 MinMaxToMinMax3MatchInfo &MatchInfo)
const;
104 MinMaxToMinMax3MatchInfo &MatchInfo)
const;
108 bool getIEEE()
const;
109 bool getDX10Clamp()
const;
114#define GET_GICOMBINER_CLASS_MEMBERS
115#define AMDGPUSubtarget GCNSubtarget
116#include "AMDGPUGenRegBankGICombiner.inc"
117#undef GET_GICOMBINER_CLASS_MEMBERS
118#undef AMDGPUSubtarget
121#define GET_GICOMBINER_IMPL
122#define AMDGPUSubtarget GCNSubtarget
123#include "AMDGPUGenRegBankGICombiner.inc"
124#undef AMDGPUSubtarget
125#undef GET_GICOMBINER_IMPL
127AMDGPURegBankCombinerImpl::AMDGPURegBankCombinerImpl(
130 const AMDGPURegBankCombinerImplRuleConfig &RuleConfig,
132 :
Combiner(MF, CInfo, &VT, CSEInfo), RuleConfig(RuleConfig), STI(STI),
133 RBI(*STI.getRegBankInfo()),
TRI(*STI.getRegisterInfo()),
134 TII(*STI.getInstrInfo()),
135 Helper(Observer,
B,
false, &VT, MDT, LI),
137#include
"AMDGPUGenRegBankGICombiner.inc"
142bool AMDGPURegBankCombinerImpl::isVgprRegBank(
Register Reg)
const {
147 if (isVgprRegBank(
Reg))
151 for (MachineInstr &Use : MRI.use_instructions(
Reg)) {
153 if (
Use.getOpcode() == AMDGPU::COPY && isVgprRegBank(Def))
159 MRI.setRegBank(VgprReg, RBI.
getRegBank(AMDGPU::VGPRRegBankID));
163AMDGPURegBankCombinerImpl::MinMaxMedOpc
164AMDGPURegBankCombinerImpl::getMinMaxPair(
unsigned Opc)
const {
170 return {AMDGPU::G_SMIN, AMDGPU::G_SMAX, AMDGPU::G_AMDGPU_SMED3};
173 return {AMDGPU::G_UMIN, AMDGPU::G_UMAX, AMDGPU::G_AMDGPU_UMED3};
174 case AMDGPU::G_FMAXNUM:
175 case AMDGPU::G_FMINNUM:
176 return {AMDGPU::G_FMINNUM, AMDGPU::G_FMAXNUM, AMDGPU::G_AMDGPU_FMED3};
177 case AMDGPU::G_FMAXNUM_IEEE:
178 case AMDGPU::G_FMINNUM_IEEE:
179 return {AMDGPU::G_FMINNUM_IEEE, AMDGPU::G_FMAXNUM_IEEE,
180 AMDGPU::G_AMDGPU_FMED3};
184template <
class m_Cst,
typename CstTy>
185bool AMDGPURegBankCombinerImpl::matchMed(MachineInstr &
MI,
186 MachineRegisterInfo &MRI,
188 CstTy &K0, CstTy &K1)
const {
206bool AMDGPURegBankCombinerImpl::matchIntMinMaxToMed3(
207 MachineInstr &
MI, Med3MatchInfo &MatchInfo)
const {
209 if (!isVgprRegBank(Dst))
217 MinMaxMedOpc OpcodeTriple = getMinMaxPair(
MI.getOpcode());
219 std::optional<ValueAndVReg> K0, K1;
221 if (!matchMed<GCstAndRegMatch>(
MI, MRI, OpcodeTriple, Val, K0, K1))
224 if (OpcodeTriple.Med == AMDGPU::G_AMDGPU_SMED3 && K0->Value.sgt(K1->Value))
226 if (OpcodeTriple.Med == AMDGPU::G_AMDGPU_UMED3 && K0->Value.ugt(K1->Value))
229 MatchInfo = {OpcodeTriple.Med, Val, K0->VReg, K1->VReg};
251bool AMDGPURegBankCombinerImpl::matchFPMinMaxToMed3(
252 MachineInstr &
MI, Med3MatchInfo &MatchInfo)
const {
260 auto OpcodeTriple = getMinMaxPair(
MI.getOpcode());
263 std::optional<FPValueAndVReg> K0, K1;
265 if (!matchMed<GFCstAndRegMatch>(
MI, MRI, OpcodeTriple, Val, K0, K1))
268 if (K0->Value > K1->Value)
278 if ((getIEEE() && isFminnumIeee(
MI)) || VT->isKnownNeverNaN(Dst)) {
282 MatchInfo = {OpcodeTriple.Med, Val, K0->VReg, K1->VReg};
290bool AMDGPURegBankCombinerImpl::matchFPMinMaxToClamp(MachineInstr &
MI,
293 auto OpcodeTriple = getMinMaxPair(
MI.getOpcode());
295 std::optional<FPValueAndVReg> K0, K1;
297 if (!matchMed<GFCstOrSplatGFCstMatch>(
MI, MRI, OpcodeTriple, Val, K0, K1))
300 if (!K0->Value.isPosZero() || !K1->Value.isOne())
307 if ((getIEEE() && getDX10Clamp() && isFminnumIeee(
MI) &&
308 VT->isKnownNeverSNaN(Val)) ||
309 VT->isKnownNeverNaN(
MI.getOperand(0).getReg())) {
326bool AMDGPURegBankCombinerImpl::matchFPMed3ToClamp(MachineInstr &
MI,
334 if (isFCst(Src0) && !isFCst(Src1))
336 if (isFCst(Src1) && !isFCst(Src2))
338 if (isFCst(Src0) && !isFCst(Src1))
345 auto isOp3Zero = [&]() {
347 if (Op3->
getOpcode() == TargetOpcode::G_FCONSTANT)
355 if (VT->isKnownNeverNaN(
MI.getOperand(0).getReg()) ||
356 (getIEEE() && getDX10Clamp() &&
357 (VT->isKnownNeverSNaN(Val) || isOp3Zero()))) {
365void AMDGPURegBankCombinerImpl::applyClamp(MachineInstr &
MI,
367 B.buildInstr(AMDGPU::G_AMDGPU_CLAMP, {
MI.getOperand(0)}, {
Reg},
369 MI.eraseFromParent();
372void AMDGPURegBankCombinerImpl::applyMed3(MachineInstr &
MI,
373 Med3MatchInfo &MatchInfo)
const {
374 B.buildInstr(MatchInfo.Opc, {MI.getOperand(0)},
375 {getAsVgpr(MatchInfo.Val0), getAsVgpr(MatchInfo.Val1),
376 getAsVgpr(MatchInfo.Val2)},
378 MI.eraseFromParent();
381void AMDGPURegBankCombinerImpl::applyCanonicalizeZextShiftAmt(
382 MachineInstr &
MI, MachineInstr &Ext)
const {
383 unsigned ShOpc =
MI.getOpcode();
384 assert(ShOpc == AMDGPU::G_SHL || ShOpc == AMDGPU::G_LSHR ||
385 ShOpc == AMDGPU::G_ASHR);
393 LLT AmtTy = MRI.
getType(AmtReg);
397 auto NewExt =
B.buildAnyExt(ExtAmtTy, AmtReg);
398 auto Mask =
B.buildConstant(
400 auto And =
B.buildAnd(ExtAmtTy, NewExt, Mask);
401 B.buildInstr(ShOpc, {ShDst}, {ShSrc,
And});
406 MI.eraseFromParent();
409bool AMDGPURegBankCombinerImpl::combineD16Load(MachineInstr &
MI)
const {
411 MachineInstr *
Load, *SextLoad;
412 const int64_t CleanLo16 = 0xFFFFFFFFFFFF0000;
413 const int64_t CleanHi16 = 0x000000000000FFFF;
421 if (
Load->getOpcode() == AMDGPU::G_ZEXTLOAD) {
422 const MachineMemOperand *MMO = *
Load->memoperands_begin();
425 return applyD16Load(AMDGPU::G_AMDGPU_LOAD_D16_LO_U8,
MI, Load, Dst);
427 return applyD16Load(AMDGPU::G_AMDGPU_LOAD_D16_LO,
MI, Load, Dst);
440 if (SextLoad->
getOpcode() != AMDGPU::G_SEXTLOAD)
447 return applyD16Load(AMDGPU::G_AMDGPU_LOAD_D16_LO_I8,
MI, SextLoad, Dst);
459 if (
Load->getOpcode() == AMDGPU::G_ZEXTLOAD) {
460 const MachineMemOperand *MMO = *
Load->memoperands_begin();
463 return applyD16Load(AMDGPU::G_AMDGPU_LOAD_D16_HI_U8,
MI, Load, Dst);
465 return applyD16Load(AMDGPU::G_AMDGPU_LOAD_D16_HI,
MI, Load, Dst);
478 if (SextLoad->
getOpcode() != AMDGPU::G_SEXTLOAD)
485 return applyD16Load(AMDGPU::G_AMDGPU_LOAD_D16_HI_I8,
MI, SextLoad, Dst);
494void AMDGPURegBankCombinerImpl::applyMinMaxToMinMax3(
495 MachineInstr &
MI, MinMaxToMinMax3MatchInfo &MatchInfo)
const {
496 B.buildInstr(MatchInfo.Opc, {MI.getOperand(0)},
497 {MatchInfo.Val0, MatchInfo.Val1, MatchInfo.Val2},
MI.getFlags());
498 MI.eraseFromParent();
504bool AMDGPURegBankCombinerImpl::matchMinMaxToMinMax3(
505 MachineInstr &
MI, MinMaxToMinMax3MatchInfo &MatchInfo)
const {
510 if (!(isVgprRegBank(Dst) && isVgprRegBank(Src1) && isVgprRegBank(Src2))) {
515 unsigned Opc =
MI.getOpcode();
528 unsigned AMDGPUOpc = 0;
531 AMDGPUOpc = AMDGPU::G_AMDGPU_SMAX3;
534 AMDGPUOpc = AMDGPU::G_AMDGPU_SMIN3;
537 AMDGPUOpc = AMDGPU::G_AMDGPU_UMAX3;
540 AMDGPUOpc = AMDGPU::G_AMDGPU_UMIN3;
542 case AMDGPU::G_FMAXNUM:
543 case AMDGPU::G_FMAXNUM_IEEE:
544 AMDGPUOpc = AMDGPU::G_AMDGPU_FMAX3;
546 case AMDGPU::G_FMINNUM:
547 case AMDGPU::G_FMINNUM_IEEE:
548 AMDGPUOpc = AMDGPU::G_AMDGPU_FMIN3;
550 case AMDGPU::G_FMAXIMUM:
551 case AMDGPU::G_FMAXIMUMNUM:
552 AMDGPUOpc = AMDGPU::G_AMDGPU_FMAXIMUM3;
554 case AMDGPU::G_FMINIMUM:
555 case AMDGPU::G_FMINIMUMNUM:
556 AMDGPUOpc = AMDGPU::G_AMDGPU_FMINIMUM3;
562 MatchInfo = {AMDGPUOpc, R0, R1,
R2};
566bool AMDGPURegBankCombinerImpl::applyD16Load(
567 unsigned D16Opc, MachineInstr &DstMI, MachineInstr *SmallLoad,
568 Register SrcReg32ToOverwriteD16)
const {
576SIModeRegisterDefaults AMDGPURegBankCombinerImpl::getMode()
const {
577 return MF.getInfo<SIMachineFunctionInfo>()->getMode();
// Returns the IEEE field of the SIModeRegisterDefaults value produced by
// getMode(). Does not modify the combiner (const member).
// NOTE(review): the leading "580" looks like a listing line number fused into
// the text rather than code -- confirm against the real source.
580bool AMDGPURegBankCombinerImpl::getIEEE()
const {
return getMode().IEEE; }
582bool AMDGPURegBankCombinerImpl::getDX10Clamp()
const {
583 return getMode().DX10Clamp;
586bool AMDGPURegBankCombinerImpl::isFminnumIeee(
const MachineInstr &
MI)
const {
587 return MI.getOpcode() == AMDGPU::G_FMINNUM_IEEE;
590bool AMDGPURegBankCombinerImpl::isFCst(MachineInstr *
MI)
const {
591 return MI->getOpcode() == AMDGPU::G_FCONSTANT;
594bool AMDGPURegBankCombinerImpl::isClampZeroToOne(MachineInstr *K0,
595 MachineInstr *K1)
const {
596 if (isFCst(K0) && isFCst(K1)) {
608class AMDGPURegBankCombiner :
public MachineFunctionPass {
612 AMDGPURegBankCombiner(
bool IsOptNone =
false);
// Override that reports this pass's name as the fixed string
// "AMDGPURegBankCombiner".
// NOTE(review): the leading "614" looks like a listing line number fused into
// the text rather than code -- confirm against the real source.
614 StringRef getPassName()
const override {
return "AMDGPURegBankCombiner"; }
616 bool runOnMachineFunction(MachineFunction &MF)
override;
618 void getAnalysisUsage(AnalysisUsage &AU)
const override;
622 AMDGPURegBankCombinerImplRuleConfig RuleConfig;
626void AMDGPURegBankCombiner::getAnalysisUsage(AnalysisUsage &AU)
const {
629 AU.
addRequired<GISelValueTrackingAnalysisLegacy>();
638AMDGPURegBankCombiner::AMDGPURegBankCombiner(
bool IsOptNone)
639 : MachineFunctionPass(
ID), IsOptNone(IsOptNone) {
640 if (!RuleConfig.parseCommandLineOption())
653 &getAnalysis<GISelValueTrackingAnalysisLegacy>().get(MF);
655 const auto *LI =
ST.getLegalizerInfo();
658 : &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
661 LI, EnableOpt,
F.hasOptSize(),
F.hasMinSize());
663 CInfo.MaxIterations = 1;
667 CInfo.EnableFullDCE =
false;
668 AMDGPURegBankCombinerImpl Impl(MF, CInfo, *VT,
nullptr,
669 RuleConfig, ST, MDT, LI);
670 return Impl.combineMachineInstrs();
// Definition of the pass's static ID member, initialised to 0. The
// AMDGPURegBankCombiner constructor passes this ID to MachineFunctionPass.
// NOTE(review): the leading "673" looks like a listing line number fused into
// the text rather than code -- confirm against the real source.
673char AMDGPURegBankCombiner::ID = 0;
675 "Combine AMDGPU machine instrs after regbankselect",
679 "Combine AMDGPU machine instrs after regbankselect",
false,
683 return new AMDGPURegBankCombiner(IsOptNone);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
#define GET_GICOMBINER_CONSTRUCTOR_INITS
This file declares the targeting of the MachineLegalizer class for AMDGPU.
Provides AMDGPU specific target descriptions.
This file declares the targeting of the RegisterBankInfo class for AMDGPU.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This contains common combine transformations that may be used in a combine pass, or by the target else...
Option class for Targets to specify which operations are combined how and when.
This contains the base class for all Combiners generated by TableGen.
AMD GCN specific subclass of TargetSubtarget.
Provides analysis for querying information about KnownBits during GISel passes.
const HexagonInstrInfo * TII
Contains matchers for matching SSA Machine Instructions.
Register const TargetRegisterInfo * TRI
Promote Memory to Register
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
static StringRef getName(Value *V)
static bool isClampZeroToOne(SDValue A, SDValue B)
Target-Independent Code Generator Pass Configuration Options pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
bool isPosZero() const
Return true if the value is positive zero.
bool isOne() const
Returns true if this value is exactly +1.0.
FunctionPass class - This class is used to implement most global optimizations.
bool hasMin3Max3_16() const
To use KnownBitsInfo analysis in a pass, KnownBitsInfo &Info = getAnalysis<GISelValueTrackingInfoAnal...
constexpr unsigned getScalarSizeInBits() const
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
TypeSize getValue() const
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Function & getFunction()
Return the LLVM function that this machine code represents.
const MachineFunctionProperties & getProperties() const
Get the function properties.
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
const MachineOperand & getOperand(unsigned i) const
LLVM_ABI MachineInstrBundleIterator< MachineInstr > eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
LocationSize getSizeInBits() const
Return the size in bits of the memory reference.
Register getReg() const
getReg - Returns the register number.
const ConstantFP * getFPImm() const
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI bool hasOneNonDBGUse(Register RegNo) const
hasOneNonDBGUse - Return true if there is exactly one non-Debug use of the specified register.
const RegisterBank * getRegBank(Register Reg) const
Return the register bank of Reg.
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
LLVM_ABI void setRegBank(Register Reg, const RegisterBank &RegBank)
Set the register bank to RegBank for Reg.
Holds all the information related to register banks.
const RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
unsigned getID() const
Get the identifier of this register bank.
Wrapper class representing virtual and physical registers.
CodeGenOptLevel getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
operand_type_match m_Reg()
SpecificConstantMatch m_SpecificICst(const APInt &RequestedValue)
Matches a constant equal to RequestedValue.
UnaryOp_match< SrcTy, TargetOpcode::COPY > m_Copy(SrcTy &&Src)
UnaryOp_match< SrcTy, TargetOpcode::G_ZEXT > m_GZExt(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_OR, true > m_GOr(const LHS &L, const RHS &R)
OneNonDBGUse_match< SubPat > m_OneNonDBGUse(const SubPat &SP)
CheckType m_SpecificType(LLT Ty)
BinaryOpc_match< LHS, RHS, true > m_CommutativeBinOp(unsigned Opcode, const LHS &L, const RHS &R)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SHL, false > m_GShl(const LHS &L, const RHS &R)
Or< Preds... > m_any_of(Preds &&... preds)
BinaryOp_match< LHS, RHS, TargetOpcode::G_AND, true > m_GAnd(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_BITCAST > m_GBitcast(const SrcTy &Src)
bind_ty< MachineInstr * > m_MInstr(MachineInstr *&MI)
And< Preds... > m_all_of(Preds &&... preds)
auto m_BinOp()
Match an arbitrary binary operation and ignore it.
NodeAddr< DefNode * > Def
NodeAddr< UseNode * > Use
This is an optimization pass for GlobalISel generic memory operations.
FunctionPass * createAMDGPURegBankCombiner(bool IsOptNone)
LLVM_ABI MachineInstr * getDefIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, folding away any trivial copies.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
LLVM_ABI void getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU)
Modify analysis usage so it preserves passes required for the SelectionDAG fallback.
@ And
Bitwise or logical AND of integers.
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
@ SinglePass
Enables Observer-based DCE and additional heuristics that retry combining defined and used instructio...