Go to the documentation of this file.
27 #define DEBUG_TYPE "amdgpu-prelegalizer-combiner"
30 using namespace MIPatternMatch;
41 :
B(
B), MF(
B.getMF()),
MRI(*
B.getMRI()), Helper(Helper){};
60 assert(
MI.getOpcode() == TargetOpcode::G_TRUNC &&
"Invalid instruction!");
73 auto IsApplicableForCombine = [&MatchInfo]() ->
bool {
74 const auto Cmp1 = MatchInfo.
Cmp1;
75 const auto Cmp2 = MatchInfo.
Cmp2;
76 const auto Diff =
std::abs(Cmp2 - Cmp1);
80 if (Diff == 0 || Diff == 1)
87 return ((Cmp2 >= Cmp1 && Cmp1 >= Min && Cmp2 <= Max) ||
88 (Cmp1 >= Cmp2 && Cmp1 <= Max && Cmp2 >= Min));
96 return IsApplicableForCombine();
104 return IsApplicableForCombine();
122 assert(
MI.getParent()->getParent()->getRegInfo().getType(Src) ==
126 B.setMBB(*
MI.getParent());
127 B.setInstrAndDebugLoc(
MI);
129 auto Unmerge =
B.buildUnmerge(S32, Src);
131 assert(
MI.getOpcode() != AMDGPU::G_AMDGPU_CVT_PK_I16_I32);
135 B.buildInstr(AMDGPU::G_AMDGPU_CVT_PK_I16_I32, {V2S16},
136 {Unmerge.getReg(0), Unmerge.getReg(1)},
MI.getFlags());
140 auto MinBoundaryDst =
B.buildConstant(S32, MinBoundary);
141 auto MaxBoundaryDst =
B.buildConstant(S32, MaxBoundary);
143 auto Bitcast =
B.buildBitcast({S32}, CvtPk);
145 auto Med3 =
B.buildInstr(
146 AMDGPU::G_AMDGPU_MED3, {S32},
147 {MinBoundaryDst.getReg(0),
Bitcast.getReg(0), MaxBoundaryDst.getReg(0)},
150 B.buildTrunc(
MI.getOperand(0).getReg(), Med3);
152 MI.eraseFromParent();
164 : Helper(Helper), PreLegalizerHelper(PreLegalizerHelper) {}
167 #define AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
168 #include "AMDGPUGenPreLegalizeGICombiner.inc"
169 #undef AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
172 #define AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H
173 #include "AMDGPUGenPreLegalizeGICombiner.inc"
174 #undef AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H
176 class AMDGPUPreLegalizerCombinerInfo final :
public CombinerInfo {
181 AMDGPUGenPreLegalizerCombinerHelperRuleConfig GeneratedRuleCfg;
183 AMDGPUPreLegalizerCombinerInfo(
bool EnableOpt,
bool OptSize,
bool MinSize,
186 nullptr, EnableOpt, OptSize, MinSize),
188 if (!GeneratedRuleCfg.parseCommandLineOption())
201 AMDGPUGenPreLegalizerCombinerHelper Generated(GeneratedRuleCfg, Helper,
204 if (Generated.tryCombineAll(Observer,
MI,
B, Helper))
207 switch (
MI.getOpcode()) {
208 case TargetOpcode::G_CONCAT_VECTORS:
209 return Helper.tryCombineConcatVectors(
MI);
210 case TargetOpcode::G_SHUFFLE_VECTOR:
211 return Helper.tryCombineShuffleVector(
MI);
217 #define AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP
218 #include "AMDGPUGenPreLegalizeGICombiner.inc"
219 #undef AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP
228 AMDGPUPreLegalizerCombiner(
bool IsOptNone =
false);
231 return "AMDGPUPreLegalizerCombiner";
242 void AMDGPUPreLegalizerCombiner::getAnalysisUsage(
AnalysisUsage &AU)
const {
258 AMDGPUPreLegalizerCombiner::AMDGPUPreLegalizerCombiner(
bool IsOptNone)
263 bool AMDGPUPreLegalizerCombiner::runOnMachineFunction(
MachineFunction &MF) {
265 MachineFunctionProperties::Property::FailedISel))
267 auto *TPC = &getAnalysis<TargetPassConfig>();
271 GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
273 IsOptNone ? nullptr : &getAnalysis<MachineDominatorTree>();
274 AMDGPUPreLegalizerCombinerInfo PCInfo(EnableOpt,
F.hasOptSize(),
275 F.hasMinSize(), KB, MDT);
278 getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper();
279 auto *CSEInfo = &
Wrapper.get(TPC->getCSEConfig());
282 return C.combineMachineInstrs(MF, CSEInfo);
287 "Combine AMDGPU machine instrs before legalization",
297 return new AMDGPUPreLegalizerCombiner(IsOptNone);
CodeGenOpt::Level getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
bool matchClampI64ToI16(MachineInstr &MI, MachineRegisterInfo &MRI, MachineFunction &MF, ClampI64ToI16MatchInfo &MatchInfo)
bool hasProperty(Property P) const
MachineRegisterInfo & MRI
AMDGPUPreLegalizerCombinerHelper(MachineIRBuilder &B, CombinerHelper &Helper)
The actual analysis pass wrapper.
operand_type_match m_Reg()
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
amdgpu aa AMDGPU Address space based Alias Analysis Wrapper
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of passes that operate on the MachineFunction representation.
BinaryOp_match< LHS, RHS, TargetOpcode::G_SMAX, false > m_GSMax(const LHS &L, const RHS &R)
void getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU)
Modify analysis usage so it preserves passes required for the SelectionDAG fallback.
BinaryOp_match< LHS, RHS, TargetOpcode::G_SMIN, false > m_GSMin(const LHS &L, const RHS &R)
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
To use KnownBitsInfo analysis in a pass, KnownBitsInfo &Info = getAnalysis<GISelKnownBitsInfoAnalysis>(F).get(MF); Add to observer if the Info is caching.
(vector float) vec_cmpeq(*A, *B) C
AMDGPUPreLegalizerCombinerHelper & PreLegalizerHelper
Represent the analysis usage information of a pass.
const MachineFunctionProperties & getProperties() const
Get the function properties.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
static LLT vector(uint16_t NumElements, unsigned ScalarSizeInBits)
Get a low-level vector of some number of elements and element width.
Target-Independent Code Generator Pass Configuration Options.
Helper class to build MachineInstr.
Representation of each machine instruction.
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
@ Bitcast
Perform the operation on a different, but equivalently sized type.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
void applyClampI64ToI16(MachineInstr &MI, const ClampI64ToI16MatchInfo &MatchInfo)
void setPreservesCFG()
This function should be called by the pass, iff they do not: 1. Add or remove basic blocks from the function; 2. Modify terminator instructions in any way.
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Abstract class that contains various methods for clients to notify about changes.
AMDGPUPreLegalizerCombinerHelperState(CombinerHelper &Helper, AMDGPUPreLegalizerCombinerHelper &PreLegalizerHelper)
unsigned const MachineRegisterInfo * MRI
Wrapper class representing virtual and physical registers.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Combine AMDGPU machine instrs before legalization
Simple wrapper that does the following.
ConstantMatch m_ICst(int64_t &Cst)
void initializeAMDGPUPreLegalizerCombinerPass(PassRegistry &)
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
FunctionPass * createAMDGPUPreLegalizeCombiner(bool IsOptNone)
Align max(MaybeAlign Lhs, Align Rhs)
FunctionPass class - This class is used to implement most global optimizations.
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
AnalysisUsage & addRequired()
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
static LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
APFloat abs(APFloat X)
Returns the absolute value of the argument.
INITIALIZE_PASS_BEGIN(AMDGPUPreLegalizerCombiner, DEBUG_TYPE, "Combine AMDGPU machine instrs before legalization", false, false) INITIALIZE_PASS_END(AMDGPUPreLegalizerCombiner
coro Split coroutine into a set of functions driving its state machine