Go to the documentation of this file.
29 #define DEBUG_TYPE "amdgpu-prelegalizer-combiner"
32 using namespace MIPatternMatch;
44 :
B(
B), MF(
B.getMF()),
MRI(*
B.getMRI()), Helper(Helper){};
63 assert(
MI.getOpcode() == TargetOpcode::G_TRUNC &&
"Invalid instruction!");
76 auto IsApplicableForCombine = [&MatchInfo]() ->
bool {
77 const auto Cmp1 = MatchInfo.
Cmp1;
78 const auto Cmp2 = MatchInfo.
Cmp2;
79 const auto Diff =
std::abs(Cmp2 - Cmp1);
83 if (Diff == 0 || Diff == 1)
90 return ((Cmp2 >= Cmp1 && Cmp1 >= Min && Cmp2 <= Max) ||
91 (Cmp1 >= Cmp2 && Cmp1 <= Max && Cmp2 >= Min));
99 return IsApplicableForCombine();
107 return IsApplicableForCombine();
125 assert(
MI.getParent()->getParent()->getRegInfo().getType(Src) ==
129 B.setInstrAndDebugLoc(
MI);
131 auto Unmerge =
B.buildUnmerge(S32, Src);
133 assert(
MI.getOpcode() != AMDGPU::G_AMDGPU_CVT_PK_I16_I32);
137 B.buildInstr(AMDGPU::G_AMDGPU_CVT_PK_I16_I32, {V2S16},
138 {Unmerge.getReg(0), Unmerge.getReg(1)},
MI.getFlags());
142 auto MinBoundaryDst =
B.buildConstant(S32, MinBoundary);
143 auto MaxBoundaryDst =
B.buildConstant(S32, MaxBoundary);
145 auto Bitcast =
B.buildBitcast({S32}, CvtPk);
147 auto Med3 =
B.buildInstr(
148 AMDGPU::G_AMDGPU_SMED3, {S32},
149 {MinBoundaryDst.getReg(0),
Bitcast.getReg(0), MaxBoundaryDst.getReg(0)},
152 B.buildTrunc(
MI.getOperand(0).getReg(), Med3);
154 MI.eraseFromParent();
166 : Helper(Helper), PreLegalizerHelper(PreLegalizerHelper) {}
169 #define AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
170 #include "AMDGPUGenPreLegalizeGICombiner.inc"
171 #undef AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
174 #define AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H
175 #include "AMDGPUGenPreLegalizeGICombiner.inc"
176 #undef AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H
178 class AMDGPUPreLegalizerCombinerInfo final :
public CombinerInfo {
183 AMDGPUGenPreLegalizerCombinerHelperRuleConfig GeneratedRuleCfg;
185 AMDGPUPreLegalizerCombinerInfo(
bool EnableOpt,
bool OptSize,
bool MinSize,
188 nullptr, EnableOpt, OptSize, MinSize),
190 if (!GeneratedRuleCfg.parseCommandLineOption())
203 AMDGPUGenPreLegalizerCombinerHelper Generated(GeneratedRuleCfg, Helper,
206 if (Generated.tryCombineAll(Observer,
MI,
B))
209 switch (
MI.getOpcode()) {
210 case TargetOpcode::G_CONCAT_VECTORS:
211 return Helper.tryCombineConcatVectors(
MI);
212 case TargetOpcode::G_SHUFFLE_VECTOR:
213 return Helper.tryCombineShuffleVector(
MI);
219 #define AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP
220 #include "AMDGPUGenPreLegalizeGICombiner.inc"
221 #undef AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP
230 AMDGPUPreLegalizerCombiner(
bool IsOptNone =
false);
233 return "AMDGPUPreLegalizerCombiner";
244 void AMDGPUPreLegalizerCombiner::getAnalysisUsage(
AnalysisUsage &AU)
const {
260 AMDGPUPreLegalizerCombiner::AMDGPUPreLegalizerCombiner(
bool IsOptNone)
265 bool AMDGPUPreLegalizerCombiner::runOnMachineFunction(
MachineFunction &MF) {
267 MachineFunctionProperties::Property::FailedISel))
269 auto *TPC = &getAnalysis<TargetPassConfig>();
273 GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
275 IsOptNone ? nullptr : &getAnalysis<MachineDominatorTree>();
276 AMDGPUPreLegalizerCombinerInfo PCInfo(EnableOpt,
F.hasOptSize(),
277 F.hasMinSize(), KB, MDT);
280 getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper();
281 auto *CSEInfo = &
Wrapper.get(TPC->getCSEConfig());
284 return C.combineMachineInstrs(MF, CSEInfo);
289 "Combine AMDGPU machine instrs before legalization",
299 return new AMDGPUPreLegalizerCombiner(IsOptNone);
CodeGenOpt::Level getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
bool matchClampI64ToI16(MachineInstr &MI, MachineRegisterInfo &MRI, MachineFunction &MF, ClampI64ToI16MatchInfo &MatchInfo)
AMDGPUCombinerHelper & Helper
bool hasProperty(Property P) const
MachineRegisterInfo & MRI
This is an optimization pass for GlobalISel generic memory operations.
The actual analysis pass wrapper.
operand_type_match m_Reg()
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
amdgpu aa AMDGPU Address space based Alias Analysis Wrapper
@ Bitcast
Perform the operation on a different, but equivalently sized type.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
BinaryOp_match< LHS, RHS, TargetOpcode::G_SMAX, false > m_GSMax(const LHS &L, const RHS &R)
void getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU)
Modify analysis usage so it preserves passes required for the SelectionDAG fallback.
BinaryOp_match< LHS, RHS, TargetOpcode::G_SMIN, false > m_GSMin(const LHS &L, const RHS &R)
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
static LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
To use KnownBitsInfo analysis in a pass, KnownBitsInfo &Info = getAnalysis<GISelKnownBitsInfoAnalysis...
(vector float) vec_cmpeq(*A, *B) C
AMDGPUPreLegalizerCombinerHelper & PreLegalizerHelper
Represent the analysis usage information of a pass.
const MachineFunctionProperties & getProperties() const
Get the function properties.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Target-Independent Code Generator Pass Configuration Options.
Helper class to build MachineInstr.
Representation of each machine instruction.
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
AMDGPUPreLegalizerCombinerHelper(MachineIRBuilder &B, AMDGPUCombinerHelper &Helper)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
AMDGPUCombinerHelper & Helper
void applyClampI64ToI16(MachineInstr &MI, const ClampI64ToI16MatchInfo &MatchInfo)
void setPreservesCFG()
This function should be called by the pass, iff they do not:
StringRef - Represent a constant reference to a string, i.e.
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Abstract class that contains various methods for clients to notify about changes.
unsigned const MachineRegisterInfo * MRI
Wrapper class representing virtual and physical registers.
Function & getFunction()
Return the LLVM function that this machine code represents.
ConstantMatch< APInt > m_ICst(APInt &Cst)
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Combine AMDGPU machine instrs before legalization
Simple wrapper that does the following.
void initializeAMDGPUPreLegalizerCombinerPass(PassRegistry &)
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
FunctionPass * createAMDGPUPreLegalizeCombiner(bool IsOptNone)
Align max(MaybeAlign Lhs, Align Rhs)
AMDGPUPreLegalizerCombinerHelperState(AMDGPUCombinerHelper &Helper, AMDGPUPreLegalizerCombinerHelper &PreLegalizerHelper)
FunctionPass class - This class is used to implement most global optimizations.
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
AnalysisUsage & addRequired()
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
static LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
APFloat abs(APFloat X)
Returns the absolute value of the argument.
INITIALIZE_PASS_BEGIN(AMDGPUPreLegalizerCombiner, DEBUG_TYPE, "Combine AMDGPU machine instrs before legalization", false, false) INITIALIZE_PASS_END(AMDGPUPreLegalizerCombiner