LLVM 19.0.0git
AMDGPUPreLegalizerCombiner.cpp
Go to the documentation of this file.
1//=== lib/CodeGen/GlobalISel/AMDGPUPreLegalizerCombiner.cpp ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass does combining of machine instructions at the generic MI level,
10// before the legalizer.
11//
12//===----------------------------------------------------------------------===//
13
14#include "AMDGPU.h"
16#include "AMDGPULegalizerInfo.h"
17#include "GCNSubtarget.h"
29
30#define GET_GICOMBINER_DEPS
31#include "AMDGPUGenPreLegalizeGICombiner.inc"
32#undef GET_GICOMBINER_DEPS
33
34#define DEBUG_TYPE "amdgpu-prelegalizer-combiner"
35
36using namespace llvm;
37using namespace MIPatternMatch;
38namespace {
39
40#define GET_GICOMBINER_TYPES
41#include "AMDGPUGenPreLegalizeGICombiner.inc"
42#undef GET_GICOMBINER_TYPES
43
44class AMDGPUPreLegalizerCombinerImpl : public Combiner {
45protected:
46 const AMDGPUPreLegalizerCombinerImplRuleConfig &RuleConfig;
47 const GCNSubtarget &STI;
48 // TODO: Make CombinerHelper methods const.
49 mutable AMDGPUCombinerHelper Helper;
50
51public:
52 AMDGPUPreLegalizerCombinerImpl(
53 MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
54 GISelKnownBits &KB, GISelCSEInfo *CSEInfo,
55 const AMDGPUPreLegalizerCombinerImplRuleConfig &RuleConfig,
56 const GCNSubtarget &STI, MachineDominatorTree *MDT,
57 const LegalizerInfo *LI);
58
59 static const char *getName() { return "AMDGPUPreLegalizerCombinerImpl"; }
60
61 bool tryCombineAllImpl(MachineInstr &MI) const;
62 bool tryCombineAll(MachineInstr &I) const override;
63
64 struct ClampI64ToI16MatchInfo {
65 int64_t Cmp1 = 0;
66 int64_t Cmp2 = 0;
67 Register Origin;
68 };
69
70 bool matchClampI64ToI16(MachineInstr &MI, const MachineRegisterInfo &MRI,
71 const MachineFunction &MF,
72 ClampI64ToI16MatchInfo &MatchInfo) const;
73
74 void applyClampI64ToI16(MachineInstr &MI,
75 const ClampI64ToI16MatchInfo &MatchInfo) const;
76
77private:
78#define GET_GICOMBINER_CLASS_MEMBERS
79#define AMDGPUSubtarget GCNSubtarget
80#include "AMDGPUGenPreLegalizeGICombiner.inc"
81#undef GET_GICOMBINER_CLASS_MEMBERS
82#undef AMDGPUSubtarget
83};
84
85#define GET_GICOMBINER_IMPL
86#define AMDGPUSubtarget GCNSubtarget
87#include "AMDGPUGenPreLegalizeGICombiner.inc"
88#undef AMDGPUSubtarget
89#undef GET_GICOMBINER_IMPL
90
91AMDGPUPreLegalizerCombinerImpl::AMDGPUPreLegalizerCombinerImpl(
92 MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
93 GISelKnownBits &KB, GISelCSEInfo *CSEInfo,
94 const AMDGPUPreLegalizerCombinerImplRuleConfig &RuleConfig,
95 const GCNSubtarget &STI, MachineDominatorTree *MDT, const LegalizerInfo *LI)
96 : Combiner(MF, CInfo, TPC, &KB, CSEInfo), RuleConfig(RuleConfig), STI(STI),
97 Helper(Observer, B, /*IsPreLegalize*/ true, &KB, MDT, LI),
99#include "AMDGPUGenPreLegalizeGICombiner.inc"
101{
102}
103
104bool AMDGPUPreLegalizerCombinerImpl::tryCombineAll(MachineInstr &MI) const {
105 if (tryCombineAllImpl(MI))
106 return true;
107
108 switch (MI.getOpcode()) {
109 case TargetOpcode::G_SHUFFLE_VECTOR:
110 return Helper.tryCombineShuffleVector(MI);
111 }
112
113 return false;
114}
115
116bool AMDGPUPreLegalizerCombinerImpl::matchClampI64ToI16(
118 ClampI64ToI16MatchInfo &MatchInfo) const {
119 assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Invalid instruction!");
120
121 // Try to find a pattern where an i64 value should get clamped to short.
122 const LLT SrcType = MRI.getType(MI.getOperand(1).getReg());
123 if (SrcType != LLT::scalar(64))
124 return false;
125
126 const LLT DstType = MRI.getType(MI.getOperand(0).getReg());
127 if (DstType != LLT::scalar(16))
128 return false;
129
131
132 auto IsApplicableForCombine = [&MatchInfo]() -> bool {
133 const auto Cmp1 = MatchInfo.Cmp1;
134 const auto Cmp2 = MatchInfo.Cmp2;
135 const auto Diff = std::abs(Cmp2 - Cmp1);
136
137 // If the difference between both comparison values is 0 or 1, there is no
138 // need to clamp.
139 if (Diff == 0 || Diff == 1)
140 return false;
141
142 const int64_t Min = std::numeric_limits<int16_t>::min();
143 const int64_t Max = std::numeric_limits<int16_t>::max();
144
145 // Check if the comparison values are between SHORT_MIN and SHORT_MAX.
146 return ((Cmp2 >= Cmp1 && Cmp1 >= Min && Cmp2 <= Max) ||
147 (Cmp1 >= Cmp2 && Cmp1 <= Max && Cmp2 >= Min));
148 };
149
150 // Try to match a combination of min / max MIR opcodes.
151 if (mi_match(MI.getOperand(1).getReg(), MRI,
152 m_GSMin(m_Reg(Base), m_ICst(MatchInfo.Cmp1)))) {
153 if (mi_match(Base, MRI,
154 m_GSMax(m_Reg(MatchInfo.Origin), m_ICst(MatchInfo.Cmp2)))) {
155 return IsApplicableForCombine();
156 }
157 }
158
159 if (mi_match(MI.getOperand(1).getReg(), MRI,
160 m_GSMax(m_Reg(Base), m_ICst(MatchInfo.Cmp1)))) {
161 if (mi_match(Base, MRI,
162 m_GSMin(m_Reg(MatchInfo.Origin), m_ICst(MatchInfo.Cmp2)))) {
163 return IsApplicableForCombine();
164 }
165 }
166
167 return false;
168}
169
170// We want to find a combination of instructions that
171// gets generated when an i64 gets clamped to i16.
172// The corresponding pattern is:
173// G_MAX / G_MAX for i16 <= G_TRUNC i64.
174// This can be efficiently written as following:
175// v_cvt_pk_i16_i32 v0, v0, v1
176// v_med3_i32 v0, Clamp_Min, v0, Clamp_Max
177void AMDGPUPreLegalizerCombinerImpl::applyClampI64ToI16(
178 MachineInstr &MI, const ClampI64ToI16MatchInfo &MatchInfo) const {
179
180 Register Src = MatchInfo.Origin;
181 assert(MI.getParent()->getParent()->getRegInfo().getType(Src) ==
182 LLT::scalar(64));
183 const LLT S32 = LLT::scalar(32);
184
185 B.setInstrAndDebugLoc(MI);
186
187 auto Unmerge = B.buildUnmerge(S32, Src);
188
189 assert(MI.getOpcode() != AMDGPU::G_AMDGPU_CVT_PK_I16_I32);
190
191 const LLT V2S16 = LLT::fixed_vector(2, 16);
192 auto CvtPk =
193 B.buildInstr(AMDGPU::G_AMDGPU_CVT_PK_I16_I32, {V2S16},
194 {Unmerge.getReg(0), Unmerge.getReg(1)}, MI.getFlags());
195
196 auto MinBoundary = std::min(MatchInfo.Cmp1, MatchInfo.Cmp2);
197 auto MaxBoundary = std::max(MatchInfo.Cmp1, MatchInfo.Cmp2);
198 auto MinBoundaryDst = B.buildConstant(S32, MinBoundary);
199 auto MaxBoundaryDst = B.buildConstant(S32, MaxBoundary);
200
201 auto Bitcast = B.buildBitcast({S32}, CvtPk);
202
203 auto Med3 = B.buildInstr(
204 AMDGPU::G_AMDGPU_SMED3, {S32},
205 {MinBoundaryDst.getReg(0), Bitcast.getReg(0), MaxBoundaryDst.getReg(0)},
206 MI.getFlags());
207
208 B.buildTrunc(MI.getOperand(0).getReg(), Med3);
209
210 MI.eraseFromParent();
211}
212
213// Pass boilerplate
214// ================
215
216class AMDGPUPreLegalizerCombiner : public MachineFunctionPass {
217public:
218 static char ID;
219
220 AMDGPUPreLegalizerCombiner(bool IsOptNone = false);
221
222 StringRef getPassName() const override {
223 return "AMDGPUPreLegalizerCombiner";
224 }
225
226 bool runOnMachineFunction(MachineFunction &MF) override;
227
228 void getAnalysisUsage(AnalysisUsage &AU) const override;
229
230private:
231 bool IsOptNone;
232 AMDGPUPreLegalizerCombinerImplRuleConfig RuleConfig;
233};
234} // end anonymous namespace
235
236void AMDGPUPreLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
238 AU.setPreservesCFG();
242 if (!IsOptNone) {
245 }
246
250}
251
252AMDGPUPreLegalizerCombiner::AMDGPUPreLegalizerCombiner(bool IsOptNone)
253 : MachineFunctionPass(ID), IsOptNone(IsOptNone) {
255
256 if (!RuleConfig.parseCommandLineOption())
257 report_fatal_error("Invalid rule identifier");
258}
259
260bool AMDGPUPreLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
261 if (MF.getProperties().hasProperty(
262 MachineFunctionProperties::Property::FailedISel))
263 return false;
264 auto *TPC = &getAnalysis<TargetPassConfig>();
265 const Function &F = MF.getFunction();
266 bool EnableOpt =
267 MF.getTarget().getOptLevel() != CodeGenOptLevel::None && !skipFunction(F);
268 GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
269
270 // Enable CSE.
272 getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper();
273 auto *CSEInfo = &Wrapper.get(TPC->getCSEConfig());
274
275 const GCNSubtarget &STI = MF.getSubtarget<GCNSubtarget>();
277 IsOptNone ? nullptr : &getAnalysis<MachineDominatorTree>();
278 CombinerInfo CInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
279 nullptr, EnableOpt, F.hasOptSize(), F.hasMinSize());
280 AMDGPUPreLegalizerCombinerImpl Impl(MF, CInfo, TPC, *KB, CSEInfo, RuleConfig,
281 STI, MDT, STI.getLegalizerInfo());
282 return Impl.combineMachineInstrs();
283}
284
285char AMDGPUPreLegalizerCombiner::ID = 0;
286INITIALIZE_PASS_BEGIN(AMDGPUPreLegalizerCombiner, DEBUG_TYPE,
287 "Combine AMDGPU machine instrs before legalization",
288 false, false)
291INITIALIZE_PASS_END(AMDGPUPreLegalizerCombiner, DEBUG_TYPE,
292 "Combine AMDGPU machine instrs before legalization", false,
293 false)
294
295namespace llvm {
297 return new AMDGPUPreLegalizerCombiner(IsOptNone);
298}
299} // end namespace llvm
unsigned const MachineRegisterInfo * MRI
amdgpu aa AMDGPU Address space based Alias Analysis Wrapper
This contains common combine transformations that may be used in a combine pass.
static const LLT V2S16
static const LLT S32
This file declares the targeting of the Machinelegalizer class for AMDGPU.
Provides AMDGPU specific target descriptions.
#define GET_GICOMBINER_CONSTRUCTOR_INITS
Combine AMDGPU machine instrs before legalization
#define DEBUG_TYPE
basic Basic Alias true
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Provides analysis for continuously CSEing during GISel passes.
This contains common combine transformations that may be used in a combine pass,or by the target else...
Option class for Targets to specify which operations are combined how and when.
This contains the base class for all Combiners generated by TableGen.
AMD GCN specific subclass of TargetSubtarget.
Provides analysis for querying information about KnownBits during GISel passes.
Hexagon Vector Combine
IRTranslator LLVM IR MI
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
Contains matchers for matching SSA Machine Instructions.
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:59
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
static StringRef getName(Value *V)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Target-Independent Code Generator Pass Configuration Options pass.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:269
Combiner implementation.
Definition: Combiner.h:34
virtual bool tryCombineAll(MachineInstr &I) const =0
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:311
const LegalizerInfo * getLegalizerInfo() const override
Definition: GCNSubtarget.h:280
The actual analysis pass wrapper.
Definition: CSEInfo.h:222
Simple wrapper that does the following.
Definition: CSEInfo.h:204
The CSE Analysis object.
Definition: CSEInfo.h:69
To use KnownBitsInfo analysis in a pass, KnownBitsInfo &Info = getAnalysis<GISelKnownBitsInfoAnalysis...
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
Definition: LowLevelType.h:42
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
Definition: LowLevelType.h:100
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
bool hasProperty(Property P) const
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineFunctionProperties & getProperties() const
Get the function properties.
Representation of each machine instruction.
Definition: MachineInstr.h:69
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:81
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
CodeGenOptLevel getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
Target-Independent Code Generator Pass Configuration Options.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ Bitcast
Perform the operation on a different, but equivalently sized type.
operand_type_match m_Reg()
BinaryOp_match< LHS, RHS, TargetOpcode::G_SMAX, false > m_GSMax(const LHS &L, const RHS &R)
ConstantMatch< APInt > m_ICst(APInt &Cst)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SMIN, false > m_GSMin(const LHS &L, const RHS &R)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
FunctionPass * createAMDGPUPreLegalizeCombiner(bool IsOptNone)
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:156
void initializeAMDGPUPreLegalizerCombinerPass(PassRegistry &)
void getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU)
Modify analysis usage so it preserves passes required for the SelectionDAG fallback.
Definition: Utils.cpp:1140
auto instrs(const MachineBasicBlock &BB)