LLVM 22.0.0git
AMDGPUPreLegalizerCombiner.cpp
Go to the documentation of this file.
1//=== lib/CodeGen/GlobalISel/AMDGPUPreLegalizerCombiner.cpp ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass does combining of machine instructions at the generic MI level,
10// before the legalizer.
11//
12//===----------------------------------------------------------------------===//
13
14#include "AMDGPU.h"
16#include "AMDGPULegalizerInfo.h"
17#include "GCNSubtarget.h"
29
30#define GET_GICOMBINER_DEPS
31#include "AMDGPUGenPreLegalizeGICombiner.inc"
32#undef GET_GICOMBINER_DEPS
33
34#define DEBUG_TYPE "amdgpu-prelegalizer-combiner"
35
36using namespace llvm;
37using namespace MIPatternMatch;
38namespace {
39
40#define GET_GICOMBINER_TYPES
41#include "AMDGPUGenPreLegalizeGICombiner.inc"
42#undef GET_GICOMBINER_TYPES
43
44class AMDGPUPreLegalizerCombinerImpl : public Combiner {
45protected:
46 const AMDGPUPreLegalizerCombinerImplRuleConfig &RuleConfig;
47 const GCNSubtarget &STI;
48 const AMDGPUCombinerHelper Helper;
49
50public:
51 AMDGPUPreLegalizerCombinerImpl(
52 MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
53 GISelValueTracking &VT, GISelCSEInfo *CSEInfo,
54 const AMDGPUPreLegalizerCombinerImplRuleConfig &RuleConfig,
55 const GCNSubtarget &STI, MachineDominatorTree *MDT,
56 const LegalizerInfo *LI);
57
58 static const char *getName() { return "AMDGPUPreLegalizerCombinerImpl"; }
59
60 bool tryCombineAllImpl(MachineInstr &MI) const;
61 bool tryCombineAll(MachineInstr &I) const override;
62
63 struct ClampI64ToI16MatchInfo {
64 int64_t Cmp1 = 0;
65 int64_t Cmp2 = 0;
66 Register Origin;
67 };
68
69 bool matchClampI64ToI16(MachineInstr &MI, const MachineRegisterInfo &MRI,
70 const MachineFunction &MF,
71 ClampI64ToI16MatchInfo &MatchInfo) const;
72
73 void applyClampI64ToI16(MachineInstr &MI,
74 const ClampI64ToI16MatchInfo &MatchInfo) const;
75
76private:
77#define GET_GICOMBINER_CLASS_MEMBERS
78#define AMDGPUSubtarget GCNSubtarget
79#include "AMDGPUGenPreLegalizeGICombiner.inc"
80#undef GET_GICOMBINER_CLASS_MEMBERS
81#undef AMDGPUSubtarget
82};
83
84#define GET_GICOMBINER_IMPL
85#define AMDGPUSubtarget GCNSubtarget
86#include "AMDGPUGenPreLegalizeGICombiner.inc"
87#undef AMDGPUSubtarget
88#undef GET_GICOMBINER_IMPL
89
90AMDGPUPreLegalizerCombinerImpl::AMDGPUPreLegalizerCombinerImpl(
91 MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
92 GISelValueTracking &VT, GISelCSEInfo *CSEInfo,
93 const AMDGPUPreLegalizerCombinerImplRuleConfig &RuleConfig,
94 const GCNSubtarget &STI, MachineDominatorTree *MDT, const LegalizerInfo *LI)
95 : Combiner(MF, CInfo, TPC, &VT, CSEInfo), RuleConfig(RuleConfig), STI(STI),
96 Helper(Observer, B, /*IsPreLegalize*/ true, &VT, MDT, LI, STI),
98#include "AMDGPUGenPreLegalizeGICombiner.inc"
100{
101}
102
103bool AMDGPUPreLegalizerCombinerImpl::tryCombineAll(MachineInstr &MI) const {
104 if (tryCombineAllImpl(MI))
105 return true;
106
107 switch (MI.getOpcode()) {
108 case TargetOpcode::G_SHUFFLE_VECTOR:
109 return Helper.tryCombineShuffleVector(MI);
110 }
111
112 return false;
113}
114
115bool AMDGPUPreLegalizerCombinerImpl::matchClampI64ToI16(
116 MachineInstr &MI, const MachineRegisterInfo &MRI, const MachineFunction &MF,
117 ClampI64ToI16MatchInfo &MatchInfo) const {
118 assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Invalid instruction!");
119
120 // Try to find a pattern where an i64 value should get clamped to short.
121 const LLT SrcType = MRI.getType(MI.getOperand(1).getReg());
122 if (SrcType != LLT::scalar(64))
123 return false;
124
125 const LLT DstType = MRI.getType(MI.getOperand(0).getReg());
126 if (DstType != LLT::scalar(16))
127 return false;
128
130
131 auto IsApplicableForCombine = [&MatchInfo]() -> bool {
132 const auto Cmp1 = MatchInfo.Cmp1;
133 const auto Cmp2 = MatchInfo.Cmp2;
134 const auto Diff = std::abs(Cmp2 - Cmp1);
135
136 // If the difference between both comparison values is 0 or 1, there is no
137 // need to clamp.
138 if (Diff == 0 || Diff == 1)
139 return false;
140
141 const int64_t Min = std::numeric_limits<int16_t>::min();
142 const int64_t Max = std::numeric_limits<int16_t>::max();
143
144 // Check if the comparison values are between SHORT_MIN and SHORT_MAX.
145 return ((Cmp2 >= Cmp1 && Cmp1 >= Min && Cmp2 <= Max) ||
146 (Cmp1 >= Cmp2 && Cmp1 <= Max && Cmp2 >= Min));
147 };
148
149 // Try to match a combination of min / max MIR opcodes.
150 if (mi_match(MI.getOperand(1).getReg(), MRI,
151 m_GSMin(m_Reg(Base), m_ICst(MatchInfo.Cmp1)))) {
152 if (mi_match(Base, MRI,
153 m_GSMax(m_Reg(MatchInfo.Origin), m_ICst(MatchInfo.Cmp2)))) {
154 return IsApplicableForCombine();
155 }
156 }
157
158 if (mi_match(MI.getOperand(1).getReg(), MRI,
159 m_GSMax(m_Reg(Base), m_ICst(MatchInfo.Cmp1)))) {
160 if (mi_match(Base, MRI,
161 m_GSMin(m_Reg(MatchInfo.Origin), m_ICst(MatchInfo.Cmp2)))) {
162 return IsApplicableForCombine();
163 }
164 }
165
166 return false;
167}
168
169// We want to find a combination of instructions that
170// gets generated when an i64 gets clamped to i16.
171// The corresponding pattern is:
172// G_MAX / G_MAX for i16 <= G_TRUNC i64.
173// This can be efficiently written as following:
174// v_cvt_pk_i16_i32 v0, v0, v1
175// v_med3_i32 v0, Clamp_Min, v0, Clamp_Max
176void AMDGPUPreLegalizerCombinerImpl::applyClampI64ToI16(
177 MachineInstr &MI, const ClampI64ToI16MatchInfo &MatchInfo) const {
178
179 Register Src = MatchInfo.Origin;
180 assert(MI.getMF()->getRegInfo().getType(Src) == LLT::scalar(64));
181 const LLT S32 = LLT::scalar(32);
182
183 auto Unmerge = B.buildUnmerge(S32, Src);
184
185 assert(MI.getOpcode() != AMDGPU::G_AMDGPU_CVT_PK_I16_I32);
186
187 const LLT V2S16 = LLT::fixed_vector(2, 16);
188 auto CvtPk =
189 B.buildInstr(AMDGPU::G_AMDGPU_CVT_PK_I16_I32, {V2S16},
190 {Unmerge.getReg(0), Unmerge.getReg(1)}, MI.getFlags());
191
192 auto MinBoundary = std::min(MatchInfo.Cmp1, MatchInfo.Cmp2);
193 auto MaxBoundary = std::max(MatchInfo.Cmp1, MatchInfo.Cmp2);
194 auto MinBoundaryDst = B.buildConstant(S32, MinBoundary);
195 auto MaxBoundaryDst = B.buildConstant(S32, MaxBoundary);
196
197 auto Bitcast = B.buildBitcast({S32}, CvtPk);
198
199 auto Med3 = B.buildInstr(
200 AMDGPU::G_AMDGPU_SMED3, {S32},
201 {MinBoundaryDst.getReg(0), Bitcast.getReg(0), MaxBoundaryDst.getReg(0)},
202 MI.getFlags());
203
204 B.buildTrunc(MI.getOperand(0).getReg(), Med3);
205
206 MI.eraseFromParent();
207}
208
209// Pass boilerplate
210// ================
211
212class AMDGPUPreLegalizerCombiner : public MachineFunctionPass {
213public:
214 static char ID;
215
216 AMDGPUPreLegalizerCombiner(bool IsOptNone = false);
217
218 StringRef getPassName() const override {
219 return "AMDGPUPreLegalizerCombiner";
220 }
221
222 bool runOnMachineFunction(MachineFunction &MF) override;
223
224 void getAnalysisUsage(AnalysisUsage &AU) const override;
225
226private:
227 bool IsOptNone;
228 AMDGPUPreLegalizerCombinerImplRuleConfig RuleConfig;
229};
230} // end anonymous namespace
231
232void AMDGPUPreLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
233 AU.addRequired<TargetPassConfig>();
234 AU.setPreservesCFG();
236 AU.addRequired<GISelValueTrackingAnalysisLegacy>();
237 AU.addPreserved<GISelValueTrackingAnalysisLegacy>();
238 if (!IsOptNone) {
239 AU.addRequired<MachineDominatorTreeWrapperPass>();
240 AU.addPreserved<MachineDominatorTreeWrapperPass>();
241 }
242
243 AU.addRequired<GISelCSEAnalysisWrapperPass>();
244 AU.addPreserved<GISelCSEAnalysisWrapperPass>();
246}
247
248AMDGPUPreLegalizerCombiner::AMDGPUPreLegalizerCombiner(bool IsOptNone)
249 : MachineFunctionPass(ID), IsOptNone(IsOptNone) {
250 if (!RuleConfig.parseCommandLineOption())
251 report_fatal_error("Invalid rule identifier");
252}
253
254bool AMDGPUPreLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
255 if (MF.getProperties().hasFailedISel())
256 return false;
257 auto *TPC = &getAnalysis<TargetPassConfig>();
258 const Function &F = MF.getFunction();
259 bool EnableOpt =
260 MF.getTarget().getOptLevel() != CodeGenOptLevel::None && !skipFunction(F);
262 &getAnalysis<GISelValueTrackingAnalysisLegacy>().get(MF);
263
264 // Enable CSE.
266 getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper();
267 auto *CSEInfo = &Wrapper.get(TPC->getCSEConfig());
268
269 const GCNSubtarget &STI = MF.getSubtarget<GCNSubtarget>();
271 IsOptNone ? nullptr
272 : &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
273 CombinerInfo CInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
274 nullptr, EnableOpt, F.hasOptSize(), F.hasMinSize());
275 // Disable fixed-point iteration to reduce compile-time
276 CInfo.MaxIterations = 1;
277 CInfo.ObserverLvl = CombinerInfo::ObserverLevel::SinglePass;
278 // This is the first Combiner, so the input IR might contain dead
279 // instructions.
280 CInfo.EnableFullDCE = true;
281 AMDGPUPreLegalizerCombinerImpl Impl(MF, CInfo, TPC, *VT, CSEInfo, RuleConfig,
282 STI, MDT, STI.getLegalizerInfo());
283 return Impl.combineMachineInstrs();
284}
285
286char AMDGPUPreLegalizerCombiner::ID = 0;
287INITIALIZE_PASS_BEGIN(AMDGPUPreLegalizerCombiner, DEBUG_TYPE,
288 "Combine AMDGPU machine instrs before legalization",
289 false, false)
292INITIALIZE_PASS_END(AMDGPUPreLegalizerCombiner, DEBUG_TYPE,
293 "Combine AMDGPU machine instrs before legalization", false,
294 false)
295
297 return new AMDGPUPreLegalizerCombiner(IsOptNone);
298}
unsigned const MachineRegisterInfo * MRI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
#define GET_GICOMBINER_CONSTRUCTOR_INITS
amdgpu aa AMDGPU Address space based Alias Analysis Wrapper
This contains common combine transformations that may be used in a combine pass.
constexpr LLT V2S16
constexpr LLT S32
This file declares the targeting of the Machinelegalizer class for AMDGPU.
Provides AMDGPU specific target descriptions.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Provides analysis for continuously CSEing during GISel passes.
This contains common combine transformations that may be used in a combine pass,or by the target else...
Option class for Targets to specify which operations are combined how and when.
This contains the base class for all Combiners generated by TableGen.
AMD GCN specific subclass of TargetSubtarget.
Provides analysis for querying information about KnownBits during GISel passes.
#define DEBUG_TYPE
IRTranslator LLVM IR MI
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Contains matchers for matching SSA Machine Instructions.
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
static StringRef getName(Value *V)
Target-Independent Code Generator Pass Configuration Options pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition Pass.cpp:270
bool tryCombineShuffleVector(MachineInstr &MI) const
Try to combine G_SHUFFLE_VECTOR into G_CONCAT_VECTORS.
Combiner implementation.
Definition Combiner.h:34
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
const LegalizerInfo * getLegalizerInfo() const override
Simple wrapper that does the following.
Definition CSEInfo.h:211
The CSE Analysis object.
Definition CSEInfo.h:71
To use KnownBitsInfo analysis in a pass, KnownBitsInfo &Info = getAnalysis<GISelValueTrackingInfoAnal...
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Function & getFunction()
Return the LLVM function that this machine code represents.
const MachineFunctionProperties & getProperties() const
Get the function properties.
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Representation of each machine instruction.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Wrapper class representing virtual and physical registers.
Definition Register.h:20
CodeGenOptLevel getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
Target-Independent Code Generator Pass Configuration Options.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ Bitcast
Perform the operation on a different, but equivalently sized type.
operand_type_match m_Reg()
ConstantMatch< APInt > m_ICst(APInt &Cst)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SMIN, true > m_GSMin(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SMAX, true > m_GSMax(const LHS &L, const RHS &R)
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:167
LLVM_ABI void getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU)
Modify analysis usage so it preserves passes required for the SelectionDAG fallback.
Definition Utils.cpp:1184
FunctionPass * createAMDGPUPreLegalizeCombiner(bool IsOptNone)
@ SinglePass
Enables Observer-based DCE and additional heuristics that retry combining defined and used instructio...