LLVM  16.0.0git
AMDGPUPreLegalizerCombiner.cpp
Go to the documentation of this file.
1 //=== lib/CodeGen/GlobalISel/AMDGPUPreLegalizerCombiner.cpp ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass does combining of machine instructions at the generic MI level,
10 // before the legalizer.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "AMDGPU.h"
15 #include "AMDGPUCombinerHelper.h"
16 #include "AMDGPULegalizerInfo.h"
17 #include "GCNSubtarget.h"
28 
29 #define DEBUG_TYPE "amdgpu-prelegalizer-combiner"
30 
31 using namespace llvm;
32 using namespace MIPatternMatch;
33 
35 protected:
40 
41 public:
43  AMDGPUCombinerHelper &Helper)
44  : B(B), MF(B.getMF()), MRI(*B.getMRI()), Helper(Helper){};
45 
47  int64_t Cmp1 = 0;
48  int64_t Cmp2 = 0;
50  };
51 
52  bool matchClampI64ToI16(MachineInstr &MI, MachineRegisterInfo &MRI,
53  MachineFunction &MF,
54  ClampI64ToI16MatchInfo &MatchInfo);
55 
56  void applyClampI64ToI16(MachineInstr &MI,
57  const ClampI64ToI16MatchInfo &MatchInfo);
58 };
59 
62  ClampI64ToI16MatchInfo &MatchInfo) {
63  assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Invalid instruction!");
64 
65  // Try to find a pattern where an i64 value should get clamped to short.
66  const LLT SrcType = MRI.getType(MI.getOperand(1).getReg());
67  if (SrcType != LLT::scalar(64))
68  return false;
69 
70  const LLT DstType = MRI.getType(MI.getOperand(0).getReg());
71  if (DstType != LLT::scalar(16))
72  return false;
73 
74  Register Base;
75 
76  auto IsApplicableForCombine = [&MatchInfo]() -> bool {
77  const auto Cmp1 = MatchInfo.Cmp1;
78  const auto Cmp2 = MatchInfo.Cmp2;
79  const auto Diff = std::abs(Cmp2 - Cmp1);
80 
81  // If the difference between both comparison values is 0 or 1, there is no
82  // need to clamp.
83  if (Diff == 0 || Diff == 1)
84  return false;
85 
86  const int64_t Min = std::numeric_limits<int16_t>::min();
87  const int64_t Max = std::numeric_limits<int16_t>::max();
88 
89  // Check if the comparison values are between SHORT_MIN and SHORT_MAX.
90  return ((Cmp2 >= Cmp1 && Cmp1 >= Min && Cmp2 <= Max) ||
91  (Cmp1 >= Cmp2 && Cmp1 <= Max && Cmp2 >= Min));
92  };
93 
94  // Try to match a combination of min / max MIR opcodes.
95  if (mi_match(MI.getOperand(1).getReg(), MRI,
96  m_GSMin(m_Reg(Base), m_ICst(MatchInfo.Cmp1)))) {
97  if (mi_match(Base, MRI,
98  m_GSMax(m_Reg(MatchInfo.Origin), m_ICst(MatchInfo.Cmp2)))) {
99  return IsApplicableForCombine();
100  }
101  }
102 
103  if (mi_match(MI.getOperand(1).getReg(), MRI,
104  m_GSMax(m_Reg(Base), m_ICst(MatchInfo.Cmp1)))) {
105  if (mi_match(Base, MRI,
106  m_GSMin(m_Reg(MatchInfo.Origin), m_ICst(MatchInfo.Cmp2)))) {
107  return IsApplicableForCombine();
108  }
109  }
110 
111  return false;
112 }
113 
114 // We want to find a combination of instructions that
115 // gets generated when an i64 gets clamped to i16.
116 // The corresponding pattern is:
117 // G_MAX / G_MIN for i16 <= G_TRUNC i64.
118 // This can be efficiently written as following:
119 // v_cvt_pk_i16_i32 v0, v0, v1
120 // v_med3_i32 v0, Clamp_Min, v0, Clamp_Max
122  MachineInstr &MI, const ClampI64ToI16MatchInfo &MatchInfo) {
123 
124  Register Src = MatchInfo.Origin;
125  assert(MI.getParent()->getParent()->getRegInfo().getType(Src) ==
126  LLT::scalar(64));
127  const LLT S32 = LLT::scalar(32);
128 
129  B.setInstrAndDebugLoc(MI);
130 
131  auto Unmerge = B.buildUnmerge(S32, Src);
132 
133  assert(MI.getOpcode() != AMDGPU::G_AMDGPU_CVT_PK_I16_I32);
134 
135  const LLT V2S16 = LLT::fixed_vector(2, 16);
136  auto CvtPk =
137  B.buildInstr(AMDGPU::G_AMDGPU_CVT_PK_I16_I32, {V2S16},
138  {Unmerge.getReg(0), Unmerge.getReg(1)}, MI.getFlags());
139 
140  auto MinBoundary = std::min(MatchInfo.Cmp1, MatchInfo.Cmp2);
141  auto MaxBoundary = std::max(MatchInfo.Cmp1, MatchInfo.Cmp2);
142  auto MinBoundaryDst = B.buildConstant(S32, MinBoundary);
143  auto MaxBoundaryDst = B.buildConstant(S32, MaxBoundary);
144 
145  auto Bitcast = B.buildBitcast({S32}, CvtPk);
146 
147  auto Med3 = B.buildInstr(
148  AMDGPU::G_AMDGPU_SMED3, {S32},
149  {MinBoundaryDst.getReg(0), Bitcast.getReg(0), MaxBoundaryDst.getReg(0)},
150  MI.getFlags());
151 
152  B.buildTrunc(MI.getOperand(0).getReg(), Med3);
153 
154  MI.eraseFromParent();
155 }
156 
158 protected:
161 
162 public:
164  AMDGPUCombinerHelper &Helper,
165  AMDGPUPreLegalizerCombinerHelper &PreLegalizerHelper)
166  : Helper(Helper), PreLegalizerHelper(PreLegalizerHelper) {}
167 };
168 
169 #define AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
170 #include "AMDGPUGenPreLegalizeGICombiner.inc"
171 #undef AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
172 
173 namespace {
174 #define AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H
175 #include "AMDGPUGenPreLegalizeGICombiner.inc"
176 #undef AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H
177 
// CombinerInfo implementation for the AMDGPU pre-legalizer combiner. It keeps
// the analyses and the generated rule configuration that combine() hands to
// the combiner helpers.
178 class AMDGPUPreLegalizerCombinerInfo final : public CombinerInfo {
     // Known-bits analysis forwarded to the AMDGPUCombinerHelper built in
     // combine().
179  GISelKnownBits *KB;
181 
182 public:
     // Rule configuration passed to the generated combiner helper; validated
     // in the constructor.
183  AMDGPUGenPreLegalizerCombinerHelperRuleConfig GeneratedRuleCfg;
184 
     // Configures the CombinerInfo base to allow illegal ops, not legalize
     // them, and use no LegalizerInfo, then stores KB and MDT. Calls
     // report_fatal_error if GeneratedRuleCfg.parseCommandLineOption() fails.
185  AMDGPUPreLegalizerCombinerInfo(bool EnableOpt, bool OptSize, bool MinSize,
187  : CombinerInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
188  /*LegalizerInfo*/ nullptr, EnableOpt, OptSize, MinSize),
189  KB(KB), MDT(MDT) {
190  if (!GeneratedRuleCfg.parseCommandLineOption())
191  report_fatal_error("Invalid rule identifier");
192  }
193 
     // Combine entry point for a single instruction; declared here and
     // defined out of line.
194  bool combine(GISelChangeObserver &Observer, MachineInstr &MI,
195  MachineIRBuilder &B) const override;
196 };
197 
199  MachineInstr &MI,
200  MachineIRBuilder &B) const {
201  const auto *LI = MI.getMF()->getSubtarget().getLegalizerInfo();
202  AMDGPUCombinerHelper Helper(Observer, B, /*IsPreLegalize*/ true, KB, MDT, LI);
203  AMDGPUPreLegalizerCombinerHelper PreLegalizerHelper(B, Helper);
204  AMDGPUGenPreLegalizerCombinerHelper Generated(GeneratedRuleCfg, Helper,
205  PreLegalizerHelper);
206 
207  if (Generated.tryCombineAll(Observer, MI, B))
208  return true;
209 
210  switch (MI.getOpcode()) {
211  case TargetOpcode::G_CONCAT_VECTORS:
212  return Helper.tryCombineConcatVectors(MI);
213  case TargetOpcode::G_SHUFFLE_VECTOR:
214  return Helper.tryCombineShuffleVector(MI);
215  }
216 
217  return false;
218 }
219 
220 #define AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP
221 #include "AMDGPUGenPreLegalizeGICombiner.inc"
222 #undef AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP
223 
224 // Pass boilerplate
225 // ================
226 
// MachineFunctionPass that runs the AMDGPU pre-legalizer combiner over a
// machine function.
227 class AMDGPUPreLegalizerCombiner : public MachineFunctionPass {
228 public:
     // Pass identifier; handed to the MachineFunctionPass base constructor.
229  static char ID;
230 
     // IsOptNone defaults to false; the value is stored in the member of the
     // same name.
231  AMDGPUPreLegalizerCombiner(bool IsOptNone = false);
232 
     // Returns the fixed pass name string.
233  StringRef getPassName() const override {
234  return "AMDGPUPreLegalizerCombiner";
235  }
236 
237  bool runOnMachineFunction(MachineFunction &MF) override;
238 
239  void getAnalysisUsage(AnalysisUsage &AU) const override;
240 private:
     // When true, runOnMachineFunction passes a null MachineDominatorTree to
     // the combiner info instead of querying the analysis.
241  bool IsOptNone;
242 };
243 } // end anonymous namespace
244 
245 void AMDGPUPreLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
247  AU.setPreservesCFG();
251  if (!IsOptNone) {
254  }
255 
259 }
260 
// Constructs the pass: records IsOptNone and calls
// initializeAMDGPUPreLegalizerCombinerPass with the registry returned by
// PassRegistry::getPassRegistry().
261 AMDGPUPreLegalizerCombiner::AMDGPUPreLegalizerCombiner(bool IsOptNone)
262  : MachineFunctionPass(ID), IsOptNone(IsOptNone) {
263  initializeAMDGPUPreLegalizerCombinerPass(*PassRegistry::getPassRegistry());
264 }
265 
// Runs the combiner over MF. Returns false without doing anything when MF is
// already marked FailedISel; otherwise returns the result of
// Combiner::combineMachineInstrs.
266 bool AMDGPUPreLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
     // Skip functions for which instruction selection has already failed.
267  if (MF.getProperties().hasProperty(
268  MachineFunctionProperties::Property::FailedISel))
269  return false;
270  auto *TPC = &getAnalysis<TargetPassConfig>();
271  const Function &F = MF.getFunction();
     // Optimizations are enabled only when the target's opt level is not None
     // and skipFunction(F) is false.
272  bool EnableOpt =
273  MF.getTarget().getOptLevel() != CodeGenOpt::None && !skipFunction(F);
274  GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
     // The dominator tree is only queried when the pass was not created with
     // IsOptNone; otherwise the combiner info receives nullptr.
275  MachineDominatorTree *MDT =
276  IsOptNone ? nullptr : &getAnalysis<MachineDominatorTree>();
277  AMDGPUPreLegalizerCombinerInfo PCInfo(EnableOpt, F.hasOptSize(),
278  F.hasMinSize(), KB, MDT);
279  // Enable CSE.
     // NOTE(review): the listing jumps from 279 to 281 and `Wrapper` is used
     // below without a visible declaration; the line declaring it appears to
     // be missing from this listing -- confirm against the original file.
281  getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper();
282  auto *CSEInfo = &Wrapper.get(TPC->getCSEConfig());
283 
     // Drive the combiner with the configured info, passing it the CSE info.
284  Combiner C(PCInfo, TPC);
285  return C.combineMachineInstrs(MF, CSEInfo);
286 }
287 
289 INITIALIZE_PASS_BEGIN(AMDGPUPreLegalizerCombiner, DEBUG_TYPE,
290  "Combine AMDGPU machine instrs before legalization",
291  false, false)
294 INITIALIZE_PASS_END(AMDGPUPreLegalizerCombiner, DEBUG_TYPE,
295  "Combine AMDGPU machine instrs before legalization", false,
296  false)
297 
298 namespace llvm {
300  return new AMDGPUPreLegalizerCombiner(IsOptNone);
301 }
302 } // end namespace llvm
AMDGPUCombinerHelper
Definition: AMDGPUCombinerHelper.h:20
MIPatternMatch.h
llvm::TargetMachine::getOptLevel
CodeGenOpt::Level getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
Definition: TargetMachine.cpp:182
AMDGPUPreLegalizerCombinerHelper::matchClampI64ToI16
bool matchClampI64ToI16(MachineInstr &MI, MachineRegisterInfo &MRI, MachineFunction &MF, ClampI64ToI16MatchInfo &MatchInfo)
Definition: AMDGPUPreLegalizerCombiner.cpp:60
CombinerInfo.h
AMDGPUPreLegalizerCombinerHelperState::Helper
AMDGPUCombinerHelper & Helper
Definition: AMDGPUPreLegalizerCombiner.cpp:159
llvm::MachineFunctionProperties::hasProperty
bool hasProperty(Property P) const
Definition: MachineFunction.h:192
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:108
AMDGPUPreLegalizerCombinerHelper::MRI
MachineRegisterInfo & MRI
Definition: AMDGPUPreLegalizerCombiner.cpp:38
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
llvm::AArch64PACKey::ID
ID
Definition: AArch64BaseInfo.h:818
llvm::GISelCSEAnalysisWrapperPass
The actual analysis pass wrapper.
Definition: CSEInfo.h:220
llvm::MIPatternMatch::m_Reg
operand_type_match m_Reg()
Definition: MIPatternMatch.h:268
llvm::GISelKnownBits
Definition: GISelKnownBits.h:29
llvm::MachineRegisterInfo
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Definition: MachineRegisterInfo.h:50
llvm::Function
Definition: Function.h:60
AMDGPUPreLegalizerCombinerHelper::ClampI64ToI16MatchInfo::Cmp2
int64_t Cmp2
Definition: AMDGPUPreLegalizerCombiner.cpp:48
Wrapper
amdgpu aa AMDGPU Address space based Alias Analysis Wrapper
Definition: AMDGPUAliasAnalysis.cpp:31
llvm::logicalview::LVAttributeKind::Generated
@ Generated
llvm::LegacyLegalizeActions::Bitcast
@ Bitcast
Perform the operation on a different, but equivalently sized type.
Definition: LegacyLegalizerInfo.h:54
GISelKnownBits.h
llvm::MachineFunctionPass
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Definition: MachineFunctionPass.h:30
llvm::MIPatternMatch::m_GSMax
BinaryOp_match< LHS, RHS, TargetOpcode::G_SMAX, false > m_GSMax(const LHS &L, const RHS &R)
Definition: MIPatternMatch.h:540
AMDGPUPreLegalizerCombinerHelper::ClampI64ToI16MatchInfo::Cmp1
int64_t Cmp1
Definition: AMDGPUPreLegalizerCombiner.cpp:47
llvm::getSelectionDAGFallbackAnalysisUsage
void getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU)
Modify analysis usage so it preserves passes required for the SelectionDAG fallback.
Definition: Utils.cpp:895
llvm::MIPatternMatch::m_GSMin
BinaryOp_match< LHS, RHS, TargetOpcode::G_SMIN, false > m_GSMin(const LHS &L, const RHS &R)
Definition: MIPatternMatch.h:546
llvm::max
Expected< ExpressionValue > max(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:337
llvm::CombinerInfo
Definition: CombinerInfo.h:26
llvm::MachineFunctionPass::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Definition: MachineFunctionPass.cpp:167
F
#define F(x, y, z)
Definition: MD5.cpp:55
CSEInfo.h
llvm::LLT::fixed_vector
static LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
Definition: LowLevelTypeImpl.h:74
llvm::GISelKnownBitsAnalysis
To use KnownBitsInfo analysis in a pass, KnownBitsInfo &Info = getAnalysis<GISelKnownBitsInfoAnalysis...
Definition: GISelKnownBits.h:113
TargetMachine.h
GCNSubtarget.h
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
AMDGPUPreLegalizerCombinerHelperState::PreLegalizerHelper
AMDGPUPreLegalizerCombinerHelper & PreLegalizerHelper
Definition: AMDGPUPreLegalizerCombiner.cpp:160
AMDGPUPreLegalizerCombinerHelper::B
MachineIRBuilder & B
Definition: AMDGPUPreLegalizerCombiner.cpp:36
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::MachineFunction::getProperties
const MachineFunctionProperties & getProperties() const
Get the function properties.
Definition: MachineFunction.h:748
false
Definition: StackSlotColoring.cpp:141
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
AMDGPU
Definition: AMDGPUReplaceLDSUseWithPointer.cpp:114
AMDGPUPreLegalizerCombinerHelper
Definition: AMDGPUPreLegalizerCombiner.cpp:34
AMDGPUPreLegalizerCombinerHelperState
Definition: AMDGPUPreLegalizerCombiner.cpp:157
llvm::report_fatal_error
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:145
AMDGPUPreLegalizerCombinerHelper::MF
MachineFunction & MF
Definition: AMDGPUPreLegalizerCombiner.cpp:37
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
llvm::TargetPassConfig
Target-Independent Code Generator Pass Configuration Options.
Definition: TargetPassConfig.h:84
Combine
Hexagon Vector Combine
Definition: HexagonVectorCombine.cpp:2520
AMDGPUMCTargetDesc.h
llvm::MachineIRBuilder
Helper class to build MachineInstr.
Definition: MachineIRBuilder.h:221
llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:66
llvm::Combiner
Definition: Combiner.h:26
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
AMDGPUPreLegalizerCombinerHelper::AMDGPUPreLegalizerCombinerHelper
AMDGPUPreLegalizerCombinerHelper(MachineIRBuilder &B, AMDGPUCombinerHelper &Helper)
Definition: AMDGPUPreLegalizerCombiner.cpp:42
TargetPassConfig.h
AMDGPUPreLegalizerCombinerHelper::ClampI64ToI16MatchInfo::Origin
Register Origin
Definition: AMDGPUPreLegalizerCombiner.cpp:49
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
AMDGPUPreLegalizerCombinerHelper::ClampI64ToI16MatchInfo
Definition: AMDGPUPreLegalizerCombiner.cpp:46
llvm::MachineFunction
Definition: MachineFunction.h:257
CombinerHelper.h
llvm::min
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:357
AMDGPUPreLegalizerCombinerHelper::Helper
AMDGPUCombinerHelper & Helper
Definition: AMDGPUPreLegalizerCombiner.cpp:39
AMDGPUPreLegalizerCombinerHelper::applyClampI64ToI16
void applyClampI64ToI16(MachineInstr &MI, const ClampI64ToI16MatchInfo &MatchInfo)
Definition: AMDGPUPreLegalizerCombiner.cpp:121
llvm::AnalysisUsage::setPreservesCFG
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:265
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
AMDGPU.h
llvm::AnalysisUsage::addPreserved
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Definition: PassAnalysisSupport.h:98
Combiner.h
DEBUG_TYPE
#define DEBUG_TYPE
Definition: AMDGPUPreLegalizerCombiner.cpp:29
llvm::GISelChangeObserver
Abstract class that contains various methods for clients to notify about changes.
Definition: GISelChangeObserver.h:29
MRI
unsigned const MachineRegisterInfo * MRI
Definition: AArch64AdvSIMDScalarPass.cpp:105
llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition: MachineFunction.h:623
llvm::MIPatternMatch::m_ICst
ConstantMatch< APInt > m_ICst(APInt &Cst)
Definition: MIPatternMatch.h:92
llvm::None
constexpr std::nullopt_t None
Definition: None.h:27
llvm::MachineFunction::getTarget
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Definition: MachineFunction.h:653
legalization
Combine AMDGPU machine instrs before legalization
Definition: AMDGPUPreLegalizerCombiner.cpp:295
llvm::GISelCSEAnalysisWrapper
Simple wrapper that does the following.
Definition: CSEInfo.h:202
llvm::initializeAMDGPUPreLegalizerCombinerPass
void initializeAMDGPUPreLegalizerCombinerPass(PassRegistry &)
AMDGPULegalizerInfo.h
llvm::MachineRegisterInfo::getType
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
Definition: MachineRegisterInfo.h:745
AMDGPUCombinerHelper.h
llvm::createAMDGPUPreLegalizeCombiner
FunctionPass * createAMDGPUPreLegalizeCombiner(bool IsOptNone)
Definition: AMDGPUPreLegalizerCombiner.cpp:299
AMDGPUPreLegalizerCombinerHelperState::AMDGPUPreLegalizerCombinerHelperState
AMDGPUPreLegalizerCombinerHelperState(AMDGPUCombinerHelper &Helper, AMDGPUPreLegalizerCombinerHelper &PreLegalizerHelper)
Definition: AMDGPUPreLegalizerCombiner.cpp:163
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:308
llvm::MIPatternMatch::mi_match
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
Definition: MIPatternMatch.h:25
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::MachineDominatorTree
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
Definition: MachineDominators.h:51
combine
vector combine
Definition: VectorCombine.cpp:1843
llvm::LLT::scalar
static LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
Definition: LowLevelTypeImpl.h:42
llvm::abs
APFloat abs(APFloat X)
Returns the absolute value of the argument.
Definition: APFloat.h:1297
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(AMDGPUPreLegalizerCombiner, DEBUG_TYPE, "Combine AMDGPU machine instrs before legalization", false, false) INITIALIZE_PASS_END(AMDGPUPreLegalizerCombiner
llvm::sampleprof::Base
@ Base
Definition: Discriminator.h:58
MachineDominators.h
llvm::LLT
Definition: LowLevelTypeImpl.h:39