LLVM  13.0.0git
AMDGPUPostLegalizerCombiner.cpp
Go to the documentation of this file.
1 //=== lib/CodeGen/GlobalISel/AMDGPUPostLegalizerCombiner.cpp ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass does combining of machine instructions at the generic MI level,
10 // after the legalizer.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "AMDGPU.h"
15 #include "AMDGPULegalizerInfo.h"
16 #include "GCNSubtarget.h"
26 
27 #define DEBUG_TYPE "amdgpu-postlegalizer-combiner"
28 
29 using namespace llvm;
30 using namespace MIPatternMatch;
31 
// Remaining members of AMDGPUPostLegalizerCombinerHelper: the match*/apply*
// pairs for the AMDGPU-specific post-legalizer combines.
// NOTE(review): the embedded numbering skips lines 32, 34-37, 40, 43-48 and
// 61-62, so the class header, the data members (B, MF, MRI, Helper), the
// constructor signature and most fields of the two match-info structs are not
// visible in this listing.
33 protected:
38 
39 public:
// Constructor initializer list: keeps the builder and the shared
// CombinerHelper, and caches the builder's MachineFunction and
// MachineRegisterInfo.
41  : B(B), MF(B.getMF()), MRI(*B.getMRI()), Helper(Helper){};
42 
// End of struct FMinFMaxLegacyInfo (its fields are among the missing lines).
49  };
50 
51  // TODO: Make sure fmin_legacy/fmax_legacy don't canonicalize
// Match/apply pair: recognizes an instruction selecting between the two
// operands of a single-use G_FCMP and replaces it with
// G_AMDGPU_FMIN_LEGACY / G_AMDGPU_FMAX_LEGACY.
52  bool matchFMinFMaxLegacy(MachineInstr &MI, FMinFMaxLegacyInfo &Info);
53  void applySelectFCmpToFMinToFMaxLegacy(MachineInstr &MI,
54  const FMinFMaxLegacyInfo &Info);
55 
// Match/apply pair: when only the low 8 bits of the source can be nonzero,
// replaces MI with G_AMDGPU_CVT_F32_UBYTE0 (plus an FP truncate for s16).
56  bool matchUCharToFloat(MachineInstr &MI);
57  void applyUCharToFloat(MachineInstr &MI);
58 
59  // FIXME: Should be able to have 2 separate matchdatas rather than custom
60  // struct boilerplate.
// Tail of struct CvtF32UByteMatchInfo. ShiftOffset is a bit offset into
// CvtVal; it is a multiple of 8 in [8, 32) whenever the match succeeds.
63  unsigned ShiftOffset;
64  };
65 
// Match/apply pair: folds a constant G_LSHR/G_SHL on the source into the
// byte index of the G_AMDGPU_CVT_F32_UBYTEn opcode.
66  bool matchCvtF32UByteN(MachineInstr &MI, CvtF32UByteMatchInfo &MatchInfo);
67  void applyCvtF32UByteN(MachineInstr &MI,
68  const CvtF32UByteMatchInfo &MatchInfo);
69 };
70 
// Body of AMDGPUPostLegalizerCombinerHelper::matchFMinFMaxLegacy(MI, Info).
// NOTE(review): the signature lines (71-72) are missing from this listing.
// Returns true, with Info fully populated, when MI can be rewritten as a
// legacy fmin/fmax. Info may be partially written even when false is returned.
73  // FIXME: Combines should have subtarget predicates, and we shouldn't need
74  // this here.
75  if (!MF.getSubtarget<GCNSubtarget>().hasFminFmaxLegacy())
76  return false;
77 
78  // FIXME: Type predicate on pattern
// Only a 32-bit scalar result is handled.
79  if (MRI.getType(MI.getOperand(0).getReg()) != LLT::scalar(32))
80  return false;
81 
// Operand 1 (presumably the select condition - confirm against the combine
// rule) must be a G_FCMP with no other non-debug user; the pattern captures
// its predicate and both compared registers into Info.
82  Register Cond = MI.getOperand(1).getReg();
83  if (!MRI.hasOneNonDBGUse(Cond) ||
84  !mi_match(Cond, MRI,
85  m_GFCmp(m_Pred(Info.Pred), m_Reg(Info.LHS), m_Reg(Info.RHS))))
86  return false;
87 
88  Info.True = MI.getOperand(2).getReg();
89  Info.False = MI.getOperand(3).getReg();
90 
// The two selected values must be exactly the two compared values, in either
// order; anything else is not a min/max.
91  if (!(Info.LHS == Info.True && Info.RHS == Info.False) &&
92  !(Info.LHS == Info.False && Info.RHS == Info.True))
93  return false;
94 
// Reject predicates that do not describe an ordering between LHS and RHS
// (equality, ordered/unordered tests, always-true). Everything else matches.
// NOTE(review): listing line 96 is missing; presumably
// `case CmpInst::FCMP_FALSE:` - confirm against the real file.
95  switch (Info.Pred) {
97  case CmpInst::FCMP_OEQ:
98  case CmpInst::FCMP_ONE:
99  case CmpInst::FCMP_ORD:
100  case CmpInst::FCMP_UNO:
101  case CmpInst::FCMP_UEQ:
102  case CmpInst::FCMP_UNE:
103  case CmpInst::FCMP_TRUE:
104  return false;
105  default:
106  return true;
107  }
108 }
109 
// Body of AMDGPUPostLegalizerCombinerHelper::applySelectFCmpToFMinToFMaxLegacy
// (MI, Info). NOTE(review): the signature lines (110-111) are missing from
// this listing.
// Replaces MI with one G_AMDGPU_FMIN_LEGACY / G_AMDGPU_FMAX_LEGACY chosen from
// Info.Pred and from which compared value is the "true" result, then erases MI.
112  B.setInstrAndDebugLoc(MI);
// Emits Opc writing MI's original destination operand, with sources X, Y and
// MI's flags carried over.
113  auto buildNewInst = [&MI, this](unsigned Opc, Register X, Register Y) {
114  B.buildInstr(Opc, {MI.getOperand(0)}, {X, Y}, MI.getFlags());
115  };
116 
// In each case below, the unordered predicates pass the operands in the
// opposite order from their ordered counterparts.
117  switch (Info.Pred) {
118  case CmpInst::FCMP_ULT:
119  case CmpInst::FCMP_ULE:
120  if (Info.LHS == Info.True)
121  buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.RHS, Info.LHS);
122  else
123  buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.LHS, Info.RHS);
124  break;
125  case CmpInst::FCMP_OLE:
126  case CmpInst::FCMP_OLT: {
127  // We need to permute the operands to get the correct NaN behavior. The
128  // selected operand is the second one based on the failing compare with NaN,
129  // so permute it based on the compare type the hardware uses.
130  if (Info.LHS == Info.True)
131  buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.LHS, Info.RHS);
132  else
133  buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.RHS, Info.LHS);
134  break;
135  }
136  case CmpInst::FCMP_UGE:
137  case CmpInst::FCMP_UGT: {
138  if (Info.LHS == Info.True)
139  buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.RHS, Info.LHS);
140  else
141  buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.LHS, Info.RHS);
142  break;
143  }
144  case CmpInst::FCMP_OGT:
145  case CmpInst::FCMP_OGE: {
146  if (Info.LHS == Info.True)
147  buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.LHS, Info.RHS);
148  else
149  buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.RHS, Info.LHS);
150  break;
151  }
// matchFMinFMaxLegacy returns false for the predicates it lists explicitly,
// so reaching here means match and apply disagree.
152  default:
153  llvm_unreachable("predicate should not have matched");
154  }
155 
156  MI.eraseFromParent();
157 }
158 
// Body of AMDGPUPostLegalizerCombinerHelper::matchUCharToFloat(MI).
// NOTE(review): the signature line (159) is missing from this listing.
// Returns true when the destination is s32 or s16 and known-bits analysis
// proves every source bit above the low 8 is zero.
160  Register DstReg = MI.getOperand(0).getReg();
161 
162  // TODO: We could try to match extracting the higher bytes, which would be
163  // easier if i8 vectors weren't promoted to i32 vectors, particularly after
164  // types are legalized. v4i8 -> v4f32 is probably the only case to worry
165  // about in practice.
166  LLT Ty = MRI.getType(DstReg);
167  if (Ty == LLT::scalar(32) || Ty == LLT::scalar(16)) {
168  Register SrcReg = MI.getOperand(1).getReg();
169  unsigned SrcSize = MRI.getType(SrcReg).getSizeInBits();
170  assert(SrcSize == 16 || SrcSize == 32 || SrcSize == 64);
// Mask covers the top (SrcSize - 8) bits, i.e. everything except the low byte.
171  const APInt Mask = APInt::getHighBitsSet(SrcSize, SrcSize - 8);
172  return Helper.getKnownBits()->maskedValueIsZero(SrcReg, Mask);
173  }
174 
175  return false;
176 }
177 
// Body of AMDGPUPostLegalizerCombinerHelper::applyUCharToFloat(MI).
// NOTE(review): the signature line (178) is missing from this listing.
// Replaces MI with G_AMDGPU_CVT_F32_UBYTE0 on an s32 view of the source,
// adding a G_FPTRUNC when the destination is not s32, then erases MI.
179  B.setInstrAndDebugLoc(MI);
180 
181  const LLT S32 = LLT::scalar(32);
182 
183  Register DstReg = MI.getOperand(0).getReg();
184  Register SrcReg = MI.getOperand(1).getReg();
185  LLT Ty = MRI.getType(DstReg);
186  LLT SrcTy = MRI.getType(SrcReg);
// The conversion is built on an s32 source, so widen or narrow the source
// first. matchUCharToFloat accepts only sources whose bits above the low byte
// are known zero.
187  if (SrcTy != S32)
188  SrcReg = B.buildAnyExtOrTrunc(S32, SrcReg).getReg(0);
189 
190  if (Ty == S32) {
191  B.buildInstr(AMDGPU::G_AMDGPU_CVT_F32_UBYTE0, {DstReg},
192  {SrcReg}, MI.getFlags());
193  } else {
// Non-s32 destination (s16 per the matcher): convert to s32 first, then
// truncate the floating-point result into DstReg.
194  auto Cvt0 = B.buildInstr(AMDGPU::G_AMDGPU_CVT_F32_UBYTE0, {S32},
195  {SrcReg}, MI.getFlags());
196  B.buildFPTrunc(DstReg, Cvt0, MI.getFlags());
197  }
198 
199  MI.eraseFromParent();
200 }
201 
203  MachineInstr &MI, CvtF32UByteMatchInfo &MatchInfo) {
// AMDGPUPostLegalizerCombinerHelper::matchCvtF32UByteN (first signature line,
// 202, is missing from this listing).
// Returns true when the source of MI is a constant G_LSHR/G_SHL that can be
// absorbed into the byte index of the conversion opcode. MatchInfo is written
// whenever a shift is found, even if the final range check fails.
204  Register SrcReg = MI.getOperand(1).getReg();
205 
206  // Look through G_ZEXT.
// The match result is deliberately ignored: SrcReg is rebound to the G_ZEXT
// source when the pattern matches (presumably left untouched otherwise -
// confirm against MIPatternMatch).
207  mi_match(SrcReg, MRI, m_GZExt(m_Reg(SrcReg)));
208 
209  Register Src0;
210  int64_t ShiftAmt;
211  bool IsShr = mi_match(SrcReg, MRI, m_GLShr(m_Reg(Src0), m_ICst(ShiftAmt)));
212  if (IsShr || mi_match(SrcReg, MRI, m_GShl(m_Reg(Src0), m_ICst(ShiftAmt)))) {
// Byte index currently selected by MI, derived from the opcode's distance
// from UBYTE0 (assumes the UBYTEn opcodes are numbered consecutively -
// TODO confirm).
213  const unsigned Offset = MI.getOpcode() - AMDGPU::G_AMDGPU_CVT_F32_UBYTE0;
214 
// Express the selected byte as a bit offset into the unshifted value: a right
// shift moves it up, a left shift moves it down. ShiftOffset is unsigned, so
// a shift-left larger than the current offset wraps; the range check on the
// return below is what rejects that.
215  unsigned ShiftOffset = 8 * Offset;
216  if (IsShr)
217  ShiftOffset += ShiftAmt;
218  else
219  ShiftOffset -= ShiftAmt;
220 
221  MatchInfo.CvtVal = Src0;
222  MatchInfo.ShiftOffset = ShiftOffset;
// Accept only byte-aligned offsets within bytes 1..3 of a 32-bit value.
223  return ShiftOffset < 32 && ShiftOffset >= 8 && (ShiftOffset % 8) == 0;
224  }
225 
226  // TODO: Simplify demanded bits.
227  return false;
228 }
229 
231  MachineInstr &MI, const CvtF32UByteMatchInfo &MatchInfo) {
// AMDGPUPostLegalizerCombinerHelper::applyCvtF32UByteN (first signature line,
// 230, is missing from this listing).
// Rebuilds MI as the UBYTEn conversion selected by MatchInfo.ShiftOffset,
// reading directly from MatchInfo.CvtVal, then erases MI.
232  B.setInstrAndDebugLoc(MI);
// ShiftOffset / 8 is the byte index, added to the UBYTE0 opcode.
233  unsigned NewOpc = AMDGPU::G_AMDGPU_CVT_F32_UBYTE0 + MatchInfo.ShiftOffset / 8;
234 
235  const LLT S32 = LLT::scalar(32);
236  Register CvtSrc = MatchInfo.CvtVal;
237  LLT SrcTy = MRI.getType(MatchInfo.CvtVal);
// The new instruction is given an s32 source; non-s32 sources are
// any-extended.
// NOTE(review): buildAnyExt implies SrcTy is narrower than 32 bits here, but
// the assert only checks scalar and >= 8 bits - confirm wider types cannot
// reach this point.
238  if (SrcTy != S32) {
239  assert(SrcTy.isScalar() && SrcTy.getSizeInBits() >= 8);
240  CvtSrc = B.buildAnyExt(S32, CvtSrc).getReg(0);
241  }
242 
// Rebuilding with the same opcode would mean the match folded nothing.
243  assert(MI.getOpcode() != NewOpc);
244  B.buildInstr(NewOpc, {MI.getOperand(0)}, {CvtSrc}, MI.getFlags());
245  MI.eraseFromParent();
246 }
247 
// Remnant of class AMDGPUPostLegalizerCombinerHelperState.
// NOTE(review): the class header (248), the two reference members (250-251)
// and the first constructor line (254) are missing from this listing.
// The constructor only stores the generic CombinerHelper and the
// AMDGPU-specific helper.
249 protected:
252 
253 public:
255  CombinerHelper &Helper,
256  AMDGPUPostLegalizerCombinerHelper &PostLegalizerHelper)
257  : Helper(Helper), PostLegalizerHelper(PostLegalizerHelper) {}
258 };
259 
260 #define AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
261 #include "AMDGPUGenPostLegalizeGICombiner.inc"
262 #undef AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
263 
264 namespace {
265 #define AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H
266 #include "AMDGPUGenPostLegalizeGICombiner.inc"
267 #undef AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H
268 
// CombinerInfo implementation for the AMDGPU post-legalizer combiner. Holds
// the analyses handed to each CombinerHelper and the generated rule
// configuration.
// NOTE(review): listing lines 271 and 278 are missing; presumably the MDT
// member declaration and the trailing KB/MDT constructor parameters, since
// both names are used in the initializer list below.
269 class AMDGPUPostLegalizerCombinerInfo final : public CombinerInfo {
270  GISelKnownBits *KB;
272 
273 public:
274  AMDGPUGenPostLegalizerCombinerHelperRuleConfig GeneratedRuleCfg;
275 
// Forwards the optimization settings and legalizer info to CombinerInfo
// (illegal ops disallowed, illegal results re-legalized) and aborts with a
// fatal error if the rule-selection command-line option cannot be parsed.
276  AMDGPUPostLegalizerCombinerInfo(bool EnableOpt, bool OptSize, bool MinSize,
277  const AMDGPULegalizerInfo *LI,
279  : CombinerInfo(/*AllowIllegalOps*/ false, /*ShouldLegalizeIllegal*/ true,
280  /*LegalizerInfo*/ LI, EnableOpt, OptSize, MinSize),
281  KB(KB), MDT(MDT) {
282  if (!GeneratedRuleCfg.parseCommandLineOption())
283  report_fatal_error("Invalid rule identifier");
284  }
285 
// Attempts to combine MI; defined out of line further down in this file.
286  bool combine(GISelChangeObserver &Observer, MachineInstr &MI,
287  MachineIRBuilder &B) const override;
288 };
289 
291  MachineInstr &MI,
292  MachineIRBuilder &B) const {
// AMDGPUPostLegalizerCombinerInfo::combine (first signature line, 290, is
// missing from this listing).
// Builds fresh helper objects for this call, tries the generated rules first,
// and falls back to a hand-written 64-bit shift split. Returns true if a
// combine was applied.
293  CombinerHelper Helper(Observer, B, KB, MDT, LInfo);
294  AMDGPUPostLegalizerCombinerHelper PostLegalizerHelper(B, Helper);
295  AMDGPUGenPostLegalizerCombinerHelper Generated(GeneratedRuleCfg, Helper,
296  PostLegalizerHelper);
297 
298  if (Generated.tryCombineAll(Observer, MI, B))
299  return true;
300 
301  switch (MI.getOpcode()) {
302  case TargetOpcode::G_SHL:
303  case TargetOpcode::G_LSHR:
304  case TargetOpcode::G_ASHR:
305  // On some subtargets, 64-bit shift is a quarter rate instruction. In the
306  // common case, splitting this into a move and a 32-bit shift is faster and
307  // the same code size.
308  return Helper.tryCombineShiftToUnmerge(MI, 32);
309  }
310 
311  return false;
312 }
313 
314 #define AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP
315 #include "AMDGPUGenPostLegalizeGICombiner.inc"
316 #undef AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP
317 
318 // Pass boilerplate
319 // ================
320 
// MachineFunctionPass wrapper that runs the AMDGPU post-legalizer combiner
// over a function.
321 class AMDGPUPostLegalizerCombiner : public MachineFunctionPass {
322 public:
323  static char ID;
324 
// IsOptNone == true makes runOnMachineFunction pass a null dominator tree to
// the combiner instead of fetching MachineDominatorTree.
325  AMDGPUPostLegalizerCombiner(bool IsOptNone = false);
326 
327  StringRef getPassName() const override {
328  return "AMDGPUPostLegalizerCombiner";
329  }
330 
331  bool runOnMachineFunction(MachineFunction &MF) override;
332 
333  void getAnalysisUsage(AnalysisUsage &AU) const override;
334 private:
// Set once by the constructor; read by getAnalysisUsage and
// runOnMachineFunction.
335  bool IsOptNone;
336 };
337 } // end anonymous namespace
338 
// Declares the analyses this pass needs and preserves.
// NOTE(review): listing lines 340, 342-344, 346-347 and 349 are missing, so
// only the CFG-preservation call and an empty `if (!IsOptNone)` shell remain.
// Presumably the dropped lines require TargetPassConfig and
// GISelKnownBitsAnalysis, require MachineDominatorTree only when !IsOptNone
// (runOnMachineFunction fetches exactly those), and chain to
// MachineFunctionPass::getAnalysisUsage - confirm against the real file.
339 void AMDGPUPostLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
341  AU.setPreservesCFG();
345  if (!IsOptNone) {
348  }
350 }
351 
// Records IsOptNone and calls the pass's initialize function on the global
// PassRegistry.
352 AMDGPUPostLegalizerCombiner::AMDGPUPostLegalizerCombiner(bool IsOptNone)
353  : MachineFunctionPass(ID), IsOptNone(IsOptNone) {
354  initializeAMDGPUPostLegalizerCombinerPass(*PassRegistry::getPassRegistry());
355 }
356 
// Runs the post-legalizer combiner over MF and returns the result of
// Combiner::combineMachineInstrs (presumably true when MF changed).
357 bool AMDGPUPostLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
// Nothing to do if instruction selection already failed for this function.
358  if (MF.getProperties().hasProperty(
359  MachineFunctionProperties::Property::FailedISel))
360  return false;
361  auto *TPC = &getAnalysis<TargetPassConfig>();
362  const Function &F = MF.getFunction();
// Optimizing combines are enabled only above -O0 and when the function is not
// skipped by skipFunction.
363  bool EnableOpt =
364  MF.getTarget().getOptLevel() != CodeGenOpt::None && !skipFunction(F);
365 
366  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
367  const AMDGPULegalizerInfo *LI
368  = static_cast<const AMDGPULegalizerInfo *>(ST.getLegalizerInfo());
369 
370  GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
// The dominator tree is fetched only when the pass was not created with
// IsOptNone; otherwise the combiner gets a null MDT.
371  MachineDominatorTree *MDT =
372  IsOptNone ? nullptr : &getAnalysis<MachineDominatorTree>();
373  AMDGPUPostLegalizerCombinerInfo PCInfo(EnableOpt, F.hasOptSize(),
374  F.hasMinSize(), LI, KB, MDT);
375  Combiner C(PCInfo, TPC);
// No CSE info is supplied to the combiner.
376  return C.combineMachineInstrs(MF, /*CSEInfo*/ nullptr);
377 }
378 
380 INITIALIZE_PASS_BEGIN(AMDGPUPostLegalizerCombiner, DEBUG_TYPE,
381  "Combine AMDGPU machine instrs after legalization",
382  false, false)
385 INITIALIZE_PASS_END(AMDGPUPostLegalizerCombiner, DEBUG_TYPE,
386  "Combine AMDGPU machine instrs after legalization", false,
387  false)
388 
// Factory for the pass. NOTE(review): the signature line (390) is missing
// from this listing; the cross-reference index gives it as
// `FunctionPass *createAMDGPUPostLegalizeCombiner(bool IsOptNone)`.
// Allocates a new AMDGPUPostLegalizerCombiner, forwarding IsOptNone.
389 namespace llvm {
391  return new AMDGPUPostLegalizerCombiner(IsOptNone);
392 }
393 } // end namespace llvm
MIPatternMatch.h
llvm::CmpInst::FCMP_ULE
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
Definition: InstrTypes.h:737
llvm::TargetMachine::getOptLevel
CodeGenOpt::Level getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
Definition: TargetMachine.cpp:198
CombinerInfo.h
llvm::MachineFunctionProperties::hasProperty
bool hasProperty(Property P) const
Definition: MachineFunction.h:162
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:100
llvm
Definition: AllocatorList.h:23
llvm::initializeAMDGPUPostLegalizerCombinerPass
void initializeAMDGPUPostLegalizerCombinerPass(PassRegistry &)
llvm::CmpInst::Predicate
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:722
llvm::MIPatternMatch::m_Reg
operand_type_match m_Reg()
Definition: MIPatternMatch.h:106
llvm::GISelKnownBits
Definition: GISelKnownBits.h:29
llvm::MIPatternMatch::m_GShl
BinaryOp_match< LHS, RHS, TargetOpcode::G_SHL, false > m_GShl(const LHS &L, const RHS &R)
Definition: MIPatternMatch.h:294
llvm::MachineRegisterInfo
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Definition: MachineRegisterInfo.h:52
AMDGPUPostLegalizerCombinerHelper::matchCvtF32UByteN
bool matchCvtF32UByteN(MachineInstr &MI, CvtF32UByteMatchInfo &MatchInfo)
Definition: AMDGPUPostLegalizerCombiner.cpp:202
llvm::Function
Definition: Function.h:61
AMDGPUPostLegalizerCombinerHelper::FMinFMaxLegacyInfo
Definition: AMDGPUPostLegalizerCombiner.cpp:43
llvm::MIPatternMatch::m_GLShr
BinaryOp_match< LHS, RHS, TargetOpcode::G_LSHR, false > m_GLShr(const LHS &L, const RHS &R)
Definition: MIPatternMatch.h:300
AMDGPUPostLegalizerCombinerHelperState::AMDGPUPostLegalizerCombinerHelperState
AMDGPUPostLegalizerCombinerHelperState(CombinerHelper &Helper, AMDGPUPostLegalizerCombinerHelper &PostLegalizerHelper)
Definition: AMDGPUPostLegalizerCombiner.cpp:254
AMDGPUPostLegalizerCombinerHelperState
Definition: AMDGPUPostLegalizerCombiner.cpp:248
AMDGPUPostLegalizerCombinerHelperState::Helper
CombinerHelper & Helper
Definition: AMDGPUPostLegalizerCombiner.cpp:250
llvm::CmpInst::FCMP_ONE
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
Definition: InstrTypes.h:730
GISelKnownBits.h
llvm::MachineFunctionPass
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Definition: MachineFunctionPass.h:30
llvm::createAMDGPUPostLegalizeCombiner
FunctionPass * createAMDGPUPostLegalizeCombiner(bool IsOptNone)
Definition: AMDGPUPostLegalizerCombiner.cpp:390
Offset
uint64_t Offset
Definition: ELFObjHandler.cpp:81
llvm::getSelectionDAGFallbackAnalysisUsage
void getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU)
Modify analysis usage so it preserves passes required for the SelectionDAG fallback.
Definition: Utils.cpp:741
llvm::GCNSubtarget
Definition: GCNSubtarget.h:38
llvm::CombinerInfo
Definition: CombinerInfo.h:27
llvm::CmpInst::FCMP_OGT
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition: InstrTypes.h:726
llvm::BitmaskEnumDetail::Mask
std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
INITIALIZE_PASS_END
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
Definition: RegBankSelect.cpp:69
llvm::MachineFunctionPass::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Definition: MachineFunctionPass.cpp:102
llvm::AMDGPULegalizerInfo
This class provides the information for the target register banks.
Definition: AMDGPULegalizerInfo.h:32
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::CmpInst::FCMP_ULT
@ FCMP_ULT
1 1 0 0 True if unordered or less than
Definition: InstrTypes.h:736
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(AMDGPUPostLegalizerCombiner, DEBUG_TYPE, "Combine AMDGPU machine instrs after legalization", false, false) INITIALIZE_PASS_END(AMDGPUPostLegalizerCombiner
llvm::GISelKnownBitsAnalysis
To use KnownBitsInfo analysis in a pass, KnownBitsInfo &Info = getAnalysis<GISelKnownBitsInfoAnalysis...
Definition: GISelKnownBits.h:113
TargetMachine.h
llvm::MIPatternMatch::m_GZExt
UnaryOp_match< SrcTy, TargetOpcode::G_ZEXT > m_GZExt(const SrcTy &Src)
Definition: MIPatternMatch.h:351
GCNSubtarget.h
AMDGPUPostLegalizerCombinerHelper::applyUCharToFloat
void applyUCharToFloat(MachineInstr &MI)
Definition: AMDGPUPostLegalizerCombiner.cpp:178
llvm::LLT::getSizeInBits
unsigned getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
Definition: LowLevelTypeImpl.h:109
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
Y
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::CmpInst::FCMP_UGE
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Definition: InstrTypes.h:735
llvm::MachineFunction::getProperties
const MachineFunctionProperties & getProperties() const
Get the function properties.
Definition: MachineFunction.h:646
false
Definition: StackSlotColoring.cpp:142
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
llvm::CmpInst::FCMP_UNO
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Definition: InstrTypes.h:732
AMDGPUPostLegalizerCombinerHelper::applyCvtF32UByteN
void applyCvtF32UByteN(MachineInstr &MI, const CvtF32UByteMatchInfo &MatchInfo)
Definition: AMDGPUPostLegalizerCombiner.cpp:230
llvm::report_fatal_error
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:140
llvm::APInt::getHighBitsSet
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Get a value with high bits set.
Definition: APInt.h:656
AMDGPUPostLegalizerCombinerHelperState::PostLegalizerHelper
AMDGPUPostLegalizerCombinerHelper & PostLegalizerHelper
Definition: AMDGPUPostLegalizerCombiner.cpp:251
DEBUG_TYPE
#define DEBUG_TYPE
Definition: AMDGPUPostLegalizerCombiner.cpp:27
llvm::CmpInst::FCMP_OEQ
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
Definition: InstrTypes.h:725
Info
Analysis containing CSE Info
Definition: CSEInfo.cpp:26
llvm::CombinerHelper
Definition: CombinerHelper.h:89
llvm::CmpInst::FCMP_OLT
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition: InstrTypes.h:728
llvm::MIPatternMatch::m_Pred
bind_ty< CmpInst::Predicate > m_Pred(CmpInst::Predicate &P)
Definition: MIPatternMatch.h:203
llvm::None
const NoneType None
Definition: None.h:23
X
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
AMDGPUPostLegalizerCombinerHelper::matchUCharToFloat
bool matchUCharToFloat(MachineInstr &MI)
Definition: AMDGPUPostLegalizerCombiner.cpp:159
llvm::CmpInst::FCMP_FALSE
@ FCMP_FALSE
0 0 0 0 Always false (always folded)
Definition: InstrTypes.h:724
llvm::TargetPassConfig
Target-Independent Code Generator Pass Configuration Options.
Definition: TargetPassConfig.h:84
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:555
AMDGPUPostLegalizerCombinerHelper::FMinFMaxLegacyInfo::False
Register False
Definition: AMDGPUPostLegalizerCombiner.cpp:47
Combine
Hexagon Vector Combine
Definition: HexagonVectorCombine.cpp:1483
AMDGPUMCTargetDesc.h
llvm::MachineIRBuilder
Helper class to build MachineInstr.
Definition: MachineIRBuilder.h:220
AMDGPUPostLegalizerCombinerHelper::AMDGPUPostLegalizerCombinerHelper
AMDGPUPostLegalizerCombinerHelper(MachineIRBuilder &B, CombinerHelper &Helper)
Definition: AMDGPUPostLegalizerCombiner.cpp:40
llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:64
llvm::Combiner
Definition: Combiner.h:27
AMDGPUPostLegalizerCombinerHelper::matchFMinFMaxLegacy
bool matchFMinFMaxLegacy(MachineInstr &MI, FMinFMaxLegacyInfo &Info)
Definition: AMDGPUPostLegalizerCombiner.cpp:71
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
TargetPassConfig.h
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::CmpInst::FCMP_OGE
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition: InstrTypes.h:727
AMDGPUPostLegalizerCombinerHelper::Helper
CombinerHelper & Helper
Definition: AMDGPUPostLegalizerCombiner.cpp:37
llvm::LLT::isScalar
bool isScalar() const
Definition: LowLevelTypeImpl.h:92
llvm::AMDGPUSubtarget::hasFminFmaxLegacy
bool hasFminFmaxLegacy() const
Definition: AMDGPUSubtarget.h:168
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:71
llvm::MachineFunction
Definition: MachineFunction.h:227
CombinerHelper.h
AMDGPUPostLegalizerCombinerHelper::MF
MachineFunction & MF
Definition: AMDGPUPostLegalizerCombiner.cpp:35
AMDGPUPostLegalizerCombinerHelper::FMinFMaxLegacyInfo::Pred
CmpInst::Predicate Pred
Definition: AMDGPUPostLegalizerCombiner.cpp:48
Cond
SmallVector< MachineOperand, 4 > Cond
Definition: BasicBlockSections.cpp:167
llvm::AnalysisUsage::setPreservesCFG
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:253
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:57
AMDGPU.h
llvm::MachineRegisterInfo::hasOneNonDBGUse
bool hasOneNonDBGUse(Register RegNo) const
hasOneNonDBGUse - Return true if there is exactly one non-Debug use of the specified register.
Definition: MachineRegisterInfo.cpp:419
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:136
llvm::AnalysisUsage::addPreserved
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Definition: PassAnalysisSupport.h:98
Combiner.h
AMDGPUPostLegalizerCombinerHelper::applySelectFCmpToFMinToFMaxLegacy
void applySelectFCmpToFMinToFMaxLegacy(MachineInstr &MI, const FMinFMaxLegacyInfo &Info)
Definition: AMDGPUPostLegalizerCombiner.cpp:110
llvm::GISelChangeObserver
Abstract class that contains various methods for clients to notify about changes.
Definition: GISelChangeObserver.h:29
MRI
unsigned const MachineRegisterInfo * MRI
Definition: AArch64AdvSIMDScalarPass.cpp:105
llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
llvm::CmpInst::FCMP_UGT
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
Definition: InstrTypes.h:734
AMDGPUPostLegalizerCombinerHelper::B
MachineIRBuilder & B
Definition: AMDGPUPostLegalizerCombiner.cpp:34
AMDGPUPostLegalizerCombinerHelper::FMinFMaxLegacyInfo::True
Register True
Definition: AMDGPUPostLegalizerCombiner.cpp:46
llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition: MachineFunction.h:521
llvm::MIPatternMatch::m_GFCmp
CompareOp_match< Pred, LHS, RHS, TargetOpcode::G_FCMP > m_GFCmp(const Pred &P, const LHS &L, const RHS &R)
Definition: MIPatternMatch.h:439
AMDGPUPostLegalizerCombinerHelper::CvtF32UByteMatchInfo::ShiftOffset
unsigned ShiftOffset
Definition: AMDGPUPostLegalizerCombiner.cpp:63
llvm::MachineFunction::getTarget
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Definition: MachineFunction.h:551
legalization
Combine AMDGPU machine instrs after legalization
Definition: AMDGPUPostLegalizerCombiner.cpp:386
AMDGPUPostLegalizerCombinerHelper
Definition: AMDGPUPostLegalizerCombiner.cpp:32
AMDGPUPostLegalizerCombinerHelper::FMinFMaxLegacyInfo::RHS
Register RHS
Definition: AMDGPUPostLegalizerCombiner.cpp:45
AMDGPUPostLegalizerCombinerHelper::CvtF32UByteMatchInfo::CvtVal
Register CvtVal
Definition: AMDGPUPostLegalizerCombiner.cpp:62
llvm::MIPatternMatch::m_ICst
ConstantMatch m_ICst(int64_t &Cst)
Definition: MIPatternMatch.h:69
AMDGPUPostLegalizerCombinerHelper::MRI
MachineRegisterInfo & MRI
Definition: AMDGPUPostLegalizerCombiner.cpp:36
AMDGPUPostLegalizerCombinerHelper::FMinFMaxLegacyInfo::LHS
Register LHS
Definition: AMDGPUPostLegalizerCombiner.cpp:44
AMDGPULegalizerInfo.h
llvm::MachineRegisterInfo::getType
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
Definition: MachineRegisterInfo.h:732
llvm::CmpInst::FCMP_UNE
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
Definition: InstrTypes.h:738
AMDGPUPostLegalizerCombinerHelper::CvtF32UByteMatchInfo
Definition: AMDGPUPostLegalizerCombiner.cpp:61
llvm::CmpInst::FCMP_OLE
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition: InstrTypes.h:729
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:298
llvm::MIPatternMatch::mi_match
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
Definition: MIPatternMatch.h:24
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::MachineDominatorTree
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
Definition: MachineDominators.h:45
combine
vector combine
Definition: VectorCombine.cpp:833
llvm::CmpInst::FCMP_TRUE
@ FCMP_TRUE
1 1 1 1 Always true (always folded)
Definition: InstrTypes.h:739
llvm::LLT::scalar
static LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
Definition: LowLevelTypeImpl.h:43
llvm::CmpInst::FCMP_ORD
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
Definition: InstrTypes.h:731
llvm::CmpInst::FCMP_UEQ
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
Definition: InstrTypes.h:733
machine
coro Split coroutine into a set of functions driving its state machine
Definition: CoroSplit.cpp:2246
MachineDominators.h
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38
llvm::LLT
Definition: LowLevelTypeImpl.h:40