LLVM  16.0.0git
AMDGPUPostLegalizerCombiner.cpp
Go to the documentation of this file.
1 //=== lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp --------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass does combining of machine instructions at the generic MI level,
10 // after the legalizer.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "AMDGPU.h"
15 #include "AMDGPUCombinerHelper.h"
16 #include "AMDGPULegalizerInfo.h"
17 #include "GCNSubtarget.h"
26 #include "llvm/IR/IntrinsicsAMDGPU.h"
28 
29 #define DEBUG_TYPE "amdgpu-postlegalizer-combiner"
30 
31 using namespace llvm;
32 using namespace MIPatternMatch;
33 
35 protected:
40 
41 public:
43  AMDGPUCombinerHelper &Helper)
44  : B(B), MF(B.getMF()), MRI(*B.getMRI()), Helper(Helper){};
45 
52  };
53 
54  // TODO: Make sure fmin_legacy/fmax_legacy don't canonicalize
55  bool matchFMinFMaxLegacy(MachineInstr &MI, FMinFMaxLegacyInfo &Info);
56  void applySelectFCmpToFMinToFMaxLegacy(MachineInstr &MI,
57  const FMinFMaxLegacyInfo &Info);
58 
59  bool matchUCharToFloat(MachineInstr &MI);
60  void applyUCharToFloat(MachineInstr &MI);
61 
62  bool matchRcpSqrtToRsq(MachineInstr &MI,
63  std::function<void(MachineIRBuilder &)> &MatchInfo);
64 
65  // FIXME: Should be able to have 2 separate matchdatas rather than custom
66  // struct boilerplate.
69  unsigned ShiftOffset;
70  };
71 
72  bool matchCvtF32UByteN(MachineInstr &MI, CvtF32UByteMatchInfo &MatchInfo);
73  void applyCvtF32UByteN(MachineInstr &MI,
74  const CvtF32UByteMatchInfo &MatchInfo);
75 
76  bool matchRemoveFcanonicalize(MachineInstr &MI, Register &Reg);
77 };
78 
81  // FIXME: Type predicate on pattern
82  if (MRI.getType(MI.getOperand(0).getReg()) != LLT::scalar(32))
83  return false;
84 
85  Register Cond = MI.getOperand(1).getReg();
86  if (!MRI.hasOneNonDBGUse(Cond) ||
87  !mi_match(Cond, MRI,
88  m_GFCmp(m_Pred(Info.Pred), m_Reg(Info.LHS), m_Reg(Info.RHS))))
89  return false;
90 
91  Info.True = MI.getOperand(2).getReg();
92  Info.False = MI.getOperand(3).getReg();
93 
94  if (!(Info.LHS == Info.True && Info.RHS == Info.False) &&
95  !(Info.LHS == Info.False && Info.RHS == Info.True))
96  return false;
97 
98  switch (Info.Pred) {
100  case CmpInst::FCMP_OEQ:
101  case CmpInst::FCMP_ONE:
102  case CmpInst::FCMP_ORD:
103  case CmpInst::FCMP_UNO:
104  case CmpInst::FCMP_UEQ:
105  case CmpInst::FCMP_UNE:
106  case CmpInst::FCMP_TRUE:
107  return false;
108  default:
109  return true;
110  }
111 }
112 
115  B.setInstrAndDebugLoc(MI);
116  auto buildNewInst = [&MI, this](unsigned Opc, Register X, Register Y) {
117  B.buildInstr(Opc, {MI.getOperand(0)}, {X, Y}, MI.getFlags());
118  };
119 
120  switch (Info.Pred) {
121  case CmpInst::FCMP_ULT:
122  case CmpInst::FCMP_ULE:
123  if (Info.LHS == Info.True)
124  buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.RHS, Info.LHS);
125  else
126  buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.LHS, Info.RHS);
127  break;
128  case CmpInst::FCMP_OLE:
129  case CmpInst::FCMP_OLT: {
130  // We need to permute the operands to get the correct NaN behavior. The
131  // selected operand is the second one based on the failing compare with NaN,
132  // so permute it based on the compare type the hardware uses.
133  if (Info.LHS == Info.True)
134  buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.LHS, Info.RHS);
135  else
136  buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.RHS, Info.LHS);
137  break;
138  }
139  case CmpInst::FCMP_UGE:
140  case CmpInst::FCMP_UGT: {
141  if (Info.LHS == Info.True)
142  buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.RHS, Info.LHS);
143  else
144  buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.LHS, Info.RHS);
145  break;
146  }
147  case CmpInst::FCMP_OGT:
148  case CmpInst::FCMP_OGE: {
149  if (Info.LHS == Info.True)
150  buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.LHS, Info.RHS);
151  else
152  buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.RHS, Info.LHS);
153  break;
154  }
155  default:
156  llvm_unreachable("predicate should not have matched");
157  }
158 
159  MI.eraseFromParent();
160 }
161 
163  Register DstReg = MI.getOperand(0).getReg();
164 
165  // TODO: We could try to match extracting the higher bytes, which would be
166  // easier if i8 vectors weren't promoted to i32 vectors, particularly after
167  // types are legalized. v4i8 -> v4f32 is probably the only case to worry
168  // about in practice.
169  LLT Ty = MRI.getType(DstReg);
170  if (Ty == LLT::scalar(32) || Ty == LLT::scalar(16)) {
171  Register SrcReg = MI.getOperand(1).getReg();
172  unsigned SrcSize = MRI.getType(SrcReg).getSizeInBits();
173  assert(SrcSize == 16 || SrcSize == 32 || SrcSize == 64);
174  const APInt Mask = APInt::getHighBitsSet(SrcSize, SrcSize - 8);
175  return Helper.getKnownBits()->maskedValueIsZero(SrcReg, Mask);
176  }
177 
178  return false;
179 }
180 
182  B.setInstrAndDebugLoc(MI);
183 
184  const LLT S32 = LLT::scalar(32);
185 
186  Register DstReg = MI.getOperand(0).getReg();
187  Register SrcReg = MI.getOperand(1).getReg();
188  LLT Ty = MRI.getType(DstReg);
189  LLT SrcTy = MRI.getType(SrcReg);
190  if (SrcTy != S32)
191  SrcReg = B.buildAnyExtOrTrunc(S32, SrcReg).getReg(0);
192 
193  if (Ty == S32) {
194  B.buildInstr(AMDGPU::G_AMDGPU_CVT_F32_UBYTE0, {DstReg},
195  {SrcReg}, MI.getFlags());
196  } else {
197  auto Cvt0 = B.buildInstr(AMDGPU::G_AMDGPU_CVT_F32_UBYTE0, {S32},
198  {SrcReg}, MI.getFlags());
199  B.buildFPTrunc(DstReg, Cvt0, MI.getFlags());
200  }
201 
202  MI.eraseFromParent();
203 }
204 
206  MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
207 
208  auto getRcpSrc = [=](const MachineInstr &MI) {
209  MachineInstr *ResMI = nullptr;
210  if (MI.getOpcode() == TargetOpcode::G_INTRINSIC &&
211  MI.getIntrinsicID() == Intrinsic::amdgcn_rcp)
212  ResMI = MRI.getVRegDef(MI.getOperand(2).getReg());
213 
214  return ResMI;
215  };
216 
217  auto getSqrtSrc = [=](const MachineInstr &MI) {
218  MachineInstr *SqrtSrcMI = nullptr;
219  auto Match =
220  mi_match(MI.getOperand(0).getReg(), MRI, m_GFSqrt(m_MInstr(SqrtSrcMI)));
221  (void)Match;
222  return SqrtSrcMI;
223  };
224 
225  MachineInstr *RcpSrcMI = nullptr, *SqrtSrcMI = nullptr;
226  // rcp(sqrt(x))
227  if ((RcpSrcMI = getRcpSrc(MI)) && (SqrtSrcMI = getSqrtSrc(*RcpSrcMI))) {
228  MatchInfo = [SqrtSrcMI, &MI](MachineIRBuilder &B) {
229  B.buildIntrinsic(Intrinsic::amdgcn_rsq, {MI.getOperand(0)}, false)
230  .addUse(SqrtSrcMI->getOperand(0).getReg())
231  .setMIFlags(MI.getFlags());
232  };
233  return true;
234  }
235 
236  // sqrt(rcp(x))
237  if ((SqrtSrcMI = getSqrtSrc(MI)) && (RcpSrcMI = getRcpSrc(*SqrtSrcMI))) {
238  MatchInfo = [RcpSrcMI, &MI](MachineIRBuilder &B) {
239  B.buildIntrinsic(Intrinsic::amdgcn_rsq, {MI.getOperand(0)}, false)
240  .addUse(RcpSrcMI->getOperand(0).getReg())
241  .setMIFlags(MI.getFlags());
242  };
243  return true;
244  }
245 
246  return false;
247 }
248 
250  MachineInstr &MI, CvtF32UByteMatchInfo &MatchInfo) {
251  Register SrcReg = MI.getOperand(1).getReg();
252 
253  // Look through G_ZEXT.
254  bool IsShr = mi_match(SrcReg, MRI, m_GZExt(m_Reg(SrcReg)));
255 
256  Register Src0;
257  int64_t ShiftAmt;
258  IsShr = mi_match(SrcReg, MRI, m_GLShr(m_Reg(Src0), m_ICst(ShiftAmt)));
259  if (IsShr || mi_match(SrcReg, MRI, m_GShl(m_Reg(Src0), m_ICst(ShiftAmt)))) {
260  const unsigned Offset = MI.getOpcode() - AMDGPU::G_AMDGPU_CVT_F32_UBYTE0;
261 
262  unsigned ShiftOffset = 8 * Offset;
263  if (IsShr)
264  ShiftOffset += ShiftAmt;
265  else
266  ShiftOffset -= ShiftAmt;
267 
268  MatchInfo.CvtVal = Src0;
269  MatchInfo.ShiftOffset = ShiftOffset;
270  return ShiftOffset < 32 && ShiftOffset >= 8 && (ShiftOffset % 8) == 0;
271  }
272 
273  // TODO: Simplify demanded bits.
274  return false;
275 }
276 
278  MachineInstr &MI, const CvtF32UByteMatchInfo &MatchInfo) {
279  B.setInstrAndDebugLoc(MI);
280  unsigned NewOpc = AMDGPU::G_AMDGPU_CVT_F32_UBYTE0 + MatchInfo.ShiftOffset / 8;
281 
282  const LLT S32 = LLT::scalar(32);
283  Register CvtSrc = MatchInfo.CvtVal;
284  LLT SrcTy = MRI.getType(MatchInfo.CvtVal);
285  if (SrcTy != S32) {
286  assert(SrcTy.isScalar() && SrcTy.getSizeInBits() >= 8);
287  CvtSrc = B.buildAnyExt(S32, CvtSrc).getReg(0);
288  }
289 
290  assert(MI.getOpcode() != NewOpc);
291  B.buildInstr(NewOpc, {MI.getOperand(0)}, {CvtSrc}, MI.getFlags());
292  MI.eraseFromParent();
293 }
294 
297  const SITargetLowering *TLI = static_cast<const SITargetLowering *>(
299  Reg = MI.getOperand(1).getReg();
300  return TLI->isCanonicalized(Reg, MF);
301 }
302 
304 protected:
307 
308  // Note: pointer is necessary because Target Predicates use
309  // "Subtarget->"
311 
312 public:
314  AMDGPUCombinerHelper &Helper,
315  AMDGPUPostLegalizerCombinerHelper &PostLegalizerHelper,
316  const GCNSubtarget &Subtarget)
317  : Helper(Helper), PostLegalizerHelper(PostLegalizerHelper),
318  Subtarget(&Subtarget) {}
319 };
320 
321 #define AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
322 #include "AMDGPUGenPostLegalizeGICombiner.inc"
323 #undef AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
324 
325 namespace {
326 #define AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H
327 #include "AMDGPUGenPostLegalizeGICombiner.inc"
328 #undef AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H
329 
330 class AMDGPUPostLegalizerCombinerInfo final : public CombinerInfo {
331  GISelKnownBits *KB;
333  const GCNSubtarget &Subtarget;
334 
335 public:
336  AMDGPUGenPostLegalizerCombinerHelperRuleConfig GeneratedRuleCfg;
337 
338  AMDGPUPostLegalizerCombinerInfo(const GCNSubtarget &Subtarget, bool EnableOpt,
339  bool OptSize, bool MinSize,
340  const AMDGPULegalizerInfo *LI,
342  : CombinerInfo(/*AllowIllegalOps*/ false, /*ShouldLegalizeIllegal*/ true,
343  /*LegalizerInfo*/ LI, EnableOpt, OptSize, MinSize),
344  KB(KB), MDT(MDT), Subtarget(Subtarget) {
345  if (!GeneratedRuleCfg.parseCommandLineOption())
346  report_fatal_error("Invalid rule identifier");
347  }
348 
349  bool combine(GISelChangeObserver &Observer, MachineInstr &MI,
350  MachineIRBuilder &B) const override;
351 };
352 
354  MachineInstr &MI,
355  MachineIRBuilder &B) const {
356  AMDGPUCombinerHelper Helper(Observer, B, /*IsPreLegalize*/ false, KB, MDT,
357  LInfo);
358  AMDGPUPostLegalizerCombinerHelper PostLegalizerHelper(B, Helper);
359  AMDGPUGenPostLegalizerCombinerHelper Generated(
360  GeneratedRuleCfg, Helper, PostLegalizerHelper, Subtarget);
361 
362  if (Generated.tryCombineAll(Observer, MI, B))
363  return true;
364 
365  switch (MI.getOpcode()) {
366  case TargetOpcode::G_SHL:
367  case TargetOpcode::G_LSHR:
368  case TargetOpcode::G_ASHR:
369  // On some subtargets, 64-bit shift is a quarter rate instruction. In the
370  // common case, splitting this into a move and a 32-bit shift is faster and
371  // the same code size.
372  return Helper.tryCombineShiftToUnmerge(MI, 32);
373  }
374 
375  return false;
376 }
377 
378 #define AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP
379 #include "AMDGPUGenPostLegalizeGICombiner.inc"
380 #undef AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP
381 
382 // Pass boilerplate
383 // ================
384 
// MachineFunctionPass wrapper for the AMDGPU post-legalizer combiner.
// runOnMachineFunction() builds an AMDGPUPostLegalizerCombinerInfo for the
// current function and drives it through a Combiner.
385 class AMDGPUPostLegalizerCombiner : public MachineFunctionPass {
386 public:
  // Pass identifier; handed to the MachineFunctionPass base in the constructor.
387  static char ID;
388 
  // IsOptNone is stored in the member of the same name (see below).
389  AMDGPUPostLegalizerCombiner(bool IsOptNone = false);
390 
  // Name reported for this pass.
391  StringRef getPassName() const override {
392  return "AMDGPUPostLegalizerCombiner";
393  }
394 
  // Runs the combiner over MF; defined out of line.
395  bool runOnMachineFunction(MachineFunction &MF) override;
396 
  // Declares the analyses this pass uses/preserves; defined out of line.
397  void getAnalysisUsage(AnalysisUsage &AU) const override;
398 private:
  // When true, runOnMachineFunction() does not query MachineDominatorTree and
  // hands a null dominator tree to the combiner info instead.
399  bool IsOptNone;
400 };
401 } // end anonymous namespace
402 
403 void AMDGPUPostLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
405  AU.setPreservesCFG();
409  if (!IsOptNone) {
412  }
414 }
415 
// Constructs the pass: forwards the static ID to MachineFunctionPass, records
// IsOptNone, and calls initializeAMDGPUPostLegalizerCombinerPass() on the
// registry returned by PassRegistry::getPassRegistry().
416 AMDGPUPostLegalizerCombiner::AMDGPUPostLegalizerCombiner(bool IsOptNone)
417  : MachineFunctionPass(ID), IsOptNone(IsOptNone) {
418  initializeAMDGPUPostLegalizerCombinerPass(*PassRegistry::getPassRegistry());
419 }
420 
// Runs the post-legalizer combiner on MF.
//
// Returns false without doing anything when MF carries the FailedISel
// property; otherwise returns the result of Combiner::combineMachineInstrs()
// (presumably whether the function was modified -- confirm against Combiner).
421 bool AMDGPUPostLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
  // Nothing to combine if instruction selection has already failed for MF.
422  if (MF.getProperties().hasProperty(
423  MachineFunctionProperties::Property::FailedISel))
424  return false;
425  auto *TPC = &getAnalysis<TargetPassConfig>();
426  const Function &F = MF.getFunction();
  // Optimizations are enabled only when the target's optimization level is
  // not None and skipFunction(F) does not ask to skip this function.
427  bool EnableOpt =
428  MF.getTarget().getOptLevel() != CodeGenOpt::None && !skipFunction(F);
429 
430  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  // The subtarget's legalizer info is downcast to the AMDGPU-specific type
  // expected by AMDGPUPostLegalizerCombinerInfo.
431  const AMDGPULegalizerInfo *LI
432  = static_cast<const AMDGPULegalizerInfo *>(ST.getLegalizerInfo());
433 
434  GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
  // The dominator tree is only fetched when the pass was not created with
  // IsOptNone; otherwise the combiner info receives a null pointer.
435  MachineDominatorTree *MDT =
436  IsOptNone ? nullptr : &getAnalysis<MachineDominatorTree>();
437  AMDGPUPostLegalizerCombinerInfo PCInfo(ST, EnableOpt, F.hasOptSize(),
438  F.hasMinSize(), LI, KB, MDT);
439  Combiner C(PCInfo, TPC);
  // No CSE info is supplied to the combiner here.
440  return C.combineMachineInstrs(MF, /*CSEInfo*/ nullptr);
441 }
442 
444 INITIALIZE_PASS_BEGIN(AMDGPUPostLegalizerCombiner, DEBUG_TYPE,
445  "Combine AMDGPU machine instrs after legalization",
446  false, false)
449 INITIALIZE_PASS_END(AMDGPUPostLegalizerCombiner, DEBUG_TYPE,
450  "Combine AMDGPU machine instrs after legalization", false,
451  false)
452 
453 namespace llvm {
455  return new AMDGPUPostLegalizerCombiner(IsOptNone);
456 }
457 } // end namespace llvm
AMDGPUCombinerHelper
Definition: AMDGPUCombinerHelper.h:20
MIPatternMatch.h
llvm::CmpInst::FCMP_ULE
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
Definition: InstrTypes.h:735
llvm::TargetMachine::getOptLevel
CodeGenOpt::Level getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
Definition: TargetMachine.cpp:182
CombinerInfo.h
llvm::MachineFunctionProperties::hasProperty
bool hasProperty(Property P) const
Definition: MachineFunction.h:192
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:108
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
llvm::GISelKnownBits::maskedValueIsZero
bool maskedValueIsZero(Register Val, const APInt &Mask)
Definition: GISelKnownBits.h:78
llvm::initializeAMDGPUPostLegalizerCombinerPass
void initializeAMDGPUPostLegalizerCombinerPass(PassRegistry &)
llvm::AArch64PACKey::ID
ID
Definition: AArch64BaseInfo.h:818
llvm::CmpInst::Predicate
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:720
llvm::MIPatternMatch::m_Reg
operand_type_match m_Reg()
Definition: MIPatternMatch.h:268
llvm::GISelKnownBits
Definition: GISelKnownBits.h:29
llvm::MIPatternMatch::m_GShl
BinaryOp_match< LHS, RHS, TargetOpcode::G_SHL, false > m_GShl(const LHS &L, const RHS &R)
Definition: MIPatternMatch.h:522
llvm::MachineRegisterInfo
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Definition: MachineRegisterInfo.h:50
AMDGPUPostLegalizerCombinerHelper::matchCvtF32UByteN
bool matchCvtF32UByteN(MachineInstr &MI, CvtF32UByteMatchInfo &MatchInfo)
Definition: AMDGPUPostLegalizerCombiner.cpp:249
llvm::Function
Definition: Function.h:60
AMDGPUPostLegalizerCombinerHelper::FMinFMaxLegacyInfo
Definition: AMDGPUPostLegalizerCombiner.cpp:46
llvm::MIPatternMatch::m_GLShr
BinaryOp_match< LHS, RHS, TargetOpcode::G_LSHR, false > m_GLShr(const LHS &L, const RHS &R)
Definition: MIPatternMatch.h:528
AMDGPUPostLegalizerCombinerHelper::matchRcpSqrtToRsq
bool matchRcpSqrtToRsq(MachineInstr &MI, std::function< void(MachineIRBuilder &)> &MatchInfo)
Definition: AMDGPUPostLegalizerCombiner.cpp:205
llvm::CombinerHelper::getKnownBits
GISelKnownBits * getKnownBits() const
Definition: CombinerHelper.h:128
AMDGPUPostLegalizerCombinerHelperState
Definition: AMDGPUPostLegalizerCombiner.cpp:303
llvm::logicalview::LVAttributeKind::Generated
@ Generated
llvm::CmpInst::FCMP_ONE
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
Definition: InstrTypes.h:728
GISelKnownBits.h
llvm::X86Disassembler::Reg
Reg
All possible values of the reg field in the ModR/M byte.
Definition: X86DisassemblerDecoder.h:462
llvm::MachineFunctionPass
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Definition: MachineFunctionPass.h:30
AMDGPUPostLegalizerCombinerHelperState::AMDGPUPostLegalizerCombinerHelperState
AMDGPUPostLegalizerCombinerHelperState(AMDGPUCombinerHelper &Helper, AMDGPUPostLegalizerCombinerHelper &PostLegalizerHelper, const GCNSubtarget &Subtarget)
Definition: AMDGPUPostLegalizerCombiner.cpp:313
llvm::createAMDGPUPostLegalizeCombiner
FunctionPass * createAMDGPUPostLegalizeCombiner(bool IsOptNone)
Definition: AMDGPUPostLegalizerCombiner.cpp:454
llvm::CombinerHelper::tryCombineShiftToUnmerge
bool tryCombineShiftToUnmerge(MachineInstr &MI, unsigned TargetShiftAmount)
Definition: CombinerHelper.cpp:2019
llvm::getSelectionDAGFallbackAnalysisUsage
void getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU)
Modify analysis usage so it preserves passes required for the SelectionDAG fallback.
Definition: Utils.cpp:895
llvm::GCNSubtarget
Definition: GCNSubtarget.h:31
llvm::DiagnosticPredicateTy::Match
@ Match
llvm::CombinerInfo
Definition: CombinerInfo.h:26
llvm::CmpInst::FCMP_OGT
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition: InstrTypes.h:724
llvm::MachineFunctionPass::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Definition: MachineFunctionPass.cpp:167
llvm::AMDGPULegalizerInfo
This class provides the legalizer information for the AMDGPU target.
Definition: AMDGPULegalizerInfo.h:31
AMDGPUPostLegalizerCombinerHelper::matchRemoveFcanonicalize
bool matchRemoveFcanonicalize(MachineInstr &MI, Register &Reg)
Definition: AMDGPUPostLegalizerCombiner.cpp:295
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::CmpInst::FCMP_ULT
@ FCMP_ULT
1 1 0 0 True if unordered or less than
Definition: InstrTypes.h:734
llvm::BitmaskEnumDetail::Mask
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(AMDGPUPostLegalizerCombiner, DEBUG_TYPE, "Combine AMDGPU machine instrs after legalization", false, false) INITIALIZE_PASS_END(AMDGPUPostLegalizerCombiner
AMDGPUPostLegalizerCombinerHelper::AMDGPUPostLegalizerCombinerHelper
AMDGPUPostLegalizerCombinerHelper(MachineIRBuilder &B, AMDGPUCombinerHelper &Helper)
Definition: AMDGPUPostLegalizerCombiner.cpp:42
llvm::GISelKnownBitsAnalysis
To use KnownBitsInfo analysis in a pass, KnownBitsInfo &Info = getAnalysis<GISelKnownBitsInfoAnalysis...
Definition: GISelKnownBits.h:113
TargetMachine.h
llvm::MIPatternMatch::m_GZExt
UnaryOp_match< SrcTy, TargetOpcode::G_ZEXT > m_GZExt(const SrcTy &Src)
Definition: MIPatternMatch.h:579
GCNSubtarget.h
AMDGPUPostLegalizerCombinerHelper::applyUCharToFloat
void applyUCharToFloat(MachineInstr &MI)
Definition: AMDGPUPostLegalizerCombiner.cpp:181
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::MachineInstr::getOperand
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:526
llvm::LLT::getSizeInBits
TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
Definition: LowLevelTypeImpl.h:152
Y
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::CmpInst::FCMP_UGE
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Definition: InstrTypes.h:733
llvm::MachineFunction::getProperties
const MachineFunctionProperties & getProperties() const
Get the function properties.
Definition: MachineFunction.h:748
false
Definition: StackSlotColoring.cpp:141
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
AMDGPU
Definition: AMDGPUReplaceLDSUseWithPointer.cpp:114
llvm::CmpInst::FCMP_UNO
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Definition: InstrTypes.h:730
AMDGPUPostLegalizerCombinerHelper::applyCvtF32UByteN
void applyCvtF32UByteN(MachineInstr &MI, const CvtF32UByteMatchInfo &MatchInfo)
Definition: AMDGPUPostLegalizerCombiner.cpp:277
llvm::report_fatal_error
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:145
llvm::APInt::getHighBitsSet
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition: APInt.h:279
AMDGPUPostLegalizerCombinerHelperState::PostLegalizerHelper
AMDGPUPostLegalizerCombinerHelper & PostLegalizerHelper
Definition: AMDGPUPostLegalizerCombiner.cpp:306
DEBUG_TYPE
#define DEBUG_TYPE
Definition: AMDGPUPostLegalizerCombiner.cpp:29
llvm::CmpInst::FCMP_OEQ
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
Definition: InstrTypes.h:723
Info
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
llvm::CmpInst::FCMP_OLT
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition: InstrTypes.h:726
llvm::MIPatternMatch::m_MInstr
bind_ty< MachineInstr * > m_MInstr(MachineInstr *&MI)
Definition: MIPatternMatch.h:368
llvm::MachineRegisterInfo::getVRegDef
MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
Definition: MachineRegisterInfo.cpp:396
llvm::MIPatternMatch::m_Pred
bind_ty< CmpInst::Predicate > m_Pred(CmpInst::Predicate &P)
Definition: MIPatternMatch.h:370
AMDGPUPostLegalizerCombinerHelper::Helper
AMDGPUCombinerHelper & Helper
Definition: AMDGPUPostLegalizerCombiner.cpp:39
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
X
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
AMDGPUPostLegalizerCombinerHelper::matchUCharToFloat
bool matchUCharToFloat(MachineInstr &MI)
Definition: AMDGPUPostLegalizerCombiner.cpp:162
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
llvm::CmpInst::FCMP_FALSE
@ FCMP_FALSE
0 0 0 0 Always false (always folded)
Definition: InstrTypes.h:722
llvm::TargetPassConfig
Target-Independent Code Generator Pass Configuration Options.
Definition: TargetPassConfig.h:84
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:657
AMDGPUPostLegalizerCombinerHelper::FMinFMaxLegacyInfo::False
Register False
Definition: AMDGPUPostLegalizerCombiner.cpp:50
Combine
Hexagon Vector Combine
Definition: HexagonVectorCombine.cpp:2522
AMDGPUMCTargetDesc.h
llvm::MachineIRBuilder
Helper class to build MachineInstr.
Definition: MachineIRBuilder.h:221
llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:66
llvm::Combiner
Definition: Combiner.h:26
AMDGPUPostLegalizerCombinerHelper::matchFMinFMaxLegacy
bool matchFMinFMaxLegacy(MachineInstr &MI, FMinFMaxLegacyInfo &Info)
Definition: AMDGPUPostLegalizerCombiner.cpp:79
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
AMDGPUPostLegalizerCombinerHelperState::Subtarget
const GCNSubtarget * Subtarget
Definition: AMDGPUPostLegalizerCombiner.cpp:310
TargetPassConfig.h
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::CmpInst::FCMP_OGE
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition: InstrTypes.h:725
function
print Print MemDeps of function
Definition: MemDepPrinter.cpp:82
llvm::MachineOperand::getReg
Register getReg() const
getReg - Returns the register number.
Definition: MachineOperand.h:359
llvm::LLT::isScalar
bool isScalar() const
Definition: LowLevelTypeImpl.h:118
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:75
llvm::MachineFunction
Definition: MachineFunction.h:257
CombinerHelper.h
AMDGPUPostLegalizerCombinerHelper::MF
MachineFunction & MF
Definition: AMDGPUPostLegalizerCombiner.cpp:37
AMDGPUPostLegalizerCombinerHelper::FMinFMaxLegacyInfo::Pred
CmpInst::Predicate Pred
Definition: AMDGPUPostLegalizerCombiner.cpp:51
Cond
SmallVector< MachineOperand, 4 > Cond
Definition: BasicBlockSections.cpp:138
llvm::AnalysisUsage::setPreservesCFG
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:265
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
AMDGPU.h
llvm::MachineRegisterInfo::hasOneNonDBGUse
bool hasOneNonDBGUse(Register RegNo) const
hasOneNonDBGUse - Return true if there is exactly one non-Debug use of the specified register.
Definition: MachineRegisterInfo.cpp:415
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:143
llvm::AnalysisUsage::addPreserved
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Definition: PassAnalysisSupport.h:98
Combiner.h
AMDGPUPostLegalizerCombinerHelper::applySelectFCmpToFMinToFMaxLegacy
void applySelectFCmpToFMinToFMaxLegacy(MachineInstr &MI, const FMinFMaxLegacyInfo &Info)
Definition: AMDGPUPostLegalizerCombiner.cpp:113
llvm::GISelChangeObserver
Abstract class that contains various methods for clients to notify about changes.
Definition: GISelChangeObserver.h:29
MRI
unsigned const MachineRegisterInfo * MRI
Definition: AArch64AdvSIMDScalarPass.cpp:105
llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
llvm::CmpInst::FCMP_UGT
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
Definition: InstrTypes.h:732
AMDGPUPostLegalizerCombinerHelper::B
MachineIRBuilder & B
Definition: AMDGPUPostLegalizerCombiner.cpp:36
AMDGPUPostLegalizerCombinerHelper::FMinFMaxLegacyInfo::True
Register True
Definition: AMDGPUPostLegalizerCombiner.cpp:49
llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition: MachineFunction.h:623
llvm::MIPatternMatch::m_ICst
ConstantMatch< APInt > m_ICst(APInt &Cst)
Definition: MIPatternMatch.h:92
llvm::None
constexpr std::nullopt_t None
Definition: None.h:27
llvm::MIPatternMatch::m_GFCmp
CompareOp_match< Pred, LHS, RHS, TargetOpcode::G_FCMP > m_GFCmp(const Pred &P, const LHS &L, const RHS &R)
Definition: MIPatternMatch.h:678
AMDGPUPostLegalizerCombinerHelper::CvtF32UByteMatchInfo::ShiftOffset
unsigned ShiftOffset
Definition: AMDGPUPostLegalizerCombiner.cpp:69
llvm::MachineFunction::getTarget
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Definition: MachineFunction.h:653
llvm::MIPatternMatch::m_GFSqrt
UnaryOp_match< SrcTy, TargetOpcode::G_FSQRT > m_GFSqrt(const SrcTy &Src)
Definition: MIPatternMatch.h:633
llvm::SITargetLowering
Definition: SIISelLowering.h:31
legalization
Combine AMDGPU machine instrs after legalization
Definition: AMDGPUPostLegalizerCombiner.cpp:450
AMDGPUPostLegalizerCombinerHelper
Definition: AMDGPUPostLegalizerCombiner.cpp:34
AMDGPUPostLegalizerCombinerHelperState::Helper
AMDGPUCombinerHelper & Helper
Definition: AMDGPUPostLegalizerCombiner.cpp:305
AMDGPUPostLegalizerCombinerHelper::FMinFMaxLegacyInfo::RHS
Register RHS
Definition: AMDGPUPostLegalizerCombiner.cpp:48
AMDGPUPostLegalizerCombinerHelper::CvtF32UByteMatchInfo::CvtVal
Register CvtVal
Definition: AMDGPUPostLegalizerCombiner.cpp:68
AMDGPUPostLegalizerCombinerHelper::MRI
MachineRegisterInfo & MRI
Definition: AMDGPUPostLegalizerCombiner.cpp:38
AMDGPUPostLegalizerCombinerHelper::FMinFMaxLegacyInfo::LHS
Register LHS
Definition: AMDGPUPostLegalizerCombiner.cpp:47
AMDGPULegalizerInfo.h
llvm::TargetSubtargetInfo::getTargetLowering
virtual const TargetLowering * getTargetLowering() const
Definition: TargetSubtargetInfo.h:99
llvm::MachineRegisterInfo::getType
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
Definition: MachineRegisterInfo.h:745
AMDGPUCombinerHelper.h
llvm::CmpInst::FCMP_UNE
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
Definition: InstrTypes.h:736
AMDGPUPostLegalizerCombinerHelper::CvtF32UByteMatchInfo
Definition: AMDGPUPostLegalizerCombiner.cpp:67
llvm::CmpInst::FCMP_OLE
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition: InstrTypes.h:727
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:308
llvm::MIPatternMatch::mi_match
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
Definition: MIPatternMatch.h:25
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::MachineDominatorTree
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
Definition: MachineDominators.h:51
combine
vector combine
Definition: VectorCombine.cpp:1843
llvm::CmpInst::FCMP_TRUE
@ FCMP_TRUE
1 1 1 1 Always true (always folded)
Definition: InstrTypes.h:737
llvm::LLT::scalar
static LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
Definition: LowLevelTypeImpl.h:42
llvm::CmpInst::FCMP_ORD
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
Definition: InstrTypes.h:729
llvm::SITargetLowering::isCanonicalized
bool isCanonicalized(SelectionDAG &DAG, SDValue Op, unsigned MaxDepth=5) const
Definition: SIISelLowering.cpp:10073
llvm::CmpInst::FCMP_UEQ
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
Definition: InstrTypes.h:731
MachineDominators.h
llvm::LLT
Definition: LowLevelTypeImpl.h:39