LLVM  16.0.0git
AMDGPURegBankCombiner.cpp
Go to the documentation of this file.
1 //=== lib/CodeGen/GlobalISel/AMDGPURegBankCombiner.cpp ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass does combining of machine instructions at the generic MI level,
10 // after register banks are known.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "AMDGPU.h"
15 #include "AMDGPULegalizerInfo.h"
16 #include "AMDGPURegisterBankInfo.h"
17 #include "GCNSubtarget.h"
19 #include "SIMachineFunctionInfo.h"
27 #include "llvm/IR/IntrinsicsAMDGPU.h"
29 #define DEBUG_TYPE "amdgpu-regbank-combiner"
30 
31 using namespace llvm;
32 using namespace MIPatternMatch;
33 
35 protected:
42  const SIInstrInfo &TII;
44 
45 public:
47  : B(B), MF(B.getMF()), MRI(*B.getMRI()),
48  Subtarget(MF.getSubtarget<GCNSubtarget>()),
49  RBI(*Subtarget.getRegBankInfo()), TRI(*Subtarget.getRegisterInfo()),
50  TII(*Subtarget.getInstrInfo()), Helper(Helper){};
51 
52  bool isVgprRegBank(Register Reg);
53  Register getAsVgpr(Register Reg);
54 
55  struct MinMaxMedOpc {
56  unsigned Min, Max, Med;
57  };
58 
59  struct Med3MatchInfo {
60  unsigned Opc;
61  Register Val0, Val1, Val2;
62  };
63 
64  MinMaxMedOpc getMinMaxPair(unsigned Opc);
65 
66  template <class m_Cst, typename CstTy>
67  bool matchMed(MachineInstr &MI, MachineRegisterInfo &MRI, MinMaxMedOpc MMMOpc,
68  Register &Val, CstTy &K0, CstTy &K1);
69 
70  bool matchIntMinMaxToMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo);
71  bool matchFPMinMaxToMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo);
72  bool matchFPMinMaxToClamp(MachineInstr &MI, Register &Reg);
73  bool matchFPMed3ToClamp(MachineInstr &MI, Register &Reg);
74  void applyMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo);
75  void applyClamp(MachineInstr &MI, Register &Reg);
76 
77 private:
78  AMDGPU::SIModeRegisterDefaults getMode();
79  bool getIEEE();
80  bool getDX10Clamp();
81  bool isFminnumIeee(const MachineInstr &MI);
82  bool isFCst(MachineInstr *MI);
84 };
85 
87  return RBI.getRegBank(Reg, MRI, TRI)->getID() == AMDGPU::VGPRRegBankID;
88 }
89 
91  if (isVgprRegBank(Reg))
92  return Reg;
93 
94  // Search for existing copy of Reg to vgpr.
96  Register Def = Use.getOperand(0).getReg();
97  if (Use.getOpcode() == AMDGPU::COPY && isVgprRegBank(Def))
98  return Def;
99  }
100 
101  // Copy Reg to vgpr.
102  Register VgprReg = B.buildCopy(MRI.getType(Reg), Reg).getReg(0);
103  MRI.setRegBank(VgprReg, RBI.getRegBank(AMDGPU::VGPRRegBankID));
104  return VgprReg;
105 }
106 
109  switch (Opc) {
110  default:
111  llvm_unreachable("Unsupported opcode");
112  case AMDGPU::G_SMAX:
113  case AMDGPU::G_SMIN:
114  return {AMDGPU::G_SMIN, AMDGPU::G_SMAX, AMDGPU::G_AMDGPU_SMED3};
115  case AMDGPU::G_UMAX:
116  case AMDGPU::G_UMIN:
117  return {AMDGPU::G_UMIN, AMDGPU::G_UMAX, AMDGPU::G_AMDGPU_UMED3};
118  case AMDGPU::G_FMAXNUM:
119  case AMDGPU::G_FMINNUM:
120  return {AMDGPU::G_FMINNUM, AMDGPU::G_FMAXNUM, AMDGPU::G_AMDGPU_FMED3};
121  case AMDGPU::G_FMAXNUM_IEEE:
122  case AMDGPU::G_FMINNUM_IEEE:
123  return {AMDGPU::G_FMINNUM_IEEE, AMDGPU::G_FMAXNUM_IEEE,
124  AMDGPU::G_AMDGPU_FMED3};
125  }
126 }
127 
128 template <class m_Cst, typename CstTy>
131  MinMaxMedOpc MMMOpc, Register &Val,
132  CstTy &K0, CstTy &K1) {
133  // 4 operand commutes of: min(max(Val, K0), K1).
134  // Find K1 from outer instr: min(max(...), K1) or min(K1, max(...)).
135  // Find K0 and Val from inner instr: max(K0, Val) or max(Val, K0).
136  // 4 operand commutes of: max(min(Val, K1), K0).
137  // Find K0 from outer instr: max(min(...), K0) or max(K0, min(...)).
138  // Find K1 and Val from inner instr: min(K1, Val) or min(Val, K1).
139  return mi_match(
140  MI, MRI,
141  m_any_of(
143  MMMOpc.Min, m_CommutativeBinOp(MMMOpc.Max, m_Reg(Val), m_Cst(K0)),
144  m_Cst(K1)),
146  MMMOpc.Max, m_CommutativeBinOp(MMMOpc.Min, m_Reg(Val), m_Cst(K1)),
147  m_Cst(K0))));
148 }
149 
151  MachineInstr &MI, Med3MatchInfo &MatchInfo) {
152  Register Dst = MI.getOperand(0).getReg();
153  if (!isVgprRegBank(Dst))
154  return false;
155 
156  // med3 for i16 is only available on gfx9+, and not available for v2i16.
157  LLT Ty = MRI.getType(Dst);
158  if ((Ty != LLT::scalar(16) || !Subtarget.hasMed3_16()) &&
159  Ty != LLT::scalar(32))
160  return false;
161 
162  MinMaxMedOpc OpcodeTriple = getMinMaxPair(MI.getOpcode());
163  Register Val;
164  Optional<ValueAndVReg> K0, K1;
165  // Match min(max(Val, K0), K1) or max(min(Val, K1), K0). Then see if K0 <= K1.
166  if (!matchMed<GCstAndRegMatch>(MI, MRI, OpcodeTriple, Val, K0, K1))
167  return false;
168 
169  if (OpcodeTriple.Med == AMDGPU::G_AMDGPU_SMED3 && K0->Value.sgt(K1->Value))
170  return false;
171  if (OpcodeTriple.Med == AMDGPU::G_AMDGPU_UMED3 && K0->Value.ugt(K1->Value))
172  return false;
173 
174  MatchInfo = {OpcodeTriple.Med, Val, K0->VReg, K1->VReg};
175  return true;
176 }
177 
178 // fmed3(NaN, K0, K1) = min(min(NaN, K0), K1)
179 // ieee = true : min/max(SNaN, K) = QNaN, min/max(QNaN, K) = K
180 // ieee = false : min/max(NaN, K) = K
181 // clamp(NaN) = dx10_clamp ? 0.0 : NaN
182 // Consider values of min(max(Val, K0), K1) and max(min(Val, K1), K0) as input.
183 // Other operand commutes (see matchMed) give same result since min and max are
184 // commutative.
185 
186 // Try to replace fp min(max(Val, K0), K1) or max(min(Val, K1), K0), K0 <= K1,
187 // with fmed3(Val, K0, K1) or clamp(Val). Clamp requires K0 = 0.0 and K1 = 1.0.
188 // Val = SNaN only for ieee = true
189 // fmed3(SNaN, K0, K1) = min(min(SNaN, K0), K1) = min(QNaN, K1) = K1
190 // min(max(SNaN, K0), K1) = min(QNaN, K1) = K1
191 // max(min(SNaN, K1), K0) = max(K1, K0) = K1
192 // Val = NaN,ieee = false or Val = QNaN,ieee = true
193 // fmed3(NaN, K0, K1) = min(min(NaN, K0), K1) = min(K0, K1) = K0
194 // min(max(NaN, K0), K1) = min(K0, K1) = K0 (can clamp when dx10_clamp = true)
195 // max(min(NaN, K1), K0) = max(K1, K0) = K1 != K0
197  MachineInstr &MI, Med3MatchInfo &MatchInfo) {
198  Register Dst = MI.getOperand(0).getReg();
199  LLT Ty = MRI.getType(Dst);
200 
201  // med3 for f16 is only available on gfx9+, and not available for v2f16.
202  if ((Ty != LLT::scalar(16) || !Subtarget.hasMed3_16()) &&
203  Ty != LLT::scalar(32))
204  return false;
205 
206  auto OpcodeTriple = getMinMaxPair(MI.getOpcode());
207 
208  Register Val;
210  // Match min(max(Val, K0), K1) or max(min(Val, K1), K0). Then see if K0 <= K1.
211  if (!matchMed<GFCstAndRegMatch>(MI, MRI, OpcodeTriple, Val, K0, K1))
212  return false;
213 
214  if (K0->Value > K1->Value)
215  return false;
216 
217  // For IEEE=false perform combine only when it's safe to assume that there are
218  // no NaN inputs. Most often MI is marked with nnan fast math flag.
219  // For IEEE=true consider NaN inputs. fmed3(NaN, K0, K1) is equivalent to
220  // min(min(NaN, K0), K1). Safe to fold for min(max(Val, K0), K1) since inner
221  // nodes(max/min) have same behavior when one input is NaN and other isn't.
222  // Don't consider max(min(SNaN, K1), K0) since there is no isKnownNeverQNaN,
223  // also post-legalizer inputs to min/max are fcanonicalized (never SNaN).
224  if ((getIEEE() && isFminnumIeee(MI)) || isKnownNeverNaN(Dst, MRI)) {
225  // Don't fold single use constant that can't be inlined.
226  if ((!MRI.hasOneNonDBGUse(K0->VReg) || TII.isInlineConstant(K0->Value)) &&
227  (!MRI.hasOneNonDBGUse(K1->VReg) || TII.isInlineConstant(K1->Value))) {
228  MatchInfo = {OpcodeTriple.Med, Val, K0->VReg, K1->VReg};
229  return true;
230  }
231  }
232 
233  return false;
234 }
235 
237  Register &Reg) {
238  // Clamp is available on all types after regbankselect (f16, f32, f64, v2f16).
239  auto OpcodeTriple = getMinMaxPair(MI.getOpcode());
240  Register Val;
242  // Match min(max(Val, K0), K1) or max(min(Val, K1), K0).
243  if (!matchMed<GFCstOrSplatGFCstMatch>(MI, MRI, OpcodeTriple, Val, K0, K1))
244  return false;
245 
246  if (!K0->Value.isExactlyValue(0.0) || !K1->Value.isExactlyValue(1.0))
247  return false;
248 
249  // For IEEE=false perform combine only when it's safe to assume that there are
250  // no NaN inputs. Most often MI is marked with nnan fast math flag.
251  // For IEEE=true consider NaN inputs. Only min(max(QNaN, 0.0), 1.0) evaluates
252  // to 0.0 requires dx10_clamp = true.
253  if ((getIEEE() && getDX10Clamp() && isFminnumIeee(MI) &&
254  isKnownNeverSNaN(Val, MRI)) ||
255  isKnownNeverNaN(MI.getOperand(0).getReg(), MRI)) {
256  Reg = Val;
257  return true;
258  }
259 
260  return false;
261 }
262 
263 // Replacing fmed3(NaN, 0.0, 1.0) with clamp. Requires dx10_clamp = true.
264 // Val = SNaN only for ieee = true. It is important which operand is NaN.
265 // min(min(SNaN, 0.0), 1.0) = min(QNaN, 1.0) = 1.0
266 // min(min(SNaN, 1.0), 0.0) = min(QNaN, 0.0) = 0.0
267 // min(min(0.0, 1.0), SNaN) = min(0.0, SNaN) = QNaN
268 // Val = NaN,ieee = false or Val = QNaN,ieee = true
269 // min(min(NaN, 0.0), 1.0) = min(0.0, 1.0) = 0.0
270 // min(min(NaN, 1.0), 0.0) = min(1.0, 0.0) = 0.0
271 // min(min(0.0, 1.0), NaN) = min(0.0, NaN) = 0.0
273  Register &Reg) {
274  if (MI.getIntrinsicID() != Intrinsic::amdgcn_fmed3)
275  return false;
276 
277  // In llvm-ir, clamp is often represented as an intrinsic call to
278  // @llvm.amdgcn.fmed3.f32(%Val, 0.0, 1.0). Check for other operand orders.
279  MachineInstr *Src0 = getDefIgnoringCopies(MI.getOperand(2).getReg(), MRI);
280  MachineInstr *Src1 = getDefIgnoringCopies(MI.getOperand(3).getReg(), MRI);
281  MachineInstr *Src2 = getDefIgnoringCopies(MI.getOperand(4).getReg(), MRI);
282 
283  if (isFCst(Src0) && !isFCst(Src1))
284  std::swap(Src0, Src1);
285  if (isFCst(Src1) && !isFCst(Src2))
286  std::swap(Src1, Src2);
287  if (isFCst(Src0) && !isFCst(Src1))
288  std::swap(Src0, Src1);
289  if (!isClampZeroToOne(Src1, Src2))
290  return false;
291 
292  Register Val = Src0->getOperand(0).getReg();
293 
294  auto isOp3Zero = [&]() {
295  MachineInstr *Op3 = getDefIgnoringCopies(MI.getOperand(4).getReg(), MRI);
296  if (Op3->getOpcode() == TargetOpcode::G_FCONSTANT)
297  return Op3->getOperand(1).getFPImm()->isExactlyValue(0.0);
298  return false;
299  };
300  // For IEEE=false perform combine only when it's safe to assume that there are
301  // no NaN inputs. Most often MI is marked with nnan fast math flag.
302  // For IEEE=true consider NaN inputs. Requires dx10_clamp = true. Safe to fold
303  // when Val could be QNaN. If Val can also be SNaN third input should be 0.0.
304  if (isKnownNeverNaN(MI.getOperand(0).getReg(), MRI) ||
305  (getIEEE() && getDX10Clamp() &&
306  (isKnownNeverSNaN(Val, MRI) || isOp3Zero()))) {
307  Reg = Val;
308  return true;
309  }
310 
311  return false;
312 }
313 
315  B.setInstrAndDebugLoc(MI);
316  B.buildInstr(AMDGPU::G_AMDGPU_CLAMP, {MI.getOperand(0)}, {Reg},
317  MI.getFlags());
318  MI.eraseFromParent();
319 }
320 
322  Med3MatchInfo &MatchInfo) {
323  B.setInstrAndDebugLoc(MI);
324  B.buildInstr(MatchInfo.Opc, {MI.getOperand(0)},
325  {getAsVgpr(MatchInfo.Val0), getAsVgpr(MatchInfo.Val1),
326  getAsVgpr(MatchInfo.Val2)},
327  MI.getFlags());
328  MI.eraseFromParent();
329 }
330 
331 AMDGPU::SIModeRegisterDefaults AMDGPURegBankCombinerHelper::getMode() {
332  return MF.getInfo<SIMachineFunctionInfo>()->getMode();
333 }
334 
335 bool AMDGPURegBankCombinerHelper::getIEEE() { return getMode().IEEE; }
336 
337 bool AMDGPURegBankCombinerHelper::getDX10Clamp() { return getMode().DX10Clamp; }
338 
339 bool AMDGPURegBankCombinerHelper::isFminnumIeee(const MachineInstr &MI) {
340  return MI.getOpcode() == AMDGPU::G_FMINNUM_IEEE;
341 }
342 
343 bool AMDGPURegBankCombinerHelper::isFCst(MachineInstr *MI) {
344  return MI->getOpcode() == AMDGPU::G_FCONSTANT;
345 }
346 
347 bool AMDGPURegBankCombinerHelper::isClampZeroToOne(MachineInstr *K0,
348  MachineInstr *K1) {
349  if (isFCst(K0) && isFCst(K1)) {
350  const ConstantFP *KO_FPImm = K0->getOperand(1).getFPImm();
351  const ConstantFP *K1_FPImm = K1->getOperand(1).getFPImm();
352  return (KO_FPImm->isExactlyValue(0.0) && K1_FPImm->isExactlyValue(1.0)) ||
353  (KO_FPImm->isExactlyValue(1.0) && K1_FPImm->isExactlyValue(0.0));
354  }
355  return false;
356 }
357 
359 protected:
362 
363 public:
365  AMDGPURegBankCombinerHelper &RegBankHelper)
366  : Helper(Helper), RegBankHelper(RegBankHelper) {}
367 };
368 
369 #define AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_DEPS
370 #include "AMDGPUGenRegBankGICombiner.inc"
371 #undef AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_DEPS
372 
373 namespace {
374 #define AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_H
375 #include "AMDGPUGenRegBankGICombiner.inc"
376 #undef AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_H
377 
378 class AMDGPURegBankCombinerInfo final : public CombinerInfo {
379  GISelKnownBits *KB;
381 
382 public:
383  AMDGPUGenRegBankCombinerHelperRuleConfig GeneratedRuleCfg;
384 
385  AMDGPURegBankCombinerInfo(bool EnableOpt, bool OptSize, bool MinSize,
386  const AMDGPULegalizerInfo *LI,
388  : CombinerInfo(/*AllowIllegalOps*/ false, /*ShouldLegalizeIllegal*/ true,
389  /*LegalizerInfo*/ LI, EnableOpt, OptSize, MinSize),
390  KB(KB), MDT(MDT) {
391  if (!GeneratedRuleCfg.parseCommandLineOption())
392  report_fatal_error("Invalid rule identifier");
393  }
394 
395  bool combine(GISelChangeObserver &Observer, MachineInstr &MI,
396  MachineIRBuilder &B) const override;
397 };
398 
400  MachineInstr &MI,
401  MachineIRBuilder &B) const {
402  CombinerHelper Helper(Observer, B, /* IsPreLegalize*/ false, KB, MDT);
403  AMDGPURegBankCombinerHelper RegBankHelper(B, Helper);
404  AMDGPUGenRegBankCombinerHelper Generated(GeneratedRuleCfg, Helper,
405  RegBankHelper);
406 
407  if (Generated.tryCombineAll(Observer, MI, B))
408  return true;
409 
410  return false;
411 }
412 
413 #define AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_CPP
414 #include "AMDGPUGenRegBankGICombiner.inc"
415 #undef AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_CPP
416 
417 // Pass boilerplate
418 // ================
419 
420 class AMDGPURegBankCombiner : public MachineFunctionPass {
421 public:
422  static char ID;
423 
424  AMDGPURegBankCombiner(bool IsOptNone = false);
425 
426  StringRef getPassName() const override {
427  return "AMDGPURegBankCombiner";
428  }
429 
430  bool runOnMachineFunction(MachineFunction &MF) override;
431 
432  void getAnalysisUsage(AnalysisUsage &AU) const override;
433 private:
434  bool IsOptNone;
435 };
436 } // end anonymous namespace
437 
438 void AMDGPURegBankCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
440  AU.setPreservesCFG();
444  if (!IsOptNone) {
447  }
449 }
450 
451 AMDGPURegBankCombiner::AMDGPURegBankCombiner(bool IsOptNone)
452  : MachineFunctionPass(ID), IsOptNone(IsOptNone) {
453  initializeAMDGPURegBankCombinerPass(*PassRegistry::getPassRegistry());
454 }
455 
456 bool AMDGPURegBankCombiner::runOnMachineFunction(MachineFunction &MF) {
457  if (MF.getProperties().hasProperty(
458  MachineFunctionProperties::Property::FailedISel))
459  return false;
460  auto *TPC = &getAnalysis<TargetPassConfig>();
461  const Function &F = MF.getFunction();
462  bool EnableOpt =
463  MF.getTarget().getOptLevel() != CodeGenOpt::None && !skipFunction(F);
464 
465  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
466  const AMDGPULegalizerInfo *LI
467  = static_cast<const AMDGPULegalizerInfo *>(ST.getLegalizerInfo());
468 
469  GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
470  MachineDominatorTree *MDT =
471  IsOptNone ? nullptr : &getAnalysis<MachineDominatorTree>();
472  AMDGPURegBankCombinerInfo PCInfo(EnableOpt, F.hasOptSize(),
473  F.hasMinSize(), LI, KB, MDT);
474  Combiner C(PCInfo, TPC);
475  return C.combineMachineInstrs(MF, /*CSEInfo*/ nullptr);
476 }
477 
479 INITIALIZE_PASS_BEGIN(AMDGPURegBankCombiner, DEBUG_TYPE,
480  "Combine AMDGPU machine instrs after regbankselect",
481  false, false)
484 INITIALIZE_PASS_END(AMDGPURegBankCombiner, DEBUG_TYPE,
485  "Combine AMDGPU machine instrs after regbankselect", false,
486  false)
487 
488 namespace llvm {
490  return new AMDGPURegBankCombiner(IsOptNone);
491 }
492 } // end namespace llvm
MIPatternMatch.h
llvm::TargetMachine::getOptLevel
CodeGenOpt::Level getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
Definition: TargetMachine.cpp:182
CombinerInfo.h
AMDGPURegBankCombinerHelper::applyClamp
void applyClamp(MachineInstr &MI, Register &Reg)
Definition: AMDGPURegBankCombiner.cpp:314
AMDGPURegBankCombinerHelper::MRI
MachineRegisterInfo & MRI
Definition: AMDGPURegBankCombiner.cpp:38
llvm::MachineFunctionProperties::hasProperty
bool hasProperty(Property P) const
Definition: MachineFunction.h:192
llvm::getDefIgnoringCopies
MachineInstr * getDefIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, folding away any trivial copies.
Definition: Utils.cpp:461
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:108
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
llvm::tgtok::Def
@ Def
Definition: TGLexer.h:50
AMDGPURegBankCombinerHelper::Helper
CombinerHelper & Helper
Definition: AMDGPURegBankCombiner.cpp:43
llvm::FPValueAndVReg::Value
APFloat Value
Definition: Utils.h:197
llvm::AArch64PACKey::ID
ID
Definition: AArch64BaseInfo.h:818
llvm::MIPatternMatch::m_Reg
operand_type_match m_Reg()
Definition: MIPatternMatch.h:268
SIMachineFunctionInfo.h
llvm::GISelKnownBits
Definition: GISelKnownBits.h:29
AMDGPURegBankCombinerHelper::getMinMaxPair
MinMaxMedOpc getMinMaxPair(unsigned Opc)
Definition: AMDGPURegBankCombiner.cpp:108
llvm::MachineRegisterInfo
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Definition: MachineRegisterInfo.h:50
llvm::Function
Definition: Function.h:60
AMDGPURegBankCombinerHelper::MF
MachineFunction & MF
Definition: AMDGPURegBankCombiner.cpp:37
llvm::logicalview::LVAttributeKind::Generated
@ Generated
AMDGPURegBankCombinerHelper::isVgprRegBank
bool isVgprRegBank(Register Reg)
Definition: AMDGPURegBankCombiner.cpp:86
GISelKnownBits.h
regbankselect
Combine AMDGPU machine instrs after regbankselect
Definition: AMDGPURegBankCombiner.cpp:485
AMDGPURegBankCombinerHelper::MinMaxMedOpc::Max
unsigned Max
Definition: AMDGPURegBankCombiner.cpp:56
llvm::X86Disassembler::Reg
Reg
All possible values of the reg field in the ModR/M byte.
Definition: X86DisassemblerDecoder.h:462
llvm::MachineFunctionPass
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Definition: MachineFunctionPass.h:30
AMDGPURegBankCombinerHelper::matchFPMed3ToClamp
bool matchFPMed3ToClamp(MachineInstr &MI, Register &Reg)
Definition: AMDGPURegBankCombiner.cpp:272
AMDGPURegBankCombinerHelper::AMDGPURegBankCombinerHelper
AMDGPURegBankCombinerHelper(MachineIRBuilder &B, CombinerHelper &Helper)
Definition: AMDGPURegBankCombiner.cpp:46
llvm::TargetRegisterInfo
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Definition: TargetRegisterInfo.h:237
AMDGPURegBankCombinerHelper::matchMed
bool matchMed(MachineInstr &MI, MachineRegisterInfo &MRI, MinMaxMedOpc MMMOpc, Register &Val, CstTy &K0, CstTy &K1)
Definition: AMDGPURegBankCombiner.cpp:129
llvm::MachineRegisterInfo::use_instructions
iterator_range< use_instr_iterator > use_instructions(Register Reg) const
Definition: MachineRegisterInfo.h:493
llvm::APInt::ugt
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition: APInt.h:1152
llvm::Optional
Definition: APInt.h:33
llvm::getSelectionDAGFallbackAnalysisUsage
void getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU)
Modify analysis usage so it preserves passes required for the SelectionDAG fallback.
Definition: Utils.cpp:894
llvm::GCNSubtarget
Definition: GCNSubtarget.h:31
llvm::CombinerInfo
Definition: CombinerInfo.h:26
TRI
unsigned const TargetRegisterInfo * TRI
Definition: MachineSink.cpp:1628
llvm::ValueAndVReg::Value
APInt Value
Definition: Utils.h:179
llvm::MachineFunctionPass::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Definition: MachineFunctionPass.cpp:167
llvm::AMDGPULegalizerInfo
This class provides the information for the target register banks.
Definition: AMDGPULegalizerInfo.h:31
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::ValueAndVReg::VReg
Register VReg
Definition: Utils.h:180
AMDGPURegBankCombinerHelperState::RegBankHelper
AMDGPURegBankCombinerHelper & RegBankHelper
Definition: AMDGPURegBankCombiner.cpp:361
llvm::GISelKnownBitsAnalysis
To use KnownBitsInfo analysis in a pass, KnownBitsInfo &Info = getAnalysis<GISelKnownBitsInfoAnalysis...
Definition: GISelKnownBits.h:113
TargetMachine.h
GCNSubtarget.h
AMDGPURegBankCombinerHelper::Med3MatchInfo
Definition: AMDGPURegBankCombiner.cpp:59
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::MachineInstr::getOperand
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:526
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::MachineFunction::getProperties
const MachineFunctionProperties & getProperties() const
Get the function properties.
Definition: MachineFunction.h:748
false
Definition: StackSlotColoring.cpp:141
isClampZeroToOne
static bool isClampZeroToOne(SDValue A, SDValue B)
Definition: SIISelLowering.cpp:10625
TII
const HexagonInstrInfo * TII
Definition: HexagonCopyToCombine.cpp:125
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
AMDGPU
Definition: AMDGPUReplaceLDSUseWithPointer.cpp:114
AMDGPURegBankCombinerHelper::MinMaxMedOpc::Min
unsigned Min
Definition: AMDGPURegBankCombiner.cpp:56
AMDGPURegBankCombinerHelperState::AMDGPURegBankCombinerHelperState
AMDGPURegBankCombinerHelperState(CombinerHelper &Helper, AMDGPURegBankCombinerHelper &RegBankHelper)
Definition: AMDGPURegBankCombiner.cpp:364
AMDGPURegBankCombinerHelper::TII
const SIInstrInfo & TII
Definition: AMDGPURegBankCombiner.cpp:42
llvm::report_fatal_error
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:145
llvm::ConstantFP
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:257
llvm::ConstantFP::isExactlyValue
bool isExactlyValue(const APFloat &V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
Definition: Constants.cpp:1049
llvm::CombinerHelper
Definition: CombinerHelper.h:109
llvm::FPValueAndVReg::VReg
Register VReg
Definition: Utils.h:198
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
llvm::TargetPassConfig
Target-Independent Code Generator Pass Configuration Options.
Definition: TargetPassConfig.h:84
AMDGPURegBankCombinerHelper
Definition: AMDGPURegBankCombiner.cpp:34
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:657
Combine
Hexagon Vector Combine
Definition: HexagonVectorCombine.cpp:2520
AMDGPURegBankCombinerHelper::Med3MatchInfo::Opc
unsigned Opc
Definition: AMDGPURegBankCombiner.cpp:60
llvm::RegisterBankInfo
Holds all the information related to register banks.
Definition: RegisterBankInfo.h:39
AMDGPURegisterBankInfo.h
AMDGPUMCTargetDesc.h
llvm::MachineIRBuilder
Helper class to build MachineInstr.
Definition: MachineIRBuilder.h:221
llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:66
llvm::Combiner
Definition: Combiner.h:26
llvm::MachineRegisterInfo::setRegBank
void setRegBank(Register Reg, const RegisterBank &RegBank)
Set the register bank to RegBank for Reg.
Definition: MachineRegisterInfo.cpp:61
DEBUG_TYPE
#define DEBUG_TYPE
Definition: AMDGPURegBankCombiner.cpp:29
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
AMDGPURegBankCombinerHelper::getAsVgpr
Register getAsVgpr(Register Reg)
Definition: AMDGPURegBankCombiner.cpp:90
AMDGPURegBankCombinerHelper::RBI
const RegisterBankInfo & RBI
Definition: AMDGPURegBankCombiner.cpp:40
llvm::MIPatternMatch::m_CommutativeBinOp
BinaryOpc_match< LHS, RHS, true > m_CommutativeBinOp(unsigned Opcode, const LHS &L, const RHS &R)
Definition: MIPatternMatch.h:443
AMDGPURegBankCombinerHelper::Subtarget
const GCNSubtarget & Subtarget
Definition: AMDGPURegBankCombiner.cpp:39
TargetPassConfig.h
llvm::MachineOperand::getFPImm
const ConstantFP * getFPImm() const
Definition: MachineOperand.h:556
std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:853
llvm::MachineOperand::getReg
Register getReg() const
getReg - Returns the register number.
Definition: MachineOperand.h:359
llvm::MachineFunction
Definition: MachineFunction.h:257
AMDGPURegBankCombinerHelper::applyMed3
void applyMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo)
Definition: AMDGPURegBankCombiner.cpp:321
CombinerHelper.h
AMDGPURegBankCombinerHelper::Med3MatchInfo::Val2
Register Val2
Definition: AMDGPURegBankCombiner.cpp:61
llvm::AnalysisUsage::setPreservesCFG
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:265
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
AMDGPU.h
llvm::MachineInstr::getOpcode
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:516
llvm::MachineRegisterInfo::hasOneNonDBGUse
bool hasOneNonDBGUse(Register RegNo) const
hasOneNonDBGUse - Return true if there is exactly one non-Debug use of the specified register.
Definition: MachineRegisterInfo.cpp:415
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:143
llvm::AnalysisUsage::addPreserved
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Definition: PassAnalysisSupport.h:98
llvm::createAMDGPURegBankCombiner
FunctionPass * createAMDGPURegBankCombiner(bool IsOptNone)
Definition: AMDGPURegBankCombiner.cpp:489
Combiner.h
AMDGPURegBankCombinerHelper::matchFPMinMaxToMed3
bool matchFPMinMaxToMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo)
Definition: AMDGPURegBankCombiner.cpp:196
llvm::MIPatternMatch::m_any_of
Or< Preds... > m_any_of(Preds &&... preds)
Definition: MIPatternMatch.h:312
AMDGPURegBankCombinerHelper::MinMaxMedOpc
Definition: AMDGPURegBankCombiner.cpp:55
AMDGPURegBankCombinerHelper::matchFPMinMaxToClamp
bool matchFPMinMaxToClamp(MachineInstr &MI, Register &Reg)
Definition: AMDGPURegBankCombiner.cpp:236
llvm::GISelChangeObserver
Abstract class that contains various methods for clients to notify about changes.
Definition: GISelChangeObserver.h:29
llvm::isKnownNeverNaN
bool isKnownNeverNaN(const Value *V, const TargetLibraryInfo *TLI, unsigned Depth=0)
Return true if the floating-point scalar value is not a NaN or if the floating-point vector value has...
Definition: ValueTracking.cpp:3826
MRI
unsigned const MachineRegisterInfo * MRI
Definition: AArch64AdvSIMDScalarPass.cpp:105
llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
AMDGPURegBankCombinerHelper::MinMaxMedOpc::Med
unsigned Med
Definition: AMDGPURegBankCombiner.cpp:56
llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition: MachineFunction.h:623
llvm::None
constexpr std::nullopt_t None
Definition: None.h:27
llvm::MachineFunction::getTarget
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Definition: MachineFunction.h:653
llvm::isKnownNeverSNaN
bool isKnownNeverSNaN(Register Val, const MachineRegisterInfo &MRI)
Returns true if Val can be assumed to never be a signaling NaN.
Definition: Utils.h:300
AMDGPURegBankCombinerHelper::B
MachineIRBuilder & B
Definition: AMDGPURegBankCombiner.cpp:36
llvm::APFloat::isExactlyValue
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
Definition: APFloat.h:1213
llvm::SIInstrInfo
Definition: SIInstrInfo.h:44
AMDGPULegalizerInfo.h
llvm::MachineRegisterInfo::getType
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
Definition: MachineRegisterInfo.h:745
AMDGPURegBankCombinerHelperState
Definition: AMDGPURegBankCombiner.cpp:358
llvm::SIMachineFunctionInfo
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Definition: SIMachineFunctionInfo.h:351
llvm::initializeAMDGPURegBankCombinerPass
void initializeAMDGPURegBankCombinerPass(PassRegistry &)
llvm::APInt::sgt
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition: APInt.h:1171
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:308
llvm::MIPatternMatch::mi_match
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
Definition: MIPatternMatch.h:25
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
AMDGPURegBankCombinerHelper::matchIntMinMaxToMed3
bool matchIntMinMaxToMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo)
Definition: AMDGPURegBankCombiner.cpp:150
llvm::MachineDominatorTree
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
Definition: MachineDominators.h:51
combine
vector combine
Definition: VectorCombine.cpp:1843
llvm::LLT::scalar
static LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
Definition: LowLevelTypeImpl.h:42
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(AMDGPURegBankCombiner, DEBUG_TYPE, "Combine AMDGPU machine instrs after regbankselect", false, false) INITIALIZE_PASS_END(AMDGPURegBankCombiner
AMDGPURegBankCombinerHelper::TRI
const TargetRegisterInfo & TRI
Definition: AMDGPURegBankCombiner.cpp:41
AMDGPURegBankCombinerHelperState::Helper
CombinerHelper & Helper
Definition: AMDGPURegBankCombiner.cpp:360
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
MachineDominators.h
llvm::LLT
Definition: LowLevelTypeImpl.h:39