//=== lib/CodeGen/GlobalISel/AMDGPURegBankCombiner.cpp ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass does combining of machine instructions at the generic MI level,
// after register banks are known.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPULegalizerInfo.h"
#include "AMDGPURegisterBankInfo.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h"
#include "llvm/CodeGen/GlobalISel/GISelValueTracking.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/Target/TargetMachine.h"

#define GET_GICOMBINER_DEPS
#include "AMDGPUGenRegBankGICombiner.inc"
#undef GET_GICOMBINER_DEPS

#define DEBUG_TYPE "amdgpu-regbank-combiner"

using namespace llvm;
using namespace MIPatternMatch;

namespace {
#define GET_GICOMBINER_TYPES
#include "AMDGPUGenRegBankGICombiner.inc"
#undef GET_GICOMBINER_TYPES

class AMDGPURegBankCombinerImpl : public Combiner {
protected:
  const AMDGPURegBankCombinerImplRuleConfig &RuleConfig;
  const GCNSubtarget &STI;
  const RegisterBankInfo &RBI;
  const TargetRegisterInfo &TRI;
  const SIInstrInfo &TII;
  const CombinerHelper Helper;

public:
  AMDGPURegBankCombinerImpl(
      MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
      GISelValueTracking &VT, GISelCSEInfo *CSEInfo,
      const AMDGPURegBankCombinerImplRuleConfig &RuleConfig,
      const GCNSubtarget &STI, MachineDominatorTree *MDT,
      const LegalizerInfo *LI);

  static const char *getName() { return "AMDGPURegBankCombinerImpl"; }

  bool tryCombineAll(MachineInstr &I) const override;

  bool isVgprRegBank(Register Reg) const;
  Register getAsVgpr(Register Reg) const;

  struct MinMaxMedOpc {
    unsigned Min, Max, Med;
  };

  struct Med3MatchInfo {
    unsigned Opc;
    Register Val0, Val1, Val2;
  };

  MinMaxMedOpc getMinMaxPair(unsigned Opc) const;

  template <class m_Cst, typename CstTy>
  bool matchMed(MachineInstr &MI, MachineRegisterInfo &MRI, MinMaxMedOpc MMMOpc,
                Register &Val, CstTy &K0, CstTy &K1) const;

  bool matchIntMinMaxToMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo) const;
  bool matchFPMinMaxToMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo) const;
  bool matchFPMinMaxToClamp(MachineInstr &MI, Register &Reg) const;
  bool matchFPMed3ToClamp(MachineInstr &MI, Register &Reg) const;
  void applyMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo) const;
  void applyClamp(MachineInstr &MI, Register &Reg) const;

  void applyCanonicalizeZextShiftAmt(MachineInstr &MI, MachineInstr &Ext) const;

  bool combineD16Load(MachineInstr &MI) const;
  bool applyD16Load(unsigned D16Opc, MachineInstr &DstMI,
                    MachineInstr *SmallLoad, Register ToOverwriteD16) const;

private:
  SIModeRegisterDefaults getMode() const;
  bool getIEEE() const;
  bool getDX10Clamp() const;
  bool isFminnumIeee(const MachineInstr &MI) const;
  bool isFCst(MachineInstr *MI) const;
  bool isClampZeroToOne(MachineInstr *K0, MachineInstr *K1) const;

#define GET_GICOMBINER_CLASS_MEMBERS
#define AMDGPUSubtarget GCNSubtarget
#include "AMDGPUGenRegBankGICombiner.inc"
#undef GET_GICOMBINER_CLASS_MEMBERS
#undef AMDGPUSubtarget
};

#define GET_GICOMBINER_IMPL
#define AMDGPUSubtarget GCNSubtarget
#include "AMDGPUGenRegBankGICombiner.inc"
#undef AMDGPUSubtarget
#undef GET_GICOMBINER_IMPL

AMDGPURegBankCombinerImpl::AMDGPURegBankCombinerImpl(
    MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
    GISelValueTracking &VT, GISelCSEInfo *CSEInfo,
    const AMDGPURegBankCombinerImplRuleConfig &RuleConfig,
    const GCNSubtarget &STI, MachineDominatorTree *MDT, const LegalizerInfo *LI)
    : Combiner(MF, CInfo, TPC, &VT, CSEInfo), RuleConfig(RuleConfig), STI(STI),
      RBI(*STI.getRegBankInfo()), TRI(*STI.getRegisterInfo()),
      TII(*STI.getInstrInfo()),
      Helper(Observer, B, /*IsPreLegalize*/ false, &VT, MDT, LI),
#define GET_GICOMBINER_CONSTRUCTOR_INITS
#include "AMDGPUGenRegBankGICombiner.inc"
#undef GET_GICOMBINER_CONSTRUCTOR_INITS
{
}

bool AMDGPURegBankCombinerImpl::isVgprRegBank(Register Reg) const {
  return RBI.getRegBank(Reg, MRI, TRI)->getID() == AMDGPU::VGPRRegBankID;
}

Register AMDGPURegBankCombinerImpl::getAsVgpr(Register Reg) const {
  if (isVgprRegBank(Reg))
    return Reg;

  // Search for existing copy of Reg to vgpr.
  for (MachineInstr &Use : MRI.use_instructions(Reg)) {
    Register Def = Use.getOperand(0).getReg();
    if (Use.getOpcode() == AMDGPU::COPY && isVgprRegBank(Def))
      return Def;
  }

  // Copy Reg to vgpr.
  Register VgprReg = B.buildCopy(MRI.getType(Reg), Reg).getReg(0);
  MRI.setRegBank(VgprReg, RBI.getRegBank(AMDGPU::VGPRRegBankID));
  return VgprReg;
}
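
// Editorial note (a sketch, not from the source): given
//   %s:sgpr(s32) = ...
//   %v:vgpr(s32) = COPY %s
// a later getAsVgpr(%s) returns the existing %v rather than building a second
// COPY, so operands reused across combines don't accumulate redundant
// sgpr-to-vgpr copies.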

AMDGPURegBankCombinerImpl::MinMaxMedOpc
AMDGPURegBankCombinerImpl::getMinMaxPair(unsigned Opc) const {
  switch (Opc) {
  default:
    llvm_unreachable("Unsupported opcode");
  case AMDGPU::G_SMAX:
  case AMDGPU::G_SMIN:
    return {AMDGPU::G_SMIN, AMDGPU::G_SMAX, AMDGPU::G_AMDGPU_SMED3};
  case AMDGPU::G_UMAX:
  case AMDGPU::G_UMIN:
    return {AMDGPU::G_UMIN, AMDGPU::G_UMAX, AMDGPU::G_AMDGPU_UMED3};
  case AMDGPU::G_FMAXNUM:
  case AMDGPU::G_FMINNUM:
    return {AMDGPU::G_FMINNUM, AMDGPU::G_FMAXNUM, AMDGPU::G_AMDGPU_FMED3};
  case AMDGPU::G_FMAXNUM_IEEE:
  case AMDGPU::G_FMINNUM_IEEE:
    return {AMDGPU::G_FMINNUM_IEEE, AMDGPU::G_FMAXNUM_IEEE,
            AMDGPU::G_AMDGPU_FMED3};
  }
}

template <class m_Cst, typename CstTy>
bool AMDGPURegBankCombinerImpl::matchMed(MachineInstr &MI,
                                         MachineRegisterInfo &MRI,
                                         MinMaxMedOpc MMMOpc, Register &Val,
                                         CstTy &K0, CstTy &K1) const {
  // 4 operand commutes of: min(max(Val, K0), K1).
  // Find K1 from outer instr: min(max(...), K1) or min(K1, max(...)).
  // Find K0 and Val from inner instr: max(K0, Val) or max(Val, K0).
  // 4 operand commutes of: max(min(Val, K1), K0).
  // Find K0 from outer instr: max(min(...), K0) or max(K0, min(...)).
  // Find K1 and Val from inner instr: min(K1, Val) or min(Val, K1).
  return mi_match(
      MI, MRI,
      m_any_of(
          m_CommutativeBinOp(
              MMMOpc.Min, m_CommutativeBinOp(MMMOpc.Max, m_Reg(Val), m_Cst(K0)),
              m_Cst(K1)),
          m_CommutativeBinOp(
              MMMOpc.Max, m_CommutativeBinOp(MMMOpc.Min, m_Reg(Val), m_Cst(K1)),
              m_Cst(K0))));
}
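
// Illustrative shapes accepted above (an editorial sketch, hypothetical MIR):
// for the signed triple, each of the four operand orders
//   G_SMIN (G_SMAX %val, %k0), %k1
//   G_SMIN %k1, (G_SMAX %val, %k0)
//   G_SMIN (G_SMAX %k0, %val), %k1
//   G_SMIN %k1, (G_SMAX %k0, %val)
// binds Val = %val, K0 = %k0, K1 = %k1; the second m_any_of alternative
// handles the same commutes of max(min(Val, K1), K0).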

bool AMDGPURegBankCombinerImpl::matchIntMinMaxToMed3(
    MachineInstr &MI, Med3MatchInfo &MatchInfo) const {
  Register Dst = MI.getOperand(0).getReg();
  if (!isVgprRegBank(Dst))
    return false;

  // med3 for i16 is only available on gfx9+, and not available for v2i16.
  LLT Ty = MRI.getType(Dst);
  if ((Ty != LLT::scalar(16) || !STI.hasMed3_16()) && Ty != LLT::scalar(32))
    return false;

  MinMaxMedOpc OpcodeTriple = getMinMaxPair(MI.getOpcode());
  Register Val;
  std::optional<ValueAndVReg> K0, K1;
  // Match min(max(Val, K0), K1) or max(min(Val, K1), K0). Then see if K0 <= K1.
  if (!matchMed<GCstAndRegMatch>(MI, MRI, OpcodeTriple, Val, K0, K1))
    return false;

  if (OpcodeTriple.Med == AMDGPU::G_AMDGPU_SMED3 && K0->Value.sgt(K1->Value))
    return false;
  if (OpcodeTriple.Med == AMDGPU::G_AMDGPU_UMED3 && K0->Value.ugt(K1->Value))
    return false;

  MatchInfo = {OpcodeTriple.Med, Val, K0->VReg, K1->VReg};
  return true;
}
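
// Worked example (an editorial sketch, hypothetical MIR):
//   %c2 = G_CONSTANT i32 2
//   %c7 = G_CONSTANT i32 7
//   %a:vgpr(s32) = G_SMAX %x, %c2
//   %b:vgpr(s32) = G_SMIN %a, %c7
// matches with Val = %x, K0 = 2, K1 = 7; since 2 <= 7 in the signed order,
// applyMed3 rewrites %b to
//   %b:vgpr(s32) = G_AMDGPU_SMED3 %x, %c2, %c7
// The K0 > K1 case is rejected because min(max(Val, K0), K1) then no longer
// equals the median of {Val, K0, K1}.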

// fmed3(NaN, K0, K1) = min(min(NaN, K0), K1)
// ieee = true  : min/max(SNaN, K) = QNaN, min/max(QNaN, K) = K
// ieee = false : min/max(NaN, K) = K
// clamp(NaN) = dx10_clamp ? 0.0 : NaN
// Consider values of min(max(Val, K0), K1) and max(min(Val, K1), K0) as input.
// Other operand commutes (see matchMed) give the same result since min and max
// are commutative.

// Try to replace fp min(max(Val, K0), K1) or max(min(Val, K1), K0), K0 <= K1,
// with fmed3(Val, K0, K1) or clamp(Val). Clamp requires K0 = 0.0 and K1 = 1.0.
// Val = SNaN only for ieee = true
// fmed3(SNaN, K0, K1) = min(min(SNaN, K0), K1) = min(QNaN, K1) = K1
// min(max(SNaN, K0), K1) = min(QNaN, K1) = K1
// max(min(SNaN, K1), K0) = max(K1, K0) = K1
// Val = NaN, ieee = false or Val = QNaN, ieee = true
// fmed3(NaN, K0, K1) = min(min(NaN, K0), K1) = min(K0, K1) = K0
// min(max(NaN, K0), K1) = min(K0, K1) = K0 (can clamp when dx10_clamp = true)
// max(min(NaN, K1), K0) = max(K1, K0) = K1 != K0
bool AMDGPURegBankCombinerImpl::matchFPMinMaxToMed3(
    MachineInstr &MI, Med3MatchInfo &MatchInfo) const {
  Register Dst = MI.getOperand(0).getReg();
  LLT Ty = MRI.getType(Dst);

  // med3 for f16 is only available on gfx9+, and not available for v2f16.
  if ((Ty != LLT::scalar(16) || !STI.hasMed3_16()) && Ty != LLT::scalar(32))
    return false;

  auto OpcodeTriple = getMinMaxPair(MI.getOpcode());

  Register Val;
  std::optional<FPValueAndVReg> K0, K1;
  // Match min(max(Val, K0), K1) or max(min(Val, K1), K0). Then see if K0 <= K1.
  if (!matchMed<GFCstAndRegMatch>(MI, MRI, OpcodeTriple, Val, K0, K1))
    return false;

  if (K0->Value > K1->Value)
    return false;

  // For IEEE=false perform combine only when it's safe to assume that there
  // are no NaN inputs. Most often MI is marked with the nnan fast math flag.
  // For IEEE=true consider NaN inputs. fmed3(NaN, K0, K1) is equivalent to
  // min(min(NaN, K0), K1). Safe to fold for min(max(Val, K0), K1) since the
  // inner nodes (max/min) have the same behavior when one input is NaN and the
  // other isn't. Don't consider max(min(SNaN, K1), K0) since there is no
  // isKnownNeverQNaN; also, post-legalizer inputs to min/max are
  // fcanonicalized (never SNaN).
  if ((getIEEE() && isFminnumIeee(MI)) || isKnownNeverNaN(Dst, MRI)) {
    // Don't fold a single-use constant that can't be inlined.
    if ((!MRI.hasOneNonDBGUse(K0->VReg) || TII.isInlineConstant(K0->Value)) &&
        (!MRI.hasOneNonDBGUse(K1->VReg) || TII.isInlineConstant(K1->Value))) {
      MatchInfo = {OpcodeTriple.Med, Val, K0->VReg, K1->VReg};
      return true;
    }
  }

  return false;
}
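
// Worked instance of the NaN reasoning above (an editorial sketch): take
// ieee = true, Val = QNaN, K0 = 2.0, K1 = 7.0. Then
//   min(max(QNaN, 2.0), 7.0) = min(2.0, 7.0) = 2.0
//   fmed3(QNaN, 2.0, 7.0) = min(min(QNaN, 2.0), 7.0) = min(2.0, 7.0) = 2.0
// so the G_FMINNUM_IEEE-rooted form folds safely, whereas
//   max(min(QNaN, 7.0), 2.0) = max(7.0, 2.0) = 7.0
// disagrees with fmed3 and is therefore only combined when Dst is known
// never NaN.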

bool AMDGPURegBankCombinerImpl::matchFPMinMaxToClamp(MachineInstr &MI,
                                                     Register &Reg) const {
  // Clamp is available on all types after regbankselect (f16, f32, f64, v2f16).
  auto OpcodeTriple = getMinMaxPair(MI.getOpcode());
  Register Val;
  std::optional<FPValueAndVReg> K0, K1;
  // Match min(max(Val, K0), K1) or max(min(Val, K1), K0).
  if (!matchMed<GFCstOrSplatGFCstMatch>(MI, MRI, OpcodeTriple, Val, K0, K1))
    return false;

  if (!K0->Value.isExactlyValue(0.0) || !K1->Value.isExactlyValue(1.0))
    return false;

  // For IEEE=false perform combine only when it's safe to assume that there
  // are no NaN inputs. Most often MI is marked with the nnan fast math flag.
  // For IEEE=true consider NaN inputs. Only min(max(QNaN, 0.0), 1.0) evaluates
  // to 0.0; matching clamp's behavior requires dx10_clamp = true.
  if ((getIEEE() && getDX10Clamp() && isFminnumIeee(MI) &&
       isKnownNeverSNaN(Val, MRI)) ||
      isKnownNeverNaN(MI.getOperand(0).getReg(), MRI)) {
    Reg = Val;
    return true;
  }

  return false;
}
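
// Illustrative sketch (hypothetical MIR, not from the source):
//   %c0 = G_FCONSTANT float 0.0
//   %c1 = G_FCONSTANT float 1.0
//   %a:vgpr(s32) = G_FMAXNUM_IEEE %x, %c0
//   %b:vgpr(s32) = G_FMINNUM_IEEE %a, %c1
// becomes, once the NaN conditions above hold,
//   %b:vgpr(s32) = G_AMDGPU_CLAMP %x
// Unlike the med3 combines, this also fires for f64 and v2f16 (via the splat
// constant matcher), since clamp is available on all of these types.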

// Replacing fmed3(NaN, 0.0, 1.0) with clamp. Requires dx10_clamp = true.
// Val = SNaN only for ieee = true. It is important which operand is NaN.
// min(min(SNaN, 0.0), 1.0) = min(QNaN, 1.0) = 1.0
// min(min(SNaN, 1.0), 0.0) = min(QNaN, 0.0) = 0.0
// min(min(0.0, 1.0), SNaN) = min(0.0, SNaN) = QNaN
// Val = NaN, ieee = false or Val = QNaN, ieee = true
// min(min(NaN, 0.0), 1.0) = min(0.0, 1.0) = 0.0
// min(min(NaN, 1.0), 0.0) = min(1.0, 0.0) = 0.0
// min(min(0.0, 1.0), NaN) = min(0.0, NaN) = 0.0
bool AMDGPURegBankCombinerImpl::matchFPMed3ToClamp(MachineInstr &MI,
                                                   Register &Reg) const {
  // In llvm-ir, clamp is often represented as an intrinsic call to
  // @llvm.amdgcn.fmed3.f32(%Val, 0.0, 1.0). Check for other operand orders.
  MachineInstr *Src0 = getDefIgnoringCopies(MI.getOperand(1).getReg(), MRI);
  MachineInstr *Src1 = getDefIgnoringCopies(MI.getOperand(2).getReg(), MRI);
  MachineInstr *Src2 = getDefIgnoringCopies(MI.getOperand(3).getReg(), MRI);

  if (isFCst(Src0) && !isFCst(Src1))
    std::swap(Src0, Src1);
  if (isFCst(Src1) && !isFCst(Src2))
    std::swap(Src1, Src2);
  if (isFCst(Src0) && !isFCst(Src1))
    std::swap(Src0, Src1);
  if (!isClampZeroToOne(Src1, Src2))
    return false;

  Register Val = Src0->getOperand(0).getReg();

  auto isOp3Zero = [&]() {
    MachineInstr *Op3 = getDefIgnoringCopies(MI.getOperand(3).getReg(), MRI);
    if (Op3->getOpcode() == TargetOpcode::G_FCONSTANT)
      return Op3->getOperand(1).getFPImm()->isExactlyValue(0.0);
    return false;
  };
  // For IEEE=false perform combine only when it's safe to assume that there
  // are no NaN inputs. Most often MI is marked with the nnan fast math flag.
  // For IEEE=true consider NaN inputs. Requires dx10_clamp = true. Safe to fold
  // when Val could be QNaN. If Val can also be SNaN, the third input must be
  // 0.0.
  if (isKnownNeverNaN(MI.getOperand(0).getReg(), MRI) ||
      (getIEEE() && getDX10Clamp() &&
       (isKnownNeverSNaN(Val, MRI) || isOp3Zero()))) {
    Reg = Val;
    return true;
  }

  return false;
}
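
// Illustrative sketch (hypothetical MIR, not from the source):
//   %m:vgpr(s32) = G_AMDGPU_FMED3 %c0, %x, %c1
// with %c0 = 0.0 and %c1 = 1.0 is recognized despite the operand order (the
// swaps above sort the non-constant input into Src0) and becomes
//   %m:vgpr(s32) = G_AMDGPU_CLAMP %x
// provided the result is known never NaN, or ieee and dx10_clamp are set and
// %x is either never SNaN or the original third operand is 0.0.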

void AMDGPURegBankCombinerImpl::applyClamp(MachineInstr &MI,
                                           Register &Reg) const {
  B.buildInstr(AMDGPU::G_AMDGPU_CLAMP, {MI.getOperand(0)}, {Reg},
               MI.getFlags());
  MI.eraseFromParent();
}

void AMDGPURegBankCombinerImpl::applyMed3(MachineInstr &MI,
                                          Med3MatchInfo &MatchInfo) const {
  B.buildInstr(MatchInfo.Opc, {MI.getOperand(0)},
               {getAsVgpr(MatchInfo.Val0), getAsVgpr(MatchInfo.Val1),
                getAsVgpr(MatchInfo.Val2)},
               MI.getFlags());
  MI.eraseFromParent();
}

void AMDGPURegBankCombinerImpl::applyCanonicalizeZextShiftAmt(
    MachineInstr &MI, MachineInstr &Ext) const {
  unsigned ShOpc = MI.getOpcode();
  assert(ShOpc == AMDGPU::G_SHL || ShOpc == AMDGPU::G_LSHR ||
         ShOpc == AMDGPU::G_ASHR);
  assert(Ext.getOpcode() == AMDGPU::G_ZEXT);

  Register AmtReg = Ext.getOperand(1).getReg();
  Register ShDst = MI.getOperand(0).getReg();
  Register ShSrc = MI.getOperand(1).getReg();

  LLT ExtAmtTy = MRI.getType(Ext.getOperand(0).getReg());
  LLT AmtTy = MRI.getType(AmtReg);

  auto &RB = *MRI.getRegBank(AmtReg);

  auto NewExt = B.buildAnyExt(ExtAmtTy, AmtReg);
  auto Mask = B.buildConstant(
      ExtAmtTy, maskTrailingOnes<uint64_t>(AmtTy.getScalarSizeInBits()));
  auto And = B.buildAnd(ExtAmtTy, NewExt, Mask);
  B.buildInstr(ShOpc, {ShDst}, {ShSrc, And});

  MRI.setRegBank(NewExt.getReg(0), RB);
  MRI.setRegBank(Mask.getReg(0), RB);
  MRI.setRegBank(And.getReg(0), RB);
  MI.eraseFromParent();
}
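
// Illustrative sketch (hypothetical MIR, not from the source): a 16-bit shift
// amount that reaches the shift through a zext,
//   %amt32:vgpr(s32) = G_ZEXT %amt16:vgpr(s16)
//   %r:vgpr(s32) = G_SHL %x, %amt32
// is canonicalized to
//   %a:vgpr(s32) = G_ANYEXT %amt16
//   %m:vgpr(s32) = G_CONSTANT i32 65535
//   %and:vgpr(s32) = G_AND %a, %m
//   %r:vgpr(s32) = G_SHL %x, %and
// trading the zext for an explicit mask of AmtTy's bit width.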

bool AMDGPURegBankCombinerImpl::combineD16Load(MachineInstr &MI) const {
  Register Dst;
  MachineInstr *Load, *SextLoad;
  const int64_t CleanLo16 = 0xFFFFFFFFFFFF0000;
  const int64_t CleanHi16 = 0x000000000000FFFF;

  // Load lo
  if (mi_match(MI.getOperand(1).getReg(), MRI,
               m_GOr(m_GAnd(m_GBitcast(m_Reg(Dst)),
                            m_Copy(m_SpecificICst(CleanLo16))),
                     m_MInstr(Load)))) {

    if (Load->getOpcode() == AMDGPU::G_ZEXTLOAD) {
      const MachineMemOperand *MMO = *Load->memoperands_begin();
      unsigned LoadSize = MMO->getSizeInBits().getValue();
      if (LoadSize == 8)
        return applyD16Load(AMDGPU::G_AMDGPU_LOAD_D16_LO_U8, MI, Load, Dst);
      if (LoadSize == 16)
        return applyD16Load(AMDGPU::G_AMDGPU_LOAD_D16_LO, MI, Load, Dst);
      return false;
    }

    if (mi_match(
            Load, MRI,
            m_GAnd(m_MInstr(SextLoad), m_Copy(m_SpecificICst(CleanHi16))))) {
      if (SextLoad->getOpcode() != AMDGPU::G_SEXTLOAD)
        return false;

      const MachineMemOperand *MMO = *SextLoad->memoperands_begin();
      if (MMO->getSizeInBits().getValue() != 8)
        return false;

      return applyD16Load(AMDGPU::G_AMDGPU_LOAD_D16_LO_I8, MI, SextLoad, Dst);
    }

    return false;
  }

  // Load hi
  if (mi_match(MI.getOperand(1).getReg(), MRI,
               m_GOr(m_GAnd(m_GBitcast(m_Reg(Dst)),
                            m_Copy(m_SpecificICst(CleanHi16))),
                     m_GShl(m_MInstr(Load), m_Copy(m_SpecificICst(16)))))) {

    if (Load->getOpcode() == AMDGPU::G_ZEXTLOAD) {
      const MachineMemOperand *MMO = *Load->memoperands_begin();
      unsigned LoadSize = MMO->getSizeInBits().getValue();
      if (LoadSize == 8)
        return applyD16Load(AMDGPU::G_AMDGPU_LOAD_D16_HI_U8, MI, Load, Dst);
      if (LoadSize == 16)
        return applyD16Load(AMDGPU::G_AMDGPU_LOAD_D16_HI, MI, Load, Dst);
      return false;
    }

    if (mi_match(
            Load, MRI,
            m_GAnd(m_MInstr(SextLoad), m_Copy(m_SpecificICst(CleanHi16))))) {
      if (SextLoad->getOpcode() != AMDGPU::G_SEXTLOAD)
        return false;
      const MachineMemOperand *MMO = *SextLoad->memoperands_begin();
      if (MMO->getSizeInBits().getValue() != 8)
        return false;

      return applyD16Load(AMDGPU::G_AMDGPU_LOAD_D16_HI_I8, MI, SextLoad, Dst);
    }

    return false;
  }

  return false;
}
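
// Illustrative sketch of the "load lo" pattern (hypothetical MIR, not from
// the source): replacing the low half of a v2f16 value with a 16-bit load,
//   %old:vgpr(s32) = G_BITCAST %vec:vgpr(<2 x s16>)
//   %hi:vgpr(s32) = G_AND %old, %kFFFF0000
//   %lo:vgpr(s32) = G_ZEXTLOAD %ptr :: (load (s16))
//   %or:vgpr(s32) = G_OR %hi, %lo
//   %res:vgpr(<2 x s16>) = G_BITCAST %or
// collapses (MI being the final bitcast) into a single D16 load that writes
// only the low half of the destination:
//   %res:vgpr(<2 x s16>) = G_AMDGPU_LOAD_D16_LO %ptr, %vec :: (load (s16))
// The "load hi" form additionally matches the G_SHL by 16 that positions the
// loaded bits in the high half.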

bool AMDGPURegBankCombinerImpl::applyD16Load(
    unsigned D16Opc, MachineInstr &DstMI, MachineInstr *SmallLoad,
    Register SrcReg32ToOverwriteD16) const {
  B.buildInstr(D16Opc, {DstMI.getOperand(0).getReg()},
               {SmallLoad->getOperand(1).getReg(), SrcReg32ToOverwriteD16})
      .setMemRefs(SmallLoad->memoperands());
  DstMI.eraseFromParent();
  return true;
}

SIModeRegisterDefaults AMDGPURegBankCombinerImpl::getMode() const {
  return MF.getInfo<SIMachineFunctionInfo>()->getMode();
}

bool AMDGPURegBankCombinerImpl::getIEEE() const { return getMode().IEEE; }

bool AMDGPURegBankCombinerImpl::getDX10Clamp() const {
  return getMode().DX10Clamp;
}

bool AMDGPURegBankCombinerImpl::isFminnumIeee(const MachineInstr &MI) const {
  return MI.getOpcode() == AMDGPU::G_FMINNUM_IEEE;
}

bool AMDGPURegBankCombinerImpl::isFCst(MachineInstr *MI) const {
  return MI->getOpcode() == AMDGPU::G_FCONSTANT;
}

bool AMDGPURegBankCombinerImpl::isClampZeroToOne(MachineInstr *K0,
                                                 MachineInstr *K1) const {
  if (isFCst(K0) && isFCst(K1)) {
    const ConstantFP *K0_FPImm = K0->getOperand(1).getFPImm();
    const ConstantFP *K1_FPImm = K1->getOperand(1).getFPImm();
    return (K0_FPImm->isExactlyValue(0.0) && K1_FPImm->isExactlyValue(1.0)) ||
           (K0_FPImm->isExactlyValue(1.0) && K1_FPImm->isExactlyValue(0.0));
  }
  return false;
}

// Pass boilerplate
// ================

class AMDGPURegBankCombiner : public MachineFunctionPass {
public:
  static char ID;

  AMDGPURegBankCombiner(bool IsOptNone = false);

  StringRef getPassName() const override { return "AMDGPURegBankCombiner"; }

  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override;

private:
  bool IsOptNone;
  AMDGPURegBankCombinerImplRuleConfig RuleConfig;
};
} // end anonymous namespace

void AMDGPURegBankCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<TargetPassConfig>();
  AU.setPreservesCFG();
  getSelectionDAGFallbackAnalysisUsage(AU);
  AU.addRequired<GISelValueTrackingAnalysisLegacy>();
  AU.addPreserved<GISelValueTrackingAnalysisLegacy>();
  if (!IsOptNone) {
    AU.addRequired<MachineDominatorTreeWrapperPass>();
    AU.addPreserved<MachineDominatorTreeWrapperPass>();
  }
  MachineFunctionPass::getAnalysisUsage(AU);
}

AMDGPURegBankCombiner::AMDGPURegBankCombiner(bool IsOptNone)
    : MachineFunctionPass(ID), IsOptNone(IsOptNone) {
  if (!RuleConfig.parseCommandLineOption())
    report_fatal_error("Invalid rule identifier");
}

bool AMDGPURegBankCombiner::runOnMachineFunction(MachineFunction &MF) {
  if (MF.getProperties().hasFailedISel())
    return false;
  auto *TPC = &getAnalysis<TargetPassConfig>();
  const Function &F = MF.getFunction();
  bool EnableOpt =
      MF.getTarget().getOptLevel() != CodeGenOptLevel::None && !skipFunction(F);

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  GISelValueTracking *VT =
      &getAnalysis<GISelValueTrackingAnalysisLegacy>().get(MF);

  const auto *LI = ST.getLegalizerInfo();
  MachineDominatorTree *MDT =
      IsOptNone ? nullptr
                : &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();

  CombinerInfo CInfo(/*AllowIllegalOps*/ false, /*ShouldLegalizeIllegal*/ true,
                     LI, EnableOpt, F.hasOptSize(), F.hasMinSize());
  // Disable fixed-point iteration to reduce compile time.
  CInfo.MaxIterations = 1;
  CInfo.ObserverLvl = CombinerInfo::ObserverLevel::SinglePass;
  // RegBankSelect seems not to leave dead instructions, so a full DCE pass is
  // unnecessary.
  CInfo.EnableFullDCE = false;
  AMDGPURegBankCombinerImpl Impl(MF, CInfo, TPC, *VT, /*CSEInfo*/ nullptr,
                                 RuleConfig, ST, MDT, LI);
  return Impl.combineMachineInstrs();
}

char AMDGPURegBankCombiner::ID = 0;
INITIALIZE_PASS_BEGIN(AMDGPURegBankCombiner, DEBUG_TYPE,
                      "Combine AMDGPU machine instrs after regbankselect",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_DEPENDENCY(GISelValueTrackingAnalysisLegacy)
INITIALIZE_PASS_END(AMDGPURegBankCombiner, DEBUG_TYPE,
                    "Combine AMDGPU machine instrs after regbankselect", false,
                    false)

FunctionPass *llvm::createAMDGPURegBankCombiner(bool IsOptNone) {
  return new AMDGPURegBankCombiner(IsOptNone);
}