LLVM 23.0.0git
AMDGPURegBankCombiner.cpp
Go to the documentation of this file.
1//=== lib/CodeGen/GlobalISel/AMDGPURegBankCombiner.cpp ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass does combining of machine instructions at the generic MI level,
10// after register banks are known.
11//
12//===----------------------------------------------------------------------===//
13
14#include "AMDGPU.h"
15#include "AMDGPULegalizerInfo.h"
17#include "GCNSubtarget.h"
29
30#define GET_GICOMBINER_DEPS
31#include "AMDGPUGenPreLegalizeGICombiner.inc"
32#undef GET_GICOMBINER_DEPS
33
34#define DEBUG_TYPE "amdgpu-regbank-combiner"
35
36using namespace llvm;
37using namespace MIPatternMatch;
38
39namespace {
40#define GET_GICOMBINER_TYPES
41#include "AMDGPUGenRegBankGICombiner.inc"
42#undef GET_GICOMBINER_TYPES
43
44class AMDGPURegBankCombinerImpl : public Combiner {
45protected:
46 const AMDGPURegBankCombinerImplRuleConfig &RuleConfig;
47 const GCNSubtarget &STI;
48 const RegisterBankInfo &RBI;
50 const SIInstrInfo &TII;
51 const CombinerHelper Helper;
52
53public:
54 AMDGPURegBankCombinerImpl(
56 GISelCSEInfo *CSEInfo,
57 const AMDGPURegBankCombinerImplRuleConfig &RuleConfig,
58 const GCNSubtarget &STI, MachineDominatorTree *MDT,
59 const LegalizerInfo *LI);
60
61 static const char *getName() { return "AMDGPURegBankCombinerImpl"; }
62
63 bool tryCombineAll(MachineInstr &I) const override;
64
65 bool isVgprRegBank(Register Reg) const;
66 Register getAsVgpr(Register Reg) const;
67
68 struct MinMaxMedOpc {
69 unsigned Min, Max, Med;
70 };
71
72 struct Med3MatchInfo {
73 unsigned Opc;
74 Register Val0, Val1, Val2;
75 };
76
77 MinMaxMedOpc getMinMaxPair(unsigned Opc) const;
78
79 template <class m_Cst, typename CstTy>
80 bool matchMed(MachineInstr &MI, MachineRegisterInfo &MRI, MinMaxMedOpc MMMOpc,
81 Register &Val, CstTy &K0, CstTy &K1) const;
82
83 bool matchIntMinMaxToMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo) const;
84 bool matchFPMinMaxToMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo) const;
85 bool matchFPMinMaxToClamp(MachineInstr &MI, Register &Reg) const;
86 bool matchFPMed3ToClamp(MachineInstr &MI, Register &Reg) const;
87 void applyMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo) const;
88 void applyClamp(MachineInstr &MI, Register &Reg) const;
89
90 void applyCanonicalizeZextShiftAmt(MachineInstr &MI, MachineInstr &Ext) const;
91
92 bool combineD16Load(MachineInstr &MI) const;
93 bool applyD16Load(unsigned D16Opc, MachineInstr &DstMI,
94 MachineInstr *SmallLoad, Register ToOverwriteD16) const;
95
96private:
97 SIModeRegisterDefaults getMode() const;
98 bool getIEEE() const;
99 bool getDX10Clamp() const;
100 bool isFminnumIeee(const MachineInstr &MI) const;
101 bool isFCst(MachineInstr *MI) const;
102 bool isClampZeroToOne(MachineInstr *K0, MachineInstr *K1) const;
103
104#define GET_GICOMBINER_CLASS_MEMBERS
105#define AMDGPUSubtarget GCNSubtarget
106#include "AMDGPUGenRegBankGICombiner.inc"
107#undef GET_GICOMBINER_CLASS_MEMBERS
108#undef AMDGPUSubtarget
109};
110
111#define GET_GICOMBINER_IMPL
112#define AMDGPUSubtarget GCNSubtarget
113#include "AMDGPUGenRegBankGICombiner.inc"
114#undef AMDGPUSubtarget
115#undef GET_GICOMBINER_IMPL
116
117AMDGPURegBankCombinerImpl::AMDGPURegBankCombinerImpl(
119 GISelCSEInfo *CSEInfo,
120 const AMDGPURegBankCombinerImplRuleConfig &RuleConfig,
121 const GCNSubtarget &STI, MachineDominatorTree *MDT, const LegalizerInfo *LI)
122 : Combiner(MF, CInfo, &VT, CSEInfo), RuleConfig(RuleConfig), STI(STI),
123 RBI(*STI.getRegBankInfo()), TRI(*STI.getRegisterInfo()),
124 TII(*STI.getInstrInfo()),
125 Helper(Observer, B, /*IsPreLegalize*/ false, &VT, MDT, LI),
127#include "AMDGPUGenRegBankGICombiner.inc"
129{
130}
131
132bool AMDGPURegBankCombinerImpl::isVgprRegBank(Register Reg) const {
133 return RBI.getRegBank(Reg, MRI, TRI)->getID() == AMDGPU::VGPRRegBankID;
134}
135
136Register AMDGPURegBankCombinerImpl::getAsVgpr(Register Reg) const {
137 if (isVgprRegBank(Reg))
138 return Reg;
139
140 // Search for existing copy of Reg to vgpr.
141 for (MachineInstr &Use : MRI.use_instructions(Reg)) {
142 Register Def = Use.getOperand(0).getReg();
143 if (Use.getOpcode() == AMDGPU::COPY && isVgprRegBank(Def))
144 return Def;
145 }
146
147 // Copy Reg to vgpr.
148 Register VgprReg = B.buildCopy(MRI.getType(Reg), Reg).getReg(0);
149 MRI.setRegBank(VgprReg, RBI.getRegBank(AMDGPU::VGPRRegBankID));
150 return VgprReg;
151}
152
153AMDGPURegBankCombinerImpl::MinMaxMedOpc
154AMDGPURegBankCombinerImpl::getMinMaxPair(unsigned Opc) const {
155 switch (Opc) {
156 default:
157 llvm_unreachable("Unsupported opcode");
158 case AMDGPU::G_SMAX:
159 case AMDGPU::G_SMIN:
160 return {AMDGPU::G_SMIN, AMDGPU::G_SMAX, AMDGPU::G_AMDGPU_SMED3};
161 case AMDGPU::G_UMAX:
162 case AMDGPU::G_UMIN:
163 return {AMDGPU::G_UMIN, AMDGPU::G_UMAX, AMDGPU::G_AMDGPU_UMED3};
164 case AMDGPU::G_FMAXNUM:
165 case AMDGPU::G_FMINNUM:
166 return {AMDGPU::G_FMINNUM, AMDGPU::G_FMAXNUM, AMDGPU::G_AMDGPU_FMED3};
167 case AMDGPU::G_FMAXNUM_IEEE:
168 case AMDGPU::G_FMINNUM_IEEE:
169 return {AMDGPU::G_FMINNUM_IEEE, AMDGPU::G_FMAXNUM_IEEE,
170 AMDGPU::G_AMDGPU_FMED3};
171 }
172}
173
174template <class m_Cst, typename CstTy>
175bool AMDGPURegBankCombinerImpl::matchMed(MachineInstr &MI,
176 MachineRegisterInfo &MRI,
177 MinMaxMedOpc MMMOpc, Register &Val,
178 CstTy &K0, CstTy &K1) const {
179 // 4 operand commutes of: min(max(Val, K0), K1).
180 // Find K1 from outer instr: min(max(...), K1) or min(K1, max(...)).
181 // Find K0 and Val from inner instr: max(K0, Val) or max(Val, K0).
182 // 4 operand commutes of: max(min(Val, K1), K0).
183 // Find K0 from outer instr: max(min(...), K0) or max(K0, min(...)).
184 // Find K1 and Val from inner instr: min(K1, Val) or min(Val, K1).
185 return mi_match(
186 MI, MRI,
187 m_any_of(
189 MMMOpc.Min, m_CommutativeBinOp(MMMOpc.Max, m_Reg(Val), m_Cst(K0)),
190 m_Cst(K1)),
192 MMMOpc.Max, m_CommutativeBinOp(MMMOpc.Min, m_Reg(Val), m_Cst(K1)),
193 m_Cst(K0))));
194}
195
196bool AMDGPURegBankCombinerImpl::matchIntMinMaxToMed3(
197 MachineInstr &MI, Med3MatchInfo &MatchInfo) const {
198 Register Dst = MI.getOperand(0).getReg();
199 if (!isVgprRegBank(Dst))
200 return false;
201
202 // med3 for i16 is only available on gfx9+, and not available for v2i16.
203 LLT Ty = MRI.getType(Dst);
204 if ((Ty != LLT::scalar(16) || !STI.hasMed3_16()) && Ty != LLT::scalar(32))
205 return false;
206
207 MinMaxMedOpc OpcodeTriple = getMinMaxPair(MI.getOpcode());
208 Register Val;
209 std::optional<ValueAndVReg> K0, K1;
210 // Match min(max(Val, K0), K1) or max(min(Val, K1), K0). Then see if K0 <= K1.
211 if (!matchMed<GCstAndRegMatch>(MI, MRI, OpcodeTriple, Val, K0, K1))
212 return false;
213
214 if (OpcodeTriple.Med == AMDGPU::G_AMDGPU_SMED3 && K0->Value.sgt(K1->Value))
215 return false;
216 if (OpcodeTriple.Med == AMDGPU::G_AMDGPU_UMED3 && K0->Value.ugt(K1->Value))
217 return false;
218
219 MatchInfo = {OpcodeTriple.Med, Val, K0->VReg, K1->VReg};
220 return true;
221}
222
223// fmed3(NaN, K0, K1) = min(min(NaN, K0), K1)
224// ieee = true : min/max(SNaN, K) = QNaN, min/max(QNaN, K) = K
225// ieee = false : min/max(NaN, K) = K
226// clamp(NaN) = dx10_clamp ? 0.0 : NaN
227// Consider values of min(max(Val, K0), K1) and max(min(Val, K1), K0) as input.
228// Other operand commutes (see matchMed) give same result since min and max are
229// commutative.
230
231// Try to replace fp min(max(Val, K0), K1) or max(min(Val, K1), K0), K0<=K1
232// with fmed3(Val, K0, K1) or clamp(Val). Clamp requires K0 = 0.0 and K1 = 1.0.
233// Val = SNaN only for ieee = true
234// fmed3(SNaN, K0, K1) = min(min(SNaN, K0), K1) = min(QNaN, K1) = K1
235// min(max(SNaN, K0), K1) = min(QNaN, K1) = K1
236// max(min(SNaN, K1), K0) = max(K1, K0) = K1
237// Val = NaN,ieee = false or Val = QNaN,ieee = true
238// fmed3(NaN, K0, K1) = min(min(NaN, K0), K1) = min(K0, K1) = K0
239// min(max(NaN, K0), K1) = min(K0, K1) = K0 (can clamp when dx10_clamp = true)
240// max(min(NaN, K1), K0) = max(K1, K0) = K1 != K0
241bool AMDGPURegBankCombinerImpl::matchFPMinMaxToMed3(
242 MachineInstr &MI, Med3MatchInfo &MatchInfo) const {
243 Register Dst = MI.getOperand(0).getReg();
244 LLT Ty = MRI.getType(Dst);
245
246 // med3 for f16 is only available on gfx9+, and not available for v2f16.
247 if ((Ty != LLT::scalar(16) || !STI.hasMed3_16()) && Ty != LLT::scalar(32))
248 return false;
249
250 auto OpcodeTriple = getMinMaxPair(MI.getOpcode());
251
252 Register Val;
253 std::optional<FPValueAndVReg> K0, K1;
254 // Match min(max(Val, K0), K1) or max(min(Val, K1), K0). Then see if K0 <= K1.
255 if (!matchMed<GFCstAndRegMatch>(MI, MRI, OpcodeTriple, Val, K0, K1))
256 return false;
257
258 if (K0->Value > K1->Value)
259 return false;
260
261 // For IEEE=false perform combine only when it's safe to assume that there are
262 // no NaN inputs. Most often MI is marked with nnan fast math flag.
263 // For IEEE=true consider NaN inputs. fmed3(NaN, K0, K1) is equivalent to
264 // min(min(NaN, K0), K1). Safe to fold for min(max(Val, K0), K1) since inner
265 // nodes(max/min) have same behavior when one input is NaN and other isn't.
266 // Don't consider max(min(SNaN, K1), K0) since there is no isKnownNeverQNaN,
267 // also post-legalizer inputs to min/max are fcanonicalized (never SNaN).
268 if ((getIEEE() && isFminnumIeee(MI)) || VT->isKnownNeverNaN(Dst)) {
269 // Don't fold single use constant that can't be inlined.
270 if ((!MRI.hasOneNonDBGUse(K0->VReg) || TII.isInlineConstant(K0->Value)) &&
271 (!MRI.hasOneNonDBGUse(K1->VReg) || TII.isInlineConstant(K1->Value))) {
272 MatchInfo = {OpcodeTriple.Med, Val, K0->VReg, K1->VReg};
273 return true;
274 }
275 }
276
277 return false;
278}
279
280bool AMDGPURegBankCombinerImpl::matchFPMinMaxToClamp(MachineInstr &MI,
281 Register &Reg) const {
282 // Clamp is available on all types after regbankselect (f16, f32, f64, v2f16).
283 auto OpcodeTriple = getMinMaxPair(MI.getOpcode());
284 Register Val;
285 std::optional<FPValueAndVReg> K0, K1;
286 // Match min(max(Val, K0), K1) or max(min(Val, K1), K0).
287 if (!matchMed<GFCstOrSplatGFCstMatch>(MI, MRI, OpcodeTriple, Val, K0, K1))
288 return false;
289
290 if (!K0->Value.isPosZero() || !K1->Value.isExactlyValue(1.0))
291 return false;
292
293 // For IEEE=false perform combine only when it's safe to assume that there are
294 // no NaN inputs. Most often MI is marked with nnan fast math flag.
295 // For IEEE=true consider NaN inputs. Only min(max(QNaN, 0.0), 1.0) evaluates
296 // to 0.0 requires dx10_clamp = true.
297 if ((getIEEE() && getDX10Clamp() && isFminnumIeee(MI) &&
298 VT->isKnownNeverSNaN(Val)) ||
299 VT->isKnownNeverNaN(MI.getOperand(0).getReg())) {
300 Reg = Val;
301 return true;
302 }
303
304 return false;
305}
306
307// Replacing fmed3(NaN, 0.0, 1.0) with clamp. Requires dx10_clamp = true.
308// Val = SNaN only for ieee = true. It is important which operand is NaN.
309// min(min(SNaN, 0.0), 1.0) = min(QNaN, 1.0) = 1.0
310// min(min(SNaN, 1.0), 0.0) = min(QNaN, 0.0) = 0.0
311// min(min(0.0, 1.0), SNaN) = min(0.0, SNaN) = QNaN
312// Val = NaN,ieee = false or Val = QNaN,ieee = true
313// min(min(NaN, 0.0), 1.0) = min(0.0, 1.0) = 0.0
314// min(min(NaN, 1.0), 0.0) = min(1.0, 0.0) = 0.0
315// min(min(0.0, 1.0), NaN) = min(0.0, NaN) = 0.0
316bool AMDGPURegBankCombinerImpl::matchFPMed3ToClamp(MachineInstr &MI,
317 Register &Reg) const {
318 // In llvm-ir, clamp is often represented as an intrinsic call to
319 // @llvm.amdgcn.fmed3.f32(%Val, 0.0, 1.0). Check for other operand orders.
320 MachineInstr *Src0 = getDefIgnoringCopies(MI.getOperand(1).getReg(), MRI);
321 MachineInstr *Src1 = getDefIgnoringCopies(MI.getOperand(2).getReg(), MRI);
322 MachineInstr *Src2 = getDefIgnoringCopies(MI.getOperand(3).getReg(), MRI);
323
324 if (isFCst(Src0) && !isFCst(Src1))
325 std::swap(Src0, Src1);
326 if (isFCst(Src1) && !isFCst(Src2))
327 std::swap(Src1, Src2);
328 if (isFCst(Src0) && !isFCst(Src1))
329 std::swap(Src0, Src1);
330 if (!isClampZeroToOne(Src1, Src2))
331 return false;
332
333 Register Val = Src0->getOperand(0).getReg();
334
335 auto isOp3Zero = [&]() {
336 MachineInstr *Op3 = getDefIgnoringCopies(MI.getOperand(3).getReg(), MRI);
337 if (Op3->getOpcode() == TargetOpcode::G_FCONSTANT)
338 return Op3->getOperand(1).getFPImm()->isPosZero();
339 return false;
340 };
341 // For IEEE=false perform combine only when it's safe to assume that there are
342 // no NaN inputs. Most often MI is marked with nnan fast math flag.
343 // For IEEE=true consider NaN inputs. Requires dx10_clamp = true. Safe to fold
344 // when Val could be QNaN. If Val can also be SNaN third input should be 0.0.
345 if (VT->isKnownNeverNaN(MI.getOperand(0).getReg()) ||
346 (getIEEE() && getDX10Clamp() &&
347 (VT->isKnownNeverSNaN(Val) || isOp3Zero()))) {
348 Reg = Val;
349 return true;
350 }
351
352 return false;
353}
354
355void AMDGPURegBankCombinerImpl::applyClamp(MachineInstr &MI,
356 Register &Reg) const {
357 B.buildInstr(AMDGPU::G_AMDGPU_CLAMP, {MI.getOperand(0)}, {Reg},
358 MI.getFlags());
359 MI.eraseFromParent();
360}
361
362void AMDGPURegBankCombinerImpl::applyMed3(MachineInstr &MI,
363 Med3MatchInfo &MatchInfo) const {
364 B.buildInstr(MatchInfo.Opc, {MI.getOperand(0)},
365 {getAsVgpr(MatchInfo.Val0), getAsVgpr(MatchInfo.Val1),
366 getAsVgpr(MatchInfo.Val2)},
367 MI.getFlags());
368 MI.eraseFromParent();
369}
370
371void AMDGPURegBankCombinerImpl::applyCanonicalizeZextShiftAmt(
372 MachineInstr &MI, MachineInstr &Ext) const {
373 unsigned ShOpc = MI.getOpcode();
374 assert(ShOpc == AMDGPU::G_SHL || ShOpc == AMDGPU::G_LSHR ||
375 ShOpc == AMDGPU::G_ASHR);
376 assert(Ext.getOpcode() == AMDGPU::G_ZEXT);
377
378 Register AmtReg = Ext.getOperand(1).getReg();
379 Register ShDst = MI.getOperand(0).getReg();
380 Register ShSrc = MI.getOperand(1).getReg();
381
382 LLT ExtAmtTy = MRI.getType(Ext.getOperand(0).getReg());
383 LLT AmtTy = MRI.getType(AmtReg);
384
385 auto &RB = *MRI.getRegBank(AmtReg);
386
387 auto NewExt = B.buildAnyExt(ExtAmtTy, AmtReg);
388 auto Mask = B.buildConstant(
390 auto And = B.buildAnd(ExtAmtTy, NewExt, Mask);
391 B.buildInstr(ShOpc, {ShDst}, {ShSrc, And});
392
393 MRI.setRegBank(NewExt.getReg(0), RB);
394 MRI.setRegBank(Mask.getReg(0), RB);
395 MRI.setRegBank(And.getReg(0), RB);
396 MI.eraseFromParent();
397}
398
399bool AMDGPURegBankCombinerImpl::combineD16Load(MachineInstr &MI) const {
400 Register Dst;
401 MachineInstr *Load, *SextLoad;
402 const int64_t CleanLo16 = 0xFFFFFFFFFFFF0000;
403 const int64_t CleanHi16 = 0x000000000000FFFF;
404
405 // Load lo
406 if (mi_match(MI.getOperand(1).getReg(), MRI,
408 m_Copy(m_SpecificICst(CleanLo16))),
409 m_MInstr(Load)))) {
410
411 if (Load->getOpcode() == AMDGPU::G_ZEXTLOAD) {
412 const MachineMemOperand *MMO = *Load->memoperands_begin();
413 unsigned LoadSize = MMO->getSizeInBits().getValue();
414 if (LoadSize == 8)
415 return applyD16Load(AMDGPU::G_AMDGPU_LOAD_D16_LO_U8, MI, Load, Dst);
416 if (LoadSize == 16)
417 return applyD16Load(AMDGPU::G_AMDGPU_LOAD_D16_LO, MI, Load, Dst);
418 return false;
419 }
420
421 // s32 Load_lo16 holds SextLoad i8, Load_hi16 is zero.
422 // fake16: and (sextload i8 -> s32), 0xFFFF
423 // true16: zext (sextload i8 -> s16) -> s32
424 if (mi_match(
425 Load, MRI,
426 m_GAnd(m_MInstr(SextLoad), m_Copy(m_SpecificICst(CleanHi16)))) ||
427 mi_match(Load, MRI,
429 m_MInstr(SextLoad))))) {
430 if (SextLoad->getOpcode() != AMDGPU::G_SEXTLOAD)
431 return false;
432
433 const MachineMemOperand *MMO = *SextLoad->memoperands_begin();
434 if (MMO->getSizeInBits().getValue() != 8)
435 return false;
436
437 return applyD16Load(AMDGPU::G_AMDGPU_LOAD_D16_LO_I8, MI, SextLoad, Dst);
438 }
439
440 return false;
441 }
442
443 // Load hi
444 if (mi_match(MI.getOperand(1).getReg(), MRI,
446 m_Copy(m_SpecificICst(CleanHi16))),
447 m_GShl(m_MInstr(Load), m_Copy(m_SpecificICst(16)))))) {
448
449 if (Load->getOpcode() == AMDGPU::G_ZEXTLOAD) {
450 const MachineMemOperand *MMO = *Load->memoperands_begin();
451 unsigned LoadSize = MMO->getSizeInBits().getValue();
452 if (LoadSize == 8)
453 return applyD16Load(AMDGPU::G_AMDGPU_LOAD_D16_HI_U8, MI, Load, Dst);
454 if (LoadSize == 16)
455 return applyD16Load(AMDGPU::G_AMDGPU_LOAD_D16_HI, MI, Load, Dst);
456 return false;
457 }
458
459 // s32 Load_lo16 holds SextLoad i8, Load_hi16 is zero.
460 // fake16: and (sextload i8 -> s32), 0xFFFF
461 // true16: zext (sextload i8 -> s16) -> s32
462 if (mi_match(
463 Load, MRI,
464 m_GAnd(m_MInstr(SextLoad), m_Copy(m_SpecificICst(CleanHi16)))) ||
465 mi_match(Load, MRI,
467 m_MInstr(SextLoad))))) {
468 if (SextLoad->getOpcode() != AMDGPU::G_SEXTLOAD)
469 return false;
470
471 const MachineMemOperand *MMO = *SextLoad->memoperands_begin();
472 if (MMO->getSizeInBits().getValue() != 8)
473 return false;
474
475 return applyD16Load(AMDGPU::G_AMDGPU_LOAD_D16_HI_I8, MI, SextLoad, Dst);
476 }
477
478 return false;
479 }
480
481 return false;
482}
483
484bool AMDGPURegBankCombinerImpl::applyD16Load(
485 unsigned D16Opc, MachineInstr &DstMI, MachineInstr *SmallLoad,
486 Register SrcReg32ToOverwriteD16) const {
487 B.buildInstr(D16Opc, {DstMI.getOperand(0).getReg()},
488 {SmallLoad->getOperand(1).getReg(), SrcReg32ToOverwriteD16})
489 .setMemRefs(SmallLoad->memoperands());
490 DstMI.eraseFromParent();
491 return true;
492}
493
494SIModeRegisterDefaults AMDGPURegBankCombinerImpl::getMode() const {
495 return MF.getInfo<SIMachineFunctionInfo>()->getMode();
496}
497
498bool AMDGPURegBankCombinerImpl::getIEEE() const { return getMode().IEEE; }
499
500bool AMDGPURegBankCombinerImpl::getDX10Clamp() const {
501 return getMode().DX10Clamp;
502}
503
504bool AMDGPURegBankCombinerImpl::isFminnumIeee(const MachineInstr &MI) const {
505 return MI.getOpcode() == AMDGPU::G_FMINNUM_IEEE;
506}
507
508bool AMDGPURegBankCombinerImpl::isFCst(MachineInstr *MI) const {
509 return MI->getOpcode() == AMDGPU::G_FCONSTANT;
510}
511
512bool AMDGPURegBankCombinerImpl::isClampZeroToOne(MachineInstr *K0,
513 MachineInstr *K1) const {
514 if (isFCst(K0) && isFCst(K1)) {
515 const ConstantFP *KO_FPImm = K0->getOperand(1).getFPImm();
516 const ConstantFP *K1_FPImm = K1->getOperand(1).getFPImm();
517 return (KO_FPImm->isPosZero() && K1_FPImm->isExactlyValue(1.0)) ||
518 (KO_FPImm->isExactlyValue(1.0) && K1_FPImm->isPosZero());
519 }
520 return false;
521}
522
523// Pass boilerplate
524// ================
525
526class AMDGPURegBankCombiner : public MachineFunctionPass {
527public:
528 static char ID;
529
530 AMDGPURegBankCombiner(bool IsOptNone = false);
531
532 StringRef getPassName() const override { return "AMDGPURegBankCombiner"; }
533
534 bool runOnMachineFunction(MachineFunction &MF) override;
535
536 void getAnalysisUsage(AnalysisUsage &AU) const override;
537
538private:
539 bool IsOptNone;
540 AMDGPURegBankCombinerImplRuleConfig RuleConfig;
541};
542} // end anonymous namespace
543
544void AMDGPURegBankCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
545 AU.setPreservesCFG();
547 AU.addRequired<GISelValueTrackingAnalysisLegacy>();
548 AU.addPreserved<GISelValueTrackingAnalysisLegacy>();
549 if (!IsOptNone) {
550 AU.addRequired<MachineDominatorTreeWrapperPass>();
551 AU.addPreserved<MachineDominatorTreeWrapperPass>();
552 }
554}
555
556AMDGPURegBankCombiner::AMDGPURegBankCombiner(bool IsOptNone)
557 : MachineFunctionPass(ID), IsOptNone(IsOptNone) {
558 if (!RuleConfig.parseCommandLineOption())
559 report_fatal_error("Invalid rule identifier");
560}
561
562bool AMDGPURegBankCombiner::runOnMachineFunction(MachineFunction &MF) {
563 if (MF.getProperties().hasFailedISel())
564 return false;
565 const Function &F = MF.getFunction();
566 bool EnableOpt =
567 MF.getTarget().getOptLevel() != CodeGenOptLevel::None && !skipFunction(F);
568
571 &getAnalysis<GISelValueTrackingAnalysisLegacy>().get(MF);
572
573 const auto *LI = ST.getLegalizerInfo();
575 IsOptNone ? nullptr
576 : &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
577
578 CombinerInfo CInfo(/*AllowIllegalOps*/ false, /*ShouldLegalizeIllegal*/ true,
579 LI, EnableOpt, F.hasOptSize(), F.hasMinSize());
580 // Disable fixed-point iteration to reduce compile-time
581 CInfo.MaxIterations = 1;
582 CInfo.ObserverLvl = CombinerInfo::ObserverLevel::SinglePass;
583 // RegBankSelect seems not to leave dead instructions, so a full DCE pass is
584 // unnecessary.
585 CInfo.EnableFullDCE = false;
586 AMDGPURegBankCombinerImpl Impl(MF, CInfo, *VT, /*CSEInfo*/ nullptr,
587 RuleConfig, ST, MDT, LI);
588 return Impl.combineMachineInstrs();
589}
590
591char AMDGPURegBankCombiner::ID = 0;
592INITIALIZE_PASS_BEGIN(AMDGPURegBankCombiner, DEBUG_TYPE,
593 "Combine AMDGPU machine instrs after regbankselect",
594 false, false)
596INITIALIZE_PASS_END(AMDGPURegBankCombiner, DEBUG_TYPE,
597 "Combine AMDGPU machine instrs after regbankselect", false,
598 false)
599
601 return new AMDGPURegBankCombiner(IsOptNone);
602}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
#define GET_GICOMBINER_CONSTRUCTOR_INITS
This file declares the targeting of the Machinelegalizer class for AMDGPU.
Provides AMDGPU specific target descriptions.
This file declares the targeting of the RegisterBankInfo class for AMDGPU.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This contains common combine transformations that may be used in a combine pass,or by the target else...
Option class for Targets to specify which operations are combined how and when.
This contains the base class for all Combiners generated by TableGen.
AMD GCN specific subclass of TargetSubtarget.
Provides analysis for querying information about KnownBits during GISel passes.
#define DEBUG_TYPE
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Contains matchers for matching SSA Machine Instructions.
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
static StringRef getName(Value *V)
static bool isClampZeroToOne(SDValue A, SDValue B)
Target-Independent Code Generator Pass Configuration Options pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition Pass.cpp:270
Combiner implementation.
Definition Combiner.h:33
LLVM_ABI bool isExactlyValue(const APFloat &V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
bool isPosZero() const
Return true if the value is positive zero.
Definition Constants.h:470
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
bool hasMed3_16() const
The CSE Analysis object.
Definition CSEInfo.h:72
To use KnownBitsInfo analysis in a pass, KnownBitsInfo &Info = getAnalysis<GISelValueTrackingInfoAnal...
constexpr unsigned getScalarSizeInBits() const
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
TypeSize getValue() const
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Function & getFunction()
Return the LLVM function that this machine code represents.
const MachineFunctionProperties & getProperties() const
Get the function properties.
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
const MachineOperand & getOperand(unsigned i) const
LLVM_ABI MachineInstrBundleIterator< MachineInstr > eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
LocationSize getSizeInBits() const
Return the size in bits of the memory reference.
Register getReg() const
getReg - Returns the register number.
const ConstantFP * getFPImm() const
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI bool hasOneNonDBGUse(Register RegNo) const
hasOneNonDBGUse - Return true if there is exactly one non-Debug use of the specified register.
const RegisterBank * getRegBank(Register Reg) const
Return the register bank of Reg.
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
LLVM_ABI void setRegBank(Register Reg, const RegisterBank &RegBank)
Set the register bank to RegBank for Reg.
Holds all the information related to register banks.
const RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
unsigned getID() const
Get the identifier of this register bank.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
CodeGenOptLevel getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
operand_type_match m_Reg()
SpecificConstantMatch m_SpecificICst(const APInt &RequestedValue)
Matches a constant equal to RequestedValue.
UnaryOp_match< SrcTy, TargetOpcode::COPY > m_Copy(SrcTy &&Src)
UnaryOp_match< SrcTy, TargetOpcode::G_ZEXT > m_GZExt(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_OR, true > m_GOr(const LHS &L, const RHS &R)
CheckType m_SpecificType(LLT Ty)
BinaryOpc_match< LHS, RHS, true > m_CommutativeBinOp(unsigned Opcode, const LHS &L, const RHS &R)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SHL, false > m_GShl(const LHS &L, const RHS &R)
Or< Preds... > m_any_of(Preds &&... preds)
BinaryOp_match< LHS, RHS, TargetOpcode::G_AND, true > m_GAnd(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_BITCAST > m_GBitcast(const SrcTy &Src)
bind_ty< MachineInstr * > m_MInstr(MachineInstr *&MI)
And< Preds... > m_all_of(Preds &&... preds)
NodeAddr< DefNode * > Def
Definition RDFGraph.h:384
NodeAddr< UseNode * > Use
Definition RDFGraph.h:385
This is an optimization pass for GlobalISel generic memory operations.
FunctionPass * createAMDGPURegBankCombiner(bool IsOptNone)
LLVM_ABI MachineInstr * getDefIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, folding away any trivial copies.
Definition Utils.cpp:494
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
LLVM_ABI void getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU)
Modify analysis usage so it preserves passes required for the SelectionDAG fallback.
Definition Utils.cpp:1147
@ And
Bitwise or logical AND of integers.
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
Definition MathExtras.h:77
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:863
@ SinglePass
Enables Observer-based DCE and additional heuristics that retry combining defined and used instructio...