//=== lib/CodeGen/GlobalISel/AMDGPUCombinerHelper.cpp ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDGPUCombinerHelper.h"
#include "GCNSubtarget.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"

using namespace llvm;
using namespace MIPatternMatch;
AMDGPUCombinerHelper::AMDGPUCombinerHelper(
    GISelChangeObserver &Observer, MachineIRBuilder &B, bool IsPreLegalize,
    GISelValueTracking *VT, MachineDominatorTree *MDT, const LegalizerInfo *LI,
    const GCNSubtarget &STI)
    : CombinerHelper(Observer, B, IsPreLegalize, VT, MDT, LI), STI(STI),
      TII(*STI.getInstrInfo()) {}
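
// Return true if an fneg of this instruction's result can be folded into the
// instruction itself by negating its source operands (and, for min/max,
// switching to the opposite opcode).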
LLVM_READNONE
static bool fnegFoldsIntoMI(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  case AMDGPU::G_FADD:
  case AMDGPU::G_FSUB:
  case AMDGPU::G_FMUL:
  case AMDGPU::G_FMA:
  case AMDGPU::G_FMAD:
  case AMDGPU::G_FMINNUM:
  case AMDGPU::G_FMAXNUM:
  case AMDGPU::G_FMINNUM_IEEE:
  case AMDGPU::G_FMAXNUM_IEEE:
  case AMDGPU::G_FMINIMUM:
  case AMDGPU::G_FMAXIMUM:
  case AMDGPU::G_FSIN:
  case AMDGPU::G_FPEXT:
  case AMDGPU::G_INTRINSIC_TRUNC:
  case AMDGPU::G_FPTRUNC:
  case AMDGPU::G_FRINT:
  case AMDGPU::G_FNEARBYINT:
  case AMDGPU::G_INTRINSIC_ROUND:
  case AMDGPU::G_INTRINSIC_ROUNDEVEN:
  case AMDGPU::G_FCANONICALIZE:
  case AMDGPU::G_AMDGPU_RCP_IFLAG:
  case AMDGPU::G_AMDGPU_FMIN_LEGACY:
  case AMDGPU::G_AMDGPU_FMAX_LEGACY:
    return true;
  case AMDGPU::G_INTRINSIC: {
    Intrinsic::ID IntrinsicID = cast<GIntrinsic>(MI).getIntrinsicID();
    switch (IntrinsicID) {
    case Intrinsic::amdgcn_rcp:
    case Intrinsic::amdgcn_rcp_legacy:
    case Intrinsic::amdgcn_sin:
    case Intrinsic::amdgcn_fmul_legacy:
    case Intrinsic::amdgcn_fmed3:
    case Intrinsic::amdgcn_fma_legacy:
      return true;
    default:
      return false;
    }
  }
  default:
    return false;
  }
}

/// \returns true if the operation will definitely need to use a 64-bit
/// encoding, and thus will use a VOP3 encoding regardless of the source
/// modifiers.
LLVM_READONLY
static bool opMustUseVOP3Encoding(const MachineInstr &MI,
                                  const MachineRegisterInfo &MRI) {
  return MI.getNumOperands() > (isa<GIntrinsic>(MI) ? 4u : 3u) ||
         MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits() == 64;
}

// Most FP instructions support source modifiers.
LLVM_READONLY
static bool hasSourceMods(const MachineInstr &MI) {
  if (!MI.memoperands().empty())
    return false;

  switch (MI.getOpcode()) {
  case AMDGPU::COPY:
  case AMDGPU::G_SELECT:
  case AMDGPU::G_FDIV:
  case AMDGPU::G_FREM:
  case TargetOpcode::INLINEASM:
  case TargetOpcode::INLINEASM_BR:
  case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS:
  case AMDGPU::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
  case AMDGPU::G_BITCAST:
  case AMDGPU::G_ANYEXT:
  case AMDGPU::G_BUILD_VECTOR:
  case AMDGPU::G_BUILD_VECTOR_TRUNC:
  case AMDGPU::G_PHI:
    return false;
  case AMDGPU::G_INTRINSIC:
  case AMDGPU::G_INTRINSIC_CONVERGENT: {
    Intrinsic::ID IntrinsicID = cast<GIntrinsic>(MI).getIntrinsicID();
    switch (IntrinsicID) {
    case Intrinsic::amdgcn_interp_p1:
    case Intrinsic::amdgcn_interp_p2:
    case Intrinsic::amdgcn_interp_mov:
    case Intrinsic::amdgcn_interp_p1_f16:
    case Intrinsic::amdgcn_interp_p2_f16:
    case Intrinsic::amdgcn_div_scale:
      return false;
    default:
      return true;
    }
  }
  default:
    return true;
  }
}
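
// Conservatively check that every non-debug user of MI's result can absorb a
// source modifier, so folding an fneg into this value is unlikely to force
// extra instructions or larger encodings elsewhere.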
static bool allUsesHaveSourceMods(MachineInstr &MI, MachineRegisterInfo &MRI,
                                  unsigned CostThreshold = 4) {
  // Some users (such as 3-operand FMA/MAD) must use a VOP3 encoding, and thus
  // it is truly free to use a source modifier in all cases. If there are
  // multiple users, but each one would necessitate using VOP3, there will be
  // a code size increase. Try to avoid increasing code size unless we know it
  // will save on the instruction count.
  unsigned NumMayIncreaseSize = 0;
  Register Dst = MI.getOperand(0).getReg();
  for (const MachineInstr &Use : MRI.use_nodbg_instructions(Dst)) {
    if (!hasSourceMods(Use))
      return false;

    if (opMustUseVOP3Encoding(Use, MRI)) {
      if (++NumMayIncreaseSize > CostThreshold)
        return false;
    }
  }
  return true;
}

static bool mayIgnoreSignedZero(MachineInstr &MI) {
  return MI.getFlag(MachineInstr::MIFlag::FmNsz);
}
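
// Bit patterns of 1.0 / (2.0 * pi) (0.15915494...) in half, single and double
// precision; some subtargets can encode this value as an inline immediate.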
static bool isInv2Pi(const APFloat &APF) {
  static const APFloat KF16(APFloat::IEEEhalf(), APInt(16, 0x3118));
  static const APFloat KF32(APFloat::IEEEsingle(), APInt(32, 0x3e22f983));
  static const APFloat KF64(APFloat::IEEEdouble(),
                            APInt(64, 0x3fc45f306dc9c882));

  return APF.bitwiseIsEqual(KF16) || APF.bitwiseIsEqual(KF32) ||
         APF.bitwiseIsEqual(KF64);
}

// 0 and 1.0 / (2.0 * pi) do not have negated inline immediates, so there is
// an additional cost to negate them.
static bool isConstantCostlierToNegate(MachineInstr &MI, Register Reg,
                                       MachineRegisterInfo &MRI) {
  std::optional<FPValueAndVReg> FPValReg;
  if (mi_match(Reg, MRI, m_GFCstOrSplat(FPValReg))) {
    if (FPValReg->Value.isZero() && !FPValReg->Value.isNegative())
      return true;

    const GCNSubtarget &ST = MI.getMF()->getSubtarget<GCNSubtarget>();
    if (ST.hasInv2PiInlineImm() && isInv2Pi(FPValReg->Value))
      return true;
  }
  return false;
}
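
// Swapping a min for a max (and vice versa) lets fneg(min(a, b)) be rewritten
// as max(fneg(a), fneg(b)), and similarly for the other min/max variants.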
static unsigned inverseMinMax(unsigned Opc) {
  switch (Opc) {
  case AMDGPU::G_FMAXNUM:
    return AMDGPU::G_FMINNUM;
  case AMDGPU::G_FMINNUM:
    return AMDGPU::G_FMAXNUM;
  case AMDGPU::G_FMAXNUM_IEEE:
    return AMDGPU::G_FMINNUM_IEEE;
  case AMDGPU::G_FMINNUM_IEEE:
    return AMDGPU::G_FMAXNUM_IEEE;
  case AMDGPU::G_FMAXIMUM:
    return AMDGPU::G_FMINIMUM;
  case AMDGPU::G_FMINIMUM:
    return AMDGPU::G_FMAXIMUM;
  case AMDGPU::G_AMDGPU_FMAX_LEGACY:
    return AMDGPU::G_AMDGPU_FMIN_LEGACY;
  case AMDGPU::G_AMDGPU_FMIN_LEGACY:
    return AMDGPU::G_AMDGPU_FMAX_LEGACY;
  default:
    llvm_unreachable("invalid min/max opcode");
  }
}
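
// Match a G_FNEG whose source instruction can absorb the negation by negating
// its own operands (and, for min/max, switching to the inverse opcode).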
bool AMDGPUCombinerHelper::matchFoldableFneg(MachineInstr &MI,
                                             MachineInstr *&MatchInfo) const {
  Register Src = MI.getOperand(1).getReg();
  MatchInfo = MRI.getVRegDef(Src);

  // If the input has multiple uses and we can either fold the negate down, or
  // the other uses cannot, give up. This both prevents unprofitable
  // transformations and infinite loops: we won't repeatedly try to fold around
  // a negate that has no 'good' form.
  if (MRI.hasOneNonDBGUse(Src)) {
    if (allUsesHaveSourceMods(MI, MRI, 0))
      return false;
  } else {
    if (fnegFoldsIntoMI(*MatchInfo) &&
        (allUsesHaveSourceMods(MI, MRI) ||
         !allUsesHaveSourceMods(*MatchInfo, MRI)))
      return false;
  }

  switch (MatchInfo->getOpcode()) {
  case AMDGPU::G_FMINNUM:
  case AMDGPU::G_FMAXNUM:
  case AMDGPU::G_FMINNUM_IEEE:
  case AMDGPU::G_FMAXNUM_IEEE:
  case AMDGPU::G_FMINIMUM:
  case AMDGPU::G_FMAXIMUM:
  case AMDGPU::G_AMDGPU_FMIN_LEGACY:
  case AMDGPU::G_AMDGPU_FMAX_LEGACY:
    // 0 doesn't have a negated inline immediate.
    return !isConstantCostlierToNegate(*MatchInfo,
                                       MatchInfo->getOperand(2).getReg(), MRI);
  case AMDGPU::G_FADD:
  case AMDGPU::G_FSUB:
  case AMDGPU::G_FMA:
  case AMDGPU::G_FMAD:
    return mayIgnoreSignedZero(*MatchInfo);
  case AMDGPU::G_FMUL:
  case AMDGPU::G_FPEXT:
  case AMDGPU::G_INTRINSIC_TRUNC:
  case AMDGPU::G_FPTRUNC:
  case AMDGPU::G_FRINT:
  case AMDGPU::G_FNEARBYINT:
  case AMDGPU::G_INTRINSIC_ROUND:
  case AMDGPU::G_INTRINSIC_ROUNDEVEN:
  case AMDGPU::G_FSIN:
  case AMDGPU::G_FCANONICALIZE:
  case AMDGPU::G_AMDGPU_RCP_IFLAG:
    return true;
  case AMDGPU::G_INTRINSIC:
  case AMDGPU::G_INTRINSIC_CONVERGENT: {
    Intrinsic::ID IntrinsicID = cast<GIntrinsic>(MatchInfo)->getIntrinsicID();
    switch (IntrinsicID) {
    case Intrinsic::amdgcn_rcp:
    case Intrinsic::amdgcn_rcp_legacy:
    case Intrinsic::amdgcn_sin:
    case Intrinsic::amdgcn_fmul_legacy:
    case Intrinsic::amdgcn_fmed3:
      return true;
    case Intrinsic::amdgcn_fma_legacy:
      return mayIgnoreSignedZero(*MatchInfo);
    default:
      return false;
    }
  }
  default:
    return false;
  }
}

void AMDGPUCombinerHelper::applyFoldableFneg(MachineInstr &MI,
                                             MachineInstr *&MatchInfo) const {
  // Transform:
  // %A = inst %Op1, ...
  // %B = fneg %A
  //
  // into:
  //
  // (if %A has one use, specifically fneg above)
  // %B = inst (maybe fneg %Op1), ...
  //
  // (if %A has multiple uses)
  // %B = inst (maybe fneg %Op1), ...
  // %A = fneg %B

  // Replace register in operand with a register holding negated value.
  auto NegateOperand = [&](MachineOperand &Op) {
    Register Reg = Op.getReg();
    if (!mi_match(Reg, MRI, m_GFNeg(m_Reg(Reg))))
      Reg = Builder.buildFNeg(MRI.getType(Reg), Reg).getReg(0);
    replaceRegOpWith(MRI, Op, Reg);
  };

  // Replace either register in operands with a register holding negated value.
  auto NegateEitherOperand = [&](MachineOperand &X, MachineOperand &Y) {
    Register XReg = X.getReg();
    Register YReg = Y.getReg();
    if (mi_match(XReg, MRI, m_GFNeg(m_Reg(XReg))))
      replaceRegOpWith(MRI, X, XReg);
    else if (mi_match(YReg, MRI, m_GFNeg(m_Reg(YReg))))
      replaceRegOpWith(MRI, Y, YReg);
    else {
      YReg = Builder.buildFNeg(MRI.getType(YReg), YReg).getReg(0);
      replaceRegOpWith(MRI, Y, YReg);
    }
  };

  Builder.setInstrAndDebugLoc(*MatchInfo);

  // Negate appropriate operands so that resulting value of MatchInfo is
  // negated.
  switch (MatchInfo->getOpcode()) {
  case AMDGPU::G_FADD:
  case AMDGPU::G_FSUB:
    NegateOperand(MatchInfo->getOperand(1));
    NegateOperand(MatchInfo->getOperand(2));
    break;
  case AMDGPU::G_FMUL:
    NegateEitherOperand(MatchInfo->getOperand(1), MatchInfo->getOperand(2));
    break;
  case AMDGPU::G_FMINNUM:
  case AMDGPU::G_FMAXNUM:
  case AMDGPU::G_FMINNUM_IEEE:
  case AMDGPU::G_FMAXNUM_IEEE:
  case AMDGPU::G_FMINIMUM:
  case AMDGPU::G_FMAXIMUM:
  case AMDGPU::G_AMDGPU_FMIN_LEGACY:
  case AMDGPU::G_AMDGPU_FMAX_LEGACY: {
    NegateOperand(MatchInfo->getOperand(1));
    NegateOperand(MatchInfo->getOperand(2));
    unsigned Opposite = inverseMinMax(MatchInfo->getOpcode());
    replaceOpcodeWith(*MatchInfo, Opposite);
    break;
  }
  case AMDGPU::G_FMA:
  case AMDGPU::G_FMAD:
    NegateEitherOperand(MatchInfo->getOperand(1), MatchInfo->getOperand(2));
    NegateOperand(MatchInfo->getOperand(3));
    break;
  case AMDGPU::G_FPEXT:
  case AMDGPU::G_INTRINSIC_TRUNC:
  case AMDGPU::G_FRINT:
  case AMDGPU::G_FNEARBYINT:
  case AMDGPU::G_INTRINSIC_ROUND:
  case AMDGPU::G_INTRINSIC_ROUNDEVEN:
  case AMDGPU::G_FSIN:
  case AMDGPU::G_FCANONICALIZE:
  case AMDGPU::G_AMDGPU_RCP_IFLAG:
  case AMDGPU::G_FPTRUNC:
    NegateOperand(MatchInfo->getOperand(1));
    break;
  case AMDGPU::G_INTRINSIC:
  case AMDGPU::G_INTRINSIC_CONVERGENT: {
    Intrinsic::ID IntrinsicID = cast<GIntrinsic>(MatchInfo)->getIntrinsicID();
    switch (IntrinsicID) {
    case Intrinsic::amdgcn_rcp:
    case Intrinsic::amdgcn_rcp_legacy:
    case Intrinsic::amdgcn_sin:
      NegateOperand(MatchInfo->getOperand(2));
      break;
    case Intrinsic::amdgcn_fmul_legacy:
      NegateEitherOperand(MatchInfo->getOperand(2), MatchInfo->getOperand(3));
      break;
    case Intrinsic::amdgcn_fmed3:
      NegateOperand(MatchInfo->getOperand(2));
      NegateOperand(MatchInfo->getOperand(3));
      NegateOperand(MatchInfo->getOperand(4));
      break;
    case Intrinsic::amdgcn_fma_legacy:
      NegateEitherOperand(MatchInfo->getOperand(2), MatchInfo->getOperand(3));
      NegateOperand(MatchInfo->getOperand(4));
      break;
    default:
      llvm_unreachable("folding fneg not supported for this intrinsic");
    }
    break;
  }
  default:
    llvm_unreachable("folding fneg not supported for this instruction");
  }

  Register Dst = MI.getOperand(0).getReg();
  Register MatchInfoDst = MatchInfo->getOperand(0).getReg();

  if (MRI.hasOneNonDBGUse(MatchInfoDst)) {
    // MatchInfo now has negated value so use that instead of old Dst.
    replaceRegWith(MRI, Dst, MatchInfoDst);
  } else {
    // We want to swap all uses of Dst with uses of MatchInfoDst and vice versa
    // but replaceRegWith will replace defs as well. It is easier to replace one
    // def with a new register.
    LLT Type = MRI.getType(Dst);
    Register NegatedMatchInfo = MRI.createGenericVirtualRegister(Type);
    replaceRegOpWith(MRI, MatchInfo->getOperand(0), NegatedMatchInfo);

    // MatchInfo now has negated value so use that instead of old Dst.
    replaceRegWith(MRI, Dst, NegatedMatchInfo);

    // Recreate the non-negated value for the other uses of the old
    // MatchInfoDst.
    auto NextInst = ++MatchInfo->getIterator();
    Builder.setInstrAndDebugLoc(*NextInst);
    Builder.buildFNeg(MatchInfoDst, NegatedMatchInfo, MI.getFlags());
  }

  MI.eraseFromParent();
}

// TODO: Should return converted value / extension source and avoid introducing
// intermediate fptruncs in the apply function.
static bool isFPExtFromF16OrConst(const MachineRegisterInfo &MRI,
                                  Register Reg) {
  const MachineInstr *Def = MRI.getVRegDef(Reg);
  if (Def->getOpcode() == TargetOpcode::G_FPEXT) {
    Register SrcReg = Def->getOperand(1).getReg();
    return MRI.getType(SrcReg) == LLT::scalar(16);
  }

  if (Def->getOpcode() == TargetOpcode::G_FCONSTANT) {
    APFloat Val = Def->getOperand(1).getFPImm()->getValueAPF();
    bool LosesInfo = true;
    Val.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, &LosesInfo);
    return !LosesInfo;
  }

  return false;
}
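
// Check whether a G_FPTRUNC back to f16 of a single-use f32 med3 pattern can
// instead be computed directly at f16: each source must be an f16 value
// extended with G_FPEXT or a constant that is exactly representable in f16.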
bool AMDGPUCombinerHelper::matchExpandPromotedF16FMed3(MachineInstr &MI,
                                                       Register Src0,
                                                       Register Src1,
                                                       Register Src2) const {
  assert(MI.getOpcode() == TargetOpcode::G_FPTRUNC);
  Register SrcReg = MI.getOperand(1).getReg();
  if (!MRI.hasOneNonDBGUse(SrcReg) || MRI.getType(SrcReg) != LLT::scalar(32))
    return false;

  return isFPExtFromF16OrConst(MRI, Src0) && isFPExtFromF16OrConst(MRI, Src1) &&
         isFPExtFromF16OrConst(MRI, Src2);
}

void AMDGPUCombinerHelper::applyExpandPromotedF16FMed3(MachineInstr &MI,
                                                       Register Src0,
                                                       Register Src1,
                                                       Register Src2) const {
  // We expect fptrunc (fpext x) to fold out, and to constant fold any constant
  // sources.
  Src0 = Builder.buildFPTrunc(LLT::scalar(16), Src0).getReg(0);
  Src1 = Builder.buildFPTrunc(LLT::scalar(16), Src1).getReg(0);
  Src2 = Builder.buildFPTrunc(LLT::scalar(16), Src2).getReg(0);
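
  // med3(a, b, c) == min(max(a, b), max(min(a, b), c)); build that expansion
  // with the IEEE min/max opcodes below.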
  LLT Ty = MRI.getType(Src0);
  auto A1 = Builder.buildFMinNumIEEE(Ty, Src0, Src1);
  auto B1 = Builder.buildFMaxNumIEEE(Ty, Src0, Src1);
  auto C1 = Builder.buildFMaxNumIEEE(Ty, A1, Src2);
  Builder.buildFMinNumIEEE(MI.getOperand(0), B1, C1);
  MI.eraseFromParent();
}
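
// Fold fmul x, (select cond, K0, K1), where K0 and K1 are powers of two with
// the same sign, into ldexp x, (select cond, log2(K0), log2(K1)); x is negated
// first if the constants are negative.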
bool AMDGPUCombinerHelper::matchCombineFmulWithSelectToFldexp(
    MachineInstr &MI, MachineInstr &Sel,
    std::function<void(MachineIRBuilder &)> &MatchInfo) const {
  assert(MI.getOpcode() == TargetOpcode::G_FMUL);
  assert(Sel.getOpcode() == TargetOpcode::G_SELECT);
  assert(MI.getOperand(2).getReg() == Sel.getOperand(0).getReg());

  Register Dst = MI.getOperand(0).getReg();
  LLT DestTy = MRI.getType(Dst);
  LLT ScalarDestTy = DestTy.getScalarType();

  if ((ScalarDestTy != LLT::float64() && ScalarDestTy != LLT::float32() &&
       ScalarDestTy != LLT::float16()) ||
      !MRI.hasOneNonDBGUse(Sel.getOperand(0).getReg()))
    return false;

  Register SelectCondReg = Sel.getOperand(1).getReg();
  MachineInstr *SelectTrue = MRI.getVRegDef(Sel.getOperand(2).getReg());
  MachineInstr *SelectFalse = MRI.getVRegDef(Sel.getOperand(3).getReg());
  const auto SelectTrueVal =
      isConstantOrConstantSplatVectorFP(*SelectTrue, MRI);
  if (!SelectTrueVal)
    return false;
  const auto SelectFalseVal =
      isConstantOrConstantSplatVectorFP(*SelectFalse, MRI);
  if (!SelectFalseVal)
    return false;

  if (SelectTrueVal->isNegative() != SelectFalseVal->isNegative())
    return false;

  // For f32, only non-inline constants should be transformed.
  if (ScalarDestTy == LLT::float32() && TII.isInlineConstant(*SelectTrueVal) &&
      TII.isInlineConstant(*SelectFalseVal))
    return false;

  int SelectTrueLog2Val = SelectTrueVal->getExactLog2Abs();
  if (SelectTrueLog2Val == INT_MIN)
    return false;
  int SelectFalseLog2Val = SelectFalseVal->getExactLog2Abs();
  if (SelectFalseLog2Val == INT_MIN)
    return false;

  MatchInfo = [=, &MI](MachineIRBuilder &Builder) {
    LLT IntDestTy = DestTy.changeElementType(LLT::scalar(32));
    auto NewSel = Builder.buildSelect(
        IntDestTy, SelectCondReg,
        Builder.buildConstant(IntDestTy, SelectTrueLog2Val),
        Builder.buildConstant(IntDestTy, SelectFalseLog2Val));

    Register XReg = MI.getOperand(1).getReg();
    if (SelectTrueVal->isNegative()) {
      auto NegX =
          Builder.buildFNeg(DestTy, XReg, MRI.getVRegDef(XReg)->getFlags());
      Builder.buildFLdexp(Dst, NegX, NewSel, MI.getFlags());
    } else {
      Builder.buildFLdexp(Dst, XReg, NewSel, MI.getFlags());
    }
  };

  return true;
}
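
// Return true if Reg is a 64-bit constant whose set bits form a contiguous
// mask that fully covers either the low 32 bits or the high 32 bits.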
bool AMDGPUCombinerHelper::matchConstantIs32BitMask(Register Reg) const {
  std::optional<ValueAndVReg> Res =
      getIConstantVRegValWithLookThrough(Reg, MRI);
  if (!Res)
    return false;

  const uint64_t Val = Res->Value.getZExtValue();
  unsigned MaskIdx = 0;
  unsigned MaskLen = 0;
  if (!isShiftedMask_64(Val, MaskIdx, MaskLen))
    return false;

  // Check if low 32 bits or high 32 bits are all ones.
  return MaskLen >= 32 && ((MaskIdx == 0) || (MaskIdx == 64 - MaskLen));
}