13#include "llvm/IR/IntrinsicsAMDGPU.h"
17using namespace MIPatternMatch;
21 switch (
MI.getOpcode()) {
27 case AMDGPU::G_FMINNUM:
28 case AMDGPU::G_FMAXNUM:
29 case AMDGPU::G_FMINNUM_IEEE:
30 case AMDGPU::G_FMAXNUM_IEEE:
33 case AMDGPU::G_INTRINSIC_TRUNC:
34 case AMDGPU::G_FPTRUNC:
36 case AMDGPU::G_FNEARBYINT:
37 case AMDGPU::G_INTRINSIC_ROUND:
38 case AMDGPU::G_INTRINSIC_ROUNDEVEN:
39 case AMDGPU::G_FCANONICALIZE:
40 case AMDGPU::G_AMDGPU_RCP_IFLAG:
41 case AMDGPU::G_AMDGPU_FMIN_LEGACY:
42 case AMDGPU::G_AMDGPU_FMAX_LEGACY:
44 case AMDGPU::G_INTRINSIC: {
45 unsigned IntrinsicID =
MI.getIntrinsicID();
46 switch (IntrinsicID) {
47 case Intrinsic::amdgcn_rcp:
48 case Intrinsic::amdgcn_rcp_legacy:
49 case Intrinsic::amdgcn_sin:
50 case Intrinsic::amdgcn_fmul_legacy:
51 case Intrinsic::amdgcn_fmed3:
52 case Intrinsic::amdgcn_fma_legacy:
69 return MI.getNumOperands() >
70 (
MI.getOpcode() == AMDGPU::G_INTRINSIC ? 4u : 3u) ||
77 if (!
MI.memoperands().empty())
80 switch (
MI.getOpcode()) {
82 case AMDGPU::G_SELECT:
85 case TargetOpcode::INLINEASM:
86 case TargetOpcode::INLINEASM_BR:
87 case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS:
88 case AMDGPU::G_BITCAST:
89 case AMDGPU::G_ANYEXT:
90 case AMDGPU::G_BUILD_VECTOR:
91 case AMDGPU::G_BUILD_VECTOR_TRUNC:
94 case AMDGPU::G_INTRINSIC: {
95 unsigned IntrinsicID =
MI.getIntrinsicID();
96 switch (IntrinsicID) {
97 case Intrinsic::amdgcn_interp_p1:
98 case Intrinsic::amdgcn_interp_p2:
99 case Intrinsic::amdgcn_interp_mov:
100 case Intrinsic::amdgcn_interp_p1_f16:
101 case Intrinsic::amdgcn_interp_p2_f16:
102 case Intrinsic::amdgcn_div_scale:
120 unsigned NumMayIncreaseSize = 0;
136 return Options.NoSignedZerosFPMath ||
MI.getFlag(MachineInstr::MIFlag::FmNsz);
140 static const APFloat KF16(APFloat::IEEEhalf(),
APInt(16, 0x3118));
141 static const APFloat KF32(APFloat::IEEEsingle(),
APInt(32, 0x3e22f983));
142 static const APFloat KF64(APFloat::IEEEdouble(),
143 APInt(64, 0x3fc45f306dc9c882));
153 std::optional<FPValueAndVReg> FPValReg;
155 if (FPValReg->Value.isZero() && !FPValReg->Value.isNegative())
159 if (ST.hasInv2PiInlineImm() &&
isInv2Pi(FPValReg->Value))
167 case AMDGPU::G_FMAXNUM:
168 return AMDGPU::G_FMINNUM;
169 case AMDGPU::G_FMINNUM:
170 return AMDGPU::G_FMAXNUM;
171 case AMDGPU::G_FMAXNUM_IEEE:
172 return AMDGPU::G_FMINNUM_IEEE;
173 case AMDGPU::G_FMINNUM_IEEE:
174 return AMDGPU::G_FMAXNUM_IEEE;
175 case AMDGPU::G_AMDGPU_FMAX_LEGACY:
176 return AMDGPU::G_AMDGPU_FMIN_LEGACY;
177 case AMDGPU::G_AMDGPU_FMIN_LEGACY:
178 return AMDGPU::G_AMDGPU_FMAX_LEGACY;
204 case AMDGPU::G_FMINNUM:
205 case AMDGPU::G_FMAXNUM:
206 case AMDGPU::G_FMINNUM_IEEE:
207 case AMDGPU::G_FMAXNUM_IEEE:
208 case AMDGPU::G_AMDGPU_FMIN_LEGACY:
209 case AMDGPU::G_AMDGPU_FMAX_LEGACY:
219 case AMDGPU::G_FPEXT:
220 case AMDGPU::G_INTRINSIC_TRUNC:
221 case AMDGPU::G_FPTRUNC:
222 case AMDGPU::G_FRINT:
223 case AMDGPU::G_FNEARBYINT:
224 case AMDGPU::G_INTRINSIC_ROUND:
225 case AMDGPU::G_INTRINSIC_ROUNDEVEN:
227 case AMDGPU::G_FCANONICALIZE:
228 case AMDGPU::G_AMDGPU_RCP_IFLAG:
230 case AMDGPU::G_INTRINSIC: {
232 switch (IntrinsicID) {
233 case Intrinsic::amdgcn_rcp:
234 case Intrinsic::amdgcn_rcp_legacy:
235 case Intrinsic::amdgcn_sin:
236 case Intrinsic::amdgcn_fmul_legacy:
237 case Intrinsic::amdgcn_fmed3:
239 case Intrinsic::amdgcn_fma_legacy:
300 case AMDGPU::G_FMINNUM:
301 case AMDGPU::G_FMAXNUM:
302 case AMDGPU::G_FMINNUM_IEEE:
303 case AMDGPU::G_FMAXNUM_IEEE:
304 case AMDGPU::G_AMDGPU_FMIN_LEGACY:
305 case AMDGPU::G_AMDGPU_FMAX_LEGACY: {
317 case AMDGPU::G_FPEXT:
318 case AMDGPU::G_INTRINSIC_TRUNC:
319 case AMDGPU::G_FRINT:
320 case AMDGPU::G_FNEARBYINT:
321 case AMDGPU::G_INTRINSIC_ROUND:
322 case AMDGPU::G_INTRINSIC_ROUNDEVEN:
324 case AMDGPU::G_FCANONICALIZE:
325 case AMDGPU::G_AMDGPU_RCP_IFLAG:
326 case AMDGPU::G_FPTRUNC:
329 case AMDGPU::G_INTRINSIC: {
331 switch (IntrinsicID) {
332 case Intrinsic::amdgcn_rcp:
333 case Intrinsic::amdgcn_rcp_legacy:
334 case Intrinsic::amdgcn_sin:
337 case Intrinsic::amdgcn_fmul_legacy:
340 case Intrinsic::amdgcn_fmed3:
345 case Intrinsic::amdgcn_fma_legacy:
381 MI.eraseFromParent();
389 if (Def->getOpcode() == TargetOpcode::G_FPEXT) {
390 Register SrcReg = Def->getOperand(1).getReg();
394 if (Def->getOpcode() == TargetOpcode::G_FCONSTANT) {
395 APFloat Val = Def->getOperand(1).getFPImm()->getValueAPF();
396 bool LosesInfo =
true;
397 Val.
convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, &LosesInfo);
408 assert(
MI.getOpcode() == TargetOpcode::G_FPTRUNC);
434 MI.eraseFromParent();
unsigned const MachineRegisterInfo * MRI
static LLVM_READONLY bool hasSourceMods(const MachineInstr &MI)
static bool isInv2Pi(const APFloat &APF)
static bool isFPExtFromF16OrConst(const MachineRegisterInfo &MRI, Register Reg)
static bool mayIgnoreSignedZero(MachineInstr &MI)
static bool isConstantCostlierToNegate(MachineInstr &MI, Register Reg, MachineRegisterInfo &MRI)
static bool allUsesHaveSourceMods(MachineInstr &MI, MachineRegisterInfo &MRI, unsigned CostThreshold=4)
static LLVM_READONLY bool opMustUseVOP3Encoding(const MachineInstr &MI, const MachineRegisterInfo &MRI)
returns true if the operation will definitely need to use a 64-bit encoding, and thus will use a VOP3 encoding regardless of the source modifiers.
static unsigned inverseMinMax(unsigned Opc)
static LLVM_READNONE bool fnegFoldsIntoMI(const MachineInstr &MI)
This contains common combine transformations that may be used in a combine pass.
Provides AMDGPU specific target descriptions.
static cl::opt< unsigned > CostThreshold("dfa-cost-threshold", cl::desc("Maximum cost accepted for the transformation"), cl::Hidden, cl::init(50))
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
AMD GCN specific subclass of TargetSubtarget.
Contains matchers for matching SSA Machine Instructions.
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
void applyExpandPromotedF16FMed3(MachineInstr &MI, Register Src0, Register Src1, Register Src2)
bool matchFoldableFneg(MachineInstr &MI, MachineInstr *&MatchInfo)
bool matchExpandPromotedF16FMed3(MachineInstr &MI, Register Src0, Register Src1, Register Src2)
void applyFoldableFneg(MachineInstr &MI, MachineInstr *&MatchInfo)
opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
bool bitwiseIsEqual(const APFloat &RHS) const
Class for arbitrary precision integers.
void replaceRegWith(MachineRegisterInfo &MRI, Register FromReg, Register ToReg) const
Calls MachineRegisterInfo::replaceRegWith() and informs the observer of the changes.
void replaceRegOpWith(MachineRegisterInfo &MRI, MachineOperand &FromRegOp, Register ToReg) const
Replace a single register operand with a new register and inform the observer of the changes.
void replaceOpcodeWith(MachineInstr &FromMI, unsigned ToOpcode) const
Replace the opcode in instruction with a new opcode and inform the observer of the changes.
MachineRegisterInfo & MRI
MachineIRBuilder & Builder
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
MachineInstrBuilder buildFMinNumIEEE(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
void setInstrAndDebugLoc(MachineInstr &MI)
Set the insertion point to before MI, and set the debug loc to MI's loc.
MachineInstrBuilder buildFNeg(const DstOp &Dst, const SrcOp &Src0, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FNEG Op0.
MachineInstrBuilder buildFMaxNumIEEE(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildFPTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FPTRUNC Op.
Register getReg(unsigned Idx) const
Get the register for the operand index.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineOperand & getOperand(unsigned i) const
unsigned getIntrinsicID() const
Returns the Intrinsic::ID for this instruction.
MachineOperand class - Representation of each machine instruction operand.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool hasOneNonDBGUse(Register RegNo) const
hasOneNonDBGUse - Return true if there is exactly one non-Debug use of the specified register.
MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is found.
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
Register createGenericVirtualRegister(LLT Ty, StringRef Name="")
Create and return a new generic virtual register with low-level type Ty.
Wrapper class representing virtual and physical registers.
The instances of the Type class are immutable: once they are created, they are never changed.
A Use represents the edge between a Value definition and its users.
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
operand_type_match m_Reg()
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
UnaryOp_match< SrcTy, TargetOpcode::G_FNEG > m_GFNeg(const SrcTy &Src)
GFCstOrSplatGFCstMatch m_GFCstOrSplat(std::optional< FPValueAndVReg > &FPValReg)
This is an optimization pass for GlobalISel generic memory operations.