#include "AMDGPUInstrInfo.h"
#include "AMDGPUTargetTransformInfo.h"
#include "GCNSubtarget.h"
#include "llvm/ADT/FloatingPointMode.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include <optional>
#include "AMDGPUGenSearchableTables.inc"

Macros
#define	DEBUG_TYPE "AMDGPUtti"
#define	GET_AMDGPUImageDMaskIntrinsicTable_IMPL

Functions
static APFloat	fmed3AMDGCN (const APFloat &Src0, const APFloat &Src1, const APFloat &Src2)
static bool	canSafelyConvertTo16Bit (Value &V, bool IsFloat)
static Value *	convertTo16Bit (Value &V, InstCombiner::BuilderTy &Builder)
static std::optional< Instruction * >	modifyIntrinsicCall (IntrinsicInst &OldIntr, Instruction &InstToReplace, unsigned NewIntr, InstCombiner &IC, std::function< void(SmallVectorImpl< Value * > &, SmallVectorImpl< Type * > &)> Func)
	Applies Func(OldIntr.Args, OldIntr.ArgTys), creates intrinsic call with modified arguments (based on OldIntr) and replaces InstToReplace with this newly created intrinsic call.
static std::optional< Instruction * >	simplifyAMDGCNImageIntrinsic (const GCNSubtarget *ST, const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr, IntrinsicInst &II, InstCombiner &IC)
static Value *	matchFPExtFromF16 (Value *Arg)
	Match an fpext from half to float, or a constant we can convert.
static APInt	trimTrailingZerosInVector (InstCombiner &IC, Value *UseV, Instruction *I)
static APInt	defaultComponentBroadcast (Value *V)
static Value *	simplifyAMDGCNMemoryIntrinsicDemanded (InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, int DMaskIdx, bool IsLoad)
	Implement SimplifyDemandedVectorElts for amdgcn buffer and image intrinsics.
static bool	canContractSqrtToRsq (const FPMathOperator *SqrtOp)
	Return true if it's legal to contract llvm.amdgcn.rcp(llvm.sqrt)
static bool	isTriviallyUniform (const Use &U)
	Return true if we can easily prove that use U is uniform.
static CallInst *	rewriteCall (IRBuilderBase &B, CallInst &Old, Function &NewCallee, ArrayRef< Value * > Ops)

Macro Definition Documentation

◆ DEBUG_TYPE

#define DEBUG_TYPE "AMDGPUtti"

Definition at line 29 of file AMDGPUInstCombineIntrinsic.cpp.

◆ GET_AMDGPUImageDMaskIntrinsicTable_IMPL

#define GET_AMDGPUImageDMaskIntrinsicTable_IMPL

Definition at line 37 of file AMDGPUInstCombineIntrinsic.cpp.

Function Documentation

◆ canContractSqrtToRsq()

bool canContractSqrtToRsq ( const FPMathOperator * SqrtOp )

static

Return true if it's legal to contract llvm.amdgcn.rcp(llvm.sqrt)

Definition at line 492 of file AMDGPUInstCombineIntrinsic.cpp.

References llvm::FPMathOperator::getFPAccuracy(), llvm::Value::getType(), llvm::FPMathOperator::hasApproxFunc(), llvm::Type::isFloatTy(), and llvm::Type::isHalfTy().

Referenced by llvm::GCNTTIImpl::instCombineIntrinsic().

◆ canSafelyConvertTo16Bit()

bool canSafelyConvertTo16Bit	(	Value &	V,
		bool	IsFloat )

static

Definition at line 67 of file AMDGPUInstCombineIntrinsic.cpp.

References llvm::APFloat::convert(), llvm::dyn_cast(), llvm::APInt::getActiveBits(), llvm::Value::getType(), llvm::APFloatBase::IEEEhalf(), llvm::Type::isHalfTy(), llvm::Type::isIntegerTy(), llvm::PatternMatch::m_FPExt(), llvm::PatternMatch::m_Value(), llvm::PatternMatch::m_ZExt(), llvm::PatternMatch::match(), and llvm::APFloatBase::rmTowardZero.

Referenced by simplifyAMDGCNImageIntrinsic().

◆ convertTo16Bit()

Value * convertTo16Bit	(	Value &	V,
		InstCombiner::BuilderTy &	Builder )

static

Definition at line 105 of file AMDGPUInstCombineIntrinsic.cpp.

References llvm::cast(), llvm::Type::getHalfTy(), llvm::Type::getInt16Ty(), llvm::isa(), llvm::Type::isFloatingPointTy(), llvm::Type::isIntegerTy(), and llvm_unreachable.

◆ defaultComponentBroadcast()

APInt defaultComponentBroadcast ( Value * V )

static

Definition at line 458 of file AMDGPUInstCombineIntrinsic.cpp.

References llvm::cast(), llvm::APInt::clearBit(), llvm::dyn_cast(), llvm::findScalarElement(), llvm::APInt::getAllOnes(), I, llvm::isa(), and llvm::PoisonMaskElem.

Referenced by llvm::GCNTTIImpl::instCombineIntrinsic().

◆ fmed3AMDGCN()

APFloat fmed3AMDGCN	(	const APFloat &	Src0,
		const APFloat &	Src1,
		const APFloat &	Src2 )

static

Definition at line 46 of file AMDGPUInstCombineIntrinsic.cpp.

References assert(), llvm::APFloatBase::cmpEqual, llvm::APFloatBase::cmpUnordered, llvm::APFloat::compare(), and llvm::maxnum().

Referenced by llvm::GCNTTIImpl::instCombineIntrinsic().

◆ isTriviallyUniform()

bool isTriviallyUniform ( const Use & U )

static

Return true if we can easily prove that use U is uniform.

Definition at line 499 of file AMDGPUInstCombineIntrinsic.cpp.

References A(), llvm::cast(), llvm::dyn_cast(), II, llvm::isa(), llvm::AMDGPU::isArgPassedInSGPR(), and llvm::AMDGPU::isIntrinsicAlwaysUniform().

Referenced by llvm::GCNTTIImpl::hoistLaneIntrinsicThroughOperand(), and llvm::GCNTTIImpl::instCombineIntrinsic().

◆ matchFPExtFromF16()

Value * matchFPExtFromF16 ( Value * Arg )

static

Match an fpext from half to float, or a constant we can convert.

Definition at line 414 of file AMDGPUInstCombineIntrinsic.cpp.

References llvm::APFloat::convert(), llvm::Value::getContext(), llvm::Type::getHalfTy(), llvm::ConstantFP::getValueAPF(), llvm::APFloatBase::IEEEhalf(), llvm::PatternMatch::m_ConstantFP(), llvm::PatternMatch::m_FPExt(), llvm::PatternMatch::m_OneUse(), llvm::PatternMatch::m_Value(), llvm::PatternMatch::match(), and llvm::APFloatBase::rmNearestTiesToEven.

Referenced by llvm::GCNTTIImpl::instCombineIntrinsic().

◆ modifyIntrinsicCall()

std::optional< Instruction * > modifyIntrinsicCall	(	IntrinsicInst &	OldIntr,
		Instruction &	InstToReplace,
		unsigned	NewIntr,
		InstCombiner &	IC,
		std::function< void(SmallVectorImpl< Value * > &, SmallVectorImpl< Type * > &)>	Func )

static

Applies Func(OldIntr.Args, OldIntr.ArgTys), creates intrinsic call with modified arguments (based on OldIntr) and replaces InstToReplace with this newly created intrinsic call.

Definition at line 120 of file AMDGPUInstCombineIntrinsic.cpp.

References llvm::CallBase::args(), llvm::InstCombiner::Builder, llvm::Instruction::copyFastMathFlags(), llvm::Instruction::copyMetadata(), llvm::IRBuilderBase::CreateIntrinsic(), llvm::InstCombiner::eraseInstFromFunction(), llvm::CallBase::getCalledFunction(), llvm::Intrinsic::getIntrinsicSignature(), llvm::Value::getType(), llvm::isa(), llvm::Type::isVoidTy(), llvm::InstCombiner::replaceInstUsesWith(), and llvm::Value::takeName().

Referenced by simplifyAMDGCNImageIntrinsic().

◆ rewriteCall()

CallInst * rewriteCall	(	IRBuilderBase &	B,
		CallInst &	Old,
		Function &	NewCallee,
		ArrayRef< Value * >	Ops )

static

Definition at line 546 of file AMDGPUInstCombineIntrinsic.cpp.

References AbstractManglingParser< Derived, Alloc >::Ops, B(), llvm::CallBase::getOperandBundlesAsDefs(), and llvm::Value::takeName().

Referenced by llvm::GCNTTIImpl::hoistLaneIntrinsicThroughOperand().

◆ simplifyAMDGCNImageIntrinsic()

std::optional< Instruction * > simplifyAMDGCNImageIntrinsic	(	const GCNSubtarget *	ST,
		const AMDGPU::ImageDimIntrinsicInfo *	ImageDimIntr,
		IntrinsicInst &	II,
		InstCombiner &	IC )

static

Definition at line 154 of file AMDGPUInstCombineIntrinsic.cpp.

Referenced by llvm::GCNTTIImpl::instCombineIntrinsic().

◆ simplifyAMDGCNMemoryIntrinsicDemanded()

Value * simplifyAMDGCNMemoryIntrinsicDemanded	(	InstCombiner &	IC,
		IntrinsicInst &	II,
		APInt	DemandedElts,
		int	DMaskIdx,
		bool	IsLoad )

static

Implement SimplifyDemandedVectorElts for amdgcn buffer and image intrinsics.

Simplifying amdgcn image and buffer store intrinsics updates the definition of the intrinsic's vector argument, whereas simplifying image and buffer loads updates the uses of the result. Note: This only supports non-TFE/LWE image intrinsic calls; those have struct returns.

Definition at line 1777 of file AMDGPUInstCombineIntrinsic.cpp.

References llvm::InstCombiner::Builder, llvm::cast(), llvm::Instruction::copyMetadata(), llvm::APInt::countr_zero(), llvm::IRBuilderBase::CreateAdd(), llvm::IRBuilderBase::CreateExtractElement(), llvm::IRBuilderBase::CreateInsertElement(), llvm::IRBuilderBase::CreateIntrinsic(), llvm::IRBuilderBase::CreateShuffleVector(), llvm::FixedVectorType::get(), llvm::PoisonValue::get(), llvm::APInt::getActiveBits(), llvm::InstCombiner::getDataLayout(), llvm::Intrinsic::getIntrinsicSignature(), llvm::Value::getType(), llvm::DataLayout::getTypeSizeInBits(), llvm::ConstantInt::getZExtValue(), II, llvm::APInt::isMask(), llvm::Offset, llvm::APInt::popcount(), llvm::popcount(), llvm::SmallVectorTemplateBase< T, bool >::push_back(), llvm::IRBuilderBase::SetInsertPoint(), and llvm::Value::takeName().