70#include "llvm/IR/IntrinsicsAMDGPU.h" 
   76#define DEBUG_TYPE "amdgpu-image-intrinsic-opt" 
   79class AMDGPUImageIntrinsicOptimizer : 
public FunctionPass {
 
   85  AMDGPUImageIntrinsicOptimizer(
const TargetMachine *TM = 
nullptr)
 
   94                "AMDGPU Image Intrinsic Optimizer", 
false, 
false)
 
   96char AMDGPUImageIntrinsicOptimizer::
ID = 0;
 
  101    const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr) {
 
  104    if (IIList.front()->getIntrinsicID() != 
II->getIntrinsicID())
 
  108    if (IIList.front()->getType() != 
II->getType())
 
  112    bool AllEqual = 
true;
 
  113    assert(IIList.front()->arg_size() == 
II->arg_size());
 
  114    for (
int I = 1, 
E = 
II->arg_size(); AllEqual && 
I != 
E; ++
I) {
 
  115      Value *ArgList = IIList.front()->getArgOperand(
I);
 
  117      if (
I == ImageDimIntr->VAddrEnd - 1) {
 
  121        AllEqual = FragIdList->getValue().udiv(4) == FragId->getValue().udiv(4);
 
  124        AllEqual = ArgList == Arg;
 
  131    IIList.emplace_back(
II);
 
  136  MergeableInsts.emplace_back(1, 
II);
 
 
  145  for (; 
I != 
E; ++
I) {
 
  148    if (
I->mayHaveSideEffects()) {
 
  158      if (IntrinID != Intrinsic::amdgcn_image_load_2dmsaa &&
 
  159          IntrinID != Intrinsic::amdgcn_image_load_2darraymsaa)
 
  164      const uint8_t FragIdIndex = ImageDimIntr->VAddrEnd - 1;
 
 
  180  for (
const auto &IIList : MergeableInsts) {
 
  181    if (IIList.size() <= 1)
 
  190    Function *
F = IIList.front()->getCalledFunction();
 
  200    OverloadTys[0] = NewTy;
 
  204        IIList.front()->getArgOperand(ImageDimIntr->
DMaskIndex));
 
  206    unsigned NumElts = 
popcount(DMaskVal);
 
  210    unsigned NumLoads = IIList.size();
 
  211    unsigned NumMsaas = NumElts;
 
  212    unsigned NumVAddrLoads = 3 * NumLoads;
 
  213    unsigned NumVDataLoads = 
divideCeil(NumElts, isD16 ? 2 : 1) * NumLoads;
 
  214    unsigned NumVAddrMsaas = 3 * NumMsaas;
 
  215    unsigned NumVDataMsaas = 
divideCeil(4, isD16 ? 2 : 1) * NumMsaas;
 
  217    if (NumLoads < NumMsaas ||
 
  218        (NumVAddrLoads + NumVDataLoads < NumVAddrMsaas + NumVDataMsaas))
 
  224    const APInt &NewFragIdVal = FragId->getValue().
udiv(4) * 4;
 
  231    while (DMaskVal != 0) {
 
  235      if (IntrinID == Intrinsic::amdgcn_image_load_2dmsaa)
 
  236        NewIntrinID = Intrinsic::amdgcn_image_msaa_load_2dmsaa;
 
  238        NewIntrinID = Intrinsic::amdgcn_image_msaa_load_2darraymsaa;
 
  241          ConstantInt::get(DMask->
getType(), NewMaskVal);
 
  242      Args[FragIdIndex] = ConstantInt::get(FragId->getType(), NewFragIdVal);
 
  243      CallInst *NewCall = 
B.CreateIntrinsic(NewIntrinID, OverloadTys, Args);
 
  247      DMaskVal -= NewMaskVal;
 
  251    for (
auto &
II : IIList) {
 
  252      Value *VecOp = 
nullptr;
 
  254      B.SetCurrentDebugLocation(
II->getDebugLoc());
 
  256        VecOp = 
B.CreateExtractElement(NewCalls[0], Idx->getValue().urem(4));
 
  260        for (
unsigned I = 0; 
I < NumElts; ++
I) {
 
  261          VecOp = 
B.CreateInsertElement(
 
  263              B.CreateExtractElement(NewCalls[
I], Idx->getValue().urem(4)), 
I);
 
  269      II->replaceAllUsesWith(VecOp);
 
  277  for (
auto *
I : InstrsToErase)
 
  278    I->eraseFromParent();
 
 
  296        return !
F.users().empty() &&
 
  297               (
F.getIntrinsicID() == Intrinsic::amdgcn_image_load_2dmsaa ||
 
  298                F.getIntrinsicID() == Intrinsic::amdgcn_image_load_2darraymsaa);
 
 
  317bool AMDGPUImageIntrinsicOptimizer::runOnFunction(
Function &
F) {
 
  326  return new AMDGPUImageIntrinsicOptimizer(TM);
 
 
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
 
bool optimizeSection(ArrayRef< SmallVector< IntrinsicInst *, 4 > > MergeableInsts)
 
INITIALIZE_PASS(AMDGPUImageIntrinsicOptimizer, DEBUG_TYPE, "AMDGPU Image Intrinsic Optimizer", false, false) char AMDGPUImageIntrinsicOptimizer void addInstToMergeableList(IntrinsicInst *II, SmallVector< SmallVector< IntrinsicInst *, 4 > > &MergeableInsts, const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr)
 
BasicBlock::iterator collectMergeableInsts(BasicBlock::iterator I, BasicBlock::iterator E, SmallVector< SmallVector< IntrinsicInst *, 4 > > &MergeableInsts)
 
static bool imageIntrinsicOptimizerImpl(Function &F, const TargetMachine *TM)
 
Contains the definition of a TargetInstrInfo class that is common to all AMD GPUs.
 
The AMDGPU TargetMachine interface definition for hw codegen targets.
 
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
 
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
 
static bool runOnFunction(Function &F, bool PostInlining)
 
uint64_t IntrinsicInst * II
 
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
 
Class for arbitrary precision integers.
 
LLVM_ABI APInt udiv(const APInt &RHS) const
Unsigned division operation.
 
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
 
InstListType::iterator iterator
Instruction iterators...
 
This class represents a function call, abstracting a target machine's calling convention.
 
This is the shared class of boolean and integer constants.
 
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
 
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
 
FunctionPass class - This class is used to implement most global optimizations.
 
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
 
A wrapper class for inspecting calls to intrinsic functions.
 
A Module instance is used to store all the information related to an LLVM module.
 
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
 
A set of analyses that are preserved following a run of a transformation pass.
 
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
 
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
 
void push_back(const T &Elt)
 
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
 
Primary interface to the complete machine description for the target machine.
 
The instances of the Type class are immutable: once they are created, they are never changed.
 
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
 
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
 
LLVM Value Representation.
 
Type * getType() const
All values are typed, get the type of this value.
 
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
 
bool isGFX11Plus(const MCSubtargetInfo &STI)
 
const ImageDimIntrinsicInfo * getImageDimIntrinsicInfo(unsigned Intr)
 
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
 
LLVM_ABI bool getIntrinsicSignature(Intrinsic::ID, FunctionType *FT, SmallVectorImpl< Type * > &ArgTys)
Gets the type arguments of an intrinsic call by matching type constraints specified by the ....
 
This is an optimization pass for GlobalISel generic memory operations.
 
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
 
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
 
int countr_zero(T Val)
Count the number of 0's from the least significant bit to the most significant bit, stopping at the first 1.
 
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
 
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
 
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
 
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
 
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
 
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
 
FunctionPass * createAMDGPUImageIntrinsicOptimizerPass(const TargetMachine *)
 
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)