26#define DEBUG_TYPE "amdgpu-late-codegenprepare"
35 WidenLoads(
"amdgpu-late-codegenprepare-widen-constant-loads",
36 cl::desc(
"Widen sub-dword constant address space loads in "
37 "AMDGPULateCodeGenPrepare"),
42class AMDGPULateCodeGenPrepare
44 public InstVisitor<AMDGPULateCodeGenPrepare, bool> {
57 return "AMDGPU IR late optimizations";
72 bool isDWORDAligned(
const Value *V)
const {
77 bool canWidenScalarExtLoad(
LoadInst &LI)
const;
83bool AMDGPULateCodeGenPrepare::doInitialization(
Module &M) {
89bool AMDGPULateCodeGenPrepare::runOnFunction(
Function &
F) {
93 AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
F);
94 UA = &getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo();
104bool AMDGPULateCodeGenPrepare::canWidenScalarExtLoad(
LoadInst &LI)
const {
115 if (Ty->isAggregateType())
117 unsigned TySize =
DL->getTypeStoreSize(Ty);
128bool AMDGPULateCodeGenPrepare::visitLoadInst(
LoadInst &LI) {
137 if (!canWidenScalarExtLoad(LI))
145 if (!isDWORDAligned(
Base))
148 int64_t Adjust =
Offset & 0x3;
159 unsigned LdBits =
DL->getTypeStoreSizeInBits(LI.
getType());
162 auto *NewPtr = IRB.CreateConstGEP1_64(
167 LoadInst *NewLd = IRB.CreateAlignedLoad(IRB.getInt32Ty(), NewPtr,
Align(4));
169 NewLd->
setMetadata(LLVMContext::MD_range,
nullptr);
171 unsigned ShAmt = Adjust * 8;
172 auto *NewVal = IRB.CreateBitCast(
173 IRB.CreateTrunc(IRB.CreateLShr(NewLd, ShAmt), IntNTy), LI.
getType());
181 "AMDGPU IR late optimizations",
false,
false)
187char AMDGPULateCodeGenPrepare::
ID = 0;
190 return new AMDGPULateCodeGenPrepare();
aarch64 falkor hwpf fix late
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< bool > WidenLoads("amdgpu-late-codegenprepare-widen-constant-loads", cl::desc("Widen sub-dword constant address space loads in " "AMDGPULateCodeGenPrepare"), cl::ReallyHidden, cl::init(true))
AMDGPU IR late optimizations
Legalize the Machine IR a function s Machine IR
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
void setPreservesAll()
Set by analyses that do not transform their input at all.
An immutable pass that tracks lazily created AssumptionCache objects.
A cache of @llvm.assume calls within a function.
A parsed version of the target data layout string in and methods for querying it.
FunctionPass class - This class is used to implement most global optimizations.
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overridden by subclasses to do the per-function processing of the pass.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Base class for instruction visitors.
void visitInstruction(Instruction &I)
RetTy visitLoadInst(LoadInst &I)
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
An instruction for reading from memory.
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
void setAlignment(Align Align)
Value * getPointerOperand()
Align getAlign() const
Return the alignment of the access that is being performed.
A Module instance is used to store all the information related to an LLVM module.
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...
virtual bool doInitialization(Module &)
doInitialization - Virtual method overridden by subclasses to do any necessary initialization before ...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
StringRef - Represent a constant reference to a string, i.e.
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
LLVMContext & getContext() const
All values hold a context through their type.
@ CONSTANT_ADDRESS_32BIT
Address space for 32-bit constant memory.
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Value * GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset, const DataLayout &DL, bool AllowNonInbounds=true)
Analyze the specified pointer to see if it can be expressed as a base pointer plus a constant offset.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
FunctionPass * createAMDGPULateCodeGenPreparePass()
This struct is a compact representation of a valid (non-zero power of two) alignment.
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.