AMDGPULateCodeGenPrepare.cpp (LLVM 18.0.0git)
//===-- AMDGPULateCodeGenPrepare.cpp --------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This pass does miscellaneous AMDGPU optimizations on IR *just* before
/// instruction selection.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/UniformityAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Transforms/Utils/Local.h"

#define DEBUG_TYPE "amdgpu-late-codegenprepare"

using namespace llvm;

// Scalar load widening needs to run after the load-store-vectorizer, as that
// pass doesn't handle overlapping cases. In addition, this pass enhances the
// widening to handle cases where scalar sub-dword loads are only naturally
// aligned but not dword aligned.
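// For example (illustrative), an i16 kernel-argument load with align 2 at a
// byte offset that is not a multiple of 4 can still be turned into a single
// dword load once the base pointer is known to be dword aligned.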
static cl::opt<bool>
    WidenLoads("amdgpu-late-codegenprepare-widen-constant-loads",
               cl::desc("Widen sub-dword constant address space loads in "
                        "AMDGPULateCodeGenPrepare"),
               cl::ReallyHidden, cl::init(true));

namespace {

class AMDGPULateCodeGenPrepare
    : public FunctionPass,
      public InstVisitor<AMDGPULateCodeGenPrepare, bool> {
  Module *Mod = nullptr;
  const DataLayout *DL = nullptr;

  AssumptionCache *AC = nullptr;
  UniformityInfo *UA = nullptr;

public:
  static char ID;

  AMDGPULateCodeGenPrepare() : FunctionPass(ID) {}

  StringRef getPassName() const override {
    return "AMDGPU IR late optimizations";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<AssumptionCacheTracker>();
    AU.addRequired<UniformityInfoWrapperPass>();
    AU.setPreservesAll();
  }

  bool doInitialization(Module &M) override;
  bool runOnFunction(Function &F) override;

  bool visitInstruction(Instruction &) { return false; }

  // Check if the specified value is at least DWORD aligned.
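  // DWORD alignment follows when the low two bits of the pointer value are
  // known to be zero; computeKnownBits also folds in alignment facts recorded
  // via llvm.assume through the AssumptionCache.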
  bool isDWORDAligned(const Value *V) const {
    KnownBits Known = computeKnownBits(V, *DL, 0, AC);
    return Known.countMinTrailingZeros() >= 2;
  }

  bool canWidenScalarExtLoad(LoadInst &LI) const;
  bool visitLoadInst(LoadInst &LI);
};

} // end anonymous namespace

bool AMDGPULateCodeGenPrepare::doInitialization(Module &M) {
  Mod = &M;
  DL = &Mod->getDataLayout();
  return false;
}

bool AMDGPULateCodeGenPrepare::runOnFunction(Function &F) {
  if (skipFunction(F))
    return false;

  AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
  UA = &getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo();

  bool Changed = false;
  for (auto &BB : F)
    for (Instruction &I : llvm::make_early_inc_range(BB))
      Changed |= visit(I);

  return Changed;
}

bool AMDGPULateCodeGenPrepare::canWidenScalarExtLoad(LoadInst &LI) const {
  unsigned AS = LI.getPointerAddressSpace();
  // Skip non-constant address space.
  if (AS != AMDGPUAS::CONSTANT_ADDRESS &&
      AS != AMDGPUAS::CONSTANT_ADDRESS_32BIT)
    return false;
  // Skip non-simple loads.
  if (!LI.isSimple())
    return false;
  auto *Ty = LI.getType();
  // Skip aggregate types.
  if (Ty->isAggregateType())
    return false;
  unsigned TySize = DL->getTypeStoreSize(Ty);
  // Only handle sub-DWORD loads.
  if (TySize >= 4)
    return false;
  // The load must be at least naturally aligned.
  if (LI.getAlign() < DL->getABITypeAlign(Ty))
    return false;
  // It should be uniform, i.e. a scalar load.
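  // Uniform loads from the constant address space are typically selected as
  // scalar (SMEM) loads, which is where widening to a full dword pays off.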
  return UA->isUniform(&LI);
}

bool AMDGPULateCodeGenPrepare::visitLoadInst(LoadInst &LI) {
  if (!WidenLoads)
    return false;

  // Skip if the load is already at least DWORD aligned, as that case is
  // handled in SDAG.
  if (LI.getAlign() >= 4)
    return false;

  if (!canWidenScalarExtLoad(LI))
    return false;

  int64_t Offset = 0;
  auto *Base =
      GetPointerBaseWithConstantOffset(LI.getPointerOperand(), Offset, *DL);
  // If the base is not DWORD aligned, it is not safe to perform the following
  // transforms.
  if (!isDWORDAligned(Base))
    return false;

  int64_t Adjust = Offset & 0x3;
  if (Adjust == 0) {
    // With a zero adjust, the original alignment can simply be promoted to
    // DWORD alignment.
    LI.setAlignment(Align(4));
    return true;
  }

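  // Otherwise, load the containing dword and extract the original value with
  // a shift and truncate. A sketch of the rewrite (%p and %gep are
  // hypothetical values, with %p dword aligned and %gep = %p + 2):
  //   %v = load i16, ptr addrspace(4) %gep, align 2
  // becomes
  //   %w = load i32, ptr addrspace(4) %p, align 4
  //   %s = lshr i32 %w, 16
  //   %v = trunc i32 %s to i16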
  IRBuilder<> IRB(&LI);
  IRB.SetCurrentDebugLocation(LI.getDebugLoc());

  unsigned LdBits = DL->getTypeStoreSizeInBits(LI.getType());
  auto IntNTy = Type::getIntNTy(LI.getContext(), LdBits);

  auto *NewPtr = IRB.CreateConstGEP1_64(
      IRB.getInt8Ty(),
      IRB.CreateAddrSpaceCast(Base, LI.getPointerOperand()->getType()),
      Offset - Adjust);

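  // Emit the dword load at the rounded-down, dword-aligned offset. Metadata
  // from the original load is preserved except !range, which described the
  // narrow value and does not apply to the widened i32 load.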
  LoadInst *NewLd = IRB.CreateAlignedLoad(IRB.getInt32Ty(), NewPtr, Align(4));
  NewLd->copyMetadata(LI);
  NewLd->setMetadata(LLVMContext::MD_range, nullptr);

  unsigned ShAmt = Adjust * 8;
  auto *NewVal = IRB.CreateBitCast(
      IRB.CreateTrunc(IRB.CreateLShr(NewLd, ShAmt), IntNTy), LI.getType());
  LI.replaceAllUsesWith(NewVal);
  RecursivelyDeleteTriviallyDeadInstructions(&LI);

  return true;
}

INITIALIZE_PASS_BEGIN(AMDGPULateCodeGenPrepare, DEBUG_TYPE,
                      "AMDGPU IR late optimizations", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(UniformityInfoWrapperPass)
INITIALIZE_PASS_END(AMDGPULateCodeGenPrepare, DEBUG_TYPE,
                    "AMDGPU IR late optimizations", false, false)

char AMDGPULateCodeGenPrepare::ID = 0;

FunctionPass *llvm::createAMDGPULateCodeGenPreparePass() {
  return new AMDGPULateCodeGenPrepare();
}