//===-- AMDGPULateCodeGenPrepare.cpp --------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This pass does misc. AMDGPU optimizations on IR *just* before instruction
/// selection.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUTargetMachine.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/UniformityAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Transforms/Utils/Local.h"

#define DEBUG_TYPE "amdgpu-late-codegenprepare"

using namespace llvm;

// Scalar load widening needs to run after the load-store-vectorizer, as that
// pass doesn't handle overlapping cases. In addition, this pass extends the
// widening to handle cases where scalar sub-dword loads are only naturally
// aligned rather than dword aligned.
static cl::opt<bool>
    WidenLoads("amdgpu-late-codegenprepare-widen-constant-loads",
               cl::desc("Widen sub-dword constant address space loads in "
                        "AMDGPULateCodeGenPrepare"),
               cl::ReallyHidden, cl::init(true));

namespace {

class AMDGPULateCodeGenPrepare
    : public InstVisitor<AMDGPULateCodeGenPrepare, bool> {
  Function &F;
  const DataLayout &DL;
  const GCNSubtarget &ST;

  AssumptionCache *const AC;
  UniformityInfo &UA;

  SmallVector<WeakTrackingVH, 8> DeadInsts;

public:
  AMDGPULateCodeGenPrepare(Function &F, const GCNSubtarget &ST,
                           AssumptionCache *AC, UniformityInfo &UA)
      : F(F), DL(F.getDataLayout()), ST(ST), AC(AC), UA(UA) {}
  bool run();
  bool visitInstruction(Instruction &) { return false; }

  // Check if the specified value is at least DWORD aligned.
  bool isDWORDAligned(const Value *V) const {
    KnownBits Known = computeKnownBits(V, DL, AC);
    return Known.countMinTrailingZeros() >= 2;
  }

  bool canWidenScalarExtLoad(LoadInst &LI) const;
  bool visitLoadInst(LoadInst &LI);
};

using ValueToValueMap = DenseMap<const Value *, Value *>;

class LiveRegOptimizer {
private:
  Module &Mod;
  const DataLayout &DL;
  const GCNSubtarget &ST;

  /// The scalar type to convert to.
  Type *const ConvertToScalar;
  /// Map of Value -> Converted Value.
  ValueToValueMap ValMap;
  /// Map containing the conversions from Optimal Type -> Original Type, per BB.
  DenseMap<BasicBlock *, ValueToValueMap> BBUseValMap;

public:
  /// Calculate and return the type to convert to, given a problematic \p
  /// OriginalType. In some instances, we may widen the type (e.g. v2i8 ->
  /// i32).
  Type *calculateConvertType(Type *OriginalType);
  /// Convert the virtual register defined by \p V to the compatible vector of
  /// legal type.
  Value *convertToOptType(Instruction *V, BasicBlock::iterator &InsertPt);
  /// Convert the virtual register defined by \p V back to the original type \p
  /// ConvertType, stripping away the MSBs in cases where there was an
  /// imperfect fit (e.g. v2i32 -> v7i8).
  Value *convertFromOptType(Type *ConvertType, Instruction *V,
                            BasicBlock::iterator &InsertPt,
                            BasicBlock *InsertBlock);
  /// Check for problematic PHI nodes or cross-BB values based on the value
  /// defined by \p I, and coerce to legal types if necessary. For a
  /// problematic PHI node, we coerce all incoming values in a single
  /// invocation.
  bool optimizeLiveType(Instruction *I,
                        SmallVectorImpl<WeakTrackingVH> &DeadInsts);

  // Whether or not the type should be replaced to avoid inefficient
  // legalization code.
  bool shouldReplace(Type *ITy) {
    FixedVectorType *VTy = dyn_cast<FixedVectorType>(ITy);
    if (!VTy)
      return false;

    const auto *TLI = ST.getTargetLowering();

    Type *EltTy = VTy->getElementType();
    // If the element size is not less than the convert-to-scalar size, then we
    // can't do any bit packing.
    if (!EltTy->isIntegerTy() ||
        EltTy->getScalarSizeInBits() > ConvertToScalar->getScalarSizeInBits())
      return false;

    // Only coerce illegal types.
    TargetLoweringBase::LegalizeKind LK =
        TLI->getTypeConversion(EltTy->getContext(), EVT::getEVT(EltTy, false));
    return LK.first != TargetLoweringBase::TypeLegal;
  }
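  // For example, a <4 x i8> value qualifies for replacement on subtargets
  // where i8 is not a legal scalar type, while a vector of already-legal i32
  // elements (or any non-vector type) is left alone.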

  bool isOpLegal(Instruction *I) { return isa<StoreInst, IntrinsicInst>(I); }

  bool isCoercionProfitable(Instruction *II) {
    SmallPtrSet<Instruction *, 4> CVisited;
    SmallVector<Instruction *, 4> UserList;

    // Check users for profitable conditions (across-block users which can
    // natively handle the illegal vector).
    for (User *V : II->users())
      if (auto *UseInst = dyn_cast<Instruction>(V))
        UserList.push_back(UseInst);

    auto IsLookThru = [](Instruction *II) {
      if (const auto *Intr = dyn_cast<IntrinsicInst>(II))
        return Intr->getIntrinsicID() == Intrinsic::amdgcn_perm;
      return isa<PHINode, ShuffleVectorInst, InsertElementInst,
                 ExtractElementInst, CastInst>(II);
    };

    while (!UserList.empty()) {
      auto *CII = UserList.pop_back_val();
      if (!CVisited.insert(CII).second)
        continue;

      // The same-BB filter must look at the *user*, and allow non-look-through
      // users when the def is a PHI (loop-header pattern).
      if (CII->getParent() == II->getParent() && !IsLookThru(CII) &&
          !isa<PHINode>(II))
        continue;

      if (isOpLegal(CII))
        return true;

      if (IsLookThru(CII))
        for (User *V : CII->users())
          if (auto *UseInst = dyn_cast<Instruction>(V))
            UserList.push_back(UseInst);
    }
    return false;
  }
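  // In effect, coercion is only deemed profitable when the value, possibly
  // looked through PHIs, shuffles, insert/extractelement, casts, or
  // amdgcn.perm calls, eventually reaches a store or an intrinsic call.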

  LiveRegOptimizer(Module &Mod, const GCNSubtarget &ST)
      : Mod(Mod), DL(Mod.getDataLayout()), ST(ST),
        ConvertToScalar(Type::getInt32Ty(Mod.getContext())) {}
};

} // end anonymous namespace

bool AMDGPULateCodeGenPrepare::run() {
  // "Optimize" the virtual regs that cross basic block boundaries. When
  // building the SelectionDAG, vectors of illegal types that cross basic
  // blocks will be scalarized and widened, with each scalar living in its
  // own register. To work around this, this optimization converts the
  // vectors to equivalent vectors of legal type (which are converted back
  // before uses in subsequent blocks), to pack the bits into fewer physical
  // registers (used in CopyToReg/CopyFromReg pairs).
  LiveRegOptimizer LRO(*F.getParent(), ST);

  bool Changed = false;

  bool HasScalarSubwordLoads = ST.hasScalarSubwordLoads();

  for (auto &BB : reverse(F))
    for (Instruction &I : make_early_inc_range(reverse(BB))) {
      Changed |= !HasScalarSubwordLoads && visit(I);
      Changed |= LRO.optimizeLiveType(&I, DeadInsts);
    }

  RecursivelyDeleteTriviallyDeadInstructionsPermissive(DeadInsts);
  return Changed;
}

Type *LiveRegOptimizer::calculateConvertType(Type *OriginalType) {
  assert(OriginalType->getScalarSizeInBits() <=
         ConvertToScalar->getScalarSizeInBits());

  FixedVectorType *VTy = cast<FixedVectorType>(OriginalType);

  TypeSize OriginalSize = DL.getTypeSizeInBits(VTy);
  TypeSize ConvertScalarSize = DL.getTypeSizeInBits(ConvertToScalar);
  unsigned ConvertEltCount =
      (OriginalSize + ConvertScalarSize - 1) / ConvertScalarSize;

  if (OriginalSize <= ConvertScalarSize)
    return IntegerType::get(Mod.getContext(), ConvertScalarSize);

  return VectorType::get(Type::getIntNTy(Mod.getContext(), ConvertScalarSize),
                         ConvertEltCount, false);
}
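// For example, with the i32 convert-to scalar: a <3 x i8> value (24 bits) maps
// to a single i32, while a <7 x i8> value (56 bits) is rounded up to <2 x i32>.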

Value *LiveRegOptimizer::convertToOptType(Instruction *V,
                                          BasicBlock::iterator &InsertPt) {
  FixedVectorType *VTy = cast<FixedVectorType>(V->getType());
  Type *NewTy = calculateConvertType(V->getType());

  TypeSize OriginalSize = DL.getTypeSizeInBits(VTy);
  TypeSize NewSize = DL.getTypeSizeInBits(NewTy);

  IRBuilder<> Builder(V->getParent(), InsertPt);
  // If there is a bitsize match, we can fit the old vector into a new vector of
  // desired type.
  if (OriginalSize == NewSize)
    return Builder.CreateBitCast(V, NewTy, V->getName() + ".bc");

  // If there is a bitsize mismatch, we must use a wider vector.
  assert(NewSize > OriginalSize);
  uint64_t ExpandedVecElementCount = NewSize / VTy->getScalarSizeInBits();

  SmallVector<int, 8> ShuffleMask;
  uint64_t OriginalElementCount = VTy->getElementCount().getFixedValue();
  for (unsigned I = 0; I < OriginalElementCount; I++)
    ShuffleMask.push_back(I);

  for (uint64_t I = OriginalElementCount; I < ExpandedVecElementCount; I++)
    ShuffleMask.push_back(OriginalElementCount);

  Value *ExpandedVec = Builder.CreateShuffleVector(V, ShuffleMask);
  return Builder.CreateBitCast(ExpandedVec, NewTy, V->getName() + ".bc");
}
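// For example, a <3 x i8> def whose optimal type is i32: the sizes differ
// (24 vs. 32 bits), so the value is first widened to <4 x i8> with a
// shufflevector whose extra lane is padding, and then bitcast to i32.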

Value *LiveRegOptimizer::convertFromOptType(Type *ConvertType, Instruction *V,
                                            BasicBlock::iterator &InsertPt,
                                            BasicBlock *InsertBB) {
  FixedVectorType *NewVTy = cast<FixedVectorType>(ConvertType);

  TypeSize OriginalSize = DL.getTypeSizeInBits(V->getType());
  TypeSize NewSize = DL.getTypeSizeInBits(NewVTy);

  IRBuilder<> Builder(InsertBB, InsertPt);
  // If there is a bitsize match, we simply convert back to the original type.
  if (OriginalSize == NewSize)
    return Builder.CreateBitCast(V, NewVTy, V->getName() + ".bc");

  // If there is a bitsize mismatch, then we must have used a wider value to
  // hold the bits.
  assert(OriginalSize > NewSize);
  // For wide scalars, we can just truncate the value.
  if (!V->getType()->isVectorTy()) {
    Instruction *Trunc = cast<Instruction>(
        Builder.CreateTrunc(V, IntegerType::get(Mod.getContext(), NewSize)));
    return cast<Instruction>(Builder.CreateBitCast(Trunc, NewVTy));
  }

  // For wider vectors, we must strip the MSBs to convert back to the original
  // type.
  VectorType *ExpandedVT = VectorType::get(
      Type::getIntNTy(Mod.getContext(), NewVTy->getScalarSizeInBits()),
      (OriginalSize / NewVTy->getScalarSizeInBits()), false);
  Instruction *Converted =
      cast<Instruction>(Builder.CreateBitCast(V, ExpandedVT));

  unsigned NarrowElementCount = NewVTy->getElementCount().getFixedValue();
  SmallVector<int, 8> ShuffleMask(NarrowElementCount);
  std::iota(ShuffleMask.begin(), ShuffleMask.end(), 0);

  return Builder.CreateShuffleVector(Converted, ShuffleMask);
}
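// For example, restoring a <7 x i8> value from its coerced <2 x i32> form: the
// 64-bit vector is bitcast to <8 x i8> and a shufflevector keeps only the low
// 7 lanes. A scalar coerced value, such as an i32 holding a <3 x i8>, is
// instead truncated to i24 and bitcast back.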

bool LiveRegOptimizer::optimizeLiveType(
    Instruction *I, SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
  SmallVector<Instruction *, 4> Worklist;
  SmallPtrSet<PHINode *, 4> PhiNodes;
  SmallPtrSet<Instruction *, 4> Defs;
  SmallPtrSet<Instruction *, 4> Uses;
  SmallPtrSet<Instruction *, 4> Visited;

  Worklist.push_back(cast<Instruction>(I));
  while (!Worklist.empty()) {
    Instruction *II = Worklist.pop_back_val();

    if (!Visited.insert(II).second)
      continue;

    if (!shouldReplace(II->getType()))
      continue;

    if (!isCoercionProfitable(II))
      continue;

    if (PHINode *Phi = dyn_cast<PHINode>(II)) {
      PhiNodes.insert(Phi);
      // Collect all the incoming values of problematic PHI nodes.
      for (Value *V : Phi->incoming_values()) {
        // Repeat the collection process for newly found PHI nodes.
        if (PHINode *OpPhi = dyn_cast<PHINode>(V)) {
          if (!PhiNodes.count(OpPhi) && !Visited.count(OpPhi))
            Worklist.push_back(OpPhi);
          continue;
        }

        Instruction *IncInst = dyn_cast<Instruction>(V);
        // Other incoming value types (e.g. vector literals) are unhandled.
        if (!IncInst && !isa<ConstantAggregateZero>(V))
          return false;

        // Collect all other incoming values for coercion.
        if (IncInst)
          Defs.insert(IncInst);
      }
    }

    // Collect all relevant uses.
    for (User *V : II->users()) {
      // Repeat the collection process for problematic PHI nodes.
      if (PHINode *OpPhi = dyn_cast<PHINode>(V)) {
        if (!PhiNodes.count(OpPhi) && !Visited.count(OpPhi))
          Worklist.push_back(OpPhi);
        continue;
      }

      Instruction *UseInst = cast<Instruction>(V);
      // Collect all uses of PHINodes and any use that crosses BB boundaries.
      if (UseInst->getParent() != II->getParent() || isa<PHINode>(II)) {
        Uses.insert(UseInst);
        if (!isa<PHINode>(II))
          Defs.insert(II);
      }
    }
  }

  // Coerce and track the defs.
  for (Instruction *D : Defs) {
    if (!ValMap.contains(D)) {
      BasicBlock::iterator InsertPt = std::next(D->getIterator());
      Value *ConvertVal = convertToOptType(D, InsertPt);
      assert(ConvertVal);
      ValMap[D] = ConvertVal;
    }
  }

  // Construct new-typed PHI nodes.
  for (PHINode *Phi : PhiNodes) {
    ValMap[Phi] = PHINode::Create(calculateConvertType(Phi->getType()),
                                  Phi->getNumIncomingValues(),
                                  Phi->getName() + ".tc", Phi->getIterator());
  }

  // Connect all the PHI nodes with their new incoming values.
  for (PHINode *Phi : PhiNodes) {
    PHINode *NewPhi = cast<PHINode>(ValMap[Phi]);
    bool MissingIncVal = false;
    for (int I = 0, E = Phi->getNumIncomingValues(); I < E; I++) {
      Value *IncVal = Phi->getIncomingValue(I);
      if (isa<ConstantAggregateZero>(IncVal)) {
        Type *NewType = calculateConvertType(Phi->getType());
        NewPhi->addIncoming(ConstantInt::get(NewType, 0, false),
                            Phi->getIncomingBlock(I));
      } else if (Value *Val = ValMap.lookup(IncVal))
        NewPhi->addIncoming(Val, Phi->getIncomingBlock(I));
      else
        MissingIncVal = true;
    }
    if (MissingIncVal) {
      Value *DeadVal = ValMap[Phi];
      // The coercion chain of the PHI is broken. Delete the Phi
      // from the ValMap and any connected / user Phis.
      SmallVector<Value *, 4> PHIWorklist;
      SmallPtrSet<Value *, 4> VisitedPhis;
      PHIWorklist.push_back(DeadVal);
      while (!PHIWorklist.empty()) {
        Value *NextDeadValue = PHIWorklist.pop_back_val();
        VisitedPhis.insert(NextDeadValue);
        auto OriginalPhi =
            llvm::find_if(PhiNodes, [this, &NextDeadValue](PHINode *CandPhi) {
              return ValMap[CandPhi] == NextDeadValue;
            });
        // This PHI may have already been removed from maps when
        // unwinding a previous Phi.
        if (OriginalPhi != PhiNodes.end())
          ValMap.erase(*OriginalPhi);

        DeadInsts.emplace_back(cast<Instruction>(NextDeadValue));

        for (User *U : NextDeadValue->users()) {
          if (!VisitedPhis.contains(cast<PHINode>(U)))
            PHIWorklist.push_back(U);
        }
      }
    } else {
      DeadInsts.emplace_back(cast<Instruction>(Phi));
    }
  }
  // Coerce back to the original type and replace the uses.
  for (Instruction *U : Uses) {
    // Replace all converted operands for a use.
    for (auto [OpIdx, Op] : enumerate(U->operands())) {
      if (Value *Val = ValMap.lookup(Op)) {
        Value *NewVal = nullptr;
        if (BBUseValMap.contains(U->getParent()) &&
            BBUseValMap[U->getParent()].contains(Val))
          NewVal = BBUseValMap[U->getParent()][Val];
        else {
          BasicBlock::iterator InsertPt = U->getParent()->getFirstNonPHIIt();
          // We may pick up ops that were previously converted for users in
          // other blocks. If there is an originally typed definition of the Op
          // already in this block, simply reuse it.
          if (isa<Instruction>(Op) &&
              U->getParent() == cast<Instruction>(Op)->getParent()) {
            NewVal = Op;
          } else {
            NewVal =
                convertFromOptType(Op->getType(), cast<Instruction>(ValMap[Op]),
                                   InsertPt, U->getParent());
            BBUseValMap[U->getParent()][ValMap[Op]] = NewVal;
          }
        }
        assert(NewVal);
        U->setOperand(OpIdx, NewVal);
      }
    }
  }

  return true;
}
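// Putting it together: a problematic def (e.g. an illegal <4 x i8>) that is
// live across a block boundary gets a packed i32 copy inserted right after it,
// PHIs over such values are rebuilt in the packed type, and each cross-block
// user has the value unpacked again at the first non-PHI point of its block,
// so only the packed form is live across the CopyToReg/CopyFromReg boundary.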

bool AMDGPULateCodeGenPrepare::canWidenScalarExtLoad(LoadInst &LI) const {
  unsigned AS = LI.getPointerAddressSpace();
  // Skip non-constant address spaces.
  if (AS != AMDGPUAS::CONSTANT_ADDRESS &&
      AS != AMDGPUAS::CONSTANT_ADDRESS_32BIT)
    return false;
  // Skip non-simple loads.
  if (!LI.isSimple())
    return false;
  Type *Ty = LI.getType();
  // Skip aggregate types.
  if (Ty->isAggregateType())
    return false;
  unsigned TySize = DL.getTypeStoreSize(Ty);
  // Only handle sub-DWORD loads.
  if (TySize >= 4)
    return false;
  // The load must be at least naturally aligned.
  if (LI.getAlign() < DL.getABITypeAlign(Ty))
    return false;
  // It should be uniform, i.e. a scalar load.
  return UA.isUniform(&LI);
}

bool AMDGPULateCodeGenPrepare::visitLoadInst(LoadInst &LI) {
  if (!WidenLoads)
    return false;

  // Skip if the load is already at least DWORD aligned, as that case is
  // handled in SDAG.
  if (LI.getAlign() >= 4)
    return false;

  if (!canWidenScalarExtLoad(LI))
    return false;

  int64_t Offset = 0;
  auto *Base =
      GetPointerBaseWithConstantOffset(LI.getPointerOperand(), Offset, DL);
  // If the base is not DWORD aligned, it is not safe to perform the following
  // transforms.
  if (!isDWORDAligned(Base))
    return false;

  int64_t Adjust = Offset & 0x3;
  if (Adjust == 0) {
    // With a zero adjust, the original alignment can be promoted to a better
    // one.
    LI.setAlignment(Align(4));
    return true;
  }

  IRBuilder<> IRB(&LI);
  IRB.SetCurrentDebugLocation(LI.getDebugLoc());

  unsigned LdBits = DL.getTypeStoreSizeInBits(LI.getType());
  auto *IntNTy = Type::getIntNTy(LI.getContext(), LdBits);

  auto *NewPtr = IRB.CreateConstGEP1_64(
      IRB.getInt8Ty(),
      IRB.CreateAddrSpaceCast(Base, LI.getPointerOperand()->getType()),
      Offset - Adjust);

  LoadInst *NewLd = IRB.CreateAlignedLoad(IRB.getInt32Ty(), NewPtr, Align(4));
  NewLd->copyMetadata(LI);
  NewLd->setMetadata(LLVMContext::MD_range, nullptr);

  unsigned ShAmt = Adjust * 8;
  Value *NewVal = IRB.CreateBitCast(
      IRB.CreateTrunc(IRB.CreateLShr(NewLd, ShAmt),
                      DL.typeSizeEqualsStoreSize(LI.getType()) ? IntNTy
                                                               : LI.getType()),
      LI.getType());
  LI.replaceAllUsesWith(NewVal);
  DeadInsts.emplace_back(&LI);

  return true;
}
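// For example, a uniform i16 load from a constant-address-space pointer at
// byte offset 2 from a DWORD-aligned base (align 2) becomes an align-4 i32
// load at offset 0, followed by a lshr by 16 and a trunc back to i16.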

PreservedAnalyses
AMDGPULateCodeGenPreparePass::run(Function &F, FunctionAnalysisManager &FAM) {
  const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
  AssumptionCache &AC = FAM.getResult<AssumptionAnalysis>(F);
  UniformityInfo &UI = FAM.getResult<UniformityInfoAnalysis>(F);

  bool Changed = AMDGPULateCodeGenPrepare(F, ST, &AC, UI).run();

  if (!Changed)
    return PreservedAnalyses::all();

  PreservedAnalyses PA = PreservedAnalyses::none();
  PA.preserveSet<CFGAnalyses>();
  return PA;
}

class AMDGPULateCodeGenPrepareLegacy : public FunctionPass {
public:
  static char ID;

  AMDGPULateCodeGenPrepareLegacy() : FunctionPass(ID) {}

  StringRef getPassName() const override {
    return "AMDGPU IR late optimizations";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<AssumptionCacheTracker>();
    AU.addRequired<UniformityInfoWrapperPass>();
    AU.addRequired<TargetPassConfig>();
    // Invalidates UniformityInfo
    AU.setPreservesCFG();
  }

  bool runOnFunction(Function &F) override;
};

bool AMDGPULateCodeGenPrepareLegacy::runOnFunction(Function &F) {
  if (skipFunction(F))
    return false;

  const TargetPassConfig &TPC = getAnalysis<TargetPassConfig>();
  const TargetMachine &TM = TPC.getTM<TargetMachine>();
  const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);

  AssumptionCache &AC =
      getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
  UniformityInfo &UI =
      getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo();

  return AMDGPULateCodeGenPrepare(F, ST, &AC, UI).run();
}

INITIALIZE_PASS_BEGIN(AMDGPULateCodeGenPrepareLegacy, DEBUG_TYPE,
                      "AMDGPU IR late optimizations", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_DEPENDENCY(UniformityInfoWrapperPass)
INITIALIZE_PASS_END(AMDGPULateCodeGenPrepareLegacy, DEBUG_TYPE,
                    "AMDGPU IR late optimizations", false, false)

char AMDGPULateCodeGenPrepareLegacy::ID = 0;

FunctionPass *llvm::createAMDGPULateCodeGenPrepareLegacyPass() {
  return new AMDGPULateCodeGenPrepareLegacy();
}