LLVM 22.0.0git
AMDGPUAttributor.cpp
1//===- AMDGPUAttributor.cpp -----------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file This pass uses the Attributor framework to deduce AMDGPU attributes.
10//
11//===----------------------------------------------------------------------===//
12
13#include "AMDGPU.h"
14#include "GCNSubtarget.h"
15#include "Utils/AMDGPUBaseInfo.h"
16#include "llvm/IR/IntrinsicsAMDGPU.h"
17#include "llvm/IR/IntrinsicsR600.h"
18#include "llvm/Target/TargetMachine.h"
19#include "llvm/Transforms/IPO/Attributor.h"
20
21#define DEBUG_TYPE "amdgpu-attributor"
22
23using namespace llvm;
24
25static cl::opt<unsigned> IndirectCallSpecializationThreshold(
26 "amdgpu-indirect-call-specialization-threshold",
27 cl::desc(
28 "A threshold controls whether an indirect call will be specialized"),
29 cl::init(3));
30
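// AMDGPUAttributes.def lists every implicit-argument attribute as
// AMDGPU_ATTRIBUTE(Name, Str); it is expanded three times below to build the
// bit positions, the bit masks, and the (mask, attribute-string) table.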
31#define AMDGPU_ATTRIBUTE(Name, Str) Name##_POS,
32
33enum ImplicitArgumentPositions {
34#include "AMDGPUAttributes.def"
35 LAST_ARG_POS
36};
37
38#define AMDGPU_ATTRIBUTE(Name, Str) Name = 1 << Name##_POS,
39
40enum ImplicitArgumentMask {
41 NOT_IMPLICIT_INPUT = 0,
42#include "AMDGPUAttributes.def"
43 ALL_ARGUMENT_MASK = (1 << LAST_ARG_POS) - 1
44};
45
46#define AMDGPU_ATTRIBUTE(Name, Str) {Name, Str},
47static constexpr std::pair<ImplicitArgumentMask, StringLiteral>
48 ImplicitAttrs[] = {
49#include "AMDGPUAttributes.def"
50};
51
52// We do not need to note the x workitem or workgroup id because they are always
53// initialized.
54//
55// TODO: We should not add the attributes if the known compile time workgroup
56// size is 1 for y/z.
57static ImplicitArgumentMask
58intrinsicToAttrMask(Intrinsic::ID ID, bool &NonKernelOnly, bool &NeedsImplicit,
59 bool HasApertureRegs, bool SupportsGetDoorBellID,
60 unsigned CodeObjectVersion) {
61 switch (ID) {
62 case Intrinsic::amdgcn_workitem_id_x:
63 NonKernelOnly = true;
64 return WORKITEM_ID_X;
65 case Intrinsic::amdgcn_workgroup_id_x:
66 NonKernelOnly = true;
67 return WORKGROUP_ID_X;
68 case Intrinsic::amdgcn_workitem_id_y:
69 case Intrinsic::r600_read_tidig_y:
70 return WORKITEM_ID_Y;
71 case Intrinsic::amdgcn_workitem_id_z:
72 case Intrinsic::r600_read_tidig_z:
73 return WORKITEM_ID_Z;
74 case Intrinsic::amdgcn_workgroup_id_y:
75 case Intrinsic::r600_read_tgid_y:
76 return WORKGROUP_ID_Y;
77 case Intrinsic::amdgcn_workgroup_id_z:
78 case Intrinsic::r600_read_tgid_z:
79 return WORKGROUP_ID_Z;
80 case Intrinsic::amdgcn_cluster_id_x:
81 NonKernelOnly = true;
82 return CLUSTER_ID_X;
83 case Intrinsic::amdgcn_cluster_id_y:
84 return CLUSTER_ID_Y;
85 case Intrinsic::amdgcn_cluster_id_z:
86 return CLUSTER_ID_Z;
87 case Intrinsic::amdgcn_lds_kernel_id:
88 return LDS_KERNEL_ID;
89 case Intrinsic::amdgcn_dispatch_ptr:
90 return DISPATCH_PTR;
91 case Intrinsic::amdgcn_dispatch_id:
92 return DISPATCH_ID;
93 case Intrinsic::amdgcn_implicitarg_ptr:
94 return IMPLICIT_ARG_PTR;
95 // Need queue_ptr anyway. But under V5, we also need implicitarg_ptr to access
96 // queue_ptr.
97 case Intrinsic::amdgcn_queue_ptr:
98 NeedsImplicit = (CodeObjectVersion >= AMDGPU::AMDHSA_COV5);
99 return QUEUE_PTR;
100 case Intrinsic::amdgcn_is_shared:
101 case Intrinsic::amdgcn_is_private:
102 if (HasApertureRegs)
103 return NOT_IMPLICIT_INPUT;
104 // Under V5, we need implicitarg_ptr + offsets to access private_base or
105 // shared_base. For pre-V5, however, we need to access them through queue_ptr +
106 // offsets.
107 return CodeObjectVersion >= AMDGPU::AMDHSA_COV5 ? IMPLICIT_ARG_PTR
108 : QUEUE_PTR;
109 case Intrinsic::trap:
110 case Intrinsic::debugtrap:
111 case Intrinsic::ubsantrap:
112 if (SupportsGetDoorBellID) // GetDoorbellID support implemented since V4.
113 return CodeObjectVersion >= AMDGPU::AMDHSA_COV4 ? NOT_IMPLICIT_INPUT
114 : QUEUE_PTR;
115 NeedsImplicit = (CodeObjectVersion >= AMDGPU::AMDHSA_COV5);
116 return QUEUE_PTR;
117 default:
118 return NOT_IMPLICIT_INPUT;
119 }
120}
121
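// Casts from the local or private address space to flat need the aperture
// bases, which are read via the queue pointer when the subtarget has no
// aperture registers.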
122static bool castRequiresQueuePtr(unsigned SrcAS) {
123 return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS;
124}
125
126static bool isDSAddress(const Constant *C) {
127 const auto *GV = dyn_cast<GlobalValue>(C);
128 if (!GV)
129 return false;
130 unsigned AS = GV->getAddressSpace();
131 return AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS;
132}
133
134/// Returns true if sanitizer attributes are present on a function.
135static bool hasSanitizerAttributes(const Function &F) {
136 return F.hasFnAttribute(Attribute::SanitizeAddress) ||
137 F.hasFnAttribute(Attribute::SanitizeThread) ||
138 F.hasFnAttribute(Attribute::SanitizeMemory) ||
139 F.hasFnAttribute(Attribute::SanitizeHWAddress) ||
140 F.hasFnAttribute(Attribute::SanitizeMemTag);
141}
142
143namespace {
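/// Cache of target-dependent queries (subtarget limits, code object version,
/// constant analysis) shared by the AMDGPU abstract attributes below.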
144class AMDGPUInformationCache : public InformationCache {
145public:
146 AMDGPUInformationCache(const Module &M, AnalysisGetter &AG,
147 BumpPtrAllocator &Allocator,
148 SetVector<Function *> *CGSCC, TargetMachine &TM)
149 : InformationCache(M, AG, Allocator, CGSCC), TM(TM),
150 CodeObjectVersion(AMDGPU::getAMDHSACodeObjectVersion(M)) {}
151
152 TargetMachine &TM;
153
154 enum ConstantStatus : uint8_t {
155 NONE = 0,
156 DS_GLOBAL = 1 << 0,
157 ADDR_SPACE_CAST_PRIVATE_TO_FLAT = 1 << 1,
158 ADDR_SPACE_CAST_LOCAL_TO_FLAT = 1 << 2,
159 ADDR_SPACE_CAST_BOTH_TO_FLAT =
160 ADDR_SPACE_CAST_PRIVATE_TO_FLAT | ADDR_SPACE_CAST_LOCAL_TO_FLAT
161 };
162
163 /// Check if the subtarget has aperture regs.
164 bool hasApertureRegs(Function &F) {
165 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
166 return ST.hasApertureRegs();
167 }
168
169 /// Check if the subtarget supports GetDoorbellID.
170 bool supportsGetDoorbellID(Function &F) {
171 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
172 return ST.supportsGetDoorbellID();
173 }
174
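 /// Returns the "amdgpu-flat-work-group-size" attribute of \p F as a
 /// (min, max) pair, or std::nullopt if the attribute is not set.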
175 std::optional<std::pair<unsigned, unsigned>>
176 getFlatWorkGroupSizeAttr(const Function &F) const {
177 auto R = AMDGPU::getIntegerPairAttribute(F, "amdgpu-flat-work-group-size");
178 if (!R)
179 return std::nullopt;
180 return std::make_pair(R->first, *(R->second));
181 }
182
183 std::pair<unsigned, unsigned>
184 getDefaultFlatWorkGroupSize(const Function &F) const {
185 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
186 return ST.getDefaultFlatWorkGroupSize(F.getCallingConv());
187 }
188
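 /// Returns the subtarget's full supported [min, max] flat workgroup size
 /// range, independent of any attribute on \p F.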
189 std::pair<unsigned, unsigned>
190 getMaximumFlatWorkGroupRange(const Function &F) {
191 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
192 return {ST.getMinFlatWorkGroupSize(), ST.getMaxFlatWorkGroupSize()};
193 }
194
195 SmallVector<unsigned> getMaxNumWorkGroups(const Function &F) {
196 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
197 return ST.getMaxNumWorkGroups(F);
198 }
199
200 /// Get code object version.
201 unsigned getCodeObjectVersion() const { return CodeObjectVersion; }
202
203 /// Get the effective value of "amdgpu-waves-per-eu" for the function,
204 /// accounting for the interaction with the passed value to use for
205 /// "amdgpu-flat-work-group-size".
206 std::pair<unsigned, unsigned>
207 getWavesPerEU(const Function &F,
208 std::pair<unsigned, unsigned> FlatWorkGroupSize) {
209 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
210 return ST.getWavesPerEU(FlatWorkGroupSize, getLDSSize(F), F);
211 }
212
213 std::optional<std::pair<unsigned, unsigned>>
214 getWavesPerEUAttr(const Function &F) {
215 auto Val = AMDGPU::getIntegerPairAttribute(F, "amdgpu-waves-per-eu",
216 /*OnlyFirstRequired=*/true);
217 if (!Val)
218 return std::nullopt;
219 if (!Val->second) {
220 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
221 Val->second = ST.getMaxWavesPerEU();
222 }
223 return std::make_pair(Val->first, *(Val->second));
224 }
225
226 unsigned getMaxWavesPerEU(const Function &F) {
227 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
228 return ST.getMaxWavesPerEU();
229 }
230
231 unsigned getMaxAddrSpace() const override {
232 return AMDGPUAS::MAX_AMDGPU_ADDRESS;
233 }
234
235private:
236 /// Check if the ConstantExpr \p CE uses an addrspacecast from private or
237 /// local to flat. These casts may require the queue pointer.
238 static uint8_t visitConstExpr(const ConstantExpr *CE) {
239 uint8_t Status = NONE;
240
241 if (CE->getOpcode() == Instruction::AddrSpaceCast) {
242 unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
243 if (SrcAS == AMDGPUAS::PRIVATE_ADDRESS)
244 Status |= ADDR_SPACE_CAST_PRIVATE_TO_FLAT;
245 else if (SrcAS == AMDGPUAS::LOCAL_ADDRESS)
246 Status |= ADDR_SPACE_CAST_LOCAL_TO_FLAT;
247 }
248
249 return Status;
250 }
251
252 /// Returns the minimum amount of LDS space used by a workgroup running
253 /// function \p F.
254 static unsigned getLDSSize(const Function &F) {
255 return AMDGPU::getIntegerPairAttribute(F, "amdgpu-lds-size",
256 {0, UINT32_MAX}, true)
257 .first;
258 }
259
260 /// Get the constant access bitmap for \p C.
261 uint8_t getConstantAccess(const Constant *C,
262 SmallPtrSetImpl<const Constant *> &Visited) {
263 auto It = ConstantStatus.find(C);
264 if (It != ConstantStatus.end())
265 return It->second;
266
267 uint8_t Result = 0;
268 if (isDSAddress(C))
269 Result = DS_GLOBAL;
270
271 if (const auto *CE = dyn_cast<ConstantExpr>(C))
272 Result |= visitConstExpr(CE);
273
274 for (const Use &U : C->operands()) {
275 const auto *OpC = dyn_cast<Constant>(U);
276 if (!OpC || !Visited.insert(OpC).second)
277 continue;
278
279 Result |= getConstantAccess(OpC, Visited);
280 }
281 return Result;
282 }
283
284public:
285 /// Returns true if \p Fn needs the queue pointer because of \p C.
286 bool needsQueuePtr(const Constant *C, Function &Fn) {
287 bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(Fn.getCallingConv());
288 bool HasAperture = hasApertureRegs(Fn);
289
290 // No need to explore the constants.
291 if (!IsNonEntryFunc && HasAperture)
292 return false;
293
294 SmallPtrSet<const Constant *, 8> Visited;
295 uint8_t Access = getConstantAccess(C, Visited);
296
297 // We need to trap on DS globals in non-entry functions.
298 if (IsNonEntryFunc && (Access & DS_GLOBAL))
299 return true;
300
301 return !HasAperture && (Access & ADDR_SPACE_CAST_BOTH_TO_FLAT);
302 }
303
304 bool checkConstForAddrSpaceCastFromPrivate(const Constant *C) {
305 SmallPtrSet<const Constant *, 8> Visited;
306 uint8_t Access = getConstantAccess(C, Visited);
307 return Access & ADDR_SPACE_CAST_PRIVATE_TO_FLAT;
308 }
309
310private:
311 /// Used to determine if the Constant needs the queue pointer.
312 DenseMap<const Constant *, uint8_t> ConstantStatus;
313 const unsigned CodeObjectVersion;
314};
315
316struct AAAMDAttributes
317 : public StateWrapper<BitIntegerState<uint32_t, ALL_ARGUMENT_MASK, 0>,
318 AbstractAttribute> {
319 using Base = StateWrapper<BitIntegerState<uint32_t, ALL_ARGUMENT_MASK, 0>,
320 AbstractAttribute>;
321
322 AAAMDAttributes(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
323
324 /// Create an abstract attribute view for the position \p IRP.
325 static AAAMDAttributes &createForPosition(const IRPosition &IRP,
326 Attributor &A);
327
328 /// See AbstractAttribute::getName().
329 StringRef getName() const override { return "AAAMDAttributes"; }
330
331 /// See AbstractAttribute::getIdAddr().
332 const char *getIdAddr() const override { return &ID; }
333
334 /// This function should return true if the type of the \p AA is
335 /// AAAMDAttributes.
336 static bool classof(const AbstractAttribute *AA) {
337 return (AA->getIdAddr() == &ID);
338 }
339
340 /// Unique ID (due to the unique address)
341 static const char ID;
342};
343const char AAAMDAttributes::ID = 0;
344
345struct AAUniformWorkGroupSize
346 : public StateWrapper<BooleanState, AbstractAttribute> {
347 using Base = StateWrapper<BooleanState, AbstractAttribute>;
348 AAUniformWorkGroupSize(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
349
350 /// Create an abstract attribute view for the position \p IRP.
351 static AAUniformWorkGroupSize &createForPosition(const IRPosition &IRP,
352 Attributor &A);
353
354 /// See AbstractAttribute::getName().
355 StringRef getName() const override { return "AAUniformWorkGroupSize"; }
356
357 /// See AbstractAttribute::getIdAddr().
358 const char *getIdAddr() const override { return &ID; }
359
360 /// This function should return true if the type of the \p AA is
361 /// AAUniformWorkGroupSize.
362 static bool classof(const AbstractAttribute *AA) {
363 return (AA->getIdAddr() == &ID);
364 }
365
366 /// Unique ID (due to the unique address)
367 static const char ID;
368};
369const char AAUniformWorkGroupSize::ID = 0;
370
371struct AAUniformWorkGroupSizeFunction : public AAUniformWorkGroupSize {
372 AAUniformWorkGroupSizeFunction(const IRPosition &IRP, Attributor &A)
373 : AAUniformWorkGroupSize(IRP, A) {}
374
375 void initialize(Attributor &A) override {
376 Function *F = getAssociatedFunction();
377 CallingConv::ID CC = F->getCallingConv();
378
379 if (CC != CallingConv::AMDGPU_KERNEL)
380 return;
381
382 bool InitialValue = false;
383 if (F->hasFnAttribute("uniform-work-group-size"))
384 InitialValue =
385 F->getFnAttribute("uniform-work-group-size").getValueAsString() ==
386 "true";
387
388 if (InitialValue)
389 indicateOptimisticFixpoint();
390 else
391 indicatePessimisticFixpoint();
392 }
393
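 // Meet the state with every caller's uniform-work-group-size state; if any
 // call site is unknown, fall back to the pessimistic fixpoint.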
394 ChangeStatus updateImpl(Attributor &A) override {
395 ChangeStatus Change = ChangeStatus::UNCHANGED;
396
397 auto CheckCallSite = [&](AbstractCallSite CS) {
398 Function *Caller = CS.getInstruction()->getFunction();
399 LLVM_DEBUG(dbgs() << "[AAUniformWorkGroupSize] Call " << Caller->getName()
400 << "->" << getAssociatedFunction()->getName() << "\n");
401
402 const auto *CallerInfo = A.getAAFor<AAUniformWorkGroupSize>(
403 *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
404 if (!CallerInfo || !CallerInfo->isValidState())
405 return false;
406
407 Change = Change | clampStateAndIndicateChange(this->getState(),
408 CallerInfo->getState());
409
410 return true;
411 };
412
413 bool AllCallSitesKnown = true;
414 if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
415 return indicatePessimisticFixpoint();
416
417 return Change;
418 }
419
420 ChangeStatus manifest(Attributor &A) override {
421 SmallVector<Attribute, 8> AttrList;
422 LLVMContext &Ctx = getAssociatedFunction()->getContext();
423
424 AttrList.push_back(Attribute::get(Ctx, "uniform-work-group-size",
425 getAssumed() ? "true" : "false"));
426 return A.manifestAttrs(getIRPosition(), AttrList,
427 /* ForceReplace */ true);
428 }
429
430 bool isValidState() const override {
431 // This state is always valid, even when the state is false.
432 return true;
433 }
434
435 const std::string getAsStr(Attributor *) const override {
436 return "AMDWorkGroupSize[" + std::to_string(getAssumed()) + "]";
437 }
438
439 /// See AbstractAttribute::trackStatistics()
440 void trackStatistics() const override {}
441};
442
443AAUniformWorkGroupSize &
444AAUniformWorkGroupSize::createForPosition(const IRPosition &IRP,
445 Attributor &A) {
446 if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
447 return *new (A.Allocator) AAUniformWorkGroupSizeFunction(IRP, A);
448 llvm_unreachable(
449 "AAUniformWorkGroupSize is only valid for function position");
450}
451
452struct AAAMDAttributesFunction : public AAAMDAttributes {
453 AAAMDAttributesFunction(const IRPosition &IRP, Attributor &A)
454 : AAAMDAttributes(IRP, A) {}
455
456 void initialize(Attributor &A) override {
457 Function *F = getAssociatedFunction();
458
459 // If the function requires the implicit arg pointer due to sanitizers,
460 // assume it's needed even if explicitly marked as not requiring it.
461 // Flat scratch initialization is needed because `asan_malloc_impl`
462 // calls introduced later in pipeline will have flat scratch accesses.
463 // FIXME: FLAT_SCRATCH_INIT will not be required here if device-libs
464 // implementation for `asan_malloc_impl` is updated.
465 const bool HasSanitizerAttrs = hasSanitizerAttributes(*F);
466 if (HasSanitizerAttrs) {
467 removeAssumedBits(IMPLICIT_ARG_PTR);
468 removeAssumedBits(HOSTCALL_PTR);
469 removeAssumedBits(FLAT_SCRATCH_INIT);
470 }
471
472 for (auto Attr : ImplicitAttrs) {
473 if (HasSanitizerAttrs &&
474 (Attr.first == IMPLICIT_ARG_PTR || Attr.first == HOSTCALL_PTR ||
475 Attr.first == FLAT_SCRATCH_INIT))
476 continue;
477
478 if (F->hasFnAttribute(Attr.second))
479 addKnownBits(Attr.first);
480 }
481
482 if (F->isDeclaration())
483 return;
484
485 // Ignore functions with graphics calling conventions; these are currently
486 // not allowed to have kernel arguments.
487 if (AMDGPU::isGraphics(F->getCallingConv())) {
488 indicatePessimisticFixpoint();
489 return;
490 }
491 }
492
493 ChangeStatus updateImpl(Attributor &A) override {
494 Function *F = getAssociatedFunction();
495 // The current assumed state used to determine a change.
496 auto OrigAssumed = getAssumed();
497
498 // Check for Intrinsics and propagate attributes.
499 const AACallEdges *AAEdges = A.getAAFor<AACallEdges>(
500 *this, this->getIRPosition(), DepClassTy::REQUIRED);
501 if (!AAEdges || !AAEdges->isValidState() ||
502 AAEdges->hasNonAsmUnknownCallee())
503 return indicatePessimisticFixpoint();
504
505 bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());
506
507 bool NeedsImplicit = false;
508 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
509 bool HasApertureRegs = InfoCache.hasApertureRegs(*F);
510 bool SupportsGetDoorbellID = InfoCache.supportsGetDoorbellID(*F);
511 unsigned COV = InfoCache.getCodeObjectVersion();
512
513 for (Function *Callee : AAEdges->getOptimisticEdges()) {
514 Intrinsic::ID IID = Callee->getIntrinsicID();
515 if (IID == Intrinsic::not_intrinsic) {
516 const AAAMDAttributes *AAAMD = A.getAAFor<AAAMDAttributes>(
517 *this, IRPosition::function(*Callee), DepClassTy::REQUIRED);
518 if (!AAAMD || !AAAMD->isValidState())
519 return indicatePessimisticFixpoint();
520 *this &= *AAAMD;
521 continue;
522 }
523
524 bool NonKernelOnly = false;
525 ImplicitArgumentMask AttrMask =
526 intrinsicToAttrMask(IID, NonKernelOnly, NeedsImplicit,
527 HasApertureRegs, SupportsGetDoorbellID, COV);
528 if (AttrMask != NOT_IMPLICIT_INPUT) {
529 if ((IsNonEntryFunc || !NonKernelOnly))
530 removeAssumedBits(AttrMask);
531 }
532 }
533
534 // Need implicitarg_ptr to access queue_ptr, private_base, and shared_base.
535 if (NeedsImplicit)
536 removeAssumedBits(IMPLICIT_ARG_PTR);
537
538 if (isAssumed(QUEUE_PTR) && checkForQueuePtr(A)) {
539 // Under V5, we need implicitarg_ptr + offsets to access private_base or
540 // shared_base. We do not actually need queue_ptr.
541 if (COV >= 5)
542 removeAssumedBits(IMPLICIT_ARG_PTR);
543 else
544 removeAssumedBits(QUEUE_PTR);
545 }
546
547 if (funcRetrievesMultigridSyncArg(A, COV)) {
548 assert(!isAssumed(IMPLICIT_ARG_PTR) &&
549 "multigrid_sync_arg needs implicitarg_ptr");
550 removeAssumedBits(MULTIGRID_SYNC_ARG);
551 }
552
553 if (funcRetrievesHostcallPtr(A, COV)) {
554 assert(!isAssumed(IMPLICIT_ARG_PTR) && "hostcall needs implicitarg_ptr");
555 removeAssumedBits(HOSTCALL_PTR);
556 }
557
558 if (funcRetrievesHeapPtr(A, COV)) {
559 assert(!isAssumed(IMPLICIT_ARG_PTR) && "heap_ptr needs implicitarg_ptr");
560 removeAssumedBits(HEAP_PTR);
561 }
562
563 if (isAssumed(QUEUE_PTR) && funcRetrievesQueuePtr(A, COV)) {
564 assert(!isAssumed(IMPLICIT_ARG_PTR) && "queue_ptr needs implicitarg_ptr");
565 removeAssumedBits(QUEUE_PTR);
566 }
567
568 if (isAssumed(LDS_KERNEL_ID) && funcRetrievesLDSKernelId(A)) {
569 removeAssumedBits(LDS_KERNEL_ID);
570 }
571
572 if (isAssumed(DEFAULT_QUEUE) && funcRetrievesDefaultQueue(A, COV))
573 removeAssumedBits(DEFAULT_QUEUE);
574
575 if (isAssumed(COMPLETION_ACTION) && funcRetrievesCompletionAction(A, COV))
576 removeAssumedBits(COMPLETION_ACTION);
577
578 if (isAssumed(FLAT_SCRATCH_INIT) && needFlatScratchInit(A))
579 removeAssumedBits(FLAT_SCRATCH_INIT);
580
581 return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED
582 : ChangeStatus::UNCHANGED;
583 }
584
585 ChangeStatus manifest(Attributor &A) override {
586 SmallVector<Attribute, 8> AttrList;
587 LLVMContext &Ctx = getAssociatedFunction()->getContext();
588
589 for (auto Attr : ImplicitAttrs) {
590 if (isKnown(Attr.first))
591 AttrList.push_back(Attribute::get(Ctx, Attr.second));
592 }
593
594 return A.manifestAttrs(getIRPosition(), AttrList,
595 /* ForceReplace */ true);
596 }
597
598 const std::string getAsStr(Attributor *) const override {
599 std::string Str;
600 raw_string_ostream OS(Str);
601 OS << "AMDInfo[";
602 for (auto Attr : ImplicitAttrs)
603 if (isAssumed(Attr.first))
604 OS << ' ' << Attr.second;
605 OS << " ]";
606 return OS.str();
607 }
608
609 /// See AbstractAttribute::trackStatistics()
610 void trackStatistics() const override {}
611
612private:
613 bool checkForQueuePtr(Attributor &A) {
614 Function *F = getAssociatedFunction();
615 bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());
616
617 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
618
619 bool NeedsQueuePtr = false;
620
621 auto CheckAddrSpaceCasts = [&](Instruction &I) {
622 unsigned SrcAS = static_cast<AddrSpaceCastInst &>(I).getSrcAddressSpace();
623 if (castRequiresQueuePtr(SrcAS)) {
624 NeedsQueuePtr = true;
625 return false;
626 }
627 return true;
628 };
629
630 bool HasApertureRegs = InfoCache.hasApertureRegs(*F);
631
632 // `checkForAllInstructions` is much cheaper than iterating over all
633 // instructions manually, so try it first.
634
635 // The queue pointer is not needed if aperture registers are present.
636 if (!HasApertureRegs) {
637 bool UsedAssumedInformation = false;
638 A.checkForAllInstructions(CheckAddrSpaceCasts, *this,
639 {Instruction::AddrSpaceCast},
640 UsedAssumedInformation);
641 }
642
643 // If we found that we need the queue pointer, nothing else to do.
644 if (NeedsQueuePtr)
645 return true;
646
647 if (!IsNonEntryFunc && HasApertureRegs)
648 return false;
649
650 for (BasicBlock &BB : *F) {
651 for (Instruction &I : BB) {
652 for (const Use &U : I.operands()) {
653 if (const auto *C = dyn_cast<Constant>(U)) {
654 if (InfoCache.needsQueuePtr(C, *F))
655 return true;
656 }
657 }
658 }
659 }
660
661 return false;
662 }
663
664 bool funcRetrievesMultigridSyncArg(Attributor &A, unsigned COV) {
665 auto Pos = llvm::AMDGPU::getMultigridSyncArgImplicitArgPosition(COV);
666 AA::RangeTy Range(Pos, 8);
667 return funcRetrievesImplicitKernelArg(A, Range);
668 }
669
670 bool funcRetrievesHostcallPtr(Attributor &A, unsigned COV) {
671 auto Pos = llvm::AMDGPU::getHostcallImplicitArgPosition(COV);
672 AA::RangeTy Range(Pos, 8);
673 return funcRetrievesImplicitKernelArg(A, Range);
674 }
675
676 bool funcRetrievesDefaultQueue(Attributor &A, unsigned COV) {
677 auto Pos = llvm::AMDGPU::getDefaultQueueImplicitArgPosition(COV);
678 AA::RangeTy Range(Pos, 8);
679 return funcRetrievesImplicitKernelArg(A, Range);
680 }
681
682 bool funcRetrievesCompletionAction(Attributor &A, unsigned COV) {
683 auto Pos = llvm::AMDGPU::getCompletionActionImplicitArgPosition(COV);
684 AA::RangeTy Range(Pos, 8);
685 return funcRetrievesImplicitKernelArg(A, Range);
686 }
687
688 bool funcRetrievesHeapPtr(Attributor &A, unsigned COV) {
689 if (COV < 5)
690 return false;
691 AA::RangeTy Range(AMDGPU::ImplicitArg::HEAP_PTR_OFFSET, 8);
692 return funcRetrievesImplicitKernelArg(A, Range);
693 }
694
695 bool funcRetrievesQueuePtr(Attributor &A, unsigned COV) {
696 if (COV < 5)
697 return false;
698 AA::RangeTy Range(AMDGPU::ImplicitArg::QUEUE_PTR_OFFSET, 8);
699 return funcRetrievesImplicitKernelArg(A, Range);
700 }
701
702 bool funcRetrievesImplicitKernelArg(Attributor &A, AA::RangeTy Range) {
703 // Check if this is a call to the implicitarg_ptr builtin and it
704 // is used to retrieve the hostcall pointer. The implicit arg for
705 // hostcall is not used only if every use of the implicitarg_ptr
706 // is a load that clearly does not retrieve any byte of the
707 // hostcall pointer. We check this by tracing all the uses of the
708 // initial call to the implicitarg_ptr intrinsic.
709 auto DoesNotLeadToKernelArgLoc = [&](Instruction &I) {
710 auto &Call = cast<CallBase>(I);
711 if (Call.getIntrinsicID() != Intrinsic::amdgcn_implicitarg_ptr)
712 return true;
713
714 const auto *PointerInfoAA = A.getAAFor<AAPointerInfo>(
715 *this, IRPosition::callsite_returned(Call), DepClassTy::REQUIRED);
716 if (!PointerInfoAA || !PointerInfoAA->getState().isValidState())
717 return false;
718
719 return PointerInfoAA->forallInterferingAccesses(
720 Range, [](const AAPointerInfo::Access &Acc, bool IsExact) {
721 return Acc.getRemoteInst()->isDroppable();
722 });
723 };
724
725 bool UsedAssumedInformation = false;
726 return !A.checkForAllCallLikeInstructions(DoesNotLeadToKernelArgLoc, *this,
727 UsedAssumedInformation);
728 }
729
730 bool funcRetrievesLDSKernelId(Attributor &A) {
731 auto DoesNotRetrieve = [&](Instruction &I) {
732 auto &Call = cast<CallBase>(I);
733 return Call.getIntrinsicID() != Intrinsic::amdgcn_lds_kernel_id;
734 };
735 bool UsedAssumedInformation = false;
736 return !A.checkForAllCallLikeInstructions(DoesNotRetrieve, *this,
737 UsedAssumedInformation);
738 }
739
740 // Returns true if FlatScratchInit is needed, i.e., no-flat-scratch-init is
741 // not to be set.
742 bool needFlatScratchInit(Attributor &A) {
743 assert(isAssumed(FLAT_SCRATCH_INIT)); // only called if the bit is still set
744
745 // Check all AddrSpaceCast instructions. FlatScratchInit is needed if
746 // there is a cast from PRIVATE_ADDRESS.
747 auto AddrSpaceCastNotFromPrivate = [](Instruction &I) {
748 return cast<AddrSpaceCastInst>(I).getSrcAddressSpace() !=
749 AMDGPUAS::PRIVATE_ADDRESS;
750 };
751
752 bool UsedAssumedInformation = false;
753 if (!A.checkForAllInstructions(AddrSpaceCastNotFromPrivate, *this,
754 {Instruction::AddrSpaceCast},
755 UsedAssumedInformation))
756 return true;
757
758 // Check for addrSpaceCast from PRIVATE_ADDRESS in constant expressions
759 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
760
761 Function *F = getAssociatedFunction();
762 for (Instruction &I : instructions(F)) {
763 for (const Use &U : I.operands()) {
764 if (const auto *C = dyn_cast<Constant>(U)) {
765 if (InfoCache.checkConstForAddrSpaceCastFromPrivate(C))
766 return true;
767 }
768 }
769 }
770
771 // Finally check callees.
772
773 // This is called on each callee; false means callee shouldn't have
774 // no-flat-scratch-init.
775 auto CheckForNoFlatScratchInit = [&](Instruction &I) {
776 const auto &CB = cast<CallBase>(I);
777 const Function *Callee = CB.getCalledFunction();
778
779 // Callee == 0 for inline asm or indirect call with known callees.
780 // In the latter case, updateImpl() already checked the callees and we
781 // know their FLAT_SCRATCH_INIT bit is set.
782 // If function has indirect call with unknown callees, the bit is
783 // already removed in updateImpl() and execution won't reach here.
784 if (!Callee)
785 return true;
786
787 return Callee->getIntrinsicID() !=
788 Intrinsic::amdgcn_addrspacecast_nonnull;
789 };
790
791 UsedAssumedInformation = false;
792 // If any callee is false (i.e. need FlatScratchInit),
793 // checkForAllCallLikeInstructions returns false, in which case this
794 // function returns true.
795 return !A.checkForAllCallLikeInstructions(CheckForNoFlatScratchInit, *this,
796 UsedAssumedInformation);
797 }
798};
799
800AAAMDAttributes &AAAMDAttributes::createForPosition(const IRPosition &IRP,
801 Attributor &A) {
802 if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
803 return *new (A.Allocator) AAAMDAttributesFunction(IRP, A);
804 llvm_unreachable("AAAMDAttributes is only valid for function position");
805}
806
807/// Base class to derive different size ranges.
808struct AAAMDSizeRangeAttribute
809 : public StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t> {
810 using Base = StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t>;
811
812 StringRef AttrName;
813
814 AAAMDSizeRangeAttribute(const IRPosition &IRP, Attributor &A,
815 StringRef AttrName)
816 : Base(IRP, 32), AttrName(AttrName) {}
817
818 /// See AbstractAttribute::trackStatistics()
819 void trackStatistics() const override {}
820
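 /// Intersect the assumed range with the ranges of all callers; unknown call
 /// sites force a pessimistic fixpoint.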
821 template <class AttributeImpl> ChangeStatus updateImplImpl(Attributor &A) {
822 ChangeStatus Change = ChangeStatus::UNCHANGED;
823
824 auto CheckCallSite = [&](AbstractCallSite CS) {
825 Function *Caller = CS.getInstruction()->getFunction();
826 LLVM_DEBUG(dbgs() << '[' << getName() << "] Call " << Caller->getName()
827 << "->" << getAssociatedFunction()->getName() << '\n');
828
829 const auto *CallerInfo = A.getAAFor<AttributeImpl>(
830 *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
831 if (!CallerInfo || !CallerInfo->isValidState())
832 return false;
833
834 Change |=
835 clampStateAndIndicateChange(this->getState(), CallerInfo->getState());
836
837 return true;
838 };
839
840 bool AllCallSitesKnown = true;
841 if (!A.checkForAllCallSites(CheckCallSite, *this,
842 /*RequireAllCallSites=*/true,
843 AllCallSitesKnown))
844 return indicatePessimisticFixpoint();
845
846 return Change;
847 }
848
849 /// Clamp the assumed range to the default value ([Min, Max]) and emit the
850 /// attribute if it is not the same as the default.
851 ChangeStatus
852 emitAttributeIfNotDefaultAfterClamp(Attributor &A,
853 std::pair<unsigned, unsigned> Default) {
854 auto [Min, Max] = Default;
855 unsigned Lower = getAssumed().getLower().getZExtValue();
856 unsigned Upper = getAssumed().getUpper().getZExtValue();
857
858 // Clamp the range to the default value.
859 if (Lower < Min)
860 Lower = Min;
861 if (Upper > Max + 1)
862 Upper = Max + 1;
863
864 // No manifest if the value is invalid or same as default after clamp.
865 if ((Lower == Min && Upper == Max + 1) || (Upper < Lower))
866 return ChangeStatus::UNCHANGED;
867
868 Function *F = getAssociatedFunction();
869 LLVMContext &Ctx = F->getContext();
870 SmallString<10> Buffer;
871 raw_svector_ostream OS(Buffer);
872 OS << Lower << ',' << Upper - 1;
873 return A.manifestAttrs(getIRPosition(),
874 {Attribute::get(Ctx, AttrName, OS.str())},
875 /*ForceReplace=*/true);
876 }
877
878 const std::string getAsStr(Attributor *) const override {
879 std::string Str;
880 raw_string_ostream OS(Str);
881 OS << getName() << '[';
882 OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1;
883 OS << ']';
884 return OS.str();
885 }
886};
887
888/// Propagate amdgpu-flat-work-group-size attribute.
889struct AAAMDFlatWorkGroupSize : public AAAMDSizeRangeAttribute {
890 AAAMDFlatWorkGroupSize(const IRPosition &IRP, Attributor &A)
891 : AAAMDSizeRangeAttribute(IRP, A, "amdgpu-flat-work-group-size") {}
892
893 void initialize(Attributor &A) override {
894 Function *F = getAssociatedFunction();
895 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
896
897 bool HasAttr = false;
898 auto Range = InfoCache.getDefaultFlatWorkGroupSize(*F);
899 auto MaxRange = InfoCache.getMaximumFlatWorkGroupRange(*F);
900
901 if (auto Attr = InfoCache.getFlatWorkGroupSizeAttr(*F)) {
902 // We only consider an attribute that is not max range because the front
903 // end always emits the attribute, unfortunately, and sometimes it emits
904 // the max range.
905 if (*Attr != MaxRange) {
906 Range = *Attr;
907 HasAttr = true;
908 }
909 }
910
911 // We don't want to directly clamp the state if it's the max range because
912 // that is basically the worst state.
913 if (Range == MaxRange)
914 return;
915
916 auto [Min, Max] = Range;
917 ConstantRange CR(APInt(32, Min), APInt(32, Max + 1));
918 IntegerRangeState IRS(CR);
919 clampStateAndIndicateChange(this->getState(), IRS);
920
921 if (HasAttr || AMDGPU::isEntryFunctionCC(F->getCallingConv()))
922 indicateOptimisticFixpoint();
923 }
924
925 ChangeStatus updateImpl(Attributor &A) override {
926 return updateImplImpl<AAAMDFlatWorkGroupSize>(A);
927 }
928
929 /// Create an abstract attribute view for the position \p IRP.
930 static AAAMDFlatWorkGroupSize &createForPosition(const IRPosition &IRP,
931 Attributor &A);
932
933 ChangeStatus manifest(Attributor &A) override {
934 Function *F = getAssociatedFunction();
935 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
936 return emitAttributeIfNotDefaultAfterClamp(
937 A, InfoCache.getMaximumFlatWorkGroupRange(*F));
938 }
939
940 /// See AbstractAttribute::getName()
941 StringRef getName() const override { return "AAAMDFlatWorkGroupSize"; }
942
943 /// See AbstractAttribute::getIdAddr()
944 const char *getIdAddr() const override { return &ID; }
945
946 /// This function should return true if the type of the \p AA is
947 /// AAAMDFlatWorkGroupSize
948 static bool classof(const AbstractAttribute *AA) {
949 return (AA->getIdAddr() == &ID);
950 }
951
952 /// Unique ID (due to the unique address)
953 static const char ID;
954};
955
956const char AAAMDFlatWorkGroupSize::ID = 0;
957
958AAAMDFlatWorkGroupSize &
959AAAMDFlatWorkGroupSize::createForPosition(const IRPosition &IRP,
960 Attributor &A) {
961 if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
962 return *new (A.Allocator) AAAMDFlatWorkGroupSize(IRP, A);
963 llvm_unreachable(
964 "AAAMDFlatWorkGroupSize is only valid for function position");
965}
966
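/// Three decreasing integer states, one per dimension, used to track the
/// assumed "amdgpu-max-num-workgroups" values.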
967struct TupleDecIntegerRangeState : public AbstractState {
968 DecIntegerState<uint32_t> X, Y, Z;
969
970 bool isValidState() const override {
971 return X.isValidState() && Y.isValidState() && Z.isValidState();
972 }
973
974 bool isAtFixpoint() const override {
975 return X.isAtFixpoint() && Y.isAtFixpoint() && Z.isAtFixpoint();
976 }
977
978 ChangeStatus indicateOptimisticFixpoint() override {
979 return X.indicateOptimisticFixpoint() | Y.indicateOptimisticFixpoint() |
980 Z.indicateOptimisticFixpoint();
981 }
982
983 ChangeStatus indicatePessimisticFixpoint() override {
984 return X.indicatePessimisticFixpoint() | Y.indicatePessimisticFixpoint() |
985 Z.indicatePessimisticFixpoint();
986 }
987
988 TupleDecIntegerRangeState operator^=(const TupleDecIntegerRangeState &Other) {
989 X ^= Other.X;
990 Y ^= Other.Y;
991 Z ^= Other.Z;
992 return *this;
993 }
994
995 bool operator==(const TupleDecIntegerRangeState &Other) const {
996 return X == Other.X && Y == Other.Y && Z == Other.Z;
997 }
998
999 TupleDecIntegerRangeState &getAssumed() { return *this; }
1000 const TupleDecIntegerRangeState &getAssumed() const { return *this; }
1001};
1002
1003using AAAMDMaxNumWorkgroupsState =
1004 StateWrapper<TupleDecIntegerRangeState, AbstractAttribute, uint32_t>;
1005
1006/// Propagate amdgpu-max-num-workgroups attribute.
1007struct AAAMDMaxNumWorkgroups
1008 : public StateWrapper<TupleDecIntegerRangeState, AbstractAttribute> {
1009 using Base = StateWrapper<TupleDecIntegerRangeState, AbstractAttribute>;
1010
1011 AAAMDMaxNumWorkgroups(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
1012
1013 void initialize(Attributor &A) override {
1014 Function *F = getAssociatedFunction();
1015 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
1016
1017 SmallVector<unsigned> MaxNumWorkgroups = InfoCache.getMaxNumWorkGroups(*F);
1018
1019 X.takeKnownMinimum(MaxNumWorkgroups[0]);
1020 Y.takeKnownMinimum(MaxNumWorkgroups[1]);
1021 Z.takeKnownMinimum(MaxNumWorkgroups[2]);
1022
1023 if (AMDGPU::isEntryFunctionCC(F->getCallingConv()))
1024 indicatePessimisticFixpoint();
1025 }
1026
1027 ChangeStatus updateImpl(Attributor &A) override {
1028 ChangeStatus Change = ChangeStatus::UNCHANGED;
1029
1030 auto CheckCallSite = [&](AbstractCallSite CS) {
1031 Function *Caller = CS.getInstruction()->getFunction();
1032 LLVM_DEBUG(dbgs() << "[AAAMDMaxNumWorkgroups] Call " << Caller->getName()
1033 << "->" << getAssociatedFunction()->getName() << '\n');
1034
1035 const auto *CallerInfo = A.getAAFor<AAAMDMaxNumWorkgroups>(
1036 *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
1037 if (!CallerInfo || !CallerInfo->isValidState())
1038 return false;
1039
1040 Change |=
1041 clampStateAndIndicateChange(this->getState(), CallerInfo->getState());
1042 return true;
1043 };
1044
1045 bool AllCallSitesKnown = true;
1046 if (!A.checkForAllCallSites(CheckCallSite, *this,
1047 /*RequireAllCallSites=*/true,
1048 AllCallSitesKnown))
1049 return indicatePessimisticFixpoint();
1050
1051 return Change;
1052 }
1053
1054 /// Create an abstract attribute view for the position \p IRP.
1055 static AAAMDMaxNumWorkgroups &createForPosition(const IRPosition &IRP,
1056 Attributor &A);
1057
1058 ChangeStatus manifest(Attributor &A) override {
1059 Function *F = getAssociatedFunction();
1060 LLVMContext &Ctx = F->getContext();
1061 SmallString<32> Buffer;
1062 raw_svector_ostream OS(Buffer);
1063 OS << X.getAssumed() << ',' << Y.getAssumed() << ',' << Z.getAssumed();
1064
1065 // TODO: Should annotate loads of the group size for this to do anything
1066 // useful.
1067 return A.manifestAttrs(
1068 getIRPosition(),
1069 {Attribute::get(Ctx, "amdgpu-max-num-workgroups", OS.str())},
1070 /* ForceReplace= */ true);
1071 }
1072
1073 StringRef getName() const override { return "AAAMDMaxNumWorkgroups"; }
1074
1075 const std::string getAsStr(Attributor *) const override {
1076 std::string Buffer = "AAAMDMaxNumWorkgroupsState[";
1077 raw_string_ostream OS(Buffer);
1078 OS << X.getAssumed() << ',' << Y.getAssumed() << ',' << Z.getAssumed()
1079 << ']';
1080 return OS.str();
1081 }
1082
1083 const char *getIdAddr() const override { return &ID; }
1084
1085 /// This function should return true if the type of the \p AA is
1086 /// AAAMDMaxNumWorkgroups
1087 static bool classof(const AbstractAttribute *AA) {
1088 return (AA->getIdAddr() == &ID);
1089 }
1090
1091 void trackStatistics() const override {}
1092
1093 /// Unique ID (due to the unique address)
1094 static const char ID;
1095};
1096
1097const char AAAMDMaxNumWorkgroups::ID = 0;
1098
1099AAAMDMaxNumWorkgroups &
1100AAAMDMaxNumWorkgroups::createForPosition(const IRPosition &IRP, Attributor &A) {
1101 if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
1102 return *new (A.Allocator) AAAMDMaxNumWorkgroups(IRP, A);
1103 llvm_unreachable("AAAMDMaxNumWorkgroups is only valid for function position");
1104}
1105
1106/// Propagate amdgpu-waves-per-eu attribute.
1107struct AAAMDWavesPerEU : public AAAMDSizeRangeAttribute {
1108 AAAMDWavesPerEU(const IRPosition &IRP, Attributor &A)
1109 : AAAMDSizeRangeAttribute(IRP, A, "amdgpu-waves-per-eu") {}
1110
1111 void initialize(Attributor &A) override {
1112 Function *F = getAssociatedFunction();
1113 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
1114
1115 // If the attribute exists, we will honor it if it is not the default.
1116 if (auto Attr = InfoCache.getWavesPerEUAttr(*F)) {
1117 std::pair<unsigned, unsigned> MaxWavesPerEURange{
1118 1U, InfoCache.getMaxWavesPerEU(*F)};
1119 if (*Attr != MaxWavesPerEURange) {
1120 auto [Min, Max] = *Attr;
1121 ConstantRange Range(APInt(32, Min), APInt(32, Max + 1));
1122 IntegerRangeState RangeState(Range);
1123 this->getState() = RangeState;
1124 indicateOptimisticFixpoint();
1125 return;
1126 }
1127 }
1128
1129 if (AMDGPU::isEntryFunctionCC(F->getCallingConv()))
1130 indicatePessimisticFixpoint();
1131 }
1132
1133 ChangeStatus updateImpl(Attributor &A) override {
1134 ChangeStatus Change = ChangeStatus::UNCHANGED;
1135
1136 auto CheckCallSite = [&](AbstractCallSite CS) {
1137 Function *Caller = CS.getInstruction()->getFunction();
1138 Function *Func = getAssociatedFunction();
1139 LLVM_DEBUG(dbgs() << '[' << getName() << "] Call " << Caller->getName()
1140 << "->" << Func->getName() << '\n');
1141 (void)Func;
1142
1143 const auto *CallerAA = A.getAAFor<AAAMDWavesPerEU>(
1144 *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
1145 if (!CallerAA || !CallerAA->isValidState())
1146 return false;
1147
1148 ConstantRange Assumed = getAssumed();
1149 unsigned Min = std::max(Assumed.getLower().getZExtValue(),
1150 CallerAA->getAssumed().getLower().getZExtValue());
1151 unsigned Max = std::max(Assumed.getUpper().getZExtValue(),
1152 CallerAA->getAssumed().getUpper().getZExtValue());
1153 ConstantRange Range(APInt(32, Min), APInt(32, Max));
1154 IntegerRangeState RangeState(Range);
1155 getState() = RangeState;
1156 Change |= getState() == Assumed ? ChangeStatus::UNCHANGED
1157 : ChangeStatus::CHANGED;
1158
1159 return true;
1160 };
1161
1162 bool AllCallSitesKnown = true;
1163 if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
1164 return indicatePessimisticFixpoint();
1165
1166 return Change;
1167 }
1168
1169 /// Create an abstract attribute view for the position \p IRP.
1170 static AAAMDWavesPerEU &createForPosition(const IRPosition &IRP,
1171 Attributor &A);
1172
1173 ChangeStatus manifest(Attributor &A) override {
1174 Function *F = getAssociatedFunction();
1175 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
1176 return emitAttributeIfNotDefaultAfterClamp(
1177 A, {1U, InfoCache.getMaxWavesPerEU(*F)});
1178 }
1179
1180 /// See AbstractAttribute::getName()
1181 StringRef getName() const override { return "AAAMDWavesPerEU"; }
1182
1183 /// See AbstractAttribute::getIdAddr()
1184 const char *getIdAddr() const override { return &ID; }
1185
1186 /// This function should return true if the type of the \p AA is
1187 /// AAAMDWavesPerEU
1188 static bool classof(const AbstractAttribute *AA) {
1189 return (AA->getIdAddr() == &ID);
1190 }
1191
1192 /// Unique ID (due to the unique address)
1193 static const char ID;
1194};
1195
1196const char AAAMDWavesPerEU::ID = 0;
1197
1198AAAMDWavesPerEU &AAAMDWavesPerEU::createForPosition(const IRPosition &IRP,
1199 Attributor &A) {
1200 if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
1201 return *new (A.Allocator) AAAMDWavesPerEU(IRP, A);
1202 llvm_unreachable("AAAMDWavesPerEU is only valid for function position");
1203}
1204
1205/// Compute the minimum number of AGPRs required to allocate the inline asm.
1206static unsigned inlineAsmGetNumRequiredAGPRs(const InlineAsm *IA,
1207 const CallBase &Call) {
1208 unsigned ArgNo = 0;
1209 unsigned ResNo = 0;
1210 unsigned AGPRDefCount = 0;
1211 unsigned AGPRUseCount = 0;
1212 unsigned MaxPhysReg = 0;
1213 const DataLayout &DL = Call.getFunction()->getParent()->getDataLayout();
1214
1215 // TODO: Overestimates due to not accounting for tied operands
1216 for (const InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
1217 Type *Ty = nullptr;
1218 switch (CI.Type) {
1219 case InlineAsm::isOutput: {
1220 Ty = Call.getType();
1221 if (auto *STy = dyn_cast<StructType>(Ty))
1222 Ty = STy->getElementType(ResNo);
1223 ++ResNo;
1224 break;
1225 }
1226 case InlineAsm::isInput: {
1227 Ty = Call.getArgOperand(ArgNo++)->getType();
1228 break;
1229 }
1230 case InlineAsm::isLabel:
1231 continue;
1232 case InlineAsm::isClobber:
1233 // Parse the physical register reference.
1234 break;
1235 }
1236
1237 for (StringRef Code : CI.Codes) {
1238 unsigned RegCount = 0;
1239 if (Code.starts_with("a")) {
1240 // Virtual register, compute number of registers based on the type.
1241 //
1242 // We ought to be going through TargetLowering to get the number of
1243 // registers, but we should avoid the dependence on CodeGen here.
1244 RegCount = divideCeil(DL.getTypeSizeInBits(Ty), 32);
1245 } else {
1246 // Physical register reference
1247 auto [Kind, RegIdx, NumRegs] = AMDGPU::parseAsmConstraintPhysReg(Code);
1248 if (Kind == 'a') {
1249 RegCount = NumRegs;
1250 MaxPhysReg = std::max(MaxPhysReg, std::min(RegIdx + NumRegs, 256u));
1251 }
1252
1253 continue;
1254 }
1255
1256 if (CI.Type == InlineAsm::isOutput) {
1257 // Apply tuple alignment requirement
1258 //
1259 // TODO: This is more conservative than necessary.
1260 AGPRDefCount = alignTo(AGPRDefCount, RegCount);
1261
1262 AGPRDefCount += RegCount;
1263 if (CI.isEarlyClobber) {
1264 AGPRUseCount = alignTo(AGPRUseCount, RegCount);
1265 AGPRUseCount += RegCount;
1266 }
1267 } else {
1268 AGPRUseCount = alignTo(AGPRUseCount, RegCount);
1269 AGPRUseCount += RegCount;
1270 }
1271 }
1272 }
1273
1274 unsigned MaxVirtReg = std::max(AGPRUseCount, AGPRDefCount);
1275
1276 // TODO: This is overly conservative. If there are any physical registers,
1277 // allocate any virtual registers after them so we don't have to solve optimal
1278 // packing.
1279 return std::min(MaxVirtReg + MaxPhysReg, 256u);
1280}
1281
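/// Deduce the minimum number of AGPRs a function must have allocated
/// ("amdgpu-agpr-alloc") from inline asm, register intrinsics, and callees.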
1282struct AAAMDGPUMinAGPRAlloc
1283 : public StateWrapper<DecIntegerState<>, AbstractAttribute> {
1284 using Base = StateWrapper<DecIntegerState<>, AbstractAttribute>;
1285 AAAMDGPUMinAGPRAlloc(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
1286
1287 static AAAMDGPUMinAGPRAlloc &createForPosition(const IRPosition &IRP,
1288 Attributor &A) {
1289 if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
1290 return *new (A.Allocator) AAAMDGPUMinAGPRAlloc(IRP, A);
1291 llvm_unreachable(
1292 "AAAMDGPUMinAGPRAlloc is only valid for function position");
1293 }
1294
1295 void initialize(Attributor &A) override {
1296 Function *F = getAssociatedFunction();
1297 auto [MinNumAGPR, MaxNumAGPR] =
1298 AMDGPU::getIntegerPairAttribute(*F, "amdgpu-agpr-alloc", {~0u, ~0u},
1299 /*OnlyFirstRequired=*/true);
1300 if (MinNumAGPR == 0)
1301 indicateOptimisticFixpoint();
1302 }
1303
1304 const std::string getAsStr(Attributor *A) const override {
1305 std::string Str = "amdgpu-agpr-alloc=";
1306 raw_string_ostream OS(Str);
1307 OS << getAssumed();
1308 return OS.str();
1309 }
1310
1311 void trackStatistics() const override {}
1312
1313 ChangeStatus updateImpl(Attributor &A) override {
1314 DecIntegerState<> Maximum;
1315
1316 // Check for cases which require allocation of AGPRs. AGPRs are only
1317 // required where there are direct references to them, i.e. in inline
1318 // assembly and special register intrinsics.
1319 auto CheckForMinAGPRAllocs = [&](Instruction &I) {
1320 const auto &CB = cast<CallBase>(I);
1321 const Value *CalleeOp = CB.getCalledOperand();
1322
1323 if (const InlineAsm *IA = dyn_cast<InlineAsm>(CalleeOp)) {
1324 // Technically, the inline asm could be invoking a call to an unknown
1325 // external function that requires AGPRs, but ignore that.
1326 unsigned NumRegs = inlineAsmGetNumRequiredAGPRs(IA, CB);
1327 Maximum.takeAssumedMaximum(NumRegs);
1328 return true;
1329 }
1330
1331 switch (CB.getIntrinsicID()) {
1332 case Intrinsic::not_intrinsic:
1333 break;
1334 case Intrinsic::write_register:
1335 case Intrinsic::read_register:
1336 case Intrinsic::read_volatile_register: {
1337 const MDString *RegName = cast<MDString>(
1338 cast<MDNode>(
1339 cast<MetadataAsValue>(CB.getArgOperand(0))->getMetadata())
1340 ->getOperand(0));
1341 auto [Kind, RegIdx, NumRegs] =
1342 AMDGPU::parseAsmPhysRegName(RegName->getString());
1343 if (Kind == 'a')
1344 Maximum.takeAssumedMaximum(std::min(RegIdx + NumRegs, 256u));
1345
1346 return true;
1347 }
1348 default:
1349 // Some intrinsics may use AGPRs, but if we have a choice, we are not
1350 // required to use AGPRs.
1351 return true;
1352 }
1353
1354 // TODO: Handle callsite attributes
1355 auto *CBEdges = A.getAAFor<AACallEdges>(
1356 *this, IRPosition::callsite_function(CB), DepClassTy::REQUIRED);
1357 if (!CBEdges || CBEdges->hasUnknownCallee()) {
1359 return false;
1360 }
1361
1362 for (const Function *PossibleCallee : CBEdges->getOptimisticEdges()) {
1363 const auto *CalleeInfo = A.getAAFor<AAAMDGPUMinAGPRAlloc>(
1364 *this, IRPosition::function(*PossibleCallee), DepClassTy::REQUIRED);
1365 if (!CalleeInfo || !CalleeInfo->isValidState()) {
1367 return false;
1368 }
1369
1370 Maximum.takeAssumedMaximum(CalleeInfo->getAssumed());
1371 }
1372
1373 return true;
1374 };
1375
1376 bool UsedAssumedInformation = false;
1377 if (!A.checkForAllCallLikeInstructions(CheckForMinAGPRAllocs, *this,
1378 UsedAssumedInformation))
1379 return indicatePessimisticFixpoint();
1380
1381 return clampStateAndIndicateChange(getState(), Maximum);
1382 }
1383
1384 ChangeStatus manifest(Attributor &A) override {
1385 LLVMContext &Ctx = getAssociatedFunction()->getContext();
1386 SmallString<4> Buffer;
1387 raw_svector_ostream OS(Buffer);
1388 OS << getAssumed();
1389
1390 return A.manifestAttrs(
1391 getIRPosition(), {Attribute::get(Ctx, "amdgpu-agpr-alloc", OS.str())});
1392 }
1393
1394 StringRef getName() const override { return "AAAMDGPUMinAGPRAlloc"; }
1395 const char *getIdAddr() const override { return &ID; }
1396
1397 /// This function should return true if the type of the \p AA is
1398 /// AAAMDGPUMinAGPRAllocs
1399 static bool classof(const AbstractAttribute *AA) {
1400 return (AA->getIdAddr() == &ID);
1401 }
1402
1403 static const char ID;
1404};
1405
1406const char AAAMDGPUMinAGPRAlloc::ID = 0;
1407
1408/// An abstract attribute to propagate the function attribute
1409/// "amdgpu-cluster-dims" from kernel entry functions to device functions.
1410struct AAAMDGPUClusterDims
1411 : public StateWrapper<BooleanState, AbstractAttribute> {
1412 using Base = StateWrapper<BooleanState, AbstractAttribute>;
1413 AAAMDGPUClusterDims(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
1414
1415 /// Create an abstract attribute view for the position \p IRP.
1416 static AAAMDGPUClusterDims &createForPosition(const IRPosition &IRP,
1417 Attributor &A);
1418
1419 /// See AbstractAttribute::getName().
1420 StringRef getName() const override { return "AAAMDGPUClusterDims"; }
1421
1422 /// See AbstractAttribute::getIdAddr().
1423 const char *getIdAddr() const override { return &ID; }
1424
1425 /// This function should return true if the type of the \p AA is
1426 /// AAAMDGPUClusterDims.
1427 static bool classof(const AbstractAttribute *AA) {
1428 return AA->getIdAddr() == &ID;
1429 }
1430
1431 virtual const AMDGPU::ClusterDimsAttr &getClusterDims() const = 0;
1432
1433 /// Unique ID (due to the unique address)
1434 static const char ID;
1435};
1436
1437const char AAAMDGPUClusterDims::ID = 0;
1438
1439struct AAAMDGPUClusterDimsFunction : public AAAMDGPUClusterDims {
1440 AAAMDGPUClusterDimsFunction(const IRPosition &IRP, Attributor &A)
1441 : AAAMDGPUClusterDims(IRP, A) {}
1442
1443 void initialize(Attributor &A) override {
1444 Function *F = getAssociatedFunction();
1445 assert(F && "empty associated function");
1446
1447 Attr = AMDGPU::ClusterDimsAttr::get(*F);
1448
1449 // Whatever cluster-dims attribute a kernel function has, it is final.
1450 if (AMDGPU::isEntryFunctionCC(F->getCallingConv())) {
1451 if (Attr.isUnknown())
1452 indicatePessimisticFixpoint();
1453 else
1454 indicateOptimisticFixpoint();
1455 }
1456 }
1457
1458 const std::string getAsStr(Attributor *A) const override {
1459 if (!getAssumed() || Attr.isUnknown())
1460 return "unknown";
1461 if (Attr.isNoCluster())
1462 return "no";
1463 if (Attr.isVariableDims())
1464 return "variable";
1465 return Attr.to_string();
1466 }
1467
1468 void trackStatistics() const override {}
1469
1470 ChangeStatus updateImpl(Attributor &A) override {
1471 auto OldState = Attr;
1472
1473 auto CheckCallSite = [&](AbstractCallSite CS) {
1474 const auto *CallerAA = A.getAAFor<AAAMDGPUClusterDims>(
1475 *this, IRPosition::function(*CS.getInstruction()->getFunction()),
1476 DepClassTy::REQUIRED);
1477 if (!CallerAA || !CallerAA->isValidState())
1478 return false;
1479
1480 return merge(CallerAA->getClusterDims());
1481 };
1482
1483 bool UsedAssumedInformation = false;
1484 if (!A.checkForAllCallSites(CheckCallSite, *this,
1485 /*RequireAllCallSites=*/true,
1486 UsedAssumedInformation))
1487 return indicatePessimisticFixpoint();
1488
1489 return OldState == Attr ? ChangeStatus::UNCHANGED : ChangeStatus::CHANGED;
1490 }
1491
1492 ChangeStatus manifest(Attributor &A) override {
1493 if (Attr.isUnknown())
1494 return ChangeStatus::UNCHANGED;
1495 return A.manifestAttrs(
1496 getIRPosition(),
1497 {Attribute::get(getAssociatedFunction()->getContext(), AttrName,
1498 Attr.to_string())},
1499 /*ForceReplace=*/true);
1500 }
1501
1502 const AMDGPU::ClusterDimsAttr &getClusterDims() const override {
1503 return Attr;
1504 }
1505
1506private:
1507 bool merge(const AMDGPU::ClusterDimsAttr &Other) {
1508 // Case 1: Both of them are still unknown; we do nothing and continue to
1509 // wait for propagation.
1510 if (Attr.isUnknown() && Other.isUnknown())
1511 return true;
1512
1513 // Case 2: The other is determined, but we are unknown yet, we simply take
1514 // the other's value.
1515 if (Attr.isUnknown()) {
1516 Attr = Other;
1517 return true;
1518 }
1519
1520 // Case 3: We are determined but the other is unknown yet, we simply keep
1521 // everything unchanged.
1522 if (Other.isUnknown())
1523 return true;
1524
1525 // After this point, both are determined.
1526
1527 // Case 4: If they are same, we do nothing.
1528 if (Attr == Other)
1529 return true;
1530
1531 // Now they are not same.
1532
1533 // Case 5: Exactly one of us uses clusters (if neither did, case 4 would
1534 // hold), so it is unknown whether clusters will be used, and the state is
1535 // final, unlike case 1.
1536 if (Attr.isNoCluster() || Other.isNoCluster()) {
1537 Attr.setUnknown();
1538 return false;
1539 }
1540
1541 // Case 6: Both of us use clusters, but the dims differ, so the result is
1542 // that clusters are used, just without fixed dims.
1543 Attr.setVariableDims();
1544 return true;
1545 }
1546
1547 AMDGPU::ClusterDimsAttr Attr;
1548
1549 static constexpr char AttrName[] = "amdgpu-cluster-dims";
1550};
1551
1552AAAMDGPUClusterDims &
1553AAAMDGPUClusterDims::createForPosition(const IRPosition &IRP, Attributor &A) {
1554 if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
1555 return *new (A.Allocator) AAAMDGPUClusterDimsFunction(IRP, A);
1556 llvm_unreachable("AAAMDGPUClusterDims is only valid for function position");
1557}
1558
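/// Run the Attributor on all non-intrinsic functions in \p M with the
/// AMDGPU-specific abstract attributes seeded, and return whether anything
/// changed.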
1559static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
1560 AMDGPUAttributorOptions Options,
1561 ThinOrFullLTOPhase LTOPhase) {
1562 SetVector<Function *> Functions;
1563 for (Function &F : M) {
1564 if (!F.isIntrinsic())
1565 Functions.insert(&F);
1566 }
1567
1568 CallGraphUpdater CGUpdater;
1569 BumpPtrAllocator Allocator;
1570 AMDGPUInformationCache InfoCache(M, AG, Allocator, nullptr, TM);
1571 DenseSet<const char *> Allowed(
1572 {&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID,
1573 &AAPotentialValues::ID, &AAAMDFlatWorkGroupSize::ID,
1574 &AAAMDMaxNumWorkgroups::ID, &AAAMDWavesPerEU::ID,
1575 &AAAMDGPUMinAGPRAlloc::ID, &AACallEdges::ID, &AAPointerInfo::ID,
1576 &AAPotentialConstantValues::ID, &AAUnderlyingObjects::ID,
1577 &AAAddressSpace::ID, &AANoAliasAddrSpace::ID, &AAIndirectCallInfo::ID,
1578 &AAAMDGPUClusterDims::ID, &AAAlign::ID});
1579
1580 AttributorConfig AC(CGUpdater);
1581 AC.IsClosedWorldModule = Options.IsClosedWorld;
1582 AC.Allowed = &Allowed;
1583 AC.IsModulePass = true;
1584 AC.DefaultInitializeLiveInternals = false;
1585 AC.IndirectCalleeSpecializationCallback =
1586 [](Attributor &A, const AbstractAttribute &AA, CallBase &CB,
1587 Function &Callee, unsigned NumAssumedCallees) {
1588 return !AMDGPU::isEntryFunctionCC(Callee.getCallingConv()) &&
1589 (NumAssumedCallees <= IndirectCallSpecializationThreshold);
1590 };
1591 AC.IPOAmendableCB = [](const Function &F) {
1592 return F.getCallingConv() == CallingConv::AMDGPU_KERNEL;
1593 };
1594
1595 Attributor A(Functions, InfoCache, AC);
1596
1597 LLVM_DEBUG({
1598 StringRef LTOPhaseStr = to_string(LTOPhase);
1599 dbgs() << "[AMDGPUAttributor] Running at phase " << LTOPhaseStr << '\n'
1600 << "[AMDGPUAttributor] Module " << M.getName() << " is "
1601 << (AC.IsClosedWorldModule ? "" : "not ")
1602 << "assumed to be a closed world.\n";
1603 });
1604
1605 for (auto *F : Functions) {
1606 A.getOrCreateAAFor<AAAMDAttributes>(IRPosition::function(*F));
1607 A.getOrCreateAAFor<AAUniformWorkGroupSize>(IRPosition::function(*F));
1608 A.getOrCreateAAFor<AAAMDMaxNumWorkgroups>(IRPosition::function(*F));
1609 CallingConv::ID CC = F->getCallingConv();
1610 if (!AMDGPU::isEntryFunctionCC(CC)) {
1611 A.getOrCreateAAFor<AAAMDFlatWorkGroupSize>(IRPosition::function(*F));
1612 A.getOrCreateAAFor<AAAMDWavesPerEU>(IRPosition::function(*F));
1613 }
1614
1615 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(*F);
1616 if (!F->isDeclaration() && ST.hasClusters())
1617 A.getOrCreateAAFor<AAAMDGPUClusterDims>(IRPosition::function(*F));
1618
1619 if (ST.hasGFX90AInsts())
1620 A.getOrCreateAAFor<AAAMDGPUMinAGPRAlloc>(IRPosition::function(*F));
1621
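 // Seed address-space related attributes on the pointer operand of every
 // memory access so address spaces can be inferred and propagated.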
1622 for (auto &I : instructions(F)) {
1623 Value *Ptr = nullptr;
1624 if (auto *LI = dyn_cast<LoadInst>(&I))
1625 Ptr = LI->getPointerOperand();
1626 else if (auto *SI = dyn_cast<StoreInst>(&I))
1627 Ptr = SI->getPointerOperand();
1628 else if (auto *RMW = dyn_cast<AtomicRMWInst>(&I))
1629 Ptr = RMW->getPointerOperand();
1630 else if (auto *CmpX = dyn_cast<AtomicCmpXchgInst>(&I))
1631 Ptr = CmpX->getPointerOperand();
1632
1633 if (Ptr) {
1634 A.getOrCreateAAFor<AAAddressSpace>(IRPosition::value(*Ptr));
1635 A.getOrCreateAAFor<AANoAliasAddrSpace>(IRPosition::value(*Ptr));
1636 if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Ptr)) {
1637 if (II->getIntrinsicID() == Intrinsic::amdgcn_make_buffer_rsrc)
1638 A.getOrCreateAAFor<AAAlign>(IRPosition::value(*Ptr));
1639 }
1640 }
1641 }
1642 }
1643
1644 return A.run() == ChangeStatus::CHANGED;
1645}
1646} // namespace
1647
1648PreservedAnalyses AMDGPUAttributorPass::run(Module &M,
1649 ModuleAnalysisManager &AM) {
1650
1651 FunctionAnalysisManager &FAM =
1652 AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
1653 AnalysisGetter AG(FAM);
1654
1655 // TODO: Probably preserves CFG
1656 return runImpl(M, AG, TM, Options, LTOPhase) ? PreservedAnalyses::none()
1657 : PreservedAnalyses::all();
1658}
unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI)
unsigned getAMDHSACodeObjectVersion(const Module &M)
unsigned getDefaultQueueImplicitArgPosition(unsigned CodeObjectVersion)
std::tuple< char, unsigned, unsigned > parseAsmPhysRegName(StringRef RegName)
Returns a valid charcode or 0 in the first entry if this is a valid physical register name.
LLVM_READNONE constexpr bool isEntryFunctionCC(CallingConv::ID CC)
std::tuple< char, unsigned, unsigned > parseAsmConstraintPhysReg(StringRef Constraint)
Returns a valid charcode or 0 in the first entry if this is a valid physical register constraint.
unsigned getHostcallImplicitArgPosition(unsigned CodeObjectVersion)
unsigned getCompletionActionImplicitArgPosition(unsigned CodeObjectVersion)
std::pair< unsigned, unsigned > getIntegerPairAttribute(const Function &F, StringRef Name, std::pair< unsigned, unsigned > Default, bool OnlyFirstRequired)
LLVM_READNONE constexpr bool isGraphics(CallingConv::ID CC)
unsigned getMultigridSyncArgImplicitArgPosition(unsigned CodeObjectVersion)
E & operator^=(E &LHS, E RHS)
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ CE
Windows NT (Windows on ARM)
Definition MCAsmInfo.h:48
initializer< Ty > init(const Ty &Val)
NodeAddr< CodeNode * > Code
Definition RDFGraph.h:388
NodeAddr< FuncNode * > Func
Definition RDFGraph.h:393
Context & getContext() const
Definition BasicBlock.h:99
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
InnerAnalysisManagerProxy< FunctionAnalysisManager, Module > FunctionAnalysisManagerModuleProxy
Provide the FunctionAnalysisManager to Module proxy.
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
ThinOrFullLTOPhase
This enumerates the LLVM full LTO or ThinLTO optimization phases.
Definition Pass.h:77
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
const char * to_string(ThinOrFullLTOPhase Phase)
Definition Pass.cpp:301
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition MathExtras.h:394
@ Other
Any other memory.
Definition ModRef.h:68
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
ChangeStatus clampStateAndIndicateChange(StateType &S, const StateType &R)
Helper function to clamp a state S of type StateType with the information in R and indicate/return if...
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
ChangeStatus
{
Definition Attributor.h:496
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
BumpPtrAllocatorImpl<> BumpPtrAllocator
The standard BumpPtrAllocator which just uses the default template parameters.
Definition Allocator.h:383
AnalysisManager< Module > ModuleAnalysisManager
Convenience typedef for the Module analysis manager.
Definition MIRParser.h:39
static LLVM_ABI const char ID
Unique ID (due to the unique address)
static LLVM_ABI const char ID
Unique ID (due to the unique address)
virtual const SetVector< Function * > & getOptimisticEdges() const =0
Get the optimistic edges.
static LLVM_ABI const char ID
Unique ID (due to the unique address)
virtual bool hasNonAsmUnknownCallee() const =0
Is there any call with a unknown callee, excluding any inline asm.
static LLVM_ABI const char ID
Unique ID (due to the unique address)
static LLVM_ABI const char ID
Unique ID (due to the unique address)
Instruction * getRemoteInst() const
Return the actual instruction that causes the access.
static LLVM_ABI const char ID
Unique ID (due to the unique address)
static LLVM_ABI const char ID
Unique ID (due to the unique address)
static LLVM_ABI const char ID
Unique ID (due to the unique address)
static LLVM_ABI const char ID
Unique ID (due to the unique address)
virtual const char * getIdAddr() const =0
This function should return the address of the ID of the AbstractAttribute.
Wrapper for FunctionAnalysisManager.
The fixpoint analysis framework that orchestrates the attribute deduction.
DecIntegerState & takeAssumedMaximum(base_t Value)
Take maximum of assumed and Value.
Helper to describe and deal with positions in the LLVM-IR.
Definition Attributor.h:593
static const IRPosition callsite_returned(const CallBase &CB)
Create a position describing the returned value of CB.
Definition Attributor.h:661
static const IRPosition value(const Value &V, const CallBaseContext *CBContext=nullptr)
Create a position describing the value of V.
Definition Attributor.h:617
@ IRP_FUNCTION
An attribute for a function (scope).
Definition Attributor.h:605
static const IRPosition function(const Function &F, const CallBaseContext *CBContext=nullptr)
Create a position describing the function scope of F.
Definition Attributor.h:636
Kind getPositionKind() const
Return the associated position kind.
Definition Attributor.h:889
static const IRPosition callsite_function(const CallBase &CB)
Create a position describing the function scope of CB.
Definition Attributor.h:656
Data structure to hold cached (LLVM-IR) information.
bool isValidState() const override
See AbstractState::isValidState() NOTE: For now we simply pretend that the worst possible state is in...
ChangeStatus indicatePessimisticFixpoint() override
See AbstractState::indicatePessimisticFixpoint(...)
Helper to tie a abstract state implementation to an abstract attribute.