LLVM 22.0.0git
AMDGPUAttributor.cpp
1//===- AMDGPUAttributor.cpp -----------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file This pass uses Attributor framework to deduce AMDGPU attributes.
10//
11//===----------------------------------------------------------------------===//
12
13#include "AMDGPU.h"
14#include "GCNSubtarget.h"
16#include "llvm/IR/IntrinsicsAMDGPU.h"
17#include "llvm/IR/IntrinsicsR600.h"
20
21#define DEBUG_TYPE "amdgpu-attributor"
22
23using namespace llvm;
24
25static cl::opt<unsigned> IndirectCallSpecializationThreshold(
26 "amdgpu-indirect-call-specialization-threshold",
27 cl::desc(
28 "A threshold controls whether an indirect call will be specialized"),
29 cl::init(3));
30
31#define AMDGPU_ATTRIBUTE(Name, Str) Name##_POS,
32
33enum ImplicitArgumentPositions {
34#include "AMDGPUAttributes.def"
35 LAST_ARG_POS
36};
37
38#define AMDGPU_ATTRIBUTE(Name, Str) Name = 1 << Name##_POS,
39
40enum ImplicitArgumentMask {
41 NOT_IMPLICIT_INPUT = 0,
42#include "AMDGPUAttributes.def"
43 ALL_ARGUMENT_MASK = (1 << LAST_ARG_POS) - 1,
44 UNKNOWN_INTRINSIC
45};
46
47#define AMDGPU_ATTRIBUTE(Name, Str) {Name, Str},
48static constexpr std::pair<ImplicitArgumentMask, StringLiteral>
49 ImplicitAttrs[] = {
50#include "AMDGPUAttributes.def"
51};
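// [Editorial sketch, not part of AMDGPUAttributor.cpp] A minimal, self-contained
// illustration of the X-macro pattern above. AMDGPUAttributes.def is replaced by a
// hypothetical three-entry list macro (MY_ATTRIBUTE); the attribute strings are
// examples in the spirit of the real "amdgpu-no-*" names. Each entry expands once
// into a position, once into a bit mask, and once into the mask-to-name table.

#include <cstdio>
#include <utility>

#define MY_ATTRIBUTE(X)                                                        \
  X(DISPATCH_PTR, "amdgpu-no-dispatch-ptr")                                    \
  X(QUEUE_PTR, "amdgpu-no-queue-ptr")                                          \
  X(HOSTCALL_PTR, "amdgpu-no-hostcall-ptr")

#define DECL_POS(Name, Str) Name##_POS,
enum Positions { MY_ATTRIBUTE(DECL_POS) LAST_POS };

#define DECL_MASK(Name, Str) Name = 1 << Name##_POS,
enum Masks { MY_ATTRIBUTE(DECL_MASK) ALL_MASK = (1 << LAST_POS) - 1 };

#define DECL_PAIR(Name, Str) {Name, Str},
static constexpr std::pair<unsigned, const char *> Table[] = {
    MY_ATTRIBUTE(DECL_PAIR)};

int main() {
  // Prints each mask next to the attribute it controls,
  // e.g. "0x1 -> amdgpu-no-dispatch-ptr".
  for (const auto &[Mask, Str] : Table)
    std::printf("0x%x -> %s\n", Mask, Str);
}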
52
53// We do not need to note the x workitem or workgroup id because they are always
54// initialized.
55//
56// TODO: We should not add the attributes if the known compile time workgroup
57// size is 1 for y/z.
58static ImplicitArgumentMask
59intrinsicToAttrMask(Intrinsic::ID ID, bool &NonKernelOnly, bool &NeedsImplicit,
60 bool HasApertureRegs, bool SupportsGetDoorBellID,
61 unsigned CodeObjectVersion) {
62 switch (ID) {
63 case Intrinsic::amdgcn_workitem_id_x:
64 NonKernelOnly = true;
65 return WORKITEM_ID_X;
66 case Intrinsic::amdgcn_workgroup_id_x:
67 NonKernelOnly = true;
68 return WORKGROUP_ID_X;
69 case Intrinsic::amdgcn_workitem_id_y:
70 case Intrinsic::r600_read_tidig_y:
71 return WORKITEM_ID_Y;
72 case Intrinsic::amdgcn_workitem_id_z:
73 case Intrinsic::r600_read_tidig_z:
74 return WORKITEM_ID_Z;
75 case Intrinsic::amdgcn_workgroup_id_y:
76 case Intrinsic::r600_read_tgid_y:
77 return WORKGROUP_ID_Y;
78 case Intrinsic::amdgcn_workgroup_id_z:
79 case Intrinsic::r600_read_tgid_z:
80 return WORKGROUP_ID_Z;
81 case Intrinsic::amdgcn_cluster_id_x:
82 NonKernelOnly = true;
83 return CLUSTER_ID_X;
84 case Intrinsic::amdgcn_cluster_id_y:
85 return CLUSTER_ID_Y;
86 case Intrinsic::amdgcn_cluster_id_z:
87 return CLUSTER_ID_Z;
88 case Intrinsic::amdgcn_lds_kernel_id:
89 return LDS_KERNEL_ID;
90 case Intrinsic::amdgcn_dispatch_ptr:
91 return DISPATCH_PTR;
92 case Intrinsic::amdgcn_dispatch_id:
93 return DISPATCH_ID;
94 case Intrinsic::amdgcn_implicitarg_ptr:
95 return IMPLICIT_ARG_PTR;
96 // Need queue_ptr anyway. But under V5, we also need implicitarg_ptr to access
97 // queue_ptr.
98 case Intrinsic::amdgcn_queue_ptr:
99 NeedsImplicit = (CodeObjectVersion >= AMDGPU::AMDHSA_COV5);
100 return QUEUE_PTR;
101 case Intrinsic::amdgcn_is_shared:
102 case Intrinsic::amdgcn_is_private:
103 if (HasApertureRegs)
104 return NOT_IMPLICIT_INPUT;
105 // Under V5, we need implicitarg_ptr + offsets to access private_base or
106 // shared_base. For pre-V5, however, we need to access them through queue_ptr +
107 // offsets.
108 return CodeObjectVersion >= AMDGPU::AMDHSA_COV5 ? IMPLICIT_ARG_PTR
109 : QUEUE_PTR;
110 case Intrinsic::trap:
111 case Intrinsic::debugtrap:
112 case Intrinsic::ubsantrap:
113 if (SupportsGetDoorBellID) // GetDoorbellID support implemented since V4.
114 return CodeObjectVersion >= AMDGPU::AMDHSA_COV4 ? NOT_IMPLICIT_INPUT
115 : QUEUE_PTR;
116 NeedsImplicit = (CodeObjectVersion >= AMDGPU::AMDHSA_COV5);
117 return QUEUE_PTR;
118 default:
119 return UNKNOWN_INTRINSIC;
120 }
121}
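// [Editorial note] Worked example of the mapping above, assuming a COV5 module on
// a subtarget without aperture registers and without GetDoorbellID support:
//   amdgcn_workitem_id_y -> WORKITEM_ID_Y
//   amdgcn_queue_ptr     -> QUEUE_PTR, and NeedsImplicit is set because under V5
//                           the queue pointer itself is read via implicitarg_ptr
//   amdgcn_is_shared     -> IMPLICIT_ARG_PTR (no aperture registers to consult)
//   llvm.trap            -> QUEUE_PTR, with NeedsImplicit set
// Each returned mask is later cleared from the assumed bits in
// AAAMDAttributesFunction::updateImpl, which prevents the corresponding
// "amdgpu-no-*" attribute from being added.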
122
123static bool castRequiresQueuePtr(unsigned SrcAS) {
124 return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS;
125}
126
127static bool isDSAddress(const Constant *C) {
128 const auto *GV = dyn_cast<GlobalValue>(C);
129 if (!GV)
130 return false;
131 unsigned AS = GV->getAddressSpace();
132 return AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS;
133}
134
135/// Returns true if sanitizer attributes are present on a function.
136static bool hasSanitizerAttributes(const Function &F) {
137 return F.hasFnAttribute(Attribute::SanitizeAddress) ||
138 F.hasFnAttribute(Attribute::SanitizeThread) ||
139 F.hasFnAttribute(Attribute::SanitizeMemory) ||
140 F.hasFnAttribute(Attribute::SanitizeHWAddress) ||
141 F.hasFnAttribute(Attribute::SanitizeMemTag);
142}
143
144namespace {
145class AMDGPUInformationCache : public InformationCache {
146public:
147 AMDGPUInformationCache(const Module &M, AnalysisGetter &AG,
148 BumpPtrAllocator &Allocator,
149 SetVector<Function *> *CGSCC, TargetMachine &TM)
150 : InformationCache(M, AG, Allocator, CGSCC), TM(TM),
151 CodeObjectVersion(AMDGPU::getAMDHSACodeObjectVersion(M)) {}
152
153 TargetMachine &TM;
154
155 enum ConstantStatus : uint8_t {
156 NONE = 0,
157 DS_GLOBAL = 1 << 0,
158 ADDR_SPACE_CAST_PRIVATE_TO_FLAT = 1 << 1,
159 ADDR_SPACE_CAST_LOCAL_TO_FLAT = 1 << 2,
160 ADDR_SPACE_CAST_BOTH_TO_FLAT =
161 ADDR_SPACE_CAST_PRIVATE_TO_FLAT | ADDR_SPACE_CAST_LOCAL_TO_FLAT
162 };
163
164 /// Check if the subtarget has aperture regs.
165 bool hasApertureRegs(Function &F) {
166 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
167 return ST.hasApertureRegs();
168 }
169
170 /// Check if the subtarget supports GetDoorbellID.
171 bool supportsGetDoorbellID(Function &F) {
172 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
173 return ST.supportsGetDoorbellID();
174 }
175
176 std::optional<std::pair<unsigned, unsigned>>
177 getFlatWorkGroupSizeAttr(const Function &F) const {
178 auto R = AMDGPU::getIntegerPairAttribute(F, "amdgpu-flat-work-group-size");
179 if (!R)
180 return std::nullopt;
181 return std::make_pair(R->first, *(R->second));
182 }
183
184 std::pair<unsigned, unsigned>
185 getDefaultFlatWorkGroupSize(const Function &F) const {
186 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
187 return ST.getDefaultFlatWorkGroupSize(F.getCallingConv());
188 }
189
190 std::pair<unsigned, unsigned>
191 getMaximumFlatWorkGroupRange(const Function &F) {
192 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
193 return {ST.getMinFlatWorkGroupSize(), ST.getMaxFlatWorkGroupSize()};
194 }
195
196 SmallVector<unsigned> getMaxNumWorkGroups(const Function &F) {
197 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
198 return ST.getMaxNumWorkGroups(F);
199 }
200
201 /// Get code object version.
202 unsigned getCodeObjectVersion() const { return CodeObjectVersion; }
203
204 /// Get the effective value of "amdgpu-waves-per-eu" for the function,
205 /// accounting for the interaction with the passed value to use for
206 /// "amdgpu-flat-work-group-size".
207 std::pair<unsigned, unsigned>
208 getWavesPerEU(const Function &F,
209 std::pair<unsigned, unsigned> FlatWorkGroupSize) {
210 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
211 return ST.getWavesPerEU(FlatWorkGroupSize, getLDSSize(F), F);
212 }
213
214 std::optional<std::pair<unsigned, unsigned>>
215 getWavesPerEUAttr(const Function &F) {
216 auto Val = AMDGPU::getIntegerPairAttribute(F, "amdgpu-waves-per-eu",
217 /*OnlyFirstRequired=*/true);
218 if (!Val)
219 return std::nullopt;
220 if (!Val->second) {
221 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
222 Val->second = ST.getMaxWavesPerEU();
223 }
224 return std::make_pair(Val->first, *(Val->second));
225 }
226
227 std::pair<unsigned, unsigned>
228 getEffectiveWavesPerEU(const Function &F,
229 std::pair<unsigned, unsigned> WavesPerEU,
230 std::pair<unsigned, unsigned> FlatWorkGroupSize) {
231 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
232 return ST.getEffectiveWavesPerEU(WavesPerEU, FlatWorkGroupSize,
233 getLDSSize(F));
234 }
235
236 unsigned getMaxWavesPerEU(const Function &F) {
237 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
238 return ST.getMaxWavesPerEU();
239 }
240
241 unsigned getMaxAddrSpace() const override {
242 return AMDGPUAS::MAX_AMDGPU_ADDRESS;
243 }
244
245private:
246 /// Check if the ConstantExpr \p CE uses an addrspacecast from private or
247 /// local to flat. These casts may require the queue pointer.
248 static uint8_t visitConstExpr(const ConstantExpr *CE) {
249 uint8_t Status = NONE;
250
251 if (CE->getOpcode() == Instruction::AddrSpaceCast) {
252 unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
253 if (SrcAS == AMDGPUAS::PRIVATE_ADDRESS)
254 Status |= ADDR_SPACE_CAST_PRIVATE_TO_FLAT;
255 else if (SrcAS == AMDGPUAS::LOCAL_ADDRESS)
256 Status |= ADDR_SPACE_CAST_LOCAL_TO_FLAT;
257 }
258
259 return Status;
260 }
261
262 /// Returns the minimum amount of LDS space used by a workgroup running
263 /// function \p F.
264 static unsigned getLDSSize(const Function &F) {
265 return AMDGPU::getIntegerPairAttribute(F, "amdgpu-lds-size",
266 {0, UINT32_MAX}, true)
267 .first;
268 }
269
270 /// Get the constant access bitmap for \p C.
271 uint8_t getConstantAccess(const Constant *C,
272 SmallPtrSetImpl<const Constant *> &Visited) {
273 auto It = ConstantStatus.find(C);
274 if (It != ConstantStatus.end())
275 return It->second;
276
277 uint8_t Result = 0;
278 if (isDSAddress(C))
279 Result = DS_GLOBAL;
280
281 if (const auto *CE = dyn_cast<ConstantExpr>(C))
282 Result |= visitConstExpr(CE);
283
284 for (const Use &U : C->operands()) {
285 const auto *OpC = dyn_cast<Constant>(U);
286 if (!OpC || !Visited.insert(OpC).second)
287 continue;
288
289 Result |= getConstantAccess(OpC, Visited);
290 }
291 return Result;
292 }
293
294public:
295 /// Returns true if \p Fn needs the queue pointer because of \p C.
296 bool needsQueuePtr(const Constant *C, Function &Fn) {
297 bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(Fn.getCallingConv());
298 bool HasAperture = hasApertureRegs(Fn);
299
300 // No need to explore the constants.
301 if (!IsNonEntryFunc && HasAperture)
302 return false;
303
304 SmallPtrSet<const Constant *, 8> Visited;
305 uint8_t Access = getConstantAccess(C, Visited);
306
307 // We need to trap on DS globals in non-entry functions.
308 if (IsNonEntryFunc && (Access & DS_GLOBAL))
309 return true;
310
311 return !HasAperture && (Access & ADDR_SPACE_CAST_BOTH_TO_FLAT);
312 }
313
314 bool checkConstForAddrSpaceCastFromPrivate(const Constant *C) {
315 SmallPtrSet<const Constant *, 8> Visited;
316 uint8_t Access = getConstantAccess(C, Visited);
317 return Access & ADDR_SPACE_CAST_PRIVATE_TO_FLAT;
318 }
319
320private:
321 /// Used to determine if the Constant needs the queue pointer.
322 DenseMap<const Constant *, uint8_t> ConstantStatus;
323 const unsigned CodeObjectVersion;
324};
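// [Editorial sketch, not part of AMDGPUAttributor.cpp] The helpers above consume
// attributes encoded as "N" or "N,M" strings (e.g. "amdgpu-flat-work-group-size"
// ="64,256", "amdgpu-waves-per-eu"="2"). The real parsing and validation is done
// by AMDGPU::getIntegerPairAttribute; parsePair below is a simplified stand-in
// that only shows the shape of the data.

#include <cstdio>
#include <optional>
#include <string>
#include <utility>

using PairTy = std::pair<unsigned, std::optional<unsigned>>;

static std::optional<PairTy> parsePair(const std::string &Value) {
  if (Value.empty())
    return std::nullopt;
  std::size_t Comma = Value.find(',');
  unsigned First = static_cast<unsigned>(std::stoul(Value.substr(0, Comma)));
  if (Comma == std::string::npos)
    return PairTy{First, std::nullopt}; // second value left to the caller
  unsigned Second =
      static_cast<unsigned>(std::stoul(Value.substr(Comma + 1)));
  return PairTy{First, Second};
}

int main() {
  auto FWGS = parsePair("64,256"); // both bounds given
  auto WPEU = parsePair("2");      // max omitted; getWavesPerEUAttr fills it in
                                   // from the subtarget maximum
  std::printf("flat-work-group-size: %u-%u\n", FWGS->first, *FWGS->second);
  std::printf("waves-per-eu: %u-(subtarget max)\n", WPEU->first);
}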
325
326struct AAAMDAttributes
327 : public StateWrapper<BitIntegerState<uint32_t, ALL_ARGUMENT_MASK, 0>,
328 AbstractAttribute> {
329 using Base = StateWrapper<BitIntegerState<uint32_t, ALL_ARGUMENT_MASK, 0>,
330 AbstractAttribute>;
331
332 AAAMDAttributes(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
333
334 /// Create an abstract attribute view for the position \p IRP.
335 static AAAMDAttributes &createForPosition(const IRPosition &IRP,
336 Attributor &A);
337
338 /// See AbstractAttribute::getName().
339 StringRef getName() const override { return "AAAMDAttributes"; }
340
341 /// See AbstractAttribute::getIdAddr().
342 const char *getIdAddr() const override { return &ID; }
343
344 /// This function should return true if the type of the \p AA is
345 /// AAAMDAttributes.
346 static bool classof(const AbstractAttribute *AA) {
347 return (AA->getIdAddr() == &ID);
348 }
349
350 /// Unique ID (due to the unique address)
351 static const char ID;
352};
353const char AAAMDAttributes::ID = 0;
354
355struct AAUniformWorkGroupSize
356 : public StateWrapper<BooleanState, AbstractAttribute> {
357 using Base = StateWrapper<BooleanState, AbstractAttribute>;
358 AAUniformWorkGroupSize(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
359
360 /// Create an abstract attribute view for the position \p IRP.
361 static AAUniformWorkGroupSize &createForPosition(const IRPosition &IRP,
362 Attributor &A);
363
364 /// See AbstractAttribute::getName().
365 StringRef getName() const override { return "AAUniformWorkGroupSize"; }
366
367 /// See AbstractAttribute::getIdAddr().
368 const char *getIdAddr() const override { return &ID; }
369
370 /// This function should return true if the type of the \p AA is
371 /// AAAMDAttributes.
372 static bool classof(const AbstractAttribute *AA) {
373 return (AA->getIdAddr() == &ID);
374 }
375
376 /// Unique ID (due to the unique address)
377 static const char ID;
378};
379const char AAUniformWorkGroupSize::ID = 0;
380
381struct AAUniformWorkGroupSizeFunction : public AAUniformWorkGroupSize {
382 AAUniformWorkGroupSizeFunction(const IRPosition &IRP, Attributor &A)
383 : AAUniformWorkGroupSize(IRP, A) {}
384
385 void initialize(Attributor &A) override {
386 Function *F = getAssociatedFunction();
387 CallingConv::ID CC = F->getCallingConv();
388
389 if (CC != CallingConv::AMDGPU_KERNEL)
390 return;
391
392 bool InitialValue = false;
393 if (F->hasFnAttribute("uniform-work-group-size"))
394 InitialValue =
395 F->getFnAttribute("uniform-work-group-size").getValueAsString() ==
396 "true";
397
398 if (InitialValue)
399 indicateOptimisticFixpoint();
400 else
401 indicatePessimisticFixpoint();
402 }
403
404 ChangeStatus updateImpl(Attributor &A) override {
405 ChangeStatus Change = ChangeStatus::UNCHANGED;
406
407 auto CheckCallSite = [&](AbstractCallSite CS) {
408 Function *Caller = CS.getInstruction()->getFunction();
409 LLVM_DEBUG(dbgs() << "[AAUniformWorkGroupSize] Call " << Caller->getName()
410 << "->" << getAssociatedFunction()->getName() << "\n");
411
412 const auto *CallerInfo = A.getAAFor<AAUniformWorkGroupSize>(
413 *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
414 if (!CallerInfo || !CallerInfo->isValidState())
415 return false;
416
417 Change = Change | clampStateAndIndicateChange(this->getState(),
418 CallerInfo->getState());
419
420 return true;
421 };
422
423 bool AllCallSitesKnown = true;
424 if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
425 return indicatePessimisticFixpoint();
426
427 return Change;
428 }
429
430 ChangeStatus manifest(Attributor &A) override {
431 SmallVector<Attribute, 8> AttrList;
432 LLVMContext &Ctx = getAssociatedFunction()->getContext();
433
434 AttrList.push_back(Attribute::get(Ctx, "uniform-work-group-size",
435 getAssumed() ? "true" : "false"));
436 return A.manifestAttrs(getIRPosition(), AttrList,
437 /* ForceReplace */ true);
438 }
439
440 bool isValidState() const override {
441 // This state is always valid, even when the state is false.
442 return true;
443 }
444
445 const std::string getAsStr(Attributor *) const override {
446 return "AMDWorkGroupSize[" + std::to_string(getAssumed()) + "]";
447 }
448
449 /// See AbstractAttribute::trackStatistics()
450 void trackStatistics() const override {}
451};
452
453AAUniformWorkGroupSize &
454AAUniformWorkGroupSize::createForPosition(const IRPosition &IRP,
455 Attributor &A) {
456 if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
457 return *new (A.Allocator) AAUniformWorkGroupSizeFunction(IRP, A);
458 llvm_unreachable(
459 "AAUniformWorkGroupSize is only valid for function position");
460}
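// [Editorial sketch, not part of AMDGPUAttributor.cpp] The propagation performed
// by AAUniformWorkGroupSizeFunction boils down to: a non-kernel function's
// "uniform-work-group-size" is the logical AND of all of its callers' values,
// iterated to a fixed point. The call graph and function names below are made up.

#include <cstdio>
#include <map>
#include <string>
#include <vector>

int main() {
  // callee -> callers
  std::map<std::string, std::vector<std::string>> Callers = {
      {"helper", {"kernel_uniform", "kernel_nonuniform"}},
      {"leaf", {"helper"}}};
  // Kernels are fixed from their existing attribute; others start optimistic.
  std::map<std::string, bool> Uniform = {{"kernel_uniform", true},
                                         {"kernel_nonuniform", false},
                                         {"helper", true},
                                         {"leaf", true}};

  for (bool Changed = true; Changed;) { // iterate to a fixed point
    Changed = false;
    for (auto &[Callee, Cs] : Callers) {
      bool Meet = true;
      for (const std::string &Caller : Cs)
        Meet = Meet && Uniform[Caller]; // clamp to each caller's state
      if (Meet != Uniform[Callee]) {
        Uniform[Callee] = Meet;
        Changed = true;
      }
    }
  }

  // "helper" and, transitively, "leaf" end up "false" because one caller
  // kernel is non-uniform.
  for (const auto &[F, B] : Uniform)
    std::printf("%s: uniform-work-group-size=%s\n", F.c_str(),
                B ? "true" : "false");
}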
461
462struct AAAMDAttributesFunction : public AAAMDAttributes {
463 AAAMDAttributesFunction(const IRPosition &IRP, Attributor &A)
464 : AAAMDAttributes(IRP, A) {}
465
466 void initialize(Attributor &A) override {
467 Function *F = getAssociatedFunction();
468
469 // If the function requires the implicit arg pointer due to sanitizers,
470 // assume it's needed even if explicitly marked as not requiring it.
471 // Flat scratch initialization is needed because `asan_malloc_impl`
472 // calls introduced later in pipeline will have flat scratch accesses.
473 // FIXME: FLAT_SCRATCH_INIT will not be required here if device-libs
474 // implementation for `asan_malloc_impl` is updated.
475 const bool HasSanitizerAttrs = hasSanitizerAttributes(*F);
476 if (HasSanitizerAttrs) {
477 removeAssumedBits(IMPLICIT_ARG_PTR);
478 removeAssumedBits(HOSTCALL_PTR);
479 removeAssumedBits(FLAT_SCRATCH_INIT);
480 }
481
482 for (auto Attr : ImplicitAttrs) {
483 if (HasSanitizerAttrs &&
484 (Attr.first == IMPLICIT_ARG_PTR || Attr.first == HOSTCALL_PTR ||
485 Attr.first == FLAT_SCRATCH_INIT))
486 continue;
487
488 if (F->hasFnAttribute(Attr.second))
489 addKnownBits(Attr.first);
490 }
491
492 if (F->isDeclaration())
493 return;
494
495 // Ignore functions with graphics calling conventions; these are currently
496 // not allowed to have kernel arguments.
497 if (AMDGPU::isGraphics(F->getCallingConv())) {
498 indicatePessimisticFixpoint();
499 return;
500 }
501 }
502
503 ChangeStatus updateImpl(Attributor &A) override {
504 Function *F = getAssociatedFunction();
505 // The current assumed state used to determine a change.
506 auto OrigAssumed = getAssumed();
507
508 // Check for Intrinsics and propagate attributes.
509 const AACallEdges *AAEdges = A.getAAFor<AACallEdges>(
510 *this, this->getIRPosition(), DepClassTy::REQUIRED);
511 if (!AAEdges || !AAEdges->isValidState() ||
512 AAEdges->hasNonAsmUnknownCallee())
513 return indicatePessimisticFixpoint();
514
515 bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());
516
517 bool NeedsImplicit = false;
518 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
519 bool HasApertureRegs = InfoCache.hasApertureRegs(*F);
520 bool SupportsGetDoorbellID = InfoCache.supportsGetDoorbellID(*F);
521 unsigned COV = InfoCache.getCodeObjectVersion();
522
523 for (Function *Callee : AAEdges->getOptimisticEdges()) {
524 Intrinsic::ID IID = Callee->getIntrinsicID();
525 if (IID == Intrinsic::not_intrinsic) {
526 const AAAMDAttributes *AAAMD = A.getAAFor<AAAMDAttributes>(
527 *this, IRPosition::function(*Callee), DepClassTy::REQUIRED);
528 if (!AAAMD || !AAAMD->isValidState())
529 return indicatePessimisticFixpoint();
530 *this &= *AAAMD;
531 continue;
532 }
533
534 bool NonKernelOnly = false;
535 ImplicitArgumentMask AttrMask =
536 intrinsicToAttrMask(IID, NonKernelOnly, NeedsImplicit,
537 HasApertureRegs, SupportsGetDoorbellID, COV);
538
539 if (AttrMask == UNKNOWN_INTRINSIC) {
540 // Assume not-nocallback intrinsics may invoke a function which accesses
541 // implicit arguments.
542 //
543 // FIXME: This isn't really the correct check. We want to ensure it
544 // isn't calling any function that may use implicit arguments regardless
545 // of whether it's internal to the module or not.
546 //
547 // TODO: Ignoring callsite attributes.
548 if (!Callee->hasFnAttribute(Attribute::NoCallback))
549 return indicatePessimisticFixpoint();
550 continue;
551 }
552
553 if (AttrMask != NOT_IMPLICIT_INPUT) {
554 if ((IsNonEntryFunc || !NonKernelOnly))
555 removeAssumedBits(AttrMask);
556 }
557 }
558
559 // Need implicitarg_ptr to access queue_ptr, private_base, and shared_base.
560 if (NeedsImplicit)
561 removeAssumedBits(IMPLICIT_ARG_PTR);
562
563 if (isAssumed(QUEUE_PTR) && checkForQueuePtr(A)) {
564 // Under V5, we need implicitarg_ptr + offsets to access private_base or
565 // shared_base. We do not actually need queue_ptr.
566 if (COV >= 5)
567 removeAssumedBits(IMPLICIT_ARG_PTR);
568 else
569 removeAssumedBits(QUEUE_PTR);
570 }
571
572 if (funcRetrievesMultigridSyncArg(A, COV)) {
573 assert(!isAssumed(IMPLICIT_ARG_PTR) &&
574 "multigrid_sync_arg needs implicitarg_ptr");
575 removeAssumedBits(MULTIGRID_SYNC_ARG);
576 }
577
578 if (funcRetrievesHostcallPtr(A, COV)) {
579 assert(!isAssumed(IMPLICIT_ARG_PTR) && "hostcall needs implicitarg_ptr");
580 removeAssumedBits(HOSTCALL_PTR);
581 }
582
583 if (funcRetrievesHeapPtr(A, COV)) {
584 assert(!isAssumed(IMPLICIT_ARG_PTR) && "heap_ptr needs implicitarg_ptr");
585 removeAssumedBits(HEAP_PTR);
586 }
587
588 if (isAssumed(QUEUE_PTR) && funcRetrievesQueuePtr(A, COV)) {
589 assert(!isAssumed(IMPLICIT_ARG_PTR) && "queue_ptr needs implicitarg_ptr");
590 removeAssumedBits(QUEUE_PTR);
591 }
592
593 if (isAssumed(LDS_KERNEL_ID) && funcRetrievesLDSKernelId(A)) {
594 removeAssumedBits(LDS_KERNEL_ID);
595 }
596
597 if (isAssumed(DEFAULT_QUEUE) && funcRetrievesDefaultQueue(A, COV))
598 removeAssumedBits(DEFAULT_QUEUE);
599
600 if (isAssumed(COMPLETION_ACTION) && funcRetrievesCompletionAction(A, COV))
601 removeAssumedBits(COMPLETION_ACTION);
602
603 if (isAssumed(FLAT_SCRATCH_INIT) && needFlatScratchInit(A))
604 removeAssumedBits(FLAT_SCRATCH_INIT);
605
606 return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED
607 : ChangeStatus::UNCHANGED;
608 }
609
610 ChangeStatus manifest(Attributor &A) override {
611 SmallVector<Attribute, 8> AttrList;
612 LLVMContext &Ctx = getAssociatedFunction()->getContext();
613
614 for (auto Attr : ImplicitAttrs) {
615 if (isKnown(Attr.first))
616 AttrList.push_back(Attribute::get(Ctx, Attr.second));
617 }
618
619 return A.manifestAttrs(getIRPosition(), AttrList,
620 /* ForceReplace */ true);
621 }
622
623 const std::string getAsStr(Attributor *) const override {
624 std::string Str;
625 raw_string_ostream OS(Str);
626 OS << "AMDInfo[";
627 for (auto Attr : ImplicitAttrs)
628 if (isAssumed(Attr.first))
629 OS << ' ' << Attr.second;
630 OS << " ]";
631 return OS.str();
632 }
633
634 /// See AbstractAttribute::trackStatistics()
635 void trackStatistics() const override {}
636
637private:
638 bool checkForQueuePtr(Attributor &A) {
639 Function *F = getAssociatedFunction();
640 bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());
641
642 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
643
644 bool NeedsQueuePtr = false;
645
646 auto CheckAddrSpaceCasts = [&](Instruction &I) {
647 unsigned SrcAS = static_cast<AddrSpaceCastInst &>(I).getSrcAddressSpace();
648 if (castRequiresQueuePtr(SrcAS)) {
649 NeedsQueuePtr = true;
650 return false;
651 }
652 return true;
653 };
654
655 bool HasApertureRegs = InfoCache.hasApertureRegs(*F);
656
657 // `checkForAllInstructions` is much cheaper than going through all
658 // instructions, try it first.
659
660 // The queue pointer is not needed if aperture regs are present.
661 if (!HasApertureRegs) {
662 bool UsedAssumedInformation = false;
663 A.checkForAllInstructions(CheckAddrSpaceCasts, *this,
664 {Instruction::AddrSpaceCast},
665 UsedAssumedInformation);
666 }
667
668 // If we found that we need the queue pointer, nothing else to do.
669 if (NeedsQueuePtr)
670 return true;
671
672 if (!IsNonEntryFunc && HasApertureRegs)
673 return false;
674
675 for (BasicBlock &BB : *F) {
676 for (Instruction &I : BB) {
677 for (const Use &U : I.operands()) {
678 if (const auto *C = dyn_cast<Constant>(U)) {
679 if (InfoCache.needsQueuePtr(C, *F))
680 return true;
681 }
682 }
683 }
684 }
685
686 return false;
687 }
688
689 bool funcRetrievesMultigridSyncArg(Attributor &A, unsigned COV) {
690 auto Pos = AMDGPU::getMultigridSyncArgImplicitArgPosition(COV);
691 AA::RangeTy Range(Pos, 8);
692 return funcRetrievesImplicitKernelArg(A, Range);
693 }
694
695 bool funcRetrievesHostcallPtr(Attributor &A, unsigned COV) {
696 auto Pos = AMDGPU::getHostcallImplicitArgPosition(COV);
697 AA::RangeTy Range(Pos, 8);
698 return funcRetrievesImplicitKernelArg(A, Range);
699 }
700
701 bool funcRetrievesDefaultQueue(Attributor &A, unsigned COV) {
702 auto Pos = AMDGPU::getDefaultQueueImplicitArgPosition(COV);
703 AA::RangeTy Range(Pos, 8);
704 return funcRetrievesImplicitKernelArg(A, Range);
705 }
706
707 bool funcRetrievesCompletionAction(Attributor &A, unsigned COV) {
708 auto Pos = AMDGPU::getCompletionActionImplicitArgPosition(COV);
709 AA::RangeTy Range(Pos, 8);
710 return funcRetrievesImplicitKernelArg(A, Range);
711 }
712
713 bool funcRetrievesHeapPtr(Attributor &A, unsigned COV) {
714 if (COV < 5)
715 return false;
716 AA::RangeTy Range(AMDGPU::ImplicitArg::HEAP_PTR_OFFSET, 8);
717 return funcRetrievesImplicitKernelArg(A, Range);
718 }
719
720 bool funcRetrievesQueuePtr(Attributor &A, unsigned COV) {
721 if (COV < 5)
722 return false;
723 AA::RangeTy Range(AMDGPU::ImplicitArg::QUEUE_PTR_OFFSET, 8);
724 return funcRetrievesImplicitKernelArg(A, Range);
725 }
726
727 bool funcRetrievesImplicitKernelArg(Attributor &A, AA::RangeTy Range) {
728 // Check if this is a call to the implicitarg_ptr builtin and it
729 // is used to retrieve the hostcall pointer. The implicit arg for
730 // hostcall is treated as unused only if every use of the implicitarg_ptr
731 // is a load that clearly does not retrieve any byte of the
732 // hostcall pointer. We check this by tracing all the uses of the
733 // initial call to the implicitarg_ptr intrinsic.
734 auto DoesNotLeadToKernelArgLoc = [&](Instruction &I) {
735 auto &Call = cast<CallBase>(I);
736 if (Call.getIntrinsicID() != Intrinsic::amdgcn_implicitarg_ptr)
737 return true;
738
739 const auto *PointerInfoAA = A.getAAFor<AAPointerInfo>(
740 *this, IRPosition::callsite_returned(Call), DepClassTy::REQUIRED);
741 if (!PointerInfoAA || !PointerInfoAA->getState().isValidState())
742 return false;
743
744 return PointerInfoAA->forallInterferingAccesses(
745 Range, [](const AAPointerInfo::Access &Acc, bool IsExact) {
746 return Acc.getRemoteInst()->isDroppable();
747 });
748 };
749
750 bool UsedAssumedInformation = false;
751 return !A.checkForAllCallLikeInstructions(DoesNotLeadToKernelArgLoc, *this,
752 UsedAssumedInformation);
753 }
754
755 bool funcRetrievesLDSKernelId(Attributor &A) {
756 auto DoesNotRetrieve = [&](Instruction &I) {
757 auto &Call = cast<CallBase>(I);
758 return Call.getIntrinsicID() != Intrinsic::amdgcn_lds_kernel_id;
759 };
760 bool UsedAssumedInformation = false;
761 return !A.checkForAllCallLikeInstructions(DoesNotRetrieve, *this,
762 UsedAssumedInformation);
763 }
764
765 // Returns true if FlatScratchInit is needed, i.e., no-flat-scratch-init is
766 // not to be set.
767 bool needFlatScratchInit(Attributor &A) {
768 assert(isAssumed(FLAT_SCRATCH_INIT)); // only called if the bit is still set
769
770 // Check all AddrSpaceCast instructions. FlatScratchInit is needed if
771 // there is a cast from PRIVATE_ADDRESS.
772 auto AddrSpaceCastNotFromPrivate = [](Instruction &I) {
773 return cast<AddrSpaceCastInst>(I).getSrcAddressSpace() !=
774 AMDGPUAS::PRIVATE_ADDRESS;
775 };
776
777 bool UsedAssumedInformation = false;
778 if (!A.checkForAllInstructions(AddrSpaceCastNotFromPrivate, *this,
779 {Instruction::AddrSpaceCast},
780 UsedAssumedInformation))
781 return true;
782
783 // Check for addrSpaceCast from PRIVATE_ADDRESS in constant expressions
784 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
785
786 Function *F = getAssociatedFunction();
787 for (Instruction &I : instructions(F)) {
788 for (const Use &U : I.operands()) {
789 if (const auto *C = dyn_cast<Constant>(U)) {
790 if (InfoCache.checkConstForAddrSpaceCastFromPrivate(C))
791 return true;
792 }
793 }
794 }
795
796 // Finally check callees.
797
798 // This is called on each callee; false means callee shouldn't have
799 // no-flat-scratch-init.
800 auto CheckForNoFlatScratchInit = [&](Instruction &I) {
801 const auto &CB = cast<CallBase>(I);
802 const Function *Callee = CB.getCalledFunction();
803
804 // Callee == 0 for inline asm or indirect call with known callees.
805 // In the latter case, updateImpl() already checked the callees and we
806 // know their FLAT_SCRATCH_INIT bit is set.
807 // If function has indirect call with unknown callees, the bit is
808 // already removed in updateImpl() and execution won't reach here.
809 if (!Callee)
810 return true;
811
812 return Callee->getIntrinsicID() !=
813 Intrinsic::amdgcn_addrspacecast_nonnull;
814 };
815
816 UsedAssumedInformation = false;
817 // If any callee is false (i.e. need FlatScratchInit),
818 // checkForAllCallLikeInstructions returns false, in which case this
819 // function returns true.
820 return !A.checkForAllCallLikeInstructions(CheckForNoFlatScratchInit, *this,
821 UsedAssumedInformation);
822 }
823};
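// [Editorial sketch, not part of AMDGPUAttributor.cpp] The byte-range reasoning in
// funcRetrievesImplicitKernelArg above: an implicit argument that lives at offset
// Pos of the implicit kernarg segment counts as "retrieved" when some
// non-droppable access through amdgcn_implicitarg_ptr overlaps [Pos, Pos + 8).
// The offsets used below are illustrative; the real ones come from
// AMDGPUBaseInfo and depend on the code object version.

#include <cstdio>

struct Access {
  unsigned Offset, Size;
  bool Droppable;
};

static bool retrievesArg(const Access *Accs, unsigned N, unsigned Pos) {
  for (unsigned I = 0; I != N; ++I) {
    bool Overlaps =
        Accs[I].Offset < Pos + 8 && Pos < Accs[I].Offset + Accs[I].Size;
    if (Overlaps && !Accs[I].Droppable)
      return true; // this argument cannot be marked "amdgpu-no-*"
  }
  return false;
}

int main() {
  Access Accs[] = {{24, 8, false}}; // one 8-byte load at offset 24
  std::printf("arg at offset 24 retrieved: %d\n", retrievesArg(Accs, 1, 24));
  std::printf("arg at offset 80 retrieved: %d\n", retrievesArg(Accs, 1, 80));
}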
824
825AAAMDAttributes &AAAMDAttributes::createForPosition(const IRPosition &IRP,
826 Attributor &A) {
827 if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
828 return *new (A.Allocator) AAAMDAttributesFunction(IRP, A);
829 llvm_unreachable("AAAMDAttributes is only valid for function position");
830}
831
832/// Base class to derive different size ranges.
833struct AAAMDSizeRangeAttribute
834 : public StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t> {
835 using Base = StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t>;
836
837 StringRef AttrName;
838
839 AAAMDSizeRangeAttribute(const IRPosition &IRP, Attributor &A,
840 StringRef AttrName)
841 : Base(IRP, 32), AttrName(AttrName) {}
842
843 /// See AbstractAttribute::trackStatistics()
844 void trackStatistics() const override {}
845
846 template <class AttributeImpl> ChangeStatus updateImplImpl(Attributor &A) {
847 ChangeStatus Change = ChangeStatus::UNCHANGED;
848
849 auto CheckCallSite = [&](AbstractCallSite CS) {
850 Function *Caller = CS.getInstruction()->getFunction();
851 LLVM_DEBUG(dbgs() << '[' << getName() << "] Call " << Caller->getName()
852 << "->" << getAssociatedFunction()->getName() << '\n');
853
854 const auto *CallerInfo = A.getAAFor<AttributeImpl>(
855 *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
856 if (!CallerInfo || !CallerInfo->isValidState())
857 return false;
858
859 Change |=
860 clampStateAndIndicateChange(this->getState(), CallerInfo->getState());
861
862 return true;
863 };
864
865 bool AllCallSitesKnown = true;
866 if (!A.checkForAllCallSites(CheckCallSite, *this,
867 /*RequireAllCallSites=*/true,
868 AllCallSitesKnown))
869 return indicatePessimisticFixpoint();
870
871 return Change;
872 }
873
874 /// Clamp the assumed range to the default value ([Min, Max]) and emit the
875 /// attribute if it is not the same as the default.
876 ChangeStatus
877 emitAttributeIfNotDefaultAfterClamp(Attributor &A,
878 std::pair<unsigned, unsigned> Default) {
879 auto [Min, Max] = Default;
880 unsigned Lower = getAssumed().getLower().getZExtValue();
881 unsigned Upper = getAssumed().getUpper().getZExtValue();
882
883 // Clamp the range to the default value.
884 if (Lower < Min)
885 Lower = Min;
886 if (Upper > Max + 1)
887 Upper = Max + 1;
888
889 // No manifest if the value is invalid or same as default after clamp.
890 if ((Lower == Min && Upper == Max + 1) || (Upper < Lower))
891 return ChangeStatus::UNCHANGED;
892
893 Function *F = getAssociatedFunction();
894 LLVMContext &Ctx = F->getContext();
895 SmallString<10> Buffer;
896 raw_svector_ostream OS(Buffer);
897 OS << Lower << ',' << Upper - 1;
898 return A.manifestAttrs(getIRPosition(),
899 {Attribute::get(Ctx, AttrName, OS.str())},
900 /*ForceReplace=*/true);
901 }
902
903 const std::string getAsStr(Attributor *) const override {
904 std::string Str;
905 raw_string_ostream OS(Str);
906 OS << getName() << '[';
907 OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1;
908 OS << ']';
909 return OS.str();
910 }
911};
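// [Editorial sketch, not part of AMDGPUAttributor.cpp] The clamping rule in
// emitAttributeIfNotDefaultAfterClamp above: the assumed half-open range
// [Lower, Upper) is clamped into the default range [Min, Max + 1), and the
// attribute is only emitted when the clamped range is valid and narrower than
// the default. The numbers in main() are made up for illustration.

#include <algorithm>
#include <cstdio>
#include <optional>
#include <string>

static std::optional<std::string>
clampAndRender(unsigned Lower, unsigned Upper, unsigned Min, unsigned Max) {
  Lower = std::max(Lower, Min);
  Upper = std::min(Upper, Max + 1);
  if ((Lower == Min && Upper == Max + 1) || Upper < Lower)
    return std::nullopt; // default or invalid: nothing to manifest
  return std::to_string(Lower) + ',' + std::to_string(Upper - 1);
}

int main() {
  // Hypothetical default flat work group size range of [1, 1024].
  if (auto S = clampAndRender(/*Lower=*/64, /*Upper=*/257, /*Min=*/1,
                              /*Max=*/1024))
    std::printf("amdgpu-flat-work-group-size=\"%s\"\n", S->c_str()); // 64,256
  if (!clampAndRender(1, 1025, 1, 1024))
    std::printf("range equals the default, attribute not emitted\n");
}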
912
913/// Propagate amdgpu-flat-work-group-size attribute.
914struct AAAMDFlatWorkGroupSize : public AAAMDSizeRangeAttribute {
915 AAAMDFlatWorkGroupSize(const IRPosition &IRP, Attributor &A)
916 : AAAMDSizeRangeAttribute(IRP, A, "amdgpu-flat-work-group-size") {}
917
918 void initialize(Attributor &A) override {
919 Function *F = getAssociatedFunction();
920 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
921
922 bool HasAttr = false;
923 auto Range = InfoCache.getDefaultFlatWorkGroupSize(*F);
924 auto MaxRange = InfoCache.getMaximumFlatWorkGroupRange(*F);
925
926 if (auto Attr = InfoCache.getFlatWorkGroupSizeAttr(*F)) {
927 // We only consider an attribute that is not max range because the front
928 // end always emits the attribute, unfortunately, and sometimes it emits
929 // the max range.
930 if (*Attr != MaxRange) {
931 Range = *Attr;
932 HasAttr = true;
933 }
934 }
935
936 // We don't want to directly clamp the state if it's the max range because
937 // that is basically the worst state.
938 if (Range == MaxRange)
939 return;
940
941 auto [Min, Max] = Range;
942 ConstantRange CR(APInt(32, Min), APInt(32, Max + 1));
943 IntegerRangeState IRS(CR);
944 clampStateAndIndicateChange(this->getState(), IRS);
945
946 if (HasAttr || AMDGPU::isEntryFunctionCC(F->getCallingConv()))
947 indicateOptimisticFixpoint();
948 }
949
950 ChangeStatus updateImpl(Attributor &A) override {
951 return updateImplImpl<AAAMDFlatWorkGroupSize>(A);
952 }
953
954 /// Create an abstract attribute view for the position \p IRP.
955 static AAAMDFlatWorkGroupSize &createForPosition(const IRPosition &IRP,
956 Attributor &A);
957
958 ChangeStatus manifest(Attributor &A) override {
959 Function *F = getAssociatedFunction();
960 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
961 return emitAttributeIfNotDefaultAfterClamp(
962 A, InfoCache.getMaximumFlatWorkGroupRange(*F));
963 }
964
965 /// See AbstractAttribute::getName()
966 StringRef getName() const override { return "AAAMDFlatWorkGroupSize"; }
967
968 /// See AbstractAttribute::getIdAddr()
969 const char *getIdAddr() const override { return &ID; }
970
971 /// This function should return true if the type of the \p AA is
972 /// AAAMDFlatWorkGroupSize
973 static bool classof(const AbstractAttribute *AA) {
974 return (AA->getIdAddr() == &ID);
975 }
976
977 /// Unique ID (due to the unique address)
978 static const char ID;
979};
980
981const char AAAMDFlatWorkGroupSize::ID = 0;
982
983AAAMDFlatWorkGroupSize &
984AAAMDFlatWorkGroupSize::createForPosition(const IRPosition &IRP,
985 Attributor &A) {
986 if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
987 return *new (A.Allocator) AAAMDFlatWorkGroupSize(IRP, A);
988 llvm_unreachable(
989 "AAAMDFlatWorkGroupSize is only valid for function position");
990}
991
992struct TupleDecIntegerRangeState : public AbstractState {
993 DecIntegerState<uint32_t> X, Y, Z;
994
995 bool isValidState() const override {
996 return X.isValidState() && Y.isValidState() && Z.isValidState();
997 }
998
999 bool isAtFixpoint() const override {
1000 return X.isAtFixpoint() && Y.isAtFixpoint() && Z.isAtFixpoint();
1001 }
1002
1003 ChangeStatus indicateOptimisticFixpoint() override {
1004 return X.indicateOptimisticFixpoint() | Y.indicateOptimisticFixpoint() |
1005 Z.indicateOptimisticFixpoint();
1006 }
1007
1008 ChangeStatus indicatePessimisticFixpoint() override {
1009 return X.indicatePessimisticFixpoint() | Y.indicatePessimisticFixpoint() |
1010 Z.indicatePessimisticFixpoint();
1011 }
1012
1013 TupleDecIntegerRangeState operator^=(const TupleDecIntegerRangeState &Other) {
1014 X ^= Other.X;
1015 Y ^= Other.Y;
1016 Z ^= Other.Z;
1017 return *this;
1018 }
1019
1020 bool operator==(const TupleDecIntegerRangeState &Other) const {
1021 return X == Other.X && Y == Other.Y && Z == Other.Z;
1022 }
1023
1024 TupleDecIntegerRangeState &getAssumed() { return *this; }
1025 const TupleDecIntegerRangeState &getAssumed() const { return *this; }
1026};
1027
1028using AAAMDMaxNumWorkgroupsState =
1029 StateWrapper<TupleDecIntegerRangeState, AbstractAttribute, uint32_t>;
1030
1031/// Propagate amdgpu-max-num-workgroups attribute.
1032struct AAAMDMaxNumWorkgroups
1033 : public StateWrapper<TupleDecIntegerRangeState, AbstractAttribute> {
1034 using Base = StateWrapper<TupleDecIntegerRangeState, AbstractAttribute>;
1035
1036 AAAMDMaxNumWorkgroups(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
1037
1038 void initialize(Attributor &A) override {
1039 Function *F = getAssociatedFunction();
1040 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
1041
1042 SmallVector<unsigned> MaxNumWorkgroups = InfoCache.getMaxNumWorkGroups(*F);
1043
1044 X.takeKnownMinimum(MaxNumWorkgroups[0]);
1045 Y.takeKnownMinimum(MaxNumWorkgroups[1]);
1046 Z.takeKnownMinimum(MaxNumWorkgroups[2]);
1047
1048 if (AMDGPU::isEntryFunctionCC(F->getCallingConv()))
1049 indicatePessimisticFixpoint();
1050 }
1051
1052 ChangeStatus updateImpl(Attributor &A) override {
1053 ChangeStatus Change = ChangeStatus::UNCHANGED;
1054
1055 auto CheckCallSite = [&](AbstractCallSite CS) {
1056 Function *Caller = CS.getInstruction()->getFunction();
1057 LLVM_DEBUG(dbgs() << "[AAAMDMaxNumWorkgroups] Call " << Caller->getName()
1058 << "->" << getAssociatedFunction()->getName() << '\n');
1059
1060 const auto *CallerInfo = A.getAAFor<AAAMDMaxNumWorkgroups>(
1061 *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
1062 if (!CallerInfo || !CallerInfo->isValidState())
1063 return false;
1064
1065 Change |=
1066 clampStateAndIndicateChange(this->getState(), CallerInfo->getState());
1067 return true;
1068 };
1069
1070 bool AllCallSitesKnown = true;
1071 if (!A.checkForAllCallSites(CheckCallSite, *this,
1072 /*RequireAllCallSites=*/true,
1073 AllCallSitesKnown))
1074 return indicatePessimisticFixpoint();
1075
1076 return Change;
1077 }
1078
1079 /// Create an abstract attribute view for the position \p IRP.
1080 static AAAMDMaxNumWorkgroups &createForPosition(const IRPosition &IRP,
1081 Attributor &A);
1082
1083 ChangeStatus manifest(Attributor &A) override {
1084 Function *F = getAssociatedFunction();
1085 LLVMContext &Ctx = F->getContext();
1086 SmallString<32> Buffer;
1087 raw_svector_ostream OS(Buffer);
1088 OS << X.getAssumed() << ',' << Y.getAssumed() << ',' << Z.getAssumed();
1089
1090 // TODO: Should annotate loads of the group size for this to do anything
1091 // useful.
1092 return A.manifestAttrs(
1093 getIRPosition(),
1094 {Attribute::get(Ctx, "amdgpu-max-num-workgroups", OS.str())},
1095 /* ForceReplace= */ true);
1096 }
1097
1098 StringRef getName() const override { return "AAAMDMaxNumWorkgroups"; }
1099
1100 const std::string getAsStr(Attributor *) const override {
1101 std::string Buffer = "AAAMDMaxNumWorkgroupsState[";
1102 raw_string_ostream OS(Buffer);
1103 OS << X.getAssumed() << ',' << Y.getAssumed() << ',' << Z.getAssumed()
1104 << ']';
1105 return OS.str();
1106 }
1107
1108 const char *getIdAddr() const override { return &ID; }
1109
1110 /// This function should return true if the type of the \p AA is
1111 /// AAAMDMaxNumWorkgroups
1112 static bool classof(const AbstractAttribute *AA) {
1113 return (AA->getIdAddr() == &ID);
1114 }
1115
1116 void trackStatistics() const override {}
1117
1118 /// Unique ID (due to the unique address)
1119 static const char ID;
1120};
1121
1122const char AAAMDMaxNumWorkgroups::ID = 0;
1123
1124AAAMDMaxNumWorkgroups &
1125AAAMDMaxNumWorkgroups::createForPosition(const IRPosition &IRP, Attributor &A) {
1126 if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
1127 return *new (A.Allocator) AAAMDMaxNumWorkgroups(IRP, A);
1128 llvm_unreachable("AAAMDMaxNumWorkgroups is only valid for function position");
1129}
1130
1131/// Propagate amdgpu-waves-per-eu attribute.
1132struct AAAMDWavesPerEU : public AAAMDSizeRangeAttribute {
1133 AAAMDWavesPerEU(const IRPosition &IRP, Attributor &A)
1134 : AAAMDSizeRangeAttribute(IRP, A, "amdgpu-waves-per-eu") {}
1135
1136 void initialize(Attributor &A) override {
1137 Function *F = getAssociatedFunction();
1138 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
1139
1140 // If the attribute exists, we will honor it if it is not the default.
1141 if (auto Attr = InfoCache.getWavesPerEUAttr(*F)) {
1142 std::pair<unsigned, unsigned> MaxWavesPerEURange{
1143 1U, InfoCache.getMaxWavesPerEU(*F)};
1144 if (*Attr != MaxWavesPerEURange) {
1145 auto [Min, Max] = *Attr;
1146 ConstantRange Range(APInt(32, Min), APInt(32, Max + 1));
1147 IntegerRangeState RangeState(Range);
1148 this->getState() = RangeState;
1149 indicateOptimisticFixpoint();
1150 return;
1151 }
1152 }
1153
1154 if (AMDGPU::isEntryFunctionCC(F->getCallingConv()))
1155 indicatePessimisticFixpoint();
1156 }
1157
1158 ChangeStatus updateImpl(Attributor &A) override {
1159 ChangeStatus Change = ChangeStatus::UNCHANGED;
1160
1161 auto CheckCallSite = [&](AbstractCallSite CS) {
1162 Function *Caller = CS.getInstruction()->getFunction();
1163 Function *Func = getAssociatedFunction();
1164 LLVM_DEBUG(dbgs() << '[' << getName() << "] Call " << Caller->getName()
1165 << "->" << Func->getName() << '\n');
1166 (void)Func;
1167
1168 const auto *CallerAA = A.getAAFor<AAAMDWavesPerEU>(
1169 *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
1170 if (!CallerAA || !CallerAA->isValidState())
1171 return false;
1172
1173 ConstantRange Assumed = getAssumed();
1174 unsigned Min = std::max(Assumed.getLower().getZExtValue(),
1175 CallerAA->getAssumed().getLower().getZExtValue());
1176 unsigned Max = std::max(Assumed.getUpper().getZExtValue(),
1177 CallerAA->getAssumed().getUpper().getZExtValue());
1178 ConstantRange Range(APInt(32, Min), APInt(32, Max));
1179 IntegerRangeState RangeState(Range);
1180 getState() = RangeState;
1181 Change |= getState() == Assumed ? ChangeStatus::UNCHANGED
1182 : ChangeStatus::CHANGED;
1183
1184 return true;
1185 };
1186
1187 bool AllCallSitesKnown = true;
1188 if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
1189 return indicatePessimisticFixpoint();
1190
1191 return Change;
1192 }
1193
1194 /// Create an abstract attribute view for the position \p IRP.
1195 static AAAMDWavesPerEU &createForPosition(const IRPosition &IRP,
1196 Attributor &A);
1197
1198 ChangeStatus manifest(Attributor &A) override {
1199 Function *F = getAssociatedFunction();
1200 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
1201 return emitAttributeIfNotDefaultAfterClamp(
1202 A, {1U, InfoCache.getMaxWavesPerEU(*F)});
1203 }
1204
1205 /// See AbstractAttribute::getName()
1206 StringRef getName() const override { return "AAAMDWavesPerEU"; }
1207
1208 /// See AbstractAttribute::getIdAddr()
1209 const char *getIdAddr() const override { return &ID; }
1210
1211 /// This function should return true if the type of the \p AA is
1212 /// AAAMDWavesPerEU
1213 static bool classof(const AbstractAttribute *AA) {
1214 return (AA->getIdAddr() == &ID);
1215 }
1216
1217 /// Unique ID (due to the unique address)
1218 static const char ID;
1219};
1220
1221const char AAAMDWavesPerEU::ID = 0;
1222
1223AAAMDWavesPerEU &AAAMDWavesPerEU::createForPosition(const IRPosition &IRP,
1224 Attributor &A) {
1225 if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
1226 return *new (A.Allocator) AAAMDWavesPerEU(IRP, A);
1227 llvm_unreachable("AAAMDWavesPerEU is only valid for function position");
1228}
1229
1230/// Compute the minimum number of AGPRs required to allocate the inline asm.
1231static unsigned inlineAsmGetNumRequiredAGPRs(const InlineAsm *IA,
1232 const CallBase &Call) {
1233 unsigned ArgNo = 0;
1234 unsigned ResNo = 0;
1235 unsigned AGPRDefCount = 0;
1236 unsigned AGPRUseCount = 0;
1237 unsigned MaxPhysReg = 0;
1238 const DataLayout &DL = Call.getFunction()->getParent()->getDataLayout();
1239
1240 // TODO: Overestimates due to not accounting for tied operands
1241 for (const InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
1242 Type *Ty = nullptr;
1243 switch (CI.Type) {
1244 case InlineAsm::isOutput: {
1245 Ty = Call.getType();
1246 if (auto *STy = dyn_cast<StructType>(Ty))
1247 Ty = STy->getElementType(ResNo);
1248 ++ResNo;
1249 break;
1250 }
1251 case InlineAsm::isInput: {
1252 Ty = Call.getArgOperand(ArgNo++)->getType();
1253 break;
1254 }
1255 case InlineAsm::isLabel:
1256 continue;
1257 case InlineAsm::isClobber:
1258 // Parse the physical register reference.
1259 break;
1260 }
1261
1262 for (StringRef Code : CI.Codes) {
1263 unsigned RegCount = 0;
1264 if (Code.starts_with("a")) {
1265 // Virtual register, compute number of registers based on the type.
1266 //
1267 // We ought to be going through TargetLowering to get the number of
1268 // registers, but we should avoid the dependence on CodeGen here.
1269 RegCount = divideCeil(DL.getTypeSizeInBits(Ty), 32);
1270 } else {
1271 // Physical register reference
1272 auto [Kind, RegIdx, NumRegs] = AMDGPU::parseAsmConstraintPhysReg(Code);
1273 if (Kind == 'a') {
1274 RegCount = NumRegs;
1275 MaxPhysReg = std::max(MaxPhysReg, std::min(RegIdx + NumRegs, 256u));
1276 }
1277
1278 continue;
1279 }
1280
1281 if (CI.Type == InlineAsm::isOutput) {
1282 // Apply tuple alignment requirement
1283 //
1284 // TODO: This is more conservative than necessary.
1285 AGPRDefCount = alignTo(AGPRDefCount, RegCount);
1286
1287 AGPRDefCount += RegCount;
1288 if (CI.isEarlyClobber) {
1289 AGPRUseCount = alignTo(AGPRUseCount, RegCount);
1290 AGPRUseCount += RegCount;
1291 }
1292 } else {
1293 AGPRUseCount = alignTo(AGPRUseCount, RegCount);
1294 AGPRUseCount += RegCount;
1295 }
1296 }
1297 }
1298
1299 unsigned MaxVirtReg = std::max(AGPRUseCount, AGPRDefCount);
1300
1301 // TODO: This is overly conservative. If there are any physical registers,
1302 // allocate any virtual registers after them so we don't have to solve optimal
1303 // packing.
1304 return std::min(MaxVirtReg + MaxPhysReg, 256u);
1305}
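// [Editorial sketch, not part of AMDGPUAttributor.cpp] The register-count
// arithmetic above, worked through for two hypothetical inline-asm operands: a
// 128-bit virtual AGPR output ("=a") and a 64-bit virtual AGPR input ("a").
// divideCeil/alignTo are local stand-ins mirroring the LLVM helpers of the same
// name.

#include <algorithm>
#include <cstdio>

static unsigned divideCeil(unsigned N, unsigned D) { return (N + D - 1) / D; }
static unsigned alignTo(unsigned V, unsigned A) { return divideCeil(V, A) * A; }

int main() {
  unsigned DefCount = 0, UseCount = 0;

  // "=a" output of a 128-bit value: 4 AGPRs, aligned to the tuple size.
  unsigned OutRegs = divideCeil(128, 32);
  DefCount = alignTo(DefCount, OutRegs) + OutRegs;

  // "a" input of a 64-bit value: 2 AGPRs.
  unsigned InRegs = divideCeil(64, 32);
  UseCount = alignTo(UseCount, InRegs) + InRegs;

  // Defs and uses may overlap, so the requirement is their maximum, capped at
  // 256 just as in inlineAsmGetNumRequiredAGPRs.
  unsigned Required = std::min(std::max(DefCount, UseCount), 256u);
  std::printf("minimum AGPR allocation: %u\n", Required); // prints 4
}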
1306
1307struct AAAMDGPUMinAGPRAlloc
1308 : public StateWrapper<DecIntegerState<>, AbstractAttribute> {
1309 using Base = StateWrapper<DecIntegerState<>, AbstractAttribute>;
1310 AAAMDGPUMinAGPRAlloc(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
1311
1312 static AAAMDGPUMinAGPRAlloc &createForPosition(const IRPosition &IRP,
1313 Attributor &A) {
1314 if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
1315 return *new (A.Allocator) AAAMDGPUMinAGPRAlloc(IRP, A);
1316 llvm_unreachable(
1317 "AAAMDGPUMinAGPRAlloc is only valid for function position");
1318 }
1319
1320 void initialize(Attributor &A) override {
1321 Function *F = getAssociatedFunction();
1322 auto [MinNumAGPR, MaxNumAGPR] =
1323 AMDGPU::getIntegerPairAttribute(*F, "amdgpu-agpr-alloc", {~0u, ~0u},
1324 /*OnlyFirstRequired=*/true);
1325 if (MinNumAGPR == 0)
1326 indicateOptimisticFixpoint();
1327 }
1328
1329 const std::string getAsStr(Attributor *A) const override {
1330 std::string Str = "amdgpu-agpr-alloc=";
1331 raw_string_ostream OS(Str);
1332 OS << getAssumed();
1333 return OS.str();
1334 }
1335
1336 void trackStatistics() const override {}
1337
1338 ChangeStatus updateImpl(Attributor &A) override {
1339 DecIntegerState<> Maximum;
1340
1341 // Check for cases which require allocation of AGPRs. AGPRs are only
1342 // required when there are direct references to them, i.e. inline assembly
1343 // and special intrinsics.
1344 auto CheckForMinAGPRAllocs = [&](Instruction &I) {
1345 const auto &CB = cast<CallBase>(I);
1346 const Value *CalleeOp = CB.getCalledOperand();
1347
1348 if (const InlineAsm *IA = dyn_cast<InlineAsm>(CalleeOp)) {
1349 // Technically, the inline asm could be invoking a call to an unknown
1350 // external function that requires AGPRs, but ignore that.
1351 unsigned NumRegs = inlineAsmGetNumRequiredAGPRs(IA, CB);
1352 Maximum.takeAssumedMaximum(NumRegs);
1353 return true;
1354 }
1355
1356 switch (CB.getIntrinsicID()) {
1357 case Intrinsic::not_intrinsic:
1358 break;
1359 case Intrinsic::write_register:
1360 case Intrinsic::read_register:
1361 case Intrinsic::read_volatile_register: {
1362 const MDString *RegName = cast<MDString>(
1363 cast<MDNode>(
1364 cast<MetadataAsValue>(CB.getArgOperand(0))->getMetadata())
1365 ->getOperand(0));
1366 auto [Kind, RegIdx, NumRegs] =
1367 AMDGPU::parseAsmPhysRegName(RegName->getString());
1368 if (Kind == 'a')
1369 Maximum.takeAssumedMaximum(std::min(RegIdx + NumRegs, 256u));
1370
1371 return true;
1372 }
1373 default:
1374 // Some intrinsics may use AGPRs, but if we have a choice, we are not
1375 // required to use AGPRs.
1376
1377 // Assume !nocallback intrinsics may call a function which requires
1378 // AGPRs.
1379 return CB.hasFnAttr(Attribute::NoCallback);
1380 }
1381
1382 // TODO: Handle callsite attributes
1383 auto *CBEdges = A.getAAFor<AACallEdges>(
1384 *this, IRPosition::callsite_function(CB), DepClassTy::REQUIRED);
1385 if (!CBEdges || CBEdges->hasUnknownCallee()) {
1387 return false;
1388 }
1389
1390 for (const Function *PossibleCallee : CBEdges->getOptimisticEdges()) {
1391 const auto *CalleeInfo = A.getAAFor<AAAMDGPUMinAGPRAlloc>(
1392 *this, IRPosition::function(*PossibleCallee), DepClassTy::REQUIRED);
1393 if (!CalleeInfo || !CalleeInfo->isValidState()) {
1395 return false;
1396 }
1397
1398 Maximum.takeAssumedMaximum(CalleeInfo->getAssumed());
1399 }
1400
1401 return true;
1402 };
1403
1404 bool UsedAssumedInformation = false;
1405 if (!A.checkForAllCallLikeInstructions(CheckForMinAGPRAllocs, *this,
1406 UsedAssumedInformation))
1407 return indicatePessimisticFixpoint();
1408
1409 return clampStateAndIndicateChange(getState(), Maximum);
1410 }
1411
1412 ChangeStatus manifest(Attributor &A) override {
1413 LLVMContext &Ctx = getAssociatedFunction()->getContext();
1414 SmallString<4> Buffer;
1415 raw_svector_ostream OS(Buffer);
1416 OS << getAssumed();
1417
1418 return A.manifestAttrs(
1419 getIRPosition(), {Attribute::get(Ctx, "amdgpu-agpr-alloc", OS.str())});
1420 }
1421
1422 StringRef getName() const override { return "AAAMDGPUMinAGPRAlloc"; }
1423 const char *getIdAddr() const override { return &ID; }
1424
1425 /// This function should return true if the type of the \p AA is
1426 /// AAAMDGPUMinAGPRAllocs
1427 static bool classof(const AbstractAttribute *AA) {
1428 return (AA->getIdAddr() == &ID);
1429 }
1430
1431 static const char ID;
1432};
1433
1434const char AAAMDGPUMinAGPRAlloc::ID = 0;
1435
1436/// An abstract attribute to propagate the function attribute
1437/// "amdgpu-cluster-dims" from kernel entry functions to device functions.
1438struct AAAMDGPUClusterDims
1439 : public StateWrapper<BooleanState, AbstractAttribute> {
1440 using Base = StateWrapper<BooleanState, AbstractAttribute>;
1441 AAAMDGPUClusterDims(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
1442
1443 /// Create an abstract attribute view for the position \p IRP.
1444 static AAAMDGPUClusterDims &createForPosition(const IRPosition &IRP,
1445 Attributor &A);
1446
1447 /// See AbstractAttribute::getName().
1448 StringRef getName() const override { return "AAAMDGPUClusterDims"; }
1449
1450 /// See AbstractAttribute::getIdAddr().
1451 const char *getIdAddr() const override { return &ID; }
1452
1453 /// This function should return true if the type of the \p AA is
1454 /// AAAMDGPUClusterDims.
1455 static bool classof(const AbstractAttribute *AA) {
1456 return AA->getIdAddr() == &ID;
1457 }
1458
1459 virtual const AMDGPU::ClusterDimsAttr &getClusterDims() const = 0;
1460
1461 /// Unique ID (due to the unique address)
1462 static const char ID;
1463};
1464
1465const char AAAMDGPUClusterDims::ID = 0;
1466
1467struct AAAMDGPUClusterDimsFunction : public AAAMDGPUClusterDims {
1468 AAAMDGPUClusterDimsFunction(const IRPosition &IRP, Attributor &A)
1469 : AAAMDGPUClusterDims(IRP, A) {}
1470
1471 void initialize(Attributor &A) override {
1472 Function *F = getAssociatedFunction();
1473 assert(F && "empty associated function");
1474
1475 Attr = AMDGPU::ClusterDimsAttr::get(*F);
1476
1477 // No matter what a kernel function has, it is final.
1478 if (AMDGPU::isEntryFunctionCC(F->getCallingConv())) {
1479 if (Attr.isUnknown())
1480 indicatePessimisticFixpoint();
1481 else
1482 indicateOptimisticFixpoint();
1483 }
1484 }
1485
1486 const std::string getAsStr(Attributor *A) const override {
1487 if (!getAssumed() || Attr.isUnknown())
1488 return "unknown";
1489 if (Attr.isNoCluster())
1490 return "no";
1491 if (Attr.isVariableDims())
1492 return "variable";
1493 return Attr.to_string();
1494 }
1495
1496 void trackStatistics() const override {}
1497
1498 ChangeStatus updateImpl(Attributor &A) override {
1499 auto OldState = Attr;
1500
1501 auto CheckCallSite = [&](AbstractCallSite CS) {
1502 const auto *CallerAA = A.getAAFor<AAAMDGPUClusterDims>(
1503 *this, IRPosition::function(*CS.getInstruction()->getFunction()),
1504 DepClassTy::REQUIRED);
1505 if (!CallerAA || !CallerAA->isValidState())
1506 return false;
1507
1508 return merge(CallerAA->getClusterDims());
1509 };
1510
1511 bool UsedAssumedInformation = false;
1512 if (!A.checkForAllCallSites(CheckCallSite, *this,
1513 /*RequireAllCallSites=*/true,
1514 UsedAssumedInformation))
1515 return indicatePessimisticFixpoint();
1516
1517 return OldState == Attr ? ChangeStatus::UNCHANGED : ChangeStatus::CHANGED;
1518 }
1519
1520 ChangeStatus manifest(Attributor &A) override {
1521 if (Attr.isUnknown())
1522 return ChangeStatus::UNCHANGED;
1523 return A.manifestAttrs(
1524 getIRPosition(),
1525 {Attribute::get(getAssociatedFunction()->getContext(), AttrName,
1526 Attr.to_string())},
1527 /*ForceReplace=*/true);
1528 }
1529
1530 const AMDGPU::ClusterDimsAttr &getClusterDims() const override {
1531 return Attr;
1532 }
1533
1534private:
1535 bool merge(const AMDGPU::ClusterDimsAttr &Other) {
1536 // Case 1: Both of them are still unknown; we do nothing and continue
1537 // waiting for propagation.
1538 if (Attr.isUnknown() && Other.isUnknown())
1539 return true;
1540
1541 // Case 2: The other is determined but we are still unknown; we simply
1542 // take the other's value.
1543 if (Attr.isUnknown()) {
1544 Attr = Other;
1545 return true;
1546 }
1547
1548 // Case 3: We are determined but the other is still unknown; we simply
1549 // keep everything unchanged.
1550 if (Other.isUnknown())
1551 return true;
1552
1553 // After this point, both are determined.
1554
1555 // Case 4: If they are same, we do nothing.
1556 if (Attr == Other)
1557 return true;
1558
1559 // Now they are not same.
1560
1561 // Case 5: If either of us uses cluster (but not both; otherwise case 4
1562 // would hold), then it is unknown whether cluster will be used, and the
1563 // state is final, unlike case 1.
1564 if (Attr.isNoCluster() || Other.isNoCluster()) {
1565 Attr.setUnknown();
1566 return false;
1567 }
1568
1569 // Case 6: Both of us use cluster, but the dims are different, so the result
1570 // is that cluster is used, just without fixed dims.
1571 Attr.setVariableDims();
1572 return true;
1573 }
1574
1575 AMDGPU::ClusterDimsAttr Attr;
1576
1577 static constexpr char AttrName[] = "amdgpu-cluster-dims";
1578};
1579
1580AAAMDGPUClusterDims &
1581AAAMDGPUClusterDims::createForPosition(const IRPosition &IRP, Attributor &A) {
1582 if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
1583 return *new (A.Allocator) AAAMDGPUClusterDimsFunction(IRP, A);
1584 llvm_unreachable("AAAMDGPUClusterDims is only valid for function position");
1585}
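// [Editorial sketch, not part of AMDGPUAttributor.cpp] The merge() lattice above,
// with the real ClusterDimsAttr collapsed into four coarse states. Two different
// fixed dimensions both map onto FixedDims here, so this sketch cannot show case 4
// versus case 6 for distinct fixed values; it only illustrates the join rules.

#include <cstdio>

enum class State { Unknown, NoCluster, FixedDims, VariableDims };

// Returns the merged state; Final is set when the result can no longer change
// (mirrors merge() returning false to force a pessimistic fixpoint).
static State merge(State A, State B, bool &Final) {
  Final = false;
  if (A == State::Unknown)
    return B; // cases 1 and 2
  if (B == State::Unknown || A == B)
    return A; // cases 3 and 4
  if (A == State::NoCluster || B == State::NoCluster) {
    Final = true; // case 5: one caller uses clusters, another does not
    return State::Unknown;
  }
  return State::VariableDims; // case 6: clusters used, dims differ
}

int main() {
  bool Final = false;
  State S = merge(State::FixedDims, State::VariableDims, Final);
  std::printf("fixed + variable   -> %d (final=%d)\n", int(S), int(Final));
  S = merge(State::FixedDims, State::NoCluster, Final);
  std::printf("fixed + no-cluster -> %d (final=%d)\n", int(S), int(Final));
}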
1586
1587static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
1588 AMDGPUAttributorOptions Options,
1589 ThinOrFullLTOPhase LTOPhase) {
1590 SetVector<Function *> Functions;
1591 for (Function &F : M) {
1592 if (!F.isIntrinsic())
1593 Functions.insert(&F);
1594 }
1595
1596 CallGraphUpdater CGUpdater;
1597 BumpPtrAllocator Allocator;
1598 AMDGPUInformationCache InfoCache(M, AG, Allocator, nullptr, TM);
1599 DenseSet<const char *> Allowed(
1600 {&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID,
1601 &AAPotentialValues::ID, &AAAMDFlatWorkGroupSize::ID,
1602 &AAAMDMaxNumWorkgroups::ID, &AAAMDWavesPerEU::ID,
1603 &AAAMDGPUMinAGPRAlloc::ID, &AACallEdges::ID, &AAPointerInfo::ID,
1604 &AAUnderlyingObjects::ID, &AAIndirectCallInfo::ID,
1605 &AANoAliasAddrSpace::ID, &AAAddressSpace::ID,
1606 &AAAMDGPUClusterDims::ID});
1607
1608 AttributorConfig AC(CGUpdater);
1609 AC.IsClosedWorldModule = Options.IsClosedWorld;
1610 AC.Allowed = &Allowed;
1611 AC.IsModulePass = true;
1612 AC.DefaultInitializeLiveInternals = false;
1613 AC.IndirectCalleeSpecializationCallback =
1614 [](Attributor &A, const AbstractAttribute &AA, CallBase &CB,
1615 Function &Callee, unsigned NumAssumedCallees) {
1616 return !AMDGPU::isEntryFunctionCC(Callee.getCallingConv()) &&
1617 (NumAssumedCallees <= IndirectCallSpecializationThreshold);
1618 };
1619 AC.IPOAmendableCB = [](const Function &F) {
1620 return F.getCallingConv() == CallingConv::AMDGPU_KERNEL;
1621 };
1622
1623 Attributor A(Functions, InfoCache, AC);
1624
1625 LLVM_DEBUG({
1626 StringRef LTOPhaseStr = to_string(LTOPhase);
1627 dbgs() << "[AMDGPUAttributor] Running at phase " << LTOPhaseStr << '\n'
1628 << "[AMDGPUAttributor] Module " << M.getName() << " is "
1629 << (AC.IsClosedWorldModule ? "" : "not ")
1630 << "assumed to be a closed world.\n";
1631 });
1632
1633 for (auto *F : Functions) {
1634 A.getOrCreateAAFor<AAAMDAttributes>(IRPosition::function(*F));
1635 A.getOrCreateAAFor<AAUniformWorkGroupSize>(IRPosition::function(*F));
1636 A.getOrCreateAAFor<AAAMDMaxNumWorkgroups>(IRPosition::function(*F));
1637 CallingConv::ID CC = F->getCallingConv();
1638 if (!AMDGPU::isEntryFunctionCC(CC)) {
1639 A.getOrCreateAAFor<AAAMDFlatWorkGroupSize>(IRPosition::function(*F));
1640 A.getOrCreateAAFor<AAAMDWavesPerEU>(IRPosition::function(*F));
1641 }
1642
1643 const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(*F);
1644 if (!F->isDeclaration() && ST.hasClusters())
1645 A.getOrCreateAAFor<AAAMDGPUClusterDims>(IRPosition::function(*F));
1646
1647 if (ST.hasGFX90AInsts())
1648 A.getOrCreateAAFor<AAAMDGPUMinAGPRAlloc>(IRPosition::function(*F));
1649
1650 for (auto &I : instructions(F)) {
1651 Value *Ptr = nullptr;
1652 if (auto *LI = dyn_cast<LoadInst>(&I))
1653 Ptr = LI->getPointerOperand();
1654 else if (auto *SI = dyn_cast<StoreInst>(&I))
1655 Ptr = SI->getPointerOperand();
1656 else if (auto *RMW = dyn_cast<AtomicRMWInst>(&I))
1657 Ptr = RMW->getPointerOperand();
1658 else if (auto *CmpX = dyn_cast<AtomicCmpXchgInst>(&I))
1659 Ptr = CmpX->getPointerOperand();
1660
1661 if (Ptr) {
1662 A.getOrCreateAAFor<AAAddressSpace>(IRPosition::value(*Ptr));
1663 A.getOrCreateAAFor<AANoAliasAddrSpace>(IRPosition::value(*Ptr));
1664 }
1665 }
1666 }
1667
1668 return A.run() == ChangeStatus::CHANGED;
1669}
1670} // namespace
1671
1672PreservedAnalyses AMDGPUAttributorPass::run(Module &M,
1673 ModuleAnalysisManager &AM) {
1674
1675 FunctionAnalysisManager &FAM =
1676 AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
1677 AnalysisGetter AG(FAM);
1678
1679 // TODO: Probably preserves CFG
1680 return runImpl(M, AG, TM, Options, LTOPhase) ? PreservedAnalyses::none()
1681 : PreservedAnalyses::all();
1682}
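// [Editorial note] The module pass can be exercised on its own with something
// like the following (pass name matches DEBUG_TYPE above; the exact invocation
// may differ between LLVM versions):
//   opt -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-attributor -S in.ll -o out.ll
// Kernels seed the attributes; the Attributor then propagates them to the
// functions reachable from those kernels, as set up in runImpl above.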
void push_back(const T &Elt)
std::string str() const
str - Get the contents as an std::string.
Definition StringRef.h:225
LLVM_ABI bool isDroppable() const
A droppable user is a user for which uses can be dropped without affecting correctness and should be ...
Definition User.cpp:115
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
CallInst * Call
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ REGION_ADDRESS
Address space for region memory. (GDS)
@ LOCAL_ADDRESS
Address space for local memory.
@ PRIVATE_ADDRESS
Address space for private memory.
unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI)
unsigned getAMDHSACodeObjectVersion(const Module &M)
unsigned getDefaultQueueImplicitArgPosition(unsigned CodeObjectVersion)
std::tuple< char, unsigned, unsigned > parseAsmPhysRegName(StringRef RegName)
Returns a valid charcode or 0 in the first entry if this is a valid physical register name.
LLVM_READNONE constexpr bool isEntryFunctionCC(CallingConv::ID CC)
std::tuple< char, unsigned, unsigned > parseAsmConstraintPhysReg(StringRef Constraint)
Returns a valid charcode or 0 in the first entry if this is a valid physical register constraint.
unsigned getHostcallImplicitArgPosition(unsigned CodeObjectVersion)
unsigned getCompletionActionImplicitArgPosition(unsigned CodeObjectVersion)
std::pair< unsigned, unsigned > getIntegerPairAttribute(const Function &F, StringRef Name, std::pair< unsigned, unsigned > Default, bool OnlyFirstRequired)
LLVM_READNONE constexpr bool isGraphics(CallingConv::ID CC)
unsigned getMultigridSyncArgImplicitArgPosition(unsigned CodeObjectVersion)
E & operator^=(E &LHS, E RHS)
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ CE
Windows NT (Windows on ARM)
Definition MCAsmInfo.h:48
initializer< Ty > init(const Ty &Val)
NodeAddr< CodeNode * > Code
Definition RDFGraph.h:388
NodeAddr< FuncNode * > Func
Definition RDFGraph.h:393
Context & getContext() const
Definition BasicBlock.h:99
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
InnerAnalysisManagerProxy< FunctionAnalysisManager, Module > FunctionAnalysisManagerModuleProxy
Provide the FunctionAnalysisManager to Module proxy.
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
ThinOrFullLTOPhase
This enumerates the LLVM full LTO or ThinLTO optimization phases.
Definition Pass.h:77
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
const char * to_string(ThinOrFullLTOPhase Phase)
Definition Pass.cpp:301
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition MathExtras.h:394
@ Other
Any other memory.
Definition ModRef.h:68
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
ChangeStatus clampStateAndIndicateChange(StateType &S, const StateType &R)
Helper function to clamp a state S of type StateType with the information in R and indicate/return if...
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
ChangeStatus
{
Definition Attributor.h:496
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
BumpPtrAllocatorImpl<> BumpPtrAllocator
The standard BumpPtrAllocator which just uses the default template parameters.
Definition Allocator.h:383
AnalysisManager< Module > ModuleAnalysisManager
Convenience typedef for the Module analysis manager.
Definition MIRParser.h:39
static LLVM_ABI const char ID
Unique ID (due to the unique address)
virtual const SetVector< Function * > & getOptimisticEdges() const =0
Get the optimistic edges.
static LLVM_ABI const char ID
Unique ID (due to the unique address)
virtual bool hasNonAsmUnknownCallee() const =0
Is there any call with a unknown callee, excluding any inline asm.
static LLVM_ABI const char ID
Unique ID (due to the unique address)
static LLVM_ABI const char ID
Unique ID (due to the unique address)
Instruction * getRemoteInst() const
Return the actual instruction that causes the access.
static LLVM_ABI const char ID
Unique ID (due to the unique address)
static LLVM_ABI const char ID
Unique ID (due to the unique address)
static LLVM_ABI const char ID
Unique ID (due to the unique address)
static LLVM_ABI const char ID
Unique ID (due to the unique address)
virtual const char * getIdAddr() const =0
This function should return the address of the ID of the AbstractAttribute.
Wrapper for FunctionAnalysisManager.
The fixpoint analysis framework that orchestrates the attribute deduction.
DecIntegerState & takeAssumedMaximum(base_t Value)
Take maximum of assumed and Value.
Helper to describe and deal with positions in the LLVM-IR.
Definition Attributor.h:593
static const IRPosition callsite_returned(const CallBase &CB)
Create a position describing the returned value of CB.
Definition Attributor.h:661
static const IRPosition value(const Value &V, const CallBaseContext *CBContext=nullptr)
Create a position describing the value of V.
Definition Attributor.h:617
@ IRP_FUNCTION
An attribute for a function (scope).
Definition Attributor.h:605
static const IRPosition function(const Function &F, const CallBaseContext *CBContext=nullptr)
Create a position describing the function scope of F.
Definition Attributor.h:636
Kind getPositionKind() const
Return the associated position kind.
Definition Attributor.h:889
static const IRPosition callsite_function(const CallBase &CB)
Create a position describing the function scope of CB.
Definition Attributor.h:656
Data structure to hold cached (LLVM-IR) information.
bool isValidState() const override
See AbstractState::isValidState() NOTE: For now we simply pretend that the worst possible state is in...
ChangeStatus indicatePessimisticFixpoint() override
See AbstractState::indicatePessimisticFixpoint(...)
Helper to tie a abstract state implementation to an abstract attribute.