//===- AMDGPUAttributor.cpp -----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file This pass uses the Attributor framework to deduce AMDGPU attributes.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUTargetMachine.h"
#include "GCNSubtarget.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/IPO/Attributor.h"
#include <cstdint>

#define DEBUG_TYPE "amdgpu-attributor"

using namespace llvm;

28 "amdgpu-indirect-call-specialization-threshold",
30 "A threshold controls whether an indirect call will be specialized"),
31 cl::init(3));

#define AMDGPU_ATTRIBUTE(Name, Str) Name##_POS,

enum ImplicitArgumentPositions {
  #include "AMDGPUAttributes.def"
  LAST_ARG_POS
};

#define AMDGPU_ATTRIBUTE(Name, Str) Name = 1 << Name##_POS,

enum ImplicitArgumentMask {
  NOT_IMPLICIT_INPUT = 0,
  #include "AMDGPUAttributes.def"
  UNKNOWN_INTRINSIC = 1 << LAST_ARG_POS,
  ALL_ARGUMENT_MASK = (1 << LAST_ARG_POS) - 1
};

#define AMDGPU_ATTRIBUTE(Name, Str) {Name, Str},
static constexpr std::pair<ImplicitArgumentMask, StringLiteral>
    ImplicitAttrs[] = {
  #include "AMDGPUAttributes.def"
};
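
// Illustrative expansion (a sketch; the actual entries live in
// AMDGPUAttributes.def): each AMDGPU_ATTRIBUTE(Name, Str) pairs a mask bit
// with its "amdgpu-no-*" string, so the X-macro includes above behave as if
// written out as, e.g.:
//
//   enum ImplicitArgumentMask {
//     NOT_IMPLICIT_INPUT = 0,
//     DISPATCH_PTR = 1 << DISPATCH_PTR_POS,
//     QUEUE_PTR = 1 << QUEUE_PTR_POS,
//     ...
//   };
//   static constexpr std::pair<ImplicitArgumentMask, StringLiteral>
//       ImplicitAttrs[] = {{DISPATCH_PTR, "amdgpu-no-dispatch-ptr"},
//                          {QUEUE_PTR, "amdgpu-no-queue-ptr"}, ...};
//
// A set bit in the assumed state means "this function does not need the
// implicit argument"; manifesting a known bit emits the matching attribute.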

// We do not need to note the x workitem or workgroup id because they are
// always initialized.
//
// TODO: We should not add the attributes if the known compile time workgroup
// size is 1 for y/z.
static ImplicitArgumentMask
intrinsicToAttrMask(Intrinsic::ID ID, bool &NonKernelOnly, bool &NeedsImplicit,
                    bool HasApertureRegs, bool SupportsGetDoorBellID,
                    unsigned CodeObjectVersion) {
  switch (ID) {
  case Intrinsic::amdgcn_workitem_id_x:
    NonKernelOnly = true;
    return WORKITEM_ID_X;
  case Intrinsic::amdgcn_workgroup_id_x:
    NonKernelOnly = true;
    return WORKGROUP_ID_X;
  case Intrinsic::amdgcn_workitem_id_y:
  case Intrinsic::r600_read_tidig_y:
    return WORKITEM_ID_Y;
  case Intrinsic::amdgcn_workitem_id_z:
  case Intrinsic::r600_read_tidig_z:
    return WORKITEM_ID_Z;
  case Intrinsic::amdgcn_workgroup_id_y:
  case Intrinsic::r600_read_tgid_y:
    return WORKGROUP_ID_Y;
  case Intrinsic::amdgcn_workgroup_id_z:
  case Intrinsic::r600_read_tgid_z:
    return WORKGROUP_ID_Z;
  case Intrinsic::amdgcn_cluster_id_x:
    NonKernelOnly = true;
    return CLUSTER_ID_X;
  case Intrinsic::amdgcn_cluster_id_y:
    return CLUSTER_ID_Y;
  case Intrinsic::amdgcn_cluster_id_z:
    return CLUSTER_ID_Z;
  case Intrinsic::amdgcn_lds_kernel_id:
    return LDS_KERNEL_ID;
  case Intrinsic::amdgcn_dispatch_ptr:
    return DISPATCH_PTR;
  case Intrinsic::amdgcn_dispatch_id:
    return DISPATCH_ID;
  case Intrinsic::amdgcn_implicitarg_ptr:
    return IMPLICIT_ARG_PTR;
  // Need queue_ptr anyway. But under V5, we also need implicitarg_ptr to access
  // queue_ptr.
  case Intrinsic::amdgcn_queue_ptr:
    NeedsImplicit = (CodeObjectVersion >= AMDGPU::AMDHSA_COV5);
    return QUEUE_PTR;
  case Intrinsic::amdgcn_is_shared:
  case Intrinsic::amdgcn_is_private:
    if (HasApertureRegs)
      return NOT_IMPLICIT_INPUT;
    // Under V5, we need implicitarg_ptr + offsets to access private_base or
    // shared_base. For pre-V5, however, we need to access them through
    // queue_ptr + offsets.
    return CodeObjectVersion >= AMDGPU::AMDHSA_COV5 ? IMPLICIT_ARG_PTR
                                                    : QUEUE_PTR;
  case Intrinsic::amdgcn_wwm:
  case Intrinsic::amdgcn_strict_wwm:
    return WHOLE_WAVE_MODE;
  case Intrinsic::trap:
  case Intrinsic::debugtrap:
  case Intrinsic::ubsantrap:
    if (SupportsGetDoorBellID) // GetDoorbellID support implemented since V4.
      return CodeObjectVersion >= AMDGPU::AMDHSA_COV4 ? NOT_IMPLICIT_INPUT
                                                      : QUEUE_PTR;
    NeedsImplicit = (CodeObjectVersion >= AMDGPU::AMDHSA_COV5);
    return QUEUE_PTR;
  default:
    return UNKNOWN_INTRINSIC;
  }
}
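
// For example, a device function that calls @llvm.amdgcn.workitem.id.y maps
// to WORKITEM_ID_Y above; AAAMDAttributesFunction::updateImpl() below then
// clears that bit from the assumed set, so "amdgpu-no-workitem-id-y" is never
// manifested for the function (a sketch of the intended data flow).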

static bool castRequiresQueuePtr(unsigned SrcAS) {
  return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS;
}

static bool isDSAddress(const Constant *C) {
  const auto *GV = dyn_cast<GlobalValue>(C);
  if (!GV)
    return false;
  unsigned AS = GV->getAddressSpace();
  return AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS;
}

/// Returns true if sanitizer attributes are present on a function.
static bool hasSanitizerAttributes(const Function &F) {
  return F.hasFnAttribute(Attribute::SanitizeAddress) ||
         F.hasFnAttribute(Attribute::SanitizeThread) ||
         F.hasFnAttribute(Attribute::SanitizeMemory) ||
         F.hasFnAttribute(Attribute::SanitizeHWAddress) ||
         F.hasFnAttribute(Attribute::SanitizeMemTag);
}

namespace {
class AMDGPUInformationCache : public InformationCache {
public:
  AMDGPUInformationCache(const Module &M, AnalysisGetter &AG,
                         BumpPtrAllocator &Allocator,
                         SetVector<Function *> *CGSCC, TargetMachine &TM)
      : InformationCache(M, AG, Allocator, CGSCC), TM(TM),
        CodeObjectVersion(AMDGPU::getAMDHSACodeObjectVersion(M)) {}

  TargetMachine &TM;

  enum ConstantStatus : uint8_t {
    NONE = 0,
    DS_GLOBAL = 1 << 0,
    ADDR_SPACE_CAST_PRIVATE_TO_FLAT = 1 << 1,
    ADDR_SPACE_CAST_LOCAL_TO_FLAT = 1 << 2,
    ADDR_SPACE_CAST_BOTH_TO_FLAT =
        ADDR_SPACE_CAST_PRIVATE_TO_FLAT | ADDR_SPACE_CAST_LOCAL_TO_FLAT,
    CS_WORST = DS_GLOBAL | ADDR_SPACE_CAST_BOTH_TO_FLAT,
  };

  /// Check if the subtarget has aperture regs.
  bool hasApertureRegs(Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.hasApertureRegs();
  }

  /// Check if the subtarget supports GetDoorbellID.
  bool supportsGetDoorbellID(Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.supportsGetDoorbellID();
  }

  std::optional<std::pair<unsigned, unsigned>>
  getFlatWorkGroupSizeAttr(const Function &F) const {
    auto R = AMDGPU::getIntegerPairAttribute(F, "amdgpu-flat-work-group-size");
    if (!R)
      return std::nullopt;
    return std::make_pair(R->first, *(R->second));
  }

  std::pair<unsigned, unsigned>
  getDefaultFlatWorkGroupSize(const Function &F) const {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.getDefaultFlatWorkGroupSize(F.getCallingConv());
  }

  std::pair<unsigned, unsigned>
  getMaximumFlatWorkGroupRange(const Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return {ST.getMinFlatWorkGroupSize(), ST.getMaxFlatWorkGroupSize()};
  }

  SmallVector<unsigned> getMaxNumWorkGroups(const Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.getMaxNumWorkGroups(F);
  }

  /// Get code object version.
  unsigned getCodeObjectVersion() const { return CodeObjectVersion; }

  std::optional<std::pair<unsigned, unsigned>>
  getWavesPerEUAttr(const Function &F) {
    auto Val = AMDGPU::getIntegerPairAttribute(F, "amdgpu-waves-per-eu",
                                               /*OnlyFirstRequired=*/true);
    if (!Val)
      return std::nullopt;
    if (!Val->second) {
      const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
      Val->second = ST.getMaxWavesPerEU();
    }
    return std::make_pair(Val->first, *(Val->second));
  }

  unsigned getMaxWavesPerEU(const Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.getMaxWavesPerEU();
  }

  unsigned getMaxAddrSpace() const override {
    return AMDGPUAS::MAX_AMDGPU_ADDRESS;
  }

private:
  /// Check if the ConstantExpr \p CE uses an addrspacecast from private or
  /// local to flat. These casts may require the queue pointer.
  static uint8_t visitConstExpr(const ConstantExpr *CE) {
    uint8_t Status = NONE;

    if (CE->getOpcode() == Instruction::AddrSpaceCast) {
      unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
      if (SrcAS == AMDGPUAS::PRIVATE_ADDRESS)
        Status |= ADDR_SPACE_CAST_PRIVATE_TO_FLAT;
      else if (SrcAS == AMDGPUAS::LOCAL_ADDRESS)
        Status |= ADDR_SPACE_CAST_LOCAL_TO_FLAT;
    }

    return Status;
  }

  /// Get the constant access bitmap for \p C.
  uint8_t getConstantAccess(const Constant *C) {
    const auto &It = ConstantStatus.find(C);
    if (It != ConstantStatus.end())
      return It->second.value();

    SmallPtrSet<const Constant *, 8> Visited;
    SmallVector<const Constant *, 8> Worklist;
    Worklist.push_back(C);
    Visited.insert(C);

    uint8_t Result = 0;
    while (Result != CS_WORST && !Worklist.empty()) {
      const Constant *CurC = Worklist.pop_back_val();

      std::optional<uint8_t> &CurCResultOrNone = ConstantStatus[CurC];
      if (CurCResultOrNone) {
        Result |= CurCResultOrNone.value();
        continue;
      }
      uint8_t CurCResult = 0;

      if (isDSAddress(CurC))
        CurCResult |= DS_GLOBAL;

      if (const auto *CE = dyn_cast<ConstantExpr>(CurC))
        CurCResult |= visitConstExpr(CE);

      for (const Use &U : CurC->operands()) {
        if (const auto *OpC = dyn_cast<Constant>(U)) {
          if (Visited.insert(OpC).second)
            Worklist.push_back(OpC);
        }
      }

      CurCResultOrNone = CurCResult;
      Result |= CurCResult;
    }

    ConstantStatus[C] = Result;
    return Result;
  }
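
  // Worked example (hypothetical IR, for illustration only): given
  //   @lds = addrspace(3) global i32 0
  // an operand such as getelementptr(i32, ptr addrspace(3) @lds, i32 1)
  // pushes the ConstantExpr and then @lds onto the worklist, so the returned
  // bitmap has DS_GLOBAL set; a constant addrspacecast from addrspace(5)
  // would additionally set ADDR_SPACE_CAST_PRIVATE_TO_FLAT. Results are
  // memoized in ConstantStatus, so shared subexpressions are visited once.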

public:
  /// Returns true if \p Fn needs the queue pointer because of \p C.
  bool needsQueuePtr(const Constant *C, Function &Fn) {
    bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(Fn.getCallingConv());
    bool HasAperture = hasApertureRegs(Fn);

    // No need to explore the constants.
    if (!IsNonEntryFunc && HasAperture)
      return false;

    uint8_t Access = getConstantAccess(C);

    // We need to trap on DS globals in non-entry functions.
    if (IsNonEntryFunc && (Access & DS_GLOBAL))
      return true;

    return !HasAperture && (Access & ADDR_SPACE_CAST_BOTH_TO_FLAT);
  }

  bool checkConstForAddrSpaceCastFromPrivate(const Constant *C) {
    uint8_t Access = getConstantAccess(C);
    return Access & ADDR_SPACE_CAST_PRIVATE_TO_FLAT;
  }

private:
  /// Used to determine if the Constant needs the queue pointer.
  DenseMap<const Constant *, std::optional<uint8_t>> ConstantStatus;
  const unsigned CodeObjectVersion;
};

struct AAAMDAttributes
    : public StateWrapper<BitIntegerState<uint32_t, ALL_ARGUMENT_MASK, 0>,
                          AbstractAttribute> {
  using Base = StateWrapper<BitIntegerState<uint32_t, ALL_ARGUMENT_MASK, 0>,
                            AbstractAttribute>;

  AAAMDAttributes(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  /// Create an abstract attribute view for the position \p IRP.
  static AAAMDAttributes &createForPosition(const IRPosition &IRP,
                                            Attributor &A);

  /// See AbstractAttribute::getName().
  StringRef getName() const override { return "AAAMDAttributes"; }

  /// See AbstractAttribute::getIdAddr().
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAAMDAttributes.
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};
const char AAAMDAttributes::ID = 0;

struct AAUniformWorkGroupSize
    : public StateWrapper<BooleanState, AbstractAttribute> {
  using Base = StateWrapper<BooleanState, AbstractAttribute>;
  AAUniformWorkGroupSize(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  /// Create an abstract attribute view for the position \p IRP.
  static AAUniformWorkGroupSize &createForPosition(const IRPosition &IRP,
                                                   Attributor &A);

  /// See AbstractAttribute::getName().
  StringRef getName() const override { return "AAUniformWorkGroupSize"; }

  /// See AbstractAttribute::getIdAddr().
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAUniformWorkGroupSize.
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};
const char AAUniformWorkGroupSize::ID = 0;

struct AAUniformWorkGroupSizeFunction : public AAUniformWorkGroupSize {
  AAUniformWorkGroupSizeFunction(const IRPosition &IRP, Attributor &A)
      : AAUniformWorkGroupSize(IRP, A) {}

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    CallingConv::ID CC = F->getCallingConv();

    if (CC != CallingConv::AMDGPU_KERNEL)
      return;

    bool InitialValue = F->hasFnAttribute("uniform-work-group-size");

    if (InitialValue)
      indicateOptimisticFixpoint();
    else
      indicatePessimisticFixpoint();
  }

  ChangeStatus updateImpl(Attributor &A) override {
    ChangeStatus Change = ChangeStatus::UNCHANGED;

    auto CheckCallSite = [&](AbstractCallSite CS) {
      Function *Caller = CS.getInstruction()->getFunction();
      LLVM_DEBUG(dbgs() << "[AAUniformWorkGroupSize] Call " << Caller->getName()
                        << "->" << getAssociatedFunction()->getName() << "\n");

      const auto *CallerInfo = A.getAAFor<AAUniformWorkGroupSize>(
          *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
      if (!CallerInfo || !CallerInfo->isValidState())
        return false;

      Change = Change | clampStateAndIndicateChange(this->getState(),
                                                    CallerInfo->getState());

      return true;
    };

    bool AllCallSitesKnown = true;
    if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
      return indicatePessimisticFixpoint();

    return Change;
  }

  ChangeStatus manifest(Attributor &A) override {
    if (!getAssumed())
      return ChangeStatus::UNCHANGED;

    LLVMContext &Ctx = getAssociatedFunction()->getContext();
    return A.manifestAttrs(getIRPosition(),
                           {Attribute::get(Ctx, "uniform-work-group-size")},
                           /*ForceReplace=*/true);
  }

  bool isValidState() const override {
    // This state is always valid, even when the state is false.
    return true;
  }

  const std::string getAsStr(Attributor *) const override {
    return "AMDWorkGroupSize[" + std::to_string(getAssumed()) + "]";
  }

  /// See AbstractAttribute::trackStatistics()
  void trackStatistics() const override {}
};

AAUniformWorkGroupSize &
AAUniformWorkGroupSize::createForPosition(const IRPosition &IRP,
                                          Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAUniformWorkGroupSizeFunction(IRP, A);
  llvm_unreachable(
      "AAUniformWorkGroupSize is only valid for function position");
}
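
// Propagation sketch: "uniform-work-group-size" flows top-down from kernels
// to callees. With a hypothetical kernel @k carrying the attribute and
// calling @helper, @k is fixed optimistically in initialize(), @helper's
// updateImpl() clamps its own state against @k's, and @helper then manifests
// the attribute as well; any single caller without it pessimizes the callee.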

struct AAAMDAttributesFunction : public AAAMDAttributes {
  AAAMDAttributesFunction(const IRPosition &IRP, Attributor &A)
      : AAAMDAttributes(IRP, A) {}

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();

    // If the function requires the implicit arg pointer due to sanitizers,
    // assume it's needed even if explicitly marked as not requiring it.
    // Flat scratch initialization is needed because `asan_malloc_impl`
    // calls introduced later in the pipeline will have flat scratch accesses.
    // FIXME: FLAT_SCRATCH_INIT will not be required here if the device-libs
    // implementation of `asan_malloc_impl` is updated.
    const bool HasSanitizerAttrs = hasSanitizerAttributes(*F);
    if (HasSanitizerAttrs) {
      removeAssumedBits(IMPLICIT_ARG_PTR);
      removeAssumedBits(HOSTCALL_PTR);
      removeAssumedBits(FLAT_SCRATCH_INIT);
    }

    for (auto Attr : ImplicitAttrs) {
      if (HasSanitizerAttrs &&
          (Attr.first == IMPLICIT_ARG_PTR || Attr.first == HOSTCALL_PTR ||
           Attr.first == FLAT_SCRATCH_INIT))
        continue;

      if (F->hasFnAttribute(Attr.second))
        addKnownBits(Attr.first);
    }

    if (F->isDeclaration())
      return;

    // Ignore functions with graphics calling conventions; these are currently
    // not allowed to have kernel arguments.
    if (AMDGPU::isGraphics(F->getCallingConv())) {
      indicatePessimisticFixpoint();
      return;
    }
  }

  ChangeStatus updateImpl(Attributor &A) override {
    Function *F = getAssociatedFunction();
    // The current assumed state used to determine a change.
    auto OrigAssumed = getAssumed();

    // Check for Intrinsics and propagate attributes.
    const AACallEdges *AAEdges = A.getAAFor<AACallEdges>(
        *this, this->getIRPosition(), DepClassTy::REQUIRED);
    if (!AAEdges || !AAEdges->isValidState() ||
        AAEdges->hasNonAsmUnknownCallee())
      return indicatePessimisticFixpoint();

    bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());

    bool NeedsImplicit = false;
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
    bool HasApertureRegs = InfoCache.hasApertureRegs(*F);
    bool SupportsGetDoorbellID = InfoCache.supportsGetDoorbellID(*F);
    unsigned COV = InfoCache.getCodeObjectVersion();

    for (Function *Callee : AAEdges->getOptimisticEdges()) {
      Intrinsic::ID IID = Callee->getIntrinsicID();
      if (IID == Intrinsic::not_intrinsic) {
        const AAAMDAttributes *AAAMD = A.getAAFor<AAAMDAttributes>(
            *this, IRPosition::function(*Callee), DepClassTy::REQUIRED);
        if (!AAAMD || !AAAMD->isValidState())
          return indicatePessimisticFixpoint();
        *this &= *AAAMD;
        continue;
      }

      bool NonKernelOnly = false;
      ImplicitArgumentMask AttrMask =
          intrinsicToAttrMask(IID, NonKernelOnly, NeedsImplicit,
                              HasApertureRegs, SupportsGetDoorbellID, COV);

      if (AttrMask == UNKNOWN_INTRINSIC) {
        // Assume not-nocallback intrinsics may invoke a function which accesses
        // implicit arguments.
        //
        // FIXME: This isn't really the correct check. We want to ensure it
        // isn't calling any function that may use implicit arguments regardless
        // of whether it's internal to the module or not.
        //
        // TODO: Ignoring callsite attributes.
        if (!Callee->hasFnAttribute(Attribute::NoCallback))
          return indicatePessimisticFixpoint();
        continue;
      }

      if (AttrMask != NOT_IMPLICIT_INPUT) {
        if ((IsNonEntryFunc || !NonKernelOnly))
          removeAssumedBits(AttrMask);
      }
    }

    // Need implicitarg_ptr to access queue_ptr, private_base, and shared_base.
    if (NeedsImplicit)
      removeAssumedBits(IMPLICIT_ARG_PTR);

    if (isAssumed(QUEUE_PTR) && checkForQueuePtr(A)) {
      // Under V5, we need implicitarg_ptr + offsets to access private_base or
      // shared_base. We do not actually need queue_ptr.
      if (COV >= 5)
        removeAssumedBits(IMPLICIT_ARG_PTR);
      else
        removeAssumedBits(QUEUE_PTR);
    }

    if (funcRetrievesMultigridSyncArg(A, COV)) {
      assert(!isAssumed(IMPLICIT_ARG_PTR) &&
             "multigrid_sync_arg needs implicitarg_ptr");
      removeAssumedBits(MULTIGRID_SYNC_ARG);
    }

    if (funcRetrievesHostcallPtr(A, COV)) {
      assert(!isAssumed(IMPLICIT_ARG_PTR) && "hostcall needs implicitarg_ptr");
      removeAssumedBits(HOSTCALL_PTR);
    }

    if (funcRetrievesHeapPtr(A, COV)) {
      assert(!isAssumed(IMPLICIT_ARG_PTR) && "heap_ptr needs implicitarg_ptr");
      removeAssumedBits(HEAP_PTR);
    }

    if (isAssumed(QUEUE_PTR) && funcRetrievesQueuePtr(A, COV)) {
      assert(!isAssumed(IMPLICIT_ARG_PTR) && "queue_ptr needs implicitarg_ptr");
      removeAssumedBits(QUEUE_PTR);
    }

    if (isAssumed(LDS_KERNEL_ID) && funcRetrievesLDSKernelId(A)) {
      removeAssumedBits(LDS_KERNEL_ID);
    }

    if (isAssumed(DEFAULT_QUEUE) && funcRetrievesDefaultQueue(A, COV))
      removeAssumedBits(DEFAULT_QUEUE);

    if (isAssumed(COMPLETION_ACTION) && funcRetrievesCompletionAction(A, COV))
      removeAssumedBits(COMPLETION_ACTION);

    if (isAssumed(FLAT_SCRATCH_INIT) && needFlatScratchInit(A))
      removeAssumedBits(FLAT_SCRATCH_INIT);

    return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED
                                       : ChangeStatus::UNCHANGED;
  }

  ChangeStatus manifest(Attributor &A) override {
    SmallVector<Attribute, 8> AttrList;
    LLVMContext &Ctx = getAssociatedFunction()->getContext();

    for (auto Attr : ImplicitAttrs) {
      if (isKnown(Attr.first))
        AttrList.push_back(Attribute::get(Ctx, Attr.second));
    }

    return A.manifestAttrs(getIRPosition(), AttrList,
                           /* ForceReplace */ true);
  }

  const std::string getAsStr(Attributor *) const override {
    std::string Str;
    raw_string_ostream OS(Str);
    OS << "AMDInfo[";
    for (auto Attr : ImplicitAttrs)
      if (isAssumed(Attr.first))
        OS << ' ' << Attr.second;
    OS << " ]";
    return OS.str();
  }

  /// See AbstractAttribute::trackStatistics()
  void trackStatistics() const override {}

private:
  bool checkForQueuePtr(Attributor &A) {
    Function *F = getAssociatedFunction();
    bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());

    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());

    bool NeedsQueuePtr = false;

    auto CheckAddrSpaceCasts = [&](Instruction &I) {
      unsigned SrcAS = static_cast<AddrSpaceCastInst &>(I).getSrcAddressSpace();
      if (castRequiresQueuePtr(SrcAS)) {
        NeedsQueuePtr = true;
        return false;
      }
      return true;
    };

    bool HasApertureRegs = InfoCache.hasApertureRegs(*F);

    // `checkForAllInstructions` is much cheaper than going through all
    // instructions, so try it first.

    // The queue pointer is not needed if aperture regs are present.
    if (!HasApertureRegs) {
      bool UsedAssumedInformation = false;
      A.checkForAllInstructions(CheckAddrSpaceCasts, *this,
                                {Instruction::AddrSpaceCast},
                                UsedAssumedInformation);
    }

    // If we found that we need the queue pointer, there is nothing else to do.
    if (NeedsQueuePtr)
      return true;

    if (!IsNonEntryFunc && HasApertureRegs)
      return false;

    for (BasicBlock &BB : *F) {
      for (Instruction &I : BB) {
        for (const Use &U : I.operands()) {
          if (const auto *C = dyn_cast<Constant>(U)) {
            if (InfoCache.needsQueuePtr(C, *F))
              return true;
          }
        }
      }
    }

    return false;
  }

  bool funcRetrievesMultigridSyncArg(Attributor &A, unsigned COV) {
    auto Pos = llvm::AMDGPU::getMultigridSyncArgImplicitArgPosition(COV);
    AA::RangeTy Range(Pos, 8);
    return funcRetrievesImplicitKernelArg(A, Range);
  }

  bool funcRetrievesHostcallPtr(Attributor &A, unsigned COV) {
    auto Pos = llvm::AMDGPU::getHostcallImplicitArgPosition(COV);
    AA::RangeTy Range(Pos, 8);
    return funcRetrievesImplicitKernelArg(A, Range);
  }

  bool funcRetrievesDefaultQueue(Attributor &A, unsigned COV) {
    auto Pos = llvm::AMDGPU::getDefaultQueueImplicitArgPosition(COV);
    AA::RangeTy Range(Pos, 8);
    return funcRetrievesImplicitKernelArg(A, Range);
  }

  bool funcRetrievesCompletionAction(Attributor &A, unsigned COV) {
    auto Pos = llvm::AMDGPU::getCompletionActionImplicitArgPosition(COV);
    AA::RangeTy Range(Pos, 8);
    return funcRetrievesImplicitKernelArg(A, Range);
  }

  bool funcRetrievesHeapPtr(Attributor &A, unsigned COV) {
    if (COV < 5)
      return false;
    AA::RangeTy Range(AMDGPU::ImplicitArg::HEAP_PTR_OFFSET, 8);
    return funcRetrievesImplicitKernelArg(A, Range);
  }

  bool funcRetrievesQueuePtr(Attributor &A, unsigned COV) {
    if (COV < 5)
      return false;
    AA::RangeTy Range(AMDGPU::ImplicitArg::QUEUE_PTR_OFFSET, 8);
    return funcRetrievesImplicitKernelArg(A, Range);
  }

  bool funcRetrievesImplicitKernelArg(Attributor &A, AA::RangeTy Range) {
    // Check if this is a call to the implicitarg_ptr builtin and it
    // is used to retrieve the hostcall pointer. The implicit arg for
    // hostcall is not used only if every use of the implicitarg_ptr
    // is a load that clearly does not retrieve any byte of the
    // hostcall pointer. We check this by tracing all the uses of the
    // initial call to the implicitarg_ptr intrinsic.
    auto DoesNotLeadToKernelArgLoc = [&](Instruction &I) {
      auto &Call = cast<CallBase>(I);
      if (Call.getIntrinsicID() != Intrinsic::amdgcn_implicitarg_ptr)
        return true;

      const auto *PointerInfoAA = A.getAAFor<AAPointerInfo>(
          *this, IRPosition::callsite_returned(Call), DepClassTy::REQUIRED);
      if (!PointerInfoAA || !PointerInfoAA->getState().isValidState())
        return false;

      return PointerInfoAA->forallInterferingAccesses(
          Range, [](const AAPointerInfo::Access &Acc, bool IsExact) {
            return Acc.getRemoteInst()->isDroppable();
          });
    };

    bool UsedAssumedInformation = false;
    return !A.checkForAllCallLikeInstructions(DoesNotLeadToKernelArgLoc, *this,
                                              UsedAssumedInformation);
  }
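
  // Illustration (hypothetical IR; the actual byte offsets come from
  // AMDGPUBaseInfo and depend on the code object version): a kernel with
  //   %p = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  //   %q = getelementptr i8, ptr addrspace(4) %p, i64 <HostcallOffset>
  //   %h = load ptr, ptr addrspace(4) %q
  // has a non-droppable access interfering with the hostcall range, so
  // funcRetrievesHostcallPtr() returns true and HOSTCALL_PTR is removed. If
  // every access through %p provably misses the queried 8-byte range, the
  // corresponding "amdgpu-no-*" attribute survives.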

  bool funcRetrievesLDSKernelId(Attributor &A) {
    auto DoesNotRetrieve = [&](Instruction &I) {
      auto &Call = cast<CallBase>(I);
      return Call.getIntrinsicID() != Intrinsic::amdgcn_lds_kernel_id;
    };
    bool UsedAssumedInformation = false;
    return !A.checkForAllCallLikeInstructions(DoesNotRetrieve, *this,
                                              UsedAssumedInformation);
  }

  // Returns true if FlatScratchInit is needed, i.e., no-flat-scratch-init is
  // not to be set.
  bool needFlatScratchInit(Attributor &A) {
    assert(isAssumed(FLAT_SCRATCH_INIT)); // Only called if the bit is still set.

    // Check all AddrSpaceCast instructions. FlatScratchInit is needed if
    // there is a cast from PRIVATE_ADDRESS.
    auto AddrSpaceCastNotFromPrivate = [](Instruction &I) {
      return cast<AddrSpaceCastInst>(I).getSrcAddressSpace() !=
             AMDGPUAS::PRIVATE_ADDRESS;
    };

    bool UsedAssumedInformation = false;
    if (!A.checkForAllInstructions(AddrSpaceCastNotFromPrivate, *this,
                                   {Instruction::AddrSpaceCast},
                                   UsedAssumedInformation))
      return true;

    // Check for addrspacecast from PRIVATE_ADDRESS in constant expressions.
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());

    Function *F = getAssociatedFunction();
    for (Instruction &I : instructions(F)) {
      for (const Use &U : I.operands()) {
        if (const auto *C = dyn_cast<Constant>(U)) {
          if (InfoCache.checkConstForAddrSpaceCastFromPrivate(C))
            return true;
        }
      }
    }

    // Finally, check callees.

    // This is called on each callee; false means the callee should not have
    // no-flat-scratch-init.
    auto CheckForNoFlatScratchInit = [&](Instruction &I) {
      const auto &CB = cast<CallBase>(I);
      const Function *Callee = CB.getCalledFunction();

      // Callee == 0 for inline asm or indirect call with known callees.
      // In the latter case, updateImpl() already checked the callees and we
      // know their FLAT_SCRATCH_INIT bit is set.
      // If the function has an indirect call with unknown callees, the bit is
      // already removed in updateImpl() and execution won't reach here.
      if (!Callee)
        return true;

      return Callee->getIntrinsicID() !=
             Intrinsic::amdgcn_addrspacecast_nonnull;
    };

    UsedAssumedInformation = false;
    // If any callee is false (i.e., needs FlatScratchInit),
    // checkForAllCallLikeInstructions returns false, in which case this
    // function returns true.
    return !A.checkForAllCallLikeInstructions(CheckForNoFlatScratchInit, *this,
                                              UsedAssumedInformation);
  }
};

AAAMDAttributes &AAAMDAttributes::createForPosition(const IRPosition &IRP,
                                                    Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAAMDAttributesFunction(IRP, A);
  llvm_unreachable("AAAMDAttributes is only valid for function position");
}

/// Base class to derive different size ranges.
struct AAAMDSizeRangeAttribute
    : public StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t> {
  using Base = StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t>;

  StringRef AttrName;

  AAAMDSizeRangeAttribute(const IRPosition &IRP, Attributor &A,
                          StringRef AttrName)
      : Base(IRP, 32), AttrName(AttrName) {}

  /// See AbstractAttribute::trackStatistics()
  void trackStatistics() const override {}

  template <class AttributeImpl> ChangeStatus updateImplImpl(Attributor &A) {
    ChangeStatus Change = ChangeStatus::UNCHANGED;

    auto CheckCallSite = [&](AbstractCallSite CS) {
      Function *Caller = CS.getInstruction()->getFunction();
      LLVM_DEBUG(dbgs() << '[' << getName() << "] Call " << Caller->getName()
                        << "->" << getAssociatedFunction()->getName() << '\n');

      const auto *CallerInfo = A.getAAFor<AttributeImpl>(
          *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
      if (!CallerInfo || !CallerInfo->isValidState())
        return false;

      Change |=
          clampStateAndIndicateChange(this->getState(), CallerInfo->getState());

      return true;
    };

    bool AllCallSitesKnown = true;
    if (!A.checkForAllCallSites(CheckCallSite, *this,
                                /*RequireAllCallSites=*/true,
                                AllCallSitesKnown))
      return indicatePessimisticFixpoint();

    return Change;
  }

  /// Clamp the assumed range to the default value ([Min, Max]) and emit the
  /// attribute if it is not the same as the default.
  ChangeStatus
  emitAttributeIfNotDefaultAfterClamp(Attributor &A,
                                      std::pair<unsigned, unsigned> Default) {
    auto [Min, Max] = Default;
    unsigned Lower = getAssumed().getLower().getZExtValue();
    unsigned Upper = getAssumed().getUpper().getZExtValue();

    // Clamp the range to the default value.
    if (Lower < Min)
      Lower = Min;
    if (Upper > Max + 1)
      Upper = Max + 1;

    // No manifest if the value is invalid or the same as the default after
    // clamping.
    if ((Lower == Min && Upper == Max + 1) || (Upper < Lower))
      return ChangeStatus::UNCHANGED;

    Function *F = getAssociatedFunction();
    LLVMContext &Ctx = F->getContext();
    SmallString<10> Buffer;
    raw_svector_ostream OS(Buffer);
    OS << Lower << ',' << Upper - 1;
    return A.manifestAttrs(getIRPosition(),
                           {Attribute::get(Ctx, AttrName, OS.str())},
                           /*ForceReplace=*/true);
  }
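
  // Numeric sketch: with Default = {1, 1024} and an assumed range of
  // [64, 257), i.e. lower 64 and upper 257, nothing is clamped and the
  // manifest emits AttrName="64,256"; an assumed range of [1, 1025) matches
  // the default exactly after clamping and nothing is emitted.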

  const std::string getAsStr(Attributor *) const override {
    std::string Str;
    raw_string_ostream OS(Str);
    OS << getName() << '[';
    OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1;
    OS << ']';
    return OS.str();
  }
};

/// Propagate amdgpu-flat-work-group-size attribute.
struct AAAMDFlatWorkGroupSize : public AAAMDSizeRangeAttribute {
  AAAMDFlatWorkGroupSize(const IRPosition &IRP, Attributor &A)
      : AAAMDSizeRangeAttribute(IRP, A, "amdgpu-flat-work-group-size") {}

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());

    bool HasAttr = false;
    auto Range = InfoCache.getDefaultFlatWorkGroupSize(*F);
    auto MaxRange = InfoCache.getMaximumFlatWorkGroupRange(*F);

    if (auto Attr = InfoCache.getFlatWorkGroupSizeAttr(*F)) {
      // We only consider an attribute that is not the max range because the
      // front end always emits the attribute, unfortunately, and sometimes it
      // emits the max range.
      if (*Attr != MaxRange) {
        Range = *Attr;
        HasAttr = true;
      }
    }

    // We don't want to directly clamp the state if it's the max range because
    // that is basically the worst state.
    if (Range == MaxRange)
      return;

    auto [Min, Max] = Range;
    ConstantRange CR(APInt(32, Min), APInt(32, Max + 1));
    IntegerRangeState IRS(CR);
    clampStateAndIndicateChange(this->getState(), IRS);

    if (HasAttr || AMDGPU::isEntryFunctionCC(F->getCallingConv()))
      indicateOptimisticFixpoint();
  }

  ChangeStatus updateImpl(Attributor &A) override {
    return updateImplImpl<AAAMDFlatWorkGroupSize>(A);
  }

  /// Create an abstract attribute view for the position \p IRP.
  static AAAMDFlatWorkGroupSize &createForPosition(const IRPosition &IRP,
                                                   Attributor &A);

  ChangeStatus manifest(Attributor &A) override {
    Function *F = getAssociatedFunction();
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
    return emitAttributeIfNotDefaultAfterClamp(
        A, InfoCache.getMaximumFlatWorkGroupRange(*F));
  }

  /// See AbstractAttribute::getName()
  StringRef getName() const override { return "AAAMDFlatWorkGroupSize"; }

  /// See AbstractAttribute::getIdAddr()
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAAMDFlatWorkGroupSize
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};

const char AAAMDFlatWorkGroupSize::ID = 0;

AAAMDFlatWorkGroupSize &
AAAMDFlatWorkGroupSize::createForPosition(const IRPosition &IRP,
                                          Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAAMDFlatWorkGroupSize(IRP, A);
  llvm_unreachable(
      "AAAMDFlatWorkGroupSize is only valid for function position");
}

struct TupleDecIntegerRangeState : public AbstractState {
  DecIntegerState<uint32_t> X, Y, Z;

  bool isValidState() const override {
    return X.isValidState() && Y.isValidState() && Z.isValidState();
  }

  bool isAtFixpoint() const override {
    return X.isAtFixpoint() && Y.isAtFixpoint() && Z.isAtFixpoint();
  }

  ChangeStatus indicateOptimisticFixpoint() override {
    return X.indicateOptimisticFixpoint() | Y.indicateOptimisticFixpoint() |
           Z.indicateOptimisticFixpoint();
  }

  ChangeStatus indicatePessimisticFixpoint() override {
    return X.indicatePessimisticFixpoint() | Y.indicatePessimisticFixpoint() |
           Z.indicatePessimisticFixpoint();
  }

  TupleDecIntegerRangeState operator^=(const TupleDecIntegerRangeState &Other) {
    X ^= Other.X;
    Y ^= Other.Y;
    Z ^= Other.Z;
    return *this;
  }

  bool operator==(const TupleDecIntegerRangeState &Other) const {
    return X == Other.X && Y == Other.Y && Z == Other.Z;
  }

  TupleDecIntegerRangeState &getAssumed() { return *this; }
  const TupleDecIntegerRangeState &getAssumed() const { return *this; }
};

using AAAMDMaxNumWorkgroupsState =
    StateWrapper<TupleDecIntegerRangeState, AbstractAttribute, uint32_t>;

/// Propagate amdgpu-max-num-workgroups attribute.
struct AAAMDMaxNumWorkgroups
    : public StateWrapper<TupleDecIntegerRangeState, AbstractAttribute> {
  using Base = StateWrapper<TupleDecIntegerRangeState, AbstractAttribute>;

  AAAMDMaxNumWorkgroups(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());

    SmallVector<unsigned> MaxNumWorkgroups = InfoCache.getMaxNumWorkGroups(*F);

    X.takeKnownMinimum(MaxNumWorkgroups[0]);
    Y.takeKnownMinimum(MaxNumWorkgroups[1]);
    Z.takeKnownMinimum(MaxNumWorkgroups[2]);

    if (AMDGPU::isEntryFunctionCC(F->getCallingConv()))
      indicatePessimisticFixpoint();
  }

  ChangeStatus updateImpl(Attributor &A) override {
    ChangeStatus Change = ChangeStatus::UNCHANGED;

    auto CheckCallSite = [&](AbstractCallSite CS) {
      Function *Caller = CS.getInstruction()->getFunction();
      LLVM_DEBUG(dbgs() << "[AAAMDMaxNumWorkgroups] Call " << Caller->getName()
                        << "->" << getAssociatedFunction()->getName() << '\n');

      const auto *CallerInfo = A.getAAFor<AAAMDMaxNumWorkgroups>(
          *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
      if (!CallerInfo || !CallerInfo->isValidState())
        return false;

      Change |=
          clampStateAndIndicateChange(this->getState(), CallerInfo->getState());
      return true;
    };

    bool AllCallSitesKnown = true;
    if (!A.checkForAllCallSites(CheckCallSite, *this,
                                /*RequireAllCallSites=*/true,
                                AllCallSitesKnown))
      return indicatePessimisticFixpoint();

    return Change;
  }

  /// Create an abstract attribute view for the position \p IRP.
  static AAAMDMaxNumWorkgroups &createForPosition(const IRPosition &IRP,
                                                  Attributor &A);

  ChangeStatus manifest(Attributor &A) override {
    Function *F = getAssociatedFunction();
    LLVMContext &Ctx = F->getContext();
    SmallString<32> Buffer;
    raw_svector_ostream OS(Buffer);
    OS << X.getAssumed() << ',' << Y.getAssumed() << ',' << Z.getAssumed();

    // TODO: Should annotate loads of the group size for this to do anything
    // useful.
    return A.manifestAttrs(
        getIRPosition(),
        {Attribute::get(Ctx, "amdgpu-max-num-workgroups", OS.str())},
        /* ForceReplace= */ true);
  }

  StringRef getName() const override { return "AAAMDMaxNumWorkgroups"; }

  const std::string getAsStr(Attributor *) const override {
    std::string Buffer = "AAAMDMaxNumWorkgroupsState[";
    raw_string_ostream OS(Buffer);
    OS << X.getAssumed() << ',' << Y.getAssumed() << ',' << Z.getAssumed()
       << ']';
    return OS.str();
  }

  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAAMDMaxNumWorkgroups
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  void trackStatistics() const override {}

  /// Unique ID (due to the unique address)
  static const char ID;
};

const char AAAMDMaxNumWorkgroups::ID = 0;

AAAMDMaxNumWorkgroups &
AAAMDMaxNumWorkgroups::createForPosition(const IRPosition &IRP, Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAAMDMaxNumWorkgroups(IRP, A);
  llvm_unreachable("AAAMDMaxNumWorkgroups is only valid for function position");
}

/// Propagate amdgpu-waves-per-eu attribute.
struct AAAMDWavesPerEU : public AAAMDSizeRangeAttribute {
  AAAMDWavesPerEU(const IRPosition &IRP, Attributor &A)
      : AAAMDSizeRangeAttribute(IRP, A, "amdgpu-waves-per-eu") {}

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());

    // If the attribute exists, we will honor it if it is not the default.
    if (auto Attr = InfoCache.getWavesPerEUAttr(*F)) {
      std::pair<unsigned, unsigned> MaxWavesPerEURange{
          1U, InfoCache.getMaxWavesPerEU(*F)};
      if (*Attr != MaxWavesPerEURange) {
        auto [Min, Max] = *Attr;
        ConstantRange Range(APInt(32, Min), APInt(32, Max + 1));
        IntegerRangeState RangeState(Range);
        this->getState() = RangeState;
        indicateOptimisticFixpoint();
        return;
      }
    }

    if (AMDGPU::isEntryFunctionCC(F->getCallingConv()))
      indicatePessimisticFixpoint();
  }

  ChangeStatus updateImpl(Attributor &A) override {
    ChangeStatus Change = ChangeStatus::UNCHANGED;

    auto CheckCallSite = [&](AbstractCallSite CS) {
      Function *Caller = CS.getInstruction()->getFunction();
      Function *Func = getAssociatedFunction();
      LLVM_DEBUG(dbgs() << '[' << getName() << "] Call " << Caller->getName()
                        << "->" << Func->getName() << '\n');
      (void)Func;

      const auto *CallerAA = A.getAAFor<AAAMDWavesPerEU>(
          *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
      if (!CallerAA || !CallerAA->isValidState())
        return false;

      ConstantRange Assumed = getAssumed();
      unsigned Min = std::max(Assumed.getLower().getZExtValue(),
                              CallerAA->getAssumed().getLower().getZExtValue());
      unsigned Max = std::max(Assumed.getUpper().getZExtValue(),
                              CallerAA->getAssumed().getUpper().getZExtValue());
      ConstantRange Range(APInt(32, Min), APInt(32, Max));
      IntegerRangeState RangeState(Range);
      getState() = RangeState;
      Change |= getState() == Assumed ? ChangeStatus::UNCHANGED
                                      : ChangeStatus::CHANGED;

      return true;
    };

    bool AllCallSitesKnown = true;
    if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
      return indicatePessimisticFixpoint();

    return Change;
  }
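
  // Note this join differs from the clamp used by the flat work group size:
  // both bounds take the maximum of caller and callee. E.g., an assumed range
  // of [2,9) merged with a caller at [4,11) yields [4,11), which would
  // manifest as "4,10" (illustrative numbers only).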

  /// Create an abstract attribute view for the position \p IRP.
  static AAAMDWavesPerEU &createForPosition(const IRPosition &IRP,
                                            Attributor &A);

  ChangeStatus manifest(Attributor &A) override {
    Function *F = getAssociatedFunction();
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
    return emitAttributeIfNotDefaultAfterClamp(
        A, {1U, InfoCache.getMaxWavesPerEU(*F)});
  }

  /// See AbstractAttribute::getName()
  StringRef getName() const override { return "AAAMDWavesPerEU"; }

  /// See AbstractAttribute::getIdAddr()
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAAMDWavesPerEU
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};

const char AAAMDWavesPerEU::ID = 0;

AAAMDWavesPerEU &AAAMDWavesPerEU::createForPosition(const IRPosition &IRP,
                                                    Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAAMDWavesPerEU(IRP, A);
  llvm_unreachable("AAAMDWavesPerEU is only valid for function position");
}

/// Compute the minimum number of AGPRs required to allocate the inline asm.
static unsigned inlineAsmGetNumRequiredAGPRs(const InlineAsm *IA,
                                             const CallBase &Call) {
  unsigned ArgNo = 0;
  unsigned ResNo = 0;
  unsigned AGPRDefCount = 0;
  unsigned AGPRUseCount = 0;
  unsigned MaxPhysReg = 0;
  const DataLayout &DL = Call.getFunction()->getParent()->getDataLayout();

  // TODO: Overestimates due to not accounting for tied operands
  for (const InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
    Type *Ty = nullptr;
    switch (CI.Type) {
    case InlineAsm::isOutput: {
      Ty = Call.getType();
      if (auto *STy = dyn_cast<StructType>(Ty))
        Ty = STy->getElementType(ResNo);
      ++ResNo;
      break;
    }
    case InlineAsm::isInput: {
      Ty = Call.getArgOperand(ArgNo++)->getType();
      break;
    }
    case InlineAsm::isLabel:
      continue;
    case InlineAsm::isClobber:
      // Parse the physical register reference.
      break;
    }

    for (StringRef Code : CI.Codes) {
      unsigned RegCount = 0;
      if (Code.starts_with("a")) {
        // Virtual register, compute number of registers based on the type.
        //
        // We ought to be going through TargetLowering to get the number of
        // registers, but we should avoid the dependence on CodeGen here.
        RegCount = divideCeil(DL.getTypeSizeInBits(Ty), 32);
      } else {
        // Physical register reference
        auto [Kind, RegIdx, NumRegs] = AMDGPU::parseAsmConstraintPhysReg(Code);
        if (Kind == 'a') {
          RegCount = NumRegs;
          MaxPhysReg = std::max(MaxPhysReg, std::min(RegIdx + NumRegs, 256u));
        }

        continue;
      }

      if (CI.Type == InlineAsm::isOutput) {
        // Apply tuple alignment requirement
        //
        // TODO: This is more conservative than necessary.
        AGPRDefCount = alignTo(AGPRDefCount, RegCount);

        AGPRDefCount += RegCount;
        if (CI.isEarlyClobber) {
          AGPRUseCount = alignTo(AGPRUseCount, RegCount);
          AGPRUseCount += RegCount;
        }
      } else {
        AGPRUseCount = alignTo(AGPRUseCount, RegCount);
        AGPRUseCount += RegCount;
      }
    }
  }

  unsigned MaxVirtReg = std::max(AGPRUseCount, AGPRDefCount);

  // TODO: This is overly conservative. If there are any physical registers,
  // allocate any virtual registers after them so we don't have to solve
  // optimal packing.
  return std::min(MaxVirtReg + MaxPhysReg, 256u);
}
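
// Worked example (hypothetical IR): for
//   call void asm sideeffect "", "a,{a[0:3]}"(i64 %x, i64 %y)
// the virtual "a" input of i64 needs divideCeil(64, 32) = 2 registers, and
// the physical tuple a[0:3] raises MaxPhysReg to 4, so the function returns
// min(2 + 4, 256) = 6 as the minimum AGPR allocation for this call.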

struct AAAMDGPUMinAGPRAlloc
    : public StateWrapper<DecIntegerState<>, AbstractAttribute> {
  using Base = StateWrapper<DecIntegerState<>, AbstractAttribute>;
  AAAMDGPUMinAGPRAlloc(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  static AAAMDGPUMinAGPRAlloc &createForPosition(const IRPosition &IRP,
                                                 Attributor &A) {
    if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
      return *new (A.Allocator) AAAMDGPUMinAGPRAlloc(IRP, A);
    llvm_unreachable(
        "AAAMDGPUMinAGPRAlloc is only valid for function position");
  }

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    auto [MinNumAGPR, MaxNumAGPR] =
        AMDGPU::getIntegerPairAttribute(*F, "amdgpu-agpr-alloc", {~0u, ~0u},
                                        /*OnlyFirstRequired=*/true);
    if (MinNumAGPR == 0)
      indicateOptimisticFixpoint();
  }

  const std::string getAsStr(Attributor *A) const override {
    std::string Str = "amdgpu-agpr-alloc=";
    raw_string_ostream OS(Str);
    OS << getAssumed();
    return OS.str();
  }

  void trackStatistics() const override {}

  ChangeStatus updateImpl(Attributor &A) override {
    DecIntegerState<> Maximum;

    // Check for cases which require allocation of AGPRs. The only cases where
    // AGPRs are required are if there are direct references to AGPRs, so inline
    // assembly and special intrinsics.
    auto CheckForMinAGPRAllocs = [&](Instruction &I) {
      const auto &CB = cast<CallBase>(I);
      const Value *CalleeOp = CB.getCalledOperand();

      if (const InlineAsm *IA = dyn_cast<InlineAsm>(CalleeOp)) {
        // Technically, the inline asm could be invoking a call to an unknown
        // external function that requires AGPRs, but ignore that.
        unsigned NumRegs = inlineAsmGetNumRequiredAGPRs(IA, CB);
        Maximum.takeAssumedMaximum(NumRegs);
        return true;
      }
      switch (CB.getIntrinsicID()) {
      case Intrinsic::not_intrinsic:
        break;
      case Intrinsic::write_register:
      case Intrinsic::read_register:
      case Intrinsic::read_volatile_register: {
        const MDString *RegName = cast<MDString>(
            cast<MDNode>(
                cast<MetadataAsValue>(CB.getArgOperand(0))->getMetadata())
                ->getOperand(0));
        auto [Kind, RegIdx, NumRegs] =
            AMDGPU::parseAsmPhysRegName(RegName->getString());
        if (Kind == 'a')
          Maximum.takeAssumedMaximum(std::min(RegIdx + NumRegs, 256u));

        return true;
      }
      // Trap-like intrinsics such as llvm.trap and llvm.debugtrap do not have
      // the nocallback attribute, so the AMDGPU attributor can conservatively
      // drop all implicitly-known inputs and AGPR allocation information. Make
      // sure we still infer that no implicit inputs are required and that the
      // AGPR allocation stays at zero. Trap-like intrinsics may invoke a
      // function which requires AGPRs, so we need to check if the called
      // function has the "trap-func-name" attribute.
      case Intrinsic::trap:
      case Intrinsic::debugtrap:
      case Intrinsic::ubsantrap:
        return CB.hasFnAttr(Attribute::NoCallback) ||
               !CB.hasFnAttr("trap-func-name");
      default:
        // Some intrinsics may use AGPRs, but if we have a choice, we are not
        // required to use AGPRs.
        // Assume !nocallback intrinsics may call a function which requires
        // AGPRs.
        return CB.hasFnAttr(Attribute::NoCallback);
      }

      // TODO: Handle callsite attributes
      auto *CBEdges = A.getAAFor<AACallEdges>(
          *this, IRPosition::callsite_function(CB), DepClassTy::REQUIRED);
      if (!CBEdges || CBEdges->hasUnknownCallee()) {
        return false;
      }

      for (const Function *PossibleCallee : CBEdges->getOptimisticEdges()) {
        const auto *CalleeInfo = A.getAAFor<AAAMDGPUMinAGPRAlloc>(
            *this, IRPosition::function(*PossibleCallee), DepClassTy::REQUIRED);
        if (!CalleeInfo || !CalleeInfo->isValidState()) {
          return false;
        }

        Maximum.takeAssumedMaximum(CalleeInfo->getAssumed());
      }

      return true;
    };

    bool UsedAssumedInformation = false;
    if (!A.checkForAllCallLikeInstructions(CheckForMinAGPRAllocs, *this,
                                           UsedAssumedInformation))
      return indicatePessimisticFixpoint();

    return clampStateAndIndicateChange(getState(), Maximum);
  }

  ChangeStatus manifest(Attributor &A) override {
    LLVMContext &Ctx = getAssociatedFunction()->getContext();
    SmallString<4> Buffer;
    raw_svector_ostream OS(Buffer);
    OS << getAssumed();

    return A.manifestAttrs(
        getIRPosition(), {Attribute::get(Ctx, "amdgpu-agpr-alloc", OS.str())});
  }

  StringRef getName() const override { return "AAAMDGPUMinAGPRAlloc"; }
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAAMDGPUMinAGPRAlloc.
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  static const char ID;
};

const char AAAMDGPUMinAGPRAlloc::ID = 0;

/// An abstract attribute to propagate the function attribute
/// "amdgpu-cluster-dims" from kernel entry functions to device functions.
struct AAAMDGPUClusterDims
    : public StateWrapper<BooleanState, AbstractAttribute> {
  using Base = StateWrapper<BooleanState, AbstractAttribute>;
  AAAMDGPUClusterDims(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  /// Create an abstract attribute view for the position \p IRP.
  static AAAMDGPUClusterDims &createForPosition(const IRPosition &IRP,
                                                Attributor &A);

  /// See AbstractAttribute::getName().
  StringRef getName() const override { return "AAAMDGPUClusterDims"; }

  /// See AbstractAttribute::getIdAddr().
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAAMDGPUClusterDims.
  static bool classof(const AbstractAttribute *AA) {
    return AA->getIdAddr() == &ID;
  }

  virtual const AMDGPU::ClusterDimsAttr &getClusterDims() const = 0;

  /// Unique ID (due to the unique address)
  static const char ID;
};

const char AAAMDGPUClusterDims::ID = 0;

struct AAAMDGPUClusterDimsFunction : public AAAMDGPUClusterDims {
  AAAMDGPUClusterDimsFunction(const IRPosition &IRP, Attributor &A)
      : AAAMDGPUClusterDims(IRP, A) {}

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    assert(F && "empty associated function");

    Attr = AMDGPU::ClusterDimsAttr::get(*F);

    // No matter what a kernel function has, it is final.
    if (AMDGPU::isEntryFunctionCC(F->getCallingConv())) {
      if (Attr.isUnknown())
        indicatePessimisticFixpoint();
      else
        indicateOptimisticFixpoint();
    }
  }

  const std::string getAsStr(Attributor *A) const override {
    if (!getAssumed() || Attr.isUnknown())
      return "unknown";
    if (Attr.isNoCluster())
      return "no";
    if (Attr.isVariableDims())
      return "variable";
    return Attr.to_string();
  }

  void trackStatistics() const override {}

  ChangeStatus updateImpl(Attributor &A) override {
    auto OldState = Attr;

    auto CheckCallSite = [&](AbstractCallSite CS) {
      const auto *CallerAA = A.getAAFor<AAAMDGPUClusterDims>(
          *this, IRPosition::function(*CS.getInstruction()->getFunction()),
          DepClassTy::REQUIRED);
      if (!CallerAA || !CallerAA->isValidState())
        return false;

      return merge(CallerAA->getClusterDims());
    };

    bool UsedAssumedInformation = false;
    if (!A.checkForAllCallSites(CheckCallSite, *this,
                                /*RequireAllCallSites=*/true,
                                UsedAssumedInformation))
      return indicatePessimisticFixpoint();

    return OldState == Attr ? ChangeStatus::UNCHANGED : ChangeStatus::CHANGED;
  }

  ChangeStatus manifest(Attributor &A) override {
    if (Attr.isUnknown())
      return ChangeStatus::UNCHANGED;
    return A.manifestAttrs(
        getIRPosition(),
        {Attribute::get(getAssociatedFunction()->getContext(), AttrName,
                        Attr.to_string())},
        /*ForceReplace=*/true);
  }

  const AMDGPU::ClusterDimsAttr &getClusterDims() const override {
    return Attr;
  }

private:
  bool merge(const AMDGPU::ClusterDimsAttr &Other) {
    // Case 1: Both of them are unknown yet; we do nothing and continue to wait
    // for propagation.
    if (Attr.isUnknown() && Other.isUnknown())
      return true;

    // Case 2: The other is determined, but we are unknown yet; we simply take
    // the other's value.
    if (Attr.isUnknown()) {
      Attr = Other;
      return true;
    }

    // Case 3: We are determined but the other is unknown yet; we simply keep
    // everything unchanged.
    if (Other.isUnknown())
      return true;

    // After this point, both are determined.

    // Case 4: If they are the same, we do nothing.
    if (Attr == Other)
      return true;

    // Now they are not the same.

    // Case 5: If either of us does not use clusters (but not both; otherwise
    // case 4 would hold), then it is unknown whether clusters will be used,
    // and the state is final, unlike case 1.
    if (Attr.isNoCluster() || Other.isNoCluster()) {
      Attr.setUnknown();
      return false;
    }

    // Case 6: Both of us use clusters, but the dims are different, so the
    // result is that clusters are used, but we don't have fixed dims.
    Attr.setVariableDims();
    return true;
  }
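
  // Merge table sketch: unknown+unknown stays unknown (keep waiting);
  // unknown+determined adopts the determined value; equal values are kept;
  // no-cluster vs. cluster collapses to a final unknown and returns false
  // (pessimistic fixpoint); two different fixed dims become variable dims.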

  AMDGPU::ClusterDimsAttr Attr;

  static constexpr char AttrName[] = "amdgpu-cluster-dims";
};

AAAMDGPUClusterDims &
AAAMDGPUClusterDims::createForPosition(const IRPosition &IRP, Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAAMDGPUClusterDimsFunction(IRP, A);
  llvm_unreachable("AAAMDGPUClusterDims is only valid for function position");
}

static bool runImpl(SetVector<Function *> &Functions, bool IsModulePass,
                    bool DeleteFns, Module &M, AnalysisGetter &AG,
                    TargetMachine &TM, AMDGPUAttributorOptions Options,
                    ThinOrFullLTOPhase LTOPhase) {

  CallGraphUpdater CGUpdater;
  BumpPtrAllocator Allocator;
  AMDGPUInformationCache InfoCache(M, AG, Allocator, nullptr, TM);
  DenseSet<const char *> Allowed(
      {&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID,
       &AAPotentialValues::ID, &AAAMDFlatWorkGroupSize::ID,
       &AAAMDMaxNumWorkgroups::ID, &AAAMDWavesPerEU::ID,
       &AAAMDGPUMinAGPRAlloc::ID, &AACallEdges::ID, &AAPointerInfo::ID,
       &AAPotentialConstantValues::ID, &AAUnderlyingObjects::ID,
       &AANoAliasAddrSpace::ID, &AAAddressSpace::ID, &AAIndirectCallInfo::ID,
       &AAAMDGPUClusterDims::ID, &AAAlign::ID});

  AttributorConfig AC(CGUpdater);
  AC.IsClosedWorldModule = Options.IsClosedWorld;
  AC.Allowed = &Allowed;
  AC.IsModulePass = IsModulePass;
  AC.DeleteFns = DeleteFns;
  AC.DefaultInitializeLiveInternals = false;
  AC.IndirectCalleeSpecializationCallback =
      [](Attributor &A, const AbstractAttribute &AA, CallBase &CB,
         Function &Callee, unsigned NumAssumedCallees) {
        return !AMDGPU::isEntryFunctionCC(Callee.getCallingConv()) &&
               (NumAssumedCallees <= IndirectCallSpecializationThreshold);
      };
  AC.IPOAmendableCB = [](const Function &F) {
    return F.getCallingConv() == CallingConv::AMDGPU_KERNEL;
  };

  Attributor A(Functions, InfoCache, AC);

  LLVM_DEBUG({
    StringRef LTOPhaseStr = to_string(LTOPhase);
    dbgs() << "[AMDGPUAttributor] Running at phase " << LTOPhaseStr << '\n'
           << "[AMDGPUAttributor] Module " << M.getName() << " is "
           << (AC.IsClosedWorldModule ? "" : "not ")
           << "assumed to be a closed world.\n";
  });

  for (auto *F : Functions) {
    A.getOrCreateAAFor<AAAMDAttributes>(IRPosition::function(*F));
    A.getOrCreateAAFor<AAUniformWorkGroupSize>(IRPosition::function(*F));
    A.getOrCreateAAFor<AAAMDMaxNumWorkgroups>(IRPosition::function(*F));
    CallingConv::ID CC = F->getCallingConv();
    if (!AMDGPU::isEntryFunctionCC(CC)) {
      A.getOrCreateAAFor<AAAMDFlatWorkGroupSize>(IRPosition::function(*F));
      A.getOrCreateAAFor<AAAMDWavesPerEU>(IRPosition::function(*F));
    }

    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(*F);
    if (!F->isDeclaration() && ST.hasClusters())
      A.getOrCreateAAFor<AAAMDGPUClusterDims>(IRPosition::function(*F));

    if (ST.hasGFX90AInsts())
      A.getOrCreateAAFor<AAAMDGPUMinAGPRAlloc>(IRPosition::function(*F));

    for (auto &I : instructions(F)) {
      Value *Ptr = nullptr;
      if (auto *LI = dyn_cast<LoadInst>(&I))
        Ptr = LI->getPointerOperand();
      else if (auto *SI = dyn_cast<StoreInst>(&I))
        Ptr = SI->getPointerOperand();
      else if (auto *RMW = dyn_cast<AtomicRMWInst>(&I))
        Ptr = RMW->getPointerOperand();
      else if (auto *CmpX = dyn_cast<AtomicCmpXchgInst>(&I))
        Ptr = CmpX->getPointerOperand();

      if (Ptr) {
        A.getOrCreateAAFor<AAAddressSpace>(IRPosition::value(*Ptr));
        A.getOrCreateAAFor<AANoAliasAddrSpace>(IRPosition::value(*Ptr));
        if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Ptr)) {
          if (II->getIntrinsicID() == Intrinsic::amdgcn_make_buffer_rsrc)
            A.getOrCreateAAFor<AAAlign>(IRPosition::value(*Ptr));
        }
      }
    }
  }

  return A.run() == ChangeStatus::CHANGED;
}
} // namespace

1677
1680 AnalysisGetter AG(FAM);
1681
1682 SetVector<Function *> Functions;
1683 for (Function &F : M) {
1684 if (!F.isDeclaration())
1685 Functions.insert(&F);
1686 }
1687
1688 // TODO: Probably preserves CFG
1689 return runImpl(Functions, /*IsModulePass=*/true, /*DeleteFns=*/true, M, AG,
1690                TM, Options, LTOPhase)
1691            ? PreservedAnalyses::none()
1692            : PreservedAnalyses::all();
1693 }
1694
1695 PreservedAnalyses AMDGPUAttributorCGSCCPass::run(LazyCallGraph::SCC &C,
1696                                                  CGSCCAnalysisManager &AM,
1697                                                  LazyCallGraph &CG,
1698 CGSCCUpdateResult &UR) {
1699
1700 FunctionAnalysisManager &FAM =
1701     AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();
1702 AnalysisGetter AG(FAM);
1703
1704 SetVector<Function *> Functions;
1705 for (LazyCallGraph::Node &N : C) {
1706 Function *F = &N.getFunction();
1707 if (!F->isIntrinsic())
1708 Functions.insert(F);
1709 }
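// Intrinsic declarations are filtered out above; they are handled directly
// by the backend and never carry the attributes this pass deduces.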
1710
1712 Module *M = C.begin()->getFunction().getParent();
1713 // In the CGSCC pipeline, avoid untracked call graph modifications by
1714 // disabling function deletion, mirroring the generic AttributorCGSCCPass.
1715 return runImpl(Functions, /*IsModulePass=*/false, /*DeleteFns=*/false, *M, AG,
1716                TM, Options, LTOPhase)
1717            ? PreservedAnalyses::none()
1718            : PreservedAnalyses::all();
1719 }