LLVM 17.0.0git
AMDGPUAttributor.cpp
//===- AMDGPUAttributor.cpp -----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file This pass uses Attributor framework to deduce AMDGPU attributes.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/Analysis/CycleAnalysis.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/IPO/Attributor.h"

#define DEBUG_TYPE "amdgpu-attributor"

namespace llvm {
void initializeCycleInfoWrapperPassPass(PassRegistry &);
} // namespace llvm

using namespace llvm;
#define AMDGPU_ATTRIBUTE(Name, Str) Name##_POS,

enum ImplicitArgumentPositions {
  #include "AMDGPUAttributes.def"
  LAST_ARG_POS
};

#define AMDGPU_ATTRIBUTE(Name, Str) Name = 1 << Name##_POS,

enum ImplicitArgumentMask {
  NOT_IMPLICIT_INPUT = 0,
  #include "AMDGPUAttributes.def"
  ALL_ARGUMENT_MASK = (1 << LAST_ARG_POS) - 1
};

#define AMDGPU_ATTRIBUTE(Name, Str) {Name, Str},
static constexpr std::pair<ImplicitArgumentMask,
                           StringLiteral> ImplicitAttrs[] = {
  #include "AMDGPUAttributes.def"
};
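
// Illustration of the X-macro pattern above (a sketch; the real entries live
// in AMDGPUAttributes.def). Assuming the .def file contains an entry such as
//   AMDGPU_ATTRIBUTE(DISPATCH_PTR, "amdgpu-no-dispatch-ptr")
// the three expansions yield, respectively,
//   DISPATCH_PTR_POS,                               // position enum
//   DISPATCH_PTR = 1 << DISPATCH_PTR_POS,           // bitmask enum
//   {DISPATCH_PTR, "amdgpu-no-dispatch-ptr"},       // mask -> attribute name
// so every implicit argument gets a position, a mask bit, and the function
// attribute string that is emitted when the argument is known to be unneeded.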

// We do not need to note the x workitem or workgroup id because they are
// always initialized.
//
// TODO: We should not add the attributes if the known compile time workgroup
// size is 1 for y/z.
static ImplicitArgumentMask
intrinsicToAttrMask(Intrinsic::ID ID, bool &NonKernelOnly, bool &NeedsImplicit,
                    bool HasApertureRegs, bool SupportsGetDoorBellID,
                    unsigned CodeObjectVersion) {
  switch (ID) {
  case Intrinsic::amdgcn_workitem_id_x:
    NonKernelOnly = true;
    return WORKITEM_ID_X;
  case Intrinsic::amdgcn_workgroup_id_x:
    NonKernelOnly = true;
    return WORKGROUP_ID_X;
  case Intrinsic::amdgcn_workitem_id_y:
  case Intrinsic::r600_read_tidig_y:
    return WORKITEM_ID_Y;
  case Intrinsic::amdgcn_workitem_id_z:
  case Intrinsic::r600_read_tidig_z:
    return WORKITEM_ID_Z;
  case Intrinsic::amdgcn_workgroup_id_y:
  case Intrinsic::r600_read_tgid_y:
    return WORKGROUP_ID_Y;
  case Intrinsic::amdgcn_workgroup_id_z:
  case Intrinsic::r600_read_tgid_z:
    return WORKGROUP_ID_Z;
  case Intrinsic::amdgcn_lds_kernel_id:
    return LDS_KERNEL_ID;
  case Intrinsic::amdgcn_dispatch_ptr:
    return DISPATCH_PTR;
  case Intrinsic::amdgcn_dispatch_id:
    return DISPATCH_ID;
  case Intrinsic::amdgcn_implicitarg_ptr:
    return IMPLICIT_ARG_PTR;
  // Need queue_ptr anyway. But under V5, we also need implicitarg_ptr to
  // access queue_ptr.
  case Intrinsic::amdgcn_queue_ptr:
    NeedsImplicit = (CodeObjectVersion >= AMDGPU::AMDHSA_COV5);
    return QUEUE_PTR;
  case Intrinsic::amdgcn_is_shared:
  case Intrinsic::amdgcn_is_private:
    if (HasApertureRegs)
      return NOT_IMPLICIT_INPUT;
    // Under V5, we need implicitarg_ptr + offsets to access private_base or
    // shared_base. For pre-V5, however, we need to access them through
    // queue_ptr + offsets.
    return CodeObjectVersion >= AMDGPU::AMDHSA_COV5 ? IMPLICIT_ARG_PTR
                                                    : QUEUE_PTR;
  case Intrinsic::trap:
    if (SupportsGetDoorBellID) // GetDoorbellID support implemented since V4.
      return CodeObjectVersion >= AMDGPU::AMDHSA_COV4 ? NOT_IMPLICIT_INPUT
                                                      : QUEUE_PTR;
    NeedsImplicit = (CodeObjectVersion >= AMDGPU::AMDHSA_COV5);
    return QUEUE_PTR;
  default:
    return NOT_IMPLICIT_INPUT;
  }
}
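
// Note on how the mask is consumed (see AAAMDAttributesFunction below): each
// bit in ImplicitArgumentMask means "this implicit input is *not* needed".
// updateImpl() clears the assumed bit when a use such as one of the
// intrinsics above is found, and manifest() emits the corresponding
// "amdgpu-no-*" attribute only for bits that survive the fixpoint iteration.
// For example (attribute name assumed from AMDGPUAttributes.def), a device
// function calling llvm.amdgcn.workitem.id.y has WORKITEM_ID_Y cleared and
// therefore does not receive "amdgpu-no-workitem-id-y".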

static bool castRequiresQueuePtr(unsigned SrcAS) {
  return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS;
}

static bool isDSAddress(const Constant *C) {
  const GlobalValue *GV = dyn_cast<GlobalValue>(C);
  if (!GV)
    return false;
  unsigned AS = GV->getAddressSpace();
  return AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS;
}

/// Returns true if the function requires the implicit argument be passed
/// regardless of the function contents.
static bool funcRequiresHostcallPtr(const Function &F) {
  // Sanitizers require the hostcall buffer passed in the implicit arguments.
  return F.hasFnAttribute(Attribute::SanitizeAddress) ||
         F.hasFnAttribute(Attribute::SanitizeThread) ||
         F.hasFnAttribute(Attribute::SanitizeMemory) ||
         F.hasFnAttribute(Attribute::SanitizeHWAddress) ||
         F.hasFnAttribute(Attribute::SanitizeMemTag);
}

namespace {
class AMDGPUInformationCache : public InformationCache {
public:
  AMDGPUInformationCache(const Module &M, AnalysisGetter &AG,
                         BumpPtrAllocator &Allocator,
                         SetVector<Function *> *CC, TargetMachine &TM)
      : InformationCache(M, AG, Allocator, CC), TM(TM),
        CodeObjectVersion(AMDGPU::getCodeObjectVersion(M)) {}

  TargetMachine &TM;

  enum ConstantStatus { DS_GLOBAL = 1 << 0, ADDR_SPACE_CAST = 1 << 1 };

  /// Check if the subtarget has aperture regs.
  bool hasApertureRegs(Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.hasApertureRegs();
  }

  /// Check if the subtarget supports GetDoorbellID.
  bool supportsGetDoorbellID(Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.supportsGetDoorbellID();
  }

  std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.getFlatWorkGroupSizes(F);
  }

  std::pair<unsigned, unsigned>
  getMaximumFlatWorkGroupRange(const Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return {ST.getMinFlatWorkGroupSize(), ST.getMaxFlatWorkGroupSize()};
  }

  /// Get code object version.
  unsigned getCodeObjectVersion() const {
    return CodeObjectVersion;
  }

private:
  /// Check if the ConstantExpr \p CE requires the queue pointer.
  static bool visitConstExpr(const ConstantExpr *CE) {
    if (CE->getOpcode() == Instruction::AddrSpaceCast) {
      unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
      return castRequiresQueuePtr(SrcAS);
    }
    return false;
  }

  /// Get the constant access bitmap for \p C.
  uint8_t getConstantAccess(const Constant *C) {
    auto It = ConstantStatus.find(C);
    if (It != ConstantStatus.end())
      return It->second;

    uint8_t Result = 0;
    if (isDSAddress(C))
      Result = DS_GLOBAL;

    if (const auto *CE = dyn_cast<ConstantExpr>(C))
      if (visitConstExpr(CE))
        Result |= ADDR_SPACE_CAST;

    for (const Use &U : C->operands()) {
      const auto *OpC = dyn_cast<Constant>(U);
      if (!OpC)
        continue;

      Result |= getConstantAccess(OpC);
    }
    return Result;
  }

public:
  /// Returns true if \p Fn needs the queue pointer because of \p C.
  bool needsQueuePtr(const Constant *C, Function &Fn) {
    bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(Fn.getCallingConv());
    bool HasAperture = hasApertureRegs(Fn);

    // No need to explore the constants.
    if (!IsNonEntryFunc && HasAperture)
      return false;

    uint8_t Access = getConstantAccess(C);

    // We need to trap on DS globals in non-entry functions.
    if (IsNonEntryFunc && (Access & DS_GLOBAL))
      return true;

    return !HasAperture && (Access & ADDR_SPACE_CAST);
  }

private:
  /// Used to determine if the Constant needs the queue pointer.
  DenseMap<const Constant *, uint8_t> ConstantStatus;
  const unsigned CodeObjectVersion;
};

struct AAAMDAttributes
    : public StateWrapper<BitIntegerState<uint32_t, ALL_ARGUMENT_MASK, 0>,
                          AbstractAttribute> {
  using Base = StateWrapper<BitIntegerState<uint32_t, ALL_ARGUMENT_MASK, 0>,
                            AbstractAttribute>;

  AAAMDAttributes(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  /// Create an abstract attribute view for the position \p IRP.
  static AAAMDAttributes &createForPosition(const IRPosition &IRP,
                                            Attributor &A);

  /// See AbstractAttribute::getName().
  const std::string getName() const override { return "AAAMDAttributes"; }

  /// See AbstractAttribute::getIdAddr().
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAAMDAttributes.
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};
const char AAAMDAttributes::ID = 0;

struct AAUniformWorkGroupSize
    : public StateWrapper<BooleanState, AbstractAttribute> {
  using Base = StateWrapper<BooleanState, AbstractAttribute>;
  AAUniformWorkGroupSize(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  /// Create an abstract attribute view for the position \p IRP.
  static AAUniformWorkGroupSize &createForPosition(const IRPosition &IRP,
                                                   Attributor &A);

  /// See AbstractAttribute::getName().
  const std::string getName() const override {
    return "AAUniformWorkGroupSize";
  }

  /// See AbstractAttribute::getIdAddr().
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAUniformWorkGroupSize.
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};
const char AAUniformWorkGroupSize::ID = 0;

struct AAUniformWorkGroupSizeFunction : public AAUniformWorkGroupSize {
  AAUniformWorkGroupSizeFunction(const IRPosition &IRP, Attributor &A)
      : AAUniformWorkGroupSize(IRP, A) {}

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    CallingConv::ID CC = F->getCallingConv();

    if (CC != CallingConv::AMDGPU_KERNEL)
      return;

    bool InitialValue = false;
    if (F->hasFnAttribute("uniform-work-group-size"))
      InitialValue = F->getFnAttribute("uniform-work-group-size")
                         .getValueAsString()
                         .equals("true");

    if (InitialValue)
      indicateOptimisticFixpoint();
    else
      indicatePessimisticFixpoint();
  }

  ChangeStatus updateImpl(Attributor &A) override {
    ChangeStatus Change = ChangeStatus::UNCHANGED;

    auto CheckCallSite = [&](AbstractCallSite CS) {
      Function *Caller = CS.getInstruction()->getFunction();
      LLVM_DEBUG(dbgs() << "[AAUniformWorkGroupSize] Call " << Caller->getName()
                        << "->" << getAssociatedFunction()->getName() << "\n");

      const auto &CallerInfo = A.getAAFor<AAUniformWorkGroupSize>(
          *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);

      Change = Change | clampStateAndIndicateChange(this->getState(),
                                                    CallerInfo.getState());

      return true;
    };

    bool AllCallSitesKnown = true;
    if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
      return indicatePessimisticFixpoint();

    return Change;
  }

  ChangeStatus manifest(Attributor &A) override {
    SmallVector<Attribute, 8> AttrList;
    LLVMContext &Ctx = getAssociatedFunction()->getContext();

    AttrList.push_back(Attribute::get(Ctx, "uniform-work-group-size",
                                      getAssumed() ? "true" : "false"));
    return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
                                              /* ForceReplace */ true);
  }
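
  // The manifested attribute looks like this in the final IR (sketch):
  //   define void @callee() #0 { ... }
  //   attributes #0 = { "uniform-work-group-size"="true" }
  // A caller that cannot guarantee a uniform work-group size clamps the
  // callee's state in updateImpl() above, and "false" is written instead.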

  bool isValidState() const override {
    // This state is always valid, even when the state is false.
    return true;
  }

  const std::string getAsStr() const override {
    return "AMDWorkGroupSize[" + std::to_string(getAssumed()) + "]";
  }

  /// See AbstractAttribute::trackStatistics()
  void trackStatistics() const override {}
};

AAUniformWorkGroupSize &
AAUniformWorkGroupSize::createForPosition(const IRPosition &IRP,
                                          Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAUniformWorkGroupSizeFunction(IRP, A);
  llvm_unreachable(
      "AAUniformWorkGroupSize is only valid for function position");
}

struct AAAMDAttributesFunction : public AAAMDAttributes {
  AAAMDAttributesFunction(const IRPosition &IRP, Attributor &A)
      : AAAMDAttributes(IRP, A) {}

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();

    // If the function requires the implicit arg pointer due to sanitizers,
    // assume it's needed even if explicitly marked as not requiring it.
    const bool NeedsHostcall = funcRequiresHostcallPtr(*F);
    if (NeedsHostcall) {
      removeAssumedBits(IMPLICIT_ARG_PTR);
      removeAssumedBits(HOSTCALL_PTR);
    }

    for (auto Attr : ImplicitAttrs) {
      if (NeedsHostcall &&
          (Attr.first == IMPLICIT_ARG_PTR || Attr.first == HOSTCALL_PTR))
        continue;

      if (F->hasFnAttribute(Attr.second))
        addKnownBits(Attr.first);
    }

    if (F->isDeclaration())
      return;

    // Ignore functions with graphics calling conventions; these are currently
    // not allowed to have kernel arguments.
    if (AMDGPU::isGraphics(F->getCallingConv())) {
      indicatePessimisticFixpoint();
      return;
    }
  }
406
407 ChangeStatus updateImpl(Attributor &A) override {
408 Function *F = getAssociatedFunction();
409 // The current assumed state used to determine a change.
410 auto OrigAssumed = getAssumed();
411
412 // Check for Intrinsics and propagate attributes.
413 const AACallEdges &AAEdges = A.getAAFor<AACallEdges>(
414 *this, this->getIRPosition(), DepClassTy::REQUIRED);
415 if (AAEdges.hasNonAsmUnknownCallee())
416 return indicatePessimisticFixpoint();
417
418 bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());
419
420 bool NeedsImplicit = false;
421 auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
422 bool HasApertureRegs = InfoCache.hasApertureRegs(*F);
423 bool SupportsGetDoorbellID = InfoCache.supportsGetDoorbellID(*F);
424 unsigned COV = InfoCache.getCodeObjectVersion();
425
426 for (Function *Callee : AAEdges.getOptimisticEdges()) {
427 Intrinsic::ID IID = Callee->getIntrinsicID();
428 if (IID == Intrinsic::not_intrinsic) {
429 const AAAMDAttributes &AAAMD = A.getAAFor<AAAMDAttributes>(
430 *this, IRPosition::function(*Callee), DepClassTy::REQUIRED);
431 *this &= AAAMD;
432 continue;
433 }
434
435 bool NonKernelOnly = false;
436 ImplicitArgumentMask AttrMask =
437 intrinsicToAttrMask(IID, NonKernelOnly, NeedsImplicit,
438 HasApertureRegs, SupportsGetDoorbellID, COV);
439 if (AttrMask != NOT_IMPLICIT_INPUT) {
440 if ((IsNonEntryFunc || !NonKernelOnly))
441 removeAssumedBits(AttrMask);
442 }
443 }
444
445 // Need implicitarg_ptr to acess queue_ptr, private_base, and shared_base.
446 if (NeedsImplicit)
447 removeAssumedBits(IMPLICIT_ARG_PTR);
448
449 if (isAssumed(QUEUE_PTR) && checkForQueuePtr(A)) {
450 // Under V5, we need implicitarg_ptr + offsets to access private_base or
451 // shared_base. We do not actually need queue_ptr.
452 if (COV >= 5)
453 removeAssumedBits(IMPLICIT_ARG_PTR);
454 else
455 removeAssumedBits(QUEUE_PTR);
456 }
457
458 if (funcRetrievesMultigridSyncArg(A, COV)) {
459 assert(!isAssumed(IMPLICIT_ARG_PTR) &&
460 "multigrid_sync_arg needs implicitarg_ptr");
461 removeAssumedBits(MULTIGRID_SYNC_ARG);
462 }
463
464 if (funcRetrievesHostcallPtr(A, COV)) {
465 assert(!isAssumed(IMPLICIT_ARG_PTR) && "hostcall needs implicitarg_ptr");
466 removeAssumedBits(HOSTCALL_PTR);
467 }
468
469 if (funcRetrievesHeapPtr(A, COV)) {
470 assert(!isAssumed(IMPLICIT_ARG_PTR) && "heap_ptr needs implicitarg_ptr");
471 removeAssumedBits(HEAP_PTR);
472 }
473
474 if (isAssumed(QUEUE_PTR) && funcRetrievesQueuePtr(A, COV)) {
475 assert(!isAssumed(IMPLICIT_ARG_PTR) && "queue_ptr needs implicitarg_ptr");
476 removeAssumedBits(QUEUE_PTR);
477 }
478
479 if (isAssumed(LDS_KERNEL_ID) && funcRetrievesLDSKernelId(A)) {
480 removeAssumedBits(LDS_KERNEL_ID);
481 }
482
483 if (isAssumed(DEFAULT_QUEUE) && funcRetrievesDefaultQueue(A, COV))
484 removeAssumedBits(DEFAULT_QUEUE);
485
486 if (isAssumed(COMPLETION_ACTION) && funcRetrievesCompletionAction(A, COV))
487 removeAssumedBits(COMPLETION_ACTION);
488
489 return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED
490 : ChangeStatus::UNCHANGED;
491 }

  ChangeStatus manifest(Attributor &A) override {
    SmallVector<Attribute, 8> AttrList;
    LLVMContext &Ctx = getAssociatedFunction()->getContext();

    for (auto Attr : ImplicitAttrs) {
      if (isKnown(Attr.first))
        AttrList.push_back(Attribute::get(Ctx, Attr.second));
    }

    return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
                                              /* ForceReplace */ true);
  }
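
  // For example, a leaf function that uses no implicit inputs ends up with
  // every bit known and receives the full set of "no" markers (attribute
  // names assumed from AMDGPUAttributes.def), e.g.:
  //   attributes #0 = { "amdgpu-no-dispatch-ptr" "amdgpu-no-queue-ptr"
  //                     "amdgpu-no-implicitarg-ptr" ... }
  // so later argument lowering can avoid reserving the corresponding inputs.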

  const std::string getAsStr() const override {
    std::string Str;
    raw_string_ostream OS(Str);
    OS << "AMDInfo[";
    for (auto Attr : ImplicitAttrs)
      OS << ' ' << Attr.second;
    OS << " ]";
    return OS.str();
  }

  /// See AbstractAttribute::trackStatistics()
  void trackStatistics() const override {}

private:
  bool checkForQueuePtr(Attributor &A) {
    Function *F = getAssociatedFunction();
    bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());

    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());

    bool NeedsQueuePtr = false;

    auto CheckAddrSpaceCasts = [&](Instruction &I) {
      unsigned SrcAS = static_cast<AddrSpaceCastInst &>(I).getSrcAddressSpace();
      if (castRequiresQueuePtr(SrcAS)) {
        NeedsQueuePtr = true;
        return false;
      }
      return true;
    };

    bool HasApertureRegs = InfoCache.hasApertureRegs(*F);

    // `checkForAllInstructions` is much cheaper than going through all
    // instructions, so try it first.

    // The queue pointer is not needed if aperture regs are present.
    if (!HasApertureRegs) {
      bool UsedAssumedInformation = false;
      A.checkForAllInstructions(CheckAddrSpaceCasts, *this,
                                {Instruction::AddrSpaceCast},
                                UsedAssumedInformation);
    }

    // If we found that we need the queue pointer, nothing else to do.
    if (NeedsQueuePtr)
      return true;

    if (!IsNonEntryFunc && HasApertureRegs)
      return false;

    for (BasicBlock &BB : *F) {
      for (Instruction &I : BB) {
        for (const Use &U : I.operands()) {
          if (const auto *C = dyn_cast<Constant>(U)) {
            if (InfoCache.needsQueuePtr(C, *F))
              return true;
          }
        }
      }
    }

    return false;
  }

  bool funcRetrievesMultigridSyncArg(Attributor &A, unsigned COV) {
    unsigned Pos = AMDGPU::getMultigridSyncArgImplicitArgPosition(COV);
    AA::RangeTy Range(Pos, 8);
    return funcRetrievesImplicitKernelArg(A, Range);
  }

  bool funcRetrievesHostcallPtr(Attributor &A, unsigned COV) {
    unsigned Pos = AMDGPU::getHostcallImplicitArgPosition(COV);
    AA::RangeTy Range(Pos, 8);
    return funcRetrievesImplicitKernelArg(A, Range);
  }

  bool funcRetrievesDefaultQueue(Attributor &A, unsigned COV) {
    unsigned Pos = AMDGPU::getDefaultQueueImplicitArgPosition(COV);
    AA::RangeTy Range(Pos, 8);
    return funcRetrievesImplicitKernelArg(A, Range);
  }

  bool funcRetrievesCompletionAction(Attributor &A, unsigned COV) {
    unsigned Pos = AMDGPU::getCompletionActionImplicitArgPosition(COV);
    AA::RangeTy Range(Pos, 8);
    return funcRetrievesImplicitKernelArg(A, Range);
  }

  bool funcRetrievesHeapPtr(Attributor &A, unsigned COV) {
    if (COV < 5)
      return false;
    AA::RangeTy Range(AMDGPU::ImplicitArg::HEAP_PTR_OFFSET, 8);
    return funcRetrievesImplicitKernelArg(A, Range);
  }

  bool funcRetrievesQueuePtr(Attributor &A, unsigned COV) {
    if (COV < 5)
      return false;
    AA::RangeTy Range(AMDGPU::ImplicitArg::QUEUE_PTR_OFFSET, 8);
    return funcRetrievesImplicitKernelArg(A, Range);
  }

  bool funcRetrievesImplicitKernelArg(Attributor &A, AA::RangeTy Range) {
    // Check if this is a call to the implicitarg_ptr builtin and it
    // is used to retrieve the hostcall pointer. The implicit arg for
    // hostcall is not used only if every use of the implicitarg_ptr
    // is a load that clearly does not retrieve any byte of the
    // hostcall pointer. We check this by tracing all the uses of the
    // initial call to the implicitarg_ptr intrinsic.
    auto DoesNotLeadToKernelArgLoc = [&](Instruction &I) {
      auto &Call = cast<CallBase>(I);
      if (Call.getIntrinsicID() != Intrinsic::amdgcn_implicitarg_ptr)
        return true;

      const auto &PointerInfoAA = A.getAAFor<AAPointerInfo>(
          *this, IRPosition::callsite_returned(Call), DepClassTy::REQUIRED);

      return PointerInfoAA.forallInterferingAccesses(
          Range, [](const AAPointerInfo::Access &Acc, bool IsExact) {
            return Acc.getRemoteInst()->isDroppable();
          });
    };

    bool UsedAssumedInformation = false;
    return !A.checkForAllCallLikeInstructions(DoesNotLeadToKernelArgLoc, *this,
                                              UsedAssumedInformation);
  }
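
  // A sketch of the IR pattern this walks for (offsets depend on the code
  // object version; the 8-byte Range passed in selects one pointer-sized
  // slot of the implicit argument block):
  //   %implicitarg = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  //   %slot = getelementptr i8, ptr addrspace(4) %implicitarg, i64 <Pos>
  //   %val  = load ptr, ptr addrspace(4) %slot
  // If AAPointerInfo proves that no access overlaps Range (other than
  // droppable ones), the corresponding implicit argument remains unused.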

  bool funcRetrievesLDSKernelId(Attributor &A) {
    auto DoesNotRetrieve = [&](Instruction &I) {
      auto &Call = cast<CallBase>(I);
      return Call.getIntrinsicID() != Intrinsic::amdgcn_lds_kernel_id;
    };
    bool UsedAssumedInformation = false;
    return !A.checkForAllCallLikeInstructions(DoesNotRetrieve, *this,
                                              UsedAssumedInformation);
  }
};

AAAMDAttributes &AAAMDAttributes::createForPosition(const IRPosition &IRP,
                                                    Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAAMDAttributesFunction(IRP, A);
  llvm_unreachable("AAAMDAttributes is only valid for function position");
}

/// Propagate amdgpu-flat-work-group-size attribute.
struct AAAMDFlatWorkGroupSize
    : public StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t> {
  using Base = StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t>;
  AAAMDFlatWorkGroupSize(const IRPosition &IRP, Attributor &A)
      : Base(IRP, 32) {}

  /// See AbstractAttribute::getState(...).
  IntegerRangeState &getState() override { return *this; }
  const IntegerRangeState &getState() const override { return *this; }

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
    unsigned MinGroupSize, MaxGroupSize;
    std::tie(MinGroupSize, MaxGroupSize) = InfoCache.getFlatWorkGroupSizes(*F);
    intersectKnown(
        ConstantRange(APInt(32, MinGroupSize), APInt(32, MaxGroupSize + 1)));

    if (AMDGPU::isEntryFunctionCC(F->getCallingConv()))
      indicatePessimisticFixpoint();
  }

  ChangeStatus updateImpl(Attributor &A) override {
    ChangeStatus Change = ChangeStatus::UNCHANGED;

    auto CheckCallSite = [&](AbstractCallSite CS) {
      Function *Caller = CS.getInstruction()->getFunction();
      LLVM_DEBUG(dbgs() << "[AAAMDFlatWorkGroupSize] Call " << Caller->getName()
                        << "->" << getAssociatedFunction()->getName() << '\n');

      const auto &CallerInfo = A.getAAFor<AAAMDFlatWorkGroupSize>(
          *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);

      Change |=
          clampStateAndIndicateChange(this->getState(), CallerInfo.getState());

      return true;
    };

    bool AllCallSitesKnown = true;
    if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
      return indicatePessimisticFixpoint();

    return Change;
  }

  ChangeStatus manifest(Attributor &A) override {
    SmallVector<Attribute, 8> AttrList;
    Function *F = getAssociatedFunction();
    LLVMContext &Ctx = F->getContext();

    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
    unsigned Min, Max;
    std::tie(Min, Max) = InfoCache.getMaximumFlatWorkGroupRange(*F);

    // Don't add the attribute if it's the implied default.
    if (getAssumed().getLower() == Min && getAssumed().getUpper() - 1 == Max)
      return ChangeStatus::UNCHANGED;

    SmallString<10> Buffer;
    raw_svector_ostream OS(Buffer);
    OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1;

    AttrList.push_back(
        Attribute::get(Ctx, "amdgpu-flat-work-group-size", OS.str()));
    return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
                                              /* ForceReplace */ true);
  }
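
  // E.g. a function whose deduced range is [1, 256] is annotated as
  //   "amdgpu-flat-work-group-size"="1,256"
  // matching the "<min>,<max>" format written above; the attribute is skipped
  // entirely when the range equals the subtarget default.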

  const std::string getAsStr() const override {
    std::string Str;
    raw_string_ostream OS(Str);
    OS << "AMDFlatWorkGroupSize[";
    OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1;
    OS << ']';
    return OS.str();
  }

  /// See AbstractAttribute::trackStatistics()
  void trackStatistics() const override {}

  /// Create an abstract attribute view for the position \p IRP.
  static AAAMDFlatWorkGroupSize &createForPosition(const IRPosition &IRP,
                                                   Attributor &A);

  /// See AbstractAttribute::getName()
  const std::string getName() const override {
    return "AAAMDFlatWorkGroupSize";
  }

  /// See AbstractAttribute::getIdAddr()
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAAMDFlatWorkGroupSize
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};

const char AAAMDFlatWorkGroupSize::ID = 0;

AAAMDFlatWorkGroupSize &
AAAMDFlatWorkGroupSize::createForPosition(const IRPosition &IRP,
                                          Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAAMDFlatWorkGroupSize(IRP, A);
  llvm_unreachable(
      "AAAMDFlatWorkGroupSize is only valid for function position");
}

class AMDGPUAttributor : public ModulePass {
public:
  AMDGPUAttributor() : ModulePass(ID) {}

  /// doInitialization - Virtual method overridden by subclasses to do
  /// any necessary initialization before any pass is run.
  bool doInitialization(Module &) override {
    auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
    if (!TPC)
      report_fatal_error("TargetMachine is required");

    TM = &TPC->getTM<TargetMachine>();
    return false;
  }

  bool runOnModule(Module &M) override {
    SetVector<Function *> Functions;
    AnalysisGetter AG(this);
    for (Function &F : M) {
      if (!F.isIntrinsic())
        Functions.insert(&F);
    }

    CallGraphUpdater CGUpdater;
    BumpPtrAllocator Allocator;
    AMDGPUInformationCache InfoCache(M, AG, Allocator, nullptr, *TM);
    DenseSet<const char *> Allowed(
        {&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID,
         &AAPotentialValues::ID, &AAAMDFlatWorkGroupSize::ID, &AACallEdges::ID,
         &AAPointerInfo::ID, &AAPotentialConstantValues::ID});

    AttributorConfig AC(CGUpdater);
    AC.Allowed = &Allowed;
    AC.IsModulePass = true;
    AC.DefaultInitializeLiveInternals = false;

    Attributor A(Functions, InfoCache, AC);

    for (Function &F : M) {
      if (!F.isIntrinsic()) {
        A.getOrCreateAAFor<AAAMDAttributes>(IRPosition::function(F));
        A.getOrCreateAAFor<AAUniformWorkGroupSize>(IRPosition::function(F));
        if (!AMDGPU::isEntryFunctionCC(F.getCallingConv())) {
          A.getOrCreateAAFor<AAAMDFlatWorkGroupSize>(IRPosition::function(F));
        }
      }
    }

    ChangeStatus Change = A.run();
    return Change == ChangeStatus::CHANGED;
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<CycleInfoWrapperPass>();
  }

  StringRef getPassName() const override { return "AMDGPU Attributor"; }
  TargetMachine *TM;
  static char ID;
};
} // namespace

char AMDGPUAttributor::ID = 0;

Pass *llvm::createAMDGPUAttributorPass() { return new AMDGPUAttributor(); }
INITIALIZE_PASS_BEGIN(AMDGPUAttributor, DEBUG_TYPE, "AMDGPU Attributor", false,
                      false)
INITIALIZE_PASS_DEPENDENCY(CycleInfoWrapperPass);
INITIALIZE_PASS_END(AMDGPUAttributor, DEBUG_TYPE, "AMDGPU Attributor", false,
                    false)