LLVM 23.0.0git
AMDGPUWaitcntUtils.h
Go to the documentation of this file.
1//===- AMDGPUWaitcntUtils.h -------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUWAITCNTUTILS_H
10#define LLVM_LIB_TARGET_AMDGPU_AMDGPUWAITCNTUTILS_H
11
12#include "llvm/ADT/Sequence.h"
14#include "llvm/Support/Debug.h"
17
18namespace llvm {
19
20namespace AMDGPU {
21
23 LOAD_CNT = 0, // VMcnt prior to gfx12.
24 DS_CNT, // LGKMcnt prior to gfx12.
26 STORE_CNT, // VScnt in gfx10/gfx11.
29 BVH_CNT, // gfx12+ only.
30 KM_CNT, // gfx12+ only.
31 X_CNT, // gfx1250.
32 ASYNC_CNT, // gfx1250.
34 VA_VDST = NUM_EXTENDED_INST_CNTS, // gfx12+ expert mode only.
35 VM_VSRC, // gfx12+ expert mode only.
38};
39
41
42// Return an iterable range over all counters between LOAD_CNT (the first
43// counter) and \c MaxCounter (exclusive; the default value yields a range
44// over all counters).
47
48} // namespace AMDGPU
49
// Marks AMDGPU::InstCounterType as iterable by setting is_iterable to true.
// NOTE(review): presumably this is what allows building an
// iota_range<InstCounterType> over the enumerators (see inst_counter_types) —
// confirm against llvm/ADT/Sequence.h.
50template <> struct enum_iteration_traits<AMDGPU::InstCounterType> {
51 static constexpr bool is_iterable = true;
52};
53
54namespace AMDGPU {
55
56/// Represents the counter values to wait for in an s_waitcnt instruction.
57///
58/// Large values (including the maximum possible integer) can be used to
59/// represent "don't care" waits.
   ///
   /// Every counter starts out as ~0u (see the default constructor). Note that
   /// the has*() predicates of this class compare against exactly ~0u, so that
   /// is the only value they report as "no wait".
60class Waitcnt {
   // One value per counter, indexed by the InstCounterType enumerator.
61 std::array<unsigned, NUM_INST_CNTS> Cnt;
62
63public:
   // Read / overwrite the value stored for counter T (plain array indexing,
   // no range check).
64 unsigned get(InstCounterType T) const { return Cnt[T]; }
65 void set(InstCounterType T, unsigned Val) { Cnt[T] = Val; }
66
   // Default constructor: every counter is set to ~0u.
67 Waitcnt() { fill(Cnt, ~0u); }
68 // Pre-gfx12 constructor.
   // Sets LOAD_CNT, EXP_CNT, DS_CNT and STORE_CNT from the arguments; all
   // other counters keep the ~0u written by the delegated default constructor.
69 Waitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt, unsigned VsCnt)
70 : Waitcnt() {
71 Cnt[LOAD_CNT] = VmCnt;
72 Cnt[EXP_CNT] = ExpCnt;
73 Cnt[DS_CNT] = LgkmCnt;
74 Cnt[STORE_CNT] = VsCnt;
75 }
76
77 // gfx12+ constructor.
   // Sets each of the eleven counters named below explicitly. Note that ExpCnt
   // precedes DsCnt in the parameter list.
78 Waitcnt(unsigned LoadCnt, unsigned ExpCnt, unsigned DsCnt, unsigned StoreCnt,
79 unsigned SampleCnt, unsigned BvhCnt, unsigned KmCnt, unsigned XCnt,
80 unsigned AsyncCnt, unsigned VaVdst, unsigned VmVsrc)
81 : Waitcnt() {
82 Cnt[LOAD_CNT] = LoadCnt;
83 Cnt[DS_CNT] = DsCnt;
84 Cnt[EXP_CNT] = ExpCnt;
85 Cnt[STORE_CNT] = StoreCnt;
86 Cnt[SAMPLE_CNT] = SampleCnt;
87 Cnt[BVH_CNT] = BvhCnt;
88 Cnt[KM_CNT] = KmCnt;
89 Cnt[X_CNT] = XCnt;
90 Cnt[ASYNC_CNT] = AsyncCnt;
91 Cnt[VA_VDST] = VaVdst;
92 Cnt[VM_VSRC] = VmVsrc;
93 }
94
   // Returns true if at least one counter holds a value other than ~0u.
95 bool hasWait() const {
96 return any_of(Cnt, [](unsigned Val) { return Val != ~0u; });
97 }
98
   // Returns true if a counter other than STORE_CNT holds a value other than
   // ~0u; STORE_CNT itself is skipped.
99 bool hasWaitExceptStoreCnt() const {
   // NOTE(review): the loop header (listing line 100) is not present in this
   // listing; presumably it iterates T over all counters — confirm.
101 if (T == STORE_CNT)
102 continue;
103 if (Cnt[T] != ~0u)
104 return true;
105 }
106 return false;
107 }
108
   // Returns true if STORE_CNT holds a value other than ~0u.
109 bool hasWaitStoreCnt() const { return Cnt[STORE_CNT] != ~0u; }
110
   // Returns true if VA_VDST or VM_VSRC holds a value other than ~0u.
111 bool hasWaitDepctr() const {
112 return Cnt[VA_VDST] != ~0u || Cnt[VM_VSRC] != ~0u;
113 }
114
   // combined(): for each counter T, the result keeps the smaller of this
   // object's value and Other's value.
   // NOTE(review): the signature, the local Wait and the loop header (listing
   // lines 115, 118, 119) are not present in this listing — confirm.
116 // Does the right thing provided *this and Other are either both pre-gfx12
117 // or both gfx12+.
120 Wait.Cnt[T] = std::min(Cnt[T], Other.Cnt[T]);
121 return Wait;
122 }
123
   // Writes the counters to OS as "<name>: <value>" entries, with LS streamed
   // before each entry, and always terminates the output with a newline.
   // NOTE(review): the loop header (listing line 126) is not present in this
   // listing, so which counters are visited cannot be confirmed here.
124 void print(raw_ostream &OS) const {
125 ListSeparator LS;
127 OS << LS << getInstCounterName(T) << ": " << Cnt[T];
   // Writes "none" when LS.unused() is true; presumably that means no entry
   // was printed above — confirm against ListSeparator.
128 if (LS.unused())
129 OS << "none";
130 OS << '\n';
131 }
132
   // dump() is only declared when NDEBUG is not defined or LLVM_ENABLE_DUMP is
   // defined; it is declared here without a body.
133#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
134 LLVM_DUMP_METHOD void dump() const;
135#endif
136
   // Stream insertion: forwards to Wait.print(OS) and returns OS.
   // NOTE(review): the signature (listing line 137) is not present in this
   // listing.
138 Wait.print(OS);
139 return OS;
140 }
141};
142
/// \returns Decoded Waitcnt structure from given \p Encoded for given isa
/// \p Version.
/// NOTE(review): presumably \p Encoded is an S_WAITCNT immediate — confirm
/// against the definition.
143Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded);
144
/// \returns \p Decoded encoded as an unsigned value for given isa \p Version.
/// NOTE(review): presumably the inverse of decodeWaitcnt; which counters take
/// part in the encoding is not visible here — confirm against the definition.
145unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded);
146
147// The following are only meaningful on targets that support
148// S_WAIT_LOADCNT_DSCNT and S_WAIT_STORECNT_DSCNT.
149
150/// \returns Decoded Waitcnt structure from given \p LoadcntDscnt for given
151/// isa \p Version.
152Waitcnt decodeLoadcntDscnt(const IsaVersion &Version, unsigned LoadcntDscnt);
153
154/// \returns Decoded Waitcnt structure from given \p StorecntDscnt for given
155/// isa \p Version.
156Waitcnt decodeStorecntDscnt(const IsaVersion &Version, unsigned StorecntDscnt);
157
158/// \returns \p Loadcnt and \p Dscnt components of \p Decoded encoded as an
159/// immediate that can be used with S_WAIT_LOADCNT_DSCNT for given isa
160/// \p Version.
161unsigned encodeLoadcntDscnt(const IsaVersion &Version, const Waitcnt &Decoded);
162
163/// \returns \p Storecnt and \p Dscnt components of \p Decoded encoded as an
164/// immediate that can be used with S_WAIT_STORECNT_DSCNT for given isa
165/// \p Version.
166unsigned encodeStorecntDscnt(const IsaVersion &Version, const Waitcnt &Decoded);
167
168} // namespace AMDGPU
169
170} // namespace llvm
171
172#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUWAITCNTUTILS_H
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition Compiler.h:661
#define T
Provides some synthesis utilities to produce sequences of values.
This file contains some functions that are useful when dealing with strings.
Represents the counter values to wait for in an s_waitcnt instruction.
void print(raw_ostream &OS) const
LLVM_DUMP_METHOD void dump() const
bool hasWaitExceptStoreCnt() const
Waitcnt(unsigned LoadCnt, unsigned ExpCnt, unsigned DsCnt, unsigned StoreCnt, unsigned SampleCnt, unsigned BvhCnt, unsigned KmCnt, unsigned XCnt, unsigned AsyncCnt, unsigned VaVdst, unsigned VmVsrc)
Waitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt, unsigned VsCnt)
Waitcnt combined(const Waitcnt &Other) const
unsigned get(InstCounterType T) const
friend raw_ostream & operator<<(raw_ostream &OS, const AMDGPU::Waitcnt &Wait)
void set(InstCounterType T, unsigned Val)
A helper class to return the specified delimiter string after the first invocation of operator String...
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
Definition StringRef.h:882
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
iota_range< InstCounterType > inst_counter_types(InstCounterType MaxCounter)
unsigned encodeLoadcntDscnt(const IsaVersion &Version, const Waitcnt &Decoded)
Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded)
unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded)
Waitcnt decodeStorecntDscnt(const IsaVersion &Version, unsigned StorecntDscnt)
StringLiteral getInstCounterName(InstCounterType T)
Waitcnt decodeLoadcntDscnt(const IsaVersion &Version, unsigned LoadcntDscnt)
unsigned encodeStorecntDscnt(const IsaVersion &Version, const Waitcnt &Decoded)
This is an optimization pass for GlobalISel generic memory operations.
void fill(R &&Range, T &&Value)
Provide wrappers to std::fill which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1759
@ Wait
Definition Threading.h:60
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1746
FunctionAddr VTableAddr uintptr_t uintptr_t Version
Definition InstrProf.h:334