LLVM 22.0.0git
Threading.inc
Go to the documentation of this file.
1//===- Windows/Threading.inc - Win32 Threading Implementation - -*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file provides the Win32 specific implementation of Threading functions.
10//
11//===----------------------------------------------------------------------===//
12
14#include "llvm/ADT/Twine.h"
15#include "llvm/Support/thread.h"
16
18#include <process.h>
19
20#include <bitset>
21
22// Windows will at times define MemoryFence.
23#ifdef MemoryFence
24#undef MemoryFence
25#endif
26
27namespace llvm {
28HANDLE
29llvm_execute_on_thread_impl(unsigned(__stdcall *ThreadFunc)(void *), void *Arg,
30 std::optional<unsigned> StackSizeInBytes) {
31 HANDLE hThread = (HANDLE)::_beginthreadex(NULL, StackSizeInBytes.value_or(0),
32 ThreadFunc, Arg, 0, NULL);
33
34 if (!hThread)
35 ReportLastErrorFatal("_beginthreadex failed");
36
37 return hThread;
38}
39
40void llvm_thread_join_impl(HANDLE hThread) {
41 if (::WaitForSingleObject(hThread, INFINITE) == WAIT_FAILED)
42 ReportLastErrorFatal("WaitForSingleObject failed");
43 if (::CloseHandle(hThread) == FALSE)
44 ReportLastErrorFatal("CloseHandle failed");
45}
46
47void llvm_thread_detach_impl(HANDLE hThread) {
48 if (::CloseHandle(hThread) == FALSE)
49 ReportLastErrorFatal("CloseHandle failed");
50}
51
52DWORD llvm_thread_get_id_impl(HANDLE hThread) { return ::GetThreadId(hThread); }
53
54DWORD llvm_thread_get_current_id_impl() { return ::GetCurrentThreadId(); }
55
56} // namespace llvm
57
58uint64_t llvm::get_threadid() { return uint64_t(::GetCurrentThreadId()); }
59
61
#if defined(_MSC_VER)
// Announces Name as the name of thread Id by raising the MS_VC_EXCEPTION
// (0x406D1388) structured exception carrying a THREADNAME_INFO payload. The
// exception is caught right here by the __except handler, so nothing
// propagates to the caller.
static void SetThreadName(DWORD Id, LPCSTR Name) {
  constexpr DWORD MS_VC_EXCEPTION = 0x406D1388;

#pragma pack(push, 8)
  struct THREADNAME_INFO {
    DWORD dwType;     // Must be 0x1000.
    LPCSTR szName;    // Pointer to thread name
    DWORD dwThreadId; // Thread ID (-1 == current thread)
    DWORD dwFlags;    // Reserved. Do not use.
  };
#pragma pack(pop)

  // Fields, in declaration order: type tag, name, thread id, reserved flags.
  THREADNAME_INFO Payload = {0x1000, Name, Id, 0};

  // The payload is passed as an array of ULONG_PTR-sized exception arguments.
  constexpr DWORD NumArgs = sizeof(THREADNAME_INFO) / sizeof(ULONG_PTR);

  __try {
    ::RaiseException(MS_VC_EXCEPTION, 0, NumArgs, (ULONG_PTR *)&Payload);
  } __except (EXCEPTION_EXECUTE_HANDLER) {
  }
}
#endif
88
89void llvm::set_thread_name(const Twine &Name) {
90#if defined(_MSC_VER)
91 // Make sure the input is null terminated.
92 SmallString<64> Storage;
93 StringRef NameStr = Name.toNullTerminatedStringRef(Storage);
94 SetThreadName(::GetCurrentThreadId(), NameStr.data());
95#endif
96}
97
98void llvm::get_thread_name(SmallVectorImpl<char> &Name) {
99 // "Name" is not an inherent property of a thread on Windows. In fact, when
100 // you "set" the name, you are only firing a one-time message to a debugger
101 // which it interprets as a program setting its threads' name. We may be
102 // able to get fancy by creating a TLS entry when someone calls
103 // set_thread_name so that subsequent calls to get_thread_name return this
104 // value.
105 Name.clear();
106}
107
108namespace llvm::sys::windows {
109HMODULE loadSystemModuleSecure(LPCWSTR lpModuleName) {
110 // Ensure we load indeed a module from system32 path.
111 // As per GetModuleHandle documentation:
112 // "If lpModuleName does not include a path and there is more than one loaded
113 // module with the same base name and extension, you cannot predict which
114 // module handle will be returned.". This mitigates
115 // https://learn.microsoft.com/en-us/security-updates/securityadvisories/2010/2269637
117 size_t Size = MAX_PATH;
118 do {
119 Buf.resize_for_overwrite(Size);
120 SetLastError(NO_ERROR);
121 Size = ::GetSystemDirectoryW(Buf.data(), Buf.size());
122 if (Size == 0)
123 return NULL;
124
125 // Try again with larger buffer.
126 } while (Size > Buf.size());
127
128 Buf.truncate(Size);
129 Buf.push_back(L'\\');
130 Buf.append(lpModuleName, lpModuleName + std::wcslen(lpModuleName));
131 Buf.push_back(0);
132
133 return ::GetModuleHandleW(Buf.data());
134}
135} // namespace llvm::sys::windows
136
137SetThreadPriorityResult llvm::set_thread_priority(ThreadPriority Priority) {
138#ifdef THREAD_POWER_THROTTLING_CURRENT_VERSION
139 HMODULE kernelM = llvm::sys::windows::loadSystemModuleSecure(L"kernel32.dll");
140 if (kernelM) {
141 // SetThreadInformation is only available on Windows 8 and later. Since we
142 // still support compilation on Windows 7, we load the function dynamically.
143 typedef BOOL(WINAPI * SetThreadInformation_t)(
144 HANDLE hThread, THREAD_INFORMATION_CLASS ThreadInformationClass,
145 _In_reads_bytes_(ThreadInformationSize) PVOID ThreadInformation,
146 ULONG ThreadInformationSize);
147 static const auto pfnSetThreadInformation =
148 (SetThreadInformation_t)::GetProcAddress(kernelM,
149 "SetThreadInformation");
150 if (pfnSetThreadInformation) {
151 auto setThreadInformation = [](ULONG ControlMaskAndStateMask) {
152 THREAD_POWER_THROTTLING_STATE state{};
153 state.Version = THREAD_POWER_THROTTLING_CURRENT_VERSION;
154 state.ControlMask = ControlMaskAndStateMask;
155 state.StateMask = ControlMaskAndStateMask;
156 return pfnSetThreadInformation(
157 ::GetCurrentThread(), ThreadPowerThrottling, &state, sizeof(state));
158 };
159
160 // Use EcoQoS for ThreadPriority::Background available (running on most
161 // efficent cores at the most efficient cpu frequency):
162 // https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-setthreadinformation
163 // https://learn.microsoft.com/en-us/windows/win32/procthread/quality-of-service
164 setThreadInformation(Priority == ThreadPriority::Background
165 ? THREAD_POWER_THROTTLING_EXECUTION_SPEED
166 : 0);
167 }
168 }
169#endif
170
171 // https://docs.microsoft.com/en-us/windows/desktop/api/processthreadsapi/nf-processthreadsapi-setthreadpriority
172 // Begin background processing mode. The system lowers the resource scheduling
173 // priorities of the thread so that it can perform background work without
174 // significantly affecting activity in the foreground.
175 // End background processing mode. The system restores the resource scheduling
176 // priorities of the thread as they were before the thread entered background
177 // processing mode.
178 //
179 // FIXME: consider THREAD_PRIORITY_BELOW_NORMAL for Low
180 return SetThreadPriority(GetCurrentThread(),
181 Priority != ThreadPriority::Default
182 ? THREAD_MODE_BACKGROUND_BEGIN
183 : THREAD_MODE_BACKGROUND_END)
184 ? SetThreadPriorityResult::SUCCESS
185 : SetThreadPriorityResult::FAILURE;
186}
187
// Describes one processor group as seen by this process. Every member has a
// default so a freshly constructed group never holds indeterminate values;
// ThreadsPerCore defaults to 1 because it is used as a divisor.
struct ProcessorGroup {
  unsigned ID = 0;             // Index of the group.
  unsigned AllThreads = 0;     // Maximum number of hardware threads.
  unsigned UsableThreads = 0;  // Hardware threads usable by this process.
  unsigned ThreadsPerCore = 1; // Hardware threads sharing one core.
  uint64_t Affinity = 0;       // Bit mask of the usable hardware threads.

  // Number of usable cores in this group; never less than one.
  unsigned useableCores() const {
    return std::max(1U, UsableThreads / ThreadsPerCore);
  }
};
199
200template <typename F>
201static bool IterateProcInfo(LOGICAL_PROCESSOR_RELATIONSHIP Relationship, F Fn) {
202 DWORD Len = 0;
203 BOOL R = ::GetLogicalProcessorInformationEx(Relationship, NULL, &Len);
204 if (R || GetLastError() != ERROR_INSUFFICIENT_BUFFER)
205 return false;
206
207 auto *Info = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)calloc(1, Len);
208 R = ::GetLogicalProcessorInformationEx(Relationship, Info, &Len);
209 if (R) {
210 auto *End =
211 (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)((uint8_t *)Info + Len);
212 for (auto *Curr = Info; Curr < End;
213 Curr = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)((uint8_t *)Curr +
214 Curr->Size)) {
215 if (Curr->Relationship != Relationship)
216 continue;
217 Fn(Curr);
218 }
219 }
220 free(Info);
221 return true;
222}
223
224static std::optional<std::vector<USHORT>> getActiveGroups() {
225 USHORT Count = 0;
226 if (::GetProcessGroupAffinity(GetCurrentProcess(), &Count, nullptr))
227 return std::nullopt;
228
229 if (GetLastError() != ERROR_INSUFFICIENT_BUFFER)
230 return std::nullopt;
231
232 std::vector<USHORT> Groups;
233 Groups.resize(Count);
234 if (!::GetProcessGroupAffinity(GetCurrentProcess(), &Count, Groups.data()))
235 return std::nullopt;
236
237 return Groups;
238}
239
240static ArrayRef<ProcessorGroup> getProcessorGroups() {
241 auto computeGroups = []() {
242 SmallVector<ProcessorGroup, 4> Groups;
243
244 auto HandleGroup = [&](SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *ProcInfo) {
245 GROUP_RELATIONSHIP &El = ProcInfo->Group;
246 for (unsigned J = 0; J < El.ActiveGroupCount; ++J) {
247 ProcessorGroup G;
248 G.ID = Groups.size();
249 G.AllThreads = El.GroupInfo[J].MaximumProcessorCount;
250 G.UsableThreads = El.GroupInfo[J].ActiveProcessorCount;
251 assert(G.UsableThreads <= 64);
252 G.Affinity = El.GroupInfo[J].ActiveProcessorMask;
253 Groups.push_back(G);
254 }
255 };
256
257 if (!IterateProcInfo(RelationGroup, HandleGroup))
258 return std::vector<ProcessorGroup>();
259
260 auto HandleProc = [&](SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *ProcInfo) {
261 PROCESSOR_RELATIONSHIP &El = ProcInfo->Processor;
262 assert(El.GroupCount == 1);
263 unsigned NumHyperThreads = 1;
264 // If the flag is set, each core supports more than one hyper-thread.
265 if (El.Flags & LTP_PC_SMT)
266 NumHyperThreads = std::bitset<64>(El.GroupMask[0].Mask).count();
267 unsigned I = El.GroupMask[0].Group;
268 Groups[I].ThreadsPerCore = NumHyperThreads;
269 };
270
271 if (!IterateProcInfo(RelationProcessorCore, HandleProc))
272 return std::vector<ProcessorGroup>();
273
274 auto ActiveGroups = getActiveGroups();
275 if (!ActiveGroups)
276 return std::vector<ProcessorGroup>();
277
278 // If there's an affinity mask set, assume the user wants to constrain the
279 // current process to only a single CPU group. On Windows, it is not
280 // possible for affinity masks to cross CPU group boundaries.
281 DWORD_PTR ProcessAffinityMask = 0, SystemAffinityMask = 0;
282 if (::GetProcessAffinityMask(GetCurrentProcess(), &ProcessAffinityMask,
283 &SystemAffinityMask)) {
284
285 if (ProcessAffinityMask != SystemAffinityMask) {
286 if (llvm::RunningWindows11OrGreater() && ActiveGroups->size() > 1) {
287 // The process affinity mask is spurious, due to an OS bug, ignore it.
288 return std::vector<ProcessorGroup>(Groups.begin(), Groups.end());
289 }
290
291 assert(ActiveGroups->size() == 1 &&
292 "When an affinity mask is set, the process is expected to be "
293 "assigned to a single processor group!");
294
295 unsigned CurrentGroupID = (*ActiveGroups)[0];
296 ProcessorGroup NewG{Groups[CurrentGroupID]};
297 NewG.Affinity = ProcessAffinityMask;
298 NewG.UsableThreads = llvm::popcount(ProcessAffinityMask);
299 Groups.clear();
300 Groups.push_back(NewG);
301 }
302 }
303 return std::vector<ProcessorGroup>(Groups.begin(), Groups.end());
304 };
305 static auto Groups = computeGroups();
306 return ArrayRef<ProcessorGroup>(Groups);
307}
308
// Sums P(element) over every element of Range and returns the total as an
// unsigned value.
template <typename R, typename UnaryPredicate>
static unsigned aggregate(R &&Range, UnaryPredicate P) {
  unsigned Total = 0;
  for (const auto &Element : Range) {
    Total += P(Element);
  }
  return Total;
}
316
318 static unsigned Cores =
319 aggregate(getProcessorGroups(), [](const ProcessorGroup &G) {
320 return G.UsableThreads / G.ThreadsPerCore;
321 });
322 return Cores;
323}
324
325static int computeHostNumHardwareThreads() {
326 static unsigned Threads =
327 aggregate(getProcessorGroups(),
328 [](const ProcessorGroup &G) { return G.UsableThreads; });
329 return Threads;
330}
331
332// Finds the proper CPU socket where a thread number should go. Returns
333// 'std::nullopt' if the thread shall remain on the actual CPU socket.
334std::optional<unsigned>
335llvm::ThreadPoolStrategy::compute_cpu_socket(unsigned ThreadPoolNum) const {
336 ArrayRef<ProcessorGroup> Groups = getProcessorGroups();
337 // Only one CPU socket in the system or process affinity was set, no need to
338 // move the thread(s) to another CPU socket.
339 if (Groups.size() <= 1)
340 return std::nullopt;
341
342 // We ask for less threads than there are hardware threads per CPU socket, no
343 // need to dispatch threads to other CPU sockets.
344 unsigned MaxThreadsPerSocket =
345 UseHyperThreads ? Groups[0].UsableThreads : Groups[0].useableCores();
346 if (compute_thread_count() <= MaxThreadsPerSocket)
347 return std::nullopt;
348
349 assert(ThreadPoolNum < compute_thread_count() &&
350 "The thread index is not within thread strategy's range!");
351
352 // Assumes the same number of hardware threads per CPU socket.
353 return (ThreadPoolNum * Groups.size()) / compute_thread_count();
354}
355
356// Assign the current thread to a more appropriate CPU socket or CPU group
358 unsigned ThreadPoolNum) const {
359
360 // After Windows 11 and Windows Server 2022, let the OS do the scheduling,
361 // since a process automatically gains access to all processor groups.
363 return;
364
365 std::optional<unsigned> Socket = compute_cpu_socket(ThreadPoolNum);
366 if (!Socket)
367 return;
368 ArrayRef<ProcessorGroup> Groups = getProcessorGroups();
369 GROUP_AFFINITY Affinity{};
370 Affinity.Group = Groups[*Socket].ID;
371 Affinity.Mask = Groups[*Socket].Affinity;
372 SetThreadGroupAffinity(GetCurrentThread(), &Affinity, nullptr);
373}
374
376 GROUP_AFFINITY Affinity{};
377 GetThreadGroupAffinity(GetCurrentThread(), &Affinity);
378
379 static unsigned All =
380 aggregate(getProcessorGroups(),
381 [](const ProcessorGroup &G) { return G.AllThreads; });
382
383 unsigned StartOffset =
384 aggregate(getProcessorGroups(), [&](const ProcessorGroup &G) {
385 return G.ID < Affinity.Group ? G.AllThreads : 0;
386 });
387
389 V.resize(All);
390 for (unsigned I = 0; I < sizeof(KAFFINITY) * 8; ++I) {
391 if ((Affinity.Mask >> I) & 1)
392 V.set(StartOffset + I);
393 }
394 return V;
395}
396
397unsigned llvm::get_cpus() { return getProcessorGroups().size(); }
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Analysis containing CSE Info
Definition CSEInfo.cpp:27
lazy value info
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
#define G(x, y, z)
Definition MD5.cpp:56
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
#define P(N)
This file defines the SmallString class.
static const X86InstrFMA3Group Groups[]
LLVM_ABI void apply_thread_strategy(unsigned ThreadPoolNum) const
Assign the current thread to an ideal hardware CPU or NUMA node.
LLVM_ABI std::optional< unsigned > compute_cpu_socket(unsigned ThreadPoolNum) const
Finds the CPU socket where a thread should go.
LLVM_ABI unsigned compute_thread_count() const
Retrieves the max available threads for the current strategy.
Definition Threading.cpp:41
LLVM_ABI HMODULE loadSystemModuleSecure(LPCWSTR lpModuleName)
Retrieves the handle to an in-memory system module such as ntdll.dll, while ensuring we're not retriev...
This is an optimization pass for GlobalISel generic memory operations.
void ReportLastErrorFatal(const char *Msg)
LLVM_ABI llvm::BitVector get_thread_affinity_mask()
Returns a mask that represents on which hardware thread, core, CPU, NUMA group, the calling thread ca...
Definition Threading.cpp:39
LLVM_ABI uint32_t get_max_thread_name_length()
Get the maximum length of a thread name on this platform.
Definition Threading.cpp:33
LLVM_ABI SetThreadPriorityResult set_thread_priority(ThreadPriority Priority)
LLVM_ABI unsigned get_cpus()
Returns how many physical CPUs or NUMA groups the system has.
LLVM_ABI bool RunningWindows11OrGreater()
Determines if the program is running on Windows 11 or Windows Server 2022.
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
LLVM_ABI void set_thread_name(const Twine &Name)
Set the name of the current thread.
Definition Threading.cpp:35
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
SetThreadPriorityResult
Definition Threading.h:267
LLVM_ABI void get_thread_name(SmallVectorImpl< char > &Name)
Get the name of the current thread.
Definition Threading.cpp:37
LLVM_ABI int get_physical_cores()
Returns how many physical cores (as opposed to logical cores returned from thread::hardware_concurren...
Definition Threading.cpp:47
LLVM_ABI uint64_t get_threadid()
Return the current thread id, as used in various OS system calls.
Definition Threading.cpp:31
ArrayRef(const T &OneElt) -> ArrayRef< T >
int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition bit.h:154