LLVM 23.0.0git
Threading.inc
Go to the documentation of this file.
1//===- Unix/Threading.inc - Unix Threading Implementation ----- -*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file provides the Unix specific implementation of Threading functions.
10//
11//===----------------------------------------------------------------------===//
12
13#include "Unix.h"
14#include "llvm/ADT/ScopeExit.h"
17#include "llvm/ADT/StringRef.h"
18#include "llvm/ADT/Twine.h"
21
22#if defined(__APPLE__)
23#include <mach/mach_init.h>
24#include <mach/mach_port.h>
25#include <pthread/qos.h>
26#include <sys/sysctl.h>
27#include <sys/types.h>
28#endif
29
30#include <pthread.h>
31
32#if defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__DragonFly__)
33#include <pthread_np.h> // For pthread_getthreadid_np() / pthread_set_name_np()
34#endif
35
36#include "llvm/Support/thread.h"
37
38#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
39#include <errno.h>
40#include <sys/cpuset.h>
41#include <sys/sysctl.h>
42#include <sys/user.h>
43#include <unistd.h>
44#endif
45
46#if defined(__NetBSD__)
47#include <lwp.h> // For _lwp_self()
48#endif
49
50#if defined(__OpenBSD__)
51#include <unistd.h> // For getthrid()
52#endif
53
54#if defined(__linux__)
55#include <sched.h> // For sched_getaffinity
56#include <sys/syscall.h> // For syscall codes
57#include <unistd.h> // For syscall()
58#endif
59
60#if defined(__CYGWIN__)
61#include <sys/cpuset.h>
62#endif
63
64#if defined(__HAIKU__)
65#include <OS.h> // For B_OS_NAME_LENGTH
66#endif
67
68namespace llvm {
69pthread_t
70llvm_execute_on_thread_impl(void *(*ThreadFunc)(void *), void *Arg,
71 std::optional<unsigned> StackSizeInBytes) {
72 int errnum;
73
74 // Construct the attributes object.
75 pthread_attr_t Attr;
76 if ((errnum = ::pthread_attr_init(&Attr)) != 0) {
77 ReportErrnumFatal("pthread_attr_init failed", errnum);
78 }
79
80 llvm::scope_exit AttrGuard([&] {
81 if ((errnum = ::pthread_attr_destroy(&Attr)) != 0) {
82 ReportErrnumFatal("pthread_attr_destroy failed", errnum);
83 }
84 });
85
86 // Set the requested stack size, if given.
87 if (StackSizeInBytes) {
88 if ((errnum = ::pthread_attr_setstacksize(&Attr, *StackSizeInBytes)) != 0) {
89 ReportErrnumFatal("pthread_attr_setstacksize failed", errnum);
90 }
91 }
92
93 // Construct and execute the thread.
94 pthread_t Thread;
95 if ((errnum = ::pthread_create(&Thread, &Attr, ThreadFunc, Arg)) != 0)
96 ReportErrnumFatal("pthread_create failed", errnum);
97
98 return Thread;
99}
100
101void llvm_thread_detach_impl(pthread_t Thread) {
102 int errnum;
103
104 if ((errnum = ::pthread_detach(Thread)) != 0) {
105 ReportErrnumFatal("pthread_detach failed", errnum);
106 }
107}
108
109void llvm_thread_join_impl(pthread_t Thread) {
110 int errnum;
111
112 if ((errnum = ::pthread_join(Thread, nullptr)) != 0) {
113 ReportErrnumFatal("pthread_join failed", errnum);
114 }
115}
116
117llvm::thread::id llvm_thread_get_id_impl(pthread_t Thread) {
118#ifdef __MVS__
119 return Thread.__;
120#else
121 return Thread;
122#endif
123}
124
125llvm::thread::id llvm_thread_get_current_id_impl() {
126 return llvm_thread_get_id_impl(::pthread_self());
127}
128
129} // namespace llvm
130
132#if defined(__APPLE__)
133 // Calling "mach_thread_self()" bumps the reference count on the thread
134 // port, so we need to deallocate it. mach_task_self() doesn't bump the ref
135 // count.
136 static thread_local thread_port_t Self = [] {
137 thread_port_t InitSelf = mach_thread_self();
138 mach_port_deallocate(mach_task_self(), Self);
139 return InitSelf;
140 }();
141 return Self;
142#elif defined(__FreeBSD__) || defined(__DragonFly__)
143 return uint64_t(pthread_getthreadid_np());
144#elif defined(__NetBSD__)
145 return uint64_t(_lwp_self());
146#elif defined(__OpenBSD__)
147 return uint64_t(getthrid());
148#elif defined(__ANDROID__)
149 return uint64_t(gettid());
150#elif defined(__linux__)
151 return uint64_t(syscall(__NR_gettid));
152#elif defined(_AIX)
153 return uint64_t(thread_self());
154#elif defined(__MVS__)
155 return llvm_thread_get_id_impl(pthread_self());
156#else
157 return uint64_t(pthread_self());
158#endif
159}
160
/// Compile-time lookup of the thread-name length limit for the target
/// platform. The first matching preprocessor case wins; 0 is returned when
/// none of the cases applies.
static constexpr uint32_t get_max_thread_name_length_impl() {
#if defined(PTHREAD_MAX_NAMELEN_NP)
  // The platform headers publish the limit directly.
  constexpr uint32_t MaxNameLength = PTHREAD_MAX_NAMELEN_NP;
#elif defined(__HAIKU__)
  constexpr uint32_t MaxNameLength = B_OS_NAME_LENGTH;
#elif defined(__APPLE__)
  constexpr uint32_t MaxNameLength = 64;
#elif defined(__sun__) && defined(__svr4__)
  constexpr uint32_t MaxNameLength = 31;
#elif defined(__linux__) && HAVE_PTHREAD_SETNAME_NP
  constexpr uint32_t MaxNameLength = 16;
#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) ||                  \
    defined(__DragonFly__)
  constexpr uint32_t MaxNameLength = 16;
#elif defined(__OpenBSD__)
  constexpr uint32_t MaxNameLength = 24;
#elif defined(__CYGWIN__)
  constexpr uint32_t MaxNameLength = 16;
#else
  constexpr uint32_t MaxNameLength = 0;
#endif
  return MaxNameLength;
}
183
185 return get_max_thread_name_length_impl();
186}
187
188void llvm::set_thread_name(const Twine &Name) {
189 // Make sure the input is null terminated.
190 SmallString<64> Storage;
191 StringRef NameStr = Name.toNullTerminatedStringRef(Storage);
192
193 // Truncate from the beginning, not the end, if the specified name is too
194 // long. For one, this ensures that the resulting string is still null
195 // terminated, but additionally the end of a long thread name will usually
196 // be more unique than the beginning, since a common pattern is for similar
197 // threads to share a common prefix.
198 // Note that the name length includes the null terminator.
200 NameStr = NameStr.take_back(get_max_thread_name_length() - 1);
201 (void)NameStr;
202#if defined(HAVE_PTHREAD_SET_NAME_NP) && HAVE_PTHREAD_SET_NAME_NP
203 ::pthread_set_name_np(::pthread_self(), NameStr.data());
204#elif defined(HAVE_PTHREAD_SETNAME_NP) && HAVE_PTHREAD_SETNAME_NP
205#if defined(__NetBSD__)
206 ::pthread_setname_np(::pthread_self(), "%s",
207 const_cast<char *>(NameStr.data()));
208#elif defined(__APPLE__)
209 ::pthread_setname_np(NameStr.data());
210#else
211 ::pthread_setname_np(::pthread_self(), NameStr.data());
212#endif
213#endif
214}
215
216void llvm::get_thread_name(SmallVectorImpl<char> &Name) {
217 Name.clear();
218
219#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
220 int pid = ::getpid();
221 uint64_t tid = get_threadid();
222
223 struct kinfo_proc *kp = nullptr, *nkp;
224 size_t len = 0;
225 int error;
226 int ctl[4] = {CTL_KERN, KERN_PROC, KERN_PROC_PID | KERN_PROC_INC_THREAD,
227 (int)pid};
228
229 while (1) {
230 error = sysctl(ctl, 4, kp, &len, nullptr, 0);
231 if (kp == nullptr || (error != 0 && errno == ENOMEM)) {
232 // Add extra space in case threads are added before next call.
233 len += sizeof(*kp) + len / 10;
234 nkp = (struct kinfo_proc *)::realloc(kp, len);
235 if (nkp == nullptr) {
236 free(kp);
237 return;
238 }
239 kp = nkp;
240 continue;
241 }
242 if (error != 0)
243 len = 0;
244 break;
245 }
246
247 for (size_t i = 0; i < len / sizeof(*kp); i++) {
248 if (kp[i].ki_tid == (lwpid_t)tid) {
249 Name.append(kp[i].ki_tdname, kp[i].ki_tdname + strlen(kp[i].ki_tdname));
250 break;
251 }
252 }
253 free(kp);
254 return;
255#elif (defined(__linux__) || defined(__CYGWIN__)) && HAVE_PTHREAD_GETNAME_NP
256 constexpr uint32_t len = get_max_thread_name_length_impl();
257 char Buffer[len] = {'\0'}; // FIXME: working around MSan false positive.
258 if (0 == ::pthread_getname_np(::pthread_self(), Buffer, len))
259 Name.append(Buffer, Buffer + strlen(Buffer));
260#elif defined(HAVE_PTHREAD_GET_NAME_NP) && HAVE_PTHREAD_GET_NAME_NP
261 constexpr uint32_t len = get_max_thread_name_length_impl();
262 char buf[len];
263 ::pthread_get_name_np(::pthread_self(), buf, len);
264
265 Name.append(buf, buf + strlen(buf));
266
267#elif defined(HAVE_PTHREAD_GETNAME_NP) && HAVE_PTHREAD_GETNAME_NP
268 constexpr uint32_t len = get_max_thread_name_length_impl();
269 char buf[len];
270 ::pthread_getname_np(::pthread_self(), buf, len);
271
272 Name.append(buf, buf + strlen(buf));
273#endif
274}
275
277llvm::set_thread_priority(ThreadPriority Priority) {
278#if (defined(__linux__) || defined(__CYGWIN__)) && defined(SCHED_IDLE)
279 // Some *really* old glibcs are missing SCHED_IDLE.
280 // http://man7.org/linux/man-pages/man3/pthread_setschedparam.3.html
281 // http://man7.org/linux/man-pages/man2/sched_setscheduler.2.html
282 sched_param priority;
283 // For each of the above policies, param->sched_priority must be 0.
284 priority.sched_priority = 0;
285 // SCHED_IDLE for running very low priority background jobs.
286 // SCHED_OTHER the standard round-robin time-sharing policy;
287 return !pthread_setschedparam(
288 pthread_self(),
289 // FIXME: consider SCHED_BATCH for Low
290 Priority == ThreadPriority::Default ? SCHED_OTHER : SCHED_IDLE,
291 &priority)
292 ? SetThreadPriorityResult::SUCCESS
293 : SetThreadPriorityResult::FAILURE;
294#elif defined(__APPLE__)
295 // https://developer.apple.com/documentation/apple-silicon/tuning-your-code-s-performance-for-apple-silicon
296 //
297 // Background - Applies to work that isn’t visible to the user and may take
298 // significant time to complete. Examples include indexing, backing up, or
299 // synchronizing data. This class emphasizes energy efficiency.
300 //
301 // Utility - Applies to work that takes anywhere from a few seconds to a few
302 // minutes to complete. Examples include downloading a document or importing
303 // data. This class offers a balance between responsiveness, performance, and
304 // energy efficiency.
305 const auto qosClass = [&]() {
306 switch (Priority) {
307 case ThreadPriority::Background:
308 return QOS_CLASS_BACKGROUND;
309 case ThreadPriority::Low:
310 return QOS_CLASS_UTILITY;
311 case ThreadPriority::Default:
312 return QOS_CLASS_DEFAULT;
313 }
314 }();
315 return !pthread_set_qos_class_self_np(qosClass, 0)
316 ? SetThreadPriorityResult::SUCCESS
317 : SetThreadPriorityResult::FAILURE;
318#endif
319 return SetThreadPriorityResult::FAILURE;
320}
321
322#include <thread>
323
/// Counts the hardware threads available to the caller.
///
/// On FreeBSD, Linux and Cygwin the count comes from the affinity mask when
/// that query succeeds. Otherwise the result is
/// std::thread::hardware_concurrency(), or 1 if that reports 0.
static int computeHostNumHardwareThreads() {
#if defined(__FreeBSD__)
  cpuset_t AffinityMask;
  CPU_ZERO(&AffinityMask);
  const int QueryResult =
      cpuset_getaffinity(CPU_LEVEL_WHICH, CPU_WHICH_TID, -1,
                         sizeof(AffinityMask), &AffinityMask);
  if (QueryResult == 0)
    return CPU_COUNT(&AffinityMask);
#elif (defined(__linux__) || defined(__CYGWIN__))
  cpu_set_t AffinityMask;
  CPU_ZERO(&AffinityMask);
  const int QueryResult =
      sched_getaffinity(0, sizeof(AffinityMask), &AffinityMask);
  if (QueryResult == 0)
    return CPU_COUNT(&AffinityMask);
#endif
  // hardware_concurrency() may return 0; never report fewer than one thread.
  const unsigned Concurrency = std::thread::hardware_concurrency();
  return Concurrency != 0 ? static_cast<int>(Concurrency) : 1;
}
342
344 unsigned ThreadPoolNum) const {}
345
347 // FIXME: Implement
348 llvm_unreachable("Not implemented!");
349}
350
351unsigned llvm::get_cpus() { return 1; }
352
353#if (defined(__linux__) || defined(__CYGWIN__)) && \
354 (defined(__i386__) || defined(__x86_64__))
355// On Linux, the number of physical cores can be computed from /proc/cpuinfo,
356// using the number of unique physical/core id pairs. The following
357// implementation reads the /proc/cpuinfo format on an x86_64 system.
358static int computeHostNumPhysicalCores() {
359 // Enabled represents the number of physical id/core id pairs with at least
360 // one processor id enabled by the CPU affinity mask.
361 cpu_set_t Affinity, Enabled;
362 if (sched_getaffinity(0, sizeof(Affinity), &Affinity) != 0)
363 return -1;
364 CPU_ZERO(&Enabled);
365
366 // Read /proc/cpuinfo as a stream (until EOF reached). It cannot be
367 // mmapped because it appears to have 0 size.
370 if (std::error_code EC = Text.getError()) {
371 llvm::errs() << "Can't read "
372 << "/proc/cpuinfo: " << EC.message() << "\n";
373 return -1;
374 }
376 (*Text)->getBuffer().split(strs, "\n", /*MaxSplit=*/-1,
377 /*KeepEmpty=*/false);
378 int CurProcessor = -1;
379 int CurPhysicalId = -1;
380 int CurSiblings = -1;
381 int CurCoreId = -1;
382 for (llvm::StringRef Line : strs) {
383 std::pair<llvm::StringRef, llvm::StringRef> Data = Line.split(':');
384 auto Name = Data.first.trim();
385 auto Val = Data.second.trim();
386 // These fields are available if the kernel is configured with CONFIG_SMP.
387 if (Name == "processor")
388 Val.getAsInteger(10, CurProcessor);
389 else if (Name == "physical id")
390 Val.getAsInteger(10, CurPhysicalId);
391 else if (Name == "siblings")
392 Val.getAsInteger(10, CurSiblings);
393 else if (Name == "core id") {
394 Val.getAsInteger(10, CurCoreId);
395 // The processor id corresponds to an index into cpu_set_t.
396 if (CPU_ISSET(CurProcessor, &Affinity))
397 CPU_SET(CurPhysicalId * CurSiblings + CurCoreId, &Enabled);
398 }
399 }
400 return CPU_COUNT(&Enabled);
401}
402#elif (defined(__linux__) && defined(__s390x__)) || defined(_AIX)
// Reports the value of sysconf(_SC_NPROCESSORS_ONLN), converted to int.
static int computeHostNumPhysicalCores() {
  const long OnlineProcessors = sysconf(_SC_NPROCESSORS_ONLN);
  return static_cast<int>(OnlineProcessors);
}
406#elif defined(__linux__)
// Counts the CPUs in the calling thread's affinity mask.
//
// Returns the number of CPUs set in the mask, or -1 if the mask cannot be
// obtained.
static int computeHostNumPhysicalCores() {
  cpu_set_t Affinity;
  if (sched_getaffinity(0, sizeof(Affinity), &Affinity) == 0)
    return CPU_COUNT(&Affinity);

  // The call to sched_getaffinity() may have failed because the Affinity
  // mask is too small for the number of CPUs on the system (i.e. the
  // system has more than 1024 CPUs). Allocate a mask large enough for
  // twice as many CPUs.
  constexpr int MaxCPUs = 2048;
  cpu_set_t *DynAffinity = CPU_ALLOC(MaxCPUs);
  if (DynAffinity == nullptr)
    return -1;

  const size_t DynAffinitySize = CPU_ALLOC_SIZE(MaxCPUs);
  int NumCPUs = -1;
  // A dynamically sized set must be counted with CPU_COUNT_S: plain
  // CPU_COUNT only looks at sizeof(cpu_set_t) worth of bits.
  if (sched_getaffinity(0, DynAffinitySize, DynAffinity) == 0)
    NumCPUs = CPU_COUNT_S(DynAffinitySize, DynAffinity);

  // Release the set on the failure path as well as on success.
  CPU_FREE(DynAffinity);
  return NumCPUs;
}
425#elif defined(__APPLE__)
426// Gets the number of *physical cores* on the machine.
427static int computeHostNumPhysicalCores() {
428 uint32_t count;
429 size_t len = sizeof(count);
430 sysctlbyname("hw.physicalcpu", &count, &len, NULL, 0);
431 if (count < 1) {
432 int nm[2];
433 nm[0] = CTL_HW;
434 nm[1] = HW_AVAILCPU;
435 sysctl(nm, 2, &count, &len, NULL, 0);
436 if (count < 1)
437 return -1;
438 }
439 return count;
440}
441#elif defined(__MVS__)
// Returns the number of online standard CPs by following a chain of
// fixed-offset table pointers: PSA (addressed from location zero) -> CVT ->
// CSD, then reading a signed 32-bit counter out of the CSD.
442static int computeHostNumPhysicalCores() {
443 enum {
444 // Byte offset of the pointer to the Communications Vector Table (CVT) in
445 // the Prefixed Save Area (PSA). The table entry is a 31-bit pointer and
446 // will be zero-extended to uintptr_t.
447 FLCCVT = 16,
448 // Byte offset of the pointer to the Common System Data Area (CSD) in the
449 // CVT. The table entry is a 31-bit pointer and will be zero-extended to
450 // uintptr_t.
451 CVTCSD = 660,
452 // Byte offset to the number of live CPs in the LPAR, stored as a signed
453 // 32-bit value in the table.
454 CSD_NUMBER_ONLINE_STANDARD_CPS = 264,
455 };
// The PSA is addressed from location zero.
456 char *PSA = 0;
// Load the unsigned 32-bit entry at PSA + FLCCVT and widen it to a pointer.
457 char *CVT = reinterpret_cast<char *>(
458 static_cast<uintptr_t>(reinterpret_cast<unsigned int &>(PSA[FLCCVT])));
// Same again, one level down: the entry at CVT + CVTCSD locates the CSD.
459 char *CSD = reinterpret_cast<char *>(
460 static_cast<uintptr_t>(reinterpret_cast<unsigned int &>(CVT[CVTCSD])));
// Read the counter in place as an int.
461 return reinterpret_cast<int &>(CSD[CSD_NUMBER_ONLINE_STANDARD_CPS]);
462}
463#else
// Fallback for systems without a dedicated implementation above: the
// physical core count is unknown, which is signalled by -1.
static int computeHostNumPhysicalCores() {
  constexpr int UnknownCoreCount = -1;
  return UnknownCoreCount;
}
466#endif
467
469 static int NumCores = computeHostNumPhysicalCores();
470 return NumCores;
471}
static constexpr unsigned long long mask(BlockVerifier::State S)
This file defines the make_scope_exit function, which executes user-defined cleanup logic at scope ex...
This file defines the SmallString class.
This file defines the SmallVector class.
#define error(X)
static void ReportErrnumFatal(const char *Msg, int errnum)
Definition Unix.h:63
Represents either an error or a value T.
Definition ErrorOr.h:56
static ErrorOr< std::unique_ptr< MemoryBuffer > > getFileAsStream(const Twine &Filename)
Read all of the specified file into a MemoryBuffer as a stream (i.e.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
LLVM_ABI void apply_thread_strategy(unsigned ThreadPoolNum) const
Assign the current thread to an ideal hardware CPU or NUMA node.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
LLVM_ABI llvm::BitVector get_thread_affinity_mask()
Returns a mask that represents on which hardware thread, core, CPU, NUMA group, the calling thread ca...
Definition Threading.cpp:41
LLVM_ABI uint32_t get_max_thread_name_length()
Get the maximum length of a thread name on this platform.
Definition Threading.cpp:35
LLVM_ABI SetThreadPriorityResult set_thread_priority(ThreadPriority Priority)
LLVM_ABI unsigned get_cpus()
Returns how many physical CPUs or NUMA groups the system has.
LLVM_ABI void set_thread_name(const Twine &Name)
Set the name of the current thread.
Definition Threading.cpp:37
SetThreadPriorityResult
Definition Threading.h:285
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
LLVM_ABI void get_thread_name(SmallVectorImpl< char > &Name)
Get the name of the current thread.
Definition Threading.cpp:39
FunctionAddr VTableAddr uintptr_t uintptr_t Data
Definition InstrProf.h:189
LLVM_ABI int get_physical_cores()
Returns how many physical cores (as opposed to logical cores returned from thread::hardware_concurren...
Definition Threading.cpp:49
LLVM_ABI uint64_t get_threadid()
Return the current thread id, as used in various OS system calls.
Definition Threading.cpp:33
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition STLExtras.h:2002
@ Enabled
Convert any .debug_str_offsets tables to DWARF64 if needed.
Definition DWP.h:27