LLVM  16.0.0git
Threading.h
Go to the documentation of this file.
1 //===-- llvm/Support/Threading.h - Control multithreading mode --*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file declares helper functions for running LLVM in a multi-threaded
10 // environment.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_SUPPORT_THREADING_H
15 #define LLVM_SUPPORT_THREADING_H
16 
17 #include "llvm/ADT/BitVector.h"
18 #include "llvm/ADT/StringRef.h"
19 #include "llvm/Config/llvm-config.h" // for LLVM_ON_UNIX
20 #include "llvm/Support/Compiler.h"
21 #include <ciso646> // So we can check the C++ standard lib macros.
22 #include <optional>
23 
24 #if defined(_MSC_VER)
25 // MSVC's call_once implementation has worked since VS 2015, which is the
26 // minimum supported version as of this writing.
27 #define LLVM_THREADING_USE_STD_CALL_ONCE 1
28 #elif defined(LLVM_ON_UNIX) && \
29  (defined(_LIBCPP_VERSION) || \
30  !(defined(__NetBSD__) || defined(__OpenBSD__) || defined(__powerpc__)))
31 // std::call_once from libc++ is used on all Unix platforms. Other
32 // implementations like libstdc++ are known to have problems on NetBSD,
33 // OpenBSD and PowerPC.
34 #define LLVM_THREADING_USE_STD_CALL_ONCE 1
35 #elif defined(LLVM_ON_UNIX) && \
36  (defined(__powerpc__) && defined(__LITTLE_ENDIAN__))
37 #define LLVM_THREADING_USE_STD_CALL_ONCE 1
38 #else
39 #define LLVM_THREADING_USE_STD_CALL_ONCE 0
40 #endif
41 
42 #if LLVM_THREADING_USE_STD_CALL_ONCE
43 #include <mutex>
44 #else
45 #include "llvm/Support/Atomic.h"
46 #endif
47 
48 namespace llvm {
49 class Twine;
50 
51 /// Returns true if LLVM was compiled with support for multi-threading (the
52 /// LLVM_ENABLE_THREADS configuration macro is non-zero), and false otherwise.
53 constexpr bool llvm_is_multithreaded() { return LLVM_ENABLE_THREADS; }
54 
55 #if LLVM_THREADING_USE_STD_CALL_ONCE
56 
57  typedef std::once_flag once_flag;
58 
59 #else
60 
61  enum InitStatus { Uninitialized = 0, Wait = 1, Done = 2 };
62 
63  /// The llvm::once_flag structure
64  ///
65  /// This type is modeled after std::once_flag to use with llvm::call_once.
66  /// This structure must be used as an opaque object. It is a struct to force
67  /// autoinitialization and behave like std::once_flag.
68  struct once_flag {
69  volatile sys::cas_flag status = Uninitialized; // Holds an InitStatus value.
70  };
71 
72 #endif
73 
74  /// Execute the function specified as a parameter once.
75  ///
76  /// Typical usage:
77  /// \code
78  /// void foo() {...};
79  /// ...
80  /// static once_flag flag;
81  /// call_once(flag, foo);
82  /// \endcode
83  ///
84  /// \param flag Flag used for tracking whether or not this has run.
85  /// \param F Function to call once; \p ArgList is forwarded to it as arguments.
86  template <typename Function, typename... Args>
87  void call_once(once_flag &flag, Function &&F, Args &&... ArgList) {
88 #if LLVM_THREADING_USE_STD_CALL_ONCE
89  std::call_once(flag, std::forward<Function>(F),
90  std::forward<Args>(ArgList)...);
91 #else
92  // For other platforms we use a generic (if brittle) version based on our
93  // atomics.
94  sys::cas_flag old_val = sys::CompareAndSwap(&flag.status, Wait, Uninitialized);
95  if (old_val == Uninitialized) {
96  std::forward<Function>(F)(std::forward<Args>(ArgList)...);
97  sys::MemoryFence(); // Order F's writes before the Done store below.
98  TsanIgnoreWritesBegin();
99  TsanHappensBefore(&flag.status);
100  flag.status = Done;
101  TsanIgnoreWritesEnd();
102  } else {
103  // Wait until any thread doing the call has finished.
104  sys::cas_flag tmp = flag.status;
105  sys::MemoryFence();
106  while (tmp != Done) {
107  tmp = flag.status;
108  sys::MemoryFence(); // Fence after every poll of the status word.
109  }
110  }
111  TsanHappensAfter(&flag.status);
112 #endif
113  }
114 
115  /// This tells how a thread pool will be used.
116  class ThreadPoolStrategy {
117  public:
118  // The default value (0) means all available threads should be used,
119  // taking the affinity mask into account. If set, this value only represents
120  // a suggested high bound, the runtime might choose a lower value (not
121  // higher).
122  unsigned ThreadsRequested = 0;
123 
124  // If SMT is active, use hyper threads. If false, there will be only one
125  // std::thread per core.
126  bool UseHyperThreads = true;
127 
128  // If set, will constrain 'ThreadsRequested' to the number of hardware
129  // threads, or hardware cores.
130  bool Limit = false;
131 
132  /// Retrieves the max available threads for the current strategy. This
133  /// accounts for affinity masks and takes advantage of all CPU sockets.
134  unsigned compute_thread_count() const;
135 
136  /// Assign the current thread to an ideal hardware CPU or NUMA node. In a
137  /// multi-socket system, this ensures threads are assigned to all CPU
138  /// sockets. \p ThreadPoolNum represents a number bounded by [0,
139  /// compute_thread_count()).
140  void apply_thread_strategy(unsigned ThreadPoolNum) const;
141 
142  /// Finds the CPU socket where a thread should go. Returns std::nullopt if
143  /// the thread shall remain on the actual CPU socket.
144  std::optional<unsigned> compute_cpu_socket(unsigned ThreadPoolNum) const;
145  };
146 
147  /// Build a strategy from a number of threads as a string provided in \p Num.
148  /// When Num is above the max number of threads specified by the \p Default
149  /// strategy, we attempt to equally allocate the threads on all CPU sockets.
150  /// "0" or an empty string will return the \p Default strategy.
151  /// "all" for using all hardware threads.
152  std::optional<ThreadPoolStrategy>
154 
155  /// Returns a thread strategy for tasks requiring significant memory or other
156  /// resources. To be used for workloads where hardware_concurrency() proves to
157  /// be less efficient. Avoid this strategy if doing lots of I/O. Currently
158  /// based on physical cores, if available for the host system, otherwise falls
159  /// back to hardware_concurrency(). Returns 1 when LLVM is configured with
160  /// LLVM_ENABLE_THREADS = OFF.
161  inline ThreadPoolStrategy
162  heavyweight_hardware_concurrency(unsigned ThreadCount = 0) {
163  ThreadPoolStrategy S;
164  S.UseHyperThreads = false;
165  S.ThreadsRequested = ThreadCount;
166  return S;
167  }
168 
169  /// Like heavyweight_hardware_concurrency() above, but builds a strategy
170  /// based on the rules described for get_threadpool_strategy().
171  /// If \p Num is invalid, returns a default strategy where one thread per
172  /// hardware core is used.
173  inline ThreadPoolStrategy heavyweight_hardware_concurrency(StringRef Num) {
174  std::optional<ThreadPoolStrategy> S =
175  get_threadpool_strategy(Num, heavyweight_hardware_concurrency());
176  if (S)
177  return *S;
178  return heavyweight_hardware_concurrency();
179  }
180 
181  /// Returns a default thread strategy where all available hardware resources
182  /// are to be used, except for those initially excluded by an affinity mask.
183  /// This function takes affinity into consideration. Returns 1 when LLVM is
184  /// configured with LLVM_ENABLE_THREADS=OFF.
185  inline ThreadPoolStrategy hardware_concurrency(unsigned ThreadCount = 0) {
186  ThreadPoolStrategy S;
187  S.ThreadsRequested = ThreadCount;
188  return S;
189  }
190 
191  /// Returns an optimal thread strategy to execute specified amount of tasks.
192  /// This strategy should prevent us from creating too many threads if we
193  /// occasionally have an unexpectedly small amount of tasks.
194  inline ThreadPoolStrategy optimal_concurrency(unsigned TaskCount = 0) {
195  ThreadPoolStrategy S;
196  S.Limit = true;
197  S.ThreadsRequested = TaskCount;
198  return S;
199  }
200 
201  /// Return the current thread id, as used in various OS system calls.
202  /// Note that not all platforms guarantee that the value returned will be
203  /// unique across the entire system, so portable code should not assume
204  /// this.
205  uint64_t get_threadid();
206 
207  /// Get the maximum length of a thread name on this platform.
208  /// A value of 0 means there is no limit.
209  uint32_t get_max_thread_name_length();
210 
211  /// Set the name of the current thread. Setting a thread's name can
212  /// be helpful for enabling useful diagnostics under a debugger or when
213  /// logging. The level of support for setting a thread's name varies
214  /// wildly across operating systems, and we only make a best effort to
215  /// perform the operation on supported platforms. No indication of success
216  /// or failure is returned.
217  void set_thread_name(const Twine &Name);
218 
219  /// Get the name of the current thread. The level of support for
220  /// getting a thread's name varies wildly across operating systems, and it
221  /// is not even guaranteed that if you can successfully set a thread's name
222  /// that you can later get it back. This function is intended for diagnostic
223  /// purposes, and as with setting a thread's name no indication of whether
224  /// the operation succeeded or failed is returned.
225  void get_thread_name(SmallVectorImpl<char> &Name);
226 
227  /// Returns a mask that represents on which hardware thread, core, CPU, NUMA
228  /// group, the calling thread can be executed. On Windows, threads cannot
229  /// cross CPU sockets boundaries.
230  llvm::BitVector get_thread_affinity_mask();
231 
232  /// Returns how many physical CPUs or NUMA groups the system has.
233  unsigned get_cpus();
234 
235  /// Returns how many physical cores (as opposed to logical cores returned from
236  /// thread::hardware_concurrency(), which includes hyperthreads).
237  /// Returns -1 if unknown for the current host system.
238  int get_physical_cores();
239 
240  enum class ThreadPriority {
241  /// Lower the current thread's priority as much as possible. Can be used
242  /// for long-running tasks that are not time critical; more energy-
243  /// efficient than Low.
244  Background = 0,
245 
246  /// Lower the current thread's priority such that it does not affect
247  /// foreground tasks significantly. This is a good default for long-
248  /// running, latency-insensitive tasks to make sure cpu is not hogged
249  /// by this task.
250  Low = 1,
251 
252  /// Restore the current thread's priority to default scheduling priority.
253  Default = 2,
254  };
255  enum class SetThreadPriorityResult { FAILURE, SUCCESS };
256  SetThreadPriorityResult set_thread_priority(ThreadPriority Priority);
257 }
258 
259 #endif
llvm::ThreadPriority::Default
@ Default
Restore the current thread's priority to default scheduling priority.
llvm::ThreadPoolStrategy::compute_thread_count
unsigned compute_thread_count() const
Retrieves the max available threads for the current strategy.
Definition: Threading.cpp:55
TsanIgnoreWritesBegin
#define TsanIgnoreWritesBegin()
Definition: Compiler.h:473
llvm::hardware_concurrency
ThreadPoolStrategy hardware_concurrency(unsigned ThreadCount=0)
Returns a default thread strategy where all available hardware resources are to be used,...
Definition: Threading.h:185
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
Atomic.h
llvm::sys::MemoryFence
void MemoryFence()
Definition: Atomic.cpp:30
llvm::ThreadPoolStrategy::UseHyperThreads
bool UseHyperThreads
Definition: Threading.h:126
llvm::Function
Definition: Function.h:60
StringRef.h
llvm::get_threadpool_strategy
std::optional< ThreadPoolStrategy > get_threadpool_strategy(StringRef Num, ThreadPoolStrategy Default={})
Build a strategy from a number of threads as a string provided in Num.
Definition: Threading.cpp:94
llvm::ThreadPoolStrategy
This tells how a thread pool will be used.
Definition: Threading.h:116
llvm::heavyweight_hardware_concurrency
ThreadPoolStrategy heavyweight_hardware_concurrency(unsigned ThreadCount=0)
Returns a thread strategy for tasks requiring significant memory or other resources.
Definition: Threading.h:162
tmp
alloca< 16 x float >, align 16 %tmp2=alloca< 16 x float >, align 16 store< 16 x float > %A,< 16 x float > *%tmp %s=bitcast< 16 x float > *%tmp to i8 *%s2=bitcast< 16 x float > *%tmp2 to i8 *call void @llvm.memcpy.i64(i8 *%s, i8 *%s2, i64 64, i32 16) %R=load< 16 x float > *%tmp2 ret< 16 x float > %R } declare void @llvm.memcpy.i64(i8 *nocapture, i8 *nocapture, i64, i32) nounwind which compiles to:_foo:subl $140, %esp movaps %xmm3, 112(%esp) movaps %xmm2, 96(%esp) movaps %xmm1, 80(%esp) movaps %xmm0, 64(%esp) movl 60(%esp), %eax movl %eax, 124(%esp) movl 56(%esp), %eax movl %eax, 120(%esp) movl 52(%esp), %eax< many many more 32-bit copies > movaps(%esp), %xmm0 movaps 16(%esp), %xmm1 movaps 32(%esp), %xmm2 movaps 48(%esp), %xmm3 addl $140, %esp ret On Nehalem, it may even be cheaper to just use movups when unaligned than to fall back to lower-granularity chunks. Implement processor-specific optimizations for parity with GCC on these processors. GCC does two optimizations:1. ix86_pad_returns inserts a noop before ret instructions if immediately preceded by a conditional branch or is the target of a jump. 2. ix86_avoid_jump_misspredicts inserts noops in cases where a 16-byte block of code contains more than 3 branches. The first one is done for all AMDs, Core2, and "Generic" The second one is done for:Atom, Pentium Pro, all AMDs, Pentium 4, Nocona, Core 2, and "Generic" Testcase:int x(int a) { return(a &0xf0)> >4 tmp
Definition: README.txt:1347
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::sys::CompareAndSwap
cas_flag CompareAndSwap(volatile cas_flag *ptr, cas_flag new_value, cas_flag old_value)
Definition: Atomic.cpp:44
llvm::get_threadid
uint64_t get_threadid()
Return the current thread id, as used in various OS system calls.
llvm::ThreadPriority::Background
@ Background
Lower the current thread's priority as much as possible.
llvm::optimal_concurrency
ThreadPoolStrategy optimal_concurrency(unsigned TaskCount=0)
Returns an optimal thread strategy to execute specified amount of tasks.
Definition: Threading.h:194
llvm::get_max_thread_name_length
uint32_t get_max_thread_name_length()
Get the maximum length of a thread name on this platform.
llvm::ThreadPriority::Low
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
BitVector.h
llvm::get_thread_name
void get_thread_name(SmallVectorImpl< char > &Name)
Get the name of the current thread.
llvm::ThreadPoolStrategy::apply_thread_strategy
void apply_thread_strategy(unsigned ThreadPoolNum) const
Assign the current thread to an ideal hardware CPU or NUMA node.
llvm::BitVector
Definition: BitVector.h:75
llvm::SetThreadPriorityResult::FAILURE
@ FAILURE
uint64_t
llvm::ThreadPriority
ThreadPriority
Definition: Threading.h:240
llvm::sys::Wait
ProcessInfo Wait(const ProcessInfo &PI, unsigned SecondsToWait, bool WaitUntilTerminates, std::string *ErrMsg=nullptr, std::optional< ProcessStatistics > *ProcStat=nullptr)
This function waits for the process specified by PI to finish.
llvm::ThreadPoolStrategy::ThreadsRequested
unsigned ThreadsRequested
Definition: Threading.h:122
llvm::get_physical_cores
int get_physical_cores()
Returns how many physical cores (as opposed to logical cores returned from thread::hardware_concurren...
llvm::llvm_is_multithreaded
constexpr bool llvm_is_multithreaded()
Returns true if LLVM is compiled with support for multi-threading, and false otherwise.
Definition: Threading.h:53
TsanHappensAfter
#define TsanHappensAfter(cv)
Definition: Compiler.h:472
TsanHappensBefore
#define TsanHappensBefore(cv)
Definition: Compiler.h:471
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
uint32_t
Compiler.h
S
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
Definition: README.txt:210
llvm::once_flag
std::once_flag once_flag
Definition: Threading.h:57
llvm::SetThreadPriorityResult::SUCCESS
@ SUCCESS
llvm::call_once
void call_once(once_flag &flag, Function &&F, Args &&... ArgList)
Execute the function specified as a parameter once.
Definition: Threading.h:87
llvm::ThreadPoolStrategy::compute_cpu_socket
std::optional< unsigned > compute_cpu_socket(unsigned ThreadPoolNum) const
Finds the CPU socket where a thread should go.
llvm::AllocFnKind::Uninitialized
@ Uninitialized
TsanIgnoreWritesEnd
#define TsanIgnoreWritesEnd()
Definition: Compiler.h:474
llvm::set_thread_name
void set_thread_name(const Twine &Name)
Set the name of the current thread.
llvm::ThreadPoolStrategy::Limit
bool Limit
Definition: Threading.h:130
llvm::get_cpus
unsigned get_cpus()
Returns how many physical CPUs or NUMA groups the system has.
llvm::SetThreadPriorityResult
SetThreadPriorityResult
Definition: Threading.h:255
llvm::sys::fs::status
std::error_code status(const Twine &path, file_status &result, bool follow=true)
Get file status as if by POSIX stat().
llvm::get_thread_affinity_mask
llvm::BitVector get_thread_affinity_mask()
Returns a mask that represents on which hardware thread, core, CPU, NUMA group, the calling thread ca...
llvm::AMDGPU::HSAMD::Kernel::Key::Args
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
Definition: AMDGPUMetadata.h:394
llvm::set_thread_priority
SetThreadPriorityResult set_thread_priority(ThreadPriority Priority)
llvm::sys::cas_flag
uint32_t cas_flag
Definition: Atomic.h:34
llvm::codeview::PublicSymFlags::Function
@ Function