LLVM  13.0.0git
Threading.h
Go to the documentation of this file.
1 //===-- llvm/Support/Threading.h - Control multithreading mode --*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file declares helper functions for running LLVM in a multi-threaded
10 // environment.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_SUPPORT_THREADING_H
15 #define LLVM_SUPPORT_THREADING_H
16 
17 #include "llvm/ADT/BitVector.h"
18 #include "llvm/ADT/FunctionExtras.h"
19 #include "llvm/ADT/SmallVector.h"
20 #include "llvm/ADT/StringRef.h"
21 #include "llvm/Config/llvm-config.h" // for LLVM_ON_UNIX
22 #include "llvm/Support/Compiler.h"
23 #include <ciso646> // So we can check the C++ standard lib macros.
24 #include <functional>
25 
26 #if defined(_MSC_VER)
27 // MSVC's call_once implementation has worked since VS 2015, which is the
28 // minimum supported version as of this writing.
29 #define LLVM_THREADING_USE_STD_CALL_ONCE 1
30 #elif defined(LLVM_ON_UNIX) && \
31  (defined(_LIBCPP_VERSION) || \
32  !(defined(__NetBSD__) || defined(__OpenBSD__) || \
33  (defined(__ppc__) || defined(__PPC__))))
34 // std::call_once from libc++ is used on all Unix platforms. Other
35 // implementations like libstdc++ are known to have problems on NetBSD,
36 // OpenBSD and PowerPC.
37 #define LLVM_THREADING_USE_STD_CALL_ONCE 1
38 #elif defined(LLVM_ON_UNIX) && \
39  ((defined(__ppc__) || defined(__PPC__)) && defined(__LITTLE_ENDIAN__))
40 #define LLVM_THREADING_USE_STD_CALL_ONCE 1
41 #else
42 #define LLVM_THREADING_USE_STD_CALL_ONCE 0
43 #endif
44 
45 #if LLVM_THREADING_USE_STD_CALL_ONCE
46 #include <mutex>
47 #else
48 #include "llvm/Support/Atomic.h"
49 #endif
50 
51 namespace llvm {
52 class Twine;
53 
54 /// Returns true if LLVM is compiled with support for multi-threading, and
55 /// false otherwise.
56 bool llvm_is_multithreaded();
57 
58 /// Execute the given \p UserFn on a separate thread, passing it the provided \p
59 /// UserData, and wait for thread completion.
60 ///
61 /// This function does not guarantee that the code will actually be executed
62 /// on a separate thread or honoring the requested stack size, but tries to do
63 /// so where system support is available.
64 ///
65 /// \param UserFn - The callback to execute.
66 /// \param UserData - An argument to pass to the callback function.
67 /// \param StackSizeInBytes - A requested size (in bytes) for the thread stack
68 /// (or None for default)
69 void llvm_execute_on_thread(
70  void (*UserFn)(void *), void *UserData,
71  llvm::Optional<unsigned> StackSizeInBytes = llvm::None);
72 
73 /// Schedule the given \p Func for execution on a separate thread, then return
74 /// to the caller immediately. Roughly equivalent to
75 /// `std::thread(Func).detach()`, except it allows requesting a specific stack
76 /// size, if supported for the platform.
77 ///
78 /// This function reports a fatal error if it can't execute the code
79 /// on a separate thread.
80 ///
81 /// \param Func - The callback to execute.
82 /// \param StackSizeInBytes - A requested size (in bytes) for the thread stack
83 /// (or None for default)
84 void llvm_execute_on_thread_async(
85  llvm::unique_function<void()> Func,
86  llvm::Optional<unsigned> StackSizeInBytes = llvm::None);
87 
88 #if LLVM_THREADING_USE_STD_CALL_ONCE
89 
90  typedef std::once_flag once_flag; // std::call_once path: reuse the standard flag type.
91 
92 #else
93 
94  enum InitStatus { Uninitialized = 0, Wait = 1, Done = 2 };
95 
96  /// The llvm::once_flag structure
97  ///
98  /// This type is modeled after std::once_flag to use with llvm::call_once.
99  /// This structure must be used as an opaque object. It is a struct to force
100  /// autoinitialization and behave like std::once_flag.
101  struct once_flag {
102  volatile sys::cas_flag status = Uninitialized;
103  };
104 
105 #endif
106 
107  /// Execute the function specified as a parameter once.
108  ///
109  /// Typical usage:
110  /// \code
111  /// void foo() {...};
112  /// ...
113  /// static once_flag flag;
114  /// call_once(flag, foo);
115  /// \endcode
116  ///
117  /// \param flag Flag used for tracking whether or not this has run.
118  /// \param F Function to call once; any \p ArgList values are forwarded to it.
119  template <typename Function, typename... Args>
120  void call_once(once_flag &flag, Function &&F, Args &&... ArgList) {
121 #if LLVM_THREADING_USE_STD_CALL_ONCE
122  std::call_once(flag, std::forward<Function>(F),
123  std::forward<Args>(ArgList)...);
124 #else
125  // For other platforms we use a generic (if brittle) version based on our
126  // atomics.
127  sys::cas_flag old_val = sys::CompareAndSwap(&flag.status, Wait, Uninitialized);
128  if (old_val == Uninitialized) {
129  std::forward<Function>(F)(std::forward<Args>(ArgList)...);
130  sys::MemoryFence(); // Fence after F returns, before the flag is set to Done.
131  TsanIgnoreWritesBegin();
132  TsanHappensBefore(&flag.status);
133  flag.status = Done;
134  TsanIgnoreWritesEnd();
135  } else {
136  // Wait until any thread doing the call has finished.
137  sys::cas_flag tmp = flag.status;
138  sys::MemoryFence();
139  while (tmp != Done) {
140  tmp = flag.status;
141  sys::MemoryFence(); // Fence after every poll of the flag.
142  }
143  }
144  TsanHappensAfter(&flag.status);
145 #endif
146  }
147 
148  /// This tells how a thread pool will be used
149  class ThreadPoolStrategy {
150  public:
151  // The default value (0) means all available threads should be used,
152  // taking the affinity mask into account. If set, this value only represents
153  // a suggested high bound, the runtime might choose a lower value (not
154  // higher).
155  unsigned ThreadsRequested = 0;
156 
157  // If SMT is active, use hyper threads. If false, there will be only one
158  // std::thread per core.
159  bool UseHyperThreads = true;
160 
161  // If set, will constrain 'ThreadsRequested' to the number of hardware
162  // threads, or hardware cores.
163  bool Limit = false;
164 
165  /// Retrieves the max available threads for the current strategy. This
166  /// accounts for affinity masks and takes advantage of all CPU sockets.
167  unsigned compute_thread_count() const;
168 
169  /// Assign the current thread to an ideal hardware CPU or NUMA node. In a
170  /// multi-socket system, this ensures threads are assigned to all CPU
171  /// sockets. \p ThreadPoolNum represents a number bounded by [0,
172  /// compute_thread_count()).
173  void apply_thread_strategy(unsigned ThreadPoolNum) const;
174 
175  /// Finds the CPU socket where a thread should go. Returns 'None' if the
176  /// thread shall remain on the actual CPU socket.
177  Optional<unsigned> compute_cpu_socket(unsigned ThreadPoolNum) const;
178  };
179 
180  /// Build a strategy from a number of threads as a string provided in \p Num.
181  /// When Num is above the max number of threads specified by the \p Default
182  /// strategy, we attempt to equally allocate the threads on all CPU sockets.
183  /// "0" or an empty string will return the \p Default strategy.
184  /// "all" for using all hardware threads.
187 
188  /// Returns a thread strategy for tasks requiring significant memory or other
189  /// resources. To be used for workloads where hardware_concurrency() proves to
190  /// be less efficient. Avoid this strategy if doing lots of I/O. Currently
191  /// based on physical cores, if available for the host system, otherwise falls
192  /// back to hardware_concurrency(). Returns 1 when LLVM is configured with
193  /// LLVM_ENABLE_THREADS = OFF.
194  inline ThreadPoolStrategy
195  heavyweight_hardware_concurrency(unsigned ThreadCount = 0) {
196  ThreadPoolStrategy S;
197  S.UseHyperThreads = false;
198  S.ThreadsRequested = ThreadCount;
199  return S;
200  }
201 
202  /// Like heavyweight_hardware_concurrency() above, but builds a strategy
203  /// based on the rules described for get_threadpool_strategy().
204  /// If \p Num is invalid, returns a default strategy where one thread per
205  /// hardware core is used.
206  inline ThreadPoolStrategy heavyweight_hardware_concurrency(StringRef Num) {
207  Optional<ThreadPoolStrategy> S =
208  get_threadpool_strategy(Num, heavyweight_hardware_concurrency());
209  if (S)
210  return *S;
211  return heavyweight_hardware_concurrency();
212  }
213 
214  /// Returns a default thread strategy where all available hardware resources
215  /// are to be used, except for those initially excluded by an affinity mask.
216  /// This function takes affinity into consideration. Returns 1 when LLVM is
217  /// configured with LLVM_ENABLE_THREADS=OFF.
218  inline ThreadPoolStrategy hardware_concurrency(unsigned ThreadCount = 0) {
219  ThreadPoolStrategy S;
220  S.ThreadsRequested = ThreadCount;
221  return S;
222  }
223 
224  /// Returns an optimal thread strategy to execute specified amount of tasks.
225  /// This strategy should prevent us from creating too many threads if we
226  /// occasionally have an unexpectedly small amount of tasks.
227  inline ThreadPoolStrategy optimal_concurrency(unsigned TaskCount = 0) {
228  ThreadPoolStrategy S;
229  S.Limit = true;
230  S.ThreadsRequested = TaskCount;
231  return S;
232  }
233 
234  /// Return the current thread id, as used in various OS system calls.
235  /// Note that not all platforms guarantee that the value returned will be
236  /// unique across the entire system, so portable code should not assume
237  /// this.
238  uint64_t get_threadid();
239 
240  /// Get the maximum length of a thread name on this platform.
241  /// A value of 0 means there is no limit.
242  uint32_t get_max_thread_name_length();
243 
244  /// Set the name of the current thread. Setting a thread's name can
245  /// be helpful for enabling useful diagnostics under a debugger or when
246  /// logging. The level of support for setting a thread's name varies
247  /// wildly across operating systems, and we only make a best effort to
248  /// perform the operation on supported platforms. No indication of success
249  /// or failure is returned.
250  void set_thread_name(const Twine &Name);
251 
252  /// Get the name of the current thread. The level of support for
253  /// getting a thread's name varies wildly across operating systems, and it
254  /// is not even guaranteed that if you can successfully set a thread's name
255  /// that you can later get it back. This function is intended for diagnostic
256  /// purposes, and as with setting a thread's name no indication of whether
257  /// the operation succeeded or failed is returned.
258  void get_thread_name(SmallVectorImpl<char> &Name);
259 
260  /// Returns a mask that represents on which hardware thread, core, CPU, NUMA
261  /// group, the calling thread can be executed. On Windows, threads cannot
262  /// cross CPU sockets boundaries.
263  llvm::BitVector get_thread_affinity_mask();
264 
265  /// Returns how many physical CPUs or NUMA groups the system has.
266  unsigned get_cpus();
267 
268  enum class ThreadPriority {
269  Background = 0, // Ask for a lowered priority, for long-running latency-insensitive work.
270  Default = 1, // Ask to restore the default scheduling priority.
271  };
272  /// If priority is Background tries to lower current thread's priority such
273  /// that it does not affect foreground tasks significantly. Can be used for
274  /// long-running, latency-insensitive tasks to make sure cpu is not hogged by
275  /// this task.
276  /// If the priority is default tries to restore current thread's priority to
277  /// default scheduling priority.
278  enum class SetThreadPriorityResult { FAILURE, SUCCESS };
279  SetThreadPriorityResult set_thread_priority(ThreadPriority Priority);
280 }
281 
282 #endif
llvm::ThreadPriority::Default
@ Default
llvm::ThreadPoolStrategy::compute_thread_count
unsigned compute_thread_count() const
Retrieves the max available threads for the current strategy.
Definition: Threading.cpp:86
TsanIgnoreWritesBegin
#define TsanIgnoreWritesBegin()
Definition: Compiler.h:480
llvm::hardware_concurrency
ThreadPoolStrategy hardware_concurrency(unsigned ThreadCount=0)
Returns a default thread strategy where all available hardware resources are to be used,...
Definition: Threading.h:218
llvm
Definition: AllocatorList.h:23
Atomic.h
llvm::sys::MemoryFence
void MemoryFence()
Definition: Atomic.cpp:30
FunctionExtras.h
llvm::ThreadPoolStrategy::UseHyperThreads
bool UseHyperThreads
Definition: Threading.h:159
llvm::unique_function
unique_function is a type-erasing functor similar to std::function.
Definition: FunctionExtras.h:55
llvm::Function
Definition: Function.h:61
StringRef.h
llvm::ThreadPoolStrategy
This tells how a thread pool will be used.
Definition: Threading.h:149
llvm::heavyweight_hardware_concurrency
ThreadPoolStrategy heavyweight_hardware_concurrency(unsigned ThreadCount=0)
Returns a thread strategy for tasks requiring significant memory or other resources.
Definition: Threading.h:195
llvm::Optional< unsigned >
tmp
alloca< 16 x float >, align 16 %tmp2=alloca< 16 x float >, align 16 store< 16 x float > %A,< 16 x float > *%tmp %s=bitcast< 16 x float > *%tmp to i8 *%s2=bitcast< 16 x float > *%tmp2 to i8 *call void @llvm.memcpy.i64(i8 *%s, i8 *%s2, i64 64, i32 16) %R=load< 16 x float > *%tmp2 ret< 16 x float > %R } declare void @llvm.memcpy.i64(i8 *nocapture, i8 *nocapture, i64, i32) nounwind which compiles to:_foo:subl $140, %esp movaps %xmm3, 112(%esp) movaps %xmm2, 96(%esp) movaps %xmm1, 80(%esp) movaps %xmm0, 64(%esp) movl 60(%esp), %eax movl %eax, 124(%esp) movl 56(%esp), %eax movl %eax, 120(%esp) movl 52(%esp), %eax< many many more 32-bit copies > movaps(%esp), %xmm0 movaps 16(%esp), %xmm1 movaps 32(%esp), %xmm2 movaps 48(%esp), %xmm3 addl $140, %esp ret On Nehalem, it may even be cheaper to just use movups when unaligned than to fall back to lower-granularity chunks. Implement processor-specific optimizations for parity with GCC on these processors. GCC does two optimizations:1. ix86_pad_returns inserts a noop before ret instructions if immediately preceded by a conditional branch or is the target of a jump. 2. ix86_avoid_jump_misspredicts inserts noops in cases where a 16-byte block of code contains more than 3 branches. The first one is done for all AMDs, Core2, and "Generic" The second one is done for:Atom, Pentium Pro, all AMDs, Pentium 4, Nocona, Core 2, and "Generic" Testcase:int x(int a) { return(a &0xf0)> >4 tmp
Definition: README.txt:1347
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::sys::CompareAndSwap
cas_flag CompareAndSwap(volatile cas_flag *ptr, cas_flag new_value, cas_flag old_value)
Definition: Atomic.cpp:44
llvm::llvm_execute_on_thread
void llvm_execute_on_thread(void(*UserFn)(void *), void *UserData, llvm::Optional< unsigned > StackSizeInBytes=llvm::None)
Execute the given UserFn on a separate thread, passing it the provided UserData and waits for thread ...
Definition: Threading.cpp:117
llvm::get_threadid
uint64_t get_threadid()
Return the current thread id, as used in various OS system calls.
llvm::ThreadPriority::Background
@ Background
llvm::optimal_concurrency
ThreadPoolStrategy optimal_concurrency(unsigned TaskCount=0)
Returns an optimal thread strategy to execute specified amount of tasks.
Definition: Threading.h:227
llvm::get_max_thread_name_length
uint32_t get_max_thread_name_length()
Get the maximum length of a thread name on this platform.
BitVector.h
llvm::get_thread_name
void get_thread_name(SmallVectorImpl< char > &Name)
Get the name of the current thread.
llvm::ThreadPoolStrategy::apply_thread_strategy
void apply_thread_strategy(unsigned ThreadPoolNum) const
Assign the current thread to an ideal hardware CPU or NUMA node.
llvm::BitVector
Definition: BitVector.h:74
llvm::None
const NoneType None
Definition: None.h:23
llvm::SetThreadPriorityResult::FAILURE
@ FAILURE
llvm::ThreadPriority
ThreadPriority
Definition: Threading.h:268
llvm::get_threadpool_strategy
Optional< ThreadPoolStrategy > get_threadpool_strategy(StringRef Num, ThreadPoolStrategy Default={})
Build a strategy from a number of threads as a string provided in Num.
Definition: Threading.cpp:136
llvm::ThreadPoolStrategy::ThreadsRequested
unsigned ThreadsRequested
Definition: Threading.h:155
TsanHappensAfter
#define TsanHappensAfter(cv)
Definition: Compiler.h:479
llvm::ThreadPoolStrategy::compute_cpu_socket
Optional< unsigned > compute_cpu_socket(unsigned ThreadPoolNum) const
Finds the CPU socket where a thread should go.
TsanHappensBefore
#define TsanHappensBefore(cv)
Definition: Compiler.h:478
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
uint32_t
Compiler.h
S
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
Definition: README.txt:210
llvm::once_flag
std::once_flag once_flag
Definition: Threading.h:90
llvm::SetThreadPriorityResult::SUCCESS
@ SUCCESS
llvm::GraphProgram::Name
Name
Definition: GraphWriter.h:52
llvm::call_once
void call_once(once_flag &flag, Function &&F, Args &&... ArgList)
Execute the function specified as a parameter once.
Definition: Threading.h:120
TsanIgnoreWritesEnd
#define TsanIgnoreWritesEnd()
Definition: Compiler.h:481
llvm::set_thread_name
void set_thread_name(const Twine &Name)
Set the name of the current thread.
llvm::ThreadPoolStrategy::Limit
bool Limit
Definition: Threading.h:163
llvm::get_cpus
unsigned get_cpus()
Returns how many physical CPUs or NUMA groups the system has.
llvm::SetThreadPriorityResult
SetThreadPriorityResult
If priority is Background tries to lower current threads priority such that it does not affect foregr...
Definition: Threading.h:278
llvm::llvm_is_multithreaded
bool llvm_is_multithreaded()
Returns true if LLVM is compiled with support for multi-threading, and false otherwise.
Definition: Threading.cpp:31
SmallVector.h
llvm::llvm_execute_on_thread_async
void llvm_execute_on_thread_async(llvm::unique_function< void()> Func, llvm::Optional< unsigned > StackSizeInBytes=llvm::None)
Schedule the given Func for execution on a separate thread, then return to the caller immediately.
Definition: Threading.cpp:125
llvm::sys::fs::status
std::error_code status(const Twine &path, file_status &result, bool follow=true)
Get file status as if by POSIX stat().
llvm::get_thread_affinity_mask
llvm::BitVector get_thread_affinity_mask()
Returns a mask that represents on which hardware thread, core, CPU, NUMA group, the calling thread ca...
llvm::sys::Wait
ProcessInfo Wait(const ProcessInfo &PI, unsigned SecondsToWait, bool WaitUntilTerminates, std::string *ErrMsg=nullptr, Optional< ProcessStatistics > *ProcStat=nullptr)
This function waits for the process specified by PI to finish.
llvm::AMDGPU::HSAMD::Kernel::Key::Args
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
Definition: AMDGPUMetadata.h:389
llvm::set_thread_priority
SetThreadPriorityResult set_thread_priority(ThreadPriority Priority)
llvm::sys::cas_flag
uint32_t cas_flag
Definition: Atomic.h:34
llvm::codeview::PublicSymFlags::Function
@ Function