LLVM  14.0.0git
Threading.h
Go to the documentation of this file.
1 //===-- llvm/Support/Threading.h - Control multithreading mode --*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file declares helper functions for running LLVM in a multi-threaded
10 // environment.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_SUPPORT_THREADING_H
15 #define LLVM_SUPPORT_THREADING_H
16 
17 #include "llvm/ADT/BitVector.h"
19 #include "llvm/ADT/SmallVector.h"
20 #include "llvm/ADT/StringRef.h"
21 #include "llvm/Config/llvm-config.h" // for LLVM_ON_UNIX
22 #include "llvm/Support/Compiler.h"
23 #include <ciso646> // So we can check the C++ standard lib macros.
24 #include <functional>
25 
26 #if defined(_MSC_VER)
27 // MSVC's call_once implementation worked since VS 2015, which is the minimum
28 // supported version as of this writing.
29 #define LLVM_THREADING_USE_STD_CALL_ONCE 1
30 #elif defined(LLVM_ON_UNIX) && \
31  (defined(_LIBCPP_VERSION) || \
32  !(defined(__NetBSD__) || defined(__OpenBSD__) || \
33  (defined(__ppc__) || defined(__PPC__))))
34 // std::call_once from libc++ is used on all Unix platforms. Other
35 // implementations like libstdc++ are known to have problems on NetBSD,
36 // OpenBSD and PowerPC.
37 #define LLVM_THREADING_USE_STD_CALL_ONCE 1
38 #elif defined(LLVM_ON_UNIX) && \
39  ((defined(__ppc__) || defined(__PPC__)) && defined(__LITTLE_ENDIAN__))
40 #define LLVM_THREADING_USE_STD_CALL_ONCE 1
41 #else
42 #define LLVM_THREADING_USE_STD_CALL_ONCE 0
43 #endif
44 
45 #if LLVM_THREADING_USE_STD_CALL_ONCE
46 #include <mutex>
47 #else
48 #include "llvm/Support/Atomic.h"
49 #endif
50 
51 namespace llvm {
52 class Twine;
53 
/// Returns true if LLVM is compiled with support for multi-threading, and
/// false otherwise.
bool llvm_is_multithreaded();
57 
58 #if LLVM_THREADING_USE_STD_CALL_ONCE
59 
61 
62 #else
63 
64  enum InitStatus { Uninitialized = 0, Wait = 1, Done = 2 };
65 
66  /// The llvm::once_flag structure
67  ///
68  /// This type is modeled after std::once_flag to use with llvm::call_once.
69  /// This structure must be used as an opaque object. It is a struct to force
70  /// autoinitialization and behave like std::once_flag.
71  struct once_flag {
72  volatile sys::cas_flag status = Uninitialized;
73  };
74 
75 #endif
76 
77  /// Execute the function specified as a parameter once.
78  ///
79  /// Typical usage:
80  /// \code
81  /// void foo() {...};
82  /// ...
83  /// static once_flag flag;
84  /// call_once(flag, foo);
85  /// \endcode
86  ///
87  /// \param flag Flag used for tracking whether or not this has run.
88  /// \param F Function to call once.
89  template <typename Function, typename... Args>
90  void call_once(once_flag &flag, Function &&F, Args &&... ArgList) {
91 #if LLVM_THREADING_USE_STD_CALL_ONCE
92  std::call_once(flag, std::forward<Function>(F),
93  std::forward<Args>(ArgList)...);
94 #else
95  // For other platforms we use a generic (if brittle) version based on our
96  // atomics.
97  sys::cas_flag old_val = sys::CompareAndSwap(&flag.status, Wait, Uninitialized);
98  if (old_val == Uninitialized) {
99  std::forward<Function>(F)(std::forward<Args>(ArgList)...);
102  TsanHappensBefore(&flag.status);
103  flag.status = Done;
105  } else {
106  // Wait until any thread doing the call has finished.
107  sys::cas_flag tmp = flag.status;
109  while (tmp != Done) {
110  tmp = flag.status;
112  }
113  }
114  TsanHappensAfter(&flag.status);
115 #endif
116  }
117 
118  /// This tells how a thread pool will be used
120  public:
121  // The default value (0) means all available threads should be used,
122  // taking the affinity mask into account. If set, this value only represents
123  // a suggested high bound, the runtime might choose a lower value (not
124  // higher).
125  unsigned ThreadsRequested = 0;
126 
127  // If SMT is active, use hyper threads. If false, there will be only one
128  // std::thread per core.
129  bool UseHyperThreads = true;
130 
131  // If set, will constrain 'ThreadsRequested' to the number of hardware
132  // threads, or hardware cores.
133  bool Limit = false;
134 
135  /// Retrieves the max available threads for the current strategy. This
136  /// accounts for affinity masks and takes advantage of all CPU sockets.
137  unsigned compute_thread_count() const;
138 
139  /// Assign the current thread to an ideal hardware CPU or NUMA node. In a
140  /// multi-socket system, this ensures threads are assigned to all CPU
141  /// sockets. \p ThreadPoolNum represents a number bounded by [0,
142  /// compute_thread_count()).
143  void apply_thread_strategy(unsigned ThreadPoolNum) const;
144 
145  /// Finds the CPU socket where a thread should go. Returns 'None' if the
146  /// thread shall remain on the actual CPU socket.
147  Optional<unsigned> compute_cpu_socket(unsigned ThreadPoolNum) const;
148  };
149 
150  /// Build a strategy from a number of threads as a string provided in \p Num.
151  /// When Num is above the max number of threads specified by the \p Default
152  /// strategy, we attempt to equally allocate the threads on all CPU sockets.
153  /// "0" or an empty string will return the \p Default strategy.
154  /// "all" for using all hardware threads.
157 
158  /// Returns a thread strategy for tasks requiring significant memory or other
159  /// resources. To be used for workloads where hardware_concurrency() proves to
160  /// be less efficient. Avoid this strategy if doing lots of I/O. Currently
161  /// based on physical cores, if available for the host system, otherwise falls
162  /// back to hardware_concurrency(). Returns 1 when LLVM is configured with
163  /// LLVM_ENABLE_THREADS = OFF.
164  inline ThreadPoolStrategy
165  heavyweight_hardware_concurrency(unsigned ThreadCount = 0) {
167  S.UseHyperThreads = false;
168  S.ThreadsRequested = ThreadCount;
169  return S;
170  }
171 
172  /// Like heavyweight_hardware_concurrency() above, but builds a strategy
173  /// based on the rules described for get_threadpool_strategy().
174  /// If \p Num is invalid, returns a default strategy where one thread per
175  /// hardware core is used.
179  if (S)
180  return *S;
182  }
183 
184  /// Returns a default thread strategy where all available hardware resources
185  /// are to be used, except for those initially excluded by an affinity mask.
186  /// This function takes affinity into consideration. Returns 1 when LLVM is
187  /// configured with LLVM_ENABLE_THREADS=OFF.
188  inline ThreadPoolStrategy hardware_concurrency(unsigned ThreadCount = 0) {
190  S.ThreadsRequested = ThreadCount;
191  return S;
192  }
193 
194  /// Returns an optimal thread strategy to execute specified amount of tasks.
195  /// This strategy should prevent us from creating too many threads if we
196  /// occasionaly have an unexpectedly small amount of tasks.
197  inline ThreadPoolStrategy optimal_concurrency(unsigned TaskCount = 0) {
199  S.Limit = true;
200  S.ThreadsRequested = TaskCount;
201  return S;
202  }
203 
/// Return the current thread id, as used in various OS system calls.
/// Note that not all platforms guarantee that the value returned will be
/// unique across the entire system, so portable code should not assume
/// this.
uint64_t get_threadid();
209 
/// Get the maximum length of a thread name on this platform.
/// A value of 0 means there is no limit.
uint32_t get_max_thread_name_length();
213 
214  /// Set the name of the current thread. Setting a thread's name can
215  /// be helpful for enabling useful diagnostics under a debugger or when
216  /// logging. The level of support for setting a thread's name varies
217  /// wildly across operating systems, and we only make a best effort to
218  /// perform the operation on supported platforms. No indication of success
219  /// or failure is returned.
220  void set_thread_name(const Twine &Name);
221 
222  /// Get the name of the current thread. The level of support for
223  /// getting a thread's name varies wildly across operating systems, and it
224  /// is not even guaranteed that if you can successfully set a thread's name
225  /// that you can later get it back. This function is intended for diagnostic
226  /// purposes, and as with setting a thread's name no indication of whether
227  /// the operation succeeded or failed is returned.
228  void get_thread_name(SmallVectorImpl<char> &Name);
229 
230  /// Returns a mask that represents on which hardware thread, core, CPU, NUMA
231  /// group, the calling thread can be executed. On Windows, threads cannot
232  /// cross CPU sockets boundaries.
234 
235  /// Returns how many physical CPUs or NUMA groups the system has.
236  unsigned get_cpus();
237 
/// Scheduling priority that can be requested for the current thread.
enum class ThreadPriority {
  Background = 0,
  Default = 1,
};

/// Outcome reported by set_thread_priority().
enum class SetThreadPriorityResult { FAILURE, SUCCESS };

/// If priority is Background tries to lower the current thread's priority such
/// that it does not affect foreground tasks significantly. Can be used for
/// long-running, latency-insensitive tasks to make sure cpu is not hogged by
/// this task.
/// If the priority is default tries to restore the current thread's priority
/// to default scheduling priority.
SetThreadPriorityResult set_thread_priority(ThreadPriority Priority);
250 }
251 
252 #endif
llvm::ThreadPriority::Default
@ Default
llvm::ThreadPoolStrategy::compute_thread_count
unsigned compute_thread_count() const
Retrieves the max available threads for the current strategy.
Definition: Threading.cpp:60
TsanIgnoreWritesBegin
#define TsanIgnoreWritesBegin()
Definition: Compiler.h:472
llvm::hardware_concurrency
ThreadPoolStrategy hardware_concurrency(unsigned ThreadCount=0)
Returns a default thread strategy where all available hardware resources are to be used,...
Definition: Threading.h:188
llvm
---------------------— PointerInfo ------------------------------------—
Definition: AllocatorList.h:23
Atomic.h
llvm::sys::MemoryFence
void MemoryFence()
Definition: Atomic.cpp:30
FunctionExtras.h
llvm::ThreadPoolStrategy::UseHyperThreads
bool UseHyperThreads
Definition: Threading.h:129
llvm::Function
Definition: Function.h:61
StringRef.h
llvm::ThreadPoolStrategy
This tells how a thread pool will be used.
Definition: Threading.h:119
llvm::heavyweight_hardware_concurrency
ThreadPoolStrategy heavyweight_hardware_concurrency(unsigned ThreadCount=0)
Returns a thread strategy for tasks requiring significant memory or other resources.
Definition: Threading.h:165
llvm::Optional< unsigned >
tmp
alloca< 16 x float >, align 16 %tmp2=alloca< 16 x float >, align 16 store< 16 x float > %A,< 16 x float > *%tmp %s=bitcast< 16 x float > *%tmp to i8 *%s2=bitcast< 16 x float > *%tmp2 to i8 *call void @llvm.memcpy.i64(i8 *%s, i8 *%s2, i64 64, i32 16) %R=load< 16 x float > *%tmp2 ret< 16 x float > %R } declare void @llvm.memcpy.i64(i8 *nocapture, i8 *nocapture, i64, i32) nounwind which compiles to:_foo:subl $140, %esp movaps %xmm3, 112(%esp) movaps %xmm2, 96(%esp) movaps %xmm1, 80(%esp) movaps %xmm0, 64(%esp) movl 60(%esp), %eax movl %eax, 124(%esp) movl 56(%esp), %eax movl %eax, 120(%esp) movl 52(%esp), %eax< many many more 32-bit copies > movaps(%esp), %xmm0 movaps 16(%esp), %xmm1 movaps 32(%esp), %xmm2 movaps 48(%esp), %xmm3 addl $140, %esp ret On Nehalem, it may even be cheaper to just use movups when unaligned than to fall back to lower-granularity chunks. Implement processor-specific optimizations for parity with GCC on these processors. GCC does two optimizations:1. ix86_pad_returns inserts a noop before ret instructions if immediately preceded by a conditional branch or is the target of a jump. 2. ix86_avoid_jump_misspredicts inserts noops in cases where a 16-byte block of code contains more than 3 branches. The first one is done for all AMDs, Core2, and "Generic" The second one is done for:Atom, Pentium Pro, all AMDs, Pentium 4, Nocona, Core 2, and "Generic" Testcase:int x(int a) { return(a &0xf0)> >4 tmp
Definition: README.txt:1347
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::sys::CompareAndSwap
cas_flag CompareAndSwap(volatile cas_flag *ptr, cas_flag new_value, cas_flag old_value)
Definition: Atomic.cpp:44
llvm::get_threadid
uint64_t get_threadid()
Return the current thread id, as used in various OS system calls.
llvm::ThreadPriority::Background
@ Background
llvm::optimal_concurrency
ThreadPoolStrategy optimal_concurrency(unsigned TaskCount=0)
Returns an optimal thread strategy to execute specified amount of tasks.
Definition: Threading.h:197
llvm::get_max_thread_name_length
uint32_t get_max_thread_name_length()
Get the maximum length of a thread name on this platform.
BitVector.h
llvm::get_thread_name
void get_thread_name(SmallVectorImpl< char > &Name)
Get the name of the current thread.
llvm::ThreadPoolStrategy::apply_thread_strategy
void apply_thread_strategy(unsigned ThreadPoolNum) const
Assign the current thread to an ideal hardware CPU or NUMA node.
llvm::BitVector
Definition: BitVector.h:74
llvm::SetThreadPriorityResult::FAILURE
@ FAILURE
uint64_t
llvm::ThreadPriority
ThreadPriority
Definition: Threading.h:238
llvm::get_threadpool_strategy
Optional< ThreadPoolStrategy > get_threadpool_strategy(StringRef Num, ThreadPoolStrategy Default={})
Build a strategy from a number of threads as a string provided in Num.
Definition: Threading.cpp:99
llvm::ThreadPoolStrategy::ThreadsRequested
unsigned ThreadsRequested
Definition: Threading.h:125
TsanHappensAfter
#define TsanHappensAfter(cv)
Definition: Compiler.h:471
llvm::ThreadPoolStrategy::compute_cpu_socket
Optional< unsigned > compute_cpu_socket(unsigned ThreadPoolNum) const
Finds the CPU socket where a thread should go.
TsanHappensBefore
#define TsanHappensBefore(cv)
Definition: Compiler.h:470
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
uint32_t
Compiler.h
S
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
Definition: README.txt:210
llvm::once_flag
std::once_flag once_flag
Definition: Threading.h:60
llvm::SetThreadPriorityResult::SUCCESS
@ SUCCESS
llvm::GraphProgram::Name
Name
Definition: GraphWriter.h:52
llvm::call_once
void call_once(once_flag &flag, Function &&F, Args &&... ArgList)
Execute the function specified as a parameter once.
Definition: Threading.h:90
TsanIgnoreWritesEnd
#define TsanIgnoreWritesEnd()
Definition: Compiler.h:473
llvm::set_thread_name
void set_thread_name(const Twine &Name)
Set the name of the current thread.
llvm::ThreadPoolStrategy::Limit
bool Limit
Definition: Threading.h:133
llvm::get_cpus
unsigned get_cpus()
Returns how many physical CPUs or NUMA groups the system has.
llvm::SetThreadPriorityResult
SetThreadPriorityResult
If priority is Background tries to lower current threads priority such that it does not affect foregr...
Definition: Threading.h:248
llvm::llvm_is_multithreaded
bool llvm_is_multithreaded()
Returns true if LLVM is compiled with support for multi-threading, and false otherwise.
Definition: Threading.cpp:31
SmallVector.h
llvm::sys::fs::status
std::error_code status(const Twine &path, file_status &result, bool follow=true)
Get file status as if by POSIX stat().
llvm::get_thread_affinity_mask
llvm::BitVector get_thread_affinity_mask()
Returns a mask that represents on which hardware thread, core, CPU, NUMA group, the calling thread ca...
llvm::sys::Wait
ProcessInfo Wait(const ProcessInfo &PI, unsigned SecondsToWait, bool WaitUntilTerminates, std::string *ErrMsg=nullptr, Optional< ProcessStatistics > *ProcStat=nullptr)
This function waits for the process specified by PI to finish.
llvm::AMDGPU::HSAMD::Kernel::Key::Args
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
Definition: AMDGPUMetadata.h:389
llvm::set_thread_priority
SetThreadPriorityResult set_thread_priority(ThreadPriority Priority)
llvm::sys::cas_flag
uint32_t cas_flag
Definition: Atomic.h:34
llvm::codeview::PublicSymFlags::Function
@ Function