Bug Summary

File: projects/openmp/runtime/src/kmp_dispatch.cpp
Warning: line 1800, column 5
Dereference of null pointer (loaded from variable 'p_st')

Annotated Source Code


clang -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name kmp_dispatch.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-eagerly-assume -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -mrelocation-model pic -pic-level 2 -mthread-model posix -fmath-errno -masm-verbose -mconstructor-aliases -munwind-tables -fuse-init-array -target-cpu x86-64 -dwarf-column-info -debugger-tuning=gdb -momit-leaf-frame-pointer -ffunction-sections -fdata-sections -resource-dir /usr/lib/llvm-7/lib/clang/7.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -D omp_EXPORTS -I /build/llvm-toolchain-snapshot-7~svn338205/build-llvm/projects/openmp/runtime/src -I /build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src -I /build/llvm-toolchain-snapshot-7~svn338205/build-llvm/include -I /build/llvm-toolchain-snapshot-7~svn338205/include -I /build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/i18n -I /build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/include/50 -I /build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/thirdparty/ittnotify -U NDEBUG -D _GNU_SOURCE -D _REENTRANT -D _FORTIFY_SOURCE=2 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/8/../../../../include/c++/8 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/8/../../../../include/x86_64-linux-gnu/c++/8 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/8/../../../../include/x86_64-linux-gnu/c++/8 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/8/../../../../include/c++/8/backward -internal-isystem /usr/include/clang/7.0.0/include/ -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-7/lib/clang/7.0.0/include -internal-externc-isystem /usr/lib/gcc/x86_64-linux-gnu/8/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-comment -Wno-sign-compare -Wno-unused-function -Wno-unused-value -Wno-unused-variable -Wno-switch -Wno-unknown-pragmas -Wno-missing-field-initializers -Wno-missing-braces -Wno-comment -std=c++11 -fdeprecated-macro -fdebug-compilation-dir /build/llvm-toolchain-snapshot-7~svn338205/build-llvm/projects/openmp/runtime/src -ferror-limit 19 -fmessage-length 0 -fvisibility-inlines-hidden -fno-rtti -fobjc-runtime=gcc -fdiagnostics-show-option -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -o /tmp/scan-build-2018-07-29-043837-17923-1 -x c++ /build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_dispatch.cpp -faddrsig
1/*
2 * kmp_dispatch.cpp: dynamic scheduling - iteration initialization and dispatch.
3 */
4
5//===----------------------------------------------------------------------===//
6//
7// The LLVM Compiler Infrastructure
8//
9// This file is dual licensed under the MIT and the University of Illinois Open
10// Source Licenses. See LICENSE.txt for details.
11//
12//===----------------------------------------------------------------------===//
13
14/* Dynamic scheduling initialization and dispatch.
15 *
16 * NOTE: __kmp_nth is a constant inside of any dispatch loop, however
17 * it may change values between parallel regions. __kmp_max_nth
18 * is the largest value __kmp_nth may take, 1 is the smallest.
19 */
20
21// Need to raise Win version from XP to Vista here for support of
22// InterlockedExchange64
23#if defined(_WIN32_WINNT) && defined(_M_IX86)
24#undef _WIN32_WINNT
25#define _WIN32_WINNT 0x0502
26#endif
27
28#include "kmp.h"
29#include "kmp_error.h"
30#include "kmp_i18n.h"
31#include "kmp_itt.h"
32#include "kmp_stats.h"
33#include "kmp_str.h"
34#if KMP_OS_WINDOWS && KMP_ARCH_X86
35#include <float.h>
36#endif
37#include "kmp_lock.h"
38#include "kmp_dispatch.h"
39#if KMP_USE_HIER_SCHED
40#include "kmp_dispatch_hier.h"
41#endif
42
43#if OMPT_SUPPORT
44#include "ompt-specific.h"
45#endif
46
47/* ------------------------------------------------------------------------ */
48/* ------------------------------------------------------------------------ */
49
50void __kmp_dispatch_deo_error(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
51 kmp_info_t *th;
52
53 KMP_DEBUG_ASSERT(gtid_ref);
54
55 if (__kmp_env_consistency_check) {
56 th = __kmp_threads[*gtid_ref];
57 if (th->th.th_root->r.r_active &&
58 (th->th.th_dispatch->th_dispatch_pr_current->pushed_ws != ct_none)) {
59#if KMP_USE_DYNAMIC_LOCK
60 __kmp_push_sync(*gtid_ref, ct_ordered_in_pdo, loc_ref, NULL, 0);
61#else
62 __kmp_push_sync(*gtid_ref, ct_ordered_in_pdo, loc_ref, NULL);
63#endif
64 }
65 }
66}
67
68void __kmp_dispatch_dxo_error(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
69 kmp_info_t *th;
70
71 if (__kmp_env_consistency_check) {
72 th = __kmp_threads[*gtid_ref];
73 if (th->th.th_dispatch->th_dispatch_pr_current->pushed_ws != ct_none) {
74 __kmp_pop_sync(*gtid_ref, ct_ordered_in_pdo, loc_ref);
75 }
76 }
77}
78
79// Initialize a dispatch_private_info_template<T> buffer for a particular
80// type of schedule and chunk. The loop description is found in lb (lower bound),
81// ub (upper bound), and st (stride). nproc is the number of threads relevant
82// to the scheduling (often the number of threads in a team, but not always if
83// hierarchical scheduling is used). tid is the id of the thread calling
84// the function within the group of nproc threads. It will have a value
85// between 0 and nproc - 1. This is often just the thread id within a team, but
86// is not necessarily the case when using hierarchical scheduling.
87// loc is the source file location of the corresponding loop
88// gtid is the global thread id
89template <typename T>
90void __kmp_dispatch_init_algorithm(ident_t *loc, int gtid,
91 dispatch_private_info_template<T> *pr,
92 enum sched_type schedule, T lb, T ub,
93 typename traits_t<T>::signed_t st,
94#if USE_ITT_BUILD
95 kmp_uint64 *cur_chunk,
96#endif
97 typename traits_t<T>::signed_t chunk,
98 T nproc, T tid) {
99 typedef typename traits_t<T>::unsigned_t UT;
100 typedef typename traits_t<T>::signed_t ST;
101 typedef typename traits_t<T>::floating_t DBL;
102
103 int active;
104 T tc;
105 kmp_info_t *th;
106 kmp_team_t *team;
107
108#ifdef KMP_DEBUG
109 {
110 char *buff;
111 // create format specifiers before the debug output
112 buff = __kmp_str_format("__kmp_dispatch_init_algorithm: T#%%d called "
113 "pr:%%p lb:%%%s ub:%%%s st:%%%s "
114 "schedule:%%d chunk:%%%s nproc:%%%s tid:%%%s\n",
115 traits_t<T>::spec, traits_t<T>::spec,
116 traits_t<ST>::spec, traits_t<ST>::spec,
117 traits_t<T>::spec, traits_t<T>::spec);
118 KD_TRACE(10, (buff, gtid, pr, lb, ub, st, schedule, chunk, nproc, tid));
119 __kmp_str_free(&buff);
120 }
121#endif
122 /* setup data */
123 th = __kmp_threads[gtid];
124 team = th->th.th_team;
125 active = !team->t.t_serialized;
126
127#if USE_ITT_BUILD
128 int itt_need_metadata_reporting = __itt_metadata_add_ptr &&
129 __kmp_forkjoin_frames_mode == 3 &&
130 KMP_MASTER_GTID(gtid) &&
131#if OMP_40_ENABLED
132 th->th.th_teams_microtask == NULL &&
133#endif
134 team->t.t_active_level == 1;
135#endif
136#if (KMP_STATIC_STEAL_ENABLED)
137 if (SCHEDULE_HAS_NONMONOTONIC(schedule))
138 // AC: we now have only one implementation of stealing, so use it
139 schedule = kmp_sch_static_steal;
140 else
141#endif
142 schedule = SCHEDULE_WITHOUT_MODIFIERS(schedule);
143
144 /* Pick up the nomerge/ordered bits from the scheduling type */
145 if ((schedule >= kmp_nm_lower) && (schedule < kmp_nm_upper)) {
146 pr->flags.nomerge = TRUE;
147 schedule =
148 (enum sched_type)(((int)schedule) - (kmp_nm_lower - kmp_sch_lower));
149 } else {
150 pr->flags.nomerge = FALSE;
151 }
152 pr->type_size = traits_t<T>::type_size; // remember the size of variables
153 if (kmp_ord_lower & schedule) {
154 pr->flags.ordered = TRUE;
155 schedule =
156 (enum sched_type)(((int)schedule) - (kmp_ord_lower - kmp_sch_lower));
157 } else {
158 pr->flags.ordered = FALSE;
159 }
160
161 if (schedule == kmp_sch_static) {
162 schedule = __kmp_static;
163 } else {
164 if (schedule == kmp_sch_runtime) {
165 // Use the scheduling specified by OMP_SCHEDULE (or __kmp_sch_default if
166 // not specified)
167 schedule = team->t.t_sched.r_sched_type;
168 // Detail the schedule if needed (global controls are differentiated
169 // appropriately)
170 if (schedule == kmp_sch_guided_chunked) {
171 schedule = __kmp_guided;
172 } else if (schedule == kmp_sch_static) {
173 schedule = __kmp_static;
174 }
175 // Use the chunk size specified by OMP_SCHEDULE (or default if not
176 // specified)
177 chunk = team->t.t_sched.chunk;
178#if USE_ITT_BUILD
179 if (cur_chunk)
180 *cur_chunk = chunk;
181#endif
182#ifdef KMP_DEBUG
183 {
184 char *buff;
185 // create format specifiers before the debug output
186 buff = __kmp_str_format("__kmp_dispatch_init_algorithm: T#%%d new: "
187 "schedule:%%d chunk:%%%s\n",
188 traits_t<ST>::spec);
189 KD_TRACE(10, (buff, gtid, schedule, chunk));
190 __kmp_str_free(&buff);
191 }
192#endif
193 } else {
194 if (schedule == kmp_sch_guided_chunked) {
195 schedule = __kmp_guided;
196 }
197 if (chunk <= 0) {
198 chunk = KMP_DEFAULT_CHUNK;
199 }
200 }
201
202 if (schedule == kmp_sch_auto) {
203 // mapping and differentiation: in the __kmp_do_serial_initialize()
204 schedule = __kmp_auto;
205#ifdef KMP_DEBUG
206 {
207 char *buff;
208 // create format specifiers before the debug output
209 buff = __kmp_str_format(
210 "__kmp_dispatch_init_algorithm: kmp_sch_auto: T#%%d new: "
211 "schedule:%%d chunk:%%%s\n",
212 traits_t<ST>::spec);
213 KD_TRACE(10, (buff, gtid, schedule, chunk));
214 __kmp_str_free(&buff);
215 }
216#endif
217 }
218
219 /* guided analytical not safe for too many threads */
220 if (schedule == kmp_sch_guided_analytical_chunked && nproc > 1 << 20) {
221 schedule = kmp_sch_guided_iterative_chunked;
222 KMP_WARNING(DispatchManyThreads);
223 }
224#if OMP_45_ENABLED
225 if (schedule == kmp_sch_runtime_simd) {
226 // compiler provides simd_width in the chunk parameter
227 schedule = team->t.t_sched.r_sched_type;
228 // Detail the schedule if needed (global controls are differentiated
229 // appropriately)
230 if (schedule == kmp_sch_static || schedule == kmp_sch_auto ||
231 schedule == __kmp_static) {
232 schedule = kmp_sch_static_balanced_chunked;
233 } else {
234 if (schedule == kmp_sch_guided_chunked || schedule == __kmp_guided) {
235 schedule = kmp_sch_guided_simd;
236 }
237 chunk = team->t.t_sched.chunk * chunk;
238 }
239#if USE_ITT_BUILD
240 if (cur_chunk)
241 *cur_chunk = chunk;
242#endif
243#ifdef KMP_DEBUG
244 {
245 char *buff;
246 // create format specifiers before the debug output
247 buff = __kmp_str_format("__kmp_dispatch_init: T#%%d new: schedule:%%d"
248 " chunk:%%%s\n",
249 traits_t<ST>::spec);
250 KD_TRACE(10, (buff, gtid, schedule, chunk));
251 __kmp_str_free(&buff);
252 }
253#endif
254 }
255#endif // OMP_45_ENABLED
256 pr->u.p.parm1 = chunk;
257 }
258 KMP_ASSERT2((kmp_sch_lower < schedule && schedule < kmp_sch_upper),
259 "unknown scheduling type");
260
261 pr->u.p.count = 0;
262
263 if (__kmp_env_consistency_check) {
264 if (st == 0) {
265 __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited,
266 (pr->flags.ordered ? ct_pdo_ordered : ct_pdo), loc);
267 }
268 }
269 // compute trip count
270 if (st == 1) { // most common case
271 if (ub >= lb) {
272 tc = ub - lb + 1;
273 } else { // ub < lb
274 tc = 0; // zero-trip
275 }
276 } else if (st < 0) {
277 if (lb >= ub) {
278 // AC: cast to unsigned is needed for loops like (i=2B; i>-2B; i-=1B),
279 // where the division needs to be unsigned regardless of the result type
280 tc = (UT)(lb - ub) / (-st) + 1;
281 } else { // lb < ub
282 tc = 0; // zero-trip
283 }
284 } else { // st > 0
285 if (ub >= lb) {
286 // AC: cast to unsigned is needed for loops like (i=-2B; i<2B; i+=1B),
287 // where the division needs to be unsigned regardless of the result type
288 tc = (UT)(ub - lb) / st + 1;
289 } else { // ub < lb
290 tc = 0; // zero-trip
291 }
292 }
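
As an aside, the trip-count logic above can be exercised on its own. The sketch below is not part of kmp_dispatch.cpp; it fixes T to int32_t and UT to uint32_t for brevity, and only mirrors the three stride cases and their unsigned-division casts:

#include <cassert>
#include <cstdint>

// Standalone sketch of the trip-count computation above (T = int32_t, UT = uint32_t).
static uint32_t trip_count(int32_t lb, int32_t ub, int32_t st) {
  if (st == 1)                        // most common case
    return ub >= lb ? (uint32_t)(ub - lb) + 1 : 0;
  if (st < 0)                         // downward loop; unsigned division as in the source
    return lb >= ub ? (uint32_t)(lb - ub) / (uint32_t)(-st) + 1 : 0;
  return ub >= lb ? (uint32_t)(ub - lb) / (uint32_t)st + 1 : 0;
}

int main() {
  assert(trip_count(0, 9, 1) == 10);  // 0..9 step 1
  assert(trip_count(0, 9, 3) == 4);   // 0, 3, 6, 9
  assert(trip_count(9, 0, -2) == 5);  // 9, 7, 5, 3, 1
  assert(trip_count(5, 4, 1) == 0);   // zero-trip
  return 0;
}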
293
294 pr->u.p.lb = lb;
295 pr->u.p.ub = ub;
296 pr->u.p.st = st;
297 pr->u.p.tc = tc;
298
299#if KMP_OS_WINDOWS
300 pr->u.p.last_upper = ub + st;
301#endif /* KMP_OS_WINDOWS */
302
303 /* NOTE: only the active parallel region(s) has active ordered sections */
304
305 if (active) {
306 if (pr->flags.ordered) {
307 pr->ordered_bumped = 0;
308 pr->u.p.ordered_lower = 1;
309 pr->u.p.ordered_upper = 0;
310 }
311 }
312
313 switch (schedule) {
314#if (KMP_STATIC_STEAL_ENABLED)
315 case kmp_sch_static_steal: {
316 T ntc, init;
317
318 KD_TRACE(100,
319 ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_steal case\n",
320 gtid));
321
322 ntc = (tc % chunk ? 1 : 0) + tc / chunk;
323 if (nproc > 1 && ntc >= nproc) {
324 KMP_COUNT_BLOCK(OMP_FOR_static_steal);
325 T id = tid;
326 T small_chunk, extras;
327
328 small_chunk = ntc / nproc;
329 extras = ntc % nproc;
330
331 init = id * small_chunk + (id < extras ? id : extras);
332 pr->u.p.count = init;
333 pr->u.p.ub = init + small_chunk + (id < extras ? 1 : 0);
334
335 pr->u.p.parm2 = lb;
336 // pr->pfields.parm3 = 0; // it's not used in static_steal
337 pr->u.p.parm4 = (id + 1) % nproc; // remember neighbour tid
338 pr->u.p.st = st;
339 if (traits_t<T>::type_size > 4) {
340 // AC: TODO: check if 16-byte CAS available and use it to
341 // improve performance (probably wait for explicit request
342 // before spending time on this).
343 // For now use dynamically allocated per-thread lock,
344 // free memory in __kmp_dispatch_next when status==0.
345 KMP_DEBUG_ASSERT(th->th.th_dispatch->th_steal_lock == NULL);
346 th->th.th_dispatch->th_steal_lock =
347 (kmp_lock_t *)__kmp_allocate(sizeof(kmp_lock_t));
348 __kmp_init_lock(th->th.th_dispatch->th_steal_lock);
349 }
350 break;
351 } else {
352 KD_TRACE(100, ("__kmp_dispatch_init_algorithm: T#%d falling-through to "
353 "kmp_sch_static_balanced\n",
354 gtid));
355 schedule = kmp_sch_static_balanced;
356 /* too few iterations: fall-through to kmp_sch_static_balanced */
357 } // if
358 /* FALL-THROUGH to static balanced */
359 } // case
360#endif
361 case kmp_sch_static_balanced: {
362 T init, limit;
363
364 KD_TRACE(
365 100,
366 ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_balanced case\n",
367 gtid));
368
369 if (nproc > 1) {
370 T id = tid;
371
372 if (tc < nproc) {
373 if (id < tc) {
374 init = id;
375 limit = id;
376 pr->u.p.parm1 = (id == tc - 1); /* parm1 stores *plastiter */
377 } else {
378 pr->u.p.count = 1; /* means no more chunks to execute */
379 pr->u.p.parm1 = FALSE;
380 break;
381 }
382 } else {
383 T small_chunk = tc / nproc;
384 T extras = tc % nproc;
385 init = id * small_chunk + (id < extras ? id : extras);
386 limit = init + small_chunk - (id < extras ? 0 : 1);
387 pr->u.p.parm1 = (id == nproc - 1);
388 }
389 } else {
390 if (tc > 0) {
391 init = 0;
392 limit = tc - 1;
393 pr->u.p.parm1 = TRUE;
394 } else {
395 // zero trip count
396 pr->u.p.count = 1; /* means no more chunks to execute */
397 pr->u.p.parm1 = FALSE;
398 break;
399 }
400 }
401#if USE_ITT_BUILD
402 // Calculate chunk for metadata report
403 if (itt_need_metadata_reporting)
404 if (cur_chunk)
405 *cur_chunk = limit - init + 1;
406#endif
407 if (st == 1) {
408 pr->u.p.lb = lb + init;
409 pr->u.p.ub = lb + limit;
410 } else {
411 // calculated upper bound, "ub" is user-defined upper bound
412 T ub_tmp = lb + limit * st;
413 pr->u.p.lb = lb + init * st;
414 // adjust upper bound to "ub" if needed, so that MS lastprivate will match
415 // it exactly
416 if (st > 0) {
417 pr->u.p.ub = (ub_tmp + st > ub ? ub : ub_tmp);
418 } else {
419 pr->u.p.ub = (ub_tmp + st < ub ? ub : ub_tmp);
420 }
421 }
422 if (pr->flags.ordered) {
423 pr->u.p.ordered_lower = init;
424 pr->u.p.ordered_upper = limit;
425 }
426 break;
427 } // case
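
For illustration only (this helper is not part of the runtime), the kmp_sch_static_balanced split in the case above gives each of the first `extras` threads one extra iteration; the init/limit arithmetic for the tc >= nproc branch can be checked standalone:

#include <cassert>
#include <cstdint>

// Sketch of the kmp_sch_static_balanced split above for tc >= nproc.
// Thread `id` gets iterations [init, limit] (inclusive).
static void balanced_range(uint32_t tc, uint32_t nproc, uint32_t id,
                           uint32_t *init, uint32_t *limit) {
  uint32_t small_chunk = tc / nproc;
  uint32_t extras = tc % nproc;
  *init = id * small_chunk + (id < extras ? id : extras);
  *limit = *init + small_chunk - (id < extras ? 0 : 1);
}

int main() {
  // 10 iterations over 4 threads: block sizes 3, 3, 2, 2.
  uint32_t init, limit;
  balanced_range(10, 4, 0, &init, &limit); assert(init == 0 && limit == 2);
  balanced_range(10, 4, 1, &init, &limit); assert(init == 3 && limit == 5);
  balanced_range(10, 4, 2, &init, &limit); assert(init == 6 && limit == 7);
  balanced_range(10, 4, 3, &init, &limit); assert(init == 8 && limit == 9);
  return 0;
}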
428#if OMP_45_ENABLED
429 case kmp_sch_static_balanced_chunked: {
430 // similar to balanced, but chunk adjusted to multiple of simd width
431 T nth = nproc;
432 KD_TRACE(100, ("__kmp_dispatch_init_algorithm: T#%d runtime(simd:static)"
433 " -> falling-through to static_greedy\n",
434 gtid));
435 schedule = kmp_sch_static_greedy;
436 if (nth > 1)
437 pr->u.p.parm1 = ((tc + nth - 1) / nth + chunk - 1) & ~(chunk - 1);
438 else
439 pr->u.p.parm1 = tc;
440 break;
441 } // case
442 case kmp_sch_guided_simd:
443#endif // OMP_45_ENABLED
444 case kmp_sch_guided_iterative_chunked: {
445 KD_TRACE(
446 100,
447 ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_guided_iterative_chunked"
448 " case\n",
449 gtid));
450
451 if (nproc > 1) {
452 if ((2L * chunk + 1) * nproc >= tc) {
453 /* chunk size too large, switch to dynamic */
454 schedule = kmp_sch_dynamic_chunked;
455 } else {
456 // when remaining iters become less than parm2 - switch to dynamic
457 pr->u.p.parm2 = guided_int_param * nproc * (chunk + 1);
458 *(double *)&pr->u.p.parm3 =
459 guided_flt_param / nproc; // may occupy parm3 and parm4
460 }
461 } else {
462 KD_TRACE(100, ("__kmp_dispatch_init_algorithm: T#%d falling-through to "
463 "kmp_sch_static_greedy\n",
464 gtid));
465 schedule = kmp_sch_static_greedy;
466 /* team->t.t_nproc == 1: fall-through to kmp_sch_static_greedy */
467 KD_TRACE(
468 100,
469 ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_greedy case\n",
470 gtid));
471 pr->u.p.parm1 = tc;
472 } // if
473 } // case
474 break;
475 case kmp_sch_guided_analytical_chunked: {
476 KD_TRACE(100, ("__kmp_dispatch_init_algorithm: T#%d "
477 "kmp_sch_guided_analytical_chunked case\n",
478 gtid));
479
480 if (nproc > 1) {
481 if ((2L * chunk + 1) * nproc >= tc) {
482 /* chunk size too large, switch to dynamic */
483 schedule = kmp_sch_dynamic_chunked;
484 } else {
485 /* commonly used term: (2 nproc - 1)/(2 nproc) */
486 DBL x;
487
488#if KMP_OS_WINDOWS && KMP_ARCH_X86
489 /* Linux* OS already has 64-bit computation by default for long double,
490 and on Windows* OS on Intel(R) 64, /Qlong_double doesn't work. On
491 Windows* OS on IA-32 architecture, we need to set precision to 64-bit
492 instead of the default 53-bit. Even though long double doesn't work
493 on Windows* OS on Intel(R) 64, the resulting lack of precision is not
494 expected to impact the correctness of the algorithm, but this has not
495 been mathematically proven. */
496 // save original FPCW and set precision to 64-bit, as
497 // Windows* OS on IA-32 architecture defaults to 53-bit
498 unsigned int oldFpcw = _control87(0, 0);
499 _control87(_PC_64, _MCW_PC); // 0,0x30000
500#endif
501 /* value used for comparison in solver for cross-over point */
502 long double target = ((long double)chunk * 2 + 1) * nproc / tc;
503
504 /* crossover point--chunk indexes equal to or greater than
505 this point switch to dynamic-style scheduling */
506 UT cross;
507
508 /* commonly used term: (2 nproc - 1)/(2 nproc) */
509 x = (long double)1.0 - (long double)0.5 / nproc;
510
511#ifdef KMP_DEBUG
512 { // test natural alignment
513 struct _test_a {
514 char a;
515 union {
516 char b;
517 DBL d;
518 };
519 } t;
520 ptrdiff_t natural_alignment =
521 (ptrdiff_t)&t.b - (ptrdiff_t)&t - (ptrdiff_t)1;
522 //__kmp_warn( " %llx %llx %lld", (long long)&t.d, (long long)&t, (long
523 // long)natural_alignment );
524 KMP_DEBUG_ASSERT(
525 (((ptrdiff_t)&pr->u.p.parm3) & (natural_alignment)) == 0);
526 }
527#endif // KMP_DEBUG
528
529 /* save the term in thread private dispatch structure */
530 *(DBL *)&pr->u.p.parm3 = x;
531
532 /* solve for the crossover point to the nearest integer i for which C_i
533 <= chunk */
534 {
535 UT left, right, mid;
536 long double p;
537
538 /* estimate initial upper and lower bound */
539
540 /* doesn't matter what value right is as long as it is positive, but
541 it affects performance of the solver */
542 right = 229;
543 p = __kmp_pow<UT>(x, right);
544 if (p > target) {
545 do {
546 p *= p;
547 right <<= 1;
548 } while (p > target && right < (1 << 27));
549 /* lower bound is previous (failed) estimate of upper bound */
550 left = right >> 1;
551 } else {
552 left = 0;
553 }
554
555 /* bisection root-finding method */
556 while (left + 1 < right) {
557 mid = (left + right) / 2;
558 if (__kmp_pow<UT>(x, mid) > target) {
559 left = mid;
560 } else {
561 right = mid;
562 }
563 } // while
564 cross = right;
565 }
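
As a side note, the solver above looks for the smallest exponent `cross` with x^cross <= target. The following simplified standalone version is not the runtime's code: it uses std::pow in place of __kmp_pow and ignores the FPCW handling, but reproduces the doubling-plus-bisection search:

#include <cassert>
#include <cmath>

// Simplified sketch of the crossover search above: find the smallest
// non-negative integer `cross` such that pow(x, cross) <= target,
// assuming 0 < x < 1 and 0 < target <= 1.
static unsigned long long find_cross(long double x, long double target) {
  unsigned long long left, right = 229; // initial guess, as in the source
  long double p = std::pow(x, (long double)right);
  if (p > target) {
    do {                // grow the upper bound until it overshoots the target
      p *= p;
      right <<= 1;
    } while (p > target && right < (1ULL << 27));
    left = right >> 1;  // previous (failed) estimate becomes the lower bound
  } else {
    left = 0;
  }
  while (left + 1 < right) {          // bisection: pow(x, left) > target holds
    unsigned long long mid = (left + right) / 2;
    if (std::pow(x, (long double)mid) > target)
      left = mid;
    else
      right = mid;
  }
  return right;
}

int main() {
  long double x = 0.875L;             // 1 - 0.5/nproc for nproc = 4
  unsigned long long cross = find_cross(x, 0.01L);
  assert(std::pow(x, (long double)cross) <= 0.01L &&
         std::pow(x, (long double)(cross - 1)) > 0.01L);
  return 0;
}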
566 /* assert sanity of computed crossover point */
567 KMP_ASSERT(cross && __kmp_pow<UT>(x, cross - 1) > target &&
568 __kmp_pow<UT>(x, cross) <= target);
569
570 /* save the crossover point in thread private dispatch structure */
571 pr->u.p.parm2 = cross;
572
573// C75803
574#if ((KMP_OS_LINUX || KMP_OS_WINDOWS) && KMP_ARCH_X86) && (!defined(KMP_I8))
575#define GUIDED_ANALYTICAL_WORKAROUND(x) (*(DBL *)&pr->u.p.parm3)
576#else
577#define GUIDED_ANALYTICAL_WORKAROUND(x) (x)
578#endif
579 /* dynamic-style scheduling offset */
580 pr->u.p.count = tc - __kmp_dispatch_guided_remaining(
581 tc, GUIDED_ANALYTICAL_WORKAROUND(x), cross) -
582 cross * chunk;
583#if KMP_OS_WINDOWS && KMP_ARCH_X86
584 // restore FPCW
585 _control87(oldFpcw, _MCW_PC);
586#endif
587 } // if
588 } else {
589 KD_TRACE(100, ("__kmp_dispatch_init_algorithm: T#%d falling-through to "
590 "kmp_sch_static_greedy\n",
591 gtid));
592 schedule = kmp_sch_static_greedy;
593 /* team->t.t_nproc == 1: fall-through to kmp_sch_static_greedy */
594 pr->u.p.parm1 = tc;
595 } // if
596 } // case
597 break;
598 case kmp_sch_static_greedy:
599 KD_TRACE(
600 100,
601 ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_greedy case\n",
602 gtid));
603 pr->u.p.parm1 = (nproc > 1) ? (tc + nproc - 1) / nproc : tc;
604 break;
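
A small aside (not from the source): kmp_sch_static_greedy hands every thread one contiguous block of ceil(tc / nproc) iterations, so parm1 above is a plain ceiling division:

#include <cassert>

// Sketch of the kmp_sch_static_greedy chunk size above: ceil(tc / nproc).
static unsigned greedy_chunk(unsigned tc, unsigned nproc) {
  return nproc > 1 ? (tc + nproc - 1) / nproc : tc;
}

int main() {
  assert(greedy_chunk(10, 4) == 3); // blocks of 3, 3, 3, 1 cover 10 iterations
  assert(greedy_chunk(8, 4) == 2);
  assert(greedy_chunk(7, 1) == 7);
  return 0;
}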
605 case kmp_sch_static_chunked:
606 case kmp_sch_dynamic_chunked:
607 if (pr->u.p.parm1 <= 0) {
608 pr->u.p.parm1 = KMP_DEFAULT_CHUNK;
609 }
610 KD_TRACE(100, ("__kmp_dispatch_init_algorithm: T#%d "
611 "kmp_sch_static_chunked/kmp_sch_dynamic_chunked cases\n",
612 gtid));
613 break;
614 case kmp_sch_trapezoidal: {
615 /* TSS: trapezoid self-scheduling, minimum chunk_size = parm1 */
616
617 T parm1, parm2, parm3, parm4;
618 KD_TRACE(100,
619 ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_trapezoidal case\n",
620 gtid));
621
622 parm1 = chunk;
623
624 /* F : size of the first cycle */
625 parm2 = (tc / (2 * nproc));
626
627 if (parm2 < 1) {
628 parm2 = 1;
629 }
630
631 /* L : size of the last cycle. Make sure the last cycle is not larger
632 than the first cycle. */
633 if (parm1 < 1) {
634 parm1 = 1;
635 } else if (parm1 > parm2) {
636 parm1 = parm2;
637 }
638
639 /* N : number of cycles */
640 parm3 = (parm2 + parm1);
641 parm3 = (2 * tc + parm3 - 1) / parm3;
642
643 if (parm3 < 2) {
644 parm3 = 2;
645 }
646
647 /* sigma : decreasing incr of the trapezoid */
648 parm4 = (parm3 - 1);
649 parm4 = (parm2 - parm1) / parm4;
650
651 // pointless check, because parm4 >= 0 always
652 // if ( parm4 < 0 ) {
653 // parm4 = 0;
654 //}
655
656 pr->u.p.parm1 = parm1;
657 pr->u.p.parm2 = parm2;
658 pr->u.p.parm3 = parm3;
659 pr->u.p.parm4 = parm4;
660 } // case
661 break;
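
For illustration only (not part of kmp_dispatch.cpp), the trapezoid self-scheduling parameters computed in the case above can be reproduced in a standalone sketch; with tc = 1000 iterations, nproc = 4 and a minimum chunk of 1, chunks start at 125 iterations and shrink by 8 per chunk:

#include <cassert>

// Sketch of the trapezoid self-scheduling (TSS) setup above.
struct TssParams {
  unsigned first;  // parm2: size of the first chunk
  unsigned last;   // parm1: size of the last (smallest) chunk
  unsigned chunks; // parm3: number of chunks
  unsigned decr;   // parm4: decrement applied per chunk
};

static TssParams tss_init(unsigned tc, unsigned nproc, unsigned chunk) {
  TssParams p;
  p.last = chunk;
  p.first = tc / (2 * nproc);          // F: size of the first cycle
  if (p.first < 1)
    p.first = 1;
  if (p.last < 1)                      // L: last cycle, no larger than the first
    p.last = 1;
  else if (p.last > p.first)
    p.last = p.first;
  p.chunks = (2 * tc + p.first + p.last - 1) / (p.first + p.last); // N: cycles
  if (p.chunks < 2)
    p.chunks = 2;
  p.decr = (p.first - p.last) / (p.chunks - 1); // sigma: decreasing increment
  return p;
}

int main() {
  TssParams p = tss_init(1000, 4, 1);
  assert(p.first == 125 && p.last == 1 && p.chunks == 16 && p.decr == 8);
  return 0;
}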
662
663 default: {
664 __kmp_fatal(KMP_MSG(UnknownSchedTypeDetected), // Primary message
665 KMP_HNT(GetNewerLibrary), // Hint
666 __kmp_msg_null // Variadic argument list terminator
667 );
668 } break;
669 } // switch
670 pr->schedule = schedule;
671}
672
673#if KMP_USE_HIER_SCHED
674template <typename T>
675inline void __kmp_dispatch_init_hier_runtime(ident_t *loc, T lb, T ub,
676 typename traits_t<T>::signed_t st);
677template <>
678inline void
679__kmp_dispatch_init_hier_runtime<kmp_int32>(ident_t *loc, kmp_int32 lb,
680 kmp_int32 ub, kmp_int32 st) {
681 __kmp_dispatch_init_hierarchy<kmp_int32>(
682 loc, __kmp_hier_scheds.size, __kmp_hier_scheds.layers,
683 __kmp_hier_scheds.scheds, __kmp_hier_scheds.small_chunks, lb, ub, st);
684}
685template <>
686inline void
687__kmp_dispatch_init_hier_runtime<kmp_uint32>(ident_t *loc, kmp_uint32 lb,
688 kmp_uint32 ub, kmp_int32 st) {
689 __kmp_dispatch_init_hierarchy<kmp_uint32>(
690 loc, __kmp_hier_scheds.size, __kmp_hier_scheds.layers,
691 __kmp_hier_scheds.scheds, __kmp_hier_scheds.small_chunks, lb, ub, st);
692}
693template <>
694inline void
695__kmp_dispatch_init_hier_runtime<kmp_int64>(ident_t *loc, kmp_int64 lb,
696 kmp_int64 ub, kmp_int64 st) {
697 __kmp_dispatch_init_hierarchy<kmp_int64>(
698 loc, __kmp_hier_scheds.size, __kmp_hier_scheds.layers,
699 __kmp_hier_scheds.scheds, __kmp_hier_scheds.large_chunks, lb, ub, st);
700}
701template <>
702inline void
703__kmp_dispatch_init_hier_runtime<kmp_uint64>(ident_t *loc, kmp_uint64 lb,
704 kmp_uint64 ub, kmp_int64 st) {
705 __kmp_dispatch_init_hierarchy<kmp_uint64>(
706 loc, __kmp_hier_scheds.size, __kmp_hier_scheds.layers,
707 __kmp_hier_scheds.scheds, __kmp_hier_scheds.large_chunks, lb, ub, st);
708}
709
710// free all the hierarchy scheduling memory associated with the team
711void __kmp_dispatch_free_hierarchies(kmp_team_t *team) {
712 int num_disp_buff = team->t.t_max_nproc > 1 ? __kmp_dispatch_num_buffers : 2;
713 for (int i = 0; i < num_disp_buff; ++i) {
714 // type does not matter here so use kmp_int32
715 auto sh =
716 reinterpret_cast<dispatch_shared_info_template<kmp_int32> volatile *>(
717 &team->t.t_disp_buffer[i]);
718 if (sh->hier) {
719 sh->hier->deallocate();
720 __kmp_free(sh->hier);
721 }
722 }
723}
724#endif
725
726// UT - unsigned flavor of T, ST - signed flavor of T,
727// DBL - double if sizeof(T)==4, or long double if sizeof(T)==8
728template <typename T>
729static void
730__kmp_dispatch_init(ident_t *loc, int gtid, enum sched_type schedule, T lb,
731 T ub, typename traits_t<T>::signed_t st,
732 typename traits_t<T>::signed_t chunk, int push_ws) {
733 typedef typename traits_t<T>::unsigned_t UT;
734 typedef typename traits_t<T>::signed_t ST;
735 typedef typename traits_t<T>::floating_t DBL;
736
737 int active;
738 kmp_info_t *th;
739 kmp_team_t *team;
740 kmp_uint32 my_buffer_index;
741 dispatch_private_info_template<T> *pr;
742 dispatch_shared_info_template<T> volatile *sh;
743
744 KMP_BUILD_ASSERT(sizeof(dispatch_private_info_template<T>) ==
745 sizeof(dispatch_private_info));
746 KMP_BUILD_ASSERT(sizeof(dispatch_shared_info_template<UT>) ==
747 sizeof(dispatch_shared_info));
748
749 if (!TCR_4(__kmp_init_parallel))
750 __kmp_parallel_initialize();
751
752#if INCLUDE_SSC_MARKS
753 SSC_MARK_DISPATCH_INIT();
754#endif
755#ifdef KMP_DEBUG
756 {
757 char *buff;
758 // create format specifiers before the debug output
759 buff = __kmp_str_format("__kmp_dispatch_init: T#%%d called: schedule:%%d "
760 "chunk:%%%s lb:%%%s ub:%%%s st:%%%s\n",
761 traits_t<ST>::spec, traits_t<T>::spec,
762 traits_t<T>::spec, traits_t<ST>::spec);
763 KD_TRACE(10, (buff, gtid, schedule, chunk, lb, ub, st));
764 __kmp_str_free(&buff);
765 }
766#endif
767 /* setup data */
768 th = __kmp_threads[gtid];
769 team = th->th.th_team;
770 active = !team->t.t_serialized;
771 th->th.th_ident = loc;
772
773#if KMP_USE_HIER_SCHED
774 // Initialize the scheduling hierarchy if requested in OMP_SCHEDULE envirable
775 // Hierarchical scheduling does not work with ordered, so if ordered is
776 // detected, then revert back to threaded scheduling.
777 bool ordered;
778 enum sched_type my_sched = schedule;
779 my_buffer_index = th->th.th_dispatch->th_disp_index;
780 pr = reinterpret_cast<dispatch_private_info_template<T> *>(
781 &th->th.th_dispatch
782 ->th_disp_buffer[my_buffer_index % __kmp_dispatch_num_buffers]);
783 my_sched = SCHEDULE_WITHOUT_MODIFIERS(my_sched);
784 if ((my_sched >= kmp_nm_lower) && (my_sched < kmp_nm_upper))
785 my_sched =
786 (enum sched_type)(((int)my_sched) - (kmp_nm_lower - kmp_sch_lower));
787 ordered = (kmp_ord_lower & my_sched);
788 if (pr->flags.use_hier) {
789 if (ordered) {
790 KD_TRACE(100, ("__kmp_dispatch_init: T#%d ordered loop detected. "
791 "Disabling hierarchical scheduling.\n",
792 gtid));
793 pr->flags.use_hier = FALSE;
794 }
795 }
796 if (schedule == kmp_sch_runtime && __kmp_hier_scheds.size > 0) {
797 // Don't use hierarchical for ordered parallel loops and don't
798 // use the runtime hierarchy if one was specified in the program
799 if (!ordered && !pr->flags.use_hier)
800 __kmp_dispatch_init_hier_runtime<T>(loc, lb, ub, st);
801 }
802#endif // KMP_USE_HIER_SCHED
803
804#if USE_ITT_BUILD
805 kmp_uint64 cur_chunk = chunk;
806 int itt_need_metadata_reporting = __itt_metadata_add_ptr &&
807 __kmp_forkjoin_frames_mode == 3 &&
808 KMP_MASTER_GTID(gtid) &&
809#if OMP_40_ENABLED
810 th->th.th_teams_microtask == NULL &&
811#endif
812 team->t.t_active_level == 1;
813#endif
814 if (!active) {
815 pr = reinterpret_cast<dispatch_private_info_template<T> *>(
816 th->th.th_dispatch->th_disp_buffer); /* top of the stack */
817 } else {
818 KMP_DEBUG_ASSERT(th->th.th_dispatch ==
819 &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]);
820
821 my_buffer_index = th->th.th_dispatch->th_disp_index++;
822
823 /* What happens when number of threads changes, need to resize buffer? */
824 pr = reinterpret_cast<dispatch_private_info_template<T> *>(
825 &th->th.th_dispatch
826 ->th_disp_buffer[my_buffer_index % __kmp_dispatch_num_buffers]);
827 sh = reinterpret_cast<dispatch_shared_info_template<T> volatile *>(
828 &team->t.t_disp_buffer[my_buffer_index % __kmp_dispatch_num_buffers]);
829 KD_TRACE(10, ("__kmp_dispatch_init: T#%d my_buffer_index:%d\n", gtid,
830 my_buffer_index));
831 }
832
833 __kmp_dispatch_init_algorithm(loc, gtid, pr, schedule, lb, ub, st,
834#if USE_ITT_BUILD
835 &cur_chunk,
836#endif
837 chunk, (T)th->th.th_team_nproc,
838 (T)th->th.th_info.ds.ds_tid);
839 if (active) {
840 if (pr->flags.ordered == 0) {
841 th->th.th_dispatch->th_deo_fcn = __kmp_dispatch_deo_error;
842 th->th.th_dispatch->th_dxo_fcn = __kmp_dispatch_dxo_error;
843 } else {
844 th->th.th_dispatch->th_deo_fcn = __kmp_dispatch_deo<UT>;
845 th->th.th_dispatch->th_dxo_fcn = __kmp_dispatch_dxo<UT>;
846 }
847 }
848
849 // Any half-decent optimizer will remove this test when the blocks are empty
850 // since the macros expand to nothing
851 // when statistics are disabled.
852 if (schedule == __kmp_static) {
853 KMP_COUNT_BLOCK(OMP_FOR_static);
854 KMP_COUNT_VALUE(FOR_static_iterations, pr->u.p.tc);
855 } else {
856 KMP_COUNT_BLOCK(OMP_FOR_dynamic);
857 KMP_COUNT_VALUE(FOR_dynamic_iterations, pr->u.p.tc);
858 }
859
860 if (active) {
861 /* The name of this buffer should be my_buffer_index when it's free to use
862 * it */
863
864 KD_TRACE(100, ("__kmp_dispatch_init: T#%d before wait: my_buffer_index:%d "
865 "sh->buffer_index:%d\n",
866 gtid, my_buffer_index, sh->buffer_index));
867 __kmp_wait_yield<kmp_uint32>(&sh->buffer_index, my_buffer_index,
868 __kmp_eq<kmp_uint32> USE_ITT_BUILD_ARG(NULL));
869 // Note: KMP_WAIT_YIELD() cannot be used there: buffer index and
870 // my_buffer_index are *always* 32-bit integers.
871 KMP_MB(); /* is this necessary? */
872 KD_TRACE(100, ("__kmp_dispatch_init: T#%d after wait: my_buffer_index:%d "
873 "sh->buffer_index:%d\n",
874 gtid, my_buffer_index, sh->buffer_index));
875
876 th->th.th_dispatch->th_dispatch_pr_current = (dispatch_private_info_t *)pr;
877 th->th.th_dispatch->th_dispatch_sh_current =
878 CCAST(dispatch_shared_info_t *, (volatile dispatch_shared_info_t *)sh);
879#if USE_ITT_BUILD
880 if (pr->flags.ordered) {
881 __kmp_itt_ordered_init(gtid);
882 }
883 // Report loop metadata
884 if (itt_need_metadata_reporting) {
885 // Only report metadata by master of active team at level 1
886 kmp_uint64 schedtype = 0;
887 switch (schedule) {
888 case kmp_sch_static_chunked:
889 case kmp_sch_static_balanced: // Chunk is calculated in the switch above
890 break;
891 case kmp_sch_static_greedy:
892 cur_chunk = pr->u.p.parm1;
893 break;
894 case kmp_sch_dynamic_chunked:
895 schedtype = 1;
896 break;
897 case kmp_sch_guided_iterative_chunked:
898 case kmp_sch_guided_analytical_chunked:
899#if OMP_45_ENABLED
900 case kmp_sch_guided_simd:
901#endif
902 schedtype = 2;
903 break;
904 default:
905 // Should we put this case under "static"?
906 // case kmp_sch_static_steal:
907 schedtype = 3;
908 break;
909 }
910 __kmp_itt_metadata_loop(loc, schedtype, pr->u.p.tc, cur_chunk);
911 }
912#if KMP_USE_HIER_SCHED
913 if (pr->flags.use_hier) {
914 pr->u.p.count = 0;
915 pr->u.p.ub = pr->u.p.lb = pr->u.p.st = pr->u.p.tc = 0;
916 }
917#endif // KMP_USER_HIER_SCHED
918#endif /* USE_ITT_BUILD */
919 }
920
921#ifdef KMP_DEBUG
922 {
923 char *buff;
924 // create format specifiers before the debug output
925 buff = __kmp_str_format(
926 "__kmp_dispatch_init: T#%%d returning: schedule:%%d ordered:%%%s "
927 "lb:%%%s ub:%%%s"
928 " st:%%%s tc:%%%s count:%%%s\n\tordered_lower:%%%s ordered_upper:%%%s"
929 " parm1:%%%s parm2:%%%s parm3:%%%s parm4:%%%s\n",
930 traits_t<UT>::spec, traits_t<T>::spec, traits_t<T>::spec,
931 traits_t<ST>::spec, traits_t<UT>::spec, traits_t<UT>::spec,
932 traits_t<UT>::spec, traits_t<UT>::spec, traits_t<T>::spec,
933 traits_t<T>::spec, traits_t<T>::spec, traits_t<T>::spec);
934 KD_TRACE(10, (buff, gtid, pr->schedule, pr->flags.ordered, pr->u.p.lb,
935 pr->u.p.ub, pr->u.p.st, pr->u.p.tc, pr->u.p.count,
936 pr->u.p.ordered_lower, pr->u.p.ordered_upper, pr->u.p.parm1,
937 pr->u.p.parm2, pr->u.p.parm3, pr->u.p.parm4));
938 __kmp_str_free(&buff);
939 }
940#endif
941#if (KMP_STATIC_STEAL_ENABLED)
942 // It cannot be guaranteed that after execution of a loop with some other
943 // schedule kind all the parm3 variables will contain the same value. Even if
944 // all parm3 values were the same, a bad case could still exist, like using 0 and 1
945 // rather than a program-lifetime increment. So the dedicated variable is
946 // required. The 'static_steal_counter' is used.
947 if (schedule == kmp_sch_static_steal) {
948 // Other threads will inspect this variable when searching for a victim.
949 // This is a flag showing that other threads may steal from this thread
950 // since then.
951 volatile T *p = &pr->u.p.static_steal_counter;
952 *p = *p + 1;
953 }
954#endif // ( KMP_STATIC_STEAL_ENABLED )
955
956#if OMPT_SUPPORT && OMPT_OPTIONAL
957 if (ompt_enabled.ompt_callback_work) {
958 ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
959 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
960 ompt_callbacks.ompt_callback(ompt_callback_work)(
961 ompt_work_loop, ompt_scope_begin, &(team_info->parallel_data),
962 &(task_info->task_data), pr->u.p.tc, OMPT_LOAD_RETURN_ADDRESS(gtid));
963 }
964#endif
965}
966
967/* For ordered loops, either __kmp_dispatch_finish() should be called after
968 * every iteration, or __kmp_dispatch_finish_chunk() should be called after
969 * every chunk of iterations. If the ordered section(s) were not executed
970 * for this iteration (or every iteration in this chunk), we need to set the
971 * ordered iteration counters so that the next thread can proceed. */
972template <typename UT>
973static void __kmp_dispatch_finish(int gtid, ident_t *loc) {
974 typedef typename traits_t<UT>::signed_t ST;
975 kmp_info_t *th = __kmp_threads[gtid];
976
977 KD_TRACE(100, ("__kmp_dispatch_finish: T#%d called\n", gtid));
978 if (!th->th.th_team->t.t_serialized) {
979
980 dispatch_private_info_template<UT> *pr =
981 reinterpret_cast<dispatch_private_info_template<UT> *>(
982 th->th.th_dispatch->th_dispatch_pr_current);
983 dispatch_shared_info_template<UT> volatile *sh =
984 reinterpret_cast<dispatch_shared_info_template<UT> volatile *>(
985 th->th.th_dispatch->th_dispatch_sh_current);
986 KMP_DEBUG_ASSERT(pr);
987 KMP_DEBUG_ASSERT(sh);
988 KMP_DEBUG_ASSERT(th->th.th_dispatch ==
989 &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]);
990
991 if (pr->ordered_bumped) {
992 KD_TRACE(
993 1000,
994 ("__kmp_dispatch_finish: T#%d resetting ordered_bumped to zero\n",
995 gtid));
996 pr->ordered_bumped = 0;
997 } else {
998 UT lower = pr->u.p.ordered_lower;
999
1000#ifdef KMP_DEBUG
1001 {
1002 char *buff;
1003 // create format specifiers before the debug output
1004 buff = __kmp_str_format("__kmp_dispatch_finish: T#%%d before wait: "
1005 "ordered_iteration:%%%s lower:%%%s\n",
1006 traits_t<UT>::spec, traits_t<UT>::spec);
1007 KD_TRACE(1000, (buff, gtid, sh->u.s.ordered_iteration, lower));
1008 __kmp_str_free(&buff);
1009 }
1010#endif
1011
1012 __kmp_wait_yield<UT>(&sh->u.s.ordered_iteration, lower,
1013 __kmp_ge<UT> USE_ITT_BUILD_ARG(NULL));
1014 KMP_MB(); /* is this necessary? */
1015#ifdef KMP_DEBUG
1016 {
1017 char *buff;
1018 // create format specifiers before the debug output
1019 buff = __kmp_str_format("__kmp_dispatch_finish: T#%%d after wait: "
1020 "ordered_iteration:%%%s lower:%%%s\n",
1021 traits_t<UT>::spec, traits_t<UT>::spec);
1022 KD_TRACE(1000, (buff, gtid, sh->u.s.ordered_iteration, lower));
1023 __kmp_str_free(&buff);
1024 }
1025#endif
1026
1027 test_then_inc<ST>((volatile ST *)&sh->u.s.ordered_iteration);
1028 } // if
1029 } // if
1030 KD_TRACE(100, ("__kmp_dispatch_finish: T#%d returned\n", gtid));
1031}
1032
1033#ifdef KMP_GOMP_COMPAT
1034
1035template <typename UT>
1036static void __kmp_dispatch_finish_chunk(int gtid, ident_t *loc) {
1037 typedef typename traits_t<UT>::signed_t ST;
1038 kmp_info_t *th = __kmp_threads[gtid];
1039
1040 KD_TRACE(100, ("__kmp_dispatch_finish_chunk: T#%d called\n", gtid));
1041 if (!th->th.th_team->t.t_serialized) {
1042 // int cid;
1043 dispatch_private_info_template<UT> *pr =
1044 reinterpret_cast<dispatch_private_info_template<UT> *>(
1045 th->th.th_dispatch->th_dispatch_pr_current);
1046 dispatch_shared_info_template<UT> volatile *sh =
1047 reinterpret_cast<dispatch_shared_info_template<UT> volatile *>(
1048 th->th.th_dispatch->th_dispatch_sh_current);
1049 KMP_DEBUG_ASSERT(pr);
1050 KMP_DEBUG_ASSERT(sh);
1051 KMP_DEBUG_ASSERT(th->th.th_dispatch ==
1052 &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]);
1053
1054 // for (cid = 0; cid < KMP_MAX_ORDERED; ++cid) {
1055 UT lower = pr->u.p.ordered_lower;
1056 UT upper = pr->u.p.ordered_upper;
1057 UT inc = upper - lower + 1;
1058
1059 if (pr->ordered_bumped == inc) {
1060 KD_TRACE(
1061 1000,
1062 ("__kmp_dispatch_finish: T#%d resetting ordered_bumped to zero\n",
1063 gtid));
1064 pr->ordered_bumped = 0;
1065 } else {
1066 inc -= pr->ordered_bumped;
1067
1068#ifdef KMP_DEBUG
1069 {
1070 char *buff;
1071 // create format specifiers before the debug output
1072 buff = __kmp_str_format(
1073 "__kmp_dispatch_finish_chunk: T#%%d before wait: "
1074 "ordered_iteration:%%%s lower:%%%s upper:%%%s\n",
1075 traits_t<UT>::spec, traits_t<UT>::spec, traits_t<UT>::spec);
1076 KD_TRACE(1000, (buff, gtid, sh->u.s.ordered_iteration, lower, upper));
1077 __kmp_str_free(&buff);
1078 }
1079#endif
1080
1081 __kmp_wait_yield<UT>(&sh->u.s.ordered_iteration, lower,
1082 __kmp_ge<UT> USE_ITT_BUILD_ARG(NULL));
1083
1084 KMP_MB(); /* is this necessary? */
1085 KD_TRACE(1000, ("__kmp_dispatch_finish_chunk: T#%d resetting "
1086 "ordered_bumped to zero\n",
1087 gtid));
1088 pr->ordered_bumped = 0;
1089//!!!!! TODO check if the inc should be unsigned, or signed???
1090#ifdef KMP_DEBUG
1091 {
1092 char *buff;
1093 // create format specifiers before the debug output
1094 buff = __kmp_str_format(
1095 "__kmp_dispatch_finish_chunk: T#%%d after wait: "
1096 "ordered_iteration:%%%s inc:%%%s lower:%%%s upper:%%%s\n",
1097 traits_t<UT>::spec, traits_t<UT>::spec, traits_t<UT>::spec,
1098 traits_t<UT>::spec);
1099 KD_TRACE(1000,
1100 (buff, gtid, sh->u.s.ordered_iteration, inc, lower, upper));
1101 __kmp_str_free(&buff);
1102 }
1103#endif
1104
1105 test_then_add<ST>((volatile ST *)&sh->u.s.ordered_iteration, inc);
1106 }
1107 // }
1108 }
1109 KD_TRACE(100, ("__kmp_dispatch_finish_chunk: T#%d returned\n", gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_finish_chunk: T#%d returned\n"
, gtid); }
;
1110}
1111
1112#endif /* KMP_GOMP_COMPAT */
1113
1114template <typename T>
1115int __kmp_dispatch_next_algorithm(int gtid,
1116 dispatch_private_info_template<T> *pr,
1117 dispatch_shared_info_template<T> volatile *sh,
1118 kmp_int32 *p_last, T *p_lb, T *p_ub,
1119 typename traits_t<T>::signed_t *p_st, T nproc,
1120 T tid) {
1121 typedef typename traits_t<T>::unsigned_t UT;
1122 typedef typename traits_t<T>::signed_t ST;
1123 typedef typename traits_t<T>::floating_t DBL;
1124 int status = 0;
1125 kmp_int32 last = 0;
1126 T start;
1127 ST incr;
1128 UT limit, trip, init;
1129 kmp_info_t *th = __kmp_threads[gtid];
1130 kmp_team_t *team = th->th.th_team;
1131
1132 KMP_DEBUG_ASSERT(th->th.th_dispatch ==((th->th.th_dispatch == &th->th.th_team->t.t_dispatch
[th->th.th_info.ds.ds_tid]) ? 0 : __kmp_debug_assert("th->th.th_dispatch == &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]"
, "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_dispatch.cpp"
, 1133))
1133 &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid])((th->th.th_dispatch == &th->th.th_team->t.t_dispatch
[th->th.th_info.ds.ds_tid]) ? 0 : __kmp_debug_assert("th->th.th_dispatch == &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]"
, "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_dispatch.cpp"
, 1133))
;
1134 KMP_DEBUG_ASSERT(pr)((pr) ? 0 : __kmp_debug_assert("pr", "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_dispatch.cpp"
, 1134))
;
1135 KMP_DEBUG_ASSERT(sh)((sh) ? 0 : __kmp_debug_assert("sh", "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_dispatch.cpp"
, 1135))
;
1136 KMP_DEBUG_ASSERT(tid >= 0 && tid < nproc)((tid >= 0 && tid < nproc) ? 0 : __kmp_debug_assert
("tid >= 0 && tid < nproc", "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_dispatch.cpp"
, 1136))
;
1137#ifdef KMP_DEBUG1
1138 {
1139 char *buff;
1140 // create format specifiers before the debug output
1141 buff =
1142 __kmp_str_format("__kmp_dispatch_next_algorithm: T#%%d called pr:%%p "
1143 "sh:%%p nproc:%%%s tid:%%%s\n",
1144 traits_t<T>::spec, traits_t<T>::spec);
1145 KD_TRACE(10, (buff, gtid, pr, sh, nproc, tid))if (kmp_d_debug >= 10) { __kmp_debug_printf (buff, gtid, pr
, sh, nproc, tid); }
;
1146 __kmp_str_free(&buff);
1147 }
1148#endif
1149
1150 // zero trip count
1151 if (pr->u.p.tc == 0) {
  1. Assuming the condition is false
  2. Taking false branch
1152 KD_TRACE(10,if (kmp_d_debug >= 10) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d early exit trip count is "
"zero status:%d\n", gtid, status); }
1153 ("__kmp_dispatch_next_algorithm: T#%d early exit trip count is "if (kmp_d_debug >= 10) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d early exit trip count is "
"zero status:%d\n", gtid, status); }
1154 "zero status:%d\n",if (kmp_d_debug >= 10) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d early exit trip count is "
"zero status:%d\n", gtid, status); }
1155 gtid, status))if (kmp_d_debug >= 10) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d early exit trip count is "
"zero status:%d\n", gtid, status); }
;
1156 return 0;
1157 }
1158
1159 switch (pr->schedule) {
  3. Control jumps to 'case kmp_sch_static_balanced:' at line 1388
1160#if (KMP_STATIC_STEAL_ENABLED1)
1161 case kmp_sch_static_steal: {
1162 T chunk = pr->u.p.parm1;
1163
1164 KD_TRACE(100,if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_static_steal case\n"
, gtid); }
1165 ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_static_steal case\n",if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_static_steal case\n"
, gtid); }
1166 gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_static_steal case\n"
, gtid); }
;
1167
1168 trip = pr->u.p.tc - 1;
1169
1170 if (traits_t<T>::type_size > 4) {
1171 // use lock for 8-byte and CAS for 4-byte induction
1172 // variable. TODO (optional): check and use 16-byte CAS
1173 kmp_lock_t *lck = th->th.th_dispatch->th_steal_lock;
1174 KMP_DEBUG_ASSERT(lck != NULL)((lck != __null) ? 0 : __kmp_debug_assert("lck != __null", "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_dispatch.cpp"
, 1174))
;
1175 if (pr->u.p.count < (UT)pr->u.p.ub) {
1176 __kmp_acquire_lock(lck, gtid);
1177 // try to get own chunk of iterations
1178 init = (pr->u.p.count)++;
1179 status = (init < (UT)pr->u.p.ub);
1180 __kmp_release_lock(lck, gtid);
1181 } else {
1182 status = 0; // no own chunks
1183 }
1184 if (!status) { // try to steal
1185 kmp_info_t **other_threads = team->t.t_threads;
1186 int while_limit = nproc; // nproc attempts to find a victim
1187 int while_index = 0;
1189          // TODO: the victim-search algorithm should be
1190          // cleaned up and measured
1190 while ((!status) && (while_limit != ++while_index)) {
1191 T remaining;
1192 T victimIdx = pr->u.p.parm4;
1193 T oldVictimIdx = victimIdx ? victimIdx - 1 : nproc - 1;
1194 dispatch_private_info_template<T> *victim =
1195 reinterpret_cast<dispatch_private_info_template<T> *>(
1196 other_threads[victimIdx]
1197 ->th.th_dispatch->th_dispatch_pr_current);
1198 while ((victim == NULL__null || victim == pr ||
1199 (*(volatile T *)&victim->u.p.static_steal_counter !=
1200 *(volatile T *)&pr->u.p.static_steal_counter)) &&
1201 oldVictimIdx != victimIdx) {
1202 victimIdx = (victimIdx + 1) % nproc;
1203 victim = reinterpret_cast<dispatch_private_info_template<T> *>(
1204 other_threads[victimIdx]
1205 ->th.th_dispatch->th_dispatch_pr_current);
1206 }
1207 if (!victim || (*(volatile T *)&victim->u.p.static_steal_counter !=
1208 *(volatile T *)&pr->u.p.static_steal_counter)) {
1209 continue; // try once more (nproc attempts in total)
1210 // no victim is ready yet to participate in stealing
1211 // because all victims are still in kmp_init_dispatch
1212 }
1213 if (victim->u.p.count + 2 > (UT)victim->u.p.ub) {
1214 pr->u.p.parm4 = (victimIdx + 1) % nproc; // shift start tid
1215 continue; // not enough chunks to steal, goto next victim
1216 }
1217
1218 lck = other_threads[victimIdx]->th.th_dispatch->th_steal_lock;
1219 KMP_ASSERT(lck != NULL)((lck != __null) ? 0 : __kmp_debug_assert("lck != NULL", "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_dispatch.cpp"
, 1219))
;
1220 __kmp_acquire_lock(lck, gtid);
1221 limit = victim->u.p.ub; // keep initial ub
1222 if (victim->u.p.count >= limit ||
1223 (remaining = limit - victim->u.p.count) < 2) {
1224 __kmp_release_lock(lck, gtid);
1225 pr->u.p.parm4 = (victimIdx + 1) % nproc; // next victim
1226 continue; // not enough chunks to steal
1227 }
1228          // stealing succeeded, reduce victim's ub by 1/4 of undone chunks or
1229          // by 1
1230 if (remaining > 3) {
1231 // steal 1/4 of remaining
1232 KMP_COUNT_VALUE(FOR_static_steal_stolen, remaining >> 2)((void)0);
1233 init = (victim->u.p.ub -= (remaining >> 2));
1234 } else {
1235 // steal 1 chunk of 2 or 3 remaining
1236 KMP_COUNT_VALUE(FOR_static_steal_stolen, 1)((void)0);
1237 init = (victim->u.p.ub -= 1);
1238 }
1239 __kmp_release_lock(lck, gtid);
1240
1241 KMP_DEBUG_ASSERT(init + 1 <= limit)((init + 1 <= limit) ? 0 : __kmp_debug_assert("init + 1 <= limit"
, "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_dispatch.cpp"
, 1241))
;
1242 pr->u.p.parm4 = victimIdx; // remember victim to steal from
1243 status = 1;
1244 while_index = 0;
1245          // now update own count and ub with the stolen range; chunk 'init' is returned to the caller directly
1246 __kmp_acquire_lock(th->th.th_dispatch->th_steal_lock, gtid);
1247 pr->u.p.count = init + 1;
1248 pr->u.p.ub = limit;
1249 __kmp_release_lock(th->th.th_dispatch->th_steal_lock, gtid);
1250 } // while (search for victim)
1251 } // if (try to find victim and steal)
1252 } else {
1253 // 4-byte induction variable, use 8-byte CAS for pair (count, ub)
1254 typedef union {
1255 struct {
1256 UT count;
1257 T ub;
1258 } p;
1259 kmp_int64 b;
1260 } union_i4;
1261 // All operations on 'count' or 'ub' must be combined atomically
1262 // together.
1263 {
1264 union_i4 vold, vnew;
1265 vold.b = *(volatile kmp_int64 *)(&pr->u.p.count);
1266 vnew = vold;
1267 vnew.p.count++;
1268 while (!KMP_COMPARE_AND_STORE_ACQ64(__sync_bool_compare_and_swap((volatile kmp_uint64 *)((volatile
kmp_int64 *)&pr->u.p.count), (kmp_uint64)(*(kmp_int64
*) & vold.b), (kmp_uint64)(*(kmp_int64 *) & vnew.b))
1269 (volatile kmp_int64 *)&pr->u.p.count,__sync_bool_compare_and_swap((volatile kmp_uint64 *)((volatile
kmp_int64 *)&pr->u.p.count), (kmp_uint64)(*(kmp_int64
*) & vold.b), (kmp_uint64)(*(kmp_int64 *) & vnew.b))
1270 *VOLATILE_CAST(kmp_int64 *) & vold.b,__sync_bool_compare_and_swap((volatile kmp_uint64 *)((volatile
kmp_int64 *)&pr->u.p.count), (kmp_uint64)(*(kmp_int64
*) & vold.b), (kmp_uint64)(*(kmp_int64 *) & vnew.b))
1271 *VOLATILE_CAST(kmp_int64 *) & vnew.b)__sync_bool_compare_and_swap((volatile kmp_uint64 *)((volatile
kmp_int64 *)&pr->u.p.count), (kmp_uint64)(*(kmp_int64
*) & vold.b), (kmp_uint64)(*(kmp_int64 *) & vnew.b))
) {
1272 KMP_CPU_PAUSE()__kmp_x86_pause();
1273 vold.b = *(volatile kmp_int64 *)(&pr->u.p.count);
1274 vnew = vold;
1275 vnew.p.count++;
1276 }
1277 vnew = vold;
1278 init = vnew.p.count;
1279 status = (init < (UT)vnew.p.ub);
1280 }
1281
1282 if (!status) {
1283 kmp_info_t **other_threads = team->t.t_threads;
1284 int while_limit = nproc; // nproc attempts to find a victim
1285 int while_index = 0;
1286
1287          // TODO: the victim-search algorithm should be
1288          // cleaned up and measured
1289 while ((!status) && (while_limit != ++while_index)) {
1290 union_i4 vold, vnew;
1291 kmp_int32 remaining;
1292 T victimIdx = pr->u.p.parm4;
1293 T oldVictimIdx = victimIdx ? victimIdx - 1 : nproc - 1;
1294 dispatch_private_info_template<T> *victim =
1295 reinterpret_cast<dispatch_private_info_template<T> *>(
1296 other_threads[victimIdx]
1297 ->th.th_dispatch->th_dispatch_pr_current);
1298 while ((victim == NULL__null || victim == pr ||
1299 (*(volatile T *)&victim->u.p.static_steal_counter !=
1300 *(volatile T *)&pr->u.p.static_steal_counter)) &&
1301 oldVictimIdx != victimIdx) {
1302 victimIdx = (victimIdx + 1) % nproc;
1303 victim = reinterpret_cast<dispatch_private_info_template<T> *>(
1304 other_threads[victimIdx]
1305 ->th.th_dispatch->th_dispatch_pr_current);
1306 }
1307 if (!victim || (*(volatile T *)&victim->u.p.static_steal_counter !=
1308 *(volatile T *)&pr->u.p.static_steal_counter)) {
1309 continue; // try once more (nproc attempts in total)
1310 // no victim is ready yet to participate in stealing
1311 // because all victims are still in kmp_init_dispatch
1312 }
1313 pr->u.p.parm4 = victimIdx; // new victim found
1314 while (1) { // CAS loop if victim has enough chunks to steal
1315 vold.b = *(volatile kmp_int64 *)(&victim->u.p.count);
1316 vnew = vold;
1317
1318 KMP_DEBUG_ASSERT((vnew.p.ub - 1) * (UT)chunk <= trip)(((vnew.p.ub - 1) * (UT)chunk <= trip) ? 0 : __kmp_debug_assert
("(vnew.p.ub - 1) * (UT)chunk <= trip", "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_dispatch.cpp"
, 1318))
;
1319 if (vnew.p.count >= (UT)vnew.p.ub ||
1320 (remaining = vnew.p.ub - vnew.p.count) < 2) {
1321 pr->u.p.parm4 = (victimIdx + 1) % nproc; // shift start victim id
1322 break; // not enough chunks to steal, goto next victim
1323 }
1324 if (remaining > 3) {
1325 vnew.p.ub -= (remaining >> 2); // try to steal 1/4 of remaining
1326 } else {
1327 vnew.p.ub -= 1; // steal 1 chunk of 2 or 3 remaining
1328 }
1329 KMP_DEBUG_ASSERT((vnew.p.ub - 1) * (UT)chunk <= trip)(((vnew.p.ub - 1) * (UT)chunk <= trip) ? 0 : __kmp_debug_assert
("(vnew.p.ub - 1) * (UT)chunk <= trip", "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_dispatch.cpp"
, 1329))
;
1330 // TODO: Should this be acquire or release?
1331 if (KMP_COMPARE_AND_STORE_ACQ64(__sync_bool_compare_and_swap((volatile kmp_uint64 *)((volatile
kmp_int64 *)&victim->u.p.count), (kmp_uint64)(*(kmp_int64
*) & vold.b), (kmp_uint64)(*(kmp_int64 *) & vnew.b))
1332 (volatile kmp_int64 *)&victim->u.p.count,__sync_bool_compare_and_swap((volatile kmp_uint64 *)((volatile
kmp_int64 *)&victim->u.p.count), (kmp_uint64)(*(kmp_int64
*) & vold.b), (kmp_uint64)(*(kmp_int64 *) & vnew.b))
1333 *VOLATILE_CAST(kmp_int64 *) & vold.b,__sync_bool_compare_and_swap((volatile kmp_uint64 *)((volatile
kmp_int64 *)&victim->u.p.count), (kmp_uint64)(*(kmp_int64
*) & vold.b), (kmp_uint64)(*(kmp_int64 *) & vnew.b))
1334 *VOLATILE_CAST(kmp_int64 *) & vnew.b)__sync_bool_compare_and_swap((volatile kmp_uint64 *)((volatile
kmp_int64 *)&victim->u.p.count), (kmp_uint64)(*(kmp_int64
*) & vold.b), (kmp_uint64)(*(kmp_int64 *) & vnew.b))
) {
1335            // stealing succeeded
1336 KMP_COUNT_VALUE(FOR_static_steal_stolen, vold.p.ub - vnew.p.ub)((void)0);
1337 status = 1;
1338 while_index = 0;
1339 // now update own count and ub
1340 init = vnew.p.ub;
1341 vold.p.count = init + 1;
1342#if KMP_ARCH_X860
1343 KMP_XCHG_FIXED64((volatile kmp_int64 *)(&pr->u.p.count), vold.b)__sync_lock_test_and_set((volatile kmp_uint64 *)((volatile kmp_int64
*)(&pr->u.p.count)), (kmp_uint64)(vold.b))
;
1344#else
1345 *(volatile kmp_int64 *)(&pr->u.p.count) = vold.b;
1346#endif
1347 break;
1348 } // if (check CAS result)
1349          KMP_CPU_PAUSE()__kmp_x86_pause(); // CAS failed, repeat attempt
1350 } // while (try to steal from particular victim)
1351 } // while (search for victim)
1352 } // if (try to find victim and steal)
1353 } // if (4-byte induction variable)
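The 4-byte path above packs (count, ub) into a single 64-bit word so that one CAS updates both fields together. Below is a minimal, hedged sketch of that claiming step in standalone C++: std::atomic stands in for KMP_COMPARE_AND_STORE_ACQ64, and the same union punning as union_i4 is used; the names are illustrative, not the runtime's.

#include <atomic>
#include <cstdint>

// Mirrors union_i4: count and ub share one 64-bit word so a single CAS
// updates both atomically (union punning, as in the listing above).
union PackedCountUb {
  struct { uint32_t count; uint32_t ub; } p;
  uint64_t b;
};

// Claim the next chunk index from the caller's own (count, ub) pair.
// Returns false when count has reached ub, i.e. the caller should try to
// steal from another thread.
static bool claim_own_chunk(std::atomic<uint64_t> &packed, uint32_t &init) {
  PackedCountUb vold, vnew;
  vold.b = packed.load(std::memory_order_relaxed);
  do {
    vnew.b = vold.b;
    vnew.p.count++;                     // bump count, keep ub unchanged
  } while (!packed.compare_exchange_weak(vold.b, vnew.b));
  init = vold.p.count;                  // chunk index just claimed
  return init < vold.p.ub;              // false: own range exhausted
}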
1354 if (!status) {
1355 *p_lb = 0;
1356 *p_ub = 0;
1357 if (p_st != NULL__null)
1358 *p_st = 0;
1359 } else {
1360 start = pr->u.p.parm2;
1361 init *= chunk;
1362 limit = chunk + init - 1;
1363 incr = pr->u.p.st;
1364 KMP_COUNT_VALUE(FOR_static_steal_chunks, 1)((void)0);
1365
1366 KMP_DEBUG_ASSERT(init <= trip)((init <= trip) ? 0 : __kmp_debug_assert("init <= trip"
, "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_dispatch.cpp"
, 1366))
;
1367 if ((last = (limit >= trip)) != 0)
1368 limit = trip;
1369 if (p_st != NULL__null)
1370 *p_st = incr;
1371
1372 if (incr == 1) {
1373 *p_lb = start + init;
1374 *p_ub = start + limit;
1375 } else {
1376 *p_lb = start + init * incr;
1377 *p_ub = start + limit * incr;
1378 }
1379
1380 if (pr->flags.ordered) {
1381 pr->u.p.ordered_lower = init;
1382 pr->u.p.ordered_upper = limit;
1383 } // if
1384 } // if
1385 break;
1386 } // case
1387#endif // ( KMP_STATIC_STEAL_ENABLED )
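The steal path above takes a quarter of the victim's remaining chunks when more than three remain, otherwise a single chunk of the two or three left. A standalone sketch of just that rule follows; the victim's lock and the CAS retry loop are deliberately omitted, and the names are illustrative.

#include <cstdint>

// Sketch of the stealing rule in kmp_sch_static_steal: 'count' and 'ub'
// stand in for victim->u.p.count and victim->u.p.ub.  Returns the new ub,
// i.e. the first chunk index now owned by the thief, or UINT64_MAX when the
// victim has fewer than two chunks left (the "try next victim" path above).
static uint64_t steal_tail(uint64_t count, uint64_t &ub) {
  if (count >= ub || ub - count < 2)
    return UINT64_MAX;                     // not enough chunks to steal
  uint64_t remaining = ub - count;
  ub -= (remaining > 3) ? (remaining >> 2) // take 1/4 of the undone chunks
                        : 1;               // or a single chunk of 2-3 left
  return ub;                               // thief owns [new ub, old ub)
}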
1388 case kmp_sch_static_balanced: {
1389 KD_TRACE(if (kmp_d_debug >= 10) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_static_balanced case\n"
, gtid); }
1390 10,if (kmp_d_debug >= 10) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_static_balanced case\n"
, gtid); }
1391 ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_static_balanced case\n",if (kmp_d_debug >= 10) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_static_balanced case\n"
, gtid); }
1392 gtid))if (kmp_d_debug >= 10) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_static_balanced case\n"
, gtid); }
;
1393 /* check if thread has any iteration to do */
1394 if ((status = !pr->u.p.count) != 0) {
  4. Assuming the condition is true
  5. Taking true branch
1395 pr->u.p.count = 1;
1396 *p_lb = pr->u.p.lb;
1397 *p_ub = pr->u.p.ub;
1398 last = pr->u.p.parm1;
1399 if (p_st != NULL__null)
  6. Assuming 'p_st' is equal to NULL
  7. Taking false branch
1400 *p_st = pr->u.p.st;
1401 } else { /* no iterations to do */
1402 pr->u.p.lb = pr->u.p.ub + pr->u.p.st;
1403 }
1404 } // case
1405 break;
  8. Execution continues on line 1781
1406 case kmp_sch_static_greedy: /* original code for kmp_sch_static_greedy was
1407 merged here */
1408 case kmp_sch_static_chunked: {
1409 T parm1;
1410
1411 KD_TRACE(100, ("__kmp_dispatch_next_algorithm: T#%d "if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d "
"kmp_sch_static_[affinity|chunked] case\n", gtid); }
1412 "kmp_sch_static_[affinity|chunked] case\n",if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d "
"kmp_sch_static_[affinity|chunked] case\n", gtid); }
1413 gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d "
"kmp_sch_static_[affinity|chunked] case\n", gtid); }
;
1414 parm1 = pr->u.p.parm1;
1415
1416 trip = pr->u.p.tc - 1;
1417 init = parm1 * (pr->u.p.count + tid);
1418
1419 if ((status = (init <= trip)) != 0) {
1420 start = pr->u.p.lb;
1421 incr = pr->u.p.st;
1422 limit = parm1 + init - 1;
1423
1424 if ((last = (limit >= trip)) != 0)
1425 limit = trip;
1426
1427 if (p_st != NULL__null)
1428 *p_st = incr;
1429
1430 pr->u.p.count += nproc;
1431
1432 if (incr == 1) {
1433 *p_lb = start + init;
1434 *p_ub = start + limit;
1435 } else {
1436 *p_lb = start + init * incr;
1437 *p_ub = start + limit * incr;
1438 }
1439
1440 if (pr->flags.ordered) {
1441 pr->u.p.ordered_lower = init;
1442 pr->u.p.ordered_upper = limit;
1443 } // if
1444 } // if
1445 } // case
1446 break;
1447
1448 case kmp_sch_dynamic_chunked: {
1449 T chunk = pr->u.p.parm1;
1450
1451 KD_TRACE(if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_dynamic_chunked case\n"
, gtid); }
1452 100,if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_dynamic_chunked case\n"
, gtid); }
1453 ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_dynamic_chunked case\n",if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_dynamic_chunked case\n"
, gtid); }
1454 gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_dynamic_chunked case\n"
, gtid); }
;
1455
1456 init = chunk * test_then_inc_acq<ST>((volatile ST *)&sh->u.s.iteration);
1457 trip = pr->u.p.tc - 1;
1458
1459 if ((status = (init <= trip)) == 0) {
1460 *p_lb = 0;
1461 *p_ub = 0;
1462 if (p_st != NULL__null)
1463 *p_st = 0;
1464 } else {
1465 start = pr->u.p.lb;
1466 limit = chunk + init - 1;
1467 incr = pr->u.p.st;
1468
1469 if ((last = (limit >= trip)) != 0)
1470 limit = trip;
1471
1472 if (p_st != NULL__null)
1473 *p_st = incr;
1474
1475 if (incr == 1) {
1476 *p_lb = start + init;
1477 *p_ub = start + limit;
1478 } else {
1479 *p_lb = start + init * incr;
1480 *p_ub = start + limit * incr;
1481 }
1482
1483 if (pr->flags.ordered) {
1484 pr->u.p.ordered_lower = init;
1485 pr->u.p.ordered_upper = limit;
1486 } // if
1487 } // if
1488 } // case
1489 break;
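For reference, the dynamic case above reduces to an atomic fetch-and-increment on the shared chunk counter followed by a bound computation. A self-contained sketch of that computation is shown below; std::atomic stands in for test_then_inc_acq, variable names mirror the listing, and the function itself is illustrative.

#include <atomic>
#include <cstdint>

struct DynChunk { int64_t lb, ub; bool valid; };

// Sketch of kmp_sch_dynamic_chunked: claim the next chunk index atomically
// and convert it to loop bounds.  'shared_iteration' plays the role of
// sh->u.s.iteration; lb/st/tc mirror pr->u.p.lb, pr->u.p.st and pr->u.p.tc.
static DynChunk next_dynamic_chunk(std::atomic<uint64_t> &shared_iteration,
                                   int64_t lb, int64_t st, uint64_t tc,
                                   uint64_t chunk) {
  uint64_t init = chunk * shared_iteration.fetch_add(1); // first iteration
  uint64_t trip = tc - 1;                                // last valid index
  if (init > trip)
    return {0, 0, false};                                // no work left
  uint64_t limit = init + chunk - 1;
  if (limit > trip)
    limit = trip;                                        // final short chunk
  return {lb + (int64_t)init * st, lb + (int64_t)limit * st, true};
}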
1490
1491 case kmp_sch_guided_iterative_chunked: {
1492 T chunkspec = pr->u.p.parm1;
1493 KD_TRACE(100, ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_guided_chunked "if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_guided_chunked "
"iterative case\n", gtid); }
1494 "iterative case\n",if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_guided_chunked "
"iterative case\n", gtid); }
1495 gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_guided_chunked "
"iterative case\n", gtid); }
;
1496 trip = pr->u.p.tc;
1497 // Start atomic part of calculations
1498 while (1) {
1499 ST remaining; // signed, because can be < 0
1500 init = sh->u.s.iteration; // shared value
1501 remaining = trip - init;
1502 if (remaining <= 0) { // AC: need to compare with 0 first
1503 // nothing to do, don't try atomic op
1504 status = 0;
1505 break;
1506 }
1507 if ((T)remaining <
1508 pr->u.p.parm2) { // compare with K*nproc*(chunk+1), K=2 by default
1509        // use dynamic-style schedule
1510        // atomically increment iterations, get old value
1511 init = test_then_add<ST>(RCAST(volatile ST *, &sh->u.s.iteration)reinterpret_cast<volatile ST *>(&sh->u.s.iteration
)
,
1512 (ST)chunkspec);
1513 remaining = trip - init;
1514 if (remaining <= 0) {
1515 status = 0; // all iterations got by other threads
1516 } else {
1517 // got some iterations to work on
1518 status = 1;
1519 if ((T)remaining > chunkspec) {
1520 limit = init + chunkspec - 1;
1521 } else {
1522 last = 1; // the last chunk
1523 limit = init + remaining - 1;
1524 } // if
1525 } // if
1526 break;
1527 } // if
1528 limit = init +
1529 (UT)(remaining * *(double *)&pr->u.p.parm3); // divide by K*nproc
1530 if (compare_and_swap<ST>(RCAST(volatile ST *, &sh->u.s.iteration)reinterpret_cast<volatile ST *>(&sh->u.s.iteration
)
,
1531 (ST)init, (ST)limit)) {
1532 // CAS was successful, chunk obtained
1533 status = 1;
1534 --limit;
1535 break;
1536 } // if
1537 } // while
1538 if (status != 0) {
1539 start = pr->u.p.lb;
1540 incr = pr->u.p.st;
1541 if (p_st != NULL__null)
1542 *p_st = incr;
1543 *p_lb = start + init * incr;
1544 *p_ub = start + limit * incr;
1545 if (pr->flags.ordered) {
1546 pr->u.p.ordered_lower = init;
1547 pr->u.p.ordered_upper = limit;
1548 } // if
1549 } else {
1550 *p_lb = 0;
1551 *p_ub = 0;
1552 if (p_st != NULL__null)
1553 *p_st = 0;
1554 } // if
1555 } // case
1556 break;
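The guided-iterative case above claims a fraction of the remaining iterations per request (parm3 is precomputed as roughly 1/(K*nproc)) and falls back to plain chunked dispatch once fewer than parm2 iterations remain. Below is a standalone sketch of the chunk-size rule only; the CAS loop on sh->u.s.iteration is omitted and the names are illustrative.

#include <cstdint>

// Sketch of the guided chunk-size rule: 'factor' plays the role of
// *(double *)&pr->u.p.parm3 (about 1/(K*nproc)) and 'switch_point' the role
// of pr->u.p.parm2 (about K*nproc*(chunk+1)).
static uint64_t guided_chunk(uint64_t remaining, uint64_t chunkspec,
                             double factor, uint64_t switch_point) {
  if (remaining < switch_point)        // small tail: behave like dynamic
    return remaining < chunkspec ? remaining : chunkspec;
  uint64_t claim = (uint64_t)(remaining * factor); // fraction of what's left
  return claim ? claim : 1;            // sketch-only guard against a 0 claim
}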
1557
1558#if OMP_45_ENABLED(50 >= 45)
1559 case kmp_sch_guided_simd: {
1560 // same as iterative but curr-chunk adjusted to be multiple of given
1561 // chunk
1562 T chunk = pr->u.p.parm1;
1563 KD_TRACE(100,if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_guided_simd case\n"
, gtid); }
1564 ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_guided_simd case\n",if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_guided_simd case\n"
, gtid); }
1565 gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_guided_simd case\n"
, gtid); }
;
1566 trip = pr->u.p.tc;
1567 // Start atomic part of calculations
1568 while (1) {
1569 ST remaining; // signed, because can be < 0
1570 init = sh->u.s.iteration; // shared value
1571 remaining = trip - init;
1572 if (remaining <= 0) { // AC: need to compare with 0 first
1573 status = 0; // nothing to do, don't try atomic op
1574 break;
1575 }
1576 KMP_DEBUG_ASSERT(init % chunk == 0)((init % chunk == 0) ? 0 : __kmp_debug_assert("init % chunk == 0"
, "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_dispatch.cpp"
, 1576))
;
1577 // compare with K*nproc*(chunk+1), K=2 by default
1578 if ((T)remaining < pr->u.p.parm2) {
1579        // use dynamic-style schedule
1580        // atomically increment iterations, get old value
1581 init = test_then_add<ST>(RCAST(volatile ST *, &sh->u.s.iteration)reinterpret_cast<volatile ST *>(&sh->u.s.iteration
)
,
1582 (ST)chunk);
1583 remaining = trip - init;
1584 if (remaining <= 0) {
1585 status = 0; // all iterations got by other threads
1586 } else {
1587 // got some iterations to work on
1588 status = 1;
1589 if ((T)remaining > chunk) {
1590 limit = init + chunk - 1;
1591 } else {
1592 last = 1; // the last chunk
1593 limit = init + remaining - 1;
1594 } // if
1595 } // if
1596 break;
1597 } // if
1598 // divide by K*nproc
1599 UT span = remaining * (*(double *)&pr->u.p.parm3);
1600 UT rem = span % chunk;
1601 if (rem) // adjust so that span%chunk == 0
1602 span += chunk - rem;
1603 limit = init + span;
1604 if (compare_and_swap<ST>(RCAST(volatile ST *, &sh->u.s.iteration)reinterpret_cast<volatile ST *>(&sh->u.s.iteration
)
,
1605 (ST)init, (ST)limit)) {
1606 // CAS was successful, chunk obtained
1607 status = 1;
1608 --limit;
1609 break;
1610 } // if
1611 } // while
1612 if (status != 0) {
1613 start = pr->u.p.lb;
1614 incr = pr->u.p.st;
1615 if (p_st != NULL__null)
1616 *p_st = incr;
1617 *p_lb = start + init * incr;
1618 *p_ub = start + limit * incr;
1619 if (pr->flags.ordered) {
1620 pr->u.p.ordered_lower = init;
1621 pr->u.p.ordered_upper = limit;
1622 } // if
1623 } else {
1624 *p_lb = 0;
1625 *p_ub = 0;
1626 if (p_st != NULL__null)
1627 *p_st = 0;
1628 } // if
1629 } // case
1630 break;
1631#endif // OMP_45_ENABLED
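kmp_sch_guided_simd differs from the iterative case only in rounding each claimed span up to a multiple of the chunk so the vectorized loop body sees full chunks. A one-function sketch of that adjustment, with illustrative names:

#include <cstdint>

// Sketch of the guided_simd span adjustment above: round the claimed span
// up so that span % chunk == 0 (the final chunk is clipped separately).
static uint64_t simd_rounded_span(uint64_t remaining, double factor,
                                  uint64_t chunk) {
  uint64_t span = (uint64_t)(remaining * factor); // fraction of what's left
  uint64_t rem = span % chunk;
  if (rem)
    span += chunk - rem;                          // round up to full chunks
  return span;
}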
1632
1633 case kmp_sch_guided_analytical_chunked: {
1634 T chunkspec = pr->u.p.parm1;
1635 UT chunkIdx;
1636#if KMP_OS_WINDOWS0 && KMP_ARCH_X860
1637 /* for storing original FPCW value for Windows* OS on
1638 IA-32 architecture 8-byte version */
1639 unsigned int oldFpcw;
1640 unsigned int fpcwSet = 0;
1641#endif
1642 KD_TRACE(100, ("__kmp_dispatch_next_algorithm: T#%d "if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d "
"kmp_sch_guided_analytical_chunked case\n", gtid); }
1643 "kmp_sch_guided_analytical_chunked case\n",if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d "
"kmp_sch_guided_analytical_chunked case\n", gtid); }
1644 gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d "
"kmp_sch_guided_analytical_chunked case\n", gtid); }
;
1645
1646 trip = pr->u.p.tc;
1647
1648 KMP_DEBUG_ASSERT(nproc > 1)((nproc > 1) ? 0 : __kmp_debug_assert("nproc > 1", "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_dispatch.cpp"
, 1648))
;
1649 KMP_DEBUG_ASSERT((2UL * chunkspec + 1) * (UT)nproc < trip)(((2UL * chunkspec + 1) * (UT)nproc < trip) ? 0 : __kmp_debug_assert
("(2UL * chunkspec + 1) * (UT)nproc < trip", "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_dispatch.cpp"
, 1649))
;
1650
1651 while (1) { /* this while loop is a safeguard against unexpected zero
1652 chunk sizes */
1653 chunkIdx = test_then_inc_acq<ST>((volatile ST *)&sh->u.s.iteration);
1654 if (chunkIdx >= (UT)pr->u.p.parm2) {
1655 --trip;
1656 /* use dynamic-style scheduling */
1657 init = chunkIdx * chunkspec + pr->u.p.count;
1658 /* need to verify init > 0 in case of overflow in the above
1659 * calculation */
1660 if ((status = (init > 0 && init <= trip)) != 0) {
1661 limit = init + chunkspec - 1;
1662
1663 if ((last = (limit >= trip)) != 0)
1664 limit = trip;
1665 }
1666 break;
1667 } else {
1668/* use exponential-style scheduling */
1669/* The following check is to workaround the lack of long double precision on
1670 Windows* OS.
1671 This check works around the possible effect that init != 0 for chunkIdx == 0.
1672 */
1673#if KMP_OS_WINDOWS0 && KMP_ARCH_X860
1674 /* If we haven't already done so, save original
1675 FPCW and set precision to 64-bit, as Windows* OS
1676 on IA-32 architecture defaults to 53-bit */
1677 if (!fpcwSet) {
1678 oldFpcw = _control87(0, 0);
1679 _control87(_PC_64, _MCW_PC);
1680 fpcwSet = 0x30000;
1681 }
1682#endif
1683 if (chunkIdx) {
1684 init = __kmp_dispatch_guided_remaining<T>(
1685 trip, *(DBL *)&pr->u.p.parm3, chunkIdx);
1686 KMP_DEBUG_ASSERT(init)((init) ? 0 : __kmp_debug_assert("init", "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_dispatch.cpp"
, 1686))
;
1687 init = trip - init;
1688 } else
1689 init = 0;
1690 limit = trip - __kmp_dispatch_guided_remaining<T>(
1691 trip, *(DBL *)&pr->u.p.parm3, chunkIdx + 1);
1692 KMP_ASSERT(init <= limit)((init <= limit) ? 0 : __kmp_debug_assert("init <= limit"
, "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_dispatch.cpp"
, 1692))
;
1693 if (init < limit) {
1694 KMP_DEBUG_ASSERT(limit <= trip)((limit <= trip) ? 0 : __kmp_debug_assert("limit <= trip"
, "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_dispatch.cpp"
, 1694))
;
1695 --limit;
1696 status = 1;
1697 break;
1698 } // if
1699 } // if
1700 } // while (1)
1701#if KMP_OS_WINDOWS0 && KMP_ARCH_X860
1702 /* restore FPCW if necessary
1703 AC: check fpcwSet flag first because oldFpcw can be uninitialized here
1704 */
1705 if (fpcwSet && (oldFpcw & fpcwSet))
1706 _control87(oldFpcw, _MCW_PC);
1707#endif
1708 if (status != 0) {
1709 start = pr->u.p.lb;
1710 incr = pr->u.p.st;
1711 if (p_st != NULL__null)
1712 *p_st = incr;
1713 *p_lb = start + init * incr;
1714 *p_ub = start + limit * incr;
1715 if (pr->flags.ordered) {
1716 pr->u.p.ordered_lower = init;
1717 pr->u.p.ordered_upper = limit;
1718 }
1719 } else {
1720 *p_lb = 0;
1721 *p_ub = 0;
1722 if (p_st != NULL__null)
1723 *p_st = 0;
1724 }
1725 } // case
1726 break;
1727
1728 case kmp_sch_trapezoidal: {
1729 UT index;
1730 T parm2 = pr->u.p.parm2;
1731 T parm3 = pr->u.p.parm3;
1732 T parm4 = pr->u.p.parm4;
1733 KD_TRACE(100,if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_trapezoidal case\n"
, gtid); }
1734 ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_trapezoidal case\n",if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_trapezoidal case\n"
, gtid); }
1735 gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_trapezoidal case\n"
, gtid); }
;
1736
1737 index = test_then_inc<ST>((volatile ST *)&sh->u.s.iteration);
1738
1739 init = (index * ((2 * parm2) - (index - 1) * parm4)) / 2;
1740 trip = pr->u.p.tc - 1;
1741
1742 if ((status = ((T)index < parm3 && init <= trip)) == 0) {
1743 *p_lb = 0;
1744 *p_ub = 0;
1745 if (p_st != NULL__null)
1746 *p_st = 0;
1747 } else {
1748 start = pr->u.p.lb;
1749 limit = ((index + 1) * (2 * parm2 - index * parm4)) / 2 - 1;
1750 incr = pr->u.p.st;
1751
1752 if ((last = (limit >= trip)) != 0)
1753 limit = trip;
1754
1755 if (p_st != NULL__null)
1756 *p_st = incr;
1757
1758 if (incr == 1) {
1759 *p_lb = start + init;
1760 *p_ub = start + limit;
1761 } else {
1762 *p_lb = start + init * incr;
1763 *p_ub = start + limit * incr;
1764 }
1765
1766 if (pr->flags.ordered) {
1767 pr->u.p.ordered_lower = init;
1768 pr->u.p.ordered_upper = limit;
1769 } // if
1770 } // if
1771 } // case
1772 break;
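The trapezoidal schedule hands out chunks whose sizes shrink linearly, so the start of the k-th chunk is the partial sum of an arithmetic series. A standalone sketch of the index arithmetic used above (parm2 is the first chunk size, parm4 the per-chunk decrement; atomic claiming of k is omitted, names illustrative):

#include <cstdint>

// Sketch of the kmp_sch_trapezoidal bound computation: chunk k has size
// first - k*dec, so its start/end are partial sums of an arithmetic series.
// For k == 0 the unsigned wrap in (k - 1) is harmless because the whole
// product is multiplied by k, exactly as in the listing above.
static void trapezoid_bounds(uint64_t k, uint64_t first, uint64_t dec,
                             uint64_t &init, uint64_t &limit) {
  init  = (k * (2 * first - (k - 1) * dec)) / 2;       // sum of first k chunks
  limit = ((k + 1) * (2 * first - k * dec)) / 2 - 1;   // last index of chunk k
}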
1773 default: {
1774 status = 0; // to avoid complaints on uninitialized variable use
1775 __kmp_fatal(KMP_MSG(UnknownSchedTypeDetected)__kmp_msg_format(kmp_i18n_msg_UnknownSchedTypeDetected), // Primary message
1776 KMP_HNT(GetNewerLibrary)__kmp_msg_format(kmp_i18n_hnt_GetNewerLibrary), // Hint
1777 __kmp_msg_null // Variadic argument list terminator
1778 );
1779 } break;
1780 } // switch
1781 if (p_last)
  9. Assuming 'p_last' is non-null
  10. Taking true branch
1782 *p_last = last;
1783#ifdef KMP_DEBUG1
1784 if (pr->flags.ordered) {
  11. Taking false branch
1785 char *buff;
1786 // create format specifiers before the debug output
1787 buff = __kmp_str_format("__kmp_dispatch_next_algorithm: T#%%d "
1788 "ordered_lower:%%%s ordered_upper:%%%s\n",
1789 traits_t<UT>::spec, traits_t<UT>::spec);
1790 KD_TRACE(1000, (buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper))if (kmp_d_debug >= 1000) { __kmp_debug_printf (buff, gtid,
pr->u.p.ordered_lower, pr->u.p.ordered_upper); }
;
1791 __kmp_str_free(&buff);
1792 }
1793 {
1794 char *buff;
1795 // create format specifiers before the debug output
1796 buff = __kmp_str_format(
1797 "__kmp_dispatch_next_algorithm: T#%%d exit status:%%d p_last:%%d "
1798 "p_lb:%%%s p_ub:%%%s p_st:%%%s\n",
1799 traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec);
1800 KD_TRACE(10, (buff, gtid, status, *p_last, *p_lb, *p_ub, *p_st))if (kmp_d_debug >= 10) { __kmp_debug_printf (buff, gtid, status
, *p_last, *p_lb, *p_ub, *p_st); }
;
  12. Within the expansion of the macro 'KD_TRACE':
      a. Assuming 'kmp_d_debug' is >= 10
      b. Dereference of null pointer (loaded from variable 'p_st')
1801 __kmp_str_free(&buff);
1802 }
1803#endif
1804 return status;
1805}
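The analyzer's complaint is that the path through kmp_sch_static_balanced assumed p_st == NULL (event 6) yet the trace at line 1800 dereferences it unconditionally, while the corresponding trace at line 2075 of this same file already uses the guarded form p_st ? *p_st : 0. A hedged sketch of the debug block with that guard applied is shown below; it is an illustration of a possible fix, not the upstream patch.

#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmp_dispatch_next_algorithm: T#%%d exit status:%%d p_last:%%d "
        "p_lb:%%%s p_ub:%%%s p_st:%%%s\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec);
    // sketch of a guard mirroring line 2075: avoid dereferencing the
    // possibly-null p_last/p_st pointers in the trace arguments
    KD_TRACE(10, (buff, gtid, status, p_last ? *p_last : 0, *p_lb, *p_ub,
                  p_st ? *p_st : 0));
    __kmp_str_free(&buff);
  }
#endif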
1806
1807/* Define a macro for exiting __kmp_dispatch_next(). If status is 0 (no more
1808 work), then tell OMPT the loop is over. In some cases kmp_dispatch_fini()
1809 is not called. */
1810#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
1811#define OMPT_LOOP_ENDif (status == 0) { if (ompt_enabled.ompt_callback_work) { ompt_team_info_t
*team_info = __ompt_get_teaminfo(0, __null); ompt_task_info_t
*task_info = __ompt_get_task_info_object(0); ompt_callbacks.
ompt_callback_work_callback( ompt_work_loop, ompt_scope_end, &
(team_info->parallel_data), &(task_info->task_data)
, 0, codeptr); } }
\
1812 if (status == 0) { \
1813 if (ompt_enabled.ompt_callback_work) { \
1814 ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL__null); \
1815 ompt_task_info_t *task_info = __ompt_get_task_info_object(0); \
1816 ompt_callbacks.ompt_callback(ompt_callback_work)ompt_callback_work_callback( \
1817 ompt_work_loop, ompt_scope_end, &(team_info->parallel_data), \
1818 &(task_info->task_data), 0, codeptr); \
1819 } \
1820 }
1821// TODO: implement count
1822#else
1823#define OMPT_LOOP_ENDif (status == 0) { if (ompt_enabled.ompt_callback_work) { ompt_team_info_t
*team_info = __ompt_get_teaminfo(0, __null); ompt_task_info_t
*task_info = __ompt_get_task_info_object(0); ompt_callbacks.
ompt_callback_work_callback( ompt_work_loop, ompt_scope_end, &
(team_info->parallel_data), &(task_info->task_data)
, 0, codeptr); } }
// no-op
1824#endif
1825
1826template <typename T>
1827static int __kmp_dispatch_next(ident_t *loc, int gtid, kmp_int32 *p_last,
1828 T *p_lb, T *p_ub,
1829 typename traits_t<T>::signed_t *p_st
1830#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
1831 ,
1832 void *codeptr
1833#endif
1834 ) {
1835
1836 typedef typename traits_t<T>::unsigned_t UT;
1837 typedef typename traits_t<T>::signed_t ST;
1838 typedef typename traits_t<T>::floating_t DBL;
1839 // This is potentially slightly misleading, schedule(runtime) will appear here
1840  // even if the actual runtime schedule is static. (Which points out a
1841  // disadvantage of schedule(runtime): even when static scheduling is used it
1842 // costs more than a compile time choice to use static scheduling would.)
1843 KMP_TIME_PARTITIONED_BLOCK(FOR_dynamic_scheduling)((void)0);
1844
1845 int status;
1846 dispatch_private_info_template<T> *pr;
1847 kmp_info_t *th = __kmp_threads[gtid];
1848 kmp_team_t *team = th->th.th_team;
1849
1850 KMP_DEBUG_ASSERT(p_lb && p_ub && p_st)((p_lb && p_ub && p_st) ? 0 : __kmp_debug_assert
("p_lb && p_ub && p_st", "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_dispatch.cpp"
, 1850))
; // AC: these cannot be NULL
1851 KD_TRACE(if (kmp_d_debug >= 1000) { __kmp_debug_printf ("__kmp_dispatch_next: T#%d called p_lb:%p p_ub:%p p_st:%p p_last: %p\n"
, gtid, p_lb, p_ub, p_st, p_last); }
1852 1000,if (kmp_d_debug >= 1000) { __kmp_debug_printf ("__kmp_dispatch_next: T#%d called p_lb:%p p_ub:%p p_st:%p p_last: %p\n"
, gtid, p_lb, p_ub, p_st, p_last); }
1853 ("__kmp_dispatch_next: T#%d called p_lb:%p p_ub:%p p_st:%p p_last: %p\n",if (kmp_d_debug >= 1000) { __kmp_debug_printf ("__kmp_dispatch_next: T#%d called p_lb:%p p_ub:%p p_st:%p p_last: %p\n"
, gtid, p_lb, p_ub, p_st, p_last); }
1854 gtid, p_lb, p_ub, p_st, p_last))if (kmp_d_debug >= 1000) { __kmp_debug_printf ("__kmp_dispatch_next: T#%d called p_lb:%p p_ub:%p p_st:%p p_last: %p\n"
, gtid, p_lb, p_ub, p_st, p_last); }
;
1855
1856 if (team->t.t_serialized) {
1857    /* NOTE: serialize this dispatch because we are not at the active level */
1858 pr = reinterpret_cast<dispatch_private_info_template<T> *>(
1859 th->th.th_dispatch->th_disp_buffer); /* top of the stack */
1860 KMP_DEBUG_ASSERT(pr)((pr) ? 0 : __kmp_debug_assert("pr", "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_dispatch.cpp"
, 1860))
;
1861
1862 if ((status = (pr->u.p.tc != 0)) == 0) {
1863 *p_lb = 0;
1864 *p_ub = 0;
1865 // if ( p_last != NULL )
1866 // *p_last = 0;
1867 if (p_st != NULL__null)
1868 *p_st = 0;
1869 if (__kmp_env_consistency_check) {
1870 if (pr->pushed_ws != ct_none) {
1871 pr->pushed_ws = __kmp_pop_workshare(gtid, pr->pushed_ws, loc);
1872 }
1873 }
1874 } else if (pr->flags.nomerge) {
1875 kmp_int32 last;
1876 T start;
1877 UT limit, trip, init;
1878 ST incr;
1879 T chunk = pr->u.p.parm1;
1880
1881 KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_dynamic_chunked case\n",if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next: T#%d kmp_sch_dynamic_chunked case\n"
, gtid); }
1882 gtid))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next: T#%d kmp_sch_dynamic_chunked case\n"
, gtid); }
;
1883
1884 init = chunk * pr->u.p.count++;
1885 trip = pr->u.p.tc - 1;
1886
1887 if ((status = (init <= trip)) == 0) {
1888 *p_lb = 0;
1889 *p_ub = 0;
1890 // if ( p_last != NULL )
1891 // *p_last = 0;
1892 if (p_st != NULL__null)
1893 *p_st = 0;
1894 if (__kmp_env_consistency_check) {
1895 if (pr->pushed_ws != ct_none) {
1896 pr->pushed_ws = __kmp_pop_workshare(gtid, pr->pushed_ws, loc);
1897 }
1898 }
1899 } else {
1900 start = pr->u.p.lb;
1901 limit = chunk + init - 1;
1902 incr = pr->u.p.st;
1903
1904 if ((last = (limit >= trip)) != 0) {
1905 limit = trip;
1906#if KMP_OS_WINDOWS0
1907 pr->u.p.last_upper = pr->u.p.ub;
1908#endif /* KMP_OS_WINDOWS */
1909 }
1910 if (p_last != NULL__null)
1911 *p_last = last;
1912 if (p_st != NULL__null)
1913 *p_st = incr;
1914 if (incr == 1) {
1915 *p_lb = start + init;
1916 *p_ub = start + limit;
1917 } else {
1918 *p_lb = start + init * incr;
1919 *p_ub = start + limit * incr;
1920 }
1921
1922 if (pr->flags.ordered) {
1923 pr->u.p.ordered_lower = init;
1924 pr->u.p.ordered_upper = limit;
1925#ifdef KMP_DEBUG1
1926 {
1927 char *buff;
1928 // create format specifiers before the debug output
1929 buff = __kmp_str_format("__kmp_dispatch_next: T#%%d "
1930 "ordered_lower:%%%s ordered_upper:%%%s\n",
1931 traits_t<UT>::spec, traits_t<UT>::spec);
1932 KD_TRACE(1000, (buff, gtid, pr->u.p.ordered_lower,if (kmp_d_debug >= 1000) { __kmp_debug_printf (buff, gtid,
pr->u.p.ordered_lower, pr->u.p.ordered_upper); }
1933 pr->u.p.ordered_upper))if (kmp_d_debug >= 1000) { __kmp_debug_printf (buff, gtid,
pr->u.p.ordered_lower, pr->u.p.ordered_upper); }
;
1934 __kmp_str_free(&buff);
1935 }
1936#endif
1937 } // if
1938 } // if
1939 } else {
1940 pr->u.p.tc = 0;
1941 *p_lb = pr->u.p.lb;
1942 *p_ub = pr->u.p.ub;
1943#if KMP_OS_WINDOWS0
1944 pr->u.p.last_upper = *p_ub;
1945#endif /* KMP_OS_WINDOWS */
1946 if (p_last != NULL__null)
1947 *p_last = TRUE(!0);
1948 if (p_st != NULL__null)
1949 *p_st = pr->u.p.st;
1950 } // if
1951#ifdef KMP_DEBUG1
1952 {
1953 char *buff;
1954 // create format specifiers before the debug output
1955 buff = __kmp_str_format(
1956 "__kmp_dispatch_next: T#%%d serialized case: p_lb:%%%s "
1957 "p_ub:%%%s p_st:%%%s p_last:%%p %%d returning:%%d\n",
1958 traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec);
1959 KD_TRACE(10, (buff, gtid, *p_lb, *p_ub, *p_st, p_last, *p_last, status))if (kmp_d_debug >= 10) { __kmp_debug_printf (buff, gtid, *
p_lb, *p_ub, *p_st, p_last, *p_last, status); }
;
1960 __kmp_str_free(&buff);
1961 }
1962#endif
1963#if INCLUDE_SSC_MARKS(1 && 1)
1964 SSC_MARK_DISPATCH_NEXT()__asm__ __volatile__("movl %0, %%ebx; .byte 0x64, 0x67, 0x90 "
::"i"(0xd697) : "%ebx")
;
1965#endif
1966 OMPT_LOOP_ENDif (status == 0) { if (ompt_enabled.ompt_callback_work) { ompt_team_info_t
*team_info = __ompt_get_teaminfo(0, __null); ompt_task_info_t
*task_info = __ompt_get_task_info_object(0); ompt_callbacks.
ompt_callback_work_callback( ompt_work_loop, ompt_scope_end, &
(team_info->parallel_data), &(task_info->task_data)
, 0, codeptr); } }
;
1967 return status;
1968 } else {
1969 kmp_int32 last = 0;
1970 dispatch_shared_info_template<T> volatile *sh;
1971
1972 KMP_DEBUG_ASSERT(th->th.th_dispatch ==((th->th.th_dispatch == &th->th.th_team->t.t_dispatch
[th->th.th_info.ds.ds_tid]) ? 0 : __kmp_debug_assert("th->th.th_dispatch == &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]"
, "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_dispatch.cpp"
, 1973))
1973 &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid])((th->th.th_dispatch == &th->th.th_team->t.t_dispatch
[th->th.th_info.ds.ds_tid]) ? 0 : __kmp_debug_assert("th->th.th_dispatch == &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]"
, "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_dispatch.cpp"
, 1973))
;
1974
1975 pr = reinterpret_cast<dispatch_private_info_template<T> *>(
1976 th->th.th_dispatch->th_dispatch_pr_current);
1977 KMP_DEBUG_ASSERT(pr)((pr) ? 0 : __kmp_debug_assert("pr", "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_dispatch.cpp"
, 1977))
;
1978 sh = reinterpret_cast<dispatch_shared_info_template<T> volatile *>(
1979 th->th.th_dispatch->th_dispatch_sh_current);
1980 KMP_DEBUG_ASSERT(sh)((sh) ? 0 : __kmp_debug_assert("sh", "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_dispatch.cpp"
, 1980))
;
1981
1982#if KMP_USE_HIER_SCHED0
1983 if (pr->flags.use_hier)
1984 status = sh->hier->next(loc, gtid, pr, &last, p_lb, p_ub, p_st);
1985 else
1986#endif // KMP_USE_HIER_SCHED
1987 status = __kmp_dispatch_next_algorithm<T>(gtid, pr, sh, &last, p_lb, p_ub,
1988 p_st, th->th.th_team_nproc,
1989 th->th.th_info.ds.ds_tid);
1990 // status == 0: no more iterations to execute
1991 if (status == 0) {
1992 UT num_done;
1993
1994 num_done = test_then_inc<ST>((volatile ST *)&sh->u.s.num_done);
1995#ifdef KMP_DEBUG1
1996 {
1997 char *buff;
1998 // create format specifiers before the debug output
1999 buff = __kmp_str_format(
2000 "__kmp_dispatch_next: T#%%d increment num_done:%%%s\n",
2001 traits_t<UT>::spec);
2002 KD_TRACE(10, (buff, gtid, sh->u.s.num_done))if (kmp_d_debug >= 10) { __kmp_debug_printf (buff, gtid, sh
->u.s.num_done); }
;
2003 __kmp_str_free(&buff);
2004 }
2005#endif
2006
2007#if KMP_USE_HIER_SCHED0
2008 pr->flags.use_hier = FALSE0;
2009#endif
2010 if ((ST)num_done == th->th.th_team_nproc - 1) {
2011#if (KMP_STATIC_STEAL_ENABLED1)
2012 if (pr->schedule == kmp_sch_static_steal &&
2013 traits_t<T>::type_size > 4) {
2014 int i;
2015 kmp_info_t **other_threads = team->t.t_threads;
2016 // loop complete, safe to destroy locks used for stealing
2017 for (i = 0; i < th->th.th_team_nproc; ++i) {
2018 kmp_lock_t *lck = other_threads[i]->th.th_dispatch->th_steal_lock;
2019 KMP_ASSERT(lck != NULL)((lck != __null) ? 0 : __kmp_debug_assert("lck != NULL", "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_dispatch.cpp"
, 2019))
;
2020 __kmp_destroy_lock(lck);
2021 __kmp_free(lck)___kmp_free((lck), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_dispatch.cpp"
, 2021)
;
2022 other_threads[i]->th.th_dispatch->th_steal_lock = NULL__null;
2023 }
2024 }
2025#endif
2026 /* NOTE: release this buffer to be reused */
2027
2028 KMP_MB(); /* Flush all pending memory write invalidates. */
2029
2030 sh->u.s.num_done = 0;
2031 sh->u.s.iteration = 0;
2032
2033 /* TODO replace with general release procedure? */
2034 if (pr->flags.ordered) {
2035 sh->u.s.ordered_iteration = 0;
2036 }
2037
2038 KMP_MB(); /* Flush all pending memory write invalidates. */
2039
2040 sh->buffer_index += __kmp_dispatch_num_buffers;
2041 KD_TRACE(100, ("__kmp_dispatch_next: T#%d change buffer_index:%d\n",if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next: T#%d change buffer_index:%d\n"
, gtid, sh->buffer_index); }
2042 gtid, sh->buffer_index))if (kmp_d_debug >= 100) { __kmp_debug_printf ("__kmp_dispatch_next: T#%d change buffer_index:%d\n"
, gtid, sh->buffer_index); }
;
2043
2044 KMP_MB(); /* Flush all pending memory write invalidates. */
2045
2046 } // if
2047 if (__kmp_env_consistency_check) {
2048 if (pr->pushed_ws != ct_none) {
2049 pr->pushed_ws = __kmp_pop_workshare(gtid, pr->pushed_ws, loc);
2050 }
2051 }
2052
2053 th->th.th_dispatch->th_deo_fcn = NULL__null;
2054 th->th.th_dispatch->th_dxo_fcn = NULL__null;
2055 th->th.th_dispatch->th_dispatch_sh_current = NULL__null;
2056 th->th.th_dispatch->th_dispatch_pr_current = NULL__null;
2057 } // if (status == 0)
2058#if KMP_OS_WINDOWS0
2059 else if (last) {
2060 pr->u.p.last_upper = pr->u.p.ub;
2061 }
2062#endif /* KMP_OS_WINDOWS */
2063 if (p_last != NULL__null && status != 0)
2064 *p_last = last;
2065 } // if
2066
2067#ifdef KMP_DEBUG1
2068 {
2069 char *buff;
2070 // create format specifiers before the debug output
2071 buff = __kmp_str_format(
2072 "__kmp_dispatch_next: T#%%d normal case: "
2073 "p_lb:%%%s p_ub:%%%s p_st:%%%s p_last:%%p (%%d) returning:%%d\n",
2074 traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec);
2075 KD_TRACE(10, (buff, gtid, *p_lb, *p_ub, p_st ? *p_st : 0, p_last,if (kmp_d_debug >= 10) { __kmp_debug_printf (buff, gtid, *
p_lb, *p_ub, p_st ? *p_st : 0, p_last, (p_last ? *p_last : 0)
, status); }
2076 (p_last ? *p_last : 0), status))if (kmp_d_debug >= 10) { __kmp_debug_printf (buff, gtid, *
p_lb, *p_ub, p_st ? *p_st : 0, p_last, (p_last ? *p_last : 0)
, status); }
;
2077 __kmp_str_free(&buff);
2078 }
2079#endif
2080#if INCLUDE_SSC_MARKS(1 && 1)
2081 SSC_MARK_DISPATCH_NEXT()__asm__ __volatile__("movl %0, %%ebx; .byte 0x64, 0x67, 0x90 "
::"i"(0xd697) : "%ebx")
;
2082#endif
2083 OMPT_LOOP_ENDif (status == 0) { if (ompt_enabled.ompt_callback_work) { ompt_team_info_t
*team_info = __ompt_get_teaminfo(0, __null); ompt_task_info_t
*task_info = __ompt_get_task_info_object(0); ompt_callbacks.
ompt_callback_work_callback( ompt_work_loop, ompt_scope_end, &
(team_info->parallel_data), &(task_info->task_data)
, 0, codeptr); } }
;
2084 return status;
2085}
2086
2087template <typename T>
2088static void __kmp_dist_get_bounds(ident_t *loc, kmp_int32 gtid,
2089 kmp_int32 *plastiter, T *plower, T *pupper,
2090 typename traits_t<T>::signed_t incr) {
2091 typedef typename traits_t<T>::unsigned_t UT;
2092 typedef typename traits_t<T>::signed_t ST;
2093 kmp_uint32 team_id;
2094 kmp_uint32 nteams;
2095 UT trip_count;
2096 kmp_team_t *team;
2097 kmp_info_t *th;
2098
2099 KMP_DEBUG_ASSERT(plastiter && plower && pupper)((plastiter && plower && pupper) ? 0 : __kmp_debug_assert
("plastiter && plower && pupper", "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_dispatch.cpp"
, 2099))
;
2100 KE_TRACE(10, ("__kmpc_dist_get_bounds called (%d)\n", gtid))if (kmp_e_debug >= 10) { __kmp_debug_printf ("__kmpc_dist_get_bounds called (%d)\n"
, gtid); }
;
2101#ifdef KMP_DEBUG1
2102 {
2103 char *buff;
2104 // create format specifiers before the debug output
2105 buff = __kmp_str_format("__kmpc_dist_get_bounds: T#%%d liter=%%d "
2106 "iter=(%%%s, %%%s, %%%s) signed?<%s>\n",
2107 traits_t<T>::spec, traits_t<T>::spec,
2108 traits_t<ST>::spec, traits_t<T>::spec);
2109 KD_TRACE(100, (buff, gtid, *plastiter, *plower, *pupper, incr))if (kmp_d_debug >= 100) { __kmp_debug_printf (buff, gtid, *
plastiter, *plower, *pupper, incr); }
;
2110 __kmp_str_free(&buff);
2111 }
2112#endif
2113
2114 if (__kmp_env_consistency_check) {
2115 if (incr == 0) {
2116 __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
2117 loc);
2118 }
2119 if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
2120 // The loop is illegal.
2121 // Some zero-trip loops maintained by compiler, e.g.:
2122 // for(i=10;i<0;++i) // lower >= upper - run-time check
2123 // for(i=0;i>10;--i) // lower <= upper - run-time check
2124 // for(i=0;i>10;++i) // incr > 0 - compile-time check
2125 // for(i=10;i<0;--i) // incr < 0 - compile-time check
2126 // Compiler does not check the following illegal loops:
2127 // for(i=0;i<10;i+=incr) // where incr<0
2128 // for(i=10;i>0;i-=incr) // where incr<0
2129 __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
2130 }
2131 }
2132 th = __kmp_threads[gtid];
2133 team = th->th.th_team;
2134#if OMP_40_ENABLED(50 >= 40)
2135 KMP_DEBUG_ASSERT(th->th.th_teams_microtask)((th->th.th_teams_microtask) ? 0 : __kmp_debug_assert("th->th.th_teams_microtask"
, "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_dispatch.cpp"
, 2135))
; // we are in the teams construct
2136 nteams = th->th.th_teams_size.nteams;
2137#endif
2138 team_id = team->t.t_master_tid;
2139 KMP_DEBUG_ASSERT(nteams == team->t.t_parent->t.t_nproc)((nteams == team->t.t_parent->t.t_nproc) ? 0 : __kmp_debug_assert
("nteams == team->t.t_parent->t.t_nproc", "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_dispatch.cpp"
, 2139))
;
2140
2141 // compute global trip count
2142 if (incr == 1) {
2143 trip_count = *pupper - *plower + 1;
2144 } else if (incr == -1) {
2145 trip_count = *plower - *pupper + 1;
2146 } else if (incr > 0) {
2147 // upper-lower can exceed the limit of signed type
2148 trip_count = (UT)(*pupper - *plower) / incr + 1;
2149 } else {
2150 trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
2151 }
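The trip count above is formed in the unsigned type because upper - lower can exceed the range of the signed type. A standalone sketch of the same computation for 64-bit bounds follows; the subtraction is carried out in unsigned arithmetic so it cannot overflow, and incr is assumed non-zero as checked earlier under __kmp_env_consistency_check.

#include <cstdint>

// Sketch of the trip-count computation above for signed 64-bit bounds.
// The difference is computed in uint64_t so a span larger than INT64_MAX
// still comes out right; 'incr' must be non-zero and the loop non-empty.
static uint64_t trip_count64(int64_t lower, int64_t upper, int64_t incr) {
  if (incr > 0) {
    uint64_t span = (uint64_t)upper - (uint64_t)lower; // upper >= lower here
    return span / (uint64_t)incr + 1;
  }
  uint64_t span = (uint64_t)lower - (uint64_t)upper;   // lower >= upper here
  uint64_t step = ~(uint64_t)incr + 1;                 // |incr| without overflow
  return span / step + 1;
}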
2152
2153 if (trip_count <= nteams) {
2154 KMP_DEBUG_ASSERT(((__kmp_static == kmp_sch_static_greedy || __kmp_static == kmp_sch_static_balanced
) ? 0 : __kmp_debug_assert("__kmp_static == kmp_sch_static_greedy || __kmp_static == kmp_sch_static_balanced"
, "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_dispatch.cpp"
, 2157))
2155 __kmp_static == kmp_sch_static_greedy ||((__kmp_static == kmp_sch_static_greedy || __kmp_static == kmp_sch_static_balanced
) ? 0 : __kmp_debug_assert("__kmp_static == kmp_sch_static_greedy || __kmp_static == kmp_sch_static_balanced"
, "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_dispatch.cpp"
, 2157))
2156 __kmp_static ==((__kmp_static == kmp_sch_static_greedy || __kmp_static == kmp_sch_static_balanced
) ? 0 : __kmp_debug_assert("__kmp_static == kmp_sch_static_greedy || __kmp_static == kmp_sch_static_balanced"
, "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_dispatch.cpp"
, 2157))
2157 kmp_sch_static_balanced)((__kmp_static == kmp_sch_static_greedy || __kmp_static == kmp_sch_static_balanced
) ? 0 : __kmp_debug_assert("__kmp_static == kmp_sch_static_greedy || __kmp_static == kmp_sch_static_balanced"
, "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_dispatch.cpp"
, 2157))
; // Unknown static scheduling type.
2158 // only some teams get single iteration, others get nothing
2159 if (team_id < trip_count) {
2160 *pupper = *plower = *plower + team_id * incr;
2161 } else {
2162 *plower = *pupper + incr; // zero-trip loop
2163 }
2164 if (plastiter != NULL__null)
2165 *plastiter = (team_id == trip_count - 1);
2166 } else {
2167 if (__kmp_static == kmp_sch_static_balanced) {
2168 UT chunk = trip_count / nteams;
2169 UT extras = trip_count % nteams;
2170 *plower +=
2171 incr * (team_id * chunk + (team_id < extras ? team_id : extras));
2172 *pupper = *plower + chunk * incr - (team_id < extras ? 0 : incr);
2173 if (plastiter != NULL__null)
2174 *plastiter = (team_id == nteams - 1);
2175 } else {
2176 T chunk_inc_count =
2177 (trip_count / nteams + ((trip_count % nteams) ? 1 : 0)) * incr;
2178 T upper = *pupper;
2179 KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy)((__kmp_static == kmp_sch_static_greedy) ? 0 : __kmp_debug_assert
("__kmp_static == kmp_sch_static_greedy", "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_dispatch.cpp"
, 2179))
;
2180 // Unknown static scheduling type.
2181 *plower += team_id * chunk_inc_count;
2182 *pupper = *plower + chunk_inc_count - incr;
2183 // Check/correct bounds if needed
2184 if (incr > 0) {
2185 if (*pupper < *plower)
2186 *pupper = traits_t<T>::max_value;
2187 if (plastiter != NULL__null)
2188 *plastiter = *plower <= upper && *pupper > upper - incr;
2189 if (*pupper > upper)
2190 *pupper = upper; // tracker C73258
2191 } else {
2192 if (*pupper > *plower)
2193 *pupper = traits_t<T>::min_value;
2194 if (plastiter != NULL__null)
2195 *plastiter = *plower >= upper && *pupper < upper - incr;
2196 if (*pupper < upper)
2197 *pupper = upper; // tracker C73258
2198 }
2199 }
2200 }
2201}
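For the balanced branch above, the split is the usual "chunk plus one extra for the first trip_count % nteams teams". Below is a standalone sketch of that partition in terms of iteration indices; the real code then maps the range through *plower and incr, and the names are illustrative.

#include <cstdint>

// Sketch of the kmp_sch_static_balanced split in __kmp_dist_get_bounds:
// every team gets trip/nteams iterations and the first trip%nteams teams
// get one extra.  Produces the half-open index range [begin, end) for
// 'team_id'; an empty range means the team has no iterations.
static void balanced_team_range(uint64_t trip, uint32_t nteams,
                                uint32_t team_id, uint64_t &begin,
                                uint64_t &end) {
  uint64_t chunk = trip / nteams;
  uint64_t extras = trip % nteams;
  begin = (uint64_t)team_id * chunk +
          (team_id < extras ? team_id : extras);       // extras go first
  end = begin + chunk + (team_id < extras ? 1 : 0);
}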
2202
2203//-----------------------------------------------------------------------------
2204// Dispatch routines
2205// Transfer call to template< type T >
2206// __kmp_dispatch_init( ident_t *loc, int gtid, enum sched_type schedule,
2207// T lb, T ub, ST st, ST chunk )
2208extern "C" {
2209
2210/*!
2211@ingroup WORK_SHARING
2212@{
2213@param loc Source location
2214@param gtid Global thread id
2215@param schedule Schedule type
2216@param lb Lower bound
2217@param ub Upper bound
2218@param st Step (or increment if you prefer)
2219@param chunk The chunk size to block with
2220
2221This function prepares the runtime to start a dynamically scheduled for loop,
2222saving the loop arguments.
2223These functions are all identical apart from the types of the arguments.
2224*/
2225
2226void __kmpc_dispatch_init_4(ident_t *loc, kmp_int32 gtid,
2227 enum sched_type schedule, kmp_int32 lb,
2228 kmp_int32 ub, kmp_int32 st, kmp_int32 chunk) {
2229 KMP_DEBUG_ASSERT(__kmp_init_serial)((__kmp_init_serial) ? 0 : __kmp_debug_assert("__kmp_init_serial"
, "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_dispatch.cpp"
, 2229))
;
2230#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2231 OMPT_STORE_RETURN_ADDRESS(gtid)if (ompt_enabled.enabled && gtid >= 0 && __kmp_threads
[gtid] && !__kmp_threads[gtid]->th.ompt_thread_info
.return_address) __kmp_threads[gtid]->th.ompt_thread_info.
return_address = __builtin_return_address(0)
;
2232#endif
2233 __kmp_dispatch_init<kmp_int32>(loc, gtid, schedule, lb, ub, st, chunk, true);
2234}
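Taken together, the init/next entry points are typically driven by compiler-generated code of roughly the following shape for a dynamically scheduled loop. This is a hedged usage sketch: the exact codegen varies, example_loop_body is a placeholder, and __kmpc_dispatch_next_4 is the entry point declared further down in this file.

// Hedged usage sketch: how a compiler usually drives the dispatch API for
// "#pragma omp for schedule(dynamic, 4)" over kmp_int32 bounds.
extern "C" kmp_int32 __kmpc_global_thread_num(ident_t *loc);
void example_loop_body(kmp_int32 i); // placeholder for the loop body

void run_dynamic_loop(ident_t *loc, kmp_int32 lb0, kmp_int32 ub0) {
  kmp_int32 gtid = __kmpc_global_thread_num(loc);
  __kmpc_dispatch_init_4(loc, gtid, kmp_sch_dynamic_chunked, lb0, ub0,
                         /*st=*/1, /*chunk=*/4);
  kmp_int32 last, lb, ub, st;
  while (__kmpc_dispatch_next_4(loc, gtid, &last, &lb, &ub, &st)) {
    for (kmp_int32 i = lb; i <= ub; i += st)
      example_loop_body(i); // execute the claimed chunk
  }
}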
2235/*!
2236See @ref __kmpc_dispatch_init_4
2237*/
2238void __kmpc_dispatch_init_4u(ident_t *loc, kmp_int32 gtid,
2239 enum sched_type schedule, kmp_uint32 lb,
2240 kmp_uint32 ub, kmp_int32 st, kmp_int32 chunk) {
2241 KMP_DEBUG_ASSERT(__kmp_init_serial)((__kmp_init_serial) ? 0 : __kmp_debug_assert("__kmp_init_serial"
, "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_dispatch.cpp"
, 2241))
;
2242#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2243 OMPT_STORE_RETURN_ADDRESS(gtid)if (ompt_enabled.enabled && gtid >= 0 && __kmp_threads
[gtid] && !__kmp_threads[gtid]->th.ompt_thread_info
.return_address) __kmp_threads[gtid]->th.ompt_thread_info.
return_address = __builtin_return_address(0)
;
2244#endif
2245 __kmp_dispatch_init<kmp_uint32>(loc, gtid, schedule, lb, ub, st, chunk, true);
2246}
2247
2248/*!
2249See @ref __kmpc_dispatch_init_4
2250*/
2251void __kmpc_dispatch_init_8(ident_t *loc, kmp_int32 gtid,
2252 enum sched_type schedule, kmp_int64 lb,
2253 kmp_int64 ub, kmp_int64 st, kmp_int64 chunk) {
2254 KMP_DEBUG_ASSERT(__kmp_init_serial)((__kmp_init_serial) ? 0 : __kmp_debug_assert("__kmp_init_serial"
, "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_dispatch.cpp"
, 2254))
;
2255#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2256 OMPT_STORE_RETURN_ADDRESS(gtid)if (ompt_enabled.enabled && gtid >= 0 && __kmp_threads
[gtid] && !__kmp_threads[gtid]->th.ompt_thread_info
.return_address) __kmp_threads[gtid]->th.ompt_thread_info.
return_address = __builtin_return_address(0)
;
2257#endif
2258 __kmp_dispatch_init<kmp_int64>(loc, gtid, schedule, lb, ub, st, chunk, true);
2259}
2260
2261/*!
2262See @ref __kmpc_dispatch_init_4
2263*/
2264void __kmpc_dispatch_init_8u(ident_t *loc, kmp_int32 gtid,
2265 enum sched_type schedule, kmp_uint64 lb,
2266 kmp_uint64 ub, kmp_int64 st, kmp_int64 chunk) {
2267 KMP_DEBUG_ASSERT(__kmp_init_serial)((__kmp_init_serial) ? 0 : __kmp_debug_assert("__kmp_init_serial"
, "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_dispatch.cpp"
, 2267))
;
2268#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2269 OMPT_STORE_RETURN_ADDRESS(gtid)if (ompt_enabled.enabled && gtid >= 0 && __kmp_threads
[gtid] && !__kmp_threads[gtid]->th.ompt_thread_info
.return_address) __kmp_threads[gtid]->th.ompt_thread_info.
return_address = __builtin_return_address(0)
;
2270#endif
2271 __kmp_dispatch_init<kmp_uint64>(loc, gtid, schedule, lb, ub, st, chunk, true);
2272}
2273
2274/*!
2275See @ref __kmpc_dispatch_init_4
2276
2277These functions differ from the __kmpc_dispatch_init set in that they are
2278called for the composite distribute parallel for construct, so the per-team
2279iteration space must be computed before regular iteration dispatching.
2280
2281These functions are all identical apart from the types of the arguments.
2282*/
2283void __kmpc_dist_dispatch_init_4(ident_t *loc, kmp_int32 gtid,
2284 enum sched_type schedule, kmp_int32 *p_last,
2285 kmp_int32 lb, kmp_int32 ub, kmp_int32 st,
2286 kmp_int32 chunk) {
2287 KMP_DEBUG_ASSERT(__kmp_init_serial)((__kmp_init_serial) ? 0 : __kmp_debug_assert("__kmp_init_serial"
, "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_dispatch.cpp"
, 2287))
;
2288#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
2289 OMPT_STORE_RETURN_ADDRESS(gtid)if (ompt_enabled.enabled && gtid >= 0 && __kmp_threads
[gtid] && !__kmp_threads[gtid]->th.ompt_thread_info
.return_address) __kmp_threads[gtid]->th.ompt_thread_info.
return_address = __builtin_return_address(0)
;
2290#endif
2291 __kmp_dist_get_bounds<kmp_int32>(loc, gtid, p_last, &lb, &ub, st);
2292 __kmp_dispatch_init<kmp_int32>(loc, gtid, schedule, lb, ub, st, chunk, true);
2293}
2294
2295void __kmpc_dist_dispatch_init_4u(ident_t *loc, kmp_int32 gtid,
2296 enum sched_type schedule, kmp_int32 *p_last,
2297 kmp_uint32 lb, kmp_uint32 ub, kmp_int32 st,
2298 kmp_int32 chunk) {
2299 KMP_DEBUG_ASSERT(__kmp_init_serial);
2300#if OMPT_SUPPORT && OMPT_OPTIONAL
2301 OMPT_STORE_RETURN_ADDRESS(gtid);
2302#endif
2303 __kmp_dist_get_bounds<kmp_uint32>(loc, gtid, p_last, &lb, &ub, st);
2304 __kmp_dispatch_init<kmp_uint32>(loc, gtid, schedule, lb, ub, st, chunk, true);
2305}
2306
2307void __kmpc_dist_dispatch_init_8(ident_t *loc, kmp_int32 gtid,
2308 enum sched_type schedule, kmp_int32 *p_last,
2309 kmp_int64 lb, kmp_int64 ub, kmp_int64 st,
2310 kmp_int64 chunk) {
2311 KMP_DEBUG_ASSERT(__kmp_init_serial);
2312#if OMPT_SUPPORT && OMPT_OPTIONAL
2313 OMPT_STORE_RETURN_ADDRESS(gtid);
2314#endif
2315 __kmp_dist_get_bounds<kmp_int64>(loc, gtid, p_last, &lb, &ub, st);
2316 __kmp_dispatch_init<kmp_int64>(loc, gtid, schedule, lb, ub, st, chunk, true);
2317}
2318
2319void __kmpc_dist_dispatch_init_8u(ident_t *loc, kmp_int32 gtid,
2320 enum sched_type schedule, kmp_int32 *p_last,
2321 kmp_uint64 lb, kmp_uint64 ub, kmp_int64 st,
2322 kmp_int64 chunk) {
2323 KMP_DEBUG_ASSERT(__kmp_init_serial);
2324#if OMPT_SUPPORT && OMPT_OPTIONAL
2325 OMPT_STORE_RETURN_ADDRESS(gtid);
2326#endif
2327 __kmp_dist_get_bounds<kmp_uint64>(loc, gtid, p_last, &lb, &ub, st);
2328 __kmp_dispatch_init<kmp_uint64>(loc, gtid, schedule, lb, ub, st, chunk, true);
2329}
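
As an illustration of how this family is driven (not part of kmp_dispatch.cpp), here is a minimal sketch of a compiler-style caller for a composite distribute parallel for loop with a dynamic schedule. The function name, the loop bounds, and the use of kmp_sch_dynamic_chunked as the schedule enumerator are assumptions of this sketch, not taken from the runtime sources.

// Hypothetical outlined body for something like
//   #pragma omp distribute parallel for schedule(dynamic, 4)
// over iterations 0..9999. All names here are illustrative.
static void hypothetical_dist_parallel_for(ident_t *loc, kmp_int32 gtid) {
  kmp_int32 last = 0;
  kmp_int32 lb = 0, ub = 9999, st = 1; // full iteration space of the loop
  // Trim lb/ub to this team's slice of the distribute space, then set up
  // dynamic dispatching for that per-team range (assumed schedule enum).
  __kmpc_dist_dispatch_init_4(loc, gtid, kmp_sch_dynamic_chunked, &last, lb,
                              ub, st, /*chunk=*/4);
  kmp_int32 clb, cub, cst;
  // Per-thread chunk loop over the team's slice.
  while (__kmpc_dispatch_next_4(loc, gtid, &last, &clb, &cub, &cst)) {
    for (kmp_int32 i = clb; i <= cub; i += cst) {
      /* ... loop body ... */
    }
  }
}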
2330
2331/*!
2332@param loc Source code location
2333@param gtid Global thread id
2334@param p_last Pointer to a flag set to one if this is the last chunk or zero
2335otherwise
2336@param p_lb Pointer to the lower bound for the next chunk of work
2337@param p_ub Pointer to the upper bound for the next chunk of work
2338@param p_st Pointer to the stride for the next chunk of work
2339@return one if there is work to be done, zero otherwise
2340
2341Get the next dynamically allocated chunk of work for this thread.
2342If there is no more work, then the lb, ub and stride need not be modified.
2343*/
2344int __kmpc_dispatch_next_4(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
2345 kmp_int32 *p_lb, kmp_int32 *p_ub, kmp_int32 *p_st) {
2346#if OMPT_SUPPORT && OMPT_OPTIONAL
2347 OMPT_STORE_RETURN_ADDRESS(gtid);
2348#endif
2349 return __kmp_dispatch_next<kmp_int32>(loc, gtid, p_last, p_lb, p_ub, p_st
2350#if OMPT_SUPPORT && OMPT_OPTIONAL
2351 ,
2352 OMPT_LOAD_RETURN_ADDRESS(gtid)
2353#endif
2354 );
2355}
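
For reference (again, not part of the source file), the usual caller-side pattern for these next entry points is sketched below: a zero return ends the loop, and the flag behind p_last can be consulted afterwards to run lastprivate finalization on the thread that received the final chunk. The helper name and the schedule enumerator are assumptions of this sketch.

// Hypothetical dynamically scheduled loop over 0..n-1; names are illustrative.
static void hypothetical_dynamic_for(ident_t *loc, kmp_int32 gtid, kmp_int32 n) {
  __kmpc_dispatch_init_4(loc, gtid, kmp_sch_dynamic_chunked,
                         /*lb=*/0, /*ub=*/n - 1, /*st=*/1, /*chunk=*/1);
  kmp_int32 last = 0, lb, ub, st;
  // Each successful call yields one chunk [lb, ub]; on a zero return the
  // iteration space is exhausted and lb/ub/st are left unmodified.
  while (__kmpc_dispatch_next_4(loc, gtid, &last, &lb, &ub, &st)) {
    for (kmp_int32 i = lb; i <= ub; i += st) {
      /* ... loop body ... */
    }
  }
  if (last) {
    /* ... lastprivate finalization would go here ... */
  }
}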
2356
2357/*!
2358See @ref __kmpc_dispatch_next_4
2359*/
2360int __kmpc_dispatch_next_4u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
2361 kmp_uint32 *p_lb, kmp_uint32 *p_ub,
2362 kmp_int32 *p_st) {
2363#if OMPT_SUPPORT && OMPT_OPTIONAL
2364 OMPT_STORE_RETURN_ADDRESS(gtid);
2365#endif
2366 return __kmp_dispatch_next<kmp_uint32>(loc, gtid, p_last, p_lb, p_ub, p_st
2367#if OMPT_SUPPORT && OMPT_OPTIONAL
2368 ,
2369 OMPT_LOAD_RETURN_ADDRESS(gtid)
2370#endif
2371 );
2372}
2373
2374/*!
2375See @ref __kmpc_dispatch_next_4
2376*/
2377int __kmpc_dispatch_next_8(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
2378 kmp_int64 *p_lb, kmp_int64 *p_ub, kmp_int64 *p_st) {
2379#if OMPT_SUPPORT && OMPT_OPTIONAL
2380 OMPT_STORE_RETURN_ADDRESS(gtid);
2381#endif
2382 return __kmp_dispatch_next<kmp_int64>(loc, gtid, p_last, p_lb, p_ub, p_st
2383#if OMPT_SUPPORT && OMPT_OPTIONAL
2384 ,
2385 OMPT_LOAD_RETURN_ADDRESS(gtid)
2386#endif
2387 );
2388}
2389
2390/*!
2391See @ref __kmpc_dispatch_next_4
2392*/
2393int __kmpc_dispatch_next_8u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
2394 kmp_uint64 *p_lb, kmp_uint64 *p_ub,
2395 kmp_int64 *p_st) {
2396#if OMPT_SUPPORT && OMPT_OPTIONAL
2397 OMPT_STORE_RETURN_ADDRESS(gtid);
2398#endif
2399 return __kmp_dispatch_next<kmp_uint64>(loc, gtid, p_last, p_lb, p_ub, p_st
2400#if OMPT_SUPPORT && OMPT_OPTIONAL
2401 ,
2402 OMPT_LOAD_RETURN_ADDRESS(gtid)
2403#endif
2404 );
2405}
2406
2407/*!
2408@param loc Source code location
2409@param gtid Global thread id
2410
2411Mark the end of a dynamic loop.
2412*/
2413void __kmpc_dispatch_fini_4(ident_t *loc, kmp_int32 gtid) {
2414 __kmp_dispatch_finish<kmp_uint32>(gtid, loc);
2415}
2416
2417/*!
2418See @ref __kmpc_dispatch_fini_4
2419*/
2420void __kmpc_dispatch_fini_8(ident_t *loc, kmp_int32 gtid) {
2421 __kmp_dispatch_finish<kmp_uint64>(gtid, loc);
2422}
2423
2424/*!
2425See @ref __kmpc_dispatch_fini_4
2426*/
2427void __kmpc_dispatch_fini_4u(ident_t *loc, kmp_int32 gtid) {
2428 __kmp_dispatch_finish<kmp_uint32>(gtid, loc);
2429}
2430
2431/*!
2432See @ref __kmpc_dispatch_fini_4
2433*/
2434void __kmpc_dispatch_fini_8u(ident_t *loc, kmp_int32 gtid) {
2435 __kmp_dispatch_finish<kmp_uint64>(gtid, loc);
2436}
2437/*! @} */
2438
2439//-----------------------------------------------------------------------------
2440// Non-template routines from kmp_dispatch.cpp used in other sources
2441
2442kmp_uint32 __kmp_eq_4(kmp_uint32 value, kmp_uint32 checker) {
2443 return value == checker;
2444}
2445
2446kmp_uint32 __kmp_neq_4(kmp_uint32 value, kmp_uint32 checker) {
2447 return value != checker;
2448}
2449
2450kmp_uint32 __kmp_lt_4(kmp_uint32 value, kmp_uint32 checker) {
2451 return value < checker;
2452}
2453
2454kmp_uint32 __kmp_ge_4(kmp_uint32 value, kmp_uint32 checker) {
2455 return value >= checker;
2456}
2457
2458kmp_uint32 __kmp_le_4(kmp_uint32 value, kmp_uint32 checker) {
2459 return value <= checker;
2460}
2461
2462kmp_uint32
2463__kmp_wait_yield_4(volatile kmp_uint32 *spinner, kmp_uint32 checker,
2464 kmp_uint32 (*pred)(kmp_uint32, kmp_uint32),
2465 void *obj // Higher-level synchronization object, or NULL.
2466 ) {
2467 // note: we may not belong to a team at this point
2468 volatile kmp_uint32 *spin = spinner;
2469 kmp_uint32 check = checker;
2470 kmp_uint32 spins;
2471 kmp_uint32 (*f)(kmp_uint32, kmp_uint32) = pred;
2472 kmp_uint32 r;
2473
2474 KMP_FSYNC_SPIN_INIT(obj, CCAST(kmp_uint32 *, spin));
2475 KMP_INIT_YIELD(spins);
2476 // main wait spin loop
2477 while (!f(r = TCR_4(*spin), check)) {
2478 KMP_FSYNC_SPIN_PREPARE(obj);
2479 /* GEH - remove this since it was accidentally introduced when kmp_wait was
2480 split. It causes problems with infinite recursion because of exit lock */
2481 /* if ( TCR_4(__kmp_global.g.g_done) && __kmp_global.g.g_abort)
2482 __kmp_abort_thread(); */
2483
2484 /* if we have waited a bit, or are oversubscribed, yield */
2485 /* pause is in the following code */
2486 KMP_YIELD(TCR_4(__kmp_nth) > __kmp_avail_proc);
2487 KMP_YIELD_SPIN(spins);
2488 }
2489 KMP_FSYNC_SPIN_ACQUIRED(obj);
2490 return r;
2491}
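
A hypothetical caller of this spin-wait helper, shown only to illustrate how the comparison routines above plug in as predicates; the flag variable and the target value are invented for the example and do not appear in the runtime.

// Illustrative only: spin (yielding under oversubscription) until a shared
// release counter reaches 1, using __kmp_eq_4 as the completion predicate.
static volatile kmp_uint32 hypothetical_release_flag = 0;

static void hypothetical_wait_for_release(void) {
  kmp_uint32 observed =
      __kmp_wait_yield_4(&hypothetical_release_flag, 1, __kmp_eq_4,
                         /*obj=*/NULL);
  (void)observed; // last value read from the spin location
}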
2492
2493void __kmp_wait_yield_4_ptr(
2494 void *spinner, kmp_uint32 checker, kmp_uint32 (*pred)(void *, kmp_uint32),
2495 void *obj // Higher-level synchronization object, or NULL.
2496 ) {
2497 // note: we may not belong to a team at this point
2498 void *spin = spinner;
2499 kmp_uint32 check = checker;
2500 kmp_uint32 spins;
2501 kmp_uint32 (*f)(void *, kmp_uint32) = pred;
2502
2503 KMP_FSYNC_SPIN_INIT(obj, spin);
2504 KMP_INIT_YIELD(spins);
2505 // main wait spin loop
2506 while (!f(spin, check)) {
2507 KMP_FSYNC_SPIN_PREPARE(obj);
2508 /* if we have waited a bit, or are oversubscribed, yield */
2509 /* pause is in the following code */
2510 KMP_YIELD(TCR_4(__kmp_nth) > __kmp_avail_proc);
2511 KMP_YIELD_SPIN(spins);
2512 }
2513 KMP_FSYNC_SPIN_ACQUIRED(obj);
2514}
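
And a matching hypothetical use of the pointer-based variant, where the predicate receives the raw object and can inspect any field; the queue structure, field, and function names below are invented for the example.

// Illustrative only: wait until the head index of a (hypothetical) queue
// reaches a target value. The _ptr variant passes the object itself, not a
// loaded kmp_uint32, to the predicate on every iteration of the spin loop.
typedef struct hypothetical_queue {
  volatile kmp_uint32 head;
} hypothetical_queue_t;

static kmp_uint32 hypothetical_head_reached(void *p, kmp_uint32 checker) {
  return ((hypothetical_queue_t *)p)->head >= checker;
}

static void hypothetical_wait_for_head(hypothetical_queue_t *q,
                                       kmp_uint32 target) {
  __kmp_wait_yield_4_ptr(q, target, hypothetical_head_reached, /*obj=*/NULL);
}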
2515
2516} // extern "C"
2517
2518#ifdef KMP_GOMP_COMPAT
2519
2520void __kmp_aux_dispatch_init_4(ident_t *loc, kmp_int32 gtid,
2521 enum sched_type schedule, kmp_int32 lb,
2522 kmp_int32 ub, kmp_int32 st, kmp_int32 chunk,
2523 int push_ws) {
2524 __kmp_dispatch_init<kmp_int32>(loc, gtid, schedule, lb, ub, st, chunk,
2525 push_ws);
2526}
2527
2528void __kmp_aux_dispatch_init_4u(ident_t *loc, kmp_int32 gtid,
2529 enum sched_type schedule, kmp_uint32 lb,
2530 kmp_uint32 ub, kmp_int32 st, kmp_int32 chunk,
2531 int push_ws) {
2532 __kmp_dispatch_init<kmp_uint32>(loc, gtid, schedule, lb, ub, st, chunk,
2533 push_ws);
2534}
2535
2536void __kmp_aux_dispatch_init_8(ident_t *loc, kmp_int32 gtid,
2537 enum sched_type schedule, kmp_int64 lb,
2538 kmp_int64 ub, kmp_int64 st, kmp_int64 chunk,
2539 int push_ws) {
2540 __kmp_dispatch_init<kmp_int64>(loc, gtid, schedule, lb, ub, st, chunk,
2541 push_ws);
2542}
2543
2544void __kmp_aux_dispatch_init_8u(ident_t *loc, kmp_int32 gtid,
2545 enum sched_type schedule, kmp_uint64 lb,
2546 kmp_uint64 ub, kmp_int64 st, kmp_int64 chunk,
2547 int push_ws) {
2548 __kmp_dispatch_init<kmp_uint64>(loc, gtid, schedule, lb, ub, st, chunk,
2549 push_ws);
2550}
2551
2552void __kmp_aux_dispatch_fini_chunk_4(ident_t *loc, kmp_int32 gtid) {
2553 __kmp_dispatch_finish_chunk<kmp_uint32>(gtid, loc);
2554}
2555
2556void __kmp_aux_dispatch_fini_chunk_8(ident_t *loc, kmp_int32 gtid) {
2557 __kmp_dispatch_finish_chunk<kmp_uint64>(gtid, loc);
2558}
2559
2560void __kmp_aux_dispatch_fini_chunk_4u(ident_t *loc, kmp_int32 gtid) {
2561 __kmp_dispatch_finish_chunk<kmp_uint32>(gtid, loc);
2562}
2563
2564void __kmp_aux_dispatch_fini_chunk_8u(ident_t *loc, kmp_int32 gtid) {
2565 __kmp_dispatch_finish_chunk<kmp_uint64>(gtid, loc);
2566}
2567
2568#endif /* KMP_GOMP_COMPAT */
2569
2570/* ------------------------------------------------------------------------ */