Bug Summary

File: projects/openmp/runtime/src/kmp_barrier.cpp
Warning: line 1902, column 7
Called function pointer is null (null dereference)
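
For context, this class of warning is reported when a call goes through a function pointer that is null on at least one feasible path. Below is a minimal, hypothetical sketch of the pattern; the names are illustrative only and are not taken from kmp_barrier.cpp:

  #include <cstdio>

  // Hypothetical callback type, analogous to the 'void (*reduce)(void *, void *)'
  // parameters used by the barrier routines in this file.
  typedef void (*reduce_fn)(void *dst, void *src);

  // If any caller can reach the call below with 'reduce == nullptr', the
  // analyzer reports "Called function pointer is null (null dereference)"
  // at the call expression. The explicit guard keeps this sketch safe.
  static void apply_reduce(reduce_fn reduce, void *dst, void *src) {
    if (reduce == nullptr)
      return;              // guarded: no report on this path
    (*reduce)(dst, src);   // unguarded call sites are what the checker flags
  }

  static void sum_ints(void *dst, void *src) {
    *static_cast<int *>(dst) += *static_cast<int *>(src);
  }

  int main() {
    int a = 1, b = 2;
    apply_reduce(sum_ints, &a, &b); // normal path
    apply_reduce(nullptr, &a, &b);  // the kind of path the analyzer explores
    std::printf("%d\n", a);
    return 0;
  }

The annotated path below shows the branches the analyzer followed to reach the flagged call.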

Annotated Source Code


clang -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name kmp_barrier.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -mrelocation-model pic -pic-level 2 -mthread-model posix -fmath-errno -masm-verbose -mconstructor-aliases -munwind-tables -fuse-init-array -target-cpu x86-64 -dwarf-column-info -debugger-tuning=gdb -momit-leaf-frame-pointer -ffunction-sections -fdata-sections -resource-dir /usr/lib/llvm-8/lib/clang/8.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -D omp_EXPORTS -I /build/llvm-toolchain-snapshot-8~svn345461/build-llvm/projects/openmp/runtime/src -I /build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src -I /build/llvm-toolchain-snapshot-8~svn345461/build-llvm/include -I /build/llvm-toolchain-snapshot-8~svn345461/include -I /build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/i18n -I /build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/include/50 -I /build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/thirdparty/ittnotify -U NDEBUG -D _GNU_SOURCE -D _REENTRANT -D _FORTIFY_SOURCE=2 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0/backward -internal-isystem /usr/include/clang/8.0.0/include/ -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-8/lib/clang/8.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -Wno-switch -Wno-missing-field-initializers -Wno-missing-braces -std=c++11 -fdeprecated-macro -fdebug-compilation-dir /build/llvm-toolchain-snapshot-8~svn345461/build-llvm/projects/openmp/runtime/src -ferror-limit 19 -fmessage-length 0 -fvisibility-inlines-hidden -fno-rtti -fobjc-runtime=gcc -fdiagnostics-show-option -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -o /tmp/scan-build-2018-10-27-211344-32123-1 -x c++ /build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_barrier.cpp -faddrsig

/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_barrier.cpp

1/*
2 * kmp_barrier.cpp
3 */
4
5//===----------------------------------------------------------------------===//
6//
7// The LLVM Compiler Infrastructure
8//
9// This file is dual licensed under the MIT and the University of Illinois Open
10// Source Licenses. See LICENSE.txt for details.
11//
12//===----------------------------------------------------------------------===//
13
14#include "kmp.h"
15#include "kmp_wait_release.h"
16#include "kmp_itt.h"
17#include "kmp_os.h"
18#include "kmp_stats.h"
19#if OMPT_SUPPORT
20#include "ompt-specific.h"
21#endif
22
23#if KMP_MIC
24#include <immintrin.h>
25#define USE_NGO_STORES 1
26#endif // KMP_MIC
27
28#include "tsan_annotations.h"
29
30#if KMP_MIC && USE_NGO_STORES
31// ICV copying
32#define ngo_load(src) __m512d Vt = _mm512_load_pd((void *)(src))
33#define ngo_store_icvs(dst, src) _mm512_storenrngo_pd((void *)(dst), Vt)
34#define ngo_store_go(dst, src) _mm512_storenrngo_pd((void *)(dst), Vt)
35#define ngo_sync() __asm__ volatile("lock; addl $0,0(%%rsp)" ::: "memory")
36#else
37#define ngo_load(src) ((void)0)
38#define ngo_store_icvs(dst, src) copy_icvs((dst), (src))
39#define ngo_store_go(dst, src) KMP_MEMCPY((dst), (src), CACHE_LINE)
40#define ngo_sync() ((void)0)
41#endif /* KMP_MIC && USE_NGO_STORES */
42
43void __kmp_print_structure(void); // Forward declaration
44
45// ---------------------------- Barrier Algorithms ----------------------------
46
47// Linear Barrier
48static void __kmp_linear_barrier_gather(
49 enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
50 void (*reduce)(void *, void *) USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
51 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_linear_gather);
52 kmp_team_t *team = this_thr->th.th_team;
53 kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
54 kmp_info_t **other_threads = team->t.t_threads;
55
56 KA_TRACE(
57 20,
58 ("__kmp_linear_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n",
59 gtid, team->t.t_id, tid, bt));
60 KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]);
61
62#if USE_ITT_BUILD && USE_ITT_NOTIFY
63 // Barrier imbalance - save arrive time to the thread
64 if (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) {
65 this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time =
66 __itt_get_timestamp();
67 }
68#endif
69 // We now perform a linear reduction to signal that all of the threads have
70 // arrived.
71 if (!KMP_MASTER_TID(tid)) {
72 KA_TRACE(20,
73 ("__kmp_linear_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d)"
74 "arrived(%p): %llu => %llu\n",
75 gtid, team->t.t_id, tid, __kmp_gtid_from_tid(0, team),
76 team->t.t_id, 0, &thr_bar->b_arrived, thr_bar->b_arrived,
77 thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP));
78 // Mark arrival to master thread
79 /* After performing this write, a worker thread may not assume that the team
80 is valid any more - it could be deallocated by the master thread at any
81 time. */
82 ANNOTATE_BARRIER_BEGIN(this_thr);
83 kmp_flag_64 flag(&thr_bar->b_arrived, other_threads[0]);
84 flag.release();
85 } else {
86 kmp_balign_team_t *team_bar = &team->t.t_bar[bt];
87 int nproc = this_thr->th.th_team_nproc;
88 int i;
89 // Don't have to worry about sleep bit here or atomic since team setting
90 kmp_uint64 new_state = team_bar->b_arrived + KMP_BARRIER_STATE_BUMP;
91
92 // Collect all the worker team member threads.
93 for (i = 1; i < nproc; ++i) {
94#if KMP_CACHE_MANAGE
95 // Prefetch next thread's arrived count
96 if (i + 1 < nproc)
97 KMP_CACHE_PREFETCH(&other_threads[i + 1]->th.th_bar[bt].bb.b_arrived);
98#endif /* KMP_CACHE_MANAGE */
99 KA_TRACE(20, ("__kmp_linear_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%d) "
100 "arrived(%p) == %llu\n",
101 gtid, team->t.t_id, tid, __kmp_gtid_from_tid(i, team),
102 team->t.t_id, i,
103 &other_threads[i]->th.th_bar[bt].bb.b_arrived, new_state));
104
105 // Wait for worker thread to arrive
106 kmp_flag_64 flag(&other_threads[i]->th.th_bar[bt].bb.b_arrived,
107 new_state);
108 flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
109 ANNOTATE_BARRIER_END(other_threads[i]);
110#if USE_ITT_BUILD && USE_ITT_NOTIFY
111 // Barrier imbalance - write min of the thread time and the other thread
112 // time to the thread.
113 if (__kmp_forkjoin_frames_mode == 2) {
114 this_thr->th.th_bar_min_time = KMP_MIN(
115 this_thr->th.th_bar_min_time, other_threads[i]->th.th_bar_min_time);
116 }
117#endif
118 if (reduce) {
119 KA_TRACE(100,
120 ("__kmp_linear_barrier_gather: T#%d(%d:%d) += T#%d(%d:%d)\n",
121 gtid, team->t.t_id, tid, __kmp_gtid_from_tid(i, team),
122 team->t.t_id, i));
123 ANNOTATE_REDUCE_AFTER(reduce);
124 (*reduce)(this_thr->th.th_local.reduce_data,
125 other_threads[i]->th.th_local.reduce_data);
126 ANNOTATE_REDUCE_BEFORE(reduce);
127 ANNOTATE_REDUCE_BEFORE(&team->t.t_bar);
128 }
129 }
130 // Don't have to worry about sleep bit here or atomic since team setting
131 team_bar->b_arrived = new_state;
132 KA_TRACE(20, ("__kmp_linear_barrier_gather: T#%d(%d:%d) set team %d "
133 "arrived(%p) = %llu\n",
134 gtid, team->t.t_id, tid, team->t.t_id, &team_bar->b_arrived,
135 new_state));
136 }
137 KA_TRACE(
138 20,
139 ("__kmp_linear_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",
140 gtid, team->t.t_id, tid, bt));
141}
142
143static void __kmp_linear_barrier_release(
144 enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
145 int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
146 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_linear_release);
147 kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
148 kmp_team_t *team;
149
150 if (KMP_MASTER_TID(tid)) {
7. Taking false branch
151 unsigned int i;
152 kmp_uint32 nproc = this_thr->th.th_team_nproc;
153 kmp_info_t **other_threads;
154
155 team = __kmp_threads[gtid]->th.th_team;
156 KMP_DEBUG_ASSERT(team != NULL);
157 other_threads = team->t.t_threads;
158
159 KA_TRACE(20, ("__kmp_linear_barrier_release: T#%d(%d:%d) master enter for "
160 "barrier type %d\n",
161 gtid, team->t.t_id, tid, bt));
162
163 if (nproc > 1) {
164#if KMP_BARRIER_ICV_PUSH
165 {
166 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_icv_copy);
167 if (propagate_icvs) {
168 ngo_load(&team->t.t_implicit_task_taskdata[0].td_icvs);
169 for (i = 1; i < nproc; ++i) {
170 __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[i],
171 team, i, FALSE);
172 ngo_store_icvs(&team->t.t_implicit_task_taskdata[i].td_icvs,
173 &team->t.t_implicit_task_taskdata[0].td_icvs);
174 }
175 ngo_sync();
176 }
177 }
178#endif // KMP_BARRIER_ICV_PUSH
179
180 // Now, release all of the worker threads
181 for (i = 1; i < nproc; ++i) {
182#if KMP_CACHE_MANAGE
183 // Prefetch next thread's go flag
184 if (i + 1 < nproc)
185 KMP_CACHE_PREFETCH(&other_threads[i + 1]->th.th_bar[bt].bb.b_go);
186#endif /* KMP_CACHE_MANAGE */
187 KA_TRACE(
188 20,
189 ("__kmp_linear_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%d) "
190 "go(%p): %u => %u\n",
191 gtid, team->t.t_id, tid, other_threads[i]->th.th_info.ds.ds_gtid,
192 team->t.t_id, i, &other_threads[i]->th.th_bar[bt].bb.b_go,
193 other_threads[i]->th.th_bar[bt].bb.b_go,
194 other_threads[i]->th.th_bar[bt].bb.b_go + KMP_BARRIER_STATE_BUMP));
195 ANNOTATE_BARRIER_BEGIN(other_threads[i]);
196 kmp_flag_64 flag(&other_threads[i]->th.th_bar[bt].bb.b_go,
197 other_threads[i]);
198 flag.release();
199 }
200 }
201 } else { // Wait for the MASTER thread to release us
202 KA_TRACE(20, ("__kmp_linear_barrier_release: T#%d wait go(%p) == %u\n",
203 gtid, &thr_bar->b_go, KMP_BARRIER_STATE_BUMP));
204 kmp_flag_64 flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP);
205 flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
8. Calling 'kmp_flag_64::wait'
11. Returning from 'kmp_flag_64::wait'
206 ANNOTATE_BARRIER_END(this_thr);
207#if USE_ITT_BUILD && USE_ITT_NOTIFY
208 if ((__itt_sync_create_ptr && itt_sync_obj == NULL) || KMP_ITT_DEBUG) {
12. Taking false branch
209 // In a fork barrier; cannot get the object reliably (or ITTNOTIFY is
210 // disabled)
211 itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 0, -1);
212 // Cancel wait on previous parallel region...
213 __kmp_itt_task_starting(itt_sync_obj);
214
215 if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
216 return;
217
218 itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
219 if (itt_sync_obj != NULL)
220 // Call prepare as early as possible for "new" barrier
221 __kmp_itt_task_finished(itt_sync_obj);
222 } else
223#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
224 // Early exit for reaping threads releasing forkjoin barrier
225 if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
13. Taking true branch
226 return;
227// The worker thread may now assume that the team is valid.
228#ifdef KMP_DEBUG
229 tid = __kmp_tid_from_gtid(gtid);
230 team = __kmp_threads[gtid]->th.th_team;
231#endif
232 KMP_DEBUG_ASSERT(team != NULL);
233 TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE);
234 KA_TRACE(20,
235 ("__kmp_linear_barrier_release: T#%d(%d:%d) set go(%p) = %u\n",
236 gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE));
237 KMP_MB(); // Flush all pending memory write invalidates.
238 }
239 KA_TRACE(
240 20,
241 ("__kmp_linear_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",
242 gtid, team->t.t_id, tid, bt));
243}
244
245// Tree barrier
246static void
247__kmp_tree_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid,
248 int tid, void (*reduce)(void *, void *)
249 USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
250 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_tree_gather);
251 kmp_team_t *team = this_thr->th.th_team;
252 kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
253 kmp_info_t **other_threads = team->t.t_threads;
254 kmp_uint32 nproc = this_thr->th.th_team_nproc;
255 kmp_uint32 branch_bits = __kmp_barrier_gather_branch_bits[bt];
256 kmp_uint32 branch_factor = 1 << branch_bits;
257 kmp_uint32 child;
258 kmp_uint32 child_tid;
259 kmp_uint64 new_state;
260
261 KA_TRACE(
262 20, ("__kmp_tree_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n",
263 gtid, team->t.t_id, tid, bt));
264 KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]);
265
266#if USE_ITT_BUILD && USE_ITT_NOTIFY
267 // Barrier imbalance - save arrive time to the thread
268 if (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) {
269 this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time =
270 __itt_get_timestamp();
271 }
272#endif
273 // Perform tree gather to wait until all threads have arrived; reduce any
274 // required data as we go
275 child_tid = (tid << branch_bits) + 1;
276 if (child_tid < nproc) {
277 // Parent threads wait for all their children to arrive
278 new_state = team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP;
279 child = 1;
280 do {
281 kmp_info_t *child_thr = other_threads[child_tid];
282 kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
283#if KMP_CACHE_MANAGE
284 // Prefetch next thread's arrived count
285 if (child + 1 <= branch_factor && child_tid + 1 < nproc)
286 KMP_CACHE_PREFETCH(
287 &other_threads[child_tid + 1]->th.th_bar[bt].bb.b_arrived);
288#endif /* KMP_CACHE_MANAGE */
289 KA_TRACE(20,
290 ("__kmp_tree_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%u) "
291 "arrived(%p) == %llu\n",
292 gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
293 team->t.t_id, child_tid, &child_bar->b_arrived, new_state));
294 // Wait for child to arrive
295 kmp_flag_64 flag(&child_bar->b_arrived, new_state);
296 flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
297 ANNOTATE_BARRIER_END(child_thr);
298#if USE_ITT_BUILD && USE_ITT_NOTIFY
299 // Barrier imbalance - write min of the thread time and a child time to
300 // the thread.
301 if (__kmp_forkjoin_frames_mode == 2) {
302 this_thr->th.th_bar_min_time = KMP_MIN(this_thr->th.th_bar_min_time,
303 child_thr->th.th_bar_min_time);
304 }
305#endif
306 if (reduce) {
307 KA_TRACE(100,
308 ("__kmp_tree_barrier_gather: T#%d(%d:%d) += T#%d(%d:%u)\n",
309 gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
310 team->t.t_id, child_tid));
311 ANNOTATE_REDUCE_AFTER(reduce);
312 (*reduce)(this_thr->th.th_local.reduce_data,
313 child_thr->th.th_local.reduce_data);
314 ANNOTATE_REDUCE_BEFORE(reduce);
315 ANNOTATE_REDUCE_BEFORE(&team->t.t_bar);
316 }
317 child++;
318 child_tid++;
319 } while (child <= branch_factor && child_tid < nproc);
320 }
321
322 if (!KMP_MASTER_TID(tid)) { // Worker threads
323 kmp_int32 parent_tid = (tid - 1) >> branch_bits;
324
325 KA_TRACE(20,
326 ("__kmp_tree_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) "
327 "arrived(%p): %llu => %llu\n",
328 gtid, team->t.t_id, tid, __kmp_gtid_from_tid(parent_tid, team),
329 team->t.t_id, parent_tid, &thr_bar->b_arrived, thr_bar->b_arrived,
330 thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP));
331
332 // Mark arrival to parent thread
333 /* After performing this write, a worker thread may not assume that the team
334 is valid any more - it could be deallocated by the master thread at any
335 time. */
336 ANNOTATE_BARRIER_BEGIN(this_thr);
337 kmp_flag_64 flag(&thr_bar->b_arrived, other_threads[parent_tid]);
338 flag.release();
339 } else {
340 // Need to update the team arrived pointer if we are the master thread
341 if (nproc > 1) // New value was already computed above
342 team->t.t_bar[bt].b_arrived = new_state;
343 else
344 team->t.t_bar[bt].b_arrived += KMP_BARRIER_STATE_BUMP;
345 KA_TRACE(20, ("__kmp_tree_barrier_gather: T#%d(%d:%d) set team %d "
346 "arrived(%p) = %llu\n",
347 gtid, team->t.t_id, tid, team->t.t_id,
348 &team->t.t_bar[bt].b_arrived, team->t.t_bar[bt].b_arrived));
349 }
350 KA_TRACE(20,
351 ("__kmp_tree_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",
352 gtid, team->t.t_id, tid, bt));
353}
354
355static void __kmp_tree_barrier_release(
356 enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
357 int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
358 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_tree_release);
359 kmp_team_t *team;
360 kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
361 kmp_uint32 nproc;
362 kmp_uint32 branch_bits = __kmp_barrier_release_branch_bits[bt];
363 kmp_uint32 branch_factor = 1 << branch_bits;
364 kmp_uint32 child;
365 kmp_uint32 child_tid;
366
367 // Perform a tree release for all of the threads that have been gathered
368 if (!KMP_MASTER_TID(
369 tid)) { // Handle fork barrier workers who aren't part of a team yet
370 KA_TRACE(20, ("__kmp_tree_barrier_release: T#%d wait go(%p) == %u\n", gtid,
371 &thr_bar->b_go, KMP_BARRIER_STATE_BUMP));
372 // Wait for parent thread to release us
373 kmp_flag_64 flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP);
374 flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
375 ANNOTATE_BARRIER_END(this_thr);
376#if USE_ITT_BUILD && USE_ITT_NOTIFY
377 if ((__itt_sync_create_ptr && itt_sync_obj == NULL) || KMP_ITT_DEBUG) {
378 // In fork barrier where we could not get the object reliably (or
379 // ITTNOTIFY is disabled)
380 itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 0, -1);
381 // Cancel wait on previous parallel region...
382 __kmp_itt_task_starting(itt_sync_obj);
383
384 if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
385 return;
386
387 itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
388 if (itt_sync_obj != NULL)
389 // Call prepare as early as possible for "new" barrier
390 __kmp_itt_task_finished(itt_sync_obj);
391 } else
392#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
393 // Early exit for reaping threads releasing forkjoin barrier
394 if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
395 return;
396
397 // The worker thread may now assume that the team is valid.
398 team = __kmp_threads[gtid]->th.th_team;
399 KMP_DEBUG_ASSERT(team != NULL);
400 tid = __kmp_tid_from_gtid(gtid);
401
402 TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE);
403 KA_TRACE(20,
404 ("__kmp_tree_barrier_release: T#%d(%d:%d) set go(%p) = %u\n", gtid,
405 team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE));
406 KMP_MB(); // Flush all pending memory write invalidates.
407 } else {
408 team = __kmp_threads[gtid]->th.th_team;
409 KMP_DEBUG_ASSERT(team != NULL);
410 KA_TRACE(20, ("__kmp_tree_barrier_release: T#%d(%d:%d) master enter for "
411 "barrier type %d\n",
412 gtid, team->t.t_id, tid, bt));
413 }
414 nproc = this_thr->th.th_team_nproc;
415 child_tid = (tid << branch_bits) + 1;
416
417 if (child_tid < nproc) {
418 kmp_info_t **other_threads = team->t.t_threads;
419 child = 1;
420 // Parent threads release all their children
421 do {
422 kmp_info_t *child_thr = other_threads[child_tid];
423 kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
424#if KMP_CACHE_MANAGE
425 // Prefetch next thread's go count
426 if (child + 1 <= branch_factor && child_tid + 1 < nproc)
427 KMP_CACHE_PREFETCH(
428 &other_threads[child_tid + 1]->th.th_bar[bt].bb.b_go);
429#endif /* KMP_CACHE_MANAGE */
430
431#if KMP_BARRIER_ICV_PUSH
432 {
433 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_icv_copy);
434 if (propagate_icvs) {
435 __kmp_init_implicit_task(team->t.t_ident,
436 team->t.t_threads[child_tid], team,
437 child_tid, FALSE0);
438 copy_icvs(&team->t.t_implicit_task_taskdata[child_tid].td_icvs,
439 &team->t.t_implicit_task_taskdata[0].td_icvs);
440 }
441 }
442#endif // KMP_BARRIER_ICV_PUSH
443 KA_TRACE(20,
444 ("__kmp_tree_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%u)"
445 "go(%p): %u => %u\n",
446 gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
447 team->t.t_id, child_tid, &child_bar->b_go, child_bar->b_go,
448 child_bar->b_go + KMP_BARRIER_STATE_BUMP));
449 // Release child from barrier
450 ANNOTATE_BARRIER_BEGIN(child_thr);
451 kmp_flag_64 flag(&child_bar->b_go, child_thr);
452 flag.release();
453 child++;
454 child_tid++;
455 } while (child <= branch_factor && child_tid < nproc);
456 }
457 KA_TRACE(
458 20, ("__kmp_tree_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",
459 gtid, team->t.t_id, tid, bt));
460}
461
462// Hyper Barrier
463static void
464__kmp_hyper_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid,
465 int tid, void (*reduce)(void *, void *)
466 USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
467 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hyper_gather);
468 kmp_team_t *team = this_thr->th.th_team;
469 kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
470 kmp_info_t **other_threads = team->t.t_threads;
471 kmp_uint64 new_state = KMP_BARRIER_UNUSED_STATE;
472 kmp_uint32 num_threads = this_thr->th.th_team_nproc;
473 kmp_uint32 branch_bits = __kmp_barrier_gather_branch_bits[bt];
474 kmp_uint32 branch_factor = 1 << branch_bits;
475 kmp_uint32 offset;
476 kmp_uint32 level;
477
478 KA_TRACE(
479 20,
480 ("__kmp_hyper_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n",
481 gtid, team->t.t_id, tid, bt));
482 KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]);
483
484#if USE_ITT_BUILD && USE_ITT_NOTIFY
485 // Barrier imbalance - save arrive time to the thread
486 if (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) {
487 this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time =
488 __itt_get_timestamp();
489 }
490#endif
491 /* Perform a hypercube-embedded tree gather to wait until all of the threads
492 have arrived, and reduce any required data as we go. */
493 kmp_flag_64 p_flag(&thr_bar->b_arrived);
494 for (level = 0, offset = 1; offset < num_threads;
495 level += branch_bits, offset <<= branch_bits) {
496 kmp_uint32 child;
497 kmp_uint32 child_tid;
498
499 if (((tid >> level) & (branch_factor - 1)) != 0) {
500 kmp_int32 parent_tid = tid & ~((1 << (level + branch_bits)) - 1);
501
502 KA_TRACE(20,
503 ("__kmp_hyper_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) "
504 "arrived(%p): %llu => %llu\n",
505 gtid, team->t.t_id, tid, __kmp_gtid_from_tid(parent_tid, team),
506 team->t.t_id, parent_tid, &thr_bar->b_arrived,
507 thr_bar->b_arrived,
508 thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP));
509 // Mark arrival to parent thread
510 /* After performing this write (in the last iteration of the enclosing for
511 loop), a worker thread may not assume that the team is valid any more
512 - it could be deallocated by the master thread at any time. */
513 ANNOTATE_BARRIER_BEGIN(this_thr);
514 p_flag.set_waiter(other_threads[parent_tid]);
515 p_flag.release();
516 break;
517 }
518
519 // Parent threads wait for children to arrive
520 if (new_state == KMP_BARRIER_UNUSED_STATE)
521 new_state = team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP;
522 for (child = 1, child_tid = tid + (1 << level);
523 child < branch_factor && child_tid < num_threads;
524 child++, child_tid += (1 << level)) {
525 kmp_info_t *child_thr = other_threads[child_tid];
526 kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
527#if KMP_CACHE_MANAGE
528 kmp_uint32 next_child_tid = child_tid + (1 << level);
529 // Prefetch next thread's arrived count
530 if (child + 1 < branch_factor && next_child_tid < num_threads)
531 KMP_CACHE_PREFETCH(
532 &other_threads[next_child_tid]->th.th_bar[bt].bb.b_arrived);
533#endif /* KMP_CACHE_MANAGE */
534 KA_TRACE(20,
535 ("__kmp_hyper_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%u) "
536 "arrived(%p) == %llu\n",
537 gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
538 team->t.t_id, child_tid, &child_bar->b_arrived, new_state));
539 // Wait for child to arrive
540 kmp_flag_64 c_flag(&child_bar->b_arrived, new_state);
541 c_flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
542 ANNOTATE_BARRIER_END(child_thr);
543#if USE_ITT_BUILD && USE_ITT_NOTIFY
544 // Barrier imbalance - write min of the thread time and a child time to
545 // the thread.
546 if (__kmp_forkjoin_frames_mode == 2) {
547 this_thr->th.th_bar_min_time = KMP_MIN(this_thr->th.th_bar_min_time,
548 child_thr->th.th_bar_min_time);
549 }
550#endif
551 if (reduce) {
552 KA_TRACE(100,
553 ("__kmp_hyper_barrier_gather: T#%d(%d:%d) += T#%d(%d:%u)\n",
554 gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
555 team->t.t_id, child_tid));
556 ANNOTATE_REDUCE_AFTER(reduce);
557 (*reduce)(this_thr->th.th_local.reduce_data,
558 child_thr->th.th_local.reduce_data);
559 ANNOTATE_REDUCE_BEFORE(reduce);
560 ANNOTATE_REDUCE_BEFORE(&team->t.t_bar);
561 }
562 }
563 }
564
565 if (KMP_MASTER_TID(tid)) {
566 // Need to update the team arrived pointer if we are the master thread
567 if (new_state == KMP_BARRIER_UNUSED_STATE)
568 team->t.t_bar[bt].b_arrived += KMP_BARRIER_STATE_BUMP;
569 else
570 team->t.t_bar[bt].b_arrived = new_state;
571 KA_TRACE(20, ("__kmp_hyper_barrier_gather: T#%d(%d:%d) set team %d "
572 "arrived(%p) = %llu\n",
573 gtid, team->t.t_id, tid, team->t.t_id,
574 &team->t.t_bar[bt].b_arrived, team->t.t_bar[bt].b_arrived));
575 }
576 KA_TRACE(
577 20, ("__kmp_hyper_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",
578 gtid, team->t.t_id, tid, bt));
579}
580
581// The reverse versions seem to beat the forward versions overall
582#define KMP_REVERSE_HYPER_BAR
583static void __kmp_hyper_barrier_release(
584 enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
585 int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
586 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hyper_release);
587 kmp_team_t *team;
588 kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
589 kmp_info_t **other_threads;
590 kmp_uint32 num_threads;
591 kmp_uint32 branch_bits = __kmp_barrier_release_branch_bits[bt];
592 kmp_uint32 branch_factor = 1 << branch_bits;
593 kmp_uint32 child;
594 kmp_uint32 child_tid;
595 kmp_uint32 offset;
596 kmp_uint32 level;
597
598 /* Perform a hypercube-embedded tree release for all of the threads that have
599 been gathered. If KMP_REVERSE_HYPER_BAR is defined (default) the threads
600 are released in the reverse order of the corresponding gather, otherwise
601 threads are released in the same order. */
602 if (KMP_MASTER_TID(tid)) { // master
603 team = __kmp_threads[gtid]->th.th_team;
604 KMP_DEBUG_ASSERT(team != NULL);
605 KA_TRACE(20, ("__kmp_hyper_barrier_release: T#%d(%d:%d) master enter for "
606 "barrier type %d\n",
607 gtid, team->t.t_id, tid, bt));
608#if KMP_BARRIER_ICV_PUSH
609 if (propagate_icvs) { // master already has ICVs in final destination; copy
610 copy_icvs(&thr_bar->th_fixed_icvs,
611 &team->t.t_implicit_task_taskdata[tid].td_icvs);
612 }
613#endif
614 } else { // Handle fork barrier workers who aren't part of a team yet
615 KA_TRACE(20, ("__kmp_hyper_barrier_release: T#%d wait go(%p) == %u\n", gtid,
616              &thr_bar->b_go, KMP_BARRIER_STATE_BUMP));
617 // Wait for parent thread to release us
618 kmp_flag_64 flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP(1 << 2));
619 flag.wait(this_thr, TRUE(!0) USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj);
620 ANNOTATE_BARRIER_END(this_thr);
621#if USE_ITT_BUILD1 && USE_ITT_NOTIFY1
622 if ((__itt_sync_create_ptr__kmp_itt_sync_create_ptr__3_0 && itt_sync_obj == NULL__null) || KMP_ITT_DEBUG0) {
623 // In fork barrier where we could not get the object reliably
624 itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 0, -1);
625 // Cancel wait on previous parallel region...
626 __kmp_itt_task_starting(itt_sync_obj);
627
628 if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done)(__kmp_global.g.g_done))
629 return;
630
631 itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
632 if (itt_sync_obj != NULL__null)
633 // Call prepare as early as possible for "new" barrier
634 __kmp_itt_task_finished(itt_sync_obj);
635 } else
636#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
637 // Early exit for reaping threads releasing forkjoin barrier
638 if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done)(__kmp_global.g.g_done))
639 return;
640
641 // The worker thread may now assume that the team is valid.
642 team = __kmp_threads[gtid]->th.th_team;
643 KMP_DEBUG_ASSERT(team != NULL);
644 tid = __kmp_tid_from_gtid(gtid);
645
646 TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE)(thr_bar->b_go) = (0);
647 KA_TRACE(20,
648          ("__kmp_hyper_barrier_release: T#%d(%d:%d) set go(%p) = %u\n",
649           gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE));
650 KMP_MB(); // Flush all pending memory write invalidates.
651 }
652 num_threads = this_thr->th.th_team_nproc;
653 other_threads = team->t.t_threads;
654
655#ifdef KMP_REVERSE_HYPER_BAR
656 // Count up to correct level for parent
657 for (level = 0, offset = 1;
658 offset < num_threads && (((tid >> level) & (branch_factor - 1)) == 0);
659 level += branch_bits, offset <<= branch_bits)
660 ;
661
662 // Now go down from there
663 for (level -= branch_bits, offset >>= branch_bits; offset != 0;
664 level -= branch_bits, offset >>= branch_bits)
665#else
666 // Go down the tree, level by level
667 for (level = 0, offset = 1; offset < num_threads;
668 level += branch_bits, offset <<= branch_bits)
669#endif // KMP_REVERSE_HYPER_BAR
670 {
671#ifdef KMP_REVERSE_HYPER_BAR
672 /* Now go in reverse order through the children, highest to lowest.
673 Initial setting of child is conservative here. */
674 child = num_threads >> ((level == 0) ? level : level - 1);
675 for (child = (child < branch_factor - 1) ? child : branch_factor - 1,
676 child_tid = tid + (child << level);
677 child >= 1; child--, child_tid -= (1 << level))
678#else
679 if (((tid >> level) & (branch_factor - 1)) != 0)
680 // No need to go lower than this, since this is the level parent would be
681 // notified
682 break;
683 // Iterate through children on this level of the tree
684 for (child = 1, child_tid = tid + (1 << level);
685 child < branch_factor && child_tid < num_threads;
686 child++, child_tid += (1 << level))
687#endif // KMP_REVERSE_HYPER_BAR
688 {
689 if (child_tid >= num_threads)
690 continue; // Child doesn't exist so keep going
691 else {
692 kmp_info_t *child_thr = other_threads[child_tid];
693 kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
694#if KMP_CACHE_MANAGE
695 kmp_uint32 next_child_tid = child_tid - (1 << level);
696// Prefetch next thread's go count
697#ifdef KMP_REVERSE_HYPER_BAR
698 if (child - 1 >= 1 && next_child_tid < num_threads)
699#else
700 if (child + 1 < branch_factor && next_child_tid < num_threads)
701#endif // KMP_REVERSE_HYPER_BAR
702 KMP_CACHE_PREFETCH(
703 &other_threads[next_child_tid]->th.th_bar[bt].bb.b_go);
704#endif /* KMP_CACHE_MANAGE */
705
706#if KMP_BARRIER_ICV_PUSH1
707 if (propagate_icvs) // push my fixed ICVs to my child
708 copy_icvs(&child_bar->th_fixed_icvs, &thr_bar->th_fixed_icvs);
709#endif // KMP_BARRIER_ICV_PUSH
710
711 KA_TRACE(
712     20,
713     ("__kmp_hyper_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%u)"
714      "go(%p): %u => %u\n",
715      gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
716      team->t.t_id, child_tid, &child_bar->b_go, child_bar->b_go,
717      child_bar->b_go + KMP_BARRIER_STATE_BUMP));
718 // Release child from barrier
719 ANNOTATE_BARRIER_BEGIN(child_thr);
720 kmp_flag_64 flag(&child_bar->b_go, child_thr);
721 flag.release();
722 }
723 }
724 }
725#if KMP_BARRIER_ICV_PUSH1
726 if (propagate_icvs &&
727 !KMP_MASTER_TID(tid)((tid) == 0)) { // copy ICVs locally to final dest
728 __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team, tid,
729 FALSE0);
730 copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
731 &thr_bar->th_fixed_icvs);
732 }
733#endif
734 KA_TRACE(
735     20,
736     ("__kmp_hyper_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",
737      gtid, team->t.t_id, tid, bt));
738}
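
The gather/release pair above relies on a simple hypercube pairing rule: at each level a thread is a subtree root only while its tid's digit (base branch_factor) at that level is zero, and it reports to the thread obtained by clearing that first non-zero digit. The following standalone sketch is not part of kmp_barrier.cpp and uses illustrative names; it only restates the rule the level/offset loops encode.

#include <cstdint>

// Sketch only: find the parent tid of `tid` in a hypercube barrier with
// 2^branch_bits children per node, mirroring the level/offset loops above.
// Assumes tid < num_threads; the master (tid 0) is returned unchanged.
static uint32_t hyper_barrier_parent(uint32_t tid, uint32_t branch_bits,
                                     uint32_t num_threads) {
  const uint32_t branch_factor = 1u << branch_bits;
  if (tid == 0)
    return 0; // master has no parent
  uint32_t level = 0, offset = 1;
  // Stop at the first level where tid's digit (base branch_factor) is non-zero.
  while (offset < num_threads && ((tid >> level) & (branch_factor - 1)) == 0) {
    level += branch_bits;
    offset <<= branch_bits;
  }
  // All digits below `level` are already zero, so clearing everything below
  // level + branch_bits yields the parent's tid.
  return tid & ~((1u << (level + branch_bits)) - 1u);
}

For example, with branch_bits = 2 (branch_factor 4) and 16 threads, tid 6 reports to tid 4 at level 0, and tid 4 reports to tid 0 at level 2.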
739
740// Hierarchical Barrier
741
742// Initialize thread barrier data
743/* Initializes/re-initializes the hierarchical barrier data stored on a thread.
744 Performs the minimum amount of initialization required based on how the team
745 has changed. Returns true if leaf children will require both on-core and
746 traditional wake-up mechanisms. For example, if the team size increases,
747 threads already in the team will respond to on-core wakeup on their parent
748 thread, but threads newly added to the team will only be listening on
749 their local b_go. */
750static bool __kmp_init_hierarchical_barrier_thread(enum barrier_type bt,
751 kmp_bstate_t *thr_bar,
752 kmp_uint32 nproc, int gtid,
753 int tid, kmp_team_t *team) {
754 // Checks to determine if (re-)initialization is needed
755 bool uninitialized = thr_bar->team == NULL__null;
756 bool team_changed = team != thr_bar->team;
757 bool team_sz_changed = nproc != thr_bar->nproc;
758 bool tid_changed = tid != thr_bar->old_tid;
759 bool retval = false;
760
761 if (uninitialized || team_sz_changed) {
762 __kmp_get_hierarchy(nproc, thr_bar);
763 }
764
765 if (uninitialized || team_sz_changed || tid_changed) {
766 thr_bar->my_level = thr_bar->depth - 1; // default for master
767 thr_bar->parent_tid = -1; // default for master
768 if (!KMP_MASTER_TID(
769         tid)) { // if not master, find parent thread in hierarchy
770 kmp_uint32 d = 0;
771 while (d < thr_bar->depth) { // find parent based on level of thread in
772 // hierarchy, and note level
773 kmp_uint32 rem;
774 if (d == thr_bar->depth - 2) { // reached level right below the master
775 thr_bar->parent_tid = 0;
776 thr_bar->my_level = d;
777 break;
778 } else if ((rem = tid % thr_bar->skip_per_level[d + 1]) !=
779 0) { // TODO: can we make this op faster?
780 // thread is not a subtree root at next level, so this is max
781 thr_bar->parent_tid = tid - rem;
782 thr_bar->my_level = d;
783 break;
784 }
785 ++d;
786 }
787 }
788 thr_bar->offset = 7 - (tid - thr_bar->parent_tid - 1);
789 thr_bar->old_tid = tid;
790 thr_bar->wait_flag = KMP_BARRIER_NOT_WAITING0;
791 thr_bar->team = team;
792 thr_bar->parent_bar =
793 &team->t.t_threads[thr_bar->parent_tid]->th.th_bar[bt].bb;
794 }
795 if (uninitialized || team_changed || tid_changed) {
796 thr_bar->team = team;
797 thr_bar->parent_bar =
798 &team->t.t_threads[thr_bar->parent_tid]->th.th_bar[bt].bb;
799 retval = true;
800 }
801 if (uninitialized || team_sz_changed || tid_changed) {
802 thr_bar->nproc = nproc;
803 thr_bar->leaf_kids = thr_bar->base_leaf_kids;
804 if (thr_bar->my_level == 0)
805 thr_bar->leaf_kids = 0;
806 if (thr_bar->leaf_kids && (kmp_uint32)tid + thr_bar->leaf_kids + 1 > nproc)
807 thr_bar->leaf_kids = nproc - tid - 1;
808 thr_bar->leaf_state = 0;
809 for (int i = 0; i < thr_bar->leaf_kids; ++i)
810 ((char *)&(thr_bar->leaf_state))[7 - i] = 1;
811 }
812 return retval;
813}
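
The last loop above is the byte-per-leaf layout the hierarchical barrier depends on: a thread's 64-bit flag words double as an array of eight bytes, leaf child i owns byte 7 - i, and offset (set a few lines earlier) is the byte a thread touches in its parent's flags. A small sketch of that layout, illustrative only and not part of the runtime:

#include <cstdint>

// Sketch only: the leaf_state mask a parent waits on has one non-zero byte
// per leaf child, using the same indexing as the loop above.
static uint64_t make_leaf_state(uint32_t leaf_kids) {
  uint64_t leaf_state = 0;
  for (uint32_t i = 0; i < leaf_kids && i < 8; ++i)
    ((char *)&leaf_state)[7 - i] = 1;
  return leaf_state;
}

// Sketch only: which byte of the parent's flag words a given leaf child uses,
// matching offset = 7 - (tid - parent_tid - 1) above.
static uint32_t leaf_child_byte(uint32_t parent_tid, uint32_t child_tid) {
  return 7 - (child_tid - parent_tid - 1);
}

With parent_tid 0 and three leaf kids (tids 1, 2, 3), the children own bytes 7, 6 and 5, and make_leaf_state(3) has exactly those bytes set.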
814
815static void __kmp_hierarchical_barrier_gather(
816 enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
817 void (*reduce)(void *, void *) USE_ITT_BUILD_ARG(void *itt_sync_obj), void *itt_sync_obj) {
818 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hier_gather)((void)0);
819 kmp_team_t *team = this_thr->th.th_team;
820 kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
821 kmp_uint32 nproc = this_thr->th.th_team_nproc;
822 kmp_info_t **other_threads = team->t.t_threads;
823 kmp_uint64 new_state;
824
825 int level = team->t.t_level;
826#if OMP_40_ENABLED(50 >= 40)
827 if (other_threads[0]
828 ->th.th_teams_microtask) // are we inside the teams construct?
829 if (this_thr->th.th_teams_size.nteams > 1)
830 ++level; // level was not increased in teams construct for team_of_masters
831#endif
832 if (level == 1)
833 thr_bar->use_oncore_barrier = 1;
834 else
835 thr_bar->use_oncore_barrier = 0; // Do not use oncore barrier when nested
836
837 KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) enter for "
838              "barrier type %d\n",
839              gtid, team->t.t_id, tid, bt));
840 KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]);
841
842#if USE_ITT_BUILD1 && USE_ITT_NOTIFY1
843 // Barrier imbalance - save arrive time to the thread
844 if (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) {
845 this_thr->th.th_bar_arrive_time = __itt_get_timestamp(!__kmp_itt_get_timestamp_ptr__3_0) ? 0 : __kmp_itt_get_timestamp_ptr__3_0();
846 }
847#endif
848
849 (void)__kmp_init_hierarchical_barrier_thread(bt, thr_bar, nproc, gtid, tid,
850 team);
851
852 if (thr_bar->my_level) { // not a leaf (my_level==0 means leaf)
853 kmp_int32 child_tid;
854 new_state =
855 (kmp_uint64)team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP(1 << 2);
856 if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME(2147483647) &&
857 thr_bar->use_oncore_barrier) {
858 if (thr_bar->leaf_kids) {
859 // First, wait for leaf children to check-in on my b_arrived flag
860 kmp_uint64 leaf_state =
861 KMP_MASTER_TID(tid)((tid) == 0)
862 ? thr_bar->b_arrived | thr_bar->leaf_state
863 : team->t.t_bar[bt].b_arrived | thr_bar->leaf_state;
864 KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) waiting "
865              "for leaf kids\n",
866              gtid, team->t.t_id, tid));
867 kmp_flag_64 flag(&thr_bar->b_arrived, leaf_state);
868 flag.wait(this_thr, FALSE0 USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj);
869 if (reduce) {
870 ANNOTATE_REDUCE_AFTER(reduce);
871 for (child_tid = tid + 1; child_tid <= tid + thr_bar->leaf_kids;
872 ++child_tid) {
873 KA_TRACE(100, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += "
874               "T#%d(%d:%d)\n",
875               gtid, team->t.t_id, tid,
876               __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
877               child_tid));
878 ANNOTATE_BARRIER_END(other_threads[child_tid]);
879 (*reduce)(this_thr->th.th_local.reduce_data,
880 other_threads[child_tid]->th.th_local.reduce_data);
881 }
882 ANNOTATE_REDUCE_BEFORE(reduce);
883 ANNOTATE_REDUCE_BEFORE(&team->t.t_bar);
884 }
885 // clear leaf_state bits
886 KMP_TEST_THEN_AND64(&thr_bar->b_arrived, ~(thr_bar->leaf_state));
887 }
888 // Next, wait for higher level children on each child's b_arrived flag
889 for (kmp_uint32 d = 1; d < thr_bar->my_level;
890 ++d) { // gather lowest level threads first, but skip 0
891 kmp_uint32 last = tid + thr_bar->skip_per_level[d + 1],
892 skip = thr_bar->skip_per_level[d];
893 if (last > nproc)
894 last = nproc;
895 for (child_tid = tid + skip; child_tid < (int)last; child_tid += skip) {
896 kmp_info_t *child_thr = other_threads[child_tid];
897 kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
898 KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) wait "
899              "T#%d(%d:%d) "
900              "arrived(%p) == %llu\n",
901              gtid, team->t.t_id, tid,
902              __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
903              child_tid, &child_bar->b_arrived, new_state));
904 kmp_flag_64 flag(&child_bar->b_arrived, new_state);
905 flag.wait(this_thr, FALSE0 USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj);
906 ANNOTATE_BARRIER_END(child_thr);
907 if (reduce) {
908 KA_TRACE(100, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += "
909               "T#%d(%d:%d)\n",
910               gtid, team->t.t_id, tid,
911               __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
912               child_tid));
913 ANNOTATE_REDUCE_AFTER(reduce);
914 (*reduce)(this_thr->th.th_local.reduce_data,
915 child_thr->th.th_local.reduce_data);
916 ANNOTATE_REDUCE_BEFORE(reduce);
917 ANNOTATE_REDUCE_BEFORE(&team->t.t_bar);
918 }
919 }
920 }
921 } else { // Blocktime is not infinite
922 for (kmp_uint32 d = 0; d < thr_bar->my_level;
923 ++d) { // Gather lowest level threads first
924 kmp_uint32 last = tid + thr_bar->skip_per_level[d + 1],
925 skip = thr_bar->skip_per_level[d];
926 if (last > nproc)
927 last = nproc;
928 for (child_tid = tid + skip; child_tid < (int)last; child_tid += skip) {
929 kmp_info_t *child_thr = other_threads[child_tid];
930 kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
931 KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) wait "
932              "T#%d(%d:%d) "
933              "arrived(%p) == %llu\n",
934              gtid, team->t.t_id, tid,
935              __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
936              child_tid, &child_bar->b_arrived, new_state));
937 kmp_flag_64 flag(&child_bar->b_arrived, new_state);
938 flag.wait(this_thr, FALSE0 USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj);
939 ANNOTATE_BARRIER_END(child_thr);
940 if (reduce) {
941 KA_TRACE(100, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += "
942               "T#%d(%d:%d)\n",
943               gtid, team->t.t_id, tid,
944               __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
945               child_tid));
946 ANNOTATE_REDUCE_AFTER(reduce);
947 (*reduce)(this_thr->th.th_local.reduce_data,
948 child_thr->th.th_local.reduce_data);
949 ANNOTATE_REDUCE_BEFORE(reduce);
950 ANNOTATE_REDUCE_BEFORE(&team->t.t_bar);
951 }
952 }
953 }
954 }
955 }
956 // All subordinates are gathered; now release parent if not master thread
957
958 if (!KMP_MASTER_TID(tid)((tid) == 0)) { // worker threads release parent in hierarchy
959 KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) releasing"
960              " T#%d(%d:%d) arrived(%p): %llu => %llu\n",
961              gtid, team->t.t_id, tid,
962              __kmp_gtid_from_tid(thr_bar->parent_tid, team), team->t.t_id,
963              thr_bar->parent_tid, &thr_bar->b_arrived, thr_bar->b_arrived,
964              thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP));
965 /* Mark arrival to parent: After performing this write, a worker thread may
966 not assume that the team is valid any more - it could be deallocated by
967 the master thread at any time. */
968 if (thr_bar->my_level || __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME(2147483647) ||
969 !thr_bar->use_oncore_barrier) { // Parent is waiting on my b_arrived
970 // flag; release it
971 ANNOTATE_BARRIER_BEGIN(this_thr);
972 kmp_flag_64 flag(&thr_bar->b_arrived, other_threads[thr_bar->parent_tid]);
973 flag.release();
974 } else {
975 // Leaf does special release on "offset" bits of parent's b_arrived flag
976 thr_bar->b_arrived = team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP(1 << 2);
977 kmp_flag_oncore flag(&thr_bar->parent_bar->b_arrived, thr_bar->offset);
978 flag.set_waiter(other_threads[thr_bar->parent_tid]);
979 flag.release();
980 }
981 } else { // Master thread needs to update the team's b_arrived value
982 team->t.t_bar[bt].b_arrived = new_state;
983 KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) set team %d "
984              "arrived(%p) = %llu\n",
985              gtid, team->t.t_id, tid, team->t.t_id,
986              &team->t.t_bar[bt].b_arrived, team->t.t_bar[bt].b_arrived));
987 }
988 // Is the team access below unsafe or just technically invalid?
989 KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) exit for "
990              "barrier type %d\n",
991              gtid, team->t.t_id, tid, bt));
992}
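
Both gather loops above (the on-core and the finite-blocktime variants) walk the topology through skip_per_level[]: at level d a thread's direct subtree roots are spaced skip_per_level[d] tids apart and its span ends at tid + skip_per_level[d + 1], clipped to nproc. A compact sketch of that enumeration, with skip_per_level passed in explicitly rather than read from kmp_bstate_t (illustrative only):

#include <cstdint>
#include <vector>

// Sketch only: list the direct children of `tid` at hierarchy level d, using
// the same spacing rule as the hierarchical gather/release loops above.
static std::vector<uint32_t>
children_at_level(uint32_t tid, uint32_t d,
                  const std::vector<uint32_t> &skip_per_level, uint32_t nproc) {
  std::vector<uint32_t> kids;
  uint32_t skip = skip_per_level[d];           // distance between children
  uint32_t last = tid + skip_per_level[d + 1]; // end of this thread's span
  if (last > nproc)
    last = nproc;
  for (uint32_t child_tid = tid + skip; child_tid < last; child_tid += skip)
    kids.push_back(child_tid);
  return kids;
}

For instance, with skip_per_level = {1, 4, 16} and nproc = 16, thread 0's children at level 1 are tids 4, 8 and 12, and its level-0 (leaf) children are tids 1, 2 and 3.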
993
994static void __kmp_hierarchical_barrier_release(
995 enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
996 int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj), void *itt_sync_obj) {
997 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hier_release)((void)0);
998 kmp_team_t *team;
999 kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
1000 kmp_uint32 nproc;
1001 bool team_change = false; // indicates on-core barrier shouldn't be used
1002
1003 if (KMP_MASTER_TID(tid)((tid) == 0)) {
1004 team = __kmp_threads[gtid]->th.th_team;
1005 KMP_DEBUG_ASSERT(team != NULL);
1006 KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) master "
1007              "entered barrier type %d\n",
1008              gtid, team->t.t_id, tid, bt));
1009 } else { // Worker threads
1010 // Wait for parent thread to release me
1011 if (!thr_bar->use_oncore_barrier ||
1012 __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME(2147483647) || thr_bar->my_level != 0 ||
1013 thr_bar->team == NULL__null) {
1014 // Use traditional method of waiting on my own b_go flag
1015 thr_bar->wait_flag = KMP_BARRIER_OWN_FLAG1;
1016 kmp_flag_64 flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP(1 << 2));
1017 flag.wait(this_thr, TRUE(!0) USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj);
1018 ANNOTATE_BARRIER_END(this_thr);
1019 TCW_8(thr_bar->b_go,
1020       KMP_INIT_BARRIER_STATE); // Reset my b_go flag for next time
1021 } else { // Thread barrier data is initialized, this is a leaf, blocktime is
1022 // infinite, not nested
1023 // Wait on my "offset" bits on parent's b_go flag
1024 thr_bar->wait_flag = KMP_BARRIER_PARENT_FLAG2;
1025 kmp_flag_oncore flag(&thr_bar->parent_bar->b_go, KMP_BARRIER_STATE_BUMP(1 << 2),
1026 thr_bar->offset, bt,
1027 this_thr USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj);
1028 flag.wait(this_thr, TRUE(!0));
1029 if (thr_bar->wait_flag ==
1030 KMP_BARRIER_SWITCHING4) { // Thread was switched to own b_go
1031 TCW_8(thr_bar->b_go,
1032       KMP_INIT_BARRIER_STATE); // Reset my b_go flag for next time
1033 } else { // Reset my bits on parent's b_go flag
1034 (RCAST(volatile char *,
1035        &(thr_bar->parent_bar->b_go)))[thr_bar->offset] = 0;
1036 }
1037 }
1038 thr_bar->wait_flag = KMP_BARRIER_NOT_WAITING0;
1039 // Early exit for reaping threads releasing forkjoin barrier
1040 if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done)(__kmp_global.g.g_done))
1041 return;
1042 // The worker thread may now assume that the team is valid.
1043 team = __kmp_threads[gtid]->th.th_team;
1044 KMP_DEBUG_ASSERT(team != NULL);
1045 tid = __kmp_tid_from_gtid(gtid);
1046
1047 KA_TRACE(
1048     20,
1049     ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) set go(%p) = %u\n",
1050      gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE));
1051 KMP_MB(); // Flush all pending memory write invalidates.
1052 }
1053
1054 nproc = this_thr->th.th_team_nproc;
1055 int level = team->t.t_level;
1056#if OMP_40_ENABLED(50 >= 40)
1057 if (team->t.t_threads[0]
1058 ->th.th_teams_microtask) { // are we inside the teams construct?
1059 if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
1060 this_thr->th.th_teams_level == level)
1061 ++level; // level was not increased in teams construct for team_of_workers
1062 if (this_thr->th.th_teams_size.nteams > 1)
1063 ++level; // level was not increased in teams construct for team_of_masters
1064 }
1065#endif
1066 if (level == 1)
1067 thr_bar->use_oncore_barrier = 1;
1068 else
1069 thr_bar->use_oncore_barrier = 0; // Do not use oncore barrier when nested
1070
1071 // If the team size has increased, we still communicate with old leaves via
1072 // oncore barrier.
1073 unsigned short int old_leaf_kids = thr_bar->leaf_kids;
1074 kmp_uint64 old_leaf_state = thr_bar->leaf_state;
1075 team_change = __kmp_init_hierarchical_barrier_thread(bt, thr_bar, nproc, gtid,
1076 tid, team);
1077 // But if the entire team changes, we won't use oncore barrier at all
1078 if (team_change)
1079 old_leaf_kids = 0;
1080
1081#if KMP_BARRIER_ICV_PUSH1
1082 if (propagate_icvs) {
1083 __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team, tid,
1084 FALSE0);
1085 if (KMP_MASTER_TID(
1086         tid)) { // master already has copy in final destination; copy
1087 copy_icvs(&thr_bar->th_fixed_icvs,
1088 &team->t.t_implicit_task_taskdata[tid].td_icvs);
1089 } else if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME(2147483647) &&
1090 thr_bar->use_oncore_barrier) { // optimization for inf blocktime
1091 if (!thr_bar->my_level) // I'm a leaf in the hierarchy (my_level==0)
1092 // leaves (on-core children) pull parent's fixed ICVs directly to local
1093 // ICV store
1094 copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
1095 &thr_bar->parent_bar->th_fixed_icvs);
1096 // non-leaves will get ICVs piggybacked with b_go via NGO store
1097 } else { // blocktime is not infinite; pull ICVs from parent's fixed ICVs
1098 if (thr_bar->my_level) // not a leaf; copy ICVs to my fixed ICVs child can
1099 // access
1100 copy_icvs(&thr_bar->th_fixed_icvs, &thr_bar->parent_bar->th_fixed_icvs);
1101 else // leaves copy parent's fixed ICVs directly to local ICV store
1102 copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
1103 &thr_bar->parent_bar->th_fixed_icvs);
1104 }
1105 }
1106#endif // KMP_BARRIER_ICV_PUSH
1107
1108 // Now, release my children
1109 if (thr_bar->my_level) { // not a leaf
1110 kmp_int32 child_tid;
1111 kmp_uint32 last;
1112 if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME(2147483647) &&
1113 thr_bar->use_oncore_barrier) {
1114 if (KMP_MASTER_TID(tid)((tid) == 0)) { // do a flat release
1115 // Set local b_go to bump children via NGO store of the cache line
1116 // containing IVCs and b_go.
1117 thr_bar->b_go = KMP_BARRIER_STATE_BUMP(1 << 2);
1118 // Use ngo stores if available; b_go piggybacks in the last 8 bytes of
1119 // the cache line
1120 ngo_load(&thr_bar->th_fixed_icvs)((void)0);
1121 // This loops over all the threads skipping only the leaf nodes in the
1122 // hierarchy
1123 for (child_tid = thr_bar->skip_per_level[1]; child_tid < (int)nproc;
1124 child_tid += thr_bar->skip_per_level[1]) {
1125 kmp_bstate_t *child_bar =
1126 &team->t.t_threads[child_tid]->th.th_bar[bt].bb;
1127 KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) "
1128              "releasing T#%d(%d:%d)"
1129              " go(%p): %u => %u\n",
1130              gtid, team->t.t_id, tid,
1131              __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
1132              child_tid, &child_bar->b_go, child_bar->b_go,
1133              child_bar->b_go + KMP_BARRIER_STATE_BUMP));
1134 // Use ngo store (if available) to both store ICVs and release child
1135 // via child's b_go
1136 ngo_store_go(&child_bar->th_fixed_icvs, &thr_bar->th_fixed_icvs);
1137 }
1138 ngo_sync()((void)0);
1139 }
1140 TCW_8(thr_bar->b_go,
1141       KMP_INIT_BARRIER_STATE); // Reset my b_go flag for next time
1142 // Now, release leaf children
1143 if (thr_bar->leaf_kids) { // if there are any
1144 // We test team_change on the off-chance that the level 1 team changed.
1145 if (team_change ||
1146 old_leaf_kids < thr_bar->leaf_kids) { // some old, some new
1147 if (old_leaf_kids) { // release old leaf kids
1148 thr_bar->b_go |= old_leaf_state;
1149 }
1150 // Release new leaf kids
1151 last = tid + thr_bar->skip_per_level[1];
1152 if (last > nproc)
1153 last = nproc;
1154 for (child_tid = tid + 1 + old_leaf_kids; child_tid < (int)last;
1155 ++child_tid) { // skip_per_level[0]=1
1156 kmp_info_t *child_thr = team->t.t_threads[child_tid];
1157 kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
1158 KA_TRACE(
1159     20,
1160     ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) releasing"
1161      " T#%d(%d:%d) go(%p): %u => %u\n",
1162      gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
1163      team->t.t_id, child_tid, &child_bar->b_go, child_bar->b_go,
1164      child_bar->b_go + KMP_BARRIER_STATE_BUMP));
1165 // Release child using child's b_go flag
1166 ANNOTATE_BARRIER_BEGIN(child_thr);
1167 kmp_flag_64 flag(&child_bar->b_go, child_thr);
1168 flag.release();
1169 }
1170 } else { // Release all children at once with leaf_state bits on my own
1171 // b_go flag
1172 thr_bar->b_go |= thr_bar->leaf_state;
1173 }
1174 }
1175 } else { // Blocktime is not infinite; do a simple hierarchical release
1176 for (int d = thr_bar->my_level - 1; d >= 0;
1177 --d) { // Release highest level threads first
1178 last = tid + thr_bar->skip_per_level[d + 1];
1179 kmp_uint32 skip = thr_bar->skip_per_level[d];
1180 if (last > nproc)
1181 last = nproc;
1182 for (child_tid = tid + skip; child_tid < (int)last; child_tid += skip) {
1183 kmp_info_t *child_thr = team->t.t_threads[child_tid];
1184 kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
1185 KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) "
1186              "releasing T#%d(%d:%d) go(%p): %u => %u\n",
1187              gtid, team->t.t_id, tid,
1188              __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
1189              child_tid, &child_bar->b_go, child_bar->b_go,
1190              child_bar->b_go + KMP_BARRIER_STATE_BUMP));
1191 // Release child using child's b_go flag
1192 ANNOTATE_BARRIER_BEGIN(child_thr);
1193 kmp_flag_64 flag(&child_bar->b_go, child_thr);
1194 flag.release();
1195 }
1196 }
1197 }
1198#if KMP_BARRIER_ICV_PUSH1
1199 if (propagate_icvs && !KMP_MASTER_TID(tid)((tid) == 0))
1200 // non-leaves copy ICVs from fixed ICVs to local dest
1201 copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
1202 &thr_bar->th_fixed_icvs);
1203#endif // KMP_BARRIER_ICV_PUSH
1204 }
1205 KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) exit for "
1206              "barrier type %d\n",
1207              gtid, team->t.t_id, tid, bt));
1208}
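
The flat on-core release in the infinite-blocktime path works because the parent's own b_go and its leaves' wake-up bytes share one 64-bit word: OR-ing leaf_state into b_go makes one byte per leaf non-zero, and each leaf watches only its offset byte of the parent's flag. A byte-level sketch of that broadcast, with plain stores standing in for the runtime's kmp_flag_oncore machinery (illustrative only; no atomics, waiter bookkeeping, or back-off):

#include <cstdint>

// Sketch only: a parent releasing all leaf children at once by setting their
// bytes in its own 64-bit b_go word.
static void parent_release_leaves(volatile uint64_t *parent_b_go,
                                  uint64_t leaf_state) {
  uint64_t v = *parent_b_go;
  *parent_b_go = v | leaf_state; // one byte per leaf child becomes non-zero
}

// Sketch only: a leaf checking whether its byte of the parent's b_go was set.
static bool leaf_released(const volatile uint64_t *parent_b_go,
                          uint32_t offset) {
  return ((const volatile char *)parent_b_go)[offset] != 0;
}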
1209
1210// End of Barrier Algorithms
1211
1212// Internal function to do a barrier.
1213/* If is_split is true, do a split barrier, otherwise, do a plain barrier
1214 If reduce is non-NULL, do a split reduction barrier, otherwise, do a split
1215 barrier
1216 Returns 0 if master thread, 1 if worker thread. */
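Before the implementation, a hedged sketch of how a caller consumes this split-barrier contract: with is_split true, workers block inside the release phase, only the master returns immediately after the gather with status 0, and it later lets the workers go via __kmp_end_split_barrier() (referenced further down in this file). finish_reduction() is a hypothetical stand-in for caller-specific work, not a runtime function.

// Sketch only: typical shape of a split reduction barrier from a caller's
// point of view; not a verbatim excerpt from the runtime.
static void split_reduce_example(enum barrier_type bt, int gtid,
                                 size_t reduce_size, void *reduce_data,
                                 void (*reduce)(void *, void *)) {
  // All threads arrive; reduce() combines reduce_data during the gather.
  int status = __kmp_barrier(bt, gtid, TRUE /* is_split */, reduce_size,
                             reduce_data, reduce);
  if (status == 0) {
    // Master: the combined result is available; finish caller-side work,
    // then release the workers that are still waiting in the barrier.
    // finish_reduction(reduce_data);  // hypothetical caller-specific step
    __kmp_end_split_barrier(bt, gtid);
  }
  // Workers return here with status == 1 once the master ends the barrier.
}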
1217int __kmp_barrier(enum barrier_type bt, int gtid, int is_split,
1218 size_t reduce_size, void *reduce_data,
1219 void (*reduce)(void *, void *)) {
1220 KMP_TIME_PARTITIONED_BLOCK(OMP_plain_barrier)((void)0);
1221 KMP_SET_THREAD_STATE_BLOCK(PLAIN_BARRIER)((void)0);
1222 int tid = __kmp_tid_from_gtid(gtid);
1223 kmp_info_t *this_thr = __kmp_threads[gtid];
1224 kmp_team_t *team = this_thr->th.th_team;
1225 int status = 0;
1226#if OMPT_SUPPORT1 && OMPT_OPTIONAL1
1227 ompt_data_t *my_task_data;
1228 ompt_data_t *my_parallel_data;
1229 void *return_address;
1230#endif
1231
1232 KA_TRACE(15, ("__kmp_barrier: T#%d(%d:%d) has arrived\n", gtid,
1233              __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid(gtid)));
1234
1235 ANNOTATE_BARRIER_BEGIN(&team->t.t_bar);
1236#if OMPT_SUPPORT1
1237 if (ompt_enabled.enabled) {
1238#if OMPT_OPTIONAL1
1239 my_task_data = OMPT_CUR_TASK_DATA(this_thr);
1240 my_parallel_data = OMPT_CUR_TEAM_DATA(this_thr);
1241 return_address = OMPT_LOAD_RETURN_ADDRESS(gtid)__ompt_load_return_address(gtid);
1242 if (ompt_enabled.ompt_callback_sync_region) {
1243 ompt_callbacks.ompt_callback(ompt_callback_sync_region)ompt_callback_sync_region_callback(
1244 ompt_sync_region_barrier, ompt_scope_begin, my_parallel_data,
1245 my_task_data, return_address);
1246 }
1247 if (ompt_enabled.ompt_callback_sync_region_wait) {
1248 ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)ompt_callback_sync_region_wait_callback(
1249 ompt_sync_region_barrier, ompt_scope_begin, my_parallel_data,
1250 my_task_data, return_address);
1251 }
1252#endif
1253 // It is OK to report the barrier state after the barrier begin callback.
1254 // According to the OMPT specification, a compliant implementation may
1255 // even delay reporting this state until the barrier begins to wait.
1256 this_thr->th.ompt_thread_info.state = omp_state_wait_barrier;
1257 }
1258#endif
1259
1260 if (!team->t.t_serialized) {
1261#if USE_ITT_BUILD1
1262 // This value will be used in itt notify events below.
1263 void *itt_sync_obj = NULL__null;
1264#if USE_ITT_NOTIFY1
1265 if (__itt_sync_create_ptr__kmp_itt_sync_create_ptr__3_0 || KMP_ITT_DEBUG0)
1266 itt_sync_obj = __kmp_itt_barrier_object(gtid, bt, 1);
1267#endif
1268#endif /* USE_ITT_BUILD */
1269 if (__kmp_tasking_mode == tskm_extra_barrier) {
1270 __kmp_tasking_barrier(team, this_thr, gtid);
1271 KA_TRACE(15,
1272          ("__kmp_barrier: T#%d(%d:%d) past tasking barrier\n", gtid,
1273           __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid(gtid)));
1274 }
1275
1276 /* Copy the blocktime info to the thread, where __kmp_wait_template() can
1277 access it when the team struct is not guaranteed to exist. */
1278 // See note about the corresponding code in __kmp_join_barrier() being
1279 // performance-critical.
1280 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME(2147483647)) {
1281#if KMP_USE_MONITOR
1282 this_thr->th.th_team_bt_intervals =
1283 team->t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals;
1284 this_thr->th.th_team_bt_set =
1285 team->t.t_implicit_task_taskdata[tid].td_icvs.bt_set;
1286#else
1287 this_thr->th.th_team_bt_intervals = KMP_BLOCKTIME_INTERVAL(team, tid);
1288#endif
1289 }
1290
1291#if USE_ITT_BUILD1
1292 if (__itt_sync_create_ptr__kmp_itt_sync_create_ptr__3_0 || KMP_ITT_DEBUG0)
1293 __kmp_itt_barrier_starting(gtid, itt_sync_obj);
1294#endif /* USE_ITT_BUILD */
1295#if USE_DEBUGGER0
1296 // Let the debugger know: the thread arrived to the barrier and waiting.
1297 if (KMP_MASTER_TID(tid)((tid) == 0)) { // Master counter is stored in team structure.
1298 team->t.t_bar[bt].b_master_arrived += 1;
1299 } else {
1300 this_thr->th.th_bar[bt].bb.b_worker_arrived += 1;
1301 } // if
1302#endif /* USE_DEBUGGER */
1303 if (reduce != NULL__null) {
1304 // KMP_DEBUG_ASSERT( is_split == TRUE ); // #C69956
1305 this_thr->th.th_local.reduce_data = reduce_data;
1306 }
1307
1308 if (KMP_MASTER_TID(tid)((tid) == 0) && __kmp_tasking_mode != tskm_immediate_exec)
1309 __kmp_task_team_setup(
1310 this_thr, team,
1311 0); // use 0 to only setup the current team if nthreads > 1
1312
1313 switch (__kmp_barrier_gather_pattern[bt]) {
1314 case bp_hyper_bar: {
1315 KMP_ASSERT(__kmp_barrier_gather_branch_bits[bt]); // don't set branch bits
1316 // to 0; use linear
1317 __kmp_hyper_barrier_gather(bt, this_thr, gtid, tid,
1318 reduce USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj);
1319 break;
1320 }
1321 case bp_hierarchical_bar: {
1322 __kmp_hierarchical_barrier_gather(bt, this_thr, gtid, tid,
1323 reduce USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj);
1324 break;
1325 }
1326 case bp_tree_bar: {
1327 KMP_ASSERT(__kmp_barrier_gather_branch_bits[bt]); // don't set branch bits
1328 // to 0; use linear
1329 __kmp_tree_barrier_gather(bt, this_thr, gtid, tid,
1330 reduce USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj);
1331 break;
1332 }
1333 default: {
1334 __kmp_linear_barrier_gather(bt, this_thr, gtid, tid,
1335 reduce USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj);
1336 }
1337 }
1338
1339 KMP_MB();
1340
1341 if (KMP_MASTER_TID(tid)((tid) == 0)) {
1342 status = 0;
1343 if (__kmp_tasking_mode != tskm_immediate_exec) {
1344 __kmp_task_team_wait(this_thr, team USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj);
1345 }
1346#if USE_DEBUGGER0
1347 // Let the debugger know: All threads are arrived and starting leaving the
1348 // barrier.
1349 team->t.t_bar[bt].b_team_arrived += 1;
1350#endif
1351
1352#if OMP_40_ENABLED(50 >= 40)
1353 kmp_int32 cancel_request = KMP_ATOMIC_LD_RLX(&team->t.t_cancel_request);
1354 // Reset cancellation flag for worksharing constructs
1355 if (cancel_request == cancel_loop || cancel_request == cancel_sections) {
1356 KMP_ATOMIC_ST_RLX(&team->t.t_cancel_request, cancel_noreq);
1357 }
1358#endif
1359#if USE_ITT_BUILD1
1360 /* TODO: In case of split reduction barrier, master thread may send
1361 acquired event early, before the final summation into the shared
1362 variable is done (final summation can be a long operation for array
1363 reductions). */
1364 if (__itt_sync_create_ptr__kmp_itt_sync_create_ptr__3_0 || KMP_ITT_DEBUG0)
1365 __kmp_itt_barrier_middle(gtid, itt_sync_obj);
1366#endif /* USE_ITT_BUILD */
1367#if USE_ITT_BUILD1 && USE_ITT_NOTIFY1
1368 // Barrier - report frame end (only if active_level == 1)
1369 if ((__itt_frame_submit_v3_ptr__kmp_itt_frame_submit_v3_ptr__3_0 || KMP_ITT_DEBUG0) &&
1370 __kmp_forkjoin_frames_mode &&
1371#if OMP_40_ENABLED(50 >= 40)
1372 this_thr->th.th_teams_microtask == NULL__null &&
1373#endif
1374 team->t.t_active_level == 1) {
1375 ident_t *loc = __kmp_threads[gtid]->th.th_ident;
1376 kmp_uint64 cur_time = __itt_get_timestamp(!__kmp_itt_get_timestamp_ptr__3_0) ? 0 : __kmp_itt_get_timestamp_ptr__3_0();
1377 kmp_info_t **other_threads = team->t.t_threads;
1378 int nproc = this_thr->th.th_team_nproc;
1379 int i;
1380 switch (__kmp_forkjoin_frames_mode) {
1381 case 1:
1382 __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0,
1383 loc, nproc);
1384 this_thr->th.th_frame_time = cur_time;
1385 break;
1386 case 2: // AC 2015-01-19: currently does not work for hierarchical (to
1387 // be fixed)
1388 __kmp_itt_frame_submit(gtid, this_thr->th.th_bar_min_time, cur_time,
1389 1, loc, nproc);
1390 break;
1391 case 3:
1392 if (__itt_metadata_add_ptr__kmp_itt_metadata_add_ptr__3_0) {
1393 // Initialize with master's wait time
1394 kmp_uint64 delta = cur_time - this_thr->th.th_bar_arrive_time;
1395 // Set arrive time to zero to be able to check it in
1396 // __kmp_invoke_task(); the same is done inside the loop below
1397 this_thr->th.th_bar_arrive_time = 0;
1398 for (i = 1; i < nproc; ++i) {
1399 delta += (cur_time - other_threads[i]->th.th_bar_arrive_time);
1400 other_threads[i]->th.th_bar_arrive_time = 0;
1401 }
1402 __kmp_itt_metadata_imbalance(gtid, this_thr->th.th_frame_time,
1403 cur_time, delta,
1404 (kmp_uint64)(reduce != NULL__null));
1405 }
1406 __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0,
1407 loc, nproc);
1408 this_thr->th.th_frame_time = cur_time;
1409 break;
1410 }
1411 }
1412#endif /* USE_ITT_BUILD */
1413 } else {
1414 status = 1;
1415#if USE_ITT_BUILD1
1416 if (__itt_sync_create_ptr__kmp_itt_sync_create_ptr__3_0 || KMP_ITT_DEBUG0)
1417 __kmp_itt_barrier_middle(gtid, itt_sync_obj);
1418#endif /* USE_ITT_BUILD */
1419 }
1420 if (status == 1 || !is_split) {
1421 switch (__kmp_barrier_release_pattern[bt]) {
1422 case bp_hyper_bar: {
1423 KMP_ASSERT(__kmp_barrier_release_branch_bits[bt])if (!(__kmp_barrier_release_branch_bits[bt])) { __kmp_debug_assert
("__kmp_barrier_release_branch_bits[bt]", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_barrier.cpp"
, 1423); }
;
1424 __kmp_hyper_barrier_release(bt, this_thr, gtid, tid,
1425 FALSE0 USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj);
1426 break;
1427 }
1428 case bp_hierarchical_bar: {
1429 __kmp_hierarchical_barrier_release(
1430 bt, this_thr, gtid, tid, FALSE0 USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj);
1431 break;
1432 }
1433 case bp_tree_bar: {
1434 KMP_ASSERT(__kmp_barrier_release_branch_bits[bt])if (!(__kmp_barrier_release_branch_bits[bt])) { __kmp_debug_assert
("__kmp_barrier_release_branch_bits[bt]", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_barrier.cpp"
, 1434); }
;
1435 __kmp_tree_barrier_release(bt, this_thr, gtid, tid,
1436 FALSE0 USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj);
1437 break;
1438 }
1439 default: {
1440 __kmp_linear_barrier_release(bt, this_thr, gtid, tid,
1441 FALSE0 USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj);
1442 }
1443 }
1444 if (__kmp_tasking_mode != tskm_immediate_exec) {
1445 __kmp_task_team_sync(this_thr, team);
1446 }
1447 }
1448
1449#if USE_ITT_BUILD1
1450 /* GEH: TODO: Move this under if-condition above and also include in
1451 __kmp_end_split_barrier(). This will more accurately represent the actual
1452 release time of the threads for split barriers. */
1453 if (__itt_sync_create_ptr__kmp_itt_sync_create_ptr__3_0 || KMP_ITT_DEBUG0)
1454 __kmp_itt_barrier_finished(gtid, itt_sync_obj);
1455#endif /* USE_ITT_BUILD */
1456 } else { // Team is serialized.
1457 status = 0;
1458 if (__kmp_tasking_mode != tskm_immediate_exec) {
1459#if OMP_45_ENABLED(50 >= 45)
1460 if (this_thr->th.th_task_team != NULL__null) {
1461#if USE_ITT_NOTIFY1
1462 void *itt_sync_obj = NULL__null;
1463 if (__itt_sync_create_ptr__kmp_itt_sync_create_ptr__3_0 || KMP_ITT_DEBUG0) {
1464 itt_sync_obj = __kmp_itt_barrier_object(gtid, bt, 1);
1465 __kmp_itt_barrier_starting(gtid, itt_sync_obj);
1466 }
1467#endif
1468
1469 KMP_DEBUG_ASSERT(this_thr->th.th_task_team->tt.tt_found_proxy_tasks ==if (!(this_thr->th.th_task_team->tt.tt_found_proxy_tasks
== (!0))) { __kmp_debug_assert("this_thr->th.th_task_team->tt.tt_found_proxy_tasks == (!0)"
, "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_barrier.cpp"
, 1470); }
1470 TRUE)if (!(this_thr->th.th_task_team->tt.tt_found_proxy_tasks
== (!0))) { __kmp_debug_assert("this_thr->th.th_task_team->tt.tt_found_proxy_tasks == (!0)"
, "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_barrier.cpp"
, 1470); }
;
1471 __kmp_task_team_wait(this_thr, team USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj);
1472 __kmp_task_team_setup(this_thr, team, 0);
1473
1474#if USE_ITT_BUILD1
1475 if (__itt_sync_create_ptr__kmp_itt_sync_create_ptr__3_0 || KMP_ITT_DEBUG0)
1476 __kmp_itt_barrier_finished(gtid, itt_sync_obj);
1477#endif /* USE_ITT_BUILD */
1478 }
1479#else
1480 // The task team should be NULL for serialized code (tasks will be
1481 // executed immediately)
1482 KMP_DEBUG_ASSERT(team->t.t_task_team[this_thr->th.th_task_state] == NULL)if (!(team->t.t_task_team[this_thr->th.th_task_state] ==
__null)) { __kmp_debug_assert("team->t.t_task_team[this_thr->th.th_task_state] == __null"
, "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_barrier.cpp"
, 1482); }
;
1483 KMP_DEBUG_ASSERT(this_thr->th.th_task_team == NULL)if (!(this_thr->th.th_task_team == __null)) { __kmp_debug_assert
("this_thr->th.th_task_team == __null", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_barrier.cpp"
, 1483); }
;
1484#endif
1485 }
1486 }
1487 KA_TRACE(15, ("__kmp_barrier: T#%d(%d:%d) is leaving with return value %d\n",if (kmp_a_debug >= 15) { __kmp_debug_printf ("__kmp_barrier: T#%d(%d:%d) is leaving with return value %d\n"
, gtid, __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid
(gtid), status); }
1488 gtid, __kmp_team_from_gtid(gtid)->t.t_id,if (kmp_a_debug >= 15) { __kmp_debug_printf ("__kmp_barrier: T#%d(%d:%d) is leaving with return value %d\n"
, gtid, __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid
(gtid), status); }
1489 __kmp_tid_from_gtid(gtid), status))if (kmp_a_debug >= 15) { __kmp_debug_printf ("__kmp_barrier: T#%d(%d:%d) is leaving with return value %d\n"
, gtid, __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid
(gtid), status); }
;
1490
1491#if OMPT_SUPPORT1
1492 if (ompt_enabled.enabled) {
1493#if OMPT_OPTIONAL1
1494 if (ompt_enabled.ompt_callback_sync_region_wait) {
1495 ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)ompt_callback_sync_region_wait_callback(
1496 ompt_sync_region_barrier, ompt_scope_end, my_parallel_data,
1497 my_task_data, return_address);
1498 }
1499 if (ompt_enabled.ompt_callback_sync_region) {
1500 ompt_callbacks.ompt_callback(ompt_callback_sync_region)ompt_callback_sync_region_callback(
1501 ompt_sync_region_barrier, ompt_scope_end, my_parallel_data,
1502 my_task_data, return_address);
1503 }
1504#endif
1505 this_thr->th.ompt_thread_info.state = omp_state_work_parallel;
1506 }
1507#endif
1508 ANNOTATE_BARRIER_END(&team->t.t_bar);
1509
1510 return status;
1511}
1512
1513void __kmp_end_split_barrier(enum barrier_type bt, int gtid) {
1514 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_end_split_barrier)((void)0);
1515 KMP_SET_THREAD_STATE_BLOCK(PLAIN_BARRIER)((void)0);
1516 int tid = __kmp_tid_from_gtid(gtid);
1517 kmp_info_t *this_thr = __kmp_threads[gtid];
1518 kmp_team_t *team = this_thr->th.th_team;
1519
1520 ANNOTATE_BARRIER_BEGIN(&team->t.t_bar);
1521 if (!team->t.t_serialized) {
1522 if (KMP_MASTER_GTID(gtid)(__kmp_tid_from_gtid((gtid)) == 0)) {
1523 switch (__kmp_barrier_release_pattern[bt]) {
1524 case bp_hyper_bar: {
1525 KMP_ASSERT(__kmp_barrier_release_branch_bits[bt])if (!(__kmp_barrier_release_branch_bits[bt])) { __kmp_debug_assert
("__kmp_barrier_release_branch_bits[bt]", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_barrier.cpp"
, 1525); }
;
1526 __kmp_hyper_barrier_release(bt, this_thr, gtid, tid,
1527 FALSE0 USE_ITT_BUILD_ARG(NULL), __null);
1528 break;
1529 }
1530 case bp_hierarchical_bar: {
1531 __kmp_hierarchical_barrier_release(bt, this_thr, gtid, tid,
1532 FALSE0 USE_ITT_BUILD_ARG(NULL), __null);
1533 break;
1534 }
1535 case bp_tree_bar: {
1536 KMP_ASSERT(__kmp_barrier_release_branch_bits[bt])if (!(__kmp_barrier_release_branch_bits[bt])) { __kmp_debug_assert
("__kmp_barrier_release_branch_bits[bt]", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_barrier.cpp"
, 1536); }
;
1537 __kmp_tree_barrier_release(bt, this_thr, gtid, tid,
1538 FALSE0 USE_ITT_BUILD_ARG(NULL), __null);
1539 break;
1540 }
1541 default: {
1542 __kmp_linear_barrier_release(bt, this_thr, gtid, tid,
1543 FALSE0 USE_ITT_BUILD_ARG(NULL), __null);
1544 }
1545 }
1546 if (__kmp_tasking_mode != tskm_immediate_exec) {
1547 __kmp_task_team_sync(this_thr, team);
1548 } // if
1549 }
1550 }
1551 ANNOTATE_BARRIER_END(&team->t.t_bar);
1552}
1553
1554void __kmp_join_barrier(int gtid) {
1555 KMP_TIME_PARTITIONED_BLOCK(OMP_join_barrier)((void)0);
1556 KMP_SET_THREAD_STATE_BLOCK(FORK_JOIN_BARRIER)((void)0);
1557 kmp_info_t *this_thr = __kmp_threads[gtid];
1558 kmp_team_t *team;
1559 kmp_uint nproc;
1560 kmp_info_t *master_thread;
1561 int tid;
1562#ifdef KMP_DEBUG1
1563 int team_id;
1564#endif /* KMP_DEBUG */
1565#if USE_ITT_BUILD1
1566 void *itt_sync_obj = NULL__null;
1567#if USE_ITT_NOTIFY1
1568 if (__itt_sync_create_ptr__kmp_itt_sync_create_ptr__3_0 || KMP_ITT_DEBUG0) // Don't call routine without need
1569 // Get object created at fork_barrier
1570 itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
1571#endif
1572#endif /* USE_ITT_BUILD */
1573 KMP_MB();
1574
1575 // Get current info
1576 team = this_thr->th.th_team;
1577 nproc = this_thr->th.th_team_nproc;
1578 KMP_DEBUG_ASSERT((int)nproc == team->t.t_nproc)if (!((int)nproc == team->t.t_nproc)) { __kmp_debug_assert
("(int)nproc == team->t.t_nproc", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_barrier.cpp"
, 1578); }
;
1579 tid = __kmp_tid_from_gtid(gtid);
1580#ifdef KMP_DEBUG1
1581 team_id = team->t.t_id;
1582#endif /* KMP_DEBUG */
1583 master_thread = this_thr->th.th_team_master;
1584#ifdef KMP_DEBUG1
1585 if (master_thread != team->t.t_threads[0]) {
1586 __kmp_print_structure();
1587 }
1588#endif /* KMP_DEBUG */
1589 KMP_DEBUG_ASSERT(master_thread == team->t.t_threads[0])if (!(master_thread == team->t.t_threads[0])) { __kmp_debug_assert
("master_thread == team->t.t_threads[0]", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_barrier.cpp"
, 1589); }
;
1590 KMP_MB();
1591
1592 // Verify state
1593 KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid])if (!(__kmp_threads && __kmp_threads[gtid])) { __kmp_debug_assert
("__kmp_threads && __kmp_threads[gtid]", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_barrier.cpp"
, 1593); }
;
1594 KMP_DEBUG_ASSERT(TCR_PTR(this_thr->th.th_team))if (!(((void *)(this_thr->th.th_team)))) { __kmp_debug_assert
("((void *)(this_thr->th.th_team))", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_barrier.cpp"
, 1594); }
;
1595 KMP_DEBUG_ASSERT(TCR_PTR(this_thr->th.th_root))if (!(((void *)(this_thr->th.th_root)))) { __kmp_debug_assert
("((void *)(this_thr->th.th_root))", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_barrier.cpp"
, 1595); }
;
1596 KMP_DEBUG_ASSERT(this_thr == team->t.t_threads[tid])if (!(this_thr == team->t.t_threads[tid])) { __kmp_debug_assert
("this_thr == team->t.t_threads[tid]", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_barrier.cpp"
, 1596); }
;
1597 KA_TRACE(10, ("__kmp_join_barrier: T#%d(%d:%d) arrived at join barrier\n",if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_join_barrier: T#%d(%d:%d) arrived at join barrier\n"
, gtid, team_id, tid); }
1598 gtid, team_id, tid))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_join_barrier: T#%d(%d:%d) arrived at join barrier\n"
, gtid, team_id, tid); }
;
1599
1600 ANNOTATE_BARRIER_BEGIN(&team->t.t_bar);
1601#if OMPT_SUPPORT1
1602 if (ompt_enabled.enabled) {
1603#if OMPT_OPTIONAL1
1604 ompt_data_t *my_task_data;
1605 ompt_data_t *my_parallel_data;
1606 void *codeptr = NULL__null;
1607 int ds_tid = this_thr->th.th_info.ds.ds_tid;
1608 if (KMP_MASTER_TID(ds_tid)((ds_tid) == 0) &&
1609 (ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)ompt_callback_sync_region_wait_callback ||
1610 ompt_callbacks.ompt_callback(ompt_callback_sync_region)ompt_callback_sync_region_callback))
1611 codeptr = team->t.ompt_team_info.master_return_address;
1612 my_task_data = OMPT_CUR_TASK_DATA(this_thr)(&(this_thr->th.th_current_task->ompt_task_info.task_data
))
;
1613 my_parallel_data = OMPT_CUR_TEAM_DATA(this_thr)(&(this_thr->th.th_team->t.ompt_team_info.parallel_data
))
;
1614 if (ompt_enabled.ompt_callback_sync_region) {
1615 ompt_callbacks.ompt_callback(ompt_callback_sync_region)ompt_callback_sync_region_callback(
1616 ompt_sync_region_barrier, ompt_scope_begin, my_parallel_data,
1617 my_task_data, codeptr);
1618 }
1619 if (ompt_enabled.ompt_callback_sync_region_wait) {
1620 ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)ompt_callback_sync_region_wait_callback(
1621 ompt_sync_region_barrier, ompt_scope_begin, my_parallel_data,
1622 my_task_data, codeptr);
1623 }
1624 if (!KMP_MASTER_TID(ds_tid)((ds_tid) == 0))
1625 this_thr->th.ompt_thread_info.task_data = *OMPT_CUR_TASK_DATA(this_thr)(&(this_thr->th.th_current_task->ompt_task_info.task_data
))
;
1626#endif
1627 this_thr->th.ompt_thread_info.state = omp_state_wait_barrier_implicit;
1628 }
1629#endif
1630
1631 if (__kmp_tasking_mode == tskm_extra_barrier) {
1632 __kmp_tasking_barrier(team, this_thr, gtid);
1633 KA_TRACE(10, ("__kmp_join_barrier: T#%d(%d:%d) past tasking barrier\n", gtid,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_join_barrier: T#%d(%d:%d) past tasking barrier\n"
, gtid, team_id, tid); }
1634 team_id, tid))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_join_barrier: T#%d(%d:%d) past tasking barrier\n"
, gtid, team_id, tid); }
;
1635 }
1636#ifdef KMP_DEBUG1
1637 if (__kmp_tasking_mode != tskm_immediate_exec) {
1638 KA_TRACE(20, ("__kmp_join_barrier: T#%d, old team = %d, old task_team = "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_join_barrier: T#%d, old team = %d, old task_team = "
"%p, th_task_team = %p\n", __kmp_gtid_from_thread(this_thr),
team_id, team->t.t_task_team[this_thr->th.th_task_state
], this_thr->th.th_task_team); }
1639 "%p, th_task_team = %p\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_join_barrier: T#%d, old team = %d, old task_team = "
"%p, th_task_team = %p\n", __kmp_gtid_from_thread(this_thr),
team_id, team->t.t_task_team[this_thr->th.th_task_state
], this_thr->th.th_task_team); }
1640 __kmp_gtid_from_thread(this_thr), team_id,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_join_barrier: T#%d, old team = %d, old task_team = "
"%p, th_task_team = %p\n", __kmp_gtid_from_thread(this_thr),
team_id, team->t.t_task_team[this_thr->th.th_task_state
], this_thr->th.th_task_team); }
1641 team->t.t_task_team[this_thr->th.th_task_state],if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_join_barrier: T#%d, old team = %d, old task_team = "
"%p, th_task_team = %p\n", __kmp_gtid_from_thread(this_thr),
team_id, team->t.t_task_team[this_thr->th.th_task_state
], this_thr->th.th_task_team); }
1642 this_thr->th.th_task_team))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_join_barrier: T#%d, old team = %d, old task_team = "
"%p, th_task_team = %p\n", __kmp_gtid_from_thread(this_thr),
team_id, team->t.t_task_team[this_thr->th.th_task_state
], this_thr->th.th_task_team); }
;
1643 KMP_DEBUG_ASSERT(this_thr->th.th_task_team ==if (!(this_thr->th.th_task_team == team->t.t_task_team[
this_thr->th.th_task_state])) { __kmp_debug_assert("this_thr->th.th_task_team == team->t.t_task_team[this_thr->th.th_task_state]"
, "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_barrier.cpp"
, 1644); }
1644 team->t.t_task_team[this_thr->th.th_task_state])if (!(this_thr->th.th_task_team == team->t.t_task_team[
this_thr->th.th_task_state])) { __kmp_debug_assert("this_thr->th.th_task_team == team->t.t_task_team[this_thr->th.th_task_state]"
, "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_barrier.cpp"
, 1644); }
;
1645 }
1646#endif /* KMP_DEBUG */
1647
1648 /* Copy the blocktime info to the thread, where __kmp_wait_template() can
1649 access it when the team struct is not guaranteed to exist. Doing these
1650 loads causes a cache miss and slows down EPCC parallel by 2x. As a workaround,
1651 we do not perform the copy if blocktime=infinite, since the values are not
1652 used by __kmp_wait_template() in that case. */
1653 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME(2147483647)) {
1654#if KMP_USE_MONITOR
1655 this_thr->th.th_team_bt_intervals =
1656 team->t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals;
1657 this_thr->th.th_team_bt_set =
1658 team->t.t_implicit_task_taskdata[tid].td_icvs.bt_set;
1659#else
1660 this_thr->th.th_team_bt_intervals = KMP_BLOCKTIME_INTERVAL(team, tid)((((team)->t.t_threads[(tid)]->th.th_current_task->td_icvs
.bt_set) ? ((team)->t.t_threads[(tid)]->th.th_current_task
->td_icvs.blocktime) : __kmp_dflt_blocktime) * __kmp_ticks_per_msec
)
;
1661#endif
1662 }
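Note: the comment above describes a pattern used in several places in this file — team-level settings are snapshotted into the thread struct before a wait during which the team struct may no longer be safe to read. A minimal standalone sketch of that idea (team_cfg, worker and prepare_to_wait are hypothetical names, not runtime APIs):

  #include <cstdint>

  struct team_cfg { std::int64_t bt_intervals; bool bt_set; };
  struct worker   { std::int64_t cached_bt_intervals; bool cached_bt_set; };

  // Copy what the wait loop will need while the shared struct is still valid.
  // When blocktime is "infinite" the wait loop never reads these fields, so the
  // loads (and their cache misses) are skipped entirely.
  void prepare_to_wait(worker &w, const team_cfg &cfg, bool infinite_blocktime) {
    if (!infinite_blocktime) {
      w.cached_bt_intervals = cfg.bt_intervals;
      w.cached_bt_set = cfg.bt_set;
    }
  }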
1663
1664#if USE_ITT_BUILD1
1665 if (__itt_sync_create_ptr__kmp_itt_sync_create_ptr__3_0 || KMP_ITT_DEBUG0)
1666 __kmp_itt_barrier_starting(gtid, itt_sync_obj);
1667#endif /* USE_ITT_BUILD */
1668
1669 switch (__kmp_barrier_gather_pattern[bs_forkjoin_barrier]) {
1670 case bp_hyper_bar: {
1671 KMP_ASSERT(__kmp_barrier_gather_branch_bits[bs_forkjoin_barrier])if (!(__kmp_barrier_gather_branch_bits[bs_forkjoin_barrier]))
{ __kmp_debug_assert("__kmp_barrier_gather_branch_bits[bs_forkjoin_barrier]"
, "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_barrier.cpp"
, 1671); }
;
1672 __kmp_hyper_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid,
1673 NULL__null USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj);
1674 break;
1675 }
1676 case bp_hierarchical_bar: {
1677 __kmp_hierarchical_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid,
1678 NULL__null USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj);
1679 break;
1680 }
1681 case bp_tree_bar: {
1682 KMP_ASSERT(__kmp_barrier_gather_branch_bits[bs_forkjoin_barrier])if (!(__kmp_barrier_gather_branch_bits[bs_forkjoin_barrier]))
{ __kmp_debug_assert("__kmp_barrier_gather_branch_bits[bs_forkjoin_barrier]"
, "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_barrier.cpp"
, 1682); }
;
1683 __kmp_tree_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid,
1684 NULL__null USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj);
1685 break;
1686 }
1687 default: {
1688 __kmp_linear_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid,
1689 NULL__null USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj);
1690 }
1691 }
1692
1693 /* From this point on, the team data structure may be deallocated at any time
1694 by the master thread - it is unsafe to reference it in any of the worker
1695 threads. Any per-team data items that need to be referenced before the
1696 end of the barrier should be moved to the kmp_task_team_t structs. */
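Note: a standalone illustration of the lifetime rule stated above, with purely hypothetical names (team_t, snapshot_t, barrier_gather) — it only shows why a worker must copy what it needs before the gather completes:

  struct team_t     { int id; };
  struct snapshot_t { int team_id; };

  void barrier_gather() { /* stand-in for the real gather */ }

  void worker_path(const team_t *team, snapshot_t &snap) {
    snap.team_id = team->id;   // copy while 'team' is guaranteed to be alive
    barrier_gather();          // after this returns, the master may free 'team'
    // From here on, read only 'snap' (or data moved into kmp_task_team_t, as the
    // comment above says); dereferencing 'team' again would race with its
    // deallocation by the master.
  }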
1697 if (KMP_MASTER_TID(tid)((tid) == 0)) {
1698 if (__kmp_tasking_mode != tskm_immediate_exec) {
1699 __kmp_task_team_wait(this_thr, team USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj);
1700 }
1701#if KMP_STATS_ENABLED0
1702 // Have the master thread flag the workers to indicate they are now waiting for
1703 // the next parallel region. Also wake them up so they switch their timers to
1704 // idle.
1705 for (int i = 0; i < team->t.t_nproc; ++i) {
1706 kmp_info_t *team_thread = team->t.t_threads[i];
1707 if (team_thread == this_thr)
1708 continue;
1709 team_thread->th.th_stats->setIdleFlag();
1710 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME(2147483647) &&
1711 team_thread->th.th_sleep_loc != NULL__null)
1712 __kmp_null_resume_wrapper(__kmp_gtid_from_thread(team_thread),
1713 team_thread->th.th_sleep_loc);
1714 }
1715#endif
1716#if USE_ITT_BUILD1
1717 if (__itt_sync_create_ptr__kmp_itt_sync_create_ptr__3_0 || KMP_ITT_DEBUG0)
1718 __kmp_itt_barrier_middle(gtid, itt_sync_obj);
1719#endif /* USE_ITT_BUILD */
1720
1721#if USE_ITT_BUILD1 && USE_ITT_NOTIFY1
1722 // Join barrier - report frame end
1723 if ((__itt_frame_submit_v3_ptr__kmp_itt_frame_submit_v3_ptr__3_0 || KMP_ITT_DEBUG0) &&
1724 __kmp_forkjoin_frames_mode &&
1725#if OMP_40_ENABLED(50 >= 40)
1726 this_thr->th.th_teams_microtask == NULL__null &&
1727#endif
1728 team->t.t_active_level == 1) {
1729 kmp_uint64 cur_time = __itt_get_timestamp(!__kmp_itt_get_timestamp_ptr__3_0) ? 0 : __kmp_itt_get_timestamp_ptr__3_0();
1730 ident_t *loc = team->t.t_ident;
1731 kmp_info_t **other_threads = team->t.t_threads;
1732 int nproc = this_thr->th.th_team_nproc;
1733 int i;
1734 switch (__kmp_forkjoin_frames_mode) {
1735 case 1:
1736 __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0,
1737 loc, nproc);
1738 break;
1739 case 2:
1740 __kmp_itt_frame_submit(gtid, this_thr->th.th_bar_min_time, cur_time, 1,
1741 loc, nproc);
1742 break;
1743 case 3:
1744 if (__itt_metadata_add_ptr__kmp_itt_metadata_add_ptr__3_0) {
1745 // Initialize with master's wait time
1746 kmp_uint64 delta = cur_time - this_thr->th.th_bar_arrive_time;
1747 // Set arrive time to zero to be able to check it in
1748 // __kmp_invoke_task(); the same is done inside the loop below
1749 this_thr->th.th_bar_arrive_time = 0;
1750 for (i = 1; i < nproc; ++i) {
1751 delta += (cur_time - other_threads[i]->th.th_bar_arrive_time);
1752 other_threads[i]->th.th_bar_arrive_time = 0;
1753 }
1754 __kmp_itt_metadata_imbalance(gtid, this_thr->th.th_frame_time,
1755 cur_time, delta, 0);
1756 }
1757 __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0,
1758 loc, nproc);
1759 this_thr->th.th_frame_time = cur_time;
1760 break;
1761 }
1762 }
1763#endif /* USE_ITT_BUILD */
1764 }
1765#if USE_ITT_BUILD1
1766 else {
1767 if (__itt_sync_create_ptr__kmp_itt_sync_create_ptr__3_0 || KMP_ITT_DEBUG0)
1768 __kmp_itt_barrier_middle(gtid, itt_sync_obj);
1769 }
1770#endif /* USE_ITT_BUILD */
1771
1772#if KMP_DEBUG1
1773 if (KMP_MASTER_TID(tid)((tid) == 0)) {
1774 KA_TRACE(if (kmp_a_debug >= 15) { __kmp_debug_printf ("__kmp_join_barrier: T#%d(%d:%d) says all %d team threads arrived\n"
, gtid, team_id, tid, nproc); }
1775 15,if (kmp_a_debug >= 15) { __kmp_debug_printf ("__kmp_join_barrier: T#%d(%d:%d) says all %d team threads arrived\n"
, gtid, team_id, tid, nproc); }
1776 ("__kmp_join_barrier: T#%d(%d:%d) says all %d team threads arrived\n",if (kmp_a_debug >= 15) { __kmp_debug_printf ("__kmp_join_barrier: T#%d(%d:%d) says all %d team threads arrived\n"
, gtid, team_id, tid, nproc); }
1777 gtid, team_id, tid, nproc))if (kmp_a_debug >= 15) { __kmp_debug_printf ("__kmp_join_barrier: T#%d(%d:%d) says all %d team threads arrived\n"
, gtid, team_id, tid, nproc); }
;
1778 }
1779#endif /* KMP_DEBUG */
1780
1781 // TODO now, mark worker threads as done so they may be disbanded
1782 KMP_MB(); // Flush all pending memory write invalidates.
1783 KA_TRACE(10,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_join_barrier: T#%d(%d:%d) leaving\n"
, gtid, team_id, tid); }
1784 ("__kmp_join_barrier: T#%d(%d:%d) leaving\n", gtid, team_id, tid))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_join_barrier: T#%d(%d:%d) leaving\n"
, gtid, team_id, tid); }
;
1785
1786 ANNOTATE_BARRIER_END(&team->t.t_bar);
1787}
1788
1789// TODO release worker threads' fork barriers as we are ready instead of all at
1790// once
1791void __kmp_fork_barrier(int gtid, int tid) {
1792 KMP_TIME_PARTITIONED_BLOCK(OMP_fork_barrier)((void)0);
1793 KMP_SET_THREAD_STATE_BLOCK(FORK_JOIN_BARRIER)((void)0);
1794 kmp_info_t *this_thr = __kmp_threads[gtid];
1795 kmp_team_t *team = (tid == 0) ? this_thr->th.th_team : NULL__null;
1
Assuming 'tid' is not equal to 0
2
'?' condition is false
1796#if USE_ITT_BUILD1
1797 void *itt_sync_obj = NULL__null;
1798#endif /* USE_ITT_BUILD */
1799 if (team)
3
Taking false branch
1800 ANNOTATE_BARRIER_END(&team->t.t_bar);
1801
1802 KA_TRACE(10, ("__kmp_fork_barrier: T#%d(%d:%d) has arrived\n", gtid,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_fork_barrier: T#%d(%d:%d) has arrived\n"
, gtid, (team != __null) ? team->t.t_id : -1, tid); }
1803 (team != NULL) ? team->t.t_id : -1, tid))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_fork_barrier: T#%d(%d:%d) has arrived\n"
, gtid, (team != __null) ? team->t.t_id : -1, tid); }
;
1804
1805 // th_team pointer only valid for master thread here
1806 if (KMP_MASTER_TID(tid)((tid) == 0)) {
4
Taking false branch
1807#if USE_ITT_BUILD1 && USE_ITT_NOTIFY1
1808 if (__itt_sync_create_ptr__kmp_itt_sync_create_ptr__3_0 || KMP_ITT_DEBUG0) {
1809 // Create itt barrier object
1810 itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 1);
1811 __kmp_itt_barrier_middle(gtid, itt_sync_obj); // Call acquired/releasing
1812 }
1813#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
1814
1815#ifdef KMP_DEBUG1
1816 kmp_info_t **other_threads = team->t.t_threads;
1817 int i;
1818
1819 // Verify state
1820 KMP_MB();
1821
1822 for (i = 1; i < team->t.t_nproc; ++i) {
1823 KA_TRACE(500,if (kmp_a_debug >= 500) { __kmp_debug_printf ("__kmp_fork_barrier: T#%d(%d:0) checking T#%d(%d:%d) fork go "
"== %u.\n", gtid, team->t.t_id, other_threads[i]->th.th_info
.ds.ds_gtid, team->t.t_id, other_threads[i]->th.th_info
.ds.ds_tid, other_threads[i]->th.th_bar[bs_forkjoin_barrier
].bb.b_go); }
1824 ("__kmp_fork_barrier: T#%d(%d:0) checking T#%d(%d:%d) fork go "if (kmp_a_debug >= 500) { __kmp_debug_printf ("__kmp_fork_barrier: T#%d(%d:0) checking T#%d(%d:%d) fork go "
"== %u.\n", gtid, team->t.t_id, other_threads[i]->th.th_info
.ds.ds_gtid, team->t.t_id, other_threads[i]->th.th_info
.ds.ds_tid, other_threads[i]->th.th_bar[bs_forkjoin_barrier
].bb.b_go); }
1825 "== %u.\n",if (kmp_a_debug >= 500) { __kmp_debug_printf ("__kmp_fork_barrier: T#%d(%d:0) checking T#%d(%d:%d) fork go "
"== %u.\n", gtid, team->t.t_id, other_threads[i]->th.th_info
.ds.ds_gtid, team->t.t_id, other_threads[i]->th.th_info
.ds.ds_tid, other_threads[i]->th.th_bar[bs_forkjoin_barrier
].bb.b_go); }
1826 gtid, team->t.t_id, other_threads[i]->th.th_info.ds.ds_gtid,if (kmp_a_debug >= 500) { __kmp_debug_printf ("__kmp_fork_barrier: T#%d(%d:0) checking T#%d(%d:%d) fork go "
"== %u.\n", gtid, team->t.t_id, other_threads[i]->th.th_info
.ds.ds_gtid, team->t.t_id, other_threads[i]->th.th_info
.ds.ds_tid, other_threads[i]->th.th_bar[bs_forkjoin_barrier
].bb.b_go); }
1827 team->t.t_id, other_threads[i]->th.th_info.ds.ds_tid,if (kmp_a_debug >= 500) { __kmp_debug_printf ("__kmp_fork_barrier: T#%d(%d:0) checking T#%d(%d:%d) fork go "
"== %u.\n", gtid, team->t.t_id, other_threads[i]->th.th_info
.ds.ds_gtid, team->t.t_id, other_threads[i]->th.th_info
.ds.ds_tid, other_threads[i]->th.th_bar[bs_forkjoin_barrier
].bb.b_go); }
1828 other_threads[i]->th.th_bar[bs_forkjoin_barrier].bb.b_go))if (kmp_a_debug >= 500) { __kmp_debug_printf ("__kmp_fork_barrier: T#%d(%d:0) checking T#%d(%d:%d) fork go "
"== %u.\n", gtid, team->t.t_id, other_threads[i]->th.th_info
.ds.ds_gtid, team->t.t_id, other_threads[i]->th.th_info
.ds.ds_tid, other_threads[i]->th.th_bar[bs_forkjoin_barrier
].bb.b_go); }
;
1829 KMP_DEBUG_ASSERT(if (!(((other_threads[i]->th.th_bar[bs_forkjoin_barrier].bb
.b_go) & ~((1 << 0))) == 0)) { __kmp_debug_assert("((other_threads[i]->th.th_bar[bs_forkjoin_barrier].bb.b_go) & ~((1 << 0))) == 0"
, "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_barrier.cpp"
, 1831); }
1830 (TCR_4(other_threads[i]->th.th_bar[bs_forkjoin_barrier].bb.b_go) &if (!(((other_threads[i]->th.th_bar[bs_forkjoin_barrier].bb
.b_go) & ~((1 << 0))) == 0)) { __kmp_debug_assert("((other_threads[i]->th.th_bar[bs_forkjoin_barrier].bb.b_go) & ~((1 << 0))) == 0"
, "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_barrier.cpp"
, 1831); }
1831 ~(KMP_BARRIER_SLEEP_STATE)) == KMP_INIT_BARRIER_STATE)if (!(((other_threads[i]->th.th_bar[bs_forkjoin_barrier].bb
.b_go) & ~((1 << 0))) == 0)) { __kmp_debug_assert("((other_threads[i]->th.th_bar[bs_forkjoin_barrier].bb.b_go) & ~((1 << 0))) == 0"
, "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_barrier.cpp"
, 1831); }
;
1832 KMP_DEBUG_ASSERT(other_threads[i]->th.th_team == team)if (!(other_threads[i]->th.th_team == team)) { __kmp_debug_assert
("other_threads[i]->th.th_team == team", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_barrier.cpp"
, 1832); }
;
1833 }
1834#endif
1835
1836 if (__kmp_tasking_mode != tskm_immediate_exec) {
1837 // 0 indicates setup current task team if nthreads > 1
1838 __kmp_task_team_setup(this_thr, team, 0);
1839 }
1840
1841 /* The master thread may have changed its blocktime between the join barrier
1842 and the fork barrier. Copy the blocktime info to the thread, where
1843 __kmp_wait_template() can access it when the team struct is not
1844 guaranteed to exist. */
1845 // See note about the corresponding code in __kmp_join_barrier() being
1846 // performance-critical
1847 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME(2147483647)) {
1848#if KMP_USE_MONITOR
1849 this_thr->th.th_team_bt_intervals =
1850 team->t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals;
1851 this_thr->th.th_team_bt_set =
1852 team->t.t_implicit_task_taskdata[tid].td_icvs.bt_set;
1853#else
1854 this_thr->th.th_team_bt_intervals = KMP_BLOCKTIME_INTERVAL(team, tid)((((team)->t.t_threads[(tid)]->th.th_current_task->td_icvs
.bt_set) ? ((team)->t.t_threads[(tid)]->th.th_current_task
->td_icvs.blocktime) : __kmp_dflt_blocktime) * __kmp_ticks_per_msec
)
;
1855#endif
1856 }
1857 } // master
1858
1859 switch (__kmp_barrier_release_pattern[bs_forkjoin_barrier]) {
5
Control jumps to the 'default' case at line 1877
1860 case bp_hyper_bar: {
1861 KMP_ASSERT(__kmp_barrier_release_branch_bits[bs_forkjoin_barrier])if (!(__kmp_barrier_release_branch_bits[bs_forkjoin_barrier])
) { __kmp_debug_assert("__kmp_barrier_release_branch_bits[bs_forkjoin_barrier]"
, "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_barrier.cpp"
, 1861); }
;
1862 __kmp_hyper_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid,
1863 TRUE(!0) USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj);
1864 break;
1865 }
1866 case bp_hierarchical_bar: {
1867 __kmp_hierarchical_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid,
1868 TRUE(!0) USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj);
1869 break;
1870 }
1871 case bp_tree_bar: {
1872 KMP_ASSERT(__kmp_barrier_release_branch_bits[bs_forkjoin_barrier])if (!(__kmp_barrier_release_branch_bits[bs_forkjoin_barrier])
) { __kmp_debug_assert("__kmp_barrier_release_branch_bits[bs_forkjoin_barrier]"
, "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_barrier.cpp"
, 1872); }
;
1873 __kmp_tree_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid,
1874 TRUE(!0) USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj);
1875 break;
1876 }
1877 default: {
1878 __kmp_linear_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid,
6
Calling '__kmp_linear_barrier_release'
14
Returning from '__kmp_linear_barrier_release'
1879 TRUE(!0) USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj);
1880 }
1881 }
1882
1883#if OMPT_SUPPORT1
1884 if (ompt_enabled.enabled &&
15
Assuming the condition is true
17
Taking true branch
1885 this_thr->th.ompt_thread_info.state == omp_state_wait_barrier_implicit) {
16
Assuming the condition is true
1886 int ds_tid = this_thr->th.th_info.ds.ds_tid;
1887 ompt_data_t *task_data = (team)
18
'?' condition is false
1888 ? OMPT_CUR_TASK_DATA(this_thr)(&(this_thr->th.th_current_task->ompt_task_info.task_data
))
1889 : &(this_thr->th.ompt_thread_info.task_data);
1890 this_thr->th.ompt_thread_info.state = omp_state_overhead;
1891#if OMPT_OPTIONAL1
1892 void *codeptr = NULL__null;
1893 if (KMP_MASTER_TID(ds_tid)((ds_tid) == 0) &&
21
Within the expansion of the macro 'KMP_MASTER_TID':
a
Assuming pointer value is null
22
Taking false branch
1894 (ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)ompt_callback_sync_region_wait_callback ||
19
Assuming the condition is false
1895 ompt_callbacks.ompt_callback(ompt_callback_sync_region)ompt_callback_sync_region_callback))
20
Assuming the condition is false
1896 codeptr = team->t.ompt_team_info.master_return_address;
1897 if (ompt_enabled.ompt_callback_sync_region_wait) {
23
Assuming the condition is false
24
Taking false branch
1898 ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)ompt_callback_sync_region_wait_callback(
1899 ompt_sync_region_barrier, ompt_scope_end, NULL__null, task_data, codeptr);
1900 }
1901 if (ompt_enabled.ompt_callback_sync_region) {
25
Assuming the condition is true
26
Taking true branch
1902 ompt_callbacks.ompt_callback(ompt_callback_sync_region)ompt_callback_sync_region_callback(
27
Called function pointer is null (null dereference)
1903 ompt_sync_region_barrier, ompt_scope_end, NULL__null, task_data, codeptr);
1904 }
1905#endif
1906 if (!KMP_MASTER_TID(ds_tid)((ds_tid) == 0) && ompt_enabled.ompt_callback_implicit_task) {
1907 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)ompt_callback_implicit_task_callback(
1908 ompt_scope_end, NULL__null, task_data, 0, ds_tid);
1909 }
1910 }
1911#endif
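Note on the warning reported above at line 1902 ("Called function pointer is null"): on this path the analyzer assumes ompt_enabled.ompt_callback_sync_region is nonzero while the matching entry in ompt_callbacks is a null pointer, so the indirect call dereferences null. One defensive shape — a sketch only, not the runtime's actual fix, and assuming the OMPT callback typedef is named ompt_callback_sync_region_t — is to load the pointer once and test the pointer itself before calling:

  // Guard the indirect call on the pointer, not only on the 'enabled' bit.
  ompt_callback_sync_region_t cb =
      ompt_callbacks.ompt_callback(ompt_callback_sync_region);
  if (cb)
    cb(ompt_sync_region_barrier, ompt_scope_end, NULL, task_data, codeptr);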
1912
1913 // Early exit for reaping threads releasing forkjoin barrier
1914 if (TCR_4(__kmp_global.g.g_done)(__kmp_global.g.g_done)) {
1915 this_thr->th.th_task_team = NULL__null;
1916
1917#if USE_ITT_BUILD1 && USE_ITT_NOTIFY1
1918 if (__itt_sync_create_ptr__kmp_itt_sync_create_ptr__3_0 || KMP_ITT_DEBUG0) {
1919 if (!KMP_MASTER_TID(tid)((tid) == 0)) {
1920 itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
1921 if (itt_sync_obj)
1922 __kmp_itt_barrier_finished(gtid, itt_sync_obj);
1923 }
1924 }
1925#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
1926 KA_TRACE(10, ("__kmp_fork_barrier: T#%d is leaving early\n", gtid))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_fork_barrier: T#%d is leaving early\n"
, gtid); }
;
1927 return;
1928 }
1929
1930 /* We can now assume that a valid team structure has been allocated by the
1931 master and propagated to all worker threads. The current thread, however,
1932 may not be part of the team, so we can't blindly assume that the team
1933 pointer is non-null. */
1934 team = (kmp_team_t *)TCR_PTR(this_thr->th.th_team)((void *)(this_thr->th.th_team));
1935 KMP_DEBUG_ASSERT(team != NULL)if (!(team != __null)) { __kmp_debug_assert("team != __null",
"/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_barrier.cpp"
, 1935); }
;
1936 tid = __kmp_tid_from_gtid(gtid);
1937
1938#if KMP_BARRIER_ICV_PULL
1939 /* Master thread's copy of the ICVs was set up on the implicit taskdata in
1940 __kmp_reinitialize_team. __kmp_fork_call() assumes the master thread's
1941 implicit task has this data before this function is called. We cannot
1942 modify __kmp_fork_call() to look at the fixed ICVs in the master's thread
1943 struct, because it is not always the case that the threads arrays have
1944 been allocated when __kmp_fork_call() is executed. */
1945 {
1946 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_icv_copy)((void)0);
1947 if (!KMP_MASTER_TID(tid)((tid) == 0)) { // master thread already has ICVs
1948 // Copy the initial ICVs from the master's thread struct to the implicit
1949 // task for this tid.
1950 KA_TRACE(10,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_fork_barrier: T#%d(%d) is PULLing ICVs\n"
, gtid, tid); }
1951 ("__kmp_fork_barrier: T#%d(%d) is PULLing ICVs\n", gtid, tid))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_fork_barrier: T#%d(%d) is PULLing ICVs\n"
, gtid, tid); }
;
1952 __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team,
1953 tid, FALSE0);
1954 copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
1955 &team->t.t_threads[0]
1956 ->th.th_bar[bs_forkjoin_barrier]
1957 .bb.th_fixed_icvs);
1958 }
1959 }
1960#endif // KMP_BARRIER_ICV_PULL
1961
1962 if (__kmp_tasking_mode != tskm_immediate_exec) {
1963 __kmp_task_team_sync(this_thr, team);
1964 }
1965
1966#if OMP_40_ENABLED(50 >= 40) && KMP_AFFINITY_SUPPORTED1
1967 kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
1968 if (proc_bind == proc_bind_intel) {
1969#endif
1970#if KMP_AFFINITY_SUPPORTED1
1971 // Call dynamic affinity settings
1972 if (__kmp_affinity_type == affinity_balanced && team->t.t_size_changed) {
1973 __kmp_balanced_affinity(this_thr, team->t.t_nproc);
1974 }
1975#endif // KMP_AFFINITY_SUPPORTED
1976#if OMP_40_ENABLED(50 >= 40) && KMP_AFFINITY_SUPPORTED1
1977 } else if (proc_bind != proc_bind_false) {
1978 if (this_thr->th.th_new_place == this_thr->th.th_current_place) {
1979 KA_TRACE(100, ("__kmp_fork_barrier: T#%d already in correct place %d\n",if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_fork_barrier: T#%d already in correct place %d\n"
, __kmp_gtid_from_thread(this_thr), this_thr->th.th_current_place
); }
1980 __kmp_gtid_from_thread(this_thr),if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_fork_barrier: T#%d already in correct place %d\n"
, __kmp_gtid_from_thread(this_thr), this_thr->th.th_current_place
); }
1981 this_thr->th.th_current_place))if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_fork_barrier: T#%d already in correct place %d\n"
, __kmp_gtid_from_thread(this_thr), this_thr->th.th_current_place
); }
;
1982 } else {
1983 __kmp_affinity_set_place(gtid);
1984 }
1985 }
1986#endif
1987#if OMP_50_ENABLED(50 >= 50)
1988 if (!KMP_MASTER_TID(tid)((tid) == 0))
1989 KMP_CHECK_UPDATE(this_thr->th.th_def_allocator, team->t.t_def_allocator)if ((this_thr->th.th_def_allocator) != (team->t.t_def_allocator
)) (this_thr->th.th_def_allocator) = (team->t.t_def_allocator
)
;
1990#endif
1991
1992#if USE_ITT_BUILD1 && USE_ITT_NOTIFY1
1993 if (__itt_sync_create_ptr__kmp_itt_sync_create_ptr__3_0 || KMP_ITT_DEBUG0) {
1994 if (!KMP_MASTER_TID(tid)((tid) == 0)) {
1995 // Get correct barrier object
1996 itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
1997 __kmp_itt_barrier_finished(gtid, itt_sync_obj); // Workers call acquired
1998 } // (prepare called inside barrier_release)
1999 }
2000#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
2001 ANNOTATE_BARRIER_END(&team->t.t_bar);
2002 KA_TRACE(10, ("__kmp_fork_barrier: T#%d(%d:%d) is leaving\n", gtid,if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_fork_barrier: T#%d(%d:%d) is leaving\n"
, gtid, team->t.t_id, tid); }
2003 team->t.t_id, tid))if (kmp_a_debug >= 10) { __kmp_debug_printf ("__kmp_fork_barrier: T#%d(%d:%d) is leaving\n"
, gtid, team->t.t_id, tid); }
;
2004}
2005
2006void __kmp_setup_icv_copy(kmp_team_t *team, int new_nproc,
2007 kmp_internal_control_t *new_icvs, ident_t *loc) {
2008 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_setup_icv_copy)((void)0);
2009
2010 KMP_DEBUG_ASSERT(team && new_nproc && new_icvs)if (!(team && new_nproc && new_icvs)) { __kmp_debug_assert
("team && new_nproc && new_icvs", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_barrier.cpp"
, 2010); }
;
2011 KMP_DEBUG_ASSERT((!TCR_4(__kmp_init_parallel)) || new_icvs->nproc)if (!((!(__kmp_init_parallel)) || new_icvs->nproc)) { __kmp_debug_assert
("(!(__kmp_init_parallel)) || new_icvs->nproc", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_barrier.cpp"
, 2011); }
;
2012
2013/* Master thread's copy of the ICVs was set up on the implicit taskdata in
2014 __kmp_reinitialize_team. __kmp_fork_call() assumes the master thread's
2015 implicit task has this data before this function is called. */
2016#if KMP_BARRIER_ICV_PULL
2017 /* Copy ICVs to master's thread structure into th_fixed_icvs (which remains
2018 untouched), where all of the worker threads can access them and make their
2019 own copies after the barrier. */
2020 KMP_DEBUG_ASSERT(team->t.t_threads[0])if (!(team->t.t_threads[0])) { __kmp_debug_assert("team->t.t_threads[0]"
, "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_barrier.cpp"
, 2020); }
; // The threads arrays should be
2021 // allocated at this point
2022 copy_icvs(
2023 &team->t.t_threads[0]->th.th_bar[bs_forkjoin_barrier].bb.th_fixed_icvs,
2024 new_icvs);
2025 KF_TRACE(10, ("__kmp_setup_icv_copy: PULL: T#%d this_thread=%p team=%p\n", 0,if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_setup_icv_copy: PULL: T#%d this_thread=%p team=%p\n"
, 0, team->t.t_threads[0], team); }
2026 team->t.t_threads[0], team))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_setup_icv_copy: PULL: T#%d this_thread=%p team=%p\n"
, 0, team->t.t_threads[0], team); }
;
2027#elif KMP_BARRIER_ICV_PUSH1
2028 // The ICVs will be propagated in the fork barrier, so nothing needs to be
2029 // done here.
2030 KF_TRACE(10, ("__kmp_setup_icv_copy: PUSH: T#%d this_thread=%p team=%p\n", 0,if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_setup_icv_copy: PUSH: T#%d this_thread=%p team=%p\n"
, 0, team->t.t_threads[0], team); }
2031 team->t.t_threads[0], team))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_setup_icv_copy: PUSH: T#%d this_thread=%p team=%p\n"
, 0, team->t.t_threads[0], team); }
;
2032#else
2033 // Copy the ICVs to each of the non-master threads. This takes O(nthreads)
2034 // time.
2035 ngo_load(new_icvs)((void)0);
2036 KMP_DEBUG_ASSERT(team->t.t_threads[0])if (!(team->t.t_threads[0])) { __kmp_debug_assert("team->t.t_threads[0]"
, "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_barrier.cpp"
, 2036); }
; // The threads arrays should be
2037 // allocated at this point
2038 for (int f = 1; f < new_nproc; ++f) { // Skip the master thread
2039 // TODO: GEH - pass in better source location info since usually NULL here
2040 KF_TRACE(10, ("__kmp_setup_icv_copy: LINEAR: T#%d this_thread=%p team=%p\n",if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_setup_icv_copy: LINEAR: T#%d this_thread=%p team=%p\n"
, f, team->t.t_threads[f], team); }
2041 f, team->t.t_threads[f], team))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_setup_icv_copy: LINEAR: T#%d this_thread=%p team=%p\n"
, f, team->t.t_threads[f], team); }
;
2042 __kmp_init_implicit_task(loc, team->t.t_threads[f], team, f, FALSE0);
2043 ngo_store_icvs(&team->t.t_implicit_task_taskdata[f].td_icvs, new_icvs)copy_icvs((&team->t.t_implicit_task_taskdata[f].td_icvs
), (new_icvs))
;
2044 KF_TRACE(10, ("__kmp_setup_icv_copy: LINEAR: T#%d this_thread=%p team=%p\n",if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_setup_icv_copy: LINEAR: T#%d this_thread=%p team=%p\n"
, f, team->t.t_threads[f], team); }
2045 f, team->t.t_threads[f], team))if (kmp_f_debug >= 10) { __kmp_debug_printf ("__kmp_setup_icv_copy: LINEAR: T#%d this_thread=%p team=%p\n"
, f, team->t.t_threads[f], team); }
;
2046 }
2047 ngo_sync()((void)0);
2048#endif // KMP_BARRIER_ICV_PULL
2049}

/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_wait_release.h

1/*
2 * kmp_wait_release.h -- Wait/Release implementation
3 */
4
5//===----------------------------------------------------------------------===//
6//
7// The LLVM Compiler Infrastructure
8//
9// This file is dual licensed under the MIT and the University of Illinois Open
10// Source Licenses. See LICENSE.txt for details.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef KMP_WAIT_RELEASE_H
15#define KMP_WAIT_RELEASE_H
16
17#include "kmp.h"
18#include "kmp_itt.h"
19#include "kmp_stats.h"
20#if OMPT_SUPPORT1
21#include "ompt-specific.h"
22#endif
23
24/*!
25@defgroup WAIT_RELEASE Wait/Release operations
26
27The definitions and functions here implement the lowest level thread
28synchronizations of suspending a thread and waking it up. They are used to build
29higher level operations such as barriers and fork/join.
30*/
31
32/*!
33@ingroup WAIT_RELEASE
34@{
35*/
36
37/*!
38 * The flag_type describes the storage used for the flag.
39 */
40enum flag_type {
41 flag32, /**< 32 bit flags */
42 flag64, /**< 64 bit flags */
43 flag_oncore /**< special 64-bit flag for on-core barrier (hierarchical) */
44};
45
46/*!
47 * Base class for wait/release volatile flag
48 */
49template <typename P> class kmp_flag_native {
50 volatile P *loc;
51 flag_type t;
52
53public:
54 typedef P flag_t;
55 kmp_flag_native(volatile P *p, flag_type ft) : loc(p), t(ft) {}
56 volatile P *get() { return loc; }
57 void *get_void_p() { return RCAST(void *, CCAST(P *, loc))reinterpret_cast<void *>(const_cast<P *>(loc)); }
58 void set(volatile P *new_loc) { loc = new_loc; }
59 flag_type get_type() { return t; }
60 P load() { return *loc; }
61 void store(P val) { *loc = val; }
62};
63
64/*!
65 * Base class for wait/release atomic flag
66 */
67template <typename P> class kmp_flag {
68 std::atomic<P>
69 *loc; /**< Pointer to the flag storage that is modified by another thread
70 */
71 flag_type t; /**< "Type" of the flag in loc */
72public:
73 typedef P flag_t;
74 kmp_flag(std::atomic<P> *p, flag_type ft) : loc(p), t(ft) {}
75 /*!
76 * @result the pointer to the actual flag
77 */
78 std::atomic<P> *get() { return loc; }
79 /*!
80 * @result void* pointer to the actual flag
81 */
82 void *get_void_p() { return RCAST(void *, loc)reinterpret_cast<void *>(loc); }
83 /*!
84 * @param new_loc in set loc to point at new_loc
85 */
86 void set(std::atomic<P> *new_loc) { loc = new_loc; }
87 /*!
88 * @result the flag_type
89 */
90 flag_type get_type() { return t; }
91 /*!
92 * @result flag value
93 */
94 P load() { return loc->load(std::memory_order_acquire); }
95 /*!
96 * @param val the new flag value to be stored
97 */
98 void store(P val) { loc->store(val, std::memory_order_release); }
99 // Derived classes must provide the following:
100 /*
101 kmp_info_t * get_waiter(kmp_uint32 i);
102 kmp_uint32 get_num_waiters();
103 bool done_check();
104 bool done_check_val(P old_loc);
105 bool notdone_check();
106 P internal_release();
107 void suspend(int th_gtid);
108 void resume(int th_gtid);
109 P set_sleeping();
110 P unset_sleeping();
111 bool is_sleeping();
112 bool is_any_sleeping();
113 bool is_sleeping_val(P old_loc);
114 int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
115 int *thread_finished
116 USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32
117 is_constrained);
118 */
119};
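Note: the commented list above is the contract that the runtime's concrete flag classes fill in elsewhere. A standalone mock that only mirrors the waiter/releaser shape (toy_flag and its members are illustrative, not runtime names):

  #include <atomic>
  #include <cstdint>

  class toy_flag {
    std::atomic<std::uint64_t> *loc;  // shared location another thread will modify
    std::uint64_t checker;            // value that means "released"
  public:
    toy_flag(std::atomic<std::uint64_t> *p, std::uint64_t c) : loc(p), checker(c) {}
    bool done_check()    { return loc->load(std::memory_order_acquire) == checker; }
    bool notdone_check() { return !done_check(); }
    void internal_release() { loc->store(checker, std::memory_order_release); }
  };

A waiter spins while notdone_check() holds; some other thread must eventually call internal_release() on the same flag, which is the pairing the wait/release code below relies on.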
120
121#if OMPT_SUPPORT1
122static inline void __ompt_implicit_task_end(kmp_info_t *this_thr,
123 omp_state_t omp_state,
124 ompt_data_t *tId,
125 ompt_data_t *pId) {
126 int ds_tid = this_thr->th.th_info.ds.ds_tid;
127 if (omp_state == omp_state_wait_barrier_implicit) {
128 this_thr->th.ompt_thread_info.state = omp_state_overhead;
129#if OMPT_OPTIONAL1
130 void *codeptr = NULL__null;
131 if (ompt_enabled.ompt_callback_sync_region_wait) {
132 ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)ompt_callback_sync_region_wait_callback(
133 ompt_sync_region_barrier, ompt_scope_end, NULL__null, tId, codeptr);
134 }
135 if (ompt_enabled.ompt_callback_sync_region) {
136 ompt_callbacks.ompt_callback(ompt_callback_sync_region)ompt_callback_sync_region_callback(
137 ompt_sync_region_barrier, ompt_scope_end, NULL__null, tId, codeptr);
138 }
139#endif
140 if (!KMP_MASTER_TID(ds_tid)((ds_tid) == 0)) {
141 if (ompt_enabled.ompt_callback_implicit_task) {
142 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)ompt_callback_implicit_task_callback(
143 ompt_scope_end, NULL__null, tId, 0, ds_tid);
144 }
145 // return to idle state
146 this_thr->th.ompt_thread_info.state = omp_state_idle;
147 } else {
148 this_thr->th.ompt_thread_info.state = omp_state_overhead;
149 }
150 }
151}
152#endif
153
154/* Spin wait loop that first does pause, then yield, then sleep. A thread that
155 calls __kmp_wait_* must make certain that another thread calls __kmp_release
156 to wake it back up to prevent deadlocks!
157
158 NOTE: We may not belong to a team at this point. */
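Note: before the real implementation below, a standalone sketch of the pause -> yield -> sleep progression the comment describes. The thresholds, the 1 ms park interval and the x86-only _mm_pause are illustrative, not the runtime's tuning, and the real code suspends the thread (flag->suspend) until a release wakes it rather than sleeping on a timer:

  #include <atomic>
  #include <chrono>
  #include <thread>
  #include <immintrin.h>  // _mm_pause (x86)

  void toy_wait(std::atomic<bool> &released) {
    int spins = 0;
    while (!released.load(std::memory_order_acquire)) {
      if (spins < 1000) {
        _mm_pause();                 // stage 1: cheap busy-wait, stay on the CPU
      } else if (spins < 2000) {
        std::this_thread::yield();   // stage 2: give up the timeslice
      } else {
        std::this_thread::sleep_for(std::chrono::milliseconds(1)); // stage 3: sleep
      }
      ++spins;
    }
  }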
159template <class C, int final_spin>
160static inline void
161__kmp_wait_template(kmp_info_t *this_thr,
162 C *flag USE_ITT_BUILD_ARG(void *itt_sync_obj), void *itt_sync_obj) {
163#if USE_ITT_BUILD1 && USE_ITT_NOTIFY1
164 volatile void *spin = flag->get();
165#endif
166 kmp_uint32 spins;
167 int th_gtid;
168 int tasks_completed = FALSE0;
169 int oversubscribed;
170#if !KMP_USE_MONITOR
171 kmp_uint64 poll_count;
172 kmp_uint64 hibernate_goal;
173#else
174 kmp_uint32 hibernate;
175#endif
176
177 KMP_FSYNC_SPIN_INIT(spin, NULL)int sync_iters = 0; if (__kmp_itt_fsync_prepare_ptr__3_0) { if
(spin == __null) { spin = __null; } } __asm__ __volatile__("movl %0, %%ebx; .byte 0x64, 0x67, 0x90 "
::"i"(0x4376) : "%ebx")
;
178 if (flag->done_check()) {
179 KMP_FSYNC_SPIN_ACQUIRED(CCAST(void *, spin))do { __asm__ __volatile__("movl %0, %%ebx; .byte 0x64, 0x67, 0x90 "
::"i"(0x4377) : "%ebx"); if (sync_iters >= __kmp_itt_prepare_delay
) { (!__kmp_itt_fsync_acquired_ptr__3_0) ? (void)0 : __kmp_itt_fsync_acquired_ptr__3_0
((void *)((void *)const_cast<void *>(spin))); } } while
(0)
;
180 return;
181 }
182 th_gtid = this_thr->th.th_info.ds.ds_gtid;
183#if KMP_OS_UNIX1
184 if (final_spin)
185 KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, true)(&this_thr->th.th_blocking)->store(true, std::memory_order_release
)
;
186#endif
187 KA_TRACE(20,if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_wait_sleep: T#%d waiting for flag(%p)\n"
, th_gtid, flag); }
188 ("__kmp_wait_sleep: T#%d waiting for flag(%p)\n", th_gtid, flag))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_wait_sleep: T#%d waiting for flag(%p)\n"
, th_gtid, flag); }
;
189#if KMP_STATS_ENABLED0
190 stats_state_e thread_state = KMP_GET_THREAD_STATE()((void)0);
191#endif
192
193/* OMPT Behavior:
194THIS function is called from
195 __kmp_barrier (2 times) (implicit or explicit barrier in parallel regions)
196 these have join / fork behavior
197
198 In these cases, we don't change the state or trigger events in THIS
199function.
200 Events are triggered in the calling code (__kmp_barrier):
201
202 state := omp_state_overhead
203 barrier-begin
204 barrier-wait-begin
205 state := omp_state_wait_barrier
206 call join-barrier-implementation (finally arrive here)
207 {}
208 call fork-barrier-implementation (finally arrive here)
209 {}
210 state := omp_state_overhead
211 barrier-wait-end
212 barrier-end
213 state := omp_state_work_parallel
214
215
216 __kmp_fork_barrier (after thread creation, before executing implicit task)
217 call fork-barrier-implementation (finally arrive here)
218 {} // worker arrive here with state = omp_state_idle
219
220
221 __kmp_join_barrier (implicit barrier at end of parallel region)
222 state := omp_state_barrier_implicit
223 barrier-begin
224 barrier-wait-begin
225 call join-barrier-implementation (finally arrive here
226final_spin=FALSE)
227 {
228 }
229 __kmp_fork_barrier (implicit barrier at end of parallel region)
230 call fork-barrier-implementation (finally arrive here final_spin=TRUE)
231
232 Worker after task-team is finished:
233 barrier-wait-end
234 barrier-end
235 implicit-task-end
236 idle-begin
237 state := omp_state_idle
238
239 Before leaving, if state = omp_state_idle
240 idle-end
241 state := omp_state_overhead
242*/
243#if OMPT_SUPPORT1
244 omp_state_t ompt_entry_state;
245 ompt_data_t *pId = NULL__null;
246 ompt_data_t *tId;
247 if (ompt_enabled.enabled) {
248 ompt_entry_state = this_thr->th.ompt_thread_info.state;
249 if (!final_spin || ompt_entry_state != omp_state_wait_barrier_implicit ||
250 KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid)((this_thr->th.th_info.ds.ds_tid) == 0)) {
251 ompt_lw_taskteam_t *team =
252 this_thr->th.th_team->t.ompt_serialized_team_info;
253 if (team) {
254 pId = &(team->ompt_team_info.parallel_data);
255 tId = &(team->ompt_task_info.task_data);
256 } else {
257 pId = OMPT_CUR_TEAM_DATA(this_thr)(&(this_thr->th.th_team->t.ompt_team_info.parallel_data
))
;
258 tId = OMPT_CUR_TASK_DATA(this_thr)(&(this_thr->th.th_current_task->ompt_task_info.task_data
))
;
259 }
260 } else {
261 pId = NULL__null;
262 tId = &(this_thr->th.ompt_thread_info.task_data);
263 }
264 if (final_spin && (__kmp_tasking_mode == tskm_immediate_exec ||
265 this_thr->th.th_task_team == NULL__null)) {
266 // implicit task is done. Either no taskqueue, or task-team finished
267 __ompt_implicit_task_end(this_thr, ompt_entry_state, tId, pId);
268 }
269 }
270#endif
271
272 // Setup for waiting
273 KMP_INIT_YIELD(spins){ (spins) = __kmp_yield_init; };
274
275 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME(2147483647)) {
276#if KMP_USE_MONITOR
277// The worker threads cannot rely on the team struct existing at this point.
278// Use the bt values cached in the thread struct instead.
279#ifdef KMP_ADJUST_BLOCKTIME1
280 if (__kmp_zero_bt && !this_thr->th.th_team_bt_set)
281 // Force immediate suspend if not set by user and more threads than
282 // available procs
283 hibernate = 0;
284 else
285 hibernate = this_thr->th.th_team_bt_intervals;
286#else
287 hibernate = this_thr->th.th_team_bt_intervals;
288#endif /* KMP_ADJUST_BLOCKTIME */
289
290 /* If the blocktime is nonzero, we want to make sure that we spin wait for
291 the entirety of the specified #intervals, plus up to one interval more.
292 This increment makes certain that this thread doesn't go to sleep too
293 soon. */
294 if (hibernate != 0)
295 hibernate++;
296
297 // Add in the current time value.
298 hibernate += TCR_4(__kmp_global.g.g_time.dt.t_value)(__kmp_global.g.g_time.dt.t_value);
299 KF_TRACE(20, ("__kmp_wait_sleep: T#%d now=%d, hibernate=%d, intervals=%d\n",if (kmp_f_debug >= 20) { __kmp_debug_printf ("__kmp_wait_sleep: T#%d now=%d, hibernate=%d, intervals=%d\n"
, th_gtid, __kmp_global.g.g_time.dt.t_value, hibernate, hibernate
- __kmp_global.g.g_time.dt.t_value); }
300 th_gtid, __kmp_global.g.g_time.dt.t_value, hibernate,if (kmp_f_debug >= 20) { __kmp_debug_printf ("__kmp_wait_sleep: T#%d now=%d, hibernate=%d, intervals=%d\n"
, th_gtid, __kmp_global.g.g_time.dt.t_value, hibernate, hibernate
- __kmp_global.g.g_time.dt.t_value); }
301 hibernate - __kmp_global.g.g_time.dt.t_value))if (kmp_f_debug >= 20) { __kmp_debug_printf ("__kmp_wait_sleep: T#%d now=%d, hibernate=%d, intervals=%d\n"
, th_gtid, __kmp_global.g.g_time.dt.t_value, hibernate, hibernate
- __kmp_global.g.g_time.dt.t_value); }
;
302#else
303 hibernate_goal = KMP_NOW()__kmp_hardware_timestamp() + this_thr->th.th_team_bt_intervals;
304 poll_count = 0;
305#endif // KMP_USE_MONITOR
306 }
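Note: the non-monitor branch above turns the blocktime into an absolute deadline once, and the main loop below keeps polling until that deadline passes (see the KMP_BLOCKING check further down), after which it falls through to flag->suspend(). A reduced sketch with hypothetical names (now_ticks, keep_spinning):

  #include <chrono>
  #include <cstdint>

  // Stand-in for the runtime's hardware timestamp; a steady clock is close
  // enough for the sketch.
  std::uint64_t now_ticks() {
    return std::chrono::steady_clock::now().time_since_epoch().count();
  }

  bool keep_spinning(std::uint64_t hibernate_goal) {
    // True while the deadline has not been reached; once it passes, the caller
    // stops polling and blocks instead of burning CPU.
    return now_ticks() < hibernate_goal;
  }

  // Setup, done once before the wait loop:
  //   std::uint64_t hibernate_goal = now_ticks() + blocktime_ticks;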
307
308 oversubscribed = (TCR_4(__kmp_nth)(__kmp_nth) > __kmp_avail_proc);
309 KMP_MB();
310
311 // Main wait spin loop
312 while (flag->notdone_check()) {
313 int in_pool;
314 kmp_task_team_t *task_team = NULL__null;
315 if (__kmp_tasking_mode != tskm_immediate_exec) {
316 task_team = this_thr->th.th_task_team;
317 /* If the thread's task team pointer is NULL, it means one of 3 things:
318 1) A newly-created thread is first being released by
319 __kmp_fork_barrier(), and its task team has not been set up yet.
320 2) All tasks have been executed to completion.
321 3) Tasking is off for this region. This could be because we are in a
322 serialized region (perhaps the outer one), or else tasking was manually
323 disabled (KMP_TASKING=0). */
324 if (task_team != NULL__null) {
325 if (TCR_SYNC_4(task_team->tt.tt_active)(task_team->tt.tt_active)) {
326 if (KMP_TASKING_ENABLED(task_team)(((task_team)->tt.tt_found_tasks) == (!0)))
327 flag->execute_tasks(
328 this_thr, th_gtid, final_spin,
329 &tasks_completed USE_ITT_BUILD_ARG(itt_sync_obj), itt_sync_obj, 0);
330 else
331 this_thr->th.th_reap_state = KMP_SAFE_TO_REAP1;
332 } else {
333 KMP_DEBUG_ASSERT(!KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid))if (!(!((this_thr->th.th_info.ds.ds_tid) == 0))) { __kmp_debug_assert
("!((this_thr->th.th_info.ds.ds_tid) == 0)", "/build/llvm-toolchain-snapshot-8~svn345461/projects/openmp/runtime/src/kmp_wait_release.h"
, 333); }
;
334#if OMPT_SUPPORT1
335 // task-team is done now; other cases should be caught above
336 if (final_spin && ompt_enabled.enabled)
337 __ompt_implicit_task_end(this_thr, ompt_entry_state, tId, pId);
338#endif
339 this_thr->th.th_task_team = NULL__null;
340 this_thr->th.th_reap_state = KMP_SAFE_TO_REAP1;
341 }
342 } else {
343 this_thr->th.th_reap_state = KMP_SAFE_TO_REAP1;
344 } // if
345 } // if
346
347 KMP_FSYNC_SPIN_PREPARE(CCAST(void *, spin));
348 if (TCR_4(__kmp_global.g.g_done)) {
349 if (__kmp_global.g.g_abort)
350 __kmp_abort_thread();
351 break;
352 }
353
354 // If we are oversubscribed, or have waited a bit (and
355 // KMP_LIBRARY=throughput), then yield
356 // TODO: Should it be number of cores instead of thread contexts? Like:
357 // KMP_YIELD(TCR_4(__kmp_nth) > __kmp_ncores);
358 // Need performance improvement data to make the change...
359 if (oversubscribed) {
360 KMP_YIELD(1);
361 } else {
362 KMP_YIELD_SPIN(spins);
363 }
364 // Check if this thread was transferred from a team
365 // to the thread pool (or vice-versa) while spinning.
366 in_pool = !!TCR_4(this_thr->th.th_in_pool);
367 if (in_pool != !!this_thr->th.th_active_in_pool) {
368 if (in_pool) { // Recently transferred from team to pool
369 KMP_ATOMIC_INC(&__kmp_thread_pool_active_nth);
370 this_thr->th.th_active_in_pool = TRUE;
371 /* Here, we cannot assert that:
372 KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) <=
373 __kmp_thread_pool_nth);
374 __kmp_thread_pool_nth is inc/dec'd by the master thread while the
375 fork/join lock is held, whereas __kmp_thread_pool_active_nth is
376 inc/dec'd asynchronously by the workers. The two can get out of sync
377 for brief periods of time. */
378 } else { // Recently transferred from pool to team
379 KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
380 KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) >= 0);
381 this_thr->th.th_active_in_pool = FALSE;
382 }
383 }
384
385#if KMP_STATS_ENABLED
386 // Check if thread has been signalled to idle state
387 // This indicates that the logical "join-barrier" has finished
388 if (this_thr->th.th_stats->isIdle() &&
389 KMP_GET_THREAD_STATE() == FORK_JOIN_BARRIER) {
390 KMP_SET_THREAD_STATE(IDLE);
391 KMP_PUSH_PARTITIONED_TIMER(OMP_idle);
392 }
393#endif
394
395 // Don't suspend if KMP_BLOCKTIME is set to "infinite"
396 if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME)
397 continue;
398
399 // Don't suspend if there is a likelihood of new tasks being spawned.
400 if ((task_team != NULL) && TCR_4(task_team->tt.tt_found_tasks))
401 continue;
402
403#if KMP_USE_MONITOR
404 // If we have waited a bit more, fall asleep
405 if (TCR_4(__kmp_global.g.g_time.dt.t_value) < hibernate)
406 continue;
407#else
408 if (KMP_BLOCKING(hibernate_goal, poll_count++))
409 continue;
410#endif
411
412 KF_TRACE(50, ("__kmp_wait_sleep: T#%d suspend time reached\n", th_gtid));
413#if KMP_OS_UNIX
414 if (final_spin)
415 KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, false);
416#endif
417 flag->suspend(th_gtid);
418#if KMP_OS_UNIX
419 if (final_spin)
420 KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, true);
421#endif
422
423 if (TCR_4(__kmp_global.g.g_done)) {
424 if (__kmp_global.g.g_abort)
425 __kmp_abort_thread();
426 break;
427 } else if (__kmp_tasking_mode != tskm_immediate_exec &&
428 this_thr->th.th_reap_state == KMP_SAFE_TO_REAP) {
429 this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
430 }
431 // TODO: If thread is done with work and times out, disband/free
432 }
433
434#if OMPT_SUPPORT
435 omp_state_t ompt_exit_state = this_thr->th.ompt_thread_info.state;
436 if (ompt_enabled.enabled && ompt_exit_state != omp_state_undefined) {
437#if OMPT_OPTIONAL
438 if (final_spin) {
439 __ompt_implicit_task_end(this_thr, ompt_exit_state, tId, pId);
440 ompt_exit_state = this_thr->th.ompt_thread_info.state;
441 }
442#endif
443 if (ompt_exit_state == omp_state_idle) {
444 this_thr->th.ompt_thread_info.state = omp_state_overhead;
445 }
446 }
447#endif
448#if KMP_STATS_ENABLED
449 // If we were put into idle state, pop that off the state stack
450 if (KMP_GET_THREAD_STATE() == IDLE) {
451 KMP_POP_PARTITIONED_TIMER();
452 KMP_SET_THREAD_STATE(thread_state);
453 this_thr->th.th_stats->resetIdleFlag();
454 }
455#endif
456
457#if KMP_OS_UNIX
458 if (final_spin)
459 KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, false);
460#endif
461 KMP_FSYNC_SPIN_ACQUIRED(CCAST(void *, spin));
462}
463
464/* Release any threads specified as waiting on the flag by releasing the flag
465 and resuming the waiting thread if indicated by the sleep bit(s). A thread that
466 calls __kmp_wait_template must call this function to wake up the potentially
467 sleeping thread and prevent deadlocks! */
468template <class C> static inline void __kmp_release_template(C *flag) {
469#ifdef KMP_DEBUG
470 int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1;
471#endif
472 KF_TRACE(20, ("__kmp_release: T#%d releasing flag(%x)\n", gtid, flag->get()));
473 KMP_DEBUG_ASSERT(flag->get());
474 KMP_FSYNC_RELEASING(flag->get_void_p());
475
476 flag->internal_release();
477
478 KF_TRACE(100, ("__kmp_release: T#%d set new spin=%d\n", gtid, flag->get(),
479 flag->load()));
480
481 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
482 // Only need to check sleep stuff if infinite block time not set.
483 // Are *any* threads waiting on flag sleeping?
484 if (flag->is_any_sleeping()) {
485 for (unsigned int i = 0; i < flag->get_num_waiters(); ++i) {
486 // if sleeping waiter exists at i, sets current_waiter to i inside flag
487 kmp_info_t *waiter = flag->get_waiter(i);
488 if (waiter) {
489 int wait_gtid = waiter->th.th_info.ds.ds_gtid;
490 // Wake up thread if needed
491 KF_TRACE(50, ("__kmp_release: T#%d waking up thread T#%d since sleep "
492 "flag(%p) set\n",
493 gtid, wait_gtid, flag->get()));
494 flag->resume(wait_gtid); // unsets flag's current_waiter when done
495 }
496 }
497 }
498 }
499}
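// [Editor's sketch -- not part of kmp_wait_release.h] The wait/release pair
// above follows a simple contract: the releaser moves the flag past the
// waiter's checker value and then resumes any sleeper, so a thread parked in
// suspend() cannot be missed. A self-contained toy version of that contract
// using only standard C++ (toy_flag and its members are illustrative names):
#include <atomic>
#include <chrono>
#include <condition_variable>
#include <mutex>
#include <thread>

struct toy_flag {
  std::atomic<bool> done{false};
  std::mutex m;
  std::condition_variable cv;

  // Waiter side: spin until blocktime expires, then park (flag->suspend()).
  void wait(std::chrono::microseconds blocktime) {
    auto deadline = std::chrono::steady_clock::now() + blocktime;
    while (!done.load(std::memory_order_acquire)) { // notdone_check()
      if (std::chrono::steady_clock::now() < deadline) {
        std::this_thread::yield(); // keep spinning while inside blocktime
        continue;
      }
      std::unique_lock<std::mutex> lk(m);
      cv.wait(lk, [this] { return done.load(); }); // sleep until released
    }
  }

  // Releaser side: publish the new flag value, then wake any sleeper.
  void release() {
    {
      std::lock_guard<std::mutex> lk(m); // held so a waiter cannot miss the update
      done.store(true, std::memory_order_release);
    }
    cv.notify_all();
  }
};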
500
501template <typename FlagType> struct flag_traits {};
502
503template <> struct flag_traits<kmp_uint32> {
504 typedef kmp_uint32 flag_t;
505 static const flag_type t = flag32;
506 static inline flag_t tcr(flag_t f) { return TCR_4(f); }
507 static inline flag_t test_then_add4(volatile flag_t *f) {
508 return KMP_TEST_THEN_ADD4_32(RCAST(volatile kmp_int32 *, f));
509 }
510 static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
511 return KMP_TEST_THEN_OR32(f, v);
512 }
513 static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
514 return KMP_TEST_THEN_AND32(f, v);
515 }
516};
517
518template <> struct flag_traits<kmp_uint64> {
519 typedef kmp_uint64 flag_t;
520 static const flag_type t = flag64;
521 static inline flag_t tcr(flag_t f) { return TCR_8(f); }
522 static inline flag_t test_then_add4(volatile flag_t *f) {
523 return KMP_TEST_THEN_ADD4_64(RCAST(volatile kmp_int64 *, f));
524 }
525 static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
526 return KMP_TEST_THEN_OR64(f, v);
527 }
528 static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
529 return KMP_TEST_THEN_AND64(f, v);
530 }
531};
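// [Editor's sketch -- not part of kmp_wait_release.h] The two specializations
// above exist so the flag classes can stay width-agnostic: the trait picks the
// 32- or 64-bit fetch-and-add/or/and primitive. The same idea expressed with
// std::atomic and illustrative names:
#include <atomic>
#include <cstdint>

template <typename T> struct toy_flag_traits {
  static T test_then_add4(std::atomic<T> *f) { return f->fetch_add(T(4)); }
  static T test_then_or(std::atomic<T> *f, T v) { return f->fetch_or(v); }
  static T test_then_and(std::atomic<T> *f, T v) { return f->fetch_and(v); }
};

// Usage: toy_flag_traits<std::uint32_t> and toy_flag_traits<std::uint64_t>
// present identical interfaces over the two flag widths.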
532
533// Basic flag that does not use C11 Atomics
534template <typename FlagType>
535class kmp_basic_flag_native : public kmp_flag_native<FlagType> {
536 typedef flag_traits<FlagType> traits_type;
537 FlagType checker; /**< Value the flag is compared against to check whether
538 it has been released. */
539 kmp_info_t
540 *waiting_threads[1]; /**< Array of threads sleeping on this thread. */
541 kmp_uint32
542 num_waiting_threads; /**< Number of threads sleeping on this thread. */
543public:
544 kmp_basic_flag_native(volatile FlagType *p)
545 : kmp_flag_native<FlagType>(p, traits_type::t), num_waiting_threads(0) {}
546 kmp_basic_flag_native(volatile FlagType *p, kmp_info_t *thr)
547 : kmp_flag_native<FlagType>(p, traits_type::t), num_waiting_threads(1) {
548 waiting_threads[0] = thr;
549 }
550 kmp_basic_flag_native(volatile FlagType *p, FlagType c)
551 : kmp_flag_native<FlagType>(p, traits_type::t), checker(c),
552 num_waiting_threads(0) {}
553 /*!
554 * @param i in index into waiting_threads
555 * @result the thread that is waiting at index i
556 */
557 kmp_info_t *get_waiter(kmp_uint32 i) {
558 KMP_DEBUG_ASSERT(i < num_waiting_threads);
559 return waiting_threads[i];
560 }
561 /*!
562 * @result num_waiting_threads
563 */
564 kmp_uint32 get_num_waiters() { return num_waiting_threads; }
565 /*!
566 * @param thr in the thread which is now waiting
567 *
568 * Insert a waiting thread at index 0.
569 */
570 void set_waiter(kmp_info_t *thr) {
571 waiting_threads[0] = thr;
572 num_waiting_threads = 1;
573 }
574 /*!
575 * @result true if the flag object has been released.
576 */
577 bool done_check() { return traits_type::tcr(*(this->get())) == checker; }
578 /*!
579 * @param old_loc in old value of flag
580 * @result true if the flag's old value indicates it was released.
581 */
582 bool done_check_val(FlagType old_loc) { return old_loc == checker; }
583 /*!
584 * @result true if the flag object is not yet released.
585 * Used in __kmp_wait_template like:
586 * @code
587 * while (flag.notdone_check()) { pause(); }
588 * @endcode
589 */
590 bool notdone_check() { return traits_type::tcr(*(this->get())) != checker; }
591 /*!
592 * @result Actual flag value before release was applied.
593 * Trigger all waiting threads to run by modifying flag to release state.
594 */
595 void internal_release() {
596 (void)traits_type::test_then_add4((volatile FlagType *)this->get());
597 }
598 /*!
599 * @result Actual flag value before sleep bit(s) set.
600 * Notes that there is at least one thread sleeping on the flag by setting
601 * sleep bit(s).
602 */
603 FlagType set_sleeping() {
604 return traits_type::test_then_or((volatile FlagType *)this->get(),
605 KMP_BARRIER_SLEEP_STATE);
606 }
607 /*!
608 * @result Actual flag value before sleep bit(s) cleared.
609 * Notes that there are no longer threads sleeping on the flag by clearing
610 * sleep bit(s).
611 */
612 FlagType unset_sleeping() {
613 return traits_type::test_then_and((volatile FlagType *)this->get(),
614 ~KMP_BARRIER_SLEEP_STATE);
615 }
616 /*!
617 * @param old_loc in old value of flag
618 * Test whether there are threads sleeping on the flag's old value in old_loc.
619 */
620 bool is_sleeping_val(FlagType old_loc) {
621 return old_loc & KMP_BARRIER_SLEEP_STATE;
622 }
623 /*!
624 * Test whether there are threads sleeping on the flag.
625 */
626 bool is_sleeping() { return is_sleeping_val(*(this->get())); }
627 bool is_any_sleeping() { return is_sleeping_val(*(this->get())); }
628 kmp_uint8 *get_stolen() { return NULL; }
629 enum barrier_type get_bt() { return bs_last_barrier; }
630};
631
632template <typename FlagType> class kmp_basic_flag : public kmp_flag<FlagType> {
633 typedef flag_traits<FlagType> traits_type;
634 FlagType checker; /**< Value the flag is compared against to check whether
635 it has been released. */
636 kmp_info_t
637 *waiting_threads[1]; /**< Array of threads sleeping on this thread. */
638 kmp_uint32
639 num_waiting_threads; /**< Number of threads sleeping on this thread. */
640public:
641 kmp_basic_flag(std::atomic<FlagType> *p)
642 : kmp_flag<FlagType>(p, traits_type::t), num_waiting_threads(0) {}
643 kmp_basic_flag(std::atomic<FlagType> *p, kmp_info_t *thr)
644 : kmp_flag<FlagType>(p, traits_type::t), num_waiting_threads(1) {
645 waiting_threads[0] = thr;
646 }
647 kmp_basic_flag(std::atomic<FlagType> *p, FlagType c)
648 : kmp_flag<FlagType>(p, traits_type::t), checker(c),
649 num_waiting_threads(0) {}
650 /*!
651 * @param i in index into waiting_threads
652 * @result the thread that is waiting at index i
653 */
654 kmp_info_t *get_waiter(kmp_uint32 i) {
655 KMP_DEBUG_ASSERT(i < num_waiting_threads);
656 return waiting_threads[i];
657 }
658 /*!
659 * @result num_waiting_threads
660 */
661 kmp_uint32 get_num_waiters() { return num_waiting_threads; }
662 /*!
663 * @param thr in the thread which is now waiting
664 *
665 * Insert a waiting thread at index 0.
666 */
667 void set_waiter(kmp_info_t *thr) {
668 waiting_threads[0] = thr;
669 num_waiting_threads = 1;
670 }
671 /*!
672 * @result true if the flag object has been released.
673 */
674 bool done_check() { return this->load() == checker; }
675 /*!
676 * @param old_loc in old value of flag
677 * @result true if the flag's old value indicates it was released.
678 */
679 bool done_check_val(FlagType old_loc) { return old_loc == checker; }
680 /*!
681 * @result true if the flag object is not yet released.
682 * Used in __kmp_wait_template like:
683 * @code
684 * while (flag.notdone_check()) { pause(); }
685 * @endcode
686 */
687 bool notdone_check() { return this->load() != checker; }
688 /*!
689 * @result Actual flag value before release was applied.
690 * Trigger all waiting threads to run by modifying flag to release state.
691 */
692 void internal_release() { KMP_ATOMIC_ADD(this->get(), 4); }
693 /*!
694 * @result Actual flag value before sleep bit(s) set.
695 * Notes that there is at least one thread sleeping on the flag by setting
696 * sleep bit(s).
697 */
698 FlagType set_sleeping() {
699 return KMP_ATOMIC_OR(this->get(), KMP_BARRIER_SLEEP_STATE);
700 }
701 /*!
702 * @result Actual flag value before sleep bit(s) cleared.
703 * Notes that there are no longer threads sleeping on the flag by clearing
704 * sleep bit(s).
705 */
706 FlagType unset_sleeping() {
707 return KMP_ATOMIC_AND(this->get(), ~KMP_BARRIER_SLEEP_STATE);
708 }
709 /*!
710 * @param old_loc in old value of flag
711 * Test whether there are threads sleeping on the flag's old value in old_loc.
712 */
713 bool is_sleeping_val(FlagType old_loc) {
714 return old_loc & KMP_BARRIER_SLEEP_STATE;
715 }
716 /*!
717 * Test whether there are threads sleeping on the flag.
718 */
719 bool is_sleeping() { return is_sleeping_val(this->load()); }
720 bool is_any_sleeping() { return is_sleeping_val(this->load()); }
721 kmp_uint8 *get_stolen() { return NULL; }
722 enum barrier_type get_bt() { return bs_last_barrier; }
723};
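// [Editor's sketch -- not part of kmp_wait_release.h] The sleep-bit helpers
// above rely on one encoding trick: releases advance the flag in steps of 4
// (KMP_BARRIER_STATE_BUMP) while bit 0 (KMP_BARRIER_SLEEP_STATE) marks "some
// waiter is asleep", so the counter and the marker never collide. A minimal
// stand-alone illustration; the constants are assumed stand-ins:
#include <atomic>
#include <cstdint>

constexpr std::uint64_t kSleepBit = 1u << 0;  // stand-in for KMP_BARRIER_SLEEP_STATE
constexpr std::uint64_t kStateBump = 1u << 2; // stand-in for KMP_BARRIER_STATE_BUMP

std::uint64_t set_sleeping(std::atomic<std::uint64_t> &f) {
  return f.fetch_or(kSleepBit); // old value tells the caller if it was already set
}
std::uint64_t unset_sleeping(std::atomic<std::uint64_t> &f) {
  return f.fetch_and(~kSleepBit);
}
bool is_sleeping_val(std::uint64_t old_val) { return (old_val & kSleepBit) != 0; }
void internal_release(std::atomic<std::uint64_t> &f) { f.fetch_add(kStateBump); }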
724
725class kmp_flag_32 : public kmp_basic_flag<kmp_uint32> {
726public:
727 kmp_flag_32(std::atomic<kmp_uint32> *p) : kmp_basic_flag<kmp_uint32>(p) {}
728 kmp_flag_32(std::atomic<kmp_uint32> *p, kmp_info_t *thr)
729 : kmp_basic_flag<kmp_uint32>(p, thr) {}
730 kmp_flag_32(std::atomic<kmp_uint32> *p, kmp_uint32 c)
731 : kmp_basic_flag<kmp_uint32>(p, c) {}
732 void suspend(int th_gtid) { __kmp_suspend_32(th_gtid, this); }
733 void resume(int th_gtid) { __kmp_resume_32(th_gtid, this); }
734 int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
735 int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
736 kmp_int32 is_constrained) {
737 return __kmp_execute_tasks_32(
738 this_thr, gtid, this, final_spin,
739 thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
740 }
741 void wait(kmp_info_t *this_thr,
742 int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
743 if (final_spin)
744 __kmp_wait_template<kmp_flag_32, TRUE>(
745 this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
746 else
747 __kmp_wait_template<kmp_flag_32, FALSE>(
748 this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
749 }
750 void release() { __kmp_release_template(this); }
751 flag_type get_ptr_type() { return flag32; }
752};
753
754class kmp_flag_64 : public kmp_basic_flag_native<kmp_uint64> {
755public:
756 kmp_flag_64(volatile kmp_uint64 *p) : kmp_basic_flag_native<kmp_uint64>(p) {}
757 kmp_flag_64(volatile kmp_uint64 *p, kmp_info_t *thr)
758 : kmp_basic_flag_native<kmp_uint64>(p, thr) {}
759 kmp_flag_64(volatile kmp_uint64 *p, kmp_uint64 c)
760 : kmp_basic_flag_native<kmp_uint64>(p, c) {}
761 void suspend(int th_gtid) { __kmp_suspend_64(th_gtid, this); }
762 void resume(int th_gtid) { __kmp_resume_64(th_gtid, this); }
763 int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
764 int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
765 kmp_int32 is_constrained) {
766 return __kmp_execute_tasks_64(
767 this_thr, gtid, this, final_spin,
768 thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
769 }
770 void wait(kmp_info_t *this_thr,
771 int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
772 if (final_spin)
9. Taking true branch
773 __kmp_wait_template<kmp_flag_64, TRUE>(
10. Value assigned to 'ompt_callbacks.ompt_callback_sync_region_callback'
774 this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
775 else
776 __kmp_wait_template<kmp_flag_64, FALSE>(
777 this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
778 }
779 void release() { __kmp_release_template(this); }
780 flag_type get_ptr_type() { return flag64; }
781};
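// [Editor's sketch -- not part of kmp_wait_release.h] wait() above converts the
// run-time final_spin argument into a compile-time template parameter before
// entering the hot loop, so the spin body is compiled twice with that branch
// folded away. The bare pattern, with illustrative names:
#include <atomic>

template <bool final_spin> void toy_wait_loop(std::atomic<bool> &flag) {
  while (!flag.load(std::memory_order_acquire)) {
    if (final_spin) {
      // constant-folded path, e.g. drain remaining tasks before sleeping
    }
  }
}

inline void toy_wait(std::atomic<bool> &flag, bool final_spin) {
  if (final_spin)
    toy_wait_loop<true>(flag); // one instantiation per value of the constant
  else
    toy_wait_loop<false>(flag);
}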
782
783// Hierarchical 64-bit on-core barrier instantiation
784class kmp_flag_oncore : public kmp_flag_native<kmp_uint64> {
785 kmp_uint64 checker;
786 kmp_info_t *waiting_threads[1];
787 kmp_uint32 num_waiting_threads;
788 kmp_uint32
789 offset; /**< Portion of flag that is of interest for an operation. */
790 bool flag_switch; /**< Indicates a switch in flag location. */
791 enum barrier_type bt; /**< Barrier type. */
792 kmp_info_t *this_thr; /**< Thread that may be redirected to different flag
793 location. */
794#if USE_ITT_BUILD
795 void *
796 itt_sync_obj; /**< ITT object that must be passed to new flag location. */
797#endif
798 unsigned char &byteref(volatile kmp_uint64 *loc, size_t offset) {
799 return (RCAST(unsigned char *, CCAST(kmp_uint64 *, loc)))[offset];
800 }
801
802public:
803 kmp_flag_oncore(volatile kmp_uint64 *p)
804 : kmp_flag_native<kmp_uint64>(p, flag_oncore), num_waiting_threads(0),
805 flag_switch(false) {}
806 kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint32 idx)
807 : kmp_flag_native<kmp_uint64>(p, flag_oncore), num_waiting_threads(0),
808 offset(idx), flag_switch(false) {}
809 kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint64 c, kmp_uint32 idx,
810 enum barrier_type bar_t,
811 kmp_info_t *thr USE_ITT_BUILD_ARG(void *itt))
812 : kmp_flag_native<kmp_uint64>(p, flag_oncore), checker(c),
813 num_waiting_threads(0), offset(idx), flag_switch(false), bt(bar_t),
814 this_thr(thr) USE_ITT_BUILD_ARG(itt_sync_obj(itt)) {}
815 kmp_info_t *get_waiter(kmp_uint32 i) {
816 KMP_DEBUG_ASSERT(i < num_waiting_threads);
817 return waiting_threads[i];
818 }
819 kmp_uint32 get_num_waiters() { return num_waiting_threads; }
820 void set_waiter(kmp_info_t *thr) {
821 waiting_threads[0] = thr;
822 num_waiting_threads = 1;
823 }
824 bool done_check_val(kmp_uint64 old_loc) {
825 return byteref(&old_loc, offset) == checker;
826 }
827 bool done_check() { return done_check_val(*get()); }
828 bool notdone_check() {
829 // Calculate flag_switch
830 if (this_thr->th.th_bar[bt].bb.wait_flag == KMP_BARRIER_SWITCH_TO_OWN_FLAG)
831 flag_switch = true;
832 if (byteref(get(), offset) != 1 && !flag_switch)
833 return true;
834 else if (flag_switch) {
835 this_thr->th.th_bar[bt].bb.wait_flag = KMP_BARRIER_SWITCHING;
836 kmp_flag_64 flag(&this_thr->th.th_bar[bt].bb.b_go,
837 (kmp_uint64)KMP_BARRIER_STATE_BUMP);
838 __kmp_wait_64(this_thr, &flag, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
839 }
840 return false;
841 }
842 void internal_release() {
843 // Other threads can write their own bytes simultaneously.
844 if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
845 byteref(get(), offset) = 1;
846 } else {
847 kmp_uint64 mask = 0;
848 byteref(&mask, offset) = 1;
849 KMP_TEST_THEN_OR64(get(), mask);
850 }
851 }
852 kmp_uint64 set_sleeping() {
853 return KMP_TEST_THEN_OR64(get(), KMP_BARRIER_SLEEP_STATE);
854 }
855 kmp_uint64 unset_sleeping() {
856 return KMP_TEST_THEN_AND64(get(), ~KMP_BARRIER_SLEEP_STATE);
857 }
858 bool is_sleeping_val(kmp_uint64 old_loc) {
859 return old_loc & KMP_BARRIER_SLEEP_STATE;
860 }
861 bool is_sleeping() { return is_sleeping_val(*get()); }
862 bool is_any_sleeping() { return is_sleeping_val(*get()); }
863 void wait(kmp_info_t *this_thr, int final_spin) {
864 if (final_spin)
865 __kmp_wait_template<kmp_flag_oncore, TRUE>(
866 this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
867 else
868 __kmp_wait_template<kmp_flag_oncore, FALSE>(
869 this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
870 }
871 void release() { __kmp_release_template(this); }
872 void suspend(int th_gtid) { __kmp_suspend_oncore(th_gtid, this); }
873 void resume(int th_gtid) { __kmp_resume_oncore(th_gtid, this); }
874 int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
875 int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
876 kmp_int32 is_constrained) {
877 return __kmp_execute_tasks_oncore(
878 this_thr, gtid, this, final_spin,
879 thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
880 }
881 kmp_uint8 *get_stolen() { return NULL; }
882 enum barrier_type get_bt() { return bt; }
883 flag_type get_ptr_type() { return flag_oncore; }
884};
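// [Editor's sketch -- not part of kmp_wait_release.h] The on-core flag packs
// several one-byte "go" markers into a single 64-bit word; each waiter watches
// only the byte at its offset, and a releaser ORs in a one-byte mask so the
// other bytes are untouched. A simplified, self-contained version of that byte
// addressing (endianness follows the in-memory layout, as with byteref above):
#include <atomic>
#include <cstddef>
#include <cstdint>

void release_byte(std::atomic<std::uint64_t> &word, std::size_t offset) {
  std::uint64_t mask = 0;
  reinterpret_cast<unsigned char *>(&mask)[offset] = 1; // build a single-byte mask
  word.fetch_or(mask, std::memory_order_acq_rel); // publish without disturbing other bytes
}

bool byte_done(std::uint64_t snapshot, std::size_t offset) {
  return reinterpret_cast<unsigned char *>(&snapshot)[offset] == 1;
}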
885
886// Used to wake up threads; the volatile void *flag is usually the th_sleep_loc
887// associated with int gtid.
888static inline void __kmp_null_resume_wrapper(int gtid, volatile void *flag) {
889 if (!flag)
890 return;
891
892 switch (RCAST(kmp_flag_64 *, CCAST(void *, flag))->get_type()) {
893 case flag32:
894 __kmp_resume_32(gtid, NULL);
895 break;
896 case flag64:
897 __kmp_resume_64(gtid, NULL);
898 break;
899 case flag_oncore:
900 __kmp_resume_oncore(gtid, NULL);
901 break;
902 }
903}
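// [Editor's sketch -- not part of kmp_wait_release.h] The wrapper above
// dispatches on a type tag stored in the flag object instead of using a
// virtual call. A stripped-down version of the same tag dispatch (all names
// are illustrative; the real resume functions are __kmp_resume_32/64/oncore):
enum toy_flag_type { toy_flag32, toy_flag64, toy_flag_oncore };

struct toy_flag_base {
  toy_flag_type t;
  toy_flag_type get_type() const { return t; }
};

inline void toy_resume_32(int /*gtid*/) {}
inline void toy_resume_64(int /*gtid*/) {}
inline void toy_resume_oncore(int /*gtid*/) {}

inline void toy_null_resume_wrapper(int gtid, toy_flag_base *flag) {
  if (!flag)
    return; // nothing is sleeping on a null location
  switch (flag->get_type()) { // tag-based dispatch, as in the wrapper above
  case toy_flag32:
    toy_resume_32(gtid);
    break;
  case toy_flag64:
    toy_resume_64(gtid);
    break;
  case toy_flag_oncore:
    toy_resume_oncore(gtid);
    break;
  }
}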
904
905/*!
906@}
907*/
908
909#endif // KMP_WAIT_RELEASE_H