/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp

1

/*

2

* kmp_affinity.cpp -- affinity management

3

*/

4

5

//===----------------------------------------------------------------------===//

6

//

7

// The LLVM Compiler Infrastructure

8

//

9

// This file is dual licensed under the MIT and the University of Illinois Open

10

// Source Licenses. See LICENSE.txt for details.

11

//

12

//===----------------------------------------------------------------------===//

13

14

#include "kmp.h"

15

#include "kmp_affinity.h"

16

#include "kmp_i18n.h"

17

#include "kmp_io.h"

18

#include "kmp_str.h"

19

#include "kmp_wrapper_getpid.h"

20

#if KMP_USE_HIER_SCHED0

21

#include "kmp_dispatch_hier.h"

22

#endif

23

24

// Store the real or imagined machine hierarchy here

25

static hierarchy_info machine_hierarchy;

26

27

void __kmp_cleanup_hierarchy() { machine_hierarchy.fini(); }

28

29

// Populate the barrier state `thr_bar` from the machine hierarchy.
//
// nproc:   number of threads the hierarchy has to accommodate.
// thr_bar: barrier state that receives the hierarchy depth, the number of
//          leaf kids at the base level, and the per-level skip table.
void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar) {
  // The hierarchy is still uninitialized when affinity is available but set
  // to "none"; build it on first use of the hierarchical barrier.
  if (TCR_1(machine_hierarchy.uninitialized)) {
    machine_hierarchy.init(NULL, nproc);
  }

  // Grow the hierarchy when more threads are requested than it was sized for.
  if (machine_hierarchy.base_num_threads < nproc) {
    machine_hierarchy.resize(nproc);
  }

  kmp_uint32 depth = machine_hierarchy.depth;
  KMP_DEBUG_ASSERT(depth > 0);

  thr_bar->skip_per_level = machine_hierarchy.skipPerLevel;
  thr_bar->base_leaf_kids = (kmp_uint8)machine_hierarchy.numPerLevel[0] - 1;
  thr_bar->depth = depth;
}

47

48

#if KMP_AFFINITY_SUPPORTED1

49

50

bool KMPAffinity::picked_api = false;

51

52

void *KMPAffinity::Mask::operator new(size_t n) { return __kmp_allocate(n)___kmp_allocate((n), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 52); }

53

void *KMPAffinity::Mask::operator new[](size_t n) { return __kmp_allocate(n)___kmp_allocate((n), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 53); }

54

void KMPAffinity::Mask::operator delete(void *p) { __kmp_free(p)___kmp_free((p), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 54); }

55

void KMPAffinity::Mask::operator delete[](void *p) { __kmp_free(p)___kmp_free((p), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 55); }

56

void *KMPAffinity::operator new(size_t n) { return __kmp_allocate(n)___kmp_allocate((n), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 56); }

57

void KMPAffinity::operator delete(void *p) { __kmp_free(p)___kmp_free((p), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 57); }

58

59

void KMPAffinity::pick_api() {

60

KMPAffinity *affinity_dispatch;

61

if (picked_api)

62

return;

63

#if KMP_USE_HWLOC0

64

// Only use Hwloc if affinity isn't explicitly disabled and

65

// user requests Hwloc topology method

66

if (__kmp_affinity_top_method == affinity_top_method_hwloc &&

67

__kmp_affinity_type != affinity_disabled) {

68

affinity_dispatch = new KMPHwlocAffinity();

69

} else

70

#endif

71

{

72

affinity_dispatch = new KMPNativeAffinity();

73

}

74

__kmp_affinity_dispatch = affinity_dispatch;

75

picked_api = true;

76

}

77

78

void KMPAffinity::destroy_api() {

79

if (__kmp_affinity_dispatch != NULL__null) {

80

delete __kmp_affinity_dispatch;

81

__kmp_affinity_dispatch = NULL__null;

82

picked_api = false;

83

}

84

}

85

86

// Print the affinity mask to the character array in a pretty format.

87

char *__kmp_affinity_print_mask(char *buf, int buf_len,

88

kmp_affin_mask_t *mask) {

89

KMP_ASSERT(buf_len >= 40)((buf_len >= 40) ? 0 : __kmp_debug_assert("buf_len >= 40"
, "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 89));

90

char *scan = buf;

91

char *end = buf + buf_len - 1;

92

93

// Find first element / check for empty set.

94

size_t i;

95

i = mask->begin();

96

if (i == mask->end()) {

97

KMP_SNPRINTFsnprintf(scan, end - scan + 1, "{<empty>}");

98

while (*scan != '\0')

99

scan++;

100

KMP_ASSERT(scan <= end)((scan <= end) ? 0 : __kmp_debug_assert("scan <= end", "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 100));

101

return buf;

102

}

103

104

KMP_SNPRINTFsnprintf(scan, end - scan + 1, "{%ld", (long)i);

105

while (*scan != '\0')

106

scan++;

107

i++;

108

for (; i != mask->end(); i = mask->next(i)) {

109

if (!KMP_CPU_ISSET(i, mask)(mask)->is_set(i)) {

110

continue;

111

}

112

113

// Check for buffer overflow. A string of the form ",<n>" will have at most

114

// 10 characters, plus we want to leave room to print ",...}" if the set is

115

// too large to print for a total of 15 characters. We already left room for

116

// '\0' in setting end.

117

if (end - scan < 15) {

118

break;

119

}

120

KMP_SNPRINTFsnprintf(scan, end - scan + 1, ",%-ld", (long)i);

121

while (*scan != '\0')

122

scan++;

123

}

124

if (i != mask->end()) {

125

KMP_SNPRINTFsnprintf(scan, end - scan + 1, ",...");

126

while (*scan != '\0')

127

scan++;

128

}

129

KMP_SNPRINTFsnprintf(scan, end - scan + 1, "}");

130

while (*scan != '\0')

131

scan++;

132

KMP_ASSERT(scan <= end)((scan <= end) ? 0 : __kmp_debug_assert("scan <= end", "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 132));

133

return buf;

134

}

135

136

// Clear `mask`, then set one bit per processor: per processor group when
// group affinity is compiled in and more than one group exists, otherwise for
// procs 0 .. __kmp_xproc-1.
void __kmp_affinity_entire_machine_mask(kmp_affin_mask_t *mask) {
  KMP_CPU_ZERO(mask);

#if KMP_GROUP_AFFINITY

  if (__kmp_num_proc_groups > 1) {
    KMP_DEBUG_ASSERT(__kmp_GetActiveProcessorCount != NULL);
    for (int group = 0; group < __kmp_num_proc_groups; ++group) {
      const int num = __kmp_GetActiveProcessorCount(group);
      // Each group occupies its own DWORD_PTR-sized slice of the mask.
      for (int i = 0; i < num; ++i) {
        KMP_CPU_SET(i + group * (CHAR_BIT * sizeof(DWORD_PTR)), mask);
      }
    }
    return;
  }

#endif /* KMP_GROUP_AFFINITY */

  for (int proc = 0; proc < __kmp_xproc; ++proc) {
    KMP_CPU_SET(proc, mask);
  }
}

162

163

// When sorting by labels, __kmp_affinity_assign_child_nums() must first be

164

// called to renumber the labels from [0..n] and place them into the child_num

165

// vector of the address object. This is done in case the labels used for

166

// the children at one node of the hierarchy differ from those used for

167

// another node at the same level. Example: suppose the machine has 2 nodes

168

// with 2 packages each. The first node contains packages 601 and 602, and

169

// second node contains packages 603 and 604. If we try to sort the table

170

// for "scatter" affinity, the table will still be sorted 601, 602, 603, 604

171

// because we are paying attention to the labels themselves, not the ordinal

172

// child numbers. By using the child numbers in the sort, the result is

173

// {0,0}=601, {0,1}=603, {1,0}=602, {1,1}=604.

174

static void __kmp_affinity_assign_child_nums(AddrUnsPair *address2os,

175

int numAddrs) {

176

KMP_DEBUG_ASSERT(numAddrs > 0)((numAddrs > 0) ? 0 : __kmp_debug_assert("numAddrs > 0"
, "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 176));

177

int depth = address2os->first.depth;

178

unsigned *counts = (unsigned *)__kmp_allocate(depth * sizeof(unsigned))___kmp_allocate((depth * sizeof(unsigned)), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 178);

179

unsigned *lastLabel = (unsigned *)__kmp_allocate(depth * sizeof(unsigned))___kmp_allocate((depth * sizeof(unsigned)), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 179);

180

int labCt;

181

for (labCt = 0; labCt < depth; labCt++) {

182

address2os[0].first.childNums[labCt] = counts[labCt] = 0;

183

lastLabel[labCt] = address2os[0].first.labels[labCt];

184

}

185

int i;

186

for (i = 1; i < numAddrs; i++) {

187

for (labCt = 0; labCt < depth; labCt++) {

188

if (address2os[i].first.labels[labCt] != lastLabel[labCt]) {

189

int labCt2;

190

for (labCt2 = labCt + 1; labCt2 < depth; labCt2++) {

191

counts[labCt2] = 0;

192

lastLabel[labCt2] = address2os[i].first.labels[labCt2];

193

}

194

counts[labCt]++;

195

lastLabel[labCt] = address2os[i].first.labels[labCt];

196

break;

197

}

198

}

199

for (labCt = 0; labCt < depth; labCt++) {

200

address2os[i].first.childNums[labCt] = counts[labCt];

201

}

202

for (; labCt < (int)Address::maxDepth; labCt++) {

203

address2os[i].first.childNums[labCt] = 0;

204

}

205

}

206

__kmp_free(lastLabel)___kmp_free((lastLabel), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 206);

207

__kmp_free(counts)___kmp_free((counts), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 207);

208

}

209

210

// All of the __kmp_affinity_create_*_map() routines should set

211

// __kmp_affinity_masks to a vector of affinity mask objects of length

212

// __kmp_affinity_num_masks, if __kmp_affinity_type != affinity_none, and return

213

// the number of levels in the machine topology tree (zero if

214

// __kmp_affinity_type == affinity_none).

215

//

216

// All of the __kmp_affinity_create_*_map() routines should set

217

// *__kmp_affin_fullMask to the affinity mask for the initialization thread.

218

// They need to save and restore the mask, and it could be needed later, so

219

// saving it is just an optimization to avoid calling kmp_get_system_affinity()

220

// again.

221

kmp_affin_mask_t *__kmp_affin_fullMask = NULL__null;

222

223

static int nCoresPerPkg, nPackages;

224

static int __kmp_nThreadsPerCore;

225

#ifndef KMP_DFLT_NTH_CORES

226

static int __kmp_ncores;

227

#endif

228

static int *__kmp_pu_os_idx = NULL__null;

229

230

// __kmp_affinity_uniform_topology() doesn't work when called from

231

// places which support arbitrarily many levels in the machine topology

232

// map, i.e. the non-default cases in __kmp_affinity_create_cpuinfo_map()

233

// __kmp_affinity_create_x2apicid_map().

234

inline static bool __kmp_affinity_uniform_topology() {

235

return __kmp_avail_proc == (__kmp_nThreadsPerCore * nCoresPerPkg * nPackages);

236

}

237

238

// Print out the detailed machine topology map, i.e. the physical locations

239

// of each OS proc.

240

static void __kmp_affinity_print_topology(AddrUnsPair *address2os, int len,

241

int depth, int pkgLevel,

242

int coreLevel, int threadLevel) {

243

int proc;

244

245

KMP_INFORM(OSProcToPhysicalThreadMap, "KMP_AFFINITY")__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_OSProcToPhysicalThreadMap
, "KMP_AFFINITY"), __kmp_msg_null);

246

for (proc = 0; proc < len; proc++) {

247

int level;

248

kmp_str_buf_t buf;

249

__kmp_str_buf_init(&buf){ (&buf)->str = (&buf)->bulk; (&buf)->size
= sizeof((&buf)->bulk); (&buf)->used = 0; (&
buf)->bulk[0] = 0; };

250

for (level = 0; level < depth; level++) {

251

if (level == threadLevel) {

252

__kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Thread)__kmp_i18n_catgets(kmp_i18n_str_Thread));

253

} else if (level == coreLevel) {

254

__kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Core)__kmp_i18n_catgets(kmp_i18n_str_Core));

255

} else if (level == pkgLevel) {

256

__kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Package)__kmp_i18n_catgets(kmp_i18n_str_Package));

257

} else if (level > pkgLevel) {

258

__kmp_str_buf_print(&buf, "%s_%d ", KMP_I18N_STR(Node)__kmp_i18n_catgets(kmp_i18n_str_Node),

259

level - pkgLevel - 1);

260

} else {

261

__kmp_str_buf_print(&buf, "L%d ", level);

262

}

263

__kmp_str_buf_print(&buf, "%d ", address2os[proc].first.labels[level]);

264

}

265

KMP_INFORM(OSProcMapToPack, "KMP_AFFINITY", address2os[proc].second,__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_OSProcMapToPack
, "KMP_AFFINITY", address2os[proc].second, buf.str), __kmp_msg_null
)

266

buf.str)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_OSProcMapToPack
, "KMP_AFFINITY", address2os[proc].second, buf.str), __kmp_msg_null
);

267

__kmp_str_buf_free(&buf);

268

}

269

}

270

271

#if KMP_USE_HWLOC0

272

273

// Print the topology map for the hwloc-built table.
//
// addrP:  table of (address, OS proc id) pairs to print.
// len:    number of entries in addrP.
// depth:  number of labels stored in each address.
// levels: per-level markers; a level is printed only when its entry is > 0.
//         Entries are consumed in the order [node,] [tile,] core, thread,
//         starting at index 1.
static void __kmp_affinity_print_hwloc_tp(AddrUnsPair *addrP, int len,
                                          int depth, int *levels) {
  kmp_str_buf_t buf;
  __kmp_str_buf_init(&buf);
  KMP_INFORM(OSProcToPhysicalThreadMap, "KMP_AFFINITY");

  for (int proc = 0; proc < len; ++proc) {
    AddrUnsPair &entry = addrP[proc];
    __kmp_str_buf_print(&buf, "%s %d ", KMP_I18N_STR(Package),
                        entry.first.labels[0]);

    if (depth > 1) {
      int level = 1; // iterate over levels
      int label = 1; // iterate over labels

      // node level follows package
      if (__kmp_numa_detected) {
        if (levels[level] > 0) {
          __kmp_str_buf_print(&buf, "%s %d ", KMP_I18N_STR(Node),
                              entry.first.labels[label]);
          ++label;
        }
        ++level;
      }

      // tile level follows node if any, or package
      if (__kmp_tile_depth > 0) {
        if (levels[level] > 0) {
          __kmp_str_buf_print(&buf, "%s %d ", KMP_I18N_STR(Tile),
                              entry.first.labels[label]);
          ++label;
        }
        ++level;
      }

      // core level follows
      if (levels[level] > 0) {
        __kmp_str_buf_print(&buf, "%s %d ", KMP_I18N_STR(Core),
                            entry.first.labels[label]);
        ++label;
      }
      ++level;

      // thread level is the latest
      if (levels[level] > 0) {
        __kmp_str_buf_print(&buf, "%s %d ", KMP_I18N_STR(Thread),
                            entry.first.labels[label]);
        ++label;
      }
      ++level;

      KMP_DEBUG_ASSERT(label == depth);
    }

    KMP_INFORM(OSProcMapToPack, "KMP_AFFINITY", entry.second, buf.str);
    __kmp_str_buf_clear(&buf); // reuse the buffer for the next entry
  }
  __kmp_str_buf_free(&buf);
}

310

311

// Per-level counters for the hwloc topology (nodes/tiles/cores per parent).
static int nNodePerPkg;
static int nTilePerPkg;
static int nTilePerNode;
static int nCorePerNode;
static int nCorePerTile;

312

313

// This function removes the topology levels that are radix 1 and don't offer

314

// further information about the topology. The most common example is when you

315

// have one thread context per core, we don't want the extra thread context

316

// level if it offers no unique labels. So they are removed.

317

// return value: the new depth of address2os

318

static int __kmp_affinity_remove_radix_one_levels(AddrUnsPair *addrP, int nTh,

319

int depth, int *levels) {

320

int level;

321

int i;

322

int radix1_detected;

323

int new_depth = depth;

324

for (level = depth - 1; level > 0; --level) {

325

// Detect if this level is radix 1

326

radix1_detected = 1;

327

for (i = 1; i < nTh; ++i) {

328

if (addrP[0].first.labels[level] != addrP[i].first.labels[level]) {

329

// There are differing label values for this level so it stays

330

radix1_detected = 0;

331

break;

332

}

333

}

334

if (!radix1_detected)

335

continue;

336

// Radix 1 was detected

337

--new_depth;

338

levels[level] = -1; // mark level as not present in address2os array

339

if (level == new_depth) {

340

// "turn off" deepest level, just decrement the depth that removes

341

// the level from address2os array

342

for (i = 0; i < nTh; ++i) {

343

addrP[i].first.depth--;

344

}

345

} else {

346

// For other levels, we move labels over and also reduce the depth

347

int j;

348

for (j = level; j < new_depth; ++j) {

349

for (i = 0; i < nTh; ++i) {

350

addrP[i].first.labels[j] = addrP[i].first.labels[j + 1];

351

addrP[i].first.depth--;

352

}

353

levels[j + 1] -= 1;

354

}

355

}

356

}

357

return new_depth;

358

}

359

360

// Returns the number of objects of type 'type' below 'obj' within the topology

361

// tree structure. e.g., if obj is a HWLOC_OBJ_PACKAGE object, and type is

362

// HWLOC_OBJ_PU, then this will return the number of PU's under the SOCKET

363

// object.

364

static int __kmp_hwloc_get_nobjs_under_obj(hwloc_obj_t obj,

365

hwloc_obj_type_t type) {

366

int retval = 0;

367

hwloc_obj_t first;

368

for (first = hwloc_get_obj_below_by_type(__kmp_hwloc_topology, obj->type,

369

obj->logical_index, type, 0);

370

first != NULL__null &&

371

hwloc_get_ancestor_obj_by_type(__kmp_hwloc_topology, obj->type, first) ==

372

obj;

373

first = hwloc_get_next_obj_by_type(__kmp_hwloc_topology, first->type,

374

first)) {

375

++retval;

376

}

377

return retval;

378

}

379

380

static int __kmp_hwloc_count_children_by_depth(hwloc_topology_t t,

381

hwloc_obj_t o, unsigned depth,

382

hwloc_obj_t *f) {

383

if (o->depth == depth) {

384

if (*f == NULL__null)

385

*f = o; // output first descendant found

386

return 1;

387

}

388

int sum = 0;

389

for (unsigned i = 0; i < o->arity; i++)

390

sum += __kmp_hwloc_count_children_by_depth(t, o->children[i], depth, f);

391

return sum; // will be 0 if no one found (as PU arity is 0)

392

}

393

394

static int __kmp_hwloc_count_children_by_type(hwloc_topology_t t, hwloc_obj_t o,

395

hwloc_obj_type_t type,

396

hwloc_obj_t *f) {

397

if (!hwloc_compare_types(o->type, type)) {

398

if (*f == NULL__null)

399

*f = o; // output first descendant found

400

return 1;

401

}

402

int sum = 0;

403

for (unsigned i = 0; i < o->arity; i++)

404

sum += __kmp_hwloc_count_children_by_type(t, o->children[i], type, f);

405

return sum; // will be 0 if no one found (as PU arity is 0)

406

}

407

408

static int __kmp_hwloc_process_obj_core_pu(AddrUnsPair *addrPair,

409

int &nActiveThreads,

410

int &num_active_cores,

411

hwloc_obj_t obj, int depth,

412

int *labels) {

413

hwloc_obj_t core = NULL__null;

414

hwloc_topology_t &tp = __kmp_hwloc_topology;

415

int NC = __kmp_hwloc_count_children_by_type(tp, obj, HWLOC_OBJ_CORE, &core);

416

for (int core_id = 0; core_id < NC; ++core_id, core = core->next_cousin) {

417

hwloc_obj_t pu = NULL__null;

418

KMP_DEBUG_ASSERT(core != NULL)((core != __null) ? 0 : __kmp_debug_assert("core != __null", "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 418));

419

int num_active_threads = 0;

420

int NT = __kmp_hwloc_count_children_by_type(tp, core, HWLOC_OBJ_PU, &pu);

421

// int NT = core->arity; pu = core->first_child; // faster?

422

for (int pu_id = 0; pu_id < NT; ++pu_id, pu = pu->next_cousin) {

423

KMP_DEBUG_ASSERT(pu != NULL)((pu != __null) ? 0 : __kmp_debug_assert("pu != __null", "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 423));

424

if (!KMP_CPU_ISSET(pu->os_index, __kmp_affin_fullMask)(__kmp_affin_fullMask)->is_set(pu->os_index))

425

continue; // skip inactive (inaccessible) unit

426

Address addr(depth + 2);

427

KA_TRACE(20, ("Hwloc inserting %d (%d) %d (%d) %d (%d) into address2os\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("Hwloc inserting %d (%d) %d (%d) %d (%d) into address2os\n"
, obj->os_index, obj->logical_index, core->os_index,
core->logical_index, pu->os_index, pu->logical_index
); }

428

obj->os_index, obj->logical_index, core->os_index,if (kmp_a_debug >= 20) { __kmp_debug_printf ("Hwloc inserting %d (%d) %d (%d) %d (%d) into address2os\n"
, obj->os_index, obj->logical_index, core->os_index,
core->logical_index, pu->os_index, pu->logical_index
); }

429

core->logical_index, pu->os_index, pu->logical_index))if (kmp_a_debug >= 20) { __kmp_debug_printf ("Hwloc inserting %d (%d) %d (%d) %d (%d) into address2os\n"
, obj->os_index, obj->logical_index, core->os_index,
core->logical_index, pu->os_index, pu->logical_index
); };

430

for (int i = 0; i < depth; ++i)

431

addr.labels[i] = labels[i]; // package, etc.

432

addr.labels[depth] = core_id; // core

433

addr.labels[depth + 1] = pu_id; // pu

434

addrPair[nActiveThreads] = AddrUnsPair(addr, pu->os_index);

435

__kmp_pu_os_idx[nActiveThreads] = pu->os_index;

436

nActiveThreads++;

437

++num_active_threads; // count active threads per core

438

}

439

if (num_active_threads) { // were there any active threads on the core?

440

++__kmp_ncores; // count total active cores

441

++num_active_cores; // count active cores per socket

442

if (num_active_threads > __kmp_nThreadsPerCore)

443

__kmp_nThreadsPerCore = num_active_threads; // calc maximum

444

}

445

}

446

return 0;

447

}

448

449

// Check if NUMA node detected below the package,

450

// and if tile object is detected and return its depth

451

static int __kmp_hwloc_check_numa() {

452

hwloc_topology_t &tp = __kmp_hwloc_topology;

453

hwloc_obj_t hT, hC, hL, hN, hS; // hwloc objects (pointers to)

454

int depth;

455

456

// Get some PU

457

hT = hwloc_get_obj_by_type(tp, HWLOC_OBJ_PU, 0);

458

if (hT == NULL__null) // something has gone wrong

459

return 1;

460

461

// check NUMA node below PACKAGE

462

hN = hwloc_get_ancestor_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hT);

463

hS = hwloc_get_ancestor_obj_by_type(tp, HWLOC_OBJ_PACKAGE, hT);

464

KMP_DEBUG_ASSERT(hS != NULL)((hS != __null) ? 0 : __kmp_debug_assert("hS != __null", "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 464));

465

if (hN != NULL__null && hN->depth > hS->depth) {

466

__kmp_numa_detected = TRUE(!0); // socket includes node(s)

467

if (__kmp_affinity_gran == affinity_gran_node) {

468

__kmp_affinity_gran == affinity_gran_numa;

469

}

470

}

471

472

// check tile, get object by depth because of multiple caches possible

473

depth = hwloc_get_cache_type_depth(tp, 2, HWLOC_OBJ_CACHE_UNIFIED);

474

hL = hwloc_get_ancestor_obj_by_depth(tp, depth, hT);

475

hC = NULL__null; // not used, but reset it here just in case

476

if (hL != NULL__null &&

477

__kmp_hwloc_count_children_by_type(tp, hL, HWLOC_OBJ_CORE, &hC) > 1)

478

__kmp_tile_depth = depth; // tile consists of multiple cores

479

return 0;

480

}

481

482

static int __kmp_affinity_create_hwloc_map(AddrUnsPair **address2os,

483

kmp_i18n_id_t *const msg_id) {

484

hwloc_topology_t &tp = __kmp_hwloc_topology; // shortcut of a long name

485

*address2os = NULL__null;

486

*msg_id = kmp_i18n_null;

487

488

// Save the affinity mask for the current thread.

489

kmp_affin_mask_t *oldMask;

490

KMP_CPU_ALLOC(oldMask)(oldMask = __kmp_affinity_dispatch->allocate_mask());

491

__kmp_get_system_affinity(oldMask, TRUE)(oldMask)->get_system_affinity((!0));

492

__kmp_hwloc_check_numa();

493

494

if (!KMP_AFFINITY_CAPABLE()(__kmp_affin_mask_size > 0)) {

495

// Hack to try and infer the machine topology using only the data

496

// available from cpuid on the current thread, and __kmp_xproc.

497

KMP_ASSERT(__kmp_affinity_type == affinity_none)((__kmp_affinity_type == affinity_none) ? 0 : __kmp_debug_assert
("__kmp_affinity_type == affinity_none", "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 497));

498

499

nCoresPerPkg = __kmp_hwloc_get_nobjs_under_obj(

500

hwloc_get_obj_by_type(tp, HWLOC_OBJ_PACKAGE, 0), HWLOC_OBJ_CORE);

501

__kmp_nThreadsPerCore = __kmp_hwloc_get_nobjs_under_obj(

502

hwloc_get_obj_by_type(tp, HWLOC_OBJ_CORE, 0), HWLOC_OBJ_PU);

503

__kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore;

504

nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;

505

if (__kmp_affinity_verbose) {

506

KMP_INFORM(AffNotCapableUseLocCpuidL11, "KMP_AFFINITY")__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AffNotCapableUseLocCpuidL11
, "KMP_AFFINITY"), __kmp_msg_null);

507

KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AvailableOSProc
, "KMP_AFFINITY", __kmp_avail_proc), __kmp_msg_null);

508

if (__kmp_affinity_uniform_topology()) {

509

KMP_INFORM(Uniform, "KMP_AFFINITY")__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_Uniform
, "KMP_AFFINITY"), __kmp_msg_null);

510

} else {

511

KMP_INFORM(NonUniform, "KMP_AFFINITY")__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_NonUniform
, "KMP_AFFINITY"), __kmp_msg_null);

512

}

513

KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_Topology
, "KMP_AFFINITY", nPackages, nCoresPerPkg, __kmp_nThreadsPerCore
, __kmp_ncores), __kmp_msg_null)

514

__kmp_nThreadsPerCore, __kmp_ncores)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_Topology
, "KMP_AFFINITY", nPackages, nCoresPerPkg, __kmp_nThreadsPerCore
, __kmp_ncores), __kmp_msg_null);

515

}

516

KMP_CPU_FREE(oldMask)__kmp_affinity_dispatch->deallocate_mask(oldMask);

517

return 0;

518

}

519

520

int depth = 3;

521

int levels[5] = {0, 1, 2, 3, 4}; // package, [node,] [tile,] core, thread

522

int labels[3] = {0}; // package [,node] [,tile] - head of lables array

523

if (__kmp_numa_detected)

524

++depth;

525

if (__kmp_tile_depth)

526

++depth;

527

528

// Allocate the data structure to be returned.

529

AddrUnsPair *retval =

530

(AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * __kmp_avail_proc)___kmp_allocate((sizeof(AddrUnsPair) * __kmp_avail_proc), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 530);

531

KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL)((__kmp_pu_os_idx == __null) ? 0 : __kmp_debug_assert("__kmp_pu_os_idx == __null"
, "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 531));

532

__kmp_pu_os_idx = (int *)__kmp_allocate(sizeof(int) * __kmp_avail_proc)___kmp_allocate((sizeof(int) * __kmp_avail_proc), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 532);

533

534

// When affinity is off, this routine will still be called to set

535

// __kmp_ncores, as well as __kmp_nThreadsPerCore,

536

// nCoresPerPkg, & nPackages. Make sure all these vars are set

537

// correctly, and return if affinity is not enabled.

538

539

hwloc_obj_t socket, node, tile;

540

int nActiveThreads = 0;

541

int socket_id = 0;

542

// re-calculate globals to count only accessible resources

543

__kmp_ncores = nPackages = nCoresPerPkg = __kmp_nThreadsPerCore = 0;

544

nNodePerPkg = nTilePerPkg = nTilePerNode = nCorePerNode = nCorePerTile = 0;

545

for (socket = hwloc_get_obj_by_type(tp, HWLOC_OBJ_PACKAGE, 0); socket != NULL__null;

546

socket = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PACKAGE, socket),

547

socket_id++) {

548

labels[0] = socket_id;

549

if (__kmp_numa_detected) {

550

int NN;

551

int n_active_nodes = 0;

552

node = NULL__null;

553

NN = __kmp_hwloc_count_children_by_type(tp, socket, HWLOC_OBJ_NUMANODE,

554

&node);

555

for (int node_id = 0; node_id < NN; ++node_id, node = node->next_cousin) {

556

labels[1] = node_id;

557

if (__kmp_tile_depth) {

558

// NUMA + tiles

559

int NT;

560

int n_active_tiles = 0;

561

tile = NULL__null;

562

NT = __kmp_hwloc_count_children_by_depth(tp, node, __kmp_tile_depth,

563

&tile);

564

for (int tl_id = 0; tl_id < NT; ++tl_id, tile = tile->next_cousin) {

565

labels[2] = tl_id;

566

int n_active_cores = 0;

567

__kmp_hwloc_process_obj_core_pu(retval, nActiveThreads,

568

n_active_cores, tile, 3, labels);

569

if (n_active_cores) { // were there any active cores on the socket?

570

++n_active_tiles; // count active tiles per node

571

if (n_active_cores > nCorePerTile)

572

nCorePerTile = n_active_cores; // calc maximum

573

}

574

}

575

if (n_active_tiles) { // were there any active tiles on the socket?

576

++n_active_nodes; // count active nodes per package

577

if (n_active_tiles > nTilePerNode)

578

nTilePerNode = n_active_tiles; // calc maximum

579

}

580

} else {

581

// NUMA, no tiles

582

int n_active_cores = 0;

583

__kmp_hwloc_process_obj_core_pu(retval, nActiveThreads,

584

n_active_cores, node, 2, labels);

585

if (n_active_cores) { // were there any active cores on the socket?

586

++n_active_nodes; // count active nodes per package

587

if (n_active_cores > nCorePerNode)

588

nCorePerNode = n_active_cores; // calc maximum

589

}

590

}

591

}

592

if (n_active_nodes) { // were there any active nodes on the socket?

593

++nPackages; // count total active packages

594

if (n_active_nodes > nNodePerPkg)

595

nNodePerPkg = n_active_nodes; // calc maximum

596

}

597

} else {

598

if (__kmp_tile_depth) {

599

// no NUMA, tiles

600

int NT;

601

int n_active_tiles = 0;

602

tile = NULL__null;

603

NT = __kmp_hwloc_count_children_by_depth(tp, socket, __kmp_tile_depth,

604

&tile);

605

for (int tl_id = 0; tl_id < NT; ++tl_id, tile = tile->next_cousin) {

606

labels[1] = tl_id;

607

int n_active_cores = 0;

608

__kmp_hwloc_process_obj_core_pu(retval, nActiveThreads,

609

n_active_cores, tile, 2, labels);

610

if (n_active_cores) { // were there any active cores on the socket?

611

++n_active_tiles; // count active tiles per package

612

if (n_active_cores > nCorePerTile)

613

nCorePerTile = n_active_cores; // calc maximum

614

}

615

}

616

if (n_active_tiles) { // were there any active tiles on the socket?

617

++nPackages; // count total active packages

618

if (n_active_tiles > nTilePerPkg)

619

nTilePerPkg = n_active_tiles; // calc maximum

620

}

621

} else {

622

// no NUMA, no tiles

623

int n_active_cores = 0;

624

__kmp_hwloc_process_obj_core_pu(retval, nActiveThreads, n_active_cores,

625

socket, 1, labels);

626

if (n_active_cores) { // were there any active cores on the socket?

627

++nPackages; // count total active packages

628

if (n_active_cores > nCoresPerPkg)

629

nCoresPerPkg = n_active_cores; // calc maximum

630

}

631

}

632

}

633

}

634

635

// If there's only one thread context to bind to, return now.

636

KMP_DEBUG_ASSERT(nActiveThreads == __kmp_avail_proc)((nActiveThreads == __kmp_avail_proc) ? 0 : __kmp_debug_assert
("nActiveThreads == __kmp_avail_proc", "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 636));

637

KMP_ASSERT(nActiveThreads > 0)((nActiveThreads > 0) ? 0 : __kmp_debug_assert("nActiveThreads > 0"
, "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 637));

638

if (nActiveThreads == 1) {

639

__kmp_ncores = nPackages = 1;

640

__kmp_nThreadsPerCore = nCoresPerPkg = 1;

641

if (__kmp_affinity_verbose) {

642

char buf[KMP_AFFIN_MASK_PRINT_LEN1024];

643

__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN1024, oldMask);

644

645

KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY")__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AffUsingHwloc
, "KMP_AFFINITY"), __kmp_msg_null);

646

if (__kmp_affinity_respect_mask) {

647

KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_InitOSProcSetRespect
, "KMP_AFFINITY", buf), __kmp_msg_null);

648

} else {

649

KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_InitOSProcSetNotRespect
, "KMP_AFFINITY", buf), __kmp_msg_null);

650

}

651

KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AvailableOSProc
, "KMP_AFFINITY", __kmp_avail_proc), __kmp_msg_null);

652

KMP_INFORM(Uniform, "KMP_AFFINITY")__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_Uniform
, "KMP_AFFINITY"), __kmp_msg_null);

653

KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_Topology
, "KMP_AFFINITY", nPackages, nCoresPerPkg, __kmp_nThreadsPerCore
, __kmp_ncores), __kmp_msg_null)

654

__kmp_nThreadsPerCore, __kmp_ncores)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_Topology
, "KMP_AFFINITY", nPackages, nCoresPerPkg, __kmp_nThreadsPerCore
, __kmp_ncores), __kmp_msg_null);

655

}

656

657

if (__kmp_affinity_type == affinity_none) {

658

__kmp_free(retval)___kmp_free((retval), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 658);

659

KMP_CPU_FREE(oldMask)__kmp_affinity_dispatch->deallocate_mask(oldMask);

660

return 0;

661

}

662

663

// Form an Address object which only includes the package level.

664

Address addr(1);

665

addr.labels[0] = retval[0].first.labels[0];

666

retval[0].first = addr;

667

668

if (__kmp_affinity_gran_levels < 0) {

669

__kmp_affinity_gran_levels = 0;

670

}

671

672

if (__kmp_affinity_verbose) {

673

__kmp_affinity_print_topology(retval, 1, 1, 0, -1, -1);

674

}

675

676

*address2os = retval;

677

KMP_CPU_FREE(oldMask)__kmp_affinity_dispatch->deallocate_mask(oldMask);

678

return 1;

679

}

680

681

// Sort the table by physical Id.

682

qsort(retval, nActiveThreads, sizeof(*retval),

683

__kmp_affinity_cmp_Address_labels);

684

685

// Check to see if the machine topology is uniform

686

int nPUs = nPackages * __kmp_nThreadsPerCore;

687

if (__kmp_numa_detected) {

688

if (__kmp_tile_depth) { // NUMA + tiles

689

nPUs *= (nNodePerPkg * nTilePerNode * nCorePerTile);

690

} else { // NUMA, no tiles

691

nPUs *= (nNodePerPkg * nCorePerNode);

692

}

693

} else {

694

if (__kmp_tile_depth) { // no NUMA, tiles

695

nPUs *= (nTilePerPkg * nCorePerTile);

696

} else { // no NUMA, no tiles

697

nPUs *= nCoresPerPkg;

698

}

699

}

700

unsigned uniform = (nPUs == nActiveThreads);

701

702

// Print the machine topology summary.

703

if (__kmp_affinity_verbose) {

704

char mask[KMP_AFFIN_MASK_PRINT_LEN1024];

705

__kmp_affinity_print_mask(mask, KMP_AFFIN_MASK_PRINT_LEN1024, oldMask);

706

if (__kmp_affinity_respect_mask) {

707

KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", mask)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_InitOSProcSetRespect
, "KMP_AFFINITY", mask), __kmp_msg_null);

708

} else {

709

KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", mask)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_InitOSProcSetNotRespect
, "KMP_AFFINITY", mask), __kmp_msg_null);

710

}

711

KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AvailableOSProc
, "KMP_AFFINITY", __kmp_avail_proc), __kmp_msg_null);

712

if (uniform) {

713

KMP_INFORM(Uniform, "KMP_AFFINITY")__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_Uniform
, "KMP_AFFINITY"), __kmp_msg_null);

714

} else {

715

KMP_INFORM(NonUniform, "KMP_AFFINITY")__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_NonUniform
, "KMP_AFFINITY"), __kmp_msg_null);

716

}

717

if (__kmp_numa_detected) {

718

if (__kmp_tile_depth) { // NUMA + tiles

719

KMP_INFORM(TopologyExtraNoTi, "KMP_AFFINITY", nPackages, nNodePerPkg,__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_TopologyExtraNoTi
, "KMP_AFFINITY", nPackages, nNodePerPkg, nTilePerNode, nCorePerTile
, __kmp_nThreadsPerCore, __kmp_ncores), __kmp_msg_null)

720

nTilePerNode, nCorePerTile, __kmp_nThreadsPerCore,__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_TopologyExtraNoTi
, "KMP_AFFINITY", nPackages, nNodePerPkg, nTilePerNode, nCorePerTile
, __kmp_nThreadsPerCore, __kmp_ncores), __kmp_msg_null)

721

__kmp_ncores)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_TopologyExtraNoTi
, "KMP_AFFINITY", nPackages, nNodePerPkg, nTilePerNode, nCorePerTile
, __kmp_nThreadsPerCore, __kmp_ncores), __kmp_msg_null);

722

} else { // NUMA, no tiles

723

KMP_INFORM(TopologyExtraNode, "KMP_AFFINITY", nPackages, nNodePerPkg,__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_TopologyExtraNode
, "KMP_AFFINITY", nPackages, nNodePerPkg, nCorePerNode, __kmp_nThreadsPerCore
, __kmp_ncores), __kmp_msg_null)

724

nCorePerNode, __kmp_nThreadsPerCore, __kmp_ncores)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_TopologyExtraNode
, "KMP_AFFINITY", nPackages, nNodePerPkg, nCorePerNode, __kmp_nThreadsPerCore
, __kmp_ncores), __kmp_msg_null);

725

nPUs *= (nNodePerPkg * nCorePerNode);

726

}

727

} else {

728

if (__kmp_tile_depth) { // no NUMA, tiles

729

KMP_INFORM(TopologyExtraTile, "KMP_AFFINITY", nPackages, nTilePerPkg,__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_TopologyExtraTile
, "KMP_AFFINITY", nPackages, nTilePerPkg, nCorePerTile, __kmp_nThreadsPerCore
, __kmp_ncores), __kmp_msg_null)

730

nCorePerTile, __kmp_nThreadsPerCore, __kmp_ncores)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_TopologyExtraTile
, "KMP_AFFINITY", nPackages, nTilePerPkg, nCorePerTile, __kmp_nThreadsPerCore
, __kmp_ncores), __kmp_msg_null);

731

} else { // no NUMA, no tiles

732

kmp_str_buf_t buf;

733

__kmp_str_buf_init(&buf){ (&buf)->str = (&buf)->bulk; (&buf)->size
= sizeof((&buf)->bulk); (&buf)->used = 0; (&
buf)->bulk[0] = 0; };

734

__kmp_str_buf_print(&buf, "%d", nPackages);

735

KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, nCoresPerPkg,__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_TopologyExtra
, "KMP_AFFINITY", buf.str, nCoresPerPkg, __kmp_nThreadsPerCore
, __kmp_ncores), __kmp_msg_null)

736

__kmp_nThreadsPerCore, __kmp_ncores)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_TopologyExtra
, "KMP_AFFINITY", buf.str, nCoresPerPkg, __kmp_nThreadsPerCore
, __kmp_ncores), __kmp_msg_null);

737

__kmp_str_buf_free(&buf);

738

}

739

}

740

}

741

742

if (__kmp_affinity_type == affinity_none) {

743

__kmp_free(retval)___kmp_free((retval), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 743);

744

KMP_CPU_FREE(oldMask)__kmp_affinity_dispatch->deallocate_mask(oldMask);

745

return 0;

746

}

747

748

int depth_full = depth; // number of levels before compressing

749

// Find any levels with radix 1, and remove them from the map

750

// (except for the package level).

751

depth = __kmp_affinity_remove_radix_one_levels(retval, nActiveThreads, depth,

752

levels);

753

KMP_DEBUG_ASSERT(__kmp_affinity_gran != affinity_gran_default)((__kmp_affinity_gran != affinity_gran_default) ? 0 : __kmp_debug_assert
("__kmp_affinity_gran != affinity_gran_default", "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 753));

754

if (__kmp_affinity_gran_levels < 0) {

755

// Set the granularity level based on what levels are modeled

756

// in the machine topology map.

757

__kmp_affinity_gran_levels = 0; // lowest level (e.g. fine)

758

if (__kmp_affinity_gran > affinity_gran_thread) {

759

for (int i = 1; i <= depth_full; ++i) {

760

if (__kmp_affinity_gran <= i) // only count deeper levels

761

break;

762

if (levels[depth_full - i] > 0)

763

__kmp_affinity_gran_levels++;

764

}

765

}

766

if (__kmp_affinity_gran > affinity_gran_package)

767

__kmp_affinity_gran_levels++; // e.g. granularity = group

768

}

769

770

if (__kmp_affinity_verbose)

771

__kmp_affinity_print_hwloc_tp(retval, nActiveThreads, depth, levels);

772

773

KMP_CPU_FREE(oldMask)__kmp_affinity_dispatch->deallocate_mask(oldMask);

774

*address2os = retval;

775

return depth;

776

}

777

#endif // KMP_USE_HWLOC

778

779

// If we don't know how to retrieve the machine's processor topology, or

780

// encounter an error in doing so, this routine is called to form a "flat"

781

// mapping of os thread id's <-> processor id's.

782

static int __kmp_affinity_create_flat_map(AddrUnsPair **address2os,

783

kmp_i18n_id_t *const msg_id) {

784

*address2os = NULL__null;

785

*msg_id = kmp_i18n_null;

786

787

// Even if __kmp_affinity_type == affinity_none, this routine might still

788

// called to set __kmp_ncores, as well as

789

// __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.

790

if (!KMP_AFFINITY_CAPABLE()(__kmp_affin_mask_size > 0)) {

791

KMP_ASSERT(__kmp_affinity_type == affinity_none)((__kmp_affinity_type == affinity_none) ? 0 : __kmp_debug_assert
("__kmp_affinity_type == affinity_none", "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 791));

792

__kmp_ncores = nPackages = __kmp_xproc;

793

__kmp_nThreadsPerCore = nCoresPerPkg = 1;

794

if (__kmp_affinity_verbose) {

795

KMP_INFORM(AffFlatTopology, "KMP_AFFINITY")__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AffFlatTopology
, "KMP_AFFINITY"), __kmp_msg_null);

796

KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AvailableOSProc
, "KMP_AFFINITY", __kmp_avail_proc), __kmp_msg_null);

797

KMP_INFORM(Uniform, "KMP_AFFINITY")__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_Uniform
, "KMP_AFFINITY"), __kmp_msg_null);

798

KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_Topology
, "KMP_AFFINITY", nPackages, nCoresPerPkg, __kmp_nThreadsPerCore
, __kmp_ncores), __kmp_msg_null)

799

__kmp_nThreadsPerCore, __kmp_ncores)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_Topology
, "KMP_AFFINITY", nPackages, nCoresPerPkg, __kmp_nThreadsPerCore
, __kmp_ncores), __kmp_msg_null);

800

}

801

return 0;

802

}

803

804

// When affinity is off, this routine will still be called to set

805

// __kmp_ncores, as well as __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.

806

// Make sure all these vars are set correctly, and return now if affinity is

807

// not enabled.

808

__kmp_ncores = nPackages = __kmp_avail_proc;

809

__kmp_nThreadsPerCore = nCoresPerPkg = 1;

810

if (__kmp_affinity_verbose) {

811

char buf[KMP_AFFIN_MASK_PRINT_LEN1024];

812

__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN1024,

813

__kmp_affin_fullMask);

814

815

KMP_INFORM(AffCapableUseFlat, "KMP_AFFINITY")__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AffCapableUseFlat
, "KMP_AFFINITY"), __kmp_msg_null);

816

if (__kmp_affinity_respect_mask) {

817

KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_InitOSProcSetRespect
, "KMP_AFFINITY", buf), __kmp_msg_null);

818

} else {

819

KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_InitOSProcSetNotRespect
, "KMP_AFFINITY", buf), __kmp_msg_null);

820

}

821

KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AvailableOSProc
, "KMP_AFFINITY", __kmp_avail_proc), __kmp_msg_null);

822

KMP_INFORM(Uniform, "KMP_AFFINITY")__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_Uniform
, "KMP_AFFINITY"), __kmp_msg_null);

823

KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_Topology
, "KMP_AFFINITY", nPackages, nCoresPerPkg, __kmp_nThreadsPerCore
, __kmp_ncores), __kmp_msg_null)

824

__kmp_nThreadsPerCore, __kmp_ncores)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_Topology
, "KMP_AFFINITY", nPackages, nCoresPerPkg, __kmp_nThreadsPerCore
, __kmp_ncores), __kmp_msg_null);

825

}

826

KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL)((__kmp_pu_os_idx == __null) ? 0 : __kmp_debug_assert("__kmp_pu_os_idx == __null"
, "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 826));

827

__kmp_pu_os_idx = (int *)__kmp_allocate(sizeof(int) * __kmp_avail_proc)___kmp_allocate((sizeof(int) * __kmp_avail_proc), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 827);

828

if (__kmp_affinity_type == affinity_none) {

829

int avail_ct = 0;

830

int i;

831

KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask)for (i = (__kmp_affin_fullMask)->begin(); i != (__kmp_affin_fullMask
)->end(); i = (__kmp_affin_fullMask)->next(i)) {

832

if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)(__kmp_affin_fullMask)->is_set(i))

833

continue;

834

__kmp_pu_os_idx[avail_ct++] = i; // suppose indices are flat

835

}

836

return 0;

837

}

838

839

// Contruct the data structure to be returned.

840

*address2os =

841

(AddrUnsPair *)__kmp_allocate(sizeof(**address2os) * __kmp_avail_proc)___kmp_allocate((sizeof(**address2os) * __kmp_avail_proc), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 841);

842

int avail_ct = 0;

843

unsigned int i;

844

KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask)for (i = (__kmp_affin_fullMask)->begin(); i != (__kmp_affin_fullMask
)->end(); i = (__kmp_affin_fullMask)->next(i)) {

845

// Skip this proc if it is not included in the machine model.

846

if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)(__kmp_affin_fullMask)->is_set(i)) {

847

continue;

848

}

849

__kmp_pu_os_idx[avail_ct] = i; // suppose indices are flat

850

Address addr(1);

851

addr.labels[0] = i;

852

(*address2os)[avail_ct++] = AddrUnsPair(addr, i);

853

}

854

if (__kmp_affinity_verbose) {

855

KMP_INFORM(OSProcToPackage, "KMP_AFFINITY")__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_OSProcToPackage
, "KMP_AFFINITY"), __kmp_msg_null);

856

}

857

858

if (__kmp_affinity_gran_levels < 0) {

859

// Only the package level is modeled in the machine topology map,

860

// so the #levels of granularity is either 0 or 1.

861

if (__kmp_affinity_gran > affinity_gran_package) {

862

__kmp_affinity_gran_levels = 1;

863

} else {

864

__kmp_affinity_gran_levels = 0;

865

}

866

}

867

return 1;

868

}

869

870

#if KMP_GROUP_AFFINITY0

871

872

// If multiple Windows* OS processor groups exist, we can create a 2-level

873

// topology map with the groups at level 0 and the individual procs at level 1.

874

// This facilitates letting the threads float among all procs in a group,

875

// if granularity=group (the default when there are multiple groups).

876

static int __kmp_affinity_create_proc_group_map(AddrUnsPair **address2os,

877

kmp_i18n_id_t *const msg_id) {

878

*address2os = NULL__null;

879

*msg_id = kmp_i18n_null;

880

881

// If we aren't affinity capable, then return now.

882

// The flat mapping will be used.

883

if (!KMP_AFFINITY_CAPABLE()(__kmp_affin_mask_size > 0)) {

884

// FIXME set *msg_id

885

return -1;

886

}

887

888

// Contruct the data structure to be returned.

889

*address2os =

890

(AddrUnsPair *)__kmp_allocate(sizeof(**address2os) * __kmp_avail_proc)___kmp_allocate((sizeof(**address2os) * __kmp_avail_proc), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 890);

891

KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL)((__kmp_pu_os_idx == __null) ? 0 : __kmp_debug_assert("__kmp_pu_os_idx == __null"
, "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 891));

892

__kmp_pu_os_idx = (int *)__kmp_allocate(sizeof(int) * __kmp_avail_proc)___kmp_allocate((sizeof(int) * __kmp_avail_proc), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 892);

893

int avail_ct = 0;

894

int i;

895

KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask)for (i = (__kmp_affin_fullMask)->begin(); i != (__kmp_affin_fullMask
)->end(); i = (__kmp_affin_fullMask)->next(i)) {

896

// Skip this proc if it is not included in the machine model.

897

if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)(__kmp_affin_fullMask)->is_set(i)) {

898

continue;

899

}

900

__kmp_pu_os_idx[avail_ct] = i; // suppose indices are flat

901

Address addr(2);

902

addr.labels[0] = i / (CHAR_BIT8 * sizeof(DWORD_PTR));

903

addr.labels[1] = i % (CHAR_BIT8 * sizeof(DWORD_PTR));

904

(*address2os)[avail_ct++] = AddrUnsPair(addr, i);

905

906

if (__kmp_affinity_verbose) {

907

KMP_INFORM(AffOSProcToGroup, "KMP_AFFINITY", i, addr.labels[0],__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AffOSProcToGroup
, "KMP_AFFINITY", i, addr.labels[0], addr.labels[1]), __kmp_msg_null
)

908

addr.labels[1])__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AffOSProcToGroup
, "KMP_AFFINITY", i, addr.labels[0], addr.labels[1]), __kmp_msg_null
);

909

}

910

}

911

912

if (__kmp_affinity_gran_levels < 0) {

913

if (__kmp_affinity_gran == affinity_gran_group) {

914

__kmp_affinity_gran_levels = 1;

915

} else if ((__kmp_affinity_gran == affinity_gran_fine) ||

916

(__kmp_affinity_gran == affinity_gran_thread)) {

917

__kmp_affinity_gran_levels = 0;

918

} else {

919

const char *gran_str = NULL__null;

920

if (__kmp_affinity_gran == affinity_gran_core) {

921

gran_str = "core";

922

} else if (__kmp_affinity_gran == affinity_gran_package) {

923

gran_str = "package";

924

} else if (__kmp_affinity_gran == affinity_gran_node) {

925

gran_str = "node";

926

} else {

927

KMP_ASSERT(0)((0) ? 0 : __kmp_debug_assert("0", "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 927));

928

}

929

930

// Warning: can't use affinity granularity \"gran\" with group topology

931

// method, using "thread"

932

__kmp_affinity_gran_levels = 0;

933

}

934

}

935

return 2;

936

}

937

938

#endif /* KMP_GROUP_AFFINITY */

939

940

#if KMP_ARCH_X860 || KMP_ARCH_X86_641

941

942

// Returns the smallest r such that (1 << r) >= count, i.e. the number of bits
// needed to encode `count` distinct values. Any count <= 1 yields 0.
//
// The shift is done on an unsigned value: with a signed `1 << r`, a count
// above 2^30 would require evaluating `1 << 31`, which is undefined behavior.
static int __kmp_cpuid_mask_width(int count) {
  if (count <= 1)
    return 0;

  const unsigned target = (unsigned)count;
  int r = 0;
  while ((1u << r) < target)
    ++r;
  return r;
}

949

950

// Per-thread-context record: the OS proc id, the values read from cpuid after
// binding to that proc, and the ids inferred from those values.
class apicThreadInfo {
public:
  // Param to __kmp_affinity_bind_thread.
  unsigned osId;

  // Values obtained from cpuid after binding.
  unsigned apicId;
  unsigned maxCoresPerPkg;
  unsigned maxThreadsPerPkg;

  // Ids inferred from the values above.
  unsigned pkgId;
  unsigned coreId;
  unsigned threadId;
};

960

961

static int __kmp_affinity_cmp_apicThreadInfo_os_id(const void *a,

962

const void *b) {

963

const apicThreadInfo *aa = (const apicThreadInfo *)a;

964

const apicThreadInfo *bb = (const apicThreadInfo *)b;

965

if (aa->osId < bb->osId)

966

return -1;

967

if (aa->osId > bb->osId)

968

return 1;

969

return 0;

970

}

971

972

static int __kmp_affinity_cmp_apicThreadInfo_phys_id(const void *a,

973

const void *b) {

974

const apicThreadInfo *aa = (const apicThreadInfo *)a;

975

const apicThreadInfo *bb = (const apicThreadInfo *)b;

976

if (aa->pkgId < bb->pkgId)

977

return -1;

978

if (aa->pkgId > bb->pkgId)

979

return 1;

980

if (aa->coreId < bb->coreId)

981

return -1;

982

if (aa->coreId > bb->coreId)

983

return 1;

984

if (aa->threadId < bb->threadId)

985

return -1;

986

if (aa->threadId > bb->threadId)

987

return 1;

988

return 0;

989

}

990

991

// On IA-32 architecture and Intel(R) 64 architecture, we attempt to use

992

// an algorithm which cycles through the available os threads, setting

993

// the current thread's affinity mask to that thread, and then retrieves

994

// the Apic Id for each thread context using the cpuid instruction.

995

static int __kmp_affinity_create_apicid_map(AddrUnsPair **address2os,

996

kmp_i18n_id_t *const msg_id) {

997

kmp_cpuid buf;

998

int rc;

999

*address2os = NULL__null;

1000

*msg_id = kmp_i18n_null;

1001

1002

// Check if cpuid leaf 4 is supported.

1003

__kmp_x86_cpuid(0, 0, &buf);

1004

if (buf.eax < 4) {

1005

*msg_id = kmp_i18n_str_NoLeaf4Support;

1006

return -1;

1007

}

1008

1009

// The algorithm used starts by setting the affinity to each available thread

1010

// and retrieving info from the cpuid instruction, so if we are not capable of

1011

// calling __kmp_get_system_affinity() and __kmp_set_system_affinity(), then we

1012

// need to do something else - use the defaults that we calculated from

1013

// issuing cpuid without binding to each proc.

1014

if (!KMP_AFFINITY_CAPABLE()(__kmp_affin_mask_size > 0)) {

1015

// Hack to try and infer the machine topology using only the data

1016

// available from cpuid on the current thread, and __kmp_xproc.

1017

KMP_ASSERT(__kmp_affinity_type == affinity_none)((__kmp_affinity_type == affinity_none) ? 0 : __kmp_debug_assert
("__kmp_affinity_type == affinity_none", "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 1017));

1018

1019

// Get an upper bound on the number of threads per package using cpuid(1).

1020

// On some OS/chip combinations where HT is supported by the chip but is

1021

// disabled, this value will be 2 on a single core chip. Usually, it will be

1022

// 2 if HT is enabled and 1 if HT is disabled.

1023

__kmp_x86_cpuid(1, 0, &buf);

1024

int maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;

1025

if (maxThreadsPerPkg == 0) {

1026

maxThreadsPerPkg = 1;

1027

}

1028

1029

// The num cores per pkg comes from cpuid(4). 1 must be added to the encoded

1030

// value.

1031

//

1032

// The author of cpu_count.cpp treated this only an upper bound on the

1033

// number of cores, but I haven't seen any cases where it was greater than

1034

// the actual number of cores, so we will treat it as exact in this block of

1035

// code.

1036

//

1037

// First, we need to check if cpuid(4) is supported on this chip. To see if

1038

// cpuid(n) is supported, issue cpuid(0) and check if eax has the value n or

1039

// greater.

1040

__kmp_x86_cpuid(0, 0, &buf);

1041

if (buf.eax >= 4) {

1042

__kmp_x86_cpuid(4, 0, &buf);

1043

nCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;

1044

} else {

1045

nCoresPerPkg = 1;

1046

}

1047

1048

// There is no way to reliably tell if HT is enabled without issuing the

1049

// cpuid instruction from every thread, and correlating the cpuid info, so

1050

// if the machine is not affinity capable, we assume that HT is off. We have

1051

// seen quite a few machines where maxThreadsPerPkg is 2, yet the machine

1052

// does not support HT.

1053

//

1054

// - Older OSes are usually found on machines with older chips, which do not

1055

// support HT.

1056

// - The performance penalty for mistakenly identifying a machine as HT when

1057

// it isn't (which results in blocktime being incorrectly set to 0) is

1058

// greater than the penalty for mistakenly identifying a machine as

1059

// being 1 thread/core when it is really HT enabled (which results in

1060

// blocktime being incorrectly set to a positive value).

1061

__kmp_ncores = __kmp_xproc;

1062

nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;

1063

__kmp_nThreadsPerCore = 1;

1064

if (__kmp_affinity_verbose) {

1065

KMP_INFORM(AffNotCapableUseLocCpuid, "KMP_AFFINITY")__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AffNotCapableUseLocCpuid
, "KMP_AFFINITY"), __kmp_msg_null);

1066

KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AvailableOSProc
, "KMP_AFFINITY", __kmp_avail_proc), __kmp_msg_null);

1067

if (__kmp_affinity_uniform_topology()) {

1068

KMP_INFORM(Uniform, "KMP_AFFINITY")__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_Uniform
, "KMP_AFFINITY"), __kmp_msg_null);

1069

} else {

1070

KMP_INFORM(NonUniform, "KMP_AFFINITY")__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_NonUniform
, "KMP_AFFINITY"), __kmp_msg_null);

1071

}

1072

KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_Topology
, "KMP_AFFINITY", nPackages, nCoresPerPkg, __kmp_nThreadsPerCore
, __kmp_ncores), __kmp_msg_null)

1073

__kmp_nThreadsPerCore, __kmp_ncores)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_Topology
, "KMP_AFFINITY", nPackages, nCoresPerPkg, __kmp_nThreadsPerCore
, __kmp_ncores), __kmp_msg_null);

1074

}

1075

return 0;

1076

}

1077

1078

// From here on, we can assume that it is safe to call

1079

// __kmp_get_system_affinity() and __kmp_set_system_affinity(), even if

1080

// __kmp_affinity_type = affinity_none.

1081

1082

// Save the affinity mask for the current thread.

1083

kmp_affin_mask_t *oldMask;

1084

KMP_CPU_ALLOC(oldMask)(oldMask = __kmp_affinity_dispatch->allocate_mask());

1085

KMP_ASSERT(oldMask != NULL)((oldMask != __null) ? 0 : __kmp_debug_assert("oldMask != NULL"
, "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 1085));

1086

__kmp_get_system_affinity(oldMask, TRUE)(oldMask)->get_system_affinity((!0));

1087

1088

// Run through each of the available contexts, binding the current thread

1089

// to it, and obtaining the pertinent information using the cpuid instr.

1090

//

1091

// The relevant information is:

1092

// - Apic Id: Bits 24:31 of ebx after issuing cpuid(1) - each thread context

1093

// has a unique Apic Id, which is of the form pkg# : core# : thread#.

1094

// - Max Threads Per Pkg: Bits 16:23 of ebx after issuing cpuid(1). The value

1095

// of this field determines the width of the core# + thread# fields in the

1096

// Apic Id. It is also an upper bound on the number of threads per

1097

// package, but it has been verified that situations happen where it is not

1098

// exact. In particular, on certain OS/chip combinations where Intel(R)

1099

// Hyper-Threading Technology is supported by the chip but has been

1100

// disabled, the value of this field will be 2 (for a single core chip).

1101

// On other OS/chip combinations supporting Intel(R) Hyper-Threading

1102

// Technology, the value of this field will be 1 when Intel(R)

1103

// Hyper-Threading Technology is disabled and 2 when it is enabled.

1104

// - Max Cores Per Pkg: Bits 26:31 of eax after issuing cpuid(4). The value

1105

// of this field (+1) determines the width of the core# field in the Apic

1106

// Id. The comments in "cpucount.cpp" say that this value is an upper

1107

// bound, but the IA-32 architecture manual says that it is exactly the

1108

// number of cores per package, and I haven't seen any case where it

1109

// wasn't.

1110

//

1111

// From this information, deduce the package Id, core Id, and thread Id,

1112

// and set the corresponding fields in the apicThreadInfo struct.

1113

unsigned i;

1114

apicThreadInfo *threadInfo = (apicThreadInfo *)__kmp_allocate(___kmp_allocate((__kmp_avail_proc * sizeof(apicThreadInfo)), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 1115)

1115

__kmp_avail_proc * sizeof(apicThreadInfo))___kmp_allocate((__kmp_avail_proc * sizeof(apicThreadInfo)), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 1115);

1116

unsigned nApics = 0;

1117

KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask)for (i = (__kmp_affin_fullMask)->begin(); i != (__kmp_affin_fullMask
)->end(); i = (__kmp_affin_fullMask)->next(i)) {

1118

// Skip this proc if it is not included in the machine model.

1119

if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)(__kmp_affin_fullMask)->is_set(i)) {

1120

continue;

1121

}

1122

KMP_DEBUG_ASSERT((int)nApics < __kmp_avail_proc)(((int)nApics < __kmp_avail_proc) ? 0 : __kmp_debug_assert
("(int)nApics < __kmp_avail_proc", "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 1122));

1123

1124

__kmp_affinity_dispatch->bind_thread(i);

1125

threadInfo[nApics].osId = i;

1126

1127

// The apic id and max threads per pkg come from cpuid(1).

1128

__kmp_x86_cpuid(1, 0, &buf);

1129

if (((buf.edx >> 9) & 1) == 0) {

1130

__kmp_set_system_affinity(oldMask, TRUE)(oldMask)->set_system_affinity((!0));

1131

__kmp_free(threadInfo)___kmp_free((threadInfo), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 1131);

1132

KMP_CPU_FREE(oldMask)__kmp_affinity_dispatch->deallocate_mask(oldMask);

1133

*msg_id = kmp_i18n_str_ApicNotPresent;

1134

return -1;

1135

}

1136

threadInfo[nApics].apicId = (buf.ebx >> 24) & 0xff;

1137

threadInfo[nApics].maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;

1138

if (threadInfo[nApics].maxThreadsPerPkg == 0) {

1139

threadInfo[nApics].maxThreadsPerPkg = 1;

1140

}

1141

1142

// Max cores per pkg comes from cpuid(4). 1 must be added to the encoded

1143

// value.

1144

//

1145

// First, we need to check if cpuid(4) is supported on this chip. To see if

1146

// cpuid(n) is supported, issue cpuid(0) and check if eax has the value n

1147

// or greater.

1148

__kmp_x86_cpuid(0, 0, &buf);

1149

if (buf.eax >= 4) {

1150

__kmp_x86_cpuid(4, 0, &buf);

1151

threadInfo[nApics].maxCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;

1152

} else {

1153

threadInfo[nApics].maxCoresPerPkg = 1;

1154

}

1155

1156

// Infer the pkgId / coreId / threadId using only the info obtained locally.

1157

int widthCT = __kmp_cpuid_mask_width(threadInfo[nApics].maxThreadsPerPkg);

1158

threadInfo[nApics].pkgId = threadInfo[nApics].apicId >> widthCT;

1159

1160

int widthC = __kmp_cpuid_mask_width(threadInfo[nApics].maxCoresPerPkg);

1161

int widthT = widthCT - widthC;

1162

if (widthT < 0) {

1163

// I've never seen this one happen, but I suppose it could, if the cpuid

1164

// instruction on a chip was really screwed up. Make sure to restore the

1165

// affinity mask before the tail call.

1166

__kmp_set_system_affinity(oldMask, TRUE)(oldMask)->set_system_affinity((!0));

1167

__kmp_free(threadInfo)___kmp_free((threadInfo), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 1167);

1168

KMP_CPU_FREE(oldMask)__kmp_affinity_dispatch->deallocate_mask(oldMask);

1169

*msg_id = kmp_i18n_str_InvalidCpuidInfo;

1170

return -1;

1171

}

1172

1173

int maskC = (1 << widthC) - 1;

1174

threadInfo[nApics].coreId = (threadInfo[nApics].apicId >> widthT) & maskC;

1175

1176

int maskT = (1 << widthT) - 1;

1177

threadInfo[nApics].threadId = threadInfo[nApics].apicId & maskT;

1178

1179

nApics++;

1180

}

1181

1182

// We've collected all the info we need.

1183

// Restore the old affinity mask for this thread.

1184

__kmp_set_system_affinity(oldMask, TRUE)(oldMask)->set_system_affinity((!0));

1185

1186

// If there's only one thread context to bind to, form an Address object

1187

// with depth 1 and return immediately (or, if affinity is off, set

1188

// address2os to NULL and return).

1189

//

1190

// If it is configured to omit the package level when there is only a single

1191

// package, the logic at the end of this routine won't work if there is only

1192

// a single thread - it would try to form an Address object with depth 0.

1193

KMP_ASSERT(nApics > 0)((nApics > 0) ? 0 : __kmp_debug_assert("nApics > 0", "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 1193));

1194

if (nApics == 1) {

1195

__kmp_ncores = nPackages = 1;

1196

__kmp_nThreadsPerCore = nCoresPerPkg = 1;

1197

if (__kmp_affinity_verbose) {

1198

char buf[KMP_AFFIN_MASK_PRINT_LEN1024];

1199

__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN1024, oldMask);

1200

1201

KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY")__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AffUseGlobCpuid
, "KMP_AFFINITY"), __kmp_msg_null);

1202

if (__kmp_affinity_respect_mask) {

1203

KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_InitOSProcSetRespect
, "KMP_AFFINITY", buf), __kmp_msg_null);

1204

} else {

1205

KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_InitOSProcSetNotRespect
, "KMP_AFFINITY", buf), __kmp_msg_null);

1206

}

1207

KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AvailableOSProc
, "KMP_AFFINITY", __kmp_avail_proc), __kmp_msg_null);

1208

KMP_INFORM(Uniform, "KMP_AFFINITY")__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_Uniform
, "KMP_AFFINITY"), __kmp_msg_null);

1209

KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_Topology
, "KMP_AFFINITY", nPackages, nCoresPerPkg, __kmp_nThreadsPerCore
, __kmp_ncores), __kmp_msg_null)

1210

__kmp_nThreadsPerCore, __kmp_ncores)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_Topology
, "KMP_AFFINITY", nPackages, nCoresPerPkg, __kmp_nThreadsPerCore
, __kmp_ncores), __kmp_msg_null);

1211

}

1212

1213

if (__kmp_affinity_type == affinity_none) {

1214

__kmp_free(threadInfo)___kmp_free((threadInfo), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 1214);

1215

KMP_CPU_FREE(oldMask)__kmp_affinity_dispatch->deallocate_mask(oldMask);

1216

return 0;

1217

}

1218

1219

*address2os = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair))___kmp_allocate((sizeof(AddrUnsPair)), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 1219);

1220

Address addr(1);

1221

addr.labels[0] = threadInfo[0].pkgId;

1222

(*address2os)[0] = AddrUnsPair(addr, threadInfo[0].osId);

1223

1224

if (__kmp_affinity_gran_levels < 0) {

1225

__kmp_affinity_gran_levels = 0;

1226

}

1227

1228

if (__kmp_affinity_verbose) {

1229

__kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);

1230

}

1231

1232

__kmp_free(threadInfo)___kmp_free((threadInfo), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 1232);

1233

KMP_CPU_FREE(oldMask)__kmp_affinity_dispatch->deallocate_mask(oldMask);

1234

return 1;

1235

}

1236

1237

// Sort the threadInfo table by physical Id.

1238

qsort(threadInfo, nApics, sizeof(*threadInfo),

1239

__kmp_affinity_cmp_apicThreadInfo_phys_id);

1240

1241

// The table is now sorted by pkgId / coreId / threadId, but we really don't

1242

// know the radix of any of the fields. pkgId's may be sparsely assigned among

1243

// the chips on a system. Although coreId's are usually assigned

1244

// [0 .. coresPerPkg-1] and threadId's are usually assigned

1245

// [0..threadsPerCore-1], we don't want to make any such assumptions.

1246

//

1247

// For that matter, we don't know what coresPerPkg and threadsPerCore (or the

1248

// total # packages) are at this point - we want to determine that now. We

1249

// only have an upper bound on the first two figures.

1250

//

1251

// We also perform a consistency check at this point: the values returned by

1252

// the cpuid instruction for any thread bound to a given package had better

1253

// return the same info for maxThreadsPerPkg and maxCoresPerPkg.

1254

nPackages = 1;

1255

nCoresPerPkg = 1;

1256

__kmp_nThreadsPerCore = 1;

1257

unsigned nCores = 1;

1258

1259

unsigned pkgCt = 1; // to determine radii

1260

unsigned lastPkgId = threadInfo[0].pkgId;

1261

unsigned coreCt = 1;

1262

unsigned lastCoreId = threadInfo[0].coreId;

1263

unsigned threadCt = 1;

1264

unsigned lastThreadId = threadInfo[0].threadId;

1265

1266

// intra-pkg consistency checks

1267

unsigned prevMaxCoresPerPkg = threadInfo[0].maxCoresPerPkg;

1268

unsigned prevMaxThreadsPerPkg = threadInfo[0].maxThreadsPerPkg;

1269

1270

for (i = 1; i < nApics; i++) {

1271

if (threadInfo[i].pkgId != lastPkgId) {

1272

nCores++;

1273

pkgCt++;

1274

lastPkgId = threadInfo[i].pkgId;

1275

if ((int)coreCt > nCoresPerPkg)

1276

nCoresPerPkg = coreCt;

1277

coreCt = 1;

1278

lastCoreId = threadInfo[i].coreId;

1279

if ((int)threadCt > __kmp_nThreadsPerCore)

1280

__kmp_nThreadsPerCore = threadCt;

1281

threadCt = 1;

1282

lastThreadId = threadInfo[i].threadId;

1283

1284

// This is a different package, so go on to the next iteration without

1285

// doing any consistency checks. Reset the consistency check vars, though.

1286

prevMaxCoresPerPkg = threadInfo[i].maxCoresPerPkg;

1287

prevMaxThreadsPerPkg = threadInfo[i].maxThreadsPerPkg;

1288

continue;

1289

}

1290

1291

if (threadInfo[i].coreId != lastCoreId) {

1292

nCores++;

1293

coreCt++;

1294

lastCoreId = threadInfo[i].coreId;

1295

if ((int)threadCt > __kmp_nThreadsPerCore)

1296

__kmp_nThreadsPerCore = threadCt;

1297

threadCt = 1;

1298

lastThreadId = threadInfo[i].threadId;

1299

} else if (threadInfo[i].threadId != lastThreadId) {

1300

threadCt++;

1301

lastThreadId = threadInfo[i].threadId;

1302

} else {

1303

__kmp_free(threadInfo)___kmp_free((threadInfo), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 1303);

1304

KMP_CPU_FREE(oldMask)__kmp_affinity_dispatch->deallocate_mask(oldMask);

1305

*msg_id = kmp_i18n_str_LegacyApicIDsNotUnique;

1306

return -1;

1307

}

1308

1309

// Check to make certain that the maxCoresPerPkg and maxThreadsPerPkg

1310

// fields agree between all the threads bound to a given package.

1311

if ((prevMaxCoresPerPkg != threadInfo[i].maxCoresPerPkg) ||

1312

(prevMaxThreadsPerPkg != threadInfo[i].maxThreadsPerPkg)) {

1313

__kmp_free(threadInfo)___kmp_free((threadInfo), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 1313);

1314

KMP_CPU_FREE(oldMask)__kmp_affinity_dispatch->deallocate_mask(oldMask);

1315

*msg_id = kmp_i18n_str_InconsistentCpuidInfo;

1316

return -1;

1317

}

1318

}

1319

nPackages = pkgCt;

1320

if ((int)coreCt > nCoresPerPkg)

1321

nCoresPerPkg = coreCt;

1322

if ((int)threadCt > __kmp_nThreadsPerCore)

1323

__kmp_nThreadsPerCore = threadCt;

1324

1325

// When affinity is off, this routine will still be called to set

1326

// __kmp_ncores, as well as __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.

1327

// Make sure all these vars are set correctly, and return now if affinity is

1328

// not enabled.

1329

__kmp_ncores = nCores;

1330

if (__kmp_affinity_verbose) {

1331

char buf[KMP_AFFIN_MASK_PRINT_LEN1024];

1332

__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN1024, oldMask);

1333

1334

KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY")__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AffUseGlobCpuid
, "KMP_AFFINITY"), __kmp_msg_null);

1335

if (__kmp_affinity_respect_mask) {

1336

KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_InitOSProcSetRespect
, "KMP_AFFINITY", buf), __kmp_msg_null);

1337

} else {

1338

KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_InitOSProcSetNotRespect
, "KMP_AFFINITY", buf), __kmp_msg_null);

1339

}

1340

KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AvailableOSProc
, "KMP_AFFINITY", __kmp_avail_proc), __kmp_msg_null);

1341

if (__kmp_affinity_uniform_topology()) {

1342

KMP_INFORM(Uniform, "KMP_AFFINITY")__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_Uniform
, "KMP_AFFINITY"), __kmp_msg_null);

1343

} else {

1344

KMP_INFORM(NonUniform, "KMP_AFFINITY")__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_NonUniform
, "KMP_AFFINITY"), __kmp_msg_null);

1345

}

1346

KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_Topology
, "KMP_AFFINITY", nPackages, nCoresPerPkg, __kmp_nThreadsPerCore
, __kmp_ncores), __kmp_msg_null)

1347

__kmp_nThreadsPerCore, __kmp_ncores)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_Topology
, "KMP_AFFINITY", nPackages, nCoresPerPkg, __kmp_nThreadsPerCore
, __kmp_ncores), __kmp_msg_null);

1348

}

1349

KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL)((__kmp_pu_os_idx == __null) ? 0 : __kmp_debug_assert("__kmp_pu_os_idx == __null"
, "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 1349));

1350

KMP_DEBUG_ASSERT(nApics == __kmp_avail_proc)((nApics == __kmp_avail_proc) ? 0 : __kmp_debug_assert("nApics == __kmp_avail_proc"
, "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 1350));

1351

__kmp_pu_os_idx = (int *)__kmp_allocate(sizeof(int) * __kmp_avail_proc)___kmp_allocate((sizeof(int) * __kmp_avail_proc), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 1351);

1352

for (i = 0; i < nApics; ++i) {

1353

__kmp_pu_os_idx[i] = threadInfo[i].osId;

1354

}

1355

if (__kmp_affinity_type == affinity_none) {

1356

__kmp_free(threadInfo)___kmp_free((threadInfo), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 1356);

1357

KMP_CPU_FREE(oldMask)__kmp_affinity_dispatch->deallocate_mask(oldMask);

1358

return 0;

1359

}

1360

1361

// Now that we've determined the number of packages, the number of cores per

1362

// package, and the number of threads per core, we can construct the data

1363

// structure that is to be returned.

1364

int pkgLevel = 0;

1365

int coreLevel = (nCoresPerPkg <= 1) ? -1 : 1;

1366

int threadLevel =

1367

(__kmp_nThreadsPerCore <= 1) ? -1 : ((coreLevel >= 0) ? 2 : 1);

1368

unsigned depth = (pkgLevel >= 0) + (coreLevel >= 0) + (threadLevel >= 0);

1369

1370

KMP_ASSERT(depth > 0)((depth > 0) ? 0 : __kmp_debug_assert("depth > 0", "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 1370));

1371

*address2os = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * nApics)___kmp_allocate((sizeof(AddrUnsPair) * nApics), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 1371);

1372

1373

for (i = 0; i < nApics; ++i) {

1374

Address addr(depth);

1375

unsigned os = threadInfo[i].osId;

1376

int d = 0;

1377

1378

if (pkgLevel >= 0) {

1379

addr.labels[d++] = threadInfo[i].pkgId;

1380

}

1381

if (coreLevel >= 0) {

1382

addr.labels[d++] = threadInfo[i].coreId;

1383

}

1384

if (threadLevel >= 0) {

1385

addr.labels[d++] = threadInfo[i].threadId;

1386

}

1387

(*address2os)[i] = AddrUnsPair(addr, os);

1388

}

1389

1390

if (__kmp_affinity_gran_levels < 0) {

1391

// Set the granularity level based on what levels are modeled in the machine

1392

// topology map.

1393

__kmp_affinity_gran_levels = 0;

1394

if ((threadLevel >= 0) && (__kmp_affinity_gran > affinity_gran_thread)) {

1395

__kmp_affinity_gran_levels++;

1396

}

1397

if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {

1398

__kmp_affinity_gran_levels++;

1399

}

1400

if ((pkgLevel >= 0) && (__kmp_affinity_gran > affinity_gran_package)) {

1401

__kmp_affinity_gran_levels++;

1402

}

1403

}

1404

1405

if (__kmp_affinity_verbose) {

1406

__kmp_affinity_print_topology(*address2os, nApics, depth, pkgLevel,

1407

coreLevel, threadLevel);

1408

}

1409

1410

__kmp_free(threadInfo)___kmp_free((threadInfo), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 1410);

1411

KMP_CPU_FREE(oldMask)__kmp_affinity_dispatch->deallocate_mask(oldMask);

1412

return depth;

1413

}

1414

1415

// Intel(R) microarchitecture code name Nehalem, Dunnington and later

1416

// architectures support a newer interface for specifying the x2APIC Ids,

1417

// based on cpuid leaf 11.

1418

static int __kmp_affinity_create_x2apicid_map(AddrUnsPair **address2os,

1419

kmp_i18n_id_t *const msg_id) {

1420

kmp_cpuid buf;

1421

*address2os = NULL__null;

1422

*msg_id = kmp_i18n_null;

1423

1424

// Check to see if cpuid leaf 11 is supported.

1425

__kmp_x86_cpuid(0, 0, &buf);

1426

if (buf.eax < 11) {

1427

*msg_id = kmp_i18n_str_NoLeaf11Support;

1428

return -1;

1429

}

1430

__kmp_x86_cpuid(11, 0, &buf);

1431

if (buf.ebx == 0) {

1432

*msg_id = kmp_i18n_str_NoLeaf11Support;

1433

return -1;

1434

}

1435

1436

// Find the number of levels in the machine topology. While we're at it, get

1437

// the default values for __kmp_nThreadsPerCore & nCoresPerPkg. We will try to

1438

// get more accurate values later by explicitly counting them, but get

1439

// reasonable defaults now, in case we return early.

1440

int level;

1441

int threadLevel = -1;

1442

int coreLevel = -1;

1443

int pkgLevel = -1;

1444

__kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;

1445

1446

for (level = 0;; level++) {

1447

if (level > 31) {

1448

// FIXME: Hack for DPD200163180

1449

//

1450

// If level is big then something went wrong -> exiting

1451

//

1452

// There could actually be 32 valid levels in the machine topology, but so

1453

// far, the only machine we have seen which does not exit this loop before

1454

// iteration 32 has fubar x2APIC settings.

1455

//

1456

// For now, just reject this case based upon loop trip count.

1457

*msg_id = kmp_i18n_str_InvalidCpuidInfo;

1458

return -1;

1459

}

1460

__kmp_x86_cpuid(11, level, &buf);

1461

if (buf.ebx == 0) {

1462

if (pkgLevel < 0) {

1463

// Will infer nPackages from __kmp_xproc

1464

pkgLevel = level;

1465

level++;

1466

}

1467

break;

1468

}

1469

int kind = (buf.ecx >> 8) & 0xff;

1470

if (kind == 1) {

1471

// SMT level

1472

threadLevel = level;

1473

coreLevel = -1;

1474

pkgLevel = -1;

1475

__kmp_nThreadsPerCore = buf.ebx & 0xffff;

1476

if (__kmp_nThreadsPerCore == 0) {

1477

*msg_id = kmp_i18n_str_InvalidCpuidInfo;

1478

return -1;

1479

}

1480

} else if (kind == 2) {

1481

// core level

1482

coreLevel = level;

1483

pkgLevel = -1;

1484

nCoresPerPkg = buf.ebx & 0xffff;

1485

if (nCoresPerPkg == 0) {

1486

*msg_id = kmp_i18n_str_InvalidCpuidInfo;

1487

return -1;

1488

}

1489

} else {

1490

if (level <= 0) {

1491

*msg_id = kmp_i18n_str_InvalidCpuidInfo;

1492

return -1;

1493

}

1494

if (pkgLevel >= 0) {

1495

continue;

1496

}

1497

pkgLevel = level;

1498

nPackages = buf.ebx & 0xffff;

1499

if (nPackages == 0) {

1500

*msg_id = kmp_i18n_str_InvalidCpuidInfo;

1501

return -1;

1502

}

1503

}

1504

}

1505

int depth = level;

1506

1507

// In the above loop, "level" was counted from the finest level (usually

1508

// thread) to the coarsest. The caller expects that we will place the labels

1509

// in (*address2os)[].first.labels[] in the inverse order, so we need to

1510

// invert the vars saying which level means what.

1511

if (threadLevel >= 0) {

1512

threadLevel = depth - threadLevel - 1;

1513

}

1514

if (coreLevel >= 0) {

1515

coreLevel = depth - coreLevel - 1;

1516

}

1517

KMP_DEBUG_ASSERT(pkgLevel >= 0)((pkgLevel >= 0) ? 0 : __kmp_debug_assert("pkgLevel >= 0"
, "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 1517));

1518

pkgLevel = depth - pkgLevel - 1;

1519

1520

// The algorithm used starts by setting the affinity to each available thread

1521

// and retrieving info from the cpuid instruction, so if we are not capable of

1522

// calling __kmp_get_system_affinity() and __kmp_set_system_affinity(), then we

1523

// need to do something else - use the defaults that we calculated from

1524

// issuing cpuid without binding to each proc.

1525

if (!KMP_AFFINITY_CAPABLE()(__kmp_affin_mask_size > 0)) {

1526

// Hack to try and infer the machine topology using only the data

1527

// available from cpuid on the current thread, and __kmp_xproc.

1528

KMP_ASSERT(__kmp_affinity_type == affinity_none)((__kmp_affinity_type == affinity_none) ? 0 : __kmp_debug_assert
("__kmp_affinity_type == affinity_none", "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 1528));

1529

1530

__kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore;

1531

nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;

1532

if (__kmp_affinity_verbose) {

1533

KMP_INFORM(AffNotCapableUseLocCpuidL11, "KMP_AFFINITY")__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AffNotCapableUseLocCpuidL11
, "KMP_AFFINITY"), __kmp_msg_null);

1534

KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AvailableOSProc
, "KMP_AFFINITY", __kmp_avail_proc), __kmp_msg_null);

1535

if (__kmp_affinity_uniform_topology()) {

1536

KMP_INFORM(Uniform, "KMP_AFFINITY")__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_Uniform
, "KMP_AFFINITY"), __kmp_msg_null);

1537

} else {

1538

KMP_INFORM(NonUniform, "KMP_AFFINITY")__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_NonUniform
, "KMP_AFFINITY"), __kmp_msg_null);

1539

}

1540

KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_Topology
, "KMP_AFFINITY", nPackages, nCoresPerPkg, __kmp_nThreadsPerCore
, __kmp_ncores), __kmp_msg_null)

1541

__kmp_nThreadsPerCore, __kmp_ncores)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_Topology
, "KMP_AFFINITY", nPackages, nCoresPerPkg, __kmp_nThreadsPerCore
, __kmp_ncores), __kmp_msg_null);

1542

}

1543

return 0;

1544

}

1545

1546

// From here on, we can assume that it is safe to call

1547

// __kmp_get_system_affinity() and __kmp_set_system_affinity(), even if

1548

// __kmp_affinity_type = affinity_none.

1549

1550

// Save the affinity mask for the current thread.

1551

kmp_affin_mask_t *oldMask;

1552

KMP_CPU_ALLOC(oldMask)(oldMask = __kmp_affinity_dispatch->allocate_mask());

1553

__kmp_get_system_affinity(oldMask, TRUE)(oldMask)->get_system_affinity((!0));

1554

1555

// Allocate the data structure to be returned.

1556

AddrUnsPair *retval =

1557

(AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * __kmp_avail_proc)___kmp_allocate((sizeof(AddrUnsPair) * __kmp_avail_proc), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 1557);

1558

1559

// Run through each of the available contexts, binding the current thread

1560

// to it, and obtaining the pertinent information using the cpuid instr.

1561

unsigned int proc;

1562

int nApics = 0;

1563

KMP_CPU_SET_ITERATE(proc, __kmp_affin_fullMask)for (proc = (__kmp_affin_fullMask)->begin(); proc != (__kmp_affin_fullMask
)->end(); proc = (__kmp_affin_fullMask)->next(proc)) {

1564

// Skip this proc if it is not included in the machine model.

1565

if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)(__kmp_affin_fullMask)->is_set(proc)) {

1566

continue;

1567

}

1568

KMP_DEBUG_ASSERT(nApics < __kmp_avail_proc)((nApics < __kmp_avail_proc) ? 0 : __kmp_debug_assert("nApics < __kmp_avail_proc"
, "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 1568));

1569

1570

__kmp_affinity_dispatch->bind_thread(proc);

1571

1572

// Extract labels for each level in the machine topology map from Apic ID.

1573

Address addr(depth);

1574

int prev_shift = 0;

1575

1576

for (level = 0; level < depth; level++) {

1577

__kmp_x86_cpuid(11, level, &buf);

1578

unsigned apicId = buf.edx;

1579

if (buf.ebx == 0) {

1580

if (level != depth - 1) {

1581

KMP_CPU_FREE(oldMask)__kmp_affinity_dispatch->deallocate_mask(oldMask);

1582

*msg_id = kmp_i18n_str_InconsistentCpuidInfo;

1583

return -1;

1584

}

1585

addr.labels[depth - level - 1] = apicId >> prev_shift;

1586

level++;

1587

break;

1588

}

1589

int shift = buf.eax & 0x1f;

1590

int mask = (1 << shift) - 1;

1591

addr.labels[depth - level - 1] = (apicId & mask) >> prev_shift;

1592

prev_shift = shift;

1593

}

1594

if (level != depth) {

1595

KMP_CPU_FREE(oldMask)__kmp_affinity_dispatch->deallocate_mask(oldMask);

1596

*msg_id = kmp_i18n_str_InconsistentCpuidInfo;

1597

return -1;

1598

}

1599

1600

retval[nApics] = AddrUnsPair(addr, proc);

1601

nApics++;

1602

}

1603

1604

// We've collected all the info we need.

1605

// Restore the old affinity mask for this thread.

1606

__kmp_set_system_affinity(oldMask, TRUE)(oldMask)->set_system_affinity((!0));

1607

1608

// If there's only one thread context to bind to, return now.

1609

KMP_ASSERT(nApics > 0)((nApics > 0) ? 0 : __kmp_debug_assert("nApics > 0", "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 1609));

1610

if (nApics == 1) {

1611

__kmp_ncores = nPackages = 1;

1612

__kmp_nThreadsPerCore = nCoresPerPkg = 1;

1613

if (__kmp_affinity_verbose) {

1614

char buf[KMP_AFFIN_MASK_PRINT_LEN1024];

1615

__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN1024, oldMask);

1616

1617

KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY")__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AffUseGlobCpuidL11
, "KMP_AFFINITY"), __kmp_msg_null);

1618

if (__kmp_affinity_respect_mask) {

1619

KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_InitOSProcSetRespect
, "KMP_AFFINITY", buf), __kmp_msg_null);

1620

} else {

1621

KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_InitOSProcSetNotRespect
, "KMP_AFFINITY", buf), __kmp_msg_null);

1622

}

1623

KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AvailableOSProc
, "KMP_AFFINITY", __kmp_avail_proc), __kmp_msg_null);

1624

KMP_INFORM(Uniform, "KMP_AFFINITY")__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_Uniform
, "KMP_AFFINITY"), __kmp_msg_null);

1625

KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_Topology
, "KMP_AFFINITY", nPackages, nCoresPerPkg, __kmp_nThreadsPerCore
, __kmp_ncores), __kmp_msg_null)

1626

__kmp_nThreadsPerCore, __kmp_ncores)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_Topology
, "KMP_AFFINITY", nPackages, nCoresPerPkg, __kmp_nThreadsPerCore
, __kmp_ncores), __kmp_msg_null);

1627

}

1628

1629

if (__kmp_affinity_type == affinity_none) {

1630

__kmp_free(retval)___kmp_free((retval), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 1630);

1631

KMP_CPU_FREE(oldMask)__kmp_affinity_dispatch->deallocate_mask(oldMask);

1632

return 0;

1633

}

1634

1635

// Form an Address object which only includes the package level.

1636

Address addr(1);

1637

addr.labels[0] = retval[0].first.labels[pkgLevel];

1638

retval[0].first = addr;

1639

1640

if (__kmp_affinity_gran_levels < 0) {

1641

__kmp_affinity_gran_levels = 0;

1642

}

1643

1644

if (__kmp_affinity_verbose) {

1645

__kmp_affinity_print_topology(retval, 1, 1, 0, -1, -1);

1646

}

1647

1648

*address2os = retval;

1649

KMP_CPU_FREE(oldMask)__kmp_affinity_dispatch->deallocate_mask(oldMask);

1650

return 1;

1651

}

1652

1653

// Sort the table by physical Id.

1654

qsort(retval, nApics, sizeof(*retval), __kmp_affinity_cmp_Address_labels);

1655

1656

// Find the radix at each of the levels.

1657

unsigned *totals = (unsigned *)__kmp_allocate(depth * sizeof(unsigned))___kmp_allocate((depth * sizeof(unsigned)), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 1657);

1658

unsigned *counts = (unsigned *)__kmp_allocate(depth * sizeof(unsigned))___kmp_allocate((depth * sizeof(unsigned)), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 1658);

1659

unsigned *maxCt = (unsigned *)__kmp_allocate(depth * sizeof(unsigned))___kmp_allocate((depth * sizeof(unsigned)), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 1659);

1660

unsigned *last = (unsigned *)__kmp_allocate(depth * sizeof(unsigned))___kmp_allocate((depth * sizeof(unsigned)), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 1660);

1661

for (level = 0; level < depth; level++) {

1662

totals[level] = 1;

1663

maxCt[level] = 1;

1664

counts[level] = 1;

1665

last[level] = retval[0].first.labels[level];

1666

}

1667

1668

// From here on, the iteration variable "level" runs from the finest level to

1669

// the coarsest, i.e. we iterate forward through

1670

// (*address2os)[].first.labels[] - in the previous loops, we iterated

1671

// backwards.

1672

for (proc = 1; (int)proc < nApics; proc++) {

1673

int level;

1674

for (level = 0; level < depth; level++) {

1675

if (retval[proc].first.labels[level] != last[level]) {

1676

int j;

1677

for (j = level + 1; j < depth; j++) {

1678

totals[j]++;

1679

counts[j] = 1;

1680

// The line below causes printing incorrect topology information in

1681

// case the max value for some level (maxCt[level]) is encountered

1682

// earlier than some less value while going through the array. For

1683

// example, suppose pkg0 has 4 cores and pkg1 has 2 cores. Then

1684

// maxCt[1] == 2

1685

// whereas it must be 4.

1686

// TODO!!! Check if it can be commented safely

1687

// maxCt[j] = 1;

1688

last[j] = retval[proc].first.labels[j];

1689

}

1690

totals[level]++;

1691

counts[level]++;

1692

if (counts[level] > maxCt[level]) {

1693

maxCt[level] = counts[level];

1694

}

1695

last[level] = retval[proc].first.labels[level];

1696

break;

1697

} else if (level == depth - 1) {

1698

__kmp_free(last)___kmp_free((last), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 1698);

1699

__kmp_free(maxCt)___kmp_free((maxCt), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 1699);

1700

__kmp_free(counts)___kmp_free((counts), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 1700);

1701

__kmp_free(totals)___kmp_free((totals), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 1701);

1702

__kmp_free(retval)___kmp_free((retval), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 1702);

1703

KMP_CPU_FREE(oldMask)__kmp_affinity_dispatch->deallocate_mask(oldMask);

1704

*msg_id = kmp_i18n_str_x2ApicIDsNotUnique;

1705

return -1;

1706

}

1707

}

1708

}

1709

1710

// When affinity is off, this routine will still be called to set

1711

// __kmp_ncores, as well as __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.

1712

// Make sure all these vars are set correctly, and return if affinity is not

1713

// enabled.

1714

if (threadLevel >= 0) {

1715

__kmp_nThreadsPerCore = maxCt[threadLevel];

1716

} else {

1717

__kmp_nThreadsPerCore = 1;

1718

}

1719

nPackages = totals[pkgLevel];

1720

1721

if (coreLevel >= 0) {

1722

__kmp_ncores = totals[coreLevel];

1723

nCoresPerPkg = maxCt[coreLevel];

1724

} else {

1725

__kmp_ncores = nPackages;

1726

nCoresPerPkg = 1;

1727

}

1728

1729

// Check to see if the machine topology is uniform

1730

unsigned prod = maxCt[0];

1731

for (level = 1; level < depth; level++) {

1732

prod *= maxCt[level];

1733

}

1734

bool uniform = (prod == totals[level - 1]);

1735

1736

// Print the machine topology summary.

1737

if (__kmp_affinity_verbose) {

1738

char mask[KMP_AFFIN_MASK_PRINT_LEN1024];

1739

__kmp_affinity_print_mask(mask, KMP_AFFIN_MASK_PRINT_LEN1024, oldMask);

1740

1741

KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY")__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AffUseGlobCpuidL11
, "KMP_AFFINITY"), __kmp_msg_null);

1742

if (__kmp_affinity_respect_mask) {

1743

KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", mask)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_InitOSProcSetRespect
, "KMP_AFFINITY", mask), __kmp_msg_null);

1744

} else {

1745

KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", mask)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_InitOSProcSetNotRespect
, "KMP_AFFINITY", mask), __kmp_msg_null);

1746

}

1747

KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AvailableOSProc
, "KMP_AFFINITY", __kmp_avail_proc), __kmp_msg_null);

1748

if (uniform) {

1749

KMP_INFORM(Uniform, "KMP_AFFINITY")__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_Uniform
, "KMP_AFFINITY"), __kmp_msg_null);

1750

} else {

1751

KMP_INFORM(NonUniform, "KMP_AFFINITY")__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_NonUniform
, "KMP_AFFINITY"), __kmp_msg_null);

1752

}

1753

1754

kmp_str_buf_t buf;

1755

__kmp_str_buf_init(&buf){ (&buf)->str = (&buf)->bulk; (&buf)->size
= sizeof((&buf)->bulk); (&buf)->used = 0; (&
buf)->bulk[0] = 0; };

1756

1757

__kmp_str_buf_print(&buf, "%d", totals[0]);

1758

for (level = 1; level <= pkgLevel; level++) {

1759

__kmp_str_buf_print(&buf, " x %d", maxCt[level]);

1760

}

1761

KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, nCoresPerPkg,__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_TopologyExtra
, "KMP_AFFINITY", buf.str, nCoresPerPkg, __kmp_nThreadsPerCore
, __kmp_ncores), __kmp_msg_null)

1762

__kmp_nThreadsPerCore, __kmp_ncores)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_TopologyExtra
, "KMP_AFFINITY", buf.str, nCoresPerPkg, __kmp_nThreadsPerCore
, __kmp_ncores), __kmp_msg_null);

1763

1764

__kmp_str_buf_free(&buf);

1765

}

1766

KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL)((__kmp_pu_os_idx == __null) ? 0 : __kmp_debug_assert("__kmp_pu_os_idx == __null"
, "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 1766));

1767

KMP_DEBUG_ASSERT(nApics == __kmp_avail_proc)((nApics == __kmp_avail_proc) ? 0 : __kmp_debug_assert("nApics == __kmp_avail_proc"
, "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 1767));

1768

__kmp_pu_os_idx = (int *)__kmp_allocate(sizeof(int) * __kmp_avail_proc)___kmp_allocate((sizeof(int) * __kmp_avail_proc), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 1768);

1769

for (proc = 0; (int)proc < nApics; ++proc) {

1770

__kmp_pu_os_idx[proc] = retval[proc].second;

1771

}

1772

if (__kmp_affinity_type == affinity_none) {

1773

__kmp_free(last)___kmp_free((last), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 1773);

1774

__kmp_free(maxCt)___kmp_free((maxCt), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 1774);

1775

__kmp_free(counts)___kmp_free((counts), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 1775);

1776

__kmp_free(totals)___kmp_free((totals), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 1776);

1777

__kmp_free(retval)___kmp_free((retval), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 1777);

1778

KMP_CPU_FREE(oldMask)__kmp_affinity_dispatch->deallocate_mask(oldMask);

1779

return 0;

1780

}

1781

1782

// Find any levels with radix 1, and remove them from the map

1783

// (except for the package level).

1784

int new_depth = 0;

1785

for (level = 0; level < depth; level++) {

1786

if ((maxCt[level] == 1) && (level != pkgLevel)) {

1787

continue;

1788

}

1789

new_depth++;

1790

}

1791

1792

// If we are removing any levels, allocate a new vector to return,

1793

// and copy the relevant information to it.

1794

if (new_depth != depth) {

1795

AddrUnsPair *new_retval =

1796

(AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * nApics)___kmp_allocate((sizeof(AddrUnsPair) * nApics), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 1796);

1797

for (proc = 0; (int)proc < nApics; proc++) {

1798

Address addr(new_depth);

1799

new_retval[proc] = AddrUnsPair(addr, retval[proc].second);

1800

}

1801

int new_level = 0;

1802

int newPkgLevel = -1;

1803

int newCoreLevel = -1;

1804

int newThreadLevel = -1;

1805

int i;

1806

for (level = 0; level < depth; level++) {

1807

if ((maxCt[level] == 1) && (level != pkgLevel)) {

1808

// Remove this level. Never remove the package level

1809

continue;

1810

}

1811

if (level == pkgLevel) {

1812

newPkgLevel = new_level;

1813

}

1814

if (level == coreLevel) {

1815

newCoreLevel = new_level;

1816

}

1817

if (level == threadLevel) {

1818

newThreadLevel = new_level;

1819

}

1820

for (proc = 0; (int)proc < nApics; proc++) {

1821

new_retval[proc].first.labels[new_level] =

1822

retval[proc].first.labels[level];

1823

}

1824

new_level++;

1825

}

1826

1827

__kmp_free(retval)___kmp_free((retval), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 1827);

1828

retval = new_retval;

1829

depth = new_depth;

1830

pkgLevel = newPkgLevel;

1831

coreLevel = newCoreLevel;

1832

threadLevel = newThreadLevel;

1833

}

1834

1835

if (__kmp_affinity_gran_levels < 0) {

1836

// Set the granularity level based on what levels are modeled

1837

// in the machine topology map.

1838

__kmp_affinity_gran_levels = 0;

1839

if ((threadLevel >= 0) && (__kmp_affinity_gran > affinity_gran_thread)) {

1840

__kmp_affinity_gran_levels++;

1841

}

1842

if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {

1843

__kmp_affinity_gran_levels++;

1844

}

1845

if (__kmp_affinity_gran > affinity_gran_package) {

1846

__kmp_affinity_gran_levels++;

1847

}

1848

}

1849

1850

if (__kmp_affinity_verbose) {

1851

__kmp_affinity_print_topology(retval, nApics, depth, pkgLevel, coreLevel,

1852

threadLevel);

1853

}

1854

1855

__kmp_free(last)___kmp_free((last), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 1855);

1856

__kmp_free(maxCt)___kmp_free((maxCt), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 1856);

1857

__kmp_free(counts)___kmp_free((counts), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 1857);

1858

__kmp_free(totals)___kmp_free((totals), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 1858);

1859

KMP_CPU_FREE(oldMask)__kmp_affinity_dispatch->deallocate_mask(oldMask);

1860

*address2os = retval;

1861

return depth;

1862

}

1863

1864

#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

1865

1866

// Slot numbers inside one parsed cpuinfo record. A record is a plain array of
// unsigned values; the parser in this file stores the "processor" value in
// slot 0, "thread id" in slot 1, "core id" in slot 2, "physical id" in slot 3,
// and each "node_<n> id" value in slot 4 + n.
// NOTE(review): the trailing digit on each macro name looks like the macro's
// expansion fused onto the identifier by the listing export (other lines in
// this file also spell osIdIndex / nodeIdIndex). The names are kept exactly as
// they appear so that the existing uses keep resolving - confirm against the
// pristine source before renaming.
#define osIdIndex0 0
#define threadIdIndex1 1
#define coreIdIndex2 2
#define pkgIdIndex3 3
#define nodeIdIndex4 4

// One record: pointer to the first of (maxIndex + 1) unsigned slots.
typedef unsigned *ProcCpuInfo;

// Highest slot number in use. Starts at the package slot and is raised by the
// cpuinfo parser when it encounters node_<n> fields.
static unsigned maxIndex = pkgIdIndex3;

1874

1875

static int __kmp_affinity_cmp_ProcCpuInfo_os_id(const void *a, const void *b) {

1876

const unsigned *aa = (const unsigned *)a;

1877

const unsigned *bb = (const unsigned *)b;

1878

if (aa[osIdIndex0] < bb[osIdIndex0])

1879

return -1;

1880

if (aa[osIdIndex0] > bb[osIdIndex0])

1881

return 1;

1882

return 0;

1883

}

1884

1885

static int __kmp_affinity_cmp_ProcCpuInfo_phys_id(const void *a,

1886

const void *b) {

1887

unsigned i;

1888

const unsigned *aa = *(unsigned *const *)a;

1889

const unsigned *bb = *(unsigned *const *)b;

1890

for (i = maxIndex;; i--) {

1891

if (aa[i] < bb[i])

1892

return -1;

1893

if (aa[i] > bb[i])

1894

return 1;

1895

if (i == osIdIndex0)

1896

break;

1897

}

1898

return 0;

1899

}

1900

1901

#if KMP_USE_HIER_SCHED0

1902

// Set the array sizes for the hierarchy layers

1903

static void __kmp_dispatch_set_hierarchy_values() {

1904

// Set the maximum number of L1's to number of cores

1905

// Set the maximum number of L2's to to either number of cores / 2 for

1906

// Intel(R) Xeon Phi(TM) coprocessor formally codenamed Knights Landing

1907

// Or the number of cores for Intel(R) Xeon(R) processors

1908

// Set the maximum number of NUMA nodes and L3's to number of packages

1909

__kmp_hier_max_units[kmp_hier_layer_e::LAYER_THREAD + 1] =

1910

nPackages * nCoresPerPkg * __kmp_nThreadsPerCore;

1911

__kmp_hier_max_units[kmp_hier_layer_e::LAYER_L1 + 1] = __kmp_ncores;

1912

#if KMP_ARCH_X86_641 && (KMP_OS_LINUX1 || KMP_OS_WINDOWS0)

1913

if (__kmp_mic_type >= mic3)

1914

__kmp_hier_max_units[kmp_hier_layer_e::LAYER_L2 + 1] = __kmp_ncores / 2;

1915

else

1916

#endif // KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)

1917

__kmp_hier_max_units[kmp_hier_layer_e::LAYER_L2 + 1] = __kmp_ncores;

1918

__kmp_hier_max_units[kmp_hier_layer_e::LAYER_L3 + 1] = nPackages;

1919

__kmp_hier_max_units[kmp_hier_layer_e::LAYER_NUMA + 1] = nPackages;

1920

__kmp_hier_max_units[kmp_hier_layer_e::LAYER_LOOP + 1] = 1;

1921

// Set the number of threads per unit

1922

// Number of hardware threads per L1/L2/L3/NUMA/LOOP

1923

__kmp_hier_threads_per[kmp_hier_layer_e::LAYER_THREAD + 1] = 1;

1924

__kmp_hier_threads_per[kmp_hier_layer_e::LAYER_L1 + 1] =

1925

__kmp_nThreadsPerCore;

1926

#if KMP_ARCH_X86_641 && (KMP_OS_LINUX1 || KMP_OS_WINDOWS0)

1927

if (__kmp_mic_type >= mic3)

1928

__kmp_hier_threads_per[kmp_hier_layer_e::LAYER_L2 + 1] =

1929

2 * __kmp_nThreadsPerCore;

1930

else

1931

#endif // KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)

1932

__kmp_hier_threads_per[kmp_hier_layer_e::LAYER_L2 + 1] =

1933

__kmp_nThreadsPerCore;

1934

__kmp_hier_threads_per[kmp_hier_layer_e::LAYER_L3 + 1] =

1935

nCoresPerPkg * __kmp_nThreadsPerCore;

1936

__kmp_hier_threads_per[kmp_hier_layer_e::LAYER_NUMA + 1] =

1937

nCoresPerPkg * __kmp_nThreadsPerCore;

1938

__kmp_hier_threads_per[kmp_hier_layer_e::LAYER_LOOP + 1] =

1939

nPackages * nCoresPerPkg * __kmp_nThreadsPerCore;

1940

}

1941

1942

// Return the index into the hierarchy for this tid and layer type (L1, L2, etc)

1943

// i.e., this thread's L1 or this thread's L2, etc.

1944

int __kmp_dispatch_get_index(int tid, kmp_hier_layer_e type) {

1945

int index = type + 1;

1946

int num_hw_threads = __kmp_hier_max_units[kmp_hier_layer_e::LAYER_THREAD + 1];

1947

KMP_DEBUG_ASSERT(type != kmp_hier_layer_e::LAYER_LAST)((type != kmp_hier_layer_e::LAYER_LAST) ? 0 : __kmp_debug_assert
("type != kmp_hier_layer_e::LAYER_LAST", "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 1947));

1948

if (type == kmp_hier_layer_e::LAYER_THREAD)

1949

return tid;

1950

else if (type == kmp_hier_layer_e::LAYER_LOOP)

1951

return 0;

1952

KMP_DEBUG_ASSERT(__kmp_hier_max_units[index] != 0)((__kmp_hier_max_units[index] != 0) ? 0 : __kmp_debug_assert(
"__kmp_hier_max_units[index] != 0", "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 1952));

1953

if (tid >= num_hw_threads)

1954

tid = tid % num_hw_threads;

1955

return (tid / __kmp_hier_threads_per[index]) % __kmp_hier_max_units[index];

1956

}

1957

1958

// Return the number of t1's per t2

1959

int __kmp_dispatch_get_t1_per_t2(kmp_hier_layer_e t1, kmp_hier_layer_e t2) {

1960

int i1 = t1 + 1;

1961

int i2 = t2 + 1;

1962

KMP_DEBUG_ASSERT(i1 <= i2)((i1 <= i2) ? 0 : __kmp_debug_assert("i1 <= i2", "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 1962));

1963

KMP_DEBUG_ASSERT(t1 != kmp_hier_layer_e::LAYER_LAST)((t1 != kmp_hier_layer_e::LAYER_LAST) ? 0 : __kmp_debug_assert
("t1 != kmp_hier_layer_e::LAYER_LAST", "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 1963));

1964

KMP_DEBUG_ASSERT(t2 != kmp_hier_layer_e::LAYER_LAST)((t2 != kmp_hier_layer_e::LAYER_LAST) ? 0 : __kmp_debug_assert
("t2 != kmp_hier_layer_e::LAYER_LAST", "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 1964));

1965

KMP_DEBUG_ASSERT(__kmp_hier_threads_per[i1] != 0)((__kmp_hier_threads_per[i1] != 0) ? 0 : __kmp_debug_assert("__kmp_hier_threads_per[i1] != 0"
, "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 1965));

1966

// (nthreads/t2) / (nthreads/t1) = t1 / t2

1967

return __kmp_hier_threads_per[i2] / __kmp_hier_threads_per[i1];

1968

}

1969

#endif // KMP_USE_HIER_SCHED

1970

1971

// Parse /proc/cpuinfo (or an alternate file in the same format) to obtain the

1972

// affinity map.

1973

static int __kmp_affinity_create_cpuinfo_map(AddrUnsPair **address2os,

1974

int *line,

1975

kmp_i18n_id_t *const msg_id,

1976

FILE *f) {

1977

*address2os = NULL__null;

1978

*msg_id = kmp_i18n_null;

1979

1980

// Scan of the file, and count the number of "processor" (osId) fields,

1981

// and find the highest value of <n> for a node_<n> field.

1982

char buf[256];

1983

unsigned num_records = 0;

1984

while (!feof(f)) {

1985

buf[sizeof(buf) - 1] = 1;

1986

if (!fgets(buf, sizeof(buf), f)) {

1987

// Read errors presumably because of EOF

1988

break;

1989

}

1990

1991

char s1[] = "processor";

1992

if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {

1993

num_records++;

1994

continue;

1995

}

1996

1997

// FIXME - this will match "node_<n> <garbage>"

1998

unsigned level;

1999

if (KMP_SSCANFsscanf(buf, "node_%u id", &level) == 1) {

2000

if (nodeIdIndex4 + level >= maxIndex) {

2001

maxIndex = nodeIdIndex4 + level;

2002

}

2003

continue;

2004

}

2005

}

2006

2007

// Check for empty file / no valid processor records, or too many. The number

2008

// of records can't exceed the number of valid bits in the affinity mask.

2009

if (num_records == 0) {

2010

*line = 0;

2011

*msg_id = kmp_i18n_str_NoProcRecords;

2012

return -1;

2013

}

2014

if (num_records > (unsigned)__kmp_xproc) {

2015

*line = 0;

2016

*msg_id = kmp_i18n_str_TooManyProcRecords;

2017

return -1;

2018

}

2019

2020

// Set the file pointer back to the beginning, so that we can scan the file

2021

// again, this time performing a full parse of the data. Allocate a vector of

2022

// ProcCpuInfo object, where we will place the data. Adding an extra element

2023

// at the end allows us to remove a lot of extra checks for termination

2024

// conditions.

2025

if (fseek(f, 0, SEEK_SET0) != 0) {

2026

*line = 0;

2027

*msg_id = kmp_i18n_str_CantRewindCpuinfo;

2028

return -1;

2029

}

2030

2031

// Allocate the array of records to store the proc info in. The dummy

2032

// element at the end makes the logic in filling them out easier to code.

2033

unsigned **threadInfo =

2034

(unsigned **)__kmp_allocate((num_records + 1) * sizeof(unsigned *))___kmp_allocate(((num_records + 1) * sizeof(unsigned *)), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 2034);

2035

unsigned i;

2036

for (i = 0; i <= num_records; i++) {

2037

threadInfo[i] =

2038

(unsigned *)__kmp_allocate((maxIndex + 1) * sizeof(unsigned))___kmp_allocate(((maxIndex + 1) * sizeof(unsigned)), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 2038);

2039

}

2040

2041

#define CLEANUP_THREAD_INFOfor (i = 0; i <= num_records; i++) { ___kmp_free((threadInfo
[i]), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 2041); } ___kmp_free((threadInfo), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 2041); \

2042

for (i = 0; i <= num_records; i++) { \

2043

__kmp_free(threadInfo[i])___kmp_free((threadInfo[i]), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 2043); \

2044

} \

2045

__kmp_free(threadInfo)___kmp_free((threadInfo), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 2045);

2046

2047

// A value of UINT_MAX means that we didn't find the field

2048

unsigned __index;

2049

2050

#define INIT_PROC_INFO(p)for (__index = 0; __index <= maxIndex; __index++) { (p)[__index
] = (2147483647 *2U +1U); } \

2051

for (__index = 0; __index <= maxIndex; __index++) { \

2052

(p)[__index] = UINT_MAX(2147483647 *2U +1U); \

2053

}

2054

2055

for (i = 0; i <= num_records; i++) {

2056

INIT_PROC_INFO(threadInfo[i])for (__index = 0; __index <= maxIndex; __index++) { (threadInfo
[i])[__index] = (2147483647 *2U +1U); };

2057

}

2058

2059

unsigned num_avail = 0;

2060

*line = 0;

2061

while (!feof(f)) {

2062

// Create an inner scoping level, so that all the goto targets at the end of

2063

// the loop appear in an outer scoping level. This avoids warnings about

2064

// jumping past an initialization to a target in the same block.

2065

{

2066

buf[sizeof(buf) - 1] = 1;

2067

bool long_line = false;

2068

if (!fgets(buf, sizeof(buf), f)) {

2069

// Read errors presumably because of EOF

2070

// If there is valid data in threadInfo[num_avail], then fake

2071

// a blank line to ensure that the last address gets parsed.

2072

bool valid = false;

2073

for (i = 0; i <= maxIndex; i++) {

2074

if (threadInfo[num_avail][i] != UINT_MAX(2147483647 *2U +1U)) {

2075

valid = true;

2076

}

2077

}

2078

if (!valid) {

2079

break;

2080

}

2081

buf[0] = 0;

2082

} else if (!buf[sizeof(buf) - 1]) {

2083

// The line is longer than the buffer. Set a flag and don't

2084

// emit an error if we were going to ignore the line, anyway.

2085

long_line = true;

2086

2087

#define CHECK_LINEif (long_line) { for (i = 0; i <= num_records; i++) { ___kmp_free
((threadInfo[i]), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 2087); } ___kmp_free((threadInfo), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 2087);; *msg_id = kmp_i18n_str_LongLineCpuinfo; return -1; } \

2088

if (long_line) { \

2089

CLEANUP_THREAD_INFOfor (i = 0; i <= num_records; i++) { ___kmp_free((threadInfo
[i]), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 2089); } ___kmp_free((threadInfo), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 2089);; \

2090

*msg_id = kmp_i18n_str_LongLineCpuinfo; \

2091

return -1; \

2092

}

2093

}

2094

(*line)++;

2095

2096

char s1[] = "processor";

2097

if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {

2098

CHECK_LINEif (long_line) { for (i = 0; i <= num_records; i++) { ___kmp_free
((threadInfo[i]), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 2098); } ___kmp_free((threadInfo), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 2098);; *msg_id = kmp_i18n_str_LongLineCpuinfo; return -1; };

2099

char *p = strchr(buf + sizeof(s1) - 1, ':');

2100

unsigned val;

2101

if ((p == NULL__null) || (KMP_SSCANFsscanf(p + 1, "%u\n", &val) != 1))

2102

goto no_val;

2103

if (threadInfo[num_avail][osIdIndex0] != UINT_MAX(2147483647 *2U +1U))

2104

#if KMP_ARCH_AARCH640

2105

// Handle the old AArch64 /proc/cpuinfo layout differently,

2106

// it contains all of the 'processor' entries listed in a

2107

// single 'Processor' section, therefore the normal looking

2108

// for duplicates in that section will always fail.

2109

num_avail++;

2110

#else

2111

goto dup_field;

2112

#endif

2113

threadInfo[num_avail][osIdIndex0] = val;

2114

#if KMP_OS_LINUX1 && !(KMP_ARCH_X860 || KMP_ARCH_X86_641)

2115

char path[256];

2116

KMP_SNPRINTFsnprintf(

2117

path, sizeof(path),

2118

"/sys/devices/system/cpu/cpu%u/topology/physical_package_id",

2119

threadInfo[num_avail][osIdIndex0]);

2120

__kmp_read_from_file(path, "%u", &threadInfo[num_avail][pkgIdIndex3]);

2121

2122

KMP_SNPRINTFsnprintf(path, sizeof(path),

2123

"/sys/devices/system/cpu/cpu%u/topology/core_id",

2124

threadInfo[num_avail][osIdIndex0]);

2125

__kmp_read_from_file(path, "%u", &threadInfo[num_avail][coreIdIndex2]);

2126

continue;

2127

#else

2128

}

2129

char s2[] = "physical id";

2130

if (strncmp(buf, s2, sizeof(s2) - 1) == 0) {

2131

CHECK_LINEif (long_line) { for (i = 0; i <= num_records; i++) { ___kmp_free
((threadInfo[i]), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 2131); } ___kmp_free((threadInfo), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 2131);; *msg_id = kmp_i18n_str_LongLineCpuinfo; return -1; };

2132

char *p = strchr(buf + sizeof(s2) - 1, ':');

2133

unsigned val;

2134

if ((p == NULL__null) || (KMP_SSCANFsscanf(p + 1, "%u\n", &val) != 1))

2135

goto no_val;

2136

if (threadInfo[num_avail][pkgIdIndex3] != UINT_MAX(2147483647 *2U +1U))

2137

goto dup_field;

2138

threadInfo[num_avail][pkgIdIndex3] = val;

2139

continue;

2140

}

2141

char s3[] = "core id";

2142

if (strncmp(buf, s3, sizeof(s3) - 1) == 0) {

2143

CHECK_LINEif (long_line) { for (i = 0; i <= num_records; i++) { ___kmp_free
((threadInfo[i]), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 2143); } ___kmp_free((threadInfo), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 2143);; *msg_id = kmp_i18n_str_LongLineCpuinfo; return -1; };

2144

char *p = strchr(buf + sizeof(s3) - 1, ':');

2145

unsigned val;

2146

if ((p == NULL__null) || (KMP_SSCANFsscanf(p + 1, "%u\n", &val) != 1))

2147

goto no_val;

2148

if (threadInfo[num_avail][coreIdIndex2] != UINT_MAX(2147483647 *2U +1U))

2149

goto dup_field;

2150

threadInfo[num_avail][coreIdIndex2] = val;

2151

continue;

2152

#endif // KMP_OS_LINUX && USE_SYSFS_INFO

2153

}

2154

char s4[] = "thread id";

2155

if (strncmp(buf, s4, sizeof(s4) - 1) == 0) {

2156

CHECK_LINEif (long_line) { for (i = 0; i <= num_records; i++) { ___kmp_free
((threadInfo[i]), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 2156); } ___kmp_free((threadInfo), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 2156);; *msg_id = kmp_i18n_str_LongLineCpuinfo; return -1; };

2157

char *p = strchr(buf + sizeof(s4) - 1, ':');

2158

unsigned val;

2159

if ((p == NULL__null) || (KMP_SSCANFsscanf(p + 1, "%u\n", &val) != 1))

2160

goto no_val;

2161

if (threadInfo[num_avail][threadIdIndex1] != UINT_MAX(2147483647 *2U +1U))

2162

goto dup_field;

2163

threadInfo[num_avail][threadIdIndex1] = val;

2164

continue;

2165

}

2166

unsigned level;

2167

if (KMP_SSCANFsscanf(buf, "node_%u id", &level) == 1) {

2168

CHECK_LINEif (long_line) { for (i = 0; i <= num_records; i++) { ___kmp_free
((threadInfo[i]), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 2168); } ___kmp_free((threadInfo), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 2168);; *msg_id = kmp_i18n_str_LongLineCpuinfo; return -1; };

2169

char *p = strchr(buf + sizeof(s4) - 1, ':');

2170

unsigned val;

2171

if ((p == NULL__null) || (KMP_SSCANFsscanf(p + 1, "%u\n", &val) != 1))

2172

goto no_val;

2173

KMP_ASSERT(nodeIdIndex + level <= maxIndex)((4 + level <= maxIndex) ? 0 : __kmp_debug_assert("nodeIdIndex + level <= maxIndex"
, "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 2173));

2174

if (threadInfo[num_avail][nodeIdIndex4 + level] != UINT_MAX(2147483647 *2U +1U))

2175

goto dup_field;

2176

threadInfo[num_avail][nodeIdIndex4 + level] = val;

2177

continue;

2178

}

2179

2180

// We didn't recognize the leading token on the line. There are lots of

2181

// leading tokens that we don't recognize - if the line isn't empty, go on

2182

// to the next line.

2183

if ((*buf != 0) && (*buf != '\n')) {

2184

// If the line is longer than the buffer, read characters

2185

// until we find a newline.

2186

if (long_line) {

2187

int ch;

2188

while (((ch = fgetc(f)) != EOF(-1)) && (ch != '\n'))

2189

;

2190

}

2191

continue;

2192

}

2193

2194

// A newline has signalled the end of the processor record.

2195

// Check that there aren't too many procs specified.

2196

if ((int)num_avail == __kmp_xproc) {

2197

CLEANUP_THREAD_INFOfor (i = 0; i <= num_records; i++) { ___kmp_free((threadInfo
[i]), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 2197); } ___kmp_free((threadInfo), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 2197);;

2198

*msg_id = kmp_i18n_str_TooManyEntries;

2199

return -1;

2200

}

2201

2202

// Check for missing fields. The osId field must be there, and we

2203

// currently require that the physical id field is specified, also.

2204

if (threadInfo[num_avail][osIdIndex0] == UINT_MAX(2147483647 *2U +1U)) {

2205

CLEANUP_THREAD_INFOfor (i = 0; i <= num_records; i++) { ___kmp_free((threadInfo
[i]), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 2205); } ___kmp_free((threadInfo), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 2205);;

2206

*msg_id = kmp_i18n_str_MissingProcField;

2207

return -1;

2208

}

2209

if (threadInfo[0][pkgIdIndex3] == UINT_MAX(2147483647 *2U +1U)) {

2210

CLEANUP_THREAD_INFOfor (i = 0; i <= num_records; i++) { ___kmp_free((threadInfo
[i]), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 2210); } ___kmp_free((threadInfo), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 2210);;

2211

*msg_id = kmp_i18n_str_MissingPhysicalIDField;

2212

return -1;

2213

}

2214

2215

// Skip this proc if it is not included in the machine model.

2216

if (!KMP_CPU_ISSET(threadInfo[num_avail][osIdIndex],(__kmp_affin_fullMask)->is_set(threadInfo[num_avail][0])

2217

__kmp_affin_fullMask)(__kmp_affin_fullMask)->is_set(threadInfo[num_avail][0])) {

2218

INIT_PROC_INFO(threadInfo[num_avail])for (__index = 0; __index <= maxIndex; __index++) { (threadInfo
[num_avail])[__index] = (2147483647 *2U +1U); };

2219

continue;

2220

}

2221

2222

// We have a successful parse of this proc's info.

2223

// Increment the counter, and prepare for the next proc.

2224

num_avail++;

2225

KMP_ASSERT(num_avail <= num_records)((num_avail <= num_records) ? 0 : __kmp_debug_assert("num_avail <= num_records"
, "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 2225));

2226

INIT_PROC_INFO(threadInfo[num_avail])for (__index = 0; __index <= maxIndex; __index++) { (threadInfo
[num_avail])[__index] = (2147483647 *2U +1U); };

2227

}

2228

continue;

2229

2230

no_val:

2231

CLEANUP_THREAD_INFOfor (i = 0; i <= num_records; i++) { ___kmp_free((threadInfo
[i]), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 2231); } ___kmp_free((threadInfo), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 2231);;

2232

*msg_id = kmp_i18n_str_MissingValCpuinfo;

2233

return -1;

2234

2235

dup_field:

2236

CLEANUP_THREAD_INFOfor (i = 0; i <= num_records; i++) { ___kmp_free((threadInfo
[i]), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 2236); } ___kmp_free((threadInfo), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 2236);;

2237

*msg_id = kmp_i18n_str_DuplicateFieldCpuinfo;

2238

return -1;

2239

}

2240

*line = 0;

2241

2242

#if KMP_MIC0 && REDUCE_TEAM_SIZE

2243

unsigned teamSize = 0;

2244

#endif // KMP_MIC && REDUCE_TEAM_SIZE

2245

2246

// check for num_records == __kmp_xproc ???

2247

2248

// If there's only one thread context to bind to, form an Address object with

2249

// depth 1 and return immediately (or, if affinity is off, set address2os to

2250

// NULL and return).

2251

//

2252

// If it is configured to omit the package level when there is only a single

2253

// package, the logic at the end of this routine won't work if there is only a

2254

// single thread - it would try to form an Address object with depth 0.

2255

KMP_ASSERT(num_avail > 0)((num_avail > 0) ? 0 : __kmp_debug_assert("num_avail > 0"
, "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 2255));

2256

KMP_ASSERT(num_avail <= num_records)((num_avail <= num_records) ? 0 : __kmp_debug_assert("num_avail <= num_records"
, "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 2256));

2257

if (num_avail == 1) {

2258

__kmp_ncores = 1;

2259

__kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;

2260

if (__kmp_affinity_verbose) {

2261

if (!KMP_AFFINITY_CAPABLE()(__kmp_affin_mask_size > 0)) {

2262

KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY")__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AffNotCapableUseCpuinfo
, "KMP_AFFINITY"), __kmp_msg_null);

2263

KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AvailableOSProc
, "KMP_AFFINITY", __kmp_avail_proc), __kmp_msg_null);

2264

KMP_INFORM(Uniform, "KMP_AFFINITY")__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_Uniform
, "KMP_AFFINITY"), __kmp_msg_null);

2265

} else {

2266

char buf[KMP_AFFIN_MASK_PRINT_LEN1024];

2267

__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN1024,

2268

__kmp_affin_fullMask);

2269

KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY")__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AffCapableUseCpuinfo
, "KMP_AFFINITY"), __kmp_msg_null);

2270

if (__kmp_affinity_respect_mask) {

2271

KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_InitOSProcSetRespect
, "KMP_AFFINITY", buf), __kmp_msg_null);

2272

} else {

2273

KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_InitOSProcSetNotRespect
, "KMP_AFFINITY", buf), __kmp_msg_null);

2274

}

2275

KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AvailableOSProc
, "KMP_AFFINITY", __kmp_avail_proc), __kmp_msg_null);

2276

KMP_INFORM(Uniform, "KMP_AFFINITY")__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_Uniform
, "KMP_AFFINITY"), __kmp_msg_null);

2277

}

2278

int index;

2279

kmp_str_buf_t buf;

2280

__kmp_str_buf_init(&buf){ (&buf)->str = (&buf)->bulk; (&buf)->size
= sizeof((&buf)->bulk); (&buf)->used = 0; (&
buf)->bulk[0] = 0; };

2281

__kmp_str_buf_print(&buf, "1");

2282

for (index = maxIndex - 1; index > pkgIdIndex3; index--) {

2283

__kmp_str_buf_print(&buf, " x 1");

2284

}

2285

KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, 1, 1, 1)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_TopologyExtra
, "KMP_AFFINITY", buf.str, 1, 1, 1), __kmp_msg_null);

2286

__kmp_str_buf_free(&buf);

2287

}

2288

2289

if (__kmp_affinity_type == affinity_none) {

2290

CLEANUP_THREAD_INFOfor (i = 0; i <= num_records; i++) { ___kmp_free((threadInfo
[i]), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 2290); } ___kmp_free((threadInfo), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 2290);;

2291

return 0;

2292

}

2293

2294

*address2os = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair))___kmp_allocate((sizeof(AddrUnsPair)), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 2294);

2295

Address addr(1);

2296

addr.labels[0] = threadInfo[0][pkgIdIndex3];

2297

(*address2os)[0] = AddrUnsPair(addr, threadInfo[0][osIdIndex0]);

2298

2299

if (__kmp_affinity_gran_levels < 0) {

2300

__kmp_affinity_gran_levels = 0;

2301

}

2302

2303

if (__kmp_affinity_verbose) {

2304

__kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);

2305

}

2306

2307

CLEANUP_THREAD_INFOfor (i = 0; i <= num_records; i++) { ___kmp_free((threadInfo
[i]), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 2307); } ___kmp_free((threadInfo), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 2307);;

2308

return 1;

2309

}

2310

2311

// Sort the threadInfo table by physical Id.

2312

qsort(threadInfo, num_avail, sizeof(*threadInfo),

2313

__kmp_affinity_cmp_ProcCpuInfo_phys_id);

2314

2315

// The table is now sorted by pkgId / coreId / threadId, but we really don't

2316

// know the radix of any of the fields. pkgId's may be sparsely assigned among

2317

// the chips on a system. Although coreId's are usually assigned

2318

// [0 .. coresPerPkg-1] and threadId's are usually assigned

2319

// [0..threadsPerCore-1], we don't want to make any such assumptions.

2320

//

2321

// For that matter, we don't know what coresPerPkg and threadsPerCore (or the

2322

// total # packages) are at this point - we want to determine that now. We

2323

// only have an upper bound on the first two figures.

2324

unsigned *counts =

2325

(unsigned *)__kmp_allocate((maxIndex + 1) * sizeof(unsigned))___kmp_allocate(((maxIndex + 1) * sizeof(unsigned)), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 2325);

2326

unsigned *maxCt =

2327

(unsigned *)__kmp_allocate((maxIndex + 1) * sizeof(unsigned))___kmp_allocate(((maxIndex + 1) * sizeof(unsigned)), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 2327);

2328

unsigned *totals =

2329

(unsigned *)__kmp_allocate((maxIndex + 1) * sizeof(unsigned))___kmp_allocate(((maxIndex + 1) * sizeof(unsigned)), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 2329);

2330

unsigned *lastId =

2331

(unsigned *)__kmp_allocate((maxIndex + 1) * sizeof(unsigned))___kmp_allocate(((maxIndex + 1) * sizeof(unsigned)), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 2331);

2332

2333

bool assign_thread_ids = false;

2334

unsigned threadIdCt;

2335

unsigned index;

2336

2337

restart_radix_check:

2338

threadIdCt = 0;

2339

2340

// Initialize the counter arrays with data from threadInfo[0].

2341

if (assign_thread_ids) {

2342

if (threadInfo[0][threadIdIndex1] == UINT_MAX(2147483647 *2U +1U)) {

2343

threadInfo[0][threadIdIndex1] = threadIdCt++;

2344

} else if (threadIdCt <= threadInfo[0][threadIdIndex1]) {

2345

threadIdCt = threadInfo[0][threadIdIndex1] + 1;

2346

}

2347

}

2348

for (index = 0; index <= maxIndex; index++) {

2349

counts[index] = 1;

2350

maxCt[index] = 1;

2351

totals[index] = 1;

2352

lastId[index] = threadInfo[0][index];

2353

;

2354

}

2355

2356

// Run through the rest of the OS procs.

2357

for (i = 1; i < num_avail; i++) {

2358

// Find the most significant index whose id differs from the id for the

2359

// previous OS proc.

2360

for (index = maxIndex; index >= threadIdIndex1; index--) {

2361

if (assign_thread_ids && (index == threadIdIndex1)) {

2362

// Auto-assign the thread id field if it wasn't specified.

2363

if (threadInfo[i][threadIdIndex1] == UINT_MAX(2147483647 *2U +1U)) {

2364

threadInfo[i][threadIdIndex1] = threadIdCt++;

2365

}

2366

// Apparently the thread id field was specified for some entries and not

2367

// others. Start the thread id counter off at the next higher thread id.

2368

else if (threadIdCt <= threadInfo[i][threadIdIndex1]) {

2369

threadIdCt = threadInfo[i][threadIdIndex1] + 1;

2370

}

2371

}

2372

if (threadInfo[i][index] != lastId[index]) {

2373

// Run through all indices which are less significant, and reset the

2374

// counts to 1. At all levels up to and including index, we need to

2375

// increment the totals and record the last id.

2376

unsigned index2;

2377

for (index2 = threadIdIndex1; index2 < index; index2++) {

2378

totals[index2]++;

2379

if (counts[index2] > maxCt[index2]) {

2380

maxCt[index2] = counts[index2];

2381

}

2382

counts[index2] = 1;

2383

lastId[index2] = threadInfo[i][index2];

2384

}

2385

counts[index]++;

2386

totals[index]++;

2387

lastId[index] = threadInfo[i][index];

2388

2389

if (assign_thread_ids && (index > threadIdIndex1)) {

2390

2391

#if KMP_MIC0 && REDUCE_TEAM_SIZE

2392

// The default team size is the total #threads in the machine

2393

// minus 1 thread for every core that has 3 or more threads.

2394

teamSize += (threadIdCt <= 2) ? (threadIdCt) : (threadIdCt - 1);

2395

#endif // KMP_MIC && REDUCE_TEAM_SIZE

2396

2397

// Restart the thread counter, as we are on a new core.

2398

threadIdCt = 0;

2399

2400

// Auto-assign the thread id field if it wasn't specified.

2401

if (threadInfo[i][threadIdIndex1] == UINT_MAX(2147483647 *2U +1U)) {

2402

threadInfo[i][threadIdIndex1] = threadIdCt++;

2403

}

2404

2405

// Apparently the thread id field was specified for some entries and

2406

// not others. Start the thread id counter off at the next higher

2407

// thread id.

2408

else if (threadIdCt <= threadInfo[i][threadIdIndex1]) {

2409

threadIdCt = threadInfo[i][threadIdIndex1] + 1;

2410

}

2411

}

2412

break;

2413

}

2414

}

2415

if (index < threadIdIndex1) {

2416

// If thread ids were specified, it is an error if they are not unique.

2417

// Also, check that we haven't already restarted the loop (to be safe -

2418

// shouldn't need to).

2419

if ((threadInfo[i][threadIdIndex1] != UINT_MAX(2147483647 *2U +1U)) || assign_thread_ids) {

2420

__kmp_free(lastId)___kmp_free((lastId), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 2420);

2421

__kmp_free(totals)___kmp_free((totals), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 2421);

2422

__kmp_free(maxCt)___kmp_free((maxCt), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 2422);

2423

__kmp_free(counts)___kmp_free((counts), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 2423);

2424

CLEANUP_THREAD_INFOfor (i = 0; i <= num_records; i++) { ___kmp_free((threadInfo
[i]), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 2424); } ___kmp_free((threadInfo), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 2424);;

2425

*msg_id = kmp_i18n_str_PhysicalIDsNotUnique;

2426

return -1;

2427

}

2428

2429

// If the thread ids were not specified and we see entries that

2430

// are duplicates, start the loop over and assign the thread ids manually.

2431

assign_thread_ids = true;

2432

goto restart_radix_check;

2433

}

2434

}

2435

2436

#if KMP_MIC0 && REDUCE_TEAM_SIZE

2437

// The default team size is the total #threads in the machine

2438

// minus 1 thread for every core that has 3 or more threads.

2439

teamSize += (threadIdCt <= 2) ? (threadIdCt) : (threadIdCt - 1);

2440

#endif // KMP_MIC && REDUCE_TEAM_SIZE

2441

2442

for (index = threadIdIndex1; index <= maxIndex; index++) {

2443

if (counts[index] > maxCt[index]) {

2444

maxCt[index] = counts[index];

2445

}

2446

}

2447

2448

__kmp_nThreadsPerCore = maxCt[threadIdIndex1];

2449

nCoresPerPkg = maxCt[coreIdIndex2];

2450

nPackages = totals[pkgIdIndex3];

2451

2452

// Check to see if the machine topology is uniform

2453

unsigned prod = totals[maxIndex];

2454

for (index = threadIdIndex1; index < maxIndex; index++) {

2455

prod *= maxCt[index];

2456

}

2457

bool uniform = (prod == totals[threadIdIndex1]);

2458

2459

// When affinity is off, this routine will still be called to set

2460

// __kmp_ncores, as well as __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.

2461

// Make sure all these vars are set correctly, and return now if affinity is

2462

// not enabled.

2463

__kmp_ncores = totals[coreIdIndex2];

2464

2465

if (__kmp_affinity_verbose) {

2466

if (!KMP_AFFINITY_CAPABLE()(__kmp_affin_mask_size > 0)) {

2467

KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY")__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AffNotCapableUseCpuinfo
, "KMP_AFFINITY"), __kmp_msg_null);

2468

KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AvailableOSProc
, "KMP_AFFINITY", __kmp_avail_proc), __kmp_msg_null);

2469

if (uniform) {

2470

KMP_INFORM(Uniform, "KMP_AFFINITY")__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_Uniform
, "KMP_AFFINITY"), __kmp_msg_null);

2471

} else {

2472

KMP_INFORM(NonUniform, "KMP_AFFINITY")__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_NonUniform
, "KMP_AFFINITY"), __kmp_msg_null);

2473

}

2474

} else {

2475

char buf[KMP_AFFIN_MASK_PRINT_LEN1024];

2476

__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN1024,

2477

__kmp_affin_fullMask);

2478

KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY")__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AffCapableUseCpuinfo
, "KMP_AFFINITY"), __kmp_msg_null);

2479

if (__kmp_affinity_respect_mask) {

2480

KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_InitOSProcSetRespect
, "KMP_AFFINITY", buf), __kmp_msg_null);

2481

} else {

2482

KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_InitOSProcSetNotRespect
, "KMP_AFFINITY", buf), __kmp_msg_null);

2483

}

2484

KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AvailableOSProc
, "KMP_AFFINITY", __kmp_avail_proc), __kmp_msg_null);

2485

if (uniform) {

2486

KMP_INFORM(Uniform, "KMP_AFFINITY")__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_Uniform
, "KMP_AFFINITY"), __kmp_msg_null);

2487

} else {

2488

KMP_INFORM(NonUniform, "KMP_AFFINITY")__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_NonUniform
, "KMP_AFFINITY"), __kmp_msg_null);

2489

}

2490

}

2491

kmp_str_buf_t buf;

2492

__kmp_str_buf_init(&buf){ (&buf)->str = (&buf)->bulk; (&buf)->size
= sizeof((&buf)->bulk); (&buf)->used = 0; (&
buf)->bulk[0] = 0; };

2493

2494

__kmp_str_buf_print(&buf, "%d", totals[maxIndex]);

2495

for (index = maxIndex - 1; index >= pkgIdIndex3; index--) {

2496

__kmp_str_buf_print(&buf, " x %d", maxCt[index]);

2497

}

2498

KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, maxCt[coreIdIndex],__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_TopologyExtra
, "KMP_AFFINITY", buf.str, maxCt[2], maxCt[1], __kmp_ncores),
__kmp_msg_null)

2499

maxCt[threadIdIndex], __kmp_ncores)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_TopologyExtra
, "KMP_AFFINITY", buf.str, maxCt[2], maxCt[1], __kmp_ncores),
__kmp_msg_null);

2500

2501

__kmp_str_buf_free(&buf);

2502

}

2503

2504

#if KMP_MIC0 && REDUCE_TEAM_SIZE

2505

// Set the default team size.

2506

if ((__kmp_dflt_team_nth == 0) && (teamSize > 0)) {

2507

__kmp_dflt_team_nth = teamSize;

2508

KA_TRACE(20, ("__kmp_affinity_create_cpuinfo_map: setting "if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_affinity_create_cpuinfo_map: setting "
"__kmp_dflt_team_nth = %d\n", __kmp_dflt_team_nth); }

2509

"__kmp_dflt_team_nth = %d\n",if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_affinity_create_cpuinfo_map: setting "
"__kmp_dflt_team_nth = %d\n", __kmp_dflt_team_nth); }

2510

__kmp_dflt_team_nth))if (kmp_a_debug >= 20) { __kmp_debug_printf ("__kmp_affinity_create_cpuinfo_map: setting "
"__kmp_dflt_team_nth = %d\n", __kmp_dflt_team_nth); };

2511

}

2512

#endif // KMP_MIC && REDUCE_TEAM_SIZE

2513

2514

KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL)((__kmp_pu_os_idx == __null) ? 0 : __kmp_debug_assert("__kmp_pu_os_idx == __null"
, "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 2514));

2515

KMP_DEBUG_ASSERT(num_avail == __kmp_avail_proc)((num_avail == __kmp_avail_proc) ? 0 : __kmp_debug_assert("num_avail == __kmp_avail_proc"
, "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 2515));

2516

__kmp_pu_os_idx = (int *)__kmp_allocate(sizeof(int) * __kmp_avail_proc)___kmp_allocate((sizeof(int) * __kmp_avail_proc), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 2516);

2517

for (i = 0; i < num_avail; ++i) { // fill the os indices

2518

__kmp_pu_os_idx[i] = threadInfo[i][osIdIndex0];

2519

}

2520

2521

if (__kmp_affinity_type == affinity_none) {

2522

__kmp_free(lastId)___kmp_free((lastId), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 2522);

2523

__kmp_free(totals)___kmp_free((totals), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 2523);

2524

__kmp_free(maxCt)___kmp_free((maxCt), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 2524);

2525

__kmp_free(counts)___kmp_free((counts), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 2525);

2526

CLEANUP_THREAD_INFOfor (i = 0; i <= num_records; i++) { ___kmp_free((threadInfo
[i]), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 2526); } ___kmp_free((threadInfo), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 2526);;

2527

return 0;

2528

}

2529

2530

// Count the number of levels which have more nodes at that level than at the

2531

// parent's level (with there being an implicit root node of the top level).

2532

// This is equivalent to saying that there is at least one node at this level

2533

// which has a sibling. These levels are in the map, and the package level is

2534

// always in the map.

2535

bool *inMap = (bool *)__kmp_allocate((maxIndex + 1) * sizeof(bool))___kmp_allocate(((maxIndex + 1) * sizeof(bool)), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 2535);

2536

int level = 0;

2537

for (index = threadIdIndex1; index < maxIndex; index++) {

2538

KMP_ASSERT(totals[index] >= totals[index + 1])((totals[index] >= totals[index + 1]) ? 0 : __kmp_debug_assert
("totals[index] >= totals[index + 1]", "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 2538));

2539

inMap[index] = (totals[index] > totals[index + 1]);

2540

}

2541

inMap[maxIndex] = (totals[maxIndex] > 1);

2542

inMap[pkgIdIndex3] = true;

2543

2544

int depth = 0;

2545

for (index = threadIdIndex1; index <= maxIndex; index++) {

2546

if (inMap[index]) {

2547

depth++;

2548

}

2549

}

2550

KMP_ASSERT(depth > 0)((depth > 0) ? 0 : __kmp_debug_assert("depth > 0", "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 2550));

2551

2552

// Construct the data structure that is to be returned.

2553

*address2os = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * num_avail)___kmp_allocate((sizeof(AddrUnsPair) * num_avail), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 2553);

2554

int pkgLevel = -1;

2555

int coreLevel = -1;

2556

int threadLevel = -1;

2557

2558

for (i = 0; i < num_avail; ++i) {

2559

Address addr(depth);

2560

unsigned os = threadInfo[i][osIdIndex0];

2561

int src_index;

2562

int dst_index = 0;

2563

2564

for (src_index = maxIndex; src_index >= threadIdIndex1; src_index--) {

2565

if (!inMap[src_index]) {

2566

continue;

2567

}

2568

addr.labels[dst_index] = threadInfo[i][src_index];

2569

if (src_index == pkgIdIndex3) {

2570

pkgLevel = dst_index;

2571

} else if (src_index == coreIdIndex2) {

2572

coreLevel = dst_index;

2573

} else if (src_index == threadIdIndex1) {

2574

threadLevel = dst_index;

2575

}

2576

dst_index++;

2577

}

2578

(*address2os)[i] = AddrUnsPair(addr, os);

2579

}

2580

2581

if (__kmp_affinity_gran_levels < 0) {

2582

// Set the granularity level based on what levels are modeled

2583

// in the machine topology map.

2584

unsigned src_index;

2585

__kmp_affinity_gran_levels = 0;

2586

for (src_index = threadIdIndex1; src_index <= maxIndex; src_index++) {

2587

if (!inMap[src_index]) {

2588

continue;

2589

}

2590

switch (src_index) {

2591

case threadIdIndex1:

2592

if (__kmp_affinity_gran > affinity_gran_thread) {

2593

__kmp_affinity_gran_levels++;

2594

}

2595

2596

break;

2597

case coreIdIndex2:

2598

if (__kmp_affinity_gran > affinity_gran_core) {

2599

__kmp_affinity_gran_levels++;

2600

}

2601

break;

2602

2603

case pkgIdIndex3:

2604

if (__kmp_affinity_gran > affinity_gran_package) {

2605

__kmp_affinity_gran_levels++;

2606

}

2607

break;

2608

}

2609

}

2610

}

2611

2612

if (__kmp_affinity_verbose) {

2613

__kmp_affinity_print_topology(*address2os, num_avail, depth, pkgLevel,

2614

coreLevel, threadLevel);

2615

}

2616

2617

__kmp_free(inMap)___kmp_free((inMap), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 2617);

2618

__kmp_free(lastId)___kmp_free((lastId), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 2618);

2619

__kmp_free(totals)___kmp_free((totals), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 2619);

2620

__kmp_free(maxCt)___kmp_free((maxCt), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 2620);

2621

__kmp_free(counts)___kmp_free((counts), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 2621);

2622

CLEANUP_THREAD_INFOfor (i = 0; i <= num_records; i++) { ___kmp_free((threadInfo
[i]), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 2622); } ___kmp_free((threadInfo), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 2622);;

2623

return depth;

2624

}

2625

2626

// Create and return a table of affinity masks, indexed by OS thread ID.

2627

// This routine handles OR'ing together all the affinity masks of threads

2628

// that are sufficiently close, if granularity > fine.

2629

static kmp_affin_mask_t *__kmp_create_masks(unsigned *maxIndex,

2630

unsigned *numUnique,

2631

AddrUnsPair *address2os,

2632

unsigned numAddrs) {

2633

// First form a table of affinity masks in order of OS thread id.

2634

unsigned depth;

2635

unsigned maxOsId;

2636

unsigned i;

2637

2638

KMP_ASSERT(numAddrs > 0)((numAddrs > 0) ? 0 : __kmp_debug_assert("numAddrs > 0"
, "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 2638));

2639

depth = address2os[0].first.depth;

2640

2641

maxOsId = 0;

2642

for (i = numAddrs - 1;; --i) {

2643

unsigned osId = address2os[i].second;

2644

if (osId > maxOsId) {

2645

maxOsId = osId;

2646

}

2647

if (i == 0)

2648

break;

2649

}

2650

kmp_affin_mask_t *osId2Mask;

2651

KMP_CPU_ALLOC_ARRAY(osId2Mask, (maxOsId + 1))(osId2Mask = __kmp_affinity_dispatch->allocate_mask_array(
(maxOsId + 1)));

2652

2653

// Sort the address2os table according to physical order. Doing so will put

2654

// all threads on the same core/package/node in consecutive locations.

2655

qsort(address2os, numAddrs, sizeof(*address2os),

2656

__kmp_affinity_cmp_Address_labels);

2657

2658

KMP_ASSERT(__kmp_affinity_gran_levels >= 0)((__kmp_affinity_gran_levels >= 0) ? 0 : __kmp_debug_assert
("__kmp_affinity_gran_levels >= 0", "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 2658));

2659

if (__kmp_affinity_verbose && (__kmp_affinity_gran_levels > 0)) {

2660

KMP_INFORM(ThreadsMigrate, "KMP_AFFINITY", __kmp_affinity_gran_levels)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_ThreadsMigrate
, "KMP_AFFINITY", __kmp_affinity_gran_levels), __kmp_msg_null
);

2661

}

2662

if (__kmp_affinity_gran_levels >= (int)depth) {

2663

if (__kmp_affinity_verbose ||

2664

(__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none))) {

2665

KMP_WARNING(AffThreadsMayMigrate)__kmp_msg(kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_AffThreadsMayMigrate
), __kmp_msg_null);

2666

}

2667

}

2668

2669

// Run through the table, forming the masks for all threads on each core.

2670

// Threads on the same core will have identical "Address" objects, not

2671

// considering the last level, which must be the thread id. All threads on a

2672

// core will appear consecutively.

2673

unsigned unique = 0;

2674

unsigned j = 0; // index of 1st thread on core

2675

unsigned leader = 0;

2676

Address *leaderAddr = &(address2os[0].first);

2677

kmp_affin_mask_t *sum;

2678

KMP_CPU_ALLOC_ON_STACK(sum)(sum = __kmp_affinity_dispatch->allocate_mask());

2679

KMP_CPU_ZERO(sum)(sum)->zero();

2680

KMP_CPU_SET(address2os[0].second, sum)(sum)->set(address2os[0].second);

2681

for (i = 1; i < numAddrs; i++) {

2682

// If this thread is sufficiently close to the leader (within the

2683

// granularity setting), then set the bit for this os thread in the

2684

// affinity mask for this group, and go on to the next thread.

2685

if (leaderAddr->isClose(address2os[i].first, __kmp_affinity_gran_levels)) {

2686

KMP_CPU_SET(address2os[i].second, sum)(sum)->set(address2os[i].second);

2687

continue;

2688

}

2689

2690

// For every thread in this group, copy the mask to the thread's entry in

2691

// the osId2Mask table. Mark the first address as a leader.

2692

for (; j < i; j++) {

2693

unsigned osId = address2os[j].second;

2694

KMP_DEBUG_ASSERT(osId <= maxOsId)((osId <= maxOsId) ? 0 : __kmp_debug_assert("osId <= maxOsId"
, "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 2694));

2695

kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId)__kmp_affinity_dispatch->index_mask_array(osId2Mask, osId);

2696

KMP_CPU_COPY(mask, sum)(mask)->copy(sum);

2697

address2os[j].first.leader = (j == leader);

2698

}

2699

unique++;

2700

2701

// Start a new mask.

2702

leader = i;

2703

leaderAddr = &(address2os[i].first);

2704

KMP_CPU_ZERO(sum)(sum)->zero();

2705

KMP_CPU_SET(address2os[i].second, sum)(sum)->set(address2os[i].second);

2706

}

2707

2708

// For every thread in last group, copy the mask to the thread's

2709

// entry in the osId2Mask table.

2710

for (; j < i; j++) {

2711

unsigned osId = address2os[j].second;

2712

KMP_DEBUG_ASSERT(osId <= maxOsId)((osId <= maxOsId) ? 0 : __kmp_debug_assert("osId <= maxOsId"
, "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 2712));

2713

kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId)__kmp_affinity_dispatch->index_mask_array(osId2Mask, osId);

2714

KMP_CPU_COPY(mask, sum)(mask)->copy(sum);

2715

address2os[j].first.leader = (j == leader);

2716

}

2717

unique++;

2718

KMP_CPU_FREE_FROM_STACK(sum)__kmp_affinity_dispatch->deallocate_mask(sum);

2719

2720

*maxIndex = maxOsId;

2721

*numUnique = unique;

2722

return osId2Mask;

2723

}

2724

2725

// Stuff for the affinity proclist parsers. It's easier to declare these vars

2726

// as file-static than to try and pass them through the calling sequence of

2727

// the recursive-descent OMP_PLACES parser.

2728

// Scratch array of masks being collected by the parsers; ADD_MASK swaps it
// for a larger array when it is full.
static kmp_affin_mask_t *newMasks;

2729

// Number of mask slots currently allocated in newMasks.
static int numNewMasks;

2730

// Index of the next unused slot in newMasks (the count of masks added so far).
static int nextNewMask;

2731

2732

// Append a copy of _mask to the file-static newMasks array.
// When the array is full its capacity (numNewMasks) is doubled: a larger
// array is allocated, the existing masks are copied over, and the old array
// is released. nextNewMask is advanced past the newly stored mask.
#define ADD_MASK(_mask)                                                        \
  {                                                                            \
    if (nextNewMask >= numNewMasks) {                                          \
      const int oldCap = numNewMasks;                                          \
      kmp_affin_mask_t *grown;                                                 \
      int k;                                                                   \
      numNewMasks = oldCap * 2;                                                \
      KMP_CPU_INTERNAL_ALLOC_ARRAY(grown, numNewMasks);                        \
      for (k = 0; k < oldCap; k++) {                                           \
        KMP_CPU_COPY(KMP_CPU_INDEX(grown, k), KMP_CPU_INDEX(newMasks, k));     \
      }                                                                        \
      KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, oldCap);                           \
      newMasks = grown;                                                        \
    }                                                                          \
    KMP_CPU_COPY(KMP_CPU_INDEX(newMasks, nextNewMask), (_mask));               \
    nextNewMask++;                                                             \
  }

2750

2751

// Append the mask stored for OS proc id _osId in _osId2Mask to newMasks.
// The id is accepted only if it does not exceed _maxOsId and its own bit is
// set in its table entry; otherwise it is ignored, with an
// AffIgnoreInvalidProcID warning when warnings are enabled.
#define ADD_MASK_OSID(_osId, _osId2Mask, _maxOsId)                             \
  {                                                                            \
    if (((_osId) <= _maxOsId) &&                                               \
        (KMP_CPU_ISSET((_osId), KMP_CPU_INDEX((_osId2Mask), (_osId))))) {      \
      ADD_MASK(KMP_CPU_INDEX(_osId2Mask, (_osId)));                            \
    } else if (__kmp_affinity_verbose ||                                       \
               (__kmp_affinity_warnings &&                                     \
                (__kmp_affinity_type != affinity_none))) {                     \
      KMP_WARNING(AffIgnoreInvalidProcID, _osId);                              \
    }                                                                          \
  }

2764

2765

// Re-parse the proclist (for the explicit affinity type), and form the list

2766

// of affinity newMasks indexed by gtid.

2767

static void __kmp_affinity_process_proclist(kmp_affin_mask_t **out_masks,

2768

unsigned int *out_numMasks,

2769

const char *proclist,

2770

kmp_affin_mask_t *osId2Mask,

2771

int maxOsId) {

2772

int i;

2773

const char *scan = proclist;

2774

const char *next = proclist;

2775

2776

// We use malloc() for the temporary mask vector, so that we can use

2777

// realloc() to extend it.

2778

numNewMasks = 2;

2779

KMP_CPU_INTERNAL_ALLOC_ARRAY(newMasks, numNewMasks)(newMasks = __kmp_affinity_dispatch->allocate_mask_array(numNewMasks
));

2780

nextNewMask = 0;

2781

kmp_affin_mask_t *sumMask;

2782

KMP_CPU_ALLOC(sumMask)(sumMask = __kmp_affinity_dispatch->allocate_mask());

2783

int setSize = 0;

2784

2785

for (;;) {

2786

int start, end, stride;

2787

2788

SKIP_WS(scan){ while (*(scan) == ' ' || *(scan) == '\t') (scan)++; };

2789

next = scan;

2790

if (*next == '\0') {

2791

break;

2792

}

2793

2794

if (*next == '{') {

2795

int num;

2796

setSize = 0;

2797

next++; // skip '{'

2798

SKIP_WS(next){ while (*(next) == ' ' || *(next) == '\t') (next)++; };

2799

scan = next;

2800

2801

// Read the first integer in the set.

2802

KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad proclist")(((*next >= '0') && (*next <= '9')) ? 0 : __kmp_debug_assert
(("bad proclist"), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 2802));

2803

SKIP_DIGITS(next){ while (*(next) >= '0' && *(next) <= '9') (next
)++; };

2804

num = __kmp_str_to_int(scan, *next);

2805

KMP_ASSERT2(num >= 0, "bad explicit proc list")((num >= 0) ? 0 : __kmp_debug_assert(("bad explicit proc list"
), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 2805));

2806

2807

// Copy the mask for that osId to the sum (union) mask.

2808

if ((num > maxOsId) ||

2809

(!KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num))(__kmp_affinity_dispatch->index_mask_array(osId2Mask, num)
)->is_set(num))) {

2810

if (__kmp_affinity_verbose ||

2811

(__kmp_affinity_warnings &&

2812

(__kmp_affinity_type != affinity_none))) {

2813

KMP_WARNING(AffIgnoreInvalidProcID, num)__kmp_msg(kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_AffIgnoreInvalidProcID
, num), __kmp_msg_null);

2814

}

2815

KMP_CPU_ZERO(sumMask)(sumMask)->zero();

2816

} else {

2817

KMP_CPU_COPY(sumMask, KMP_CPU_INDEX(osId2Mask, num))(sumMask)->copy(__kmp_affinity_dispatch->index_mask_array
(osId2Mask, num));

2818

setSize = 1;

2819

}

2820

2821

for (;;) {

2822

// Check for end of set.

2823

SKIP_WS(next){ while (*(next) == ' ' || *(next) == '\t') (next)++; };

2824

if (*next == '}') {

2825

next++; // skip '}'

2826

break;

2827

}

2828

2829

// Skip optional comma.

2830

if (*next == ',') {

2831

next++;

2832

}

2833

SKIP_WS(next){ while (*(next) == ' ' || *(next) == '\t') (next)++; };

2834

2835

// Read the next integer in the set.

2836

scan = next;

2837

KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list")(((*next >= '0') && (*next <= '9')) ? 0 : __kmp_debug_assert
(("bad explicit proc list"), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 2837));

2838

2839

SKIP_DIGITS(next){ while (*(next) >= '0' && *(next) <= '9') (next
)++; };

2840

num = __kmp_str_to_int(scan, *next);

2841

KMP_ASSERT2(num >= 0, "bad explicit proc list")((num >= 0) ? 0 : __kmp_debug_assert(("bad explicit proc list"
), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 2841));

2842

2843

// Add the mask for that osId to the sum mask.

2844

if ((num > maxOsId) ||

2845

(!KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num))(__kmp_affinity_dispatch->index_mask_array(osId2Mask, num)
)->is_set(num))) {

2846

if (__kmp_affinity_verbose ||

2847

(__kmp_affinity_warnings &&

2848

(__kmp_affinity_type != affinity_none))) {

2849

KMP_WARNING(AffIgnoreInvalidProcID, num)__kmp_msg(kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_AffIgnoreInvalidProcID
, num), __kmp_msg_null);

2850

}

2851

} else {

2852

KMP_CPU_UNION(sumMask, KMP_CPU_INDEX(osId2Mask, num))(sumMask)->bitwise_or(__kmp_affinity_dispatch->index_mask_array
(osId2Mask, num));

2853

setSize++;

2854

}

2855

}

2856

if (setSize > 0) {

2857

ADD_MASK(sumMask);

2858

}

2859

2860

SKIP_WS(next){ while (*(next) == ' ' || *(next) == '\t') (next)++; };

2861

if (*next == ',') {

2862

next++;

2863

}

2864

scan = next;

2865

continue;

2866

}

2867

2868

// Read the first integer.

2869

KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list")(((*next >= '0') && (*next <= '9')) ? 0 : __kmp_debug_assert
(("bad explicit proc list"), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 2869));

2870

SKIP_DIGITS(next){ while (*(next) >= '0' && *(next) <= '9') (next
)++; };

2871

start = __kmp_str_to_int(scan, *next);

2872

KMP_ASSERT2(start >= 0, "bad explicit proc list")((start >= 0) ? 0 : __kmp_debug_assert(("bad explicit proc list"
), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 2872));

2873

SKIP_WS(next){ while (*(next) == ' ' || *(next) == '\t') (next)++; };

2874

2875

// If this isn't a range, then add a mask to the list and go on.

2876

if (*next != '-') {

2877

ADD_MASK_OSID(start, osId2Mask, maxOsId);

2878

2879

// Skip optional comma.

2880

if (*next == ',') {

2881

next++;

2882

}

2883

scan = next;

2884

continue;

2885

}

2886

2887

// This is a range. Skip over the '-' and read in the 2nd int.

2888

next++; // skip '-'

2889

SKIP_WS(next){ while (*(next) == ' ' || *(next) == '\t') (next)++; };

2890

scan = next;

2891

KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list")(((*next >= '0') && (*next <= '9')) ? 0 : __kmp_debug_assert
(("bad explicit proc list"), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 2891));

2892

SKIP_DIGITS(next){ while (*(next) >= '0' && *(next) <= '9') (next
)++; };

2893

end = __kmp_str_to_int(scan, *next);

2894

KMP_ASSERT2(end >= 0, "bad explicit proc list")((end >= 0) ? 0 : __kmp_debug_assert(("bad explicit proc list"
), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 2894));

2895

2896

// Check for a stride parameter

2897

stride = 1;

2898

SKIP_WS(next){ while (*(next) == ' ' || *(next) == '\t') (next)++; };

2899

if (*next == ':') {

2900

// A stride is specified. Skip over the ':" and read the 3rd int.

2901

int sign = +1;

2902

next++; // skip ':'

2903

SKIP_WS(next){ while (*(next) == ' ' || *(next) == '\t') (next)++; };

2904

scan = next;

2905

if (*next == '-') {

2906

sign = -1;

2907

next++;

2908

SKIP_WS(next){ while (*(next) == ' ' || *(next) == '\t') (next)++; };

2909

scan = next;

2910

}

2911

KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list")(((*next >= '0') && (*next <= '9')) ? 0 : __kmp_debug_assert
(("bad explicit proc list"), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 2911));

2912

SKIP_DIGITS(next){ while (*(next) >= '0' && *(next) <= '9') (next
)++; };

2913

stride = __kmp_str_to_int(scan, *next);

2914

KMP_ASSERT2(stride >= 0, "bad explicit proc list")((stride >= 0) ? 0 : __kmp_debug_assert(("bad explicit proc list"
), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 2914));

2915

stride *= sign;

2916

}

2917

2918

// Do some range checks.

2919

KMP_ASSERT2(stride != 0, "bad explicit proc list")((stride != 0) ? 0 : __kmp_debug_assert(("bad explicit proc list"
), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 2919));

2920

if (stride > 0) {

2921

KMP_ASSERT2(start <= end, "bad explicit proc list")((start <= end) ? 0 : __kmp_debug_assert(("bad explicit proc list"
), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 2921));

2922

} else {

2923

KMP_ASSERT2(start >= end, "bad explicit proc list")((start >= end) ? 0 : __kmp_debug_assert(("bad explicit proc list"
), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 2923));

2924

}

2925

KMP_ASSERT2((end - start) / stride <= 65536, "bad explicit proc list")(((end - start) / stride <= 65536) ? 0 : __kmp_debug_assert
(("bad explicit proc list"), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 2925));

2926

2927

// Add the mask for each OS proc # to the list.

2928

if (stride > 0) {

2929

do {

2930

ADD_MASK_OSID(start, osId2Mask, maxOsId);

2931

start += stride;

2932

} while (start <= end);

2933

} else {

2934

do {

2935

ADD_MASK_OSID(start, osId2Mask, maxOsId);

2936

start += stride;

2937

} while (start >= end);

2938

}

2939

2940

// Skip optional comma.

2941

SKIP_WS(next){ while (*(next) == ' ' || *(next) == '\t') (next)++; };

2942

if (*next == ',') {

2943

next++;

2944

}

2945

scan = next;

2946

}

2947

2948

*out_numMasks = nextNewMask;

2949

if (nextNewMask == 0) {

2950

*out_masks = NULL__null;

2951

KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks)__kmp_affinity_dispatch->deallocate_mask_array(newMasks);

2952

return;

2953

}

2954

KMP_CPU_ALLOC_ARRAY((*out_masks), nextNewMask)((*out_masks) = __kmp_affinity_dispatch->allocate_mask_array
(nextNewMask));

2955

for (i = 0; i < nextNewMask; i++) {

2956

kmp_affin_mask_t *src = KMP_CPU_INDEX(newMasks, i)__kmp_affinity_dispatch->index_mask_array(newMasks, i);

2957

kmp_affin_mask_t *dest = KMP_CPU_INDEX((*out_masks), i)__kmp_affinity_dispatch->index_mask_array((*out_masks), i);

2958

KMP_CPU_COPY(dest, src)(dest)->copy(src);

2959

}

2960

KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks)__kmp_affinity_dispatch->deallocate_mask_array(newMasks);

2961

KMP_CPU_FREE(sumMask)__kmp_affinity_dispatch->deallocate_mask(sumMask);

2962

}

2963

2964

#if OMP_40_ENABLED(50 >= 40)

2965

2966

/*-----------------------------------------------------------------------------

2967

Re-parse the OMP_PLACES proc id list, forming the newMasks for the different

2968

places. Again, here is the grammar:

2969

2970

place_list := place

2971

place_list := place , place_list

2972

place := num

2973

place := place : num

2974

place := place : num : signed

2975

place := { subplace_list }

2976

place := ! place // (lowest priority)

2977

subplace_list := subplace

2978

subplace_list := subplace , subplace_list

2979

subplace := num

2980

subplace := num : num

2981

subplace := num : num : signed

2982

signed := num

2983

signed := + signed

2984

signed := - signed

2985

-----------------------------------------------------------------------------*/

2986

2987

static void __kmp_process_subplace_list(const char **scan,

2988

kmp_affin_mask_t *osId2Mask,

2989

int maxOsId, kmp_affin_mask_t *tempMask,

2990

int *setSize) {

2991

const char *next;

2992

2993

for (;;) {

2994

int start, count, stride, i;

2995

2996

// Read in the starting proc id

2997

SKIP_WS(*scan){ while (*(*scan) == ' ' || *(*scan) == '\t') (*scan)++; };

2998

KMP_ASSERT2((**scan >= '0') && (**scan <= '9'), "bad explicit places list")(((**scan >= '0') && (**scan <= '9')) ? 0 : __kmp_debug_assert
(("bad explicit places list"), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 2998));

2999

next = *scan;

3000

SKIP_DIGITS(next){ while (*(next) >= '0' && *(next) <= '9') (next
)++; };

3001

start = __kmp_str_to_int(*scan, *next);

3002

KMP_ASSERT(start >= 0)((start >= 0) ? 0 : __kmp_debug_assert("start >= 0", "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 3002));

3003

*scan = next;

3004

3005

// valid follow sets are ',' ':' and '}'

3006

SKIP_WS(*scan){ while (*(*scan) == ' ' || *(*scan) == '\t') (*scan)++; };

3007

if (**scan == '}' || **scan == ',') {

3008

if ((start > maxOsId) ||

3009

(!KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start))(__kmp_affinity_dispatch->index_mask_array(osId2Mask, start
))->is_set(start))) {

3010

if (__kmp_affinity_verbose ||

3011

(__kmp_affinity_warnings &&

3012

(__kmp_affinity_type != affinity_none))) {

3013

KMP_WARNING(AffIgnoreInvalidProcID, start)__kmp_msg(kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_AffIgnoreInvalidProcID
, start), __kmp_msg_null);

3014

}

3015

} else {

3016

KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start))(tempMask)->bitwise_or(__kmp_affinity_dispatch->index_mask_array
(osId2Mask, start));

3017

(*setSize)++;

3018

}

3019

if (**scan == '}') {

3020

break;

3021

}

3022

(*scan)++; // skip ','

3023

continue;

3024

}

3025

KMP_ASSERT2(**scan == ':', "bad explicit places list")((**scan == ':') ? 0 : __kmp_debug_assert(("bad explicit places list"
), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 3025));

3026

(*scan)++; // skip ':'

3027

3028

// Read count parameter

3029

SKIP_WS(*scan){ while (*(*scan) == ' ' || *(*scan) == '\t') (*scan)++; };

3030

KMP_ASSERT2((**scan >= '0') && (**scan <= '9'), "bad explicit places list")(((**scan >= '0') && (**scan <= '9')) ? 0 : __kmp_debug_assert
(("bad explicit places list"), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 3030));

3031

next = *scan;

3032

SKIP_DIGITS(next){ while (*(next) >= '0' && *(next) <= '9') (next
)++; };

3033

count = __kmp_str_to_int(*scan, *next);

3034

KMP_ASSERT(count >= 0)((count >= 0) ? 0 : __kmp_debug_assert("count >= 0", "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 3034));

3035

*scan = next;

3036

3037

// valid follow sets are ',' ':' and '}'

3038

SKIP_WS(*scan){ while (*(*scan) == ' ' || *(*scan) == '\t') (*scan)++; };

3039

if (**scan == '}' || **scan == ',') {

3040

for (i = 0; i < count; i++) {

3041

if ((start > maxOsId) ||

3042

(!KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start))(__kmp_affinity_dispatch->index_mask_array(osId2Mask, start
))->is_set(start))) {

3043

if (__kmp_affinity_verbose ||

3044

(__kmp_affinity_warnings &&

3045

(__kmp_affinity_type != affinity_none))) {

3046

KMP_WARNING(AffIgnoreInvalidProcID, start)__kmp_msg(kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_AffIgnoreInvalidProcID
, start), __kmp_msg_null);

3047

}

3048

break; // don't proliferate warnings for large count

3049

} else {

3050

KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start))(tempMask)->bitwise_or(__kmp_affinity_dispatch->index_mask_array
(osId2Mask, start));

3051

start++;

3052

(*setSize)++;

3053

}

3054

}

3055

if (**scan == '}') {

3056

break;

3057

}

3058

(*scan)++; // skip ','

3059

continue;

3060

}

3061

KMP_ASSERT2(**scan == ':', "bad explicit places list")((**scan == ':') ? 0 : __kmp_debug_assert(("bad explicit places list"
), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 3061));

3062

(*scan)++; // skip ':'

3063

3064

// Read stride parameter

3065

int sign = +1;

3066

for (;;) {

3067

SKIP_WS(*scan){ while (*(*scan) == ' ' || *(*scan) == '\t') (*scan)++; };

3068

if (**scan == '+') {

3069

(*scan)++; // skip '+'

3070

continue;

3071

}

3072

if (**scan == '-') {

3073

sign *= -1;

3074

(*scan)++; // skip '-'

3075

continue;

3076

}

3077

break;

3078

}

3079

SKIP_WS(*scan){ while (*(*scan) == ' ' || *(*scan) == '\t') (*scan)++; };

3080

KMP_ASSERT2((**scan >= '0') && (**scan <= '9'), "bad explicit places list")(((**scan >= '0') && (**scan <= '9')) ? 0 : __kmp_debug_assert
(("bad explicit places list"), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 3080));

3081

next = *scan;

3082

SKIP_DIGITS(next){ while (*(next) >= '0' && *(next) <= '9') (next
)++; };

3083

stride = __kmp_str_to_int(*scan, *next);

3084

KMP_ASSERT(stride >= 0)((stride >= 0) ? 0 : __kmp_debug_assert("stride >= 0", "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 3084));

3085

*scan = next;

3086

stride *= sign;

3087

3088

// valid follow sets are ',' and '}'

3089

SKIP_WS(*scan){ while (*(*scan) == ' ' || *(*scan) == '\t') (*scan)++; };

3090

if (**scan == '}' || **scan == ',') {

3091

for (i = 0; i < count; i++) {

3092

if ((start > maxOsId) ||

3093

(!KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start))(__kmp_affinity_dispatch->index_mask_array(osId2Mask, start
))->is_set(start))) {

3094

if (__kmp_affinity_verbose ||

3095

(__kmp_affinity_warnings &&

3096

(__kmp_affinity_type != affinity_none))) {

3097

KMP_WARNING(AffIgnoreInvalidProcID, start)__kmp_msg(kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_AffIgnoreInvalidProcID
, start), __kmp_msg_null);

3098

}

3099

break; // don't proliferate warnings for large count

3100

} else {

3101

KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start))(tempMask)->bitwise_or(__kmp_affinity_dispatch->index_mask_array
(osId2Mask, start));

3102

start += stride;

3103

(*setSize)++;

3104

}

3105

}

3106

if (**scan == '}') {

3107

break;

3108

}

3109

(*scan)++; // skip ','

3110

continue;

3111

}

3112

3113

KMP_ASSERT2(0, "bad explicit places list")((0) ? 0 : __kmp_debug_assert(("bad explicit places list"), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 3113));

3114

}

3115

}

3116

3117

static void __kmp_process_place(const char **scan, kmp_affin_mask_t *osId2Mask,

3118

int maxOsId, kmp_affin_mask_t *tempMask,

3119

int *setSize) {

3120

const char *next;

3121

3122

// valid follow sets are '{' '!' and num

3123

SKIP_WS(*scan){ while (*(*scan) == ' ' || *(*scan) == '\t') (*scan)++; };

3124

if (**scan == '{') {

3125

(*scan)++; // skip '{'

3126

__kmp_process_subplace_list(scan, osId2Mask, maxOsId, tempMask, setSize);

3127

KMP_ASSERT2(**scan == '}', "bad explicit places list")((**scan == '}') ? 0 : __kmp_debug_assert(("bad explicit places list"
), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 3127));

3128

(*scan)++; // skip '}'

3129

} else if (**scan == '!') {

3130

(*scan)++; // skip '!'

3131

__kmp_process_place(scan, osId2Mask, maxOsId, tempMask, setSize);

3132

KMP_CPU_COMPLEMENT(maxOsId, tempMask)(tempMask)->bitwise_not();

3133

} else if ((**scan >= '0') && (**scan <= '9')) {

3134

next = *scan;

3135

SKIP_DIGITS(next){ while (*(next) >= '0' && *(next) <= '9') (next
)++; };

3136

int num = __kmp_str_to_int(*scan, *next);

3137

KMP_ASSERT(num >= 0)((num >= 0) ? 0 : __kmp_debug_assert("num >= 0", "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 3137));

3138

if ((num > maxOsId) ||

3139

(!KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num))(__kmp_affinity_dispatch->index_mask_array(osId2Mask, num)
)->is_set(num))) {

3140

if (__kmp_affinity_verbose ||

3141

(__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none))) {

3142

KMP_WARNING(AffIgnoreInvalidProcID, num)__kmp_msg(kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_AffIgnoreInvalidProcID
, num), __kmp_msg_null);

3143

}

3144

} else {

3145

KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, num))(tempMask)->bitwise_or(__kmp_affinity_dispatch->index_mask_array
(osId2Mask, num));

3146

(*setSize)++;

3147

}

3148

*scan = next; // skip num

3149

} else {

3150

KMP_ASSERT2(0, "bad explicit places list")((0) ? 0 : __kmp_debug_assert(("bad explicit places list"), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 3150));

3151

}

3152

}

3153

3154

// static void

3155

void __kmp_affinity_process_placelist(kmp_affin_mask_t **out_masks,

3156

unsigned int *out_numMasks,

3157

const char *placelist,

3158

kmp_affin_mask_t *osId2Mask,

3159

int maxOsId) {

3160

int i, j, count, stride, sign;

3161

const char *scan = placelist;

3162

const char *next = placelist;

3163

3164

numNewMasks = 2;

3165

KMP_CPU_INTERNAL_ALLOC_ARRAY(newMasks, numNewMasks)(newMasks = __kmp_affinity_dispatch->allocate_mask_array(numNewMasks
));

3166

nextNewMask = 0;

3167

3168

// tempMask is modified based on the previous or initial

3169

// place to form the current place

3170

// previousMask contains the previous place

3171

kmp_affin_mask_t *tempMask;

3172

kmp_affin_mask_t *previousMask;

3173

KMP_CPU_ALLOC(tempMask)(tempMask = __kmp_affinity_dispatch->allocate_mask());

3174

KMP_CPU_ZERO(tempMask)(tempMask)->zero();

3175

KMP_CPU_ALLOC(previousMask)(previousMask = __kmp_affinity_dispatch->allocate_mask());

3176

KMP_CPU_ZERO(previousMask)(previousMask)->zero();

3177

int setSize = 0;

3178

3179

for (;;) {

3180

__kmp_process_place(&scan, osId2Mask, maxOsId, tempMask, &setSize);

3181

3182

// valid follow sets are ',' ':' and EOL

3183

SKIP_WS(scan){ while (*(scan) == ' ' || *(scan) == '\t') (scan)++; };

3184

if (*scan == '\0' || *scan == ',') {

3185

if (setSize > 0) {

3186

ADD_MASK(tempMask);

3187

}

3188

KMP_CPU_ZERO(tempMask)(tempMask)->zero();

3189

setSize = 0;

3190

if (*scan == '\0') {

3191

break;

3192

}

3193

scan++; // skip ','

3194

continue;

3195

}

3196

3197

KMP_ASSERT2(*scan == ':', "bad explicit places list")((*scan == ':') ? 0 : __kmp_debug_assert(("bad explicit places list"
), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 3197));

3198

scan++; // skip ':'

3199

3200

// Read count parameter

3201

SKIP_WS(scan){ while (*(scan) == ' ' || *(scan) == '\t') (scan)++; };

3202

KMP_ASSERT2((*scan >= '0') && (*scan <= '9'), "bad explicit places list")(((*scan >= '0') && (*scan <= '9')) ? 0 : __kmp_debug_assert
(("bad explicit places list"), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 3202));

3203

next = scan;

3204

SKIP_DIGITS(next){ while (*(next) >= '0' && *(next) <= '9') (next
)++; };

3205

count = __kmp_str_to_int(scan, *next);

3206

KMP_ASSERT(count >= 0)((count >= 0) ? 0 : __kmp_debug_assert("count >= 0", "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 3206));

3207

scan = next;

3208

3209

// valid follow sets are ',' ':' and EOL

3210

SKIP_WS(scan){ while (*(scan) == ' ' || *(scan) == '\t') (scan)++; };

3211

if (*scan == '\0' || *scan == ',') {

3212

stride = +1;

3213

} else {

3214

KMP_ASSERT2(*scan == ':', "bad explicit places list")((*scan == ':') ? 0 : __kmp_debug_assert(("bad explicit places list"
), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 3214));

3215

scan++; // skip ':'

3216

3217

// Read stride parameter

3218

sign = +1;

3219

for (;;) {

3220

SKIP_WS(scan){ while (*(scan) == ' ' || *(scan) == '\t') (scan)++; };

3221

if (*scan == '+') {

3222

scan++; // skip '+'

3223

continue;

3224

}

3225

if (*scan == '-') {

3226

sign *= -1;

3227

scan++; // skip '-'

3228

continue;

3229

}

3230

break;

3231

}

3232

SKIP_WS(scan){ while (*(scan) == ' ' || *(scan) == '\t') (scan)++; };

3233

KMP_ASSERT2((*scan >= '0') && (*scan <= '9'), "bad explicit places list")(((*scan >= '0') && (*scan <= '9')) ? 0 : __kmp_debug_assert
(("bad explicit places list"), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 3233));

3234

next = scan;

3235

SKIP_DIGITS(next){ while (*(next) >= '0' && *(next) <= '9') (next
)++; };

3236

stride = __kmp_str_to_int(scan, *next);

3237

KMP_DEBUG_ASSERT(stride >= 0)((stride >= 0) ? 0 : __kmp_debug_assert("stride >= 0", "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 3237));

3238

scan = next;

3239

stride *= sign;

3240

}

3241

3242

// Add places determined by initial_place : count : stride

3243

for (i = 0; i < count; i++) {

3244

if (setSize == 0) {

3245

break;

3246

}

3247

// Add the current place, then build the next place (tempMask) from that

3248

KMP_CPU_COPY(previousMask, tempMask)(previousMask)->copy(tempMask);

3249

ADD_MASK(previousMask);

3250

KMP_CPU_ZERO(tempMask)(tempMask)->zero();

3251

setSize = 0;

3252

KMP_CPU_SET_ITERATE(j, previousMask)for (j = (previousMask)->begin(); j != (previousMask)->
end(); j = (previousMask)->next(j)) {

3253

if (!KMP_CPU_ISSET(j, previousMask)(previousMask)->is_set(j)) {

3254

continue;

3255

}

3256

if ((j + stride > maxOsId) || (j + stride < 0) ||

3257

(!KMP_CPU_ISSET(j, __kmp_affin_fullMask)(__kmp_affin_fullMask)->is_set(j)) ||

3258

(!KMP_CPU_ISSET(j + stride,(__kmp_affinity_dispatch->index_mask_array(osId2Mask, j + stride
))->is_set(j + stride)

3259

KMP_CPU_INDEX(osId2Mask, j + stride))(__kmp_affinity_dispatch->index_mask_array(osId2Mask, j + stride
))->is_set(j + stride))) {

3260

if ((__kmp_affinity_verbose ||

3261

(__kmp_affinity_warnings &&

3262

(__kmp_affinity_type != affinity_none))) &&

3263

i < count - 1) {

3264

KMP_WARNING(AffIgnoreInvalidProcID, j + stride)__kmp_msg(kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_AffIgnoreInvalidProcID
, j + stride), __kmp_msg_null);

3265

}

3266

continue;

3267

}

3268

KMP_CPU_SET(j + stride, tempMask)(tempMask)->set(j + stride);

3269

setSize++;

3270

}

3271

}

3272

KMP_CPU_ZERO(tempMask)(tempMask)->zero();

3273

setSize = 0;

3274

3275

// valid follow sets are ',' and EOL

3276

SKIP_WS(scan){ while (*(scan) == ' ' || *(scan) == '\t') (scan)++; };

3277

if (*scan == '\0') {

3278

break;

3279

}

3280

if (*scan == ',') {

3281

scan++; // skip ','

3282

continue;

3283

}

3284

3285

KMP_ASSERT2(0, "bad explicit places list")((0) ? 0 : __kmp_debug_assert(("bad explicit places list"), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 3285));

3286

}

3287

3288

*out_numMasks = nextNewMask;

3289

if (nextNewMask == 0) {

3290

*out_masks = NULL__null;

3291

KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks)__kmp_affinity_dispatch->deallocate_mask_array(newMasks);

3292

return;

3293

}

3294

KMP_CPU_ALLOC_ARRAY((*out_masks), nextNewMask)((*out_masks) = __kmp_affinity_dispatch->allocate_mask_array
(nextNewMask));

3295

KMP_CPU_FREE(tempMask)__kmp_affinity_dispatch->deallocate_mask(tempMask);

3296

KMP_CPU_FREE(previousMask)__kmp_affinity_dispatch->deallocate_mask(previousMask);

3297

for (i = 0; i < nextNewMask; i++) {

3298

kmp_affin_mask_t *src = KMP_CPU_INDEX(newMasks, i)__kmp_affinity_dispatch->index_mask_array(newMasks, i);

3299

kmp_affin_mask_t *dest = KMP_CPU_INDEX((*out_masks), i)__kmp_affinity_dispatch->index_mask_array((*out_masks), i);

3300

KMP_CPU_COPY(dest, src)(dest)->copy(src);

3301

}

3302

KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks)__kmp_affinity_dispatch->deallocate_mask_array(newMasks);

3303

}

3304

3305

#endif /* OMP_40_ENABLED */

3306

3307

#undef ADD_MASK

3308

#undef ADD_MASK_OSID

3309

3310

#if KMP_USE_HWLOC0

3311

// Remove from __kmp_affin_fullMask every PU counted under object o by
// __kmp_hwloc_count_children_by_type, and return how many bits were actually
// cleared (PUs that were already absent from the mask are not counted).
static int __kmp_hwloc_skip_PUs_obj(hwloc_topology_t t, hwloc_obj_t o) {
  hwloc_obj_t hT = NULL;
  const int numPUs =
      __kmp_hwloc_count_children_by_type(t, o, HWLOC_OBJ_PU, &hT);
  int numCleared = 0;

  // hT starts at the PU handed back by the count call and is advanced to the
  // next PU object after every iteration.
  for (int pu = 0; pu < numPUs;
       ++pu, hT = hwloc_get_next_obj_by_type(t, HWLOC_OBJ_PU, hT)) {
    KMP_DEBUG_ASSERT(hT);
    const unsigned osIdx = hT->os_index;
    if (!KMP_CPU_ISSET(osIdx, __kmp_affin_fullMask)) {
      continue; // already outside the mask, nothing to clear
    }
    KMP_CPU_CLR(osIdx, __kmp_affin_fullMask);
    KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", osIdx));
    ++numCleared;
  }
  return numCleared;
}

3328

3329

// Return 1 if at least one PU counted under object o by
// __kmp_hwloc_count_children_by_type is set in __kmp_affin_fullMask,
// otherwise 0.
static int __kmp_hwloc_obj_has_PUs(hwloc_topology_t t, hwloc_obj_t o) {
  hwloc_obj_t hT = NULL;
  const int numPUs =
      __kmp_hwloc_count_children_by_type(t, o, HWLOC_OBJ_PU, &hT);

  for (int left = numPUs; left > 0; --left) {
    KMP_DEBUG_ASSERT(hT);
    if (KMP_CPU_ISSET(hT->os_index, __kmp_affin_fullMask)) {
      return 1; // first PU found in the mask settles it
    }
    hT = hwloc_get_next_obj_by_type(t, HWLOC_OBJ_PU, hT);
  }
  return 0; // none of the PUs is in the mask
}

3342

#endif // KMP_USE_HWLOC

3343

3344

static void __kmp_apply_thread_places(AddrUnsPair **pAddr, int depth) {

3345

AddrUnsPair *newAddr;

3346

if (__kmp_hws_requested == 0)

3347

goto _exit; // no topology limiting actions requested, exit

3348

#if KMP_USE_HWLOC0

3349

if (__kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC) {

3350

// Number of subobjects calculated dynamically, this works fine for

3351

// any non-uniform topology.

3352

// L2 cache objects are determined by depth, other objects - by type.

3353

hwloc_topology_t tp = __kmp_hwloc_topology;

3354

int nS = 0, nN = 0, nL = 0, nC = 0,

3355

nT = 0; // logical index including skipped

3356

int nCr = 0, nTr = 0; // number of requested units

3357

int nPkg = 0, nCo = 0, n_new = 0, n_old = 0, nCpP = 0, nTpC = 0; // counters

3358

hwloc_obj_t hT, hC, hL, hN, hS; // hwloc objects (pointers to)

3359

int L2depth, idx;

3360

3361

// check support of extensions ----------------------------------

3362

int numa_support = 0, tile_support = 0;

3363

if (__kmp_pu_os_idx)

3364

hT = hwloc_get_pu_obj_by_os_index(tp,

3365

__kmp_pu_os_idx[__kmp_avail_proc - 1]);

3366

else

3367

hT = hwloc_get_obj_by_type(tp, HWLOC_OBJ_PU, __kmp_avail_proc - 1);

3368

if (hT == NULL__null) { // something's gone wrong

3369

KMP_WARNING(AffHWSubsetUnsupported)__kmp_msg(kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_AffHWSubsetUnsupported
), __kmp_msg_null);

3370

goto _exit;

3371

}

3372

// check NUMA node

3373

hN = hwloc_get_ancestor_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hT);

3374

hS = hwloc_get_ancestor_obj_by_type(tp, HWLOC_OBJ_PACKAGE, hT);

3375

if (hN != NULL__null && hN->depth > hS->depth) {

3376

numa_support = 1; // 1 in case socket includes node(s)

3377

} else if (__kmp_hws_node.num > 0) {

3378

// don't support sockets inside NUMA node (no such HW found for testing)

3379

KMP_WARNING(AffHWSubsetUnsupported)__kmp_msg(kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_AffHWSubsetUnsupported
), __kmp_msg_null);

3380

goto _exit;

3381

}

3382

// check L2 cache, get object by depth because of multiple caches

3383

L2depth = hwloc_get_cache_type_depth(tp, 2, HWLOC_OBJ_CACHE_UNIFIED);

3384

hL = hwloc_get_ancestor_obj_by_depth(tp, L2depth, hT);

3385

if (hL != NULL__null &&

3386

__kmp_hwloc_count_children_by_type(tp, hL, HWLOC_OBJ_CORE, &hC) > 1) {

3387

tile_support = 1; // no sense to count L2 if it includes single core

3388

} else if (__kmp_hws_tile.num > 0) {

3389

if (__kmp_hws_core.num == 0) {

3390

__kmp_hws_core = __kmp_hws_tile; // replace L2 with core

3391

__kmp_hws_tile.num = 0;

3392

} else {

3393

// L2 and core are both requested, but represent same object

3394

KMP_WARNING(AffHWSubsetInvalid)__kmp_msg(kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_AffHWSubsetInvalid
), __kmp_msg_null);

3395

goto _exit;

3396

}

3397

}

3398

// end of check of extensions -----------------------------------

3399

3400

// fill in unset items, validate settings -----------------------

3401

if (__kmp_hws_socket.num == 0)

3402

__kmp_hws_socket.num = nPackages; // use all available sockets

3403

if (__kmp_hws_socket.offset >= nPackages) {

3404

KMP_WARNING(AffHWSubsetManySockets)__kmp_msg(kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_AffHWSubsetManySockets
), __kmp_msg_null);

3405

goto _exit;

3406

}

3407

if (numa_support) {

3408

hN = NULL__null;

3409

int NN = __kmp_hwloc_count_children_by_type(tp, hS, HWLOC_OBJ_NUMANODE,

3410

&hN); // num nodes in socket

3411

if (__kmp_hws_node.num == 0)

3412

__kmp_hws_node.num = NN; // use all available nodes

3413

if (__kmp_hws_node.offset >= NN) {

3414

KMP_WARNING(AffHWSubsetManyNodes)__kmp_msg(kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_AffHWSubsetManyNodes
), __kmp_msg_null);

3415

goto _exit;

3416

}

3417

if (tile_support) {

3418

// get num tiles in node

3419

int NL = __kmp_hwloc_count_children_by_depth(tp, hN, L2depth, &hL);

3420

if (__kmp_hws_tile.num == 0) {

3421

__kmp_hws_tile.num = NL + 1;

3422

} // use all available tiles, some node may have more tiles, thus +1

3423

if (__kmp_hws_tile.offset >= NL) {

3424

KMP_WARNING(AffHWSubsetManyTiles)__kmp_msg(kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_AffHWSubsetManyTiles
), __kmp_msg_null);

3425

goto _exit;

3426

}

3427

int NC = __kmp_hwloc_count_children_by_type(tp, hL, HWLOC_OBJ_CORE,

3428

&hC); // num cores in tile

3429

if (__kmp_hws_core.num == 0)

3430

__kmp_hws_core.num = NC; // use all available cores

3431

if (__kmp_hws_core.offset >= NC) {

3432

KMP_WARNING(AffHWSubsetManyCores)__kmp_msg(kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_AffHWSubsetManyCores
), __kmp_msg_null);

3433

goto _exit;

3434

}

3435

} else { // tile_support

3436

int NC = __kmp_hwloc_count_children_by_type(tp, hN, HWLOC_OBJ_CORE,

3437

&hC); // num cores in node

3438

if (__kmp_hws_core.num == 0)

3439

__kmp_hws_core.num = NC; // use all available cores

3440

if (__kmp_hws_core.offset >= NC) {

3441

KMP_WARNING(AffHWSubsetManyCores)__kmp_msg(kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_AffHWSubsetManyCores
), __kmp_msg_null);

3442

goto _exit;

3443

}

3444

} // tile_support

3445

} else { // numa_support

3446

if (tile_support) {

3447

// get num tiles in socket

3448

int NL = __kmp_hwloc_count_children_by_depth(tp, hS, L2depth, &hL);

3449

if (__kmp_hws_tile.num == 0)

3450

__kmp_hws_tile.num = NL; // use all available tiles

3451

if (__kmp_hws_tile.offset >= NL) {

3452

KMP_WARNING(AffHWSubsetManyTiles)__kmp_msg(kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_AffHWSubsetManyTiles
), __kmp_msg_null);

3453

goto _exit;

3454

}

3455

int NC = __kmp_hwloc_count_children_by_type(tp, hL, HWLOC_OBJ_CORE,

3456

&hC); // num cores in tile

3457

if (__kmp_hws_core.num == 0)

3458

__kmp_hws_core.num = NC; // use all available cores

3459

if (__kmp_hws_core.offset >= NC) {

3460

KMP_WARNING(AffHWSubsetManyCores)__kmp_msg(kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_AffHWSubsetManyCores
), __kmp_msg_null);

3461

goto _exit;

3462

}

3463

} else { // tile_support

3464

int NC = __kmp_hwloc_count_children_by_type(tp, hS, HWLOC_OBJ_CORE,

3465

&hC); // num cores in socket

3466

if (__kmp_hws_core.num == 0)

3467

__kmp_hws_core.num = NC; // use all available cores

3468

if (__kmp_hws_core.offset >= NC) {

3469

KMP_WARNING(AffHWSubsetManyCores)__kmp_msg(kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_AffHWSubsetManyCores
), __kmp_msg_null);

3470

goto _exit;

3471

}

3472

} // tile_support

3473

}

3474

if (__kmp_hws_proc.num == 0)

3475

__kmp_hws_proc.num = __kmp_nThreadsPerCore; // use all available procs

3476

if (__kmp_hws_proc.offset >= __kmp_nThreadsPerCore) {

3477

KMP_WARNING(AffHWSubsetManyProcs)__kmp_msg(kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_AffHWSubsetManyProcs
), __kmp_msg_null);

3478

goto _exit;

3479

}

3480

// end of validation --------------------------------------------

3481

3482

if (pAddr) // pAddr is NULL in case of affinity_none

3483

newAddr = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) *___kmp_allocate((sizeof(AddrUnsPair) * __kmp_avail_proc), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 3484)

3484

__kmp_avail_proc)___kmp_allocate((sizeof(AddrUnsPair) * __kmp_avail_proc), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 3484); // max size

3485

// main loop to form HW subset ----------------------------------

3486

hS = NULL__null;

3487

int NP = hwloc_get_nbobjs_by_type(tp, HWLOC_OBJ_PACKAGE);

3488

for (int s = 0; s < NP; ++s) {

3489

// Check Socket -----------------------------------------------

3490

hS = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PACKAGE, hS);

3491

if (!__kmp_hwloc_obj_has_PUs(tp, hS))

3492

continue; // skip socket if all PUs are out of fullMask

3493

++nS; // only count objects those have PUs in affinity mask

3494

if (nS <= __kmp_hws_socket.offset ||

3495

nS > __kmp_hws_socket.num + __kmp_hws_socket.offset) {

3496

n_old += __kmp_hwloc_skip_PUs_obj(tp, hS); // skip socket

3497

continue; // move to next socket

3498

}

3499

nCr = 0; // count number of cores per socket

3500

// socket requested, go down the topology tree

3501

// check 4 cases: (+NUMA+Tile), (+NUMA-Tile), (-NUMA+Tile), (-NUMA-Tile)

3502

if (numa_support) {

3503

nN = 0;

3504

hN = NULL__null;

3505

// num nodes in current socket

3506

int NN =

3507

__kmp_hwloc_count_children_by_type(tp, hS, HWLOC_OBJ_NUMANODE, &hN);

3508

for (int n = 0; n < NN; ++n) {

3509

// Check NUMA Node ----------------------------------------

3510

if (!__kmp_hwloc_obj_has_PUs(tp, hN)) {

3511

hN = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hN);

3512

continue; // skip node if all PUs are out of fullMask

3513

}

3514

++nN;

3515

if (nN <= __kmp_hws_node.offset ||

3516

nN > __kmp_hws_node.num + __kmp_hws_node.offset) {

3517

// skip node as not requested

3518

n_old += __kmp_hwloc_skip_PUs_obj(tp, hN); // skip node

3519

hN = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hN);

3520

continue; // move to next node

3521

}

3522

// node requested, go down the topology tree

3523

if (tile_support) {

3524

nL = 0;

3525

hL = NULL__null;

3526

int NL = __kmp_hwloc_count_children_by_depth(tp, hN, L2depth, &hL);

3527

for (int l = 0; l < NL; ++l) {

3528

// Check L2 (tile) ------------------------------------

3529

if (!__kmp_hwloc_obj_has_PUs(tp, hL)) {

3530

hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);

3531

continue; // skip tile if all PUs are out of fullMask

3532

}

3533

++nL;

3534

if (nL <= __kmp_hws_tile.offset ||

3535

nL > __kmp_hws_tile.num + __kmp_hws_tile.offset) {

3536

// skip tile as not requested

3537

n_old += __kmp_hwloc_skip_PUs_obj(tp, hL); // skip tile

3538

hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);

3539

continue; // move to next tile

3540

}

3541

// tile requested, go down the topology tree

3542

nC = 0;

3543

hC = NULL__null;

3544

// num cores in current tile

3545

int NC = __kmp_hwloc_count_children_by_type(tp, hL,

3546

HWLOC_OBJ_CORE, &hC);

3547

for (int c = 0; c < NC; ++c) {

3548

// Check Core ---------------------------------------

3549

if (!__kmp_hwloc_obj_has_PUs(tp, hC)) {

3550

hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);

3551

continue; // skip core if all PUs are out of fullMask

3552

}

3553

++nC;

3554

if (nC <= __kmp_hws_core.offset ||

3555

nC > __kmp_hws_core.num + __kmp_hws_core.offset) {

3556

// skip core as not requested

3557

n_old += __kmp_hwloc_skip_PUs_obj(tp, hC); // skip core

3558

hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);

3559

continue; // move to next node

3560

}

3561

// core requested, go down to PUs

3562

nT = 0;

3563

nTr = 0;

3564

hT = NULL__null;

3565

// num procs in current core

3566

int NT = __kmp_hwloc_count_children_by_type(tp, hC,

3567

HWLOC_OBJ_PU, &hT);

3568

for (int t = 0; t < NT; ++t) {

3569

// Check PU ---------------------------------------

3570

idx = hT->os_index;

3571

if (!KMP_CPU_ISSET(idx, __kmp_affin_fullMask)(__kmp_affin_fullMask)->is_set(idx)) {

3572

hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);

3573

continue; // skip PU if not in fullMask

3574

}

3575

++nT;

3576

if (nT <= __kmp_hws_proc.offset ||

3577

nT > __kmp_hws_proc.num + __kmp_hws_proc.offset) {

3578

// skip PU

3579

KMP_CPU_CLR(idx, __kmp_affin_fullMask)(__kmp_affin_fullMask)->clear(idx);

3580

++n_old;

3581

KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx))if (kmp_c_debug >= 200) { __kmp_debug_printf ("KMP_HW_SUBSET: skipped proc %d\n"
, idx); };

3582

hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);

3583

continue; // move to next node

3584

}

3585

++nTr;

3586

if (pAddr) // collect requested thread's data

3587

newAddr[n_new] = (*pAddr)[n_old];

3588

++n_new;

3589

++n_old;

3590

hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);

3591

} // threads loop

3592

if (nTr > 0) {

3593

++nCr; // num cores per socket

3594

++nCo; // total num cores

3595

if (nTr > nTpC)

3596

nTpC = nTr; // calc max threads per core

3597

}

3598

hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);

3599

} // cores loop

3600

hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);

3601

} // tiles loop

3602

} else { // tile_support

3603

// no tiles, check cores

3604

nC = 0;

3605

hC = NULL__null;

3606

// num cores in current node

3607

int NC =

3608

__kmp_hwloc_count_children_by_type(tp, hN, HWLOC_OBJ_CORE, &hC);

3609

for (int c = 0; c < NC; ++c) {

3610

// Check Core ---------------------------------------

3611

if (!__kmp_hwloc_obj_has_PUs(tp, hC)) {

3612

hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);

3613

continue; // skip core if all PUs are out of fullMask

3614

}

3615

++nC;

3616

if (nC <= __kmp_hws_core.offset ||

3617

nC > __kmp_hws_core.num + __kmp_hws_core.offset) {

3618

// skip core as not requested

3619

n_old += __kmp_hwloc_skip_PUs_obj(tp, hC); // skip core

3620

hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);

3621

continue; // move to next node

3622

}

3623

// core requested, go down to PUs

3624

nT = 0;

3625

nTr = 0;

3626

hT = NULL__null;

3627

int NT =

3628

__kmp_hwloc_count_children_by_type(tp, hC, HWLOC_OBJ_PU, &hT);

3629

for (int t = 0; t < NT; ++t) {

3630

// Check PU ---------------------------------------

3631

idx = hT->os_index;

3632

if (!KMP_CPU_ISSET(idx, __kmp_affin_fullMask)(__kmp_affin_fullMask)->is_set(idx)) {

3633

hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);

3634

continue; // skip PU if not in fullMask

3635

}

3636

++nT;

3637

if (nT <= __kmp_hws_proc.offset ||

3638

nT > __kmp_hws_proc.num + __kmp_hws_proc.offset) {

3639

// skip PU

3640

KMP_CPU_CLR(idx, __kmp_affin_fullMask)(__kmp_affin_fullMask)->clear(idx);

3641

++n_old;

3642

KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx))if (kmp_c_debug >= 200) { __kmp_debug_printf ("KMP_HW_SUBSET: skipped proc %d\n"
, idx); };

3643

hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);

3644

continue; // move to next node

3645

}

3646

++nTr;

3647

if (pAddr) // collect requested thread's data

3648

newAddr[n_new] = (*pAddr)[n_old];

3649

++n_new;

3650

++n_old;

3651

hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);

3652

} // threads loop

3653

if (nTr > 0) {

3654

++nCr; // num cores per socket

3655

++nCo; // total num cores

3656

if (nTr > nTpC)

3657

nTpC = nTr; // calc max threads per core

3658

}

3659

hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);

3660

} // cores loop

3661

} // tiles support

3662

hN = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hN);

3663

} // nodes loop

3664

} else { // numa_support

3665

// no NUMA support

3666

if (tile_support) {

3667

nL = 0;

3668

hL = NULL__null;

3669

// num tiles in current socket

3670

int NL = __kmp_hwloc_count_children_by_depth(tp, hS, L2depth, &hL);

3671

for (int l = 0; l < NL; ++l) {

3672

// Check L2 (tile) ------------------------------------

3673

if (!__kmp_hwloc_obj_has_PUs(tp, hL)) {

3674

hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);

3675

continue; // skip tile if all PUs are out of fullMask

3676

}

3677

++nL;

3678

if (nL <= __kmp_hws_tile.offset ||

3679

nL > __kmp_hws_tile.num + __kmp_hws_tile.offset) {

3680

// skip tile as not requested

3681

n_old += __kmp_hwloc_skip_PUs_obj(tp, hL); // skip tile

3682

hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);

3683

continue; // move to next tile

3684

}

3685

// tile requested, go down the topology tree

3686

nC = 0;

3687

hC = NULL__null;

3688

// num cores per tile

3689

int NC =

3690

__kmp_hwloc_count_children_by_type(tp, hL, HWLOC_OBJ_CORE, &hC);

3691

for (int c = 0; c < NC; ++c) {

3692

// Check Core ---------------------------------------

3693

if (!__kmp_hwloc_obj_has_PUs(tp, hC)) {

3694

hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);

3695

continue; // skip core if all PUs are out of fullMask

3696

}

3697

++nC;

3698

if (nC <= __kmp_hws_core.offset ||

3699

nC > __kmp_hws_core.num + __kmp_hws_core.offset) {

3700

// skip core as not requested

3701

n_old += __kmp_hwloc_skip_PUs_obj(tp, hC); // skip core

3702

hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);

3703

continue; // move to next node

3704

}

3705

// core requested, go down to PUs

3706

nT = 0;

3707

nTr = 0;

3708

hT = NULL__null;

3709

// num procs per core

3710

int NT =

3711

__kmp_hwloc_count_children_by_type(tp, hC, HWLOC_OBJ_PU, &hT);

3712

for (int t = 0; t < NT; ++t) {

3713

// Check PU ---------------------------------------

3714

idx = hT->os_index;

3715

if (!KMP_CPU_ISSET(idx, __kmp_affin_fullMask)(__kmp_affin_fullMask)->is_set(idx)) {

3716

hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);

3717

continue; // skip PU if not in fullMask

3718

}

3719

++nT;

3720

if (nT <= __kmp_hws_proc.offset ||

3721

nT > __kmp_hws_proc.num + __kmp_hws_proc.offset) {

3722

// skip PU

3723

KMP_CPU_CLR(idx, __kmp_affin_fullMask)(__kmp_affin_fullMask)->clear(idx);

3724

++n_old;

3725

KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx))if (kmp_c_debug >= 200) { __kmp_debug_printf ("KMP_HW_SUBSET: skipped proc %d\n"
, idx); };

3726

hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);

3727

continue; // move to next node

3728

}

3729

++nTr;

3730

if (pAddr) // collect requested thread's data

3731

newAddr[n_new] = (*pAddr)[n_old];

3732

++n_new;

3733

++n_old;

3734

hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);

3735

} // threads loop

3736

if (nTr > 0) {

3737

++nCr; // num cores per socket

3738

++nCo; // total num cores

3739

if (nTr > nTpC)

3740

nTpC = nTr; // calc max threads per core

3741

}

3742

hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);

3743

} // cores loop

3744

hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);

3745

} // tiles loop

3746

} else { // tile_support

3747

// no tiles, check cores

3748

nC = 0;

3749

hC = NULL__null;

3750

// num cores in socket

3751

int NC =

3752

__kmp_hwloc_count_children_by_type(tp, hS, HWLOC_OBJ_CORE, &hC);

3753

for (int c = 0; c < NC; ++c) {

3754

// Check Core -------------------------------------------

3755

if (!__kmp_hwloc_obj_has_PUs(tp, hC)) {

3756

hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);

3757

continue; // skip core if all PUs are out of fullMask

3758

}

3759

++nC;

3760

if (nC <= __kmp_hws_core.offset ||

3761

nC > __kmp_hws_core.num + __kmp_hws_core.offset) {

3762

// skip core as not requested

3763

n_old += __kmp_hwloc_skip_PUs_obj(tp, hC); // skip core

3764

hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);

3765

continue; // move to next node

3766

}

3767

// core requested, go down to PUs

3768

nT = 0;

3769

nTr = 0;

3770

hT = NULL__null;

3771

// num procs per core

3772

int NT =

3773

__kmp_hwloc_count_children_by_type(tp, hC, HWLOC_OBJ_PU, &hT);

3774

for (int t = 0; t < NT; ++t) {

3775

// Check PU ---------------------------------------

3776

idx = hT->os_index;

3777

if (!KMP_CPU_ISSET(idx, __kmp_affin_fullMask)(__kmp_affin_fullMask)->is_set(idx)) {

3778

hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);

3779

continue; // skip PU if not in fullMask

3780

}

3781

++nT;

3782

if (nT <= __kmp_hws_proc.offset ||

3783

nT > __kmp_hws_proc.num + __kmp_hws_proc.offset) {

3784

// skip PU

3785

KMP_CPU_CLR(idx, __kmp_affin_fullMask)(__kmp_affin_fullMask)->clear(idx);

3786

++n_old;

3787

KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx))if (kmp_c_debug >= 200) { __kmp_debug_printf ("KMP_HW_SUBSET: skipped proc %d\n"
, idx); };

3788

hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);

3789

continue; // move to next node

3790

}

3791

++nTr;

3792

if (pAddr) // collect requested thread's data

3793

newAddr[n_new] = (*pAddr)[n_old];

3794

++n_new;

3795

++n_old;

3796

hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);

3797

} // threads loop

3798

if (nTr > 0) {

3799

++nCr; // num cores per socket

3800

++nCo; // total num cores

3801

if (nTr > nTpC)

3802

nTpC = nTr; // calc max threads per core

3803

}

3804

hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);

3805

} // cores loop

3806

} // tiles support

3807

} // numa_support

3808

if (nCr > 0) { // found cores?

3809

++nPkg; // num sockets

3810

if (nCr > nCpP)

3811

nCpP = nCr; // calc max cores per socket

3812

}

3813

} // sockets loop

3814

3815

// check the subset is valid

3816

KMP_DEBUG_ASSERT(n_old == __kmp_avail_proc)((n_old == __kmp_avail_proc) ? 0 : __kmp_debug_assert("n_old == __kmp_avail_proc"
, "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 3816));

3817

KMP_DEBUG_ASSERT(nPkg > 0)((nPkg > 0) ? 0 : __kmp_debug_assert("nPkg > 0", "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 3817));

3818

KMP_DEBUG_ASSERT(nCpP > 0)((nCpP > 0) ? 0 : __kmp_debug_assert("nCpP > 0", "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 3818));

3819

KMP_DEBUG_ASSERT(nTpC > 0)((nTpC > 0) ? 0 : __kmp_debug_assert("nTpC > 0", "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 3819));

3820

KMP_DEBUG_ASSERT(nCo > 0)((nCo > 0) ? 0 : __kmp_debug_assert("nCo > 0", "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 3820));

3821

KMP_DEBUG_ASSERT(nPkg <= nPackages)((nPkg <= nPackages) ? 0 : __kmp_debug_assert("nPkg <= nPackages"
, "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 3821));

3822

KMP_DEBUG_ASSERT(nCpP <= nCoresPerPkg)((nCpP <= nCoresPerPkg) ? 0 : __kmp_debug_assert("nCpP <= nCoresPerPkg"
, "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 3822));

3823

KMP_DEBUG_ASSERT(nTpC <= __kmp_nThreadsPerCore)((nTpC <= __kmp_nThreadsPerCore) ? 0 : __kmp_debug_assert(
"nTpC <= __kmp_nThreadsPerCore", "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 3823));

3824

KMP_DEBUG_ASSERT(nCo <= __kmp_ncores)((nCo <= __kmp_ncores) ? 0 : __kmp_debug_assert("nCo <= __kmp_ncores"
, "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 3824));

3825

3826

nPackages = nPkg; // correct num sockets

3827

nCoresPerPkg = nCpP; // correct num cores per socket

3828

__kmp_nThreadsPerCore = nTpC; // correct num threads per core

3829

__kmp_avail_proc = n_new; // correct num procs

3830

__kmp_ncores = nCo; // correct num cores

3831

// hwloc topology method end

3832

} else

3833

#endif // KMP_USE_HWLOC

3834

{

3835

int n_old = 0, n_new = 0, proc_num = 0;

3836

if (__kmp_hws_node.num > 0 || __kmp_hws_tile.num > 0) {

3837

KMP_WARNING(AffHWSubsetNoHWLOC)__kmp_msg(kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_AffHWSubsetNoHWLOC
), __kmp_msg_null);

3838

goto _exit;

3839

}

3840

if (__kmp_hws_socket.num == 0)

3841

__kmp_hws_socket.num = nPackages; // use all available sockets

3842

if (__kmp_hws_core.num == 0)

3843

__kmp_hws_core.num = nCoresPerPkg; // use all available cores

3844

if (__kmp_hws_proc.num == 0 || __kmp_hws_proc.num > __kmp_nThreadsPerCore)

3845

__kmp_hws_proc.num = __kmp_nThreadsPerCore; // use all HW contexts

3846

if (!__kmp_affinity_uniform_topology()) {

3847

KMP_WARNING(AffHWSubsetNonUniform)__kmp_msg(kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_AffHWSubsetNonUniform
), __kmp_msg_null);

3848

goto _exit; // don't support non-uniform topology

3849

}

3850

if (depth > 3) {

3851

KMP_WARNING(AffHWSubsetNonThreeLevel)__kmp_msg(kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_AffHWSubsetNonThreeLevel
), __kmp_msg_null);

3852

goto _exit; // don't support not-3-level topology

3853

}

3854

if (__kmp_hws_socket.offset + __kmp_hws_socket.num > nPackages) {

3855

KMP_WARNING(AffHWSubsetManySockets)__kmp_msg(kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_AffHWSubsetManySockets
), __kmp_msg_null);

3856

goto _exit;

3857

}

3858

if (__kmp_hws_core.offset + __kmp_hws_core.num > nCoresPerPkg) {

3859

KMP_WARNING(AffHWSubsetManyCores)__kmp_msg(kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_AffHWSubsetManyCores
), __kmp_msg_null);

3860

goto _exit;

3861

}

3862

// Form the requested subset

3863

if (pAddr) // pAddr is NULL in case of affinity_none

3864

newAddr = (AddrUnsPair *)__kmp_allocate(___kmp_allocate((sizeof(AddrUnsPair) * __kmp_hws_socket.num *
__kmp_hws_core.num * __kmp_hws_proc.num), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 3866)

3865

sizeof(AddrUnsPair) * __kmp_hws_socket.num * __kmp_hws_core.num *___kmp_allocate((sizeof(AddrUnsPair) * __kmp_hws_socket.num *
__kmp_hws_core.num * __kmp_hws_proc.num), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 3866)

3866

__kmp_hws_proc.num)___kmp_allocate((sizeof(AddrUnsPair) * __kmp_hws_socket.num *
__kmp_hws_core.num * __kmp_hws_proc.num), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 3866);

3867

for (int i = 0; i < nPackages; ++i) {

3868

if (i < __kmp_hws_socket.offset ||

3869

i >= __kmp_hws_socket.offset + __kmp_hws_socket.num) {

3870

// skip not-requested socket

3871

n_old += nCoresPerPkg * __kmp_nThreadsPerCore;

3872

if (__kmp_pu_os_idx != NULL__null) {

3873

// walk through skipped socket

3874

for (int j = 0; j < nCoresPerPkg; ++j) {

3875

for (int k = 0; k < __kmp_nThreadsPerCore; ++k) {

3876

KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask)(__kmp_affin_fullMask)->clear(__kmp_pu_os_idx[proc_num]);

3877

++proc_num;

3878

}

3879

}

3880

}

3881

} else {

3882

// walk through requested socket

3883

for (int j = 0; j < nCoresPerPkg; ++j) {

3884

if (j < __kmp_hws_core.offset ||

3885

j >= __kmp_hws_core.offset +

3886

__kmp_hws_core.num) { // skip not-requested core

3887

n_old += __kmp_nThreadsPerCore;

3888

if (__kmp_pu_os_idx != NULL__null) {

3889

for (int k = 0; k < __kmp_nThreadsPerCore; ++k) {

3890

KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask)(__kmp_affin_fullMask)->clear(__kmp_pu_os_idx[proc_num]);

3891

++proc_num;

3892

}

3893

}

3894

} else {

3895

// walk through requested core

3896

for (int k = 0; k < __kmp_nThreadsPerCore; ++k) {

3897

if (k < __kmp_hws_proc.num) {

3898

if (pAddr) // collect requested thread's data

3899

newAddr[n_new] = (*pAddr)[n_old];

3900

n_new++;

3901

} else {

3902

if (__kmp_pu_os_idx != NULL__null)

3903

KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask)(__kmp_affin_fullMask)->clear(__kmp_pu_os_idx[proc_num]);

3904

}

3905

n_old++;

3906

++proc_num;

3907

}

3908

}

3909

}

3910

}

3911

}

3912

KMP_DEBUG_ASSERT(n_old == nPackages * nCoresPerPkg * __kmp_nThreadsPerCore)((n_old == nPackages * nCoresPerPkg * __kmp_nThreadsPerCore) ?
0 : __kmp_debug_assert("n_old == nPackages * nCoresPerPkg * __kmp_nThreadsPerCore"
, "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 3912));

3913

KMP_DEBUG_ASSERT(n_new ==((n_new == __kmp_hws_socket.num * __kmp_hws_core.num * __kmp_hws_proc
.num) ? 0 : __kmp_debug_assert("n_new == __kmp_hws_socket.num * __kmp_hws_core.num * __kmp_hws_proc.num"
, "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 3915))

3914

__kmp_hws_socket.num * __kmp_hws_core.num *((n_new == __kmp_hws_socket.num * __kmp_hws_core.num * __kmp_hws_proc
.num) ? 0 : __kmp_debug_assert("n_new == __kmp_hws_socket.num * __kmp_hws_core.num * __kmp_hws_proc.num"
, "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 3915))

3915

__kmp_hws_proc.num)((n_new == __kmp_hws_socket.num * __kmp_hws_core.num * __kmp_hws_proc
.num) ? 0 : __kmp_debug_assert("n_new == __kmp_hws_socket.num * __kmp_hws_core.num * __kmp_hws_proc.num"
, "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 3915));

3916

nPackages = __kmp_hws_socket.num; // correct nPackages

3917

nCoresPerPkg = __kmp_hws_core.num; // correct nCoresPerPkg

3918

__kmp_nThreadsPerCore = __kmp_hws_proc.num; // correct __kmp_nThreadsPerCore

3919

__kmp_avail_proc = n_new; // correct avail_proc

3920

__kmp_ncores = nPackages * __kmp_hws_core.num; // correct ncores

3921

} // non-hwloc topology method

3922

if (pAddr) {

3923

__kmp_free(*pAddr)___kmp_free((*pAddr), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 3923);

3924

*pAddr = newAddr; // replace old topology with new one

3925

}

3926

if (__kmp_affinity_verbose) {

3927

char m[KMP_AFFIN_MASK_PRINT_LEN1024];

3928

__kmp_affinity_print_mask(m, KMP_AFFIN_MASK_PRINT_LEN1024,

3929

__kmp_affin_fullMask);

3930

if (__kmp_affinity_respect_mask) {

3931

KMP_INFORM(InitOSProcSetRespect, "KMP_HW_SUBSET", m)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_InitOSProcSetRespect
, "KMP_HW_SUBSET", m), __kmp_msg_null);

3932

} else {

3933

KMP_INFORM(InitOSProcSetNotRespect, "KMP_HW_SUBSET", m)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_InitOSProcSetNotRespect
, "KMP_HW_SUBSET", m), __kmp_msg_null);

3934

}

3935

KMP_INFORM(AvailableOSProc, "KMP_HW_SUBSET", __kmp_avail_proc)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AvailableOSProc
, "KMP_HW_SUBSET", __kmp_avail_proc), __kmp_msg_null);

3936

kmp_str_buf_t buf;

3937

__kmp_str_buf_init(&buf){ (&buf)->str = (&buf)->bulk; (&buf)->size
= sizeof((&buf)->bulk); (&buf)->used = 0; (&
buf)->bulk[0] = 0; };

3938

__kmp_str_buf_print(&buf, "%d", nPackages);

3939

KMP_INFORM(TopologyExtra, "KMP_HW_SUBSET", buf.str, nCoresPerPkg,__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_TopologyExtra
, "KMP_HW_SUBSET", buf.str, nCoresPerPkg, __kmp_nThreadsPerCore
, __kmp_ncores), __kmp_msg_null)

3940

__kmp_nThreadsPerCore, __kmp_ncores)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_TopologyExtra
, "KMP_HW_SUBSET", buf.str, nCoresPerPkg, __kmp_nThreadsPerCore
, __kmp_ncores), __kmp_msg_null);

3941

__kmp_str_buf_free(&buf);

3942

}

3943

_exit:

3944

if (__kmp_pu_os_idx != NULL__null) {

3945

__kmp_free(__kmp_pu_os_idx)___kmp_free((__kmp_pu_os_idx), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 3945);

3946

__kmp_pu_os_idx = NULL__null;

3947

}

3948

}

3949

3950

// This function figures out the deepest level at which there is at least one

3951

// cluster/core with more than one processing unit bound to it.

3952

static int __kmp_affinity_find_core_level(const AddrUnsPair *address2os,

3953

int nprocs, int bottom_level) {

3954

int core_level = 0;

3955

3956

for (int i = 0; i < nprocs; i++) {

3957

for (int j = bottom_level; j > 0; j--) {

3958

if (address2os[i].first.labels[j] > 0) {

3959

if (core_level < (j - 1)) {

3960

core_level = j - 1;

3961

}

3962

}

3963

}

3964

}

3965

return core_level;

3966

}

3967

3968

// This function counts number of clusters/cores at given level.

3969

static int __kmp_affinity_compute_ncores(const AddrUnsPair *address2os,

3970

int nprocs, int bottom_level,

3971

int core_level) {

3972

int ncores = 0;

3973

int i, j;

3974

3975

j = bottom_level;

3976

for (i = 0; i < nprocs; i++) {

3977

for (j = bottom_level; j > core_level; j--) {

3978

if ((i + 1) < nprocs) {

3979

if (address2os[i + 1].first.labels[j] > 0) {

3980

break;

3981

}

3982

}

3983

}

3984

if (j == core_level) {

3985

ncores++;

3986

}

3987

}

3988

if (j > core_level) {

3989

// In case of ( nprocs < __kmp_avail_proc ) we may end too deep and miss one

3990

// core. May occur when called from __kmp_affinity_find_core().

3991

ncores++;

3992

}

3993

return ncores;

3994

}

3995

3996

// This function finds to which cluster/core given processing unit is bound.

3997

static int __kmp_affinity_find_core(const AddrUnsPair *address2os, int proc,

3998

int bottom_level, int core_level) {

3999

return __kmp_affinity_compute_ncores(address2os, proc + 1, bottom_level,

4000

core_level) -

4001

1;

4002

}

4003

4004

// This function finds maximal number of processing units bound to a

4005

// cluster/core at given level.

4006

static int __kmp_affinity_max_proc_per_core(const AddrUnsPair *address2os,

4007

int nprocs, int bottom_level,

4008

int core_level) {

4009

int maxprocpercore = 0;

4010

4011

if (core_level < bottom_level) {

4012

for (int i = 0; i < nprocs; i++) {

4013

int percore = address2os[i].first.labels[core_level + 1] + 1;

4014

4015

if (percore > maxprocpercore) {

4016

maxprocpercore = percore;

4017

}

4018

}

4019

} else {

4020

maxprocpercore = 1;

4021

}

4022

return maxprocpercore;

4023

}

4024

4025

// Topology map shared by the affinity initialization code below: its address
// is handed to the __kmp_affinity_create_*_map() routines, which fill it in.
// KMP_EXIT_AFF_NONE asserts that it is still NULL.
static AddrUnsPair *address2os = NULL__null;

4026

// NOTE(review): no use of procarr is visible in this part of the file;
// confirm its role against the code further down before relying on it.
static int *procarr = NULL__null;

4027

// NOTE(review): presumably the depth of the topology stored in address2os;
// the assignment is outside this view - confirm.
static int __kmp_aff_depth = 0;

4028

4029

// KMP_EXIT_AFF_NONE: common bail-out used when a topology-discovery routine
// reports depth 0. It asserts that the affinity type is affinity_none and that
// no address2os map was produced, calls __kmp_apply_thread_places(NULL, 0),
// builds the single-place mask array via __kmp_create_affinity_none_places(),
// and then *returns from the enclosing function*. The variant used when
// hierarchical scheduling is enabled additionally calls
// __kmp_dispatch_set_hierarchy_values() before returning.
// The expansion is several statements ending in `return;`, so it must only be
// used as a full statement inside a braced block of a void function.
#if KMP_USE_HIER_SCHED0

4030

#define KMP_EXIT_AFF_NONE \

4031

KMP_ASSERT(__kmp_affinity_type == affinity_none)((__kmp_affinity_type == affinity_none) ? 0 : __kmp_debug_assert
("__kmp_affinity_type == affinity_none", "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 4031)); \

4032

KMP_ASSERT(address2os == NULL)((address2os == __null) ? 0 : __kmp_debug_assert("address2os == NULL"
, "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 4032)); \

4033

__kmp_apply_thread_places(NULL__null, 0); \

4034

__kmp_create_affinity_none_places(); \

4035

__kmp_dispatch_set_hierarchy_values(); \

4036

return;

4037

#else

4038

#define KMP_EXIT_AFF_NONE \

4039

KMP_ASSERT(__kmp_affinity_type == affinity_none)((__kmp_affinity_type == affinity_none) ? 0 : __kmp_debug_assert
("__kmp_affinity_type == affinity_none", "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 4039)); \

4040

KMP_ASSERT(address2os == NULL)((address2os == __null) ? 0 : __kmp_debug_assert("address2os == NULL"
, "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 4040)); \

4041

__kmp_apply_thread_places(NULL__null, 0); \

4042

__kmp_create_affinity_none_places(); \

4043

return;

4044

#endif

4045

4046

// Create a one element mask array (set of places) which only contains the

4047

// initial process's affinity mask

4048

static void __kmp_create_affinity_none_places() {

4049

KMP_ASSERT(__kmp_affin_fullMask != NULL)((__kmp_affin_fullMask != __null) ? 0 : __kmp_debug_assert("__kmp_affin_fullMask != NULL"
, "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 4049));

4050

KMP_ASSERT(__kmp_affinity_type == affinity_none)((__kmp_affinity_type == affinity_none) ? 0 : __kmp_debug_assert
("__kmp_affinity_type == affinity_none", "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 4050));

4051

__kmp_affinity_num_masks = 1;

4052

KMP_CPU_ALLOC_ARRAY(__kmp_affinity_masks, __kmp_affinity_num_masks)(__kmp_affinity_masks = __kmp_affinity_dispatch->allocate_mask_array
(__kmp_affinity_num_masks));

4053

kmp_affin_mask_t *dest = KMP_CPU_INDEX(__kmp_affinity_masks, 0)__kmp_affinity_dispatch->index_mask_array(__kmp_affinity_masks
, 0);

4054

KMP_CPU_COPY(dest, __kmp_affin_fullMask)(dest)->copy(__kmp_affin_fullMask);

4055

}

4056

4057

static int __kmp_affinity_cmp_Address_child_num(const void *a, const void *b) {

4058

const Address *aa = &(((const AddrUnsPair *)a)->first);

4059

const Address *bb = &(((const AddrUnsPair *)b)->first);

4060

unsigned depth = aa->depth;

4061

unsigned i;

4062

KMP_DEBUG_ASSERT(depth == bb->depth)((depth == bb->depth) ? 0 : __kmp_debug_assert("depth == bb->depth"
, "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 4062));

4063

KMP_DEBUG_ASSERT((unsigned)__kmp_affinity_compact <= depth)(((unsigned)__kmp_affinity_compact <= depth) ? 0 : __kmp_debug_assert
("(unsigned)__kmp_affinity_compact <= depth", "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 4063));

4064

KMP_DEBUG_ASSERT(__kmp_affinity_compact >= 0)((__kmp_affinity_compact >= 0) ? 0 : __kmp_debug_assert("__kmp_affinity_compact >= 0"
, "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 4064));

4065

for (i = 0; i < (unsigned)__kmp_affinity_compact; i++) {

4066

int j = depth - i - 1;

4067

if (aa->childNums[j] < bb->childNums[j])

4068

return -1;

4069

if (aa->childNums[j] > bb->childNums[j])

4070

return 1;

4071

}

4072

for (; i < depth; i++) {

4073

int j = i - __kmp_affinity_compact;

4074

if (aa->childNums[j] < bb->childNums[j])

4075

return -1;

4076

if (aa->childNums[j] > bb->childNums[j])

4077

return 1;

4078

}

4079

return 0;

4080

}

4081

4082

static void __kmp_aux_affinity_initialize(void) {

4083

if (__kmp_affinity_masks != NULL__null) {

4084

KMP_ASSERT(__kmp_affin_fullMask != NULL)((__kmp_affin_fullMask != __null) ? 0 : __kmp_debug_assert("__kmp_affin_fullMask != NULL"
, "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 4084));

4085

return;

4086

}

4087

4088

// Create the "full" mask - this defines all of the processors that we

4089

// consider to be in the machine model. If respect is set, then it is the

4090

// initialization thread's affinity mask. Otherwise, it is all processors that

4091

// we know about on the machine.

4092

if (__kmp_affin_fullMask == NULL__null) {

4093

KMP_CPU_ALLOC(__kmp_affin_fullMask)(__kmp_affin_fullMask = __kmp_affinity_dispatch->allocate_mask
());

4094

}

4095

if (KMP_AFFINITY_CAPABLE()(__kmp_affin_mask_size > 0)) {

4096

if (__kmp_affinity_respect_mask) {

4097

__kmp_get_system_affinity(__kmp_affin_fullMask, TRUE)(__kmp_affin_fullMask)->get_system_affinity((!0));

4098

4099

// Count the number of available processors.

4100

unsigned i;

4101

__kmp_avail_proc = 0;

4102

KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask)for (i = (__kmp_affin_fullMask)->begin(); i != (__kmp_affin_fullMask
)->end(); i = (__kmp_affin_fullMask)->next(i)) {

4103

if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)(__kmp_affin_fullMask)->is_set(i)) {

4104

continue;

4105

}

4106

__kmp_avail_proc++;

4107

}

4108

if (__kmp_avail_proc > __kmp_xproc) {

4109

if (__kmp_affinity_verbose ||

4110

(__kmp_affinity_warnings &&

4111

(__kmp_affinity_type != affinity_none))) {

4112

KMP_WARNING(ErrorInitializeAffinity)__kmp_msg(kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_ErrorInitializeAffinity
), __kmp_msg_null);

4113

}

4114

__kmp_affinity_type = affinity_none;

4115

KMP_AFFINITY_DISABLE()(__kmp_affin_mask_size = 0);

4116

return;

4117

}

4118

} else {

4119

__kmp_affinity_entire_machine_mask(__kmp_affin_fullMask);

4120

__kmp_avail_proc = __kmp_xproc;

4121

}

4122

}

4123

4124

if (__kmp_affinity_gran == affinity_gran_tile &&

4125

// check if user's request is valid

4126

__kmp_affinity_dispatch->get_api_type() == KMPAffinity::NATIVE_OS) {

4127

KMP_WARNING(AffTilesNoHWLOC, "KMP_AFFINITY")__kmp_msg(kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_AffTilesNoHWLOC
, "KMP_AFFINITY"), __kmp_msg_null);

4128

__kmp_affinity_gran = affinity_gran_package;

4129

}

4130

4131

int depth = -1;

4132

kmp_i18n_id_t msg_id = kmp_i18n_null;

4133

4134

// For backward compatibility, setting KMP_CPUINFO_FILE =>

4135

// KMP_TOPOLOGY_METHOD=cpuinfo

4136

if ((__kmp_cpuinfo_file != NULL__null) &&

4137

(__kmp_affinity_top_method == affinity_top_method_all)) {

4138

__kmp_affinity_top_method = affinity_top_method_cpuinfo;

4139

}

4140

4141

if (__kmp_affinity_top_method == affinity_top_method_all) {

4142

// In the default code path, errors are not fatal - we just try using

4143

// another method. We only emit a warning message if affinity is on, or the

4144

// verbose flag is set, and the nowarnings flag was not set.

4145

const char *file_name = NULL__null;

4146

int line = 0;

4147

#if KMP_USE_HWLOC0

4148

if (depth < 0 &&

4149

__kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC) {

4150

if (__kmp_affinity_verbose) {

4151

KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY")__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AffUsingHwloc
, "KMP_AFFINITY"), __kmp_msg_null);

4152

}

4153

if (!__kmp_hwloc_error) {

4154

depth = __kmp_affinity_create_hwloc_map(&address2os, &msg_id);

4155

if (depth == 0) {

4156

KMP_EXIT_AFF_NONE;

4157

} else if (depth < 0 && __kmp_affinity_verbose) {

4158

KMP_INFORM(AffIgnoringHwloc, "KMP_AFFINITY")__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AffIgnoringHwloc
, "KMP_AFFINITY"), __kmp_msg_null);

4159

}

4160

} else if (__kmp_affinity_verbose) {

4161

KMP_INFORM(AffIgnoringHwloc, "KMP_AFFINITY")__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AffIgnoringHwloc
, "KMP_AFFINITY"), __kmp_msg_null);

4162

}

4163

}

4164

#endif

4165

4166

#if KMP_ARCH_X860 || KMP_ARCH_X86_641

4167

4168

if (depth < 0) {

4169

if (__kmp_affinity_verbose) {

4170

KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(Decodingx2APIC))__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AffInfoStr
, "KMP_AFFINITY", __kmp_i18n_catgets(kmp_i18n_str_Decodingx2APIC
)), __kmp_msg_null);

4171

}

4172

4173

file_name = NULL__null;

4174

depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);

4175

if (depth == 0) {

4176

KMP_EXIT_AFF_NONE;

4177

}

4178

4179

if (depth < 0) {

4180

if (__kmp_affinity_verbose) {

4181

if (msg_id != kmp_i18n_null) {

4182

KMP_INFORM(AffInfoStrStr, "KMP_AFFINITY",__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AffInfoStrStr
, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id), __kmp_i18n_catgets
(kmp_i18n_str_DecodingLegacyAPIC)), __kmp_msg_null)

4183

__kmp_i18n_catgets(msg_id),__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AffInfoStrStr
, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id), __kmp_i18n_catgets
(kmp_i18n_str_DecodingLegacyAPIC)), __kmp_msg_null)

4184

KMP_I18N_STR(DecodingLegacyAPIC))__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AffInfoStrStr
, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id), __kmp_i18n_catgets
(kmp_i18n_str_DecodingLegacyAPIC)), __kmp_msg_null);

4185

} else {

4186

KMP_INFORM(AffInfoStr, "KMP_AFFINITY",__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AffInfoStr
, "KMP_AFFINITY", __kmp_i18n_catgets(kmp_i18n_str_DecodingLegacyAPIC
)), __kmp_msg_null)

4187

KMP_I18N_STR(DecodingLegacyAPIC))__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AffInfoStr
, "KMP_AFFINITY", __kmp_i18n_catgets(kmp_i18n_str_DecodingLegacyAPIC
)), __kmp_msg_null);

4188

}

4189

}

4190

4191

file_name = NULL__null;

4192

depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);

4193

if (depth == 0) {

4194

KMP_EXIT_AFF_NONE;

4195

}

4196

}

4197

}

4198

4199

#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

4200

4201

#if KMP_OS_LINUX1

4202

4203

if (depth < 0) {

4204

if (__kmp_affinity_verbose) {

4205

if (msg_id != kmp_i18n_null) {

4206

KMP_INFORM(AffStrParseFilename, "KMP_AFFINITY",__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AffStrParseFilename
, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id), "/proc/cpuinfo"
), __kmp_msg_null)

4207

__kmp_i18n_catgets(msg_id), "/proc/cpuinfo")__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AffStrParseFilename
, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id), "/proc/cpuinfo"
), __kmp_msg_null);

4208

} else {

4209

KMP_INFORM(AffParseFilename, "KMP_AFFINITY", "/proc/cpuinfo")__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AffParseFilename
, "KMP_AFFINITY", "/proc/cpuinfo"), __kmp_msg_null);

4210

}

4211

}

4212

4213

FILE *f = fopen("/proc/cpuinfo", "r");

4214

if (f == NULL__null) {

4215

msg_id = kmp_i18n_str_CantOpenCpuinfo;

4216

} else {

4217

file_name = "/proc/cpuinfo";

4218

depth =

4219

__kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);

4220

fclose(f);

4221

if (depth == 0) {

4222

KMP_EXIT_AFF_NONE;

4223

}

4224

}

4225

}

4226

4227

#endif /* KMP_OS_LINUX */

4228

4229

#if KMP_GROUP_AFFINITY0

4230

4231

if ((depth < 0) && (__kmp_num_proc_groups > 1)) {

4232

if (__kmp_affinity_verbose) {

4233

KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY")__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AffWindowsProcGroupMap
, "KMP_AFFINITY"), __kmp_msg_null);

4234

}

4235

4236

depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);

4237

KMP_ASSERT(depth != 0)((depth != 0) ? 0 : __kmp_debug_assert("depth != 0", "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 4237));

4238

}

4239

4240

#endif /* KMP_GROUP_AFFINITY */

4241

4242

if (depth < 0) {

4243

if (__kmp_affinity_verbose && (msg_id != kmp_i18n_null)) {

4244

if (file_name == NULL__null) {

4245

KMP_INFORM(UsingFlatOS, __kmp_i18n_catgets(msg_id))__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_UsingFlatOS
, __kmp_i18n_catgets(msg_id)), __kmp_msg_null);

4246

} else if (line == 0) {

4247

KMP_INFORM(UsingFlatOSFile, file_name, __kmp_i18n_catgets(msg_id))__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_UsingFlatOSFile
, file_name, __kmp_i18n_catgets(msg_id)), __kmp_msg_null);

4248

} else {

4249

KMP_INFORM(UsingFlatOSFileLine, file_name, line,__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_UsingFlatOSFileLine
, file_name, line, __kmp_i18n_catgets(msg_id)), __kmp_msg_null
)

4250

__kmp_i18n_catgets(msg_id))__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_UsingFlatOSFileLine
, file_name, line, __kmp_i18n_catgets(msg_id)), __kmp_msg_null
);

4251

}

4252

}

4253

// FIXME - print msg if msg_id = kmp_i18n_null ???

4254

4255

file_name = "";

4256

depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);

4257

if (depth == 0) {

4258

KMP_EXIT_AFF_NONE;

4259

}

4260

KMP_ASSERT(depth > 0)((depth > 0) ? 0 : __kmp_debug_assert("depth > 0", "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 4260));

4261

KMP_ASSERT(address2os != NULL)((address2os != __null) ? 0 : __kmp_debug_assert("address2os != NULL"
, "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 4261));

4262

}

4263

}

4264

4265

#if KMP_USE_HWLOC0

4266

else if (__kmp_affinity_top_method == affinity_top_method_hwloc) {

4267

KMP_ASSERT(__kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC)((__kmp_affinity_dispatch->get_api_type() == KMPAffinity::
HWLOC) ? 0 : __kmp_debug_assert("__kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC"
, "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 4267));

4268

if (__kmp_affinity_verbose) {

4269

KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY")__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AffUsingHwloc
, "KMP_AFFINITY"), __kmp_msg_null);

4270

}

4271

depth = __kmp_affinity_create_hwloc_map(&address2os, &msg_id);

4272

if (depth == 0) {

4273

KMP_EXIT_AFF_NONE;

4274

}

4275

}

4276

#endif // KMP_USE_HWLOC

4277

4278

// If the user has specified that a particular topology discovery method is to be

4279

// used, then we abort if that method fails. The exception is group affinity,

4280

// which might have been implicitly set.

4281

4282

#if KMP_ARCH_X860 || KMP_ARCH_X86_641

4283

4284

else if (__kmp_affinity_top_method == affinity_top_method_x2apicid) {

4285

if (__kmp_affinity_verbose) {

4286

KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(Decodingx2APIC))__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AffInfoStr
, "KMP_AFFINITY", __kmp_i18n_catgets(kmp_i18n_str_Decodingx2APIC
)), __kmp_msg_null);

4287

}

4288

4289

depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);

4290

if (depth == 0) {

4291

KMP_EXIT_AFF_NONE;

4292

}

4293

if (depth < 0) {

4294

KMP_ASSERT(msg_id != kmp_i18n_null)((msg_id != kmp_i18n_null) ? 0 : __kmp_debug_assert("msg_id != kmp_i18n_null"
, "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 4294));

4295

KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id))__kmp_fatal(__kmp_msg_format(kmp_i18n_msg_MsgExiting, __kmp_i18n_catgets
(msg_id)), __kmp_msg_null);

4296

}

4297

} else if (__kmp_affinity_top_method == affinity_top_method_apicid) {

4298

if (__kmp_affinity_verbose) {

4299

KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(DecodingLegacyAPIC))__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AffInfoStr
, "KMP_AFFINITY", __kmp_i18n_catgets(kmp_i18n_str_DecodingLegacyAPIC
)), __kmp_msg_null);

4300

}

4301

4302

depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);

4303

if (depth == 0) {

4304

KMP_EXIT_AFF_NONE;

4305

}

4306

if (depth < 0) {

4307

KMP_ASSERT(msg_id != kmp_i18n_null)((msg_id != kmp_i18n_null) ? 0 : __kmp_debug_assert("msg_id != kmp_i18n_null"
, "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 4307));

4308

KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id))__kmp_fatal(__kmp_msg_format(kmp_i18n_msg_MsgExiting, __kmp_i18n_catgets
(msg_id)), __kmp_msg_null);

4309

}

4310

}

4311

4312

#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

4313

4314

else if (__kmp_affinity_top_method == affinity_top_method_cpuinfo) {

4315

const char *filename;

4316

if (__kmp_cpuinfo_file != NULL__null) {

4317

filename = __kmp_cpuinfo_file;

4318

} else {

4319

filename = "/proc/cpuinfo";

4320

}

4321

4322

if (__kmp_affinity_verbose) {

4323

KMP_INFORM(AffParseFilename, "KMP_AFFINITY", filename)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AffParseFilename
, "KMP_AFFINITY", filename), __kmp_msg_null);

4324

}

4325

4326

FILE *f = fopen(filename, "r");

4327

if (f == NULL__null) {

4328

int code = errno(*__errno_location ());

4329

if (__kmp_cpuinfo_file != NULL__null) {

4330

__kmp_fatal(KMP_MSG(CantOpenFileForReading, filename)__kmp_msg_format(kmp_i18n_msg_CantOpenFileForReading, filename
), KMP_ERR(code)__kmp_msg_error_code(code),

4331

KMP_HNT(NameComesFrom_CPUINFO_FILE)__kmp_msg_format(kmp_i18n_hnt_NameComesFrom_CPUINFO_FILE), __kmp_msg_null);

4332

} else {

4333

__kmp_fatal(KMP_MSG(CantOpenFileForReading, filename)__kmp_msg_format(kmp_i18n_msg_CantOpenFileForReading, filename
), KMP_ERR(code)__kmp_msg_error_code(code),

4334

__kmp_msg_null);

4335

}

4336

}

4337

int line = 0;

4338

depth = __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);

4339

fclose(f);

4340

if (depth < 0) {

4341

KMP_ASSERT(msg_id != kmp_i18n_null)((msg_id != kmp_i18n_null) ? 0 : __kmp_debug_assert("msg_id != kmp_i18n_null"
, "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 4341));

4342

if (line > 0) {

4343

KMP_FATAL(FileLineMsgExiting, filename, line,__kmp_fatal(__kmp_msg_format(kmp_i18n_msg_FileLineMsgExiting,
filename, line, __kmp_i18n_catgets(msg_id)), __kmp_msg_null)

4344

__kmp_i18n_catgets(msg_id))__kmp_fatal(__kmp_msg_format(kmp_i18n_msg_FileLineMsgExiting,
filename, line, __kmp_i18n_catgets(msg_id)), __kmp_msg_null);

4345

} else {

4346

KMP_FATAL(FileMsgExiting, filename, __kmp_i18n_catgets(msg_id))__kmp_fatal(__kmp_msg_format(kmp_i18n_msg_FileMsgExiting, filename
, __kmp_i18n_catgets(msg_id)), __kmp_msg_null);

4347

}

4348

}

4349

if (__kmp_affinity_type == affinity_none) {

4350

KMP_ASSERT(depth == 0)((depth == 0) ? 0 : __kmp_debug_assert("depth == 0", "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 4350));

4351

KMP_EXIT_AFF_NONE;

4352

}

4353

}

4354

4355

#if KMP_GROUP_AFFINITY0

4356

4357

else if (__kmp_affinity_top_method == affinity_top_method_group) {

4358

if (__kmp_affinity_verbose) {

4359

KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY")__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AffWindowsProcGroupMap
, "KMP_AFFINITY"), __kmp_msg_null);

4360

}

4361

4362

depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);

4363

KMP_ASSERT(depth != 0)((depth != 0) ? 0 : __kmp_debug_assert("depth != 0", "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 4363));

4364

if (depth < 0) {

4365

KMP_ASSERT(msg_id != kmp_i18n_null)((msg_id != kmp_i18n_null) ? 0 : __kmp_debug_assert("msg_id != kmp_i18n_null"
, "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 4365));

4366

KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id))__kmp_fatal(__kmp_msg_format(kmp_i18n_msg_MsgExiting, __kmp_i18n_catgets
(msg_id)), __kmp_msg_null);

4367

}

4368

}

4369

4370

#endif /* KMP_GROUP_AFFINITY */

4371

4372

else if (__kmp_affinity_top_method == affinity_top_method_flat) {

4373

if (__kmp_affinity_verbose) {

4374

KMP_INFORM(AffUsingFlatOS, "KMP_AFFINITY")__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_AffUsingFlatOS
, "KMP_AFFINITY"), __kmp_msg_null);

4375

}

4376

4377

depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);

4378

if (depth == 0) {

4379

KMP_EXIT_AFF_NONE;

4380

}

4381

// should not fail

4382

KMP_ASSERT(depth > 0)((depth > 0) ? 0 : __kmp_debug_assert("depth > 0", "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 4382));

4383

KMP_ASSERT(address2os != NULL)((address2os != __null) ? 0 : __kmp_debug_assert("address2os != NULL"
, "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 4383));

4384

}

4385

4386

#if KMP_USE_HIER_SCHED0

4387

__kmp_dispatch_set_hierarchy_values();

4388

#endif

4389

4390

if (address2os == NULL__null) {

4391

if (KMP_AFFINITY_CAPABLE()(__kmp_affin_mask_size > 0) &&

4392

(__kmp_affinity_verbose ||

4393

(__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none)))) {

4394

KMP_WARNING(ErrorInitializeAffinity)__kmp_msg(kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_ErrorInitializeAffinity
), __kmp_msg_null);

4395

}

4396

__kmp_affinity_type = affinity_none;

4397

__kmp_create_affinity_none_places();

4398

KMP_AFFINITY_DISABLE()(__kmp_affin_mask_size = 0);

4399

return;

4400

}

4401

4402

if (__kmp_affinity_gran == affinity_gran_tile

4403

#if KMP_USE_HWLOC0

4404

&& __kmp_tile_depth == 0

4405

#endif

4406

) {

4407

// tiles requested but not detected, warn user on this

4408

KMP_WARNING(AffTilesNoTiles, "KMP_AFFINITY")__kmp_msg(kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_AffTilesNoTiles
, "KMP_AFFINITY"), __kmp_msg_null);

4409

}

4410

4411

__kmp_apply_thread_places(&address2os, depth);

4412

4413

// Create the table of masks, indexed by thread Id.

4414

unsigned maxIndex;

4415

unsigned numUnique;

4416

kmp_affin_mask_t *osId2Mask =

4417

__kmp_create_masks(&maxIndex, &numUnique, address2os, __kmp_avail_proc);

4418

if (__kmp_affinity_gran_levels == 0) {

4419

KMP_DEBUG_ASSERT((int)numUnique == __kmp_avail_proc)(((int)numUnique == __kmp_avail_proc) ? 0 : __kmp_debug_assert
("(int)numUnique == __kmp_avail_proc", "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 4419));

4420

}

4421

4422

// Set the childNums vector in all Address objects. This must be done before

4423

// we can sort using __kmp_affinity_cmp_Address_child_num(), which takes into

4424

// account the setting of __kmp_affinity_compact.

4425

__kmp_affinity_assign_child_nums(address2os, __kmp_avail_proc);

4426

4427

switch (__kmp_affinity_type) {

4428

4429

case affinity_explicit:

4430

KMP_DEBUG_ASSERT(__kmp_affinity_proclist != NULL)((__kmp_affinity_proclist != __null) ? 0 : __kmp_debug_assert
("__kmp_affinity_proclist != __null", "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 4430));

4431

#if OMP_40_ENABLED(50 >= 40)

4432

if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel)

4433

#endif

4434

{

4435

__kmp_affinity_process_proclist(

4436

&__kmp_affinity_masks, &__kmp_affinity_num_masks,

4437

__kmp_affinity_proclist, osId2Mask, maxIndex);

4438

}

4439

#if OMP_40_ENABLED(50 >= 40)

4440

else {

4441

__kmp_affinity_process_placelist(

4442

&__kmp_affinity_masks, &__kmp_affinity_num_masks,

4443

__kmp_affinity_proclist, osId2Mask, maxIndex);

4444

}

4445

#endif

4446

if (__kmp_affinity_num_masks == 0) {

4447

if (__kmp_affinity_verbose ||

4448

(__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none))) {

4449

KMP_WARNING(AffNoValidProcID)__kmp_msg(kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_AffNoValidProcID
), __kmp_msg_null);

4450

}

4451

__kmp_affinity_type = affinity_none;

4452

return;

4453

}

4454

break;

4455

4456

// The other affinity types rely on sorting the Addresses according to some

4457

// permutation of the machine topology tree. Set __kmp_affinity_compact and

4458

// __kmp_affinity_offset appropriately, then jump to a common code fragment

4459

// to do the sort and create the array of affinity masks.

4460

4461

case affinity_logical:

4462

__kmp_affinity_compact = 0;

4463

if (__kmp_affinity_offset) {

4464

__kmp_affinity_offset =

4465

__kmp_nThreadsPerCore * __kmp_affinity_offset % __kmp_avail_proc;

4466

}

4467

goto sortAddresses;

4468

4469

case affinity_physical:

4470

if (__kmp_nThreadsPerCore > 1) {

4471

__kmp_affinity_compact = 1;

4472

if (__kmp_affinity_compact >= depth) {

4473

__kmp_affinity_compact = 0;

4474

}

4475

} else {

4476

__kmp_affinity_compact = 0;

4477

}

4478

if (__kmp_affinity_offset) {

4479

__kmp_affinity_offset =

4480

__kmp_nThreadsPerCore * __kmp_affinity_offset % __kmp_avail_proc;

4481

}

4482

goto sortAddresses;

4483

4484

case affinity_scatter:

4485

if (__kmp_affinity_compact >= depth) {

4486

__kmp_affinity_compact = 0;

4487

} else {

4488

__kmp_affinity_compact = depth - 1 - __kmp_affinity_compact;

4489

}

4490

goto sortAddresses;

4491

4492

case affinity_compact:

4493

if (__kmp_affinity_compact >= depth) {

4494

__kmp_affinity_compact = depth - 1;

4495

}

4496

goto sortAddresses;

4497

4498

case affinity_balanced:

4499

if (depth <= 1) {

4500

if (__kmp_affinity_verbose || __kmp_affinity_warnings) {

4501

KMP_WARNING(AffBalancedNotAvail, "KMP_AFFINITY")__kmp_msg(kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_AffBalancedNotAvail
, "KMP_AFFINITY"), __kmp_msg_null);

4502

}

4503

__kmp_affinity_type = affinity_none;

4504

return;

4505

} else if (__kmp_affinity_uniform_topology()) {

4506

break;

4507

} else { // Non-uniform topology

4508

4509

// Save the depth for further usage

4510

__kmp_aff_depth = depth;

4511

4512

int core_level = __kmp_affinity_find_core_level(

4513

address2os, __kmp_avail_proc, depth - 1);

4514

int ncores = __kmp_affinity_compute_ncores(address2os, __kmp_avail_proc,

4515

depth - 1, core_level);

4516

int maxprocpercore = __kmp_affinity_max_proc_per_core(

4517

address2os, __kmp_avail_proc, depth - 1, core_level);

4518

4519

int nproc = ncores * maxprocpercore;

4520

if ((nproc < 2) || (nproc < __kmp_avail_proc)) {

4521

if (__kmp_affinity_verbose || __kmp_affinity_warnings) {

4522

KMP_WARNING(AffBalancedNotAvail, "KMP_AFFINITY")__kmp_msg(kmp_ms_warning, __kmp_msg_format(kmp_i18n_msg_AffBalancedNotAvail
, "KMP_AFFINITY"), __kmp_msg_null);

4523

}

4524

__kmp_affinity_type = affinity_none;

4525

return;

4526

}

4527

4528

procarr = (int *)__kmp_allocate(sizeof(int) * nproc)___kmp_allocate((sizeof(int) * nproc), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 4528);

4529

for (int i = 0; i < nproc; i++) {

4530

procarr[i] = -1;

4531

}

4532

4533

int lastcore = -1;

4534

int inlastcore = 0;

4535

for (int i = 0; i < __kmp_avail_proc; i++) {

4536

int proc = address2os[i].second;

4537

int core =

4538

__kmp_affinity_find_core(address2os, i, depth - 1, core_level);

4539

4540

if (core == lastcore) {

4541

inlastcore++;

4542

} else {

4543

inlastcore = 0;

4544

}

4545

lastcore = core;

4546

4547

procarr[core * maxprocpercore + inlastcore] = proc;

4548

}

4549

4550

break;

4551

}

4552

4553

sortAddresses:

4554

// Allocate the gtid->affinity mask table.

4555

if (__kmp_affinity_dups) {

4556

__kmp_affinity_num_masks = __kmp_avail_proc;

4557

} else {

4558

__kmp_affinity_num_masks = numUnique;

4559

}

4560

4561

#if OMP_40_ENABLED(50 >= 40)

4562

if ((__kmp_nested_proc_bind.bind_types[0] != proc_bind_intel) &&

4563

(__kmp_affinity_num_places > 0) &&

4564

((unsigned)__kmp_affinity_num_places < __kmp_affinity_num_masks)) {

4565

__kmp_affinity_num_masks = __kmp_affinity_num_places;

4566

}

4567

#endif

4568

4569

KMP_CPU_ALLOC_ARRAY(__kmp_affinity_masks, __kmp_affinity_num_masks)(__kmp_affinity_masks = __kmp_affinity_dispatch->allocate_mask_array
(__kmp_affinity_num_masks));

4570

4571

// Sort the address2os table according to the current setting of

4572

// __kmp_affinity_compact, then fill out __kmp_affinity_masks.

4573

qsort(address2os, __kmp_avail_proc, sizeof(*address2os),

4574

__kmp_affinity_cmp_Address_child_num);

4575

{

4576

int i;

4577

unsigned j;

4578

for (i = 0, j = 0; i < __kmp_avail_proc; i++) {

4579

if ((!__kmp_affinity_dups) && (!address2os[i].first.leader)) {

4580

continue;

4581

}

4582

unsigned osId = address2os[i].second;

4583

kmp_affin_mask_t *src = KMP_CPU_INDEX(osId2Mask, osId)__kmp_affinity_dispatch->index_mask_array(osId2Mask, osId);

4584

kmp_affin_mask_t *dest = KMP_CPU_INDEX(__kmp_affinity_masks, j)__kmp_affinity_dispatch->index_mask_array(__kmp_affinity_masks
, j);

4585

KMP_ASSERT(KMP_CPU_ISSET(osId, src))(((src)->is_set(osId)) ? 0 : __kmp_debug_assert("KMP_CPU_ISSET(osId, src)"
, "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 4585));

4586

KMP_CPU_COPY(dest, src)(dest)->copy(src);

4587

if (++j >= __kmp_affinity_num_masks) {

4588

break;

4589

}

4590

}

4591

KMP_DEBUG_ASSERT(j == __kmp_affinity_num_masks)((j == __kmp_affinity_num_masks) ? 0 : __kmp_debug_assert("j == __kmp_affinity_num_masks"
, "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 4591));

4592

}

4593

break;

4594

4595

default:

4596

KMP_ASSERT2(0, "Unexpected affinity setting")((0) ? 0 : __kmp_debug_assert(("Unexpected affinity setting")
, "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 4596));

4597

}

4598

4599

KMP_CPU_FREE_ARRAY(osId2Mask, maxIndex + 1)__kmp_affinity_dispatch->deallocate_mask_array(osId2Mask);

4600

machine_hierarchy.init(address2os, __kmp_avail_proc);

4601

}

4602

#undef KMP_EXIT_AFF_NONE

4603

4604

void __kmp_affinity_initialize(void) {

4605

// Much of the code above was written assumming that if a machine was not

4606

// affinity capable, then __kmp_affinity_type == affinity_none. We now

4607

// explicitly represent this as __kmp_affinity_type == affinity_disabled.

4608

// There are too many checks for __kmp_affinity_type == affinity_none

4609

// in this code. Instead of trying to change them all, check if

4610

// __kmp_affinity_type == affinity_disabled, and if so, slam it with

4611

// affinity_none, call the real initialization routine, then restore

4612

// __kmp_affinity_type to affinity_disabled.

4613

int disabled = (__kmp_affinity_type == affinity_disabled);

4614

if (!KMP_AFFINITY_CAPABLE()(__kmp_affin_mask_size > 0)) {

4615

KMP_ASSERT(disabled)((disabled) ? 0 : __kmp_debug_assert("disabled", "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 4615));

4616

}

4617

if (disabled) {

4618

__kmp_affinity_type = affinity_none;

4619

}

4620

__kmp_aux_affinity_initialize();

4621

if (disabled) {

4622

__kmp_affinity_type = affinity_disabled;

4623

}

4624

}

4625

4626

void __kmp_affinity_uninitialize(void) {

4627

if (__kmp_affinity_masks != NULL__null) {

4628

KMP_CPU_FREE_ARRAY(__kmp_affinity_masks, __kmp_affinity_num_masks)__kmp_affinity_dispatch->deallocate_mask_array(__kmp_affinity_masks
);

4629

__kmp_affinity_masks = NULL__null;

4630

}

4631

if (__kmp_affin_fullMask != NULL__null) {

4632

KMP_CPU_FREE(__kmp_affin_fullMask)__kmp_affinity_dispatch->deallocate_mask(__kmp_affin_fullMask
);

4633

__kmp_affin_fullMask = NULL__null;

4634

}

4635

__kmp_affinity_num_masks = 0;

4636

__kmp_affinity_type = affinity_default;

4637

#if OMP_40_ENABLED(50 >= 40)

4638

__kmp_affinity_num_places = 0;

4639

#endif

4640

if (__kmp_affinity_proclist != NULL__null) {

4641

__kmp_free(__kmp_affinity_proclist)___kmp_free((__kmp_affinity_proclist), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 4641);

4642

__kmp_affinity_proclist = NULL__null;

4643

}

4644

if (address2os != NULL__null) {

4645

__kmp_free(address2os)___kmp_free((address2os), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 4645);

4646

address2os = NULL__null;

4647

}

4648

if (procarr != NULL__null) {

4649

__kmp_free(procarr)___kmp_free((procarr), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 4649);

4650

procarr = NULL__null;

4651

}

4652

#if KMP_USE_HWLOC0

4653

if (__kmp_hwloc_topology != NULL__null) {

4654

hwloc_topology_destroy(__kmp_hwloc_topology);

4655

__kmp_hwloc_topology = NULL__null;

4656

}

4657

#endif

4658

KMPAffinity::destroy_api();

4659

}

4660

4661

void __kmp_affinity_set_init_mask(int gtid, int isa_root) {

4662

if (!KMP_AFFINITY_CAPABLE()(__kmp_affin_mask_size > 0)) {

4663

return;

4664

}

4665

4666

kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid])((void *)(__kmp_threads[gtid]));

4667

if (th->th.th_affin_mask == NULL__null) {

4668

KMP_CPU_ALLOC(th->th.th_affin_mask)(th->th.th_affin_mask = __kmp_affinity_dispatch->allocate_mask
());

4669

} else {

4670

KMP_CPU_ZERO(th->th.th_affin_mask)(th->th.th_affin_mask)->zero();

4671

}

4672

4673

// Copy the thread mask to the kmp_info_t strucuture. If

4674

// __kmp_affinity_type == affinity_none, copy the "full" mask, i.e. one that

4675

// has all of the OS proc ids set, or if __kmp_affinity_respect_mask is set,

4676

// then the full mask is the same as the mask of the initialization thread.

4677

kmp_affin_mask_t *mask;

4678

int i;

4679

4680

#if OMP_40_ENABLED(50 >= 40)

4681

if (KMP_AFFINITY_NON_PROC_BIND((__kmp_nested_proc_bind.bind_types[0] == proc_bind_false || __kmp_nested_proc_bind
.bind_types[0] == proc_bind_intel) && (__kmp_affinity_num_masks
> 0 || __kmp_affinity_type == affinity_balanced)))

4682

#endif

4683

{

4684

if ((__kmp_affinity_type == affinity_none) ||

4685

(__kmp_affinity_type == affinity_balanced)) {

4686

#if KMP_GROUP_AFFINITY0

4687

if (__kmp_num_proc_groups > 1) {

4688

return;

4689

}

4690

#endif

4691

KMP_ASSERT(__kmp_affin_fullMask != NULL)((__kmp_affin_fullMask != __null) ? 0 : __kmp_debug_assert("__kmp_affin_fullMask != NULL"
, "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 4691));

4692

i = 0;

4693

mask = __kmp_affin_fullMask;

4694

} else {

4695

KMP_DEBUG_ASSERT(__kmp_affinity_num_masks > 0)((__kmp_affinity_num_masks > 0) ? 0 : __kmp_debug_assert("__kmp_affinity_num_masks > 0"
, "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 4695));

4696

i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;

4697

mask = KMP_CPU_INDEX(__kmp_affinity_masks, i)__kmp_affinity_dispatch->index_mask_array(__kmp_affinity_masks
, i);

4698

}

4699

}

4700

#if OMP_40_ENABLED(50 >= 40)

4701

else {

4702

if ((!isa_root) ||

4703

(__kmp_nested_proc_bind.bind_types[0] == proc_bind_false)) {

4704

#if KMP_GROUP_AFFINITY0

4705

if (__kmp_num_proc_groups > 1) {

4706

return;

4707

}

4708

#endif

4709

KMP_ASSERT(__kmp_affin_fullMask != NULL)((__kmp_affin_fullMask != __null) ? 0 : __kmp_debug_assert("__kmp_affin_fullMask != NULL"
, "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 4709));

4710

i = KMP_PLACE_ALL(-1);

4711

mask = __kmp_affin_fullMask;

4712

} else {

4713

// int i = some hash function or just a counter that doesn't

4714

// always start at 0. Use gtid for now.

4715

KMP_DEBUG_ASSERT(__kmp_affinity_num_masks > 0)((__kmp_affinity_num_masks > 0) ? 0 : __kmp_debug_assert("__kmp_affinity_num_masks > 0"
, "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 4715));

4716

i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;

4717

mask = KMP_CPU_INDEX(__kmp_affinity_masks, i)__kmp_affinity_dispatch->index_mask_array(__kmp_affinity_masks
, i);

4718

}

4719

}

4720

#endif

4721

4722

#if OMP_40_ENABLED(50 >= 40)

4723

th->th.th_current_place = i;

4724

if (isa_root) {

4725

th->th.th_new_place = i;

4726

th->th.th_first_place = 0;

4727

th->th.th_last_place = __kmp_affinity_num_masks - 1;

4728

}

4729

4730

if (i == KMP_PLACE_ALL(-1)) {

4731

KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to all places\n",if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_affinity_set_init_mask: binding T#%d to all places\n"
, gtid); }

4732

gtid))if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_affinity_set_init_mask: binding T#%d to all places\n"
, gtid); };

4733

} else {

4734

KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to place %d\n",if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_affinity_set_init_mask: binding T#%d to place %d\n"
, gtid, i); }

4735

gtid, i))if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_affinity_set_init_mask: binding T#%d to place %d\n"
, gtid, i); };

4736

}

4737

#else

4738

if (i == -1) {

4739

KA_TRACE(if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_affinity_set_init_mask: binding T#%d to __kmp_affin_fullMask\n"
, gtid); }

4740

100,if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_affinity_set_init_mask: binding T#%d to __kmp_affin_fullMask\n"
, gtid); }

4741

("__kmp_affinity_set_init_mask: binding T#%d to __kmp_affin_fullMask\n",if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_affinity_set_init_mask: binding T#%d to __kmp_affin_fullMask\n"
, gtid); }

4742

gtid))if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_affinity_set_init_mask: binding T#%d to __kmp_affin_fullMask\n"
, gtid); };

4743

} else {

4744

KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to mask %d\n",if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_affinity_set_init_mask: binding T#%d to mask %d\n"
, gtid, i); }

4745

gtid, i))if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_affinity_set_init_mask: binding T#%d to mask %d\n"
, gtid, i); };

4746

}

4747

#endif /* OMP_40_ENABLED */

4748

4749

KMP_CPU_COPY(th->th.th_affin_mask, mask)(th->th.th_affin_mask)->copy(mask);

4750

4751

if (__kmp_affinity_verbose

4752

/* to avoid duplicate printing (will be correctly printed on barrier) */

4753

&& (__kmp_affinity_type == affinity_none || i != KMP_PLACE_ALL(-1))) {

4754

char buf[KMP_AFFIN_MASK_PRINT_LEN1024];

4755

__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN1024,

4756

th->th.th_affin_mask);

4757

KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_BoundToOSProcSet
, "KMP_AFFINITY", (kmp_int32)getpid(), syscall(186), gtid, buf
), __kmp_msg_null)

4758

__kmp_gettid(), gtid, buf)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_BoundToOSProcSet
, "KMP_AFFINITY", (kmp_int32)getpid(), syscall(186), gtid, buf
), __kmp_msg_null);

4759

}

4760

4761

#if KMP_OS_WINDOWS0

4762

// On Windows* OS, the process affinity mask might have changed. If the user

4763

// didn't request affinity and this call fails, just continue silently.

4764

// See CQ171393.

4765

if (__kmp_affinity_type == affinity_none) {

4766

__kmp_set_system_affinity(th->th.th_affin_mask, FALSE)(th->th.th_affin_mask)->set_system_affinity(0);

4767

} else

4768

#endif

4769

__kmp_set_system_affinity(th->th.th_affin_mask, TRUE)(th->th.th_affin_mask)->set_system_affinity((!0));

4770

}

4771

4772

#if OMP_40_ENABLED(50 >= 40)

4773

4774

void __kmp_affinity_set_place(int gtid) {

4775

int retval;

4776

4777

if (!KMP_AFFINITY_CAPABLE()(__kmp_affin_mask_size > 0)) {

4778

return;

4779

}

4780

4781

kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid])((void *)(__kmp_threads[gtid]));

4782

4783

KA_TRACE(100, ("__kmp_affinity_set_place: binding T#%d to place %d (current "if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_affinity_set_place: binding T#%d to place %d (current "
"place = %d)\n", gtid, th->th.th_new_place, th->th.th_current_place
); }

4784

"place = %d)\n",if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_affinity_set_place: binding T#%d to place %d (current "
"place = %d)\n", gtid, th->th.th_new_place, th->th.th_current_place
); }

4785

gtid, th->th.th_new_place, th->th.th_current_place))if (kmp_a_debug >= 100) { __kmp_debug_printf ("__kmp_affinity_set_place: binding T#%d to place %d (current "
"place = %d)\n", gtid, th->th.th_new_place, th->th.th_current_place
); };

4786

4787

// Check that the new place is within this thread's partition.

4788

KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL)((th->th.th_affin_mask != __null) ? 0 : __kmp_debug_assert
("th->th.th_affin_mask != __null", "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 4788));

4789

KMP_ASSERT(th->th.th_new_place >= 0)((th->th.th_new_place >= 0) ? 0 : __kmp_debug_assert("th->th.th_new_place >= 0"
, "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 4789));

4790

KMP_ASSERT((unsigned)th->th.th_new_place <= __kmp_affinity_num_masks)(((unsigned)th->th.th_new_place <= __kmp_affinity_num_masks
) ? 0 : __kmp_debug_assert("(unsigned)th->th.th_new_place <= __kmp_affinity_num_masks"
, "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 4790));

4791

if (th->th.th_first_place <= th->th.th_last_place) {

4792

KMP_ASSERT((th->th.th_new_place >= th->th.th_first_place) &&(((th->th.th_new_place >= th->th.th_first_place) &&
(th->th.th_new_place <= th->th.th_last_place)) ? 0 :
__kmp_debug_assert("(th->th.th_new_place >= th->th.th_first_place) && (th->th.th_new_place <= th->th.th_last_place)"
, "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 4793))

4793

(th->th.th_new_place <= th->th.th_last_place))(((th->th.th_new_place >= th->th.th_first_place) &&
(th->th.th_new_place <= th->th.th_last_place)) ? 0 :
__kmp_debug_assert("(th->th.th_new_place >= th->th.th_first_place) && (th->th.th_new_place <= th->th.th_last_place)"
, "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 4793));

4794

} else {

4795

KMP_ASSERT((th->th.th_new_place <= th->th.th_first_place) ||(((th->th.th_new_place <= th->th.th_first_place) || (
th->th.th_new_place >= th->th.th_last_place)) ? 0 : __kmp_debug_assert
("(th->th.th_new_place <= th->th.th_first_place) || (th->th.th_new_place >= th->th.th_last_place)"
, "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 4796))

4796

(th->th.th_new_place >= th->th.th_last_place))(((th->th.th_new_place <= th->th.th_first_place) || (
th->th.th_new_place >= th->th.th_last_place)) ? 0 : __kmp_debug_assert
("(th->th.th_new_place <= th->th.th_first_place) || (th->th.th_new_place >= th->th.th_last_place)"
, "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 4796));

4797

}

4798

4799

// Copy the thread mask to the kmp_info_t strucuture,

4800

// and set this thread's affinity.

4801

kmp_affin_mask_t *mask =

4802

KMP_CPU_INDEX(__kmp_affinity_masks, th->th.th_new_place)__kmp_affinity_dispatch->index_mask_array(__kmp_affinity_masks
, th->th.th_new_place);

4803

KMP_CPU_COPY(th->th.th_affin_mask, mask)(th->th.th_affin_mask)->copy(mask);

4804

th->th.th_current_place = th->th.th_new_place;

4805

4806

if (__kmp_affinity_verbose) {

4807

char buf[KMP_AFFIN_MASK_PRINT_LEN1024];

4808

__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN1024,

4809

th->th.th_affin_mask);

4810

KMP_INFORM(BoundToOSProcSet, "OMP_PROC_BIND", (kmp_int32)getpid(),__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_BoundToOSProcSet
, "OMP_PROC_BIND", (kmp_int32)getpid(), syscall(186), gtid, buf
), __kmp_msg_null)

4811

__kmp_gettid(), gtid, buf)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_BoundToOSProcSet
, "OMP_PROC_BIND", (kmp_int32)getpid(), syscall(186), gtid, buf
), __kmp_msg_null);

4812

}

4813

__kmp_set_system_affinity(th->th.th_affin_mask, TRUE)(th->th.th_affin_mask)->set_system_affinity((!0));

4814

}

4815

4816

#endif /* OMP_40_ENABLED */

4817

4818

int __kmp_aux_set_affinity(void **mask) {

4819

int gtid;

4820

kmp_info_t *th;

4821

int retval;

4822

4823

if (!KMP_AFFINITY_CAPABLE()(__kmp_affin_mask_size > 0)) {

4824

return -1;

4825

}

4826

4827

gtid = __kmp_entry_gtid()__kmp_get_global_thread_id_reg();

4828

KA_TRACE(1000, ; {if (kmp_a_debug >= 1000) { __kmp_debug_printf ; { char buf
[1024]; __kmp_affinity_print_mask(buf, 1024, (kmp_affin_mask_t
*)(*mask)); __kmp_debug_printf( "kmp_set_affinity: setting affinity mask for thread %d = %s\n"
, gtid, buf); }; }

4829

char buf[KMP_AFFIN_MASK_PRINT_LEN];if (kmp_a_debug >= 1000) { __kmp_debug_printf ; { char buf
[1024]; __kmp_affinity_print_mask(buf, 1024, (kmp_affin_mask_t
*)(*mask)); __kmp_debug_printf( "kmp_set_affinity: setting affinity mask for thread %d = %s\n"
, gtid, buf); }; }

4830

__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,if (kmp_a_debug >= 1000) { __kmp_debug_printf ; { char buf
[1024]; __kmp_affinity_print_mask(buf, 1024, (kmp_affin_mask_t
*)(*mask)); __kmp_debug_printf( "kmp_set_affinity: setting affinity mask for thread %d = %s\n"
, gtid, buf); }; }

4831

(kmp_affin_mask_t *)(*mask));if (kmp_a_debug >= 1000) { __kmp_debug_printf ; { char buf
[1024]; __kmp_affinity_print_mask(buf, 1024, (kmp_affin_mask_t
*)(*mask)); __kmp_debug_printf( "kmp_set_affinity: setting affinity mask for thread %d = %s\n"
, gtid, buf); }; }

4832

__kmp_debug_printf(if (kmp_a_debug >= 1000) { __kmp_debug_printf ; { char buf
[1024]; __kmp_affinity_print_mask(buf, 1024, (kmp_affin_mask_t
*)(*mask)); __kmp_debug_printf( "kmp_set_affinity: setting affinity mask for thread %d = %s\n"
, gtid, buf); }; }

4833

"kmp_set_affinity: setting affinity mask for thread %d = %s\n", gtid,if (kmp_a_debug >= 1000) { __kmp_debug_printf ; { char buf
[1024]; __kmp_affinity_print_mask(buf, 1024, (kmp_affin_mask_t
*)(*mask)); __kmp_debug_printf( "kmp_set_affinity: setting affinity mask for thread %d = %s\n"
, gtid, buf); }; }

4834

buf);if (kmp_a_debug >= 1000) { __kmp_debug_printf ; { char buf
[1024]; __kmp_affinity_print_mask(buf, 1024, (kmp_affin_mask_t
*)(*mask)); __kmp_debug_printf( "kmp_set_affinity: setting affinity mask for thread %d = %s\n"
, gtid, buf); }; }

4835

})if (kmp_a_debug >= 1000) { __kmp_debug_printf ; { char buf
[1024]; __kmp_affinity_print_mask(buf, 1024, (kmp_affin_mask_t
*)(*mask)); __kmp_debug_printf( "kmp_set_affinity: setting affinity mask for thread %d = %s\n"
, gtid, buf); }; };

4836

4837

if (__kmp_env_consistency_check) {

4838

if ((mask == NULL__null) || (*mask == NULL__null)) {

4839

KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity")__kmp_fatal(__kmp_msg_format(kmp_i18n_msg_AffinityInvalidMask
, "kmp_set_affinity"), __kmp_msg_null);

4840

} else {

4841

unsigned proc;

4842

int num_procs = 0;

4843

4844

KMP_CPU_SET_ITERATE(proc, ((kmp_affin_mask_t *)(*mask)))for (proc = (((kmp_affin_mask_t *)(*mask)))->begin(); proc
!= (((kmp_affin_mask_t *)(*mask)))->end(); proc = (((kmp_affin_mask_t
*)(*mask)))->next(proc)) {

4845

if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)(__kmp_affin_fullMask)->is_set(proc)) {

4846

KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity")__kmp_fatal(__kmp_msg_format(kmp_i18n_msg_AffinityInvalidMask
, "kmp_set_affinity"), __kmp_msg_null);

4847

}

4848

if (!KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask))((kmp_affin_mask_t *)(*mask))->is_set(proc)) {

4849

continue;

4850

}

4851

num_procs++;

4852

}

4853

if (num_procs == 0) {

4854

KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity")__kmp_fatal(__kmp_msg_format(kmp_i18n_msg_AffinityInvalidMask
, "kmp_set_affinity"), __kmp_msg_null);

4855

}

4856

4857

#if KMP_GROUP_AFFINITY0

4858

if (__kmp_get_proc_group((kmp_affin_mask_t *)(*mask))((kmp_affin_mask_t *)(*mask))->get_proc_group() < 0) {

4859

KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity")__kmp_fatal(__kmp_msg_format(kmp_i18n_msg_AffinityInvalidMask
, "kmp_set_affinity"), __kmp_msg_null);

4860

}

4861

#endif /* KMP_GROUP_AFFINITY */

4862

}

4863

}

4864

4865

th = __kmp_threads[gtid];

4866

KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL)((th->th.th_affin_mask != __null) ? 0 : __kmp_debug_assert
("th->th.th_affin_mask != __null", "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 4866));

4867

retval = __kmp_set_system_affinity((kmp_affin_mask_t *)(*mask), FALSE)((kmp_affin_mask_t *)(*mask))->set_system_affinity(0);

4868

if (retval == 0) {

4869

KMP_CPU_COPY(th->th.th_affin_mask, (kmp_affin_mask_t *)(*mask))(th->th.th_affin_mask)->copy((kmp_affin_mask_t *)(*mask
));

4870

}

4871

4872

#if OMP_40_ENABLED(50 >= 40)

4873

th->th.th_current_place = KMP_PLACE_UNDEFINED(-2);

4874

th->th.th_new_place = KMP_PLACE_UNDEFINED(-2);

4875

th->th.th_first_place = 0;

4876

th->th.th_last_place = __kmp_affinity_num_masks - 1;

4877

4878

// Turn off 4.0 affinity for the current tread at this parallel level.

4879

th->th.th_current_task->td_icvs.proc_bind = proc_bind_false;

4880

#endif

4881

4882

return retval;

4883

}

4884

4885

int __kmp_aux_get_affinity(void **mask) {

4886

int gtid;

4887

int retval;

4888

kmp_info_t *th;

4889

4890

if (!KMP_AFFINITY_CAPABLE()(__kmp_affin_mask_size > 0)) {

4891

return -1;

4892

}

4893

4894

gtid = __kmp_entry_gtid()__kmp_get_global_thread_id_reg();

4895

th = __kmp_threads[gtid];

4896

KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL)((th->th.th_affin_mask != __null) ? 0 : __kmp_debug_assert
("th->th.th_affin_mask != __null", "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 4896));

4897

4898

KA_TRACE(1000, ; {if (kmp_a_debug >= 1000) { __kmp_debug_printf ; { char buf
[1024]; __kmp_affinity_print_mask(buf, 1024, th->th.th_affin_mask
); __kmp_printf("kmp_get_affinity: stored affinity mask for thread %d = %s\n"
, gtid, buf); }; }

4899

char buf[KMP_AFFIN_MASK_PRINT_LEN];if (kmp_a_debug >= 1000) { __kmp_debug_printf ; { char buf
[1024]; __kmp_affinity_print_mask(buf, 1024, th->th.th_affin_mask
); __kmp_printf("kmp_get_affinity: stored affinity mask for thread %d = %s\n"
, gtid, buf); }; }

4900

__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,if (kmp_a_debug >= 1000) { __kmp_debug_printf ; { char buf
[1024]; __kmp_affinity_print_mask(buf, 1024, th->th.th_affin_mask
); __kmp_printf("kmp_get_affinity: stored affinity mask for thread %d = %s\n"
, gtid, buf); }; }

4901

th->th.th_affin_mask);if (kmp_a_debug >= 1000) { __kmp_debug_printf ; { char buf
[1024]; __kmp_affinity_print_mask(buf, 1024, th->th.th_affin_mask
); __kmp_printf("kmp_get_affinity: stored affinity mask for thread %d = %s\n"
, gtid, buf); }; }

4902

__kmp_printf("kmp_get_affinity: stored affinity mask for thread %d = %s\n",if (kmp_a_debug >= 1000) { __kmp_debug_printf ; { char buf
[1024]; __kmp_affinity_print_mask(buf, 1024, th->th.th_affin_mask
); __kmp_printf("kmp_get_affinity: stored affinity mask for thread %d = %s\n"
, gtid, buf); }; }

4903

gtid, buf);if (kmp_a_debug >= 1000) { __kmp_debug_printf ; { char buf
[1024]; __kmp_affinity_print_mask(buf, 1024, th->th.th_affin_mask
); __kmp_printf("kmp_get_affinity: stored affinity mask for thread %d = %s\n"
, gtid, buf); }; }

4904

})if (kmp_a_debug >= 1000) { __kmp_debug_printf ; { char buf
[1024]; __kmp_affinity_print_mask(buf, 1024, th->th.th_affin_mask
); __kmp_printf("kmp_get_affinity: stored affinity mask for thread %d = %s\n"
, gtid, buf); }; };

4905

4906

if (__kmp_env_consistency_check) {

4907

if ((mask == NULL__null) || (*mask == NULL__null)) {

4908

KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity")__kmp_fatal(__kmp_msg_format(kmp_i18n_msg_AffinityInvalidMask
, "kmp_get_affinity"), __kmp_msg_null);

4909

}

4910

}

4911

4912

#if !KMP_OS_WINDOWS0

4913

4914

retval = __kmp_get_system_affinity((kmp_affin_mask_t *)(*mask), FALSE)((kmp_affin_mask_t *)(*mask))->get_system_affinity(0);

4915

KA_TRACE(1000, ; {if (kmp_a_debug >= 1000) { __kmp_debug_printf ; { char buf
[1024]; __kmp_affinity_print_mask(buf, 1024, (kmp_affin_mask_t
*)(*mask)); __kmp_printf("kmp_get_affinity: system affinity mask for thread %d = %s\n"
, gtid, buf); }; }

4916

char buf[KMP_AFFIN_MASK_PRINT_LEN];if (kmp_a_debug >= 1000) { __kmp_debug_printf ; { char buf
[1024]; __kmp_affinity_print_mask(buf, 1024, (kmp_affin_mask_t
*)(*mask)); __kmp_printf("kmp_get_affinity: system affinity mask for thread %d = %s\n"
, gtid, buf); }; }

4917

__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,if (kmp_a_debug >= 1000) { __kmp_debug_printf ; { char buf
[1024]; __kmp_affinity_print_mask(buf, 1024, (kmp_affin_mask_t
*)(*mask)); __kmp_printf("kmp_get_affinity: system affinity mask for thread %d = %s\n"
, gtid, buf); }; }

4918

(kmp_affin_mask_t *)(*mask));if (kmp_a_debug >= 1000) { __kmp_debug_printf ; { char buf
[1024]; __kmp_affinity_print_mask(buf, 1024, (kmp_affin_mask_t
*)(*mask)); __kmp_printf("kmp_get_affinity: system affinity mask for thread %d = %s\n"
, gtid, buf); }; }

4919

__kmp_printf("kmp_get_affinity: system affinity mask for thread %d = %s\n",if (kmp_a_debug >= 1000) { __kmp_debug_printf ; { char buf
[1024]; __kmp_affinity_print_mask(buf, 1024, (kmp_affin_mask_t
*)(*mask)); __kmp_printf("kmp_get_affinity: system affinity mask for thread %d = %s\n"
, gtid, buf); }; }

4920

gtid, buf);if (kmp_a_debug >= 1000) { __kmp_debug_printf ; { char buf
[1024]; __kmp_affinity_print_mask(buf, 1024, (kmp_affin_mask_t
*)(*mask)); __kmp_printf("kmp_get_affinity: system affinity mask for thread %d = %s\n"
, gtid, buf); }; }

4921

})if (kmp_a_debug >= 1000) { __kmp_debug_printf ; { char buf
[1024]; __kmp_affinity_print_mask(buf, 1024, (kmp_affin_mask_t
*)(*mask)); __kmp_printf("kmp_get_affinity: system affinity mask for thread %d = %s\n"
, gtid, buf); }; };

4922

return retval;

4923

4924

#else

4925

4926

KMP_CPU_COPY((kmp_affin_mask_t *)(*mask), th->th.th_affin_mask)((kmp_affin_mask_t *)(*mask))->copy(th->th.th_affin_mask
);

4927

return 0;

4928

4929

#endif /* KMP_OS_WINDOWS */

4930

}

4931

4932

int __kmp_aux_get_affinity_max_proc() {

4933

if (!KMP_AFFINITY_CAPABLE()(__kmp_affin_mask_size > 0)) {

4934

return 0;

4935

}

4936

#if KMP_GROUP_AFFINITY0

4937

if (__kmp_num_proc_groups > 1) {

4938

return (int)(__kmp_num_proc_groups * sizeof(DWORD_PTR) * CHAR_BIT8);

4939

}

4940

#endif

4941

return __kmp_xproc;

4942

}

4943

4944

int __kmp_aux_set_affinity_mask_proc(int proc, void **mask) {

4945

int retval;

4946

4947

if (!KMP_AFFINITY_CAPABLE()(__kmp_affin_mask_size > 0)) {

4948

return -1;

4949

}

4950

4951

KA_TRACE(1000, ; {if (kmp_a_debug >= 1000) { __kmp_debug_printf ; { int gtid
= __kmp_get_global_thread_id_reg(); char buf[1024]; __kmp_affinity_print_mask
(buf, 1024, (kmp_affin_mask_t *)(*mask)); __kmp_debug_printf(
"kmp_set_affinity_mask_proc: setting proc %d in " "affinity mask for thread %d = %s\n"
, proc, gtid, buf); }; }

4952

int gtid = __kmp_entry_gtid();if (kmp_a_debug >= 1000) { __kmp_debug_printf ; { int gtid
= __kmp_get_global_thread_id_reg(); char buf[1024]; __kmp_affinity_print_mask
(buf, 1024, (kmp_affin_mask_t *)(*mask)); __kmp_debug_printf(
"kmp_set_affinity_mask_proc: setting proc %d in " "affinity mask for thread %d = %s\n"
, proc, gtid, buf); }; }

4953

char buf[KMP_AFFIN_MASK_PRINT_LEN];if (kmp_a_debug >= 1000) { __kmp_debug_printf ; { int gtid
= __kmp_get_global_thread_id_reg(); char buf[1024]; __kmp_affinity_print_mask
(buf, 1024, (kmp_affin_mask_t *)(*mask)); __kmp_debug_printf(
"kmp_set_affinity_mask_proc: setting proc %d in " "affinity mask for thread %d = %s\n"
, proc, gtid, buf); }; }

4954

__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,if (kmp_a_debug >= 1000) { __kmp_debug_printf ; { int gtid
= __kmp_get_global_thread_id_reg(); char buf[1024]; __kmp_affinity_print_mask
(buf, 1024, (kmp_affin_mask_t *)(*mask)); __kmp_debug_printf(
"kmp_set_affinity_mask_proc: setting proc %d in " "affinity mask for thread %d = %s\n"
, proc, gtid, buf); }; }

4955

(kmp_affin_mask_t *)(*mask));if (kmp_a_debug >= 1000) { __kmp_debug_printf ; { int gtid
= __kmp_get_global_thread_id_reg(); char buf[1024]; __kmp_affinity_print_mask
(buf, 1024, (kmp_affin_mask_t *)(*mask)); __kmp_debug_printf(
"kmp_set_affinity_mask_proc: setting proc %d in " "affinity mask for thread %d = %s\n"
, proc, gtid, buf); }; }

4956

__kmp_debug_printf("kmp_set_affinity_mask_proc: setting proc %d in "if (kmp_a_debug >= 1000) { __kmp_debug_printf ; { int gtid
= __kmp_get_global_thread_id_reg(); char buf[1024]; __kmp_affinity_print_mask
(buf, 1024, (kmp_affin_mask_t *)(*mask)); __kmp_debug_printf(
"kmp_set_affinity_mask_proc: setting proc %d in " "affinity mask for thread %d = %s\n"
, proc, gtid, buf); }; }

4957

"affinity mask for thread %d = %s\n",if (kmp_a_debug >= 1000) { __kmp_debug_printf ; { int gtid
= __kmp_get_global_thread_id_reg(); char buf[1024]; __kmp_affinity_print_mask
(buf, 1024, (kmp_affin_mask_t *)(*mask)); __kmp_debug_printf(
"kmp_set_affinity_mask_proc: setting proc %d in " "affinity mask for thread %d = %s\n"
, proc, gtid, buf); }; }

4958

proc, gtid, buf);if (kmp_a_debug >= 1000) { __kmp_debug_printf ; { int gtid
= __kmp_get_global_thread_id_reg(); char buf[1024]; __kmp_affinity_print_mask
(buf, 1024, (kmp_affin_mask_t *)(*mask)); __kmp_debug_printf(
"kmp_set_affinity_mask_proc: setting proc %d in " "affinity mask for thread %d = %s\n"
, proc, gtid, buf); }; }

4959

})if (kmp_a_debug >= 1000) { __kmp_debug_printf ; { int gtid
= __kmp_get_global_thread_id_reg(); char buf[1024]; __kmp_affinity_print_mask
(buf, 1024, (kmp_affin_mask_t *)(*mask)); __kmp_debug_printf(
"kmp_set_affinity_mask_proc: setting proc %d in " "affinity mask for thread %d = %s\n"
, proc, gtid, buf); }; };

4960

4961

if (__kmp_env_consistency_check) {

4962

if ((mask == NULL__null) || (*mask == NULL__null)) {

4963

KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity_mask_proc")__kmp_fatal(__kmp_msg_format(kmp_i18n_msg_AffinityInvalidMask
, "kmp_set_affinity_mask_proc"), __kmp_msg_null);

4964

}

4965

}

4966

4967

if ((proc < 0) || (proc >= __kmp_aux_get_affinity_max_proc())) {

4968

return -1;

4969

}

4970

if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)(__kmp_affin_fullMask)->is_set(proc)) {

4971

return -2;

4972

}

4973

4974

KMP_CPU_SET(proc, (kmp_affin_mask_t *)(*mask))((kmp_affin_mask_t *)(*mask))->set(proc);

4975

return 0;

4976

}

4977

4978

int __kmp_aux_unset_affinity_mask_proc(int proc, void **mask) {

4979

int retval;

4980

4981

if (!KMP_AFFINITY_CAPABLE()(__kmp_affin_mask_size > 0)) {

4982

return -1;

4983

}

4984

4985

KA_TRACE(1000, ; {if (kmp_a_debug >= 1000) { __kmp_debug_printf ; { int gtid
= __kmp_get_global_thread_id_reg(); char buf[1024]; __kmp_affinity_print_mask
(buf, 1024, (kmp_affin_mask_t *)(*mask)); __kmp_debug_printf(
"kmp_unset_affinity_mask_proc: unsetting proc %d in " "affinity mask for thread %d = %s\n"
, proc, gtid, buf); }; }

4986

int gtid = __kmp_entry_gtid();if (kmp_a_debug >= 1000) { __kmp_debug_printf ; { int gtid
= __kmp_get_global_thread_id_reg(); char buf[1024]; __kmp_affinity_print_mask
(buf, 1024, (kmp_affin_mask_t *)(*mask)); __kmp_debug_printf(
"kmp_unset_affinity_mask_proc: unsetting proc %d in " "affinity mask for thread %d = %s\n"
, proc, gtid, buf); }; }

4987

char buf[KMP_AFFIN_MASK_PRINT_LEN];if (kmp_a_debug >= 1000) { __kmp_debug_printf ; { int gtid
= __kmp_get_global_thread_id_reg(); char buf[1024]; __kmp_affinity_print_mask
(buf, 1024, (kmp_affin_mask_t *)(*mask)); __kmp_debug_printf(
"kmp_unset_affinity_mask_proc: unsetting proc %d in " "affinity mask for thread %d = %s\n"
, proc, gtid, buf); }; }

4988

__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,if (kmp_a_debug >= 1000) { __kmp_debug_printf ; { int gtid
= __kmp_get_global_thread_id_reg(); char buf[1024]; __kmp_affinity_print_mask
(buf, 1024, (kmp_affin_mask_t *)(*mask)); __kmp_debug_printf(
"kmp_unset_affinity_mask_proc: unsetting proc %d in " "affinity mask for thread %d = %s\n"
, proc, gtid, buf); }; }

4989

(kmp_affin_mask_t *)(*mask));if (kmp_a_debug >= 1000) { __kmp_debug_printf ; { int gtid
= __kmp_get_global_thread_id_reg(); char buf[1024]; __kmp_affinity_print_mask
(buf, 1024, (kmp_affin_mask_t *)(*mask)); __kmp_debug_printf(
"kmp_unset_affinity_mask_proc: unsetting proc %d in " "affinity mask for thread %d = %s\n"
, proc, gtid, buf); }; }

4990

__kmp_debug_printf("kmp_unset_affinity_mask_proc: unsetting proc %d in "if (kmp_a_debug >= 1000) { __kmp_debug_printf ; { int gtid
= __kmp_get_global_thread_id_reg(); char buf[1024]; __kmp_affinity_print_mask
(buf, 1024, (kmp_affin_mask_t *)(*mask)); __kmp_debug_printf(
"kmp_unset_affinity_mask_proc: unsetting proc %d in " "affinity mask for thread %d = %s\n"
, proc, gtid, buf); }; }

4991

"affinity mask for thread %d = %s\n",if (kmp_a_debug >= 1000) { __kmp_debug_printf ; { int gtid
= __kmp_get_global_thread_id_reg(); char buf[1024]; __kmp_affinity_print_mask
(buf, 1024, (kmp_affin_mask_t *)(*mask)); __kmp_debug_printf(
"kmp_unset_affinity_mask_proc: unsetting proc %d in " "affinity mask for thread %d = %s\n"
, proc, gtid, buf); }; }

4992

proc, gtid, buf);if (kmp_a_debug >= 1000) { __kmp_debug_printf ; { int gtid
= __kmp_get_global_thread_id_reg(); char buf[1024]; __kmp_affinity_print_mask
(buf, 1024, (kmp_affin_mask_t *)(*mask)); __kmp_debug_printf(
"kmp_unset_affinity_mask_proc: unsetting proc %d in " "affinity mask for thread %d = %s\n"
, proc, gtid, buf); }; }

4993

})if (kmp_a_debug >= 1000) { __kmp_debug_printf ; { int gtid
= __kmp_get_global_thread_id_reg(); char buf[1024]; __kmp_affinity_print_mask
(buf, 1024, (kmp_affin_mask_t *)(*mask)); __kmp_debug_printf(
"kmp_unset_affinity_mask_proc: unsetting proc %d in " "affinity mask for thread %d = %s\n"
, proc, gtid, buf); }; };

4994

4995

if (__kmp_env_consistency_check) {

4996

if ((mask == NULL__null) || (*mask == NULL__null)) {

4997

KMP_FATAL(AffinityInvalidMask, "kmp_unset_affinity_mask_proc")__kmp_fatal(__kmp_msg_format(kmp_i18n_msg_AffinityInvalidMask
, "kmp_unset_affinity_mask_proc"), __kmp_msg_null);

4998

}

4999

}

5000

5001

if ((proc < 0) || (proc >= __kmp_aux_get_affinity_max_proc())) {

5002

return -1;

5003

}

5004

if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)(__kmp_affin_fullMask)->is_set(proc)) {

5005

return -2;

5006

}

5007

5008

KMP_CPU_CLR(proc, (kmp_affin_mask_t *)(*mask))((kmp_affin_mask_t *)(*mask))->clear(proc);

5009

return 0;

5010

}

5011

5012

int __kmp_aux_get_affinity_mask_proc(int proc, void **mask) {

5013

int retval;

5014

5015

if (!KMP_AFFINITY_CAPABLE()(__kmp_affin_mask_size > 0)) {

5016

return -1;

5017

}

5018

5019

KA_TRACE(1000, ; {if (kmp_a_debug >= 1000) { __kmp_debug_printf ; { int gtid
= __kmp_get_global_thread_id_reg(); char buf[1024]; __kmp_affinity_print_mask
(buf, 1024, (kmp_affin_mask_t *)(*mask)); __kmp_debug_printf(
"kmp_get_affinity_mask_proc: getting proc %d in " "affinity mask for thread %d = %s\n"
, proc, gtid, buf); }; }

5020

int gtid = __kmp_entry_gtid();if (kmp_a_debug >= 1000) { __kmp_debug_printf ; { int gtid
= __kmp_get_global_thread_id_reg(); char buf[1024]; __kmp_affinity_print_mask
(buf, 1024, (kmp_affin_mask_t *)(*mask)); __kmp_debug_printf(
"kmp_get_affinity_mask_proc: getting proc %d in " "affinity mask for thread %d = %s\n"
, proc, gtid, buf); }; }

5021

char buf[KMP_AFFIN_MASK_PRINT_LEN];if (kmp_a_debug >= 1000) { __kmp_debug_printf ; { int gtid
= __kmp_get_global_thread_id_reg(); char buf[1024]; __kmp_affinity_print_mask
(buf, 1024, (kmp_affin_mask_t *)(*mask)); __kmp_debug_printf(
"kmp_get_affinity_mask_proc: getting proc %d in " "affinity mask for thread %d = %s\n"
, proc, gtid, buf); }; }

5022

__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,if (kmp_a_debug >= 1000) { __kmp_debug_printf ; { int gtid
= __kmp_get_global_thread_id_reg(); char buf[1024]; __kmp_affinity_print_mask
(buf, 1024, (kmp_affin_mask_t *)(*mask)); __kmp_debug_printf(
"kmp_get_affinity_mask_proc: getting proc %d in " "affinity mask for thread %d = %s\n"
, proc, gtid, buf); }; }

5023

(kmp_affin_mask_t *)(*mask));if (kmp_a_debug >= 1000) { __kmp_debug_printf ; { int gtid
= __kmp_get_global_thread_id_reg(); char buf[1024]; __kmp_affinity_print_mask
(buf, 1024, (kmp_affin_mask_t *)(*mask)); __kmp_debug_printf(
"kmp_get_affinity_mask_proc: getting proc %d in " "affinity mask for thread %d = %s\n"
, proc, gtid, buf); }; }

5024

__kmp_debug_printf("kmp_get_affinity_mask_proc: getting proc %d in "if (kmp_a_debug >= 1000) { __kmp_debug_printf ; { int gtid
= __kmp_get_global_thread_id_reg(); char buf[1024]; __kmp_affinity_print_mask
(buf, 1024, (kmp_affin_mask_t *)(*mask)); __kmp_debug_printf(
"kmp_get_affinity_mask_proc: getting proc %d in " "affinity mask for thread %d = %s\n"
, proc, gtid, buf); }; }

5025

"affinity mask for thread %d = %s\n",if (kmp_a_debug >= 1000) { __kmp_debug_printf ; { int gtid
= __kmp_get_global_thread_id_reg(); char buf[1024]; __kmp_affinity_print_mask
(buf, 1024, (kmp_affin_mask_t *)(*mask)); __kmp_debug_printf(
"kmp_get_affinity_mask_proc: getting proc %d in " "affinity mask for thread %d = %s\n"
, proc, gtid, buf); }; }

5026

proc, gtid, buf);if (kmp_a_debug >= 1000) { __kmp_debug_printf ; { int gtid
= __kmp_get_global_thread_id_reg(); char buf[1024]; __kmp_affinity_print_mask
(buf, 1024, (kmp_affin_mask_t *)(*mask)); __kmp_debug_printf(
"kmp_get_affinity_mask_proc: getting proc %d in " "affinity mask for thread %d = %s\n"
, proc, gtid, buf); }; }

5027

})if (kmp_a_debug >= 1000) { __kmp_debug_printf ; { int gtid
= __kmp_get_global_thread_id_reg(); char buf[1024]; __kmp_affinity_print_mask
(buf, 1024, (kmp_affin_mask_t *)(*mask)); __kmp_debug_printf(
"kmp_get_affinity_mask_proc: getting proc %d in " "affinity mask for thread %d = %s\n"
, proc, gtid, buf); }; };

5028

5029

if (__kmp_env_consistency_check) {

5030

if ((mask == NULL__null) || (*mask == NULL__null)) {

5031

KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity_mask_proc")__kmp_fatal(__kmp_msg_format(kmp_i18n_msg_AffinityInvalidMask
, "kmp_get_affinity_mask_proc"), __kmp_msg_null);

5032

}

5033

}

5034

5035

if ((proc < 0) || (proc >= __kmp_aux_get_affinity_max_proc())) {

5036

return -1;

5037

}

5038

if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)(__kmp_affin_fullMask)->is_set(proc)) {

5039

return 0;

5040

}

5041

5042

return KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask))((kmp_affin_mask_t *)(*mask))->is_set(proc);

5043

}

5044

5045

// Dynamic affinity settings - Affinity balanced

5046

void __kmp_balanced_affinity(int tid, int nthreads) {

5047

bool fine_gran = true;

5048

5049

switch (__kmp_affinity_gran) {

5050

case affinity_gran_fine:

5051

case affinity_gran_thread:

5052

break;

5053

case affinity_gran_core:

5054

if (__kmp_nThreadsPerCore > 1) {

5055

fine_gran = false;

5056

}

5057

break;

5058

case affinity_gran_package:

5059

if (nCoresPerPkg > 1) {

5060

fine_gran = false;

5061

}

5062

break;

5063

default:

5064

fine_gran = false;

5065

}

5066

5067

if (__kmp_affinity_uniform_topology()) {

5068

int coreID;

5069

int threadID;

5070

// Number of hyper threads per core in HT machine

5071

int __kmp_nth_per_core = __kmp_avail_proc / __kmp_ncores;

5072

// Number of cores

5073

int ncores = __kmp_ncores;

5074

if ((nPackages > 1) && (__kmp_nth_per_core <= 1)) {

5075

__kmp_nth_per_core = __kmp_avail_proc / nPackages;

5076

ncores = nPackages;

5077

}

5078

// How many threads will be bound to each core

5079

int chunk = nthreads / ncores;

5080

// How many cores will have an additional thread bound to it - "big cores"

5081

int big_cores = nthreads % ncores;

5082

// Number of threads on the big cores

5083

int big_nth = (chunk + 1) * big_cores;

5084

if (tid < big_nth) {

5085

coreID = tid / (chunk + 1);

5086

threadID = (tid % (chunk + 1)) % __kmp_nth_per_core;

5087

} else { // tid >= big_nth

5088

coreID = (tid - big_cores) / chunk;

5089

threadID = ((tid - big_cores) % chunk) % __kmp_nth_per_core;

5090

}

5091

5092

KMP_DEBUG_ASSERT2(KMP_AFFINITY_CAPABLE(),(((__kmp_affin_mask_size > 0)) ? 0 : __kmp_debug_assert(("Illegal set affinity operation when not capable"
), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 5093))

5093

"Illegal set affinity operation when not capable")(((__kmp_affin_mask_size > 0)) ? 0 : __kmp_debug_assert(("Illegal set affinity operation when not capable"
), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 5093));

5094

5095

kmp_affin_mask_t *mask;

5096

KMP_CPU_ALLOC_ON_STACK(mask)(mask = __kmp_affinity_dispatch->allocate_mask());

5097

KMP_CPU_ZERO(mask)(mask)->zero();

5098

5099

if (fine_gran) {

5100

int osID = address2os[coreID * __kmp_nth_per_core + threadID].second;

5101

KMP_CPU_SET(osID, mask)(mask)->set(osID);

5102

} else {

5103

for (int i = 0; i < __kmp_nth_per_core; i++) {

5104

int osID;

5105

osID = address2os[coreID * __kmp_nth_per_core + i].second;

5106

KMP_CPU_SET(osID, mask)(mask)->set(osID);

5107

}

5108

}

5109

if (__kmp_affinity_verbose) {

5110

char buf[KMP_AFFIN_MASK_PRINT_LEN1024];

5111

__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN1024, mask);

5112

KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_BoundToOSProcSet
, "KMP_AFFINITY", (kmp_int32)getpid(), syscall(186), tid, buf
), __kmp_msg_null)

5113

__kmp_gettid(), tid, buf)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_BoundToOSProcSet
, "KMP_AFFINITY", (kmp_int32)getpid(), syscall(186), tid, buf
), __kmp_msg_null);

5114

}

5115

__kmp_set_system_affinity(mask, TRUE)(mask)->set_system_affinity((!0));

5116

KMP_CPU_FREE_FROM_STACK(mask)__kmp_affinity_dispatch->deallocate_mask(mask);

5117

} else { // Non-uniform topology

5118

5119

kmp_affin_mask_t *mask;

5120

KMP_CPU_ALLOC_ON_STACK(mask)(mask = __kmp_affinity_dispatch->allocate_mask());

5121

KMP_CPU_ZERO(mask)(mask)->zero();

5122

5123

int core_level = __kmp_affinity_find_core_level(

5124

address2os, __kmp_avail_proc, __kmp_aff_depth - 1);

5125

int ncores = __kmp_affinity_compute_ncores(address2os, __kmp_avail_proc,

5126

__kmp_aff_depth - 1, core_level);

5127

int nth_per_core = __kmp_affinity_max_proc_per_core(

5128

address2os, __kmp_avail_proc, __kmp_aff_depth - 1, core_level);

5129

5130

// For performance gain consider the special case nthreads ==

5131

// __kmp_avail_proc

5132

if (nthreads == __kmp_avail_proc) {

5133

if (fine_gran) {

5134

int osID = address2os[tid].second;

5135

KMP_CPU_SET(osID, mask)(mask)->set(osID);

5136

} else {

5137

int core = __kmp_affinity_find_core(address2os, tid,

5138

__kmp_aff_depth - 1, core_level);

5139

for (int i = 0; i < __kmp_avail_proc; i++) {

5140

int osID = address2os[i].second;

5141

if (__kmp_affinity_find_core(address2os, i, __kmp_aff_depth - 1,

5142

core_level) == core) {

5143

KMP_CPU_SET(osID, mask)(mask)->set(osID);

5144

}

5145

}

5146

}

5147

} else if (nthreads <= ncores) {

5148

5149

int core = 0;

5150

for (int i = 0; i < ncores; i++) {

5151

// Check if this core from procarr[] is in the mask

5152

int in_mask = 0;

5153

for (int j = 0; j < nth_per_core; j++) {

5154

if (procarr[i * nth_per_core + j] != -1) {

5155

in_mask = 1;

5156

break;

5157

}

5158

}

5159

if (in_mask) {

5160

if (tid == core) {

5161

for (int j = 0; j < nth_per_core; j++) {

5162

int osID = procarr[i * nth_per_core + j];

5163

if (osID != -1) {

5164

KMP_CPU_SET(osID, mask)(mask)->set(osID);

5165

// For fine granularity it is enough to set the first available

5166

// osID for this core

5167

if (fine_gran) {

5168

break;

5169

}

5170

}

5171

}

5172

break;

5173

} else {

5174

core++;

5175

}

5176

}

5177

}

5178

} else { // nthreads > ncores

5179

// Array to save the number of processors at each core

5180

int *nproc_at_core = (int *)KMP_ALLOCA(sizeof(int) * ncores)__builtin_alloca (sizeof(int) * ncores);

5181

// Array to save the number of cores with "x" available processors;

5182

int *ncores_with_x_procs =

5183

(int *)KMP_ALLOCA(sizeof(int) * (nth_per_core + 1))__builtin_alloca (sizeof(int) * (nth_per_core + 1));

5184

// Array to save the number of cores with # procs from x to nth_per_core

5185

int *ncores_with_x_to_max_procs =

5186

(int *)KMP_ALLOCA(sizeof(int) * (nth_per_core + 1))__builtin_alloca (sizeof(int) * (nth_per_core + 1));

5187

5188

for (int i = 0; i <= nth_per_core; i++) {

5189

ncores_with_x_procs[i] = 0;

5190

ncores_with_x_to_max_procs[i] = 0;

5191

}

5192

5193

for (int i = 0; i < ncores; i++) {

5194

int cnt = 0;

5195

for (int j = 0; j < nth_per_core; j++) {

5196

if (procarr[i * nth_per_core + j] != -1) {

5197

cnt++;

5198

}

5199

}

5200

nproc_at_core[i] = cnt;

5201

ncores_with_x_procs[cnt]++;

5202

}

5203

5204

for (int i = 0; i <= nth_per_core; i++) {

5205

for (int j = i; j <= nth_per_core; j++) {

5206

ncores_with_x_to_max_procs[i] += ncores_with_x_procs[j];

5207

}

5208

}

5209

5210

// Max number of processors

5211

int nproc = nth_per_core * ncores;

5212

// An array to keep number of threads per each context

5213

int *newarr = (int *)__kmp_allocate(sizeof(int) * nproc)___kmp_allocate((sizeof(int) * nproc), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 5213);

5214

for (int i = 0; i < nproc; i++) {

5215

newarr[i] = 0;

5216

}

5217

5218

int nth = nthreads;

5219

int flag = 0;

5220

while (nth > 0) {

5221

for (int j = 1; j <= nth_per_core; j++) {

5222

int cnt = ncores_with_x_to_max_procs[j];

5223

for (int i = 0; i < ncores; i++) {

5224

// Skip the core with 0 processors

5225

if (nproc_at_core[i] == 0) {

5226

continue;

5227

}

5228

for (int k = 0; k < nth_per_core; k++) {

5229

if (procarr[i * nth_per_core + k] != -1) {

5230

if (newarr[i * nth_per_core + k] == 0) {

5231

newarr[i * nth_per_core + k] = 1;

5232

cnt--;

5233

nth--;

5234

break;

5235

} else {

5236

if (flag != 0) {

5237

newarr[i * nth_per_core + k]++;

5238

cnt--;

5239

nth--;

5240

break;

5241

}

5242

}

5243

}

5244

}

5245

if (cnt == 0 || nth == 0) {

5246

break;

5247

}

5248

}

5249

if (nth == 0) {

5250

break;

5251

}

5252

}

5253

flag = 1;

5254

}

5255

int sum = 0;

5256

for (int i = 0; i < nproc; i++) {

5257

sum += newarr[i];

5258

if (sum > tid) {

5259

if (fine_gran) {

5260

int osID = procarr[i];

5261

KMP_CPU_SET(osID, mask)(mask)->set(osID);

5262

} else {

5263

int coreID = i / nth_per_core;

5264

for (int ii = 0; ii < nth_per_core; ii++) {

5265

int osID = procarr[coreID * nth_per_core + ii];

5266

if (osID != -1) {

5267

KMP_CPU_SET(osID, mask)(mask)->set(osID);

5268

}

5269

}

5270

}

5271

break;

5272

}

5273

}

5274

__kmp_free(newarr)___kmp_free((newarr), "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 5274);

5275

}

5276

5277

if (__kmp_affinity_verbose) {

5278

char buf[KMP_AFFIN_MASK_PRINT_LEN1024];

5279

__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN1024, mask);

5280

KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_BoundToOSProcSet
, "KMP_AFFINITY", (kmp_int32)getpid(), syscall(186), tid, buf
), __kmp_msg_null)

5281

__kmp_gettid(), tid, buf)__kmp_msg(kmp_ms_inform, __kmp_msg_format(kmp_i18n_msg_BoundToOSProcSet
, "KMP_AFFINITY", (kmp_int32)getpid(), syscall(186), tid, buf
), __kmp_msg_null);

5282

}

5283

__kmp_set_system_affinity(mask, TRUE)(mask)->set_system_affinity((!0));

5284

KMP_CPU_FREE_FROM_STACK(mask)__kmp_affinity_dispatch->deallocate_mask(mask);

5285

}

5286

}

5287

5288

#if KMP_OS_LINUX1

5289

// We don't need this entry for Windows because

5290

// the GetProcessAffinityMask() API is available there

5291

//

5292

// The intended usage is indicated by these steps:

5293

// 1) The user gets the current affinity mask

5294

// 2) Then sets the affinity by calling this function

5295

// 3) Error check the return value

5296

// 4) Use non-OpenMP parallelization

5297

// 5) Reset the affinity to what was stored in step 1)

5298

// kmp_set_thread_affinity_mask_initial: applies __kmp_affin_fullMask as the
// system affinity through __kmp_set_system_affinity and returns that call's
// result. Takes no arguments. Returns -1 early, without changing affinity,
// when the caller has no valid global thread id or when affinity has not been
// initialized (see the two guard checks in the body).
// Declared with C linkage when compiled as C++.
#ifdef __cplusplus201103L

5299

extern "C"

5300

#endif

5301

int

5302

kmp_set_thread_affinity_mask_initial()

5303

// the function returns 0 on success,

5304

// -1 if we cannot bind thread

5305

// >0 (errno) if an error happened during binding

5306

{

5307

// Look up the caller's global thread id; a negative value is handled below
// as "not an OpenMP thread".
int gtid = __kmp_get_gtid()__kmp_get_global_thread_id();

5308

if (gtid < 0) {

5309

// Do not touch non-omp threads

5310

KA_TRACE(30, ("kmp_set_thread_affinity_mask_initial: "if (kmp_a_debug >= 30) { __kmp_debug_printf ("kmp_set_thread_affinity_mask_initial: "
"non-omp thread, returning\n"); }

5311

"non-omp thread, returning\n"))if (kmp_a_debug >= 30) { __kmp_debug_printf ("kmp_set_thread_affinity_mask_initial: "
"non-omp thread, returning\n"); };

5312

return -1;

5313

}

5314

// Second guard: give up when the affinity mask size is not positive
// (KMP_AFFINITY_CAPABLE() expands to __kmp_affin_mask_size > 0) or when
// __kmp_init_middle is not set.
if (!KMP_AFFINITY_CAPABLE()(__kmp_affin_mask_size > 0) || !__kmp_init_middle) {

5315

KA_TRACE(30, ("kmp_set_thread_affinity_mask_initial: "if (kmp_a_debug >= 30) { __kmp_debug_printf ("kmp_set_thread_affinity_mask_initial: "
"affinity not initialized, returning\n"); }

5316

"affinity not initialized, returning\n"))if (kmp_a_debug >= 30) { __kmp_debug_printf ("kmp_set_thread_affinity_mask_initial: "
"affinity not initialized, returning\n"); };

5317

return -1;

5318

}

5319

// Trace (emitted when kmp_a_debug >= 30) that the full mask is about to be
// applied for this gtid.
KA_TRACE(30, ("kmp_set_thread_affinity_mask_initial: "if (kmp_a_debug >= 30) { __kmp_debug_printf ("kmp_set_thread_affinity_mask_initial: "
"set full mask for thread %d\n", gtid); }

5320

"set full mask for thread %d\n",if (kmp_a_debug >= 30) { __kmp_debug_printf ("kmp_set_thread_affinity_mask_initial: "
"set full mask for thread %d\n", gtid); }

5321

gtid))if (kmp_a_debug >= 30) { __kmp_debug_printf ("kmp_set_thread_affinity_mask_initial: "
"set full mask for thread %d\n", gtid); };

5322

// The full mask must already exist at this point; asserted, not handled.
KMP_DEBUG_ASSERT(__kmp_affin_fullMask != NULL)((__kmp_affin_fullMask != __null) ? 0 : __kmp_debug_assert("__kmp_affin_fullMask != __null"
, "/build/llvm-toolchain-snapshot-7~svn338205/projects/openmp/runtime/src/kmp_affinity.cpp"
, 5322));

5323

// The second argument is FALSE here, whereas the internal binding path
// earlier in this file passes TRUE.
// NOTE(review): presumably this is an abort-on-error flag, so a failure is
// handed back to the caller as the return value instead of aborting --
// confirm against the set_system_affinity implementation.
return __kmp_set_system_affinity(__kmp_affin_fullMask, FALSE)(__kmp_affin_fullMask)->set_system_affinity(0);

5324

}

5325

#endif

5326

5327

#endif // KMP_AFFINITY_SUPPORTED

Bug Summary

Annotated Source Code

File:	projects/openmp/runtime/src/kmp_affinity.cpp
Warning:	line 1026, column 7 Value stored to 'maxThreadsPerPkg' is never read