LLVM  15.0.0git
Host.cpp
Go to the documentation of this file.
1 //===-- Host.cpp - Implement OS Host Concept --------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the operating system Host concept.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/Support/Host.h"
14 #include "llvm/ADT/SmallVector.h"
15 #include "llvm/ADT/StringMap.h"
16 #include "llvm/ADT/StringRef.h"
17 #include "llvm/ADT/StringSwitch.h"
18 #include "llvm/ADT/Triple.h"
19 #include "llvm/Config/llvm-config.h"
23 #include <string.h>
24 
25 // Include the platform-specific parts of this class.
26 #ifdef LLVM_ON_UNIX
27 #include "Unix/Host.inc"
28 #include <sched.h>
29 #endif
30 #ifdef _WIN32
31 #include "Windows/Host.inc"
32 #endif
33 #ifdef _MSC_VER
34 #include <intrin.h>
35 #endif
36 #ifdef __MVS__
37 #include "llvm/Support/BCD.h"
38 #endif
39 #if defined(__APPLE__)
40 #include <mach/host_info.h>
41 #include <mach/mach.h>
42 #include <mach/mach_host.h>
43 #include <mach/machine.h>
44 #include <sys/param.h>
45 #include <sys/sysctl.h>
46 #endif
47 #ifdef _AIX
48 #include <sys/systemcfg.h>
49 #endif
50 
51 #define DEBUG_TYPE "host-detection"
52 
53 //===----------------------------------------------------------------------===//
54 //
55 // Implementations of the CPU detection routines
56 //
57 //===----------------------------------------------------------------------===//
58 
59 using namespace llvm;
60 
61 static std::unique_ptr<llvm::MemoryBuffer>
64  llvm::MemoryBuffer::getFileAsStream("/proc/cpuinfo");
65  if (std::error_code EC = Text.getError()) {
66  llvm::errs() << "Can't read "
67  << "/proc/cpuinfo: " << EC.message() << "\n";
68  return nullptr;
69  }
70  return std::move(*Text);
71 }
72 
74  // Access to the Processor Version Register (PVR) on PowerPC is privileged,
75  // and so we must use an operating-system interface to determine the current
76  // processor type. On Linux, this is exposed through the /proc/cpuinfo file.
77  const char *generic = "generic";
78 
79  // The cpu line is second (after the 'processor: 0' line), so if this
80  // buffer is too small then something has changed (or is wrong).
81  StringRef::const_iterator CPUInfoStart = ProcCpuinfoContent.begin();
82  StringRef::const_iterator CPUInfoEnd = ProcCpuinfoContent.end();
83 
84  StringRef::const_iterator CIP = CPUInfoStart;
85 
86  StringRef::const_iterator CPUStart = nullptr;
87  size_t CPULen = 0;
88 
89  // We need to find the first line which starts with cpu, spaces, and a colon.
90  // After the colon, there may be some additional spaces and then the cpu type.
91  while (CIP < CPUInfoEnd && CPUStart == nullptr) {
92  if (CIP < CPUInfoEnd && *CIP == '\n')
93  ++CIP;
94 
95  if (CIP < CPUInfoEnd && *CIP == 'c') {
96  ++CIP;
97  if (CIP < CPUInfoEnd && *CIP == 'p') {
98  ++CIP;
99  if (CIP < CPUInfoEnd && *CIP == 'u') {
100  ++CIP;
101  while (CIP < CPUInfoEnd && (*CIP == ' ' || *CIP == '\t'))
102  ++CIP;
103 
104  if (CIP < CPUInfoEnd && *CIP == ':') {
105  ++CIP;
106  while (CIP < CPUInfoEnd && (*CIP == ' ' || *CIP == '\t'))
107  ++CIP;
108 
109  if (CIP < CPUInfoEnd) {
110  CPUStart = CIP;
111  while (CIP < CPUInfoEnd && (*CIP != ' ' && *CIP != '\t' &&
112  *CIP != ',' && *CIP != '\n'))
113  ++CIP;
114  CPULen = CIP - CPUStart;
115  }
116  }
117  }
118  }
119  }
120 
121  if (CPUStart == nullptr)
122  while (CIP < CPUInfoEnd && *CIP != '\n')
123  ++CIP;
124  }
125 
126  if (CPUStart == nullptr)
127  return generic;
128 
129  return StringSwitch<const char *>(StringRef(CPUStart, CPULen))
130  .Case("604e", "604e")
131  .Case("604", "604")
132  .Case("7400", "7400")
133  .Case("7410", "7400")
134  .Case("7447", "7400")
135  .Case("7455", "7450")
136  .Case("G4", "g4")
137  .Case("POWER4", "970")
138  .Case("PPC970FX", "970")
139  .Case("PPC970MP", "970")
140  .Case("G5", "g5")
141  .Case("POWER5", "g5")
142  .Case("A2", "a2")
143  .Case("POWER6", "pwr6")
144  .Case("POWER7", "pwr7")
145  .Case("POWER8", "pwr8")
146  .Case("POWER8E", "pwr8")
147  .Case("POWER8NVL", "pwr8")
148  .Case("POWER9", "pwr9")
149  .Case("POWER10", "pwr10")
150  // FIXME: If we get a simulator or machine with the capabilities of
151  // mcpu=future, we should revisit this and add the name reported by the
152  // simulator/machine.
153  .Default(generic);
154 }
155 
157  // The cpuid register on arm is not accessible from user space. On Linux,
158  // it is exposed through the /proc/cpuinfo file.
159 
160  // Read 32 lines from /proc/cpuinfo, which should contain the CPU part line
161  // in all cases.
163  ProcCpuinfoContent.split(Lines, "\n");
164 
165  // Look for the CPU implementer line.
166  StringRef Implementer;
167  StringRef Hardware;
168  StringRef Part;
169  for (unsigned I = 0, E = Lines.size(); I != E; ++I) {
170  if (Lines[I].startswith("CPU implementer"))
171  Implementer = Lines[I].substr(15).ltrim("\t :");
172  if (Lines[I].startswith("Hardware"))
173  Hardware = Lines[I].substr(8).ltrim("\t :");
174  if (Lines[I].startswith("CPU part"))
175  Part = Lines[I].substr(8).ltrim("\t :");
176  }
177 
178  if (Implementer == "0x41") { // ARM Ltd.
179  // MSM8992/8994 may give cpu part for the core that the kernel is running on,
180  // which is undeterministic and wrong. Always return cortex-a53 for these SoC.
181  if (Hardware.endswith("MSM8994") || Hardware.endswith("MSM8996"))
182  return "cortex-a53";
183 
184 
185  // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The
186  // values correspond to the "Part number" in the CP15/c0 register. The
187  // contents are specified in the various processor manuals.
188  // This corresponds to the Main ID Register in Technical Reference Manuals.
189  // and is used in programs like sys-utils
190  return StringSwitch<const char *>(Part)
191  .Case("0x926", "arm926ej-s")
192  .Case("0xb02", "mpcore")
193  .Case("0xb36", "arm1136j-s")
194  .Case("0xb56", "arm1156t2-s")
195  .Case("0xb76", "arm1176jz-s")
196  .Case("0xc08", "cortex-a8")
197  .Case("0xc09", "cortex-a9")
198  .Case("0xc0f", "cortex-a15")
199  .Case("0xc20", "cortex-m0")
200  .Case("0xc23", "cortex-m3")
201  .Case("0xc24", "cortex-m4")
202  .Case("0xd22", "cortex-m55")
203  .Case("0xd02", "cortex-a34")
204  .Case("0xd04", "cortex-a35")
205  .Case("0xd03", "cortex-a53")
206  .Case("0xd07", "cortex-a57")
207  .Case("0xd08", "cortex-a72")
208  .Case("0xd09", "cortex-a73")
209  .Case("0xd0a", "cortex-a75")
210  .Case("0xd0b", "cortex-a76")
211  .Case("0xd0d", "cortex-a77")
212  .Case("0xd41", "cortex-a78")
213  .Case("0xd44", "cortex-x1")
214  .Case("0xd4c", "cortex-x1c")
215  .Case("0xd0c", "neoverse-n1")
216  .Case("0xd49", "neoverse-n2")
217  .Case("0xd40", "neoverse-v1")
218  .Default("generic");
219  }
220 
221  if (Implementer == "0x42" || Implementer == "0x43") { // Broadcom | Cavium.
222  return StringSwitch<const char *>(Part)
223  .Case("0x516", "thunderx2t99")
224  .Case("0x0516", "thunderx2t99")
225  .Case("0xaf", "thunderx2t99")
226  .Case("0x0af", "thunderx2t99")
227  .Case("0xa1", "thunderxt88")
228  .Case("0x0a1", "thunderxt88")
229  .Default("generic");
230  }
231 
232  if (Implementer == "0x46") { // Fujitsu Ltd.
233  return StringSwitch<const char *>(Part)
234  .Case("0x001", "a64fx")
235  .Default("generic");
236  }
237 
238  if (Implementer == "0x4e") { // NVIDIA Corporation
239  return StringSwitch<const char *>(Part)
240  .Case("0x004", "carmel")
241  .Default("generic");
242  }
243 
244  if (Implementer == "0x48") // HiSilicon Technologies, Inc.
245  // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The
246  // values correspond to the "Part number" in the CP15/c0 register. The
247  // contents are specified in the various processor manuals.
248  return StringSwitch<const char *>(Part)
249  .Case("0xd01", "tsv110")
250  .Default("generic");
251 
252  if (Implementer == "0x51") // Qualcomm Technologies, Inc.
253  // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The
254  // values correspond to the "Part number" in the CP15/c0 register. The
255  // contents are specified in the various processor manuals.
256  return StringSwitch<const char *>(Part)
257  .Case("0x06f", "krait") // APQ8064
258  .Case("0x201", "kryo")
259  .Case("0x205", "kryo")
260  .Case("0x211", "kryo")
261  .Case("0x800", "cortex-a73") // Kryo 2xx Gold
262  .Case("0x801", "cortex-a73") // Kryo 2xx Silver
263  .Case("0x802", "cortex-a75") // Kryo 3xx Gold
264  .Case("0x803", "cortex-a75") // Kryo 3xx Silver
265  .Case("0x804", "cortex-a76") // Kryo 4xx Gold
266  .Case("0x805", "cortex-a76") // Kryo 4xx/5xx Silver
267  .Case("0xc00", "falkor")
268  .Case("0xc01", "saphira")
269  .Default("generic");
270  if (Implementer == "0x53") { // Samsung Electronics Co., Ltd.
271  // The Exynos chips have a convoluted ID scheme that doesn't seem to follow
272  // any predictive pattern across variants and parts.
273  unsigned Variant = 0, Part = 0;
274 
275  // Look for the CPU variant line, whose value is a 1 digit hexadecimal
276  // number, corresponding to the Variant bits in the CP15/C0 register.
277  for (auto I : Lines)
278  if (I.consume_front("CPU variant"))
279  I.ltrim("\t :").getAsInteger(0, Variant);
280 
281  // Look for the CPU part line, whose value is a 3 digit hexadecimal
282  // number, corresponding to the PartNum bits in the CP15/C0 register.
283  for (auto I : Lines)
284  if (I.consume_front("CPU part"))
285  I.ltrim("\t :").getAsInteger(0, Part);
286 
287  unsigned Exynos = (Variant << 12) | Part;
288  switch (Exynos) {
289  default:
290  // Default by falling through to Exynos M3.
292  case 0x1002:
293  return "exynos-m3";
294  case 0x1003:
295  return "exynos-m4";
296  }
297  }
298 
299  if (Implementer == "0xc0") { // Ampere Computing
300  return StringSwitch<const char *>(Part)
301  .Case("0xac3", "ampere1")
302  .Default("generic");
303  }
304 
305  return "generic";
306 }
307 
308 namespace {
309 StringRef getCPUNameFromS390Model(unsigned int Id, bool HaveVectorSupport) {
310  switch (Id) {
311  case 2064: // z900 not supported by LLVM
312  case 2066:
313  case 2084: // z990 not supported by LLVM
314  case 2086:
315  case 2094: // z9-109 not supported by LLVM
316  case 2096:
317  return "generic";
318  case 2097:
319  case 2098:
320  return "z10";
321  case 2817:
322  case 2818:
323  return "z196";
324  case 2827:
325  case 2828:
326  return "zEC12";
327  case 2964:
328  case 2965:
329  return HaveVectorSupport? "z13" : "zEC12";
330  case 3906:
331  case 3907:
332  return HaveVectorSupport? "z14" : "zEC12";
333  case 8561:
334  case 8562:
335  return HaveVectorSupport? "z15" : "zEC12";
336  case 3931:
337  case 3932:
338  default:
339  return HaveVectorSupport? "z16" : "zEC12";
340  }
341 }
342 } // end anonymous namespace
343 
345  // STIDP is a privileged operation, so use /proc/cpuinfo instead.
346 
347  // The "processor 0:" line comes after a fair amount of other information,
348  // including a cache breakdown, but this should be plenty.
350  ProcCpuinfoContent.split(Lines, "\n");
351 
352  // Look for the CPU features.
353  SmallVector<StringRef, 32> CPUFeatures;
354  for (unsigned I = 0, E = Lines.size(); I != E; ++I)
355  if (Lines[I].startswith("features")) {
356  size_t Pos = Lines[I].find(':');
357  if (Pos != StringRef::npos) {
358  Lines[I].drop_front(Pos + 1).split(CPUFeatures, ' ');
359  break;
360  }
361  }
362 
363  // We need to check for the presence of vector support independently of
364  // the machine type, since we may only use the vector register set when
365  // supported by the kernel (and hypervisor).
366  bool HaveVectorSupport = false;
367  for (unsigned I = 0, E = CPUFeatures.size(); I != E; ++I) {
368  if (CPUFeatures[I] == "vx")
369  HaveVectorSupport = true;
370  }
371 
372  // Now check the processor machine type.
373  for (unsigned I = 0, E = Lines.size(); I != E; ++I) {
374  if (Lines[I].startswith("processor ")) {
375  size_t Pos = Lines[I].find("machine = ");
376  if (Pos != StringRef::npos) {
377  Pos += sizeof("machine = ") - 1;
378  unsigned int Id;
379  if (!Lines[I].drop_front(Pos).getAsInteger(10, Id))
380  return getCPUNameFromS390Model(Id, HaveVectorSupport);
381  }
382  break;
383  }
384  }
385 
386  return "generic";
387 }
388 
390  // There are 24 lines in /proc/cpuinfo
392  ProcCpuinfoContent.split(Lines, "\n");
393 
394  // Look for uarch line to determine cpu name
395  StringRef UArch;
396  for (unsigned I = 0, E = Lines.size(); I != E; ++I) {
397  if (Lines[I].startswith("uarch")) {
398  UArch = Lines[I].substr(5).ltrim("\t :");
399  break;
400  }
401  }
402 
403  return StringSwitch<const char *>(UArch)
404  .Case("sifive,u74-mc", "sifive-u74")
405  .Case("sifive,bullet0", "sifive-u74")
406  .Default("generic");
407 }
408 
410 #if !defined(__linux__) || !defined(__x86_64__)
411  return "generic";
412 #else
413  uint8_t v3_insns[40] __attribute__ ((aligned (8))) =
414  /* BPF_MOV64_IMM(BPF_REG_0, 0) */
415  { 0xb7, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
416  /* BPF_MOV64_IMM(BPF_REG_2, 1) */
417  0xb7, 0x2, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0,
418  /* BPF_JMP32_REG(BPF_JLT, BPF_REG_0, BPF_REG_2, 1) */
419  0xae, 0x20, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0,
420  /* BPF_MOV64_IMM(BPF_REG_0, 1) */
421  0xb7, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0,
422  /* BPF_EXIT_INSN() */
423  0x95, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 };
424 
425  uint8_t v2_insns[40] __attribute__ ((aligned (8))) =
426  /* BPF_MOV64_IMM(BPF_REG_0, 0) */
427  { 0xb7, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
428  /* BPF_MOV64_IMM(BPF_REG_2, 1) */
429  0xb7, 0x2, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0,
430  /* BPF_JMP_REG(BPF_JLT, BPF_REG_0, BPF_REG_2, 1) */
431  0xad, 0x20, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0,
432  /* BPF_MOV64_IMM(BPF_REG_0, 1) */
433  0xb7, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0,
434  /* BPF_EXIT_INSN() */
435  0x95, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 };
436 
437  struct bpf_prog_load_attr {
438  uint32_t prog_type;
439  uint32_t insn_cnt;
440  uint64_t insns;
441  uint64_t license;
442  uint32_t log_level;
443  uint32_t log_size;
444  uint64_t log_buf;
445  uint32_t kern_version;
446  uint32_t prog_flags;
447  } attr = {};
448  attr.prog_type = 1; /* BPF_PROG_TYPE_SOCKET_FILTER */
449  attr.insn_cnt = 5;
450  attr.insns = (uint64_t)v3_insns;
451  attr.license = (uint64_t)"DUMMY";
452 
453  int fd = syscall(321 /* __NR_bpf */, 5 /* BPF_PROG_LOAD */, &attr,
454  sizeof(attr));
455  if (fd >= 0) {
456  close(fd);
457  return "v3";
458  }
459 
460  /* Clear the whole attr in case its content changed by syscall. */
461  memset(&attr, 0, sizeof(attr));
462  attr.prog_type = 1; /* BPF_PROG_TYPE_SOCKET_FILTER */
463  attr.insn_cnt = 5;
464  attr.insns = (uint64_t)v2_insns;
465  attr.license = (uint64_t)"DUMMY";
466  fd = syscall(321 /* __NR_bpf */, 5 /* BPF_PROG_LOAD */, &attr, sizeof(attr));
467  if (fd >= 0) {
468  close(fd);
469  return "v2";
470  }
471  return "v1";
472 #endif
473 }
474 
475 #if defined(__i386__) || defined(_M_IX86) || \
476  defined(__x86_64__) || defined(_M_X64)
477 
// The i386 check below was copied from clang's cpuid.h (__get_cpuid_max).
// It was motivated by bug reports of OpenSSL crashing on CPUs without CPUID
// support; on i386 the presence of CPUID is therefore tested first by trying
// to toggle the corresponding eflags bit (0x00200000).
// The cpuid.h header itself was dropped because of PR30384.
// Header cpuid.h and method __get_cpuid_max are not used in llvm, clang, openmp
// or test-suite, but are used in external projects e.g. libstdcxx

/// \returns false only on an i386 GCC/Clang build where the eflags ID bit
/// cannot be toggled; true in every other configuration.
static bool isCpuIdSupported() {
#if (defined(__GNUC__) || defined(__clang__)) && defined(__i386__)
  int HasCpuId;
  __asm__(" pushfl\n"
          " popl %%eax\n"
          " movl %%eax,%%ecx\n"
          " xorl $0x00200000,%%eax\n"
          " pushl %%eax\n"
          " popfl\n"
          " pushfl\n"
          " popl %%eax\n"
          " movl $0,%0\n"
          " cmpl %%eax,%%ecx\n"
          " je 1f\n"
          " movl $1,%0\n"
          "1:"
          : "=r"(HasCpuId)
          :
          : "eax", "ecx");
  return HasCpuId != 0;
#else
  return true;
#endif
}
512 
/// getX86CpuIDAndInfo - Execute the cpuid instruction for leaf \p value and
/// store the resulting EAX/EBX/ECX/EDX in the four output arguments.
///
/// \returns false when cpuid was executed, true when this build has no way to
/// run it (the outputs are then left untouched).
static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
                               unsigned *rECX, unsigned *rEDX) {
#if (defined(__GNUC__) || defined(__clang__)) && defined(__x86_64__)
  // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
  // FIXME: should we save this for Clang?
  __asm__("movq\t%%rbx, %%rsi\n\t"
          "cpuid\n\t"
          "xchgq\t%%rbx, %%rsi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value));
  return false;
#elif (defined(__GNUC__) || defined(__clang__)) && defined(__i386__)
  __asm__("movl\t%%ebx, %%esi\n\t"
          "cpuid\n\t"
          "xchgl\t%%ebx, %%esi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value));
  return false;
#elif !defined(__GNUC__) && !defined(__clang__) && defined(_MSC_VER)
  // The MSVC intrinsic is portable across x86 and x64.
  int Regs[4];
  __cpuid(Regs, value);
  *rEAX = Regs[0];
  *rEBX = Regs[1];
  *rECX = Regs[2];
  *rEDX = Regs[3];
  return false;
#else
  return true;
#endif
}
550 
551 namespace llvm {
552 namespace sys {
553 namespace detail {
554 namespace x86 {
555 
556 VendorSignatures getVendorSignature(unsigned *MaxLeaf) {
557  unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
558  if (MaxLeaf == nullptr)
559  MaxLeaf = &EAX;
560  else
561  *MaxLeaf = 0;
562 
563  if (!isCpuIdSupported())
565 
566  if (getX86CpuIDAndInfo(0, MaxLeaf, &EBX, &ECX, &EDX) || *MaxLeaf < 1)
568 
569  // "Genu ineI ntel"
570  if (EBX == 0x756e6547 && EDX == 0x49656e69 && ECX == 0x6c65746e)
571  return VendorSignatures::GENUINE_INTEL;
572 
573  // "Auth enti cAMD"
574  if (EBX == 0x68747541 && EDX == 0x69746e65 && ECX == 0x444d4163)
575  return VendorSignatures::AUTHENTIC_AMD;
576 
578 }
579 
580 } // namespace x86
581 } // namespace detail
582 } // namespace sys
583 } // namespace llvm
584 
585 using namespace llvm::sys::detail::x86;
586 
/// getX86CpuIDAndInfoEx - Execute the cpuid instruction for leaf \p value and
/// sub-leaf \p subleaf and store the resulting EAX/EBX/ECX/EDX in the four
/// output arguments.
///
/// \returns false when cpuid was executed, true when this build has no way to
/// run it (the outputs are then left untouched).
static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf,
                                 unsigned *rEAX, unsigned *rEBX, unsigned *rECX,
                                 unsigned *rEDX) {
#if (defined(__GNUC__) || defined(__clang__)) && defined(__x86_64__)
  // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
  // FIXME: should we save this for Clang?
  __asm__("movq\t%%rbx, %%rsi\n\t"
          "cpuid\n\t"
          "xchgq\t%%rbx, %%rsi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value), "c"(subleaf));
  return false;
#elif (defined(__GNUC__) || defined(__clang__)) && defined(__i386__)
  __asm__("movl\t%%ebx, %%esi\n\t"
          "cpuid\n\t"
          "xchgl\t%%ebx, %%esi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value), "c"(subleaf));
  return false;
#elif !defined(__GNUC__) && !defined(__clang__) && defined(_MSC_VER)
  int Regs[4];
  __cpuidex(Regs, value, subleaf);
  *rEAX = Regs[0];
  *rEBX = Regs[1];
  *rECX = Regs[2];
  *rEDX = Regs[3];
  return false;
#else
  return true;
#endif
}
625 
/// Read extended control register 0 (XCR0). Used to detect features such as
/// AVX.
///
/// \param rEAX receives the low 32 bits of XCR0.
/// \param rEDX receives the high 32 bits of XCR0.
/// \returns false when the register was read, true when this build has no way
/// to read it.
static bool getX86XCR0(unsigned *rEAX, unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
  // xgetbv is emitted as raw bytes rather than by mnemonic: older assemblers
  // do not know the instruction and there is no easy way to conditionally
  // compile based on the assembler used.
  __asm__(".byte 0x0f, 0x01, 0xd0" : "=a"(*rEAX), "=d"(*rEDX) : "c"(0));
  return false;
#elif defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK)
  const unsigned long long XCR0 = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
  *rEAX = static_cast<unsigned>(XCR0);
  *rEDX = static_cast<unsigned>(XCR0 >> 32);
  return false;
#else
  return true;
#endif
}
643 
/// Decode the display family and model from a processor signature.
///
/// \param EAX the signature value to decode.
/// \param Family receives bits 8-11, plus the extended family (bits 20-27)
/// when bits 8-11 equal 0xF.
/// \param Model receives bits 4-7, plus the extended model (bits 16-19)
/// shifted left by four when bits 8-11 equal 6 or 0xF.
static void detectX86FamilyModel(unsigned EAX, unsigned *Family,
                                 unsigned *Model) {
  const unsigned BaseFamily = (EAX >> 8) & 0xf;  // Bits 8 - 11
  const unsigned BaseModel = (EAX >> 4) & 0xf;   // Bits 4 - 7
  const unsigned ExtFamily = (EAX >> 20) & 0xff; // Bits 20 - 27
  const unsigned ExtModel = (EAX >> 16) & 0xf;   // Bits 16 - 19

  // The extended family only applies to family F; the extended model applies
  // to families 6 and F.
  const bool UsesExtFamily = BaseFamily == 0xf;
  const bool UsesExtModel = BaseFamily == 6 || BaseFamily == 0xf;

  *Family = UsesExtFamily ? BaseFamily + ExtFamily : BaseFamily;
  *Model = UsesExtModel ? BaseModel + (ExtModel << 4) : BaseModel;
}
656 
657 static StringRef
658 getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
659  const unsigned *Features,
660  unsigned *Type, unsigned *Subtype) {
661  auto testFeature = [&](unsigned F) {
662  return (Features[F / 32] & (1U << (F % 32))) != 0;
663  };
664 
665  StringRef CPU;
666 
667  switch (Family) {
668  case 3:
669  CPU = "i386";
670  break;
671  case 4:
672  CPU = "i486";
673  break;
674  case 5:
675  if (testFeature(X86::FEATURE_MMX)) {
676  CPU = "pentium-mmx";
677  break;
678  }
679  CPU = "pentium";
680  break;
681  case 6:
682  switch (Model) {
683  case 0x0f: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile
684  // processor, Intel Core 2 Quad processor, Intel Core 2 Quad
685  // mobile processor, Intel Core 2 Extreme processor, Intel
686  // Pentium Dual-Core processor, Intel Xeon processor, model
687  // 0Fh. All processors are manufactured using the 65 nm process.
688  case 0x16: // Intel Celeron processor model 16h. All processors are
689  // manufactured using the 65 nm process
690  CPU = "core2";
691  *Type = X86::INTEL_CORE2;
692  break;
693  case 0x17: // Intel Core 2 Extreme processor, Intel Xeon processor, model
694  // 17h. All processors are manufactured using the 45 nm process.
695  //
696  // 45nm: Penryn , Wolfdale, Yorkfield (XE)
697  case 0x1d: // Intel Xeon processor MP. All processors are manufactured using
698  // the 45 nm process.
699  CPU = "penryn";
700  *Type = X86::INTEL_CORE2;
701  break;
702  case 0x1a: // Intel Core i7 processor and Intel Xeon processor. All
703  // processors are manufactured using the 45 nm process.
704  case 0x1e: // Intel(R) Core(TM) i7 CPU 870 @ 2.93GHz.
705  // As found in a Summer 2010 model iMac.
706  case 0x1f:
707  case 0x2e: // Nehalem EX
708  CPU = "nehalem";
709  *Type = X86::INTEL_COREI7;
710  *Subtype = X86::INTEL_COREI7_NEHALEM;
711  break;
712  case 0x25: // Intel Core i7, laptop version.
713  case 0x2c: // Intel Core i7 processor and Intel Xeon processor. All
714  // processors are manufactured using the 32 nm process.
715  case 0x2f: // Westmere EX
716  CPU = "westmere";
717  *Type = X86::INTEL_COREI7;
718  *Subtype = X86::INTEL_COREI7_WESTMERE;
719  break;
720  case 0x2a: // Intel Core i7 processor. All processors are manufactured
721  // using the 32 nm process.
722  case 0x2d:
723  CPU = "sandybridge";
724  *Type = X86::INTEL_COREI7;
725  *Subtype = X86::INTEL_COREI7_SANDYBRIDGE;
726  break;
727  case 0x3a:
728  case 0x3e: // Ivy Bridge EP
729  CPU = "ivybridge";
730  *Type = X86::INTEL_COREI7;
731  *Subtype = X86::INTEL_COREI7_IVYBRIDGE;
732  break;
733 
734  // Haswell:
735  case 0x3c:
736  case 0x3f:
737  case 0x45:
738  case 0x46:
739  CPU = "haswell";
740  *Type = X86::INTEL_COREI7;
741  *Subtype = X86::INTEL_COREI7_HASWELL;
742  break;
743 
744  // Broadwell:
745  case 0x3d:
746  case 0x47:
747  case 0x4f:
748  case 0x56:
749  CPU = "broadwell";
750  *Type = X86::INTEL_COREI7;
751  *Subtype = X86::INTEL_COREI7_BROADWELL;
752  break;
753 
754  // Skylake:
755  case 0x4e: // Skylake mobile
756  case 0x5e: // Skylake desktop
757  case 0x8e: // Kaby Lake mobile
758  case 0x9e: // Kaby Lake desktop
759  case 0xa5: // Comet Lake-H/S
760  case 0xa6: // Comet Lake-U
761  CPU = "skylake";
762  *Type = X86::INTEL_COREI7;
763  *Subtype = X86::INTEL_COREI7_SKYLAKE;
764  break;
765 
766  // Rocketlake:
767  case 0xa7:
768  CPU = "rocketlake";
769  *Type = X86::INTEL_COREI7;
770  *Subtype = X86::INTEL_COREI7_ROCKETLAKE;
771  break;
772 
773  // Skylake Xeon:
774  case 0x55:
775  *Type = X86::INTEL_COREI7;
776  if (testFeature(X86::FEATURE_AVX512BF16)) {
777  CPU = "cooperlake";
778  *Subtype = X86::INTEL_COREI7_COOPERLAKE;
779  } else if (testFeature(X86::FEATURE_AVX512VNNI)) {
780  CPU = "cascadelake";
781  *Subtype = X86::INTEL_COREI7_CASCADELAKE;
782  } else {
783  CPU = "skylake-avx512";
784  *Subtype = X86::INTEL_COREI7_SKYLAKE_AVX512;
785  }
786  break;
787 
788  // Cannonlake:
789  case 0x66:
790  CPU = "cannonlake";
791  *Type = X86::INTEL_COREI7;
792  *Subtype = X86::INTEL_COREI7_CANNONLAKE;
793  break;
794 
795  // Icelake:
796  case 0x7d:
797  case 0x7e:
798  CPU = "icelake-client";
799  *Type = X86::INTEL_COREI7;
800  *Subtype = X86::INTEL_COREI7_ICELAKE_CLIENT;
801  break;
802 
803  // Tigerlake:
804  case 0x8c:
805  case 0x8d:
806  CPU = "tigerlake";
807  *Type = X86::INTEL_COREI7;
808  *Subtype = X86::INTEL_COREI7_TIGERLAKE;
809  break;
810 
811  // Alderlake:
812  case 0x97:
813  case 0x9a:
814  CPU = "alderlake";
815  *Type = X86::INTEL_COREI7;
816  *Subtype = X86::INTEL_COREI7_ALDERLAKE;
817  break;
818 
819  // Icelake Xeon:
820  case 0x6a:
821  case 0x6c:
822  CPU = "icelake-server";
823  *Type = X86::INTEL_COREI7;
824  *Subtype = X86::INTEL_COREI7_ICELAKE_SERVER;
825  break;
826 
827  // Sapphire Rapids:
828  case 0x8f:
829  CPU = "sapphirerapids";
830  *Type = X86::INTEL_COREI7;
831  *Subtype = X86::INTEL_COREI7_SAPPHIRERAPIDS;
832  break;
833 
834  case 0x1c: // Most 45 nm Intel Atom processors
835  case 0x26: // 45 nm Atom Lincroft
836  case 0x27: // 32 nm Atom Medfield
837  case 0x35: // 32 nm Atom Midview
838  case 0x36: // 32 nm Atom Midview
839  CPU = "bonnell";
840  *Type = X86::INTEL_BONNELL;
841  break;
842 
843  // Atom Silvermont codes from the Intel software optimization guide.
844  case 0x37:
845  case 0x4a:
846  case 0x4d:
847  case 0x5a:
848  case 0x5d:
849  case 0x4c: // really airmont
850  CPU = "silvermont";
851  *Type = X86::INTEL_SILVERMONT;
852  break;
853  // Goldmont:
854  case 0x5c: // Apollo Lake
855  case 0x5f: // Denverton
856  CPU = "goldmont";
857  *Type = X86::INTEL_GOLDMONT;
858  break;
859  case 0x7a:
860  CPU = "goldmont-plus";
861  *Type = X86::INTEL_GOLDMONT_PLUS;
862  break;
863  case 0x86:
864  CPU = "tremont";
865  *Type = X86::INTEL_TREMONT;
866  break;
867 
868  // Xeon Phi (Knights Landing + Knights Mill):
869  case 0x57:
870  CPU = "knl";
871  *Type = X86::INTEL_KNL;
872  break;
873  case 0x85:
874  CPU = "knm";
875  *Type = X86::INTEL_KNM;
876  break;
877 
878  default: // Unknown family 6 CPU, try to guess.
879  // Don't both with Type/Subtype here, they aren't used by the caller.
880  // They're used above to keep the code in sync with compiler-rt.
881  // TODO detect tigerlake host from model
882  if (testFeature(X86::FEATURE_AVX512VP2INTERSECT)) {
883  CPU = "tigerlake";
884  } else if (testFeature(X86::FEATURE_AVX512VBMI2)) {
885  CPU = "icelake-client";
886  } else if (testFeature(X86::FEATURE_AVX512VBMI)) {
887  CPU = "cannonlake";
888  } else if (testFeature(X86::FEATURE_AVX512BF16)) {
889  CPU = "cooperlake";
890  } else if (testFeature(X86::FEATURE_AVX512VNNI)) {
891  CPU = "cascadelake";
892  } else if (testFeature(X86::FEATURE_AVX512VL)) {
893  CPU = "skylake-avx512";
894  } else if (testFeature(X86::FEATURE_AVX512ER)) {
895  CPU = "knl";
896  } else if (testFeature(X86::FEATURE_CLFLUSHOPT)) {
897  if (testFeature(X86::FEATURE_SHA))
898  CPU = "goldmont";
899  else
900  CPU = "skylake";
901  } else if (testFeature(X86::FEATURE_ADX)) {
902  CPU = "broadwell";
903  } else if (testFeature(X86::FEATURE_AVX2)) {
904  CPU = "haswell";
905  } else if (testFeature(X86::FEATURE_AVX)) {
906  CPU = "sandybridge";
907  } else if (testFeature(X86::FEATURE_SSE4_2)) {
908  if (testFeature(X86::FEATURE_MOVBE))
909  CPU = "silvermont";
910  else
911  CPU = "nehalem";
912  } else if (testFeature(X86::FEATURE_SSE4_1)) {
913  CPU = "penryn";
914  } else if (testFeature(X86::FEATURE_SSSE3)) {
915  if (testFeature(X86::FEATURE_MOVBE))
916  CPU = "bonnell";
917  else
918  CPU = "core2";
919  } else if (testFeature(X86::FEATURE_64BIT)) {
920  CPU = "core2";
921  } else if (testFeature(X86::FEATURE_SSE3)) {
922  CPU = "yonah";
923  } else if (testFeature(X86::FEATURE_SSE2)) {
924  CPU = "pentium-m";
925  } else if (testFeature(X86::FEATURE_SSE)) {
926  CPU = "pentium3";
927  } else if (testFeature(X86::FEATURE_MMX)) {
928  CPU = "pentium2";
929  } else {
930  CPU = "pentiumpro";
931  }
932  break;
933  }
934  break;
935  case 15: {
936  if (testFeature(X86::FEATURE_64BIT)) {
937  CPU = "nocona";
938  break;
939  }
940  if (testFeature(X86::FEATURE_SSE3)) {
941  CPU = "prescott";
942  break;
943  }
944  CPU = "pentium4";
945  break;
946  }
947  default:
948  break; // Unknown.
949  }
950 
951  return CPU;
952 }
953 
954 static StringRef
955 getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model,
956  const unsigned *Features,
957  unsigned *Type, unsigned *Subtype) {
958  auto testFeature = [&](unsigned F) {
959  return (Features[F / 32] & (1U << (F % 32))) != 0;
960  };
961 
962  StringRef CPU;
963 
964  switch (Family) {
965  case 4:
966  CPU = "i486";
967  break;
968  case 5:
969  CPU = "pentium";
970  switch (Model) {
971  case 6:
972  case 7:
973  CPU = "k6";
974  break;
975  case 8:
976  CPU = "k6-2";
977  break;
978  case 9:
979  case 13:
980  CPU = "k6-3";
981  break;
982  case 10:
983  CPU = "geode";
984  break;
985  }
986  break;
987  case 6:
988  if (testFeature(X86::FEATURE_SSE)) {
989  CPU = "athlon-xp";
990  break;
991  }
992  CPU = "athlon";
993  break;
994  case 15:
995  if (testFeature(X86::FEATURE_SSE3)) {
996  CPU = "k8-sse3";
997  break;
998  }
999  CPU = "k8";
1000  break;
1001  case 16:
1002  CPU = "amdfam10";
1003  *Type = X86::AMDFAM10H; // "amdfam10"
1004  switch (Model) {
1005  case 2:
1006  *Subtype = X86::AMDFAM10H_BARCELONA;
1007  break;
1008  case 4:
1009  *Subtype = X86::AMDFAM10H_SHANGHAI;
1010  break;
1011  case 8:
1012  *Subtype = X86::AMDFAM10H_ISTANBUL;
1013  break;
1014  }
1015  break;
1016  case 20:
1017  CPU = "btver1";
1018  *Type = X86::AMD_BTVER1;
1019  break;
1020  case 21:
1021  CPU = "bdver1";
1022  *Type = X86::AMDFAM15H;
1023  if (Model >= 0x60 && Model <= 0x7f) {
1024  CPU = "bdver4";
1025  *Subtype = X86::AMDFAM15H_BDVER4;
1026  break; // 60h-7Fh: Excavator
1027  }
1028  if (Model >= 0x30 && Model <= 0x3f) {
1029  CPU = "bdver3";
1030  *Subtype = X86::AMDFAM15H_BDVER3;
1031  break; // 30h-3Fh: Steamroller
1032  }
1033  if ((Model >= 0x10 && Model <= 0x1f) || Model == 0x02) {
1034  CPU = "bdver2";
1035  *Subtype = X86::AMDFAM15H_BDVER2;
1036  break; // 02h, 10h-1Fh: Piledriver
1037  }
1038  if (Model <= 0x0f) {
1039  *Subtype = X86::AMDFAM15H_BDVER1;
1040  break; // 00h-0Fh: Bulldozer
1041  }
1042  break;
1043  case 22:
1044  CPU = "btver2";
1045  *Type = X86::AMD_BTVER2;
1046  break;
1047  case 23:
1048  CPU = "znver1";
1049  *Type = X86::AMDFAM17H;
1050  if ((Model >= 0x30 && Model <= 0x3f) || Model == 0x71) {
1051  CPU = "znver2";
1052  *Subtype = X86::AMDFAM17H_ZNVER2;
1053  break; // 30h-3fh, 71h: Zen2
1054  }
1055  if (Model <= 0x0f) {
1056  *Subtype = X86::AMDFAM17H_ZNVER1;
1057  break; // 00h-0Fh: Zen1
1058  }
1059  break;
1060  case 25:
1061  CPU = "znver3";
1062  *Type = X86::AMDFAM19H;
1063  if (Model <= 0x0f || Model == 0x21) {
1064  *Subtype = X86::AMDFAM19H_ZNVER3;
1065  break; // 00h-0Fh, 21h: Zen3
1066  }
1067  break;
1068  default:
1069  break; // Unknown AMD CPU.
1070  }
1071 
1072  return CPU;
1073 }
1074 
1075 static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
1076  unsigned *Features) {
1077  unsigned EAX, EBX;
1078 
1079  auto setFeature = [&](unsigned F) {
1080  Features[F / 32] |= 1U << (F % 32);
1081  };
1082 
1083  if ((EDX >> 15) & 1)
1084  setFeature(X86::FEATURE_CMOV);
1085  if ((EDX >> 23) & 1)
1086  setFeature(X86::FEATURE_MMX);
1087  if ((EDX >> 25) & 1)
1088  setFeature(X86::FEATURE_SSE);
1089  if ((EDX >> 26) & 1)
1090  setFeature(X86::FEATURE_SSE2);
1091 
1092  if ((ECX >> 0) & 1)
1093  setFeature(X86::FEATURE_SSE3);
1094  if ((ECX >> 1) & 1)
1095  setFeature(X86::FEATURE_PCLMUL);
1096  if ((ECX >> 9) & 1)
1097  setFeature(X86::FEATURE_SSSE3);
1098  if ((ECX >> 12) & 1)
1099  setFeature(X86::FEATURE_FMA);
1100  if ((ECX >> 19) & 1)
1101  setFeature(X86::FEATURE_SSE4_1);
1102  if ((ECX >> 20) & 1) {
1103  setFeature(X86::FEATURE_SSE4_2);
1104  setFeature(X86::FEATURE_CRC32);
1105  }
1106  if ((ECX >> 23) & 1)
1107  setFeature(X86::FEATURE_POPCNT);
1108  if ((ECX >> 25) & 1)
1109  setFeature(X86::FEATURE_AES);
1110 
1111  if ((ECX >> 22) & 1)
1112  setFeature(X86::FEATURE_MOVBE);
1113 
1114  // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV
1115  // indicates that the AVX registers will be saved and restored on context
1116  // switch, then we have full AVX support.
1117  const unsigned AVXBits = (1 << 27) | (1 << 28);
1118  bool HasAVX = ((ECX & AVXBits) == AVXBits) && !getX86XCR0(&EAX, &EDX) &&
1119  ((EAX & 0x6) == 0x6);
1120 #if defined(__APPLE__)
1121  // Darwin lazily saves the AVX512 context on first use: trust that the OS will
1122  // save the AVX512 context if we use AVX512 instructions, even the bit is not
1123  // set right now.
1124  bool HasAVX512Save = true;
1125 #else
1126  // AVX512 requires additional context to be saved by the OS.
1127  bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0);
1128 #endif
1129 
1130  if (HasAVX)
1131  setFeature(X86::FEATURE_AVX);
1132 
1133  bool HasLeaf7 =
1134  MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX);
1135 
1136  if (HasLeaf7 && ((EBX >> 3) & 1))
1137  setFeature(X86::FEATURE_BMI);
1138  if (HasLeaf7 && ((EBX >> 5) & 1) && HasAVX)
1139  setFeature(X86::FEATURE_AVX2);
1140  if (HasLeaf7 && ((EBX >> 8) & 1))
1141  setFeature(X86::FEATURE_BMI2);
1142  if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save)
1143  setFeature(X86::FEATURE_AVX512F);
1144  if (HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save)
1145  setFeature(X86::FEATURE_AVX512DQ);
1146  if (HasLeaf7 && ((EBX >> 19) & 1))
1147  setFeature(X86::FEATURE_ADX);
1148  if (HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save)
1149  setFeature(X86::FEATURE_AVX512IFMA);
1150  if (HasLeaf7 && ((EBX >> 23) & 1))
1151  setFeature(X86::FEATURE_CLFLUSHOPT);
1152  if (HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save)
1153  setFeature(X86::FEATURE_AVX512PF);
1154  if (HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save)
1155  setFeature(X86::FEATURE_AVX512ER);
1156  if (HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save)
1157  setFeature(X86::FEATURE_AVX512CD);
1158  if (HasLeaf7 && ((EBX >> 29) & 1))
1159  setFeature(X86::FEATURE_SHA);
1160  if (HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save)
1161  setFeature(X86::FEATURE_AVX512BW);
1162  if (HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save)
1163  setFeature(X86::FEATURE_AVX512VL);
1164 
1165  if (HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save)
1166  setFeature(X86::FEATURE_AVX512VBMI);
1167  if (HasLeaf7 && ((ECX >> 6) & 1) && HasAVX512Save)
1168  setFeature(X86::FEATURE_AVX512VBMI2);
1169  if (HasLeaf7 && ((ECX >> 8) & 1))
1170  setFeature(X86::FEATURE_GFNI);
1171  if (HasLeaf7 && ((ECX >> 10) & 1) && HasAVX)
1172  setFeature(X86::FEATURE_VPCLMULQDQ);
1173  if (HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save)
1174  setFeature(X86::FEATURE_AVX512VNNI);
1175  if (HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save)
1176  setFeature(X86::FEATURE_AVX512BITALG);
1177  if (HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save)
1178  setFeature(X86::FEATURE_AVX512VPOPCNTDQ);
1179 
1180  if (HasLeaf7 && ((EDX >> 2) & 1) && HasAVX512Save)
1181  setFeature(X86::FEATURE_AVX5124VNNIW);
1182  if (HasLeaf7 && ((EDX >> 3) & 1) && HasAVX512Save)
1183  setFeature(X86::FEATURE_AVX5124FMAPS);
1184  if (HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save)
1185  setFeature(X86::FEATURE_AVX512VP2INTERSECT);
1186 
1187  bool HasLeaf7Subleaf1 =
1188  MaxLeaf >= 7 && !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX);
1189  if (HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save)
1190  setFeature(X86::FEATURE_AVX512BF16);
1191 
1192  unsigned MaxExtLevel;
1193  getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX);
1194 
1195  bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 &&
1196  !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
1197  if (HasExtLeaf1 && ((ECX >> 6) & 1))
1198  setFeature(X86::FEATURE_SSE4_A);
1199  if (HasExtLeaf1 && ((ECX >> 11) & 1))
1200  setFeature(X86::FEATURE_XOP);
1201  if (HasExtLeaf1 && ((ECX >> 16) & 1))
1202  setFeature(X86::FEATURE_FMA4);
1203 
1204  if (HasExtLeaf1 && ((EDX >> 29) & 1))
1205  setFeature(X86::FEATURE_64BIT);
1206 }
1207 
1209  unsigned MaxLeaf = 0;
1210  const VendorSignatures Vendor = getVendorSignature(&MaxLeaf);
1211  if (Vendor == VendorSignatures::UNKNOWN)
1212  return "generic";
1213 
1214  unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
1215  getX86CpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX);
1216 
1217  unsigned Family = 0, Model = 0;
1218  unsigned Features[(X86::CPU_FEATURE_MAX + 31) / 32] = {0};
1219  detectX86FamilyModel(EAX, &Family, &Model);
1220  getAvailableFeatures(ECX, EDX, MaxLeaf, Features);
1221 
1222  // These aren't consumed in this file, but we try to keep some source code the
1223  // same or similar to compiler-rt.
1224  unsigned Type = 0;
1225  unsigned Subtype = 0;
1226 
1227  StringRef CPU;
1228 
1229  if (Vendor == VendorSignatures::GENUINE_INTEL) {
1230  CPU = getIntelProcessorTypeAndSubtype(Family, Model, Features, &Type,
1231  &Subtype);
1232  } else if (Vendor == VendorSignatures::AUTHENTIC_AMD) {
1233  CPU = getAMDProcessorTypeAndSubtype(Family, Model, Features, &Type,
1234  &Subtype);
1235  }
1236 
1237  if (!CPU.empty())
1238  return CPU;
1239 
1240  return "generic";
1241 }
1242 
1243 #elif defined(__APPLE__) && (defined(__ppc__) || defined(__powerpc__))
1245  host_basic_info_data_t hostInfo;
1246  mach_msg_type_number_t infoCount;
1247 
1248  infoCount = HOST_BASIC_INFO_COUNT;
1249  mach_port_t hostPort = mach_host_self();
1250  host_info(hostPort, HOST_BASIC_INFO, (host_info_t)&hostInfo,
1251  &infoCount);
1252  mach_port_deallocate(mach_task_self(), hostPort);
1253 
1254  if (hostInfo.cpu_type != CPU_TYPE_POWERPC)
1255  return "generic";
1256 
1257  switch (hostInfo.cpu_subtype) {
1259  return "601";
1261  return "602";
1263  return "603";
1265  return "603e";
1267  return "603ev";
1269  return "604";
1271  return "604e";
1273  return "620";
1275  return "750";
1277  return "7400";
1279  return "7450";
1281  return "970";
1282  default:;
1283  }
1284 
1285  return "generic";
1286 }
1287 #elif defined(__linux__) && (defined(__ppc__) || defined(__powerpc__))
1289  std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
1290  StringRef Content = P ? P->getBuffer() : "";
1292 }
1293 #elif defined(__linux__) && (defined(__arm__) || defined(__aarch64__))
1295  std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
1296  StringRef Content = P ? P->getBuffer() : "";
1298 }
1299 #elif defined(__linux__) && defined(__s390x__)
1301  std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
1302  StringRef Content = P ? P->getBuffer() : "";
1304 }
1305 #elif defined(__MVS__)
1307  // Get pointer to Communications Vector Table (CVT).
1308  // The pointer is located at offset 16 of the Prefixed Save Area (PSA).
1309  // It is stored as 31 bit pointer and will be zero-extended to 64 bit.
1310  int *StartToCVTOffset = reinterpret_cast<int *>(0x10);
1311  // Since its stored as a 31-bit pointer, get the 4 bytes from the start
1312  // of address.
1313  int ReadValue = *StartToCVTOffset;
1314  // Explicitly clear the high order bit.
1315  ReadValue = (ReadValue & 0x7FFFFFFF);
1316  char *CVT = reinterpret_cast<char *>(ReadValue);
1317  // The model number is located in the CVT prefix at offset -6 and stored as
1318  // signless packed decimal.
1319  uint16_t Id = *(uint16_t *)&CVT[-6];
1320  // Convert number to integer.
1321  Id = decodePackedBCD<uint16_t>(Id, false);
1322  // Check for vector support. It's stored in field CVTFLAG5 (offset 244),
1323  // bit CVTVEF (X'80'). The facilities list is part of the PSA but the vector
1324  // extension can only be used if bit CVTVEF is on.
1325  bool HaveVectorSupport = CVT[244] & 0x80;
1326  return getCPUNameFromS390Model(Id, HaveVectorSupport);
1327 }
1328 #elif defined(__APPLE__) && (defined(__arm__) || defined(__aarch64__))
1329 #define CPUFAMILY_ARM_SWIFT 0x1e2d6381
1330 #define CPUFAMILY_ARM_CYCLONE 0x37a09642
1331 #define CPUFAMILY_ARM_TYPHOON 0x2c91a47e
1332 #define CPUFAMILY_ARM_TWISTER 0x92fb37c8
1333 #define CPUFAMILY_ARM_HURRICANE 0x67ceee93
1334 #define CPUFAMILY_ARM_MONSOON_MISTRAL 0xe81e7ef6
1335 #define CPUFAMILY_ARM_VORTEX_TEMPEST 0x07d34b9f
1336 #define CPUFAMILY_ARM_LIGHTNING_THUNDER 0x462504d2
1337 #define CPUFAMILY_ARM_FIRESTORM_ICESTORM 0x1b588bb3
1338 
1340  uint32_t Family;
1341  size_t Length = sizeof(Family);
1342  sysctlbyname("hw.cpufamily", &Family, &Length, NULL, 0);
1343 
1344  switch (Family) {
1345  case CPUFAMILY_ARM_SWIFT:
1346  return "swift";
1347  case CPUFAMILY_ARM_CYCLONE:
1348  return "apple-a7";
1349  case CPUFAMILY_ARM_TYPHOON:
1350  return "apple-a8";
1351  case CPUFAMILY_ARM_TWISTER:
1352  return "apple-a9";
1353  case CPUFAMILY_ARM_HURRICANE:
1354  return "apple-a10";
1355  case CPUFAMILY_ARM_MONSOON_MISTRAL:
1356  return "apple-a11";
1357  case CPUFAMILY_ARM_VORTEX_TEMPEST:
1358  return "apple-a12";
1359  case CPUFAMILY_ARM_LIGHTNING_THUNDER:
1360  return "apple-a13";
1361  case CPUFAMILY_ARM_FIRESTORM_ICESTORM:
1362  return "apple-m1";
1363  default:
1364  // Default to the newest CPU we know about.
1365  return "apple-m1";
1366  }
1367 }
1368 #elif defined(_AIX)
1370  switch (_system_configuration.implementation) {
1371  case POWER_4:
1372  if (_system_configuration.version == PV_4_3)
1373  return "970";
1374  return "pwr4";
1375  case POWER_5:
1376  if (_system_configuration.version == PV_5)
1377  return "pwr5";
1378  return "pwr5x";
1379  case POWER_6:
1380  if (_system_configuration.version == PV_6_Compat)
1381  return "pwr6";
1382  return "pwr6x";
1383  case POWER_7:
1384  return "pwr7";
1385  case POWER_8:
1386  return "pwr8";
1387  case POWER_9:
1388  return "pwr9";
1389 // TODO: simplify this once the macro is available in all OS levels.
1390 #ifdef POWER_10
1391  case POWER_10:
1392 #else
1393  case 0x40000:
1394 #endif
1395  return "pwr10";
1396  default:
1397  return "generic";
1398  }
1399 }
1400 #elif defined(__riscv)
1402 #if defined(__linux__)
1403  std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
1404  StringRef Content = P ? P->getBuffer() : "";
1406 #else
1407 #if __riscv_xlen == 64
1408  return "generic-rv64";
1409 #elif __riscv_xlen == 32
1410  return "generic-rv32";
1411 #else
1412 #error "Unhandled value of __riscv_xlen"
1413 #endif
1414 #endif
1415 }
1416 #else
1417 StringRef sys::getHostCPUName() { return "generic"; }
1418 namespace llvm {
1419 namespace sys {
1420 namespace detail {
1421 namespace x86 {
1422 
1425 }
1426 
1427 } // namespace x86
1428 } // namespace detail
1429 } // namespace sys
1430 } // namespace llvm
1431 #endif
1432 
1433 #if defined(__linux__) && (defined(__i386__) || defined(__x86_64__))
1434 // On Linux, the number of physical cores can be computed from /proc/cpuinfo,
1435 // using the number of unique physical/core id pairs. The following
1436 // implementation reads the /proc/cpuinfo format on an x86_64 system.
1438  // Enabled represents the number of physical id/core id pairs with at least
1439  // one processor id enabled by the CPU affinity mask.
1440  cpu_set_t Affinity, Enabled;
1441  if (sched_getaffinity(0, sizeof(Affinity), &Affinity) != 0)
1442  return -1;
1443  CPU_ZERO(&Enabled);
1444 
1445  // Read /proc/cpuinfo as a stream (until EOF reached). It cannot be
1446  // mmapped because it appears to have 0 size.
1448  llvm::MemoryBuffer::getFileAsStream("/proc/cpuinfo");
1449  if (std::error_code EC = Text.getError()) {
1450  llvm::errs() << "Can't read "
1451  << "/proc/cpuinfo: " << EC.message() << "\n";
1452  return -1;
1453  }
1455  (*Text)->getBuffer().split(strs, "\n", /*MaxSplit=*/-1,
1456  /*KeepEmpty=*/false);
1457  int CurProcessor = -1;
1458  int CurPhysicalId = -1;
1459  int CurSiblings = -1;
1460  int CurCoreId = -1;
1461  for (StringRef Line : strs) {
1462  std::pair<StringRef, StringRef> Data = Line.split(':');
1463  auto Name = Data.first.trim();
1464  auto Val = Data.second.trim();
1465  // These fields are available if the kernel is configured with CONFIG_SMP.
1466  if (Name == "processor")
1467  Val.getAsInteger(10, CurProcessor);
1468  else if (Name == "physical id")
1469  Val.getAsInteger(10, CurPhysicalId);
1470  else if (Name == "siblings")
1471  Val.getAsInteger(10, CurSiblings);
1472  else if (Name == "core id") {
1473  Val.getAsInteger(10, CurCoreId);
1474  // The processor id corresponds to an index into cpu_set_t.
1475  if (CPU_ISSET(CurProcessor, &Affinity))
1476  CPU_SET(CurPhysicalId * CurSiblings + CurCoreId, &Enabled);
1477  }
1478  }
1479  return CPU_COUNT(&Enabled);
1480 }
1481 #elif defined(__linux__) && defined(__powerpc__)
/// Count the CPUs in this process's affinity mask on PowerPC Linux.
///
/// \returns the number of CPUs set in the affinity mask, or -1 if the mask
/// cannot be obtained.
int computeHostNumPhysicalCores() {
  cpu_set_t Affinity;
  if (sched_getaffinity(0, sizeof(Affinity), &Affinity) == 0)
    return CPU_COUNT(&Affinity);

  // The call to sched_getaffinity() may have failed because the Affinity
  // mask is too small for the number of CPU's on the system (i.e. the
  // system has more than 1024 CPUs). Allocate a mask large enough for
  // twice as many CPUs.
  constexpr int MaxCPUs = 2048;
  cpu_set_t *DynAffinity = CPU_ALLOC(MaxCPUs);
  if (!DynAffinity)
    return -1;

  const size_t DynAffinitySize = CPU_ALLOC_SIZE(MaxCPUs);
  int NumCPUs = -1;
  // A dynamically sized set must be counted with CPU_COUNT_S: plain CPU_COUNT
  // only inspects sizeof(cpu_set_t) bytes and would miss the upper CPUs.
  if (sched_getaffinity(0, DynAffinitySize, DynAffinity) == 0)
    NumCPUs = CPU_COUNT_S(DynAffinitySize, DynAffinity);
  // Release the set on both the success and the failure path.
  CPU_FREE(DynAffinity);
  return NumCPUs;
}
1500 #elif defined(__linux__) && defined(__s390x__)
/// s390x Linux: report the value of sysconf(_SC_NPROCESSORS_ONLN).
int computeHostNumPhysicalCores() {
  const long OnlineProcessors = sysconf(_SC_NPROCESSORS_ONLN);
  return static_cast<int>(OnlineProcessors);
}
1502 #elif defined(__APPLE__)
1503 // Gets the number of *physical cores* on the machine.
1505  uint32_t count;
1506  size_t len = sizeof(count);
1507  sysctlbyname("hw.physicalcpu", &count, &len, NULL, 0);
1508  if (count < 1) {
1509  int nm[2];
1510  nm[0] = CTL_HW;
1511  nm[1] = HW_AVAILCPU;
1512  sysctl(nm, 2, &count, &len, NULL, 0);
1513  if (count < 1)
1514  return -1;
1515  }
1516  return count;
1517 }
1518 #elif defined(__MVS__)
/// z/OS implementation: follow the PSA -> CVT -> CSD control-block chain and
/// return the number of online standard CPs stored in the CSD.
static int computeHostNumPhysicalCores() {
  enum {
    // Byte offset of the pointer to the Communications Vector Table (CVT) in
    // the Prefixed Save Area (PSA). The table entry is a 31-bit pointer and
    // will be zero-extended to uintptr_t.
    FLCCVT = 16,
    // Byte offset of the pointer to the Common System Data Area (CSD) in the
    // CVT. The table entry is a 31-bit pointer and will be zero-extended to
    // uintptr_t.
    CVTCSD = 660,
    // Byte offset to the number of live CPs in the LPAR, stored as a signed
    // 32-bit value in the table.
    CSD_NUMBER_ONLINE_STANDARD_CPS = 264,
  };
  // The PSA is addressed from absolute address 0.
  char *PSA = 0;
  char *CVT = reinterpret_cast<char *>(
      static_cast<uintptr_t>(reinterpret_cast<unsigned int &>(PSA[FLCCVT])));
  char *CSD = reinterpret_cast<char *>(
      static_cast<uintptr_t>(reinterpret_cast<unsigned int &>(CVT[CVTCSD])));
  return reinterpret_cast<int &>(CSD[CSD_NUMBER_ONLINE_STANDARD_CPS]);
}
1540 #elif defined(_WIN32) && LLVM_ENABLE_THREADS != 0
// Defined in llvm/lib/Support/Windows/Threading.inc
int computeHostNumPhysicalCores();
1543 #else
1544 // On other systems, return -1 to indicate unknown.
static int computeHostNumPhysicalCores() {
  // No detection routine exists for this platform; -1 means "unknown".
  constexpr int UnknownCoreCount = -1;
  return UnknownCoreCount;
}
1546 #endif
1547 
1549  static int NumCores = computeHostNumPhysicalCores();
1550  return NumCores;
1551 }
1552 
1553 #if defined(__i386__) || defined(_M_IX86) || \
1554  defined(__x86_64__) || defined(_M_X64)
1555 bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
1556  unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
1557  unsigned MaxLevel;
1558 
1559  if (getX86CpuIDAndInfo(0, &MaxLevel, &EBX, &ECX, &EDX) || MaxLevel < 1)
1560  return false;
1561 
1562  getX86CpuIDAndInfo(1, &EAX, &EBX, &ECX, &EDX);
1563 
1564  Features["cx8"] = (EDX >> 8) & 1;
1565  Features["cmov"] = (EDX >> 15) & 1;
1566  Features["mmx"] = (EDX >> 23) & 1;
1567  Features["fxsr"] = (EDX >> 24) & 1;
1568  Features["sse"] = (EDX >> 25) & 1;
1569  Features["sse2"] = (EDX >> 26) & 1;
1570 
1571  Features["sse3"] = (ECX >> 0) & 1;
1572  Features["pclmul"] = (ECX >> 1) & 1;
1573  Features["ssse3"] = (ECX >> 9) & 1;
1574  Features["cx16"] = (ECX >> 13) & 1;
1575  Features["sse4.1"] = (ECX >> 19) & 1;
1576  Features["sse4.2"] = (ECX >> 20) & 1;
1577  Features["crc32"] = Features["sse4.2"];
1578  Features["movbe"] = (ECX >> 22) & 1;
1579  Features["popcnt"] = (ECX >> 23) & 1;
1580  Features["aes"] = (ECX >> 25) & 1;
1581  Features["rdrnd"] = (ECX >> 30) & 1;
1582 
1583  // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV
1584  // indicates that the AVX registers will be saved and restored on context
1585  // switch, then we have full AVX support.
1586  bool HasXSave = ((ECX >> 27) & 1) && !getX86XCR0(&EAX, &EDX);
1587  bool HasAVXSave = HasXSave && ((ECX >> 28) & 1) && ((EAX & 0x6) == 0x6);
1588 #if defined(__APPLE__)
1589  // Darwin lazily saves the AVX512 context on first use: trust that the OS will
1590  // save the AVX512 context if we use AVX512 instructions, even the bit is not
1591  // set right now.
1592  bool HasAVX512Save = true;
1593 #else
1594  // AVX512 requires additional context to be saved by the OS.
1595  bool HasAVX512Save = HasAVXSave && ((EAX & 0xe0) == 0xe0);
1596 #endif
1597  // AMX requires additional context to be saved by the OS.
1598  const unsigned AMXBits = (1 << 17) | (1 << 18);
1599  bool HasAMXSave = HasXSave && ((EAX & AMXBits) == AMXBits);
1600 
1601  Features["avx"] = HasAVXSave;
1602  Features["fma"] = ((ECX >> 12) & 1) && HasAVXSave;
1603  // Only enable XSAVE if OS has enabled support for saving YMM state.
1604  Features["xsave"] = ((ECX >> 26) & 1) && HasAVXSave;
1605  Features["f16c"] = ((ECX >> 29) & 1) && HasAVXSave;
1606 
1607  unsigned MaxExtLevel;
1608  getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX);
1609 
1610  bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 &&
1611  !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
1612  Features["sahf"] = HasExtLeaf1 && ((ECX >> 0) & 1);
1613  Features["lzcnt"] = HasExtLeaf1 && ((ECX >> 5) & 1);
1614  Features["sse4a"] = HasExtLeaf1 && ((ECX >> 6) & 1);
1615  Features["prfchw"] = HasExtLeaf1 && ((ECX >> 8) & 1);
1616  Features["xop"] = HasExtLeaf1 && ((ECX >> 11) & 1) && HasAVXSave;
1617  Features["lwp"] = HasExtLeaf1 && ((ECX >> 15) & 1);
1618  Features["fma4"] = HasExtLeaf1 && ((ECX >> 16) & 1) && HasAVXSave;
1619  Features["tbm"] = HasExtLeaf1 && ((ECX >> 21) & 1);
1620  Features["mwaitx"] = HasExtLeaf1 && ((ECX >> 29) & 1);
1621 
1622  Features["64bit"] = HasExtLeaf1 && ((EDX >> 29) & 1);
1623 
1624  // Miscellaneous memory related features, detected by
1625  // using the 0x80000008 leaf of the CPUID instruction
1626  bool HasExtLeaf8 = MaxExtLevel >= 0x80000008 &&
1627  !getX86CpuIDAndInfo(0x80000008, &EAX, &EBX, &ECX, &EDX);
1628  Features["clzero"] = HasExtLeaf8 && ((EBX >> 0) & 1);
1629  Features["wbnoinvd"] = HasExtLeaf8 && ((EBX >> 9) & 1);
1630 
1631  bool HasLeaf7 =
1632  MaxLevel >= 7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX);
1633 
1634  Features["fsgsbase"] = HasLeaf7 && ((EBX >> 0) & 1);
1635  Features["sgx"] = HasLeaf7 && ((EBX >> 2) & 1);
1636  Features["bmi"] = HasLeaf7 && ((EBX >> 3) & 1);
1637  // AVX2 is only supported if we have the OS save support from AVX.
1638  Features["avx2"] = HasLeaf7 && ((EBX >> 5) & 1) && HasAVXSave;
1639  Features["bmi2"] = HasLeaf7 && ((EBX >> 8) & 1);
1640  Features["invpcid"] = HasLeaf7 && ((EBX >> 10) & 1);
1641  Features["rtm"] = HasLeaf7 && ((EBX >> 11) & 1);
1642  // AVX512 is only supported if the OS supports the context save for it.
1643  Features["avx512f"] = HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save;
1644  Features["avx512dq"] = HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save;
1645  Features["rdseed"] = HasLeaf7 && ((EBX >> 18) & 1);
1646  Features["adx"] = HasLeaf7 && ((EBX >> 19) & 1);
1647  Features["avx512ifma"] = HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save;
1648  Features["clflushopt"] = HasLeaf7 && ((EBX >> 23) & 1);
1649  Features["clwb"] = HasLeaf7 && ((EBX >> 24) & 1);
1650  Features["avx512pf"] = HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save;
1651  Features["avx512er"] = HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save;
1652  Features["avx512cd"] = HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save;
1653  Features["sha"] = HasLeaf7 && ((EBX >> 29) & 1);
1654  Features["avx512bw"] = HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save;
1655  Features["avx512vl"] = HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save;
1656 
1657  Features["prefetchwt1"] = HasLeaf7 && ((ECX >> 0) & 1);
1658  Features["avx512vbmi"] = HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save;
1659  Features["pku"] = HasLeaf7 && ((ECX >> 4) & 1);
1660  Features["waitpkg"] = HasLeaf7 && ((ECX >> 5) & 1);
1661  Features["avx512vbmi2"] = HasLeaf7 && ((ECX >> 6) & 1) && HasAVX512Save;
1662  Features["shstk"] = HasLeaf7 && ((ECX >> 7) & 1);
1663  Features["gfni"] = HasLeaf7 && ((ECX >> 8) & 1);
1664  Features["vaes"] = HasLeaf7 && ((ECX >> 9) & 1) && HasAVXSave;
1665  Features["vpclmulqdq"] = HasLeaf7 && ((ECX >> 10) & 1) && HasAVXSave;
1666  Features["avx512vnni"] = HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save;
1667  Features["avx512bitalg"] = HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save;
1668  Features["avx512vpopcntdq"] = HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save;
1669  Features["rdpid"] = HasLeaf7 && ((ECX >> 22) & 1);
1670  Features["kl"] = HasLeaf7 && ((ECX >> 23) & 1); // key locker
1671  Features["cldemote"] = HasLeaf7 && ((ECX >> 25) & 1);
1672  Features["movdiri"] = HasLeaf7 && ((ECX >> 27) & 1);
1673  Features["movdir64b"] = HasLeaf7 && ((ECX >> 28) & 1);
1674  Features["enqcmd"] = HasLeaf7 && ((ECX >> 29) & 1);
1675 
1676  Features["uintr"] = HasLeaf7 && ((EDX >> 5) & 1);
1677  Features["avx512vp2intersect"] =
1678  HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save;
1679  Features["serialize"] = HasLeaf7 && ((EDX >> 14) & 1);
1680  Features["tsxldtrk"] = HasLeaf7 && ((EDX >> 16) & 1);
1681  // There are two CPUID leafs which information associated with the pconfig
1682  // instruction:
1683  // EAX=0x7, ECX=0x0 indicates the availability of the instruction (via the 18th
1684  // bit of EDX), while the EAX=0x1b leaf returns information on the
1685  // availability of specific pconfig leafs.
1686  // The target feature here only refers to the the first of these two.
1687  // Users might need to check for the availability of specific pconfig
1688  // leaves using cpuid, since that information is ignored while
1689  // detecting features using the "-march=native" flag.
1690  // For more info, see X86 ISA docs.
1691  Features["pconfig"] = HasLeaf7 && ((EDX >> 18) & 1);
1692  Features["amx-bf16"] = HasLeaf7 && ((EDX >> 22) & 1) && HasAMXSave;
1693  Features["avx512fp16"] = HasLeaf7 && ((EDX >> 23) & 1) && HasAVX512Save;
1694  Features["amx-tile"] = HasLeaf7 && ((EDX >> 24) & 1) && HasAMXSave;
1695  Features["amx-int8"] = HasLeaf7 && ((EDX >> 25) & 1) && HasAMXSave;
1696  bool HasLeaf7Subleaf1 =
1697  MaxLevel >= 7 && !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX);
1698  Features["avxvnni"] = HasLeaf7Subleaf1 && ((EAX >> 4) & 1) && HasAVXSave;
1699  Features["avx512bf16"] = HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save;
1700  Features["hreset"] = HasLeaf7Subleaf1 && ((EAX >> 22) & 1);
1701 
1702  bool HasLeafD = MaxLevel >= 0xd &&
1703  !getX86CpuIDAndInfoEx(0xd, 0x1, &EAX, &EBX, &ECX, &EDX);
1704 
1705  // Only enable XSAVE if OS has enabled support for saving YMM state.
1706  Features["xsaveopt"] = HasLeafD && ((EAX >> 0) & 1) && HasAVXSave;
1707  Features["xsavec"] = HasLeafD && ((EAX >> 1) & 1) && HasAVXSave;
1708  Features["xsaves"] = HasLeafD && ((EAX >> 3) & 1) && HasAVXSave;
1709 
1710  bool HasLeaf14 = MaxLevel >= 0x14 &&
1711  !getX86CpuIDAndInfoEx(0x14, 0x0, &EAX, &EBX, &ECX, &EDX);
1712 
1713  Features["ptwrite"] = HasLeaf14 && ((EBX >> 4) & 1);
1714 
1715  bool HasLeaf19 =
1716  MaxLevel >= 0x19 && !getX86CpuIDAndInfo(0x19, &EAX, &EBX, &ECX, &EDX);
1717  Features["widekl"] = HasLeaf7 && HasLeaf19 && ((EBX >> 2) & 1);
1718 
1719  return true;
1720 }
1721 #elif defined(__linux__) && (defined(__arm__) || defined(__aarch64__))
1722 bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
1723  std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
1724  if (!P)
1725  return false;
1726 
1728  P->getBuffer().split(Lines, "\n");
1729 
1730  SmallVector<StringRef, 32> CPUFeatures;
1731 
1732  // Look for the CPU features.
1733  for (unsigned I = 0, E = Lines.size(); I != E; ++I)
1734  if (Lines[I].startswith("Features")) {
1735  Lines[I].split(CPUFeatures, ' ');
1736  break;
1737  }
1738 
1739 #if defined(__aarch64__)
1740  // Keep track of which crypto features we have seen
1741  enum { CAP_AES = 0x1, CAP_PMULL = 0x2, CAP_SHA1 = 0x4, CAP_SHA2 = 0x8 };
1742  uint32_t crypto = 0;
1743 #endif
1744 
1745  for (unsigned I = 0, E = CPUFeatures.size(); I != E; ++I) {
1746  StringRef LLVMFeatureStr = StringSwitch<StringRef>(CPUFeatures[I])
1747 #if defined(__aarch64__)
1748  .Case("asimd", "neon")
1749  .Case("fp", "fp-armv8")
1750  .Case("crc32", "crc")
1751  .Case("atomics", "lse")
1752  .Case("sve", "sve")
1753  .Case("sve2", "sve2")
1754 #else
1755  .Case("half", "fp16")
1756  .Case("neon", "neon")
1757  .Case("vfpv3", "vfp3")
1758  .Case("vfpv3d16", "d16")
1759  .Case("vfpv4", "vfp4")
1760  .Case("idiva", "hwdiv-arm")
1761  .Case("idivt", "hwdiv")
1762 #endif
1763  .Default("");
1764 
1765 #if defined(__aarch64__)
1766  // We need to check crypto separately since we need all of the crypto
1767  // extensions to enable the subtarget feature
1768  if (CPUFeatures[I] == "aes")
1769  crypto |= CAP_AES;
1770  else if (CPUFeatures[I] == "pmull")
1771  crypto |= CAP_PMULL;
1772  else if (CPUFeatures[I] == "sha1")
1773  crypto |= CAP_SHA1;
1774  else if (CPUFeatures[I] == "sha2")
1775  crypto |= CAP_SHA2;
1776 #endif
1777 
1778  if (LLVMFeatureStr != "")
1779  Features[LLVMFeatureStr] = true;
1780  }
1781 
1782 #if defined(__aarch64__)
1783  // If we have all crypto bits we can add the feature
1784  if (crypto == (CAP_AES | CAP_PMULL | CAP_SHA1 | CAP_SHA2))
1785  Features["crypto"] = true;
1786 #endif
1787 
1788  return true;
1789 }
1790 #elif defined(_WIN32) && (defined(__aarch64__) || defined(_M_ARM64))
1791 bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
1792  if (IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE))
1793  Features["neon"] = true;
1794  if (IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE))
1795  Features["crc"] = true;
1796  if (IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE))
1797  Features["crypto"] = true;
1798 
1799  return true;
1800 }
1801 #else
1802 bool sys::getHostCPUFeatures(StringMap<bool> &Features) { return false; }
1803 #endif
1804 
1805 std::string sys::getProcessTriple() {
1806  std::string TargetTripleString = updateTripleOSVersion(LLVM_HOST_TRIPLE);
1807  Triple PT(Triple::normalize(TargetTripleString));
1808 
1809  if (sizeof(void *) == 8 && PT.isArch32Bit())
1810  PT = PT.get64BitArchVariant();
1811  if (sizeof(void *) == 4 && PT.isArch64Bit())
1812  PT = PT.get32BitArchVariant();
1813 
1814  return PT.str();
1815 }
llvm::StringSwitch::Case
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:69
MemoryBuffer.h
llvm::N86::EBX
@ EBX
Definition: X86MCTargetDesc.h:51
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:17
llvm::StringRef::endswith
LLVM_NODISCARD bool endswith(StringRef Suffix) const
Check if this string ends with the given Suffix.
Definition: StringRef.h:301
llvm::sys::detail::x86::VendorSignatures::GENUINE_INTEL
@ GENUINE_INTEL
StringRef.h
P
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
Definition: README-SSE.txt:411
llvm::StringSwitch::Default
LLVM_NODISCARD R Default(T Value)
Definition: StringSwitch.h:183
llvm::MachO::CPU_SUBTYPE_POWERPC_603ev
@ CPU_SUBTYPE_POWERPC_603ev
Definition: MachO.h:1535
llvm::StringRef::npos
static constexpr size_t npos
Definition: StringRef.h:60
Host.h
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1185
Content
T Content
Definition: ELFObjHandler.cpp:88
llvm::Triple::get32BitArchVariant
llvm::Triple get32BitArchVariant() const
Form a triple with a 32-bit variant of the current architecture.
Definition: Triple.cpp:1456
llvm::Triple
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
Host.inc
startswith
static bool startswith(StringRef Magic, const char(&S)[N])
Definition: Magic.cpp:28
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::sys::getHostCPUFeatures
bool getHostCPUFeatures(StringMap< bool, MallocAllocator > &Features)
getHostCPUFeatures - Get the LLVM names for the host CPU features.
llvm::sys::detail::x86::VendorSignatures::UNKNOWN
@ UNKNOWN
llvm::errs
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
Definition: raw_ostream.cpp:893
llvm::MachO::CPU_SUBTYPE_POWERPC_7450
@ CPU_SUBTYPE_POWERPC_7450
Definition: MachO.h:1541
llvm::N86::ECX
@ ECX
Definition: X86MCTargetDesc.h:51
LLVM_ATTRIBUTE_UNUSED
#define LLVM_ATTRIBUTE_UNUSED
Definition: Compiler.h:182
llvm::MachO::CPU_SUBTYPE_POWERPC_603
@ CPU_SUBTYPE_POWERPC_603
Definition: MachO.h:1533
x86
Note that only the low bits of effective_addr2 are used On bit we don t eliminate the computation of the top half of effective_addr2 because we don t have whole function selection dags On x86
Definition: README.txt:318
llvm::Data
@ Data
Definition: SIMachineScheduler.h:55
llvm::N86::EAX
@ EAX
Definition: X86MCTargetDesc.h:51
llvm::MachO::CPU_SUBTYPE_POWERPC_604e
@ CPU_SUBTYPE_POWERPC_604e
Definition: MachO.h:1537
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::Triple::isArch64Bit
bool isArch64Bit() const
Test whether the architecture is 64-bit.
Definition: Triple.cpp:1444
llvm::MemoryBuffer::getFileAsStream
static ErrorOr< std::unique_ptr< MemoryBuffer > > getFileAsStream(const Twine &Filename)
Read all of the specified file into a MemoryBuffer as a stream (i.e.
Definition: MemoryBuffer.cpp:523
llvm::Reloc::Model
Model
Definition: CodeGen.h:22
llvm::sys::detail::x86
Helper functions to extract CPU details from CPUID on x86.
Definition: Host.h:71
llvm::Triple::get64BitArchVariant
llvm::Triple get64BitArchVariant() const
Form a triple with a 64-bit variant of the current architecture.
Definition: Triple.cpp:1535
llvm::sys::detail::x86::VendorSignatures::AUTHENTIC_AMD
@ AUTHENTIC_AMD
llvm::Triple::str
const std::string & str() const
Definition: Triple.h:404
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::X86::CPU_FEATURE_MAX
@ CPU_FEATURE_MAX
Definition: X86TargetParser.h:60
llvm::StringRef::split
LLVM_NODISCARD std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition: StringRef.h:753
llvm::MachO::CPU_SUBTYPE_POWERPC_602
@ CPU_SUBTYPE_POWERPC_602
Definition: MachO.h:1532
llvm::ms_demangle::QualifierMangleMode::Result
@ Result
llvm::MachO::CPU_TYPE_POWERPC
@ CPU_TYPE_POWERPC
Definition: MachO.h:1452
llvm::ErrorOr::getError
std::error_code getError() const
Definition: ErrorOr.h:153
BCD.h
llvm::N86::EDX
@ EDX
Definition: X86MCTargetDesc.h:51
computeHostNumPhysicalCores
static int computeHostNumPhysicalCores()
Definition: Host.cpp:1545
StringMap.h
llvm::StringMap< bool >
getProcCpuinfoContent
static std::unique_ptr< llvm::MemoryBuffer > LLVM_ATTRIBUTE_UNUSED getProcCpuinfoContent()
Definition: Host.cpp:62
X86TargetParser.h
llvm::AMDGPU::FEATURE_FMA
@ FEATURE_FMA
Definition: TargetParser.h:125
llvm::StringRef::empty
constexpr LLVM_NODISCARD bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:153
llvm::count
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the given range Range.
Definition: STLExtras.h:1709
uint64_t
llvm::StringRef::end
iterator end() const
Definition: StringRef.h:130
llvm::Triple::normalize
std::string normalize() const
Return the normalized form of this triple's string.
Definition: Triple.h:338
llvm::Triple::isArch32Bit
bool isArch32Bit() const
Test whether the architecture is 32-bit.
Definition: Triple.cpp:1448
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
llvm::MachO::CPU_SUBTYPE_POWERPC_620
@ CPU_SUBTYPE_POWERPC_620
Definition: MachO.h:1538
Host.inc
llvm::MachO::CPU_SUBTYPE_POWERPC_750
@ CPU_SUBTYPE_POWERPC_750
Definition: MachO.h:1539
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::sys::detail::getHostCPUNameForRISCV
StringRef getHostCPUNameForRISCV(StringRef ProcCpuinfoContent)
Definition: Host.cpp:389
llvm::codeview::CompileSym2Flags::EC
@ EC
llvm::MachO::CPU_SUBTYPE_POWERPC_601
@ CPU_SUBTYPE_POWERPC_601
Definition: MachO.h:1531
llvm::support::aligned
@ aligned
Definition: Endian.h:30
Triple.h
llvm::sys::getHostNumPhysicalCores
int getHostNumPhysicalCores()
Get the number of physical cores (as opposed to logical cores returned from thread::hardware_concurrency(), which includes hyperthreads).
Definition: Host.cpp:1548
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Definition: StringRef.h:58
llvm::MachO::CPU_SUBTYPE_POWERPC_604
@ CPU_SUBTYPE_POWERPC_604
Definition: MachO.h:1536
llvm::sys::detail::getHostCPUNameForBPF
StringRef getHostCPUNameForBPF()
Definition: Host.cpp:409
uint32_t
LLVM_FALLTHROUGH
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition: Compiler.h:280
llvm::LPAC::UNKNOWN
@ UNKNOWN
Definition: LanaiAluCode.h:40
llvm::GraphProgram::Name
Name
Definition: GraphWriter.h:50
llvm::MachO::CPU_SUBTYPE_POWERPC_7400
@ CPU_SUBTYPE_POWERPC_7400
Definition: MachO.h:1540
llvm::sys::detail::getHostCPUNameForS390x
StringRef getHostCPUNameForS390x(StringRef ProcCpuinfoContent)
Definition: Host.cpp:344
uint16_t
llvm::sys::detail::x86::VendorSignatures
VendorSignatures
Definition: Host.h:72
llvm::sys::getProcessTriple
std::string getProcessTriple()
getProcessTriple() - Return an appropriate target triple for generating code to be loaded into the current process, e.g. when using the JIT.
Definition: Host.cpp:1805
llvm::MachO::CPU_SUBTYPE_POWERPC_603e
@ CPU_SUBTYPE_POWERPC_603e
Definition: MachO.h:1534
Enabled
static bool Enabled
Definition: Statistic.cpp:46
StringSwitch.h
SmallVector.h
llvm::sys::detail::getHostCPUNameForARM
StringRef getHostCPUNameForARM(StringRef ProcCpuinfoContent)
Definition: Host.cpp:156
llvm::sys::detail::getHostCPUNameForPowerPC
StringRef getHostCPUNameForPowerPC(StringRef ProcCpuinfoContent)
Helper functions to extract HostCPUName from /proc/cpuinfo on linux.
Definition: Host.cpp:73
llvm::ErrorOr
Represents either an error or a value T.
Definition: ErrorOr.h:56
llvm::StringRef::const_iterator
const char * const_iterator
Definition: StringRef.h:63
llvm::sys::getHostCPUName
StringRef getHostCPUName()
getHostCPUName - Get the LLVM name for the host CPU.
Definition: Host.cpp:1417
llvm::StringSwitch
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:44
llvm::sys::detail::x86::getVendorSignature
VendorSignatures getVendorSignature(unsigned *MaxLeaf=nullptr)
Returns the host CPU's vendor.
Definition: Host.cpp:1423
raw_ostream.h
llvm::AMDGPU::VGPRIndexMode::Id
Id
Definition: SIDefines.h:238
registers
Implement PPCInstrInfo::isLoadFromStackSlot isStoreToStackSlot for vector registers
Definition: README_ALTIVEC.txt:4
llvm::StringRef::begin
iterator begin() const
Definition: StringRef.h:128
llvm::MachO::CPU_SUBTYPE_POWERPC_970
@ CPU_SUBTYPE_POWERPC_970
Definition: MachO.h:1542