1//===-- Host.cpp - Implement OS Host Detection ------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9//  This file implements the operating system Host detection.
10//
11//===----------------------------------------------------------------------===//
12
13#include "llvm/TargetParser/Host.h"
14#include "llvm/ADT/SmallVector.h"
15#include "llvm/ADT/StringMap.h"
16#include "llvm/ADT/StringRef.h"
17#include "llvm/ADT/StringSwitch.h"
18#include "llvm/Config/llvm-config.h"
19#include "llvm/Support/MemoryBuffer.h"
20#include "llvm/Support/raw_ostream.h"
21#include "llvm/TargetParser/Triple.h"
22#include "llvm/TargetParser/X86TargetParser.h"
23#include <string.h>
24
25// Include the platform-specific parts of this class.
26#ifdef LLVM_ON_UNIX
27#include "Unix/Host.inc"
28#include <sched.h>
29#endif
30#ifdef _WIN32
31#include "Windows/Host.inc"
32#endif
33#ifdef _MSC_VER
34#include <intrin.h>
35#endif
36#ifdef __MVS__
37#include "llvm/Support/BCD.h"
38#endif
39#if defined(__APPLE__)
40#include <mach/host_info.h>
41#include <mach/mach.h>
42#include <mach/mach_host.h>
43#include <mach/machine.h>
44#include <sys/param.h>
45#include <sys/sysctl.h>
46#endif
47#ifdef _AIX
48#include <sys/systemcfg.h>
49#endif
50#if defined(__sun__) && defined(__svr4__)
51#include <kstat.h>
52#endif
53
54#define DEBUG_TYPE "host-detection"
55
56//===----------------------------------------------------------------------===//
57//
58//  Implementations of the CPU detection routines
59//
60//===----------------------------------------------------------------------===//
61
62using namespace llvm;
63
64static std::unique_ptr<llvm::MemoryBuffer>
65    LLVM_ATTRIBUTE_UNUSED getProcCpuinfoContent() {
66  llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text =
67      llvm::MemoryBuffer::getFileAsStream("/proc/cpuinfo");
68  if (std::error_code EC = Text.getError()) {
69    llvm::errs() << "Can't read "
70                 << "/proc/cpuinfo: " << EC.message() << "\n";
71    return nullptr;
72  }
73  return std::move(*Text);
74}
75
76StringRef sys::detail::getHostCPUNameForPowerPC(StringRef ProcCpuinfoContent) {
77  // Access to the Processor Version Register (PVR) on PowerPC is privileged,
78  // and so we must use an operating-system interface to determine the current
79  // processor type. On Linux, this is exposed through the /proc/cpuinfo file.
80  const char *generic = "generic";
81
82  // The cpu line is second (after the 'processor: 0' line), so if this
83  // buffer is too small then something has changed (or is wrong).
84  StringRef::const_iterator CPUInfoStart = ProcCpuinfoContent.begin();
85  StringRef::const_iterator CPUInfoEnd = ProcCpuinfoContent.end();
86
87  StringRef::const_iterator CIP = CPUInfoStart;
88
89  StringRef::const_iterator CPUStart = nullptr;
90  size_t CPULen = 0;
91
92  // We need to find the first line which starts with cpu, spaces, and a colon.
93  // After the colon, there may be some additional spaces and then the cpu type.
94  while (CIP < CPUInfoEnd && CPUStart == nullptr) {
95    if (CIP < CPUInfoEnd && *CIP == '\n')
96      ++CIP;
97
98    if (CIP < CPUInfoEnd && *CIP == 'c') {
99      ++CIP;
100      if (CIP < CPUInfoEnd && *CIP == 'p') {
101        ++CIP;
102        if (CIP < CPUInfoEnd && *CIP == 'u') {
103          ++CIP;
104          while (CIP < CPUInfoEnd && (*CIP == ' ' || *CIP == '\t'))
105            ++CIP;
106
107          if (CIP < CPUInfoEnd && *CIP == ':') {
108            ++CIP;
109            while (CIP < CPUInfoEnd && (*CIP == ' ' || *CIP == '\t'))
110              ++CIP;
111
112            if (CIP < CPUInfoEnd) {
113              CPUStart = CIP;
114              while (CIP < CPUInfoEnd && (*CIP != ' ' && *CIP != '\t' &&
115                                          *CIP != ',' && *CIP != '\n'))
116                ++CIP;
117              CPULen = CIP - CPUStart;
118            }
119          }
120        }
121      }
122    }
123
124    if (CPUStart == nullptr)
125      while (CIP < CPUInfoEnd && *CIP != '\n')
126        ++CIP;
127  }
128
129  if (CPUStart == nullptr)
130    return generic;
131
132  return StringSwitch<const char *>(StringRef(CPUStart, CPULen))
133      .Case("604e", "604e")
134      .Case("604", "604")
135      .Case("7400", "7400")
136      .Case("7410", "7400")
137      .Case("7447", "7400")
138      .Case("7455", "7450")
139      .Case("G4", "g4")
140      .Case("POWER4", "970")
141      .Case("PPC970FX", "970")
142      .Case("PPC970MP", "970")
143      .Case("G5", "g5")
144      .Case("POWER5", "g5")
145      .Case("A2", "a2")
146      .Case("POWER6", "pwr6")
147      .Case("POWER7", "pwr7")
148      .Case("POWER8", "pwr8")
149      .Case("POWER8E", "pwr8")
150      .Case("POWER8NVL", "pwr8")
151      .Case("POWER9", "pwr9")
152      .Case("POWER10", "pwr10")
153      // FIXME: If we get a simulator or machine with the capabilities of
154      // mcpu=future, we should revisit this and add the name reported by the
155      // simulator/machine.
156      .Default(generic);
157}
158
159StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) {
160  // The cpuid register on arm is not accessible from user space. On Linux,
161  // it is exposed through the /proc/cpuinfo file.
162
163  // Read 32 lines from /proc/cpuinfo, which should contain the CPU part line
164  // in all cases.
165  SmallVector<StringRef, 32> Lines;
166  ProcCpuinfoContent.split(Lines, "\n");
167
168  // Look for the CPU implementer line.
169  StringRef Implementer;
170  StringRef Hardware;
171  StringRef Part;
172  for (unsigned I = 0, E = Lines.size(); I != E; ++I) {
173    if (Lines[I].starts_with("CPU implementer"))
174      Implementer = Lines[I].substr(15).ltrim("\t :");
175    if (Lines[I].starts_with("Hardware"))
176      Hardware = Lines[I].substr(8).ltrim("\t :");
177    if (Lines[I].starts_with("CPU part"))
178      Part = Lines[I].substr(8).ltrim("\t :");
179  }
180
181  if (Implementer == "0x41") { // ARM Ltd.
182    // MSM8992/8994 may give cpu part for the core that the kernel is running on,
183    // which is undeterministic and wrong. Always return cortex-a53 for these SoC.
184    if (Hardware.ends_with("MSM8994") || Hardware.ends_with("MSM8996"))
185      return "cortex-a53";
186
187
188    // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The
189    // values correspond to the "Part number" in the CP15/c0 register. The
190    // contents are specified in the various processor manuals.
191    // This corresponds to the Main ID Register in Technical Reference Manuals.
192    // and is used in programs like sys-utils
193    return StringSwitch<const char *>(Part)
194        .Case("0x926", "arm926ej-s")
195        .Case("0xb02", "mpcore")
196        .Case("0xb36", "arm1136j-s")
197        .Case("0xb56", "arm1156t2-s")
198        .Case("0xb76", "arm1176jz-s")
199        .Case("0xc08", "cortex-a8")
200        .Case("0xc09", "cortex-a9")
201        .Case("0xc0f", "cortex-a15")
202        .Case("0xc20", "cortex-m0")
203        .Case("0xc23", "cortex-m3")
204        .Case("0xc24", "cortex-m4")
205        .Case("0xd24", "cortex-m52")
206        .Case("0xd22", "cortex-m55")
207        .Case("0xd02", "cortex-a34")
208        .Case("0xd04", "cortex-a35")
209        .Case("0xd03", "cortex-a53")
210        .Case("0xd05", "cortex-a55")
211        .Case("0xd46", "cortex-a510")
212        .Case("0xd80", "cortex-a520")
213        .Case("0xd07", "cortex-a57")
214        .Case("0xd08", "cortex-a72")
215        .Case("0xd09", "cortex-a73")
216        .Case("0xd0a", "cortex-a75")
217        .Case("0xd0b", "cortex-a76")
218        .Case("0xd0d", "cortex-a77")
219        .Case("0xd41", "cortex-a78")
220        .Case("0xd47", "cortex-a710")
221        .Case("0xd4d", "cortex-a715")
222        .Case("0xd81", "cortex-a720")
223        .Case("0xd44", "cortex-x1")
224        .Case("0xd4c", "cortex-x1c")
225        .Case("0xd48", "cortex-x2")
226        .Case("0xd4e", "cortex-x3")
227        .Case("0xd82", "cortex-x4")
228        .Case("0xd0c", "neoverse-n1")
229        .Case("0xd49", "neoverse-n2")
230        .Case("0xd40", "neoverse-v1")
231        .Case("0xd4f", "neoverse-v2")
232        .Default("generic");
233  }
234
235  if (Implementer == "0x42" || Implementer == "0x43") { // Broadcom | Cavium.
236    return StringSwitch<const char *>(Part)
237      .Case("0x516", "thunderx2t99")
238      .Case("0x0516", "thunderx2t99")
239      .Case("0xaf", "thunderx2t99")
240      .Case("0x0af", "thunderx2t99")
241      .Case("0xa1", "thunderxt88")
242      .Case("0x0a1", "thunderxt88")
243      .Default("generic");
244  }
245
246  if (Implementer == "0x46") { // Fujitsu Ltd.
247    return StringSwitch<const char *>(Part)
248      .Case("0x001", "a64fx")
249      .Default("generic");
250  }
251
252  if (Implementer == "0x4e") { // NVIDIA Corporation
253    return StringSwitch<const char *>(Part)
254        .Case("0x004", "carmel")
255        .Default("generic");
256  }
257
258  if (Implementer == "0x48") // HiSilicon Technologies, Inc.
259    // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The
260    // values correspond to the "Part number" in the CP15/c0 register. The
261    // contents are specified in the various processor manuals.
262    return StringSwitch<const char *>(Part)
263      .Case("0xd01", "tsv110")
264      .Default("generic");
265
266  if (Implementer == "0x51") // Qualcomm Technologies, Inc.
267    // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The
268    // values correspond to the "Part number" in the CP15/c0 register. The
269    // contents are specified in the various processor manuals.
270    return StringSwitch<const char *>(Part)
271        .Case("0x06f", "krait") // APQ8064
272        .Case("0x201", "kryo")
273        .Case("0x205", "kryo")
274        .Case("0x211", "kryo")
275        .Case("0x800", "cortex-a73") // Kryo 2xx Gold
276        .Case("0x801", "cortex-a73") // Kryo 2xx Silver
277        .Case("0x802", "cortex-a75") // Kryo 3xx Gold
278        .Case("0x803", "cortex-a75") // Kryo 3xx Silver
279        .Case("0x804", "cortex-a76") // Kryo 4xx Gold
280        .Case("0x805", "cortex-a76") // Kryo 4xx/5xx Silver
281        .Case("0xc00", "falkor")
282        .Case("0xc01", "saphira")
283        .Default("generic");
284  if (Implementer == "0x53") { // Samsung Electronics Co., Ltd.
285    // The Exynos chips have a convoluted ID scheme that doesn't seem to follow
286    // any predictive pattern across variants and parts.
287    unsigned Variant = 0, Part = 0;
288
289    // Look for the CPU variant line, whose value is a 1 digit hexadecimal
290    // number, corresponding to the Variant bits in the CP15/C0 register.
291    for (auto I : Lines)
292      if (I.consume_front("CPU variant"))
293        I.ltrim("\t :").getAsInteger(0, Variant);
294
295    // Look for the CPU part line, whose value is a 3 digit hexadecimal
296    // number, corresponding to the PartNum bits in the CP15/C0 register.
297    for (auto I : Lines)
298      if (I.consume_front("CPU part"))
299        I.ltrim("\t :").getAsInteger(0, Part);
300
301    unsigned Exynos = (Variant << 12) | Part;
302    switch (Exynos) {
303    default:
304      // Default by falling through to Exynos M3.
305      [[fallthrough]];
306    case 0x1002:
307      return "exynos-m3";
308    case 0x1003:
309      return "exynos-m4";
310    }
311  }
312
313  if (Implementer == "0x6d") { // Microsoft Corporation.
314    // The Microsoft Azure Cobalt 100 CPU is handled as a Neoverse N2.
315    return StringSwitch<const char *>(Part)
316        .Case("0xd49", "neoverse-n2")
317        .Default("generic");
318  }
319
320  if (Implementer == "0xc0") { // Ampere Computing
321    return StringSwitch<const char *>(Part)
322        .Case("0xac3", "ampere1")
323        .Case("0xac4", "ampere1a")
324        .Case("0xac5", "ampere1b")
325        .Default("generic");
326  }
327
328  return "generic";
329}
330
331namespace {
332StringRef getCPUNameFromS390Model(unsigned int Id, bool HaveVectorSupport) {
333  switch (Id) {
334    case 2064:  // z900 not supported by LLVM
335    case 2066:
336    case 2084:  // z990 not supported by LLVM
337    case 2086:
338    case 2094:  // z9-109 not supported by LLVM
339    case 2096:
340      return "generic";
341    case 2097:
342    case 2098:
343      return "z10";
344    case 2817:
345    case 2818:
346      return "z196";
347    case 2827:
348    case 2828:
349      return "zEC12";
350    case 2964:
351    case 2965:
352      return HaveVectorSupport? "z13" : "zEC12";
353    case 3906:
354    case 3907:
355      return HaveVectorSupport? "z14" : "zEC12";
356    case 8561:
357    case 8562:
358      return HaveVectorSupport? "z15" : "zEC12";
359    case 3931:
360    case 3932:
361    default:
362      return HaveVectorSupport? "z16" : "zEC12";
363  }
364}
365} // end anonymous namespace
366
367StringRef sys::detail::getHostCPUNameForS390x(StringRef ProcCpuinfoContent) {
368  // STIDP is a privileged operation, so use /proc/cpuinfo instead.
369
370  // The "processor 0:" line comes after a fair amount of other information,
371  // including a cache breakdown, but this should be plenty.
372  SmallVector<StringRef, 32> Lines;
373  ProcCpuinfoContent.split(Lines, "\n");
374
375  // Look for the CPU features.
376  SmallVector<StringRef, 32> CPUFeatures;
377  for (unsigned I = 0, E = Lines.size(); I != E; ++I)
378    if (Lines[I].starts_with("features")) {
379      size_t Pos = Lines[I].find(':');
380      if (Pos != StringRef::npos) {
381        Lines[I].drop_front(Pos + 1).split(CPUFeatures, ' ');
382        break;
383      }
384    }
385
386  // We need to check for the presence of vector support independently of
387  // the machine type, since we may only use the vector register set when
388  // supported by the kernel (and hypervisor).
389  bool HaveVectorSupport = false;
390  for (unsigned I = 0, E = CPUFeatures.size(); I != E; ++I) {
391    if (CPUFeatures[I] == "vx")
392      HaveVectorSupport = true;
393  }
394
395  // Now check the processor machine type.
396  for (unsigned I = 0, E = Lines.size(); I != E; ++I) {
397    if (Lines[I].starts_with("processor ")) {
398      size_t Pos = Lines[I].find("machine = ");
399      if (Pos != StringRef::npos) {
400        Pos += sizeof("machine = ") - 1;
401        unsigned int Id;
402        if (!Lines[I].drop_front(Pos).getAsInteger(10, Id))
403          return getCPUNameFromS390Model(Id, HaveVectorSupport);
404      }
405      break;
406    }
407  }
408
409  return "generic";
410}
411
412StringRef sys::detail::getHostCPUNameForRISCV(StringRef ProcCpuinfoContent) {
413  // There are 24 lines in /proc/cpuinfo
414  SmallVector<StringRef> Lines;
415  ProcCpuinfoContent.split(Lines, "\n");
416
417  // Look for uarch line to determine cpu name
418  StringRef UArch;
419  for (unsigned I = 0, E = Lines.size(); I != E; ++I) {
420    if (Lines[I].starts_with("uarch")) {
421      UArch = Lines[I].substr(5).ltrim("\t :");
422      break;
423    }
424  }
425
426  return StringSwitch<const char *>(UArch)
427      .Case("sifive,u74-mc", "sifive-u74")
428      .Case("sifive,bullet0", "sifive-u74")
429      .Default("generic");
430}
431
/// Probes which BPF instruction-set level the running kernel accepts.
///
/// On anything other than Linux/x86-64 this returns "generic" without probing.
/// Otherwise it asks the kernel, via the bpf syscall, to load two tiny
/// programs in turn and reports the newest level that loads:
///   - "v3" if the program using a JMP32 conditional jump is accepted,
///   - "v2" if only the program using the 64-bit JLT jump is accepted,
///   - "v1" if neither is accepted.
StringRef sys::detail::getHostCPUNameForBPF() {
#if !defined(__linux__) || !defined(__x86_64__)
  return "generic";
#else
  // Five 8-byte instructions; the third one (opcode 0xae) is the JMP32 form
  // of the conditional jump, which is what distinguishes this probe from the
  // v2 probe below.
  uint8_t v3_insns[40] __attribute__ ((aligned (8))) =
      /* BPF_MOV64_IMM(BPF_REG_0, 0) */
    { 0xb7, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
      /* BPF_MOV64_IMM(BPF_REG_2, 1) */
      0xb7, 0x2, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0,
      /* BPF_JMP32_REG(BPF_JLT, BPF_REG_0, BPF_REG_2, 1) */
      0xae, 0x20, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0,
      /* BPF_MOV64_IMM(BPF_REG_0, 1) */
      0xb7, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0,
      /* BPF_EXIT_INSN() */
      0x95, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 };

  // Same program as above except that the conditional jump uses opcode 0xad,
  // the non-JMP32 JLT form.
  uint8_t v2_insns[40] __attribute__ ((aligned (8))) =
      /* BPF_MOV64_IMM(BPF_REG_0, 0) */
    { 0xb7, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
      /* BPF_MOV64_IMM(BPF_REG_2, 1) */
      0xb7, 0x2, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0,
      /* BPF_JMP_REG(BPF_JLT, BPF_REG_0, BPF_REG_2, 1) */
      0xad, 0x20, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0,
      /* BPF_MOV64_IMM(BPF_REG_0, 1) */
      0xb7, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0,
      /* BPF_EXIT_INSN() */
      0x95, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 };

  // Locally declared argument block handed to the BPF_PROG_LOAD command.
  // NOTE(review): presumably mirrors the leading fields of the kernel's
  // bpf_attr for program loading -- confirm against the kernel UAPI header.
  struct bpf_prog_load_attr {
    uint32_t prog_type;
    uint32_t insn_cnt;
    uint64_t insns;
    uint64_t license;
    uint32_t log_level;
    uint32_t log_size;
    uint64_t log_buf;
    uint32_t kern_version;
    uint32_t prog_flags;
  } attr = {};
  attr.prog_type = 1; /* BPF_PROG_TYPE_SOCKET_FILTER */
  attr.insn_cnt = 5;
  attr.insns = (uint64_t)v3_insns;
  attr.license = (uint64_t)"DUMMY";

  // First attempt: the v3 probe. A non-negative result is a file descriptor
  // for the loaded program, which is closed again straight away.
  int fd = syscall(321 /* __NR_bpf */, 5 /* BPF_PROG_LOAD */, &attr,
                   sizeof(attr));
  if (fd >= 0) {
    close(fd);
    return "v3";
  }

  /* Clear the whole attr in case its content changed by syscall. */
  memset(&attr, 0, sizeof(attr));
  attr.prog_type = 1; /* BPF_PROG_TYPE_SOCKET_FILTER */
  attr.insn_cnt = 5;
  attr.insns = (uint64_t)v2_insns;
  attr.license = (uint64_t)"DUMMY";
  // Second attempt: the v2 probe.
  fd = syscall(321 /* __NR_bpf */, 5 /* BPF_PROG_LOAD */, &attr, sizeof(attr));
  if (fd >= 0) {
    close(fd);
    return "v2";
  }
  // Neither probe loaded.
  return "v1";
#endif
}
497
498#if defined(__i386__) || defined(_M_IX86) || \
499    defined(__x86_64__) || defined(_M_X64)
500
// The check below for i386 was copied from clang's cpuid.h (__get_cpuid_max).
// Check motivated by bug reports for OpenSSL crashing on CPUs without CPUID
// support. Consequently, for i386, the presence of CPUID is checked first
// via the corresponding eflags bit.
// Removal of cpuid.h header motivated by PR30384
// Header cpuid.h and method __get_cpuid_max are not used in llvm, clang, openmp
// or test-suite, but are used in external projects e.g. libstdcxx
//
/// Reports whether the CPUID instruction may be executed on this host.
///
/// Only a 32-bit x86 build with a GNU-compatible compiler performs a real
/// check; every other configuration returns true unconditionally.
static bool isCpuIdSupported() {
#if defined(__GNUC__) || defined(__clang__)
#if defined(__i386__)
  int __cpuid_supported;
  // Read EFLAGS, flip the bit selected by mask 0x00200000, write it back and
  // read EFLAGS again. If the value read back equals the original, the bit
  // could not be changed and the result is 0; otherwise the result is 1.
  __asm__("  pushfl\n"
          "  popl   %%eax\n"
          "  movl   %%eax,%%ecx\n"
          "  xorl   $0x00200000,%%eax\n"
          "  pushl  %%eax\n"
          "  popfl\n"
          "  pushfl\n"
          "  popl   %%eax\n"
          "  movl   $0,%0\n"
          "  cmpl   %%eax,%%ecx\n"
          "  je     1f\n"
          "  movl   $1,%0\n"
          "1:"
          : "=r"(__cpuid_supported)
          :
          : "eax", "ecx");
  if (!__cpuid_supported)
    return false;
#endif
  return true;
#endif
  // Reached only when the compiler is neither GNU-compatible nor clang; no
  // check is performed in that case.
  return true;
}
535
/// getX86CpuIDAndInfo - Execute the specified cpuid and return the 4 values in
/// the specified arguments.  If we can't run cpuid on the host, return true.
///
/// \param value the leaf number loaded into EAX before executing cpuid.
/// \param rEAX,rEBX,rECX,rEDX receive the four result registers; they are
/// written only when the function returns false.
/// \returns false on success, true when no cpuid implementation is compiled
/// in for this compiler/architecture combination.
static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
                               unsigned *rECX, unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
#if defined(__x86_64__)
  // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
  // FIXME: should we save this for Clang?
  // rbx is parked in rsi across the cpuid; the final xchg restores rbx and
  // leaves cpuid's EBX result in rsi, which the "=S" constraint hands back.
  __asm__("movq\t%%rbx, %%rsi\n\t"
          "cpuid\n\t"
          "xchgq\t%%rbx, %%rsi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value));
  return false;
#elif defined(__i386__)
  // Same save/restore dance as the 64-bit path, using ebx/esi.
  __asm__("movl\t%%ebx, %%esi\n\t"
          "cpuid\n\t"
          "xchgl\t%%ebx, %%esi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value));
  return false;
#else
  return true;
#endif
#elif defined(_MSC_VER)
  // The MSVC intrinsic is portable across x86 and x64.
  int registers[4];
  __cpuid(registers, value);
  *rEAX = registers[0];
  *rEBX = registers[1];
  *rECX = registers[2];
  *rEDX = registers[3];
  return false;
#else
  return true;
#endif
}
573
574namespace llvm {
575namespace sys {
576namespace detail {
577namespace x86 {
578
579VendorSignatures getVendorSignature(unsigned *MaxLeaf) {
580  unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
581  if (MaxLeaf == nullptr)
582    MaxLeaf = &EAX;
583  else
584    *MaxLeaf = 0;
585
586  if (!isCpuIdSupported())
587    return VendorSignatures::UNKNOWN;
588
589  if (getX86CpuIDAndInfo(0, MaxLeaf, &EBX, &ECX, &EDX) || *MaxLeaf < 1)
590    return VendorSignatures::UNKNOWN;
591
592  // "Genu ineI ntel"
593  if (EBX == 0x756e6547 && EDX == 0x49656e69 && ECX == 0x6c65746e)
594    return VendorSignatures::GENUINE_INTEL;
595
596  // "Auth enti cAMD"
597  if (EBX == 0x68747541 && EDX == 0x69746e65 && ECX == 0x444d4163)
598    return VendorSignatures::AUTHENTIC_AMD;
599
600  return VendorSignatures::UNKNOWN;
601}
602
603} // namespace x86
604} // namespace detail
605} // namespace sys
606} // namespace llvm
607
608using namespace llvm::sys::detail::x86;
609
/// getX86CpuIDAndInfoEx - Execute the specified cpuid with subleaf and return
/// the 4 values in the specified arguments.  If we can't run cpuid on the host,
/// return true.
///
/// \param value the leaf number loaded into EAX before executing cpuid.
/// \param subleaf the sub-leaf number loaded into ECX before executing cpuid.
/// \param rEAX,rEBX,rECX,rEDX receive the four result registers; they are
/// written only when the function returns false.
/// \returns false on success, true when no cpuid implementation is compiled
/// in for this compiler/architecture combination.
static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf,
                                 unsigned *rEAX, unsigned *rEBX, unsigned *rECX,
                                 unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
#if defined(__x86_64__)
  // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
  // FIXME: should we save this for Clang?
  // rbx is parked in rsi across the cpuid; the final xchg restores rbx and
  // leaves cpuid's EBX result in rsi, which the "=S" constraint hands back.
  __asm__("movq\t%%rbx, %%rsi\n\t"
          "cpuid\n\t"
          "xchgq\t%%rbx, %%rsi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value), "c"(subleaf));
  return false;
#elif defined(__i386__)
  // Same save/restore dance as the 64-bit path, using ebx/esi.
  __asm__("movl\t%%ebx, %%esi\n\t"
          "cpuid\n\t"
          "xchgl\t%%ebx, %%esi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value), "c"(subleaf));
  return false;
#else
  return true;
#endif
#elif defined(_MSC_VER)
  // MSVC path: the intrinsic fills the array in EAX, EBX, ECX, EDX order.
  int registers[4];
  __cpuidex(registers, value, subleaf);
  *rEAX = registers[0];
  *rEBX = registers[1];
  *rECX = registers[2];
  *rEDX = registers[3];
  return false;
#else
  return true;
#endif
}
648
/// Read extended control register 0 (XCR0). Used to detect features such as
/// AVX.
///
/// \param rEAX receives the low 32 bits of the register value.
/// \param rEDX receives the high 32 bits of the register value.
/// \returns false on success, true when no way of reading the register is
/// compiled in for this compiler.
/// NOTE(review): nothing here checks that the instruction is available --
/// presumably callers verify that before calling; confirm at the call sites.
static bool getX86XCR0(unsigned *rEAX, unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
  // Check xgetbv; this uses a .byte sequence instead of the instruction
  // directly because older assemblers do not include support for xgetbv and
  // there is no easy way to conditionally compile based on the assembler used.
  // ECX = 0 selects register number 0.
  __asm__(".byte 0x0f, 0x01, 0xd0" : "=a"(*rEAX), "=d"(*rEDX) : "c"(0));
  return false;
#elif defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK)
  // The intrinsic returns the 64-bit value in one piece; split it into the
  // two 32-bit halves the callers expect.
  unsigned long long Result = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
  *rEAX = Result;
  *rEDX = Result >> 32;
  return false;
#else
  return true;
#endif
}
666
/// Decodes the display family and model from the EAX value of cpuid leaf 1.
///
/// \param EAX the raw EAX value.
/// \param Family receives the base family (bits 8-11), plus the extended
/// family (bits 20-27) when the base family is 0xF.
/// \param Model receives the base model (bits 4-7), with the extended model
/// (bits 16-19) prepended as the high nibble when the base family is 6 or
/// 0xF.
static void detectX86FamilyModel(unsigned EAX, unsigned *Family,
                                 unsigned *Model) {
  const unsigned BaseFamily = (EAX >> 8) & 0xf;  // Bits 8 - 11
  const unsigned BaseModel = (EAX >> 4) & 0xf;   // Bits 4 - 7
  const unsigned ExtFamily = (EAX >> 20) & 0xff; // Bits 20 - 27
  const unsigned ExtModel = (EAX >> 16) & 0xf;   // Bits 16 - 19

  // The extended family ID only counts when the family ID is F.
  const bool UsesExtFamily = BaseFamily == 0xf;
  // The extended model ID only counts when the family ID is 6 or F.
  const bool UsesExtModel = BaseFamily == 6 || BaseFamily == 0xf;

  *Family = UsesExtFamily ? BaseFamily + ExtFamily : BaseFamily;
  *Model = UsesExtModel ? BaseModel + (ExtModel << 4) : BaseModel;
}
679
680static StringRef
681getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
682                                const unsigned *Features,
683                                unsigned *Type, unsigned *Subtype) {
684  auto testFeature = [&](unsigned F) {
685    return (Features[F / 32] & (1U << (F % 32))) != 0;
686  };
687
688  StringRef CPU;
689
690  switch (Family) {
691  case 3:
692    CPU = "i386";
693    break;
694  case 4:
695    CPU = "i486";
696    break;
697  case 5:
698    if (testFeature(X86::FEATURE_MMX)) {
699      CPU = "pentium-mmx";
700      break;
701    }
702    CPU = "pentium";
703    break;
704  case 6:
705    switch (Model) {
706    case 0x0f: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile
707               // processor, Intel Core 2 Quad processor, Intel Core 2 Quad
708               // mobile processor, Intel Core 2 Extreme processor, Intel
709               // Pentium Dual-Core processor, Intel Xeon processor, model
710               // 0Fh. All processors are manufactured using the 65 nm process.
711    case 0x16: // Intel Celeron processor model 16h. All processors are
712               // manufactured using the 65 nm process
713      CPU = "core2";
714      *Type = X86::INTEL_CORE2;
715      break;
716    case 0x17: // Intel Core 2 Extreme processor, Intel Xeon processor, model
717               // 17h. All processors are manufactured using the 45 nm process.
718               //
719               // 45nm: Penryn , Wolfdale, Yorkfield (XE)
720    case 0x1d: // Intel Xeon processor MP. All processors are manufactured using
721               // the 45 nm process.
722      CPU = "penryn";
723      *Type = X86::INTEL_CORE2;
724      break;
725    case 0x1a: // Intel Core i7 processor and Intel Xeon processor. All
726               // processors are manufactured using the 45 nm process.
727    case 0x1e: // Intel(R) Core(TM) i7 CPU         870  @ 2.93GHz.
728               // As found in a Summer 2010 model iMac.
729    case 0x1f:
730    case 0x2e:              // Nehalem EX
731      CPU = "nehalem";
732      *Type = X86::INTEL_COREI7;
733      *Subtype = X86::INTEL_COREI7_NEHALEM;
734      break;
735    case 0x25: // Intel Core i7, laptop version.
736    case 0x2c: // Intel Core i7 processor and Intel Xeon processor. All
737               // processors are manufactured using the 32 nm process.
738    case 0x2f: // Westmere EX
739      CPU = "westmere";
740      *Type = X86::INTEL_COREI7;
741      *Subtype = X86::INTEL_COREI7_WESTMERE;
742      break;
743    case 0x2a: // Intel Core i7 processor. All processors are manufactured
744               // using the 32 nm process.
745    case 0x2d:
746      CPU = "sandybridge";
747      *Type = X86::INTEL_COREI7;
748      *Subtype = X86::INTEL_COREI7_SANDYBRIDGE;
749      break;
750    case 0x3a:
751    case 0x3e:              // Ivy Bridge EP
752      CPU = "ivybridge";
753      *Type = X86::INTEL_COREI7;
754      *Subtype = X86::INTEL_COREI7_IVYBRIDGE;
755      break;
756
757    // Haswell:
758    case 0x3c:
759    case 0x3f:
760    case 0x45:
761    case 0x46:
762      CPU = "haswell";
763      *Type = X86::INTEL_COREI7;
764      *Subtype = X86::INTEL_COREI7_HASWELL;
765      break;
766
767    // Broadwell:
768    case 0x3d:
769    case 0x47:
770    case 0x4f:
771    case 0x56:
772      CPU = "broadwell";
773      *Type = X86::INTEL_COREI7;
774      *Subtype = X86::INTEL_COREI7_BROADWELL;
775      break;
776
777    // Skylake:
778    case 0x4e:              // Skylake mobile
779    case 0x5e:              // Skylake desktop
780    case 0x8e:              // Kaby Lake mobile
781    case 0x9e:              // Kaby Lake desktop
782    case 0xa5:              // Comet Lake-H/S
783    case 0xa6:              // Comet Lake-U
784      CPU = "skylake";
785      *Type = X86::INTEL_COREI7;
786      *Subtype = X86::INTEL_COREI7_SKYLAKE;
787      break;
788
789    // Rocketlake:
790    case 0xa7:
791      CPU = "rocketlake";
792      *Type = X86::INTEL_COREI7;
793      *Subtype = X86::INTEL_COREI7_ROCKETLAKE;
794      break;
795
796    // Skylake Xeon:
797    case 0x55:
798      *Type = X86::INTEL_COREI7;
799      if (testFeature(X86::FEATURE_AVX512BF16)) {
800        CPU = "cooperlake";
801        *Subtype = X86::INTEL_COREI7_COOPERLAKE;
802      } else if (testFeature(X86::FEATURE_AVX512VNNI)) {
803        CPU = "cascadelake";
804        *Subtype = X86::INTEL_COREI7_CASCADELAKE;
805      } else {
806        CPU = "skylake-avx512";
807        *Subtype = X86::INTEL_COREI7_SKYLAKE_AVX512;
808      }
809      break;
810
811    // Cannonlake:
812    case 0x66:
813      CPU = "cannonlake";
814      *Type = X86::INTEL_COREI7;
815      *Subtype = X86::INTEL_COREI7_CANNONLAKE;
816      break;
817
818    // Icelake:
819    case 0x7d:
820    case 0x7e:
821      CPU = "icelake-client";
822      *Type = X86::INTEL_COREI7;
823      *Subtype = X86::INTEL_COREI7_ICELAKE_CLIENT;
824      break;
825
826    // Tigerlake:
827    case 0x8c:
828    case 0x8d:
829      CPU = "tigerlake";
830      *Type = X86::INTEL_COREI7;
831      *Subtype = X86::INTEL_COREI7_TIGERLAKE;
832      break;
833
834    // Alderlake:
835    case 0x97:
836    case 0x9a:
837    // Gracemont
838    case 0xbe:
839    // Raptorlake:
840    case 0xb7:
841    case 0xba:
842    case 0xbf:
843    // Meteorlake:
844    case 0xaa:
845    case 0xac:
846      CPU = "alderlake";
847      *Type = X86::INTEL_COREI7;
848      *Subtype = X86::INTEL_COREI7_ALDERLAKE;
849      break;
850
851    // Arrowlake:
852    case 0xc5:
853      CPU = "arrowlake";
854      *Type = X86::INTEL_COREI7;
855      *Subtype = X86::INTEL_COREI7_ARROWLAKE;
856      break;
857
858    // Arrowlake S:
859    case 0xc6:
860    // Lunarlake:
861    case 0xbd:
862      CPU = "arrowlake-s";
863      *Type = X86::INTEL_COREI7;
864      *Subtype = X86::INTEL_COREI7_ARROWLAKE_S;
865      break;
866
867    // Pantherlake:
868    case 0xcc:
869      CPU = "pantherlake";
870      *Type = X86::INTEL_COREI7;
871      *Subtype = X86::INTEL_COREI7_PANTHERLAKE;
872      break;
873
874    // Graniterapids:
875    case 0xad:
876      CPU = "graniterapids";
877      *Type = X86::INTEL_COREI7;
878      *Subtype = X86::INTEL_COREI7_GRANITERAPIDS;
879      break;
880
881    // Granite Rapids D:
882    case 0xae:
883      CPU = "graniterapids-d";
884      *Type = X86::INTEL_COREI7;
885      *Subtype = X86::INTEL_COREI7_GRANITERAPIDS_D;
886      break;
887
888    // Icelake Xeon:
889    case 0x6a:
890    case 0x6c:
891      CPU = "icelake-server";
892      *Type = X86::INTEL_COREI7;
893      *Subtype = X86::INTEL_COREI7_ICELAKE_SERVER;
894      break;
895
896    // Emerald Rapids:
897    case 0xcf:
898    // Sapphire Rapids:
899    case 0x8f:
900      CPU = "sapphirerapids";
901      *Type = X86::INTEL_COREI7;
902      *Subtype = X86::INTEL_COREI7_SAPPHIRERAPIDS;
903      break;
904
905    case 0x1c: // Most 45 nm Intel Atom processors
906    case 0x26: // 45 nm Atom Lincroft
907    case 0x27: // 32 nm Atom Medfield
908    case 0x35: // 32 nm Atom Midview
909    case 0x36: // 32 nm Atom Midview
910      CPU = "bonnell";
911      *Type = X86::INTEL_BONNELL;
912      break;
913
914    // Atom Silvermont codes from the Intel software optimization guide.
915    case 0x37:
916    case 0x4a:
917    case 0x4d:
918    case 0x5a:
919    case 0x5d:
920    case 0x4c: // really airmont
921      CPU = "silvermont";
922      *Type = X86::INTEL_SILVERMONT;
923      break;
924    // Goldmont:
925    case 0x5c: // Apollo Lake
926    case 0x5f: // Denverton
927      CPU = "goldmont";
928      *Type = X86::INTEL_GOLDMONT;
929      break;
930    case 0x7a:
931      CPU = "goldmont-plus";
932      *Type = X86::INTEL_GOLDMONT_PLUS;
933      break;
934    case 0x86:
935    case 0x8a: // Lakefield
936    case 0x96: // Elkhart Lake
937    case 0x9c: // Jasper Lake
938      CPU = "tremont";
939      *Type = X86::INTEL_TREMONT;
940      break;
941
942    // Sierraforest:
943    case 0xaf:
944      CPU = "sierraforest";
945      *Type = X86::INTEL_SIERRAFOREST;
946      break;
947
948    // Grandridge:
949    case 0xb6:
950      CPU = "grandridge";
951      *Type = X86::INTEL_GRANDRIDGE;
952      break;
953
954    // Clearwaterforest:
955    case 0xdd:
956      CPU = "clearwaterforest";
957      *Type = X86::INTEL_CLEARWATERFOREST;
958      break;
959
960    // Xeon Phi (Knights Landing + Knights Mill):
961    case 0x57:
962      CPU = "knl";
963      *Type = X86::INTEL_KNL;
964      break;
965    case 0x85:
966      CPU = "knm";
967      *Type = X86::INTEL_KNM;
968      break;
969
970    default: // Unknown family 6 CPU, try to guess.
      // Don't bother with Type/Subtype here, they aren't used by the caller.
972      // They're used above to keep the code in sync with compiler-rt.
973      // TODO detect tigerlake host from model
974      if (testFeature(X86::FEATURE_AVX512VP2INTERSECT)) {
975        CPU = "tigerlake";
976      } else if (testFeature(X86::FEATURE_AVX512VBMI2)) {
977        CPU = "icelake-client";
978      } else if (testFeature(X86::FEATURE_AVX512VBMI)) {
979        CPU = "cannonlake";
980      } else if (testFeature(X86::FEATURE_AVX512BF16)) {
981        CPU = "cooperlake";
982      } else if (testFeature(X86::FEATURE_AVX512VNNI)) {
983        CPU = "cascadelake";
984      } else if (testFeature(X86::FEATURE_AVX512VL)) {
985        CPU = "skylake-avx512";
986      } else if (testFeature(X86::FEATURE_AVX512ER)) {
987        CPU = "knl";
988      } else if (testFeature(X86::FEATURE_CLFLUSHOPT)) {
989        if (testFeature(X86::FEATURE_SHA))
990          CPU = "goldmont";
991        else
992          CPU = "skylake";
993      } else if (testFeature(X86::FEATURE_ADX)) {
994        CPU = "broadwell";
995      } else if (testFeature(X86::FEATURE_AVX2)) {
996        CPU = "haswell";
997      } else if (testFeature(X86::FEATURE_AVX)) {
998        CPU = "sandybridge";
999      } else if (testFeature(X86::FEATURE_SSE4_2)) {
1000        if (testFeature(X86::FEATURE_MOVBE))
1001          CPU = "silvermont";
1002        else
1003          CPU = "nehalem";
1004      } else if (testFeature(X86::FEATURE_SSE4_1)) {
1005        CPU = "penryn";
1006      } else if (testFeature(X86::FEATURE_SSSE3)) {
1007        if (testFeature(X86::FEATURE_MOVBE))
1008          CPU = "bonnell";
1009        else
1010          CPU = "core2";
1011      } else if (testFeature(X86::FEATURE_64BIT)) {
1012        CPU = "core2";
1013      } else if (testFeature(X86::FEATURE_SSE3)) {
1014        CPU = "yonah";
1015      } else if (testFeature(X86::FEATURE_SSE2)) {
1016        CPU = "pentium-m";
1017      } else if (testFeature(X86::FEATURE_SSE)) {
1018        CPU = "pentium3";
1019      } else if (testFeature(X86::FEATURE_MMX)) {
1020        CPU = "pentium2";
1021      } else {
1022        CPU = "pentiumpro";
1023      }
1024      break;
1025    }
1026    break;
1027  case 15: {
1028    if (testFeature(X86::FEATURE_64BIT)) {
1029      CPU = "nocona";
1030      break;
1031    }
1032    if (testFeature(X86::FEATURE_SSE3)) {
1033      CPU = "prescott";
1034      break;
1035    }
1036    CPU = "pentium4";
1037    break;
1038  }
1039  default:
1040    break; // Unknown.
1041  }
1042
1043  return CPU;
1044}
1045
1046static StringRef
1047getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model,
1048                              const unsigned *Features,
1049                              unsigned *Type, unsigned *Subtype) {
1050  auto testFeature = [&](unsigned F) {
1051    return (Features[F / 32] & (1U << (F % 32))) != 0;
1052  };
1053
1054  StringRef CPU;
1055
1056  switch (Family) {
1057  case 4:
1058    CPU = "i486";
1059    break;
1060  case 5:
1061    CPU = "pentium";
1062    switch (Model) {
1063    case 6:
1064    case 7:
1065      CPU = "k6";
1066      break;
1067    case 8:
1068      CPU = "k6-2";
1069      break;
1070    case 9:
1071    case 13:
1072      CPU = "k6-3";
1073      break;
1074    case 10:
1075      CPU = "geode";
1076      break;
1077    }
1078    break;
1079  case 6:
1080    if (testFeature(X86::FEATURE_SSE)) {
1081      CPU = "athlon-xp";
1082      break;
1083    }
1084    CPU = "athlon";
1085    break;
1086  case 15:
1087    if (testFeature(X86::FEATURE_SSE3)) {
1088      CPU = "k8-sse3";
1089      break;
1090    }
1091    CPU = "k8";
1092    break;
1093  case 16:
1094    CPU = "amdfam10";
1095    *Type = X86::AMDFAM10H; // "amdfam10"
1096    switch (Model) {
1097    case 2:
1098      *Subtype = X86::AMDFAM10H_BARCELONA;
1099      break;
1100    case 4:
1101      *Subtype = X86::AMDFAM10H_SHANGHAI;
1102      break;
1103    case 8:
1104      *Subtype = X86::AMDFAM10H_ISTANBUL;
1105      break;
1106    }
1107    break;
1108  case 20:
1109    CPU = "btver1";
1110    *Type = X86::AMD_BTVER1;
1111    break;
1112  case 21:
1113    CPU = "bdver1";
1114    *Type = X86::AMDFAM15H;
1115    if (Model >= 0x60 && Model <= 0x7f) {
1116      CPU = "bdver4";
1117      *Subtype = X86::AMDFAM15H_BDVER4;
1118      break; // 60h-7Fh: Excavator
1119    }
1120    if (Model >= 0x30 && Model <= 0x3f) {
1121      CPU = "bdver3";
1122      *Subtype = X86::AMDFAM15H_BDVER3;
1123      break; // 30h-3Fh: Steamroller
1124    }
1125    if ((Model >= 0x10 && Model <= 0x1f) || Model == 0x02) {
1126      CPU = "bdver2";
1127      *Subtype = X86::AMDFAM15H_BDVER2;
1128      break; // 02h, 10h-1Fh: Piledriver
1129    }
1130    if (Model <= 0x0f) {
1131      *Subtype = X86::AMDFAM15H_BDVER1;
1132      break; // 00h-0Fh: Bulldozer
1133    }
1134    break;
1135  case 22:
1136    CPU = "btver2";
1137    *Type = X86::AMD_BTVER2;
1138    break;
1139  case 23:
1140    CPU = "znver1";
1141    *Type = X86::AMDFAM17H;
1142    if ((Model >= 0x30 && Model <= 0x3f) || (Model == 0x47) ||
1143        (Model >= 0x60 && Model <= 0x67) || (Model >= 0x68 && Model <= 0x6f) ||
1144        (Model >= 0x70 && Model <= 0x7f) || (Model >= 0x84 && Model <= 0x87) ||
1145        (Model >= 0x90 && Model <= 0x97) || (Model >= 0x98 && Model <= 0x9f) ||
1146        (Model >= 0xa0 && Model <= 0xaf)) {
1147      // Family 17h Models 30h-3Fh (Starship) Zen 2
1148      // Family 17h Models 47h (Cardinal) Zen 2
1149      // Family 17h Models 60h-67h (Renoir) Zen 2
1150      // Family 17h Models 68h-6Fh (Lucienne) Zen 2
1151      // Family 17h Models 70h-7Fh (Matisse) Zen 2
1152      // Family 17h Models 84h-87h (ProjectX) Zen 2
1153      // Family 17h Models 90h-97h (VanGogh) Zen 2
1154      // Family 17h Models 98h-9Fh (Mero) Zen 2
1155      // Family 17h Models A0h-AFh (Mendocino) Zen 2
1156      CPU = "znver2";
1157      *Subtype = X86::AMDFAM17H_ZNVER2;
1158      break;
1159    }
1160    if ((Model >= 0x10 && Model <= 0x1f) || (Model >= 0x20 && Model <= 0x2f)) {
1161      // Family 17h Models 10h-1Fh (Raven1) Zen
1162      // Family 17h Models 10h-1Fh (Picasso) Zen+
1163      // Family 17h Models 20h-2Fh (Raven2 x86) Zen
1164      *Subtype = X86::AMDFAM17H_ZNVER1;
1165      break;
1166    }
1167    break;
1168  case 25:
1169    CPU = "znver3";
1170    *Type = X86::AMDFAM19H;
1171    if (Model <= 0x0f || (Model >= 0x20 && Model <= 0x2f) ||
1172        (Model >= 0x30 && Model <= 0x3f) || (Model >= 0x40 && Model <= 0x4f) ||
1173        (Model >= 0x50 && Model <= 0x5f)) {
1174      // Family 19h Models 00h-0Fh (Genesis, Chagall) Zen 3
1175      // Family 19h Models 20h-2Fh (Vermeer) Zen 3
1176      // Family 19h Models 30h-3Fh (Badami) Zen 3
1177      // Family 19h Models 40h-4Fh (Rembrandt) Zen 3+
1178      // Family 19h Models 50h-5Fh (Cezanne) Zen 3
1179      *Subtype = X86::AMDFAM19H_ZNVER3;
1180      break;
1181    }
1182    if ((Model >= 0x10 && Model <= 0x1f) || (Model >= 0x60 && Model <= 0x6f) ||
1183        (Model >= 0x70 && Model <= 0x77) || (Model >= 0x78 && Model <= 0x7f) ||
1184        (Model >= 0xa0 && Model <= 0xaf)) {
1185      // Family 19h Models 10h-1Fh (Stones; Storm Peak) Zen 4
1186      // Family 19h Models 60h-6Fh (Raphael) Zen 4
1187      // Family 19h Models 70h-77h (Phoenix, Hawkpoint1) Zen 4
1188      // Family 19h Models 78h-7Fh (Phoenix 2, Hawkpoint2) Zen 4
1189      // Family 19h Models A0h-AFh (Stones-Dense) Zen 4
1190      CPU = "znver4";
1191      *Subtype = X86::AMDFAM19H_ZNVER4;
1192      break; //  "znver4"
1193    }
1194    break;
1195  default:
1196    break; // Unknown AMD CPU.
1197  }
1198
1199  return CPU;
1200}
1201
1202static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
1203                                 unsigned *Features) {
1204  unsigned EAX, EBX;
1205
1206  auto setFeature = [&](unsigned F) {
1207    Features[F / 32] |= 1U << (F % 32);
1208  };
1209
1210  if ((EDX >> 15) & 1)
1211    setFeature(X86::FEATURE_CMOV);
1212  if ((EDX >> 23) & 1)
1213    setFeature(X86::FEATURE_MMX);
1214  if ((EDX >> 25) & 1)
1215    setFeature(X86::FEATURE_SSE);
1216  if ((EDX >> 26) & 1)
1217    setFeature(X86::FEATURE_SSE2);
1218
1219  if ((ECX >> 0) & 1)
1220    setFeature(X86::FEATURE_SSE3);
1221  if ((ECX >> 1) & 1)
1222    setFeature(X86::FEATURE_PCLMUL);
1223  if ((ECX >> 9) & 1)
1224    setFeature(X86::FEATURE_SSSE3);
1225  if ((ECX >> 12) & 1)
1226    setFeature(X86::FEATURE_FMA);
1227  if ((ECX >> 19) & 1)
1228    setFeature(X86::FEATURE_SSE4_1);
1229  if ((ECX >> 20) & 1) {
1230    setFeature(X86::FEATURE_SSE4_2);
1231    setFeature(X86::FEATURE_CRC32);
1232  }
1233  if ((ECX >> 23) & 1)
1234    setFeature(X86::FEATURE_POPCNT);
1235  if ((ECX >> 25) & 1)
1236    setFeature(X86::FEATURE_AES);
1237
1238  if ((ECX >> 22) & 1)
1239    setFeature(X86::FEATURE_MOVBE);
1240
1241  // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV
1242  // indicates that the AVX registers will be saved and restored on context
1243  // switch, then we have full AVX support.
1244  const unsigned AVXBits = (1 << 27) | (1 << 28);
1245  bool HasAVX = ((ECX & AVXBits) == AVXBits) && !getX86XCR0(&EAX, &EDX) &&
1246                ((EAX & 0x6) == 0x6);
1247#if defined(__APPLE__)
1248  // Darwin lazily saves the AVX512 context on first use: trust that the OS will
1249  // save the AVX512 context if we use AVX512 instructions, even the bit is not
1250  // set right now.
1251  bool HasAVX512Save = true;
1252#else
1253  // AVX512 requires additional context to be saved by the OS.
1254  bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0);
1255#endif
1256
1257  if (HasAVX)
1258    setFeature(X86::FEATURE_AVX);
1259
1260  bool HasLeaf7 =
1261      MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX);
1262
1263  if (HasLeaf7 && ((EBX >> 3) & 1))
1264    setFeature(X86::FEATURE_BMI);
1265  if (HasLeaf7 && ((EBX >> 5) & 1) && HasAVX)
1266    setFeature(X86::FEATURE_AVX2);
1267  if (HasLeaf7 && ((EBX >> 8) & 1))
1268    setFeature(X86::FEATURE_BMI2);
1269  if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save) {
1270    setFeature(X86::FEATURE_AVX512F);
1271    setFeature(X86::FEATURE_EVEX512);
1272  }
1273  if (HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save)
1274    setFeature(X86::FEATURE_AVX512DQ);
1275  if (HasLeaf7 && ((EBX >> 19) & 1))
1276    setFeature(X86::FEATURE_ADX);
1277  if (HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save)
1278    setFeature(X86::FEATURE_AVX512IFMA);
1279  if (HasLeaf7 && ((EBX >> 23) & 1))
1280    setFeature(X86::FEATURE_CLFLUSHOPT);
1281  if (HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save)
1282    setFeature(X86::FEATURE_AVX512PF);
1283  if (HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save)
1284    setFeature(X86::FEATURE_AVX512ER);
1285  if (HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save)
1286    setFeature(X86::FEATURE_AVX512CD);
1287  if (HasLeaf7 && ((EBX >> 29) & 1))
1288    setFeature(X86::FEATURE_SHA);
1289  if (HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save)
1290    setFeature(X86::FEATURE_AVX512BW);
1291  if (HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save)
1292    setFeature(X86::FEATURE_AVX512VL);
1293
1294  if (HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save)
1295    setFeature(X86::FEATURE_AVX512VBMI);
1296  if (HasLeaf7 && ((ECX >> 6) & 1) && HasAVX512Save)
1297    setFeature(X86::FEATURE_AVX512VBMI2);
1298  if (HasLeaf7 && ((ECX >> 8) & 1))
1299    setFeature(X86::FEATURE_GFNI);
1300  if (HasLeaf7 && ((ECX >> 10) & 1) && HasAVX)
1301    setFeature(X86::FEATURE_VPCLMULQDQ);
1302  if (HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save)
1303    setFeature(X86::FEATURE_AVX512VNNI);
1304  if (HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save)
1305    setFeature(X86::FEATURE_AVX512BITALG);
1306  if (HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save)
1307    setFeature(X86::FEATURE_AVX512VPOPCNTDQ);
1308
1309  if (HasLeaf7 && ((EDX >> 2) & 1) && HasAVX512Save)
1310    setFeature(X86::FEATURE_AVX5124VNNIW);
1311  if (HasLeaf7 && ((EDX >> 3) & 1) && HasAVX512Save)
1312    setFeature(X86::FEATURE_AVX5124FMAPS);
1313  if (HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save)
1314    setFeature(X86::FEATURE_AVX512VP2INTERSECT);
1315
1316  // EAX from subleaf 0 is the maximum subleaf supported. Some CPUs don't
1317  // return all 0s for invalid subleaves so check the limit.
1318  bool HasLeaf7Subleaf1 =
1319      HasLeaf7 && EAX >= 1 &&
1320      !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX);
1321  if (HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save)
1322    setFeature(X86::FEATURE_AVX512BF16);
1323
1324  unsigned MaxExtLevel;
1325  getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX);
1326
1327  bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 &&
1328                     !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
1329  if (HasExtLeaf1 && ((ECX >> 6) & 1))
1330    setFeature(X86::FEATURE_SSE4_A);
1331  if (HasExtLeaf1 && ((ECX >> 11) & 1))
1332    setFeature(X86::FEATURE_XOP);
1333  if (HasExtLeaf1 && ((ECX >> 16) & 1))
1334    setFeature(X86::FEATURE_FMA4);
1335
1336  if (HasExtLeaf1 && ((EDX >> 29) & 1))
1337    setFeature(X86::FEATURE_64BIT);
1338}
1339
1340StringRef sys::getHostCPUName() {
1341  unsigned MaxLeaf = 0;
1342  const VendorSignatures Vendor = getVendorSignature(&MaxLeaf);
1343  if (Vendor == VendorSignatures::UNKNOWN)
1344    return "generic";
1345
1346  unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
1347  getX86CpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX);
1348
1349  unsigned Family = 0, Model = 0;
1350  unsigned Features[(X86::CPU_FEATURE_MAX + 31) / 32] = {0};
1351  detectX86FamilyModel(EAX, &Family, &Model);
1352  getAvailableFeatures(ECX, EDX, MaxLeaf, Features);
1353
1354  // These aren't consumed in this file, but we try to keep some source code the
1355  // same or similar to compiler-rt.
1356  unsigned Type = 0;
1357  unsigned Subtype = 0;
1358
1359  StringRef CPU;
1360
1361  if (Vendor == VendorSignatures::GENUINE_INTEL) {
1362    CPU = getIntelProcessorTypeAndSubtype(Family, Model, Features, &Type,
1363                                          &Subtype);
1364  } else if (Vendor == VendorSignatures::AUTHENTIC_AMD) {
1365    CPU = getAMDProcessorTypeAndSubtype(Family, Model, Features, &Type,
1366                                        &Subtype);
1367  }
1368
1369  if (!CPU.empty())
1370    return CPU;
1371
1372  return "generic";
1373}
1374
1375#elif defined(__APPLE__) && defined(__powerpc__)
1376StringRef sys::getHostCPUName() {
1377  host_basic_info_data_t hostInfo;
1378  mach_msg_type_number_t infoCount;
1379
1380  infoCount = HOST_BASIC_INFO_COUNT;
1381  mach_port_t hostPort = mach_host_self();
1382  host_info(hostPort, HOST_BASIC_INFO, (host_info_t)&hostInfo,
1383            &infoCount);
1384  mach_port_deallocate(mach_task_self(), hostPort);
1385
1386  if (hostInfo.cpu_type != CPU_TYPE_POWERPC)
1387    return "generic";
1388
1389  switch (hostInfo.cpu_subtype) {
1390  case CPU_SUBTYPE_POWERPC_601:
1391    return "601";
1392  case CPU_SUBTYPE_POWERPC_602:
1393    return "602";
1394  case CPU_SUBTYPE_POWERPC_603:
1395    return "603";
1396  case CPU_SUBTYPE_POWERPC_603e:
1397    return "603e";
1398  case CPU_SUBTYPE_POWERPC_603ev:
1399    return "603ev";
1400  case CPU_SUBTYPE_POWERPC_604:
1401    return "604";
1402  case CPU_SUBTYPE_POWERPC_604e:
1403    return "604e";
1404  case CPU_SUBTYPE_POWERPC_620:
1405    return "620";
1406  case CPU_SUBTYPE_POWERPC_750:
1407    return "750";
1408  case CPU_SUBTYPE_POWERPC_7400:
1409    return "7400";
1410  case CPU_SUBTYPE_POWERPC_7450:
1411    return "7450";
1412  case CPU_SUBTYPE_POWERPC_970:
1413    return "970";
1414  default:;
1415  }
1416
1417  return "generic";
1418}
1419#elif defined(__linux__) && defined(__powerpc__)
1420StringRef sys::getHostCPUName() {
1421  std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
1422  StringRef Content = P ? P->getBuffer() : "";
1423  return detail::getHostCPUNameForPowerPC(Content);
1424}
1425#elif defined(__linux__) && (defined(__arm__) || defined(__aarch64__))
1426StringRef sys::getHostCPUName() {
1427  std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
1428  StringRef Content = P ? P->getBuffer() : "";
1429  return detail::getHostCPUNameForARM(Content);
1430}
1431#elif defined(__linux__) && defined(__s390x__)
1432StringRef sys::getHostCPUName() {
1433  std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
1434  StringRef Content = P ? P->getBuffer() : "";
1435  return detail::getHostCPUNameForS390x(Content);
1436}
1437#elif defined(__MVS__)
1438StringRef sys::getHostCPUName() {
1439  // Get pointer to Communications Vector Table (CVT).
1440  // The pointer is located at offset 16 of the Prefixed Save Area (PSA).
1441  // It is stored as 31 bit pointer and will be zero-extended to 64 bit.
1442  int *StartToCVTOffset = reinterpret_cast<int *>(0x10);
1443  // Since its stored as a 31-bit pointer, get the 4 bytes from the start
1444  // of address.
1445  int ReadValue = *StartToCVTOffset;
1446  // Explicitly clear the high order bit.
1447  ReadValue = (ReadValue & 0x7FFFFFFF);
1448  char *CVT = reinterpret_cast<char *>(ReadValue);
1449  // The model number is located in the CVT prefix at offset -6 and stored as
1450  // signless packed decimal.
1451  uint16_t Id = *(uint16_t *)&CVT[-6];
1452  // Convert number to integer.
1453  Id = decodePackedBCD<uint16_t>(Id, false);
1454  // Check for vector support. It's stored in field CVTFLAG5 (offset 244),
1455  // bit CVTVEF (X'80'). The facilities list is part of the PSA but the vector
1456  // extension can only be used if bit CVTVEF is on.
1457  bool HaveVectorSupport = CVT[244] & 0x80;
1458  return getCPUNameFromS390Model(Id, HaveVectorSupport);
1459}
1460#elif defined(__APPLE__) && (defined(__arm__) || defined(__aarch64__))
// CPU family identifiers compared against the value of the "hw.cpufamily"
// sysctl by the Darwin/ARM getHostCPUName() that follows.
// NOTE(review): presumably defined locally because not every SDK header
// provides all of them -- confirm before replacing with SDK definitions.
#define CPUFAMILY_ARM_SWIFT 0x1e2d6381
#define CPUFAMILY_ARM_CYCLONE 0x37a09642
#define CPUFAMILY_ARM_TYPHOON 0x2c91a47e
#define CPUFAMILY_ARM_TWISTER 0x92fb37c8
#define CPUFAMILY_ARM_HURRICANE 0x67ceee93
#define CPUFAMILY_ARM_MONSOON_MISTRAL 0xe81e7ef6
#define CPUFAMILY_ARM_VORTEX_TEMPEST 0x07d34b9f
#define CPUFAMILY_ARM_LIGHTNING_THUNDER 0x462504d2
#define CPUFAMILY_ARM_FIRESTORM_ICESTORM 0x1b588bb3
1470
1471StringRef sys::getHostCPUName() {
1472  uint32_t Family;
1473  size_t Length = sizeof(Family);
1474  sysctlbyname("hw.cpufamily", &Family, &Length, NULL, 0);
1475
1476  switch (Family) {
1477  case CPUFAMILY_ARM_SWIFT:
1478    return "swift";
1479  case CPUFAMILY_ARM_CYCLONE:
1480    return "apple-a7";
1481  case CPUFAMILY_ARM_TYPHOON:
1482    return "apple-a8";
1483  case CPUFAMILY_ARM_TWISTER:
1484    return "apple-a9";
1485  case CPUFAMILY_ARM_HURRICANE:
1486    return "apple-a10";
1487  case CPUFAMILY_ARM_MONSOON_MISTRAL:
1488    return "apple-a11";
1489  case CPUFAMILY_ARM_VORTEX_TEMPEST:
1490    return "apple-a12";
1491  case CPUFAMILY_ARM_LIGHTNING_THUNDER:
1492    return "apple-a13";
1493  case CPUFAMILY_ARM_FIRESTORM_ICESTORM:
1494    return "apple-m1";
1495  default:
1496    // Default to the newest CPU we know about.
1497    return "apple-m1";
1498  }
1499}
1500#elif defined(_AIX)
1501StringRef sys::getHostCPUName() {
1502  switch (_system_configuration.implementation) {
1503  case POWER_4:
1504    if (_system_configuration.version == PV_4_3)
1505      return "970";
1506    return "pwr4";
1507  case POWER_5:
1508    if (_system_configuration.version == PV_5)
1509      return "pwr5";
1510    return "pwr5x";
1511  case POWER_6:
1512    if (_system_configuration.version == PV_6_Compat)
1513      return "pwr6";
1514    return "pwr6x";
1515  case POWER_7:
1516    return "pwr7";
1517  case POWER_8:
1518    return "pwr8";
1519  case POWER_9:
1520    return "pwr9";
1521// TODO: simplify this once the macro is available in all OS levels.
1522#ifdef POWER_10
1523  case POWER_10:
1524#else
1525  case 0x40000:
1526#endif
1527    return "pwr10";
1528  default:
1529    return "generic";
1530  }
1531}
1532#elif defined(__loongarch__)
1533StringRef sys::getHostCPUName() {
1534  // Use processor id to detect cpu name.
1535  uint32_t processor_id;
1536  __asm__("cpucfg %[prid], $zero\n\t" : [prid] "=r"(processor_id));
1537  // Refer PRID_SERIES_MASK in linux kernel: arch/loongarch/include/asm/cpu.h.
1538  switch (processor_id & 0xf000) {
1539  case 0xc000: // Loongson 64bit, 4-issue
1540    return "la464";
1541  // TODO: Others.
1542  default:
1543    break;
1544  }
1545  return "generic";
1546}
1547#elif defined(__riscv)
1548StringRef sys::getHostCPUName() {
1549#if defined(__linux__)
1550  std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
1551  StringRef Content = P ? P->getBuffer() : "";
1552  return detail::getHostCPUNameForRISCV(Content);
1553#else
1554#if __riscv_xlen == 64
1555  return "generic-rv64";
1556#elif __riscv_xlen == 32
1557  return "generic-rv32";
1558#else
1559#error "Unhandled value of __riscv_xlen"
1560#endif
1561#endif
1562}
1563#elif defined(__sparc__)
1564#if defined(__linux__)
1565StringRef sys::detail::getHostCPUNameForSPARC(StringRef ProcCpuinfoContent) {
1566  SmallVector<StringRef> Lines;
1567  ProcCpuinfoContent.split(Lines, "\n");
1568
1569  // Look for cpu line to determine cpu name
1570  StringRef Cpu;
1571  for (unsigned I = 0, E = Lines.size(); I != E; ++I) {
1572    if (Lines[I].starts_with("cpu")) {
1573      Cpu = Lines[I].substr(5).ltrim("\t :");
1574      break;
1575    }
1576  }
1577
1578  return StringSwitch<const char *>(Cpu)
1579      .StartsWith("SuperSparc", "supersparc")
1580      .StartsWith("HyperSparc", "hypersparc")
1581      .StartsWith("SpitFire", "ultrasparc")
1582      .StartsWith("BlackBird", "ultrasparc")
1583      .StartsWith("Sabre", " ultrasparc")
1584      .StartsWith("Hummingbird", "ultrasparc")
1585      .StartsWith("Cheetah", "ultrasparc3")
1586      .StartsWith("Jalapeno", "ultrasparc3")
1587      .StartsWith("Jaguar", "ultrasparc3")
1588      .StartsWith("Panther", "ultrasparc3")
1589      .StartsWith("Serrano", "ultrasparc3")
1590      .StartsWith("UltraSparc T1", "niagara")
1591      .StartsWith("UltraSparc T2", "niagara2")
1592      .StartsWith("UltraSparc T3", "niagara3")
1593      .StartsWith("UltraSparc T4", "niagara4")
1594      .StartsWith("UltraSparc T5", "niagara4")
1595      .StartsWith("LEON", "leon3")
1596      // niagara7/m8 not supported by LLVM yet.
1597      .StartsWith("SPARC-M7", "niagara4" /* "niagara7" */)
1598      .StartsWith("SPARC-S7", "niagara4" /* "niagara7" */)
1599      .StartsWith("SPARC-M8", "niagara4" /* "m8" */)
1600      .Default("generic");
1601}
1602#endif
1603
1604StringRef sys::getHostCPUName() {
1605#if defined(__linux__)
1606  std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
1607  StringRef Content = P ? P->getBuffer() : "";
1608  return detail::getHostCPUNameForSPARC(Content);
1609#elif defined(__sun__) && defined(__svr4__)
1610  char *buf = NULL;
1611  kstat_ctl_t *kc;
1612  kstat_t *ksp;
1613  kstat_named_t *brand = NULL;
1614
1615  kc = kstat_open();
1616  if (kc != NULL) {
1617    ksp = kstat_lookup(kc, const_cast<char *>("cpu_info"), -1, NULL);
1618    if (ksp != NULL && kstat_read(kc, ksp, NULL) != -1 &&
1619        ksp->ks_type == KSTAT_TYPE_NAMED)
1620      brand =
1621          (kstat_named_t *)kstat_data_lookup(ksp, const_cast<char *>("brand"));
1622    if (brand != NULL && brand->data_type == KSTAT_DATA_STRING)
1623      buf = KSTAT_NAMED_STR_PTR(brand);
1624  }
1625  kstat_close(kc);
1626
1627  return StringSwitch<const char *>(buf)
1628      .Case("TMS390S10", "supersparc") // Texas Instruments microSPARC I
1629      .Case("TMS390Z50", "supersparc") // Texas Instruments SuperSPARC I
1630      .Case("TMS390Z55",
1631            "supersparc") // Texas Instruments SuperSPARC I with SuperCache
1632      .Case("MB86904", "supersparc") // Fujitsu microSPARC II
1633      .Case("MB86907", "supersparc") // Fujitsu TurboSPARC
1634      .Case("RT623", "hypersparc")   // Ross hyperSPARC
1635      .Case("RT625", "hypersparc")
1636      .Case("RT626", "hypersparc")
1637      .Case("UltraSPARC-I", "ultrasparc")
1638      .Case("UltraSPARC-II", "ultrasparc")
1639      .Case("UltraSPARC-IIe", "ultrasparc")
1640      .Case("UltraSPARC-IIi", "ultrasparc")
1641      .Case("SPARC64-III", "ultrasparc")
1642      .Case("SPARC64-IV", "ultrasparc")
1643      .Case("UltraSPARC-III", "ultrasparc3")
1644      .Case("UltraSPARC-III+", "ultrasparc3")
1645      .Case("UltraSPARC-IIIi", "ultrasparc3")
1646      .Case("UltraSPARC-IIIi+", "ultrasparc3")
1647      .Case("UltraSPARC-IV", "ultrasparc3")
1648      .Case("UltraSPARC-IV+", "ultrasparc3")
1649      .Case("SPARC64-V", "ultrasparc3")
1650      .Case("SPARC64-VI", "ultrasparc3")
1651      .Case("SPARC64-VII", "ultrasparc3")
1652      .Case("UltraSPARC-T1", "niagara")
1653      .Case("UltraSPARC-T2", "niagara2")
1654      .Case("UltraSPARC-T2", "niagara2")
1655      .Case("UltraSPARC-T2+", "niagara2")
1656      .Case("SPARC-T3", "niagara3")
1657      .Case("SPARC-T4", "niagara4")
1658      .Case("SPARC-T5", "niagara4")
1659      // niagara7/m8 not supported by LLVM yet.
1660      .Case("SPARC-M7", "niagara4" /* "niagara7" */)
1661      .Case("SPARC-S7", "niagara4" /* "niagara7" */)
1662      .Case("SPARC-M8", "niagara4" /* "m8" */)
1663      .Default("generic");
1664#else
1665  return "generic";
1666#endif
1667}
1668#else
1669StringRef sys::getHostCPUName() { return "generic"; }
1670namespace llvm {
1671namespace sys {
1672namespace detail {
1673namespace x86 {
1674
1675VendorSignatures getVendorSignature(unsigned *MaxLeaf) {
1676  return VendorSignatures::UNKNOWN;
1677}
1678
1679} // namespace x86
1680} // namespace detail
1681} // namespace sys
1682} // namespace llvm
1683#endif
1684
1685#if defined(__i386__) || defined(_M_IX86) || \
1686    defined(__x86_64__) || defined(_M_X64)
1687bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
1688  unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
1689  unsigned MaxLevel;
1690
1691  if (getX86CpuIDAndInfo(0, &MaxLevel, &EBX, &ECX, &EDX) || MaxLevel < 1)
1692    return false;
1693
1694  getX86CpuIDAndInfo(1, &EAX, &EBX, &ECX, &EDX);
1695
1696  Features["cx8"]    = (EDX >>  8) & 1;
1697  Features["cmov"]   = (EDX >> 15) & 1;
1698  Features["mmx"]    = (EDX >> 23) & 1;
1699  Features["fxsr"]   = (EDX >> 24) & 1;
1700  Features["sse"]    = (EDX >> 25) & 1;
1701  Features["sse2"]   = (EDX >> 26) & 1;
1702
1703  Features["sse3"]   = (ECX >>  0) & 1;
1704  Features["pclmul"] = (ECX >>  1) & 1;
1705  Features["ssse3"]  = (ECX >>  9) & 1;
1706  Features["cx16"]   = (ECX >> 13) & 1;
1707  Features["sse4.1"] = (ECX >> 19) & 1;
1708  Features["sse4.2"] = (ECX >> 20) & 1;
1709  Features["crc32"]  = Features["sse4.2"];
1710  Features["movbe"]  = (ECX >> 22) & 1;
1711  Features["popcnt"] = (ECX >> 23) & 1;
1712  Features["aes"]    = (ECX >> 25) & 1;
1713  Features["rdrnd"]  = (ECX >> 30) & 1;
1714
1715  // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV
1716  // indicates that the AVX registers will be saved and restored on context
1717  // switch, then we have full AVX support.
1718  bool HasXSave = ((ECX >> 27) & 1) && !getX86XCR0(&EAX, &EDX);
1719  bool HasAVXSave = HasXSave && ((ECX >> 28) & 1) && ((EAX & 0x6) == 0x6);
1720#if defined(__APPLE__)
  // Darwin lazily saves the AVX512 context on first use: trust that the OS will
  // save the AVX512 context if we use AVX512 instructions, even if the bit is
  // not set right now.
1724  bool HasAVX512Save = true;
1725#else
1726  // AVX512 requires additional context to be saved by the OS.
1727  bool HasAVX512Save = HasAVXSave && ((EAX & 0xe0) == 0xe0);
1728#endif
1729  // AMX requires additional context to be saved by the OS.
1730  const unsigned AMXBits = (1 << 17) | (1 << 18);
1731  bool HasAMXSave = HasXSave && ((EAX & AMXBits) == AMXBits);
1732
1733  Features["avx"]   = HasAVXSave;
1734  Features["fma"]   = ((ECX >> 12) & 1) && HasAVXSave;
1735  // Only enable XSAVE if OS has enabled support for saving YMM state.
1736  Features["xsave"] = ((ECX >> 26) & 1) && HasAVXSave;
1737  Features["f16c"]  = ((ECX >> 29) & 1) && HasAVXSave;
1738
1739  unsigned MaxExtLevel;
1740  getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX);
1741
1742  bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 &&
1743                     !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
1744  Features["sahf"]   = HasExtLeaf1 && ((ECX >>  0) & 1);
1745  Features["lzcnt"]  = HasExtLeaf1 && ((ECX >>  5) & 1);
1746  Features["sse4a"]  = HasExtLeaf1 && ((ECX >>  6) & 1);
1747  Features["prfchw"] = HasExtLeaf1 && ((ECX >>  8) & 1);
1748  Features["xop"]    = HasExtLeaf1 && ((ECX >> 11) & 1) && HasAVXSave;
1749  Features["lwp"]    = HasExtLeaf1 && ((ECX >> 15) & 1);
1750  Features["fma4"]   = HasExtLeaf1 && ((ECX >> 16) & 1) && HasAVXSave;
1751  Features["tbm"]    = HasExtLeaf1 && ((ECX >> 21) & 1);
1752  Features["mwaitx"] = HasExtLeaf1 && ((ECX >> 29) & 1);
1753
1754  Features["64bit"]  = HasExtLeaf1 && ((EDX >> 29) & 1);
1755
1756  // Miscellaneous memory related features, detected by
1757  // using the 0x80000008 leaf of the CPUID instruction
1758  bool HasExtLeaf8 = MaxExtLevel >= 0x80000008 &&
1759                     !getX86CpuIDAndInfo(0x80000008, &EAX, &EBX, &ECX, &EDX);
1760  Features["clzero"]   = HasExtLeaf8 && ((EBX >> 0) & 1);
1761  Features["rdpru"]    = HasExtLeaf8 && ((EBX >> 4) & 1);
1762  Features["wbnoinvd"] = HasExtLeaf8 && ((EBX >> 9) & 1);
1763
1764  bool HasLeaf7 =
1765      MaxLevel >= 7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX);
1766
1767  Features["fsgsbase"]   = HasLeaf7 && ((EBX >>  0) & 1);
1768  Features["sgx"]        = HasLeaf7 && ((EBX >>  2) & 1);
1769  Features["bmi"]        = HasLeaf7 && ((EBX >>  3) & 1);
1770  // AVX2 is only supported if we have the OS save support from AVX.
1771  Features["avx2"]       = HasLeaf7 && ((EBX >>  5) & 1) && HasAVXSave;
1772  Features["bmi2"]       = HasLeaf7 && ((EBX >>  8) & 1);
1773  Features["invpcid"]    = HasLeaf7 && ((EBX >> 10) & 1);
1774  Features["rtm"]        = HasLeaf7 && ((EBX >> 11) & 1);
1775  // AVX512 is only supported if the OS supports the context save for it.
1776  Features["avx512f"]    = HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save;
1777  if (Features["avx512f"])
1778    Features["evex512"]  = true;
1779  Features["avx512dq"]   = HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save;
1780  Features["rdseed"]     = HasLeaf7 && ((EBX >> 18) & 1);
1781  Features["adx"]        = HasLeaf7 && ((EBX >> 19) & 1);
1782  Features["avx512ifma"] = HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save;
1783  Features["clflushopt"] = HasLeaf7 && ((EBX >> 23) & 1);
1784  Features["clwb"]       = HasLeaf7 && ((EBX >> 24) & 1);
1785  Features["avx512pf"]   = HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save;
1786  Features["avx512er"]   = HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save;
1787  Features["avx512cd"]   = HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save;
1788  Features["sha"]        = HasLeaf7 && ((EBX >> 29) & 1);
1789  Features["avx512bw"]   = HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save;
1790  Features["avx512vl"]   = HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save;
1791
1792  Features["prefetchwt1"]     = HasLeaf7 && ((ECX >>  0) & 1);
1793  Features["avx512vbmi"]      = HasLeaf7 && ((ECX >>  1) & 1) && HasAVX512Save;
1794  Features["pku"]             = HasLeaf7 && ((ECX >>  4) & 1);
1795  Features["waitpkg"]         = HasLeaf7 && ((ECX >>  5) & 1);
1796  Features["avx512vbmi2"]     = HasLeaf7 && ((ECX >>  6) & 1) && HasAVX512Save;
1797  Features["shstk"]           = HasLeaf7 && ((ECX >>  7) & 1);
1798  Features["gfni"]            = HasLeaf7 && ((ECX >>  8) & 1);
1799  Features["vaes"]            = HasLeaf7 && ((ECX >>  9) & 1) && HasAVXSave;
1800  Features["vpclmulqdq"]      = HasLeaf7 && ((ECX >> 10) & 1) && HasAVXSave;
1801  Features["avx512vnni"]      = HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save;
1802  Features["avx512bitalg"]    = HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save;
1803  Features["avx512vpopcntdq"] = HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save;
1804  Features["rdpid"]           = HasLeaf7 && ((ECX >> 22) & 1);
1805  Features["kl"]              = HasLeaf7 && ((ECX >> 23) & 1); // key locker
1806  Features["cldemote"]        = HasLeaf7 && ((ECX >> 25) & 1);
1807  Features["movdiri"]         = HasLeaf7 && ((ECX >> 27) & 1);
1808  Features["movdir64b"]       = HasLeaf7 && ((ECX >> 28) & 1);
1809  Features["enqcmd"]          = HasLeaf7 && ((ECX >> 29) & 1);
1810
1811  Features["uintr"]           = HasLeaf7 && ((EDX >> 5) & 1);
1812  Features["avx512vp2intersect"] =
1813      HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save;
1814  Features["serialize"]       = HasLeaf7 && ((EDX >> 14) & 1);
1815  Features["tsxldtrk"]        = HasLeaf7 && ((EDX >> 16) & 1);
  // There are two CPUID leaves with information associated with the pconfig
  // instruction:
  // EAX=0x7, ECX=0x0 indicates the availability of the instruction (via the 18th
  // bit of EDX), while the EAX=0x1b leaf returns information on the
  // availability of specific pconfig leaves.
  // The target feature here only refers to the first of these two.
  // Users might need to check for the availability of specific pconfig
  // leaves using cpuid, since that information is ignored while
  // detecting features using the "-march=native" flag.
  // For more info, see X86 ISA docs.
1826  Features["pconfig"] = HasLeaf7 && ((EDX >> 18) & 1);
1827  Features["amx-bf16"]   = HasLeaf7 && ((EDX >> 22) & 1) && HasAMXSave;
1828  Features["avx512fp16"] = HasLeaf7 && ((EDX >> 23) & 1) && HasAVX512Save;
1829  Features["amx-tile"]   = HasLeaf7 && ((EDX >> 24) & 1) && HasAMXSave;
1830  Features["amx-int8"]   = HasLeaf7 && ((EDX >> 25) & 1) && HasAMXSave;
1831  // EAX from subleaf 0 is the maximum subleaf supported. Some CPUs don't
1832  // return all 0s for invalid subleaves so check the limit.
1833  bool HasLeaf7Subleaf1 =
1834      HasLeaf7 && EAX >= 1 &&
1835      !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX);
1836  Features["sha512"]     = HasLeaf7Subleaf1 && ((EAX >> 0) & 1);
1837  Features["sm3"]        = HasLeaf7Subleaf1 && ((EAX >> 1) & 1);
1838  Features["sm4"]        = HasLeaf7Subleaf1 && ((EAX >> 2) & 1);
1839  Features["raoint"]     = HasLeaf7Subleaf1 && ((EAX >> 3) & 1);
1840  Features["avxvnni"]    = HasLeaf7Subleaf1 && ((EAX >> 4) & 1) && HasAVXSave;
1841  Features["avx512bf16"] = HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save;
1842  Features["amx-fp16"]   = HasLeaf7Subleaf1 && ((EAX >> 21) & 1) && HasAMXSave;
1843  Features["cmpccxadd"]  = HasLeaf7Subleaf1 && ((EAX >> 7) & 1);
1844  Features["hreset"]     = HasLeaf7Subleaf1 && ((EAX >> 22) & 1);
1845  Features["avxifma"]    = HasLeaf7Subleaf1 && ((EAX >> 23) & 1) && HasAVXSave;
1846  Features["avxvnniint8"] = HasLeaf7Subleaf1 && ((EDX >> 4) & 1) && HasAVXSave;
1847  Features["avxneconvert"] = HasLeaf7Subleaf1 && ((EDX >> 5) & 1) && HasAVXSave;
1848  Features["amx-complex"] = HasLeaf7Subleaf1 && ((EDX >> 8) & 1) && HasAMXSave;
1849  Features["avxvnniint16"] = HasLeaf7Subleaf1 && ((EDX >> 10) & 1) && HasAVXSave;
1850  Features["prefetchi"]  = HasLeaf7Subleaf1 && ((EDX >> 14) & 1);
1851  Features["usermsr"]  = HasLeaf7Subleaf1 && ((EDX >> 15) & 1);
1852  Features["avx10.1-256"] = HasLeaf7Subleaf1 && ((EDX >> 19) & 1);
1853
1854  bool HasLeafD = MaxLevel >= 0xd &&
1855                  !getX86CpuIDAndInfoEx(0xd, 0x1, &EAX, &EBX, &ECX, &EDX);
1856
1857  // Only enable XSAVE if OS has enabled support for saving YMM state.
1858  Features["xsaveopt"] = HasLeafD && ((EAX >> 0) & 1) && HasAVXSave;
1859  Features["xsavec"]   = HasLeafD && ((EAX >> 1) & 1) && HasAVXSave;
1860  Features["xsaves"]   = HasLeafD && ((EAX >> 3) & 1) && HasAVXSave;
1861
1862  bool HasLeaf14 = MaxLevel >= 0x14 &&
1863                  !getX86CpuIDAndInfoEx(0x14, 0x0, &EAX, &EBX, &ECX, &EDX);
1864
1865  Features["ptwrite"] = HasLeaf14 && ((EBX >> 4) & 1);
1866
1867  bool HasLeaf19 =
1868      MaxLevel >= 0x19 && !getX86CpuIDAndInfo(0x19, &EAX, &EBX, &ECX, &EDX);
1869  Features["widekl"] = HasLeaf7 && HasLeaf19 && ((EBX >> 2) & 1);
1870
1871  bool HasLeaf24 =
1872      MaxLevel >= 0x24 && !getX86CpuIDAndInfo(0x24, &EAX, &EBX, &ECX, &EDX);
1873  Features["avx10.1-512"] =
1874      Features["avx10.1-256"] && HasLeaf24 && ((EBX >> 18) & 1);
1875
1876  return true;
1877}
1878#elif defined(__linux__) && (defined(__arm__) || defined(__aarch64__))
1879bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
1880  std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
1881  if (!P)
1882    return false;
1883
1884  SmallVector<StringRef, 32> Lines;
1885  P->getBuffer().split(Lines, "\n");
1886
1887  SmallVector<StringRef, 32> CPUFeatures;
1888
1889  // Look for the CPU features.
1890  for (unsigned I = 0, E = Lines.size(); I != E; ++I)
1891    if (Lines[I].starts_with("Features")) {
1892      Lines[I].split(CPUFeatures, ' ');
1893      break;
1894    }
1895
1896#if defined(__aarch64__)
1897  // Keep track of which crypto features we have seen
1898  enum { CAP_AES = 0x1, CAP_PMULL = 0x2, CAP_SHA1 = 0x4, CAP_SHA2 = 0x8 };
1899  uint32_t crypto = 0;
1900#endif
1901
1902  for (unsigned I = 0, E = CPUFeatures.size(); I != E; ++I) {
1903    StringRef LLVMFeatureStr = StringSwitch<StringRef>(CPUFeatures[I])
1904#if defined(__aarch64__)
1905                                   .Case("asimd", "neon")
1906                                   .Case("fp", "fp-armv8")
1907                                   .Case("crc32", "crc")
1908                                   .Case("atomics", "lse")
1909                                   .Case("sve", "sve")
1910                                   .Case("sve2", "sve2")
1911#else
1912                                   .Case("half", "fp16")
1913                                   .Case("neon", "neon")
1914                                   .Case("vfpv3", "vfp3")
1915                                   .Case("vfpv3d16", "vfp3d16")
1916                                   .Case("vfpv4", "vfp4")
1917                                   .Case("idiva", "hwdiv-arm")
1918                                   .Case("idivt", "hwdiv")
1919#endif
1920                                   .Default("");
1921
1922#if defined(__aarch64__)
1923    // We need to check crypto separately since we need all of the crypto
1924    // extensions to enable the subtarget feature
1925    if (CPUFeatures[I] == "aes")
1926      crypto |= CAP_AES;
1927    else if (CPUFeatures[I] == "pmull")
1928      crypto |= CAP_PMULL;
1929    else if (CPUFeatures[I] == "sha1")
1930      crypto |= CAP_SHA1;
1931    else if (CPUFeatures[I] == "sha2")
1932      crypto |= CAP_SHA2;
1933#endif
1934
1935    if (LLVMFeatureStr != "")
1936      Features[LLVMFeatureStr] = true;
1937  }
1938
1939#if defined(__aarch64__)
1940  // If we have all crypto bits we can add the feature
1941  if (crypto == (CAP_AES | CAP_PMULL | CAP_SHA1 | CAP_SHA2))
1942    Features["crypto"] = true;
1943#endif
1944
1945  return true;
1946}
1947#elif defined(_WIN32) && (defined(__aarch64__) || defined(_M_ARM64))
1948bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
1949  if (IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE))
1950    Features["neon"] = true;
1951  if (IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE))
1952    Features["crc"] = true;
1953  if (IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE))
1954    Features["crypto"] = true;
1955
1956  return true;
1957}
1958#elif defined(__linux__) && defined(__loongarch__)
1959#include <sys/auxv.h>
1960bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
1961  unsigned long hwcap = getauxval(AT_HWCAP);
1962  bool HasFPU = hwcap & (1UL << 3); // HWCAP_LOONGARCH_FPU
1963  uint32_t cpucfg2 = 0x2;
1964  __asm__("cpucfg %[cpucfg2], %[cpucfg2]\n\t" : [cpucfg2] "+r"(cpucfg2));
1965
1966  Features["f"] = HasFPU && (cpucfg2 & (1U << 1)); // CPUCFG.2.FP_SP
1967  Features["d"] = HasFPU && (cpucfg2 & (1U << 2)); // CPUCFG.2.FP_DP
1968
1969  Features["lsx"] = hwcap & (1UL << 4);  // HWCAP_LOONGARCH_LSX
1970  Features["lasx"] = hwcap & (1UL << 5); // HWCAP_LOONGARCH_LASX
1971  Features["lvz"] = hwcap & (1UL << 9);  // HWCAP_LOONGARCH_LVZ
1972
1973  return true;
1974}
1975#else
1976bool sys::getHostCPUFeatures(StringMap<bool> &Features) { return false; }
1977#endif
1978
1979#if __APPLE__
1980/// \returns the \p triple, but with the Host's arch spliced in.
1981static Triple withHostArch(Triple T) {
1982#if defined(__arm__)
1983  T.setArch(Triple::arm);
1984  T.setArchName("arm");
1985#elif defined(__arm64e__)
1986  T.setArch(Triple::aarch64, Triple::AArch64SubArch_arm64e);
1987  T.setArchName("arm64e");
1988#elif defined(__aarch64__)
1989  T.setArch(Triple::aarch64);
1990  T.setArchName("arm64");
1991#elif defined(__x86_64h__)
1992  T.setArch(Triple::x86_64);
1993  T.setArchName("x86_64h");
1994#elif defined(__x86_64__)
1995  T.setArch(Triple::x86_64);
1996  T.setArchName("x86_64");
1997#elif defined(__i386__)
1998  T.setArch(Triple::x86);
1999  T.setArchName("i386");
2000#elif defined(__powerpc__)
2001  T.setArch(Triple::ppc);
2002  T.setArchName("powerpc");
2003#else
2004#  error "Unimplemented host arch fixup"
2005#endif
2006  return T;
2007}
2008#endif
2009
2010std::string sys::getProcessTriple() {
2011  std::string TargetTripleString = updateTripleOSVersion(LLVM_HOST_TRIPLE);
2012  Triple PT(Triple::normalize(TargetTripleString));
2013
2014#if __APPLE__
2015  /// In Universal builds, LLVM_HOST_TRIPLE will have the wrong arch in one of
2016  /// the slices. This fixes that up.
2017  PT = withHostArch(PT);
2018#endif
2019
2020  if (sizeof(void *) == 8 && PT.isArch32Bit())
2021    PT = PT.get64BitArchVariant();
2022  if (sizeof(void *) == 4 && PT.isArch64Bit())
2023    PT = PT.get32BitArchVariant();
2024
2025  return PT.str();
2026}
2027
2028void sys::printDefaultTargetAndDetectedCPU(raw_ostream &OS) {
2029#if LLVM_VERSION_PRINTER_SHOW_HOST_TARGET_INFO
2030  std::string CPU = std::string(sys::getHostCPUName());
2031  if (CPU == "generic")
2032    CPU = "(unknown)";
2033  OS << "  Default target: " << sys::getDefaultTargetTriple() << '\n'
2034     << "  Host CPU: " << CPU << '\n';
2035#endif
2036}
2037