1//===-- cpu_model.c - Support for __cpu_model builtin  ------------*- C -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9//  This file is based on LLVM's lib/Support/Host.cpp.
10//  It implements the operating system Host concept and builtin
11//  __cpu_model for the compiler_rt library for x86 and
12//  __aarch64_have_lse_atomics, __aarch64_cpu_features for AArch64.
13//
14//===----------------------------------------------------------------------===//
15
16#ifndef __has_attribute
17#define __has_attribute(attr) 0
18#endif
19
20#if __has_attribute(constructor)
21#if __GNUC__ >= 9
22// Ordinarily init priorities below 101 are disallowed as they are reserved for the
23// implementation. However, we are the implementation, so silence the diagnostic,
24// since it doesn't apply to us.
25#pragma GCC diagnostic ignored "-Wprio-ctor-dtor"
26#endif
27// We're choosing init priority 90 to force our constructors to run before any
28// constructors in the end user application (starting at priority 101). This value
29// matches the libgcc choice for the same functions.
30#define CONSTRUCTOR_ATTRIBUTE __attribute__((constructor(90)))
31#else
32// FIXME: For MSVC, we should make a function pointer global in .CRT$X?? so that
33// this runs during initialization.
34#define CONSTRUCTOR_ATTRIBUTE
35#endif
36
37#if (defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) ||           \
38     defined(_M_X64)) &&                                                       \
39    (defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER))
40
41#include <assert.h>
42
43#define bool int
44#define true 1
45#define false 0
46
47#ifdef _MSC_VER
48#include <intrin.h>
49#endif
50
// Vendor-identification words compared against the EBX value returned by
// CPUID leaf 0 (the first 4 bytes of "GenuineIntel" / "AuthenticAMD",
// viewed as a little-endian 32-bit integer).
enum VendorSignatures {
  SIG_INTEL = 0x756e6547, // Genu
  SIG_AMD = 0x68747541,   // Auth
};

// Values stored into __cpu_model.__cpu_vendor.  0 is reserved to mean
// "not yet initialized" (see __cpu_indicator_init), so numbering starts
// at 1.  Existing values are observed by compiled programs and must not
// be renumbered.
enum ProcessorVendors {
  VENDOR_INTEL = 1,
  VENDOR_AMD,
  VENDOR_OTHER,
  VENDOR_MAX
};
62
// CPU type values stored into __cpu_model.__cpu_type.  The list is
// append-only (note the interleaved Intel/AMD entries): new entries go
// immediately before CPU_TYPE_MAX so previously assigned values never
// change, since they are part of the __cpu_model ABI.
enum ProcessorTypes {
  INTEL_BONNELL = 1,
  INTEL_CORE2,
  INTEL_COREI7,
  AMDFAM10H,
  AMDFAM15H,
  INTEL_SILVERMONT,
  INTEL_KNL,
  AMD_BTVER1,
  AMD_BTVER2,
  AMDFAM17H,
  INTEL_KNM,
  INTEL_GOLDMONT,
  INTEL_GOLDMONT_PLUS,
  INTEL_TREMONT,
  AMDFAM19H,
  ZHAOXIN_FAM7H,
  INTEL_SIERRAFOREST,
  INTEL_GRANDRIDGE,
  CPU_TYPE_MAX
};
84
// CPU subtype values stored into __cpu_model.__cpu_subtype, refining the
// type above (e.g. which Core i7 generation).  Like ProcessorTypes, this
// list is append-only: new entries go immediately before CPU_SUBTYPE_MAX.
enum ProcessorSubtypes {
  INTEL_COREI7_NEHALEM = 1,
  INTEL_COREI7_WESTMERE,
  INTEL_COREI7_SANDYBRIDGE,
  AMDFAM10H_BARCELONA,
  AMDFAM10H_SHANGHAI,
  AMDFAM10H_ISTANBUL,
  AMDFAM15H_BDVER1,
  AMDFAM15H_BDVER2,
  AMDFAM15H_BDVER3,
  AMDFAM15H_BDVER4,
  AMDFAM17H_ZNVER1,
  INTEL_COREI7_IVYBRIDGE,
  INTEL_COREI7_HASWELL,
  INTEL_COREI7_BROADWELL,
  INTEL_COREI7_SKYLAKE,
  INTEL_COREI7_SKYLAKE_AVX512,
  INTEL_COREI7_CANNONLAKE,
  INTEL_COREI7_ICELAKE_CLIENT,
  INTEL_COREI7_ICELAKE_SERVER,
  AMDFAM17H_ZNVER2,
  INTEL_COREI7_CASCADELAKE,
  INTEL_COREI7_TIGERLAKE,
  INTEL_COREI7_COOPERLAKE,
  INTEL_COREI7_SAPPHIRERAPIDS,
  INTEL_COREI7_ALDERLAKE,
  AMDFAM19H_ZNVER3,
  INTEL_COREI7_ROCKETLAKE,
  ZHAOXIN_FAM7H_LUJIAZUI,
  AMDFAM19H_ZNVER4,
  INTEL_COREI7_GRANITERAPIDS,
  CPU_SUBTYPE_MAX
};
118
// Bit indices of the feature bitset built by getAvailableFeatures().
// Bits 0-31 are published in __cpu_model.__cpu_features[0]; bits 32 and up
// in __cpu_features2 (see __cpu_indicator_init).  Append-only for the same
// ABI reason as the enums above.
enum ProcessorFeatures {
  FEATURE_CMOV = 0,
  FEATURE_MMX,
  FEATURE_POPCNT,
  FEATURE_SSE,
  FEATURE_SSE2,
  FEATURE_SSE3,
  FEATURE_SSSE3,
  FEATURE_SSE4_1,
  FEATURE_SSE4_2,
  FEATURE_AVX,
  FEATURE_AVX2,
  FEATURE_SSE4_A,
  FEATURE_FMA4,
  FEATURE_XOP,
  FEATURE_FMA,
  FEATURE_AVX512F,
  FEATURE_BMI,
  FEATURE_BMI2,
  FEATURE_AES,
  FEATURE_PCLMUL,
  FEATURE_AVX512VL,
  FEATURE_AVX512BW,
  FEATURE_AVX512DQ,
  FEATURE_AVX512CD,
  FEATURE_AVX512ER,
  FEATURE_AVX512PF,
  FEATURE_AVX512VBMI,
  FEATURE_AVX512IFMA,
  FEATURE_AVX5124VNNIW,
  FEATURE_AVX5124FMAPS,
  FEATURE_AVX512VPOPCNTDQ,
  FEATURE_AVX512VBMI2,
  FEATURE_GFNI,
  FEATURE_VPCLMULQDQ,
  FEATURE_AVX512VNNI,
  FEATURE_AVX512BITALG,
  FEATURE_AVX512BF16,
  FEATURE_AVX512VP2INTERSECT,
  CPU_FEATURE_MAX
};
160
// The check below for i386 was copied from clang's cpuid.h (__get_cpuid_max).
// Check motivated by bug reports for OpenSSL crashing on CPUs without CPUID
// support. Consequently, for i386, the presence of CPUID is checked first
// via the corresponding eflags bit.
//
// Returns true if the CPUID instruction may be executed on this CPU.
static bool isCpuIdSupported(void) {
#if defined(__GNUC__) || defined(__clang__)
#if defined(__i386__)
  // Try to flip the ID bit (bit 21, mask 0x00200000) in EFLAGS.  If the bit
  // reads back changed, the CPU honors it and therefore supports CPUID.
  int __cpuid_supported;
  __asm__("  pushfl\n"
          "  popl   %%eax\n"
          "  movl   %%eax,%%ecx\n"
          "  xorl   $0x00200000,%%eax\n"
          "  pushl  %%eax\n"
          "  popfl\n"
          "  pushfl\n"
          "  popl   %%eax\n"
          "  movl   $0,%0\n"
          "  cmpl   %%eax,%%ecx\n"
          "  je     1f\n"
          "  movl   $1,%0\n"
          "1:"
          : "=r"(__cpuid_supported)
          :
          : "eax", "ecx");
  if (!__cpuid_supported)
    return false;
#endif
  // x86-64 (and i386 CPUs that passed the check above) always have CPUID.
  return true;
#endif
  // Other compilers (e.g. MSVC): assume CPUID is available.
  return true;
}
192
193// This code is copied from lib/Support/Host.cpp.
194// Changes to either file should be mirrored in the other.
195
/// getX86CpuIDAndInfo - Execute the specified cpuid and return the 4 values in
/// the specified arguments.  If we can't run cpuid on the host, return true.
/// \p value is the CPUID leaf number (the EAX input); \p rEAX..\p rEDX
/// receive the four result registers.  Note the inverted convention: the
/// return value is a failure flag, not success.
static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
                               unsigned *rECX, unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
#if defined(__x86_64__)
  // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
  // FIXME: should we save this for Clang?
  __asm__("movq\t%%rbx, %%rsi\n\t"
          "cpuid\n\t"
          "xchgq\t%%rbx, %%rsi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value));
  return false;
#elif defined(__i386__)
  // Same ebx-preservation dance as above, with 32-bit registers.
  __asm__("movl\t%%ebx, %%esi\n\t"
          "cpuid\n\t"
          "xchgl\t%%ebx, %%esi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value));
  return false;
#else
  return true;
#endif
#elif defined(_MSC_VER)
  // The MSVC intrinsic is portable across x86 and x64.
  int registers[4];
  __cpuid(registers, value);
  *rEAX = registers[0];
  *rEBX = registers[1];
  *rECX = registers[2];
  *rEDX = registers[3];
  return false;
#else
  return true;
#endif
}
233
/// getX86CpuIDAndInfoEx - Execute the specified cpuid with subleaf and return
/// the 4 values in the specified arguments.  If we can't run cpuid on the
/// host, return true.
/// Same contract as getX86CpuIDAndInfo, but also feeds \p subleaf into ECX
/// (needed for e.g. leaf 7).
static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf,
                                 unsigned *rEAX, unsigned *rEBX, unsigned *rECX,
                                 unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
#if defined(__x86_64__)
  // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
  // FIXME: should we save this for Clang?
  __asm__("movq\t%%rbx, %%rsi\n\t"
          "cpuid\n\t"
          "xchgq\t%%rbx, %%rsi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value), "c"(subleaf));
  return false;
#elif defined(__i386__)
  __asm__("movl\t%%ebx, %%esi\n\t"
          "cpuid\n\t"
          "xchgl\t%%ebx, %%esi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value), "c"(subleaf));
  return false;
#else
  return true;
#endif
#elif defined(_MSC_VER)
  int registers[4];
  __cpuidex(registers, value, subleaf);
  *rEAX = registers[0];
  *rEBX = registers[1];
  *rECX = registers[2];
  *rEDX = registers[3];
  return false;
#else
  return true;
#endif
}
272
// Read control register 0 (XCR0). Used to detect features such as AVX.
// On success, writes the low/high 32 bits of XCR0 to *rEAX/*rEDX and
// returns false; returns true when XGETBV cannot be executed (same
// inverted failure convention as the cpuid helpers above).
static bool getX86XCR0(unsigned *rEAX, unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
  // Check xgetbv; this uses a .byte sequence instead of the instruction
  // directly because older assemblers do not include support for xgetbv and
  // there is no easy way to conditionally compile based on the assembler used.
  __asm__(".byte 0x0f, 0x01, 0xd0" : "=a"(*rEAX), "=d"(*rEDX) : "c"(0));
  return false;
#elif defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK)
  unsigned long long Result = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
  *rEAX = Result;
  *rEDX = Result >> 32;
  return false;
#else
  return true;
#endif
}
290
// Decode the display family and display model from a CPUID leaf 1 EAX
// value.  The 4-bit base fields are widened with the extended-family /
// extended-model fields exactly where the encoding calls for it:
// extended family only when the base family is 0xF, extended model when
// the base family is 0x6 or 0xF.
static void detectX86FamilyModel(unsigned EAX, unsigned *Family,
                                 unsigned *Model) {
  unsigned BaseFamily = (EAX >> 8) & 0xf; // Bits 8 - 11
  unsigned BaseModel = (EAX >> 4) & 0xf;  // Bits 4 - 7
  *Family = BaseFamily;
  *Model = BaseModel;
  if (BaseFamily == 0x6 || BaseFamily == 0xf) {
    if (BaseFamily == 0xf)
      // Examine extended family ID if family ID is F.
      *Family = BaseFamily + ((EAX >> 20) & 0xff); // Bits 20 - 27
    // Examine extended model ID if family ID is 6 or F.  The base model is
    // only 4 bits, so OR-ing in the shifted extension equals addition.
    *Model = BaseModel | (((EAX >> 16) & 0xf) << 4); // Bits 16 - 19
  }
}
303
304static const char *
305getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
306                                const unsigned *Features,
307                                unsigned *Type, unsigned *Subtype) {
308#define testFeature(F)                                                         \
309  (Features[F / 32] & (1 << (F % 32))) != 0
310
311  // We select CPU strings to match the code in Host.cpp, but we don't use them
312  // in compiler-rt.
313  const char *CPU = 0;
314
315  switch (Family) {
316  case 6:
317    switch (Model) {
318    case 0x0f: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile
319               // processor, Intel Core 2 Quad processor, Intel Core 2 Quad
320               // mobile processor, Intel Core 2 Extreme processor, Intel
321               // Pentium Dual-Core processor, Intel Xeon processor, model
322               // 0Fh. All processors are manufactured using the 65 nm process.
323    case 0x16: // Intel Celeron processor model 16h. All processors are
324               // manufactured using the 65 nm process
325      CPU = "core2";
326      *Type = INTEL_CORE2;
327      break;
328    case 0x17: // Intel Core 2 Extreme processor, Intel Xeon processor, model
329               // 17h. All processors are manufactured using the 45 nm process.
330               //
331               // 45nm: Penryn , Wolfdale, Yorkfield (XE)
332    case 0x1d: // Intel Xeon processor MP. All processors are manufactured using
333               // the 45 nm process.
334      CPU = "penryn";
335      *Type = INTEL_CORE2;
336      break;
337    case 0x1a: // Intel Core i7 processor and Intel Xeon processor. All
338               // processors are manufactured using the 45 nm process.
339    case 0x1e: // Intel(R) Core(TM) i7 CPU         870  @ 2.93GHz.
340               // As found in a Summer 2010 model iMac.
341    case 0x1f:
342    case 0x2e:              // Nehalem EX
343      CPU = "nehalem";
344      *Type = INTEL_COREI7;
345      *Subtype = INTEL_COREI7_NEHALEM;
346      break;
347    case 0x25: // Intel Core i7, laptop version.
348    case 0x2c: // Intel Core i7 processor and Intel Xeon processor. All
349               // processors are manufactured using the 32 nm process.
350    case 0x2f: // Westmere EX
351      CPU = "westmere";
352      *Type = INTEL_COREI7;
353      *Subtype = INTEL_COREI7_WESTMERE;
354      break;
355    case 0x2a: // Intel Core i7 processor. All processors are manufactured
356               // using the 32 nm process.
357    case 0x2d:
358      CPU = "sandybridge";
359      *Type = INTEL_COREI7;
360      *Subtype = INTEL_COREI7_SANDYBRIDGE;
361      break;
362    case 0x3a:
363    case 0x3e:              // Ivy Bridge EP
364      CPU = "ivybridge";
365      *Type = INTEL_COREI7;
366      *Subtype = INTEL_COREI7_IVYBRIDGE;
367      break;
368
369    // Haswell:
370    case 0x3c:
371    case 0x3f:
372    case 0x45:
373    case 0x46:
374      CPU = "haswell";
375      *Type = INTEL_COREI7;
376      *Subtype = INTEL_COREI7_HASWELL;
377      break;
378
379    // Broadwell:
380    case 0x3d:
381    case 0x47:
382    case 0x4f:
383    case 0x56:
384      CPU = "broadwell";
385      *Type = INTEL_COREI7;
386      *Subtype = INTEL_COREI7_BROADWELL;
387      break;
388
389    // Skylake:
390    case 0x4e:              // Skylake mobile
391    case 0x5e:              // Skylake desktop
392    case 0x8e:              // Kaby Lake mobile
393    case 0x9e:              // Kaby Lake desktop
394    case 0xa5:              // Comet Lake-H/S
395    case 0xa6:              // Comet Lake-U
396      CPU = "skylake";
397      *Type = INTEL_COREI7;
398      *Subtype = INTEL_COREI7_SKYLAKE;
399      break;
400
401    // Rocketlake:
402    case 0xa7:
403      CPU = "rocketlake";
404      *Type = INTEL_COREI7;
405      *Subtype = INTEL_COREI7_ROCKETLAKE;
406      break;
407
408    // Skylake Xeon:
409    case 0x55:
410      *Type = INTEL_COREI7;
411      if (testFeature(FEATURE_AVX512BF16)) {
412        CPU = "cooperlake";
413        *Subtype = INTEL_COREI7_COOPERLAKE;
414      } else if (testFeature(FEATURE_AVX512VNNI)) {
415        CPU = "cascadelake";
416        *Subtype = INTEL_COREI7_CASCADELAKE;
417      } else {
418        CPU = "skylake-avx512";
419        *Subtype = INTEL_COREI7_SKYLAKE_AVX512;
420      }
421      break;
422
423    // Cannonlake:
424    case 0x66:
425      CPU = "cannonlake";
426      *Type = INTEL_COREI7;
427      *Subtype = INTEL_COREI7_CANNONLAKE;
428      break;
429
430    // Icelake:
431    case 0x7d:
432    case 0x7e:
433      CPU = "icelake-client";
434      *Type = INTEL_COREI7;
435      *Subtype = INTEL_COREI7_ICELAKE_CLIENT;
436      break;
437
438    // Tigerlake:
439    case 0x8c:
440    case 0x8d:
441      CPU = "tigerlake";
442      *Type = INTEL_COREI7;
443      *Subtype = INTEL_COREI7_TIGERLAKE;
444      break;
445
446    // Alderlake:
447    case 0x97:
448    case 0x9a:
449    // Raptorlake:
450    case 0xb7:
451    // Meteorlake:
452    case 0xaa:
453    case 0xac:
454      CPU = "alderlake";
455      *Type = INTEL_COREI7;
456      *Subtype = INTEL_COREI7_ALDERLAKE;
457      break;
458
459    // Icelake Xeon:
460    case 0x6a:
461    case 0x6c:
462      CPU = "icelake-server";
463      *Type = INTEL_COREI7;
464      *Subtype = INTEL_COREI7_ICELAKE_SERVER;
465      break;
466
467    // Emerald Rapids:
468    case 0xcf:
469    // Sapphire Rapids:
470    case 0x8f:
471      CPU = "sapphirerapids";
472      *Type = INTEL_COREI7;
473      *Subtype = INTEL_COREI7_SAPPHIRERAPIDS;
474      break;
475
476    // Granite Rapids:
477    case 0xae:
478    case 0xad:
479      CPU = "graniterapids";
480      *Type = INTEL_COREI7;
481      *Subtype = INTEL_COREI7_GRANITERAPIDS;
482      break;
483
484    case 0x1c: // Most 45 nm Intel Atom processors
485    case 0x26: // 45 nm Atom Lincroft
486    case 0x27: // 32 nm Atom Medfield
487    case 0x35: // 32 nm Atom Midview
488    case 0x36: // 32 nm Atom Midview
489      CPU = "bonnell";
490      *Type = INTEL_BONNELL;
491      break;
492
493    // Atom Silvermont codes from the Intel software optimization guide.
494    case 0x37:
495    case 0x4a:
496    case 0x4d:
497    case 0x5a:
498    case 0x5d:
499    case 0x4c: // really airmont
500      CPU = "silvermont";
501      *Type = INTEL_SILVERMONT;
502      break;
503    // Goldmont:
504    case 0x5c: // Apollo Lake
505    case 0x5f: // Denverton
506      CPU = "goldmont";
507      *Type = INTEL_GOLDMONT;
508      break; // "goldmont"
509    case 0x7a:
510      CPU = "goldmont-plus";
511      *Type = INTEL_GOLDMONT_PLUS;
512      break;
513    case 0x86:
514      CPU = "tremont";
515      *Type = INTEL_TREMONT;
516      break;
517
518    // Sierraforest:
519    case 0xaf:
520      CPU = "sierraforest";
521      *Type = INTEL_SIERRAFOREST;
522      break;
523
524    // Grandridge:
525    case 0xb6:
526      CPU = "grandridge";
527      *Type = INTEL_GRANDRIDGE;
528      break;
529
530    case 0x57:
531      CPU = "knl";
532      *Type = INTEL_KNL;
533      break;
534
535    case 0x85:
536      CPU = "knm";
537      *Type = INTEL_KNM;
538      break;
539
540    default: // Unknown family 6 CPU.
541      break;
542    }
543    break;
544  default:
545    break; // Unknown.
546  }
547
548  return CPU;
549}
550
/// Map an AMD CPU -- identified by its already-extended CPUID \p Family and
/// \p Model -- onto the __cpu_model \p Type / \p Subtype values.  The
/// outputs are written only for recognized families/models.  \p Features is
/// unused here; it is kept for signature parity with the Intel variant.
/// Returns the CPU name string matching Host.cpp (unused in compiler-rt) or
/// NULL for unknown CPUs.
static const char *
getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model,
                              const unsigned *Features,
                              unsigned *Type, unsigned *Subtype) {
  // We select CPU strings to match the code in Host.cpp, but we don't use them
  // in compiler-rt.
  const char *CPU = 0;

  switch (Family) {
  case 16:
    CPU = "amdfam10";
    *Type = AMDFAM10H;
    switch (Model) {
    case 2:
      *Subtype = AMDFAM10H_BARCELONA;
      break;
    case 4:
      *Subtype = AMDFAM10H_SHANGHAI;
      break;
    case 8:
      *Subtype = AMDFAM10H_ISTANBUL;
      break;
    }
    break;
  case 20:
    CPU = "btver1";
    *Type = AMD_BTVER1;
    break;
  case 21:
    // Bulldozer line; default to bdver1, then refine by model range.
    // Unmatched models keep the generic CPU name and leave *Subtype alone.
    CPU = "bdver1";
    *Type = AMDFAM15H;
    if (Model >= 0x60 && Model <= 0x7f) {
      CPU = "bdver4";
      *Subtype = AMDFAM15H_BDVER4;
      break; // 60h-7Fh: Excavator
    }
    if (Model >= 0x30 && Model <= 0x3f) {
      CPU = "bdver3";
      *Subtype = AMDFAM15H_BDVER3;
      break; // 30h-3Fh: Steamroller
    }
    if ((Model >= 0x10 && Model <= 0x1f) || Model == 0x02) {
      CPU = "bdver2";
      *Subtype = AMDFAM15H_BDVER2;
      break; // 02h, 10h-1Fh: Piledriver
    }
    if (Model <= 0x0f) {
      *Subtype = AMDFAM15H_BDVER1;
      break; // 00h-0Fh: Bulldozer
    }
    break;
  case 22:
    CPU = "btver2";
    *Type = AMD_BTVER2;
    break;
  case 23:
    // Zen/Zen2; same defaulting scheme as family 21 above.
    CPU = "znver1";
    *Type = AMDFAM17H;
    if ((Model >= 0x30 && Model <= 0x3f) || Model == 0x71) {
      CPU = "znver2";
      *Subtype = AMDFAM17H_ZNVER2;
      break; // 30h-3fh, 71h: Zen2
    }
    if (Model <= 0x0f) {
      *Subtype = AMDFAM17H_ZNVER1;
      break; // 00h-0Fh: Zen1
    }
    break;
  case 25:
    // Zen3/Zen3+/Zen4.
    CPU = "znver3";
    *Type = AMDFAM19H;
    if (Model <= 0x0f || (Model >= 0x20 && Model <= 0x5f)) {
      // Family 19h Models 00h-0Fh - Zen3
      // Family 19h Models 20h-2Fh - Zen3
      // Family 19h Models 30h-3Fh - Zen3
      // Family 19h Models 40h-4Fh - Zen3+
      // Family 19h Models 50h-5Fh - Zen3+
      *Subtype = AMDFAM19H_ZNVER3;
      break;
    }
    if ((Model >= 0x10 && Model <= 0x1f) ||
        (Model >= 0x60 && Model <= 0x74) ||
        (Model >= 0x78 && Model <= 0x7b) ||
        (Model >= 0xA0 && Model <= 0xAf)) {
      CPU = "znver4";
      *Subtype = AMDFAM19H_ZNVER4;
      break; //  "znver4"
    }
    break;
  default:
    break; // Unknown AMD CPU.
  }

  return CPU;
}
646
/// Populate the FEATURE_* bitset for the running CPU.
/// \p ECX and \p EDX are the CPUID leaf 1 register values already fetched
/// by the caller; \p MaxLeaf is the highest supported standard leaf (from
/// leaf 0); \p Features is the output bitset, CPU_FEATURE_MAX bits spread
/// over 32-bit words (word 0 -> __cpu_model.__cpu_features[0], word 1 ->
/// __cpu_features2).
static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
                                 unsigned *Features) {
  unsigned EAX, EBX;

#define setFeature(F)                                                          \
  Features[F / 32] |= 1U << (F % 32)

  // CPUID leaf 1 EDX bits.
  if ((EDX >> 15) & 1)
    setFeature(FEATURE_CMOV);
  if ((EDX >> 23) & 1)
    setFeature(FEATURE_MMX);
  if ((EDX >> 25) & 1)
    setFeature(FEATURE_SSE);
  if ((EDX >> 26) & 1)
    setFeature(FEATURE_SSE2);

  // CPUID leaf 1 ECX bits.
  if ((ECX >> 0) & 1)
    setFeature(FEATURE_SSE3);
  if ((ECX >> 1) & 1)
    setFeature(FEATURE_PCLMUL);
  if ((ECX >> 9) & 1)
    setFeature(FEATURE_SSSE3);
  if ((ECX >> 12) & 1)
    setFeature(FEATURE_FMA);
  if ((ECX >> 19) & 1)
    setFeature(FEATURE_SSE4_1);
  if ((ECX >> 20) & 1)
    setFeature(FEATURE_SSE4_2);
  if ((ECX >> 23) & 1)
    setFeature(FEATURE_POPCNT);
  if ((ECX >> 25) & 1)
    setFeature(FEATURE_AES);

  // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV
  // indicates that the AVX registers will be saved and restored on context
  // switch, then we have full AVX support.
  // Note the short-circuit: EAX is only examined when getX86XCR0 succeeded
  // (it would otherwise be read uninitialized).
  const unsigned AVXBits = (1 << 27) | (1 << 28);
  bool HasAVX = ((ECX & AVXBits) == AVXBits) && !getX86XCR0(&EAX, &EDX) &&
                ((EAX & 0x6) == 0x6);
#if defined(__APPLE__)
  // Darwin lazily saves the AVX512 context on first use: trust that the OS
  // will save the AVX512 context if we use AVX512 instructions, even if the
  // bit is not set right now.
  bool HasAVX512Save = true;
#else
  // AVX512 requires additional context to be saved by the OS.
  bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0);
#endif

  if (HasAVX)
    setFeature(FEATURE_AVX);

  // CPUID leaf 7, subleaf 0 (AVX2/BMI/AVX-512 family).  Note this reuses
  // EBX/ECX/EDX for the new leaf's registers from here on.
  bool HasLeaf7 =
      MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX);

  if (HasLeaf7 && ((EBX >> 3) & 1))
    setFeature(FEATURE_BMI);
  if (HasLeaf7 && ((EBX >> 5) & 1) && HasAVX)
    setFeature(FEATURE_AVX2);
  if (HasLeaf7 && ((EBX >> 8) & 1))
    setFeature(FEATURE_BMI2);
  if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save)
    setFeature(FEATURE_AVX512F);
  if (HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save)
    setFeature(FEATURE_AVX512DQ);
  if (HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save)
    setFeature(FEATURE_AVX512IFMA);
  if (HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save)
    setFeature(FEATURE_AVX512PF);
  if (HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save)
    setFeature(FEATURE_AVX512ER);
  if (HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save)
    setFeature(FEATURE_AVX512CD);
  if (HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save)
    setFeature(FEATURE_AVX512BW);
  if (HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save)
    setFeature(FEATURE_AVX512VL);

  if (HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save)
    setFeature(FEATURE_AVX512VBMI);
  if (HasLeaf7 && ((ECX >> 6) & 1) && HasAVX512Save)
    setFeature(FEATURE_AVX512VBMI2);
  if (HasLeaf7 && ((ECX >> 8) & 1))
    setFeature(FEATURE_GFNI);
  if (HasLeaf7 && ((ECX >> 10) & 1) && HasAVX)
    setFeature(FEATURE_VPCLMULQDQ);
  if (HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save)
    setFeature(FEATURE_AVX512VNNI);
  if (HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save)
    setFeature(FEATURE_AVX512BITALG);
  if (HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save)
    setFeature(FEATURE_AVX512VPOPCNTDQ);

  if (HasLeaf7 && ((EDX >> 2) & 1) && HasAVX512Save)
    setFeature(FEATURE_AVX5124VNNIW);
  if (HasLeaf7 && ((EDX >> 3) & 1) && HasAVX512Save)
    setFeature(FEATURE_AVX5124FMAPS);
  if (HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save)
    setFeature(FEATURE_AVX512VP2INTERSECT);

  // CPUID leaf 7, subleaf 1 (BF16).
  bool HasLeaf7Subleaf1 =
      MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX);
  if (HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save)
    setFeature(FEATURE_AVX512BF16);

  // Extended leaves (AMD SSE4a/XOP/FMA4).
  unsigned MaxExtLevel;
  getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX);

  bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 &&
                     !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
  if (HasExtLeaf1 && ((ECX >> 6) & 1))
    setFeature(FEATURE_SSE4_A);
  if (HasExtLeaf1 && ((ECX >> 11) & 1))
    setFeature(FEATURE_XOP);
  if (HasExtLeaf1 && ((ECX >> 16) & 1))
    setFeature(FEATURE_FMA4);
#undef setFeature
}
765
#ifndef _WIN32
__attribute__((visibility("hidden")))
#endif
int __cpu_indicator_init(void) CONSTRUCTOR_ATTRIBUTE;

#ifndef _WIN32
__attribute__((visibility("hidden")))
#endif
// CPU identification record filled in once by __cpu_indicator_init.
// NOTE(review): the layout presumably mirrors libgcc's __processor_model
// for the __cpu_model builtin -- confirm before changing any field.
struct __processor_model {
  unsigned int __cpu_vendor;      // enum ProcessorVendors; 0 = uninitialized
  unsigned int __cpu_type;        // enum ProcessorTypes
  unsigned int __cpu_subtype;     // enum ProcessorSubtypes
  unsigned int __cpu_features[1]; // FEATURE_* bits 0-31
} __cpu_model = {0, 0, 0, {0}};

#ifndef _WIN32
__attribute__((visibility("hidden")))
#endif
// FEATURE_* bits 32 and up (see __cpu_indicator_init).
unsigned int __cpu_features2 = 0;
785
// A constructor function that sets __cpu_model and __cpu_features2 with
787// the right values.  This needs to run only once.  This constructor is
788// given the highest priority and it should run before constructors without
789// the priority set.  However, it still runs after ifunc initializers and
790// needs to be called explicitly there.
791
792int CONSTRUCTOR_ATTRIBUTE __cpu_indicator_init(void) {
793  unsigned EAX, EBX, ECX, EDX;
794  unsigned MaxLeaf = 5;
795  unsigned Vendor;
796  unsigned Model, Family;
797  unsigned Features[(CPU_FEATURE_MAX + 31) / 32] = {0};
798
799  // This function needs to run just once.
800  if (__cpu_model.__cpu_vendor)
801    return 0;
802
803  if (!isCpuIdSupported() ||
804      getX86CpuIDAndInfo(0, &MaxLeaf, &Vendor, &ECX, &EDX) || MaxLeaf < 1) {
805    __cpu_model.__cpu_vendor = VENDOR_OTHER;
806    return -1;
807  }
808
809  getX86CpuIDAndInfo(1, &EAX, &EBX, &ECX, &EDX);
810  detectX86FamilyModel(EAX, &Family, &Model);
811
812  // Find available features.
813  getAvailableFeatures(ECX, EDX, MaxLeaf, &Features[0]);
814
815  assert((sizeof(Features)/sizeof(Features[0])) == 2);
816  __cpu_model.__cpu_features[0] = Features[0];
817  __cpu_features2 = Features[1];
818
819  if (Vendor == SIG_INTEL) {
820    // Get CPU type.
821    getIntelProcessorTypeAndSubtype(Family, Model, &Features[0],
822                                    &(__cpu_model.__cpu_type),
823                                    &(__cpu_model.__cpu_subtype));
824    __cpu_model.__cpu_vendor = VENDOR_INTEL;
825  } else if (Vendor == SIG_AMD) {
826    // Get CPU type.
827    getAMDProcessorTypeAndSubtype(Family, Model, &Features[0],
828                                  &(__cpu_model.__cpu_type),
829                                  &(__cpu_model.__cpu_subtype));
830    __cpu_model.__cpu_vendor = VENDOR_AMD;
831  } else
832    __cpu_model.__cpu_vendor = VENDOR_OTHER;
833
834  assert(__cpu_model.__cpu_vendor < VENDOR_MAX);
835  assert(__cpu_model.__cpu_type < CPU_TYPE_MAX);
836  assert(__cpu_model.__cpu_subtype < CPU_SUBTYPE_MAX);
837
838  return 0;
839}
840#elif defined(__aarch64__)
841
842#ifndef AT_HWCAP
843#define AT_HWCAP 16
844#endif
845#ifndef HWCAP_CPUID
846#define HWCAP_CPUID (1 << 11)
847#endif
848#ifndef HWCAP_FP
849#define HWCAP_FP (1 << 0)
850#endif
851#ifndef HWCAP_ASIMD
852#define HWCAP_ASIMD (1 << 1)
853#endif
854#ifndef HWCAP_AES
855#define HWCAP_AES (1 << 3)
856#endif
857#ifndef HWCAP_PMULL
858#define HWCAP_PMULL (1 << 4)
859#endif
860#ifndef HWCAP_SHA1
861#define HWCAP_SHA1 (1 << 5)
862#endif
863#ifndef HWCAP_SHA2
864#define HWCAP_SHA2 (1 << 6)
865#endif
866#ifndef HWCAP_ATOMICS
867#define HWCAP_ATOMICS (1 << 8)
868#endif
869#ifndef HWCAP_FPHP
870#define HWCAP_FPHP (1 << 9)
871#endif
872#ifndef HWCAP_ASIMDHP
873#define HWCAP_ASIMDHP (1 << 10)
874#endif
875#ifndef HWCAP_ASIMDRDM
876#define HWCAP_ASIMDRDM (1 << 12)
877#endif
878#ifndef HWCAP_JSCVT
879#define HWCAP_JSCVT (1 << 13)
880#endif
881#ifndef HWCAP_FCMA
882#define HWCAP_FCMA (1 << 14)
883#endif
884#ifndef HWCAP_LRCPC
885#define HWCAP_LRCPC (1 << 15)
886#endif
887#ifndef HWCAP_DCPOP
888#define HWCAP_DCPOP (1 << 16)
889#endif
890#ifndef HWCAP_SHA3
891#define HWCAP_SHA3 (1 << 17)
892#endif
893#ifndef HWCAP_SM3
894#define HWCAP_SM3 (1 << 18)
895#endif
896#ifndef HWCAP_SM4
897#define HWCAP_SM4 (1 << 19)
898#endif
899#ifndef HWCAP_ASIMDDP
900#define HWCAP_ASIMDDP (1 << 20)
901#endif
902#ifndef HWCAP_SHA512
903#define HWCAP_SHA512 (1 << 21)
904#endif
905#ifndef HWCAP_SVE
906#define HWCAP_SVE (1 << 22)
907#endif
908#ifndef HWCAP_ASIMDFHM
909#define HWCAP_ASIMDFHM (1 << 23)
910#endif
911#ifndef HWCAP_DIT
912#define HWCAP_DIT (1 << 24)
913#endif
914#ifndef HWCAP_ILRCPC
915#define HWCAP_ILRCPC (1 << 26)
916#endif
917#ifndef HWCAP_FLAGM
918#define HWCAP_FLAGM (1 << 27)
919#endif
920#ifndef HWCAP_SSBS
921#define HWCAP_SSBS (1 << 28)
922#endif
923#ifndef HWCAP_SB
924#define HWCAP_SB (1 << 29)
925#endif
926
927#ifndef AT_HWCAP2
928#define AT_HWCAP2 26
929#endif
930#ifndef HWCAP2_DCPODP
931#define HWCAP2_DCPODP (1 << 0)
932#endif
933#ifndef HWCAP2_SVE2
934#define HWCAP2_SVE2 (1 << 1)
935#endif
936#ifndef HWCAP2_SVEAES
937#define HWCAP2_SVEAES (1 << 2)
938#endif
939#ifndef HWCAP2_SVEPMULL
940#define HWCAP2_SVEPMULL (1 << 3)
941#endif
942#ifndef HWCAP2_SVEBITPERM
943#define HWCAP2_SVEBITPERM (1 << 4)
944#endif
945#ifndef HWCAP2_SVESHA3
946#define HWCAP2_SVESHA3 (1 << 5)
947#endif
948#ifndef HWCAP2_SVESM4
949#define HWCAP2_SVESM4 (1 << 6)
950#endif
951#ifndef HWCAP2_FLAGM2
952#define HWCAP2_FLAGM2 (1 << 7)
953#endif
954#ifndef HWCAP2_FRINT
955#define HWCAP2_FRINT (1 << 8)
956#endif
957#ifndef HWCAP2_SVEI8MM
958#define HWCAP2_SVEI8MM (1 << 9)
959#endif
960#ifndef HWCAP2_SVEF32MM
961#define HWCAP2_SVEF32MM (1 << 10)
962#endif
963#ifndef HWCAP2_SVEF64MM
964#define HWCAP2_SVEF64MM (1 << 11)
965#endif
966#ifndef HWCAP2_SVEBF16
967#define HWCAP2_SVEBF16 (1 << 12)
968#endif
969#ifndef HWCAP2_I8MM
970#define HWCAP2_I8MM (1 << 13)
971#endif
972#ifndef HWCAP2_BF16
973#define HWCAP2_BF16 (1 << 14)
974#endif
975#ifndef HWCAP2_DGH
976#define HWCAP2_DGH (1 << 15)
977#endif
978#ifndef HWCAP2_RNG
979#define HWCAP2_RNG (1 << 16)
980#endif
981#ifndef HWCAP2_BTI
982#define HWCAP2_BTI (1 << 17)
983#endif
984#ifndef HWCAP2_MTE
985#define HWCAP2_MTE (1 << 18)
986#endif
987#ifndef HWCAP2_RPRES
988#define HWCAP2_RPRES (1 << 21)
989#endif
990#ifndef HWCAP2_MTE3
991#define HWCAP2_MTE3 (1 << 22)
992#endif
993#ifndef HWCAP2_SME
994#define HWCAP2_SME (1 << 23)
995#endif
996#ifndef HWCAP2_SME_I16I64
997#define HWCAP2_SME_I16I64 (1 << 24)
998#endif
999#ifndef HWCAP2_SME_F64F64
1000#define HWCAP2_SME_F64F64 (1 << 25)
1001#endif
1002#ifndef HWCAP2_WFXT
1003#define HWCAP2_WFXT (1UL << 31)
1004#endif
1005#ifndef HWCAP2_EBF16
1006#define HWCAP2_EBF16 (1UL << 32)
1007#endif
1008#ifndef HWCAP2_SVE_EBF16
1009#define HWCAP2_SVE_EBF16 (1UL << 33)
1010#endif
1011
// LSE support detection for out-of-line atomics, using HWCAP and the ELF
// auxiliary vector.  Nonzero once init_have_lse_atomics (below) has
// determined that the CPU implements the LSE atomic instructions.
_Bool __aarch64_have_lse_atomics
    __attribute__((visibility("hidden"), nocommon));
1016
1017#if defined(__has_include)
1018#if __has_include(<sys/auxv.h>)
1019#include <sys/auxv.h>
1020#if __has_include(<asm/hwcap.h>)
1021#include <asm/hwcap.h>
1022
1023#if defined(__ANDROID__)
1024#include <string.h>
1025#include <sys/system_properties.h>
1026#elif defined(__Fuchsia__)
1027#include <zircon/features.h>
1028#include <zircon/syscalls.h>
1029#endif
1030
// Detect Exynos 9810 CPU (Android only): expands to an `if` whose body runs
// when the "ro.arch" system property starts with "exynos9810".  It declares
// a local `arch` buffer, so it can appear at most once per scope.  Used by
// init_have_lse_atomics to override the kernel-reported LSE support.
#define IF_EXYNOS9810                                                          \
  char arch[PROP_VALUE_MAX];                                                   \
  if (__system_property_get("ro.arch", arch) > 0 &&                            \
      strncmp(arch, "exynos9810", sizeof("exynos9810") - 1) == 0)
1036
// Runs at startup (CONSTRUCTOR_ATTRIBUTE) and records in
// __aarch64_have_lse_atomics whether the CPU implements LSE atomics,
// using whichever CPU-feature interface the OS provides.
static void CONSTRUCTOR_ATTRIBUTE init_have_lse_atomics(void) {
#if defined(__FreeBSD__)
  // FreeBSD exposes AT_HWCAP through elf_aux_info(); 0 means success.
  unsigned long hwcap;
  int result = elf_aux_info(AT_HWCAP, &hwcap, sizeof hwcap);
  __aarch64_have_lse_atomics = result == 0 && (hwcap & HWCAP_ATOMICS) != 0;
#elif defined(__Fuchsia__)
  // This ensures the vDSO is a direct link-time dependency of anything that
  // needs this initializer code.
#pragma comment(lib, "zircon")
  uint32_t features;
  zx_status_t status = _zx_system_get_features(ZX_FEATURE_KIND_CPU, &features);
  __aarch64_have_lse_atomics =
      status == ZX_OK && (features & ZX_ARM64_FEATURE_ISA_ATOMICS) != 0;
#else
  // Linux and other getauxval() platforms: read AT_HWCAP directly.
  unsigned long hwcap = getauxval(AT_HWCAP);
  _Bool result = (hwcap & HWCAP_ATOMICS) != 0;
#if defined(__ANDROID__)
  if (result) {
    // Some cores in the Exynos 9810 CPU are ARMv8.2 and others are ARMv8.0;
    // only the former support LSE atomics.  However, the kernel in the
    // initial Android 8.0 release of Galaxy S9/S9+ devices incorrectly
    // reported the feature as being supported.
    //
    // The kernel appears to have been corrected to mark it unsupported as of
    // the Android 9.0 release on those devices, and this issue has not been
    // observed anywhere else. Thus, this workaround may be removed if
    // compiler-rt ever drops support for Android 8.0.
    IF_EXYNOS9810 result = false;
  }
#endif // defined(__ANDROID__)
  __aarch64_have_lse_atomics = result;
#endif // defined(__FreeBSD__)
}
1070
1071#if !defined(DISABLE_AARCH64_FMV)
// CPUFeatures must correspond to the same AArch64 features in
// AArch64TargetParser.h.
//
// Each enumerator is a bit position in __aarch64_cpu_features.features
// (see setCPUFeature below), so the ordering is ABI between this runtime
// and compiler-generated FMV resolvers. Do NOT reorder, renumber, or delete
// entries; new features may only be appended immediately before FEAT_MAX.
enum CPUFeatures {
  FEAT_RNG,
  FEAT_FLAGM,
  FEAT_FLAGM2,
  FEAT_FP16FML,
  FEAT_DOTPROD,
  FEAT_SM4,
  FEAT_RDM,
  FEAT_LSE,
  FEAT_FP,
  FEAT_SIMD,
  FEAT_CRC,
  FEAT_SHA1,
  FEAT_SHA2,
  FEAT_SHA3,
  FEAT_AES,
  FEAT_PMULL,
  FEAT_FP16,
  FEAT_DIT,
  FEAT_DPB,
  FEAT_DPB2,
  FEAT_JSCVT,
  FEAT_FCMA,
  FEAT_RCPC,
  FEAT_RCPC2,
  FEAT_FRINTTS,
  FEAT_DGH,
  FEAT_I8MM,
  FEAT_BF16,
  FEAT_EBF16,
  FEAT_RPRES,
  FEAT_SVE,
  FEAT_SVE_BF16,
  FEAT_SVE_EBF16,
  FEAT_SVE_I8MM,
  FEAT_SVE_F32MM,
  FEAT_SVE_F64MM,
  FEAT_SVE2,
  FEAT_SVE_AES,
  FEAT_SVE_PMULL128,
  FEAT_SVE_BITPERM,
  FEAT_SVE_SHA3,
  FEAT_SVE_SM4,
  FEAT_SME,
  FEAT_MEMTAG,
  FEAT_MEMTAG2,
  FEAT_MEMTAG3,
  FEAT_SB,
  FEAT_PREDRES,
  FEAT_SSBS,
  FEAT_SSBS2,
  FEAT_BTI,
  FEAT_LS64,
  FEAT_LS64_V,
  FEAT_LS64_ACCDATA,
  FEAT_WFXT,
  FEAT_SME_F64,
  FEAT_SME_I64,
  FEAT_SME2,
  // Sentinel; its bit doubles as the "features initialized" marker
  // (set unconditionally by init_cpu_features).
  FEAT_MAX
};
1135
// Architecture features used
// in Function Multi Versioning.
// Compiler-emitted ifunc resolvers read this after the constructor below
// runs; hidden visibility keeps one private copy per DSO.
struct {
  // Bitmask indexed by enum CPUFeatures (bit F set == feature F present).
  unsigned long long features;
  // As features grows new fields could be added
} __aarch64_cpu_features __attribute__((visibility("hidden"), nocommon));
1142
1143void init_cpu_features_resolver(unsigned long hwcap, unsigned long hwcap2) {
1144#define setCPUFeature(F) __aarch64_cpu_features.features |= 1ULL << F
1145#define getCPUFeature(id, ftr) __asm__("mrs %0, " #id : "=r"(ftr))
1146#define extractBits(val, start, number)                                        \
1147  (val & ((1ULL << number) - 1ULL) << start) >> start
1148  if (hwcap & HWCAP_CRC32)
1149    setCPUFeature(FEAT_CRC);
1150  if (hwcap & HWCAP_PMULL)
1151    setCPUFeature(FEAT_PMULL);
1152  if (hwcap & HWCAP_FLAGM)
1153    setCPUFeature(FEAT_FLAGM);
1154  if (hwcap2 & HWCAP2_FLAGM2) {
1155    setCPUFeature(FEAT_FLAGM);
1156    setCPUFeature(FEAT_FLAGM2);
1157  }
1158  if (hwcap & HWCAP_SM3 && hwcap & HWCAP_SM4)
1159    setCPUFeature(FEAT_SM4);
1160  if (hwcap & HWCAP_ASIMDDP)
1161    setCPUFeature(FEAT_DOTPROD);
1162  if (hwcap & HWCAP_ASIMDFHM)
1163    setCPUFeature(FEAT_FP16FML);
1164  if (hwcap & HWCAP_FPHP) {
1165    setCPUFeature(FEAT_FP16);
1166    setCPUFeature(FEAT_FP);
1167  }
1168  if (hwcap & HWCAP_DIT)
1169    setCPUFeature(FEAT_DIT);
1170  if (hwcap & HWCAP_ASIMDRDM)
1171    setCPUFeature(FEAT_RDM);
1172  if (hwcap & HWCAP_ILRCPC)
1173    setCPUFeature(FEAT_RCPC2);
1174  if (hwcap & HWCAP_AES)
1175    setCPUFeature(FEAT_AES);
1176  if (hwcap & HWCAP_SHA1)
1177    setCPUFeature(FEAT_SHA1);
1178  if (hwcap & HWCAP_SHA2)
1179    setCPUFeature(FEAT_SHA2);
1180  if (hwcap & HWCAP_JSCVT)
1181    setCPUFeature(FEAT_JSCVT);
1182  if (hwcap & HWCAP_FCMA)
1183    setCPUFeature(FEAT_FCMA);
1184  if (hwcap & HWCAP_SB)
1185    setCPUFeature(FEAT_SB);
1186  if (hwcap & HWCAP_SSBS)
1187    setCPUFeature(FEAT_SSBS2);
1188  if (hwcap2 & HWCAP2_MTE) {
1189    setCPUFeature(FEAT_MEMTAG);
1190    setCPUFeature(FEAT_MEMTAG2);
1191  }
1192  if (hwcap2 & HWCAP2_MTE3) {
1193    setCPUFeature(FEAT_MEMTAG);
1194    setCPUFeature(FEAT_MEMTAG2);
1195    setCPUFeature(FEAT_MEMTAG3);
1196  }
1197  if (hwcap2 & HWCAP2_SVEAES)
1198    setCPUFeature(FEAT_SVE_AES);
1199  if (hwcap2 & HWCAP2_SVEPMULL) {
1200    setCPUFeature(FEAT_SVE_AES);
1201    setCPUFeature(FEAT_SVE_PMULL128);
1202  }
1203  if (hwcap2 & HWCAP2_SVEBITPERM)
1204    setCPUFeature(FEAT_SVE_BITPERM);
1205  if (hwcap2 & HWCAP2_SVESHA3)
1206    setCPUFeature(FEAT_SVE_SHA3);
1207  if (hwcap2 & HWCAP2_SVESM4)
1208    setCPUFeature(FEAT_SVE_SM4);
1209  if (hwcap2 & HWCAP2_DCPODP)
1210    setCPUFeature(FEAT_DPB2);
1211  if (hwcap & HWCAP_ATOMICS)
1212    setCPUFeature(FEAT_LSE);
1213  if (hwcap2 & HWCAP2_RNG)
1214    setCPUFeature(FEAT_RNG);
1215  if (hwcap2 & HWCAP2_I8MM)
1216    setCPUFeature(FEAT_I8MM);
1217  if (hwcap2 & HWCAP2_EBF16)
1218    setCPUFeature(FEAT_EBF16);
1219  if (hwcap2 & HWCAP2_SVE_EBF16)
1220    setCPUFeature(FEAT_SVE_EBF16);
1221  if (hwcap2 & HWCAP2_DGH)
1222    setCPUFeature(FEAT_DGH);
1223  if (hwcap2 & HWCAP2_FRINT)
1224    setCPUFeature(FEAT_FRINTTS);
1225  if (hwcap2 & HWCAP2_SVEI8MM)
1226    setCPUFeature(FEAT_SVE_I8MM);
1227  if (hwcap2 & HWCAP2_SVEF32MM)
1228    setCPUFeature(FEAT_SVE_F32MM);
1229  if (hwcap2 & HWCAP2_SVEF64MM)
1230    setCPUFeature(FEAT_SVE_F64MM);
1231  if (hwcap2 & HWCAP2_BTI)
1232    setCPUFeature(FEAT_BTI);
1233  if (hwcap2 & HWCAP2_RPRES)
1234    setCPUFeature(FEAT_RPRES);
1235  if (hwcap2 & HWCAP2_WFXT)
1236    setCPUFeature(FEAT_WFXT);
1237  if (hwcap2 & HWCAP2_SME)
1238    setCPUFeature(FEAT_SME);
1239  if (hwcap2 & HWCAP2_SME_I16I64)
1240    setCPUFeature(FEAT_SME_I64);
1241  if (hwcap2 & HWCAP2_SME_F64F64)
1242    setCPUFeature(FEAT_SME_F64);
1243  if (hwcap & HWCAP_CPUID) {
1244    unsigned long ftr;
1245    getCPUFeature(ID_AA64PFR1_EL1, ftr);
1246    // ID_AA64PFR1_EL1.MTE >= 0b0001
1247    if (extractBits(ftr, 8, 4) >= 0x1)
1248      setCPUFeature(FEAT_MEMTAG);
1249    // ID_AA64PFR1_EL1.SSBS == 0b0001
1250    if (extractBits(ftr, 4, 4) == 0x1)
1251      setCPUFeature(FEAT_SSBS);
1252    // ID_AA64PFR1_EL1.SME == 0b0010
1253    if (extractBits(ftr, 24, 4) == 0x2)
1254      setCPUFeature(FEAT_SME2);
1255    getCPUFeature(ID_AA64PFR0_EL1, ftr);
1256    // ID_AA64PFR0_EL1.FP != 0b1111
1257    if (extractBits(ftr, 16, 4) != 0xF) {
1258      setCPUFeature(FEAT_FP);
1259      // ID_AA64PFR0_EL1.AdvSIMD has the same value as ID_AA64PFR0_EL1.FP
1260      setCPUFeature(FEAT_SIMD);
1261    }
1262    // ID_AA64PFR0_EL1.SVE != 0b0000
1263    if (extractBits(ftr, 32, 4) != 0x0) {
1264      // get ID_AA64ZFR0_EL1, that name supported
1265      // if sve enabled only
1266      getCPUFeature(S3_0_C0_C4_4, ftr);
1267      // ID_AA64ZFR0_EL1.SVEver == 0b0000
1268      if (extractBits(ftr, 0, 4) == 0x0)
1269        setCPUFeature(FEAT_SVE);
1270      // ID_AA64ZFR0_EL1.SVEver == 0b0001
1271      if (extractBits(ftr, 0, 4) == 0x1)
1272        setCPUFeature(FEAT_SVE2);
1273      // ID_AA64ZFR0_EL1.BF16 != 0b0000
1274      if (extractBits(ftr, 20, 4) != 0x0)
1275        setCPUFeature(FEAT_SVE_BF16);
1276    }
1277    getCPUFeature(ID_AA64ISAR0_EL1, ftr);
1278    // ID_AA64ISAR0_EL1.SHA3 != 0b0000
1279    if (extractBits(ftr, 32, 4) != 0x0)
1280      setCPUFeature(FEAT_SHA3);
1281    getCPUFeature(ID_AA64ISAR1_EL1, ftr);
1282    // ID_AA64ISAR1_EL1.DPB >= 0b0001
1283    if (extractBits(ftr, 0, 4) >= 0x1)
1284      setCPUFeature(FEAT_DPB);
1285    // ID_AA64ISAR1_EL1.LRCPC != 0b0000
1286    if (extractBits(ftr, 20, 4) != 0x0)
1287      setCPUFeature(FEAT_RCPC);
1288    // ID_AA64ISAR1_EL1.SPECRES == 0b0001
1289    if (extractBits(ftr, 40, 4) == 0x2)
1290      setCPUFeature(FEAT_PREDRES);
1291    // ID_AA64ISAR1_EL1.BF16 != 0b0000
1292    if (extractBits(ftr, 44, 4) != 0x0)
1293      setCPUFeature(FEAT_BF16);
1294    // ID_AA64ISAR1_EL1.LS64 >= 0b0001
1295    if (extractBits(ftr, 60, 4) >= 0x1)
1296      setCPUFeature(FEAT_LS64);
1297    // ID_AA64ISAR1_EL1.LS64 >= 0b0010
1298    if (extractBits(ftr, 60, 4) >= 0x2)
1299      setCPUFeature(FEAT_LS64_V);
1300    // ID_AA64ISAR1_EL1.LS64 >= 0b0011
1301    if (extractBits(ftr, 60, 4) >= 0x3)
1302      setCPUFeature(FEAT_LS64_ACCDATA);
1303  } else {
1304    // Set some features in case of no CPUID support
1305    if (hwcap & (HWCAP_FP | HWCAP_FPHP)) {
1306      setCPUFeature(FEAT_FP);
1307      // FP and AdvSIMD fields have the same value
1308      setCPUFeature(FEAT_SIMD);
1309    }
1310    if (hwcap & HWCAP_DCPOP || hwcap2 & HWCAP2_DCPODP)
1311      setCPUFeature(FEAT_DPB);
1312    if (hwcap & HWCAP_LRCPC || hwcap & HWCAP_ILRCPC)
1313      setCPUFeature(FEAT_RCPC);
1314    if (hwcap2 & HWCAP2_BF16 || hwcap2 & HWCAP2_EBF16)
1315      setCPUFeature(FEAT_BF16);
1316    if (hwcap2 & HWCAP2_SVEBF16)
1317      setCPUFeature(FEAT_SVE_BF16);
1318    if (hwcap2 & HWCAP2_SVE2 && hwcap & HWCAP_SVE)
1319      setCPUFeature(FEAT_SVE2);
1320    if (hwcap & HWCAP_SHA3)
1321      setCPUFeature(FEAT_SHA3);
1322  }
1323}
1324
// Constructor (priority 90): fetch AT_HWCAP/AT_HWCAP2 from the auxiliary
// vector and populate __aarch64_cpu_features via the resolver above.
// Idempotent: a second call returns immediately because FEAT_MAX is always
// set on the first run.
void CONSTRUCTOR_ATTRIBUTE init_cpu_features(void) {
  unsigned long hwcap;
  unsigned long hwcap2;
  // CPU features already initialized.
  if (__aarch64_cpu_features.features)
    return;
  // FEAT_MAX's bit serves as the "initialized" marker checked above.
  setCPUFeature(FEAT_MAX);
#if defined(__FreeBSD__)
  int res = 0;
  res = elf_aux_info(AT_HWCAP, &hwcap, sizeof hwcap);
  res |= elf_aux_info(AT_HWCAP2, &hwcap2, sizeof hwcap2);
  // On any aux-vector read failure, leave only the marker bit set
  // (i.e. report no features rather than garbage).
  if (res)
    return;
#else
#if defined(__ANDROID__)
  // Don't set any CPU features,
  // detection could be wrong on Exynos 9810.
  IF_EXYNOS9810 return;
#endif // defined(__ANDROID__)
  hwcap = getauxval(AT_HWCAP);
  hwcap2 = getauxval(AT_HWCAP2);
#endif // defined(__FreeBSD__)
  init_cpu_features_resolver(hwcap, hwcap2);
#undef extractBits
#undef getCPUFeature
#undef setCPUFeature
#undef IF_EXYNOS9810
}
1353#endif // !defined(DISABLE_AARCH64_FMV)
#endif // __has_include(<asm/hwcap.h>)
#endif // __has_include(<sys/auxv.h>)
#endif // defined(__has_include)
1357#endif // defined(__aarch64__)
1358