1//===-- cpu_model.c - Support for __cpu_model builtin ------------*- C -*-===// 2// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6// 7//===----------------------------------------------------------------------===// 8// 9// This file is based on LLVM's lib/Support/Host.cpp. 10// It implements the operating system Host concept and builtin 11// __cpu_model for the compiler_rt library for x86 and 12// __aarch64_have_lse_atomics, __aarch64_cpu_features for AArch64. 13// 14//===----------------------------------------------------------------------===// 15 16#ifndef __has_attribute 17#define __has_attribute(attr) 0 18#endif 19 20#if __has_attribute(constructor) 21#if __GNUC__ >= 9 22// Ordinarily init priorities below 101 are disallowed as they are reserved for the 23// implementation. However, we are the implementation, so silence the diagnostic, 24// since it doesn't apply to us. 25#pragma GCC diagnostic ignored "-Wprio-ctor-dtor" 26#endif 27// We're choosing init priority 90 to force our constructors to run before any 28// constructors in the end user application (starting at priority 101). This value 29// matches the libgcc choice for the same functions. 30#define CONSTRUCTOR_ATTRIBUTE __attribute__((constructor(90))) 31#else 32// FIXME: For MSVC, we should make a function pointer global in .CRT$X?? so that 33// this runs during initialization. 34#define CONSTRUCTOR_ATTRIBUTE 35#endif 36 37#if (defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) || \ 38 defined(_M_X64)) && \ 39 (defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER)) 40 41#include <assert.h> 42 43#define bool int 44#define true 1 45#define false 0 46 47#ifdef _MSC_VER 48#include <intrin.h> 49#endif 50 51enum VendorSignatures { 52 SIG_INTEL = 0x756e6547, // Genu 53 SIG_AMD = 0x68747541, // Auth 54}; 55 56enum ProcessorVendors { 57 VENDOR_INTEL = 1, 58 VENDOR_AMD, 59 VENDOR_OTHER, 60 VENDOR_MAX 61}; 62 63enum ProcessorTypes { 64 INTEL_BONNELL = 1, 65 INTEL_CORE2, 66 INTEL_COREI7, 67 AMDFAM10H, 68 AMDFAM15H, 69 INTEL_SILVERMONT, 70 INTEL_KNL, 71 AMD_BTVER1, 72 AMD_BTVER2, 73 AMDFAM17H, 74 INTEL_KNM, 75 INTEL_GOLDMONT, 76 INTEL_GOLDMONT_PLUS, 77 INTEL_TREMONT, 78 AMDFAM19H, 79 ZHAOXIN_FAM7H, 80 INTEL_SIERRAFOREST, 81 INTEL_GRANDRIDGE, 82 CPU_TYPE_MAX 83}; 84 85enum ProcessorSubtypes { 86 INTEL_COREI7_NEHALEM = 1, 87 INTEL_COREI7_WESTMERE, 88 INTEL_COREI7_SANDYBRIDGE, 89 AMDFAM10H_BARCELONA, 90 AMDFAM10H_SHANGHAI, 91 AMDFAM10H_ISTANBUL, 92 AMDFAM15H_BDVER1, 93 AMDFAM15H_BDVER2, 94 AMDFAM15H_BDVER3, 95 AMDFAM15H_BDVER4, 96 AMDFAM17H_ZNVER1, 97 INTEL_COREI7_IVYBRIDGE, 98 INTEL_COREI7_HASWELL, 99 INTEL_COREI7_BROADWELL, 100 INTEL_COREI7_SKYLAKE, 101 INTEL_COREI7_SKYLAKE_AVX512, 102 INTEL_COREI7_CANNONLAKE, 103 INTEL_COREI7_ICELAKE_CLIENT, 104 INTEL_COREI7_ICELAKE_SERVER, 105 AMDFAM17H_ZNVER2, 106 INTEL_COREI7_CASCADELAKE, 107 INTEL_COREI7_TIGERLAKE, 108 INTEL_COREI7_COOPERLAKE, 109 INTEL_COREI7_SAPPHIRERAPIDS, 110 INTEL_COREI7_ALDERLAKE, 111 AMDFAM19H_ZNVER3, 112 INTEL_COREI7_ROCKETLAKE, 113 ZHAOXIN_FAM7H_LUJIAZUI, 114 AMDFAM19H_ZNVER4, 115 INTEL_COREI7_GRANITERAPIDS, 116 CPU_SUBTYPE_MAX 117}; 118 119enum ProcessorFeatures { 120 FEATURE_CMOV = 0, 121 FEATURE_MMX, 122 FEATURE_POPCNT, 123 FEATURE_SSE, 124 FEATURE_SSE2, 125 FEATURE_SSE3, 126 FEATURE_SSSE3, 127 FEATURE_SSE4_1, 128 FEATURE_SSE4_2, 129 FEATURE_AVX, 130 FEATURE_AVX2, 131 FEATURE_SSE4_A, 132 FEATURE_FMA4, 133 FEATURE_XOP, 134 FEATURE_FMA, 135 FEATURE_AVX512F, 136 FEATURE_BMI, 137 FEATURE_BMI2, 138 FEATURE_AES, 139 FEATURE_PCLMUL, 140 FEATURE_AVX512VL, 141 FEATURE_AVX512BW, 142 FEATURE_AVX512DQ, 143 FEATURE_AVX512CD, 144 FEATURE_AVX512ER, 145 FEATURE_AVX512PF, 146 FEATURE_AVX512VBMI, 147 FEATURE_AVX512IFMA, 148 FEATURE_AVX5124VNNIW, 149 FEATURE_AVX5124FMAPS, 150 FEATURE_AVX512VPOPCNTDQ, 151 FEATURE_AVX512VBMI2, 152 FEATURE_GFNI, 153 FEATURE_VPCLMULQDQ, 154 FEATURE_AVX512VNNI, 155 FEATURE_AVX512BITALG, 156 FEATURE_AVX512BF16, 157 FEATURE_AVX512VP2INTERSECT, 158 CPU_FEATURE_MAX 159}; 160 161// The check below for i386 was copied from clang's cpuid.h (__get_cpuid_max). 162// Check motivated by bug reports for OpenSSL crashing on CPUs without CPUID 163// support. Consequently, for i386, the presence of CPUID is checked first 164// via the corresponding eflags bit. 165static bool isCpuIdSupported(void) { 166#if defined(__GNUC__) || defined(__clang__) 167#if defined(__i386__) 168 int __cpuid_supported; 169 __asm__(" pushfl\n" 170 " popl %%eax\n" 171 " movl %%eax,%%ecx\n" 172 " xorl $0x00200000,%%eax\n" 173 " pushl %%eax\n" 174 " popfl\n" 175 " pushfl\n" 176 " popl %%eax\n" 177 " movl $0,%0\n" 178 " cmpl %%eax,%%ecx\n" 179 " je 1f\n" 180 " movl $1,%0\n" 181 "1:" 182 : "=r"(__cpuid_supported) 183 : 184 : "eax", "ecx"); 185 if (!__cpuid_supported) 186 return false; 187#endif 188 return true; 189#endif 190 return true; 191} 192 193// This code is copied from lib/Support/Host.cpp. 194// Changes to either file should be mirrored in the other. 195 196/// getX86CpuIDAndInfo - Execute the specified cpuid and return the 4 values in 197/// the specified arguments. If we can't run cpuid on the host, return true. 198static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX, 199 unsigned *rECX, unsigned *rEDX) { 200#if defined(__GNUC__) || defined(__clang__) 201#if defined(__x86_64__) 202 // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually. 203 // FIXME: should we save this for Clang? 204 __asm__("movq\t%%rbx, %%rsi\n\t" 205 "cpuid\n\t" 206 "xchgq\t%%rbx, %%rsi\n\t" 207 : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) 208 : "a"(value)); 209 return false; 210#elif defined(__i386__) 211 __asm__("movl\t%%ebx, %%esi\n\t" 212 "cpuid\n\t" 213 "xchgl\t%%ebx, %%esi\n\t" 214 : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) 215 : "a"(value)); 216 return false; 217#else 218 return true; 219#endif 220#elif defined(_MSC_VER) 221 // The MSVC intrinsic is portable across x86 and x64. 222 int registers[4]; 223 __cpuid(registers, value); 224 *rEAX = registers[0]; 225 *rEBX = registers[1]; 226 *rECX = registers[2]; 227 *rEDX = registers[3]; 228 return false; 229#else 230 return true; 231#endif 232} 233 234/// getX86CpuIDAndInfoEx - Execute the specified cpuid with subleaf and return 235/// the 4 values in the specified arguments. If we can't run cpuid on the host, 236/// return true. 237static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf, 238 unsigned *rEAX, unsigned *rEBX, unsigned *rECX, 239 unsigned *rEDX) { 240#if defined(__GNUC__) || defined(__clang__) 241#if defined(__x86_64__) 242 // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually. 243 // FIXME: should we save this for Clang? 244 __asm__("movq\t%%rbx, %%rsi\n\t" 245 "cpuid\n\t" 246 "xchgq\t%%rbx, %%rsi\n\t" 247 : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) 248 : "a"(value), "c"(subleaf)); 249 return false; 250#elif defined(__i386__) 251 __asm__("movl\t%%ebx, %%esi\n\t" 252 "cpuid\n\t" 253 "xchgl\t%%ebx, %%esi\n\t" 254 : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) 255 : "a"(value), "c"(subleaf)); 256 return false; 257#else 258 return true; 259#endif 260#elif defined(_MSC_VER) 261 int registers[4]; 262 __cpuidex(registers, value, subleaf); 263 *rEAX = registers[0]; 264 *rEBX = registers[1]; 265 *rECX = registers[2]; 266 *rEDX = registers[3]; 267 return false; 268#else 269 return true; 270#endif 271} 272 273// Read control register 0 (XCR0). Used to detect features such as AVX. 274static bool getX86XCR0(unsigned *rEAX, unsigned *rEDX) { 275#if defined(__GNUC__) || defined(__clang__) 276 // Check xgetbv; this uses a .byte sequence instead of the instruction 277 // directly because older assemblers do not include support for xgetbv and 278 // there is no easy way to conditionally compile based on the assembler used. 279 __asm__(".byte 0x0f, 0x01, 0xd0" : "=a"(*rEAX), "=d"(*rEDX) : "c"(0)); 280 return false; 281#elif defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK) 282 unsigned long long Result = _xgetbv(_XCR_XFEATURE_ENABLED_MASK); 283 *rEAX = Result; 284 *rEDX = Result >> 32; 285 return false; 286#else 287 return true; 288#endif 289} 290 291static void detectX86FamilyModel(unsigned EAX, unsigned *Family, 292 unsigned *Model) { 293 *Family = (EAX >> 8) & 0xf; // Bits 8 - 11 294 *Model = (EAX >> 4) & 0xf; // Bits 4 - 7 295 if (*Family == 6 || *Family == 0xf) { 296 if (*Family == 0xf) 297 // Examine extended family ID if family ID is F. 298 *Family += (EAX >> 20) & 0xff; // Bits 20 - 27 299 // Examine extended model ID if family ID is 6 or F. 300 *Model += ((EAX >> 16) & 0xf) << 4; // Bits 16 - 19 301 } 302} 303 304static const char * 305getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model, 306 const unsigned *Features, 307 unsigned *Type, unsigned *Subtype) { 308#define testFeature(F) \ 309 (Features[F / 32] & (1 << (F % 32))) != 0 310 311 // We select CPU strings to match the code in Host.cpp, but we don't use them 312 // in compiler-rt. 313 const char *CPU = 0; 314 315 switch (Family) { 316 case 6: 317 switch (Model) { 318 case 0x0f: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile 319 // processor, Intel Core 2 Quad processor, Intel Core 2 Quad 320 // mobile processor, Intel Core 2 Extreme processor, Intel 321 // Pentium Dual-Core processor, Intel Xeon processor, model 322 // 0Fh. All processors are manufactured using the 65 nm process. 323 case 0x16: // Intel Celeron processor model 16h. All processors are 324 // manufactured using the 65 nm process 325 CPU = "core2"; 326 *Type = INTEL_CORE2; 327 break; 328 case 0x17: // Intel Core 2 Extreme processor, Intel Xeon processor, model 329 // 17h. All processors are manufactured using the 45 nm process. 330 // 331 // 45nm: Penryn , Wolfdale, Yorkfield (XE) 332 case 0x1d: // Intel Xeon processor MP. All processors are manufactured using 333 // the 45 nm process. 334 CPU = "penryn"; 335 *Type = INTEL_CORE2; 336 break; 337 case 0x1a: // Intel Core i7 processor and Intel Xeon processor. All 338 // processors are manufactured using the 45 nm process. 339 case 0x1e: // Intel(R) Core(TM) i7 CPU 870 @ 2.93GHz. 340 // As found in a Summer 2010 model iMac. 341 case 0x1f: 342 case 0x2e: // Nehalem EX 343 CPU = "nehalem"; 344 *Type = INTEL_COREI7; 345 *Subtype = INTEL_COREI7_NEHALEM; 346 break; 347 case 0x25: // Intel Core i7, laptop version. 348 case 0x2c: // Intel Core i7 processor and Intel Xeon processor. All 349 // processors are manufactured using the 32 nm process. 350 case 0x2f: // Westmere EX 351 CPU = "westmere"; 352 *Type = INTEL_COREI7; 353 *Subtype = INTEL_COREI7_WESTMERE; 354 break; 355 case 0x2a: // Intel Core i7 processor. All processors are manufactured 356 // using the 32 nm process. 357 case 0x2d: 358 CPU = "sandybridge"; 359 *Type = INTEL_COREI7; 360 *Subtype = INTEL_COREI7_SANDYBRIDGE; 361 break; 362 case 0x3a: 363 case 0x3e: // Ivy Bridge EP 364 CPU = "ivybridge"; 365 *Type = INTEL_COREI7; 366 *Subtype = INTEL_COREI7_IVYBRIDGE; 367 break; 368 369 // Haswell: 370 case 0x3c: 371 case 0x3f: 372 case 0x45: 373 case 0x46: 374 CPU = "haswell"; 375 *Type = INTEL_COREI7; 376 *Subtype = INTEL_COREI7_HASWELL; 377 break; 378 379 // Broadwell: 380 case 0x3d: 381 case 0x47: 382 case 0x4f: 383 case 0x56: 384 CPU = "broadwell"; 385 *Type = INTEL_COREI7; 386 *Subtype = INTEL_COREI7_BROADWELL; 387 break; 388 389 // Skylake: 390 case 0x4e: // Skylake mobile 391 case 0x5e: // Skylake desktop 392 case 0x8e: // Kaby Lake mobile 393 case 0x9e: // Kaby Lake desktop 394 case 0xa5: // Comet Lake-H/S 395 case 0xa6: // Comet Lake-U 396 CPU = "skylake"; 397 *Type = INTEL_COREI7; 398 *Subtype = INTEL_COREI7_SKYLAKE; 399 break; 400 401 // Rocketlake: 402 case 0xa7: 403 CPU = "rocketlake"; 404 *Type = INTEL_COREI7; 405 *Subtype = INTEL_COREI7_ROCKETLAKE; 406 break; 407 408 // Skylake Xeon: 409 case 0x55: 410 *Type = INTEL_COREI7; 411 if (testFeature(FEATURE_AVX512BF16)) { 412 CPU = "cooperlake"; 413 *Subtype = INTEL_COREI7_COOPERLAKE; 414 } else if (testFeature(FEATURE_AVX512VNNI)) { 415 CPU = "cascadelake"; 416 *Subtype = INTEL_COREI7_CASCADELAKE; 417 } else { 418 CPU = "skylake-avx512"; 419 *Subtype = INTEL_COREI7_SKYLAKE_AVX512; 420 } 421 break; 422 423 // Cannonlake: 424 case 0x66: 425 CPU = "cannonlake"; 426 *Type = INTEL_COREI7; 427 *Subtype = INTEL_COREI7_CANNONLAKE; 428 break; 429 430 // Icelake: 431 case 0x7d: 432 case 0x7e: 433 CPU = "icelake-client"; 434 *Type = INTEL_COREI7; 435 *Subtype = INTEL_COREI7_ICELAKE_CLIENT; 436 break; 437 438 // Tigerlake: 439 case 0x8c: 440 case 0x8d: 441 CPU = "tigerlake"; 442 *Type = INTEL_COREI7; 443 *Subtype = INTEL_COREI7_TIGERLAKE; 444 break; 445 446 // Alderlake: 447 case 0x97: 448 case 0x9a: 449 // Raptorlake: 450 case 0xb7: 451 // Meteorlake: 452 case 0xaa: 453 case 0xac: 454 CPU = "alderlake"; 455 *Type = INTEL_COREI7; 456 *Subtype = INTEL_COREI7_ALDERLAKE; 457 break; 458 459 // Icelake Xeon: 460 case 0x6a: 461 case 0x6c: 462 CPU = "icelake-server"; 463 *Type = INTEL_COREI7; 464 *Subtype = INTEL_COREI7_ICELAKE_SERVER; 465 break; 466 467 // Emerald Rapids: 468 case 0xcf: 469 // Sapphire Rapids: 470 case 0x8f: 471 CPU = "sapphirerapids"; 472 *Type = INTEL_COREI7; 473 *Subtype = INTEL_COREI7_SAPPHIRERAPIDS; 474 break; 475 476 // Granite Rapids: 477 case 0xae: 478 case 0xad: 479 CPU = "graniterapids"; 480 *Type = INTEL_COREI7; 481 *Subtype = INTEL_COREI7_GRANITERAPIDS; 482 break; 483 484 case 0x1c: // Most 45 nm Intel Atom processors 485 case 0x26: // 45 nm Atom Lincroft 486 case 0x27: // 32 nm Atom Medfield 487 case 0x35: // 32 nm Atom Midview 488 case 0x36: // 32 nm Atom Midview 489 CPU = "bonnell"; 490 *Type = INTEL_BONNELL; 491 break; 492 493 // Atom Silvermont codes from the Intel software optimization guide. 494 case 0x37: 495 case 0x4a: 496 case 0x4d: 497 case 0x5a: 498 case 0x5d: 499 case 0x4c: // really airmont 500 CPU = "silvermont"; 501 *Type = INTEL_SILVERMONT; 502 break; 503 // Goldmont: 504 case 0x5c: // Apollo Lake 505 case 0x5f: // Denverton 506 CPU = "goldmont"; 507 *Type = INTEL_GOLDMONT; 508 break; // "goldmont" 509 case 0x7a: 510 CPU = "goldmont-plus"; 511 *Type = INTEL_GOLDMONT_PLUS; 512 break; 513 case 0x86: 514 CPU = "tremont"; 515 *Type = INTEL_TREMONT; 516 break; 517 518 // Sierraforest: 519 case 0xaf: 520 CPU = "sierraforest"; 521 *Type = INTEL_SIERRAFOREST; 522 break; 523 524 // Grandridge: 525 case 0xb6: 526 CPU = "grandridge"; 527 *Type = INTEL_GRANDRIDGE; 528 break; 529 530 case 0x57: 531 CPU = "knl"; 532 *Type = INTEL_KNL; 533 break; 534 535 case 0x85: 536 CPU = "knm"; 537 *Type = INTEL_KNM; 538 break; 539 540 default: // Unknown family 6 CPU. 541 break; 542 } 543 break; 544 default: 545 break; // Unknown. 546 } 547 548 return CPU; 549} 550 551static const char * 552getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model, 553 const unsigned *Features, 554 unsigned *Type, unsigned *Subtype) { 555 // We select CPU strings to match the code in Host.cpp, but we don't use them 556 // in compiler-rt. 557 const char *CPU = 0; 558 559 switch (Family) { 560 case 16: 561 CPU = "amdfam10"; 562 *Type = AMDFAM10H; 563 switch (Model) { 564 case 2: 565 *Subtype = AMDFAM10H_BARCELONA; 566 break; 567 case 4: 568 *Subtype = AMDFAM10H_SHANGHAI; 569 break; 570 case 8: 571 *Subtype = AMDFAM10H_ISTANBUL; 572 break; 573 } 574 break; 575 case 20: 576 CPU = "btver1"; 577 *Type = AMD_BTVER1; 578 break; 579 case 21: 580 CPU = "bdver1"; 581 *Type = AMDFAM15H; 582 if (Model >= 0x60 && Model <= 0x7f) { 583 CPU = "bdver4"; 584 *Subtype = AMDFAM15H_BDVER4; 585 break; // 60h-7Fh: Excavator 586 } 587 if (Model >= 0x30 && Model <= 0x3f) { 588 CPU = "bdver3"; 589 *Subtype = AMDFAM15H_BDVER3; 590 break; // 30h-3Fh: Steamroller 591 } 592 if ((Model >= 0x10 && Model <= 0x1f) || Model == 0x02) { 593 CPU = "bdver2"; 594 *Subtype = AMDFAM15H_BDVER2; 595 break; // 02h, 10h-1Fh: Piledriver 596 } 597 if (Model <= 0x0f) { 598 *Subtype = AMDFAM15H_BDVER1; 599 break; // 00h-0Fh: Bulldozer 600 } 601 break; 602 case 22: 603 CPU = "btver2"; 604 *Type = AMD_BTVER2; 605 break; 606 case 23: 607 CPU = "znver1"; 608 *Type = AMDFAM17H; 609 if ((Model >= 0x30 && Model <= 0x3f) || Model == 0x71) { 610 CPU = "znver2"; 611 *Subtype = AMDFAM17H_ZNVER2; 612 break; // 30h-3fh, 71h: Zen2 613 } 614 if (Model <= 0x0f) { 615 *Subtype = AMDFAM17H_ZNVER1; 616 break; // 00h-0Fh: Zen1 617 } 618 break; 619 case 25: 620 CPU = "znver3"; 621 *Type = AMDFAM19H; 622 if (Model <= 0x0f || (Model >= 0x20 && Model <= 0x5f)) { 623 // Family 19h Models 00h-0Fh - Zen3 624 // Family 19h Models 20h-2Fh - Zen3 625 // Family 19h Models 30h-3Fh - Zen3 626 // Family 19h Models 40h-4Fh - Zen3+ 627 // Family 19h Models 50h-5Fh - Zen3+ 628 *Subtype = AMDFAM19H_ZNVER3; 629 break; 630 } 631 if ((Model >= 0x10 && Model <= 0x1f) || 632 (Model >= 0x60 && Model <= 0x74) || 633 (Model >= 0x78 && Model <= 0x7b) || 634 (Model >= 0xA0 && Model <= 0xAf)) { 635 CPU = "znver4"; 636 *Subtype = AMDFAM19H_ZNVER4; 637 break; // "znver4" 638 } 639 break; 640 default: 641 break; // Unknown AMD CPU. 642 } 643 644 return CPU; 645} 646 647static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf, 648 unsigned *Features) { 649 unsigned EAX, EBX; 650 651#define setFeature(F) \ 652 Features[F / 32] |= 1U << (F % 32) 653 654 if ((EDX >> 15) & 1) 655 setFeature(FEATURE_CMOV); 656 if ((EDX >> 23) & 1) 657 setFeature(FEATURE_MMX); 658 if ((EDX >> 25) & 1) 659 setFeature(FEATURE_SSE); 660 if ((EDX >> 26) & 1) 661 setFeature(FEATURE_SSE2); 662 663 if ((ECX >> 0) & 1) 664 setFeature(FEATURE_SSE3); 665 if ((ECX >> 1) & 1) 666 setFeature(FEATURE_PCLMUL); 667 if ((ECX >> 9) & 1) 668 setFeature(FEATURE_SSSE3); 669 if ((ECX >> 12) & 1) 670 setFeature(FEATURE_FMA); 671 if ((ECX >> 19) & 1) 672 setFeature(FEATURE_SSE4_1); 673 if ((ECX >> 20) & 1) 674 setFeature(FEATURE_SSE4_2); 675 if ((ECX >> 23) & 1) 676 setFeature(FEATURE_POPCNT); 677 if ((ECX >> 25) & 1) 678 setFeature(FEATURE_AES); 679 680 // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV 681 // indicates that the AVX registers will be saved and restored on context 682 // switch, then we have full AVX support. 683 const unsigned AVXBits = (1 << 27) | (1 << 28); 684 bool HasAVX = ((ECX & AVXBits) == AVXBits) && !getX86XCR0(&EAX, &EDX) && 685 ((EAX & 0x6) == 0x6); 686#if defined(__APPLE__) 687 // Darwin lazily saves the AVX512 context on first use: trust that the OS will 688 // save the AVX512 context if we use AVX512 instructions, even the bit is not 689 // set right now. 690 bool HasAVX512Save = true; 691#else 692 // AVX512 requires additional context to be saved by the OS. 693 bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0); 694#endif 695 696 if (HasAVX) 697 setFeature(FEATURE_AVX); 698 699 bool HasLeaf7 = 700 MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX); 701 702 if (HasLeaf7 && ((EBX >> 3) & 1)) 703 setFeature(FEATURE_BMI); 704 if (HasLeaf7 && ((EBX >> 5) & 1) && HasAVX) 705 setFeature(FEATURE_AVX2); 706 if (HasLeaf7 && ((EBX >> 8) & 1)) 707 setFeature(FEATURE_BMI2); 708 if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save) 709 setFeature(FEATURE_AVX512F); 710 if (HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save) 711 setFeature(FEATURE_AVX512DQ); 712 if (HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save) 713 setFeature(FEATURE_AVX512IFMA); 714 if (HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save) 715 setFeature(FEATURE_AVX512PF); 716 if (HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save) 717 setFeature(FEATURE_AVX512ER); 718 if (HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save) 719 setFeature(FEATURE_AVX512CD); 720 if (HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save) 721 setFeature(FEATURE_AVX512BW); 722 if (HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save) 723 setFeature(FEATURE_AVX512VL); 724 725 if (HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save) 726 setFeature(FEATURE_AVX512VBMI); 727 if (HasLeaf7 && ((ECX >> 6) & 1) && HasAVX512Save) 728 setFeature(FEATURE_AVX512VBMI2); 729 if (HasLeaf7 && ((ECX >> 8) & 1)) 730 setFeature(FEATURE_GFNI); 731 if (HasLeaf7 && ((ECX >> 10) & 1) && HasAVX) 732 setFeature(FEATURE_VPCLMULQDQ); 733 if (HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save) 734 setFeature(FEATURE_AVX512VNNI); 735 if (HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save) 736 setFeature(FEATURE_AVX512BITALG); 737 if (HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save) 738 setFeature(FEATURE_AVX512VPOPCNTDQ); 739 740 if (HasLeaf7 && ((EDX >> 2) & 1) && HasAVX512Save) 741 setFeature(FEATURE_AVX5124VNNIW); 742 if (HasLeaf7 && ((EDX >> 3) & 1) && HasAVX512Save) 743 setFeature(FEATURE_AVX5124FMAPS); 744 if (HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save) 745 setFeature(FEATURE_AVX512VP2INTERSECT); 746 747 bool HasLeaf7Subleaf1 = 748 MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX); 749 if (HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save) 750 setFeature(FEATURE_AVX512BF16); 751 752 unsigned MaxExtLevel; 753 getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX); 754 755 bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 && 756 !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX); 757 if (HasExtLeaf1 && ((ECX >> 6) & 1)) 758 setFeature(FEATURE_SSE4_A); 759 if (HasExtLeaf1 && ((ECX >> 11) & 1)) 760 setFeature(FEATURE_XOP); 761 if (HasExtLeaf1 && ((ECX >> 16) & 1)) 762 setFeature(FEATURE_FMA4); 763#undef setFeature 764} 765 766#ifndef _WIN32 767__attribute__((visibility("hidden"))) 768#endif 769int __cpu_indicator_init(void) CONSTRUCTOR_ATTRIBUTE; 770 771#ifndef _WIN32 772__attribute__((visibility("hidden"))) 773#endif 774struct __processor_model { 775 unsigned int __cpu_vendor; 776 unsigned int __cpu_type; 777 unsigned int __cpu_subtype; 778 unsigned int __cpu_features[1]; 779} __cpu_model = {0, 0, 0, {0}}; 780 781#ifndef _WIN32 782__attribute__((visibility("hidden"))) 783#endif 784unsigned int __cpu_features2 = 0; 785 786// A constructor function that is sets __cpu_model and __cpu_features2 with 787// the right values. This needs to run only once. This constructor is 788// given the highest priority and it should run before constructors without 789// the priority set. However, it still runs after ifunc initializers and 790// needs to be called explicitly there. 791 792int CONSTRUCTOR_ATTRIBUTE __cpu_indicator_init(void) { 793 unsigned EAX, EBX, ECX, EDX; 794 unsigned MaxLeaf = 5; 795 unsigned Vendor; 796 unsigned Model, Family; 797 unsigned Features[(CPU_FEATURE_MAX + 31) / 32] = {0}; 798 799 // This function needs to run just once. 800 if (__cpu_model.__cpu_vendor) 801 return 0; 802 803 if (!isCpuIdSupported() || 804 getX86CpuIDAndInfo(0, &MaxLeaf, &Vendor, &ECX, &EDX) || MaxLeaf < 1) { 805 __cpu_model.__cpu_vendor = VENDOR_OTHER; 806 return -1; 807 } 808 809 getX86CpuIDAndInfo(1, &EAX, &EBX, &ECX, &EDX); 810 detectX86FamilyModel(EAX, &Family, &Model); 811 812 // Find available features. 813 getAvailableFeatures(ECX, EDX, MaxLeaf, &Features[0]); 814 815 assert((sizeof(Features)/sizeof(Features[0])) == 2); 816 __cpu_model.__cpu_features[0] = Features[0]; 817 __cpu_features2 = Features[1]; 818 819 if (Vendor == SIG_INTEL) { 820 // Get CPU type. 821 getIntelProcessorTypeAndSubtype(Family, Model, &Features[0], 822 &(__cpu_model.__cpu_type), 823 &(__cpu_model.__cpu_subtype)); 824 __cpu_model.__cpu_vendor = VENDOR_INTEL; 825 } else if (Vendor == SIG_AMD) { 826 // Get CPU type. 827 getAMDProcessorTypeAndSubtype(Family, Model, &Features[0], 828 &(__cpu_model.__cpu_type), 829 &(__cpu_model.__cpu_subtype)); 830 __cpu_model.__cpu_vendor = VENDOR_AMD; 831 } else 832 __cpu_model.__cpu_vendor = VENDOR_OTHER; 833 834 assert(__cpu_model.__cpu_vendor < VENDOR_MAX); 835 assert(__cpu_model.__cpu_type < CPU_TYPE_MAX); 836 assert(__cpu_model.__cpu_subtype < CPU_SUBTYPE_MAX); 837 838 return 0; 839} 840#elif defined(__aarch64__) 841 842#ifndef AT_HWCAP 843#define AT_HWCAP 16 844#endif 845#ifndef HWCAP_CPUID 846#define HWCAP_CPUID (1 << 11) 847#endif 848#ifndef HWCAP_FP 849#define HWCAP_FP (1 << 0) 850#endif 851#ifndef HWCAP_ASIMD 852#define HWCAP_ASIMD (1 << 1) 853#endif 854#ifndef HWCAP_AES 855#define HWCAP_AES (1 << 3) 856#endif 857#ifndef HWCAP_PMULL 858#define HWCAP_PMULL (1 << 4) 859#endif 860#ifndef HWCAP_SHA1 861#define HWCAP_SHA1 (1 << 5) 862#endif 863#ifndef HWCAP_SHA2 864#define HWCAP_SHA2 (1 << 6) 865#endif 866#ifndef HWCAP_ATOMICS 867#define HWCAP_ATOMICS (1 << 8) 868#endif 869#ifndef HWCAP_FPHP 870#define HWCAP_FPHP (1 << 9) 871#endif 872#ifndef HWCAP_ASIMDHP 873#define HWCAP_ASIMDHP (1 << 10) 874#endif 875#ifndef HWCAP_ASIMDRDM 876#define HWCAP_ASIMDRDM (1 << 12) 877#endif 878#ifndef HWCAP_JSCVT 879#define HWCAP_JSCVT (1 << 13) 880#endif 881#ifndef HWCAP_FCMA 882#define HWCAP_FCMA (1 << 14) 883#endif 884#ifndef HWCAP_LRCPC 885#define HWCAP_LRCPC (1 << 15) 886#endif 887#ifndef HWCAP_DCPOP 888#define HWCAP_DCPOP (1 << 16) 889#endif 890#ifndef HWCAP_SHA3 891#define HWCAP_SHA3 (1 << 17) 892#endif 893#ifndef HWCAP_SM3 894#define HWCAP_SM3 (1 << 18) 895#endif 896#ifndef HWCAP_SM4 897#define HWCAP_SM4 (1 << 19) 898#endif 899#ifndef HWCAP_ASIMDDP 900#define HWCAP_ASIMDDP (1 << 20) 901#endif 902#ifndef HWCAP_SHA512 903#define HWCAP_SHA512 (1 << 21) 904#endif 905#ifndef HWCAP_SVE 906#define HWCAP_SVE (1 << 22) 907#endif 908#ifndef HWCAP_ASIMDFHM 909#define HWCAP_ASIMDFHM (1 << 23) 910#endif 911#ifndef HWCAP_DIT 912#define HWCAP_DIT (1 << 24) 913#endif 914#ifndef HWCAP_ILRCPC 915#define HWCAP_ILRCPC (1 << 26) 916#endif 917#ifndef HWCAP_FLAGM 918#define HWCAP_FLAGM (1 << 27) 919#endif 920#ifndef HWCAP_SSBS 921#define HWCAP_SSBS (1 << 28) 922#endif 923#ifndef HWCAP_SB 924#define HWCAP_SB (1 << 29) 925#endif 926 927#ifndef AT_HWCAP2 928#define AT_HWCAP2 26 929#endif 930#ifndef HWCAP2_DCPODP 931#define HWCAP2_DCPODP (1 << 0) 932#endif 933#ifndef HWCAP2_SVE2 934#define HWCAP2_SVE2 (1 << 1) 935#endif 936#ifndef HWCAP2_SVEAES 937#define HWCAP2_SVEAES (1 << 2) 938#endif 939#ifndef HWCAP2_SVEPMULL 940#define HWCAP2_SVEPMULL (1 << 3) 941#endif 942#ifndef HWCAP2_SVEBITPERM 943#define HWCAP2_SVEBITPERM (1 << 4) 944#endif 945#ifndef HWCAP2_SVESHA3 946#define HWCAP2_SVESHA3 (1 << 5) 947#endif 948#ifndef HWCAP2_SVESM4 949#define HWCAP2_SVESM4 (1 << 6) 950#endif 951#ifndef HWCAP2_FLAGM2 952#define HWCAP2_FLAGM2 (1 << 7) 953#endif 954#ifndef HWCAP2_FRINT 955#define HWCAP2_FRINT (1 << 8) 956#endif 957#ifndef HWCAP2_SVEI8MM 958#define HWCAP2_SVEI8MM (1 << 9) 959#endif 960#ifndef HWCAP2_SVEF32MM 961#define HWCAP2_SVEF32MM (1 << 10) 962#endif 963#ifndef HWCAP2_SVEF64MM 964#define HWCAP2_SVEF64MM (1 << 11) 965#endif 966#ifndef HWCAP2_SVEBF16 967#define HWCAP2_SVEBF16 (1 << 12) 968#endif 969#ifndef HWCAP2_I8MM 970#define HWCAP2_I8MM (1 << 13) 971#endif 972#ifndef HWCAP2_BF16 973#define HWCAP2_BF16 (1 << 14) 974#endif 975#ifndef HWCAP2_DGH 976#define HWCAP2_DGH (1 << 15) 977#endif 978#ifndef HWCAP2_RNG 979#define HWCAP2_RNG (1 << 16) 980#endif 981#ifndef HWCAP2_BTI 982#define HWCAP2_BTI (1 << 17) 983#endif 984#ifndef HWCAP2_MTE 985#define HWCAP2_MTE (1 << 18) 986#endif 987#ifndef HWCAP2_RPRES 988#define HWCAP2_RPRES (1 << 21) 989#endif 990#ifndef HWCAP2_MTE3 991#define HWCAP2_MTE3 (1 << 22) 992#endif 993#ifndef HWCAP2_SME 994#define HWCAP2_SME (1 << 23) 995#endif 996#ifndef HWCAP2_SME_I16I64 997#define HWCAP2_SME_I16I64 (1 << 24) 998#endif 999#ifndef HWCAP2_SME_F64F64 1000#define HWCAP2_SME_F64F64 (1 << 25) 1001#endif 1002#ifndef HWCAP2_WFXT 1003#define HWCAP2_WFXT (1UL << 31) 1004#endif 1005#ifndef HWCAP2_EBF16 1006#define HWCAP2_EBF16 (1UL << 32) 1007#endif 1008#ifndef HWCAP2_SVE_EBF16 1009#define HWCAP2_SVE_EBF16 (1UL << 33) 1010#endif 1011 1012// LSE support detection for out-of-line atomics 1013// using HWCAP and Auxiliary vector 1014_Bool __aarch64_have_lse_atomics 1015 __attribute__((visibility("hidden"), nocommon)); 1016 1017#if defined(__has_include) 1018#if __has_include(<sys/auxv.h>) 1019#include <sys/auxv.h> 1020#if __has_include(<asm/hwcap.h>) 1021#include <asm/hwcap.h> 1022 1023#if defined(__ANDROID__) 1024#include <string.h> 1025#include <sys/system_properties.h> 1026#elif defined(__Fuchsia__) 1027#include <zircon/features.h> 1028#include <zircon/syscalls.h> 1029#endif 1030 1031// Detect Exynos 9810 CPU 1032#define IF_EXYNOS9810 \ 1033 char arch[PROP_VALUE_MAX]; \ 1034 if (__system_property_get("ro.arch", arch) > 0 && \ 1035 strncmp(arch, "exynos9810", sizeof("exynos9810") - 1) == 0) 1036 1037static void CONSTRUCTOR_ATTRIBUTE init_have_lse_atomics(void) { 1038#if defined(__FreeBSD__) 1039 unsigned long hwcap; 1040 int result = elf_aux_info(AT_HWCAP, &hwcap, sizeof hwcap); 1041 __aarch64_have_lse_atomics = result == 0 && (hwcap & HWCAP_ATOMICS) != 0; 1042#elif defined(__Fuchsia__) 1043 // This ensures the vDSO is a direct link-time dependency of anything that 1044 // needs this initializer code. 1045#pragma comment(lib, "zircon") 1046 uint32_t features; 1047 zx_status_t status = _zx_system_get_features(ZX_FEATURE_KIND_CPU, &features); 1048 __aarch64_have_lse_atomics = 1049 status == ZX_OK && (features & ZX_ARM64_FEATURE_ISA_ATOMICS) != 0; 1050#else 1051 unsigned long hwcap = getauxval(AT_HWCAP); 1052 _Bool result = (hwcap & HWCAP_ATOMICS) != 0; 1053#if defined(__ANDROID__) 1054 if (result) { 1055 // Some cores in the Exynos 9810 CPU are ARMv8.2 and others are ARMv8.0; 1056 // only the former support LSE atomics. However, the kernel in the 1057 // initial Android 8.0 release of Galaxy S9/S9+ devices incorrectly 1058 // reported the feature as being supported. 1059 // 1060 // The kernel appears to have been corrected to mark it unsupported as of 1061 // the Android 9.0 release on those devices, and this issue has not been 1062 // observed anywhere else. Thus, this workaround may be removed if 1063 // compiler-rt ever drops support for Android 8.0. 1064 IF_EXYNOS9810 result = false; 1065 } 1066#endif // defined(__ANDROID__) 1067 __aarch64_have_lse_atomics = result; 1068#endif // defined(__FreeBSD__) 1069} 1070 1071#if !defined(DISABLE_AARCH64_FMV) 1072// CPUFeatures must correspond to the same AArch64 features in 1073// AArch64TargetParser.h 1074enum CPUFeatures { 1075 FEAT_RNG, 1076 FEAT_FLAGM, 1077 FEAT_FLAGM2, 1078 FEAT_FP16FML, 1079 FEAT_DOTPROD, 1080 FEAT_SM4, 1081 FEAT_RDM, 1082 FEAT_LSE, 1083 FEAT_FP, 1084 FEAT_SIMD, 1085 FEAT_CRC, 1086 FEAT_SHA1, 1087 FEAT_SHA2, 1088 FEAT_SHA3, 1089 FEAT_AES, 1090 FEAT_PMULL, 1091 FEAT_FP16, 1092 FEAT_DIT, 1093 FEAT_DPB, 1094 FEAT_DPB2, 1095 FEAT_JSCVT, 1096 FEAT_FCMA, 1097 FEAT_RCPC, 1098 FEAT_RCPC2, 1099 FEAT_FRINTTS, 1100 FEAT_DGH, 1101 FEAT_I8MM, 1102 FEAT_BF16, 1103 FEAT_EBF16, 1104 FEAT_RPRES, 1105 FEAT_SVE, 1106 FEAT_SVE_BF16, 1107 FEAT_SVE_EBF16, 1108 FEAT_SVE_I8MM, 1109 FEAT_SVE_F32MM, 1110 FEAT_SVE_F64MM, 1111 FEAT_SVE2, 1112 FEAT_SVE_AES, 1113 FEAT_SVE_PMULL128, 1114 FEAT_SVE_BITPERM, 1115 FEAT_SVE_SHA3, 1116 FEAT_SVE_SM4, 1117 FEAT_SME, 1118 FEAT_MEMTAG, 1119 FEAT_MEMTAG2, 1120 FEAT_MEMTAG3, 1121 FEAT_SB, 1122 FEAT_PREDRES, 1123 FEAT_SSBS, 1124 FEAT_SSBS2, 1125 FEAT_BTI, 1126 FEAT_LS64, 1127 FEAT_LS64_V, 1128 FEAT_LS64_ACCDATA, 1129 FEAT_WFXT, 1130 FEAT_SME_F64, 1131 FEAT_SME_I64, 1132 FEAT_SME2, 1133 FEAT_MAX 1134}; 1135 1136// Architecture features used 1137// in Function Multi Versioning 1138struct { 1139 unsigned long long features; 1140 // As features grows new fields could be added 1141} __aarch64_cpu_features __attribute__((visibility("hidden"), nocommon)); 1142 1143void init_cpu_features_resolver(unsigned long hwcap, unsigned long hwcap2) { 1144#define setCPUFeature(F) __aarch64_cpu_features.features |= 1ULL << F 1145#define getCPUFeature(id, ftr) __asm__("mrs %0, " #id : "=r"(ftr)) 1146#define extractBits(val, start, number) \ 1147 (val & ((1ULL << number) - 1ULL) << start) >> start 1148 if (hwcap & HWCAP_CRC32) 1149 setCPUFeature(FEAT_CRC); 1150 if (hwcap & HWCAP_PMULL) 1151 setCPUFeature(FEAT_PMULL); 1152 if (hwcap & HWCAP_FLAGM) 1153 setCPUFeature(FEAT_FLAGM); 1154 if (hwcap2 & HWCAP2_FLAGM2) { 1155 setCPUFeature(FEAT_FLAGM); 1156 setCPUFeature(FEAT_FLAGM2); 1157 } 1158 if (hwcap & HWCAP_SM3 && hwcap & HWCAP_SM4) 1159 setCPUFeature(FEAT_SM4); 1160 if (hwcap & HWCAP_ASIMDDP) 1161 setCPUFeature(FEAT_DOTPROD); 1162 if (hwcap & HWCAP_ASIMDFHM) 1163 setCPUFeature(FEAT_FP16FML); 1164 if (hwcap & HWCAP_FPHP) { 1165 setCPUFeature(FEAT_FP16); 1166 setCPUFeature(FEAT_FP); 1167 } 1168 if (hwcap & HWCAP_DIT) 1169 setCPUFeature(FEAT_DIT); 1170 if (hwcap & HWCAP_ASIMDRDM) 1171 setCPUFeature(FEAT_RDM); 1172 if (hwcap & HWCAP_ILRCPC) 1173 setCPUFeature(FEAT_RCPC2); 1174 if (hwcap & HWCAP_AES) 1175 setCPUFeature(FEAT_AES); 1176 if (hwcap & HWCAP_SHA1) 1177 setCPUFeature(FEAT_SHA1); 1178 if (hwcap & HWCAP_SHA2) 1179 setCPUFeature(FEAT_SHA2); 1180 if (hwcap & HWCAP_JSCVT) 1181 setCPUFeature(FEAT_JSCVT); 1182 if (hwcap & HWCAP_FCMA) 1183 setCPUFeature(FEAT_FCMA); 1184 if (hwcap & HWCAP_SB) 1185 setCPUFeature(FEAT_SB); 1186 if (hwcap & HWCAP_SSBS) 1187 setCPUFeature(FEAT_SSBS2); 1188 if (hwcap2 & HWCAP2_MTE) { 1189 setCPUFeature(FEAT_MEMTAG); 1190 setCPUFeature(FEAT_MEMTAG2); 1191 } 1192 if (hwcap2 & HWCAP2_MTE3) { 1193 setCPUFeature(FEAT_MEMTAG); 1194 setCPUFeature(FEAT_MEMTAG2); 1195 setCPUFeature(FEAT_MEMTAG3); 1196 } 1197 if (hwcap2 & HWCAP2_SVEAES) 1198 setCPUFeature(FEAT_SVE_AES); 1199 if (hwcap2 & HWCAP2_SVEPMULL) { 1200 setCPUFeature(FEAT_SVE_AES); 1201 setCPUFeature(FEAT_SVE_PMULL128); 1202 } 1203 if (hwcap2 & HWCAP2_SVEBITPERM) 1204 setCPUFeature(FEAT_SVE_BITPERM); 1205 if (hwcap2 & HWCAP2_SVESHA3) 1206 setCPUFeature(FEAT_SVE_SHA3); 1207 if (hwcap2 & HWCAP2_SVESM4) 1208 setCPUFeature(FEAT_SVE_SM4); 1209 if (hwcap2 & HWCAP2_DCPODP) 1210 setCPUFeature(FEAT_DPB2); 1211 if (hwcap & HWCAP_ATOMICS) 1212 setCPUFeature(FEAT_LSE); 1213 if (hwcap2 & HWCAP2_RNG) 1214 setCPUFeature(FEAT_RNG); 1215 if (hwcap2 & HWCAP2_I8MM) 1216 setCPUFeature(FEAT_I8MM); 1217 if (hwcap2 & HWCAP2_EBF16) 1218 setCPUFeature(FEAT_EBF16); 1219 if (hwcap2 & HWCAP2_SVE_EBF16) 1220 setCPUFeature(FEAT_SVE_EBF16); 1221 if (hwcap2 & HWCAP2_DGH) 1222 setCPUFeature(FEAT_DGH); 1223 if (hwcap2 & HWCAP2_FRINT) 1224 setCPUFeature(FEAT_FRINTTS); 1225 if (hwcap2 & HWCAP2_SVEI8MM) 1226 setCPUFeature(FEAT_SVE_I8MM); 1227 if (hwcap2 & HWCAP2_SVEF32MM) 1228 setCPUFeature(FEAT_SVE_F32MM); 1229 if (hwcap2 & HWCAP2_SVEF64MM) 1230 setCPUFeature(FEAT_SVE_F64MM); 1231 if (hwcap2 & HWCAP2_BTI) 1232 setCPUFeature(FEAT_BTI); 1233 if (hwcap2 & HWCAP2_RPRES) 1234 setCPUFeature(FEAT_RPRES); 1235 if (hwcap2 & HWCAP2_WFXT) 1236 setCPUFeature(FEAT_WFXT); 1237 if (hwcap2 & HWCAP2_SME) 1238 setCPUFeature(FEAT_SME); 1239 if (hwcap2 & HWCAP2_SME_I16I64) 1240 setCPUFeature(FEAT_SME_I64); 1241 if (hwcap2 & HWCAP2_SME_F64F64) 1242 setCPUFeature(FEAT_SME_F64); 1243 if (hwcap & HWCAP_CPUID) { 1244 unsigned long ftr; 1245 getCPUFeature(ID_AA64PFR1_EL1, ftr); 1246 // ID_AA64PFR1_EL1.MTE >= 0b0001 1247 if (extractBits(ftr, 8, 4) >= 0x1) 1248 setCPUFeature(FEAT_MEMTAG); 1249 // ID_AA64PFR1_EL1.SSBS == 0b0001 1250 if (extractBits(ftr, 4, 4) == 0x1) 1251 setCPUFeature(FEAT_SSBS); 1252 // ID_AA64PFR1_EL1.SME == 0b0010 1253 if (extractBits(ftr, 24, 4) == 0x2) 1254 setCPUFeature(FEAT_SME2); 1255 getCPUFeature(ID_AA64PFR0_EL1, ftr); 1256 // ID_AA64PFR0_EL1.FP != 0b1111 1257 if (extractBits(ftr, 16, 4) != 0xF) { 1258 setCPUFeature(FEAT_FP); 1259 // ID_AA64PFR0_EL1.AdvSIMD has the same value as ID_AA64PFR0_EL1.FP 1260 setCPUFeature(FEAT_SIMD); 1261 } 1262 // ID_AA64PFR0_EL1.SVE != 0b0000 1263 if (extractBits(ftr, 32, 4) != 0x0) { 1264 // get ID_AA64ZFR0_EL1, that name supported 1265 // if sve enabled only 1266 getCPUFeature(S3_0_C0_C4_4, ftr); 1267 // ID_AA64ZFR0_EL1.SVEver == 0b0000 1268 if (extractBits(ftr, 0, 4) == 0x0) 1269 setCPUFeature(FEAT_SVE); 1270 // ID_AA64ZFR0_EL1.SVEver == 0b0001 1271 if (extractBits(ftr, 0, 4) == 0x1) 1272 setCPUFeature(FEAT_SVE2); 1273 // ID_AA64ZFR0_EL1.BF16 != 0b0000 1274 if (extractBits(ftr, 20, 4) != 0x0) 1275 setCPUFeature(FEAT_SVE_BF16); 1276 } 1277 getCPUFeature(ID_AA64ISAR0_EL1, ftr); 1278 // ID_AA64ISAR0_EL1.SHA3 != 0b0000 1279 if (extractBits(ftr, 32, 4) != 0x0) 1280 setCPUFeature(FEAT_SHA3); 1281 getCPUFeature(ID_AA64ISAR1_EL1, ftr); 1282 // ID_AA64ISAR1_EL1.DPB >= 0b0001 1283 if (extractBits(ftr, 0, 4) >= 0x1) 1284 setCPUFeature(FEAT_DPB); 1285 // ID_AA64ISAR1_EL1.LRCPC != 0b0000 1286 if (extractBits(ftr, 20, 4) != 0x0) 1287 setCPUFeature(FEAT_RCPC); 1288 // ID_AA64ISAR1_EL1.SPECRES == 0b0001 1289 if (extractBits(ftr, 40, 4) == 0x2) 1290 setCPUFeature(FEAT_PREDRES); 1291 // ID_AA64ISAR1_EL1.BF16 != 0b0000 1292 if (extractBits(ftr, 44, 4) != 0x0) 1293 setCPUFeature(FEAT_BF16); 1294 // ID_AA64ISAR1_EL1.LS64 >= 0b0001 1295 if (extractBits(ftr, 60, 4) >= 0x1) 1296 setCPUFeature(FEAT_LS64); 1297 // ID_AA64ISAR1_EL1.LS64 >= 0b0010 1298 if (extractBits(ftr, 60, 4) >= 0x2) 1299 setCPUFeature(FEAT_LS64_V); 1300 // ID_AA64ISAR1_EL1.LS64 >= 0b0011 1301 if (extractBits(ftr, 60, 4) >= 0x3) 1302 setCPUFeature(FEAT_LS64_ACCDATA); 1303 } else { 1304 // Set some features in case of no CPUID support 1305 if (hwcap & (HWCAP_FP | HWCAP_FPHP)) { 1306 setCPUFeature(FEAT_FP); 1307 // FP and AdvSIMD fields have the same value 1308 setCPUFeature(FEAT_SIMD); 1309 } 1310 if (hwcap & HWCAP_DCPOP || hwcap2 & HWCAP2_DCPODP) 1311 setCPUFeature(FEAT_DPB); 1312 if (hwcap & HWCAP_LRCPC || hwcap & HWCAP_ILRCPC) 1313 setCPUFeature(FEAT_RCPC); 1314 if (hwcap2 & HWCAP2_BF16 || hwcap2 & HWCAP2_EBF16) 1315 setCPUFeature(FEAT_BF16); 1316 if (hwcap2 & HWCAP2_SVEBF16) 1317 setCPUFeature(FEAT_SVE_BF16); 1318 if (hwcap2 & HWCAP2_SVE2 && hwcap & HWCAP_SVE) 1319 setCPUFeature(FEAT_SVE2); 1320 if (hwcap & HWCAP_SHA3) 1321 setCPUFeature(FEAT_SHA3); 1322 } 1323} 1324 1325void CONSTRUCTOR_ATTRIBUTE init_cpu_features(void) { 1326 unsigned long hwcap; 1327 unsigned long hwcap2; 1328 // CPU features already initialized. 1329 if (__aarch64_cpu_features.features) 1330 return; 1331 setCPUFeature(FEAT_MAX); 1332#if defined(__FreeBSD__) 1333 int res = 0; 1334 res = elf_aux_info(AT_HWCAP, &hwcap, sizeof hwcap); 1335 res |= elf_aux_info(AT_HWCAP2, &hwcap2, sizeof hwcap2); 1336 if (res) 1337 return; 1338#else 1339#if defined(__ANDROID__) 1340 // Don't set any CPU features, 1341 // detection could be wrong on Exynos 9810. 1342 IF_EXYNOS9810 return; 1343#endif // defined(__ANDROID__) 1344 hwcap = getauxval(AT_HWCAP); 1345 hwcap2 = getauxval(AT_HWCAP2); 1346#endif // defined(__FreeBSD__) 1347 init_cpu_features_resolver(hwcap, hwcap2); 1348#undef extractBits 1349#undef getCPUFeature 1350#undef setCPUFeature 1351#undef IF_EXYNOS9810 1352} 1353#endif // !defined(DISABLE_AARCH64_FMV) 1354#endif // defined(__has_include) 1355#endif // __has_include(<sys/auxv.h>) 1356#endif // __has_include(<asm/hwcap.h>) 1357#endif // defined(__aarch64__) 1358