driver-i386.c revision 1.5
1294464Sdes/* Subroutines for the gcc driver. 2180750Sdes Copyright (C) 2006-2015 Free Software Foundation, Inc. 3180750Sdes 4180750SdesThis file is part of GCC. 5180750Sdes 6180750SdesGCC is free software; you can redistribute it and/or modify 7180750Sdesit under the terms of the GNU General Public License as published by 8180750Sdesthe Free Software Foundation; either version 3, or (at your option) 9180750Sdesany later version. 10180750Sdes 11180750SdesGCC is distributed in the hope that it will be useful, 12180750Sdesbut WITHOUT ANY WARRANTY; without even the implied warranty of 13180750SdesMERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14180750SdesGNU General Public License for more details. 15180750Sdes 16180750SdesYou should have received a copy of the GNU General Public License 17180750Sdesalong with GCC; see the file COPYING3. If not see 18180750Sdes<http://www.gnu.org/licenses/>. */ 19180750Sdes 20180750Sdes#include "config.h" 21180750Sdes#include "system.h" 22180750Sdes#include "coretypes.h" 23180750Sdes#include "tm.h" 24180750Sdes 25180750Sdesconst char *host_detect_local_cpu (int argc, const char **argv); 26180750Sdes 27180750Sdes#if defined(__GNUC__) && (__GNUC__ >= 5 || !defined(__PIC__)) 28180750Sdes#include "cpuid.h" 29180750Sdes 30180750Sdesstruct cache_desc 31180750Sdes{ 32180750Sdes unsigned sizekb; 33180750Sdes unsigned assoc; 34180750Sdes unsigned line; 35180750Sdes}; 36180750Sdes 37180750Sdes/* Returns command line parameters that describe size and 38180750Sdes cache line size of the processor caches. */ 39180750Sdes 40180750Sdesstatic char * 41180750Sdesdescribe_cache (struct cache_desc level1, struct cache_desc level2) 42180750Sdes{ 43180750Sdes char size[100], line[100], size2[100]; 44180750Sdes 45180750Sdes /* At the moment, gcc does not use the information 46180750Sdes about the associativity of the cache. */ 47180750Sdes 48180750Sdes snprintf (size, sizeof (size), 49180750Sdes "--param l1-cache-size=%u ", level1.sizekb); 50180750Sdes snprintf (line, sizeof (line), 51180750Sdes "--param l1-cache-line-size=%u ", level1.line); 52180750Sdes 53180750Sdes snprintf (size2, sizeof (size2), 54180750Sdes "--param l2-cache-size=%u ", level2.sizekb); 55180750Sdes 56180750Sdes return concat (size, line, size2, NULL); 57180750Sdes} 58180750Sdes 59180750Sdes/* Detect L2 cache parameters using CPUID extended function 0x80000006. */ 60180750Sdes 61180750Sdesstatic void 62180750Sdesdetect_l2_cache (struct cache_desc *level2) 63180750Sdes{ 64180750Sdes unsigned eax, ebx, ecx, edx; 65180750Sdes unsigned assoc; 66180750Sdes 67180750Sdes __cpuid (0x80000006, eax, ebx, ecx, edx); 68180750Sdes 69180750Sdes level2->sizekb = (ecx >> 16) & 0xffff; 70180750Sdes level2->line = ecx & 0xff; 71180750Sdes 72180750Sdes assoc = (ecx >> 12) & 0xf; 73180750Sdes if (assoc == 6) 74180750Sdes assoc = 8; 75180750Sdes else if (assoc == 8) 76180750Sdes assoc = 16; 77180750Sdes else if (assoc >= 0xa && assoc <= 0xc) 78180750Sdes assoc = 32 + (assoc - 0xa) * 16; 79180750Sdes else if (assoc >= 0xd && assoc <= 0xe) 80180750Sdes assoc = 96 + (assoc - 0xd) * 32; 81180750Sdes 82180750Sdes level2->assoc = assoc; 83180750Sdes} 84180750Sdes 85180750Sdes/* Returns the description of caches for an AMD processor. */ 86180750Sdes 87180750Sdesstatic const char * 88180750Sdesdetect_caches_amd (unsigned max_ext_level) 89180750Sdes{ 90261320Sdes unsigned eax, ebx, ecx, edx; 91180750Sdes 92180750Sdes struct cache_desc level1, level2 = {0, 0, 0}; 93180750Sdes 94180750Sdes if (max_ext_level < 0x80000005) 95180750Sdes return ""; 96261320Sdes 97180750Sdes __cpuid (0x80000005, eax, ebx, ecx, edx); 98180750Sdes 99180750Sdes level1.sizekb = (ecx >> 24) & 0xff; 100192595Sdes level1.assoc = (ecx >> 16) & 0xff; 101180750Sdes level1.line = ecx & 0xff; 102192595Sdes 103180750Sdes if (max_ext_level >= 0x80000006) 104180750Sdes detect_l2_cache (&level2); 105180750Sdes 106180750Sdes return describe_cache (level1, level2); 107180750Sdes} 108180750Sdes 109180750Sdes/* Decodes the size, the associativity and the cache line size of 110180750Sdes L1/L2 caches of an Intel processor. Values are based on 111180750Sdes "Intel Processor Identification and the CPUID Instruction" 112180750Sdes [Application Note 485], revision -032, December 2007. */ 113180750Sdes 114180750Sdesstatic void 115180750Sdesdecode_caches_intel (unsigned reg, bool xeon_mp, 116180750Sdes struct cache_desc *level1, struct cache_desc *level2) 117180750Sdes{ 118180750Sdes int i; 119180750Sdes 120180750Sdes for (i = 24; i >= 0; i -= 8) 121180750Sdes switch ((reg >> i) & 0xff) 122180750Sdes { 123180750Sdes case 0x0a: 124180750Sdes level1->sizekb = 8; level1->assoc = 2; level1->line = 32; 125180750Sdes break; 126180750Sdes case 0x0c: 127180750Sdes level1->sizekb = 16; level1->assoc = 4; level1->line = 32; 128204917Sdes break; 129204917Sdes case 0x0d: 130180750Sdes level1->sizekb = 16; level1->assoc = 4; level1->line = 64; 131180750Sdes break; 132180750Sdes case 0x0e: 133180750Sdes level1->sizekb = 24; level1->assoc = 6; level1->line = 64; 134180750Sdes break; 135180750Sdes case 0x21: 136180750Sdes level2->sizekb = 256; level2->assoc = 8; level2->line = 64; 137180750Sdes break; 138180750Sdes case 0x24: 139180750Sdes level2->sizekb = 1024; level2->assoc = 16; level2->line = 64; 140180750Sdes break; 141180750Sdes case 0x2c: 142180750Sdes level1->sizekb = 32; level1->assoc = 8; level1->line = 64; 143180750Sdes break; 144180750Sdes case 0x39: 145180750Sdes level2->sizekb = 128; level2->assoc = 4; level2->line = 64; 146180750Sdes break; 147180750Sdes case 0x3a: 148180750Sdes level2->sizekb = 192; level2->assoc = 6; level2->line = 64; 149180750Sdes break; 150180750Sdes case 0x3b: 151180750Sdes level2->sizekb = 128; level2->assoc = 2; level2->line = 64; 152180750Sdes break; 153180750Sdes case 0x3c: 154180750Sdes level2->sizekb = 256; level2->assoc = 4; level2->line = 64; 155180750Sdes break; 156180750Sdes case 0x3d: 157180750Sdes level2->sizekb = 384; level2->assoc = 6; level2->line = 64; 158180750Sdes break; 159180750Sdes case 0x3e: 160180750Sdes level2->sizekb = 512; level2->assoc = 4; level2->line = 64; 161180750Sdes break; 162180750Sdes case 0x41: 163180750Sdes level2->sizekb = 128; level2->assoc = 4; level2->line = 32; 164180750Sdes break; 165180750Sdes case 0x42: 166180750Sdes level2->sizekb = 256; level2->assoc = 4; level2->line = 32; 167180750Sdes break; 168180750Sdes case 0x43: 169180750Sdes level2->sizekb = 512; level2->assoc = 4; level2->line = 32; 170180750Sdes break; 171180750Sdes case 0x44: 172180750Sdes level2->sizekb = 1024; level2->assoc = 4; level2->line = 32; 173180750Sdes break; 174180750Sdes case 0x45: 175180750Sdes level2->sizekb = 2048; level2->assoc = 4; level2->line = 32; 176180750Sdes break; 177180750Sdes case 0x48: 178180750Sdes level2->sizekb = 3072; level2->assoc = 12; level2->line = 64; 179180750Sdes break; 180180750Sdes case 0x49: 181180750Sdes if (xeon_mp) 182180750Sdes break; 183180750Sdes level2->sizekb = 4096; level2->assoc = 16; level2->line = 64; 184180750Sdes break; 185180750Sdes case 0x4e: 186180750Sdes level2->sizekb = 6144; level2->assoc = 24; level2->line = 64; 187180750Sdes break; 188180750Sdes case 0x60: 189180750Sdes level1->sizekb = 16; level1->assoc = 8; level1->line = 64; 190180750Sdes break; 191180750Sdes case 0x66: 192180750Sdes level1->sizekb = 8; level1->assoc = 4; level1->line = 64; 193180750Sdes break; 194180750Sdes case 0x67: 195180750Sdes level1->sizekb = 16; level1->assoc = 4; level1->line = 64; 196180750Sdes break; 197180750Sdes case 0x68: 198180750Sdes level1->sizekb = 32; level1->assoc = 4; level1->line = 64; 199180750Sdes break; 200180750Sdes case 0x78: 201180750Sdes level2->sizekb = 1024; level2->assoc = 4; level2->line = 64; 202180750Sdes break; 203180750Sdes case 0x79: 204180750Sdes level2->sizekb = 128; level2->assoc = 8; level2->line = 64; 205180750Sdes break; 206180750Sdes case 0x7a: 207180750Sdes level2->sizekb = 256; level2->assoc = 8; level2->line = 64; 208180750Sdes break; 209180750Sdes case 0x7b: 210180750Sdes level2->sizekb = 512; level2->assoc = 8; level2->line = 64; 211180750Sdes break; 212180750Sdes case 0x7c: 213180750Sdes level2->sizekb = 1024; level2->assoc = 8; level2->line = 64; 214180750Sdes break; 215180750Sdes case 0x7d: 216180750Sdes level2->sizekb = 2048; level2->assoc = 8; level2->line = 64; 217180750Sdes break; 218180750Sdes case 0x7f: 219180750Sdes level2->sizekb = 512; level2->assoc = 2; level2->line = 64; 220180750Sdes break; 221180750Sdes case 0x80: 222180750Sdes level2->sizekb = 512; level2->assoc = 8; level2->line = 64; 223180750Sdes break; 224180750Sdes case 0x82: 225180750Sdes level2->sizekb = 256; level2->assoc = 8; level2->line = 32; 226180750Sdes break; 227180750Sdes case 0x83: 228180750Sdes level2->sizekb = 512; level2->assoc = 8; level2->line = 32; 229180750Sdes break; 230180750Sdes case 0x84: 231180750Sdes level2->sizekb = 1024; level2->assoc = 8; level2->line = 32; 232180750Sdes break; 233180750Sdes case 0x85: 234180750Sdes level2->sizekb = 2048; level2->assoc = 8; level2->line = 32; 235180750Sdes break; 236180750Sdes case 0x86: 237180750Sdes level2->sizekb = 512; level2->assoc = 4; level2->line = 64; 238180750Sdes break; 239180750Sdes case 0x87: 240180750Sdes level2->sizekb = 1024; level2->assoc = 8; level2->line = 64; 241180750Sdes 242180750Sdes default: 243180750Sdes break; 244180750Sdes } 245180750Sdes} 246180750Sdes 247180750Sdes/* Detect cache parameters using CPUID function 2. */ 248180750Sdes 249180750Sdesstatic void 250180750Sdesdetect_caches_cpuid2 (bool xeon_mp, 251180750Sdes struct cache_desc *level1, struct cache_desc *level2) 252180750Sdes{ 253180750Sdes unsigned regs[4]; 254180750Sdes int nreps, i; 255180750Sdes 256180750Sdes __cpuid (2, regs[0], regs[1], regs[2], regs[3]); 257180750Sdes 258180750Sdes nreps = regs[0] & 0x0f; 259180750Sdes regs[0] &= ~0x0f; 260180750Sdes 261180750Sdes while (--nreps >= 0) 262180750Sdes { 263180750Sdes for (i = 0; i < 4; i++) 264180750Sdes if (regs[i] && !((regs[i] >> 31) & 1)) 265180750Sdes decode_caches_intel (regs[i], xeon_mp, level1, level2); 266180750Sdes 267180750Sdes if (nreps) 268180750Sdes __cpuid (2, regs[0], regs[1], regs[2], regs[3]); 269180750Sdes } 270180750Sdes} 271180750Sdes 272180750Sdes/* Detect cache parameters using CPUID function 4. This 273180750Sdes method doesn't require hardcoded tables. */ 274180750Sdes 275180750Sdesenum cache_type 276180750Sdes{ 277180750Sdes CACHE_END = 0, 278180750Sdes CACHE_DATA = 1, 279180750Sdes CACHE_INST = 2, 280180750Sdes CACHE_UNIFIED = 3 281180750Sdes}; 282180750Sdes 283180750Sdesstatic void 284180750Sdesdetect_caches_cpuid4 (struct cache_desc *level1, struct cache_desc *level2, 285180750Sdes struct cache_desc *level3) 286180750Sdes{ 287180750Sdes struct cache_desc *cache; 288180750Sdes 289180750Sdes unsigned eax, ebx, ecx, edx; 290180750Sdes int count; 291180750Sdes 292180750Sdes for (count = 0;; count++) 293180750Sdes { 294180750Sdes __cpuid_count(4, count, eax, ebx, ecx, edx); 295180750Sdes switch (eax & 0x1f) 296180750Sdes { 297180750Sdes case CACHE_END: 298180750Sdes return; 299180750Sdes case CACHE_DATA: 300180750Sdes case CACHE_UNIFIED: 301180750Sdes { 302180750Sdes switch ((eax >> 5) & 0x07) 303180750Sdes { 304180750Sdes case 1: 305180750Sdes cache = level1; 306180750Sdes break; 307180750Sdes case 2: 308180750Sdes cache = level2; 309180750Sdes break; 310180750Sdes case 3: 311180750Sdes cache = level3; 312180750Sdes break; 313180750Sdes default: 314180750Sdes cache = NULL; 315180750Sdes } 316180750Sdes 317180750Sdes if (cache) 318180750Sdes { 319180750Sdes unsigned sets = ecx + 1; 320240075Sdes unsigned part = ((ebx >> 12) & 0x03ff) + 1; 321180750Sdes 322180750Sdes cache->assoc = ((ebx >> 22) & 0x03ff) + 1; 323180750Sdes cache->line = (ebx & 0x0fff) + 1; 324180750Sdes 325180750Sdes cache->sizekb = (cache->assoc * part 326180750Sdes * cache->line * sets) / 1024; 327180750Sdes } 328180750Sdes } 329180750Sdes default: 330180750Sdes break; 331180750Sdes } 332180750Sdes } 333180750Sdes} 334180750Sdes 335180750Sdes/* Returns the description of caches for an Intel processor. */ 336180750Sdes 337180750Sdesstatic const char * 338180750Sdesdetect_caches_intel (bool xeon_mp, unsigned max_level, 339180750Sdes unsigned max_ext_level, unsigned *l2sizekb) 340180750Sdes{ 341180750Sdes struct cache_desc level1 = {0, 0, 0}, level2 = {0, 0, 0}, level3 = {0, 0, 0}; 342180750Sdes 343180750Sdes if (max_level >= 4) 344180750Sdes detect_caches_cpuid4 (&level1, &level2, &level3); 345180750Sdes else if (max_level >= 2) 346180750Sdes detect_caches_cpuid2 (xeon_mp, &level1, &level2); 347180750Sdes else 348180750Sdes return ""; 349180750Sdes 350180750Sdes if (level1.sizekb == 0) 351180750Sdes return ""; 352180750Sdes 353180750Sdes /* Let the L3 replace the L2. This assumes inclusive caches 354180750Sdes and single threaded program for now. */ 355180750Sdes if (level3.sizekb) 356180750Sdes level2 = level3; 357180750Sdes 358180750Sdes /* Intel CPUs are equipped with AMD style L2 cache info. Try this 359180750Sdes method if other methods fail to provide L2 cache parameters. */ 360180750Sdes if (level2.sizekb == 0 && max_ext_level >= 0x80000006) 361180750Sdes detect_l2_cache (&level2); 362180750Sdes 363180750Sdes *l2sizekb = level2.sizekb; 364180750Sdes 365180750Sdes return describe_cache (level1, level2); 366180750Sdes} 367180750Sdes 368180750Sdes/* This will be called by the spec parser in gcc.c when it sees 369180750Sdes a %:local_cpu_detect(args) construct. Currently it will be called 370180750Sdes with either "arch" or "tune" as argument depending on if -march=native 371180750Sdes or -mtune=native is to be substituted. 372180750Sdes 373180750Sdes It returns a string containing new command line parameters to be 374180750Sdes put at the place of the above two options, depending on what CPU 375180750Sdes this is executed. E.g. "-march=k8" on an AMD64 machine 376180750Sdes for -march=native. 377180750Sdes 378180750Sdes ARGC and ARGV are set depending on the actual arguments given 379180750Sdes in the spec. */ 380180750Sdes 381180750Sdesconst char *host_detect_local_cpu (int argc, const char **argv) 382180750Sdes{ 383180750Sdes enum processor_type processor = PROCESSOR_I386; 384180750Sdes const char *cpu = "i386"; 385180750Sdes 386180750Sdes const char *cache = ""; 387180750Sdes const char *options = ""; 388180750Sdes 389180750Sdes unsigned int eax, ebx, ecx, edx; 390180750Sdes 391180750Sdes unsigned int max_level, ext_level; 392180750Sdes 393180750Sdes unsigned int vendor; 394180750Sdes unsigned int model, family; 395180750Sdes 396180750Sdes unsigned int has_sse3, has_ssse3, has_cmpxchg16b; 397180750Sdes unsigned int has_cmpxchg8b, has_cmov, has_mmx, has_sse, has_sse2; 398180750Sdes 399180750Sdes /* Extended features */ 400180750Sdes unsigned int has_lahf_lm = 0, has_sse4a = 0; 401180750Sdes unsigned int has_longmode = 0, has_3dnowp = 0, has_3dnow = 0; 402180750Sdes unsigned int has_movbe = 0, has_sse4_1 = 0, has_sse4_2 = 0; 403180750Sdes unsigned int has_popcnt = 0, has_aes = 0, has_avx = 0, has_avx2 = 0; 404180750Sdes unsigned int has_pclmul = 0, has_abm = 0, has_lwp = 0; 405180750Sdes unsigned int has_fma = 0, has_fma4 = 0, has_xop = 0; 406180750Sdes unsigned int has_bmi = 0, has_bmi2 = 0, has_tbm = 0, has_lzcnt = 0; 407180750Sdes unsigned int has_hle = 0, has_rtm = 0; 408180750Sdes unsigned int has_rdrnd = 0, has_f16c = 0, has_fsgsbase = 0; 409180750Sdes unsigned int has_rdseed = 0, has_prfchw = 0, has_adx = 0; 410180750Sdes unsigned int has_osxsave = 0, has_fxsr = 0, has_xsave = 0, has_xsaveopt = 0; 411180750Sdes unsigned int has_avx512er = 0, has_avx512pf = 0, has_avx512cd = 0; 412180750Sdes unsigned int has_avx512f = 0, has_sha = 0, has_prefetchwt1 = 0; 413180750Sdes unsigned int has_clflushopt = 0, has_xsavec = 0, has_xsaves = 0; 414180750Sdes unsigned int has_avx512dq = 0, has_avx512bw = 0, has_avx512vl = 0; 415180750Sdes unsigned int has_avx512vbmi = 0, has_avx512ifma = 0, has_clwb = 0; 416180750Sdes unsigned int has_pcommit = 0, has_mwaitx = 0; 417180750Sdes 418180750Sdes bool arch; 419180750Sdes 420180750Sdes unsigned int l2sizekb = 0; 421180750Sdes 422255767Sdes if (argc < 1) 423180750Sdes return NULL; 424180750Sdes 425180750Sdes arch = !strcmp (argv[0], "arch"); 426204917Sdes 427204917Sdes if (!arch && strcmp (argv[0], "tune")) 428204917Sdes return NULL; 429204917Sdes 430204917Sdes max_level = __get_cpuid_max (0, &vendor); 431204917Sdes if (max_level < 1) 432204917Sdes goto done; 433204917Sdes 434204917Sdes __cpuid (1, eax, ebx, ecx, edx); 435204917Sdes 436204917Sdes model = (eax >> 4) & 0x0f; 437204917Sdes family = (eax >> 8) & 0x0f; 438204917Sdes if (vendor == signature_INTEL_ebx 439204917Sdes || vendor == signature_AMD_ebx) 440204917Sdes { 441204917Sdes unsigned int extended_model, extended_family; 442204917Sdes 443204917Sdes extended_model = (eax >> 12) & 0xf0; 444204917Sdes extended_family = (eax >> 20) & 0xff; 445204917Sdes if (family == 0x0f) 446204917Sdes { 447204917Sdes family += extended_family; 448204917Sdes model += extended_model; 449204917Sdes } 450204917Sdes else if (family == 0x06) 451204917Sdes model += extended_model; 452204917Sdes } 453204917Sdes 454204917Sdes has_sse3 = ecx & bit_SSE3; 455204917Sdes has_ssse3 = ecx & bit_SSSE3; 456204917Sdes has_sse4_1 = ecx & bit_SSE4_1; 457204917Sdes has_sse4_2 = ecx & bit_SSE4_2; 458204917Sdes has_avx = ecx & bit_AVX; 459204917Sdes has_osxsave = ecx & bit_OSXSAVE; 460204917Sdes has_cmpxchg16b = ecx & bit_CMPXCHG16B; 461204917Sdes has_movbe = ecx & bit_MOVBE; 462204917Sdes has_popcnt = ecx & bit_POPCNT; 463204917Sdes has_aes = ecx & bit_AES; 464204917Sdes has_pclmul = ecx & bit_PCLMUL; 465204917Sdes has_fma = ecx & bit_FMA; 466204917Sdes has_f16c = ecx & bit_F16C; 467204917Sdes has_rdrnd = ecx & bit_RDRND; 468204917Sdes has_xsave = ecx & bit_XSAVE; 469204917Sdes 470204917Sdes has_cmpxchg8b = edx & bit_CMPXCHG8B; 471204917Sdes has_cmov = edx & bit_CMOV; 472204917Sdes has_mmx = edx & bit_MMX; 473204917Sdes has_fxsr = edx & bit_FXSAVE; 474204917Sdes has_sse = edx & bit_SSE; 475204917Sdes has_sse2 = edx & bit_SSE2; 476204917Sdes 477204917Sdes if (max_level >= 7) 478204917Sdes { 479204917Sdes __cpuid_count (7, 0, eax, ebx, ecx, edx); 480204917Sdes 481204917Sdes has_bmi = ebx & bit_BMI; 482204917Sdes has_hle = ebx & bit_HLE; 483204917Sdes has_rtm = ebx & bit_RTM; 484204917Sdes has_avx2 = ebx & bit_AVX2; 485204917Sdes has_bmi2 = ebx & bit_BMI2; 486204917Sdes has_fsgsbase = ebx & bit_FSGSBASE; 487204917Sdes has_rdseed = ebx & bit_RDSEED; 488204917Sdes has_adx = ebx & bit_ADX; 489204917Sdes has_avx512f = ebx & bit_AVX512F; 490204917Sdes has_avx512er = ebx & bit_AVX512ER; 491204917Sdes has_avx512pf = ebx & bit_AVX512PF; 492204917Sdes has_avx512cd = ebx & bit_AVX512CD; 493204917Sdes has_sha = ebx & bit_SHA; 494204917Sdes has_pcommit = ebx & bit_PCOMMIT; 495204917Sdes has_clflushopt = ebx & bit_CLFLUSHOPT; 496255767Sdes has_clwb = ebx & bit_CLWB; 497204917Sdes has_avx512dq = ebx & bit_AVX512DQ; 498204917Sdes has_avx512bw = ebx & bit_AVX512BW; 499204917Sdes has_avx512vl = ebx & bit_AVX512VL; 500 has_avx512vl = ebx & bit_AVX512IFMA; 501 502 has_prefetchwt1 = ecx & bit_PREFETCHWT1; 503 has_avx512vl = ecx & bit_AVX512VBMI; 504 } 505 506 if (max_level >= 13) 507 { 508 __cpuid_count (13, 1, eax, ebx, ecx, edx); 509 510 has_xsaveopt = eax & bit_XSAVEOPT; 511 has_xsavec = eax & bit_XSAVEC; 512 has_xsaves = eax & bit_XSAVES; 513 } 514 515 /* Check cpuid level of extended features. */ 516 __cpuid (0x80000000, ext_level, ebx, ecx, edx); 517 518 if (ext_level > 0x80000000) 519 { 520 __cpuid (0x80000001, eax, ebx, ecx, edx); 521 522 has_lahf_lm = ecx & bit_LAHF_LM; 523 has_sse4a = ecx & bit_SSE4a; 524 has_abm = ecx & bit_ABM; 525 has_lwp = ecx & bit_LWP; 526 has_fma4 = ecx & bit_FMA4; 527 has_xop = ecx & bit_XOP; 528 has_tbm = ecx & bit_TBM; 529 has_lzcnt = ecx & bit_LZCNT; 530 has_prfchw = ecx & bit_PRFCHW; 531 532 has_longmode = edx & bit_LM; 533 has_3dnowp = edx & bit_3DNOWP; 534 has_3dnow = edx & bit_3DNOW; 535 has_mwaitx = ecx & bit_MWAITX; 536 } 537 538 /* Get XCR_XFEATURE_ENABLED_MASK register with xgetbv. */ 539#define XCR_XFEATURE_ENABLED_MASK 0x0 540#define XSTATE_FP 0x1 541#define XSTATE_SSE 0x2 542#define XSTATE_YMM 0x4 543#define XSTATE_OPMASK 0x20 544#define XSTATE_ZMM 0x40 545#define XSTATE_HI_ZMM 0x80 546 if (has_osxsave) 547 asm (".byte 0x0f; .byte 0x01; .byte 0xd0" 548 : "=a" (eax), "=d" (edx) 549 : "c" (XCR_XFEATURE_ENABLED_MASK)); 550 551 /* Check if SSE and YMM states are supported. */ 552 if (!has_osxsave 553 || (eax & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) 554 { 555 has_avx = 0; 556 has_avx2 = 0; 557 has_fma = 0; 558 has_fma4 = 0; 559 has_f16c = 0; 560 has_xop = 0; 561 has_xsave = 0; 562 has_xsaveopt = 0; 563 has_xsaves = 0; 564 has_xsavec = 0; 565 } 566 567 if (!has_osxsave 568 || (eax & 569 (XSTATE_SSE | XSTATE_YMM | XSTATE_OPMASK | XSTATE_ZMM | XSTATE_HI_ZMM)) 570 != (XSTATE_SSE | XSTATE_YMM | XSTATE_OPMASK | XSTATE_ZMM | XSTATE_HI_ZMM)) 571 { 572 has_avx512f = 0; 573 has_avx512er = 0; 574 has_avx512pf = 0; 575 has_avx512cd = 0; 576 has_avx512dq = 0; 577 has_avx512bw = 0; 578 has_avx512vl = 0; 579 } 580 581 if (!arch) 582 { 583 if (vendor == signature_AMD_ebx 584 || vendor == signature_CENTAUR_ebx 585 || vendor == signature_CYRIX_ebx 586 || vendor == signature_NSC_ebx) 587 cache = detect_caches_amd (ext_level); 588 else if (vendor == signature_INTEL_ebx) 589 { 590 bool xeon_mp = (family == 15 && model == 6); 591 cache = detect_caches_intel (xeon_mp, max_level, 592 ext_level, &l2sizekb); 593 } 594 } 595 596 if (vendor == signature_AMD_ebx) 597 { 598 unsigned int name; 599 600 /* Detect geode processor by its processor signature. */ 601 if (ext_level > 0x80000001) 602 __cpuid (0x80000002, name, ebx, ecx, edx); 603 else 604 name = 0; 605 606 if (name == signature_NSC_ebx) 607 processor = PROCESSOR_GEODE; 608 else if (has_movbe && family == 22) 609 processor = PROCESSOR_BTVER2; 610 else if (has_avx2) 611 processor = PROCESSOR_BDVER4; 612 else if (has_xsaveopt) 613 processor = PROCESSOR_BDVER3; 614 else if (has_bmi) 615 processor = PROCESSOR_BDVER2; 616 else if (has_xop) 617 processor = PROCESSOR_BDVER1; 618 else if (has_sse4a && has_ssse3) 619 processor = PROCESSOR_BTVER1; 620 else if (has_sse4a) 621 processor = PROCESSOR_AMDFAM10; 622 else if (has_sse2 || has_longmode) 623 processor = PROCESSOR_K8; 624 else if (has_3dnowp && family == 6) 625 processor = PROCESSOR_ATHLON; 626 else if (has_mmx) 627 processor = PROCESSOR_K6; 628 else 629 processor = PROCESSOR_PENTIUM; 630 } 631 else if (vendor == signature_CENTAUR_ebx) 632 { 633 if (arch) 634 { 635 switch (family) 636 { 637 case 6: 638 if (model > 9) 639 /* Use the default detection procedure. */ 640 processor = PROCESSOR_GENERIC; 641 else if (model == 9) 642 cpu = "c3-2"; 643 else if (model >= 6) 644 cpu = "c3"; 645 else 646 processor = PROCESSOR_GENERIC; 647 break; 648 case 5: 649 if (has_3dnow) 650 cpu = "winchip2"; 651 else if (has_mmx) 652 cpu = "winchip2-c6"; 653 else 654 processor = PROCESSOR_GENERIC; 655 break; 656 default: 657 /* We have no idea. */ 658 processor = PROCESSOR_GENERIC; 659 } 660 } 661 } 662 else 663 { 664 switch (family) 665 { 666 case 4: 667 processor = PROCESSOR_I486; 668 break; 669 case 5: 670 processor = PROCESSOR_PENTIUM; 671 break; 672 case 6: 673 processor = PROCESSOR_PENTIUMPRO; 674 break; 675 case 15: 676 processor = PROCESSOR_PENTIUM4; 677 break; 678 default: 679 /* We have no idea. */ 680 processor = PROCESSOR_GENERIC; 681 } 682 } 683 684 switch (processor) 685 { 686 case PROCESSOR_I386: 687 /* Default. */ 688 break; 689 case PROCESSOR_I486: 690 cpu = "i486"; 691 break; 692 case PROCESSOR_PENTIUM: 693 if (arch && has_mmx) 694 cpu = "pentium-mmx"; 695 else 696 cpu = "pentium"; 697 break; 698 case PROCESSOR_PENTIUMPRO: 699 switch (model) 700 { 701 case 0x1c: 702 case 0x26: 703 /* Bonnell. */ 704 cpu = "bonnell"; 705 break; 706 case 0x37: 707 case 0x4a: 708 case 0x4d: 709 case 0x5a: 710 case 0x5d: 711 /* Silvermont. */ 712 cpu = "silvermont"; 713 break; 714 case 0x0f: 715 /* Merom. */ 716 case 0x17: 717 case 0x1d: 718 /* Penryn. */ 719 cpu = "core2"; 720 break; 721 case 0x1a: 722 case 0x1e: 723 case 0x1f: 724 case 0x2e: 725 /* Nehalem. */ 726 cpu = "nehalem"; 727 break; 728 case 0x25: 729 case 0x2c: 730 case 0x2f: 731 /* Westmere. */ 732 cpu = "westmere"; 733 break; 734 case 0x2a: 735 case 0x2d: 736 /* Sandy Bridge. */ 737 cpu = "sandybridge"; 738 break; 739 case 0x3a: 740 case 0x3e: 741 /* Ivy Bridge. */ 742 cpu = "ivybridge"; 743 break; 744 case 0x3c: 745 case 0x3f: 746 case 0x45: 747 case 0x46: 748 /* Haswell. */ 749 cpu = "haswell"; 750 break; 751 case 0x3d: 752 case 0x4f: 753 case 0x56: 754 /* Broadwell. */ 755 cpu = "broadwell"; 756 break; 757 case 0x57: 758 /* Knights Landing. */ 759 cpu = "knl"; 760 break; 761 default: 762 if (arch) 763 { 764 /* This is unknown family 0x6 CPU. */ 765 /* Assume Knights Landing. */ 766 if (has_avx512f) 767 cpu = "knl"; 768 /* Assume Broadwell. */ 769 else if (has_adx) 770 cpu = "broadwell"; 771 else if (has_avx2) 772 /* Assume Haswell. */ 773 cpu = "haswell"; 774 else if (has_avx) 775 /* Assume Sandy Bridge. */ 776 cpu = "sandybridge"; 777 else if (has_sse4_2) 778 { 779 if (has_movbe) 780 /* Assume Silvermont. */ 781 cpu = "silvermont"; 782 else 783 /* Assume Nehalem. */ 784 cpu = "nehalem"; 785 } 786 else if (has_ssse3) 787 { 788 if (has_movbe) 789 /* Assume Bonnell. */ 790 cpu = "bonnell"; 791 else 792 /* Assume Core 2. */ 793 cpu = "core2"; 794 } 795 else if (has_longmode) 796 /* Perhaps some emulator? Assume x86-64, otherwise gcc 797 -march=native would be unusable for 64-bit compilations, 798 as all the CPUs below are 32-bit only. */ 799 cpu = "x86-64"; 800 else if (has_sse3) 801 /* It is Core Duo. */ 802 cpu = "pentium-m"; 803 else if (has_sse2) 804 /* It is Pentium M. */ 805 cpu = "pentium-m"; 806 else if (has_sse) 807 /* It is Pentium III. */ 808 cpu = "pentium3"; 809 else if (has_mmx) 810 /* It is Pentium II. */ 811 cpu = "pentium2"; 812 else 813 /* Default to Pentium Pro. */ 814 cpu = "pentiumpro"; 815 } 816 else 817 /* For -mtune, we default to -mtune=generic. */ 818 cpu = "generic"; 819 break; 820 } 821 break; 822 case PROCESSOR_PENTIUM4: 823 if (has_sse3) 824 { 825 if (has_longmode) 826 cpu = "nocona"; 827 else 828 cpu = "prescott"; 829 } 830 else 831 cpu = "pentium4"; 832 break; 833 case PROCESSOR_GEODE: 834 cpu = "geode"; 835 break; 836 case PROCESSOR_K6: 837 if (arch && has_3dnow) 838 cpu = "k6-3"; 839 else 840 cpu = "k6"; 841 break; 842 case PROCESSOR_ATHLON: 843 if (arch && has_sse) 844 cpu = "athlon-4"; 845 else 846 cpu = "athlon"; 847 break; 848 case PROCESSOR_K8: 849 if (arch && has_sse3) 850 cpu = "k8-sse3"; 851 else 852 cpu = "k8"; 853 break; 854 case PROCESSOR_AMDFAM10: 855 cpu = "amdfam10"; 856 break; 857 case PROCESSOR_BDVER1: 858 cpu = "bdver1"; 859 break; 860 case PROCESSOR_BDVER2: 861 cpu = "bdver2"; 862 break; 863 case PROCESSOR_BDVER3: 864 cpu = "bdver3"; 865 break; 866 case PROCESSOR_BDVER4: 867 cpu = "bdver4"; 868 break; 869 case PROCESSOR_BTVER1: 870 cpu = "btver1"; 871 break; 872 case PROCESSOR_BTVER2: 873 cpu = "btver2"; 874 break; 875 876 default: 877 /* Use something reasonable. */ 878 if (arch) 879 { 880 if (has_ssse3) 881 cpu = "core2"; 882 else if (has_sse3) 883 { 884 if (has_longmode) 885 cpu = "nocona"; 886 else 887 cpu = "prescott"; 888 } 889 else if (has_sse2) 890 cpu = "pentium4"; 891 else if (has_cmov) 892 cpu = "pentiumpro"; 893 else if (has_mmx) 894 cpu = "pentium-mmx"; 895 else if (has_cmpxchg8b) 896 cpu = "pentium"; 897 } 898 else 899 cpu = "generic"; 900 } 901 902 if (arch) 903 { 904 const char *mmx = has_mmx ? " -mmmx" : " -mno-mmx"; 905 const char *mmx3dnow = has_3dnow ? " -m3dnow" : " -mno-3dnow"; 906 const char *sse = has_sse ? " -msse" : " -mno-sse"; 907 const char *sse2 = has_sse2 ? " -msse2" : " -mno-sse2"; 908 const char *sse3 = has_sse3 ? " -msse3" : " -mno-sse3"; 909 const char *ssse3 = has_ssse3 ? " -mssse3" : " -mno-ssse3"; 910 const char *sse4a = has_sse4a ? " -msse4a" : " -mno-sse4a"; 911 const char *cx16 = has_cmpxchg16b ? " -mcx16" : " -mno-cx16"; 912 const char *sahf = has_lahf_lm ? " -msahf" : " -mno-sahf"; 913 const char *movbe = has_movbe ? " -mmovbe" : " -mno-movbe"; 914 const char *aes = has_aes ? " -maes" : " -mno-aes"; 915 const char *sha = has_sha ? " -msha" : " -mno-sha"; 916 const char *pclmul = has_pclmul ? " -mpclmul" : " -mno-pclmul"; 917 const char *popcnt = has_popcnt ? " -mpopcnt" : " -mno-popcnt"; 918 const char *abm = has_abm ? " -mabm" : " -mno-abm"; 919 const char *lwp = has_lwp ? " -mlwp" : " -mno-lwp"; 920 const char *fma = has_fma ? " -mfma" : " -mno-fma"; 921 const char *fma4 = has_fma4 ? " -mfma4" : " -mno-fma4"; 922 const char *xop = has_xop ? " -mxop" : " -mno-xop"; 923 const char *bmi = has_bmi ? " -mbmi" : " -mno-bmi"; 924 const char *bmi2 = has_bmi2 ? " -mbmi2" : " -mno-bmi2"; 925 const char *tbm = has_tbm ? " -mtbm" : " -mno-tbm"; 926 const char *avx = has_avx ? " -mavx" : " -mno-avx"; 927 const char *avx2 = has_avx2 ? " -mavx2" : " -mno-avx2"; 928 const char *sse4_2 = has_sse4_2 ? " -msse4.2" : " -mno-sse4.2"; 929 const char *sse4_1 = has_sse4_1 ? " -msse4.1" : " -mno-sse4.1"; 930 const char *lzcnt = has_lzcnt ? " -mlzcnt" : " -mno-lzcnt"; 931 const char *hle = has_hle ? " -mhle" : " -mno-hle"; 932 const char *rtm = has_rtm ? " -mrtm" : " -mno-rtm"; 933 const char *rdrnd = has_rdrnd ? " -mrdrnd" : " -mno-rdrnd"; 934 const char *f16c = has_f16c ? " -mf16c" : " -mno-f16c"; 935 const char *fsgsbase = has_fsgsbase ? " -mfsgsbase" : " -mno-fsgsbase"; 936 const char *rdseed = has_rdseed ? " -mrdseed" : " -mno-rdseed"; 937 const char *prfchw = has_prfchw ? " -mprfchw" : " -mno-prfchw"; 938 const char *adx = has_adx ? " -madx" : " -mno-adx"; 939 const char *fxsr = has_fxsr ? " -mfxsr" : " -mno-fxsr"; 940 const char *xsave = has_xsave ? " -mxsave" : " -mno-xsave"; 941 const char *xsaveopt = has_xsaveopt ? " -mxsaveopt" : " -mno-xsaveopt"; 942 const char *avx512f = has_avx512f ? " -mavx512f" : " -mno-avx512f"; 943 const char *avx512er = has_avx512er ? " -mavx512er" : " -mno-avx512er"; 944 const char *avx512cd = has_avx512cd ? " -mavx512cd" : " -mno-avx512cd"; 945 const char *avx512pf = has_avx512pf ? " -mavx512pf" : " -mno-avx512pf"; 946 const char *prefetchwt1 = has_prefetchwt1 ? " -mprefetchwt1" : " -mno-prefetchwt1"; 947 const char *clflushopt = has_clflushopt ? " -mclflushopt" : " -mno-clflushopt"; 948 const char *xsavec = has_xsavec ? " -mxsavec" : " -mno-xsavec"; 949 const char *xsaves = has_xsaves ? " -mxsaves" : " -mno-xsaves"; 950 const char *avx512dq = has_avx512dq ? " -mavx512dq" : " -mno-avx512dq"; 951 const char *avx512bw = has_avx512bw ? " -mavx512bw" : " -mno-avx512bw"; 952 const char *avx512vl = has_avx512vl ? " -mavx512vl" : " -mno-avx512vl"; 953 const char *avx512ifma = has_avx512ifma ? " -mavx512ifma" : " -mno-avx512ifma"; 954 const char *avx512vbmi = has_avx512vbmi ? " -mavx512vbmi" : " -mno-avx512vbmi"; 955 const char *clwb = has_clwb ? " -mclwb" : " -mno-clwb"; 956 const char *pcommit = has_pcommit ? " -mpcommit" : " -mno-pcommit"; 957 const char *mwaitx = has_mwaitx ? " -mmwaitx" : " -mno-mwaitx"; 958 959 options = concat (options, mmx, mmx3dnow, sse, sse2, sse3, ssse3, 960 sse4a, cx16, sahf, movbe, aes, sha, pclmul, 961 popcnt, abm, lwp, fma, fma4, xop, bmi, bmi2, 962 tbm, avx, avx2, sse4_2, sse4_1, lzcnt, rtm, 963 hle, rdrnd, f16c, fsgsbase, rdseed, prfchw, adx, 964 fxsr, xsave, xsaveopt, avx512f, avx512er, 965 avx512cd, avx512pf, prefetchwt1, clflushopt, 966 xsavec, xsaves, avx512dq, avx512bw, avx512vl, 967 avx512ifma, avx512vbmi, clwb, pcommit, mwaitx, NULL); 968 } 969 970done: 971 return concat (cache, "-m", argv[0], "=", cpu, options, NULL); 972} 973#else 974 975/* If we are compiling with GCC where %EBX register is fixed, then the 976 driver will just ignore -march and -mtune "native" target and will leave 977 to the newly built compiler to generate code for its default target. */ 978 979const char *host_detect_local_cpu (int, const char **) 980{ 981 return NULL; 982} 983#endif /* __GNUC__ */ 984