driver-i386.c revision 1.11
1/* Subroutines for the gcc driver.
2   Copyright (C) 2006-2019 Free Software Foundation, Inc.
3
4This file is part of GCC.
5
6GCC is free software; you can redistribute it and/or modify
7it under the terms of the GNU General Public License as published by
8the Free Software Foundation; either version 3, or (at your option)
9any later version.
10
11GCC is distributed in the hope that it will be useful,
12but WITHOUT ANY WARRANTY; without even the implied warranty of
13MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14GNU General Public License for more details.
15
16You should have received a copy of the GNU General Public License
17along with GCC; see the file COPYING3.  If not see
18<http://www.gnu.org/licenses/>.  */
19
20#define IN_TARGET_CODE 1
21
22#include "config.h"
23#include "system.h"
24#include "coretypes.h"
25#include "tm.h"
26
27const char *host_detect_local_cpu (int argc, const char **argv);
28
29#if defined(__GNUC__) && (__GNUC__ >= 5 || !defined(__PIC__))
30#include "cpuid.h"
31
/* Parameters of one cache level, as gathered from CPUID.  */
struct cache_desc
{
  unsigned int sizekb;	/* Cache size in kilobytes.  */
  unsigned int assoc;	/* Associativity (number of ways).  */
  unsigned int line;	/* Cache line size.  */
};
38
39/* Returns command line parameters that describe size and
40   cache line size of the processor caches.  */
41
42static char *
43describe_cache (struct cache_desc level1, struct cache_desc level2)
44{
45  char size[100], line[100], size2[100];
46
47  /* At the moment, gcc does not use the information
48     about the associativity of the cache.  */
49
50  snprintf (size, sizeof (size),
51	    "--param l1-cache-size=%u ", level1.sizekb);
52  snprintf (line, sizeof (line),
53	    "--param l1-cache-line-size=%u ", level1.line);
54
55  snprintf (size2, sizeof (size2),
56	    "--param l2-cache-size=%u ", level2.sizekb);
57
58  return concat (size, line, size2, NULL);
59}
60
61/* Detect L2 cache parameters using CPUID extended function 0x80000006.  */
62
63static void
64detect_l2_cache (struct cache_desc *level2)
65{
66  unsigned eax, ebx, ecx, edx;
67  unsigned assoc;
68
69  __cpuid (0x80000006, eax, ebx, ecx, edx);
70
71  level2->sizekb = (ecx >> 16) & 0xffff;
72  level2->line = ecx & 0xff;
73
74  assoc = (ecx >> 12) & 0xf;
75  if (assoc == 6)
76    assoc = 8;
77  else if (assoc == 8)
78    assoc = 16;
79  else if (assoc >= 0xa && assoc <= 0xc)
80    assoc = 32 + (assoc - 0xa) * 16;
81  else if (assoc >= 0xd && assoc <= 0xe)
82    assoc = 96 + (assoc - 0xd) * 32;
83
84  level2->assoc = assoc;
85}
86
87/* Returns the description of caches for an AMD processor.  */
88
89static const char *
90detect_caches_amd (unsigned max_ext_level)
91{
92  unsigned eax, ebx, ecx, edx;
93
94  struct cache_desc level1, level2 = {0, 0, 0};
95
96  if (max_ext_level < 0x80000005)
97    return "";
98
99  __cpuid (0x80000005, eax, ebx, ecx, edx);
100
101  level1.sizekb = (ecx >> 24) & 0xff;
102  level1.assoc = (ecx >> 16) & 0xff;
103  level1.line = ecx & 0xff;
104
105  if (max_ext_level >= 0x80000006)
106    detect_l2_cache (&level2);
107
108  return describe_cache (level1, level2);
109}
110
111/* Decodes the size, the associativity and the cache line size of
112   L1/L2 caches of an Intel processor.  Values are based on
113   "Intel Processor Identification and the CPUID Instruction"
114   [Application Note 485], revision -032, December 2007.  */
115
116static void
117decode_caches_intel (unsigned reg, bool xeon_mp,
118		     struct cache_desc *level1, struct cache_desc *level2)
119{
120  int i;
121
122  for (i = 24; i >= 0; i -= 8)
123    switch ((reg >> i) & 0xff)
124      {
125      case 0x0a:
126	level1->sizekb = 8; level1->assoc = 2; level1->line = 32;
127	break;
128      case 0x0c:
129	level1->sizekb = 16; level1->assoc = 4; level1->line = 32;
130	break;
131      case 0x0d:
132	level1->sizekb = 16; level1->assoc = 4; level1->line = 64;
133	break;
134      case 0x0e:
135	level1->sizekb = 24; level1->assoc = 6; level1->line = 64;
136	break;
137      case 0x21:
138	level2->sizekb = 256; level2->assoc = 8; level2->line = 64;
139	break;
140      case 0x24:
141	level2->sizekb = 1024; level2->assoc = 16; level2->line = 64;
142	break;
143      case 0x2c:
144	level1->sizekb = 32; level1->assoc = 8; level1->line = 64;
145	break;
146      case 0x39:
147	level2->sizekb = 128; level2->assoc = 4; level2->line = 64;
148	break;
149      case 0x3a:
150	level2->sizekb = 192; level2->assoc = 6; level2->line = 64;
151	break;
152      case 0x3b:
153	level2->sizekb = 128; level2->assoc = 2; level2->line = 64;
154	break;
155      case 0x3c:
156	level2->sizekb = 256; level2->assoc = 4; level2->line = 64;
157	break;
158      case 0x3d:
159	level2->sizekb = 384; level2->assoc = 6; level2->line = 64;
160	break;
161      case 0x3e:
162	level2->sizekb = 512; level2->assoc = 4; level2->line = 64;
163	break;
164      case 0x41:
165	level2->sizekb = 128; level2->assoc = 4; level2->line = 32;
166	break;
167      case 0x42:
168	level2->sizekb = 256; level2->assoc = 4; level2->line = 32;
169	break;
170      case 0x43:
171	level2->sizekb = 512; level2->assoc = 4; level2->line = 32;
172	break;
173      case 0x44:
174	level2->sizekb = 1024; level2->assoc = 4; level2->line = 32;
175	break;
176      case 0x45:
177	level2->sizekb = 2048; level2->assoc = 4; level2->line = 32;
178	break;
179      case 0x48:
180	level2->sizekb = 3072; level2->assoc = 12; level2->line = 64;
181	break;
182      case 0x49:
183	if (xeon_mp)
184	  break;
185	level2->sizekb = 4096; level2->assoc = 16; level2->line = 64;
186	break;
187      case 0x4e:
188	level2->sizekb = 6144; level2->assoc = 24; level2->line = 64;
189	break;
190      case 0x60:
191	level1->sizekb = 16; level1->assoc = 8; level1->line = 64;
192	break;
193      case 0x66:
194	level1->sizekb = 8; level1->assoc = 4; level1->line = 64;
195	break;
196      case 0x67:
197	level1->sizekb = 16; level1->assoc = 4; level1->line = 64;
198	break;
199      case 0x68:
200	level1->sizekb = 32; level1->assoc = 4; level1->line = 64;
201	break;
202      case 0x78:
203	level2->sizekb = 1024; level2->assoc = 4; level2->line = 64;
204	break;
205      case 0x79:
206	level2->sizekb = 128; level2->assoc = 8; level2->line = 64;
207	break;
208      case 0x7a:
209	level2->sizekb = 256; level2->assoc = 8; level2->line = 64;
210	break;
211      case 0x7b:
212	level2->sizekb = 512; level2->assoc = 8; level2->line = 64;
213	break;
214      case 0x7c:
215	level2->sizekb = 1024; level2->assoc = 8; level2->line = 64;
216	break;
217      case 0x7d:
218	level2->sizekb = 2048; level2->assoc = 8; level2->line = 64;
219	break;
220      case 0x7f:
221	level2->sizekb = 512; level2->assoc = 2; level2->line = 64;
222	break;
223      case 0x80:
224	level2->sizekb = 512; level2->assoc = 8; level2->line = 64;
225	break;
226      case 0x82:
227	level2->sizekb = 256; level2->assoc = 8; level2->line = 32;
228	break;
229      case 0x83:
230	level2->sizekb = 512; level2->assoc = 8; level2->line = 32;
231	break;
232      case 0x84:
233	level2->sizekb = 1024; level2->assoc = 8; level2->line = 32;
234	break;
235      case 0x85:
236	level2->sizekb = 2048; level2->assoc = 8; level2->line = 32;
237	break;
238      case 0x86:
239	level2->sizekb = 512; level2->assoc = 4; level2->line = 64;
240	break;
241      case 0x87:
242	level2->sizekb = 1024; level2->assoc = 8; level2->line = 64;
243
244      default:
245	break;
246      }
247}
248
/* Fill LEVEL1 and LEVEL2 from the cache descriptors returned by CPUID
   function 2.  XEON_MP is handed on to decode_caches_intel.  */

static void
detect_caches_cpuid2 (bool xeon_mp,
		      struct cache_desc *level1, struct cache_desc *level2)
{
  unsigned regs[4];
  int remaining, r;

  __cpuid (2, regs[0], regs[1], regs[2], regs[3]);

  /* The low four bits of EAX are used as the number of rounds; they
     are cleared so that they are not decoded as a descriptor.  */
  remaining = regs[0] & 0x0f;
  regs[0] &= ~0x0f;

  for (; remaining > 0; remaining--)
    {
      for (r = 0; r < 4; r++)
	{
	  /* Skip registers that are zero or have bit 31 set.  */
	  if (regs[r] == 0 || (regs[r] & 0x80000000) != 0)
	    continue;

	  decode_caches_intel (regs[r], xeon_mp, level1, level2);
	}

      /* Query again only when another round is still to come.  */
      if (remaining > 1)
	__cpuid (2, regs[0], regs[1], regs[2], regs[3]);
    }
}
273
/* Cache parameters can also be detected using CPUID function 4.  This
   method doesn't require hardcoded tables.  The values below are the
   cache type codes that detect_caches_cpuid4 reads from the low five
   bits of EAX.  */

enum cache_type
{
  CACHE_END = 0,	/* Ends the enumeration.  */
  CACHE_DATA = 1,	/* Recorded by detect_caches_cpuid4.  */
  CACHE_INST = 2,	/* Skipped by detect_caches_cpuid4.  */
  CACHE_UNIFIED = 3	/* Recorded by detect_caches_cpuid4.  */
};
284
285static void
286detect_caches_cpuid4 (struct cache_desc *level1, struct cache_desc *level2,
287		      struct cache_desc *level3)
288{
289  struct cache_desc *cache;
290
291  unsigned eax, ebx, ecx, edx;
292  int count;
293
294  for (count = 0;; count++)
295    {
296      __cpuid_count(4, count, eax, ebx, ecx, edx);
297      switch (eax & 0x1f)
298	{
299	case CACHE_END:
300	  return;
301	case CACHE_DATA:
302	case CACHE_UNIFIED:
303	  {
304	    switch ((eax >> 5) & 0x07)
305	      {
306	      case 1:
307		cache = level1;
308		break;
309	      case 2:
310		cache = level2;
311		break;
312	      case 3:
313		cache = level3;
314		break;
315	      default:
316		cache = NULL;
317	      }
318
319	    if (cache)
320	      {
321		unsigned sets = ecx + 1;
322		unsigned part = ((ebx >> 12) & 0x03ff) + 1;
323
324		cache->assoc = ((ebx >> 22) & 0x03ff) + 1;
325		cache->line = (ebx & 0x0fff) + 1;
326
327		cache->sizekb = (cache->assoc * part
328				 * cache->line * sets) / 1024;
329	      }
330	  }
331	default:
332	  break;
333	}
334    }
335}
336
337/* Returns the description of caches for an Intel processor.  */
338
339static const char *
340detect_caches_intel (bool xeon_mp, unsigned max_level,
341		     unsigned max_ext_level, unsigned *l2sizekb)
342{
343  struct cache_desc level1 = {0, 0, 0}, level2 = {0, 0, 0}, level3 = {0, 0, 0};
344
345  if (max_level >= 4)
346    detect_caches_cpuid4 (&level1, &level2, &level3);
347  else if (max_level >= 2)
348    detect_caches_cpuid2 (xeon_mp, &level1, &level2);
349  else
350    return "";
351
352  if (level1.sizekb == 0)
353    return "";
354
355  /* Let the L3 replace the L2. This assumes inclusive caches
356     and single threaded program for now. */
357  if (level3.sizekb)
358    level2 = level3;
359
360  /* Intel CPUs are equipped with AMD style L2 cache info.  Try this
361     method if other methods fail to provide L2 cache parameters.  */
362  if (level2.sizekb == 0 && max_ext_level >= 0x80000006)
363    detect_l2_cache (&level2);
364
365  *l2sizekb = level2.sizekb;
366
367  return describe_cache (level1, level2);
368}
369
370/* This will be called by the spec parser in gcc.c when it sees
371   a %:local_cpu_detect(args) construct.  Currently it will be called
372   with either "arch" or "tune" as argument depending on if -march=native
373   or -mtune=native is to be substituted.
374
375   It returns a string containing new command line parameters to be
376   put at the place of the above two options, depending on what CPU
377   this is executed.  E.g. "-march=k8" on an AMD64 machine
378   for -march=native.
379
380   ARGC and ARGV are set depending on the actual arguments given
381   in the spec.  */
382
383const char *host_detect_local_cpu (int argc, const char **argv)
384{
385  enum processor_type processor = PROCESSOR_I386;
386  const char *cpu = "i386";
387
388  const char *cache = "";
389  const char *options = "";
390
391  unsigned int eax, ebx, ecx, edx;
392
393  unsigned int max_level, ext_level;
394
395  unsigned int vendor;
396  unsigned int model, family;
397
398  unsigned int has_sse3, has_ssse3, has_cmpxchg16b;
399  unsigned int has_cmpxchg8b, has_cmov, has_mmx, has_sse, has_sse2;
400
401  /* Extended features */
402  unsigned int has_lahf_lm = 0, has_sse4a = 0;
403  unsigned int has_longmode = 0, has_3dnowp = 0, has_3dnow = 0;
404  unsigned int has_movbe = 0, has_sse4_1 = 0, has_sse4_2 = 0;
405  unsigned int has_popcnt = 0, has_aes = 0, has_avx = 0, has_avx2 = 0;
406  unsigned int has_pclmul = 0, has_abm = 0, has_lwp = 0;
407  unsigned int has_fma = 0, has_fma4 = 0, has_xop = 0;
408  unsigned int has_bmi = 0, has_bmi2 = 0, has_tbm = 0, has_lzcnt = 0;
409  unsigned int has_hle = 0, has_rtm = 0, has_sgx = 0;
410  unsigned int has_pconfig = 0, has_wbnoinvd = 0;
411  unsigned int has_rdrnd = 0, has_f16c = 0, has_fsgsbase = 0;
412  unsigned int has_rdseed = 0, has_prfchw = 0, has_adx = 0;
413  unsigned int has_osxsave = 0, has_fxsr = 0, has_xsave = 0, has_xsaveopt = 0;
414  unsigned int has_avx512er = 0, has_avx512pf = 0, has_avx512cd = 0;
415  unsigned int has_avx512f = 0, has_sha = 0, has_prefetchwt1 = 0;
416  unsigned int has_clflushopt = 0, has_xsavec = 0, has_xsaves = 0;
417  unsigned int has_avx512dq = 0, has_avx512bw = 0, has_avx512vl = 0;
418  unsigned int has_avx512vbmi = 0, has_avx512ifma = 0, has_clwb = 0;
419  unsigned int has_mwaitx = 0, has_clzero = 0, has_pku = 0, has_rdpid = 0;
420  unsigned int has_avx5124fmaps = 0, has_avx5124vnniw = 0;
421  unsigned int has_gfni = 0, has_avx512vbmi2 = 0;
422  unsigned int has_avx512bitalg = 0;
423  unsigned int has_shstk = 0;
424  unsigned int has_avx512vnni = 0, has_vaes = 0;
425  unsigned int has_vpclmulqdq = 0;
426  unsigned int has_movdiri = 0, has_movdir64b = 0;
427  unsigned int has_waitpkg = 0;
428  unsigned int has_cldemote = 0;
429
430  unsigned int has_ptwrite = 0;
431
432  bool arch;
433
434  unsigned int l2sizekb = 0;
435
436  if (argc < 1)
437    return NULL;
438
439  arch = !strcmp (argv[0], "arch");
440
441  if (!arch && strcmp (argv[0], "tune"))
442    return NULL;
443
444  max_level = __get_cpuid_max (0, &vendor);
445  if (max_level < 1)
446    goto done;
447
448  __cpuid (1, eax, ebx, ecx, edx);
449
450  model = (eax >> 4) & 0x0f;
451  family = (eax >> 8) & 0x0f;
452  if (vendor == signature_INTEL_ebx
453      || vendor == signature_AMD_ebx)
454    {
455      unsigned int extended_model, extended_family;
456
457      extended_model = (eax >> 12) & 0xf0;
458      extended_family = (eax >> 20) & 0xff;
459      if (family == 0x0f)
460	{
461	  family += extended_family;
462	  model += extended_model;
463	}
464      else if (family == 0x06)
465	model += extended_model;
466    }
467
468  has_sse3 = ecx & bit_SSE3;
469  has_ssse3 = ecx & bit_SSSE3;
470  has_sse4_1 = ecx & bit_SSE4_1;
471  has_sse4_2 = ecx & bit_SSE4_2;
472  has_avx = ecx & bit_AVX;
473  has_osxsave = ecx & bit_OSXSAVE;
474  has_cmpxchg16b = ecx & bit_CMPXCHG16B;
475  has_movbe = ecx & bit_MOVBE;
476  has_popcnt = ecx & bit_POPCNT;
477  has_aes = ecx & bit_AES;
478  has_pclmul = ecx & bit_PCLMUL;
479  has_fma = ecx & bit_FMA;
480  has_f16c = ecx & bit_F16C;
481  has_rdrnd = ecx & bit_RDRND;
482  has_xsave = ecx & bit_XSAVE;
483
484  has_cmpxchg8b = edx & bit_CMPXCHG8B;
485  has_cmov = edx & bit_CMOV;
486  has_mmx = edx & bit_MMX;
487  has_fxsr = edx & bit_FXSAVE;
488  has_sse = edx & bit_SSE;
489  has_sse2 = edx & bit_SSE2;
490
491  if (max_level >= 7)
492    {
493      __cpuid_count (7, 0, eax, ebx, ecx, edx);
494
495      has_bmi = ebx & bit_BMI;
496      has_sgx = ebx & bit_SGX;
497      has_hle = ebx & bit_HLE;
498      has_rtm = ebx & bit_RTM;
499      has_avx2 = ebx & bit_AVX2;
500      has_bmi2 = ebx & bit_BMI2;
501      has_fsgsbase = ebx & bit_FSGSBASE;
502      has_rdseed = ebx & bit_RDSEED;
503      has_adx = ebx & bit_ADX;
504      has_avx512f = ebx & bit_AVX512F;
505      has_avx512er = ebx & bit_AVX512ER;
506      has_avx512pf = ebx & bit_AVX512PF;
507      has_avx512cd = ebx & bit_AVX512CD;
508      has_sha = ebx & bit_SHA;
509      has_clflushopt = ebx & bit_CLFLUSHOPT;
510      has_clwb = ebx & bit_CLWB;
511      has_avx512dq = ebx & bit_AVX512DQ;
512      has_avx512bw = ebx & bit_AVX512BW;
513      has_avx512vl = ebx & bit_AVX512VL;
514      has_avx512ifma = ebx & bit_AVX512IFMA;
515
516      has_prefetchwt1 = ecx & bit_PREFETCHWT1;
517      has_avx512vbmi = ecx & bit_AVX512VBMI;
518      has_pku = ecx & bit_OSPKE;
519      has_avx512vbmi2 = ecx & bit_AVX512VBMI2;
520      has_avx512vnni = ecx & bit_AVX512VNNI;
521      has_rdpid = ecx & bit_RDPID;
522      has_gfni = ecx & bit_GFNI;
523      has_vaes = ecx & bit_VAES;
524      has_vpclmulqdq = ecx & bit_VPCLMULQDQ;
525      has_avx512bitalg = ecx & bit_AVX512BITALG;
526      has_movdiri = ecx & bit_MOVDIRI;
527      has_movdir64b = ecx & bit_MOVDIR64B;
528      has_cldemote = ecx & bit_CLDEMOTE;
529
530      has_avx5124vnniw = edx & bit_AVX5124VNNIW;
531      has_avx5124fmaps = edx & bit_AVX5124FMAPS;
532
533      has_shstk = ecx & bit_SHSTK;
534      has_pconfig = edx & bit_PCONFIG;
535      has_waitpkg = ecx & bit_WAITPKG;
536    }
537
538  if (max_level >= 13)
539    {
540      __cpuid_count (13, 1, eax, ebx, ecx, edx);
541
542      has_xsaveopt = eax & bit_XSAVEOPT;
543      has_xsavec = eax & bit_XSAVEC;
544      has_xsaves = eax & bit_XSAVES;
545    }
546
547  if (max_level >= 0x14)
548    {
549      __cpuid_count (0x14, 0, eax, ebx, ecx, edx);
550
551      has_ptwrite = ebx & bit_PTWRITE;
552    }
553
554  /* Check cpuid level of extended features.  */
555  __cpuid (0x80000000, ext_level, ebx, ecx, edx);
556
557  if (ext_level >= 0x80000001)
558    {
559      __cpuid (0x80000001, eax, ebx, ecx, edx);
560
561      has_lahf_lm = ecx & bit_LAHF_LM;
562      has_sse4a = ecx & bit_SSE4a;
563      has_abm = ecx & bit_ABM;
564      has_lwp = ecx & bit_LWP;
565      has_fma4 = ecx & bit_FMA4;
566      has_xop = ecx & bit_XOP;
567      has_tbm = ecx & bit_TBM;
568      has_lzcnt = ecx & bit_LZCNT;
569      has_prfchw = ecx & bit_PRFCHW;
570
571      has_longmode = edx & bit_LM;
572      has_3dnowp = edx & bit_3DNOWP;
573      has_3dnow = edx & bit_3DNOW;
574      has_mwaitx = ecx & bit_MWAITX;
575    }
576
577  if (ext_level >= 0x80000008)
578    {
579      __cpuid (0x80000008, eax, ebx, ecx, edx);
580      has_clzero = ebx & bit_CLZERO;
581      has_wbnoinvd = ebx & bit_WBNOINVD;
582    }
583
584  /* Get XCR_XFEATURE_ENABLED_MASK register with xgetbv.  */
585#define XCR_XFEATURE_ENABLED_MASK	0x0
586#define XSTATE_FP			0x1
587#define XSTATE_SSE			0x2
588#define XSTATE_YMM			0x4
589#define XSTATE_OPMASK			0x20
590#define XSTATE_ZMM			0x40
591#define XSTATE_HI_ZMM			0x80
592
593#define XCR_AVX_ENABLED_MASK \
594  (XSTATE_SSE | XSTATE_YMM)
595#define XCR_AVX512F_ENABLED_MASK \
596  (XSTATE_SSE | XSTATE_YMM | XSTATE_OPMASK | XSTATE_ZMM | XSTATE_HI_ZMM)
597
598  if (has_osxsave)
599    asm (".byte 0x0f; .byte 0x01; .byte 0xd0"
600	 : "=a" (eax), "=d" (edx)
601	 : "c" (XCR_XFEATURE_ENABLED_MASK));
602  else
603    eax = 0;
604
605  /* Check if AVX registers are supported.  */
606  if ((eax & XCR_AVX_ENABLED_MASK) != XCR_AVX_ENABLED_MASK)
607    {
608      has_avx = 0;
609      has_avx2 = 0;
610      has_fma = 0;
611      has_fma4 = 0;
612      has_f16c = 0;
613      has_xop = 0;
614      has_xsave = 0;
615      has_xsaveopt = 0;
616      has_xsaves = 0;
617      has_xsavec = 0;
618    }
619
620  /* Check if AVX512F registers are supported.  */
621  if ((eax & XCR_AVX512F_ENABLED_MASK) != XCR_AVX512F_ENABLED_MASK)
622    {
623      has_avx512f = 0;
624      has_avx512er = 0;
625      has_avx512pf = 0;
626      has_avx512cd = 0;
627      has_avx512dq = 0;
628      has_avx512bw = 0;
629      has_avx512vl = 0;
630    }
631
632  if (!arch)
633    {
634      if (vendor == signature_AMD_ebx
635	  || vendor == signature_CENTAUR_ebx
636	  || vendor == signature_CYRIX_ebx
637	  || vendor == signature_NSC_ebx)
638	cache = detect_caches_amd (ext_level);
639      else if (vendor == signature_INTEL_ebx)
640	{
641	  bool xeon_mp = (family == 15 && model == 6);
642	  cache = detect_caches_intel (xeon_mp, max_level,
643				       ext_level, &l2sizekb);
644	}
645    }
646
647  if (vendor == signature_AMD_ebx)
648    {
649      unsigned int name;
650
651      /* Detect geode processor by its processor signature.  */
652      if (ext_level >= 0x80000002)
653	__cpuid (0x80000002, name, ebx, ecx, edx);
654      else
655	name = 0;
656
657      if (name == signature_NSC_ebx)
658	processor = PROCESSOR_GEODE;
659      else if (has_movbe && family == 22)
660	processor = PROCESSOR_BTVER2;
661      else if (has_clwb)
662	processor = PROCESSOR_ZNVER2;
663      else if (has_clzero)
664	processor = PROCESSOR_ZNVER1;
665      else if (has_avx2)
666        processor = PROCESSOR_BDVER4;
667      else if (has_xsaveopt)
668        processor = PROCESSOR_BDVER3;
669      else if (has_bmi)
670        processor = PROCESSOR_BDVER2;
671      else if (has_xop)
672	processor = PROCESSOR_BDVER1;
673      else if (has_sse4a && has_ssse3)
674        processor = PROCESSOR_BTVER1;
675      else if (has_sse4a)
676	processor = PROCESSOR_AMDFAM10;
677      else if (has_sse2 || has_longmode)
678	processor = PROCESSOR_K8;
679      else if (has_3dnowp && family == 6)
680	processor = PROCESSOR_ATHLON;
681      else if (has_mmx)
682	processor = PROCESSOR_K6;
683      else
684	processor = PROCESSOR_PENTIUM;
685    }
686  else if (vendor == signature_CENTAUR_ebx)
687    {
688      processor = PROCESSOR_GENERIC;
689
690      switch (family)
691	{
692	default:
693	  /* We have no idea.  */
694	  break;
695
696	case 5:
697	  if (has_3dnow || has_mmx)
698	    processor = PROCESSOR_I486;
699	  break;
700
701	case 6:
702	  if (has_longmode)
703	    processor = PROCESSOR_K8;
704	  else if (model >= 9)
705	    processor = PROCESSOR_PENTIUMPRO;
706	  else if (model >= 6)
707	    processor = PROCESSOR_I486;
708	}
709    }
710  else
711    {
712      switch (family)
713	{
714	case 4:
715	  processor = PROCESSOR_I486;
716	  break;
717	case 5:
718	  processor = PROCESSOR_PENTIUM;
719	  break;
720	case 6:
721	  processor = PROCESSOR_PENTIUMPRO;
722	  break;
723	case 15:
724	  processor = PROCESSOR_PENTIUM4;
725	  break;
726	default:
727	  /* We have no idea.  */
728	  processor = PROCESSOR_GENERIC;
729	}
730    }
731
732  switch (processor)
733    {
734    case PROCESSOR_I386:
735      /* Default.  */
736      break;
737    case PROCESSOR_I486:
738      if (arch && vendor == signature_CENTAUR_ebx)
739	{
740	  if (model >= 6)
741	    cpu = "c3";
742	  else if (has_3dnow)
743	    cpu = "winchip2";
744	  else
745	    /* Assume WinChip C6.  */
746	    cpu = "winchip-c6";
747	}
748      else
749	cpu = "i486";
750      break;
751    case PROCESSOR_PENTIUM:
752      if (arch && has_mmx)
753	cpu = "pentium-mmx";
754      else
755	cpu = "pentium";
756      break;
757    case PROCESSOR_PENTIUMPRO:
758      switch (model)
759	{
760	case 0x1c:
761	case 0x26:
762	  /* Bonnell.  */
763	  cpu = "bonnell";
764	  break;
765	case 0x37:
766	case 0x4a:
767	case 0x4d:
768	case 0x5a:
769	case 0x5d:
770	  /* Silvermont.  */
771	  cpu = "silvermont";
772	  break;
773	case 0x5c:
774	case 0x5f:
775	  /* Goldmont.  */
776	  cpu = "goldmont";
777	  break;
778	case 0x7a:
779	  /* Goldmont Plus.  */
780	  cpu = "goldmont-plus";
781	  break;
782	case 0x0f:
783	  /* Merom.  */
784	case 0x17:
785	case 0x1d:
786	  /* Penryn.  */
787	  cpu = "core2";
788	  break;
789	case 0x1a:
790	case 0x1e:
791	case 0x1f:
792	case 0x2e:
793	  /* Nehalem.  */
794	  cpu = "nehalem";
795	  break;
796	case 0x25:
797	case 0x2c:
798	case 0x2f:
799	  /* Westmere.  */
800	  cpu = "westmere";
801	  break;
802	case 0x2a:
803	case 0x2d:
804	  /* Sandy Bridge.  */
805	  cpu = "sandybridge";
806	  break;
807	case 0x3a:
808	case 0x3e:
809	  /* Ivy Bridge.  */
810	  cpu = "ivybridge";
811	  break;
812	case 0x3c:
813	case 0x3f:
814	case 0x45:
815	case 0x46:
816	  /* Haswell.  */
817	  cpu = "haswell";
818	  break;
819	case 0x3d:
820	case 0x47:
821	case 0x4f:
822	case 0x56:
823	  /* Broadwell.  */
824	  cpu = "broadwell";
825	  break;
826	case 0x4e:
827	case 0x5e:
828	  /* Skylake.  */
829	case 0x8e:
830	case 0x9e:
831	  /* Kaby Lake.  */
832	  cpu = "skylake";
833	  break;
834	case 0x55:
835	  if (has_avx512vnni)
836	    /* Cascade Lake.  */
837	    cpu = "cascadelake";
838	  else
839	    /* Skylake with AVX-512.  */
840	    cpu = "skylake-avx512";
841	  break;
842	case 0x57:
843	  /* Knights Landing.  */
844	  cpu = "knl";
845	  break;
846	case 0x66:
847	  /* Cannon Lake.  */
848	  cpu = "cannonlake";
849	  break;
850	case 0x85:
851	  /* Knights Mill.  */
852	  cpu = "knm";
853	  break;
854	default:
855	  if (arch)
856	    {
857	      /* This is unknown family 0x6 CPU.  */
858	      /* Assume Ice Lake Server.  */
859	      if (has_wbnoinvd)
860		cpu = "icelake-server";
861	      /* Assume Ice Lake.  */
862	      else if (has_gfni)
863		cpu = "icelake-client";
864	      /* Assume Cannon Lake.  */
865	      else if (has_avx512vbmi)
866		cpu = "cannonlake";
867	      /* Assume Knights Mill.  */
868	      else if (has_avx5124vnniw)
869		cpu = "knm";
870	      /* Assume Knights Landing.  */
871	      else if (has_avx512er)
872		cpu = "knl";
873	      /* Assume Skylake with AVX-512.  */
874	      else if (has_avx512f)
875		cpu = "skylake-avx512";
876	      /* Assume Skylake.  */
877	      else if (has_clflushopt)
878		cpu = "skylake";
879	      /* Assume Broadwell.  */
880	      else if (has_adx)
881		cpu = "broadwell";
882	      else if (has_avx2)
883		/* Assume Haswell.  */
884		cpu = "haswell";
885	      else if (has_avx)
886		/* Assume Sandy Bridge.  */
887		cpu = "sandybridge";
888	      else if (has_sse4_2)
889		{
890		  if (has_gfni)
891		    /* Assume Tremont.  */
892		    cpu = "tremont";
893		  else if (has_sgx)
894		    /* Assume Goldmont Plus.  */
895		    cpu = "goldmont-plus";
896		  else if (has_xsave)
897		    /* Assume Goldmont.  */
898		    cpu = "goldmont";
899		  else if (has_movbe)
900		    /* Assume Silvermont.  */
901		    cpu = "silvermont";
902		  else
903		    /* Assume Nehalem.  */
904		    cpu = "nehalem";
905		}
906	      else if (has_ssse3)
907		{
908		  if (has_movbe)
909		    /* Assume Bonnell.  */
910		    cpu = "bonnell";
911		  else
912		    /* Assume Core 2.  */
913		    cpu = "core2";
914		}
915	      else if (has_longmode)
916		/* Perhaps some emulator?  Assume x86-64, otherwise gcc
917		   -march=native would be unusable for 64-bit compilations,
918		   as all the CPUs below are 32-bit only.  */
919		cpu = "x86-64";
920	      else if (has_sse3)
921		{
922		  if (vendor == signature_CENTAUR_ebx)
923		    /* C7 / Eden "Esther" */
924		    cpu = "c7";
925		  else
926		    /* It is Core Duo.  */
927		    cpu = "pentium-m";
928		}
929	      else if (has_sse2)
930		/* It is Pentium M.  */
931		cpu = "pentium-m";
932	      else if (has_sse)
933		{
934		  if (vendor == signature_CENTAUR_ebx)
935		    {
936		      if (model >= 9)
937			/* Eden "Nehemiah" */
938			cpu = "nehemiah";
939		      else
940			cpu = "c3-2";
941		    }
942		  else
943		    /* It is Pentium III.  */
944		    cpu = "pentium3";
945		}
946	      else if (has_mmx)
947		/* It is Pentium II.  */
948		cpu = "pentium2";
949	      else
950		/* Default to Pentium Pro.  */
951		cpu = "pentiumpro";
952	    }
953	  else
954	    /* For -mtune, we default to -mtune=generic.  */
955	    cpu = "generic";
956	  break;
957	}
958      break;
959    case PROCESSOR_PENTIUM4:
960      if (has_sse3)
961	{
962	  if (has_longmode)
963	    cpu = "nocona";
964	  else
965	    cpu = "prescott";
966	}
967      else
968	cpu = "pentium4";
969      break;
970    case PROCESSOR_GEODE:
971      cpu = "geode";
972      break;
973    case PROCESSOR_K6:
974      if (arch && has_3dnow)
975	cpu = "k6-3";
976      else
977	cpu = "k6";
978      break;
979    case PROCESSOR_ATHLON:
980      if (arch && has_sse)
981	cpu = "athlon-4";
982      else
983	cpu = "athlon";
984      break;
985    case PROCESSOR_K8:
986      if (arch)
987	{
988	  if (vendor == signature_CENTAUR_ebx)
989	    {
990	      if (has_sse4_1)
991		/* Nano 3000 | Nano dual / quad core | Eden X4 */
992		cpu = "nano-3000";
993	      else if (has_ssse3)
994		/* Nano 1000 | Nano 2000 */
995		cpu = "nano";
996	      else if (has_sse3)
997		/* Eden X2 */
998		cpu = "eden-x2";
999	      else
1000		/* Default to k8 */
1001		cpu = "k8";
1002	    }
1003	  else if (has_sse3)
1004	    cpu = "k8-sse3";
1005	  else
1006	    cpu = "k8";
1007	}
1008      else
1009	/* For -mtune, we default to -mtune=k8 */
1010	cpu = "k8";
1011      break;
1012    case PROCESSOR_AMDFAM10:
1013      cpu = "amdfam10";
1014      break;
1015    case PROCESSOR_BDVER1:
1016      cpu = "bdver1";
1017      break;
1018    case PROCESSOR_BDVER2:
1019      cpu = "bdver2";
1020      break;
1021    case PROCESSOR_BDVER3:
1022      cpu = "bdver3";
1023      break;
1024    case PROCESSOR_BDVER4:
1025      cpu = "bdver4";
1026      break;
1027    case PROCESSOR_ZNVER1:
1028      cpu = "znver1";
1029      break;
1030    case PROCESSOR_ZNVER2:
1031      cpu = "znver2";
1032      break;
1033    case PROCESSOR_BTVER1:
1034      cpu = "btver1";
1035      break;
1036    case PROCESSOR_BTVER2:
1037      cpu = "btver2";
1038      break;
1039
1040    default:
1041      /* Use something reasonable.  */
1042      if (arch)
1043	{
1044	  if (has_ssse3)
1045	    cpu = "core2";
1046	  else if (has_sse3)
1047	    {
1048	      if (has_longmode)
1049		cpu = "nocona";
1050	      else
1051		cpu = "prescott";
1052	    }
1053	  else if (has_longmode)
1054	    /* Perhaps some emulator?  Assume x86-64, otherwise gcc
1055	       -march=native would be unusable for 64-bit compilations,
1056	       as all the CPUs below are 32-bit only.  */
1057	    cpu = "x86-64";
1058	  else if (has_sse2)
1059	    cpu = "pentium4";
1060	  else if (has_cmov)
1061	    cpu = "pentiumpro";
1062	  else if (has_mmx)
1063	    cpu = "pentium-mmx";
1064	  else if (has_cmpxchg8b)
1065	    cpu = "pentium";
1066	}
1067      else
1068	cpu = "generic";
1069    }
1070
1071  if (arch)
1072    {
1073      const char *mmx = has_mmx ? " -mmmx" : " -mno-mmx";
1074      const char *mmx3dnow = has_3dnow ? " -m3dnow" : " -mno-3dnow";
1075      const char *sse = has_sse ? " -msse" : " -mno-sse";
1076      const char *sse2 = has_sse2 ? " -msse2" : " -mno-sse2";
1077      const char *sse3 = has_sse3 ? " -msse3" : " -mno-sse3";
1078      const char *ssse3 = has_ssse3 ? " -mssse3" : " -mno-ssse3";
1079      const char *sse4a = has_sse4a ? " -msse4a" : " -mno-sse4a";
1080      const char *cx16 = has_cmpxchg16b ? " -mcx16" : " -mno-cx16";
1081      const char *sahf = has_lahf_lm ? " -msahf" : " -mno-sahf";
1082      const char *movbe = has_movbe ? " -mmovbe" : " -mno-movbe";
1083      const char *aes = has_aes ? " -maes" : " -mno-aes";
1084      const char *sha = has_sha ? " -msha" : " -mno-sha";
1085      const char *pclmul = has_pclmul ? " -mpclmul" : " -mno-pclmul";
1086      const char *popcnt = has_popcnt ? " -mpopcnt" : " -mno-popcnt";
1087      const char *abm = has_abm ? " -mabm" : " -mno-abm";
1088      const char *lwp = has_lwp ? " -mlwp" : " -mno-lwp";
1089      const char *fma = has_fma ? " -mfma" : " -mno-fma";
1090      const char *fma4 = has_fma4 ? " -mfma4" : " -mno-fma4";
1091      const char *xop = has_xop ? " -mxop" : " -mno-xop";
1092      const char *bmi = has_bmi ? " -mbmi" : " -mno-bmi";
1093      const char *pconfig = has_pconfig ? " -mpconfig" : " -mno-pconfig";
1094      const char *wbnoinvd = has_wbnoinvd ? " -mwbnoinvd" : " -mno-wbnoinvd";
1095      const char *sgx = has_sgx ? " -msgx" : " -mno-sgx";
1096      const char *bmi2 = has_bmi2 ? " -mbmi2" : " -mno-bmi2";
1097      const char *tbm = has_tbm ? " -mtbm" : " -mno-tbm";
1098      const char *avx = has_avx ? " -mavx" : " -mno-avx";
1099      const char *avx2 = has_avx2 ? " -mavx2" : " -mno-avx2";
1100      const char *sse4_2 = has_sse4_2 ? " -msse4.2" : " -mno-sse4.2";
1101      const char *sse4_1 = has_sse4_1 ? " -msse4.1" : " -mno-sse4.1";
1102      const char *lzcnt = has_lzcnt ? " -mlzcnt" : " -mno-lzcnt";
1103      const char *hle = has_hle ? " -mhle" : " -mno-hle";
1104      const char *rtm = has_rtm ? " -mrtm" : " -mno-rtm";
1105      const char *rdrnd = has_rdrnd ? " -mrdrnd" : " -mno-rdrnd";
1106      const char *f16c = has_f16c ? " -mf16c" : " -mno-f16c";
1107      const char *fsgsbase = has_fsgsbase ? " -mfsgsbase" : " -mno-fsgsbase";
1108      const char *rdseed = has_rdseed ? " -mrdseed" : " -mno-rdseed";
1109      const char *prfchw = has_prfchw ? " -mprfchw" : " -mno-prfchw";
1110      const char *adx = has_adx ? " -madx" : " -mno-adx";
1111      const char *fxsr = has_fxsr ? " -mfxsr" : " -mno-fxsr";
1112      const char *xsave = has_xsave ? " -mxsave" : " -mno-xsave";
1113      const char *xsaveopt = has_xsaveopt ? " -mxsaveopt" : " -mno-xsaveopt";
1114      const char *avx512f = has_avx512f ? " -mavx512f" : " -mno-avx512f";
1115      const char *avx512er = has_avx512er ? " -mavx512er" : " -mno-avx512er";
1116      const char *avx512cd = has_avx512cd ? " -mavx512cd" : " -mno-avx512cd";
1117      const char *avx512pf = has_avx512pf ? " -mavx512pf" : " -mno-avx512pf";
1118      const char *prefetchwt1 = has_prefetchwt1 ? " -mprefetchwt1" : " -mno-prefetchwt1";
1119      const char *clflushopt = has_clflushopt ? " -mclflushopt" : " -mno-clflushopt";
1120      const char *xsavec = has_xsavec ? " -mxsavec" : " -mno-xsavec";
1121      const char *xsaves = has_xsaves ? " -mxsaves" : " -mno-xsaves";
1122      const char *avx512dq = has_avx512dq ? " -mavx512dq" : " -mno-avx512dq";
1123      const char *avx512bw = has_avx512bw ? " -mavx512bw" : " -mno-avx512bw";
1124      const char *avx512vl = has_avx512vl ? " -mavx512vl" : " -mno-avx512vl";
1125      const char *avx512ifma = has_avx512ifma ? " -mavx512ifma" : " -mno-avx512ifma";
1126      const char *avx512vbmi = has_avx512vbmi ? " -mavx512vbmi" : " -mno-avx512vbmi";
1127      const char *avx5124vnniw = has_avx5124vnniw ? " -mavx5124vnniw" : " -mno-avx5124vnniw";
1128      const char *avx512vbmi2 = has_avx512vbmi2 ? " -mavx512vbmi2" : " -mno-avx512vbmi2";
1129      const char *avx512vnni = has_avx512vnni ? " -mavx512vnni" : " -mno-avx512vnni";
1130      const char *avx5124fmaps = has_avx5124fmaps ? " -mavx5124fmaps" : " -mno-avx5124fmaps";
1131      const char *clwb = has_clwb ? " -mclwb" : " -mno-clwb";
1132      const char *mwaitx  = has_mwaitx  ? " -mmwaitx"  : " -mno-mwaitx";
1133      const char *clzero  = has_clzero  ? " -mclzero"  : " -mno-clzero";
1134      const char *pku = has_pku ? " -mpku" : " -mno-pku";
1135      const char *rdpid = has_rdpid ? " -mrdpid" : " -mno-rdpid";
1136      const char *gfni = has_gfni ? " -mgfni" : " -mno-gfni";
1137      const char *shstk = has_shstk ? " -mshstk" : " -mno-shstk";
1138      const char *vaes = has_vaes ? " -mvaes" : " -mno-vaes";
1139      const char *vpclmulqdq = has_vpclmulqdq ? " -mvpclmulqdq" : " -mno-vpclmulqdq";
1140      const char *avx512bitalg = has_avx512bitalg ? " -mavx512bitalg" : " -mno-avx512bitalg";
1141      const char *movdiri = has_movdiri ? " -mmovdiri" : " -mno-movdiri";
1142      const char *movdir64b = has_movdir64b ? " -mmovdir64b" : " -mno-movdir64b";
1143      const char *waitpkg = has_waitpkg ? " -mwaitpkg" : " -mno-waitpkg";
1144      const char *cldemote = has_cldemote ? " -mcldemote" : " -mno-cldemote";
1145      const char *ptwrite = has_ptwrite ? " -mptwrite" : " -mno-ptwrite";
1146
1147      options = concat (options, mmx, mmx3dnow, sse, sse2, sse3, ssse3,
1148			sse4a, cx16, sahf, movbe, aes, sha, pclmul,
1149			popcnt, abm, lwp, fma, fma4, xop, bmi, sgx, bmi2,
1150			pconfig, wbnoinvd,
1151			tbm, avx, avx2, sse4_2, sse4_1, lzcnt, rtm,
1152			hle, rdrnd, f16c, fsgsbase, rdseed, prfchw, adx,
1153			fxsr, xsave, xsaveopt, avx512f, avx512er,
1154			avx512cd, avx512pf, prefetchwt1, clflushopt,
1155			xsavec, xsaves, avx512dq, avx512bw, avx512vl,
1156			avx512ifma, avx512vbmi, avx5124fmaps, avx5124vnniw,
1157			clwb, mwaitx, clzero, pku, rdpid, gfni, shstk,
1158			avx512vbmi2, avx512vnni, vaes, vpclmulqdq,
1159			avx512bitalg, movdiri, movdir64b, waitpkg, cldemote,
1160			ptwrite,
1161			NULL);
1162    }
1163
1164done:
1165  return concat (cache, "-m", argv[0], "=", cpu, options, NULL);
1166}
1167#else
1168
/* Fallback for builds where the CPUID based implementation is not
   compiled in, e.g. when compiling with a GCC where the %EBX register
   is fixed.  Returning NULL makes the driver just ignore the -march
   and -mtune "native" target and leave it to the newly built compiler
   to generate code for its default target.  */

const char *host_detect_local_cpu (int argc, const char **argv)
{
  /* Nothing is detected here, so the arguments are not looked at.  */
  (void) argc;
  (void) argv;

  return NULL;
}
1177#endif /* __GNUC__ */
1178