driver-i386.c revision 1.5
/* Subroutines for the gcc driver.
   Copyright (C) 2006-2015 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
19180750Sdes
20180750Sdes#include "config.h"
21180750Sdes#include "system.h"
22180750Sdes#include "coretypes.h"
23180750Sdes#include "tm.h"
24180750Sdes
25180750Sdesconst char *host_detect_local_cpu (int argc, const char **argv);
26180750Sdes
27180750Sdes#if defined(__GNUC__) && (__GNUC__ >= 5 || !defined(__PIC__))
28180750Sdes#include "cpuid.h"
29180750Sdes
/* Parameters of one cache level as gathered from CPUID.  */
struct cache_desc
{
  unsigned int sizekb;	/* Cache size, in kilobytes.  */
  unsigned int assoc;	/* Associativity (number of ways).  */
  unsigned int line;	/* Cache line size, in bytes.  */
};
36180750Sdes
37180750Sdes/* Returns command line parameters that describe size and
38180750Sdes   cache line size of the processor caches.  */
39180750Sdes
40180750Sdesstatic char *
41180750Sdesdescribe_cache (struct cache_desc level1, struct cache_desc level2)
42180750Sdes{
43180750Sdes  char size[100], line[100], size2[100];
44180750Sdes
45180750Sdes  /* At the moment, gcc does not use the information
46180750Sdes     about the associativity of the cache.  */
47180750Sdes
48180750Sdes  snprintf (size, sizeof (size),
49180750Sdes	    "--param l1-cache-size=%u ", level1.sizekb);
50180750Sdes  snprintf (line, sizeof (line),
51180750Sdes	    "--param l1-cache-line-size=%u ", level1.line);
52180750Sdes
53180750Sdes  snprintf (size2, sizeof (size2),
54180750Sdes	    "--param l2-cache-size=%u ", level2.sizekb);
55180750Sdes
56180750Sdes  return concat (size, line, size2, NULL);
57180750Sdes}
58180750Sdes
59180750Sdes/* Detect L2 cache parameters using CPUID extended function 0x80000006.  */
60180750Sdes
61180750Sdesstatic void
62180750Sdesdetect_l2_cache (struct cache_desc *level2)
63180750Sdes{
64180750Sdes  unsigned eax, ebx, ecx, edx;
65180750Sdes  unsigned assoc;
66180750Sdes
67180750Sdes  __cpuid (0x80000006, eax, ebx, ecx, edx);
68180750Sdes
69180750Sdes  level2->sizekb = (ecx >> 16) & 0xffff;
70180750Sdes  level2->line = ecx & 0xff;
71180750Sdes
72180750Sdes  assoc = (ecx >> 12) & 0xf;
73180750Sdes  if (assoc == 6)
74180750Sdes    assoc = 8;
75180750Sdes  else if (assoc == 8)
76180750Sdes    assoc = 16;
77180750Sdes  else if (assoc >= 0xa && assoc <= 0xc)
78180750Sdes    assoc = 32 + (assoc - 0xa) * 16;
79180750Sdes  else if (assoc >= 0xd && assoc <= 0xe)
80180750Sdes    assoc = 96 + (assoc - 0xd) * 32;
81180750Sdes
82180750Sdes  level2->assoc = assoc;
83180750Sdes}
84180750Sdes
85180750Sdes/* Returns the description of caches for an AMD processor.  */
86180750Sdes
87180750Sdesstatic const char *
88180750Sdesdetect_caches_amd (unsigned max_ext_level)
89180750Sdes{
90261320Sdes  unsigned eax, ebx, ecx, edx;
91180750Sdes
92180750Sdes  struct cache_desc level1, level2 = {0, 0, 0};
93180750Sdes
94180750Sdes  if (max_ext_level < 0x80000005)
95180750Sdes    return "";
96261320Sdes
97180750Sdes  __cpuid (0x80000005, eax, ebx, ecx, edx);
98180750Sdes
99180750Sdes  level1.sizekb = (ecx >> 24) & 0xff;
100192595Sdes  level1.assoc = (ecx >> 16) & 0xff;
101180750Sdes  level1.line = ecx & 0xff;
102192595Sdes
103180750Sdes  if (max_ext_level >= 0x80000006)
104180750Sdes    detect_l2_cache (&level2);
105180750Sdes
106180750Sdes  return describe_cache (level1, level2);
107180750Sdes}
108180750Sdes
109180750Sdes/* Decodes the size, the associativity and the cache line size of
110180750Sdes   L1/L2 caches of an Intel processor.  Values are based on
111180750Sdes   "Intel Processor Identification and the CPUID Instruction"
112180750Sdes   [Application Note 485], revision -032, December 2007.  */
113180750Sdes
114180750Sdesstatic void
115180750Sdesdecode_caches_intel (unsigned reg, bool xeon_mp,
116180750Sdes		     struct cache_desc *level1, struct cache_desc *level2)
117180750Sdes{
118180750Sdes  int i;
119180750Sdes
120180750Sdes  for (i = 24; i >= 0; i -= 8)
121180750Sdes    switch ((reg >> i) & 0xff)
122180750Sdes      {
123180750Sdes      case 0x0a:
124180750Sdes	level1->sizekb = 8; level1->assoc = 2; level1->line = 32;
125180750Sdes	break;
126180750Sdes      case 0x0c:
127180750Sdes	level1->sizekb = 16; level1->assoc = 4; level1->line = 32;
128204917Sdes	break;
129204917Sdes      case 0x0d:
130180750Sdes	level1->sizekb = 16; level1->assoc = 4; level1->line = 64;
131180750Sdes	break;
132180750Sdes      case 0x0e:
133180750Sdes	level1->sizekb = 24; level1->assoc = 6; level1->line = 64;
134180750Sdes	break;
135180750Sdes      case 0x21:
136180750Sdes	level2->sizekb = 256; level2->assoc = 8; level2->line = 64;
137180750Sdes	break;
138180750Sdes      case 0x24:
139180750Sdes	level2->sizekb = 1024; level2->assoc = 16; level2->line = 64;
140180750Sdes	break;
141180750Sdes      case 0x2c:
142180750Sdes	level1->sizekb = 32; level1->assoc = 8; level1->line = 64;
143180750Sdes	break;
144180750Sdes      case 0x39:
145180750Sdes	level2->sizekb = 128; level2->assoc = 4; level2->line = 64;
146180750Sdes	break;
147180750Sdes      case 0x3a:
148180750Sdes	level2->sizekb = 192; level2->assoc = 6; level2->line = 64;
149180750Sdes	break;
150180750Sdes      case 0x3b:
151180750Sdes	level2->sizekb = 128; level2->assoc = 2; level2->line = 64;
152180750Sdes	break;
153180750Sdes      case 0x3c:
154180750Sdes	level2->sizekb = 256; level2->assoc = 4; level2->line = 64;
155180750Sdes	break;
156180750Sdes      case 0x3d:
157180750Sdes	level2->sizekb = 384; level2->assoc = 6; level2->line = 64;
158180750Sdes	break;
159180750Sdes      case 0x3e:
160180750Sdes	level2->sizekb = 512; level2->assoc = 4; level2->line = 64;
161180750Sdes	break;
162180750Sdes      case 0x41:
163180750Sdes	level2->sizekb = 128; level2->assoc = 4; level2->line = 32;
164180750Sdes	break;
165180750Sdes      case 0x42:
166180750Sdes	level2->sizekb = 256; level2->assoc = 4; level2->line = 32;
167180750Sdes	break;
168180750Sdes      case 0x43:
169180750Sdes	level2->sizekb = 512; level2->assoc = 4; level2->line = 32;
170180750Sdes	break;
171180750Sdes      case 0x44:
172180750Sdes	level2->sizekb = 1024; level2->assoc = 4; level2->line = 32;
173180750Sdes	break;
174180750Sdes      case 0x45:
175180750Sdes	level2->sizekb = 2048; level2->assoc = 4; level2->line = 32;
176180750Sdes	break;
177180750Sdes      case 0x48:
178180750Sdes	level2->sizekb = 3072; level2->assoc = 12; level2->line = 64;
179180750Sdes	break;
180180750Sdes      case 0x49:
181180750Sdes	if (xeon_mp)
182180750Sdes	  break;
183180750Sdes	level2->sizekb = 4096; level2->assoc = 16; level2->line = 64;
184180750Sdes	break;
185180750Sdes      case 0x4e:
186180750Sdes	level2->sizekb = 6144; level2->assoc = 24; level2->line = 64;
187180750Sdes	break;
188180750Sdes      case 0x60:
189180750Sdes	level1->sizekb = 16; level1->assoc = 8; level1->line = 64;
190180750Sdes	break;
191180750Sdes      case 0x66:
192180750Sdes	level1->sizekb = 8; level1->assoc = 4; level1->line = 64;
193180750Sdes	break;
194180750Sdes      case 0x67:
195180750Sdes	level1->sizekb = 16; level1->assoc = 4; level1->line = 64;
196180750Sdes	break;
197180750Sdes      case 0x68:
198180750Sdes	level1->sizekb = 32; level1->assoc = 4; level1->line = 64;
199180750Sdes	break;
200180750Sdes      case 0x78:
201180750Sdes	level2->sizekb = 1024; level2->assoc = 4; level2->line = 64;
202180750Sdes	break;
203180750Sdes      case 0x79:
204180750Sdes	level2->sizekb = 128; level2->assoc = 8; level2->line = 64;
205180750Sdes	break;
206180750Sdes      case 0x7a:
207180750Sdes	level2->sizekb = 256; level2->assoc = 8; level2->line = 64;
208180750Sdes	break;
209180750Sdes      case 0x7b:
210180750Sdes	level2->sizekb = 512; level2->assoc = 8; level2->line = 64;
211180750Sdes	break;
212180750Sdes      case 0x7c:
213180750Sdes	level2->sizekb = 1024; level2->assoc = 8; level2->line = 64;
214180750Sdes	break;
215180750Sdes      case 0x7d:
216180750Sdes	level2->sizekb = 2048; level2->assoc = 8; level2->line = 64;
217180750Sdes	break;
218180750Sdes      case 0x7f:
219180750Sdes	level2->sizekb = 512; level2->assoc = 2; level2->line = 64;
220180750Sdes	break;
221180750Sdes      case 0x80:
222180750Sdes	level2->sizekb = 512; level2->assoc = 8; level2->line = 64;
223180750Sdes	break;
224180750Sdes      case 0x82:
225180750Sdes	level2->sizekb = 256; level2->assoc = 8; level2->line = 32;
226180750Sdes	break;
227180750Sdes      case 0x83:
228180750Sdes	level2->sizekb = 512; level2->assoc = 8; level2->line = 32;
229180750Sdes	break;
230180750Sdes      case 0x84:
231180750Sdes	level2->sizekb = 1024; level2->assoc = 8; level2->line = 32;
232180750Sdes	break;
233180750Sdes      case 0x85:
234180750Sdes	level2->sizekb = 2048; level2->assoc = 8; level2->line = 32;
235180750Sdes	break;
236180750Sdes      case 0x86:
237180750Sdes	level2->sizekb = 512; level2->assoc = 4; level2->line = 64;
238180750Sdes	break;
239180750Sdes      case 0x87:
240180750Sdes	level2->sizekb = 1024; level2->assoc = 8; level2->line = 64;
241180750Sdes
242180750Sdes      default:
243180750Sdes	break;
244180750Sdes      }
245180750Sdes}
246180750Sdes
/* Detect cache parameters using CPUID function 2.  The low four bits
   of EAX from the first invocation are used as the number of times the
   function has to be executed; every returned register whose bit 31 is
   clear is handed to decode_caches_intel together with XEON_MP, LEVEL1
   and LEVEL2.  */

static void
detect_caches_cpuid2 (bool xeon_mp,
		      struct cache_desc *level1, struct cache_desc *level2)
{
  unsigned regs[4];
  int total, pass, r;

  __cpuid (2, regs[0], regs[1], regs[2], regs[3]);

  /* The low nibble of EAX is the repeat count, not a descriptor; it is
     masked off for this first set of registers only.  */
  total = regs[0] & 0x0f;
  regs[0] &= ~0x0f;

  for (pass = 0; pass < total; pass++)
    {
      /* Every pass after the first one works on a fresh CPUID result.  */
      if (pass > 0)
	__cpuid (2, regs[0], regs[1], regs[2], regs[3]);

      for (r = 0; r < 4; r++)
	{
	  /* Skip empty registers and registers with bit 31 set.  */
	  if (regs[r] == 0 || ((regs[r] >> 31) & 1) != 0)
	    continue;

	  decode_caches_intel (regs[r], xeon_mp, level1, level2);
	}
    }
}
271180750Sdes
/* Detect cache parameters using CPUID function 4.  This method doesn't
   require hardcoded tables.  The enumerators below are the values of
   the cache type field (EAX bits 4-0) tested by detect_caches_cpuid4.  */

enum cache_type
{
  CACHE_END = 0,	/* Terminates the enumeration of caches.  */
  CACHE_DATA = 1,	/* Data cache.  */
  CACHE_INST = 2,	/* Instruction cache.  */
  CACHE_UNIFIED = 3	/* Unified cache.  */
};
282180750Sdes
283180750Sdesstatic void
284180750Sdesdetect_caches_cpuid4 (struct cache_desc *level1, struct cache_desc *level2,
285180750Sdes		      struct cache_desc *level3)
286180750Sdes{
287180750Sdes  struct cache_desc *cache;
288180750Sdes
289180750Sdes  unsigned eax, ebx, ecx, edx;
290180750Sdes  int count;
291180750Sdes
292180750Sdes  for (count = 0;; count++)
293180750Sdes    {
294180750Sdes      __cpuid_count(4, count, eax, ebx, ecx, edx);
295180750Sdes      switch (eax & 0x1f)
296180750Sdes	{
297180750Sdes	case CACHE_END:
298180750Sdes	  return;
299180750Sdes	case CACHE_DATA:
300180750Sdes	case CACHE_UNIFIED:
301180750Sdes	  {
302180750Sdes	    switch ((eax >> 5) & 0x07)
303180750Sdes	      {
304180750Sdes	      case 1:
305180750Sdes		cache = level1;
306180750Sdes		break;
307180750Sdes	      case 2:
308180750Sdes		cache = level2;
309180750Sdes		break;
310180750Sdes	      case 3:
311180750Sdes		cache = level3;
312180750Sdes		break;
313180750Sdes	      default:
314180750Sdes		cache = NULL;
315180750Sdes	      }
316180750Sdes
317180750Sdes	    if (cache)
318180750Sdes	      {
319180750Sdes		unsigned sets = ecx + 1;
320240075Sdes		unsigned part = ((ebx >> 12) & 0x03ff) + 1;
321180750Sdes
322180750Sdes		cache->assoc = ((ebx >> 22) & 0x03ff) + 1;
323180750Sdes		cache->line = (ebx & 0x0fff) + 1;
324180750Sdes
325180750Sdes		cache->sizekb = (cache->assoc * part
326180750Sdes				 * cache->line * sets) / 1024;
327180750Sdes	      }
328180750Sdes	  }
329180750Sdes	default:
330180750Sdes	  break;
331180750Sdes	}
332180750Sdes    }
333180750Sdes}
334180750Sdes
335180750Sdes/* Returns the description of caches for an Intel processor.  */
336180750Sdes
337180750Sdesstatic const char *
338180750Sdesdetect_caches_intel (bool xeon_mp, unsigned max_level,
339180750Sdes		     unsigned max_ext_level, unsigned *l2sizekb)
340180750Sdes{
341180750Sdes  struct cache_desc level1 = {0, 0, 0}, level2 = {0, 0, 0}, level3 = {0, 0, 0};
342180750Sdes
343180750Sdes  if (max_level >= 4)
344180750Sdes    detect_caches_cpuid4 (&level1, &level2, &level3);
345180750Sdes  else if (max_level >= 2)
346180750Sdes    detect_caches_cpuid2 (xeon_mp, &level1, &level2);
347180750Sdes  else
348180750Sdes    return "";
349180750Sdes
350180750Sdes  if (level1.sizekb == 0)
351180750Sdes    return "";
352180750Sdes
353180750Sdes  /* Let the L3 replace the L2. This assumes inclusive caches
354180750Sdes     and single threaded program for now. */
355180750Sdes  if (level3.sizekb)
356180750Sdes    level2 = level3;
357180750Sdes
358180750Sdes  /* Intel CPUs are equipped with AMD style L2 cache info.  Try this
359180750Sdes     method if other methods fail to provide L2 cache parameters.  */
360180750Sdes  if (level2.sizekb == 0 && max_ext_level >= 0x80000006)
361180750Sdes    detect_l2_cache (&level2);
362180750Sdes
363180750Sdes  *l2sizekb = level2.sizekb;
364180750Sdes
365180750Sdes  return describe_cache (level1, level2);
366180750Sdes}
367180750Sdes
368180750Sdes/* This will be called by the spec parser in gcc.c when it sees
369180750Sdes   a %:local_cpu_detect(args) construct.  Currently it will be called
370180750Sdes   with either "arch" or "tune" as argument depending on if -march=native
371180750Sdes   or -mtune=native is to be substituted.
372180750Sdes
373180750Sdes   It returns a string containing new command line parameters to be
374180750Sdes   put at the place of the above two options, depending on what CPU
375180750Sdes   this is executed.  E.g. "-march=k8" on an AMD64 machine
376180750Sdes   for -march=native.
377180750Sdes
378180750Sdes   ARGC and ARGV are set depending on the actual arguments given
379180750Sdes   in the spec.  */
380180750Sdes
381180750Sdesconst char *host_detect_local_cpu (int argc, const char **argv)
382180750Sdes{
383180750Sdes  enum processor_type processor = PROCESSOR_I386;
384180750Sdes  const char *cpu = "i386";
385180750Sdes
386180750Sdes  const char *cache = "";
387180750Sdes  const char *options = "";
388180750Sdes
389180750Sdes  unsigned int eax, ebx, ecx, edx;
390180750Sdes
391180750Sdes  unsigned int max_level, ext_level;
392180750Sdes
393180750Sdes  unsigned int vendor;
394180750Sdes  unsigned int model, family;
395180750Sdes
396180750Sdes  unsigned int has_sse3, has_ssse3, has_cmpxchg16b;
397180750Sdes  unsigned int has_cmpxchg8b, has_cmov, has_mmx, has_sse, has_sse2;
398180750Sdes
399180750Sdes  /* Extended features */
400180750Sdes  unsigned int has_lahf_lm = 0, has_sse4a = 0;
401180750Sdes  unsigned int has_longmode = 0, has_3dnowp = 0, has_3dnow = 0;
402180750Sdes  unsigned int has_movbe = 0, has_sse4_1 = 0, has_sse4_2 = 0;
403180750Sdes  unsigned int has_popcnt = 0, has_aes = 0, has_avx = 0, has_avx2 = 0;
404180750Sdes  unsigned int has_pclmul = 0, has_abm = 0, has_lwp = 0;
405180750Sdes  unsigned int has_fma = 0, has_fma4 = 0, has_xop = 0;
406180750Sdes  unsigned int has_bmi = 0, has_bmi2 = 0, has_tbm = 0, has_lzcnt = 0;
407180750Sdes  unsigned int has_hle = 0, has_rtm = 0;
408180750Sdes  unsigned int has_rdrnd = 0, has_f16c = 0, has_fsgsbase = 0;
409180750Sdes  unsigned int has_rdseed = 0, has_prfchw = 0, has_adx = 0;
410180750Sdes  unsigned int has_osxsave = 0, has_fxsr = 0, has_xsave = 0, has_xsaveopt = 0;
411180750Sdes  unsigned int has_avx512er = 0, has_avx512pf = 0, has_avx512cd = 0;
412180750Sdes  unsigned int has_avx512f = 0, has_sha = 0, has_prefetchwt1 = 0;
413180750Sdes  unsigned int has_clflushopt = 0, has_xsavec = 0, has_xsaves = 0;
414180750Sdes  unsigned int has_avx512dq = 0, has_avx512bw = 0, has_avx512vl = 0;
415180750Sdes  unsigned int has_avx512vbmi = 0, has_avx512ifma = 0, has_clwb = 0;
416180750Sdes  unsigned int has_pcommit = 0, has_mwaitx = 0;
417180750Sdes
418180750Sdes  bool arch;
419180750Sdes
420180750Sdes  unsigned int l2sizekb = 0;
421180750Sdes
422255767Sdes  if (argc < 1)
423180750Sdes    return NULL;
424180750Sdes
425180750Sdes  arch = !strcmp (argv[0], "arch");
426204917Sdes
427204917Sdes  if (!arch && strcmp (argv[0], "tune"))
428204917Sdes    return NULL;
429204917Sdes
430204917Sdes  max_level = __get_cpuid_max (0, &vendor);
431204917Sdes  if (max_level < 1)
432204917Sdes    goto done;
433204917Sdes
434204917Sdes  __cpuid (1, eax, ebx, ecx, edx);
435204917Sdes
436204917Sdes  model = (eax >> 4) & 0x0f;
437204917Sdes  family = (eax >> 8) & 0x0f;
438204917Sdes  if (vendor == signature_INTEL_ebx
439204917Sdes      || vendor == signature_AMD_ebx)
440204917Sdes    {
441204917Sdes      unsigned int extended_model, extended_family;
442204917Sdes
443204917Sdes      extended_model = (eax >> 12) & 0xf0;
444204917Sdes      extended_family = (eax >> 20) & 0xff;
445204917Sdes      if (family == 0x0f)
446204917Sdes	{
447204917Sdes	  family += extended_family;
448204917Sdes	  model += extended_model;
449204917Sdes	}
450204917Sdes      else if (family == 0x06)
451204917Sdes	model += extended_model;
452204917Sdes    }
453204917Sdes
454204917Sdes  has_sse3 = ecx & bit_SSE3;
455204917Sdes  has_ssse3 = ecx & bit_SSSE3;
456204917Sdes  has_sse4_1 = ecx & bit_SSE4_1;
457204917Sdes  has_sse4_2 = ecx & bit_SSE4_2;
458204917Sdes  has_avx = ecx & bit_AVX;
459204917Sdes  has_osxsave = ecx & bit_OSXSAVE;
460204917Sdes  has_cmpxchg16b = ecx & bit_CMPXCHG16B;
461204917Sdes  has_movbe = ecx & bit_MOVBE;
462204917Sdes  has_popcnt = ecx & bit_POPCNT;
463204917Sdes  has_aes = ecx & bit_AES;
464204917Sdes  has_pclmul = ecx & bit_PCLMUL;
465204917Sdes  has_fma = ecx & bit_FMA;
466204917Sdes  has_f16c = ecx & bit_F16C;
467204917Sdes  has_rdrnd = ecx & bit_RDRND;
468204917Sdes  has_xsave = ecx & bit_XSAVE;
469204917Sdes
470204917Sdes  has_cmpxchg8b = edx & bit_CMPXCHG8B;
471204917Sdes  has_cmov = edx & bit_CMOV;
472204917Sdes  has_mmx = edx & bit_MMX;
473204917Sdes  has_fxsr = edx & bit_FXSAVE;
474204917Sdes  has_sse = edx & bit_SSE;
475204917Sdes  has_sse2 = edx & bit_SSE2;
476204917Sdes
477204917Sdes  if (max_level >= 7)
478204917Sdes    {
479204917Sdes      __cpuid_count (7, 0, eax, ebx, ecx, edx);
480204917Sdes
481204917Sdes      has_bmi = ebx & bit_BMI;
482204917Sdes      has_hle = ebx & bit_HLE;
483204917Sdes      has_rtm = ebx & bit_RTM;
484204917Sdes      has_avx2 = ebx & bit_AVX2;
485204917Sdes      has_bmi2 = ebx & bit_BMI2;
486204917Sdes      has_fsgsbase = ebx & bit_FSGSBASE;
487204917Sdes      has_rdseed = ebx & bit_RDSEED;
488204917Sdes      has_adx = ebx & bit_ADX;
489204917Sdes      has_avx512f = ebx & bit_AVX512F;
490204917Sdes      has_avx512er = ebx & bit_AVX512ER;
491204917Sdes      has_avx512pf = ebx & bit_AVX512PF;
492204917Sdes      has_avx512cd = ebx & bit_AVX512CD;
493204917Sdes      has_sha = ebx & bit_SHA;
494204917Sdes      has_pcommit = ebx & bit_PCOMMIT;
495204917Sdes      has_clflushopt = ebx & bit_CLFLUSHOPT;
496255767Sdes      has_clwb = ebx & bit_CLWB;
497204917Sdes      has_avx512dq = ebx & bit_AVX512DQ;
498204917Sdes      has_avx512bw = ebx & bit_AVX512BW;
499204917Sdes      has_avx512vl = ebx & bit_AVX512VL;
500      has_avx512vl = ebx & bit_AVX512IFMA;
501
502      has_prefetchwt1 = ecx & bit_PREFETCHWT1;
503      has_avx512vl = ecx & bit_AVX512VBMI;
504    }
505
506  if (max_level >= 13)
507    {
508      __cpuid_count (13, 1, eax, ebx, ecx, edx);
509
510      has_xsaveopt = eax & bit_XSAVEOPT;
511      has_xsavec = eax & bit_XSAVEC;
512      has_xsaves = eax & bit_XSAVES;
513    }
514
515  /* Check cpuid level of extended features.  */
516  __cpuid (0x80000000, ext_level, ebx, ecx, edx);
517
518  if (ext_level > 0x80000000)
519    {
520      __cpuid (0x80000001, eax, ebx, ecx, edx);
521
522      has_lahf_lm = ecx & bit_LAHF_LM;
523      has_sse4a = ecx & bit_SSE4a;
524      has_abm = ecx & bit_ABM;
525      has_lwp = ecx & bit_LWP;
526      has_fma4 = ecx & bit_FMA4;
527      has_xop = ecx & bit_XOP;
528      has_tbm = ecx & bit_TBM;
529      has_lzcnt = ecx & bit_LZCNT;
530      has_prfchw = ecx & bit_PRFCHW;
531
532      has_longmode = edx & bit_LM;
533      has_3dnowp = edx & bit_3DNOWP;
534      has_3dnow = edx & bit_3DNOW;
535      has_mwaitx = ecx & bit_MWAITX;
536    }
537
538  /* Get XCR_XFEATURE_ENABLED_MASK register with xgetbv.  */
539#define XCR_XFEATURE_ENABLED_MASK	0x0
540#define XSTATE_FP			0x1
541#define XSTATE_SSE			0x2
542#define XSTATE_YMM			0x4
543#define XSTATE_OPMASK			0x20
544#define XSTATE_ZMM			0x40
545#define XSTATE_HI_ZMM			0x80
546  if (has_osxsave)
547    asm (".byte 0x0f; .byte 0x01; .byte 0xd0"
548	 : "=a" (eax), "=d" (edx)
549	 : "c" (XCR_XFEATURE_ENABLED_MASK));
550
551  /* Check if SSE and YMM states are supported.  */
552  if (!has_osxsave
553      || (eax & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM))
554    {
555      has_avx = 0;
556      has_avx2 = 0;
557      has_fma = 0;
558      has_fma4 = 0;
559      has_f16c = 0;
560      has_xop = 0;
561      has_xsave = 0;
562      has_xsaveopt = 0;
563      has_xsaves = 0;
564      has_xsavec = 0;
565    }
566
567  if (!has_osxsave
568      || (eax &
569	  (XSTATE_SSE | XSTATE_YMM | XSTATE_OPMASK | XSTATE_ZMM | XSTATE_HI_ZMM))
570	  != (XSTATE_SSE | XSTATE_YMM | XSTATE_OPMASK | XSTATE_ZMM | XSTATE_HI_ZMM))
571    {
572      has_avx512f = 0;
573      has_avx512er = 0;
574      has_avx512pf = 0;
575      has_avx512cd = 0;
576      has_avx512dq = 0;
577      has_avx512bw = 0;
578      has_avx512vl = 0;
579    }
580
581  if (!arch)
582    {
583      if (vendor == signature_AMD_ebx
584	  || vendor == signature_CENTAUR_ebx
585	  || vendor == signature_CYRIX_ebx
586	  || vendor == signature_NSC_ebx)
587	cache = detect_caches_amd (ext_level);
588      else if (vendor == signature_INTEL_ebx)
589	{
590	  bool xeon_mp = (family == 15 && model == 6);
591	  cache = detect_caches_intel (xeon_mp, max_level,
592				       ext_level, &l2sizekb);
593	}
594    }
595
596  if (vendor == signature_AMD_ebx)
597    {
598      unsigned int name;
599
600      /* Detect geode processor by its processor signature.  */
601      if (ext_level > 0x80000001)
602	__cpuid (0x80000002, name, ebx, ecx, edx);
603      else
604	name = 0;
605
606      if (name == signature_NSC_ebx)
607	processor = PROCESSOR_GEODE;
608      else if (has_movbe && family == 22)
609	processor = PROCESSOR_BTVER2;
610      else if (has_avx2)
611        processor = PROCESSOR_BDVER4;
612      else if (has_xsaveopt)
613        processor = PROCESSOR_BDVER3;
614      else if (has_bmi)
615        processor = PROCESSOR_BDVER2;
616      else if (has_xop)
617	processor = PROCESSOR_BDVER1;
618      else if (has_sse4a && has_ssse3)
619        processor = PROCESSOR_BTVER1;
620      else if (has_sse4a)
621	processor = PROCESSOR_AMDFAM10;
622      else if (has_sse2 || has_longmode)
623	processor = PROCESSOR_K8;
624      else if (has_3dnowp && family == 6)
625	processor = PROCESSOR_ATHLON;
626      else if (has_mmx)
627	processor = PROCESSOR_K6;
628      else
629	processor = PROCESSOR_PENTIUM;
630    }
631  else if (vendor == signature_CENTAUR_ebx)
632    {
633      if (arch)
634	{
635	  switch (family)
636	    {
637	    case 6:
638	      if (model > 9)
639		/* Use the default detection procedure.  */
640		processor = PROCESSOR_GENERIC;
641	      else if (model == 9)
642		cpu = "c3-2";
643	      else if (model >= 6)
644		cpu = "c3";
645	      else
646		processor = PROCESSOR_GENERIC;
647	      break;
648	    case 5:
649	      if (has_3dnow)
650		cpu = "winchip2";
651	      else if (has_mmx)
652		cpu = "winchip2-c6";
653	      else
654		processor = PROCESSOR_GENERIC;
655	      break;
656	    default:
657	      /* We have no idea.  */
658	      processor = PROCESSOR_GENERIC;
659	    }
660	}
661    }
662  else
663    {
664      switch (family)
665	{
666	case 4:
667	  processor = PROCESSOR_I486;
668	  break;
669	case 5:
670	  processor = PROCESSOR_PENTIUM;
671	  break;
672	case 6:
673	  processor = PROCESSOR_PENTIUMPRO;
674	  break;
675	case 15:
676	  processor = PROCESSOR_PENTIUM4;
677	  break;
678	default:
679	  /* We have no idea.  */
680	  processor = PROCESSOR_GENERIC;
681	}
682    }
683
684  switch (processor)
685    {
686    case PROCESSOR_I386:
687      /* Default.  */
688      break;
689    case PROCESSOR_I486:
690      cpu = "i486";
691      break;
692    case PROCESSOR_PENTIUM:
693      if (arch && has_mmx)
694	cpu = "pentium-mmx";
695      else
696	cpu = "pentium";
697      break;
698    case PROCESSOR_PENTIUMPRO:
699      switch (model)
700	{
701	case 0x1c:
702	case 0x26:
703	  /* Bonnell.  */
704	  cpu = "bonnell";
705	  break;
706	case 0x37:
707	case 0x4a:
708	case 0x4d:
709	case 0x5a:
710	case 0x5d:
711	  /* Silvermont.  */
712	  cpu = "silvermont";
713	  break;
714	case 0x0f:
715	  /* Merom.  */
716	case 0x17:
717	case 0x1d:
718	  /* Penryn.  */
719	  cpu = "core2";
720	  break;
721	case 0x1a:
722	case 0x1e:
723	case 0x1f:
724	case 0x2e:
725	  /* Nehalem.  */
726	  cpu = "nehalem";
727	  break;
728	case 0x25:
729	case 0x2c:
730	case 0x2f:
731	  /* Westmere.  */
732	  cpu = "westmere";
733	  break;
734	case 0x2a:
735	case 0x2d:
736	  /* Sandy Bridge.  */
737	  cpu = "sandybridge";
738	  break;
739	case 0x3a:
740	case 0x3e:
741	  /* Ivy Bridge.  */
742	  cpu = "ivybridge";
743	  break;
744	case 0x3c:
745	case 0x3f:
746	case 0x45:
747	case 0x46:
748	  /* Haswell.  */
749	  cpu = "haswell";
750	  break;
751	case 0x3d:
752	case 0x4f:
753	case 0x56:
754	  /* Broadwell.  */
755	  cpu = "broadwell";
756	  break;
757	case 0x57:
758	  /* Knights Landing.  */
759	  cpu = "knl";
760	  break;
761	default:
762	  if (arch)
763	    {
764	      /* This is unknown family 0x6 CPU.  */
765	      /* Assume Knights Landing.  */
766	      if (has_avx512f)
767		cpu = "knl";
768	      /* Assume Broadwell.  */
769	      else if (has_adx)
770		cpu = "broadwell";
771	      else if (has_avx2)
772		/* Assume Haswell.  */
773		cpu = "haswell";
774	      else if (has_avx)
775		/* Assume Sandy Bridge.  */
776		cpu = "sandybridge";
777	      else if (has_sse4_2)
778		{
779		  if (has_movbe)
780		    /* Assume Silvermont.  */
781		    cpu = "silvermont";
782		  else
783		    /* Assume Nehalem.  */
784		    cpu = "nehalem";
785		}
786	      else if (has_ssse3)
787		{
788		  if (has_movbe)
789		    /* Assume Bonnell.  */
790		    cpu = "bonnell";
791		  else
792		    /* Assume Core 2.  */
793		    cpu = "core2";
794		}
795	      else if (has_longmode)
796		/* Perhaps some emulator?  Assume x86-64, otherwise gcc
797		   -march=native would be unusable for 64-bit compilations,
798		   as all the CPUs below are 32-bit only.  */
799		cpu = "x86-64";
800	      else if (has_sse3)
801		/* It is Core Duo.  */
802		cpu = "pentium-m";
803	      else if (has_sse2)
804		/* It is Pentium M.  */
805		cpu = "pentium-m";
806	      else if (has_sse)
807		/* It is Pentium III.  */
808		cpu = "pentium3";
809	      else if (has_mmx)
810		/* It is Pentium II.  */
811		cpu = "pentium2";
812	      else
813		/* Default to Pentium Pro.  */
814		cpu = "pentiumpro";
815	    }
816	  else
817	    /* For -mtune, we default to -mtune=generic.  */
818	    cpu = "generic";
819	  break;
820	}
821      break;
822    case PROCESSOR_PENTIUM4:
823      if (has_sse3)
824	{
825	  if (has_longmode)
826	    cpu = "nocona";
827	  else
828	    cpu = "prescott";
829	}
830      else
831	cpu = "pentium4";
832      break;
833    case PROCESSOR_GEODE:
834      cpu = "geode";
835      break;
836    case PROCESSOR_K6:
837      if (arch && has_3dnow)
838	cpu = "k6-3";
839      else
840	cpu = "k6";
841      break;
842    case PROCESSOR_ATHLON:
843      if (arch && has_sse)
844	cpu = "athlon-4";
845      else
846	cpu = "athlon";
847      break;
848    case PROCESSOR_K8:
849      if (arch && has_sse3)
850	cpu = "k8-sse3";
851      else
852	cpu = "k8";
853      break;
854    case PROCESSOR_AMDFAM10:
855      cpu = "amdfam10";
856      break;
857    case PROCESSOR_BDVER1:
858      cpu = "bdver1";
859      break;
860    case PROCESSOR_BDVER2:
861      cpu = "bdver2";
862      break;
863    case PROCESSOR_BDVER3:
864      cpu = "bdver3";
865      break;
866    case PROCESSOR_BDVER4:
867      cpu = "bdver4";
868      break;
869    case PROCESSOR_BTVER1:
870      cpu = "btver1";
871      break;
872    case PROCESSOR_BTVER2:
873      cpu = "btver2";
874      break;
875
876    default:
877      /* Use something reasonable.  */
878      if (arch)
879	{
880	  if (has_ssse3)
881	    cpu = "core2";
882	  else if (has_sse3)
883	    {
884	      if (has_longmode)
885		cpu = "nocona";
886	      else
887		cpu = "prescott";
888	    }
889	  else if (has_sse2)
890	    cpu = "pentium4";
891	  else if (has_cmov)
892	    cpu = "pentiumpro";
893	  else if (has_mmx)
894	    cpu = "pentium-mmx";
895	  else if (has_cmpxchg8b)
896	    cpu = "pentium";
897	}
898      else
899	cpu = "generic";
900    }
901
902  if (arch)
903    {
904      const char *mmx = has_mmx ? " -mmmx" : " -mno-mmx";
905      const char *mmx3dnow = has_3dnow ? " -m3dnow" : " -mno-3dnow";
906      const char *sse = has_sse ? " -msse" : " -mno-sse";
907      const char *sse2 = has_sse2 ? " -msse2" : " -mno-sse2";
908      const char *sse3 = has_sse3 ? " -msse3" : " -mno-sse3";
909      const char *ssse3 = has_ssse3 ? " -mssse3" : " -mno-ssse3";
910      const char *sse4a = has_sse4a ? " -msse4a" : " -mno-sse4a";
911      const char *cx16 = has_cmpxchg16b ? " -mcx16" : " -mno-cx16";
912      const char *sahf = has_lahf_lm ? " -msahf" : " -mno-sahf";
913      const char *movbe = has_movbe ? " -mmovbe" : " -mno-movbe";
914      const char *aes = has_aes ? " -maes" : " -mno-aes";
915      const char *sha = has_sha ? " -msha" : " -mno-sha";
916      const char *pclmul = has_pclmul ? " -mpclmul" : " -mno-pclmul";
917      const char *popcnt = has_popcnt ? " -mpopcnt" : " -mno-popcnt";
918      const char *abm = has_abm ? " -mabm" : " -mno-abm";
919      const char *lwp = has_lwp ? " -mlwp" : " -mno-lwp";
920      const char *fma = has_fma ? " -mfma" : " -mno-fma";
921      const char *fma4 = has_fma4 ? " -mfma4" : " -mno-fma4";
922      const char *xop = has_xop ? " -mxop" : " -mno-xop";
923      const char *bmi = has_bmi ? " -mbmi" : " -mno-bmi";
924      const char *bmi2 = has_bmi2 ? " -mbmi2" : " -mno-bmi2";
925      const char *tbm = has_tbm ? " -mtbm" : " -mno-tbm";
926      const char *avx = has_avx ? " -mavx" : " -mno-avx";
927      const char *avx2 = has_avx2 ? " -mavx2" : " -mno-avx2";
928      const char *sse4_2 = has_sse4_2 ? " -msse4.2" : " -mno-sse4.2";
929      const char *sse4_1 = has_sse4_1 ? " -msse4.1" : " -mno-sse4.1";
930      const char *lzcnt = has_lzcnt ? " -mlzcnt" : " -mno-lzcnt";
931      const char *hle = has_hle ? " -mhle" : " -mno-hle";
932      const char *rtm = has_rtm ? " -mrtm" : " -mno-rtm";
933      const char *rdrnd = has_rdrnd ? " -mrdrnd" : " -mno-rdrnd";
934      const char *f16c = has_f16c ? " -mf16c" : " -mno-f16c";
935      const char *fsgsbase = has_fsgsbase ? " -mfsgsbase" : " -mno-fsgsbase";
936      const char *rdseed = has_rdseed ? " -mrdseed" : " -mno-rdseed";
937      const char *prfchw = has_prfchw ? " -mprfchw" : " -mno-prfchw";
938      const char *adx = has_adx ? " -madx" : " -mno-adx";
939      const char *fxsr = has_fxsr ? " -mfxsr" : " -mno-fxsr";
940      const char *xsave = has_xsave ? " -mxsave" : " -mno-xsave";
941      const char *xsaveopt = has_xsaveopt ? " -mxsaveopt" : " -mno-xsaveopt";
942      const char *avx512f = has_avx512f ? " -mavx512f" : " -mno-avx512f";
943      const char *avx512er = has_avx512er ? " -mavx512er" : " -mno-avx512er";
944      const char *avx512cd = has_avx512cd ? " -mavx512cd" : " -mno-avx512cd";
945      const char *avx512pf = has_avx512pf ? " -mavx512pf" : " -mno-avx512pf";
946      const char *prefetchwt1 = has_prefetchwt1 ? " -mprefetchwt1" : " -mno-prefetchwt1";
947      const char *clflushopt = has_clflushopt ? " -mclflushopt" : " -mno-clflushopt";
948      const char *xsavec = has_xsavec ? " -mxsavec" : " -mno-xsavec";
949      const char *xsaves = has_xsaves ? " -mxsaves" : " -mno-xsaves";
950      const char *avx512dq = has_avx512dq ? " -mavx512dq" : " -mno-avx512dq";
951      const char *avx512bw = has_avx512bw ? " -mavx512bw" : " -mno-avx512bw";
952      const char *avx512vl = has_avx512vl ? " -mavx512vl" : " -mno-avx512vl";
953      const char *avx512ifma = has_avx512ifma ? " -mavx512ifma" : " -mno-avx512ifma";
954      const char *avx512vbmi = has_avx512vbmi ? " -mavx512vbmi" : " -mno-avx512vbmi";
955      const char *clwb = has_clwb ? " -mclwb" : " -mno-clwb";
956      const char *pcommit = has_pcommit ? " -mpcommit" : " -mno-pcommit";
957      const char *mwaitx  = has_mwaitx  ? " -mmwaitx"  : " -mno-mwaitx";
958
959      options = concat (options, mmx, mmx3dnow, sse, sse2, sse3, ssse3,
960			sse4a, cx16, sahf, movbe, aes, sha, pclmul,
961			popcnt, abm, lwp, fma, fma4, xop, bmi, bmi2,
962			tbm, avx, avx2, sse4_2, sse4_1, lzcnt, rtm,
963			hle, rdrnd, f16c, fsgsbase, rdseed, prfchw, adx,
964			fxsr, xsave, xsaveopt, avx512f, avx512er,
965			avx512cd, avx512pf, prefetchwt1, clflushopt,
966			xsavec, xsaves, avx512dq, avx512bw, avx512vl,
967			avx512ifma, avx512vbmi, clwb, pcommit, mwaitx, NULL);
968    }
969
970done:
971  return concat (cache, "-m", argv[0], "=", cpu, options, NULL);
972}
973#else
974
/* Fallback for a host compiler that keeps the %EBX register fixed (see
   the #if condition guarding the implementation above).  Returning
   NULL makes the driver ignore the "native" target of -march and
   -mtune, leaving the newly built compiler to generate code for its
   default target.  */

const char *
host_detect_local_cpu (int, const char **)
{
  return NULL;
}
983#endif /* __GNUC__ */
984