1/* x86 fat binary initializers.
2
3   THE FUNCTIONS AND VARIABLES IN THIS FILE ARE FOR INTERNAL USE ONLY.
4   THEY'RE ALMOST CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR
5   COMPLETELY IN FUTURE GNU MP RELEASES.
6
7Copyright 2003, 2004, 2011-2013, 2015, 2017, 2018 Free Software Foundation,
8Inc.
9
10This file is part of the GNU MP Library.
11
12The GNU MP Library is free software; you can redistribute it and/or modify
13it under the terms of either:
14
15  * the GNU Lesser General Public License as published by the Free
16    Software Foundation; either version 3 of the License, or (at your
17    option) any later version.
18
19or
20
21  * the GNU General Public License as published by the Free Software
22    Foundation; either version 2 of the License, or (at your option) any
23    later version.
24
25or both in parallel, as here.
26
27The GNU MP Library is distributed in the hope that it will be useful, but
28WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
29or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
30for more details.
31
32You should have received copies of the GNU General Public License and the
33GNU Lesser General Public License along with the GNU MP Library.  If not,
34see https://www.gnu.org/licenses/.  */
35
36#include <stdio.h>    /* for printf */
37#include <stdlib.h>   /* for getenv */
38#include <string.h>
39
40#include "gmp-impl.h"
41
42/* Change this to "#define TRACE(x) x" for some traces. */
43#define TRACE(x)
44
45
/* fat_entry.asm

   As used in this file:

   __gmpn_cpuid (dst, id) is called with id 0 to obtain the 12-byte vendor
   string in dst (the caller adds its own NUL terminator), and with id 1 to
   obtain, as the return value, the word from which family and model are
   decoded.

   __gmpn_cpuid_available () returns non-zero when __gmpn_cpuid may be
   used.  */
long __gmpn_cpuid (char [12], int);
int  __gmpn_cpuid_available (void);
49
50
51#if WANT_FAKE_CPUID
52/* The "name"s in the table are values for the GMP_CPU_TYPE environment
53   variable.  Anything can be used, but for now it's the canonical cpu types
54   as per config.guess/config.sub.  */
55
/* From here on, calls to the cpuid entry points are redirected to the fake
   versions defined below, which answer from fake_cpuid_table.  */
#define __gmpn_cpuid            fake_cpuid
#define __gmpn_cpuid_available  fake_cpuid_available
58
/* Build the family/model word returned by fake_cpuid for id 1.

   The low 4 bits of FAMILY go to bits 8-11 and the remaining FAMILY bits,
   left in place as a multiple of 16, go to the field starting at bit 20.
   The low 4 bits of MODEL go to bits 4-7 and the next 4 bits to bits
   16-19.  This only has to round-trip through the family/model decoding
   done in __gmpn_cpuvec_init, which adds the two fields of each pair back
   together.  */
#define MAKE_FMS(family, model)						\
  ((((family) & 0xf) << 8) + (((family) & 0xff0) << 20)			\
   + (((model) & 0xf) << 4) + (((model)  &  0xf0) << 12))

/* Fake CPUs selectable through GMP_CPU_TYPE.

   name    value of GMP_CPU_TYPE selecting the entry
   vendor  12-character cpuid vendor string, or "" for a CPU without cpuid
   fms     family/model word, built with MAKE_FMS

   Each entry must decode, in __gmpn_cpuvec_init, to the code path its name
   suggests.  In particular "k6" uses model 6: AuthenticAMD family 5 models
   0 to 3 are treated as k5 there, so a model of 3 would make
   GMP_CPU_TYPE=k6 silently run the k5 path instead of the k6 code.  */
static const struct {
  const char  *name;
  const char  *vendor;
  unsigned    fms;
} fake_cpuid_table[] = {
  { "i386",       "" },
  { "i486",       "GenuineIntel", MAKE_FMS (4, 0) },
  { "pentium",    "GenuineIntel", MAKE_FMS (5, 0) },
  { "pentiummmx", "GenuineIntel", MAKE_FMS (5, 4) },
  { "pentiumpro", "GenuineIntel", MAKE_FMS (6, 0) },
  { "pentium2",   "GenuineIntel", MAKE_FMS (6, 2) },
  { "pentium3",   "GenuineIntel", MAKE_FMS (6, 7) },
  { "pentium4",   "GenuineIntel", MAKE_FMS (15, 2) },
  { "prescott",   "GenuineIntel", MAKE_FMS (15, 3) },
  { "nocona",     "GenuineIntel", MAKE_FMS (15, 4) },
  { "core2",      "GenuineIntel", MAKE_FMS (6, 0xf) },
  { "nehalem",    "GenuineIntel", MAKE_FMS (6, 0x1a) },
  { "nhm",        "GenuineIntel", MAKE_FMS (6, 0x1a) },
  { "atom",       "GenuineIntel", MAKE_FMS (6, 0x1c) },
  { "westmere",   "GenuineIntel", MAKE_FMS (6, 0x25) },
  { "wsm",        "GenuineIntel", MAKE_FMS (6, 0x25) },
  { "sandybridge","GenuineIntel", MAKE_FMS (6, 0x2a) },
  { "sbr",        "GenuineIntel", MAKE_FMS (6, 0x2a) },
  { "silvermont", "GenuineIntel", MAKE_FMS (6, 0x37) },
  { "slm",        "GenuineIntel", MAKE_FMS (6, 0x37) },
  { "haswell",    "GenuineIntel", MAKE_FMS (6, 0x3c) },
  { "hwl",        "GenuineIntel", MAKE_FMS (6, 0x3c) },
  { "broadwell",  "GenuineIntel", MAKE_FMS (6, 0x3d) },
  { "bwl",        "GenuineIntel", MAKE_FMS (6, 0x3d) },
  { "skylake",    "GenuineIntel", MAKE_FMS (6, 0x5e) },
  { "sky",        "GenuineIntel", MAKE_FMS (6, 0x5e) },

  { "k5",         "AuthenticAMD", MAKE_FMS (5, 0) },
  { "k6",         "AuthenticAMD", MAKE_FMS (5, 6) },
  { "k62",        "AuthenticAMD", MAKE_FMS (5, 8) },
  { "k63",        "AuthenticAMD", MAKE_FMS (5, 9) },
  { "athlon",     "AuthenticAMD", MAKE_FMS (6, 0) },
  { "k8",         "AuthenticAMD", MAKE_FMS (15, 0) },
  { "k10",        "AuthenticAMD", MAKE_FMS (16, 0) },
  { "bobcat",     "AuthenticAMD", MAKE_FMS (20, 1) },
  { "bulldozer",  "AuthenticAMD", MAKE_FMS (21, 1) },
  { "piledriver", "AuthenticAMD", MAKE_FMS (21, 2) },
  { "steamroller","AuthenticAMD", MAKE_FMS (21, 0x30) },
  { "excavator",  "AuthenticAMD", MAKE_FMS (21, 0x60) },
  { "jaguar",     "AuthenticAMD", MAKE_FMS (22, 1) },
  { "zen",        "AuthenticAMD", MAKE_FMS (23, 1) },

  { "viac3",      "CentaurHauls", MAKE_FMS (6, 0) },
  { "viac32",     "CentaurHauls", MAKE_FMS (6, 9) },
  { "nano",       "CentaurHauls", MAKE_FMS (6, 15) },
};
114
115static int
116fake_cpuid_lookup (void)
117{
118  char  *s;
119  int   i;
120
121  s = getenv ("GMP_CPU_TYPE");
122  if (s == NULL)
123    {
124      printf ("Need GMP_CPU_TYPE environment variable for fake cpuid\n");
125      abort ();
126    }
127
128  for (i = 0; i < numberof (fake_cpuid_table); i++)
129    if (strcmp (s, fake_cpuid_table[i].name) == 0)
130      return i;
131
132  printf ("GMP_CPU_TYPE=%s unknown\n", s);
133  abort ();
134}
135
136static int
137fake_cpuid_available (void)
138{
139  return fake_cpuid_table[fake_cpuid_lookup()].vendor[0] != '\0';
140}
141
142static long
143fake_cpuid (char dst[12], int id)
144{
145  int  i = fake_cpuid_lookup();
146
147  switch (id) {
148  case 0:
149    memcpy (dst, fake_cpuid_table[i].vendor, 12);
150    return 0;
151  case 1:
152    return fake_cpuid_table[i].fms;
153  default:
154    printf ("fake_cpuid(): oops, unknown id %d\n", id);
155    abort ();
156  }
157}
158#endif
159
160
/* Function pointer types built from the DECL_preinv_divrem_1 and
   DECL_preinv_mod_1 macros (defined elsewhere).  They are used at the end
   of __gmpn_cpuvec_init to cast the divrem_1 and mod_1 entries when those
   stand in for missing preinv_divrem_1 and preinv_mod_1 entries.  */
typedef DECL_preinv_divrem_1 ((*preinv_divrem_1_t));
typedef DECL_preinv_mod_1    ((*preinv_mod_1_t));
163
/* The global vector of CPU-specific routines.  It is statically
   initialized with the __MPN(..._init) routines (defined elsewhere) and is
   later overwritten by __gmpn_cpuvec_init with the vector chosen for the
   detected CPU.

   The initializers are positional, so their order must match the field
   order of struct cpuvec_t, which is declared elsewhere and not visible
   here.  The 0 entries presumably belong to fields that have no _init
   routine -- confirm against the struct declaration before reordering or
   adding entries.  */
struct cpuvec_t __gmpn_cpuvec = {
  __MPN(add_n_init),
  0,
  0,
  __MPN(addmul_1_init),
  0,
  __MPN(bdiv_dbm1c_init),
  __MPN(cnd_add_n_init),
  __MPN(cnd_sub_n_init),
  __MPN(com_init),
  __MPN(copyd_init),
  __MPN(copyi_init),
  __MPN(divexact_1_init),
  __MPN(divrem_1_init),
  __MPN(gcd_11_init),
  __MPN(lshift_init),
  __MPN(lshiftc_init),
  __MPN(mod_1_init),
  __MPN(mod_1_1p_init),
  __MPN(mod_1_1p_cps_init),
  __MPN(mod_1s_2p_init),
  __MPN(mod_1s_2p_cps_init),
  __MPN(mod_1s_4p_init),
  __MPN(mod_1s_4p_cps_init),
  __MPN(mod_34lsub1_init),
  __MPN(modexact_1c_odd_init),
  __MPN(mul_1_init),
  __MPN(mul_basecase_init),
  __MPN(mullo_basecase_init),
  __MPN(preinv_divrem_1_init),
  __MPN(preinv_mod_1_init),
  __MPN(redc_1_init),
  __MPN(redc_2_init),
  __MPN(rshift_init),
  __MPN(sqr_basecase_init),
  __MPN(sub_n_init),
  0,
  __MPN(submul_1_init),
  0
};
204
/* Zero until __gmpn_cpuvec_init has installed the chosen vector; that
   function sets it to 1 as its last step.  */
int __gmpn_cpuvec_initialized = 0;
206
/* Detect the CPU with cpuid, build the matching vector of routines in a
   local decided_cpuvec, install it, and finally set
   __gmpn_cpuvec_initialized to 1.

   The following setups start with generic x86, then overwrite with
   specifics for a chip, and higher versions of that chip.

   The arrangement of the setups here will normally be the same as the $path
   selections in configure.in for the respective chips.

   A vendor, family or model that matches no case below simply keeps
   whatever setups have been applied up to that point.

   This code is reentrant and thread safe.  We always calculate the same
   decided_cpuvec, so if two copies of the code are running it doesn't
   matter which completes first, both write the same to __gmpn_cpuvec.

   We need to go via decided_cpuvec because if one thread has completed
   __gmpn_cpuvec then it may be making use of the threshold values in that
   vector.  If another thread is still running __gmpn_cpuvec_init then we
   don't want it to write different values to those fields since some of the
   asm routines only operate correctly up to their own defined threshold,
   not an arbitrary value.  */

void
__gmpn_cpuvec_init (void)
{
  struct cpuvec_t  decided_cpuvec;

  TRACE (printf ("__gmpn_cpuvec_init:\n"));

  /* Start from all zeros, so entries that no setup fills in can be
     recognised as NULL at the end of this function.  */
  memset (&decided_cpuvec, '\0', sizeof (decided_cpuvec));

  /* Baseline applied for every CPU.  The CPUVEC_SETUP_* macros are defined
     elsewhere; they are used here as statements and rely on a local
     variable named decided_cpuvec being in scope (presumably -- confirm
     against their definition).  */
  CPUVEC_SETUP_x86;
  CPUVEC_SETUP_fat;

  if (! __gmpn_cpuid_available ())
    {
      TRACE (printf ("  80386, or early 80486 without cpuid\n"));
    }
  else
    {
      char vendor_string[13];
      char dummy_string[12];
      long fms;
      int family, model;

      /* id 0 fills in 12 vendor bytes; the 13th byte is our terminator so
         the result can be passed to strcmp.  */
      __gmpn_cpuid (vendor_string, 0);
      vendor_string[12] = 0;

      /* id 1: only the return value is used, dummy_string is ignored.
         family is bits 8-11 plus bits 20-27 of fms; model is bits 4-7
         plus bits 16-19 taken as the high nibble.  */
      fms = __gmpn_cpuid (dummy_string, 1);
      family = ((fms >> 8) & 0xf) + ((fms >> 20) & 0xff);
      model = ((fms >> 4) & 0xf) + ((fms >> 12) & 0xf0);

      if (strcmp (vendor_string, "GenuineIntel") == 0)
        {
          switch (family)
            {
            case 4:
              TRACE (printf ("  80486 with cpuid\n"));
              break;

            case 5:
              TRACE (printf ("  pentium\n"));
              CPUVEC_SETUP_pentium;
              if (model == 4 || model == 8)
                {
                  TRACE (printf ("  pentiummmx\n"));
                  CPUVEC_SETUP_pentium_mmx;
                }
              break;

            case 6:
              /* Every family 6 model gets the p6 setup; the model switch
                 below layers further setups on top of it.  A model not
                 listed keeps just the p6 setup.  */
              TRACE (printf ("  p6\n"));
              CPUVEC_SETUP_p6;
	      switch (model)
		{
		case 0x00:
		case 0x01:
		  TRACE (printf ("  pentiumpro\n"));
		  break;

		case 0x02:
		case 0x03:
		case 0x04:
		case 0x05:
		case 0x06:
		  TRACE (printf ("  pentium2\n"));
                  CPUVEC_SETUP_p6_mmx;
		  break;

		case 0x07:
		case 0x08:
		case 0x0a:
		case 0x0b:
		case 0x0c:
		  TRACE (printf ("  pentium3\n"));
                  CPUVEC_SETUP_p6_mmx;
                  CPUVEC_SETUP_p6_p3mmx;
		  break;

		case 0x09:		/* Banias */
		case 0x0d:		/* Dothan */
		case 0x0e:		/* Yonah */
		  TRACE (printf ("  Banias/Dothan/Yonah\n"));
                  CPUVEC_SETUP_p6_mmx;
                  CPUVEC_SETUP_p6_p3mmx;
                  CPUVEC_SETUP_p6_sse2;
		  break;

		case 0x0f:		/* Conroe Merom Kentsfield Allendale */
		case 0x10:
		case 0x11:
		case 0x12:
		case 0x13:
		case 0x14:
		case 0x15:
		case 0x16:
		case 0x17:		/* PNR Wolfdale Yorkfield */
		case 0x18:
		case 0x19:
		case 0x1d:		/* PNR Dunnington */
		  TRACE (printf ("  Conroe\n"));
                  CPUVEC_SETUP_p6_mmx;
                  CPUVEC_SETUP_p6_p3mmx;
                  CPUVEC_SETUP_p6_sse2;
		  CPUVEC_SETUP_core2;
		  break;

		case 0x1c:		/* Atom Silverthorne */
		case 0x26:		/* Atom Lincroft */
		case 0x27:		/* Atom Saltwell */
		case 0x36:		/* Atom Cedarview/Saltwell */
		  TRACE (printf ("  atom\n"));
		  CPUVEC_SETUP_atom;
		  CPUVEC_SETUP_atom_mmx;
		  CPUVEC_SETUP_atom_sse2;
		  break;

		case 0x37:		/* Silvermont */
		case 0x4a:		/* Silvermont */
		case 0x4c:		/* Airmont */
		case 0x4d:		/* Silvermont/Avoton */
		case 0x5a:		/* Silvermont */
		  TRACE (printf ("  silvermont\n"));
		  CPUVEC_SETUP_atom;
		  CPUVEC_SETUP_atom_mmx;
		  CPUVEC_SETUP_atom_sse2;
		  CPUVEC_SETUP_silvermont;
		  break;

		case 0x5c:		/* Goldmont */
		case 0x5f:		/* Goldmont */
		case 0x7a:		/* Goldmont Plus */
		  TRACE (printf ("  goldmont\n"));
		  CPUVEC_SETUP_atom;
		  CPUVEC_SETUP_atom_mmx;
		  CPUVEC_SETUP_atom_sse2;
		  CPUVEC_SETUP_goldmont;
		  break;

		case 0x1a:		/* NHM Gainestown */
		case 0x1b:
		case 0x1e:		/* NHM Lynnfield/Jasper */
		case 0x1f:
		case 0x20:
		case 0x21:
		case 0x22:
		case 0x23:
		case 0x24:
		case 0x25:		/* WSM Clarkdale/Arrandale */
		case 0x28:
		case 0x29:
		case 0x2b:
		case 0x2c:		/* WSM Gulftown */
		case 0x2e:		/* NHM Beckton */
		case 0x2f:		/* WSM Eagleton */
		  TRACE (printf ("  nehalem/westmere\n"));
                  CPUVEC_SETUP_p6_mmx;
                  CPUVEC_SETUP_p6_p3mmx;
                  CPUVEC_SETUP_p6_sse2;
		  CPUVEC_SETUP_core2;
		  CPUVEC_SETUP_coreinhm;
		  break;

		case 0x2a:		/* SBR */
		case 0x2d:		/* SBR-EP */
		case 0x3a:		/* IBR */
		case 0x3e:		/* IBR Ivytown */
		case 0x3c:		/* Haswell client */
		case 0x3f:		/* Haswell server */
		case 0x45:		/* Haswell ULT */
		case 0x46:		/* Crystal Well */
		case 0x3d:		/* Broadwell */
		case 0x47:		/* Broadwell */
		case 0x4f:		/* Broadwell server */
		case 0x56:		/* Broadwell microserver */
		case 0x4e:		/* Skylake client */
		case 0x55:		/* Skylake server */
		case 0x5e:		/* Skylake */
		case 0x8e:		/* Kabylake */
		case 0x9e:		/* Kabylake */
		  TRACE (printf ("  sandybridge\n"));
                  CPUVEC_SETUP_p6_mmx;
                  CPUVEC_SETUP_p6_p3mmx;
                  CPUVEC_SETUP_p6_sse2;
		  CPUVEC_SETUP_core2;
		  CPUVEC_SETUP_coreinhm;
		  CPUVEC_SETUP_coreisbr;
		  break;
		}
              break;

            case 15:
              TRACE (printf ("  pentium4\n"));
              CPUVEC_SETUP_pentium4;
              CPUVEC_SETUP_pentium4_mmx;
              CPUVEC_SETUP_pentium4_sse2;
              break;
            }
        }
      else if (strcmp (vendor_string, "AuthenticAMD") == 0)
        {
          switch (family)
            {
            case 5:
              /* Models 0-3 are taken to be k5 and get no setup beyond the
                 baseline; higher models are taken to be k6 variants.  */
              if (model <= 3)
                {
                  TRACE (printf ("  k5\n"));
                }
              else
                {
                  TRACE (printf ("  k6\n"));
                  CPUVEC_SETUP_k6;
                  CPUVEC_SETUP_k6_mmx;
                  if (model >= 8)
                    {
                      TRACE (printf ("  k62\n"));
                      CPUVEC_SETUP_k6_k62mmx;
                    }
                  if (model >= 9)
                    {
                      TRACE (printf ("  k63\n"));
                    }
                }
              break;
            case 6:
              TRACE (printf ("  athlon\n"));
              CPUVEC_SETUP_k7;
              CPUVEC_SETUP_k7_mmx;
              break;

            case 0x0f:		/* k8 */
            case 0x11:		/* "fam 11h", mix of k8 and k10 */
            case 0x13:		/* unknown, conservatively assume k8  */
              TRACE (printf ("  k8\n"));
              CPUVEC_SETUP_k7;
              CPUVEC_SETUP_k7_mmx;
              CPUVEC_SETUP_k8;
	      break;

            case 0x10:		/* k10 */
            case 0x12:		/* k10 (llano) */
              TRACE (printf ("  k10\n"));
              CPUVEC_SETUP_k7;
              CPUVEC_SETUP_k7_mmx;
	      break;

            case 0x14:		/* bobcat */
            case 0x16:		/* jaguar */
              TRACE (printf ("  bobcat\n"));
              CPUVEC_SETUP_k7;
              CPUVEC_SETUP_k7_mmx;
              CPUVEC_SETUP_bt1;
	      break;

            case 0x15:		/* bulldozer */
              TRACE (printf ("  bulldozer\n"));
              CPUVEC_SETUP_k7;
              CPUVEC_SETUP_k7_mmx;
              CPUVEC_SETUP_bd1;
	      break;

	    case 0x17:		/* zen */
	    case 0x19:		/* zen3 */
	      TRACE (printf ("  zen\n"));
	      CPUVEC_SETUP_k7;
	      CPUVEC_SETUP_k7_mmx;
	      break;
            }
        }
      else if (strcmp (vendor_string, "CentaurHauls") == 0)
        {
          switch (family)
            {
            case 6:
              /* Only model 15 and up (nano) gets a setup of its own;
                 viac3 and viac32 stay on the baseline.  */
              TRACE (printf ("  viac3\n"));
              if (model >= 9)
                {
                  TRACE (printf ("  viac32\n"));
                }
	      if (model >= 15)
		{
                  TRACE (printf ("  nano\n"));
		  CPUVEC_SETUP_nano;
		}
              break;
            }
        }
      else if (strcmp (vendor_string, "CyrixInstead") == 0)
        {
          /* Should recognize Cyrix' processors too.  */
          TRACE (printf ("  cyrix something\n"));
        }
    }

  /* There's no x86 generic mpn_preinv_divrem_1 or mpn_preinv_mod_1.
     Instead default to the plain versions from whichever CPU we detected.
     The function arguments are compatible, no need for any glue code.  */
  if (decided_cpuvec.preinv_divrem_1 == NULL)
    decided_cpuvec.preinv_divrem_1 =(preinv_divrem_1_t)decided_cpuvec.divrem_1;
  if (decided_cpuvec.preinv_mod_1 == NULL)
    decided_cpuvec.preinv_mod_1    =(preinv_mod_1_t)   decided_cpuvec.mod_1;

  /* ASSERT_CPUVEC and CPUVEC_INSTALL are defined elsewhere; presumably the
     first checks that decided_cpuvec is fully populated and the second
     copies it into __gmpn_cpuvec -- confirm against their definitions.  */
  ASSERT_CPUVEC (decided_cpuvec);
  CPUVEC_INSTALL (decided_cpuvec);

  /* Set this once the threshold fields are ready.
     Use volatile to prevent it getting moved.  */
  *((volatile int *) &__gmpn_cpuvec_initialized) = 1;
}
531