1/* x86 fat binary initializers.
2
3   THE FUNCTIONS AND VARIABLES IN THIS FILE ARE FOR INTERNAL USE ONLY.
4   THEY'RE ALMOST CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR
5   COMPLETELY IN FUTURE GNU MP RELEASES.
6
7Copyright 2003, 2004, 2011 Free Software Foundation, Inc.
8
9This file is part of the GNU MP Library.
10
11The GNU MP Library is free software; you can redistribute it and/or modify
12it under the terms of the GNU Lesser General Public License as published by
13the Free Software Foundation; either version 3 of the License, or (at your
14option) any later version.
15
16The GNU MP Library is distributed in the hope that it will be useful, but
17WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
18or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
19License for more details.
20
21You should have received a copy of the GNU Lesser General Public License
22along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
23
24#include <stdio.h>    /* for printf */
25#include <stdlib.h>   /* for getenv */
26#include <string.h>
27
28#include "gmp.h"
29#include "gmp-impl.h"
30
31/* Change this to "#define TRACE(x) x" for some traces. */
32#define TRACE(x)
33
34/* Change this to 1 to take the cpuid from GMP_CPU_TYPE env var. */
35#define WANT_FAKE_CPUID  0
36
37
38/* fat_entry.asm */
39long __gmpn_cpuid __GMP_PROTO ((char dst[12], int id));
40int  __gmpn_cpuid_available __GMP_PROTO ((void));
41
42
43#if WANT_FAKE_CPUID
44/* The "name"s in the table are values for the GMP_CPU_TYPE environment
45   variable.  Anything can be used, but for now it's the canonical cpu types
46   as per config.guess/config.sub.  */
47
/* Redirect the cpuid entry points called by __gmpn_cpuvec_init to the fake
   implementations defined further down in this file.  */
#define __gmpn_cpuid            fake_cpuid
#define __gmpn_cpuid_available  fake_cpuid_available

/* Pack a family and model number into a cpuid(1)-style "fms" word.

   The low 4 bits of FAMILY go to bits 8-11 and the low 4 bits of MODEL to
   bits 4-7; the upper bits of MODEL go to bits 16-19.  The upper bits of
   FAMILY are placed so that the decoding in __gmpn_cpuvec_init,
       family = ((fms >> 8) & 0xf) + ((fms >> 20) & 0xff)
       model  = ((fms >> 4) & 0xf) + ((fms >> 12) & 0xf0)
   gives back the original values for the small family/model numbers used
   in the table below.  */
#define MAKE_FMS(family, model)						\
  ((((family) & 0xf) << 8) + (((family) & 0xff0) << 20)			\
   + (((model) & 0xf) << 4) + (((model)  &  0xf0) << 12))

/* Fake CPUs selectable through GMP_CPU_TYPE.

   name    value of GMP_CPU_TYPE selecting the entry
   vendor  12 character cpuid vendor string; empty means "no cpuid"
   fms     family/model word returned for cpuid id 1

   Each family/model must land in the branch of __gmpn_cpuvec_init that
   carries the same name in its TRACE output: "pentium4" has to be family
   15 (the only Intel case that applies the pentium4 setups) and "k6" has
   to have a model above 3 (AMD family 5 models 0-3 are treated as k5).  */
static const struct {
  const char  *name;
  const char  vendor[13];
  unsigned    fms;
} fake_cpuid_table[] = {
  { "i386",       "",             0 },
  { "i486",       "GenuineIntel", MAKE_FMS (4, 0) },
  { "pentium",    "GenuineIntel", MAKE_FMS (5, 0) },
  { "pentiummmx", "GenuineIntel", MAKE_FMS (5, 4) },
  { "pentiumpro", "GenuineIntel", MAKE_FMS (6, 0) },
  { "pentium2",   "GenuineIntel", MAKE_FMS (6, 2) },
  { "pentium3",   "GenuineIntel", MAKE_FMS (6, 7) },
  { "pentium4",   "GenuineIntel", MAKE_FMS (15, 0) },

  { "k5",         "AuthenticAMD", MAKE_FMS (5, 0) },
  { "k6",         "AuthenticAMD", MAKE_FMS (5, 6) },
  { "k62",        "AuthenticAMD", MAKE_FMS (5, 8) },
  { "k63",        "AuthenticAMD", MAKE_FMS (5, 9) },
  { "athlon",     "AuthenticAMD", MAKE_FMS (6, 0) },
  { "x86_64",     "AuthenticAMD", MAKE_FMS (15, 0) },

  { "viac3",      "CentaurHauls", MAKE_FMS (6, 0) },
  { "viac32",     "CentaurHauls", MAKE_FMS (6, 9) },
};
79
80static int
81fake_cpuid_lookup (void)
82{
83  char  *s;
84  int   i;
85
86  s = getenv ("GMP_CPU_TYPE");
87  if (s == NULL)
88    {
89      printf ("Need GMP_CPU_TYPE environment variable for fake cpuid\n");
90      abort ();
91    }
92
93  for (i = 0; i < numberof (fake_cpuid_table); i++)
94    if (strcmp (s, fake_cpuid_table[i].name) == 0)
95      return i;
96
97  printf ("GMP_CPU_TYPE=%s unknown\n", s);
98  abort ();
99}
100
101static int
102fake_cpuid_available (void)
103{
104  return fake_cpuid_table[fake_cpuid_lookup()].vendor[0] != '\0';
105}
106
107static long
108fake_cpuid (char dst[12], int id)
109{
110  int  i = fake_cpuid_lookup();
111
112  switch (id) {
113  case 0:
114    memcpy (dst, fake_cpuid_table[i].vendor, 12);
115    return 0;
116  case 1:
117    return fake_cpuid_table[i].fms;
118  default:
119    printf ("fake_cpuid(): oops, unknown id %d\n", id);
120    abort ();
121  }
122}
123#endif
124
125
/* Function pointer types built from the DECL_preinv_divrem_1 and
   DECL_preinv_mod_1 declaration macros.  __gmpn_cpuvec_init uses them to
   cast the plain divrem_1 and mod_1 entries when it fills in the
   preinv_divrem_1 and preinv_mod_1 fields.  */
126typedef DECL_preinv_divrem_1 ((*preinv_divrem_1_t));
127typedef DECL_preinv_mod_1    ((*preinv_mod_1_t));
128
/* The global routine vector, as it looks before __gmpn_cpuvec_init has run:
   every routine slot holds the matching "_init" entry.
   NOTE(review): the _init entries are presumably the fat_entry.asm stubs
   that trigger __gmpn_cpuvec_init on first use -- confirm there.
   NOTE(review): the initializer is positional, so its order must follow the
   field order of struct cpuvec_t; the trailing 0 presumably lands on the
   `initialized' field that __gmpn_cpuvec_init later sets to 1 -- confirm
   against the struct declaration.  */
129struct cpuvec_t __gmpn_cpuvec = {
130  __MPN(add_n_init),
131  __MPN(addmul_1_init),
132  __MPN(copyd_init),
133  __MPN(copyi_init),
134  __MPN(divexact_1_init),
135  __MPN(divexact_by3c_init),
136  __MPN(divrem_1_init),
137  __MPN(gcd_1_init),
138  __MPN(lshift_init),
139  __MPN(mod_1_init),
140  __MPN(mod_34lsub1_init),
141  __MPN(modexact_1c_odd_init),
142  __MPN(mul_1_init),
143  __MPN(mul_basecase_init),
144  __MPN(preinv_divrem_1_init),
145  __MPN(preinv_mod_1_init),
146  __MPN(rshift_init),
147  __MPN(sqr_basecase_init),
148  __MPN(sub_n_init),
149  __MPN(submul_1_init),
150  0
151};
152
153
154/* The following setups start with generic x86, then overwrite with
155   specifics for a chip, and higher versions of that chip.
156
157   The arrangement of the setups here will normally be the same as the $path
158   selections in configure.in for the respective chips.
159
160   This code is reentrant and thread safe.  We always calculate the same
161   decided_cpuvec, so if two copies of the code are running it doesn't
162   matter which completes first, both write the same to __gmpn_cpuvec.
163
164   We need to go via decided_cpuvec because if one thread has completed
165   __gmpn_cpuvec then it may be making use of the threshold values in that
166   vector.  If another thread is still running __gmpn_cpuvec_init then we
167   don't want it to write different values to those fields since some of the
168   asm routines only operate correctly up to their own defined threshold,
169   not an arbitrary value.  */
170
/* Identify the running CPU through cpuid and install the routines chosen
   for it into __gmpn_cpuvec.

   The choice is assembled in the local decided_cpuvec: it is zeroed, given
   the generic setups, then refined by vendor, family and model.  The
   finished vector is checked with ASSERT_CPUVEC, handed to CPUVEC_INSTALL,
   and finally __gmpn_cpuvec.initialized is set to 1.

   NOTE(review): the CPUVEC_SETUP_* macros take no arguments, so they
   presumably assign into the local named decided_cpuvec -- confirm in the
   header that defines them before renaming that variable.  */
171void
172__gmpn_cpuvec_init (void)
173{
174  struct cpuvec_t  decided_cpuvec;
175
176  TRACE (printf ("__gmpn_cpuvec_init:\n"));
177
178  memset (&decided_cpuvec, '\0', sizeof (decided_cpuvec));
179
/* Generic setups first; the CPU specific setups below are applied after
   these, in increasing order of specialisation.  */
180  CPUVEC_SETUP_x86;
181  CPUVEC_SETUP_fat;
182
183  if (! __gmpn_cpuid_available ())
184    {
185      TRACE (printf ("  80386, or early 80486 without cpuid\n"));
186    }
187  else
188    {
189      char vendor_string[13];
190      char dummy_string[12];
191      long fms;
192      int family, model;
193
/* cpuid id 0 supplies the 12 byte vendor string; it is NUL terminated here
   so that strcmp can be used on it.  */
194      __gmpn_cpuid (vendor_string, 0);
195      vendor_string[12] = 0;
196
/* cpuid id 1 returns the family/model word.  family is bits 8-11 plus the
   8 bit field at bits 20-27; model is bits 4-7 with bits 16-19 as its
   high nibble.  dummy_string only serves as the required dst argument.  */
197      fms = __gmpn_cpuid (dummy_string, 1);
198      family = ((fms >> 8) & 0xf) + ((fms >> 20) & 0xff);
199      model = ((fms >> 4) & 0xf) + ((fms >> 12) & 0xf0);
200
201      if (strcmp (vendor_string, "GenuineIntel") == 0)
202        {
203          switch (family)
204            {
205            case 4:
206              TRACE (printf ("  80486 with cpuid\n"));
207              break;
208
209            case 5:
210              TRACE (printf ("  pentium\n"));
211              CPUVEC_SETUP_pentium;
212              if (model >= 4)
213                {
214                  TRACE (printf ("  pentiummmx\n"));
215                  CPUVEC_SETUP_pentium_mmx;
216                }
217              break;
218
/* Family 6: the model tests are cumulative, each one that holds applies a
   further setup on top of the previous ones.  */
219            case 6:
220              TRACE (printf ("  p6\n"));
221              CPUVEC_SETUP_p6;
222              if (model >= 2)
223                {
224                  TRACE (printf ("  pentium2\n"));
225                  CPUVEC_SETUP_p6_mmx;
226                }
227              if (model >= 7)
228                {
229                  TRACE (printf ("  pentium3\n"));
230                  CPUVEC_SETUP_p6_p3mmx;
231                }
232              if (model >= 0xD || model == 9)
233                {
234                  TRACE (printf ("  p6 with sse2\n"));
235                  CPUVEC_SETUP_p6_sse2;
236                }
237              break;
238
239            case 15:
240              TRACE (printf ("  pentium4\n"));
241              CPUVEC_SETUP_pentium4;
242              CPUVEC_SETUP_pentium4_mmx;
243              CPUVEC_SETUP_pentium4_sse2;
244              break;
245            }
246        }
247      else if (strcmp (vendor_string, "AuthenticAMD") == 0)
248        {
249          switch (family)
250            {
/* Family 5: models 0-3 are reported as k5 and get no specific setup;
   higher models get the k6 setups.  */
251            case 5:
252              if (model <= 3)
253                {
254                  TRACE (printf ("  k5\n"));
255                }
256              else
257                {
258                  TRACE (printf ("  k6\n"));
259                  CPUVEC_SETUP_k6;
260                  CPUVEC_SETUP_k6_mmx;
261                  if (model >= 8)
262                    {
263                      TRACE (printf ("  k62\n"));
264                      CPUVEC_SETUP_k6_k62mmx;
265                    }
266                  if (model >= 9)
267                    {
268                      TRACE (printf ("  k63\n"));
269                    }
270                }
271              break;
272            case 6:
273              TRACE (printf ("  athlon\n"));
274            athlon:
275              CPUVEC_SETUP_k7;
276              CPUVEC_SETUP_k7_mmx;
277              break;
/* Family 15 differs from family 6 only in its trace text; it jumps to the
   same k7 setups.  */
278            case 15:
279              TRACE (printf ("  x86_64\n"));
280              goto athlon;
281            }
282        }
/* The CentaurHauls and CyrixInstead branches below only trace; no CPU
   specific setup is applied for these vendors.  */
283      else if (strcmp (vendor_string, "CentaurHauls") == 0)
284        {
285          switch (family)
286            {
287            case 6:
288              TRACE (printf ("  viac3\n"));
289              if (model >= 9)
290                {
291                  TRACE (printf ("  viac32\n"));
292                }
293              break;
294            }
295        }
296      else if (strcmp (vendor_string, "CyrixInstead") == 0)
297        {
298          /* Should recognize Cyrix' processors too.  */
299          TRACE (printf ("  cyrix something\n"));
300        }
301    }
302
303  /* There's no x86 generic mpn_preinv_divrem_1 or mpn_preinv_mod_1.
304     Instead default to the plain versions from whichever CPU we detected.
305     The function arguments are compatible, no need for any glue code.  */
306  if (decided_cpuvec.preinv_divrem_1 == NULL)
307    decided_cpuvec.preinv_divrem_1 =(preinv_divrem_1_t)decided_cpuvec.divrem_1;
308  if (decided_cpuvec.preinv_mod_1 == NULL)
309    decided_cpuvec.preinv_mod_1    =(preinv_mod_1_t)   decided_cpuvec.mod_1;
310
/* Check the finished vector, then publish it.
   NOTE(review): CPUVEC_INSTALL presumably copies decided_cpuvec into
   __gmpn_cpuvec -- confirm in its definition.  */
311  ASSERT_CPUVEC (decided_cpuvec);
312  CPUVEC_INSTALL (decided_cpuvec);
313
314  /* Set this once the threshold fields are ready.
315     Use volatile to prevent it getting moved.  */
316  ((volatile struct cpuvec_t *) &__gmpn_cpuvec)->initialized = 1;
317}
318