1/* x86 fat binary initializers. 2 3 THE FUNCTIONS AND VARIABLES IN THIS FILE ARE FOR INTERNAL USE ONLY. 4 THEY'RE ALMOST CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR 5 COMPLETELY IN FUTURE GNU MP RELEASES. 6 7Copyright 2003, 2004, 2011-2013, 2015, 2017, 2018 Free Software Foundation, 8Inc. 9 10This file is part of the GNU MP Library. 11 12The GNU MP Library is free software; you can redistribute it and/or modify 13it under the terms of either: 14 15 * the GNU Lesser General Public License as published by the Free 16 Software Foundation; either version 3 of the License, or (at your 17 option) any later version. 18 19or 20 21 * the GNU General Public License as published by the Free Software 22 Foundation; either version 2 of the License, or (at your option) any 23 later version. 24 25or both in parallel, as here. 26 27The GNU MP Library is distributed in the hope that it will be useful, but 28WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 29or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 30for more details. 31 32You should have received copies of the GNU General Public License and the 33GNU Lesser General Public License along with the GNU MP Library. If not, 34see https://www.gnu.org/licenses/. */ 35 36#include <stdio.h> /* for printf */ 37#include <stdlib.h> /* for getenv */ 38#include <string.h> 39 40#include "gmp-impl.h" 41 42/* Change this to "#define TRACE(x) x" for some traces. */ 43#define TRACE(x) 44 45 46/* fat_entry.asm */ 47long __gmpn_cpuid (char [12], int); 48int __gmpn_cpuid_available (void); 49 50 51#if WANT_FAKE_CPUID 52/* The "name"s in the table are values for the GMP_CPU_TYPE environment 53 variable. Anything can be used, but for now it's the canonical cpu types 54 as per config.guess/config.sub. */ 55 56#define __gmpn_cpuid fake_cpuid 57#define __gmpn_cpuid_available fake_cpuid_available 58 59#define MAKE_FMS(family, model) \ 60 ((((family) & 0xf) << 8) + (((family) & 0xff0) << 20) \ 61 + (((model) & 0xf) << 4) + (((model) & 0xf0) << 12)) 62 63static struct { 64 const char *name; 65 const char *vendor; 66 unsigned fms; 67} fake_cpuid_table[] = { 68 { "i386", "" }, 69 { "i486", "GenuineIntel", MAKE_FMS (4, 0) }, 70 { "pentium", "GenuineIntel", MAKE_FMS (5, 0) }, 71 { "pentiummmx", "GenuineIntel", MAKE_FMS (5, 4) }, 72 { "pentiumpro", "GenuineIntel", MAKE_FMS (6, 0) }, 73 { "pentium2", "GenuineIntel", MAKE_FMS (6, 2) }, 74 { "pentium3", "GenuineIntel", MAKE_FMS (6, 7) }, 75 { "pentium4", "GenuineIntel", MAKE_FMS (15, 2) }, 76 { "prescott", "GenuineIntel", MAKE_FMS (15, 3) }, 77 { "nocona", "GenuineIntel", MAKE_FMS (15, 4) }, 78 { "core2", "GenuineIntel", MAKE_FMS (6, 0xf) }, 79 { "nehalem", "GenuineIntel", MAKE_FMS (6, 0x1a) }, 80 { "nhm", "GenuineIntel", MAKE_FMS (6, 0x1a) }, 81 { "atom", "GenuineIntel", MAKE_FMS (6, 0x1c) }, 82 { "westmere", "GenuineIntel", MAKE_FMS (6, 0x25) }, 83 { "wsm", "GenuineIntel", MAKE_FMS (6, 0x25) }, 84 { "sandybridge","GenuineIntel", MAKE_FMS (6, 0x2a) }, 85 { "sbr", "GenuineIntel", MAKE_FMS (6, 0x2a) }, 86 { "silvermont", "GenuineIntel", MAKE_FMS (6, 0x37) }, 87 { "slm", "GenuineIntel", MAKE_FMS (6, 0x37) }, 88 { "haswell", "GenuineIntel", MAKE_FMS (6, 0x3c) }, 89 { "hwl", "GenuineIntel", MAKE_FMS (6, 0x3c) }, 90 { "broadwell", "GenuineIntel", MAKE_FMS (6, 0x3d) }, 91 { "bwl", "GenuineIntel", MAKE_FMS (6, 0x3d) }, 92 { "skylake", "GenuineIntel", MAKE_FMS (6, 0x5e) }, 93 { "sky", "GenuineIntel", MAKE_FMS (6, 0x5e) }, 94 95 { "k5", "AuthenticAMD", MAKE_FMS (5, 0) }, 96 { "k6", "AuthenticAMD", MAKE_FMS (5, 3) }, 97 { "k62", "AuthenticAMD", MAKE_FMS (5, 8) }, 98 { "k63", "AuthenticAMD", MAKE_FMS (5, 9) }, 99 { "athlon", "AuthenticAMD", MAKE_FMS (6, 0) }, 100 { "k8", "AuthenticAMD", MAKE_FMS (15, 0) }, 101 { "k10", "AuthenticAMD", MAKE_FMS (16, 0) }, 102 { "bobcat", "AuthenticAMD", MAKE_FMS (20, 1) }, 103 { "bulldozer", "AuthenticAMD", MAKE_FMS (21, 1) }, 104 { "piledriver", "AuthenticAMD", MAKE_FMS (21, 2) }, 105 { "steamroller","AuthenticAMD", MAKE_FMS (21, 0x30) }, 106 { "excavator", "AuthenticAMD", MAKE_FMS (21, 0x60) }, 107 { "jaguar", "AuthenticAMD", MAKE_FMS (22, 1) }, 108 { "zen", "AuthenticAMD", MAKE_FMS (23, 1) }, 109 110 { "viac3", "CentaurHauls", MAKE_FMS (6, 0) }, 111 { "viac32", "CentaurHauls", MAKE_FMS (6, 9) }, 112 { "nano", "CentaurHauls", MAKE_FMS (6, 15) }, 113}; 114 115static int 116fake_cpuid_lookup (void) 117{ 118 char *s; 119 int i; 120 121 s = getenv ("GMP_CPU_TYPE"); 122 if (s == NULL) 123 { 124 printf ("Need GMP_CPU_TYPE environment variable for fake cpuid\n"); 125 abort (); 126 } 127 128 for (i = 0; i < numberof (fake_cpuid_table); i++) 129 if (strcmp (s, fake_cpuid_table[i].name) == 0) 130 return i; 131 132 printf ("GMP_CPU_TYPE=%s unknown\n", s); 133 abort (); 134} 135 136static int 137fake_cpuid_available (void) 138{ 139 return fake_cpuid_table[fake_cpuid_lookup()].vendor[0] != '\0'; 140} 141 142static long 143fake_cpuid (char dst[12], int id) 144{ 145 int i = fake_cpuid_lookup(); 146 147 switch (id) { 148 case 0: 149 memcpy (dst, fake_cpuid_table[i].vendor, 12); 150 return 0; 151 case 1: 152 return fake_cpuid_table[i].fms; 153 default: 154 printf ("fake_cpuid(): oops, unknown id %d\n", id); 155 abort (); 156 } 157} 158#endif 159 160 161typedef DECL_preinv_divrem_1 ((*preinv_divrem_1_t)); 162typedef DECL_preinv_mod_1 ((*preinv_mod_1_t)); 163 164struct cpuvec_t __gmpn_cpuvec = { 165 __MPN(add_n_init), 166 0, 167 0, 168 __MPN(addmul_1_init), 169 0, 170 __MPN(bdiv_dbm1c_init), 171 __MPN(cnd_add_n_init), 172 __MPN(cnd_sub_n_init), 173 __MPN(com_init), 174 __MPN(copyd_init), 175 __MPN(copyi_init), 176 __MPN(divexact_1_init), 177 __MPN(divrem_1_init), 178 __MPN(gcd_11_init), 179 __MPN(lshift_init), 180 __MPN(lshiftc_init), 181 __MPN(mod_1_init), 182 __MPN(mod_1_1p_init), 183 __MPN(mod_1_1p_cps_init), 184 __MPN(mod_1s_2p_init), 185 __MPN(mod_1s_2p_cps_init), 186 __MPN(mod_1s_4p_init), 187 __MPN(mod_1s_4p_cps_init), 188 __MPN(mod_34lsub1_init), 189 __MPN(modexact_1c_odd_init), 190 __MPN(mul_1_init), 191 __MPN(mul_basecase_init), 192 __MPN(mullo_basecase_init), 193 __MPN(preinv_divrem_1_init), 194 __MPN(preinv_mod_1_init), 195 __MPN(redc_1_init), 196 __MPN(redc_2_init), 197 __MPN(rshift_init), 198 __MPN(sqr_basecase_init), 199 __MPN(sub_n_init), 200 0, 201 __MPN(submul_1_init), 202 0 203}; 204 205int __gmpn_cpuvec_initialized = 0; 206 207/* The following setups start with generic x86, then overwrite with 208 specifics for a chip, and higher versions of that chip. 209 210 The arrangement of the setups here will normally be the same as the $path 211 selections in configure.in for the respective chips. 212 213 This code is reentrant and thread safe. We always calculate the same 214 decided_cpuvec, so if two copies of the code are running it doesn't 215 matter which completes first, both write the same to __gmpn_cpuvec. 216 217 We need to go via decided_cpuvec because if one thread has completed 218 __gmpn_cpuvec then it may be making use of the threshold values in that 219 vector. If another thread is still running __gmpn_cpuvec_init then we 220 don't want it to write different values to those fields since some of the 221 asm routines only operate correctly up to their own defined threshold, 222 not an arbitrary value. */ 223 224void 225__gmpn_cpuvec_init (void) 226{ 227 struct cpuvec_t decided_cpuvec; 228 229 TRACE (printf ("__gmpn_cpuvec_init:\n")); 230 231 memset (&decided_cpuvec, '\0', sizeof (decided_cpuvec)); 232 233 CPUVEC_SETUP_x86; 234 CPUVEC_SETUP_fat; 235 236 if (! __gmpn_cpuid_available ()) 237 { 238 TRACE (printf (" 80386, or early 80486 without cpuid\n")); 239 } 240 else 241 { 242 char vendor_string[13]; 243 char dummy_string[12]; 244 long fms; 245 int family, model; 246 247 __gmpn_cpuid (vendor_string, 0); 248 vendor_string[12] = 0; 249 250 fms = __gmpn_cpuid (dummy_string, 1); 251 family = ((fms >> 8) & 0xf) + ((fms >> 20) & 0xff); 252 model = ((fms >> 4) & 0xf) + ((fms >> 12) & 0xf0); 253 254 if (strcmp (vendor_string, "GenuineIntel") == 0) 255 { 256 switch (family) 257 { 258 case 4: 259 TRACE (printf (" 80486 with cpuid\n")); 260 break; 261 262 case 5: 263 TRACE (printf (" pentium\n")); 264 CPUVEC_SETUP_pentium; 265 if (model == 4 || model == 8) 266 { 267 TRACE (printf (" pentiummmx\n")); 268 CPUVEC_SETUP_pentium_mmx; 269 } 270 break; 271 272 case 6: 273 TRACE (printf (" p6\n")); 274 CPUVEC_SETUP_p6; 275 switch (model) 276 { 277 case 0x00: 278 case 0x01: 279 TRACE (printf (" pentiumpro\n")); 280 break; 281 282 case 0x02: 283 case 0x03: 284 case 0x04: 285 case 0x05: 286 case 0x06: 287 TRACE (printf (" pentium2\n")); 288 CPUVEC_SETUP_p6_mmx; 289 break; 290 291 case 0x07: 292 case 0x08: 293 case 0x0a: 294 case 0x0b: 295 case 0x0c: 296 TRACE (printf (" pentium3\n")); 297 CPUVEC_SETUP_p6_mmx; 298 CPUVEC_SETUP_p6_p3mmx; 299 break; 300 301 case 0x09: /* Banias */ 302 case 0x0d: /* Dothan */ 303 case 0x0e: /* Yonah */ 304 TRACE (printf (" Banias/Dothan/Yonah\n")); 305 CPUVEC_SETUP_p6_mmx; 306 CPUVEC_SETUP_p6_p3mmx; 307 CPUVEC_SETUP_p6_sse2; 308 break; 309 310 case 0x0f: /* Conroe Merom Kentsfield Allendale */ 311 case 0x10: 312 case 0x11: 313 case 0x12: 314 case 0x13: 315 case 0x14: 316 case 0x15: 317 case 0x16: 318 case 0x17: /* PNR Wolfdale Yorkfield */ 319 case 0x18: 320 case 0x19: 321 case 0x1d: /* PNR Dunnington */ 322 TRACE (printf (" Conroe\n")); 323 CPUVEC_SETUP_p6_mmx; 324 CPUVEC_SETUP_p6_p3mmx; 325 CPUVEC_SETUP_p6_sse2; 326 CPUVEC_SETUP_core2; 327 break; 328 329 case 0x1c: /* Atom Silverthorne */ 330 case 0x26: /* Atom Lincroft */ 331 case 0x27: /* Atom Saltwell */ 332 case 0x36: /* Atom Cedarview/Saltwell */ 333 TRACE (printf (" atom\n")); 334 CPUVEC_SETUP_atom; 335 CPUVEC_SETUP_atom_mmx; 336 CPUVEC_SETUP_atom_sse2; 337 break; 338 339 case 0x37: /* Silvermont */ 340 case 0x4a: /* Silvermont */ 341 case 0x4c: /* Airmont */ 342 case 0x4d: /* Silvermont/Avoton */ 343 case 0x5a: /* Silvermont */ 344 TRACE (printf (" silvermont\n")); 345 CPUVEC_SETUP_atom; 346 CPUVEC_SETUP_atom_mmx; 347 CPUVEC_SETUP_atom_sse2; 348 CPUVEC_SETUP_silvermont; 349 break; 350 351 case 0x5c: /* Goldmont */ 352 case 0x5f: /* Goldmont */ 353 case 0x7a: /* Goldmont Plus */ 354 TRACE (printf (" goldmont\n")); 355 CPUVEC_SETUP_atom; 356 CPUVEC_SETUP_atom_mmx; 357 CPUVEC_SETUP_atom_sse2; 358 CPUVEC_SETUP_goldmont; 359 break; 360 361 case 0x1a: /* NHM Gainestown */ 362 case 0x1b: 363 case 0x1e: /* NHM Lynnfield/Jasper */ 364 case 0x1f: 365 case 0x20: 366 case 0x21: 367 case 0x22: 368 case 0x23: 369 case 0x24: 370 case 0x25: /* WSM Clarkdale/Arrandale */ 371 case 0x28: 372 case 0x29: 373 case 0x2b: 374 case 0x2c: /* WSM Gulftown */ 375 case 0x2e: /* NHM Beckton */ 376 case 0x2f: /* WSM Eagleton */ 377 TRACE (printf (" nehalem/westmere\n")); 378 CPUVEC_SETUP_p6_mmx; 379 CPUVEC_SETUP_p6_p3mmx; 380 CPUVEC_SETUP_p6_sse2; 381 CPUVEC_SETUP_core2; 382 CPUVEC_SETUP_coreinhm; 383 break; 384 385 case 0x2a: /* SBR */ 386 case 0x2d: /* SBR-EP */ 387 case 0x3a: /* IBR */ 388 case 0x3e: /* IBR Ivytown */ 389 case 0x3c: /* Haswell client */ 390 case 0x3f: /* Haswell server */ 391 case 0x45: /* Haswell ULT */ 392 case 0x46: /* Crystal Well */ 393 case 0x3d: /* Broadwell */ 394 case 0x47: /* Broadwell */ 395 case 0x4f: /* Broadwell server */ 396 case 0x56: /* Broadwell microserver */ 397 case 0x4e: /* Skylake client */ 398 case 0x55: /* Skylake server */ 399 case 0x5e: /* Skylake */ 400 case 0x8e: /* Kabylake */ 401 case 0x9e: /* Kabylake */ 402 TRACE (printf (" sandybridge\n")); 403 CPUVEC_SETUP_p6_mmx; 404 CPUVEC_SETUP_p6_p3mmx; 405 CPUVEC_SETUP_p6_sse2; 406 CPUVEC_SETUP_core2; 407 CPUVEC_SETUP_coreinhm; 408 CPUVEC_SETUP_coreisbr; 409 break; 410 } 411 break; 412 413 case 15: 414 TRACE (printf (" pentium4\n")); 415 CPUVEC_SETUP_pentium4; 416 CPUVEC_SETUP_pentium4_mmx; 417 CPUVEC_SETUP_pentium4_sse2; 418 break; 419 } 420 } 421 else if (strcmp (vendor_string, "AuthenticAMD") == 0) 422 { 423 switch (family) 424 { 425 case 5: 426 if (model <= 3) 427 { 428 TRACE (printf (" k5\n")); 429 } 430 else 431 { 432 TRACE (printf (" k6\n")); 433 CPUVEC_SETUP_k6; 434 CPUVEC_SETUP_k6_mmx; 435 if (model >= 8) 436 { 437 TRACE (printf (" k62\n")); 438 CPUVEC_SETUP_k6_k62mmx; 439 } 440 if (model >= 9) 441 { 442 TRACE (printf (" k63\n")); 443 } 444 } 445 break; 446 case 6: 447 TRACE (printf (" athlon\n")); 448 CPUVEC_SETUP_k7; 449 CPUVEC_SETUP_k7_mmx; 450 break; 451 452 case 0x0f: /* k8 */ 453 case 0x11: /* "fam 11h", mix of k8 and k10 */ 454 case 0x13: /* unknown, conservatively assume k8 */ 455 TRACE (printf (" k8\n")); 456 CPUVEC_SETUP_k7; 457 CPUVEC_SETUP_k7_mmx; 458 CPUVEC_SETUP_k8; 459 break; 460 461 case 0x10: /* k10 */ 462 case 0x12: /* k10 (llano) */ 463 TRACE (printf (" k10\n")); 464 CPUVEC_SETUP_k7; 465 CPUVEC_SETUP_k7_mmx; 466 break; 467 468 case 0x14: /* bobcat */ 469 case 0x16: /* jaguar */ 470 TRACE (printf (" bobcat\n")); 471 CPUVEC_SETUP_k7; 472 CPUVEC_SETUP_k7_mmx; 473 CPUVEC_SETUP_bt1; 474 break; 475 476 case 0x15: /* bulldozer */ 477 TRACE (printf (" bulldozer\n")); 478 CPUVEC_SETUP_k7; 479 CPUVEC_SETUP_k7_mmx; 480 CPUVEC_SETUP_bd1; 481 break; 482 483 case 0x17: /* zen */ 484 case 0x19: /* zen3 */ 485 TRACE (printf (" zen\n")); 486 CPUVEC_SETUP_k7; 487 CPUVEC_SETUP_k7_mmx; 488 break; 489 } 490 } 491 else if (strcmp (vendor_string, "CentaurHauls") == 0) 492 { 493 switch (family) 494 { 495 case 6: 496 TRACE (printf (" viac3\n")); 497 if (model >= 9) 498 { 499 TRACE (printf (" viac32\n")); 500 } 501 if (model >= 15) 502 { 503 TRACE (printf (" nano\n")); 504 CPUVEC_SETUP_nano; 505 } 506 break; 507 } 508 } 509 else if (strcmp (vendor_string, "CyrixInstead") == 0) 510 { 511 /* Should recognize Cyrix' processors too. */ 512 TRACE (printf (" cyrix something\n")); 513 } 514 } 515 516 /* There's no x86 generic mpn_preinv_divrem_1 or mpn_preinv_mod_1. 517 Instead default to the plain versions from whichever CPU we detected. 518 The function arguments are compatible, no need for any glue code. */ 519 if (decided_cpuvec.preinv_divrem_1 == NULL) 520 decided_cpuvec.preinv_divrem_1 =(preinv_divrem_1_t)decided_cpuvec.divrem_1; 521 if (decided_cpuvec.preinv_mod_1 == NULL) 522 decided_cpuvec.preinv_mod_1 =(preinv_mod_1_t) decided_cpuvec.mod_1; 523 524 ASSERT_CPUVEC (decided_cpuvec); 525 CPUVEC_INSTALL (decided_cpuvec); 526 527 /* Set this once the threshold fields are ready. 528 Use volatile to prevent it getting moved. */ 529 *((volatile int *) &__gmpn_cpuvec_initialized) = 1; 530} 531