#include <linux/init.h>
#include <linux/bitops.h>
#include <linux/mm.h>

#include <linux/io.h>
#include <asm/processor.h>
#include <asm/apic.h>
#include <asm/cpu.h>
#include <asm/pci-direct.h>

#ifdef CONFIG_X86_64
# include <asm/numa_64.h>
# include <asm/mmconfig.h>
# include <asm/cacheflush.h>
#endif

#include "cpu.h"

#ifdef CONFIG_X86_32
/*
 * B step AMD K6 before B 9730xxxx have hardware bugs that can cause
 * misexecution of code under Linux. Owners of such processors should
 * contact AMD for precise details and a CPU swap.
 *
 * See	http://www.multimania.com/poulot/k6bug.html
 *	http://www.amd.com/K6/k6docs/revgd.html
 *
 * The following test is erm.. interesting. AMD neglected to up
 * the chip setting when fixing the bug but they also tweaked some
 * performance at the same time..
 */

extern void vide(void);
__asm__(".align 4\nvide: ret");

static void __cpuinit init_amd_k5(struct cpuinfo_x86 *c)
{
#define CBAR		(0xfffc)	/* Configuration Base Address (32-bit) */
#define CBAR_ENB	(0x80000000)
#define CBAR_KEY	(0x000000CB)
	if (c->x86_model == 9 || c->x86_model == 10) {
		if (inl(CBAR) & CBAR_ENB)
			outl(0 | CBAR_KEY, CBAR);
	}
}

static void __cpuinit init_amd_k6(struct cpuinfo_x86 *c)
{
	u32 l, h;
	int mbytes = num_physpages >> (20-PAGE_SHIFT);

	if (c->x86_model < 6) {
		/* Based on AMD doc 20734R - June 2000 */
		if (c->x86_model == 0) {
			clear_cpu_cap(c, X86_FEATURE_APIC);
			set_cpu_cap(c, X86_FEATURE_PGE);
		}
		return;
	}

	if (c->x86_model == 6 && c->x86_mask == 1) {
		const int K6_BUG_LOOP = 1000000;
		int n;
		void (*f_vide)(void);
		unsigned long d, d2;

		printk(KERN_INFO "AMD K6 stepping B detected - ");

		/*
		 * It looks like AMD fixed the 2.6.2 bug and improved indirect
		 * calls at the same time.
		 */

		n = K6_BUG_LOOP;
		f_vide = vide;
		rdtscl(d);
		while (n--)
			f_vide();
		rdtscl(d2);
		d = d2-d;

		if (d > 20*K6_BUG_LOOP)
			printk(KERN_CONT
				"system stability may be impaired when more than 32 MB are used.\n");
		else
			printk(KERN_CONT "probably OK (after B9730xxxx).\n");
		printk(KERN_INFO "Please see http://membres.lycos.fr/poulot/k6bug.html\n");
	}

	/* K6 with old style WHCR */
	if (c->x86_model < 8 ||
	   (c->x86_model == 8 && c->x86_mask < 8)) {
		/* We can only write allocate on the low 508Mb */
		if (mbytes > 508)
			mbytes = 508;

		rdmsr(MSR_K6_WHCR, l, h);
		if ((l&0x0000FFFF) == 0) {
			unsigned long flags;
			l = (1<<0)|((mbytes/4)<<1);
			local_irq_save(flags);
			wbinvd();
			wrmsr(MSR_K6_WHCR, l, h);
			local_irq_restore(flags);
			printk(KERN_INFO "Enabling old style K6 write allocation for %d Mb\n",
				mbytes);
		}
		return;
	}

	if ((c->x86_model == 8 && c->x86_mask > 7) ||
	     c->x86_model == 9 || c->x86_model == 13) {
		/* The more serious chips .. */
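		/*
		 * New-style WHCR, as programmed below: the write-allocate
		 * limit appears to live in bits 31:22 in units of 4 MB,
		 * with an enable bit at bit 16 (inferred from the code,
		 * not from AMD documentation).
		 */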
		if (mbytes > 4092)
			mbytes = 4092;

		rdmsr(MSR_K6_WHCR, l, h);
		if ((l&0xFFFF0000) == 0) {
			unsigned long flags;
			l = ((mbytes>>2)<<22)|(1<<16);
			local_irq_save(flags);
			wbinvd();
			wrmsr(MSR_K6_WHCR, l, h);
			local_irq_restore(flags);
			printk(KERN_INFO "Enabling new style K6 write allocation for %d Mb\n",
				mbytes);
		}

		return;
	}

	if (c->x86_model == 10) {
		/* AMD Geode LX is model 10 */
		/* placeholder for any needed mods */
		return;
	}
}

static void __cpuinit amd_k7_smp_check(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_SMP
	/* Are we being called from identify_secondary_cpu()? */
	if (c->cpu_index == boot_cpu_id)
		return;

	/*
	 * Certain Athlons might work (for various values of 'work') in SMP
	 * but they are not certified as MP capable.
	 */
	/* Athlon 660/661 is valid. */
	if ((c->x86_model == 6) && ((c->x86_mask == 0) ||
	    (c->x86_mask == 1)))
		goto valid_k7;

	/* Duron 670 is valid */
	if ((c->x86_model == 7) && (c->x86_mask == 0))
		goto valid_k7;

	/*
	 * Athlon 662, Duron 671, and Athlon >model 7 have the MP capability
	 * bit. It's worth noting that the A5 stepping (662) of some
	 * Athlon XPs has the MP bit set.
	 * See http://www.heise.de/newsticker/data/jow-18.10.01-000 for
	 * more.
	 */
	if (((c->x86_model == 6) && (c->x86_mask >= 2)) ||
	    ((c->x86_model == 7) && (c->x86_mask >= 1)) ||
	     (c->x86_model > 7))
		if (cpu_has_mp)
			goto valid_k7;

	/* If we get here, not a certified SMP capable AMD system. */

	/*
	 * Don't taint if we are running SMP kernel on a single non-MP
	 * approved Athlon
	 */
	WARN_ONCE(1, "WARNING: This combination of AMD"
		" processors is not suitable for SMP.\n");
	if (!test_taint(TAINT_UNSAFE_SMP))
		add_taint(TAINT_UNSAFE_SMP);

valid_k7:
	;
#endif
}

static void __cpuinit init_amd_k7(struct cpuinfo_x86 *c)
{
	u32 l, h;

	/*
	 * Bit 15 of Athlon specific MSR 15 needs to be 0
	 * to enable SSE on Palomino/Morgan/Barton CPUs.
	 * If the BIOS didn't enable it already, enable it here.
	 */
	if (c->x86_model >= 6 && c->x86_model <= 10) {
		if (!cpu_has(c, X86_FEATURE_XMM)) {
			printk(KERN_INFO "Enabling disabled K7/SSE Support.\n");
			rdmsr(MSR_K7_HWCR, l, h);
			l &= ~0x00008000;
			wrmsr(MSR_K7_HWCR, l, h);
			set_cpu_cap(c, X86_FEATURE_XMM);
		}
	}

	/*
	 * It's been determined by AMD that Athlons since model 8 stepping 1
	 * are more robust with CLK_CTL set to 200xxxxx instead of 600xxxxx,
	 * as per AMD technical note 27212 0.2.
	 */
	if ((c->x86_model == 8 && c->x86_mask >= 1) || (c->x86_model > 8)) {
		rdmsr(MSR_K7_CLK_CTL, l, h);
		if ((l & 0xfff00000) != 0x20000000) {
			printk(KERN_INFO
				"CPU: CLK_CTL MSR was %x. Reprogramming to %x\n",
				l, ((l & 0x000fffff)|0x20000000));
			wrmsr(MSR_K7_CLK_CTL, (l & 0x000fffff)|0x20000000, h);
		}
	}
	set_cpu_cap(c, X86_FEATURE_K7);

	amd_k7_smp_check(c);
}
#endif

#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
static int __cpuinit nearby_node(int apicid)
{
	int i, node;

	for (i = apicid - 1; i >= 0; i--) {
		node = apicid_to_node[i];
		if (node != NUMA_NO_NODE && node_online(node))
			return node;
	}
	for (i = apicid + 1; i < MAX_LOCAL_APIC; i++) {
		node = apicid_to_node[i];
		if (node != NUMA_NO_NODE && node_online(node))
			return node;
	}
	return first_node(node_online_map); /* Shouldn't happen */
}
#endif

/*
 * Fixup core topology information for AMD multi-node processors.
 * Assumption: Number of cores in each internal node is the same.
 */
#ifdef CONFIG_X86_HT
static void __cpuinit amd_fixup_dcm(struct cpuinfo_x86 *c)
{
	unsigned long long value;
	u32 nodes, cores_per_node;
	int cpu = smp_processor_id();

	if (!cpu_has(c, X86_FEATURE_NODEID_MSR))
		return;

	/* fixup topology information only once for a core */
	if (cpu_has(c, X86_FEATURE_AMD_DCM))
		return;

	rdmsrl(MSR_FAM10H_NODE_ID, value);

	nodes = ((value >> 3) & 7) + 1;
	if (nodes == 1)
		return;

	set_cpu_cap(c, X86_FEATURE_AMD_DCM);
	cores_per_node = c->x86_max_cores / nodes;

	/* store NodeID, use llc_shared_map to store sibling info */
	per_cpu(cpu_llc_id, cpu) = value & 7;

	/* fixup core id to be in range from 0 to (cores_per_node - 1) */
	c->cpu_core_id = c->cpu_core_id % cores_per_node;
}
#endif

/*
 * On an AMD dual-core setup the lower bits of the APIC id distinguish the
 * cores. Assumes number of cores is a power of two.
 */
static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_X86_HT
	unsigned bits;
	int cpu = smp_processor_id();

	bits = c->x86_coreid_bits;
	/* Low order bits define the core id (index of core in socket) */
	c->cpu_core_id = c->initial_apicid & ((1 << bits)-1);
	/* Convert the initial APIC ID into the socket ID */
	c->phys_proc_id = c->initial_apicid >> bits;
	/* use socket ID also for last level cache */
	per_cpu(cpu_llc_id, cpu) = c->phys_proc_id;
	/* fixup topology information on multi-node processors */
	amd_fixup_dcm(c);
#endif
}
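/*
 * Worked example for amd_detect_cmp() above (illustrative values only):
 * with x86_coreid_bits == 2, an initial APIC ID of 0b1101 yields
 * cpu_core_id = 0b1101 & 0b11 = 1 and phys_proc_id = 0b1101 >> 2 = 3.
 */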
int amd_get_nb_id(int cpu)
{
	int id = 0;
#ifdef CONFIG_SMP
	id = per_cpu(cpu_llc_id, cpu);
#endif
	return id;
}
EXPORT_SYMBOL_GPL(amd_get_nb_id);

static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
{
#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
	int cpu = smp_processor_id();
	int node;
	unsigned apicid = c->apicid;

	node = per_cpu(cpu_llc_id, cpu);

	if (apicid_to_node[apicid] != NUMA_NO_NODE)
		node = apicid_to_node[apicid];
	if (!node_online(node)) {
		/*
		 * Two possibilities here:
		 *
		 * - The CPU is missing memory and no node was created. In
		 *   that case try picking one from a nearby CPU.
		 *
		 * - The APIC IDs differ from the HyperTransport node IDs
		 *   which the K8 northbridge parsing fills in. Assume
		 *   they are all increased by a constant offset, but in
		 *   the same order as the HT nodeids. If that doesn't
		 *   result in a usable node fall back to the path for
		 *   the previous case.
		 */
		int ht_nodeid = c->initial_apicid;

		if (ht_nodeid >= 0 &&
		    apicid_to_node[ht_nodeid] != NUMA_NO_NODE)
			node = apicid_to_node[ht_nodeid];
		/* Pick a nearby node */
		if (!node_online(node))
			node = nearby_node(apicid);
	}
	numa_set_node(cpu, node);
#endif
}

static void __cpuinit early_init_amd_mc(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_X86_HT
	unsigned bits, ecx;

	/* Multi core CPU? */
	if (c->extended_cpuid_level < 0x80000008)
		return;

	ecx = cpuid_ecx(0x80000008);

	c->x86_max_cores = (ecx & 0xff) + 1;

	/* CPU telling us the core id bits shift? */
	bits = (ecx >> 12) & 0xF;

	/* Otherwise recompute */
	if (bits == 0) {
		while ((1 << bits) < c->x86_max_cores)
			bits++;
	}

	c->x86_coreid_bits = bits;
#endif
}

static void __cpuinit early_init_amd(struct cpuinfo_x86 *c)
{
	early_init_amd_mc(c);

	/*
	 * c->x86_power is 8000_0007 edx. Bit 8 indicates that the TSC runs
	 * at a constant rate with P/T-states and does not stop in deep
	 * C-states.
	 */
	if (c->x86_power & (1 << 8)) {
		set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
		set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC);
	}

#ifdef CONFIG_X86_64
	set_cpu_cap(c, X86_FEATURE_SYSCALL32);
#else
	/* Set MTRR capability flag if appropriate */
	if (c->x86 == 5)
		if (c->x86_model == 13 || c->x86_model == 9 ||
		    (c->x86_model == 8 && c->x86_mask >= 8))
			set_cpu_cap(c, X86_FEATURE_K6_MTRR);
#endif
#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_PCI)
	/* check CPU config space for extended APIC ID */
	if (cpu_has_apic && c->x86 >= 0xf) {
		unsigned int val;
		val = read_pci_config(0, 24, 0, 0x68);
		if ((val & ((1 << 17) | (1 << 18))) == ((1 << 17) | (1 << 18)))
			set_cpu_cap(c, X86_FEATURE_EXTD_APICID);
	}
#endif
}

static void __cpuinit init_amd(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_SMP
	unsigned long long value;

	/*
	 * Disable TLB flush filter by setting HWCR.FFDIS on K8
	 * bit 6 of msr C001_0015
	 *
	 * Erratum 63 for SH-B3 steppings
	 * Erratum 122 for all steppings (F+ have it disabled by default)
	 */
	if (c->x86 == 0xf) {
		rdmsrl(MSR_K7_HWCR, value);
		value |= 1 << 6;
		wrmsrl(MSR_K7_HWCR, value);
	}
#endif

	early_init_amd(c);

	/*
	 * Bit 31 in normal CPUID used for nonstandard 3DNow ID;
	 * 3DNow is identified by bit 31 in extended CPUID (1*32+31) anyway
	 */
	clear_cpu_cap(c, 0*32+31);

#ifdef CONFIG_X86_64
	/* On C+ stepping K8 rep microcode works well for copy/memset */
	if (c->x86 == 0xf) {
		u32 level;

		level = cpuid_eax(1);
		if ((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58)
			set_cpu_cap(c, X86_FEATURE_REP_GOOD);

		/*
		 * Some BIOSes incorrectly force this feature, but only K8
		 * revision D (model = 0x14) and later actually support it.
		 * (AMD Erratum #110, docId: 25759).
		 */
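		/*
		 * Assumption, not from the erratum text: bit 32 of MSR
		 * 0xc001100d appears to mirror the CPUID-visible LAHF/SAHF
		 * bit, so the code below clears it to keep CPUID output
		 * consistent with the cleared feature flag.
		 */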
		if (c->x86_model < 0x14 && cpu_has(c, X86_FEATURE_LAHF_LM)) {
			u64 val;

			clear_cpu_cap(c, X86_FEATURE_LAHF_LM);
			if (!rdmsrl_amd_safe(0xc001100d, &val)) {
				val &= ~(1ULL << 32);
				wrmsrl_amd_safe(0xc001100d, val);
			}
		}
	}
	if (c->x86 >= 0x10)
		set_cpu_cap(c, X86_FEATURE_REP_GOOD);

	/* get apicid instead of initial apic id from cpuid */
	c->apicid = hard_smp_processor_id();
#else

	switch (c->x86) {
	case 4:
		init_amd_k5(c);
		break;
	case 5:
		init_amd_k6(c);
		break;
	case 6: /* An Athlon/Duron */
		init_amd_k7(c);
		break;
	}

	/* K6s report MCEs but don't actually have all the MSRs */
	if (c->x86 < 6)
		clear_cpu_cap(c, X86_FEATURE_MCE);
#endif

	if (c->x86 >= 6)
		set_cpu_cap(c, X86_FEATURE_FXSAVE_LEAK);

	if (!c->x86_model_id[0]) {
		switch (c->x86) {
		case 0xf:
			/*
			 * Should distinguish models here, but this is only
			 * a fallback anyway.
			 */
			strcpy(c->x86_model_id, "Hammer");
			break;
		}
	}

	cpu_detect_cache_sizes(c);

	/* Multi core CPU? */
	if (c->extended_cpuid_level >= 0x80000008) {
		amd_detect_cmp(c);
		srat_detect_node(c);
	}

#ifdef CONFIG_X86_32
	detect_ht(c);
#endif

	if (c->extended_cpuid_level >= 0x80000006) {
		if ((c->x86 >= 0x0f) && (cpuid_edx(0x80000006) & 0xf000))
			num_cache_leaves = 4;
		else
			num_cache_leaves = 3;
	}

	if (c->x86 >= 0xf)
		set_cpu_cap(c, X86_FEATURE_K8);

	if (cpu_has_xmm2) {
		/* MFENCE stops RDTSC speculation */
		set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
	}

#ifdef CONFIG_X86_64
	if (c->x86 == 0x10) {
		/* do this for boot cpu */
		if (c == &boot_cpu_data)
			check_enable_amd_mmconf_dmi();

		fam10h_check_enable_mmcfg();
	}

	if (c == &boot_cpu_data && c->x86 >= 0xf) {
		unsigned long long tseg;

		/*
		 * Split up direct mapping around the TSEG SMM area.
		 * Don't do it for gbpages because there seems very little
		 * benefit in doing so.
		 */
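		/*
		 * The check below fires when the TSEG base lies inside a
		 * mapped range: either below max_low_pfn_mapped, or at or
		 * above 4 GB but below max_pfn_mapped (all compared in
		 * PMD-sized units).
		 */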
		if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg)) {
			printk(KERN_DEBUG "tseg: %010llx\n", tseg);
			if ((tseg>>PMD_SHIFT) <
				(max_low_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) ||
				((tseg>>PMD_SHIFT) <
				(max_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) &&
				(tseg>>PMD_SHIFT) >= (1ULL<<(32 - PMD_SHIFT))))
				set_memory_4k((unsigned long)__va(tseg), 1);
		}
	}
#endif
}

#ifdef CONFIG_X86_32
static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c,
					     unsigned int size)
{
	/* AMD erratum T13 (order #21922) */
	if (c->x86 == 6) {
		/* Duron Rev A0 */
		if (c->x86_model == 3 && c->x86_mask == 0)
			size = 64;
		/* Tbird rev A1/A2 */
		if (c->x86_model == 4 &&
		    (c->x86_mask == 0 || c->x86_mask == 1))
			size = 256;
	}
	return size;
}
#endif

static const struct cpu_dev __cpuinitconst amd_cpu_dev = {
	.c_vendor	= "AMD",
	.c_ident	= { "AuthenticAMD" },
#ifdef CONFIG_X86_32
	.c_models = {
		{ .vendor = X86_VENDOR_AMD, .family = 4, .model_names =
		  {
			  [3] = "486 DX/2",
			  [7] = "486 DX/2-WB",
			  [8] = "486 DX/4",
			  [9] = "486 DX/4-WB",
			  [14] = "Am5x86-WT",
			  [15] = "Am5x86-WB"
		  }
		},
	},
	.c_size_cache	= amd_size_cache,
#endif
	.c_early_init	= early_init_amd,
	.c_init		= init_amd,
	.c_x86_vendor	= X86_VENDOR_AMD,
};

cpu_dev_register(amd_cpu_dev);

/*
 * AMD errata checking
 *
 * Errata are defined as arrays of ints using the AMD_LEGACY_ERRATUM() or
 * AMD_OSVW_ERRATUM() macros. The latter is intended for newer errata that
 * have an OSVW id assigned, which it takes as first argument. Both take a
 * variable number of family-specific model-stepping ranges created by
 * AMD_MODEL_RANGE(). Each erratum also has to be declared as extern const
 * int[] in arch/x86/include/asm/processor.h.
 *
 * Example:
 *
 * const int amd_erratum_319[] =
 *	AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0x4, 0x2),
 *			   AMD_MODEL_RANGE(0x10, 0x8, 0x0, 0x8, 0x0),
 *			   AMD_MODEL_RANGE(0x10, 0x9, 0x0, 0x9, 0x0));
 */

const int amd_erratum_400[] =
	AMD_OSVW_ERRATUM(1, AMD_MODEL_RANGE(0xf, 0x41, 0x2, 0xff, 0xf),
			    AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0xff, 0xf));
EXPORT_SYMBOL_GPL(amd_erratum_400);

const int amd_erratum_383[] =
	AMD_OSVW_ERRATUM(3, AMD_MODEL_RANGE(0x10, 0, 0, 0xff, 0xf));
EXPORT_SYMBOL_GPL(amd_erratum_383);
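/*
 * Decoding example (see the matching logic in cpu_has_amd_erratum()
 * below, which packs model and stepping as (model << 4) | stepping):
 * AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0xff, 0xf) covers family 0x10 from
 * model 0x2 stepping 0x1 through model 0xff stepping 0xf.
 */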
637 */ 638 if (cpu->x86 == 0) 639 cpu = &boot_cpu_data; 640 641 if (cpu->x86_vendor != X86_VENDOR_AMD) 642 return false; 643 644 if (osvw_id >= 0 && osvw_id < 65536 && 645 cpu_has(cpu, X86_FEATURE_OSVW)) { 646 u64 osvw_len; 647 648 rdmsrl(MSR_AMD64_OSVW_ID_LENGTH, osvw_len); 649 if (osvw_id < osvw_len) { 650 u64 osvw_bits; 651 652 rdmsrl(MSR_AMD64_OSVW_STATUS + (osvw_id >> 6), 653 osvw_bits); 654 return osvw_bits & (1ULL << (osvw_id & 0x3f)); 655 } 656 } 657 658 /* OSVW unavailable or ID unknown, match family-model-stepping range */ 659 ms = (cpu->x86_model << 4) | cpu->x86_mask; 660 while ((range = *erratum++)) 661 if ((cpu->x86 == AMD_MODEL_RANGE_FAMILY(range)) && 662 (ms >= AMD_MODEL_RANGE_START(range)) && 663 (ms <= AMD_MODEL_RANGE_END(range))) 664 return true; 665 666 return false; 667} 668 669EXPORT_SYMBOL_GPL(cpu_has_amd_erratum); 670