/*
 * x86 SMP booting functions
 *
 * (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
 * (c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com>
 *
 * Much of the core SMP work is based on previous work by Thomas Radke, to
 * whom a great many thanks are extended.
 *
 * Thanks to Intel for making available several different Pentium,
 * Pentium Pro and Pentium-II/Xeon MP machines.
 * Original development of Linux SMP code supported by Caldera.
 *
 * This code is released under the GNU General Public License version 2 or
 * later.
 *
 * Fixes
 *      Felix Koop        : NR_CPUS used properly
 *      Jose Renau        : Handle single CPU case.
 *      Alan Cox          : By repeated request 8) - Total BogoMIP report.
 *      Greg Wright       : Fix for kernel stacks panic.
 *      Erich Boleyn      : MP v1.4 and additional changes.
 *      Matthias Sattler  : Changes for 2.1 kernel map.
 *      Michel Lespinasse : Changes for 2.1 kernel map.
 *      Michael Chastain  : Change trampoline.S to gnu as.
 *      Alan Cox          : Dumb bug: 'B' step PPro's are fine
 *      Ingo Molnar       : Added APIC timers, based on code
 *                          from Jose Renau
 *      Ingo Molnar       : various cleanups and rewrites
 *      Tigran Aivazian   : fixed "0.00 in /proc/uptime on SMP" bug.
 *      Maciej W. Rozycki : Bits for genuine 82489DX APICs
 *      Martin J. Bligh   : Added support for multi-quad systems
 */

#include <linux/config.h>
#include <linux/init.h>

#include <linux/mm.h>
#include <linux/kernel_stat.h>
#include <linux/smp_lock.h>
#include <linux/irq.h>
#include <linux/bootmem.h>

#include <linux/delay.h>
#include <linux/mc146818rtc.h>
#include <asm/mtrr.h>
#include <asm/pgalloc.h>
#include <asm/smpboot.h>

/* Set if we find a B stepping CPU */
static int smp_b_stepping;

/* Setup configured maximum number of CPUs to activate */
static int max_cpus = -1;

/* Total count of live CPUs */
int smp_num_cpus = 1;

/* Number of siblings per CPU package */
int smp_num_siblings = 1;
int __initdata phys_proc_id[NR_CPUS]; /* Package ID of each logical CPU */

/* Bitmask of currently online CPUs */
unsigned long cpu_online_map;

static volatile unsigned long cpu_callin_map;
static volatile unsigned long cpu_callout_map;

/* Per CPU bogomips and other parameters */
struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;

/* Set when the idlers are all forked */
int smp_threads_ready;

/*
 * Setup routine for controlling SMP activation
 *
 * Command-line option of "nosmp" or "maxcpus=0" will disable SMP
 * activation entirely (the MPS table probe still happens, though).
 *
 * Command-line option of "maxcpus=<NUM>", where <NUM> is an integer
 * greater than 0, limits the maximum number of CPUs activated in
 * SMP mode to <NUM>.
 */

static int __init nosmp(char *str)
{
        max_cpus = 0;
        return 1;
}

__setup("nosmp", nosmp);

static int __init maxcpus(char *str)
{
        get_option(&str, &max_cpus);
        return 1;
}

__setup("maxcpus=", maxcpus);

/*
 * Trampoline 80x86 program as an array.
 */

extern unsigned char trampoline_data [];
extern unsigned char trampoline_end  [];
static unsigned char *trampoline_base;
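/*
 * How the trampoline gets used (summary): a STARTUP IPI carries an
 * 8-bit vector VV, and the target CPU begins real-mode execution at
 * physical address 000VV000h - the start of the page named by the
 * vector. setup_trampoline() below therefore copies the 16-bit
 * bootstrap stub into a low page, and do_boot_cpu() passes
 * (start_eip >> 12) as the STARTUP vector.
 */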
/*
 * Currently trivial. Write the real->protected mode
 * bootstrap into the page concerned. The caller
 * has made sure it's suitably aligned.
 */

static unsigned long __init setup_trampoline(void)
{
        memcpy(trampoline_base, trampoline_data, trampoline_end - trampoline_data);
        return virt_to_phys(trampoline_base);
}

/*
 * We are called very early to get the low memory for the
 * SMP bootup trampoline page.
 */
void __init smp_alloc_memory(void)
{
        trampoline_base = (void *) alloc_bootmem_low_pages(PAGE_SIZE);
        /*
         * Has to be in very low memory so we can execute
         * real-mode AP code.
         */
        if (__pa(trampoline_base) >= 0x9F000)
                BUG();
}

/*
 * The bootstrap kernel entry code has set these up. Save them for
 * a given CPU.
 */

void __init smp_store_cpu_info(int id)
{
        struct cpuinfo_x86 *c = cpu_data + id;

        *c = boot_cpu_data;
        c->pte_quick = 0;
        c->pmd_quick = 0;
        c->pgd_quick = 0;
        c->pgtable_cache_sz = 0;
        identify_cpu(c);
        /*
         * Mask B, Pentium, but not Pentium MMX
         */
        if (c->x86_vendor == X86_VENDOR_INTEL &&
            c->x86 == 5 &&
            c->x86_mask >= 1 && c->x86_mask <= 4 &&
            c->x86_model <= 3)
                /*
                 * Remember we have B step Pentia with bugs
                 */
                smp_b_stepping = 1;
}

/*
 * Architecture specific routine called by the kernel just before init is
 * fired off. This allows the BP to have everything in order [we hope].
 * At the end of this all the APs will hit the system scheduling and off
 * we go. Each AP will load the system gdt's and jump through the kernel
 * init into idle(). At this point the scheduler will one day take over
 * and give them jobs to do. smp_callin is a standard routine
 * we use to track CPUs as they power up.
 */

static atomic_t smp_commenced = ATOMIC_INIT(0);

void __init smp_commence(void)
{
        /*
         * Lets the callins below out of their loop.
         */
        Dprintk("Setting commenced=1, go go go\n");

        wmb();
        atomic_set(&smp_commenced, 1);
}

/*
 * TSC synchronization.
 *
 * We first check whether all CPUs have their TSCs synchronized,
 * then we print a warning if not, and always resync.
 */

static atomic_t tsc_start_flag = ATOMIC_INIT(0);
static atomic_t tsc_count_start = ATOMIC_INIT(0);
static atomic_t tsc_count_stop = ATOMIC_INIT(0);
static unsigned long long tsc_values[NR_CPUS];

#define NR_LOOPS 5

extern unsigned long fast_gettimeoffset_quotient;
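/*
 * Informal sketch of the rendezvous implemented by
 * synchronize_tsc_bp()/synchronize_tsc_ap() below, per loop pass
 * (N == smp_num_cpus):
 *
 *    each AP:                          BP:
 *    inc tsc_count_start               wait for tsc_count_start == N-1
 *                                      reset tsc_count_stop to 0
 *    wait for tsc_count_start == N     inc tsc_count_start  (-> N)
 *    rdtsc (zero TSC on last pass)     rdtsc (zero TSC on last pass)
 *    inc tsc_count_stop                wait for tsc_count_stop == N-1
 *    wait for tsc_count_stop == N      reset tsc_count_start, inc stop
 *
 * All N CPUs therefore read (and on the last pass reset) their TSCs
 * within roughly one cacheline transfer of each other.
 */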
/*
 * Accurate 64-bit/32-bit division, expanded to 32-bit divisions and 64-bit
 * multiplication. Not terribly optimized but we need it at boot time only
 * anyway.
 *
 * result == a / b
 *        == (a1 + a2*(2^32)) / b
 *        == a1/b + a2*(2^32/b)
 *        == a1/b + a2*((2^32-1)/b) + a2/b + (a2*((2^32-1) % b))/b
 *                  ^---- (this multiplication can overflow)
 */

static unsigned long long __init div64 (unsigned long long a, unsigned long b0)
{
        unsigned int a1, a2;
        unsigned long long res;

        a1 = ((unsigned int*)&a)[0];
        a2 = ((unsigned int*)&a)[1];

        res = a1/b0 +
                (unsigned long long)a2 * (unsigned long long)(0xffffffff/b0) +
                a2 / b0 +
                (a2 * (0xffffffff % b0)) / b0;

        return res;
}
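/*
 * Worked example of the approximation above (illustrative numbers):
 * for a = 10*2^32 + 6 and b0 = 4 we get a1 = 6, a2 = 10 and
 *
 *   res = 6/4 + 10*(0xffffffff/4) + 10/4 + (10*(0xffffffff%4))/4
 *       = 1   + 10*1073741823    + 2    + 30/4
 *       = 10737418240
 *
 * while the exact quotient is 10737418241: each term is floor-divided,
 * so the result can come up a few counts short. That is harmless for
 * the boot-time TSC averaging below.
 */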
static void __init synchronize_tsc_bp (void)
{
        int i;
        unsigned long long t0;
        unsigned long long sum, avg;
        long long delta;
        unsigned long one_usec;
        int buggy = 0;

        printk("checking TSC synchronization across CPUs: ");

        one_usec = ((1<<30)/fast_gettimeoffset_quotient)*(1<<2);

        atomic_set(&tsc_start_flag, 1);
        wmb();

        /*
         * We loop a few times to get a primed instruction cache,
         * then the last pass is more or less synchronized and
         * the BP and APs set their cycle counters to zero all at
         * once. This reduces the chance of having random offsets
         * between the processors, and guarantees that the maximum
         * delay between the cycle counters is never bigger than
         * the latency of information-passing (cachelines) between
         * two CPUs.
         */
        for (i = 0; i < NR_LOOPS; i++) {
                /*
                 * all APs synchronize but they loop on '== num_cpus'
                 */
                while (atomic_read(&tsc_count_start) != smp_num_cpus-1) mb();
                atomic_set(&tsc_count_stop, 0);
                wmb();
                /*
                 * this lets the APs save their current TSC:
                 */
                atomic_inc(&tsc_count_start);

                rdtscll(tsc_values[smp_processor_id()]);
                /*
                 * We clear the TSC in the last loop:
                 */
                if (i == NR_LOOPS-1)
                        write_tsc(0, 0);

                /*
                 * Wait for all APs to leave the synchronization point:
                 */
                while (atomic_read(&tsc_count_stop) != smp_num_cpus-1) mb();
                atomic_set(&tsc_count_start, 0);
                wmb();
                atomic_inc(&tsc_count_stop);
        }

        sum = 0;
        for (i = 0; i < smp_num_cpus; i++) {
                t0 = tsc_values[i];
                sum += t0;
        }
        avg = div64(sum, smp_num_cpus);

        sum = 0;
        for (i = 0; i < smp_num_cpus; i++) {
                delta = tsc_values[i] - avg;
                if (delta < 0)
                        delta = -delta;
                /*
                 * We report clock differences bigger than 2 microseconds.
                 */
                if (delta > 2*one_usec) {
                        long realdelta;
                        if (!buggy) {
                                buggy = 1;
                                printk("\n");
                        }
                        realdelta = div64(delta, one_usec);
                        if (tsc_values[i] < avg)
                                realdelta = -realdelta;

                        printk("BIOS BUG: CPU#%d improperly initialized, has %ld usecs TSC skew! FIXED.\n",
                                i, realdelta);
                }

                sum += delta;
        }
        if (!buggy)
                printk("passed.\n");
}

static void __init synchronize_tsc_ap (void)
{
        int i;

        /*
         * smp_num_cpus is not necessarily known at the time
         * this gets called, so we first wait for the BP to
         * finish SMP initialization:
         */
        while (!atomic_read(&tsc_start_flag)) mb();

        for (i = 0; i < NR_LOOPS; i++) {
                atomic_inc(&tsc_count_start);
                while (atomic_read(&tsc_count_start) != smp_num_cpus) mb();

                rdtscll(tsc_values[smp_processor_id()]);
                if (i == NR_LOOPS-1)
                        write_tsc(0, 0);

                atomic_inc(&tsc_count_stop);
                while (atomic_read(&tsc_count_stop) != smp_num_cpus) mb();
        }
}
#undef NR_LOOPS

extern void calibrate_delay(void);

static atomic_t init_deasserted;

void __init smp_callin(void)
{
        int cpuid, phys_id;
        unsigned long timeout;

        /*
         * If woken up by an INIT in an 82489DX configuration
         * we may get here before an INIT-deassert IPI reaches
         * our local APIC. We have to wait for the IPI or we'll
         * lock up on an APIC access.
         */
        if (!clustered_apic_mode)
                while (!atomic_read(&init_deasserted));

        /*
         * (This works even if the APIC is not enabled.)
         */
        phys_id = GET_APIC_ID(apic_read(APIC_ID));
        cpuid = current->processor;
        if (test_and_set_bit(cpuid, &cpu_online_map)) {
                printk("huh, phys CPU#%d, CPU#%d already present??\n",
                        phys_id, cpuid);
                BUG();
        }
        Dprintk("CPU#%d (phys ID: %d) waiting for CALLOUT\n", cpuid, phys_id);

        /*
         * STARTUP IPIs are fragile beasts as they might sometimes
         * trigger glue logic on some motherboards. One second of
         * complete APIC bus silence overestimates, by a factor of two,
         * the time the boot CPU spends sending the up to two STARTUP
         * IPIs. This should be enough.
         */

        /*
         * Waiting 2s total for startup (udelay is not yet working)
         */
        timeout = jiffies + 2*HZ;
        while (time_before(jiffies, timeout)) {
                /*
                 * Has the boot CPU finished its STARTUP sequence?
                 */
                if (test_bit(cpuid, &cpu_callout_map))
                        break;
                rep_nop();
        }

        if (!time_before(jiffies, timeout)) {
                printk("BUG: CPU%d started up but did not get a callout!\n",
                        cpuid);
                BUG();
        }

        /*
         * The boot CPU has finished the init stage and is spinning
         * on callin_map until we finish. We are free to set up this
         * CPU, first the APIC. (This is probably redundant on most
         * boards.)
         */

        Dprintk("CALLIN, before setup_local_APIC().\n");
        /*
         * Because we use NMIs rather than the INIT-STARTUP sequence to
         * bootstrap the CPUs, the APIC may be in a weird state. Kick it.
         */
        if (clustered_apic_mode)
                clear_local_APIC();
        setup_local_APIC();

        __sti();

#ifdef CONFIG_MTRR
        /*
         * Must be done before calibration delay is computed
         */
        mtrr_init_secondary_cpu ();
#endif
        /*
         * Get our bogomips.
         */
        calibrate_delay();
        Dprintk("Stack at about %p\n", &cpuid);

        /*
         * Save our processor parameters
         */
        smp_store_cpu_info(cpuid);

        /*
         * Allow the master to continue.
         */
        set_bit(cpuid, &cpu_callin_map);

        /*
         * Synchronize the TSC with the BP
         */
        if (cpu_has_tsc)
                synchronize_tsc_ap();
}

int cpucount;

extern int cpu_idle(void);
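/*
 * The BP/AP bring-up handshake in one picture (BP on the left, the
 * new AP on the right):
 *
 *   do_boot_cpu(): INIT/STARTUP IPI  ->  trampoline -> start_secondary()
 *   set cpu_callout_map bit          ->  smp_callin() spins on it
 *   spin on cpu_callin_map bit       <-  smp_callin() sets it
 *   smp_commence(): commenced = 1    ->  AP drops into cpu_idle()
 */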
/*
 * Activate a secondary processor.
 */
int __init start_secondary(void *unused)
{
        /*
         * Don't put anything before smp_callin(); SMP booting is
         * fragile enough that we want to limit the things done here
         * to the bare minimum.
         */
        cpu_init();
        smp_callin();
        while (!atomic_read(&smp_commenced))
                rep_nop();
        /*
         * low-memory mappings have been cleared, flush them from
         * the local TLBs too.
         */
        local_flush_tlb();

        return cpu_idle();
}

/*
 * Everything has been set up for the secondary
 * CPUs - they just need to reload everything
 * from the task structure.
 * This function must not return.
 */
void __init initialize_secondary(void)
{
        /*
         * We don't actually need to load the full TSS,
         * basically just the stack pointer and the eip.
         */

        asm volatile(
                "movl %0,%%esp\n\t"
                "jmp *%1"
                :
                :"r" (current->thread.esp),"r" (current->thread.eip));
}

extern struct {
        void * esp;
        unsigned short ss;
} stack_start;

static int __init fork_by_hand(void)
{
        struct pt_regs regs;
        /*
         * We don't care about the eip and regs settings since
         * we'll never reschedule the forked task.
         */
        return do_fork(CLONE_VM|CLONE_PID, 0, &regs, 0);
}

/* which physical APIC ID maps to which logical CPU number */
volatile int physical_apicid_2_cpu[MAX_APICID];
/* which logical CPU number maps to which physical APIC ID */
volatile int cpu_2_physical_apicid[NR_CPUS];

/* which logical APIC ID maps to which logical CPU number */
volatile int logical_apicid_2_cpu[MAX_APICID];
/* which logical CPU number maps to which logical APIC ID */
volatile int cpu_2_logical_apicid[NR_CPUS];

/* Initialize all maps between cpu number and apicids */
static inline void init_cpu_to_apicid(void)
{
        int apicid, cpu;

        for (apicid = 0; apicid < MAX_APICID; apicid++) {
                physical_apicid_2_cpu[apicid] = -1;
                logical_apicid_2_cpu[apicid] = -1;
        }
        for (cpu = 0; cpu < NR_CPUS; cpu++) {
                cpu_2_physical_apicid[cpu] = -1;
                cpu_2_logical_apicid[cpu] = -1;
        }
}

/*
 * Set up a mapping between cpu and apicid. Uses logical apicids for
 * multiquad, else physical apic ids.
 */
static inline void map_cpu_to_boot_apicid(int cpu, int apicid)
{
        if (clustered_apic_mode) {
                logical_apicid_2_cpu[apicid] = cpu;
                cpu_2_logical_apicid[cpu] = apicid;
        } else {
                physical_apicid_2_cpu[apicid] = cpu;
                cpu_2_physical_apicid[cpu] = apicid;
        }
}

/*
 * Undo a mapping between cpu and apicid. Uses logical apicids for
 * multiquad, else physical apic ids.
 */
static inline void unmap_cpu_to_boot_apicid(int cpu, int apicid)
{
        if (clustered_apic_mode) {
                logical_apicid_2_cpu[apicid] = -1;
                cpu_2_logical_apicid[cpu] = -1;
        } else {
                physical_apicid_2_cpu[apicid] = -1;
                cpu_2_physical_apicid[cpu] = -1;
        }
}
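/*
 * Note on the four arrays above: only one pair is in use on any given
 * boot. With clustered_apic_mode (NUMA-Q) CPUs are addressed by
 * logical APIC ID, so the logical_* pair is maintained; on everything
 * else the physical_* pair is. Lookups such as boot_apicid_to_cpu()
 * (used in smp_boot_cpus() below) are presumably expected to consult
 * the pair matching the current mode.
 */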
#if APIC_DEBUG
static inline void inquire_remote_apic(int apicid)
{
        int i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 };
        char *names[] = { "ID", "VERSION", "SPIV" };
        int timeout, status;

        printk("Inquiring remote APIC #%d...\n", apicid);

        for (i = 0; i < sizeof(regs) / sizeof(*regs); i++) {
                printk("... APIC #%d %s: ", apicid, names[i]);

                /*
                 * Wait for idle.
                 */
                apic_wait_icr_idle();

                apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
                apic_write_around(APIC_ICR, APIC_DM_REMRD | regs[i]);

                timeout = 0;
                do {
                        udelay(100);
                        status = apic_read(APIC_ICR) & APIC_ICR_RR_MASK;
                } while (status == APIC_ICR_RR_INPROG && timeout++ < 1000);

                switch (status) {
                case APIC_ICR_RR_VALID:
                        status = apic_read(APIC_RRR);
                        printk("%08x\n", status);
                        break;
                default:
                        printk("failed\n");
                }
        }
}
#endif

/*
 * Poke the other CPU in the eye to wake it up. Remember that the normal
 * INIT, INIT, STARTUP sequence will reset the chip hard for us, and this
 * won't ... remember to clear down the APIC, etc later.
 */
static int wakeup_secondary_via_NMI(int logical_apicid)
{
        unsigned long send_status = 0, accept_status = 0;
        int timeout, maxlvt;

        /* Target chip */
        apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(logical_apicid));

        /* Boot on the stack */
        /* Kick the second */
        apic_write_around(APIC_ICR, APIC_DM_NMI | APIC_DEST_LOGICAL);

        Dprintk("Waiting for send to finish...\n");
        timeout = 0;
        do {
                Dprintk("+");
                udelay(100);
                send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
        } while (send_status && (timeout++ < 1000));

        /*
         * Give the other CPU some time to accept the IPI.
         */
        udelay(200);
        /*
         * Due to the Pentium erratum 3AP.
         */
        maxlvt = get_maxlvt();
        if (maxlvt > 3) {
                apic_read_around(APIC_SPIV);
                apic_write(APIC_ESR, 0);
        }
        accept_status = (apic_read(APIC_ESR) & 0xEF);
        Dprintk("NMI sent.\n");

        if (send_status)
                printk("APIC never delivered???\n");
        if (accept_status)
                printk("APIC delivery error (%lx).\n", accept_status);

        return (send_status | accept_status);
}
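/*
 * wakeup_secondary_via_INIT() below implements the classic MP-spec
 * "INIT-SIPI-SIPI" sequence: assert INIT (level triggered), wait
 * ~10ms, deassert INIT, then - only for integrated APICs - send up to
 * two STARTUP IPIs whose vector is the trampoline page number.
 * External 82489DX APICs predate STARTUP IPIs, which is why
 * num_starts ends up 0 for them and the INIT alone does the job.
 */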
static int wakeup_secondary_via_INIT(int phys_apicid, unsigned long start_eip)
{
        unsigned long send_status = 0, accept_status = 0;
        int maxlvt, timeout, num_starts, j;

        Dprintk("Asserting INIT.\n");

        /*
         * Turn INIT on target chip
         */
        apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));

        /*
         * Send IPI
         */
        apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_INT_ASSERT
                                | APIC_DM_INIT);

        Dprintk("Waiting for send to finish...\n");
        timeout = 0;
        do {
                Dprintk("+");
                udelay(100);
                send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
        } while (send_status && (timeout++ < 1000));

        mdelay(10);

        Dprintk("Deasserting INIT.\n");

        /* Target chip */
        apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));

        /* Send IPI */
        apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT);

        Dprintk("Waiting for send to finish...\n");
        timeout = 0;
        do {
                Dprintk("+");
                udelay(100);
                send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
        } while (send_status && (timeout++ < 1000));

        atomic_set(&init_deasserted, 1);

        /*
         * Should we send STARTUP IPIs?
         *
         * Determine this based on the APIC version.
         * If we don't have an integrated APIC, don't send the STARTUP IPIs.
         */
        if (APIC_INTEGRATED(apic_version[phys_apicid]))
                num_starts = 2;
        else
                num_starts = 0;

        /*
         * Run STARTUP IPI loop.
         */
        Dprintk("#startup loops: %d.\n", num_starts);

        maxlvt = get_maxlvt();

        for (j = 1; j <= num_starts; j++) {
                Dprintk("Sending STARTUP #%d.\n", j);
                apic_read_around(APIC_SPIV);
                apic_write(APIC_ESR, 0);
                apic_read(APIC_ESR);
                Dprintk("After apic_write.\n");

                /*
                 * STARTUP IPI
                 */

                /* Target chip */
                apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));

                /* Boot on the stack */
                /* Kick the second */
                apic_write_around(APIC_ICR, APIC_DM_STARTUP
                                        | (start_eip >> 12));

                /*
                 * Give the other CPU some time to accept the IPI.
                 */
                udelay(300);

                Dprintk("Startup point 1.\n");

                Dprintk("Waiting for send to finish...\n");
                timeout = 0;
                do {
                        Dprintk("+");
                        udelay(100);
                        send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
                } while (send_status && (timeout++ < 1000));

                /*
                 * Give the other CPU some time to accept the IPI.
                 */
                udelay(200);
                /*
                 * Due to the Pentium erratum 3AP.
                 */
                if (maxlvt > 3) {
                        apic_read_around(APIC_SPIV);
                        apic_write(APIC_ESR, 0);
                }
                accept_status = (apic_read(APIC_ESR) & 0xEF);
                if (send_status || accept_status)
                        break;
        }
        Dprintk("After Startup.\n");

        if (send_status)
                printk("APIC never delivered???\n");
        if (accept_status)
                printk("APIC delivery error (%lx).\n", accept_status);

        return (send_status | accept_status);
}

extern unsigned long cpu_initialized;

/*
 * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
 * (i.e. clustered apic addressing mode), this is a LOGICAL apic ID.
 */
static void __init do_boot_cpu (int apicid)
{
        struct task_struct *idle;
        unsigned long boot_error = 0;
        int timeout, cpu;
        unsigned long start_eip;
        unsigned short nmi_high, nmi_low;

        cpu = ++cpucount;
        /*
         * We can't use kernel_thread since we must avoid
         * rescheduling the child.
         */
        if (fork_by_hand() < 0)
                panic("failed fork for CPU %d", cpu);

        /*
         * We remove it from the pidhash and the runqueue
         * once we got the process:
         */
        idle = init_task.prev_task;
        if (!idle)
                panic("No idle process for CPU %d", cpu);

        idle->processor = cpu;
        idle->cpus_runnable = 1 << cpu; /* we schedule the first task manually */

        map_cpu_to_boot_apicid(cpu, apicid);

        idle->thread.eip = (unsigned long) start_secondary;

        del_from_runqueue(idle);
        unhash_process(idle);
        init_tasks[cpu] = idle;

        /* start_eip had better be page-aligned! */
        start_eip = setup_trampoline();

        /* So we see what's up */
        printk("Booting processor %d/%d eip %lx\n", cpu, apicid, start_eip);
        stack_start.esp = (void *) (1024 + PAGE_SIZE + (char *)idle);

        /*
         * This grunge runs the startup process for
         * the targeted processor.
         */
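        /*
         * The stores below arm the BIOS warm-reset path as a
         * belt-and-braces measure: CMOS shutdown code 0x0A means
         * "jump via the vector at 40:67 without EOI", and the word
         * pair at 0x467 (TRAMPOLINE_LOW/TRAMPOLINE_HIGH) is set to
         * the real-mode offset:segment of the trampoline. A CPU that
         * comes out of INIT through the BIOS rather than via a
         * STARTUP IPI should still land in our trampoline.
         */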
        atomic_set(&init_deasserted, 0);

        Dprintk("Setting warm reset code and vector.\n");

        if (clustered_apic_mode == CLUSTERED_APIC_NUMAQ) {
                /* stash the current NMI vector, so we can put things back */
                nmi_high = *((volatile unsigned short *) TRAMPOLINE_HIGH);
                nmi_low = *((volatile unsigned short *) TRAMPOLINE_LOW);
        }

        CMOS_WRITE(0xa, 0xf);
        local_flush_tlb();
        Dprintk("1.\n");
        *((volatile unsigned short *) TRAMPOLINE_HIGH) = start_eip >> 4;
        Dprintk("2.\n");
        *((volatile unsigned short *) TRAMPOLINE_LOW) = start_eip & 0xf;
        Dprintk("3.\n");

        /*
         * Be paranoid about clearing APIC errors.
         */
        if (!clustered_apic_mode && APIC_INTEGRATED(apic_version[apicid])) {
                apic_read_around(APIC_SPIV);
                apic_write(APIC_ESR, 0);
                apic_read(APIC_ESR);
        }

        /*
         * Status is now clean
         */
        boot_error = 0;

        /*
         * Starting actual IPI sequence...
         */
        if (clustered_apic_mode == CLUSTERED_APIC_NUMAQ)
                boot_error = wakeup_secondary_via_NMI(apicid);
        else
                boot_error = wakeup_secondary_via_INIT(apicid, start_eip);

        if (!boot_error) {
                /*
                 * Allow APs to start initializing.
                 */
                Dprintk("Before Callout %d.\n", cpu);
                set_bit(cpu, &cpu_callout_map);
                Dprintk("After Callout %d.\n", cpu);

                /*
                 * Wait 5s total for a response
                 */
                for (timeout = 0; timeout < 50000; timeout++) {
                        if (test_bit(cpu, &cpu_callin_map))
                                break;  /* It has booted */
                        udelay(100);
                }

                if (test_bit(cpu, &cpu_callin_map)) {
                        /* number CPUs logically, starting from 1 (BSP is 0) */
                        Dprintk("OK.\n");
                        printk("CPU%d: ", cpu);
                        print_cpu_info(&cpu_data[cpu]);
                        Dprintk("CPU has booted.\n");
                } else {
                        boot_error = 1;
                        if (*((volatile unsigned char *)phys_to_virt(8192))
                                        == 0xA5)
                                /* trampoline started but...? */
                                printk("Stuck ??\n");
                        else
                                /* trampoline code not run */
                                printk("Not responding.\n");
#if APIC_DEBUG
                        if (!clustered_apic_mode)
                                inquire_remote_apic(apicid);
#endif
                }
        }
        if (boot_error) {
                /* Try to put things back the way they were before ... */
                unmap_cpu_to_boot_apicid(cpu, apicid);
                clear_bit(cpu, &cpu_callout_map); /* was set here (do_boot_cpu()) */
                clear_bit(cpu, &cpu_initialized); /* was set by cpu_init() */
                clear_bit(cpu, &cpu_online_map);  /* was set in smp_callin() */
                cpucount--;
        }

        /* mark "stuck" area as not stuck */
        *((volatile unsigned long *)phys_to_virt(8192)) = 0;

        if (clustered_apic_mode == CLUSTERED_APIC_NUMAQ) {
                printk("Restoring NMI vector\n");
                *((volatile unsigned short *) TRAMPOLINE_HIGH) = nmi_high;
                *((volatile unsigned short *) TRAMPOLINE_LOW) = nmi_low;
        }
}

cycles_t cacheflush_time;
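/*
 * Worked example for the cacheflush_time estimate computed in
 * smp_tune_scheduling() below (illustrative numbers): with
 * cpu_khz = 500000 (a 500 MHz part), a 512 kB cache and the default
 * 350 MB/s bandwidth guess,
 *
 *   (cpu_khz>>10) * (cachesize<<10) / bandwidth
 *     = 488 * 524288 / 350  ~=  731000 cycles  ~=  1.46 ms,
 *
 * which is what feeds the scheduler's cache-affinity cutoff.
 */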
static void smp_tune_scheduling (void)
{
        unsigned long cachesize;       /* kB */
        unsigned long bandwidth = 350; /* MB/s */
        /*
         * Rough estimation for SMP scheduling, this is the number of
         * cycles it takes for a fully memory-limited process to flush
         * the SMP-local cache.
         *
         * (For a P5 this pretty much means we will choose another idle
         * CPU almost always at wakeup time (this is due to the small
         * L1 cache), on PIIs it's around 50-100 usecs, depending on
         * the cache size)
         */

        if (!cpu_khz) {
                /*
                 * This basically disables processor-affinity
                 * scheduling on SMP without a TSC.
                 */
                cacheflush_time = 0;
                return;
        } else {
                cachesize = boot_cpu_data.x86_cache_size;
                if (cachesize == -1) {
                        cachesize = 16; /* Pentiums, 2x8kB cache */
                        bandwidth = 100;
                }

                cacheflush_time = (cpu_khz>>10) * (cachesize<<10) / bandwidth;
        }

        printk("per-CPU timeslice cutoff: %ld.%02ld usecs.\n",
                (long)cacheflush_time/(cpu_khz/1000),
                ((long)cacheflush_time*100/(cpu_khz/1000)) % 100);
}

/*
 * Cycle through the processors sending APIC IPIs to boot each.
 */

static int boot_cpu_logical_apicid;
/* Where the IO area was mapped on multiquad, always 0 otherwise */
void *xquad_portio;

int cpu_sibling_map[NR_CPUS] __cacheline_aligned;

void __init smp_boot_cpus(void)
{
        int apicid, cpu, bit;

        if ((clustered_apic_mode == CLUSTERED_APIC_NUMAQ) && (numnodes > 1)) {
                printk("Remapping cross-quad port I/O for %d quads\n",
                        numnodes);
                printk("xquad_portio vaddr 0x%08lx, len %08lx\n",
                        (u_long) xquad_portio,
                        (u_long) numnodes * XQUAD_PORTIO_LEN);
                xquad_portio = ioremap (XQUAD_PORTIO_BASE,
                        numnodes * XQUAD_PORTIO_LEN);
        }

#ifdef CONFIG_MTRR
        /* Must be done before other processors booted */
        mtrr_init_boot_cpu ();
#endif
        /*
         * Initialize the logical to physical CPU number mapping
         */
        init_cpu_to_apicid();

        /*
         * Setup boot CPU information
         */
        smp_store_cpu_info(0); /* Final full version of the data */
        printk("CPU%d: ", 0);
        print_cpu_info(&cpu_data[0]);

        /*
         * We have the boot CPU online for sure.
         */
        set_bit(0, &cpu_online_map);
        boot_cpu_logical_apicid = logical_smp_processor_id();
        map_cpu_to_boot_apicid(0, boot_cpu_apicid);

        global_irq_holder = 0;
        current->processor = 0;
        init_idle();
        smp_tune_scheduling();

        /*
         * If we couldn't find an SMP configuration at boot time,
         * get out of here now!
         */
        if (!smp_found_config) {
                printk(KERN_NOTICE "SMP motherboard not detected.\n");
#ifndef CONFIG_VISWS
                io_apic_irqs = 0;
#endif
                cpu_online_map = phys_cpu_present_map = 1;
                smp_num_cpus = 1;
                if (APIC_init_uniprocessor())
                        printk(KERN_NOTICE "Local APIC not detected."
                                " Using dummy APIC emulation.\n");
                goto smp_done;
        }

        /*
         * Should not be necessary because the MP table should list the boot
         * CPU too, but we do it for the sake of robustness anyway.
         * Makes no sense to do this check in clustered apic mode, so skip it.
         */
        if (!clustered_apic_mode &&
            !test_bit(boot_cpu_physical_apicid, &phys_cpu_present_map)) {
                printk("weird, boot CPU (#%d) not listed by the BIOS.\n",
                        boot_cpu_physical_apicid);
                phys_cpu_present_map |= (1 << hard_smp_processor_id());
        }

        /*
         * If we couldn't find a local APIC, then get out of here now!
         */
        if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid]) &&
            !test_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability)) {
                printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n",
                        boot_cpu_physical_apicid);
                printk(KERN_ERR "... forcing use of dummy APIC emulation. (tell your hw vendor)\n");
#ifndef CONFIG_VISWS
                io_apic_irqs = 0;
#endif
                cpu_online_map = phys_cpu_present_map = 1;
                smp_num_cpus = 1;
                goto smp_done;
        }

        verify_local_APIC();
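        /*
         * Note that the three early-exit paths in this function (no
         * MP table, no usable local APIC, max_cpus == 0) all converge
         * on the same fallback: mark only CPU 0 in cpu_online_map and
         * phys_cpu_present_map, force smp_num_cpus to 1 and jump to
         * smp_done so the UP case still runs zap_low_mappings().
         */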
        /*
         * If SMP should be disabled, then really disable it!
         */
        if (!max_cpus) {
                smp_found_config = 0;
                printk(KERN_INFO "SMP mode deactivated, forcing use of dummy APIC emulation.\n");
#ifndef CONFIG_VISWS
                io_apic_irqs = 0;
#endif
                cpu_online_map = phys_cpu_present_map = 1;
                smp_num_cpus = 1;
                goto smp_done;
        }

        connect_bsp_APIC();
        setup_local_APIC();

        if (GET_APIC_ID(apic_read(APIC_ID)) != boot_cpu_physical_apicid)
                BUG();

        /*
         * Scan the CPU present map and fire up the other CPUs via do_boot_cpu
         *
         * In clustered apic mode, phys_cpu_present_map is constructed thus:
         * bits 0-3 are quad0, 4-7 are quad1, etc. A perverse twist on the
         * clustered apic ID.
         */
        Dprintk("CPU present map: %lx\n", phys_cpu_present_map);

        for (bit = 0; bit < NR_CPUS; bit++) {
                apicid = cpu_present_to_apicid(bit);
                /*
                 * Don't even attempt to start the boot CPU!
                 */
                if (apicid == boot_cpu_apicid)
                        continue;

                if (!(phys_cpu_present_map & (1 << bit)))
                        continue;
                if ((max_cpus >= 0) && (max_cpus <= cpucount+1))
                        continue;

                do_boot_cpu(apicid);

                /*
                 * Make sure we unmap all failed CPUs
                 */
                if ((boot_apicid_to_cpu(apicid) == -1) &&
                    (phys_cpu_present_map & (1 << bit)))
                        printk("CPU #%d not responding - cannot use it.\n",
                                apicid);
        }

        /*
         * Cleanup possible dangling ends...
         */
#ifndef CONFIG_VISWS
        {
                /*
                 * Install writable page 0 entry to set BIOS data area.
                 */
                local_flush_tlb();

                /*
                 * Paranoid: Set warm reset code and vector here back
                 * to default values.
                 */
                CMOS_WRITE(0, 0xf);

                *((volatile long *) phys_to_virt(0x467)) = 0;
        }
#endif

        /*
         * Allow the user to impress friends.
         */
        Dprintk("Before bogomips.\n");
        if (!cpucount) {
                printk(KERN_ERR "Error: only one processor found.\n");
        } else {
                unsigned long bogosum = 0;
                for (cpu = 0; cpu < NR_CPUS; cpu++)
                        if (cpu_online_map & (1<<cpu))
                                bogosum += cpu_data[cpu].loops_per_jiffy;
                printk(KERN_INFO "Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
                        cpucount+1,
                        bogosum/(500000/HZ),
                        (bogosum/(5000/HZ))%100);
                Dprintk("Before bogocount - setting activated=1.\n");
        }
        smp_num_cpus = cpucount + 1;

        if (smp_b_stepping)
                printk(KERN_WARNING "WARNING: SMP operation may be unreliable with B stepping processors.\n");
        Dprintk("Boot done.\n");

        /*
         * If Hyper-Threading is available, construct cpu_sibling_map[], so
         * that we can tell the sibling CPU efficiently.
         */
        if (test_bit(X86_FEATURE_HT, boot_cpu_data.x86_capability)
            && smp_num_siblings > 1) {
                for (cpu = 0; cpu < NR_CPUS; cpu++)
                        cpu_sibling_map[cpu] = NO_PROC_ID;

                for (cpu = 0; cpu < smp_num_cpus; cpu++) {
                        int i;

                        for (i = 0; i < smp_num_cpus; i++) {
                                if (i == cpu)
                                        continue;
                                if (phys_proc_id[cpu] == phys_proc_id[i]) {
                                        cpu_sibling_map[cpu] = i;
                                        printk("cpu_sibling_map[%d] = %d\n",
                                                cpu, cpu_sibling_map[cpu]);
                                        break;
                                }
                        }
                        if (cpu_sibling_map[cpu] == NO_PROC_ID) {
                                smp_num_siblings = 1;
                                printk(KERN_WARNING "WARNING: No sibling found for CPU %d.\n", cpu);
                        }
                }
        }
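        /*
         * Example of what the sibling scan above produces on a
         * two-package Hyper-Threading box enumerated as {0,1} =
         * package 0 and {2,3} = package 1 (i.e. assuming
         * phys_proc_id[] came out as {0, 0, 1, 1}):
         *
         *   cpu_sibling_map[] = { 1, 0, 3, 2 }
         *
         * Each CPU records the first other CPU sharing its package ID.
         */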
#ifndef CONFIG_VISWS
        /*
         * Here we can be sure that there is an IO-APIC in the system. Let's
         * go and set it up:
         */
        if (!skip_ioapic_setup && nr_ioapics)
                setup_IO_APIC();
#endif

        /*
         * Set up all local APIC timers in the system:
         */
        setup_APIC_clocks();

#if defined(CONFIG_KERNPROF)
        /*
         * Set up all local APIC performance counter overflow vectors,
         * if available:
         */
        if (cpu_has_msr && boot_cpu_data.x86 == 6)
                setup_APIC_perfctr();
#endif

        /*
         * Synchronize the TSC with the AP
         */
        if (cpu_has_tsc && cpucount)
                synchronize_tsc_bp();

smp_done:
        zap_low_mappings();
}