mp_x86.c revision 178072
1/*- 2 * Copyright (c) 1996, by Steve Passe 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. The name of the developer may NOT be used to endorse or promote products 11 * derived from this software without specific prior written permission. 12 * 13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 16 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 23 * SUCH DAMAGE. 24 */ 25 26#include <sys/cdefs.h> 27__FBSDID("$FreeBSD: head/sys/i386/i386/mp_machdep.c 178072 2008-04-10 18:38:31Z takawata $"); 28 29#include "opt_apic.h" 30#include "opt_cpu.h" 31#include "opt_kstack_pages.h" 32#include "opt_mp_watchdog.h" 33#include "opt_sched.h" 34#include "opt_smp.h" 35 36#if !defined(lint) 37#if !defined(SMP) 38#error How did you get here? 39#endif 40 41#ifndef DEV_APIC 42#error The apic device is required for SMP, add "device apic" to your config file. 43#endif 44#if defined(CPU_DISABLE_CMPXCHG) && !defined(COMPILING_LINT) 45#error SMP not supported with CPU_DISABLE_CMPXCHG 46#endif 47#endif /* not lint */ 48 49#include <sys/param.h> 50#include <sys/systm.h> 51#include <sys/bus.h> 52#include <sys/cons.h> /* cngetc() */ 53#ifdef GPROF 54#include <sys/gmon.h> 55#endif 56#include <sys/kernel.h> 57#include <sys/ktr.h> 58#include <sys/lock.h> 59#include <sys/malloc.h> 60#include <sys/memrange.h> 61#include <sys/mutex.h> 62#include <sys/pcpu.h> 63#include <sys/proc.h> 64#include <sys/sched.h> 65#include <sys/smp.h> 66#include <sys/sysctl.h> 67 68#include <vm/vm.h> 69#include <vm/vm_param.h> 70#include <vm/pmap.h> 71#include <vm/vm_kern.h> 72#include <vm/vm_extern.h> 73 74#include <machine/apicreg.h> 75#include <machine/md_var.h> 76#include <machine/mp_watchdog.h> 77#include <machine/pcb.h> 78#include <machine/psl.h> 79#include <machine/smp.h> 80#include <machine/specialreg.h> 81 82#define WARMBOOT_TARGET 0 83#define WARMBOOT_OFF (KERNBASE + 0x0467) 84#define WARMBOOT_SEG (KERNBASE + 0x0469) 85 86#define CMOS_REG (0x70) 87#define CMOS_DATA (0x71) 88#define BIOS_RESET (0x0f) 89#define BIOS_WARM (0x0a) 90 91/* 92 * this code MUST be enabled here and in mpboot.s. 93 * it follows the very early stages of AP boot by placing values in CMOS ram. 94 * it NORMALLY will never be needed and thus the primitive method for enabling. 95 * 96#define CHECK_POINTS 97 */ 98 99#if defined(CHECK_POINTS) && !defined(PC98) 100#define CHECK_READ(A) (outb(CMOS_REG, (A)), inb(CMOS_DATA)) 101#define CHECK_WRITE(A,D) (outb(CMOS_REG, (A)), outb(CMOS_DATA, (D))) 102 103#define CHECK_INIT(D); \ 104 CHECK_WRITE(0x34, (D)); \ 105 CHECK_WRITE(0x35, (D)); \ 106 CHECK_WRITE(0x36, (D)); \ 107 CHECK_WRITE(0x37, (D)); \ 108 CHECK_WRITE(0x38, (D)); \ 109 CHECK_WRITE(0x39, (D)); 110 111#define CHECK_PRINT(S); \ 112 printf("%s: %d, %d, %d, %d, %d, %d\n", \ 113 (S), \ 114 CHECK_READ(0x34), \ 115 CHECK_READ(0x35), \ 116 CHECK_READ(0x36), \ 117 CHECK_READ(0x37), \ 118 CHECK_READ(0x38), \ 119 CHECK_READ(0x39)); 120 121#else /* CHECK_POINTS */ 122 123#define CHECK_INIT(D) 124#define CHECK_PRINT(S) 125#define CHECK_WRITE(A, D) 126 127#endif /* CHECK_POINTS */ 128 129/* lock region used by kernel profiling */ 130int mcount_lock; 131 132int mp_naps; /* # of Applications processors */ 133int boot_cpu_id = -1; /* designated BSP */ 134extern int nkpt; 135 136extern struct pcpu __pcpu[]; 137 138/* AP uses this during bootstrap. Do not staticize. */ 139char *bootSTK; 140static int bootAP; 141 142/* Free these after use */ 143void *bootstacks[MAXCPU]; 144 145/* Hotwire a 0->4MB V==P mapping */ 146extern pt_entry_t *KPTphys; 147 148struct pcb stoppcbs[MAXCPU]; 149 150/* Variables needed for SMP tlb shootdown. */ 151vm_offset_t smp_tlb_addr1; 152vm_offset_t smp_tlb_addr2; 153volatile int smp_tlb_wait; 154 155#ifdef STOP_NMI 156volatile cpumask_t ipi_nmi_pending; 157 158static void ipi_nmi_selected(u_int32_t cpus); 159#endif 160 161#ifdef COUNT_IPIS 162/* Interrupt counts. */ 163static u_long *ipi_preempt_counts[MAXCPU]; 164static u_long *ipi_ast_counts[MAXCPU]; 165u_long *ipi_invltlb_counts[MAXCPU]; 166u_long *ipi_invlrng_counts[MAXCPU]; 167u_long *ipi_invlpg_counts[MAXCPU]; 168u_long *ipi_invlcache_counts[MAXCPU]; 169u_long *ipi_rendezvous_counts[MAXCPU]; 170u_long *ipi_lazypmap_counts[MAXCPU]; 171#endif 172 173/* 174 * Local data and functions. 175 */ 176 177#ifdef STOP_NMI 178/* 179 * Provide an alternate method of stopping other CPUs. If another CPU has 180 * disabled interrupts the conventional STOP IPI will be blocked. This 181 * NMI-based stop should get through in that case. 182 */ 183static int stop_cpus_with_nmi = 1; 184SYSCTL_INT(_debug, OID_AUTO, stop_cpus_with_nmi, CTLTYPE_INT | CTLFLAG_RW, 185 &stop_cpus_with_nmi, 0, ""); 186TUNABLE_INT("debug.stop_cpus_with_nmi", &stop_cpus_with_nmi); 187#else 188#define stop_cpus_with_nmi 0 189#endif 190 191static u_int logical_cpus; 192 193/* used to hold the AP's until we are ready to release them */ 194static struct mtx ap_boot_mtx; 195 196/* Set to 1 once we're ready to let the APs out of the pen. */ 197static volatile int aps_ready = 0; 198 199/* 200 * Store data from cpu_add() until later in the boot when we actually setup 201 * the APs. 202 */ 203struct cpu_info { 204 int cpu_present:1; 205 int cpu_bsp:1; 206 int cpu_disabled:1; 207} static cpu_info[MAX_APIC_ID + 1]; 208int cpu_apic_ids[MAXCPU]; 209 210/* Holds pending bitmap based IPIs per CPU */ 211static volatile u_int cpu_ipi_pending[MAXCPU]; 212 213static u_int boot_address; 214 215static void assign_cpu_ids(void); 216static void install_ap_tramp(void); 217static void set_interrupt_apic_ids(void); 218static int start_all_aps(void); 219static int start_ap(int apic_id); 220static void release_aps(void *dummy); 221 222static int hlt_logical_cpus; 223static u_int hyperthreading_cpus; 224static cpumask_t hyperthreading_cpus_mask; 225static int hyperthreading_allowed = 1; 226static struct sysctl_ctx_list logical_cpu_clist; 227 228static void 229mem_range_AP_init(void) 230{ 231 if (mem_range_softc.mr_op && mem_range_softc.mr_op->initAP) 232 mem_range_softc.mr_op->initAP(&mem_range_softc); 233} 234 235struct cpu_group * 236cpu_topo(void) 237{ 238 if (cpu_cores == 0) 239 cpu_cores = 1; 240 if (cpu_logical == 0) 241 cpu_logical = 1; 242 if (mp_ncpus % (cpu_cores * cpu_logical) != 0) { 243 printf("WARNING: Non-uniform processors.\n"); 244 printf("WARNING: Using suboptimal topology.\n"); 245 return (smp_topo_none()); 246 } 247 /* 248 * No multi-core or hyper-threaded. 249 */ 250 if (cpu_logical * cpu_cores == 1) 251 return (smp_topo_none()); 252 /* 253 * Only HTT no multi-core. 254 */ 255 if (cpu_logical > 1 && cpu_cores == 1) 256 return (smp_topo_1level(CG_SHARE_L1, cpu_logical, CG_FLAG_HTT)); 257 /* 258 * Only multi-core no HTT. 259 */ 260 if (cpu_cores > 1 && cpu_logical == 1) 261 return (smp_topo_1level(CG_SHARE_NONE, cpu_cores, 0)); 262 /* 263 * Both HTT and multi-core. 264 */ 265 return (smp_topo_2level(CG_SHARE_NONE, cpu_cores, 266 CG_SHARE_L1, cpu_logical, CG_FLAG_HTT)); 267} 268 269 270/* 271 * Calculate usable address in base memory for AP trampoline code. 272 */ 273u_int 274mp_bootaddress(u_int basemem) 275{ 276 277 boot_address = trunc_page(basemem); /* round down to 4k boundary */ 278 if ((basemem - boot_address) < bootMP_size) 279 boot_address -= PAGE_SIZE; /* not enough, lower by 4k */ 280 281 return boot_address; 282} 283 284void 285cpu_add(u_int apic_id, char boot_cpu) 286{ 287 288 if (apic_id > MAX_APIC_ID) { 289 panic("SMP: APIC ID %d too high", apic_id); 290 return; 291 } 292 KASSERT(cpu_info[apic_id].cpu_present == 0, ("CPU %d added twice", 293 apic_id)); 294 cpu_info[apic_id].cpu_present = 1; 295 if (boot_cpu) { 296 KASSERT(boot_cpu_id == -1, 297 ("CPU %d claims to be BSP, but CPU %d already is", apic_id, 298 boot_cpu_id)); 299 boot_cpu_id = apic_id; 300 cpu_info[apic_id].cpu_bsp = 1; 301 } 302 if (mp_ncpus < MAXCPU) 303 mp_ncpus++; 304 if (bootverbose) 305 printf("SMP: Added CPU %d (%s)\n", apic_id, boot_cpu ? "BSP" : 306 "AP"); 307} 308 309void 310cpu_mp_setmaxid(void) 311{ 312 313 mp_maxid = MAXCPU - 1; 314} 315 316int 317cpu_mp_probe(void) 318{ 319 320 /* 321 * Always record BSP in CPU map so that the mbuf init code works 322 * correctly. 323 */ 324 all_cpus = 1; 325 if (mp_ncpus == 0) { 326 /* 327 * No CPUs were found, so this must be a UP system. Setup 328 * the variables to represent a system with a single CPU 329 * with an id of 0. 330 */ 331 mp_ncpus = 1; 332 return (0); 333 } 334 335 /* At least one CPU was found. */ 336 if (mp_ncpus == 1) { 337 /* 338 * One CPU was found, so this must be a UP system with 339 * an I/O APIC. 340 */ 341 return (0); 342 } 343 344 /* At least two CPUs were found. */ 345 return (1); 346} 347 348/* 349 * Initialize the IPI handlers and start up the AP's. 350 */ 351void 352cpu_mp_start(void) 353{ 354 int i; 355 u_int threads_per_cache, p[4]; 356 357 /* Initialize the logical ID to APIC ID table. */ 358 for (i = 0; i < MAXCPU; i++) { 359 cpu_apic_ids[i] = -1; 360 cpu_ipi_pending[i] = 0; 361 } 362 363 /* Install an inter-CPU IPI for TLB invalidation */ 364 setidt(IPI_INVLTLB, IDTVEC(invltlb), 365 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 366 setidt(IPI_INVLPG, IDTVEC(invlpg), 367 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 368 setidt(IPI_INVLRNG, IDTVEC(invlrng), 369 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 370 371 /* Install an inter-CPU IPI for cache invalidation. */ 372 setidt(IPI_INVLCACHE, IDTVEC(invlcache), 373 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 374 375 /* Install an inter-CPU IPI for lazy pmap release */ 376 setidt(IPI_LAZYPMAP, IDTVEC(lazypmap), 377 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 378 379 /* Install an inter-CPU IPI for all-CPU rendezvous */ 380 setidt(IPI_RENDEZVOUS, IDTVEC(rendezvous), 381 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 382 383 /* Install generic inter-CPU IPI handler */ 384 setidt(IPI_BITMAP_VECTOR, IDTVEC(ipi_intr_bitmap_handler), 385 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 386 387 /* Install an inter-CPU IPI for CPU stop/restart */ 388 setidt(IPI_STOP, IDTVEC(cpustop), 389 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 390 391 392 /* Set boot_cpu_id if needed. */ 393 if (boot_cpu_id == -1) { 394 boot_cpu_id = PCPU_GET(apic_id); 395 cpu_info[boot_cpu_id].cpu_bsp = 1; 396 } else 397 KASSERT(boot_cpu_id == PCPU_GET(apic_id), 398 ("BSP's APIC ID doesn't match boot_cpu_id")); 399 cpu_apic_ids[0] = boot_cpu_id; 400 401 assign_cpu_ids(); 402 403 /* Start each Application Processor */ 404 start_all_aps(); 405 406 /* Setup the initial logical CPUs info. */ 407 logical_cpus = logical_cpus_mask = 0; 408 if (cpu_feature & CPUID_HTT) 409 logical_cpus = (cpu_procinfo & CPUID_HTT_CORES) >> 16; 410 411 /* 412 * Work out if hyperthreading is *really* enabled. This 413 * is made really ugly by the fact that processors lie: Dual 414 * core processors claim to be hyperthreaded even when they're 415 * not, presumably because they want to be treated the same 416 * way as HTT with respect to per-cpu software licensing. 417 * At the time of writing (May 12, 2005) the only hyperthreaded 418 * cpus are from Intel, and Intel's dual-core processors can be 419 * identified via the "deterministic cache parameters" cpuid 420 * calls. 421 */ 422 /* 423 * First determine if this is an Intel processor which claims 424 * to have hyperthreading support. 425 */ 426 if ((cpu_feature & CPUID_HTT) && 427 (strcmp(cpu_vendor, "GenuineIntel") == 0)) { 428 /* 429 * If the "deterministic cache parameters" cpuid calls 430 * are available, use them. 431 */ 432 if (cpu_high >= 4) { 433 /* Ask the processor about the L1 cache. */ 434 for (i = 0; i < 1; i++) { 435 cpuid_count(4, i, p); 436 threads_per_cache = ((p[0] & 0x3ffc000) >> 14) + 1; 437 if (hyperthreading_cpus < threads_per_cache) 438 hyperthreading_cpus = threads_per_cache; 439 if ((p[0] & 0x1f) == 0) 440 break; 441 } 442 } 443 444 /* 445 * If the deterministic cache parameters are not 446 * available, or if no caches were reported to exist, 447 * just accept what the HTT flag indicated. 448 */ 449 if (hyperthreading_cpus == 0) 450 hyperthreading_cpus = logical_cpus; 451 } 452 453 set_interrupt_apic_ids(); 454} 455 456 457/* 458 * Print various information about the SMP system hardware and setup. 459 */ 460void 461cpu_mp_announce(void) 462{ 463 int i, x; 464 465 /* List CPUs */ 466 printf(" cpu0 (BSP): APIC ID: %2d\n", boot_cpu_id); 467 for (i = 1, x = 0; x <= MAX_APIC_ID; x++) { 468 if (!cpu_info[x].cpu_present || cpu_info[x].cpu_bsp) 469 continue; 470 if (cpu_info[x].cpu_disabled) 471 printf(" cpu (AP): APIC ID: %2d (disabled)\n", x); 472 else { 473 KASSERT(i < mp_ncpus, 474 ("mp_ncpus and actual cpus are out of whack")); 475 printf(" cpu%d (AP): APIC ID: %2d\n", i++, x); 476 } 477 } 478} 479 480/* 481 * AP CPU's call this to initialize themselves. 482 */ 483void 484init_secondary(void) 485{ 486 struct pcpu *pc; 487 vm_offset_t addr; 488 int gsel_tss; 489 int x, myid; 490 u_int cr0; 491 492 /* bootAP is set in start_ap() to our ID. */ 493 myid = bootAP; 494 495 /* Get per-cpu data */ 496 pc = &__pcpu[myid]; 497 498 /* prime data page for it to use */ 499 pcpu_init(pc, myid, sizeof(struct pcpu)); 500 pc->pc_apic_id = cpu_apic_ids[myid]; 501 pc->pc_prvspace = pc; 502 pc->pc_curthread = 0; 503 504 gdt_segs[GPRIV_SEL].ssd_base = (int) pc; 505 gdt_segs[GPROC0_SEL].ssd_base = (int) &pc->pc_common_tss; 506 507 for (x = 0; x < NGDT; x++) { 508 ssdtosd(&gdt_segs[x], &gdt[myid * NGDT + x].sd); 509 } 510 511 r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1; 512 r_gdt.rd_base = (int) &gdt[myid * NGDT]; 513 lgdt(&r_gdt); /* does magic intra-segment return */ 514 515 lidt(&r_idt); 516 517 lldt(_default_ldt); 518 PCPU_SET(currentldt, _default_ldt); 519 520 gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); 521 gdt[myid * NGDT + GPROC0_SEL].sd.sd_type = SDT_SYS386TSS; 522 PCPU_SET(common_tss.tss_esp0, 0); /* not used until after switch */ 523 PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL)); 524 PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16); 525 PCPU_SET(tss_gdt, &gdt[myid * NGDT + GPROC0_SEL].sd); 526 PCPU_SET(common_tssd, *PCPU_GET(tss_gdt)); 527 ltr(gsel_tss); 528 529 PCPU_SET(fsgs_gdt, &gdt[myid * NGDT + GUFS_SEL].sd); 530 531 /* 532 * Set to a known state: 533 * Set by mpboot.s: CR0_PG, CR0_PE 534 * Set by cpu_setregs: CR0_NE, CR0_MP, CR0_TS, CR0_WP, CR0_AM 535 */ 536 cr0 = rcr0(); 537 cr0 &= ~(CR0_CD | CR0_NW | CR0_EM); 538 load_cr0(cr0); 539 CHECK_WRITE(0x38, 5); 540 541 /* Disable local APIC just to be sure. */ 542 lapic_disable(); 543 544 /* signal our startup to the BSP. */ 545 mp_naps++; 546 CHECK_WRITE(0x39, 6); 547 548 /* Spin until the BSP releases the AP's. */ 549 while (!aps_ready) 550 ia32_pause(); 551 552 /* BSP may have changed PTD while we were waiting */ 553 invltlb(); 554 for (addr = 0; addr < NKPT * NBPDR - 1; addr += PAGE_SIZE) 555 invlpg(addr); 556 557#if defined(I586_CPU) && !defined(NO_F00F_HACK) 558 lidt(&r_idt); 559#endif 560 561 /* Initialize the PAT MSR if present. */ 562 pmap_init_pat(); 563 564 /* set up CPU registers and state */ 565 cpu_setregs(); 566 567 /* set up FPU state on the AP */ 568 npxinit(__INITIAL_NPXCW__); 569 570 /* set up SSE registers */ 571 enable_sse(); 572 573#ifdef PAE 574 /* Enable the PTE no-execute bit. */ 575 if ((amd_feature & AMDID_NX) != 0) { 576 uint64_t msr; 577 578 msr = rdmsr(MSR_EFER) | EFER_NXE; 579 wrmsr(MSR_EFER, msr); 580 } 581#endif 582 583 /* A quick check from sanity claus */ 584 if (PCPU_GET(apic_id) != lapic_id()) { 585 printf("SMP: cpuid = %d\n", PCPU_GET(cpuid)); 586 printf("SMP: actual apic_id = %d\n", lapic_id()); 587 printf("SMP: correct apic_id = %d\n", PCPU_GET(apic_id)); 588 panic("cpuid mismatch! boom!!"); 589 } 590 591 /* Initialize curthread. */ 592 KASSERT(PCPU_GET(idlethread) != NULL, ("no idle thread")); 593 PCPU_SET(curthread, PCPU_GET(idlethread)); 594 595 mtx_lock_spin(&ap_boot_mtx); 596 597 /* Init local apic for irq's */ 598 lapic_setup(1); 599 600 /* Set memory range attributes for this CPU to match the BSP */ 601 mem_range_AP_init(); 602 603 smp_cpus++; 604 605 CTR1(KTR_SMP, "SMP: AP CPU #%d Launched", PCPU_GET(cpuid)); 606 printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid)); 607 608 /* Determine if we are a logical CPU. */ 609 if (logical_cpus > 1 && PCPU_GET(apic_id) % logical_cpus != 0) 610 logical_cpus_mask |= PCPU_GET(cpumask); 611 612 /* Determine if we are a hyperthread. */ 613 if (hyperthreading_cpus > 1 && 614 PCPU_GET(apic_id) % hyperthreading_cpus != 0) 615 hyperthreading_cpus_mask |= PCPU_GET(cpumask); 616 617 /* Build our map of 'other' CPUs. */ 618 PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask)); 619 620 if (bootverbose) 621 lapic_dump("AP"); 622 623 if (smp_cpus == mp_ncpus) { 624 /* enable IPI's, tlb shootdown, freezes etc */ 625 atomic_store_rel_int(&smp_started, 1); 626 smp_active = 1; /* historic */ 627 } 628 629 mtx_unlock_spin(&ap_boot_mtx); 630 631 /* wait until all the AP's are up */ 632 while (smp_started == 0) 633 ia32_pause(); 634 635 /* enter the scheduler */ 636 sched_throw(NULL); 637 638 panic("scheduler returned us to %s", __func__); 639 /* NOTREACHED */ 640} 641 642/******************************************************************* 643 * local functions and data 644 */ 645 646/* 647 * We tell the I/O APIC code about all the CPUs we want to receive 648 * interrupts. If we don't want certain CPUs to receive IRQs we 649 * can simply not tell the I/O APIC code about them in this function. 650 * We also do not tell it about the BSP since it tells itself about 651 * the BSP internally to work with UP kernels and on UP machines. 652 */ 653static void 654set_interrupt_apic_ids(void) 655{ 656 u_int i, apic_id; 657 658 for (i = 0; i < MAXCPU; i++) { 659 apic_id = cpu_apic_ids[i]; 660 if (apic_id == -1) 661 continue; 662 if (cpu_info[apic_id].cpu_bsp) 663 continue; 664 if (cpu_info[apic_id].cpu_disabled) 665 continue; 666 667 /* Don't let hyperthreads service interrupts. */ 668 if (hyperthreading_cpus > 1 && 669 apic_id % hyperthreading_cpus != 0) 670 continue; 671 672 intr_add_cpu(i); 673 } 674} 675 676/* 677 * Assign logical CPU IDs to local APICs. 678 */ 679static void 680assign_cpu_ids(void) 681{ 682 u_int i; 683 684 /* Check for explicitly disabled CPUs. */ 685 for (i = 0; i <= MAX_APIC_ID; i++) { 686 if (!cpu_info[i].cpu_present || cpu_info[i].cpu_bsp) 687 continue; 688 689 /* Don't use this CPU if it has been disabled by a tunable. */ 690 if (resource_disabled("lapic", i)) { 691 cpu_info[i].cpu_disabled = 1; 692 continue; 693 } 694 } 695 696 /* 697 * Assign CPU IDs to local APIC IDs and disable any CPUs 698 * beyond MAXCPU. CPU 0 has already been assigned to the BSP, 699 * so we only have to assign IDs for APs. 700 */ 701 mp_ncpus = 1; 702 for (i = 0; i <= MAX_APIC_ID; i++) { 703 if (!cpu_info[i].cpu_present || cpu_info[i].cpu_bsp || 704 cpu_info[i].cpu_disabled) 705 continue; 706 707 if (mp_ncpus < MAXCPU) { 708 cpu_apic_ids[mp_ncpus] = i; 709 mp_ncpus++; 710 } else 711 cpu_info[i].cpu_disabled = 1; 712 } 713 KASSERT(mp_maxid >= mp_ncpus - 1, 714 ("%s: counters out of sync: max %d, count %d", __func__, mp_maxid, 715 mp_ncpus)); 716} 717 718/* 719 * start each AP in our list 720 */ 721/* Lowest 1MB is already mapped: don't touch*/ 722#define TMPMAP_START 1 723static int 724start_all_aps(void) 725{ 726#ifndef PC98 727 u_char mpbiosreason; 728#endif 729 uintptr_t kptbase; 730 u_int32_t mpbioswarmvec; 731 int apic_id, cpu, i; 732 733 mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN); 734 735 /* install the AP 1st level boot code */ 736 install_ap_tramp(); 737 738 /* save the current value of the warm-start vector */ 739 mpbioswarmvec = *((u_int32_t *) WARMBOOT_OFF); 740#ifndef PC98 741 outb(CMOS_REG, BIOS_RESET); 742 mpbiosreason = inb(CMOS_DATA); 743#endif 744 745 /* set up temporary P==V mapping for AP boot */ 746 /* XXX this is a hack, we should boot the AP on its own stack/PTD */ 747 748 kptbase = (uintptr_t)(void *)KPTphys; 749 for (i = TMPMAP_START; i < NKPT; i++) 750 PTD[i] = (pd_entry_t)(PG_V | PG_RW | 751 ((kptbase + i * PAGE_SIZE) & PG_FRAME)); 752 invltlb(); 753 754 /* start each AP */ 755 for (cpu = 1; cpu < mp_ncpus; cpu++) { 756 apic_id = cpu_apic_ids[cpu]; 757 758 /* allocate and set up an idle stack data page */ 759 bootstacks[cpu] = (char *)kmem_alloc(kernel_map, KSTACK_PAGES * PAGE_SIZE); 760 761 /* setup a vector to our boot code */ 762 *((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET; 763 *((volatile u_short *) WARMBOOT_SEG) = (boot_address >> 4); 764#ifndef PC98 765 outb(CMOS_REG, BIOS_RESET); 766 outb(CMOS_DATA, BIOS_WARM); /* 'warm-start' */ 767#endif 768 769 bootSTK = (char *)bootstacks[cpu] + KSTACK_PAGES * PAGE_SIZE - 4; 770 bootAP = cpu; 771 772 /* attempt to start the Application Processor */ 773 CHECK_INIT(99); /* setup checkpoints */ 774 if (!start_ap(apic_id)) { 775 printf("AP #%d (PHY# %d) failed!\n", cpu, apic_id); 776 CHECK_PRINT("trace"); /* show checkpoints */ 777 /* better panic as the AP may be running loose */ 778 printf("panic y/n? [y] "); 779 if (cngetc() != 'n') 780 panic("bye-bye"); 781 } 782 CHECK_PRINT("trace"); /* show checkpoints */ 783 784 all_cpus |= (1 << cpu); /* record AP in CPU map */ 785 } 786 787 /* build our map of 'other' CPUs */ 788 PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask)); 789 790 /* restore the warmstart vector */ 791 *(u_int32_t *) WARMBOOT_OFF = mpbioswarmvec; 792 793#ifndef PC98 794 outb(CMOS_REG, BIOS_RESET); 795 outb(CMOS_DATA, mpbiosreason); 796#endif 797 798 /* Undo V==P hack from above */ 799 for (i = TMPMAP_START; i < NKPT; i++) 800 PTD[i] = 0; 801 pmap_invalidate_range(kernel_pmap, 0, NKPT * NBPDR - 1); 802 803 /* number of APs actually started */ 804 return mp_naps; 805} 806 807/* 808 * load the 1st level AP boot code into base memory. 809 */ 810 811/* targets for relocation */ 812extern void bigJump(void); 813extern void bootCodeSeg(void); 814extern void bootDataSeg(void); 815extern void MPentry(void); 816extern u_int MP_GDT; 817extern u_int mp_gdtbase; 818 819static void 820install_ap_tramp(void) 821{ 822 int x; 823 int size = *(int *) ((u_long) & bootMP_size); 824 vm_offset_t va = boot_address + KERNBASE; 825 u_char *src = (u_char *) ((u_long) bootMP); 826 u_char *dst = (u_char *) va; 827 u_int boot_base = (u_int) bootMP; 828 u_int8_t *dst8; 829 u_int16_t *dst16; 830 u_int32_t *dst32; 831 832 KASSERT (size <= PAGE_SIZE, 833 ("'size' do not fit into PAGE_SIZE, as expected.")); 834 pmap_kenter(va, boot_address); 835 pmap_invalidate_page (kernel_pmap, va); 836 for (x = 0; x < size; ++x) 837 *dst++ = *src++; 838 839 /* 840 * modify addresses in code we just moved to basemem. unfortunately we 841 * need fairly detailed info about mpboot.s for this to work. changes 842 * to mpboot.s might require changes here. 843 */ 844 845 /* boot code is located in KERNEL space */ 846 dst = (u_char *) va; 847 848 /* modify the lgdt arg */ 849 dst32 = (u_int32_t *) (dst + ((u_int) & mp_gdtbase - boot_base)); 850 *dst32 = boot_address + ((u_int) & MP_GDT - boot_base); 851 852 /* modify the ljmp target for MPentry() */ 853 dst32 = (u_int32_t *) (dst + ((u_int) bigJump - boot_base) + 1); 854 *dst32 = ((u_int) MPentry - KERNBASE); 855 856 /* modify the target for boot code segment */ 857 dst16 = (u_int16_t *) (dst + ((u_int) bootCodeSeg - boot_base)); 858 dst8 = (u_int8_t *) (dst16 + 1); 859 *dst16 = (u_int) boot_address & 0xffff; 860 *dst8 = ((u_int) boot_address >> 16) & 0xff; 861 862 /* modify the target for boot data segment */ 863 dst16 = (u_int16_t *) (dst + ((u_int) bootDataSeg - boot_base)); 864 dst8 = (u_int8_t *) (dst16 + 1); 865 *dst16 = (u_int) boot_address & 0xffff; 866 *dst8 = ((u_int) boot_address >> 16) & 0xff; 867} 868 869/* 870 * This function starts the AP (application processor) identified 871 * by the APIC ID 'physicalCpu'. It does quite a "song and dance" 872 * to accomplish this. This is necessary because of the nuances 873 * of the different hardware we might encounter. It isn't pretty, 874 * but it seems to work. 875 */ 876static int 877start_ap(int apic_id) 878{ 879 int vector, ms; 880 int cpus; 881 882 /* calculate the vector */ 883 vector = (boot_address >> 12) & 0xff; 884 885 /* used as a watchpoint to signal AP startup */ 886 cpus = mp_naps; 887 888 /* 889 * first we do an INIT/RESET IPI this INIT IPI might be run, reseting 890 * and running the target CPU. OR this INIT IPI might be latched (P5 891 * bug), CPU waiting for STARTUP IPI. OR this INIT IPI might be 892 * ignored. 893 */ 894 895 /* do an INIT IPI: assert RESET */ 896 lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE | 897 APIC_LEVEL_ASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT, apic_id); 898 899 /* wait for pending status end */ 900 lapic_ipi_wait(-1); 901 902 /* do an INIT IPI: deassert RESET */ 903 lapic_ipi_raw(APIC_DEST_ALLESELF | APIC_TRIGMOD_LEVEL | 904 APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT, 0); 905 906 /* wait for pending status end */ 907 DELAY(10000); /* wait ~10mS */ 908 lapic_ipi_wait(-1); 909 910 /* 911 * next we do a STARTUP IPI: the previous INIT IPI might still be 912 * latched, (P5 bug) this 1st STARTUP would then terminate 913 * immediately, and the previously started INIT IPI would continue. OR 914 * the previous INIT IPI has already run. and this STARTUP IPI will 915 * run. OR the previous INIT IPI was ignored. and this STARTUP IPI 916 * will run. 917 */ 918 919 /* do a STARTUP IPI */ 920 lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE | 921 APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP | 922 vector, apic_id); 923 lapic_ipi_wait(-1); 924 DELAY(200); /* wait ~200uS */ 925 926 /* 927 * finally we do a 2nd STARTUP IPI: this 2nd STARTUP IPI should run IF 928 * the previous STARTUP IPI was cancelled by a latched INIT IPI. OR 929 * this STARTUP IPI will be ignored, as only ONE STARTUP IPI is 930 * recognized after hardware RESET or INIT IPI. 931 */ 932 933 lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE | 934 APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP | 935 vector, apic_id); 936 lapic_ipi_wait(-1); 937 DELAY(200); /* wait ~200uS */ 938 939 /* Wait up to 5 seconds for it to start. */ 940 for (ms = 0; ms < 5000; ms++) { 941 if (mp_naps > cpus) 942 return 1; /* return SUCCESS */ 943 DELAY(1000); 944 } 945 return 0; /* return FAILURE */ 946} 947 948#ifdef COUNT_XINVLTLB_HITS 949u_int xhits_gbl[MAXCPU]; 950u_int xhits_pg[MAXCPU]; 951u_int xhits_rng[MAXCPU]; 952SYSCTL_NODE(_debug, OID_AUTO, xhits, CTLFLAG_RW, 0, ""); 953SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, global, CTLFLAG_RW, &xhits_gbl, 954 sizeof(xhits_gbl), "IU", ""); 955SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, page, CTLFLAG_RW, &xhits_pg, 956 sizeof(xhits_pg), "IU", ""); 957SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, range, CTLFLAG_RW, &xhits_rng, 958 sizeof(xhits_rng), "IU", ""); 959 960u_int ipi_global; 961u_int ipi_page; 962u_int ipi_range; 963u_int ipi_range_size; 964SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_global, CTLFLAG_RW, &ipi_global, 0, ""); 965SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_page, CTLFLAG_RW, &ipi_page, 0, ""); 966SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range, CTLFLAG_RW, &ipi_range, 0, ""); 967SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range_size, CTLFLAG_RW, &ipi_range_size, 968 0, ""); 969 970u_int ipi_masked_global; 971u_int ipi_masked_page; 972u_int ipi_masked_range; 973u_int ipi_masked_range_size; 974SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_global, CTLFLAG_RW, 975 &ipi_masked_global, 0, ""); 976SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_page, CTLFLAG_RW, 977 &ipi_masked_page, 0, ""); 978SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range, CTLFLAG_RW, 979 &ipi_masked_range, 0, ""); 980SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range_size, CTLFLAG_RW, 981 &ipi_masked_range_size, 0, ""); 982#endif /* COUNT_XINVLTLB_HITS */ 983 984/* 985 * Flush the TLB on all other CPU's 986 */ 987static void 988smp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2) 989{ 990 u_int ncpu; 991 992 ncpu = mp_ncpus - 1; /* does not shootdown self */ 993 if (ncpu < 1) 994 return; /* no other cpus */ 995 if (!(read_eflags() & PSL_I)) 996 panic("%s: interrupts disabled", __func__); 997 mtx_lock_spin(&smp_ipi_mtx); 998 smp_tlb_addr1 = addr1; 999 smp_tlb_addr2 = addr2; 1000 atomic_store_rel_int(&smp_tlb_wait, 0); 1001 ipi_all_but_self(vector); 1002 while (smp_tlb_wait < ncpu) 1003 ia32_pause(); 1004 mtx_unlock_spin(&smp_ipi_mtx); 1005} 1006 1007static void 1008smp_targeted_tlb_shootdown(u_int mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2) 1009{ 1010 int ncpu, othercpus; 1011 1012 othercpus = mp_ncpus - 1; 1013 if (mask == (u_int)-1) { 1014 ncpu = othercpus; 1015 if (ncpu < 1) 1016 return; 1017 } else { 1018 mask &= ~PCPU_GET(cpumask); 1019 if (mask == 0) 1020 return; 1021 ncpu = bitcount32(mask); 1022 if (ncpu > othercpus) { 1023 /* XXX this should be a panic offence */ 1024 printf("SMP: tlb shootdown to %d other cpus (only have %d)\n", 1025 ncpu, othercpus); 1026 ncpu = othercpus; 1027 } 1028 /* XXX should be a panic, implied by mask == 0 above */ 1029 if (ncpu < 1) 1030 return; 1031 } 1032 if (!(read_eflags() & PSL_I)) 1033 panic("%s: interrupts disabled", __func__); 1034 mtx_lock_spin(&smp_ipi_mtx); 1035 smp_tlb_addr1 = addr1; 1036 smp_tlb_addr2 = addr2; 1037 atomic_store_rel_int(&smp_tlb_wait, 0); 1038 if (mask == (u_int)-1) 1039 ipi_all_but_self(vector); 1040 else 1041 ipi_selected(mask, vector); 1042 while (smp_tlb_wait < ncpu) 1043 ia32_pause(); 1044 mtx_unlock_spin(&smp_ipi_mtx); 1045} 1046 1047void 1048smp_cache_flush(void) 1049{ 1050 1051 if (smp_started) 1052 smp_tlb_shootdown(IPI_INVLCACHE, 0, 0); 1053} 1054 1055void 1056smp_invltlb(void) 1057{ 1058 1059 if (smp_started) { 1060 smp_tlb_shootdown(IPI_INVLTLB, 0, 0); 1061#ifdef COUNT_XINVLTLB_HITS 1062 ipi_global++; 1063#endif 1064 } 1065} 1066 1067void 1068smp_invlpg(vm_offset_t addr) 1069{ 1070 1071 if (smp_started) { 1072 smp_tlb_shootdown(IPI_INVLPG, addr, 0); 1073#ifdef COUNT_XINVLTLB_HITS 1074 ipi_page++; 1075#endif 1076 } 1077} 1078 1079void 1080smp_invlpg_range(vm_offset_t addr1, vm_offset_t addr2) 1081{ 1082 1083 if (smp_started) { 1084 smp_tlb_shootdown(IPI_INVLRNG, addr1, addr2); 1085#ifdef COUNT_XINVLTLB_HITS 1086 ipi_range++; 1087 ipi_range_size += (addr2 - addr1) / PAGE_SIZE; 1088#endif 1089 } 1090} 1091 1092void 1093smp_masked_invltlb(u_int mask) 1094{ 1095 1096 if (smp_started) { 1097 smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, 0, 0); 1098#ifdef COUNT_XINVLTLB_HITS 1099 ipi_masked_global++; 1100#endif 1101 } 1102} 1103 1104void 1105smp_masked_invlpg(u_int mask, vm_offset_t addr) 1106{ 1107 1108 if (smp_started) { 1109 smp_targeted_tlb_shootdown(mask, IPI_INVLPG, addr, 0); 1110#ifdef COUNT_XINVLTLB_HITS 1111 ipi_masked_page++; 1112#endif 1113 } 1114} 1115 1116void 1117smp_masked_invlpg_range(u_int mask, vm_offset_t addr1, vm_offset_t addr2) 1118{ 1119 1120 if (smp_started) { 1121 smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, addr1, addr2); 1122#ifdef COUNT_XINVLTLB_HITS 1123 ipi_masked_range++; 1124 ipi_masked_range_size += (addr2 - addr1) / PAGE_SIZE; 1125#endif 1126 } 1127} 1128 1129void 1130ipi_bitmap_handler(struct trapframe frame) 1131{ 1132 int cpu = PCPU_GET(cpuid); 1133 u_int ipi_bitmap; 1134 1135 ipi_bitmap = atomic_readandclear_int(&cpu_ipi_pending[cpu]); 1136 1137 if (ipi_bitmap & (1 << IPI_PREEMPT)) { 1138#ifdef COUNT_IPIS 1139 (*ipi_preempt_counts[cpu])++; 1140#endif 1141 sched_preempt(curthread); 1142 } 1143 1144 if (ipi_bitmap & (1 << IPI_AST)) { 1145#ifdef COUNT_IPIS 1146 (*ipi_ast_counts[cpu])++; 1147#endif 1148 /* Nothing to do for AST */ 1149 } 1150} 1151 1152/* 1153 * send an IPI to a set of cpus. 1154 */ 1155void 1156ipi_selected(u_int32_t cpus, u_int ipi) 1157{ 1158 int cpu; 1159 u_int bitmap = 0; 1160 u_int old_pending; 1161 u_int new_pending; 1162 1163 if (IPI_IS_BITMAPED(ipi)) { 1164 bitmap = 1 << ipi; 1165 ipi = IPI_BITMAP_VECTOR; 1166 } 1167 1168#ifdef STOP_NMI 1169 if (ipi == IPI_STOP && stop_cpus_with_nmi) { 1170 ipi_nmi_selected(cpus); 1171 return; 1172 } 1173#endif 1174 CTR3(KTR_SMP, "%s: cpus: %x ipi: %x", __func__, cpus, ipi); 1175 while ((cpu = ffs(cpus)) != 0) { 1176 cpu--; 1177 cpus &= ~(1 << cpu); 1178 1179 KASSERT(cpu_apic_ids[cpu] != -1, 1180 ("IPI to non-existent CPU %d", cpu)); 1181 1182 if (bitmap) { 1183 do { 1184 old_pending = cpu_ipi_pending[cpu]; 1185 new_pending = old_pending | bitmap; 1186 } while (!atomic_cmpset_int(&cpu_ipi_pending[cpu],old_pending, new_pending)); 1187 1188 if (old_pending) 1189 continue; 1190 } 1191 1192 lapic_ipi_vectored(ipi, cpu_apic_ids[cpu]); 1193 } 1194 1195} 1196 1197/* 1198 * send an IPI INTerrupt containing 'vector' to all CPUs, including myself 1199 */ 1200void 1201ipi_all(u_int ipi) 1202{ 1203 1204 if (IPI_IS_BITMAPED(ipi) || (ipi == IPI_STOP && stop_cpus_with_nmi)) { 1205 ipi_selected(all_cpus, ipi); 1206 return; 1207 } 1208 CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi); 1209 lapic_ipi_vectored(ipi, APIC_IPI_DEST_ALL); 1210} 1211 1212/* 1213 * send an IPI to all CPUs EXCEPT myself 1214 */ 1215void 1216ipi_all_but_self(u_int ipi) 1217{ 1218 1219 if (IPI_IS_BITMAPED(ipi) || (ipi == IPI_STOP && stop_cpus_with_nmi)) { 1220 ipi_selected(PCPU_GET(other_cpus), ipi); 1221 return; 1222 } 1223 CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi); 1224 lapic_ipi_vectored(ipi, APIC_IPI_DEST_OTHERS); 1225} 1226 1227/* 1228 * send an IPI to myself 1229 */ 1230void 1231ipi_self(u_int ipi) 1232{ 1233 1234 if (IPI_IS_BITMAPED(ipi) || (ipi == IPI_STOP && stop_cpus_with_nmi)) { 1235 ipi_selected(PCPU_GET(cpumask), ipi); 1236 return; 1237 } 1238 CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi); 1239 lapic_ipi_vectored(ipi, APIC_IPI_DEST_SELF); 1240} 1241 1242#ifdef STOP_NMI 1243/* 1244 * send NMI IPI to selected CPUs 1245 */ 1246 1247#define BEFORE_SPIN 1000000 1248 1249void 1250ipi_nmi_selected(u_int32_t cpus) 1251{ 1252 int cpu; 1253 register_t icrlo; 1254 1255 icrlo = APIC_DELMODE_NMI | APIC_DESTMODE_PHY | APIC_LEVEL_ASSERT 1256 | APIC_TRIGMOD_EDGE; 1257 1258 CTR2(KTR_SMP, "%s: cpus: %x nmi", __func__, cpus); 1259 1260 atomic_set_int(&ipi_nmi_pending, cpus); 1261 1262 while ((cpu = ffs(cpus)) != 0) { 1263 cpu--; 1264 cpus &= ~(1 << cpu); 1265 1266 KASSERT(cpu_apic_ids[cpu] != -1, 1267 ("IPI NMI to non-existent CPU %d", cpu)); 1268 1269 /* Wait for an earlier IPI to finish. */ 1270 if (!lapic_ipi_wait(BEFORE_SPIN)) 1271 panic("ipi_nmi_selected: previous IPI has not cleared"); 1272 1273 lapic_ipi_raw(icrlo, cpu_apic_ids[cpu]); 1274 } 1275} 1276 1277int 1278ipi_nmi_handler(void) 1279{ 1280 int cpumask = PCPU_GET(cpumask); 1281 1282 if (!(ipi_nmi_pending & cpumask)) 1283 return 1; 1284 1285 atomic_clear_int(&ipi_nmi_pending, cpumask); 1286 cpustop_handler(); 1287 return 0; 1288} 1289 1290#endif /* STOP_NMI */ 1291 1292/* 1293 * Handle an IPI_STOP by saving our current context and spinning until we 1294 * are resumed. 1295 */ 1296void 1297cpustop_handler(void) 1298{ 1299 int cpu = PCPU_GET(cpuid); 1300 int cpumask = PCPU_GET(cpumask); 1301 1302 savectx(&stoppcbs[cpu]); 1303 1304 /* Indicate that we are stopped */ 1305 atomic_set_int(&stopped_cpus, cpumask); 1306 1307 /* Wait for restart */ 1308 while (!(started_cpus & cpumask)) 1309 ia32_pause(); 1310 1311 atomic_clear_int(&started_cpus, cpumask); 1312 atomic_clear_int(&stopped_cpus, cpumask); 1313 1314 if (cpu == 0 && cpustop_restartfunc != NULL) { 1315 cpustop_restartfunc(); 1316 cpustop_restartfunc = NULL; 1317 } 1318} 1319 1320/* 1321 * This is called once the rest of the system is up and running and we're 1322 * ready to let the AP's out of the pen. 1323 */ 1324static void 1325release_aps(void *dummy __unused) 1326{ 1327 1328 if (mp_ncpus == 1) 1329 return; 1330 atomic_store_rel_int(&aps_ready, 1); 1331 while (smp_started == 0) 1332 ia32_pause(); 1333} 1334SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL); 1335 1336static int 1337sysctl_hlt_cpus(SYSCTL_HANDLER_ARGS) 1338{ 1339 u_int mask; 1340 int error; 1341 1342 mask = hlt_cpus_mask; 1343 error = sysctl_handle_int(oidp, &mask, 0, req); 1344 if (error || !req->newptr) 1345 return (error); 1346 1347 if (logical_cpus_mask != 0 && 1348 (mask & logical_cpus_mask) == logical_cpus_mask) 1349 hlt_logical_cpus = 1; 1350 else 1351 hlt_logical_cpus = 0; 1352 1353 if (! hyperthreading_allowed) 1354 mask |= hyperthreading_cpus_mask; 1355 1356 if ((mask & all_cpus) == all_cpus) 1357 mask &= ~(1<<0); 1358 hlt_cpus_mask = mask; 1359 return (error); 1360} 1361SYSCTL_PROC(_machdep, OID_AUTO, hlt_cpus, CTLTYPE_INT|CTLFLAG_RW, 1362 0, 0, sysctl_hlt_cpus, "IU", 1363 "Bitmap of CPUs to halt. 101 (binary) will halt CPUs 0 and 2."); 1364 1365static int 1366sysctl_hlt_logical_cpus(SYSCTL_HANDLER_ARGS) 1367{ 1368 int disable, error; 1369 1370 disable = hlt_logical_cpus; 1371 error = sysctl_handle_int(oidp, &disable, 0, req); 1372 if (error || !req->newptr) 1373 return (error); 1374 1375 if (disable) 1376 hlt_cpus_mask |= logical_cpus_mask; 1377 else 1378 hlt_cpus_mask &= ~logical_cpus_mask; 1379 1380 if (! hyperthreading_allowed) 1381 hlt_cpus_mask |= hyperthreading_cpus_mask; 1382 1383 if ((hlt_cpus_mask & all_cpus) == all_cpus) 1384 hlt_cpus_mask &= ~(1<<0); 1385 1386 hlt_logical_cpus = disable; 1387 return (error); 1388} 1389 1390static int 1391sysctl_hyperthreading_allowed(SYSCTL_HANDLER_ARGS) 1392{ 1393 int allowed, error; 1394 1395 allowed = hyperthreading_allowed; 1396 error = sysctl_handle_int(oidp, &allowed, 0, req); 1397 if (error || !req->newptr) 1398 return (error); 1399 1400 if (allowed) 1401 hlt_cpus_mask &= ~hyperthreading_cpus_mask; 1402 else 1403 hlt_cpus_mask |= hyperthreading_cpus_mask; 1404 1405 if (logical_cpus_mask != 0 && 1406 (hlt_cpus_mask & logical_cpus_mask) == logical_cpus_mask) 1407 hlt_logical_cpus = 1; 1408 else 1409 hlt_logical_cpus = 0; 1410 1411 if ((hlt_cpus_mask & all_cpus) == all_cpus) 1412 hlt_cpus_mask &= ~(1<<0); 1413 1414 hyperthreading_allowed = allowed; 1415 return (error); 1416} 1417 1418static void 1419cpu_hlt_setup(void *dummy __unused) 1420{ 1421 1422 if (logical_cpus_mask != 0) { 1423 TUNABLE_INT_FETCH("machdep.hlt_logical_cpus", 1424 &hlt_logical_cpus); 1425 sysctl_ctx_init(&logical_cpu_clist); 1426 SYSCTL_ADD_PROC(&logical_cpu_clist, 1427 SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO, 1428 "hlt_logical_cpus", CTLTYPE_INT|CTLFLAG_RW, 0, 0, 1429 sysctl_hlt_logical_cpus, "IU", ""); 1430 SYSCTL_ADD_UINT(&logical_cpu_clist, 1431 SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO, 1432 "logical_cpus_mask", CTLTYPE_INT|CTLFLAG_RD, 1433 &logical_cpus_mask, 0, ""); 1434 1435 if (hlt_logical_cpus) 1436 hlt_cpus_mask |= logical_cpus_mask; 1437 1438 /* 1439 * If necessary for security purposes, force 1440 * hyperthreading off, regardless of the value 1441 * of hlt_logical_cpus. 1442 */ 1443 if (hyperthreading_cpus_mask) { 1444 TUNABLE_INT_FETCH("machdep.hyperthreading_allowed", 1445 &hyperthreading_allowed); 1446 SYSCTL_ADD_PROC(&logical_cpu_clist, 1447 SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO, 1448 "hyperthreading_allowed", CTLTYPE_INT|CTLFLAG_RW, 1449 0, 0, sysctl_hyperthreading_allowed, "IU", ""); 1450 if (! hyperthreading_allowed) 1451 hlt_cpus_mask |= hyperthreading_cpus_mask; 1452 } 1453 } 1454} 1455SYSINIT(cpu_hlt, SI_SUB_SMP, SI_ORDER_ANY, cpu_hlt_setup, NULL); 1456 1457int 1458mp_grab_cpu_hlt(void) 1459{ 1460 u_int mask = PCPU_GET(cpumask); 1461#ifdef MP_WATCHDOG 1462 u_int cpuid = PCPU_GET(cpuid); 1463#endif 1464 int retval; 1465 1466#ifdef MP_WATCHDOG 1467 ap_watchdog(cpuid); 1468#endif 1469 1470 retval = mask & hlt_cpus_mask; 1471 while (mask & hlt_cpus_mask) 1472 __asm __volatile("sti; hlt" : : : "memory"); 1473 return (retval); 1474} 1475 1476#ifdef COUNT_IPIS 1477/* 1478 * Setup interrupt counters for IPI handlers. 1479 */ 1480static void 1481mp_ipi_intrcnt(void *dummy) 1482{ 1483 char buf[64]; 1484 int i; 1485 1486 for (i = 0; i < mp_maxid; i++) { 1487 if (CPU_ABSENT(i)) 1488 continue; 1489 snprintf(buf, sizeof(buf), "cpu%d: invltlb", i); 1490 intrcnt_add(buf, &ipi_invltlb_counts[i]); 1491 snprintf(buf, sizeof(buf), "cpu%d: invlrng", i); 1492 intrcnt_add(buf, &ipi_invlrng_counts[i]); 1493 snprintf(buf, sizeof(buf), "cpu%d: invlpg", i); 1494 intrcnt_add(buf, &ipi_invlpg_counts[i]); 1495 snprintf(buf, sizeof(buf), "cpu%d: preempt", i); 1496 intrcnt_add(buf, &ipi_preempt_counts[i]); 1497 snprintf(buf, sizeof(buf), "cpu%d: ast", i); 1498 intrcnt_add(buf, &ipi_ast_counts[i]); 1499 snprintf(buf, sizeof(buf), "cpu%d: rendezvous", i); 1500 intrcnt_add(buf, &ipi_rendezvous_counts[i]); 1501 snprintf(buf, sizeof(buf), "cpu%d: lazypmap", i); 1502 intrcnt_add(buf, &ipi_lazypmap_counts[i]); 1503 } 1504} 1505SYSINIT(mp_ipi_intrcnt, SI_SUB_INTR, SI_ORDER_MIDDLE, mp_ipi_intrcnt, NULL); 1506#endif 1507