mp_machdep.c revision 187880
/*-
 * Copyright (c) 1996, by Steve Passe
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. The name of the developer may NOT be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/i386/i386/mp_machdep.c 187880 2009-01-29 09:22:56Z jeff $");

#include "opt_apic.h"
#include "opt_cpu.h"
#include "opt_kstack_pages.h"
#include "opt_mp_watchdog.h"
#include "opt_sched.h"
#include "opt_smp.h"

#if !defined(lint)
#if !defined(SMP)
#error How did you get here?
#endif

#ifndef DEV_APIC
#error The apic device is required for SMP, add "device apic" to your config file.
#endif
#if defined(CPU_DISABLE_CMPXCHG) && !defined(COMPILING_LINT)
#error SMP not supported with CPU_DISABLE_CMPXCHG
#endif
#endif /* not lint */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/cons.h>	/* cngetc() */
#ifdef GPROF
#include <sys/gmon.h>
#endif
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/memrange.h>
#include <sys/mutex.h>
#include <sys/pcpu.h>
#include <sys/proc.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>

#include <machine/apicreg.h>
#include <machine/cputypes.h>
#include <machine/md_var.h>
#include <machine/mp_watchdog.h>
#include <machine/pcb.h>
#include <machine/psl.h>
#include <machine/smp.h>
#include <machine/specialreg.h>

#define WARMBOOT_TARGET		0
#define WARMBOOT_OFF		(KERNBASE + 0x0467)
#define WARMBOOT_SEG		(KERNBASE + 0x0469)

#define CMOS_REG		(0x70)
#define CMOS_DATA		(0x71)
#define BIOS_RESET		(0x0f)
#define BIOS_WARM		(0x0a)

/*
 * this code MUST be enabled here and in mpboot.s.
 * it follows the very early stages of AP boot by placing values in CMOS ram.
 * it NORMALLY will never be needed and thus the primitive method for enabling.
 *
#define CHECK_POINTS
 */

#if defined(CHECK_POINTS) && !defined(PC98)
#define CHECK_READ(A)	 (outb(CMOS_REG, (A)), inb(CMOS_DATA))
#define CHECK_WRITE(A,D) (outb(CMOS_REG, (A)), outb(CMOS_DATA, (D)))

#define CHECK_INIT(D);				\
	CHECK_WRITE(0x34, (D));			\
	CHECK_WRITE(0x35, (D));			\
	CHECK_WRITE(0x36, (D));			\
	CHECK_WRITE(0x37, (D));			\
	CHECK_WRITE(0x38, (D));			\
	CHECK_WRITE(0x39, (D));

#define CHECK_PRINT(S);				\
	printf("%s: %d, %d, %d, %d, %d, %d\n",	\
	    (S),				\
	    CHECK_READ(0x34),			\
	    CHECK_READ(0x35),			\
	    CHECK_READ(0x36),			\
	    CHECK_READ(0x37),			\
	    CHECK_READ(0x38),			\
	    CHECK_READ(0x39));

#else				/* CHECK_POINTS */

#define CHECK_INIT(D)
#define CHECK_PRINT(S)
#define CHECK_WRITE(A, D)

#endif				/* CHECK_POINTS */

/* lock region used by kernel profiling */
int	mcount_lock;

int	mp_naps;		/* # of Application processors */
int	boot_cpu_id = -1;	/* designated BSP */

extern struct pcpu __pcpu[];

/* AP uses this during bootstrap.  Do not staticize. */
char *bootSTK;
static int bootAP;

/* Free these after use */
void *bootstacks[MAXCPU];

/* Hotwire a 0->4MB V==P mapping */
extern pt_entry_t *KPTphys;

struct pcb stoppcbs[MAXCPU];

/* Variables needed for SMP tlb shootdown. */
vm_offset_t smp_tlb_addr1;
vm_offset_t smp_tlb_addr2;
volatile int smp_tlb_wait;

#ifdef STOP_NMI
volatile cpumask_t ipi_nmi_pending;

static void	ipi_nmi_selected(u_int32_t cpus);
#endif

#ifdef COUNT_IPIS
/* Interrupt counts. */
static u_long *ipi_preempt_counts[MAXCPU];
static u_long *ipi_ast_counts[MAXCPU];
u_long *ipi_invltlb_counts[MAXCPU];
u_long *ipi_invlrng_counts[MAXCPU];
u_long *ipi_invlpg_counts[MAXCPU];
u_long *ipi_invlcache_counts[MAXCPU];
u_long *ipi_rendezvous_counts[MAXCPU];
u_long *ipi_lazypmap_counts[MAXCPU];
#endif

/*
 * Local data and functions.
 */

#ifdef STOP_NMI
/*
 * Provide an alternate method of stopping other CPUs. If another CPU has
 * disabled interrupts the conventional STOP IPI will be blocked. This
 * NMI-based stop should get through in that case.
 */
static int stop_cpus_with_nmi = 1;
SYSCTL_INT(_debug, OID_AUTO, stop_cpus_with_nmi, CTLTYPE_INT | CTLFLAG_RW,
    &stop_cpus_with_nmi, 0, "");
TUNABLE_INT("debug.stop_cpus_with_nmi", &stop_cpus_with_nmi);
#else
#define	stop_cpus_with_nmi	0
#endif

static u_int logical_cpus;

/* used to hold the AP's until we are ready to release them */
static struct mtx ap_boot_mtx;

/* Set to 1 once we're ready to let the APs out of the pen. */
static volatile int aps_ready = 0;

/*
 * Store data from cpu_add() until later in the boot when we actually setup
 * the APs.
 */
struct cpu_info {
	int	cpu_present:1;
	int	cpu_bsp:1;
	int	cpu_disabled:1;
} static cpu_info[MAX_APIC_ID + 1];
int cpu_apic_ids[MAXCPU];
int apic_cpuids[MAX_APIC_ID + 1];

/* Holds pending bitmap based IPIs per CPU */
static volatile u_int cpu_ipi_pending[MAXCPU];

static u_int boot_address;

static void	assign_cpu_ids(void);
static void	install_ap_tramp(void);
static void	set_interrupt_apic_ids(void);
static int	start_all_aps(void);
static int	start_ap(int apic_id);
static void	release_aps(void *dummy);

static int	hlt_logical_cpus;
static u_int	hyperthreading_cpus;
static cpumask_t	hyperthreading_cpus_mask;
static int	hyperthreading_allowed = 1;
static struct	sysctl_ctx_list logical_cpu_clist;

static void
mem_range_AP_init(void)
{
	if (mem_range_softc.mr_op && mem_range_softc.mr_op->initAP)
		mem_range_softc.mr_op->initAP(&mem_range_softc);
}

struct cpu_group *
cpu_topo(void)
{
	if (cpu_cores == 0)
		cpu_cores = 1;
	if (cpu_logical == 0)
		cpu_logical = 1;
	if (mp_ncpus % (cpu_cores * cpu_logical) != 0) {
		printf("WARNING: Non-uniform processors.\n");
		printf("WARNING: Using suboptimal topology.\n");
		return (smp_topo_none());
	}
	/*
	 * No multi-core or hyper-threaded.
	 */
	if (cpu_logical * cpu_cores == 1)
		return (smp_topo_none());
	/*
	 * Only HTT no multi-core.
	 */
	if (cpu_logical > 1 && cpu_cores == 1)
		return (smp_topo_1level(CG_SHARE_L1, cpu_logical, CG_FLAG_HTT));
	/*
	 * Only multi-core no HTT.
	 */
	if (cpu_cores > 1 && cpu_logical == 1)
		return (smp_topo_1level(CG_SHARE_NONE, cpu_cores, 0));
	/*
	 * Both HTT and multi-core.
	 */
	return (smp_topo_2level(CG_SHARE_NONE, cpu_cores,
	    CG_SHARE_L1, cpu_logical, CG_FLAG_HTT));
}


/*
 * Calculate usable address in base memory for AP trampoline code.
 */
u_int
mp_bootaddress(u_int basemem)
{

	boot_address = trunc_page(basemem);	/* round down to 4k boundary */
	if ((basemem - boot_address) < bootMP_size)
		boot_address -= PAGE_SIZE;	/* not enough, lower by 4k */

	return boot_address;
}

void
cpu_add(u_int apic_id, char boot_cpu)
{

	if (apic_id > MAX_APIC_ID) {
		panic("SMP: APIC ID %d too high", apic_id);
		return;
	}
	KASSERT(cpu_info[apic_id].cpu_present == 0, ("CPU %d added twice",
	    apic_id));
	cpu_info[apic_id].cpu_present = 1;
	if (boot_cpu) {
		KASSERT(boot_cpu_id == -1,
		    ("CPU %d claims to be BSP, but CPU %d already is", apic_id,
		    boot_cpu_id));
		boot_cpu_id = apic_id;
		cpu_info[apic_id].cpu_bsp = 1;
	}
	if (mp_ncpus < MAXCPU)
		mp_ncpus++;
	if (bootverbose)
		printf("SMP: Added CPU %d (%s)\n", apic_id, boot_cpu ? "BSP" :
		    "AP");
}

void
cpu_mp_setmaxid(void)
{

	mp_maxid = MAXCPU - 1;
}

int
cpu_mp_probe(void)
{

	/*
	 * Always record BSP in CPU map so that the mbuf init code works
	 * correctly.
	 */
	all_cpus = 1;
	if (mp_ncpus == 0) {
		/*
		 * No CPUs were found, so this must be a UP system.  Setup
		 * the variables to represent a system with a single CPU
		 * with an id of 0.
		 */
		mp_ncpus = 1;
		return (0);
	}

	/* At least one CPU was found. */
	if (mp_ncpus == 1) {
		/*
		 * One CPU was found, so this must be a UP system with
		 * an I/O APIC.
		 */
		return (0);
	}

	/* At least two CPUs were found. */
	return (1);
}

/*
 * Initialize the IPI handlers and start up the AP's.
 */
void
cpu_mp_start(void)
{
	int i;
	u_int threads_per_cache, p[4];

	/* Initialize the logical ID to APIC ID table. */
	for (i = 0; i < MAXCPU; i++) {
		cpu_apic_ids[i] = -1;
		cpu_ipi_pending[i] = 0;
	}

	/* Install an inter-CPU IPI for TLB invalidation */
	setidt(IPI_INVLTLB, IDTVEC(invltlb),
	    SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
	setidt(IPI_INVLPG, IDTVEC(invlpg),
	    SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
	setidt(IPI_INVLRNG, IDTVEC(invlrng),
	    SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));

	/* Install an inter-CPU IPI for cache invalidation. */
	setidt(IPI_INVLCACHE, IDTVEC(invlcache),
	    SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));

	/* Install an inter-CPU IPI for lazy pmap release */
	setidt(IPI_LAZYPMAP, IDTVEC(lazypmap),
	    SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));

	/* Install an inter-CPU IPI for all-CPU rendezvous */
	setidt(IPI_RENDEZVOUS, IDTVEC(rendezvous),
	    SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));

	/* Install generic inter-CPU IPI handler */
	setidt(IPI_BITMAP_VECTOR, IDTVEC(ipi_intr_bitmap_handler),
	    SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));

	/* Install an inter-CPU IPI for CPU stop/restart */
	setidt(IPI_STOP, IDTVEC(cpustop),
	    SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));


	/* Set boot_cpu_id if needed. */
	if (boot_cpu_id == -1) {
		boot_cpu_id = PCPU_GET(apic_id);
		cpu_info[boot_cpu_id].cpu_bsp = 1;
	} else
		KASSERT(boot_cpu_id == PCPU_GET(apic_id),
		    ("BSP's APIC ID doesn't match boot_cpu_id"));
	cpu_apic_ids[0] = boot_cpu_id;
	apic_cpuids[boot_cpu_id] = 0;

	assign_cpu_ids();

	/* Start each Application Processor */
	start_all_aps();

	/* Setup the initial logical CPUs info. */
	logical_cpus = logical_cpus_mask = 0;
	if (cpu_feature & CPUID_HTT)
		logical_cpus = (cpu_procinfo & CPUID_HTT_CORES) >> 16;

	/*
	 * Work out if hyperthreading is *really* enabled.  This
	 * is made really ugly by the fact that processors lie: Dual
	 * core processors claim to be hyperthreaded even when they're
	 * not, presumably because they want to be treated the same
	 * way as HTT with respect to per-cpu software licensing.
	 * At the time of writing (May 12, 2005) the only hyperthreaded
	 * cpus are from Intel, and Intel's dual-core processors can be
	 * identified via the "deterministic cache parameters" cpuid
	 * calls.
	 */
	/*
	 * First determine if this is an Intel processor which claims
	 * to have hyperthreading support.
	 */
	if ((cpu_feature & CPUID_HTT) && cpu_vendor_id == CPU_VENDOR_INTEL) {
		/*
		 * If the "deterministic cache parameters" cpuid calls
		 * are available, use them.
		 */
		if (cpu_high >= 4) {
			/* Ask the processor about the L1 cache. */
			for (i = 0; i < 1; i++) {
				cpuid_count(4, i, p);
				threads_per_cache = ((p[0] & 0x3ffc000) >> 14) + 1;
				if (hyperthreading_cpus < threads_per_cache)
					hyperthreading_cpus = threads_per_cache;
				if ((p[0] & 0x1f) == 0)
					break;
			}
		}

		/*
		 * If the deterministic cache parameters are not
		 * available, or if no caches were reported to exist,
		 * just accept what the HTT flag indicated.
		 */
		if (hyperthreading_cpus == 0)
			hyperthreading_cpus = logical_cpus;
	}

	set_interrupt_apic_ids();
}


/*
 * Print various information about the SMP system hardware and setup.
 */
void
cpu_mp_announce(void)
{
	int i, x;

	/* List CPUs */
	printf(" cpu0 (BSP): APIC ID: %2d\n", boot_cpu_id);
	for (i = 1, x = 0; x <= MAX_APIC_ID; x++) {
		if (!cpu_info[x].cpu_present || cpu_info[x].cpu_bsp)
			continue;
		if (cpu_info[x].cpu_disabled)
			printf(" cpu (AP): APIC ID: %2d (disabled)\n", x);
		else {
			KASSERT(i < mp_ncpus,
			    ("mp_ncpus and actual cpus are out of whack"));
			printf(" cpu%d (AP): APIC ID: %2d\n", i++, x);
		}
	}
}

/*
 * AP CPU's call this to initialize themselves.
 */
void
init_secondary(void)
{
	struct pcpu *pc;
	vm_offset_t addr;
	int	gsel_tss;
	int	x, myid;
	u_int	cr0;

	/* bootAP is set in start_ap() to our ID. */
	myid = bootAP;

	/* Get per-cpu data */
	pc = &__pcpu[myid];

	/* prime data page for it to use */
	pcpu_init(pc, myid, sizeof(struct pcpu));
	pc->pc_apic_id = cpu_apic_ids[myid];
	pc->pc_prvspace = pc;
	pc->pc_curthread = 0;

	gdt_segs[GPRIV_SEL].ssd_base = (int) pc;
	gdt_segs[GPROC0_SEL].ssd_base = (int) &pc->pc_common_tss;

	for (x = 0; x < NGDT; x++) {
		ssdtosd(&gdt_segs[x], &gdt[myid * NGDT + x].sd);
	}

	r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1;
	r_gdt.rd_base = (int) &gdt[myid * NGDT];
	lgdt(&r_gdt);			/* does magic intra-segment return */

	lidt(&r_idt);

	lldt(_default_ldt);
	PCPU_SET(currentldt, _default_ldt);

	gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
	gdt[myid * NGDT + GPROC0_SEL].sd.sd_type = SDT_SYS386TSS;
	PCPU_SET(common_tss.tss_esp0, 0); /* not used until after switch */
	PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL));
	PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16);
	PCPU_SET(tss_gdt, &gdt[myid * NGDT + GPROC0_SEL].sd);
	PCPU_SET(common_tssd, *PCPU_GET(tss_gdt));
	ltr(gsel_tss);

	PCPU_SET(fsgs_gdt, &gdt[myid * NGDT + GUFS_SEL].sd);

	/*
	 * Set to a known state:
	 *	Set by mpboot.s: CR0_PG, CR0_PE
	 *	Set by cpu_setregs: CR0_NE, CR0_MP, CR0_TS, CR0_WP, CR0_AM
	 */
	cr0 = rcr0();
	cr0 &= ~(CR0_CD | CR0_NW | CR0_EM);
	load_cr0(cr0);
	CHECK_WRITE(0x38, 5);

	/* Disable local APIC just to be sure. */
	lapic_disable();

	/* signal our startup to the BSP. */
	mp_naps++;
	CHECK_WRITE(0x39, 6);

	/* Spin until the BSP releases the AP's. */
	while (!aps_ready)
		ia32_pause();

	/* BSP may have changed PTD while we were waiting */
	invltlb();
	for (addr = 0; addr < NKPT * NBPDR - 1; addr += PAGE_SIZE)
		invlpg(addr);

#if defined(I586_CPU) && !defined(NO_F00F_HACK)
	lidt(&r_idt);
#endif

	/* Initialize the PAT MSR if present. */
	pmap_init_pat();

	/* set up CPU registers and state */
	cpu_setregs();

	/* set up FPU state on the AP */
	npxinit(__INITIAL_NPXCW__);

	/* set up SSE registers */
	enable_sse();

#ifdef PAE
	/* Enable the PTE no-execute bit. */
	if ((amd_feature & AMDID_NX) != 0) {
		uint64_t msr;

		msr = rdmsr(MSR_EFER) | EFER_NXE;
		wrmsr(MSR_EFER, msr);
	}
#endif

	/* A quick check from sanity claus */
	if (PCPU_GET(apic_id) != lapic_id()) {
		printf("SMP: cpuid = %d\n", PCPU_GET(cpuid));
		printf("SMP: actual apic_id = %d\n", lapic_id());
		printf("SMP: correct apic_id = %d\n", PCPU_GET(apic_id));
		panic("cpuid mismatch! boom!!");
	}

	/* Initialize curthread. */
	KASSERT(PCPU_GET(idlethread) != NULL, ("no idle thread"));
	PCPU_SET(curthread, PCPU_GET(idlethread));

	mtx_lock_spin(&ap_boot_mtx);

	/* Init local apic for irq's */
	lapic_setup(1);

	/* Set memory range attributes for this CPU to match the BSP */
	mem_range_AP_init();

	smp_cpus++;

	CTR1(KTR_SMP, "SMP: AP CPU #%d Launched", PCPU_GET(cpuid));
	printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid));

	/* Determine if we are a logical CPU. */
	if (logical_cpus > 1 && PCPU_GET(apic_id) % logical_cpus != 0)
		logical_cpus_mask |= PCPU_GET(cpumask);

	/* Determine if we are a hyperthread. */
	if (hyperthreading_cpus > 1 &&
	    PCPU_GET(apic_id) % hyperthreading_cpus != 0)
		hyperthreading_cpus_mask |= PCPU_GET(cpumask);

	/* Build our map of 'other' CPUs. */
	PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask));

	if (bootverbose)
		lapic_dump("AP");

	if (smp_cpus == mp_ncpus) {
		/* enable IPI's, tlb shootdown, freezes etc */
		atomic_store_rel_int(&smp_started, 1);
		smp_active = 1;	 /* historic */
	}

	mtx_unlock_spin(&ap_boot_mtx);

	/* wait until all the AP's are up */
	while (smp_started == 0)
		ia32_pause();

	/* enter the scheduler */
	sched_throw(NULL);

	panic("scheduler returned us to %s", __func__);
	/* NOTREACHED */
}

/*******************************************************************
 * local functions and data
 */

/*
 * We tell the I/O APIC code about all the CPUs we want to receive
 * interrupts.  If we don't want certain CPUs to receive IRQs we
 * can simply not tell the I/O APIC code about them in this function.
 * We also do not tell it about the BSP since it tells itself about
 * the BSP internally to work with UP kernels and on UP machines.
 */
static void
set_interrupt_apic_ids(void)
{
	u_int i, apic_id;

	for (i = 0; i < MAXCPU; i++) {
		apic_id = cpu_apic_ids[i];
		if (apic_id == -1)
			continue;
		if (cpu_info[apic_id].cpu_bsp)
			continue;
		if (cpu_info[apic_id].cpu_disabled)
			continue;

		/* Don't let hyperthreads service interrupts. */
		if (hyperthreading_cpus > 1 &&
		    apic_id % hyperthreading_cpus != 0)
			continue;

		intr_add_cpu(i);
	}
}

/*
 * Assign logical CPU IDs to local APICs.
 */
static void
assign_cpu_ids(void)
{
	u_int i;

	/* Check for explicitly disabled CPUs. */
	for (i = 0; i <= MAX_APIC_ID; i++) {
		if (!cpu_info[i].cpu_present || cpu_info[i].cpu_bsp)
			continue;

		/* Don't use this CPU if it has been disabled by a tunable. */
		if (resource_disabled("lapic", i)) {
			cpu_info[i].cpu_disabled = 1;
			continue;
		}
	}

	/*
	 * Assign CPU IDs to local APIC IDs and disable any CPUs
	 * beyond MAXCPU.  CPU 0 has already been assigned to the BSP,
	 * so we only have to assign IDs for APs.
	 */
	mp_ncpus = 1;
	for (i = 0; i <= MAX_APIC_ID; i++) {
		if (!cpu_info[i].cpu_present || cpu_info[i].cpu_bsp ||
		    cpu_info[i].cpu_disabled)
			continue;

		if (mp_ncpus < MAXCPU) {
			cpu_apic_ids[mp_ncpus] = i;
			apic_cpuids[i] = mp_ncpus;
			mp_ncpus++;
		} else
			cpu_info[i].cpu_disabled = 1;
	}
	KASSERT(mp_maxid >= mp_ncpus - 1,
	    ("%s: counters out of sync: max %d, count %d", __func__, mp_maxid,
	    mp_ncpus));
}

/*
 * start each AP in our list
 */
/* Lowest 1MB is already mapped: don't touch */
#define TMPMAP_START 1
static int
start_all_aps(void)
{
#ifndef PC98
	u_char mpbiosreason;
#endif
	uintptr_t kptbase;
	u_int32_t mpbioswarmvec;
	int apic_id, cpu, i;

	mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN);

	/* install the AP 1st level boot code */
	install_ap_tramp();

	/* save the current value of the warm-start vector */
	mpbioswarmvec = *((u_int32_t *) WARMBOOT_OFF);
#ifndef PC98
	outb(CMOS_REG, BIOS_RESET);
	mpbiosreason = inb(CMOS_DATA);
#endif

	/* set up temporary P==V mapping for AP boot */
	/* XXX this is a hack, we should boot the AP on its own stack/PTD */

	kptbase = (uintptr_t)(void *)KPTphys;
	for (i = TMPMAP_START; i < NKPT; i++)
		PTD[i] = (pd_entry_t)(PG_V | PG_RW |
		    ((kptbase + i * PAGE_SIZE) & PG_FRAME));
	invltlb();

	/* start each AP */
	for (cpu = 1; cpu < mp_ncpus; cpu++) {
		apic_id = cpu_apic_ids[cpu];

		/* allocate and set up a boot stack data page */
		bootstacks[cpu] = (char *)kmem_alloc(kernel_map, KSTACK_PAGES * PAGE_SIZE);

		/* setup a vector to our boot code */
		*((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET;
		*((volatile u_short *) WARMBOOT_SEG) = (boot_address >> 4);
#ifndef PC98
		outb(CMOS_REG, BIOS_RESET);
		outb(CMOS_DATA, BIOS_WARM);	/* 'warm-start' */
#endif

		bootSTK = (char *)bootstacks[cpu] + KSTACK_PAGES * PAGE_SIZE - 4;
		bootAP = cpu;

		/* attempt to start the Application Processor */
		CHECK_INIT(99);	/* setup checkpoints */
		if (!start_ap(apic_id)) {
			printf("AP #%d (PHY# %d) failed!\n", cpu, apic_id);
			CHECK_PRINT("trace");	/* show checkpoints */
			/* better panic as the AP may be running loose */
			printf("panic y/n? [y] ");
			if (cngetc() != 'n')
				panic("bye-bye");
		}
		CHECK_PRINT("trace");	/* show checkpoints */

		all_cpus |= (1 << cpu);	/* record AP in CPU map */
	}

	/* build our map of 'other' CPUs */
	PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask));

	/* restore the warmstart vector */
	*(u_int32_t *) WARMBOOT_OFF = mpbioswarmvec;

#ifndef PC98
	outb(CMOS_REG, BIOS_RESET);
	outb(CMOS_DATA, mpbiosreason);
#endif

	/* Undo V==P hack from above */
	for (i = TMPMAP_START; i < NKPT; i++)
		PTD[i] = 0;
	pmap_invalidate_range(kernel_pmap, 0, NKPT * NBPDR - 1);

	/* number of APs actually started */
	return mp_naps;
}

/*
 * load the 1st level AP boot code into base memory.
 */

/* targets for relocation */
extern void bigJump(void);
extern void bootCodeSeg(void);
extern void bootDataSeg(void);
extern void MPentry(void);
extern u_int MP_GDT;
extern u_int mp_gdtbase;

static void
install_ap_tramp(void)
{
	int     x;
	int     size = *(int *) ((u_long) & bootMP_size);
	vm_offset_t va = boot_address + KERNBASE;
	u_char *src = (u_char *) ((u_long) bootMP);
	u_char *dst = (u_char *) va;
	u_int   boot_base = (u_int) bootMP;
	u_int8_t *dst8;
	u_int16_t *dst16;
	u_int32_t *dst32;

	KASSERT (size <= PAGE_SIZE,
	    ("'size' do not fit into PAGE_SIZE, as expected."));
	pmap_kenter(va, boot_address);
	pmap_invalidate_page (kernel_pmap, va);
	for (x = 0; x < size; ++x)
		*dst++ = *src++;

	/*
	 * modify addresses in code we just moved to basemem. unfortunately we
	 * need fairly detailed info about mpboot.s for this to work.  changes
	 * to mpboot.s might require changes here.
	 */

	/* boot code is located in KERNEL space */
	dst = (u_char *) va;

	/* modify the lgdt arg */
	dst32 = (u_int32_t *) (dst + ((u_int) & mp_gdtbase - boot_base));
	*dst32 = boot_address + ((u_int) & MP_GDT - boot_base);

	/* modify the ljmp target for MPentry() */
	dst32 = (u_int32_t *) (dst + ((u_int) bigJump - boot_base) + 1);
	*dst32 = ((u_int) MPentry - KERNBASE);

	/* modify the target for boot code segment */
	dst16 = (u_int16_t *) (dst + ((u_int) bootCodeSeg - boot_base));
	dst8 = (u_int8_t *) (dst16 + 1);
	*dst16 = (u_int) boot_address & 0xffff;
	*dst8 = ((u_int) boot_address >> 16) & 0xff;

	/* modify the target for boot data segment */
	dst16 = (u_int16_t *) (dst + ((u_int) bootDataSeg - boot_base));
	dst8 = (u_int8_t *) (dst16 + 1);
	*dst16 = (u_int) boot_address & 0xffff;
	*dst8 = ((u_int) boot_address >> 16) & 0xff;
}

/*
 * This function starts the AP (application processor) identified
 * by the APIC ID 'physicalCpu'.  It does quite a "song and dance"
 * to accomplish this.  This is necessary because of the nuances
 * of the different hardware we might encounter.  It isn't pretty,
 * but it seems to work.
 */
static int
start_ap(int apic_id)
{
	int vector, ms;
	int cpus;

	/* calculate the vector */
	vector = (boot_address >> 12) & 0xff;

	/* used as a watchpoint to signal AP startup */
	cpus = mp_naps;

	/*
	 * first we do an INIT/RESET IPI this INIT IPI might be run, resetting
	 * and running the target CPU. OR this INIT IPI might be latched (P5
	 * bug), CPU waiting for STARTUP IPI. OR this INIT IPI might be
	 * ignored.
	 */

	/* do an INIT IPI: assert RESET */
	lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
	    APIC_LEVEL_ASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT, apic_id);

	/* wait for pending status end */
	lapic_ipi_wait(-1);

	/* do an INIT IPI: deassert RESET */
	lapic_ipi_raw(APIC_DEST_ALLESELF | APIC_TRIGMOD_LEVEL |
	    APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT, 0);

	/* wait for pending status end */
	DELAY(10000);		/* wait ~10mS */
	lapic_ipi_wait(-1);

	/*
	 * next we do a STARTUP IPI: the previous INIT IPI might still be
	 * latched, (P5 bug) this 1st STARTUP would then terminate
	 * immediately, and the previously started INIT IPI would continue. OR
	 * the previous INIT IPI has already run. and this STARTUP IPI will
	 * run. OR the previous INIT IPI was ignored. and this STARTUP IPI
	 * will run.
	 */

	/* do a STARTUP IPI */
	lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
	    APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP |
	    vector, apic_id);
	lapic_ipi_wait(-1);
	DELAY(200);		/* wait ~200uS */

	/*
	 * finally we do a 2nd STARTUP IPI: this 2nd STARTUP IPI should run IF
	 * the previous STARTUP IPI was cancelled by a latched INIT IPI. OR
	 * this STARTUP IPI will be ignored, as only ONE STARTUP IPI is
	 * recognized after hardware RESET or INIT IPI.
	 */

	lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
	    APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP |
	    vector, apic_id);
	lapic_ipi_wait(-1);
	DELAY(200);		/* wait ~200uS */

	/* Wait up to 5 seconds for it to start. */
	for (ms = 0; ms < 5000; ms++) {
		if (mp_naps > cpus)
			return 1;	/* return SUCCESS */
		DELAY(1000);
	}
	return 0;		/* return FAILURE */
}

#ifdef COUNT_XINVLTLB_HITS
u_int xhits_gbl[MAXCPU];
u_int xhits_pg[MAXCPU];
u_int xhits_rng[MAXCPU];
SYSCTL_NODE(_debug, OID_AUTO, xhits, CTLFLAG_RW, 0, "");
SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, global, CTLFLAG_RW, &xhits_gbl,
    sizeof(xhits_gbl), "IU", "");
SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, page, CTLFLAG_RW, &xhits_pg,
    sizeof(xhits_pg), "IU", "");
SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, range, CTLFLAG_RW, &xhits_rng,
    sizeof(xhits_rng), "IU", "");

u_int ipi_global;
u_int ipi_page;
u_int ipi_range;
u_int ipi_range_size;
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_global, CTLFLAG_RW, &ipi_global, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_page, CTLFLAG_RW, &ipi_page, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range, CTLFLAG_RW, &ipi_range, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range_size, CTLFLAG_RW, &ipi_range_size,
    0, "");

u_int ipi_masked_global;
u_int ipi_masked_page;
u_int ipi_masked_range;
u_int ipi_masked_range_size;
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_global, CTLFLAG_RW,
    &ipi_masked_global, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_page, CTLFLAG_RW,
    &ipi_masked_page, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range, CTLFLAG_RW,
    &ipi_masked_range, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range_size, CTLFLAG_RW,
    &ipi_masked_range_size, 0, "");
#endif /* COUNT_XINVLTLB_HITS */

/*
 * Flush the TLB on all other CPU's
 */
static void
smp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2)
{
	u_int ncpu;

	ncpu = mp_ncpus - 1;	/* does not shootdown self */
	if (ncpu < 1)
		return;		/* no other cpus */
	if (!(read_eflags() & PSL_I))
		panic("%s: interrupts disabled", __func__);
	mtx_lock_spin(&smp_ipi_mtx);
	smp_tlb_addr1 = addr1;
	smp_tlb_addr2 = addr2;
	atomic_store_rel_int(&smp_tlb_wait, 0);
	ipi_all_but_self(vector);
	while (smp_tlb_wait < ncpu)
		ia32_pause();
	mtx_unlock_spin(&smp_ipi_mtx);
}

static void
smp_targeted_tlb_shootdown(u_int mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2)
{
	int ncpu, othercpus;

	othercpus = mp_ncpus - 1;
	if (mask == (u_int)-1) {
		ncpu = othercpus;
		if (ncpu < 1)
			return;
	} else {
		mask &= ~PCPU_GET(cpumask);
		if (mask == 0)
			return;
		ncpu = bitcount32(mask);
		if (ncpu > othercpus) {
			/* XXX this should be a panic offence */
			printf("SMP: tlb shootdown to %d other cpus (only have %d)\n",
			    ncpu, othercpus);
			ncpu = othercpus;
		}
		/* XXX should be a panic, implied by mask == 0 above */
		if (ncpu < 1)
			return;
	}
	if (!(read_eflags() & PSL_I))
		panic("%s: interrupts disabled", __func__);
	mtx_lock_spin(&smp_ipi_mtx);
	smp_tlb_addr1 = addr1;
	smp_tlb_addr2 = addr2;
	atomic_store_rel_int(&smp_tlb_wait, 0);
	if (mask == (u_int)-1)
		ipi_all_but_self(vector);
	else
		ipi_selected(mask, vector);
	while (smp_tlb_wait < ncpu)
		ia32_pause();
	mtx_unlock_spin(&smp_ipi_mtx);
}

void
smp_cache_flush(void)
{

	if (smp_started)
		smp_tlb_shootdown(IPI_INVLCACHE, 0, 0);
}

void
smp_invltlb(void)
{

	if (smp_started) {
		smp_tlb_shootdown(IPI_INVLTLB, 0, 0);
#ifdef COUNT_XINVLTLB_HITS
		ipi_global++;
#endif
	}
}

void
smp_invlpg(vm_offset_t addr)
{

	if (smp_started) {
		smp_tlb_shootdown(IPI_INVLPG, addr, 0);
#ifdef COUNT_XINVLTLB_HITS
		ipi_page++;
#endif
	}
}

void
smp_invlpg_range(vm_offset_t addr1, vm_offset_t addr2)
{

	if (smp_started) {
		smp_tlb_shootdown(IPI_INVLRNG, addr1, addr2);
#ifdef COUNT_XINVLTLB_HITS
		ipi_range++;
		ipi_range_size += (addr2 - addr1) / PAGE_SIZE;
#endif
	}
}

void
smp_masked_invltlb(u_int mask)
{

	if (smp_started) {
		smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, 0, 0);
#ifdef COUNT_XINVLTLB_HITS
		ipi_masked_global++;
#endif
	}
}

void
smp_masked_invlpg(u_int mask, vm_offset_t addr)
{

	if (smp_started) {
		smp_targeted_tlb_shootdown(mask, IPI_INVLPG, addr, 0);
#ifdef COUNT_XINVLTLB_HITS
		ipi_masked_page++;
#endif
	}
}

void
smp_masked_invlpg_range(u_int mask, vm_offset_t addr1, vm_offset_t addr2)
{

	if (smp_started) {
		smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, addr1, addr2);
#ifdef COUNT_XINVLTLB_HITS
		ipi_masked_range++;
		ipi_masked_range_size += (addr2 - addr1) / PAGE_SIZE;
#endif
	}
}

void
ipi_bitmap_handler(struct trapframe frame)
{
	int cpu = PCPU_GET(cpuid);
	u_int ipi_bitmap;

	ipi_bitmap = atomic_readandclear_int(&cpu_ipi_pending[cpu]);

	if (ipi_bitmap & (1 << IPI_PREEMPT)) {
#ifdef COUNT_IPIS
		(*ipi_preempt_counts[cpu])++;
#endif
		sched_preempt(curthread);
	}

	if (ipi_bitmap & (1 << IPI_AST)) {
#ifdef COUNT_IPIS
		(*ipi_ast_counts[cpu])++;
#endif
		/* Nothing to do for AST */
	}
}

/*
 * send an IPI to a set of cpus.
 */
void
ipi_selected(u_int32_t cpus, u_int ipi)
{
	int cpu;
	u_int bitmap = 0;
	u_int old_pending;
	u_int new_pending;

	if (IPI_IS_BITMAPED(ipi)) {
		bitmap = 1 << ipi;
		ipi = IPI_BITMAP_VECTOR;
	}

#ifdef STOP_NMI
	if (ipi == IPI_STOP && stop_cpus_with_nmi) {
		ipi_nmi_selected(cpus);
		return;
	}
#endif
	CTR3(KTR_SMP, "%s: cpus: %x ipi: %x", __func__, cpus, ipi);
	while ((cpu = ffs(cpus)) != 0) {
		cpu--;
		cpus &= ~(1 << cpu);

		KASSERT(cpu_apic_ids[cpu] != -1,
		    ("IPI to non-existent CPU %d", cpu));

		if (bitmap) {
			do {
				old_pending = cpu_ipi_pending[cpu];
				new_pending = old_pending | bitmap;
			} while (!atomic_cmpset_int(&cpu_ipi_pending[cpu],
			    old_pending, new_pending));

			if (old_pending)
				continue;
		}

		lapic_ipi_vectored(ipi, cpu_apic_ids[cpu]);
	}

}

/*
 * send an IPI to all CPUs EXCEPT myself
 */
void
ipi_all_but_self(u_int ipi)
{

	if (IPI_IS_BITMAPED(ipi) || (ipi == IPI_STOP && stop_cpus_with_nmi)) {
		ipi_selected(PCPU_GET(other_cpus), ipi);
		return;
	}
	CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
	lapic_ipi_vectored(ipi, APIC_IPI_DEST_OTHERS);
}

#ifdef STOP_NMI
/*
 * send NMI IPI to selected CPUs
 */

#define	BEFORE_SPIN	1000000

void
ipi_nmi_selected(u_int32_t cpus)
{
	int cpu;
	register_t icrlo;

	icrlo = APIC_DELMODE_NMI | APIC_DESTMODE_PHY | APIC_LEVEL_ASSERT
	    | APIC_TRIGMOD_EDGE;

	CTR2(KTR_SMP, "%s: cpus: %x nmi", __func__, cpus);

	atomic_set_int(&ipi_nmi_pending, cpus);

	while ((cpu = ffs(cpus)) != 0) {
		cpu--;
		cpus &= ~(1 << cpu);

		KASSERT(cpu_apic_ids[cpu] != -1,
		    ("IPI NMI to non-existent CPU %d", cpu));

		/* Wait for an earlier IPI to finish. */
		if (!lapic_ipi_wait(BEFORE_SPIN))
			panic("ipi_nmi_selected: previous IPI has not cleared");

		lapic_ipi_raw(icrlo, cpu_apic_ids[cpu]);
	}
}

int
ipi_nmi_handler(void)
{
	int cpumask = PCPU_GET(cpumask);

	if (!(ipi_nmi_pending & cpumask))
		return 1;

	atomic_clear_int(&ipi_nmi_pending, cpumask);
	cpustop_handler();
	return 0;
}

#endif /* STOP_NMI */

/*
 * Handle an IPI_STOP by saving our current context and spinning until we
 * are resumed.
 */
void
cpustop_handler(void)
{
	int cpu = PCPU_GET(cpuid);
	int cpumask = PCPU_GET(cpumask);

	savectx(&stoppcbs[cpu]);

	/* Indicate that we are stopped */
	atomic_set_int(&stopped_cpus, cpumask);

	/* Wait for restart */
	while (!(started_cpus & cpumask))
		ia32_pause();

	atomic_clear_int(&started_cpus, cpumask);
	atomic_clear_int(&stopped_cpus, cpumask);

	if (cpu == 0 && cpustop_restartfunc != NULL) {
		cpustop_restartfunc();
		cpustop_restartfunc = NULL;
	}
}

/*
 * This is called once the rest of the system is up and running and we're
 * ready to let the AP's out of the pen.
 */
static void
release_aps(void *dummy __unused)
{

	if (mp_ncpus == 1)
		return;
	atomic_store_rel_int(&aps_ready, 1);
	while (smp_started == 0)
		ia32_pause();
}
SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL);

static int
sysctl_hlt_cpus(SYSCTL_HANDLER_ARGS)
{
	u_int mask;
	int error;

	mask = hlt_cpus_mask;
	error = sysctl_handle_int(oidp, &mask, 0, req);
	if (error || !req->newptr)
		return (error);

	if (logical_cpus_mask != 0 &&
	    (mask & logical_cpus_mask) == logical_cpus_mask)
		hlt_logical_cpus = 1;
	else
		hlt_logical_cpus = 0;

	if (! hyperthreading_allowed)
		mask |= hyperthreading_cpus_mask;

	if ((mask & all_cpus) == all_cpus)
		mask &= ~(1<<0);
	hlt_cpus_mask = mask;
	return (error);
}
SYSCTL_PROC(_machdep, OID_AUTO, hlt_cpus, CTLTYPE_INT|CTLFLAG_RW,
    0, 0, sysctl_hlt_cpus, "IU",
    "Bitmap of CPUs to halt. 101 (binary) will halt CPUs 0 and 2.");

static int
sysctl_hlt_logical_cpus(SYSCTL_HANDLER_ARGS)
{
	int disable, error;

	disable = hlt_logical_cpus;
	error = sysctl_handle_int(oidp, &disable, 0, req);
	if (error || !req->newptr)
		return (error);

	if (disable)
		hlt_cpus_mask |= logical_cpus_mask;
	else
		hlt_cpus_mask &= ~logical_cpus_mask;

	if (! hyperthreading_allowed)
		hlt_cpus_mask |= hyperthreading_cpus_mask;

	if ((hlt_cpus_mask & all_cpus) == all_cpus)
		hlt_cpus_mask &= ~(1<<0);

	hlt_logical_cpus = disable;
	return (error);
}

static int
sysctl_hyperthreading_allowed(SYSCTL_HANDLER_ARGS)
{
	int allowed, error;

	allowed = hyperthreading_allowed;
	error = sysctl_handle_int(oidp, &allowed, 0, req);
	if (error || !req->newptr)
		return (error);

	if (allowed)
		hlt_cpus_mask &= ~hyperthreading_cpus_mask;
	else
		hlt_cpus_mask |= hyperthreading_cpus_mask;

	if (logical_cpus_mask != 0 &&
	    (hlt_cpus_mask & logical_cpus_mask) == logical_cpus_mask)
		hlt_logical_cpus = 1;
	else
		hlt_logical_cpus = 0;

	if ((hlt_cpus_mask & all_cpus) == all_cpus)
		hlt_cpus_mask &= ~(1<<0);

	hyperthreading_allowed = allowed;
	return (error);
}

static void
cpu_hlt_setup(void *dummy __unused)
{

	if (logical_cpus_mask != 0) {
		TUNABLE_INT_FETCH("machdep.hlt_logical_cpus",
		    &hlt_logical_cpus);
		sysctl_ctx_init(&logical_cpu_clist);
		SYSCTL_ADD_PROC(&logical_cpu_clist,
		    SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO,
		    "hlt_logical_cpus", CTLTYPE_INT|CTLFLAG_RW, 0, 0,
		    sysctl_hlt_logical_cpus, "IU", "");
		SYSCTL_ADD_UINT(&logical_cpu_clist,
		    SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO,
		    "logical_cpus_mask", CTLTYPE_INT|CTLFLAG_RD,
		    &logical_cpus_mask, 0, "");

		if (hlt_logical_cpus)
			hlt_cpus_mask |= logical_cpus_mask;

		/*
		 * If necessary for security purposes, force
		 * hyperthreading off, regardless of the value
		 * of hlt_logical_cpus.
		 */
		if (hyperthreading_cpus_mask) {
			TUNABLE_INT_FETCH("machdep.hyperthreading_allowed",
			    &hyperthreading_allowed);
			SYSCTL_ADD_PROC(&logical_cpu_clist,
			    SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO,
			    "hyperthreading_allowed", CTLTYPE_INT|CTLFLAG_RW,
			    0, 0, sysctl_hyperthreading_allowed, "IU", "");
			if (! hyperthreading_allowed)
				hlt_cpus_mask |= hyperthreading_cpus_mask;
		}
	}
}
SYSINIT(cpu_hlt, SI_SUB_SMP, SI_ORDER_ANY, cpu_hlt_setup, NULL);

int
mp_grab_cpu_hlt(void)
{
	u_int mask = PCPU_GET(cpumask);
#ifdef MP_WATCHDOG
	u_int cpuid = PCPU_GET(cpuid);
#endif
	int retval;

#ifdef MP_WATCHDOG
	ap_watchdog(cpuid);
#endif

	retval = mask & hlt_cpus_mask;
	while (mask & hlt_cpus_mask)
		__asm __volatile("sti; hlt" : : : "memory");
	return (retval);
}

#ifdef COUNT_IPIS
/*
 * Setup interrupt counters for IPI handlers.
 */
static void
mp_ipi_intrcnt(void *dummy)
{
	char buf[64];
	int i;

	for (i = 0; i < mp_maxid; i++) {
		if (CPU_ABSENT(i))
			continue;
		snprintf(buf, sizeof(buf), "cpu%d: invltlb", i);
		intrcnt_add(buf, &ipi_invltlb_counts[i]);
		snprintf(buf, sizeof(buf), "cpu%d: invlrng", i);
		intrcnt_add(buf, &ipi_invlrng_counts[i]);
		snprintf(buf, sizeof(buf), "cpu%d: invlpg", i);
		intrcnt_add(buf, &ipi_invlpg_counts[i]);
		snprintf(buf, sizeof(buf), "cpu%d: preempt", i);
		intrcnt_add(buf, &ipi_preempt_counts[i]);
		snprintf(buf, sizeof(buf), "cpu%d: ast", i);
		intrcnt_add(buf, &ipi_ast_counts[i]);
		snprintf(buf, sizeof(buf), "cpu%d: rendezvous", i);
		intrcnt_add(buf, &ipi_rendezvous_counts[i]);
		snprintf(buf, sizeof(buf), "cpu%d: lazypmap", i);
		intrcnt_add(buf, &ipi_lazypmap_counts[i]);
	}
}
SYSINIT(mp_ipi_intrcnt, SI_SUB_INTR, SI_ORDER_MIDDLE, mp_ipi_intrcnt, NULL);
#endif