mp_x86.c — FreeBSD i386 SMP machine-dependent code (sys/i386/i386/mp_machdep.c), revision 211518
1/*- 2 * Copyright (c) 1996, by Steve Passe 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. The name of the developer may NOT be used to endorse or promote products 11 * derived from this software without specific prior written permission. 12 * 13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 16 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 23 * SUCH DAMAGE. 24 */ 25 26#include <sys/cdefs.h> 27__FBSDID("$FreeBSD: head/sys/i386/i386/mp_machdep.c 211518 2010-08-19 22:37:43Z attilio $"); 28 29#include "opt_apic.h" 30#include "opt_cpu.h" 31#include "opt_kstack_pages.h" 32#include "opt_mp_watchdog.h" 33#include "opt_pmap.h" 34#include "opt_sched.h" 35#include "opt_smp.h" 36 37#if !defined(lint) 38#if !defined(SMP) 39#error How did you get here? 40#endif 41 42#ifndef DEV_APIC 43#error The apic device is required for SMP, add "device apic" to your config file. 
44#endif 45#if defined(CPU_DISABLE_CMPXCHG) && !defined(COMPILING_LINT) 46#error SMP not supported with CPU_DISABLE_CMPXCHG 47#endif 48#endif /* not lint */ 49 50#include <sys/param.h> 51#include <sys/systm.h> 52#include <sys/bus.h> 53#include <sys/cons.h> /* cngetc() */ 54#ifdef GPROF 55#include <sys/gmon.h> 56#endif 57#include <sys/kernel.h> 58#include <sys/ktr.h> 59#include <sys/lock.h> 60#include <sys/malloc.h> 61#include <sys/memrange.h> 62#include <sys/mutex.h> 63#include <sys/pcpu.h> 64#include <sys/proc.h> 65#include <sys/sched.h> 66#include <sys/smp.h> 67#include <sys/sysctl.h> 68 69#include <vm/vm.h> 70#include <vm/vm_param.h> 71#include <vm/pmap.h> 72#include <vm/vm_kern.h> 73#include <vm/vm_extern.h> 74 75#include <machine/apicreg.h> 76#include <machine/clock.h> 77#include <machine/cputypes.h> 78#include <machine/mca.h> 79#include <machine/md_var.h> 80#include <machine/mp_watchdog.h> 81#include <machine/pcb.h> 82#include <machine/psl.h> 83#include <machine/smp.h> 84#include <machine/specialreg.h> 85 86#define WARMBOOT_TARGET 0 87#define WARMBOOT_OFF (KERNBASE + 0x0467) 88#define WARMBOOT_SEG (KERNBASE + 0x0469) 89 90#define CMOS_REG (0x70) 91#define CMOS_DATA (0x71) 92#define BIOS_RESET (0x0f) 93#define BIOS_WARM (0x0a) 94 95/* 96 * this code MUST be enabled here and in mpboot.s. 97 * it follows the very early stages of AP boot by placing values in CMOS ram. 98 * it NORMALLY will never be needed and thus the primitive method for enabling. 
99 * 100#define CHECK_POINTS 101 */ 102 103#if defined(CHECK_POINTS) && !defined(PC98) 104#define CHECK_READ(A) (outb(CMOS_REG, (A)), inb(CMOS_DATA)) 105#define CHECK_WRITE(A,D) (outb(CMOS_REG, (A)), outb(CMOS_DATA, (D))) 106 107#define CHECK_INIT(D); \ 108 CHECK_WRITE(0x34, (D)); \ 109 CHECK_WRITE(0x35, (D)); \ 110 CHECK_WRITE(0x36, (D)); \ 111 CHECK_WRITE(0x37, (D)); \ 112 CHECK_WRITE(0x38, (D)); \ 113 CHECK_WRITE(0x39, (D)); 114 115#define CHECK_PRINT(S); \ 116 printf("%s: %d, %d, %d, %d, %d, %d\n", \ 117 (S), \ 118 CHECK_READ(0x34), \ 119 CHECK_READ(0x35), \ 120 CHECK_READ(0x36), \ 121 CHECK_READ(0x37), \ 122 CHECK_READ(0x38), \ 123 CHECK_READ(0x39)); 124 125#else /* CHECK_POINTS */ 126 127#define CHECK_INIT(D) 128#define CHECK_PRINT(S) 129#define CHECK_WRITE(A, D) 130 131#endif /* CHECK_POINTS */ 132 133/* lock region used by kernel profiling */ 134int mcount_lock; 135 136int mp_naps; /* # of Applications processors */ 137int boot_cpu_id = -1; /* designated BSP */ 138 139extern struct pcpu __pcpu[]; 140 141/* AP uses this during bootstrap. Do not staticize. */ 142char *bootSTK; 143static int bootAP; 144 145/* Free these after use */ 146void *bootstacks[MAXCPU]; 147static void *dpcpu; 148 149/* Hotwire a 0->4MB V==P mapping */ 150extern pt_entry_t *KPTphys; 151 152struct pcb stoppcbs[MAXCPU]; 153 154/* Variables needed for SMP tlb shootdown. */ 155vm_offset_t smp_tlb_addr1; 156vm_offset_t smp_tlb_addr2; 157volatile int smp_tlb_wait; 158 159#ifdef COUNT_IPIS 160/* Interrupt counts. */ 161static u_long *ipi_preempt_counts[MAXCPU]; 162static u_long *ipi_ast_counts[MAXCPU]; 163u_long *ipi_invltlb_counts[MAXCPU]; 164u_long *ipi_invlrng_counts[MAXCPU]; 165u_long *ipi_invlpg_counts[MAXCPU]; 166u_long *ipi_invlcache_counts[MAXCPU]; 167u_long *ipi_rendezvous_counts[MAXCPU]; 168u_long *ipi_lazypmap_counts[MAXCPU]; 169static u_long *ipi_hardclock_counts[MAXCPU]; 170static u_long *ipi_statclock_counts[MAXCPU]; 171#endif 172 173/* 174 * Local data and functions. 
175 */ 176 177static u_int logical_cpus; 178static volatile cpumask_t ipi_nmi_pending; 179 180/* used to hold the AP's until we are ready to release them */ 181static struct mtx ap_boot_mtx; 182 183/* Set to 1 once we're ready to let the APs out of the pen. */ 184static volatile int aps_ready = 0; 185 186/* 187 * Store data from cpu_add() until later in the boot when we actually setup 188 * the APs. 189 */ 190struct cpu_info { 191 int cpu_present:1; 192 int cpu_bsp:1; 193 int cpu_disabled:1; 194 int cpu_hyperthread:1; 195} static cpu_info[MAX_APIC_ID + 1]; 196int cpu_apic_ids[MAXCPU]; 197int apic_cpuids[MAX_APIC_ID + 1]; 198 199/* Holds pending bitmap based IPIs per CPU */ 200static volatile u_int cpu_ipi_pending[MAXCPU]; 201 202static u_int boot_address; 203static int cpu_logical; 204static int cpu_cores; 205 206static void assign_cpu_ids(void); 207static void install_ap_tramp(void); 208static void set_interrupt_apic_ids(void); 209static int start_all_aps(void); 210static int start_ap(int apic_id); 211static void release_aps(void *dummy); 212 213static int hlt_logical_cpus; 214static u_int hyperthreading_cpus; 215static cpumask_t hyperthreading_cpus_mask; 216static int hyperthreading_allowed = 1; 217static struct sysctl_ctx_list logical_cpu_clist; 218 219static void 220mem_range_AP_init(void) 221{ 222 if (mem_range_softc.mr_op && mem_range_softc.mr_op->initAP) 223 mem_range_softc.mr_op->initAP(&mem_range_softc); 224} 225 226static void 227topo_probe_0xb(void) 228{ 229 int logical; 230 int p[4]; 231 int bits; 232 int type; 233 int cnt; 234 int i; 235 int x; 236 237 /* We only support two levels for now. 
*/ 238 for (i = 0; i < 3; i++) { 239 cpuid_count(0x0B, i, p); 240 bits = p[0] & 0x1f; 241 logical = p[1] &= 0xffff; 242 type = (p[2] >> 8) & 0xff; 243 if (type == 0 || logical == 0) 244 break; 245 for (cnt = 0, x = 0; x <= MAX_APIC_ID; x++) { 246 if (!cpu_info[x].cpu_present || 247 cpu_info[x].cpu_disabled) 248 continue; 249 if (x >> bits == boot_cpu_id >> bits) 250 cnt++; 251 } 252 if (type == CPUID_TYPE_SMT) 253 cpu_logical = cnt; 254 else if (type == CPUID_TYPE_CORE) 255 cpu_cores = cnt; 256 } 257 if (cpu_logical == 0) 258 cpu_logical = 1; 259 cpu_cores /= cpu_logical; 260} 261 262static void 263topo_probe_0x4(void) 264{ 265 u_int threads_per_cache, p[4]; 266 u_int htt, cmp; 267 int i; 268 269 htt = cmp = 1; 270 /* 271 * If this CPU supports HTT or CMP then mention the 272 * number of physical/logical cores it contains. 273 */ 274 if (cpu_feature & CPUID_HTT) 275 htt = (cpu_procinfo & CPUID_HTT_CORES) >> 16; 276 if (cpu_vendor_id == CPU_VENDOR_AMD && (amd_feature2 & AMDID2_CMP)) 277 cmp = (cpu_procinfo2 & AMDID_CMP_CORES) + 1; 278 else if (cpu_vendor_id == CPU_VENDOR_INTEL && (cpu_high >= 4)) { 279 cpuid_count(4, 0, p); 280 if ((p[0] & 0x1f) != 0) 281 cmp = ((p[0] >> 26) & 0x3f) + 1; 282 } 283 cpu_cores = cmp; 284 cpu_logical = htt / cmp; 285 286 /* Setup the initial logical CPUs info. */ 287 if (cpu_feature & CPUID_HTT) 288 logical_cpus = (cpu_procinfo & CPUID_HTT_CORES) >> 16; 289 290 /* 291 * Work out if hyperthreading is *really* enabled. This 292 * is made really ugly by the fact that processors lie: Dual 293 * core processors claim to be hyperthreaded even when they're 294 * not, presumably because they want to be treated the same 295 * way as HTT with respect to per-cpu software licensing. 296 * At the time of writing (May 12, 2005) the only hyperthreaded 297 * cpus are from Intel, and Intel's dual-core processors can be 298 * identified via the "deterministic cache parameters" cpuid 299 * calls. 
300 */ 301 /* 302 * First determine if this is an Intel processor which claims 303 * to have hyperthreading support. 304 */ 305 if ((cpu_feature & CPUID_HTT) && cpu_vendor_id == CPU_VENDOR_INTEL) { 306 /* 307 * If the "deterministic cache parameters" cpuid calls 308 * are available, use them. 309 */ 310 if (cpu_high >= 4) { 311 /* Ask the processor about the L1 cache. */ 312 for (i = 0; i < 1; i++) { 313 cpuid_count(4, i, p); 314 threads_per_cache = ((p[0] & 0x3ffc000) >> 14) + 1; 315 if (hyperthreading_cpus < threads_per_cache) 316 hyperthreading_cpus = threads_per_cache; 317 if ((p[0] & 0x1f) == 0) 318 break; 319 } 320 } 321 322 /* 323 * If the deterministic cache parameters are not 324 * available, or if no caches were reported to exist, 325 * just accept what the HTT flag indicated. 326 */ 327 if (hyperthreading_cpus == 0) 328 hyperthreading_cpus = logical_cpus; 329 } 330} 331 332static void 333topo_probe(void) 334{ 335 static int cpu_topo_probed = 0; 336 337 if (cpu_topo_probed) 338 return; 339 340 logical_cpus = logical_cpus_mask = 0; 341 if (cpu_high >= 0xb) 342 topo_probe_0xb(); 343 else if (cpu_high) 344 topo_probe_0x4(); 345 if (cpu_cores == 0) 346 cpu_cores = mp_ncpus > 0 ? mp_ncpus : 1; 347 if (cpu_logical == 0) 348 cpu_logical = 1; 349 cpu_topo_probed = 1; 350} 351 352struct cpu_group * 353cpu_topo(void) 354{ 355 int cg_flags; 356 357 /* 358 * Determine whether any threading flags are 359 * necessry. 360 */ 361 topo_probe(); 362 if (cpu_logical > 1 && hyperthreading_cpus) 363 cg_flags = CG_FLAG_HTT; 364 else if (cpu_logical > 1) 365 cg_flags = CG_FLAG_SMT; 366 else 367 cg_flags = 0; 368 if (mp_ncpus % (cpu_cores * cpu_logical) != 0) { 369 printf("WARNING: Non-uniform processors.\n"); 370 printf("WARNING: Using suboptimal topology.\n"); 371 return (smp_topo_none()); 372 } 373 /* 374 * No multi-core or hyper-threaded. 375 */ 376 if (cpu_logical * cpu_cores == 1) 377 return (smp_topo_none()); 378 /* 379 * Only HTT no multi-core. 
380 */ 381 if (cpu_logical > 1 && cpu_cores == 1) 382 return (smp_topo_1level(CG_SHARE_L1, cpu_logical, cg_flags)); 383 /* 384 * Only multi-core no HTT. 385 */ 386 if (cpu_cores > 1 && cpu_logical == 1) 387 return (smp_topo_1level(CG_SHARE_L2, cpu_cores, cg_flags)); 388 /* 389 * Both HTT and multi-core. 390 */ 391 return (smp_topo_2level(CG_SHARE_L2, cpu_cores, 392 CG_SHARE_L1, cpu_logical, cg_flags)); 393} 394 395 396/* 397 * Calculate usable address in base memory for AP trampoline code. 398 */ 399u_int 400mp_bootaddress(u_int basemem) 401{ 402 403 boot_address = trunc_page(basemem); /* round down to 4k boundary */ 404 if ((basemem - boot_address) < bootMP_size) 405 boot_address -= PAGE_SIZE; /* not enough, lower by 4k */ 406 407 return boot_address; 408} 409 410void 411cpu_add(u_int apic_id, char boot_cpu) 412{ 413 414 if (apic_id > MAX_APIC_ID) { 415 panic("SMP: APIC ID %d too high", apic_id); 416 return; 417 } 418 KASSERT(cpu_info[apic_id].cpu_present == 0, ("CPU %d added twice", 419 apic_id)); 420 cpu_info[apic_id].cpu_present = 1; 421 if (boot_cpu) { 422 KASSERT(boot_cpu_id == -1, 423 ("CPU %d claims to be BSP, but CPU %d already is", apic_id, 424 boot_cpu_id)); 425 boot_cpu_id = apic_id; 426 cpu_info[apic_id].cpu_bsp = 1; 427 } 428 if (mp_ncpus < MAXCPU) 429 mp_ncpus++; 430 if (bootverbose) 431 printf("SMP: Added CPU %d (%s)\n", apic_id, boot_cpu ? "BSP" : 432 "AP"); 433} 434 435void 436cpu_mp_setmaxid(void) 437{ 438 439 mp_maxid = MAXCPU - 1; 440} 441 442int 443cpu_mp_probe(void) 444{ 445 446 /* 447 * Always record BSP in CPU map so that the mbuf init code works 448 * correctly. 449 */ 450 all_cpus = 1; 451 if (mp_ncpus == 0) { 452 /* 453 * No CPUs were found, so this must be a UP system. Setup 454 * the variables to represent a system with a single CPU 455 * with an id of 0. 456 */ 457 mp_ncpus = 1; 458 return (0); 459 } 460 461 /* At least one CPU was found. 
*/ 462 if (mp_ncpus == 1) { 463 /* 464 * One CPU was found, so this must be a UP system with 465 * an I/O APIC. 466 */ 467 return (0); 468 } 469 470 /* At least two CPUs were found. */ 471 return (1); 472} 473 474/* 475 * Initialize the IPI handlers and start up the AP's. 476 */ 477void 478cpu_mp_start(void) 479{ 480 int i; 481 482 /* Initialize the logical ID to APIC ID table. */ 483 for (i = 0; i < MAXCPU; i++) { 484 cpu_apic_ids[i] = -1; 485 cpu_ipi_pending[i] = 0; 486 } 487 488 /* Install an inter-CPU IPI for TLB invalidation */ 489 setidt(IPI_INVLTLB, IDTVEC(invltlb), 490 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 491 setidt(IPI_INVLPG, IDTVEC(invlpg), 492 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 493 setidt(IPI_INVLRNG, IDTVEC(invlrng), 494 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 495 496 /* Install an inter-CPU IPI for cache invalidation. */ 497 setidt(IPI_INVLCACHE, IDTVEC(invlcache), 498 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 499 500 /* Install an inter-CPU IPI for lazy pmap release */ 501 setidt(IPI_LAZYPMAP, IDTVEC(lazypmap), 502 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 503 504 /* Install an inter-CPU IPI for all-CPU rendezvous */ 505 setidt(IPI_RENDEZVOUS, IDTVEC(rendezvous), 506 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 507 508 /* Install generic inter-CPU IPI handler */ 509 setidt(IPI_BITMAP_VECTOR, IDTVEC(ipi_intr_bitmap_handler), 510 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 511 512 /* Install an inter-CPU IPI for CPU stop/restart */ 513 setidt(IPI_STOP, IDTVEC(cpustop), 514 SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 515 516 517 /* Set boot_cpu_id if needed. */ 518 if (boot_cpu_id == -1) { 519 boot_cpu_id = PCPU_GET(apic_id); 520 cpu_info[boot_cpu_id].cpu_bsp = 1; 521 } else 522 KASSERT(boot_cpu_id == PCPU_GET(apic_id), 523 ("BSP's APIC ID doesn't match boot_cpu_id")); 524 525 /* Probe logical/physical core configuration. 
*/ 526 topo_probe(); 527 528 assign_cpu_ids(); 529 530 /* Start each Application Processor */ 531 start_all_aps(); 532 533 set_interrupt_apic_ids(); 534} 535 536 537/* 538 * Print various information about the SMP system hardware and setup. 539 */ 540void 541cpu_mp_announce(void) 542{ 543 const char *hyperthread; 544 int i; 545 546 printf("FreeBSD/SMP: %d package(s) x %d core(s)", 547 mp_ncpus / (cpu_cores * cpu_logical), cpu_cores); 548 if (hyperthreading_cpus > 1) 549 printf(" x %d HTT threads", cpu_logical); 550 else if (cpu_logical > 1) 551 printf(" x %d SMT threads", cpu_logical); 552 printf("\n"); 553 554 /* List active CPUs first. */ 555 printf(" cpu0 (BSP): APIC ID: %2d\n", boot_cpu_id); 556 for (i = 1; i < mp_ncpus; i++) { 557 if (cpu_info[cpu_apic_ids[i]].cpu_hyperthread) 558 hyperthread = "/HT"; 559 else 560 hyperthread = ""; 561 printf(" cpu%d (AP%s): APIC ID: %2d\n", i, hyperthread, 562 cpu_apic_ids[i]); 563 } 564 565 /* List disabled CPUs last. */ 566 for (i = 0; i <= MAX_APIC_ID; i++) { 567 if (!cpu_info[i].cpu_present || !cpu_info[i].cpu_disabled) 568 continue; 569 if (cpu_info[i].cpu_hyperthread) 570 hyperthread = "/HT"; 571 else 572 hyperthread = ""; 573 printf(" cpu (AP%s): APIC ID: %2d (disabled)\n", hyperthread, 574 i); 575 } 576} 577 578/* 579 * AP CPU's call this to initialize themselves. 580 */ 581void 582init_secondary(void) 583{ 584 struct pcpu *pc; 585 vm_offset_t addr; 586 int gsel_tss; 587 int x, myid; 588 u_int cr0; 589 590 /* bootAP is set in start_ap() to our ID. 
*/ 591 myid = bootAP; 592 593 /* Get per-cpu data */ 594 pc = &__pcpu[myid]; 595 596 /* prime data page for it to use */ 597 pcpu_init(pc, myid, sizeof(struct pcpu)); 598 dpcpu_init(dpcpu, myid); 599 pc->pc_apic_id = cpu_apic_ids[myid]; 600 pc->pc_prvspace = pc; 601 pc->pc_curthread = 0; 602 603 gdt_segs[GPRIV_SEL].ssd_base = (int) pc; 604 gdt_segs[GPROC0_SEL].ssd_base = (int) &pc->pc_common_tss; 605 606 for (x = 0; x < NGDT; x++) { 607 ssdtosd(&gdt_segs[x], &gdt[myid * NGDT + x].sd); 608 } 609 610 r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1; 611 r_gdt.rd_base = (int) &gdt[myid * NGDT]; 612 lgdt(&r_gdt); /* does magic intra-segment return */ 613 614 lidt(&r_idt); 615 616 lldt(_default_ldt); 617 PCPU_SET(currentldt, _default_ldt); 618 619 gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); 620 gdt[myid * NGDT + GPROC0_SEL].sd.sd_type = SDT_SYS386TSS; 621 PCPU_SET(common_tss.tss_esp0, 0); /* not used until after switch */ 622 PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL)); 623 PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16); 624 PCPU_SET(tss_gdt, &gdt[myid * NGDT + GPROC0_SEL].sd); 625 PCPU_SET(common_tssd, *PCPU_GET(tss_gdt)); 626 ltr(gsel_tss); 627 628 PCPU_SET(fsgs_gdt, &gdt[myid * NGDT + GUFS_SEL].sd); 629 630 /* 631 * Set to a known state: 632 * Set by mpboot.s: CR0_PG, CR0_PE 633 * Set by cpu_setregs: CR0_NE, CR0_MP, CR0_TS, CR0_WP, CR0_AM 634 */ 635 cr0 = rcr0(); 636 cr0 &= ~(CR0_CD | CR0_NW | CR0_EM); 637 load_cr0(cr0); 638 CHECK_WRITE(0x38, 5); 639 640 /* Disable local APIC just to be sure. */ 641 lapic_disable(); 642 643 /* signal our startup to the BSP. */ 644 mp_naps++; 645 CHECK_WRITE(0x39, 6); 646 647 /* Spin until the BSP releases the AP's. 
*/ 648 while (!aps_ready) 649 ia32_pause(); 650 651 /* BSP may have changed PTD while we were waiting */ 652 invltlb(); 653 for (addr = 0; addr < NKPT * NBPDR - 1; addr += PAGE_SIZE) 654 invlpg(addr); 655 656#if defined(I586_CPU) && !defined(NO_F00F_HACK) 657 lidt(&r_idt); 658#endif 659 660 /* Initialize the PAT MSR if present. */ 661 pmap_init_pat(); 662 663 /* set up CPU registers and state */ 664 cpu_setregs(); 665 666 /* set up FPU state on the AP */ 667 npxinit(); 668 669 /* set up SSE registers */ 670 enable_sse(); 671 672#ifdef PAE 673 /* Enable the PTE no-execute bit. */ 674 if ((amd_feature & AMDID_NX) != 0) { 675 uint64_t msr; 676 677 msr = rdmsr(MSR_EFER) | EFER_NXE; 678 wrmsr(MSR_EFER, msr); 679 } 680#endif 681 682 /* A quick check from sanity claus */ 683 if (PCPU_GET(apic_id) != lapic_id()) { 684 printf("SMP: cpuid = %d\n", PCPU_GET(cpuid)); 685 printf("SMP: actual apic_id = %d\n", lapic_id()); 686 printf("SMP: correct apic_id = %d\n", PCPU_GET(apic_id)); 687 panic("cpuid mismatch! boom!!"); 688 } 689 690 /* Initialize curthread. */ 691 KASSERT(PCPU_GET(idlethread) != NULL, ("no idle thread")); 692 PCPU_SET(curthread, PCPU_GET(idlethread)); 693 694 mca_init(); 695 696 mtx_lock_spin(&ap_boot_mtx); 697 698 /* Init local apic for irq's */ 699 lapic_setup(1); 700 701 /* Set memory range attributes for this CPU to match the BSP */ 702 mem_range_AP_init(); 703 704 smp_cpus++; 705 706 CTR1(KTR_SMP, "SMP: AP CPU #%d Launched", PCPU_GET(cpuid)); 707 printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid)); 708 709 /* Determine if we are a logical CPU. */ 710 if (logical_cpus > 1 && PCPU_GET(apic_id) % logical_cpus != 0) 711 logical_cpus_mask |= PCPU_GET(cpumask); 712 713 /* Determine if we are a hyperthread. */ 714 if (hyperthreading_cpus > 1 && 715 PCPU_GET(apic_id) % hyperthreading_cpus != 0) 716 hyperthreading_cpus_mask |= PCPU_GET(cpumask); 717 718 /* Build our map of 'other' CPUs. 
*/ 719 PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask)); 720 721 if (bootverbose) 722 lapic_dump("AP"); 723 724 if (smp_cpus == mp_ncpus) { 725 /* enable IPI's, tlb shootdown, freezes etc */ 726 atomic_store_rel_int(&smp_started, 1); 727 smp_active = 1; /* historic */ 728 } 729 730 mtx_unlock_spin(&ap_boot_mtx); 731 732 /* Wait until all the AP's are up. */ 733 while (smp_started == 0) 734 ia32_pause(); 735 736 /* Start per-CPU event timers. */ 737 cpu_initclocks_ap(); 738 739 /* Enter the scheduler. */ 740 sched_throw(NULL); 741 742 panic("scheduler returned us to %s", __func__); 743 /* NOTREACHED */ 744} 745 746/******************************************************************* 747 * local functions and data 748 */ 749 750/* 751 * We tell the I/O APIC code about all the CPUs we want to receive 752 * interrupts. If we don't want certain CPUs to receive IRQs we 753 * can simply not tell the I/O APIC code about them in this function. 754 * We also do not tell it about the BSP since it tells itself about 755 * the BSP internally to work with UP kernels and on UP machines. 756 */ 757static void 758set_interrupt_apic_ids(void) 759{ 760 u_int i, apic_id; 761 762 for (i = 0; i < MAXCPU; i++) { 763 apic_id = cpu_apic_ids[i]; 764 if (apic_id == -1) 765 continue; 766 if (cpu_info[apic_id].cpu_bsp) 767 continue; 768 if (cpu_info[apic_id].cpu_disabled) 769 continue; 770 771 /* Don't let hyperthreads service interrupts. */ 772 if (hyperthreading_cpus > 1 && 773 apic_id % hyperthreading_cpus != 0) 774 continue; 775 776 intr_add_cpu(i); 777 } 778} 779 780/* 781 * Assign logical CPU IDs to local APICs. 782 */ 783static void 784assign_cpu_ids(void) 785{ 786 u_int i; 787 788 TUNABLE_INT_FETCH("machdep.hyperthreading_allowed", 789 &hyperthreading_allowed); 790 791 /* Check for explicitly disabled CPUs. 
*/ 792 for (i = 0; i <= MAX_APIC_ID; i++) { 793 if (!cpu_info[i].cpu_present || cpu_info[i].cpu_bsp) 794 continue; 795 796 if (hyperthreading_cpus > 1 && i % hyperthreading_cpus != 0) { 797 cpu_info[i].cpu_hyperthread = 1; 798#if defined(SCHED_ULE) 799 /* 800 * Don't use HT CPU if it has been disabled by a 801 * tunable. 802 */ 803 if (hyperthreading_allowed == 0) { 804 cpu_info[i].cpu_disabled = 1; 805 continue; 806 } 807#endif 808 } 809 810 /* Don't use this CPU if it has been disabled by a tunable. */ 811 if (resource_disabled("lapic", i)) { 812 cpu_info[i].cpu_disabled = 1; 813 continue; 814 } 815 } 816 817 /* 818 * Assign CPU IDs to local APIC IDs and disable any CPUs 819 * beyond MAXCPU. CPU 0 is always assigned to the BSP. 820 * 821 * To minimize confusion for userland, we attempt to number 822 * CPUs such that all threads and cores in a package are 823 * grouped together. For now we assume that the BSP is always 824 * the first thread in a package and just start adding APs 825 * starting with the BSP's APIC ID. 826 */ 827 mp_ncpus = 1; 828 cpu_apic_ids[0] = boot_cpu_id; 829 apic_cpuids[boot_cpu_id] = 0; 830 for (i = boot_cpu_id + 1; i != boot_cpu_id; 831 i == MAX_APIC_ID ? 
i = 0 : i++) { 832 if (!cpu_info[i].cpu_present || cpu_info[i].cpu_bsp || 833 cpu_info[i].cpu_disabled) 834 continue; 835 836 if (mp_ncpus < MAXCPU) { 837 cpu_apic_ids[mp_ncpus] = i; 838 apic_cpuids[i] = mp_ncpus; 839 mp_ncpus++; 840 } else 841 cpu_info[i].cpu_disabled = 1; 842 } 843 KASSERT(mp_maxid >= mp_ncpus - 1, 844 ("%s: counters out of sync: max %d, count %d", __func__, mp_maxid, 845 mp_ncpus)); 846} 847 848/* 849 * start each AP in our list 850 */ 851/* Lowest 1MB is already mapped: don't touch*/ 852#define TMPMAP_START 1 853static int 854start_all_aps(void) 855{ 856#ifndef PC98 857 u_char mpbiosreason; 858#endif 859 uintptr_t kptbase; 860 u_int32_t mpbioswarmvec; 861 int apic_id, cpu, i; 862 863 mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN); 864 865 /* install the AP 1st level boot code */ 866 install_ap_tramp(); 867 868 /* save the current value of the warm-start vector */ 869 mpbioswarmvec = *((u_int32_t *) WARMBOOT_OFF); 870#ifndef PC98 871 outb(CMOS_REG, BIOS_RESET); 872 mpbiosreason = inb(CMOS_DATA); 873#endif 874 875 /* set up temporary P==V mapping for AP boot */ 876 /* XXX this is a hack, we should boot the AP on its own stack/PTD */ 877 878 kptbase = (uintptr_t)(void *)KPTphys; 879 for (i = TMPMAP_START; i < NKPT; i++) 880 PTD[i] = (pd_entry_t)(PG_V | PG_RW | 881 ((kptbase + i * PAGE_SIZE) & PG_FRAME)); 882 invltlb(); 883 884 /* start each AP */ 885 for (cpu = 1; cpu < mp_ncpus; cpu++) { 886 apic_id = cpu_apic_ids[cpu]; 887 888 /* allocate and set up a boot stack data page */ 889 bootstacks[cpu] = 890 (char *)kmem_alloc(kernel_map, KSTACK_PAGES * PAGE_SIZE); 891 dpcpu = (void *)kmem_alloc(kernel_map, DPCPU_SIZE); 892 /* setup a vector to our boot code */ 893 *((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET; 894 *((volatile u_short *) WARMBOOT_SEG) = (boot_address >> 4); 895#ifndef PC98 896 outb(CMOS_REG, BIOS_RESET); 897 outb(CMOS_DATA, BIOS_WARM); /* 'warm-start' */ 898#endif 899 900 bootSTK = (char *)bootstacks[cpu] + KSTACK_PAGES * 
PAGE_SIZE - 4; 901 bootAP = cpu; 902 903 /* attempt to start the Application Processor */ 904 CHECK_INIT(99); /* setup checkpoints */ 905 if (!start_ap(apic_id)) { 906 printf("AP #%d (PHY# %d) failed!\n", cpu, apic_id); 907 CHECK_PRINT("trace"); /* show checkpoints */ 908 /* better panic as the AP may be running loose */ 909 printf("panic y/n? [y] "); 910 if (cngetc() != 'n') 911 panic("bye-bye"); 912 } 913 CHECK_PRINT("trace"); /* show checkpoints */ 914 915 all_cpus |= (1 << cpu); /* record AP in CPU map */ 916 } 917 918 /* build our map of 'other' CPUs */ 919 PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask)); 920 921 /* restore the warmstart vector */ 922 *(u_int32_t *) WARMBOOT_OFF = mpbioswarmvec; 923 924#ifndef PC98 925 outb(CMOS_REG, BIOS_RESET); 926 outb(CMOS_DATA, mpbiosreason); 927#endif 928 929 /* Undo V==P hack from above */ 930 for (i = TMPMAP_START; i < NKPT; i++) 931 PTD[i] = 0; 932 pmap_invalidate_range(kernel_pmap, 0, NKPT * NBPDR - 1); 933 934 /* number of APs actually started */ 935 return mp_naps; 936} 937 938/* 939 * load the 1st level AP boot code into base memory. 940 */ 941 942/* targets for relocation */ 943extern void bigJump(void); 944extern void bootCodeSeg(void); 945extern void bootDataSeg(void); 946extern void MPentry(void); 947extern u_int MP_GDT; 948extern u_int mp_gdtbase; 949 950static void 951install_ap_tramp(void) 952{ 953 int x; 954 int size = *(int *) ((u_long) & bootMP_size); 955 vm_offset_t va = boot_address + KERNBASE; 956 u_char *src = (u_char *) ((u_long) bootMP); 957 u_char *dst = (u_char *) va; 958 u_int boot_base = (u_int) bootMP; 959 u_int8_t *dst8; 960 u_int16_t *dst16; 961 u_int32_t *dst32; 962 963 KASSERT (size <= PAGE_SIZE, 964 ("'size' do not fit into PAGE_SIZE, as expected.")); 965 pmap_kenter(va, boot_address); 966 pmap_invalidate_page (kernel_pmap, va); 967 for (x = 0; x < size; ++x) 968 *dst++ = *src++; 969 970 /* 971 * modify addresses in code we just moved to basemem. 
unfortunately we 972 * need fairly detailed info about mpboot.s for this to work. changes 973 * to mpboot.s might require changes here. 974 */ 975 976 /* boot code is located in KERNEL space */ 977 dst = (u_char *) va; 978 979 /* modify the lgdt arg */ 980 dst32 = (u_int32_t *) (dst + ((u_int) & mp_gdtbase - boot_base)); 981 *dst32 = boot_address + ((u_int) & MP_GDT - boot_base); 982 983 /* modify the ljmp target for MPentry() */ 984 dst32 = (u_int32_t *) (dst + ((u_int) bigJump - boot_base) + 1); 985 *dst32 = ((u_int) MPentry - KERNBASE); 986 987 /* modify the target for boot code segment */ 988 dst16 = (u_int16_t *) (dst + ((u_int) bootCodeSeg - boot_base)); 989 dst8 = (u_int8_t *) (dst16 + 1); 990 *dst16 = (u_int) boot_address & 0xffff; 991 *dst8 = ((u_int) boot_address >> 16) & 0xff; 992 993 /* modify the target for boot data segment */ 994 dst16 = (u_int16_t *) (dst + ((u_int) bootDataSeg - boot_base)); 995 dst8 = (u_int8_t *) (dst16 + 1); 996 *dst16 = (u_int) boot_address & 0xffff; 997 *dst8 = ((u_int) boot_address >> 16) & 0xff; 998} 999 1000/* 1001 * This function starts the AP (application processor) identified 1002 * by the APIC ID 'physicalCpu'. It does quite a "song and dance" 1003 * to accomplish this. This is necessary because of the nuances 1004 * of the different hardware we might encounter. It isn't pretty, 1005 * but it seems to work. 1006 */ 1007static int 1008start_ap(int apic_id) 1009{ 1010 int vector, ms; 1011 int cpus; 1012 1013 /* calculate the vector */ 1014 vector = (boot_address >> 12) & 0xff; 1015 1016 /* used as a watchpoint to signal AP startup */ 1017 cpus = mp_naps; 1018 1019 /* 1020 * first we do an INIT/RESET IPI this INIT IPI might be run, reseting 1021 * and running the target CPU. OR this INIT IPI might be latched (P5 1022 * bug), CPU waiting for STARTUP IPI. OR this INIT IPI might be 1023 * ignored. 
1024 */ 1025 1026 /* do an INIT IPI: assert RESET */ 1027 lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE | 1028 APIC_LEVEL_ASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT, apic_id); 1029 1030 /* wait for pending status end */ 1031 lapic_ipi_wait(-1); 1032 1033 /* do an INIT IPI: deassert RESET */ 1034 lapic_ipi_raw(APIC_DEST_ALLESELF | APIC_TRIGMOD_LEVEL | 1035 APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT, 0); 1036 1037 /* wait for pending status end */ 1038 DELAY(10000); /* wait ~10mS */ 1039 lapic_ipi_wait(-1); 1040 1041 /* 1042 * next we do a STARTUP IPI: the previous INIT IPI might still be 1043 * latched, (P5 bug) this 1st STARTUP would then terminate 1044 * immediately, and the previously started INIT IPI would continue. OR 1045 * the previous INIT IPI has already run. and this STARTUP IPI will 1046 * run. OR the previous INIT IPI was ignored. and this STARTUP IPI 1047 * will run. 1048 */ 1049 1050 /* do a STARTUP IPI */ 1051 lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE | 1052 APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP | 1053 vector, apic_id); 1054 lapic_ipi_wait(-1); 1055 DELAY(200); /* wait ~200uS */ 1056 1057 /* 1058 * finally we do a 2nd STARTUP IPI: this 2nd STARTUP IPI should run IF 1059 * the previous STARTUP IPI was cancelled by a latched INIT IPI. OR 1060 * this STARTUP IPI will be ignored, as only ONE STARTUP IPI is 1061 * recognized after hardware RESET or INIT IPI. 1062 */ 1063 1064 lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE | 1065 APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP | 1066 vector, apic_id); 1067 lapic_ipi_wait(-1); 1068 DELAY(200); /* wait ~200uS */ 1069 1070 /* Wait up to 5 seconds for it to start. 
 */
	for (ms = 0; ms < 5000; ms++) {
		/*
		 * NOTE(review): mp_naps is presumably advanced by the AP's
		 * early init path — that code is not visible in this chunk.
		 */
		if (mp_naps > cpus)
			return 1;	/* return SUCCESS */
		DELAY(1000);
	}
	return 0;		/* return FAILURE */
}

#ifdef COUNT_XINVLTLB_HITS
/* Per-CPU hit counters and IPI statistics, exported under debug.xhits. */
u_int xhits_gbl[MAXCPU];
u_int xhits_pg[MAXCPU];
u_int xhits_rng[MAXCPU];
SYSCTL_NODE(_debug, OID_AUTO, xhits, CTLFLAG_RW, 0, "");
SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, global, CTLFLAG_RW, &xhits_gbl,
    sizeof(xhits_gbl), "IU", "");
SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, page, CTLFLAG_RW, &xhits_pg,
    sizeof(xhits_pg), "IU", "");
SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, range, CTLFLAG_RW, &xhits_rng,
    sizeof(xhits_rng), "IU", "");

u_int ipi_global;
u_int ipi_page;
u_int ipi_range;
u_int ipi_range_size;
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_global, CTLFLAG_RW, &ipi_global, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_page, CTLFLAG_RW, &ipi_page, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range, CTLFLAG_RW, &ipi_range, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range_size, CTLFLAG_RW, &ipi_range_size,
    0, "");

u_int ipi_masked_global;
u_int ipi_masked_page;
u_int ipi_masked_range;
u_int ipi_masked_range_size;
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_global, CTLFLAG_RW,
    &ipi_masked_global, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_page, CTLFLAG_RW,
    &ipi_masked_page, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range, CTLFLAG_RW,
    &ipi_masked_range, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range_size, CTLFLAG_RW,
    &ipi_masked_range_size, 0, "");
#endif /* COUNT_XINVLTLB_HITS */

/*
 * Flush the TLB on all other CPU's.
 *
 * Publishes addr1/addr2 through the smp_tlb_addr1/smp_tlb_addr2 globals,
 * sends "vector" as an IPI to every CPU but the current one, then spins
 * (holding smp_ipi_mtx) until smp_tlb_wait reaches the number of other
 * CPUs.  smp_tlb_wait is presumably incremented by the remote IPI
 * handlers — those are not visible in this file chunk.  Must be called
 * with interrupts enabled; the PSL_I check below panics otherwise.
 */
static void
smp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2)
{
	u_int ncpu;

	ncpu = mp_ncpus - 1;	/* does not shootdown self */
	if (ncpu < 1)
		return;		/* no other cpus */
	if (!(read_eflags() & PSL_I))
		panic("%s: interrupts disabled", __func__);
	mtx_lock_spin(&smp_ipi_mtx);
	smp_tlb_addr1 = addr1;
	smp_tlb_addr2 = addr2;
	/* Release-store: the address publication above must be visible
	 * before any remote CPU observes the counter reset. */
	atomic_store_rel_int(&smp_tlb_wait, 0);
	ipi_all_but_self(vector);
	while (smp_tlb_wait < ncpu)
		ia32_pause();
	mtx_unlock_spin(&smp_ipi_mtx);
}

/*
 * As above, but only shoot down the CPUs in "mask".  A mask of
 * (u_int)-1 means "all but self" and uses the ipi_all_but_self()
 * broadcast; otherwise the current CPU is stripped from the mask and
 * the IPI is delivered per-CPU via ipi_selected().
 */
static void
smp_targeted_tlb_shootdown(cpumask_t mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2)
{
	int ncpu, othercpus;

	othercpus = mp_ncpus - 1;
	if (mask == (u_int)-1) {
		ncpu = othercpus;
		if (ncpu < 1)
			return;
	} else {
		mask &= ~PCPU_GET(cpumask);
		if (mask == 0)
			return;
		ncpu = bitcount32(mask);
		if (ncpu > othercpus) {
			/* XXX this should be a panic offence */
			printf("SMP: tlb shootdown to %d other cpus (only have %d)\n",
			    ncpu, othercpus);
			ncpu = othercpus;
		}
		/* XXX should be a panic, implied by mask == 0 above */
		if (ncpu < 1)
			return;
	}
	if (!(read_eflags() & PSL_I))
		panic("%s: interrupts disabled", __func__);
	mtx_lock_spin(&smp_ipi_mtx);
	smp_tlb_addr1 = addr1;
	smp_tlb_addr2 = addr2;
	atomic_store_rel_int(&smp_tlb_wait, 0);
	if (mask == (u_int)-1)
		ipi_all_but_self(vector);
	else
		ipi_selected(mask, vector);
	/* Wait for all targeted CPUs to acknowledge before dropping the lock. */
	while (smp_tlb_wait < ncpu)
		ia32_pause();
	mtx_unlock_spin(&smp_ipi_mtx);
}

/*
 * Send an IPI to specified CPU handling the bitmap logic.
 */
static void
ipi_send_cpu(int cpu, u_int ipi)
{
	u_int bitmap, old_pending, new_pending;

	KASSERT(cpu_apic_ids[cpu] != -1, ("IPI to non-existent CPU %d", cpu));

	if (IPI_IS_BITMAPED(ipi)) {
		/*
		 * Bitmapped IPIs are coalesced onto a single vector:
		 * record the specific request in cpu_ipi_pending[cpu]
		 * with a lock-free cmpset loop, and only raise
		 * IPI_BITMAP_VECTOR if nothing was already pending
		 * (the earlier sender's IPI will cover this request).
		 */
		bitmap = 1 << ipi;
		ipi = IPI_BITMAP_VECTOR;
		do {
			old_pending = cpu_ipi_pending[cpu];
			new_pending = old_pending | bitmap;
		} while (!atomic_cmpset_int(&cpu_ipi_pending[cpu],
		    old_pending, new_pending));
		if (old_pending)
			return;
	}
	lapic_ipi_vectored(ipi, cpu_apic_ids[cpu]);
}

/* Flush caches on all other CPUs; no-op until SMP startup completes. */
void
smp_cache_flush(void)
{

	if (smp_started)
		smp_tlb_shootdown(IPI_INVLCACHE, 0, 0);
}

/* Invalidate the entire TLB on all other CPUs. */
void
smp_invltlb(void)
{

	if (smp_started) {
		smp_tlb_shootdown(IPI_INVLTLB, 0, 0);
#ifdef COUNT_XINVLTLB_HITS
		ipi_global++;
#endif
	}
}

/* Invalidate a single page mapping on all other CPUs. */
void
smp_invlpg(vm_offset_t addr)
{

	if (smp_started) {
		smp_tlb_shootdown(IPI_INVLPG, addr, 0);
#ifdef COUNT_XINVLTLB_HITS
		ipi_page++;
#endif
	}
}

/* Invalidate the page range [addr1, addr2) on all other CPUs. */
void
smp_invlpg_range(vm_offset_t addr1, vm_offset_t addr2)
{

	if (smp_started) {
		smp_tlb_shootdown(IPI_INVLRNG, addr1, addr2);
#ifdef COUNT_XINVLTLB_HITS
		ipi_range++;
		ipi_range_size += (addr2 - addr1) / PAGE_SIZE;
#endif
	}
}

/* Invalidate the entire TLB on the CPUs named by "mask". */
void
smp_masked_invltlb(cpumask_t mask)
{

	if (smp_started) {
		smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, 0, 0);
#ifdef COUNT_XINVLTLB_HITS
		ipi_masked_global++;
#endif
	}
}

/* Invalidate a single page mapping on the CPUs named by "mask". */
void
smp_masked_invlpg(cpumask_t mask, vm_offset_t addr)
{

	if (smp_started) {
		smp_targeted_tlb_shootdown(mask, IPI_INVLPG, addr, 0);
#ifdef COUNT_XINVLTLB_HITS
		ipi_masked_page++;
#endif
	}
}

/* Invalidate the page range [addr1, addr2) on the CPUs named by "mask". */
void
smp_masked_invlpg_range(cpumask_t mask, vm_offset_t addr1, vm_offset_t addr2)
{

	if (smp_started) {
		smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, addr1, addr2);
#ifdef COUNT_XINVLTLB_HITS
		ipi_masked_range++;
		ipi_masked_range_size += (addr2 - addr1) / PAGE_SIZE;
#endif
	}
}

/*
 * Handler for IPI_BITMAP_VECTOR: atomically fetch-and-clear this CPU's
 * pending-IPI bitmap and dispatch each request that was recorded by
 * ipi_send_cpu() above.
 */
void
ipi_bitmap_handler(struct trapframe frame)
{
	int cpu = PCPU_GET(cpuid);
	u_int ipi_bitmap;

	ipi_bitmap = atomic_readandclear_int(&cpu_ipi_pending[cpu]);

	if (ipi_bitmap & (1 << IPI_PREEMPT)) {
#ifdef COUNT_IPIS
		(*ipi_preempt_counts[cpu])++;
#endif
		sched_preempt(curthread);
	}
	if (ipi_bitmap & (1 << IPI_AST)) {
#ifdef COUNT_IPIS
		(*ipi_ast_counts[cpu])++;
#endif
		/* Nothing to do for AST */
	}
	if (ipi_bitmap & (1 << IPI_HARDCLOCK)) {
#ifdef COUNT_IPIS
		(*ipi_hardclock_counts[cpu])++;
#endif
		hardclockintr(&frame);
	}
	if (ipi_bitmap & (1 << IPI_STATCLOCK)) {
#ifdef COUNT_IPIS
		(*ipi_statclock_counts[cpu])++;
#endif
		statclockintr(&frame);
	}
}

/*
 * send an IPI to a set of cpus.
 */
void
ipi_selected(cpumask_t cpus, u_int ipi)
{
	int cpu;

	/*
	 * IPI_STOP_HARD maps to a NMI and the trap handler needs a bit
	 * of help in order to understand what is the source.
	 * Set the mask of receiving CPUs for this purpose.
	 */
	if (ipi == IPI_STOP_HARD)
		atomic_set_int(&ipi_nmi_pending, cpus);

	CTR3(KTR_SMP, "%s: cpus: %x ipi: %x", __func__, cpus, ipi);
	/* Deliver per-CPU, peeling the lowest set bit off "cpus" each pass. */
	while ((cpu = ffs(cpus)) != 0) {
		cpu--;
		cpus &= ~(1 << cpu);
		ipi_send_cpu(cpu, ipi);
	}
}

/*
 * send an IPI to a specific CPU.
 */
void
ipi_cpu(int cpu, u_int ipi)
{

	/*
	 * IPI_STOP_HARD maps to a NMI and the trap handler needs a bit
	 * of help in order to understand what is the source.
	 * Set the mask of receiving CPUs for this purpose.
	 */
	if (ipi == IPI_STOP_HARD)
		atomic_set_int(&ipi_nmi_pending, 1 << cpu);

	CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi);
	ipi_send_cpu(cpu, ipi);
}

/*
 * send an IPI to all CPUs EXCEPT myself.
 *
 * Bitmapped IPIs cannot use the LAPIC broadcast shorthand (they must go
 * through the per-CPU pending bitmaps), so they are routed through
 * ipi_selected() instead.
 */
void
ipi_all_but_self(u_int ipi)
{

	if (IPI_IS_BITMAPED(ipi)) {
		ipi_selected(PCPU_GET(other_cpus), ipi);
		return;
	}

	/*
	 * IPI_STOP_HARD maps to a NMI and the trap handler needs a bit
	 * of help in order to understand what is the source.
	 * Set the mask of receiving CPUs for this purpose.
	 */
	if (ipi == IPI_STOP_HARD)
		atomic_set_int(&ipi_nmi_pending, PCPU_GET(other_cpus));
	CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
	lapic_ipi_vectored(ipi, APIC_IPI_DEST_OTHERS);
}

/*
 * Decide whether an incoming NMI is an IPI_STOP_HARD aimed at this CPU.
 * Returns (1) when it is not ours to handle; returns (0) after clearing
 * our bit in ipi_nmi_pending and running cpustop_handler().
 * NOTE(review): presumably invoked from the NMI trap handler — the
 * caller is not visible in this file chunk.
 */
int
ipi_nmi_handler()
{
	cpumask_t cpumask;

	/*
	 * As long as there is not a simple way to know about a NMI's
	 * source, if the bitmask for the current CPU is present in
	 * the global pending bitword an IPI_STOP_HARD has been issued
	 * and should be handled.
	 */
	cpumask = PCPU_GET(cpumask);
	if ((ipi_nmi_pending & cpumask) == 0)
		return (1);

	atomic_clear_int(&ipi_nmi_pending, cpumask);
	cpustop_handler();
	return (0);
}

/*
 * Handle an IPI_STOP by saving our current context and spinning until we
 * are resumed.  Only CPU 0 runs cpustop_restartfunc (if set) on resume.
 */
void
cpustop_handler(void)
{
	cpumask_t cpumask;
	u_int cpu;

	cpu = PCPU_GET(cpuid);
	cpumask = PCPU_GET(cpumask);

	savectx(&stoppcbs[cpu]);

	/* Indicate that we are stopped */
	atomic_set_int(&stopped_cpus, cpumask);

	/* Wait for restart */
	while (!(started_cpus & cpumask))
		ia32_pause();

	atomic_clear_int(&started_cpus, cpumask);
	atomic_clear_int(&stopped_cpus, cpumask);

	if (cpu == 0 && cpustop_restartfunc != NULL) {
		cpustop_restartfunc();
		cpustop_restartfunc = NULL;
	}
}

/*
 * This is called once the rest of the system is up and running and we're
 * ready to let the AP's out of the pen.
 */
static void
release_aps(void *dummy __unused)
{

	if (mp_ncpus == 1)
		return;
	atomic_store_rel_int(&aps_ready, 1);
	/* Spin until the last AP flips smp_started (done elsewhere). */
	while (smp_started == 0)
		ia32_pause();
}
SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL);

/*
 * sysctl handler for machdep.hlt_cpus: accept a new halt mask, keep the
 * hlt_logical_cpus flag consistent with it, force hyperthreads into the
 * mask when HT is disallowed, and never allow every CPU to be halted
 * (bit 0 is cleared if the mask would cover all_cpus).
 */
static int
sysctl_hlt_cpus(SYSCTL_HANDLER_ARGS)
{
	cpumask_t mask;
	int error;

	mask = hlt_cpus_mask;
	error = sysctl_handle_int(oidp, &mask, 0, req);
	if (error || !req->newptr)
		return (error);

	if (logical_cpus_mask != 0 &&
	    (mask & logical_cpus_mask) == logical_cpus_mask)
		hlt_logical_cpus = 1;
	else
		hlt_logical_cpus = 0;

	if (! hyperthreading_allowed)
		mask |= hyperthreading_cpus_mask;

	if ((mask & all_cpus) == all_cpus)
		mask &= ~(1<<0);
	hlt_cpus_mask = mask;
	return (error);
}
SYSCTL_PROC(_machdep, OID_AUTO, hlt_cpus, CTLTYPE_INT|CTLFLAG_RW,
    0, 0, sysctl_hlt_cpus, "IU",
    "Bitmap of CPUs to halt.  101 (binary) will halt CPUs 0 and 2.");

/*
 * sysctl handler for machdep.hlt_logical_cpus: toggle halting of all
 * logical (HT sibling) CPUs as a group, with the same HT-disallowed and
 * keep-one-CPU-running adjustments as sysctl_hlt_cpus() above.
 */
static int
sysctl_hlt_logical_cpus(SYSCTL_HANDLER_ARGS)
{
	int disable, error;

	disable = hlt_logical_cpus;
	error = sysctl_handle_int(oidp, &disable, 0, req);
	if (error || !req->newptr)
		return (error);

	if (disable)
		hlt_cpus_mask |= logical_cpus_mask;
	else
		hlt_cpus_mask &= ~logical_cpus_mask;

	if (! hyperthreading_allowed)
		hlt_cpus_mask |= hyperthreading_cpus_mask;

	if ((hlt_cpus_mask & all_cpus) == all_cpus)
		hlt_cpus_mask &= ~(1<<0);

	hlt_logical_cpus = disable;
	return (error);
}

/*
 * sysctl handler for machdep.hyperthreading_allowed: enable/disable use
 * of HT sibling CPUs by adjusting hlt_cpus_mask accordingly.
 */
static int
sysctl_hyperthreading_allowed(SYSCTL_HANDLER_ARGS)
{
	int allowed, error;

	allowed = hyperthreading_allowed;
	error = sysctl_handle_int(oidp, &allowed, 0, req);
	if (error || !req->newptr)
		return (error);

#ifdef SCHED_ULE
	/*
	 * SCHED_ULE doesn't allow enabling/disabling HT cores at
	 * run-time.
	 */
	if (allowed != hyperthreading_allowed)
		return (ENOTSUP);
	return (error);
#endif

	if (allowed)
		hlt_cpus_mask &= ~hyperthreading_cpus_mask;
	else
		hlt_cpus_mask |= hyperthreading_cpus_mask;

	if (logical_cpus_mask != 0 &&
	    (hlt_cpus_mask & logical_cpus_mask) == logical_cpus_mask)
		hlt_logical_cpus = 1;
	else
		hlt_logical_cpus = 0;

	/* Never halt every CPU: keep CPU 0 runnable. */
	if ((hlt_cpus_mask & all_cpus) == all_cpus)
		hlt_cpus_mask &= ~(1<<0);

	hyperthreading_allowed = allowed;
	return (error);
}

/*
 * SYSINIT hook (SI_SUB_SMP): register the logical-CPU halt sysctls,
 * apply the machdep.hlt_logical_cpus boot tunable, and force
 * hyperthread siblings into hlt_cpus_mask when HT use is disallowed.
 * Does nothing on systems with no logical (HT) CPUs.
 */
static void
cpu_hlt_setup(void *dummy __unused)
{

	if (logical_cpus_mask != 0) {
		TUNABLE_INT_FETCH("machdep.hlt_logical_cpus",
		    &hlt_logical_cpus);
		sysctl_ctx_init(&logical_cpu_clist);
		SYSCTL_ADD_PROC(&logical_cpu_clist,
		    SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO,
		    "hlt_logical_cpus", CTLTYPE_INT|CTLFLAG_RW, 0, 0,
		    sysctl_hlt_logical_cpus, "IU", "");
		SYSCTL_ADD_UINT(&logical_cpu_clist,
		    SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO,
		    "logical_cpus_mask", CTLTYPE_INT|CTLFLAG_RD,
		    &logical_cpus_mask, 0, "");

		if (hlt_logical_cpus)
			hlt_cpus_mask |= logical_cpus_mask;

		/*
		 * If necessary for security purposes, force
		 * hyperthreading off, regardless of the value
		 * of hlt_logical_cpus.
		 */
		if (hyperthreading_cpus_mask) {
			SYSCTL_ADD_PROC(&logical_cpu_clist,
			    SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO,
			    "hyperthreading_allowed", CTLTYPE_INT|CTLFLAG_RW,
			    0, 0, sysctl_hyperthreading_allowed, "IU", "");
			if (! hyperthreading_allowed)
				hlt_cpus_mask |= hyperthreading_cpus_mask;
		}
	}
}
SYSINIT(cpu_hlt, SI_SUB_SMP, SI_ORDER_ANY, cpu_hlt_setup, NULL);

/*
 * Park the calling CPU while it appears in hlt_cpus_mask: loop in
 * "sti; hlt" (interrupts briefly enabled so wakeups can be delivered)
 * until the CPU is removed from the mask.  Returns 1 if the CPU halted
 * at least once, 0 otherwise.
 */
int
mp_grab_cpu_hlt(void)
{
	cpumask_t mask;
#ifdef MP_WATCHDOG
	u_int cpuid;
#endif
	int retval;

	mask = PCPU_GET(cpumask);
#ifdef MP_WATCHDOG
	cpuid = PCPU_GET(cpuid);
	ap_watchdog(cpuid);
#endif

	retval = 0;
	while (mask & hlt_cpus_mask) {
		retval = 1;
		__asm __volatile("sti; hlt" : : : "memory");
	}
	return (retval);
}

#ifdef COUNT_IPIS
/*
 * Setup interrupt counters for IPI handlers.
 */
static void
mp_ipi_intrcnt(void *dummy)
{
	char buf[64];
	int i;

	CPU_FOREACH(i) {
		snprintf(buf, sizeof(buf), "cpu%d:invltlb", i);
		intrcnt_add(buf, &ipi_invltlb_counts[i]);
		snprintf(buf, sizeof(buf), "cpu%d:invlrng", i);
		intrcnt_add(buf, &ipi_invlrng_counts[i]);
		snprintf(buf, sizeof(buf), "cpu%d:invlpg", i);
		intrcnt_add(buf, &ipi_invlpg_counts[i]);
		snprintf(buf, sizeof(buf), "cpu%d:preempt", i);
		intrcnt_add(buf, &ipi_preempt_counts[i]);
		snprintf(buf, sizeof(buf), "cpu%d:ast", i);
		intrcnt_add(buf, &ipi_ast_counts[i]);
		snprintf(buf, sizeof(buf), "cpu%d:rendezvous", i);
		intrcnt_add(buf, &ipi_rendezvous_counts[i]);
		snprintf(buf, sizeof(buf), "cpu%d:lazypmap", i);
		intrcnt_add(buf, &ipi_lazypmap_counts[i]);
		snprintf(buf, sizeof(buf), "cpu%d:hardclock", i);
		intrcnt_add(buf, &ipi_hardclock_counts[i]);
		snprintf(buf, sizeof(buf), "cpu%d:statclock", i);
		intrcnt_add(buf, &ipi_statclock_counts[i]);
	}
}
SYSINIT(mp_ipi_intrcnt, SI_SUB_INTR, SI_ORDER_MIDDLE, mp_ipi_intrcnt, NULL);
#endif