mp_machdep.c revision 166569
/*-
 * Copyright (c) 1996, by Steve Passe
 * Copyright (c) 2003, by Peter Wemm
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. The name of the developer may NOT be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
25116742Ssam */ 26116742Ssam 27116742Ssam#include <sys/cdefs.h> 28116742Ssam__FBSDID("$FreeBSD: head/sys/amd64/amd64/mp_machdep.c 166569 2007-02-08 16:49:59Z jhb $"); 29116742Ssam 30178354Ssam#include "opt_cpu.h" 31178354Ssam#include "opt_kstack_pages.h" 32116742Ssam#include "opt_mp_watchdog.h" 33116742Ssam#include "opt_sched.h" 34116742Ssam 35116742Ssam#include <sys/param.h> 36116742Ssam#include <sys/systm.h> 37138568Ssam#include <sys/bus.h> 38116742Ssam#ifdef GPROF 39116742Ssam#include <sys/gmon.h> 40116742Ssam#endif 41116742Ssam#include <sys/kernel.h> 42116742Ssam#include <sys/ktr.h> 43116742Ssam#include <sys/lock.h> 44116742Ssam#include <sys/malloc.h> 45178354Ssam#include <sys/memrange.h> 46190391Ssam#include <sys/mutex.h> 47190391Ssam#include <sys/pcpu.h> 48190391Ssam#include <sys/proc.h> 49186904Ssam#include <sys/smp.h> 50186904Ssam#include <sys/sysctl.h> 51186904Ssam 52178354Ssam#include <vm/vm.h> 53195618Srpaulo#include <vm/vm_param.h> 54206358Srpaulo#include <vm/pmap.h> 55116742Ssam#include <vm/vm_kern.h> 56116742Ssam#include <vm/vm_extern.h> 57116742Ssam 58147221Ssam#include <machine/apicreg.h> 59195618Srpaulo#include <machine/md_var.h> 60195618Srpaulo#include <machine/mp_watchdog.h> 61195618Srpaulo#include <machine/pcb.h> 62195618Srpaulo#include <machine/psl.h> 63195618Srpaulo#include <machine/smp.h> 64147221Ssam#include <machine/specialreg.h> 65147221Ssam#include <machine/tss.h> 66178354Ssam 67178354Ssam#define WARMBOOT_TARGET 0 68178354Ssam#define WARMBOOT_OFF (KERNBASE + 0x0467) 69178354Ssam#define WARMBOOT_SEG (KERNBASE + 0x0469) 70178354Ssam 71178354Ssam#define CMOS_REG (0x70) 72178354Ssam#define CMOS_DATA (0x71) 73178354Ssam#define BIOS_RESET (0x0f) 74147221Ssam#define BIOS_WARM (0x0a) 75159139Sdds 76159139Sdds/* lock region used by kernel profiling */ 77159139Sddsint mcount_lock; 78159139Sdds 79159139Sddsint mp_naps; /* # of Applications processors */ 80159139Sddsint boot_cpu_id = -1; /* designated BSP */ 81170530Ssamextern int nkpt; 82170530Ssam 
83179643Ssam/* 84179643Ssam * CPU topology map datastructures for HTT. 85138568Ssam */ 86138568Ssamstatic struct cpu_group mp_groups[MAXCPU]; 87178354Ssamstatic struct cpu_top mp_top; 88170530Ssam 89170530Ssam/* AP uses this during bootstrap. Do not staticize. */ 90178354Ssamchar *bootSTK; 91178354Ssamstatic int bootAP; 92116742Ssam 93138568Ssam/* Free these after use */ 94120104Ssamvoid *bootstacks[MAXCPU]; 95138568Ssam 96148863Ssam/* Temporary holder for double fault stack */ 97170530Ssamchar *doublefault_stack; 98178354Ssam 99178354Ssam/* Hotwire a 0->4MB V==P mapping */ 100138568Ssamextern pt_entry_t *KPTphys; 101172211Ssam 102138568Ssam/* SMP page table page */ 103127876Ssamextern pt_entry_t *SMPpt; 104178354Ssam 105120481Ssamstruct pcb stoppcbs[MAXCPU]; 106116742Ssam 107138568Ssam/* Variables needed for SMP tlb shootdown. */ 108116742Ssamvm_offset_t smp_tlb_addr1; 109195379Ssamvm_offset_t smp_tlb_addr2; 110195379Ssamvolatile int smp_tlb_wait; 111195379Ssam 112178354Ssamextern inthand_t IDTVEC(fast_syscall), IDTVEC(fast_syscall32); 113178354Ssam 114178354Ssam#ifdef STOP_NMI 115178354Ssamvolatile cpumask_t ipi_nmi_pending; 116178354Ssam 117116742Ssamstatic void ipi_nmi_selected(u_int32_t cpus); 118138568Ssam#endif 119138568Ssam 120138568Ssam/* 121178354Ssam * Local data and functions. 122178354Ssam */ 123138568Ssam 124170530Ssam#ifdef STOP_NMI 125178354Ssam/* 126138568Ssam * Provide an alternate method of stopping other CPUs. If another CPU has 127178354Ssam * disabled interrupts the conventional STOP IPI will be blocked. This 128178354Ssam * NMI-based stop should get through in that case. 
129178354Ssam */ 130178354Ssamstatic int stop_cpus_with_nmi = 1; 131178354SsamSYSCTL_INT(_debug, OID_AUTO, stop_cpus_with_nmi, CTLTYPE_INT | CTLFLAG_RW, 132178354Ssam &stop_cpus_with_nmi, 0, ""); 133138568SsamTUNABLE_INT("debug.stop_cpus_with_nmi", &stop_cpus_with_nmi); 134178354Ssam#else 135178354Ssam#define stop_cpus_with_nmi 0 136178354Ssam#endif 137170530Ssam 138178354Ssamstatic u_int logical_cpus; 139178354Ssam 140195379Ssam/* used to hold the AP's until we are ready to release them */ 141178354Ssamstatic struct mtx ap_boot_mtx; 142172062Ssam 143178354Ssam/* Set to 1 once we're ready to let the APs out of the pen. */ 144178354Ssamstatic volatile int aps_ready = 0; 145178354Ssam 146178354Ssam/* 147178354Ssam * Store data from cpu_add() until later in the boot when we actually setup 148178354Ssam * the APs. 149178354Ssam */ 150178354Ssamstruct cpu_info { 151178354Ssam int cpu_present:1; 152178354Ssam int cpu_bsp:1; 153178354Ssam int cpu_disabled:1; 154184277Ssam} static cpu_info[MAXCPU]; 155184277Ssamstatic int cpu_apic_ids[MAXCPU]; 156184277Ssam 157184277Ssam/* Holds pending bitmap based IPIs per CPU */ 158184277Ssamstatic volatile u_int cpu_ipi_pending[MAXCPU]; 159148863Ssam 160148863Ssamstatic u_int boot_address; 161148863Ssam 162178354Ssamstatic void set_interrupt_apic_ids(void); 163148863Ssamstatic int start_all_aps(void); 164178354Ssamstatic int start_ap(int apic_id); 165178354Ssamstatic void release_aps(void *dummy); 166178354Ssam 167178354Ssamstatic int hlt_logical_cpus; 168178354Ssamstatic u_int hyperthreading_cpus; 169178354Ssamstatic cpumask_t hyperthreading_cpus_mask; 170178354Ssamstatic int hyperthreading_allowed = 1; 171178354Ssamstatic struct sysctl_ctx_list logical_cpu_clist; 172186302Ssamstatic u_int bootMP_size; 173184210Sdes 174178354Ssamstatic void 175178354Ssammem_range_AP_init(void) 176178354Ssam{ 177178354Ssam if (mem_range_softc.mr_op && mem_range_softc.mr_op->initAP) 178178354Ssam mem_range_softc.mr_op->initAP(&mem_range_softc); 
179178354Ssam} 180178354Ssam 181138568Ssamvoid 182138568Ssammp_topology(void) 183178354Ssam{ 184118887Ssam struct cpu_group *group; 185178354Ssam u_int regs[4]; 186116742Ssam int logical_cpus; 187116742Ssam int apic_id; 188116742Ssam int groups; 189178354Ssam int cpu; 190116742Ssam 191178354Ssam /* Build the smp_topology map. */ 192116742Ssam /* Nothing to do if there is no HTT support. */ 193178354Ssam if ((cpu_feature & CPUID_HTT) == 0) 194178354Ssam return; 195178354Ssam logical_cpus = (cpu_procinfo & CPUID_HTT_CORES) >> 16; 196178354Ssam if (logical_cpus <= 1) 197138568Ssam return; 198178354Ssam /* Nothing to do if reported cores are physical cores. */ 199186302Ssam if (strcmp(cpu_vendor, "GenuineIntel") == 0 && cpu_high >= 4) { 200178354Ssam cpuid_count(4, 0, regs); 201138568Ssam if ((regs[0] & 0x1f) != 0 && 202116742Ssam logical_cpus <= ((regs[0] >> 26) & 0x3f) + 1) 203116742Ssam return; 204138568Ssam } 205138568Ssam group = &mp_groups[0]; 206138568Ssam groups = 1; 207138568Ssam for (cpu = 0, apic_id = 0; apic_id < MAXCPU; apic_id++) { 208138568Ssam if (!cpu_info[apic_id].cpu_present) 209148302Ssam continue; 210138568Ssam /* 211184277Ssam * If the current group has members and we're not a logical 212184277Ssam * cpu, create a new group. 213138568Ssam */ 214184277Ssam if (group->cg_count != 0 && (apic_id % logical_cpus) == 0) { 215172062Ssam group++; 216184277Ssam groups++; 217184277Ssam } 218184277Ssam group->cg_count++; 219138568Ssam group->cg_mask |= 1 << cpu; 220138568Ssam cpu++; 221138568Ssam } 222148302Ssam 223138568Ssam mp_top.ct_count = groups; 224184277Ssam mp_top.ct_group = mp_groups; 225184277Ssam smp_topology = &mp_top; 226138568Ssam} 227184277Ssam 228172062Ssam/* 229172062Ssam * Calculate usable address in base memory for AP trampoline code. 
230184277Ssam */ 231184277Ssamu_int 232184277Ssammp_bootaddress(u_int basemem) 233184277Ssam{ 234138568Ssam 235138568Ssam bootMP_size = mptramp_end - mptramp_start; 236116742Ssam boot_address = trunc_page(basemem * 1024); /* round down to 4k boundary */ 237183251Ssam if (((basemem * 1024) - boot_address) < bootMP_size) 238183251Ssam boot_address -= PAGE_SIZE; /* not enough, lower by 4k */ 239193966Ssam /* 3 levels of page table pages */ 240193966Ssam mptramp_pagetables = boot_address - (PAGE_SIZE * 3); 241183251Ssam 242183251Ssam return mptramp_pagetables; 243187898Ssam} 244183251Ssam 245183251Ssamvoid 246183251Ssamcpu_add(u_int apic_id, char boot_cpu) 247187898Ssam{ 248183251Ssam 249187898Ssam if (apic_id >= MAXCPU) { 250183251Ssam printf("SMP: CPU %d exceeds maximum CPU %d, ignoring\n", 251187898Ssam apic_id, MAXCPU - 1); 252187898Ssam return; 253188782Ssam } 254188782Ssam KASSERT(cpu_info[apic_id].cpu_present == 0, ("CPU %d added twice", 255188782Ssam apic_id)); 256188782Ssam cpu_info[apic_id].cpu_present = 1; 257191015Ssam if (boot_cpu) { 258187898Ssam KASSERT(boot_cpu_id == -1, 259187898Ssam ("CPU %d claims to be BSP, but CPU %d already is", apic_id, 260191015Ssam boot_cpu_id)); 261191015Ssam boot_cpu_id = apic_id; 262187898Ssam cpu_info[apic_id].cpu_bsp = 1; 263183251Ssam } 264187898Ssam mp_ncpus++; 265183251Ssam if (apic_id > mp_maxid) 266187898Ssam mp_maxid = apic_id; 267183251Ssam if (bootverbose) 268183251Ssam printf("SMP: Added CPU %d (%s)\n", apic_id, boot_cpu ? "BSP" : 269183251Ssam "AP"); 270138568Ssam 271138568Ssam} 272178354Ssam 273138568Ssamvoid 274178354Ssamcpu_mp_setmaxid(void) 275178354Ssam{ 276178354Ssam 277138568Ssam /* 278178354Ssam * mp_maxid should be already set by calls to cpu_add(). 279183251Ssam * Just sanity check its value here. 
280183251Ssam */ 281170530Ssam if (mp_ncpus == 0) 282178354Ssam KASSERT(mp_maxid == 0, 283178354Ssam ("%s: mp_ncpus is zero, but mp_maxid is not", __func__)); 284138568Ssam else if (mp_ncpus == 1) 285183251Ssam mp_maxid = 0; 286170530Ssam else 287170530Ssam KASSERT(mp_maxid >= mp_ncpus - 1, 288170530Ssam ("%s: counters out of sync: max %d, count %d", __func__, 289170530Ssam mp_maxid, mp_ncpus)); 290170530Ssam 291170530Ssam} 292170530Ssam 293170530Ssamint 294170530Ssamcpu_mp_probe(void) 295183251Ssam{ 296183251Ssam 297183251Ssam /* 298183251Ssam * Always record BSP in CPU map so that the mbuf init code works 299183251Ssam * correctly. 300183251Ssam */ 301193655Ssam all_cpus = 1; 302183251Ssam if (mp_ncpus == 0) { 303183251Ssam /* 304193655Ssam * No CPUs were found, so this must be a UP system. Setup 305183251Ssam * the variables to represent a system with a single CPU 306183251Ssam * with an id of 0. 307183251Ssam */ 308183251Ssam mp_ncpus = 1; 309170530Ssam return (0); 310183251Ssam } 311165569Ssam 312138568Ssam /* At least one CPU was found. */ 313138568Ssam if (mp_ncpus == 1) { 314141658Ssam /* 315141658Ssam * One CPU was found, so this must be a UP system with 316141658Ssam * an I/O APIC. 317141658Ssam */ 318141658Ssam mp_maxid = 0; 319141658Ssam return (0); 320141658Ssam } 321141658Ssam 322178354Ssam /* At least two CPUs were found. */ 323141658Ssam return (1); 324141658Ssam} 325116742Ssam 326178354Ssam/* 327116742Ssam * Initialize the IPI handlers and start up the AP's. 328178354Ssam */ 329116742Ssamvoid 330116742Ssamcpu_mp_start(void) 331178354Ssam{ 332195618Srpaulo int i; 333195618Srpaulo u_int threads_per_cache, p[4]; 334178354Ssam 335138568Ssam /* Initialize the logical ID to APIC ID table. 
*/ 336178354Ssam for (i = 0; i < MAXCPU; i++) { 337140753Ssam cpu_apic_ids[i] = -1; 338140753Ssam cpu_ipi_pending[i] = 0; 339138568Ssam } 340138568Ssam 341178354Ssam /* Install an inter-CPU IPI for TLB invalidation */ 342178354Ssam setidt(IPI_INVLTLB, IDTVEC(invltlb), SDT_SYSIGT, SEL_KPL, 0); 343178354Ssam setidt(IPI_INVLPG, IDTVEC(invlpg), SDT_SYSIGT, SEL_KPL, 0); 344178354Ssam setidt(IPI_INVLRNG, IDTVEC(invlrng), SDT_SYSIGT, SEL_KPL, 0); 345178354Ssam 346148843Ssam /* Install an inter-CPU IPI for cache invalidation. */ 347178354Ssam setidt(IPI_INVLCACHE, IDTVEC(invlcache), SDT_SYSIGT, SEL_KPL, 0); 348116742Ssam 349116742Ssam /* Install an inter-CPU IPI for all-CPU rendezvous */ 350116742Ssam setidt(IPI_RENDEZVOUS, IDTVEC(rendezvous), SDT_SYSIGT, SEL_KPL, 0); 351116742Ssam 352116742Ssam /* Install generic inter-CPU IPI handler */ 353178354Ssam setidt(IPI_BITMAP_VECTOR, IDTVEC(ipi_intr_bitmap_handler), 354178354Ssam SDT_SYSIGT, SEL_KPL, 0); 355138568Ssam 356178354Ssam /* Install an inter-CPU IPI for CPU stop/restart */ 357178354Ssam setidt(IPI_STOP, IDTVEC(cpustop), SDT_SYSIGT, SEL_KPL, 0); 358167282Ssam 359167282Ssam /* Set boot_cpu_id if needed. */ 360167282Ssam if (boot_cpu_id == -1) { 361167282Ssam boot_cpu_id = PCPU_GET(apic_id); 362167282Ssam cpu_info[boot_cpu_id].cpu_bsp = 1; 363178354Ssam } else 364178354Ssam KASSERT(boot_cpu_id == PCPU_GET(apic_id), 365178354Ssam ("BSP's APIC ID doesn't match boot_cpu_id")); 366153403Ssam cpu_apic_ids[0] = boot_cpu_id; 367186904Ssam 368186904Ssam /* Start each Application Processor */ 369186904Ssam start_all_aps(); 370153403Ssam 371195618Srpaulo /* Setup the initial logical CPUs info. */ 372195618Srpaulo logical_cpus = logical_cpus_mask = 0; 373195618Srpaulo if (cpu_feature & CPUID_HTT) 374195618Srpaulo logical_cpus = (cpu_procinfo & CPUID_HTT_CORES) >> 16; 375195618Srpaulo 376138568Ssam /* 377138568Ssam * Work out if hyperthreading is *really* enabled. 
This 378138568Ssam * is made really ugly by the fact that processors lie: Dual 379138568Ssam * core processors claim to be hyperthreaded even when they're 380178354Ssam * not, presumably because they want to be treated the same 381178354Ssam * way as HTT with respect to per-cpu software licensing. 382178354Ssam * At the time of writing (May 12, 2005) the only hyperthreaded 383178354Ssam * cpus are from Intel, and Intel's dual-core processors can be 384178354Ssam * identified via the "deterministic cache parameters" cpuid 385170530Ssam * calls. 386178354Ssam */ 387170530Ssam /* 388138568Ssam * First determine if this is an Intel processor which claims 389178354Ssam * to have hyperthreading support. 390138568Ssam */ 391170530Ssam if ((cpu_feature & CPUID_HTT) && 392170530Ssam (strcmp(cpu_vendor, "GenuineIntel") == 0)) { 393170530Ssam /* 394178354Ssam * If the "deterministic cache parameters" cpuid calls 395170530Ssam * are available, use them. 396178354Ssam */ 397178354Ssam if (cpu_high >= 4) { 398178354Ssam /* Ask the processor about the L1 cache. */ 399178354Ssam for (i = 0; i < 1; i++) { 400178354Ssam cpuid_count(4, i, p); 401178354Ssam threads_per_cache = ((p[0] & 0x3ffc000) >> 14) + 1; 402178354Ssam if (hyperthreading_cpus < threads_per_cache) 403178354Ssam hyperthreading_cpus = threads_per_cache; 404178354Ssam if ((p[0] & 0x1f) == 0) 405178354Ssam break; 406170530Ssam } 407170530Ssam } 408170530Ssam 409170530Ssam /* 410178354Ssam * If the deterministic cache parameters are not 411170530Ssam * available, or if no caches were reported to exist, 412170530Ssam * just accept what the HTT flag indicated. 413170530Ssam */ 414138568Ssam if (hyperthreading_cpus == 0) 415170530Ssam hyperthreading_cpus = logical_cpus; 416116742Ssam } 417116742Ssam 418170530Ssam set_interrupt_apic_ids(); 419170530Ssam} 420170530Ssam 421170530Ssam 422170530Ssam/* 423170530Ssam * Print various information about the SMP system hardware and setup. 
424138568Ssam */ 425178354Ssamvoid 426138568Ssamcpu_mp_announce(void) 427178354Ssam{ 428138568Ssam int i, x; 429138568Ssam 430178354Ssam /* List CPUs */ 431178354Ssam printf(" cpu0 (BSP): APIC ID: %2d\n", boot_cpu_id); 432170530Ssam for (i = 1, x = 0; x < MAXCPU; x++) { 433140753Ssam if (!cpu_info[x].cpu_present || cpu_info[x].cpu_bsp) 434178354Ssam continue; 435138568Ssam if (cpu_info[x].cpu_disabled) 436178354Ssam printf(" cpu (AP): APIC ID: %2d (disabled)\n", x); 437178354Ssam else { 438141658Ssam KASSERT(i < mp_ncpus, 439141658Ssam ("mp_ncpus and actual cpus are out of whack")); 440148843Ssam printf(" cpu%d (AP): APIC ID: %2d\n", i++, x); 441138568Ssam } 442178354Ssam } 443178354Ssam} 444138568Ssam 445138568Ssam/* 446170530Ssam * AP CPU's call this to initialize themselves. 447170530Ssam */ 448170530Ssamvoid 449170530Ssaminit_secondary(void) 450170530Ssam{ 451148432Ssam struct pcpu *pc; 452170530Ssam u_int64_t msr, cr0; 453170530Ssam int cpu, gsel_tss; 454170530Ssam 455170530Ssam /* Set by the startup code for us to use */ 456170530Ssam cpu = bootAP; 457170530Ssam 458170530Ssam /* Init tss */ 459170530Ssam common_tss[cpu] = common_tss[0]; 460170530Ssam common_tss[cpu].tss_rsp0 = 0; /* not used until after switch */ 461170530Ssam common_tss[cpu].tss_iobase = sizeof(struct amd64tss); 462170530Ssam common_tss[cpu].tss_ist1 = (long)&doublefault_stack[PAGE_SIZE]; 463127767Ssam 464178354Ssam gdt_segs[GPROC0_SEL].ssd_base = (long) &common_tss[cpu]; 465127767Ssam ssdtosyssd(&gdt_segs[GPROC0_SEL], 466178354Ssam (struct system_segment_descriptor *)&gdt[GPROC0_SEL]); 467170530Ssam 468170530Ssam lgdt(&r_gdt); /* does magic intra-segment return */ 469170530Ssam 470170530Ssam /* Get per-cpu data */ 471178354Ssam pc = &__pcpu[cpu]; 472170530Ssam 473170530Ssam /* prime data page for it to use */ 474170530Ssam pcpu_init(pc, cpu, sizeof(struct pcpu)); 475170530Ssam pc->pc_apic_id = cpu_apic_ids[cpu]; 476170530Ssam pc->pc_prvspace = pc; 477170530Ssam pc->pc_curthread = 0; 
478178354Ssam pc->pc_tssp = &common_tss[cpu]; 479170530Ssam pc->pc_rsp0 = 0; 480170530Ssam 481170530Ssam wrmsr(MSR_FSBASE, 0); /* User value */ 482170530Ssam wrmsr(MSR_GSBASE, (u_int64_t)pc); 483170530Ssam wrmsr(MSR_KGSBASE, (u_int64_t)pc); /* XXX User value while we're in the kernel */ 484170530Ssam 485170530Ssam lidt(&r_idt); 486170530Ssam 487170530Ssam gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); 488170530Ssam ltr(gsel_tss); 489170530Ssam 490178354Ssam /* 491178354Ssam * Set to a known state: 492170530Ssam * Set by mpboot.s: CR0_PG, CR0_PE 493178354Ssam * Set by cpu_setregs: CR0_NE, CR0_MP, CR0_TS, CR0_WP, CR0_AM 494178354Ssam */ 495170530Ssam cr0 = rcr0(); 496170530Ssam cr0 &= ~(CR0_CD | CR0_NW | CR0_EM); 497170530Ssam load_cr0(cr0); 498170530Ssam 499170530Ssam /* Set up the fast syscall stuff */ 500170530Ssam msr = rdmsr(MSR_EFER) | EFER_SCE; 501170530Ssam wrmsr(MSR_EFER, msr); 502170530Ssam wrmsr(MSR_LSTAR, (u_int64_t)IDTVEC(fast_syscall)); 503170530Ssam wrmsr(MSR_CSTAR, (u_int64_t)IDTVEC(fast_syscall32)); 504178354Ssam msr = ((u_int64_t)GSEL(GCODE_SEL, SEL_KPL) << 32) | 505170530Ssam ((u_int64_t)GSEL(GUCODE32_SEL, SEL_UPL) << 48); 506178354Ssam wrmsr(MSR_STAR, msr); 507170530Ssam wrmsr(MSR_SF_MASK, PSL_NT|PSL_T|PSL_I|PSL_C|PSL_D); 508127767Ssam 509127767Ssam /* Disable local APIC just to be sure. */ 510127767Ssam lapic_disable(); 511127767Ssam 512127767Ssam /* signal our startup to the BSP. */ 513178354Ssam mp_naps++; 514127767Ssam 515127767Ssam /* Spin until the BSP releases the AP's. */ 516127767Ssam while (!aps_ready) 517127767Ssam ia32_pause(); 518127767Ssam 519127767Ssam /* Initialize the PAT MSR. 
*/ 520178354Ssam pmap_init_pat(); 521127767Ssam 522127767Ssam /* set up CPU registers and state */ 523127767Ssam cpu_setregs(); 524127767Ssam 525127767Ssam /* set up SSE/NX registers */ 526127767Ssam initializecpu(); 527127767Ssam 528167442Ssam /* set up FPU state on the AP */ 529165887Ssam fpuinit(); 530127767Ssam 531127767Ssam /* A quick check from sanity claus */ 532178354Ssam if (PCPU_GET(apic_id) != lapic_id()) { 533178354Ssam printf("SMP: cpuid = %d\n", PCPU_GET(cpuid)); 534127767Ssam printf("SMP: actual apic_id = %d\n", lapic_id()); 535178354Ssam printf("SMP: correct apic_id = %d\n", PCPU_GET(apic_id)); 536178354Ssam panic("cpuid mismatch! boom!!"); 537127767Ssam } 538127767Ssam 539170530Ssam /* Initialize curthread. */ 540170530Ssam KASSERT(PCPU_GET(idlethread) != NULL, ("no idle thread")); 541170530Ssam PCPU_SET(curthread, PCPU_GET(idlethread)); 542170530Ssam 543170530Ssam mtx_lock_spin(&ap_boot_mtx); 544170530Ssam 545170530Ssam /* Init local apic for irq's */ 546170530Ssam lapic_setup(1); 547170530Ssam 548170530Ssam /* Set memory range attributes for this CPU to match the BSP */ 549170530Ssam mem_range_AP_init(); 550170530Ssam 551170530Ssam smp_cpus++; 552170530Ssam 553170530Ssam CTR1(KTR_SMP, "SMP: AP CPU #%d Launched", PCPU_GET(cpuid)); 554170530Ssam printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid)); 555138568Ssam 556170530Ssam /* Determine if we are a logical CPU. */ 557138568Ssam if (logical_cpus > 1 && PCPU_GET(apic_id) % logical_cpus != 0) 558138568Ssam logical_cpus_mask |= PCPU_GET(cpumask); 559138568Ssam 560138568Ssam /* Determine if we are a hyperthread. */ 561138568Ssam if (hyperthreading_cpus > 1 && 562138568Ssam PCPU_GET(apic_id) % hyperthreading_cpus != 0) 563138568Ssam hyperthreading_cpus_mask |= PCPU_GET(cpumask); 564138568Ssam 565138568Ssam /* Build our map of 'other' CPUs. 
*/ 566138568Ssam PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask)); 567138568Ssam 568138568Ssam if (bootverbose) 569138568Ssam lapic_dump("AP"); 570138568Ssam 571148306Ssam if (smp_cpus == mp_ncpus) { 572138568Ssam /* enable IPI's, tlb shootdown, freezes etc */ 573178354Ssam atomic_store_rel_int(&smp_started, 1); 574178354Ssam smp_active = 1; /* historic */ 575148306Ssam } 576178354Ssam 577138568Ssam /* 578178354Ssam * Enable global pages TLB extension 579178354Ssam * This also implicitly flushes the TLB 580138568Ssam */ 581138568Ssam 582138568Ssam load_cr4(rcr4() | CR4_PGE); 583178354Ssam 584170530Ssam mtx_unlock_spin(&ap_boot_mtx); 585178354Ssam 586138568Ssam /* wait until all the AP's are up */ 587170530Ssam while (smp_started == 0) 588178354Ssam ia32_pause(); 589178354Ssam 590170530Ssam /* ok, now grab sched_lock and enter the scheduler */ 591178354Ssam mtx_lock_spin(&sched_lock); 592138568Ssam 593138568Ssam /* 594178354Ssam * Correct spinlock nesting. The idle thread context that we are 595138568Ssam * borrowing was created so that it would start out with a single 596138568Ssam * spin lock (sched_lock) held in fork_trampoline(). Since we've 597138568Ssam * explicitly acquired locks in this function, the nesting count 598138568Ssam * is now 2 rather than 1. Since we are nested, calling 599138568Ssam * spinlock_exit() will simply adjust the counts without allowing 600138568Ssam * spin lock using code to interrupt us. 
601170530Ssam */ 602138568Ssam spinlock_exit(); 603138568Ssam KASSERT(curthread->td_md.md_spinlock_count == 1, ("invalid count")); 604138568Ssam 605178354Ssam PCPU_SET(switchtime, cpu_ticks()); 606178354Ssam PCPU_SET(switchticks, ticks); 607173273Ssam 608178354Ssam cpu_throw(NULL, choosethread()); /* doesn't return */ 609178354Ssam 610178354Ssam panic("scheduler returned us to %s", __func__); 611178354Ssam /* NOTREACHED */ 612178354Ssam} 613178354Ssam 614178354Ssam/******************************************************************* 615178354Ssam * local functions and data 616178354Ssam */ 617178354Ssam 618178354Ssam/* 619178354Ssam * We tell the I/O APIC code about all the CPUs we want to receive 620178354Ssam * interrupts. If we don't want certain CPUs to receive IRQs we 621178354Ssam * can simply not tell the I/O APIC code about them in this function. 622178354Ssam * We also do not tell it about the BSP since it tells itself about 623178354Ssam * the BSP internally to work with UP kernels and on UP machines. 624178354Ssam */ 625195618Srpaulostatic void 626178354Ssamset_interrupt_apic_ids(void) 627178354Ssam{ 628178354Ssam u_int apic_id; 629178354Ssam 630178354Ssam for (apic_id = 0; apic_id < MAXCPU; apic_id++) { 631178354Ssam if (!cpu_info[apic_id].cpu_present) 632178354Ssam continue; 633178354Ssam if (cpu_info[apic_id].cpu_bsp) 634178354Ssam continue; 635178354Ssam if (cpu_info[apic_id].cpu_disabled) 636178354Ssam continue; 637184303Ssam 638184303Ssam /* Don't let hyperthreads service interrupts. 
*/ 639178354Ssam if (hyperthreading_cpus > 1 && 640173273Ssam apic_id % hyperthreading_cpus != 0) 641178354Ssam continue; 642173273Ssam 643178354Ssam intr_add_cpu(apic_id); 644178354Ssam } 645178354Ssam} 646178354Ssam 647178354Ssam/* 648178354Ssam * start each AP in our list 649190532Ssam */ 650191746Sthompsastatic int 651178354Ssamstart_all_aps(void) 652192468Ssam{ 653191746Sthompsa vm_offset_t va = boot_address + KERNBASE; 654178354Ssam u_int64_t *pt4, *pt3, *pt2; 655178354Ssam u_int32_t mpbioswarmvec; 656178354Ssam int apic_id, cpu, i; 657178354Ssam u_char mpbiosreason; 658191746Sthompsa 659178354Ssam mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN); 660178354Ssam 661178354Ssam /* install the AP 1st level boot code */ 662191746Sthompsa pmap_kenter(va, boot_address); 663178354Ssam pmap_invalidate_page(kernel_pmap, va); 664178354Ssam bcopy(mptramp_start, (void *)va, bootMP_size); 665178354Ssam 666178354Ssam /* Locate the page tables, they'll be below the trampoline */ 667178354Ssam pt4 = (u_int64_t *)(uintptr_t)(mptramp_pagetables + KERNBASE); 668178354Ssam pt3 = pt4 + (PAGE_SIZE) / sizeof(u_int64_t); 669178354Ssam pt2 = pt3 + (PAGE_SIZE) / sizeof(u_int64_t); 670178354Ssam 671193655Ssam /* Create the initial 1GB replicated page tables */ 672178354Ssam for (i = 0; i < 512; i++) { 673178354Ssam /* Each slot of the level 4 pages points to the same level 3 page */ 674178354Ssam pt4[i] = (u_int64_t)(uintptr_t)(mptramp_pagetables + PAGE_SIZE); 675178354Ssam pt4[i] |= PG_V | PG_RW | PG_U; 676173273Ssam 677190532Ssam /* Each slot of the level 3 pages points to the same level 2 page */ 678173273Ssam pt3[i] = (u_int64_t)(uintptr_t)(mptramp_pagetables + (2 * PAGE_SIZE)); 679173273Ssam pt3[i] |= PG_V | PG_RW | PG_U; 680173273Ssam 681191746Sthompsa /* The level 2 page slots are mapped with 2MB pages for 1GB. 
*/ 682191746Sthompsa pt2[i] = i * (2 * 1024 * 1024); 683191746Sthompsa pt2[i] |= PG_V | PG_RW | PG_PS | PG_U; 684191746Sthompsa } 685191746Sthompsa 686191746Sthompsa /* save the current value of the warm-start vector */ 687191746Sthompsa mpbioswarmvec = *((u_int32_t *) WARMBOOT_OFF); 688191746Sthompsa outb(CMOS_REG, BIOS_RESET); 689191746Sthompsa mpbiosreason = inb(CMOS_DATA); 690191746Sthompsa 691191746Sthompsa /* setup a vector to our boot code */ 692138568Ssam *((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET; 693138568Ssam *((volatile u_short *) WARMBOOT_SEG) = (boot_address >> 4); 694138568Ssam outb(CMOS_REG, BIOS_RESET); 695170530Ssam outb(CMOS_DATA, BIOS_WARM); /* 'warm-start' */ 696170530Ssam 697138568Ssam /* start each AP */ 698178354Ssam for (cpu = 0, apic_id = 0; apic_id < MAXCPU; apic_id++) { 699170530Ssam 700138568Ssam /* Ignore non-existent CPUs and the BSP. */ 701170530Ssam if (!cpu_info[apic_id].cpu_present || 702138568Ssam cpu_info[apic_id].cpu_bsp) 703138568Ssam continue; 704138568Ssam 705138568Ssam /* Don't use this CPU if it has been disabled by a tunable. 
*/ 706178354Ssam if (resource_disabled("lapic", apic_id)) { 707170530Ssam cpu_info[apic_id].cpu_disabled = 1; 708170530Ssam mp_ncpus--; 709170530Ssam continue; 710170530Ssam } 711170530Ssam 712178354Ssam cpu++; 713170530Ssam 714178354Ssam /* save APIC ID for this logical ID */ 715153352Ssam cpu_apic_ids[cpu] = apic_id; 716153352Ssam 717188541Ssam /* allocate and set up an idle stack data page */ 718188541Ssam bootstacks[cpu] = (void *)kmem_alloc(kernel_map, KSTACK_PAGES * PAGE_SIZE); 719178354Ssam doublefault_stack = (char *)kmem_alloc(kernel_map, PAGE_SIZE); 720153352Ssam 721165887Ssam bootSTK = (char *)bootstacks[cpu] + KSTACK_PAGES * PAGE_SIZE - 8; 722138568Ssam bootAP = cpu; 723165887Ssam 724165887Ssam /* attempt to start the Application Processor */ 725165887Ssam if (!start_ap(apic_id)) { 726178354Ssam /* restore the warmstart vector */ 727167442Ssam *(u_int32_t *) WARMBOOT_OFF = mpbioswarmvec; 728165887Ssam panic("AP #%d (PHY# %d) failed!", cpu, apic_id); 729178354Ssam } 730165887Ssam 731138568Ssam all_cpus |= (1 << cpu); /* record AP in CPU map */ 732138568Ssam } 733138568Ssam 734138568Ssam /* build our map of 'other' CPUs */ 735138568Ssam PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask)); 736178354Ssam 737140753Ssam /* restore the warmstart vector */ 738178354Ssam *(u_int32_t *) WARMBOOT_OFF = mpbioswarmvec; 739170530Ssam 740170530Ssam outb(CMOS_REG, BIOS_RESET); 741178354Ssam outb(CMOS_DATA, mpbiosreason); 742170530Ssam 743170530Ssam /* number of APs actually started */ 744170530Ssam return mp_naps; 745170530Ssam} 746170530Ssam 747170530Ssam 748170530Ssam/* 749178354Ssam * This function starts the AP (application processor) identified 750170530Ssam * by the APIC ID 'physicalCpu'. It does quite a "song and dance" 751170530Ssam * to accomplish this. This is necessary because of the nuances 752170530Ssam * of the different hardware we might encounter. It isn't pretty, 753178354Ssam * but it seems to work. 
754138568Ssam */ 755116742Ssamstatic int 756116742Ssamstart_ap(int apic_id) 757170530Ssam{ 758184274Ssam int vector, ms; 759170530Ssam int cpus; 760170530Ssam 761178354Ssam /* calculate the vector */ 762170530Ssam vector = (boot_address >> 12) & 0xff; 763170530Ssam 764178354Ssam /* used as a watchpoint to signal AP startup */ 765170530Ssam cpus = mp_naps; 766170530Ssam 767170530Ssam /* 768170530Ssam * first we do an INIT/RESET IPI this INIT IPI might be run, reseting 769170530Ssam * and running the target CPU. OR this INIT IPI might be latched (P5 770170530Ssam * bug), CPU waiting for STARTUP IPI. OR this INIT IPI might be 771170530Ssam * ignored. 772170530Ssam */ 773170530Ssam 774170530Ssam /* do an INIT IPI: assert RESET */ 775170530Ssam lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE | 776170530Ssam APIC_LEVEL_ASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT, apic_id); 777170530Ssam 778170530Ssam /* wait for pending status end */ 779184274Ssam lapic_ipi_wait(-1); 780170530Ssam 781170530Ssam /* do an INIT IPI: deassert RESET */ 782170530Ssam lapic_ipi_raw(APIC_DEST_ALLESELF | APIC_TRIGMOD_LEVEL | 783170530Ssam APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT, 0); 784178354Ssam 785170530Ssam /* wait for pending status end */ 786186870Ssam DELAY(10000); /* wait ~10mS */ 787186870Ssam lapic_ipi_wait(-1); 788186870Ssam 789186870Ssam /* 790178354Ssam * next we do a STARTUP IPI: the previous INIT IPI might still be 791178354Ssam * latched, (P5 bug) this 1st STARTUP would then terminate 792178354Ssam * immediately, and the previously started INIT IPI would continue. OR 793190391Ssam * the previous INIT IPI has already run. and this STARTUP IPI will 794178354Ssam * run. OR the previous INIT IPI was ignored. and this STARTUP IPI 795178354Ssam * will run. 
796190391Ssam */ 797178354Ssam 798178354Ssam /* do a STARTUP IPI */ 799178354Ssam lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE | 800178354Ssam APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP | 801195618Srpaulo vector, apic_id); 802195618Srpaulo lapic_ipi_wait(-1); 803195618Srpaulo DELAY(200); /* wait ~200uS */ 804195618Srpaulo 805186904Ssam /* 806186904Ssam * finally we do a 2nd STARTUP IPI: this 2nd STARTUP IPI should run IF 807186904Ssam * the previous STARTUP IPI was cancelled by a latched INIT IPI. OR 808186904Ssam * this STARTUP IPI will be ignored, as only ONE STARTUP IPI is 809173864Ssam * recognized after hardware RESET or INIT IPI. 810170530Ssam */ 811178354Ssam 812178354Ssam lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE | 813170530Ssam APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP | 814170530Ssam vector, apic_id); 815170530Ssam lapic_ipi_wait(-1); 816170530Ssam DELAY(200); /* wait ~200uS */ 817184279Ssam 818184279Ssam /* Wait up to 5 seconds for it to start. 
*/ 819193966Ssam for (ms = 0; ms < 5000; ms++) { 820170530Ssam if (mp_naps > cpus) 821170530Ssam return 1; /* return SUCCESS */ 822170530Ssam DELAY(1000); 823170530Ssam } 824138568Ssam return 0; /* return FAILURE */ 825138568Ssam} 826138568Ssam 827138568Ssam/* 828138568Ssam * Flush the TLB on all other CPU's 829178354Ssam */ 830138568Ssamstatic void 831178354Ssamsmp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2) 832178354Ssam{ 833138568Ssam u_int ncpu; 834178354Ssam 835138568Ssam ncpu = mp_ncpus - 1; /* does not shootdown self */ 836138568Ssam if (ncpu < 1) 837178354Ssam return; /* no other cpus */ 838178354Ssam mtx_assert(&smp_ipi_mtx, MA_OWNED); 839178354Ssam smp_tlb_addr1 = addr1; 840178354Ssam smp_tlb_addr2 = addr2; 841178354Ssam atomic_store_rel_int(&smp_tlb_wait, 0); 842178354Ssam ipi_all_but_self(vector); 843178354Ssam while (smp_tlb_wait < ncpu) 844178354Ssam ia32_pause(); 845178354Ssam} 846178354Ssam 847178354Ssamstatic void 848178354Ssamsmp_targeted_tlb_shootdown(u_int mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2) 849178354Ssam{ 850178354Ssam int ncpu, othercpus; 851116742Ssam 852179643Ssam othercpus = mp_ncpus - 1; 853116742Ssam if (mask == (u_int)-1) { 854127768Ssam ncpu = othercpus; 855138568Ssam if (ncpu < 1) 856186302Ssam return; 857127768Ssam } else { 858127768Ssam mask &= ~PCPU_GET(cpumask); 859116742Ssam if (mask == 0) 860116742Ssam return; 861138568Ssam ncpu = bitcount32(mask); 862178354Ssam if (ncpu > othercpus) { 863178354Ssam /* XXX this should be a panic offence */ 864178354Ssam printf("SMP: tlb shootdown to %d other cpus (only have %d)\n", 865178354Ssam ncpu, othercpus); 866178354Ssam ncpu = othercpus; 867178354Ssam } 868178354Ssam /* XXX should be a panic, implied by mask == 0 above */ 869178354Ssam if (ncpu < 1) 870178354Ssam return; 871178354Ssam } 872178354Ssam mtx_assert(&smp_ipi_mtx, MA_OWNED); 873178354Ssam smp_tlb_addr1 = addr1; 874186302Ssam smp_tlb_addr2 = addr2; 875178354Ssam 
atomic_store_rel_int(&smp_tlb_wait, 0); 876178354Ssam if (mask == (u_int)-1) 877178354Ssam ipi_all_but_self(vector); 878186302Ssam else 879178354Ssam ipi_selected(mask, vector); 880178354Ssam while (smp_tlb_wait < ncpu) 881178354Ssam ia32_pause(); 882178354Ssam} 883178354Ssam 884178354Ssamvoid 885178354Ssamsmp_cache_flush(void) 886178354Ssam{ 887178354Ssam 888178354Ssam if (smp_started) 889178354Ssam smp_tlb_shootdown(IPI_INVLCACHE, 0, 0); 890178354Ssam} 891178354Ssam 892178354Ssamvoid 893178354Ssamsmp_invltlb(void) 894178354Ssam{ 895178354Ssam 896178354Ssam if (smp_started) { 897186302Ssam smp_tlb_shootdown(IPI_INVLTLB, 0, 0); 898178354Ssam } 899178354Ssam} 900178354Ssam 901178354Ssamvoid 902178354Ssamsmp_invlpg(vm_offset_t addr) 903178354Ssam{ 904178354Ssam 905178354Ssam if (smp_started) 906178354Ssam smp_tlb_shootdown(IPI_INVLPG, addr, 0); 907178354Ssam} 908178354Ssam 909178354Ssamvoid 910178354Ssamsmp_invlpg_range(vm_offset_t addr1, vm_offset_t addr2) 911178354Ssam{ 912178354Ssam 913178354Ssam if (smp_started) { 914178354Ssam smp_tlb_shootdown(IPI_INVLRNG, addr1, addr2); 915178354Ssam } 916178354Ssam} 917178354Ssam 918178354Ssamvoid 919178354Ssamsmp_masked_invltlb(u_int mask) 920190391Ssam{ 921178354Ssam 922178354Ssam if (smp_started) { 923190391Ssam smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, 0, 0); 924186904Ssam } 925186904Ssam} 926186904Ssam 927186904Ssamvoid 928178354Ssamsmp_masked_invlpg(u_int mask, vm_offset_t addr) 929178354Ssam{ 930178354Ssam 931178354Ssam if (smp_started) { 932178354Ssam smp_targeted_tlb_shootdown(mask, IPI_INVLPG, addr, 0); 933178354Ssam } 934178354Ssam} 935195618Srpaulo 936195618Srpaulovoid 937195618Srpaulosmp_masked_invlpg_range(u_int mask, vm_offset_t addr1, vm_offset_t addr2) 938195618Srpaulo{ 939195618Srpaulo 940178354Ssam if (smp_started) { 941178354Ssam smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, addr1, addr2); 942178354Ssam } 943178354Ssam} 944178354Ssam 945178354Ssamvoid 946178354Ssamipi_bitmap_handler(struct trapframe 
frame) 947138568Ssam{ 948138568Ssam int cpu = PCPU_GET(cpuid); 949138568Ssam u_int ipi_bitmap; 950138568Ssam 951138568Ssam ipi_bitmap = atomic_readandclear_int(&cpu_ipi_pending[cpu]); 952116742Ssam 953138568Ssam if (ipi_bitmap & (1 << IPI_PREEMPT)) { 954116742Ssam struct thread *running_thread = curthread; 955138568Ssam mtx_lock_spin(&sched_lock); 956178354Ssam if (running_thread->td_critnest > 1) 957195379Ssam running_thread->td_owepreempt = 1; 958170530Ssam else 959138568Ssam mi_switch(SW_INVOL | SW_PREEMPT, NULL); 960138568Ssam mtx_unlock_spin(&sched_lock); 961138568Ssam } 962178354Ssam 963178354Ssam /* Nothing to do for AST */ 964138568Ssam} 965178354Ssam 966178354Ssam/* 967138568Ssam * send an IPI to a set of cpus. 968147788Ssam */ 969173273Ssamvoid 970173273Ssamipi_selected(u_int32_t cpus, u_int ipi) 971173273Ssam{ 972173273Ssam int cpu; 973190579Ssam u_int bitmap = 0; 974190579Ssam u_int old_pending; 975190579Ssam u_int new_pending; 976190579Ssam 977195618Srpaulo if (IPI_IS_BITMAPED(ipi)) { 978173273Ssam bitmap = 1 << ipi; 979195618Srpaulo ipi = IPI_BITMAP_VECTOR; 980195618Srpaulo } 981195618Srpaulo 982195618Srpaulo#ifdef STOP_NMI 983195618Srpaulo if (ipi == IPI_STOP && stop_cpus_with_nmi) { 984195618Srpaulo ipi_nmi_selected(cpus); 985195379Ssam return; 986195379Ssam } 987195379Ssam#endif 988195379Ssam CTR3(KTR_SMP, "%s: cpus: %x ipi: %x", __func__, cpus, ipi); 989195379Ssam while ((cpu = ffs(cpus)) != 0) { 990147788Ssam cpu--; 991147788Ssam cpus &= ~(1 << cpu); 992147788Ssam 993147788Ssam KASSERT(cpu_apic_ids[cpu] != -1, 994186099Ssam ("IPI to non-existent CPU %d", cpu)); 995186099Ssam 996147788Ssam if (bitmap) { 997186099Ssam do { 998138568Ssam old_pending = cpu_ipi_pending[cpu]; 999138568Ssam new_pending = old_pending | bitmap; 1000138568Ssam } while (!atomic_cmpset_int(&cpu_ipi_pending[cpu],old_pending, new_pending)); 1001138568Ssam 1002184288Ssam if (old_pending) 1003178354Ssam continue; 1004138568Ssam } 1005138568Ssam 1006138568Ssam 
lapic_ipi_vectored(ipi, cpu_apic_ids[cpu]); 1007186302Ssam } 1008138568Ssam 1009138568Ssam} 1010138568Ssam 1011138568Ssam/* 1012138568Ssam * send an IPI INTerrupt containing 'vector' to all CPUs, including myself 1013138568Ssam */ 1014138568Ssamvoid 1015138568Ssamipi_all(u_int ipi) 1016138568Ssam{ 1017138568Ssam 1018138568Ssam if (IPI_IS_BITMAPED(ipi) || (ipi == IPI_STOP && stop_cpus_with_nmi)) { 1019138568Ssam ipi_selected(all_cpus, ipi); 1020138568Ssam return; 1021138568Ssam } 1022138568Ssam CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi); 1023138568Ssam lapic_ipi_vectored(ipi, APIC_IPI_DEST_ALL); 1024138568Ssam} 1025138568Ssam 1026138568Ssam/* 1027148863Ssam * send an IPI to all CPUs EXCEPT myself 1028148863Ssam */ 1029148863Ssamvoid 1030148863Ssamipi_all_but_self(u_int ipi) 1031138568Ssam{ 1032116742Ssam 1033116742Ssam if (IPI_IS_BITMAPED(ipi) || (ipi == IPI_STOP && stop_cpus_with_nmi)) { 1034116742Ssam ipi_selected(PCPU_GET(other_cpus), ipi); 1035138568Ssam return; 1036116742Ssam } 1037138568Ssam CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi); 1038138568Ssam lapic_ipi_vectored(ipi, APIC_IPI_DEST_OTHERS); 1039206358Srpaulo} 1040138568Ssam 1041178354Ssam/* 1042184288Ssam * send an IPI to myself 1043186302Ssam */ 1044116742Ssamvoid 1045116742Ssamipi_self(u_int ipi) 1046178354Ssam{ 1047178354Ssam 1048178354Ssam if (IPI_IS_BITMAPED(ipi) || (ipi == IPI_STOP && stop_cpus_with_nmi)) { 1049178354Ssam ipi_selected(PCPU_GET(cpumask), ipi); 1050184303Ssam return; 1051184303Ssam } 1052184303Ssam CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi); 1053178354Ssam lapic_ipi_vectored(ipi, APIC_IPI_DEST_SELF); 1054178354Ssam} 1055178354Ssam 1056184288Ssam#ifdef STOP_NMI 1057184288Ssam/* 1058178354Ssam * send NMI IPI to selected CPUs 1059178354Ssam */ 1060178354Ssam 1061178354Ssam#define BEFORE_SPIN 1000000 1062178354Ssam 1063178354Ssamvoid 1064178354Ssamipi_nmi_selected(u_int32_t cpus) 1065178354Ssam{ 1066178354Ssam int cpu; 1067170530Ssam register_t icrlo; 1068138568Ssam 1069120104Ssam 
icrlo = APIC_DELMODE_NMI | APIC_DESTMODE_PHY | APIC_LEVEL_ASSERT 1070178354Ssam | APIC_TRIGMOD_EDGE; 1071178354Ssam 1072178354Ssam CTR2(KTR_SMP, "%s: cpus: %x nmi", __func__, cpus); 1073178354Ssam 1074178354Ssam atomic_set_int(&ipi_nmi_pending, cpus); 1075178354Ssam 1076178354Ssam while ((cpu = ffs(cpus)) != 0) { 1077120104Ssam cpu--; 1078120104Ssam cpus &= ~(1 << cpu); 1079116742Ssam 1080170530Ssam KASSERT(cpu_apic_ids[cpu] != -1, 1081170530Ssam ("IPI NMI to non-existent CPU %d", cpu)); 1082178354Ssam 1083170530Ssam /* Wait for an earlier IPI to finish. */ 1084170530Ssam if (!lapic_ipi_wait(BEFORE_SPIN)) 1085170530Ssam panic("ipi_nmi_selected: previous IPI has not cleared"); 1086170530Ssam 1087178354Ssam lapic_ipi_raw(icrlo, cpu_apic_ids[cpu]); 1088178354Ssam } 1089116742Ssam} 1090178354Ssam 1091178354Ssamint 1092178354Ssamipi_nmi_handler(void) 1093178354Ssam{ 1094178354Ssam int cpumask = PCPU_GET(cpumask); 1095178354Ssam 1096178354Ssam if (!(ipi_nmi_pending & cpumask)) 1097138568Ssam return 1; 1098178354Ssam 1099116742Ssam atomic_clear_int(&ipi_nmi_pending, cpumask); 1100116742Ssam cpustop_handler(); 1101179643Ssam return 0; 1102178354Ssam} 1103178354Ssam 1104178354Ssam#endif /* STOP_NMI */ 1105178354Ssam 1106178354Ssam/* 1107178354Ssam * Handle an IPI_STOP by saving our current context and spinning until we 1108140766Ssam * are resumed. 
1109138568Ssam */ 1110138568Ssamvoid 1111116742Ssamcpustop_handler(void) 1112195618Srpaulo{ 1113138568Ssam int cpu = PCPU_GET(cpuid); 1114138568Ssam int cpumask = PCPU_GET(cpumask); 1115138568Ssam 1116138568Ssam savectx(&stoppcbs[cpu]); 1117183251Ssam 1118178354Ssam /* Indicate that we are stopped */ 1119178354Ssam atomic_set_int(&stopped_cpus, cpumask); 1120139528Ssam 1121139528Ssam /* Wait for restart */ 1122170530Ssam while (!(started_cpus & cpumask)) 1123184288Ssam ia32_pause(); 1124195618Srpaulo 1125195618Srpaulo atomic_clear_int(&started_cpus, cpumask); 1126195618Srpaulo atomic_clear_int(&stopped_cpus, cpumask); 1127195618Srpaulo 1128138568Ssam if (cpu == 0 && cpustop_restartfunc != NULL) { 1129138568Ssam cpustop_restartfunc(); 1130138568Ssam cpustop_restartfunc = NULL; 1131138568Ssam } 1132178354Ssam} 1133138568Ssam 1134138568Ssam/* 1135116742Ssam * This is called once the rest of the system is up and running and we're 1136184277Ssam * ready to let the AP's out of the pen. 1137184277Ssam */ 1138184277Ssamstatic void 1139116742Ssamrelease_aps(void *dummy __unused) 1140116742Ssam{ 1141116742Ssam 1142148777Ssam if (mp_ncpus == 1) 1143148777Ssam return; 1144148777Ssam mtx_lock_spin(&sched_lock); 1145148777Ssam atomic_store_rel_int(&aps_ready, 1); 1146148777Ssam while (smp_started == 0) 1147148777Ssam ia32_pause(); 1148116742Ssam mtx_unlock_spin(&sched_lock); 1149178354Ssam} 1150178354SsamSYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL); 1151148777Ssam 1152178354Ssamstatic int 1153148777Ssamsysctl_hlt_cpus(SYSCTL_HANDLER_ARGS) 1154148777Ssam{ 1155179643Ssam u_int mask; 1156148777Ssam int error; 1157183259Ssam 1158183259Ssam mask = hlt_cpus_mask; 1159178354Ssam error = sysctl_handle_int(oidp, &mask, 0, req); 1160148777Ssam if (error || !req->newptr) 1161148777Ssam return (error); 1162178354Ssam 1163178354Ssam if (logical_cpus_mask != 0 && 1164178354Ssam (mask & logical_cpus_mask) == logical_cpus_mask) 1165178354Ssam hlt_logical_cpus = 1; 
1166148777Ssam else 1167183259Ssam hlt_logical_cpus = 0; 1168148777Ssam 1169148777Ssam if (! hyperthreading_allowed) 1170183259Ssam mask |= hyperthreading_cpus_mask; 1171178354Ssam 1172148777Ssam if ((mask & all_cpus) == all_cpus) 1173183259Ssam mask &= ~(1<<0); 1174148777Ssam hlt_cpus_mask = mask; 1175184288Ssam return (error); 1176148777Ssam} 1177148777SsamSYSCTL_PROC(_machdep, OID_AUTO, hlt_cpus, CTLTYPE_INT|CTLFLAG_RW, 1178178354Ssam 0, 0, sysctl_hlt_cpus, "IU", 1179148777Ssam "Bitmap of CPUs to halt. 101 (binary) will halt CPUs 0 and 2."); 1180148777Ssam 1181148777Ssamstatic int 1182148777Ssamsysctl_hlt_logical_cpus(SYSCTL_HANDLER_ARGS) 1183148777Ssam{ 1184178354Ssam int disable, error; 1185178354Ssam 1186116742Ssam disable = hlt_logical_cpus; 1187178354Ssam error = sysctl_handle_int(oidp, &disable, 0, req); 1188138568Ssam if (error || !req->newptr) 1189138568Ssam return (error); 1190178354Ssam 1191116742Ssam if (disable) 1192183259Ssam hlt_cpus_mask |= logical_cpus_mask; 1193127770Ssam else 1194178354Ssam hlt_cpus_mask &= ~logical_cpus_mask; 1195127770Ssam 1196183259Ssam if (! 
hyperthreading_allowed) 1197183259Ssam hlt_cpus_mask |= hyperthreading_cpus_mask; 1198183259Ssam 1199178354Ssam if ((hlt_cpus_mask & all_cpus) == all_cpus) 1200116742Ssam hlt_cpus_mask &= ~(1<<0); 1201116742Ssam 1202116742Ssam hlt_logical_cpus = disable; 1203178354Ssam return (error); 1204178354Ssam} 1205178354Ssam 1206178354Ssamstatic int 1207178354Ssamsysctl_hyperthreading_allowed(SYSCTL_HANDLER_ARGS) 1208178354Ssam{ 1209178354Ssam int allowed, error; 1210178354Ssam 1211178354Ssam allowed = hyperthreading_allowed; 1212178354Ssam error = sysctl_handle_int(oidp, &allowed, 0, req); 1213178354Ssam if (error || !req->newptr) 1214178354Ssam return (error); 1215178354Ssam 1216178354Ssam if (allowed) 1217178354Ssam hlt_cpus_mask &= ~hyperthreading_cpus_mask; 1218178354Ssam else 1219178354Ssam hlt_cpus_mask |= hyperthreading_cpus_mask; 1220178354Ssam 1221178354Ssam if (logical_cpus_mask != 0 && 1222178354Ssam (hlt_cpus_mask & logical_cpus_mask) == logical_cpus_mask) 1223178354Ssam hlt_logical_cpus = 1; 1224178354Ssam else 1225183259Ssam hlt_logical_cpus = 0; 1226178354Ssam 1227178354Ssam if ((hlt_cpus_mask & all_cpus) == all_cpus) 1228178354Ssam hlt_cpus_mask &= ~(1<<0); 1229178354Ssam 1230178354Ssam hyperthreading_allowed = allowed; 1231178354Ssam return (error); 1232178354Ssam} 1233178354Ssam 1234178354Ssamstatic void 1235178354Ssamcpu_hlt_setup(void *dummy __unused) 1236178354Ssam{ 1237178354Ssam 1238190391Ssam if (logical_cpus_mask != 0) { 1239178354Ssam TUNABLE_INT_FETCH("machdep.hlt_logical_cpus", 1240178354Ssam &hlt_logical_cpus); 1241190391Ssam sysctl_ctx_init(&logical_cpu_clist); 1242178354Ssam SYSCTL_ADD_PROC(&logical_cpu_clist, 1243193655Ssam SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO, 1244178354Ssam "hlt_logical_cpus", CTLTYPE_INT|CTLFLAG_RW, 0, 0, 1245178354Ssam sysctl_hlt_logical_cpus, "IU", ""); 1246178354Ssam SYSCTL_ADD_UINT(&logical_cpu_clist, 1247178354Ssam SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO, 1248178354Ssam "logical_cpus_mask", 
CTLTYPE_INT|CTLFLAG_RD, 1249178354Ssam &logical_cpus_mask, 0, ""); 1250178354Ssam 1251178354Ssam if (hlt_logical_cpus) 1252178354Ssam hlt_cpus_mask |= logical_cpus_mask; 1253178354Ssam 1254178354Ssam /* 1255178354Ssam * If necessary for security purposes, force 1256178354Ssam * hyperthreading off, regardless of the value 1257178354Ssam * of hlt_logical_cpus. 1258178354Ssam */ 1259178354Ssam if (hyperthreading_cpus_mask) { 1260178354Ssam TUNABLE_INT_FETCH("machdep.hyperthreading_allowed", 1261178354Ssam &hyperthreading_allowed); 1262178354Ssam SYSCTL_ADD_PROC(&logical_cpu_clist, 1263178354Ssam SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO, 1264178354Ssam "hyperthreading_allowed", CTLTYPE_INT|CTLFLAG_RW, 1265178354Ssam 0, 0, sysctl_hyperthreading_allowed, "IU", ""); 1266138568Ssam if (! hyperthreading_allowed) 1267178354Ssam hlt_cpus_mask |= hyperthreading_cpus_mask; 1268178354Ssam } 1269138568Ssam } 1270178354Ssam} 1271178354SsamSYSINIT(cpu_hlt, SI_SUB_SMP, SI_ORDER_ANY, cpu_hlt_setup, NULL); 1272138568Ssam 1273116742Ssamint 1274116742Ssammp_grab_cpu_hlt(void) 1275116742Ssam{ 1276116742Ssam u_int mask = PCPU_GET(cpumask); 1277138568Ssam#ifdef MP_WATCHDOG 1278127772Ssam u_int cpuid = PCPU_GET(cpuid); 1279195618Srpaulo#endif 1280138568Ssam int retval; 1281116742Ssam 1282138568Ssam#ifdef MP_WATCHDOG 1283138568Ssam ap_watchdog(cpuid); 1284178354Ssam#endif 1285140766Ssam 1286140766Ssam retval = mask & hlt_cpus_mask; 1287140766Ssam while (mask & hlt_cpus_mask) 1288140766Ssam __asm __volatile("sti; hlt" : : : "memory"); 1289138568Ssam return (retval); 1290127772Ssam} 1291116742Ssam