mp_machdep.c revision 183345
1182902Skmacy/*- 2182902Skmacy * Copyright (c) 1996, by Steve Passe 3182902Skmacy * Copyright (c) 2008, by Kip Macy 4182902Skmacy * All rights reserved. 5182902Skmacy * 6182902Skmacy * Redistribution and use in source and binary forms, with or without 7182902Skmacy * modification, are permitted provided that the following conditions 8182902Skmacy * are met: 9182902Skmacy * 1. Redistributions of source code must retain the above copyright 10182902Skmacy * notice, this list of conditions and the following disclaimer. 11182902Skmacy * 2. The name of the developer may NOT be used to endorse or promote products 12182902Skmacy * derived from this software without specific prior written permission. 13182902Skmacy * 14182902Skmacy * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15182902Skmacy * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16182902Skmacy * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17182902Skmacy * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18182902Skmacy * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19182902Skmacy * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20182902Skmacy * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21182902Skmacy * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22182902Skmacy * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23182902Skmacy * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24182902Skmacy * SUCH DAMAGE. 
25182902Skmacy */ 26182902Skmacy 27182902Skmacy#include <sys/cdefs.h> 28182902Skmacy__FBSDID("$FreeBSD: head/sys/i386/xen/mp_machdep.c 183345 2008-09-25 07:11:04Z kmacy $"); 29182902Skmacy 30182902Skmacy#include "opt_apic.h" 31182902Skmacy#include "opt_cpu.h" 32182902Skmacy#include "opt_kstack_pages.h" 33182902Skmacy#include "opt_mp_watchdog.h" 34182902Skmacy#include "opt_sched.h" 35182902Skmacy#include "opt_smp.h" 36182902Skmacy 37182902Skmacy#if !defined(lint) 38182902Skmacy#if !defined(SMP) 39182902Skmacy#error How did you get here? 40182902Skmacy#endif 41182902Skmacy 42182902Skmacy#ifndef DEV_APIC 43182902Skmacy#error The apic device is required for SMP, add "device apic" to your config file. 44182902Skmacy#endif 45182902Skmacy#if defined(CPU_DISABLE_CMPXCHG) && !defined(COMPILING_LINT) 46182902Skmacy#error SMP not supported with CPU_DISABLE_CMPXCHG 47182902Skmacy#endif 48182902Skmacy#endif /* not lint */ 49182902Skmacy 50182902Skmacy#include <sys/param.h> 51182902Skmacy#include <sys/systm.h> 52182902Skmacy#include <sys/bus.h> 53182902Skmacy#include <sys/cons.h> /* cngetc() */ 54182902Skmacy#ifdef GPROF 55182902Skmacy#include <sys/gmon.h> 56182902Skmacy#endif 57182902Skmacy#include <sys/kernel.h> 58182902Skmacy#include <sys/ktr.h> 59182902Skmacy#include <sys/lock.h> 60182902Skmacy#include <sys/malloc.h> 61182902Skmacy#include <sys/memrange.h> 62182902Skmacy#include <sys/mutex.h> 63182902Skmacy#include <sys/pcpu.h> 64182902Skmacy#include <sys/proc.h> 65182902Skmacy#include <sys/sched.h> 66182902Skmacy#include <sys/smp.h> 67182902Skmacy#include <sys/sysctl.h> 68182902Skmacy 69182902Skmacy#include <vm/vm.h> 70182902Skmacy#include <vm/vm_param.h> 71182902Skmacy#include <vm/pmap.h> 72182902Skmacy#include <vm/vm_kern.h> 73182902Skmacy#include <vm/vm_extern.h> 74182902Skmacy#include <vm/vm_page.h> 75182902Skmacy 76182902Skmacy#include <machine/apicreg.h> 77182902Skmacy#include <machine/md_var.h> 78182902Skmacy#include <machine/mp_watchdog.h> 79182902Skmacy#include 
<machine/pcb.h> 80182902Skmacy#include <machine/psl.h> 81182902Skmacy#include <machine/smp.h> 82182902Skmacy#include <machine/specialreg.h> 83182902Skmacy#include <machine/pcpu.h> 84182902Skmacy 85182902Skmacy 86182902Skmacy 87182902Skmacy#include <machine/xen/xen-os.h> 88182902Skmacy#include <machine/xen/hypervisor.h> 89182902Skmacy#include <xen/interface/vcpu.h> 90182902Skmacy 91182902Skmacy 92182902Skmacy#define WARMBOOT_TARGET 0 93182902Skmacy#define WARMBOOT_OFF (KERNBASE + 0x0467) 94182902Skmacy#define WARMBOOT_SEG (KERNBASE + 0x0469) 95182902Skmacy 96182902Skmacy#define stop_cpus_with_nmi 0 97182902Skmacy 98182902Skmacy 99182902Skmacyint mp_naps; /* # of Applications processors */ 100182902Skmacyint boot_cpu_id = -1; /* designated BSP */ 101182902Skmacy 102182902Skmacyextern struct pcpu __pcpu[]; 103182902Skmacy 104182902Skmacystatic int bootAP; 105182902Skmacystatic union descriptor *bootAPgdt; 106182902Skmacy 107182902Skmacy 108182902Skmacy/* Free these after use */ 109182902Skmacyvoid *bootstacks[MAXCPU]; 110182902Skmacy 111182902Skmacy/* Hotwire a 0->4MB V==P mapping */ 112182902Skmacyextern pt_entry_t *KPTphys; 113182902Skmacy 114182902Skmacystruct pcb stoppcbs[MAXCPU]; 115182902Skmacy 116182902Skmacy/* Variables needed for SMP tlb shootdown. */ 117182902Skmacyvm_offset_t smp_tlb_addr1; 118182902Skmacyvm_offset_t smp_tlb_addr2; 119182902Skmacyvolatile int smp_tlb_wait; 120182902Skmacy 121182902Skmacystatic u_int logical_cpus; 122182902Skmacy 123182902Skmacy/* used to hold the AP's until we are ready to release them */ 124182902Skmacystatic struct mtx ap_boot_mtx; 125182902Skmacy 126182902Skmacy/* Set to 1 once we're ready to let the APs out of the pen. */ 127182902Skmacystatic volatile int aps_ready = 0; 128182902Skmacy 129182902Skmacy/* 130182902Skmacy * Store data from cpu_add() until later in the boot when we actually setup 131182902Skmacy * the APs. 
132182902Skmacy */ 133182902Skmacystruct cpu_info { 134182902Skmacy int cpu_present:1; 135182902Skmacy int cpu_bsp:1; 136182902Skmacy int cpu_disabled:1; 137182902Skmacy} static cpu_info[MAX_APIC_ID + 1]; 138182902Skmacyint cpu_apic_ids[MAXCPU]; 139182902Skmacy 140182902Skmacy/* Holds pending bitmap based IPIs per CPU */ 141182902Skmacystatic volatile u_int cpu_ipi_pending[MAXCPU]; 142182902Skmacy 143182902Skmacystatic u_int boot_address; 144182902Skmacy 145182902Skmacystatic void assign_cpu_ids(void); 146182902Skmacystatic void set_interrupt_apic_ids(void); 147182902Skmacyint start_all_aps(void); 148182902Skmacystatic int start_ap(int apic_id); 149182902Skmacystatic void release_aps(void *dummy); 150182902Skmacy 151182902Skmacystatic u_int hyperthreading_cpus; 152182902Skmacystatic cpumask_t hyperthreading_cpus_mask; 153182902Skmacy 154182902Skmacyextern void Xhypervisor_callback(void); 155182902Skmacyextern void failsafe_callback(void); 156182902Skmacy 157182902Skmacystruct cpu_group * 158182902Skmacycpu_topo(void) 159182902Skmacy{ 160182902Skmacy if (cpu_cores == 0) 161182902Skmacy cpu_cores = 1; 162182902Skmacy if (cpu_logical == 0) 163182902Skmacy cpu_logical = 1; 164182902Skmacy if (mp_ncpus % (cpu_cores * cpu_logical) != 0) { 165182902Skmacy printf("WARNING: Non-uniform processors.\n"); 166182902Skmacy printf("WARNING: Using suboptimal topology.\n"); 167182902Skmacy return (smp_topo_none()); 168182902Skmacy } 169182902Skmacy /* 170182902Skmacy * No multi-core or hyper-threaded. 171182902Skmacy */ 172182902Skmacy if (cpu_logical * cpu_cores == 1) 173182902Skmacy return (smp_topo_none()); 174182902Skmacy /* 175182902Skmacy * Only HTT no multi-core. 176182902Skmacy */ 177182902Skmacy if (cpu_logical > 1 && cpu_cores == 1) 178182902Skmacy return (smp_topo_1level(CG_SHARE_L1, cpu_logical, CG_FLAG_HTT)); 179182902Skmacy /* 180182902Skmacy * Only multi-core no HTT. 
181182902Skmacy */ 182182902Skmacy if (cpu_cores > 1 && cpu_logical == 1) 183182902Skmacy return (smp_topo_1level(CG_SHARE_NONE, cpu_cores, 0)); 184182902Skmacy /* 185182902Skmacy * Both HTT and multi-core. 186182902Skmacy */ 187182902Skmacy return (smp_topo_2level(CG_SHARE_NONE, cpu_cores, 188182902Skmacy CG_SHARE_L1, cpu_logical, CG_FLAG_HTT)); 189182902Skmacy} 190182902Skmacy 191182902Skmacy/* 192182902Skmacy * Calculate usable address in base memory for AP trampoline code. 193182902Skmacy */ 194182902Skmacyu_int 195182902Skmacymp_bootaddress(u_int basemem) 196182902Skmacy{ 197182902Skmacy 198182902Skmacy return (basemem); 199182902Skmacy} 200182902Skmacy 201182902Skmacyvoid 202182902Skmacycpu_add(u_int apic_id, char boot_cpu) 203182902Skmacy{ 204182902Skmacy 205182902Skmacy if (apic_id > MAX_APIC_ID) { 206182902Skmacy panic("SMP: APIC ID %d too high", apic_id); 207182902Skmacy return; 208182902Skmacy } 209182902Skmacy KASSERT(cpu_info[apic_id].cpu_present == 0, ("CPU %d added twice", 210182902Skmacy apic_id)); 211182902Skmacy cpu_info[apic_id].cpu_present = 1; 212182902Skmacy if (boot_cpu) { 213182902Skmacy KASSERT(boot_cpu_id == -1, 214182902Skmacy ("CPU %d claims to be BSP, but CPU %d already is", apic_id, 215182902Skmacy boot_cpu_id)); 216182902Skmacy boot_cpu_id = apic_id; 217182902Skmacy cpu_info[apic_id].cpu_bsp = 1; 218182902Skmacy } 219182902Skmacy if (mp_ncpus < MAXCPU) 220182902Skmacy mp_ncpus++; 221182902Skmacy if (bootverbose) 222182902Skmacy printf("SMP: Added CPU %d (%s)\n", apic_id, boot_cpu ? "BSP" : 223182902Skmacy "AP"); 224182902Skmacy} 225182902Skmacy 226182902Skmacyvoid 227182902Skmacycpu_mp_setmaxid(void) 228182902Skmacy{ 229182902Skmacy 230182902Skmacy mp_maxid = MAXCPU - 1; 231182902Skmacy} 232182902Skmacy 233182902Skmacyint 234182902Skmacycpu_mp_probe(void) 235182902Skmacy{ 236182902Skmacy 237182902Skmacy /* 238182902Skmacy * Always record BSP in CPU map so that the mbuf init code works 239182902Skmacy * correctly. 
240182902Skmacy */ 241182902Skmacy all_cpus = 1; 242182902Skmacy if (mp_ncpus == 0) { 243182902Skmacy /* 244182902Skmacy * No CPUs were found, so this must be a UP system. Setup 245182902Skmacy * the variables to represent a system with a single CPU 246182902Skmacy * with an id of 0. 247182902Skmacy */ 248182902Skmacy mp_ncpus = 1; 249182902Skmacy return (0); 250182902Skmacy } 251182902Skmacy 252182902Skmacy /* At least one CPU was found. */ 253182902Skmacy if (mp_ncpus == 1) { 254182902Skmacy /* 255182902Skmacy * One CPU was found, so this must be a UP system with 256182902Skmacy * an I/O APIC. 257182902Skmacy */ 258182902Skmacy return (0); 259182902Skmacy } 260182902Skmacy 261182902Skmacy /* At least two CPUs were found. */ 262182902Skmacy return (1); 263182902Skmacy} 264182902Skmacy 265182902Skmacy/* 266182902Skmacy * Initialize the IPI handlers and start up the AP's. 267182902Skmacy */ 268182902Skmacyvoid 269182902Skmacycpu_mp_start(void) 270182902Skmacy{ 271182902Skmacy int i; 272182902Skmacy 273182902Skmacy /* Initialize the logical ID to APIC ID table. */ 274182902Skmacy for (i = 0; i < MAXCPU; i++) { 275182902Skmacy cpu_apic_ids[i] = -1; 276182902Skmacy cpu_ipi_pending[i] = 0; 277182902Skmacy } 278182902Skmacy 279182902Skmacy /* Set boot_cpu_id if needed. */ 280182902Skmacy if (boot_cpu_id == -1) { 281182902Skmacy boot_cpu_id = PCPU_GET(apic_id); 282182902Skmacy cpu_info[boot_cpu_id].cpu_bsp = 1; 283182902Skmacy } else 284182902Skmacy KASSERT(boot_cpu_id == PCPU_GET(apic_id), 285182902Skmacy ("BSP's APIC ID doesn't match boot_cpu_id")); 286182902Skmacy cpu_apic_ids[0] = boot_cpu_id; 287182902Skmacy 288182902Skmacy assign_cpu_ids(); 289182902Skmacy 290182902Skmacy /* Start each Application Processor */ 291182902Skmacy start_all_aps(); 292182902Skmacy 293182902Skmacy /* Setup the initial logical CPUs info. 
*/ 294182902Skmacy logical_cpus = logical_cpus_mask = 0; 295182902Skmacy if (cpu_feature & CPUID_HTT) 296182902Skmacy logical_cpus = (cpu_procinfo & CPUID_HTT_CORES) >> 16; 297182902Skmacy 298182902Skmacy set_interrupt_apic_ids(); 299182902Skmacy} 300182902Skmacy 301182902Skmacy 302182902Skmacy/* 303182902Skmacy * Print various information about the SMP system hardware and setup. 304182902Skmacy */ 305182902Skmacyvoid 306182902Skmacycpu_mp_announce(void) 307182902Skmacy{ 308182902Skmacy int i, x; 309182902Skmacy 310182902Skmacy /* List CPUs */ 311182902Skmacy printf(" cpu0 (BSP): APIC ID: %2d\n", boot_cpu_id); 312182902Skmacy for (i = 1, x = 0; x <= MAX_APIC_ID; x++) { 313182902Skmacy if (!cpu_info[x].cpu_present || cpu_info[x].cpu_bsp) 314182902Skmacy continue; 315182902Skmacy if (cpu_info[x].cpu_disabled) 316182902Skmacy printf(" cpu (AP): APIC ID: %2d (disabled)\n", x); 317182902Skmacy else { 318182902Skmacy KASSERT(i < mp_ncpus, 319182902Skmacy ("mp_ncpus and actual cpus are out of whack")); 320182902Skmacy printf(" cpu%d (AP): APIC ID: %2d\n", i++, x); 321182902Skmacy } 322182902Skmacy } 323182902Skmacy} 324182902Skmacy 325182902Skmacy#define MTOPSIZE (1<<(14 + PAGE_SHIFT)) 326182902Skmacy 327182902Skmacy/* 328182902Skmacy * AP CPU's call this to initialize themselves. 329182902Skmacy */ 330182902Skmacyvoid 331182902Skmacyinit_secondary(void) 332182902Skmacy{ 333182902Skmacy vm_offset_t addr; 334182902Skmacy int gsel_tss; 335182902Skmacy 336182902Skmacy 337182902Skmacy /* bootAP is set in start_ap() to our ID. 
*/ 338182902Skmacy 339182902Skmacy 340182902Skmacy PCPU_SET(currentldt, _default_ldt); 341182902Skmacy gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); 342182902Skmacy#if 0 343182902Skmacy gdt[bootAP * NGDT + GPROC0_SEL].sd.sd_type = SDT_SYS386TSS; 344182902Skmacy#endif 345182902Skmacy PCPU_SET(common_tss.tss_esp0, 0); /* not used until after switch */ 346182902Skmacy PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL)); 347182902Skmacy PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16); 348182902Skmacy#if 0 349182902Skmacy PCPU_SET(tss_gdt, &gdt[bootAP * NGDT + GPROC0_SEL].sd); 350182902Skmacy 351182902Skmacy PCPU_SET(common_tssd, *PCPU_GET(tss_gdt)); 352182902Skmacy#endif 353182902Skmacy PCPU_SET(fsgs_gdt, &gdt[GUFS_SEL].sd); 354182902Skmacy 355182902Skmacy /* 356182902Skmacy * Set to a known state: 357182902Skmacy * Set by mpboot.s: CR0_PG, CR0_PE 358182902Skmacy * Set by cpu_setregs: CR0_NE, CR0_MP, CR0_TS, CR0_WP, CR0_AM 359182902Skmacy */ 360182902Skmacy /* 361182902Skmacy * signal our startup to the BSP. 362182902Skmacy */ 363182902Skmacy mp_naps++; 364182902Skmacy 365182902Skmacy /* Spin until the BSP releases the AP's. */ 366182902Skmacy while (!aps_ready) 367182902Skmacy ia32_pause(); 368182902Skmacy 369182902Skmacy /* BSP may have changed PTD while we were waiting */ 370182902Skmacy invltlb(); 371182902Skmacy for (addr = 0; addr < NKPT * NBPDR - 1; addr += PAGE_SIZE) 372182902Skmacy invlpg(addr); 373182902Skmacy 374182902Skmacy /* set up FPU state on the AP */ 375182902Skmacy npxinit(__INITIAL_NPXCW__); 376182902Skmacy#if 0 377182902Skmacy 378182902Skmacy /* set up SSE registers */ 379182902Skmacy enable_sse(); 380182902Skmacy#endif 381182902Skmacy#if 0 && defined(PAE) 382182902Skmacy /* Enable the PTE no-execute bit. 
*/ 383182902Skmacy if ((amd_feature & AMDID_NX) != 0) { 384182902Skmacy uint64_t msr; 385182902Skmacy 386182902Skmacy msr = rdmsr(MSR_EFER) | EFER_NXE; 387182902Skmacy wrmsr(MSR_EFER, msr); 388182902Skmacy } 389182902Skmacy#endif 390182902Skmacy#if 0 391182902Skmacy /* A quick check from sanity claus */ 392182902Skmacy if (PCPU_GET(apic_id) != lapic_id()) { 393182902Skmacy printf("SMP: cpuid = %d\n", PCPU_GET(cpuid)); 394182902Skmacy printf("SMP: actual apic_id = %d\n", lapic_id()); 395182902Skmacy printf("SMP: correct apic_id = %d\n", PCPU_GET(apic_id)); 396182902Skmacy panic("cpuid mismatch! boom!!"); 397182902Skmacy } 398182902Skmacy#endif 399182902Skmacy 400182902Skmacy /* Initialize curthread. */ 401182902Skmacy KASSERT(PCPU_GET(idlethread) != NULL, ("no idle thread")); 402182902Skmacy PCPU_SET(curthread, PCPU_GET(idlethread)); 403182902Skmacy 404182902Skmacy mtx_lock_spin(&ap_boot_mtx); 405182902Skmacy#if 0 406182902Skmacy 407182902Skmacy /* Init local apic for irq's */ 408182902Skmacy lapic_setup(1); 409182902Skmacy#endif 410182902Skmacy smp_cpus++; 411182902Skmacy 412182902Skmacy CTR1(KTR_SMP, "SMP: AP CPU #%d Launched", PCPU_GET(cpuid)); 413182902Skmacy printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid)); 414182902Skmacy 415182902Skmacy /* Determine if we are a logical CPU. */ 416182902Skmacy if (logical_cpus > 1 && PCPU_GET(apic_id) % logical_cpus != 0) 417182902Skmacy logical_cpus_mask |= PCPU_GET(cpumask); 418182902Skmacy 419182902Skmacy /* Determine if we are a hyperthread. */ 420182902Skmacy if (hyperthreading_cpus > 1 && 421182902Skmacy PCPU_GET(apic_id) % hyperthreading_cpus != 0) 422182902Skmacy hyperthreading_cpus_mask |= PCPU_GET(cpumask); 423182902Skmacy 424182902Skmacy /* Build our map of 'other' CPUs. 
*/ 425182902Skmacy PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask)); 426182902Skmacy#if 0 427182902Skmacy if (bootverbose) 428182902Skmacy lapic_dump("AP"); 429182902Skmacy#endif 430182902Skmacy if (smp_cpus == mp_ncpus) { 431182902Skmacy /* enable IPI's, tlb shootdown, freezes etc */ 432182902Skmacy atomic_store_rel_int(&smp_started, 1); 433182902Skmacy smp_active = 1; /* historic */ 434182902Skmacy } 435182902Skmacy 436182902Skmacy mtx_unlock_spin(&ap_boot_mtx); 437182902Skmacy 438182902Skmacy /* wait until all the AP's are up */ 439182902Skmacy while (smp_started == 0) 440182902Skmacy ia32_pause(); 441182902Skmacy 442183131Skmacy 443183131Skmacy PCPU_SET(curthread, PCPU_GET(idlethread)); 444182902Skmacy /* enter the scheduler */ 445182902Skmacy sched_throw(NULL); 446182902Skmacy 447182902Skmacy panic("scheduler returned us to %s", __func__); 448182902Skmacy /* NOTREACHED */ 449182902Skmacy} 450182902Skmacy 451182902Skmacy/******************************************************************* 452182902Skmacy * local functions and data 453182902Skmacy */ 454182902Skmacy 455182902Skmacy/* 456182902Skmacy * We tell the I/O APIC code about all the CPUs we want to receive 457182902Skmacy * interrupts. If we don't want certain CPUs to receive IRQs we 458182902Skmacy * can simply not tell the I/O APIC code about them in this function. 459182902Skmacy * We also do not tell it about the BSP since it tells itself about 460182902Skmacy * the BSP internally to work with UP kernels and on UP machines. 
461182902Skmacy */ 462182902Skmacystatic void 463182902Skmacyset_interrupt_apic_ids(void) 464182902Skmacy{ 465182902Skmacy u_int i, apic_id; 466182902Skmacy 467182902Skmacy for (i = 0; i < MAXCPU; i++) { 468182902Skmacy apic_id = cpu_apic_ids[i]; 469182902Skmacy if (apic_id == -1) 470182902Skmacy continue; 471182902Skmacy if (cpu_info[apic_id].cpu_bsp) 472182902Skmacy continue; 473182902Skmacy if (cpu_info[apic_id].cpu_disabled) 474182902Skmacy continue; 475182902Skmacy 476182902Skmacy /* Don't let hyperthreads service interrupts. */ 477182902Skmacy if (hyperthreading_cpus > 1 && 478182902Skmacy apic_id % hyperthreading_cpus != 0) 479182902Skmacy continue; 480182902Skmacy 481182902Skmacy intr_add_cpu(i); 482182902Skmacy } 483182902Skmacy} 484182902Skmacy 485182902Skmacy/* 486182902Skmacy * Assign logical CPU IDs to local APICs. 487182902Skmacy */ 488182902Skmacystatic void 489182902Skmacyassign_cpu_ids(void) 490182902Skmacy{ 491182902Skmacy u_int i; 492182902Skmacy 493182902Skmacy /* Check for explicitly disabled CPUs. */ 494182902Skmacy for (i = 0; i <= MAX_APIC_ID; i++) { 495182902Skmacy if (!cpu_info[i].cpu_present || cpu_info[i].cpu_bsp) 496182902Skmacy continue; 497182902Skmacy 498182902Skmacy /* Don't use this CPU if it has been disabled by a tunable. */ 499182902Skmacy if (resource_disabled("lapic", i)) { 500182902Skmacy cpu_info[i].cpu_disabled = 1; 501182902Skmacy continue; 502182902Skmacy } 503182902Skmacy } 504182902Skmacy 505182902Skmacy /* 506182902Skmacy * Assign CPU IDs to local APIC IDs and disable any CPUs 507182902Skmacy * beyond MAXCPU. CPU 0 has already been assigned to the BSP, 508182902Skmacy * so we only have to assign IDs for APs. 
509182902Skmacy */ 510182902Skmacy mp_ncpus = 1; 511182902Skmacy for (i = 0; i <= MAX_APIC_ID; i++) { 512182902Skmacy if (!cpu_info[i].cpu_present || cpu_info[i].cpu_bsp || 513182902Skmacy cpu_info[i].cpu_disabled) 514182902Skmacy continue; 515182902Skmacy 516182902Skmacy if (mp_ncpus < MAXCPU) { 517182902Skmacy cpu_apic_ids[mp_ncpus] = i; 518182902Skmacy mp_ncpus++; 519182902Skmacy } else 520182902Skmacy cpu_info[i].cpu_disabled = 1; 521182902Skmacy } 522182902Skmacy KASSERT(mp_maxid >= mp_ncpus - 1, 523182902Skmacy ("%s: counters out of sync: max %d, count %d", __func__, mp_maxid, 524182902Skmacy mp_ncpus)); 525182902Skmacy} 526182902Skmacy 527182902Skmacy/* 528182902Skmacy * start each AP in our list 529182902Skmacy */ 530182902Skmacy/* Lowest 1MB is already mapped: don't touch*/ 531182902Skmacy#define TMPMAP_START 1 532182902Skmacyint 533182902Skmacystart_all_aps(void) 534182902Skmacy{ 535182902Skmacy u_int32_t mpbioswarmvec; 536182902Skmacy int x,apic_id, cpu; 537182902Skmacy struct pcpu *pc; 538182902Skmacy 539182902Skmacy mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN); 540182902Skmacy 541182902Skmacy /* save the current value of the warm-start vector */ 542182902Skmacy mpbioswarmvec = *((u_int32_t *) WARMBOOT_OFF); 543182902Skmacy 544182902Skmacy /* set up temporary P==V mapping for AP boot */ 545182902Skmacy /* XXX this is a hack, we should boot the AP on its own stack/PTD */ 546182902Skmacy 547182902Skmacy /* start each AP */ 548182902Skmacy for (cpu = 1; cpu < mp_ncpus; cpu++) { 549182902Skmacy apic_id = cpu_apic_ids[cpu]; 550182902Skmacy 551182902Skmacy 552182902Skmacy /* setup a vector to our boot code */ 553182902Skmacy *((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET; 554182902Skmacy *((volatile u_short *) WARMBOOT_SEG) = (boot_address >> 4); 555182902Skmacy 556182902Skmacy bootAP = cpu; 557182902Skmacy bootAPgdt = gdt + (512*cpu); 558182902Skmacy 559182902Skmacy /* Get per-cpu data */ 560182902Skmacy pc = &__pcpu[bootAP]; 561183132Skmacy 
pcpu_init(pc, bootAP, sizeof(struct pcpu)); 562182902Skmacy pc->pc_apic_id = cpu_apic_ids[bootAP]; 563182902Skmacy pc->pc_prvspace = pc; 564182902Skmacy pc->pc_curthread = 0; 565182902Skmacy 566182902Skmacy gdt_segs[GPRIV_SEL].ssd_base = (int) pc; 567182902Skmacy gdt_segs[GPROC0_SEL].ssd_base = (int) &pc->pc_common_tss; 568182902Skmacy 569182902Skmacy PT_SET_MA(bootAPgdt, xpmap_ptom(VTOP(bootAPgdt)) | PG_V | PG_RW); 570182902Skmacy bzero(bootAPgdt, PAGE_SIZE); 571182902Skmacy for (x = 0; x < NGDT; x++) 572182902Skmacy ssdtosd(&gdt_segs[x], &bootAPgdt[x].sd); 573182902Skmacy PT_SET_MA(bootAPgdt, vtomach(bootAPgdt) | PG_V); 574183345Skmacy#ifdef notyet 575183345Skmacy 576183345Skmacy if (HYPERVISOR_vcpu_op(VCPUOP_get_physid, cpu, &cpu_id) == 0) { 577183345Skmacy apicid = xen_vcpu_physid_to_x86_apicid(cpu_id.phys_id); 578183345Skmacy acpiid = xen_vcpu_physid_to_x86_acpiid(cpu_id.phys_id); 579183345Skmacy#ifdef CONFIG_ACPI 580183345Skmacy if (acpiid != 0xff) 581183345Skmacy x86_acpiid_to_apicid[acpiid] = apicid; 582183345Skmacy#endif 583183345Skmacy } 584183345Skmacy#endif 585183345Skmacy 586182902Skmacy /* attempt to start the Application Processor */ 587182902Skmacy if (!start_ap(cpu)) { 588182902Skmacy printf("AP #%d (PHY# %d) failed!\n", cpu, apic_id); 589182902Skmacy /* better panic as the AP may be running loose */ 590182902Skmacy printf("panic y/n? 
[y] "); 591182902Skmacy if (cngetc() != 'n') 592182902Skmacy panic("bye-bye"); 593182902Skmacy } 594182902Skmacy 595182902Skmacy all_cpus |= (1 << cpu); /* record AP in CPU map */ 596182902Skmacy } 597182902Skmacy 598182902Skmacy 599182902Skmacy /* build our map of 'other' CPUs */ 600182902Skmacy PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask)); 601182902Skmacy 602182902Skmacy /* restore the warmstart vector */ 603182902Skmacy *(u_int32_t *) WARMBOOT_OFF = mpbioswarmvec; 604182902Skmacy 605182902Skmacy pmap_invalidate_range(kernel_pmap, 0, NKPT * NBPDR - 1); 606182902Skmacy 607182902Skmacy /* number of APs actually started */ 608182902Skmacy return mp_naps; 609182902Skmacy} 610182902Skmacy 611182902Skmacyextern uint8_t *pcpu_boot_stack; 612182902Skmacyextern trap_info_t trap_table[]; 613182902Skmacy 614182902Skmacystatic void 615182902Skmacysmp_trap_init(trap_info_t *trap_ctxt) 616182902Skmacy{ 617182902Skmacy const trap_info_t *t = trap_table; 618182902Skmacy 619182902Skmacy for (t = trap_table; t->address; t++) { 620182902Skmacy trap_ctxt[t->vector].flags = t->flags; 621182902Skmacy trap_ctxt[t->vector].cs = t->cs; 622182902Skmacy trap_ctxt[t->vector].address = t->address; 623182902Skmacy } 624182902Skmacy} 625182902Skmacy 626182902Skmacyvoid 627182902Skmacycpu_initialize_context(unsigned int cpu); 628182902Skmacyextern int nkpt; 629182902Skmacy 630182902Skmacyvoid 631182902Skmacycpu_initialize_context(unsigned int cpu) 632182902Skmacy{ 633182902Skmacy /* vcpu_guest_context_t is too large to allocate on the stack. 
634182902Skmacy * Hence we allocate statically and protect it with a lock */ 635182902Skmacy vm_page_t m[4]; 636182902Skmacy static vcpu_guest_context_t ctxt; 637182902Skmacy vm_offset_t boot_stack; 638183131Skmacy vm_offset_t newPTD; 639183131Skmacy vm_paddr_t ma[NPGPTD]; 640182902Skmacy static int color; 641182902Skmacy int i; 642182902Skmacy 643182902Skmacy /* 644183131Skmacy * Page 0,[0-3] PTD 645183131Skmacy * Page 1, [4] boot stack 646183131Skmacy * Page [5] PDPT 647183131Skmacy 648182902Skmacy * 649182902Skmacy */ 650183131Skmacy for (i = 0; i < NPGPTD + 2; i++) { 651182902Skmacy m[i] = vm_page_alloc(NULL, color++, 652182902Skmacy VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | 653182902Skmacy VM_ALLOC_ZERO); 654182902Skmacy 655182902Skmacy pmap_zero_page(m[i]); 656182902Skmacy 657182902Skmacy } 658183131Skmacy boot_stack = kmem_alloc_nofault(kernel_map, 1); 659183131Skmacy newPTD = kmem_alloc_nofault(kernel_map, NPGPTD); 660183131Skmacy ma[0] = xpmap_ptom(VM_PAGE_TO_PHYS(m[0]))|PG_V; 661182902Skmacy 662183131Skmacy#ifdef PAE 663183131Skmacy pmap_kenter(boot_stack, VM_PAGE_TO_PHYS(m[NPGPTD + 1])); 664183131Skmacy for (i = 0; i < NPGPTD; i++) { 665183131Skmacy ((vm_paddr_t *)boot_stack)[i] = 666183131Skmacy ma[i] = 667183131Skmacy xpmap_ptom(VM_PAGE_TO_PHYS(m[i]))|PG_V; 668182902Skmacy } 669183131Skmacy#endif 670182902Skmacy 671182902Skmacy /* 672182902Skmacy * Copy cpu0 IdlePTD to new IdlePTD - copying only 673182902Skmacy * kernel mappings 674182902Skmacy */ 675183131Skmacy pmap_qenter(newPTD, m, 4); 676183131Skmacy 677183131Skmacy memcpy((uint8_t *)newPTD + KPTDI*sizeof(vm_paddr_t), 678183131Skmacy (uint8_t *)PTOV(IdlePTD) + KPTDI*sizeof(vm_paddr_t), 679182902Skmacy nkpt*sizeof(vm_paddr_t)); 680183131Skmacy 681183131Skmacy pmap_qremove(newPTD, 4); 682183131Skmacy kmem_free(kernel_map, newPTD, 4); 683182902Skmacy /* 684182902Skmacy * map actual idle stack to boot_stack 685182902Skmacy */ 686183131Skmacy pmap_kenter(boot_stack, 
VM_PAGE_TO_PHYS(m[NPGPTD])); 687182902Skmacy 688182902Skmacy 689183131Skmacy xen_pgdpt_pin(xpmap_ptom(VM_PAGE_TO_PHYS(m[NPGPTD + 1]))); 690182902Skmacy vm_page_lock_queues(); 691182902Skmacy for (i = 0; i < 4; i++) { 692183131Skmacy int pdir = (PTDPTDI + i) / NPDEPG; 693183131Skmacy int curoffset = (PTDPTDI + i) % NPDEPG; 694183131Skmacy 695182902Skmacy xen_queue_pt_update((vm_paddr_t) 696183131Skmacy ((ma[pdir] & ~PG_V) + (curoffset*sizeof(vm_paddr_t))), 697182902Skmacy ma[i]); 698182902Skmacy } 699182902Skmacy PT_UPDATES_FLUSH(); 700182902Skmacy vm_page_unlock_queues(); 701182902Skmacy 702182902Skmacy memset(&ctxt, 0, sizeof(ctxt)); 703182902Skmacy ctxt.flags = VGCF_IN_KERNEL; 704182902Skmacy ctxt.user_regs.ds = GSEL(GDATA_SEL, SEL_KPL); 705182902Skmacy ctxt.user_regs.es = GSEL(GDATA_SEL, SEL_KPL); 706182902Skmacy ctxt.user_regs.fs = GSEL(GPRIV_SEL, SEL_KPL); 707182902Skmacy ctxt.user_regs.gs = GSEL(GDATA_SEL, SEL_KPL); 708182902Skmacy ctxt.user_regs.cs = GSEL(GCODE_SEL, SEL_KPL); 709182902Skmacy ctxt.user_regs.ss = GSEL(GDATA_SEL, SEL_KPL); 710182902Skmacy ctxt.user_regs.eip = (unsigned long)init_secondary; 711182902Skmacy ctxt.user_regs.eflags = PSL_KERNEL | 0x1000; /* IOPL_RING1 */ 712182902Skmacy 713182902Skmacy memset(&ctxt.fpu_ctxt, 0, sizeof(ctxt.fpu_ctxt)); 714182902Skmacy 715182902Skmacy smp_trap_init(ctxt.trap_ctxt); 716182902Skmacy 717182902Skmacy ctxt.ldt_ents = 0; 718182902Skmacy ctxt.gdt_frames[0] = (uint32_t)((uint64_t)vtomach(bootAPgdt) >> PAGE_SHIFT); 719182902Skmacy ctxt.gdt_ents = 512; 720182902Skmacy 721182902Skmacy#ifdef __i386__ 722182902Skmacy ctxt.user_regs.esp = boot_stack + PAGE_SIZE; 723182902Skmacy 724182902Skmacy ctxt.kernel_ss = GSEL(GDATA_SEL, SEL_KPL); 725182902Skmacy ctxt.kernel_sp = boot_stack + PAGE_SIZE; 726182902Skmacy 727182902Skmacy ctxt.event_callback_cs = GSEL(GCODE_SEL, SEL_KPL); 728182902Skmacy ctxt.event_callback_eip = (unsigned long)Xhypervisor_callback; 729182902Skmacy ctxt.failsafe_callback_cs = GSEL(GCODE_SEL, 
SEL_KPL); 730182902Skmacy ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback; 731182902Skmacy 732183131Skmacy ctxt.ctrlreg[3] = xpmap_ptom(VM_PAGE_TO_PHYS(m[NPGPTD + 1])); 733182902Skmacy#else /* __x86_64__ */ 734182902Skmacy ctxt.user_regs.esp = idle->thread.rsp0 - sizeof(struct pt_regs); 735182902Skmacy ctxt.kernel_ss = GSEL(GDATA_SEL, SEL_KPL); 736182902Skmacy ctxt.kernel_sp = idle->thread.rsp0; 737182902Skmacy 738182902Skmacy ctxt.event_callback_eip = (unsigned long)hypervisor_callback; 739182902Skmacy ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback; 740182902Skmacy ctxt.syscall_callback_eip = (unsigned long)system_call; 741182902Skmacy 742182902Skmacy ctxt.ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(init_level4_pgt)); 743182902Skmacy 744182902Skmacy ctxt.gs_base_kernel = (unsigned long)(cpu_pda(cpu)); 745182902Skmacy#endif 746182902Skmacy 747182902Skmacy printf("gdtpfn=%lx pdptpfn=%lx\n", 748182902Skmacy ctxt.gdt_frames[0], 749182902Skmacy ctxt.ctrlreg[3] >> PAGE_SHIFT); 750182902Skmacy 751182902Skmacy PANIC_IF(HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, &ctxt)); 752182902Skmacy DELAY(3000); 753182902Skmacy PANIC_IF(HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL)); 754182902Skmacy} 755182902Skmacy 756182902Skmacy/* 757182902Skmacy * This function starts the AP (application processor) identified 758182902Skmacy * by the APIC ID 'physicalCpu'. It does quite a "song and dance" 759182902Skmacy * to accomplish this. This is necessary because of the nuances 760182902Skmacy * of the different hardware we might encounter. It isn't pretty, 761182902Skmacy * but it seems to work. 
 */

/*
 * Watchpoint for start_ap(): snapshot of mp_naps taken just before a new
 * AP is kicked off.  The AP startup path is expected to increment mp_naps
 * once the AP is running (NOTE(review): the increment happens outside
 * this file section -- confirm in the AP entry path).
 */
int cpus;

/*
 * Kick off the AP identified by 'apic_id' and wait for it to check in.
 * Returns 1 (SUCCESS) if mp_naps advanced within 5 seconds, 0 (FAILURE)
 * on timeout.
 */
static int
start_ap(int apic_id)
{
	int ms;

	/* used as a watchpoint to signal AP startup */
	cpus = mp_naps;

	cpu_initialize_context(apic_id);

	/* Wait up to 5 seconds for it to start. */
	for (ms = 0; ms < 5000; ms++) {
		if (mp_naps > cpus)
			return 1;	/* return SUCCESS */
		DELAY(1000);
	}
	return 0;		/* return FAILURE */
}

/*
 * Flush the TLB on all other CPU's
 *
 * Publishes the operands (smp_tlb_addr1/addr2), resets the acknowledgement
 * counter, sends 'vector' to every other CPU and spins until smp_tlb_wait
 * reaches the number of other CPUs (the IPI handlers presumably bump
 * smp_tlb_wait -- they are not visible here).  Requires interrupts enabled;
 * the smp_ipi_mtx spin lock serializes concurrent shootdowns.
 */
static void
smp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2)
{
	u_int ncpu;

	ncpu = mp_ncpus - 1;	/* does not shootdown self */
	if (ncpu < 1)
		return;		/* no other cpus */
	if (!(read_eflags() & PSL_I))
		panic("%s: interrupts disabled", __func__);
	mtx_lock_spin(&smp_ipi_mtx);
	smp_tlb_addr1 = addr1;
	smp_tlb_addr2 = addr2;
	atomic_store_rel_int(&smp_tlb_wait, 0);
	ipi_all_but_self(vector);
	while (smp_tlb_wait < ncpu)
		ia32_pause();
	mtx_unlock_spin(&smp_ipi_mtx);
}

/*
 * As smp_tlb_shootdown(), but restricted to the CPUs named in 'mask'.
 * A mask of (u_int)-1 means "all CPUs but the caller"; an explicit mask
 * has the caller's own bit stripped before use.
 */
static void
smp_targeted_tlb_shootdown(u_int mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2)
{
	int ncpu, othercpus;

	othercpus = mp_ncpus - 1;
	if (mask == (u_int)-1) {
		ncpu = othercpus;
		if (ncpu < 1)
			return;
	} else {
		/* Never shoot down ourselves. */
		mask &= ~PCPU_GET(cpumask);
		if (mask == 0)
			return;
		ncpu = bitcount32(mask);
		if (ncpu > othercpus) {
			/* XXX this should be a panic offence */
			printf("SMP: tlb shootdown to %d other cpus (only have %d)\n",
			    ncpu, othercpus);
			ncpu = othercpus;
		}
		/* XXX should be a panic, implied by mask == 0 above */
		if (ncpu < 1)
			return;
	}
	if (!(read_eflags() & PSL_I))
		panic("%s: interrupts disabled", __func__);
	mtx_lock_spin(&smp_ipi_mtx);
	smp_tlb_addr1 = addr1;
	smp_tlb_addr2 = addr2;
	atomic_store_rel_int(&smp_tlb_wait, 0);
	if (mask == (u_int)-1)
		ipi_all_but_self(vector);
	else
		ipi_selected(mask, vector);
	/* Wait for every targeted CPU to acknowledge. */
	while (smp_tlb_wait < ncpu)
		ia32_pause();
	mtx_unlock_spin(&smp_ipi_mtx);
}

/*
 * Run the IPI_INVLCACHE handler on all other CPUs (no-op until SMP is up).
 */
void
smp_cache_flush(void)
{

	if (smp_started)
		smp_tlb_shootdown(IPI_INVLCACHE, 0, 0);
}

/*
 * Full TLB flush on all other CPUs.
 */
void
smp_invltlb(void)
{

	if (smp_started) {
		smp_tlb_shootdown(IPI_INVLTLB, 0, 0);
	}
}

/*
 * Invalidate the TLB entry for a single page on all other CPUs.
 */
void
smp_invlpg(vm_offset_t addr)
{

	if (smp_started) {
		smp_tlb_shootdown(IPI_INVLPG, addr, 0);
	}
}

/*
 * Invalidate the TLB entries for the range addr1..addr2 on all other CPUs.
 */
void
smp_invlpg_range(vm_offset_t addr1, vm_offset_t addr2)
{

	if (smp_started) {
		smp_tlb_shootdown(IPI_INVLRNG, addr1, addr2);
	}
}

/*
 * Full TLB flush on the CPUs named in 'mask'.
 */
void
smp_masked_invltlb(u_int mask)
{

	if (smp_started) {
		smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, 0, 0);
	}
}

/*
 * Single-page TLB invalidation on the CPUs named in 'mask'.
 */
void
smp_masked_invlpg(u_int mask, vm_offset_t addr)
{

	if (smp_started) {
		smp_targeted_tlb_shootdown(mask, IPI_INVLPG, addr, 0);
	}
}

/*
 * Range TLB invalidation on the CPUs named in 'mask'.
 */
void
smp_masked_invlpg_range(u_int mask, vm_offset_t addr1, vm_offset_t addr2)
{

	if (smp_started) {
		smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, addr1, addr2);
	}
}

/*
 * Deliver 'vector' to a single CPU.  The event-channel delivery is
 * compiled out (#ifdef notyet), so at this revision this is a no-op
 * stub: no IPI is actually sent.
 */
static __inline void
ipi_pcpu(unsigned int cpu, int vector)
{
#ifdef notyet
	int irq = per_cpu(ipi_to_irq, cpu)[vector];

	notify_remote_via_irq(irq);
#endif
}


/*
 * Handler for bitmapped IPIs: atomically fetch-and-clear this CPU's
 * pending-IPI word and act on each set bit.  Only IPI_PREEMPT is
 * handled here.
 */
void
ipi_bitmap_handler(struct trapframe frame)
{
	int cpu = PCPU_GET(cpuid);
	u_int ipi_bitmap;

	ipi_bitmap = atomic_readandclear_int(&cpu_ipi_pending[cpu]);

	if (ipi_bitmap & (1 << IPI_PREEMPT)) {
		sched_preempt(curthread);
	}
}

/*
 * send an IPI to a set of cpus.
935182902Skmacy */ 936182902Skmacyvoid 937182902Skmacyipi_selected(u_int32_t cpus, u_int ipi) 938182902Skmacy{ 939182902Skmacy int cpu; 940182902Skmacy u_int bitmap = 0; 941182902Skmacy u_int old_pending; 942182902Skmacy u_int new_pending; 943182902Skmacy 944182902Skmacy if (IPI_IS_BITMAPED(ipi)) { 945182902Skmacy bitmap = 1 << ipi; 946182902Skmacy ipi = IPI_BITMAP_VECTOR; 947182902Skmacy } 948182902Skmacy 949182902Skmacy#ifdef STOP_NMI 950182902Skmacy if (ipi == IPI_STOP && stop_cpus_with_nmi) { 951182902Skmacy ipi_nmi_selected(cpus); 952182902Skmacy return; 953182902Skmacy } 954182902Skmacy#endif 955182902Skmacy CTR3(KTR_SMP, "%s: cpus: %x ipi: %x", __func__, cpus, ipi); 956182902Skmacy while ((cpu = ffs(cpus)) != 0) { 957182902Skmacy cpu--; 958182902Skmacy cpus &= ~(1 << cpu); 959182902Skmacy 960182902Skmacy KASSERT(cpu_apic_ids[cpu] != -1, 961182902Skmacy ("IPI to non-existent CPU %d", cpu)); 962182902Skmacy 963182902Skmacy if (bitmap) { 964182902Skmacy do { 965182902Skmacy old_pending = cpu_ipi_pending[cpu]; 966182902Skmacy new_pending = old_pending | bitmap; 967182902Skmacy } while (!atomic_cmpset_int(&cpu_ipi_pending[cpu],old_pending, new_pending)); 968182902Skmacy 969182902Skmacy if (old_pending) 970182902Skmacy continue; 971182902Skmacy } 972182902Skmacy 973183345Skmacy ipi_pcpu(cpu, ipi); 974182902Skmacy } 975182902Skmacy} 976182902Skmacy 977182902Skmacy/* 978182902Skmacy * send an IPI INTerrupt containing 'vector' to all CPUs, including myself 979182902Skmacy */ 980182902Skmacyvoid 981182902Skmacyipi_all(u_int ipi) 982182902Skmacy{ 983182902Skmacy 984182902Skmacy if (IPI_IS_BITMAPED(ipi) || (ipi == IPI_STOP && stop_cpus_with_nmi)) { 985182902Skmacy ipi_selected(all_cpus, ipi); 986182902Skmacy return; 987182902Skmacy } 988182902Skmacy CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi); 989183345Skmacy 990183345Skmacy /* 991183345Skmacy * 992183345Skmacy */ 993183345Skmacy 994183345Skmacy ipi_selected(-1, ipi); 995182902Skmacy} 996182902Skmacy 997182902Skmacy/* 
998182902Skmacy * send an IPI to all CPUs EXCEPT myself 999182902Skmacy */ 1000182902Skmacyvoid 1001182902Skmacyipi_all_but_self(u_int ipi) 1002182902Skmacy{ 1003182902Skmacy 1004182902Skmacy if (IPI_IS_BITMAPED(ipi) || (ipi == IPI_STOP && stop_cpus_with_nmi)) { 1005182902Skmacy ipi_selected(PCPU_GET(other_cpus), ipi); 1006182902Skmacy return; 1007182902Skmacy } 1008182902Skmacy CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi); 1009183345Skmacy ipi_selected(((int)-1 & ~(1 << curcpu)), ipi); 1010182902Skmacy} 1011182902Skmacy 1012182902Skmacy/* 1013182902Skmacy * send an IPI to myself 1014182902Skmacy */ 1015182902Skmacyvoid 1016182902Skmacyipi_self(u_int ipi) 1017182902Skmacy{ 1018182902Skmacy 1019182902Skmacy if (IPI_IS_BITMAPED(ipi) || (ipi == IPI_STOP && stop_cpus_with_nmi)) { 1020182902Skmacy ipi_selected(PCPU_GET(cpumask), ipi); 1021182902Skmacy return; 1022182902Skmacy } 1023182902Skmacy CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi); 1024183345Skmacy ipi_pcpu(curcpu, ipi); 1025182902Skmacy} 1026182902Skmacy 1027182902Skmacy#ifdef STOP_NMI 1028182902Skmacy/* 1029182902Skmacy * send NMI IPI to selected CPUs 1030182902Skmacy */ 1031182902Skmacy 1032182902Skmacy#define BEFORE_SPIN 1000000 1033182902Skmacy 1034182902Skmacyvoid 1035182902Skmacyipi_nmi_selected(u_int32_t cpus) 1036182902Skmacy{ 1037182902Skmacy int cpu; 1038182902Skmacy register_t icrlo; 1039182902Skmacy 1040182902Skmacy icrlo = APIC_DELMODE_NMI | APIC_DESTMODE_PHY | APIC_LEVEL_ASSERT 1041182902Skmacy | APIC_TRIGMOD_EDGE; 1042182902Skmacy 1043182902Skmacy CTR2(KTR_SMP, "%s: cpus: %x nmi", __func__, cpus); 1044182902Skmacy 1045182902Skmacy atomic_set_int(&ipi_nmi_pending, cpus); 1046182902Skmacy 1047182902Skmacy while ((cpu = ffs(cpus)) != 0) { 1048182902Skmacy cpu--; 1049182902Skmacy cpus &= ~(1 << cpu); 1050182902Skmacy 1051182902Skmacy KASSERT(cpu_apic_ids[cpu] != -1, 1052182902Skmacy ("IPI NMI to non-existent CPU %d", cpu)); 1053182902Skmacy 1054182902Skmacy /* Wait for an earlier IPI to finish. 
*/ 1055182902Skmacy if (!lapic_ipi_wait(BEFORE_SPIN)) 1056182902Skmacy panic("ipi_nmi_selected: previous IPI has not cleared"); 1057182902Skmacy 1058182902Skmacy lapic_ipi_raw(icrlo, cpu_apic_ids[cpu]); 1059182902Skmacy } 1060182902Skmacy} 1061182902Skmacy 1062182902Skmacyint 1063182902Skmacyipi_nmi_handler(void) 1064182902Skmacy{ 1065182902Skmacy int cpumask = PCPU_GET(cpumask); 1066182902Skmacy 1067182902Skmacy if (!(ipi_nmi_pending & cpumask)) 1068182902Skmacy return 1; 1069182902Skmacy 1070182902Skmacy atomic_clear_int(&ipi_nmi_pending, cpumask); 1071182902Skmacy cpustop_handler(); 1072182902Skmacy return 0; 1073182902Skmacy} 1074182902Skmacy 1075182902Skmacy#endif /* STOP_NMI */ 1076182902Skmacy 1077182902Skmacy/* 1078182902Skmacy * Handle an IPI_STOP by saving our current context and spinning until we 1079182902Skmacy * are resumed. 1080182902Skmacy */ 1081182902Skmacyvoid 1082182902Skmacycpustop_handler(void) 1083182902Skmacy{ 1084182902Skmacy int cpu = PCPU_GET(cpuid); 1085182902Skmacy int cpumask = PCPU_GET(cpumask); 1086182902Skmacy 1087182902Skmacy savectx(&stoppcbs[cpu]); 1088182902Skmacy 1089182902Skmacy /* Indicate that we are stopped */ 1090182902Skmacy atomic_set_int(&stopped_cpus, cpumask); 1091182902Skmacy 1092182902Skmacy /* Wait for restart */ 1093182902Skmacy while (!(started_cpus & cpumask)) 1094182902Skmacy ia32_pause(); 1095182902Skmacy 1096182902Skmacy atomic_clear_int(&started_cpus, cpumask); 1097182902Skmacy atomic_clear_int(&stopped_cpus, cpumask); 1098182902Skmacy 1099182902Skmacy if (cpu == 0 && cpustop_restartfunc != NULL) { 1100182902Skmacy cpustop_restartfunc(); 1101182902Skmacy cpustop_restartfunc = NULL; 1102182902Skmacy } 1103182902Skmacy} 1104182902Skmacy 1105182902Skmacy/* 1106182902Skmacy * This is called once the rest of the system is up and running and we're 1107182902Skmacy * ready to let the AP's out of the pen. 
1108182902Skmacy */ 1109182902Skmacystatic void 1110182902Skmacyrelease_aps(void *dummy __unused) 1111182902Skmacy{ 1112182902Skmacy 1113182902Skmacy if (mp_ncpus == 1) 1114182902Skmacy return; 1115182902Skmacy atomic_store_rel_int(&aps_ready, 1); 1116182902Skmacy while (smp_started == 0) 1117182902Skmacy ia32_pause(); 1118182902Skmacy} 1119182902SkmacySYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL); 1120182902Skmacy 1121