mp_machdep.c revision 183132
1182902Skmacy/*- 2182902Skmacy * Copyright (c) 1996, by Steve Passe 3182902Skmacy * Copyright (c) 2008, by Kip Macy 4182902Skmacy * All rights reserved. 5182902Skmacy * 6182902Skmacy * Redistribution and use in source and binary forms, with or without 7182902Skmacy * modification, are permitted provided that the following conditions 8182902Skmacy * are met: 9182902Skmacy * 1. Redistributions of source code must retain the above copyright 10182902Skmacy * notice, this list of conditions and the following disclaimer. 11182902Skmacy * 2. The name of the developer may NOT be used to endorse or promote products 12182902Skmacy * derived from this software without specific prior written permission. 13182902Skmacy * 14182902Skmacy * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15182902Skmacy * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16182902Skmacy * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17182902Skmacy * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18182902Skmacy * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19182902Skmacy * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20182902Skmacy * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21182902Skmacy * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22182902Skmacy * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23182902Skmacy * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24182902Skmacy * SUCH DAMAGE. 25182902Skmacy */ 26182902Skmacy 27182902Skmacy#include <sys/cdefs.h> 28182902Skmacy__FBSDID("$FreeBSD: head/sys/i386/xen/mp_machdep.c 183132 2008-09-18 02:59:19Z kmacy $"); 29182902Skmacy 30182902Skmacy#include "opt_apic.h" 31182902Skmacy#include "opt_cpu.h" 32182902Skmacy#include "opt_kstack_pages.h" 33182902Skmacy#include "opt_mp_watchdog.h" 34182902Skmacy#include "opt_sched.h" 35182902Skmacy#include "opt_smp.h" 36182902Skmacy 37182902Skmacy#if !defined(lint) 38182902Skmacy#if !defined(SMP) 39182902Skmacy#error How did you get here? 40182902Skmacy#endif 41182902Skmacy 42182902Skmacy#ifndef DEV_APIC 43182902Skmacy#error The apic device is required for SMP, add "device apic" to your config file. 44182902Skmacy#endif 45182902Skmacy#if defined(CPU_DISABLE_CMPXCHG) && !defined(COMPILING_LINT) 46182902Skmacy#error SMP not supported with CPU_DISABLE_CMPXCHG 47182902Skmacy#endif 48182902Skmacy#endif /* not lint */ 49182902Skmacy 50182902Skmacy#include <sys/param.h> 51182902Skmacy#include <sys/systm.h> 52182902Skmacy#include <sys/bus.h> 53182902Skmacy#include <sys/cons.h> /* cngetc() */ 54182902Skmacy#ifdef GPROF 55182902Skmacy#include <sys/gmon.h> 56182902Skmacy#endif 57182902Skmacy#include <sys/kernel.h> 58182902Skmacy#include <sys/ktr.h> 59182902Skmacy#include <sys/lock.h> 60182902Skmacy#include <sys/malloc.h> 61182902Skmacy#include <sys/memrange.h> 62182902Skmacy#include <sys/mutex.h> 63182902Skmacy#include <sys/pcpu.h> 64182902Skmacy#include <sys/proc.h> 65182902Skmacy#include <sys/sched.h> 66182902Skmacy#include <sys/smp.h> 67182902Skmacy#include <sys/sysctl.h> 68182902Skmacy 69182902Skmacy#include <vm/vm.h> 70182902Skmacy#include <vm/vm_param.h> 71182902Skmacy#include <vm/pmap.h> 72182902Skmacy#include <vm/vm_kern.h> 73182902Skmacy#include <vm/vm_extern.h> 74182902Skmacy#include <vm/vm_page.h> 75182902Skmacy 76182902Skmacy#include <machine/apicreg.h> 77182902Skmacy#include <machine/md_var.h> 78182902Skmacy#include <machine/mp_watchdog.h> 79182902Skmacy#include <machine/pcb.h> 80182902Skmacy#include <machine/psl.h> 81182902Skmacy#include <machine/smp.h> 82182902Skmacy#include <machine/specialreg.h> 83182902Skmacy#include <machine/pcpu.h> 84182902Skmacy 85182902Skmacy 86182902Skmacy 87182902Skmacy#include <machine/xen/xen-os.h> 88182902Skmacy#include <machine/xen/hypervisor.h> 89182902Skmacy#include <xen/interface/vcpu.h> 90182902Skmacy 91182902Skmacy 92182902Skmacy#define WARMBOOT_TARGET 0 93182902Skmacy#define WARMBOOT_OFF (KERNBASE + 0x0467) 94182902Skmacy#define WARMBOOT_SEG (KERNBASE + 0x0469) 95182902Skmacy 96182902Skmacy#define stop_cpus_with_nmi 0 97182902Skmacy 98182902Skmacy 99182902Skmacyint mp_naps; /* # of Applications processors */ 100182902Skmacyint boot_cpu_id = -1; /* designated BSP */ 101182902Skmacy 102182902Skmacyextern struct pcpu __pcpu[]; 103182902Skmacy 104182902Skmacystatic int bootAP; 105182902Skmacystatic union descriptor *bootAPgdt; 106182902Skmacy 107182902Skmacy 108182902Skmacy/* Free these after use */ 109182902Skmacyvoid *bootstacks[MAXCPU]; 110182902Skmacy 111182902Skmacy/* Hotwire a 0->4MB V==P mapping */ 112182902Skmacyextern pt_entry_t *KPTphys; 113182902Skmacy 114182902Skmacystruct pcb stoppcbs[MAXCPU]; 115182902Skmacy 116182902Skmacy/* Variables needed for SMP tlb shootdown. */ 117182902Skmacyvm_offset_t smp_tlb_addr1; 118182902Skmacyvm_offset_t smp_tlb_addr2; 119182902Skmacyvolatile int smp_tlb_wait; 120182902Skmacy 121182902Skmacystatic u_int logical_cpus; 122182902Skmacy 123182902Skmacy/* used to hold the AP's until we are ready to release them */ 124182902Skmacystatic struct mtx ap_boot_mtx; 125182902Skmacy 126182902Skmacy/* Set to 1 once we're ready to let the APs out of the pen. */ 127182902Skmacystatic volatile int aps_ready = 0; 128182902Skmacy 129182902Skmacy/* 130182902Skmacy * Store data from cpu_add() until later in the boot when we actually setup 131182902Skmacy * the APs. 132182902Skmacy */ 133182902Skmacystruct cpu_info { 134182902Skmacy int cpu_present:1; 135182902Skmacy int cpu_bsp:1; 136182902Skmacy int cpu_disabled:1; 137182902Skmacy} static cpu_info[MAX_APIC_ID + 1]; 138182902Skmacyint cpu_apic_ids[MAXCPU]; 139182902Skmacy 140182902Skmacy/* Holds pending bitmap based IPIs per CPU */ 141182902Skmacystatic volatile u_int cpu_ipi_pending[MAXCPU]; 142182902Skmacy 143182902Skmacystatic u_int boot_address; 144182902Skmacy 145182902Skmacystatic void assign_cpu_ids(void); 146182902Skmacystatic void set_interrupt_apic_ids(void); 147182902Skmacyint start_all_aps(void); 148182902Skmacystatic int start_ap(int apic_id); 149182902Skmacystatic void release_aps(void *dummy); 150182902Skmacy 151182902Skmacystatic u_int hyperthreading_cpus; 152182902Skmacystatic cpumask_t hyperthreading_cpus_mask; 153182902Skmacy 154182902Skmacyextern void Xhypervisor_callback(void); 155182902Skmacyextern void failsafe_callback(void); 156182902Skmacy 157182902Skmacystruct cpu_group * 158182902Skmacycpu_topo(void) 159182902Skmacy{ 160182902Skmacy if (cpu_cores == 0) 161182902Skmacy cpu_cores = 1; 162182902Skmacy if (cpu_logical == 0) 163182902Skmacy cpu_logical = 1; 164182902Skmacy if (mp_ncpus % (cpu_cores * cpu_logical) != 0) { 165182902Skmacy printf("WARNING: Non-uniform processors.\n"); 166182902Skmacy printf("WARNING: Using suboptimal topology.\n"); 167182902Skmacy return (smp_topo_none()); 168182902Skmacy } 169182902Skmacy /* 170182902Skmacy * No multi-core or hyper-threaded. 171182902Skmacy */ 172182902Skmacy if (cpu_logical * cpu_cores == 1) 173182902Skmacy return (smp_topo_none()); 174182902Skmacy /* 175182902Skmacy * Only HTT no multi-core. 176182902Skmacy */ 177182902Skmacy if (cpu_logical > 1 && cpu_cores == 1) 178182902Skmacy return (smp_topo_1level(CG_SHARE_L1, cpu_logical, CG_FLAG_HTT)); 179182902Skmacy /* 180182902Skmacy * Only multi-core no HTT. 181182902Skmacy */ 182182902Skmacy if (cpu_cores > 1 && cpu_logical == 1) 183182902Skmacy return (smp_topo_1level(CG_SHARE_NONE, cpu_cores, 0)); 184182902Skmacy /* 185182902Skmacy * Both HTT and multi-core. 186182902Skmacy */ 187182902Skmacy return (smp_topo_2level(CG_SHARE_NONE, cpu_cores, 188182902Skmacy CG_SHARE_L1, cpu_logical, CG_FLAG_HTT)); 189182902Skmacy} 190182902Skmacy 191182902Skmacy/* 192182902Skmacy * Calculate usable address in base memory for AP trampoline code. 193182902Skmacy */ 194182902Skmacyu_int 195182902Skmacymp_bootaddress(u_int basemem) 196182902Skmacy{ 197182902Skmacy 198182902Skmacy return (basemem); 199182902Skmacy} 200182902Skmacy 201182902Skmacyvoid 202182902Skmacycpu_add(u_int apic_id, char boot_cpu) 203182902Skmacy{ 204182902Skmacy 205182902Skmacy if (apic_id > MAX_APIC_ID) { 206182902Skmacy panic("SMP: APIC ID %d too high", apic_id); 207182902Skmacy return; 208182902Skmacy } 209182902Skmacy KASSERT(cpu_info[apic_id].cpu_present == 0, ("CPU %d added twice", 210182902Skmacy apic_id)); 211182902Skmacy cpu_info[apic_id].cpu_present = 1; 212182902Skmacy if (boot_cpu) { 213182902Skmacy KASSERT(boot_cpu_id == -1, 214182902Skmacy ("CPU %d claims to be BSP, but CPU %d already is", apic_id, 215182902Skmacy boot_cpu_id)); 216182902Skmacy boot_cpu_id = apic_id; 217182902Skmacy cpu_info[apic_id].cpu_bsp = 1; 218182902Skmacy } 219182902Skmacy if (mp_ncpus < MAXCPU) 220182902Skmacy mp_ncpus++; 221182902Skmacy if (bootverbose) 222182902Skmacy printf("SMP: Added CPU %d (%s)\n", apic_id, boot_cpu ? "BSP" : 223182902Skmacy "AP"); 224182902Skmacy} 225182902Skmacy 226182902Skmacyvoid 227182902Skmacycpu_mp_setmaxid(void) 228182902Skmacy{ 229182902Skmacy 230182902Skmacy mp_maxid = MAXCPU - 1; 231182902Skmacy} 232182902Skmacy 233182902Skmacyint 234182902Skmacycpu_mp_probe(void) 235182902Skmacy{ 236182902Skmacy 237182902Skmacy /* 238182902Skmacy * Always record BSP in CPU map so that the mbuf init code works 239182902Skmacy * correctly. 240182902Skmacy */ 241182902Skmacy all_cpus = 1; 242182902Skmacy if (mp_ncpus == 0) { 243182902Skmacy /* 244182902Skmacy * No CPUs were found, so this must be a UP system. Setup 245182902Skmacy * the variables to represent a system with a single CPU 246182902Skmacy * with an id of 0. 247182902Skmacy */ 248182902Skmacy mp_ncpus = 1; 249182902Skmacy return (0); 250182902Skmacy } 251182902Skmacy 252182902Skmacy /* At least one CPU was found. */ 253182902Skmacy if (mp_ncpus == 1) { 254182902Skmacy /* 255182902Skmacy * One CPU was found, so this must be a UP system with 256182902Skmacy * an I/O APIC. 257182902Skmacy */ 258182902Skmacy return (0); 259182902Skmacy } 260182902Skmacy 261182902Skmacy /* At least two CPUs were found. */ 262182902Skmacy return (1); 263182902Skmacy} 264182902Skmacy 265182902Skmacy/* 266182902Skmacy * Initialize the IPI handlers and start up the AP's. 267182902Skmacy */ 268182902Skmacyvoid 269182902Skmacycpu_mp_start(void) 270182902Skmacy{ 271182902Skmacy int i; 272182902Skmacy 273182902Skmacy /* Initialize the logical ID to APIC ID table. */ 274182902Skmacy for (i = 0; i < MAXCPU; i++) { 275182902Skmacy cpu_apic_ids[i] = -1; 276182902Skmacy cpu_ipi_pending[i] = 0; 277182902Skmacy } 278182902Skmacy 279182902Skmacy /* Set boot_cpu_id if needed. */ 280182902Skmacy if (boot_cpu_id == -1) { 281182902Skmacy boot_cpu_id = PCPU_GET(apic_id); 282182902Skmacy cpu_info[boot_cpu_id].cpu_bsp = 1; 283182902Skmacy } else 284182902Skmacy KASSERT(boot_cpu_id == PCPU_GET(apic_id), 285182902Skmacy ("BSP's APIC ID doesn't match boot_cpu_id")); 286182902Skmacy cpu_apic_ids[0] = boot_cpu_id; 287182902Skmacy 288182902Skmacy assign_cpu_ids(); 289182902Skmacy 290182902Skmacy /* Start each Application Processor */ 291182902Skmacy start_all_aps(); 292182902Skmacy 293182902Skmacy /* Setup the initial logical CPUs info. */ 294182902Skmacy logical_cpus = logical_cpus_mask = 0; 295182902Skmacy if (cpu_feature & CPUID_HTT) 296182902Skmacy logical_cpus = (cpu_procinfo & CPUID_HTT_CORES) >> 16; 297182902Skmacy 298182902Skmacy set_interrupt_apic_ids(); 299182902Skmacy} 300182902Skmacy 301182902Skmacy 302182902Skmacy/* 303182902Skmacy * Print various information about the SMP system hardware and setup. 304182902Skmacy */ 305182902Skmacyvoid 306182902Skmacycpu_mp_announce(void) 307182902Skmacy{ 308182902Skmacy int i, x; 309182902Skmacy 310182902Skmacy /* List CPUs */ 311182902Skmacy printf(" cpu0 (BSP): APIC ID: %2d\n", boot_cpu_id); 312182902Skmacy for (i = 1, x = 0; x <= MAX_APIC_ID; x++) { 313182902Skmacy if (!cpu_info[x].cpu_present || cpu_info[x].cpu_bsp) 314182902Skmacy continue; 315182902Skmacy if (cpu_info[x].cpu_disabled) 316182902Skmacy printf(" cpu (AP): APIC ID: %2d (disabled)\n", x); 317182902Skmacy else { 318182902Skmacy KASSERT(i < mp_ncpus, 319182902Skmacy ("mp_ncpus and actual cpus are out of whack")); 320182902Skmacy printf(" cpu%d (AP): APIC ID: %2d\n", i++, x); 321182902Skmacy } 322182902Skmacy } 323182902Skmacy} 324182902Skmacy 325182902Skmacy#define MTOPSIZE (1<<(14 + PAGE_SHIFT)) 326182902Skmacy 327182902Skmacy/* 328182902Skmacy * AP CPU's call this to initialize themselves. 329182902Skmacy */ 330182902Skmacyvoid 331182902Skmacyinit_secondary(void) 332182902Skmacy{ 333182902Skmacy vm_offset_t addr; 334182902Skmacy int gsel_tss; 335182902Skmacy 336182902Skmacy 337182902Skmacy /* bootAP is set in start_ap() to our ID. */ 338182902Skmacy 339182902Skmacy 340182902Skmacy PCPU_SET(currentldt, _default_ldt); 341182902Skmacy gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); 342182902Skmacy#if 0 343182902Skmacy gdt[bootAP * NGDT + GPROC0_SEL].sd.sd_type = SDT_SYS386TSS; 344182902Skmacy#endif 345182902Skmacy PCPU_SET(common_tss.tss_esp0, 0); /* not used until after switch */ 346182902Skmacy PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL)); 347182902Skmacy PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16); 348182902Skmacy#if 0 349182902Skmacy PCPU_SET(tss_gdt, &gdt[bootAP * NGDT + GPROC0_SEL].sd); 350182902Skmacy 351182902Skmacy PCPU_SET(common_tssd, *PCPU_GET(tss_gdt)); 352182902Skmacy#endif 353182902Skmacy PCPU_SET(fsgs_gdt, &gdt[GUFS_SEL].sd); 354182902Skmacy 355182902Skmacy /* 356182902Skmacy * Set to a known state: 357182902Skmacy * Set by mpboot.s: CR0_PG, CR0_PE 358182902Skmacy * Set by cpu_setregs: CR0_NE, CR0_MP, CR0_TS, CR0_WP, CR0_AM 359182902Skmacy */ 360182902Skmacy /* 361182902Skmacy * signal our startup to the BSP. 362182902Skmacy */ 363182902Skmacy mp_naps++; 364182902Skmacy 365182902Skmacy /* Spin until the BSP releases the AP's. */ 366182902Skmacy while (!aps_ready) 367182902Skmacy ia32_pause(); 368182902Skmacy 369182902Skmacy /* BSP may have changed PTD while we were waiting */ 370182902Skmacy invltlb(); 371182902Skmacy for (addr = 0; addr < NKPT * NBPDR - 1; addr += PAGE_SIZE) 372182902Skmacy invlpg(addr); 373182902Skmacy 374182902Skmacy /* set up FPU state on the AP */ 375182902Skmacy npxinit(__INITIAL_NPXCW__); 376182902Skmacy#if 0 377182902Skmacy 378182902Skmacy /* set up SSE registers */ 379182902Skmacy enable_sse(); 380182902Skmacy#endif 381182902Skmacy#if 0 && defined(PAE) 382182902Skmacy /* Enable the PTE no-execute bit. */ 383182902Skmacy if ((amd_feature & AMDID_NX) != 0) { 384182902Skmacy uint64_t msr; 385182902Skmacy 386182902Skmacy msr = rdmsr(MSR_EFER) | EFER_NXE; 387182902Skmacy wrmsr(MSR_EFER, msr); 388182902Skmacy } 389182902Skmacy#endif 390182902Skmacy#if 0 391182902Skmacy /* A quick check from sanity claus */ 392182902Skmacy if (PCPU_GET(apic_id) != lapic_id()) { 393182902Skmacy printf("SMP: cpuid = %d\n", PCPU_GET(cpuid)); 394182902Skmacy printf("SMP: actual apic_id = %d\n", lapic_id()); 395182902Skmacy printf("SMP: correct apic_id = %d\n", PCPU_GET(apic_id)); 396182902Skmacy panic("cpuid mismatch! boom!!"); 397182902Skmacy } 398182902Skmacy#endif 399182902Skmacy 400182902Skmacy /* Initialize curthread. */ 401182902Skmacy KASSERT(PCPU_GET(idlethread) != NULL, ("no idle thread")); 402182902Skmacy PCPU_SET(curthread, PCPU_GET(idlethread)); 403182902Skmacy 404182902Skmacy mtx_lock_spin(&ap_boot_mtx); 405182902Skmacy#if 0 406182902Skmacy 407182902Skmacy /* Init local apic for irq's */ 408182902Skmacy lapic_setup(1); 409182902Skmacy#endif 410182902Skmacy smp_cpus++; 411182902Skmacy 412182902Skmacy CTR1(KTR_SMP, "SMP: AP CPU #%d Launched", PCPU_GET(cpuid)); 413182902Skmacy printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid)); 414182902Skmacy 415182902Skmacy /* Determine if we are a logical CPU. */ 416182902Skmacy if (logical_cpus > 1 && PCPU_GET(apic_id) % logical_cpus != 0) 417182902Skmacy logical_cpus_mask |= PCPU_GET(cpumask); 418182902Skmacy 419182902Skmacy /* Determine if we are a hyperthread. */ 420182902Skmacy if (hyperthreading_cpus > 1 && 421182902Skmacy PCPU_GET(apic_id) % hyperthreading_cpus != 0) 422182902Skmacy hyperthreading_cpus_mask |= PCPU_GET(cpumask); 423182902Skmacy 424182902Skmacy /* Build our map of 'other' CPUs. */ 425182902Skmacy PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask)); 426182902Skmacy#if 0 427182902Skmacy if (bootverbose) 428182902Skmacy lapic_dump("AP"); 429182902Skmacy#endif 430182902Skmacy if (smp_cpus == mp_ncpus) { 431182902Skmacy /* enable IPI's, tlb shootdown, freezes etc */ 432182902Skmacy atomic_store_rel_int(&smp_started, 1); 433182902Skmacy smp_active = 1; /* historic */ 434182902Skmacy } 435182902Skmacy 436182902Skmacy mtx_unlock_spin(&ap_boot_mtx); 437182902Skmacy 438182902Skmacy /* wait until all the AP's are up */ 439182902Skmacy while (smp_started == 0) 440182902Skmacy ia32_pause(); 441182902Skmacy 442183131Skmacy 443183131Skmacy PCPU_SET(curthread, PCPU_GET(idlethread)); 444182902Skmacy /* enter the scheduler */ 445182902Skmacy sched_throw(NULL); 446182902Skmacy 447182902Skmacy panic("scheduler returned us to %s", __func__); 448182902Skmacy /* NOTREACHED */ 449182902Skmacy} 450182902Skmacy 451182902Skmacy/******************************************************************* 452182902Skmacy * local functions and data 453182902Skmacy */ 454182902Skmacy 455182902Skmacy/* 456182902Skmacy * We tell the I/O APIC code about all the CPUs we want to receive 457182902Skmacy * interrupts. If we don't want certain CPUs to receive IRQs we 458182902Skmacy * can simply not tell the I/O APIC code about them in this function. 459182902Skmacy * We also do not tell it about the BSP since it tells itself about 460182902Skmacy * the BSP internally to work with UP kernels and on UP machines. 461182902Skmacy */ 462182902Skmacystatic void 463182902Skmacyset_interrupt_apic_ids(void) 464182902Skmacy{ 465182902Skmacy u_int i, apic_id; 466182902Skmacy 467182902Skmacy for (i = 0; i < MAXCPU; i++) { 468182902Skmacy apic_id = cpu_apic_ids[i]; 469182902Skmacy if (apic_id == -1) 470182902Skmacy continue; 471182902Skmacy if (cpu_info[apic_id].cpu_bsp) 472182902Skmacy continue; 473182902Skmacy if (cpu_info[apic_id].cpu_disabled) 474182902Skmacy continue; 475182902Skmacy 476182902Skmacy /* Don't let hyperthreads service interrupts. */ 477182902Skmacy if (hyperthreading_cpus > 1 && 478182902Skmacy apic_id % hyperthreading_cpus != 0) 479182902Skmacy continue; 480182902Skmacy 481182902Skmacy intr_add_cpu(i); 482182902Skmacy } 483182902Skmacy} 484182902Skmacy 485182902Skmacy/* 486182902Skmacy * Assign logical CPU IDs to local APICs. 487182902Skmacy */ 488182902Skmacystatic void 489182902Skmacyassign_cpu_ids(void) 490182902Skmacy{ 491182902Skmacy u_int i; 492182902Skmacy 493182902Skmacy /* Check for explicitly disabled CPUs. */ 494182902Skmacy for (i = 0; i <= MAX_APIC_ID; i++) { 495182902Skmacy if (!cpu_info[i].cpu_present || cpu_info[i].cpu_bsp) 496182902Skmacy continue; 497182902Skmacy 498182902Skmacy /* Don't use this CPU if it has been disabled by a tunable. */ 499182902Skmacy if (resource_disabled("lapic", i)) { 500182902Skmacy cpu_info[i].cpu_disabled = 1; 501182902Skmacy continue; 502182902Skmacy } 503182902Skmacy } 504182902Skmacy 505182902Skmacy /* 506182902Skmacy * Assign CPU IDs to local APIC IDs and disable any CPUs 507182902Skmacy * beyond MAXCPU. CPU 0 has already been assigned to the BSP, 508182902Skmacy * so we only have to assign IDs for APs. 509182902Skmacy */ 510182902Skmacy mp_ncpus = 1; 511182902Skmacy for (i = 0; i <= MAX_APIC_ID; i++) { 512182902Skmacy if (!cpu_info[i].cpu_present || cpu_info[i].cpu_bsp || 513182902Skmacy cpu_info[i].cpu_disabled) 514182902Skmacy continue; 515182902Skmacy 516182902Skmacy if (mp_ncpus < MAXCPU) { 517182902Skmacy cpu_apic_ids[mp_ncpus] = i; 518182902Skmacy mp_ncpus++; 519182902Skmacy } else 520182902Skmacy cpu_info[i].cpu_disabled = 1; 521182902Skmacy } 522182902Skmacy KASSERT(mp_maxid >= mp_ncpus - 1, 523182902Skmacy ("%s: counters out of sync: max %d, count %d", __func__, mp_maxid, 524182902Skmacy mp_ncpus)); 525182902Skmacy} 526182902Skmacy 527182902Skmacy/* 528182902Skmacy * start each AP in our list 529182902Skmacy */ 530182902Skmacy/* Lowest 1MB is already mapped: don't touch*/ 531182902Skmacy#define TMPMAP_START 1 532182902Skmacyint 533182902Skmacystart_all_aps(void) 534182902Skmacy{ 535182902Skmacy u_int32_t mpbioswarmvec; 536182902Skmacy int x,apic_id, cpu; 537182902Skmacy struct pcpu *pc; 538182902Skmacy 539182902Skmacy mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN); 540182902Skmacy 541182902Skmacy /* save the current value of the warm-start vector */ 542182902Skmacy mpbioswarmvec = *((u_int32_t *) WARMBOOT_OFF); 543182902Skmacy 544182902Skmacy /* set up temporary P==V mapping for AP boot */ 545182902Skmacy /* XXX this is a hack, we should boot the AP on its own stack/PTD */ 546182902Skmacy 547182902Skmacy /* start each AP */ 548182902Skmacy for (cpu = 1; cpu < mp_ncpus; cpu++) { 549182902Skmacy apic_id = cpu_apic_ids[cpu]; 550182902Skmacy 551182902Skmacy 552182902Skmacy /* setup a vector to our boot code */ 553182902Skmacy *((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET; 554182902Skmacy *((volatile u_short *) WARMBOOT_SEG) = (boot_address >> 4); 555182902Skmacy 556182902Skmacy bootAP = cpu; 557182902Skmacy bootAPgdt = gdt + (512*cpu); 558182902Skmacy 559182902Skmacy /* Get per-cpu data */ 560182902Skmacy pc = &__pcpu[bootAP]; 561183132Skmacy pcpu_init(pc, bootAP, sizeof(struct pcpu)); 562182902Skmacy pc->pc_apic_id = cpu_apic_ids[bootAP]; 563182902Skmacy pc->pc_prvspace = pc; 564182902Skmacy pc->pc_curthread = 0; 565182902Skmacy 566182902Skmacy gdt_segs[GPRIV_SEL].ssd_base = (int) pc; 567182902Skmacy gdt_segs[GPROC0_SEL].ssd_base = (int) &pc->pc_common_tss; 568182902Skmacy 569182902Skmacy PT_SET_MA(bootAPgdt, xpmap_ptom(VTOP(bootAPgdt)) | PG_V | PG_RW); 570182902Skmacy bzero(bootAPgdt, PAGE_SIZE); 571182902Skmacy for (x = 0; x < NGDT; x++) 572182902Skmacy ssdtosd(&gdt_segs[x], &bootAPgdt[x].sd); 573182902Skmacy PT_SET_MA(bootAPgdt, vtomach(bootAPgdt) | PG_V); 574182902Skmacy 575182902Skmacy /* attempt to start the Application Processor */ 576182902Skmacy if (!start_ap(cpu)) { 577182902Skmacy printf("AP #%d (PHY# %d) failed!\n", cpu, apic_id); 578182902Skmacy /* better panic as the AP may be running loose */ 579182902Skmacy printf("panic y/n? [y] "); 580182902Skmacy if (cngetc() != 'n') 581182902Skmacy panic("bye-bye"); 582182902Skmacy } 583182902Skmacy 584182902Skmacy all_cpus |= (1 << cpu); /* record AP in CPU map */ 585182902Skmacy } 586182902Skmacy 587182902Skmacy 588182902Skmacy /* build our map of 'other' CPUs */ 589182902Skmacy PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask)); 590182902Skmacy 591182902Skmacy /* restore the warmstart vector */ 592182902Skmacy *(u_int32_t *) WARMBOOT_OFF = mpbioswarmvec; 593182902Skmacy 594182902Skmacy pmap_invalidate_range(kernel_pmap, 0, NKPT * NBPDR - 1); 595182902Skmacy 596182902Skmacy /* number of APs actually started */ 597182902Skmacy return mp_naps; 598182902Skmacy} 599182902Skmacy 600182902Skmacyextern uint8_t *pcpu_boot_stack; 601182902Skmacyextern trap_info_t trap_table[]; 602182902Skmacy 603182902Skmacystatic void 604182902Skmacysmp_trap_init(trap_info_t *trap_ctxt) 605182902Skmacy{ 606182902Skmacy const trap_info_t *t = trap_table; 607182902Skmacy 608182902Skmacy for (t = trap_table; t->address; t++) { 609182902Skmacy trap_ctxt[t->vector].flags = t->flags; 610182902Skmacy trap_ctxt[t->vector].cs = t->cs; 611182902Skmacy trap_ctxt[t->vector].address = t->address; 612182902Skmacy } 613182902Skmacy} 614182902Skmacy 615182902Skmacyvoid 616182902Skmacycpu_initialize_context(unsigned int cpu); 617182902Skmacyextern int nkpt; 618182902Skmacy 619182902Skmacyvoid 620182902Skmacycpu_initialize_context(unsigned int cpu) 621182902Skmacy{ 622182902Skmacy /* vcpu_guest_context_t is too large to allocate on the stack. 623182902Skmacy * Hence we allocate statically and protect it with a lock */ 624182902Skmacy vm_page_t m[4]; 625182902Skmacy static vcpu_guest_context_t ctxt; 626182902Skmacy vm_offset_t boot_stack; 627183131Skmacy vm_offset_t newPTD; 628183131Skmacy vm_paddr_t ma[NPGPTD]; 629182902Skmacy static int color; 630182902Skmacy int i; 631182902Skmacy 632182902Skmacy /* 633183131Skmacy * Page 0,[0-3] PTD 634183131Skmacy * Page 1, [4] boot stack 635183131Skmacy * Page [5] PDPT 636183131Skmacy 637182902Skmacy * 638182902Skmacy */ 639183131Skmacy for (i = 0; i < NPGPTD + 2; i++) { 640182902Skmacy m[i] = vm_page_alloc(NULL, color++, 641182902Skmacy VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | 642182902Skmacy VM_ALLOC_ZERO); 643182902Skmacy 644182902Skmacy pmap_zero_page(m[i]); 645182902Skmacy 646182902Skmacy } 647183131Skmacy boot_stack = kmem_alloc_nofault(kernel_map, 1); 648183131Skmacy newPTD = kmem_alloc_nofault(kernel_map, NPGPTD); 649183131Skmacy ma[0] = xpmap_ptom(VM_PAGE_TO_PHYS(m[0]))|PG_V; 650182902Skmacy 651183131Skmacy#ifdef PAE 652183131Skmacy pmap_kenter(boot_stack, VM_PAGE_TO_PHYS(m[NPGPTD + 1])); 653183131Skmacy for (i = 0; i < NPGPTD; i++) { 654183131Skmacy ((vm_paddr_t *)boot_stack)[i] = 655183131Skmacy ma[i] = 656183131Skmacy xpmap_ptom(VM_PAGE_TO_PHYS(m[i]))|PG_V; 657182902Skmacy } 658183131Skmacy#endif 659182902Skmacy 660182902Skmacy /* 661182902Skmacy * Copy cpu0 IdlePTD to new IdlePTD - copying only 662182902Skmacy * kernel mappings 663182902Skmacy */ 664183131Skmacy pmap_qenter(newPTD, m, 4); 665183131Skmacy 666183131Skmacy memcpy((uint8_t *)newPTD + KPTDI*sizeof(vm_paddr_t), 667183131Skmacy (uint8_t *)PTOV(IdlePTD) + KPTDI*sizeof(vm_paddr_t), 668182902Skmacy nkpt*sizeof(vm_paddr_t)); 669183131Skmacy 670183131Skmacy pmap_qremove(newPTD, 4); 671183131Skmacy kmem_free(kernel_map, newPTD, 4); 672182902Skmacy /* 673182902Skmacy * map actual idle stack to boot_stack 674182902Skmacy */ 675183131Skmacy pmap_kenter(boot_stack, VM_PAGE_TO_PHYS(m[NPGPTD])); 676182902Skmacy 677182902Skmacy 678183131Skmacy xen_pgdpt_pin(xpmap_ptom(VM_PAGE_TO_PHYS(m[NPGPTD + 1]))); 679182902Skmacy vm_page_lock_queues(); 680182902Skmacy for (i = 0; i < 4; i++) { 681183131Skmacy int pdir = (PTDPTDI + i) / NPDEPG; 682183131Skmacy int curoffset = (PTDPTDI + i) % NPDEPG; 683183131Skmacy 684182902Skmacy xen_queue_pt_update((vm_paddr_t) 685183131Skmacy ((ma[pdir] & ~PG_V) + (curoffset*sizeof(vm_paddr_t))), 686182902Skmacy ma[i]); 687182902Skmacy } 688182902Skmacy PT_UPDATES_FLUSH(); 689182902Skmacy vm_page_unlock_queues(); 690182902Skmacy 691182902Skmacy memset(&ctxt, 0, sizeof(ctxt)); 692182902Skmacy ctxt.flags = VGCF_IN_KERNEL; 693182902Skmacy ctxt.user_regs.ds = GSEL(GDATA_SEL, SEL_KPL); 694182902Skmacy ctxt.user_regs.es = GSEL(GDATA_SEL, SEL_KPL); 695182902Skmacy ctxt.user_regs.fs = GSEL(GPRIV_SEL, SEL_KPL); 696182902Skmacy ctxt.user_regs.gs = GSEL(GDATA_SEL, SEL_KPL); 697182902Skmacy ctxt.user_regs.cs = GSEL(GCODE_SEL, SEL_KPL); 698182902Skmacy ctxt.user_regs.ss = GSEL(GDATA_SEL, SEL_KPL); 699182902Skmacy ctxt.user_regs.eip = (unsigned long)init_secondary; 700182902Skmacy ctxt.user_regs.eflags = PSL_KERNEL | 0x1000; /* IOPL_RING1 */ 701182902Skmacy 702182902Skmacy memset(&ctxt.fpu_ctxt, 0, sizeof(ctxt.fpu_ctxt)); 703182902Skmacy 704182902Skmacy smp_trap_init(ctxt.trap_ctxt); 705182902Skmacy 706182902Skmacy ctxt.ldt_ents = 0; 707182902Skmacy ctxt.gdt_frames[0] = (uint32_t)((uint64_t)vtomach(bootAPgdt) >> PAGE_SHIFT); 708182902Skmacy ctxt.gdt_ents = 512; 709182902Skmacy 710182902Skmacy#ifdef __i386__ 711182902Skmacy ctxt.user_regs.esp = boot_stack + PAGE_SIZE; 712182902Skmacy 713182902Skmacy ctxt.kernel_ss = GSEL(GDATA_SEL, SEL_KPL); 714182902Skmacy ctxt.kernel_sp = boot_stack + PAGE_SIZE; 715182902Skmacy 716182902Skmacy ctxt.event_callback_cs = GSEL(GCODE_SEL, SEL_KPL); 717182902Skmacy ctxt.event_callback_eip = (unsigned long)Xhypervisor_callback; 718182902Skmacy ctxt.failsafe_callback_cs = GSEL(GCODE_SEL, SEL_KPL); 719182902Skmacy ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback; 720182902Skmacy 721183131Skmacy ctxt.ctrlreg[3] = xpmap_ptom(VM_PAGE_TO_PHYS(m[NPGPTD + 1])); 722182902Skmacy#else /* __x86_64__ */ 723182902Skmacy ctxt.user_regs.esp = idle->thread.rsp0 - sizeof(struct pt_regs); 724182902Skmacy ctxt.kernel_ss = GSEL(GDATA_SEL, SEL_KPL); 725182902Skmacy ctxt.kernel_sp = idle->thread.rsp0; 726182902Skmacy 727182902Skmacy ctxt.event_callback_eip = (unsigned long)hypervisor_callback; 728182902Skmacy ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback; 729182902Skmacy ctxt.syscall_callback_eip = (unsigned long)system_call; 730182902Skmacy 731182902Skmacy ctxt.ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(init_level4_pgt)); 732182902Skmacy 733182902Skmacy ctxt.gs_base_kernel = (unsigned long)(cpu_pda(cpu)); 734182902Skmacy#endif 735182902Skmacy 736182902Skmacy printf("gdtpfn=%lx pdptpfn=%lx\n", 737182902Skmacy ctxt.gdt_frames[0], 738182902Skmacy ctxt.ctrlreg[3] >> PAGE_SHIFT); 739182902Skmacy 740182902Skmacy PANIC_IF(HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, &ctxt)); 741182902Skmacy DELAY(3000); 742182902Skmacy PANIC_IF(HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL)); 743182902Skmacy} 744182902Skmacy 745182902Skmacy/* 746182902Skmacy * This function starts the AP (application processor) identified 747182902Skmacy * by the APIC ID 'physicalCpu'. It does quite a "song and dance" 748182902Skmacy * to accomplish this. This is necessary because of the nuances 749182902Skmacy * of the different hardware we might encounter. It isn't pretty, 750182902Skmacy * but it seems to work. 751182902Skmacy */ 752183131Skmacy 753183131Skmacyint cpus; 754182902Skmacystatic int 755182902Skmacystart_ap(int apic_id) 756182902Skmacy{ 757182902Skmacy int ms; 758182902Skmacy 759182902Skmacy /* used as a watchpoint to signal AP startup */ 760182902Skmacy cpus = mp_naps; 761182902Skmacy 762182902Skmacy cpu_initialize_context(apic_id); 763182902Skmacy 764182902Skmacy /* Wait up to 5 seconds for it to start. */ 765182902Skmacy for (ms = 0; ms < 5000; ms++) { 766182902Skmacy if (mp_naps > cpus) 767182902Skmacy return 1; /* return SUCCESS */ 768182902Skmacy DELAY(1000); 769182902Skmacy } 770182902Skmacy return 0; /* return FAILURE */ 771182902Skmacy} 772182902Skmacy 773182902Skmacy/* 774182902Skmacy * Flush the TLB on all other CPU's 775182902Skmacy */ 776182902Skmacystatic void 777182902Skmacysmp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2) 778182902Skmacy{ 779182902Skmacy u_int ncpu; 780182902Skmacy 781182902Skmacy ncpu = mp_ncpus - 1; /* does not shootdown self */ 782182902Skmacy if (ncpu < 1) 783182902Skmacy return; /* no other cpus */ 784182902Skmacy if (!(read_eflags() & PSL_I)) 785182902Skmacy panic("%s: interrupts disabled", __func__); 786182902Skmacy mtx_lock_spin(&smp_ipi_mtx); 787182902Skmacy smp_tlb_addr1 = addr1; 788182902Skmacy smp_tlb_addr2 = addr2; 789182902Skmacy atomic_store_rel_int(&smp_tlb_wait, 0); 790182902Skmacy ipi_all_but_self(vector); 791182902Skmacy while (smp_tlb_wait < ncpu) 792182902Skmacy ia32_pause(); 793182902Skmacy mtx_unlock_spin(&smp_ipi_mtx); 794182902Skmacy} 795182902Skmacy 796182902Skmacystatic void 797182902Skmacysmp_targeted_tlb_shootdown(u_int mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2) 798182902Skmacy{ 799182902Skmacy int ncpu, othercpus; 800182902Skmacy 801182902Skmacy othercpus = mp_ncpus - 1; 802182902Skmacy if (mask == (u_int)-1) { 803182902Skmacy ncpu = othercpus; 804182902Skmacy if (ncpu < 1) 805182902Skmacy return; 806182902Skmacy } else { 807182902Skmacy mask &= ~PCPU_GET(cpumask); 808182902Skmacy if (mask == 0) 809182902Skmacy return; 810182902Skmacy ncpu = bitcount32(mask); 811182902Skmacy if (ncpu > othercpus) { 812182902Skmacy /* XXX this should be a panic offence */ 813182902Skmacy printf("SMP: tlb shootdown to %d other cpus (only have %d)\n", 814182902Skmacy ncpu, othercpus); 815182902Skmacy ncpu = othercpus; 816182902Skmacy } 817182902Skmacy /* XXX should be a panic, implied by mask == 0 above */ 818182902Skmacy if (ncpu < 1) 819182902Skmacy return; 820182902Skmacy } 821182902Skmacy if (!(read_eflags() & PSL_I)) 822182902Skmacy panic("%s: interrupts disabled", __func__); 823182902Skmacy mtx_lock_spin(&smp_ipi_mtx); 824182902Skmacy smp_tlb_addr1 = addr1; 825182902Skmacy smp_tlb_addr2 = addr2; 826182902Skmacy atomic_store_rel_int(&smp_tlb_wait, 0); 827182902Skmacy if (mask == (u_int)-1) 828182902Skmacy ipi_all_but_self(vector); 829182902Skmacy else 830182902Skmacy ipi_selected(mask, vector); 831182902Skmacy while (smp_tlb_wait < ncpu) 832182902Skmacy ia32_pause(); 833182902Skmacy mtx_unlock_spin(&smp_ipi_mtx); 834182902Skmacy} 835182902Skmacy 836182902Skmacyvoid 837182902Skmacysmp_cache_flush(void) 838182902Skmacy{ 839182902Skmacy 840182902Skmacy if (smp_started) 841182902Skmacy smp_tlb_shootdown(IPI_INVLCACHE, 0, 0); 842182902Skmacy} 843182902Skmacy 844182902Skmacyvoid 845182902Skmacysmp_invltlb(void) 846182902Skmacy{ 847182902Skmacy 848182902Skmacy if (smp_started) { 849182902Skmacy smp_tlb_shootdown(IPI_INVLTLB, 0, 0); 850182902Skmacy } 851182902Skmacy} 852182902Skmacy 853182902Skmacyvoid 854182902Skmacysmp_invlpg(vm_offset_t addr) 855182902Skmacy{ 856182902Skmacy 857182902Skmacy if (smp_started) { 858182902Skmacy smp_tlb_shootdown(IPI_INVLPG, addr, 0); 859182902Skmacy } 860182902Skmacy} 861182902Skmacy 862182902Skmacyvoid 863182902Skmacysmp_invlpg_range(vm_offset_t addr1, vm_offset_t addr2) 864182902Skmacy{ 865182902Skmacy 866182902Skmacy if (smp_started) { 867182902Skmacy smp_tlb_shootdown(IPI_INVLRNG, addr1, addr2); 868182902Skmacy } 869182902Skmacy} 870182902Skmacy 871182902Skmacyvoid 872182902Skmacysmp_masked_invltlb(u_int mask) 873182902Skmacy{ 874182902Skmacy 875182902Skmacy if (smp_started) { 876182902Skmacy smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, 0, 0); 877182902Skmacy } 878182902Skmacy} 879182902Skmacy 880182902Skmacyvoid 881182902Skmacysmp_masked_invlpg(u_int mask, vm_offset_t addr) 882182902Skmacy{ 883182902Skmacy 884182902Skmacy if (smp_started) { 885182902Skmacy smp_targeted_tlb_shootdown(mask, IPI_INVLPG, addr, 0); 886182902Skmacy } 887182902Skmacy} 888182902Skmacy 889182902Skmacyvoid 890182902Skmacysmp_masked_invlpg_range(u_int mask, vm_offset_t addr1, vm_offset_t addr2) 891182902Skmacy{ 892182902Skmacy 893182902Skmacy if (smp_started) { 894182902Skmacy smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, addr1, addr2); 895182902Skmacy } 896182902Skmacy} 897182902Skmacy 898182902Skmacyvoid 899182902Skmacyipi_bitmap_handler(struct trapframe frame) 900182902Skmacy{ 901182902Skmacy int cpu = PCPU_GET(cpuid); 902182902Skmacy u_int ipi_bitmap; 903182902Skmacy 904182902Skmacy ipi_bitmap = atomic_readandclear_int(&cpu_ipi_pending[cpu]); 905182902Skmacy 906182902Skmacy if (ipi_bitmap & (1 << IPI_PREEMPT)) { 907182902Skmacy sched_preempt(curthread); 908182902Skmacy } 909182902Skmacy} 910182902Skmacy 911182902Skmacy/* 912182902Skmacy * send an IPI to a set of cpus. 913182902Skmacy */ 914182902Skmacyvoid 915182902Skmacyipi_selected(u_int32_t cpus, u_int ipi) 916182902Skmacy{ 917182902Skmacy int cpu; 918182902Skmacy u_int bitmap = 0; 919182902Skmacy u_int old_pending; 920182902Skmacy u_int new_pending; 921182902Skmacy 922182902Skmacy if (IPI_IS_BITMAPED(ipi)) { 923182902Skmacy bitmap = 1 << ipi; 924182902Skmacy ipi = IPI_BITMAP_VECTOR; 925182902Skmacy } 926182902Skmacy 927182902Skmacy#ifdef STOP_NMI 928182902Skmacy if (ipi == IPI_STOP && stop_cpus_with_nmi) { 929182902Skmacy ipi_nmi_selected(cpus); 930182902Skmacy return; 931182902Skmacy } 932182902Skmacy#endif 933182902Skmacy CTR3(KTR_SMP, "%s: cpus: %x ipi: %x", __func__, cpus, ipi); 934182902Skmacy while ((cpu = ffs(cpus)) != 0) { 935182902Skmacy cpu--; 936182902Skmacy cpus &= ~(1 << cpu); 937182902Skmacy 938182902Skmacy KASSERT(cpu_apic_ids[cpu] != -1, 939182902Skmacy ("IPI to non-existent CPU %d", cpu)); 940182902Skmacy 941182902Skmacy if (bitmap) { 942182902Skmacy do { 943182902Skmacy old_pending = cpu_ipi_pending[cpu]; 944182902Skmacy new_pending = old_pending | bitmap; 945182902Skmacy } while (!atomic_cmpset_int(&cpu_ipi_pending[cpu],old_pending, new_pending)); 946182902Skmacy 947182902Skmacy if (old_pending) 948182902Skmacy continue; 949182902Skmacy } 950182902Skmacy 951182902Skmacy lapic_ipi_vectored(ipi, cpu_apic_ids[cpu]); 952182902Skmacy } 953182902Skmacy 954182902Skmacy} 955182902Skmacy 956182902Skmacy/* 957182902Skmacy * send an IPI INTerrupt containing 'vector' to all CPUs, including myself 958182902Skmacy */ 959182902Skmacyvoid 960182902Skmacyipi_all(u_int ipi) 961182902Skmacy{ 962182902Skmacy 963182902Skmacy if (IPI_IS_BITMAPED(ipi) || (ipi == IPI_STOP && stop_cpus_with_nmi)) { 964182902Skmacy ipi_selected(all_cpus, ipi); 965182902Skmacy return; 966182902Skmacy } 967182902Skmacy CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi); 968182902Skmacy lapic_ipi_vectored(ipi, APIC_IPI_DEST_ALL); 969182902Skmacy} 970182902Skmacy 971182902Skmacy/* 972182902Skmacy * send an IPI to all CPUs EXCEPT myself 973182902Skmacy */ 974182902Skmacyvoid 975182902Skmacyipi_all_but_self(u_int ipi) 976182902Skmacy{ 977182902Skmacy 978182902Skmacy if (IPI_IS_BITMAPED(ipi) || (ipi == IPI_STOP && stop_cpus_with_nmi)) { 979182902Skmacy ipi_selected(PCPU_GET(other_cpus), ipi); 980182902Skmacy return; 981182902Skmacy } 982182902Skmacy CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi); 983182902Skmacy lapic_ipi_vectored(ipi, APIC_IPI_DEST_OTHERS); 984182902Skmacy} 985182902Skmacy 986182902Skmacy/* 987182902Skmacy * send an IPI to myself 988182902Skmacy */ 989182902Skmacyvoid 990182902Skmacyipi_self(u_int ipi) 991182902Skmacy{ 992182902Skmacy 993182902Skmacy if (IPI_IS_BITMAPED(ipi) || (ipi == IPI_STOP && stop_cpus_with_nmi)) { 994182902Skmacy ipi_selected(PCPU_GET(cpumask), ipi); 995182902Skmacy return; 996182902Skmacy } 997182902Skmacy CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi); 998182902Skmacy lapic_ipi_vectored(ipi, APIC_IPI_DEST_SELF); 999182902Skmacy} 1000182902Skmacy 1001182902Skmacy#ifdef STOP_NMI 1002182902Skmacy/* 1003182902Skmacy * send NMI IPI to selected CPUs 1004182902Skmacy */ 1005182902Skmacy 1006182902Skmacy#define BEFORE_SPIN 1000000 1007182902Skmacy 1008182902Skmacyvoid 1009182902Skmacyipi_nmi_selected(u_int32_t cpus) 1010182902Skmacy{ 1011182902Skmacy int cpu; 1012182902Skmacy register_t icrlo; 1013182902Skmacy 1014182902Skmacy icrlo = APIC_DELMODE_NMI | APIC_DESTMODE_PHY | APIC_LEVEL_ASSERT 1015182902Skmacy | APIC_TRIGMOD_EDGE; 1016182902Skmacy 1017182902Skmacy CTR2(KTR_SMP, "%s: cpus: %x nmi", __func__, cpus); 1018182902Skmacy 1019182902Skmacy atomic_set_int(&ipi_nmi_pending, cpus); 1020182902Skmacy 1021182902Skmacy while ((cpu = ffs(cpus)) != 0) { 1022182902Skmacy cpu--; 1023182902Skmacy cpus &= ~(1 << cpu); 1024182902Skmacy 1025182902Skmacy KASSERT(cpu_apic_ids[cpu] != -1, 1026182902Skmacy ("IPI NMI to non-existent CPU %d", cpu)); 1027182902Skmacy 1028182902Skmacy /* Wait for an earlier IPI to finish. */ 1029182902Skmacy if (!lapic_ipi_wait(BEFORE_SPIN)) 1030182902Skmacy panic("ipi_nmi_selected: previous IPI has not cleared"); 1031182902Skmacy 1032182902Skmacy lapic_ipi_raw(icrlo, cpu_apic_ids[cpu]); 1033182902Skmacy } 1034182902Skmacy} 1035182902Skmacy 1036182902Skmacyint 1037182902Skmacyipi_nmi_handler(void) 1038182902Skmacy{ 1039182902Skmacy int cpumask = PCPU_GET(cpumask); 1040182902Skmacy 1041182902Skmacy if (!(ipi_nmi_pending & cpumask)) 1042182902Skmacy return 1; 1043182902Skmacy 1044182902Skmacy atomic_clear_int(&ipi_nmi_pending, cpumask); 1045182902Skmacy cpustop_handler(); 1046182902Skmacy return 0; 1047182902Skmacy} 1048182902Skmacy 1049182902Skmacy#endif /* STOP_NMI */ 1050182902Skmacy 1051182902Skmacy/* 1052182902Skmacy * Handle an IPI_STOP by saving our current context and spinning until we 1053182902Skmacy * are resumed. 1054182902Skmacy */ 1055182902Skmacyvoid 1056182902Skmacycpustop_handler(void) 1057182902Skmacy{ 1058182902Skmacy int cpu = PCPU_GET(cpuid); 1059182902Skmacy int cpumask = PCPU_GET(cpumask); 1060182902Skmacy 1061182902Skmacy savectx(&stoppcbs[cpu]); 1062182902Skmacy 1063182902Skmacy /* Indicate that we are stopped */ 1064182902Skmacy atomic_set_int(&stopped_cpus, cpumask); 1065182902Skmacy 1066182902Skmacy /* Wait for restart */ 1067182902Skmacy while (!(started_cpus & cpumask)) 1068182902Skmacy ia32_pause(); 1069182902Skmacy 1070182902Skmacy atomic_clear_int(&started_cpus, cpumask); 1071182902Skmacy atomic_clear_int(&stopped_cpus, cpumask); 1072182902Skmacy 1073182902Skmacy if (cpu == 0 && cpustop_restartfunc != NULL) { 1074182902Skmacy cpustop_restartfunc(); 1075182902Skmacy cpustop_restartfunc = NULL; 1076182902Skmacy } 1077182902Skmacy} 1078182902Skmacy 1079182902Skmacy/* 1080182902Skmacy * This is called once the rest of the system is up and running and we're 1081182902Skmacy * ready to let the AP's out of the pen. 1082182902Skmacy */ 1083182902Skmacystatic void 1084182902Skmacyrelease_aps(void *dummy __unused) 1085182902Skmacy{ 1086182902Skmacy 1087182902Skmacy if (mp_ncpus == 1) 1088182902Skmacy return; 1089182902Skmacy atomic_store_rel_int(&aps_ready, 1); 1090182902Skmacy while (smp_started == 0) 1091182902Skmacy ia32_pause(); 1092182902Skmacy} 1093182902SkmacySYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL); 1094182902Skmacy 1095