mp_machdep.c revision 184115
1182902Skmacy/*- 2182902Skmacy * Copyright (c) 1996, by Steve Passe 3182902Skmacy * Copyright (c) 2008, by Kip Macy 4182902Skmacy * All rights reserved. 5182902Skmacy * 6182902Skmacy * Redistribution and use in source and binary forms, with or without 7182902Skmacy * modification, are permitted provided that the following conditions 8182902Skmacy * are met: 9182902Skmacy * 1. Redistributions of source code must retain the above copyright 10182902Skmacy * notice, this list of conditions and the following disclaimer. 11182902Skmacy * 2. The name of the developer may NOT be used to endorse or promote products 12182902Skmacy * derived from this software without specific prior written permission. 13182902Skmacy * 14182902Skmacy * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15182902Skmacy * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16182902Skmacy * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17182902Skmacy * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18182902Skmacy * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19182902Skmacy * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20182902Skmacy * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21182902Skmacy * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22182902Skmacy * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23182902Skmacy * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24182902Skmacy * SUCH DAMAGE. 25182902Skmacy */ 26182902Skmacy 27182902Skmacy#include <sys/cdefs.h> 28182902Skmacy__FBSDID("$FreeBSD: head/sys/i386/xen/mp_machdep.c 184115 2008-10-21 08:03:12Z kmacy $"); 29182902Skmacy 30182902Skmacy#include "opt_apic.h" 31182902Skmacy#include "opt_cpu.h" 32182902Skmacy#include "opt_kstack_pages.h" 33182902Skmacy#include "opt_mp_watchdog.h" 34182902Skmacy#include "opt_sched.h" 35182902Skmacy#include "opt_smp.h" 36182902Skmacy 37182902Skmacy#if !defined(lint) 38182902Skmacy#if !defined(SMP) 39182902Skmacy#error How did you get here? 40182902Skmacy#endif 41182902Skmacy 42182902Skmacy#ifndef DEV_APIC 43182902Skmacy#error The apic device is required for SMP, add "device apic" to your config file. 44182902Skmacy#endif 45182902Skmacy#if defined(CPU_DISABLE_CMPXCHG) && !defined(COMPILING_LINT) 46182902Skmacy#error SMP not supported with CPU_DISABLE_CMPXCHG 47182902Skmacy#endif 48182902Skmacy#endif /* not lint */ 49182902Skmacy 50182902Skmacy#include <sys/param.h> 51182902Skmacy#include <sys/systm.h> 52182902Skmacy#include <sys/bus.h> 53182902Skmacy#include <sys/cons.h> /* cngetc() */ 54182902Skmacy#ifdef GPROF 55182902Skmacy#include <sys/gmon.h> 56182902Skmacy#endif 57182902Skmacy#include <sys/kernel.h> 58182902Skmacy#include <sys/ktr.h> 59182902Skmacy#include <sys/lock.h> 60182902Skmacy#include <sys/malloc.h> 61182902Skmacy#include <sys/memrange.h> 62182902Skmacy#include <sys/mutex.h> 63182902Skmacy#include <sys/pcpu.h> 64182902Skmacy#include <sys/proc.h> 65182902Skmacy#include <sys/sched.h> 66182902Skmacy#include <sys/smp.h> 67182902Skmacy#include <sys/sysctl.h> 68182902Skmacy 69182902Skmacy#include <vm/vm.h> 70182902Skmacy#include <vm/vm_param.h> 71182902Skmacy#include <vm/pmap.h> 72182902Skmacy#include <vm/vm_kern.h> 73182902Skmacy#include <vm/vm_extern.h> 74182902Skmacy#include <vm/vm_page.h> 75182902Skmacy 76182902Skmacy#include <machine/apicreg.h> 77182902Skmacy#include <machine/md_var.h> 78182902Skmacy#include <machine/mp_watchdog.h> 79182902Skmacy#include <machine/pcb.h> 80182902Skmacy#include <machine/psl.h> 81182902Skmacy#include <machine/smp.h> 82182902Skmacy#include <machine/specialreg.h> 83182902Skmacy#include <machine/pcpu.h> 84182902Skmacy 85182902Skmacy 86182902Skmacy 87182902Skmacy#include <machine/xen/xen-os.h> 88183379Skmacy#include <machine/xen/evtchn.h> 89184112Skmacy#include <machine/xen/xen_intr.h> 90182902Skmacy#include <machine/xen/hypervisor.h> 91182902Skmacy#include <xen/interface/vcpu.h> 92182902Skmacy 93182902Skmacy#define stop_cpus_with_nmi 0 94182902Skmacy 95182902Skmacy 96182902Skmacyint mp_naps; /* # of Applications processors */ 97182902Skmacyint boot_cpu_id = -1; /* designated BSP */ 98182902Skmacy 99182902Skmacyextern struct pcpu __pcpu[]; 100182902Skmacy 101182902Skmacystatic int bootAP; 102182902Skmacystatic union descriptor *bootAPgdt; 103182902Skmacy 104184112Skmacystatic DEFINE_PER_CPU(int, resched_irq); 105184112Skmacystatic DEFINE_PER_CPU(int, callfunc_irq); 106184112Skmacystatic char resched_name[NR_CPUS][15]; 107184112Skmacystatic char callfunc_name[NR_CPUS][15]; 108182902Skmacy 109182902Skmacy/* Free these after use */ 110182902Skmacyvoid *bootstacks[MAXCPU]; 111182902Skmacy 112182902Skmacy/* Hotwire a 0->4MB V==P mapping */ 113182902Skmacyextern pt_entry_t *KPTphys; 114182902Skmacy 115182902Skmacystruct pcb stoppcbs[MAXCPU]; 116182902Skmacy 117182902Skmacy/* Variables needed for SMP tlb shootdown. */ 118182902Skmacyvm_offset_t smp_tlb_addr1; 119182902Skmacyvm_offset_t smp_tlb_addr2; 120182902Skmacyvolatile int smp_tlb_wait; 121182902Skmacy 122184112Skmacytypedef void call_data_func_t(uintptr_t , uintptr_t); 123184112Skmacy 124182902Skmacystatic u_int logical_cpus; 125182902Skmacy 126182902Skmacy/* used to hold the AP's until we are ready to release them */ 127182902Skmacystatic struct mtx ap_boot_mtx; 128182902Skmacy 129182902Skmacy/* Set to 1 once we're ready to let the APs out of the pen. */ 130182902Skmacystatic volatile int aps_ready = 0; 131182902Skmacy 132182902Skmacy/* 133182902Skmacy * Store data from cpu_add() until later in the boot when we actually setup 134182902Skmacy * the APs. 135182902Skmacy */ 136182902Skmacystruct cpu_info { 137182902Skmacy int cpu_present:1; 138182902Skmacy int cpu_bsp:1; 139182902Skmacy int cpu_disabled:1; 140182902Skmacy} static cpu_info[MAX_APIC_ID + 1]; 141182902Skmacyint cpu_apic_ids[MAXCPU]; 142182902Skmacy 143182902Skmacy/* Holds pending bitmap based IPIs per CPU */ 144182902Skmacystatic volatile u_int cpu_ipi_pending[MAXCPU]; 145182902Skmacy 146182902Skmacystatic void assign_cpu_ids(void); 147182902Skmacystatic void set_interrupt_apic_ids(void); 148182902Skmacyint start_all_aps(void); 149182902Skmacystatic int start_ap(int apic_id); 150182902Skmacystatic void release_aps(void *dummy); 151182902Skmacy 152182902Skmacystatic u_int hyperthreading_cpus; 153182902Skmacystatic cpumask_t hyperthreading_cpus_mask; 154182902Skmacy 155182902Skmacyextern void Xhypervisor_callback(void); 156182902Skmacyextern void failsafe_callback(void); 157184115Skmacyextern void pmap_lazyfix_action(void); 158182902Skmacy 159182902Skmacystruct cpu_group * 160182902Skmacycpu_topo(void) 161182902Skmacy{ 162182902Skmacy if (cpu_cores == 0) 163182902Skmacy cpu_cores = 1; 164182902Skmacy if (cpu_logical == 0) 165182902Skmacy cpu_logical = 1; 166182902Skmacy if (mp_ncpus % (cpu_cores * cpu_logical) != 0) { 167182902Skmacy printf("WARNING: Non-uniform processors.\n"); 168182902Skmacy printf("WARNING: Using suboptimal topology.\n"); 169182902Skmacy return (smp_topo_none()); 170182902Skmacy } 171182902Skmacy /* 172182902Skmacy * No multi-core or hyper-threaded. 173182902Skmacy */ 174182902Skmacy if (cpu_logical * cpu_cores == 1) 175182902Skmacy return (smp_topo_none()); 176182902Skmacy /* 177182902Skmacy * Only HTT no multi-core. 178182902Skmacy */ 179182902Skmacy if (cpu_logical > 1 && cpu_cores == 1) 180182902Skmacy return (smp_topo_1level(CG_SHARE_L1, cpu_logical, CG_FLAG_HTT)); 181182902Skmacy /* 182182902Skmacy * Only multi-core no HTT. 183182902Skmacy */ 184182902Skmacy if (cpu_cores > 1 && cpu_logical == 1) 185182902Skmacy return (smp_topo_1level(CG_SHARE_NONE, cpu_cores, 0)); 186182902Skmacy /* 187182902Skmacy * Both HTT and multi-core. 188182902Skmacy */ 189182902Skmacy return (smp_topo_2level(CG_SHARE_NONE, cpu_cores, 190182902Skmacy CG_SHARE_L1, cpu_logical, CG_FLAG_HTT)); 191182902Skmacy} 192182902Skmacy 193182902Skmacy/* 194182902Skmacy * Calculate usable address in base memory for AP trampoline code. 195182902Skmacy */ 196182902Skmacyu_int 197182902Skmacymp_bootaddress(u_int basemem) 198182902Skmacy{ 199182902Skmacy 200182902Skmacy return (basemem); 201182902Skmacy} 202182902Skmacy 203182902Skmacyvoid 204182902Skmacycpu_add(u_int apic_id, char boot_cpu) 205182902Skmacy{ 206182902Skmacy 207182902Skmacy if (apic_id > MAX_APIC_ID) { 208182902Skmacy panic("SMP: APIC ID %d too high", apic_id); 209182902Skmacy return; 210182902Skmacy } 211182902Skmacy KASSERT(cpu_info[apic_id].cpu_present == 0, ("CPU %d added twice", 212182902Skmacy apic_id)); 213182902Skmacy cpu_info[apic_id].cpu_present = 1; 214182902Skmacy if (boot_cpu) { 215182902Skmacy KASSERT(boot_cpu_id == -1, 216182902Skmacy ("CPU %d claims to be BSP, but CPU %d already is", apic_id, 217182902Skmacy boot_cpu_id)); 218182902Skmacy boot_cpu_id = apic_id; 219182902Skmacy cpu_info[apic_id].cpu_bsp = 1; 220182902Skmacy } 221182902Skmacy if (mp_ncpus < MAXCPU) 222182902Skmacy mp_ncpus++; 223182902Skmacy if (bootverbose) 224182902Skmacy printf("SMP: Added CPU %d (%s)\n", apic_id, boot_cpu ? "BSP" : 225182902Skmacy "AP"); 226182902Skmacy} 227182902Skmacy 228182902Skmacyvoid 229182902Skmacycpu_mp_setmaxid(void) 230182902Skmacy{ 231182902Skmacy 232182902Skmacy mp_maxid = MAXCPU - 1; 233182902Skmacy} 234182902Skmacy 235182902Skmacyint 236182902Skmacycpu_mp_probe(void) 237182902Skmacy{ 238182902Skmacy 239182902Skmacy /* 240182902Skmacy * Always record BSP in CPU map so that the mbuf init code works 241182902Skmacy * correctly. 242182902Skmacy */ 243182902Skmacy all_cpus = 1; 244182902Skmacy if (mp_ncpus == 0) { 245182902Skmacy /* 246182902Skmacy * No CPUs were found, so this must be a UP system. Setup 247182902Skmacy * the variables to represent a system with a single CPU 248182902Skmacy * with an id of 0. 249182902Skmacy */ 250182902Skmacy mp_ncpus = 1; 251182902Skmacy return (0); 252182902Skmacy } 253182902Skmacy 254182902Skmacy /* At least one CPU was found. */ 255182902Skmacy if (mp_ncpus == 1) { 256182902Skmacy /* 257182902Skmacy * One CPU was found, so this must be a UP system with 258182902Skmacy * an I/O APIC. 259182902Skmacy */ 260182902Skmacy return (0); 261182902Skmacy } 262182902Skmacy 263182902Skmacy /* At least two CPUs were found. */ 264182902Skmacy return (1); 265182902Skmacy} 266182902Skmacy 267182902Skmacy/* 268182902Skmacy * Initialize the IPI handlers and start up the AP's. 269182902Skmacy */ 270182902Skmacyvoid 271182902Skmacycpu_mp_start(void) 272182902Skmacy{ 273182902Skmacy int i; 274182902Skmacy 275182902Skmacy /* Initialize the logical ID to APIC ID table. */ 276182902Skmacy for (i = 0; i < MAXCPU; i++) { 277182902Skmacy cpu_apic_ids[i] = -1; 278182902Skmacy cpu_ipi_pending[i] = 0; 279182902Skmacy } 280182902Skmacy 281182902Skmacy /* Set boot_cpu_id if needed. */ 282182902Skmacy if (boot_cpu_id == -1) { 283182902Skmacy boot_cpu_id = PCPU_GET(apic_id); 284182902Skmacy cpu_info[boot_cpu_id].cpu_bsp = 1; 285182902Skmacy } else 286182902Skmacy KASSERT(boot_cpu_id == PCPU_GET(apic_id), 287182902Skmacy ("BSP's APIC ID doesn't match boot_cpu_id")); 288182902Skmacy cpu_apic_ids[0] = boot_cpu_id; 289182902Skmacy 290182902Skmacy assign_cpu_ids(); 291182902Skmacy 292182902Skmacy /* Start each Application Processor */ 293182902Skmacy start_all_aps(); 294182902Skmacy 295182902Skmacy /* Setup the initial logical CPUs info. */ 296182902Skmacy logical_cpus = logical_cpus_mask = 0; 297182902Skmacy if (cpu_feature & CPUID_HTT) 298182902Skmacy logical_cpus = (cpu_procinfo & CPUID_HTT_CORES) >> 16; 299182902Skmacy 300182902Skmacy set_interrupt_apic_ids(); 301182902Skmacy} 302182902Skmacy 303182902Skmacy 304184112Skmacystatic void 305184112Skmacyiv_rendezvous(uintptr_t a, uintptr_t b) 306184112Skmacy{ 307184115Skmacy smp_rendezvous_action(); 308184112Skmacy} 309184112Skmacy 310184112Skmacystatic void 311184112Skmacyiv_invltlb(uintptr_t a, uintptr_t b) 312184112Skmacy{ 313184115Skmacy xen_tlb_flush(); 314184112Skmacy} 315184112Skmacy 316184112Skmacystatic void 317184112Skmacyiv_invlpg(uintptr_t a, uintptr_t b) 318184112Skmacy{ 319184115Skmacy xen_invlpg(a); 320184112Skmacy} 321184112Skmacy 322184112Skmacystatic void 323184112Skmacyiv_invlrng(uintptr_t a, uintptr_t b) 324184112Skmacy{ 325184115Skmacy vm_offset_t start = (vm_offset_t)a; 326184115Skmacy vm_offset_t end = (vm_offset_t)b; 327184115Skmacy 328184115Skmacy while (start < end) { 329184115Skmacy xen_invlpg(start); 330184115Skmacy start += PAGE_SIZE; 331184115Skmacy } 332184112Skmacy} 333184112Skmacy 334184115Skmacy 335184112Skmacystatic void 336184112Skmacyiv_invlcache(uintptr_t a, uintptr_t b) 337184112Skmacy{ 338184115Skmacy 339184115Skmacy wbinvd(); 340184112Skmacy} 341184112Skmacy 342184112Skmacystatic void 343184112Skmacyiv_lazypmap(uintptr_t a, uintptr_t b) 344184112Skmacy{ 345184115Skmacy pmap_lazyfix_action(); 346184112Skmacy} 347184112Skmacy 348184112Skmacystatic void 349184112Skmacyiv_bitmap_vector(uintptr_t a, uintptr_t b) 350184112Skmacy{ 351184115Skmacy 352184112Skmacy 353184112Skmacy} 354184112Skmacy 355184112Skmacy 356184112Skmacystatic call_data_func_t *ipi_vectors[IPI_BITMAP_VECTOR + 1] = 357184112Skmacy{ iv_rendezvous, 358184112Skmacy iv_invltlb, 359184112Skmacy iv_invlpg, 360184112Skmacy iv_invlrng, 361184112Skmacy iv_invlcache, 362184112Skmacy iv_lazypmap, 363184112Skmacy iv_bitmap_vector 364184112Skmacy}; 365184112Skmacy 366182902Skmacy/* 367184112Skmacy * Reschedule call back. Nothing to do, 368184112Skmacy * all the work is done automatically when 369184112Skmacy * we return from the interrupt. 370184112Skmacy */ 371184112Skmacystatic void 372184112Skmacysmp_reschedule_interrupt(void *unused) 373184112Skmacy{ 374184112Skmacy} 375184112Skmacy 376184112Skmacystruct _call_data { 377184112Skmacy call_data_func_t *func; 378184112Skmacy uintptr_t arg1; 379184112Skmacy uintptr_t arg2; 380184112Skmacy atomic_t started; 381184112Skmacy atomic_t finished; 382184112Skmacy int wait; 383184112Skmacy}; 384184112Skmacy 385184112Skmacystatic struct _call_data *call_data; 386184112Skmacy 387184112Skmacystatic void 388184112Skmacysmp_call_function_interrupt(void *unused) 389184112Skmacy{ 390184112Skmacy call_data_func_t *func = call_data->func; 391184112Skmacy uintptr_t arg1 = call_data->arg1; 392184112Skmacy uintptr_t arg2 = call_data->arg2; 393184112Skmacy int wait = call_data->wait; 394184112Skmacy 395184112Skmacy /* 396184112Skmacy * Notify initiating CPU that I've grabbed the data and am 397184112Skmacy * about to execute the function 398184112Skmacy */ 399184112Skmacy mb(); 400184112Skmacy atomic_inc(&call_data->started); 401184112Skmacy /* 402184112Skmacy * At this point the info structure may be out of scope unless wait==1 403184112Skmacy */ 404184112Skmacy (*func)(arg1, arg2); 405184112Skmacy 406184112Skmacy if (wait) { 407184112Skmacy mb(); 408184112Skmacy atomic_inc(&call_data->finished); 409184112Skmacy } 410184112Skmacy} 411184112Skmacy 412184112Skmacy/* 413182902Skmacy * Print various information about the SMP system hardware and setup. 414182902Skmacy */ 415182902Skmacyvoid 416182902Skmacycpu_mp_announce(void) 417182902Skmacy{ 418182902Skmacy int i, x; 419182902Skmacy 420182902Skmacy /* List CPUs */ 421182902Skmacy printf(" cpu0 (BSP): APIC ID: %2d\n", boot_cpu_id); 422182902Skmacy for (i = 1, x = 0; x <= MAX_APIC_ID; x++) { 423182902Skmacy if (!cpu_info[x].cpu_present || cpu_info[x].cpu_bsp) 424182902Skmacy continue; 425182902Skmacy if (cpu_info[x].cpu_disabled) 426182902Skmacy printf(" cpu (AP): APIC ID: %2d (disabled)\n", x); 427182902Skmacy else { 428182902Skmacy KASSERT(i < mp_ncpus, 429182902Skmacy ("mp_ncpus and actual cpus are out of whack")); 430182902Skmacy printf(" cpu%d (AP): APIC ID: %2d\n", i++, x); 431182902Skmacy } 432182902Skmacy } 433182902Skmacy} 434182902Skmacy 435184112Skmacy 436184112Skmacystatic int 437184112Skmacyxen_smp_intr_init(unsigned int cpu) 438184112Skmacy{ 439184112Skmacy int rc; 440184112Skmacy 441184112Skmacy per_cpu(resched_irq, cpu) = per_cpu(callfunc_irq, cpu) = -1; 442184112Skmacy 443184112Skmacy sprintf(resched_name[cpu], "resched%u", cpu); 444184112Skmacy rc = bind_ipi_to_irqhandler(RESCHEDULE_VECTOR, 445184112Skmacy cpu, 446184112Skmacy resched_name[cpu], 447184112Skmacy smp_reschedule_interrupt, 448184112Skmacy INTR_FAST); 449184112Skmacy 450184112Skmacy per_cpu(resched_irq, cpu) = rc; 451184112Skmacy 452184112Skmacy sprintf(callfunc_name[cpu], "callfunc%u", cpu); 453184112Skmacy rc = bind_ipi_to_irqhandler(CALL_FUNCTION_VECTOR, 454184112Skmacy cpu, 455184112Skmacy callfunc_name[cpu], 456184112Skmacy smp_call_function_interrupt, 457184112Skmacy INTR_FAST); 458184112Skmacy if (rc < 0) 459184112Skmacy goto fail; 460184112Skmacy per_cpu(callfunc_irq, cpu) = rc; 461184112Skmacy 462184112Skmacy if ((cpu != 0) && ((rc = ap_cpu_initclocks(cpu)) != 0)) 463184112Skmacy goto fail; 464184112Skmacy 465184112Skmacy return 0; 466184112Skmacy 467184112Skmacy fail: 468184112Skmacy if (per_cpu(resched_irq, cpu) >= 0) 469184112Skmacy unbind_from_irqhandler(per_cpu(resched_irq, cpu), NULL); 470184112Skmacy if (per_cpu(callfunc_irq, cpu) >= 0) 471184112Skmacy unbind_from_irqhandler(per_cpu(callfunc_irq, cpu), NULL); 472184112Skmacy return rc; 473184112Skmacy} 474184112Skmacy 475182902Skmacy#define MTOPSIZE (1<<(14 + PAGE_SHIFT)) 476182902Skmacy 477182902Skmacy/* 478182902Skmacy * AP CPU's call this to initialize themselves. 479182902Skmacy */ 480182902Skmacyvoid 481182902Skmacyinit_secondary(void) 482182902Skmacy{ 483182902Skmacy vm_offset_t addr; 484182902Skmacy int gsel_tss; 485182902Skmacy 486182902Skmacy 487182902Skmacy /* bootAP is set in start_ap() to our ID. */ 488182902Skmacy PCPU_SET(currentldt, _default_ldt); 489182902Skmacy gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); 490182902Skmacy#if 0 491182902Skmacy gdt[bootAP * NGDT + GPROC0_SEL].sd.sd_type = SDT_SYS386TSS; 492182902Skmacy#endif 493182902Skmacy PCPU_SET(common_tss.tss_esp0, 0); /* not used until after switch */ 494182902Skmacy PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL)); 495182902Skmacy PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16); 496182902Skmacy#if 0 497182902Skmacy PCPU_SET(tss_gdt, &gdt[bootAP * NGDT + GPROC0_SEL].sd); 498182902Skmacy 499182902Skmacy PCPU_SET(common_tssd, *PCPU_GET(tss_gdt)); 500182902Skmacy#endif 501182902Skmacy PCPU_SET(fsgs_gdt, &gdt[GUFS_SEL].sd); 502182902Skmacy 503182902Skmacy /* 504182902Skmacy * Set to a known state: 505182902Skmacy * Set by mpboot.s: CR0_PG, CR0_PE 506182902Skmacy * Set by cpu_setregs: CR0_NE, CR0_MP, CR0_TS, CR0_WP, CR0_AM 507182902Skmacy */ 508182902Skmacy /* 509182902Skmacy * signal our startup to the BSP. 510182902Skmacy */ 511182902Skmacy mp_naps++; 512182902Skmacy 513182902Skmacy /* Spin until the BSP releases the AP's. */ 514182902Skmacy while (!aps_ready) 515182902Skmacy ia32_pause(); 516182902Skmacy 517182902Skmacy /* BSP may have changed PTD while we were waiting */ 518182902Skmacy invltlb(); 519182902Skmacy for (addr = 0; addr < NKPT * NBPDR - 1; addr += PAGE_SIZE) 520182902Skmacy invlpg(addr); 521182902Skmacy 522182902Skmacy /* set up FPU state on the AP */ 523182902Skmacy npxinit(__INITIAL_NPXCW__); 524182902Skmacy#if 0 525182902Skmacy 526182902Skmacy /* set up SSE registers */ 527182902Skmacy enable_sse(); 528182902Skmacy#endif 529182902Skmacy#if 0 && defined(PAE) 530182902Skmacy /* Enable the PTE no-execute bit. */ 531182902Skmacy if ((amd_feature & AMDID_NX) != 0) { 532182902Skmacy uint64_t msr; 533182902Skmacy 534182902Skmacy msr = rdmsr(MSR_EFER) | EFER_NXE; 535182902Skmacy wrmsr(MSR_EFER, msr); 536182902Skmacy } 537182902Skmacy#endif 538182902Skmacy#if 0 539182902Skmacy /* A quick check from sanity claus */ 540182902Skmacy if (PCPU_GET(apic_id) != lapic_id()) { 541182902Skmacy printf("SMP: cpuid = %d\n", PCPU_GET(cpuid)); 542182902Skmacy printf("SMP: actual apic_id = %d\n", lapic_id()); 543182902Skmacy printf("SMP: correct apic_id = %d\n", PCPU_GET(apic_id)); 544182902Skmacy panic("cpuid mismatch! boom!!"); 545182902Skmacy } 546182902Skmacy#endif 547182902Skmacy 548182902Skmacy /* Initialize curthread. */ 549182902Skmacy KASSERT(PCPU_GET(idlethread) != NULL, ("no idle thread")); 550182902Skmacy PCPU_SET(curthread, PCPU_GET(idlethread)); 551182902Skmacy 552182902Skmacy mtx_lock_spin(&ap_boot_mtx); 553182902Skmacy#if 0 554182902Skmacy 555182902Skmacy /* Init local apic for irq's */ 556182902Skmacy lapic_setup(1); 557182902Skmacy#endif 558182902Skmacy smp_cpus++; 559182902Skmacy 560182902Skmacy CTR1(KTR_SMP, "SMP: AP CPU #%d Launched", PCPU_GET(cpuid)); 561182902Skmacy printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid)); 562182902Skmacy 563182902Skmacy /* Determine if we are a logical CPU. */ 564182902Skmacy if (logical_cpus > 1 && PCPU_GET(apic_id) % logical_cpus != 0) 565182902Skmacy logical_cpus_mask |= PCPU_GET(cpumask); 566182902Skmacy 567182902Skmacy /* Determine if we are a hyperthread. */ 568182902Skmacy if (hyperthreading_cpus > 1 && 569182902Skmacy PCPU_GET(apic_id) % hyperthreading_cpus != 0) 570182902Skmacy hyperthreading_cpus_mask |= PCPU_GET(cpumask); 571182902Skmacy 572182902Skmacy /* Build our map of 'other' CPUs. */ 573182902Skmacy PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask)); 574182902Skmacy#if 0 575182902Skmacy if (bootverbose) 576182902Skmacy lapic_dump("AP"); 577182902Skmacy#endif 578182902Skmacy if (smp_cpus == mp_ncpus) { 579182902Skmacy /* enable IPI's, tlb shootdown, freezes etc */ 580182902Skmacy atomic_store_rel_int(&smp_started, 1); 581182902Skmacy smp_active = 1; /* historic */ 582182902Skmacy } 583182902Skmacy 584184112Skmacy xen_smp_intr_init(bootAP); 585182902Skmacy mtx_unlock_spin(&ap_boot_mtx); 586182902Skmacy 587182902Skmacy /* wait until all the AP's are up */ 588182902Skmacy while (smp_started == 0) 589182902Skmacy ia32_pause(); 590182902Skmacy 591183131Skmacy 592183131Skmacy PCPU_SET(curthread, PCPU_GET(idlethread)); 593182902Skmacy /* enter the scheduler */ 594182902Skmacy sched_throw(NULL); 595182902Skmacy 596182902Skmacy panic("scheduler returned us to %s", __func__); 597182902Skmacy /* NOTREACHED */ 598182902Skmacy} 599182902Skmacy 600182902Skmacy/******************************************************************* 601182902Skmacy * local functions and data 602182902Skmacy */ 603182902Skmacy 604182902Skmacy/* 605182902Skmacy * We tell the I/O APIC code about all the CPUs we want to receive 606182902Skmacy * interrupts. If we don't want certain CPUs to receive IRQs we 607182902Skmacy * can simply not tell the I/O APIC code about them in this function. 608182902Skmacy * We also do not tell it about the BSP since it tells itself about 609182902Skmacy * the BSP internally to work with UP kernels and on UP machines. 610182902Skmacy */ 611182902Skmacystatic void 612182902Skmacyset_interrupt_apic_ids(void) 613182902Skmacy{ 614182902Skmacy u_int i, apic_id; 615182902Skmacy 616182902Skmacy for (i = 0; i < MAXCPU; i++) { 617182902Skmacy apic_id = cpu_apic_ids[i]; 618182902Skmacy if (apic_id == -1) 619182902Skmacy continue; 620182902Skmacy if (cpu_info[apic_id].cpu_bsp) 621182902Skmacy continue; 622182902Skmacy if (cpu_info[apic_id].cpu_disabled) 623182902Skmacy continue; 624182902Skmacy 625182902Skmacy /* Don't let hyperthreads service interrupts. */ 626182902Skmacy if (hyperthreading_cpus > 1 && 627182902Skmacy apic_id % hyperthreading_cpus != 0) 628182902Skmacy continue; 629182902Skmacy 630182902Skmacy intr_add_cpu(i); 631182902Skmacy } 632182902Skmacy} 633182902Skmacy 634182902Skmacy/* 635182902Skmacy * Assign logical CPU IDs to local APICs. 636182902Skmacy */ 637182902Skmacystatic void 638182902Skmacyassign_cpu_ids(void) 639182902Skmacy{ 640182902Skmacy u_int i; 641182902Skmacy 642182902Skmacy /* Check for explicitly disabled CPUs. */ 643182902Skmacy for (i = 0; i <= MAX_APIC_ID; i++) { 644182902Skmacy if (!cpu_info[i].cpu_present || cpu_info[i].cpu_bsp) 645182902Skmacy continue; 646182902Skmacy 647182902Skmacy /* Don't use this CPU if it has been disabled by a tunable. */ 648182902Skmacy if (resource_disabled("lapic", i)) { 649182902Skmacy cpu_info[i].cpu_disabled = 1; 650182902Skmacy continue; 651182902Skmacy } 652182902Skmacy } 653182902Skmacy 654182902Skmacy /* 655182902Skmacy * Assign CPU IDs to local APIC IDs and disable any CPUs 656182902Skmacy * beyond MAXCPU. CPU 0 has already been assigned to the BSP, 657182902Skmacy * so we only have to assign IDs for APs. 658182902Skmacy */ 659182902Skmacy mp_ncpus = 1; 660182902Skmacy for (i = 0; i <= MAX_APIC_ID; i++) { 661182902Skmacy if (!cpu_info[i].cpu_present || cpu_info[i].cpu_bsp || 662182902Skmacy cpu_info[i].cpu_disabled) 663182902Skmacy continue; 664182902Skmacy 665182902Skmacy if (mp_ncpus < MAXCPU) { 666182902Skmacy cpu_apic_ids[mp_ncpus] = i; 667182902Skmacy mp_ncpus++; 668182902Skmacy } else 669182902Skmacy cpu_info[i].cpu_disabled = 1; 670182902Skmacy } 671182902Skmacy KASSERT(mp_maxid >= mp_ncpus - 1, 672182902Skmacy ("%s: counters out of sync: max %d, count %d", __func__, mp_maxid, 673182902Skmacy mp_ncpus)); 674182902Skmacy} 675182902Skmacy 676182902Skmacy/* 677182902Skmacy * start each AP in our list 678182902Skmacy */ 679182902Skmacy/* Lowest 1MB is already mapped: don't touch*/ 680182902Skmacy#define TMPMAP_START 1 681182902Skmacyint 682182902Skmacystart_all_aps(void) 683182902Skmacy{ 684182902Skmacy int x,apic_id, cpu; 685182902Skmacy struct pcpu *pc; 686182902Skmacy 687182902Skmacy mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN); 688182902Skmacy 689182902Skmacy /* set up temporary P==V mapping for AP boot */ 690182902Skmacy /* XXX this is a hack, we should boot the AP on its own stack/PTD */ 691182902Skmacy 692184115Skmacy xen_smp_intr_init(0); 693182902Skmacy /* start each AP */ 694182902Skmacy for (cpu = 1; cpu < mp_ncpus; cpu++) { 695182902Skmacy apic_id = cpu_apic_ids[cpu]; 696182902Skmacy 697182902Skmacy 698182902Skmacy bootAP = cpu; 699182902Skmacy bootAPgdt = gdt + (512*cpu); 700182902Skmacy 701182902Skmacy /* Get per-cpu data */ 702182902Skmacy pc = &__pcpu[bootAP]; 703183132Skmacy pcpu_init(pc, bootAP, sizeof(struct pcpu)); 704182902Skmacy pc->pc_apic_id = cpu_apic_ids[bootAP]; 705182902Skmacy pc->pc_prvspace = pc; 706182902Skmacy pc->pc_curthread = 0; 707182902Skmacy 708182902Skmacy gdt_segs[GPRIV_SEL].ssd_base = (int) pc; 709182902Skmacy gdt_segs[GPROC0_SEL].ssd_base = (int) &pc->pc_common_tss; 710182902Skmacy 711182902Skmacy PT_SET_MA(bootAPgdt, xpmap_ptom(VTOP(bootAPgdt)) | PG_V | PG_RW); 712182902Skmacy bzero(bootAPgdt, PAGE_SIZE); 713182902Skmacy for (x = 0; x < NGDT; x++) 714182902Skmacy ssdtosd(&gdt_segs[x], &bootAPgdt[x].sd); 715182902Skmacy PT_SET_MA(bootAPgdt, vtomach(bootAPgdt) | PG_V); 716183345Skmacy#ifdef notyet 717183345Skmacy 718183345Skmacy if (HYPERVISOR_vcpu_op(VCPUOP_get_physid, cpu, &cpu_id) == 0) { 719183345Skmacy apicid = xen_vcpu_physid_to_x86_apicid(cpu_id.phys_id); 720183345Skmacy acpiid = xen_vcpu_physid_to_x86_acpiid(cpu_id.phys_id); 721183345Skmacy#ifdef CONFIG_ACPI 722183345Skmacy if (acpiid != 0xff) 723183345Skmacy x86_acpiid_to_apicid[acpiid] = apicid; 724183345Skmacy#endif 725183345Skmacy } 726183345Skmacy#endif 727183345Skmacy 728182902Skmacy /* attempt to start the Application Processor */ 729182902Skmacy if (!start_ap(cpu)) { 730182902Skmacy printf("AP #%d (PHY# %d) failed!\n", cpu, apic_id); 731182902Skmacy /* better panic as the AP may be running loose */ 732182902Skmacy printf("panic y/n? [y] "); 733182902Skmacy if (cngetc() != 'n') 734182902Skmacy panic("bye-bye"); 735182902Skmacy } 736182902Skmacy 737182902Skmacy all_cpus |= (1 << cpu); /* record AP in CPU map */ 738182902Skmacy } 739182902Skmacy 740182902Skmacy 741182902Skmacy /* build our map of 'other' CPUs */ 742182902Skmacy PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask)); 743182902Skmacy 744182902Skmacy pmap_invalidate_range(kernel_pmap, 0, NKPT * NBPDR - 1); 745182902Skmacy 746182902Skmacy /* number of APs actually started */ 747182902Skmacy return mp_naps; 748182902Skmacy} 749182902Skmacy 750182902Skmacyextern uint8_t *pcpu_boot_stack; 751182902Skmacyextern trap_info_t trap_table[]; 752182902Skmacy 753182902Skmacystatic void 754182902Skmacysmp_trap_init(trap_info_t *trap_ctxt) 755182902Skmacy{ 756182902Skmacy const trap_info_t *t = trap_table; 757182902Skmacy 758182902Skmacy for (t = trap_table; t->address; t++) { 759182902Skmacy trap_ctxt[t->vector].flags = t->flags; 760182902Skmacy trap_ctxt[t->vector].cs = t->cs; 761182902Skmacy trap_ctxt[t->vector].address = t->address; 762182902Skmacy } 763182902Skmacy} 764182902Skmacy 765182902Skmacyextern int nkpt; 766184112Skmacystatic void 767182902Skmacycpu_initialize_context(unsigned int cpu) 768182902Skmacy{ 769182902Skmacy /* vcpu_guest_context_t is too large to allocate on the stack. 770182902Skmacy * Hence we allocate statically and protect it with a lock */ 771182902Skmacy vm_page_t m[4]; 772182902Skmacy static vcpu_guest_context_t ctxt; 773182902Skmacy vm_offset_t boot_stack; 774183131Skmacy vm_offset_t newPTD; 775183131Skmacy vm_paddr_t ma[NPGPTD]; 776182902Skmacy static int color; 777182902Skmacy int i; 778182902Skmacy 779182902Skmacy /* 780183131Skmacy * Page 0,[0-3] PTD 781183131Skmacy * Page 1, [4] boot stack 782183131Skmacy * Page [5] PDPT 783182902Skmacy * 784182902Skmacy */ 785183131Skmacy for (i = 0; i < NPGPTD + 2; i++) { 786182902Skmacy m[i] = vm_page_alloc(NULL, color++, 787182902Skmacy VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | 788182902Skmacy VM_ALLOC_ZERO); 789182902Skmacy 790182902Skmacy pmap_zero_page(m[i]); 791182902Skmacy 792182902Skmacy } 793183131Skmacy boot_stack = kmem_alloc_nofault(kernel_map, 1); 794183131Skmacy newPTD = kmem_alloc_nofault(kernel_map, NPGPTD); 795183131Skmacy ma[0] = xpmap_ptom(VM_PAGE_TO_PHYS(m[0]))|PG_V; 796182902Skmacy 797183131Skmacy#ifdef PAE 798183131Skmacy pmap_kenter(boot_stack, VM_PAGE_TO_PHYS(m[NPGPTD + 1])); 799183131Skmacy for (i = 0; i < NPGPTD; i++) { 800183131Skmacy ((vm_paddr_t *)boot_stack)[i] = 801183131Skmacy ma[i] = 802183131Skmacy xpmap_ptom(VM_PAGE_TO_PHYS(m[i]))|PG_V; 803182902Skmacy } 804183131Skmacy#endif 805182902Skmacy 806182902Skmacy /* 807182902Skmacy * Copy cpu0 IdlePTD to new IdlePTD - copying only 808182902Skmacy * kernel mappings 809182902Skmacy */ 810183131Skmacy pmap_qenter(newPTD, m, 4); 811183131Skmacy 812183131Skmacy memcpy((uint8_t *)newPTD + KPTDI*sizeof(vm_paddr_t), 813183131Skmacy (uint8_t *)PTOV(IdlePTD) + KPTDI*sizeof(vm_paddr_t), 814182902Skmacy nkpt*sizeof(vm_paddr_t)); 815183131Skmacy 816183131Skmacy pmap_qremove(newPTD, 4); 817183131Skmacy kmem_free(kernel_map, newPTD, 4); 818182902Skmacy /* 819182902Skmacy * map actual idle stack to boot_stack 820182902Skmacy */ 821183131Skmacy pmap_kenter(boot_stack, VM_PAGE_TO_PHYS(m[NPGPTD])); 822182902Skmacy 823182902Skmacy 824183131Skmacy xen_pgdpt_pin(xpmap_ptom(VM_PAGE_TO_PHYS(m[NPGPTD + 1]))); 825182902Skmacy vm_page_lock_queues(); 826182902Skmacy for (i = 0; i < 4; i++) { 827183131Skmacy int pdir = (PTDPTDI + i) / NPDEPG; 828183131Skmacy int curoffset = (PTDPTDI + i) % NPDEPG; 829183131Skmacy 830182902Skmacy xen_queue_pt_update((vm_paddr_t) 831183131Skmacy ((ma[pdir] & ~PG_V) + (curoffset*sizeof(vm_paddr_t))), 832182902Skmacy ma[i]); 833182902Skmacy } 834182902Skmacy PT_UPDATES_FLUSH(); 835182902Skmacy vm_page_unlock_queues(); 836182902Skmacy 837182902Skmacy memset(&ctxt, 0, sizeof(ctxt)); 838182902Skmacy ctxt.flags = VGCF_IN_KERNEL; 839182902Skmacy ctxt.user_regs.ds = GSEL(GDATA_SEL, SEL_KPL); 840182902Skmacy ctxt.user_regs.es = GSEL(GDATA_SEL, SEL_KPL); 841182902Skmacy ctxt.user_regs.fs = GSEL(GPRIV_SEL, SEL_KPL); 842182902Skmacy ctxt.user_regs.gs = GSEL(GDATA_SEL, SEL_KPL); 843182902Skmacy ctxt.user_regs.cs = GSEL(GCODE_SEL, SEL_KPL); 844182902Skmacy ctxt.user_regs.ss = GSEL(GDATA_SEL, SEL_KPL); 845182902Skmacy ctxt.user_regs.eip = (unsigned long)init_secondary; 846182902Skmacy ctxt.user_regs.eflags = PSL_KERNEL | 0x1000; /* IOPL_RING1 */ 847182902Skmacy 848182902Skmacy memset(&ctxt.fpu_ctxt, 0, sizeof(ctxt.fpu_ctxt)); 849182902Skmacy 850182902Skmacy smp_trap_init(ctxt.trap_ctxt); 851182902Skmacy 852182902Skmacy ctxt.ldt_ents = 0; 853182902Skmacy ctxt.gdt_frames[0] = (uint32_t)((uint64_t)vtomach(bootAPgdt) >> PAGE_SHIFT); 854182902Skmacy ctxt.gdt_ents = 512; 855182902Skmacy 856182902Skmacy#ifdef __i386__ 857182902Skmacy ctxt.user_regs.esp = boot_stack + PAGE_SIZE; 858182902Skmacy 859182902Skmacy ctxt.kernel_ss = GSEL(GDATA_SEL, SEL_KPL); 860182902Skmacy ctxt.kernel_sp = boot_stack + PAGE_SIZE; 861182902Skmacy 862182902Skmacy ctxt.event_callback_cs = GSEL(GCODE_SEL, SEL_KPL); 863182902Skmacy ctxt.event_callback_eip = (unsigned long)Xhypervisor_callback; 864182902Skmacy ctxt.failsafe_callback_cs = GSEL(GCODE_SEL, SEL_KPL); 865182902Skmacy ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback; 866182902Skmacy 867183131Skmacy ctxt.ctrlreg[3] = xpmap_ptom(VM_PAGE_TO_PHYS(m[NPGPTD + 1])); 868182902Skmacy#else /* __x86_64__ */ 869182902Skmacy ctxt.user_regs.esp = idle->thread.rsp0 - sizeof(struct pt_regs); 870182902Skmacy ctxt.kernel_ss = GSEL(GDATA_SEL, SEL_KPL); 871182902Skmacy ctxt.kernel_sp = idle->thread.rsp0; 872182902Skmacy 873182902Skmacy ctxt.event_callback_eip = (unsigned long)hypervisor_callback; 874182902Skmacy ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback; 875182902Skmacy ctxt.syscall_callback_eip = (unsigned long)system_call; 876182902Skmacy 877182902Skmacy ctxt.ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(init_level4_pgt)); 878182902Skmacy 879182902Skmacy ctxt.gs_base_kernel = (unsigned long)(cpu_pda(cpu)); 880182902Skmacy#endif 881182902Skmacy 882182902Skmacy printf("gdtpfn=%lx pdptpfn=%lx\n", 883182902Skmacy ctxt.gdt_frames[0], 884182902Skmacy ctxt.ctrlreg[3] >> PAGE_SHIFT); 885182902Skmacy 886182902Skmacy PANIC_IF(HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, &ctxt)); 887182902Skmacy DELAY(3000); 888182902Skmacy PANIC_IF(HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL)); 889182902Skmacy} 890182902Skmacy 891182902Skmacy/* 892182902Skmacy * This function starts the AP (application processor) identified 893182902Skmacy * by the APIC ID 'physicalCpu'. It does quite a "song and dance" 894182902Skmacy * to accomplish this. This is necessary because of the nuances 895182902Skmacy * of the different hardware we might encounter. It isn't pretty, 896182902Skmacy * but it seems to work. 897182902Skmacy */ 898183131Skmacy 899183131Skmacyint cpus; 900182902Skmacystatic int 901182902Skmacystart_ap(int apic_id) 902182902Skmacy{ 903182902Skmacy int ms; 904182902Skmacy 905182902Skmacy /* used as a watchpoint to signal AP startup */ 906182902Skmacy cpus = mp_naps; 907182902Skmacy 908182902Skmacy cpu_initialize_context(apic_id); 909182902Skmacy 910182902Skmacy /* Wait up to 5 seconds for it to start. */ 911182902Skmacy for (ms = 0; ms < 5000; ms++) { 912182902Skmacy if (mp_naps > cpus) 913182902Skmacy return 1; /* return SUCCESS */ 914182902Skmacy DELAY(1000); 915182902Skmacy } 916182902Skmacy return 0; /* return FAILURE */ 917182902Skmacy} 918182902Skmacy 919182902Skmacy/* 920182902Skmacy * Flush the TLB on all other CPU's 921182902Skmacy */ 922182902Skmacystatic void 923182902Skmacysmp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2) 924182902Skmacy{ 925182902Skmacy u_int ncpu; 926182902Skmacy 927182902Skmacy ncpu = mp_ncpus - 1; /* does not shootdown self */ 928182902Skmacy if (ncpu < 1) 929182902Skmacy return; /* no other cpus */ 930182902Skmacy if (!(read_eflags() & PSL_I)) 931182902Skmacy panic("%s: interrupts disabled", __func__); 932182902Skmacy mtx_lock_spin(&smp_ipi_mtx); 933184112Skmacy call_data->func = ipi_vectors[vector]; 934184112Skmacy call_data->arg1 = addr1; 935184112Skmacy call_data->arg2 = addr2; 936182902Skmacy atomic_store_rel_int(&smp_tlb_wait, 0); 937182902Skmacy ipi_all_but_self(vector); 938182902Skmacy while (smp_tlb_wait < ncpu) 939182902Skmacy ia32_pause(); 940182902Skmacy mtx_unlock_spin(&smp_ipi_mtx); 941182902Skmacy} 942182902Skmacy 943182902Skmacystatic void 944182902Skmacysmp_targeted_tlb_shootdown(u_int mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2) 945182902Skmacy{ 946182902Skmacy int ncpu, othercpus; 947182902Skmacy 948182902Skmacy othercpus = mp_ncpus - 1; 949182902Skmacy if (mask == (u_int)-1) { 950182902Skmacy ncpu = othercpus; 951182902Skmacy if (ncpu < 1) 952182902Skmacy return; 953182902Skmacy } else { 954182902Skmacy mask &= ~PCPU_GET(cpumask); 955182902Skmacy if (mask == 0) 956182902Skmacy return; 957182902Skmacy ncpu = bitcount32(mask); 958182902Skmacy if (ncpu > othercpus) { 959182902Skmacy /* XXX this should be a panic offence */ 960182902Skmacy printf("SMP: tlb shootdown to %d other cpus (only have %d)\n", 961182902Skmacy ncpu, othercpus); 962182902Skmacy ncpu = othercpus; 963182902Skmacy } 964182902Skmacy /* XXX should be a panic, implied by mask == 0 above */ 965182902Skmacy if (ncpu < 1) 966182902Skmacy return; 967182902Skmacy } 968182902Skmacy if (!(read_eflags() & PSL_I)) 969182902Skmacy panic("%s: interrupts disabled", __func__); 970182902Skmacy mtx_lock_spin(&smp_ipi_mtx); 971182902Skmacy smp_tlb_addr1 = addr1; 972182902Skmacy smp_tlb_addr2 = addr2; 973182902Skmacy atomic_store_rel_int(&smp_tlb_wait, 0); 974182902Skmacy if (mask == (u_int)-1) 975182902Skmacy ipi_all_but_self(vector); 976182902Skmacy else 977182902Skmacy ipi_selected(mask, vector); 978182902Skmacy while (smp_tlb_wait < ncpu) 979182902Skmacy ia32_pause(); 980182902Skmacy mtx_unlock_spin(&smp_ipi_mtx); 981182902Skmacy} 982182902Skmacy 983182902Skmacyvoid 984182902Skmacysmp_cache_flush(void) 985182902Skmacy{ 986182902Skmacy 987182902Skmacy if (smp_started) 988182902Skmacy smp_tlb_shootdown(IPI_INVLCACHE, 0, 0); 989182902Skmacy} 990182902Skmacy 991182902Skmacyvoid 992182902Skmacysmp_invltlb(void) 993182902Skmacy{ 994182902Skmacy 995182902Skmacy if (smp_started) { 996182902Skmacy smp_tlb_shootdown(IPI_INVLTLB, 0, 0); 997182902Skmacy } 998182902Skmacy} 999182902Skmacy 1000182902Skmacyvoid 1001182902Skmacysmp_invlpg(vm_offset_t addr) 1002182902Skmacy{ 1003182902Skmacy 1004182902Skmacy if (smp_started) { 1005182902Skmacy smp_tlb_shootdown(IPI_INVLPG, addr, 0); 1006182902Skmacy } 1007182902Skmacy} 1008182902Skmacy 1009182902Skmacyvoid 1010182902Skmacysmp_invlpg_range(vm_offset_t addr1, vm_offset_t addr2) 1011182902Skmacy{ 1012182902Skmacy 1013182902Skmacy if (smp_started) { 1014182902Skmacy smp_tlb_shootdown(IPI_INVLRNG, addr1, addr2); 1015182902Skmacy } 1016182902Skmacy} 1017182902Skmacy 1018182902Skmacyvoid 1019182902Skmacysmp_masked_invltlb(u_int mask) 1020182902Skmacy{ 1021182902Skmacy 1022182902Skmacy if (smp_started) { 1023182902Skmacy smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, 0, 0); 1024182902Skmacy } 1025182902Skmacy} 1026182902Skmacy 1027182902Skmacyvoid 1028182902Skmacysmp_masked_invlpg(u_int mask, vm_offset_t addr) 1029182902Skmacy{ 1030182902Skmacy 1031182902Skmacy if (smp_started) { 1032182902Skmacy smp_targeted_tlb_shootdown(mask, IPI_INVLPG, addr, 0); 1033182902Skmacy } 1034182902Skmacy} 1035182902Skmacy 1036182902Skmacyvoid 1037182902Skmacysmp_masked_invlpg_range(u_int mask, vm_offset_t addr1, vm_offset_t addr2) 1038182902Skmacy{ 1039182902Skmacy 1040182902Skmacy if (smp_started) { 1041182902Skmacy smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, addr1, addr2); 1042182902Skmacy } 1043182902Skmacy} 1044182902Skmacy 1045182902Skmacy/* 1046182902Skmacy * send an IPI to a set of cpus. 1047182902Skmacy */ 1048182902Skmacyvoid 1049182902Skmacyipi_selected(u_int32_t cpus, u_int ipi) 1050182902Skmacy{ 1051182902Skmacy int cpu; 1052182902Skmacy u_int bitmap = 0; 1053182902Skmacy u_int old_pending; 1054182902Skmacy u_int new_pending; 1055182902Skmacy 1056182902Skmacy if (IPI_IS_BITMAPED(ipi)) { 1057182902Skmacy bitmap = 1 << ipi; 1058182902Skmacy ipi = IPI_BITMAP_VECTOR; 1059182902Skmacy } 1060182902Skmacy 1061182902Skmacy#ifdef STOP_NMI 1062182902Skmacy if (ipi == IPI_STOP && stop_cpus_with_nmi) { 1063182902Skmacy ipi_nmi_selected(cpus); 1064182902Skmacy return; 1065182902Skmacy } 1066182902Skmacy#endif 1067182902Skmacy CTR3(KTR_SMP, "%s: cpus: %x ipi: %x", __func__, cpus, ipi); 1068182902Skmacy while ((cpu = ffs(cpus)) != 0) { 1069182902Skmacy cpu--; 1070182902Skmacy cpus &= ~(1 << cpu); 1071182902Skmacy 1072182902Skmacy KASSERT(cpu_apic_ids[cpu] != -1, 1073182902Skmacy ("IPI to non-existent CPU %d", cpu)); 1074182902Skmacy 1075182902Skmacy if (bitmap) { 1076182902Skmacy do { 1077182902Skmacy old_pending = cpu_ipi_pending[cpu]; 1078182902Skmacy new_pending = old_pending | bitmap; 1079182902Skmacy } while (!atomic_cmpset_int(&cpu_ipi_pending[cpu],old_pending, new_pending)); 1080182902Skmacy 1081182902Skmacy if (old_pending) 1082182902Skmacy continue; 1083182902Skmacy } 1084184115Skmacy call_data->func = ipi_vectors[ipi]; 1085183345Skmacy ipi_pcpu(cpu, ipi); 1086182902Skmacy } 1087182902Skmacy} 1088182902Skmacy 1089182902Skmacy/* 1090182902Skmacy * send an IPI to all CPUs EXCEPT myself 1091182902Skmacy */ 1092182902Skmacyvoid 1093182902Skmacyipi_all_but_self(u_int ipi) 1094182902Skmacy{ 1095182902Skmacy 1096182902Skmacy if (IPI_IS_BITMAPED(ipi) || (ipi == IPI_STOP && stop_cpus_with_nmi)) { 1097182902Skmacy ipi_selected(PCPU_GET(other_cpus), ipi); 1098182902Skmacy return; 1099182902Skmacy } 1100182902Skmacy CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi); 1101183345Skmacy ipi_selected(((int)-1 & ~(1 << curcpu)), ipi); 1102182902Skmacy} 1103182902Skmacy 1104182902Skmacy#ifdef STOP_NMI 1105182902Skmacy/* 1106182902Skmacy * send NMI IPI to selected CPUs 1107182902Skmacy */ 1108182902Skmacy 1109182902Skmacy#define BEFORE_SPIN 1000000 1110182902Skmacy 1111182902Skmacyvoid 1112182902Skmacyipi_nmi_selected(u_int32_t cpus) 1113182902Skmacy{ 1114182902Skmacy int cpu; 1115182902Skmacy register_t icrlo; 1116182902Skmacy 1117182902Skmacy icrlo = APIC_DELMODE_NMI | APIC_DESTMODE_PHY | APIC_LEVEL_ASSERT 1118182902Skmacy | APIC_TRIGMOD_EDGE; 1119182902Skmacy 1120182902Skmacy CTR2(KTR_SMP, "%s: cpus: %x nmi", __func__, cpus); 1121182902Skmacy 1122182902Skmacy atomic_set_int(&ipi_nmi_pending, cpus); 1123182902Skmacy 1124182902Skmacy while ((cpu = ffs(cpus)) != 0) { 1125182902Skmacy cpu--; 1126182902Skmacy cpus &= ~(1 << cpu); 1127182902Skmacy 1128182902Skmacy KASSERT(cpu_apic_ids[cpu] != -1, 1129182902Skmacy ("IPI NMI to non-existent CPU %d", cpu)); 1130182902Skmacy 1131182902Skmacy /* Wait for an earlier IPI to finish. */ 1132182902Skmacy if (!lapic_ipi_wait(BEFORE_SPIN)) 1133182902Skmacy panic("ipi_nmi_selected: previous IPI has not cleared"); 1134182902Skmacy 1135182902Skmacy lapic_ipi_raw(icrlo, cpu_apic_ids[cpu]); 1136182902Skmacy } 1137182902Skmacy} 1138182902Skmacy 1139182902Skmacyint 1140182902Skmacyipi_nmi_handler(void) 1141182902Skmacy{ 1142182902Skmacy int cpumask = PCPU_GET(cpumask); 1143182902Skmacy 1144182902Skmacy if (!(ipi_nmi_pending & cpumask)) 1145182902Skmacy return 1; 1146182902Skmacy 1147182902Skmacy atomic_clear_int(&ipi_nmi_pending, cpumask); 1148182902Skmacy cpustop_handler(); 1149182902Skmacy return 0; 1150182902Skmacy} 1151182902Skmacy 1152182902Skmacy#endif /* STOP_NMI */ 1153182902Skmacy 1154182902Skmacy/* 1155182902Skmacy * Handle an IPI_STOP by saving our current context and spinning until we 1156182902Skmacy * are resumed. 1157182902Skmacy */ 1158182902Skmacyvoid 1159182902Skmacycpustop_handler(void) 1160182902Skmacy{ 1161182902Skmacy int cpu = PCPU_GET(cpuid); 1162182902Skmacy int cpumask = PCPU_GET(cpumask); 1163182902Skmacy 1164182902Skmacy savectx(&stoppcbs[cpu]); 1165182902Skmacy 1166182902Skmacy /* Indicate that we are stopped */ 1167182902Skmacy atomic_set_int(&stopped_cpus, cpumask); 1168182902Skmacy 1169182902Skmacy /* Wait for restart */ 1170182902Skmacy while (!(started_cpus & cpumask)) 1171182902Skmacy ia32_pause(); 1172182902Skmacy 1173182902Skmacy atomic_clear_int(&started_cpus, cpumask); 1174182902Skmacy atomic_clear_int(&stopped_cpus, cpumask); 1175182902Skmacy 1176182902Skmacy if (cpu == 0 && cpustop_restartfunc != NULL) { 1177182902Skmacy cpustop_restartfunc(); 1178182902Skmacy cpustop_restartfunc = NULL; 1179182902Skmacy } 1180182902Skmacy} 1181182902Skmacy 1182182902Skmacy/* 1183182902Skmacy * This is called once the rest of the system is up and running and we're 1184182902Skmacy * ready to let the AP's out of the pen. 1185182902Skmacy */ 1186182902Skmacystatic void 1187182902Skmacyrelease_aps(void *dummy __unused) 1188182902Skmacy{ 1189182902Skmacy 1190182902Skmacy if (mp_ncpus == 1) 1191182902Skmacy return; 1192182902Skmacy atomic_store_rel_int(&aps_ready, 1); 1193182902Skmacy while (smp_started == 0) 1194182902Skmacy ia32_pause(); 1195182902Skmacy} 1196182902SkmacySYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL); 1197182902Skmacy 1198