1182902Skmacy/*- 2182902Skmacy * Copyright (c) 1996, by Steve Passe 3182902Skmacy * Copyright (c) 2008, by Kip Macy 4182902Skmacy * All rights reserved. 5182902Skmacy * 6182902Skmacy * Redistribution and use in source and binary forms, with or without 7182902Skmacy * modification, are permitted provided that the following conditions 8182902Skmacy * are met: 9182902Skmacy * 1. Redistributions of source code must retain the above copyright 10182902Skmacy * notice, this list of conditions and the following disclaimer. 11182902Skmacy * 2. The name of the developer may NOT be used to endorse or promote products 12182902Skmacy * derived from this software without specific prior written permission. 13182902Skmacy * 14182902Skmacy * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15182902Skmacy * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16182902Skmacy * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17182902Skmacy * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18182902Skmacy * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19182902Skmacy * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20182902Skmacy * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21182902Skmacy * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22182902Skmacy * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23182902Skmacy * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24182902Skmacy * SUCH DAMAGE. 25182902Skmacy */ 26182902Skmacy 27182902Skmacy#include <sys/cdefs.h> 28182902Skmacy__FBSDID("$FreeBSD$"); 29182902Skmacy 30182902Skmacy#include "opt_apic.h" 31182902Skmacy#include "opt_cpu.h" 32182902Skmacy#include "opt_kstack_pages.h" 33182902Skmacy#include "opt_mp_watchdog.h" 34204972Sjhb#include "opt_pmap.h" 35182902Skmacy#include "opt_sched.h" 36182902Skmacy#include "opt_smp.h" 37182902Skmacy 38182902Skmacy#if !defined(lint) 39182902Skmacy#if !defined(SMP) 40182902Skmacy#error How did you get here? 41182902Skmacy#endif 42182902Skmacy 43182902Skmacy#ifndef DEV_APIC 44182902Skmacy#error The apic device is required for SMP, add "device apic" to your config file. 45182902Skmacy#endif 46182902Skmacy#if defined(CPU_DISABLE_CMPXCHG) && !defined(COMPILING_LINT) 47182902Skmacy#error SMP not supported with CPU_DISABLE_CMPXCHG 48182902Skmacy#endif 49182902Skmacy#endif /* not lint */ 50182902Skmacy 51182902Skmacy#include <sys/param.h> 52182902Skmacy#include <sys/systm.h> 53182902Skmacy#include <sys/bus.h> 54182902Skmacy#include <sys/cons.h> /* cngetc() */ 55222813Sattilio#include <sys/cpuset.h> 56182902Skmacy#ifdef GPROF 57182902Skmacy#include <sys/gmon.h> 58182902Skmacy#endif 59182902Skmacy#include <sys/kernel.h> 60182902Skmacy#include <sys/ktr.h> 61182902Skmacy#include <sys/lock.h> 62182902Skmacy#include <sys/malloc.h> 63182902Skmacy#include <sys/memrange.h> 64182902Skmacy#include <sys/mutex.h> 65182902Skmacy#include <sys/pcpu.h> 66182902Skmacy#include <sys/proc.h> 67241498Salc#include <sys/rwlock.h> 68182902Skmacy#include <sys/sched.h> 69182902Skmacy#include <sys/smp.h> 70182902Skmacy#include <sys/sysctl.h> 71182902Skmacy 72182902Skmacy#include <vm/vm.h> 73182902Skmacy#include <vm/vm_param.h> 74182902Skmacy#include <vm/pmap.h> 75182902Skmacy#include <vm/vm_kern.h> 76182902Skmacy#include <vm/vm_extern.h> 77182902Skmacy#include <vm/vm_page.h> 78182902Skmacy 79214631Sjhb#include <x86/apicreg.h> 80182902Skmacy#include <machine/md_var.h> 81182902Skmacy#include <machine/mp_watchdog.h> 82182902Skmacy#include <machine/pcb.h> 83182902Skmacy#include <machine/psl.h> 84182902Skmacy#include <machine/smp.h> 85182902Skmacy#include <machine/specialreg.h> 86182902Skmacy#include <machine/pcpu.h> 87182902Skmacy 88255040Sgibbs#include <xen/xen-os.h> 89186557Skmacy#include <xen/evtchn.h> 90186557Skmacy#include <xen/xen_intr.h> 91186557Skmacy#include <xen/hypervisor.h> 92182902Skmacy#include <xen/interface/vcpu.h> 93182902Skmacy 94255158Sgibbs/*---------------------------- Extern Declarations ---------------------------*/ 95255158Sgibbsextern struct pcpu __pcpu[]; 96182902Skmacy 97255158Sgibbsextern void Xhypervisor_callback(void); 98255158Sgibbsextern void failsafe_callback(void); 99255158Sgibbsextern void pmap_lazyfix_action(void); 100255158Sgibbs 101255158Sgibbs/*--------------------------- Forward Declarations ---------------------------*/ 102255331Sgibbsstatic driver_filter_t smp_reschedule_interrupt; 103255331Sgibbsstatic driver_filter_t smp_call_function_interrupt; 104255331Sgibbsstatic void assign_cpu_ids(void); 105255331Sgibbsstatic void set_interrupt_apic_ids(void); 106255331Sgibbsstatic int start_all_aps(void); 107255331Sgibbsstatic int start_ap(int apic_id); 108255331Sgibbsstatic void release_aps(void *dummy); 109255158Sgibbs 110255331Sgibbs/*---------------------------------- Macros ----------------------------------*/ 111255331Sgibbs#define IPI_TO_IDX(ipi) ((ipi) - APIC_IPI_INTS) 112255331Sgibbs 113255158Sgibbs/*-------------------------------- Local Types -------------------------------*/ 114255158Sgibbstypedef void call_data_func_t(uintptr_t , uintptr_t); 115255158Sgibbs 116255158Sgibbsstruct cpu_info { 117255158Sgibbs int cpu_present:1; 118255158Sgibbs int cpu_bsp:1; 119255158Sgibbs int cpu_disabled:1; 120255158Sgibbs}; 121255158Sgibbs 122255331Sgibbsstruct xen_ipi_handler 123255331Sgibbs{ 124255331Sgibbs driver_filter_t *filter; 125255331Sgibbs const char *description; 126255331Sgibbs}; 127255331Sgibbs 128255331Sgibbsenum { 129255331Sgibbs RESCHEDULE_VECTOR, 130255331Sgibbs CALL_FUNCTION_VECTOR, 131255331Sgibbs}; 132255331Sgibbs 133255158Sgibbs/*-------------------------------- Global Data -------------------------------*/ 134255158Sgibbsstatic u_int hyperthreading_cpus; 135255158Sgibbsstatic cpuset_t hyperthreading_cpus_mask; 136255158Sgibbs 137182902Skmacyint mp_naps; /* # of Applications processors */ 138182902Skmacyint boot_cpu_id = -1; /* designated BSP */ 139182902Skmacy 140182902Skmacystatic int bootAP; 141182902Skmacystatic union descriptor *bootAPgdt; 142182902Skmacy 143182902Skmacy/* Free these after use */ 144182902Skmacyvoid *bootstacks[MAXCPU]; 145182902Skmacy 146182902Skmacystruct pcb stoppcbs[MAXCPU]; 147182902Skmacy 148182902Skmacy/* Variables needed for SMP tlb shootdown. */ 149182902Skmacyvm_offset_t smp_tlb_addr1; 150182902Skmacyvm_offset_t smp_tlb_addr2; 151182902Skmacyvolatile int smp_tlb_wait; 152182902Skmacy 153182902Skmacystatic u_int logical_cpus; 154222813Sattiliostatic volatile cpuset_t ipi_nmi_pending; 155182902Skmacy 156182902Skmacy/* used to hold the AP's until we are ready to release them */ 157182902Skmacystatic struct mtx ap_boot_mtx; 158182902Skmacy 159182902Skmacy/* Set to 1 once we're ready to let the APs out of the pen. */ 160182902Skmacystatic volatile int aps_ready = 0; 161182902Skmacy 162182902Skmacy/* 163182902Skmacy * Store data from cpu_add() until later in the boot when we actually setup 164182902Skmacy * the APs. 165182902Skmacy */ 166255158Sgibbsstatic struct cpu_info cpu_info[MAX_APIC_ID + 1]; 167182902Skmacyint cpu_apic_ids[MAXCPU]; 168187966Sbzint apic_cpuids[MAX_APIC_ID + 1]; 169182902Skmacy 170182902Skmacy/* Holds pending bitmap based IPIs per CPU */ 171182902Skmacystatic volatile u_int cpu_ipi_pending[MAXCPU]; 172182902Skmacy 173191759Skmacystatic int cpu_logical; 174191759Skmacystatic int cpu_cores; 175191759Skmacy 176255331Sgibbsstatic const struct xen_ipi_handler xen_ipis[] = 177255331Sgibbs{ 178255331Sgibbs [RESCHEDULE_VECTOR] = { smp_reschedule_interrupt, "resched" }, 179255331Sgibbs [CALL_FUNCTION_VECTOR] = { smp_call_function_interrupt,"callfunc" } 180255331Sgibbs}; 181255331Sgibbs 182255158Sgibbs/*------------------------------- Per-CPU Data -------------------------------*/ 183255331SgibbsDPCPU_DEFINE(xen_intr_handle_t, ipi_handle[nitems(xen_ipis)]); 184255040SgibbsDPCPU_DEFINE(struct vcpu_info *, vcpu_info); 185255040Sgibbs 186255158Sgibbs/*------------------------------ Implementation ------------------------------*/ 187182902Skmacystruct cpu_group * 188182902Skmacycpu_topo(void) 189182902Skmacy{ 190182902Skmacy if (cpu_cores == 0) 191182902Skmacy cpu_cores = 1; 192182902Skmacy if (cpu_logical == 0) 193182902Skmacy cpu_logical = 1; 194182902Skmacy if (mp_ncpus % (cpu_cores * cpu_logical) != 0) { 195182902Skmacy printf("WARNING: Non-uniform processors.\n"); 196182902Skmacy printf("WARNING: Using suboptimal topology.\n"); 197182902Skmacy return (smp_topo_none()); 198182902Skmacy } 199182902Skmacy /* 200182902Skmacy * No multi-core or hyper-threaded. 201182902Skmacy */ 202182902Skmacy if (cpu_logical * cpu_cores == 1) 203182902Skmacy return (smp_topo_none()); 204182902Skmacy /* 205182902Skmacy * Only HTT no multi-core. 206182902Skmacy */ 207182902Skmacy if (cpu_logical > 1 && cpu_cores == 1) 208182902Skmacy return (smp_topo_1level(CG_SHARE_L1, cpu_logical, CG_FLAG_HTT)); 209182902Skmacy /* 210182902Skmacy * Only multi-core no HTT. 211182902Skmacy */ 212182902Skmacy if (cpu_cores > 1 && cpu_logical == 1) 213182902Skmacy return (smp_topo_1level(CG_SHARE_NONE, cpu_cores, 0)); 214182902Skmacy /* 215182902Skmacy * Both HTT and multi-core. 216182902Skmacy */ 217182902Skmacy return (smp_topo_2level(CG_SHARE_NONE, cpu_cores, 218182902Skmacy CG_SHARE_L1, cpu_logical, CG_FLAG_HTT)); 219182902Skmacy} 220182902Skmacy 221182902Skmacy/* 222182902Skmacy * Calculate usable address in base memory for AP trampoline code. 223182902Skmacy */ 224182902Skmacyu_int 225182902Skmacymp_bootaddress(u_int basemem) 226182902Skmacy{ 227182902Skmacy 228182902Skmacy return (basemem); 229182902Skmacy} 230182902Skmacy 231182902Skmacyvoid 232182902Skmacycpu_add(u_int apic_id, char boot_cpu) 233182902Skmacy{ 234182902Skmacy 235182902Skmacy if (apic_id > MAX_APIC_ID) { 236182902Skmacy panic("SMP: APIC ID %d too high", apic_id); 237182902Skmacy return; 238182902Skmacy } 239182902Skmacy KASSERT(cpu_info[apic_id].cpu_present == 0, ("CPU %d added twice", 240182902Skmacy apic_id)); 241182902Skmacy cpu_info[apic_id].cpu_present = 1; 242182902Skmacy if (boot_cpu) { 243182902Skmacy KASSERT(boot_cpu_id == -1, 244182902Skmacy ("CPU %d claims to be BSP, but CPU %d already is", apic_id, 245182902Skmacy boot_cpu_id)); 246182902Skmacy boot_cpu_id = apic_id; 247182902Skmacy cpu_info[apic_id].cpu_bsp = 1; 248182902Skmacy } 249182902Skmacy if (mp_ncpus < MAXCPU) 250182902Skmacy mp_ncpus++; 251182902Skmacy if (bootverbose) 252182902Skmacy printf("SMP: Added CPU %d (%s)\n", apic_id, boot_cpu ? "BSP" : 253182902Skmacy "AP"); 254182902Skmacy} 255182902Skmacy 256182902Skmacyvoid 257182902Skmacycpu_mp_setmaxid(void) 258182902Skmacy{ 259182902Skmacy 260182902Skmacy mp_maxid = MAXCPU - 1; 261182902Skmacy} 262182902Skmacy 263182902Skmacyint 264182902Skmacycpu_mp_probe(void) 265182902Skmacy{ 266182902Skmacy 267182902Skmacy /* 268182902Skmacy * Always record BSP in CPU map so that the mbuf init code works 269182902Skmacy * correctly. 270182902Skmacy */ 271222813Sattilio CPU_SETOF(0, &all_cpus); 272182902Skmacy if (mp_ncpus == 0) { 273182902Skmacy /* 274182902Skmacy * No CPUs were found, so this must be a UP system. Setup 275182902Skmacy * the variables to represent a system with a single CPU 276182902Skmacy * with an id of 0. 277182902Skmacy */ 278182902Skmacy mp_ncpus = 1; 279182902Skmacy return (0); 280182902Skmacy } 281182902Skmacy 282182902Skmacy /* At least one CPU was found. */ 283182902Skmacy if (mp_ncpus == 1) { 284182902Skmacy /* 285182902Skmacy * One CPU was found, so this must be a UP system with 286182902Skmacy * an I/O APIC. 287182902Skmacy */ 288182902Skmacy return (0); 289182902Skmacy } 290182902Skmacy 291182902Skmacy /* At least two CPUs were found. */ 292182902Skmacy return (1); 293182902Skmacy} 294182902Skmacy 295182902Skmacy/* 296182902Skmacy * Initialize the IPI handlers and start up the AP's. 297182902Skmacy */ 298182902Skmacyvoid 299182902Skmacycpu_mp_start(void) 300182902Skmacy{ 301182902Skmacy int i; 302182902Skmacy 303182902Skmacy /* Initialize the logical ID to APIC ID table. */ 304182902Skmacy for (i = 0; i < MAXCPU; i++) { 305182902Skmacy cpu_apic_ids[i] = -1; 306182902Skmacy cpu_ipi_pending[i] = 0; 307182902Skmacy } 308182902Skmacy 309182902Skmacy /* Set boot_cpu_id if needed. */ 310182902Skmacy if (boot_cpu_id == -1) { 311182902Skmacy boot_cpu_id = PCPU_GET(apic_id); 312182902Skmacy cpu_info[boot_cpu_id].cpu_bsp = 1; 313182902Skmacy } else 314182902Skmacy KASSERT(boot_cpu_id == PCPU_GET(apic_id), 315182902Skmacy ("BSP's APIC ID doesn't match boot_cpu_id")); 316182902Skmacy cpu_apic_ids[0] = boot_cpu_id; 317187966Sbz apic_cpuids[boot_cpu_id] = 0; 318182902Skmacy 319182902Skmacy assign_cpu_ids(); 320182902Skmacy 321182902Skmacy /* Start each Application Processor */ 322182902Skmacy start_all_aps(); 323182902Skmacy 324182902Skmacy /* Setup the initial logical CPUs info. */ 325222813Sattilio logical_cpus = 0; 326222813Sattilio CPU_ZERO(&logical_cpus_mask); 327182902Skmacy if (cpu_feature & CPUID_HTT) 328182902Skmacy logical_cpus = (cpu_procinfo & CPUID_HTT_CORES) >> 16; 329182902Skmacy 330182902Skmacy set_interrupt_apic_ids(); 331182902Skmacy} 332182902Skmacy 333182902Skmacy 334184112Skmacystatic void 335184112Skmacyiv_rendezvous(uintptr_t a, uintptr_t b) 336184112Skmacy{ 337184115Skmacy smp_rendezvous_action(); 338184112Skmacy} 339184112Skmacy 340184112Skmacystatic void 341184112Skmacyiv_invltlb(uintptr_t a, uintptr_t b) 342184112Skmacy{ 343184115Skmacy xen_tlb_flush(); 344184112Skmacy} 345184112Skmacy 346184112Skmacystatic void 347184112Skmacyiv_invlpg(uintptr_t a, uintptr_t b) 348184112Skmacy{ 349184115Skmacy xen_invlpg(a); 350184112Skmacy} 351184112Skmacy 352184112Skmacystatic void 353184112Skmacyiv_invlrng(uintptr_t a, uintptr_t b) 354184112Skmacy{ 355184115Skmacy vm_offset_t start = (vm_offset_t)a; 356184115Skmacy vm_offset_t end = (vm_offset_t)b; 357184115Skmacy 358184115Skmacy while (start < end) { 359184115Skmacy xen_invlpg(start); 360184115Skmacy start += PAGE_SIZE; 361184115Skmacy } 362184112Skmacy} 363184112Skmacy 364184115Skmacy 365184112Skmacystatic void 366184112Skmacyiv_invlcache(uintptr_t a, uintptr_t b) 367184112Skmacy{ 368184115Skmacy 369184115Skmacy wbinvd(); 370184198Skmacy atomic_add_int(&smp_tlb_wait, 1); 371184112Skmacy} 372184112Skmacy 373184112Skmacystatic void 374184112Skmacyiv_lazypmap(uintptr_t a, uintptr_t b) 375184112Skmacy{ 376184115Skmacy pmap_lazyfix_action(); 377184224Skmacy atomic_add_int(&smp_tlb_wait, 1); 378184112Skmacy} 379184112Skmacy 380193154Sadrian/* 381193154Sadrian * These start from "IPI offset" APIC_IPI_INTS 382193154Sadrian */ 383255331Sgibbsstatic call_data_func_t *ipi_vectors[6] = 384184112Skmacy{ 385255158Sgibbs iv_rendezvous, 386255158Sgibbs iv_invltlb, 387255158Sgibbs iv_invlpg, 388255158Sgibbs iv_invlrng, 389255158Sgibbs iv_invlcache, 390255158Sgibbs iv_lazypmap, 391184224Skmacy}; 392184224Skmacy 393184224Skmacy/* 394184224Skmacy * Reschedule call back. Nothing to do, 395184224Skmacy * all the work is done automatically when 396184224Skmacy * we return from the interrupt. 397184224Skmacy */ 398184224Skmacystatic int 399184224Skmacysmp_reschedule_interrupt(void *unused) 400184224Skmacy{ 401184198Skmacy int cpu = PCPU_GET(cpuid); 402184198Skmacy u_int ipi_bitmap; 403184198Skmacy 404184198Skmacy ipi_bitmap = atomic_readandclear_int(&cpu_ipi_pending[cpu]); 405184198Skmacy 406184198Skmacy if (ipi_bitmap & (1 << IPI_PREEMPT)) { 407184198Skmacy#ifdef COUNT_IPIS 408184198Skmacy (*ipi_preempt_counts[cpu])++; 409184198Skmacy#endif 410184198Skmacy sched_preempt(curthread); 411184198Skmacy } 412184198Skmacy 413184198Skmacy if (ipi_bitmap & (1 << IPI_AST)) { 414184198Skmacy#ifdef COUNT_IPIS 415184198Skmacy (*ipi_ast_counts[cpu])++; 416184198Skmacy#endif 417184198Skmacy /* Nothing to do for AST */ 418184198Skmacy } 419184198Skmacy return (FILTER_HANDLED); 420184112Skmacy} 421184112Skmacy 422184112Skmacystruct _call_data { 423184224Skmacy uint16_t func_id; 424184224Skmacy uint16_t wait; 425184112Skmacy uintptr_t arg1; 426184112Skmacy uintptr_t arg2; 427184112Skmacy atomic_t started; 428184112Skmacy atomic_t finished; 429184112Skmacy}; 430184112Skmacy 431184112Skmacystatic struct _call_data *call_data; 432184112Skmacy 433184198Skmacystatic int 434184112Skmacysmp_call_function_interrupt(void *unused) 435184112Skmacy{ 436184224Skmacy call_data_func_t *func; 437184112Skmacy uintptr_t arg1 = call_data->arg1; 438184112Skmacy uintptr_t arg2 = call_data->arg2; 439184112Skmacy int wait = call_data->wait; 440184224Skmacy atomic_t *started = &call_data->started; 441184224Skmacy atomic_t *finished = &call_data->finished; 442184112Skmacy 443193154Sadrian /* We only handle function IPIs, not bitmap IPIs */ 444255158Sgibbs if (call_data->func_id < APIC_IPI_INTS || 445255158Sgibbs call_data->func_id > IPI_BITMAP_VECTOR) 446184224Skmacy panic("invalid function id %u", call_data->func_id); 447184224Skmacy 448255331Sgibbs func = ipi_vectors[IPI_TO_IDX(call_data->func_id)]; 449184112Skmacy /* 450184112Skmacy * Notify initiating CPU that I've grabbed the data and am 451184112Skmacy * about to execute the function 452184112Skmacy */ 453184112Skmacy mb(); 454184224Skmacy atomic_inc(started); 455184112Skmacy /* 456184112Skmacy * At this point the info structure may be out of scope unless wait==1 457184112Skmacy */ 458184112Skmacy (*func)(arg1, arg2); 459184112Skmacy 460184112Skmacy if (wait) { 461184112Skmacy mb(); 462184224Skmacy atomic_inc(finished); 463184112Skmacy } 464184224Skmacy atomic_add_int(&smp_tlb_wait, 1); 465184198Skmacy return (FILTER_HANDLED); 466184112Skmacy} 467184112Skmacy 468184112Skmacy/* 469182902Skmacy * Print various information about the SMP system hardware and setup. 470182902Skmacy */ 471182902Skmacyvoid 472182902Skmacycpu_mp_announce(void) 473182902Skmacy{ 474182902Skmacy int i, x; 475182902Skmacy 476182902Skmacy /* List CPUs */ 477182902Skmacy printf(" cpu0 (BSP): APIC ID: %2d\n", boot_cpu_id); 478182902Skmacy for (i = 1, x = 0; x <= MAX_APIC_ID; x++) { 479182902Skmacy if (!cpu_info[x].cpu_present || cpu_info[x].cpu_bsp) 480182902Skmacy continue; 481182902Skmacy if (cpu_info[x].cpu_disabled) 482182902Skmacy printf(" cpu (AP): APIC ID: %2d (disabled)\n", x); 483182902Skmacy else { 484182902Skmacy KASSERT(i < mp_ncpus, 485182902Skmacy ("mp_ncpus and actual cpus are out of whack")); 486182902Skmacy printf(" cpu%d (AP): APIC ID: %2d\n", i++, x); 487182902Skmacy } 488182902Skmacy } 489182902Skmacy} 490182902Skmacy 491184112Skmacystatic int 492255040Sgibbsxen_smp_cpu_init(unsigned int cpu) 493184112Skmacy{ 494255331Sgibbs xen_intr_handle_t *ipi_handle; 495255331Sgibbs const struct xen_ipi_handler *ipi; 496255331Sgibbs int idx, rc; 497184112Skmacy 498255331Sgibbs ipi_handle = DPCPU_ID_GET(cpu, ipi_handle); 499255331Sgibbs for (ipi = xen_ipis, idx = 0; idx < nitems(xen_ipis); ipi++, idx++) { 500184112Skmacy 501255331Sgibbs /* 502255331Sgibbs * The PCPU variable pc_device is not initialized on i386 PV, 503255331Sgibbs * so we have to use the root_bus device in order to setup 504255331Sgibbs * the IPIs. 505255331Sgibbs */ 506255331Sgibbs rc = xen_intr_alloc_and_bind_ipi(root_bus, cpu, 507255331Sgibbs ipi->filter, INTR_TYPE_TTY, &ipi_handle[idx]); 508255331Sgibbs if (rc != 0) { 509255331Sgibbs printf("Unable to allocate a XEN IPI port. " 510255331Sgibbs "Error %d\n", rc); 511255331Sgibbs break; 512255331Sgibbs } 513255331Sgibbs xen_intr_describe(ipi_handle[idx], "%s", ipi->description); 514255331Sgibbs } 515184112Skmacy 516255331Sgibbs for (;idx < nitems(xen_ipis); idx++) 517255331Sgibbs ipi_handle[idx] = NULL; 518255040Sgibbs 519255331Sgibbs if (rc == 0) 520255331Sgibbs return (0); 521184112Skmacy 522255331Sgibbs /* Either all are successfully mapped, or none at all. */ 523255331Sgibbs for (idx = 0; idx < nitems(xen_ipis); idx++) { 524255331Sgibbs if (ipi_handle[idx] == NULL) 525255331Sgibbs continue; 526184198Skmacy 527255331Sgibbs xen_intr_unbind(ipi_handle[idx]); 528255331Sgibbs ipi_handle[idx] = NULL; 529255331Sgibbs } 530184112Skmacy 531255158Sgibbs return (rc); 532184112Skmacy} 533184112Skmacy 534184198Skmacystatic void 535184198Skmacyxen_smp_intr_init_cpus(void *unused) 536184198Skmacy{ 537184198Skmacy int i; 538184198Skmacy 539184198Skmacy for (i = 0; i < mp_ncpus; i++) 540255040Sgibbs xen_smp_cpu_init(i); 541184198Skmacy} 542184198Skmacy 543255040Sgibbsstatic void 544255040Sgibbsxen_smp_intr_setup_cpus(void *unused) 545255040Sgibbs{ 546255040Sgibbs int i; 547255040Sgibbs 548255040Sgibbs for (i = 0; i < mp_ncpus; i++) 549255040Sgibbs DPCPU_ID_SET(i, vcpu_info, 550255040Sgibbs &HYPERVISOR_shared_info->vcpu_info[i]); 551255040Sgibbs} 552255040Sgibbs 553182902Skmacy#define MTOPSIZE (1<<(14 + PAGE_SHIFT)) 554182902Skmacy 555182902Skmacy/* 556182902Skmacy * AP CPU's call this to initialize themselves. 557182902Skmacy */ 558182902Skmacyvoid 559182902Skmacyinit_secondary(void) 560182902Skmacy{ 561182902Skmacy vm_offset_t addr; 562223758Sattilio u_int cpuid; 563182902Skmacy int gsel_tss; 564182902Skmacy 565182902Skmacy 566182902Skmacy /* bootAP is set in start_ap() to our ID. */ 567182902Skmacy PCPU_SET(currentldt, _default_ldt); 568182902Skmacy gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); 569182902Skmacy#if 0 570182902Skmacy gdt[bootAP * NGDT + GPROC0_SEL].sd.sd_type = SDT_SYS386TSS; 571182902Skmacy#endif 572182902Skmacy PCPU_SET(common_tss.tss_esp0, 0); /* not used until after switch */ 573182902Skmacy PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL)); 574182902Skmacy PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16); 575182902Skmacy#if 0 576182902Skmacy PCPU_SET(tss_gdt, &gdt[bootAP * NGDT + GPROC0_SEL].sd); 577182902Skmacy 578182902Skmacy PCPU_SET(common_tssd, *PCPU_GET(tss_gdt)); 579182902Skmacy#endif 580182902Skmacy PCPU_SET(fsgs_gdt, &gdt[GUFS_SEL].sd); 581182902Skmacy 582182902Skmacy /* 583182902Skmacy * Set to a known state: 584182902Skmacy * Set by mpboot.s: CR0_PG, CR0_PE 585182902Skmacy * Set by cpu_setregs: CR0_NE, CR0_MP, CR0_TS, CR0_WP, CR0_AM 586182902Skmacy */ 587182902Skmacy /* 588182902Skmacy * signal our startup to the BSP. 589182902Skmacy */ 590182902Skmacy mp_naps++; 591182902Skmacy 592182902Skmacy /* Spin until the BSP releases the AP's. */ 593182902Skmacy while (!aps_ready) 594182902Skmacy ia32_pause(); 595182902Skmacy 596182902Skmacy /* BSP may have changed PTD while we were waiting */ 597182902Skmacy invltlb(); 598182902Skmacy for (addr = 0; addr < NKPT * NBPDR - 1; addr += PAGE_SIZE) 599182902Skmacy invlpg(addr); 600182902Skmacy 601182902Skmacy#if 0 602276076Sjhb /* set up SSE/NX */ 603276076Sjhb initializecpu(); 604182902Skmacy#endif 605182902Skmacy 606276076Sjhb /* set up FPU state on the AP */ 607278423Smarius npxinit(false); 608182902Skmacy#if 0 609182902Skmacy /* A quick check from sanity claus */ 610182902Skmacy if (PCPU_GET(apic_id) != lapic_id()) { 611182902Skmacy printf("SMP: cpuid = %d\n", PCPU_GET(cpuid)); 612182902Skmacy printf("SMP: actual apic_id = %d\n", lapic_id()); 613182902Skmacy printf("SMP: correct apic_id = %d\n", PCPU_GET(apic_id)); 614182902Skmacy panic("cpuid mismatch! boom!!"); 615182902Skmacy } 616182902Skmacy#endif 617182902Skmacy 618182902Skmacy /* Initialize curthread. */ 619182902Skmacy KASSERT(PCPU_GET(idlethread) != NULL, ("no idle thread")); 620182902Skmacy PCPU_SET(curthread, PCPU_GET(idlethread)); 621182902Skmacy 622182902Skmacy mtx_lock_spin(&ap_boot_mtx); 623182902Skmacy#if 0 624182902Skmacy 625182902Skmacy /* Init local apic for irq's */ 626182902Skmacy lapic_setup(1); 627182902Skmacy#endif 628182902Skmacy smp_cpus++; 629182902Skmacy 630223758Sattilio cpuid = PCPU_GET(cpuid); 631223758Sattilio CTR1(KTR_SMP, "SMP: AP CPU #%d Launched", cpuid); 632223758Sattilio printf("SMP: AP CPU #%d Launched!\n", cpuid); 633182902Skmacy 634182902Skmacy /* Determine if we are a logical CPU. */ 635182902Skmacy if (logical_cpus > 1 && PCPU_GET(apic_id) % logical_cpus != 0) 636223758Sattilio CPU_SET(cpuid, &logical_cpus_mask); 637182902Skmacy 638182902Skmacy /* Determine if we are a hyperthread. */ 639182902Skmacy if (hyperthreading_cpus > 1 && 640182902Skmacy PCPU_GET(apic_id) % hyperthreading_cpus != 0) 641223758Sattilio CPU_SET(cpuid, &hyperthreading_cpus_mask); 642182902Skmacy#if 0 643182902Skmacy if (bootverbose) 644182902Skmacy lapic_dump("AP"); 645182902Skmacy#endif 646182902Skmacy if (smp_cpus == mp_ncpus) { 647182902Skmacy /* enable IPI's, tlb shootdown, freezes etc */ 648182902Skmacy atomic_store_rel_int(&smp_started, 1); 649182902Skmacy } 650182902Skmacy 651182902Skmacy mtx_unlock_spin(&ap_boot_mtx); 652182902Skmacy 653182902Skmacy /* wait until all the AP's are up */ 654182902Skmacy while (smp_started == 0) 655182902Skmacy ia32_pause(); 656182902Skmacy 657183131Skmacy PCPU_SET(curthread, PCPU_GET(idlethread)); 658221835Smav 659221835Smav /* Start per-CPU event timers. */ 660221835Smav cpu_initclocks_ap(); 661221835Smav 662182902Skmacy /* enter the scheduler */ 663182902Skmacy sched_throw(NULL); 664182902Skmacy 665182902Skmacy panic("scheduler returned us to %s", __func__); 666182902Skmacy /* NOTREACHED */ 667182902Skmacy} 668182902Skmacy 669182902Skmacy/******************************************************************* 670182902Skmacy * local functions and data 671182902Skmacy */ 672182902Skmacy 673182902Skmacy/* 674182902Skmacy * We tell the I/O APIC code about all the CPUs we want to receive 675182902Skmacy * interrupts. If we don't want certain CPUs to receive IRQs we 676182902Skmacy * can simply not tell the I/O APIC code about them in this function. 677182902Skmacy * We also do not tell it about the BSP since it tells itself about 678182902Skmacy * the BSP internally to work with UP kernels and on UP machines. 679182902Skmacy */ 680182902Skmacystatic void 681182902Skmacyset_interrupt_apic_ids(void) 682182902Skmacy{ 683182902Skmacy u_int i, apic_id; 684182902Skmacy 685182902Skmacy for (i = 0; i < MAXCPU; i++) { 686182902Skmacy apic_id = cpu_apic_ids[i]; 687182902Skmacy if (apic_id == -1) 688182902Skmacy continue; 689182902Skmacy if (cpu_info[apic_id].cpu_bsp) 690182902Skmacy continue; 691182902Skmacy if (cpu_info[apic_id].cpu_disabled) 692182902Skmacy continue; 693182902Skmacy 694182902Skmacy /* Don't let hyperthreads service interrupts. */ 695182902Skmacy if (hyperthreading_cpus > 1 && 696182902Skmacy apic_id % hyperthreading_cpus != 0) 697182902Skmacy continue; 698182902Skmacy 699182902Skmacy intr_add_cpu(i); 700182902Skmacy } 701182902Skmacy} 702182902Skmacy 703182902Skmacy/* 704182902Skmacy * Assign logical CPU IDs to local APICs. 705182902Skmacy */ 706182902Skmacystatic void 707182902Skmacyassign_cpu_ids(void) 708182902Skmacy{ 709182902Skmacy u_int i; 710182902Skmacy 711182902Skmacy /* Check for explicitly disabled CPUs. */ 712182902Skmacy for (i = 0; i <= MAX_APIC_ID; i++) { 713182902Skmacy if (!cpu_info[i].cpu_present || cpu_info[i].cpu_bsp) 714182902Skmacy continue; 715182902Skmacy 716182902Skmacy /* Don't use this CPU if it has been disabled by a tunable. */ 717182902Skmacy if (resource_disabled("lapic", i)) { 718182902Skmacy cpu_info[i].cpu_disabled = 1; 719182902Skmacy continue; 720182902Skmacy } 721182902Skmacy } 722182902Skmacy 723182902Skmacy /* 724182902Skmacy * Assign CPU IDs to local APIC IDs and disable any CPUs 725182902Skmacy * beyond MAXCPU. CPU 0 has already been assigned to the BSP, 726182902Skmacy * so we only have to assign IDs for APs. 727182902Skmacy */ 728182902Skmacy mp_ncpus = 1; 729182902Skmacy for (i = 0; i <= MAX_APIC_ID; i++) { 730182902Skmacy if (!cpu_info[i].cpu_present || cpu_info[i].cpu_bsp || 731182902Skmacy cpu_info[i].cpu_disabled) 732182902Skmacy continue; 733182902Skmacy 734182902Skmacy if (mp_ncpus < MAXCPU) { 735182902Skmacy cpu_apic_ids[mp_ncpus] = i; 736187966Sbz apic_cpuids[i] = mp_ncpus; 737182902Skmacy mp_ncpus++; 738182902Skmacy } else 739182902Skmacy cpu_info[i].cpu_disabled = 1; 740182902Skmacy } 741182902Skmacy KASSERT(mp_maxid >= mp_ncpus - 1, 742182902Skmacy ("%s: counters out of sync: max %d, count %d", __func__, mp_maxid, 743182902Skmacy mp_ncpus)); 744182902Skmacy} 745182902Skmacy 746182902Skmacy/* 747182902Skmacy * start each AP in our list 748182902Skmacy */ 749182902Skmacy/* Lowest 1MB is already mapped: don't touch*/ 750182902Skmacy#define TMPMAP_START 1 751182902Skmacyint 752182902Skmacystart_all_aps(void) 753182902Skmacy{ 754182902Skmacy int x,apic_id, cpu; 755182902Skmacy struct pcpu *pc; 756182902Skmacy 757182902Skmacy mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN); 758182902Skmacy 759182902Skmacy /* set up temporary P==V mapping for AP boot */ 760182902Skmacy /* XXX this is a hack, we should boot the AP on its own stack/PTD */ 761182902Skmacy 762182902Skmacy /* start each AP */ 763182902Skmacy for (cpu = 1; cpu < mp_ncpus; cpu++) { 764182902Skmacy apic_id = cpu_apic_ids[cpu]; 765182902Skmacy 766182902Skmacy 767182902Skmacy bootAP = cpu; 768182902Skmacy bootAPgdt = gdt + (512*cpu); 769182902Skmacy 770182902Skmacy /* Get per-cpu data */ 771182902Skmacy pc = &__pcpu[bootAP]; 772183132Skmacy pcpu_init(pc, bootAP, sizeof(struct pcpu)); 773254025Sjeff dpcpu_init((void *)kmem_malloc(kernel_arena, DPCPU_SIZE, 774254025Sjeff M_WAITOK | M_ZERO), bootAP); 775182902Skmacy pc->pc_apic_id = cpu_apic_ids[bootAP]; 776256073Sgibbs pc->pc_vcpu_id = cpu_apic_ids[bootAP]; 777182902Skmacy pc->pc_prvspace = pc; 778182902Skmacy pc->pc_curthread = 0; 779182902Skmacy 780182902Skmacy gdt_segs[GPRIV_SEL].ssd_base = (int) pc; 781182902Skmacy gdt_segs[GPROC0_SEL].ssd_base = (int) &pc->pc_common_tss; 782182902Skmacy 783215587Scperciva PT_SET_MA(bootAPgdt, VTOM(bootAPgdt) | PG_V | PG_RW); 784182902Skmacy bzero(bootAPgdt, PAGE_SIZE); 785182902Skmacy for (x = 0; x < NGDT; x++) 786182902Skmacy ssdtosd(&gdt_segs[x], &bootAPgdt[x].sd); 787182902Skmacy PT_SET_MA(bootAPgdt, vtomach(bootAPgdt) | PG_V); 788183345Skmacy#ifdef notyet 789183345Skmacy 790183345Skmacy if (HYPERVISOR_vcpu_op(VCPUOP_get_physid, cpu, &cpu_id) == 0) { 791183345Skmacy apicid = xen_vcpu_physid_to_x86_apicid(cpu_id.phys_id); 792183345Skmacy acpiid = xen_vcpu_physid_to_x86_acpiid(cpu_id.phys_id); 793183345Skmacy#ifdef CONFIG_ACPI 794183345Skmacy if (acpiid != 0xff) 795183345Skmacy x86_acpiid_to_apicid[acpiid] = apicid; 796183345Skmacy#endif 797183345Skmacy } 798183345Skmacy#endif 799183345Skmacy 800182902Skmacy /* attempt to start the Application Processor */ 801182902Skmacy if (!start_ap(cpu)) { 802182902Skmacy printf("AP #%d (PHY# %d) failed!\n", cpu, apic_id); 803182902Skmacy /* better panic as the AP may be running loose */ 804182902Skmacy printf("panic y/n? [y] "); 805182902Skmacy if (cngetc() != 'n') 806182902Skmacy panic("bye-bye"); 807182902Skmacy } 808182902Skmacy 809222813Sattilio CPU_SET(cpu, &all_cpus); /* record AP in CPU map */ 810182902Skmacy } 811182902Skmacy 812182902Skmacy 813182902Skmacy pmap_invalidate_range(kernel_pmap, 0, NKPT * NBPDR - 1); 814182902Skmacy 815182902Skmacy /* number of APs actually started */ 816255158Sgibbs return (mp_naps); 817182902Skmacy} 818182902Skmacy 819182902Skmacyextern uint8_t *pcpu_boot_stack; 820182902Skmacyextern trap_info_t trap_table[]; 821182902Skmacy 822182902Skmacystatic void 823182902Skmacysmp_trap_init(trap_info_t *trap_ctxt) 824182902Skmacy{ 825182902Skmacy const trap_info_t *t = trap_table; 826182902Skmacy 827182902Skmacy for (t = trap_table; t->address; t++) { 828182902Skmacy trap_ctxt[t->vector].flags = t->flags; 829182902Skmacy trap_ctxt[t->vector].cs = t->cs; 830182902Skmacy trap_ctxt[t->vector].address = t->address; 831182902Skmacy } 832182902Skmacy} 833182902Skmacy 834241498Salcextern struct rwlock pvh_global_lock; 835182902Skmacyextern int nkpt; 836184112Skmacystatic void 837182902Skmacycpu_initialize_context(unsigned int cpu) 838182902Skmacy{ 839182902Skmacy /* vcpu_guest_context_t is too large to allocate on the stack. 840182902Skmacy * Hence we allocate statically and protect it with a lock */ 841228747Salc vm_page_t m[NPGPTD + 2]; 842182902Skmacy static vcpu_guest_context_t ctxt; 843182902Skmacy vm_offset_t boot_stack; 844183131Skmacy vm_offset_t newPTD; 845183131Skmacy vm_paddr_t ma[NPGPTD]; 846182902Skmacy int i; 847182902Skmacy 848182902Skmacy /* 849183131Skmacy * Page 0,[0-3] PTD 850183131Skmacy * Page 1, [4] boot stack 851183131Skmacy * Page [5] PDPT 852182902Skmacy * 853182902Skmacy */ 854183131Skmacy for (i = 0; i < NPGPTD + 2; i++) { 855228522Salc m[i] = vm_page_alloc(NULL, 0, 856182902Skmacy VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | 857182902Skmacy VM_ALLOC_ZERO); 858182902Skmacy 859182902Skmacy pmap_zero_page(m[i]); 860182902Skmacy 861182902Skmacy } 862254025Sjeff boot_stack = kva_alloc(PAGE_SIZE); 863254025Sjeff newPTD = kva_alloc(NPGPTD * PAGE_SIZE); 864215587Scperciva ma[0] = VM_PAGE_TO_MACH(m[0])|PG_V; 865182902Skmacy 866183131Skmacy#ifdef PAE 867183131Skmacy pmap_kenter(boot_stack, VM_PAGE_TO_PHYS(m[NPGPTD + 1])); 868183131Skmacy for (i = 0; i < NPGPTD; i++) { 869183131Skmacy ((vm_paddr_t *)boot_stack)[i] = 870215587Scperciva ma[i] = VM_PAGE_TO_MACH(m[i])|PG_V; 871182902Skmacy } 872183131Skmacy#endif 873182902Skmacy 874182902Skmacy /* 875182902Skmacy * Copy cpu0 IdlePTD to new IdlePTD - copying only 876182902Skmacy * kernel mappings 877182902Skmacy */ 878183131Skmacy pmap_qenter(newPTD, m, 4); 879183131Skmacy 880183131Skmacy memcpy((uint8_t *)newPTD + KPTDI*sizeof(vm_paddr_t), 881183131Skmacy (uint8_t *)PTOV(IdlePTD) + KPTDI*sizeof(vm_paddr_t), 882182902Skmacy nkpt*sizeof(vm_paddr_t)); 883183131Skmacy 884183131Skmacy pmap_qremove(newPTD, 4); 885254025Sjeff kva_free(newPTD, 4 * PAGE_SIZE); 886182902Skmacy /* 887182902Skmacy * map actual idle stack to boot_stack 888182902Skmacy */ 889183131Skmacy pmap_kenter(boot_stack, VM_PAGE_TO_PHYS(m[NPGPTD])); 890182902Skmacy 891182902Skmacy 892215587Scperciva xen_pgdpt_pin(VM_PAGE_TO_MACH(m[NPGPTD + 1])); 893241498Salc rw_wlock(&pvh_global_lock); 894182902Skmacy for (i = 0; i < 4; i++) { 895183131Skmacy int pdir = (PTDPTDI + i) / NPDEPG; 896183131Skmacy int curoffset = (PTDPTDI + i) % NPDEPG; 897183131Skmacy 898182902Skmacy xen_queue_pt_update((vm_paddr_t) 899183131Skmacy ((ma[pdir] & ~PG_V) + (curoffset*sizeof(vm_paddr_t))), 900182902Skmacy ma[i]); 901182902Skmacy } 902182902Skmacy PT_UPDATES_FLUSH(); 903241498Salc rw_wunlock(&pvh_global_lock); 904182902Skmacy 905182902Skmacy memset(&ctxt, 0, sizeof(ctxt)); 906182902Skmacy ctxt.flags = VGCF_IN_KERNEL; 907182902Skmacy ctxt.user_regs.ds = GSEL(GDATA_SEL, SEL_KPL); 908182902Skmacy ctxt.user_regs.es = GSEL(GDATA_SEL, SEL_KPL); 909182902Skmacy ctxt.user_regs.fs = GSEL(GPRIV_SEL, SEL_KPL); 910182902Skmacy ctxt.user_regs.gs = GSEL(GDATA_SEL, SEL_KPL); 911182902Skmacy ctxt.user_regs.cs = GSEL(GCODE_SEL, SEL_KPL); 912182902Skmacy ctxt.user_regs.ss = GSEL(GDATA_SEL, SEL_KPL); 913182902Skmacy ctxt.user_regs.eip = (unsigned long)init_secondary; 914182902Skmacy ctxt.user_regs.eflags = PSL_KERNEL | 0x1000; /* IOPL_RING1 */ 915182902Skmacy 916182902Skmacy memset(&ctxt.fpu_ctxt, 0, sizeof(ctxt.fpu_ctxt)); 917182902Skmacy 918182902Skmacy smp_trap_init(ctxt.trap_ctxt); 919182902Skmacy 920182902Skmacy ctxt.ldt_ents = 0; 921255158Sgibbs ctxt.gdt_frames[0] = 922255158Sgibbs (uint32_t)((uint64_t)vtomach(bootAPgdt) >> PAGE_SHIFT); 923182902Skmacy ctxt.gdt_ents = 512; 924182902Skmacy 925182902Skmacy#ifdef __i386__ 926182902Skmacy ctxt.user_regs.esp = boot_stack + PAGE_SIZE; 927182902Skmacy 928182902Skmacy ctxt.kernel_ss = GSEL(GDATA_SEL, SEL_KPL); 929182902Skmacy ctxt.kernel_sp = boot_stack + PAGE_SIZE; 930182902Skmacy 931182902Skmacy ctxt.event_callback_cs = GSEL(GCODE_SEL, SEL_KPL); 932182902Skmacy ctxt.event_callback_eip = (unsigned long)Xhypervisor_callback; 933182902Skmacy ctxt.failsafe_callback_cs = GSEL(GCODE_SEL, SEL_KPL); 934182902Skmacy ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback; 935182902Skmacy 936215587Scperciva ctxt.ctrlreg[3] = VM_PAGE_TO_MACH(m[NPGPTD + 1]); 937182902Skmacy#else /* __x86_64__ */ 938182902Skmacy ctxt.user_regs.esp = idle->thread.rsp0 - sizeof(struct pt_regs); 939182902Skmacy ctxt.kernel_ss = GSEL(GDATA_SEL, SEL_KPL); 940182902Skmacy ctxt.kernel_sp = idle->thread.rsp0; 941182902Skmacy 942182902Skmacy ctxt.event_callback_eip = (unsigned long)hypervisor_callback; 943182902Skmacy ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback; 944182902Skmacy ctxt.syscall_callback_eip = (unsigned long)system_call; 945182902Skmacy 946182902Skmacy ctxt.ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(init_level4_pgt)); 947182902Skmacy 948182902Skmacy ctxt.gs_base_kernel = (unsigned long)(cpu_pda(cpu)); 949182902Skmacy#endif 950182902Skmacy 951182902Skmacy printf("gdtpfn=%lx pdptpfn=%lx\n", 952182902Skmacy ctxt.gdt_frames[0], 953182902Skmacy ctxt.ctrlreg[3] >> PAGE_SHIFT); 954182902Skmacy 955182902Skmacy PANIC_IF(HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, &ctxt)); 956182902Skmacy DELAY(3000); 957182902Skmacy PANIC_IF(HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL)); 958182902Skmacy} 959182902Skmacy 960182902Skmacy/* 961182902Skmacy * This function starts the AP (application processor) identified 962182902Skmacy * by the APIC ID 'physicalCpu'. It does quite a "song and dance" 963182902Skmacy * to accomplish this. This is necessary because of the nuances 964182902Skmacy * of the different hardware we might encounter. It isn't pretty, 965182902Skmacy * but it seems to work. 966182902Skmacy */ 967183131Skmacy 968183131Skmacyint cpus; 969182902Skmacystatic int 970182902Skmacystart_ap(int apic_id) 971182902Skmacy{ 972182902Skmacy int ms; 973182902Skmacy 974182902Skmacy /* used as a watchpoint to signal AP startup */ 975182902Skmacy cpus = mp_naps; 976182902Skmacy 977182902Skmacy cpu_initialize_context(apic_id); 978182902Skmacy 979182902Skmacy /* Wait up to 5 seconds for it to start. */ 980182902Skmacy for (ms = 0; ms < 5000; ms++) { 981182902Skmacy if (mp_naps > cpus) 982255158Sgibbs return (1); /* return SUCCESS */ 983182902Skmacy DELAY(1000); 984182902Skmacy } 985255158Sgibbs return (0); /* return FAILURE */ 986182902Skmacy} 987182902Skmacy 988255040Sgibbsstatic void 989255040Sgibbsipi_pcpu(int cpu, u_int ipi) 990255040Sgibbs{ 991255331Sgibbs KASSERT((ipi <= nitems(xen_ipis)), ("invalid IPI")); 992255331Sgibbs xen_intr_signal(DPCPU_ID_GET(cpu, ipi_handle[ipi])); 993255040Sgibbs} 994255040Sgibbs 995182902Skmacy/* 996222065Sattilio * send an IPI to a specific CPU. 997222065Sattilio */ 998222065Sattiliostatic void 999222065Sattilioipi_send_cpu(int cpu, u_int ipi) 1000222065Sattilio{ 1001222065Sattilio u_int bitmap, old_pending, new_pending; 1002222065Sattilio 1003222065Sattilio if (IPI_IS_BITMAPED(ipi)) { 1004222065Sattilio bitmap = 1 << ipi; 1005222065Sattilio ipi = IPI_BITMAP_VECTOR; 1006222065Sattilio do { 1007222065Sattilio old_pending = cpu_ipi_pending[cpu]; 1008222065Sattilio new_pending = old_pending | bitmap; 1009222065Sattilio } while (!atomic_cmpset_int(&cpu_ipi_pending[cpu], 1010222065Sattilio old_pending, new_pending)); 1011222065Sattilio if (!old_pending) 1012222065Sattilio ipi_pcpu(cpu, RESCHEDULE_VECTOR); 1013222065Sattilio } else { 1014222065Sattilio KASSERT(call_data != NULL, ("call_data not set")); 1015222065Sattilio ipi_pcpu(cpu, CALL_FUNCTION_VECTOR); 1016222065Sattilio } 1017222065Sattilio} 1018222065Sattilio 1019222065Sattilio/* 1020182902Skmacy * Flush the TLB on all other CPU's 1021182902Skmacy */ 1022182902Skmacystatic void 1023182902Skmacysmp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2) 1024182902Skmacy{ 1025182902Skmacy u_int ncpu; 1026184198Skmacy struct _call_data data; 1027182902Skmacy 1028182902Skmacy ncpu = mp_ncpus - 1; /* does not shootdown self */ 1029182902Skmacy if (ncpu < 1) 1030182902Skmacy return; /* no other cpus */ 1031182902Skmacy if (!(read_eflags() & PSL_I)) 1032182902Skmacy panic("%s: interrupts disabled", __func__); 1033182902Skmacy mtx_lock_spin(&smp_ipi_mtx); 1034193098Sadrian KASSERT(call_data == NULL, ("call_data isn't null?!")); 1035193098Sadrian call_data = &data; 1036184224Skmacy call_data->func_id = vector; 1037184112Skmacy call_data->arg1 = addr1; 1038184112Skmacy call_data->arg2 = addr2; 1039182902Skmacy atomic_store_rel_int(&smp_tlb_wait, 0); 1040182902Skmacy ipi_all_but_self(vector); 1041182902Skmacy while (smp_tlb_wait < ncpu) 1042182902Skmacy ia32_pause(); 1043184224Skmacy call_data = NULL; 1044182902Skmacy mtx_unlock_spin(&smp_ipi_mtx); 1045182902Skmacy} 1046182902Skmacy 1047182902Skmacystatic void 1048255158Sgibbssmp_targeted_tlb_shootdown(cpuset_t mask, u_int vector, vm_offset_t addr1, 1049255158Sgibbs vm_offset_t addr2) 1050182902Skmacy{ 1051222813Sattilio int cpu, ncpu, othercpus; 1052184224Skmacy struct _call_data data; 1053182902Skmacy 1054182902Skmacy othercpus = mp_ncpus - 1; 1055222813Sattilio if (CPU_ISFULLSET(&mask)) { 1056222813Sattilio if (othercpus < 1) 1057182902Skmacy return; 1058182902Skmacy } else { 1059223758Sattilio CPU_CLR(PCPU_GET(cpuid), &mask); 1060222813Sattilio if (CPU_EMPTY(&mask)) 1061182902Skmacy return; 1062182902Skmacy } 1063182902Skmacy if (!(read_eflags() & PSL_I)) 1064182902Skmacy panic("%s: interrupts disabled", __func__); 1065182902Skmacy mtx_lock_spin(&smp_ipi_mtx); 1066193098Sadrian KASSERT(call_data == NULL, ("call_data isn't null?!")); 1067184224Skmacy call_data = &data; 1068184224Skmacy call_data->func_id = vector; 1069184224Skmacy call_data->arg1 = addr1; 1070184224Skmacy call_data->arg2 = addr2; 1071182902Skmacy atomic_store_rel_int(&smp_tlb_wait, 0); 1072222813Sattilio if (CPU_ISFULLSET(&mask)) { 1073222813Sattilio ncpu = othercpus; 1074182902Skmacy ipi_all_but_self(vector); 1075222813Sattilio } else { 1076222813Sattilio ncpu = 0; 1077251703Sjeff while ((cpu = CPU_FFS(&mask)) != 0) { 1078222813Sattilio cpu--; 1079222813Sattilio CPU_CLR(cpu, &mask); 1080222813Sattilio CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, 1081222813Sattilio vector); 1082222813Sattilio ipi_send_cpu(cpu, vector); 1083222813Sattilio ncpu++; 1084222813Sattilio } 1085222813Sattilio } 1086182902Skmacy while (smp_tlb_wait < ncpu) 1087182902Skmacy ia32_pause(); 1088184224Skmacy call_data = NULL; 1089182902Skmacy mtx_unlock_spin(&smp_ipi_mtx); 1090182902Skmacy} 1091182902Skmacy 1092182902Skmacyvoid 1093182902Skmacysmp_cache_flush(void) 1094182902Skmacy{ 1095182902Skmacy 1096182902Skmacy if (smp_started) 1097182902Skmacy smp_tlb_shootdown(IPI_INVLCACHE, 0, 0); 1098182902Skmacy} 1099182902Skmacy 1100182902Skmacyvoid 1101182902Skmacysmp_invltlb(void) 1102182902Skmacy{ 1103182902Skmacy 1104182902Skmacy if (smp_started) { 1105182902Skmacy smp_tlb_shootdown(IPI_INVLTLB, 0, 0); 1106182902Skmacy } 1107182902Skmacy} 1108182902Skmacy 1109182902Skmacyvoid 1110182902Skmacysmp_invlpg(vm_offset_t addr) 1111182902Skmacy{ 1112182902Skmacy 1113182902Skmacy if (smp_started) { 1114182902Skmacy smp_tlb_shootdown(IPI_INVLPG, addr, 0); 1115182902Skmacy } 1116182902Skmacy} 1117182902Skmacy 1118182902Skmacyvoid 1119182902Skmacysmp_invlpg_range(vm_offset_t addr1, vm_offset_t addr2) 1120182902Skmacy{ 1121182902Skmacy 1122182902Skmacy if (smp_started) { 1123182902Skmacy smp_tlb_shootdown(IPI_INVLRNG, addr1, addr2); 1124182902Skmacy } 1125182902Skmacy} 1126182902Skmacy 1127182902Skmacyvoid 1128222813Sattiliosmp_masked_invltlb(cpuset_t mask) 1129182902Skmacy{ 1130182902Skmacy 1131182902Skmacy if (smp_started) { 1132182902Skmacy smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, 0, 0); 1133182902Skmacy } 1134182902Skmacy} 1135182902Skmacy 1136182902Skmacyvoid 1137222813Sattiliosmp_masked_invlpg(cpuset_t mask, vm_offset_t addr) 1138182902Skmacy{ 1139182902Skmacy 1140182902Skmacy if (smp_started) { 1141182902Skmacy smp_targeted_tlb_shootdown(mask, IPI_INVLPG, addr, 0); 1142182902Skmacy } 1143182902Skmacy} 1144182902Skmacy 1145182902Skmacyvoid 1146222813Sattiliosmp_masked_invlpg_range(cpuset_t mask, vm_offset_t addr1, vm_offset_t addr2) 1147182902Skmacy{ 1148182902Skmacy 1149182902Skmacy if (smp_started) { 1150182902Skmacy smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, addr1, addr2); 1151182902Skmacy } 1152182902Skmacy} 1153182902Skmacy 1154182902Skmacy/* 1155182902Skmacy * send an IPI to a set of cpus. 1156182902Skmacy */ 1157182902Skmacyvoid 1158222813Sattilioipi_selected(cpuset_t cpus, u_int ipi) 1159182902Skmacy{ 1160182902Skmacy int cpu; 1161182902Skmacy 1162196256Sattilio /* 1163196256Sattilio * IPI_STOP_HARD maps to a NMI and the trap handler needs a bit 1164196256Sattilio * of help in order to understand what is the source. 1165196256Sattilio * Set the mask of receiving CPUs for this purpose. 1166196256Sattilio */ 1167196256Sattilio if (ipi == IPI_STOP_HARD) 1168222813Sattilio CPU_OR_ATOMIC(&ipi_nmi_pending, &cpus); 1169196256Sattilio 1170251703Sjeff while ((cpu = CPU_FFS(&cpus)) != 0) { 1171182902Skmacy cpu--; 1172222813Sattilio CPU_CLR(cpu, &cpus); 1173222065Sattilio CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi); 1174222065Sattilio ipi_send_cpu(cpu, ipi); 1175182902Skmacy } 1176182902Skmacy} 1177182902Skmacy 1178182902Skmacy/* 1179210939Sjhb * send an IPI to a specific CPU. 1180210939Sjhb */ 1181210939Sjhbvoid 1182210939Sjhbipi_cpu(int cpu, u_int ipi) 1183210939Sjhb{ 1184210939Sjhb 1185210939Sjhb /* 1186210939Sjhb * IPI_STOP_HARD maps to a NMI and the trap handler needs a bit 1187210939Sjhb * of help in order to understand what is the source. 1188210939Sjhb * Set the mask of receiving CPUs for this purpose. 1189210939Sjhb */ 1190210939Sjhb if (ipi == IPI_STOP_HARD) 1191222813Sattilio CPU_SET_ATOMIC(cpu, &ipi_nmi_pending); 1192210939Sjhb 1193210939Sjhb CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi); 1194222065Sattilio ipi_send_cpu(cpu, ipi); 1195210939Sjhb} 1196210939Sjhb 1197210939Sjhb/* 1198182902Skmacy * send an IPI to all CPUs EXCEPT myself 1199182902Skmacy */ 1200182902Skmacyvoid 1201182902Skmacyipi_all_but_self(u_int ipi) 1202182902Skmacy{ 1203222813Sattilio cpuset_t other_cpus; 1204196256Sattilio 1205196256Sattilio /* 1206196256Sattilio * IPI_STOP_HARD maps to a NMI and the trap handler needs a bit 1207196256Sattilio * of help in order to understand what is the source. 1208196256Sattilio * Set the mask of receiving CPUs for this purpose. 1209196256Sattilio */ 1210223758Sattilio other_cpus = all_cpus; 1211223758Sattilio CPU_CLR(PCPU_GET(cpuid), &other_cpus); 1212196256Sattilio if (ipi == IPI_STOP_HARD) 1213222813Sattilio CPU_OR_ATOMIC(&ipi_nmi_pending, &other_cpus); 1214196256Sattilio 1215182902Skmacy CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi); 1216222813Sattilio ipi_selected(other_cpus, ipi); 1217182902Skmacy} 1218182902Skmacy 1219196256Sattilioint 1220196256Sattilioipi_nmi_handler() 1221196256Sattilio{ 1222223758Sattilio u_int cpuid; 1223196256Sattilio 1224196256Sattilio /* 1225196256Sattilio * As long as there is not a simple way to know about a NMI's 1226196256Sattilio * source, if the bitmask for the current CPU is present in 1227196256Sattilio * the global pending bitword an IPI_STOP_HARD has been issued 1228196256Sattilio * and should be handled. 1229196256Sattilio */ 1230223758Sattilio cpuid = PCPU_GET(cpuid); 1231223758Sattilio if (!CPU_ISSET(cpuid, &ipi_nmi_pending)) 1232196256Sattilio return (1); 1233196256Sattilio 1234223758Sattilio CPU_CLR_ATOMIC(cpuid, &ipi_nmi_pending); 1235196256Sattilio cpustop_handler(); 1236196256Sattilio return (0); 1237196256Sattilio} 1238196256Sattilio 1239182902Skmacy/* 1240182902Skmacy * Handle an IPI_STOP by saving our current context and spinning until we 1241182902Skmacy * are resumed. 1242182902Skmacy */ 1243182902Skmacyvoid 1244182902Skmacycpustop_handler(void) 1245182902Skmacy{ 1246222813Sattilio int cpu; 1247182902Skmacy 1248222813Sattilio cpu = PCPU_GET(cpuid); 1249222813Sattilio 1250182902Skmacy savectx(&stoppcbs[cpu]); 1251182902Skmacy 1252182902Skmacy /* Indicate that we are stopped */ 1253223758Sattilio CPU_SET_ATOMIC(cpu, &stopped_cpus); 1254182902Skmacy 1255182902Skmacy /* Wait for restart */ 1256223758Sattilio while (!CPU_ISSET(cpu, &started_cpus)) 1257182902Skmacy ia32_pause(); 1258182902Skmacy 1259223758Sattilio CPU_CLR_ATOMIC(cpu, &started_cpus); 1260223758Sattilio CPU_CLR_ATOMIC(cpu, &stopped_cpus); 1261182902Skmacy 1262182902Skmacy if (cpu == 0 && cpustop_restartfunc != NULL) { 1263182902Skmacy cpustop_restartfunc(); 1264182902Skmacy cpustop_restartfunc = NULL; 1265182902Skmacy } 1266182902Skmacy} 1267182902Skmacy 1268182902Skmacy/* 1269264118Sroyger * Handlers for TLB related IPIs 1270264118Sroyger * 1271264118Sroyger * On i386 Xen PV this are no-ops since this port doesn't support SMP. 1272264118Sroyger */ 1273264118Sroygervoid 1274264118Sroygerinvltlb_handler(void) 1275264118Sroyger{ 1276264118Sroyger} 1277264118Sroyger 1278264118Sroygervoid 1279264118Sroygerinvlpg_handler(void) 1280264118Sroyger{ 1281264118Sroyger} 1282264118Sroyger 1283264118Sroygervoid 1284264118Sroygerinvlrng_handler(void) 1285264118Sroyger{ 1286264118Sroyger} 1287264118Sroyger 1288264118Sroygervoid 1289264118Sroygerinvlcache_handler(void) 1290264118Sroyger{ 1291264118Sroyger} 1292264118Sroyger 1293264118Sroyger/* 1294182902Skmacy * This is called once the rest of the system is up and running and we're 1295182902Skmacy * ready to let the AP's out of the pen. 1296182902Skmacy */ 1297182902Skmacystatic void 1298182902Skmacyrelease_aps(void *dummy __unused) 1299182902Skmacy{ 1300182902Skmacy 1301182902Skmacy if (mp_ncpus == 1) 1302182902Skmacy return; 1303182902Skmacy atomic_store_rel_int(&aps_ready, 1); 1304182902Skmacy while (smp_started == 0) 1305182902Skmacy ia32_pause(); 1306182902Skmacy} 1307182902SkmacySYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL); 1308255040SgibbsSYSINIT(start_ipis, SI_SUB_SMP, SI_ORDER_ANY, xen_smp_intr_init_cpus, NULL); 1309255040SgibbsSYSINIT(start_cpu, SI_SUB_INTR, SI_ORDER_ANY, xen_smp_intr_setup_cpus, NULL); 1310