1182902Skmacy/*- 2182902Skmacy * Copyright (c) 1996, by Steve Passe 3182902Skmacy * Copyright (c) 2008, by Kip Macy 4182902Skmacy * All rights reserved. 5182902Skmacy * 6182902Skmacy * Redistribution and use in source and binary forms, with or without 7182902Skmacy * modification, are permitted provided that the following conditions 8182902Skmacy * are met: 9182902Skmacy * 1. Redistributions of source code must retain the above copyright 10182902Skmacy * notice, this list of conditions and the following disclaimer. 11182902Skmacy * 2. The name of the developer may NOT be used to endorse or promote products 12182902Skmacy * derived from this software without specific prior written permission. 13182902Skmacy * 14182902Skmacy * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15182902Skmacy * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16182902Skmacy * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17182902Skmacy * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18182902Skmacy * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19182902Skmacy * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20182902Skmacy * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21182902Skmacy * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22182902Skmacy * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23182902Skmacy * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24182902Skmacy * SUCH DAMAGE. 25182902Skmacy */ 26182902Skmacy 27182902Skmacy#include <sys/cdefs.h> 28182902Skmacy__FBSDID("$FreeBSD$"); 29182902Skmacy 30182902Skmacy#include "opt_apic.h" 31182902Skmacy#include "opt_cpu.h" 32182902Skmacy#include "opt_kstack_pages.h" 33182902Skmacy#include "opt_mp_watchdog.h" 34204972Sjhb#include "opt_pmap.h" 35182902Skmacy#include "opt_sched.h" 36182902Skmacy#include "opt_smp.h" 37182902Skmacy 38182902Skmacy#if !defined(lint) 39182902Skmacy#if !defined(SMP) 40182902Skmacy#error How did you get here? 41182902Skmacy#endif 42182902Skmacy 43182902Skmacy#ifndef DEV_APIC 44182902Skmacy#error The apic device is required for SMP, add "device apic" to your config file. 45182902Skmacy#endif 46182902Skmacy#if defined(CPU_DISABLE_CMPXCHG) && !defined(COMPILING_LINT) 47182902Skmacy#error SMP not supported with CPU_DISABLE_CMPXCHG 48182902Skmacy#endif 49182902Skmacy#endif /* not lint */ 50182902Skmacy 51182902Skmacy#include <sys/param.h> 52182902Skmacy#include <sys/systm.h> 53182902Skmacy#include <sys/bus.h> 54182902Skmacy#include <sys/cons.h> /* cngetc() */ 55222813Sattilio#include <sys/cpuset.h> 56182902Skmacy#ifdef GPROF 57182902Skmacy#include <sys/gmon.h> 58182902Skmacy#endif 59182902Skmacy#include <sys/kernel.h> 60182902Skmacy#include <sys/ktr.h> 61182902Skmacy#include <sys/lock.h> 62182902Skmacy#include <sys/malloc.h> 63182902Skmacy#include <sys/memrange.h> 64182902Skmacy#include <sys/mutex.h> 65182902Skmacy#include <sys/pcpu.h> 66182902Skmacy#include <sys/proc.h> 67241498Salc#include <sys/rwlock.h> 68182902Skmacy#include <sys/sched.h> 69182902Skmacy#include <sys/smp.h> 70182902Skmacy#include <sys/sysctl.h> 71182902Skmacy 72182902Skmacy#include <vm/vm.h> 73182902Skmacy#include <vm/vm_param.h> 74182902Skmacy#include <vm/pmap.h> 75182902Skmacy#include <vm/vm_kern.h> 76182902Skmacy#include <vm/vm_extern.h> 77182902Skmacy#include <vm/vm_page.h> 78182902Skmacy 79214631Sjhb#include <x86/apicreg.h> 80182902Skmacy#include <machine/md_var.h> 81182902Skmacy#include <machine/mp_watchdog.h> 82182902Skmacy#include <machine/pcb.h> 83182902Skmacy#include <machine/psl.h> 84182902Skmacy#include <machine/smp.h> 85182902Skmacy#include <machine/specialreg.h> 86182902Skmacy#include <machine/pcpu.h> 87182902Skmacy 88255040Sgibbs#include <xen/xen-os.h> 89186557Skmacy#include <xen/evtchn.h> 90186557Skmacy#include <xen/xen_intr.h> 91186557Skmacy#include <xen/hypervisor.h> 92182902Skmacy#include <xen/interface/vcpu.h> 93182902Skmacy 94255158Sgibbs/*---------------------------- Extern Declarations ---------------------------*/ 95255158Sgibbsextern struct pcpu __pcpu[]; 96182902Skmacy 97255158Sgibbsextern void Xhypervisor_callback(void); 98255158Sgibbsextern void failsafe_callback(void); 99255158Sgibbsextern void pmap_lazyfix_action(void); 100255158Sgibbs 101255158Sgibbs/*--------------------------- Forward Declarations ---------------------------*/ 102255331Sgibbsstatic driver_filter_t smp_reschedule_interrupt; 103255331Sgibbsstatic driver_filter_t smp_call_function_interrupt; 104255331Sgibbsstatic void assign_cpu_ids(void); 105255331Sgibbsstatic void set_interrupt_apic_ids(void); 106255331Sgibbsstatic int start_all_aps(void); 107255331Sgibbsstatic int start_ap(int apic_id); 108255331Sgibbsstatic void release_aps(void *dummy); 109255158Sgibbs 110255331Sgibbs/*---------------------------------- Macros ----------------------------------*/ 111255331Sgibbs#define IPI_TO_IDX(ipi) ((ipi) - APIC_IPI_INTS) 112255331Sgibbs 113255158Sgibbs/*-------------------------------- Local Types -------------------------------*/ 114255158Sgibbstypedef void call_data_func_t(uintptr_t , uintptr_t); 115255158Sgibbs 116255158Sgibbsstruct cpu_info { 117255158Sgibbs int cpu_present:1; 118255158Sgibbs int cpu_bsp:1; 119255158Sgibbs int cpu_disabled:1; 120255158Sgibbs}; 121255158Sgibbs 122255331Sgibbsstruct xen_ipi_handler 123255331Sgibbs{ 124255331Sgibbs driver_filter_t *filter; 125255331Sgibbs const char *description; 126255331Sgibbs}; 127255331Sgibbs 128255331Sgibbsenum { 129255331Sgibbs RESCHEDULE_VECTOR, 130255331Sgibbs CALL_FUNCTION_VECTOR, 131255331Sgibbs}; 132255331Sgibbs 133255158Sgibbs/*-------------------------------- Global Data -------------------------------*/ 134255158Sgibbsstatic u_int hyperthreading_cpus; 135255158Sgibbsstatic cpuset_t hyperthreading_cpus_mask; 136255158Sgibbs 137182902Skmacyint mp_naps; /* # of Applications processors */ 138182902Skmacyint boot_cpu_id = -1; /* designated BSP */ 139182902Skmacy 140182902Skmacystatic int bootAP; 141182902Skmacystatic union descriptor *bootAPgdt; 142182902Skmacy 143182902Skmacy/* Free these after use */ 144182902Skmacyvoid *bootstacks[MAXCPU]; 145182902Skmacy 146182902Skmacystruct pcb stoppcbs[MAXCPU]; 147182902Skmacy 148182902Skmacy/* Variables needed for SMP tlb shootdown. */ 149182902Skmacyvm_offset_t smp_tlb_addr1; 150182902Skmacyvm_offset_t smp_tlb_addr2; 151182902Skmacyvolatile int smp_tlb_wait; 152182902Skmacy 153182902Skmacystatic u_int logical_cpus; 154222813Sattiliostatic volatile cpuset_t ipi_nmi_pending; 155182902Skmacy 156182902Skmacy/* used to hold the AP's until we are ready to release them */ 157182902Skmacystatic struct mtx ap_boot_mtx; 158182902Skmacy 159182902Skmacy/* Set to 1 once we're ready to let the APs out of the pen. */ 160182902Skmacystatic volatile int aps_ready = 0; 161182902Skmacy 162182902Skmacy/* 163182902Skmacy * Store data from cpu_add() until later in the boot when we actually setup 164182902Skmacy * the APs. 165182902Skmacy */ 166255158Sgibbsstatic struct cpu_info cpu_info[MAX_APIC_ID + 1]; 167182902Skmacyint cpu_apic_ids[MAXCPU]; 168187966Sbzint apic_cpuids[MAX_APIC_ID + 1]; 169182902Skmacy 170182902Skmacy/* Holds pending bitmap based IPIs per CPU */ 171182902Skmacystatic volatile u_int cpu_ipi_pending[MAXCPU]; 172182902Skmacy 173191759Skmacystatic int cpu_logical; 174191759Skmacystatic int cpu_cores; 175191759Skmacy 176255331Sgibbsstatic const struct xen_ipi_handler xen_ipis[] = 177255331Sgibbs{ 178255331Sgibbs [RESCHEDULE_VECTOR] = { smp_reschedule_interrupt, "resched" }, 179255331Sgibbs [CALL_FUNCTION_VECTOR] = { smp_call_function_interrupt,"callfunc" } 180255331Sgibbs}; 181255331Sgibbs 182255158Sgibbs/*------------------------------- Per-CPU Data -------------------------------*/ 183255331SgibbsDPCPU_DEFINE(xen_intr_handle_t, ipi_handle[nitems(xen_ipis)]); 184255040SgibbsDPCPU_DEFINE(struct vcpu_info *, vcpu_info); 185255040Sgibbs 186255158Sgibbs/*------------------------------ Implementation ------------------------------*/ 187182902Skmacystruct cpu_group * 188182902Skmacycpu_topo(void) 189182902Skmacy{ 190182902Skmacy if (cpu_cores == 0) 191182902Skmacy cpu_cores = 1; 192182902Skmacy if (cpu_logical == 0) 193182902Skmacy cpu_logical = 1; 194182902Skmacy if (mp_ncpus % (cpu_cores * cpu_logical) != 0) { 195182902Skmacy printf("WARNING: Non-uniform processors.\n"); 196182902Skmacy printf("WARNING: Using suboptimal topology.\n"); 197182902Skmacy return (smp_topo_none()); 198182902Skmacy } 199182902Skmacy /* 200182902Skmacy * No multi-core or hyper-threaded. 201182902Skmacy */ 202182902Skmacy if (cpu_logical * cpu_cores == 1) 203182902Skmacy return (smp_topo_none()); 204182902Skmacy /* 205182902Skmacy * Only HTT no multi-core. 206182902Skmacy */ 207182902Skmacy if (cpu_logical > 1 && cpu_cores == 1) 208182902Skmacy return (smp_topo_1level(CG_SHARE_L1, cpu_logical, CG_FLAG_HTT)); 209182902Skmacy /* 210182902Skmacy * Only multi-core no HTT. 211182902Skmacy */ 212182902Skmacy if (cpu_cores > 1 && cpu_logical == 1) 213182902Skmacy return (smp_topo_1level(CG_SHARE_NONE, cpu_cores, 0)); 214182902Skmacy /* 215182902Skmacy * Both HTT and multi-core. 216182902Skmacy */ 217182902Skmacy return (smp_topo_2level(CG_SHARE_NONE, cpu_cores, 218182902Skmacy CG_SHARE_L1, cpu_logical, CG_FLAG_HTT)); 219182902Skmacy} 220182902Skmacy 221182902Skmacy/* 222182902Skmacy * Calculate usable address in base memory for AP trampoline code. 223182902Skmacy */ 224182902Skmacyu_int 225182902Skmacymp_bootaddress(u_int basemem) 226182902Skmacy{ 227182902Skmacy 228182902Skmacy return (basemem); 229182902Skmacy} 230182902Skmacy 231182902Skmacyvoid 232182902Skmacycpu_add(u_int apic_id, char boot_cpu) 233182902Skmacy{ 234182902Skmacy 235182902Skmacy if (apic_id > MAX_APIC_ID) { 236182902Skmacy panic("SMP: APIC ID %d too high", apic_id); 237182902Skmacy return; 238182902Skmacy } 239182902Skmacy KASSERT(cpu_info[apic_id].cpu_present == 0, ("CPU %d added twice", 240182902Skmacy apic_id)); 241182902Skmacy cpu_info[apic_id].cpu_present = 1; 242182902Skmacy if (boot_cpu) { 243182902Skmacy KASSERT(boot_cpu_id == -1, 244182902Skmacy ("CPU %d claims to be BSP, but CPU %d already is", apic_id, 245182902Skmacy boot_cpu_id)); 246182902Skmacy boot_cpu_id = apic_id; 247182902Skmacy cpu_info[apic_id].cpu_bsp = 1; 248182902Skmacy } 249182902Skmacy if (mp_ncpus < MAXCPU) 250182902Skmacy mp_ncpus++; 251182902Skmacy if (bootverbose) 252182902Skmacy printf("SMP: Added CPU %d (%s)\n", apic_id, boot_cpu ? "BSP" : 253182902Skmacy "AP"); 254182902Skmacy} 255182902Skmacy 256182902Skmacyvoid 257182902Skmacycpu_mp_setmaxid(void) 258182902Skmacy{ 259182902Skmacy 260182902Skmacy mp_maxid = MAXCPU - 1; 261182902Skmacy} 262182902Skmacy 263182902Skmacyint 264182902Skmacycpu_mp_probe(void) 265182902Skmacy{ 266182902Skmacy 267182902Skmacy /* 268182902Skmacy * Always record BSP in CPU map so that the mbuf init code works 269182902Skmacy * correctly. 270182902Skmacy */ 271222813Sattilio CPU_SETOF(0, &all_cpus); 272182902Skmacy if (mp_ncpus == 0) { 273182902Skmacy /* 274182902Skmacy * No CPUs were found, so this must be a UP system. Setup 275182902Skmacy * the variables to represent a system with a single CPU 276182902Skmacy * with an id of 0. 277182902Skmacy */ 278182902Skmacy mp_ncpus = 1; 279182902Skmacy return (0); 280182902Skmacy } 281182902Skmacy 282182902Skmacy /* At least one CPU was found. */ 283182902Skmacy if (mp_ncpus == 1) { 284182902Skmacy /* 285182902Skmacy * One CPU was found, so this must be a UP system with 286182902Skmacy * an I/O APIC. 287182902Skmacy */ 288182902Skmacy return (0); 289182902Skmacy } 290182902Skmacy 291182902Skmacy /* At least two CPUs were found. */ 292182902Skmacy return (1); 293182902Skmacy} 294182902Skmacy 295182902Skmacy/* 296182902Skmacy * Initialize the IPI handlers and start up the AP's. 297182902Skmacy */ 298182902Skmacyvoid 299182902Skmacycpu_mp_start(void) 300182902Skmacy{ 301182902Skmacy int i; 302182902Skmacy 303182902Skmacy /* Initialize the logical ID to APIC ID table. */ 304182902Skmacy for (i = 0; i < MAXCPU; i++) { 305182902Skmacy cpu_apic_ids[i] = -1; 306182902Skmacy cpu_ipi_pending[i] = 0; 307182902Skmacy } 308182902Skmacy 309182902Skmacy /* Set boot_cpu_id if needed. */ 310182902Skmacy if (boot_cpu_id == -1) { 311182902Skmacy boot_cpu_id = PCPU_GET(apic_id); 312182902Skmacy cpu_info[boot_cpu_id].cpu_bsp = 1; 313182902Skmacy } else 314182902Skmacy KASSERT(boot_cpu_id == PCPU_GET(apic_id), 315182902Skmacy ("BSP's APIC ID doesn't match boot_cpu_id")); 316182902Skmacy cpu_apic_ids[0] = boot_cpu_id; 317187966Sbz apic_cpuids[boot_cpu_id] = 0; 318182902Skmacy 319182902Skmacy assign_cpu_ids(); 320182902Skmacy 321182902Skmacy /* Start each Application Processor */ 322182902Skmacy start_all_aps(); 323182902Skmacy 324182902Skmacy /* Setup the initial logical CPUs info. */ 325222813Sattilio logical_cpus = 0; 326222813Sattilio CPU_ZERO(&logical_cpus_mask); 327182902Skmacy if (cpu_feature & CPUID_HTT) 328182902Skmacy logical_cpus = (cpu_procinfo & CPUID_HTT_CORES) >> 16; 329182902Skmacy 330182902Skmacy set_interrupt_apic_ids(); 331182902Skmacy} 332182902Skmacy 333182902Skmacy 334184112Skmacystatic void 335184112Skmacyiv_rendezvous(uintptr_t a, uintptr_t b) 336184112Skmacy{ 337184115Skmacy smp_rendezvous_action(); 338184112Skmacy} 339184112Skmacy 340184112Skmacystatic void 341184112Skmacyiv_invltlb(uintptr_t a, uintptr_t b) 342184112Skmacy{ 343184115Skmacy xen_tlb_flush(); 344184112Skmacy} 345184112Skmacy 346184112Skmacystatic void 347184112Skmacyiv_invlpg(uintptr_t a, uintptr_t b) 348184112Skmacy{ 349184115Skmacy xen_invlpg(a); 350184112Skmacy} 351184112Skmacy 352184112Skmacystatic void 353184112Skmacyiv_invlrng(uintptr_t a, uintptr_t b) 354184112Skmacy{ 355184115Skmacy vm_offset_t start = (vm_offset_t)a; 356184115Skmacy vm_offset_t end = (vm_offset_t)b; 357184115Skmacy 358184115Skmacy while (start < end) { 359184115Skmacy xen_invlpg(start); 360184115Skmacy start += PAGE_SIZE; 361184115Skmacy } 362184112Skmacy} 363184112Skmacy 364184115Skmacy 365184112Skmacystatic void 366184112Skmacyiv_invlcache(uintptr_t a, uintptr_t b) 367184112Skmacy{ 368184115Skmacy 369184115Skmacy wbinvd(); 370184198Skmacy atomic_add_int(&smp_tlb_wait, 1); 371184112Skmacy} 372184112Skmacy 373184112Skmacystatic void 374184112Skmacyiv_lazypmap(uintptr_t a, uintptr_t b) 375184112Skmacy{ 376184115Skmacy pmap_lazyfix_action(); 377184224Skmacy atomic_add_int(&smp_tlb_wait, 1); 378184112Skmacy} 379184112Skmacy 380193154Sadrian/* 381193154Sadrian * These start from "IPI offset" APIC_IPI_INTS 382193154Sadrian */ 383255331Sgibbsstatic call_data_func_t *ipi_vectors[6] = 384184112Skmacy{ 385255158Sgibbs iv_rendezvous, 386255158Sgibbs iv_invltlb, 387255158Sgibbs iv_invlpg, 388255158Sgibbs iv_invlrng, 389255158Sgibbs iv_invlcache, 390255158Sgibbs iv_lazypmap, 391184224Skmacy}; 392184224Skmacy 393184224Skmacy/* 394184224Skmacy * Reschedule call back. Nothing to do, 395184224Skmacy * all the work is done automatically when 396184224Skmacy * we return from the interrupt. 397184224Skmacy */ 398184224Skmacystatic int 399184224Skmacysmp_reschedule_interrupt(void *unused) 400184224Skmacy{ 401184198Skmacy int cpu = PCPU_GET(cpuid); 402184198Skmacy u_int ipi_bitmap; 403184198Skmacy 404184198Skmacy ipi_bitmap = atomic_readandclear_int(&cpu_ipi_pending[cpu]); 405184198Skmacy 406184198Skmacy if (ipi_bitmap & (1 << IPI_PREEMPT)) { 407184198Skmacy#ifdef COUNT_IPIS 408184198Skmacy (*ipi_preempt_counts[cpu])++; 409184198Skmacy#endif 410184198Skmacy sched_preempt(curthread); 411184198Skmacy } 412184198Skmacy 413184198Skmacy if (ipi_bitmap & (1 << IPI_AST)) { 414184198Skmacy#ifdef COUNT_IPIS 415184198Skmacy (*ipi_ast_counts[cpu])++; 416184198Skmacy#endif 417184198Skmacy /* Nothing to do for AST */ 418184198Skmacy } 419184198Skmacy return (FILTER_HANDLED); 420184112Skmacy} 421184112Skmacy 422184112Skmacystruct _call_data { 423184224Skmacy uint16_t func_id; 424184224Skmacy uint16_t wait; 425184112Skmacy uintptr_t arg1; 426184112Skmacy uintptr_t arg2; 427184112Skmacy atomic_t started; 428184112Skmacy atomic_t finished; 429184112Skmacy}; 430184112Skmacy 431184112Skmacystatic struct _call_data *call_data; 432184112Skmacy 433184198Skmacystatic int 434184112Skmacysmp_call_function_interrupt(void *unused) 435184112Skmacy{ 436184224Skmacy call_data_func_t *func; 437184112Skmacy uintptr_t arg1 = call_data->arg1; 438184112Skmacy uintptr_t arg2 = call_data->arg2; 439184112Skmacy int wait = call_data->wait; 440184224Skmacy atomic_t *started = &call_data->started; 441184224Skmacy atomic_t *finished = &call_data->finished; 442184112Skmacy 443193154Sadrian /* We only handle function IPIs, not bitmap IPIs */ 444255158Sgibbs if (call_data->func_id < APIC_IPI_INTS || 445255158Sgibbs call_data->func_id > IPI_BITMAP_VECTOR) 446184224Skmacy panic("invalid function id %u", call_data->func_id); 447184224Skmacy 448255331Sgibbs func = ipi_vectors[IPI_TO_IDX(call_data->func_id)]; 449184112Skmacy /* 450184112Skmacy * Notify initiating CPU that I've grabbed the data and am 451184112Skmacy * about to execute the function 452184112Skmacy */ 453184112Skmacy mb(); 454184224Skmacy atomic_inc(started); 455184112Skmacy /* 456184112Skmacy * At this point the info structure may be out of scope unless wait==1 457184112Skmacy */ 458184112Skmacy (*func)(arg1, arg2); 459184112Skmacy 460184112Skmacy if (wait) { 461184112Skmacy mb(); 462184224Skmacy atomic_inc(finished); 463184112Skmacy } 464184224Skmacy atomic_add_int(&smp_tlb_wait, 1); 465184198Skmacy return (FILTER_HANDLED); 466184112Skmacy} 467184112Skmacy 468184112Skmacy/* 469182902Skmacy * Print various information about the SMP system hardware and setup. 470182902Skmacy */ 471182902Skmacyvoid 472182902Skmacycpu_mp_announce(void) 473182902Skmacy{ 474182902Skmacy int i, x; 475182902Skmacy 476182902Skmacy /* List CPUs */ 477182902Skmacy printf(" cpu0 (BSP): APIC ID: %2d\n", boot_cpu_id); 478182902Skmacy for (i = 1, x = 0; x <= MAX_APIC_ID; x++) { 479182902Skmacy if (!cpu_info[x].cpu_present || cpu_info[x].cpu_bsp) 480182902Skmacy continue; 481182902Skmacy if (cpu_info[x].cpu_disabled) 482182902Skmacy printf(" cpu (AP): APIC ID: %2d (disabled)\n", x); 483182902Skmacy else { 484182902Skmacy KASSERT(i < mp_ncpus, 485182902Skmacy ("mp_ncpus and actual cpus are out of whack")); 486182902Skmacy printf(" cpu%d (AP): APIC ID: %2d\n", i++, x); 487182902Skmacy } 488182902Skmacy } 489182902Skmacy} 490182902Skmacy 491184112Skmacystatic int 492255040Sgibbsxen_smp_cpu_init(unsigned int cpu) 493184112Skmacy{ 494255331Sgibbs xen_intr_handle_t *ipi_handle; 495255331Sgibbs const struct xen_ipi_handler *ipi; 496255331Sgibbs int idx, rc; 497184112Skmacy 498255331Sgibbs ipi_handle = DPCPU_ID_GET(cpu, ipi_handle); 499255331Sgibbs for (ipi = xen_ipis, idx = 0; idx < nitems(xen_ipis); ipi++, idx++) { 500184112Skmacy 501255331Sgibbs /* 502255331Sgibbs * The PCPU variable pc_device is not initialized on i386 PV, 503255331Sgibbs * so we have to use the root_bus device in order to setup 504255331Sgibbs * the IPIs. 505255331Sgibbs */ 506255331Sgibbs rc = xen_intr_alloc_and_bind_ipi(root_bus, cpu, 507255331Sgibbs ipi->filter, INTR_TYPE_TTY, &ipi_handle[idx]); 508255331Sgibbs if (rc != 0) { 509255331Sgibbs printf("Unable to allocate a XEN IPI port. " 510255331Sgibbs "Error %d\n", rc); 511255331Sgibbs break; 512255331Sgibbs } 513255331Sgibbs xen_intr_describe(ipi_handle[idx], "%s", ipi->description); 514255331Sgibbs } 515184112Skmacy 516255331Sgibbs for (;idx < nitems(xen_ipis); idx++) 517255331Sgibbs ipi_handle[idx] = NULL; 518255040Sgibbs 519255331Sgibbs if (rc == 0) 520255331Sgibbs return (0); 521184112Skmacy 522255331Sgibbs /* Either all are successfully mapped, or none at all. */ 523255331Sgibbs for (idx = 0; idx < nitems(xen_ipis); idx++) { 524255331Sgibbs if (ipi_handle[idx] == NULL) 525255331Sgibbs continue; 526184198Skmacy 527255331Sgibbs xen_intr_unbind(ipi_handle[idx]); 528255331Sgibbs ipi_handle[idx] = NULL; 529255331Sgibbs } 530184112Skmacy 531255158Sgibbs return (rc); 532184112Skmacy} 533184112Skmacy 534184198Skmacystatic void 535184198Skmacyxen_smp_intr_init_cpus(void *unused) 536184198Skmacy{ 537184198Skmacy int i; 538184198Skmacy 539184198Skmacy for (i = 0; i < mp_ncpus; i++) 540255040Sgibbs xen_smp_cpu_init(i); 541184198Skmacy} 542184198Skmacy 543255040Sgibbsstatic void 544255040Sgibbsxen_smp_intr_setup_cpus(void *unused) 545255040Sgibbs{ 546255040Sgibbs int i; 547255040Sgibbs 548255040Sgibbs for (i = 0; i < mp_ncpus; i++) 549255040Sgibbs DPCPU_ID_SET(i, vcpu_info, 550255040Sgibbs &HYPERVISOR_shared_info->vcpu_info[i]); 551255040Sgibbs} 552255040Sgibbs 553182902Skmacy#define MTOPSIZE (1<<(14 + PAGE_SHIFT)) 554182902Skmacy 555182902Skmacy/* 556182902Skmacy * AP CPU's call this to initialize themselves. 557182902Skmacy */ 558182902Skmacyvoid 559182902Skmacyinit_secondary(void) 560182902Skmacy{ 561182902Skmacy vm_offset_t addr; 562223758Sattilio u_int cpuid; 563182902Skmacy int gsel_tss; 564182902Skmacy 565182902Skmacy 566182902Skmacy /* bootAP is set in start_ap() to our ID. */ 567182902Skmacy PCPU_SET(currentldt, _default_ldt); 568182902Skmacy gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); 569182902Skmacy#if 0 570182902Skmacy gdt[bootAP * NGDT + GPROC0_SEL].sd.sd_type = SDT_SYS386TSS; 571182902Skmacy#endif 572182902Skmacy PCPU_SET(common_tss.tss_esp0, 0); /* not used until after switch */ 573182902Skmacy PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL)); 574182902Skmacy PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16); 575182902Skmacy#if 0 576182902Skmacy PCPU_SET(tss_gdt, &gdt[bootAP * NGDT + GPROC0_SEL].sd); 577182902Skmacy 578182902Skmacy PCPU_SET(common_tssd, *PCPU_GET(tss_gdt)); 579182902Skmacy#endif 580182902Skmacy PCPU_SET(fsgs_gdt, &gdt[GUFS_SEL].sd); 581182902Skmacy 582182902Skmacy /* 583182902Skmacy * Set to a known state: 584182902Skmacy * Set by mpboot.s: CR0_PG, CR0_PE 585182902Skmacy * Set by cpu_setregs: CR0_NE, CR0_MP, CR0_TS, CR0_WP, CR0_AM 586182902Skmacy */ 587182902Skmacy /* 588182902Skmacy * signal our startup to the BSP. 589182902Skmacy */ 590182902Skmacy mp_naps++; 591182902Skmacy 592182902Skmacy /* Spin until the BSP releases the AP's. */ 593182902Skmacy while (!aps_ready) 594182902Skmacy ia32_pause(); 595182902Skmacy 596182902Skmacy /* BSP may have changed PTD while we were waiting */ 597182902Skmacy invltlb(); 598182902Skmacy for (addr = 0; addr < NKPT * NBPDR - 1; addr += PAGE_SIZE) 599182902Skmacy invlpg(addr); 600182902Skmacy 601182902Skmacy /* set up FPU state on the AP */ 602189420Sjhb npxinit(); 603182902Skmacy#if 0 604182902Skmacy 605182902Skmacy /* set up SSE registers */ 606182902Skmacy enable_sse(); 607182902Skmacy#endif 608182902Skmacy#if 0 && defined(PAE) 609182902Skmacy /* Enable the PTE no-execute bit. */ 610182902Skmacy if ((amd_feature & AMDID_NX) != 0) { 611182902Skmacy uint64_t msr; 612182902Skmacy 613182902Skmacy msr = rdmsr(MSR_EFER) | EFER_NXE; 614182902Skmacy wrmsr(MSR_EFER, msr); 615182902Skmacy } 616182902Skmacy#endif 617182902Skmacy#if 0 618182902Skmacy /* A quick check from sanity claus */ 619182902Skmacy if (PCPU_GET(apic_id) != lapic_id()) { 620182902Skmacy printf("SMP: cpuid = %d\n", PCPU_GET(cpuid)); 621182902Skmacy printf("SMP: actual apic_id = %d\n", lapic_id()); 622182902Skmacy printf("SMP: correct apic_id = %d\n", PCPU_GET(apic_id)); 623182902Skmacy panic("cpuid mismatch! boom!!"); 624182902Skmacy } 625182902Skmacy#endif 626182902Skmacy 627182902Skmacy /* Initialize curthread. */ 628182902Skmacy KASSERT(PCPU_GET(idlethread) != NULL, ("no idle thread")); 629182902Skmacy PCPU_SET(curthread, PCPU_GET(idlethread)); 630182902Skmacy 631182902Skmacy mtx_lock_spin(&ap_boot_mtx); 632182902Skmacy#if 0 633182902Skmacy 634182902Skmacy /* Init local apic for irq's */ 635182902Skmacy lapic_setup(1); 636182902Skmacy#endif 637182902Skmacy smp_cpus++; 638182902Skmacy 639223758Sattilio cpuid = PCPU_GET(cpuid); 640223758Sattilio CTR1(KTR_SMP, "SMP: AP CPU #%d Launched", cpuid); 641223758Sattilio printf("SMP: AP CPU #%d Launched!\n", cpuid); 642182902Skmacy 643182902Skmacy /* Determine if we are a logical CPU. */ 644182902Skmacy if (logical_cpus > 1 && PCPU_GET(apic_id) % logical_cpus != 0) 645223758Sattilio CPU_SET(cpuid, &logical_cpus_mask); 646182902Skmacy 647182902Skmacy /* Determine if we are a hyperthread. */ 648182902Skmacy if (hyperthreading_cpus > 1 && 649182902Skmacy PCPU_GET(apic_id) % hyperthreading_cpus != 0) 650223758Sattilio CPU_SET(cpuid, &hyperthreading_cpus_mask); 651182902Skmacy#if 0 652182902Skmacy if (bootverbose) 653182902Skmacy lapic_dump("AP"); 654182902Skmacy#endif 655182902Skmacy if (smp_cpus == mp_ncpus) { 656182902Skmacy /* enable IPI's, tlb shootdown, freezes etc */ 657182902Skmacy atomic_store_rel_int(&smp_started, 1); 658182902Skmacy } 659182902Skmacy 660182902Skmacy mtx_unlock_spin(&ap_boot_mtx); 661182902Skmacy 662182902Skmacy /* wait until all the AP's are up */ 663182902Skmacy while (smp_started == 0) 664182902Skmacy ia32_pause(); 665182902Skmacy 666183131Skmacy PCPU_SET(curthread, PCPU_GET(idlethread)); 667221835Smav 668221835Smav /* Start per-CPU event timers. */ 669221835Smav cpu_initclocks_ap(); 670221835Smav 671182902Skmacy /* enter the scheduler */ 672182902Skmacy sched_throw(NULL); 673182902Skmacy 674182902Skmacy panic("scheduler returned us to %s", __func__); 675182902Skmacy /* NOTREACHED */ 676182902Skmacy} 677182902Skmacy 678182902Skmacy/******************************************************************* 679182902Skmacy * local functions and data 680182902Skmacy */ 681182902Skmacy 682182902Skmacy/* 683182902Skmacy * We tell the I/O APIC code about all the CPUs we want to receive 684182902Skmacy * interrupts. If we don't want certain CPUs to receive IRQs we 685182902Skmacy * can simply not tell the I/O APIC code about them in this function. 686182902Skmacy * We also do not tell it about the BSP since it tells itself about 687182902Skmacy * the BSP internally to work with UP kernels and on UP machines. 688182902Skmacy */ 689182902Skmacystatic void 690182902Skmacyset_interrupt_apic_ids(void) 691182902Skmacy{ 692182902Skmacy u_int i, apic_id; 693182902Skmacy 694182902Skmacy for (i = 0; i < MAXCPU; i++) { 695182902Skmacy apic_id = cpu_apic_ids[i]; 696182902Skmacy if (apic_id == -1) 697182902Skmacy continue; 698182902Skmacy if (cpu_info[apic_id].cpu_bsp) 699182902Skmacy continue; 700182902Skmacy if (cpu_info[apic_id].cpu_disabled) 701182902Skmacy continue; 702182902Skmacy 703182902Skmacy /* Don't let hyperthreads service interrupts. */ 704182902Skmacy if (hyperthreading_cpus > 1 && 705182902Skmacy apic_id % hyperthreading_cpus != 0) 706182902Skmacy continue; 707182902Skmacy 708182902Skmacy intr_add_cpu(i); 709182902Skmacy } 710182902Skmacy} 711182902Skmacy 712182902Skmacy/* 713182902Skmacy * Assign logical CPU IDs to local APICs. 714182902Skmacy */ 715182902Skmacystatic void 716182902Skmacyassign_cpu_ids(void) 717182902Skmacy{ 718182902Skmacy u_int i; 719182902Skmacy 720182902Skmacy /* Check for explicitly disabled CPUs. */ 721182902Skmacy for (i = 0; i <= MAX_APIC_ID; i++) { 722182902Skmacy if (!cpu_info[i].cpu_present || cpu_info[i].cpu_bsp) 723182902Skmacy continue; 724182902Skmacy 725182902Skmacy /* Don't use this CPU if it has been disabled by a tunable. */ 726182902Skmacy if (resource_disabled("lapic", i)) { 727182902Skmacy cpu_info[i].cpu_disabled = 1; 728182902Skmacy continue; 729182902Skmacy } 730182902Skmacy } 731182902Skmacy 732182902Skmacy /* 733182902Skmacy * Assign CPU IDs to local APIC IDs and disable any CPUs 734182902Skmacy * beyond MAXCPU. CPU 0 has already been assigned to the BSP, 735182902Skmacy * so we only have to assign IDs for APs. 736182902Skmacy */ 737182902Skmacy mp_ncpus = 1; 738182902Skmacy for (i = 0; i <= MAX_APIC_ID; i++) { 739182902Skmacy if (!cpu_info[i].cpu_present || cpu_info[i].cpu_bsp || 740182902Skmacy cpu_info[i].cpu_disabled) 741182902Skmacy continue; 742182902Skmacy 743182902Skmacy if (mp_ncpus < MAXCPU) { 744182902Skmacy cpu_apic_ids[mp_ncpus] = i; 745187966Sbz apic_cpuids[i] = mp_ncpus; 746182902Skmacy mp_ncpus++; 747182902Skmacy } else 748182902Skmacy cpu_info[i].cpu_disabled = 1; 749182902Skmacy } 750182902Skmacy KASSERT(mp_maxid >= mp_ncpus - 1, 751182902Skmacy ("%s: counters out of sync: max %d, count %d", __func__, mp_maxid, 752182902Skmacy mp_ncpus)); 753182902Skmacy} 754182902Skmacy 755182902Skmacy/* 756182902Skmacy * start each AP in our list 757182902Skmacy */ 758182902Skmacy/* Lowest 1MB is already mapped: don't touch*/ 759182902Skmacy#define TMPMAP_START 1 760182902Skmacyint 761182902Skmacystart_all_aps(void) 762182902Skmacy{ 763182902Skmacy int x,apic_id, cpu; 764182902Skmacy struct pcpu *pc; 765182902Skmacy 766182902Skmacy mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN); 767182902Skmacy 768182902Skmacy /* set up temporary P==V mapping for AP boot */ 769182902Skmacy /* XXX this is a hack, we should boot the AP on its own stack/PTD */ 770182902Skmacy 771182902Skmacy /* start each AP */ 772182902Skmacy for (cpu = 1; cpu < mp_ncpus; cpu++) { 773182902Skmacy apic_id = cpu_apic_ids[cpu]; 774182902Skmacy 775182902Skmacy 776182902Skmacy bootAP = cpu; 777182902Skmacy bootAPgdt = gdt + (512*cpu); 778182902Skmacy 779182902Skmacy /* Get per-cpu data */ 780182902Skmacy pc = &__pcpu[bootAP]; 781183132Skmacy pcpu_init(pc, bootAP, sizeof(struct pcpu)); 782254025Sjeff dpcpu_init((void *)kmem_malloc(kernel_arena, DPCPU_SIZE, 783254025Sjeff M_WAITOK | M_ZERO), bootAP); 784182902Skmacy pc->pc_apic_id = cpu_apic_ids[bootAP]; 785256073Sgibbs pc->pc_vcpu_id = cpu_apic_ids[bootAP]; 786182902Skmacy pc->pc_prvspace = pc; 787182902Skmacy pc->pc_curthread = 0; 788182902Skmacy 789182902Skmacy gdt_segs[GPRIV_SEL].ssd_base = (int) pc; 790182902Skmacy gdt_segs[GPROC0_SEL].ssd_base = (int) &pc->pc_common_tss; 791182902Skmacy 792215587Scperciva PT_SET_MA(bootAPgdt, VTOM(bootAPgdt) | PG_V | PG_RW); 793182902Skmacy bzero(bootAPgdt, PAGE_SIZE); 794182902Skmacy for (x = 0; x < NGDT; x++) 795182902Skmacy ssdtosd(&gdt_segs[x], &bootAPgdt[x].sd); 796182902Skmacy PT_SET_MA(bootAPgdt, vtomach(bootAPgdt) | PG_V); 797183345Skmacy#ifdef notyet 798183345Skmacy 799183345Skmacy if (HYPERVISOR_vcpu_op(VCPUOP_get_physid, cpu, &cpu_id) == 0) { 800183345Skmacy apicid = xen_vcpu_physid_to_x86_apicid(cpu_id.phys_id); 801183345Skmacy acpiid = xen_vcpu_physid_to_x86_acpiid(cpu_id.phys_id); 802183345Skmacy#ifdef CONFIG_ACPI 803183345Skmacy if (acpiid != 0xff) 804183345Skmacy x86_acpiid_to_apicid[acpiid] = apicid; 805183345Skmacy#endif 806183345Skmacy } 807183345Skmacy#endif 808183345Skmacy 809182902Skmacy /* attempt to start the Application Processor */ 810182902Skmacy if (!start_ap(cpu)) { 811182902Skmacy printf("AP #%d (PHY# %d) failed!\n", cpu, apic_id); 812182902Skmacy /* better panic as the AP may be running loose */ 813182902Skmacy printf("panic y/n? [y] "); 814182902Skmacy if (cngetc() != 'n') 815182902Skmacy panic("bye-bye"); 816182902Skmacy } 817182902Skmacy 818222813Sattilio CPU_SET(cpu, &all_cpus); /* record AP in CPU map */ 819182902Skmacy } 820182902Skmacy 821182902Skmacy 822182902Skmacy pmap_invalidate_range(kernel_pmap, 0, NKPT * NBPDR - 1); 823182902Skmacy 824182902Skmacy /* number of APs actually started */ 825255158Sgibbs return (mp_naps); 826182902Skmacy} 827182902Skmacy 828182902Skmacyextern uint8_t *pcpu_boot_stack; 829182902Skmacyextern trap_info_t trap_table[]; 830182902Skmacy 831182902Skmacystatic void 832182902Skmacysmp_trap_init(trap_info_t *trap_ctxt) 833182902Skmacy{ 834182902Skmacy const trap_info_t *t = trap_table; 835182902Skmacy 836182902Skmacy for (t = trap_table; t->address; t++) { 837182902Skmacy trap_ctxt[t->vector].flags = t->flags; 838182902Skmacy trap_ctxt[t->vector].cs = t->cs; 839182902Skmacy trap_ctxt[t->vector].address = t->address; 840182902Skmacy } 841182902Skmacy} 842182902Skmacy 843241498Salcextern struct rwlock pvh_global_lock; 844182902Skmacyextern int nkpt; 845184112Skmacystatic void 846182902Skmacycpu_initialize_context(unsigned int cpu) 847182902Skmacy{ 848182902Skmacy /* vcpu_guest_context_t is too large to allocate on the stack. 849182902Skmacy * Hence we allocate statically and protect it with a lock */ 850228747Salc vm_page_t m[NPGPTD + 2]; 851182902Skmacy static vcpu_guest_context_t ctxt; 852182902Skmacy vm_offset_t boot_stack; 853183131Skmacy vm_offset_t newPTD; 854183131Skmacy vm_paddr_t ma[NPGPTD]; 855182902Skmacy int i; 856182902Skmacy 857182902Skmacy /* 858183131Skmacy * Page 0,[0-3] PTD 859183131Skmacy * Page 1, [4] boot stack 860183131Skmacy * Page [5] PDPT 861182902Skmacy * 862182902Skmacy */ 863183131Skmacy for (i = 0; i < NPGPTD + 2; i++) { 864228522Salc m[i] = vm_page_alloc(NULL, 0, 865182902Skmacy VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | 866182902Skmacy VM_ALLOC_ZERO); 867182902Skmacy 868182902Skmacy pmap_zero_page(m[i]); 869182902Skmacy 870182902Skmacy } 871254025Sjeff boot_stack = kva_alloc(PAGE_SIZE); 872254025Sjeff newPTD = kva_alloc(NPGPTD * PAGE_SIZE); 873215587Scperciva ma[0] = VM_PAGE_TO_MACH(m[0])|PG_V; 874182902Skmacy 875183131Skmacy#ifdef PAE 876183131Skmacy pmap_kenter(boot_stack, VM_PAGE_TO_PHYS(m[NPGPTD + 1])); 877183131Skmacy for (i = 0; i < NPGPTD; i++) { 878183131Skmacy ((vm_paddr_t *)boot_stack)[i] = 879215587Scperciva ma[i] = VM_PAGE_TO_MACH(m[i])|PG_V; 880182902Skmacy } 881183131Skmacy#endif 882182902Skmacy 883182902Skmacy /* 884182902Skmacy * Copy cpu0 IdlePTD to new IdlePTD - copying only 885182902Skmacy * kernel mappings 886182902Skmacy */ 887183131Skmacy pmap_qenter(newPTD, m, 4); 888183131Skmacy 889183131Skmacy memcpy((uint8_t *)newPTD + KPTDI*sizeof(vm_paddr_t), 890183131Skmacy (uint8_t *)PTOV(IdlePTD) + KPTDI*sizeof(vm_paddr_t), 891182902Skmacy nkpt*sizeof(vm_paddr_t)); 892183131Skmacy 893183131Skmacy pmap_qremove(newPTD, 4); 894254025Sjeff kva_free(newPTD, 4 * PAGE_SIZE); 895182902Skmacy /* 896182902Skmacy * map actual idle stack to boot_stack 897182902Skmacy */ 898183131Skmacy pmap_kenter(boot_stack, VM_PAGE_TO_PHYS(m[NPGPTD])); 899182902Skmacy 900182902Skmacy 901215587Scperciva xen_pgdpt_pin(VM_PAGE_TO_MACH(m[NPGPTD + 1])); 902241498Salc rw_wlock(&pvh_global_lock); 903182902Skmacy for (i = 0; i < 4; i++) { 904183131Skmacy int pdir = (PTDPTDI + i) / NPDEPG; 905183131Skmacy int curoffset = (PTDPTDI + i) % NPDEPG; 906183131Skmacy 907182902Skmacy xen_queue_pt_update((vm_paddr_t) 908183131Skmacy ((ma[pdir] & ~PG_V) + (curoffset*sizeof(vm_paddr_t))), 909182902Skmacy ma[i]); 910182902Skmacy } 911182902Skmacy PT_UPDATES_FLUSH(); 912241498Salc rw_wunlock(&pvh_global_lock); 913182902Skmacy 914182902Skmacy memset(&ctxt, 0, sizeof(ctxt)); 915182902Skmacy ctxt.flags = VGCF_IN_KERNEL; 916182902Skmacy ctxt.user_regs.ds = GSEL(GDATA_SEL, SEL_KPL); 917182902Skmacy ctxt.user_regs.es = GSEL(GDATA_SEL, SEL_KPL); 918182902Skmacy ctxt.user_regs.fs = GSEL(GPRIV_SEL, SEL_KPL); 919182902Skmacy ctxt.user_regs.gs = GSEL(GDATA_SEL, SEL_KPL); 920182902Skmacy ctxt.user_regs.cs = GSEL(GCODE_SEL, SEL_KPL); 921182902Skmacy ctxt.user_regs.ss = GSEL(GDATA_SEL, SEL_KPL); 922182902Skmacy ctxt.user_regs.eip = (unsigned long)init_secondary; 923182902Skmacy ctxt.user_regs.eflags = PSL_KERNEL | 0x1000; /* IOPL_RING1 */ 924182902Skmacy 925182902Skmacy memset(&ctxt.fpu_ctxt, 0, sizeof(ctxt.fpu_ctxt)); 926182902Skmacy 927182902Skmacy smp_trap_init(ctxt.trap_ctxt); 928182902Skmacy 929182902Skmacy ctxt.ldt_ents = 0; 930255158Sgibbs ctxt.gdt_frames[0] = 931255158Sgibbs (uint32_t)((uint64_t)vtomach(bootAPgdt) >> PAGE_SHIFT); 932182902Skmacy ctxt.gdt_ents = 512; 933182902Skmacy 934182902Skmacy#ifdef __i386__ 935182902Skmacy ctxt.user_regs.esp = boot_stack + PAGE_SIZE; 936182902Skmacy 937182902Skmacy ctxt.kernel_ss = GSEL(GDATA_SEL, SEL_KPL); 938182902Skmacy ctxt.kernel_sp = boot_stack + PAGE_SIZE; 939182902Skmacy 940182902Skmacy ctxt.event_callback_cs = GSEL(GCODE_SEL, SEL_KPL); 941182902Skmacy ctxt.event_callback_eip = (unsigned long)Xhypervisor_callback; 942182902Skmacy ctxt.failsafe_callback_cs = GSEL(GCODE_SEL, SEL_KPL); 943182902Skmacy ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback; 944182902Skmacy 945215587Scperciva ctxt.ctrlreg[3] = VM_PAGE_TO_MACH(m[NPGPTD + 1]); 946182902Skmacy#else /* __x86_64__ */ 947182902Skmacy ctxt.user_regs.esp = idle->thread.rsp0 - sizeof(struct pt_regs); 948182902Skmacy ctxt.kernel_ss = GSEL(GDATA_SEL, SEL_KPL); 949182902Skmacy ctxt.kernel_sp = idle->thread.rsp0; 950182902Skmacy 951182902Skmacy ctxt.event_callback_eip = (unsigned long)hypervisor_callback; 952182902Skmacy ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback; 953182902Skmacy ctxt.syscall_callback_eip = (unsigned long)system_call; 954182902Skmacy 955182902Skmacy ctxt.ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(init_level4_pgt)); 956182902Skmacy 957182902Skmacy ctxt.gs_base_kernel = (unsigned long)(cpu_pda(cpu)); 958182902Skmacy#endif 959182902Skmacy 960182902Skmacy printf("gdtpfn=%lx pdptpfn=%lx\n", 961182902Skmacy ctxt.gdt_frames[0], 962182902Skmacy ctxt.ctrlreg[3] >> PAGE_SHIFT); 963182902Skmacy 964182902Skmacy PANIC_IF(HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, &ctxt)); 965182902Skmacy DELAY(3000); 966182902Skmacy PANIC_IF(HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL)); 967182902Skmacy} 968182902Skmacy 969182902Skmacy/* 970182902Skmacy * This function starts the AP (application processor) identified 971182902Skmacy * by the APIC ID 'physicalCpu'. It does quite a "song and dance" 972182902Skmacy * to accomplish this. This is necessary because of the nuances 973182902Skmacy * of the different hardware we might encounter. It isn't pretty, 974182902Skmacy * but it seems to work. 975182902Skmacy */ 976183131Skmacy 977183131Skmacyint cpus; 978182902Skmacystatic int 979182902Skmacystart_ap(int apic_id) 980182902Skmacy{ 981182902Skmacy int ms; 982182902Skmacy 983182902Skmacy /* used as a watchpoint to signal AP startup */ 984182902Skmacy cpus = mp_naps; 985182902Skmacy 986182902Skmacy cpu_initialize_context(apic_id); 987182902Skmacy 988182902Skmacy /* Wait up to 5 seconds for it to start. */ 989182902Skmacy for (ms = 0; ms < 5000; ms++) { 990182902Skmacy if (mp_naps > cpus) 991255158Sgibbs return (1); /* return SUCCESS */ 992182902Skmacy DELAY(1000); 993182902Skmacy } 994255158Sgibbs return (0); /* return FAILURE */ 995182902Skmacy} 996182902Skmacy 997255040Sgibbsstatic void 998255040Sgibbsipi_pcpu(int cpu, u_int ipi) 999255040Sgibbs{ 1000255331Sgibbs KASSERT((ipi <= nitems(xen_ipis)), ("invalid IPI")); 1001255331Sgibbs xen_intr_signal(DPCPU_ID_GET(cpu, ipi_handle[ipi])); 1002255040Sgibbs} 1003255040Sgibbs 1004182902Skmacy/* 1005222065Sattilio * send an IPI to a specific CPU. 1006222065Sattilio */ 1007222065Sattiliostatic void 1008222065Sattilioipi_send_cpu(int cpu, u_int ipi) 1009222065Sattilio{ 1010222065Sattilio u_int bitmap, old_pending, new_pending; 1011222065Sattilio 1012222065Sattilio if (IPI_IS_BITMAPED(ipi)) { 1013222065Sattilio bitmap = 1 << ipi; 1014222065Sattilio ipi = IPI_BITMAP_VECTOR; 1015222065Sattilio do { 1016222065Sattilio old_pending = cpu_ipi_pending[cpu]; 1017222065Sattilio new_pending = old_pending | bitmap; 1018222065Sattilio } while (!atomic_cmpset_int(&cpu_ipi_pending[cpu], 1019222065Sattilio old_pending, new_pending)); 1020222065Sattilio if (!old_pending) 1021222065Sattilio ipi_pcpu(cpu, RESCHEDULE_VECTOR); 1022222065Sattilio } else { 1023222065Sattilio KASSERT(call_data != NULL, ("call_data not set")); 1024222065Sattilio ipi_pcpu(cpu, CALL_FUNCTION_VECTOR); 1025222065Sattilio } 1026222065Sattilio} 1027222065Sattilio 1028222065Sattilio/* 1029182902Skmacy * Flush the TLB on all other CPU's 1030182902Skmacy */ 1031182902Skmacystatic void 1032182902Skmacysmp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2) 1033182902Skmacy{ 1034182902Skmacy u_int ncpu; 1035184198Skmacy struct _call_data data; 1036182902Skmacy 1037182902Skmacy ncpu = mp_ncpus - 1; /* does not shootdown self */ 1038182902Skmacy if (ncpu < 1) 1039182902Skmacy return; /* no other cpus */ 1040182902Skmacy if (!(read_eflags() & PSL_I)) 1041182902Skmacy panic("%s: interrupts disabled", __func__); 1042182902Skmacy mtx_lock_spin(&smp_ipi_mtx); 1043193098Sadrian KASSERT(call_data == NULL, ("call_data isn't null?!")); 1044193098Sadrian call_data = &data; 1045184224Skmacy call_data->func_id = vector; 1046184112Skmacy call_data->arg1 = addr1; 1047184112Skmacy call_data->arg2 = addr2; 1048182902Skmacy atomic_store_rel_int(&smp_tlb_wait, 0); 1049182902Skmacy ipi_all_but_self(vector); 1050182902Skmacy while (smp_tlb_wait < ncpu) 1051182902Skmacy ia32_pause(); 1052184224Skmacy call_data = NULL; 1053182902Skmacy mtx_unlock_spin(&smp_ipi_mtx); 1054182902Skmacy} 1055182902Skmacy 1056182902Skmacystatic void 1057255158Sgibbssmp_targeted_tlb_shootdown(cpuset_t mask, u_int vector, vm_offset_t addr1, 1058255158Sgibbs vm_offset_t addr2) 1059182902Skmacy{ 1060222813Sattilio int cpu, ncpu, othercpus; 1061184224Skmacy struct _call_data data; 1062182902Skmacy 1063182902Skmacy othercpus = mp_ncpus - 1; 1064222813Sattilio if (CPU_ISFULLSET(&mask)) { 1065222813Sattilio if (othercpus < 1) 1066182902Skmacy return; 1067182902Skmacy } else { 1068223758Sattilio CPU_CLR(PCPU_GET(cpuid), &mask); 1069222813Sattilio if (CPU_EMPTY(&mask)) 1070182902Skmacy return; 1071182902Skmacy } 1072182902Skmacy if (!(read_eflags() & PSL_I)) 1073182902Skmacy panic("%s: interrupts disabled", __func__); 1074182902Skmacy mtx_lock_spin(&smp_ipi_mtx); 1075193098Sadrian KASSERT(call_data == NULL, ("call_data isn't null?!")); 1076184224Skmacy call_data = &data; 1077184224Skmacy call_data->func_id = vector; 1078184224Skmacy call_data->arg1 = addr1; 1079184224Skmacy call_data->arg2 = addr2; 1080182902Skmacy atomic_store_rel_int(&smp_tlb_wait, 0); 1081222813Sattilio if (CPU_ISFULLSET(&mask)) { 1082222813Sattilio ncpu = othercpus; 1083182902Skmacy ipi_all_but_self(vector); 1084222813Sattilio } else { 1085222813Sattilio ncpu = 0; 1086251703Sjeff while ((cpu = CPU_FFS(&mask)) != 0) { 1087222813Sattilio cpu--; 1088222813Sattilio CPU_CLR(cpu, &mask); 1089222813Sattilio CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, 1090222813Sattilio vector); 1091222813Sattilio ipi_send_cpu(cpu, vector); 1092222813Sattilio ncpu++; 1093222813Sattilio } 1094222813Sattilio } 1095182902Skmacy while (smp_tlb_wait < ncpu) 1096182902Skmacy ia32_pause(); 1097184224Skmacy call_data = NULL; 1098182902Skmacy mtx_unlock_spin(&smp_ipi_mtx); 1099182902Skmacy} 1100182902Skmacy 1101182902Skmacyvoid 1102182902Skmacysmp_cache_flush(void) 1103182902Skmacy{ 1104182902Skmacy 1105182902Skmacy if (smp_started) 1106182902Skmacy smp_tlb_shootdown(IPI_INVLCACHE, 0, 0); 1107182902Skmacy} 1108182902Skmacy 1109182902Skmacyvoid 1110182902Skmacysmp_invltlb(void) 1111182902Skmacy{ 1112182902Skmacy 1113182902Skmacy if (smp_started) { 1114182902Skmacy smp_tlb_shootdown(IPI_INVLTLB, 0, 0); 1115182902Skmacy } 1116182902Skmacy} 1117182902Skmacy 1118182902Skmacyvoid 1119182902Skmacysmp_invlpg(vm_offset_t addr) 1120182902Skmacy{ 1121182902Skmacy 1122182902Skmacy if (smp_started) { 1123182902Skmacy smp_tlb_shootdown(IPI_INVLPG, addr, 0); 1124182902Skmacy } 1125182902Skmacy} 1126182902Skmacy 1127182902Skmacyvoid 1128182902Skmacysmp_invlpg_range(vm_offset_t addr1, vm_offset_t addr2) 1129182902Skmacy{ 1130182902Skmacy 1131182902Skmacy if (smp_started) { 1132182902Skmacy smp_tlb_shootdown(IPI_INVLRNG, addr1, addr2); 1133182902Skmacy } 1134182902Skmacy} 1135182902Skmacy 1136182902Skmacyvoid 1137222813Sattiliosmp_masked_invltlb(cpuset_t mask) 1138182902Skmacy{ 1139182902Skmacy 1140182902Skmacy if (smp_started) { 1141182902Skmacy smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, 0, 0); 1142182902Skmacy } 1143182902Skmacy} 1144182902Skmacy 1145182902Skmacyvoid 1146222813Sattiliosmp_masked_invlpg(cpuset_t mask, vm_offset_t addr) 1147182902Skmacy{ 1148182902Skmacy 1149182902Skmacy if (smp_started) { 1150182902Skmacy smp_targeted_tlb_shootdown(mask, IPI_INVLPG, addr, 0); 1151182902Skmacy } 1152182902Skmacy} 1153182902Skmacy 1154182902Skmacyvoid 1155222813Sattiliosmp_masked_invlpg_range(cpuset_t mask, vm_offset_t addr1, vm_offset_t addr2) 1156182902Skmacy{ 1157182902Skmacy 1158182902Skmacy if (smp_started) { 1159182902Skmacy smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, addr1, addr2); 1160182902Skmacy } 1161182902Skmacy} 1162182902Skmacy 1163182902Skmacy/* 1164182902Skmacy * send an IPI to a set of cpus. 1165182902Skmacy */ 1166182902Skmacyvoid 1167222813Sattilioipi_selected(cpuset_t cpus, u_int ipi) 1168182902Skmacy{ 1169182902Skmacy int cpu; 1170182902Skmacy 1171196256Sattilio /* 1172196256Sattilio * IPI_STOP_HARD maps to a NMI and the trap handler needs a bit 1173196256Sattilio * of help in order to understand what is the source. 1174196256Sattilio * Set the mask of receiving CPUs for this purpose. 1175196256Sattilio */ 1176196256Sattilio if (ipi == IPI_STOP_HARD) 1177222813Sattilio CPU_OR_ATOMIC(&ipi_nmi_pending, &cpus); 1178196256Sattilio 1179251703Sjeff while ((cpu = CPU_FFS(&cpus)) != 0) { 1180182902Skmacy cpu--; 1181222813Sattilio CPU_CLR(cpu, &cpus); 1182222065Sattilio CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi); 1183222065Sattilio ipi_send_cpu(cpu, ipi); 1184182902Skmacy } 1185182902Skmacy} 1186182902Skmacy 1187182902Skmacy/* 1188210939Sjhb * send an IPI to a specific CPU. 1189210939Sjhb */ 1190210939Sjhbvoid 1191210939Sjhbipi_cpu(int cpu, u_int ipi) 1192210939Sjhb{ 1193210939Sjhb 1194210939Sjhb /* 1195210939Sjhb * IPI_STOP_HARD maps to a NMI and the trap handler needs a bit 1196210939Sjhb * of help in order to understand what is the source. 1197210939Sjhb * Set the mask of receiving CPUs for this purpose. 1198210939Sjhb */ 1199210939Sjhb if (ipi == IPI_STOP_HARD) 1200222813Sattilio CPU_SET_ATOMIC(cpu, &ipi_nmi_pending); 1201210939Sjhb 1202210939Sjhb CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi); 1203222065Sattilio ipi_send_cpu(cpu, ipi); 1204210939Sjhb} 1205210939Sjhb 1206210939Sjhb/* 1207182902Skmacy * send an IPI to all CPUs EXCEPT myself 1208182902Skmacy */ 1209182902Skmacyvoid 1210182902Skmacyipi_all_but_self(u_int ipi) 1211182902Skmacy{ 1212222813Sattilio cpuset_t other_cpus; 1213196256Sattilio 1214196256Sattilio /* 1215196256Sattilio * IPI_STOP_HARD maps to a NMI and the trap handler needs a bit 1216196256Sattilio * of help in order to understand what is the source. 1217196256Sattilio * Set the mask of receiving CPUs for this purpose. 1218196256Sattilio */ 1219223758Sattilio other_cpus = all_cpus; 1220223758Sattilio CPU_CLR(PCPU_GET(cpuid), &other_cpus); 1221196256Sattilio if (ipi == IPI_STOP_HARD) 1222222813Sattilio CPU_OR_ATOMIC(&ipi_nmi_pending, &other_cpus); 1223196256Sattilio 1224182902Skmacy CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi); 1225222813Sattilio ipi_selected(other_cpus, ipi); 1226182902Skmacy} 1227182902Skmacy 1228196256Sattilioint 1229196256Sattilioipi_nmi_handler() 1230196256Sattilio{ 1231223758Sattilio u_int cpuid; 1232196256Sattilio 1233196256Sattilio /* 1234196256Sattilio * As long as there is not a simple way to know about a NMI's 1235196256Sattilio * source, if the bitmask for the current CPU is present in 1236196256Sattilio * the global pending bitword an IPI_STOP_HARD has been issued 1237196256Sattilio * and should be handled. 1238196256Sattilio */ 1239223758Sattilio cpuid = PCPU_GET(cpuid); 1240223758Sattilio if (!CPU_ISSET(cpuid, &ipi_nmi_pending)) 1241196256Sattilio return (1); 1242196256Sattilio 1243223758Sattilio CPU_CLR_ATOMIC(cpuid, &ipi_nmi_pending); 1244196256Sattilio cpustop_handler(); 1245196256Sattilio return (0); 1246196256Sattilio} 1247196256Sattilio 1248182902Skmacy/* 1249182902Skmacy * Handle an IPI_STOP by saving our current context and spinning until we 1250182902Skmacy * are resumed. 1251182902Skmacy */ 1252182902Skmacyvoid 1253182902Skmacycpustop_handler(void) 1254182902Skmacy{ 1255222813Sattilio int cpu; 1256182902Skmacy 1257222813Sattilio cpu = PCPU_GET(cpuid); 1258222813Sattilio 1259182902Skmacy savectx(&stoppcbs[cpu]); 1260182902Skmacy 1261182902Skmacy /* Indicate that we are stopped */ 1262223758Sattilio CPU_SET_ATOMIC(cpu, &stopped_cpus); 1263182902Skmacy 1264182902Skmacy /* Wait for restart */ 1265223758Sattilio while (!CPU_ISSET(cpu, &started_cpus)) 1266182902Skmacy ia32_pause(); 1267182902Skmacy 1268223758Sattilio CPU_CLR_ATOMIC(cpu, &started_cpus); 1269223758Sattilio CPU_CLR_ATOMIC(cpu, &stopped_cpus); 1270182902Skmacy 1271182902Skmacy if (cpu == 0 && cpustop_restartfunc != NULL) { 1272182902Skmacy cpustop_restartfunc(); 1273182902Skmacy cpustop_restartfunc = NULL; 1274182902Skmacy } 1275182902Skmacy} 1276182902Skmacy 1277182902Skmacy/* 1278264118Sroyger * Handlers for TLB related IPIs 1279264118Sroyger * 1280264118Sroyger * On i386 Xen PV this are no-ops since this port doesn't support SMP. 1281264118Sroyger */ 1282264118Sroygervoid 1283264118Sroygerinvltlb_handler(void) 1284264118Sroyger{ 1285264118Sroyger} 1286264118Sroyger 1287264118Sroygervoid 1288264118Sroygerinvlpg_handler(void) 1289264118Sroyger{ 1290264118Sroyger} 1291264118Sroyger 1292264118Sroygervoid 1293264118Sroygerinvlrng_handler(void) 1294264118Sroyger{ 1295264118Sroyger} 1296264118Sroyger 1297264118Sroygervoid 1298264118Sroygerinvlcache_handler(void) 1299264118Sroyger{ 1300264118Sroyger} 1301264118Sroyger 1302264118Sroyger/* 1303182902Skmacy * This is called once the rest of the system is up and running and we're 1304182902Skmacy * ready to let the AP's out of the pen. 1305182902Skmacy */ 1306182902Skmacystatic void 1307182902Skmacyrelease_aps(void *dummy __unused) 1308182902Skmacy{ 1309182902Skmacy 1310182902Skmacy if (mp_ncpus == 1) 1311182902Skmacy return; 1312182902Skmacy atomic_store_rel_int(&aps_ready, 1); 1313182902Skmacy while (smp_started == 0) 1314182902Skmacy ia32_pause(); 1315182902Skmacy} 1316182902SkmacySYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL); 1317255040SgibbsSYSINIT(start_ipis, SI_SUB_SMP, SI_ORDER_ANY, xen_smp_intr_init_cpus, NULL); 1318255040SgibbsSYSINIT(start_cpu, SI_SUB_INTR, SI_ORDER_ANY, xen_smp_intr_setup_cpus, NULL); 1319