/*-
 * Copyright (c) 1996, by Steve Passe
 * Copyright (c) 2008, by Kip Macy
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. The name of the developer may NOT be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/10/sys/i386/xen/mp_machdep.c 276076 2014-12-22 19:53:55Z jhb $");

#include "opt_apic.h"
#include "opt_cpu.h"
#include "opt_kstack_pages.h"
#include "opt_mp_watchdog.h"
#include "opt_pmap.h"
#include "opt_sched.h"
#include "opt_smp.h"

#if !defined(lint)
#if !defined(SMP)
#error How did you get here?
#endif

#ifndef DEV_APIC
#error The apic device is required for SMP, add "device apic" to your config file.
#endif
#if defined(CPU_DISABLE_CMPXCHG) && !defined(COMPILING_LINT)
#error SMP not supported with CPU_DISABLE_CMPXCHG
#endif
#endif /* not lint */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/cons.h>   /* cngetc() */
#include <sys/cpuset.h>
#ifdef GPROF
#include <sys/gmon.h>
#endif
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/memrange.h>
#include <sys/mutex.h>
#include <sys/pcpu.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>
#include <vm/vm_page.h>

#include <x86/apicreg.h>
#include <machine/md_var.h>
#include <machine/mp_watchdog.h>
#include <machine/pcb.h>
#include <machine/psl.h>
#include <machine/smp.h>
#include <machine/specialreg.h>
#include <machine/pcpu.h>

#include <xen/xen-os.h>
#include <xen/evtchn.h>
#include <xen/xen_intr.h>
#include <xen/hypervisor.h>
#include <xen/interface/vcpu.h>

/*---------------------------- Extern Declarations ---------------------------*/
extern struct pcpu __pcpu[];

extern void Xhypervisor_callback(void);
extern void failsafe_callback(void);
extern void pmap_lazyfix_action(void);

/*--------------------------- Forward Declarations ---------------------------*/
static driver_filter_t smp_reschedule_interrupt;
static driver_filter_t smp_call_function_interrupt;
static void assign_cpu_ids(void);
static void set_interrupt_apic_ids(void);
static int start_all_aps(void);
static int start_ap(int apic_id);
static void release_aps(void *dummy);

/*---------------------------------- Macros ----------------------------------*/
#define IPI_TO_IDX(ipi) ((ipi) - APIC_IPI_INTS)

/*-------------------------------- Local Types -------------------------------*/
typedef void call_data_func_t(uintptr_t, uintptr_t);

struct cpu_info {
        int     cpu_present:1;
        int     cpu_bsp:1;
        int     cpu_disabled:1;
};

struct xen_ipi_handler
{
        driver_filter_t *filter;
        const char      *description;
};

enum {
        RESCHEDULE_VECTOR,
        CALL_FUNCTION_VECTOR,
};

/*-------------------------------- Global Data -------------------------------*/
static u_int hyperthreading_cpus;
static cpuset_t hyperthreading_cpus_mask;

int mp_naps;            /* # of Application processors */
int boot_cpu_id = -1;   /* designated BSP */

static int bootAP;
static union descriptor *bootAPgdt;
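/*
 * Note: each AP gets its own 512-descriptor slice of the GDT;
 * start_all_aps() points bootAPgdt at "gdt + 512 * cpu" and
 * cpu_initialize_context() advertises the same 512 entries to the
 * hypervisor through ctxt.gdt_ents.
 */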

/* Free these after use */
void *bootstacks[MAXCPU];

struct pcb stoppcbs[MAXCPU];

/* Variables needed for SMP tlb shootdown. */
vm_offset_t smp_tlb_addr1;
vm_offset_t smp_tlb_addr2;
volatile int smp_tlb_wait;

static u_int logical_cpus;
static volatile cpuset_t ipi_nmi_pending;

/* used to hold the AP's until we are ready to release them */
static struct mtx ap_boot_mtx;

/* Set to 1 once we're ready to let the APs out of the pen. */
static volatile int aps_ready = 0;

/*
 * Store data from cpu_add() until later in the boot when we actually setup
 * the APs.
 */
static struct cpu_info cpu_info[MAX_APIC_ID + 1];
int cpu_apic_ids[MAXCPU];
int apic_cpuids[MAX_APIC_ID + 1];

/* Holds pending bitmap based IPIs per CPU */
static volatile u_int cpu_ipi_pending[MAXCPU];

static int cpu_logical;
static int cpu_cores;

static const struct xen_ipi_handler xen_ipis[] =
{
        [RESCHEDULE_VECTOR]     = { smp_reschedule_interrupt, "resched" },
        [CALL_FUNCTION_VECTOR]  = { smp_call_function_interrupt, "callfunc" }
};

/*------------------------------- Per-CPU Data -------------------------------*/
DPCPU_DEFINE(xen_intr_handle_t, ipi_handle[nitems(xen_ipis)]);
DPCPU_DEFINE(struct vcpu_info *, vcpu_info);

/*------------------------------ Implementation ------------------------------*/
struct cpu_group *
cpu_topo(void)
{
        if (cpu_cores == 0)
                cpu_cores = 1;
        if (cpu_logical == 0)
                cpu_logical = 1;
        if (mp_ncpus % (cpu_cores * cpu_logical) != 0) {
                printf("WARNING: Non-uniform processors.\n");
                printf("WARNING: Using suboptimal topology.\n");
                return (smp_topo_none());
        }
        /*
         * No multi-core or hyper-threaded.
         */
        if (cpu_logical * cpu_cores == 1)
                return (smp_topo_none());
        /*
         * Only HTT no multi-core.
         */
        if (cpu_logical > 1 && cpu_cores == 1)
                return (smp_topo_1level(CG_SHARE_L1, cpu_logical, CG_FLAG_HTT));
        /*
         * Only multi-core no HTT.
         */
        if (cpu_cores > 1 && cpu_logical == 1)
                return (smp_topo_1level(CG_SHARE_NONE, cpu_cores, 0));
        /*
         * Both HTT and multi-core.
         */
        return (smp_topo_2level(CG_SHARE_NONE, cpu_cores,
            CG_SHARE_L1, cpu_logical, CG_FLAG_HTT));
}
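/*
 * Note: under Xen PV there is no real-mode trampoline to stash in base
 * memory; APs are instead brought up through the VCPUOP_initialise and
 * VCPUOP_up hypercalls in cpu_initialize_context(), so mp_bootaddress()
 * below just hands basemem back unchanged.
 */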
/*
 * Calculate usable address in base memory for AP trampoline code.
 */
u_int
mp_bootaddress(u_int basemem)
{

        return (basemem);
}

void
cpu_add(u_int apic_id, char boot_cpu)
{

        if (apic_id > MAX_APIC_ID) {
                panic("SMP: APIC ID %d too high", apic_id);
                return;
        }
        KASSERT(cpu_info[apic_id].cpu_present == 0, ("CPU %d added twice",
            apic_id));
        cpu_info[apic_id].cpu_present = 1;
        if (boot_cpu) {
                KASSERT(boot_cpu_id == -1,
                    ("CPU %d claims to be BSP, but CPU %d already is", apic_id,
                    boot_cpu_id));
                boot_cpu_id = apic_id;
                cpu_info[apic_id].cpu_bsp = 1;
        }
        if (mp_ncpus < MAXCPU)
                mp_ncpus++;
        if (bootverbose)
                printf("SMP: Added CPU %d (%s)\n", apic_id, boot_cpu ? "BSP" :
                    "AP");
}

void
cpu_mp_setmaxid(void)
{

        mp_maxid = MAXCPU - 1;
}

int
cpu_mp_probe(void)
{

        /*
         * Always record BSP in CPU map so that the mbuf init code works
         * correctly.
         */
        CPU_SETOF(0, &all_cpus);
        if (mp_ncpus == 0) {
                /*
                 * No CPUs were found, so this must be a UP system.  Setup
                 * the variables to represent a system with a single CPU
                 * with an id of 0.
                 */
                mp_ncpus = 1;
                return (0);
        }

        /* At least one CPU was found. */
        if (mp_ncpus == 1) {
                /*
                 * One CPU was found, so this must be a UP system with
                 * an I/O APIC.
                 */
                return (0);
        }

        /* At least two CPUs were found. */
        return (1);
}

/*
 * Initialize the IPI handlers and start up the AP's.
 */
void
cpu_mp_start(void)
{
        int i;

        /* Initialize the logical ID to APIC ID table. */
        for (i = 0; i < MAXCPU; i++) {
                cpu_apic_ids[i] = -1;
                cpu_ipi_pending[i] = 0;
        }

        /* Set boot_cpu_id if needed. */
        if (boot_cpu_id == -1) {
                boot_cpu_id = PCPU_GET(apic_id);
                cpu_info[boot_cpu_id].cpu_bsp = 1;
        } else
                KASSERT(boot_cpu_id == PCPU_GET(apic_id),
                    ("BSP's APIC ID doesn't match boot_cpu_id"));
        cpu_apic_ids[0] = boot_cpu_id;
        apic_cpuids[boot_cpu_id] = 0;

        assign_cpu_ids();

        /* Start each Application Processor */
        start_all_aps();

        /* Setup the initial logical CPUs info. */
        logical_cpus = 0;
        CPU_ZERO(&logical_cpus_mask);
        if (cpu_feature & CPUID_HTT)
                logical_cpus = (cpu_procinfo & CPUID_HTT_CORES) >> 16;

        set_interrupt_apic_ids();
}


static void
iv_rendezvous(uintptr_t a, uintptr_t b)
{
        smp_rendezvous_action();
}

static void
iv_invltlb(uintptr_t a, uintptr_t b)
{
        xen_tlb_flush();
}

static void
iv_invlpg(uintptr_t a, uintptr_t b)
{
        xen_invlpg(a);
}

static void
iv_invlrng(uintptr_t a, uintptr_t b)
{
        vm_offset_t start = (vm_offset_t)a;
        vm_offset_t end = (vm_offset_t)b;

        while (start < end) {
                xen_invlpg(start);
                start += PAGE_SIZE;
        }
}


static void
iv_invlcache(uintptr_t a, uintptr_t b)
{

        wbinvd();
        atomic_add_int(&smp_tlb_wait, 1);
}

static void
iv_lazypmap(uintptr_t a, uintptr_t b)
{
        pmap_lazyfix_action();
        atomic_add_int(&smp_tlb_wait, 1);
}

/*
 * These start from "IPI offset" APIC_IPI_INTS
 */
static call_data_func_t *ipi_vectors[6] =
{
        iv_rendezvous,
        iv_invltlb,
        iv_invlpg,
        iv_invlrng,
        iv_invlcache,
        iv_lazypmap,
};
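/*
 * Note: smp_call_function_interrupt() turns the func_id stashed in
 * call_data into an index into ipi_vectors[] with IPI_TO_IDX(), so the
 * order of the entries above has to match the IPI numbers that start at
 * APIC_IPI_INTS.
 */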
/*
 * Reschedule callback.  Nothing to do,
 * all the work is done automatically when
 * we return from the interrupt.
 */
static int
smp_reschedule_interrupt(void *unused)
{
        int cpu = PCPU_GET(cpuid);
        u_int ipi_bitmap;

        ipi_bitmap = atomic_readandclear_int(&cpu_ipi_pending[cpu]);

        if (ipi_bitmap & (1 << IPI_PREEMPT)) {
#ifdef COUNT_IPIS
                (*ipi_preempt_counts[cpu])++;
#endif
                sched_preempt(curthread);
        }

        if (ipi_bitmap & (1 << IPI_AST)) {
#ifdef COUNT_IPIS
                (*ipi_ast_counts[cpu])++;
#endif
                /* Nothing to do for AST */
        }
        return (FILTER_HANDLED);
}

struct _call_data {
        uint16_t func_id;
        uint16_t wait;
        uintptr_t arg1;
        uintptr_t arg2;
        atomic_t started;
        atomic_t finished;
};

static struct _call_data *call_data;

static int
smp_call_function_interrupt(void *unused)
{
        call_data_func_t *func;
        uintptr_t arg1 = call_data->arg1;
        uintptr_t arg2 = call_data->arg2;
        int wait = call_data->wait;
        atomic_t *started = &call_data->started;
        atomic_t *finished = &call_data->finished;

        /* We only handle function IPIs, not bitmap IPIs */
        if (call_data->func_id < APIC_IPI_INTS ||
            call_data->func_id > IPI_BITMAP_VECTOR)
                panic("invalid function id %u", call_data->func_id);

        func = ipi_vectors[IPI_TO_IDX(call_data->func_id)];
        /*
         * Notify initiating CPU that I've grabbed the data and am
         * about to execute the function
         */
        mb();
        atomic_inc(started);
        /*
         * At this point the info structure may be out of scope unless wait==1
         */
        (*func)(arg1, arg2);

        if (wait) {
                mb();
                atomic_inc(finished);
        }
        atomic_add_int(&smp_tlb_wait, 1);
        return (FILTER_HANDLED);
}
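/*
 * Note on the call_data handshake: the initiating CPU (see
 * smp_tlb_shootdown() below) publishes the single global call_data
 * pointer while holding smp_ipi_mtx and then spins on smp_tlb_wait;
 * each target bumps "started" before running the function and bumps
 * smp_tlb_wait when it is done, which is what releases the initiator.
 */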
/*
 * Print various information about the SMP system hardware and setup.
 */
void
cpu_mp_announce(void)
{
        int i, x;

        /* List CPUs */
        printf(" cpu0 (BSP): APIC ID: %2d\n", boot_cpu_id);
        for (i = 1, x = 0; x <= MAX_APIC_ID; x++) {
                if (!cpu_info[x].cpu_present || cpu_info[x].cpu_bsp)
                        continue;
                if (cpu_info[x].cpu_disabled)
                        printf("  cpu (AP): APIC ID: %2d (disabled)\n", x);
                else {
                        KASSERT(i < mp_ncpus,
                            ("mp_ncpus and actual cpus are out of whack"));
                        printf(" cpu%d (AP): APIC ID: %2d\n", i++, x);
                }
        }
}

static int
xen_smp_cpu_init(unsigned int cpu)
{
        xen_intr_handle_t *ipi_handle;
        const struct xen_ipi_handler *ipi;
        int idx, rc;

        ipi_handle = DPCPU_ID_GET(cpu, ipi_handle);
        for (ipi = xen_ipis, idx = 0; idx < nitems(xen_ipis); ipi++, idx++) {

                /*
                 * The PCPU variable pc_device is not initialized on i386 PV,
                 * so we have to use the root_bus device in order to setup
                 * the IPIs.
                 */
                rc = xen_intr_alloc_and_bind_ipi(root_bus, cpu,
                    ipi->filter, INTR_TYPE_TTY, &ipi_handle[idx]);
                if (rc != 0) {
                        printf("Unable to allocate a XEN IPI port. "
                            "Error %d\n", rc);
                        break;
                }
                xen_intr_describe(ipi_handle[idx], "%s", ipi->description);
        }

        for (; idx < nitems(xen_ipis); idx++)
                ipi_handle[idx] = NULL;

        if (rc == 0)
                return (0);

        /* Either all are successfully mapped, or none at all. */
        for (idx = 0; idx < nitems(xen_ipis); idx++) {
                if (ipi_handle[idx] == NULL)
                        continue;

                xen_intr_unbind(ipi_handle[idx]);
                ipi_handle[idx] = NULL;
        }

        return (rc);
}

static void
xen_smp_intr_init_cpus(void *unused)
{
        int i;

        for (i = 0; i < mp_ncpus; i++)
                xen_smp_cpu_init(i);
}

static void
xen_smp_intr_setup_cpus(void *unused)
{
        int i;

        for (i = 0; i < mp_ncpus; i++)
                DPCPU_ID_SET(i, vcpu_info,
                    &HYPERVISOR_shared_info->vcpu_info[i]);
}

#define MTOPSIZE (1<<(14 + PAGE_SHIFT))
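/*
 * AP bring-up handshake as wired up in this file: start_ap() snapshots
 * mp_naps and waits for it to move; the AP lands in init_secondary()
 * below, bumps mp_naps to signal the BSP, and then spins on aps_ready
 * until release_aps() lets everyone out of the pen.
 */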

/*
 * AP CPUs call this to initialize themselves.
 */
void
init_secondary(void)
{
        vm_offset_t addr;
        u_int   cpuid;
        int     gsel_tss;

        /* bootAP is set in start_ap() to our ID. */
        PCPU_SET(currentldt, _default_ldt);
        gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
#if 0
        gdt[bootAP * NGDT + GPROC0_SEL].sd.sd_type = SDT_SYS386TSS;
#endif
        PCPU_SET(common_tss.tss_esp0, 0); /* not used until after switch */
        PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL));
        PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16);
#if 0
        PCPU_SET(tss_gdt, &gdt[bootAP * NGDT + GPROC0_SEL].sd);

        PCPU_SET(common_tssd, *PCPU_GET(tss_gdt));
#endif
        PCPU_SET(fsgs_gdt, &gdt[GUFS_SEL].sd);

        /*
         * Set to a known state:
         *   Set by mpboot.s: CR0_PG, CR0_PE
         *   Set by cpu_setregs: CR0_NE, CR0_MP, CR0_TS, CR0_WP, CR0_AM
         */
        /*
         * signal our startup to the BSP.
         */
        mp_naps++;

        /* Spin until the BSP releases the AP's. */
        while (!aps_ready)
                ia32_pause();

        /* BSP may have changed PTD while we were waiting */
        invltlb();
        for (addr = 0; addr < NKPT * NBPDR - 1; addr += PAGE_SIZE)
                invlpg(addr);

#if 0
        /* set up SSE/NX */
        initializecpu();
#endif

        /* set up FPU state on the AP */
        npxinit();
#if 0
        /* A quick check from sanity claus */
        if (PCPU_GET(apic_id) != lapic_id()) {
                printf("SMP: cpuid = %d\n", PCPU_GET(cpuid));
                printf("SMP: actual apic_id = %d\n", lapic_id());
                printf("SMP: correct apic_id = %d\n", PCPU_GET(apic_id));
                panic("cpuid mismatch! boom!!");
        }
#endif

        /* Initialize curthread. */
        KASSERT(PCPU_GET(idlethread) != NULL, ("no idle thread"));
        PCPU_SET(curthread, PCPU_GET(idlethread));

        mtx_lock_spin(&ap_boot_mtx);
#if 0

        /* Init local apic for irq's */
        lapic_setup(1);
#endif
        smp_cpus++;

        cpuid = PCPU_GET(cpuid);
        CTR1(KTR_SMP, "SMP: AP CPU #%d Launched", cpuid);
        printf("SMP: AP CPU #%d Launched!\n", cpuid);

        /* Determine if we are a logical CPU. */
        if (logical_cpus > 1 && PCPU_GET(apic_id) % logical_cpus != 0)
                CPU_SET(cpuid, &logical_cpus_mask);

        /* Determine if we are a hyperthread. */
        if (hyperthreading_cpus > 1 &&
            PCPU_GET(apic_id) % hyperthreading_cpus != 0)
                CPU_SET(cpuid, &hyperthreading_cpus_mask);
#if 0
        if (bootverbose)
                lapic_dump("AP");
#endif
        if (smp_cpus == mp_ncpus) {
                /* enable IPI's, tlb shootdown, freezes etc */
                atomic_store_rel_int(&smp_started, 1);
        }

        mtx_unlock_spin(&ap_boot_mtx);

        /* wait until all the AP's are up */
        while (smp_started == 0)
                ia32_pause();

        PCPU_SET(curthread, PCPU_GET(idlethread));

        /* Start per-CPU event timers. */
        cpu_initclocks_ap();

        /* enter the scheduler */
        sched_throw(NULL);

        panic("scheduler returned us to %s", __func__);
        /* NOTREACHED */
}

/*******************************************************************
 * local functions and data
 */

/*
 * We tell the I/O APIC code about all the CPUs we want to receive
 * interrupts.  If we don't want certain CPUs to receive IRQs we
 * can simply not tell the I/O APIC code about them in this function.
 * We also do not tell it about the BSP since it tells itself about
 * the BSP internally to work with UP kernels and on UP machines.
 */
static void
set_interrupt_apic_ids(void)
{
        u_int i, apic_id;

        for (i = 0; i < MAXCPU; i++) {
                apic_id = cpu_apic_ids[i];
                if (apic_id == -1)
                        continue;
                if (cpu_info[apic_id].cpu_bsp)
                        continue;
                if (cpu_info[apic_id].cpu_disabled)
                        continue;

                /* Don't let hyperthreads service interrupts. */
                if (hyperthreading_cpus > 1 &&
                    apic_id % hyperthreading_cpus != 0)
                        continue;

                intr_add_cpu(i);
        }
}

/*
 * Assign logical CPU IDs to local APICs.
 */
static void
assign_cpu_ids(void)
{
        u_int i;

        /* Check for explicitly disabled CPUs. */
        for (i = 0; i <= MAX_APIC_ID; i++) {
                if (!cpu_info[i].cpu_present || cpu_info[i].cpu_bsp)
                        continue;

                /* Don't use this CPU if it has been disabled by a tunable. */
                if (resource_disabled("lapic", i)) {
                        cpu_info[i].cpu_disabled = 1;
                        continue;
                }
        }

        /*
         * Assign CPU IDs to local APIC IDs and disable any CPUs
         * beyond MAXCPU.  CPU 0 has already been assigned to the BSP,
         * so we only have to assign IDs for APs.
         */
        mp_ncpus = 1;
        for (i = 0; i <= MAX_APIC_ID; i++) {
                if (!cpu_info[i].cpu_present || cpu_info[i].cpu_bsp ||
                    cpu_info[i].cpu_disabled)
                        continue;

                if (mp_ncpus < MAXCPU) {
                        cpu_apic_ids[mp_ncpus] = i;
                        apic_cpuids[i] = mp_ncpus;
                        mp_ncpus++;
                } else
                        cpu_info[i].cpu_disabled = 1;
        }
        KASSERT(mp_maxid >= mp_ncpus - 1,
            ("%s: counters out of sync: max %d, count %d", __func__, mp_maxid,
            mp_ncpus));
}

/*
 * start each AP in our list
 */
/* Lowest 1MB is already mapped: don't touch */
#define TMPMAP_START 1
int
start_all_aps(void)
{
        int x, apic_id, cpu;
        struct pcpu *pc;

        mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN);

        /* set up temporary P==V mapping for AP boot */
        /* XXX this is a hack, we should boot the AP on its own stack/PTD */

        /* start each AP */
        for (cpu = 1; cpu < mp_ncpus; cpu++) {
                apic_id = cpu_apic_ids[cpu];

                bootAP = cpu;
                bootAPgdt = gdt + (512 * cpu);

                /* Get per-cpu data */
                pc = &__pcpu[bootAP];
                pcpu_init(pc, bootAP, sizeof(struct pcpu));
                dpcpu_init((void *)kmem_malloc(kernel_arena, DPCPU_SIZE,
                    M_WAITOK | M_ZERO), bootAP);
                pc->pc_apic_id = cpu_apic_ids[bootAP];
                pc->pc_vcpu_id = cpu_apic_ids[bootAP];
                pc->pc_prvspace = pc;
                pc->pc_curthread = 0;

                gdt_segs[GPRIV_SEL].ssd_base = (int) pc;
                gdt_segs[GPROC0_SEL].ssd_base = (int) &pc->pc_common_tss;

                PT_SET_MA(bootAPgdt, VTOM(bootAPgdt) | PG_V | PG_RW);
                bzero(bootAPgdt, PAGE_SIZE);
                for (x = 0; x < NGDT; x++)
                        ssdtosd(&gdt_segs[x], &bootAPgdt[x].sd);
                PT_SET_MA(bootAPgdt, vtomach(bootAPgdt) | PG_V);
#ifdef notyet

                if (HYPERVISOR_vcpu_op(VCPUOP_get_physid, cpu, &cpu_id) == 0) {
                        apicid = xen_vcpu_physid_to_x86_apicid(cpu_id.phys_id);
                        acpiid = xen_vcpu_physid_to_x86_acpiid(cpu_id.phys_id);
#ifdef CONFIG_ACPI
                        if (acpiid != 0xff)
                                x86_acpiid_to_apicid[acpiid] = apicid;
#endif
                }
#endif

                /* attempt to start the Application Processor */
                if (!start_ap(cpu)) {
                        printf("AP #%d (PHY# %d) failed!\n", cpu, apic_id);
                        /* better panic as the AP may be running loose */
                        printf("panic y/n? [y] ");
                        if (cngetc() != 'n')
                                panic("bye-bye");
                }

                CPU_SET(cpu, &all_cpus);        /* record AP in CPU map */
        }

        pmap_invalidate_range(kernel_pmap, 0, NKPT * NBPDR - 1);

        /* number of APs actually started */
        return (mp_naps);
}

extern uint8_t *pcpu_boot_stack;
extern trap_info_t trap_table[];

static void
smp_trap_init(trap_info_t *trap_ctxt)
{
        const trap_info_t *t = trap_table;

        for (t = trap_table; t->address; t++) {
                trap_ctxt[t->vector].flags = t->flags;
                trap_ctxt[t->vector].cs = t->cs;
                trap_ctxt[t->vector].address = t->address;
        }
}

extern struct rwlock pvh_global_lock;
extern int nkpt;
static void
cpu_initialize_context(unsigned int cpu)
{
        /* vcpu_guest_context_t is too large to allocate on the stack.
         * Hence we allocate statically and protect it with a lock */
        vm_page_t m[NPGPTD + 2];
        static vcpu_guest_context_t ctxt;
        vm_offset_t boot_stack;
        vm_offset_t newPTD;
        vm_paddr_t ma[NPGPTD];
        int i;

        /*
         * Page 0,[0-3] PTD
         * Page 1, [4]  boot stack
         * Page [5]     PDPT
         *
         */
        for (i = 0; i < NPGPTD + 2; i++) {
                m[i] = vm_page_alloc(NULL, 0,
                    VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
                    VM_ALLOC_ZERO);

                pmap_zero_page(m[i]);
        }
        boot_stack = kva_alloc(PAGE_SIZE);
        newPTD = kva_alloc(NPGPTD * PAGE_SIZE);
        ma[0] = VM_PAGE_TO_MACH(m[0])|PG_V;

#ifdef PAE
        pmap_kenter(boot_stack, VM_PAGE_TO_PHYS(m[NPGPTD + 1]));
        for (i = 0; i < NPGPTD; i++) {
                ((vm_paddr_t *)boot_stack)[i] =
                ma[i] = VM_PAGE_TO_MACH(m[i])|PG_V;
        }
#endif

        /*
         * Copy cpu0 IdlePTD to new IdlePTD - copying only
         * kernel mappings
         */
        pmap_qenter(newPTD, m, 4);

        memcpy((uint8_t *)newPTD + KPTDI*sizeof(vm_paddr_t),
            (uint8_t *)PTOV(IdlePTD) + KPTDI*sizeof(vm_paddr_t),
            nkpt*sizeof(vm_paddr_t));

        pmap_qremove(newPTD, 4);
        kva_free(newPTD, 4 * PAGE_SIZE);
        /*
         * map actual idle stack to boot_stack
         */
        pmap_kenter(boot_stack, VM_PAGE_TO_PHYS(m[NPGPTD]));

        xen_pgdpt_pin(VM_PAGE_TO_MACH(m[NPGPTD + 1]));
        rw_wlock(&pvh_global_lock);
        for (i = 0; i < 4; i++) {
                int pdir = (PTDPTDI + i) / NPDEPG;
                int curoffset = (PTDPTDI + i) % NPDEPG;

                xen_queue_pt_update((vm_paddr_t)
                    ((ma[pdir] & ~PG_V) + (curoffset*sizeof(vm_paddr_t))),
                    ma[i]);
        }
        PT_UPDATES_FLUSH();
        rw_wunlock(&pvh_global_lock);

        memset(&ctxt, 0, sizeof(ctxt));
        ctxt.flags = VGCF_IN_KERNEL;
        ctxt.user_regs.ds = GSEL(GDATA_SEL, SEL_KPL);
        ctxt.user_regs.es = GSEL(GDATA_SEL, SEL_KPL);
        ctxt.user_regs.fs = GSEL(GPRIV_SEL, SEL_KPL);
        ctxt.user_regs.gs = GSEL(GDATA_SEL, SEL_KPL);
        ctxt.user_regs.cs = GSEL(GCODE_SEL, SEL_KPL);
        ctxt.user_regs.ss = GSEL(GDATA_SEL, SEL_KPL);
        ctxt.user_regs.eip = (unsigned long)init_secondary;
        ctxt.user_regs.eflags = PSL_KERNEL | 0x1000; /* IOPL_RING1 */

        memset(&ctxt.fpu_ctxt, 0, sizeof(ctxt.fpu_ctxt));

        smp_trap_init(ctxt.trap_ctxt);

        ctxt.ldt_ents = 0;
        ctxt.gdt_frames[0] =
            (uint32_t)((uint64_t)vtomach(bootAPgdt) >> PAGE_SHIFT);
        ctxt.gdt_ents = 512;

#ifdef __i386__
        ctxt.user_regs.esp = boot_stack + PAGE_SIZE;

        ctxt.kernel_ss = GSEL(GDATA_SEL, SEL_KPL);
        ctxt.kernel_sp = boot_stack + PAGE_SIZE;

        ctxt.event_callback_cs     = GSEL(GCODE_SEL, SEL_KPL);
        ctxt.event_callback_eip    = (unsigned long)Xhypervisor_callback;
        ctxt.failsafe_callback_cs  = GSEL(GCODE_SEL, SEL_KPL);
        ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;

        ctxt.ctrlreg[3] = VM_PAGE_TO_MACH(m[NPGPTD + 1]);
#else /* __x86_64__ */
        ctxt.user_regs.esp = idle->thread.rsp0 - sizeof(struct pt_regs);
        ctxt.kernel_ss = GSEL(GDATA_SEL, SEL_KPL);
        ctxt.kernel_sp = idle->thread.rsp0;

        ctxt.event_callback_eip    = (unsigned long)hypervisor_callback;
        ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;
        ctxt.syscall_callback_eip  = (unsigned long)system_call;

        ctxt.ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(init_level4_pgt));

        ctxt.gs_base_kernel = (unsigned long)(cpu_pda(cpu));
#endif

        printf("gdtpfn=%lx pdptpfn=%lx\n",
            ctxt.gdt_frames[0],
            ctxt.ctrlreg[3] >> PAGE_SHIFT);

        PANIC_IF(HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, &ctxt));
        DELAY(3000);
        PANIC_IF(HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL));
}

/*
 * This function starts the AP (application processor) identified
 * by the APIC ID 'apic_id'.  It does quite a "song and dance"
 * to accomplish this.  This is necessary because of the nuances
 * of the different hardware we might encounter.  It isn't pretty,
 * but it seems to work.
 */

int cpus;
static int
start_ap(int apic_id)
{
        int ms;

        /* used as a watchpoint to signal AP startup */
        cpus = mp_naps;

        cpu_initialize_context(apic_id);

        /* Wait up to 5 seconds for it to start. */
        for (ms = 0; ms < 5000; ms++) {
                if (mp_naps > cpus)
                        return (1);     /* return SUCCESS */
                DELAY(1000);
        }
        return (0);             /* return FAILURE */
}

static void
ipi_pcpu(int cpu, u_int ipi)
{
        KASSERT((ipi <= nitems(xen_ipis)), ("invalid IPI"));
        xen_intr_signal(DPCPU_ID_GET(cpu, ipi_handle[ipi]));
}
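/*
 * Note: ipi_pcpu() just signals the per-CPU event channel that
 * xen_smp_cpu_init() bound for this vector; on this PV port the event
 * channel stands in for a local APIC IPI.
 */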
/*
 * send an IPI to a specific CPU.
 */
static void
ipi_send_cpu(int cpu, u_int ipi)
{
        u_int bitmap, old_pending, new_pending;

        if (IPI_IS_BITMAPED(ipi)) {
                bitmap = 1 << ipi;
                ipi = IPI_BITMAP_VECTOR;
                do {
                        old_pending = cpu_ipi_pending[cpu];
                        new_pending = old_pending | bitmap;
                } while (!atomic_cmpset_int(&cpu_ipi_pending[cpu],
                    old_pending, new_pending));
                if (!old_pending)
                        ipi_pcpu(cpu, RESCHEDULE_VECTOR);
        } else {
                KASSERT(call_data != NULL, ("call_data not set"));
                ipi_pcpu(cpu, CALL_FUNCTION_VECTOR);
        }
}

/*
 * Flush the TLB on all other CPU's
 */
static void
smp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2)
{
        u_int ncpu;
        struct _call_data data;

        ncpu = mp_ncpus - 1;    /* does not shootdown self */
        if (ncpu < 1)
                return;         /* no other cpus */
        if (!(read_eflags() & PSL_I))
                panic("%s: interrupts disabled", __func__);
        mtx_lock_spin(&smp_ipi_mtx);
        KASSERT(call_data == NULL, ("call_data isn't null?!"));
        call_data = &data;
        call_data->func_id = vector;
        call_data->arg1 = addr1;
        call_data->arg2 = addr2;
        atomic_store_rel_int(&smp_tlb_wait, 0);
        ipi_all_but_self(vector);
        while (smp_tlb_wait < ncpu)
                ia32_pause();
        call_data = NULL;
        mtx_unlock_spin(&smp_ipi_mtx);
}

static void
smp_targeted_tlb_shootdown(cpuset_t mask, u_int vector, vm_offset_t addr1,
    vm_offset_t addr2)
{
        int cpu, ncpu, othercpus;
        struct _call_data data;

        othercpus = mp_ncpus - 1;
        if (CPU_ISFULLSET(&mask)) {
                if (othercpus < 1)
                        return;
        } else {
                CPU_CLR(PCPU_GET(cpuid), &mask);
                if (CPU_EMPTY(&mask))
                        return;
        }
        if (!(read_eflags() & PSL_I))
                panic("%s: interrupts disabled", __func__);
        mtx_lock_spin(&smp_ipi_mtx);
        KASSERT(call_data == NULL, ("call_data isn't null?!"));
        call_data = &data;
        call_data->func_id = vector;
        call_data->arg1 = addr1;
        call_data->arg2 = addr2;
        atomic_store_rel_int(&smp_tlb_wait, 0);
        if (CPU_ISFULLSET(&mask)) {
                ncpu = othercpus;
                ipi_all_but_self(vector);
        } else {
                ncpu = 0;
                while ((cpu = CPU_FFS(&mask)) != 0) {
                        cpu--;
                        CPU_CLR(cpu, &mask);
                        CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu,
                            vector);
                        ipi_send_cpu(cpu, vector);
                        ncpu++;
                }
        }
        while (smp_tlb_wait < ncpu)
                ia32_pause();
        call_data = NULL;
        mtx_unlock_spin(&smp_ipi_mtx);
}

void
smp_cache_flush(void)
{

        if (smp_started)
                smp_tlb_shootdown(IPI_INVLCACHE, 0, 0);
}

void
smp_invltlb(void)
{

        if (smp_started) {
                smp_tlb_shootdown(IPI_INVLTLB, 0, 0);
        }
}

void
smp_invlpg(vm_offset_t addr)
{

        if (smp_started) {
                smp_tlb_shootdown(IPI_INVLPG, addr, 0);
        }
}

void
smp_invlpg_range(vm_offset_t addr1, vm_offset_t addr2)
{

        if (smp_started) {
                smp_tlb_shootdown(IPI_INVLRNG, addr1, addr2);
        }
}

void
smp_masked_invltlb(cpuset_t mask)
{

        if (smp_started) {
                smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, 0, 0);
        }
}

void
smp_masked_invlpg(cpuset_t mask, vm_offset_t addr)
{

        if (smp_started) {
                smp_targeted_tlb_shootdown(mask, IPI_INVLPG, addr, 0);
        }
}

void
smp_masked_invlpg_range(cpuset_t mask, vm_offset_t addr1, vm_offset_t addr2)
{

        if (smp_started) {
                smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, addr1, addr2);
        }
}

/*
 * send an IPI to a set of cpus.
 */
void
ipi_selected(cpuset_t cpus, u_int ipi)
{
        int cpu;

        /*
         * IPI_STOP_HARD maps to a NMI and the trap handler needs a bit
         * of help in order to understand what is the source.
         * Set the mask of receiving CPUs for this purpose.
         */
        if (ipi == IPI_STOP_HARD)
                CPU_OR_ATOMIC(&ipi_nmi_pending, &cpus);

        while ((cpu = CPU_FFS(&cpus)) != 0) {
                cpu--;
                CPU_CLR(cpu, &cpus);
                CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi);
                ipi_send_cpu(cpu, ipi);
        }
}

/*
 * send an IPI to a specific CPU.
 */
void
ipi_cpu(int cpu, u_int ipi)
{

        /*
         * IPI_STOP_HARD maps to a NMI and the trap handler needs a bit
         * of help in order to understand what is the source.
         * Set the mask of receiving CPUs for this purpose.
         */
        if (ipi == IPI_STOP_HARD)
                CPU_SET_ATOMIC(cpu, &ipi_nmi_pending);

        CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi);
        ipi_send_cpu(cpu, ipi);
}

/*
 * send an IPI to all CPUs EXCEPT myself
 */
void
ipi_all_but_self(u_int ipi)
{
        cpuset_t other_cpus;

        /*
         * IPI_STOP_HARD maps to a NMI and the trap handler needs a bit
         * of help in order to understand what is the source.
         * Set the mask of receiving CPUs for this purpose.
         */
        other_cpus = all_cpus;
        CPU_CLR(PCPU_GET(cpuid), &other_cpus);
        if (ipi == IPI_STOP_HARD)
                CPU_OR_ATOMIC(&ipi_nmi_pending, &other_cpus);

        CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
        ipi_selected(other_cpus, ipi);
}

int
ipi_nmi_handler()
{
        u_int cpuid;

        /*
         * As long as there is not a simple way to know about a NMI's
         * source, if the bitmask for the current CPU is present in
         * the global pending bitword an IPI_STOP_HARD has been issued
         * and should be handled.
         */
        cpuid = PCPU_GET(cpuid);
        if (!CPU_ISSET(cpuid, &ipi_nmi_pending))
                return (1);

        CPU_CLR_ATOMIC(cpuid, &ipi_nmi_pending);
        cpustop_handler();
        return (0);
}

/*
 * Handle an IPI_STOP by saving our current context and spinning until we
 * are resumed.
 */
void
cpustop_handler(void)
{
        int cpu;

        cpu = PCPU_GET(cpuid);

        savectx(&stoppcbs[cpu]);

        /* Indicate that we are stopped */
        CPU_SET_ATOMIC(cpu, &stopped_cpus);

        /* Wait for restart */
        while (!CPU_ISSET(cpu, &started_cpus))
                ia32_pause();

        CPU_CLR_ATOMIC(cpu, &started_cpus);
        CPU_CLR_ATOMIC(cpu, &stopped_cpus);

        if (cpu == 0 && cpustop_restartfunc != NULL) {
                cpustop_restartfunc();
                cpustop_restartfunc = NULL;
        }
}

/*
 * Handlers for TLB related IPIs
 *
 * On i386 Xen PV these are no-ops since this port doesn't support SMP.
 */
void
invltlb_handler(void)
{
}

void
invlpg_handler(void)
{
}

void
invlrng_handler(void)
{
}

void
invlcache_handler(void)
{
}
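/*
 * Start-up glue: the SYSINITs at the end of this file hook the per-CPU
 * vcpu_info pointers up at SI_SUB_INTR (xen_smp_intr_setup_cpus), let
 * the APs out of the pen at SI_SUB_SMP/SI_ORDER_FIRST (release_aps), and
 * bind the per-CPU IPI event channels at SI_SUB_SMP/SI_ORDER_ANY
 * (xen_smp_intr_init_cpus).
 */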

/*
 * This is called once the rest of the system is up and running and we're
 * ready to let the AP's out of the pen.
 */
static void
release_aps(void *dummy __unused)
{

        if (mp_ncpus == 1)
                return;
        atomic_store_rel_int(&aps_ready, 1);
        while (smp_started == 0)
                ia32_pause();
}
SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL);
SYSINIT(start_ipis, SI_SUB_SMP, SI_ORDER_ANY, xen_smp_intr_init_cpus, NULL);
SYSINIT(start_cpu, SI_SUB_INTR, SI_ORDER_ANY, xen_smp_intr_setup_cpus, NULL);