mp_machdep.c revision 204972
/*-
 * Copyright (c) 1996, by Steve Passe
 * Copyright (c) 2008, by Kip Macy
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. The name of the developer may NOT be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/i386/xen/mp_machdep.c 204972 2010-03-10 19:50:52Z jhb $");

#include "opt_apic.h"
#include "opt_cpu.h"
#include "opt_kstack_pages.h"
#include "opt_mp_watchdog.h"
#include "opt_pmap.h"
#include "opt_sched.h"
#include "opt_smp.h"

#if !defined(lint)
#if !defined(SMP)
#error How did you get here?
#endif

#ifndef DEV_APIC
#error The apic device is required for SMP, add "device apic" to your config file.
#endif
#if defined(CPU_DISABLE_CMPXCHG) && !defined(COMPILING_LINT)
#error SMP not supported with CPU_DISABLE_CMPXCHG
#endif
#endif /* not lint */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/cons.h>	/* cngetc() */
#ifdef GPROF
#include <sys/gmon.h>
#endif
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/memrange.h>
#include <sys/mutex.h>
#include <sys/pcpu.h>
#include <sys/proc.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>
#include <vm/vm_page.h>

#include <machine/apicreg.h>
#include <machine/md_var.h>
#include <machine/mp_watchdog.h>
#include <machine/pcb.h>
#include <machine/psl.h>
#include <machine/smp.h>
#include <machine/specialreg.h>
#include <machine/pcpu.h>

#include <machine/xen/xen-os.h>
#include <xen/evtchn.h>
#include <xen/xen_intr.h>
#include <xen/hypervisor.h>
#include <xen/interface/vcpu.h>

int	mp_naps;		/* # of Applications processors */
int	boot_cpu_id = -1;	/* designated BSP */

extern	struct pcpu __pcpu[];

static int bootAP;
static union descriptor *bootAPgdt;

static char resched_name[NR_CPUS][15];
static char callfunc_name[NR_CPUS][15];

/* Free these after use */
void *bootstacks[MAXCPU];

struct pcb stoppcbs[MAXCPU];

/* Variables needed for SMP tlb shootdown. */
vm_offset_t smp_tlb_addr1;
vm_offset_t smp_tlb_addr2;
volatile int smp_tlb_wait;

typedef void call_data_func_t(uintptr_t , uintptr_t);

static u_int logical_cpus;
static volatile cpumask_t ipi_nmi_pending;

/* used to hold the AP's until we are ready to release them */
static struct mtx ap_boot_mtx;

/* Set to 1 once we're ready to let the APs out of the pen. */
static volatile int aps_ready = 0;

/*
 * Store data from cpu_add() until later in the boot when we actually setup
 * the APs.
 */
struct cpu_info {
	int	cpu_present:1;
	int	cpu_bsp:1;
	int	cpu_disabled:1;
} static cpu_info[MAX_APIC_ID + 1];
int cpu_apic_ids[MAXCPU];
int apic_cpuids[MAX_APIC_ID + 1];

/* Holds pending bitmap based IPIs per CPU */
static volatile u_int cpu_ipi_pending[MAXCPU];

static int cpu_logical;
static int cpu_cores;

static void	assign_cpu_ids(void);
static void	set_interrupt_apic_ids(void);
int	start_all_aps(void);
static int	start_ap(int apic_id);
static void	release_aps(void *dummy);

static u_int	hyperthreading_cpus;
static cpumask_t	hyperthreading_cpus_mask;

extern void Xhypervisor_callback(void);
extern void failsafe_callback(void);
extern void pmap_lazyfix_action(void);

struct cpu_group *
cpu_topo(void)
{
	if (cpu_cores == 0)
		cpu_cores = 1;
	if (cpu_logical == 0)
		cpu_logical = 1;
	if (mp_ncpus % (cpu_cores * cpu_logical) != 0) {
		printf("WARNING: Non-uniform processors.\n");
		printf("WARNING: Using suboptimal topology.\n");
		return (smp_topo_none());
	}
	/*
	 * No multi-core or hyper-threaded.
	 */
	if (cpu_logical * cpu_cores == 1)
		return (smp_topo_none());
	/*
	 * Only HTT no multi-core.
	 */
	if (cpu_logical > 1 && cpu_cores == 1)
		return (smp_topo_1level(CG_SHARE_L1, cpu_logical, CG_FLAG_HTT));
	/*
	 * Only multi-core no HTT.
	 */
	if (cpu_cores > 1 && cpu_logical == 1)
		return (smp_topo_1level(CG_SHARE_NONE, cpu_cores, 0));
	/*
	 * Both HTT and multi-core.
	 */
	return (smp_topo_2level(CG_SHARE_NONE, cpu_cores,
	    CG_SHARE_L1, cpu_logical, CG_FLAG_HTT));
}

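/*
 * Illustrative note (added commentary, not part of the original file):
 * on a hypothetical box reporting cpu_cores == 2 and cpu_logical == 2,
 * cpu_topo() above falls through to the final case and reports, roughly,
 *
 *	smp_topo_2level(CG_SHARE_NONE, 2,		two cores, sharing nothing
 *	    CG_SHARE_L1, 2, CG_FLAG_HTT);		two HTT threads per core, sharing L1
 *
 * so the scheduler sees HTT siblings grouped under their core.
 */
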
/*
 * Calculate usable address in base memory for AP trampoline code.
 */
u_int
mp_bootaddress(u_int basemem)
{

	return (basemem);
}

void
cpu_add(u_int apic_id, char boot_cpu)
{

	if (apic_id > MAX_APIC_ID) {
		panic("SMP: APIC ID %d too high", apic_id);
		return;
	}
	KASSERT(cpu_info[apic_id].cpu_present == 0, ("CPU %d added twice",
	    apic_id));
	cpu_info[apic_id].cpu_present = 1;
	if (boot_cpu) {
		KASSERT(boot_cpu_id == -1,
		    ("CPU %d claims to be BSP, but CPU %d already is", apic_id,
		    boot_cpu_id));
		boot_cpu_id = apic_id;
		cpu_info[apic_id].cpu_bsp = 1;
	}
	if (mp_ncpus < MAXCPU)
		mp_ncpus++;
	if (bootverbose)
		printf("SMP: Added CPU %d (%s)\n", apic_id, boot_cpu ? "BSP" :
		    "AP");
}

void
cpu_mp_setmaxid(void)
{

	mp_maxid = MAXCPU - 1;
}

int
cpu_mp_probe(void)
{

	/*
	 * Always record BSP in CPU map so that the mbuf init code works
	 * correctly.
	 */
	all_cpus = 1;
	if (mp_ncpus == 0) {
		/*
		 * No CPUs were found, so this must be a UP system.  Setup
		 * the variables to represent a system with a single CPU
		 * with an id of 0.
		 */
		mp_ncpus = 1;
		return (0);
	}

	/* At least one CPU was found. */
	if (mp_ncpus == 1) {
		/*
		 * One CPU was found, so this must be a UP system with
		 * an I/O APIC.
		 */
		return (0);
	}

	/* At least two CPUs were found. */
	return (1);
}

/*
 * Initialize the IPI handlers and start up the AP's.
 */
void
cpu_mp_start(void)
{
	int i;

	/* Initialize the logical ID to APIC ID table. */
	for (i = 0; i < MAXCPU; i++) {
		cpu_apic_ids[i] = -1;
		cpu_ipi_pending[i] = 0;
	}

	/* Set boot_cpu_id if needed. */
	if (boot_cpu_id == -1) {
		boot_cpu_id = PCPU_GET(apic_id);
		cpu_info[boot_cpu_id].cpu_bsp = 1;
	} else
		KASSERT(boot_cpu_id == PCPU_GET(apic_id),
		    ("BSP's APIC ID doesn't match boot_cpu_id"));
	cpu_apic_ids[0] = boot_cpu_id;
	apic_cpuids[boot_cpu_id] = 0;

	assign_cpu_ids();

	/* Start each Application Processor */
	start_all_aps();

	/* Setup the initial logical CPUs info. */
	logical_cpus = logical_cpus_mask = 0;
	if (cpu_feature & CPUID_HTT)
		logical_cpus = (cpu_procinfo & CPUID_HTT_CORES) >> 16;

	set_interrupt_apic_ids();
}

static void
iv_rendezvous(uintptr_t a, uintptr_t b)
{
	smp_rendezvous_action();
}

static void
iv_invltlb(uintptr_t a, uintptr_t b)
{
	xen_tlb_flush();
}

static void
iv_invlpg(uintptr_t a, uintptr_t b)
{
	xen_invlpg(a);
}

static void
iv_invlrng(uintptr_t a, uintptr_t b)
{
	vm_offset_t start = (vm_offset_t)a;
	vm_offset_t end = (vm_offset_t)b;

	while (start < end) {
		xen_invlpg(start);
		start += PAGE_SIZE;
	}
}

static void
iv_invlcache(uintptr_t a, uintptr_t b)
{

	wbinvd();
	atomic_add_int(&smp_tlb_wait, 1);
}

static void
iv_lazypmap(uintptr_t a, uintptr_t b)
{
	pmap_lazyfix_action();
	atomic_add_int(&smp_tlb_wait, 1);
}

/*
 * These start from "IPI offset" APIC_IPI_INTS
 */
static call_data_func_t *ipi_vectors[6] =
{
	iv_rendezvous,
	iv_invltlb,
	iv_invlpg,
	iv_invlrng,
	iv_invlcache,
	iv_lazypmap,
};

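/*
 * Illustrative note (added commentary, not part of the original file):
 * the table above is indexed by "func_id - APIC_IPI_INTS", mirroring how
 * smp_call_function_interrupt() below dispatches a function IPI.  For
 * example, a shootdown initiated with IPI_INVLTLB ends up as, roughly,
 *
 *	call_data->func_id = IPI_INVLTLB;
 *	func = ipi_vectors[call_data->func_id - APIC_IPI_INTS];	selects iv_invltlb
 *	(*func)(arg1, arg2);						calls xen_tlb_flush()
 *
 * so the order of the entries must match the IPI_* vector numbering that
 * starts at APIC_IPI_INTS.
 */
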
/*
 * Reschedule call back. Nothing to do,
 * all the work is done automatically when
 * we return from the interrupt.
 */
static int
smp_reschedule_interrupt(void *unused)
{
	int cpu = PCPU_GET(cpuid);
	u_int ipi_bitmap;

	ipi_bitmap = atomic_readandclear_int(&cpu_ipi_pending[cpu]);

	if (ipi_bitmap & (1 << IPI_PREEMPT)) {
#ifdef COUNT_IPIS
		(*ipi_preempt_counts[cpu])++;
#endif
		sched_preempt(curthread);
	}

	if (ipi_bitmap & (1 << IPI_AST)) {
#ifdef COUNT_IPIS
		(*ipi_ast_counts[cpu])++;
#endif
		/* Nothing to do for AST */
	}
	return (FILTER_HANDLED);
}

struct _call_data {
	uint16_t func_id;
	uint16_t wait;
	uintptr_t arg1;
	uintptr_t arg2;
	atomic_t started;
	atomic_t finished;
};

static struct _call_data *call_data;

static int
smp_call_function_interrupt(void *unused)
{
	call_data_func_t *func;
	uintptr_t arg1 = call_data->arg1;
	uintptr_t arg2 = call_data->arg2;
	int wait = call_data->wait;
	atomic_t *started = &call_data->started;
	atomic_t *finished = &call_data->finished;

	/* We only handle function IPIs, not bitmap IPIs */
	if (call_data->func_id < APIC_IPI_INTS || call_data->func_id > IPI_BITMAP_VECTOR)
		panic("invalid function id %u", call_data->func_id);

	func = ipi_vectors[call_data->func_id - APIC_IPI_INTS];
	/*
	 * Notify initiating CPU that I've grabbed the data and am
	 * about to execute the function
	 */
	mb();
	atomic_inc(started);
	/*
	 * At this point the info structure may be out of scope unless wait==1
	 */
	(*func)(arg1, arg2);

	if (wait) {
		mb();
		atomic_inc(finished);
	}
	atomic_add_int(&smp_tlb_wait, 1);
	return (FILTER_HANDLED);
}

/*
 * Print various information about the SMP system hardware and setup.
 */
void
cpu_mp_announce(void)
{
	int i, x;

	/* List CPUs */
	printf(" cpu0 (BSP): APIC ID: %2d\n", boot_cpu_id);
	for (i = 1, x = 0; x <= MAX_APIC_ID; x++) {
		if (!cpu_info[x].cpu_present || cpu_info[x].cpu_bsp)
			continue;
		if (cpu_info[x].cpu_disabled)
			printf(" cpu (AP): APIC ID: %2d (disabled)\n", x);
		else {
			KASSERT(i < mp_ncpus,
			    ("mp_ncpus and actual cpus are out of whack"));
			printf(" cpu%d (AP): APIC ID: %2d\n", i++, x);
		}
	}
}

static int
xen_smp_intr_init(unsigned int cpu)
{
	int rc;
	unsigned int irq;

	per_cpu(resched_irq, cpu) = per_cpu(callfunc_irq, cpu) = -1;

	sprintf(resched_name[cpu], "resched%u", cpu);
	rc = bind_ipi_to_irqhandler(RESCHEDULE_VECTOR,
	    cpu,
	    resched_name[cpu],
	    smp_reschedule_interrupt,
	    INTR_FAST|INTR_TYPE_TTY|INTR_MPSAFE, &irq);

	printf("[XEN] IPI cpu=%d irq=%d vector=RESCHEDULE_VECTOR (%d)\n",
	    cpu, irq, RESCHEDULE_VECTOR);

	per_cpu(resched_irq, cpu) = irq;

	sprintf(callfunc_name[cpu], "callfunc%u", cpu);
	rc = bind_ipi_to_irqhandler(CALL_FUNCTION_VECTOR,
	    cpu,
	    callfunc_name[cpu],
	    smp_call_function_interrupt,
	    INTR_FAST|INTR_TYPE_TTY|INTR_MPSAFE, &irq);
	if (rc < 0)
		goto fail;
	per_cpu(callfunc_irq, cpu) = irq;

	printf("[XEN] IPI cpu=%d irq=%d vector=CALL_FUNCTION_VECTOR (%d)\n",
	    cpu, irq, CALL_FUNCTION_VECTOR);

	if ((cpu != 0) && ((rc = ap_cpu_initclocks(cpu)) != 0))
		goto fail;

	return 0;

 fail:
	if (per_cpu(resched_irq, cpu) >= 0)
		unbind_from_irqhandler(per_cpu(resched_irq, cpu));
	if (per_cpu(callfunc_irq, cpu) >= 0)
		unbind_from_irqhandler(per_cpu(callfunc_irq, cpu));
	return rc;
}

static void
xen_smp_intr_init_cpus(void *unused)
{
	int i;

	for (i = 0; i < mp_ncpus; i++)
		xen_smp_intr_init(i);
}

#define MTOPSIZE (1<<(14 + PAGE_SHIFT))

/*
 * AP CPU's call this to initialize themselves.
 */
void
init_secondary(void)
{
	vm_offset_t addr;
	int	gsel_tss;

	/* bootAP is set in start_ap() to our ID. */
	PCPU_SET(currentldt, _default_ldt);
	gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
#if 0
	gdt[bootAP * NGDT + GPROC0_SEL].sd.sd_type = SDT_SYS386TSS;
#endif
	PCPU_SET(common_tss.tss_esp0, 0); /* not used until after switch */
	PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL));
	PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16);
#if 0
	PCPU_SET(tss_gdt, &gdt[bootAP * NGDT + GPROC0_SEL].sd);

	PCPU_SET(common_tssd, *PCPU_GET(tss_gdt));
#endif
	PCPU_SET(fsgs_gdt, &gdt[GUFS_SEL].sd);

	/*
	 * Set to a known state:
	 *	Set by mpboot.s: CR0_PG, CR0_PE
	 *	Set by cpu_setregs: CR0_NE, CR0_MP, CR0_TS, CR0_WP, CR0_AM
	 */
	/*
	 * signal our startup to the BSP.
	 */
	mp_naps++;

	/* Spin until the BSP releases the AP's. */
	while (!aps_ready)
		ia32_pause();

	/* BSP may have changed PTD while we were waiting */
	invltlb();
	for (addr = 0; addr < NKPT * NBPDR - 1; addr += PAGE_SIZE)
		invlpg(addr);

	/* set up FPU state on the AP */
	npxinit();
#if 0
	/* set up SSE registers */
	enable_sse();
#endif
#if 0 && defined(PAE)
	/* Enable the PTE no-execute bit. */
	if ((amd_feature & AMDID_NX) != 0) {
		uint64_t msr;

		msr = rdmsr(MSR_EFER) | EFER_NXE;
		wrmsr(MSR_EFER, msr);
	}
#endif
#if 0
	/* A quick check from sanity claus */
	if (PCPU_GET(apic_id) != lapic_id()) {
		printf("SMP: cpuid = %d\n", PCPU_GET(cpuid));
		printf("SMP: actual apic_id = %d\n", lapic_id());
		printf("SMP: correct apic_id = %d\n", PCPU_GET(apic_id));
		panic("cpuid mismatch! boom!!");
	}
#endif

	/* Initialize curthread. */
	KASSERT(PCPU_GET(idlethread) != NULL, ("no idle thread"));
	PCPU_SET(curthread, PCPU_GET(idlethread));

	mtx_lock_spin(&ap_boot_mtx);
#if 0
	/* Init local apic for irq's */
	lapic_setup(1);
#endif
	smp_cpus++;

	CTR1(KTR_SMP, "SMP: AP CPU #%d Launched", PCPU_GET(cpuid));
	printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid));

	/* Determine if we are a logical CPU. */
	if (logical_cpus > 1 && PCPU_GET(apic_id) % logical_cpus != 0)
		logical_cpus_mask |= PCPU_GET(cpumask);

	/* Determine if we are a hyperthread. */
	if (hyperthreading_cpus > 1 &&
	    PCPU_GET(apic_id) % hyperthreading_cpus != 0)
		hyperthreading_cpus_mask |= PCPU_GET(cpumask);

	/* Build our map of 'other' CPUs. */
	PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask));
#if 0
	if (bootverbose)
		lapic_dump("AP");
#endif
	if (smp_cpus == mp_ncpus) {
		/* enable IPI's, tlb shootdown, freezes etc */
		atomic_store_rel_int(&smp_started, 1);
		smp_active = 1;	 /* historic */
	}

	mtx_unlock_spin(&ap_boot_mtx);

	/* wait until all the AP's are up */
	while (smp_started == 0)
		ia32_pause();

	PCPU_SET(curthread, PCPU_GET(idlethread));
	/* enter the scheduler */
	sched_throw(NULL);

	panic("scheduler returned us to %s", __func__);
	/* NOTREACHED */
}

/*******************************************************************
 * local functions and data
 */

/*
 * We tell the I/O APIC code about all the CPUs we want to receive
 * interrupts.  If we don't want certain CPUs to receive IRQs we
 * can simply not tell the I/O APIC code about them in this function.
 * We also do not tell it about the BSP since it tells itself about
 * the BSP internally to work with UP kernels and on UP machines.
 */
static void
set_interrupt_apic_ids(void)
{
	u_int i, apic_id;

	for (i = 0; i < MAXCPU; i++) {
		apic_id = cpu_apic_ids[i];
		if (apic_id == -1)
			continue;
		if (cpu_info[apic_id].cpu_bsp)
			continue;
		if (cpu_info[apic_id].cpu_disabled)
			continue;

		/* Don't let hyperthreads service interrupts. */
		if (hyperthreading_cpus > 1 &&
		    apic_id % hyperthreading_cpus != 0)
			continue;

		intr_add_cpu(i);
	}
}

/*
 * Assign logical CPU IDs to local APICs.
 */
static void
assign_cpu_ids(void)
{
	u_int i;

	/* Check for explicitly disabled CPUs. */
	for (i = 0; i <= MAX_APIC_ID; i++) {
		if (!cpu_info[i].cpu_present || cpu_info[i].cpu_bsp)
			continue;

		/* Don't use this CPU if it has been disabled by a tunable. */
		if (resource_disabled("lapic", i)) {
			cpu_info[i].cpu_disabled = 1;
			continue;
		}
	}

	/*
	 * Assign CPU IDs to local APIC IDs and disable any CPUs
	 * beyond MAXCPU.  CPU 0 has already been assigned to the BSP,
	 * so we only have to assign IDs for APs.
	 */
	mp_ncpus = 1;
	for (i = 0; i <= MAX_APIC_ID; i++) {
		if (!cpu_info[i].cpu_present || cpu_info[i].cpu_bsp ||
		    cpu_info[i].cpu_disabled)
			continue;

		if (mp_ncpus < MAXCPU) {
			cpu_apic_ids[mp_ncpus] = i;
			apic_cpuids[i] = mp_ncpus;
			mp_ncpus++;
		} else
			cpu_info[i].cpu_disabled = 1;
	}
	KASSERT(mp_maxid >= mp_ncpus - 1,
	    ("%s: counters out of sync: max %d, count %d", __func__, mp_maxid,
	    mp_ncpus));
}

/*
 * start each AP in our list
 */
/* Lowest 1MB is already mapped: don't touch*/
#define TMPMAP_START 1
int
start_all_aps(void)
{
	int x, apic_id, cpu;
	struct pcpu *pc;

	mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN);

	/* set up temporary P==V mapping for AP boot */
	/* XXX this is a hack, we should boot the AP on its own stack/PTD */

	/* start each AP */
	for (cpu = 1; cpu < mp_ncpus; cpu++) {
		apic_id = cpu_apic_ids[cpu];

		bootAP = cpu;
		bootAPgdt = gdt + (512*cpu);

		/* Get per-cpu data */
		pc = &__pcpu[bootAP];
		pcpu_init(pc, bootAP, sizeof(struct pcpu));
		dpcpu_init((void *)kmem_alloc(kernel_map, DPCPU_SIZE), bootAP);
		pc->pc_apic_id = cpu_apic_ids[bootAP];
		pc->pc_prvspace = pc;
		pc->pc_curthread = 0;

		gdt_segs[GPRIV_SEL].ssd_base = (int) pc;
		gdt_segs[GPROC0_SEL].ssd_base = (int) &pc->pc_common_tss;

		PT_SET_MA(bootAPgdt, xpmap_ptom(VTOP(bootAPgdt)) | PG_V | PG_RW);
		bzero(bootAPgdt, PAGE_SIZE);
		for (x = 0; x < NGDT; x++)
			ssdtosd(&gdt_segs[x], &bootAPgdt[x].sd);
		PT_SET_MA(bootAPgdt, vtomach(bootAPgdt) | PG_V);
#ifdef notyet

		if (HYPERVISOR_vcpu_op(VCPUOP_get_physid, cpu, &cpu_id) == 0) {
			apicid = xen_vcpu_physid_to_x86_apicid(cpu_id.phys_id);
			acpiid = xen_vcpu_physid_to_x86_acpiid(cpu_id.phys_id);
#ifdef CONFIG_ACPI
			if (acpiid != 0xff)
				x86_acpiid_to_apicid[acpiid] = apicid;
#endif
		}
#endif

		/* attempt to start the Application Processor */
		if (!start_ap(cpu)) {
			printf("AP #%d (PHY# %d) failed!\n", cpu, apic_id);
			/* better panic as the AP may be running loose */
			printf("panic y/n? [y] ");
[y] "); 774132332Smarcel if (cngetc() != 'n') 775132332Smarcel panic("bye-bye"); 776132332Smarcel } 777132332Smarcel 778132332Smarcel all_cpus |= (1 << cpu); /* record AP in CPU map */ 779132332Smarcel } 780132332Smarcel 781132332Smarcel 782132332Smarcel /* build our map of 'other' CPUs */ 783132332Smarcel PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask)); 784132332Smarcel 785132332Smarcel pmap_invalidate_range(kernel_pmap, 0, NKPT * NBPDR - 1); 786132332Smarcel 787132332Smarcel /* number of APs actually started */ 788132332Smarcel return mp_naps; 789146818Sdfr} 790132332Smarcel 791146818Sdfrextern uint8_t *pcpu_boot_stack; 792146818Sdfrextern trap_info_t trap_table[]; 793146818Sdfr 794146818Sdfrstatic void 795146818Sdfrsmp_trap_init(trap_info_t *trap_ctxt) 796146818Sdfr{ 797146818Sdfr const trap_info_t *t = trap_table; 798146818Sdfr 799146818Sdfr for (t = trap_table; t->address; t++) { 800146818Sdfr trap_ctxt[t->vector].flags = t->flags; 801146818Sdfr trap_ctxt[t->vector].cs = t->cs; 802146818Sdfr trap_ctxt[t->vector].address = t->address; 803146818Sdfr } 804146818Sdfr} 805146818Sdfr 806146818Sdfrextern int nkpt; 807146818Sdfrstatic void 808146818Sdfrcpu_initialize_context(unsigned int cpu) 809146818Sdfr{ 810146818Sdfr /* vcpu_guest_context_t is too large to allocate on the stack. 811146818Sdfr * Hence we allocate statically and protect it with a lock */ 812146818Sdfr vm_page_t m[4]; 813146818Sdfr static vcpu_guest_context_t ctxt; 814146818Sdfr vm_offset_t boot_stack; 815146818Sdfr vm_offset_t newPTD; 816146818Sdfr vm_paddr_t ma[NPGPTD]; 817146818Sdfr static int color; 818146818Sdfr int i; 819146818Sdfr 820146818Sdfr /* 821146818Sdfr * Page 0,[0-3] PTD 822146818Sdfr * Page 1, [4] boot stack 823146818Sdfr * Page [5] PDPT 824146818Sdfr * 825146818Sdfr */ 826146818Sdfr for (i = 0; i < NPGPTD + 2; i++) { 827146818Sdfr m[i] = vm_page_alloc(NULL, color++, 828146818Sdfr VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | 829146818Sdfr VM_ALLOC_ZERO); 830146818Sdfr 831146818Sdfr pmap_zero_page(m[i]); 832146818Sdfr 833146818Sdfr } 834146818Sdfr boot_stack = kmem_alloc_nofault(kernel_map, 1); 835146818Sdfr newPTD = kmem_alloc_nofault(kernel_map, NPGPTD); 836146818Sdfr ma[0] = xpmap_ptom(VM_PAGE_TO_PHYS(m[0]))|PG_V; 837146818Sdfr 838146818Sdfr#ifdef PAE 839146818Sdfr pmap_kenter(boot_stack, VM_PAGE_TO_PHYS(m[NPGPTD + 1])); 840146818Sdfr for (i = 0; i < NPGPTD; i++) { 841132332Smarcel ((vm_paddr_t *)boot_stack)[i] = 842132332Smarcel ma[i] = 843132332Smarcel xpmap_ptom(VM_PAGE_TO_PHYS(m[i]))|PG_V; 844132332Smarcel } 845132332Smarcel#endif 846132332Smarcel 847132332Smarcel /* 848132332Smarcel * Copy cpu0 IdlePTD to new IdlePTD - copying only 849132332Smarcel * kernel mappings 850132332Smarcel */ 851132332Smarcel pmap_qenter(newPTD, m, 4); 852132332Smarcel 853132332Smarcel memcpy((uint8_t *)newPTD + KPTDI*sizeof(vm_paddr_t), 854132332Smarcel (uint8_t *)PTOV(IdlePTD) + KPTDI*sizeof(vm_paddr_t), 855132332Smarcel nkpt*sizeof(vm_paddr_t)); 856132332Smarcel 857132332Smarcel pmap_qremove(newPTD, 4); 858132332Smarcel kmem_free(kernel_map, newPTD, 4); 859132332Smarcel /* 860132332Smarcel * map actual idle stack to boot_stack 861133802Sdavidxu */ 862132332Smarcel pmap_kenter(boot_stack, VM_PAGE_TO_PHYS(m[NPGPTD])); 863132332Smarcel 864132332Smarcel 865133802Sdavidxu xen_pgdpt_pin(xpmap_ptom(VM_PAGE_TO_PHYS(m[NPGPTD + 1]))); 866132332Smarcel vm_page_lock_queues(); 867132332Smarcel for (i = 0; i < 4; i++) { 868132332Smarcel int pdir = (PTDPTDI + i) / NPDEPG; 869132332Smarcel int curoffset = (PTDPTDI + i) % 

		xen_queue_pt_update((vm_paddr_t)
		    ((ma[pdir] & ~PG_V) + (curoffset*sizeof(vm_paddr_t))),
		    ma[i]);
	}
	PT_UPDATES_FLUSH();
	vm_page_unlock_queues();

	memset(&ctxt, 0, sizeof(ctxt));
	ctxt.flags = VGCF_IN_KERNEL;
	ctxt.user_regs.ds = GSEL(GDATA_SEL, SEL_KPL);
	ctxt.user_regs.es = GSEL(GDATA_SEL, SEL_KPL);
	ctxt.user_regs.fs = GSEL(GPRIV_SEL, SEL_KPL);
	ctxt.user_regs.gs = GSEL(GDATA_SEL, SEL_KPL);
	ctxt.user_regs.cs = GSEL(GCODE_SEL, SEL_KPL);
	ctxt.user_regs.ss = GSEL(GDATA_SEL, SEL_KPL);
	ctxt.user_regs.eip = (unsigned long)init_secondary;
	ctxt.user_regs.eflags = PSL_KERNEL | 0x1000; /* IOPL_RING1 */

	memset(&ctxt.fpu_ctxt, 0, sizeof(ctxt.fpu_ctxt));

	smp_trap_init(ctxt.trap_ctxt);

	ctxt.ldt_ents = 0;
	ctxt.gdt_frames[0] = (uint32_t)((uint64_t)vtomach(bootAPgdt) >> PAGE_SHIFT);
	ctxt.gdt_ents = 512;

#ifdef __i386__
	ctxt.user_regs.esp = boot_stack + PAGE_SIZE;

	ctxt.kernel_ss = GSEL(GDATA_SEL, SEL_KPL);
	ctxt.kernel_sp = boot_stack + PAGE_SIZE;

	ctxt.event_callback_cs = GSEL(GCODE_SEL, SEL_KPL);
	ctxt.event_callback_eip = (unsigned long)Xhypervisor_callback;
	ctxt.failsafe_callback_cs = GSEL(GCODE_SEL, SEL_KPL);
	ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;

	ctxt.ctrlreg[3] = xpmap_ptom(VM_PAGE_TO_PHYS(m[NPGPTD + 1]));
#else /* __x86_64__ */
	ctxt.user_regs.esp = idle->thread.rsp0 - sizeof(struct pt_regs);
	ctxt.kernel_ss = GSEL(GDATA_SEL, SEL_KPL);
	ctxt.kernel_sp = idle->thread.rsp0;

	ctxt.event_callback_eip = (unsigned long)hypervisor_callback;
	ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;
	ctxt.syscall_callback_eip = (unsigned long)system_call;

	ctxt.ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(init_level4_pgt));

	ctxt.gs_base_kernel = (unsigned long)(cpu_pda(cpu));
#endif

	printf("gdtpfn=%lx pdptpfn=%lx\n",
	    ctxt.gdt_frames[0],
	    ctxt.ctrlreg[3] >> PAGE_SHIFT);

	PANIC_IF(HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, &ctxt));
	DELAY(3000);
	PANIC_IF(HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL));
}

/*
 * This function starts the AP (application processor) identified
 * by the APIC ID 'physicalCpu'.  It does quite a "song and dance"
 * to accomplish this.  This is necessary because of the nuances
 * of the different hardware we might encounter.  It isn't pretty,
 * but it seems to work.
 */

int cpus;
static int
start_ap(int apic_id)
{
	int ms;

	/* used as a watchpoint to signal AP startup */
	cpus = mp_naps;

	cpu_initialize_context(apic_id);

	/* Wait up to 5 seconds for it to start. */
	for (ms = 0; ms < 5000; ms++) {
		if (mp_naps > cpus)
			return 1;	/* return SUCCESS */
		DELAY(1000);
	}
	return 0;		/* return FAILURE */
}

/*
 * Flush the TLB on all other CPU's
 */
static void
smp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2)
{
	u_int ncpu;
	struct _call_data data;

	ncpu = mp_ncpus - 1;	/* does not shootdown self */
	if (ncpu < 1)
		return;		/* no other cpus */
	if (!(read_eflags() & PSL_I))
		panic("%s: interrupts disabled", __func__);
	mtx_lock_spin(&smp_ipi_mtx);
	KASSERT(call_data == NULL, ("call_data isn't null?!"));
	call_data = &data;
	call_data->func_id = vector;
	call_data->arg1 = addr1;
	call_data->arg2 = addr2;
	atomic_store_rel_int(&smp_tlb_wait, 0);
	ipi_all_but_self(vector);
	while (smp_tlb_wait < ncpu)
		ia32_pause();
	call_data = NULL;
	mtx_unlock_spin(&smp_ipi_mtx);
}

static void
smp_targeted_tlb_shootdown(cpumask_t mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2)
{
	int ncpu, othercpus;
	struct _call_data data;

	othercpus = mp_ncpus - 1;
	if (mask == (u_int)-1) {
		ncpu = othercpus;
		if (ncpu < 1)
			return;
	} else {
		mask &= ~PCPU_GET(cpumask);
		if (mask == 0)
			return;
		ncpu = bitcount32(mask);
		if (ncpu > othercpus) {
			/* XXX this should be a panic offence */
			printf("SMP: tlb shootdown to %d other cpus (only have %d)\n",
			    ncpu, othercpus);
			ncpu = othercpus;
		}
		/* XXX should be a panic, implied by mask == 0 above */
		if (ncpu < 1)
			return;
	}
	if (!(read_eflags() & PSL_I))
		panic("%s: interrupts disabled", __func__);
	mtx_lock_spin(&smp_ipi_mtx);
	KASSERT(call_data == NULL, ("call_data isn't null?!"));
	call_data = &data;
	call_data->func_id = vector;
	call_data->arg1 = addr1;
	call_data->arg2 = addr2;
	atomic_store_rel_int(&smp_tlb_wait, 0);
	if (mask == (u_int)-1)
		ipi_all_but_self(vector);
	else
		ipi_selected(mask, vector);
	while (smp_tlb_wait < ncpu)
		ia32_pause();
	call_data = NULL;
	mtx_unlock_spin(&smp_ipi_mtx);
}

void
smp_cache_flush(void)
{

	if (smp_started)
		smp_tlb_shootdown(IPI_INVLCACHE, 0, 0);
}

void
smp_invltlb(void)
{

	if (smp_started) {
		smp_tlb_shootdown(IPI_INVLTLB, 0, 0);
	}
}

void
smp_invlpg(vm_offset_t addr)
{

	if (smp_started) {
		smp_tlb_shootdown(IPI_INVLPG, addr, 0);
	}
}

void
smp_invlpg_range(vm_offset_t addr1, vm_offset_t addr2)
{

	if (smp_started) {
		smp_tlb_shootdown(IPI_INVLRNG, addr1, addr2);
	}
}

void
smp_masked_invltlb(cpumask_t mask)
{

	if (smp_started) {
		smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, 0, 0);
	}
}

void
smp_masked_invlpg(cpumask_t mask, vm_offset_t addr)
{

	if (smp_started) {
		smp_targeted_tlb_shootdown(mask, IPI_INVLPG, addr, 0);
	}
}

void
smp_masked_invlpg_range(cpumask_t mask, vm_offset_t addr1, vm_offset_t addr2)
{

	if (smp_started) {
		smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, addr1, addr2);
	}
}

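/*
 * Usage sketch (added commentary; the caller shown is hypothetical and not
 * part of this file): a pmap-layer caller would typically invalidate one
 * page on every CPU that has the pmap active with something like
 *
 *	smp_masked_invlpg(pmap->pm_active, va);
 *
 * smp_targeted_tlb_shootdown() strips the calling CPU from the mask itself,
 * and the remaining CPUs run iv_invlpg() via smp_call_function_interrupt().
 */
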
/*
 * send an IPI to a set of cpus.
 */
void
ipi_selected(cpumask_t cpus, u_int ipi)
{
	int cpu;
	u_int bitmap = 0;
	u_int old_pending;
	u_int new_pending;

	if (IPI_IS_BITMAPED(ipi)) {
		bitmap = 1 << ipi;
		ipi = IPI_BITMAP_VECTOR;
	}

	/*
	 * IPI_STOP_HARD maps to a NMI and the trap handler needs a bit
	 * of help in order to understand what is the source.
	 * Set the mask of receiving CPUs for this purpose.
	 */
	if (ipi == IPI_STOP_HARD)
		atomic_set_int(&ipi_nmi_pending, cpus);

	CTR3(KTR_SMP, "%s: cpus: %x ipi: %x", __func__, cpus, ipi);
	while ((cpu = ffs(cpus)) != 0) {
		cpu--;
		cpus &= ~(1 << cpu);

		KASSERT(cpu_apic_ids[cpu] != -1,
		    ("IPI to non-existent CPU %d", cpu));

		if (bitmap) {
			do {
				old_pending = cpu_ipi_pending[cpu];
				new_pending = old_pending | bitmap;
			} while (!atomic_cmpset_int(&cpu_ipi_pending[cpu], old_pending, new_pending));

			if (!old_pending)
				ipi_pcpu(cpu, RESCHEDULE_VECTOR);
			continue;

		} else {
			KASSERT(call_data != NULL, ("call_data not set"));
			ipi_pcpu(cpu, CALL_FUNCTION_VECTOR);
		}
	}
}

/*
 * send an IPI to all CPUs EXCEPT myself
 */
void
ipi_all_but_self(u_int ipi)
{

	/*
	 * IPI_STOP_HARD maps to a NMI and the trap handler needs a bit
	 * of help in order to understand what is the source.
	 * Set the mask of receiving CPUs for this purpose.
	 */
	if (ipi == IPI_STOP_HARD)
		atomic_set_int(&ipi_nmi_pending, PCPU_GET(other_cpus));

	CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
	ipi_selected(PCPU_GET(other_cpus), ipi);
}

int
ipi_nmi_handler()
{
	cpumask_t cpumask;

	/*
	 * As long as there is not a simple way to know about a NMI's
	 * source, if the bitmask for the current CPU is present in
	 * the global pending bitword an IPI_STOP_HARD has been issued
	 * and should be handled.
	 */
	cpumask = PCPU_GET(cpumask);
	if ((ipi_nmi_pending & cpumask) == 0)
		return (1);

	atomic_clear_int(&ipi_nmi_pending, cpumask);
	cpustop_handler();
	return (0);
}

/*
 * Handle an IPI_STOP by saving our current context and spinning until we
 * are resumed.
 */
void
cpustop_handler(void)
{
	int cpu = PCPU_GET(cpuid);
	int cpumask = PCPU_GET(cpumask);

	savectx(&stoppcbs[cpu]);

	/* Indicate that we are stopped */
	atomic_set_int(&stopped_cpus, cpumask);

	/* Wait for restart */
	while (!(started_cpus & cpumask))
		ia32_pause();

	atomic_clear_int(&started_cpus, cpumask);
	atomic_clear_int(&stopped_cpus, cpumask);

	if (cpu == 0 && cpustop_restartfunc != NULL) {
		cpustop_restartfunc();
		cpustop_restartfunc = NULL;
	}
}

/*
 * This is called once the rest of the system is up and running and we're
 * ready to let the AP's out of the pen.
 */
static void
release_aps(void *dummy __unused)
{

	if (mp_ncpus == 1)
		return;
	atomic_store_rel_int(&aps_ready, 1);
	while (smp_started == 0)
		ia32_pause();
}

SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL);
SYSINIT(start_ipis, SI_SUB_INTR, SI_ORDER_ANY, xen_smp_intr_init_cpus, NULL);