1182902Skmacy/*- 2182902Skmacy * Copyright (c) 1996, by Steve Passe 3182902Skmacy * Copyright (c) 2008, by Kip Macy 4182902Skmacy * All rights reserved. 5182902Skmacy * 6182902Skmacy * Redistribution and use in source and binary forms, with or without 7182902Skmacy * modification, are permitted provided that the following conditions 8182902Skmacy * are met: 9182902Skmacy * 1. Redistributions of source code must retain the above copyright 10182902Skmacy * notice, this list of conditions and the following disclaimer. 11182902Skmacy * 2. The name of the developer may NOT be used to endorse or promote products 12182902Skmacy * derived from this software without specific prior written permission. 13182902Skmacy * 14182902Skmacy * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15182902Skmacy * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16182902Skmacy * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17182902Skmacy * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18182902Skmacy * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19182902Skmacy * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20182902Skmacy * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21182902Skmacy * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22182902Skmacy * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23182902Skmacy * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24182902Skmacy * SUCH DAMAGE. 25182902Skmacy */ 26182902Skmacy 27182902Skmacy#include <sys/cdefs.h> 28182902Skmacy__FBSDID("$FreeBSD$"); 29182902Skmacy 30182902Skmacy#include "opt_apic.h" 31182902Skmacy#include "opt_cpu.h" 32182902Skmacy#include "opt_kstack_pages.h" 33182902Skmacy#include "opt_mp_watchdog.h" 34204972Sjhb#include "opt_pmap.h" 35182902Skmacy#include "opt_sched.h" 36182902Skmacy#include "opt_smp.h" 37182902Skmacy 38182902Skmacy#if !defined(lint) 39182902Skmacy#if !defined(SMP) 40182902Skmacy#error How did you get here? 41182902Skmacy#endif 42182902Skmacy 43182902Skmacy#ifndef DEV_APIC 44182902Skmacy#error The apic device is required for SMP, add "device apic" to your config file. 45182902Skmacy#endif 46182902Skmacy#if defined(CPU_DISABLE_CMPXCHG) && !defined(COMPILING_LINT) 47182902Skmacy#error SMP not supported with CPU_DISABLE_CMPXCHG 48182902Skmacy#endif 49182902Skmacy#endif /* not lint */ 50182902Skmacy 51182902Skmacy#include <sys/param.h> 52182902Skmacy#include <sys/systm.h> 53182902Skmacy#include <sys/bus.h> 54182902Skmacy#include <sys/cons.h> /* cngetc() */ 55222813Sattilio#include <sys/cpuset.h> 56182902Skmacy#ifdef GPROF 57182902Skmacy#include <sys/gmon.h> 58182902Skmacy#endif 59182902Skmacy#include <sys/kernel.h> 60182902Skmacy#include <sys/ktr.h> 61182902Skmacy#include <sys/lock.h> 62182902Skmacy#include <sys/malloc.h> 63182902Skmacy#include <sys/memrange.h> 64182902Skmacy#include <sys/mutex.h> 65182902Skmacy#include <sys/pcpu.h> 66182902Skmacy#include <sys/proc.h> 67182902Skmacy#include <sys/sched.h> 68182902Skmacy#include <sys/smp.h> 69182902Skmacy#include <sys/sysctl.h> 70182902Skmacy 71182902Skmacy#include <vm/vm.h> 72182902Skmacy#include <vm/vm_param.h> 73182902Skmacy#include <vm/pmap.h> 74182902Skmacy#include <vm/vm_kern.h> 75182902Skmacy#include <vm/vm_extern.h> 76182902Skmacy#include <vm/vm_page.h> 77182902Skmacy 78214631Sjhb#include <x86/apicreg.h> 79182902Skmacy#include <machine/md_var.h> 80182902Skmacy#include <machine/mp_watchdog.h> 81182902Skmacy#include <machine/pcb.h> 82182902Skmacy#include <machine/psl.h> 83182902Skmacy#include <machine/smp.h> 84182902Skmacy#include <machine/specialreg.h> 85182902Skmacy#include <machine/pcpu.h> 86182902Skmacy 87182902Skmacy 88182902Skmacy 89182902Skmacy#include <machine/xen/xen-os.h> 90186557Skmacy#include <xen/evtchn.h> 91186557Skmacy#include <xen/xen_intr.h> 92186557Skmacy#include <xen/hypervisor.h> 93182902Skmacy#include <xen/interface/vcpu.h> 94182902Skmacy 95182902Skmacy 96182902Skmacyint mp_naps; /* # of Applications processors */ 97182902Skmacyint boot_cpu_id = -1; /* designated BSP */ 98182902Skmacy 99182902Skmacyextern struct pcpu __pcpu[]; 100182902Skmacy 101182902Skmacystatic int bootAP; 102182902Skmacystatic union descriptor *bootAPgdt; 103182902Skmacy 104184112Skmacystatic char resched_name[NR_CPUS][15]; 105184112Skmacystatic char callfunc_name[NR_CPUS][15]; 106182902Skmacy 107182902Skmacy/* Free these after use */ 108182902Skmacyvoid *bootstacks[MAXCPU]; 109182902Skmacy 110182902Skmacystruct pcb stoppcbs[MAXCPU]; 111182902Skmacy 112182902Skmacy/* Variables needed for SMP tlb shootdown. */ 113182902Skmacyvm_offset_t smp_tlb_addr1; 114182902Skmacyvm_offset_t smp_tlb_addr2; 115182902Skmacyvolatile int smp_tlb_wait; 116182902Skmacy 117184112Skmacytypedef void call_data_func_t(uintptr_t , uintptr_t); 118184112Skmacy 119182902Skmacystatic u_int logical_cpus; 120222813Sattiliostatic volatile cpuset_t ipi_nmi_pending; 121182902Skmacy 122182902Skmacy/* used to hold the AP's until we are ready to release them */ 123182902Skmacystatic struct mtx ap_boot_mtx; 124182902Skmacy 125182902Skmacy/* Set to 1 once we're ready to let the APs out of the pen. */ 126182902Skmacystatic volatile int aps_ready = 0; 127182902Skmacy 128182902Skmacy/* 129182902Skmacy * Store data from cpu_add() until later in the boot when we actually setup 130182902Skmacy * the APs. 131182902Skmacy */ 132182902Skmacystruct cpu_info { 133182902Skmacy int cpu_present:1; 134182902Skmacy int cpu_bsp:1; 135182902Skmacy int cpu_disabled:1; 136182902Skmacy} static cpu_info[MAX_APIC_ID + 1]; 137182902Skmacyint cpu_apic_ids[MAXCPU]; 138187966Sbzint apic_cpuids[MAX_APIC_ID + 1]; 139182902Skmacy 140182902Skmacy/* Holds pending bitmap based IPIs per CPU */ 141182902Skmacystatic volatile u_int cpu_ipi_pending[MAXCPU]; 142182902Skmacy 143191759Skmacystatic int cpu_logical; 144191759Skmacystatic int cpu_cores; 145191759Skmacy 146182902Skmacystatic void assign_cpu_ids(void); 147182902Skmacystatic void set_interrupt_apic_ids(void); 148182902Skmacyint start_all_aps(void); 149182902Skmacystatic int start_ap(int apic_id); 150182902Skmacystatic void release_aps(void *dummy); 151182902Skmacy 152182902Skmacystatic u_int hyperthreading_cpus; 153222813Sattiliostatic cpuset_t hyperthreading_cpus_mask; 154182902Skmacy 155182902Skmacyextern void Xhypervisor_callback(void); 156182902Skmacyextern void failsafe_callback(void); 157184115Skmacyextern void pmap_lazyfix_action(void); 158182902Skmacy 159182902Skmacystruct cpu_group * 160182902Skmacycpu_topo(void) 161182902Skmacy{ 162182902Skmacy if (cpu_cores == 0) 163182902Skmacy cpu_cores = 1; 164182902Skmacy if (cpu_logical == 0) 165182902Skmacy cpu_logical = 1; 166182902Skmacy if (mp_ncpus % (cpu_cores * cpu_logical) != 0) { 167182902Skmacy printf("WARNING: Non-uniform processors.\n"); 168182902Skmacy printf("WARNING: Using suboptimal topology.\n"); 169182902Skmacy return (smp_topo_none()); 170182902Skmacy } 171182902Skmacy /* 172182902Skmacy * No multi-core or hyper-threaded. 173182902Skmacy */ 174182902Skmacy if (cpu_logical * cpu_cores == 1) 175182902Skmacy return (smp_topo_none()); 176182902Skmacy /* 177182902Skmacy * Only HTT no multi-core. 178182902Skmacy */ 179182902Skmacy if (cpu_logical > 1 && cpu_cores == 1) 180182902Skmacy return (smp_topo_1level(CG_SHARE_L1, cpu_logical, CG_FLAG_HTT)); 181182902Skmacy /* 182182902Skmacy * Only multi-core no HTT. 183182902Skmacy */ 184182902Skmacy if (cpu_cores > 1 && cpu_logical == 1) 185182902Skmacy return (smp_topo_1level(CG_SHARE_NONE, cpu_cores, 0)); 186182902Skmacy /* 187182902Skmacy * Both HTT and multi-core. 188182902Skmacy */ 189182902Skmacy return (smp_topo_2level(CG_SHARE_NONE, cpu_cores, 190182902Skmacy CG_SHARE_L1, cpu_logical, CG_FLAG_HTT)); 191182902Skmacy} 192182902Skmacy 193182902Skmacy/* 194182902Skmacy * Calculate usable address in base memory for AP trampoline code. 195182902Skmacy */ 196182902Skmacyu_int 197182902Skmacymp_bootaddress(u_int basemem) 198182902Skmacy{ 199182902Skmacy 200182902Skmacy return (basemem); 201182902Skmacy} 202182902Skmacy 203182902Skmacyvoid 204182902Skmacycpu_add(u_int apic_id, char boot_cpu) 205182902Skmacy{ 206182902Skmacy 207182902Skmacy if (apic_id > MAX_APIC_ID) { 208182902Skmacy panic("SMP: APIC ID %d too high", apic_id); 209182902Skmacy return; 210182902Skmacy } 211182902Skmacy KASSERT(cpu_info[apic_id].cpu_present == 0, ("CPU %d added twice", 212182902Skmacy apic_id)); 213182902Skmacy cpu_info[apic_id].cpu_present = 1; 214182902Skmacy if (boot_cpu) { 215182902Skmacy KASSERT(boot_cpu_id == -1, 216182902Skmacy ("CPU %d claims to be BSP, but CPU %d already is", apic_id, 217182902Skmacy boot_cpu_id)); 218182902Skmacy boot_cpu_id = apic_id; 219182902Skmacy cpu_info[apic_id].cpu_bsp = 1; 220182902Skmacy } 221182902Skmacy if (mp_ncpus < MAXCPU) 222182902Skmacy mp_ncpus++; 223182902Skmacy if (bootverbose) 224182902Skmacy printf("SMP: Added CPU %d (%s)\n", apic_id, boot_cpu ? "BSP" : 225182902Skmacy "AP"); 226182902Skmacy} 227182902Skmacy 228182902Skmacyvoid 229182902Skmacycpu_mp_setmaxid(void) 230182902Skmacy{ 231182902Skmacy 232182902Skmacy mp_maxid = MAXCPU - 1; 233182902Skmacy} 234182902Skmacy 235182902Skmacyint 236182902Skmacycpu_mp_probe(void) 237182902Skmacy{ 238182902Skmacy 239182902Skmacy /* 240182902Skmacy * Always record BSP in CPU map so that the mbuf init code works 241182902Skmacy * correctly. 242182902Skmacy */ 243222813Sattilio CPU_SETOF(0, &all_cpus); 244182902Skmacy if (mp_ncpus == 0) { 245182902Skmacy /* 246182902Skmacy * No CPUs were found, so this must be a UP system. Setup 247182902Skmacy * the variables to represent a system with a single CPU 248182902Skmacy * with an id of 0. 249182902Skmacy */ 250182902Skmacy mp_ncpus = 1; 251182902Skmacy return (0); 252182902Skmacy } 253182902Skmacy 254182902Skmacy /* At least one CPU was found. */ 255182902Skmacy if (mp_ncpus == 1) { 256182902Skmacy /* 257182902Skmacy * One CPU was found, so this must be a UP system with 258182902Skmacy * an I/O APIC. 259182902Skmacy */ 260182902Skmacy return (0); 261182902Skmacy } 262182902Skmacy 263182902Skmacy /* At least two CPUs were found. */ 264182902Skmacy return (1); 265182902Skmacy} 266182902Skmacy 267182902Skmacy/* 268182902Skmacy * Initialize the IPI handlers and start up the AP's. 269182902Skmacy */ 270182902Skmacyvoid 271182902Skmacycpu_mp_start(void) 272182902Skmacy{ 273182902Skmacy int i; 274182902Skmacy 275182902Skmacy /* Initialize the logical ID to APIC ID table. */ 276182902Skmacy for (i = 0; i < MAXCPU; i++) { 277182902Skmacy cpu_apic_ids[i] = -1; 278182902Skmacy cpu_ipi_pending[i] = 0; 279182902Skmacy } 280182902Skmacy 281182902Skmacy /* Set boot_cpu_id if needed. */ 282182902Skmacy if (boot_cpu_id == -1) { 283182902Skmacy boot_cpu_id = PCPU_GET(apic_id); 284182902Skmacy cpu_info[boot_cpu_id].cpu_bsp = 1; 285182902Skmacy } else 286182902Skmacy KASSERT(boot_cpu_id == PCPU_GET(apic_id), 287182902Skmacy ("BSP's APIC ID doesn't match boot_cpu_id")); 288182902Skmacy cpu_apic_ids[0] = boot_cpu_id; 289187966Sbz apic_cpuids[boot_cpu_id] = 0; 290182902Skmacy 291182902Skmacy assign_cpu_ids(); 292182902Skmacy 293182902Skmacy /* Start each Application Processor */ 294182902Skmacy start_all_aps(); 295182902Skmacy 296182902Skmacy /* Setup the initial logical CPUs info. */ 297222813Sattilio logical_cpus = 0; 298222813Sattilio CPU_ZERO(&logical_cpus_mask); 299182902Skmacy if (cpu_feature & CPUID_HTT) 300182902Skmacy logical_cpus = (cpu_procinfo & CPUID_HTT_CORES) >> 16; 301182902Skmacy 302182902Skmacy set_interrupt_apic_ids(); 303182902Skmacy} 304182902Skmacy 305182902Skmacy 306184112Skmacystatic void 307184112Skmacyiv_rendezvous(uintptr_t a, uintptr_t b) 308184112Skmacy{ 309184115Skmacy smp_rendezvous_action(); 310184112Skmacy} 311184112Skmacy 312184112Skmacystatic void 313184112Skmacyiv_invltlb(uintptr_t a, uintptr_t b) 314184112Skmacy{ 315184115Skmacy xen_tlb_flush(); 316184112Skmacy} 317184112Skmacy 318184112Skmacystatic void 319184112Skmacyiv_invlpg(uintptr_t a, uintptr_t b) 320184112Skmacy{ 321184115Skmacy xen_invlpg(a); 322184112Skmacy} 323184112Skmacy 324184112Skmacystatic void 325184112Skmacyiv_invlrng(uintptr_t a, uintptr_t b) 326184112Skmacy{ 327184115Skmacy vm_offset_t start = (vm_offset_t)a; 328184115Skmacy vm_offset_t end = (vm_offset_t)b; 329184115Skmacy 330184115Skmacy while (start < end) { 331184115Skmacy xen_invlpg(start); 332184115Skmacy start += PAGE_SIZE; 333184115Skmacy } 334184112Skmacy} 335184112Skmacy 336184115Skmacy 337184112Skmacystatic void 338184112Skmacyiv_invlcache(uintptr_t a, uintptr_t b) 339184112Skmacy{ 340184115Skmacy 341184115Skmacy wbinvd(); 342184198Skmacy atomic_add_int(&smp_tlb_wait, 1); 343184112Skmacy} 344184112Skmacy 345184112Skmacystatic void 346184112Skmacyiv_lazypmap(uintptr_t a, uintptr_t b) 347184112Skmacy{ 348184115Skmacy pmap_lazyfix_action(); 349184224Skmacy atomic_add_int(&smp_tlb_wait, 1); 350184112Skmacy} 351184112Skmacy 352193154Sadrian/* 353193154Sadrian * These start from "IPI offset" APIC_IPI_INTS 354193154Sadrian */ 355193154Sadrianstatic call_data_func_t *ipi_vectors[6] = 356184112Skmacy{ 357184224Skmacy iv_rendezvous, 358184224Skmacy iv_invltlb, 359184224Skmacy iv_invlpg, 360184224Skmacy iv_invlrng, 361184224Skmacy iv_invlcache, 362184224Skmacy iv_lazypmap, 363184224Skmacy}; 364184224Skmacy 365184224Skmacy/* 366184224Skmacy * Reschedule call back. Nothing to do, 367184224Skmacy * all the work is done automatically when 368184224Skmacy * we return from the interrupt. 369184224Skmacy */ 370184224Skmacystatic int 371184224Skmacysmp_reschedule_interrupt(void *unused) 372184224Skmacy{ 373184198Skmacy int cpu = PCPU_GET(cpuid); 374184198Skmacy u_int ipi_bitmap; 375184198Skmacy 376184198Skmacy ipi_bitmap = atomic_readandclear_int(&cpu_ipi_pending[cpu]); 377184198Skmacy 378184198Skmacy if (ipi_bitmap & (1 << IPI_PREEMPT)) { 379184198Skmacy#ifdef COUNT_IPIS 380184198Skmacy (*ipi_preempt_counts[cpu])++; 381184198Skmacy#endif 382184198Skmacy sched_preempt(curthread); 383184198Skmacy } 384184198Skmacy 385184198Skmacy if (ipi_bitmap & (1 << IPI_AST)) { 386184198Skmacy#ifdef COUNT_IPIS 387184198Skmacy (*ipi_ast_counts[cpu])++; 388184198Skmacy#endif 389184198Skmacy /* Nothing to do for AST */ 390184198Skmacy } 391184198Skmacy return (FILTER_HANDLED); 392184112Skmacy} 393184112Skmacy 394184112Skmacystruct _call_data { 395184224Skmacy uint16_t func_id; 396184224Skmacy uint16_t wait; 397184112Skmacy uintptr_t arg1; 398184112Skmacy uintptr_t arg2; 399184112Skmacy atomic_t started; 400184112Skmacy atomic_t finished; 401184112Skmacy}; 402184112Skmacy 403184112Skmacystatic struct _call_data *call_data; 404184112Skmacy 405184198Skmacystatic int 406184112Skmacysmp_call_function_interrupt(void *unused) 407184112Skmacy{ 408184224Skmacy call_data_func_t *func; 409184112Skmacy uintptr_t arg1 = call_data->arg1; 410184112Skmacy uintptr_t arg2 = call_data->arg2; 411184112Skmacy int wait = call_data->wait; 412184224Skmacy atomic_t *started = &call_data->started; 413184224Skmacy atomic_t *finished = &call_data->finished; 414184112Skmacy 415193154Sadrian /* We only handle function IPIs, not bitmap IPIs */ 416193154Sadrian if (call_data->func_id < APIC_IPI_INTS || call_data->func_id > IPI_BITMAP_VECTOR) 417184224Skmacy panic("invalid function id %u", call_data->func_id); 418184224Skmacy 419193154Sadrian func = ipi_vectors[call_data->func_id - APIC_IPI_INTS]; 420184112Skmacy /* 421184112Skmacy * Notify initiating CPU that I've grabbed the data and am 422184112Skmacy * about to execute the function 423184112Skmacy */ 424184112Skmacy mb(); 425184224Skmacy atomic_inc(started); 426184112Skmacy /* 427184112Skmacy * At this point the info structure may be out of scope unless wait==1 428184112Skmacy */ 429184112Skmacy (*func)(arg1, arg2); 430184112Skmacy 431184112Skmacy if (wait) { 432184112Skmacy mb(); 433184224Skmacy atomic_inc(finished); 434184112Skmacy } 435184224Skmacy atomic_add_int(&smp_tlb_wait, 1); 436184198Skmacy return (FILTER_HANDLED); 437184112Skmacy} 438184112Skmacy 439184112Skmacy/* 440182902Skmacy * Print various information about the SMP system hardware and setup. 441182902Skmacy */ 442182902Skmacyvoid 443182902Skmacycpu_mp_announce(void) 444182902Skmacy{ 445182902Skmacy int i, x; 446182902Skmacy 447182902Skmacy /* List CPUs */ 448182902Skmacy printf(" cpu0 (BSP): APIC ID: %2d\n", boot_cpu_id); 449182902Skmacy for (i = 1, x = 0; x <= MAX_APIC_ID; x++) { 450182902Skmacy if (!cpu_info[x].cpu_present || cpu_info[x].cpu_bsp) 451182902Skmacy continue; 452182902Skmacy if (cpu_info[x].cpu_disabled) 453182902Skmacy printf(" cpu (AP): APIC ID: %2d (disabled)\n", x); 454182902Skmacy else { 455182902Skmacy KASSERT(i < mp_ncpus, 456182902Skmacy ("mp_ncpus and actual cpus are out of whack")); 457182902Skmacy printf(" cpu%d (AP): APIC ID: %2d\n", i++, x); 458182902Skmacy } 459182902Skmacy } 460182902Skmacy} 461182902Skmacy 462184112Skmacystatic int 463184112Skmacyxen_smp_intr_init(unsigned int cpu) 464184112Skmacy{ 465184112Skmacy int rc; 466186557Skmacy unsigned int irq; 467186557Skmacy 468184112Skmacy per_cpu(resched_irq, cpu) = per_cpu(callfunc_irq, cpu) = -1; 469184112Skmacy 470184112Skmacy sprintf(resched_name[cpu], "resched%u", cpu); 471184112Skmacy rc = bind_ipi_to_irqhandler(RESCHEDULE_VECTOR, 472184112Skmacy cpu, 473184112Skmacy resched_name[cpu], 474184112Skmacy smp_reschedule_interrupt, 475217072Sjhb INTR_TYPE_TTY, &irq); 476184112Skmacy 477193082Sadrian printf("[XEN] IPI cpu=%d irq=%d vector=RESCHEDULE_VECTOR (%d)\n", 478193082Sadrian cpu, irq, RESCHEDULE_VECTOR); 479184198Skmacy 480186557Skmacy per_cpu(resched_irq, cpu) = irq; 481184112Skmacy 482184112Skmacy sprintf(callfunc_name[cpu], "callfunc%u", cpu); 483184112Skmacy rc = bind_ipi_to_irqhandler(CALL_FUNCTION_VECTOR, 484184112Skmacy cpu, 485184112Skmacy callfunc_name[cpu], 486184112Skmacy smp_call_function_interrupt, 487217072Sjhb INTR_TYPE_TTY, &irq); 488184112Skmacy if (rc < 0) 489184112Skmacy goto fail; 490186557Skmacy per_cpu(callfunc_irq, cpu) = irq; 491184112Skmacy 492193082Sadrian printf("[XEN] IPI cpu=%d irq=%d vector=CALL_FUNCTION_VECTOR (%d)\n", 493193082Sadrian cpu, irq, CALL_FUNCTION_VECTOR); 494184198Skmacy 495184198Skmacy 496184112Skmacy if ((cpu != 0) && ((rc = ap_cpu_initclocks(cpu)) != 0)) 497184112Skmacy goto fail; 498184112Skmacy 499184112Skmacy return 0; 500184112Skmacy 501184112Skmacy fail: 502184112Skmacy if (per_cpu(resched_irq, cpu) >= 0) 503186557Skmacy unbind_from_irqhandler(per_cpu(resched_irq, cpu)); 504184112Skmacy if (per_cpu(callfunc_irq, cpu) >= 0) 505186557Skmacy unbind_from_irqhandler(per_cpu(callfunc_irq, cpu)); 506184112Skmacy return rc; 507184112Skmacy} 508184112Skmacy 509184198Skmacystatic void 510184198Skmacyxen_smp_intr_init_cpus(void *unused) 511184198Skmacy{ 512184198Skmacy int i; 513184198Skmacy 514184198Skmacy for (i = 0; i < mp_ncpus; i++) 515184198Skmacy xen_smp_intr_init(i); 516184198Skmacy} 517184198Skmacy 518182902Skmacy#define MTOPSIZE (1<<(14 + PAGE_SHIFT)) 519182902Skmacy 520182902Skmacy/* 521182902Skmacy * AP CPU's call this to initialize themselves. 522182902Skmacy */ 523182902Skmacyvoid 524182902Skmacyinit_secondary(void) 525182902Skmacy{ 526182902Skmacy vm_offset_t addr; 527223758Sattilio u_int cpuid; 528182902Skmacy int gsel_tss; 529182902Skmacy 530182902Skmacy 531182902Skmacy /* bootAP is set in start_ap() to our ID. */ 532182902Skmacy PCPU_SET(currentldt, _default_ldt); 533182902Skmacy gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); 534182902Skmacy#if 0 535182902Skmacy gdt[bootAP * NGDT + GPROC0_SEL].sd.sd_type = SDT_SYS386TSS; 536182902Skmacy#endif 537182902Skmacy PCPU_SET(common_tss.tss_esp0, 0); /* not used until after switch */ 538182902Skmacy PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL)); 539182902Skmacy PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16); 540182902Skmacy#if 0 541182902Skmacy PCPU_SET(tss_gdt, &gdt[bootAP * NGDT + GPROC0_SEL].sd); 542182902Skmacy 543182902Skmacy PCPU_SET(common_tssd, *PCPU_GET(tss_gdt)); 544182902Skmacy#endif 545182902Skmacy PCPU_SET(fsgs_gdt, &gdt[GUFS_SEL].sd); 546182902Skmacy 547182902Skmacy /* 548182902Skmacy * Set to a known state: 549182902Skmacy * Set by mpboot.s: CR0_PG, CR0_PE 550182902Skmacy * Set by cpu_setregs: CR0_NE, CR0_MP, CR0_TS, CR0_WP, CR0_AM 551182902Skmacy */ 552182902Skmacy /* 553182902Skmacy * signal our startup to the BSP. 554182902Skmacy */ 555182902Skmacy mp_naps++; 556182902Skmacy 557182902Skmacy /* Spin until the BSP releases the AP's. */ 558182902Skmacy while (!aps_ready) 559182902Skmacy ia32_pause(); 560182902Skmacy 561182902Skmacy /* BSP may have changed PTD while we were waiting */ 562182902Skmacy invltlb(); 563182902Skmacy for (addr = 0; addr < NKPT * NBPDR - 1; addr += PAGE_SIZE) 564182902Skmacy invlpg(addr); 565182902Skmacy 566182902Skmacy /* set up FPU state on the AP */ 567189420Sjhb npxinit(); 568182902Skmacy#if 0 569182902Skmacy 570182902Skmacy /* set up SSE registers */ 571182902Skmacy enable_sse(); 572182902Skmacy#endif 573182902Skmacy#if 0 && defined(PAE) 574182902Skmacy /* Enable the PTE no-execute bit. */ 575182902Skmacy if ((amd_feature & AMDID_NX) != 0) { 576182902Skmacy uint64_t msr; 577182902Skmacy 578182902Skmacy msr = rdmsr(MSR_EFER) | EFER_NXE; 579182902Skmacy wrmsr(MSR_EFER, msr); 580182902Skmacy } 581182902Skmacy#endif 582182902Skmacy#if 0 583182902Skmacy /* A quick check from sanity claus */ 584182902Skmacy if (PCPU_GET(apic_id) != lapic_id()) { 585182902Skmacy printf("SMP: cpuid = %d\n", PCPU_GET(cpuid)); 586182902Skmacy printf("SMP: actual apic_id = %d\n", lapic_id()); 587182902Skmacy printf("SMP: correct apic_id = %d\n", PCPU_GET(apic_id)); 588182902Skmacy panic("cpuid mismatch! boom!!"); 589182902Skmacy } 590182902Skmacy#endif 591182902Skmacy 592182902Skmacy /* Initialize curthread. */ 593182902Skmacy KASSERT(PCPU_GET(idlethread) != NULL, ("no idle thread")); 594182902Skmacy PCPU_SET(curthread, PCPU_GET(idlethread)); 595182902Skmacy 596182902Skmacy mtx_lock_spin(&ap_boot_mtx); 597182902Skmacy#if 0 598182902Skmacy 599182902Skmacy /* Init local apic for irq's */ 600182902Skmacy lapic_setup(1); 601182902Skmacy#endif 602182902Skmacy smp_cpus++; 603182902Skmacy 604223758Sattilio cpuid = PCPU_GET(cpuid); 605223758Sattilio CTR1(KTR_SMP, "SMP: AP CPU #%d Launched", cpuid); 606223758Sattilio printf("SMP: AP CPU #%d Launched!\n", cpuid); 607182902Skmacy 608182902Skmacy /* Determine if we are a logical CPU. */ 609182902Skmacy if (logical_cpus > 1 && PCPU_GET(apic_id) % logical_cpus != 0) 610223758Sattilio CPU_SET(cpuid, &logical_cpus_mask); 611182902Skmacy 612182902Skmacy /* Determine if we are a hyperthread. */ 613182902Skmacy if (hyperthreading_cpus > 1 && 614182902Skmacy PCPU_GET(apic_id) % hyperthreading_cpus != 0) 615223758Sattilio CPU_SET(cpuid, &hyperthreading_cpus_mask); 616182902Skmacy#if 0 617182902Skmacy if (bootverbose) 618182902Skmacy lapic_dump("AP"); 619182902Skmacy#endif 620182902Skmacy if (smp_cpus == mp_ncpus) { 621182902Skmacy /* enable IPI's, tlb shootdown, freezes etc */ 622182902Skmacy atomic_store_rel_int(&smp_started, 1); 623182902Skmacy smp_active = 1; /* historic */ 624182902Skmacy } 625182902Skmacy 626182902Skmacy mtx_unlock_spin(&ap_boot_mtx); 627182902Skmacy 628182902Skmacy /* wait until all the AP's are up */ 629182902Skmacy while (smp_started == 0) 630182902Skmacy ia32_pause(); 631182902Skmacy 632183131Skmacy PCPU_SET(curthread, PCPU_GET(idlethread)); 633221835Smav 634221835Smav /* Start per-CPU event timers. */ 635221835Smav cpu_initclocks_ap(); 636221835Smav 637182902Skmacy /* enter the scheduler */ 638182902Skmacy sched_throw(NULL); 639182902Skmacy 640182902Skmacy panic("scheduler returned us to %s", __func__); 641182902Skmacy /* NOTREACHED */ 642182902Skmacy} 643182902Skmacy 644182902Skmacy/******************************************************************* 645182902Skmacy * local functions and data 646182902Skmacy */ 647182902Skmacy 648182902Skmacy/* 649182902Skmacy * We tell the I/O APIC code about all the CPUs we want to receive 650182902Skmacy * interrupts. If we don't want certain CPUs to receive IRQs we 651182902Skmacy * can simply not tell the I/O APIC code about them in this function. 652182902Skmacy * We also do not tell it about the BSP since it tells itself about 653182902Skmacy * the BSP internally to work with UP kernels and on UP machines. 654182902Skmacy */ 655182902Skmacystatic void 656182902Skmacyset_interrupt_apic_ids(void) 657182902Skmacy{ 658182902Skmacy u_int i, apic_id; 659182902Skmacy 660182902Skmacy for (i = 0; i < MAXCPU; i++) { 661182902Skmacy apic_id = cpu_apic_ids[i]; 662182902Skmacy if (apic_id == -1) 663182902Skmacy continue; 664182902Skmacy if (cpu_info[apic_id].cpu_bsp) 665182902Skmacy continue; 666182902Skmacy if (cpu_info[apic_id].cpu_disabled) 667182902Skmacy continue; 668182902Skmacy 669182902Skmacy /* Don't let hyperthreads service interrupts. */ 670182902Skmacy if (hyperthreading_cpus > 1 && 671182902Skmacy apic_id % hyperthreading_cpus != 0) 672182902Skmacy continue; 673182902Skmacy 674182902Skmacy intr_add_cpu(i); 675182902Skmacy } 676182902Skmacy} 677182902Skmacy 678182902Skmacy/* 679182902Skmacy * Assign logical CPU IDs to local APICs. 680182902Skmacy */ 681182902Skmacystatic void 682182902Skmacyassign_cpu_ids(void) 683182902Skmacy{ 684182902Skmacy u_int i; 685182902Skmacy 686182902Skmacy /* Check for explicitly disabled CPUs. */ 687182902Skmacy for (i = 0; i <= MAX_APIC_ID; i++) { 688182902Skmacy if (!cpu_info[i].cpu_present || cpu_info[i].cpu_bsp) 689182902Skmacy continue; 690182902Skmacy 691182902Skmacy /* Don't use this CPU if it has been disabled by a tunable. */ 692182902Skmacy if (resource_disabled("lapic", i)) { 693182902Skmacy cpu_info[i].cpu_disabled = 1; 694182902Skmacy continue; 695182902Skmacy } 696182902Skmacy } 697182902Skmacy 698182902Skmacy /* 699182902Skmacy * Assign CPU IDs to local APIC IDs and disable any CPUs 700182902Skmacy * beyond MAXCPU. CPU 0 has already been assigned to the BSP, 701182902Skmacy * so we only have to assign IDs for APs. 702182902Skmacy */ 703182902Skmacy mp_ncpus = 1; 704182902Skmacy for (i = 0; i <= MAX_APIC_ID; i++) { 705182902Skmacy if (!cpu_info[i].cpu_present || cpu_info[i].cpu_bsp || 706182902Skmacy cpu_info[i].cpu_disabled) 707182902Skmacy continue; 708182902Skmacy 709182902Skmacy if (mp_ncpus < MAXCPU) { 710182902Skmacy cpu_apic_ids[mp_ncpus] = i; 711187966Sbz apic_cpuids[i] = mp_ncpus; 712182902Skmacy mp_ncpus++; 713182902Skmacy } else 714182902Skmacy cpu_info[i].cpu_disabled = 1; 715182902Skmacy } 716182902Skmacy KASSERT(mp_maxid >= mp_ncpus - 1, 717182902Skmacy ("%s: counters out of sync: max %d, count %d", __func__, mp_maxid, 718182902Skmacy mp_ncpus)); 719182902Skmacy} 720182902Skmacy 721182902Skmacy/* 722182902Skmacy * start each AP in our list 723182902Skmacy */ 724182902Skmacy/* Lowest 1MB is already mapped: don't touch*/ 725182902Skmacy#define TMPMAP_START 1 726182902Skmacyint 727182902Skmacystart_all_aps(void) 728182902Skmacy{ 729182902Skmacy int x,apic_id, cpu; 730182902Skmacy struct pcpu *pc; 731182902Skmacy 732182902Skmacy mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN); 733182902Skmacy 734182902Skmacy /* set up temporary P==V mapping for AP boot */ 735182902Skmacy /* XXX this is a hack, we should boot the AP on its own stack/PTD */ 736182902Skmacy 737182902Skmacy /* start each AP */ 738182902Skmacy for (cpu = 1; cpu < mp_ncpus; cpu++) { 739182902Skmacy apic_id = cpu_apic_ids[cpu]; 740182902Skmacy 741182902Skmacy 742182902Skmacy bootAP = cpu; 743182902Skmacy bootAPgdt = gdt + (512*cpu); 744182902Skmacy 745182902Skmacy /* Get per-cpu data */ 746182902Skmacy pc = &__pcpu[bootAP]; 747183132Skmacy pcpu_init(pc, bootAP, sizeof(struct pcpu)); 748194784Sjeff dpcpu_init((void *)kmem_alloc(kernel_map, DPCPU_SIZE), bootAP); 749182902Skmacy pc->pc_apic_id = cpu_apic_ids[bootAP]; 750182902Skmacy pc->pc_prvspace = pc; 751182902Skmacy pc->pc_curthread = 0; 752182902Skmacy 753182902Skmacy gdt_segs[GPRIV_SEL].ssd_base = (int) pc; 754182902Skmacy gdt_segs[GPROC0_SEL].ssd_base = (int) &pc->pc_common_tss; 755182902Skmacy 756215587Scperciva PT_SET_MA(bootAPgdt, VTOM(bootAPgdt) | PG_V | PG_RW); 757182902Skmacy bzero(bootAPgdt, PAGE_SIZE); 758182902Skmacy for (x = 0; x < NGDT; x++) 759182902Skmacy ssdtosd(&gdt_segs[x], &bootAPgdt[x].sd); 760182902Skmacy PT_SET_MA(bootAPgdt, vtomach(bootAPgdt) | PG_V); 761183345Skmacy#ifdef notyet 762183345Skmacy 763183345Skmacy if (HYPERVISOR_vcpu_op(VCPUOP_get_physid, cpu, &cpu_id) == 0) { 764183345Skmacy apicid = xen_vcpu_physid_to_x86_apicid(cpu_id.phys_id); 765183345Skmacy acpiid = xen_vcpu_physid_to_x86_acpiid(cpu_id.phys_id); 766183345Skmacy#ifdef CONFIG_ACPI 767183345Skmacy if (acpiid != 0xff) 768183345Skmacy x86_acpiid_to_apicid[acpiid] = apicid; 769183345Skmacy#endif 770183345Skmacy } 771183345Skmacy#endif 772183345Skmacy 773182902Skmacy /* attempt to start the Application Processor */ 774182902Skmacy if (!start_ap(cpu)) { 775182902Skmacy printf("AP #%d (PHY# %d) failed!\n", cpu, apic_id); 776182902Skmacy /* better panic as the AP may be running loose */ 777182902Skmacy printf("panic y/n? [y] "); 778182902Skmacy if (cngetc() != 'n') 779182902Skmacy panic("bye-bye"); 780182902Skmacy } 781182902Skmacy 782222813Sattilio CPU_SET(cpu, &all_cpus); /* record AP in CPU map */ 783182902Skmacy } 784182902Skmacy 785182902Skmacy 786182902Skmacy pmap_invalidate_range(kernel_pmap, 0, NKPT * NBPDR - 1); 787182902Skmacy 788182902Skmacy /* number of APs actually started */ 789182902Skmacy return mp_naps; 790182902Skmacy} 791182902Skmacy 792182902Skmacyextern uint8_t *pcpu_boot_stack; 793182902Skmacyextern trap_info_t trap_table[]; 794182902Skmacy 795182902Skmacystatic void 796182902Skmacysmp_trap_init(trap_info_t *trap_ctxt) 797182902Skmacy{ 798182902Skmacy const trap_info_t *t = trap_table; 799182902Skmacy 800182902Skmacy for (t = trap_table; t->address; t++) { 801182902Skmacy trap_ctxt[t->vector].flags = t->flags; 802182902Skmacy trap_ctxt[t->vector].cs = t->cs; 803182902Skmacy trap_ctxt[t->vector].address = t->address; 804182902Skmacy } 805182902Skmacy} 806182902Skmacy 807182902Skmacyextern int nkpt; 808184112Skmacystatic void 809182902Skmacycpu_initialize_context(unsigned int cpu) 810182902Skmacy{ 811182902Skmacy /* vcpu_guest_context_t is too large to allocate on the stack. 812182902Skmacy * Hence we allocate statically and protect it with a lock */ 813182902Skmacy vm_page_t m[4]; 814182902Skmacy static vcpu_guest_context_t ctxt; 815182902Skmacy vm_offset_t boot_stack; 816183131Skmacy vm_offset_t newPTD; 817183131Skmacy vm_paddr_t ma[NPGPTD]; 818182902Skmacy static int color; 819182902Skmacy int i; 820182902Skmacy 821182902Skmacy /* 822183131Skmacy * Page 0,[0-3] PTD 823183131Skmacy * Page 1, [4] boot stack 824183131Skmacy * Page [5] PDPT 825182902Skmacy * 826182902Skmacy */ 827183131Skmacy for (i = 0; i < NPGPTD + 2; i++) { 828182902Skmacy m[i] = vm_page_alloc(NULL, color++, 829182902Skmacy VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | 830182902Skmacy VM_ALLOC_ZERO); 831182902Skmacy 832182902Skmacy pmap_zero_page(m[i]); 833182902Skmacy 834182902Skmacy } 835183131Skmacy boot_stack = kmem_alloc_nofault(kernel_map, 1); 836183131Skmacy newPTD = kmem_alloc_nofault(kernel_map, NPGPTD); 837215587Scperciva ma[0] = VM_PAGE_TO_MACH(m[0])|PG_V; 838182902Skmacy 839183131Skmacy#ifdef PAE 840183131Skmacy pmap_kenter(boot_stack, VM_PAGE_TO_PHYS(m[NPGPTD + 1])); 841183131Skmacy for (i = 0; i < NPGPTD; i++) { 842183131Skmacy ((vm_paddr_t *)boot_stack)[i] = 843215587Scperciva ma[i] = VM_PAGE_TO_MACH(m[i])|PG_V; 844182902Skmacy } 845183131Skmacy#endif 846182902Skmacy 847182902Skmacy /* 848182902Skmacy * Copy cpu0 IdlePTD to new IdlePTD - copying only 849182902Skmacy * kernel mappings 850182902Skmacy */ 851183131Skmacy pmap_qenter(newPTD, m, 4); 852183131Skmacy 853183131Skmacy memcpy((uint8_t *)newPTD + KPTDI*sizeof(vm_paddr_t), 854183131Skmacy (uint8_t *)PTOV(IdlePTD) + KPTDI*sizeof(vm_paddr_t), 855182902Skmacy nkpt*sizeof(vm_paddr_t)); 856183131Skmacy 857183131Skmacy pmap_qremove(newPTD, 4); 858183131Skmacy kmem_free(kernel_map, newPTD, 4); 859182902Skmacy /* 860182902Skmacy * map actual idle stack to boot_stack 861182902Skmacy */ 862183131Skmacy pmap_kenter(boot_stack, VM_PAGE_TO_PHYS(m[NPGPTD])); 863182902Skmacy 864182902Skmacy 865215587Scperciva xen_pgdpt_pin(VM_PAGE_TO_MACH(m[NPGPTD + 1])); 866182902Skmacy vm_page_lock_queues(); 867182902Skmacy for (i = 0; i < 4; i++) { 868183131Skmacy int pdir = (PTDPTDI + i) / NPDEPG; 869183131Skmacy int curoffset = (PTDPTDI + i) % NPDEPG; 870183131Skmacy 871182902Skmacy xen_queue_pt_update((vm_paddr_t) 872183131Skmacy ((ma[pdir] & ~PG_V) + (curoffset*sizeof(vm_paddr_t))), 873182902Skmacy ma[i]); 874182902Skmacy } 875182902Skmacy PT_UPDATES_FLUSH(); 876182902Skmacy vm_page_unlock_queues(); 877182902Skmacy 878182902Skmacy memset(&ctxt, 0, sizeof(ctxt)); 879182902Skmacy ctxt.flags = VGCF_IN_KERNEL; 880182902Skmacy ctxt.user_regs.ds = GSEL(GDATA_SEL, SEL_KPL); 881182902Skmacy ctxt.user_regs.es = GSEL(GDATA_SEL, SEL_KPL); 882182902Skmacy ctxt.user_regs.fs = GSEL(GPRIV_SEL, SEL_KPL); 883182902Skmacy ctxt.user_regs.gs = GSEL(GDATA_SEL, SEL_KPL); 884182902Skmacy ctxt.user_regs.cs = GSEL(GCODE_SEL, SEL_KPL); 885182902Skmacy ctxt.user_regs.ss = GSEL(GDATA_SEL, SEL_KPL); 886182902Skmacy ctxt.user_regs.eip = (unsigned long)init_secondary; 887182902Skmacy ctxt.user_regs.eflags = PSL_KERNEL | 0x1000; /* IOPL_RING1 */ 888182902Skmacy 889182902Skmacy memset(&ctxt.fpu_ctxt, 0, sizeof(ctxt.fpu_ctxt)); 890182902Skmacy 891182902Skmacy smp_trap_init(ctxt.trap_ctxt); 892182902Skmacy 893182902Skmacy ctxt.ldt_ents = 0; 894182902Skmacy ctxt.gdt_frames[0] = (uint32_t)((uint64_t)vtomach(bootAPgdt) >> PAGE_SHIFT); 895182902Skmacy ctxt.gdt_ents = 512; 896182902Skmacy 897182902Skmacy#ifdef __i386__ 898182902Skmacy ctxt.user_regs.esp = boot_stack + PAGE_SIZE; 899182902Skmacy 900182902Skmacy ctxt.kernel_ss = GSEL(GDATA_SEL, SEL_KPL); 901182902Skmacy ctxt.kernel_sp = boot_stack + PAGE_SIZE; 902182902Skmacy 903182902Skmacy ctxt.event_callback_cs = GSEL(GCODE_SEL, SEL_KPL); 904182902Skmacy ctxt.event_callback_eip = (unsigned long)Xhypervisor_callback; 905182902Skmacy ctxt.failsafe_callback_cs = GSEL(GCODE_SEL, SEL_KPL); 906182902Skmacy ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback; 907182902Skmacy 908215587Scperciva ctxt.ctrlreg[3] = VM_PAGE_TO_MACH(m[NPGPTD + 1]); 909182902Skmacy#else /* __x86_64__ */ 910182902Skmacy ctxt.user_regs.esp = idle->thread.rsp0 - sizeof(struct pt_regs); 911182902Skmacy ctxt.kernel_ss = GSEL(GDATA_SEL, SEL_KPL); 912182902Skmacy ctxt.kernel_sp = idle->thread.rsp0; 913182902Skmacy 914182902Skmacy ctxt.event_callback_eip = (unsigned long)hypervisor_callback; 915182902Skmacy ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback; 916182902Skmacy ctxt.syscall_callback_eip = (unsigned long)system_call; 917182902Skmacy 918182902Skmacy ctxt.ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(init_level4_pgt)); 919182902Skmacy 920182902Skmacy ctxt.gs_base_kernel = (unsigned long)(cpu_pda(cpu)); 921182902Skmacy#endif 922182902Skmacy 923182902Skmacy printf("gdtpfn=%lx pdptpfn=%lx\n", 924182902Skmacy ctxt.gdt_frames[0], 925182902Skmacy ctxt.ctrlreg[3] >> PAGE_SHIFT); 926182902Skmacy 927182902Skmacy PANIC_IF(HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, &ctxt)); 928182902Skmacy DELAY(3000); 929182902Skmacy PANIC_IF(HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL)); 930182902Skmacy} 931182902Skmacy 932182902Skmacy/* 933182902Skmacy * This function starts the AP (application processor) identified 934182902Skmacy * by the APIC ID 'physicalCpu'. It does quite a "song and dance" 935182902Skmacy * to accomplish this. This is necessary because of the nuances 936182902Skmacy * of the different hardware we might encounter. It isn't pretty, 937182902Skmacy * but it seems to work. 938182902Skmacy */ 939183131Skmacy 940183131Skmacyint cpus; 941182902Skmacystatic int 942182902Skmacystart_ap(int apic_id) 943182902Skmacy{ 944182902Skmacy int ms; 945182902Skmacy 946182902Skmacy /* used as a watchpoint to signal AP startup */ 947182902Skmacy cpus = mp_naps; 948182902Skmacy 949182902Skmacy cpu_initialize_context(apic_id); 950182902Skmacy 951182902Skmacy /* Wait up to 5 seconds for it to start. */ 952182902Skmacy for (ms = 0; ms < 5000; ms++) { 953182902Skmacy if (mp_naps > cpus) 954182902Skmacy return 1; /* return SUCCESS */ 955182902Skmacy DELAY(1000); 956182902Skmacy } 957182902Skmacy return 0; /* return FAILURE */ 958182902Skmacy} 959182902Skmacy 960182902Skmacy/* 961222065Sattilio * send an IPI to a specific CPU. 962222065Sattilio */ 963222065Sattiliostatic void 964222065Sattilioipi_send_cpu(int cpu, u_int ipi) 965222065Sattilio{ 966222065Sattilio u_int bitmap, old_pending, new_pending; 967222065Sattilio 968222065Sattilio if (IPI_IS_BITMAPED(ipi)) { 969222065Sattilio bitmap = 1 << ipi; 970222065Sattilio ipi = IPI_BITMAP_VECTOR; 971222065Sattilio do { 972222065Sattilio old_pending = cpu_ipi_pending[cpu]; 973222065Sattilio new_pending = old_pending | bitmap; 974222065Sattilio } while (!atomic_cmpset_int(&cpu_ipi_pending[cpu], 975222065Sattilio old_pending, new_pending)); 976222065Sattilio if (!old_pending) 977222065Sattilio ipi_pcpu(cpu, RESCHEDULE_VECTOR); 978222065Sattilio } else { 979222065Sattilio KASSERT(call_data != NULL, ("call_data not set")); 980222065Sattilio ipi_pcpu(cpu, CALL_FUNCTION_VECTOR); 981222065Sattilio } 982222065Sattilio} 983222065Sattilio 984222065Sattilio/* 985182902Skmacy * Flush the TLB on all other CPU's 986182902Skmacy */ 987182902Skmacystatic void 988182902Skmacysmp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2) 989182902Skmacy{ 990182902Skmacy u_int ncpu; 991184198Skmacy struct _call_data data; 992182902Skmacy 993182902Skmacy ncpu = mp_ncpus - 1; /* does not shootdown self */ 994182902Skmacy if (ncpu < 1) 995182902Skmacy return; /* no other cpus */ 996182902Skmacy if (!(read_eflags() & PSL_I)) 997182902Skmacy panic("%s: interrupts disabled", __func__); 998182902Skmacy mtx_lock_spin(&smp_ipi_mtx); 999193098Sadrian KASSERT(call_data == NULL, ("call_data isn't null?!")); 1000193098Sadrian call_data = &data; 1001184224Skmacy call_data->func_id = vector; 1002184112Skmacy call_data->arg1 = addr1; 1003184112Skmacy call_data->arg2 = addr2; 1004182902Skmacy atomic_store_rel_int(&smp_tlb_wait, 0); 1005182902Skmacy ipi_all_but_self(vector); 1006182902Skmacy while (smp_tlb_wait < ncpu) 1007182902Skmacy ia32_pause(); 1008184224Skmacy call_data = NULL; 1009182902Skmacy mtx_unlock_spin(&smp_ipi_mtx); 1010182902Skmacy} 1011182902Skmacy 1012182902Skmacystatic void 1013222813Sattiliosmp_targeted_tlb_shootdown(cpuset_t mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2) 1014182902Skmacy{ 1015222813Sattilio int cpu, ncpu, othercpus; 1016184224Skmacy struct _call_data data; 1017182902Skmacy 1018182902Skmacy othercpus = mp_ncpus - 1; 1019222813Sattilio if (CPU_ISFULLSET(&mask)) { 1020222813Sattilio if (othercpus < 1) 1021182902Skmacy return; 1022182902Skmacy } else { 1023223758Sattilio CPU_CLR(PCPU_GET(cpuid), &mask); 1024222813Sattilio if (CPU_EMPTY(&mask)) 1025182902Skmacy return; 1026182902Skmacy } 1027182902Skmacy if (!(read_eflags() & PSL_I)) 1028182902Skmacy panic("%s: interrupts disabled", __func__); 1029182902Skmacy mtx_lock_spin(&smp_ipi_mtx); 1030193098Sadrian KASSERT(call_data == NULL, ("call_data isn't null?!")); 1031184224Skmacy call_data = &data; 1032184224Skmacy call_data->func_id = vector; 1033184224Skmacy call_data->arg1 = addr1; 1034184224Skmacy call_data->arg2 = addr2; 1035182902Skmacy atomic_store_rel_int(&smp_tlb_wait, 0); 1036222813Sattilio if (CPU_ISFULLSET(&mask)) { 1037222813Sattilio ncpu = othercpus; 1038182902Skmacy ipi_all_but_self(vector); 1039222813Sattilio } else { 1040222813Sattilio ncpu = 0; 1041256207Smav while ((cpu = CPU_FFS(&mask)) != 0) { 1042222813Sattilio cpu--; 1043222813Sattilio CPU_CLR(cpu, &mask); 1044222813Sattilio CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, 1045222813Sattilio vector); 1046222813Sattilio ipi_send_cpu(cpu, vector); 1047222813Sattilio ncpu++; 1048222813Sattilio } 1049222813Sattilio } 1050182902Skmacy while (smp_tlb_wait < ncpu) 1051182902Skmacy ia32_pause(); 1052184224Skmacy call_data = NULL; 1053182902Skmacy mtx_unlock_spin(&smp_ipi_mtx); 1054182902Skmacy} 1055182902Skmacy 1056182902Skmacyvoid 1057182902Skmacysmp_cache_flush(void) 1058182902Skmacy{ 1059182902Skmacy 1060182902Skmacy if (smp_started) 1061182902Skmacy smp_tlb_shootdown(IPI_INVLCACHE, 0, 0); 1062182902Skmacy} 1063182902Skmacy 1064182902Skmacyvoid 1065182902Skmacysmp_invltlb(void) 1066182902Skmacy{ 1067182902Skmacy 1068182902Skmacy if (smp_started) { 1069182902Skmacy smp_tlb_shootdown(IPI_INVLTLB, 0, 0); 1070182902Skmacy } 1071182902Skmacy} 1072182902Skmacy 1073182902Skmacyvoid 1074182902Skmacysmp_invlpg(vm_offset_t addr) 1075182902Skmacy{ 1076182902Skmacy 1077182902Skmacy if (smp_started) { 1078182902Skmacy smp_tlb_shootdown(IPI_INVLPG, addr, 0); 1079182902Skmacy } 1080182902Skmacy} 1081182902Skmacy 1082182902Skmacyvoid 1083182902Skmacysmp_invlpg_range(vm_offset_t addr1, vm_offset_t addr2) 1084182902Skmacy{ 1085182902Skmacy 1086182902Skmacy if (smp_started) { 1087182902Skmacy smp_tlb_shootdown(IPI_INVLRNG, addr1, addr2); 1088182902Skmacy } 1089182902Skmacy} 1090182902Skmacy 1091182902Skmacyvoid 1092222813Sattiliosmp_masked_invltlb(cpuset_t mask) 1093182902Skmacy{ 1094182902Skmacy 1095182902Skmacy if (smp_started) { 1096182902Skmacy smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, 0, 0); 1097182902Skmacy } 1098182902Skmacy} 1099182902Skmacy 1100182902Skmacyvoid 1101222813Sattiliosmp_masked_invlpg(cpuset_t mask, vm_offset_t addr) 1102182902Skmacy{ 1103182902Skmacy 1104182902Skmacy if (smp_started) { 1105182902Skmacy smp_targeted_tlb_shootdown(mask, IPI_INVLPG, addr, 0); 1106182902Skmacy } 1107182902Skmacy} 1108182902Skmacy 1109182902Skmacyvoid 1110222813Sattiliosmp_masked_invlpg_range(cpuset_t mask, vm_offset_t addr1, vm_offset_t addr2) 1111182902Skmacy{ 1112182902Skmacy 1113182902Skmacy if (smp_started) { 1114182902Skmacy smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, addr1, addr2); 1115182902Skmacy } 1116182902Skmacy} 1117182902Skmacy 1118182902Skmacy/* 1119182902Skmacy * send an IPI to a set of cpus. 1120182902Skmacy */ 1121182902Skmacyvoid 1122222813Sattilioipi_selected(cpuset_t cpus, u_int ipi) 1123182902Skmacy{ 1124182902Skmacy int cpu; 1125182902Skmacy 1126196256Sattilio /* 1127196256Sattilio * IPI_STOP_HARD maps to a NMI and the trap handler needs a bit 1128196256Sattilio * of help in order to understand what is the source. 1129196256Sattilio * Set the mask of receiving CPUs for this purpose. 1130196256Sattilio */ 1131196256Sattilio if (ipi == IPI_STOP_HARD) 1132222813Sattilio CPU_OR_ATOMIC(&ipi_nmi_pending, &cpus); 1133196256Sattilio 1134256207Smav while ((cpu = CPU_FFS(&cpus)) != 0) { 1135182902Skmacy cpu--; 1136222813Sattilio CPU_CLR(cpu, &cpus); 1137222065Sattilio CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi); 1138222065Sattilio ipi_send_cpu(cpu, ipi); 1139182902Skmacy } 1140182902Skmacy} 1141182902Skmacy 1142182902Skmacy/* 1143210939Sjhb * send an IPI to a specific CPU. 1144210939Sjhb */ 1145210939Sjhbvoid 1146210939Sjhbipi_cpu(int cpu, u_int ipi) 1147210939Sjhb{ 1148210939Sjhb 1149210939Sjhb /* 1150210939Sjhb * IPI_STOP_HARD maps to a NMI and the trap handler needs a bit 1151210939Sjhb * of help in order to understand what is the source. 1152210939Sjhb * Set the mask of receiving CPUs for this purpose. 1153210939Sjhb */ 1154210939Sjhb if (ipi == IPI_STOP_HARD) 1155222813Sattilio CPU_SET_ATOMIC(cpu, &ipi_nmi_pending); 1156210939Sjhb 1157210939Sjhb CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi); 1158222065Sattilio ipi_send_cpu(cpu, ipi); 1159210939Sjhb} 1160210939Sjhb 1161210939Sjhb/* 1162182902Skmacy * send an IPI to all CPUs EXCEPT myself 1163182902Skmacy */ 1164182902Skmacyvoid 1165182902Skmacyipi_all_but_self(u_int ipi) 1166182902Skmacy{ 1167222813Sattilio cpuset_t other_cpus; 1168196256Sattilio 1169196256Sattilio /* 1170196256Sattilio * IPI_STOP_HARD maps to a NMI and the trap handler needs a bit 1171196256Sattilio * of help in order to understand what is the source. 1172196256Sattilio * Set the mask of receiving CPUs for this purpose. 1173196256Sattilio */ 1174223758Sattilio other_cpus = all_cpus; 1175223758Sattilio CPU_CLR(PCPU_GET(cpuid), &other_cpus); 1176196256Sattilio if (ipi == IPI_STOP_HARD) 1177222813Sattilio CPU_OR_ATOMIC(&ipi_nmi_pending, &other_cpus); 1178196256Sattilio 1179182902Skmacy CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi); 1180222813Sattilio ipi_selected(other_cpus, ipi); 1181182902Skmacy} 1182182902Skmacy 1183196256Sattilioint 1184196256Sattilioipi_nmi_handler() 1185196256Sattilio{ 1186223758Sattilio u_int cpuid; 1187196256Sattilio 1188196256Sattilio /* 1189196256Sattilio * As long as there is not a simple way to know about a NMI's 1190196256Sattilio * source, if the bitmask for the current CPU is present in 1191196256Sattilio * the global pending bitword an IPI_STOP_HARD has been issued 1192196256Sattilio * and should be handled. 1193196256Sattilio */ 1194223758Sattilio cpuid = PCPU_GET(cpuid); 1195223758Sattilio if (!CPU_ISSET(cpuid, &ipi_nmi_pending)) 1196196256Sattilio return (1); 1197196256Sattilio 1198223758Sattilio CPU_CLR_ATOMIC(cpuid, &ipi_nmi_pending); 1199196256Sattilio cpustop_handler(); 1200196256Sattilio return (0); 1201196256Sattilio} 1202196256Sattilio 1203182902Skmacy/* 1204182902Skmacy * Handle an IPI_STOP by saving our current context and spinning until we 1205182902Skmacy * are resumed. 1206182902Skmacy */ 1207182902Skmacyvoid 1208182902Skmacycpustop_handler(void) 1209182902Skmacy{ 1210222813Sattilio int cpu; 1211182902Skmacy 1212222813Sattilio cpu = PCPU_GET(cpuid); 1213222813Sattilio 1214182902Skmacy savectx(&stoppcbs[cpu]); 1215182902Skmacy 1216182902Skmacy /* Indicate that we are stopped */ 1217223758Sattilio CPU_SET_ATOMIC(cpu, &stopped_cpus); 1218182902Skmacy 1219182902Skmacy /* Wait for restart */ 1220223758Sattilio while (!CPU_ISSET(cpu, &started_cpus)) 1221182902Skmacy ia32_pause(); 1222182902Skmacy 1223223758Sattilio CPU_CLR_ATOMIC(cpu, &started_cpus); 1224223758Sattilio CPU_CLR_ATOMIC(cpu, &stopped_cpus); 1225182902Skmacy 1226182902Skmacy if (cpu == 0 && cpustop_restartfunc != NULL) { 1227182902Skmacy cpustop_restartfunc(); 1228182902Skmacy cpustop_restartfunc = NULL; 1229182902Skmacy } 1230182902Skmacy} 1231182902Skmacy 1232182902Skmacy/* 1233182902Skmacy * This is called once the rest of the system is up and running and we're 1234182902Skmacy * ready to let the AP's out of the pen. 1235182902Skmacy */ 1236182902Skmacystatic void 1237182902Skmacyrelease_aps(void *dummy __unused) 1238182902Skmacy{ 1239182902Skmacy 1240182902Skmacy if (mp_ncpus == 1) 1241182902Skmacy return; 1242182902Skmacy atomic_store_rel_int(&aps_ready, 1); 1243182902Skmacy while (smp_started == 0) 1244182902Skmacy ia32_pause(); 1245182902Skmacy} 1246182902SkmacySYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL); 1247184198SkmacySYSINIT(start_ipis, SI_SUB_INTR, SI_ORDER_ANY, xen_smp_intr_init_cpus, NULL); 1248182902Skmacy 1249