mp_machdep.c revision 183345
/*-
 * Copyright (c) 1996, by Steve Passe
 * Copyright (c) 2008, by Kip Macy
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. The name of the developer may NOT be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/i386/xen/mp_machdep.c 183345 2008-09-25 07:11:04Z kmacy $");

#include "opt_apic.h"
#include "opt_cpu.h"
#include "opt_kstack_pages.h"
#include "opt_mp_watchdog.h"
#include "opt_sched.h"
#include "opt_smp.h"

#if !defined(lint)
#if !defined(SMP)
#error How did you get here?
#endif

#ifndef DEV_APIC
#error The apic device is required for SMP, add "device apic" to your config file.
#endif
#if defined(CPU_DISABLE_CMPXCHG) && !defined(COMPILING_LINT)
#error SMP not supported with CPU_DISABLE_CMPXCHG
#endif
#endif /* not lint */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/cons.h>	/* cngetc() */
#ifdef GPROF
#include <sys/gmon.h>
#endif
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/memrange.h>
#include <sys/mutex.h>
#include <sys/pcpu.h>
#include <sys/proc.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>
#include <vm/vm_page.h>

#include <machine/apicreg.h>
#include <machine/md_var.h>
#include <machine/mp_watchdog.h>
#include <machine/pcb.h>
#include <machine/psl.h>
#include <machine/smp.h>
#include <machine/specialreg.h>
#include <machine/pcpu.h>

#include <machine/xen/xen-os.h>
#include <machine/xen/hypervisor.h>
#include <xen/interface/vcpu.h>

#define WARMBOOT_TARGET		0
#define WARMBOOT_OFF		(KERNBASE + 0x0467)
#define WARMBOOT_SEG		(KERNBASE + 0x0469)

#define stop_cpus_with_nmi	0
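
/*
 * Note: under Xen, IPIs are delivered through event channel upcalls rather
 * than the local APIC (see ipi_pcpu() below), so the NMI-based stop
 * protocol is not usable here and the knob above is hardwired off.
 */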

int	mp_naps;		/* # of Application Processors */
int	boot_cpu_id = -1;	/* designated BSP */

extern	struct pcpu __pcpu[];

static int bootAP;
static union descriptor *bootAPgdt;

/* Free these after use */
void *bootstacks[MAXCPU];

/* Hotwire a 0->4MB V==P mapping */
extern pt_entry_t *KPTphys;

struct pcb stoppcbs[MAXCPU];

/* Variables needed for SMP tlb shootdown. */
vm_offset_t smp_tlb_addr1;
vm_offset_t smp_tlb_addr2;
volatile int smp_tlb_wait;
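
/*
 * Shootdown rendezvous: the initiating CPU publishes addr1/addr2, zeroes
 * smp_tlb_wait, sends the IPI, and then spins until every target CPU has
 * incremented smp_tlb_wait (the increments happen in the TLB-shootdown
 * IPI handlers, outside this file).
 */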

static u_int logical_cpus;

/* used to hold the APs until we are ready to release them */
static struct mtx ap_boot_mtx;

/* Set to 1 once we're ready to let the APs out of the pen. */
static volatile int aps_ready = 0;

/*
 * Store data from cpu_add() until later in the boot when we actually set
 * up the APs.
 */
struct cpu_info {
	int	cpu_present:1;
	int	cpu_bsp:1;
	int	cpu_disabled:1;
} static cpu_info[MAX_APIC_ID + 1];
int cpu_apic_ids[MAXCPU];

/* Holds pending bitmap based IPIs per CPU */
static volatile u_int cpu_ipi_pending[MAXCPU];

static u_int boot_address;

static void	assign_cpu_ids(void);
static void	set_interrupt_apic_ids(void);
int	start_all_aps(void);
static int	start_ap(int apic_id);
static void	release_aps(void *dummy);

static u_int	hyperthreading_cpus;
static cpumask_t	hyperthreading_cpus_mask;

extern void Xhypervisor_callback(void);
extern void failsafe_callback(void);

struct cpu_group *
cpu_topo(void)
{
	if (cpu_cores == 0)
		cpu_cores = 1;
	if (cpu_logical == 0)
		cpu_logical = 1;
	if (mp_ncpus % (cpu_cores * cpu_logical) != 0) {
		printf("WARNING: Non-uniform processors.\n");
		printf("WARNING: Using suboptimal topology.\n");
		return (smp_topo_none());
	}
	/*
	 * Neither multi-core nor hyper-threaded.
	 */
	if (cpu_logical * cpu_cores == 1)
		return (smp_topo_none());
	/*
	 * Only HTT, no multi-core.
	 */
	if (cpu_logical > 1 && cpu_cores == 1)
		return (smp_topo_1level(CG_SHARE_L1, cpu_logical, CG_FLAG_HTT));
	/*
	 * Only multi-core, no HTT.
	 */
	if (cpu_cores > 1 && cpu_logical == 1)
		return (smp_topo_1level(CG_SHARE_NONE, cpu_cores, 0));
	/*
	 * Both HTT and multi-core.
	 */
	return (smp_topo_2level(CG_SHARE_NONE, cpu_cores,
	    CG_SHARE_L1, cpu_logical, CG_FLAG_HTT));
}
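
/*
 * Example: with cpu_cores = 2 and cpu_logical = 2 the final case above
 * builds a two-level topology: per-core groups of two HTT siblings
 * sharing an L1, below a package-level group that shares nothing.
 */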

/*
 * Calculate usable address in base memory for AP trampoline code.
 */
u_int
mp_bootaddress(u_int basemem)
{

	return (basemem);
}
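
/*
 * Note that no trampoline memory is actually reserved above: under Xen
 * the APs are brought up via VCPUOP_initialise/VCPUOP_up (see
 * cpu_initialize_context() below), so the real-mode warm-boot path is
 * never taken.
 */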

void
cpu_add(u_int apic_id, char boot_cpu)
{

	if (apic_id > MAX_APIC_ID) {
		panic("SMP: APIC ID %d too high", apic_id);
		return;
	}
	KASSERT(cpu_info[apic_id].cpu_present == 0, ("CPU %d added twice",
	    apic_id));
	cpu_info[apic_id].cpu_present = 1;
	if (boot_cpu) {
		KASSERT(boot_cpu_id == -1,
		    ("CPU %d claims to be BSP, but CPU %d already is", apic_id,
		    boot_cpu_id));
		boot_cpu_id = apic_id;
		cpu_info[apic_id].cpu_bsp = 1;
	}
	if (mp_ncpus < MAXCPU)
		mp_ncpus++;
	if (bootverbose)
		printf("SMP: Added CPU %d (%s)\n", apic_id, boot_cpu ? "BSP" :
		    "AP");
}

void
cpu_mp_setmaxid(void)
{

	mp_maxid = MAXCPU - 1;
}

int
cpu_mp_probe(void)
{

	/*
	 * Always record BSP in CPU map so that the mbuf init code works
	 * correctly.
	 */
	all_cpus = 1;
	if (mp_ncpus == 0) {
		/*
		 * No CPUs were found, so this must be a UP system.  Set up
		 * the variables to represent a system with a single CPU
		 * with an id of 0.
		 */
		mp_ncpus = 1;
		return (0);
	}

	/* At least one CPU was found. */
	if (mp_ncpus == 1) {
		/*
		 * One CPU was found, so this must be a UP system with
		 * an I/O APIC.
		 */
		return (0);
	}

	/* At least two CPUs were found. */
	return (1);
}

/*
 * Initialize the IPI handlers and start up the APs.
 */
void
cpu_mp_start(void)
{
	int i;

	/* Initialize the logical ID to APIC ID table. */
	for (i = 0; i < MAXCPU; i++) {
		cpu_apic_ids[i] = -1;
		cpu_ipi_pending[i] = 0;
	}

	/* Set boot_cpu_id if needed. */
	if (boot_cpu_id == -1) {
		boot_cpu_id = PCPU_GET(apic_id);
		cpu_info[boot_cpu_id].cpu_bsp = 1;
	} else
		KASSERT(boot_cpu_id == PCPU_GET(apic_id),
		    ("BSP's APIC ID doesn't match boot_cpu_id"));
	cpu_apic_ids[0] = boot_cpu_id;

	assign_cpu_ids();

	/* Start each Application Processor */
	start_all_aps();

	/* Set up the initial logical CPUs info. */
	logical_cpus = logical_cpus_mask = 0;
	if (cpu_feature & CPUID_HTT)
		logical_cpus = (cpu_procinfo & CPUID_HTT_CORES) >> 16;

	set_interrupt_apic_ids();
}

/*
 * Print various information about the SMP system hardware and setup.
 */
void
cpu_mp_announce(void)
{
	int i, x;

	/* List CPUs */
	printf(" cpu0 (BSP): APIC ID: %2d\n", boot_cpu_id);
	for (i = 1, x = 0; x <= MAX_APIC_ID; x++) {
		if (!cpu_info[x].cpu_present || cpu_info[x].cpu_bsp)
			continue;
		if (cpu_info[x].cpu_disabled)
			printf("  cpu (AP): APIC ID: %2d (disabled)\n", x);
		else {
			KASSERT(i < mp_ncpus,
			    ("mp_ncpus and actual cpus are out of whack"));
			printf(" cpu%d (AP): APIC ID: %2d\n", i++, x);
		}
	}
}

#define MTOPSIZE (1<<(14 + PAGE_SHIFT))

/*
 * AP CPUs call this to initialize themselves.
 */
void
init_secondary(void)
{
	vm_offset_t addr;
	int	gsel_tss;

	/* bootAP is set in start_ap() to our ID. */

	PCPU_SET(currentldt, _default_ldt);
	gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
#if 0
	gdt[bootAP * NGDT + GPROC0_SEL].sd.sd_type = SDT_SYS386TSS;
#endif
	PCPU_SET(common_tss.tss_esp0, 0); /* not used until after switch */
	PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL));
	PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16);
#if 0
	PCPU_SET(tss_gdt, &gdt[bootAP * NGDT + GPROC0_SEL].sd);
	PCPU_SET(common_tssd, *PCPU_GET(tss_gdt));
#endif
	PCPU_SET(fsgs_gdt, &gdt[GUFS_SEL].sd);

	/*
	 * Set to a known state:
	 * Set by mpboot.s: CR0_PG, CR0_PE
	 * Set by cpu_setregs: CR0_NE, CR0_MP, CR0_TS, CR0_WP, CR0_AM
	 */
	/*
	 * Signal our startup to the BSP.
	 */
	mp_naps++;

	/* Spin until the BSP releases the APs. */
	while (!aps_ready)
		ia32_pause();

	/* The BSP may have changed the PTD while we were waiting. */
	invltlb();
	for (addr = 0; addr < NKPT * NBPDR - 1; addr += PAGE_SIZE)
		invlpg(addr);

	/* set up FPU state on the AP */
	npxinit(__INITIAL_NPXCW__);
#if 0
	/* set up SSE registers */
	enable_sse();
#endif
#if 0 && defined(PAE)
	/* Enable the PTE no-execute bit. */
	if ((amd_feature & AMDID_NX) != 0) {
		uint64_t msr;

		msr = rdmsr(MSR_EFER) | EFER_NXE;
		wrmsr(MSR_EFER, msr);
	}
#endif
#if 0
	/* A quick check from sanity claus */
	if (PCPU_GET(apic_id) != lapic_id()) {
		printf("SMP: cpuid = %d\n", PCPU_GET(cpuid));
		printf("SMP: actual apic_id = %d\n", lapic_id());
		printf("SMP: correct apic_id = %d\n", PCPU_GET(apic_id));
		panic("cpuid mismatch! boom!!");
	}
#endif

	/* Initialize curthread. */
	KASSERT(PCPU_GET(idlethread) != NULL, ("no idle thread"));
	PCPU_SET(curthread, PCPU_GET(idlethread));

	mtx_lock_spin(&ap_boot_mtx);
#if 0
	/* Init local apic for irq's */
	lapic_setup(1);
#endif
	smp_cpus++;

	CTR1(KTR_SMP, "SMP: AP CPU #%d Launched", PCPU_GET(cpuid));
	printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid));

	/* Determine if we are a logical CPU. */
	if (logical_cpus > 1 && PCPU_GET(apic_id) % logical_cpus != 0)
		logical_cpus_mask |= PCPU_GET(cpumask);

	/* Determine if we are a hyperthread. */
	if (hyperthreading_cpus > 1 &&
	    PCPU_GET(apic_id) % hyperthreading_cpus != 0)
		hyperthreading_cpus_mask |= PCPU_GET(cpumask);

	/* Build our map of 'other' CPUs. */
	PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask));
#if 0
	if (bootverbose)
		lapic_dump("AP");
#endif
	if (smp_cpus == mp_ncpus) {
		/* enable IPIs, tlb shootdown, freezes etc */
		atomic_store_rel_int(&smp_started, 1);
		smp_active = 1;	 /* historic */
	}

	mtx_unlock_spin(&ap_boot_mtx);

	/* Wait until all the APs are up. */
	while (smp_started == 0)
		ia32_pause();

	PCPU_SET(curthread, PCPU_GET(idlethread));
	/* enter the scheduler */
	sched_throw(NULL);

	panic("scheduler returned us to %s", __func__);
	/* NOTREACHED */
}

/*******************************************************************
 * local functions and data
 */

/*
 * We tell the I/O APIC code about all the CPUs that we want to receive
 * interrupts.  If we don't want certain CPUs to receive IRQs we
 * can simply not tell the I/O APIC code about them in this function.
 * We also do not tell it about the BSP since it tells itself about
 * the BSP internally to work with UP kernels and on UP machines.
 */
static void
set_interrupt_apic_ids(void)
{
	u_int i, apic_id;

	for (i = 0; i < MAXCPU; i++) {
		apic_id = cpu_apic_ids[i];
		if (apic_id == -1)
			continue;
		if (cpu_info[apic_id].cpu_bsp)
			continue;
		if (cpu_info[apic_id].cpu_disabled)
			continue;

		/* Don't let hyperthreads service interrupts. */
		if (hyperthreading_cpus > 1 &&
		    apic_id % hyperthreading_cpus != 0)
			continue;

		intr_add_cpu(i);
	}
}

/*
 * Assign logical CPU IDs to local APICs.
 */
static void
assign_cpu_ids(void)
{
	u_int i;

	/* Check for explicitly disabled CPUs. */
	for (i = 0; i <= MAX_APIC_ID; i++) {
		if (!cpu_info[i].cpu_present || cpu_info[i].cpu_bsp)
			continue;

		/* Don't use this CPU if it has been disabled by a tunable. */
		if (resource_disabled("lapic", i)) {
			cpu_info[i].cpu_disabled = 1;
			continue;
		}
	}

	/*
	 * Assign CPU IDs to local APIC IDs and disable any CPUs
	 * beyond MAXCPU.  CPU 0 has already been assigned to the BSP,
	 * so we only have to assign IDs for APs.
	 */
	mp_ncpus = 1;
	for (i = 0; i <= MAX_APIC_ID; i++) {
		if (!cpu_info[i].cpu_present || cpu_info[i].cpu_bsp ||
		    cpu_info[i].cpu_disabled)
			continue;

		if (mp_ncpus < MAXCPU) {
			cpu_apic_ids[mp_ncpus] = i;
			mp_ncpus++;
		} else
			cpu_info[i].cpu_disabled = 1;
	}
	KASSERT(mp_maxid >= mp_ncpus - 1,
	    ("%s: counters out of sync: max %d, count %d", __func__, mp_maxid,
	    mp_ncpus));
}

/*
 * Start each AP in our list.
 */
/* Lowest 1MB is already mapped: don't touch */
#define TMPMAP_START 1
int
start_all_aps(void)
{
	u_int32_t mpbioswarmvec;
	int x, apic_id, cpu;
	struct pcpu *pc;

	mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN);

	/* save the current value of the warm-start vector */
	mpbioswarmvec = *((u_int32_t *) WARMBOOT_OFF);

	/* set up temporary P==V mapping for AP boot */
	/* XXX this is a hack, we should boot the AP on its own stack/PTD */

	/* start each AP */
	for (cpu = 1; cpu < mp_ncpus; cpu++) {
		apic_id = cpu_apic_ids[cpu];

		/* setup a vector to our boot code */
		*((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET;
		*((volatile u_short *) WARMBOOT_SEG) = (boot_address >> 4);

		bootAP = cpu;
		bootAPgdt = gdt + (512*cpu);

		/* Get per-cpu data */
		pc = &__pcpu[bootAP];
		pcpu_init(pc, bootAP, sizeof(struct pcpu));
		pc->pc_apic_id = cpu_apic_ids[bootAP];
		pc->pc_prvspace = pc;
		pc->pc_curthread = 0;

		gdt_segs[GPRIV_SEL].ssd_base = (int) pc;
		gdt_segs[GPROC0_SEL].ssd_base = (int) &pc->pc_common_tss;

		PT_SET_MA(bootAPgdt, xpmap_ptom(VTOP(bootAPgdt)) | PG_V | PG_RW);
		bzero(bootAPgdt, PAGE_SIZE);
		for (x = 0; x < NGDT; x++)
			ssdtosd(&gdt_segs[x], &bootAPgdt[x].sd);
		PT_SET_MA(bootAPgdt, vtomach(bootAPgdt) | PG_V);
#ifdef notyet
		if (HYPERVISOR_vcpu_op(VCPUOP_get_physid, cpu, &cpu_id) == 0) {
			apicid = xen_vcpu_physid_to_x86_apicid(cpu_id.phys_id);
			acpiid = xen_vcpu_physid_to_x86_acpiid(cpu_id.phys_id);
#ifdef CONFIG_ACPI
			if (acpiid != 0xff)
				x86_acpiid_to_apicid[acpiid] = apicid;
#endif
		}
#endif

		/* attempt to start the Application Processor */
		if (!start_ap(cpu)) {
			printf("AP #%d (PHY# %d) failed!\n", cpu, apic_id);
			/* better panic as the AP may be running loose */
			printf("panic y/n? [y] ");
			if (cngetc() != 'n')
				panic("bye-bye");
		}

		all_cpus |= (1 << cpu);		/* record AP in CPU map */
	}

	/* build our map of 'other' CPUs */
	PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask));

	/* restore the warmstart vector */
	*(u_int32_t *) WARMBOOT_OFF = mpbioswarmvec;

	pmap_invalidate_range(kernel_pmap, 0, NKPT * NBPDR - 1);

	/* number of APs actually started */
	return (mp_naps);
}

extern uint8_t *pcpu_boot_stack;
extern trap_info_t trap_table[];

static void
smp_trap_init(trap_info_t *trap_ctxt)
{
	const trap_info_t *t;

	for (t = trap_table; t->address; t++) {
		trap_ctxt[t->vector].flags = t->flags;
		trap_ctxt[t->vector].cs = t->cs;
		trap_ctxt[t->vector].address = t->address;
	}
}
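
/*
 * smp_trap_init() above flattens the kernel's trap table into the
 * trap_ctxt array of the new vcpu's guest context, so that exceptions
 * taken on the AP vector through the hypervisor into the same handlers
 * the BSP installed.
 */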

void cpu_initialize_context(unsigned int cpu);
extern int nkpt;

void
cpu_initialize_context(unsigned int cpu)
{
	/*
	 * vcpu_guest_context_t is too large to allocate on the stack,
	 * hence we allocate it statically.  There is no explicit lock:
	 * this is only called from the single-threaded AP bring-up loop
	 * in start_all_aps().
	 */
	vm_page_t m[NPGPTD + 2];
	static vcpu_guest_context_t ctxt;
	vm_offset_t boot_stack;
	vm_offset_t newPTD;
	vm_paddr_t ma[NPGPTD];
	static int color;
	int i;

	/*
	 * Under PAE:
	 * Pages [0-3]:	PTD
	 * Page  [4]:	boot stack
	 * Page  [5]:	PDPT
	 */
	for (i = 0; i < NPGPTD + 2; i++) {
		m[i] = vm_page_alloc(NULL, color++,
		    VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
		    VM_ALLOC_ZERO);

		pmap_zero_page(m[i]);
	}
	boot_stack = kmem_alloc_nofault(kernel_map, PAGE_SIZE);
	newPTD = kmem_alloc_nofault(kernel_map, NPGPTD * PAGE_SIZE);
	ma[0] = xpmap_ptom(VM_PAGE_TO_PHYS(m[0]))|PG_V;

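	/*
	 * Note that boot_stack is borrowed as a scratch mapping below:
	 * under PAE it is first pointed at the PDPT page (m[NPGPTD + 1])
	 * so the PDPT entries can be written, and only later remapped to
	 * the real boot stack page (m[NPGPTD]).
	 */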
#ifdef PAE
	pmap_kenter(boot_stack, VM_PAGE_TO_PHYS(m[NPGPTD + 1]));
	for (i = 0; i < NPGPTD; i++) {
		((vm_paddr_t *)boot_stack)[i] =
		ma[i] =
		    xpmap_ptom(VM_PAGE_TO_PHYS(m[i]))|PG_V;
	}
#endif

	/*
	 * Copy cpu0's IdlePTD to the new IdlePTD, copying only
	 * the kernel mappings.
	 */
	pmap_qenter(newPTD, m, NPGPTD);

	memcpy((uint8_t *)newPTD + KPTDI*sizeof(vm_paddr_t),
	    (uint8_t *)PTOV(IdlePTD) + KPTDI*sizeof(vm_paddr_t),
	    nkpt*sizeof(vm_paddr_t));

	pmap_qremove(newPTD, NPGPTD);
	kmem_free(kernel_map, newPTD, NPGPTD * PAGE_SIZE);
	/*
	 * Map the actual idle stack at boot_stack.
	 */
	pmap_kenter(boot_stack, VM_PAGE_TO_PHYS(m[NPGPTD]));

	xen_pgdpt_pin(xpmap_ptom(VM_PAGE_TO_PHYS(m[NPGPTD + 1])));
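	/*
	 * Point the PTDPTDI (recursive) slots of the new PTD at the PTD
	 * pages themselves via queued hypervisor page-table updates,
	 * mirroring the self-map that the BSP's IdlePTD carries.
	 */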
	vm_page_lock_queues();
	for (i = 0; i < 4; i++) {
		int pdir = (PTDPTDI + i) / NPDEPG;
		int curoffset = (PTDPTDI + i) % NPDEPG;

		xen_queue_pt_update((vm_paddr_t)
		    ((ma[pdir] & ~PG_V) + (curoffset*sizeof(vm_paddr_t))),
		    ma[i]);
	}
	PT_UPDATES_FLUSH();
	vm_page_unlock_queues();

	memset(&ctxt, 0, sizeof(ctxt));
	ctxt.flags = VGCF_IN_KERNEL;
	ctxt.user_regs.ds = GSEL(GDATA_SEL, SEL_KPL);
	ctxt.user_regs.es = GSEL(GDATA_SEL, SEL_KPL);
	ctxt.user_regs.fs = GSEL(GPRIV_SEL, SEL_KPL);
	ctxt.user_regs.gs = GSEL(GDATA_SEL, SEL_KPL);
	ctxt.user_regs.cs = GSEL(GCODE_SEL, SEL_KPL);
	ctxt.user_regs.ss = GSEL(GDATA_SEL, SEL_KPL);
	ctxt.user_regs.eip = (unsigned long)init_secondary;
	ctxt.user_regs.eflags = PSL_KERNEL | 0x1000; /* IOPL_RING1 */

	memset(&ctxt.fpu_ctxt, 0, sizeof(ctxt.fpu_ctxt));

	smp_trap_init(ctxt.trap_ctxt);

	ctxt.ldt_ents = 0;
	ctxt.gdt_frames[0] = (uint32_t)((uint64_t)vtomach(bootAPgdt) >> PAGE_SHIFT);
	ctxt.gdt_ents      = 512;

#ifdef __i386__
	ctxt.user_regs.esp = boot_stack + PAGE_SIZE;

	ctxt.kernel_ss = GSEL(GDATA_SEL, SEL_KPL);
	ctxt.kernel_sp = boot_stack + PAGE_SIZE;

	ctxt.event_callback_cs     = GSEL(GCODE_SEL, SEL_KPL);
	ctxt.event_callback_eip    = (unsigned long)Xhypervisor_callback;
	ctxt.failsafe_callback_cs  = GSEL(GCODE_SEL, SEL_KPL);
	ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;

	ctxt.ctrlreg[3] = xpmap_ptom(VM_PAGE_TO_PHYS(m[NPGPTD + 1]));
#else /* __x86_64__ */
	ctxt.user_regs.esp = idle->thread.rsp0 - sizeof(struct pt_regs);
	ctxt.kernel_ss = GSEL(GDATA_SEL, SEL_KPL);
	ctxt.kernel_sp = idle->thread.rsp0;

	ctxt.event_callback_eip    = (unsigned long)hypervisor_callback;
	ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;
	ctxt.syscall_callback_eip  = (unsigned long)system_call;

	ctxt.ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(init_level4_pgt));

	ctxt.gs_base_kernel = (unsigned long)(cpu_pda(cpu));
#endif

	printf("gdtpfn=%lx pdptpfn=%lx\n",
	    ctxt.gdt_frames[0],
	    ctxt.ctrlreg[3] >> PAGE_SHIFT);

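	/*
	 * Two-step bring-up: VCPUOP_initialise loads the context built
	 * above into the new vcpu, and VCPUOP_up then marks it runnable;
	 * the DELAY in between is presumably just a settling pause.
	 */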
	PANIC_IF(HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, &ctxt));
	DELAY(3000);
	PANIC_IF(HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL));
}

/*
 * This function starts the AP (application processor) identified
 * by the APIC ID 'apic_id'.  It does quite a "song and dance"
 * to accomplish this.  This is necessary because of the nuances
 * of the different hardware we might encounter.  It isn't pretty,
 * but it seems to work.
 */

int cpus;
static int
start_ap(int apic_id)
{
	int ms;

	/* used as a watchpoint to signal AP startup */
	cpus = mp_naps;

	cpu_initialize_context(apic_id);

	/* Wait up to 5 seconds for it to start. */
	for (ms = 0; ms < 5000; ms++) {
		if (mp_naps > cpus)
			return (1);	/* return SUCCESS */
		DELAY(1000);
	}
	return (0);		/* return FAILURE */
}

/*
 * Flush the TLB on all other CPUs.
 */
static void
smp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2)
{
	u_int ncpu;

	ncpu = mp_ncpus - 1;	/* does not shootdown self */
	if (ncpu < 1)
		return;		/* no other cpus */
	if (!(read_eflags() & PSL_I))
		panic("%s: interrupts disabled", __func__);
	mtx_lock_spin(&smp_ipi_mtx);
	smp_tlb_addr1 = addr1;
	smp_tlb_addr2 = addr2;
	atomic_store_rel_int(&smp_tlb_wait, 0);
	ipi_all_but_self(vector);
	while (smp_tlb_wait < ncpu)
		ia32_pause();
	mtx_unlock_spin(&smp_ipi_mtx);
}

static void
smp_targeted_tlb_shootdown(u_int mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2)
{
	int ncpu, othercpus;

	othercpus = mp_ncpus - 1;
	if (mask == (u_int)-1) {
		ncpu = othercpus;
		if (ncpu < 1)
			return;
	} else {
		mask &= ~PCPU_GET(cpumask);
		if (mask == 0)
			return;
		ncpu = bitcount32(mask);
		if (ncpu > othercpus) {
			/* XXX this should be a panic offence */
			printf("SMP: tlb shootdown to %d other cpus (only have %d)\n",
			    ncpu, othercpus);
			ncpu = othercpus;
		}
		/* XXX should be a panic, implied by mask == 0 above */
		if (ncpu < 1)
			return;
	}
	if (!(read_eflags() & PSL_I))
		panic("%s: interrupts disabled", __func__);
	mtx_lock_spin(&smp_ipi_mtx);
	smp_tlb_addr1 = addr1;
	smp_tlb_addr2 = addr2;
	atomic_store_rel_int(&smp_tlb_wait, 0);
	if (mask == (u_int)-1)
		ipi_all_but_self(vector);
	else
		ipi_selected(mask, vector);
	while (smp_tlb_wait < ncpu)
		ia32_pause();
	mtx_unlock_spin(&smp_ipi_mtx);
}

void
smp_cache_flush(void)
{

	if (smp_started)
		smp_tlb_shootdown(IPI_INVLCACHE, 0, 0);
}

void
smp_invltlb(void)
{

	if (smp_started) {
		smp_tlb_shootdown(IPI_INVLTLB, 0, 0);
	}
}

void
smp_invlpg(vm_offset_t addr)
{

	if (smp_started) {
		smp_tlb_shootdown(IPI_INVLPG, addr, 0);
	}
}

void
smp_invlpg_range(vm_offset_t addr1, vm_offset_t addr2)
{

	if (smp_started) {
		smp_tlb_shootdown(IPI_INVLRNG, addr1, addr2);
	}
}

void
smp_masked_invltlb(u_int mask)
{

	if (smp_started) {
		smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, 0, 0);
	}
}

void
smp_masked_invlpg(u_int mask, vm_offset_t addr)
{

	if (smp_started) {
		smp_targeted_tlb_shootdown(mask, IPI_INVLPG, addr, 0);
	}
}

void
smp_masked_invlpg_range(u_int mask, vm_offset_t addr1, vm_offset_t addr2)
{

	if (smp_started) {
		smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, addr1, addr2);
	}
}

static __inline void
ipi_pcpu(unsigned int cpu, int vector)
{
#ifdef notyet
	int irq = per_cpu(ipi_to_irq, cpu)[vector];

	notify_remote_via_irq(irq);
#endif
}
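
/*
 * Until the event channel plumbing is in place, ipi_pcpu() is a no-op:
 * the notyet block above sketches the intended path, where each IPI
 * vector is bound to a per-vcpu event channel IRQ and raised with
 * notify_remote_via_irq().
 */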

void
ipi_bitmap_handler(struct trapframe frame)
{
	int cpu = PCPU_GET(cpuid);
	u_int ipi_bitmap;

	ipi_bitmap = atomic_readandclear_int(&cpu_ipi_pending[cpu]);

	if (ipi_bitmap & (1 << IPI_PREEMPT)) {
		sched_preempt(curthread);
	}
}

/*
 * send an IPI to a set of cpus.
 */
void
ipi_selected(u_int32_t cpus, u_int ipi)
{
	int cpu;
	u_int bitmap = 0;
	u_int old_pending;
	u_int new_pending;

	if (IPI_IS_BITMAPED(ipi)) {
		bitmap = 1 << ipi;
		ipi = IPI_BITMAP_VECTOR;
	}

#ifdef STOP_NMI
	if (ipi == IPI_STOP && stop_cpus_with_nmi) {
		ipi_nmi_selected(cpus);
		return;
	}
#endif
	CTR3(KTR_SMP, "%s: cpus: %x ipi: %x", __func__, cpus, ipi);
	while ((cpu = ffs(cpus)) != 0) {
		cpu--;
		cpus &= ~(1 << cpu);

		KASSERT(cpu_apic_ids[cpu] != -1,
		    ("IPI to non-existent CPU %d", cpu));

		if (bitmap) {
			do {
				old_pending = cpu_ipi_pending[cpu];
				new_pending = old_pending | bitmap;
			} while (!atomic_cmpset_int(&cpu_ipi_pending[cpu],
			    old_pending, new_pending));

			if (old_pending)
				continue;
		}

		ipi_pcpu(cpu, ipi);
	}
}

/*
 * send an IPI INTerrupt containing 'vector' to all CPUs, including myself
 */
void
ipi_all(u_int ipi)
{

	if (IPI_IS_BITMAPED(ipi) || (ipi == IPI_STOP && stop_cpus_with_nmi)) {
		ipi_selected(all_cpus, ipi);
		return;
	}
	CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
	ipi_selected(-1, ipi);
}

/*
 * send an IPI to all CPUs EXCEPT myself
 */
void
ipi_all_but_self(u_int ipi)
{

	if (IPI_IS_BITMAPED(ipi) || (ipi == IPI_STOP && stop_cpus_with_nmi)) {
		ipi_selected(PCPU_GET(other_cpus), ipi);
		return;
	}
	CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
	ipi_selected(((int)-1 & ~(1 << curcpu)), ipi);
}

/*
 * send an IPI to myself
 */
void
ipi_self(u_int ipi)
{

	if (IPI_IS_BITMAPED(ipi) || (ipi == IPI_STOP && stop_cpus_with_nmi)) {
		ipi_selected(PCPU_GET(cpumask), ipi);
		return;
	}
	CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
	ipi_pcpu(curcpu, ipi);
}

#ifdef STOP_NMI
/*
 * send NMI IPI to selected CPUs
 */

#define	BEFORE_SPIN	1000000

void
ipi_nmi_selected(u_int32_t cpus)
{
	int cpu;
	register_t icrlo;

	icrlo = APIC_DELMODE_NMI | APIC_DESTMODE_PHY | APIC_LEVEL_ASSERT
		| APIC_TRIGMOD_EDGE;

	CTR2(KTR_SMP, "%s: cpus: %x nmi", __func__, cpus);

	atomic_set_int(&ipi_nmi_pending, cpus);

	while ((cpu = ffs(cpus)) != 0) {
		cpu--;
		cpus &= ~(1 << cpu);

		KASSERT(cpu_apic_ids[cpu] != -1,
		    ("IPI NMI to non-existent CPU %d", cpu));

		/* Wait for an earlier IPI to finish. */
		if (!lapic_ipi_wait(BEFORE_SPIN))
			panic("ipi_nmi_selected: previous IPI has not cleared");

		lapic_ipi_raw(icrlo, cpu_apic_ids[cpu]);
	}
}

int
ipi_nmi_handler(void)
{
	int cpumask = PCPU_GET(cpumask);

	if (!(ipi_nmi_pending & cpumask))
		return (1);

	atomic_clear_int(&ipi_nmi_pending, cpumask);
	cpustop_handler();
	return (0);
}

#endif /* STOP_NMI */

/*
 * Handle an IPI_STOP by saving our current context and spinning until we
 * are resumed.
 */
void
cpustop_handler(void)
{
	int cpu = PCPU_GET(cpuid);
	int cpumask = PCPU_GET(cpumask);

	savectx(&stoppcbs[cpu]);

	/* Indicate that we are stopped */
	atomic_set_int(&stopped_cpus, cpumask);

	/* Wait for restart */
	while (!(started_cpus & cpumask))
		ia32_pause();

	atomic_clear_int(&started_cpus, cpumask);
	atomic_clear_int(&stopped_cpus, cpumask);

	if (cpu == 0 && cpustop_restartfunc != NULL) {
		cpustop_restartfunc();
		cpustop_restartfunc = NULL;
	}
}

/*
 * This is called once the rest of the system is up and running and we're
 * ready to let the APs out of the pen.
 */
static void
release_aps(void *dummy __unused)
{

	if (mp_ncpus == 1)
		return;
	atomic_store_rel_int(&aps_ready, 1);
	while (smp_started == 0)
		ia32_pause();
}
SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL);