1182902Skmacy/*-
2182902Skmacy * Copyright (c) 1996, by Steve Passe
3182902Skmacy * Copyright (c) 2008, by Kip Macy
4182902Skmacy * All rights reserved.
5182902Skmacy *
6182902Skmacy * Redistribution and use in source and binary forms, with or without
7182902Skmacy * modification, are permitted provided that the following conditions
8182902Skmacy * are met:
9182902Skmacy * 1. Redistributions of source code must retain the above copyright
10182902Skmacy *    notice, this list of conditions and the following disclaimer.
11182902Skmacy * 2. The name of the developer may NOT be used to endorse or promote products
12182902Skmacy *    derived from this software without specific prior written permission.
13182902Skmacy *
14182902Skmacy * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15182902Skmacy * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16182902Skmacy * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17182902Skmacy * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18182902Skmacy * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19182902Skmacy * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20182902Skmacy * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21182902Skmacy * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22182902Skmacy * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23182902Skmacy * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24182902Skmacy * SUCH DAMAGE.
25182902Skmacy */
26182902Skmacy
27182902Skmacy#include <sys/cdefs.h>
28182902Skmacy__FBSDID("$FreeBSD$");
29182902Skmacy
30182902Skmacy#include "opt_apic.h"
31182902Skmacy#include "opt_cpu.h"
32182902Skmacy#include "opt_kstack_pages.h"
33182902Skmacy#include "opt_mp_watchdog.h"
34204972Sjhb#include "opt_pmap.h"
35182902Skmacy#include "opt_sched.h"
36182902Skmacy#include "opt_smp.h"
37182902Skmacy
38182902Skmacy#if !defined(lint)
39182902Skmacy#if !defined(SMP)
40182902Skmacy#error How did you get here?
41182902Skmacy#endif
42182902Skmacy
43182902Skmacy#ifndef DEV_APIC
44182902Skmacy#error The apic device is required for SMP, add "device apic" to your config file.
45182902Skmacy#endif
46182902Skmacy#if defined(CPU_DISABLE_CMPXCHG) && !defined(COMPILING_LINT)
47182902Skmacy#error SMP not supported with CPU_DISABLE_CMPXCHG
48182902Skmacy#endif
49182902Skmacy#endif /* not lint */
50182902Skmacy
51182902Skmacy#include <sys/param.h>
52182902Skmacy#include <sys/systm.h>
53182902Skmacy#include <sys/bus.h>
54182902Skmacy#include <sys/cons.h>	/* cngetc() */
55222813Sattilio#include <sys/cpuset.h>
56182902Skmacy#ifdef GPROF
57182902Skmacy#include <sys/gmon.h>
58182902Skmacy#endif
59182902Skmacy#include <sys/kernel.h>
60182902Skmacy#include <sys/ktr.h>
61182902Skmacy#include <sys/lock.h>
62182902Skmacy#include <sys/malloc.h>
63182902Skmacy#include <sys/memrange.h>
64182902Skmacy#include <sys/mutex.h>
65182902Skmacy#include <sys/pcpu.h>
66182902Skmacy#include <sys/proc.h>
67241498Salc#include <sys/rwlock.h>
68182902Skmacy#include <sys/sched.h>
69182902Skmacy#include <sys/smp.h>
70182902Skmacy#include <sys/sysctl.h>
71182902Skmacy
72182902Skmacy#include <vm/vm.h>
73182902Skmacy#include <vm/vm_param.h>
74182902Skmacy#include <vm/pmap.h>
75182902Skmacy#include <vm/vm_kern.h>
76182902Skmacy#include <vm/vm_extern.h>
77182902Skmacy#include <vm/vm_page.h>
78182902Skmacy
79214631Sjhb#include <x86/apicreg.h>
80182902Skmacy#include <machine/md_var.h>
81182902Skmacy#include <machine/mp_watchdog.h>
82182902Skmacy#include <machine/pcb.h>
83182902Skmacy#include <machine/psl.h>
84182902Skmacy#include <machine/smp.h>
85182902Skmacy#include <machine/specialreg.h>
86182902Skmacy#include <machine/pcpu.h>
87182902Skmacy
88255040Sgibbs#include <xen/xen-os.h>
89186557Skmacy#include <xen/evtchn.h>
90186557Skmacy#include <xen/xen_intr.h>
91186557Skmacy#include <xen/hypervisor.h>
92182902Skmacy#include <xen/interface/vcpu.h>
93182902Skmacy
94255158Sgibbs/*---------------------------- Extern Declarations ---------------------------*/
95255158Sgibbsextern	struct pcpu __pcpu[];
96182902Skmacy
97255158Sgibbsextern void Xhypervisor_callback(void);
98255158Sgibbsextern void failsafe_callback(void);
99255158Sgibbsextern void pmap_lazyfix_action(void);
100255158Sgibbs
101255158Sgibbs/*--------------------------- Forward Declarations ---------------------------*/
102255331Sgibbsstatic driver_filter_t	smp_reschedule_interrupt;
103255331Sgibbsstatic driver_filter_t	smp_call_function_interrupt;
104255331Sgibbsstatic void		assign_cpu_ids(void);
105255331Sgibbsstatic void		set_interrupt_apic_ids(void);
106255331Sgibbsstatic int		start_all_aps(void);
107255331Sgibbsstatic int		start_ap(int apic_id);
108255331Sgibbsstatic void		release_aps(void *dummy);
109255158Sgibbs
110255331Sgibbs/*---------------------------------- Macros ----------------------------------*/
/* Convert an IPI number into its zero-based offset from APIC_IPI_INTS. */
#define	IPI_TO_IDX(ipi)		((ipi) - APIC_IPI_INTS)
112255331Sgibbs
113255158Sgibbs/*-------------------------------- Local Types -------------------------------*/
/* Signature shared by every function-call IPI handler in ipi_vectors[]. */
typedef void call_data_func_t(uintptr_t arg1, uintptr_t arg2);
115255158Sgibbs
/*
 * Per-APIC-ID bookkeeping, filled in by cpu_add(), cpu_mp_start() and
 * assign_cpu_ids().
 *
 * The flags are unsigned single-bit fields.  A signed one-bit field can
 * only represent 0 and -1, so storing 1 into it depends on
 * implementation-defined conversion and a comparison against 1 would fail.
 */
struct cpu_info {
	unsigned int	cpu_present:1;	/* reported through cpu_add() */
	unsigned int	cpu_bsp:1;	/* this CPU is the boot processor */
	unsigned int	cpu_disabled:1;	/* excluded from CPU ID assignment */
};
121255158Sgibbs
122255331Sgibbsstruct xen_ipi_handler
123255331Sgibbs{
124255331Sgibbs	driver_filter_t	*filter;
125255331Sgibbs	const char	*description;
126255331Sgibbs};
127255331Sgibbs
/* Indices into xen_ipis[] and into the per-CPU ipi_handle[] array. */
enum {
	RESCHEDULE_VECTOR = 0,
	CALL_FUNCTION_VECTOR
};
132255331Sgibbs
133255158Sgibbs/*-------------------------------- Global Data -------------------------------*/
134255158Sgibbsstatic u_int	hyperthreading_cpus;
135255158Sgibbsstatic cpuset_t	hyperthreading_cpus_mask;
136255158Sgibbs
137182902Skmacyint	mp_naps;		/* # of Applications processors */
138182902Skmacyint	boot_cpu_id = -1;	/* designated BSP */
139182902Skmacy
140182902Skmacystatic int bootAP;
141182902Skmacystatic union descriptor *bootAPgdt;
142182902Skmacy
143182902Skmacy/* Free these after use */
144182902Skmacyvoid *bootstacks[MAXCPU];
145182902Skmacy
146182902Skmacystruct pcb stoppcbs[MAXCPU];
147182902Skmacy
148182902Skmacy/* Variables needed for SMP tlb shootdown. */
149182902Skmacyvm_offset_t smp_tlb_addr1;
150182902Skmacyvm_offset_t smp_tlb_addr2;
151182902Skmacyvolatile int smp_tlb_wait;
152182902Skmacy
153182902Skmacystatic u_int logical_cpus;
154222813Sattiliostatic volatile cpuset_t ipi_nmi_pending;
155182902Skmacy
156182902Skmacy/* used to hold the AP's until we are ready to release them */
157182902Skmacystatic struct mtx ap_boot_mtx;
158182902Skmacy
159182902Skmacy/* Set to 1 once we're ready to let the APs out of the pen. */
160182902Skmacystatic volatile int aps_ready = 0;
161182902Skmacy
162182902Skmacy/*
163182902Skmacy * Store data from cpu_add() until later in the boot when we actually setup
164182902Skmacy * the APs.
165182902Skmacy */
166255158Sgibbsstatic struct cpu_info cpu_info[MAX_APIC_ID + 1];
167182902Skmacyint cpu_apic_ids[MAXCPU];
168187966Sbzint apic_cpuids[MAX_APIC_ID + 1];
169182902Skmacy
170182902Skmacy/* Holds pending bitmap based IPIs per CPU */
171182902Skmacystatic volatile u_int cpu_ipi_pending[MAXCPU];
172182902Skmacy
173191759Skmacystatic int cpu_logical;
174191759Skmacystatic int cpu_cores;
175191759Skmacy
176255331Sgibbsstatic const struct xen_ipi_handler xen_ipis[] =
177255331Sgibbs{
178255331Sgibbs	[RESCHEDULE_VECTOR]	= { smp_reschedule_interrupt,	"resched"  },
179255331Sgibbs	[CALL_FUNCTION_VECTOR]	= { smp_call_function_interrupt,"callfunc" }
180255331Sgibbs};
181255331Sgibbs
182255158Sgibbs/*------------------------------- Per-CPU Data -------------------------------*/
183255331SgibbsDPCPU_DEFINE(xen_intr_handle_t, ipi_handle[nitems(xen_ipis)]);
184255040SgibbsDPCPU_DEFINE(struct vcpu_info *, vcpu_info);
185255040Sgibbs
186255158Sgibbs/*------------------------------ Implementation ------------------------------*/
187182902Skmacystruct cpu_group *
188182902Skmacycpu_topo(void)
189182902Skmacy{
190182902Skmacy	if (cpu_cores == 0)
191182902Skmacy		cpu_cores = 1;
192182902Skmacy	if (cpu_logical == 0)
193182902Skmacy		cpu_logical = 1;
194182902Skmacy	if (mp_ncpus % (cpu_cores * cpu_logical) != 0) {
195182902Skmacy		printf("WARNING: Non-uniform processors.\n");
196182902Skmacy		printf("WARNING: Using suboptimal topology.\n");
197182902Skmacy		return (smp_topo_none());
198182902Skmacy	}
199182902Skmacy	/*
200182902Skmacy	 * No multi-core or hyper-threaded.
201182902Skmacy	 */
202182902Skmacy	if (cpu_logical * cpu_cores == 1)
203182902Skmacy		return (smp_topo_none());
204182902Skmacy	/*
205182902Skmacy	 * Only HTT no multi-core.
206182902Skmacy	 */
207182902Skmacy	if (cpu_logical > 1 && cpu_cores == 1)
208182902Skmacy		return (smp_topo_1level(CG_SHARE_L1, cpu_logical, CG_FLAG_HTT));
209182902Skmacy	/*
210182902Skmacy	 * Only multi-core no HTT.
211182902Skmacy	 */
212182902Skmacy	if (cpu_cores > 1 && cpu_logical == 1)
213182902Skmacy		return (smp_topo_1level(CG_SHARE_NONE, cpu_cores, 0));
214182902Skmacy	/*
215182902Skmacy	 * Both HTT and multi-core.
216182902Skmacy	 */
217182902Skmacy	return (smp_topo_2level(CG_SHARE_NONE, cpu_cores,
218182902Skmacy	    CG_SHARE_L1, cpu_logical, CG_FLAG_HTT));
219182902Skmacy}
220182902Skmacy
/*
 * Calculate usable address in base memory for AP trampoline code.
 *
 * This implementation reserves nothing: basemem is returned unchanged.
 */
unsigned int
mp_bootaddress(unsigned int basemem)
{
	return basemem;
}
230182902Skmacy
231182902Skmacyvoid
232182902Skmacycpu_add(u_int apic_id, char boot_cpu)
233182902Skmacy{
234182902Skmacy
235182902Skmacy	if (apic_id > MAX_APIC_ID) {
236182902Skmacy		panic("SMP: APIC ID %d too high", apic_id);
237182902Skmacy		return;
238182902Skmacy	}
239182902Skmacy	KASSERT(cpu_info[apic_id].cpu_present == 0, ("CPU %d added twice",
240182902Skmacy	    apic_id));
241182902Skmacy	cpu_info[apic_id].cpu_present = 1;
242182902Skmacy	if (boot_cpu) {
243182902Skmacy		KASSERT(boot_cpu_id == -1,
244182902Skmacy		    ("CPU %d claims to be BSP, but CPU %d already is", apic_id,
245182902Skmacy		    boot_cpu_id));
246182902Skmacy		boot_cpu_id = apic_id;
247182902Skmacy		cpu_info[apic_id].cpu_bsp = 1;
248182902Skmacy	}
249182902Skmacy	if (mp_ncpus < MAXCPU)
250182902Skmacy		mp_ncpus++;
251182902Skmacy	if (bootverbose)
252182902Skmacy		printf("SMP: Added CPU %d (%s)\n", apic_id, boot_cpu ? "BSP" :
253182902Skmacy		    "AP");
254182902Skmacy}
255182902Skmacy
256182902Skmacyvoid
257182902Skmacycpu_mp_setmaxid(void)
258182902Skmacy{
259182902Skmacy
260182902Skmacy	mp_maxid = MAXCPU - 1;
261182902Skmacy}
262182902Skmacy
263182902Skmacyint
264182902Skmacycpu_mp_probe(void)
265182902Skmacy{
266182902Skmacy
267182902Skmacy	/*
268182902Skmacy	 * Always record BSP in CPU map so that the mbuf init code works
269182902Skmacy	 * correctly.
270182902Skmacy	 */
271222813Sattilio	CPU_SETOF(0, &all_cpus);
272182902Skmacy	if (mp_ncpus == 0) {
273182902Skmacy		/*
274182902Skmacy		 * No CPUs were found, so this must be a UP system.  Setup
275182902Skmacy		 * the variables to represent a system with a single CPU
276182902Skmacy		 * with an id of 0.
277182902Skmacy		 */
278182902Skmacy		mp_ncpus = 1;
279182902Skmacy		return (0);
280182902Skmacy	}
281182902Skmacy
282182902Skmacy	/* At least one CPU was found. */
283182902Skmacy	if (mp_ncpus == 1) {
284182902Skmacy		/*
285182902Skmacy		 * One CPU was found, so this must be a UP system with
286182902Skmacy		 * an I/O APIC.
287182902Skmacy		 */
288182902Skmacy		return (0);
289182902Skmacy	}
290182902Skmacy
291182902Skmacy	/* At least two CPUs were found. */
292182902Skmacy	return (1);
293182902Skmacy}
294182902Skmacy
295182902Skmacy/*
296182902Skmacy * Initialize the IPI handlers and start up the AP's.
297182902Skmacy */
298182902Skmacyvoid
299182902Skmacycpu_mp_start(void)
300182902Skmacy{
301182902Skmacy	int i;
302182902Skmacy
303182902Skmacy	/* Initialize the logical ID to APIC ID table. */
304182902Skmacy	for (i = 0; i < MAXCPU; i++) {
305182902Skmacy		cpu_apic_ids[i] = -1;
306182902Skmacy		cpu_ipi_pending[i] = 0;
307182902Skmacy	}
308182902Skmacy
309182902Skmacy	/* Set boot_cpu_id if needed. */
310182902Skmacy	if (boot_cpu_id == -1) {
311182902Skmacy		boot_cpu_id = PCPU_GET(apic_id);
312182902Skmacy		cpu_info[boot_cpu_id].cpu_bsp = 1;
313182902Skmacy	} else
314182902Skmacy		KASSERT(boot_cpu_id == PCPU_GET(apic_id),
315182902Skmacy		    ("BSP's APIC ID doesn't match boot_cpu_id"));
316182902Skmacy	cpu_apic_ids[0] = boot_cpu_id;
317187966Sbz	apic_cpuids[boot_cpu_id] = 0;
318182902Skmacy
319182902Skmacy	assign_cpu_ids();
320182902Skmacy
321182902Skmacy	/* Start each Application Processor */
322182902Skmacy	start_all_aps();
323182902Skmacy
324182902Skmacy	/* Setup the initial logical CPUs info. */
325222813Sattilio	logical_cpus = 0;
326222813Sattilio	CPU_ZERO(&logical_cpus_mask);
327182902Skmacy	if (cpu_feature & CPUID_HTT)
328182902Skmacy		logical_cpus = (cpu_procinfo & CPUID_HTT_CORES) >> 16;
329182902Skmacy
330182902Skmacy	set_interrupt_apic_ids();
331182902Skmacy}
332182902Skmacy
333182902Skmacy
/* IPI handler: run the pending SMP rendezvous action; arguments unused. */
static void
iv_rendezvous(uintptr_t a, uintptr_t b)
{

	smp_rendezvous_action();
}
339184112Skmacy
/* IPI handler: flush the TLB via xen_tlb_flush(); arguments unused. */
static void
iv_invltlb(uintptr_t a, uintptr_t b)
{

	xen_tlb_flush();
}
345184112Skmacy
/* IPI handler: invalidate the single page at address a; b is unused. */
static void
iv_invlpg(uintptr_t a, uintptr_t b)
{

	xen_invlpg(a);
}
351184112Skmacy
352184112Skmacystatic void
353184112Skmacyiv_invlrng(uintptr_t a, uintptr_t b)
354184112Skmacy{
355184115Skmacy	vm_offset_t start = (vm_offset_t)a;
356184115Skmacy	vm_offset_t end = (vm_offset_t)b;
357184115Skmacy
358184115Skmacy	while (start < end) {
359184115Skmacy		xen_invlpg(start);
360184115Skmacy		start += PAGE_SIZE;
361184115Skmacy	}
362184112Skmacy}
363184112Skmacy
364184115Skmacy
365184112Skmacystatic void
366184112Skmacyiv_invlcache(uintptr_t a, uintptr_t b)
367184112Skmacy{
368184115Skmacy
369184115Skmacy	wbinvd();
370184198Skmacy	atomic_add_int(&smp_tlb_wait, 1);
371184112Skmacy}
372184112Skmacy
373184112Skmacystatic void
374184112Skmacyiv_lazypmap(uintptr_t a, uintptr_t b)
375184112Skmacy{
376184115Skmacy	pmap_lazyfix_action();
377184224Skmacy	atomic_add_int(&smp_tlb_wait, 1);
378184112Skmacy}
379184112Skmacy
380193154Sadrian/*
381193154Sadrian * These start from "IPI offset" APIC_IPI_INTS
382193154Sadrian */
383255331Sgibbsstatic call_data_func_t *ipi_vectors[6] =
384184112Skmacy{
385255158Sgibbs	iv_rendezvous,
386255158Sgibbs	iv_invltlb,
387255158Sgibbs	iv_invlpg,
388255158Sgibbs	iv_invlrng,
389255158Sgibbs	iv_invlcache,
390255158Sgibbs	iv_lazypmap,
391184224Skmacy};
392184224Skmacy
393184224Skmacy/*
394184224Skmacy * Reschedule call back. Nothing to do,
395184224Skmacy * all the work is done automatically when
396184224Skmacy * we return from the interrupt.
397184224Skmacy */
398184224Skmacystatic int
399184224Skmacysmp_reschedule_interrupt(void *unused)
400184224Skmacy{
401184198Skmacy	int cpu = PCPU_GET(cpuid);
402184198Skmacy	u_int ipi_bitmap;
403184198Skmacy
404184198Skmacy	ipi_bitmap = atomic_readandclear_int(&cpu_ipi_pending[cpu]);
405184198Skmacy
406184198Skmacy	if (ipi_bitmap & (1 << IPI_PREEMPT)) {
407184198Skmacy#ifdef COUNT_IPIS
408184198Skmacy		(*ipi_preempt_counts[cpu])++;
409184198Skmacy#endif
410184198Skmacy		sched_preempt(curthread);
411184198Skmacy	}
412184198Skmacy
413184198Skmacy	if (ipi_bitmap & (1 << IPI_AST)) {
414184198Skmacy#ifdef COUNT_IPIS
415184198Skmacy		(*ipi_ast_counts[cpu])++;
416184198Skmacy#endif
417184198Skmacy		/* Nothing to do for AST */
418184198Skmacy	}
419184198Skmacy	return (FILTER_HANDLED);
420184112Skmacy}
421184112Skmacy
422184112Skmacystruct _call_data {
423184224Skmacy	uint16_t func_id;
424184224Skmacy	uint16_t wait;
425184112Skmacy	uintptr_t arg1;
426184112Skmacy	uintptr_t arg2;
427184112Skmacy	atomic_t started;
428184112Skmacy	atomic_t finished;
429184112Skmacy};
430184112Skmacy
431184112Skmacystatic struct _call_data *call_data;
432184112Skmacy
433184198Skmacystatic int
434184112Skmacysmp_call_function_interrupt(void *unused)
435184112Skmacy{
436184224Skmacy	call_data_func_t *func;
437184112Skmacy	uintptr_t arg1 = call_data->arg1;
438184112Skmacy	uintptr_t arg2 = call_data->arg2;
439184112Skmacy	int wait = call_data->wait;
440184224Skmacy	atomic_t *started = &call_data->started;
441184224Skmacy	atomic_t *finished = &call_data->finished;
442184112Skmacy
443193154Sadrian	/* We only handle function IPIs, not bitmap IPIs */
444255158Sgibbs	if (call_data->func_id < APIC_IPI_INTS ||
445255158Sgibbs	    call_data->func_id > IPI_BITMAP_VECTOR)
446184224Skmacy		panic("invalid function id %u", call_data->func_id);
447184224Skmacy
448255331Sgibbs	func = ipi_vectors[IPI_TO_IDX(call_data->func_id)];
449184112Skmacy	/*
450184112Skmacy	 * Notify initiating CPU that I've grabbed the data and am
451184112Skmacy	 * about to execute the function
452184112Skmacy	 */
453184112Skmacy	mb();
454184224Skmacy	atomic_inc(started);
455184112Skmacy	/*
456184112Skmacy	 * At this point the info structure may be out of scope unless wait==1
457184112Skmacy	 */
458184112Skmacy	(*func)(arg1, arg2);
459184112Skmacy
460184112Skmacy	if (wait) {
461184112Skmacy		mb();
462184224Skmacy		atomic_inc(finished);
463184112Skmacy	}
464184224Skmacy	atomic_add_int(&smp_tlb_wait, 1);
465184198Skmacy	return (FILTER_HANDLED);
466184112Skmacy}
467184112Skmacy
468184112Skmacy/*
469182902Skmacy * Print various information about the SMP system hardware and setup.
470182902Skmacy */
471182902Skmacyvoid
472182902Skmacycpu_mp_announce(void)
473182902Skmacy{
474182902Skmacy	int i, x;
475182902Skmacy
476182902Skmacy	/* List CPUs */
477182902Skmacy	printf(" cpu0 (BSP): APIC ID: %2d\n", boot_cpu_id);
478182902Skmacy	for (i = 1, x = 0; x <= MAX_APIC_ID; x++) {
479182902Skmacy		if (!cpu_info[x].cpu_present || cpu_info[x].cpu_bsp)
480182902Skmacy			continue;
481182902Skmacy		if (cpu_info[x].cpu_disabled)
482182902Skmacy			printf("  cpu (AP): APIC ID: %2d (disabled)\n", x);
483182902Skmacy		else {
484182902Skmacy			KASSERT(i < mp_ncpus,
485182902Skmacy			    ("mp_ncpus and actual cpus are out of whack"));
486182902Skmacy			printf(" cpu%d (AP): APIC ID: %2d\n", i++, x);
487182902Skmacy		}
488182902Skmacy	}
489182902Skmacy}
490182902Skmacy
491184112Skmacystatic int
492255040Sgibbsxen_smp_cpu_init(unsigned int cpu)
493184112Skmacy{
494255331Sgibbs	xen_intr_handle_t *ipi_handle;
495255331Sgibbs	const struct xen_ipi_handler *ipi;
496255331Sgibbs	int idx, rc;
497184112Skmacy
498255331Sgibbs	ipi_handle = DPCPU_ID_GET(cpu, ipi_handle);
499255331Sgibbs	for (ipi = xen_ipis, idx = 0; idx < nitems(xen_ipis); ipi++, idx++) {
500184112Skmacy
501255331Sgibbs		/*
502255331Sgibbs		 * The PCPU variable pc_device is not initialized on i386 PV,
503255331Sgibbs		 * so we have to use the root_bus device in order to setup
504255331Sgibbs		 * the IPIs.
505255331Sgibbs		 */
506255331Sgibbs		rc = xen_intr_alloc_and_bind_ipi(root_bus, cpu,
507255331Sgibbs		    ipi->filter, INTR_TYPE_TTY, &ipi_handle[idx]);
508255331Sgibbs		if (rc != 0) {
509255331Sgibbs			printf("Unable to allocate a XEN IPI port. "
510255331Sgibbs			    "Error %d\n", rc);
511255331Sgibbs			break;
512255331Sgibbs		}
513255331Sgibbs		xen_intr_describe(ipi_handle[idx], "%s", ipi->description);
514255331Sgibbs	}
515184112Skmacy
516255331Sgibbs	for (;idx < nitems(xen_ipis); idx++)
517255331Sgibbs		    ipi_handle[idx] = NULL;
518255040Sgibbs
519255331Sgibbs	if (rc == 0)
520255331Sgibbs		return (0);
521184112Skmacy
522255331Sgibbs	/* Either all are successfully mapped, or none at all. */
523255331Sgibbs	for (idx = 0; idx < nitems(xen_ipis); idx++) {
524255331Sgibbs		if (ipi_handle[idx] == NULL)
525255331Sgibbs			continue;
526184198Skmacy
527255331Sgibbs		xen_intr_unbind(ipi_handle[idx]);
528255331Sgibbs		ipi_handle[idx] = NULL;
529255331Sgibbs	}
530184112Skmacy
531255158Sgibbs	return (rc);
532184112Skmacy}
533184112Skmacy
534184198Skmacystatic void
535184198Skmacyxen_smp_intr_init_cpus(void *unused)
536184198Skmacy{
537184198Skmacy	int i;
538184198Skmacy
539184198Skmacy	for (i = 0; i < mp_ncpus; i++)
540255040Sgibbs		xen_smp_cpu_init(i);
541184198Skmacy}
542184198Skmacy
543255040Sgibbsstatic void
544255040Sgibbsxen_smp_intr_setup_cpus(void *unused)
545255040Sgibbs{
546255040Sgibbs	int i;
547255040Sgibbs
548255040Sgibbs	for (i = 0; i < mp_ncpus; i++)
549255040Sgibbs		DPCPU_ID_SET(i, vcpu_info,
550255040Sgibbs		    &HYPERVISOR_shared_info->vcpu_info[i]);
551255040Sgibbs}
552255040Sgibbs
553182902Skmacy#define MTOPSIZE (1<<(14 + PAGE_SHIFT))
554182902Skmacy
555182902Skmacy/*
556182902Skmacy * AP CPU's call this to initialize themselves.
557182902Skmacy */
558182902Skmacyvoid
559182902Skmacyinit_secondary(void)
560182902Skmacy{
561182902Skmacy	vm_offset_t addr;
562223758Sattilio	u_int	cpuid;
563182902Skmacy	int	gsel_tss;
564182902Skmacy
565182902Skmacy
566182902Skmacy	/* bootAP is set in start_ap() to our ID. */
567182902Skmacy	PCPU_SET(currentldt, _default_ldt);
568182902Skmacy	gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
569182902Skmacy#if 0
570182902Skmacy	gdt[bootAP * NGDT + GPROC0_SEL].sd.sd_type = SDT_SYS386TSS;
571182902Skmacy#endif
572182902Skmacy	PCPU_SET(common_tss.tss_esp0, 0); /* not used until after switch */
573182902Skmacy	PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL));
574182902Skmacy	PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16);
575182902Skmacy#if 0
576182902Skmacy	PCPU_SET(tss_gdt, &gdt[bootAP * NGDT + GPROC0_SEL].sd);
577182902Skmacy
578182902Skmacy	PCPU_SET(common_tssd, *PCPU_GET(tss_gdt));
579182902Skmacy#endif
580182902Skmacy	PCPU_SET(fsgs_gdt, &gdt[GUFS_SEL].sd);
581182902Skmacy
582182902Skmacy	/*
583182902Skmacy	 * Set to a known state:
584182902Skmacy	 * Set by mpboot.s: CR0_PG, CR0_PE
585182902Skmacy	 * Set by cpu_setregs: CR0_NE, CR0_MP, CR0_TS, CR0_WP, CR0_AM
586182902Skmacy	 */
587182902Skmacy	/*
588182902Skmacy	 * signal our startup to the BSP.
589182902Skmacy	 */
590182902Skmacy	mp_naps++;
591182902Skmacy
592182902Skmacy	/* Spin until the BSP releases the AP's. */
593182902Skmacy	while (!aps_ready)
594182902Skmacy		ia32_pause();
595182902Skmacy
596182902Skmacy	/* BSP may have changed PTD while we were waiting */
597182902Skmacy	invltlb();
598182902Skmacy	for (addr = 0; addr < NKPT * NBPDR - 1; addr += PAGE_SIZE)
599182902Skmacy		invlpg(addr);
600182902Skmacy
601182902Skmacy	/* set up FPU state on the AP */
602189420Sjhb	npxinit();
603182902Skmacy#if 0
604182902Skmacy
605182902Skmacy	/* set up SSE registers */
606182902Skmacy	enable_sse();
607182902Skmacy#endif
608182902Skmacy#if 0 && defined(PAE)
609182902Skmacy	/* Enable the PTE no-execute bit. */
610182902Skmacy	if ((amd_feature & AMDID_NX) != 0) {
611182902Skmacy		uint64_t msr;
612182902Skmacy
613182902Skmacy		msr = rdmsr(MSR_EFER) | EFER_NXE;
614182902Skmacy		wrmsr(MSR_EFER, msr);
615182902Skmacy	}
616182902Skmacy#endif
617182902Skmacy#if 0
618182902Skmacy	/* A quick check from sanity claus */
619182902Skmacy	if (PCPU_GET(apic_id) != lapic_id()) {
620182902Skmacy		printf("SMP: cpuid = %d\n", PCPU_GET(cpuid));
621182902Skmacy		printf("SMP: actual apic_id = %d\n", lapic_id());
622182902Skmacy		printf("SMP: correct apic_id = %d\n", PCPU_GET(apic_id));
623182902Skmacy		panic("cpuid mismatch! boom!!");
624182902Skmacy	}
625182902Skmacy#endif
626182902Skmacy
627182902Skmacy	/* Initialize curthread. */
628182902Skmacy	KASSERT(PCPU_GET(idlethread) != NULL, ("no idle thread"));
629182902Skmacy	PCPU_SET(curthread, PCPU_GET(idlethread));
630182902Skmacy
631182902Skmacy	mtx_lock_spin(&ap_boot_mtx);
632182902Skmacy#if 0
633182902Skmacy
634182902Skmacy	/* Init local apic for irq's */
635182902Skmacy	lapic_setup(1);
636182902Skmacy#endif
637182902Skmacy	smp_cpus++;
638182902Skmacy
639223758Sattilio	cpuid = PCPU_GET(cpuid);
640223758Sattilio	CTR1(KTR_SMP, "SMP: AP CPU #%d Launched", cpuid);
641223758Sattilio	printf("SMP: AP CPU #%d Launched!\n", cpuid);
642182902Skmacy
643182902Skmacy	/* Determine if we are a logical CPU. */
644182902Skmacy	if (logical_cpus > 1 && PCPU_GET(apic_id) % logical_cpus != 0)
645223758Sattilio		CPU_SET(cpuid, &logical_cpus_mask);
646182902Skmacy
647182902Skmacy	/* Determine if we are a hyperthread. */
648182902Skmacy	if (hyperthreading_cpus > 1 &&
649182902Skmacy	    PCPU_GET(apic_id) % hyperthreading_cpus != 0)
650223758Sattilio		CPU_SET(cpuid, &hyperthreading_cpus_mask);
651182902Skmacy#if 0
652182902Skmacy	if (bootverbose)
653182902Skmacy		lapic_dump("AP");
654182902Skmacy#endif
655182902Skmacy	if (smp_cpus == mp_ncpus) {
656182902Skmacy		/* enable IPI's, tlb shootdown, freezes etc */
657182902Skmacy		atomic_store_rel_int(&smp_started, 1);
658182902Skmacy	}
659182902Skmacy
660182902Skmacy	mtx_unlock_spin(&ap_boot_mtx);
661182902Skmacy
662182902Skmacy	/* wait until all the AP's are up */
663182902Skmacy	while (smp_started == 0)
664182902Skmacy		ia32_pause();
665182902Skmacy
666183131Skmacy	PCPU_SET(curthread, PCPU_GET(idlethread));
667221835Smav
668221835Smav	/* Start per-CPU event timers. */
669221835Smav	cpu_initclocks_ap();
670221835Smav
671182902Skmacy	/* enter the scheduler */
672182902Skmacy	sched_throw(NULL);
673182902Skmacy
674182902Skmacy	panic("scheduler returned us to %s", __func__);
675182902Skmacy	/* NOTREACHED */
676182902Skmacy}
677182902Skmacy
678182902Skmacy/*******************************************************************
679182902Skmacy * local functions and data
680182902Skmacy */
681182902Skmacy
682182902Skmacy/*
683182902Skmacy * We tell the I/O APIC code about all the CPUs we want to receive
684182902Skmacy * interrupts.  If we don't want certain CPUs to receive IRQs we
685182902Skmacy * can simply not tell the I/O APIC code about them in this function.
686182902Skmacy * We also do not tell it about the BSP since it tells itself about
687182902Skmacy * the BSP internally to work with UP kernels and on UP machines.
688182902Skmacy */
689182902Skmacystatic void
690182902Skmacyset_interrupt_apic_ids(void)
691182902Skmacy{
692182902Skmacy	u_int i, apic_id;
693182902Skmacy
694182902Skmacy	for (i = 0; i < MAXCPU; i++) {
695182902Skmacy		apic_id = cpu_apic_ids[i];
696182902Skmacy		if (apic_id == -1)
697182902Skmacy			continue;
698182902Skmacy		if (cpu_info[apic_id].cpu_bsp)
699182902Skmacy			continue;
700182902Skmacy		if (cpu_info[apic_id].cpu_disabled)
701182902Skmacy			continue;
702182902Skmacy
703182902Skmacy		/* Don't let hyperthreads service interrupts. */
704182902Skmacy		if (hyperthreading_cpus > 1 &&
705182902Skmacy		    apic_id % hyperthreading_cpus != 0)
706182902Skmacy			continue;
707182902Skmacy
708182902Skmacy		intr_add_cpu(i);
709182902Skmacy	}
710182902Skmacy}
711182902Skmacy
712182902Skmacy/*
713182902Skmacy * Assign logical CPU IDs to local APICs.
714182902Skmacy */
715182902Skmacystatic void
716182902Skmacyassign_cpu_ids(void)
717182902Skmacy{
718182902Skmacy	u_int i;
719182902Skmacy
720182902Skmacy	/* Check for explicitly disabled CPUs. */
721182902Skmacy	for (i = 0; i <= MAX_APIC_ID; i++) {
722182902Skmacy		if (!cpu_info[i].cpu_present || cpu_info[i].cpu_bsp)
723182902Skmacy			continue;
724182902Skmacy
725182902Skmacy		/* Don't use this CPU if it has been disabled by a tunable. */
726182902Skmacy		if (resource_disabled("lapic", i)) {
727182902Skmacy			cpu_info[i].cpu_disabled = 1;
728182902Skmacy			continue;
729182902Skmacy		}
730182902Skmacy	}
731182902Skmacy
732182902Skmacy	/*
733182902Skmacy	 * Assign CPU IDs to local APIC IDs and disable any CPUs
734182902Skmacy	 * beyond MAXCPU.  CPU 0 has already been assigned to the BSP,
735182902Skmacy	 * so we only have to assign IDs for APs.
736182902Skmacy	 */
737182902Skmacy	mp_ncpus = 1;
738182902Skmacy	for (i = 0; i <= MAX_APIC_ID; i++) {
739182902Skmacy		if (!cpu_info[i].cpu_present || cpu_info[i].cpu_bsp ||
740182902Skmacy		    cpu_info[i].cpu_disabled)
741182902Skmacy			continue;
742182902Skmacy
743182902Skmacy		if (mp_ncpus < MAXCPU) {
744182902Skmacy			cpu_apic_ids[mp_ncpus] = i;
745187966Sbz			apic_cpuids[i] = mp_ncpus;
746182902Skmacy			mp_ncpus++;
747182902Skmacy		} else
748182902Skmacy			cpu_info[i].cpu_disabled = 1;
749182902Skmacy	}
750182902Skmacy	KASSERT(mp_maxid >= mp_ncpus - 1,
751182902Skmacy	    ("%s: counters out of sync: max %d, count %d", __func__, mp_maxid,
752182902Skmacy	    mp_ncpus));
753182902Skmacy}
754182902Skmacy
755182902Skmacy/*
756182902Skmacy * start each AP in our list
757182902Skmacy */
758182902Skmacy/* Lowest 1MB is already mapped: don't touch*/
759182902Skmacy#define TMPMAP_START 1
760182902Skmacyint
761182902Skmacystart_all_aps(void)
762182902Skmacy{
763182902Skmacy	int x,apic_id, cpu;
764182902Skmacy	struct pcpu *pc;
765182902Skmacy
766182902Skmacy	mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN);
767182902Skmacy
768182902Skmacy	/* set up temporary P==V mapping for AP boot */
769182902Skmacy	/* XXX this is a hack, we should boot the AP on its own stack/PTD */
770182902Skmacy
771182902Skmacy	/* start each AP */
772182902Skmacy	for (cpu = 1; cpu < mp_ncpus; cpu++) {
773182902Skmacy		apic_id = cpu_apic_ids[cpu];
774182902Skmacy
775182902Skmacy
776182902Skmacy		bootAP = cpu;
777182902Skmacy		bootAPgdt = gdt + (512*cpu);
778182902Skmacy
779182902Skmacy		/* Get per-cpu data */
780182902Skmacy		pc = &__pcpu[bootAP];
781183132Skmacy		pcpu_init(pc, bootAP, sizeof(struct pcpu));
782254025Sjeff		dpcpu_init((void *)kmem_malloc(kernel_arena, DPCPU_SIZE,
783254025Sjeff		    M_WAITOK | M_ZERO), bootAP);
784182902Skmacy		pc->pc_apic_id = cpu_apic_ids[bootAP];
785256073Sgibbs		pc->pc_vcpu_id = cpu_apic_ids[bootAP];
786182902Skmacy		pc->pc_prvspace = pc;
787182902Skmacy		pc->pc_curthread = 0;
788182902Skmacy
789182902Skmacy		gdt_segs[GPRIV_SEL].ssd_base = (int) pc;
790182902Skmacy		gdt_segs[GPROC0_SEL].ssd_base = (int) &pc->pc_common_tss;
791182902Skmacy
792215587Scperciva		PT_SET_MA(bootAPgdt, VTOM(bootAPgdt) | PG_V | PG_RW);
793182902Skmacy		bzero(bootAPgdt, PAGE_SIZE);
794182902Skmacy		for (x = 0; x < NGDT; x++)
795182902Skmacy			ssdtosd(&gdt_segs[x], &bootAPgdt[x].sd);
796182902Skmacy		PT_SET_MA(bootAPgdt, vtomach(bootAPgdt) | PG_V);
797183345Skmacy#ifdef notyet
798183345Skmacy
799183345Skmacy                if (HYPERVISOR_vcpu_op(VCPUOP_get_physid, cpu, &cpu_id) == 0) {
800183345Skmacy                        apicid = xen_vcpu_physid_to_x86_apicid(cpu_id.phys_id);
801183345Skmacy                        acpiid = xen_vcpu_physid_to_x86_acpiid(cpu_id.phys_id);
802183345Skmacy#ifdef CONFIG_ACPI
803183345Skmacy                        if (acpiid != 0xff)
804183345Skmacy                                x86_acpiid_to_apicid[acpiid] = apicid;
805183345Skmacy#endif
806183345Skmacy                }
807183345Skmacy#endif
808183345Skmacy
809182902Skmacy		/* attempt to start the Application Processor */
810182902Skmacy		if (!start_ap(cpu)) {
811182902Skmacy			printf("AP #%d (PHY# %d) failed!\n", cpu, apic_id);
812182902Skmacy			/* better panic as the AP may be running loose */
813182902Skmacy			printf("panic y/n? [y] ");
814182902Skmacy			if (cngetc() != 'n')
815182902Skmacy				panic("bye-bye");
816182902Skmacy		}
817182902Skmacy
818222813Sattilio		CPU_SET(cpu, &all_cpus);	/* record AP in CPU map */
819182902Skmacy	}
820182902Skmacy
821182902Skmacy
822182902Skmacy	pmap_invalidate_range(kernel_pmap, 0, NKPT * NBPDR - 1);
823182902Skmacy
824182902Skmacy	/* number of APs actually started */
825255158Sgibbs	return (mp_naps);
826182902Skmacy}
827182902Skmacy
828182902Skmacyextern uint8_t *pcpu_boot_stack;
829182902Skmacyextern trap_info_t trap_table[];
830182902Skmacy
831182902Skmacystatic void
832182902Skmacysmp_trap_init(trap_info_t *trap_ctxt)
833182902Skmacy{
834182902Skmacy        const trap_info_t *t = trap_table;
835182902Skmacy
836182902Skmacy        for (t = trap_table; t->address; t++) {
837182902Skmacy                trap_ctxt[t->vector].flags = t->flags;
838182902Skmacy                trap_ctxt[t->vector].cs = t->cs;
839182902Skmacy                trap_ctxt[t->vector].address = t->address;
840182902Skmacy        }
841182902Skmacy}
842182902Skmacy
843241498Salcextern struct rwlock pvh_global_lock;
844182902Skmacyextern int nkpt;
845184112Skmacystatic void
846182902Skmacycpu_initialize_context(unsigned int cpu)
847182902Skmacy{
848182902Skmacy	/* vcpu_guest_context_t is too large to allocate on the stack.
849182902Skmacy	 * Hence we allocate statically and protect it with a lock */
850228747Salc	vm_page_t m[NPGPTD + 2];
851182902Skmacy	static vcpu_guest_context_t ctxt;
852182902Skmacy	vm_offset_t boot_stack;
853183131Skmacy	vm_offset_t newPTD;
854183131Skmacy	vm_paddr_t ma[NPGPTD];
855182902Skmacy	int i;
856182902Skmacy
857182902Skmacy	/*
858183131Skmacy	 * Page 0,[0-3]	PTD
859183131Skmacy	 * Page 1, [4]	boot stack
860183131Skmacy	 * Page [5]	PDPT
861182902Skmacy	 *
862182902Skmacy	 */
863183131Skmacy	for (i = 0; i < NPGPTD + 2; i++) {
864228522Salc		m[i] = vm_page_alloc(NULL, 0,
865182902Skmacy		    VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
866182902Skmacy		    VM_ALLOC_ZERO);
867182902Skmacy
868182902Skmacy		pmap_zero_page(m[i]);
869182902Skmacy
870182902Skmacy	}
871254025Sjeff	boot_stack = kva_alloc(PAGE_SIZE);
872254025Sjeff	newPTD = kva_alloc(NPGPTD * PAGE_SIZE);
873215587Scperciva	ma[0] = VM_PAGE_TO_MACH(m[0])|PG_V;
874182902Skmacy
875183131Skmacy#ifdef PAE
876183131Skmacy	pmap_kenter(boot_stack, VM_PAGE_TO_PHYS(m[NPGPTD + 1]));
877183131Skmacy	for (i = 0; i < NPGPTD; i++) {
878183131Skmacy		((vm_paddr_t *)boot_stack)[i] =
879215587Scperciva		ma[i] = VM_PAGE_TO_MACH(m[i])|PG_V;
880182902Skmacy	}
881183131Skmacy#endif
882182902Skmacy
883182902Skmacy	/*
884182902Skmacy	 * Copy cpu0 IdlePTD to new IdlePTD - copying only
885182902Skmacy	 * kernel mappings
886182902Skmacy	 */
887183131Skmacy	pmap_qenter(newPTD, m, 4);
888183131Skmacy
889183131Skmacy	memcpy((uint8_t *)newPTD + KPTDI*sizeof(vm_paddr_t),
890183131Skmacy	    (uint8_t *)PTOV(IdlePTD) + KPTDI*sizeof(vm_paddr_t),
891182902Skmacy	    nkpt*sizeof(vm_paddr_t));
892183131Skmacy
893183131Skmacy	pmap_qremove(newPTD, 4);
894254025Sjeff	kva_free(newPTD, 4 * PAGE_SIZE);
895182902Skmacy	/*
896182902Skmacy	 * map actual idle stack to boot_stack
897182902Skmacy	 */
898183131Skmacy	pmap_kenter(boot_stack, VM_PAGE_TO_PHYS(m[NPGPTD]));
899182902Skmacy
900182902Skmacy
901215587Scperciva	xen_pgdpt_pin(VM_PAGE_TO_MACH(m[NPGPTD + 1]));
902241498Salc	rw_wlock(&pvh_global_lock);
903182902Skmacy	for (i = 0; i < 4; i++) {
904183131Skmacy		int pdir = (PTDPTDI + i) / NPDEPG;
905183131Skmacy		int curoffset = (PTDPTDI + i) % NPDEPG;
906183131Skmacy
907182902Skmacy		xen_queue_pt_update((vm_paddr_t)
908183131Skmacy		    ((ma[pdir] & ~PG_V) + (curoffset*sizeof(vm_paddr_t))),
909182902Skmacy		    ma[i]);
910182902Skmacy	}
911182902Skmacy	PT_UPDATES_FLUSH();
912241498Salc	rw_wunlock(&pvh_global_lock);
913182902Skmacy
914182902Skmacy	memset(&ctxt, 0, sizeof(ctxt));
915182902Skmacy	ctxt.flags = VGCF_IN_KERNEL;
916182902Skmacy	ctxt.user_regs.ds = GSEL(GDATA_SEL, SEL_KPL);
917182902Skmacy	ctxt.user_regs.es = GSEL(GDATA_SEL, SEL_KPL);
918182902Skmacy	ctxt.user_regs.fs = GSEL(GPRIV_SEL, SEL_KPL);
919182902Skmacy	ctxt.user_regs.gs = GSEL(GDATA_SEL, SEL_KPL);
920182902Skmacy	ctxt.user_regs.cs = GSEL(GCODE_SEL, SEL_KPL);
921182902Skmacy	ctxt.user_regs.ss = GSEL(GDATA_SEL, SEL_KPL);
922182902Skmacy	ctxt.user_regs.eip = (unsigned long)init_secondary;
923182902Skmacy	ctxt.user_regs.eflags = PSL_KERNEL | 0x1000; /* IOPL_RING1 */
924182902Skmacy
925182902Skmacy	memset(&ctxt.fpu_ctxt, 0, sizeof(ctxt.fpu_ctxt));
926182902Skmacy
927182902Skmacy	smp_trap_init(ctxt.trap_ctxt);
928182902Skmacy
929182902Skmacy	ctxt.ldt_ents = 0;
930255158Sgibbs	ctxt.gdt_frames[0] =
931255158Sgibbs	    (uint32_t)((uint64_t)vtomach(bootAPgdt) >> PAGE_SHIFT);
932182902Skmacy	ctxt.gdt_ents      = 512;
933182902Skmacy
934182902Skmacy#ifdef __i386__
935182902Skmacy	ctxt.user_regs.esp = boot_stack + PAGE_SIZE;
936182902Skmacy
937182902Skmacy	ctxt.kernel_ss = GSEL(GDATA_SEL, SEL_KPL);
938182902Skmacy	ctxt.kernel_sp = boot_stack + PAGE_SIZE;
939182902Skmacy
940182902Skmacy	ctxt.event_callback_cs     = GSEL(GCODE_SEL, SEL_KPL);
941182902Skmacy	ctxt.event_callback_eip    = (unsigned long)Xhypervisor_callback;
942182902Skmacy	ctxt.failsafe_callback_cs  = GSEL(GCODE_SEL, SEL_KPL);
943182902Skmacy	ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;
944182902Skmacy
945215587Scperciva	ctxt.ctrlreg[3] = VM_PAGE_TO_MACH(m[NPGPTD + 1]);
946182902Skmacy#else /* __x86_64__ */
947182902Skmacy	ctxt.user_regs.esp = idle->thread.rsp0 - sizeof(struct pt_regs);
948182902Skmacy	ctxt.kernel_ss = GSEL(GDATA_SEL, SEL_KPL);
949182902Skmacy	ctxt.kernel_sp = idle->thread.rsp0;
950182902Skmacy
951182902Skmacy	ctxt.event_callback_eip    = (unsigned long)hypervisor_callback;
952182902Skmacy	ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;
953182902Skmacy	ctxt.syscall_callback_eip  = (unsigned long)system_call;
954182902Skmacy
955182902Skmacy	ctxt.ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(init_level4_pgt));
956182902Skmacy
957182902Skmacy	ctxt.gs_base_kernel = (unsigned long)(cpu_pda(cpu));
958182902Skmacy#endif
959182902Skmacy
960182902Skmacy	printf("gdtpfn=%lx pdptpfn=%lx\n",
961182902Skmacy	    ctxt.gdt_frames[0],
962182902Skmacy	    ctxt.ctrlreg[3] >> PAGE_SHIFT);
963182902Skmacy
964182902Skmacy	PANIC_IF(HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, &ctxt));
965182902Skmacy	DELAY(3000);
966182902Skmacy	PANIC_IF(HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL));
967182902Skmacy}
968182902Skmacy
969182902Skmacy/*
970182902Skmacy * This function starts the AP (application processor) identified
971182902Skmacy * by the APIC ID 'physicalCpu'.  It does quite a "song and dance"
972182902Skmacy * to accomplish this.  This is necessary because of the nuances
973182902Skmacy * of the different hardware we might encounter.  It isn't pretty,
974182902Skmacy * but it seems to work.
975182902Skmacy */
976183131Skmacy
977183131Skmacyint cpus;
978182902Skmacystatic int
979182902Skmacystart_ap(int apic_id)
980182902Skmacy{
981182902Skmacy	int ms;
982182902Skmacy
983182902Skmacy	/* used as a watchpoint to signal AP startup */
984182902Skmacy	cpus = mp_naps;
985182902Skmacy
986182902Skmacy	cpu_initialize_context(apic_id);
987182902Skmacy
988182902Skmacy	/* Wait up to 5 seconds for it to start. */
989182902Skmacy	for (ms = 0; ms < 5000; ms++) {
990182902Skmacy		if (mp_naps > cpus)
991255158Sgibbs			return (1);	/* return SUCCESS */
992182902Skmacy		DELAY(1000);
993182902Skmacy	}
994255158Sgibbs	return (0);		/* return FAILURE */
995182902Skmacy}
996182902Skmacy
997255040Sgibbsstatic void
998255040Sgibbsipi_pcpu(int cpu, u_int ipi)
999255040Sgibbs{
1000255331Sgibbs	KASSERT((ipi <= nitems(xen_ipis)), ("invalid IPI"));
1001255331Sgibbs	xen_intr_signal(DPCPU_ID_GET(cpu, ipi_handle[ipi]));
1002255040Sgibbs}
1003255040Sgibbs
1004182902Skmacy/*
1005222065Sattilio * send an IPI to a specific CPU.
1006222065Sattilio */
1007222065Sattiliostatic void
1008222065Sattilioipi_send_cpu(int cpu, u_int ipi)
1009222065Sattilio{
1010222065Sattilio	u_int bitmap, old_pending, new_pending;
1011222065Sattilio
1012222065Sattilio	if (IPI_IS_BITMAPED(ipi)) {
1013222065Sattilio		bitmap = 1 << ipi;
1014222065Sattilio		ipi = IPI_BITMAP_VECTOR;
1015222065Sattilio		do {
1016222065Sattilio			old_pending = cpu_ipi_pending[cpu];
1017222065Sattilio			new_pending = old_pending | bitmap;
1018222065Sattilio		} while  (!atomic_cmpset_int(&cpu_ipi_pending[cpu],
1019222065Sattilio		    old_pending, new_pending));
1020222065Sattilio		if (!old_pending)
1021222065Sattilio			ipi_pcpu(cpu, RESCHEDULE_VECTOR);
1022222065Sattilio	} else {
1023222065Sattilio		KASSERT(call_data != NULL, ("call_data not set"));
1024222065Sattilio		ipi_pcpu(cpu, CALL_FUNCTION_VECTOR);
1025222065Sattilio	}
1026222065Sattilio}
1027222065Sattilio
1028222065Sattilio/*
1029182902Skmacy * Flush the TLB on all other CPU's
1030182902Skmacy */
1031182902Skmacystatic void
1032182902Skmacysmp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2)
1033182902Skmacy{
1034182902Skmacy	u_int ncpu;
1035184198Skmacy	struct _call_data data;
1036182902Skmacy
1037182902Skmacy	ncpu = mp_ncpus - 1;	/* does not shootdown self */
1038182902Skmacy	if (ncpu < 1)
1039182902Skmacy		return;		/* no other cpus */
1040182902Skmacy	if (!(read_eflags() & PSL_I))
1041182902Skmacy		panic("%s: interrupts disabled", __func__);
1042182902Skmacy	mtx_lock_spin(&smp_ipi_mtx);
1043193098Sadrian	KASSERT(call_data == NULL, ("call_data isn't null?!"));
1044193098Sadrian	call_data = &data;
1045184224Skmacy	call_data->func_id = vector;
1046184112Skmacy	call_data->arg1 = addr1;
1047184112Skmacy	call_data->arg2 = addr2;
1048182902Skmacy	atomic_store_rel_int(&smp_tlb_wait, 0);
1049182902Skmacy	ipi_all_but_self(vector);
1050182902Skmacy	while (smp_tlb_wait < ncpu)
1051182902Skmacy		ia32_pause();
1052184224Skmacy	call_data = NULL;
1053182902Skmacy	mtx_unlock_spin(&smp_ipi_mtx);
1054182902Skmacy}
1055182902Skmacy
1056182902Skmacystatic void
1057255158Sgibbssmp_targeted_tlb_shootdown(cpuset_t mask, u_int vector, vm_offset_t addr1,
1058255158Sgibbs    vm_offset_t addr2)
1059182902Skmacy{
1060222813Sattilio	int cpu, ncpu, othercpus;
1061184224Skmacy	struct _call_data data;
1062182902Skmacy
1063182902Skmacy	othercpus = mp_ncpus - 1;
1064222813Sattilio	if (CPU_ISFULLSET(&mask)) {
1065222813Sattilio		if (othercpus < 1)
1066182902Skmacy			return;
1067182902Skmacy	} else {
1068223758Sattilio		CPU_CLR(PCPU_GET(cpuid), &mask);
1069222813Sattilio		if (CPU_EMPTY(&mask))
1070182902Skmacy			return;
1071182902Skmacy	}
1072182902Skmacy	if (!(read_eflags() & PSL_I))
1073182902Skmacy		panic("%s: interrupts disabled", __func__);
1074182902Skmacy	mtx_lock_spin(&smp_ipi_mtx);
1075193098Sadrian	KASSERT(call_data == NULL, ("call_data isn't null?!"));
1076184224Skmacy	call_data = &data;
1077184224Skmacy	call_data->func_id = vector;
1078184224Skmacy	call_data->arg1 = addr1;
1079184224Skmacy	call_data->arg2 = addr2;
1080182902Skmacy	atomic_store_rel_int(&smp_tlb_wait, 0);
1081222813Sattilio	if (CPU_ISFULLSET(&mask)) {
1082222813Sattilio		ncpu = othercpus;
1083182902Skmacy		ipi_all_but_self(vector);
1084222813Sattilio	} else {
1085222813Sattilio		ncpu = 0;
1086251703Sjeff		while ((cpu = CPU_FFS(&mask)) != 0) {
1087222813Sattilio			cpu--;
1088222813Sattilio			CPU_CLR(cpu, &mask);
1089222813Sattilio			CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu,
1090222813Sattilio			    vector);
1091222813Sattilio			ipi_send_cpu(cpu, vector);
1092222813Sattilio			ncpu++;
1093222813Sattilio		}
1094222813Sattilio	}
1095182902Skmacy	while (smp_tlb_wait < ncpu)
1096182902Skmacy		ia32_pause();
1097184224Skmacy	call_data = NULL;
1098182902Skmacy	mtx_unlock_spin(&smp_ipi_mtx);
1099182902Skmacy}
1100182902Skmacy
1101182902Skmacyvoid
1102182902Skmacysmp_cache_flush(void)
1103182902Skmacy{
1104182902Skmacy
1105182902Skmacy	if (smp_started)
1106182902Skmacy		smp_tlb_shootdown(IPI_INVLCACHE, 0, 0);
1107182902Skmacy}
1108182902Skmacy
1109182902Skmacyvoid
1110182902Skmacysmp_invltlb(void)
1111182902Skmacy{
1112182902Skmacy
1113182902Skmacy	if (smp_started) {
1114182902Skmacy		smp_tlb_shootdown(IPI_INVLTLB, 0, 0);
1115182902Skmacy	}
1116182902Skmacy}
1117182902Skmacy
1118182902Skmacyvoid
1119182902Skmacysmp_invlpg(vm_offset_t addr)
1120182902Skmacy{
1121182902Skmacy
1122182902Skmacy	if (smp_started) {
1123182902Skmacy		smp_tlb_shootdown(IPI_INVLPG, addr, 0);
1124182902Skmacy	}
1125182902Skmacy}
1126182902Skmacy
1127182902Skmacyvoid
1128182902Skmacysmp_invlpg_range(vm_offset_t addr1, vm_offset_t addr2)
1129182902Skmacy{
1130182902Skmacy
1131182902Skmacy	if (smp_started) {
1132182902Skmacy		smp_tlb_shootdown(IPI_INVLRNG, addr1, addr2);
1133182902Skmacy	}
1134182902Skmacy}
1135182902Skmacy
1136182902Skmacyvoid
1137222813Sattiliosmp_masked_invltlb(cpuset_t mask)
1138182902Skmacy{
1139182902Skmacy
1140182902Skmacy	if (smp_started) {
1141182902Skmacy		smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, 0, 0);
1142182902Skmacy	}
1143182902Skmacy}
1144182902Skmacy
1145182902Skmacyvoid
1146222813Sattiliosmp_masked_invlpg(cpuset_t mask, vm_offset_t addr)
1147182902Skmacy{
1148182902Skmacy
1149182902Skmacy	if (smp_started) {
1150182902Skmacy		smp_targeted_tlb_shootdown(mask, IPI_INVLPG, addr, 0);
1151182902Skmacy	}
1152182902Skmacy}
1153182902Skmacy
1154182902Skmacyvoid
1155222813Sattiliosmp_masked_invlpg_range(cpuset_t mask, vm_offset_t addr1, vm_offset_t addr2)
1156182902Skmacy{
1157182902Skmacy
1158182902Skmacy	if (smp_started) {
1159182902Skmacy		smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, addr1, addr2);
1160182902Skmacy	}
1161182902Skmacy}
1162182902Skmacy
1163182902Skmacy/*
1164182902Skmacy * send an IPI to a set of cpus.
1165182902Skmacy */
1166182902Skmacyvoid
1167222813Sattilioipi_selected(cpuset_t cpus, u_int ipi)
1168182902Skmacy{
1169182902Skmacy	int cpu;
1170182902Skmacy
1171196256Sattilio	/*
1172196256Sattilio	 * IPI_STOP_HARD maps to a NMI and the trap handler needs a bit
1173196256Sattilio	 * of help in order to understand what is the source.
1174196256Sattilio	 * Set the mask of receiving CPUs for this purpose.
1175196256Sattilio	 */
1176196256Sattilio	if (ipi == IPI_STOP_HARD)
1177222813Sattilio		CPU_OR_ATOMIC(&ipi_nmi_pending, &cpus);
1178196256Sattilio
1179251703Sjeff	while ((cpu = CPU_FFS(&cpus)) != 0) {
1180182902Skmacy		cpu--;
1181222813Sattilio		CPU_CLR(cpu, &cpus);
1182222065Sattilio		CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi);
1183222065Sattilio		ipi_send_cpu(cpu, ipi);
1184182902Skmacy	}
1185182902Skmacy}
1186182902Skmacy
1187182902Skmacy/*
1188210939Sjhb * send an IPI to a specific CPU.
1189210939Sjhb */
1190210939Sjhbvoid
1191210939Sjhbipi_cpu(int cpu, u_int ipi)
1192210939Sjhb{
1193210939Sjhb
1194210939Sjhb	/*
1195210939Sjhb	 * IPI_STOP_HARD maps to a NMI and the trap handler needs a bit
1196210939Sjhb	 * of help in order to understand what is the source.
1197210939Sjhb	 * Set the mask of receiving CPUs for this purpose.
1198210939Sjhb	 */
1199210939Sjhb	if (ipi == IPI_STOP_HARD)
1200222813Sattilio		CPU_SET_ATOMIC(cpu, &ipi_nmi_pending);
1201210939Sjhb
1202210939Sjhb	CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi);
1203222065Sattilio	ipi_send_cpu(cpu, ipi);
1204210939Sjhb}
1205210939Sjhb
1206210939Sjhb/*
1207182902Skmacy * send an IPI to all CPUs EXCEPT myself
1208182902Skmacy */
1209182902Skmacyvoid
1210182902Skmacyipi_all_but_self(u_int ipi)
1211182902Skmacy{
1212222813Sattilio	cpuset_t other_cpus;
1213196256Sattilio
1214196256Sattilio	/*
1215196256Sattilio	 * IPI_STOP_HARD maps to a NMI and the trap handler needs a bit
1216196256Sattilio	 * of help in order to understand what is the source.
1217196256Sattilio	 * Set the mask of receiving CPUs for this purpose.
1218196256Sattilio	 */
1219223758Sattilio	other_cpus = all_cpus;
1220223758Sattilio	CPU_CLR(PCPU_GET(cpuid), &other_cpus);
1221196256Sattilio	if (ipi == IPI_STOP_HARD)
1222222813Sattilio		CPU_OR_ATOMIC(&ipi_nmi_pending, &other_cpus);
1223196256Sattilio
1224182902Skmacy	CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
1225222813Sattilio	ipi_selected(other_cpus, ipi);
1226182902Skmacy}
1227182902Skmacy
1228196256Sattilioint
1229196256Sattilioipi_nmi_handler()
1230196256Sattilio{
1231223758Sattilio	u_int cpuid;
1232196256Sattilio
1233196256Sattilio	/*
1234196256Sattilio	 * As long as there is not a simple way to know about a NMI's
1235196256Sattilio	 * source, if the bitmask for the current CPU is present in
1236196256Sattilio	 * the global pending bitword an IPI_STOP_HARD has been issued
1237196256Sattilio	 * and should be handled.
1238196256Sattilio	 */
1239223758Sattilio	cpuid = PCPU_GET(cpuid);
1240223758Sattilio	if (!CPU_ISSET(cpuid, &ipi_nmi_pending))
1241196256Sattilio		return (1);
1242196256Sattilio
1243223758Sattilio	CPU_CLR_ATOMIC(cpuid, &ipi_nmi_pending);
1244196256Sattilio	cpustop_handler();
1245196256Sattilio	return (0);
1246196256Sattilio}
1247196256Sattilio
1248182902Skmacy/*
1249182902Skmacy * Handle an IPI_STOP by saving our current context and spinning until we
1250182902Skmacy * are resumed.
1251182902Skmacy */
1252182902Skmacyvoid
1253182902Skmacycpustop_handler(void)
1254182902Skmacy{
1255222813Sattilio	int cpu;
1256182902Skmacy
1257222813Sattilio	cpu = PCPU_GET(cpuid);
1258222813Sattilio
1259182902Skmacy	savectx(&stoppcbs[cpu]);
1260182902Skmacy
1261182902Skmacy	/* Indicate that we are stopped */
1262223758Sattilio	CPU_SET_ATOMIC(cpu, &stopped_cpus);
1263182902Skmacy
1264182902Skmacy	/* Wait for restart */
1265223758Sattilio	while (!CPU_ISSET(cpu, &started_cpus))
1266182902Skmacy	    ia32_pause();
1267182902Skmacy
1268223758Sattilio	CPU_CLR_ATOMIC(cpu, &started_cpus);
1269223758Sattilio	CPU_CLR_ATOMIC(cpu, &stopped_cpus);
1270182902Skmacy
1271182902Skmacy	if (cpu == 0 && cpustop_restartfunc != NULL) {
1272182902Skmacy		cpustop_restartfunc();
1273182902Skmacy		cpustop_restartfunc = NULL;
1274182902Skmacy	}
1275182902Skmacy}
1276182902Skmacy
/*
 * Handlers for TLB related IPIs
 *
 * On i386 Xen PV these are no-ops since this port doesn't support SMP.
 */
void
invltlb_handler(void)
{

	/* Intentionally empty: this handler performs no work. */
}
1286264118Sroyger
void
invlpg_handler(void)
{

	/* Intentionally empty: this handler performs no work. */
}
1291264118Sroyger
void
invlrng_handler(void)
{

	/* Intentionally empty: this handler performs no work. */
}
1296264118Sroyger
void
invlcache_handler(void)
{

	/* Intentionally empty: this handler performs no work. */
}
1301264118Sroyger
1302264118Sroyger/*
1303182902Skmacy * This is called once the rest of the system is up and running and we're
1304182902Skmacy * ready to let the AP's out of the pen.
1305182902Skmacy */
1306182902Skmacystatic void
1307182902Skmacyrelease_aps(void *dummy __unused)
1308182902Skmacy{
1309182902Skmacy
1310182902Skmacy	if (mp_ncpus == 1)
1311182902Skmacy		return;
1312182902Skmacy	atomic_store_rel_int(&aps_ready, 1);
1313182902Skmacy	while (smp_started == 0)
1314182902Skmacy		ia32_pause();
1315182902Skmacy}
1316182902SkmacySYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL);
1317255040SgibbsSYSINIT(start_ipis, SI_SUB_SMP, SI_ORDER_ANY, xen_smp_intr_init_cpus, NULL);
1318255040SgibbsSYSINIT(start_cpu, SI_SUB_INTR, SI_ORDER_ANY, xen_smp_intr_setup_cpus, NULL);
1319