1255040Sgibbs/*
2255331Sgibbs * Copyright (c) 2008, 2013 Citrix Systems, Inc.
3255040Sgibbs * Copyright (c) 2012 Spectra Logic Corporation
4255040Sgibbs * All rights reserved.
5255040Sgibbs *
6255040Sgibbs * Redistribution and use in source and binary forms, with or without
7255040Sgibbs * modification, are permitted provided that the following conditions
8255040Sgibbs * are met:
9255040Sgibbs * 1. Redistributions of source code must retain the above copyright
10255040Sgibbs *    notice, this list of conditions and the following disclaimer.
11255040Sgibbs * 2. Redistributions in binary form must reproduce the above copyright
12255040Sgibbs *    notice, this list of conditions and the following disclaimer in the
13255040Sgibbs *    documentation and/or other materials provided with the distribution.
14255040Sgibbs *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16255040Sgibbs * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17255040Sgibbs * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18255040Sgibbs * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19255040Sgibbs * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20255040Sgibbs * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21255040Sgibbs * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22255040Sgibbs * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23255040Sgibbs * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24255040Sgibbs * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25255040Sgibbs * SUCH DAMAGE.
26255040Sgibbs */
27255040Sgibbs
28255040Sgibbs#include <sys/cdefs.h>
29255040Sgibbs__FBSDID("$FreeBSD$");
30255040Sgibbs
31255040Sgibbs#include <sys/param.h>
32255040Sgibbs#include <sys/bus.h>
33255040Sgibbs#include <sys/kernel.h>
34255040Sgibbs#include <sys/malloc.h>
35255040Sgibbs#include <sys/proc.h>
36255331Sgibbs#include <sys/smp.h>
37255331Sgibbs#include <sys/systm.h>
38255040Sgibbs
39255331Sgibbs#include <vm/vm.h>
40255331Sgibbs#include <vm/pmap.h>
41255331Sgibbs
42255040Sgibbs#include <dev/pci/pcivar.h>
43255331Sgibbs
44255040Sgibbs#include <machine/cpufunc.h>
45255331Sgibbs#include <machine/cpu.h>
46255331Sgibbs#include <machine/smp.h>
47255040Sgibbs
48255331Sgibbs#include <x86/apicreg.h>
49255331Sgibbs
50255040Sgibbs#include <xen/xen-os.h>
51255040Sgibbs#include <xen/features.h>
52255040Sgibbs#include <xen/gnttab.h>
53255040Sgibbs#include <xen/hypervisor.h>
54255040Sgibbs#include <xen/hvm.h>
55255040Sgibbs#include <xen/xen_intr.h>
56255040Sgibbs
57255040Sgibbs#include <xen/interface/hvm/params.h>
58255040Sgibbs#include <xen/interface/vcpu.h>
59255040Sgibbs
/*--------------------------- Forward Declarations ---------------------------*/
#ifdef SMP
/*
 * Interrupt filter routines, one per IPI type.  They are referenced by the
 * xen_ipis[] table below and defined further down in this file.
 */
static driver_filter_t xen_smp_rendezvous_action;
static driver_filter_t xen_invltlb;
static driver_filter_t xen_invlpg;
static driver_filter_t xen_invlrng;
static driver_filter_t xen_invlcache;
#ifdef __i386__
static driver_filter_t xen_lazypmap;
#endif
static driver_filter_t xen_ipi_bitmap_handler;
static driver_filter_t xen_cpustop_handler;
static driver_filter_t xen_cpususpend_handler;
static driver_filter_t xen_cpustophard_handler;
#endif
/* IPI sender and the hooks stored in xen_hvm_cpu_ops / cpu_ops. */
static void xen_ipi_vectored(u_int vector, int dest);
static void xen_hvm_cpu_resume(void);
static void xen_hvm_cpu_init(void);
78255331Sgibbs
/*---------------------------- Extern Declarations ---------------------------*/
/* Variables used by mp_machdep to perform the MMU related IPIs */
/* Incremented by every TLB/cache IPI handler below once its work is done. */
extern volatile int smp_tlb_wait;
/* End address of the range handled by xen_invlrng(). */
extern vm_offset_t smp_tlb_addr2;
#ifdef __i386__
/* Target (or first) address for xen_invlpg()/xen_invlrng() on i386. */
extern vm_offset_t smp_tlb_addr1;
#else
/* On amd64 the target address and PCID are carried in this descriptor. */
extern struct invpcid_descr smp_tlb_invpcid;
extern uint64_t pcid_cr3;
extern int invpcid_works;
extern int pmap_pcid_enabled;
extern pmap_t smp_tlb_pmap;
#endif

#ifdef __i386__
extern void pmap_lazyfix_action(void);
#endif

/* Variables used by mp_machdep to perform the bitmap IPI */
extern volatile u_int cpu_ipi_pending[MAXCPU];
99255744Sgibbs
/*---------------------------------- Macros ----------------------------------*/
/*
 * Convert an IPI vector into an offset relative to APIC_IPI_INTS.  The
 * result indexes xen_ipis[] and the per-CPU ipi_handle[] array.
 */
#define	IPI_TO_IDX(ipi) ((ipi) - APIC_IPI_INTS)
102255331Sgibbs
/*-------------------------------- Local Types -------------------------------*/
/* Reason xen_hvm_init() is being invoked. */
enum xen_hvm_init_type {
	/* Boot-time initialization (passed by xen_hvm_sysinit()). */
	XEN_HVM_INIT_COLD,
	/* Resume after a cancelled suspend; xen_hvm_init() returns at once. */
	XEN_HVM_INIT_CANCELLED_SUSPEND,
	/* Resume after a completed suspend; hypervisor state is set up again. */
	XEN_HVM_INIT_RESUME
};
109255726Sgibbs
/* One entry of the xen_ipis[] table: how a given IPI is serviced. */
struct xen_ipi_handler
{
	/* Filter bound to the IPI's event channel; NULL means "no handler". */
	driver_filter_t	*filter;
	/* Short label passed to xen_intr_describe() for this IPI. */
	const char	*description;
};
115255331Sgibbs
/*-------------------------------- Global Data -------------------------------*/
/* Set to XEN_HVM_DOMAIN by xen_hvm_init() once the hypervisor is set up. */
enum xen_domain_type xen_domain_type = XEN_NATIVE;

/*
 * CPU operations copied into cpu_ops by xen_hvm_init() on a cold boot.
 * IPIs start out on the local APIC path; xen_setup_cpus() later replaces
 * cpu_ops.ipi_vectored with xen_ipi_vectored.
 */
struct cpu_ops xen_hvm_cpu_ops = {
	.ipi_vectored	= lapic_ipi_vectored,
	.cpu_init	= xen_hvm_cpu_init,
	.cpu_resume	= xen_hvm_cpu_resume
};

/* Malloc type used for the hypercall region and the shared info page. */
static MALLOC_DEFINE(M_XENHVM, "xen_hvm", "Xen HVM PV Support");
126255040Sgibbs
#ifdef SMP
/*
 * IPI dispatch table, indexed by IPI_TO_IDX(vector).  Slots that are not
 * listed are zero-initialized, so their filter is NULL and
 * xen_cpu_ipi_init() skips them.  xen_setup_cpus() may replace the
 * INVLTLB/INVLPG filters with the PCID-aware variants on amd64.
 */
static struct xen_ipi_handler xen_ipis[] =
{
	[IPI_TO_IDX(IPI_RENDEZVOUS)]	= { xen_smp_rendezvous_action,	"r"   },
	[IPI_TO_IDX(IPI_INVLTLB)]	= { xen_invltlb,		"itlb"},
	[IPI_TO_IDX(IPI_INVLPG)]	= { xen_invlpg,			"ipg" },
	[IPI_TO_IDX(IPI_INVLRNG)]	= { xen_invlrng,		"irg" },
	[IPI_TO_IDX(IPI_INVLCACHE)]	= { xen_invlcache,		"ic"  },
#ifdef __i386__
	[IPI_TO_IDX(IPI_LAZYPMAP)]	= { xen_lazypmap,		"lp"  },
#endif
	[IPI_TO_IDX(IPI_BITMAP_VECTOR)] = { xen_ipi_bitmap_handler,	"b"   },
	[IPI_TO_IDX(IPI_STOP)]		= { xen_cpustop_handler,	"st"  },
	[IPI_TO_IDX(IPI_SUSPEND)]	= { xen_cpususpend_handler,	"sp"  },
	[IPI_TO_IDX(IPI_STOP_HARD)]	= { xen_cpustophard_handler,	"sth" },
};
#endif
144255040Sgibbs
/**
 * If non-zero, the hypervisor has been configured to use a direct
 * IDT event callback for interrupt injection.
 *
 * Set by xen_hvm_set_callback() and cleared by xen_hvm_init().
 */
int xen_vector_callback_enabled;

/*------------------------------- Per-CPU Data -------------------------------*/
/* Per-CPU storage that xen_hvm_cpu_init() tries to register with Xen. */
DPCPU_DEFINE(struct vcpu_info, vcpu_local_info);
/*
 * Per-CPU pointer to the vcpu_info in use: either vcpu_local_info or, when
 * registration fails, the matching slot in HYPERVISOR_shared_info.
 */
DPCPU_DEFINE(struct vcpu_info *, vcpu_info);
#ifdef SMP
/* Per-CPU interrupt handles, one slot per xen_ipis[] entry. */
DPCPU_DEFINE(xen_intr_handle_t, ipi_handle[nitems(xen_ipis)]);
#endif

/*------------------ Hypervisor Access Shared Memory Regions -----------------*/
/** Hypercall table accessed via HYPERVISOR_*_op() methods. */
char *hypercall_stubs;
/** Shared info page; allocated by xen_hvm_init_shared_info_page(). */
shared_info_t *HYPERVISOR_shared_info;
162255040Sgibbs
163255726Sgibbs#ifdef SMP
164255331Sgibbs/*---------------------------- XEN PV IPI Handlers ---------------------------*/
/*
 * These are C clones of the ASM functions found in apic_vector.s
 */
/*
 * Filter for IPI_BITMAP_VECTOR.  The filter argument is treated as the
 * interrupted trapframe and handed (by value) to ipi_bitmap_handler().
 */
static int
xen_ipi_bitmap_handler(void *arg)
{
	struct trapframe *frame;

	frame = arg;
	ipi_bitmap_handler(*frame);
	return (FILTER_HANDLED);
}
177255331Sgibbs
178255331Sgibbsstatic int
179255331Sgibbsxen_smp_rendezvous_action(void *arg)
180255331Sgibbs{
181255331Sgibbs#ifdef COUNT_IPIS
182255331Sgibbs	int cpu;
183255331Sgibbs
184255331Sgibbs	cpu = PCPU_GET(cpuid);
185255331Sgibbs	(*ipi_rendezvous_counts[cpu])++;
186255331Sgibbs#endif /* COUNT_IPIS */
187255331Sgibbs
188255331Sgibbs	smp_rendezvous_action();
189255331Sgibbs	return (FILTER_HANDLED);
190255331Sgibbs}
191255331Sgibbs
192255331Sgibbsstatic int
193255331Sgibbsxen_invltlb(void *arg)
194255331Sgibbs{
195255331Sgibbs#if defined(COUNT_XINVLTLB_HITS) || defined(COUNT_IPIS)
196255331Sgibbs	int cpu;
197255331Sgibbs
198255331Sgibbs	cpu = PCPU_GET(cpuid);
199255331Sgibbs#ifdef COUNT_XINVLTLB_HITS
200255331Sgibbs	xhits_gbl[cpu]++;
201255331Sgibbs#endif /* COUNT_XINVLTLB_HITS */
202255331Sgibbs#ifdef COUNT_IPIS
203255331Sgibbs	(*ipi_invltlb_counts[cpu])++;
204255331Sgibbs#endif /* COUNT_IPIS */
205255331Sgibbs#endif /* COUNT_XINVLTLB_HITS || COUNT_IPIS */
206255331Sgibbs
207255331Sgibbs	invltlb();
208255331Sgibbs	atomic_add_int(&smp_tlb_wait, 1);
209255331Sgibbs	return (FILTER_HANDLED);
210255331Sgibbs}
211255331Sgibbs
212255331Sgibbs#ifdef __amd64__
213255331Sgibbsstatic int
214255331Sgibbsxen_invltlb_pcid(void *arg)
215255331Sgibbs{
216255331Sgibbs	uint64_t cr3;
217255331Sgibbs#if defined(COUNT_XINVLTLB_HITS) || defined(COUNT_IPIS)
218255331Sgibbs	int cpu;
219255331Sgibbs
220255331Sgibbs	cpu = PCPU_GET(cpuid);
221255331Sgibbs#ifdef COUNT_XINVLTLB_HITS
222255331Sgibbs	xhits_gbl[cpu]++;
223255331Sgibbs#endif /* COUNT_XINVLTLB_HITS */
224255331Sgibbs#ifdef COUNT_IPIS
225255331Sgibbs	(*ipi_invltlb_counts[cpu])++;
226255331Sgibbs#endif /* COUNT_IPIS */
227255331Sgibbs#endif /* COUNT_XINVLTLB_HITS || COUNT_IPIS */
228255331Sgibbs
229255331Sgibbs	cr3 = rcr3();
230255331Sgibbs	if (smp_tlb_invpcid.pcid != (uint64_t)-1 &&
231255331Sgibbs	    smp_tlb_invpcid.pcid != 0) {
232255331Sgibbs
233255331Sgibbs		if (invpcid_works) {
234255331Sgibbs			invpcid(&smp_tlb_invpcid, INVPCID_CTX);
235255331Sgibbs		} else {
236255331Sgibbs			/* Otherwise reload %cr3 twice. */
237255331Sgibbs			if (cr3 != pcid_cr3) {
238255331Sgibbs				load_cr3(pcid_cr3);
239255331Sgibbs				cr3 |= CR3_PCID_SAVE;
240255331Sgibbs			}
241255331Sgibbs			load_cr3(cr3);
242255331Sgibbs		}
243255331Sgibbs	} else {
244255331Sgibbs		invltlb_globpcid();
245255331Sgibbs	}
246255331Sgibbs	if (smp_tlb_pmap != NULL)
247255331Sgibbs		CPU_CLR_ATOMIC(PCPU_GET(cpuid), &smp_tlb_pmap->pm_save);
248255331Sgibbs
249255331Sgibbs	atomic_add_int(&smp_tlb_wait, 1);
250255331Sgibbs	return (FILTER_HANDLED);
251255331Sgibbs}
252255331Sgibbs#endif
253255331Sgibbs
254255331Sgibbsstatic int
255255331Sgibbsxen_invlpg(void *arg)
256255331Sgibbs{
257255331Sgibbs#if defined(COUNT_XINVLTLB_HITS) || defined(COUNT_IPIS)
258255331Sgibbs	int cpu;
259255331Sgibbs
260255331Sgibbs	cpu = PCPU_GET(cpuid);
261255331Sgibbs#ifdef COUNT_XINVLTLB_HITS
262255331Sgibbs	xhits_pg[cpu]++;
263255331Sgibbs#endif /* COUNT_XINVLTLB_HITS */
264255331Sgibbs#ifdef COUNT_IPIS
265255331Sgibbs	(*ipi_invlpg_counts[cpu])++;
266255331Sgibbs#endif /* COUNT_IPIS */
267255331Sgibbs#endif /* COUNT_XINVLTLB_HITS || COUNT_IPIS */
268255331Sgibbs
269255331Sgibbs#ifdef __i386__
270255331Sgibbs	invlpg(smp_tlb_addr1);
271255331Sgibbs#else
272255331Sgibbs	invlpg(smp_tlb_invpcid.addr);
273255331Sgibbs#endif
274255331Sgibbs	atomic_add_int(&smp_tlb_wait, 1);
275255331Sgibbs	return (FILTER_HANDLED);
276255331Sgibbs}
277255331Sgibbs
278255331Sgibbs#ifdef __amd64__
279255331Sgibbsstatic int
280255331Sgibbsxen_invlpg_pcid(void *arg)
281255331Sgibbs{
282255331Sgibbs#if defined(COUNT_XINVLTLB_HITS) || defined(COUNT_IPIS)
283255331Sgibbs	int cpu;
284255331Sgibbs
285255331Sgibbs	cpu = PCPU_GET(cpuid);
286255331Sgibbs#ifdef COUNT_XINVLTLB_HITS
287255331Sgibbs	xhits_pg[cpu]++;
288255331Sgibbs#endif /* COUNT_XINVLTLB_HITS */
289255331Sgibbs#ifdef COUNT_IPIS
290255331Sgibbs	(*ipi_invlpg_counts[cpu])++;
291255331Sgibbs#endif /* COUNT_IPIS */
292255331Sgibbs#endif /* COUNT_XINVLTLB_HITS || COUNT_IPIS */
293255331Sgibbs
294255331Sgibbs	if (invpcid_works) {
295255331Sgibbs		invpcid(&smp_tlb_invpcid, INVPCID_ADDR);
296255331Sgibbs	} else if (smp_tlb_invpcid.pcid == 0) {
297255331Sgibbs		invlpg(smp_tlb_invpcid.addr);
298255331Sgibbs	} else if (smp_tlb_invpcid.pcid == (uint64_t)-1) {
299255331Sgibbs		invltlb_globpcid();
300255331Sgibbs	} else {
301255331Sgibbs		uint64_t cr3;
302255331Sgibbs
303255331Sgibbs		/*
304255331Sgibbs		 * PCID supported, but INVPCID is not.
305255331Sgibbs		 * Temporarily switch to the target address
306255331Sgibbs		 * space and do INVLPG.
307255331Sgibbs		 */
308255331Sgibbs		cr3 = rcr3();
309255331Sgibbs		if (cr3 != pcid_cr3)
310255331Sgibbs			load_cr3(pcid_cr3 | CR3_PCID_SAVE);
311255331Sgibbs		invlpg(smp_tlb_invpcid.addr);
312255331Sgibbs		load_cr3(cr3 | CR3_PCID_SAVE);
313255331Sgibbs	}
314255331Sgibbs
315255331Sgibbs	atomic_add_int(&smp_tlb_wait, 1);
316255331Sgibbs	return (FILTER_HANDLED);
317255331Sgibbs}
318255331Sgibbs#endif
319255331Sgibbs
320255331Sgibbsstatic inline void
321255331Sgibbsinvlpg_range(vm_offset_t start, vm_offset_t end)
322255331Sgibbs{
323255331Sgibbs	do {
324255331Sgibbs		invlpg(start);
325255331Sgibbs		start += PAGE_SIZE;
326255331Sgibbs	} while (start < end);
327255331Sgibbs}
328255331Sgibbs
329255331Sgibbsstatic int
330255331Sgibbsxen_invlrng(void *arg)
331255331Sgibbs{
332255331Sgibbs	vm_offset_t addr;
333255331Sgibbs#if defined(COUNT_XINVLTLB_HITS) || defined(COUNT_IPIS)
334255331Sgibbs	int cpu;
335255331Sgibbs
336255331Sgibbs	cpu = PCPU_GET(cpuid);
337255331Sgibbs#ifdef COUNT_XINVLTLB_HITS
338255331Sgibbs	xhits_rng[cpu]++;
339255331Sgibbs#endif /* COUNT_XINVLTLB_HITS */
340255331Sgibbs#ifdef COUNT_IPIS
341255331Sgibbs	(*ipi_invlrng_counts[cpu])++;
342255331Sgibbs#endif /* COUNT_IPIS */
343255331Sgibbs#endif /* COUNT_XINVLTLB_HITS || COUNT_IPIS */
344255331Sgibbs
345255331Sgibbs#ifdef __i386__
346255331Sgibbs	addr = smp_tlb_addr1;
347255331Sgibbs	invlpg_range(addr, smp_tlb_addr2);
348255331Sgibbs#else
349255331Sgibbs	addr = smp_tlb_invpcid.addr;
350255331Sgibbs	if (pmap_pcid_enabled) {
351255331Sgibbs		if (invpcid_works) {
352255331Sgibbs			struct invpcid_descr d;
353255331Sgibbs
354255331Sgibbs			d = smp_tlb_invpcid;
355255331Sgibbs			do {
356255331Sgibbs				invpcid(&d, INVPCID_ADDR);
357255331Sgibbs				d.addr += PAGE_SIZE;
358255331Sgibbs			} while (d.addr < smp_tlb_addr2);
359255331Sgibbs		} else if (smp_tlb_invpcid.pcid == 0) {
360255331Sgibbs			/*
361255331Sgibbs			 * kernel pmap - use invlpg to invalidate
362255331Sgibbs			 * global mapping.
363255331Sgibbs			 */
364255331Sgibbs			invlpg_range(addr, smp_tlb_addr2);
365255331Sgibbs		} else if (smp_tlb_invpcid.pcid != (uint64_t)-1) {
366255331Sgibbs			invltlb_globpcid();
367255331Sgibbs			if (smp_tlb_pmap != NULL) {
368255331Sgibbs				CPU_CLR_ATOMIC(PCPU_GET(cpuid),
369255331Sgibbs				    &smp_tlb_pmap->pm_save);
370255331Sgibbs			}
371255331Sgibbs		} else {
372255331Sgibbs			uint64_t cr3;
373255331Sgibbs
374255331Sgibbs			cr3 = rcr3();
375255331Sgibbs			if (cr3 != pcid_cr3)
376255331Sgibbs				load_cr3(pcid_cr3 | CR3_PCID_SAVE);
377255331Sgibbs			invlpg_range(addr, smp_tlb_addr2);
378255331Sgibbs			load_cr3(cr3 | CR3_PCID_SAVE);
379255331Sgibbs		}
380255331Sgibbs	} else {
381255331Sgibbs		invlpg_range(addr, smp_tlb_addr2);
382255331Sgibbs	}
383255331Sgibbs#endif
384255331Sgibbs
385255331Sgibbs	atomic_add_int(&smp_tlb_wait, 1);
386255331Sgibbs	return (FILTER_HANDLED);
387255331Sgibbs}
388255331Sgibbs
389255331Sgibbsstatic int
390255331Sgibbsxen_invlcache(void *arg)
391255331Sgibbs{
392255331Sgibbs#ifdef COUNT_IPIS
393255331Sgibbs	int cpu = PCPU_GET(cpuid);
394255331Sgibbs
395255331Sgibbs	cpu = PCPU_GET(cpuid);
396255331Sgibbs	(*ipi_invlcache_counts[cpu])++;
397255331Sgibbs#endif /* COUNT_IPIS */
398255331Sgibbs
399255331Sgibbs	wbinvd();
400255331Sgibbs	atomic_add_int(&smp_tlb_wait, 1);
401255331Sgibbs	return (FILTER_HANDLED);
402255331Sgibbs}
403255331Sgibbs
404255331Sgibbs#ifdef __i386__
/* Filter for IPI_LAZYPMAP (i386 only): forwards to pmap_lazyfix_action(). */
static int
xen_lazypmap(void *arg)
{

	pmap_lazyfix_action();
	return (FILTER_HANDLED);
}
412255331Sgibbs#endif
413255331Sgibbs
/* Filter for IPI_STOP: forwards to cpustop_handler(). */
static int
xen_cpustop_handler(void *arg)
{

	cpustop_handler();
	return (FILTER_HANDLED);
}
421255331Sgibbs
/* Filter for IPI_SUSPEND: forwards to cpususpend_handler(). */
static int
xen_cpususpend_handler(void *arg)
{

	cpususpend_handler();
	return (FILTER_HANDLED);
}
429255331Sgibbs
/* Filter for IPI_STOP_HARD: forwards to ipi_nmi_handler(). */
static int
xen_cpustophard_handler(void *arg)
{

	ipi_nmi_handler();
	return (FILTER_HANDLED);
}
437255331Sgibbs
438255331Sgibbs/* Xen PV IPI sender */
439255331Sgibbsstatic void
440255331Sgibbsxen_ipi_vectored(u_int vector, int dest)
441255331Sgibbs{
442255331Sgibbs	xen_intr_handle_t *ipi_handle;
443255331Sgibbs	int ipi_idx, to_cpu, self;
444255331Sgibbs
445255331Sgibbs	ipi_idx = IPI_TO_IDX(vector);
446255331Sgibbs	if (ipi_idx > nitems(xen_ipis))
447255331Sgibbs		panic("IPI out of range");
448255331Sgibbs
449255331Sgibbs	switch(dest) {
450255331Sgibbs	case APIC_IPI_DEST_SELF:
451255331Sgibbs		ipi_handle = DPCPU_GET(ipi_handle);
452255331Sgibbs		xen_intr_signal(ipi_handle[ipi_idx]);
453255331Sgibbs		break;
454255331Sgibbs	case APIC_IPI_DEST_ALL:
455255331Sgibbs		CPU_FOREACH(to_cpu) {
456255331Sgibbs			ipi_handle = DPCPU_ID_GET(to_cpu, ipi_handle);
457255331Sgibbs			xen_intr_signal(ipi_handle[ipi_idx]);
458255331Sgibbs		}
459255331Sgibbs		break;
460255331Sgibbs	case APIC_IPI_DEST_OTHERS:
461255331Sgibbs		self = PCPU_GET(cpuid);
462255331Sgibbs		CPU_FOREACH(to_cpu) {
463255331Sgibbs			if (to_cpu != self) {
464255331Sgibbs				ipi_handle = DPCPU_ID_GET(to_cpu, ipi_handle);
465255331Sgibbs				xen_intr_signal(ipi_handle[ipi_idx]);
466255331Sgibbs			}
467255331Sgibbs		}
468255331Sgibbs		break;
469255331Sgibbs	default:
470255331Sgibbs		to_cpu = apic_cpuid(dest);
471255331Sgibbs		ipi_handle = DPCPU_ID_GET(to_cpu, ipi_handle);
472255331Sgibbs		xen_intr_signal(ipi_handle[ipi_idx]);
473255331Sgibbs		break;
474255331Sgibbs	}
475255331Sgibbs}
476255331Sgibbs
477255744Sgibbs/* XEN diverged cpu operations */
478255331Sgibbsstatic void
479255744Sgibbsxen_hvm_cpu_resume(void)
480255744Sgibbs{
481255744Sgibbs	u_int cpuid = PCPU_GET(cpuid);
482255744Sgibbs
483255744Sgibbs	/*
484255744Sgibbs	 * Reset pending bitmap IPIs, because Xen doesn't preserve pending
485255744Sgibbs	 * event channels on migration.
486255744Sgibbs	 */
487255744Sgibbs	cpu_ipi_pending[cpuid] = 0;
488255744Sgibbs
489255744Sgibbs	/* register vcpu_info area */
490255744Sgibbs	xen_hvm_cpu_init();
491255744Sgibbs}
492255744Sgibbs
493255744Sgibbsstatic void
494255331Sgibbsxen_cpu_ipi_init(int cpu)
495255331Sgibbs{
496255331Sgibbs	xen_intr_handle_t *ipi_handle;
497255331Sgibbs	const struct xen_ipi_handler *ipi;
498255331Sgibbs	device_t dev;
499255331Sgibbs	int idx, rc;
500255331Sgibbs
501255331Sgibbs	ipi_handle = DPCPU_ID_GET(cpu, ipi_handle);
502255331Sgibbs	dev = pcpu_find(cpu)->pc_device;
503255331Sgibbs	KASSERT((dev != NULL), ("NULL pcpu device_t"));
504255331Sgibbs
505255331Sgibbs	for (ipi = xen_ipis, idx = 0; idx < nitems(xen_ipis); ipi++, idx++) {
506255331Sgibbs
507255331Sgibbs		if (ipi->filter == NULL) {
508255331Sgibbs			ipi_handle[idx] = NULL;
509255331Sgibbs			continue;
510255331Sgibbs		}
511255331Sgibbs
512255331Sgibbs		rc = xen_intr_alloc_and_bind_ipi(dev, cpu, ipi->filter,
513255331Sgibbs		    INTR_TYPE_TTY, &ipi_handle[idx]);
514255331Sgibbs		if (rc != 0)
515255331Sgibbs			panic("Unable to allocate a XEN IPI port");
516255331Sgibbs		xen_intr_describe(ipi_handle[idx], "%s", ipi->description);
517255331Sgibbs	}
518255331Sgibbs}
519255331Sgibbs
520255331Sgibbsstatic void
521255744Sgibbsxen_setup_cpus(void)
522255331Sgibbs{
523255331Sgibbs	int i;
524255331Sgibbs
525255331Sgibbs	if (!xen_hvm_domain() || !xen_vector_callback_enabled)
526255331Sgibbs		return;
527255331Sgibbs
528255331Sgibbs#ifdef __amd64__
529255331Sgibbs	if (pmap_pcid_enabled) {
530255331Sgibbs		xen_ipis[IPI_TO_IDX(IPI_INVLTLB)].filter = xen_invltlb_pcid;
531255331Sgibbs		xen_ipis[IPI_TO_IDX(IPI_INVLPG)].filter = xen_invlpg_pcid;
532255331Sgibbs	}
533255331Sgibbs#endif
534255331Sgibbs	CPU_FOREACH(i)
535255331Sgibbs		xen_cpu_ipi_init(i);
536255331Sgibbs
537255331Sgibbs	/* Set the xen pv ipi ops to replace the native ones */
538255913Sgibbs	cpu_ops.ipi_vectored = xen_ipi_vectored;
539255331Sgibbs}
540255726Sgibbs#endif
541255331Sgibbs
542255331Sgibbs/*---------------------- XEN Hypervisor Probe and Setup ----------------------*/
/*
 * Scan CPUID leaves 0x40000000 .. 0x4000ff00 in steps of 0x100 for the
 * "XenVMMXenVMM" signature.
 *
 * Returns the first leaf that carries the signature and reports at least
 * two further leaves (regs[0] - leaf >= 2), or 0 when none is found.
 */
static uint32_t
xen_hvm_cpuid_base(void)
{
	uint32_t leaf, regs[4];

	for (leaf = 0x40000000; leaf < 0x40010000; leaf += 0x100) {
		do_cpuid(leaf, regs);

		/* The signature is compared against the 12 bytes at regs[1]. */
		if (memcmp("XenVMMXenVMM", &regs[1], 12) != 0)
			continue;
		if ((regs[0] - leaf) >= 2)
			return (leaf);
	}

	return (0);
}
556255040Sgibbs
557255040Sgibbs/*
558255040Sgibbs * Allocate and fill in the hypcall page.
559255040Sgibbs */
560255040Sgibbsstatic int
561255040Sgibbsxen_hvm_init_hypercall_stubs(void)
562255040Sgibbs{
563255040Sgibbs	uint32_t base, regs[4];
564255040Sgibbs	int i;
565255040Sgibbs
566255040Sgibbs	base = xen_hvm_cpuid_base();
567255139Sgibbs	if (base == 0)
568255040Sgibbs		return (ENXIO);
569255040Sgibbs
570255040Sgibbs	if (hypercall_stubs == NULL) {
571255040Sgibbs		do_cpuid(base + 1, regs);
572255040Sgibbs		printf("XEN: Hypervisor version %d.%d detected.\n",
573255040Sgibbs		    regs[0] >> 16, regs[0] & 0xffff);
574255040Sgibbs	}
575255040Sgibbs
576255040Sgibbs	/*
577255040Sgibbs	 * Find the hypercall pages.
578255040Sgibbs	 */
579255040Sgibbs	do_cpuid(base + 2, regs);
580255040Sgibbs
581255040Sgibbs	if (hypercall_stubs == NULL) {
582255040Sgibbs		size_t call_region_size;
583255040Sgibbs
584255040Sgibbs		call_region_size = regs[0] * PAGE_SIZE;
585255040Sgibbs		hypercall_stubs = malloc(call_region_size, M_XENHVM, M_NOWAIT);
586255040Sgibbs		if (hypercall_stubs == NULL)
587255040Sgibbs			panic("Unable to allocate Xen hypercall region");
588255040Sgibbs	}
589255040Sgibbs
590255040Sgibbs	for (i = 0; i < regs[0]; i++)
591255040Sgibbs		wrmsr(regs[1], vtophys(hypercall_stubs + i * PAGE_SIZE) + i);
592255040Sgibbs
593255040Sgibbs	return (0);
594255040Sgibbs}
595255040Sgibbs
596255040Sgibbsstatic void
597255040Sgibbsxen_hvm_init_shared_info_page(void)
598255040Sgibbs{
599255040Sgibbs	struct xen_add_to_physmap xatp;
600255040Sgibbs
601255040Sgibbs	if (HYPERVISOR_shared_info == NULL) {
602255040Sgibbs		HYPERVISOR_shared_info = malloc(PAGE_SIZE, M_XENHVM, M_NOWAIT);
603255040Sgibbs		if (HYPERVISOR_shared_info == NULL)
604255040Sgibbs			panic("Unable to allocate Xen shared info page");
605255040Sgibbs	}
606255040Sgibbs
607255040Sgibbs	xatp.domid = DOMID_SELF;
608255040Sgibbs	xatp.idx = 0;
609255040Sgibbs	xatp.space = XENMAPSPACE_shared_info;
610255040Sgibbs	xatp.gpfn = vtophys(HYPERVISOR_shared_info) >> PAGE_SHIFT;
611255040Sgibbs	if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp))
612255040Sgibbs		panic("HYPERVISOR_memory_op failed");
613255040Sgibbs}
614255040Sgibbs
615255040Sgibbs/*
616255040Sgibbs * Tell the hypervisor how to contact us for event channel callbacks.
617255040Sgibbs */
618255040Sgibbsvoid
619255040Sgibbsxen_hvm_set_callback(device_t dev)
620255040Sgibbs{
621255040Sgibbs	struct xen_hvm_param xhp;
622255040Sgibbs	int irq;
623255040Sgibbs
624255726Sgibbs	if (xen_vector_callback_enabled)
625255726Sgibbs		return;
626255726Sgibbs
627255040Sgibbs	xhp.domid = DOMID_SELF;
628255040Sgibbs	xhp.index = HVM_PARAM_CALLBACK_IRQ;
629255139Sgibbs	if (xen_feature(XENFEAT_hvm_callback_vector) != 0) {
630255040Sgibbs		int error;
631255040Sgibbs
632255040Sgibbs		xhp.value = HVM_CALLBACK_VECTOR(IDT_EVTCHN);
633255040Sgibbs		error = HYPERVISOR_hvm_op(HVMOP_set_param, &xhp);
634255040Sgibbs		if (error == 0) {
635255040Sgibbs			xen_vector_callback_enabled = 1;
636255040Sgibbs			return;
637255040Sgibbs		}
638255040Sgibbs		printf("Xen HVM callback vector registration failed (%d). "
639255139Sgibbs		    "Falling back to emulated device interrupt\n", error);
640255040Sgibbs	}
641255040Sgibbs	xen_vector_callback_enabled = 0;
642255040Sgibbs	if (dev == NULL) {
643255040Sgibbs		/*
644255040Sgibbs		 * Called from early boot or resume.
645255040Sgibbs		 * xenpci will invoke us again later.
646255040Sgibbs		 */
647255040Sgibbs		return;
648255040Sgibbs	}
649255040Sgibbs
650255040Sgibbs	irq = pci_get_irq(dev);
651255040Sgibbs	if (irq < 16) {
652255040Sgibbs		xhp.value = HVM_CALLBACK_GSI(irq);
653255040Sgibbs	} else {
654255040Sgibbs		u_int slot;
655255040Sgibbs		u_int pin;
656255040Sgibbs
657255040Sgibbs		slot = pci_get_slot(dev);
658255040Sgibbs		pin = pci_get_intpin(dev) - 1;
659255040Sgibbs		xhp.value = HVM_CALLBACK_PCI_INTX(slot, pin);
660255040Sgibbs	}
661255040Sgibbs
662255139Sgibbs	if (HYPERVISOR_hvm_op(HVMOP_set_param, &xhp) != 0)
663255040Sgibbs		panic("Can't set evtchn callback");
664255040Sgibbs}
665255040Sgibbs
/*
 * I/O port and values used by xen_hvm_disable_emulated_devices(): the port
 * is read and compared against XMI_MAGIC, then a mask of XMI_UNPLUG_* bits
 * is written to it.
 */
#define	XEN_MAGIC_IOPORT 0x10
enum {
	XMI_MAGIC			 = 0x49d2,
	XMI_UNPLUG_IDE_DISKS		 = 0x01,
	XMI_UNPLUG_NICS			 = 0x02,
	XMI_UNPLUG_IDE_EXCEPT_PRI_MASTER = 0x04
};
673255040Sgibbs
674255040Sgibbsstatic void
675255040Sgibbsxen_hvm_disable_emulated_devices(void)
676255040Sgibbs{
677255040Sgibbs	if (inw(XEN_MAGIC_IOPORT) != XMI_MAGIC)
678255040Sgibbs		return;
679255040Sgibbs
680255040Sgibbs	if (bootverbose)
681255040Sgibbs		printf("XEN: Disabling emulated block and network devices\n");
682255040Sgibbs	outw(XEN_MAGIC_IOPORT, XMI_UNPLUG_IDE_DISKS|XMI_UNPLUG_NICS);
683255040Sgibbs}
684255040Sgibbs
685255726Sgibbsstatic void
686255726Sgibbsxen_hvm_init(enum xen_hvm_init_type init_type)
687255726Sgibbs{
688255726Sgibbs	int error;
689255726Sgibbs	int i;
690255726Sgibbs
691255726Sgibbs	if (init_type == XEN_HVM_INIT_CANCELLED_SUSPEND)
692255726Sgibbs		return;
693255726Sgibbs
694255726Sgibbs	error = xen_hvm_init_hypercall_stubs();
695255726Sgibbs
696255726Sgibbs	switch (init_type) {
697255726Sgibbs	case XEN_HVM_INIT_COLD:
698255726Sgibbs		if (error != 0)
699255726Sgibbs			return;
700255726Sgibbs
701255726Sgibbs		setup_xen_features();
702255913Sgibbs		cpu_ops = xen_hvm_cpu_ops;
703256071Sgibbs 		vm_guest = VM_GUEST_XEN;
704255726Sgibbs		break;
705255726Sgibbs	case XEN_HVM_INIT_RESUME:
706255726Sgibbs		if (error != 0)
707255726Sgibbs			panic("Unable to init Xen hypercall stubs on resume");
708255744Sgibbs
709255744Sgibbs		/* Clear stale vcpu_info. */
710255744Sgibbs		CPU_FOREACH(i)
711255744Sgibbs			DPCPU_ID_SET(i, vcpu_info, NULL);
712255726Sgibbs		break;
713255726Sgibbs	default:
714255726Sgibbs		panic("Unsupported HVM initialization type");
715255726Sgibbs	}
716255726Sgibbs
717255726Sgibbs	xen_vector_callback_enabled = 0;
718255726Sgibbs	xen_domain_type = XEN_HVM_DOMAIN;
719255726Sgibbs	xen_hvm_init_shared_info_page();
720255726Sgibbs	xen_hvm_set_callback(NULL);
721255726Sgibbs	xen_hvm_disable_emulated_devices();
722255726Sgibbs}
723255726Sgibbs
/* Suspend hook; intentionally empty - nothing is done here before suspend. */
void
xen_hvm_suspend(void)
{
}
728255040Sgibbs
729255040Sgibbsvoid
730255726Sgibbsxen_hvm_resume(bool suspend_cancelled)
731255040Sgibbs{
732255139Sgibbs
733255726Sgibbs	xen_hvm_init(suspend_cancelled ?
734255726Sgibbs	    XEN_HVM_INIT_CANCELLED_SUSPEND : XEN_HVM_INIT_RESUME);
735255726Sgibbs
736255726Sgibbs	/* Register vcpu_info area for CPU#0. */
737255744Sgibbs	xen_hvm_cpu_init();
738255040Sgibbs}
739255040Sgibbs
/* SYSINIT entry point: performs the cold-boot Xen HVM initialization. */
static void
xen_hvm_sysinit(void *arg __unused)
{
	xen_hvm_init(XEN_HVM_INIT_COLD);
}
745255139Sgibbs
746255744Sgibbsstatic void
747256073Sgibbsxen_set_vcpu_id(void)
748256073Sgibbs{
749256073Sgibbs	struct pcpu *pc;
750256073Sgibbs	int i;
751256073Sgibbs
752256073Sgibbs	/* Set vcpu_id to acpi_id */
753256073Sgibbs	CPU_FOREACH(i) {
754256073Sgibbs		pc = pcpu_find(i);
755256073Sgibbs		pc->pc_vcpu_id = pc->pc_acpi_id;
756256073Sgibbs		if (bootverbose)
757256073Sgibbs			printf("XEN: CPU %u has VCPU ID %u\n",
758256073Sgibbs			       i, pc->pc_vcpu_id);
759256073Sgibbs	}
760256073Sgibbs}
761256073Sgibbs
762256073Sgibbsstatic void
763255744Sgibbsxen_hvm_cpu_init(void)
764255040Sgibbs{
765255139Sgibbs	struct vcpu_register_vcpu_info info;
766255040Sgibbs	struct vcpu_info *vcpu_info;
767255139Sgibbs	int cpu, rc;
768255040Sgibbs
769255744Sgibbs	if (!xen_domain())
770255744Sgibbs		return;
771255744Sgibbs
772255726Sgibbs	if (DPCPU_GET(vcpu_info) != NULL) {
773255726Sgibbs		/*
774255726Sgibbs		 * vcpu_info is already set.  We're resuming
775255726Sgibbs		 * from a failed migration and our pre-suspend
776255726Sgibbs		 * configuration is still valid.
777255726Sgibbs		 */
778255726Sgibbs		return;
779255726Sgibbs	}
780255726Sgibbs
781255726Sgibbs	vcpu_info = DPCPU_PTR(vcpu_local_info);
782256073Sgibbs	cpu = PCPU_GET(vcpu_id);
783255040Sgibbs	info.mfn = vtophys(vcpu_info) >> PAGE_SHIFT;
784255040Sgibbs	info.offset = vtophys(vcpu_info) - trunc_page(vtophys(vcpu_info));
785255040Sgibbs
786255040Sgibbs	rc = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info, cpu, &info);
787255139Sgibbs	if (rc != 0)
788255040Sgibbs		DPCPU_SET(vcpu_info, &HYPERVISOR_shared_info->vcpu_info[cpu]);
789255139Sgibbs	else
790255040Sgibbs		DPCPU_SET(vcpu_info, vcpu_info);
791255040Sgibbs}
792255040Sgibbs
/*
 * Boot-time hooks.  Each SYSINIT names the subsystem stage
 * (SI_SUB_HYPERVISOR, SI_SUB_SMP, SI_SUB_INTR, SI_SUB_CPU) at which its
 * function is run.
 */
SYSINIT(xen_hvm_init, SI_SUB_HYPERVISOR, SI_ORDER_FIRST, xen_hvm_sysinit, NULL);
#ifdef SMP
SYSINIT(xen_setup_cpus, SI_SUB_SMP, SI_ORDER_FIRST, xen_setup_cpus, NULL);
#endif
SYSINIT(xen_hvm_cpu_init, SI_SUB_INTR, SI_ORDER_FIRST, xen_hvm_cpu_init, NULL);
SYSINIT(xen_set_vcpu_id, SI_SUB_CPU, SI_ORDER_ANY, xen_set_vcpu_id, NULL);
799