1/*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2008, 2013 Citrix Systems, Inc.
5 * Copyright (c) 2012 Spectra Logic Corporation
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30#include <sys/param.h>
31#include <sys/bus.h>
32#include <sys/kernel.h>
33#include <sys/linker.h>
34#include <sys/malloc.h>
35#include <sys/proc.h>
36#include <sys/smp.h>
37#include <sys/systm.h>
38
39#include <vm/vm.h>
40#include <vm/pmap.h>
41#include <vm/vm_param.h>
42
43#include <dev/pci/pcivar.h>
44
45#include <machine/_inttypes.h>
46#include <machine/cpufunc.h>
47#include <machine/cpu.h>
48#include <machine/md_var.h>
49#include <machine/metadata.h>
50#include <machine/smp.h>
51
52#include <x86/apicreg.h>
53
54#include <xen/xen-os.h>
55#include <xen/error.h>
56#include <xen/features.h>
57#include <xen/gnttab.h>
58#include <xen/hypervisor.h>
59#include <xen/hvm.h>
60#include <xen/xen_intr.h>
61
62#include <contrib/xen/arch-x86/cpuid.h>
63#include <contrib/xen/hvm/params.h>
64#include <contrib/xen/vcpu.h>
65
66/*--------------------------- Forward Declarations ---------------------------*/
67static void xen_hvm_cpu_init(void);
68
69/*-------------------------------- Global Data -------------------------------*/
70#ifdef SMP
71struct cpu_ops xen_hvm_cpu_ops = {
72	.cpu_init	= xen_hvm_cpu_init,
73	.cpu_resume	= xen_hvm_cpu_init
74};
75#endif
76
77static MALLOC_DEFINE(M_XENHVM, "xen_hvm", "Xen HVM PV Support");
78
79/**
80 * If non-zero, the hypervisor has been configured to use a direct
81 * IDT event callback for interrupt injection.
82 */
83int xen_vector_callback_enabled;
84
85/**
86 * Signal whether the vector injected for the event channel upcall requires to
87 * be EOI'ed on the local APIC.
88 */
89bool xen_evtchn_needs_ack;
90
91/*------------------------------- Per-CPU Data -------------------------------*/
92DPCPU_DECLARE(struct vcpu_info *, vcpu_info);
93
94/*------------------------------ Sysctl tunables -----------------------------*/
95int xen_disable_pv_disks = 0;
96int xen_disable_pv_nics = 0;
97TUNABLE_INT("hw.xen.disable_pv_disks", &xen_disable_pv_disks);
98TUNABLE_INT("hw.xen.disable_pv_nics", &xen_disable_pv_nics);
99
100/*---------------------- XEN Hypervisor Probe and Setup ----------------------*/
101
102void xen_emergency_print(const char *str, size_t size)
103{
104	outsb(XEN_HVM_DEBUGCONS_IOPORT, str, size);
105}
106
/*
 * Apply workarounds that depend on the hypervisor version (major.minor).
 * Only has an effect on SMP kernels.
 */
static void
hypervisor_quirks(unsigned int major, unsigned int minor)
{
#ifdef SMP
	bool pre_4_6;

	pre_4_6 = major < 4 || (major == 4 && minor < 6);

	/* Leave the setting alone unless it still holds the value -1. */
	if (!pre_4_6 || msix_disable_migration != -1)
		return;

	/*
	 * Xen hypervisors prior to 4.6.0 do not properly handle updates to
	 * enabled MSI-X table entries, so disable MSI-X interrupt migration
	 * in that case.
	 */
	if (bootverbose)
		printf(
"Disabling MSI-X interrupt migration due to Xen hypervisor bug.\n"
"Set machdep.msix_disable_migration=0 to forcefully enable it.\n");
	msix_disable_migration = 1;
#endif
}
127
128static void
129hypervisor_version(void)
130{
131	uint32_t regs[4];
132	int major, minor;
133
134	do_cpuid(hv_base + 1, regs);
135
136	major = regs[0] >> 16;
137	minor = regs[0] & 0xffff;
138	printf("XEN: Hypervisor version %d.%d detected.\n", major, minor);
139
140	hypervisor_quirks(major, minor);
141}
142
143/*
144 * Translate linear to physical address when still running on the bootloader
145 * created page-tables.
146 */
147static vm_paddr_t
148early_init_vtop(void *addr)
149{
150
151	/*
152	 * Using a KASSERT won't print anything, as this is before console
153	 * initialization.
154	 */
155	if (__predict_false((uintptr_t)addr < KERNBASE)) {
156		xc_printf("invalid linear address: %p\n", addr);
157		halt();
158	}
159
160	return ((uintptr_t)addr - KERNBASE
161#ifdef __amd64__
162	    + kernphys - KERNLOAD
163#endif
164	    );
165}
166
167static int
168map_shared_info(void)
169{
170	/*
171	 * TODO shared info page should be mapped in an unpopulated (IOW:
172	 * non-RAM) address.  But finding one at this point in boot is
173	 * complicated, hence re-use a RAM address for the time being.  This
174	 * sadly causes super-page shattering in the second stage translation
175	 * page tables.
176	 */
177	static union {
178		shared_info_t shared_info;
179		uint8_t raw[PAGE_SIZE];
180	} shared_page __attribute__((aligned(PAGE_SIZE)));
181	static struct xen_add_to_physmap xatp = {
182	    .domid = DOMID_SELF,
183	    .space = XENMAPSPACE_shared_info,
184	};
185	int rc;
186
187	_Static_assert(sizeof(shared_page) == PAGE_SIZE,
188	    "invalid Xen shared_info struct size");
189
190	if (xatp.gpfn == 0)
191		xatp.gpfn = atop(early_init_vtop(&shared_page.shared_info));
192
193	rc = HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp);
194	if (rc != 0) {
195		xc_printf("cannot map shared info page: %d\n", rc);
196		HYPERVISOR_shared_info = NULL;
197	} else if (HYPERVISOR_shared_info == NULL)
198		HYPERVISOR_shared_info = &shared_page.shared_info;
199
200	return (rc);
201}
202
203static void
204fixup_console(void)
205{
206	struct xen_platform_op op = {
207		.cmd = XENPF_get_dom0_console,
208	};
209	xenpf_dom0_console_t *console = &op.u.dom0_console;
210	union {
211		struct efi_fb efi;
212		struct vbe_fb vbe;
213	} *fb = NULL;
214	int size;
215	caddr_t kmdp;
216
217	kmdp = preload_search_by_type("elf kernel");
218	if (kmdp == NULL)
219		kmdp = preload_search_by_type("elf64 kernel");
220	if (kmdp == NULL) {
221		xc_printf("Unable to find kernel metadata\n");
222		return;
223	}
224
225	size = HYPERVISOR_platform_op(&op);
226	if (size < 0) {
227		xc_printf("Failed to get video console info: %d\n", size);
228		return;
229	}
230
231	switch (console->video_type) {
232	case XEN_VGATYPE_VESA_LFB:
233		fb = (__typeof__ (fb))preload_search_info(kmdp,
234		    MODINFO_METADATA | MODINFOMD_VBE_FB);
235
236		if (fb == NULL) {
237			xc_printf("No VBE FB in kernel metadata\n");
238			return;
239		}
240
241		_Static_assert(offsetof(struct vbe_fb, fb_bpp) ==
242		    offsetof(struct efi_fb, fb_mask_reserved) +
243		    sizeof(fb->efi.fb_mask_reserved),
244		    "Bad structure overlay\n");
245		fb->vbe.fb_bpp = console->u.vesa_lfb.bits_per_pixel;
246		/* FALLTHROUGH */
247	case XEN_VGATYPE_EFI_LFB:
248		if (fb == NULL) {
249			fb = (__typeof__ (fb))preload_search_info(kmdp,
250			    MODINFO_METADATA | MODINFOMD_EFI_FB);
251			if (fb == NULL) {
252				xc_printf("No EFI FB in kernel metadata\n");
253				return;
254			}
255		}
256
257		fb->efi.fb_addr = console->u.vesa_lfb.lfb_base;
258		if (size >
259		    offsetof(xenpf_dom0_console_t, u.vesa_lfb.ext_lfb_base))
260			fb->efi.fb_addr |=
261			    (uint64_t)console->u.vesa_lfb.ext_lfb_base << 32;
262		fb->efi.fb_size = console->u.vesa_lfb.lfb_size << 16;
263		fb->efi.fb_height = console->u.vesa_lfb.height;
264		fb->efi.fb_width = console->u.vesa_lfb.width;
265		fb->efi.fb_stride = (console->u.vesa_lfb.bytes_per_line << 3) /
266		    console->u.vesa_lfb.bits_per_pixel;
267#define FBMASK(c) \
268    ((~0u << console->u.vesa_lfb.c ## _pos) & \
269    (~0u >> (32 - console->u.vesa_lfb.c ## _pos - \
270    console->u.vesa_lfb.c ## _size)))
271		fb->efi.fb_mask_red = FBMASK(red);
272		fb->efi.fb_mask_green = FBMASK(green);
273		fb->efi.fb_mask_blue = FBMASK(blue);
274		fb->efi.fb_mask_reserved = FBMASK(rsvd);
275#undef FBMASK
276		break;
277
278	default:
279		xc_printf("Video console type unsupported\n");
280		return;
281	}
282}
283
284/* Early initialization when running as a Xen guest. */
285void
286xen_early_init(void)
287{
288	uint32_t regs[4];
289	int rc;
290
291	if (hv_high < hv_base + 2) {
292		xc_printf("Invalid maximum leaves for hv_base\n");
293		vm_guest = VM_GUEST_VM;
294		return;
295	}
296
297	/* Find the hypercall pages. */
298	do_cpuid(hv_base + 2, regs);
299	if (regs[0] != 1) {
300		xc_printf("Invalid number of hypercall pages %u\n",
301		    regs[0]);
302		vm_guest = VM_GUEST_VM;
303		return;
304	}
305
306	wrmsr(regs[1], early_init_vtop(&hypercall_page));
307
308	rc = map_shared_info();
309	if (rc != 0) {
310		vm_guest = VM_GUEST_VM;
311		return;
312	}
313
314	if (xen_initial_domain())
315	    /* Fixup video console information in case Xen changed the mode. */
316	    fixup_console();
317}
318
319static int
320set_percpu_callback(unsigned int vcpu)
321{
322	struct xen_hvm_evtchn_upcall_vector vec;
323	int error;
324
325	vec.vcpu = vcpu;
326	vec.vector = IDT_EVTCHN;
327	error = HYPERVISOR_hvm_op(HVMOP_set_evtchn_upcall_vector, &vec);
328
329	return (error != 0 ? xen_translate_error(error) : 0);
330}
331
332/*
333 * Tell the hypervisor how to contact us for event channel callbacks.
334 */
335void
336xen_hvm_set_callback(device_t dev)
337{
338	struct xen_hvm_param xhp;
339	int irq;
340
341	if (xen_vector_callback_enabled)
342		return;
343
344	xhp.domid = DOMID_SELF;
345	xhp.index = HVM_PARAM_CALLBACK_IRQ;
346	if (xen_feature(XENFEAT_hvm_callback_vector) != 0) {
347		int error;
348
349		error = set_percpu_callback(0);
350		if (error == 0) {
351			xen_evtchn_needs_ack = true;
352			/* Trick toolstack to think we are enlightened */
353			xhp.value = 1;
354		} else
355			xhp.value = HVM_CALLBACK_VECTOR(IDT_EVTCHN);
356		error = HYPERVISOR_hvm_op(HVMOP_set_param, &xhp);
357		if (error == 0) {
358			xen_vector_callback_enabled = 1;
359			return;
360		} else if (xen_evtchn_needs_ack)
361			panic("Unable to setup fake HVM param: %d", error);
362
363		printf("Xen HVM callback vector registration failed (%d). "
364		    "Falling back to emulated device interrupt\n", error);
365	}
366	xen_vector_callback_enabled = 0;
367	if (dev == NULL) {
368		/*
369		 * Called from early boot or resume.
370		 * xenpci will invoke us again later.
371		 */
372		return;
373	}
374
375	irq = pci_get_irq(dev);
376	if (irq < 16) {
377		xhp.value = HVM_CALLBACK_GSI(irq);
378	} else {
379		u_int slot;
380		u_int pin;
381
382		slot = pci_get_slot(dev);
383		pin = pci_get_intpin(dev) - 1;
384		xhp.value = HVM_CALLBACK_PCI_INTX(slot, pin);
385	}
386
387	if (HYPERVISOR_hvm_op(HVMOP_set_param, &xhp) != 0)
388		panic("Can't set evtchn callback");
389}
390
391#define	XEN_MAGIC_IOPORT 0x10
392enum {
393	XMI_MAGIC			 = 0x49d2,
394	XMI_UNPLUG_IDE_DISKS		 = 0x01,
395	XMI_UNPLUG_NICS			 = 0x02,
396	XMI_UNPLUG_IDE_EXCEPT_PRI_MASTER = 0x04
397};
398
399static void
400xen_hvm_disable_emulated_devices(void)
401{
402	u_short disable_devs = 0;
403
404	if (xen_pv_domain()) {
405		/*
406		 * No emulated devices in the PV case, so no need to unplug
407		 * anything.
408		 */
409		if (xen_disable_pv_disks != 0 || xen_disable_pv_nics != 0)
410			printf("PV devices cannot be disabled in PV guests\n");
411		return;
412	}
413
414	if (inw(XEN_MAGIC_IOPORT) != XMI_MAGIC)
415		return;
416
417	if (xen_disable_pv_disks == 0) {
418		if (bootverbose)
419			printf("XEN: disabling emulated disks\n");
420		disable_devs |= XMI_UNPLUG_IDE_DISKS;
421	}
422	if (xen_disable_pv_nics == 0) {
423		if (bootverbose)
424			printf("XEN: disabling emulated nics\n");
425		disable_devs |= XMI_UNPLUG_NICS;
426	}
427
428	if (disable_devs != 0)
429		outw(XEN_MAGIC_IOPORT, disable_devs);
430}
431
432static void
433xen_hvm_init(enum xen_hvm_init_type init_type)
434{
435	unsigned int i;
436
437	if (!xen_domain() ||
438	    init_type == XEN_HVM_INIT_CANCELLED_SUSPEND)
439		return;
440
441	hypervisor_version();
442
443	switch (init_type) {
444	case XEN_HVM_INIT_LATE:
445		setup_xen_features();
446#ifdef SMP
447		cpu_ops = xen_hvm_cpu_ops;
448#endif
449		break;
450	case XEN_HVM_INIT_RESUME:
451		/* Clear stale vcpu_info. */
452		CPU_FOREACH(i)
453			DPCPU_ID_SET(i, vcpu_info, NULL);
454
455		if (map_shared_info() != 0)
456			panic("cannot map Xen shared info page");
457
458		break;
459	default:
460		panic("Unsupported HVM initialization type");
461	}
462
463	xen_vector_callback_enabled = 0;
464	xen_evtchn_needs_ack = false;
465	xen_hvm_set_callback(NULL);
466
467	xen_hvm_disable_emulated_devices();
468}
469
/*
 * Suspend hook: currently there is nothing to do here.
 */
void
xen_hvm_suspend(void)
{

	/* Intentionally empty. */
}
474
475void
476xen_hvm_resume(bool suspend_cancelled)
477{
478
479	xen_hvm_init(suspend_cancelled ?
480	    XEN_HVM_INIT_CANCELLED_SUSPEND : XEN_HVM_INIT_RESUME);
481
482	/* Register vcpu_info area for CPU#0. */
483	xen_hvm_cpu_init();
484}
485
486static void
487xen_hvm_sysinit(void *arg __unused)
488{
489	xen_hvm_init(XEN_HVM_INIT_LATE);
490}
491SYSINIT(xen_hvm_init, SI_SUB_HYPERVISOR, SI_ORDER_FIRST, xen_hvm_sysinit, NULL);
492
493static void
494xen_hvm_cpu_init(void)
495{
496	uint32_t regs[4];
497	int rc;
498
499	if (!xen_domain())
500		return;
501
502	if (DPCPU_GET(vcpu_info) != NULL) {
503		/*
504		 * vcpu_info is already set.  We're resuming
505		 * from a failed migration and our pre-suspend
506		 * configuration is still valid.
507		 */
508		return;
509	}
510
511	/*
512	 * Set vCPU ID. If available fetch the ID from CPUID, if not just use
513	 * the ACPI ID.
514	 */
515	KASSERT(hv_base != 0, ("Invalid base Xen CPUID leaf"));
516	cpuid_count(hv_base + 4, 0, regs);
517	KASSERT((regs[0] & XEN_HVM_CPUID_VCPU_ID_PRESENT) ||
518	    !xen_pv_domain(),
519	    ("Xen PV domain without vcpu_id in cpuid"));
520	PCPU_SET(vcpu_id, (regs[0] & XEN_HVM_CPUID_VCPU_ID_PRESENT) ?
521	    regs[1] : PCPU_GET(acpi_id));
522
523	if (xen_evtchn_needs_ack && !IS_BSP()) {
524		/*
525		 * Setup the per-vpcu event channel upcall vector. This is only
526		 * required when using the new HVMOP_set_evtchn_upcall_vector
527		 * hypercall, which allows using a different vector for each
528		 * vCPU. Note that FreeBSD uses the same vector for all vCPUs
529		 * because it's not dynamically allocated.
530		 */
531		rc = set_percpu_callback(PCPU_GET(vcpu_id));
532		if (rc != 0)
533			panic("Event channel upcall vector setup failed: %d",
534			    rc);
535	}
536
537	xen_setup_vcpu_info();
538}
539SYSINIT(xen_hvm_cpu_init, SI_SUB_INTR, SI_ORDER_FIRST, xen_hvm_cpu_init, NULL);
540
541bool
542xen_has_iommu_maps(void)
543{
544	uint32_t regs[4];
545
546	KASSERT(hv_base != 0, ("Invalid base Xen CPUID leaf"));
547	cpuid_count(hv_base + 4, 0, regs);
548
549	return (regs[0] & XEN_HVM_CPUID_IOMMU_MAPPINGS);
550}
551