vmm_dev.c revision 270071
/*-
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: stable/10/sys/amd64/vmm/vmm_dev.c 270071 2014-08-17 01:00:42Z grehan $
 */
28133359Sobrien
2968349Sobrien#include <sys/cdefs.h>
3068349Sobrien__FBSDID("$FreeBSD: stable/10/sys/amd64/vmm/vmm_dev.c 270071 2014-08-17 01:00:42Z grehan $");
3168349Sobrien
3268349Sobrien#include <sys/param.h>
33191736Sobrien#include <sys/kernel.h>
34191736Sobrien#include <sys/queue.h>
35328875Seadler#include <sys/lock.h>
36191736Sobrien#include <sys/mutex.h>
37191736Sobrien#include <sys/malloc.h>
3868349Sobrien#include <sys/conf.h>
39110949Sobrien#include <sys/sysctl.h>
4068349Sobrien#include <sys/libkern.h>
4168349Sobrien#include <sys/ioccom.h>
4268349Sobrien#include <sys/mman.h>
4368349Sobrien#include <sys/uio.h>
4468349Sobrien
4568349Sobrien#include <vm/vm.h>
4668349Sobrien#include <vm/pmap.h>
4768349Sobrien#include <vm/vm_map.h>
48267843Sdelphij
49267843Sdelphij#include <machine/vmparam.h>
50103373Sobrien#include <machine/vmm.h>
51133359Sobrien#include <machine/vmm_instruction_emul.h>
52133359Sobrien#include <machine/vmm_dev.h>
5368349Sobrien
54267843Sdelphij#include "vmm_lapic.h"
55267843Sdelphij#include "vmm_stat.h"
56159764Sobrien#include "vmm_mem.h"
57234250Sobrien#include "io/ppt.h"
58234250Sobrien#include "io/vatpic.h"
5968349Sobrien#include "io/vioapic.h"
6080588Sobrien#include "io/vhpet.h"
6180588Sobrien
62267843Sdelphijstruct vmmdev_softc {
63267843Sdelphij	struct vm	*vm;		/* vm instance cookie */
64267843Sdelphij	struct cdev	*cdev;
65133359Sobrien	SLIST_ENTRY(vmmdev_softc) link;
6680588Sobrien	int		flags;
67169962Sobrien};
68267843Sdelphij#define	VSC_LINKED		0x01
69267843Sdelphij
70267843Sdelphijstatic SLIST_HEAD(, vmmdev_softc) head;
7180588Sobrien
7268349Sobrienstatic struct mtx vmmdev_mtx;
73267843Sdelphij
74267843Sdelphijstatic MALLOC_DEFINE(M_VMMDEV, "vmmdev", "vmmdev");
75133359Sobrien
7680588SobrienSYSCTL_DECL(_hw_vmm);
77169962Sobrien
78169962Sobrienstatic struct vmmdev_softc *
79169962Sobrienvmmdev_lookup(const char *name)
80169962Sobrien{
81226048Sobrien	struct vmmdev_softc *sc;
82226048Sobrien
83226048Sobrien#ifdef notyet	/* XXX kernel is not compiled with invariants */
84226048Sobrien	mtx_assert(&vmmdev_mtx, MA_OWNED);
85169962Sobrien#endif
86169962Sobrien
87169962Sobrien	SLIST_FOREACH(sc, &head, link) {
88169962Sobrien		if (strcmp(name, vm_name(sc->vm)) == 0)
89169962Sobrien			break;
90169962Sobrien	}
91169962Sobrien
92234250Sobrien	return (sc);
93234250Sobrien}
94234250Sobrien
95234250Sobrienstatic struct vmmdev_softc *
96234250Sobrienvmmdev_lookup2(struct cdev *cdev)
97234250Sobrien{
98234250Sobrien
99234250Sobrien	return (cdev->si_drv1);
100234250Sobrien}
101234250Sobrien
102234250Sobrienstatic int
103234250Sobrienvmmdev_rw(struct cdev *cdev, struct uio *uio, int flags)
104234250Sobrien{
105234250Sobrien	int error, off, c, prot;
106234250Sobrien	vm_paddr_t gpa;
107234250Sobrien	void *hpa, *cookie;
108234250Sobrien	struct vmmdev_softc *sc;
109234250Sobrien
11068349Sobrien	static char zerobuf[PAGE_SIZE];
111186690Sobrien
112186690Sobrien	error = 0;
11368349Sobrien	sc = vmmdev_lookup2(cdev);
114169962Sobrien	if (sc == NULL)
115191736Sobrien		error = ENXIO;
116169962Sobrien
117169962Sobrien	prot = (uio->uio_rw == UIO_WRITE ? VM_PROT_WRITE : VM_PROT_READ);
118169962Sobrien	while (uio->uio_resid > 0 && error == 0) {
119169962Sobrien		gpa = uio->uio_offset;
120169962Sobrien		off = gpa & PAGE_MASK;
121169962Sobrien		c = min(uio->uio_resid, PAGE_SIZE - off);
122169962Sobrien
123169962Sobrien		/*
124169962Sobrien		 * The VM has a hole in its physical memory map. If we want to
12568349Sobrien		 * use 'dd' to inspect memory beyond the hole we need to
12668349Sobrien		 * provide bogus data for memory that lies in the hole.
12768349Sobrien		 *
12868349Sobrien		 * Since this device does not support lseek(2), dd(1) will
129133359Sobrien		 * read(2) blocks of data to simulate the lseek(2).
130133359Sobrien		 */
131133359Sobrien		hpa = vm_gpa_hold(sc->vm, gpa, c, prot, &cookie);
132133359Sobrien		if (hpa == NULL) {
133133359Sobrien			if (uio->uio_rw == UIO_READ)
134159764Sobrien				error = uiomove(zerobuf, c, uio);
135133359Sobrien			else
136133359Sobrien				error = EFAULT;
137267843Sdelphij		} else {
13868349Sobrien			error = uiomove(hpa, c, uio);
13968349Sobrien			vm_gpa_release(cookie);
140169942Sobrien		}
141169942Sobrien	}
142169942Sobrien	return (error);
143226048Sobrien}
144169942Sobrien
145169942Sobrienstatic int
146159764Sobrienvmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
147133359Sobrien	     struct thread *td)
148133359Sobrien{
149159764Sobrien	int error, vcpu, state_changed, size;
150159764Sobrien	cpuset_t *cpuset;
151159764Sobrien	struct vmmdev_softc *sc;
152169942Sobrien	struct vm_memory_segment *seg;
15368349Sobrien	struct vm_register *vmreg;
154133359Sobrien	struct vm_seg_desc *vmsegdesc;
155133359Sobrien	struct vm_run *vmrun;
156133359Sobrien	struct vm_exception *vmexc;
157159764Sobrien	struct vm_lapic_irq *vmirq;
158133359Sobrien	struct vm_lapic_msi *vmmsi;
159290152Sdelphij	struct vm_ioapic_irq *ioapic_irq;
16068349Sobrien	struct vm_isa_irq *isa_irq;
161133359Sobrien	struct vm_isa_irq_trigger *isa_irq_trigger;
162133359Sobrien	struct vm_capability *vmcap;
163133359Sobrien	struct vm_pptdev *pptdev;
164159764Sobrien	struct vm_pptdev_mmio *pptmmio;
165133359Sobrien	struct vm_pptdev_msi *pptmsi;
166290152Sdelphij	struct vm_pptdev_msix *pptmsix;
167284778Sdelphij	struct vm_nmi *vmnmi;
168169942Sobrien	struct vm_stats *vmstats;
169169942Sobrien	struct vm_stat_desc *statdesc;
170169942Sobrien	struct vm_x2apic *x2apic;
171169942Sobrien	struct vm_gpa_pte *gpapte;
172290152Sdelphij	struct vm_suspend *vmsuspend;
173169942Sobrien	struct vm_gla2gpa *gg;
174169942Sobrien	struct vm_activate_cpu *vac;
175169942Sobrien	struct vm_cpuset *vm_cpuset;
176169942Sobrien
177169942Sobrien	sc = vmmdev_lookup2(cdev);
178290152Sdelphij	if (sc == NULL)
179169942Sobrien		return (ENXIO);
180267843Sdelphij
181267843Sdelphij	error = 0;
182267843Sdelphij	vcpu = -1;
183267843Sdelphij	state_changed = 0;
184267843Sdelphij
185267843Sdelphij	/*
186175296Sobrien	 * Some VMM ioctls can operate only on vcpus that are not running.
187175296Sobrien	 */
188175296Sobrien	switch (cmd) {
189175296Sobrien	case VM_RUN:
190175296Sobrien	case VM_GET_REGISTER:
191175296Sobrien	case VM_SET_REGISTER:
192175296Sobrien	case VM_GET_SEGMENT_DESCRIPTOR:
193175296Sobrien	case VM_SET_SEGMENT_DESCRIPTOR:
194175296Sobrien	case VM_INJECT_EXCEPTION:
195175296Sobrien	case VM_GET_CAPABILITY:
196169962Sobrien	case VM_SET_CAPABILITY:
197169962Sobrien	case VM_PPTDEV_MSI:
198169962Sobrien	case VM_PPTDEV_MSIX:
199267843Sdelphij	case VM_SET_X2APIC_STATE:
200267843Sdelphij	case VM_GLA2GPA:
201300899Sdelphij	case VM_ACTIVATE_CPU:
202267843Sdelphij		/*
203267843Sdelphij		 * XXX fragile, handle with care
20468349Sobrien		 * Assumes that the first field of the ioctl data is the vcpu.
205267843Sdelphij		 */
20668349Sobrien		vcpu = *(int *)data;
20768349Sobrien		if (vcpu < 0 || vcpu >= VM_MAXCPU) {
20868349Sobrien			error = EINVAL;
20968349Sobrien			goto done;
21068349Sobrien		}
211103373Sobrien
21268349Sobrien		error = vcpu_set_state(sc->vm, vcpu, VCPU_FROZEN, true);
21368349Sobrien		if (error)
214133359Sobrien			goto done;
215139368Sobrien
21668349Sobrien		state_changed = 1;
21768349Sobrien		break;
21868349Sobrien
21968349Sobrien	case VM_MAP_PPTDEV_MMIO:
22068349Sobrien	case VM_BIND_PPTDEV:
22168349Sobrien	case VM_UNBIND_PPTDEV:
222186690Sobrien	case VM_MAP_MEMORY:
223186690Sobrien	case VM_REINIT:
224186690Sobrien		/*
225186690Sobrien		 * ioctls that operate on the entire virtual machine must
226186690Sobrien		 * prevent all vcpus from running.
22768349Sobrien		 */
22868349Sobrien		error = 0;
229169962Sobrien		for (vcpu = 0; vcpu < VM_MAXCPU; vcpu++) {
23068349Sobrien			error = vcpu_set_state(sc->vm, vcpu, VCPU_FROZEN, true);
23180588Sobrien			if (error)
232159764Sobrien				break;
233267843Sdelphij		}
23480588Sobrien
235175296Sobrien		if (error) {
236284778Sdelphij			while (--vcpu >= 0)
237284778Sdelphij				vcpu_set_state(sc->vm, vcpu, VCPU_IDLE, false);
23884685Sobrien			goto done;
239267843Sdelphij		}
240267843Sdelphij
241328875Seadler		state_changed = 2;
242267843Sdelphij		break;
243284778Sdelphij
244284778Sdelphij	default:
245284778Sdelphij		break;
246284778Sdelphij	}
247267843Sdelphij
248267843Sdelphij	switch(cmd) {
249267843Sdelphij	case VM_RUN:
250284778Sdelphij		vmrun = (struct vm_run *)data;
25180588Sobrien		error = vm_run(sc->vm, vmrun);
252284778Sdelphij		break;
25380588Sobrien	case VM_SUSPEND:
254284778Sdelphij		vmsuspend = (struct vm_suspend *)data;
255284778Sdelphij		error = vm_suspend(sc->vm, vmsuspend->how);
256284778Sdelphij		break;
25780588Sobrien	case VM_REINIT:
258226048Sobrien		error = vm_reinit(sc->vm);
259226048Sobrien		break;
260175296Sobrien	case VM_STAT_DESC: {
26180588Sobrien		statdesc = (struct vm_stat_desc *)data;
262226048Sobrien		error = vmm_stat_desc_copy(statdesc->index,
263267843Sdelphij					statdesc->desc, sizeof(statdesc->desc));
26480588Sobrien		break;
265	}
266	case VM_STATS: {
267		CTASSERT(MAX_VM_STATS >= MAX_VMM_STAT_ELEMS);
268		vmstats = (struct vm_stats *)data;
269		getmicrotime(&vmstats->tv);
270		error = vmm_stat_copy(sc->vm, vmstats->cpuid,
271				      &vmstats->num_entries, vmstats->statbuf);
272		break;
273	}
274	case VM_PPTDEV_MSI:
275		pptmsi = (struct vm_pptdev_msi *)data;
276		error = ppt_setup_msi(sc->vm, pptmsi->vcpu,
277				      pptmsi->bus, pptmsi->slot, pptmsi->func,
278				      pptmsi->addr, pptmsi->msg,
279				      pptmsi->numvec);
280		break;
281	case VM_PPTDEV_MSIX:
282		pptmsix = (struct vm_pptdev_msix *)data;
283		error = ppt_setup_msix(sc->vm, pptmsix->vcpu,
284				       pptmsix->bus, pptmsix->slot,
285				       pptmsix->func, pptmsix->idx,
286				       pptmsix->addr, pptmsix->msg,
287				       pptmsix->vector_control);
288		break;
289	case VM_MAP_PPTDEV_MMIO:
290		pptmmio = (struct vm_pptdev_mmio *)data;
291		error = ppt_map_mmio(sc->vm, pptmmio->bus, pptmmio->slot,
292				     pptmmio->func, pptmmio->gpa, pptmmio->len,
293				     pptmmio->hpa);
294		break;
295	case VM_BIND_PPTDEV:
296		pptdev = (struct vm_pptdev *)data;
297		error = vm_assign_pptdev(sc->vm, pptdev->bus, pptdev->slot,
298					 pptdev->func);
299		break;
300	case VM_UNBIND_PPTDEV:
301		pptdev = (struct vm_pptdev *)data;
302		error = vm_unassign_pptdev(sc->vm, pptdev->bus, pptdev->slot,
303					   pptdev->func);
304		break;
305	case VM_INJECT_EXCEPTION:
306		vmexc = (struct vm_exception *)data;
307		error = vm_inject_exception(sc->vm, vmexc->cpuid, vmexc);
308		break;
309	case VM_INJECT_NMI:
310		vmnmi = (struct vm_nmi *)data;
311		error = vm_inject_nmi(sc->vm, vmnmi->cpuid);
312		break;
313	case VM_LAPIC_IRQ:
314		vmirq = (struct vm_lapic_irq *)data;
315		error = lapic_intr_edge(sc->vm, vmirq->cpuid, vmirq->vector);
316		break;
317	case VM_LAPIC_LOCAL_IRQ:
318		vmirq = (struct vm_lapic_irq *)data;
319		error = lapic_set_local_intr(sc->vm, vmirq->cpuid,
320		    vmirq->vector);
321		break;
322	case VM_LAPIC_MSI:
323		vmmsi = (struct vm_lapic_msi *)data;
324		error = lapic_intr_msi(sc->vm, vmmsi->addr, vmmsi->msg);
325		break;
326	case VM_IOAPIC_ASSERT_IRQ:
327		ioapic_irq = (struct vm_ioapic_irq *)data;
328		error = vioapic_assert_irq(sc->vm, ioapic_irq->irq);
329		break;
330	case VM_IOAPIC_DEASSERT_IRQ:
331		ioapic_irq = (struct vm_ioapic_irq *)data;
332		error = vioapic_deassert_irq(sc->vm, ioapic_irq->irq);
333		break;
334	case VM_IOAPIC_PULSE_IRQ:
335		ioapic_irq = (struct vm_ioapic_irq *)data;
336		error = vioapic_pulse_irq(sc->vm, ioapic_irq->irq);
337		break;
338	case VM_IOAPIC_PINCOUNT:
339		*(int *)data = vioapic_pincount(sc->vm);
340		break;
341	case VM_ISA_ASSERT_IRQ:
342		isa_irq = (struct vm_isa_irq *)data;
343		error = vatpic_assert_irq(sc->vm, isa_irq->atpic_irq);
344		if (error == 0 && isa_irq->ioapic_irq != -1)
345			error = vioapic_assert_irq(sc->vm,
346			    isa_irq->ioapic_irq);
347		break;
348	case VM_ISA_DEASSERT_IRQ:
349		isa_irq = (struct vm_isa_irq *)data;
350		error = vatpic_deassert_irq(sc->vm, isa_irq->atpic_irq);
351		if (error == 0 && isa_irq->ioapic_irq != -1)
352			error = vioapic_deassert_irq(sc->vm,
353			    isa_irq->ioapic_irq);
354		break;
355	case VM_ISA_PULSE_IRQ:
356		isa_irq = (struct vm_isa_irq *)data;
357		error = vatpic_pulse_irq(sc->vm, isa_irq->atpic_irq);
358		if (error == 0 && isa_irq->ioapic_irq != -1)
359			error = vioapic_pulse_irq(sc->vm, isa_irq->ioapic_irq);
360		break;
361	case VM_ISA_SET_IRQ_TRIGGER:
362		isa_irq_trigger = (struct vm_isa_irq_trigger *)data;
363		error = vatpic_set_irq_trigger(sc->vm,
364		    isa_irq_trigger->atpic_irq, isa_irq_trigger->trigger);
365		break;
366	case VM_MAP_MEMORY:
367		seg = (struct vm_memory_segment *)data;
368		error = vm_malloc(sc->vm, seg->gpa, seg->len);
369		break;
370	case VM_GET_MEMORY_SEG:
371		seg = (struct vm_memory_segment *)data;
372		seg->len = 0;
373		(void)vm_gpabase2memseg(sc->vm, seg->gpa, seg);
374		error = 0;
375		break;
376	case VM_GET_REGISTER:
377		vmreg = (struct vm_register *)data;
378		error = vm_get_register(sc->vm, vmreg->cpuid, vmreg->regnum,
379					&vmreg->regval);
380		break;
381	case VM_SET_REGISTER:
382		vmreg = (struct vm_register *)data;
383		error = vm_set_register(sc->vm, vmreg->cpuid, vmreg->regnum,
384					vmreg->regval);
385		break;
386	case VM_SET_SEGMENT_DESCRIPTOR:
387		vmsegdesc = (struct vm_seg_desc *)data;
388		error = vm_set_seg_desc(sc->vm, vmsegdesc->cpuid,
389					vmsegdesc->regnum,
390					&vmsegdesc->desc);
391		break;
392	case VM_GET_SEGMENT_DESCRIPTOR:
393		vmsegdesc = (struct vm_seg_desc *)data;
394		error = vm_get_seg_desc(sc->vm, vmsegdesc->cpuid,
395					vmsegdesc->regnum,
396					&vmsegdesc->desc);
397		break;
398	case VM_GET_CAPABILITY:
399		vmcap = (struct vm_capability *)data;
400		error = vm_get_capability(sc->vm, vmcap->cpuid,
401					  vmcap->captype,
402					  &vmcap->capval);
403		break;
404	case VM_SET_CAPABILITY:
405		vmcap = (struct vm_capability *)data;
406		error = vm_set_capability(sc->vm, vmcap->cpuid,
407					  vmcap->captype,
408					  vmcap->capval);
409		break;
410	case VM_SET_X2APIC_STATE:
411		x2apic = (struct vm_x2apic *)data;
412		error = vm_set_x2apic_state(sc->vm,
413					    x2apic->cpuid, x2apic->state);
414		break;
415	case VM_GET_X2APIC_STATE:
416		x2apic = (struct vm_x2apic *)data;
417		error = vm_get_x2apic_state(sc->vm,
418					    x2apic->cpuid, &x2apic->state);
419		break;
420	case VM_GET_GPA_PMAP:
421		gpapte = (struct vm_gpa_pte *)data;
422		pmap_get_mapping(vmspace_pmap(vm_get_vmspace(sc->vm)),
423				 gpapte->gpa, gpapte->pte, &gpapte->ptenum);
424		error = 0;
425		break;
426	case VM_GET_HPET_CAPABILITIES:
427		error = vhpet_getcap((struct vm_hpet_cap *)data);
428		break;
429	case VM_GLA2GPA: {
430		CTASSERT(PROT_READ == VM_PROT_READ);
431		CTASSERT(PROT_WRITE == VM_PROT_WRITE);
432		CTASSERT(PROT_EXEC == VM_PROT_EXECUTE);
433		gg = (struct vm_gla2gpa *)data;
434		error = vmm_gla2gpa(sc->vm, gg->vcpuid, &gg->paging, gg->gla,
435		    gg->prot, &gg->gpa);
436		KASSERT(error == 0 || error == 1 || error == -1,
437		    ("%s: vmm_gla2gpa unknown error %d", __func__, error));
438		if (error >= 0) {
439			/*
440			 * error = 0: the translation was successful
441			 * error = 1: a fault was injected into the guest
442			 */
443			gg->fault = error;
444			error = 0;
445		} else {
446			error = EFAULT;
447		}
448		break;
449	}
450	case VM_ACTIVATE_CPU:
451		vac = (struct vm_activate_cpu *)data;
452		error = vm_activate_cpu(sc->vm, vac->vcpuid);
453		break;
454	case VM_GET_CPUS:
455		error = 0;
456		vm_cpuset = (struct vm_cpuset *)data;
457		size = vm_cpuset->cpusetsize;
458		if (size < sizeof(cpuset_t) || size > CPU_MAXSIZE / NBBY) {
459			error = ERANGE;
460			break;
461		}
462		cpuset = malloc(size, M_TEMP, M_WAITOK | M_ZERO);
463		if (vm_cpuset->which == VM_ACTIVE_CPUS)
464			*cpuset = vm_active_cpus(sc->vm);
465		else if (vm_cpuset->which == VM_SUSPENDED_CPUS)
466			*cpuset = vm_suspended_cpus(sc->vm);
467		else
468			error = EINVAL;
469		if (error == 0)
470			error = copyout(cpuset, vm_cpuset->cpus, size);
471		free(cpuset, M_TEMP);
472		break;
473	default:
474		error = ENOTTY;
475		break;
476	}
477
478	if (state_changed == 1) {
479		vcpu_set_state(sc->vm, vcpu, VCPU_IDLE, false);
480	} else if (state_changed == 2) {
481		for (vcpu = 0; vcpu < VM_MAXCPU; vcpu++)
482			vcpu_set_state(sc->vm, vcpu, VCPU_IDLE, false);
483	}
484
485done:
486	/* Make sure that no handler returns a bogus value like ERESTART */
487	KASSERT(error >= 0, ("vmmdev_ioctl: invalid error return %d", error));
488	return (error);
489}
490
491static int
492vmmdev_mmap_single(struct cdev *cdev, vm_ooffset_t *offset,
493		   vm_size_t size, struct vm_object **object, int nprot)
494{
495	int error;
496	struct vmmdev_softc *sc;
497
498	sc = vmmdev_lookup2(cdev);
499	if (sc != NULL && (nprot & PROT_EXEC) == 0)
500		error = vm_get_memobj(sc->vm, *offset, size, offset, object);
501	else
502		error = EINVAL;
503
504	return (error);
505}
506
507static void
508vmmdev_destroy(void *arg)
509{
510
511	struct vmmdev_softc *sc = arg;
512
513	if (sc->cdev != NULL)
514		destroy_dev(sc->cdev);
515
516	if (sc->vm != NULL)
517		vm_destroy(sc->vm);
518
519	if ((sc->flags & VSC_LINKED) != 0) {
520		mtx_lock(&vmmdev_mtx);
521		SLIST_REMOVE(&head, sc, vmmdev_softc, link);
522		mtx_unlock(&vmmdev_mtx);
523	}
524
525	free(sc, M_VMMDEV);
526}
527
528static int
529sysctl_vmm_destroy(SYSCTL_HANDLER_ARGS)
530{
531	int error;
532	char buf[VM_MAX_NAMELEN];
533	struct vmmdev_softc *sc;
534	struct cdev *cdev;
535
536	strlcpy(buf, "beavis", sizeof(buf));
537	error = sysctl_handle_string(oidp, buf, sizeof(buf), req);
538	if (error != 0 || req->newptr == NULL)
539		return (error);
540
541	mtx_lock(&vmmdev_mtx);
542	sc = vmmdev_lookup(buf);
543	if (sc == NULL || sc->cdev == NULL) {
544		mtx_unlock(&vmmdev_mtx);
545		return (EINVAL);
546	}
547
548	/*
549	 * The 'cdev' will be destroyed asynchronously when 'si_threadcount'
550	 * goes down to 0 so we should not do it again in the callback.
551	 */
552	cdev = sc->cdev;
553	sc->cdev = NULL;
554	mtx_unlock(&vmmdev_mtx);
555
556	/*
557	 * Schedule the 'cdev' to be destroyed:
558	 *
559	 * - any new operations on this 'cdev' will return an error (ENXIO).
560	 *
561	 * - when the 'si_threadcount' dwindles down to zero the 'cdev' will
562	 *   be destroyed and the callback will be invoked in a taskqueue
563	 *   context.
564	 */
565	destroy_dev_sched_cb(cdev, vmmdev_destroy, sc);
566
567	return (0);
568}
569SYSCTL_PROC(_hw_vmm, OID_AUTO, destroy, CTLTYPE_STRING | CTLFLAG_RW,
570	    NULL, 0, sysctl_vmm_destroy, "A", NULL);
571
572static struct cdevsw vmmdevsw = {
573	.d_name		= "vmmdev",
574	.d_version	= D_VERSION,
575	.d_ioctl	= vmmdev_ioctl,
576	.d_mmap_single	= vmmdev_mmap_single,
577	.d_read		= vmmdev_rw,
578	.d_write	= vmmdev_rw,
579};
580
581static int
582sysctl_vmm_create(SYSCTL_HANDLER_ARGS)
583{
584	int error;
585	struct vm *vm;
586	struct cdev *cdev;
587	struct vmmdev_softc *sc, *sc2;
588	char buf[VM_MAX_NAMELEN];
589
590	strlcpy(buf, "beavis", sizeof(buf));
591	error = sysctl_handle_string(oidp, buf, sizeof(buf), req);
592	if (error != 0 || req->newptr == NULL)
593		return (error);
594
595	mtx_lock(&vmmdev_mtx);
596	sc = vmmdev_lookup(buf);
597	mtx_unlock(&vmmdev_mtx);
598	if (sc != NULL)
599		return (EEXIST);
600
601	error = vm_create(buf, &vm);
602	if (error != 0)
603		return (error);
604
605	sc = malloc(sizeof(struct vmmdev_softc), M_VMMDEV, M_WAITOK | M_ZERO);
606	sc->vm = vm;
607
608	/*
609	 * Lookup the name again just in case somebody sneaked in when we
610	 * dropped the lock.
611	 */
612	mtx_lock(&vmmdev_mtx);
613	sc2 = vmmdev_lookup(buf);
614	if (sc2 == NULL) {
615		SLIST_INSERT_HEAD(&head, sc, link);
616		sc->flags |= VSC_LINKED;
617	}
618	mtx_unlock(&vmmdev_mtx);
619
620	if (sc2 != NULL) {
621		vmmdev_destroy(sc);
622		return (EEXIST);
623	}
624
625	error = make_dev_p(MAKEDEV_CHECKNAME, &cdev, &vmmdevsw, NULL,
626			   UID_ROOT, GID_WHEEL, 0600, "vmm/%s", buf);
627	if (error != 0) {
628		vmmdev_destroy(sc);
629		return (error);
630	}
631
632	mtx_lock(&vmmdev_mtx);
633	sc->cdev = cdev;
634	sc->cdev->si_drv1 = sc;
635	mtx_unlock(&vmmdev_mtx);
636
637	return (0);
638}
639SYSCTL_PROC(_hw_vmm, OID_AUTO, create, CTLTYPE_STRING | CTLFLAG_RW,
640	    NULL, 0, sysctl_vmm_create, "A", NULL);
641
642void
643vmmdev_init(void)
644{
645	mtx_init(&vmmdev_mtx, "vmm device mutex", NULL, MTX_DEF);
646}
647
648int
649vmmdev_cleanup(void)
650{
651	int error;
652
653	if (SLIST_EMPTY(&head))
654		error = 0;
655	else
656		error = EBUSY;
657
658	return (error);
659}
660