1273476Sroyger/*
 * Copyright (c) 2014 Roger Pau Monné <roger.pau@citrix.com>
3273476Sroyger * All rights reserved.
4273476Sroyger *
5273476Sroyger * Redistribution and use in source and binary forms, with or without
6273476Sroyger * modification, are permitted provided that the following conditions
7273476Sroyger * are met:
8273476Sroyger * 1. Redistributions of source code must retain the above copyright
9273476Sroyger *    notice, this list of conditions and the following disclaimer.
10273476Sroyger * 2. Redistributions in binary form must reproduce the above copyright
11273476Sroyger *    notice, this list of conditions and the following disclaimer in the
12273476Sroyger *    documentation and/or other materials provided with the distribution.
13273476Sroyger *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15273476Sroyger * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16273476Sroyger * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17273476Sroyger * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18273476Sroyger * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19273476Sroyger * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20273476Sroyger * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21273476Sroyger * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22273476Sroyger * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23273476Sroyger * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24273476Sroyger * SUCH DAMAGE.
25273476Sroyger */
26273476Sroyger
27273476Sroyger#include <sys/cdefs.h>
28273476Sroyger__FBSDID("$FreeBSD$");
29273476Sroyger
30273476Sroyger#include <sys/param.h>
31273476Sroyger#include <sys/systm.h>
32273476Sroyger#include <sys/uio.h>
33273476Sroyger#include <sys/bus.h>
34273476Sroyger#include <sys/malloc.h>
35273476Sroyger#include <sys/kernel.h>
36273476Sroyger#include <sys/lock.h>
37273476Sroyger#include <sys/mutex.h>
38273476Sroyger#include <sys/rwlock.h>
39273476Sroyger#include <sys/selinfo.h>
40273476Sroyger#include <sys/poll.h>
41273476Sroyger#include <sys/conf.h>
42273476Sroyger#include <sys/fcntl.h>
43273476Sroyger#include <sys/ioccom.h>
44273476Sroyger#include <sys/rman.h>
45273476Sroyger#include <sys/tree.h>
46273476Sroyger#include <sys/module.h>
47273476Sroyger#include <sys/proc.h>
48299185Sroyger#include <sys/bitset.h>
49273476Sroyger
50273476Sroyger#include <vm/vm.h>
51273476Sroyger#include <vm/vm_param.h>
52273476Sroyger#include <vm/vm_extern.h>
53273476Sroyger#include <vm/vm_kern.h>
54273476Sroyger#include <vm/vm_page.h>
55273476Sroyger#include <vm/vm_map.h>
56273476Sroyger#include <vm/vm_object.h>
57273476Sroyger#include <vm/vm_pager.h>
58273476Sroyger#include <vm/vm_phys.h>
59273476Sroyger
60273476Sroyger#include <machine/md_var.h>
61273476Sroyger
62273476Sroyger#include <xen/xen-os.h>
63273476Sroyger#include <xen/hypervisor.h>
64273476Sroyger#include <xen/privcmd.h>
65273476Sroyger#include <xen/error.h>
66273476Sroyger
67273476SroygerMALLOC_DEFINE(M_PRIVCMD, "privcmd_dev", "Xen privcmd user-space device");
68273476Sroyger
69273476Sroygerstruct privcmd_map {
70273476Sroyger	vm_object_t mem;
71273476Sroyger	vm_size_t size;
72273476Sroyger	struct resource *pseudo_phys_res;
73273476Sroyger	int pseudo_phys_res_id;
74273476Sroyger	vm_paddr_t phys_base_addr;
75273476Sroyger	boolean_t mapped;
76299185Sroyger	BITSET_DEFINE_VAR() *err;
77273476Sroyger};
78273476Sroyger
79273476Sroygerstatic d_ioctl_t     privcmd_ioctl;
80273476Sroygerstatic d_mmap_single_t	privcmd_mmap_single;
81273476Sroyger
82273476Sroygerstatic struct cdevsw privcmd_devsw = {
83273476Sroyger	.d_version = D_VERSION,
84273476Sroyger	.d_ioctl = privcmd_ioctl,
85273476Sroyger	.d_mmap_single = privcmd_mmap_single,
86273476Sroyger	.d_name = "privcmd",
87273476Sroyger};
88273476Sroyger
89273476Sroygerstatic int privcmd_pg_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot,
90273476Sroyger    vm_ooffset_t foff, struct ucred *cred, u_short *color);
91273476Sroygerstatic void privcmd_pg_dtor(void *handle);
92273476Sroygerstatic int privcmd_pg_fault(vm_object_t object, vm_ooffset_t offset,
93273476Sroyger    int prot, vm_page_t *mres);
94273476Sroyger
95273476Sroygerstatic struct cdev_pager_ops privcmd_pg_ops = {
96273476Sroyger	.cdev_pg_fault = privcmd_pg_fault,
97273476Sroyger	.cdev_pg_ctor =	privcmd_pg_ctor,
98273476Sroyger	.cdev_pg_dtor =	privcmd_pg_dtor,
99273476Sroyger};
100273476Sroyger
101273476Sroygerstatic device_t privcmd_dev = NULL;
102273476Sroyger
103273476Sroyger/*------------------------- Privcmd Pager functions --------------------------*/
104273476Sroygerstatic int
105273476Sroygerprivcmd_pg_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot,
106273476Sroyger    vm_ooffset_t foff, struct ucred *cred, u_short *color)
107273476Sroyger{
108273476Sroyger
109273476Sroyger	return (0);
110273476Sroyger}
111273476Sroyger
112273476Sroygerstatic void
113273476Sroygerprivcmd_pg_dtor(void *handle)
114273476Sroyger{
115273476Sroyger	struct xen_remove_from_physmap rm = { .domid = DOMID_SELF };
116273476Sroyger	struct privcmd_map *map = handle;
117273476Sroyger	int error;
118273476Sroyger	vm_size_t i;
119273476Sroyger	vm_page_t m;
120273476Sroyger
121273476Sroyger	/*
122273476Sroyger	 * Remove the mappings from the used pages. This will remove the
123273476Sroyger	 * underlying p2m bindings in Xen second stage translation.
124273476Sroyger	 */
125273476Sroyger	if (map->mapped == true) {
126273476Sroyger		VM_OBJECT_WLOCK(map->mem);
127273476Sroygerretry:
128273476Sroyger		for (i = 0; i < map->size; i++) {
129273476Sroyger			m = vm_page_lookup(map->mem, i);
130273476Sroyger			if (m == NULL)
131273476Sroyger				continue;
132273476Sroyger			if (vm_page_sleep_if_busy(m, "pcmdum"))
133273476Sroyger				goto retry;
134273476Sroyger			cdev_pager_free_page(map->mem, m);
135273476Sroyger		}
136273476Sroyger		VM_OBJECT_WUNLOCK(map->mem);
137273476Sroyger
138273476Sroyger		for (i = 0; i < map->size; i++) {
139273476Sroyger			rm.gpfn = atop(map->phys_base_addr) + i;
140273476Sroyger			HYPERVISOR_memory_op(XENMEM_remove_from_physmap, &rm);
141273476Sroyger		}
142299185Sroyger		free(map->err, M_PRIVCMD);
143273476Sroyger	}
144273476Sroyger
145282634Sroyger	error = xenmem_free(privcmd_dev, map->pseudo_phys_res_id,
146282634Sroyger	    map->pseudo_phys_res);
147273476Sroyger	KASSERT(error == 0, ("Unable to release memory resource: %d", error));
148273476Sroyger
149273476Sroyger	free(map, M_PRIVCMD);
150273476Sroyger}
151273476Sroyger
152273476Sroygerstatic int
153273476Sroygerprivcmd_pg_fault(vm_object_t object, vm_ooffset_t offset,
154273476Sroyger    int prot, vm_page_t *mres)
155273476Sroyger{
156273476Sroyger	struct privcmd_map *map = object->handle;
157273476Sroyger	vm_pindex_t pidx;
158273476Sroyger	vm_page_t page, oldm;
159273476Sroyger
160273476Sroyger	if (map->mapped != true)
161273476Sroyger		return (VM_PAGER_FAIL);
162273476Sroyger
163273476Sroyger	pidx = OFF_TO_IDX(offset);
164299185Sroyger	if (pidx >= map->size || BIT_ISSET(map->size, pidx, map->err))
165273476Sroyger		return (VM_PAGER_FAIL);
166273476Sroyger
167273476Sroyger	page = PHYS_TO_VM_PAGE(map->phys_base_addr + offset);
168273476Sroyger	if (page == NULL)
169273476Sroyger		return (VM_PAGER_FAIL);
170273476Sroyger
171273476Sroyger	KASSERT((page->flags & PG_FICTITIOUS) != 0,
172273476Sroyger	    ("not fictitious %p", page));
173273476Sroyger	KASSERT(page->wire_count == 1, ("wire_count not 1 %p", page));
174273476Sroyger	KASSERT(vm_page_busied(page) == 0, ("page %p is busy", page));
175273476Sroyger
176273476Sroyger	if (*mres != NULL) {
177273476Sroyger		oldm = *mres;
178273476Sroyger		vm_page_lock(oldm);
179273476Sroyger		vm_page_free(oldm);
180273476Sroyger		vm_page_unlock(oldm);
181273476Sroyger		*mres = NULL;
182273476Sroyger	}
183273476Sroyger
184273476Sroyger	vm_page_insert(page, object, pidx);
185273476Sroyger	page->valid = VM_PAGE_BITS_ALL;
186273476Sroyger	vm_page_xbusy(page);
187273476Sroyger	*mres = page;
188273476Sroyger	return (VM_PAGER_OK);
189273476Sroyger}
190273476Sroyger
191273476Sroyger/*----------------------- Privcmd char device methods ------------------------*/
192273476Sroygerstatic int
193273476Sroygerprivcmd_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t size,
194273476Sroyger    vm_object_t *object, int nprot)
195273476Sroyger{
196273476Sroyger	struct privcmd_map *map;
197273476Sroyger
198273476Sroyger	map = malloc(sizeof(*map), M_PRIVCMD, M_WAITOK | M_ZERO);
199273476Sroyger
200273476Sroyger	map->size = OFF_TO_IDX(size);
201273476Sroyger	map->pseudo_phys_res_id = 0;
202273476Sroyger
203282634Sroyger	map->pseudo_phys_res = xenmem_alloc(privcmd_dev,
204282634Sroyger	    &map->pseudo_phys_res_id, size);
205273476Sroyger	if (map->pseudo_phys_res == NULL) {
206273476Sroyger		free(map, M_PRIVCMD);
207273476Sroyger		return (ENOMEM);
208273476Sroyger	}
209273476Sroyger
210273476Sroyger	map->phys_base_addr = rman_get_start(map->pseudo_phys_res);
211273476Sroyger	map->mem = cdev_pager_allocate(map, OBJT_MGTDEVICE, &privcmd_pg_ops,
212273476Sroyger	    size, nprot, *offset, NULL);
213273476Sroyger	if (map->mem == NULL) {
214282634Sroyger		xenmem_free(privcmd_dev, map->pseudo_phys_res_id,
215282634Sroyger		    map->pseudo_phys_res);
216273476Sroyger		free(map, M_PRIVCMD);
217273476Sroyger		return (ENOMEM);
218273476Sroyger	}
219273476Sroyger
220273476Sroyger	*object = map->mem;
221273476Sroyger
222273476Sroyger	return (0);
223273476Sroyger}
224273476Sroyger
225273476Sroygerstatic int
226273476Sroygerprivcmd_ioctl(struct cdev *dev, unsigned long cmd, caddr_t arg,
227273476Sroyger	      int mode, struct thread *td)
228273476Sroyger{
229273476Sroyger	int error, i;
230273476Sroyger
231273476Sroyger	switch (cmd) {
232273476Sroyger	case IOCTL_PRIVCMD_HYPERCALL: {
233273476Sroyger		struct ioctl_privcmd_hypercall *hcall;
234273476Sroyger
235273476Sroyger		hcall = (struct ioctl_privcmd_hypercall *)arg;
236273476Sroyger
237273476Sroyger		error = privcmd_hypercall(hcall->op, hcall->arg[0],
238273476Sroyger		    hcall->arg[1], hcall->arg[2], hcall->arg[3], hcall->arg[4]);
239273476Sroyger		if (error >= 0) {
240273476Sroyger			hcall->retval = error;
241273476Sroyger			error = 0;
242273476Sroyger		} else {
243273476Sroyger			error = xen_translate_error(error);
244273476Sroyger			hcall->retval = 0;
245273476Sroyger		}
246273476Sroyger		break;
247273476Sroyger	}
248273476Sroyger	case IOCTL_PRIVCMD_MMAPBATCH: {
249273476Sroyger		struct ioctl_privcmd_mmapbatch *mmap;
250273476Sroyger		vm_map_t map;
251273476Sroyger		vm_map_entry_t entry;
252273476Sroyger		vm_object_t mem;
253299185Sroyger		vm_pindex_t pindex;
254273476Sroyger		vm_prot_t prot;
255273476Sroyger		boolean_t wired;
256273476Sroyger		struct xen_add_to_physmap_range add;
257273476Sroyger		xen_ulong_t *idxs;
258273476Sroyger		xen_pfn_t *gpfns;
259299185Sroyger		int *errs, index;
260273476Sroyger		struct privcmd_map *umap;
261299185Sroyger		uint16_t num;
262273476Sroyger
263273476Sroyger		mmap = (struct ioctl_privcmd_mmapbatch *)arg;
264273476Sroyger
265273476Sroyger		if ((mmap->num == 0) ||
266273476Sroyger		    ((mmap->addr & PAGE_MASK) != 0)) {
267273476Sroyger			error = EINVAL;
268273476Sroyger			break;
269273476Sroyger		}
270273476Sroyger
271273476Sroyger		map = &td->td_proc->p_vmspace->vm_map;
272273476Sroyger		error = vm_map_lookup(&map, mmap->addr, VM_PROT_NONE, &entry,
273299185Sroyger		    &mem, &pindex, &prot, &wired);
274273476Sroyger		if (error != KERN_SUCCESS) {
275273476Sroyger			error = EINVAL;
276273476Sroyger			break;
277273476Sroyger		}
278273476Sroyger		if ((entry->start != mmap->addr) ||
279273476Sroyger		    (entry->end != mmap->addr + (mmap->num * PAGE_SIZE))) {
280273476Sroyger			vm_map_lookup_done(map, entry);
281273476Sroyger			error = EINVAL;
282273476Sroyger			break;
283273476Sroyger		}
284273476Sroyger		vm_map_lookup_done(map, entry);
285273476Sroyger		if ((mem->type != OBJT_MGTDEVICE) ||
286273476Sroyger		    (mem->un_pager.devp.ops != &privcmd_pg_ops)) {
287273476Sroyger			error = EINVAL;
288273476Sroyger			break;
289273476Sroyger		}
290273476Sroyger		umap = mem->handle;
291273476Sroyger
292273476Sroyger		add.domid = DOMID_SELF;
293273476Sroyger		add.space = XENMAPSPACE_gmfn_foreign;
294273476Sroyger		add.foreign_domid = mmap->dom;
295273476Sroyger
296299185Sroyger		/*
297299185Sroyger		 * The 'size' field in the xen_add_to_physmap_range only
298299185Sroyger		 * allows for UINT16_MAX mappings in a single hypercall.
299299185Sroyger		 */
300299185Sroyger		num = MIN(mmap->num, UINT16_MAX);
301273476Sroyger
302299185Sroyger		idxs = malloc(sizeof(*idxs) * num, M_PRIVCMD, M_WAITOK);
303299185Sroyger		gpfns = malloc(sizeof(*gpfns) * num, M_PRIVCMD, M_WAITOK);
304299185Sroyger		errs = malloc(sizeof(*errs) * num, M_PRIVCMD, M_WAITOK);
305299185Sroyger
306273476Sroyger		set_xen_guest_handle(add.idxs, idxs);
307273476Sroyger		set_xen_guest_handle(add.gpfns, gpfns);
308273476Sroyger		set_xen_guest_handle(add.errs, errs);
309273476Sroyger
310299185Sroyger		/* Allocate a bitset to store broken page mappings. */
311299185Sroyger		umap->err = BITSET_ALLOC(mmap->num, M_PRIVCMD,
312299185Sroyger		    M_WAITOK | M_ZERO);
313273476Sroyger
314299185Sroyger		for (index = 0; index < mmap->num; index += num) {
315299185Sroyger			num = MIN(mmap->num - index, UINT16_MAX);
316299185Sroyger			add.size = num;
317273476Sroyger
318299185Sroyger			error = copyin(&mmap->arr[index], idxs,
319299185Sroyger			    sizeof(idxs[0]) * num);
320299185Sroyger			if (error != 0)
321299185Sroyger				goto mmap_out;
322273476Sroyger
323299185Sroyger			for (i = 0; i < num; i++)
324299185Sroyger				gpfns[i] = atop(umap->phys_base_addr +
325299185Sroyger				    (i + index) * PAGE_SIZE);
326299185Sroyger
327299185Sroyger			bzero(errs, sizeof(*errs) * num);
328299185Sroyger
329299185Sroyger			error = HYPERVISOR_memory_op(
330299185Sroyger			    XENMEM_add_to_physmap_range, &add);
331299185Sroyger			if (error != 0) {
332299185Sroyger				error = xen_translate_error(error);
333299185Sroyger				goto mmap_out;
334299185Sroyger			}
335299185Sroyger
336299185Sroyger			for (i = 0; i < num; i++) {
337299185Sroyger				if (errs[i] != 0) {
338299185Sroyger					errs[i] = xen_translate_error(errs[i]);
339299185Sroyger
340299185Sroyger					/* Mark the page as invalid. */
341299185Sroyger					BIT_SET(mmap->num, index + i,
342299185Sroyger					    umap->err);
343299185Sroyger				}
344299185Sroyger			}
345299185Sroyger
346299185Sroyger			error = copyout(errs, &mmap->err[index],
347299185Sroyger			    sizeof(errs[0]) * num);
348299185Sroyger			if (error != 0)
349299185Sroyger				goto mmap_out;
350273476Sroyger		}
351273476Sroyger
352273476Sroyger		umap->mapped = true;
353273476Sroyger
354273476Sroygermmap_out:
355273476Sroyger		free(idxs, M_PRIVCMD);
356273476Sroyger		free(gpfns, M_PRIVCMD);
357299185Sroyger		free(errs, M_PRIVCMD);
358273476Sroyger		if (!umap->mapped)
359299185Sroyger			free(umap->err, M_PRIVCMD);
360273476Sroyger
361273476Sroyger		break;
362273476Sroyger	}
363273476Sroyger
364273476Sroyger	default:
365273476Sroyger		error = ENOSYS;
366273476Sroyger		break;
367273476Sroyger	}
368273476Sroyger
369273476Sroyger	return (error);
370273476Sroyger}
371273476Sroyger
372273476Sroyger/*------------------ Private Device Attachment Functions  --------------------*/
373273476Sroygerstatic void
374273476Sroygerprivcmd_identify(driver_t *driver, device_t parent)
375273476Sroyger{
376273476Sroyger
377273476Sroyger	KASSERT(xen_domain(),
378273476Sroyger	    ("Trying to attach privcmd device on non Xen domain"));
379273476Sroyger
380273476Sroyger	if (BUS_ADD_CHILD(parent, 0, "privcmd", 0) == NULL)
381273476Sroyger		panic("unable to attach privcmd user-space device");
382273476Sroyger}
383273476Sroyger
384273476Sroygerstatic int
385273476Sroygerprivcmd_probe(device_t dev)
386273476Sroyger{
387273476Sroyger
388273476Sroyger	privcmd_dev = dev;
389273476Sroyger	device_set_desc(dev, "Xen privileged interface user-space device");
390273476Sroyger	return (BUS_PROBE_NOWILDCARD);
391273476Sroyger}
392273476Sroyger
393273476Sroygerstatic int
394273476Sroygerprivcmd_attach(device_t dev)
395273476Sroyger{
396273476Sroyger
397273476Sroyger	make_dev_credf(MAKEDEV_ETERNAL, &privcmd_devsw, 0, NULL, UID_ROOT,
398273476Sroyger	    GID_WHEEL, 0600, "xen/privcmd");
399273476Sroyger	return (0);
400273476Sroyger}
401273476Sroyger
402273476Sroyger/*-------------------- Private Device Attachment Data  -----------------------*/
403273476Sroygerstatic device_method_t privcmd_methods[] = {
404273476Sroyger	DEVMETHOD(device_identify,	privcmd_identify),
405273476Sroyger	DEVMETHOD(device_probe,		privcmd_probe),
406273476Sroyger	DEVMETHOD(device_attach,	privcmd_attach),
407273476Sroyger
408273476Sroyger	DEVMETHOD_END
409273476Sroyger};
410273476Sroyger
411273476Sroygerstatic driver_t privcmd_driver = {
412273476Sroyger	"privcmd",
413273476Sroyger	privcmd_methods,
414273476Sroyger	0,
415273476Sroyger};
416273476Sroyger
417273476Sroygerdevclass_t privcmd_devclass;
418273476Sroyger
419273476SroygerDRIVER_MODULE(privcmd, xenpv, privcmd_driver, privcmd_devclass, 0, 0);
420273476SroygerMODULE_DEPEND(privcmd, xenpv, 1, 1, 1);
421