1/******************************************************************************
2 * gnttab.c
3 *
4 * Two sets of functionality:
5 * 1. Granting foreign access to our memory reservation.
6 * 2. Accessing others' memory reservations via grant references.
7 * (i.e., mechanisms for both sender and recipient of grant references)
8 *
9 * Copyright (c) 2005, Christopher Clark
10 * Copyright (c) 2004, K A Fraser
11 */
12
13#include <sys/cdefs.h>
14__FBSDID("$FreeBSD$");
15
16#include <sys/param.h>
17#include <sys/systm.h>
18#include <sys/bus.h>
19#include <sys/conf.h>
20#include <sys/module.h>
21#include <sys/kernel.h>
22#include <sys/lock.h>
23#include <sys/malloc.h>
24#include <sys/mman.h>
25#include <sys/limits.h>
26#include <sys/rman.h>
27#include <machine/resource.h>
28#include <machine/cpu.h>
29
30#include <xen/xen-os.h>
31#include <xen/hypervisor.h>
32#include <machine/xen/synch_bitops.h>
33
34#include <xen/hypervisor.h>
35#include <xen/gnttab.h>
36
37#include <vm/vm.h>
38#include <vm/vm_kern.h>
39#include <vm/vm_extern.h>
40#include <vm/pmap.h>
41
/* External tools reserve first few grant table entries. */
#define NR_RESERVED_ENTRIES 8
/* Number of grant_entry_t records that fit in one page. */
#define GREFS_PER_GRANT_FRAME (PAGE_SIZE / sizeof(grant_entry_t))

/*
 * Free-list bookkeeping.  gnttab_list is an array of page-sized arrays of
 * grant_ref_t, accessed through gnttab_entry(); the slot for a reference
 * holds the next reference of whatever chain it is linked on.
 */
static grant_ref_t **gnttab_list;
/* Number of grant frames currently accounted for (set to 1 at attach). */
static unsigned int nr_grant_frames;
/* Value of __max_nr_grant_frames() sampled at attach time. */
static unsigned int boot_max_nr_grant_frames;
/* Number of references on the global free list. */
static int gnttab_free_count;
/* First reference of the global free list. */
static grant_ref_t gnttab_free_head;
/* Held while the free list and the free-callback list are modified. */
static struct mtx gnttab_list_lock;

/*
 * Resource representing allocated physical address space
 * for the grant table metainfo
 */
static struct resource *gnttab_pseudo_phys_res;

/* Resource id for allocated physical address space. */
static int gnttab_pseudo_phys_res_id;

/* Kernel virtual mapping of the grant entries, set up by gnttab_map(). */
static grant_entry_t *shared;

/* Callbacks waiting for free references; see do_free_callbacks(). */
static struct gnttab_free_callback *gnttab_free_callback_list = NULL;

static int gnttab_expand(unsigned int req_entries);

/* Number of grant_ref_t links that fit in one page of gnttab_list. */
#define RPP (PAGE_SIZE / sizeof(grant_ref_t))
/* Link slot for reference 'entry': page entry / RPP, offset entry % RPP. */
#define gnttab_entry(entry) (gnttab_list[(entry) / RPP][(entry) % RPP])
70
71static int
72get_free_entries(int count, int *entries)
73{
74	int ref, error;
75	grant_ref_t head;
76
77	mtx_lock(&gnttab_list_lock);
78	if ((gnttab_free_count < count) &&
79	    ((error = gnttab_expand(count - gnttab_free_count)) != 0)) {
80		mtx_unlock(&gnttab_list_lock);
81		return (error);
82	}
83	ref = head = gnttab_free_head;
84	gnttab_free_count -= count;
85	while (count-- > 1)
86		head = gnttab_entry(head);
87	gnttab_free_head = gnttab_entry(head);
88	gnttab_entry(head) = GNTTAB_LIST_END;
89	mtx_unlock(&gnttab_list_lock);
90
91	*entries = ref;
92	return (0);
93}
94
95static void
96do_free_callbacks(void)
97{
98	struct gnttab_free_callback *callback, *next;
99
100	callback = gnttab_free_callback_list;
101	gnttab_free_callback_list = NULL;
102
103	while (callback != NULL) {
104		next = callback->next;
105		if (gnttab_free_count >= callback->count) {
106			callback->next = NULL;
107			callback->fn(callback->arg);
108		} else {
109			callback->next = gnttab_free_callback_list;
110			gnttab_free_callback_list = callback;
111		}
112		callback = next;
113	}
114}
115
116static inline void
117check_free_callbacks(void)
118{
119	if (__predict_false(gnttab_free_callback_list != NULL))
120		do_free_callbacks();
121}
122
123static void
124put_free_entry(grant_ref_t ref)
125{
126
127	mtx_lock(&gnttab_list_lock);
128	gnttab_entry(ref) = gnttab_free_head;
129	gnttab_free_head = ref;
130	gnttab_free_count++;
131	check_free_callbacks();
132	mtx_unlock(&gnttab_list_lock);
133}
134
135/*
136 * Public grant-issuing interface functions
137 */
138
139int
140gnttab_grant_foreign_access(domid_t domid, unsigned long frame, int readonly,
141	grant_ref_t *result)
142{
143	int error, ref;
144
145	error = get_free_entries(1, &ref);
146
147	if (__predict_false(error))
148		return (error);
149
150	shared[ref].frame = frame;
151	shared[ref].domid = domid;
152	wmb();
153	shared[ref].flags = GTF_permit_access | (readonly ? GTF_readonly : 0);
154
155	if (result)
156		*result = ref;
157
158	return (0);
159}
160
161void
162gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid,
163				unsigned long frame, int readonly)
164{
165
166	shared[ref].frame = frame;
167	shared[ref].domid = domid;
168	wmb();
169	shared[ref].flags = GTF_permit_access | (readonly ? GTF_readonly : 0);
170}
171
172int
173gnttab_query_foreign_access(grant_ref_t ref)
174{
175	uint16_t nflags;
176
177	nflags = shared[ref].flags;
178
179	return (nflags & (GTF_reading|GTF_writing));
180}
181
182int
183gnttab_end_foreign_access_ref(grant_ref_t ref)
184{
185	uint16_t flags, nflags;
186
187	nflags = shared[ref].flags;
188	do {
189		if ( (flags = nflags) & (GTF_reading|GTF_writing) ) {
190			printf("%s: WARNING: g.e. still in use!\n", __func__);
191			return (0);
192		}
193	} while ((nflags = synch_cmpxchg(&shared[ref].flags, flags, 0)) !=
194	       flags);
195
196	return (1);
197}
198
199void
200gnttab_end_foreign_access(grant_ref_t ref, void *page)
201{
202	if (gnttab_end_foreign_access_ref(ref)) {
203		put_free_entry(ref);
204		if (page != NULL) {
205			free(page, M_DEVBUF);
206		}
207	}
208	else {
209		/* XXX This needs to be fixed so that the ref and page are
210		   placed on a list to be freed up later. */
211		printf("%s: WARNING: leaking g.e. and page still in use!\n",
212		       __func__);
213	}
214}
215
216void
217gnttab_end_foreign_access_references(u_int count, grant_ref_t *refs)
218{
219	grant_ref_t *last_ref;
220	grant_ref_t  head;
221	grant_ref_t  tail;
222
223	head = GNTTAB_LIST_END;
224	tail = *refs;
225	last_ref = refs + count;
226	while (refs != last_ref) {
227
228		if (gnttab_end_foreign_access_ref(*refs)) {
229			gnttab_entry(*refs) = head;
230			head = *refs;
231		} else {
232			/*
233			 * XXX This needs to be fixed so that the ref
234			 * is placed on a list to be freed up later.
235			 */
236			printf("%s: WARNING: leaking g.e. still in use!\n",
237			       __func__);
238			count--;
239		}
240		refs++;
241	}
242
243	if (count != 0) {
244		mtx_lock(&gnttab_list_lock);
245		gnttab_free_count += count;
246		gnttab_entry(tail) = gnttab_free_head;
247		gnttab_free_head = head;
248		check_free_callbacks();
249		mtx_unlock(&gnttab_list_lock);
250	}
251}
252
253int
254gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn,
255    grant_ref_t *result)
256{
257	int error, ref;
258
259	error = get_free_entries(1, &ref);
260	if (__predict_false(error))
261		return (error);
262
263	gnttab_grant_foreign_transfer_ref(ref, domid, pfn);
264
265	*result = ref;
266	return (0);
267}
268
269void
270gnttab_grant_foreign_transfer_ref(grant_ref_t ref, domid_t domid,
271	unsigned long pfn)
272{
273	shared[ref].frame = pfn;
274	shared[ref].domid = domid;
275	wmb();
276	shared[ref].flags = GTF_accept_transfer;
277}
278
279unsigned long
280gnttab_end_foreign_transfer_ref(grant_ref_t ref)
281{
282	unsigned long frame;
283	uint16_t      flags;
284
285	/*
286         * If a transfer is not even yet started, try to reclaim the grant
287         * reference and return failure (== 0).
288         */
289	while (!((flags = shared[ref].flags) & GTF_transfer_committed)) {
290		if ( synch_cmpxchg(&shared[ref].flags, flags, 0) == flags )
291			return (0);
292		cpu_spinwait();
293	}
294
295	/* If a transfer is in progress then wait until it is completed. */
296	while (!(flags & GTF_transfer_completed)) {
297		flags = shared[ref].flags;
298		cpu_spinwait();
299	}
300
301	/* Read the frame number /after/ reading completion status. */
302	rmb();
303	frame = shared[ref].frame;
304	KASSERT(frame != 0, ("grant table inconsistent"));
305
306	return (frame);
307}
308
309unsigned long
310gnttab_end_foreign_transfer(grant_ref_t ref)
311{
312	unsigned long frame = gnttab_end_foreign_transfer_ref(ref);
313
314	put_free_entry(ref);
315	return (frame);
316}
317
/*
 * Return the single grant reference 'ref' to the global free list.
 * Public wrapper around put_free_entry().
 */
void
gnttab_free_grant_reference(grant_ref_t ref)
{

	put_free_entry(ref);
}
324
325void
326gnttab_free_grant_references(grant_ref_t head)
327{
328	grant_ref_t ref;
329	int count = 1;
330
331	if (head == GNTTAB_LIST_END)
332		return;
333
334	ref = head;
335	while (gnttab_entry(ref) != GNTTAB_LIST_END) {
336		ref = gnttab_entry(ref);
337		count++;
338	}
339	mtx_lock(&gnttab_list_lock);
340	gnttab_entry(ref) = gnttab_free_head;
341	gnttab_free_head = head;
342	gnttab_free_count += count;
343	check_free_callbacks();
344	mtx_unlock(&gnttab_list_lock);
345}
346
347int
348gnttab_alloc_grant_references(uint16_t count, grant_ref_t *head)
349{
350	int ref, error;
351
352	error = get_free_entries(count, &ref);
353	if (__predict_false(error))
354		return (error);
355
356	*head = ref;
357	return (0);
358}
359
360int
361gnttab_empty_grant_references(const grant_ref_t *private_head)
362{
363
364	return (*private_head == GNTTAB_LIST_END);
365}
366
367int
368gnttab_claim_grant_reference(grant_ref_t *private_head)
369{
370	grant_ref_t g = *private_head;
371
372	if (__predict_false(g == GNTTAB_LIST_END))
373		return (g);
374	*private_head = gnttab_entry(g);
375	return (g);
376}
377
378void
379gnttab_release_grant_reference(grant_ref_t *private_head, grant_ref_t  release)
380{
381
382	gnttab_entry(release) = *private_head;
383	*private_head = release;
384}
385
386void
387gnttab_request_free_callback(struct gnttab_free_callback *callback,
388    void (*fn)(void *), void *arg, uint16_t count)
389{
390
391	mtx_lock(&gnttab_list_lock);
392	if (callback->next)
393		goto out;
394	callback->fn = fn;
395	callback->arg = arg;
396	callback->count = count;
397	callback->next = gnttab_free_callback_list;
398	gnttab_free_callback_list = callback;
399	check_free_callbacks();
400 out:
401	mtx_unlock(&gnttab_list_lock);
402
403}
404
405void
406gnttab_cancel_free_callback(struct gnttab_free_callback *callback)
407{
408	struct gnttab_free_callback **pcb;
409
410	mtx_lock(&gnttab_list_lock);
411	for (pcb = &gnttab_free_callback_list; *pcb; pcb = &(*pcb)->next) {
412		if (*pcb == callback) {
413			*pcb = callback->next;
414			break;
415		}
416	}
417	mtx_unlock(&gnttab_list_lock);
418}
419
420
421static int
422grow_gnttab_list(unsigned int more_frames)
423{
424	unsigned int new_nr_grant_frames, extra_entries, i;
425
426	new_nr_grant_frames = nr_grant_frames + more_frames;
427	extra_entries       = more_frames * GREFS_PER_GRANT_FRAME;
428
429	for (i = nr_grant_frames; i < new_nr_grant_frames; i++)
430	{
431		gnttab_list[i] = (grant_ref_t *)
432			malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT);
433
434		if (!gnttab_list[i])
435			goto grow_nomem;
436	}
437
438	for (i = GREFS_PER_GRANT_FRAME * nr_grant_frames;
439	     i < GREFS_PER_GRANT_FRAME * new_nr_grant_frames - 1; i++)
440		gnttab_entry(i) = i + 1;
441
442	gnttab_entry(i) = gnttab_free_head;
443	gnttab_free_head = GREFS_PER_GRANT_FRAME * nr_grant_frames;
444	gnttab_free_count += extra_entries;
445
446	nr_grant_frames = new_nr_grant_frames;
447
448	check_free_callbacks();
449
450	return (0);
451
452grow_nomem:
453	for ( ; i >= nr_grant_frames; i--)
454		free(gnttab_list[i], M_DEVBUF);
455	return (ENOMEM);
456}
457
458static unsigned int
459__max_nr_grant_frames(void)
460{
461	struct gnttab_query_size query;
462	int rc;
463
464	query.dom = DOMID_SELF;
465
466	rc = HYPERVISOR_grant_table_op(GNTTABOP_query_size, &query, 1);
467	if ((rc < 0) || (query.status != GNTST_okay))
468		return (4); /* Legacy max supported number of frames */
469
470	return (query.max_nr_frames);
471}
472
473static inline
474unsigned int max_nr_grant_frames(void)
475{
476	unsigned int xen_max = __max_nr_grant_frames();
477
478	if (xen_max > boot_max_nr_grant_frames)
479		return (boot_max_nr_grant_frames);
480	return (xen_max);
481}
482
#ifdef notyet
/*
 * XXX needed for backend support
 *
 * Everything up to the matching #endif is compiled only when "notyet" is
 * defined.
 * NOTE(review): pte_t, struct page, init_mm, set_pte_at(), pfn_pte_ma()
 * and __pte() are not declared in this file and look like Linux kernel
 * interfaces -- confirm they are available before enabling this code.
 */

/*
 * Callback that installs, at 'addr', a PAGE_KERNEL mapping of the frame
 * currently pointed to by *data (an unsigned long **), then advances that
 * pointer to the next frame.  Always returns 0.
 */
static int
map_pte_fn(pte_t *pte, struct page *pmd_page,
		      unsigned long addr, void *data)
{
	unsigned long **frames = (unsigned long **)data;

	set_pte_at(&init_mm, addr, pte, pfn_pte_ma((*frames)[0], PAGE_KERNEL));
	(*frames)++;
	return 0;
}

/*
 * Callback that replaces the page-table entry for 'addr' with __pte(0).
 * 'pmd_page' and 'data' are unused.  Always returns 0.
 */
static int
unmap_pte_fn(pte_t *pte, struct page *pmd_page,
			unsigned long addr, void *data)
{

	set_pte_at(&init_mm, addr, pte, __pte(0));
	return 0;
}
#endif
508
509static vm_paddr_t resume_frames;
510
511static int
512gnttab_map(unsigned int start_idx, unsigned int end_idx)
513{
514	struct xen_add_to_physmap xatp;
515	unsigned int i = end_idx;
516
517	/*
518	 * Loop backwards, so that the first hypercall has the largest index,
519	 * ensuring that the table will grow only once.
520	 */
521	do {
522		xatp.domid = DOMID_SELF;
523		xatp.idx = i;
524		xatp.space = XENMAPSPACE_grant_table;
525		xatp.gpfn = (resume_frames >> PAGE_SHIFT) + i;
526		if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp))
527			panic("HYPERVISOR_memory_op failed to map gnttab");
528	} while (i-- > start_idx);
529
530	if (shared == NULL) {
531		vm_offset_t area;
532
533		area = kva_alloc(PAGE_SIZE * max_nr_grant_frames());
534		KASSERT(area, ("can't allocate VM space for grant table"));
535		shared = (grant_entry_t *)area;
536	}
537
538	for (i = start_idx; i <= end_idx; i++) {
539		pmap_kenter((vm_offset_t) shared + i * PAGE_SIZE,
540		    resume_frames + i * PAGE_SIZE);
541	}
542
543	return (0);
544}
545
546int
547gnttab_resume(device_t dev)
548{
549	unsigned int max_nr_gframes, nr_gframes;
550
551	nr_gframes = nr_grant_frames;
552	max_nr_gframes = max_nr_grant_frames();
553	if (max_nr_gframes < nr_gframes)
554		return (ENOSYS);
555
556	if (!resume_frames) {
557		KASSERT(dev != NULL,
558		    ("No resume frames and no device provided"));
559
560		gnttab_pseudo_phys_res = xenmem_alloc(dev,
561		    &gnttab_pseudo_phys_res_id, PAGE_SIZE * max_nr_gframes);
562		if (gnttab_pseudo_phys_res == NULL)
563			panic("Unable to reserve physical memory for gnttab");
564		resume_frames = rman_get_start(gnttab_pseudo_phys_res);
565	}
566
567	return (gnttab_map(0, nr_gframes - 1));
568}
569
570static int
571gnttab_expand(unsigned int req_entries)
572{
573	int error;
574	unsigned int cur, extra;
575
576	cur = nr_grant_frames;
577	extra = howmany(req_entries, GREFS_PER_GRANT_FRAME);
578	if (cur + extra > max_nr_grant_frames())
579		return (ENOSPC);
580
581	error = gnttab_map(cur, cur + extra - 1);
582	if (!error)
583		error = grow_gnttab_list(extra);
584
585	return (error);
586}
587
/*
 * Set up gnttab_list_lock through MTX_SYSINIT as a default mutex with
 * MTX_RECURSE.
 * NOTE(review): recursion is presumably needed because free callbacks are
 * invoked with the lock held and may allocate references -- confirm.
 */
MTX_SYSINIT(gnttab, &gnttab_list_lock, "GNTTAB LOCK", MTX_DEF | MTX_RECURSE);
589
590/*------------------ Private Device Attachment Functions  --------------------*/
591/**
592 * \brief Identify instances of this device type in the system.
593 *
594 * \param driver  The driver performing this identify action.
595 * \param parent  The NewBus parent device for any devices this method adds.
596 */
597static void
598granttable_identify(driver_t *driver __unused, device_t parent)
599{
600
601	KASSERT(xen_domain(),
602	    ("Trying to attach grant-table device on non Xen domain"));
603	/*
604	 * A single device instance for our driver is always present
605	 * in a system operating under Xen.
606	 */
607	if (BUS_ADD_CHILD(parent, 0, driver->name, 0) == NULL)
608		panic("unable to attach Xen Grant-table device");
609}
610
/**
 * \brief Probe for the existence of the Xen Grant-table device
 *
 * Sets the device description; no hardware is examined.
 *
 * \param dev  NewBus device_t for this instance.
 *
 * \return  Always returns BUS_PROBE_NOWILDCARD.
 */
static int
granttable_probe(device_t dev)
{

	device_set_desc(dev, "Xen Grant-table Device");
	return (BUS_PROBE_NOWILDCARD);
}
625
626/**
627 * \brief Attach the Xen Grant-table device.
628 *
629 * \param dev  NewBus device_t for this instance.
630 *
631 * \return  On success, 0. Otherwise an errno value indicating the
632 *          type of failure.
633 */
634static int
635granttable_attach(device_t dev)
636{
637	int i;
638	unsigned int max_nr_glist_frames;
639	unsigned int nr_init_grefs;
640
641	nr_grant_frames = 1;
642	boot_max_nr_grant_frames = __max_nr_grant_frames();
643
644	/* Determine the maximum number of frames required for the
645	 * grant reference free list on the current hypervisor.
646	 */
647	max_nr_glist_frames = (boot_max_nr_grant_frames *
648			       GREFS_PER_GRANT_FRAME /
649			       (PAGE_SIZE / sizeof(grant_ref_t)));
650
651	gnttab_list = malloc(max_nr_glist_frames * sizeof(grant_ref_t *),
652	    M_DEVBUF, M_NOWAIT);
653
654	if (gnttab_list == NULL)
655		return (ENOMEM);
656
657	for (i = 0; i < nr_grant_frames; i++) {
658		gnttab_list[i] = (grant_ref_t *)
659			malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT);
660		if (gnttab_list[i] == NULL)
661			goto ini_nomem;
662	}
663
664	if (gnttab_resume(dev))
665		return (ENODEV);
666
667	nr_init_grefs = nr_grant_frames * GREFS_PER_GRANT_FRAME;
668
669	for (i = NR_RESERVED_ENTRIES; i < nr_init_grefs - 1; i++)
670		gnttab_entry(i) = i + 1;
671
672	gnttab_entry(nr_init_grefs - 1) = GNTTAB_LIST_END;
673	gnttab_free_count = nr_init_grefs - NR_RESERVED_ENTRIES;
674	gnttab_free_head  = NR_RESERVED_ENTRIES;
675
676	if (bootverbose)
677		printf("Grant table initialized\n");
678
679	return (0);
680
681ini_nomem:
682	for (i--; i >= 0; i--)
683		free(gnttab_list[i], M_DEVBUF);
684	free(gnttab_list, M_DEVBUF);
685	return (ENOMEM);
686}
687
/*-------------------- Private Device Attachment Data  -----------------------*/
/* NewBus method table: identify, probe and attach are implemented above. */
static device_method_t granttable_methods[] = {
	/* Device interface */
	DEVMETHOD(device_identify,	granttable_identify),
	DEVMETHOD(device_probe,         granttable_probe),
	DEVMETHOD(device_attach,        granttable_attach),

	DEVMETHOD_END
};

/*
 * Driver class "granttable" built from the method table above.
 * NOTE(review): the trailing 0 is presumably the per-device softc size --
 * confirm against DEFINE_CLASS_0.
 */
DEFINE_CLASS_0(granttable, granttable_driver, granttable_methods, 0);
devclass_t granttable_devclass;

/* Register the driver under "xenpv", ordered SI_ORDER_FIRST. */
DRIVER_MODULE_ORDERED(granttable, xenpv, granttable_driver, granttable_devclass,
    NULL, NULL, SI_ORDER_FIRST);
703