gnttab.c revision 181804
1/******************************************************************************
2 * gnttab.c
3 *
4 * Two sets of functionality:
5 * 1. Granting foreign access to our memory reservation.
6 * 2. Accessing others' memory reservations via grant references.
7 * (i.e., mechanisms for both sender and recipient of grant references)
8 *
9 * Copyright (c) 2005, Christopher Clark
10 * Copyright (c) 2004, K A Fraser
11 */
12
13#include <sys/cdefs.h>
14__FBSDID("$FreeBSD: head/sys/xen/gnttab.c 181804 2008-08-17 23:32:34Z kmacy $");
15
16#include "opt_global.h"
17#include "opt_pmap.h"
18#include <sys/param.h>
19#include <sys/systm.h>
20#include <sys/bus.h>
21#include <sys/conf.h>
22#include <sys/module.h>
23#include <sys/kernel.h>
24#include <sys/lock.h>
25#include <sys/malloc.h>
26#include <sys/mman.h>
27#include <vm/vm.h>
28#include <vm/vm_extern.h>
29
30#include <vm/vm_page.h>
31#include <vm/vm_kern.h>
32
33
34
35#include <machine/xen/hypervisor.h>
36#include <machine/xen/synch_bitops.h>
37#include <xen/gnttab.h>
38
39#define cmpxchg(a, b, c) atomic_cmpset_int((volatile u_int *)(a),(b),(c))
40
41
/*
 * ASSERT(p): when p is false, report the failed expression together with
 * its line and file on the console and then force a fault by storing
 * through a null pointer.
 *
 * The enabled form is wrapped in do { } while (0) so that it expands to
 * exactly one statement; a bare "if (...) { ... }" would capture the else
 * branch of an enclosing if/else.
 */
#if 1
#define ASSERT(_p)							\
	do {								\
		if (!(_p)) {						\
			printk("Assertion '%s': line %d, file %s\n",	\
			    #_p, __LINE__, __FILE__);			\
			*(int *)0 = 0;					\
		}							\
	} while (0)
#else
#define ASSERT(_p) ((void)0)
#endif
49
50#define WPRINTK(fmt, args...) \
51    printk("xen_grant: " fmt, ##args)
52
/* External tools reserve the first few grant table entries. */
54#define NR_RESERVED_ENTRIES 8
55#define GNTTAB_LIST_END 0xffffffff
56#define GREFS_PER_GRANT_FRAME (PAGE_SIZE / sizeof(grant_entry_t))
57
58static grant_ref_t **gnttab_list;
59static unsigned int nr_grant_frames;
60static unsigned int boot_max_nr_grant_frames;
61static int gnttab_free_count;
62static grant_ref_t gnttab_free_head;
63static struct mtx gnttab_list_lock;
64
65static grant_entry_t *shared;
66
67static struct gnttab_free_callback *gnttab_free_callback_list = NULL;
68
69static int gnttab_expand(unsigned int req_entries);
70
71#define RPP (PAGE_SIZE / sizeof(grant_ref_t))
72#define gnttab_entry(entry) (gnttab_list[(entry) / RPP][(entry) % RPP])
73
74static int
75get_free_entries(int count)
76{
77	int ref, rc;
78	grant_ref_t head;
79
80	mtx_lock(&gnttab_list_lock);
81	if ((gnttab_free_count < count) &&
82	    ((rc = gnttab_expand(count - gnttab_free_count)) < 0)) {
83		mtx_unlock(&gnttab_list_lock);
84		return (rc);
85	}
86	ref = head = gnttab_free_head;
87	gnttab_free_count -= count;
88	while (count-- > 1)
89		head = gnttab_entry(head);
90	gnttab_free_head = gnttab_entry(head);
91	gnttab_entry(head) = GNTTAB_LIST_END;
92	mtx_unlock(&gnttab_list_lock);
93	return (ref);
94}
95
96#define get_free_entry() get_free_entries(1)
97
98static void
99do_free_callbacks(void)
100{
101	struct gnttab_free_callback *callback, *next;
102
103	callback = gnttab_free_callback_list;
104	gnttab_free_callback_list = NULL;
105
106	while (callback != NULL) {
107		next = callback->next;
108		if (gnttab_free_count >= callback->count) {
109			callback->next = NULL;
110			callback->fn(callback->arg);
111		} else {
112			callback->next = gnttab_free_callback_list;
113			gnttab_free_callback_list = callback;
114		}
115		callback = next;
116	}
117}
118
119static inline void
120check_free_callbacks(void)
121{
122	if (unlikely(gnttab_free_callback_list != NULL))
123		do_free_callbacks();
124}
125
126static void
127put_free_entry(grant_ref_t ref)
128{
129
130	mtx_lock(&gnttab_list_lock);
131	gnttab_entry(ref) = gnttab_free_head;
132	gnttab_free_head = ref;
133	gnttab_free_count++;
134	check_free_callbacks();
135	mtx_unlock(&gnttab_list_lock);
136}
137
138/*
139 * Public grant-issuing interface functions
140 */
141
142int
143gnttab_grant_foreign_access(domid_t domid, unsigned long frame, int readonly)
144{
145	int ref;
146
147	if (unlikely((ref = get_free_entry()) == -1))
148		return -ENOSPC;
149
150	shared[ref].frame = frame;
151	shared[ref].domid = domid;
152	wmb();
153	shared[ref].flags = GTF_permit_access | (readonly ? GTF_readonly : 0);
154
155	return ref;
156}
157
158void
159gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid,
160				unsigned long frame, int readonly)
161{
162	shared[ref].frame = frame;
163	shared[ref].domid = domid;
164	wmb();
165	shared[ref].flags = GTF_permit_access | (readonly ? GTF_readonly : 0);
166}
167
168int
169gnttab_query_foreign_access(grant_ref_t ref)
170{
171	uint16_t nflags;
172
173	nflags = shared[ref].flags;
174
175	return (nflags & (GTF_reading|GTF_writing));
176}
177
178int
179gnttab_end_foreign_access_ref(grant_ref_t ref, int readonly)
180{
181	uint16_t flags, nflags;
182
183	nflags = shared[ref].flags;
184	do {
185		if ( (flags = nflags) & (GTF_reading|GTF_writing) ) {
186			printf("WARNING: g.e. still in use!\n");
187			return (0);
188		}
189	} while ((nflags = synch_cmpxchg(&shared[ref].flags, flags, 0)) !=
190	       flags);
191
192	return (1);
193}
194
195void
196gnttab_end_foreign_access(grant_ref_t ref, int readonly, void *page)
197{
198	if (gnttab_end_foreign_access_ref(ref, readonly)) {
199		put_free_entry(ref);
200		if (page != NULL) {
201			free(page, M_DEVBUF);
202		}
203	}
204	else {
205		/* XXX This needs to be fixed so that the ref and page are
206		   placed on a list to be freed up later. */
207		printf("WARNING: leaking g.e. and page still in use!\n");
208	}
209}
210
211int
212gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn)
213{
214	int ref;
215
216	if (unlikely((ref = get_free_entry()) == -1))
217		return -ENOSPC;
218
219	gnttab_grant_foreign_transfer_ref(ref, domid, pfn);
220
221	return (ref);
222}
223
224void
225gnttab_grant_foreign_transfer_ref(grant_ref_t ref, domid_t domid,
226	unsigned long pfn)
227{
228	shared[ref].frame = pfn;
229	shared[ref].domid = domid;
230	wmb();
231	shared[ref].flags = GTF_accept_transfer;
232}
233
234unsigned long
235gnttab_end_foreign_transfer_ref(grant_ref_t ref)
236{
237	unsigned long frame;
238	uint16_t      flags;
239
240	/*
241         * If a transfer is not even yet started, try to reclaim the grant
242         * reference and return failure (== 0).
243         */
244	while (!((flags = shared[ref].flags) & GTF_transfer_committed)) {
245		if ( synch_cmpxchg(&shared[ref].flags, flags, 0) == flags )
246			return (0);
247		cpu_relax();
248	}
249
250	/* If a transfer is in progress then wait until it is completed. */
251	while (!(flags & GTF_transfer_completed)) {
252		flags = shared[ref].flags;
253		cpu_relax();
254	}
255
256	/* Read the frame number /after/ reading completion status. */
257	rmb();
258	frame = shared[ref].frame;
259	PANIC_IF(frame == 0);
260
261	return (frame);
262}
263
264unsigned long
265gnttab_end_foreign_transfer(grant_ref_t ref)
266{
267	unsigned long frame = gnttab_end_foreign_transfer_ref(ref);
268
269	put_free_entry(ref);
270	return (frame);
271}
272
273void
274gnttab_free_grant_reference(grant_ref_t ref)
275{
276
277	put_free_entry(ref);
278}
279
280void
281gnttab_free_grant_references(grant_ref_t head)
282{
283	grant_ref_t ref;
284	int count = 1;
285
286	if (head == GNTTAB_LIST_END)
287		return;
288
289	mtx_lock(&gnttab_list_lock);
290	ref = head;
291	while (gnttab_entry(ref) != GNTTAB_LIST_END) {
292		ref = gnttab_entry(ref);
293		count++;
294	}
295	gnttab_entry(ref) = gnttab_free_head;
296	gnttab_free_head = head;
297	gnttab_free_count += count;
298	check_free_callbacks();
299	mtx_unlock(&gnttab_list_lock);
300}
301
302int
303gnttab_alloc_grant_references(uint16_t count, grant_ref_t *head)
304{
305	int h = get_free_entries(count);
306
307	if (h == -1)
308		return -ENOSPC;
309
310	*head = h;
311
312	return 0;
313}
314
315int
316gnttab_empty_grant_references(const grant_ref_t *private_head)
317{
318	return (*private_head == GNTTAB_LIST_END);
319}
320
321int
322gnttab_claim_grant_reference(grant_ref_t *private_head)
323{
324	grant_ref_t g = *private_head;
325
326	if (unlikely(g == GNTTAB_LIST_END))
327		return -ENOSPC;
328	*private_head = gnttab_entry(g);
329
330	return (g);
331}
332
333void
334gnttab_release_grant_reference(grant_ref_t *private_head, grant_ref_t  release)
335{
336	gnttab_entry(release) = *private_head;
337	*private_head = release;
338}
339
340void
341gnttab_request_free_callback(struct gnttab_free_callback *callback,
342			     void (*fn)(void *), void *arg, uint16_t count)
343{
344
345	mtx_lock(&gnttab_list_lock);
346	if (callback->next)
347		goto out;
348	callback->fn = fn;
349	callback->arg = arg;
350	callback->count = count;
351	callback->next = gnttab_free_callback_list;
352	gnttab_free_callback_list = callback;
353	check_free_callbacks();
354 out:
355	mtx_unlock(&gnttab_list_lock);
356
357}
358
359void
360gnttab_cancel_free_callback(struct gnttab_free_callback *callback)
361{
362	struct gnttab_free_callback **pcb;
363
364	mtx_lock(&gnttab_list_lock);
365	for (pcb = &gnttab_free_callback_list; *pcb; pcb = &(*pcb)->next) {
366		if (*pcb == callback) {
367			*pcb = callback->next;
368			break;
369		}
370	}
371	mtx_unlock(&gnttab_list_lock);
372}
373
374
375static int
376grow_gnttab_list(unsigned int more_frames)
377{
378	unsigned int new_nr_grant_frames, extra_entries, i;
379
380	new_nr_grant_frames = nr_grant_frames + more_frames;
381	extra_entries       = more_frames * GREFS_PER_GRANT_FRAME;
382
383	for (i = nr_grant_frames; i < new_nr_grant_frames; i++)
384	{
385		gnttab_list[i] = (grant_ref_t *)malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT);
386
387		if (!gnttab_list[i])
388			goto grow_nomem;
389	}
390
391	for (i = GREFS_PER_GRANT_FRAME * nr_grant_frames;
392	     i < GREFS_PER_GRANT_FRAME * new_nr_grant_frames - 1; i++)
393		gnttab_entry(i) = i + 1;
394
395	gnttab_entry(i) = gnttab_free_head;
396	gnttab_free_head = GREFS_PER_GRANT_FRAME * nr_grant_frames;
397	gnttab_free_count += extra_entries;
398
399	nr_grant_frames = new_nr_grant_frames;
400
401	check_free_callbacks();
402
403	return 0;
404
405grow_nomem:
406	for ( ; i >= nr_grant_frames; i--)
407		free(gnttab_list[i], M_DEVBUF);
408	return (-ENOMEM);
409}
410
411static unsigned int
412__max_nr_grant_frames(void)
413{
414	struct gnttab_query_size query;
415	int rc;
416
417	query.dom = DOMID_SELF;
418
419	rc = HYPERVISOR_grant_table_op(GNTTABOP_query_size, &query, 1);
420	if ((rc < 0) || (query.status != GNTST_okay))
421		return (4); /* Legacy max supported number of frames */
422
423	return (query.max_nr_frames);
424}
425
426static inline
427unsigned int max_nr_grant_frames(void)
428{
429	unsigned int xen_max = __max_nr_grant_frames();
430
431	if (xen_max > boot_max_nr_grant_frames)
432		return (boot_max_nr_grant_frames);
433	return (xen_max);
434}
435
436#ifdef notyet
437/*
438 * XXX needed for backend support
439 *
440 */
441static int
442map_pte_fn(pte_t *pte, struct page *pmd_page,
443		      unsigned long addr, void *data)
444{
445	unsigned long **frames = (unsigned long **)data;
446
447	set_pte_at(&init_mm, addr, pte, pfn_pte_ma((*frames)[0], PAGE_KERNEL));
448	(*frames)++;
449	return 0;
450}
451
452static int
453unmap_pte_fn(pte_t *pte, struct page *pmd_page,
454			unsigned long addr, void *data)
455{
456
457	set_pte_at(&init_mm, addr, pte, __pte(0));
458	return 0;
459}
460#endif
461
462static int
463gnttab_map(unsigned int start_idx, unsigned int end_idx)
464{
465	struct gnttab_setup_table setup;
466	unsigned long *frames;
467	unsigned int nr_gframes = end_idx + 1;
468	int i, rc;
469
470	frames = malloc(nr_gframes * sizeof(unsigned long), M_DEVBUF, M_NOWAIT);
471	if (!frames)
472		return -ENOMEM;
473
474	setup.dom        = DOMID_SELF;
475	setup.nr_frames  = nr_gframes;
476	set_xen_guest_handle(setup.frame_list, frames);
477
478	rc = HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1);
479	if (rc == -ENOSYS) {
480		free(frames, M_DEVBUF);
481		return -ENOSYS;
482	}
483	PANIC_IF(rc || setup.status);
484
485	if (shared == NULL) {
486		vm_offset_t area;
487
488		area = kmem_alloc_nofault(kernel_map,
489		    PAGE_SIZE * max_nr_grant_frames());
490		PANIC_IF(area == 0);
491		shared = (grant_entry_t *)area;
492	}
493	for (i = 0; i < nr_gframes; i++)
494		PT_SET_MA(((caddr_t)shared) + i*PAGE_SIZE,
495		    ((vm_paddr_t)frames[i]) << PAGE_SHIFT | PG_RW | PG_V);
496
497	free(frames, M_DEVBUF);
498
499	return 0;
500}
501
502int
503gnttab_resume(void)
504{
505	if (max_nr_grant_frames() < nr_grant_frames)
506		return -ENOSYS;
507	return gnttab_map(0, nr_grant_frames - 1);
508}
509
510int
511gnttab_suspend(void)
512{
513	int i, pages;
514
515	pages = (PAGE_SIZE*nr_grant_frames) >> PAGE_SHIFT;
516
517	for (i = 0; i < pages; i++)
518		PT_SET_MA(shared + (i*PAGE_SIZE), (vm_paddr_t)0);
519
520	return (0);
521}
522
523static int
524gnttab_expand(unsigned int req_entries)
525{
526	int rc;
527	unsigned int cur, extra;
528
529	cur = nr_grant_frames;
530	extra = ((req_entries + (GREFS_PER_GRANT_FRAME-1)) /
531		 GREFS_PER_GRANT_FRAME);
532	if (cur + extra > max_nr_grant_frames())
533		return -ENOSPC;
534
535	if ((rc = gnttab_map(cur, cur + extra - 1)) == 0)
536		rc = grow_gnttab_list(extra);
537
538	return rc;
539}
540
541static int
542gnttab_init(void *unused)
543{
544	int i;
545	unsigned int max_nr_glist_frames;
546	unsigned int nr_init_grefs;
547
548	if (!is_running_on_xen())
549		return -ENODEV;
550
551	nr_grant_frames = 1;
552	boot_max_nr_grant_frames = __max_nr_grant_frames();
553
554	/* Determine the maximum number of frames required for the
555	 * grant reference free list on the current hypervisor.
556	 */
557	max_nr_glist_frames = (boot_max_nr_grant_frames *
558			       GREFS_PER_GRANT_FRAME /
559			       (PAGE_SIZE / sizeof(grant_ref_t)));
560
561	gnttab_list = malloc(max_nr_glist_frames * sizeof(grant_ref_t *),
562	    M_DEVBUF, M_NOWAIT);
563
564	if (gnttab_list == NULL)
565		return -ENOMEM;
566
567	for (i = 0; i < nr_grant_frames; i++) {
568		gnttab_list[i] = (grant_ref_t *)malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT);
569		if (gnttab_list[i] == NULL)
570			goto ini_nomem;
571	}
572
573	if (gnttab_resume() < 0)
574		return -ENODEV;
575
576	nr_init_grefs = nr_grant_frames * GREFS_PER_GRANT_FRAME;
577
578	for (i = NR_RESERVED_ENTRIES; i < nr_init_grefs - 1; i++)
579		gnttab_entry(i) = i + 1;
580
581	gnttab_entry(nr_init_grefs - 1) = GNTTAB_LIST_END;
582	gnttab_free_count = nr_init_grefs - NR_RESERVED_ENTRIES;
583	gnttab_free_head  = NR_RESERVED_ENTRIES;
584
585	printk("Grant table initialized\n");
586	return 0;
587
588ini_nomem:
589	for (i--; i >= 0; i--)
590		free(gnttab_list[i], M_DEVBUF);
591	free(gnttab_list, M_DEVBUF);
592	return -ENOMEM;
593
594}
595
596MTX_SYSINIT(gnttab, &gnttab_list_lock, "GNTTAB LOCK", MTX_DEF);
597SYSINIT(gnttab, SI_SUB_PSEUDO, SI_ORDER_FIRST, gnttab_init, NULL);
598