1/******************************************************************************
2 * gnttab.c
3 *
4 * Two sets of functionality:
5 * 1. Granting foreign access to our memory reservation.
6 * 2. Accessing others' memory reservations via grant references.
7 * (i.e., mechanisms for both sender and recipient of grant references)
8 *
9 * Copyright (c) 2005, Christopher Clark
10 * Copyright (c) 2004, K A Fraser
11 */
12
13#include <sys/cdefs.h>
14__FBSDID("$FreeBSD$");
15
16#include "opt_global.h"
17#include "opt_pmap.h"
18
19#include <sys/param.h>
20#include <sys/systm.h>
21#include <sys/bus.h>
22#include <sys/conf.h>
23#include <sys/module.h>
24#include <sys/kernel.h>
25#include <sys/lock.h>
26#include <sys/malloc.h>
27#include <sys/mman.h>
28
29#include <xen/xen-os.h>
30#include <xen/hypervisor.h>
31#include <machine/xen/synch_bitops.h>
32
33#include <xen/hypervisor.h>
34#include <xen/gnttab.h>
35
36#include <vm/vm.h>
37#include <vm/vm_kern.h>
38#include <vm/vm_extern.h>
39#include <vm/pmap.h>
40
41#define cmpxchg(a, b, c) atomic_cmpset_int((volatile u_int *)(a),(b),(c))
42
43/* External tools reserve first few grant table entries. */
44#define NR_RESERVED_ENTRIES 8
45#define GREFS_PER_GRANT_FRAME (PAGE_SIZE / sizeof(grant_entry_t))
46
47static grant_ref_t **gnttab_list;
48static unsigned int nr_grant_frames;
49static unsigned int boot_max_nr_grant_frames;
50static int gnttab_free_count;
51static grant_ref_t gnttab_free_head;
52static struct mtx gnttab_list_lock;
53
54static grant_entry_t *shared;
55
56static struct gnttab_free_callback *gnttab_free_callback_list = NULL;
57
58static int gnttab_expand(unsigned int req_entries);
59
60#define RPP (PAGE_SIZE / sizeof(grant_ref_t))
61#define gnttab_entry(entry) (gnttab_list[(entry) / RPP][(entry) % RPP])
62
63static int
64get_free_entries(int count, int *entries)
65{
66	int ref, error;
67	grant_ref_t head;
68
69	mtx_lock(&gnttab_list_lock);
70	if ((gnttab_free_count < count) &&
71	    ((error = gnttab_expand(count - gnttab_free_count)) != 0)) {
72		mtx_unlock(&gnttab_list_lock);
73		return (error);
74	}
75	ref = head = gnttab_free_head;
76	gnttab_free_count -= count;
77	while (count-- > 1)
78		head = gnttab_entry(head);
79	gnttab_free_head = gnttab_entry(head);
80	gnttab_entry(head) = GNTTAB_LIST_END;
81	mtx_unlock(&gnttab_list_lock);
82
83	*entries = ref;
84	return (0);
85}
86
87static void
88do_free_callbacks(void)
89{
90	struct gnttab_free_callback *callback, *next;
91
92	callback = gnttab_free_callback_list;
93	gnttab_free_callback_list = NULL;
94
95	while (callback != NULL) {
96		next = callback->next;
97		if (gnttab_free_count >= callback->count) {
98			callback->next = NULL;
99			callback->fn(callback->arg);
100		} else {
101			callback->next = gnttab_free_callback_list;
102			gnttab_free_callback_list = callback;
103		}
104		callback = next;
105	}
106}
107
108static inline void
109check_free_callbacks(void)
110{
111	if (__predict_false(gnttab_free_callback_list != NULL))
112		do_free_callbacks();
113}
114
115static void
116put_free_entry(grant_ref_t ref)
117{
118
119	mtx_lock(&gnttab_list_lock);
120	gnttab_entry(ref) = gnttab_free_head;
121	gnttab_free_head = ref;
122	gnttab_free_count++;
123	check_free_callbacks();
124	mtx_unlock(&gnttab_list_lock);
125}
126
127/*
128 * Public grant-issuing interface functions
129 */
130
131int
132gnttab_grant_foreign_access(domid_t domid, unsigned long frame, int readonly,
133	grant_ref_t *result)
134{
135	int error, ref;
136
137	error = get_free_entries(1, &ref);
138
139	if (__predict_false(error))
140		return (error);
141
142	shared[ref].frame = frame;
143	shared[ref].domid = domid;
144	wmb();
145	shared[ref].flags = GTF_permit_access | (readonly ? GTF_readonly : 0);
146
147	if (result)
148		*result = ref;
149
150	return (0);
151}
152
153void
154gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid,
155				unsigned long frame, int readonly)
156{
157
158	shared[ref].frame = frame;
159	shared[ref].domid = domid;
160	wmb();
161	shared[ref].flags = GTF_permit_access | (readonly ? GTF_readonly : 0);
162}
163
164int
165gnttab_query_foreign_access(grant_ref_t ref)
166{
167	uint16_t nflags;
168
169	nflags = shared[ref].flags;
170
171	return (nflags & (GTF_reading|GTF_writing));
172}
173
174int
175gnttab_end_foreign_access_ref(grant_ref_t ref)
176{
177	uint16_t flags, nflags;
178
179	nflags = shared[ref].flags;
180	do {
181		if ( (flags = nflags) & (GTF_reading|GTF_writing) ) {
182			printf("%s: WARNING: g.e. still in use!\n", __func__);
183			return (0);
184		}
185	} while ((nflags = synch_cmpxchg(&shared[ref].flags, flags, 0)) !=
186	       flags);
187
188	return (1);
189}
190
191void
192gnttab_end_foreign_access(grant_ref_t ref, void *page)
193{
194	if (gnttab_end_foreign_access_ref(ref)) {
195		put_free_entry(ref);
196		if (page != NULL) {
197			free(page, M_DEVBUF);
198		}
199	}
200	else {
201		/* XXX This needs to be fixed so that the ref and page are
202		   placed on a list to be freed up later. */
203		printf("%s: WARNING: leaking g.e. and page still in use!\n",
204		       __func__);
205	}
206}
207
208void
209gnttab_end_foreign_access_references(u_int count, grant_ref_t *refs)
210{
211	grant_ref_t *last_ref;
212	grant_ref_t  head;
213	grant_ref_t  tail;
214
215	head = GNTTAB_LIST_END;
216	tail = *refs;
217	last_ref = refs + count;
218	while (refs != last_ref) {
219
220		if (gnttab_end_foreign_access_ref(*refs)) {
221			gnttab_entry(*refs) = head;
222			head = *refs;
223		} else {
224			/*
225			 * XXX This needs to be fixed so that the ref
226			 * is placed on a list to be freed up later.
227			 */
228			printf("%s: WARNING: leaking g.e. still in use!\n",
229			       __func__);
230			count--;
231		}
232		refs++;
233	}
234
235	if (count != 0) {
236		mtx_lock(&gnttab_list_lock);
237		gnttab_free_count += count;
238		gnttab_entry(tail) = gnttab_free_head;
239		gnttab_free_head = head;
240		mtx_unlock(&gnttab_list_lock);
241	}
242}
243
244int
245gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn,
246    grant_ref_t *result)
247{
248	int error, ref;
249
250	error = get_free_entries(1, &ref);
251	if (__predict_false(error))
252		return (error);
253
254	gnttab_grant_foreign_transfer_ref(ref, domid, pfn);
255
256	*result = ref;
257	return (0);
258}
259
260void
261gnttab_grant_foreign_transfer_ref(grant_ref_t ref, domid_t domid,
262	unsigned long pfn)
263{
264	shared[ref].frame = pfn;
265	shared[ref].domid = domid;
266	wmb();
267	shared[ref].flags = GTF_accept_transfer;
268}
269
270unsigned long
271gnttab_end_foreign_transfer_ref(grant_ref_t ref)
272{
273	unsigned long frame;
274	uint16_t      flags;
275
276	/*
277         * If a transfer is not even yet started, try to reclaim the grant
278         * reference and return failure (== 0).
279         */
280	while (!((flags = shared[ref].flags) & GTF_transfer_committed)) {
281		if ( synch_cmpxchg(&shared[ref].flags, flags, 0) == flags )
282			return (0);
283		cpu_relax();
284	}
285
286	/* If a transfer is in progress then wait until it is completed. */
287	while (!(flags & GTF_transfer_completed)) {
288		flags = shared[ref].flags;
289		cpu_relax();
290	}
291
292	/* Read the frame number /after/ reading completion status. */
293	rmb();
294	frame = shared[ref].frame;
295	KASSERT(frame != 0, ("grant table inconsistent"));
296
297	return (frame);
298}
299
300unsigned long
301gnttab_end_foreign_transfer(grant_ref_t ref)
302{
303	unsigned long frame = gnttab_end_foreign_transfer_ref(ref);
304
305	put_free_entry(ref);
306	return (frame);
307}
308
309void
310gnttab_free_grant_reference(grant_ref_t ref)
311{
312
313	put_free_entry(ref);
314}
315
316void
317gnttab_free_grant_references(grant_ref_t head)
318{
319	grant_ref_t ref;
320	int count = 1;
321
322	if (head == GNTTAB_LIST_END)
323		return;
324
325	ref = head;
326	while (gnttab_entry(ref) != GNTTAB_LIST_END) {
327		ref = gnttab_entry(ref);
328		count++;
329	}
330	mtx_lock(&gnttab_list_lock);
331	gnttab_entry(ref) = gnttab_free_head;
332	gnttab_free_head = head;
333	gnttab_free_count += count;
334	check_free_callbacks();
335	mtx_unlock(&gnttab_list_lock);
336}
337
338int
339gnttab_alloc_grant_references(uint16_t count, grant_ref_t *head)
340{
341	int ref, error;
342
343	error = get_free_entries(count, &ref);
344	if (__predict_false(error))
345		return (error);
346
347	*head = ref;
348	return (0);
349}
350
351int
352gnttab_empty_grant_references(const grant_ref_t *private_head)
353{
354
355	return (*private_head == GNTTAB_LIST_END);
356}
357
358int
359gnttab_claim_grant_reference(grant_ref_t *private_head)
360{
361	grant_ref_t g = *private_head;
362
363	if (__predict_false(g == GNTTAB_LIST_END))
364		return (g);
365	*private_head = gnttab_entry(g);
366	return (g);
367}
368
369void
370gnttab_release_grant_reference(grant_ref_t *private_head, grant_ref_t  release)
371{
372
373	gnttab_entry(release) = *private_head;
374	*private_head = release;
375}
376
377void
378gnttab_request_free_callback(struct gnttab_free_callback *callback,
379    void (*fn)(void *), void *arg, uint16_t count)
380{
381
382	mtx_lock(&gnttab_list_lock);
383	if (callback->next)
384		goto out;
385	callback->fn = fn;
386	callback->arg = arg;
387	callback->count = count;
388	callback->next = gnttab_free_callback_list;
389	gnttab_free_callback_list = callback;
390	check_free_callbacks();
391 out:
392	mtx_unlock(&gnttab_list_lock);
393
394}
395
396void
397gnttab_cancel_free_callback(struct gnttab_free_callback *callback)
398{
399	struct gnttab_free_callback **pcb;
400
401	mtx_lock(&gnttab_list_lock);
402	for (pcb = &gnttab_free_callback_list; *pcb; pcb = &(*pcb)->next) {
403		if (*pcb == callback) {
404			*pcb = callback->next;
405			break;
406		}
407	}
408	mtx_unlock(&gnttab_list_lock);
409}
410
411
412static int
413grow_gnttab_list(unsigned int more_frames)
414{
415	unsigned int new_nr_grant_frames, extra_entries, i;
416
417	new_nr_grant_frames = nr_grant_frames + more_frames;
418	extra_entries       = more_frames * GREFS_PER_GRANT_FRAME;
419
420	for (i = nr_grant_frames; i < new_nr_grant_frames; i++)
421	{
422		gnttab_list[i] = (grant_ref_t *)
423			malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT);
424
425		if (!gnttab_list[i])
426			goto grow_nomem;
427	}
428
429	for (i = GREFS_PER_GRANT_FRAME * nr_grant_frames;
430	     i < GREFS_PER_GRANT_FRAME * new_nr_grant_frames - 1; i++)
431		gnttab_entry(i) = i + 1;
432
433	gnttab_entry(i) = gnttab_free_head;
434	gnttab_free_head = GREFS_PER_GRANT_FRAME * nr_grant_frames;
435	gnttab_free_count += extra_entries;
436
437	nr_grant_frames = new_nr_grant_frames;
438
439	check_free_callbacks();
440
441	return (0);
442
443grow_nomem:
444	for ( ; i >= nr_grant_frames; i--)
445		free(gnttab_list[i], M_DEVBUF);
446	return (ENOMEM);
447}
448
449static unsigned int
450__max_nr_grant_frames(void)
451{
452	struct gnttab_query_size query;
453	int rc;
454
455	query.dom = DOMID_SELF;
456
457	rc = HYPERVISOR_grant_table_op(GNTTABOP_query_size, &query, 1);
458	if ((rc < 0) || (query.status != GNTST_okay))
459		return (4); /* Legacy max supported number of frames */
460
461	return (query.max_nr_frames);
462}
463
464static inline
465unsigned int max_nr_grant_frames(void)
466{
467	unsigned int xen_max = __max_nr_grant_frames();
468
469	if (xen_max > boot_max_nr_grant_frames)
470		return (boot_max_nr_grant_frames);
471	return (xen_max);
472}
473
474#ifdef notyet
475/*
476 * XXX needed for backend support
477 *
478 */
479static int
480map_pte_fn(pte_t *pte, struct page *pmd_page,
481		      unsigned long addr, void *data)
482{
483	unsigned long **frames = (unsigned long **)data;
484
485	set_pte_at(&init_mm, addr, pte, pfn_pte_ma((*frames)[0], PAGE_KERNEL));
486	(*frames)++;
487	return 0;
488}
489
490static int
491unmap_pte_fn(pte_t *pte, struct page *pmd_page,
492			unsigned long addr, void *data)
493{
494
495	set_pte_at(&init_mm, addr, pte, __pte(0));
496	return 0;
497}
498#endif
499
500#ifndef XENHVM
501
502static int
503gnttab_map(unsigned int start_idx, unsigned int end_idx)
504{
505	struct gnttab_setup_table setup;
506	u_long *frames;
507
508	unsigned int nr_gframes = end_idx + 1;
509	int i, rc;
510
511	frames = malloc(nr_gframes * sizeof(unsigned long), M_DEVBUF, M_NOWAIT);
512	if (!frames)
513		return (ENOMEM);
514
515	setup.dom        = DOMID_SELF;
516	setup.nr_frames  = nr_gframes;
517	set_xen_guest_handle(setup.frame_list, frames);
518
519	rc = HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1);
520	if (rc == -ENOSYS) {
521		free(frames, M_DEVBUF);
522		return (ENOSYS);
523	}
524	KASSERT(!(rc || setup.status),
525	    ("unexpected result from grant_table_op"));
526
527	if (shared == NULL) {
528		vm_offset_t area;
529
530		area = kva_alloc(PAGE_SIZE * max_nr_grant_frames());
531		KASSERT(area, ("can't allocate VM space for grant table"));
532		shared = (grant_entry_t *)area;
533	}
534
535	for (i = 0; i < nr_gframes; i++)
536		PT_SET_MA(((caddr_t)shared) + i*PAGE_SIZE,
537		    ((vm_paddr_t)frames[i]) << PAGE_SHIFT | PG_RW | PG_V);
538
539	free(frames, M_DEVBUF);
540
541	return (0);
542}
543
544int
545gnttab_resume(void)
546{
547
548	if (max_nr_grant_frames() < nr_grant_frames)
549		return (ENOSYS);
550	return (gnttab_map(0, nr_grant_frames - 1));
551}
552
553int
554gnttab_suspend(void)
555{
556	int i;
557
558	for (i = 0; i < nr_grant_frames; i++)
559		pmap_kremove((vm_offset_t) shared + i * PAGE_SIZE);
560
561	return (0);
562}
563
564#else /* XENHVM */
565
566#include <dev/xen/xenpci/xenpcivar.h>
567
568static vm_paddr_t resume_frames;
569
570static int
571gnttab_map(unsigned int start_idx, unsigned int end_idx)
572{
573	struct xen_add_to_physmap xatp;
574	unsigned int i = end_idx;
575
576	/*
577	 * Loop backwards, so that the first hypercall has the largest index,
578	 * ensuring that the table will grow only once.
579	 */
580	do {
581		xatp.domid = DOMID_SELF;
582		xatp.idx = i;
583		xatp.space = XENMAPSPACE_grant_table;
584		xatp.gpfn = (resume_frames >> PAGE_SHIFT) + i;
585		if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp))
586			panic("HYPERVISOR_memory_op failed to map gnttab");
587	} while (i-- > start_idx);
588
589	if (shared == NULL) {
590		vm_offset_t area;
591
592		area = kva_alloc(PAGE_SIZE * max_nr_grant_frames());
593		KASSERT(area, ("can't allocate VM space for grant table"));
594		shared = (grant_entry_t *)area;
595	}
596
597	for (i = start_idx; i <= end_idx; i++) {
598		pmap_kenter((vm_offset_t) shared + i * PAGE_SIZE,
599		    resume_frames + i * PAGE_SIZE);
600	}
601
602	return (0);
603}
604
605int
606gnttab_resume(void)
607{
608	int error;
609	unsigned int max_nr_gframes, nr_gframes;
610
611	nr_gframes = nr_grant_frames;
612	max_nr_gframes = max_nr_grant_frames();
613	if (max_nr_gframes < nr_gframes)
614		return (ENOSYS);
615
616	if (!resume_frames) {
617		error = xenpci_alloc_space(PAGE_SIZE * max_nr_gframes,
618		    &resume_frames);
619		if (error) {
620			printf("error mapping gnttab share frames\n");
621			return (error);
622		}
623	}
624
625	return (gnttab_map(0, nr_gframes - 1));
626}
627
628#endif
629
630static int
631gnttab_expand(unsigned int req_entries)
632{
633	int error;
634	unsigned int cur, extra;
635
636	cur = nr_grant_frames;
637	extra = ((req_entries + (GREFS_PER_GRANT_FRAME-1)) /
638		 GREFS_PER_GRANT_FRAME);
639	if (cur + extra > max_nr_grant_frames())
640		return (ENOSPC);
641
642	error = gnttab_map(cur, cur + extra - 1);
643	if (!error)
644		error = grow_gnttab_list(extra);
645
646	return (error);
647}
648
649int
650gnttab_init()
651{
652	int i;
653	unsigned int max_nr_glist_frames;
654	unsigned int nr_init_grefs;
655
656	if (!is_running_on_xen())
657		return (ENODEV);
658
659	nr_grant_frames = 1;
660	boot_max_nr_grant_frames = __max_nr_grant_frames();
661
662	/* Determine the maximum number of frames required for the
663	 * grant reference free list on the current hypervisor.
664	 */
665	max_nr_glist_frames = (boot_max_nr_grant_frames *
666			       GREFS_PER_GRANT_FRAME /
667			       (PAGE_SIZE / sizeof(grant_ref_t)));
668
669	gnttab_list = malloc(max_nr_glist_frames * sizeof(grant_ref_t *),
670	    M_DEVBUF, M_NOWAIT);
671
672	if (gnttab_list == NULL)
673		return (ENOMEM);
674
675	for (i = 0; i < nr_grant_frames; i++) {
676		gnttab_list[i] = (grant_ref_t *)
677			malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT);
678		if (gnttab_list[i] == NULL)
679			goto ini_nomem;
680	}
681
682	if (gnttab_resume())
683		return (ENODEV);
684
685	nr_init_grefs = nr_grant_frames * GREFS_PER_GRANT_FRAME;
686
687	for (i = NR_RESERVED_ENTRIES; i < nr_init_grefs - 1; i++)
688		gnttab_entry(i) = i + 1;
689
690	gnttab_entry(nr_init_grefs - 1) = GNTTAB_LIST_END;
691	gnttab_free_count = nr_init_grefs - NR_RESERVED_ENTRIES;
692	gnttab_free_head  = NR_RESERVED_ENTRIES;
693
694	if (bootverbose)
695		printf("Grant table initialized\n");
696
697	return (0);
698
699ini_nomem:
700	for (i--; i >= 0; i--)
701		free(gnttab_list[i], M_DEVBUF);
702	free(gnttab_list, M_DEVBUF);
703	return (ENOMEM);
704
705}
706
707MTX_SYSINIT(gnttab, &gnttab_list_lock, "GNTTAB LOCK", MTX_DEF);
708