1/*      $NetBSD: xengnt.c,v 1.22.2.1 2012/02/23 21:19:55 riz Exp $      */
2
3/*
4 * Copyright (c) 2006 Manuel Bouyer.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 *
26 */
27
28#include <sys/cdefs.h>
29__KERNEL_RCSID(0, "$NetBSD: xengnt.c,v 1.22.2.1 2012/02/23 21:19:55 riz Exp $");
30
31#include <sys/types.h>
32#include <sys/param.h>
33#include <sys/systm.h>
34#include <sys/malloc.h>
35#include <sys/queue.h>
36#include <sys/extent.h>
37#include <sys/kernel.h>
38#include <sys/mutex.h>
39#include <uvm/uvm.h>
40
41#include <xen/hypervisor.h>
42#include <xen/xen.h>
43#include <xen/granttables.h>
44
45/* #define XENDEBUG */
46#ifdef XENDEBUG
47#define DPRINTF(x) printf x
48#else
49#define DPRINTF(x)
50#endif
51
52#define NR_GRANT_ENTRIES_PER_PAGE (PAGE_SIZE / sizeof(grant_entry_t))
53
54/* External tools reserve first few grant table entries. */
55#define NR_RESERVED_ENTRIES 8
56
57/* Current number of frames making up the grant table */
58int gnt_nr_grant_frames;
59/* Maximum number of frames that can make up the grant table */
60int gnt_max_grant_frames;
61
62/* table of free grant entries */
63grant_ref_t *gnt_entries;
64/* last free entry */
65int last_gnt_entry;
66/* empty entry in the list */
67#define XENGNT_NO_ENTRY 0xffffffff
68
69/* VM address of the grant table */
70grant_entry_t *grant_table;
71kmutex_t grant_lock;
72
73static grant_ref_t xengnt_get_entry(void);
74static void xengnt_free_entry(grant_ref_t);
75static int xengnt_more_entries(void);
76
77void
78xengnt_init(void)
79{
80	struct gnttab_query_size query;
81	int rc;
82	int nr_grant_entries;
83	int i;
84
85	query.dom = DOMID_SELF;
86	rc = HYPERVISOR_grant_table_op(GNTTABOP_query_size, &query, 1);
87	if ((rc < 0) || (query.status != GNTST_okay))
88		gnt_max_grant_frames = 4; /* Legacy max number of frames */
89	else
90		gnt_max_grant_frames = query.max_nr_frames;
91	gnt_nr_grant_frames = 0;
92
93	nr_grant_entries =
94	    gnt_max_grant_frames * NR_GRANT_ENTRIES_PER_PAGE;
95
96	grant_table = (void *)uvm_km_alloc(kernel_map,
97	    gnt_max_grant_frames * PAGE_SIZE, 0, UVM_KMF_VAONLY);
98	if (grant_table == NULL)
99		panic("xengnt_init() no VM space");
100	gnt_entries = malloc((nr_grant_entries + 1) * sizeof(grant_ref_t),
101	    M_DEVBUF, M_NOWAIT);
102	if (gnt_entries == NULL)
103		panic("xengnt_init() no space for bitmask");
104	for (i = 0; i <= nr_grant_entries; i++)
105		gnt_entries[i] = XENGNT_NO_ENTRY;
106
107	mutex_init(&grant_lock, MUTEX_DEFAULT, IPL_VM);
108
109	xengnt_resume();
110
111}
112
113/*
114 * Resume grant table state
115 */
116bool
117xengnt_resume(void)
118{
119	int previous_nr_grant_frames = gnt_nr_grant_frames;
120
121	last_gnt_entry = 0;
122	gnt_nr_grant_frames = 0;
123
124	mutex_enter(&grant_lock);
125	while (gnt_nr_grant_frames < previous_nr_grant_frames) {
126		if (xengnt_more_entries() != 0)
127			panic("xengnt_resume: can't restore grant frames");
128	}
129	mutex_exit(&grant_lock);
130	return true;
131}
132
133/*
134 * Suspend grant table state
135 */
136bool
137xengnt_suspend(void) {
138
139	int i;
140
141	mutex_enter(&grant_lock);
142	KASSERT(gnt_entries[last_gnt_entry] == XENGNT_NO_ENTRY);
143
144	for (i = 0; i < last_gnt_entry; i++) {
145		/* invalidate all grant entries (necessary for resume) */
146		gnt_entries[i] = XENGNT_NO_ENTRY;
147	}
148
149	/* Remove virtual => machine mapping */
150	pmap_kremove((vaddr_t)grant_table, gnt_nr_grant_frames * PAGE_SIZE);
151	pmap_update(pmap_kernel());
152	mutex_exit(&grant_lock);
153	return true;
154}
155
156
157/*
158 * Add another page to the grant table
159 * Returns 0 on success, ENOMEM on failure
160 */
161static int
162xengnt_more_entries(void)
163{
164	gnttab_setup_table_t setup;
165	u_long *pages;
166	int nframes_new = gnt_nr_grant_frames + 1;
167	int i, start_gnt;
168	KASSERT(mutex_owned(&grant_lock));
169
170	if (gnt_nr_grant_frames == gnt_max_grant_frames)
171		return ENOMEM;
172
173	pages = malloc(nframes_new * sizeof(u_long), M_DEVBUF, M_NOWAIT);
174	if (pages == NULL)
175		return ENOMEM;
176
177	setup.dom = DOMID_SELF;
178	setup.nr_frames = nframes_new;
179	xenguest_handle(setup.frame_list) = pages;
180
181	/*
182	 * setup the grant table, made of nframes_new frames
183	 * and return the list of their virtual addresses
184	 * in 'pages'
185	 */
186	if (HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1) != 0)
187		panic("%s: setup table failed", __func__);
188	if (setup.status != GNTST_okay) {
189		aprint_error("%s: setup table returned %d\n",
190		    __func__, setup.status);
191		free(pages, M_DEVBUF);
192		return ENOMEM;
193	}
194
195	DPRINTF(("xengnt_more_entries: map 0x%lx -> %p\n",
196	    pages[gnt_nr_grant_frames],
197	    (char *)grant_table + gnt_nr_grant_frames * PAGE_SIZE));
198
199	/*
200	 * map between grant_table addresses and the machine addresses of
201	 * the grant table frames
202	 */
203	pmap_kenter_ma(((vaddr_t)grant_table) + gnt_nr_grant_frames * PAGE_SIZE,
204	    ((paddr_t)pages[gnt_nr_grant_frames]) << PAGE_SHIFT,
205	    VM_PROT_WRITE, 0);
206	pmap_update(pmap_kernel());
207
208	/*
209	 * add the grant entries associated to the last grant table frame
210	 * and mark them as free. Prevent using the first grants (from 0 to 8)
211	 * since they are used by the tools.
212	 */
213	start_gnt = (gnt_nr_grant_frames * NR_GRANT_ENTRIES_PER_PAGE) <
214	            (NR_RESERVED_ENTRIES + 1) ?
215	            (NR_RESERVED_ENTRIES + 1) :
216	            (gnt_nr_grant_frames * NR_GRANT_ENTRIES_PER_PAGE);
217	for (i = start_gnt;
218	    i < nframes_new * NR_GRANT_ENTRIES_PER_PAGE;
219	    i++) {
220		KASSERT(gnt_entries[last_gnt_entry] == XENGNT_NO_ENTRY);
221		gnt_entries[last_gnt_entry] = i;
222		last_gnt_entry++;
223	}
224	gnt_nr_grant_frames = nframes_new;
225	free(pages, M_DEVBUF);
226	return 0;
227}
228
229/*
230 * Returns a reference to the first free entry in grant table
231 */
232static grant_ref_t
233xengnt_get_entry(void)
234{
235	grant_ref_t entry;
236	static struct timeval xengnt_nonmemtime;
237	static const struct timeval xengnt_nonmemintvl = {5,0};
238
239	if (last_gnt_entry == 0) {
240		if (xengnt_more_entries()) {
241			if (ratecheck(&xengnt_nonmemtime, &xengnt_nonmemintvl))
242				printf("xengnt_get_entry: out of grant "
243				    "table entries\n");
244			return XENGNT_NO_ENTRY;
245		}
246	}
247	KASSERT(gnt_entries[last_gnt_entry] == XENGNT_NO_ENTRY);
248	last_gnt_entry--;
249	entry = gnt_entries[last_gnt_entry];
250	gnt_entries[last_gnt_entry] = XENGNT_NO_ENTRY;
251	KASSERT(entry != XENGNT_NO_ENTRY && entry > NR_RESERVED_ENTRIES);
252	KASSERT(last_gnt_entry >= 0);
253	KASSERT(last_gnt_entry <= gnt_max_grant_frames * NR_GRANT_ENTRIES_PER_PAGE);
254	return entry;
255}
256
257/*
258 * Mark the grant table entry as free
259 */
260static void
261xengnt_free_entry(grant_ref_t entry)
262{
263	mutex_enter(&grant_lock);
264	KASSERT(entry > NR_RESERVED_ENTRIES);
265	KASSERT(gnt_entries[last_gnt_entry] == XENGNT_NO_ENTRY);
266	KASSERT(last_gnt_entry >= 0);
267	KASSERT(last_gnt_entry <= gnt_max_grant_frames * NR_GRANT_ENTRIES_PER_PAGE);
268	gnt_entries[last_gnt_entry] = entry;
269	last_gnt_entry++;
270	mutex_exit(&grant_lock);
271}
272
273int
274xengnt_grant_access(domid_t dom, paddr_t ma, int ro, grant_ref_t *entryp)
275{
276	mutex_enter(&grant_lock);
277
278	*entryp = xengnt_get_entry();
279	if (__predict_false(*entryp == XENGNT_NO_ENTRY)) {
280		mutex_exit(&grant_lock);
281		return ENOMEM;
282	}
283
284	grant_table[*entryp].frame = ma >> PAGE_SHIFT;
285	grant_table[*entryp].domid = dom;
286	/*
287	 * ensure that the above values reach global visibility
288	 * before permitting frame's access (done when we set flags)
289	 */
290	xen_rmb();
291	grant_table[*entryp].flags =
292	    GTF_permit_access | (ro ? GTF_readonly : 0);
293	mutex_exit(&grant_lock);
294	return 0;
295}
296
297void
298xengnt_revoke_access(grant_ref_t entry)
299{
300	uint16_t flags, nflags;
301
302	nflags = grant_table[entry].flags;
303
304	do {
305		if ((flags = nflags) & (GTF_reading|GTF_writing))
306			panic("xengnt_revoke_access: still in use");
307		nflags = xen_atomic_cmpxchg16(&grant_table[entry].flags,
308		    flags, 0);
309	} while (nflags != flags);
310	xengnt_free_entry(entry);
311}
312
313int
314xengnt_grant_transfer(domid_t dom, grant_ref_t *entryp)
315{
316	mutex_enter(&grant_lock);
317
318	*entryp = xengnt_get_entry();
319	if (__predict_false(*entryp == XENGNT_NO_ENTRY)) {
320		mutex_exit(&grant_lock);
321		return ENOMEM;
322	}
323
324	grant_table[*entryp].frame = 0;
325	grant_table[*entryp].domid = dom;
326	/*
327	 * ensure that the above values reach global visibility
328	 * before permitting frame's transfer (done when we set flags)
329	 */
330	xen_rmb();
331	grant_table[*entryp].flags = GTF_accept_transfer;
332	mutex_exit(&grant_lock);
333	return 0;
334}
335
336paddr_t
337xengnt_revoke_transfer(grant_ref_t entry)
338{
339	paddr_t page;
340	uint16_t flags;
341
342	/* if the transfer has not started, free the entry and return 0 */
343	while (!((flags = grant_table[entry].flags) & GTF_transfer_committed)) {
344		if (xen_atomic_cmpxchg16(&grant_table[entry].flags,
345		    flags, 0) == flags ) {
346			xengnt_free_entry(entry);
347			return 0;
348		}
349		HYPERVISOR_yield();
350	}
351
352	/* If transfer in progress, wait for completion */
353	while (!((flags = grant_table[entry].flags) & GTF_transfer_completed))
354		HYPERVISOR_yield();
355
356	/* Read the frame number /after/ reading completion status. */
357	__insn_barrier();
358	page = grant_table[entry].frame;
359	if (page == 0)
360		printf("xengnt_revoke_transfer: guest sent pa 0\n");
361
362	xengnt_free_entry(entry);
363	return page;
364}
365
366int
367xengnt_status(grant_ref_t entry)
368{
369	return (grant_table[entry].flags & (GTF_reading|GTF_writing));
370}
371