mem.c revision 330449
/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2012 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: stable/11/usr.sbin/bhyve/mem.c 330449 2018-03-05 07:26:05Z eadler $
 */

/*
 * Memory ranges are represented with an RB tree. On insertion, the range
 * is checked for overlaps. On lookup, the key has the same base and limit,
 * so it compares equal to any range that contains the address.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/11/usr.sbin/bhyve/mem.c 330449 2018-03-05 07:26:05Z eadler $");

#include <sys/types.h>
#include <sys/tree.h>
#include <sys/errno.h>
#include <machine/vmm.h>
#include <machine/vmm_instruction_emul.h>

#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <pthread.h>

#include "mem.h"

struct mmio_rb_range {
	RB_ENTRY(mmio_rb_range)	mr_link;	/* RB tree links */
	struct mem_range	mr_param;
	uint64_t		mr_base;
	uint64_t		mr_end;
};

struct mmio_rb_tree;
RB_PROTOTYPE(mmio_rb_tree, mmio_rb_range, mr_link, mmio_rb_range_compare);

RB_HEAD(mmio_rb_tree, mmio_rb_range) mmio_rb_root, mmio_rb_fallback;

/*
 * Per-vCPU cache. Since most accesses from a vCPU will be to
 * consecutive addresses in a range, it makes sense to cache the
 * result of a lookup.
 */
static struct mmio_rb_range	*mmio_hint[VM_MAXCPU];

static pthread_rwlock_t mmio_rwlock;

static int
mmio_rb_range_compare(struct mmio_rb_range *a, struct mmio_rb_range *b)
{
	if (a->mr_end < b->mr_base)
		return (-1);
	else if (a->mr_base > b->mr_end)
		return (1);
	return (0);
}

static int
mmio_rb_lookup(struct mmio_rb_tree *rbt, uint64_t addr,
    struct mmio_rb_range **entry)
{
	struct mmio_rb_range find, *res;

	find.mr_base = find.mr_end = addr;

	res = RB_FIND(mmio_rb_tree, rbt, &find);

	if (res != NULL) {
		*entry = res;
		return (0);
	}

	return (ENOENT);
}
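
/*
 * Illustrative sketch (not compiled), showing why the point lookup above
 * works: the key passed to RB_FIND() has mr_base == mr_end == addr, so
 * mmio_rb_range_compare() reports "equal" for any registered range whose
 * [mr_base, mr_end] contains addr.  The addresses below are hypothetical.
 */
#if 0
static void
mmio_rb_lookup_example(void)
{
	struct mmio_rb_range reg, key;

	/* A registered range covering [0x1000, 0x1fff]. */
	reg.mr_base = 0x1000;
	reg.mr_end = 0x1fff;

	/* A lookup key for address 0x1800 collapses to a single point. */
	key.mr_base = key.mr_end = 0x1800;

	/*
	 * reg.mr_end (0x1fff) is not below key.mr_base (0x1800) and
	 * reg.mr_base (0x1000) is not above key.mr_end (0x1800), so the
	 * comparator returns 0 and RB_FIND() would yield 'reg'.
	 */
	assert(mmio_rb_range_compare(&reg, &key) == 0);
}
#endif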

static int
mmio_rb_add(struct mmio_rb_tree *rbt, struct mmio_rb_range *new)
{
	struct mmio_rb_range *overlap;

	overlap = RB_INSERT(mmio_rb_tree, rbt, new);

	if (overlap != NULL) {
#ifdef RB_DEBUG
		printf("overlap detected: new %lx:%lx, tree %lx:%lx\n",
		       new->mr_base, new->mr_end,
		       overlap->mr_base, overlap->mr_end);
#endif

		return (EEXIST);
	}

	return (0);
}

#if 0
static void
mmio_rb_dump(struct mmio_rb_tree *rbt)
{
	struct mmio_rb_range *np;

	pthread_rwlock_rdlock(&mmio_rwlock);
	RB_FOREACH(np, mmio_rb_tree, rbt) {
		printf(" %lx:%lx, %s\n", np->mr_base, np->mr_end,
		       np->mr_param.name);
	}
	pthread_rwlock_unlock(&mmio_rwlock);
}
#endif

RB_GENERATE(mmio_rb_tree, mmio_rb_range, mr_link, mmio_rb_range_compare);

static int
mem_read(void *ctx, int vcpu, uint64_t gpa, uint64_t *rval, int size, void *arg)
{
	int error;
	struct mem_range *mr = arg;

	error = (*mr->handler)(ctx, vcpu, MEM_F_READ, gpa, size,
			       rval, mr->arg1, mr->arg2);
	return (error);
}

static int
mem_write(void *ctx, int vcpu, uint64_t gpa, uint64_t wval, int size, void *arg)
{
	int error;
	struct mem_range *mr = arg;

	error = (*mr->handler)(ctx, vcpu, MEM_F_WRITE, gpa, size,
			       &wval, mr->arg1, mr->arg2);
	return (error);
}

int
emulate_mem(struct vmctx *ctx, int vcpu, uint64_t paddr, struct vie *vie,
    struct vm_guest_paging *paging)
{
	struct mmio_rb_range *entry;
	int err, immutable;

	pthread_rwlock_rdlock(&mmio_rwlock);
	/*
	 * First check the per-vCPU cache
	 */
	if (mmio_hint[vcpu] &&
	    paddr >= mmio_hint[vcpu]->mr_base &&
	    paddr <= mmio_hint[vcpu]->mr_end) {
		entry = mmio_hint[vcpu];
	} else
		entry = NULL;

	if (entry == NULL) {
		if (mmio_rb_lookup(&mmio_rb_root, paddr, &entry) == 0) {
			/* Update the per-vCPU cache */
			mmio_hint[vcpu] = entry;
		} else if (mmio_rb_lookup(&mmio_rb_fallback, paddr, &entry)) {
			pthread_rwlock_unlock(&mmio_rwlock);
			return (ESRCH);
		}
	}

	assert(entry != NULL);

	/*
	 * An 'immutable' memory range is guaranteed never to be removed,
	 * so there is no need to hold 'mmio_rwlock' while calling the
	 * handler.
	 *
	 * XXX writes to the PCIR_COMMAND register can cause register_mem()
	 * to be called. If the guest is using PCI extended config space
	 * to modify the PCIR_COMMAND register then register_mem() can
	 * deadlock on 'mmio_rwlock'. However, by registering the extended
	 * config space window as 'immutable' the deadlock can be avoided.
	 */
	immutable = (entry->mr_param.flags & MEM_F_IMMUTABLE);
	if (immutable)
		pthread_rwlock_unlock(&mmio_rwlock);

	err = vmm_emulate_instruction(ctx, vcpu, paddr, vie, paging,
				      mem_read, mem_write, &entry->mr_param);

	if (!immutable)
		pthread_rwlock_unlock(&mmio_rwlock);

	return (err);
}
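
/*
 * Illustrative sketch (not compiled) of the MEM_F_IMMUTABLE case described
 * in the comment above: a range that is never unregistered can be marked
 * immutable, so emulate_mem() drops 'mmio_rwlock' before calling its
 * handler, and the handler is then free to call register_mem() itself
 * without deadlocking.  The handler, base and size below are hypothetical
 * placeholders, not the values bhyve uses for the PCI extended config
 * window.
 */
#if 0
static int
ecfg_example_handler(struct vmctx *ctx, int vcpu, int dir, uint64_t addr,
    int size, uint64_t *val, void *arg1, long arg2)
{
	/* Decode 'addr' and forward to PCI config space emulation here. */
	return (0);
}

static void
ecfg_example_register(void)
{
	struct mem_range mr = {
		.name = "ecfg-example",
		.base = 0xE0000000UL,		/* hypothetical window base */
		.size = 256 * 1024 * 1024,	/* hypothetical window size */
		.flags = MEM_F_READ | MEM_F_WRITE | MEM_F_IMMUTABLE,
		.handler = ecfg_example_handler,
	};

	(void)register_mem(&mr);
}
#endif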

static int
register_mem_int(struct mmio_rb_tree *rbt, struct mem_range *memp)
{
	struct mmio_rb_range *entry, *mrp;
	int		err;

	err = 0;

	mrp = malloc(sizeof(struct mmio_rb_range));

	if (mrp != NULL) {
		mrp->mr_param = *memp;
		mrp->mr_base = memp->base;
		mrp->mr_end = memp->base + memp->size - 1;
		pthread_rwlock_wrlock(&mmio_rwlock);
		if (mmio_rb_lookup(rbt, memp->base, &entry) != 0)
			err = mmio_rb_add(rbt, mrp);
		pthread_rwlock_unlock(&mmio_rwlock);
		if (err)
			free(mrp);
	} else
		err = ENOMEM;

	return (err);
}

int
register_mem(struct mem_range *memp)
{

	return (register_mem_int(&mmio_rb_root, memp));
}

int
register_mem_fallback(struct mem_range *memp)
{

	return (register_mem_int(&mmio_rb_fallback, memp));
}
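
/*
 * Illustrative sketch (not compiled) of how a device model would use the
 * interface above.  The handler name, range and values are hypothetical.
 * Once registered, guest accesses that fault into [base, base + size - 1]
 * are routed by emulate_mem() to the handler with MEM_F_READ or
 * MEM_F_WRITE; ranges added with register_mem_fallback() are consulted
 * only when no regular range matches.
 */
#if 0
static int
mydev_example_handler(struct vmctx *ctx, int vcpu, int dir, uint64_t addr,
    int size, uint64_t *val, void *arg1, long arg2)
{
	if (dir == MEM_F_READ)
		*val = 0;	/* hand back a dummy register value */

	/* Writes are silently dropped in this sketch. */
	return (0);
}

static void
mydev_example_attach(void)
{
	struct mem_range mr = {
		.name = "mydev-example",
		.base = 0xD0000000UL,	/* hypothetical MMIO base */
		.size = 0x1000,		/* hypothetical MMIO size */
		.flags = MEM_F_READ | MEM_F_WRITE,
		.handler = mydev_example_handler,
		.arg1 = NULL,
		.arg2 = 0,
	};

	if (register_mem(&mr) != 0) {
		/* The range overlapped an existing one or malloc failed. */
	}
}
#endif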

int
unregister_mem(struct mem_range *memp)
{
	struct mem_range *mr;
	struct mmio_rb_range *entry = NULL;
	int err, i;

	pthread_rwlock_wrlock(&mmio_rwlock);
	err = mmio_rb_lookup(&mmio_rb_root, memp->base, &entry);
	if (err == 0) {
		mr = &entry->mr_param;
		assert(mr->name == memp->name);
		assert(mr->base == memp->base && mr->size == memp->size);
		assert((mr->flags & MEM_F_IMMUTABLE) == 0);
		RB_REMOVE(mmio_rb_tree, &mmio_rb_root, entry);

		/* flush the per-vCPU cache */
		for (i = 0; i < VM_MAXCPU; i++) {
			if (mmio_hint[i] == entry)
				mmio_hint[i] = NULL;
		}
	}
	pthread_rwlock_unlock(&mmio_rwlock);

	if (entry)
		free(entry);

	return (err);
}

void
init_mem(void)
{

	RB_INIT(&mmio_rb_root);
	RB_INIT(&mmio_rb_fallback);
	pthread_rwlock_init(&mmio_rwlock, NULL);
}