/*-
 * Copyright (c) 2012 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: releng/11.0/usr.sbin/bhyve/mem.c 269700 2014-08-08 03:49:01Z neel $
 */

/*
 * Memory ranges are represented with an RB tree. On insertion, the range
 * is checked for overlaps. On lookup, the key has the same base and limit,
 * so any registered range that contains the address compares as equal and
 * is found. For example, a lookup key of {0x1010, 0x1010} matches a
 * registered range of [0x1000, 0x1fff].
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: releng/11.0/usr.sbin/bhyve/mem.c 269700 2014-08-08 03:49:01Z neel $");

#include <sys/types.h>
#include <sys/tree.h>
#include <sys/errno.h>
#include <machine/vmm.h>
#include <machine/vmm_instruction_emul.h>

#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <pthread.h>

#include "mem.h"

struct mmio_rb_range {
	RB_ENTRY(mmio_rb_range)	mr_link;	/* RB tree links */
	struct mem_range	mr_param;	/* caller-supplied range and handler */
	uint64_t		mr_base;	/* inclusive start of range */
	uint64_t		mr_end;		/* inclusive end of range */
};

struct mmio_rb_tree;
RB_PROTOTYPE(mmio_rb_tree, mmio_rb_range, mr_link, mmio_rb_range_compare);

RB_HEAD(mmio_rb_tree, mmio_rb_range) mmio_rb_root, mmio_rb_fallback;

/*
 * Per-vCPU cache. Since most accesses from a vCPU will be to
 * consecutive addresses in a range, it makes sense to cache the
 * result of a lookup.
 */
static struct mmio_rb_range	*mmio_hint[VM_MAXCPU];

/* Protects the RB trees and the per-vCPU hint cache */
static pthread_rwlock_t mmio_rwlock;

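/*
 * Compare two ranges: they are considered equal if they overlap at all.
 * This makes RB_INSERT() return the existing entry for any overlapping
 * insertion, and lets a zero-length lookup key (mr_base == mr_end == addr)
 * match whichever registered range contains 'addr'.
 */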
static int
mmio_rb_range_compare(struct mmio_rb_range *a, struct mmio_rb_range *b)
{
	if (a->mr_end < b->mr_base)
		return (-1);
	else if (a->mr_base > b->mr_end)
		return (1);
	return (0);
}

static int
mmio_rb_lookup(struct mmio_rb_tree *rbt, uint64_t addr,
    struct mmio_rb_range **entry)
{
	struct mmio_rb_range find, *res;

	find.mr_base = find.mr_end = addr;

	res = RB_FIND(mmio_rb_tree, rbt, &find);

	if (res != NULL) {
		*entry = res;
		return (0);
	}

	return (ENOENT);
}

static int
mmio_rb_add(struct mmio_rb_tree *rbt, struct mmio_rb_range *new)
{
	struct mmio_rb_range *overlap;

	overlap = RB_INSERT(mmio_rb_tree, rbt, new);

	if (overlap != NULL) {
#ifdef RB_DEBUG
		printf("overlap detected: new %lx:%lx, tree %lx:%lx\n",
		       new->mr_base, new->mr_end,
		       overlap->mr_base, overlap->mr_end);
#endif

		return (EEXIST);
	}

	return (0);
}

#if 0
static void
mmio_rb_dump(struct mmio_rb_tree *rbt)
{
	struct mmio_rb_range *np;

	pthread_rwlock_rdlock(&mmio_rwlock);
	RB_FOREACH(np, mmio_rb_tree, rbt) {
		printf(" %lx:%lx, %s\n", np->mr_base, np->mr_end,
		       np->mr_param.name);
	}
	pthread_rwlock_unlock(&mmio_rwlock);
}
#endif

RB_GENERATE(mmio_rb_tree, mmio_rb_range, mr_link, mmio_rb_range_compare);

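/*
 * mem_read() and mem_write() adapt a registered mem_range handler to the
 * read/write callback signatures expected by vmm_emulate_instruction().
 */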
static int
mem_read(void *ctx, int vcpu, uint64_t gpa, uint64_t *rval, int size, void *arg)
{
	int error;
	struct mem_range *mr = arg;

	error = (*mr->handler)(ctx, vcpu, MEM_F_READ, gpa, size,
			       rval, mr->arg1, mr->arg2);
	return (error);
}

static int
mem_write(void *ctx, int vcpu, uint64_t gpa, uint64_t wval, int size, void *arg)
{
	int error;
	struct mem_range *mr = arg;

	error = (*mr->handler)(ctx, vcpu, MEM_F_WRITE, gpa, size,
			       &wval, mr->arg1, mr->arg2);
	return (error);
}

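/*
 * Emulate the MMIO access at 'paddr' on behalf of 'vcpu': locate the
 * registered range containing 'paddr' (checking the per-vCPU hint first,
 * then the main tree, then the fallback tree) and hand the decoded
 * instruction to vmm_emulate_instruction() along with the range's handler.
 */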
int
emulate_mem(struct vmctx *ctx, int vcpu, uint64_t paddr, struct vie *vie,
    struct vm_guest_paging *paging)
{
	struct mmio_rb_range *entry;
	int err, immutable;

	pthread_rwlock_rdlock(&mmio_rwlock);
	/*
	 * First check the per-vCPU cache
	 */
	if (mmio_hint[vcpu] &&
	    paddr >= mmio_hint[vcpu]->mr_base &&
	    paddr <= mmio_hint[vcpu]->mr_end) {
		entry = mmio_hint[vcpu];
	} else
		entry = NULL;

	if (entry == NULL) {
		if (mmio_rb_lookup(&mmio_rb_root, paddr, &entry) == 0) {
			/* Update the per-vCPU cache */
			mmio_hint[vcpu] = entry;
		} else if (mmio_rb_lookup(&mmio_rb_fallback, paddr, &entry)) {
			pthread_rwlock_unlock(&mmio_rwlock);
			return (ESRCH);
		}
	}

	assert(entry != NULL);

	/*
	 * An 'immutable' memory range is guaranteed never to be removed,
	 * so there is no need to hold 'mmio_rwlock' while calling the
	 * handler.
	 *
	 * XXX writes to the PCIR_COMMAND register can cause register_mem()
	 * to be called. If the guest is using PCI extended config space
	 * to modify the PCIR_COMMAND register then register_mem() can
	 * deadlock on 'mmio_rwlock'. However, by registering the extended
	 * config space window as 'immutable', the deadlock can be avoided.
	 */
	immutable = (entry->mr_param.flags & MEM_F_IMMUTABLE);
	if (immutable)
		pthread_rwlock_unlock(&mmio_rwlock);

	err = vmm_emulate_instruction(ctx, vcpu, paddr, vie, paging,
				      mem_read, mem_write, &entry->mr_param);

	if (!immutable)
		pthread_rwlock_unlock(&mmio_rwlock);

	return (err);
}

static int
register_mem_int(struct mmio_rb_tree *rbt, struct mem_range *memp)
{
	struct mmio_rb_range *entry, *mrp;
	int		err;

	err = 0;

	mrp = malloc(sizeof(struct mmio_rb_range));

	if (mrp != NULL) {
		mrp->mr_param = *memp;
		mrp->mr_base = memp->base;
		mrp->mr_end = memp->base + memp->size - 1;
		pthread_rwlock_wrlock(&mmio_rwlock);
		if (mmio_rb_lookup(rbt, memp->base, &entry) != 0)
			err = mmio_rb_add(rbt, mrp);
		else
			err = EEXIST;	/* already registered; don't leak 'mrp' */
		pthread_rwlock_unlock(&mmio_rwlock);
		if (err)
			free(mrp);
	} else
		err = ENOMEM;

	return (err);
}

int
register_mem(struct mem_range *memp)
{

	return (register_mem_int(&mmio_rb_root, memp));
}

int
register_mem_fallback(struct mem_range *memp)
{

	return (register_mem_int(&mmio_rb_fallback, memp));
}
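
/*
 * Usage sketch (illustrative only, hence the '#if 0'): how a device model
 * might register a 4KB MMIO window.  'dummy_handler', 'dummy_attach' and
 * the base address are hypothetical; the real callers are the device
 * models.  Note that register_mem() copies the mem_range, so passing a
 * stack variable is fine.
 */
#if 0
static int
dummy_handler(struct vmctx *ctx, int vcpu, int dir, uint64_t addr,
	      int size, uint64_t *val, void *arg1, long arg2)
{

	if (dir == MEM_F_READ)
		*val = 0;	/* reads return zero */
	return (0);		/* writes are silently dropped */
}

static void
dummy_attach(void)
{
	struct mem_range mr;

	mr.name = "dummy";
	mr.base = 0xd0000000UL;	/* hypothetical guest-physical base */
	mr.size = 4096;
	mr.flags = MEM_F_RW;
	mr.handler = dummy_handler;
	mr.arg1 = NULL;
	mr.arg2 = 0;
	assert(register_mem(&mr) == 0);
}
#endif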

int
unregister_mem(struct mem_range *memp)
{
	struct mem_range *mr;
	struct mmio_rb_range *entry = NULL;
	int err, i;

	pthread_rwlock_wrlock(&mmio_rwlock);
	err = mmio_rb_lookup(&mmio_rb_root, memp->base, &entry);
	if (err == 0) {
		mr = &entry->mr_param;
		assert(mr->name == memp->name);
		assert(mr->base == memp->base && mr->size == memp->size);
		assert((mr->flags & MEM_F_IMMUTABLE) == 0);
		RB_REMOVE(mmio_rb_tree, &mmio_rb_root, entry);

		/* Flush the per-vCPU cache */
		for (i = 0; i < VM_MAXCPU; i++) {
			if (mmio_hint[i] == entry)
				mmio_hint[i] = NULL;
		}
	}
	pthread_rwlock_unlock(&mmio_rwlock);

	if (entry)
		free(entry);

	return (err);
}

void
init_mem(void)
{

	RB_INIT(&mmio_rb_root);
	RB_INIT(&mmio_rb_fallback);
	pthread_rwlock_init(&mmio_rwlock, NULL);
}