/* libnvmm.c revision 1.15 */
1/*	$NetBSD: libnvmm.c,v 1.15 2019/10/23 07:01:11 maxv Exp $	*/
2
3/*
4 * Copyright (c) 2018-2019 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Maxime Villard.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32#include <sys/cdefs.h>
33
34#include <stdio.h>
35#include <stdlib.h>
36#include <string.h>
37#include <unistd.h>
38#include <fcntl.h>
39#include <errno.h>
40#include <sys/ioctl.h>
41#include <sys/mman.h>
42#include <sys/queue.h>
43#include <machine/vmparam.h>
44
45#include "nvmm.h"
46
47static struct nvmm_capability __capability;
48
49#ifdef __x86_64__
50#include "libnvmm_x86.c"
51#endif
52
/*
 * One guest-physical <-> host-virtual mapping registered through
 * nvmm_gpa_map().  Entries are kept in a per-machine linked list
 * (mach->areas) and consulted by nvmm_gpa_to_hva().
 */
typedef struct __area {
	LIST_ENTRY(__area) list;	/* linkage in the machine's area list */
	gpaddr_t gpa;			/* guest-physical base address */
	uintptr_t hva;			/* host-virtual base address */
	size_t size;			/* size of the mapping, in bytes */
	nvmm_prot_t prot;		/* NVMM_PROT_* flags for this area */
} area_t;

typedef LIST_HEAD(, __area) area_list_t;

/* File descriptor on /dev/nvmm; -1 until nvmm_init() opens it. */
static int nvmm_fd = -1;
64
65/* -------------------------------------------------------------------------- */
66
67static bool
68__area_isvalid(struct nvmm_machine *mach, uintptr_t hva, gpaddr_t gpa,
69    size_t size)
70{
71	area_list_t *areas = mach->areas;
72	area_t *ent;
73
74	LIST_FOREACH(ent, areas, list) {
75		/* Collision on GPA */
76		if (gpa >= ent->gpa && gpa < ent->gpa + ent->size) {
77			return false;
78		}
79		if (gpa + size > ent->gpa &&
80		    gpa + size <= ent->gpa + ent->size) {
81			return false;
82		}
83		if (gpa <= ent->gpa && gpa + size >= ent->gpa + ent->size) {
84			return false;
85		}
86	}
87
88	return true;
89}
90
91static int
92__area_add(struct nvmm_machine *mach, uintptr_t hva, gpaddr_t gpa, size_t size,
93    int prot)
94{
95	area_list_t *areas = mach->areas;
96	nvmm_prot_t nprot;
97	area_t *area;
98
99	nprot = 0;
100	if (prot & PROT_READ)
101		nprot |= NVMM_PROT_READ;
102	if (prot & PROT_WRITE)
103		nprot |= NVMM_PROT_WRITE;
104	if (prot & PROT_EXEC)
105		nprot |= NVMM_PROT_EXEC;
106
107	if (!__area_isvalid(mach, hva, gpa, size)) {
108		errno = EINVAL;
109		return -1;
110	}
111
112	area = malloc(sizeof(*area));
113	if (area == NULL)
114		return -1;
115	area->gpa = gpa;
116	area->hva = hva;
117	area->size = size;
118	area->prot = nprot;
119
120	LIST_INSERT_HEAD(areas, area, list);
121
122	return 0;
123}
124
125static int
126__area_delete(struct nvmm_machine *mach, uintptr_t hva, gpaddr_t gpa,
127    size_t size)
128{
129	area_list_t *areas = mach->areas;
130	area_t *ent, *nxt;
131
132	LIST_FOREACH_SAFE(ent, areas, list, nxt) {
133		if (hva == ent->hva && gpa == ent->gpa && size == ent->size) {
134			LIST_REMOVE(ent, list);
135			free(ent);
136			return 0;
137		}
138	}
139
140	return -1;
141}
142
143static void
144__area_remove_all(struct nvmm_machine *mach)
145{
146	area_list_t *areas = mach->areas;
147	area_t *ent;
148
149	while ((ent = LIST_FIRST(areas)) != NULL) {
150		LIST_REMOVE(ent, list);
151		free(ent);
152	}
153
154	free(areas);
155}
156
157/* -------------------------------------------------------------------------- */
158
159static int
160nvmm_init(void)
161{
162	if (nvmm_fd != -1)
163		return 0;
164	nvmm_fd = open("/dev/nvmm", O_RDONLY | O_CLOEXEC);
165	if (nvmm_fd == -1)
166		return -1;
167	if (nvmm_capability(&__capability) == -1) {
168		close(nvmm_fd);
169		nvmm_fd = -1;
170		return -1;
171	}
172	if (__capability.version != NVMM_KERN_VERSION) {
173		close(nvmm_fd);
174		nvmm_fd = -1;
175		errno = EPROGMISMATCH;
176		return -1;
177	}
178
179	return 0;
180}
181
182int
183nvmm_capability(struct nvmm_capability *cap)
184{
185	struct nvmm_ioc_capability args;
186	int ret;
187
188	if (nvmm_init() == -1) {
189		return -1;
190	}
191
192	ret = ioctl(nvmm_fd, NVMM_IOC_CAPABILITY, &args);
193	if (ret == -1)
194		return -1;
195
196	memcpy(cap, &args.cap, sizeof(args.cap));
197
198	return 0;
199}
200
201int
202nvmm_machine_create(struct nvmm_machine *mach)
203{
204	struct nvmm_ioc_machine_create args;
205	struct nvmm_comm_page **pages;
206	area_list_t *areas;
207	int ret;
208
209	if (nvmm_init() == -1) {
210		return -1;
211	}
212
213	areas = calloc(1, sizeof(*areas));
214	if (areas == NULL)
215		return -1;
216
217	pages = calloc(__capability.max_vcpus, sizeof(*pages));
218	if (pages == NULL) {
219		free(areas);
220		return -1;
221	}
222
223	ret = ioctl(nvmm_fd, NVMM_IOC_MACHINE_CREATE, &args);
224	if (ret == -1) {
225		free(areas);
226		return -1;
227	}
228
229	LIST_INIT(areas);
230
231	memset(mach, 0, sizeof(*mach));
232	mach->machid = args.machid;
233	mach->pages = pages;
234	mach->areas = areas;
235
236	return 0;
237}
238
239int
240nvmm_machine_destroy(struct nvmm_machine *mach)
241{
242	struct nvmm_ioc_machine_destroy args;
243	int ret;
244
245	args.machid = mach->machid;
246
247	ret = ioctl(nvmm_fd, NVMM_IOC_MACHINE_DESTROY, &args);
248	if (ret == -1)
249		return -1;
250
251	__area_remove_all(mach);
252	free(mach->pages);
253
254	return 0;
255}
256
257int
258nvmm_machine_configure(struct nvmm_machine *mach, uint64_t op, void *conf)
259{
260	struct nvmm_ioc_machine_configure args;
261	int ret;
262
263	switch (op) {
264	case NVMM_MACH_CONF_CALLBACKS:
265		memcpy(&mach->cbs, conf, sizeof(mach->cbs));
266		return 0;
267	}
268
269	args.machid = mach->machid;
270	args.op = op;
271	args.conf = conf;
272
273	ret = ioctl(nvmm_fd, NVMM_IOC_MACHINE_CONFIGURE, &args);
274	if (ret == -1)
275		return -1;
276
277	return 0;
278}
279
280int
281nvmm_vcpu_create(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
282    struct nvmm_vcpu *vcpu)
283{
284	struct nvmm_ioc_vcpu_create args;
285	struct nvmm_comm_page *comm;
286	int ret;
287
288	args.machid = mach->machid;
289	args.cpuid = cpuid;
290
291	ret = ioctl(nvmm_fd, NVMM_IOC_VCPU_CREATE, &args);
292	if (ret == -1)
293		return -1;
294
295	comm = mmap(NULL, PAGE_SIZE, PROT_READ|PROT_WRITE, MAP_SHARED|MAP_FILE,
296	    nvmm_fd, NVMM_COMM_OFF(mach->machid, cpuid));
297	if (comm == MAP_FAILED)
298		return -1;
299
300	mach->pages[cpuid] = comm;
301
302	vcpu->cpuid = cpuid;
303	vcpu->state = &comm->state;
304	vcpu->event = &comm->event;
305	vcpu->exit = malloc(sizeof(*vcpu->exit));
306
307	return 0;
308}
309
310int
311nvmm_vcpu_destroy(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu)
312{
313	struct nvmm_ioc_vcpu_destroy args;
314	struct nvmm_comm_page *comm;
315	int ret;
316
317	args.machid = mach->machid;
318	args.cpuid = vcpu->cpuid;
319
320	ret = ioctl(nvmm_fd, NVMM_IOC_VCPU_DESTROY, &args);
321	if (ret == -1)
322		return -1;
323
324	comm = mach->pages[vcpu->cpuid];
325	munmap(comm, PAGE_SIZE);
326	free(vcpu->exit);
327
328	return 0;
329}
330
331int
332nvmm_vcpu_configure(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu,
333    uint64_t op, void *conf)
334{
335	struct nvmm_ioc_vcpu_configure args;
336	int ret;
337
338	args.machid = mach->machid;
339	args.cpuid = vcpu->cpuid;
340	args.op = op;
341	args.conf = conf;
342
343	ret = ioctl(nvmm_fd, NVMM_IOC_VCPU_CONFIGURE, &args);
344	if (ret == -1)
345		return -1;
346
347	return 0;
348}
349
350int
351nvmm_vcpu_setstate(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu,
352    uint64_t flags)
353{
354	struct nvmm_comm_page *comm;
355
356	comm = mach->pages[vcpu->cpuid];
357	comm->state_commit |= flags;
358	comm->state_cached |= flags;
359
360	return 0;
361}
362
363int
364nvmm_vcpu_getstate(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu,
365    uint64_t flags)
366{
367	struct nvmm_ioc_vcpu_getstate args;
368	struct nvmm_comm_page *comm;
369	int ret;
370
371	comm = mach->pages[vcpu->cpuid];
372
373	if (__predict_true((flags & ~comm->state_cached) == 0)) {
374		return 0;
375	}
376	comm->state_wanted = flags & ~comm->state_cached;
377
378	args.machid = mach->machid;
379	args.cpuid = vcpu->cpuid;
380
381	ret = ioctl(nvmm_fd, NVMM_IOC_VCPU_GETSTATE, &args);
382	if (ret == -1)
383		return -1;
384
385	return 0;
386}
387
388int
389nvmm_vcpu_inject(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu)
390{
391	struct nvmm_comm_page *comm;
392
393	comm = mach->pages[vcpu->cpuid];
394	comm->event_commit = true;
395
396	return 0;
397}
398
399int
400nvmm_vcpu_run(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu)
401{
402	struct nvmm_ioc_vcpu_run args;
403	int ret;
404
405	args.machid = mach->machid;
406	args.cpuid = vcpu->cpuid;
407	memset(&args.exit, 0, sizeof(args.exit));
408
409	ret = ioctl(nvmm_fd, NVMM_IOC_VCPU_RUN, &args);
410	if (ret == -1)
411		return -1;
412
413	/* No comm support yet, just copy. */
414	memcpy(vcpu->exit, &args.exit, sizeof(args.exit));
415
416	return 0;
417}
418
419int
420nvmm_gpa_map(struct nvmm_machine *mach, uintptr_t hva, gpaddr_t gpa,
421    size_t size, int prot)
422{
423	struct nvmm_ioc_gpa_map args;
424	int ret;
425
426	ret = __area_add(mach, hva, gpa, size, prot);
427	if (ret == -1)
428		return -1;
429
430	args.machid = mach->machid;
431	args.hva = hva;
432	args.gpa = gpa;
433	args.size = size;
434	args.prot = prot;
435
436	ret = ioctl(nvmm_fd, NVMM_IOC_GPA_MAP, &args);
437	if (ret == -1) {
438		/* Can't recover. */
439		abort();
440	}
441
442	return 0;
443}
444
445int
446nvmm_gpa_unmap(struct nvmm_machine *mach, uintptr_t hva, gpaddr_t gpa,
447    size_t size)
448{
449	struct nvmm_ioc_gpa_unmap args;
450	int ret;
451
452	ret = __area_delete(mach, hva, gpa, size);
453	if (ret == -1)
454		return -1;
455
456	args.machid = mach->machid;
457	args.gpa = gpa;
458	args.size = size;
459
460	ret = ioctl(nvmm_fd, NVMM_IOC_GPA_UNMAP, &args);
461	if (ret == -1) {
462		/* Can't recover. */
463		abort();
464	}
465
466	return 0;
467}
468
469int
470nvmm_hva_map(struct nvmm_machine *mach, uintptr_t hva, size_t size)
471{
472	struct nvmm_ioc_hva_map args;
473	int ret;
474
475	args.machid = mach->machid;
476	args.hva = hva;
477	args.size = size;
478
479	ret = ioctl(nvmm_fd, NVMM_IOC_HVA_MAP, &args);
480	if (ret == -1)
481		return -1;
482
483	return 0;
484}
485
486int
487nvmm_hva_unmap(struct nvmm_machine *mach, uintptr_t hva, size_t size)
488{
489	struct nvmm_ioc_hva_unmap args;
490	int ret;
491
492	args.machid = mach->machid;
493	args.hva = hva;
494	args.size = size;
495
496	ret = ioctl(nvmm_fd, NVMM_IOC_HVA_UNMAP, &args);
497	if (ret == -1)
498		return -1;
499
500	return 0;
501}
502
503/*
504 * nvmm_gva_to_gpa(): architecture-specific.
505 */
506
507int
508nvmm_gpa_to_hva(struct nvmm_machine *mach, gpaddr_t gpa, uintptr_t *hva,
509    nvmm_prot_t *prot)
510{
511	area_list_t *areas = mach->areas;
512	area_t *ent;
513
514	LIST_FOREACH(ent, areas, list) {
515		if (gpa >= ent->gpa && gpa < ent->gpa + ent->size) {
516			*hva = ent->hva + (gpa - ent->gpa);
517			*prot = ent->prot;
518			return 0;
519		}
520	}
521
522	errno = ENOENT;
523	return -1;
524}
525
526/*
527 * nvmm_assist_io(): architecture-specific.
528 */
529
530/*
531 * nvmm_assist_mem(): architecture-specific.
532 */
533
534int
535nvmm_ctl(int op, void *data, size_t size)
536{
537	struct nvmm_ioc_ctl args;
538	int ret;
539
540	if (nvmm_init() == -1) {
541		return -1;
542	}
543
544	args.op = op;
545	args.data = data;
546	args.size = size;
547
548	ret = ioctl(nvmm_fd, NVMM_IOC_CTL, &args);
549	if (ret == -1)
550		return -1;
551
552	return 0;
553}
554