1/*	$NetBSD: libnvmm.c,v 1.20 2021/04/06 08:40:17 reinoud Exp $	*/
2
3/*
4 * Copyright (c) 2018-2020 Maxime Villard, m00nbsd.net
5 * All rights reserved.
6 *
7 * This code is part of the NVMM hypervisor.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
23 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
24 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
25 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 */
30
31#include <sys/cdefs.h>
32
33#include <stdio.h>
34#include <stdlib.h>
35#include <string.h>
36#include <unistd.h>
37#include <fcntl.h>
38#include <errno.h>
39#include <sys/ioctl.h>
40#include <sys/mman.h>
41#include <sys/queue.h>
42#include <machine/vmparam.h>
43
44#include "nvmm.h"
45
/* Capability of the kernel NVMM driver, filled in by nvmm_init(). */
static struct nvmm_capability __capability;

#ifdef __x86_64__
#include "libnvmm_x86.c"
#endif

/*
 * Userland bookkeeping of one GPA->HVA mapping established with
 * nvmm_gpa_map(). One entry per mapped range, kept in a per-machine list
 * hanging off mach->areas.
 */
typedef struct __area {
	LIST_ENTRY(__area) list;	/* linkage in the per-machine list */
	gpaddr_t gpa;			/* guest-physical start address */
	uintptr_t hva;			/* host-virtual start address */
	size_t size;			/* size of the range, in bytes */
	nvmm_prot_t prot;		/* NVMM_PROT_* bits of the range */
} area_t;

typedef LIST_HEAD(, __area) area_list_t;

/* Descriptor on /dev/nvmm; -1 until nvmm_init()/nvmm_root_init() succeeds. */
static int nvmm_fd = -1;
63
64/* -------------------------------------------------------------------------- */
65
66static bool
67__area_isvalid(struct nvmm_machine *mach, uintptr_t hva, gpaddr_t gpa,
68    size_t size)
69{
70	area_list_t *areas = mach->areas;
71	area_t *ent;
72
73	LIST_FOREACH(ent, areas, list) {
74		/* Collision on GPA */
75		if (gpa >= ent->gpa && gpa < ent->gpa + ent->size) {
76			return false;
77		}
78		if (gpa + size > ent->gpa &&
79		    gpa + size <= ent->gpa + ent->size) {
80			return false;
81		}
82		if (gpa <= ent->gpa && gpa + size >= ent->gpa + ent->size) {
83			return false;
84		}
85	}
86
87	return true;
88}
89
90static int
91__area_add(struct nvmm_machine *mach, uintptr_t hva, gpaddr_t gpa, size_t size,
92    int prot)
93{
94	area_list_t *areas = mach->areas;
95	nvmm_prot_t nprot;
96	area_t *area;
97
98	nprot = 0;
99	if (prot & PROT_READ)
100		nprot |= NVMM_PROT_READ;
101	if (prot & PROT_WRITE)
102		nprot |= NVMM_PROT_WRITE;
103	if (prot & PROT_EXEC)
104		nprot |= NVMM_PROT_EXEC;
105
106	if (!__area_isvalid(mach, hva, gpa, size)) {
107		errno = EINVAL;
108		return -1;
109	}
110
111	area = malloc(sizeof(*area));
112	if (area == NULL)
113		return -1;
114	area->gpa = gpa;
115	area->hva = hva;
116	area->size = size;
117	area->prot = nprot;
118
119	LIST_INSERT_HEAD(areas, area, list);
120
121	return 0;
122}
123
124static int
125__area_delete(struct nvmm_machine *mach, uintptr_t hva, gpaddr_t gpa,
126    size_t size)
127{
128	area_list_t *areas = mach->areas;
129	area_t *ent, *nxt;
130
131	LIST_FOREACH_SAFE(ent, areas, list, nxt) {
132		if (hva == ent->hva && gpa == ent->gpa && size == ent->size) {
133			LIST_REMOVE(ent, list);
134			free(ent);
135			return 0;
136		}
137	}
138
139	return -1;
140}
141
142static void
143__area_remove_all(struct nvmm_machine *mach)
144{
145	area_list_t *areas = mach->areas;
146	area_t *ent;
147
148	while ((ent = LIST_FIRST(areas)) != NULL) {
149		LIST_REMOVE(ent, list);
150		free(ent);
151	}
152
153	free(areas);
154}
155
156/* -------------------------------------------------------------------------- */
157
158int
159nvmm_init(void)
160{
161	if (nvmm_fd != -1)
162		return 0;
163	nvmm_fd = open("/dev/nvmm", O_RDONLY | O_CLOEXEC);
164	if (nvmm_fd == -1)
165		return -1;
166	if (nvmm_capability(&__capability) == -1) {
167		close(nvmm_fd);
168		nvmm_fd = -1;
169		return -1;
170	}
171	if (__capability.version != NVMM_KERN_VERSION) {
172		close(nvmm_fd);
173		nvmm_fd = -1;
174		errno = EPROGMISMATCH;
175		return -1;
176	}
177
178	return 0;
179}
180
181int
182nvmm_root_init(void)
183{
184	if (nvmm_fd != -1)
185		return 0;
186	nvmm_fd = open("/dev/nvmm", O_WRONLY | O_CLOEXEC);
187	if (nvmm_fd == -1)
188		return -1;
189	if (nvmm_capability(&__capability) == -1) {
190		close(nvmm_fd);
191		nvmm_fd = -1;
192		return -1;
193	}
194	if (__capability.version != NVMM_KERN_VERSION) {
195		close(nvmm_fd);
196		nvmm_fd = -1;
197		errno = EPROGMISMATCH;
198		return -1;
199	}
200
201	return 0;
202}
203
204int
205nvmm_capability(struct nvmm_capability *cap)
206{
207	struct nvmm_ioc_capability args;
208	int ret;
209
210	ret = ioctl(nvmm_fd, NVMM_IOC_CAPABILITY, &args);
211	if (ret == -1)
212		return -1;
213
214	memcpy(cap, &args.cap, sizeof(args.cap));
215
216	return 0;
217}
218
219int
220nvmm_machine_create(struct nvmm_machine *mach)
221{
222	struct nvmm_ioc_machine_create args;
223	struct nvmm_comm_page **pages;
224	area_list_t *areas;
225	int ret;
226
227	areas = calloc(1, sizeof(*areas));
228	if (areas == NULL)
229		return -1;
230
231	pages = calloc(__capability.max_vcpus, sizeof(*pages));
232	if (pages == NULL) {
233		free(areas);
234		return -1;
235	}
236
237	ret = ioctl(nvmm_fd, NVMM_IOC_MACHINE_CREATE, &args);
238	if (ret == -1) {
239		free(areas);
240		return -1;
241	}
242
243	LIST_INIT(areas);
244
245	memset(mach, 0, sizeof(*mach));
246	mach->machid = args.machid;
247	mach->pages = pages;
248	mach->areas = areas;
249
250	return 0;
251}
252
253int
254nvmm_machine_destroy(struct nvmm_machine *mach)
255{
256	struct nvmm_ioc_machine_destroy args;
257	int ret;
258
259	args.machid = mach->machid;
260
261	ret = ioctl(nvmm_fd, NVMM_IOC_MACHINE_DESTROY, &args);
262	if (ret == -1)
263		return -1;
264
265	__area_remove_all(mach);
266	free(mach->pages);
267
268	return 0;
269}
270
271int
272nvmm_machine_configure(struct nvmm_machine *mach, uint64_t op, void *conf)
273{
274	struct nvmm_ioc_machine_configure args;
275	int ret;
276
277	args.machid = mach->machid;
278	args.op = op;
279	args.conf = conf;
280
281	ret = ioctl(nvmm_fd, NVMM_IOC_MACHINE_CONFIGURE, &args);
282	if (ret == -1)
283		return -1;
284
285	return 0;
286}
287
288int
289nvmm_vcpu_create(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
290    struct nvmm_vcpu *vcpu)
291{
292	struct nvmm_ioc_vcpu_create args;
293	struct nvmm_comm_page *comm;
294	int ret;
295
296	args.machid = mach->machid;
297	args.cpuid = cpuid;
298
299	ret = ioctl(nvmm_fd, NVMM_IOC_VCPU_CREATE, &args);
300	if (ret == -1)
301		return -1;
302
303	comm = mmap(NULL, PAGE_SIZE, PROT_READ|PROT_WRITE, MAP_SHARED|MAP_FILE,
304	    nvmm_fd, NVMM_COMM_OFF(mach->machid, cpuid));
305	if (comm == MAP_FAILED)
306		return -1;
307
308	mach->pages[cpuid] = comm;
309
310	vcpu->cpuid = cpuid;
311	vcpu->state = &comm->state;
312	vcpu->event = &comm->event;
313	vcpu->stop = &comm->stop;
314	vcpu->exit = malloc(sizeof(*vcpu->exit));
315
316	return 0;
317}
318
319int
320nvmm_vcpu_destroy(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu)
321{
322	struct nvmm_ioc_vcpu_destroy args;
323	struct nvmm_comm_page *comm;
324	int ret;
325
326	args.machid = mach->machid;
327	args.cpuid = vcpu->cpuid;
328
329	ret = ioctl(nvmm_fd, NVMM_IOC_VCPU_DESTROY, &args);
330	if (ret == -1)
331		return -1;
332
333	comm = mach->pages[vcpu->cpuid];
334	munmap(comm, PAGE_SIZE);
335	free(vcpu->exit);
336
337	return 0;
338}
339
340int
341nvmm_vcpu_configure(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu,
342    uint64_t op, void *conf)
343{
344	struct nvmm_ioc_vcpu_configure args;
345	int ret;
346
347	switch (op) {
348	case NVMM_VCPU_CONF_CALLBACKS:
349		memcpy(&vcpu->cbs, conf, sizeof(vcpu->cbs));
350		return 0;
351	}
352
353	args.machid = mach->machid;
354	args.cpuid = vcpu->cpuid;
355	args.op = op;
356	args.conf = conf;
357
358	ret = ioctl(nvmm_fd, NVMM_IOC_VCPU_CONFIGURE, &args);
359	if (ret == -1)
360		return -1;
361
362	return 0;
363}
364
365int
366nvmm_vcpu_setstate(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu,
367    uint64_t flags)
368{
369	struct nvmm_comm_page *comm;
370
371	comm = mach->pages[vcpu->cpuid];
372	comm->state_commit |= flags;
373	comm->state_cached |= flags;
374
375	return 0;
376}
377
378int
379nvmm_vcpu_getstate(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu,
380    uint64_t flags)
381{
382	struct nvmm_ioc_vcpu_getstate args;
383	struct nvmm_comm_page *comm;
384	int ret;
385
386	comm = mach->pages[vcpu->cpuid];
387
388	if (__predict_true((flags & ~comm->state_cached) == 0)) {
389		return 0;
390	}
391	comm->state_wanted = flags & ~comm->state_cached;
392
393	args.machid = mach->machid;
394	args.cpuid = vcpu->cpuid;
395
396	ret = ioctl(nvmm_fd, NVMM_IOC_VCPU_GETSTATE, &args);
397	if (ret == -1)
398		return -1;
399
400	return 0;
401}
402
403int
404nvmm_vcpu_inject(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu)
405{
406	struct nvmm_comm_page *comm;
407
408	comm = mach->pages[vcpu->cpuid];
409	comm->event_commit = true;
410
411	return 0;
412}
413
414int
415nvmm_vcpu_run(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu)
416{
417	struct nvmm_ioc_vcpu_run args;
418	int ret;
419
420	args.machid = mach->machid;
421	args.cpuid = vcpu->cpuid;
422	memset(&args.exit, 0, sizeof(args.exit));
423
424	ret = ioctl(nvmm_fd, NVMM_IOC_VCPU_RUN, &args);
425	if (ret == -1)
426		return -1;
427
428	/* No comm support yet, just copy. */
429	memcpy(vcpu->exit, &args.exit, sizeof(args.exit));
430
431	return 0;
432}
433
434int
435nvmm_gpa_map(struct nvmm_machine *mach, uintptr_t hva, gpaddr_t gpa,
436    size_t size, int prot)
437{
438	struct nvmm_ioc_gpa_map args;
439	int ret;
440
441	ret = __area_add(mach, hva, gpa, size, prot);
442	if (ret == -1)
443		return -1;
444
445	args.machid = mach->machid;
446	args.hva = hva;
447	args.gpa = gpa;
448	args.size = size;
449	args.prot = prot;
450
451	ret = ioctl(nvmm_fd, NVMM_IOC_GPA_MAP, &args);
452	if (ret == -1) {
453		/* Can't recover. */
454		abort();
455	}
456
457	return 0;
458}
459
460int
461nvmm_gpa_unmap(struct nvmm_machine *mach, uintptr_t hva, gpaddr_t gpa,
462    size_t size)
463{
464	struct nvmm_ioc_gpa_unmap args;
465	int ret;
466
467	ret = __area_delete(mach, hva, gpa, size);
468	if (ret == -1)
469		return -1;
470
471	args.machid = mach->machid;
472	args.gpa = gpa;
473	args.size = size;
474
475	ret = ioctl(nvmm_fd, NVMM_IOC_GPA_UNMAP, &args);
476	if (ret == -1) {
477		/* Can't recover. */
478		abort();
479	}
480
481	return 0;
482}
483
484int
485nvmm_hva_map(struct nvmm_machine *mach, uintptr_t hva, size_t size)
486{
487	struct nvmm_ioc_hva_map args;
488	int ret;
489
490	args.machid = mach->machid;
491	args.hva = hva;
492	args.size = size;
493
494	ret = ioctl(nvmm_fd, NVMM_IOC_HVA_MAP, &args);
495	if (ret == -1)
496		return -1;
497
498	return 0;
499}
500
501int
502nvmm_hva_unmap(struct nvmm_machine *mach, uintptr_t hva, size_t size)
503{
504	struct nvmm_ioc_hva_unmap args;
505	int ret;
506
507	args.machid = mach->machid;
508	args.hva = hva;
509	args.size = size;
510
511	ret = ioctl(nvmm_fd, NVMM_IOC_HVA_UNMAP, &args);
512	if (ret == -1)
513		return -1;
514
515	return 0;
516}
517
518/*
519 * nvmm_gva_to_gpa(): architecture-specific.
520 */
521
522int
523nvmm_gpa_to_hva(struct nvmm_machine *mach, gpaddr_t gpa, uintptr_t *hva,
524    nvmm_prot_t *prot)
525{
526	area_list_t *areas = mach->areas;
527	area_t *ent;
528
529	LIST_FOREACH(ent, areas, list) {
530		if (gpa >= ent->gpa && gpa < ent->gpa + ent->size) {
531			*hva = ent->hva + (gpa - ent->gpa);
532			*prot = ent->prot;
533			return 0;
534		}
535	}
536
537	errno = ENOENT;
538	return -1;
539}
540
541/*
542 * nvmm_assist_io(): architecture-specific.
543 */
544
545/*
546 * nvmm_assist_mem(): architecture-specific.
547 */
548
549int
550nvmm_ctl(int op, void *data, size_t size)
551{
552	struct nvmm_ioc_ctl args;
553	int ret;
554
555	args.op = op;
556	args.data = data;
557	args.size = size;
558
559	ret = ioctl(nvmm_fd, NVMM_IOC_CTL, &args);
560	if (ret == -1)
561		return -1;
562
563	return 0;
564}
565
566int
567nvmm_vcpu_stop(struct nvmm_vcpu *vcpu)
568{
569
570	*vcpu->stop = 1;
571
572	return 0;
573}
574