1/*-
2 * Copyright (c) 2011 NetApp, Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 *
26 * $FreeBSD: releng/11.0/usr.sbin/bhyveload/bhyveload.c 302211 2016-06-26 14:44:01Z novel $
27 */
28
29/*-
30 * Copyright (c) 2011 Google, Inc.
31 * All rights reserved.
32 *
33 * Redistribution and use in source and binary forms, with or without
34 * modification, are permitted provided that the following conditions
35 * are met:
36 * 1. Redistributions of source code must retain the above copyright
37 *    notice, this list of conditions and the following disclaimer.
38 * 2. Redistributions in binary form must reproduce the above copyright
39 *    notice, this list of conditions and the following disclaimer in the
40 *    documentation and/or other materials provided with the distribution.
41 *
42 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
43 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
44 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
45 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
46 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
47 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
48 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
49 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
50 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
51 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
52 * SUCH DAMAGE.
53 *
54 * $FreeBSD: releng/11.0/usr.sbin/bhyveload/bhyveload.c 302211 2016-06-26 14:44:01Z novel $
55 */
56
57#include <sys/cdefs.h>
58__FBSDID("$FreeBSD: releng/11.0/usr.sbin/bhyveload/bhyveload.c 302211 2016-06-26 14:44:01Z novel $");
59
60#include <sys/ioctl.h>
61#include <sys/stat.h>
62#include <sys/disk.h>
63#include <sys/queue.h>
64
65#include <machine/specialreg.h>
66#include <machine/vmm.h>
67
68#include <dirent.h>
69#include <dlfcn.h>
70#include <errno.h>
71#include <err.h>
72#include <fcntl.h>
73#include <getopt.h>
74#include <libgen.h>
75#include <limits.h>
76#include <stdio.h>
77#include <stdlib.h>
78#include <string.h>
79#include <sysexits.h>
80#include <termios.h>
81#include <unistd.h>
82
83#include <vmmapi.h>
84
85#include "userboot.h"
86
/* Byte-count multipliers. */
#define	MB	(1024 * 1024UL)
#define	GB	(1024 * 1024 * 1024UL)
/* vcpu index handed to the vmmapi calls made with the BSP argument below. */
#define	BSP	0

/* Number of slots in disk_fd[]; disk_open() returns ERANGE once all are used. */
#define	NDISKS	32

/* Host directory given with -h; cb_open() returns ENOENT while it is NULL. */
static char *host_base;
/* Raw-mode settings built in main() and the saved settings cb_exit() restores. */
static struct termios term, oldterm;
/* Descriptors stored by disk_open(), indexed by the loader's disk unit. */
static int disk_fd[NDISKS];
/* Count of valid entries in disk_fd[]. */
static int ndisks;
/* Console descriptors: stdin/stdout by default, replaced by altcons_open(). */
static int consin_fd, consout_fd;

/* VM name (the sole positional argument) and basename of argv[0]. */
static char *vmname, *progname;
/* Handle returned by vm_open() in main(); used by the vmmapi calls below. */
static struct vmctx *ctx;

/* Values recorded by cb_setgdt(), cb_setcr(3) and cb_setreg(4) for cb_exec(). */
static uint64_t gdtbase, cr3, rsp;

/* Declared early because the register callbacks call it before its definition. */
static void cb_exit(void *arg, int v);
105
106/*
107 * Console i/o callbacks
108 */
109
110static void
111cb_putc(void *arg, int ch)
112{
113	char c = ch;
114
115	(void) write(consout_fd, &c, 1);
116}
117
118static int
119cb_getc(void *arg)
120{
121	char c;
122
123	if (read(consin_fd, &c, 1) == 1)
124		return (c);
125	return (-1);
126}
127
128static int
129cb_poll(void *arg)
130{
131	int n;
132
133	if (ioctl(consin_fd, FIONREAD, &n) >= 0)
134		return (n > 0);
135	return (0);
136}
137
138/*
139 * Host filesystem i/o callbacks
140 */
141
/*
 * Handle given to the loader for an open host file or directory.
 * Allocated by cb_open() and released by cb_close().
 */
struct cb_file {
	int cf_isdir;		/* selects the cf_u member: non-zero dir, 0 fd */
	size_t cf_size;		/* st_size copied from cf_stat by cb_open() */
	struct stat cf_stat;	/* stat(2) result taken in cb_open() */
	union {
		int fd;		/* from open(2), used for regular files */
		DIR *dir;	/* from opendir(3), used for directories */
	} cf_u;
};
151
152static int
153cb_open(void *arg, const char *filename, void **hp)
154{
155	struct cb_file *cf;
156	char path[PATH_MAX];
157
158	if (!host_base)
159		return (ENOENT);
160
161	strlcpy(path, host_base, PATH_MAX);
162	if (path[strlen(path) - 1] == '/')
163		path[strlen(path) - 1] = 0;
164	strlcat(path, filename, PATH_MAX);
165	cf = malloc(sizeof(struct cb_file));
166	if (stat(path, &cf->cf_stat) < 0) {
167		free(cf);
168		return (errno);
169	}
170
171	cf->cf_size = cf->cf_stat.st_size;
172	if (S_ISDIR(cf->cf_stat.st_mode)) {
173		cf->cf_isdir = 1;
174		cf->cf_u.dir = opendir(path);
175		if (!cf->cf_u.dir)
176			goto out;
177		*hp = cf;
178		return (0);
179	}
180	if (S_ISREG(cf->cf_stat.st_mode)) {
181		cf->cf_isdir = 0;
182		cf->cf_u.fd = open(path, O_RDONLY);
183		if (cf->cf_u.fd < 0)
184			goto out;
185		*hp = cf;
186		return (0);
187	}
188
189out:
190	free(cf);
191	return (EINVAL);
192}
193
194static int
195cb_close(void *arg, void *h)
196{
197	struct cb_file *cf = h;
198
199	if (cf->cf_isdir)
200		closedir(cf->cf_u.dir);
201	else
202		close(cf->cf_u.fd);
203	free(cf);
204
205	return (0);
206}
207
208static int
209cb_isdir(void *arg, void *h)
210{
211	struct cb_file *cf = h;
212
213	return (cf->cf_isdir);
214}
215
216static int
217cb_read(void *arg, void *h, void *buf, size_t size, size_t *resid)
218{
219	struct cb_file *cf = h;
220	ssize_t sz;
221
222	if (cf->cf_isdir)
223		return (EINVAL);
224	sz = read(cf->cf_u.fd, buf, size);
225	if (sz < 0)
226		return (EINVAL);
227	*resid = size - sz;
228	return (0);
229}
230
231static int
232cb_readdir(void *arg, void *h, uint32_t *fileno_return, uint8_t *type_return,
233	   size_t *namelen_return, char *name)
234{
235	struct cb_file *cf = h;
236	struct dirent *dp;
237
238	if (!cf->cf_isdir)
239		return (EINVAL);
240
241	dp = readdir(cf->cf_u.dir);
242	if (!dp)
243		return (ENOENT);
244
245	/*
246	 * Note: d_namlen is in the range 0..255 and therefore less
247	 * than PATH_MAX so we don't need to test before copying.
248	 */
249	*fileno_return = dp->d_fileno;
250	*type_return = dp->d_type;
251	*namelen_return = dp->d_namlen;
252	memcpy(name, dp->d_name, dp->d_namlen);
253	name[dp->d_namlen] = 0;
254
255	return (0);
256}
257
258static int
259cb_seek(void *arg, void *h, uint64_t offset, int whence)
260{
261	struct cb_file *cf = h;
262
263	if (cf->cf_isdir)
264		return (EINVAL);
265	if (lseek(cf->cf_u.fd, offset, whence) < 0)
266		return (errno);
267	return (0);
268}
269
270static int
271cb_stat(void *arg, void *h, int *mode, int *uid, int *gid, uint64_t *size)
272{
273	struct cb_file *cf = h;
274
275	*mode = cf->cf_stat.st_mode;
276	*uid = cf->cf_stat.st_uid;
277	*gid = cf->cf_stat.st_gid;
278	*size = cf->cf_stat.st_size;
279	return (0);
280}
281
282/*
283 * Disk image i/o callbacks
284 */
285
286static int
287cb_diskread(void *arg, int unit, uint64_t from, void *to, size_t size,
288	    size_t *resid)
289{
290	ssize_t n;
291
292	if (unit < 0 || unit >= ndisks )
293		return (EIO);
294	n = pread(disk_fd[unit], to, size, from);
295	if (n < 0)
296		return (errno);
297	*resid = size - n;
298	return (0);
299}
300
301static int
302cb_diskioctl(void *arg, int unit, u_long cmd, void *data)
303{
304	struct stat sb;
305
306	if (unit < 0 || unit >= ndisks)
307		return (EBADF);
308
309	switch (cmd) {
310	case DIOCGSECTORSIZE:
311		*(u_int *)data = 512;
312		break;
313	case DIOCGMEDIASIZE:
314		if (fstat(disk_fd[unit], &sb) == 0)
315			*(off_t *)data = sb.st_size;
316		else
317			return (ENOTTY);
318		break;
319	default:
320		return (ENOTTY);
321	}
322
323	return (0);
324}
325
326/*
327 * Guest virtual machine i/o callbacks
328 */
329static int
330cb_copyin(void *arg, const void *from, uint64_t to, size_t size)
331{
332	char *ptr;
333
334	to &= 0x7fffffff;
335
336	ptr = vm_map_gpa(ctx, to, size);
337	if (ptr == NULL)
338		return (EFAULT);
339
340	memcpy(ptr, from, size);
341	return (0);
342}
343
344static int
345cb_copyout(void *arg, uint64_t from, void *to, size_t size)
346{
347	char *ptr;
348
349	from &= 0x7fffffff;
350
351	ptr = vm_map_gpa(ctx, from, size);
352	if (ptr == NULL)
353		return (EFAULT);
354
355	memcpy(to, ptr, size);
356	return (0);
357}
358
359static void
360cb_setreg(void *arg, int r, uint64_t v)
361{
362	int error;
363	enum vm_reg_name vmreg;
364
365	vmreg = VM_REG_LAST;
366
367	switch (r) {
368	case 4:
369		vmreg = VM_REG_GUEST_RSP;
370		rsp = v;
371		break;
372	default:
373		break;
374	}
375
376	if (vmreg == VM_REG_LAST) {
377		printf("test_setreg(%d): not implemented\n", r);
378		cb_exit(NULL, USERBOOT_EXIT_QUIT);
379	}
380
381	error = vm_set_register(ctx, BSP, vmreg, v);
382	if (error) {
383		perror("vm_set_register");
384		cb_exit(NULL, USERBOOT_EXIT_QUIT);
385	}
386}
387
388static void
389cb_setmsr(void *arg, int r, uint64_t v)
390{
391	int error;
392	enum vm_reg_name vmreg;
393
394	vmreg = VM_REG_LAST;
395
396	switch (r) {
397	case MSR_EFER:
398		vmreg = VM_REG_GUEST_EFER;
399		break;
400	default:
401		break;
402	}
403
404	if (vmreg == VM_REG_LAST) {
405		printf("test_setmsr(%d): not implemented\n", r);
406		cb_exit(NULL, USERBOOT_EXIT_QUIT);
407	}
408
409	error = vm_set_register(ctx, BSP, vmreg, v);
410	if (error) {
411		perror("vm_set_msr");
412		cb_exit(NULL, USERBOOT_EXIT_QUIT);
413	}
414}
415
416static void
417cb_setcr(void *arg, int r, uint64_t v)
418{
419	int error;
420	enum vm_reg_name vmreg;
421
422	vmreg = VM_REG_LAST;
423
424	switch (r) {
425	case 0:
426		vmreg = VM_REG_GUEST_CR0;
427		break;
428	case 3:
429		vmreg = VM_REG_GUEST_CR3;
430		cr3 = v;
431		break;
432	case 4:
433		vmreg = VM_REG_GUEST_CR4;
434		break;
435	default:
436		break;
437	}
438
439	if (vmreg == VM_REG_LAST) {
440		printf("test_setcr(%d): not implemented\n", r);
441		cb_exit(NULL, USERBOOT_EXIT_QUIT);
442	}
443
444	error = vm_set_register(ctx, BSP, vmreg, v);
445	if (error) {
446		perror("vm_set_cr");
447		cb_exit(NULL, USERBOOT_EXIT_QUIT);
448	}
449}
450
451static void
452cb_setgdt(void *arg, uint64_t base, size_t size)
453{
454	int error;
455
456	error = vm_set_desc(ctx, BSP, VM_REG_GUEST_GDTR, base, size - 1, 0);
457	if (error != 0) {
458		perror("vm_set_desc(gdt)");
459		cb_exit(NULL, USERBOOT_EXIT_QUIT);
460	}
461
462	gdtbase = base;
463}
464
465static void
466cb_exec(void *arg, uint64_t rip)
467{
468	int error;
469
470	if (cr3 == 0)
471		error = vm_setup_freebsd_registers_i386(ctx, BSP, rip, gdtbase,
472		    rsp);
473	else
474		error = vm_setup_freebsd_registers(ctx, BSP, rip, cr3, gdtbase,
475		    rsp);
476	if (error) {
477		perror("vm_setup_freebsd_registers");
478		cb_exit(NULL, USERBOOT_EXIT_QUIT);
479	}
480
481	cb_exit(NULL, 0);
482}
483
484/*
485 * Misc
486 */
487
/*
 * Sleep for 'usec' microseconds via usleep(3); its result is ignored.
 * 'arg' is unused.
 */
static void
cb_delay(void *arg, int usec)
{

	(void) usleep(usec);
}
494
495static void
496cb_exit(void *arg, int v)
497{
498
499	tcsetattr(consout_fd, TCSAFLUSH, &oldterm);
500	exit(v);
501}
502
503static void
504cb_getmem(void *arg, uint64_t *ret_lowmem, uint64_t *ret_highmem)
505{
506
507	*ret_lowmem = vm_get_lowmem_size(ctx);
508	*ret_highmem = vm_get_highmem_size(ctx);
509}
510
/*
 * One loader environment entry.  The string pointer is stored as given to
 * addenv(); it is not copied.
 */
struct env {
	const char *str;	/* name=value */
	SLIST_ENTRY(env) next;	/* linkage on envhead */
};

/* List of all entries; addenv() inserts at the head, cb_getenv() walks it. */
static SLIST_HEAD(envhead, env) envhead;
517
518static void
519addenv(const char *str)
520{
521	struct env *env;
522
523	env = malloc(sizeof(struct env));
524	env->str = str;
525	SLIST_INSERT_HEAD(&envhead, env, next);
526}
527
528static const char *
529cb_getenv(void *arg, int num)
530{
531	int i;
532	struct env *env;
533
534	i = 0;
535	SLIST_FOREACH(env, &envhead, next) {
536		if (i == num)
537			return (env->str);
538		i++;
539	}
540
541	return (NULL);
542}
543
544static int
545cb_vm_set_register(void *arg, int vcpu, int reg, uint64_t val)
546{
547
548	return (vm_set_register(ctx, vcpu, reg, val));
549}
550
551static int
552cb_vm_set_desc(void *arg, int vcpu, int reg, uint64_t base, u_int limit,
553    u_int access)
554{
555
556	return (vm_set_desc(ctx, vcpu, reg, base, limit, access));
557}
558
/*
 * Callback table passed to the loader's entry point by main().  Each
 * member points at the cb_* function of the same purpose defined above.
 */
static struct loader_callbacks cb = {
	/* Console i/o */
	.getc = cb_getc,
	.putc = cb_putc,
	.poll = cb_poll,

	/* Host filesystem i/o */
	.open = cb_open,
	.close = cb_close,
	.isdir = cb_isdir,
	.read = cb_read,
	.readdir = cb_readdir,
	.seek = cb_seek,
	.stat = cb_stat,

	/* Disk image i/o */
	.diskread = cb_diskread,
	.diskioctl = cb_diskioctl,

	/* Guest memory and register state */
	.copyin = cb_copyin,
	.copyout = cb_copyout,
	.setreg = cb_setreg,
	.setmsr = cb_setmsr,
	.setcr = cb_setcr,
	.setgdt = cb_setgdt,
	.exec = cb_exec,

	/* Misc */
	.delay = cb_delay,
	.exit = cb_exit,
	.getmem = cb_getmem,

	.getenv = cb_getenv,

	/* Version 4 additions */
	.vm_set_register = cb_vm_set_register,
	.vm_set_desc = cb_vm_set_desc,
};
593
594static int
595altcons_open(char *path)
596{
597	struct stat sb;
598	int err;
599	int fd;
600
601	/*
602	 * Allow stdio to be passed in so that the same string
603	 * can be used for the bhyveload console and bhyve com-port
604	 * parameters
605	 */
606	if (!strcmp(path, "stdio"))
607		return (0);
608
609	err = stat(path, &sb);
610	if (err == 0) {
611		if (!S_ISCHR(sb.st_mode))
612			err = ENOTSUP;
613		else {
614			fd = open(path, O_RDWR | O_NONBLOCK);
615			if (fd < 0)
616				err = errno;
617			else
618				consin_fd = consout_fd = fd;
619		}
620	}
621
622	return (err);
623}
624
625static int
626disk_open(char *path)
627{
628	int err, fd;
629
630	if (ndisks >= NDISKS)
631		return (ERANGE);
632
633	err = 0;
634	fd = open(path, O_RDONLY);
635
636	if (fd > 0) {
637		disk_fd[ndisks] = fd;
638		ndisks++;
639	} else
640		err = errno;
641
642	return (err);
643}
644
645static void
646usage(void)
647{
648
649	fprintf(stderr,
650	    "usage: %s [-S][-c <console-device>] [-d <disk-path>] [-e <name=value>]\n"
651	    "       %*s [-h <host-path>] [-m memsize[K|k|M|m|G|g|T|t]] <vmname>\n",
652	    progname,
653	    (int)strlen(progname), "");
654	exit(1);
655}
656
657int
658main(int argc, char** argv)
659{
660	char *loader;
661	void *h;
662	void (*func)(struct loader_callbacks *, void *, int, int);
663	uint64_t mem_size;
664	int opt, error, need_reinit, memflags;
665
666	progname = basename(argv[0]);
667
668	loader = NULL;
669
670	memflags = 0;
671	mem_size = 256 * MB;
672
673	consin_fd = STDIN_FILENO;
674	consout_fd = STDOUT_FILENO;
675
676	while ((opt = getopt(argc, argv, "CSc:d:e:h:l:m:")) != -1) {
677		switch (opt) {
678		case 'c':
679			error = altcons_open(optarg);
680			if (error != 0)
681				errx(EX_USAGE, "Could not open '%s'", optarg);
682			break;
683
684		case 'd':
685			error = disk_open(optarg);
686			if (error != 0)
687				errx(EX_USAGE, "Could not open '%s'", optarg);
688			break;
689
690		case 'e':
691			addenv(optarg);
692			break;
693
694		case 'h':
695			host_base = optarg;
696			break;
697
698		case 'l':
699			if (loader != NULL)
700				errx(EX_USAGE, "-l can only be given once");
701			loader = strdup(optarg);
702			if (loader == NULL)
703				err(EX_OSERR, "malloc");
704			break;
705
706		case 'm':
707			error = vm_parse_memsize(optarg, &mem_size);
708			if (error != 0)
709				errx(EX_USAGE, "Invalid memsize '%s'", optarg);
710			break;
711		case 'C':
712			memflags |= VM_MEM_F_INCORE;
713			break;
714		case 'S':
715			memflags |= VM_MEM_F_WIRED;
716			break;
717		case '?':
718			usage();
719		}
720	}
721
722	argc -= optind;
723	argv += optind;
724
725	if (argc != 1)
726		usage();
727
728	vmname = argv[0];
729
730	need_reinit = 0;
731	error = vm_create(vmname);
732	if (error) {
733		if (errno != EEXIST) {
734			perror("vm_create");
735			exit(1);
736		}
737		need_reinit = 1;
738	}
739
740	ctx = vm_open(vmname);
741	if (ctx == NULL) {
742		perror("vm_open");
743		exit(1);
744	}
745
746	if (need_reinit) {
747		error = vm_reinit(ctx);
748		if (error) {
749			perror("vm_reinit");
750			exit(1);
751		}
752	}
753
754	vm_set_memflags(ctx, memflags);
755	error = vm_setup_memory(ctx, mem_size, VM_MMAP_ALL);
756	if (error) {
757		perror("vm_setup_memory");
758		exit(1);
759	}
760
761	if (loader == NULL) {
762		loader = strdup("/boot/userboot.so");
763		if (loader == NULL)
764			err(EX_OSERR, "malloc");
765	}
766	h = dlopen(loader, RTLD_LOCAL);
767	if (!h) {
768		printf("%s\n", dlerror());
769		free(loader);
770		return (1);
771	}
772	func = dlsym(h, "loader_main");
773	if (!func) {
774		printf("%s\n", dlerror());
775		free(loader);
776		return (1);
777	}
778
779	tcgetattr(consout_fd, &term);
780	oldterm = term;
781	cfmakeraw(&term);
782	term.c_cflag |= CLOCAL;
783
784	tcsetattr(consout_fd, TCSAFLUSH, &term);
785
786	addenv("smbios.bios.vendor=BHYVE");
787	addenv("boot_serial=1");
788
789	func(&cb, NULL, USERBOOT_VERSION_4, ndisks);
790
791	free(loader);
792	return (0);
793}
794