bhyveload.c revision 267399
1/*-
2 * Copyright (c) 2011 NetApp, Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 *
26 * $FreeBSD: stable/10/usr.sbin/bhyveload/bhyveload.c 267399 2014-06-12 15:20:59Z jhb $
27 */
28
29/*-
30 * Copyright (c) 2011 Google, Inc.
31 * All rights reserved.
32 *
33 * Redistribution and use in source and binary forms, with or without
34 * modification, are permitted provided that the following conditions
35 * are met:
36 * 1. Redistributions of source code must retain the above copyright
37 *    notice, this list of conditions and the following disclaimer.
38 * 2. Redistributions in binary form must reproduce the above copyright
39 *    notice, this list of conditions and the following disclaimer in the
40 *    documentation and/or other materials provided with the distribution.
41 *
42 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
43 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
44 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
45 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
46 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
47 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
48 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
49 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
50 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
51 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
52 * SUCH DAMAGE.
53 *
54 * $FreeBSD: stable/10/usr.sbin/bhyveload/bhyveload.c 267399 2014-06-12 15:20:59Z jhb $
55 */
56
57#include <sys/cdefs.h>
58__FBSDID("$FreeBSD: stable/10/usr.sbin/bhyveload/bhyveload.c 267399 2014-06-12 15:20:59Z jhb $");
59
60#include <sys/ioctl.h>
61#include <sys/stat.h>
62#include <sys/disk.h>
63#include <sys/queue.h>
64
65#include <machine/specialreg.h>
66#include <machine/vmm.h>
67
68#include <dirent.h>
69#include <dlfcn.h>
70#include <errno.h>
71#include <err.h>
72#include <fcntl.h>
73#include <getopt.h>
74#include <libgen.h>
75#include <limits.h>
76#include <stdio.h>
77#include <stdlib.h>
78#include <string.h>
79#include <sysexits.h>
80#include <termios.h>
81#include <unistd.h>
82
83#include <vmmapi.h>
84
85#include "userboot.h"
86
87#define	MB	(1024 * 1024UL)
88#define	GB	(1024 * 1024 * 1024UL)
89#define	BSP	0
90
/* Host directory exported to the loader; set from -h, NULL when not given. */
static char *host_base;
/*
 * term holds the raw-mode settings main() applies to the console;
 * oldterm is the saved copy that cb_exit() restores.
 */
static struct termios term, oldterm;
/* Descriptor of the disk image given with -d; -1 when no disk is open. */
static int disk_fd = -1;
/* Console descriptors: stdin/stdout unless altcons_open() replaces them. */
static int consin_fd, consout_fd;

/* vmname is the non-option argument; progname is used by usage(). */
static char *vmname, *progname;
/* Context returned by vm_open() in main(). */
static struct vmctx *ctx;

/*
 * Values recorded by cb_setgdt(), cb_setcr() (register 3) and
 * cb_setreg() (register 4); consumed by cb_exec().
 */
static uint64_t gdtbase, cr3, rsp;

/* Declared early because the register callbacks call it before its definition. */
static void cb_exit(void *arg, int v);
102
103/*
104 * Console i/o callbacks
105 */
106
107static void
108cb_putc(void *arg, int ch)
109{
110	char c = ch;
111
112	(void) write(consout_fd, &c, 1);
113}
114
115static int
116cb_getc(void *arg)
117{
118	char c;
119
120	if (read(consin_fd, &c, 1) == 1)
121		return (c);
122	return (-1);
123}
124
125static int
126cb_poll(void *arg)
127{
128	int n;
129
130	if (ioctl(consin_fd, FIONREAD, &n) >= 0)
131		return (n > 0);
132	return (0);
133}
134
135/*
136 * Host filesystem i/o callbacks
137 */
138
/*
 * Handle for a host file or directory opened by cb_open() and passed
 * back to the other host filesystem callbacks as their `h' argument.
 */
struct cb_file {
	int cf_isdir;		/* non-zero: cf_u.dir is valid; zero: cf_u.fd */
	size_t cf_size;		/* written by cb_open(); not read elsewhere in this file */
	struct stat cf_stat;	/* stat(2) result captured by cb_open() */
	union {
		int fd;		/* descriptor from open(2) for regular files */
		DIR *dir;	/* stream from opendir(3) for directories */
	} cf_u;
};
148
149static int
150cb_open(void *arg, const char *filename, void **hp)
151{
152	struct stat st;
153	struct cb_file *cf;
154	char path[PATH_MAX];
155
156	if (!host_base)
157		return (ENOENT);
158
159	strlcpy(path, host_base, PATH_MAX);
160	if (path[strlen(path) - 1] == '/')
161		path[strlen(path) - 1] = 0;
162	strlcat(path, filename, PATH_MAX);
163	cf = malloc(sizeof(struct cb_file));
164	if (stat(path, &cf->cf_stat) < 0) {
165		free(cf);
166		return (errno);
167	}
168
169	cf->cf_size = st.st_size;
170	if (S_ISDIR(cf->cf_stat.st_mode)) {
171		cf->cf_isdir = 1;
172		cf->cf_u.dir = opendir(path);
173		if (!cf->cf_u.dir)
174			goto out;
175		*hp = cf;
176		return (0);
177	}
178	if (S_ISREG(cf->cf_stat.st_mode)) {
179		cf->cf_isdir = 0;
180		cf->cf_u.fd = open(path, O_RDONLY);
181		if (cf->cf_u.fd < 0)
182			goto out;
183		*hp = cf;
184		return (0);
185	}
186
187out:
188	free(cf);
189	return (EINVAL);
190}
191
192static int
193cb_close(void *arg, void *h)
194{
195	struct cb_file *cf = h;
196
197	if (cf->cf_isdir)
198		closedir(cf->cf_u.dir);
199	else
200		close(cf->cf_u.fd);
201	free(cf);
202
203	return (0);
204}
205
206static int
207cb_isdir(void *arg, void *h)
208{
209	struct cb_file *cf = h;
210
211	return (cf->cf_isdir);
212}
213
214static int
215cb_read(void *arg, void *h, void *buf, size_t size, size_t *resid)
216{
217	struct cb_file *cf = h;
218	ssize_t sz;
219
220	if (cf->cf_isdir)
221		return (EINVAL);
222	sz = read(cf->cf_u.fd, buf, size);
223	if (sz < 0)
224		return (EINVAL);
225	*resid = size - sz;
226	return (0);
227}
228
229static int
230cb_readdir(void *arg, void *h, uint32_t *fileno_return, uint8_t *type_return,
231	   size_t *namelen_return, char *name)
232{
233	struct cb_file *cf = h;
234	struct dirent *dp;
235
236	if (!cf->cf_isdir)
237		return (EINVAL);
238
239	dp = readdir(cf->cf_u.dir);
240	if (!dp)
241		return (ENOENT);
242
243	/*
244	 * Note: d_namlen is in the range 0..255 and therefore less
245	 * than PATH_MAX so we don't need to test before copying.
246	 */
247	*fileno_return = dp->d_fileno;
248	*type_return = dp->d_type;
249	*namelen_return = dp->d_namlen;
250	memcpy(name, dp->d_name, dp->d_namlen);
251	name[dp->d_namlen] = 0;
252
253	return (0);
254}
255
256static int
257cb_seek(void *arg, void *h, uint64_t offset, int whence)
258{
259	struct cb_file *cf = h;
260
261	if (cf->cf_isdir)
262		return (EINVAL);
263	if (lseek(cf->cf_u.fd, offset, whence) < 0)
264		return (errno);
265	return (0);
266}
267
268static int
269cb_stat(void *arg, void *h, int *mode, int *uid, int *gid, uint64_t *size)
270{
271	struct cb_file *cf = h;
272
273	*mode = cf->cf_stat.st_mode;
274	*uid = cf->cf_stat.st_uid;
275	*gid = cf->cf_stat.st_gid;
276	*size = cf->cf_stat.st_size;
277	return (0);
278}
279
280/*
281 * Disk image i/o callbacks
282 */
283
284static int
285cb_diskread(void *arg, int unit, uint64_t from, void *to, size_t size,
286	    size_t *resid)
287{
288	ssize_t n;
289
290	if (unit != 0 || disk_fd == -1)
291		return (EIO);
292	n = pread(disk_fd, to, size, from);
293	if (n < 0)
294		return (errno);
295	*resid = size - n;
296	return (0);
297}
298
299static int
300cb_diskioctl(void *arg, int unit, u_long cmd, void *data)
301{
302	struct stat sb;
303
304	if (unit != 0 || disk_fd == -1)
305		return (EBADF);
306
307	switch (cmd) {
308	case DIOCGSECTORSIZE:
309		*(u_int *)data = 512;
310		break;
311	case DIOCGMEDIASIZE:
312		if (fstat(disk_fd, &sb) == 0)
313			*(off_t *)data = sb.st_size;
314		else
315			return (ENOTTY);
316		break;
317	default:
318		return (ENOTTY);
319	}
320
321	return (0);
322}
323
324/*
325 * Guest virtual machine i/o callbacks
326 */
327static int
328cb_copyin(void *arg, const void *from, uint64_t to, size_t size)
329{
330	char *ptr;
331
332	to &= 0x7fffffff;
333
334	ptr = vm_map_gpa(ctx, to, size);
335	if (ptr == NULL)
336		return (EFAULT);
337
338	memcpy(ptr, from, size);
339	return (0);
340}
341
342static int
343cb_copyout(void *arg, uint64_t from, void *to, size_t size)
344{
345	char *ptr;
346
347	from &= 0x7fffffff;
348
349	ptr = vm_map_gpa(ctx, from, size);
350	if (ptr == NULL)
351		return (EFAULT);
352
353	memcpy(to, ptr, size);
354	return (0);
355}
356
357static void
358cb_setreg(void *arg, int r, uint64_t v)
359{
360	int error;
361	enum vm_reg_name vmreg;
362
363	vmreg = VM_REG_LAST;
364
365	switch (r) {
366	case 4:
367		vmreg = VM_REG_GUEST_RSP;
368		rsp = v;
369		break;
370	default:
371		break;
372	}
373
374	if (vmreg == VM_REG_LAST) {
375		printf("test_setreg(%d): not implemented\n", r);
376		cb_exit(NULL, USERBOOT_EXIT_QUIT);
377	}
378
379	error = vm_set_register(ctx, BSP, vmreg, v);
380	if (error) {
381		perror("vm_set_register");
382		cb_exit(NULL, USERBOOT_EXIT_QUIT);
383	}
384}
385
386static void
387cb_setmsr(void *arg, int r, uint64_t v)
388{
389	int error;
390	enum vm_reg_name vmreg;
391
392	vmreg = VM_REG_LAST;
393
394	switch (r) {
395	case MSR_EFER:
396		vmreg = VM_REG_GUEST_EFER;
397		break;
398	default:
399		break;
400	}
401
402	if (vmreg == VM_REG_LAST) {
403		printf("test_setmsr(%d): not implemented\n", r);
404		cb_exit(NULL, USERBOOT_EXIT_QUIT);
405	}
406
407	error = vm_set_register(ctx, BSP, vmreg, v);
408	if (error) {
409		perror("vm_set_msr");
410		cb_exit(NULL, USERBOOT_EXIT_QUIT);
411	}
412}
413
414static void
415cb_setcr(void *arg, int r, uint64_t v)
416{
417	int error;
418	enum vm_reg_name vmreg;
419
420	vmreg = VM_REG_LAST;
421
422	switch (r) {
423	case 0:
424		vmreg = VM_REG_GUEST_CR0;
425		break;
426	case 3:
427		vmreg = VM_REG_GUEST_CR3;
428		cr3 = v;
429		break;
430	case 4:
431		vmreg = VM_REG_GUEST_CR4;
432		break;
433	default:
434		break;
435	}
436
437	if (vmreg == VM_REG_LAST) {
438		printf("test_setcr(%d): not implemented\n", r);
439		cb_exit(NULL, USERBOOT_EXIT_QUIT);
440	}
441
442	error = vm_set_register(ctx, BSP, vmreg, v);
443	if (error) {
444		perror("vm_set_cr");
445		cb_exit(NULL, USERBOOT_EXIT_QUIT);
446	}
447}
448
449static void
450cb_setgdt(void *arg, uint64_t base, size_t size)
451{
452	int error;
453
454	error = vm_set_desc(ctx, BSP, VM_REG_GUEST_GDTR, base, size - 1, 0);
455	if (error != 0) {
456		perror("vm_set_desc(gdt)");
457		cb_exit(NULL, USERBOOT_EXIT_QUIT);
458	}
459
460	gdtbase = base;
461}
462
463static void
464cb_exec(void *arg, uint64_t rip)
465{
466	int error;
467
468	if (cr3 == 0)
469		error = vm_setup_freebsd_registers_i386(ctx, BSP, rip, gdtbase,
470		    rsp);
471	else
472		error = vm_setup_freebsd_registers(ctx, BSP, rip, cr3, gdtbase,
473		    rsp);
474	if (error) {
475		perror("vm_setup_freebsd_registers");
476		cb_exit(NULL, USERBOOT_EXIT_QUIT);
477	}
478
479	cb_exit(NULL, 0);
480}
481
482/*
483 * Misc
484 */
485
/* Suspend execution for `usec' microseconds using usleep(3). */
static void
cb_delay(void *arg, int usec)
{
	usleep(usec);
}
492
493static void
494cb_exit(void *arg, int v)
495{
496
497	tcsetattr(consout_fd, TCSAFLUSH, &oldterm);
498	exit(v);
499}
500
501static void
502cb_getmem(void *arg, uint64_t *ret_lowmem, uint64_t *ret_highmem)
503{
504
505	vm_get_memory_seg(ctx, 0, ret_lowmem, NULL);
506	vm_get_memory_seg(ctx, 4 * GB, ret_highmem, NULL);
507}
508
/*
 * One loader environment setting.  Entries are created by addenv() and
 * looked up by position in cb_getenv().
 */
struct env {
	const char *str;	/* name=value */
	SLIST_ENTRY(env) next;	/* link to the next entry in envhead */
};

/*
 * Singly-linked list of environment settings.  addenv() inserts at the
 * head, so the most recently added entry comes first.
 */
static SLIST_HEAD(envhead, env) envhead;
515
516static void
517addenv(const char *str)
518{
519	struct env *env;
520
521	env = malloc(sizeof(struct env));
522	env->str = str;
523	SLIST_INSERT_HEAD(&envhead, env, next);
524}
525
526static const char *
527cb_getenv(void *arg, int num)
528{
529	int i;
530	struct env *env;
531
532	i = 0;
533	SLIST_FOREACH(env, &envhead, next) {
534		if (i == num)
535			return (env->str);
536		i++;
537	}
538
539	return (NULL);
540}
541
/*
 * Callback table handed to the loader entry point in main().  Each
 * member points at the matching cb_* function defined in this file.
 */
static struct loader_callbacks cb = {
	/* Console i/o */
	.getc = cb_getc,
	.putc = cb_putc,
	.poll = cb_poll,

	/* Host filesystem i/o */
	.open = cb_open,
	.close = cb_close,
	.isdir = cb_isdir,
	.read = cb_read,
	.readdir = cb_readdir,
	.seek = cb_seek,
	.stat = cb_stat,

	/* Disk image i/o */
	.diskread = cb_diskread,
	.diskioctl = cb_diskioctl,

	/* Guest memory and register access */
	.copyin = cb_copyin,
	.copyout = cb_copyout,
	.setreg = cb_setreg,
	.setmsr = cb_setmsr,
	.setcr = cb_setcr,
	.setgdt = cb_setgdt,
	.exec = cb_exec,

	/* Misc */
	.delay = cb_delay,
	.exit = cb_exit,
	.getmem = cb_getmem,

	.getenv = cb_getenv,
};
572
573static int
574altcons_open(char *path)
575{
576	struct stat sb;
577	int err;
578	int fd;
579
580	/*
581	 * Allow stdio to be passed in so that the same string
582	 * can be used for the bhyveload console and bhyve com-port
583	 * parameters
584	 */
585	if (!strcmp(path, "stdio"))
586		return (0);
587
588	err = stat(path, &sb);
589	if (err == 0) {
590		if (!S_ISCHR(sb.st_mode))
591			err = ENOTSUP;
592		else {
593			fd = open(path, O_RDWR | O_NONBLOCK);
594			if (fd < 0)
595				err = errno;
596			else
597				consin_fd = consout_fd = fd;
598		}
599	}
600
601	return (err);
602}
603
604static void
605usage(void)
606{
607
608	fprintf(stderr,
609	    "usage: %s [-m mem-size] [-d <disk-path>] [-h <host-path>]\n"
610	    "       %*s [-e <name=value>] [-c <console-device>] <vmname>\n",
611	    progname,
612	    (int)strlen(progname), "");
613	exit(1);
614}
615
616int
617main(int argc, char** argv)
618{
619	void *h;
620	void (*func)(struct loader_callbacks *, void *, int, int);
621	uint64_t mem_size;
622	int opt, error;
623	char *disk_image;
624
625	progname = basename(argv[0]);
626
627	mem_size = 256 * MB;
628	disk_image = NULL;
629
630	consin_fd = STDIN_FILENO;
631	consout_fd = STDOUT_FILENO;
632
633	while ((opt = getopt(argc, argv, "c:d:e:h:m:")) != -1) {
634		switch (opt) {
635		case 'c':
636			error = altcons_open(optarg);
637			if (error != 0)
638				errx(EX_USAGE, "Could not open '%s'", optarg);
639			break;
640		case 'd':
641			disk_image = optarg;
642			break;
643
644		case 'e':
645			addenv(optarg);
646			break;
647
648		case 'h':
649			host_base = optarg;
650			break;
651
652		case 'm':
653			error = vm_parse_memsize(optarg, &mem_size);
654			if (error != 0)
655				errx(EX_USAGE, "Invalid memsize '%s'", optarg);
656			break;
657		case '?':
658			usage();
659		}
660	}
661
662	argc -= optind;
663	argv += optind;
664
665	if (argc != 1)
666		usage();
667
668	vmname = argv[0];
669
670	error = vm_create(vmname);
671	if (error != 0 && errno != EEXIST) {
672		perror("vm_create");
673		exit(1);
674
675	}
676
677	ctx = vm_open(vmname);
678	if (ctx == NULL) {
679		perror("vm_open");
680		exit(1);
681	}
682
683	error = vm_setup_memory(ctx, mem_size, VM_MMAP_ALL);
684	if (error) {
685		perror("vm_setup_memory");
686		exit(1);
687	}
688
689	tcgetattr(consout_fd, &term);
690	oldterm = term;
691	cfmakeraw(&term);
692	term.c_cflag |= CLOCAL;
693
694	tcsetattr(consout_fd, TCSAFLUSH, &term);
695
696	h = dlopen("/boot/userboot.so", RTLD_LOCAL);
697	if (!h) {
698		printf("%s\n", dlerror());
699		return (1);
700	}
701	func = dlsym(h, "loader_main");
702	if (!func) {
703		printf("%s\n", dlerror());
704		return (1);
705	}
706
707	if (disk_image) {
708		disk_fd = open(disk_image, O_RDONLY);
709	}
710
711	addenv("smbios.bios.vendor=BHYVE");
712	addenv("boot_serial=1");
713
714	func(&cb, NULL, USERBOOT_VERSION_3, disk_fd >= 0);
715}
716