zfsboot.c revision 208388
1/*-
2 * Copyright (c) 1998 Robert Nordier
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms are freely
6 * permitted provided that the above copyright notice and this
7 * paragraph and the following disclaimer are duplicated in all
8 * such forms.
9 *
10 * This software is provided "AS IS" and without any express or
11 * implied warranties, including, without limitation, the implied
12 * warranties of merchantability and fitness for a particular
13 * purpose.
14 */
15
16#include <sys/cdefs.h>
17__FBSDID("$FreeBSD: head/sys/boot/i386/zfsboot/zfsboot.c 208388 2010-05-21 16:58:52Z jhb $");
18
19#include <sys/param.h>
20#include <sys/errno.h>
21#include <sys/diskmbr.h>
22#ifdef GPT
23#include <sys/gpt.h>
24#endif
25#include <sys/reboot.h>
26#include <sys/queue.h>
27
28#include <machine/bootinfo.h>
29#include <machine/elf.h>
30#include <machine/pc/bios.h>
31
32#include <stdarg.h>
33#include <stddef.h>
34
35#include <a.out.h>
36
37#include <btxv86.h>
38
39#ifndef GPT
40#include "zfsboot.h"
41#endif
42#include "lib.h"
43
/* Console device bits; ioctrl holds an OR of these. */
#define IO_KEYBOARD	1
#define IO_SERIAL	2

#define SECOND		18	/* Circa that many ticks in a second. */

/*
 * Boot option numbers.  Each RBX_* value is a bit position rather than
 * a mask: OPT_SET() turns it into a mask bit and OPT_CHECK() tests that
 * bit in the global option word.  The letter in each trailing comment
 * is the command-line flag that toggles the option in parse().
 */
#define RBX_ASKNAME	0x0	/* -a */
#define RBX_SINGLE	0x1	/* -s */
/* 0x2 is reserved for log2(RB_NOSYNC). */
/* 0x3 is reserved for log2(RB_HALT). */
/* 0x4 is reserved for log2(RB_INITNAME). */
#define RBX_DFLTROOT	0x5	/* -r */
#define RBX_KDB 	0x6	/* -d */
/* 0x7 is reserved for log2(RB_RDONLY). */
/* 0x8 is reserved for log2(RB_DUMP). */
/* 0x9 is reserved for log2(RB_MINIROOT). */
#define RBX_CONFIG	0xa	/* -c */
#define RBX_VERBOSE	0xb	/* -v */
#define RBX_SERIAL	0xc	/* -h */
#define RBX_CDROM	0xd	/* -C */
/* 0xe is reserved for log2(RB_POWEROFF). */
#define RBX_GDB 	0xf	/* -g */
#define RBX_MUTE	0x10	/* -m */
/* 0x11 is reserved for log2(RB_SELFTEST). */
/* 0x12 is reserved for boot programs. */
/* 0x13 is reserved for boot programs. */
#define RBX_PAUSE	0x14	/* -p */
#define RBX_QUIET	0x15	/* -q */
#define RBX_NOINTR	0x1c	/* -n */
/* 0x1d is reserved for log2(RB_MULTIPLE) and is just misnamed here. */
#define RBX_DUAL	0x1d	/* -D */
/* 0x1f is reserved for log2(RB_BOOTINFO). */

/*
 * Options that load() forwards to the loaded image (opts & RBX_MASK).
 * RBX_QUIET and RBX_NOINTR are not in the mask and stay local.
 */
/* pass: -a, -s, -r, -d, -c, -v, -h, -C, -g, -m, -p, -D */
#define RBX_MASK	(OPT_SET(RBX_ASKNAME) | OPT_SET(RBX_SINGLE) | \
			OPT_SET(RBX_DFLTROOT) | OPT_SET(RBX_KDB ) | \
			OPT_SET(RBX_CONFIG) | OPT_SET(RBX_VERBOSE) | \
			OPT_SET(RBX_SERIAL) | OPT_SET(RBX_CDROM) | \
			OPT_SET(RBX_GDB ) | OPT_SET(RBX_MUTE) | \
			OPT_SET(RBX_PAUSE) | OPT_SET(RBX_DUAL))
83
/* Hint to loader that we came from ZFS */
#define	KARGS_FLAGS_ZFS		0x4

/* Pathnames looked up in the selected pool. */
#define PATH_CONFIG	"/boot.config"
#define PATH_BOOT3	"/boot/zfsloader"
#define PATH_KERNEL	"/boot/kernel/kernel"

/*
 * ARGS is the physical address of the argument bytes main() reads:
 * the BIOS drive number at ARGS and a slice number at ARGS + 1.
 * NOPT and NDEV size the option and device tables below.
 */
#define ARGS		0x900
#define NOPT		14
#define NDEV		3
/* Test the carry (bit 0) and zero (bit 6) bits of a saved flags word. */
#define V86_CY(x)	((x) & 1)
#define V86_ZR(x)	((x) & 0x40)

/* Physical address of the byte main() reads as the BIOS drive count. */
#define BIOS_NUMDRIVES		0x475
/* DRV_HARD marks a hard-disk drive number; DRV_MASK extracts the unit. */
#define DRV_HARD	0x80
#define DRV_MASK	0x7f

/* Device type codes; main() uses them to index dev_maj[]. */
#define TYPE_AD		0
#define TYPE_DA		1
#define TYPE_MAXHARD	TYPE_DA
#define TYPE_FD		2

/*
 * OPT_SET() converts an RBX_* bit number into a mask; OPT_CHECK() tests
 * that bit in the file-scope variable `opts', which must be in scope.
 */
#define OPT_SET(opt)	(1 << (opt))
#define OPT_CHECK(opt)	((opts) & OPT_SET(opt))
108
/* main() takes the address of _end to place the DMA buffers after it. */
extern uint32_t _end;

#ifdef GPT
/* GPT partition type that probe_drive() searches for. */
static const uuid_t freebsd_zfs_uuid = GPT_ENT_TYPE_FREEBSD_ZFS;
#endif
/*
 * optstr[] and flags[] are parallel tables: parse() looks an option
 * letter up in optstr[] and toggles the RBX_* bit stored at the same
 * index in flags[].  optstr[] is exactly NOPT characters long, so it
 * carries no terminating NUL.
 */
static const char optstr[NOPT] = "DhaCcdgmnpqrsv"; /* Also 'P', 'S' */
static const unsigned char flags[NOPT] = {
    RBX_DUAL,
    RBX_SERIAL,
    RBX_ASKNAME,
    RBX_CDROM,
    RBX_CONFIG,
    RBX_KDB,
    RBX_GDB,
    RBX_MUTE,
    RBX_NOINTR,
    RBX_PAUSE,
    RBX_QUIET,
    RBX_DFLTROOT,
    RBX_SINGLE,
    RBX_VERBOSE
};

/* Per-device-type tables; dev_maj[] is indexed by the TYPE_* codes. */
static const char *const dev_nm[NDEV] = {"ad", "da", "fd"};
static const unsigned char dev_maj[NDEV] = {30, 4, 2};
134
/*
 * Describes one BIOS disk (or one partition of it) being read.  A
 * pointer to this structure is the `priv' argument handed to
 * vdev_read().
 */
struct dsk {
    unsigned drive;	/* BIOS drive number passed to the disk read call */
    unsigned type;	/* TYPE_* device type code */
    unsigned unit;	/* unit number (drive number without DRV_HARD) */
    unsigned slice;	/* slice number used when building bootdev */
    unsigned part;	/* partition number used when building bootdev */
    int init;		/* only ever set to 0 in this file */
    daddr_t start;	/* sector offset drvread() adds to every LBA */
};
/* Command line: filled from PATH_CONFIG or by getstr(), parsed by parse(). */
static char cmd[512];
/* Path of the file load() will try to boot. */
static char kname[1024];
/* Current boot options, one bit per RBX_* number. */
static uint32_t opts;
/* Serial console speed; parse() changes it for the -S option. */
static int comspeed = SIOSPD;
/* Boot information block whose address load() passes to the image. */
static struct bootinfo bootinfo;
/* Boot device word built by main() and passed on by load(). */
static uint32_t bootdev;
/* Active consoles: an OR of IO_KEYBOARD and IO_SERIAL. */
static uint8_t ioctrl = IO_KEYBOARD;

/* Memory layout figures filled in by bios_getmem(). */
vm_offset_t	high_heap_base;
uint32_t	bios_basemem, bios_extmem, high_heap_size;

/* Buffer that receives one system memory map entry per BIOS call. */
static struct bios_smap smap;

/*
 * The minimum amount of memory to reserve in bios_extmem for the heap.
 */
#define	HEAP_MIN	(3 * 1024 * 1024)

/* Bounds of the bump allocator used by malloc(): next free byte and limit. */
static char *heap_next;
static char *heap_end;
164
/* Buffers that must not span a 64k boundary. */
#define READ_BUF_SIZE	8192
/*
 * Bounce buffers for BIOS disk reads.  main() points `dmadat' at the
 * first 64k-aligned address past _end, so both buffers (16k in total)
 * sit inside a single 64k window.
 */
struct dmadat {
	char rdbuf[READ_BUF_SIZE];	/* for reading large things */
	char secbuf[READ_BUF_SIZE];	/* for MBR/disklabel */
};
static struct dmadat *dmadat;
172
/*
 * Forward declarations for functions that are used before their
 * definitions further down in this file.
 */
void exit(int);
static void load(void);
static int parse(void);
static void printf(const char *,...);
static void putchar(int);
static void bios_getmem(void);
static int drvread(struct dsk *, void *, daddr_t, unsigned);
static int keyhit(unsigned);
static int xputc(int);
static int xgetc(int);
static int getc(int);
184
static void memcpy(void *, const void *, int);

/*
 * Copy `len' bytes from `src' to `dst', lowest address first.  Nothing
 * is returned, and overlapping regions get no special treatment.
 */
static void
memcpy(void *dst, const void *src, int len)
{
    const char *from = src;
    char *to = dst;

    for (; len != 0; len--)
	*to++ = *from++;
}
195
/*
 * Copy the NUL-terminated string `src', terminator included, to `dst'.
 * Unlike the standard function this one returns nothing.
 */
static void
strcpy(char *dst, const char *src)
{
    char ch;

    do {
	ch = *src++;
	*dst++ = ch;
    } while (ch != 0);
}
203
/*
 * Append the NUL-terminated string `src' to the end of the string in
 * `dst' and re-terminate the result.  Returns nothing.
 */
static void
strcat(char *dst, const char *src)
{
    char *tail;

    for (tail = dst; *tail != 0; tail++)
	;
    for (; *src != 0; src++, tail++)
	*tail = *src;
    *tail = 0;
}
213
/*
 * Compare two NUL-terminated strings.  Returns zero when they are
 * equal, otherwise the difference between the first pair of differing
 * characters taken as unsigned char values.
 */
static int
strcmp(const char *s1, const char *s2)
{
    while (*s1 != 0 && *s1 == *s2) {
	s1++;
	s2++;
    }
    return (unsigned char)*s1 - (unsigned char)*s2;
}
220
/*
 * Find the first occurrence of `ch' in the string `s'.  Returns a
 * pointer to it, or 0 when it does not occur.  The terminating NUL is
 * never matched, so searching for 0 always yields 0.
 */
static const char *
strchr(const char *s, char ch)
{
    while (*s != 0) {
	if (*s == ch)
	    return s;
	s++;
    }
    return 0;
}
229
/*
 * Compare the first `n' bytes of two buffers.  Returns zero when they
 * match, otherwise the difference between the first pair of differing
 * bytes taken as unsigned char values.
 */
static int
memcmp(const void *p1, const void *p2, size_t n)
{
    const char *a = p1;
    const char *b = p2;
    size_t i;

    for (i = 0; i < n; i++) {
	if (a[i] != b[i])
	    return (unsigned char)a[i] - (unsigned char)b[i];
    }
    return 0;
}
241
/*
 * Store `val' into each of the first `n' bytes at `p'.  Returns
 * nothing.
 */
static void
memset(void *p, char val, size_t n)
{
    char *bytes = p;
    size_t i;

    for (i = 0; i < n; i++)
	bytes[i] = val;
}
249
250static void *
251malloc(size_t n)
252{
253	char *p = heap_next;
254	if (p + n > heap_end) {
255		printf("malloc failure\n");
256		for (;;)
257		    ;
258		return 0;
259	}
260	heap_next += n;
261	return p;
262}
263
/*
 * Return the number of characters in `s' before its terminating NUL.
 */
static size_t
strlen(const char *s)
{
	const char *end = s;

	while (*end != 0)
		end++;
	return (size_t)(end - s);
}
272
/*
 * Return a copy of the string `s' in storage obtained from malloc().
 */
static char *
strdup(const char *s)
{
	size_t bytes = strlen(s) + 1;
	char *copy = malloc(bytes);

	strcpy(copy, s);
	return copy;
}
280
281#include "zfsimpl.c"
282
283/*
284 * Read from a dnode (which must be from a ZPL filesystem).
285 */
286static int
287zfs_read(spa_t *spa, const dnode_phys_t *dnode, off_t *offp, void *start, size_t size)
288{
289	const znode_phys_t *zp = (const znode_phys_t *) dnode->dn_bonus;
290	size_t n;
291	int rc;
292
293	n = size;
294	if (*offp + n > zp->zp_size)
295		n = zp->zp_size - *offp;
296
297	rc = dnode_read(spa, dnode, *offp, start, n);
298	if (rc)
299		return (-1);
300	*offp += n;
301
302	return (n);
303}
304
/*
 * Current ZFS pool.  main() sets it from the boot drive (or to the
 * first pool found) and parse() may switch it when a "pool:" prefix is
 * given on the command line.
 */
spa_t *spa;
309
310/*
311 * A wrapper for dskread that doesn't have to worry about whether the
312 * buffer pointer crosses a 64k boundary.
313 */
314static int
315vdev_read(vdev_t *vdev, void *priv, off_t off, void *buf, size_t bytes)
316{
317	char *p;
318	daddr_t lba;
319	unsigned int nb;
320	struct dsk *dsk = (struct dsk *) priv;
321
322	if ((off & (DEV_BSIZE - 1)) || (bytes & (DEV_BSIZE - 1)))
323		return -1;
324
325	p = buf;
326	lba = off / DEV_BSIZE;
327	while (bytes > 0) {
328		nb = bytes / DEV_BSIZE;
329		if (nb > READ_BUF_SIZE / DEV_BSIZE)
330			nb = READ_BUF_SIZE / DEV_BSIZE;
331		if (drvread(dsk, dmadat->rdbuf, lba, nb))
332			return -1;
333		memcpy(p, dmadat->rdbuf, nb * DEV_BSIZE);
334		p += nb * DEV_BSIZE;
335		lba += nb;
336		bytes -= nb * DEV_BSIZE;
337	}
338
339	return 0;
340}
341
342static int
343xfsread(const dnode_phys_t *dnode, off_t *offp, void *buf, size_t nbyte)
344{
345    if ((size_t)zfs_read(spa, dnode, offp, buf, nbyte) != nbyte) {
346	printf("Invalid %s\n", "format");
347	return -1;
348    }
349    return 0;
350}
351
352static void
353bios_getmem(void)
354{
355    uint64_t size;
356
357    /* Parse system memory map */
358    v86.ebx = 0;
359    do {
360	v86.ctl = V86_FLAGS;
361	v86.addr = 0x15;		/* int 0x15 function 0xe820*/
362	v86.eax = 0xe820;
363	v86.ecx = sizeof(struct bios_smap);
364	v86.edx = SMAP_SIG;
365	v86.es = VTOPSEG(&smap);
366	v86.edi = VTOPOFF(&smap);
367	v86int();
368	if ((v86.efl & 1) || (v86.eax != SMAP_SIG))
369	    break;
370	/* look for a low-memory segment that's large enough */
371	if ((smap.type == SMAP_TYPE_MEMORY) && (smap.base == 0) &&
372	    (smap.length >= (512 * 1024)))
373	    bios_basemem = smap.length;
374	/* look for the first segment in 'extended' memory */
375	if ((smap.type == SMAP_TYPE_MEMORY) && (smap.base == 0x100000)) {
376	    bios_extmem = smap.length;
377	}
378
379	/*
380	 * Look for the largest segment in 'extended' memory beyond
381	 * 1MB but below 4GB.
382	 */
383	if ((smap.type == SMAP_TYPE_MEMORY) && (smap.base > 0x100000) &&
384	    (smap.base < 0x100000000ull)) {
385	    size = smap.length;
386
387	    /*
388	     * If this segment crosses the 4GB boundary, truncate it.
389	     */
390	    if (smap.base + size > 0x100000000ull)
391		size = 0x100000000ull - smap.base;
392
393	    if (size > high_heap_size) {
394		high_heap_size = size;
395		high_heap_base = smap.base;
396	    }
397	}
398    } while (v86.ebx != 0);
399
400    /* Fall back to the old compatibility function for base memory */
401    if (bios_basemem == 0) {
402	v86.ctl = 0;
403	v86.addr = 0x12;		/* int 0x12 */
404	v86int();
405
406	bios_basemem = (v86.eax & 0xffff) * 1024;
407    }
408
409    /* Fall back through several compatibility functions for extended memory */
410    if (bios_extmem == 0) {
411	v86.ctl = V86_FLAGS;
412	v86.addr = 0x15;		/* int 0x15 function 0xe801*/
413	v86.eax = 0xe801;
414	v86int();
415	if (!(v86.efl & 1)) {
416	    bios_extmem = ((v86.ecx & 0xffff) + ((v86.edx & 0xffff) * 64)) * 1024;
417	}
418    }
419    if (bios_extmem == 0) {
420	v86.ctl = 0;
421	v86.addr = 0x15;		/* int 0x15 function 0x88*/
422	v86.eax = 0x8800;
423	v86int();
424	bios_extmem = (v86.eax & 0xffff) * 1024;
425    }
426
427    /*
428     * If we have extended memory and did not find a suitable heap
429     * region in the SMAP, use the last 3MB of 'extended' memory as a
430     * high heap candidate.
431     */
432    if (bios_extmem >= HEAP_MIN && high_heap_size < HEAP_MIN) {
433	high_heap_size = HEAP_MIN;
434	high_heap_base = bios_extmem + 0x100000 - HEAP_MIN;
435    }
436}
437
438static inline void
439getstr(void)
440{
441    char *s;
442    int c;
443
444    s = cmd;
445    for (;;) {
446	switch (c = xgetc(0)) {
447	case 0:
448	    break;
449	case '\177':
450	case '\b':
451	    if (s > cmd) {
452		s--;
453		printf("\b \b");
454	    }
455	    break;
456	case '\n':
457	case '\r':
458	    *s = 0;
459	    return;
460	default:
461	    if (s - cmd < sizeof(cmd) - 1)
462		*s++ = c;
463	    putchar(c);
464	}
465    }
466}
467
468static inline void
469putc(int c)
470{
471    v86.ctl = 0;
472    v86.addr = 0x10;
473    v86.eax = 0xe00 | (c & 0xff);
474    v86.ebx = 0x7;
475    v86int();
476}
477
478/*
479 * Try to detect a device supported by the legacy int13 BIOS
480 */
481static int
482int13probe(int drive)
483{
484    v86.ctl = V86_FLAGS;
485    v86.addr = 0x13;
486    v86.eax = 0x800;
487    v86.edx = drive;
488    v86int();
489
490    if (!(v86.efl & 0x1) &&				/* carry clear */
491	((v86.edx & 0xff) != (drive & DRV_MASK))) {	/* unit # OK */
492	if ((v86.ecx & 0x3f) == 0) {			/* absurd sector size */
493		return(0);				/* skip device */
494	}
495	return (1);
496    }
497    return(0);
498}
499
500/*
501 * We call this when we find a ZFS vdev - ZFS consumes the dsk
502 * structure so we must make a new one.
503 */
504static struct dsk *
505copy_dsk(struct dsk *dsk)
506{
507    struct dsk *newdsk;
508
509    newdsk = malloc(sizeof(struct dsk));
510    *newdsk = *dsk;
511    return (newdsk);
512}
513
514static void
515probe_drive(struct dsk *dsk, spa_t **spap)
516{
517#ifdef GPT
518    struct gpt_hdr hdr;
519    struct gpt_ent *ent;
520    daddr_t slba, elba;
521    unsigned part, entries_per_sec;
522#endif
523    struct dos_partition *dp;
524    char *sec;
525    unsigned i;
526
527    /*
528     * If we find a vdev on the whole disk, stop here. Otherwise dig
529     * out the MBR and probe each slice in turn for a vdev.
530     */
531    if (vdev_probe(vdev_read, dsk, spap) == 0)
532	return;
533
534    sec = dmadat->secbuf;
535    dsk->start = 0;
536
537#ifdef GPT
538    /*
539     * First check for GPT.
540     */
541    if (drvread(dsk, sec, 1, 1)) {
542	return;
543    }
544    memcpy(&hdr, sec, sizeof(hdr));
545    if (memcmp(hdr.hdr_sig, GPT_HDR_SIG, sizeof(hdr.hdr_sig)) != 0 ||
546	hdr.hdr_lba_self != 1 || hdr.hdr_revision < 0x00010000 ||
547	hdr.hdr_entsz < sizeof(*ent) || DEV_BSIZE % hdr.hdr_entsz != 0) {
548	goto trymbr;
549    }
550
551    /*
552     * Probe all GPT partitions for the presense of ZFS pools. We
553     * return the spa_t for the first we find (if requested). This
554     * will have the effect of booting from the first pool on the
555     * disk.
556     */
557    entries_per_sec = DEV_BSIZE / hdr.hdr_entsz;
558    slba = hdr.hdr_lba_table;
559    elba = slba + hdr.hdr_entries / entries_per_sec;
560    while (slba < elba) {
561	dsk->start = 0;
562	if (drvread(dsk, sec, slba, 1))
563	    return;
564	for (part = 0; part < entries_per_sec; part++) {
565	    ent = (struct gpt_ent *)(sec + part * hdr.hdr_entsz);
566	    if (memcmp(&ent->ent_type, &freebsd_zfs_uuid,
567		     sizeof(uuid_t)) == 0) {
568		dsk->start = ent->ent_lba_start;
569		if (vdev_probe(vdev_read, dsk, spap) == 0) {
570		    /*
571		     * We record the first pool we find (we will try
572		     * to boot from that one).
573		     */
574		    spap = 0;
575
576		    /*
577		     * This slice had a vdev. We need a new dsk
578		     * structure now since the vdev now owns this one.
579		     */
580		    dsk = copy_dsk(dsk);
581		}
582	    }
583	}
584	slba++;
585    }
586    return;
587trymbr:
588#endif
589
590    if (drvread(dsk, sec, DOSBBSECTOR, 1))
591	return;
592    dp = (void *)(sec + DOSPARTOFF);
593
594    for (i = 0; i < NDOSPART; i++) {
595	if (!dp[i].dp_typ)
596	    continue;
597	dsk->start = dp[i].dp_start;
598	if (vdev_probe(vdev_read, dsk, spap) == 0) {
599	    /*
600	     * We record the first pool we find (we will try to boot
601	     * from that one.
602	     */
603	    spap = 0;
604
605	    /*
606	     * This slice had a vdev. We need a new dsk structure now
607	     * since the vdev now owns this one.
608	     */
609	    dsk = copy_dsk(dsk);
610	}
611    }
612}
613
614int
615main(void)
616{
617    int autoboot, i;
618    dnode_phys_t dn;
619    off_t off;
620    struct dsk *dsk;
621
622    dmadat = (void *)(roundup2(__base + (int32_t)&_end, 0x10000) - __base);
623
624    bios_getmem();
625
626    if (high_heap_size > 0) {
627	heap_end = PTOV(high_heap_base + high_heap_size);
628	heap_next = PTOV(high_heap_base);
629    } else {
630	heap_next = (char *) dmadat + sizeof(*dmadat);
631	heap_end = (char *) PTOV(bios_basemem);
632    }
633
634    dsk = malloc(sizeof(struct dsk));
635    dsk->drive = *(uint8_t *)PTOV(ARGS);
636    dsk->type = dsk->drive & DRV_HARD ? TYPE_AD : TYPE_FD;
637    dsk->unit = dsk->drive & DRV_MASK;
638    dsk->slice = *(uint8_t *)PTOV(ARGS + 1) + 1;
639    dsk->part = 0;
640    dsk->start = 0;
641    dsk->init = 0;
642
643    bootinfo.bi_version = BOOTINFO_VERSION;
644    bootinfo.bi_size = sizeof(bootinfo);
645    bootinfo.bi_basemem = bios_basemem / 1024;
646    bootinfo.bi_extmem = bios_extmem / 1024;
647    bootinfo.bi_memsizes_valid++;
648    bootinfo.bi_bios_dev = dsk->drive;
649
650    bootdev = MAKEBOOTDEV(dev_maj[dsk->type],
651			  dsk->slice, dsk->unit, dsk->part),
652
653    /* Process configuration file */
654
655    autoboot = 1;
656
657    zfs_init();
658
659    /*
660     * Probe the boot drive first - we will try to boot from whatever
661     * pool we find on that drive.
662     */
663    probe_drive(dsk, &spa);
664
665    /*
666     * Probe the rest of the drives that the bios knows about. This
667     * will find any other available pools and it may fill in missing
668     * vdevs for the boot pool.
669     */
670    for (i = 0; i < *(unsigned char *)PTOV(BIOS_NUMDRIVES); i++) {
671	if ((i | DRV_HARD) == *(uint8_t *)PTOV(ARGS))
672	    continue;
673
674	if (!int13probe(i | DRV_HARD))
675	    break;
676
677	dsk = malloc(sizeof(struct dsk));
678	dsk->drive = i | DRV_HARD;
679	dsk->type = dsk->drive & TYPE_AD;
680	dsk->unit = i;
681	dsk->slice = 0;
682	dsk->part = 0;
683	dsk->start = 0;
684	dsk->init = 0;
685	probe_drive(dsk, 0);
686    }
687
688    /*
689     * If we didn't find a pool on the boot drive, default to the
690     * first pool we found, if any.
691     */
692    if (!spa) {
693	spa = STAILQ_FIRST(&zfs_pools);
694	if (!spa) {
695	    printf("No ZFS pools located, can't boot\n");
696	    for (;;)
697		;
698	}
699    }
700
701    zfs_mount_pool(spa);
702
703    if (zfs_lookup(spa, PATH_CONFIG, &dn) == 0) {
704	off = 0;
705	zfs_read(spa, &dn, &off, cmd, sizeof(cmd));
706    }
707
708    if (*cmd) {
709	if (parse())
710	    autoboot = 0;
711	if (!OPT_CHECK(RBX_QUIET))
712	    printf("%s: %s", PATH_CONFIG, cmd);
713	/* Do not process this command twice */
714	*cmd = 0;
715    }
716
717    /*
718     * Try to exec stage 3 boot loader. If interrupted by a keypress,
719     * or in case of failure, try to load a kernel directly instead.
720     */
721
722    if (autoboot && !*kname) {
723	memcpy(kname, PATH_BOOT3, sizeof(PATH_BOOT3));
724	if (!keyhit(3*SECOND)) {
725	    load();
726	    memcpy(kname, PATH_KERNEL, sizeof(PATH_KERNEL));
727	}
728    }
729
730    /* Present the user with the boot2 prompt. */
731
732    for (;;) {
733	if (!autoboot || !OPT_CHECK(RBX_QUIET))
734	    printf("\nFreeBSD/x86 boot\n"
735		   "Default: %s:%s\n"
736		   "boot: ",
737		   spa->spa_name, kname);
738	if (ioctrl & IO_SERIAL)
739	    sio_flush();
740	if (!autoboot || keyhit(5*SECOND))
741	    getstr();
742	else if (!autoboot || !OPT_CHECK(RBX_QUIET))
743	    putchar('\n');
744	autoboot = 0;
745	if (parse())
746	    putchar('\a');
747	else
748	    load();
749    }
750}
751
/* XXX - Needed for btxld to link the boot2 binary; do not remove. */
/* Deliberately empty: the argument is ignored and the function returns. */
void
exit(int x)
{
}
757
758static void
759load(void)
760{
761    union {
762	struct exec ex;
763	Elf32_Ehdr eh;
764    } hdr;
765    static Elf32_Phdr ep[2];
766    static Elf32_Shdr es[2];
767    caddr_t p;
768    dnode_phys_t dn;
769    off_t off;
770    uint32_t addr, x;
771    int fmt, i, j;
772
773    if (zfs_lookup(spa, kname, &dn)) {
774	return;
775    }
776    off = 0;
777    if (xfsread(&dn, &off, &hdr, sizeof(hdr)))
778	return;
779    if (N_GETMAGIC(hdr.ex) == ZMAGIC)
780	fmt = 0;
781    else if (IS_ELF(hdr.eh))
782	fmt = 1;
783    else {
784	printf("Invalid %s\n", "format");
785	return;
786    }
787    if (fmt == 0) {
788	addr = hdr.ex.a_entry & 0xffffff;
789	p = PTOV(addr);
790	off = PAGE_SIZE;
791	if (xfsread(&dn, &off, p, hdr.ex.a_text))
792	    return;
793	p += roundup2(hdr.ex.a_text, PAGE_SIZE);
794	if (xfsread(&dn, &off, p, hdr.ex.a_data))
795	    return;
796	p += hdr.ex.a_data + roundup2(hdr.ex.a_bss, PAGE_SIZE);
797	bootinfo.bi_symtab = VTOP(p);
798	memcpy(p, &hdr.ex.a_syms, sizeof(hdr.ex.a_syms));
799	p += sizeof(hdr.ex.a_syms);
800	if (hdr.ex.a_syms) {
801	    if (xfsread(&dn, &off, p, hdr.ex.a_syms))
802		return;
803	    p += hdr.ex.a_syms;
804	    if (xfsread(&dn, &off, p, sizeof(int)))
805		return;
806	    x = *(uint32_t *)p;
807	    p += sizeof(int);
808	    x -= sizeof(int);
809	    if (xfsread(&dn, &off, p, x))
810		return;
811	    p += x;
812	}
813    } else {
814	off = hdr.eh.e_phoff;
815	for (j = i = 0; i < hdr.eh.e_phnum && j < 2; i++) {
816	    if (xfsread(&dn, &off, ep + j, sizeof(ep[0])))
817		return;
818	    if (ep[j].p_type == PT_LOAD)
819		j++;
820	}
821	for (i = 0; i < 2; i++) {
822	    p = PTOV(ep[i].p_paddr & 0xffffff);
823	    off = ep[i].p_offset;
824	    if (xfsread(&dn, &off, p, ep[i].p_filesz))
825		return;
826	}
827	p += roundup2(ep[1].p_memsz, PAGE_SIZE);
828	bootinfo.bi_symtab = VTOP(p);
829	if (hdr.eh.e_shnum == hdr.eh.e_shstrndx + 3) {
830	    off = hdr.eh.e_shoff + sizeof(es[0]) *
831		(hdr.eh.e_shstrndx + 1);
832	    if (xfsread(&dn, &off, &es, sizeof(es)))
833		return;
834	    for (i = 0; i < 2; i++) {
835		memcpy(p, &es[i].sh_size, sizeof(es[i].sh_size));
836		p += sizeof(es[i].sh_size);
837		off = es[i].sh_offset;
838		if (xfsread(&dn, &off, p, es[i].sh_size))
839		    return;
840		p += es[i].sh_size;
841	    }
842	}
843	addr = hdr.eh.e_entry & 0xffffff;
844    }
845    bootinfo.bi_esymtab = VTOP(p);
846    bootinfo.bi_kernelname = VTOP(kname);
847    __exec((caddr_t)addr, RB_BOOTINFO | (opts & RBX_MASK),
848	   bootdev,
849	   KARGS_FLAGS_ZFS,
850	   (uint32_t) spa->spa_guid,
851	   (uint32_t) (spa->spa_guid >> 32),
852	   VTOP(&bootinfo));
853}
854
855static int
856parse()
857{
858    char *arg = cmd;
859    char *ep, *p, *q;
860    const char *cp;
861    //unsigned int drv;
862    int c, i, j;
863
864    while ((c = *arg++)) {
865	if (c == ' ' || c == '\t' || c == '\n')
866	    continue;
867	for (p = arg; *p && *p != '\n' && *p != ' ' && *p != '\t'; p++);
868	ep = p;
869	if (*p)
870	    *p++ = 0;
871	if (c == '-') {
872	    while ((c = *arg++)) {
873		if (c == 'P') {
874		    if (*(uint8_t *)PTOV(0x496) & 0x10) {
875			cp = "yes";
876		    } else {
877			opts |= OPT_SET(RBX_DUAL) | OPT_SET(RBX_SERIAL);
878			cp = "no";
879		    }
880		    printf("Keyboard: %s\n", cp);
881		    continue;
882		} else if (c == 'S') {
883		    j = 0;
884		    while ((unsigned int)(i = *arg++ - '0') <= 9)
885			j = j * 10 + i;
886		    if (j > 0 && i == -'0') {
887			comspeed = j;
888			break;
889		    }
890		    /* Fall through to error below ('S' not in optstr[]). */
891		}
892		for (i = 0; c != optstr[i]; i++)
893		    if (i == NOPT - 1)
894			return -1;
895		opts ^= OPT_SET(flags[i]);
896	    }
897	    ioctrl = OPT_CHECK(RBX_DUAL) ? (IO_SERIAL|IO_KEYBOARD) :
898		     OPT_CHECK(RBX_SERIAL) ? IO_SERIAL : IO_KEYBOARD;
899	    if (ioctrl & IO_SERIAL)
900	        sio_init(115200 / comspeed);
901	} if (c == '?') {
902	    dnode_phys_t dn;
903
904	    if (zfs_lookup(spa, arg, &dn) == 0) {
905		zap_list(spa, &dn);
906	    }
907	    return -1;
908	} else {
909	    arg--;
910
911	    /*
912	     * Report pool status if the comment is 'status'. Lets
913	     * hope no-one wants to load /status as a kernel.
914	     */
915	    if (!strcmp(arg, "status")) {
916		spa_all_status();
917		return -1;
918	    }
919
920	    /*
921	     * If there is a colon, switch pools.
922	     */
923	    q = (char *) strchr(arg, ':');
924	    if (q) {
925		spa_t *newspa;
926
927		*q++ = 0;
928		newspa = spa_find_by_name(arg);
929		if (newspa) {
930		    spa = newspa;
931		    zfs_mount_pool(spa);
932		} else {
933		    printf("\nCan't find ZFS pool %s\n", arg);
934		    return -1;
935		}
936		arg = q;
937	    }
938	    if ((i = ep - arg)) {
939		if ((size_t)i >= sizeof(kname))
940		    return -1;
941		memcpy(kname, arg, i + 1);
942	    }
943	}
944	arg = p;
945    }
946    return 0;
947}
948
/*
 * Minimal formatted output to the console.
 *
 * Supported conversions:
 *   %c            one character
 *   %s            string; a decimal width pads with spaces, which go
 *                 in front of the string when '-' is given and after
 *                 it otherwise
 *   %u %lu %llu   unsigned decimal (width and '-' are ignored)
 * Any other character following '%' is printed as itself.  A format
 * that ends in the middle of a conversion stops the output there.
 */
static void
printf(const char *fmt,...)
{
    va_list ap;
    char buf[20];		/* enough digits for a 64-bit value */
    char *s;
    unsigned long long u;
    int c;
    int minus;
    int prec;
    int l;
    int len;
    int pad;

    va_start(ap, fmt);
    while ((c = *fmt++)) {
	if (c == '%') {
	    minus = 0;
	    prec = 0;
	    l = 0;
	nextfmt:
	    c = *fmt++;
	    switch (c) {
	    case '\0':
		/* Format ended after '%': do not read past the NUL. */
		va_end(ap);
		return;
	    case '-':
		minus = 1;
		goto nextfmt;
	    case '0':
	    case '1':
	    case '2':
	    case '3':
	    case '4':
	    case '5':
	    case '6':
	    case '7':
	    case '8':
	    case '9':
		prec = 10 * prec + (c - '0');
		goto nextfmt;
	    case 'c':
		putchar(va_arg(ap, int));
		continue;
	    case 'l':
		l++;
		goto nextfmt;
	    case 's':
		s = va_arg(ap, char *);
		if (prec) {
		    len = strlen(s);
		    if (len < prec)
			pad = prec - len;
		    else
			pad = 0;
		    if (minus)
			while (pad--)
			    putchar(' ');
		    for (; *s; s++)
			putchar(*s);
		    if (!minus)
			while (pad--)
			    putchar(' ');
		} else {
		    for (; *s; s++)
			putchar(*s);
		}
		continue;
	    case 'u':
		switch (l) {
		case 2:
		    u = va_arg(ap, unsigned long long);
		    break;
		case 1:
		    u = va_arg(ap, unsigned long);
		    break;
		default:
		    u = va_arg(ap, unsigned);
		    break;
		}
		/* Generate digits least significant first ... */
		s = buf;
		do
		    *s++ = '0' + u % 10U;
		while (u /= 10U);
		/*
		 * ... and emit them in reverse, never stepping the
		 * pointer below the start of buf.
		 */
		while (s > buf)
		    putchar(*--s);
		continue;
	    }
	}
	putchar(c);
    }
    va_end(ap);
    return;
}
1040
/*
 * Write one character to the active consoles, sending a carriage
 * return ahead of every newline.
 */
static void
putchar(int c)
{
    switch (c) {
    case '\n':
	xputc('\r');
	/* FALLTHROUGH */
    default:
	xputc(c);
	break;
    }
}
1048
#ifdef GPT
/*
 * Request packet that drvread() fills in and hands to int 0x13 with
 * AH = 0x42 through ds:esi.
 */
static struct {
	uint16_t len;		/* set to 0x10 by drvread() */
	uint16_t count;		/* number of blocks to read */
	uint16_t off;		/* offset part of the buffer address */
	uint16_t seg;		/* segment part of the buffer address */
	uint64_t lba;		/* first block to read */
} packet;
#endif
1058
1059static int
1060drvread(struct dsk *dsk, void *buf, daddr_t lba, unsigned nblk)
1061{
1062#ifdef GPT
1063    static unsigned c = 0x2d5c7c2f;
1064
1065    if (!OPT_CHECK(RBX_QUIET))
1066	printf("%c\b", c = c << 8 | c >> 24);
1067    packet.len = 0x10;
1068    packet.count = nblk;
1069    packet.off = VTOPOFF(buf);
1070    packet.seg = VTOPSEG(buf);
1071    packet.lba = lba + dsk->start;
1072    v86.ctl = V86_FLAGS;
1073    v86.addr = 0x13;
1074    v86.eax = 0x4200;
1075    v86.edx = dsk->drive;
1076    v86.ds = VTOPSEG(&packet);
1077    v86.esi = VTOPOFF(&packet);
1078    v86int();
1079    if (V86_CY(v86.efl)) {
1080	printf("error %u lba %u\n", v86.eax >> 8 & 0xff, lba);
1081	return -1;
1082    }
1083    return 0;
1084#else
1085    static unsigned c = 0x2d5c7c2f;
1086
1087    lba += dsk->start;
1088    if (!OPT_CHECK(RBX_QUIET))
1089	printf("%c\b", c = c << 8 | c >> 24);
1090    v86.ctl = V86_ADDR | V86_CALLF | V86_FLAGS;
1091    v86.addr = XREADORG;		/* call to xread in boot1 */
1092    v86.es = VTOPSEG(buf);
1093    v86.eax = lba;
1094    v86.ebx = VTOPOFF(buf);
1095    v86.ecx = lba >> 32;
1096    v86.edx = nblk << 8 | dsk->drive;
1097    v86int();
1098    v86.ctl = V86_FLAGS;
1099    if (V86_CY(v86.efl)) {
1100	printf("error %u lba %u\n", v86.eax >> 8 & 0xff, lba);
1101	return -1;
1102    }
1103    return 0;
1104#endif
1105}
1106
1107static int
1108keyhit(unsigned ticks)
1109{
1110    uint32_t t0, t1;
1111
1112    if (OPT_CHECK(RBX_NOINTR))
1113	return 0;
1114    t0 = 0;
1115    for (;;) {
1116	if (xgetc(1))
1117	    return 1;
1118	t1 = *(uint32_t *)PTOV(0x46c);
1119	if (!t0)
1120	    t0 = t1;
1121	if (t1 < t0 || t1 >= t0 + ticks)
1122	    return 0;
1123    }
1124}
1125
1126static int
1127xputc(int c)
1128{
1129    if (ioctrl & IO_KEYBOARD)
1130	putc(c);
1131    if (ioctrl & IO_SERIAL)
1132	sio_putc(c);
1133    return c;
1134}
1135
1136static int
1137xgetc(int fn)
1138{
1139    if (OPT_CHECK(RBX_NOINTR))
1140	return 0;
1141    for (;;) {
1142	if (ioctrl & IO_KEYBOARD && getc(1))
1143	    return fn ? 1 : getc(0);
1144	if (ioctrl & IO_SERIAL && sio_ischar())
1145	    return fn ? 1 : sio_getc();
1146	if (fn)
1147	    return 0;
1148    }
1149}
1150
1151static int
1152getc(int fn)
1153{
1154    /*
1155     * The extra comparison against zero is an attempt to work around
1156     * what appears to be a bug in QEMU and Bochs. Both emulators
1157     * sometimes report a key-press with scancode one and ascii zero
1158     * when no such key is pressed in reality. As far as I can tell,
1159     * this only happens shortly after a reboot.
1160     */
1161    v86.ctl = V86_FLAGS;
1162    v86.addr = 0x16;
1163    v86.eax = fn << 8;
1164    v86int();
1165    return fn == 0 ? v86.eax & 0xff : (!V86_ZR(v86.efl) && (v86.eax & 0xff));
1166}
1167