zfsboot.c revision 185096
1185029Spjd/*-
2185029Spjd * Copyright (c) 1998 Robert Nordier
3185029Spjd * All rights reserved.
4185029Spjd *
5185029Spjd * Redistribution and use in source and binary forms are freely
6185029Spjd * permitted provided that the above copyright notice and this
7185029Spjd * paragraph and the following disclaimer are duplicated in all
8185029Spjd * such forms.
9185029Spjd *
10185029Spjd * This software is provided "AS IS" and without any express or
11185029Spjd * implied warranties, including, without limitation, the implied
12185029Spjd * warranties of merchantability and fitness for a particular
13185029Spjd * purpose.
14185029Spjd */
15185029Spjd
16185029Spjd#include <sys/cdefs.h>
17185029Spjd__FBSDID("$FreeBSD: head/sys/boot/i386/zfsboot/zfsboot.c 185096 2008-11-19 16:39:01Z dfr $");
18185029Spjd
19185029Spjd#include <sys/param.h>
20185029Spjd#include <sys/errno.h>
21185029Spjd#include <sys/diskmbr.h>
22185096Sdfr#ifdef GPT
23185096Sdfr#include <sys/gpt.h>
24185096Sdfr#endif
25185029Spjd#include <sys/reboot.h>
26185029Spjd#include <sys/queue.h>
27185029Spjd
28185029Spjd#include <machine/bootinfo.h>
29185029Spjd#include <machine/elf.h>
30185029Spjd
31185029Spjd#include <stdarg.h>
32185029Spjd#include <stddef.h>
33185029Spjd
34185029Spjd#include <a.out.h>
35185029Spjd
36185029Spjd#include <btxv86.h>
37185029Spjd
38185096Sdfr#ifndef GPT
39185029Spjd#include "zfsboot.h"
40185096Sdfr#endif
41185029Spjd#include "lib.h"
42185029Spjd
/* Console selection bits kept in ioctrl. */
#define IO_KEYBOARD	1
#define IO_SERIAL	2

#define SECOND		18	/* Circa that many ticks in a second. */

/*
 * Boot option bit numbers.  Each RBX_* value is a bit position within
 * opts (see OPT_SET/OPT_CHECK below), not a mask.
 */
#define RBX_ASKNAME	0x0	/* -a */
#define RBX_SINGLE	0x1	/* -s */
/* 0x2 is reserved for log2(RB_NOSYNC). */
/* 0x3 is reserved for log2(RB_HALT). */
/* 0x4 is reserved for log2(RB_INITNAME). */
#define RBX_DFLTROOT	0x5	/* -r */
#define RBX_KDB 	0x6	/* -d */
/* 0x7 is reserved for log2(RB_RDONLY). */
/* 0x8 is reserved for log2(RB_DUMP). */
/* 0x9 is reserved for log2(RB_MINIROOT). */
#define RBX_CONFIG	0xa	/* -c */
#define RBX_VERBOSE	0xb	/* -v */
#define RBX_SERIAL	0xc	/* -h */
#define RBX_CDROM	0xd	/* -C */
/* 0xe is reserved for log2(RB_POWEROFF). */
#define RBX_GDB 	0xf	/* -g */
#define RBX_MUTE	0x10	/* -m */
/* 0x11 is reserved for log2(RB_SELFTEST). */
/* 0x12 is reserved for boot programs. */
/* 0x13 is reserved for boot programs. */
#define RBX_PAUSE	0x14	/* -p */
#define RBX_QUIET	0x15	/* -q */
#define RBX_NOINTR	0x1c	/* -n */
/* 0x1d is reserved for log2(RB_MULTIPLE) and is just misnamed here. */
#define RBX_DUAL	0x1d	/* -D */
/* 0x1f is reserved for log2(RB_BOOTINFO). */

/*
 * Options that load() forwards to the loaded image.
 * pass: -a, -s, -r, -d, -c, -v, -h, -C, -g, -m, -p, -D
 */
#define RBX_MASK	(OPT_SET(RBX_ASKNAME) | OPT_SET(RBX_SINGLE) | \
			OPT_SET(RBX_DFLTROOT) | OPT_SET(RBX_KDB ) | \
			OPT_SET(RBX_CONFIG) | OPT_SET(RBX_VERBOSE) | \
			OPT_SET(RBX_SERIAL) | OPT_SET(RBX_CDROM) | \
			OPT_SET(RBX_GDB ) | OPT_SET(RBX_MUTE) | \
			OPT_SET(RBX_PAUSE) | OPT_SET(RBX_DUAL))

/* Hint to the loader that we came from ZFS. */
#define	KARGS_FLAGS_ZFS		0x4

#define PATH_CONFIG	"/boot.config"
#define PATH_BOOT3	"/boot/loader"
#define PATH_KERNEL	"/boot/kernel/kernel"

#define ARGS		0x900	/* main() reads the boot drive and slice here */
#define NOPT		14	/* entries in optstr[] and flags[] */
#define NDEV		3	/* entries in dev_nm[] and dev_maj[] */
#define MEM_BASE	0x12
#define MEM_EXT 	0x15	/* interrupt number used by memsize() */
#define V86_CY(x)	((x) & 1)	/* bit 0 of the saved flags (carry) */
#define V86_ZR(x)	((x) & 0x40)	/* bit 6 of the saved flags (zero) */

#define DRV_HARD	0x80
#define DRV_MASK	0x7f

#define TYPE_AD		0
#define TYPE_DA		1
#define TYPE_MAXHARD	TYPE_DA
#define TYPE_FD		2

/*
 * Convert an option bit number to its mask.  The shift is done on an
 * unsigned constant so the result has the same signedness as opts and
 * bit 31 can be expressed without overflowing a signed int.
 */
#define OPT_SET(opt)	(1U << (opt))
/* Non-zero when the given option bit is set in the global opts. */
#define OPT_CHECK(opt)	((opts) & OPT_SET(opt))
108185029Spjd
109185029Spjdextern uint32_t _end;
110185029Spjd
111185096Sdfr#ifdef GPT
112185096Sdfrstatic const uuid_t freebsd_zfs_uuid = GPT_ENT_TYPE_FREEBSD_ZFS;
113185096Sdfr#endif
114185029Spjdstatic const char optstr[NOPT] = "DhaCcdgmnpqrsv"; /* Also 'P', 'S' */
115185029Spjdstatic const unsigned char flags[NOPT] = {
116185029Spjd    RBX_DUAL,
117185029Spjd    RBX_SERIAL,
118185029Spjd    RBX_ASKNAME,
119185029Spjd    RBX_CDROM,
120185029Spjd    RBX_CONFIG,
121185029Spjd    RBX_KDB,
122185029Spjd    RBX_GDB,
123185029Spjd    RBX_MUTE,
124185029Spjd    RBX_NOINTR,
125185029Spjd    RBX_PAUSE,
126185029Spjd    RBX_QUIET,
127185029Spjd    RBX_DFLTROOT,
128185029Spjd    RBX_SINGLE,
129185029Spjd    RBX_VERBOSE
130185029Spjd};
131185029Spjd
132185029Spjdstatic const char *const dev_nm[NDEV] = {"ad", "da", "fd"};
133185029Spjdstatic const unsigned char dev_maj[NDEV] = {30, 4, 2};
134185029Spjd
135185029Spjdstruct dsk {
136185029Spjd    unsigned drive;
137185029Spjd    unsigned type;
138185029Spjd    unsigned unit;
139185029Spjd    unsigned slice;
140185029Spjd    unsigned part;
141185029Spjd    unsigned start;
142185029Spjd    int init;
143185029Spjd};
144185029Spjdstatic char cmd[512];
145185029Spjdstatic char kname[1024];
146185029Spjdstatic uint32_t opts;
147185029Spjdstatic int comspeed = SIOSPD;
148185029Spjdstatic struct bootinfo bootinfo;
149185029Spjdstatic uint32_t bootdev;
150185029Spjdstatic uint8_t ioctrl = IO_KEYBOARD;
151185029Spjd
152185029Spjd/* Buffers that must not span a 64k boundary. */
153185029Spjd#define READ_BUF_SIZE	8192
154185029Spjdstruct dmadat {
155185029Spjd	char rdbuf[READ_BUF_SIZE];	/* for reading large things */
156185029Spjd	char secbuf[READ_BUF_SIZE];	/* for MBR/disklabel */
157185029Spjd};
158185029Spjdstatic struct dmadat *dmadat;
159185029Spjd
/* Forward declarations for functions defined later in this file. */
void exit(int x);
static void load(void);
static int parse(void);
static void printf(const char *fmt, ...);
static void putchar(int c);
static uint32_t memsize(void);
static int drvread(struct dsk *dsk, void *buf, unsigned lba, unsigned nblk);
static int keyhit(unsigned ticks);
static int xputc(int c);
static int xgetc(int fn);
static int getc(int fn);
171185029Spjd
static void memcpy(void *dst, const void *src, int len);

/*
 * Copy len bytes from src to dst, lowest address first.  Unlike the
 * libc function this one returns nothing and takes an int length.
 */
static void
memcpy(void *dst, const void *src, int len)
{
    const char *from = src;
    char *to = dst;

    for (; len != 0; len--)
	*to++ = *from++;
}
182185029Spjd
/*
 * Copy the NUL-terminated string src, terminator included, to dst.
 * Returns nothing (unlike the libc function).
 */
static void
strcpy(char *dst, const char *src)
{
    char c;

    do {
	c = *src++;
	*dst++ = c;
    } while (c != 0);
}
190185029Spjd
/*
 * Append the NUL-terminated string src to the end of the string in dst.
 * Returns nothing (unlike the libc function).
 */
static void
strcat(char *dst, const char *src)
{
    char *tail = dst;

    /* Find the existing terminator, then copy through src's terminator. */
    while (*tail != 0)
	tail++;
    while ((*tail++ = *src++) != 0)
	;
}
200185029Spjd
/*
 * Compare two NUL-terminated strings.  Returns zero when equal,
 * otherwise the difference of the first mismatching bytes taken as
 * unsigned char.
 */
static int
strcmp(const char *s1, const char *s2)
{
    while (*s1 != 0 && *s1 == *s2) {
	s1++;
	s2++;
    }
    return (unsigned char)*s1 - (unsigned char)*s2;
}
207185029Spjd
/*
 * Return a pointer to the first occurrence of ch in s, or 0 if there
 * is none.  The terminating NUL counts as part of the string, so
 * searching for '\0' yields a pointer to the terminator.
 */
static const char *
strchr(const char *s, char ch)
{
    for (;; s++) {
	if (*s == ch)
	    return s;
	if (*s == 0)
	    return 0;
    }
}
216185029Spjd
/*
 * Compare the first n bytes of two buffers.  Returns zero when they
 * match, otherwise the difference of the first mismatching bytes taken
 * as unsigned char.
 */
static int
memcmp(const void *p1, const void *p2, size_t n)
{
    const char *a = p1;
    const char *b = p2;

    while (n != 0) {
	if (*a != *b)
	    return (unsigned char)*a - (unsigned char)*b;
	a++;
	b++;
	n--;
    }
    return 0;
}
228185029Spjd
/*
 * Fill n bytes starting at p with val.  Returns nothing (unlike the
 * libc function).
 */
static void
memset(void *p, char val, size_t n)
{
    char *cur = p;
    char *end = cur + n;

    while (cur != end)
	*cur++ = val;
}
236185029Spjd
237185029Spjdstatic void *
238185029Spjdmalloc(size_t n)
239185029Spjd{
240185029Spjd	static char *heap_next;
241185029Spjd	static char *heap_end;
242185029Spjd
243185029Spjd	if (!heap_next) {
244185029Spjd		heap_next = (char *) dmadat + sizeof(*dmadat);
245185029Spjd		heap_end = (char *) (640*1024);
246185029Spjd	}
247185029Spjd
248185029Spjd	char *p = heap_next;
249185029Spjd	if (p + n > heap_end) {
250185029Spjd		printf("malloc failure\n");
251185029Spjd		for (;;)
252185029Spjd		    ;
253185029Spjd		return 0;
254185029Spjd	}
255185029Spjd	heap_next += n;
256185029Spjd	return p;
257185029Spjd}
258185029Spjd
/*
 * Return the number of bytes in s before its terminating NUL.
 */
static size_t
strlen(const char *s)
{
	const char *end = s;

	while (*end != 0)
		end++;
	return (size_t)(end - s);
}
267185029Spjd
/*
 * Return a copy of s in memory obtained from this file's malloc().
 * That malloc() does not return on failure, so the result is not
 * checked.
 */
static char *
strdup(const char *s)
{
	size_t size = strlen(s) + 1;
	char *copy = malloc(size);

	strcpy(copy, s);
	return copy;
}
275185029Spjd
276185029Spjd#include "zfsimpl.c"
277185029Spjd
278185029Spjd/*
279185029Spjd * Read from a dnode (which must be from a ZPL filesystem).
280185029Spjd */
281185029Spjdstatic int
282185029Spjdzfs_read(spa_t *spa, const dnode_phys_t *dnode, off_t *offp, void *start, size_t size)
283185029Spjd{
284185029Spjd	const znode_phys_t *zp = (const znode_phys_t *) dnode->dn_bonus;
285185029Spjd	size_t n;
286185029Spjd	int rc;
287185029Spjd
288185029Spjd	n = size;
289185029Spjd	if (*offp + n > zp->zp_size)
290185029Spjd		n = zp->zp_size - *offp;
291185029Spjd
292185029Spjd	rc = dnode_read(spa, dnode, *offp, start, n);
293185029Spjd	if (rc)
294185029Spjd		return (-1);
295185029Spjd	*offp += n;
296185029Spjd
297185029Spjd	return (n);
298185029Spjd}
299185029Spjd
300185029Spjd/*
301185029Spjd * Current ZFS pool
302185029Spjd */
303185029Spjdspa_t *spa;
304185029Spjd
305185029Spjd/*
306185029Spjd * A wrapper for dskread that doesn't have to worry about whether the
307185029Spjd * buffer pointer crosses a 64k boundary.
308185029Spjd */
309185029Spjdstatic int
310185029Spjdvdev_read(vdev_t *vdev, void *priv, off_t off, void *buf, size_t bytes)
311185029Spjd{
312185029Spjd	char *p;
313185029Spjd	unsigned int lba, nb;
314185029Spjd	struct dsk *dsk = (struct dsk *) priv;
315185029Spjd
316185029Spjd	if ((off & (DEV_BSIZE - 1)) || (bytes & (DEV_BSIZE - 1)))
317185029Spjd		return -1;
318185029Spjd
319185029Spjd	p = buf;
320185029Spjd	lba = off / DEV_BSIZE;
321185029Spjd	while (bytes > 0) {
322185029Spjd		nb = bytes / DEV_BSIZE;
323185029Spjd		if (nb > READ_BUF_SIZE / DEV_BSIZE)
324185029Spjd			nb = READ_BUF_SIZE / DEV_BSIZE;
325185029Spjd		if (drvread(dsk, dmadat->rdbuf, lba, nb))
326185029Spjd			return -1;
327185029Spjd		memcpy(p, dmadat->rdbuf, nb * DEV_BSIZE);
328185029Spjd		p += nb * DEV_BSIZE;
329185029Spjd		lba += nb;
330185029Spjd		bytes -= nb * DEV_BSIZE;
331185029Spjd	}
332185029Spjd
333185029Spjd	return 0;
334185029Spjd}
335185029Spjd
336185029Spjdstatic int
337185029Spjdxfsread(const dnode_phys_t *dnode, off_t *offp, void *buf, size_t nbyte)
338185029Spjd{
339185029Spjd    if ((size_t)zfs_read(spa, dnode, offp, buf, nbyte) != nbyte) {
340185029Spjd	printf("Invalid %s\n", "format");
341185029Spjd	return -1;
342185029Spjd    }
343185029Spjd    return 0;
344185029Spjd}
345185029Spjd
346185029Spjdstatic inline uint32_t
347185029Spjdmemsize(void)
348185029Spjd{
349185029Spjd    v86.addr = MEM_EXT;
350185029Spjd    v86.eax = 0x8800;
351185029Spjd    v86int();
352185029Spjd    return v86.eax;
353185029Spjd}
354185029Spjd
355185029Spjdstatic inline void
356185029Spjdgetstr(void)
357185029Spjd{
358185029Spjd    char *s;
359185029Spjd    int c;
360185029Spjd
361185029Spjd    s = cmd;
362185029Spjd    for (;;) {
363185029Spjd	switch (c = xgetc(0)) {
364185029Spjd	case 0:
365185029Spjd	    break;
366185029Spjd	case '\177':
367185029Spjd	case '\b':
368185029Spjd	    if (s > cmd) {
369185029Spjd		s--;
370185029Spjd		printf("\b \b");
371185029Spjd	    }
372185029Spjd	    break;
373185029Spjd	case '\n':
374185029Spjd	case '\r':
375185029Spjd	    *s = 0;
376185029Spjd	    return;
377185029Spjd	default:
378185029Spjd	    if (s - cmd < sizeof(cmd) - 1)
379185029Spjd		*s++ = c;
380185029Spjd	    putchar(c);
381185029Spjd	}
382185029Spjd    }
383185029Spjd}
384185029Spjd
385185029Spjdstatic inline void
386185029Spjdputc(int c)
387185029Spjd{
388185029Spjd    v86.addr = 0x10;
389185029Spjd    v86.eax = 0xe00 | (c & 0xff);
390185029Spjd    v86.ebx = 0x7;
391185029Spjd    v86int();
392185029Spjd}
393185029Spjd
394185029Spjd/*
395185029Spjd * Try to detect a device supported by the legacy int13 BIOS
396185029Spjd */
397185029Spjdstatic int
398185029Spjdint13probe(int drive)
399185029Spjd{
400185029Spjd    v86.ctl = V86_FLAGS;
401185029Spjd    v86.addr = 0x13;
402185029Spjd    v86.eax = 0x800;
403185029Spjd    v86.edx = drive;
404185029Spjd    v86int();
405185029Spjd
406185029Spjd    if (!(v86.efl & 0x1) &&				/* carry clear */
407185029Spjd	((v86.edx & 0xff) != (drive & DRV_MASK))) {	/* unit # OK */
408185029Spjd	if ((v86.ecx & 0x3f) == 0) {			/* absurd sector size */
409185029Spjd		return(0);				/* skip device */
410185029Spjd	}
411185029Spjd	return (1);
412185029Spjd    }
413185029Spjd    return(0);
414185029Spjd}
415185029Spjd
416185029Spjdstatic void
417185029Spjdprobe_drive(struct dsk *dsk, spa_t **spap)
418185029Spjd{
419185096Sdfr#ifdef GPT
420185096Sdfr    struct gpt_hdr hdr;
421185096Sdfr    struct gpt_ent *ent;
422185096Sdfr    daddr_t slba, elba;
423185096Sdfr    unsigned part, entries_per_sec;
424185096Sdfr#endif
425185029Spjd    struct dos_partition *dp;
426185029Spjd    char *sec;
427185029Spjd    unsigned i;
428185029Spjd
429185029Spjd    if (!int13probe(dsk->drive))
430185029Spjd	return;
431185029Spjd
432185029Spjd    /*
433185029Spjd     * If we find a vdev on the whole disk, stop here. Otherwise dig
434185029Spjd     * out the MBR and probe each slice in turn for a vdev.
435185029Spjd     */
436185029Spjd    if (vdev_probe(vdev_read, dsk, spap) == 0)
437185029Spjd	return;
438185029Spjd
439185029Spjd    sec = dmadat->secbuf;
440185029Spjd    dsk->start = 0;
441185096Sdfr
442185096Sdfr#ifdef GPT
443185096Sdfr    /*
444185096Sdfr     * First check for GPT.
445185096Sdfr     */
446185096Sdfr    if (drvread(dsk, sec, 1, 1)) {
447185096Sdfr	return;
448185096Sdfr    }
449185096Sdfr    memcpy(&hdr, sec, sizeof(hdr));
450185096Sdfr    if (memcmp(hdr.hdr_sig, GPT_HDR_SIG, sizeof(hdr.hdr_sig)) != 0 ||
451185096Sdfr	hdr.hdr_lba_self != 1 || hdr.hdr_revision < 0x00010000 ||
452185096Sdfr	hdr.hdr_entsz < sizeof(*ent) || DEV_BSIZE % hdr.hdr_entsz != 0) {
453185096Sdfr	goto trymbr;
454185096Sdfr    }
455185096Sdfr
456185096Sdfr    /*
457185096Sdfr     * Probe all GPT partitions for the presense of ZFS pools. We
458185096Sdfr     * return the spa_t for the first we find (if requested). This
459185096Sdfr     * will have the effect of booting from the first pool on the
460185096Sdfr     * disk.
461185096Sdfr     */
462185096Sdfr    entries_per_sec = DEV_BSIZE / hdr.hdr_entsz;
463185096Sdfr    slba = hdr.hdr_lba_table;
464185096Sdfr    elba = slba + hdr.hdr_entries / entries_per_sec;
465185096Sdfr    while (slba < elba) {
466185096Sdfr	if (drvread(dsk, sec, slba, 1))
467185096Sdfr	    return;
468185096Sdfr	for (part = 0; part < entries_per_sec; part++) {
469185096Sdfr	    ent = (struct gpt_ent *)(sec + part * hdr.hdr_entsz);
470185096Sdfr	    if (memcmp(&ent->ent_type, &freebsd_zfs_uuid,
471185096Sdfr		     sizeof(uuid_t)) == 0) {
472185096Sdfr		dsk->start = ent->ent_lba_start;
473185096Sdfr		if (vdev_probe(vdev_read, dsk, spap) == 0) {
474185096Sdfr		    /*
475185096Sdfr		     * We record the first pool we find (we will try
476185096Sdfr		     * to boot from that one.
477185096Sdfr		     */
478185096Sdfr		    spap = 0;
479185096Sdfr
480185096Sdfr		    /*
481185096Sdfr		     * This slice had a vdev. We need a new dsk
482185096Sdfr		     * structure now since the vdev now owns this one.
483185096Sdfr		     */
484185096Sdfr		    struct dsk *newdsk;
485185096Sdfr		    newdsk = malloc(sizeof(struct dsk));
486185096Sdfr		    *newdsk = *dsk;
487185096Sdfr		    dsk = newdsk;
488185096Sdfr		}
489185096Sdfr		break;
490185096Sdfr	    }
491185096Sdfr	}
492185096Sdfr	slba++;
493185096Sdfr    }
494185096Sdfr    return;
495185096Sdfrtrymbr:
496185096Sdfr#endif
497185096Sdfr
498185029Spjd    if (drvread(dsk, sec, DOSBBSECTOR, 1))
499185029Spjd	return;
500185029Spjd    dp = (void *)(sec + DOSPARTOFF);
501185029Spjd
502185029Spjd    for (i = 0; i < NDOSPART; i++) {
503185029Spjd	if (!dp[i].dp_typ)
504185029Spjd	    continue;
505185029Spjd	dsk->start = dp[i].dp_start;
506185029Spjd	if (vdev_probe(vdev_read, dsk, spap) == 0) {
507185029Spjd	    /*
508185029Spjd	     * We record the first pool we find (we will try to boot
509185029Spjd	     * from that one.
510185029Spjd	     */
511185029Spjd	    spap = 0;
512185029Spjd
513185029Spjd	    /*
514185029Spjd	     * This slice had a vdev. We need a new dsk structure now
515185096Sdfr	     * since the vdev now owns this one.
516185029Spjd	     */
517185029Spjd	    struct dsk *newdsk;
518185029Spjd	    newdsk = malloc(sizeof(struct dsk));
519185029Spjd	    *newdsk = *dsk;
520185029Spjd	    dsk = newdsk;
521185029Spjd	}
522185029Spjd    }
523185029Spjd}
524185029Spjd
525185029Spjdint
526185029Spjdmain(void)
527185029Spjd{
528185029Spjd    int autoboot, i;
529185029Spjd    dnode_phys_t dn;
530185029Spjd    off_t off;
531185029Spjd    struct dsk *dsk;
532185029Spjd
533185029Spjd    dmadat = (void *)(roundup2(__base + (int32_t)&_end, 0x10000) - __base);
534185029Spjd    v86.ctl = V86_FLAGS;
535185029Spjd
536185029Spjd    dsk = malloc(sizeof(struct dsk));
537185029Spjd    dsk->drive = *(uint8_t *)PTOV(ARGS);
538185029Spjd    dsk->type = dsk->drive & DRV_HARD ? TYPE_AD : TYPE_FD;
539185029Spjd    dsk->unit = dsk->drive & DRV_MASK;
540185029Spjd    dsk->slice = *(uint8_t *)PTOV(ARGS + 1) + 1;
541185029Spjd    dsk->part = 0;
542185029Spjd    dsk->start = 0;
543185029Spjd    dsk->init = 0;
544185029Spjd
545185029Spjd    bootinfo.bi_version = BOOTINFO_VERSION;
546185029Spjd    bootinfo.bi_size = sizeof(bootinfo);
547185029Spjd    bootinfo.bi_basemem = 0;	/* XXX will be filled by loader or kernel */
548185029Spjd    bootinfo.bi_extmem = memsize();
549185029Spjd    bootinfo.bi_memsizes_valid++;
550185029Spjd    bootinfo.bi_bios_dev = dsk->drive;
551185029Spjd
552185029Spjd    bootdev = MAKEBOOTDEV(dev_maj[dsk->type],
553185029Spjd			  dsk->slice, dsk->unit, dsk->part),
554185029Spjd
555185029Spjd    /* Process configuration file */
556185029Spjd
557185029Spjd    autoboot = 1;
558185029Spjd
559185029Spjd    zfs_init();
560185029Spjd
561185029Spjd    /*
562185029Spjd     * Probe the boot drive first - we will try to boot from whatever
563185029Spjd     * pool we find on that drive.
564185029Spjd     */
565185029Spjd    probe_drive(dsk, &spa);
566185029Spjd
567185029Spjd    /*
568185029Spjd     * Probe the rest of the drives that the bios knows about. This
569185029Spjd     * will find any other available pools and it may fill in missing
570185029Spjd     * vdevs for the boot pool.
571185029Spjd     */
572185029Spjd    for (i = 0; i < 4; i++) {
573185029Spjd	if ((i | DRV_HARD) == *(uint8_t *)PTOV(ARGS))
574185029Spjd	    continue;
575185029Spjd
576185029Spjd	dsk = malloc(sizeof(struct dsk));
577185029Spjd	dsk->drive = i | DRV_HARD;
578185029Spjd	dsk->type = dsk->drive & TYPE_AD;
579185029Spjd	dsk->unit = i;
580185029Spjd	dsk->slice = 0;
581185029Spjd	dsk->part = 0;
582185029Spjd	dsk->start = 0;
583185029Spjd	dsk->init = 0;
584185029Spjd	probe_drive(dsk, 0);
585185029Spjd    }
586185029Spjd
587185029Spjd    /*
588185029Spjd     * If we didn't find a pool on the boot drive, default to the
589185029Spjd     * first pool we found, if any.
590185029Spjd     */
591185029Spjd    if (!spa) {
592185029Spjd	spa = STAILQ_FIRST(&zfs_pools);
593185029Spjd	if (!spa) {
594185029Spjd	    printf("No ZFS pools located, can't boot\n");
595185029Spjd	    for (;;)
596185029Spjd		;
597185029Spjd	}
598185029Spjd    }
599185029Spjd
600185029Spjd    zfs_mount_pool(spa);
601185029Spjd
602185029Spjd    if (zfs_lookup(spa, PATH_CONFIG, &dn) == 0) {
603185029Spjd	off = 0;
604185029Spjd	xfsread(&dn, &off, cmd, sizeof(cmd));
605185029Spjd    }
606185029Spjd
607185029Spjd    if (*cmd) {
608185029Spjd	if (parse())
609185029Spjd	    autoboot = 0;
610185029Spjd	if (!OPT_CHECK(RBX_QUIET))
611185029Spjd	    printf("%s: %s", PATH_CONFIG, cmd);
612185029Spjd	/* Do not process this command twice */
613185029Spjd	*cmd = 0;
614185029Spjd    }
615185029Spjd
616185029Spjd    /*
617185029Spjd     * Try to exec stage 3 boot loader. If interrupted by a keypress,
618185029Spjd     * or in case of failure, try to load a kernel directly instead.
619185029Spjd     */
620185029Spjd
621185029Spjd    if (autoboot && !*kname) {
622185029Spjd	memcpy(kname, PATH_BOOT3, sizeof(PATH_BOOT3));
623185029Spjd	if (!keyhit(3*SECOND)) {
624185029Spjd	    load();
625185029Spjd	    memcpy(kname, PATH_KERNEL, sizeof(PATH_KERNEL));
626185029Spjd	}
627185029Spjd    }
628185029Spjd
629185029Spjd    /* Present the user with the boot2 prompt. */
630185029Spjd
631185029Spjd    for (;;) {
632185029Spjd	if (!autoboot || !OPT_CHECK(RBX_QUIET))
633185029Spjd	    printf("\nFreeBSD/i386 boot\n"
634185029Spjd		   "Default: %s:%s\n"
635185029Spjd		   "boot: ",
636185029Spjd		   spa->spa_name, kname);
637185029Spjd	if (ioctrl & IO_SERIAL)
638185029Spjd	    sio_flush();
639185029Spjd	if (!autoboot || keyhit(5*SECOND))
640185029Spjd	    getstr();
641185029Spjd	else if (!autoboot || !OPT_CHECK(RBX_QUIET))
642185029Spjd	    putchar('\n');
643185029Spjd	autoboot = 0;
644185029Spjd	if (parse())
645185029Spjd	    putchar('\a');
646185029Spjd	else
647185029Spjd	    load();
648185029Spjd    }
649185029Spjd}
650185029Spjd
/*
 * XXX - Needed for btxld to link the boot2 binary; do not remove.
 * Deliberately does nothing and returns to its caller.
 */
void
exit(int x)
{
    (void)x;
}
656185029Spjd
657185029Spjdstatic void
658185029Spjdload(void)
659185029Spjd{
660185029Spjd    union {
661185029Spjd	struct exec ex;
662185029Spjd	Elf32_Ehdr eh;
663185029Spjd    } hdr;
664185029Spjd    static Elf32_Phdr ep[2];
665185029Spjd    static Elf32_Shdr es[2];
666185029Spjd    caddr_t p;
667185029Spjd    dnode_phys_t dn;
668185029Spjd    off_t off;
669185029Spjd    uint32_t addr, x;
670185029Spjd    int fmt, i, j;
671185029Spjd
672185029Spjd    if (zfs_lookup(spa, kname, &dn)) {
673185029Spjd	return;
674185029Spjd    }
675185029Spjd    off = 0;
676185029Spjd    if (xfsread(&dn, &off, &hdr, sizeof(hdr)))
677185029Spjd	return;
678185029Spjd    if (N_GETMAGIC(hdr.ex) == ZMAGIC)
679185029Spjd	fmt = 0;
680185029Spjd    else if (IS_ELF(hdr.eh))
681185029Spjd	fmt = 1;
682185029Spjd    else {
683185029Spjd	printf("Invalid %s\n", "format");
684185029Spjd	return;
685185029Spjd    }
686185029Spjd    if (fmt == 0) {
687185029Spjd	addr = hdr.ex.a_entry & 0xffffff;
688185029Spjd	p = PTOV(addr);
689185029Spjd	off = PAGE_SIZE;
690185029Spjd	if (xfsread(&dn, &off, p, hdr.ex.a_text))
691185029Spjd	    return;
692185029Spjd	p += roundup2(hdr.ex.a_text, PAGE_SIZE);
693185029Spjd	if (xfsread(&dn, &off, p, hdr.ex.a_data))
694185029Spjd	    return;
695185029Spjd	p += hdr.ex.a_data + roundup2(hdr.ex.a_bss, PAGE_SIZE);
696185029Spjd	bootinfo.bi_symtab = VTOP(p);
697185029Spjd	memcpy(p, &hdr.ex.a_syms, sizeof(hdr.ex.a_syms));
698185029Spjd	p += sizeof(hdr.ex.a_syms);
699185029Spjd	if (hdr.ex.a_syms) {
700185029Spjd	    if (xfsread(&dn, &off, p, hdr.ex.a_syms))
701185029Spjd		return;
702185029Spjd	    p += hdr.ex.a_syms;
703185029Spjd	    if (xfsread(&dn, &off, p, sizeof(int)))
704185029Spjd		return;
705185029Spjd	    x = *(uint32_t *)p;
706185029Spjd	    p += sizeof(int);
707185029Spjd	    x -= sizeof(int);
708185029Spjd	    if (xfsread(&dn, &off, p, x))
709185029Spjd		return;
710185029Spjd	    p += x;
711185029Spjd	}
712185029Spjd    } else {
713185029Spjd	off = hdr.eh.e_phoff;
714185029Spjd	for (j = i = 0; i < hdr.eh.e_phnum && j < 2; i++) {
715185029Spjd	    if (xfsread(&dn, &off, ep + j, sizeof(ep[0])))
716185029Spjd		return;
717185029Spjd	    if (ep[j].p_type == PT_LOAD)
718185029Spjd		j++;
719185029Spjd	}
720185029Spjd	for (i = 0; i < 2; i++) {
721185029Spjd	    p = PTOV(ep[i].p_paddr & 0xffffff);
722185029Spjd	    off = ep[i].p_offset;
723185029Spjd	    if (xfsread(&dn, &off, p, ep[i].p_filesz))
724185029Spjd		return;
725185029Spjd	}
726185029Spjd	p += roundup2(ep[1].p_memsz, PAGE_SIZE);
727185029Spjd	bootinfo.bi_symtab = VTOP(p);
728185029Spjd	if (hdr.eh.e_shnum == hdr.eh.e_shstrndx + 3) {
729185029Spjd	    off = hdr.eh.e_shoff + sizeof(es[0]) *
730185029Spjd		(hdr.eh.e_shstrndx + 1);
731185029Spjd	    if (xfsread(&dn, &off, &es, sizeof(es)))
732185029Spjd		return;
733185029Spjd	    for (i = 0; i < 2; i++) {
734185029Spjd		memcpy(p, &es[i].sh_size, sizeof(es[i].sh_size));
735185029Spjd		p += sizeof(es[i].sh_size);
736185029Spjd		off = es[i].sh_offset;
737185029Spjd		if (xfsread(&dn, &off, p, es[i].sh_size))
738185029Spjd		    return;
739185029Spjd		p += es[i].sh_size;
740185029Spjd	    }
741185029Spjd	}
742185029Spjd	addr = hdr.eh.e_entry & 0xffffff;
743185029Spjd    }
744185029Spjd    bootinfo.bi_esymtab = VTOP(p);
745185029Spjd    bootinfo.bi_kernelname = VTOP(kname);
746185029Spjd    __exec((caddr_t)addr, RB_BOOTINFO | (opts & RBX_MASK),
747185029Spjd	   bootdev,
748185029Spjd	   KARGS_FLAGS_ZFS,
749185029Spjd	   (uint32_t) spa->spa_guid,
750185029Spjd	   (uint32_t) (spa->spa_guid >> 32),
751185029Spjd	   VTOP(&bootinfo));
752185029Spjd}
753185029Spjd
754185029Spjdstatic int
755185029Spjdparse()
756185029Spjd{
757185029Spjd    char *arg = cmd;
758185029Spjd    char *ep, *p, *q;
759185029Spjd    const char *cp;
760185029Spjd    //unsigned int drv;
761185029Spjd    int c, i, j;
762185029Spjd
763185029Spjd    while ((c = *arg++)) {
764185029Spjd	if (c == ' ' || c == '\t' || c == '\n')
765185029Spjd	    continue;
766185029Spjd	for (p = arg; *p && *p != '\n' && *p != ' ' && *p != '\t'; p++);
767185029Spjd	ep = p;
768185029Spjd	if (*p)
769185029Spjd	    *p++ = 0;
770185029Spjd	if (c == '-') {
771185029Spjd	    while ((c = *arg++)) {
772185029Spjd		if (c == 'P') {
773185029Spjd		    if (*(uint8_t *)PTOV(0x496) & 0x10) {
774185029Spjd			cp = "yes";
775185029Spjd		    } else {
776185029Spjd			opts |= OPT_SET(RBX_DUAL) | OPT_SET(RBX_SERIAL);
777185029Spjd			cp = "no";
778185029Spjd		    }
779185029Spjd		    printf("Keyboard: %s\n", cp);
780185029Spjd		    continue;
781185029Spjd		} else if (c == 'S') {
782185029Spjd		    j = 0;
783185029Spjd		    while ((unsigned int)(i = *arg++ - '0') <= 9)
784185029Spjd			j = j * 10 + i;
785185029Spjd		    if (j > 0 && i == -'0') {
786185029Spjd			comspeed = j;
787185029Spjd			break;
788185029Spjd		    }
789185029Spjd		    /* Fall through to error below ('S' not in optstr[]). */
790185029Spjd		}
791185029Spjd		for (i = 0; c != optstr[i]; i++)
792185029Spjd		    if (i == NOPT - 1)
793185029Spjd			return -1;
794185029Spjd		opts ^= OPT_SET(flags[i]);
795185029Spjd	    }
796185029Spjd	    ioctrl = OPT_CHECK(RBX_DUAL) ? (IO_SERIAL|IO_KEYBOARD) :
797185029Spjd		     OPT_CHECK(RBX_SERIAL) ? IO_SERIAL : IO_KEYBOARD;
798185029Spjd	    if (ioctrl & IO_SERIAL)
799185029Spjd	        sio_init(115200 / comspeed);
800185029Spjd	} if (c == '?') {
801185029Spjd	    dnode_phys_t dn;
802185029Spjd
803185029Spjd	    if (zfs_lookup(spa, arg, &dn) == 0) {
804185029Spjd		zap_list(spa, &dn);
805185029Spjd	    }
806185029Spjd	    return -1;
807185029Spjd	} else {
808185029Spjd	    arg--;
809185029Spjd
810185029Spjd	    /*
811185029Spjd	     * Report pool status if the comment is 'status'. Lets
812185029Spjd	     * hope no-one wants to load /status as a kernel.
813185029Spjd	     */
814185029Spjd	    if (!strcmp(arg, "status")) {
815185029Spjd		spa_all_status();
816185029Spjd		return -1;
817185029Spjd	    }
818185029Spjd
819185029Spjd	    /*
820185029Spjd	     * If there is a colon, switch pools.
821185029Spjd	     */
822185029Spjd	    q = (char *) strchr(arg, ':');
823185029Spjd	    if (q) {
824185029Spjd		spa_t *newspa;
825185029Spjd
826185029Spjd		*q++ = 0;
827185029Spjd		newspa = spa_find_by_name(arg);
828185029Spjd		if (newspa) {
829185029Spjd		    spa = newspa;
830185029Spjd		    zfs_mount_pool(spa);
831185029Spjd		} else {
832185029Spjd		    printf("\nCan't find ZFS pool %s\n", arg);
833185029Spjd		    return -1;
834185029Spjd		}
835185029Spjd		arg = q;
836185029Spjd	    }
837185029Spjd	    if ((i = ep - arg)) {
838185029Spjd		if ((size_t)i >= sizeof(kname))
839185029Spjd		    return -1;
840185029Spjd		memcpy(kname, arg, i + 1);
841185029Spjd	    }
842185029Spjd	}
843185029Spjd	arg = p;
844185029Spjd    }
845185029Spjd    return 0;
846185029Spjd}
847185029Spjd
/*
 * Minimal formatted output to the console via putchar().
 *
 * Supported conversions are %c, %s and %u.  %s accepts an optional
 * decimal field width and the '-' flag: by default the string is
 * padded on the left (right-justified); with '-' it is padded on the
 * right (left-justified), as in standard printf.  Any other character
 * after '%' is printed as is, so "%%" prints a single '%'.
 */
static void
printf(const char *fmt,...)
{
    va_list ap;
    char buf[10];
    char *s;
    unsigned u;
    int c;
    int minus;
    int width;
    int len;
    int pad;

    va_start(ap, fmt);
    while ((c = *fmt++)) {
	if (c == '%') {
	    minus = 0;
	    width = 0;
	nextfmt:
	    c = *fmt++;
	    switch (c) {
	    case 0:
		/*
		 * The format ended in the middle of a conversion;
		 * stop here rather than reading past the terminator.
		 */
		va_end(ap);
		return;
	    case '-':
		minus = 1;
		goto nextfmt;
	    case '0':
	    case '1':
	    case '2':
	    case '3':
	    case '4':
	    case '5':
	    case '6':
	    case '7':
	    case '8':
	    case '9':
		width = 10 * width + (c - '0');
		goto nextfmt;
	    case 'c':
		putchar(va_arg(ap, int));
		continue;
	    case 's':
		s = va_arg(ap, char *);
		len = strlen(s);
		pad = width > len ? width - len : 0;
		/* Only one of the two padding loops runs. */
		if (!minus)
		    while (pad-- > 0)
			putchar(' ');
		for (; *s; s++)
		    putchar(*s);
		if (minus)
		    while (pad-- > 0)
			putchar(' ');
		continue;
	    case 'u':
		/* Build the digits in reverse; 10 is enough for 32 bits. */
		u = va_arg(ap, unsigned);
		s = buf;
		do
		    *s++ = '0' + u % 10U;
		while (u /= 10U);
		while (--s >= buf)
		    putchar(*s);
		continue;
	    }
	}
	putchar(c);
    }
    va_end(ap);
    return;
}
924185029Spjd
/*
 * Write one character to the active console(s), emitting a carriage
 * return before every newline.
 */
static void
putchar(int c)
{
    switch (c) {
    case '\n':
	xputc('\r');
	/* FALLTHROUGH */
    default:
	xputc(c);
    }
}
932185029Spjd
933185096Sdfr#ifdef GPT
934185096Sdfrstatic struct {
935185096Sdfr	uint16_t len;
936185096Sdfr	uint16_t count;
937185096Sdfr	uint16_t seg;
938185096Sdfr	uint16_t off;
939185096Sdfr	uint64_t lba;
940185096Sdfr} packet;
941185096Sdfr#endif
942185096Sdfr
943185029Spjdstatic int
944185029Spjddrvread(struct dsk *dsk, void *buf, unsigned lba, unsigned nblk)
945185029Spjd{
946185096Sdfr#ifdef GPT
947185096Sdfr   static unsigned c = 0x2d5c7c2f;
948185096Sdfr
949185096Sdfr    if (!OPT_CHECK(RBX_QUIET))
950185096Sdfr	printf("%c\b", c = c << 8 | c >> 24);
951185096Sdfr    packet.len = 0x10;
952185096Sdfr    packet.count = nblk;
953185096Sdfr    packet.seg = VTOPOFF(buf);
954185096Sdfr    packet.off = VTOPSEG(buf);
955185096Sdfr    packet.lba = lba + dsk->start;
956185096Sdfr    v86.ctl = V86_FLAGS;
957185096Sdfr    v86.addr = 0x13;
958185096Sdfr    v86.eax = 0x4200;
959185096Sdfr    v86.edx = dsk->drive;
960185096Sdfr    v86.ds = VTOPSEG(&packet);
961185096Sdfr    v86.esi = VTOPOFF(&packet);
962185096Sdfr    v86int();
963185096Sdfr    if (V86_CY(v86.efl)) {
964185096Sdfr	printf("error %u lba %u\n", v86.eax >> 8 & 0xff, lba);
965185096Sdfr	return -1;
966185096Sdfr    }
967185096Sdfr    return 0;
968185096Sdfr#else
969185029Spjd    static unsigned c = 0x2d5c7c2f;
970185029Spjd
971185029Spjd    lba += dsk->start;
972185029Spjd    if (!OPT_CHECK(RBX_QUIET))
973185029Spjd	printf("%c\b", c = c << 8 | c >> 24);
974185029Spjd    v86.ctl = V86_ADDR | V86_CALLF | V86_FLAGS;
975185029Spjd    v86.addr = XREADORG;		/* call to xread in boot1 */
976185029Spjd    v86.es = VTOPSEG(buf);
977185029Spjd    v86.eax = lba;
978185029Spjd    v86.ebx = VTOPOFF(buf);
979185029Spjd    v86.ecx = lba >> 16;
980185029Spjd    v86.edx = nblk << 8 | dsk->drive;
981185029Spjd    v86int();
982185029Spjd    v86.ctl = V86_FLAGS;
983185029Spjd    if (V86_CY(v86.efl)) {
984185029Spjd	printf("error %u lba %u\n", v86.eax >> 8 & 0xff, lba);
985185029Spjd	return -1;
986185029Spjd    }
987185029Spjd    return 0;
988185096Sdfr#endif
989185029Spjd}
990185029Spjd
991185029Spjdstatic int
992185029Spjdkeyhit(unsigned ticks)
993185029Spjd{
994185029Spjd    uint32_t t0, t1;
995185029Spjd
996185029Spjd    if (OPT_CHECK(RBX_NOINTR))
997185029Spjd	return 0;
998185029Spjd    t0 = 0;
999185029Spjd    for (;;) {
1000185029Spjd	if (xgetc(1))
1001185029Spjd	    return 1;
1002185029Spjd	t1 = *(uint32_t *)PTOV(0x46c);
1003185029Spjd	if (!t0)
1004185029Spjd	    t0 = t1;
1005185029Spjd	if (t1 < t0 || t1 >= t0 + ticks)
1006185029Spjd	    return 0;
1007185029Spjd    }
1008185029Spjd}
1009185029Spjd
1010185029Spjdstatic int
1011185029Spjdxputc(int c)
1012185029Spjd{
1013185029Spjd    if (ioctrl & IO_KEYBOARD)
1014185029Spjd	putc(c);
1015185029Spjd    if (ioctrl & IO_SERIAL)
1016185029Spjd	sio_putc(c);
1017185029Spjd    return c;
1018185029Spjd}
1019185029Spjd
1020185029Spjdstatic int
1021185029Spjdxgetc(int fn)
1022185029Spjd{
1023185029Spjd    if (OPT_CHECK(RBX_NOINTR))
1024185029Spjd	return 0;
1025185029Spjd    for (;;) {
1026185029Spjd	if (ioctrl & IO_KEYBOARD && getc(1))
1027185029Spjd	    return fn ? 1 : getc(0);
1028185029Spjd	if (ioctrl & IO_SERIAL && sio_ischar())
1029185029Spjd	    return fn ? 1 : sio_getc();
1030185029Spjd	if (fn)
1031185029Spjd	    return 0;
1032185029Spjd    }
1033185029Spjd}
1034185029Spjd
1035185029Spjdstatic int
1036185029Spjdgetc(int fn)
1037185029Spjd{
1038185029Spjd    /*
1039185029Spjd     * The extra comparison against zero is an attempt to work around
1040185029Spjd     * what appears to be a bug in QEMU and Bochs. Both emulators
1041185029Spjd     * sometimes report a key-press with scancode one and ascii zero
1042185029Spjd     * when no such key is pressed in reality. As far as I can tell,
1043185029Spjd     * this only happens shortly after a reboot.
1044185029Spjd     */
1045185029Spjd    v86.addr = 0x16;
1046185029Spjd    v86.eax = fn << 8;
1047185029Spjd    v86int();
1048185029Spjd    return fn == 0 ? v86.eax & 0xff : (!V86_ZR(v86.efl) && (v86.eax & 0xff));
1049185029Spjd}
1050