zfsboot.c revision 198420
1185029Spjd/*-
2185029Spjd * Copyright (c) 1998 Robert Nordier
3185029Spjd * All rights reserved.
4185029Spjd *
5185029Spjd * Redistribution and use in source and binary forms are freely
6185029Spjd * permitted provided that the above copyright notice and this
7185029Spjd * paragraph and the following disclaimer are duplicated in all
8185029Spjd * such forms.
9185029Spjd *
10185029Spjd * This software is provided "AS IS" and without any express or
11185029Spjd * implied warranties, including, without limitation, the implied
12185029Spjd * warranties of merchantability and fitness for a particular
13185029Spjd * purpose.
14185029Spjd */
15185029Spjd
16185029Spjd#include <sys/cdefs.h>
17185029Spjd__FBSDID("$FreeBSD: head/sys/boot/i386/zfsboot/zfsboot.c 198420 2009-10-23 18:44:53Z rnoland $");
18185029Spjd
19185029Spjd#include <sys/param.h>
20185029Spjd#include <sys/errno.h>
21185029Spjd#include <sys/diskmbr.h>
22185096Sdfr#ifdef GPT
23185096Sdfr#include <sys/gpt.h>
24185096Sdfr#endif
25185029Spjd#include <sys/reboot.h>
26185029Spjd#include <sys/queue.h>
27185029Spjd
28185029Spjd#include <machine/bootinfo.h>
29185029Spjd#include <machine/elf.h>
30185029Spjd
31185029Spjd#include <stdarg.h>
32185029Spjd#include <stddef.h>
33185029Spjd
34185029Spjd#include <a.out.h>
35185029Spjd
36185029Spjd#include <btxv86.h>
37185029Spjd
38185096Sdfr#ifndef GPT
39185029Spjd#include "zfsboot.h"
40185096Sdfr#endif
41185029Spjd#include "lib.h"
42185029Spjd
43185029Spjd#define IO_KEYBOARD	1
44185029Spjd#define IO_SERIAL	2
45185029Spjd
46185029Spjd#define SECOND		18	/* Circa that many ticks in a second. */
47185029Spjd
48185029Spjd#define RBX_ASKNAME	0x0	/* -a */
49185029Spjd#define RBX_SINGLE	0x1	/* -s */
50185029Spjd/* 0x2 is reserved for log2(RB_NOSYNC). */
51185029Spjd/* 0x3 is reserved for log2(RB_HALT). */
52185029Spjd/* 0x4 is reserved for log2(RB_INITNAME). */
53185029Spjd#define RBX_DFLTROOT	0x5	/* -r */
54185029Spjd#define RBX_KDB 	0x6	/* -d */
55185029Spjd/* 0x7 is reserved for log2(RB_RDONLY). */
56185029Spjd/* 0x8 is reserved for log2(RB_DUMP). */
57185029Spjd/* 0x9 is reserved for log2(RB_MINIROOT). */
58185029Spjd#define RBX_CONFIG	0xa	/* -c */
59185029Spjd#define RBX_VERBOSE	0xb	/* -v */
60185029Spjd#define RBX_SERIAL	0xc	/* -h */
61185029Spjd#define RBX_CDROM	0xd	/* -C */
62185029Spjd/* 0xe is reserved for log2(RB_POWEROFF). */
63185029Spjd#define RBX_GDB 	0xf	/* -g */
64185029Spjd#define RBX_MUTE	0x10	/* -m */
65185029Spjd/* 0x11 is reserved for log2(RB_SELFTEST). */
66185029Spjd/* 0x12 is reserved for boot programs. */
67185029Spjd/* 0x13 is reserved for boot programs. */
68185029Spjd#define RBX_PAUSE	0x14	/* -p */
69185029Spjd#define RBX_QUIET	0x15	/* -q */
70185029Spjd#define RBX_NOINTR	0x1c	/* -n */
71185029Spjd/* 0x1d is reserved for log2(RB_MULTIPLE) and is just misnamed here. */
72185029Spjd#define RBX_DUAL	0x1d	/* -D */
73185029Spjd/* 0x1f is reserved for log2(RB_BOOTINFO). */
74185029Spjd
75185029Spjd/* pass: -a, -s, -r, -d, -c, -v, -h, -C, -g, -m, -p, -D */
76185029Spjd#define RBX_MASK	(OPT_SET(RBX_ASKNAME) | OPT_SET(RBX_SINGLE) | \
77185029Spjd			OPT_SET(RBX_DFLTROOT) | OPT_SET(RBX_KDB ) | \
78185029Spjd			OPT_SET(RBX_CONFIG) | OPT_SET(RBX_VERBOSE) | \
79185029Spjd			OPT_SET(RBX_SERIAL) | OPT_SET(RBX_CDROM) | \
80185029Spjd			OPT_SET(RBX_GDB ) | OPT_SET(RBX_MUTE) | \
81185029Spjd			OPT_SET(RBX_PAUSE) | OPT_SET(RBX_DUAL))
82185029Spjd
83185029Spjd/* Hint to loader that we came from ZFS */
84185029Spjd#define	KARGS_FLAGS_ZFS		0x4
85185029Spjd
86185029Spjd#define PATH_CONFIG	"/boot.config"
87185029Spjd#define PATH_BOOT3	"/boot/loader"
88185029Spjd#define PATH_KERNEL	"/boot/kernel/kernel"
89185029Spjd
90185029Spjd#define ARGS		0x900
91185029Spjd#define NOPT		14
92185029Spjd#define NDEV		3
93185029Spjd#define MEM_BASE	0x12
94185029Spjd#define MEM_EXT 	0x15
95185029Spjd#define V86_CY(x)	((x) & 1)
96185029Spjd#define V86_ZR(x)	((x) & 0x40)
97185029Spjd
98185029Spjd#define DRV_HARD	0x80
99185029Spjd#define DRV_MASK	0x7f
100185029Spjd
101185029Spjd#define TYPE_AD		0
102185029Spjd#define TYPE_DA		1
103185029Spjd#define TYPE_MAXHARD	TYPE_DA
104185029Spjd#define TYPE_FD		2
105185029Spjd
106185029Spjd#define OPT_SET(opt)	(1 << (opt))
107185029Spjd#define OPT_CHECK(opt)	((opts) & OPT_SET(opt))
108185029Spjd
109185029Spjdextern uint32_t _end;
110185029Spjd
111185096Sdfr#ifdef GPT
112185096Sdfrstatic const uuid_t freebsd_zfs_uuid = GPT_ENT_TYPE_FREEBSD_ZFS;
113185096Sdfr#endif
/*
 * Option letters accepted after '-' on the boot command line, and the
 * RBX_* bit number that each letter toggles: flags[i] belongs to
 * optstr[i].  optstr holds exactly NOPT characters, so the array has no
 * terminating NUL; parse() bounds its search by NOPT, not by a
 * terminator.  'P' and 'S' are handled separately in parse() and are
 * deliberately absent from this table.
 */
114185029Spjdstatic const char optstr[NOPT] = "DhaCcdgmnpqrsv"; /* Also 'P', 'S' */
115185029Spjdstatic const unsigned char flags[NOPT] = {
116185029Spjd    RBX_DUAL,
117185029Spjd    RBX_SERIAL,
118185029Spjd    RBX_ASKNAME,
119185029Spjd    RBX_CDROM,
120185029Spjd    RBX_CONFIG,
121185029Spjd    RBX_KDB,
122185029Spjd    RBX_GDB,
123185029Spjd    RBX_MUTE,
124185029Spjd    RBX_NOINTR,
125185029Spjd    RBX_PAUSE,
126185029Spjd    RBX_QUIET,
127185029Spjd    RBX_DFLTROOT,
128185029Spjd    RBX_SINGLE,
129185029Spjd    RBX_VERBOSE
130185029Spjd};
131185029Spjd
132185029Spjdstatic const char *const dev_nm[NDEV] = {"ad", "da", "fd"};
133185029Spjdstatic const unsigned char dev_maj[NDEV] = {30, 4, 2};
134185029Spjd
/*
 * Description of one disk (or one region of it) being probed or read.
 * A pointer to this structure is handed to vdev_probe() as the private
 * argument and comes back to vdev_read() as 'priv'.
 */
135185029Spjdstruct dsk {
136185029Spjd    unsigned drive;	/* BIOS drive number; loaded into edx for int 0x13 */
137185029Spjd    unsigned type;	/* TYPE_* value; indexes dev_maj[] in main() */
138185029Spjd    unsigned unit;	/* drive number with DRV_HARD masked off */
139185029Spjd    unsigned slice;	/* slice number passed to MAKEBOOTDEV */
140185029Spjd    unsigned part;	/* partition number passed to MAKEBOOTDEV */
141185029Spjd    unsigned start;	/* sector offset that drvread() adds to every lba */
142185029Spjd    int init;		/* set to 0 on creation; not otherwise used in the code visible here */
143185029Spjd};
144185029Spjdstatic char cmd[512];
145185029Spjdstatic char kname[1024];
146185029Spjdstatic uint32_t opts;
147185029Spjdstatic int comspeed = SIOSPD;
148185029Spjdstatic struct bootinfo bootinfo;
149185029Spjdstatic uint32_t bootdev;
150185029Spjdstatic uint8_t ioctrl = IO_KEYBOARD;
151185029Spjd
152185029Spjd/* Buffers that must not span a 64k boundary. */
/*
 * main() points dmadat at the first 64k-aligned address after _end.
 * malloc() starts its heap immediately after this structure.
 */
153185029Spjd#define READ_BUF_SIZE	8192
154185029Spjdstruct dmadat {
155185029Spjd	char rdbuf[READ_BUF_SIZE];	/* for reading large things */
156185029Spjd	char secbuf[READ_BUF_SIZE];	/* for MBR/disklabel */
157185029Spjd};
158185029Spjdstatic struct dmadat *dmadat;
159185029Spjd
160185029Spjdvoid exit(int);
161185029Spjdstatic void load(void);
162185029Spjdstatic int parse(void);
163185029Spjdstatic void printf(const char *,...);
164185029Spjdstatic void putchar(int);
165185029Spjdstatic uint32_t memsize(void);
166185029Spjdstatic int drvread(struct dsk *, void *, unsigned, unsigned);
167185029Spjdstatic int keyhit(unsigned);
168185029Spjdstatic int xputc(int);
169185029Spjdstatic int xgetc(int);
170185029Spjdstatic int getc(int);
171185029Spjd
static void memcpy(void *, const void *, int);

/*
 * Minimal stand-alone byte copy for the boot environment.  Copies
 * 'len' bytes from 'src' to 'dst' front to back.  Unlike the libc
 * routine it returns nothing and takes an int length.
 */
static void
memcpy(void *dst, const void *src, int len)
{
    const char *from = src;
    char *to = dst;

    for (; len != 0; len--)
	*to++ = *from++;
}
182185029Spjd
/*
 * Copy the NUL-terminated string 'src', terminator included, to 'dst'.
 * No bounds checking; returns nothing.
 */
static void
strcpy(char *dst, const char *src)
{
    for (; *src != '\0'; src++, dst++)
	*dst = *src;
    *dst = '\0';
}
190185029Spjd
/*
 * Append the NUL-terminated string 'src' to the end of 'dst'.
 * No bounds checking; returns nothing.
 */
static void
strcat(char *dst, const char *src)
{
    char *tail = dst;

    /* Find the current terminator of dst. */
    while (*tail != '\0')
	tail++;

    /* Copy src, including its terminator. */
    do {
	*tail++ = *src;
    } while (*src++ != '\0');
}
200185029Spjd
/*
 * Compare two NUL-terminated strings.  Returns zero when equal,
 * otherwise the difference of the first differing bytes taken as
 * unsigned char.
 */
static int
strcmp(const char *s1, const char *s2)
{
    while (*s1 != '\0' && *s1 == *s2) {
	s1++;
	s2++;
    }
    return (unsigned char)*s1 - (unsigned char)*s2;
}
207185029Spjd
/*
 * Locate the first occurrence of 'ch' in the NUL-terminated string 's'.
 * Returns a pointer to it, or 0 when 'ch' does not occur.  As with the
 * standard routine, the terminating NUL counts as part of the string,
 * so searching for '\0' yields a pointer to the terminator.
 */
static const char *
strchr(const char *s, char ch)
{
    for (;; s++) {
	if (*s == ch)
	    return s;
	if (*s == '\0')
	    return 0;
    }
}
216185029Spjd
/*
 * Compare the first 'n' bytes of two buffers.  Returns zero when they
 * match, otherwise the difference of the first differing bytes taken
 * as unsigned char.
 */
static int
memcmp(const void *p1, const void *p2, size_t n)
{
    const char *a = p1;
    const char *b = p2;

    while (n > 0) {
	if (*a != *b)
	    return (unsigned char)*a - (unsigned char)*b;
	a++;
	b++;
	n--;
    }
    return 0;
}
228185029Spjd
/*
 * Fill the first 'n' bytes at 'p' with 'val'.  Returns nothing.
 */
static void
memset(void *p, char val, size_t n)
{
    char *bytes = p;
    size_t i;

    for (i = 0; i < n; i++)
	bytes[i] = val;
}
236185029Spjd
237185029Spjdstatic void *
238185029Spjdmalloc(size_t n)
239185029Spjd{
240185029Spjd	static char *heap_next;
241185029Spjd	static char *heap_end;
242185029Spjd
243185029Spjd	if (!heap_next) {
244185029Spjd		heap_next = (char *) dmadat + sizeof(*dmadat);
245185029Spjd		heap_end = (char *) (640*1024);
246185029Spjd	}
247185029Spjd
248185029Spjd	char *p = heap_next;
249185029Spjd	if (p + n > heap_end) {
250185029Spjd		printf("malloc failure\n");
251185029Spjd		for (;;)
252185029Spjd		    ;
253185029Spjd		return 0;
254185029Spjd	}
255185029Spjd	heap_next += n;
256185029Spjd	return p;
257185029Spjd}
258185029Spjd
/*
 * Return the number of characters in 's' before its terminating NUL.
 */
static size_t
strlen(const char *s)
{
	const char *end = s;

	while (*end != '\0')
		end++;
	return (size_t)(end - s);
}
267185029Spjd
/*
 * Return a copy of 's' in memory obtained from this file's malloc().
 */
static char *
strdup(const char *s)
{
	size_t size = strlen(s) + 1;	/* room for the terminator */
	char *copy = malloc(size);

	strcpy(copy, s);
	return copy;
}
275185029Spjd
276185029Spjd#include "zfsimpl.c"
277185029Spjd
278185029Spjd/*
279185029Spjd * Read from a dnode (which must be from a ZPL filesystem).
280185029Spjd */
281185029Spjdstatic int
282185029Spjdzfs_read(spa_t *spa, const dnode_phys_t *dnode, off_t *offp, void *start, size_t size)
283185029Spjd{
284185029Spjd	const znode_phys_t *zp = (const znode_phys_t *) dnode->dn_bonus;
285185029Spjd	size_t n;
286185029Spjd	int rc;
287185029Spjd
288185029Spjd	n = size;
289185029Spjd	if (*offp + n > zp->zp_size)
290185029Spjd		n = zp->zp_size - *offp;
291185029Spjd
292185029Spjd	rc = dnode_read(spa, dnode, *offp, start, n);
293185029Spjd	if (rc)
294185029Spjd		return (-1);
295185029Spjd	*offp += n;
296185029Spjd
297185029Spjd	return (n);
298185029Spjd}
299185029Spjd
300185029Spjd/*
301185029Spjd * Current ZFS pool
302185029Spjd */
303185029Spjdspa_t *spa;
304185029Spjd
305185029Spjd/*
306185029Spjd * A wrapper for dskread that doesn't have to worry about whether the
307185029Spjd * buffer pointer crosses a 64k boundary.
308185029Spjd */
309185029Spjdstatic int
310185029Spjdvdev_read(vdev_t *vdev, void *priv, off_t off, void *buf, size_t bytes)
311185029Spjd{
312185029Spjd	char *p;
313185029Spjd	unsigned int lba, nb;
314185029Spjd	struct dsk *dsk = (struct dsk *) priv;
315185029Spjd
316185029Spjd	if ((off & (DEV_BSIZE - 1)) || (bytes & (DEV_BSIZE - 1)))
317185029Spjd		return -1;
318185029Spjd
319185029Spjd	p = buf;
320185029Spjd	lba = off / DEV_BSIZE;
321185029Spjd	while (bytes > 0) {
322185029Spjd		nb = bytes / DEV_BSIZE;
323185029Spjd		if (nb > READ_BUF_SIZE / DEV_BSIZE)
324185029Spjd			nb = READ_BUF_SIZE / DEV_BSIZE;
325185029Spjd		if (drvread(dsk, dmadat->rdbuf, lba, nb))
326185029Spjd			return -1;
327185029Spjd		memcpy(p, dmadat->rdbuf, nb * DEV_BSIZE);
328185029Spjd		p += nb * DEV_BSIZE;
329185029Spjd		lba += nb;
330185029Spjd		bytes -= nb * DEV_BSIZE;
331185029Spjd	}
332185029Spjd
333185029Spjd	return 0;
334185029Spjd}
335185029Spjd
336185029Spjdstatic int
337185029Spjdxfsread(const dnode_phys_t *dnode, off_t *offp, void *buf, size_t nbyte)
338185029Spjd{
339185029Spjd    if ((size_t)zfs_read(spa, dnode, offp, buf, nbyte) != nbyte) {
340185029Spjd	printf("Invalid %s\n", "format");
341185029Spjd	return -1;
342185029Spjd    }
343185029Spjd    return 0;
344185029Spjd}
345185029Spjd
346185029Spjdstatic inline uint32_t
347185029Spjdmemsize(void)
348185029Spjd{
349185029Spjd    v86.addr = MEM_EXT;
350185029Spjd    v86.eax = 0x8800;
351185029Spjd    v86int();
352185029Spjd    return v86.eax;
353185029Spjd}
354185029Spjd
355185029Spjdstatic inline void
356185029Spjdgetstr(void)
357185029Spjd{
358185029Spjd    char *s;
359185029Spjd    int c;
360185029Spjd
361185029Spjd    s = cmd;
362185029Spjd    for (;;) {
363185029Spjd	switch (c = xgetc(0)) {
364185029Spjd	case 0:
365185029Spjd	    break;
366185029Spjd	case '\177':
367185029Spjd	case '\b':
368185029Spjd	    if (s > cmd) {
369185029Spjd		s--;
370185029Spjd		printf("\b \b");
371185029Spjd	    }
372185029Spjd	    break;
373185029Spjd	case '\n':
374185029Spjd	case '\r':
375185029Spjd	    *s = 0;
376185029Spjd	    return;
377185029Spjd	default:
378185029Spjd	    if (s - cmd < sizeof(cmd) - 1)
379185029Spjd		*s++ = c;
380185029Spjd	    putchar(c);
381185029Spjd	}
382185029Spjd    }
383185029Spjd}
384185029Spjd
385185029Spjdstatic inline void
386185029Spjdputc(int c)
387185029Spjd{
388185029Spjd    v86.addr = 0x10;
389185029Spjd    v86.eax = 0xe00 | (c & 0xff);
390185029Spjd    v86.ebx = 0x7;
391185029Spjd    v86int();
392185029Spjd}
393185029Spjd
394185029Spjd/*
395185029Spjd * Try to detect a device supported by the legacy int13 BIOS
396185029Spjd */
397185029Spjdstatic int
398185029Spjdint13probe(int drive)
399185029Spjd{
400185029Spjd    v86.ctl = V86_FLAGS;
401185029Spjd    v86.addr = 0x13;
402185029Spjd    v86.eax = 0x800;
403185029Spjd    v86.edx = drive;
404185029Spjd    v86int();
405185029Spjd
406185029Spjd    if (!(v86.efl & 0x1) &&				/* carry clear */
407185029Spjd	((v86.edx & 0xff) != (drive & DRV_MASK))) {	/* unit # OK */
408185029Spjd	if ((v86.ecx & 0x3f) == 0) {			/* absurd sector size */
409185029Spjd		return(0);				/* skip device */
410185029Spjd	}
411185029Spjd	return (1);
412185029Spjd    }
413185029Spjd    return(0);
414185029Spjd}
415185029Spjd
416192194Sdfr/*
417192194Sdfr * We call this when we find a ZFS vdev - ZFS consumes the dsk
418192194Sdfr * structure so we must make a new one.
419192194Sdfr */
420192194Sdfrstatic struct dsk *
421192194Sdfrcopy_dsk(struct dsk *dsk)
422192194Sdfr{
423192194Sdfr    struct dsk *newdsk;
424192194Sdfr
425192194Sdfr    newdsk = malloc(sizeof(struct dsk));
426192194Sdfr    *newdsk = *dsk;
427192194Sdfr    return (newdsk);
428192194Sdfr}
429192194Sdfr
/*
 * Probe one disk for ZFS vdevs.
 *
 * The whole disk is tried first; if that succeeds nothing else is
 * examined.  Otherwise, when built with GPT, LBA 1 is checked for a
 * valid GPT header and every entry whose type matches freebsd_zfs_uuid
 * is probed; if there is no usable GPT header (or GPT is not compiled
 * in) the four MBR slices with a non-zero type are probed instead.
 *
 * 'spap', when non-null, is passed to vdev_probe(); after the first
 * successful partition probe it is cleared so that later probes are
 * not passed it again.  After each successful partition probe 'dsk' is
 * replaced by a copy, because the vdev keeps the original pointer.
 *
 * NOTE(review): hdr_entries / entries_per_sec rounds down, so a
 * trailing partial sector of GPT entries is not scanned.
 */
430185029Spjdstatic void
431185029Spjdprobe_drive(struct dsk *dsk, spa_t **spap)
432185029Spjd{
433185096Sdfr#ifdef GPT
434185096Sdfr    struct gpt_hdr hdr;
435185096Sdfr    struct gpt_ent *ent;
436185096Sdfr    daddr_t slba, elba;
437185096Sdfr    unsigned part, entries_per_sec;
438185096Sdfr#endif
439185029Spjd    struct dos_partition *dp;
440185029Spjd    char *sec;
441185029Spjd    unsigned i;
442185029Spjd
443185029Spjd    /*
444185029Spjd     * If we find a vdev on the whole disk, stop here. Otherwise dig
445185029Spjd     * out the MBR and probe each slice in turn for a vdev.
446185029Spjd     */
447185029Spjd    if (vdev_probe(vdev_read, dsk, spap) == 0)
448185029Spjd	return;
449185029Spjd
450185029Spjd    sec = dmadat->secbuf;
451185029Spjd    dsk->start = 0;
452185096Sdfr
453185096Sdfr#ifdef GPT
454185096Sdfr    /*
455185096Sdfr     * First check for GPT.
456185096Sdfr     */
457185096Sdfr    if (drvread(dsk, sec, 1, 1)) {
458185096Sdfr	return;
459185096Sdfr    }
460185096Sdfr    memcpy(&hdr, sec, sizeof(hdr));
461185096Sdfr    if (memcmp(hdr.hdr_sig, GPT_HDR_SIG, sizeof(hdr.hdr_sig)) != 0 ||
462185096Sdfr	hdr.hdr_lba_self != 1 || hdr.hdr_revision < 0x00010000 ||
463185096Sdfr	hdr.hdr_entsz < sizeof(*ent) || DEV_BSIZE % hdr.hdr_entsz != 0) {
464185096Sdfr	goto trymbr;
465185096Sdfr    }
466185096Sdfr
467185096Sdfr    /*
468185096Sdfr     * Probe all GPT partitions for the presence of ZFS pools. We
469185096Sdfr     * return the spa_t for the first we find (if requested). This
470185096Sdfr     * will have the effect of booting from the first pool on the
471185096Sdfr     * disk.
472185096Sdfr     */
473185096Sdfr    entries_per_sec = DEV_BSIZE / hdr.hdr_entsz;
474185096Sdfr    slba = hdr.hdr_lba_table;
475185096Sdfr    elba = slba + hdr.hdr_entries / entries_per_sec;
476185096Sdfr    while (slba < elba) {
477198420Srnoland	dsk->start = 0;	/* table LBAs are absolute; undo any partition offset set below */
478185096Sdfr	if (drvread(dsk, sec, slba, 1))
479185096Sdfr	    return;
480185096Sdfr	for (part = 0; part < entries_per_sec; part++) {
481185096Sdfr	    ent = (struct gpt_ent *)(sec + part * hdr.hdr_entsz);
482185096Sdfr	    if (memcmp(&ent->ent_type, &freebsd_zfs_uuid,
483185096Sdfr		     sizeof(uuid_t)) == 0) {
484185096Sdfr		dsk->start = ent->ent_lba_start;
485185096Sdfr		if (vdev_probe(vdev_read, dsk, spap) == 0) {
486185096Sdfr		    /*
487185096Sdfr		     * We record the first pool we find (we will try
488192194Sdfr		     * to boot from that one).
489185096Sdfr		     */
490185096Sdfr		    spap = 0;
491185096Sdfr
492185096Sdfr		    /*
493185096Sdfr		     * This slice had a vdev. We need a new dsk
494185096Sdfr		     * structure now since the vdev now owns this one.
495185096Sdfr		     */
496192194Sdfr		    dsk = copy_dsk(dsk);
497185096Sdfr		}
498185096Sdfr	    }
499185096Sdfr	}
500185096Sdfr	slba++;
501185096Sdfr    }
502185096Sdfr    return;
503185096Sdfrtrymbr:
504185096Sdfr#endif
505185096Sdfr
506185029Spjd    if (drvread(dsk, sec, DOSBBSECTOR, 1))
507185029Spjd	return;
508185029Spjd    dp = (void *)(sec + DOSPARTOFF);
509185029Spjd
510185029Spjd    for (i = 0; i < NDOSPART; i++) {
511185029Spjd	if (!dp[i].dp_typ)
512185029Spjd	    continue;
513185029Spjd	dsk->start = dp[i].dp_start;
514185029Spjd	if (vdev_probe(vdev_read, dsk, spap) == 0) {
515185029Spjd	    /*
516185029Spjd	     * We record the first pool we find (we will try to boot
517185029Spjd	     * from that one).
518185029Spjd	     */
519185029Spjd	    spap = 0;
520185029Spjd
521185029Spjd	    /*
522185029Spjd	     * This slice had a vdev. We need a new dsk structure now
523185096Sdfr	     * since the vdev now owns this one.
524185029Spjd	     */
525192194Sdfr	    dsk = copy_dsk(dsk);
526185029Spjd	}
527185029Spjd    }
528185029Spjd}
529185029Spjd
530185029Spjdint
531185029Spjdmain(void)
532185029Spjd{
533185029Spjd    int autoboot, i;
534185029Spjd    dnode_phys_t dn;
535185029Spjd    off_t off;
536185029Spjd    struct dsk *dsk;
537185029Spjd
538185029Spjd    dmadat = (void *)(roundup2(__base + (int32_t)&_end, 0x10000) - __base);
539185029Spjd    v86.ctl = V86_FLAGS;
540185029Spjd
541185029Spjd    dsk = malloc(sizeof(struct dsk));
542185029Spjd    dsk->drive = *(uint8_t *)PTOV(ARGS);
543185029Spjd    dsk->type = dsk->drive & DRV_HARD ? TYPE_AD : TYPE_FD;
544185029Spjd    dsk->unit = dsk->drive & DRV_MASK;
545185029Spjd    dsk->slice = *(uint8_t *)PTOV(ARGS + 1) + 1;
546185029Spjd    dsk->part = 0;
547185029Spjd    dsk->start = 0;
548185029Spjd    dsk->init = 0;
549185029Spjd
550185029Spjd    bootinfo.bi_version = BOOTINFO_VERSION;
551185029Spjd    bootinfo.bi_size = sizeof(bootinfo);
552185029Spjd    bootinfo.bi_basemem = 0;	/* XXX will be filled by loader or kernel */
553185029Spjd    bootinfo.bi_extmem = memsize();
554185029Spjd    bootinfo.bi_memsizes_valid++;
555185029Spjd    bootinfo.bi_bios_dev = dsk->drive;
556185029Spjd
557185029Spjd    bootdev = MAKEBOOTDEV(dev_maj[dsk->type],
558185029Spjd			  dsk->slice, dsk->unit, dsk->part),
559185029Spjd
560185029Spjd    /* Process configuration file */
561185029Spjd
562185029Spjd    autoboot = 1;
563185029Spjd
564185029Spjd    zfs_init();
565185029Spjd
566185029Spjd    /*
567185029Spjd     * Probe the boot drive first - we will try to boot from whatever
568185029Spjd     * pool we find on that drive.
569185029Spjd     */
570185029Spjd    probe_drive(dsk, &spa);
571185029Spjd
572185029Spjd    /*
573185029Spjd     * Probe the rest of the drives that the bios knows about. This
574185029Spjd     * will find any other available pools and it may fill in missing
575185029Spjd     * vdevs for the boot pool.
576185029Spjd     */
577192194Sdfr    for (i = 0; i < 128; i++) {
578185029Spjd	if ((i | DRV_HARD) == *(uint8_t *)PTOV(ARGS))
579185029Spjd	    continue;
580185029Spjd
581192194Sdfr	if (!int13probe(i | DRV_HARD))
582192194Sdfr	    break;
583192194Sdfr
584185029Spjd	dsk = malloc(sizeof(struct dsk));
585185029Spjd	dsk->drive = i | DRV_HARD;
586185029Spjd	dsk->type = dsk->drive & TYPE_AD;
587185029Spjd	dsk->unit = i;
588185029Spjd	dsk->slice = 0;
589185029Spjd	dsk->part = 0;
590185029Spjd	dsk->start = 0;
591185029Spjd	dsk->init = 0;
592185029Spjd	probe_drive(dsk, 0);
593185029Spjd    }
594185029Spjd
595185029Spjd    /*
596185029Spjd     * If we didn't find a pool on the boot drive, default to the
597185029Spjd     * first pool we found, if any.
598185029Spjd     */
599185029Spjd    if (!spa) {
600185029Spjd	spa = STAILQ_FIRST(&zfs_pools);
601185029Spjd	if (!spa) {
602185029Spjd	    printf("No ZFS pools located, can't boot\n");
603185029Spjd	    for (;;)
604185029Spjd		;
605185029Spjd	}
606185029Spjd    }
607185029Spjd
608185029Spjd    zfs_mount_pool(spa);
609185029Spjd
610185029Spjd    if (zfs_lookup(spa, PATH_CONFIG, &dn) == 0) {
611185029Spjd	off = 0;
612198079Sjhb	zfs_read(spa, &dn, &off, cmd, sizeof(cmd));
613185029Spjd    }
614185029Spjd
615185029Spjd    if (*cmd) {
616185029Spjd	if (parse())
617185029Spjd	    autoboot = 0;
618185029Spjd	if (!OPT_CHECK(RBX_QUIET))
619185029Spjd	    printf("%s: %s", PATH_CONFIG, cmd);
620185029Spjd	/* Do not process this command twice */
621185029Spjd	*cmd = 0;
622185029Spjd    }
623185029Spjd
624185029Spjd    /*
625185029Spjd     * Try to exec stage 3 boot loader. If interrupted by a keypress,
626185029Spjd     * or in case of failure, try to load a kernel directly instead.
627185029Spjd     */
628185029Spjd
629185029Spjd    if (autoboot && !*kname) {
630185029Spjd	memcpy(kname, PATH_BOOT3, sizeof(PATH_BOOT3));
631185029Spjd	if (!keyhit(3*SECOND)) {
632185029Spjd	    load();
633185029Spjd	    memcpy(kname, PATH_KERNEL, sizeof(PATH_KERNEL));
634185029Spjd	}
635185029Spjd    }
636185029Spjd
637185029Spjd    /* Present the user with the boot2 prompt. */
638185029Spjd
639185029Spjd    for (;;) {
640185029Spjd	if (!autoboot || !OPT_CHECK(RBX_QUIET))
641185029Spjd	    printf("\nFreeBSD/i386 boot\n"
642185029Spjd		   "Default: %s:%s\n"
643185029Spjd		   "boot: ",
644185029Spjd		   spa->spa_name, kname);
645185029Spjd	if (ioctrl & IO_SERIAL)
646185029Spjd	    sio_flush();
647185029Spjd	if (!autoboot || keyhit(5*SECOND))
648185029Spjd	    getstr();
649185029Spjd	else if (!autoboot || !OPT_CHECK(RBX_QUIET))
650185029Spjd	    putchar('\n');
651185029Spjd	autoboot = 0;
652185029Spjd	if (parse())
653185029Spjd	    putchar('\a');
654185029Spjd	else
655185029Spjd	    load();
656185029Spjd    }
657185029Spjd}
658185029Spjd
/*
 * XXX - Needed for btxld to link the boot2 binary; do not remove.
 * Intentionally does nothing and returns to its caller.
 */
void
exit(int x)
{
    (void)x;
}
664185029Spjd
/*
 * Load the file named by kname from the current pool and start it
 * with __exec().
 *
 * Two formats are accepted: a.out with ZMAGIC, and ELF.  Any lookup or
 * read failure, or an unrecognized header, makes the function return
 * to the caller (xfsread() and the format check print "Invalid
 * format").  On success __exec() is called with the entry address, the
 * boot flags, bootdev, the ZFS hint flag, the two halves of the pool
 * guid and the physical address of bootinfo.
 *
 * NOTE(review): ep[] is static and the ELF load loop always processes
 * two entries, so an image with fewer than two PT_LOAD headers reuses
 * whatever ep[] held before.
 */
665185029Spjdstatic void
666185029Spjdload(void)
667185029Spjd{
668185029Spjd    union {
669185029Spjd	struct exec ex;
670185029Spjd	Elf32_Ehdr eh;
671185029Spjd    } hdr;
672185029Spjd    static Elf32_Phdr ep[2];
673185029Spjd    static Elf32_Shdr es[2];
674185029Spjd    caddr_t p;
675185029Spjd    dnode_phys_t dn;
676185029Spjd    off_t off;
677185029Spjd    uint32_t addr, x;
678185029Spjd    int fmt, i, j;
679185029Spjd
680185029Spjd    if (zfs_lookup(spa, kname, &dn)) {
681185029Spjd	return;
682185029Spjd    }
683185029Spjd    off = 0;
684185029Spjd    if (xfsread(&dn, &off, &hdr, sizeof(hdr)))
685185029Spjd	return;
    /* fmt: 0 = a.out (ZMAGIC), 1 = ELF */
686185029Spjd    if (N_GETMAGIC(hdr.ex) == ZMAGIC)
687185029Spjd	fmt = 0;
688185029Spjd    else if (IS_ELF(hdr.eh))
689185029Spjd	fmt = 1;
690185029Spjd    else {
691185029Spjd	printf("Invalid %s\n", "format");
692185029Spjd	return;
693185029Spjd    }
694185029Spjd    if (fmt == 0) {
	/* a.out: text is read from file offset PAGE_SIZE, data follows. */
695185029Spjd	addr = hdr.ex.a_entry & 0xffffff;
696185029Spjd	p = PTOV(addr);
697185029Spjd	off = PAGE_SIZE;
698185029Spjd	if (xfsread(&dn, &off, p, hdr.ex.a_text))
699185029Spjd	    return;
700185029Spjd	p += roundup2(hdr.ex.a_text, PAGE_SIZE);
701185029Spjd	if (xfsread(&dn, &off, p, hdr.ex.a_data))
702185029Spjd	    return;
703185029Spjd	p += hdr.ex.a_data + roundup2(hdr.ex.a_bss, PAGE_SIZE);
704185029Spjd	bootinfo.bi_symtab = VTOP(p);
	/* Symbol area: a_syms value, the symbols, then a size-prefixed blob. */
705185029Spjd	memcpy(p, &hdr.ex.a_syms, sizeof(hdr.ex.a_syms));
706185029Spjd	p += sizeof(hdr.ex.a_syms);
707185029Spjd	if (hdr.ex.a_syms) {
708185029Spjd	    if (xfsread(&dn, &off, p, hdr.ex.a_syms))
709185029Spjd		return;
710185029Spjd	    p += hdr.ex.a_syms;
711185029Spjd	    if (xfsread(&dn, &off, p, sizeof(int)))
712185029Spjd		return;
713185029Spjd	    x = *(uint32_t *)p;
714185029Spjd	    p += sizeof(int);
715185029Spjd	    x -= sizeof(int);
716185029Spjd	    if (xfsread(&dn, &off, p, x))
717185029Spjd		return;
718185029Spjd	    p += x;
719185029Spjd	}
720185029Spjd    } else {
	/* ELF: collect the first two PT_LOAD program headers into ep[]. */
721185029Spjd	off = hdr.eh.e_phoff;
722185029Spjd	for (j = i = 0; i < hdr.eh.e_phnum && j < 2; i++) {
723185029Spjd	    if (xfsread(&dn, &off, ep + j, sizeof(ep[0])))
724185029Spjd		return;
725185029Spjd	    if (ep[j].p_type == PT_LOAD)
726185029Spjd		j++;
727185029Spjd	}
728185029Spjd	for (i = 0; i < 2; i++) {
729185029Spjd	    p = PTOV(ep[i].p_paddr & 0xffffff);
730185029Spjd	    off = ep[i].p_offset;
731185029Spjd	    if (xfsread(&dn, &off, p, ep[i].p_filesz))
732185029Spjd		return;
733185029Spjd	}
734185029Spjd	p += roundup2(ep[1].p_memsz, PAGE_SIZE);
735185029Spjd	bootinfo.bi_symtab = VTOP(p);
	/* Copy the two sections after the string-table section, each preceded by its size. */
736185029Spjd	if (hdr.eh.e_shnum == hdr.eh.e_shstrndx + 3) {
737185029Spjd	    off = hdr.eh.e_shoff + sizeof(es[0]) *
738185029Spjd		(hdr.eh.e_shstrndx + 1);
739185029Spjd	    if (xfsread(&dn, &off, &es, sizeof(es)))
740185029Spjd		return;
741185029Spjd	    for (i = 0; i < 2; i++) {
742185029Spjd		memcpy(p, &es[i].sh_size, sizeof(es[i].sh_size));
743185029Spjd		p += sizeof(es[i].sh_size);
744185029Spjd		off = es[i].sh_offset;
745185029Spjd		if (xfsread(&dn, &off, p, es[i].sh_size))
746185029Spjd		    return;
747185029Spjd		p += es[i].sh_size;
748185029Spjd	    }
749185029Spjd	}
750185029Spjd	addr = hdr.eh.e_entry & 0xffffff;
751185029Spjd    }
752185029Spjd    bootinfo.bi_esymtab = VTOP(p);
753185029Spjd    bootinfo.bi_kernelname = VTOP(kname);
754185029Spjd    __exec((caddr_t)addr, RB_BOOTINFO | (opts & RBX_MASK),
755185029Spjd	   bootdev,
756185029Spjd	   KARGS_FLAGS_ZFS,
757185029Spjd	   (uint32_t) spa->spa_guid,
758185029Spjd	   (uint32_t) (spa->spa_guid >> 32),
759185029Spjd	   VTOP(&bootinfo));
760185029Spjd}
761185029Spjd
762185029Spjdstatic int
763185029Spjdparse()
764185029Spjd{
765185029Spjd    char *arg = cmd;
766185029Spjd    char *ep, *p, *q;
767185029Spjd    const char *cp;
768185029Spjd    //unsigned int drv;
769185029Spjd    int c, i, j;
770185029Spjd
771185029Spjd    while ((c = *arg++)) {
772185029Spjd	if (c == ' ' || c == '\t' || c == '\n')
773185029Spjd	    continue;
774185029Spjd	for (p = arg; *p && *p != '\n' && *p != ' ' && *p != '\t'; p++);
775185029Spjd	ep = p;
776185029Spjd	if (*p)
777185029Spjd	    *p++ = 0;
778185029Spjd	if (c == '-') {
779185029Spjd	    while ((c = *arg++)) {
780185029Spjd		if (c == 'P') {
781185029Spjd		    if (*(uint8_t *)PTOV(0x496) & 0x10) {
782185029Spjd			cp = "yes";
783185029Spjd		    } else {
784185029Spjd			opts |= OPT_SET(RBX_DUAL) | OPT_SET(RBX_SERIAL);
785185029Spjd			cp = "no";
786185029Spjd		    }
787185029Spjd		    printf("Keyboard: %s\n", cp);
788185029Spjd		    continue;
789185029Spjd		} else if (c == 'S') {
790185029Spjd		    j = 0;
791185029Spjd		    while ((unsigned int)(i = *arg++ - '0') <= 9)
792185029Spjd			j = j * 10 + i;
793185029Spjd		    if (j > 0 && i == -'0') {
794185029Spjd			comspeed = j;
795185029Spjd			break;
796185029Spjd		    }
797185029Spjd		    /* Fall through to error below ('S' not in optstr[]). */
798185029Spjd		}
799185029Spjd		for (i = 0; c != optstr[i]; i++)
800185029Spjd		    if (i == NOPT - 1)
801185029Spjd			return -1;
802185029Spjd		opts ^= OPT_SET(flags[i]);
803185029Spjd	    }
804185029Spjd	    ioctrl = OPT_CHECK(RBX_DUAL) ? (IO_SERIAL|IO_KEYBOARD) :
805185029Spjd		     OPT_CHECK(RBX_SERIAL) ? IO_SERIAL : IO_KEYBOARD;
806185029Spjd	    if (ioctrl & IO_SERIAL)
807185029Spjd	        sio_init(115200 / comspeed);
808185029Spjd	} if (c == '?') {
809185029Spjd	    dnode_phys_t dn;
810185029Spjd
811185029Spjd	    if (zfs_lookup(spa, arg, &dn) == 0) {
812185029Spjd		zap_list(spa, &dn);
813185029Spjd	    }
814185029Spjd	    return -1;
815185029Spjd	} else {
816185029Spjd	    arg--;
817185029Spjd
818185029Spjd	    /*
819185029Spjd	     * Report pool status if the comment is 'status'. Lets
820185029Spjd	     * hope no-one wants to load /status as a kernel.
821185029Spjd	     */
822185029Spjd	    if (!strcmp(arg, "status")) {
823185029Spjd		spa_all_status();
824185029Spjd		return -1;
825185029Spjd	    }
826185029Spjd
827185029Spjd	    /*
828185029Spjd	     * If there is a colon, switch pools.
829185029Spjd	     */
830185029Spjd	    q = (char *) strchr(arg, ':');
831185029Spjd	    if (q) {
832185029Spjd		spa_t *newspa;
833185029Spjd
834185029Spjd		*q++ = 0;
835185029Spjd		newspa = spa_find_by_name(arg);
836185029Spjd		if (newspa) {
837185029Spjd		    spa = newspa;
838185029Spjd		    zfs_mount_pool(spa);
839185029Spjd		} else {
840185029Spjd		    printf("\nCan't find ZFS pool %s\n", arg);
841185029Spjd		    return -1;
842185029Spjd		}
843185029Spjd		arg = q;
844185029Spjd	    }
845185029Spjd	    if ((i = ep - arg)) {
846185029Spjd		if ((size_t)i >= sizeof(kname))
847185029Spjd		    return -1;
848185029Spjd		memcpy(kname, arg, i + 1);
849185029Spjd	    }
850185029Spjd	}
851185029Spjd	arg = p;
852185029Spjd    }
853185029Spjd    return 0;
854185029Spjd}
855185029Spjd
/*
 * Minimal formatted output through putchar().
 *
 * Supported conversions:
 *   %c     one character;
 *   %s     a string, with an optional decimal field width; padding
 *          spaces are written before the string when '-' was given
 *          and after it otherwise;
 *   %u     unsigned decimal, with 'l' or 'll' length modifiers.
 * Any other character following '%' is simply written out.
 */
856185029Spjdstatic void
857185029Spjdprintf(const char *fmt,...)
858185029Spjd{
859185029Spjd    va_list ap;
860198420Srnoland    char buf[20];	/* decimal digits, least significant first */
861185029Spjd    char *s;
862198420Srnoland    unsigned long long u;
863185029Spjd    int c;
864185029Spjd    int minus;
865185029Spjd    int prec;
866198420Srnoland    int l;		/* number of 'l' modifiers seen */
867185029Spjd    int len;
868185029Spjd    int pad;
869185029Spjd
870185029Spjd    va_start(ap, fmt);
871185029Spjd    while ((c = *fmt++)) {
872185029Spjd	if (c == '%') {
873185029Spjd	    minus = 0;
874185029Spjd	    prec = 0;
875198420Srnoland	    l = 0;
876185029Spjd	nextfmt:
877185029Spjd	    c = *fmt++;
878185029Spjd	    switch (c) {
879185029Spjd	    case '-':
880185029Spjd		minus = 1;
881185029Spjd		goto nextfmt;
882185029Spjd	    case '0':
883185029Spjd	    case '1':
884185029Spjd	    case '2':
885185029Spjd	    case '3':
886185029Spjd	    case '4':
887185029Spjd	    case '5':
888185029Spjd	    case '6':
889185029Spjd	    case '7':
890185029Spjd	    case '8':
891185029Spjd	    case '9':
892185029Spjd		prec = 10 * prec + (c - '0');
893185029Spjd		goto nextfmt;
894185029Spjd	    case 'c':
895185029Spjd		putchar(va_arg(ap, int));
896185029Spjd		continue;
897198420Srnoland	    case 'l':
898198420Srnoland		l++;
899198420Srnoland		goto nextfmt;
900185029Spjd	    case 's':
901185029Spjd		s = va_arg(ap, char *);
902185029Spjd		if (prec) {
903185029Spjd		    len = strlen(s);
904185029Spjd		    if (len < prec)
905185029Spjd			pad = prec - len;
906185029Spjd		    else
907185029Spjd			pad = 0;
908185029Spjd		    if (minus)
909185029Spjd			while (pad--)
910185029Spjd			    putchar(' ');
911185029Spjd		    for (; *s; s++)
912185029Spjd			putchar(*s);
913185029Spjd		    if (!minus)
914185029Spjd			while (pad--)
915185029Spjd			    putchar(' ');
916185029Spjd		} else {
917185029Spjd		    for (; *s; s++)
918185029Spjd			putchar(*s);
919185029Spjd		}
920185029Spjd		continue;
921185029Spjd	    case 'u':
922198420Srnoland		switch (l) {
923198420Srnoland		case 2:
924198420Srnoland		    u = va_arg(ap, unsigned long long);
925198420Srnoland		    break;
926198420Srnoland		case 1:
927198420Srnoland		    u = va_arg(ap, unsigned long);
928198420Srnoland		    break;
929198420Srnoland		default:
930198420Srnoland		    u = va_arg(ap, unsigned);
931198420Srnoland		    break;
932198420Srnoland		}
		/* Generate the digits in reverse, then emit them backwards. */
933185029Spjd		s = buf;
934185029Spjd		do
935185029Spjd		    *s++ = '0' + u % 10U;
936185029Spjd		while (u /= 10U);
937185029Spjd		while (--s >= buf)
938185029Spjd		    putchar(*s);
939185029Spjd		continue;
940185029Spjd	    }
941185029Spjd	}
942185029Spjd	putchar(c);
943185029Spjd    }
944185029Spjd    va_end(ap);
945185029Spjd    return;
946185029Spjd}
947185029Spjd
/*
 * Write a single character through xputc().  A newline is expanded to a
 * carriage return followed by the newline itself.
 */
static void
putchar(int c)
{
    switch (c) {
    case '\n':
	xputc('\r');
	/* FALLTHROUGH */
    default:
	xputc(c);
	break;
    }
}
955185029Spjd
956185096Sdfr#ifdef GPT
957185096Sdfrstatic struct {
958185096Sdfr	uint16_t len;
959185096Sdfr	uint16_t count;
960185096Sdfr	uint16_t seg;
961185096Sdfr	uint16_t off;
962185096Sdfr	uint64_t lba;
963185096Sdfr} packet;
964185096Sdfr#endif
965185096Sdfr
966185029Spjdstatic int
967185029Spjddrvread(struct dsk *dsk, void *buf, unsigned lba, unsigned nblk)
968185029Spjd{
969185096Sdfr#ifdef GPT
970192194Sdfr    static unsigned c = 0x2d5c7c2f;
971185096Sdfr
972185096Sdfr    if (!OPT_CHECK(RBX_QUIET))
973185096Sdfr	printf("%c\b", c = c << 8 | c >> 24);
974185096Sdfr    packet.len = 0x10;
975185096Sdfr    packet.count = nblk;
976185096Sdfr    packet.seg = VTOPOFF(buf);
977185096Sdfr    packet.off = VTOPSEG(buf);
978185096Sdfr    packet.lba = lba + dsk->start;
979185096Sdfr    v86.ctl = V86_FLAGS;
980185096Sdfr    v86.addr = 0x13;
981185096Sdfr    v86.eax = 0x4200;
982185096Sdfr    v86.edx = dsk->drive;
983185096Sdfr    v86.ds = VTOPSEG(&packet);
984185096Sdfr    v86.esi = VTOPOFF(&packet);
985185096Sdfr    v86int();
986185096Sdfr    if (V86_CY(v86.efl)) {
987185096Sdfr	printf("error %u lba %u\n", v86.eax >> 8 & 0xff, lba);
988185096Sdfr	return -1;
989185096Sdfr    }
990185096Sdfr    return 0;
991185096Sdfr#else
992185029Spjd    static unsigned c = 0x2d5c7c2f;
993185029Spjd
994185029Spjd    lba += dsk->start;
995185029Spjd    if (!OPT_CHECK(RBX_QUIET))
996185029Spjd	printf("%c\b", c = c << 8 | c >> 24);
997185029Spjd    v86.ctl = V86_ADDR | V86_CALLF | V86_FLAGS;
998185029Spjd    v86.addr = XREADORG;		/* call to xread in boot1 */
999185029Spjd    v86.es = VTOPSEG(buf);
1000185029Spjd    v86.eax = lba;
1001185029Spjd    v86.ebx = VTOPOFF(buf);
1002185029Spjd    v86.ecx = lba >> 16;
1003185029Spjd    v86.edx = nblk << 8 | dsk->drive;
1004185029Spjd    v86int();
1005185029Spjd    v86.ctl = V86_FLAGS;
1006185029Spjd    if (V86_CY(v86.efl)) {
1007185029Spjd	printf("error %u lba %u\n", v86.eax >> 8 & 0xff, lba);
1008185029Spjd	return -1;
1009185029Spjd    }
1010185029Spjd    return 0;
1011185096Sdfr#endif
1012185029Spjd}
1013185029Spjd
1014185029Spjdstatic int
1015185029Spjdkeyhit(unsigned ticks)
1016185029Spjd{
1017185029Spjd    uint32_t t0, t1;
1018185029Spjd
1019185029Spjd    if (OPT_CHECK(RBX_NOINTR))
1020185029Spjd	return 0;
1021185029Spjd    t0 = 0;
1022185029Spjd    for (;;) {
1023185029Spjd	if (xgetc(1))
1024185029Spjd	    return 1;
1025185029Spjd	t1 = *(uint32_t *)PTOV(0x46c);
1026185029Spjd	if (!t0)
1027185029Spjd	    t0 = t1;
1028185029Spjd	if (t1 < t0 || t1 >= t0 + ticks)
1029185029Spjd	    return 0;
1030185029Spjd    }
1031185029Spjd}
1032185029Spjd
1033185029Spjdstatic int
1034185029Spjdxputc(int c)
1035185029Spjd{
1036185029Spjd    if (ioctrl & IO_KEYBOARD)
1037185029Spjd	putc(c);
1038185029Spjd    if (ioctrl & IO_SERIAL)
1039185029Spjd	sio_putc(c);
1040185029Spjd    return c;
1041185029Spjd}
1042185029Spjd
1043185029Spjdstatic int
1044185029Spjdxgetc(int fn)
1045185029Spjd{
1046185029Spjd    if (OPT_CHECK(RBX_NOINTR))
1047185029Spjd	return 0;
1048185029Spjd    for (;;) {
1049185029Spjd	if (ioctrl & IO_KEYBOARD && getc(1))
1050185029Spjd	    return fn ? 1 : getc(0);
1051185029Spjd	if (ioctrl & IO_SERIAL && sio_ischar())
1052185029Spjd	    return fn ? 1 : sio_getc();
1053185029Spjd	if (fn)
1054185029Spjd	    return 0;
1055185029Spjd    }
1056185029Spjd}
1057185029Spjd
1058185029Spjdstatic int
1059185029Spjdgetc(int fn)
1060185029Spjd{
1061185029Spjd    /*
1062185029Spjd     * The extra comparison against zero is an attempt to work around
1063185029Spjd     * what appears to be a bug in QEMU and Bochs. Both emulators
1064185029Spjd     * sometimes report a key-press with scancode one and ascii zero
1065185029Spjd     * when no such key is pressed in reality. As far as I can tell,
1066185029Spjd     * this only happens shortly after a reboot.
1067185029Spjd     */
1068185029Spjd    v86.addr = 0x16;
1069185029Spjd    v86.eax = fn << 8;
1070185029Spjd    v86int();
1071185029Spjd    return fn == 0 ? v86.eax & 0xff : (!V86_ZR(v86.efl) && (v86.eax & 0xff));
1072185029Spjd}
1073