zfsboot.c revision 241288
1185029Spjd/*-
2185029Spjd * Copyright (c) 1998 Robert Nordier
3185029Spjd * All rights reserved.
4185029Spjd *
5185029Spjd * Redistribution and use in source and binary forms are freely
6185029Spjd * permitted provided that the above copyright notice and this
7185029Spjd * paragraph and the following disclaimer are duplicated in all
8185029Spjd * such forms.
9185029Spjd *
10185029Spjd * This software is provided "AS IS" and without any express or
11185029Spjd * implied warranties, including, without limitation, the implied
12185029Spjd * warranties of merchantability and fitness for a particular
13185029Spjd * purpose.
14185029Spjd */
15185029Spjd
16185029Spjd#include <sys/cdefs.h>
17185029Spjd__FBSDID("$FreeBSD: head/sys/boot/i386/zfsboot/zfsboot.c 241288 2012-10-06 19:38:33Z avg $");
18185029Spjd
19185029Spjd#include <sys/param.h>
20185029Spjd#include <sys/errno.h>
21185029Spjd#include <sys/diskmbr.h>
22185096Sdfr#ifdef GPT
23185096Sdfr#include <sys/gpt.h>
24185096Sdfr#endif
25185029Spjd#include <sys/reboot.h>
26185029Spjd#include <sys/queue.h>
27185029Spjd
28185029Spjd#include <machine/bootinfo.h>
29185029Spjd#include <machine/elf.h>
30200309Sjhb#include <machine/pc/bios.h>
31185029Spjd
32185029Spjd#include <stdarg.h>
33185029Spjd#include <stddef.h>
34185029Spjd
35185029Spjd#include <a.out.h>
36185029Spjd
37185029Spjd#include <btxv86.h>
38185029Spjd
39185029Spjd#include "lib.h"
40213136Spjd#include "rbx.h"
41213136Spjd#include "drv.h"
42213136Spjd#include "util.h"
43213136Spjd#include "cons.h"
44235154Savg#include "bootargs.h"
45185029Spjd
46235329Savg#include "libzfs.h"
47235329Savg
/* Boot configuration files; main() tries PATH_CONFIG before PATH_DOTCONFIG. */
#define PATH_CONFIG	"/boot/config"
#define PATH_DOTCONFIG	"/boot.config"

/* Default stage 3 loader, and the kernel loaded directly as a fallback. */
#define PATH_BOOT3	"/boot/zfsloader"
#define PATH_KERNEL	"/boot/kernel/kernel"

/*
 * Physical address of the argument area: main() reads the boot drive
 * number from ARGS and the boot slice from ARGS + 1.
 */
#define ARGS		0x900

#define NOPT		14	/* entries in optstr[] and flags[] */
#define NDEV		3	/* entries in dev_nm[] and dev_maj[] */

/* Physical address of the byte main() uses as the hard drive count. */
#define BIOS_NUMDRIVES	0x475
#define DRV_HARD	0x80	/* set in BIOS drive numbers of hard disks */
#define DRV_MASK	0x7f	/* unit part of a BIOS drive number */

/* Device types; main() uses them as an index into dev_maj[]. */
#define TYPE_AD		0
#define TYPE_DA		1
#define TYPE_MAXHARD	TYPE_DA
#define TYPE_FD		2
66185029Spjdextern uint32_t _end;
67185029Spjd
68185096Sdfr#ifdef GPT
69185096Sdfrstatic const uuid_t freebsd_zfs_uuid = GPT_ENT_TYPE_FREEBSD_ZFS;
70185096Sdfr#endif
71185029Spjdstatic const char optstr[NOPT] = "DhaCcdgmnpqrsv"; /* Also 'P', 'S' */
72185029Spjdstatic const unsigned char flags[NOPT] = {
73185029Spjd    RBX_DUAL,
74185029Spjd    RBX_SERIAL,
75185029Spjd    RBX_ASKNAME,
76185029Spjd    RBX_CDROM,
77185029Spjd    RBX_CONFIG,
78185029Spjd    RBX_KDB,
79185029Spjd    RBX_GDB,
80185029Spjd    RBX_MUTE,
81185029Spjd    RBX_NOINTR,
82185029Spjd    RBX_PAUSE,
83185029Spjd    RBX_QUIET,
84185029Spjd    RBX_DFLTROOT,
85185029Spjd    RBX_SINGLE,
86185029Spjd    RBX_VERBOSE
87185029Spjd};
88213136Spjduint32_t opts;
89185029Spjd
90185029Spjdstatic const char *const dev_nm[NDEV] = {"ad", "da", "fd"};
91185029Spjdstatic const unsigned char dev_maj[NDEV] = {30, 4, 2};
92185029Spjd
93185029Spjdstatic char cmd[512];
94234339Savgstatic char cmddup[512];
95185029Spjdstatic char kname[1024];
96235329Savgstatic char rootname[256];
97185029Spjdstatic int comspeed = SIOSPD;
98185029Spjdstatic struct bootinfo bootinfo;
99185029Spjdstatic uint32_t bootdev;
100235329Savgstatic struct zfs_boot_args zfsargs;
101235329Savgstatic struct zfsmount zfsmount;
102185029Spjd
103200309Sjhbvm_offset_t	high_heap_base;
104200309Sjhbuint32_t	bios_basemem, bios_extmem, high_heap_size;
105200309Sjhb
106200309Sjhbstatic struct bios_smap smap;
107200309Sjhb
108200309Sjhb/*
109200309Sjhb * The minimum amount of memory to reserve in bios_extmem for the heap.
110200309Sjhb */
111200309Sjhb#define	HEAP_MIN	(3 * 1024 * 1024)
112200309Sjhb
113200309Sjhbstatic char *heap_next;
114200309Sjhbstatic char *heap_end;
115200309Sjhb
116185029Spjd/* Buffers that must not span a 64k boundary. */
117185029Spjd#define READ_BUF_SIZE	8192
118185029Spjdstruct dmadat {
119185029Spjd	char rdbuf[READ_BUF_SIZE];	/* for reading large things */
120185029Spjd	char secbuf[READ_BUF_SIZE];	/* for MBR/disklabel */
121185029Spjd};
122185029Spjdstatic struct dmadat *dmadat;
123185029Spjd
124185029Spjdvoid exit(int);
125185029Spjdstatic void load(void);
126185029Spjdstatic int parse(void);
127200309Sjhbstatic void bios_getmem(void);
128185029Spjd
129185029Spjdstatic void *
130185029Spjdmalloc(size_t n)
131185029Spjd{
132185029Spjd	char *p = heap_next;
133185029Spjd	if (p + n > heap_end) {
134185029Spjd		printf("malloc failure\n");
135185029Spjd		for (;;)
136185029Spjd		    ;
137185029Spjd		return 0;
138185029Spjd	}
139185029Spjd	heap_next += n;
140185029Spjd	return p;
141185029Spjd}
142185029Spjd
/*
 * Return a copy of the NUL-terminated string s, allocated with the local
 * malloc().
 */
static char *
strdup(const char *s)
{
	char *copy;

	copy = malloc(strlen(s) + 1);
	strcpy(copy, s);
	return copy;
}
150185029Spjd
151185029Spjd#include "zfsimpl.c"
152185029Spjd
153185029Spjd/*
154185029Spjd * Read from a dnode (which must be from a ZPL filesystem).
155185029Spjd */
156185029Spjdstatic int
157185029Spjdzfs_read(spa_t *spa, const dnode_phys_t *dnode, off_t *offp, void *start, size_t size)
158185029Spjd{
159185029Spjd	const znode_phys_t *zp = (const znode_phys_t *) dnode->dn_bonus;
160185029Spjd	size_t n;
161185029Spjd	int rc;
162185029Spjd
163185029Spjd	n = size;
164185029Spjd	if (*offp + n > zp->zp_size)
165185029Spjd		n = zp->zp_size - *offp;
166185029Spjd
167185029Spjd	rc = dnode_read(spa, dnode, *offp, start, n);
168185029Spjd	if (rc)
169185029Spjd		return (-1);
170185029Spjd	*offp += n;
171185029Spjd
172185029Spjd	return (n);
173185029Spjd}
174185029Spjd
175185029Spjd/*
176185029Spjd * Current ZFS pool
177185029Spjd */
178235329Savgstatic spa_t *spa;
179185029Spjd
180185029Spjd/*
181185029Spjd * A wrapper for dskread that doesn't have to worry about whether the
182185029Spjd * buffer pointer crosses a 64k boundary.
183185029Spjd */
184185029Spjdstatic int
185185029Spjdvdev_read(vdev_t *vdev, void *priv, off_t off, void *buf, size_t bytes)
186185029Spjd{
187185029Spjd	char *p;
188199579Sjhb	daddr_t lba;
189199579Sjhb	unsigned int nb;
190185029Spjd	struct dsk *dsk = (struct dsk *) priv;
191185029Spjd
192185029Spjd	if ((off & (DEV_BSIZE - 1)) || (bytes & (DEV_BSIZE - 1)))
193185029Spjd		return -1;
194185029Spjd
195185029Spjd	p = buf;
196185029Spjd	lba = off / DEV_BSIZE;
197213136Spjd	lba += dsk->start;
198185029Spjd	while (bytes > 0) {
199185029Spjd		nb = bytes / DEV_BSIZE;
200185029Spjd		if (nb > READ_BUF_SIZE / DEV_BSIZE)
201185029Spjd			nb = READ_BUF_SIZE / DEV_BSIZE;
202185029Spjd		if (drvread(dsk, dmadat->rdbuf, lba, nb))
203185029Spjd			return -1;
204185029Spjd		memcpy(p, dmadat->rdbuf, nb * DEV_BSIZE);
205185029Spjd		p += nb * DEV_BSIZE;
206185029Spjd		lba += nb;
207185029Spjd		bytes -= nb * DEV_BSIZE;
208185029Spjd	}
209185029Spjd
210185029Spjd	return 0;
211185029Spjd}
212185029Spjd
213185029Spjdstatic int
214185029Spjdxfsread(const dnode_phys_t *dnode, off_t *offp, void *buf, size_t nbyte)
215185029Spjd{
216185029Spjd    if ((size_t)zfs_read(spa, dnode, offp, buf, nbyte) != nbyte) {
217235329Savg	printf("Invalid format\n");
218185029Spjd	return -1;
219185029Spjd    }
220185029Spjd    return 0;
221185029Spjd}
222185029Spjd
223200309Sjhbstatic void
224200309Sjhbbios_getmem(void)
225185029Spjd{
226200309Sjhb    uint64_t size;
227185029Spjd
228200309Sjhb    /* Parse system memory map */
229200309Sjhb    v86.ebx = 0;
230200309Sjhb    do {
231200309Sjhb	v86.ctl = V86_FLAGS;
232200309Sjhb	v86.addr = 0x15;		/* int 0x15 function 0xe820*/
233200309Sjhb	v86.eax = 0xe820;
234200309Sjhb	v86.ecx = sizeof(struct bios_smap);
235200309Sjhb	v86.edx = SMAP_SIG;
236200309Sjhb	v86.es = VTOPSEG(&smap);
237200309Sjhb	v86.edi = VTOPOFF(&smap);
238200309Sjhb	v86int();
239200309Sjhb	if ((v86.efl & 1) || (v86.eax != SMAP_SIG))
240200309Sjhb	    break;
241200309Sjhb	/* look for a low-memory segment that's large enough */
242200309Sjhb	if ((smap.type == SMAP_TYPE_MEMORY) && (smap.base == 0) &&
243200309Sjhb	    (smap.length >= (512 * 1024)))
244200309Sjhb	    bios_basemem = smap.length;
245200309Sjhb	/* look for the first segment in 'extended' memory */
246200309Sjhb	if ((smap.type == SMAP_TYPE_MEMORY) && (smap.base == 0x100000)) {
247200309Sjhb	    bios_extmem = smap.length;
248200309Sjhb	}
249200309Sjhb
250200309Sjhb	/*
251200309Sjhb	 * Look for the largest segment in 'extended' memory beyond
252200309Sjhb	 * 1MB but below 4GB.
253200309Sjhb	 */
254200309Sjhb	if ((smap.type == SMAP_TYPE_MEMORY) && (smap.base > 0x100000) &&
255200309Sjhb	    (smap.base < 0x100000000ull)) {
256200309Sjhb	    size = smap.length;
257200309Sjhb
258200309Sjhb	    /*
259200309Sjhb	     * If this segment crosses the 4GB boundary, truncate it.
260200309Sjhb	     */
261200309Sjhb	    if (smap.base + size > 0x100000000ull)
262200309Sjhb		size = 0x100000000ull - smap.base;
263200309Sjhb
264200309Sjhb	    if (size > high_heap_size) {
265200309Sjhb		high_heap_size = size;
266200309Sjhb		high_heap_base = smap.base;
267200309Sjhb	    }
268200309Sjhb	}
269200309Sjhb    } while (v86.ebx != 0);
270200309Sjhb
271200309Sjhb    /* Fall back to the old compatibility function for base memory */
272200309Sjhb    if (bios_basemem == 0) {
273200309Sjhb	v86.ctl = 0;
274200309Sjhb	v86.addr = 0x12;		/* int 0x12 */
275200309Sjhb	v86int();
276200309Sjhb
277200309Sjhb	bios_basemem = (v86.eax & 0xffff) * 1024;
278200309Sjhb    }
279200309Sjhb
280200309Sjhb    /* Fall back through several compatibility functions for extended memory */
281200309Sjhb    if (bios_extmem == 0) {
282200309Sjhb	v86.ctl = V86_FLAGS;
283200309Sjhb	v86.addr = 0x15;		/* int 0x15 function 0xe801*/
284200309Sjhb	v86.eax = 0xe801;
285200309Sjhb	v86int();
286200309Sjhb	if (!(v86.efl & 1)) {
287200309Sjhb	    bios_extmem = ((v86.ecx & 0xffff) + ((v86.edx & 0xffff) * 64)) * 1024;
288200309Sjhb	}
289200309Sjhb    }
290200309Sjhb    if (bios_extmem == 0) {
291200309Sjhb	v86.ctl = 0;
292200309Sjhb	v86.addr = 0x15;		/* int 0x15 function 0x88*/
293200309Sjhb	v86.eax = 0x8800;
294200309Sjhb	v86int();
295200309Sjhb	bios_extmem = (v86.eax & 0xffff) * 1024;
296200309Sjhb    }
297200309Sjhb
298200309Sjhb    /*
299200309Sjhb     * If we have extended memory and did not find a suitable heap
300200309Sjhb     * region in the SMAP, use the last 3MB of 'extended' memory as a
301200309Sjhb     * high heap candidate.
302200309Sjhb     */
303200309Sjhb    if (bios_extmem >= HEAP_MIN && high_heap_size < HEAP_MIN) {
304200309Sjhb	high_heap_size = HEAP_MIN;
305200309Sjhb	high_heap_base = bios_extmem + 0x100000 - HEAP_MIN;
306200309Sjhb    }
307200309Sjhb}
308200309Sjhb
309185029Spjd/*
310185029Spjd * Try to detect a device supported by the legacy int13 BIOS
311185029Spjd */
312185029Spjdstatic int
313185029Spjdint13probe(int drive)
314185029Spjd{
315185029Spjd    v86.ctl = V86_FLAGS;
316185029Spjd    v86.addr = 0x13;
317185029Spjd    v86.eax = 0x800;
318185029Spjd    v86.edx = drive;
319185029Spjd    v86int();
320185029Spjd
321185029Spjd    if (!(v86.efl & 0x1) &&				/* carry clear */
322185029Spjd	((v86.edx & 0xff) != (drive & DRV_MASK))) {	/* unit # OK */
323185029Spjd	if ((v86.ecx & 0x3f) == 0) {			/* absurd sector size */
324185029Spjd		return(0);				/* skip device */
325185029Spjd	}
326185029Spjd	return (1);
327185029Spjd    }
328185029Spjd    return(0);
329185029Spjd}
330185029Spjd
331192194Sdfr/*
332192194Sdfr * We call this when we find a ZFS vdev - ZFS consumes the dsk
333192194Sdfr * structure so we must make a new one.
334192194Sdfr */
335192194Sdfrstatic struct dsk *
336192194Sdfrcopy_dsk(struct dsk *dsk)
337192194Sdfr{
338192194Sdfr    struct dsk *newdsk;
339192194Sdfr
340192194Sdfr    newdsk = malloc(sizeof(struct dsk));
341192194Sdfr    *newdsk = *dsk;
342192194Sdfr    return (newdsk);
343192194Sdfr}
344192194Sdfr
345185029Spjdstatic void
346185029Spjdprobe_drive(struct dsk *dsk, spa_t **spap)
347185029Spjd{
348185096Sdfr#ifdef GPT
349185096Sdfr    struct gpt_hdr hdr;
350185096Sdfr    struct gpt_ent *ent;
351185096Sdfr    daddr_t slba, elba;
352185096Sdfr    unsigned part, entries_per_sec;
353185096Sdfr#endif
354185029Spjd    struct dos_partition *dp;
355185029Spjd    char *sec;
356185029Spjd    unsigned i;
357185029Spjd
358185029Spjd    /*
359185029Spjd     * If we find a vdev on the whole disk, stop here. Otherwise dig
360185029Spjd     * out the MBR and probe each slice in turn for a vdev.
361185029Spjd     */
362185029Spjd    if (vdev_probe(vdev_read, dsk, spap) == 0)
363185029Spjd	return;
364185029Spjd
365185029Spjd    sec = dmadat->secbuf;
366185029Spjd    dsk->start = 0;
367185096Sdfr
368185096Sdfr#ifdef GPT
369185096Sdfr    /*
370185096Sdfr     * First check for GPT.
371185096Sdfr     */
372185096Sdfr    if (drvread(dsk, sec, 1, 1)) {
373185096Sdfr	return;
374185096Sdfr    }
375185096Sdfr    memcpy(&hdr, sec, sizeof(hdr));
376185096Sdfr    if (memcmp(hdr.hdr_sig, GPT_HDR_SIG, sizeof(hdr.hdr_sig)) != 0 ||
377185096Sdfr	hdr.hdr_lba_self != 1 || hdr.hdr_revision < 0x00010000 ||
378185096Sdfr	hdr.hdr_entsz < sizeof(*ent) || DEV_BSIZE % hdr.hdr_entsz != 0) {
379185096Sdfr	goto trymbr;
380185096Sdfr    }
381185096Sdfr
382185096Sdfr    /*
383185096Sdfr     * Probe all GPT partitions for the presense of ZFS pools. We
384185096Sdfr     * return the spa_t for the first we find (if requested). This
385185096Sdfr     * will have the effect of booting from the first pool on the
386185096Sdfr     * disk.
387185096Sdfr     */
388185096Sdfr    entries_per_sec = DEV_BSIZE / hdr.hdr_entsz;
389185096Sdfr    slba = hdr.hdr_lba_table;
390185096Sdfr    elba = slba + hdr.hdr_entries / entries_per_sec;
391185096Sdfr    while (slba < elba) {
392198420Srnoland	dsk->start = 0;
393185096Sdfr	if (drvread(dsk, sec, slba, 1))
394185096Sdfr	    return;
395185096Sdfr	for (part = 0; part < entries_per_sec; part++) {
396185096Sdfr	    ent = (struct gpt_ent *)(sec + part * hdr.hdr_entsz);
397185096Sdfr	    if (memcmp(&ent->ent_type, &freebsd_zfs_uuid,
398185096Sdfr		     sizeof(uuid_t)) == 0) {
399185096Sdfr		dsk->start = ent->ent_lba_start;
400185096Sdfr		if (vdev_probe(vdev_read, dsk, spap) == 0) {
401185096Sdfr		    /*
402185096Sdfr		     * We record the first pool we find (we will try
403192194Sdfr		     * to boot from that one).
404185096Sdfr		     */
405213136Spjd		    spap = NULL;
406185096Sdfr
407185096Sdfr		    /*
408185096Sdfr		     * This slice had a vdev. We need a new dsk
409185096Sdfr		     * structure now since the vdev now owns this one.
410185096Sdfr		     */
411192194Sdfr		    dsk = copy_dsk(dsk);
412185096Sdfr		}
413185096Sdfr	    }
414185096Sdfr	}
415185096Sdfr	slba++;
416185096Sdfr    }
417185096Sdfr    return;
418185096Sdfrtrymbr:
419185096Sdfr#endif
420185096Sdfr
421185029Spjd    if (drvread(dsk, sec, DOSBBSECTOR, 1))
422185029Spjd	return;
423185029Spjd    dp = (void *)(sec + DOSPARTOFF);
424185029Spjd
425185029Spjd    for (i = 0; i < NDOSPART; i++) {
426185029Spjd	if (!dp[i].dp_typ)
427185029Spjd	    continue;
428185029Spjd	dsk->start = dp[i].dp_start;
429185029Spjd	if (vdev_probe(vdev_read, dsk, spap) == 0) {
430185029Spjd	    /*
431185029Spjd	     * We record the first pool we find (we will try to boot
432185029Spjd	     * from that one.
433185029Spjd	     */
434185029Spjd	    spap = 0;
435185029Spjd
436185029Spjd	    /*
437185029Spjd	     * This slice had a vdev. We need a new dsk structure now
438185096Sdfr	     * since the vdev now owns this one.
439185029Spjd	     */
440192194Sdfr	    dsk = copy_dsk(dsk);
441185029Spjd	}
442185029Spjd    }
443185029Spjd}
444185029Spjd
445185029Spjdint
446185029Spjdmain(void)
447185029Spjd{
448185029Spjd    int autoboot, i;
449185029Spjd    dnode_phys_t dn;
450185029Spjd    off_t off;
451185029Spjd    struct dsk *dsk;
452185029Spjd
453208388Sjhb    dmadat = (void *)(roundup2(__base + (int32_t)&_end, 0x10000) - __base);
454208388Sjhb
455200309Sjhb    bios_getmem();
456200309Sjhb
457200309Sjhb    if (high_heap_size > 0) {
458200309Sjhb	heap_end = PTOV(high_heap_base + high_heap_size);
459200309Sjhb	heap_next = PTOV(high_heap_base);
460200309Sjhb    } else {
461200309Sjhb	heap_next = (char *) dmadat + sizeof(*dmadat);
462200309Sjhb	heap_end = (char *) PTOV(bios_basemem);
463200309Sjhb    }
464200309Sjhb
465185029Spjd    dsk = malloc(sizeof(struct dsk));
466185029Spjd    dsk->drive = *(uint8_t *)PTOV(ARGS);
467185029Spjd    dsk->type = dsk->drive & DRV_HARD ? TYPE_AD : TYPE_FD;
468185029Spjd    dsk->unit = dsk->drive & DRV_MASK;
469185029Spjd    dsk->slice = *(uint8_t *)PTOV(ARGS + 1) + 1;
470185029Spjd    dsk->part = 0;
471185029Spjd    dsk->start = 0;
472185029Spjd    dsk->init = 0;
473185029Spjd
474185029Spjd    bootinfo.bi_version = BOOTINFO_VERSION;
475185029Spjd    bootinfo.bi_size = sizeof(bootinfo);
476200309Sjhb    bootinfo.bi_basemem = bios_basemem / 1024;
477200309Sjhb    bootinfo.bi_extmem = bios_extmem / 1024;
478185029Spjd    bootinfo.bi_memsizes_valid++;
479185029Spjd    bootinfo.bi_bios_dev = dsk->drive;
480185029Spjd
481185029Spjd    bootdev = MAKEBOOTDEV(dev_maj[dsk->type],
482185029Spjd			  dsk->slice, dsk->unit, dsk->part),
483185029Spjd
484185029Spjd    /* Process configuration file */
485185029Spjd
486185029Spjd    autoboot = 1;
487185029Spjd
488185029Spjd    zfs_init();
489185029Spjd
490185029Spjd    /*
491185029Spjd     * Probe the boot drive first - we will try to boot from whatever
492185029Spjd     * pool we find on that drive.
493185029Spjd     */
494185029Spjd    probe_drive(dsk, &spa);
495185029Spjd
496185029Spjd    /*
497185029Spjd     * Probe the rest of the drives that the bios knows about. This
498185029Spjd     * will find any other available pools and it may fill in missing
499185029Spjd     * vdevs for the boot pool.
500185029Spjd     */
501212805Spjd#ifndef VIRTUALBOX
502212805Spjd    for (i = 0; i < *(unsigned char *)PTOV(BIOS_NUMDRIVES); i++)
503212805Spjd#else
504212805Spjd    for (i = 0; i < MAXBDDEV; i++)
505212805Spjd#endif
506212805Spjd    {
507185029Spjd	if ((i | DRV_HARD) == *(uint8_t *)PTOV(ARGS))
508185029Spjd	    continue;
509185029Spjd
510192194Sdfr	if (!int13probe(i | DRV_HARD))
511192194Sdfr	    break;
512192194Sdfr
513185029Spjd	dsk = malloc(sizeof(struct dsk));
514185029Spjd	dsk->drive = i | DRV_HARD;
515185029Spjd	dsk->type = dsk->drive & TYPE_AD;
516185029Spjd	dsk->unit = i;
517185029Spjd	dsk->slice = 0;
518185029Spjd	dsk->part = 0;
519185029Spjd	dsk->start = 0;
520185029Spjd	dsk->init = 0;
521213136Spjd	probe_drive(dsk, NULL);
522185029Spjd    }
523185029Spjd
524185029Spjd    /*
525185029Spjd     * If we didn't find a pool on the boot drive, default to the
526185029Spjd     * first pool we found, if any.
527185029Spjd     */
528185029Spjd    if (!spa) {
529185029Spjd	spa = STAILQ_FIRST(&zfs_pools);
530185029Spjd	if (!spa) {
531213136Spjd	    printf("%s: No ZFS pools located, can't boot\n", BOOTPROG);
532185029Spjd	    for (;;)
533185029Spjd		;
534185029Spjd	}
535185029Spjd    }
536185029Spjd
537235329Savg    if (zfs_spa_init(spa) != 0 || zfs_mount(spa, 0, &zfsmount) != 0) {
538235329Savg	printf("%s: failed to mount default pool %s\n",
539235329Savg	    BOOTPROG, spa->spa_name);
540235329Savg	autoboot = 0;
541235329Savg    } else if (zfs_lookup(&zfsmount, PATH_CONFIG, &dn) == 0 ||
542235329Savg        zfs_lookup(&zfsmount, PATH_DOTCONFIG, &dn) == 0) {
543185029Spjd	off = 0;
544198079Sjhb	zfs_read(spa, &dn, &off, cmd, sizeof(cmd));
545185029Spjd    }
546185029Spjd
547185029Spjd    if (*cmd) {
548234339Savg	/*
549234339Savg	 * Note that parse() is destructive to cmd[] and we also want
550234339Savg	 * to honor RBX_QUIET option that could be present in cmd[].
551234339Savg	 */
552234339Savg	memcpy(cmddup, cmd, sizeof(cmd));
553185029Spjd	if (parse())
554185029Spjd	    autoboot = 0;
555234339Savg	if (!OPT_CHECK(RBX_QUIET))
556241288Savg	    printf("%s: %s\n", PATH_CONFIG, cmddup);
557185029Spjd	/* Do not process this command twice */
558185029Spjd	*cmd = 0;
559185029Spjd    }
560185029Spjd
561185029Spjd    /*
562185029Spjd     * Try to exec stage 3 boot loader. If interrupted by a keypress,
563185029Spjd     * or in case of failure, try to load a kernel directly instead.
564185029Spjd     */
565185029Spjd
566185029Spjd    if (autoboot && !*kname) {
567185029Spjd	memcpy(kname, PATH_BOOT3, sizeof(PATH_BOOT3));
568213136Spjd	if (!keyhit(3)) {
569185029Spjd	    load();
570185029Spjd	    memcpy(kname, PATH_KERNEL, sizeof(PATH_KERNEL));
571185029Spjd	}
572185029Spjd    }
573185029Spjd
574185029Spjd    /* Present the user with the boot2 prompt. */
575185029Spjd
576185029Spjd    for (;;) {
577235329Savg	if (!autoboot || !OPT_CHECK(RBX_QUIET)) {
578235329Savg	    printf("\nFreeBSD/x86 boot\n");
579235329Savg	    if (zfs_rlookup(spa, zfsmount.rootobj, rootname) != 0)
580241288Savg		printf("Default: %s/<0x%llx>:%s\n"
581235329Savg		       "boot: ",
582235329Savg		       spa->spa_name, zfsmount.rootobj, kname);
583241288Savg	    else if (rootname[0] != '\0')
584241288Savg		printf("Default: %s/%s:%s\n"
585241288Savg		       "boot: ",
586241288Savg		       spa->spa_name, rootname, kname);
587235329Savg	    else
588241288Savg		printf("Default: %s:%s\n"
589235329Savg		       "boot: ",
590241288Savg		       spa->spa_name, kname);
591235329Savg	}
592185029Spjd	if (ioctrl & IO_SERIAL)
593185029Spjd	    sio_flush();
594213136Spjd	if (!autoboot || keyhit(5))
595213136Spjd	    getstr(cmd, sizeof(cmd));
596185029Spjd	else if (!autoboot || !OPT_CHECK(RBX_QUIET))
597185029Spjd	    putchar('\n');
598185029Spjd	autoboot = 0;
599185029Spjd	if (parse())
600185029Spjd	    putchar('\a');
601185029Spjd	else
602185029Spjd	    load();
603185029Spjd    }
604185029Spjd}
605185029Spjd
/*
 * XXX - Needed for btxld to link the boot2 binary; do not remove.
 * The function deliberately does nothing.
 */
void
exit(int x)
{
    (void)x;
}
611185029Spjd
612185029Spjdstatic void
613185029Spjdload(void)
614185029Spjd{
615185029Spjd    union {
616185029Spjd	struct exec ex;
617185029Spjd	Elf32_Ehdr eh;
618185029Spjd    } hdr;
619185029Spjd    static Elf32_Phdr ep[2];
620185029Spjd    static Elf32_Shdr es[2];
621185029Spjd    caddr_t p;
622185029Spjd    dnode_phys_t dn;
623185029Spjd    off_t off;
624185029Spjd    uint32_t addr, x;
625185029Spjd    int fmt, i, j;
626185029Spjd
627235329Savg    if (zfs_lookup(&zfsmount, kname, &dn)) {
628235329Savg	printf("\nCan't find %s\n", kname);
629185029Spjd	return;
630185029Spjd    }
631185029Spjd    off = 0;
632185029Spjd    if (xfsread(&dn, &off, &hdr, sizeof(hdr)))
633185029Spjd	return;
634185029Spjd    if (N_GETMAGIC(hdr.ex) == ZMAGIC)
635185029Spjd	fmt = 0;
636185029Spjd    else if (IS_ELF(hdr.eh))
637185029Spjd	fmt = 1;
638185029Spjd    else {
639185029Spjd	printf("Invalid %s\n", "format");
640185029Spjd	return;
641185029Spjd    }
642185029Spjd    if (fmt == 0) {
643185029Spjd	addr = hdr.ex.a_entry & 0xffffff;
644185029Spjd	p = PTOV(addr);
645185029Spjd	off = PAGE_SIZE;
646185029Spjd	if (xfsread(&dn, &off, p, hdr.ex.a_text))
647185029Spjd	    return;
648185029Spjd	p += roundup2(hdr.ex.a_text, PAGE_SIZE);
649185029Spjd	if (xfsread(&dn, &off, p, hdr.ex.a_data))
650185029Spjd	    return;
651185029Spjd	p += hdr.ex.a_data + roundup2(hdr.ex.a_bss, PAGE_SIZE);
652185029Spjd	bootinfo.bi_symtab = VTOP(p);
653185029Spjd	memcpy(p, &hdr.ex.a_syms, sizeof(hdr.ex.a_syms));
654185029Spjd	p += sizeof(hdr.ex.a_syms);
655185029Spjd	if (hdr.ex.a_syms) {
656185029Spjd	    if (xfsread(&dn, &off, p, hdr.ex.a_syms))
657185029Spjd		return;
658185029Spjd	    p += hdr.ex.a_syms;
659185029Spjd	    if (xfsread(&dn, &off, p, sizeof(int)))
660185029Spjd		return;
661185029Spjd	    x = *(uint32_t *)p;
662185029Spjd	    p += sizeof(int);
663185029Spjd	    x -= sizeof(int);
664185029Spjd	    if (xfsread(&dn, &off, p, x))
665185029Spjd		return;
666185029Spjd	    p += x;
667185029Spjd	}
668185029Spjd    } else {
669185029Spjd	off = hdr.eh.e_phoff;
670185029Spjd	for (j = i = 0; i < hdr.eh.e_phnum && j < 2; i++) {
671185029Spjd	    if (xfsread(&dn, &off, ep + j, sizeof(ep[0])))
672185029Spjd		return;
673185029Spjd	    if (ep[j].p_type == PT_LOAD)
674185029Spjd		j++;
675185029Spjd	}
676185029Spjd	for (i = 0; i < 2; i++) {
677185029Spjd	    p = PTOV(ep[i].p_paddr & 0xffffff);
678185029Spjd	    off = ep[i].p_offset;
679185029Spjd	    if (xfsread(&dn, &off, p, ep[i].p_filesz))
680185029Spjd		return;
681185029Spjd	}
682185029Spjd	p += roundup2(ep[1].p_memsz, PAGE_SIZE);
683185029Spjd	bootinfo.bi_symtab = VTOP(p);
684185029Spjd	if (hdr.eh.e_shnum == hdr.eh.e_shstrndx + 3) {
685185029Spjd	    off = hdr.eh.e_shoff + sizeof(es[0]) *
686185029Spjd		(hdr.eh.e_shstrndx + 1);
687185029Spjd	    if (xfsread(&dn, &off, &es, sizeof(es)))
688185029Spjd		return;
689185029Spjd	    for (i = 0; i < 2; i++) {
690185029Spjd		memcpy(p, &es[i].sh_size, sizeof(es[i].sh_size));
691185029Spjd		p += sizeof(es[i].sh_size);
692185029Spjd		off = es[i].sh_offset;
693185029Spjd		if (xfsread(&dn, &off, p, es[i].sh_size))
694185029Spjd		    return;
695185029Spjd		p += es[i].sh_size;
696185029Spjd	    }
697185029Spjd	}
698185029Spjd	addr = hdr.eh.e_entry & 0xffffff;
699185029Spjd    }
700185029Spjd    bootinfo.bi_esymtab = VTOP(p);
701185029Spjd    bootinfo.bi_kernelname = VTOP(kname);
702235329Savg    zfsargs.size = sizeof(zfsargs);
703235329Savg    zfsargs.pool = zfsmount.spa->spa_guid;
704235329Savg    zfsargs.root = zfsmount.rootobj;
705185029Spjd    __exec((caddr_t)addr, RB_BOOTINFO | (opts & RBX_MASK),
706185029Spjd	   bootdev,
707235329Savg	   KARGS_FLAGS_ZFS | KARGS_FLAGS_EXTARG,
708185029Spjd	   (uint32_t) spa->spa_guid,
709185029Spjd	   (uint32_t) (spa->spa_guid >> 32),
710235329Savg	   VTOP(&bootinfo),
711235329Savg	   zfsargs);
712185029Spjd}
713185029Spjd
714185029Spjdstatic int
715241288Savgzfs_mount_ds(char *dsname)
716241288Savg{
717241288Savg    uint64_t newroot;
718241288Savg    spa_t *newspa;
719241288Savg    char *q;
720241288Savg
721241288Savg    q = strchr(dsname, '/');
722241288Savg    if (q)
723241288Savg	*q++ = '\0';
724241288Savg    newspa = spa_find_by_name(dsname);
725241288Savg    if (newspa == NULL) {
726241288Savg	printf("\nCan't find ZFS pool %s\n", dsname);
727241288Savg	return -1;
728241288Savg    }
729241288Savg
730241288Savg    if (zfs_spa_init(newspa))
731241288Savg	return -1;
732241288Savg
733241288Savg    newroot = 0;
734241288Savg    if (q) {
735241288Savg	if (zfs_lookup_dataset(newspa, q, &newroot)) {
736241288Savg	    printf("\nCan't find dataset %s in ZFS pool %s\n",
737241288Savg		    q, newspa->spa_name);
738241288Savg	    return -1;
739241288Savg	}
740241288Savg    }
741241288Savg    if (zfs_mount(newspa, newroot, &zfsmount)) {
742241288Savg	printf("\nCan't mount ZFS dataset\n");
743241288Savg	return -1;
744241288Savg    }
745241288Savg    spa = newspa;
746241288Savg    return (0);
747241288Savg}
748241288Savg
749241288Savgstatic int
750213136Spjdparse(void)
751185029Spjd{
752185029Spjd    char *arg = cmd;
753185029Spjd    char *ep, *p, *q;
754185029Spjd    const char *cp;
755185029Spjd    int c, i, j;
756185029Spjd
757185029Spjd    while ((c = *arg++)) {
758185029Spjd	if (c == ' ' || c == '\t' || c == '\n')
759185029Spjd	    continue;
760185029Spjd	for (p = arg; *p && *p != '\n' && *p != ' ' && *p != '\t'; p++);
761185029Spjd	ep = p;
762185029Spjd	if (*p)
763185029Spjd	    *p++ = 0;
764185029Spjd	if (c == '-') {
765185029Spjd	    while ((c = *arg++)) {
766185029Spjd		if (c == 'P') {
767185029Spjd		    if (*(uint8_t *)PTOV(0x496) & 0x10) {
768185029Spjd			cp = "yes";
769185029Spjd		    } else {
770185029Spjd			opts |= OPT_SET(RBX_DUAL) | OPT_SET(RBX_SERIAL);
771185029Spjd			cp = "no";
772185029Spjd		    }
773185029Spjd		    printf("Keyboard: %s\n", cp);
774185029Spjd		    continue;
775185029Spjd		} else if (c == 'S') {
776185029Spjd		    j = 0;
777185029Spjd		    while ((unsigned int)(i = *arg++ - '0') <= 9)
778185029Spjd			j = j * 10 + i;
779185029Spjd		    if (j > 0 && i == -'0') {
780185029Spjd			comspeed = j;
781185029Spjd			break;
782185029Spjd		    }
783185029Spjd		    /* Fall through to error below ('S' not in optstr[]). */
784185029Spjd		}
785185029Spjd		for (i = 0; c != optstr[i]; i++)
786185029Spjd		    if (i == NOPT - 1)
787185029Spjd			return -1;
788185029Spjd		opts ^= OPT_SET(flags[i]);
789185029Spjd	    }
790185029Spjd	    ioctrl = OPT_CHECK(RBX_DUAL) ? (IO_SERIAL|IO_KEYBOARD) :
791185029Spjd		     OPT_CHECK(RBX_SERIAL) ? IO_SERIAL : IO_KEYBOARD;
792185029Spjd	    if (ioctrl & IO_SERIAL)
793185029Spjd	        sio_init(115200 / comspeed);
794185029Spjd	} if (c == '?') {
795185029Spjd	    dnode_phys_t dn;
796185029Spjd
797235329Savg	    if (zfs_lookup(&zfsmount, arg, &dn) == 0) {
798185029Spjd		zap_list(spa, &dn);
799185029Spjd	    }
800185029Spjd	    return -1;
801185029Spjd	} else {
802185029Spjd	    arg--;
803185029Spjd
804185029Spjd	    /*
805185029Spjd	     * Report pool status if the comment is 'status'. Lets
806185029Spjd	     * hope no-one wants to load /status as a kernel.
807185029Spjd	     */
808185029Spjd	    if (!strcmp(arg, "status")) {
809185029Spjd		spa_all_status();
810185029Spjd		return -1;
811185029Spjd	    }
812185029Spjd
813185029Spjd	    /*
814241288Savg	     * If there is "zfs:" prefix simply ignore it.
815241288Savg	     */
816241288Savg	    if (strncmp(arg, "zfs:", 4) == 0)
817241288Savg		arg += 4;
818241288Savg
819241288Savg	    /*
820185029Spjd	     * If there is a colon, switch pools.
821185029Spjd	     */
822241288Savg	    q = strchr(arg, ':');
823185029Spjd	    if (q) {
824241288Savg		*q++ = '\0';
825241288Savg		if (zfs_mount_ds(arg) != 0)
826185029Spjd		    return -1;
827241288Savg		arg = q;
828185029Spjd	    }
829185029Spjd	    if ((i = ep - arg)) {
830185029Spjd		if ((size_t)i >= sizeof(kname))
831185029Spjd		    return -1;
832185029Spjd		memcpy(kname, arg, i + 1);
833185029Spjd	    }
834185029Spjd	}
835185029Spjd	arg = p;
836185029Spjd    }
837185029Spjd    return 0;
838185029Spjd}
839