1185029Spjd/*-
2185029Spjd * Copyright (c) 1998 Robert Nordier
3185029Spjd * All rights reserved.
4185029Spjd *
5185029Spjd * Redistribution and use in source and binary forms are freely
6185029Spjd * permitted provided that the above copyright notice and this
7185029Spjd * paragraph and the following disclaimer are duplicated in all
8185029Spjd * such forms.
9185029Spjd *
10185029Spjd * This software is provided "AS IS" and without any express or
11185029Spjd * implied warranties, including, without limitation, the implied
12185029Spjd * warranties of merchantability and fitness for a particular
13185029Spjd * purpose.
14185029Spjd */
15185029Spjd
16185029Spjd#include <sys/cdefs.h>
17185029Spjd__FBSDID("$FreeBSD$");
18185029Spjd
19185029Spjd#include <sys/param.h>
20185029Spjd#include <sys/errno.h>
21185029Spjd#include <sys/diskmbr.h>
22185096Sdfr#ifdef GPT
23185096Sdfr#include <sys/gpt.h>
24185096Sdfr#endif
25185029Spjd#include <sys/reboot.h>
26185029Spjd#include <sys/queue.h>
27185029Spjd
28185029Spjd#include <machine/bootinfo.h>
29185029Spjd#include <machine/elf.h>
30200309Sjhb#include <machine/pc/bios.h>
31185029Spjd
32185029Spjd#include <stdarg.h>
33185029Spjd#include <stddef.h>
34185029Spjd
35185029Spjd#include <a.out.h>
36185029Spjd
37185029Spjd#include <btxv86.h>
38185029Spjd
39185029Spjd#include "lib.h"
40213136Spjd#include "rbx.h"
41213136Spjd#include "drv.h"
42213136Spjd#include "util.h"
43213136Spjd#include "cons.h"
44237756Savg#include "bootargs.h"
45185029Spjd
46237766Savg#include "libzfs.h"
47237766Savg
/*
 * Configuration files looked up on the mounted dataset.  main() tries
 * PATH_CONFIG first and falls back to PATH_DOTCONFIG.
 */
#define PATH_DOTCONFIG	"/boot.config"
#define PATH_CONFIG	"/boot/config"
/* Default images: main() tries PATH_BOOT3 first, then PATH_KERNEL. */
#define PATH_BOOT3	"/boot/zfsloader"
#define PATH_KERNEL	"/boot/kernel/kernel"

/*
 * Physical address from which main() reads the boot drive number
 * (ARGS) and the slice number (ARGS + 1).
 */
#define ARGS		0x900
#define NOPT		14	/* entries in optstr[] and flags[] */
#define NDEV		3	/* entries in dev_nm[] and dev_maj[] */

/* Physical address of the byte main() uses as its drive-probe loop bound. */
#define BIOS_NUMDRIVES	0x475
#define DRV_HARD	0x80	/* set in the drive number of a hard disk */
#define DRV_MASK	0x7f	/* extracts the unit from a drive number */

/* Device types; main() uses these to index dev_maj[]. */
#define TYPE_AD		0
#define TYPE_DA		1
#define TYPE_MAXHARD	TYPE_DA
#define TYPE_FD		2
/*
 * Defined outside this file; main() rounds its address up to a 64k
 * boundary to place dmadat.
 */
extern uint32_t _end;

#ifdef GPT
/* Partition type that probe_drive() looks for in GPT entries. */
static const uuid_t freebsd_zfs_uuid = GPT_ENT_TYPE_FREEBSD_ZFS;
#endif
/*
 * Option letters accepted by parse().  The letter optstr[i] toggles the
 * bit for flags[i] in opts, so the two tables must stay in the same order.
 */
static const char optstr[NOPT] = "DhaCcdgmnpqrsv"; /* Also 'P', 'S' */
static const unsigned char flags[NOPT] = {
    RBX_DUAL,
    RBX_SERIAL,
    RBX_ASKNAME,
    RBX_CDROM,
    RBX_CONFIG,
    RBX_KDB,
    RBX_GDB,
    RBX_MUTE,
    RBX_NOINTR,
    RBX_PAUSE,
    RBX_QUIET,
    RBX_DFLTROOT,
    RBX_SINGLE,
    RBX_VERBOSE
};
/* Option bits accumulated by parse() and passed on by load(). */
uint32_t opts;

/* Device names and major numbers, in TYPE_* order. */
static const char *const dev_nm[NDEV] = {"ad", "da", "fd"};
static const unsigned char dev_maj[NDEV] = {30, 4, 2};
92185029Spjd
/* Command line being parsed; parse() modifies it in place. */
static char cmd[512];
/* Copy of cmd[] that main() takes before parse() runs, for printing. */
static char cmddup[512];
/* Name of the file that load() will look up and start. */
static char kname[1024];
/* Filled in by zfs_rlookup() in main() for the "Default:" prompt. */
static char rootname[256];
/* Serial speed; updated by the -S option in parse(). */
static int comspeed = SIOSPD;
/* Boot information handed to the loaded image by load(). */
static struct bootinfo bootinfo;
static uint32_t bootdev;
/* Extended ZFS arguments filled in and passed by load(). */
static struct zfs_boot_args zfsargs;
/* Currently mounted dataset; set by zfs_mount() calls. */
static struct zfsmount zfsmount;

/* Results of bios_getmem(); the sizes are in bytes. */
vm_offset_t	high_heap_base;
uint32_t	bios_basemem, bios_extmem, high_heap_size;

/* Buffer that receives one memory map entry per BIOS call. */
static struct bios_smap smap;

/*
 * The minimum amount of memory to reserve in bios_extmem for the heap.
 */
#define	HEAP_MIN	(3 * 1024 * 1024)

/* Bounds of the bump allocator implemented by malloc() below. */
static char *heap_next;
static char *heap_end;

/* Buffers that must not span a 64k boundary. */
#define READ_BUF_SIZE	8192
struct dmadat {
	char rdbuf[READ_BUF_SIZE];	/* for reading large things */
	char secbuf[READ_BUF_SIZE];	/* for MBR/disklabel */
};
/* Set up by main() at a 64k-aligned address past _end. */
static struct dmadat *dmadat;

void exit(int);
static void load(void);
static int parse(void);
static void bios_getmem(void);
129185029Spjdstatic void *
130185029Spjdmalloc(size_t n)
131185029Spjd{
132185029Spjd	char *p = heap_next;
133185029Spjd	if (p + n > heap_end) {
134185029Spjd		printf("malloc failure\n");
135185029Spjd		for (;;)
136185029Spjd		    ;
137185029Spjd		return 0;
138185029Spjd	}
139185029Spjd	heap_next += n;
140185029Spjd	return p;
141185029Spjd}
142185029Spjd
/*
 * Return a newly allocated copy of the NUL-terminated string s.
 */
static char *
strdup(const char *s)
{
	size_t len = strlen(s) + 1;	/* include the terminator */
	char *copy = malloc(len);

	strcpy(copy, s);
	return copy;
}
150185029Spjd
151185029Spjd#include "zfsimpl.c"
152185029Spjd
153185029Spjd/*
154185029Spjd * Read from a dnode (which must be from a ZPL filesystem).
155185029Spjd */
156185029Spjdstatic int
157185029Spjdzfs_read(spa_t *spa, const dnode_phys_t *dnode, off_t *offp, void *start, size_t size)
158185029Spjd{
159185029Spjd	const znode_phys_t *zp = (const znode_phys_t *) dnode->dn_bonus;
160185029Spjd	size_t n;
161185029Spjd	int rc;
162185029Spjd
163185029Spjd	n = size;
164185029Spjd	if (*offp + n > zp->zp_size)
165185029Spjd		n = zp->zp_size - *offp;
166185029Spjd
167185029Spjd	rc = dnode_read(spa, dnode, *offp, start, n);
168185029Spjd	if (rc)
169185029Spjd		return (-1);
170185029Spjd	*offp += n;
171185029Spjd
172185029Spjd	return (n);
173185029Spjd}
174185029Spjd
/*
 * Current ZFS pool: the pool used for reads and listings.  main()
 * sets it to the primary pool and zfs_mount_ds() switches it.
 *
 * primary_spa and primary_vdev are recorded once by main() and are
 * passed on by load() in zfsargs.
 */
static spa_t *spa;
static spa_t *primary_spa;
static vdev_t *primary_vdev;
181185029Spjd
182185029Spjd/*
183185029Spjd * A wrapper for dskread that doesn't have to worry about whether the
184185029Spjd * buffer pointer crosses a 64k boundary.
185185029Spjd */
186185029Spjdstatic int
187185029Spjdvdev_read(vdev_t *vdev, void *priv, off_t off, void *buf, size_t bytes)
188185029Spjd{
189185029Spjd	char *p;
190199579Sjhb	daddr_t lba;
191199579Sjhb	unsigned int nb;
192185029Spjd	struct dsk *dsk = (struct dsk *) priv;
193185029Spjd
194185029Spjd	if ((off & (DEV_BSIZE - 1)) || (bytes & (DEV_BSIZE - 1)))
195185029Spjd		return -1;
196185029Spjd
197185029Spjd	p = buf;
198185029Spjd	lba = off / DEV_BSIZE;
199213136Spjd	lba += dsk->start;
200185029Spjd	while (bytes > 0) {
201185029Spjd		nb = bytes / DEV_BSIZE;
202185029Spjd		if (nb > READ_BUF_SIZE / DEV_BSIZE)
203185029Spjd			nb = READ_BUF_SIZE / DEV_BSIZE;
204185029Spjd		if (drvread(dsk, dmadat->rdbuf, lba, nb))
205185029Spjd			return -1;
206185029Spjd		memcpy(p, dmadat->rdbuf, nb * DEV_BSIZE);
207185029Spjd		p += nb * DEV_BSIZE;
208185029Spjd		lba += nb;
209185029Spjd		bytes -= nb * DEV_BSIZE;
210185029Spjd	}
211185029Spjd
212185029Spjd	return 0;
213185029Spjd}
214185029Spjd
215185029Spjdstatic int
216185029Spjdxfsread(const dnode_phys_t *dnode, off_t *offp, void *buf, size_t nbyte)
217185029Spjd{
218185029Spjd    if ((size_t)zfs_read(spa, dnode, offp, buf, nbyte) != nbyte) {
219237766Savg	printf("Invalid format\n");
220185029Spjd	return -1;
221185029Spjd    }
222185029Spjd    return 0;
223185029Spjd}
224185029Spjd
225200309Sjhbstatic void
226200309Sjhbbios_getmem(void)
227185029Spjd{
228200309Sjhb    uint64_t size;
229185029Spjd
230200309Sjhb    /* Parse system memory map */
231200309Sjhb    v86.ebx = 0;
232200309Sjhb    do {
233200309Sjhb	v86.ctl = V86_FLAGS;
234200309Sjhb	v86.addr = 0x15;		/* int 0x15 function 0xe820*/
235200309Sjhb	v86.eax = 0xe820;
236200309Sjhb	v86.ecx = sizeof(struct bios_smap);
237200309Sjhb	v86.edx = SMAP_SIG;
238200309Sjhb	v86.es = VTOPSEG(&smap);
239200309Sjhb	v86.edi = VTOPOFF(&smap);
240200309Sjhb	v86int();
241200309Sjhb	if ((v86.efl & 1) || (v86.eax != SMAP_SIG))
242200309Sjhb	    break;
243200309Sjhb	/* look for a low-memory segment that's large enough */
244200309Sjhb	if ((smap.type == SMAP_TYPE_MEMORY) && (smap.base == 0) &&
245200309Sjhb	    (smap.length >= (512 * 1024)))
246200309Sjhb	    bios_basemem = smap.length;
247200309Sjhb	/* look for the first segment in 'extended' memory */
248200309Sjhb	if ((smap.type == SMAP_TYPE_MEMORY) && (smap.base == 0x100000)) {
249200309Sjhb	    bios_extmem = smap.length;
250200309Sjhb	}
251200309Sjhb
252200309Sjhb	/*
253200309Sjhb	 * Look for the largest segment in 'extended' memory beyond
254200309Sjhb	 * 1MB but below 4GB.
255200309Sjhb	 */
256200309Sjhb	if ((smap.type == SMAP_TYPE_MEMORY) && (smap.base > 0x100000) &&
257200309Sjhb	    (smap.base < 0x100000000ull)) {
258200309Sjhb	    size = smap.length;
259200309Sjhb
260200309Sjhb	    /*
261200309Sjhb	     * If this segment crosses the 4GB boundary, truncate it.
262200309Sjhb	     */
263200309Sjhb	    if (smap.base + size > 0x100000000ull)
264200309Sjhb		size = 0x100000000ull - smap.base;
265200309Sjhb
266200309Sjhb	    if (size > high_heap_size) {
267200309Sjhb		high_heap_size = size;
268200309Sjhb		high_heap_base = smap.base;
269200309Sjhb	    }
270200309Sjhb	}
271200309Sjhb    } while (v86.ebx != 0);
272200309Sjhb
273200309Sjhb    /* Fall back to the old compatibility function for base memory */
274200309Sjhb    if (bios_basemem == 0) {
275200309Sjhb	v86.ctl = 0;
276200309Sjhb	v86.addr = 0x12;		/* int 0x12 */
277200309Sjhb	v86int();
278200309Sjhb
279200309Sjhb	bios_basemem = (v86.eax & 0xffff) * 1024;
280200309Sjhb    }
281200309Sjhb
282200309Sjhb    /* Fall back through several compatibility functions for extended memory */
283200309Sjhb    if (bios_extmem == 0) {
284200309Sjhb	v86.ctl = V86_FLAGS;
285200309Sjhb	v86.addr = 0x15;		/* int 0x15 function 0xe801*/
286200309Sjhb	v86.eax = 0xe801;
287200309Sjhb	v86int();
288200309Sjhb	if (!(v86.efl & 1)) {
289200309Sjhb	    bios_extmem = ((v86.ecx & 0xffff) + ((v86.edx & 0xffff) * 64)) * 1024;
290200309Sjhb	}
291200309Sjhb    }
292200309Sjhb    if (bios_extmem == 0) {
293200309Sjhb	v86.ctl = 0;
294200309Sjhb	v86.addr = 0x15;		/* int 0x15 function 0x88*/
295200309Sjhb	v86.eax = 0x8800;
296200309Sjhb	v86int();
297200309Sjhb	bios_extmem = (v86.eax & 0xffff) * 1024;
298200309Sjhb    }
299200309Sjhb
300200309Sjhb    /*
301200309Sjhb     * If we have extended memory and did not find a suitable heap
302200309Sjhb     * region in the SMAP, use the last 3MB of 'extended' memory as a
303200309Sjhb     * high heap candidate.
304200309Sjhb     */
305200309Sjhb    if (bios_extmem >= HEAP_MIN && high_heap_size < HEAP_MIN) {
306200309Sjhb	high_heap_size = HEAP_MIN;
307200309Sjhb	high_heap_base = bios_extmem + 0x100000 - HEAP_MIN;
308200309Sjhb    }
309200309Sjhb}
310200309Sjhb
311185029Spjd/*
312185029Spjd * Try to detect a device supported by the legacy int13 BIOS
313185029Spjd */
314185029Spjdstatic int
315185029Spjdint13probe(int drive)
316185029Spjd{
317185029Spjd    v86.ctl = V86_FLAGS;
318185029Spjd    v86.addr = 0x13;
319185029Spjd    v86.eax = 0x800;
320185029Spjd    v86.edx = drive;
321185029Spjd    v86int();
322185029Spjd
323185029Spjd    if (!(v86.efl & 0x1) &&				/* carry clear */
324185029Spjd	((v86.edx & 0xff) != (drive & DRV_MASK))) {	/* unit # OK */
325185029Spjd	if ((v86.ecx & 0x3f) == 0) {			/* absurd sector size */
326185029Spjd		return(0);				/* skip device */
327185029Spjd	}
328185029Spjd	return (1);
329185029Spjd    }
330185029Spjd    return(0);
331185029Spjd}
332185029Spjd
333192194Sdfr/*
334192194Sdfr * We call this when we find a ZFS vdev - ZFS consumes the dsk
335192194Sdfr * structure so we must make a new one.
336192194Sdfr */
337192194Sdfrstatic struct dsk *
338192194Sdfrcopy_dsk(struct dsk *dsk)
339192194Sdfr{
340192194Sdfr    struct dsk *newdsk;
341192194Sdfr
342192194Sdfr    newdsk = malloc(sizeof(struct dsk));
343192194Sdfr    *newdsk = *dsk;
344192194Sdfr    return (newdsk);
345192194Sdfr}
346192194Sdfr
347185029Spjdstatic void
348242243Savgprobe_drive(struct dsk *dsk)
349185029Spjd{
350185096Sdfr#ifdef GPT
351185096Sdfr    struct gpt_hdr hdr;
352185096Sdfr    struct gpt_ent *ent;
353185096Sdfr    daddr_t slba, elba;
354185096Sdfr    unsigned part, entries_per_sec;
355185096Sdfr#endif
356185029Spjd    struct dos_partition *dp;
357185029Spjd    char *sec;
358185029Spjd    unsigned i;
359185029Spjd
360185029Spjd    /*
361185029Spjd     * If we find a vdev on the whole disk, stop here. Otherwise dig
362242243Savg     * out the partition table and probe each slice/partition
363242243Savg     * in turn for a vdev.
364185029Spjd     */
365242243Savg    if (vdev_probe(vdev_read, dsk, NULL) == 0)
366185029Spjd	return;
367185029Spjd
368185029Spjd    sec = dmadat->secbuf;
369185029Spjd    dsk->start = 0;
370185096Sdfr
371185096Sdfr#ifdef GPT
372185096Sdfr    /*
373185096Sdfr     * First check for GPT.
374185096Sdfr     */
375185096Sdfr    if (drvread(dsk, sec, 1, 1)) {
376185096Sdfr	return;
377185096Sdfr    }
378185096Sdfr    memcpy(&hdr, sec, sizeof(hdr));
379185096Sdfr    if (memcmp(hdr.hdr_sig, GPT_HDR_SIG, sizeof(hdr.hdr_sig)) != 0 ||
380185096Sdfr	hdr.hdr_lba_self != 1 || hdr.hdr_revision < 0x00010000 ||
381185096Sdfr	hdr.hdr_entsz < sizeof(*ent) || DEV_BSIZE % hdr.hdr_entsz != 0) {
382185096Sdfr	goto trymbr;
383185096Sdfr    }
384185096Sdfr
385185096Sdfr    /*
386185096Sdfr     * Probe all GPT partitions for the presense of ZFS pools. We
387185096Sdfr     * return the spa_t for the first we find (if requested). This
388185096Sdfr     * will have the effect of booting from the first pool on the
389185096Sdfr     * disk.
390185096Sdfr     */
391185096Sdfr    entries_per_sec = DEV_BSIZE / hdr.hdr_entsz;
392185096Sdfr    slba = hdr.hdr_lba_table;
393185096Sdfr    elba = slba + hdr.hdr_entries / entries_per_sec;
394185096Sdfr    while (slba < elba) {
395198420Srnoland	dsk->start = 0;
396185096Sdfr	if (drvread(dsk, sec, slba, 1))
397185096Sdfr	    return;
398185096Sdfr	for (part = 0; part < entries_per_sec; part++) {
399185096Sdfr	    ent = (struct gpt_ent *)(sec + part * hdr.hdr_entsz);
400185096Sdfr	    if (memcmp(&ent->ent_type, &freebsd_zfs_uuid,
401185096Sdfr		     sizeof(uuid_t)) == 0) {
402185096Sdfr		dsk->start = ent->ent_lba_start;
403242243Savg		if (vdev_probe(vdev_read, dsk, NULL) == 0) {
404185096Sdfr		    /*
405185096Sdfr		     * This slice had a vdev. We need a new dsk
406185096Sdfr		     * structure now since the vdev now owns this one.
407185096Sdfr		     */
408192194Sdfr		    dsk = copy_dsk(dsk);
409185096Sdfr		}
410185096Sdfr	    }
411185096Sdfr	}
412185096Sdfr	slba++;
413185096Sdfr    }
414185096Sdfr    return;
415185096Sdfrtrymbr:
416185096Sdfr#endif
417185096Sdfr
418185029Spjd    if (drvread(dsk, sec, DOSBBSECTOR, 1))
419185029Spjd	return;
420185029Spjd    dp = (void *)(sec + DOSPARTOFF);
421185029Spjd
422185029Spjd    for (i = 0; i < NDOSPART; i++) {
423185029Spjd	if (!dp[i].dp_typ)
424185029Spjd	    continue;
425185029Spjd	dsk->start = dp[i].dp_start;
426242243Savg	if (vdev_probe(vdev_read, dsk, NULL) == 0) {
427185029Spjd	    /*
428185029Spjd	     * This slice had a vdev. We need a new dsk structure now
429185096Sdfr	     * since the vdev now owns this one.
430185029Spjd	     */
431192194Sdfr	    dsk = copy_dsk(dsk);
432185029Spjd	}
433185029Spjd    }
434185029Spjd}
435185029Spjd
436185029Spjdint
437185029Spjdmain(void)
438185029Spjd{
439185029Spjd    int autoboot, i;
440185029Spjd    dnode_phys_t dn;
441185029Spjd    off_t off;
442185029Spjd    struct dsk *dsk;
443185029Spjd
444208388Sjhb    dmadat = (void *)(roundup2(__base + (int32_t)&_end, 0x10000) - __base);
445208388Sjhb
446200309Sjhb    bios_getmem();
447200309Sjhb
448200309Sjhb    if (high_heap_size > 0) {
449200309Sjhb	heap_end = PTOV(high_heap_base + high_heap_size);
450200309Sjhb	heap_next = PTOV(high_heap_base);
451200309Sjhb    } else {
452200309Sjhb	heap_next = (char *) dmadat + sizeof(*dmadat);
453200309Sjhb	heap_end = (char *) PTOV(bios_basemem);
454200309Sjhb    }
455200309Sjhb
456185029Spjd    dsk = malloc(sizeof(struct dsk));
457185029Spjd    dsk->drive = *(uint8_t *)PTOV(ARGS);
458185029Spjd    dsk->type = dsk->drive & DRV_HARD ? TYPE_AD : TYPE_FD;
459185029Spjd    dsk->unit = dsk->drive & DRV_MASK;
460185029Spjd    dsk->slice = *(uint8_t *)PTOV(ARGS + 1) + 1;
461185029Spjd    dsk->part = 0;
462185029Spjd    dsk->start = 0;
463185029Spjd    dsk->init = 0;
464185029Spjd
465185029Spjd    bootinfo.bi_version = BOOTINFO_VERSION;
466185029Spjd    bootinfo.bi_size = sizeof(bootinfo);
467200309Sjhb    bootinfo.bi_basemem = bios_basemem / 1024;
468200309Sjhb    bootinfo.bi_extmem = bios_extmem / 1024;
469185029Spjd    bootinfo.bi_memsizes_valid++;
470185029Spjd    bootinfo.bi_bios_dev = dsk->drive;
471185029Spjd
472185029Spjd    bootdev = MAKEBOOTDEV(dev_maj[dsk->type],
473185029Spjd			  dsk->slice, dsk->unit, dsk->part),
474185029Spjd
475185029Spjd    /* Process configuration file */
476185029Spjd
477185029Spjd    autoboot = 1;
478185029Spjd
479185029Spjd    zfs_init();
480185029Spjd
481185029Spjd    /*
482185029Spjd     * Probe the boot drive first - we will try to boot from whatever
483185029Spjd     * pool we find on that drive.
484185029Spjd     */
485242243Savg    probe_drive(dsk);
486185029Spjd
487185029Spjd    /*
488185029Spjd     * Probe the rest of the drives that the bios knows about. This
489185029Spjd     * will find any other available pools and it may fill in missing
490185029Spjd     * vdevs for the boot pool.
491185029Spjd     */
492212805Spjd#ifndef VIRTUALBOX
493212805Spjd    for (i = 0; i < *(unsigned char *)PTOV(BIOS_NUMDRIVES); i++)
494212805Spjd#else
495212805Spjd    for (i = 0; i < MAXBDDEV; i++)
496212805Spjd#endif
497212805Spjd    {
498185029Spjd	if ((i | DRV_HARD) == *(uint8_t *)PTOV(ARGS))
499185029Spjd	    continue;
500185029Spjd
501192194Sdfr	if (!int13probe(i | DRV_HARD))
502192194Sdfr	    break;
503192194Sdfr
504185029Spjd	dsk = malloc(sizeof(struct dsk));
505185029Spjd	dsk->drive = i | DRV_HARD;
506185029Spjd	dsk->type = dsk->drive & TYPE_AD;
507185029Spjd	dsk->unit = i;
508185029Spjd	dsk->slice = 0;
509185029Spjd	dsk->part = 0;
510185029Spjd	dsk->start = 0;
511185029Spjd	dsk->init = 0;
512242243Savg	probe_drive(dsk);
513185029Spjd    }
514185029Spjd
515185029Spjd    /*
516242243Savg     * The first discovered pool, if any, is the pool.
517185029Spjd     */
518242243Savg    spa = spa_get_primary();
519185029Spjd    if (!spa) {
520242243Savg	printf("%s: No ZFS pools located, can't boot\n", BOOTPROG);
521242243Savg	for (;;)
522242243Savg	    ;
523185029Spjd    }
524185029Spjd
525242241Savg    primary_spa = spa;
526242241Savg    primary_vdev = spa_get_primary_vdev(spa);
527242241Savg
528237766Savg    if (zfs_spa_init(spa) != 0 || zfs_mount(spa, 0, &zfsmount) != 0) {
529237766Savg	printf("%s: failed to mount default pool %s\n",
530237766Savg	    BOOTPROG, spa->spa_name);
531237766Savg	autoboot = 0;
532237766Savg    } else if (zfs_lookup(&zfsmount, PATH_CONFIG, &dn) == 0 ||
533237766Savg        zfs_lookup(&zfsmount, PATH_DOTCONFIG, &dn) == 0) {
534185029Spjd	off = 0;
535198079Sjhb	zfs_read(spa, &dn, &off, cmd, sizeof(cmd));
536185029Spjd    }
537185029Spjd
538185029Spjd    if (*cmd) {
539234679Savg	/*
540234679Savg	 * Note that parse() is destructive to cmd[] and we also want
541234679Savg	 * to honor RBX_QUIET option that could be present in cmd[].
542234679Savg	 */
543234679Savg	memcpy(cmddup, cmd, sizeof(cmd));
544185029Spjd	if (parse())
545185029Spjd	    autoboot = 0;
546234679Savg	if (!OPT_CHECK(RBX_QUIET))
547242224Savg	    printf("%s: %s\n", PATH_CONFIG, cmddup);
548185029Spjd	/* Do not process this command twice */
549185029Spjd	*cmd = 0;
550185029Spjd    }
551185029Spjd
552185029Spjd    /*
553185029Spjd     * Try to exec stage 3 boot loader. If interrupted by a keypress,
554185029Spjd     * or in case of failure, try to load a kernel directly instead.
555185029Spjd     */
556185029Spjd
557185029Spjd    if (autoboot && !*kname) {
558185029Spjd	memcpy(kname, PATH_BOOT3, sizeof(PATH_BOOT3));
559213136Spjd	if (!keyhit(3)) {
560185029Spjd	    load();
561185029Spjd	    memcpy(kname, PATH_KERNEL, sizeof(PATH_KERNEL));
562185029Spjd	}
563185029Spjd    }
564185029Spjd
565185029Spjd    /* Present the user with the boot2 prompt. */
566185029Spjd
567185029Spjd    for (;;) {
568237766Savg	if (!autoboot || !OPT_CHECK(RBX_QUIET)) {
569237766Savg	    printf("\nFreeBSD/x86 boot\n");
570237766Savg	    if (zfs_rlookup(spa, zfsmount.rootobj, rootname) != 0)
571242224Savg		printf("Default: %s/<0x%llx>:%s\n"
572237766Savg		       "boot: ",
573237766Savg		       spa->spa_name, zfsmount.rootobj, kname);
574242224Savg	    else if (rootname[0] != '\0')
575242224Savg		printf("Default: %s/%s:%s\n"
576242224Savg		       "boot: ",
577242224Savg		       spa->spa_name, rootname, kname);
578237766Savg	    else
579242224Savg		printf("Default: %s:%s\n"
580237766Savg		       "boot: ",
581242224Savg		       spa->spa_name, kname);
582237766Savg	}
583185029Spjd	if (ioctrl & IO_SERIAL)
584185029Spjd	    sio_flush();
585213136Spjd	if (!autoboot || keyhit(5))
586213136Spjd	    getstr(cmd, sizeof(cmd));
587185029Spjd	else if (!autoboot || !OPT_CHECK(RBX_QUIET))
588185029Spjd	    putchar('\n');
589185029Spjd	autoboot = 0;
590185029Spjd	if (parse())
591185029Spjd	    putchar('\a');
592185029Spjd	else
593185029Spjd	    load();
594185029Spjd    }
595185029Spjd}
596185029Spjd
/*
 * XXX - Needed for btxld to link the boot2 binary; do not remove.
 *
 * Deliberately empty: the argument is ignored and the function simply
 * returns.  No caller is visible in this file.
 */
void
exit(int x)
{
}
602185029Spjd
/*
 * Load the file named by kname from the mounted dataset and start it.
 *
 * Both a.out (ZMAGIC) and ELF images are accepted.  Segments are read
 * to the addresses the image asks for, masked to 24 bits, and symbol
 * data is placed after the image with its bounds recorded in bootinfo.
 * On any lookup, read or format error the function returns to its
 * caller; otherwise it finishes by calling __exec().
 */
static void
load(void)
{
    union {
	struct exec ex;
	Elf32_Ehdr eh;
    } hdr;
    static Elf32_Phdr ep[2];
    static Elf32_Shdr es[2];
    caddr_t p;
    dnode_phys_t dn;
    off_t off;
    uint32_t addr, x;
    int fmt, i, j;

    if (zfs_lookup(&zfsmount, kname, &dn)) {
	printf("\nCan't find %s\n", kname);
	return;
    }
    /* The file header decides which of the two formats is used. */
    off = 0;
    if (xfsread(&dn, &off, &hdr, sizeof(hdr)))
	return;
    if (N_GETMAGIC(hdr.ex) == ZMAGIC)
	fmt = 0;		/* a.out */
    else if (IS_ELF(hdr.eh))
	fmt = 1;		/* ELF */
    else {
	printf("Invalid %s\n", "format");
	return;
    }
    if (fmt == 0) {
	addr = hdr.ex.a_entry & 0xffffff;
	p = PTOV(addr);
	/* Text is read from file offset PAGE_SIZE onwards. */
	off = PAGE_SIZE;
	if (xfsread(&dn, &off, p, hdr.ex.a_text))
	    return;
	/* Data follows the text, which is padded to a page in memory. */
	p += roundup2(hdr.ex.a_text, PAGE_SIZE);
	if (xfsread(&dn, &off, p, hdr.ex.a_data))
	    return;
	/* Skip the data and the page-rounded bss. */
	p += hdr.ex.a_data + roundup2(hdr.ex.a_bss, PAGE_SIZE);
	bootinfo.bi_symtab = VTOP(p);
	/* The symbol area starts with the a_syms size word. */
	memcpy(p, &hdr.ex.a_syms, sizeof(hdr.ex.a_syms));
	p += sizeof(hdr.ex.a_syms);
	if (hdr.ex.a_syms) {
	    if (xfsread(&dn, &off, p, hdr.ex.a_syms))
		return;
	    p += hdr.ex.a_syms;
	    /*
	     * Next comes a length word that counts itself, followed by
	     * that many remaining bytes (presumably the string table).
	     */
	    if (xfsread(&dn, &off, p, sizeof(int)))
		return;
	    x = *(uint32_t *)p;
	    p += sizeof(int);
	    x -= sizeof(int);
	    if (xfsread(&dn, &off, p, x))
		return;
	    p += x;
	}
    } else {
	/* Collect the first two PT_LOAD program headers into ep[]. */
	off = hdr.eh.e_phoff;
	for (j = i = 0; i < hdr.eh.e_phnum && j < 2; i++) {
	    if (xfsread(&dn, &off, ep + j, sizeof(ep[0])))
		return;
	    if (ep[j].p_type == PT_LOAD)
		j++;
	}
	/* Read both segments to their (masked) physical addresses. */
	for (i = 0; i < 2; i++) {
	    p = PTOV(ep[i].p_paddr & 0xffffff);
	    off = ep[i].p_offset;
	    if (xfsread(&dn, &off, p, ep[i].p_filesz))
		return;
	}
	/* p still points at the second segment; step past it. */
	p += roundup2(ep[1].p_memsz, PAGE_SIZE);
	bootinfo.bi_symtab = VTOP(p);
	/*
	 * Symbols are loaded only when exactly two section headers
	 * follow the one at e_shstrndx (presumably the symbol and
	 * string tables).  Each is stored as a size word followed by
	 * the section contents.
	 */
	if (hdr.eh.e_shnum == hdr.eh.e_shstrndx + 3) {
	    off = hdr.eh.e_shoff + sizeof(es[0]) *
		(hdr.eh.e_shstrndx + 1);
	    if (xfsread(&dn, &off, &es, sizeof(es)))
		return;
	    for (i = 0; i < 2; i++) {
		memcpy(p, &es[i].sh_size, sizeof(es[i].sh_size));
		p += sizeof(es[i].sh_size);
		off = es[i].sh_offset;
		if (xfsread(&dn, &off, p, es[i].sh_size))
		    return;
		p += es[i].sh_size;
	    }
	}
	addr = hdr.eh.e_entry & 0xffffff;
    }
    bootinfo.bi_esymtab = VTOP(p);
    bootinfo.bi_kernelname = VTOP(kname);
    /* Describe the mounted dataset and the primary pool and vdev. */
    zfsargs.size = sizeof(zfsargs);
    zfsargs.pool = zfsmount.spa->spa_guid;
    zfsargs.root = zfsmount.rootobj;
    zfsargs.primary_pool = primary_spa->spa_guid;
    if (primary_vdev != NULL)
	zfsargs.primary_vdev = primary_vdev->v_guid;
    else
	printf("failed to detect primary vdev\n");
    /* The pool guid is passed as two 32-bit halves, low half first. */
    __exec((caddr_t)addr, RB_BOOTINFO | (opts & RBX_MASK),
	   bootdev,
	   KARGS_FLAGS_ZFS | KARGS_FLAGS_EXTARG,
	   (uint32_t) spa->spa_guid,
	   (uint32_t) (spa->spa_guid >> 32),
	   VTOP(&bootinfo),
	   zfsargs);
}
709185029Spjd
710185029Spjdstatic int
711242224Savgzfs_mount_ds(char *dsname)
712242224Savg{
713242224Savg    uint64_t newroot;
714242224Savg    spa_t *newspa;
715242224Savg    char *q;
716242224Savg
717242224Savg    q = strchr(dsname, '/');
718242224Savg    if (q)
719242224Savg	*q++ = '\0';
720242224Savg    newspa = spa_find_by_name(dsname);
721242224Savg    if (newspa == NULL) {
722242224Savg	printf("\nCan't find ZFS pool %s\n", dsname);
723242224Savg	return -1;
724242224Savg    }
725242224Savg
726242224Savg    if (zfs_spa_init(newspa))
727242224Savg	return -1;
728242224Savg
729242224Savg    newroot = 0;
730242224Savg    if (q) {
731242224Savg	if (zfs_lookup_dataset(newspa, q, &newroot)) {
732242224Savg	    printf("\nCan't find dataset %s in ZFS pool %s\n",
733242224Savg		    q, newspa->spa_name);
734242224Savg	    return -1;
735242224Savg	}
736242224Savg    }
737242224Savg    if (zfs_mount(newspa, newroot, &zfsmount)) {
738242224Savg	printf("\nCan't mount ZFS dataset\n");
739242224Savg	return -1;
740242224Savg    }
741242224Savg    spa = newspa;
742242224Savg    return (0);
743242224Savg}
744242224Savg
745242224Savgstatic int
746213136Spjdparse(void)
747185029Spjd{
748185029Spjd    char *arg = cmd;
749185029Spjd    char *ep, *p, *q;
750185029Spjd    const char *cp;
751185029Spjd    int c, i, j;
752185029Spjd
753185029Spjd    while ((c = *arg++)) {
754185029Spjd	if (c == ' ' || c == '\t' || c == '\n')
755185029Spjd	    continue;
756185029Spjd	for (p = arg; *p && *p != '\n' && *p != ' ' && *p != '\t'; p++);
757185029Spjd	ep = p;
758185029Spjd	if (*p)
759185029Spjd	    *p++ = 0;
760185029Spjd	if (c == '-') {
761185029Spjd	    while ((c = *arg++)) {
762185029Spjd		if (c == 'P') {
763185029Spjd		    if (*(uint8_t *)PTOV(0x496) & 0x10) {
764185029Spjd			cp = "yes";
765185029Spjd		    } else {
766185029Spjd			opts |= OPT_SET(RBX_DUAL) | OPT_SET(RBX_SERIAL);
767185029Spjd			cp = "no";
768185029Spjd		    }
769185029Spjd		    printf("Keyboard: %s\n", cp);
770185029Spjd		    continue;
771185029Spjd		} else if (c == 'S') {
772185029Spjd		    j = 0;
773185029Spjd		    while ((unsigned int)(i = *arg++ - '0') <= 9)
774185029Spjd			j = j * 10 + i;
775185029Spjd		    if (j > 0 && i == -'0') {
776185029Spjd			comspeed = j;
777185029Spjd			break;
778185029Spjd		    }
779185029Spjd		    /* Fall through to error below ('S' not in optstr[]). */
780185029Spjd		}
781185029Spjd		for (i = 0; c != optstr[i]; i++)
782185029Spjd		    if (i == NOPT - 1)
783185029Spjd			return -1;
784185029Spjd		opts ^= OPT_SET(flags[i]);
785185029Spjd	    }
786185029Spjd	    ioctrl = OPT_CHECK(RBX_DUAL) ? (IO_SERIAL|IO_KEYBOARD) :
787185029Spjd		     OPT_CHECK(RBX_SERIAL) ? IO_SERIAL : IO_KEYBOARD;
788242562Savg	    if (ioctrl & IO_SERIAL) {
789242562Savg	        if (sio_init(115200 / comspeed) != 0)
790242562Savg		    ioctrl &= ~IO_SERIAL;
791242562Savg	    }
792185029Spjd	} if (c == '?') {
793185029Spjd	    dnode_phys_t dn;
794185029Spjd
795237766Savg	    if (zfs_lookup(&zfsmount, arg, &dn) == 0) {
796185029Spjd		zap_list(spa, &dn);
797185029Spjd	    }
798185029Spjd	    return -1;
799185029Spjd	} else {
800185029Spjd	    arg--;
801185029Spjd
802185029Spjd	    /*
803185029Spjd	     * Report pool status if the comment is 'status'. Lets
804185029Spjd	     * hope no-one wants to load /status as a kernel.
805185029Spjd	     */
806185029Spjd	    if (!strcmp(arg, "status")) {
807185029Spjd		spa_all_status();
808185029Spjd		return -1;
809185029Spjd	    }
810185029Spjd
811185029Spjd	    /*
812242224Savg	     * If there is "zfs:" prefix simply ignore it.
813242224Savg	     */
814242224Savg	    if (strncmp(arg, "zfs:", 4) == 0)
815242224Savg		arg += 4;
816242224Savg
817242224Savg	    /*
818185029Spjd	     * If there is a colon, switch pools.
819185029Spjd	     */
820242224Savg	    q = strchr(arg, ':');
821185029Spjd	    if (q) {
822242224Savg		*q++ = '\0';
823242224Savg		if (zfs_mount_ds(arg) != 0)
824185029Spjd		    return -1;
825242224Savg		arg = q;
826185029Spjd	    }
827185029Spjd	    if ((i = ep - arg)) {
828185029Spjd		if ((size_t)i >= sizeof(kname))
829185029Spjd		    return -1;
830185029Spjd		memcpy(kname, arg, i + 1);
831185029Spjd	    }
832185029Spjd	}
833185029Spjd	arg = p;
834185029Spjd    }
835185029Spjd    return 0;
836185029Spjd}
837