1185029Spjd/*-
2185029Spjd * Copyright (c) 1998 Robert Nordier
3185029Spjd * All rights reserved.
4185029Spjd *
5185029Spjd * Redistribution and use in source and binary forms are freely
6185029Spjd * permitted provided that the above copyright notice and this
7185029Spjd * paragraph and the following disclaimer are duplicated in all
8185029Spjd * such forms.
9185029Spjd *
10185029Spjd * This software is provided "AS IS" and without any express or
11185029Spjd * implied warranties, including, without limitation, the implied
12185029Spjd * warranties of merchantability and fitness for a particular
13185029Spjd * purpose.
14185029Spjd */
15185029Spjd
16185029Spjd#include <sys/cdefs.h>
17185029Spjd__FBSDID("$FreeBSD: stable/11/stand/i386/zfsboot/zfsboot.c 348866 2019-06-10 15:55:38Z kevans $");
18185029Spjd
19329175Skevans#include "stand.h"
20329175Skevans
21185029Spjd#include <sys/param.h>
22185029Spjd#include <sys/errno.h>
23185029Spjd#include <sys/diskmbr.h>
24185096Sdfr#ifdef GPT
25185096Sdfr#include <sys/gpt.h>
26185096Sdfr#endif
27185029Spjd#include <sys/reboot.h>
28185029Spjd#include <sys/queue.h>
29185029Spjd
30185029Spjd#include <machine/bootinfo.h>
31185029Spjd#include <machine/elf.h>
32200309Sjhb#include <machine/pc/bios.h>
33185029Spjd
34185029Spjd#include <stdarg.h>
35185029Spjd#include <stddef.h>
36185029Spjd
37185029Spjd#include <a.out.h>
38185029Spjd
39185029Spjd#include <btxv86.h>
40185029Spjd
41185029Spjd#include "lib.h"
42213136Spjd#include "rbx.h"
43213136Spjd#include "drv.h"
44329100Skevans#include "edd.h"
45213136Spjd#include "cons.h"
46235154Savg#include "bootargs.h"
47294765Simp#include "paths.h"
48185029Spjd
49235329Savg#include "libzfs.h"
50235329Savg
/*
 * ARGS is the physical address main() reads the boot drive number (byte 0)
 * and slice (byte 1) from.  NOPT and NDEV size the optstr[]/flags[] and
 * dev_maj[] tables respectively.
 */
#define	ARGS			0x900
#define	NOPT			14
#define	NDEV			3

/*
 * BIOS_NUMDRIVES is the physical address of the byte main() uses as the
 * number of hard drives to probe.  DRV_HARD is OR'ed into a unit number to
 * form a hard-disk BIOS drive number; DRV_MASK extracts the unit again.
 */
#define	BIOS_NUMDRIVES		0x475
#define	DRV_HARD		0x80
#define	DRV_MASK		0x7f

/* Disk types; main() uses them as indices into dev_maj[]. */
#define	TYPE_AD			0
#define	TYPE_DA			1
#define	TYPE_MAXHARD		TYPE_DA
#define	TYPE_FD			2

/* Alignment, in bytes, that vdev_read() applies to every disk read. */
#define	DEV_GELIBOOT_BSIZE	4096
65297629Sallanjude
66185029Spjdextern uint32_t _end;
67185029Spjd
68185096Sdfr#ifdef GPT
69185096Sdfrstatic const uuid_t freebsd_zfs_uuid = GPT_ENT_TYPE_FREEBSD_ZFS;
70185096Sdfr#endif
71185029Spjdstatic const char optstr[NOPT] = "DhaCcdgmnpqrsv"; /* Also 'P', 'S' */
72185029Spjdstatic const unsigned char flags[NOPT] = {
73185029Spjd    RBX_DUAL,
74185029Spjd    RBX_SERIAL,
75185029Spjd    RBX_ASKNAME,
76185029Spjd    RBX_CDROM,
77185029Spjd    RBX_CONFIG,
78185029Spjd    RBX_KDB,
79185029Spjd    RBX_GDB,
80185029Spjd    RBX_MUTE,
81185029Spjd    RBX_NOINTR,
82185029Spjd    RBX_PAUSE,
83185029Spjd    RBX_QUIET,
84185029Spjd    RBX_DFLTROOT,
85185029Spjd    RBX_SINGLE,
86185029Spjd    RBX_VERBOSE
87185029Spjd};
88213136Spjduint32_t opts;
89185029Spjd
90348866Skevans/*
91348866Skevans * Paths to try loading before falling back to the boot2 prompt.
92348866Skevans *
93348866Skevans * /boot/zfsloader must be tried before /boot/loader in order to remain
94348866Skevans * backward compatible with ZFS boot environments where /boot/loader exists
95348866Skevans * but does not have ZFS support, which was the case before FreeBSD 12.
96348866Skevans *
97348866Skevans * If no loader is found, try to load a kernel directly instead.
98348866Skevans */
99348866Skevansstatic const struct string {
100348866Skevans    const char *p;
101348866Skevans    size_t len;
102348866Skevans} loadpath[] = {
103348866Skevans    { PATH_LOADER_ZFS, sizeof(PATH_LOADER_ZFS) },
104348866Skevans    { PATH_LOADER, sizeof(PATH_LOADER) },
105348866Skevans    { PATH_KERNEL, sizeof(PATH_KERNEL) },
106348866Skevans};
107348866Skevans
108185029Spjdstatic const unsigned char dev_maj[NDEV] = {30, 4, 2};
109185029Spjd
110185029Spjdstatic char cmd[512];
111234339Savgstatic char cmddup[512];
112185029Spjdstatic char kname[1024];
113235329Savgstatic char rootname[256];
114185029Spjdstatic int comspeed = SIOSPD;
115185029Spjdstatic struct bootinfo bootinfo;
116185029Spjdstatic uint32_t bootdev;
117235329Savgstatic struct zfs_boot_args zfsargs;
118185029Spjd
119200309Sjhbvm_offset_t	high_heap_base;
120200309Sjhbuint32_t	bios_basemem, bios_extmem, high_heap_size;
121200309Sjhb
122200309Sjhbstatic struct bios_smap smap;
123200309Sjhb
124200309Sjhb/*
125200309Sjhb * The minimum amount of memory to reserve in bios_extmem for the heap.
126200309Sjhb */
127328866Skevans#define	HEAP_MIN		(64 * 1024 * 1024)
128200309Sjhb
129200309Sjhbstatic char *heap_next;
130200309Sjhbstatic char *heap_end;
131200309Sjhb
132185029Spjd/* Buffers that must not span a 64k boundary. */
133297629Sallanjude#define READ_BUF_SIZE		8192
134185029Spjdstruct dmadat {
135185029Spjd	char rdbuf[READ_BUF_SIZE];	/* for reading large things */
136185029Spjd	char secbuf[READ_BUF_SIZE];	/* for MBR/disklabel */
137185029Spjd};
138185029Spjdstatic struct dmadat *dmadat;
139185029Spjd
/* Forward declarations for routines used before their definitions. */
void exit(int);
void reboot(void);
static void load(void);
static int parse_cmd(void);
static void bios_getmem(void);
int main(void);

#ifdef LOADER_GELI_SUPPORT
#include "geliboot.h"
/* Passphrase buffer handed to geli_passphrase() for each GELI provider. */
static char gelipw[GELI_PW_MAXLEN];
#endif
151296963Sallanjude
152344399Skevansstruct zfsdsk {
153344399Skevans	struct dsk       dsk;
154344399Skevans#ifdef LOADER_GELI_SUPPORT
155344399Skevans	struct geli_dev *gdev;
156344399Skevans#endif
157344399Skevans};
158344399Skevans
159185029Spjd#include "zfsimpl.c"
160185029Spjd
161185029Spjd/*
162185029Spjd * Read from a dnode (which must be from a ZPL filesystem).
163185029Spjd */
164185029Spjdstatic int
165185029Spjdzfs_read(spa_t *spa, const dnode_phys_t *dnode, off_t *offp, void *start, size_t size)
166185029Spjd{
167185029Spjd	const znode_phys_t *zp = (const znode_phys_t *) dnode->dn_bonus;
168185029Spjd	size_t n;
169185029Spjd	int rc;
170185029Spjd
171185029Spjd	n = size;
172185029Spjd	if (*offp + n > zp->zp_size)
173185029Spjd		n = zp->zp_size - *offp;
174308914Savg
175185029Spjd	rc = dnode_read(spa, dnode, *offp, start, n);
176185029Spjd	if (rc)
177185029Spjd		return (-1);
178185029Spjd	*offp += n;
179185029Spjd
180185029Spjd	return (n);
181185029Spjd}
182185029Spjd
183185029Spjd/*
184185029Spjd * Current ZFS pool
185185029Spjd */
186235329Savgstatic spa_t *spa;
187241293Savgstatic spa_t *primary_spa;
188241293Savgstatic vdev_t *primary_vdev;
189185029Spjd
190185029Spjd/*
191185029Spjd * A wrapper for dskread that doesn't have to worry about whether the
192185029Spjd * buffer pointer crosses a 64k boundary.
193185029Spjd */
194185029Spjdstatic int
195329175Skevansvdev_read(void *xvdev, void *priv, off_t off, void *buf, size_t bytes)
196185029Spjd{
197185029Spjd	char *p;
198297629Sallanjude	daddr_t lba, alignlba;
199300257Sallanjude	off_t diff;
200297629Sallanjude	unsigned int nb, alignnb;
201344399Skevans	struct zfsdsk *zdsk = (struct zfsdsk *) priv;
202185029Spjd
203185029Spjd	if ((off & (DEV_BSIZE - 1)) || (bytes & (DEV_BSIZE - 1)))
204185029Spjd		return -1;
205185029Spjd
206185029Spjd	p = buf;
207185029Spjd	lba = off / DEV_BSIZE;
208344399Skevans	lba += zdsk->dsk.start;
209297629Sallanjude	/*
210300257Sallanjude	 * Align reads to 4k else 4k sector GELIs will not decrypt.
211300257Sallanjude	 * Round LBA down to nearest multiple of DEV_GELIBOOT_BSIZE bytes.
212300257Sallanjude	 */
213300257Sallanjude	alignlba = rounddown2(off, DEV_GELIBOOT_BSIZE) / DEV_BSIZE;
214300257Sallanjude	/*
215297629Sallanjude	 * The read must be aligned to DEV_GELIBOOT_BSIZE bytes relative to the
216297629Sallanjude	 * start of the GELI partition, not the start of the actual disk.
217297629Sallanjude	 */
218344399Skevans	alignlba += zdsk->dsk.start;
219297629Sallanjude	diff = (lba - alignlba) * DEV_BSIZE;
220297629Sallanjude
221185029Spjd	while (bytes > 0) {
222185029Spjd		nb = bytes / DEV_BSIZE;
223297629Sallanjude		/*
224297629Sallanjude		 * Ensure that the read size plus the leading offset does not
225297629Sallanjude		 * exceed the size of the read buffer.
226297629Sallanjude		 */
227298949Speter		if (nb > (READ_BUF_SIZE - diff) / DEV_BSIZE)
228298949Speter			nb = (READ_BUF_SIZE - diff) / DEV_BSIZE;
229297629Sallanjude		/*
230297629Sallanjude		 * Round the number of blocks to read up to the nearest multiple
231297629Sallanjude		 * of DEV_GELIBOOT_BSIZE.
232297629Sallanjude		 */
233298949Speter		alignnb = roundup2(nb * DEV_BSIZE + diff, DEV_GELIBOOT_BSIZE)
234298949Speter		    / DEV_BSIZE;
235297629Sallanjude
236344399Skevans		if (zdsk->dsk.size > 0 && alignlba + alignnb >
237344399Skevans		    zdsk->dsk.size + zdsk->dsk.start) {
238344399Skevans			printf("Shortening read at %lld from %d to %lld\n",
239344399Skevans			    alignlba, alignnb,
240344399Skevans			    (zdsk->dsk.size + zdsk->dsk.start) - alignlba);
241344399Skevans			alignnb = (zdsk->dsk.size + zdsk->dsk.start) - alignlba;
242344295Skevans		}
243344295Skevans
244344399Skevans		if (drvread(&zdsk->dsk, dmadat->rdbuf, alignlba, alignnb))
245185029Spjd			return -1;
246296963Sallanjude#ifdef LOADER_GELI_SUPPORT
247296963Sallanjude		/* decrypt */
248344399Skevans		if (zdsk->gdev != NULL) {
249344399Skevans			if (geli_read(zdsk->gdev, ((alignlba - zdsk->dsk.start) *
250297629Sallanjude			    DEV_BSIZE), dmadat->rdbuf, alignnb * DEV_BSIZE))
251297629Sallanjude				return (-1);
252296963Sallanjude		}
253296963Sallanjude#endif
254297629Sallanjude		memcpy(p, dmadat->rdbuf + diff, nb * DEV_BSIZE);
255185029Spjd		p += nb * DEV_BSIZE;
256185029Spjd		lba += nb;
257297629Sallanjude		alignlba += alignnb;
258185029Spjd		bytes -= nb * DEV_BSIZE;
259297629Sallanjude		/* Don't need the leading offset after the first block. */
260297629Sallanjude		diff = 0;
261185029Spjd	}
262185029Spjd
263185029Spjd	return 0;
264185029Spjd}
265329175Skevans/* Match the signature exactly due to signature madness */
266329175Skevansstatic int
267329175Skevansvdev_read2(vdev_t *vdev, void *priv, off_t off, void *buf, size_t bytes)
268329175Skevans{
269329175Skevans	return vdev_read(vdev, priv, off, buf, bytes);
270329175Skevans}
271185029Spjd
272329175Skevans
273185029Spjdstatic int
274308914Savgvdev_write(vdev_t *vdev, void *priv, off_t off, void *buf, size_t bytes)
275308914Savg{
276308914Savg	char *p;
277308914Savg	daddr_t lba;
278308914Savg	unsigned int nb;
279344399Skevans	struct zfsdsk *zdsk = (struct zfsdsk *) priv;
280308914Savg
281308914Savg	if ((off & (DEV_BSIZE - 1)) || (bytes & (DEV_BSIZE - 1)))
282308914Savg		return -1;
283308914Savg
284308914Savg	p = buf;
285308914Savg	lba = off / DEV_BSIZE;
286344399Skevans	lba += zdsk->dsk.start;
287308914Savg	while (bytes > 0) {
288308914Savg		nb = bytes / DEV_BSIZE;
289308914Savg		if (nb > READ_BUF_SIZE / DEV_BSIZE)
290308914Savg			nb = READ_BUF_SIZE / DEV_BSIZE;
291308914Savg		memcpy(dmadat->rdbuf, p, nb * DEV_BSIZE);
292344399Skevans		if (drvwrite(&zdsk->dsk, dmadat->rdbuf, lba, nb))
293308914Savg			return -1;
294308914Savg		p += nb * DEV_BSIZE;
295308914Savg		lba += nb;
296308914Savg		bytes -= nb * DEV_BSIZE;
297308914Savg	}
298308914Savg
299308914Savg	return 0;
300308914Savg}
301308914Savg
302308914Savgstatic int
303185029Spjdxfsread(const dnode_phys_t *dnode, off_t *offp, void *buf, size_t nbyte)
304185029Spjd{
305185029Spjd    if ((size_t)zfs_read(spa, dnode, offp, buf, nbyte) != nbyte) {
306235329Savg	printf("Invalid format\n");
307185029Spjd	return -1;
308185029Spjd    }
309185029Spjd    return 0;
310185029Spjd}
311185029Spjd
312308914Savg/*
313308914Savg * Read Pad2 (formerly "Boot Block Header") area of the first
314308914Savg * vdev label of the given vdev.
315308914Savg */
316308914Savgstatic int
317308914Savgvdev_read_pad2(vdev_t *vdev, char *buf, size_t size)
318308914Savg{
319308914Savg	blkptr_t bp;
320308914Savg	char *tmp = zap_scratch;
321308914Savg	off_t off = offsetof(vdev_label_t, vl_pad2);
322308914Savg
323308914Savg	if (size > VDEV_PAD_SIZE)
324308914Savg		size = VDEV_PAD_SIZE;
325308914Savg
326308914Savg	BP_ZERO(&bp);
327308914Savg	BP_SET_LSIZE(&bp, VDEV_PAD_SIZE);
328308914Savg	BP_SET_PSIZE(&bp, VDEV_PAD_SIZE);
329308914Savg	BP_SET_CHECKSUM(&bp, ZIO_CHECKSUM_LABEL);
330308914Savg	BP_SET_COMPRESS(&bp, ZIO_COMPRESS_OFF);
331308914Savg	DVA_SET_OFFSET(BP_IDENTITY(&bp), off);
332308914Savg	if (vdev_read_phys(vdev, &bp, tmp, off, 0))
333308914Savg		return (EIO);
334308914Savg	memcpy(buf, tmp, size);
335308914Savg	return (0);
336308914Savg}
337308914Savg
338308914Savgstatic int
339308914Savgvdev_clear_pad2(vdev_t *vdev)
340308914Savg{
341308914Savg	char *zeroes = zap_scratch;
342308914Savg	uint64_t *end;
343308914Savg	off_t off = offsetof(vdev_label_t, vl_pad2);
344308914Savg
345308914Savg	memset(zeroes, 0, VDEV_PAD_SIZE);
346308914Savg	end = (uint64_t *)(zeroes + VDEV_PAD_SIZE);
347308914Savg	/* ZIO_CHECKSUM_LABEL magic and pre-calcualted checksum for all zeros */
348308914Savg	end[-5] = 0x0210da7ab10c7a11;
349308914Savg	end[-4] = 0x97f48f807f6e2a3f;
350308914Savg	end[-3] = 0xaf909f1658aacefc;
351308914Savg	end[-2] = 0xcbd1ea57ff6db48b;
352308914Savg	end[-1] = 0x6ec692db0d465fab;
353308914Savg	if (vdev_write(vdev, vdev->v_read_priv, off, zeroes, VDEV_PAD_SIZE))
354308914Savg		return (EIO);
355308914Savg	return (0);
356308914Savg}
357308914Savg
358200309Sjhbstatic void
359200309Sjhbbios_getmem(void)
360185029Spjd{
361200309Sjhb    uint64_t size;
362185029Spjd
363200309Sjhb    /* Parse system memory map */
364200309Sjhb    v86.ebx = 0;
365200309Sjhb    do {
366200309Sjhb	v86.ctl = V86_FLAGS;
367200309Sjhb	v86.addr = 0x15;		/* int 0x15 function 0xe820*/
368200309Sjhb	v86.eax = 0xe820;
369200309Sjhb	v86.ecx = sizeof(struct bios_smap);
370200309Sjhb	v86.edx = SMAP_SIG;
371200309Sjhb	v86.es = VTOPSEG(&smap);
372200309Sjhb	v86.edi = VTOPOFF(&smap);
373200309Sjhb	v86int();
374292682Sjhb	if (V86_CY(v86.efl) || (v86.eax != SMAP_SIG))
375200309Sjhb	    break;
376200309Sjhb	/* look for a low-memory segment that's large enough */
377200309Sjhb	if ((smap.type == SMAP_TYPE_MEMORY) && (smap.base == 0) &&
378200309Sjhb	    (smap.length >= (512 * 1024)))
379200309Sjhb	    bios_basemem = smap.length;
380200309Sjhb	/* look for the first segment in 'extended' memory */
381200309Sjhb	if ((smap.type == SMAP_TYPE_MEMORY) && (smap.base == 0x100000)) {
382200309Sjhb	    bios_extmem = smap.length;
383200309Sjhb	}
384200309Sjhb
385200309Sjhb	/*
386200309Sjhb	 * Look for the largest segment in 'extended' memory beyond
387200309Sjhb	 * 1MB but below 4GB.
388200309Sjhb	 */
389200309Sjhb	if ((smap.type == SMAP_TYPE_MEMORY) && (smap.base > 0x100000) &&
390200309Sjhb	    (smap.base < 0x100000000ull)) {
391200309Sjhb	    size = smap.length;
392200309Sjhb
393200309Sjhb	    /*
394200309Sjhb	     * If this segment crosses the 4GB boundary, truncate it.
395200309Sjhb	     */
396200309Sjhb	    if (smap.base + size > 0x100000000ull)
397200309Sjhb		size = 0x100000000ull - smap.base;
398200309Sjhb
399200309Sjhb	    if (size > high_heap_size) {
400200309Sjhb		high_heap_size = size;
401200309Sjhb		high_heap_base = smap.base;
402200309Sjhb	    }
403200309Sjhb	}
404200309Sjhb    } while (v86.ebx != 0);
405200309Sjhb
406200309Sjhb    /* Fall back to the old compatibility function for base memory */
407200309Sjhb    if (bios_basemem == 0) {
408200309Sjhb	v86.ctl = 0;
409200309Sjhb	v86.addr = 0x12;		/* int 0x12 */
410200309Sjhb	v86int();
411200309Sjhb
412200309Sjhb	bios_basemem = (v86.eax & 0xffff) * 1024;
413200309Sjhb    }
414200309Sjhb
415200309Sjhb    /* Fall back through several compatibility functions for extended memory */
416200309Sjhb    if (bios_extmem == 0) {
417200309Sjhb	v86.ctl = V86_FLAGS;
418200309Sjhb	v86.addr = 0x15;		/* int 0x15 function 0xe801*/
419200309Sjhb	v86.eax = 0xe801;
420200309Sjhb	v86int();
421292682Sjhb	if (!V86_CY(v86.efl)) {
422200309Sjhb	    bios_extmem = ((v86.ecx & 0xffff) + ((v86.edx & 0xffff) * 64)) * 1024;
423200309Sjhb	}
424200309Sjhb    }
425200309Sjhb    if (bios_extmem == 0) {
426200309Sjhb	v86.ctl = 0;
427200309Sjhb	v86.addr = 0x15;		/* int 0x15 function 0x88*/
428200309Sjhb	v86.eax = 0x8800;
429200309Sjhb	v86int();
430200309Sjhb	bios_extmem = (v86.eax & 0xffff) * 1024;
431200309Sjhb    }
432200309Sjhb
433200309Sjhb    /*
434200309Sjhb     * If we have extended memory and did not find a suitable heap
435200309Sjhb     * region in the SMAP, use the last 3MB of 'extended' memory as a
436200309Sjhb     * high heap candidate.
437200309Sjhb     */
438200309Sjhb    if (bios_extmem >= HEAP_MIN && high_heap_size < HEAP_MIN) {
439200309Sjhb	high_heap_size = HEAP_MIN;
440200309Sjhb	high_heap_base = bios_extmem + 0x100000 - HEAP_MIN;
441200309Sjhb    }
442296963Sallanjude}
443200309Sjhb
444185029Spjd/*
445185029Spjd * Try to detect a device supported by the legacy int13 BIOS
446185029Spjd */
447185029Spjdstatic int
448185029Spjdint13probe(int drive)
449185029Spjd{
450185029Spjd    v86.ctl = V86_FLAGS;
451185029Spjd    v86.addr = 0x13;
452185029Spjd    v86.eax = 0x800;
453185029Spjd    v86.edx = drive;
454185029Spjd    v86int();
455185029Spjd
456292682Sjhb    if (!V86_CY(v86.efl) &&				/* carry clear */
457185029Spjd	((v86.edx & 0xff) != (drive & DRV_MASK))) {	/* unit # OK */
458185029Spjd	if ((v86.ecx & 0x3f) == 0) {			/* absurd sector size */
459185029Spjd		return(0);				/* skip device */
460185029Spjd	}
461185029Spjd	return (1);
462185029Spjd    }
463185029Spjd    return(0);
464185029Spjd}
465185029Spjd
466192194Sdfr/*
467192194Sdfr * We call this when we find a ZFS vdev - ZFS consumes the dsk
468192194Sdfr * structure so we must make a new one.
469192194Sdfr */
470344399Skevansstatic struct zfsdsk *
471344399Skevanscopy_dsk(struct zfsdsk *zdsk)
472192194Sdfr{
473344399Skevans    struct zfsdsk *newdsk;
474192194Sdfr
475344399Skevans    newdsk = malloc(sizeof(struct zfsdsk));
476344399Skevans    *newdsk = *zdsk;
477192194Sdfr    return (newdsk);
478192194Sdfr}
479192194Sdfr
480329100Skevans/*
481329100Skevans * Get disk size from eax=0x800 and 0x4800. We need to probe both
482329100Skevans * because 0x4800 may not be available and we would like to get more
483329100Skevans * or less correct disk size - if it is possible at all.
484329100Skevans * Note we do not really want to touch drv.c because that code is shared
485329100Skevans * with boot2 and we can not afford to grow that code.
486329100Skevans */
487329100Skevansstatic uint64_t
488344399Skevansdrvsize_ext(struct zfsdsk *zdsk)
489329100Skevans{
490344399Skevans	struct dsk *dskp;
491329100Skevans	uint64_t size, tmp;
492329100Skevans	int cyl, hds, sec;
493329100Skevans
494344399Skevans	dskp = &zdsk->dsk;
495344399Skevans
496329100Skevans	v86.ctl = V86_FLAGS;
497329100Skevans	v86.addr = 0x13;
498329100Skevans	v86.eax = 0x800;
499329100Skevans	v86.edx = dskp->drive;
500329100Skevans	v86int();
501329100Skevans
502329100Skevans	/* Don't error out if we get bad sector number, try EDD as well */
503329100Skevans	if (V86_CY(v86.efl) ||	/* carry set */
504329100Skevans	    (v86.edx & 0xff) <= (unsigned)(dskp->drive & 0x7f)) /* unit # bad */
505329100Skevans		return (0);
506329100Skevans	cyl = ((v86.ecx & 0xc0) << 2) + ((v86.ecx & 0xff00) >> 8) + 1;
507329100Skevans	/* Convert max head # -> # of heads */
508329100Skevans	hds = ((v86.edx & 0xff00) >> 8) + 1;
509329100Skevans	sec = v86.ecx & 0x3f;
510329100Skevans
511329100Skevans	size = (uint64_t)cyl * hds * sec;
512329100Skevans
513329100Skevans	/* Determine if we can use EDD with this device. */
514329100Skevans	v86.ctl = V86_FLAGS;
515329100Skevans	v86.addr = 0x13;
516329100Skevans	v86.eax = 0x4100;
517329100Skevans	v86.edx = dskp->drive;
518329100Skevans	v86.ebx = 0x55aa;
519329100Skevans	v86int();
520329100Skevans	if (V86_CY(v86.efl) ||  /* carry set */
521329100Skevans	    (v86.ebx & 0xffff) != 0xaa55 || /* signature */
522329100Skevans	    (v86.ecx & EDD_INTERFACE_FIXED_DISK) == 0)
523329100Skevans		return (size);
524329100Skevans
525329100Skevans	tmp = drvsize(dskp);
526329100Skevans	if (tmp > size)
527329100Skevans		size = tmp;
528329100Skevans
529329100Skevans	return (size);
530329100Skevans}
531329100Skevans
532329100Skevans/*
533329100Skevans * The "layered" ioctl to read disk/partition size. Unfortunately
534329100Skevans * the zfsboot case is hardest, because we do not have full software
535329100Skevans * stack available, so we need to do some manual work here.
536329100Skevans */
537329100Skevansuint64_t
538329100Skevansldi_get_size(void *priv)
539329100Skevans{
540344399Skevans	struct zfsdsk *zdsk = priv;
541344399Skevans	uint64_t size = zdsk->dsk.size;
542329100Skevans
543344399Skevans	if (zdsk->dsk.start == 0)
544344399Skevans		size = drvsize_ext(zdsk);
545329100Skevans
546329100Skevans	return (size * DEV_BSIZE);
547329100Skevans}
548329100Skevans
549185029Spjdstatic void
550344399Skevansprobe_drive(struct zfsdsk *zdsk)
551185029Spjd{
552185096Sdfr#ifdef GPT
553185096Sdfr    struct gpt_hdr hdr;
554185096Sdfr    struct gpt_ent *ent;
555185096Sdfr    unsigned part, entries_per_sec;
556299997Speter    daddr_t slba;
557185096Sdfr#endif
558299997Speter#if defined(GPT) || defined(LOADER_GELI_SUPPORT)
559299997Speter    daddr_t elba;
560299660Sngie#endif
561299997Speter
562185029Spjd    struct dos_partition *dp;
563185029Spjd    char *sec;
564185029Spjd    unsigned i;
565185029Spjd
566185029Spjd    /*
567296963Sallanjude     * If we find a vdev on the whole disk, stop here.
568185029Spjd     */
569344399Skevans    if (vdev_probe(vdev_read2, zdsk, NULL) == 0)
570185029Spjd	return;
571185029Spjd
572296963Sallanjude#ifdef LOADER_GELI_SUPPORT
573296963Sallanjude    /*
574296963Sallanjude     * Taste the disk, if it is GELI encrypted, decrypt it and check to see if
575296963Sallanjude     * it is a usable vdev then. Otherwise dig
576296963Sallanjude     * out the partition table and probe each slice/partition
577296963Sallanjude     * in turn for a vdev or GELI encrypted vdev.
578296963Sallanjude     */
579344399Skevans    elba = drvsize_ext(zdsk);
580296963Sallanjude    if (elba > 0) {
581296963Sallanjude	elba--;
582296963Sallanjude    }
583344399Skevans    zdsk->gdev = geli_taste(vdev_read, zdsk, elba, "disk%u:0:");
584344399Skevans    if (zdsk->gdev != NULL) {
585344399Skevans	if (geli_havekey(zdsk->gdev) == 0 ||
586344399Skevans	    geli_passphrase(zdsk->gdev, gelipw) == 0) {
587344399Skevans	    if (vdev_probe(vdev_read2, zdsk, NULL) == 0) {
588296963Sallanjude		return;
589296963Sallanjude	    }
590296963Sallanjude	}
591296963Sallanjude    }
592296963Sallanjude#endif /* LOADER_GELI_SUPPORT */
593296963Sallanjude
594185029Spjd    sec = dmadat->secbuf;
595344399Skevans    zdsk->dsk.start = 0;
596185096Sdfr
597185096Sdfr#ifdef GPT
598185096Sdfr    /*
599185096Sdfr     * First check for GPT.
600185096Sdfr     */
601344399Skevans    if (drvread(&zdsk->dsk, sec, 1, 1)) {
602185096Sdfr	return;
603185096Sdfr    }
604185096Sdfr    memcpy(&hdr, sec, sizeof(hdr));
605185096Sdfr    if (memcmp(hdr.hdr_sig, GPT_HDR_SIG, sizeof(hdr.hdr_sig)) != 0 ||
606185096Sdfr	hdr.hdr_lba_self != 1 || hdr.hdr_revision < 0x00010000 ||
607185096Sdfr	hdr.hdr_entsz < sizeof(*ent) || DEV_BSIZE % hdr.hdr_entsz != 0) {
608185096Sdfr	goto trymbr;
609185096Sdfr    }
610185096Sdfr
611185096Sdfr    /*
612298826Spfg     * Probe all GPT partitions for the presence of ZFS pools. We
613185096Sdfr     * return the spa_t for the first we find (if requested). This
614185096Sdfr     * will have the effect of booting from the first pool on the
615185096Sdfr     * disk.
616296963Sallanjude     *
617296963Sallanjude     * If no vdev is found, GELI decrypting the device and try again
618185096Sdfr     */
619185096Sdfr    entries_per_sec = DEV_BSIZE / hdr.hdr_entsz;
620185096Sdfr    slba = hdr.hdr_lba_table;
621185096Sdfr    elba = slba + hdr.hdr_entries / entries_per_sec;
622185096Sdfr    while (slba < elba) {
623344399Skevans	zdsk->dsk.start = 0;
624344399Skevans	if (drvread(&zdsk->dsk, sec, slba, 1))
625185096Sdfr	    return;
626185096Sdfr	for (part = 0; part < entries_per_sec; part++) {
627185096Sdfr	    ent = (struct gpt_ent *)(sec + part * hdr.hdr_entsz);
628185096Sdfr	    if (memcmp(&ent->ent_type, &freebsd_zfs_uuid,
629185096Sdfr		     sizeof(uuid_t)) == 0) {
630344399Skevans		zdsk->dsk.start = ent->ent_lba_start;
631344399Skevans		zdsk->dsk.size = ent->ent_lba_end - ent->ent_lba_start + 1;
632344399Skevans		zdsk->dsk.slice = part + 1;
633344399Skevans		zdsk->dsk.part = 255;
634344399Skevans		if (vdev_probe(vdev_read2, zdsk, NULL) == 0) {
635185096Sdfr		    /*
636185096Sdfr		     * This slice had a vdev. We need a new dsk
637185096Sdfr		     * structure now since the vdev now owns this one.
638185096Sdfr		     */
639344399Skevans		    zdsk = copy_dsk(zdsk);
640185096Sdfr		}
641296963Sallanjude#ifdef LOADER_GELI_SUPPORT
642344399Skevans		else if ((zdsk->gdev = geli_taste(vdev_read, zdsk,
643344399Skevans		    ent->ent_lba_end - ent->ent_lba_start, "disk%up%u:",
644344399Skevans		    zdsk->dsk.unit, zdsk->dsk.slice)) != NULL) {
645344399Skevans		    if (geli_havekey(zdsk->gdev) == 0 ||
646344399Skevans			geli_passphrase(zdsk->gdev, gelipw) == 0) {
647296963Sallanjude			/*
648296963Sallanjude			 * This slice has GELI, check it for ZFS.
649296963Sallanjude			 */
650344399Skevans			if (vdev_probe(vdev_read2, zdsk, NULL) == 0) {
651296963Sallanjude			    /*
652296963Sallanjude			     * This slice had a vdev. We need a new dsk
653296963Sallanjude			     * structure now since the vdev now owns this one.
654296963Sallanjude			     */
655344399Skevans			    zdsk = copy_dsk(zdsk);
656296963Sallanjude			}
657296963Sallanjude			break;
658296963Sallanjude		    }
659296963Sallanjude		}
660296963Sallanjude#endif /* LOADER_GELI_SUPPORT */
661185096Sdfr	    }
662185096Sdfr	}
663185096Sdfr	slba++;
664185096Sdfr    }
665185096Sdfr    return;
666185096Sdfrtrymbr:
667296963Sallanjude#endif /* GPT */
668185096Sdfr
669344399Skevans    if (drvread(&zdsk->dsk, sec, DOSBBSECTOR, 1))
670185029Spjd	return;
671185029Spjd    dp = (void *)(sec + DOSPARTOFF);
672185029Spjd
673185029Spjd    for (i = 0; i < NDOSPART; i++) {
674185029Spjd	if (!dp[i].dp_typ)
675185029Spjd	    continue;
676344399Skevans	zdsk->dsk.start = dp[i].dp_start;
677344399Skevans	zdsk->dsk.size = dp[i].dp_size;
678344399Skevans	zdsk->dsk.slice = i + 1;
679344399Skevans	if (vdev_probe(vdev_read2, zdsk, NULL) == 0) {
680344399Skevans	    zdsk = copy_dsk(zdsk);
681185029Spjd	}
682296963Sallanjude#ifdef LOADER_GELI_SUPPORT
683344399Skevans	else if ((zdsk->gdev = geli_taste(vdev_read, zdsk, dp[i].dp_size -
684344399Skevans		 dp[i].dp_start, "disk%us%u:")) != NULL) {
685344399Skevans	    if (geli_havekey(zdsk->gdev) == 0 ||
686344399Skevans		geli_passphrase(zdsk->gdev, gelipw) == 0) {
687296963Sallanjude		/*
688296963Sallanjude		 * This slice has GELI, check it for ZFS.
689296963Sallanjude		 */
690344399Skevans		if (vdev_probe(vdev_read2, zdsk, NULL) == 0) {
691296963Sallanjude		    /*
692296963Sallanjude		     * This slice had a vdev. We need a new dsk
693296963Sallanjude		     * structure now since the vdev now owns this one.
694296963Sallanjude		     */
695344399Skevans		    zdsk = copy_dsk(zdsk);
696296963Sallanjude		}
697296963Sallanjude		break;
698296963Sallanjude	    }
699296963Sallanjude	}
700296963Sallanjude#endif /* LOADER_GELI_SUPPORT */
701185029Spjd    }
702185029Spjd}
703185029Spjd
704185029Spjdint
705185029Spjdmain(void)
706185029Spjd{
707185029Spjd    dnode_phys_t dn;
708185029Spjd    off_t off;
709344399Skevans    struct zfsdsk *zdsk;
710308914Savg    int autoboot, i;
711308914Savg    int nextboot;
712308914Savg    int rc;
713185029Spjd
714208388Sjhb    dmadat = (void *)(roundup2(__base + (int32_t)&_end, 0x10000) - __base);
715208388Sjhb
716200309Sjhb    bios_getmem();
717200309Sjhb
718200309Sjhb    if (high_heap_size > 0) {
719200309Sjhb	heap_end = PTOV(high_heap_base + high_heap_size);
720200309Sjhb	heap_next = PTOV(high_heap_base);
721200309Sjhb    } else {
722296963Sallanjude	heap_next = (char *)dmadat + sizeof(*dmadat);
723296963Sallanjude	heap_end = (char *)PTOV(bios_basemem);
724200309Sjhb    }
725329175Skevans    setheap(heap_next, heap_end);
726200309Sjhb
727344399Skevans    zdsk = calloc(1, sizeof(struct zfsdsk));
728344399Skevans    zdsk->dsk.drive = *(uint8_t *)PTOV(ARGS);
729344399Skevans    zdsk->dsk.type = zdsk->dsk.drive & DRV_HARD ? TYPE_AD : TYPE_FD;
730344399Skevans    zdsk->dsk.unit = zdsk->dsk.drive & DRV_MASK;
731344399Skevans    zdsk->dsk.slice = *(uint8_t *)PTOV(ARGS + 1) + 1;
732344399Skevans    zdsk->dsk.part = 0;
733344399Skevans    zdsk->dsk.start = 0;
734344399Skevans    zdsk->dsk.size = drvsize_ext(zdsk);
735185029Spjd
736185029Spjd    bootinfo.bi_version = BOOTINFO_VERSION;
737185029Spjd    bootinfo.bi_size = sizeof(bootinfo);
738200309Sjhb    bootinfo.bi_basemem = bios_basemem / 1024;
739200309Sjhb    bootinfo.bi_extmem = bios_extmem / 1024;
740185029Spjd    bootinfo.bi_memsizes_valid++;
741344399Skevans    bootinfo.bi_bios_dev = zdsk->dsk.drive;
742185029Spjd
743344399Skevans    bootdev = MAKEBOOTDEV(dev_maj[zdsk->dsk.type],
744344399Skevans			  zdsk->dsk.slice, zdsk->dsk.unit, zdsk->dsk.part);
745185029Spjd
746185029Spjd    /* Process configuration file */
747185029Spjd
748185029Spjd    autoboot = 1;
749185029Spjd
750185029Spjd    zfs_init();
751185029Spjd
752185029Spjd    /*
753185029Spjd     * Probe the boot drive first - we will try to boot from whatever
754185029Spjd     * pool we find on that drive.
755185029Spjd     */
756344399Skevans    probe_drive(zdsk);
757185029Spjd
758185029Spjd    /*
759185029Spjd     * Probe the rest of the drives that the bios knows about. This
760185029Spjd     * will find any other available pools and it may fill in missing
761185029Spjd     * vdevs for the boot pool.
762185029Spjd     */
763212805Spjd#ifndef VIRTUALBOX
764212805Spjd    for (i = 0; i < *(unsigned char *)PTOV(BIOS_NUMDRIVES); i++)
765212805Spjd#else
766212805Spjd    for (i = 0; i < MAXBDDEV; i++)
767212805Spjd#endif
768212805Spjd    {
769185029Spjd	if ((i | DRV_HARD) == *(uint8_t *)PTOV(ARGS))
770185029Spjd	    continue;
771185029Spjd
772192194Sdfr	if (!int13probe(i | DRV_HARD))
773192194Sdfr	    break;
774192194Sdfr
775344399Skevans	zdsk = calloc(1, sizeof(struct zfsdsk));
776344399Skevans	zdsk->dsk.drive = i | DRV_HARD;
777344399Skevans	zdsk->dsk.type = zdsk->dsk.drive & TYPE_AD;
778344399Skevans	zdsk->dsk.unit = i;
779344399Skevans	zdsk->dsk.slice = 0;
780344399Skevans	zdsk->dsk.part = 0;
781344399Skevans	zdsk->dsk.start = 0;
782344399Skevans	zdsk->dsk.size = drvsize_ext(zdsk);
783344399Skevans	probe_drive(zdsk);
784185029Spjd    }
785185029Spjd
786185029Spjd    /*
787241294Savg     * The first discovered pool, if any, is the pool.
788185029Spjd     */
789241294Savg    spa = spa_get_primary();
790185029Spjd    if (!spa) {
791241294Savg	printf("%s: No ZFS pools located, can't boot\n", BOOTPROG);
792241294Savg	for (;;)
793241294Savg	    ;
794185029Spjd    }
795185029Spjd
796241293Savg    primary_spa = spa;
797241293Savg    primary_vdev = spa_get_primary_vdev(spa);
798241293Savg
799308914Savg    nextboot = 0;
800308914Savg    rc  = vdev_read_pad2(primary_vdev, cmd, sizeof(cmd));
801308914Savg    if (vdev_clear_pad2(primary_vdev))
802308914Savg	printf("failed to clear pad2 area of primary vdev\n");
803308914Savg    if (rc == 0) {
804308914Savg	if (*cmd) {
805308914Savg	    /*
806308914Savg	     * We could find an old-style ZFS Boot Block header here.
807308914Savg	     * Simply ignore it.
808308914Savg	     */
809308914Savg	    if (*(uint64_t *)cmd != 0x2f5b007b10c) {
810308914Savg		/*
811308914Savg		 * Note that parse() is destructive to cmd[] and we also want
812308914Savg		 * to honor RBX_QUIET option that could be present in cmd[].
813308914Savg		 */
814308914Savg		nextboot = 1;
815308914Savg		memcpy(cmddup, cmd, sizeof(cmd));
816329099Skevans		if (parse_cmd()) {
817308914Savg		    printf("failed to parse pad2 area of primary vdev\n");
818308914Savg		    reboot();
819308914Savg		}
820308914Savg		if (!OPT_CHECK(RBX_QUIET))
821308914Savg		    printf("zfs nextboot: %s\n", cmddup);
822308914Savg	    }
823308914Savg	    /* Do not process this command twice */
824308914Savg	    *cmd = 0;
825308914Savg	}
826308914Savg    } else
827308914Savg	printf("failed to read pad2 area of primary vdev\n");
828308914Savg
829308914Savg    /* Mount ZFS only if it's not already mounted via nextboot parsing. */
830308914Savg    if (zfsmount.spa == NULL &&
831308914Savg	(zfs_spa_init(spa) != 0 || zfs_mount(spa, 0, &zfsmount) != 0)) {
832235329Savg	printf("%s: failed to mount default pool %s\n",
833235329Savg	    BOOTPROG, spa->spa_name);
834235329Savg	autoboot = 0;
835235329Savg    } else if (zfs_lookup(&zfsmount, PATH_CONFIG, &dn) == 0 ||
836235329Savg        zfs_lookup(&zfsmount, PATH_DOTCONFIG, &dn) == 0) {
837185029Spjd	off = 0;
838198079Sjhb	zfs_read(spa, &dn, &off, cmd, sizeof(cmd));
839185029Spjd    }
840185029Spjd
841185029Spjd    if (*cmd) {
842234339Savg	/*
843329099Skevans	 * Note that parse_cmd() is destructive to cmd[] and we also want
844234339Savg	 * to honor RBX_QUIET option that could be present in cmd[].
845234339Savg	 */
846234339Savg	memcpy(cmddup, cmd, sizeof(cmd));
847329099Skevans	if (parse_cmd())
848185029Spjd	    autoboot = 0;
849234339Savg	if (!OPT_CHECK(RBX_QUIET))
850241288Savg	    printf("%s: %s\n", PATH_CONFIG, cmddup);
851185029Spjd	/* Do not process this command twice */
852185029Spjd	*cmd = 0;
853185029Spjd    }
854185029Spjd
855308914Savg    /* Do not risk waiting at the prompt forever. */
856308914Savg    if (nextboot && !autoboot)
857308914Savg	reboot();
858308914Savg
859185029Spjd    if (autoboot && !*kname) {
860348866Skevans	/*
861348866Skevans	 * Iterate through the list of loader and kernel paths, trying to load.
862348866Skevans	 * If interrupted by a keypress, or in case of failure, drop the user
863348866Skevans	 * to the boot2 prompt.
864348866Skevans	 */
865348866Skevans	for (i = 0; i < nitems(loadpath); i++) {
866348866Skevans	    memcpy(kname, loadpath[i].p, loadpath[i].len);
867348866Skevans	    if (keyhit(3))
868348866Skevans		break;
869185029Spjd	    load();
870185029Spjd	}
871185029Spjd    }
872185029Spjd
873185029Spjd    /* Present the user with the boot2 prompt. */
874185029Spjd
875185029Spjd    for (;;) {
876235329Savg	if (!autoboot || !OPT_CHECK(RBX_QUIET)) {
877235329Savg	    printf("\nFreeBSD/x86 boot\n");
878235329Savg	    if (zfs_rlookup(spa, zfsmount.rootobj, rootname) != 0)
879241288Savg		printf("Default: %s/<0x%llx>:%s\n"
880235329Savg		       "boot: ",
881235329Savg		       spa->spa_name, zfsmount.rootobj, kname);
882241288Savg	    else if (rootname[0] != '\0')
883241288Savg		printf("Default: %s/%s:%s\n"
884241288Savg		       "boot: ",
885241288Savg		       spa->spa_name, rootname, kname);
886235329Savg	    else
887241288Savg		printf("Default: %s:%s\n"
888235329Savg		       "boot: ",
889241288Savg		       spa->spa_name, kname);
890235329Savg	}
891185029Spjd	if (ioctrl & IO_SERIAL)
892185029Spjd	    sio_flush();
893213136Spjd	if (!autoboot || keyhit(5))
894213136Spjd	    getstr(cmd, sizeof(cmd));
895185029Spjd	else if (!autoboot || !OPT_CHECK(RBX_QUIET))
896185029Spjd	    putchar('\n');
897185029Spjd	autoboot = 0;
898329099Skevans	if (parse_cmd())
899185029Spjd	    putchar('\a');
900185029Spjd	else
901185029Spjd	    load();
902185029Spjd    }
903185029Spjd}
904185029Spjd
/*
 * XXX - btxld needs an exit() symbol in order to link the boot2 binary, so
 * this wrapper must not be removed.  It only forwards the status code x to
 * __exit().
 */
void
exit(int x)
{
    __exit(x);
}
911185029Spjd
/*
 * Leave the boot program by calling __exit() with a status of 0.  Callers
 * use this to bail out instead of waiting at the boot prompt.
 */
void
reboot(void)
{
    __exit(0);
}
917308914Savg
918185029Spjdstatic void
919185029Spjdload(void)
920185029Spjd{
921185029Spjd    union {
922185029Spjd	struct exec ex;
923185029Spjd	Elf32_Ehdr eh;
924185029Spjd    } hdr;
925185029Spjd    static Elf32_Phdr ep[2];
926185029Spjd    static Elf32_Shdr es[2];
927185029Spjd    caddr_t p;
928185029Spjd    dnode_phys_t dn;
929185029Spjd    off_t off;
930185029Spjd    uint32_t addr, x;
931185029Spjd    int fmt, i, j;
932185029Spjd
933235329Savg    if (zfs_lookup(&zfsmount, kname, &dn)) {
934235329Savg	printf("\nCan't find %s\n", kname);
935185029Spjd	return;
936185029Spjd    }
937185029Spjd    off = 0;
938185029Spjd    if (xfsread(&dn, &off, &hdr, sizeof(hdr)))
939185029Spjd	return;
940185029Spjd    if (N_GETMAGIC(hdr.ex) == ZMAGIC)
941185029Spjd	fmt = 0;
942185029Spjd    else if (IS_ELF(hdr.eh))
943185029Spjd	fmt = 1;
944185029Spjd    else {
945185029Spjd	printf("Invalid %s\n", "format");
946185029Spjd	return;
947185029Spjd    }
948185029Spjd    if (fmt == 0) {
949185029Spjd	addr = hdr.ex.a_entry & 0xffffff;
950185029Spjd	p = PTOV(addr);
951185029Spjd	off = PAGE_SIZE;
952185029Spjd	if (xfsread(&dn, &off, p, hdr.ex.a_text))
953185029Spjd	    return;
954185029Spjd	p += roundup2(hdr.ex.a_text, PAGE_SIZE);
955185029Spjd	if (xfsread(&dn, &off, p, hdr.ex.a_data))
956185029Spjd	    return;
957185029Spjd	p += hdr.ex.a_data + roundup2(hdr.ex.a_bss, PAGE_SIZE);
958185029Spjd	bootinfo.bi_symtab = VTOP(p);
959185029Spjd	memcpy(p, &hdr.ex.a_syms, sizeof(hdr.ex.a_syms));
960185029Spjd	p += sizeof(hdr.ex.a_syms);
961185029Spjd	if (hdr.ex.a_syms) {
962185029Spjd	    if (xfsread(&dn, &off, p, hdr.ex.a_syms))
963185029Spjd		return;
964185029Spjd	    p += hdr.ex.a_syms;
965185029Spjd	    if (xfsread(&dn, &off, p, sizeof(int)))
966185029Spjd		return;
967185029Spjd	    x = *(uint32_t *)p;
968185029Spjd	    p += sizeof(int);
969185029Spjd	    x -= sizeof(int);
970185029Spjd	    if (xfsread(&dn, &off, p, x))
971185029Spjd		return;
972185029Spjd	    p += x;
973185029Spjd	}
974185029Spjd    } else {
975185029Spjd	off = hdr.eh.e_phoff;
976185029Spjd	for (j = i = 0; i < hdr.eh.e_phnum && j < 2; i++) {
977185029Spjd	    if (xfsread(&dn, &off, ep + j, sizeof(ep[0])))
978185029Spjd		return;
979185029Spjd	    if (ep[j].p_type == PT_LOAD)
980185029Spjd		j++;
981185029Spjd	}
982185029Spjd	for (i = 0; i < 2; i++) {
983185029Spjd	    p = PTOV(ep[i].p_paddr & 0xffffff);
984185029Spjd	    off = ep[i].p_offset;
985185029Spjd	    if (xfsread(&dn, &off, p, ep[i].p_filesz))
986185029Spjd		return;
987185029Spjd	}
988185029Spjd	p += roundup2(ep[1].p_memsz, PAGE_SIZE);
989185029Spjd	bootinfo.bi_symtab = VTOP(p);
990185029Spjd	if (hdr.eh.e_shnum == hdr.eh.e_shstrndx + 3) {
991185029Spjd	    off = hdr.eh.e_shoff + sizeof(es[0]) *
992185029Spjd		(hdr.eh.e_shstrndx + 1);
993185029Spjd	    if (xfsread(&dn, &off, &es, sizeof(es)))
994185029Spjd		return;
995185029Spjd	    for (i = 0; i < 2; i++) {
996185029Spjd		memcpy(p, &es[i].sh_size, sizeof(es[i].sh_size));
997185029Spjd		p += sizeof(es[i].sh_size);
998185029Spjd		off = es[i].sh_offset;
999185029Spjd		if (xfsread(&dn, &off, p, es[i].sh_size))
1000185029Spjd		    return;
1001185029Spjd		p += es[i].sh_size;
1002185029Spjd	    }
1003185029Spjd	}
1004185029Spjd	addr = hdr.eh.e_entry & 0xffffff;
1005185029Spjd    }
1006185029Spjd    bootinfo.bi_esymtab = VTOP(p);
1007185029Spjd    bootinfo.bi_kernelname = VTOP(kname);
1008235329Savg    zfsargs.size = sizeof(zfsargs);
1009235329Savg    zfsargs.pool = zfsmount.spa->spa_guid;
1010235329Savg    zfsargs.root = zfsmount.rootobj;
1011241293Savg    zfsargs.primary_pool = primary_spa->spa_guid;
1012296963Sallanjude#ifdef LOADER_GELI_SUPPORT
1013329099Skevans    explicit_bzero(gelipw, sizeof(gelipw));
1014344399Skevans    export_geli_boot_data(&zfsargs.gelidata);
1015296963Sallanjude#endif
1016241293Savg    if (primary_vdev != NULL)
1017241293Savg	zfsargs.primary_vdev = primary_vdev->v_guid;
1018241293Savg    else
1019241293Savg	printf("failed to detect primary vdev\n");
1020344399Skevans    /*
1021344399Skevans     * Note that the zfsargs struct is passed by value, not by pointer.  Code in
1022344399Skevans     * btxldr.S copies the values from the entry stack to a fixed location
1023344399Skevans     * within loader(8) at startup due to the presence of KARGS_FLAGS_EXTARG.
1024344399Skevans     */
1025185029Spjd    __exec((caddr_t)addr, RB_BOOTINFO | (opts & RBX_MASK),
1026185029Spjd	   bootdev,
1027235329Savg	   KARGS_FLAGS_ZFS | KARGS_FLAGS_EXTARG,
1028185029Spjd	   (uint32_t) spa->spa_guid,
1029185029Spjd	   (uint32_t) (spa->spa_guid >> 32),
1030235329Savg	   VTOP(&bootinfo),
1031235329Savg	   zfsargs);
1032185029Spjd}
1033185029Spjd
1034185029Spjdstatic int
1035241288Savgzfs_mount_ds(char *dsname)
1036241288Savg{
1037241288Savg    uint64_t newroot;
1038241288Savg    spa_t *newspa;
1039241288Savg    char *q;
1040241288Savg
1041241288Savg    q = strchr(dsname, '/');
1042241288Savg    if (q)
1043241288Savg	*q++ = '\0';
1044241288Savg    newspa = spa_find_by_name(dsname);
1045241288Savg    if (newspa == NULL) {
1046241288Savg	printf("\nCan't find ZFS pool %s\n", dsname);
1047241288Savg	return -1;
1048241288Savg    }
1049241288Savg
1050241288Savg    if (zfs_spa_init(newspa))
1051241288Savg	return -1;
1052241288Savg
1053241288Savg    newroot = 0;
1054241288Savg    if (q) {
1055241288Savg	if (zfs_lookup_dataset(newspa, q, &newroot)) {
1056241288Savg	    printf("\nCan't find dataset %s in ZFS pool %s\n",
1057241288Savg		    q, newspa->spa_name);
1058241288Savg	    return -1;
1059241288Savg	}
1060241288Savg    }
1061241288Savg    if (zfs_mount(newspa, newroot, &zfsmount)) {
1062241288Savg	printf("\nCan't mount ZFS dataset\n");
1063241288Savg	return -1;
1064241288Savg    }
1065241288Savg    spa = newspa;
1066241288Savg    return (0);
1067241288Savg}
1068241288Savg
1069241288Savgstatic int
1070329099Skevansparse_cmd(void)
1071185029Spjd{
1072185029Spjd    char *arg = cmd;
1073185029Spjd    char *ep, *p, *q;
1074185029Spjd    const char *cp;
1075185029Spjd    int c, i, j;
1076185029Spjd
1077185029Spjd    while ((c = *arg++)) {
1078185029Spjd	if (c == ' ' || c == '\t' || c == '\n')
1079185029Spjd	    continue;
1080185029Spjd	for (p = arg; *p && *p != '\n' && *p != ' ' && *p != '\t'; p++);
1081185029Spjd	ep = p;
1082185029Spjd	if (*p)
1083185029Spjd	    *p++ = 0;
1084185029Spjd	if (c == '-') {
1085185029Spjd	    while ((c = *arg++)) {
1086185029Spjd		if (c == 'P') {
1087185029Spjd		    if (*(uint8_t *)PTOV(0x496) & 0x10) {
1088185029Spjd			cp = "yes";
1089185029Spjd		    } else {
1090185029Spjd			opts |= OPT_SET(RBX_DUAL) | OPT_SET(RBX_SERIAL);
1091185029Spjd			cp = "no";
1092185029Spjd		    }
1093185029Spjd		    printf("Keyboard: %s\n", cp);
1094185029Spjd		    continue;
1095185029Spjd		} else if (c == 'S') {
1096185029Spjd		    j = 0;
1097185029Spjd		    while ((unsigned int)(i = *arg++ - '0') <= 9)
1098185029Spjd			j = j * 10 + i;
1099185029Spjd		    if (j > 0 && i == -'0') {
1100185029Spjd			comspeed = j;
1101185029Spjd			break;
1102185029Spjd		    }
1103185029Spjd		    /* Fall through to error below ('S' not in optstr[]). */
1104185029Spjd		}
1105185029Spjd		for (i = 0; c != optstr[i]; i++)
1106185029Spjd		    if (i == NOPT - 1)
1107185029Spjd			return -1;
1108185029Spjd		opts ^= OPT_SET(flags[i]);
1109185029Spjd	    }
1110185029Spjd	    ioctrl = OPT_CHECK(RBX_DUAL) ? (IO_SERIAL|IO_KEYBOARD) :
1111185029Spjd		     OPT_CHECK(RBX_SERIAL) ? IO_SERIAL : IO_KEYBOARD;
1112241301Savg	    if (ioctrl & IO_SERIAL) {
1113241301Savg	        if (sio_init(115200 / comspeed) != 0)
1114241301Savg		    ioctrl &= ~IO_SERIAL;
1115241301Savg	    }
1116185029Spjd	} if (c == '?') {
1117185029Spjd	    dnode_phys_t dn;
1118185029Spjd
1119235329Savg	    if (zfs_lookup(&zfsmount, arg, &dn) == 0) {
1120185029Spjd		zap_list(spa, &dn);
1121185029Spjd	    }
1122185029Spjd	    return -1;
1123185029Spjd	} else {
1124185029Spjd	    arg--;
1125185029Spjd
1126185029Spjd	    /*
1127185029Spjd	     * Report pool status if the comment is 'status'. Lets
1128185029Spjd	     * hope no-one wants to load /status as a kernel.
1129185029Spjd	     */
1130185029Spjd	    if (!strcmp(arg, "status")) {
1131185029Spjd		spa_all_status();
1132185029Spjd		return -1;
1133185029Spjd	    }
1134185029Spjd
1135185029Spjd	    /*
1136241288Savg	     * If there is "zfs:" prefix simply ignore it.
1137241288Savg	     */
1138241288Savg	    if (strncmp(arg, "zfs:", 4) == 0)
1139241288Savg		arg += 4;
1140241288Savg
1141241288Savg	    /*
1142185029Spjd	     * If there is a colon, switch pools.
1143185029Spjd	     */
1144241288Savg	    q = strchr(arg, ':');
1145185029Spjd	    if (q) {
1146241288Savg		*q++ = '\0';
1147241288Savg		if (zfs_mount_ds(arg) != 0)
1148185029Spjd		    return -1;
1149241288Savg		arg = q;
1150185029Spjd	    }
1151185029Spjd	    if ((i = ep - arg)) {
1152185029Spjd		if ((size_t)i >= sizeof(kname))
1153185029Spjd		    return -1;
1154185029Spjd		memcpy(kname, arg, i + 1);
1155185029Spjd	    }
1156185029Spjd	}
1157185029Spjd	arg = p;
1158185029Spjd    }
1159185029Spjd    return 0;
1160185029Spjd}
1161