zfsboot.c revision 298826
1185029Spjd/*-
2185029Spjd * Copyright (c) 1998 Robert Nordier
3185029Spjd * All rights reserved.
4185029Spjd *
5185029Spjd * Redistribution and use in source and binary forms are freely
6185029Spjd * permitted provided that the above copyright notice and this
7185029Spjd * paragraph and the following disclaimer are duplicated in all
8185029Spjd * such forms.
9185029Spjd *
10185029Spjd * This software is provided "AS IS" and without any express or
11185029Spjd * implied warranties, including, without limitation, the implied
12185029Spjd * warranties of merchantability and fitness for a particular
13185029Spjd * purpose.
14185029Spjd */
15185029Spjd
16185029Spjd#include <sys/cdefs.h>
17185029Spjd__FBSDID("$FreeBSD: head/sys/boot/i386/zfsboot/zfsboot.c 298826 2016-04-30 00:26:38Z pfg $");
18185029Spjd
19185029Spjd#include <sys/param.h>
20185029Spjd#include <sys/errno.h>
21185029Spjd#include <sys/diskmbr.h>
22185096Sdfr#ifdef GPT
23185096Sdfr#include <sys/gpt.h>
24185096Sdfr#endif
25185029Spjd#include <sys/reboot.h>
26185029Spjd#include <sys/queue.h>
27185029Spjd
28185029Spjd#include <machine/bootinfo.h>
29185029Spjd#include <machine/elf.h>
30200309Sjhb#include <machine/pc/bios.h>
31185029Spjd
32185029Spjd#include <stdarg.h>
33185029Spjd#include <stddef.h>
34185029Spjd
35185029Spjd#include <a.out.h>
36185029Spjd
37185029Spjd#include <btxv86.h>
38185029Spjd
39185029Spjd#include "lib.h"
40213136Spjd#include "rbx.h"
41213136Spjd#include "drv.h"
42213136Spjd#include "util.h"
43213136Spjd#include "cons.h"
44235154Savg#include "bootargs.h"
45294765Simp#include "paths.h"
46185029Spjd
47235329Savg#include "libzfs.h"
48235329Savg
49297629Sallanjude#define ARGS			0x900
50297629Sallanjude#define NOPT			14
51297629Sallanjude#define NDEV			3
52185029Spjd
53297629Sallanjude#define BIOS_NUMDRIVES		0x475
54297629Sallanjude#define DRV_HARD		0x80
55297629Sallanjude#define DRV_MASK		0x7f
56185029Spjd
57297629Sallanjude#define TYPE_AD			0
58297629Sallanjude#define TYPE_DA			1
59297629Sallanjude#define TYPE_MAXHARD		TYPE_DA
60297629Sallanjude#define TYPE_FD			2
61185029Spjd
62297629Sallanjude#define DEV_GELIBOOT_BSIZE	4096
63297629Sallanjude
64185029Spjdextern uint32_t _end;
65185029Spjd
66185096Sdfr#ifdef GPT
67185096Sdfrstatic const uuid_t freebsd_zfs_uuid = GPT_ENT_TYPE_FREEBSD_ZFS;
68185096Sdfr#endif
/*
 * Boot option tables.  parse() looks an option letter up in optstr[] and
 * toggles the flags[] entry found at the same index, so the two tables
 * must list their entries in the same order.  optstr[] holds exactly NOPT
 * characters and therefore no terminating NUL; it must only be indexed,
 * never treated as a C string.
 */
69185029Spjdstatic const char optstr[NOPT] = "DhaCcdgmnpqrsv"; /* Also 'P', 'S' (special-cased in parse()) */
70185029Spjdstatic const unsigned char flags[NOPT] = {
71185029Spjd    RBX_DUAL,
72185029Spjd    RBX_SERIAL,
73185029Spjd    RBX_ASKNAME,
74185029Spjd    RBX_CDROM,
75185029Spjd    RBX_CONFIG,
76185029Spjd    RBX_KDB,
77185029Spjd    RBX_GDB,
78185029Spjd    RBX_MUTE,
79185029Spjd    RBX_NOINTR,
80185029Spjd    RBX_PAUSE,
81185029Spjd    RBX_QUIET,
82185029Spjd    RBX_DFLTROOT,
83185029Spjd    RBX_SINGLE,
84185029Spjd    RBX_VERBOSE
85185029Spjd};
86213136Spjduint32_t opts;
87185029Spjd
88185029Spjdstatic const char *const dev_nm[NDEV] = {"ad", "da", "fd"};
89185029Spjdstatic const unsigned char dev_maj[NDEV] = {30, 4, 2};
90185029Spjd
91185029Spjdstatic char cmd[512];
92234339Savgstatic char cmddup[512];
93185029Spjdstatic char kname[1024];
94235329Savgstatic char rootname[256];
95185029Spjdstatic int comspeed = SIOSPD;
96185029Spjdstatic struct bootinfo bootinfo;
97185029Spjdstatic uint32_t bootdev;
98235329Savgstatic struct zfs_boot_args zfsargs;
99235329Savgstatic struct zfsmount zfsmount;
100185029Spjd
101200309Sjhbvm_offset_t	high_heap_base;
102200309Sjhbuint32_t	bios_basemem, bios_extmem, high_heap_size;
103200309Sjhb
104200309Sjhbstatic struct bios_smap smap;
105200309Sjhb
106200309Sjhb/*
107200309Sjhb * The minimum amount of memory to reserve in bios_extmem for the heap.
108200309Sjhb */
109297629Sallanjude#define	HEAP_MIN		(3 * 1024 * 1024)
110200309Sjhb
111200309Sjhbstatic char *heap_next;
112200309Sjhbstatic char *heap_end;
113200309Sjhb
114185029Spjd/* Buffers that must not span a 64k boundary. */
115297629Sallanjude#define READ_BUF_SIZE		8192
/*
 * Bounce buffers handed to drvread().  main() places the structure on a
 * 64k boundary just past _end; at 2 * READ_BUF_SIZE bytes it therefore
 * cannot straddle the next 64k boundary.
 */
116185029Spjdstruct dmadat {
117185029Spjd	char rdbuf[READ_BUF_SIZE];	/* bounce buffer used by vdev_read() */
118185029Spjd	char secbuf[READ_BUF_SIZE];	/* GPT header/entries and MBR sector in probe_drive() */
119185029Spjd};
120185029Spjdstatic struct dmadat *dmadat;
121185029Spjd
122185029Spjdvoid exit(int);
123185029Spjdstatic void load(void);
124185029Spjdstatic int parse(void);
125200309Sjhbstatic void bios_getmem(void);
126296963Sallanjudevoid *malloc(size_t n);
127296963Sallanjudevoid free(void *ptr);
128185029Spjd
129296963Sallanjudevoid *
130185029Spjdmalloc(size_t n)
131185029Spjd{
132185029Spjd	char *p = heap_next;
133185029Spjd	if (p + n > heap_end) {
134185029Spjd		printf("malloc failure\n");
135185029Spjd		for (;;)
136185029Spjd		    ;
137296963Sallanjude		/* NOTREACHED */
138296963Sallanjude		return (0);
139185029Spjd	}
140185029Spjd	heap_next += n;
141296963Sallanjude	return (p);
142185029Spjd}
143185029Spjd
/*
 * Counterpart of the bump allocator above: nothing is ever handed back,
 * so releasing a pointer deliberately does nothing.
 */
void
free(void *ptr)
{
	(void)ptr;
}
150296963Sallanjude
/*
 * Return a freshly allocated copy of the NUL-terminated string s.
 */
static char *
strdup(const char *s)
{
	size_t len = strlen(s) + 1;	/* include the terminator */
	char *copy = malloc(len);

	memcpy(copy, s, len);
	return (copy);
}
158185029Spjd
159296963Sallanjude#ifdef LOADER_GELI_SUPPORT
160296963Sallanjude#include "geliboot.c"
161296963Sallanjudestatic char gelipw[GELI_PW_MAXLEN];
162296963Sallanjude#endif
163296963Sallanjude
164185029Spjd#include "zfsimpl.c"
165185029Spjd
166185029Spjd/*
167185029Spjd * Read from a dnode (which must be from a ZPL filesystem).
168185029Spjd */
169185029Spjdstatic int
170185029Spjdzfs_read(spa_t *spa, const dnode_phys_t *dnode, off_t *offp, void *start, size_t size)
171185029Spjd{
172185029Spjd	const znode_phys_t *zp = (const znode_phys_t *) dnode->dn_bonus;
173185029Spjd	size_t n;
174185029Spjd	int rc;
175185029Spjd
176185029Spjd	n = size;
177185029Spjd	if (*offp + n > zp->zp_size)
178185029Spjd		n = zp->zp_size - *offp;
179185029Spjd
180185029Spjd	rc = dnode_read(spa, dnode, *offp, start, n);
181185029Spjd	if (rc)
182185029Spjd		return (-1);
183185029Spjd	*offp += n;
184185029Spjd
185185029Spjd	return (n);
186185029Spjd}
187185029Spjd
188185029Spjd/*
189185029Spjd * Current ZFS pool
190185029Spjd */
191235329Savgstatic spa_t *spa;
192241293Savgstatic spa_t *primary_spa;
193241293Savgstatic vdev_t *primary_vdev;
194185029Spjd
195185029Spjd/*
196185029Spjd * A wrapper for dskread that doesn't have to worry about whether the
197185029Spjd * buffer pointer crosses a 64k boundary.
198185029Spjd */
199185029Spjdstatic int
200185029Spjdvdev_read(vdev_t *vdev, void *priv, off_t off, void *buf, size_t bytes)
201185029Spjd{
202185029Spjd	char *p;
203297629Sallanjude	daddr_t lba, alignlba;
204297629Sallanjude	off_t alignoff, diff;
205297629Sallanjude	unsigned int nb, alignnb;
206185029Spjd	struct dsk *dsk = (struct dsk *) priv;
207185029Spjd
208185029Spjd	if ((off & (DEV_BSIZE - 1)) || (bytes & (DEV_BSIZE - 1)))
209185029Spjd		return -1;
210185029Spjd
211185029Spjd	p = buf;
212185029Spjd	lba = off / DEV_BSIZE;
213213136Spjd	lba += dsk->start;
214297629Sallanjude	/* Align reads to 4k else 4k sector GELIs will not decrypt. */
215297629Sallanjude	alignoff = off & ~ (off_t)(DEV_GELIBOOT_BSIZE - 1);
216297629Sallanjude	/* Round LBA down to nearest multiple of DEV_GELIBOOT_BSIZE bytes. */
217297629Sallanjude	alignlba = alignoff / DEV_BSIZE;
218297629Sallanjude	/*
219297629Sallanjude	 * The read must be aligned to DEV_GELIBOOT_BSIZE bytes relative to the
220297629Sallanjude	 * start of the GELI partition, not the start of the actual disk.
221297629Sallanjude	 */
222297629Sallanjude	alignlba += dsk->start;
223297629Sallanjude	diff = (lba - alignlba) * DEV_BSIZE;
224297629Sallanjude
225185029Spjd	while (bytes > 0) {
226185029Spjd		nb = bytes / DEV_BSIZE;
227185029Spjd		if (nb > READ_BUF_SIZE / DEV_BSIZE)
228185029Spjd			nb = READ_BUF_SIZE / DEV_BSIZE;
229297629Sallanjude		/*
230297629Sallanjude		 * Ensure that the read size plus the leading offset does not
231297629Sallanjude		 * exceed the size of the read buffer.
232297629Sallanjude		 */
233297629Sallanjude		if (nb * DEV_BSIZE + diff > READ_BUF_SIZE)
234297629Sallanjude		    nb -= diff / DEV_BSIZE;
235297629Sallanjude		/*
236297629Sallanjude		 * Round the number of blocks to read up to the nearest multiple
237297629Sallanjude		 * of DEV_GELIBOOT_BSIZE.
238297629Sallanjude		 */
239297629Sallanjude		alignnb = nb + (diff / DEV_BSIZE) +
240297629Sallanjude		    (DEV_GELIBOOT_BSIZE / DEV_BSIZE - 1) & ~
241297629Sallanjude		    (unsigned int)(DEV_GELIBOOT_BSIZE / DEV_BSIZE - 1);
242297629Sallanjude
243297629Sallanjude		if (drvread(dsk, dmadat->rdbuf, alignlba, alignnb))
244185029Spjd			return -1;
245296963Sallanjude#ifdef LOADER_GELI_SUPPORT
246296963Sallanjude		/* decrypt */
247296963Sallanjude		if (is_geli(dsk) == 0) {
248297629Sallanjude			if (geli_read(dsk, ((alignlba - dsk->start) *
249297629Sallanjude			    DEV_BSIZE), dmadat->rdbuf, alignnb * DEV_BSIZE))
250297629Sallanjude				return (-1);
251296963Sallanjude		}
252296963Sallanjude#endif
253297629Sallanjude		memcpy(p, dmadat->rdbuf + diff, nb * DEV_BSIZE);
254185029Spjd		p += nb * DEV_BSIZE;
255185029Spjd		lba += nb;
256297629Sallanjude		alignlba += alignnb;
257185029Spjd		bytes -= nb * DEV_BSIZE;
258297629Sallanjude		/* Don't need the leading offset after the first block. */
259297629Sallanjude		diff = 0;
260185029Spjd	}
261185029Spjd
262185029Spjd	return 0;
263185029Spjd}
264185029Spjd
265185029Spjdstatic int
266185029Spjdxfsread(const dnode_phys_t *dnode, off_t *offp, void *buf, size_t nbyte)
267185029Spjd{
268185029Spjd    if ((size_t)zfs_read(spa, dnode, offp, buf, nbyte) != nbyte) {
269235329Savg	printf("Invalid format\n");
270185029Spjd	return -1;
271185029Spjd    }
272185029Spjd    return 0;
273185029Spjd}
274185029Spjd
275200309Sjhbstatic void
276200309Sjhbbios_getmem(void)
277185029Spjd{
278200309Sjhb    uint64_t size;
279185029Spjd
280200309Sjhb    /* Parse system memory map */
281200309Sjhb    v86.ebx = 0;
282200309Sjhb    do {
283200309Sjhb	v86.ctl = V86_FLAGS;
284200309Sjhb	v86.addr = 0x15;		/* int 0x15 function 0xe820*/
285200309Sjhb	v86.eax = 0xe820;
286200309Sjhb	v86.ecx = sizeof(struct bios_smap);
287200309Sjhb	v86.edx = SMAP_SIG;
288200309Sjhb	v86.es = VTOPSEG(&smap);
289200309Sjhb	v86.edi = VTOPOFF(&smap);
290200309Sjhb	v86int();
291292682Sjhb	if (V86_CY(v86.efl) || (v86.eax != SMAP_SIG))
292200309Sjhb	    break;
293200309Sjhb	/* look for a low-memory segment that's large enough */
294200309Sjhb	if ((smap.type == SMAP_TYPE_MEMORY) && (smap.base == 0) &&
295200309Sjhb	    (smap.length >= (512 * 1024)))
296200309Sjhb	    bios_basemem = smap.length;
297200309Sjhb	/* look for the first segment in 'extended' memory */
298200309Sjhb	if ((smap.type == SMAP_TYPE_MEMORY) && (smap.base == 0x100000)) {
299200309Sjhb	    bios_extmem = smap.length;
300200309Sjhb	}
301200309Sjhb
302200309Sjhb	/*
303200309Sjhb	 * Look for the largest segment in 'extended' memory beyond
304200309Sjhb	 * 1MB but below 4GB.
305200309Sjhb	 */
306200309Sjhb	if ((smap.type == SMAP_TYPE_MEMORY) && (smap.base > 0x100000) &&
307200309Sjhb	    (smap.base < 0x100000000ull)) {
308200309Sjhb	    size = smap.length;
309200309Sjhb
310200309Sjhb	    /*
311200309Sjhb	     * If this segment crosses the 4GB boundary, truncate it.
312200309Sjhb	     */
313200309Sjhb	    if (smap.base + size > 0x100000000ull)
314200309Sjhb		size = 0x100000000ull - smap.base;
315200309Sjhb
316200309Sjhb	    if (size > high_heap_size) {
317200309Sjhb		high_heap_size = size;
318200309Sjhb		high_heap_base = smap.base;
319200309Sjhb	    }
320200309Sjhb	}
321200309Sjhb    } while (v86.ebx != 0);
322200309Sjhb
323200309Sjhb    /* Fall back to the old compatibility function for base memory */
324200309Sjhb    if (bios_basemem == 0) {
325200309Sjhb	v86.ctl = 0;
326200309Sjhb	v86.addr = 0x12;		/* int 0x12 */
327200309Sjhb	v86int();
328200309Sjhb
329200309Sjhb	bios_basemem = (v86.eax & 0xffff) * 1024;
330200309Sjhb    }
331200309Sjhb
332200309Sjhb    /* Fall back through several compatibility functions for extended memory */
333200309Sjhb    if (bios_extmem == 0) {
334200309Sjhb	v86.ctl = V86_FLAGS;
335200309Sjhb	v86.addr = 0x15;		/* int 0x15 function 0xe801*/
336200309Sjhb	v86.eax = 0xe801;
337200309Sjhb	v86int();
338292682Sjhb	if (!V86_CY(v86.efl)) {
339200309Sjhb	    bios_extmem = ((v86.ecx & 0xffff) + ((v86.edx & 0xffff) * 64)) * 1024;
340200309Sjhb	}
341200309Sjhb    }
342200309Sjhb    if (bios_extmem == 0) {
343200309Sjhb	v86.ctl = 0;
344200309Sjhb	v86.addr = 0x15;		/* int 0x15 function 0x88*/
345200309Sjhb	v86.eax = 0x8800;
346200309Sjhb	v86int();
347200309Sjhb	bios_extmem = (v86.eax & 0xffff) * 1024;
348200309Sjhb    }
349200309Sjhb
350200309Sjhb    /*
351200309Sjhb     * If we have extended memory and did not find a suitable heap
352200309Sjhb     * region in the SMAP, use the last 3MB of 'extended' memory as a
353200309Sjhb     * high heap candidate.
354200309Sjhb     */
355200309Sjhb    if (bios_extmem >= HEAP_MIN && high_heap_size < HEAP_MIN) {
356200309Sjhb	high_heap_size = HEAP_MIN;
357200309Sjhb	high_heap_base = bios_extmem + 0x100000 - HEAP_MIN;
358200309Sjhb    }
359296963Sallanjude}
360200309Sjhb
361185029Spjd/*
362185029Spjd * Try to detect a device supported by the legacy int13 BIOS
363185029Spjd */
364185029Spjdstatic int
365185029Spjdint13probe(int drive)
366185029Spjd{
367185029Spjd    v86.ctl = V86_FLAGS;
368185029Spjd    v86.addr = 0x13;
369185029Spjd    v86.eax = 0x800;
370185029Spjd    v86.edx = drive;
371185029Spjd    v86int();
372185029Spjd
373292682Sjhb    if (!V86_CY(v86.efl) &&				/* carry clear */
374185029Spjd	((v86.edx & 0xff) != (drive & DRV_MASK))) {	/* unit # OK */
375185029Spjd	if ((v86.ecx & 0x3f) == 0) {			/* absurd sector size */
376185029Spjd		return(0);				/* skip device */
377185029Spjd	}
378185029Spjd	return (1);
379185029Spjd    }
380185029Spjd    return(0);
381185029Spjd}
382185029Spjd
383192194Sdfr/*
384192194Sdfr * We call this when we find a ZFS vdev - ZFS consumes the dsk
385192194Sdfr * structure so we must make a new one.
386192194Sdfr */
387192194Sdfrstatic struct dsk *
388192194Sdfrcopy_dsk(struct dsk *dsk)
389192194Sdfr{
390192194Sdfr    struct dsk *newdsk;
391192194Sdfr
392192194Sdfr    newdsk = malloc(sizeof(struct dsk));
393192194Sdfr    *newdsk = *dsk;
394192194Sdfr    return (newdsk);
395192194Sdfr}
396192194Sdfr
397185029Spjdstatic void
398241294Savgprobe_drive(struct dsk *dsk)
399185029Spjd{
400185096Sdfr#ifdef GPT
401185096Sdfr    struct gpt_hdr hdr;
402185096Sdfr    struct gpt_ent *ent;
403185096Sdfr    unsigned part, entries_per_sec;
404185096Sdfr#endif
405296963Sallanjude    daddr_t slba, elba;
406185029Spjd    struct dos_partition *dp;
407185029Spjd    char *sec;
408185029Spjd    unsigned i;
409185029Spjd
410185029Spjd    /*
411296963Sallanjude     * If we find a vdev on the whole disk, stop here.
412185029Spjd     */
413241294Savg    if (vdev_probe(vdev_read, dsk, NULL) == 0)
414185029Spjd	return;
415185029Spjd
416296963Sallanjude#ifdef LOADER_GELI_SUPPORT
417296963Sallanjude    /*
418296963Sallanjude     * Taste the disk, if it is GELI encrypted, decrypt it and check to see if
419296963Sallanjude     * it is a usable vdev then. Otherwise dig
420296963Sallanjude     * out the partition table and probe each slice/partition
421296963Sallanjude     * in turn for a vdev or GELI encrypted vdev.
422296963Sallanjude     */
423296963Sallanjude    elba = drvsize(dsk);
424296963Sallanjude    if (elba > 0) {
425296963Sallanjude	elba--;
426296963Sallanjude    }
427296963Sallanjude    if (geli_taste(vdev_read, dsk, elba) == 0) {
428296963Sallanjude	if (geli_passphrase(&gelipw, dsk->unit, ':', 0, dsk) == 0) {
429296963Sallanjude	    if (vdev_probe(vdev_read, dsk, NULL) == 0) {
430296963Sallanjude		return;
431296963Sallanjude	    }
432296963Sallanjude	}
433296963Sallanjude    }
434296963Sallanjude#endif /* LOADER_GELI_SUPPORT */
435296963Sallanjude
436185029Spjd    sec = dmadat->secbuf;
437185029Spjd    dsk->start = 0;
438185096Sdfr
439185096Sdfr#ifdef GPT
440185096Sdfr    /*
441185096Sdfr     * First check for GPT.
442185096Sdfr     */
443185096Sdfr    if (drvread(dsk, sec, 1, 1)) {
444185096Sdfr	return;
445185096Sdfr    }
446185096Sdfr    memcpy(&hdr, sec, sizeof(hdr));
447185096Sdfr    if (memcmp(hdr.hdr_sig, GPT_HDR_SIG, sizeof(hdr.hdr_sig)) != 0 ||
448185096Sdfr	hdr.hdr_lba_self != 1 || hdr.hdr_revision < 0x00010000 ||
449185096Sdfr	hdr.hdr_entsz < sizeof(*ent) || DEV_BSIZE % hdr.hdr_entsz != 0) {
450185096Sdfr	goto trymbr;
451185096Sdfr    }
452185096Sdfr
453185096Sdfr    /*
454298826Spfg     * Probe all GPT partitions for the presence of ZFS pools. We
455185096Sdfr     * return the spa_t for the first we find (if requested). This
456185096Sdfr     * will have the effect of booting from the first pool on the
457185096Sdfr     * disk.
458296963Sallanjude     *
459296963Sallanjude     * If no vdev is found, GELI decrypting the device and try again
460185096Sdfr     */
461185096Sdfr    entries_per_sec = DEV_BSIZE / hdr.hdr_entsz;
462185096Sdfr    slba = hdr.hdr_lba_table;
463185096Sdfr    elba = slba + hdr.hdr_entries / entries_per_sec;
464185096Sdfr    while (slba < elba) {
465198420Srnoland	dsk->start = 0;
466185096Sdfr	if (drvread(dsk, sec, slba, 1))
467185096Sdfr	    return;
468185096Sdfr	for (part = 0; part < entries_per_sec; part++) {
469185096Sdfr	    ent = (struct gpt_ent *)(sec + part * hdr.hdr_entsz);
470185096Sdfr	    if (memcmp(&ent->ent_type, &freebsd_zfs_uuid,
471185096Sdfr		     sizeof(uuid_t)) == 0) {
472185096Sdfr		dsk->start = ent->ent_lba_start;
473296963Sallanjude		dsk->slice = part + 1;
474296963Sallanjude		dsk->part = 255;
475241294Savg		if (vdev_probe(vdev_read, dsk, NULL) == 0) {
476185096Sdfr		    /*
477185096Sdfr		     * This slice had a vdev. We need a new dsk
478185096Sdfr		     * structure now since the vdev now owns this one.
479185096Sdfr		     */
480192194Sdfr		    dsk = copy_dsk(dsk);
481185096Sdfr		}
482296963Sallanjude#ifdef LOADER_GELI_SUPPORT
483296963Sallanjude		else if (geli_taste(vdev_read, dsk, ent->ent_lba_end -
484296963Sallanjude			 ent->ent_lba_start) == 0) {
485296963Sallanjude		    if (geli_passphrase(&gelipw, dsk->unit, 'p', dsk->slice, dsk) == 0) {
486296963Sallanjude			/*
487296963Sallanjude			 * This slice has GELI, check it for ZFS.
488296963Sallanjude			 */
489296963Sallanjude			if (vdev_probe(vdev_read, dsk, NULL) == 0) {
490296963Sallanjude			    /*
491296963Sallanjude			     * This slice had a vdev. We need a new dsk
492296963Sallanjude			     * structure now since the vdev now owns this one.
493296963Sallanjude			     */
494296963Sallanjude			    dsk = copy_dsk(dsk);
495296963Sallanjude			}
496296963Sallanjude			break;
497296963Sallanjude		    }
498296963Sallanjude		}
499296963Sallanjude#endif /* LOADER_GELI_SUPPORT */
500185096Sdfr	    }
501185096Sdfr	}
502185096Sdfr	slba++;
503185096Sdfr    }
504185096Sdfr    return;
505185096Sdfrtrymbr:
506296963Sallanjude#endif /* GPT */
507185096Sdfr
508185029Spjd    if (drvread(dsk, sec, DOSBBSECTOR, 1))
509185029Spjd	return;
510185029Spjd    dp = (void *)(sec + DOSPARTOFF);
511185029Spjd
512185029Spjd    for (i = 0; i < NDOSPART; i++) {
513185029Spjd	if (!dp[i].dp_typ)
514185029Spjd	    continue;
515185029Spjd	dsk->start = dp[i].dp_start;
516296963Sallanjude	dsk->slice = i + 1;
517241294Savg	if (vdev_probe(vdev_read, dsk, NULL) == 0) {
518192194Sdfr	    dsk = copy_dsk(dsk);
519185029Spjd	}
520296963Sallanjude#ifdef LOADER_GELI_SUPPORT
521296963Sallanjude	else if (geli_taste(vdev_read, dsk, dp[i].dp_size -
522296963Sallanjude		 dp[i].dp_start) == 0) {
523296963Sallanjude	    if (geli_passphrase(&gelipw, dsk->unit, 's', i, dsk) == 0) {
524296963Sallanjude		/*
525296963Sallanjude		 * This slice has GELI, check it for ZFS.
526296963Sallanjude		 */
527296963Sallanjude		if (vdev_probe(vdev_read, dsk, NULL) == 0) {
528296963Sallanjude		    /*
529296963Sallanjude		     * This slice had a vdev. We need a new dsk
530296963Sallanjude		     * structure now since the vdev now owns this one.
531296963Sallanjude		     */
532296963Sallanjude		    dsk = copy_dsk(dsk);
533296963Sallanjude		}
534296963Sallanjude		break;
535296963Sallanjude	    }
536296963Sallanjude	}
537296963Sallanjude#endif /* LOADER_GELI_SUPPORT */
538185029Spjd    }
539185029Spjd}
540185029Spjd
541185029Spjdint
542185029Spjdmain(void)
543185029Spjd{
544185029Spjd    int autoboot, i;
545185029Spjd    dnode_phys_t dn;
546185029Spjd    off_t off;
547185029Spjd    struct dsk *dsk;
548185029Spjd
549208388Sjhb    dmadat = (void *)(roundup2(__base + (int32_t)&_end, 0x10000) - __base);
550208388Sjhb
551200309Sjhb    bios_getmem();
552200309Sjhb
553200309Sjhb    if (high_heap_size > 0) {
554200309Sjhb	heap_end = PTOV(high_heap_base + high_heap_size);
555200309Sjhb	heap_next = PTOV(high_heap_base);
556200309Sjhb    } else {
557296963Sallanjude	heap_next = (char *)dmadat + sizeof(*dmadat);
558296963Sallanjude	heap_end = (char *)PTOV(bios_basemem);
559200309Sjhb    }
560200309Sjhb
561185029Spjd    dsk = malloc(sizeof(struct dsk));
562185029Spjd    dsk->drive = *(uint8_t *)PTOV(ARGS);
563185029Spjd    dsk->type = dsk->drive & DRV_HARD ? TYPE_AD : TYPE_FD;
564185029Spjd    dsk->unit = dsk->drive & DRV_MASK;
565185029Spjd    dsk->slice = *(uint8_t *)PTOV(ARGS + 1) + 1;
566185029Spjd    dsk->part = 0;
567185029Spjd    dsk->start = 0;
568185029Spjd    dsk->init = 0;
569185029Spjd
570185029Spjd    bootinfo.bi_version = BOOTINFO_VERSION;
571185029Spjd    bootinfo.bi_size = sizeof(bootinfo);
572200309Sjhb    bootinfo.bi_basemem = bios_basemem / 1024;
573200309Sjhb    bootinfo.bi_extmem = bios_extmem / 1024;
574185029Spjd    bootinfo.bi_memsizes_valid++;
575185029Spjd    bootinfo.bi_bios_dev = dsk->drive;
576185029Spjd
577185029Spjd    bootdev = MAKEBOOTDEV(dev_maj[dsk->type],
578185029Spjd			  dsk->slice, dsk->unit, dsk->part),
579185029Spjd
580185029Spjd    /* Process configuration file */
581185029Spjd
582185029Spjd    autoboot = 1;
583185029Spjd
584296963Sallanjude#ifdef LOADER_GELI_SUPPORT
585296963Sallanjude    geli_init();
586296963Sallanjude#endif
587185029Spjd    zfs_init();
588185029Spjd
589185029Spjd    /*
590185029Spjd     * Probe the boot drive first - we will try to boot from whatever
591185029Spjd     * pool we find on that drive.
592185029Spjd     */
593241294Savg    probe_drive(dsk);
594185029Spjd
595185029Spjd    /*
596185029Spjd     * Probe the rest of the drives that the bios knows about. This
597185029Spjd     * will find any other available pools and it may fill in missing
598185029Spjd     * vdevs for the boot pool.
599185029Spjd     */
600212805Spjd#ifndef VIRTUALBOX
601212805Spjd    for (i = 0; i < *(unsigned char *)PTOV(BIOS_NUMDRIVES); i++)
602212805Spjd#else
603212805Spjd    for (i = 0; i < MAXBDDEV; i++)
604212805Spjd#endif
605212805Spjd    {
606185029Spjd	if ((i | DRV_HARD) == *(uint8_t *)PTOV(ARGS))
607185029Spjd	    continue;
608185029Spjd
609192194Sdfr	if (!int13probe(i | DRV_HARD))
610192194Sdfr	    break;
611192194Sdfr
612185029Spjd	dsk = malloc(sizeof(struct dsk));
613185029Spjd	dsk->drive = i | DRV_HARD;
614185029Spjd	dsk->type = dsk->drive & TYPE_AD;
615185029Spjd	dsk->unit = i;
616185029Spjd	dsk->slice = 0;
617185029Spjd	dsk->part = 0;
618185029Spjd	dsk->start = 0;
619185029Spjd	dsk->init = 0;
620241294Savg	probe_drive(dsk);
621185029Spjd    }
622185029Spjd
623185029Spjd    /*
624241294Savg     * The first discovered pool, if any, is the pool.
625185029Spjd     */
626241294Savg    spa = spa_get_primary();
627185029Spjd    if (!spa) {
628241294Savg	printf("%s: No ZFS pools located, can't boot\n", BOOTPROG);
629241294Savg	for (;;)
630241294Savg	    ;
631185029Spjd    }
632185029Spjd
633241293Savg    primary_spa = spa;
634241293Savg    primary_vdev = spa_get_primary_vdev(spa);
635241293Savg
636235329Savg    if (zfs_spa_init(spa) != 0 || zfs_mount(spa, 0, &zfsmount) != 0) {
637235329Savg	printf("%s: failed to mount default pool %s\n",
638235329Savg	    BOOTPROG, spa->spa_name);
639235329Savg	autoboot = 0;
640235329Savg    } else if (zfs_lookup(&zfsmount, PATH_CONFIG, &dn) == 0 ||
641235329Savg        zfs_lookup(&zfsmount, PATH_DOTCONFIG, &dn) == 0) {
642185029Spjd	off = 0;
643198079Sjhb	zfs_read(spa, &dn, &off, cmd, sizeof(cmd));
644185029Spjd    }
645185029Spjd
646185029Spjd    if (*cmd) {
647234339Savg	/*
648234339Savg	 * Note that parse() is destructive to cmd[] and we also want
649234339Savg	 * to honor RBX_QUIET option that could be present in cmd[].
650234339Savg	 */
651234339Savg	memcpy(cmddup, cmd, sizeof(cmd));
652185029Spjd	if (parse())
653185029Spjd	    autoboot = 0;
654234339Savg	if (!OPT_CHECK(RBX_QUIET))
655241288Savg	    printf("%s: %s\n", PATH_CONFIG, cmddup);
656185029Spjd	/* Do not process this command twice */
657185029Spjd	*cmd = 0;
658185029Spjd    }
659185029Spjd
660185029Spjd    /*
661294925Simp     * Try to exec /boot/loader. If interrupted by a keypress,
662185029Spjd     * or in case of failure, try to load a kernel directly instead.
663185029Spjd     */
664185029Spjd
665185029Spjd    if (autoboot && !*kname) {
666294925Simp	memcpy(kname, PATH_LOADER_ZFS, sizeof(PATH_LOADER_ZFS));
667213136Spjd	if (!keyhit(3)) {
668185029Spjd	    load();
669185029Spjd	    memcpy(kname, PATH_KERNEL, sizeof(PATH_KERNEL));
670185029Spjd	}
671185029Spjd    }
672185029Spjd
673185029Spjd    /* Present the user with the boot2 prompt. */
674185029Spjd
675185029Spjd    for (;;) {
676235329Savg	if (!autoboot || !OPT_CHECK(RBX_QUIET)) {
677235329Savg	    printf("\nFreeBSD/x86 boot\n");
678235329Savg	    if (zfs_rlookup(spa, zfsmount.rootobj, rootname) != 0)
679241288Savg		printf("Default: %s/<0x%llx>:%s\n"
680235329Savg		       "boot: ",
681235329Savg		       spa->spa_name, zfsmount.rootobj, kname);
682241288Savg	    else if (rootname[0] != '\0')
683241288Savg		printf("Default: %s/%s:%s\n"
684241288Savg		       "boot: ",
685241288Savg		       spa->spa_name, rootname, kname);
686235329Savg	    else
687241288Savg		printf("Default: %s:%s\n"
688235329Savg		       "boot: ",
689241288Savg		       spa->spa_name, kname);
690235329Savg	}
691185029Spjd	if (ioctrl & IO_SERIAL)
692185029Spjd	    sio_flush();
693213136Spjd	if (!autoboot || keyhit(5))
694213136Spjd	    getstr(cmd, sizeof(cmd));
695185029Spjd	else if (!autoboot || !OPT_CHECK(RBX_QUIET))
696185029Spjd	    putchar('\n');
697185029Spjd	autoboot = 0;
698185029Spjd	if (parse())
699185029Spjd	    putchar('\a');
700185029Spjd	else
701185029Spjd	    load();
702185029Spjd    }
703185029Spjd}
704185029Spjd
/*
 * XXX - Needed for btxld to link the boot2 binary; do not remove.
 * Intentionally empty: the argument is ignored and the call returns.
 */
void
exit(int x)
{
    (void)x;
}
710185029Spjd
711185029Spjdstatic void
712185029Spjdload(void)
713185029Spjd{
714185029Spjd    union {
715185029Spjd	struct exec ex;
716185029Spjd	Elf32_Ehdr eh;
717185029Spjd    } hdr;
718185029Spjd    static Elf32_Phdr ep[2];
719185029Spjd    static Elf32_Shdr es[2];
720185029Spjd    caddr_t p;
721185029Spjd    dnode_phys_t dn;
722185029Spjd    off_t off;
723185029Spjd    uint32_t addr, x;
724185029Spjd    int fmt, i, j;
725185029Spjd
726235329Savg    if (zfs_lookup(&zfsmount, kname, &dn)) {
727235329Savg	printf("\nCan't find %s\n", kname);
728185029Spjd	return;
729185029Spjd    }
730185029Spjd    off = 0;
731185029Spjd    if (xfsread(&dn, &off, &hdr, sizeof(hdr)))
732185029Spjd	return;
733185029Spjd    if (N_GETMAGIC(hdr.ex) == ZMAGIC)
734185029Spjd	fmt = 0;
735185029Spjd    else if (IS_ELF(hdr.eh))
736185029Spjd	fmt = 1;
737185029Spjd    else {
738185029Spjd	printf("Invalid %s\n", "format");
739185029Spjd	return;
740185029Spjd    }
741185029Spjd    if (fmt == 0) {
742185029Spjd	addr = hdr.ex.a_entry & 0xffffff;
743185029Spjd	p = PTOV(addr);
744185029Spjd	off = PAGE_SIZE;
745185029Spjd	if (xfsread(&dn, &off, p, hdr.ex.a_text))
746185029Spjd	    return;
747185029Spjd	p += roundup2(hdr.ex.a_text, PAGE_SIZE);
748185029Spjd	if (xfsread(&dn, &off, p, hdr.ex.a_data))
749185029Spjd	    return;
750185029Spjd	p += hdr.ex.a_data + roundup2(hdr.ex.a_bss, PAGE_SIZE);
751185029Spjd	bootinfo.bi_symtab = VTOP(p);
752185029Spjd	memcpy(p, &hdr.ex.a_syms, sizeof(hdr.ex.a_syms));
753185029Spjd	p += sizeof(hdr.ex.a_syms);
754185029Spjd	if (hdr.ex.a_syms) {
755185029Spjd	    if (xfsread(&dn, &off, p, hdr.ex.a_syms))
756185029Spjd		return;
757185029Spjd	    p += hdr.ex.a_syms;
758185029Spjd	    if (xfsread(&dn, &off, p, sizeof(int)))
759185029Spjd		return;
760185029Spjd	    x = *(uint32_t *)p;
761185029Spjd	    p += sizeof(int);
762185029Spjd	    x -= sizeof(int);
763185029Spjd	    if (xfsread(&dn, &off, p, x))
764185029Spjd		return;
765185029Spjd	    p += x;
766185029Spjd	}
767185029Spjd    } else {
768185029Spjd	off = hdr.eh.e_phoff;
769185029Spjd	for (j = i = 0; i < hdr.eh.e_phnum && j < 2; i++) {
770185029Spjd	    if (xfsread(&dn, &off, ep + j, sizeof(ep[0])))
771185029Spjd		return;
772185029Spjd	    if (ep[j].p_type == PT_LOAD)
773185029Spjd		j++;
774185029Spjd	}
775185029Spjd	for (i = 0; i < 2; i++) {
776185029Spjd	    p = PTOV(ep[i].p_paddr & 0xffffff);
777185029Spjd	    off = ep[i].p_offset;
778185029Spjd	    if (xfsread(&dn, &off, p, ep[i].p_filesz))
779185029Spjd		return;
780185029Spjd	}
781185029Spjd	p += roundup2(ep[1].p_memsz, PAGE_SIZE);
782185029Spjd	bootinfo.bi_symtab = VTOP(p);
783185029Spjd	if (hdr.eh.e_shnum == hdr.eh.e_shstrndx + 3) {
784185029Spjd	    off = hdr.eh.e_shoff + sizeof(es[0]) *
785185029Spjd		(hdr.eh.e_shstrndx + 1);
786185029Spjd	    if (xfsread(&dn, &off, &es, sizeof(es)))
787185029Spjd		return;
788185029Spjd	    for (i = 0; i < 2; i++) {
789185029Spjd		memcpy(p, &es[i].sh_size, sizeof(es[i].sh_size));
790185029Spjd		p += sizeof(es[i].sh_size);
791185029Spjd		off = es[i].sh_offset;
792185029Spjd		if (xfsread(&dn, &off, p, es[i].sh_size))
793185029Spjd		    return;
794185029Spjd		p += es[i].sh_size;
795185029Spjd	    }
796185029Spjd	}
797185029Spjd	addr = hdr.eh.e_entry & 0xffffff;
798185029Spjd    }
799185029Spjd    bootinfo.bi_esymtab = VTOP(p);
800185029Spjd    bootinfo.bi_kernelname = VTOP(kname);
801235329Savg    zfsargs.size = sizeof(zfsargs);
802235329Savg    zfsargs.pool = zfsmount.spa->spa_guid;
803235329Savg    zfsargs.root = zfsmount.rootobj;
804241293Savg    zfsargs.primary_pool = primary_spa->spa_guid;
805296963Sallanjude#ifdef LOADER_GELI_SUPPORT
806296963Sallanjude    bcopy(gelipw, zfsargs.gelipw, sizeof(zfsargs.gelipw));
807296963Sallanjude    bzero(gelipw, sizeof(gelipw));
808296963Sallanjude#else
809296963Sallanjude    zfsargs.gelipw[0] = '\0';
810296963Sallanjude#endif
811241293Savg    if (primary_vdev != NULL)
812241293Savg	zfsargs.primary_vdev = primary_vdev->v_guid;
813241293Savg    else
814241293Savg	printf("failed to detect primary vdev\n");
815185029Spjd    __exec((caddr_t)addr, RB_BOOTINFO | (opts & RBX_MASK),
816185029Spjd	   bootdev,
817235329Savg	   KARGS_FLAGS_ZFS | KARGS_FLAGS_EXTARG,
818185029Spjd	   (uint32_t) spa->spa_guid,
819185029Spjd	   (uint32_t) (spa->spa_guid >> 32),
820235329Savg	   VTOP(&bootinfo),
821235329Savg	   zfsargs);
822185029Spjd}
823185029Spjd
824185029Spjdstatic int
825241288Savgzfs_mount_ds(char *dsname)
826241288Savg{
827241288Savg    uint64_t newroot;
828241288Savg    spa_t *newspa;
829241288Savg    char *q;
830241288Savg
831241288Savg    q = strchr(dsname, '/');
832241288Savg    if (q)
833241288Savg	*q++ = '\0';
834241288Savg    newspa = spa_find_by_name(dsname);
835241288Savg    if (newspa == NULL) {
836241288Savg	printf("\nCan't find ZFS pool %s\n", dsname);
837241288Savg	return -1;
838241288Savg    }
839241288Savg
840241288Savg    if (zfs_spa_init(newspa))
841241288Savg	return -1;
842241288Savg
843241288Savg    newroot = 0;
844241288Savg    if (q) {
845241288Savg	if (zfs_lookup_dataset(newspa, q, &newroot)) {
846241288Savg	    printf("\nCan't find dataset %s in ZFS pool %s\n",
847241288Savg		    q, newspa->spa_name);
848241288Savg	    return -1;
849241288Savg	}
850241288Savg    }
851241288Savg    if (zfs_mount(newspa, newroot, &zfsmount)) {
852241288Savg	printf("\nCan't mount ZFS dataset\n");
853241288Savg	return -1;
854241288Savg    }
855241288Savg    spa = newspa;
856241288Savg    return (0);
857241288Savg}
858241288Savg
859241288Savgstatic int
860213136Spjdparse(void)
861185029Spjd{
862185029Spjd    char *arg = cmd;
863185029Spjd    char *ep, *p, *q;
864185029Spjd    const char *cp;
865185029Spjd    int c, i, j;
866185029Spjd
867185029Spjd    while ((c = *arg++)) {
868185029Spjd	if (c == ' ' || c == '\t' || c == '\n')
869185029Spjd	    continue;
870185029Spjd	for (p = arg; *p && *p != '\n' && *p != ' ' && *p != '\t'; p++);
871185029Spjd	ep = p;
872185029Spjd	if (*p)
873185029Spjd	    *p++ = 0;
874185029Spjd	if (c == '-') {
875185029Spjd	    while ((c = *arg++)) {
876185029Spjd		if (c == 'P') {
877185029Spjd		    if (*(uint8_t *)PTOV(0x496) & 0x10) {
878185029Spjd			cp = "yes";
879185029Spjd		    } else {
880185029Spjd			opts |= OPT_SET(RBX_DUAL) | OPT_SET(RBX_SERIAL);
881185029Spjd			cp = "no";
882185029Spjd		    }
883185029Spjd		    printf("Keyboard: %s\n", cp);
884185029Spjd		    continue;
885185029Spjd		} else if (c == 'S') {
886185029Spjd		    j = 0;
887185029Spjd		    while ((unsigned int)(i = *arg++ - '0') <= 9)
888185029Spjd			j = j * 10 + i;
889185029Spjd		    if (j > 0 && i == -'0') {
890185029Spjd			comspeed = j;
891185029Spjd			break;
892185029Spjd		    }
893185029Spjd		    /* Fall through to error below ('S' not in optstr[]). */
894185029Spjd		}
895185029Spjd		for (i = 0; c != optstr[i]; i++)
896185029Spjd		    if (i == NOPT - 1)
897185029Spjd			return -1;
898185029Spjd		opts ^= OPT_SET(flags[i]);
899185029Spjd	    }
900185029Spjd	    ioctrl = OPT_CHECK(RBX_DUAL) ? (IO_SERIAL|IO_KEYBOARD) :
901185029Spjd		     OPT_CHECK(RBX_SERIAL) ? IO_SERIAL : IO_KEYBOARD;
902241301Savg	    if (ioctrl & IO_SERIAL) {
903241301Savg	        if (sio_init(115200 / comspeed) != 0)
904241301Savg		    ioctrl &= ~IO_SERIAL;
905241301Savg	    }
906185029Spjd	} if (c == '?') {
907185029Spjd	    dnode_phys_t dn;
908185029Spjd
909235329Savg	    if (zfs_lookup(&zfsmount, arg, &dn) == 0) {
910185029Spjd		zap_list(spa, &dn);
911185029Spjd	    }
912185029Spjd	    return -1;
913185029Spjd	} else {
914185029Spjd	    arg--;
915185029Spjd
916185029Spjd	    /*
917185029Spjd	     * Report pool status if the comment is 'status'. Lets
918185029Spjd	     * hope no-one wants to load /status as a kernel.
919185029Spjd	     */
920185029Spjd	    if (!strcmp(arg, "status")) {
921185029Spjd		spa_all_status();
922185029Spjd		return -1;
923185029Spjd	    }
924185029Spjd
925185029Spjd	    /*
926241288Savg	     * If there is "zfs:" prefix simply ignore it.
927241288Savg	     */
928241288Savg	    if (strncmp(arg, "zfs:", 4) == 0)
929241288Savg		arg += 4;
930241288Savg
931241288Savg	    /*
932185029Spjd	     * If there is a colon, switch pools.
933185029Spjd	     */
934241288Savg	    q = strchr(arg, ':');
935185029Spjd	    if (q) {
936241288Savg		*q++ = '\0';
937241288Savg		if (zfs_mount_ds(arg) != 0)
938185029Spjd		    return -1;
939241288Savg		arg = q;
940185029Spjd	    }
941185029Spjd	    if ((i = ep - arg)) {
942185029Spjd		if ((size_t)i >= sizeof(kname))
943185029Spjd		    return -1;
944185029Spjd		memcpy(kname, arg, i + 1);
945185029Spjd	    }
946185029Spjd	}
947185029Spjd	arg = p;
948185029Spjd    }
949185029Spjd    return 0;
950185029Spjd}
951