zfsboot.c revision 296963
1185029Spjd/*-
2185029Spjd * Copyright (c) 1998 Robert Nordier
3185029Spjd * All rights reserved.
4185029Spjd *
5185029Spjd * Redistribution and use in source and binary forms are freely
6185029Spjd * permitted provided that the above copyright notice and this
7185029Spjd * paragraph and the following disclaimer are duplicated in all
8185029Spjd * such forms.
9185029Spjd *
10185029Spjd * This software is provided "AS IS" and without any express or
11185029Spjd * implied warranties, including, without limitation, the implied
12185029Spjd * warranties of merchantability and fitness for a particular
13185029Spjd * purpose.
14185029Spjd */
15185029Spjd
16185029Spjd#include <sys/cdefs.h>
17185029Spjd__FBSDID("$FreeBSD: head/sys/boot/i386/zfsboot/zfsboot.c 296963 2016-03-16 23:12:19Z allanjude $");
18185029Spjd
19185029Spjd#include <sys/param.h>
20185029Spjd#include <sys/errno.h>
21185029Spjd#include <sys/diskmbr.h>
22185096Sdfr#ifdef GPT
23185096Sdfr#include <sys/gpt.h>
24185096Sdfr#endif
25185029Spjd#include <sys/reboot.h>
26185029Spjd#include <sys/queue.h>
27185029Spjd
28185029Spjd#include <machine/bootinfo.h>
29185029Spjd#include <machine/elf.h>
30200309Sjhb#include <machine/pc/bios.h>
31185029Spjd
32185029Spjd#include <stdarg.h>
33185029Spjd#include <stddef.h>
34185029Spjd
35185029Spjd#include <a.out.h>
36185029Spjd
37185029Spjd#include <btxv86.h>
38185029Spjd
39185029Spjd#include "lib.h"
40213136Spjd#include "rbx.h"
41213136Spjd#include "drv.h"
42213136Spjd#include "util.h"
43213136Spjd#include "cons.h"
44235154Savg#include "bootargs.h"
45294765Simp#include "paths.h"
46185029Spjd
47235329Savg#include "libzfs.h"
48235329Savg
49185029Spjd#define ARGS		0x900
50185029Spjd#define NOPT		14
51185029Spjd#define NDEV		3
52185029Spjd
53212805Spjd#define BIOS_NUMDRIVES	0x475
54185029Spjd#define DRV_HARD	0x80
55185029Spjd#define DRV_MASK	0x7f
56185029Spjd
57185029Spjd#define TYPE_AD		0
58185029Spjd#define TYPE_DA		1
59185029Spjd#define TYPE_MAXHARD	TYPE_DA
60185029Spjd#define TYPE_FD		2
61185029Spjd
62185029Spjdextern uint32_t _end;
63185029Spjd
64185096Sdfr#ifdef GPT
65185096Sdfrstatic const uuid_t freebsd_zfs_uuid = GPT_ENT_TYPE_FREEBSD_ZFS;
66185096Sdfr#endif
67185029Spjdstatic const char optstr[NOPT] = "DhaCcdgmnpqrsv"; /* Also 'P', 'S' */
68185029Spjdstatic const unsigned char flags[NOPT] = {
69185029Spjd    RBX_DUAL,
70185029Spjd    RBX_SERIAL,
71185029Spjd    RBX_ASKNAME,
72185029Spjd    RBX_CDROM,
73185029Spjd    RBX_CONFIG,
74185029Spjd    RBX_KDB,
75185029Spjd    RBX_GDB,
76185029Spjd    RBX_MUTE,
77185029Spjd    RBX_NOINTR,
78185029Spjd    RBX_PAUSE,
79185029Spjd    RBX_QUIET,
80185029Spjd    RBX_DFLTROOT,
81185029Spjd    RBX_SINGLE,
82185029Spjd    RBX_VERBOSE
83185029Spjd};
84213136Spjduint32_t opts;
85185029Spjd
86185029Spjdstatic const char *const dev_nm[NDEV] = {"ad", "da", "fd"};
87185029Spjdstatic const unsigned char dev_maj[NDEV] = {30, 4, 2};
88185029Spjd
89185029Spjdstatic char cmd[512];
90234339Savgstatic char cmddup[512];
91185029Spjdstatic char kname[1024];
92235329Savgstatic char rootname[256];
93185029Spjdstatic int comspeed = SIOSPD;
94185029Spjdstatic struct bootinfo bootinfo;
95185029Spjdstatic uint32_t bootdev;
96235329Savgstatic struct zfs_boot_args zfsargs;
97235329Savgstatic struct zfsmount zfsmount;
98185029Spjd
99200309Sjhbvm_offset_t	high_heap_base;
100200309Sjhbuint32_t	bios_basemem, bios_extmem, high_heap_size;
101200309Sjhb
102200309Sjhbstatic struct bios_smap smap;
103200309Sjhb
104200309Sjhb/*
105200309Sjhb * The minimum amount of memory to reserve in bios_extmem for the heap.
106200309Sjhb */
107200309Sjhb#define	HEAP_MIN	(3 * 1024 * 1024)
108200309Sjhb
109200309Sjhbstatic char *heap_next;
110200309Sjhbstatic char *heap_end;
111200309Sjhb
112185029Spjd/* Buffers that must not span a 64k boundary. */
113185029Spjd#define READ_BUF_SIZE	8192
114185029Spjdstruct dmadat {
115185029Spjd	char rdbuf[READ_BUF_SIZE];	/* for reading large things */
116185029Spjd	char secbuf[READ_BUF_SIZE];	/* for MBR/disklabel */
117185029Spjd};
118185029Spjdstatic struct dmadat *dmadat;
119185029Spjd
120185029Spjdvoid exit(int);
121185029Spjdstatic void load(void);
122185029Spjdstatic int parse(void);
123200309Sjhbstatic void bios_getmem(void);
124296963Sallanjudevoid *malloc(size_t n);
125296963Sallanjudevoid free(void *ptr);
126185029Spjd
127296963Sallanjudevoid *
128185029Spjdmalloc(size_t n)
129185029Spjd{
130185029Spjd	char *p = heap_next;
131185029Spjd	if (p + n > heap_end) {
132185029Spjd		printf("malloc failure\n");
133185029Spjd		for (;;)
134185029Spjd		    ;
135296963Sallanjude		/* NOTREACHED */
136296963Sallanjude		return (0);
137185029Spjd	}
138185029Spjd	heap_next += n;
139296963Sallanjude	return (p);
140185029Spjd}
141185029Spjd
/*
 * Counterpart to malloc().  The bump allocator never reuses memory, so
 * releasing a pointer does nothing.
 */
void
free(void *ptr)
{
	(void)ptr;
}
148296963Sallanjude
/*
 * Return a copy of the NUL-terminated string s in memory obtained from
 * malloc().
 */
static char *
strdup(const char *s)
{
	char *copy;

	copy = malloc(strlen(s) + 1);
	strcpy(copy, s);
	return (copy);
}
156185029Spjd
157296963Sallanjude#ifdef LOADER_GELI_SUPPORT
158296963Sallanjude#include "geliboot.c"
159296963Sallanjudestatic char gelipw[GELI_PW_MAXLEN];
160296963Sallanjude#endif
161296963Sallanjude
162185029Spjd#include "zfsimpl.c"
163185029Spjd
164185029Spjd/*
165185029Spjd * Read from a dnode (which must be from a ZPL filesystem).
166185029Spjd */
167185029Spjdstatic int
168185029Spjdzfs_read(spa_t *spa, const dnode_phys_t *dnode, off_t *offp, void *start, size_t size)
169185029Spjd{
170185029Spjd	const znode_phys_t *zp = (const znode_phys_t *) dnode->dn_bonus;
171185029Spjd	size_t n;
172185029Spjd	int rc;
173185029Spjd
174185029Spjd	n = size;
175185029Spjd	if (*offp + n > zp->zp_size)
176185029Spjd		n = zp->zp_size - *offp;
177185029Spjd
178185029Spjd	rc = dnode_read(spa, dnode, *offp, start, n);
179185029Spjd	if (rc)
180185029Spjd		return (-1);
181185029Spjd	*offp += n;
182185029Spjd
183185029Spjd	return (n);
184185029Spjd}
185185029Spjd
186185029Spjd/*
187185029Spjd * Current ZFS pool
188185029Spjd */
189235329Savgstatic spa_t *spa;
190241293Savgstatic spa_t *primary_spa;
191241293Savgstatic vdev_t *primary_vdev;
192185029Spjd
193185029Spjd/*
194185029Spjd * A wrapper for dskread that doesn't have to worry about whether the
195185029Spjd * buffer pointer crosses a 64k boundary.
196185029Spjd */
197185029Spjdstatic int
198185029Spjdvdev_read(vdev_t *vdev, void *priv, off_t off, void *buf, size_t bytes)
199185029Spjd{
200185029Spjd	char *p;
201199579Sjhb	daddr_t lba;
202199579Sjhb	unsigned int nb;
203185029Spjd	struct dsk *dsk = (struct dsk *) priv;
204185029Spjd
205185029Spjd	if ((off & (DEV_BSIZE - 1)) || (bytes & (DEV_BSIZE - 1)))
206185029Spjd		return -1;
207185029Spjd
208185029Spjd	p = buf;
209185029Spjd	lba = off / DEV_BSIZE;
210213136Spjd	lba += dsk->start;
211185029Spjd	while (bytes > 0) {
212185029Spjd		nb = bytes / DEV_BSIZE;
213185029Spjd		if (nb > READ_BUF_SIZE / DEV_BSIZE)
214185029Spjd			nb = READ_BUF_SIZE / DEV_BSIZE;
215185029Spjd		if (drvread(dsk, dmadat->rdbuf, lba, nb))
216185029Spjd			return -1;
217296963Sallanjude#ifdef LOADER_GELI_SUPPORT
218296963Sallanjude		/* decrypt */
219296963Sallanjude		if (is_geli(dsk) == 0) {
220296963Sallanjude		    if (geli_read(dsk, ((lba - dsk->start) * DEV_BSIZE),
221296963Sallanjude			dmadat->rdbuf, nb * DEV_BSIZE))
222296963Sallanjude			    return (-1);
223296963Sallanjude		}
224296963Sallanjude#endif
225185029Spjd		memcpy(p, dmadat->rdbuf, nb * DEV_BSIZE);
226185029Spjd		p += nb * DEV_BSIZE;
227185029Spjd		lba += nb;
228185029Spjd		bytes -= nb * DEV_BSIZE;
229185029Spjd	}
230185029Spjd
231185029Spjd	return 0;
232185029Spjd}
233185029Spjd
234185029Spjdstatic int
235185029Spjdxfsread(const dnode_phys_t *dnode, off_t *offp, void *buf, size_t nbyte)
236185029Spjd{
237185029Spjd    if ((size_t)zfs_read(spa, dnode, offp, buf, nbyte) != nbyte) {
238235329Savg	printf("Invalid format\n");
239185029Spjd	return -1;
240185029Spjd    }
241185029Spjd    return 0;
242185029Spjd}
243185029Spjd
244200309Sjhbstatic void
245200309Sjhbbios_getmem(void)
246185029Spjd{
247200309Sjhb    uint64_t size;
248185029Spjd
249200309Sjhb    /* Parse system memory map */
250200309Sjhb    v86.ebx = 0;
251200309Sjhb    do {
252200309Sjhb	v86.ctl = V86_FLAGS;
253200309Sjhb	v86.addr = 0x15;		/* int 0x15 function 0xe820*/
254200309Sjhb	v86.eax = 0xe820;
255200309Sjhb	v86.ecx = sizeof(struct bios_smap);
256200309Sjhb	v86.edx = SMAP_SIG;
257200309Sjhb	v86.es = VTOPSEG(&smap);
258200309Sjhb	v86.edi = VTOPOFF(&smap);
259200309Sjhb	v86int();
260292682Sjhb	if (V86_CY(v86.efl) || (v86.eax != SMAP_SIG))
261200309Sjhb	    break;
262200309Sjhb	/* look for a low-memory segment that's large enough */
263200309Sjhb	if ((smap.type == SMAP_TYPE_MEMORY) && (smap.base == 0) &&
264200309Sjhb	    (smap.length >= (512 * 1024)))
265200309Sjhb	    bios_basemem = smap.length;
266200309Sjhb	/* look for the first segment in 'extended' memory */
267200309Sjhb	if ((smap.type == SMAP_TYPE_MEMORY) && (smap.base == 0x100000)) {
268200309Sjhb	    bios_extmem = smap.length;
269200309Sjhb	}
270200309Sjhb
271200309Sjhb	/*
272200309Sjhb	 * Look for the largest segment in 'extended' memory beyond
273200309Sjhb	 * 1MB but below 4GB.
274200309Sjhb	 */
275200309Sjhb	if ((smap.type == SMAP_TYPE_MEMORY) && (smap.base > 0x100000) &&
276200309Sjhb	    (smap.base < 0x100000000ull)) {
277200309Sjhb	    size = smap.length;
278200309Sjhb
279200309Sjhb	    /*
280200309Sjhb	     * If this segment crosses the 4GB boundary, truncate it.
281200309Sjhb	     */
282200309Sjhb	    if (smap.base + size > 0x100000000ull)
283200309Sjhb		size = 0x100000000ull - smap.base;
284200309Sjhb
285200309Sjhb	    if (size > high_heap_size) {
286200309Sjhb		high_heap_size = size;
287200309Sjhb		high_heap_base = smap.base;
288200309Sjhb	    }
289200309Sjhb	}
290200309Sjhb    } while (v86.ebx != 0);
291200309Sjhb
292200309Sjhb    /* Fall back to the old compatibility function for base memory */
293200309Sjhb    if (bios_basemem == 0) {
294200309Sjhb	v86.ctl = 0;
295200309Sjhb	v86.addr = 0x12;		/* int 0x12 */
296200309Sjhb	v86int();
297200309Sjhb
298200309Sjhb	bios_basemem = (v86.eax & 0xffff) * 1024;
299200309Sjhb    }
300200309Sjhb
301200309Sjhb    /* Fall back through several compatibility functions for extended memory */
302200309Sjhb    if (bios_extmem == 0) {
303200309Sjhb	v86.ctl = V86_FLAGS;
304200309Sjhb	v86.addr = 0x15;		/* int 0x15 function 0xe801*/
305200309Sjhb	v86.eax = 0xe801;
306200309Sjhb	v86int();
307292682Sjhb	if (!V86_CY(v86.efl)) {
308200309Sjhb	    bios_extmem = ((v86.ecx & 0xffff) + ((v86.edx & 0xffff) * 64)) * 1024;
309200309Sjhb	}
310200309Sjhb    }
311200309Sjhb    if (bios_extmem == 0) {
312200309Sjhb	v86.ctl = 0;
313200309Sjhb	v86.addr = 0x15;		/* int 0x15 function 0x88*/
314200309Sjhb	v86.eax = 0x8800;
315200309Sjhb	v86int();
316200309Sjhb	bios_extmem = (v86.eax & 0xffff) * 1024;
317200309Sjhb    }
318200309Sjhb
319200309Sjhb    /*
320200309Sjhb     * If we have extended memory and did not find a suitable heap
321200309Sjhb     * region in the SMAP, use the last 3MB of 'extended' memory as a
322200309Sjhb     * high heap candidate.
323200309Sjhb     */
324200309Sjhb    if (bios_extmem >= HEAP_MIN && high_heap_size < HEAP_MIN) {
325200309Sjhb	high_heap_size = HEAP_MIN;
326200309Sjhb	high_heap_base = bios_extmem + 0x100000 - HEAP_MIN;
327200309Sjhb    }
328296963Sallanjude}
329200309Sjhb
330185029Spjd/*
331185029Spjd * Try to detect a device supported by the legacy int13 BIOS
332185029Spjd */
333185029Spjdstatic int
334185029Spjdint13probe(int drive)
335185029Spjd{
336185029Spjd    v86.ctl = V86_FLAGS;
337185029Spjd    v86.addr = 0x13;
338185029Spjd    v86.eax = 0x800;
339185029Spjd    v86.edx = drive;
340185029Spjd    v86int();
341185029Spjd
342292682Sjhb    if (!V86_CY(v86.efl) &&				/* carry clear */
343185029Spjd	((v86.edx & 0xff) != (drive & DRV_MASK))) {	/* unit # OK */
344185029Spjd	if ((v86.ecx & 0x3f) == 0) {			/* absurd sector size */
345185029Spjd		return(0);				/* skip device */
346185029Spjd	}
347185029Spjd	return (1);
348185029Spjd    }
349185029Spjd    return(0);
350185029Spjd}
351185029Spjd
352192194Sdfr/*
353192194Sdfr * We call this when we find a ZFS vdev - ZFS consumes the dsk
354192194Sdfr * structure so we must make a new one.
355192194Sdfr */
356192194Sdfrstatic struct dsk *
357192194Sdfrcopy_dsk(struct dsk *dsk)
358192194Sdfr{
359192194Sdfr    struct dsk *newdsk;
360192194Sdfr
361192194Sdfr    newdsk = malloc(sizeof(struct dsk));
362192194Sdfr    *newdsk = *dsk;
363192194Sdfr    return (newdsk);
364192194Sdfr}
365192194Sdfr
366185029Spjdstatic void
367241294Savgprobe_drive(struct dsk *dsk)
368185029Spjd{
369185096Sdfr#ifdef GPT
370185096Sdfr    struct gpt_hdr hdr;
371185096Sdfr    struct gpt_ent *ent;
372185096Sdfr    unsigned part, entries_per_sec;
373185096Sdfr#endif
374296963Sallanjude    daddr_t slba, elba;
375185029Spjd    struct dos_partition *dp;
376185029Spjd    char *sec;
377185029Spjd    unsigned i;
378185029Spjd
379185029Spjd    /*
380296963Sallanjude     * If we find a vdev on the whole disk, stop here.
381185029Spjd     */
382241294Savg    if (vdev_probe(vdev_read, dsk, NULL) == 0)
383185029Spjd	return;
384185029Spjd
385296963Sallanjude#ifdef LOADER_GELI_SUPPORT
386296963Sallanjude    /*
387296963Sallanjude     * Taste the disk, if it is GELI encrypted, decrypt it and check to see if
388296963Sallanjude     * it is a usable vdev then. Otherwise dig
389296963Sallanjude     * out the partition table and probe each slice/partition
390296963Sallanjude     * in turn for a vdev or GELI encrypted vdev.
391296963Sallanjude     */
392296963Sallanjude    elba = drvsize(dsk);
393296963Sallanjude    if (elba > 0) {
394296963Sallanjude	elba--;
395296963Sallanjude    }
396296963Sallanjude    if (geli_taste(vdev_read, dsk, elba) == 0) {
397296963Sallanjude	if (geli_passphrase(&gelipw, dsk->unit, ':', 0, dsk) == 0) {
398296963Sallanjude	    if (vdev_probe(vdev_read, dsk, NULL) == 0) {
399296963Sallanjude		return;
400296963Sallanjude	    }
401296963Sallanjude	}
402296963Sallanjude    }
403296963Sallanjude#endif /* LOADER_GELI_SUPPORT */
404296963Sallanjude
405185029Spjd    sec = dmadat->secbuf;
406185029Spjd    dsk->start = 0;
407185096Sdfr
408185096Sdfr#ifdef GPT
409185096Sdfr    /*
410185096Sdfr     * First check for GPT.
411185096Sdfr     */
412185096Sdfr    if (drvread(dsk, sec, 1, 1)) {
413185096Sdfr	return;
414185096Sdfr    }
415185096Sdfr    memcpy(&hdr, sec, sizeof(hdr));
416185096Sdfr    if (memcmp(hdr.hdr_sig, GPT_HDR_SIG, sizeof(hdr.hdr_sig)) != 0 ||
417185096Sdfr	hdr.hdr_lba_self != 1 || hdr.hdr_revision < 0x00010000 ||
418185096Sdfr	hdr.hdr_entsz < sizeof(*ent) || DEV_BSIZE % hdr.hdr_entsz != 0) {
419185096Sdfr	goto trymbr;
420185096Sdfr    }
421185096Sdfr
422185096Sdfr    /*
423185096Sdfr     * Probe all GPT partitions for the presense of ZFS pools. We
424185096Sdfr     * return the spa_t for the first we find (if requested). This
425185096Sdfr     * will have the effect of booting from the first pool on the
426185096Sdfr     * disk.
427296963Sallanjude     *
428296963Sallanjude     * If no vdev is found, GELI decrypting the device and try again
429185096Sdfr     */
430185096Sdfr    entries_per_sec = DEV_BSIZE / hdr.hdr_entsz;
431185096Sdfr    slba = hdr.hdr_lba_table;
432185096Sdfr    elba = slba + hdr.hdr_entries / entries_per_sec;
433185096Sdfr    while (slba < elba) {
434198420Srnoland	dsk->start = 0;
435185096Sdfr	if (drvread(dsk, sec, slba, 1))
436185096Sdfr	    return;
437185096Sdfr	for (part = 0; part < entries_per_sec; part++) {
438185096Sdfr	    ent = (struct gpt_ent *)(sec + part * hdr.hdr_entsz);
439185096Sdfr	    if (memcmp(&ent->ent_type, &freebsd_zfs_uuid,
440185096Sdfr		     sizeof(uuid_t)) == 0) {
441185096Sdfr		dsk->start = ent->ent_lba_start;
442296963Sallanjude		dsk->slice = part + 1;
443296963Sallanjude		dsk->part = 255;
444241294Savg		if (vdev_probe(vdev_read, dsk, NULL) == 0) {
445185096Sdfr		    /*
446185096Sdfr		     * This slice had a vdev. We need a new dsk
447185096Sdfr		     * structure now since the vdev now owns this one.
448185096Sdfr		     */
449192194Sdfr		    dsk = copy_dsk(dsk);
450185096Sdfr		}
451296963Sallanjude#ifdef LOADER_GELI_SUPPORT
452296963Sallanjude		else if (geli_taste(vdev_read, dsk, ent->ent_lba_end -
453296963Sallanjude			 ent->ent_lba_start) == 0) {
454296963Sallanjude		    if (geli_passphrase(&gelipw, dsk->unit, 'p', dsk->slice, dsk) == 0) {
455296963Sallanjude			/*
456296963Sallanjude			 * This slice has GELI, check it for ZFS.
457296963Sallanjude			 */
458296963Sallanjude			if (vdev_probe(vdev_read, dsk, NULL) == 0) {
459296963Sallanjude			    /*
460296963Sallanjude			     * This slice had a vdev. We need a new dsk
461296963Sallanjude			     * structure now since the vdev now owns this one.
462296963Sallanjude			     */
463296963Sallanjude			    dsk = copy_dsk(dsk);
464296963Sallanjude			}
465296963Sallanjude			break;
466296963Sallanjude		    }
467296963Sallanjude		}
468296963Sallanjude#endif /* LOADER_GELI_SUPPORT */
469185096Sdfr	    }
470185096Sdfr	}
471185096Sdfr	slba++;
472185096Sdfr    }
473185096Sdfr    return;
474185096Sdfrtrymbr:
475296963Sallanjude#endif /* GPT */
476185096Sdfr
477185029Spjd    if (drvread(dsk, sec, DOSBBSECTOR, 1))
478185029Spjd	return;
479185029Spjd    dp = (void *)(sec + DOSPARTOFF);
480185029Spjd
481185029Spjd    for (i = 0; i < NDOSPART; i++) {
482185029Spjd	if (!dp[i].dp_typ)
483185029Spjd	    continue;
484185029Spjd	dsk->start = dp[i].dp_start;
485296963Sallanjude	dsk->slice = i + 1;
486241294Savg	if (vdev_probe(vdev_read, dsk, NULL) == 0) {
487192194Sdfr	    dsk = copy_dsk(dsk);
488185029Spjd	}
489296963Sallanjude#ifdef LOADER_GELI_SUPPORT
490296963Sallanjude	else if (geli_taste(vdev_read, dsk, dp[i].dp_size -
491296963Sallanjude		 dp[i].dp_start) == 0) {
492296963Sallanjude	    if (geli_passphrase(&gelipw, dsk->unit, 's', i, dsk) == 0) {
493296963Sallanjude		/*
494296963Sallanjude		 * This slice has GELI, check it for ZFS.
495296963Sallanjude		 */
496296963Sallanjude		if (vdev_probe(vdev_read, dsk, NULL) == 0) {
497296963Sallanjude		    /*
498296963Sallanjude		     * This slice had a vdev. We need a new dsk
499296963Sallanjude		     * structure now since the vdev now owns this one.
500296963Sallanjude		     */
501296963Sallanjude		    dsk = copy_dsk(dsk);
502296963Sallanjude		}
503296963Sallanjude		break;
504296963Sallanjude	    }
505296963Sallanjude	}
506296963Sallanjude#endif /* LOADER_GELI_SUPPORT */
507185029Spjd    }
508185029Spjd}
509185029Spjd
510185029Spjdint
511185029Spjdmain(void)
512185029Spjd{
513185029Spjd    int autoboot, i;
514185029Spjd    dnode_phys_t dn;
515185029Spjd    off_t off;
516185029Spjd    struct dsk *dsk;
517185029Spjd
518208388Sjhb    dmadat = (void *)(roundup2(__base + (int32_t)&_end, 0x10000) - __base);
519208388Sjhb
520200309Sjhb    bios_getmem();
521200309Sjhb
522200309Sjhb    if (high_heap_size > 0) {
523200309Sjhb	heap_end = PTOV(high_heap_base + high_heap_size);
524200309Sjhb	heap_next = PTOV(high_heap_base);
525200309Sjhb    } else {
526296963Sallanjude	heap_next = (char *)dmadat + sizeof(*dmadat);
527296963Sallanjude	heap_end = (char *)PTOV(bios_basemem);
528200309Sjhb    }
529200309Sjhb
530185029Spjd    dsk = malloc(sizeof(struct dsk));
531185029Spjd    dsk->drive = *(uint8_t *)PTOV(ARGS);
532185029Spjd    dsk->type = dsk->drive & DRV_HARD ? TYPE_AD : TYPE_FD;
533185029Spjd    dsk->unit = dsk->drive & DRV_MASK;
534185029Spjd    dsk->slice = *(uint8_t *)PTOV(ARGS + 1) + 1;
535185029Spjd    dsk->part = 0;
536185029Spjd    dsk->start = 0;
537185029Spjd    dsk->init = 0;
538185029Spjd
539185029Spjd    bootinfo.bi_version = BOOTINFO_VERSION;
540185029Spjd    bootinfo.bi_size = sizeof(bootinfo);
541200309Sjhb    bootinfo.bi_basemem = bios_basemem / 1024;
542200309Sjhb    bootinfo.bi_extmem = bios_extmem / 1024;
543185029Spjd    bootinfo.bi_memsizes_valid++;
544185029Spjd    bootinfo.bi_bios_dev = dsk->drive;
545185029Spjd
546185029Spjd    bootdev = MAKEBOOTDEV(dev_maj[dsk->type],
547185029Spjd			  dsk->slice, dsk->unit, dsk->part),
548185029Spjd
549185029Spjd    /* Process configuration file */
550185029Spjd
551185029Spjd    autoboot = 1;
552185029Spjd
553296963Sallanjude#ifdef LOADER_GELI_SUPPORT
554296963Sallanjude    geli_init();
555296963Sallanjude#endif
556185029Spjd    zfs_init();
557185029Spjd
558185029Spjd    /*
559185029Spjd     * Probe the boot drive first - we will try to boot from whatever
560185029Spjd     * pool we find on that drive.
561185029Spjd     */
562241294Savg    probe_drive(dsk);
563185029Spjd
564185029Spjd    /*
565185029Spjd     * Probe the rest of the drives that the bios knows about. This
566185029Spjd     * will find any other available pools and it may fill in missing
567185029Spjd     * vdevs for the boot pool.
568185029Spjd     */
569212805Spjd#ifndef VIRTUALBOX
570212805Spjd    for (i = 0; i < *(unsigned char *)PTOV(BIOS_NUMDRIVES); i++)
571212805Spjd#else
572212805Spjd    for (i = 0; i < MAXBDDEV; i++)
573212805Spjd#endif
574212805Spjd    {
575185029Spjd	if ((i | DRV_HARD) == *(uint8_t *)PTOV(ARGS))
576185029Spjd	    continue;
577185029Spjd
578192194Sdfr	if (!int13probe(i | DRV_HARD))
579192194Sdfr	    break;
580192194Sdfr
581185029Spjd	dsk = malloc(sizeof(struct dsk));
582185029Spjd	dsk->drive = i | DRV_HARD;
583185029Spjd	dsk->type = dsk->drive & TYPE_AD;
584185029Spjd	dsk->unit = i;
585185029Spjd	dsk->slice = 0;
586185029Spjd	dsk->part = 0;
587185029Spjd	dsk->start = 0;
588185029Spjd	dsk->init = 0;
589241294Savg	probe_drive(dsk);
590185029Spjd    }
591185029Spjd
592185029Spjd    /*
593241294Savg     * The first discovered pool, if any, is the pool.
594185029Spjd     */
595241294Savg    spa = spa_get_primary();
596185029Spjd    if (!spa) {
597241294Savg	printf("%s: No ZFS pools located, can't boot\n", BOOTPROG);
598241294Savg	for (;;)
599241294Savg	    ;
600185029Spjd    }
601185029Spjd
602241293Savg    primary_spa = spa;
603241293Savg    primary_vdev = spa_get_primary_vdev(spa);
604241293Savg
605235329Savg    if (zfs_spa_init(spa) != 0 || zfs_mount(spa, 0, &zfsmount) != 0) {
606235329Savg	printf("%s: failed to mount default pool %s\n",
607235329Savg	    BOOTPROG, spa->spa_name);
608235329Savg	autoboot = 0;
609235329Savg    } else if (zfs_lookup(&zfsmount, PATH_CONFIG, &dn) == 0 ||
610235329Savg        zfs_lookup(&zfsmount, PATH_DOTCONFIG, &dn) == 0) {
611185029Spjd	off = 0;
612198079Sjhb	zfs_read(spa, &dn, &off, cmd, sizeof(cmd));
613185029Spjd    }
614185029Spjd
615185029Spjd    if (*cmd) {
616234339Savg	/*
617234339Savg	 * Note that parse() is destructive to cmd[] and we also want
618234339Savg	 * to honor RBX_QUIET option that could be present in cmd[].
619234339Savg	 */
620234339Savg	memcpy(cmddup, cmd, sizeof(cmd));
621185029Spjd	if (parse())
622185029Spjd	    autoboot = 0;
623234339Savg	if (!OPT_CHECK(RBX_QUIET))
624241288Savg	    printf("%s: %s\n", PATH_CONFIG, cmddup);
625185029Spjd	/* Do not process this command twice */
626185029Spjd	*cmd = 0;
627185029Spjd    }
628185029Spjd
629185029Spjd    /*
630294925Simp     * Try to exec /boot/loader. If interrupted by a keypress,
631185029Spjd     * or in case of failure, try to load a kernel directly instead.
632185029Spjd     */
633185029Spjd
634185029Spjd    if (autoboot && !*kname) {
635294925Simp	memcpy(kname, PATH_LOADER_ZFS, sizeof(PATH_LOADER_ZFS));
636213136Spjd	if (!keyhit(3)) {
637185029Spjd	    load();
638185029Spjd	    memcpy(kname, PATH_KERNEL, sizeof(PATH_KERNEL));
639185029Spjd	}
640185029Spjd    }
641185029Spjd
642185029Spjd    /* Present the user with the boot2 prompt. */
643185029Spjd
644185029Spjd    for (;;) {
645235329Savg	if (!autoboot || !OPT_CHECK(RBX_QUIET)) {
646235329Savg	    printf("\nFreeBSD/x86 boot\n");
647235329Savg	    if (zfs_rlookup(spa, zfsmount.rootobj, rootname) != 0)
648241288Savg		printf("Default: %s/<0x%llx>:%s\n"
649235329Savg		       "boot: ",
650235329Savg		       spa->spa_name, zfsmount.rootobj, kname);
651241288Savg	    else if (rootname[0] != '\0')
652241288Savg		printf("Default: %s/%s:%s\n"
653241288Savg		       "boot: ",
654241288Savg		       spa->spa_name, rootname, kname);
655235329Savg	    else
656241288Savg		printf("Default: %s:%s\n"
657235329Savg		       "boot: ",
658241288Savg		       spa->spa_name, kname);
659235329Savg	}
660185029Spjd	if (ioctrl & IO_SERIAL)
661185029Spjd	    sio_flush();
662213136Spjd	if (!autoboot || keyhit(5))
663213136Spjd	    getstr(cmd, sizeof(cmd));
664185029Spjd	else if (!autoboot || !OPT_CHECK(RBX_QUIET))
665185029Spjd	    putchar('\n');
666185029Spjd	autoboot = 0;
667185029Spjd	if (parse())
668185029Spjd	    putchar('\a');
669185029Spjd	else
670185029Spjd	    load();
671185029Spjd    }
672185029Spjd}
673185029Spjd
/*
 * XXX - Needed for btxld to link the boot2 binary; do not remove.
 * Intentionally empty.
 */
void
exit(int x)
{
    (void)x;
}
679185029Spjd
680185029Spjdstatic void
681185029Spjdload(void)
682185029Spjd{
683185029Spjd    union {
684185029Spjd	struct exec ex;
685185029Spjd	Elf32_Ehdr eh;
686185029Spjd    } hdr;
687185029Spjd    static Elf32_Phdr ep[2];
688185029Spjd    static Elf32_Shdr es[2];
689185029Spjd    caddr_t p;
690185029Spjd    dnode_phys_t dn;
691185029Spjd    off_t off;
692185029Spjd    uint32_t addr, x;
693185029Spjd    int fmt, i, j;
694185029Spjd
695235329Savg    if (zfs_lookup(&zfsmount, kname, &dn)) {
696235329Savg	printf("\nCan't find %s\n", kname);
697185029Spjd	return;
698185029Spjd    }
699185029Spjd    off = 0;
700185029Spjd    if (xfsread(&dn, &off, &hdr, sizeof(hdr)))
701185029Spjd	return;
702185029Spjd    if (N_GETMAGIC(hdr.ex) == ZMAGIC)
703185029Spjd	fmt = 0;
704185029Spjd    else if (IS_ELF(hdr.eh))
705185029Spjd	fmt = 1;
706185029Spjd    else {
707185029Spjd	printf("Invalid %s\n", "format");
708185029Spjd	return;
709185029Spjd    }
710185029Spjd    if (fmt == 0) {
711185029Spjd	addr = hdr.ex.a_entry & 0xffffff;
712185029Spjd	p = PTOV(addr);
713185029Spjd	off = PAGE_SIZE;
714185029Spjd	if (xfsread(&dn, &off, p, hdr.ex.a_text))
715185029Spjd	    return;
716185029Spjd	p += roundup2(hdr.ex.a_text, PAGE_SIZE);
717185029Spjd	if (xfsread(&dn, &off, p, hdr.ex.a_data))
718185029Spjd	    return;
719185029Spjd	p += hdr.ex.a_data + roundup2(hdr.ex.a_bss, PAGE_SIZE);
720185029Spjd	bootinfo.bi_symtab = VTOP(p);
721185029Spjd	memcpy(p, &hdr.ex.a_syms, sizeof(hdr.ex.a_syms));
722185029Spjd	p += sizeof(hdr.ex.a_syms);
723185029Spjd	if (hdr.ex.a_syms) {
724185029Spjd	    if (xfsread(&dn, &off, p, hdr.ex.a_syms))
725185029Spjd		return;
726185029Spjd	    p += hdr.ex.a_syms;
727185029Spjd	    if (xfsread(&dn, &off, p, sizeof(int)))
728185029Spjd		return;
729185029Spjd	    x = *(uint32_t *)p;
730185029Spjd	    p += sizeof(int);
731185029Spjd	    x -= sizeof(int);
732185029Spjd	    if (xfsread(&dn, &off, p, x))
733185029Spjd		return;
734185029Spjd	    p += x;
735185029Spjd	}
736185029Spjd    } else {
737185029Spjd	off = hdr.eh.e_phoff;
738185029Spjd	for (j = i = 0; i < hdr.eh.e_phnum && j < 2; i++) {
739185029Spjd	    if (xfsread(&dn, &off, ep + j, sizeof(ep[0])))
740185029Spjd		return;
741185029Spjd	    if (ep[j].p_type == PT_LOAD)
742185029Spjd		j++;
743185029Spjd	}
744185029Spjd	for (i = 0; i < 2; i++) {
745185029Spjd	    p = PTOV(ep[i].p_paddr & 0xffffff);
746185029Spjd	    off = ep[i].p_offset;
747185029Spjd	    if (xfsread(&dn, &off, p, ep[i].p_filesz))
748185029Spjd		return;
749185029Spjd	}
750185029Spjd	p += roundup2(ep[1].p_memsz, PAGE_SIZE);
751185029Spjd	bootinfo.bi_symtab = VTOP(p);
752185029Spjd	if (hdr.eh.e_shnum == hdr.eh.e_shstrndx + 3) {
753185029Spjd	    off = hdr.eh.e_shoff + sizeof(es[0]) *
754185029Spjd		(hdr.eh.e_shstrndx + 1);
755185029Spjd	    if (xfsread(&dn, &off, &es, sizeof(es)))
756185029Spjd		return;
757185029Spjd	    for (i = 0; i < 2; i++) {
758185029Spjd		memcpy(p, &es[i].sh_size, sizeof(es[i].sh_size));
759185029Spjd		p += sizeof(es[i].sh_size);
760185029Spjd		off = es[i].sh_offset;
761185029Spjd		if (xfsread(&dn, &off, p, es[i].sh_size))
762185029Spjd		    return;
763185029Spjd		p += es[i].sh_size;
764185029Spjd	    }
765185029Spjd	}
766185029Spjd	addr = hdr.eh.e_entry & 0xffffff;
767185029Spjd    }
768185029Spjd    bootinfo.bi_esymtab = VTOP(p);
769185029Spjd    bootinfo.bi_kernelname = VTOP(kname);
770235329Savg    zfsargs.size = sizeof(zfsargs);
771235329Savg    zfsargs.pool = zfsmount.spa->spa_guid;
772235329Savg    zfsargs.root = zfsmount.rootobj;
773241293Savg    zfsargs.primary_pool = primary_spa->spa_guid;
774296963Sallanjude#ifdef LOADER_GELI_SUPPORT
775296963Sallanjude    bcopy(gelipw, zfsargs.gelipw, sizeof(zfsargs.gelipw));
776296963Sallanjude    bzero(gelipw, sizeof(gelipw));
777296963Sallanjude#else
778296963Sallanjude    zfsargs.gelipw[0] = '\0';
779296963Sallanjude#endif
780241293Savg    if (primary_vdev != NULL)
781241293Savg	zfsargs.primary_vdev = primary_vdev->v_guid;
782241293Savg    else
783241293Savg	printf("failed to detect primary vdev\n");
784185029Spjd    __exec((caddr_t)addr, RB_BOOTINFO | (opts & RBX_MASK),
785185029Spjd	   bootdev,
786235329Savg	   KARGS_FLAGS_ZFS | KARGS_FLAGS_EXTARG,
787185029Spjd	   (uint32_t) spa->spa_guid,
788185029Spjd	   (uint32_t) (spa->spa_guid >> 32),
789235329Savg	   VTOP(&bootinfo),
790235329Savg	   zfsargs);
791185029Spjd}
792185029Spjd
793185029Spjdstatic int
794241288Savgzfs_mount_ds(char *dsname)
795241288Savg{
796241288Savg    uint64_t newroot;
797241288Savg    spa_t *newspa;
798241288Savg    char *q;
799241288Savg
800241288Savg    q = strchr(dsname, '/');
801241288Savg    if (q)
802241288Savg	*q++ = '\0';
803241288Savg    newspa = spa_find_by_name(dsname);
804241288Savg    if (newspa == NULL) {
805241288Savg	printf("\nCan't find ZFS pool %s\n", dsname);
806241288Savg	return -1;
807241288Savg    }
808241288Savg
809241288Savg    if (zfs_spa_init(newspa))
810241288Savg	return -1;
811241288Savg
812241288Savg    newroot = 0;
813241288Savg    if (q) {
814241288Savg	if (zfs_lookup_dataset(newspa, q, &newroot)) {
815241288Savg	    printf("\nCan't find dataset %s in ZFS pool %s\n",
816241288Savg		    q, newspa->spa_name);
817241288Savg	    return -1;
818241288Savg	}
819241288Savg    }
820241288Savg    if (zfs_mount(newspa, newroot, &zfsmount)) {
821241288Savg	printf("\nCan't mount ZFS dataset\n");
822241288Savg	return -1;
823241288Savg    }
824241288Savg    spa = newspa;
825241288Savg    return (0);
826241288Savg}
827241288Savg
828241288Savgstatic int
829213136Spjdparse(void)
830185029Spjd{
831185029Spjd    char *arg = cmd;
832185029Spjd    char *ep, *p, *q;
833185029Spjd    const char *cp;
834185029Spjd    int c, i, j;
835185029Spjd
836185029Spjd    while ((c = *arg++)) {
837185029Spjd	if (c == ' ' || c == '\t' || c == '\n')
838185029Spjd	    continue;
839185029Spjd	for (p = arg; *p && *p != '\n' && *p != ' ' && *p != '\t'; p++);
840185029Spjd	ep = p;
841185029Spjd	if (*p)
842185029Spjd	    *p++ = 0;
843185029Spjd	if (c == '-') {
844185029Spjd	    while ((c = *arg++)) {
845185029Spjd		if (c == 'P') {
846185029Spjd		    if (*(uint8_t *)PTOV(0x496) & 0x10) {
847185029Spjd			cp = "yes";
848185029Spjd		    } else {
849185029Spjd			opts |= OPT_SET(RBX_DUAL) | OPT_SET(RBX_SERIAL);
850185029Spjd			cp = "no";
851185029Spjd		    }
852185029Spjd		    printf("Keyboard: %s\n", cp);
853185029Spjd		    continue;
854185029Spjd		} else if (c == 'S') {
855185029Spjd		    j = 0;
856185029Spjd		    while ((unsigned int)(i = *arg++ - '0') <= 9)
857185029Spjd			j = j * 10 + i;
858185029Spjd		    if (j > 0 && i == -'0') {
859185029Spjd			comspeed = j;
860185029Spjd			break;
861185029Spjd		    }
862185029Spjd		    /* Fall through to error below ('S' not in optstr[]). */
863185029Spjd		}
864185029Spjd		for (i = 0; c != optstr[i]; i++)
865185029Spjd		    if (i == NOPT - 1)
866185029Spjd			return -1;
867185029Spjd		opts ^= OPT_SET(flags[i]);
868185029Spjd	    }
869185029Spjd	    ioctrl = OPT_CHECK(RBX_DUAL) ? (IO_SERIAL|IO_KEYBOARD) :
870185029Spjd		     OPT_CHECK(RBX_SERIAL) ? IO_SERIAL : IO_KEYBOARD;
871241301Savg	    if (ioctrl & IO_SERIAL) {
872241301Savg	        if (sio_init(115200 / comspeed) != 0)
873241301Savg		    ioctrl &= ~IO_SERIAL;
874241301Savg	    }
875185029Spjd	} if (c == '?') {
876185029Spjd	    dnode_phys_t dn;
877185029Spjd
878235329Savg	    if (zfs_lookup(&zfsmount, arg, &dn) == 0) {
879185029Spjd		zap_list(spa, &dn);
880185029Spjd	    }
881185029Spjd	    return -1;
882185029Spjd	} else {
883185029Spjd	    arg--;
884185029Spjd
885185029Spjd	    /*
886185029Spjd	     * Report pool status if the comment is 'status'. Lets
887185029Spjd	     * hope no-one wants to load /status as a kernel.
888185029Spjd	     */
889185029Spjd	    if (!strcmp(arg, "status")) {
890185029Spjd		spa_all_status();
891185029Spjd		return -1;
892185029Spjd	    }
893185029Spjd
894185029Spjd	    /*
895241288Savg	     * If there is "zfs:" prefix simply ignore it.
896241288Savg	     */
897241288Savg	    if (strncmp(arg, "zfs:", 4) == 0)
898241288Savg		arg += 4;
899241288Savg
900241288Savg	    /*
901185029Spjd	     * If there is a colon, switch pools.
902185029Spjd	     */
903241288Savg	    q = strchr(arg, ':');
904185029Spjd	    if (q) {
905241288Savg		*q++ = '\0';
906241288Savg		if (zfs_mount_ds(arg) != 0)
907185029Spjd		    return -1;
908241288Savg		arg = q;
909185029Spjd	    }
910185029Spjd	    if ((i = ep - arg)) {
911185029Spjd		if ((size_t)i >= sizeof(kname))
912185029Spjd		    return -1;
913185029Spjd		memcpy(kname, arg, i + 1);
914185029Spjd	    }
915185029Spjd	}
916185029Spjd	arg = p;
917185029Spjd    }
918185029Spjd    return 0;
919185029Spjd}
920