1185029Spjd/*-
2185029Spjd * Copyright (c) 1998 Robert Nordier
3185029Spjd * All rights reserved.
4185029Spjd *
5185029Spjd * Redistribution and use in source and binary forms are freely
6185029Spjd * permitted provided that the above copyright notice and this
7185029Spjd * paragraph and the following disclaimer are duplicated in all
8185029Spjd * such forms.
9185029Spjd *
10185029Spjd * This software is provided "AS IS" and without any express or
11185029Spjd * implied warranties, including, without limitation, the implied
12185029Spjd * warranties of merchantability and fitness for a particular
13185029Spjd * purpose.
14185029Spjd */
15185029Spjd
16185029Spjd#include <sys/cdefs.h>
17185029Spjd__FBSDID("$FreeBSD: stable/10/sys/boot/i386/zfsboot/zfsboot.c 308915 2016-11-21 10:14:36Z avg $");
18185029Spjd
19185029Spjd#include <sys/param.h>
20185029Spjd#include <sys/errno.h>
21185029Spjd#include <sys/diskmbr.h>
22185096Sdfr#ifdef GPT
23185096Sdfr#include <sys/gpt.h>
24185096Sdfr#endif
25185029Spjd#include <sys/reboot.h>
26185029Spjd#include <sys/queue.h>
27185029Spjd
28185029Spjd#include <machine/bootinfo.h>
29185029Spjd#include <machine/elf.h>
30200309Sjhb#include <machine/pc/bios.h>
31185029Spjd
32185029Spjd#include <stdarg.h>
33185029Spjd#include <stddef.h>
34185029Spjd
35185029Spjd#include <a.out.h>
36185029Spjd
37185029Spjd#include <btxv86.h>
38185029Spjd
39185029Spjd#include "lib.h"
40213136Spjd#include "rbx.h"
41213136Spjd#include "drv.h"
42213136Spjd#include "util.h"
43213136Spjd#include "cons.h"
44235154Savg#include "bootargs.h"
45295453Semaste#include "paths.h"
46185029Spjd
47235329Savg#include "libzfs.h"
48235329Savg
/*
 * ARGS is the address (translated with PTOV()) from which main() reads the
 * boot drive number (first byte) and the slice number (second byte).
 * NOPT is the number of entries in optstr[]/flags[]; NDEV is the number of
 * entries in dev_maj[].
 */
#define ARGS		0x900
#define NOPT		14
#define NDEV		3

/*
 * BIOS_NUMDRIVES is the address of a byte that main() uses as the number
 * of hard drives to probe.  DRV_HARD is OR'ed into a unit number to form a
 * hard-disk drive number; DRV_MASK extracts the unit number again.
 */
#define BIOS_NUMDRIVES	0x475
#define DRV_HARD	0x80
#define DRV_MASK	0x7f

/* Device type codes stored in dsk->type; they index dev_maj[]. */
#define TYPE_AD		0
#define TYPE_DA		1
#define TYPE_MAXHARD	TYPE_DA
#define TYPE_FD		2

/* Its address is used by main() to place the DMA buffers after the image. */
extern uint32_t _end;

#ifdef GPT
/* Partition type that probe_drive() looks for in a GPT. */
static const uuid_t freebsd_zfs_uuid = GPT_ENT_TYPE_FREEBSD_ZFS;
#endif
/*
 * Option letters accepted by parse().  The string is exactly NOPT
 * characters long, so the array holds no terminating NUL.  flags[] is the
 * parallel table of RBX_* values toggled in 'opts' for each letter.
 */
static const char optstr[NOPT] = "DhaCcdgmnpqrsv"; /* Also 'P', 'S' */
static const unsigned char flags[NOPT] = {
    RBX_DUAL,
    RBX_SERIAL,
    RBX_ASKNAME,
    RBX_CDROM,
    RBX_CONFIG,
    RBX_KDB,
    RBX_GDB,
    RBX_MUTE,
    RBX_NOINTR,
    RBX_PAUSE,
    RBX_QUIET,
    RBX_DFLTROOT,
    RBX_SINGLE,
    RBX_VERBOSE
};
/* Currently selected boot options, tested with OPT_CHECK(). */
uint32_t opts;

/* Passed to MAKEBOOTDEV(), indexed by TYPE_*; presumably device majors. */
static const unsigned char dev_maj[NDEV] = {30, 4, 2};

/* Command line being parsed; parse() modifies it in place. */
static char cmd[512];
/* Pristine copy of cmd[] kept for printing after parse() has run. */
static char cmddup[512];
/* Path of the file that load() will read and execute. */
static char kname[1024];
/* Name of the mounted root dataset as filled in by zfs_rlookup(). */
static char rootname[256];
/* Serial console speed; changed by the -S option in parse(). */
static int comspeed = SIOSPD;
/* Boot information block whose address is handed to the loaded program. */
static struct bootinfo bootinfo;
/* Boot device cookie built with MAKEBOOTDEV() in main(). */
static uint32_t bootdev;
/* Extended ZFS arguments filled in by load() and passed to __exec(). */
static struct zfs_boot_args zfsargs;
/* Currently mounted dataset; replaced by zfs_mount_ds(). */
static struct zfsmount zfsmount;

/* Memory layout discovered by bios_getmem(). */
vm_offset_t	high_heap_base;
uint32_t	bios_basemem, bios_extmem, high_heap_size;

/* Buffer that receives one system memory map entry per BIOS call. */
static struct bios_smap smap;

/*
 * The minimum amount of memory to reserve in bios_extmem for the heap.
 */
#define	HEAP_MIN	(3 * 1024 * 1024)

/* Bounds of the bump allocator used by malloc(); set up in main(). */
static char *heap_next;
static char *heap_end;

/* Buffers that must not span a 64k boundary. */
#define READ_BUF_SIZE	8192
struct dmadat {
	char rdbuf[READ_BUF_SIZE];	/* for reading large things */
	char secbuf[READ_BUF_SIZE];	/* for MBR/disklabel */
};
/* Points at the 64k-aligned buffer area computed at the top of main(). */
static struct dmadat *dmadat;

/* Forward declarations for functions defined later in this file. */
void exit(int);
void reboot(void);
static void load(void);
static int parse(void);
static void bios_getmem(void);
124185029Spjd
125185029Spjdstatic void *
126185029Spjdmalloc(size_t n)
127185029Spjd{
128185029Spjd	char *p = heap_next;
129185029Spjd	if (p + n > heap_end) {
130185029Spjd		printf("malloc failure\n");
131185029Spjd		for (;;)
132185029Spjd		    ;
133185029Spjd		return 0;
134185029Spjd	}
135185029Spjd	heap_next += n;
136185029Spjd	return p;
137185029Spjd}
138185029Spjd
/*
 * Return a freshly allocated copy of the NUL-terminated string 's'.
 * The copy comes from malloc() and includes the terminator.
 */
static char *
strdup(const char *s)
{
	size_t total = strlen(s) + 1;	/* characters plus the NUL */
	char *copy = malloc(total);

	memcpy(copy, s, total);
	return copy;
}
146185029Spjd
147185029Spjd#include "zfsimpl.c"
148185029Spjd
149185029Spjd/*
150185029Spjd * Read from a dnode (which must be from a ZPL filesystem).
151185029Spjd */
152185029Spjdstatic int
153185029Spjdzfs_read(spa_t *spa, const dnode_phys_t *dnode, off_t *offp, void *start, size_t size)
154185029Spjd{
155185029Spjd	const znode_phys_t *zp = (const znode_phys_t *) dnode->dn_bonus;
156185029Spjd	size_t n;
157185029Spjd	int rc;
158185029Spjd
159185029Spjd	n = size;
160185029Spjd	if (*offp + n > zp->zp_size)
161185029Spjd		n = zp->zp_size - *offp;
162308915Savg
163185029Spjd	rc = dnode_read(spa, dnode, *offp, start, n);
164185029Spjd	if (rc)
165185029Spjd		return (-1);
166185029Spjd	*offp += n;
167185029Spjd
168185029Spjd	return (n);
169185029Spjd}
170185029Spjd
/*
 * Current ZFS pool
 *
 * 'spa' is the pool used for lookups and reads; zfs_mount_ds() switches it
 * when a command names a different pool.  'primary_spa' and 'primary_vdev'
 * are set once in main() from spa_get_primary() and
 * spa_get_primary_vdev(), and are reported to the loaded program by load().
 */
static spa_t *spa;
static spa_t *primary_spa;
static vdev_t *primary_vdev;
177185029Spjd
178185029Spjd/*
179185029Spjd * A wrapper for dskread that doesn't have to worry about whether the
180185029Spjd * buffer pointer crosses a 64k boundary.
181185029Spjd */
182185029Spjdstatic int
183185029Spjdvdev_read(vdev_t *vdev, void *priv, off_t off, void *buf, size_t bytes)
184185029Spjd{
185185029Spjd	char *p;
186199579Sjhb	daddr_t lba;
187199579Sjhb	unsigned int nb;
188185029Spjd	struct dsk *dsk = (struct dsk *) priv;
189185029Spjd
190185029Spjd	if ((off & (DEV_BSIZE - 1)) || (bytes & (DEV_BSIZE - 1)))
191185029Spjd		return -1;
192185029Spjd
193185029Spjd	p = buf;
194185029Spjd	lba = off / DEV_BSIZE;
195213136Spjd	lba += dsk->start;
196185029Spjd	while (bytes > 0) {
197185029Spjd		nb = bytes / DEV_BSIZE;
198185029Spjd		if (nb > READ_BUF_SIZE / DEV_BSIZE)
199185029Spjd			nb = READ_BUF_SIZE / DEV_BSIZE;
200185029Spjd		if (drvread(dsk, dmadat->rdbuf, lba, nb))
201185029Spjd			return -1;
202185029Spjd		memcpy(p, dmadat->rdbuf, nb * DEV_BSIZE);
203185029Spjd		p += nb * DEV_BSIZE;
204185029Spjd		lba += nb;
205185029Spjd		bytes -= nb * DEV_BSIZE;
206185029Spjd	}
207185029Spjd
208185029Spjd	return 0;
209185029Spjd}
210185029Spjd
211185029Spjdstatic int
212308915Savgvdev_write(vdev_t *vdev, void *priv, off_t off, void *buf, size_t bytes)
213308915Savg{
214308915Savg	char *p;
215308915Savg	daddr_t lba;
216308915Savg	unsigned int nb;
217308915Savg	struct dsk *dsk = (struct dsk *) priv;
218308915Savg
219308915Savg	if ((off & (DEV_BSIZE - 1)) || (bytes & (DEV_BSIZE - 1)))
220308915Savg		return -1;
221308915Savg
222308915Savg	p = buf;
223308915Savg	lba = off / DEV_BSIZE;
224308915Savg	lba += dsk->start;
225308915Savg	while (bytes > 0) {
226308915Savg		nb = bytes / DEV_BSIZE;
227308915Savg		if (nb > READ_BUF_SIZE / DEV_BSIZE)
228308915Savg			nb = READ_BUF_SIZE / DEV_BSIZE;
229308915Savg		memcpy(dmadat->rdbuf, p, nb * DEV_BSIZE);
230308915Savg		if (drvwrite(dsk, dmadat->rdbuf, lba, nb))
231308915Savg			return -1;
232308915Savg		p += nb * DEV_BSIZE;
233308915Savg		lba += nb;
234308915Savg		bytes -= nb * DEV_BSIZE;
235308915Savg	}
236308915Savg
237308915Savg	return 0;
238308915Savg}
239308915Savg
240308915Savgstatic int
241185029Spjdxfsread(const dnode_phys_t *dnode, off_t *offp, void *buf, size_t nbyte)
242185029Spjd{
243185029Spjd    if ((size_t)zfs_read(spa, dnode, offp, buf, nbyte) != nbyte) {
244235329Savg	printf("Invalid format\n");
245185029Spjd	return -1;
246185029Spjd    }
247185029Spjd    return 0;
248185029Spjd}
249185029Spjd
250308915Savg/*
251308915Savg * Read Pad2 (formerly "Boot Block Header") area of the first
252308915Savg * vdev label of the given vdev.
253308915Savg */
254308915Savgstatic int
255308915Savgvdev_read_pad2(vdev_t *vdev, char *buf, size_t size)
256308915Savg{
257308915Savg	blkptr_t bp;
258308915Savg	char *tmp = zap_scratch;
259308915Savg	off_t off = offsetof(vdev_label_t, vl_pad2);
260308915Savg
261308915Savg	if (size > VDEV_PAD_SIZE)
262308915Savg		size = VDEV_PAD_SIZE;
263308915Savg
264308915Savg	BP_ZERO(&bp);
265308915Savg	BP_SET_LSIZE(&bp, VDEV_PAD_SIZE);
266308915Savg	BP_SET_PSIZE(&bp, VDEV_PAD_SIZE);
267308915Savg	BP_SET_CHECKSUM(&bp, ZIO_CHECKSUM_LABEL);
268308915Savg	BP_SET_COMPRESS(&bp, ZIO_COMPRESS_OFF);
269308915Savg	DVA_SET_OFFSET(BP_IDENTITY(&bp), off);
270308915Savg	if (vdev_read_phys(vdev, &bp, tmp, off, 0))
271308915Savg		return (EIO);
272308915Savg	memcpy(buf, tmp, size);
273308915Savg	return (0);
274308915Savg}
275308915Savg
276308915Savgstatic int
277308915Savgvdev_clear_pad2(vdev_t *vdev)
278308915Savg{
279308915Savg	char *zeroes = zap_scratch;
280308915Savg	uint64_t *end;
281308915Savg	off_t off = offsetof(vdev_label_t, vl_pad2);
282308915Savg
283308915Savg	memset(zeroes, 0, VDEV_PAD_SIZE);
284308915Savg	end = (uint64_t *)(zeroes + VDEV_PAD_SIZE);
285308915Savg	/* ZIO_CHECKSUM_LABEL magic and pre-calcualted checksum for all zeros */
286308915Savg	end[-5] = 0x0210da7ab10c7a11;
287308915Savg	end[-4] = 0x97f48f807f6e2a3f;
288308915Savg	end[-3] = 0xaf909f1658aacefc;
289308915Savg	end[-2] = 0xcbd1ea57ff6db48b;
290308915Savg	end[-1] = 0x6ec692db0d465fab;
291308915Savg	if (vdev_write(vdev, vdev->v_read_priv, off, zeroes, VDEV_PAD_SIZE))
292308915Savg		return (EIO);
293308915Savg	return (0);
294308915Savg}
295308915Savg
296200309Sjhbstatic void
297200309Sjhbbios_getmem(void)
298185029Spjd{
299200309Sjhb    uint64_t size;
300185029Spjd
301200309Sjhb    /* Parse system memory map */
302200309Sjhb    v86.ebx = 0;
303200309Sjhb    do {
304200309Sjhb	v86.ctl = V86_FLAGS;
305200309Sjhb	v86.addr = 0x15;		/* int 0x15 function 0xe820*/
306200309Sjhb	v86.eax = 0xe820;
307200309Sjhb	v86.ecx = sizeof(struct bios_smap);
308200309Sjhb	v86.edx = SMAP_SIG;
309200309Sjhb	v86.es = VTOPSEG(&smap);
310200309Sjhb	v86.edi = VTOPOFF(&smap);
311200309Sjhb	v86int();
312294480Sjhb	if (V86_CY(v86.efl) || (v86.eax != SMAP_SIG))
313200309Sjhb	    break;
314200309Sjhb	/* look for a low-memory segment that's large enough */
315200309Sjhb	if ((smap.type == SMAP_TYPE_MEMORY) && (smap.base == 0) &&
316200309Sjhb	    (smap.length >= (512 * 1024)))
317200309Sjhb	    bios_basemem = smap.length;
318200309Sjhb	/* look for the first segment in 'extended' memory */
319200309Sjhb	if ((smap.type == SMAP_TYPE_MEMORY) && (smap.base == 0x100000)) {
320200309Sjhb	    bios_extmem = smap.length;
321200309Sjhb	}
322200309Sjhb
323200309Sjhb	/*
324200309Sjhb	 * Look for the largest segment in 'extended' memory beyond
325200309Sjhb	 * 1MB but below 4GB.
326200309Sjhb	 */
327200309Sjhb	if ((smap.type == SMAP_TYPE_MEMORY) && (smap.base > 0x100000) &&
328200309Sjhb	    (smap.base < 0x100000000ull)) {
329200309Sjhb	    size = smap.length;
330200309Sjhb
331200309Sjhb	    /*
332200309Sjhb	     * If this segment crosses the 4GB boundary, truncate it.
333200309Sjhb	     */
334200309Sjhb	    if (smap.base + size > 0x100000000ull)
335200309Sjhb		size = 0x100000000ull - smap.base;
336200309Sjhb
337200309Sjhb	    if (size > high_heap_size) {
338200309Sjhb		high_heap_size = size;
339200309Sjhb		high_heap_base = smap.base;
340200309Sjhb	    }
341200309Sjhb	}
342200309Sjhb    } while (v86.ebx != 0);
343200309Sjhb
344200309Sjhb    /* Fall back to the old compatibility function for base memory */
345200309Sjhb    if (bios_basemem == 0) {
346200309Sjhb	v86.ctl = 0;
347200309Sjhb	v86.addr = 0x12;		/* int 0x12 */
348200309Sjhb	v86int();
349200309Sjhb
350200309Sjhb	bios_basemem = (v86.eax & 0xffff) * 1024;
351200309Sjhb    }
352200309Sjhb
353200309Sjhb    /* Fall back through several compatibility functions for extended memory */
354200309Sjhb    if (bios_extmem == 0) {
355200309Sjhb	v86.ctl = V86_FLAGS;
356200309Sjhb	v86.addr = 0x15;		/* int 0x15 function 0xe801*/
357200309Sjhb	v86.eax = 0xe801;
358200309Sjhb	v86int();
359294480Sjhb	if (!V86_CY(v86.efl)) {
360200309Sjhb	    bios_extmem = ((v86.ecx & 0xffff) + ((v86.edx & 0xffff) * 64)) * 1024;
361200309Sjhb	}
362200309Sjhb    }
363200309Sjhb    if (bios_extmem == 0) {
364200309Sjhb	v86.ctl = 0;
365200309Sjhb	v86.addr = 0x15;		/* int 0x15 function 0x88*/
366200309Sjhb	v86.eax = 0x8800;
367200309Sjhb	v86int();
368200309Sjhb	bios_extmem = (v86.eax & 0xffff) * 1024;
369200309Sjhb    }
370200309Sjhb
371200309Sjhb    /*
372200309Sjhb     * If we have extended memory and did not find a suitable heap
373200309Sjhb     * region in the SMAP, use the last 3MB of 'extended' memory as a
374200309Sjhb     * high heap candidate.
375200309Sjhb     */
376200309Sjhb    if (bios_extmem >= HEAP_MIN && high_heap_size < HEAP_MIN) {
377200309Sjhb	high_heap_size = HEAP_MIN;
378200309Sjhb	high_heap_base = bios_extmem + 0x100000 - HEAP_MIN;
379200309Sjhb    }
380200309Sjhb}
381200309Sjhb
382185029Spjd/*
383185029Spjd * Try to detect a device supported by the legacy int13 BIOS
384185029Spjd */
385185029Spjdstatic int
386185029Spjdint13probe(int drive)
387185029Spjd{
388185029Spjd    v86.ctl = V86_FLAGS;
389185029Spjd    v86.addr = 0x13;
390185029Spjd    v86.eax = 0x800;
391185029Spjd    v86.edx = drive;
392185029Spjd    v86int();
393185029Spjd
394294480Sjhb    if (!V86_CY(v86.efl) &&				/* carry clear */
395185029Spjd	((v86.edx & 0xff) != (drive & DRV_MASK))) {	/* unit # OK */
396185029Spjd	if ((v86.ecx & 0x3f) == 0) {			/* absurd sector size */
397185029Spjd		return(0);				/* skip device */
398185029Spjd	}
399185029Spjd	return (1);
400185029Spjd    }
401185029Spjd    return(0);
402185029Spjd}
403185029Spjd
404192194Sdfr/*
405192194Sdfr * We call this when we find a ZFS vdev - ZFS consumes the dsk
406192194Sdfr * structure so we must make a new one.
407192194Sdfr */
408192194Sdfrstatic struct dsk *
409192194Sdfrcopy_dsk(struct dsk *dsk)
410192194Sdfr{
411192194Sdfr    struct dsk *newdsk;
412192194Sdfr
413192194Sdfr    newdsk = malloc(sizeof(struct dsk));
414192194Sdfr    *newdsk = *dsk;
415192194Sdfr    return (newdsk);
416192194Sdfr}
417192194Sdfr
418185029Spjdstatic void
419241294Savgprobe_drive(struct dsk *dsk)
420185029Spjd{
421185096Sdfr#ifdef GPT
422185096Sdfr    struct gpt_hdr hdr;
423185096Sdfr    struct gpt_ent *ent;
424185096Sdfr    daddr_t slba, elba;
425185096Sdfr    unsigned part, entries_per_sec;
426185096Sdfr#endif
427185029Spjd    struct dos_partition *dp;
428185029Spjd    char *sec;
429185029Spjd    unsigned i;
430185029Spjd
431185029Spjd    /*
432185029Spjd     * If we find a vdev on the whole disk, stop here. Otherwise dig
433241294Savg     * out the partition table and probe each slice/partition
434241294Savg     * in turn for a vdev.
435185029Spjd     */
436241294Savg    if (vdev_probe(vdev_read, dsk, NULL) == 0)
437185029Spjd	return;
438185029Spjd
439185029Spjd    sec = dmadat->secbuf;
440185029Spjd    dsk->start = 0;
441185096Sdfr
442185096Sdfr#ifdef GPT
443185096Sdfr    /*
444185096Sdfr     * First check for GPT.
445185096Sdfr     */
446185096Sdfr    if (drvread(dsk, sec, 1, 1)) {
447185096Sdfr	return;
448185096Sdfr    }
449185096Sdfr    memcpy(&hdr, sec, sizeof(hdr));
450185096Sdfr    if (memcmp(hdr.hdr_sig, GPT_HDR_SIG, sizeof(hdr.hdr_sig)) != 0 ||
451185096Sdfr	hdr.hdr_lba_self != 1 || hdr.hdr_revision < 0x00010000 ||
452185096Sdfr	hdr.hdr_entsz < sizeof(*ent) || DEV_BSIZE % hdr.hdr_entsz != 0) {
453185096Sdfr	goto trymbr;
454185096Sdfr    }
455185096Sdfr
456185096Sdfr    /*
457185096Sdfr     * Probe all GPT partitions for the presense of ZFS pools. We
458185096Sdfr     * return the spa_t for the first we find (if requested). This
459185096Sdfr     * will have the effect of booting from the first pool on the
460185096Sdfr     * disk.
461185096Sdfr     */
462185096Sdfr    entries_per_sec = DEV_BSIZE / hdr.hdr_entsz;
463185096Sdfr    slba = hdr.hdr_lba_table;
464185096Sdfr    elba = slba + hdr.hdr_entries / entries_per_sec;
465185096Sdfr    while (slba < elba) {
466198420Srnoland	dsk->start = 0;
467185096Sdfr	if (drvread(dsk, sec, slba, 1))
468185096Sdfr	    return;
469185096Sdfr	for (part = 0; part < entries_per_sec; part++) {
470185096Sdfr	    ent = (struct gpt_ent *)(sec + part * hdr.hdr_entsz);
471185096Sdfr	    if (memcmp(&ent->ent_type, &freebsd_zfs_uuid,
472185096Sdfr		     sizeof(uuid_t)) == 0) {
473185096Sdfr		dsk->start = ent->ent_lba_start;
474241294Savg		if (vdev_probe(vdev_read, dsk, NULL) == 0) {
475185096Sdfr		    /*
476185096Sdfr		     * This slice had a vdev. We need a new dsk
477185096Sdfr		     * structure now since the vdev now owns this one.
478185096Sdfr		     */
479192194Sdfr		    dsk = copy_dsk(dsk);
480185096Sdfr		}
481185096Sdfr	    }
482185096Sdfr	}
483185096Sdfr	slba++;
484185096Sdfr    }
485185096Sdfr    return;
486185096Sdfrtrymbr:
487185096Sdfr#endif
488185096Sdfr
489185029Spjd    if (drvread(dsk, sec, DOSBBSECTOR, 1))
490185029Spjd	return;
491185029Spjd    dp = (void *)(sec + DOSPARTOFF);
492185029Spjd
493185029Spjd    for (i = 0; i < NDOSPART; i++) {
494185029Spjd	if (!dp[i].dp_typ)
495185029Spjd	    continue;
496185029Spjd	dsk->start = dp[i].dp_start;
497241294Savg	if (vdev_probe(vdev_read, dsk, NULL) == 0) {
498185029Spjd	    /*
499185029Spjd	     * This slice had a vdev. We need a new dsk structure now
500185096Sdfr	     * since the vdev now owns this one.
501185029Spjd	     */
502192194Sdfr	    dsk = copy_dsk(dsk);
503185029Spjd	}
504185029Spjd    }
505185029Spjd}
506185029Spjd
507185029Spjdint
508185029Spjdmain(void)
509185029Spjd{
510185029Spjd    dnode_phys_t dn;
511185029Spjd    off_t off;
512185029Spjd    struct dsk *dsk;
513308915Savg    int autoboot, i;
514308915Savg    int nextboot;
515308915Savg    int rc;
516185029Spjd
517208388Sjhb    dmadat = (void *)(roundup2(__base + (int32_t)&_end, 0x10000) - __base);
518208388Sjhb
519200309Sjhb    bios_getmem();
520200309Sjhb
521200309Sjhb    if (high_heap_size > 0) {
522200309Sjhb	heap_end = PTOV(high_heap_base + high_heap_size);
523200309Sjhb	heap_next = PTOV(high_heap_base);
524200309Sjhb    } else {
525200309Sjhb	heap_next = (char *) dmadat + sizeof(*dmadat);
526200309Sjhb	heap_end = (char *) PTOV(bios_basemem);
527200309Sjhb    }
528200309Sjhb
529185029Spjd    dsk = malloc(sizeof(struct dsk));
530185029Spjd    dsk->drive = *(uint8_t *)PTOV(ARGS);
531185029Spjd    dsk->type = dsk->drive & DRV_HARD ? TYPE_AD : TYPE_FD;
532185029Spjd    dsk->unit = dsk->drive & DRV_MASK;
533185029Spjd    dsk->slice = *(uint8_t *)PTOV(ARGS + 1) + 1;
534185029Spjd    dsk->part = 0;
535185029Spjd    dsk->start = 0;
536185029Spjd    dsk->init = 0;
537185029Spjd
538185029Spjd    bootinfo.bi_version = BOOTINFO_VERSION;
539185029Spjd    bootinfo.bi_size = sizeof(bootinfo);
540200309Sjhb    bootinfo.bi_basemem = bios_basemem / 1024;
541200309Sjhb    bootinfo.bi_extmem = bios_extmem / 1024;
542185029Spjd    bootinfo.bi_memsizes_valid++;
543185029Spjd    bootinfo.bi_bios_dev = dsk->drive;
544185029Spjd
545185029Spjd    bootdev = MAKEBOOTDEV(dev_maj[dsk->type],
546305615Spfg			  dsk->slice, dsk->unit, dsk->part);
547185029Spjd
548185029Spjd    /* Process configuration file */
549185029Spjd
550185029Spjd    autoboot = 1;
551185029Spjd
552185029Spjd    zfs_init();
553185029Spjd
554185029Spjd    /*
555185029Spjd     * Probe the boot drive first - we will try to boot from whatever
556185029Spjd     * pool we find on that drive.
557185029Spjd     */
558241294Savg    probe_drive(dsk);
559185029Spjd
560185029Spjd    /*
561185029Spjd     * Probe the rest of the drives that the bios knows about. This
562185029Spjd     * will find any other available pools and it may fill in missing
563185029Spjd     * vdevs for the boot pool.
564185029Spjd     */
565212805Spjd#ifndef VIRTUALBOX
566212805Spjd    for (i = 0; i < *(unsigned char *)PTOV(BIOS_NUMDRIVES); i++)
567212805Spjd#else
568212805Spjd    for (i = 0; i < MAXBDDEV; i++)
569212805Spjd#endif
570212805Spjd    {
571185029Spjd	if ((i | DRV_HARD) == *(uint8_t *)PTOV(ARGS))
572185029Spjd	    continue;
573185029Spjd
574192194Sdfr	if (!int13probe(i | DRV_HARD))
575192194Sdfr	    break;
576192194Sdfr
577185029Spjd	dsk = malloc(sizeof(struct dsk));
578185029Spjd	dsk->drive = i | DRV_HARD;
579185029Spjd	dsk->type = dsk->drive & TYPE_AD;
580185029Spjd	dsk->unit = i;
581185029Spjd	dsk->slice = 0;
582185029Spjd	dsk->part = 0;
583185029Spjd	dsk->start = 0;
584185029Spjd	dsk->init = 0;
585241294Savg	probe_drive(dsk);
586185029Spjd    }
587185029Spjd
588185029Spjd    /*
589241294Savg     * The first discovered pool, if any, is the pool.
590185029Spjd     */
591241294Savg    spa = spa_get_primary();
592185029Spjd    if (!spa) {
593241294Savg	printf("%s: No ZFS pools located, can't boot\n", BOOTPROG);
594241294Savg	for (;;)
595241294Savg	    ;
596185029Spjd    }
597185029Spjd
598241293Savg    primary_spa = spa;
599241293Savg    primary_vdev = spa_get_primary_vdev(spa);
600241293Savg
601308915Savg    nextboot = 0;
602308915Savg    rc  = vdev_read_pad2(primary_vdev, cmd, sizeof(cmd));
603308915Savg    if (vdev_clear_pad2(primary_vdev))
604308915Savg	printf("failed to clear pad2 area of primary vdev\n");
605308915Savg    if (rc == 0) {
606308915Savg	if (*cmd) {
607308915Savg	    /*
608308915Savg	     * We could find an old-style ZFS Boot Block header here.
609308915Savg	     * Simply ignore it.
610308915Savg	     */
611308915Savg	    if (*(uint64_t *)cmd != 0x2f5b007b10c) {
612308915Savg		/*
613308915Savg		 * Note that parse() is destructive to cmd[] and we also want
614308915Savg		 * to honor RBX_QUIET option that could be present in cmd[].
615308915Savg		 */
616308915Savg		nextboot = 1;
617308915Savg		memcpy(cmddup, cmd, sizeof(cmd));
618308915Savg		if (parse()) {
619308915Savg		    printf("failed to parse pad2 area of primary vdev\n");
620308915Savg		    reboot();
621308915Savg		}
622308915Savg		if (!OPT_CHECK(RBX_QUIET))
623308915Savg		    printf("zfs nextboot: %s\n", cmddup);
624308915Savg	    }
625308915Savg	    /* Do not process this command twice */
626308915Savg	    *cmd = 0;
627308915Savg	}
628308915Savg    } else
629308915Savg	printf("failed to read pad2 area of primary vdev\n");
630308915Savg
631308915Savg    /* Mount ZFS only if it's not already mounted via nextboot parsing. */
632308915Savg    if (zfsmount.spa == NULL &&
633308915Savg	(zfs_spa_init(spa) != 0 || zfs_mount(spa, 0, &zfsmount) != 0)) {
634235329Savg	printf("%s: failed to mount default pool %s\n",
635235329Savg	    BOOTPROG, spa->spa_name);
636235329Savg	autoboot = 0;
637235329Savg    } else if (zfs_lookup(&zfsmount, PATH_CONFIG, &dn) == 0 ||
638235329Savg        zfs_lookup(&zfsmount, PATH_DOTCONFIG, &dn) == 0) {
639185029Spjd	off = 0;
640198079Sjhb	zfs_read(spa, &dn, &off, cmd, sizeof(cmd));
641185029Spjd    }
642185029Spjd
643185029Spjd    if (*cmd) {
644234339Savg	/*
645234339Savg	 * Note that parse() is destructive to cmd[] and we also want
646234339Savg	 * to honor RBX_QUIET option that could be present in cmd[].
647234339Savg	 */
648234339Savg	memcpy(cmddup, cmd, sizeof(cmd));
649185029Spjd	if (parse())
650185029Spjd	    autoboot = 0;
651234339Savg	if (!OPT_CHECK(RBX_QUIET))
652241288Savg	    printf("%s: %s\n", PATH_CONFIG, cmddup);
653185029Spjd	/* Do not process this command twice */
654185029Spjd	*cmd = 0;
655185029Spjd    }
656185029Spjd
657308915Savg    /* Do not risk waiting at the prompt forever. */
658308915Savg    if (nextboot && !autoboot)
659308915Savg	reboot();
660308915Savg
661185029Spjd    /*
662295453Semaste     * Try to exec /boot/loader. If interrupted by a keypress,
663185029Spjd     * or in case of failure, try to load a kernel directly instead.
664185029Spjd     */
665185029Spjd
666185029Spjd    if (autoboot && !*kname) {
667295453Semaste	memcpy(kname, PATH_LOADER_ZFS, sizeof(PATH_LOADER_ZFS));
668213136Spjd	if (!keyhit(3)) {
669185029Spjd	    load();
670185029Spjd	    memcpy(kname, PATH_KERNEL, sizeof(PATH_KERNEL));
671185029Spjd	}
672185029Spjd    }
673185029Spjd
674185029Spjd    /* Present the user with the boot2 prompt. */
675185029Spjd
676185029Spjd    for (;;) {
677235329Savg	if (!autoboot || !OPT_CHECK(RBX_QUIET)) {
678235329Savg	    printf("\nFreeBSD/x86 boot\n");
679235329Savg	    if (zfs_rlookup(spa, zfsmount.rootobj, rootname) != 0)
680241288Savg		printf("Default: %s/<0x%llx>:%s\n"
681235329Savg		       "boot: ",
682235329Savg		       spa->spa_name, zfsmount.rootobj, kname);
683241288Savg	    else if (rootname[0] != '\0')
684241288Savg		printf("Default: %s/%s:%s\n"
685241288Savg		       "boot: ",
686241288Savg		       spa->spa_name, rootname, kname);
687235329Savg	    else
688241288Savg		printf("Default: %s:%s\n"
689235329Savg		       "boot: ",
690241288Savg		       spa->spa_name, kname);
691235329Savg	}
692185029Spjd	if (ioctrl & IO_SERIAL)
693185029Spjd	    sio_flush();
694213136Spjd	if (!autoboot || keyhit(5))
695213136Spjd	    getstr(cmd, sizeof(cmd));
696185029Spjd	else if (!autoboot || !OPT_CHECK(RBX_QUIET))
697185029Spjd	    putchar('\n');
698185029Spjd	autoboot = 0;
699185029Spjd	if (parse())
700185029Spjd	    putchar('\a');
701185029Spjd	else
702185029Spjd	    load();
703185029Spjd    }
704185029Spjd}
705185029Spjd
/*
 * XXX - Needed for btxld to link the boot2 binary; do not remove.
 *
 * Forwards the status 'x' unchanged to __exit().
 */
void
exit(int x)
{
    __exit(x);
}
712185029Spjd
/*
 * Leave the boot program by calling __exit() with status 0.
 * NOTE(review): presumably this makes the machine restart - confirm
 * against the __exit() implementation.
 */
void
reboot(void)
{
    __exit(0);
}
718308915Savg
719185029Spjdstatic void
720185029Spjdload(void)
721185029Spjd{
722185029Spjd    union {
723185029Spjd	struct exec ex;
724185029Spjd	Elf32_Ehdr eh;
725185029Spjd    } hdr;
726185029Spjd    static Elf32_Phdr ep[2];
727185029Spjd    static Elf32_Shdr es[2];
728185029Spjd    caddr_t p;
729185029Spjd    dnode_phys_t dn;
730185029Spjd    off_t off;
731185029Spjd    uint32_t addr, x;
732185029Spjd    int fmt, i, j;
733185029Spjd
734235329Savg    if (zfs_lookup(&zfsmount, kname, &dn)) {
735235329Savg	printf("\nCan't find %s\n", kname);
736185029Spjd	return;
737185029Spjd    }
738185029Spjd    off = 0;
739185029Spjd    if (xfsread(&dn, &off, &hdr, sizeof(hdr)))
740185029Spjd	return;
741185029Spjd    if (N_GETMAGIC(hdr.ex) == ZMAGIC)
742185029Spjd	fmt = 0;
743185029Spjd    else if (IS_ELF(hdr.eh))
744185029Spjd	fmt = 1;
745185029Spjd    else {
746185029Spjd	printf("Invalid %s\n", "format");
747185029Spjd	return;
748185029Spjd    }
749185029Spjd    if (fmt == 0) {
750185029Spjd	addr = hdr.ex.a_entry & 0xffffff;
751185029Spjd	p = PTOV(addr);
752185029Spjd	off = PAGE_SIZE;
753185029Spjd	if (xfsread(&dn, &off, p, hdr.ex.a_text))
754185029Spjd	    return;
755185029Spjd	p += roundup2(hdr.ex.a_text, PAGE_SIZE);
756185029Spjd	if (xfsread(&dn, &off, p, hdr.ex.a_data))
757185029Spjd	    return;
758185029Spjd	p += hdr.ex.a_data + roundup2(hdr.ex.a_bss, PAGE_SIZE);
759185029Spjd	bootinfo.bi_symtab = VTOP(p);
760185029Spjd	memcpy(p, &hdr.ex.a_syms, sizeof(hdr.ex.a_syms));
761185029Spjd	p += sizeof(hdr.ex.a_syms);
762185029Spjd	if (hdr.ex.a_syms) {
763185029Spjd	    if (xfsread(&dn, &off, p, hdr.ex.a_syms))
764185029Spjd		return;
765185029Spjd	    p += hdr.ex.a_syms;
766185029Spjd	    if (xfsread(&dn, &off, p, sizeof(int)))
767185029Spjd		return;
768185029Spjd	    x = *(uint32_t *)p;
769185029Spjd	    p += sizeof(int);
770185029Spjd	    x -= sizeof(int);
771185029Spjd	    if (xfsread(&dn, &off, p, x))
772185029Spjd		return;
773185029Spjd	    p += x;
774185029Spjd	}
775185029Spjd    } else {
776185029Spjd	off = hdr.eh.e_phoff;
777185029Spjd	for (j = i = 0; i < hdr.eh.e_phnum && j < 2; i++) {
778185029Spjd	    if (xfsread(&dn, &off, ep + j, sizeof(ep[0])))
779185029Spjd		return;
780185029Spjd	    if (ep[j].p_type == PT_LOAD)
781185029Spjd		j++;
782185029Spjd	}
783185029Spjd	for (i = 0; i < 2; i++) {
784185029Spjd	    p = PTOV(ep[i].p_paddr & 0xffffff);
785185029Spjd	    off = ep[i].p_offset;
786185029Spjd	    if (xfsread(&dn, &off, p, ep[i].p_filesz))
787185029Spjd		return;
788185029Spjd	}
789185029Spjd	p += roundup2(ep[1].p_memsz, PAGE_SIZE);
790185029Spjd	bootinfo.bi_symtab = VTOP(p);
791185029Spjd	if (hdr.eh.e_shnum == hdr.eh.e_shstrndx + 3) {
792185029Spjd	    off = hdr.eh.e_shoff + sizeof(es[0]) *
793185029Spjd		(hdr.eh.e_shstrndx + 1);
794185029Spjd	    if (xfsread(&dn, &off, &es, sizeof(es)))
795185029Spjd		return;
796185029Spjd	    for (i = 0; i < 2; i++) {
797185029Spjd		memcpy(p, &es[i].sh_size, sizeof(es[i].sh_size));
798185029Spjd		p += sizeof(es[i].sh_size);
799185029Spjd		off = es[i].sh_offset;
800185029Spjd		if (xfsread(&dn, &off, p, es[i].sh_size))
801185029Spjd		    return;
802185029Spjd		p += es[i].sh_size;
803185029Spjd	    }
804185029Spjd	}
805185029Spjd	addr = hdr.eh.e_entry & 0xffffff;
806185029Spjd    }
807185029Spjd    bootinfo.bi_esymtab = VTOP(p);
808185029Spjd    bootinfo.bi_kernelname = VTOP(kname);
809235329Savg    zfsargs.size = sizeof(zfsargs);
810235329Savg    zfsargs.pool = zfsmount.spa->spa_guid;
811235329Savg    zfsargs.root = zfsmount.rootobj;
812241293Savg    zfsargs.primary_pool = primary_spa->spa_guid;
813241293Savg    if (primary_vdev != NULL)
814241293Savg	zfsargs.primary_vdev = primary_vdev->v_guid;
815241293Savg    else
816241293Savg	printf("failed to detect primary vdev\n");
817185029Spjd    __exec((caddr_t)addr, RB_BOOTINFO | (opts & RBX_MASK),
818185029Spjd	   bootdev,
819235329Savg	   KARGS_FLAGS_ZFS | KARGS_FLAGS_EXTARG,
820185029Spjd	   (uint32_t) spa->spa_guid,
821185029Spjd	   (uint32_t) (spa->spa_guid >> 32),
822235329Savg	   VTOP(&bootinfo),
823235329Savg	   zfsargs);
824185029Spjd}
825185029Spjd
826185029Spjdstatic int
827241288Savgzfs_mount_ds(char *dsname)
828241288Savg{
829241288Savg    uint64_t newroot;
830241288Savg    spa_t *newspa;
831241288Savg    char *q;
832241288Savg
833241288Savg    q = strchr(dsname, '/');
834241288Savg    if (q)
835241288Savg	*q++ = '\0';
836241288Savg    newspa = spa_find_by_name(dsname);
837241288Savg    if (newspa == NULL) {
838241288Savg	printf("\nCan't find ZFS pool %s\n", dsname);
839241288Savg	return -1;
840241288Savg    }
841241288Savg
842241288Savg    if (zfs_spa_init(newspa))
843241288Savg	return -1;
844241288Savg
845241288Savg    newroot = 0;
846241288Savg    if (q) {
847241288Savg	if (zfs_lookup_dataset(newspa, q, &newroot)) {
848241288Savg	    printf("\nCan't find dataset %s in ZFS pool %s\n",
849241288Savg		    q, newspa->spa_name);
850241288Savg	    return -1;
851241288Savg	}
852241288Savg    }
853241288Savg    if (zfs_mount(newspa, newroot, &zfsmount)) {
854241288Savg	printf("\nCan't mount ZFS dataset\n");
855241288Savg	return -1;
856241288Savg    }
857241288Savg    spa = newspa;
858241288Savg    return (0);
859241288Savg}
860241288Savg
861241288Savgstatic int
862213136Spjdparse(void)
863185029Spjd{
864185029Spjd    char *arg = cmd;
865185029Spjd    char *ep, *p, *q;
866185029Spjd    const char *cp;
867185029Spjd    int c, i, j;
868185029Spjd
869185029Spjd    while ((c = *arg++)) {
870185029Spjd	if (c == ' ' || c == '\t' || c == '\n')
871185029Spjd	    continue;
872185029Spjd	for (p = arg; *p && *p != '\n' && *p != ' ' && *p != '\t'; p++);
873185029Spjd	ep = p;
874185029Spjd	if (*p)
875185029Spjd	    *p++ = 0;
876185029Spjd	if (c == '-') {
877185029Spjd	    while ((c = *arg++)) {
878185029Spjd		if (c == 'P') {
879185029Spjd		    if (*(uint8_t *)PTOV(0x496) & 0x10) {
880185029Spjd			cp = "yes";
881185029Spjd		    } else {
882185029Spjd			opts |= OPT_SET(RBX_DUAL) | OPT_SET(RBX_SERIAL);
883185029Spjd			cp = "no";
884185029Spjd		    }
885185029Spjd		    printf("Keyboard: %s\n", cp);
886185029Spjd		    continue;
887185029Spjd		} else if (c == 'S') {
888185029Spjd		    j = 0;
889185029Spjd		    while ((unsigned int)(i = *arg++ - '0') <= 9)
890185029Spjd			j = j * 10 + i;
891185029Spjd		    if (j > 0 && i == -'0') {
892185029Spjd			comspeed = j;
893185029Spjd			break;
894185029Spjd		    }
895185029Spjd		    /* Fall through to error below ('S' not in optstr[]). */
896185029Spjd		}
897185029Spjd		for (i = 0; c != optstr[i]; i++)
898185029Spjd		    if (i == NOPT - 1)
899185029Spjd			return -1;
900185029Spjd		opts ^= OPT_SET(flags[i]);
901185029Spjd	    }
902185029Spjd	    ioctrl = OPT_CHECK(RBX_DUAL) ? (IO_SERIAL|IO_KEYBOARD) :
903185029Spjd		     OPT_CHECK(RBX_SERIAL) ? IO_SERIAL : IO_KEYBOARD;
904241301Savg	    if (ioctrl & IO_SERIAL) {
905241301Savg	        if (sio_init(115200 / comspeed) != 0)
906241301Savg		    ioctrl &= ~IO_SERIAL;
907241301Savg	    }
908185029Spjd	} if (c == '?') {
909185029Spjd	    dnode_phys_t dn;
910185029Spjd
911235329Savg	    if (zfs_lookup(&zfsmount, arg, &dn) == 0) {
912185029Spjd		zap_list(spa, &dn);
913185029Spjd	    }
914185029Spjd	    return -1;
915185029Spjd	} else {
916185029Spjd	    arg--;
917185029Spjd
918185029Spjd	    /*
919185029Spjd	     * Report pool status if the comment is 'status'. Lets
920185029Spjd	     * hope no-one wants to load /status as a kernel.
921185029Spjd	     */
922185029Spjd	    if (!strcmp(arg, "status")) {
923185029Spjd		spa_all_status();
924185029Spjd		return -1;
925185029Spjd	    }
926185029Spjd
927185029Spjd	    /*
928241288Savg	     * If there is "zfs:" prefix simply ignore it.
929241288Savg	     */
930241288Savg	    if (strncmp(arg, "zfs:", 4) == 0)
931241288Savg		arg += 4;
932241288Savg
933241288Savg	    /*
934185029Spjd	     * If there is a colon, switch pools.
935185029Spjd	     */
936241288Savg	    q = strchr(arg, ':');
937185029Spjd	    if (q) {
938241288Savg		*q++ = '\0';
939241288Savg		if (zfs_mount_ds(arg) != 0)
940185029Spjd		    return -1;
941241288Savg		arg = q;
942185029Spjd	    }
943185029Spjd	    if ((i = ep - arg)) {
944185029Spjd		if ((size_t)i >= sizeof(kname))
945185029Spjd		    return -1;
946185029Spjd		memcpy(kname, arg, i + 1);
947185029Spjd	    }
948185029Spjd	}
949185029Spjd	arg = p;
950185029Spjd    }
951185029Spjd    return 0;
952185029Spjd}
953