zfsboot.c revision 198420
1185029Spjd/*- 2185029Spjd * Copyright (c) 1998 Robert Nordier 3185029Spjd * All rights reserved. 4185029Spjd * 5185029Spjd * Redistribution and use in source and binary forms are freely 6185029Spjd * permitted provided that the above copyright notice and this 7185029Spjd * paragraph and the following disclaimer are duplicated in all 8185029Spjd * such forms. 9185029Spjd * 10185029Spjd * This software is provided "AS IS" and without any express or 11185029Spjd * implied warranties, including, without limitation, the implied 12185029Spjd * warranties of merchantability and fitness for a particular 13185029Spjd * purpose. 14185029Spjd */ 15185029Spjd 16185029Spjd#include <sys/cdefs.h> 17185029Spjd__FBSDID("$FreeBSD: head/sys/boot/i386/zfsboot/zfsboot.c 198420 2009-10-23 18:44:53Z rnoland $"); 18185029Spjd 19185029Spjd#include <sys/param.h> 20185029Spjd#include <sys/errno.h> 21185029Spjd#include <sys/diskmbr.h> 22185096Sdfr#ifdef GPT 23185096Sdfr#include <sys/gpt.h> 24185096Sdfr#endif 25185029Spjd#include <sys/reboot.h> 26185029Spjd#include <sys/queue.h> 27185029Spjd 28185029Spjd#include <machine/bootinfo.h> 29185029Spjd#include <machine/elf.h> 30185029Spjd 31185029Spjd#include <stdarg.h> 32185029Spjd#include <stddef.h> 33185029Spjd 34185029Spjd#include <a.out.h> 35185029Spjd 36185029Spjd#include <btxv86.h> 37185029Spjd 38185096Sdfr#ifndef GPT 39185029Spjd#include "zfsboot.h" 40185096Sdfr#endif 41185029Spjd#include "lib.h" 42185029Spjd 43185029Spjd#define IO_KEYBOARD 1 44185029Spjd#define IO_SERIAL 2 45185029Spjd 46185029Spjd#define SECOND 18 /* Circa that many ticks in a second. */ 47185029Spjd 48185029Spjd#define RBX_ASKNAME 0x0 /* -a */ 49185029Spjd#define RBX_SINGLE 0x1 /* -s */ 50185029Spjd/* 0x2 is reserved for log2(RB_NOSYNC). */ 51185029Spjd/* 0x3 is reserved for log2(RB_HALT). */ 52185029Spjd/* 0x4 is reserved for log2(RB_INITNAME). */ 53185029Spjd#define RBX_DFLTROOT 0x5 /* -r */ 54185029Spjd#define RBX_KDB 0x6 /* -d */ 55185029Spjd/* 0x7 is reserved for log2(RB_RDONLY). */ 56185029Spjd/* 0x8 is reserved for log2(RB_DUMP). */ 57185029Spjd/* 0x9 is reserved for log2(RB_MINIROOT). */ 58185029Spjd#define RBX_CONFIG 0xa /* -c */ 59185029Spjd#define RBX_VERBOSE 0xb /* -v */ 60185029Spjd#define RBX_SERIAL 0xc /* -h */ 61185029Spjd#define RBX_CDROM 0xd /* -C */ 62185029Spjd/* 0xe is reserved for log2(RB_POWEROFF). */ 63185029Spjd#define RBX_GDB 0xf /* -g */ 64185029Spjd#define RBX_MUTE 0x10 /* -m */ 65185029Spjd/* 0x11 is reserved for log2(RB_SELFTEST). */ 66185029Spjd/* 0x12 is reserved for boot programs. */ 67185029Spjd/* 0x13 is reserved for boot programs. */ 68185029Spjd#define RBX_PAUSE 0x14 /* -p */ 69185029Spjd#define RBX_QUIET 0x15 /* -q */ 70185029Spjd#define RBX_NOINTR 0x1c /* -n */ 71185029Spjd/* 0x1d is reserved for log2(RB_MULTIPLE) and is just misnamed here. */ 72185029Spjd#define RBX_DUAL 0x1d /* -D */ 73185029Spjd/* 0x1f is reserved for log2(RB_BOOTINFO). */ 74185029Spjd 75185029Spjd/* pass: -a, -s, -r, -d, -c, -v, -h, -C, -g, -m, -p, -D */ 76185029Spjd#define RBX_MASK (OPT_SET(RBX_ASKNAME) | OPT_SET(RBX_SINGLE) | \ 77185029Spjd OPT_SET(RBX_DFLTROOT) | OPT_SET(RBX_KDB ) | \ 78185029Spjd OPT_SET(RBX_CONFIG) | OPT_SET(RBX_VERBOSE) | \ 79185029Spjd OPT_SET(RBX_SERIAL) | OPT_SET(RBX_CDROM) | \ 80185029Spjd OPT_SET(RBX_GDB ) | OPT_SET(RBX_MUTE) | \ 81185029Spjd OPT_SET(RBX_PAUSE) | OPT_SET(RBX_DUAL)) 82185029Spjd 83185029Spjd/* Hint to loader that we came from ZFS */ 84185029Spjd#define KARGS_FLAGS_ZFS 0x4 85185029Spjd 86185029Spjd#define PATH_CONFIG "/boot.config" 87185029Spjd#define PATH_BOOT3 "/boot/loader" 88185029Spjd#define PATH_KERNEL "/boot/kernel/kernel" 89185029Spjd 90185029Spjd#define ARGS 0x900 91185029Spjd#define NOPT 14 92185029Spjd#define NDEV 3 93185029Spjd#define MEM_BASE 0x12 94185029Spjd#define MEM_EXT 0x15 95185029Spjd#define V86_CY(x) ((x) & 1) 96185029Spjd#define V86_ZR(x) ((x) & 0x40) 97185029Spjd 98185029Spjd#define DRV_HARD 0x80 99185029Spjd#define DRV_MASK 0x7f 100185029Spjd 101185029Spjd#define TYPE_AD 0 102185029Spjd#define TYPE_DA 1 103185029Spjd#define TYPE_MAXHARD TYPE_DA 104185029Spjd#define TYPE_FD 2 105185029Spjd 106185029Spjd#define OPT_SET(opt) (1 << (opt)) 107185029Spjd#define OPT_CHECK(opt) ((opts) & OPT_SET(opt)) 108185029Spjd 109185029Spjdextern uint32_t _end; 110185029Spjd 111185096Sdfr#ifdef GPT 112185096Sdfrstatic const uuid_t freebsd_zfs_uuid = GPT_ENT_TYPE_FREEBSD_ZFS; 113185096Sdfr#endif 114185029Spjdstatic const char optstr[NOPT] = "DhaCcdgmnpqrsv"; /* Also 'P', 'S' */ 115185029Spjdstatic const unsigned char flags[NOPT] = { 116185029Spjd RBX_DUAL, 117185029Spjd RBX_SERIAL, 118185029Spjd RBX_ASKNAME, 119185029Spjd RBX_CDROM, 120185029Spjd RBX_CONFIG, 121185029Spjd RBX_KDB, 122185029Spjd RBX_GDB, 123185029Spjd RBX_MUTE, 124185029Spjd RBX_NOINTR, 125185029Spjd RBX_PAUSE, 126185029Spjd RBX_QUIET, 127185029Spjd RBX_DFLTROOT, 128185029Spjd RBX_SINGLE, 129185029Spjd RBX_VERBOSE 130185029Spjd}; 131185029Spjd 132185029Spjdstatic const char *const dev_nm[NDEV] = {"ad", "da", "fd"}; 133185029Spjdstatic const unsigned char dev_maj[NDEV] = {30, 4, 2}; 134185029Spjd 135185029Spjdstruct dsk { 136185029Spjd unsigned drive; 137185029Spjd unsigned type; 138185029Spjd unsigned unit; 139185029Spjd unsigned slice; 140185029Spjd unsigned part; 141185029Spjd unsigned start; 142185029Spjd int init; 143185029Spjd}; 144185029Spjdstatic char cmd[512]; 145185029Spjdstatic char kname[1024]; 146185029Spjdstatic uint32_t opts; 147185029Spjdstatic int comspeed = SIOSPD; 148185029Spjdstatic struct bootinfo bootinfo; 149185029Spjdstatic uint32_t bootdev; 150185029Spjdstatic uint8_t ioctrl = IO_KEYBOARD; 151185029Spjd 152185029Spjd/* Buffers that must not span a 64k boundary. */ 153185029Spjd#define READ_BUF_SIZE 8192 154185029Spjdstruct dmadat { 155185029Spjd char rdbuf[READ_BUF_SIZE]; /* for reading large things */ 156185029Spjd char secbuf[READ_BUF_SIZE]; /* for MBR/disklabel */ 157185029Spjd}; 158185029Spjdstatic struct dmadat *dmadat; 159185029Spjd 160185029Spjdvoid exit(int); 161185029Spjdstatic void load(void); 162185029Spjdstatic int parse(void); 163185029Spjdstatic void printf(const char *,...); 164185029Spjdstatic void putchar(int); 165185029Spjdstatic uint32_t memsize(void); 166185029Spjdstatic int drvread(struct dsk *, void *, unsigned, unsigned); 167185029Spjdstatic int keyhit(unsigned); 168185029Spjdstatic int xputc(int); 169185029Spjdstatic int xgetc(int); 170185029Spjdstatic int getc(int); 171185029Spjd 172185029Spjdstatic void memcpy(void *, const void *, int); 173185029Spjdstatic void 174185029Spjdmemcpy(void *dst, const void *src, int len) 175185029Spjd{ 176185029Spjd const char *s = src; 177185029Spjd char *d = dst; 178185029Spjd 179185029Spjd while (len--) 180185029Spjd *d++ = *s++; 181185029Spjd} 182185029Spjd 183185029Spjdstatic void 184185029Spjdstrcpy(char *dst, const char *src) 185185029Spjd{ 186185029Spjd while (*src) 187185029Spjd *dst++ = *src++; 188185029Spjd *dst++ = 0; 189185029Spjd} 190185029Spjd 191185029Spjdstatic void 192185029Spjdstrcat(char *dst, const char *src) 193185029Spjd{ 194185029Spjd while (*dst) 195185029Spjd dst++; 196185029Spjd while (*src) 197185029Spjd *dst++ = *src++; 198185029Spjd *dst++ = 0; 199185029Spjd} 200185029Spjd 201185029Spjdstatic int 202185029Spjdstrcmp(const char *s1, const char *s2) 203185029Spjd{ 204185029Spjd for (; *s1 == *s2 && *s1; s1++, s2++); 205185029Spjd return (unsigned char)*s1 - (unsigned char)*s2; 206185029Spjd} 207185029Spjd 208185029Spjdstatic const char * 209185029Spjdstrchr(const char *s, char ch) 210185029Spjd{ 211185029Spjd for (; *s; s++) 212185029Spjd if (*s == ch) 213185029Spjd return s; 214185029Spjd return 0; 215185029Spjd} 216185029Spjd 217185029Spjdstatic int 218185029Spjdmemcmp(const void *p1, const void *p2, size_t n) 219185029Spjd{ 220185029Spjd const char *s1 = (const char *) p1; 221185029Spjd const char *s2 = (const char *) p2; 222185029Spjd for (; n > 0 && *s1 == *s2; s1++, s2++, n--); 223185029Spjd if (n) 224185029Spjd return (unsigned char)*s1 - (unsigned char)*s2; 225185029Spjd else 226185029Spjd return 0; 227185029Spjd} 228185029Spjd 229185029Spjdstatic void 230185029Spjdmemset(void *p, char val, size_t n) 231185029Spjd{ 232185029Spjd char *s = (char *) p; 233185029Spjd while (n--) 234185029Spjd *s++ = val; 235185029Spjd} 236185029Spjd 237185029Spjdstatic void * 238185029Spjdmalloc(size_t n) 239185029Spjd{ 240185029Spjd static char *heap_next; 241185029Spjd static char *heap_end; 242185029Spjd 243185029Spjd if (!heap_next) { 244185029Spjd heap_next = (char *) dmadat + sizeof(*dmadat); 245185029Spjd heap_end = (char *) (640*1024); 246185029Spjd } 247185029Spjd 248185029Spjd char *p = heap_next; 249185029Spjd if (p + n > heap_end) { 250185029Spjd printf("malloc failure\n"); 251185029Spjd for (;;) 252185029Spjd ; 253185029Spjd return 0; 254185029Spjd } 255185029Spjd heap_next += n; 256185029Spjd return p; 257185029Spjd} 258185029Spjd 259185029Spjdstatic size_t 260185029Spjdstrlen(const char *s) 261185029Spjd{ 262185029Spjd size_t len = 0; 263185029Spjd while (*s++) 264185029Spjd len++; 265185029Spjd return len; 266185029Spjd} 267185029Spjd 268185029Spjdstatic char * 269185029Spjdstrdup(const char *s) 270185029Spjd{ 271185029Spjd char *p = malloc(strlen(s) + 1); 272185029Spjd strcpy(p, s); 273185029Spjd return p; 274185029Spjd} 275185029Spjd 276185029Spjd#include "zfsimpl.c" 277185029Spjd 278185029Spjd/* 279185029Spjd * Read from a dnode (which must be from a ZPL filesystem). 280185029Spjd */ 281185029Spjdstatic int 282185029Spjdzfs_read(spa_t *spa, const dnode_phys_t *dnode, off_t *offp, void *start, size_t size) 283185029Spjd{ 284185029Spjd const znode_phys_t *zp = (const znode_phys_t *) dnode->dn_bonus; 285185029Spjd size_t n; 286185029Spjd int rc; 287185029Spjd 288185029Spjd n = size; 289185029Spjd if (*offp + n > zp->zp_size) 290185029Spjd n = zp->zp_size - *offp; 291185029Spjd 292185029Spjd rc = dnode_read(spa, dnode, *offp, start, n); 293185029Spjd if (rc) 294185029Spjd return (-1); 295185029Spjd *offp += n; 296185029Spjd 297185029Spjd return (n); 298185029Spjd} 299185029Spjd 300185029Spjd/* 301185029Spjd * Current ZFS pool 302185029Spjd */ 303185029Spjdspa_t *spa; 304185029Spjd 305185029Spjd/* 306185029Spjd * A wrapper for dskread that doesn't have to worry about whether the 307185029Spjd * buffer pointer crosses a 64k boundary. 308185029Spjd */ 309185029Spjdstatic int 310185029Spjdvdev_read(vdev_t *vdev, void *priv, off_t off, void *buf, size_t bytes) 311185029Spjd{ 312185029Spjd char *p; 313185029Spjd unsigned int lba, nb; 314185029Spjd struct dsk *dsk = (struct dsk *) priv; 315185029Spjd 316185029Spjd if ((off & (DEV_BSIZE - 1)) || (bytes & (DEV_BSIZE - 1))) 317185029Spjd return -1; 318185029Spjd 319185029Spjd p = buf; 320185029Spjd lba = off / DEV_BSIZE; 321185029Spjd while (bytes > 0) { 322185029Spjd nb = bytes / DEV_BSIZE; 323185029Spjd if (nb > READ_BUF_SIZE / DEV_BSIZE) 324185029Spjd nb = READ_BUF_SIZE / DEV_BSIZE; 325185029Spjd if (drvread(dsk, dmadat->rdbuf, lba, nb)) 326185029Spjd return -1; 327185029Spjd memcpy(p, dmadat->rdbuf, nb * DEV_BSIZE); 328185029Spjd p += nb * DEV_BSIZE; 329185029Spjd lba += nb; 330185029Spjd bytes -= nb * DEV_BSIZE; 331185029Spjd } 332185029Spjd 333185029Spjd return 0; 334185029Spjd} 335185029Spjd 336185029Spjdstatic int 337185029Spjdxfsread(const dnode_phys_t *dnode, off_t *offp, void *buf, size_t nbyte) 338185029Spjd{ 339185029Spjd if ((size_t)zfs_read(spa, dnode, offp, buf, nbyte) != nbyte) { 340185029Spjd printf("Invalid %s\n", "format"); 341185029Spjd return -1; 342185029Spjd } 343185029Spjd return 0; 344185029Spjd} 345185029Spjd 346185029Spjdstatic inline uint32_t 347185029Spjdmemsize(void) 348185029Spjd{ 349185029Spjd v86.addr = MEM_EXT; 350185029Spjd v86.eax = 0x8800; 351185029Spjd v86int(); 352185029Spjd return v86.eax; 353185029Spjd} 354185029Spjd 355185029Spjdstatic inline void 356185029Spjdgetstr(void) 357185029Spjd{ 358185029Spjd char *s; 359185029Spjd int c; 360185029Spjd 361185029Spjd s = cmd; 362185029Spjd for (;;) { 363185029Spjd switch (c = xgetc(0)) { 364185029Spjd case 0: 365185029Spjd break; 366185029Spjd case '\177': 367185029Spjd case '\b': 368185029Spjd if (s > cmd) { 369185029Spjd s--; 370185029Spjd printf("\b \b"); 371185029Spjd } 372185029Spjd break; 373185029Spjd case '\n': 374185029Spjd case '\r': 375185029Spjd *s = 0; 376185029Spjd return; 377185029Spjd default: 378185029Spjd if (s - cmd < sizeof(cmd) - 1) 379185029Spjd *s++ = c; 380185029Spjd putchar(c); 381185029Spjd } 382185029Spjd } 383185029Spjd} 384185029Spjd 385185029Spjdstatic inline void 386185029Spjdputc(int c) 387185029Spjd{ 388185029Spjd v86.addr = 0x10; 389185029Spjd v86.eax = 0xe00 | (c & 0xff); 390185029Spjd v86.ebx = 0x7; 391185029Spjd v86int(); 392185029Spjd} 393185029Spjd 394185029Spjd/* 395185029Spjd * Try to detect a device supported by the legacy int13 BIOS 396185029Spjd */ 397185029Spjdstatic int 398185029Spjdint13probe(int drive) 399185029Spjd{ 400185029Spjd v86.ctl = V86_FLAGS; 401185029Spjd v86.addr = 0x13; 402185029Spjd v86.eax = 0x800; 403185029Spjd v86.edx = drive; 404185029Spjd v86int(); 405185029Spjd 406185029Spjd if (!(v86.efl & 0x1) && /* carry clear */ 407185029Spjd ((v86.edx & 0xff) != (drive & DRV_MASK))) { /* unit # OK */ 408185029Spjd if ((v86.ecx & 0x3f) == 0) { /* absurd sector size */ 409185029Spjd return(0); /* skip device */ 410185029Spjd } 411185029Spjd return (1); 412185029Spjd } 413185029Spjd return(0); 414185029Spjd} 415185029Spjd 416192194Sdfr/* 417192194Sdfr * We call this when we find a ZFS vdev - ZFS consumes the dsk 418192194Sdfr * structure so we must make a new one. 419192194Sdfr */ 420192194Sdfrstatic struct dsk * 421192194Sdfrcopy_dsk(struct dsk *dsk) 422192194Sdfr{ 423192194Sdfr struct dsk *newdsk; 424192194Sdfr 425192194Sdfr newdsk = malloc(sizeof(struct dsk)); 426192194Sdfr *newdsk = *dsk; 427192194Sdfr return (newdsk); 428192194Sdfr} 429192194Sdfr 430185029Spjdstatic void 431185029Spjdprobe_drive(struct dsk *dsk, spa_t **spap) 432185029Spjd{ 433185096Sdfr#ifdef GPT 434185096Sdfr struct gpt_hdr hdr; 435185096Sdfr struct gpt_ent *ent; 436185096Sdfr daddr_t slba, elba; 437185096Sdfr unsigned part, entries_per_sec; 438185096Sdfr#endif 439185029Spjd struct dos_partition *dp; 440185029Spjd char *sec; 441185029Spjd unsigned i; 442185029Spjd 443185029Spjd /* 444185029Spjd * If we find a vdev on the whole disk, stop here. Otherwise dig 445185029Spjd * out the MBR and probe each slice in turn for a vdev. 446185029Spjd */ 447185029Spjd if (vdev_probe(vdev_read, dsk, spap) == 0) 448185029Spjd return; 449185029Spjd 450185029Spjd sec = dmadat->secbuf; 451185029Spjd dsk->start = 0; 452185096Sdfr 453185096Sdfr#ifdef GPT 454185096Sdfr /* 455185096Sdfr * First check for GPT. 456185096Sdfr */ 457185096Sdfr if (drvread(dsk, sec, 1, 1)) { 458185096Sdfr return; 459185096Sdfr } 460185096Sdfr memcpy(&hdr, sec, sizeof(hdr)); 461185096Sdfr if (memcmp(hdr.hdr_sig, GPT_HDR_SIG, sizeof(hdr.hdr_sig)) != 0 || 462185096Sdfr hdr.hdr_lba_self != 1 || hdr.hdr_revision < 0x00010000 || 463185096Sdfr hdr.hdr_entsz < sizeof(*ent) || DEV_BSIZE % hdr.hdr_entsz != 0) { 464185096Sdfr goto trymbr; 465185096Sdfr } 466185096Sdfr 467185096Sdfr /* 468185096Sdfr * Probe all GPT partitions for the presense of ZFS pools. We 469185096Sdfr * return the spa_t for the first we find (if requested). This 470185096Sdfr * will have the effect of booting from the first pool on the 471185096Sdfr * disk. 472185096Sdfr */ 473185096Sdfr entries_per_sec = DEV_BSIZE / hdr.hdr_entsz; 474185096Sdfr slba = hdr.hdr_lba_table; 475185096Sdfr elba = slba + hdr.hdr_entries / entries_per_sec; 476185096Sdfr while (slba < elba) { 477198420Srnoland dsk->start = 0; 478185096Sdfr if (drvread(dsk, sec, slba, 1)) 479185096Sdfr return; 480185096Sdfr for (part = 0; part < entries_per_sec; part++) { 481185096Sdfr ent = (struct gpt_ent *)(sec + part * hdr.hdr_entsz); 482185096Sdfr if (memcmp(&ent->ent_type, &freebsd_zfs_uuid, 483185096Sdfr sizeof(uuid_t)) == 0) { 484185096Sdfr dsk->start = ent->ent_lba_start; 485185096Sdfr if (vdev_probe(vdev_read, dsk, spap) == 0) { 486185096Sdfr /* 487185096Sdfr * We record the first pool we find (we will try 488192194Sdfr * to boot from that one). 489185096Sdfr */ 490185096Sdfr spap = 0; 491185096Sdfr 492185096Sdfr /* 493185096Sdfr * This slice had a vdev. We need a new dsk 494185096Sdfr * structure now since the vdev now owns this one. 495185096Sdfr */ 496192194Sdfr dsk = copy_dsk(dsk); 497185096Sdfr } 498185096Sdfr } 499185096Sdfr } 500185096Sdfr slba++; 501185096Sdfr } 502185096Sdfr return; 503185096Sdfrtrymbr: 504185096Sdfr#endif 505185096Sdfr 506185029Spjd if (drvread(dsk, sec, DOSBBSECTOR, 1)) 507185029Spjd return; 508185029Spjd dp = (void *)(sec + DOSPARTOFF); 509185029Spjd 510185029Spjd for (i = 0; i < NDOSPART; i++) { 511185029Spjd if (!dp[i].dp_typ) 512185029Spjd continue; 513185029Spjd dsk->start = dp[i].dp_start; 514185029Spjd if (vdev_probe(vdev_read, dsk, spap) == 0) { 515185029Spjd /* 516185029Spjd * We record the first pool we find (we will try to boot 517185029Spjd * from that one. 518185029Spjd */ 519185029Spjd spap = 0; 520185029Spjd 521185029Spjd /* 522185029Spjd * This slice had a vdev. We need a new dsk structure now 523185096Sdfr * since the vdev now owns this one. 524185029Spjd */ 525192194Sdfr dsk = copy_dsk(dsk); 526185029Spjd } 527185029Spjd } 528185029Spjd} 529185029Spjd 530185029Spjdint 531185029Spjdmain(void) 532185029Spjd{ 533185029Spjd int autoboot, i; 534185029Spjd dnode_phys_t dn; 535185029Spjd off_t off; 536185029Spjd struct dsk *dsk; 537185029Spjd 538185029Spjd dmadat = (void *)(roundup2(__base + (int32_t)&_end, 0x10000) - __base); 539185029Spjd v86.ctl = V86_FLAGS; 540185029Spjd 541185029Spjd dsk = malloc(sizeof(struct dsk)); 542185029Spjd dsk->drive = *(uint8_t *)PTOV(ARGS); 543185029Spjd dsk->type = dsk->drive & DRV_HARD ? TYPE_AD : TYPE_FD; 544185029Spjd dsk->unit = dsk->drive & DRV_MASK; 545185029Spjd dsk->slice = *(uint8_t *)PTOV(ARGS + 1) + 1; 546185029Spjd dsk->part = 0; 547185029Spjd dsk->start = 0; 548185029Spjd dsk->init = 0; 549185029Spjd 550185029Spjd bootinfo.bi_version = BOOTINFO_VERSION; 551185029Spjd bootinfo.bi_size = sizeof(bootinfo); 552185029Spjd bootinfo.bi_basemem = 0; /* XXX will be filled by loader or kernel */ 553185029Spjd bootinfo.bi_extmem = memsize(); 554185029Spjd bootinfo.bi_memsizes_valid++; 555185029Spjd bootinfo.bi_bios_dev = dsk->drive; 556185029Spjd 557185029Spjd bootdev = MAKEBOOTDEV(dev_maj[dsk->type], 558185029Spjd dsk->slice, dsk->unit, dsk->part), 559185029Spjd 560185029Spjd /* Process configuration file */ 561185029Spjd 562185029Spjd autoboot = 1; 563185029Spjd 564185029Spjd zfs_init(); 565185029Spjd 566185029Spjd /* 567185029Spjd * Probe the boot drive first - we will try to boot from whatever 568185029Spjd * pool we find on that drive. 569185029Spjd */ 570185029Spjd probe_drive(dsk, &spa); 571185029Spjd 572185029Spjd /* 573185029Spjd * Probe the rest of the drives that the bios knows about. This 574185029Spjd * will find any other available pools and it may fill in missing 575185029Spjd * vdevs for the boot pool. 576185029Spjd */ 577192194Sdfr for (i = 0; i < 128; i++) { 578185029Spjd if ((i | DRV_HARD) == *(uint8_t *)PTOV(ARGS)) 579185029Spjd continue; 580185029Spjd 581192194Sdfr if (!int13probe(i | DRV_HARD)) 582192194Sdfr break; 583192194Sdfr 584185029Spjd dsk = malloc(sizeof(struct dsk)); 585185029Spjd dsk->drive = i | DRV_HARD; 586185029Spjd dsk->type = dsk->drive & TYPE_AD; 587185029Spjd dsk->unit = i; 588185029Spjd dsk->slice = 0; 589185029Spjd dsk->part = 0; 590185029Spjd dsk->start = 0; 591185029Spjd dsk->init = 0; 592185029Spjd probe_drive(dsk, 0); 593185029Spjd } 594185029Spjd 595185029Spjd /* 596185029Spjd * If we didn't find a pool on the boot drive, default to the 597185029Spjd * first pool we found, if any. 598185029Spjd */ 599185029Spjd if (!spa) { 600185029Spjd spa = STAILQ_FIRST(&zfs_pools); 601185029Spjd if (!spa) { 602185029Spjd printf("No ZFS pools located, can't boot\n"); 603185029Spjd for (;;) 604185029Spjd ; 605185029Spjd } 606185029Spjd } 607185029Spjd 608185029Spjd zfs_mount_pool(spa); 609185029Spjd 610185029Spjd if (zfs_lookup(spa, PATH_CONFIG, &dn) == 0) { 611185029Spjd off = 0; 612198079Sjhb zfs_read(spa, &dn, &off, cmd, sizeof(cmd)); 613185029Spjd } 614185029Spjd 615185029Spjd if (*cmd) { 616185029Spjd if (parse()) 617185029Spjd autoboot = 0; 618185029Spjd if (!OPT_CHECK(RBX_QUIET)) 619185029Spjd printf("%s: %s", PATH_CONFIG, cmd); 620185029Spjd /* Do not process this command twice */ 621185029Spjd *cmd = 0; 622185029Spjd } 623185029Spjd 624185029Spjd /* 625185029Spjd * Try to exec stage 3 boot loader. If interrupted by a keypress, 626185029Spjd * or in case of failure, try to load a kernel directly instead. 627185029Spjd */ 628185029Spjd 629185029Spjd if (autoboot && !*kname) { 630185029Spjd memcpy(kname, PATH_BOOT3, sizeof(PATH_BOOT3)); 631185029Spjd if (!keyhit(3*SECOND)) { 632185029Spjd load(); 633185029Spjd memcpy(kname, PATH_KERNEL, sizeof(PATH_KERNEL)); 634185029Spjd } 635185029Spjd } 636185029Spjd 637185029Spjd /* Present the user with the boot2 prompt. */ 638185029Spjd 639185029Spjd for (;;) { 640185029Spjd if (!autoboot || !OPT_CHECK(RBX_QUIET)) 641185029Spjd printf("\nFreeBSD/i386 boot\n" 642185029Spjd "Default: %s:%s\n" 643185029Spjd "boot: ", 644185029Spjd spa->spa_name, kname); 645185029Spjd if (ioctrl & IO_SERIAL) 646185029Spjd sio_flush(); 647185029Spjd if (!autoboot || keyhit(5*SECOND)) 648185029Spjd getstr(); 649185029Spjd else if (!autoboot || !OPT_CHECK(RBX_QUIET)) 650185029Spjd putchar('\n'); 651185029Spjd autoboot = 0; 652185029Spjd if (parse()) 653185029Spjd putchar('\a'); 654185029Spjd else 655185029Spjd load(); 656185029Spjd } 657185029Spjd} 658185029Spjd 659185029Spjd/* XXX - Needed for btxld to link the boot2 binary; do not remove. */ 660185029Spjdvoid 661185029Spjdexit(int x) 662185029Spjd{ 663185029Spjd} 664185029Spjd 665185029Spjdstatic void 666185029Spjdload(void) 667185029Spjd{ 668185029Spjd union { 669185029Spjd struct exec ex; 670185029Spjd Elf32_Ehdr eh; 671185029Spjd } hdr; 672185029Spjd static Elf32_Phdr ep[2]; 673185029Spjd static Elf32_Shdr es[2]; 674185029Spjd caddr_t p; 675185029Spjd dnode_phys_t dn; 676185029Spjd off_t off; 677185029Spjd uint32_t addr, x; 678185029Spjd int fmt, i, j; 679185029Spjd 680185029Spjd if (zfs_lookup(spa, kname, &dn)) { 681185029Spjd return; 682185029Spjd } 683185029Spjd off = 0; 684185029Spjd if (xfsread(&dn, &off, &hdr, sizeof(hdr))) 685185029Spjd return; 686185029Spjd if (N_GETMAGIC(hdr.ex) == ZMAGIC) 687185029Spjd fmt = 0; 688185029Spjd else if (IS_ELF(hdr.eh)) 689185029Spjd fmt = 1; 690185029Spjd else { 691185029Spjd printf("Invalid %s\n", "format"); 692185029Spjd return; 693185029Spjd } 694185029Spjd if (fmt == 0) { 695185029Spjd addr = hdr.ex.a_entry & 0xffffff; 696185029Spjd p = PTOV(addr); 697185029Spjd off = PAGE_SIZE; 698185029Spjd if (xfsread(&dn, &off, p, hdr.ex.a_text)) 699185029Spjd return; 700185029Spjd p += roundup2(hdr.ex.a_text, PAGE_SIZE); 701185029Spjd if (xfsread(&dn, &off, p, hdr.ex.a_data)) 702185029Spjd return; 703185029Spjd p += hdr.ex.a_data + roundup2(hdr.ex.a_bss, PAGE_SIZE); 704185029Spjd bootinfo.bi_symtab = VTOP(p); 705185029Spjd memcpy(p, &hdr.ex.a_syms, sizeof(hdr.ex.a_syms)); 706185029Spjd p += sizeof(hdr.ex.a_syms); 707185029Spjd if (hdr.ex.a_syms) { 708185029Spjd if (xfsread(&dn, &off, p, hdr.ex.a_syms)) 709185029Spjd return; 710185029Spjd p += hdr.ex.a_syms; 711185029Spjd if (xfsread(&dn, &off, p, sizeof(int))) 712185029Spjd return; 713185029Spjd x = *(uint32_t *)p; 714185029Spjd p += sizeof(int); 715185029Spjd x -= sizeof(int); 716185029Spjd if (xfsread(&dn, &off, p, x)) 717185029Spjd return; 718185029Spjd p += x; 719185029Spjd } 720185029Spjd } else { 721185029Spjd off = hdr.eh.e_phoff; 722185029Spjd for (j = i = 0; i < hdr.eh.e_phnum && j < 2; i++) { 723185029Spjd if (xfsread(&dn, &off, ep + j, sizeof(ep[0]))) 724185029Spjd return; 725185029Spjd if (ep[j].p_type == PT_LOAD) 726185029Spjd j++; 727185029Spjd } 728185029Spjd for (i = 0; i < 2; i++) { 729185029Spjd p = PTOV(ep[i].p_paddr & 0xffffff); 730185029Spjd off = ep[i].p_offset; 731185029Spjd if (xfsread(&dn, &off, p, ep[i].p_filesz)) 732185029Spjd return; 733185029Spjd } 734185029Spjd p += roundup2(ep[1].p_memsz, PAGE_SIZE); 735185029Spjd bootinfo.bi_symtab = VTOP(p); 736185029Spjd if (hdr.eh.e_shnum == hdr.eh.e_shstrndx + 3) { 737185029Spjd off = hdr.eh.e_shoff + sizeof(es[0]) * 738185029Spjd (hdr.eh.e_shstrndx + 1); 739185029Spjd if (xfsread(&dn, &off, &es, sizeof(es))) 740185029Spjd return; 741185029Spjd for (i = 0; i < 2; i++) { 742185029Spjd memcpy(p, &es[i].sh_size, sizeof(es[i].sh_size)); 743185029Spjd p += sizeof(es[i].sh_size); 744185029Spjd off = es[i].sh_offset; 745185029Spjd if (xfsread(&dn, &off, p, es[i].sh_size)) 746185029Spjd return; 747185029Spjd p += es[i].sh_size; 748185029Spjd } 749185029Spjd } 750185029Spjd addr = hdr.eh.e_entry & 0xffffff; 751185029Spjd } 752185029Spjd bootinfo.bi_esymtab = VTOP(p); 753185029Spjd bootinfo.bi_kernelname = VTOP(kname); 754185029Spjd __exec((caddr_t)addr, RB_BOOTINFO | (opts & RBX_MASK), 755185029Spjd bootdev, 756185029Spjd KARGS_FLAGS_ZFS, 757185029Spjd (uint32_t) spa->spa_guid, 758185029Spjd (uint32_t) (spa->spa_guid >> 32), 759185029Spjd VTOP(&bootinfo)); 760185029Spjd} 761185029Spjd 762185029Spjdstatic int 763185029Spjdparse() 764185029Spjd{ 765185029Spjd char *arg = cmd; 766185029Spjd char *ep, *p, *q; 767185029Spjd const char *cp; 768185029Spjd //unsigned int drv; 769185029Spjd int c, i, j; 770185029Spjd 771185029Spjd while ((c = *arg++)) { 772185029Spjd if (c == ' ' || c == '\t' || c == '\n') 773185029Spjd continue; 774185029Spjd for (p = arg; *p && *p != '\n' && *p != ' ' && *p != '\t'; p++); 775185029Spjd ep = p; 776185029Spjd if (*p) 777185029Spjd *p++ = 0; 778185029Spjd if (c == '-') { 779185029Spjd while ((c = *arg++)) { 780185029Spjd if (c == 'P') { 781185029Spjd if (*(uint8_t *)PTOV(0x496) & 0x10) { 782185029Spjd cp = "yes"; 783185029Spjd } else { 784185029Spjd opts |= OPT_SET(RBX_DUAL) | OPT_SET(RBX_SERIAL); 785185029Spjd cp = "no"; 786185029Spjd } 787185029Spjd printf("Keyboard: %s\n", cp); 788185029Spjd continue; 789185029Spjd } else if (c == 'S') { 790185029Spjd j = 0; 791185029Spjd while ((unsigned int)(i = *arg++ - '0') <= 9) 792185029Spjd j = j * 10 + i; 793185029Spjd if (j > 0 && i == -'0') { 794185029Spjd comspeed = j; 795185029Spjd break; 796185029Spjd } 797185029Spjd /* Fall through to error below ('S' not in optstr[]). */ 798185029Spjd } 799185029Spjd for (i = 0; c != optstr[i]; i++) 800185029Spjd if (i == NOPT - 1) 801185029Spjd return -1; 802185029Spjd opts ^= OPT_SET(flags[i]); 803185029Spjd } 804185029Spjd ioctrl = OPT_CHECK(RBX_DUAL) ? (IO_SERIAL|IO_KEYBOARD) : 805185029Spjd OPT_CHECK(RBX_SERIAL) ? IO_SERIAL : IO_KEYBOARD; 806185029Spjd if (ioctrl & IO_SERIAL) 807185029Spjd sio_init(115200 / comspeed); 808185029Spjd } if (c == '?') { 809185029Spjd dnode_phys_t dn; 810185029Spjd 811185029Spjd if (zfs_lookup(spa, arg, &dn) == 0) { 812185029Spjd zap_list(spa, &dn); 813185029Spjd } 814185029Spjd return -1; 815185029Spjd } else { 816185029Spjd arg--; 817185029Spjd 818185029Spjd /* 819185029Spjd * Report pool status if the comment is 'status'. Lets 820185029Spjd * hope no-one wants to load /status as a kernel. 821185029Spjd */ 822185029Spjd if (!strcmp(arg, "status")) { 823185029Spjd spa_all_status(); 824185029Spjd return -1; 825185029Spjd } 826185029Spjd 827185029Spjd /* 828185029Spjd * If there is a colon, switch pools. 829185029Spjd */ 830185029Spjd q = (char *) strchr(arg, ':'); 831185029Spjd if (q) { 832185029Spjd spa_t *newspa; 833185029Spjd 834185029Spjd *q++ = 0; 835185029Spjd newspa = spa_find_by_name(arg); 836185029Spjd if (newspa) { 837185029Spjd spa = newspa; 838185029Spjd zfs_mount_pool(spa); 839185029Spjd } else { 840185029Spjd printf("\nCan't find ZFS pool %s\n", arg); 841185029Spjd return -1; 842185029Spjd } 843185029Spjd arg = q; 844185029Spjd } 845185029Spjd if ((i = ep - arg)) { 846185029Spjd if ((size_t)i >= sizeof(kname)) 847185029Spjd return -1; 848185029Spjd memcpy(kname, arg, i + 1); 849185029Spjd } 850185029Spjd } 851185029Spjd arg = p; 852185029Spjd } 853185029Spjd return 0; 854185029Spjd} 855185029Spjd 856185029Spjdstatic void 857185029Spjdprintf(const char *fmt,...) 858185029Spjd{ 859185029Spjd va_list ap; 860198420Srnoland char buf[20]; 861185029Spjd char *s; 862198420Srnoland unsigned long long u; 863185029Spjd int c; 864185029Spjd int minus; 865185029Spjd int prec; 866198420Srnoland int l; 867185029Spjd int len; 868185029Spjd int pad; 869185029Spjd 870185029Spjd va_start(ap, fmt); 871185029Spjd while ((c = *fmt++)) { 872185029Spjd if (c == '%') { 873185029Spjd minus = 0; 874185029Spjd prec = 0; 875198420Srnoland l = 0; 876185029Spjd nextfmt: 877185029Spjd c = *fmt++; 878185029Spjd switch (c) { 879185029Spjd case '-': 880185029Spjd minus = 1; 881185029Spjd goto nextfmt; 882185029Spjd case '0': 883185029Spjd case '1': 884185029Spjd case '2': 885185029Spjd case '3': 886185029Spjd case '4': 887185029Spjd case '5': 888185029Spjd case '6': 889185029Spjd case '7': 890185029Spjd case '8': 891185029Spjd case '9': 892185029Spjd prec = 10 * prec + (c - '0'); 893185029Spjd goto nextfmt; 894185029Spjd case 'c': 895185029Spjd putchar(va_arg(ap, int)); 896185029Spjd continue; 897198420Srnoland case 'l': 898198420Srnoland l++; 899198420Srnoland goto nextfmt; 900185029Spjd case 's': 901185029Spjd s = va_arg(ap, char *); 902185029Spjd if (prec) { 903185029Spjd len = strlen(s); 904185029Spjd if (len < prec) 905185029Spjd pad = prec - len; 906185029Spjd else 907185029Spjd pad = 0; 908185029Spjd if (minus) 909185029Spjd while (pad--) 910185029Spjd putchar(' '); 911185029Spjd for (; *s; s++) 912185029Spjd putchar(*s); 913185029Spjd if (!minus) 914185029Spjd while (pad--) 915185029Spjd putchar(' '); 916185029Spjd } else { 917185029Spjd for (; *s; s++) 918185029Spjd putchar(*s); 919185029Spjd } 920185029Spjd continue; 921185029Spjd case 'u': 922198420Srnoland switch (l) { 923198420Srnoland case 2: 924198420Srnoland u = va_arg(ap, unsigned long long); 925198420Srnoland break; 926198420Srnoland case 1: 927198420Srnoland u = va_arg(ap, unsigned long); 928198420Srnoland break; 929198420Srnoland default: 930198420Srnoland u = va_arg(ap, unsigned); 931198420Srnoland break; 932198420Srnoland } 933185029Spjd s = buf; 934185029Spjd do 935185029Spjd *s++ = '0' + u % 10U; 936185029Spjd while (u /= 10U); 937185029Spjd while (--s >= buf) 938185029Spjd putchar(*s); 939185029Spjd continue; 940185029Spjd } 941185029Spjd } 942185029Spjd putchar(c); 943185029Spjd } 944185029Spjd va_end(ap); 945185029Spjd return; 946185029Spjd} 947185029Spjd 948185029Spjdstatic void 949185029Spjdputchar(int c) 950185029Spjd{ 951185029Spjd if (c == '\n') 952185029Spjd xputc('\r'); 953185029Spjd xputc(c); 954185029Spjd} 955185029Spjd 956185096Sdfr#ifdef GPT 957185096Sdfrstatic struct { 958185096Sdfr uint16_t len; 959185096Sdfr uint16_t count; 960185096Sdfr uint16_t seg; 961185096Sdfr uint16_t off; 962185096Sdfr uint64_t lba; 963185096Sdfr} packet; 964185096Sdfr#endif 965185096Sdfr 966185029Spjdstatic int 967185029Spjddrvread(struct dsk *dsk, void *buf, unsigned lba, unsigned nblk) 968185029Spjd{ 969185096Sdfr#ifdef GPT 970192194Sdfr static unsigned c = 0x2d5c7c2f; 971185096Sdfr 972185096Sdfr if (!OPT_CHECK(RBX_QUIET)) 973185096Sdfr printf("%c\b", c = c << 8 | c >> 24); 974185096Sdfr packet.len = 0x10; 975185096Sdfr packet.count = nblk; 976185096Sdfr packet.seg = VTOPOFF(buf); 977185096Sdfr packet.off = VTOPSEG(buf); 978185096Sdfr packet.lba = lba + dsk->start; 979185096Sdfr v86.ctl = V86_FLAGS; 980185096Sdfr v86.addr = 0x13; 981185096Sdfr v86.eax = 0x4200; 982185096Sdfr v86.edx = dsk->drive; 983185096Sdfr v86.ds = VTOPSEG(&packet); 984185096Sdfr v86.esi = VTOPOFF(&packet); 985185096Sdfr v86int(); 986185096Sdfr if (V86_CY(v86.efl)) { 987185096Sdfr printf("error %u lba %u\n", v86.eax >> 8 & 0xff, lba); 988185096Sdfr return -1; 989185096Sdfr } 990185096Sdfr return 0; 991185096Sdfr#else 992185029Spjd static unsigned c = 0x2d5c7c2f; 993185029Spjd 994185029Spjd lba += dsk->start; 995185029Spjd if (!OPT_CHECK(RBX_QUIET)) 996185029Spjd printf("%c\b", c = c << 8 | c >> 24); 997185029Spjd v86.ctl = V86_ADDR | V86_CALLF | V86_FLAGS; 998185029Spjd v86.addr = XREADORG; /* call to xread in boot1 */ 999185029Spjd v86.es = VTOPSEG(buf); 1000185029Spjd v86.eax = lba; 1001185029Spjd v86.ebx = VTOPOFF(buf); 1002185029Spjd v86.ecx = lba >> 16; 1003185029Spjd v86.edx = nblk << 8 | dsk->drive; 1004185029Spjd v86int(); 1005185029Spjd v86.ctl = V86_FLAGS; 1006185029Spjd if (V86_CY(v86.efl)) { 1007185029Spjd printf("error %u lba %u\n", v86.eax >> 8 & 0xff, lba); 1008185029Spjd return -1; 1009185029Spjd } 1010185029Spjd return 0; 1011185096Sdfr#endif 1012185029Spjd} 1013185029Spjd 1014185029Spjdstatic int 1015185029Spjdkeyhit(unsigned ticks) 1016185029Spjd{ 1017185029Spjd uint32_t t0, t1; 1018185029Spjd 1019185029Spjd if (OPT_CHECK(RBX_NOINTR)) 1020185029Spjd return 0; 1021185029Spjd t0 = 0; 1022185029Spjd for (;;) { 1023185029Spjd if (xgetc(1)) 1024185029Spjd return 1; 1025185029Spjd t1 = *(uint32_t *)PTOV(0x46c); 1026185029Spjd if (!t0) 1027185029Spjd t0 = t1; 1028185029Spjd if (t1 < t0 || t1 >= t0 + ticks) 1029185029Spjd return 0; 1030185029Spjd } 1031185029Spjd} 1032185029Spjd 1033185029Spjdstatic int 1034185029Spjdxputc(int c) 1035185029Spjd{ 1036185029Spjd if (ioctrl & IO_KEYBOARD) 1037185029Spjd putc(c); 1038185029Spjd if (ioctrl & IO_SERIAL) 1039185029Spjd sio_putc(c); 1040185029Spjd return c; 1041185029Spjd} 1042185029Spjd 1043185029Spjdstatic int 1044185029Spjdxgetc(int fn) 1045185029Spjd{ 1046185029Spjd if (OPT_CHECK(RBX_NOINTR)) 1047185029Spjd return 0; 1048185029Spjd for (;;) { 1049185029Spjd if (ioctrl & IO_KEYBOARD && getc(1)) 1050185029Spjd return fn ? 1 : getc(0); 1051185029Spjd if (ioctrl & IO_SERIAL && sio_ischar()) 1052185029Spjd return fn ? 1 : sio_getc(); 1053185029Spjd if (fn) 1054185029Spjd return 0; 1055185029Spjd } 1056185029Spjd} 1057185029Spjd 1058185029Spjdstatic int 1059185029Spjdgetc(int fn) 1060185029Spjd{ 1061185029Spjd /* 1062185029Spjd * The extra comparison against zero is an attempt to work around 1063185029Spjd * what appears to be a bug in QEMU and Bochs. Both emulators 1064185029Spjd * sometimes report a key-press with scancode one and ascii zero 1065185029Spjd * when no such key is pressed in reality. As far as I can tell, 1066185029Spjd * this only happens shortly after a reboot. 1067185029Spjd */ 1068185029Spjd v86.addr = 0x16; 1069185029Spjd v86.eax = fn << 8; 1070185029Spjd v86int(); 1071185029Spjd return fn == 0 ? v86.eax & 0xff : (!V86_ZR(v86.efl) && (v86.eax & 0xff)); 1072185029Spjd} 1073