zfsboot.c revision 298826
1185029Spjd/*- 2185029Spjd * Copyright (c) 1998 Robert Nordier 3185029Spjd * All rights reserved. 4185029Spjd * 5185029Spjd * Redistribution and use in source and binary forms are freely 6185029Spjd * permitted provided that the above copyright notice and this 7185029Spjd * paragraph and the following disclaimer are duplicated in all 8185029Spjd * such forms. 9185029Spjd * 10185029Spjd * This software is provided "AS IS" and without any express or 11185029Spjd * implied warranties, including, without limitation, the implied 12185029Spjd * warranties of merchantability and fitness for a particular 13185029Spjd * purpose. 14185029Spjd */ 15185029Spjd 16185029Spjd#include <sys/cdefs.h> 17185029Spjd__FBSDID("$FreeBSD: head/sys/boot/i386/zfsboot/zfsboot.c 298826 2016-04-30 00:26:38Z pfg $"); 18185029Spjd 19185029Spjd#include <sys/param.h> 20185029Spjd#include <sys/errno.h> 21185029Spjd#include <sys/diskmbr.h> 22185096Sdfr#ifdef GPT 23185096Sdfr#include <sys/gpt.h> 24185096Sdfr#endif 25185029Spjd#include <sys/reboot.h> 26185029Spjd#include <sys/queue.h> 27185029Spjd 28185029Spjd#include <machine/bootinfo.h> 29185029Spjd#include <machine/elf.h> 30200309Sjhb#include <machine/pc/bios.h> 31185029Spjd 32185029Spjd#include <stdarg.h> 33185029Spjd#include <stddef.h> 34185029Spjd 35185029Spjd#include <a.out.h> 36185029Spjd 37185029Spjd#include <btxv86.h> 38185029Spjd 39185029Spjd#include "lib.h" 40213136Spjd#include "rbx.h" 41213136Spjd#include "drv.h" 42213136Spjd#include "util.h" 43213136Spjd#include "cons.h" 44235154Savg#include "bootargs.h" 45294765Simp#include "paths.h" 46185029Spjd 47235329Savg#include "libzfs.h" 48235329Savg 49297629Sallanjude#define ARGS 0x900 50297629Sallanjude#define NOPT 14 51297629Sallanjude#define NDEV 3 52185029Spjd 53297629Sallanjude#define BIOS_NUMDRIVES 0x475 54297629Sallanjude#define DRV_HARD 0x80 55297629Sallanjude#define DRV_MASK 0x7f 56185029Spjd 57297629Sallanjude#define TYPE_AD 0 58297629Sallanjude#define TYPE_DA 1 59297629Sallanjude#define TYPE_MAXHARD TYPE_DA 60297629Sallanjude#define TYPE_FD 2 61185029Spjd 62297629Sallanjude#define DEV_GELIBOOT_BSIZE 4096 63297629Sallanjude 64185029Spjdextern uint32_t _end; 65185029Spjd 66185096Sdfr#ifdef GPT 67185096Sdfrstatic const uuid_t freebsd_zfs_uuid = GPT_ENT_TYPE_FREEBSD_ZFS; 68185096Sdfr#endif 69185029Spjdstatic const char optstr[NOPT] = "DhaCcdgmnpqrsv"; /* Also 'P', 'S' */ 70185029Spjdstatic const unsigned char flags[NOPT] = { 71185029Spjd RBX_DUAL, 72185029Spjd RBX_SERIAL, 73185029Spjd RBX_ASKNAME, 74185029Spjd RBX_CDROM, 75185029Spjd RBX_CONFIG, 76185029Spjd RBX_KDB, 77185029Spjd RBX_GDB, 78185029Spjd RBX_MUTE, 79185029Spjd RBX_NOINTR, 80185029Spjd RBX_PAUSE, 81185029Spjd RBX_QUIET, 82185029Spjd RBX_DFLTROOT, 83185029Spjd RBX_SINGLE, 84185029Spjd RBX_VERBOSE 85185029Spjd}; 86213136Spjduint32_t opts; 87185029Spjd 88185029Spjdstatic const char *const dev_nm[NDEV] = {"ad", "da", "fd"}; 89185029Spjdstatic const unsigned char dev_maj[NDEV] = {30, 4, 2}; 90185029Spjd 91185029Spjdstatic char cmd[512]; 92234339Savgstatic char cmddup[512]; 93185029Spjdstatic char kname[1024]; 94235329Savgstatic char rootname[256]; 95185029Spjdstatic int comspeed = SIOSPD; 96185029Spjdstatic struct bootinfo bootinfo; 97185029Spjdstatic uint32_t bootdev; 98235329Savgstatic struct zfs_boot_args zfsargs; 99235329Savgstatic struct zfsmount zfsmount; 100185029Spjd 101200309Sjhbvm_offset_t high_heap_base; 102200309Sjhbuint32_t bios_basemem, bios_extmem, high_heap_size; 103200309Sjhb 104200309Sjhbstatic struct bios_smap smap; 105200309Sjhb 106200309Sjhb/* 107200309Sjhb * The minimum amount of memory to reserve in bios_extmem for the heap. 108200309Sjhb */ 109297629Sallanjude#define HEAP_MIN (3 * 1024 * 1024) 110200309Sjhb 111200309Sjhbstatic char *heap_next; 112200309Sjhbstatic char *heap_end; 113200309Sjhb 114185029Spjd/* Buffers that must not span a 64k boundary. */ 115297629Sallanjude#define READ_BUF_SIZE 8192 116185029Spjdstruct dmadat { 117185029Spjd char rdbuf[READ_BUF_SIZE]; /* for reading large things */ 118185029Spjd char secbuf[READ_BUF_SIZE]; /* for MBR/disklabel */ 119185029Spjd}; 120185029Spjdstatic struct dmadat *dmadat; 121185029Spjd 122185029Spjdvoid exit(int); 123185029Spjdstatic void load(void); 124185029Spjdstatic int parse(void); 125200309Sjhbstatic void bios_getmem(void); 126296963Sallanjudevoid *malloc(size_t n); 127296963Sallanjudevoid free(void *ptr); 128185029Spjd 129296963Sallanjudevoid * 130185029Spjdmalloc(size_t n) 131185029Spjd{ 132185029Spjd char *p = heap_next; 133185029Spjd if (p + n > heap_end) { 134185029Spjd printf("malloc failure\n"); 135185029Spjd for (;;) 136185029Spjd ; 137296963Sallanjude /* NOTREACHED */ 138296963Sallanjude return (0); 139185029Spjd } 140185029Spjd heap_next += n; 141296963Sallanjude return (p); 142185029Spjd} 143185029Spjd 144296963Sallanjudevoid 145296963Sallanjudefree(void *ptr) 146296963Sallanjude{ 147296963Sallanjude 148296963Sallanjude return; 149296963Sallanjude} 150296963Sallanjude 151185029Spjdstatic char * 152185029Spjdstrdup(const char *s) 153185029Spjd{ 154185029Spjd char *p = malloc(strlen(s) + 1); 155185029Spjd strcpy(p, s); 156296963Sallanjude return (p); 157185029Spjd} 158185029Spjd 159296963Sallanjude#ifdef LOADER_GELI_SUPPORT 160296963Sallanjude#include "geliboot.c" 161296963Sallanjudestatic char gelipw[GELI_PW_MAXLEN]; 162296963Sallanjude#endif 163296963Sallanjude 164185029Spjd#include "zfsimpl.c" 165185029Spjd 166185029Spjd/* 167185029Spjd * Read from a dnode (which must be from a ZPL filesystem). 168185029Spjd */ 169185029Spjdstatic int 170185029Spjdzfs_read(spa_t *spa, const dnode_phys_t *dnode, off_t *offp, void *start, size_t size) 171185029Spjd{ 172185029Spjd const znode_phys_t *zp = (const znode_phys_t *) dnode->dn_bonus; 173185029Spjd size_t n; 174185029Spjd int rc; 175185029Spjd 176185029Spjd n = size; 177185029Spjd if (*offp + n > zp->zp_size) 178185029Spjd n = zp->zp_size - *offp; 179185029Spjd 180185029Spjd rc = dnode_read(spa, dnode, *offp, start, n); 181185029Spjd if (rc) 182185029Spjd return (-1); 183185029Spjd *offp += n; 184185029Spjd 185185029Spjd return (n); 186185029Spjd} 187185029Spjd 188185029Spjd/* 189185029Spjd * Current ZFS pool 190185029Spjd */ 191235329Savgstatic spa_t *spa; 192241293Savgstatic spa_t *primary_spa; 193241293Savgstatic vdev_t *primary_vdev; 194185029Spjd 195185029Spjd/* 196185029Spjd * A wrapper for dskread that doesn't have to worry about whether the 197185029Spjd * buffer pointer crosses a 64k boundary. 198185029Spjd */ 199185029Spjdstatic int 200185029Spjdvdev_read(vdev_t *vdev, void *priv, off_t off, void *buf, size_t bytes) 201185029Spjd{ 202185029Spjd char *p; 203297629Sallanjude daddr_t lba, alignlba; 204297629Sallanjude off_t alignoff, diff; 205297629Sallanjude unsigned int nb, alignnb; 206185029Spjd struct dsk *dsk = (struct dsk *) priv; 207185029Spjd 208185029Spjd if ((off & (DEV_BSIZE - 1)) || (bytes & (DEV_BSIZE - 1))) 209185029Spjd return -1; 210185029Spjd 211185029Spjd p = buf; 212185029Spjd lba = off / DEV_BSIZE; 213213136Spjd lba += dsk->start; 214297629Sallanjude /* Align reads to 4k else 4k sector GELIs will not decrypt. */ 215297629Sallanjude alignoff = off & ~ (off_t)(DEV_GELIBOOT_BSIZE - 1); 216297629Sallanjude /* Round LBA down to nearest multiple of DEV_GELIBOOT_BSIZE bytes. */ 217297629Sallanjude alignlba = alignoff / DEV_BSIZE; 218297629Sallanjude /* 219297629Sallanjude * The read must be aligned to DEV_GELIBOOT_BSIZE bytes relative to the 220297629Sallanjude * start of the GELI partition, not the start of the actual disk. 221297629Sallanjude */ 222297629Sallanjude alignlba += dsk->start; 223297629Sallanjude diff = (lba - alignlba) * DEV_BSIZE; 224297629Sallanjude 225185029Spjd while (bytes > 0) { 226185029Spjd nb = bytes / DEV_BSIZE; 227185029Spjd if (nb > READ_BUF_SIZE / DEV_BSIZE) 228185029Spjd nb = READ_BUF_SIZE / DEV_BSIZE; 229297629Sallanjude /* 230297629Sallanjude * Ensure that the read size plus the leading offset does not 231297629Sallanjude * exceed the size of the read buffer. 232297629Sallanjude */ 233297629Sallanjude if (nb * DEV_BSIZE + diff > READ_BUF_SIZE) 234297629Sallanjude nb -= diff / DEV_BSIZE; 235297629Sallanjude /* 236297629Sallanjude * Round the number of blocks to read up to the nearest multiple 237297629Sallanjude * of DEV_GELIBOOT_BSIZE. 238297629Sallanjude */ 239297629Sallanjude alignnb = nb + (diff / DEV_BSIZE) + 240297629Sallanjude (DEV_GELIBOOT_BSIZE / DEV_BSIZE - 1) & ~ 241297629Sallanjude (unsigned int)(DEV_GELIBOOT_BSIZE / DEV_BSIZE - 1); 242297629Sallanjude 243297629Sallanjude if (drvread(dsk, dmadat->rdbuf, alignlba, alignnb)) 244185029Spjd return -1; 245296963Sallanjude#ifdef LOADER_GELI_SUPPORT 246296963Sallanjude /* decrypt */ 247296963Sallanjude if (is_geli(dsk) == 0) { 248297629Sallanjude if (geli_read(dsk, ((alignlba - dsk->start) * 249297629Sallanjude DEV_BSIZE), dmadat->rdbuf, alignnb * DEV_BSIZE)) 250297629Sallanjude return (-1); 251296963Sallanjude } 252296963Sallanjude#endif 253297629Sallanjude memcpy(p, dmadat->rdbuf + diff, nb * DEV_BSIZE); 254185029Spjd p += nb * DEV_BSIZE; 255185029Spjd lba += nb; 256297629Sallanjude alignlba += alignnb; 257185029Spjd bytes -= nb * DEV_BSIZE; 258297629Sallanjude /* Don't need the leading offset after the first block. */ 259297629Sallanjude diff = 0; 260185029Spjd } 261185029Spjd 262185029Spjd return 0; 263185029Spjd} 264185029Spjd 265185029Spjdstatic int 266185029Spjdxfsread(const dnode_phys_t *dnode, off_t *offp, void *buf, size_t nbyte) 267185029Spjd{ 268185029Spjd if ((size_t)zfs_read(spa, dnode, offp, buf, nbyte) != nbyte) { 269235329Savg printf("Invalid format\n"); 270185029Spjd return -1; 271185029Spjd } 272185029Spjd return 0; 273185029Spjd} 274185029Spjd 275200309Sjhbstatic void 276200309Sjhbbios_getmem(void) 277185029Spjd{ 278200309Sjhb uint64_t size; 279185029Spjd 280200309Sjhb /* Parse system memory map */ 281200309Sjhb v86.ebx = 0; 282200309Sjhb do { 283200309Sjhb v86.ctl = V86_FLAGS; 284200309Sjhb v86.addr = 0x15; /* int 0x15 function 0xe820*/ 285200309Sjhb v86.eax = 0xe820; 286200309Sjhb v86.ecx = sizeof(struct bios_smap); 287200309Sjhb v86.edx = SMAP_SIG; 288200309Sjhb v86.es = VTOPSEG(&smap); 289200309Sjhb v86.edi = VTOPOFF(&smap); 290200309Sjhb v86int(); 291292682Sjhb if (V86_CY(v86.efl) || (v86.eax != SMAP_SIG)) 292200309Sjhb break; 293200309Sjhb /* look for a low-memory segment that's large enough */ 294200309Sjhb if ((smap.type == SMAP_TYPE_MEMORY) && (smap.base == 0) && 295200309Sjhb (smap.length >= (512 * 1024))) 296200309Sjhb bios_basemem = smap.length; 297200309Sjhb /* look for the first segment in 'extended' memory */ 298200309Sjhb if ((smap.type == SMAP_TYPE_MEMORY) && (smap.base == 0x100000)) { 299200309Sjhb bios_extmem = smap.length; 300200309Sjhb } 301200309Sjhb 302200309Sjhb /* 303200309Sjhb * Look for the largest segment in 'extended' memory beyond 304200309Sjhb * 1MB but below 4GB. 305200309Sjhb */ 306200309Sjhb if ((smap.type == SMAP_TYPE_MEMORY) && (smap.base > 0x100000) && 307200309Sjhb (smap.base < 0x100000000ull)) { 308200309Sjhb size = smap.length; 309200309Sjhb 310200309Sjhb /* 311200309Sjhb * If this segment crosses the 4GB boundary, truncate it. 312200309Sjhb */ 313200309Sjhb if (smap.base + size > 0x100000000ull) 314200309Sjhb size = 0x100000000ull - smap.base; 315200309Sjhb 316200309Sjhb if (size > high_heap_size) { 317200309Sjhb high_heap_size = size; 318200309Sjhb high_heap_base = smap.base; 319200309Sjhb } 320200309Sjhb } 321200309Sjhb } while (v86.ebx != 0); 322200309Sjhb 323200309Sjhb /* Fall back to the old compatibility function for base memory */ 324200309Sjhb if (bios_basemem == 0) { 325200309Sjhb v86.ctl = 0; 326200309Sjhb v86.addr = 0x12; /* int 0x12 */ 327200309Sjhb v86int(); 328200309Sjhb 329200309Sjhb bios_basemem = (v86.eax & 0xffff) * 1024; 330200309Sjhb } 331200309Sjhb 332200309Sjhb /* Fall back through several compatibility functions for extended memory */ 333200309Sjhb if (bios_extmem == 0) { 334200309Sjhb v86.ctl = V86_FLAGS; 335200309Sjhb v86.addr = 0x15; /* int 0x15 function 0xe801*/ 336200309Sjhb v86.eax = 0xe801; 337200309Sjhb v86int(); 338292682Sjhb if (!V86_CY(v86.efl)) { 339200309Sjhb bios_extmem = ((v86.ecx & 0xffff) + ((v86.edx & 0xffff) * 64)) * 1024; 340200309Sjhb } 341200309Sjhb } 342200309Sjhb if (bios_extmem == 0) { 343200309Sjhb v86.ctl = 0; 344200309Sjhb v86.addr = 0x15; /* int 0x15 function 0x88*/ 345200309Sjhb v86.eax = 0x8800; 346200309Sjhb v86int(); 347200309Sjhb bios_extmem = (v86.eax & 0xffff) * 1024; 348200309Sjhb } 349200309Sjhb 350200309Sjhb /* 351200309Sjhb * If we have extended memory and did not find a suitable heap 352200309Sjhb * region in the SMAP, use the last 3MB of 'extended' memory as a 353200309Sjhb * high heap candidate. 354200309Sjhb */ 355200309Sjhb if (bios_extmem >= HEAP_MIN && high_heap_size < HEAP_MIN) { 356200309Sjhb high_heap_size = HEAP_MIN; 357200309Sjhb high_heap_base = bios_extmem + 0x100000 - HEAP_MIN; 358200309Sjhb } 359296963Sallanjude} 360200309Sjhb 361185029Spjd/* 362185029Spjd * Try to detect a device supported by the legacy int13 BIOS 363185029Spjd */ 364185029Spjdstatic int 365185029Spjdint13probe(int drive) 366185029Spjd{ 367185029Spjd v86.ctl = V86_FLAGS; 368185029Spjd v86.addr = 0x13; 369185029Spjd v86.eax = 0x800; 370185029Spjd v86.edx = drive; 371185029Spjd v86int(); 372185029Spjd 373292682Sjhb if (!V86_CY(v86.efl) && /* carry clear */ 374185029Spjd ((v86.edx & 0xff) != (drive & DRV_MASK))) { /* unit # OK */ 375185029Spjd if ((v86.ecx & 0x3f) == 0) { /* absurd sector size */ 376185029Spjd return(0); /* skip device */ 377185029Spjd } 378185029Spjd return (1); 379185029Spjd } 380185029Spjd return(0); 381185029Spjd} 382185029Spjd 383192194Sdfr/* 384192194Sdfr * We call this when we find a ZFS vdev - ZFS consumes the dsk 385192194Sdfr * structure so we must make a new one. 386192194Sdfr */ 387192194Sdfrstatic struct dsk * 388192194Sdfrcopy_dsk(struct dsk *dsk) 389192194Sdfr{ 390192194Sdfr struct dsk *newdsk; 391192194Sdfr 392192194Sdfr newdsk = malloc(sizeof(struct dsk)); 393192194Sdfr *newdsk = *dsk; 394192194Sdfr return (newdsk); 395192194Sdfr} 396192194Sdfr 397185029Spjdstatic void 398241294Savgprobe_drive(struct dsk *dsk) 399185029Spjd{ 400185096Sdfr#ifdef GPT 401185096Sdfr struct gpt_hdr hdr; 402185096Sdfr struct gpt_ent *ent; 403185096Sdfr unsigned part, entries_per_sec; 404185096Sdfr#endif 405296963Sallanjude daddr_t slba, elba; 406185029Spjd struct dos_partition *dp; 407185029Spjd char *sec; 408185029Spjd unsigned i; 409185029Spjd 410185029Spjd /* 411296963Sallanjude * If we find a vdev on the whole disk, stop here. 412185029Spjd */ 413241294Savg if (vdev_probe(vdev_read, dsk, NULL) == 0) 414185029Spjd return; 415185029Spjd 416296963Sallanjude#ifdef LOADER_GELI_SUPPORT 417296963Sallanjude /* 418296963Sallanjude * Taste the disk, if it is GELI encrypted, decrypt it and check to see if 419296963Sallanjude * it is a usable vdev then. Otherwise dig 420296963Sallanjude * out the partition table and probe each slice/partition 421296963Sallanjude * in turn for a vdev or GELI encrypted vdev. 422296963Sallanjude */ 423296963Sallanjude elba = drvsize(dsk); 424296963Sallanjude if (elba > 0) { 425296963Sallanjude elba--; 426296963Sallanjude } 427296963Sallanjude if (geli_taste(vdev_read, dsk, elba) == 0) { 428296963Sallanjude if (geli_passphrase(&gelipw, dsk->unit, ':', 0, dsk) == 0) { 429296963Sallanjude if (vdev_probe(vdev_read, dsk, NULL) == 0) { 430296963Sallanjude return; 431296963Sallanjude } 432296963Sallanjude } 433296963Sallanjude } 434296963Sallanjude#endif /* LOADER_GELI_SUPPORT */ 435296963Sallanjude 436185029Spjd sec = dmadat->secbuf; 437185029Spjd dsk->start = 0; 438185096Sdfr 439185096Sdfr#ifdef GPT 440185096Sdfr /* 441185096Sdfr * First check for GPT. 442185096Sdfr */ 443185096Sdfr if (drvread(dsk, sec, 1, 1)) { 444185096Sdfr return; 445185096Sdfr } 446185096Sdfr memcpy(&hdr, sec, sizeof(hdr)); 447185096Sdfr if (memcmp(hdr.hdr_sig, GPT_HDR_SIG, sizeof(hdr.hdr_sig)) != 0 || 448185096Sdfr hdr.hdr_lba_self != 1 || hdr.hdr_revision < 0x00010000 || 449185096Sdfr hdr.hdr_entsz < sizeof(*ent) || DEV_BSIZE % hdr.hdr_entsz != 0) { 450185096Sdfr goto trymbr; 451185096Sdfr } 452185096Sdfr 453185096Sdfr /* 454298826Spfg * Probe all GPT partitions for the presence of ZFS pools. We 455185096Sdfr * return the spa_t for the first we find (if requested). This 456185096Sdfr * will have the effect of booting from the first pool on the 457185096Sdfr * disk. 458296963Sallanjude * 459296963Sallanjude * If no vdev is found, GELI decrypting the device and try again 460185096Sdfr */ 461185096Sdfr entries_per_sec = DEV_BSIZE / hdr.hdr_entsz; 462185096Sdfr slba = hdr.hdr_lba_table; 463185096Sdfr elba = slba + hdr.hdr_entries / entries_per_sec; 464185096Sdfr while (slba < elba) { 465198420Srnoland dsk->start = 0; 466185096Sdfr if (drvread(dsk, sec, slba, 1)) 467185096Sdfr return; 468185096Sdfr for (part = 0; part < entries_per_sec; part++) { 469185096Sdfr ent = (struct gpt_ent *)(sec + part * hdr.hdr_entsz); 470185096Sdfr if (memcmp(&ent->ent_type, &freebsd_zfs_uuid, 471185096Sdfr sizeof(uuid_t)) == 0) { 472185096Sdfr dsk->start = ent->ent_lba_start; 473296963Sallanjude dsk->slice = part + 1; 474296963Sallanjude dsk->part = 255; 475241294Savg if (vdev_probe(vdev_read, dsk, NULL) == 0) { 476185096Sdfr /* 477185096Sdfr * This slice had a vdev. We need a new dsk 478185096Sdfr * structure now since the vdev now owns this one. 479185096Sdfr */ 480192194Sdfr dsk = copy_dsk(dsk); 481185096Sdfr } 482296963Sallanjude#ifdef LOADER_GELI_SUPPORT 483296963Sallanjude else if (geli_taste(vdev_read, dsk, ent->ent_lba_end - 484296963Sallanjude ent->ent_lba_start) == 0) { 485296963Sallanjude if (geli_passphrase(&gelipw, dsk->unit, 'p', dsk->slice, dsk) == 0) { 486296963Sallanjude /* 487296963Sallanjude * This slice has GELI, check it for ZFS. 488296963Sallanjude */ 489296963Sallanjude if (vdev_probe(vdev_read, dsk, NULL) == 0) { 490296963Sallanjude /* 491296963Sallanjude * This slice had a vdev. We need a new dsk 492296963Sallanjude * structure now since the vdev now owns this one. 493296963Sallanjude */ 494296963Sallanjude dsk = copy_dsk(dsk); 495296963Sallanjude } 496296963Sallanjude break; 497296963Sallanjude } 498296963Sallanjude } 499296963Sallanjude#endif /* LOADER_GELI_SUPPORT */ 500185096Sdfr } 501185096Sdfr } 502185096Sdfr slba++; 503185096Sdfr } 504185096Sdfr return; 505185096Sdfrtrymbr: 506296963Sallanjude#endif /* GPT */ 507185096Sdfr 508185029Spjd if (drvread(dsk, sec, DOSBBSECTOR, 1)) 509185029Spjd return; 510185029Spjd dp = (void *)(sec + DOSPARTOFF); 511185029Spjd 512185029Spjd for (i = 0; i < NDOSPART; i++) { 513185029Spjd if (!dp[i].dp_typ) 514185029Spjd continue; 515185029Spjd dsk->start = dp[i].dp_start; 516296963Sallanjude dsk->slice = i + 1; 517241294Savg if (vdev_probe(vdev_read, dsk, NULL) == 0) { 518192194Sdfr dsk = copy_dsk(dsk); 519185029Spjd } 520296963Sallanjude#ifdef LOADER_GELI_SUPPORT 521296963Sallanjude else if (geli_taste(vdev_read, dsk, dp[i].dp_size - 522296963Sallanjude dp[i].dp_start) == 0) { 523296963Sallanjude if (geli_passphrase(&gelipw, dsk->unit, 's', i, dsk) == 0) { 524296963Sallanjude /* 525296963Sallanjude * This slice has GELI, check it for ZFS. 526296963Sallanjude */ 527296963Sallanjude if (vdev_probe(vdev_read, dsk, NULL) == 0) { 528296963Sallanjude /* 529296963Sallanjude * This slice had a vdev. We need a new dsk 530296963Sallanjude * structure now since the vdev now owns this one. 531296963Sallanjude */ 532296963Sallanjude dsk = copy_dsk(dsk); 533296963Sallanjude } 534296963Sallanjude break; 535296963Sallanjude } 536296963Sallanjude } 537296963Sallanjude#endif /* LOADER_GELI_SUPPORT */ 538185029Spjd } 539185029Spjd} 540185029Spjd 541185029Spjdint 542185029Spjdmain(void) 543185029Spjd{ 544185029Spjd int autoboot, i; 545185029Spjd dnode_phys_t dn; 546185029Spjd off_t off; 547185029Spjd struct dsk *dsk; 548185029Spjd 549208388Sjhb dmadat = (void *)(roundup2(__base + (int32_t)&_end, 0x10000) - __base); 550208388Sjhb 551200309Sjhb bios_getmem(); 552200309Sjhb 553200309Sjhb if (high_heap_size > 0) { 554200309Sjhb heap_end = PTOV(high_heap_base + high_heap_size); 555200309Sjhb heap_next = PTOV(high_heap_base); 556200309Sjhb } else { 557296963Sallanjude heap_next = (char *)dmadat + sizeof(*dmadat); 558296963Sallanjude heap_end = (char *)PTOV(bios_basemem); 559200309Sjhb } 560200309Sjhb 561185029Spjd dsk = malloc(sizeof(struct dsk)); 562185029Spjd dsk->drive = *(uint8_t *)PTOV(ARGS); 563185029Spjd dsk->type = dsk->drive & DRV_HARD ? TYPE_AD : TYPE_FD; 564185029Spjd dsk->unit = dsk->drive & DRV_MASK; 565185029Spjd dsk->slice = *(uint8_t *)PTOV(ARGS + 1) + 1; 566185029Spjd dsk->part = 0; 567185029Spjd dsk->start = 0; 568185029Spjd dsk->init = 0; 569185029Spjd 570185029Spjd bootinfo.bi_version = BOOTINFO_VERSION; 571185029Spjd bootinfo.bi_size = sizeof(bootinfo); 572200309Sjhb bootinfo.bi_basemem = bios_basemem / 1024; 573200309Sjhb bootinfo.bi_extmem = bios_extmem / 1024; 574185029Spjd bootinfo.bi_memsizes_valid++; 575185029Spjd bootinfo.bi_bios_dev = dsk->drive; 576185029Spjd 577185029Spjd bootdev = MAKEBOOTDEV(dev_maj[dsk->type], 578185029Spjd dsk->slice, dsk->unit, dsk->part), 579185029Spjd 580185029Spjd /* Process configuration file */ 581185029Spjd 582185029Spjd autoboot = 1; 583185029Spjd 584296963Sallanjude#ifdef LOADER_GELI_SUPPORT 585296963Sallanjude geli_init(); 586296963Sallanjude#endif 587185029Spjd zfs_init(); 588185029Spjd 589185029Spjd /* 590185029Spjd * Probe the boot drive first - we will try to boot from whatever 591185029Spjd * pool we find on that drive. 592185029Spjd */ 593241294Savg probe_drive(dsk); 594185029Spjd 595185029Spjd /* 596185029Spjd * Probe the rest of the drives that the bios knows about. This 597185029Spjd * will find any other available pools and it may fill in missing 598185029Spjd * vdevs for the boot pool. 599185029Spjd */ 600212805Spjd#ifndef VIRTUALBOX 601212805Spjd for (i = 0; i < *(unsigned char *)PTOV(BIOS_NUMDRIVES); i++) 602212805Spjd#else 603212805Spjd for (i = 0; i < MAXBDDEV; i++) 604212805Spjd#endif 605212805Spjd { 606185029Spjd if ((i | DRV_HARD) == *(uint8_t *)PTOV(ARGS)) 607185029Spjd continue; 608185029Spjd 609192194Sdfr if (!int13probe(i | DRV_HARD)) 610192194Sdfr break; 611192194Sdfr 612185029Spjd dsk = malloc(sizeof(struct dsk)); 613185029Spjd dsk->drive = i | DRV_HARD; 614185029Spjd dsk->type = dsk->drive & TYPE_AD; 615185029Spjd dsk->unit = i; 616185029Spjd dsk->slice = 0; 617185029Spjd dsk->part = 0; 618185029Spjd dsk->start = 0; 619185029Spjd dsk->init = 0; 620241294Savg probe_drive(dsk); 621185029Spjd } 622185029Spjd 623185029Spjd /* 624241294Savg * The first discovered pool, if any, is the pool. 625185029Spjd */ 626241294Savg spa = spa_get_primary(); 627185029Spjd if (!spa) { 628241294Savg printf("%s: No ZFS pools located, can't boot\n", BOOTPROG); 629241294Savg for (;;) 630241294Savg ; 631185029Spjd } 632185029Spjd 633241293Savg primary_spa = spa; 634241293Savg primary_vdev = spa_get_primary_vdev(spa); 635241293Savg 636235329Savg if (zfs_spa_init(spa) != 0 || zfs_mount(spa, 0, &zfsmount) != 0) { 637235329Savg printf("%s: failed to mount default pool %s\n", 638235329Savg BOOTPROG, spa->spa_name); 639235329Savg autoboot = 0; 640235329Savg } else if (zfs_lookup(&zfsmount, PATH_CONFIG, &dn) == 0 || 641235329Savg zfs_lookup(&zfsmount, PATH_DOTCONFIG, &dn) == 0) { 642185029Spjd off = 0; 643198079Sjhb zfs_read(spa, &dn, &off, cmd, sizeof(cmd)); 644185029Spjd } 645185029Spjd 646185029Spjd if (*cmd) { 647234339Savg /* 648234339Savg * Note that parse() is destructive to cmd[] and we also want 649234339Savg * to honor RBX_QUIET option that could be present in cmd[]. 650234339Savg */ 651234339Savg memcpy(cmddup, cmd, sizeof(cmd)); 652185029Spjd if (parse()) 653185029Spjd autoboot = 0; 654234339Savg if (!OPT_CHECK(RBX_QUIET)) 655241288Savg printf("%s: %s\n", PATH_CONFIG, cmddup); 656185029Spjd /* Do not process this command twice */ 657185029Spjd *cmd = 0; 658185029Spjd } 659185029Spjd 660185029Spjd /* 661294925Simp * Try to exec /boot/loader. If interrupted by a keypress, 662185029Spjd * or in case of failure, try to load a kernel directly instead. 663185029Spjd */ 664185029Spjd 665185029Spjd if (autoboot && !*kname) { 666294925Simp memcpy(kname, PATH_LOADER_ZFS, sizeof(PATH_LOADER_ZFS)); 667213136Spjd if (!keyhit(3)) { 668185029Spjd load(); 669185029Spjd memcpy(kname, PATH_KERNEL, sizeof(PATH_KERNEL)); 670185029Spjd } 671185029Spjd } 672185029Spjd 673185029Spjd /* Present the user with the boot2 prompt. */ 674185029Spjd 675185029Spjd for (;;) { 676235329Savg if (!autoboot || !OPT_CHECK(RBX_QUIET)) { 677235329Savg printf("\nFreeBSD/x86 boot\n"); 678235329Savg if (zfs_rlookup(spa, zfsmount.rootobj, rootname) != 0) 679241288Savg printf("Default: %s/<0x%llx>:%s\n" 680235329Savg "boot: ", 681235329Savg spa->spa_name, zfsmount.rootobj, kname); 682241288Savg else if (rootname[0] != '\0') 683241288Savg printf("Default: %s/%s:%s\n" 684241288Savg "boot: ", 685241288Savg spa->spa_name, rootname, kname); 686235329Savg else 687241288Savg printf("Default: %s:%s\n" 688235329Savg "boot: ", 689241288Savg spa->spa_name, kname); 690235329Savg } 691185029Spjd if (ioctrl & IO_SERIAL) 692185029Spjd sio_flush(); 693213136Spjd if (!autoboot || keyhit(5)) 694213136Spjd getstr(cmd, sizeof(cmd)); 695185029Spjd else if (!autoboot || !OPT_CHECK(RBX_QUIET)) 696185029Spjd putchar('\n'); 697185029Spjd autoboot = 0; 698185029Spjd if (parse()) 699185029Spjd putchar('\a'); 700185029Spjd else 701185029Spjd load(); 702185029Spjd } 703185029Spjd} 704185029Spjd 705185029Spjd/* XXX - Needed for btxld to link the boot2 binary; do not remove. */ 706185029Spjdvoid 707185029Spjdexit(int x) 708185029Spjd{ 709185029Spjd} 710185029Spjd 711185029Spjdstatic void 712185029Spjdload(void) 713185029Spjd{ 714185029Spjd union { 715185029Spjd struct exec ex; 716185029Spjd Elf32_Ehdr eh; 717185029Spjd } hdr; 718185029Spjd static Elf32_Phdr ep[2]; 719185029Spjd static Elf32_Shdr es[2]; 720185029Spjd caddr_t p; 721185029Spjd dnode_phys_t dn; 722185029Spjd off_t off; 723185029Spjd uint32_t addr, x; 724185029Spjd int fmt, i, j; 725185029Spjd 726235329Savg if (zfs_lookup(&zfsmount, kname, &dn)) { 727235329Savg printf("\nCan't find %s\n", kname); 728185029Spjd return; 729185029Spjd } 730185029Spjd off = 0; 731185029Spjd if (xfsread(&dn, &off, &hdr, sizeof(hdr))) 732185029Spjd return; 733185029Spjd if (N_GETMAGIC(hdr.ex) == ZMAGIC) 734185029Spjd fmt = 0; 735185029Spjd else if (IS_ELF(hdr.eh)) 736185029Spjd fmt = 1; 737185029Spjd else { 738185029Spjd printf("Invalid %s\n", "format"); 739185029Spjd return; 740185029Spjd } 741185029Spjd if (fmt == 0) { 742185029Spjd addr = hdr.ex.a_entry & 0xffffff; 743185029Spjd p = PTOV(addr); 744185029Spjd off = PAGE_SIZE; 745185029Spjd if (xfsread(&dn, &off, p, hdr.ex.a_text)) 746185029Spjd return; 747185029Spjd p += roundup2(hdr.ex.a_text, PAGE_SIZE); 748185029Spjd if (xfsread(&dn, &off, p, hdr.ex.a_data)) 749185029Spjd return; 750185029Spjd p += hdr.ex.a_data + roundup2(hdr.ex.a_bss, PAGE_SIZE); 751185029Spjd bootinfo.bi_symtab = VTOP(p); 752185029Spjd memcpy(p, &hdr.ex.a_syms, sizeof(hdr.ex.a_syms)); 753185029Spjd p += sizeof(hdr.ex.a_syms); 754185029Spjd if (hdr.ex.a_syms) { 755185029Spjd if (xfsread(&dn, &off, p, hdr.ex.a_syms)) 756185029Spjd return; 757185029Spjd p += hdr.ex.a_syms; 758185029Spjd if (xfsread(&dn, &off, p, sizeof(int))) 759185029Spjd return; 760185029Spjd x = *(uint32_t *)p; 761185029Spjd p += sizeof(int); 762185029Spjd x -= sizeof(int); 763185029Spjd if (xfsread(&dn, &off, p, x)) 764185029Spjd return; 765185029Spjd p += x; 766185029Spjd } 767185029Spjd } else { 768185029Spjd off = hdr.eh.e_phoff; 769185029Spjd for (j = i = 0; i < hdr.eh.e_phnum && j < 2; i++) { 770185029Spjd if (xfsread(&dn, &off, ep + j, sizeof(ep[0]))) 771185029Spjd return; 772185029Spjd if (ep[j].p_type == PT_LOAD) 773185029Spjd j++; 774185029Spjd } 775185029Spjd for (i = 0; i < 2; i++) { 776185029Spjd p = PTOV(ep[i].p_paddr & 0xffffff); 777185029Spjd off = ep[i].p_offset; 778185029Spjd if (xfsread(&dn, &off, p, ep[i].p_filesz)) 779185029Spjd return; 780185029Spjd } 781185029Spjd p += roundup2(ep[1].p_memsz, PAGE_SIZE); 782185029Spjd bootinfo.bi_symtab = VTOP(p); 783185029Spjd if (hdr.eh.e_shnum == hdr.eh.e_shstrndx + 3) { 784185029Spjd off = hdr.eh.e_shoff + sizeof(es[0]) * 785185029Spjd (hdr.eh.e_shstrndx + 1); 786185029Spjd if (xfsread(&dn, &off, &es, sizeof(es))) 787185029Spjd return; 788185029Spjd for (i = 0; i < 2; i++) { 789185029Spjd memcpy(p, &es[i].sh_size, sizeof(es[i].sh_size)); 790185029Spjd p += sizeof(es[i].sh_size); 791185029Spjd off = es[i].sh_offset; 792185029Spjd if (xfsread(&dn, &off, p, es[i].sh_size)) 793185029Spjd return; 794185029Spjd p += es[i].sh_size; 795185029Spjd } 796185029Spjd } 797185029Spjd addr = hdr.eh.e_entry & 0xffffff; 798185029Spjd } 799185029Spjd bootinfo.bi_esymtab = VTOP(p); 800185029Spjd bootinfo.bi_kernelname = VTOP(kname); 801235329Savg zfsargs.size = sizeof(zfsargs); 802235329Savg zfsargs.pool = zfsmount.spa->spa_guid; 803235329Savg zfsargs.root = zfsmount.rootobj; 804241293Savg zfsargs.primary_pool = primary_spa->spa_guid; 805296963Sallanjude#ifdef LOADER_GELI_SUPPORT 806296963Sallanjude bcopy(gelipw, zfsargs.gelipw, sizeof(zfsargs.gelipw)); 807296963Sallanjude bzero(gelipw, sizeof(gelipw)); 808296963Sallanjude#else 809296963Sallanjude zfsargs.gelipw[0] = '\0'; 810296963Sallanjude#endif 811241293Savg if (primary_vdev != NULL) 812241293Savg zfsargs.primary_vdev = primary_vdev->v_guid; 813241293Savg else 814241293Savg printf("failed to detect primary vdev\n"); 815185029Spjd __exec((caddr_t)addr, RB_BOOTINFO | (opts & RBX_MASK), 816185029Spjd bootdev, 817235329Savg KARGS_FLAGS_ZFS | KARGS_FLAGS_EXTARG, 818185029Spjd (uint32_t) spa->spa_guid, 819185029Spjd (uint32_t) (spa->spa_guid >> 32), 820235329Savg VTOP(&bootinfo), 821235329Savg zfsargs); 822185029Spjd} 823185029Spjd 824185029Spjdstatic int 825241288Savgzfs_mount_ds(char *dsname) 826241288Savg{ 827241288Savg uint64_t newroot; 828241288Savg spa_t *newspa; 829241288Savg char *q; 830241288Savg 831241288Savg q = strchr(dsname, '/'); 832241288Savg if (q) 833241288Savg *q++ = '\0'; 834241288Savg newspa = spa_find_by_name(dsname); 835241288Savg if (newspa == NULL) { 836241288Savg printf("\nCan't find ZFS pool %s\n", dsname); 837241288Savg return -1; 838241288Savg } 839241288Savg 840241288Savg if (zfs_spa_init(newspa)) 841241288Savg return -1; 842241288Savg 843241288Savg newroot = 0; 844241288Savg if (q) { 845241288Savg if (zfs_lookup_dataset(newspa, q, &newroot)) { 846241288Savg printf("\nCan't find dataset %s in ZFS pool %s\n", 847241288Savg q, newspa->spa_name); 848241288Savg return -1; 849241288Savg } 850241288Savg } 851241288Savg if (zfs_mount(newspa, newroot, &zfsmount)) { 852241288Savg printf("\nCan't mount ZFS dataset\n"); 853241288Savg return -1; 854241288Savg } 855241288Savg spa = newspa; 856241288Savg return (0); 857241288Savg} 858241288Savg 859241288Savgstatic int 860213136Spjdparse(void) 861185029Spjd{ 862185029Spjd char *arg = cmd; 863185029Spjd char *ep, *p, *q; 864185029Spjd const char *cp; 865185029Spjd int c, i, j; 866185029Spjd 867185029Spjd while ((c = *arg++)) { 868185029Spjd if (c == ' ' || c == '\t' || c == '\n') 869185029Spjd continue; 870185029Spjd for (p = arg; *p && *p != '\n' && *p != ' ' && *p != '\t'; p++); 871185029Spjd ep = p; 872185029Spjd if (*p) 873185029Spjd *p++ = 0; 874185029Spjd if (c == '-') { 875185029Spjd while ((c = *arg++)) { 876185029Spjd if (c == 'P') { 877185029Spjd if (*(uint8_t *)PTOV(0x496) & 0x10) { 878185029Spjd cp = "yes"; 879185029Spjd } else { 880185029Spjd opts |= OPT_SET(RBX_DUAL) | OPT_SET(RBX_SERIAL); 881185029Spjd cp = "no"; 882185029Spjd } 883185029Spjd printf("Keyboard: %s\n", cp); 884185029Spjd continue; 885185029Spjd } else if (c == 'S') { 886185029Spjd j = 0; 887185029Spjd while ((unsigned int)(i = *arg++ - '0') <= 9) 888185029Spjd j = j * 10 + i; 889185029Spjd if (j > 0 && i == -'0') { 890185029Spjd comspeed = j; 891185029Spjd break; 892185029Spjd } 893185029Spjd /* Fall through to error below ('S' not in optstr[]). */ 894185029Spjd } 895185029Spjd for (i = 0; c != optstr[i]; i++) 896185029Spjd if (i == NOPT - 1) 897185029Spjd return -1; 898185029Spjd opts ^= OPT_SET(flags[i]); 899185029Spjd } 900185029Spjd ioctrl = OPT_CHECK(RBX_DUAL) ? (IO_SERIAL|IO_KEYBOARD) : 901185029Spjd OPT_CHECK(RBX_SERIAL) ? IO_SERIAL : IO_KEYBOARD; 902241301Savg if (ioctrl & IO_SERIAL) { 903241301Savg if (sio_init(115200 / comspeed) != 0) 904241301Savg ioctrl &= ~IO_SERIAL; 905241301Savg } 906185029Spjd } if (c == '?') { 907185029Spjd dnode_phys_t dn; 908185029Spjd 909235329Savg if (zfs_lookup(&zfsmount, arg, &dn) == 0) { 910185029Spjd zap_list(spa, &dn); 911185029Spjd } 912185029Spjd return -1; 913185029Spjd } else { 914185029Spjd arg--; 915185029Spjd 916185029Spjd /* 917185029Spjd * Report pool status if the comment is 'status'. Lets 918185029Spjd * hope no-one wants to load /status as a kernel. 919185029Spjd */ 920185029Spjd if (!strcmp(arg, "status")) { 921185029Spjd spa_all_status(); 922185029Spjd return -1; 923185029Spjd } 924185029Spjd 925185029Spjd /* 926241288Savg * If there is "zfs:" prefix simply ignore it. 927241288Savg */ 928241288Savg if (strncmp(arg, "zfs:", 4) == 0) 929241288Savg arg += 4; 930241288Savg 931241288Savg /* 932185029Spjd * If there is a colon, switch pools. 933185029Spjd */ 934241288Savg q = strchr(arg, ':'); 935185029Spjd if (q) { 936241288Savg *q++ = '\0'; 937241288Savg if (zfs_mount_ds(arg) != 0) 938185029Spjd return -1; 939241288Savg arg = q; 940185029Spjd } 941185029Spjd if ((i = ep - arg)) { 942185029Spjd if ((size_t)i >= sizeof(kname)) 943185029Spjd return -1; 944185029Spjd memcpy(kname, arg, i + 1); 945185029Spjd } 946185029Spjd } 947185029Spjd arg = p; 948185029Spjd } 949185029Spjd return 0; 950185029Spjd} 951