/* zfsboot.c revision 348866 */
1/*- 2 * Copyright (c) 1998 Robert Nordier 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms are freely 6 * permitted provided that the above copyright notice and this 7 * paragraph and the following disclaimer are duplicated in all 8 * such forms. 9 * 10 * This software is provided "AS IS" and without any express or 11 * implied warranties, including, without limitation, the implied 12 * warranties of merchantability and fitness for a particular 13 * purpose. 14 */ 15 16#include <sys/cdefs.h> 17__FBSDID("$FreeBSD: stable/11/stand/i386/zfsboot/zfsboot.c 348866 2019-06-10 15:55:38Z kevans $"); 18 19#include "stand.h" 20 21#include <sys/param.h> 22#include <sys/errno.h> 23#include <sys/diskmbr.h> 24#ifdef GPT 25#include <sys/gpt.h> 26#endif 27#include <sys/reboot.h> 28#include <sys/queue.h> 29 30#include <machine/bootinfo.h> 31#include <machine/elf.h> 32#include <machine/pc/bios.h> 33 34#include <stdarg.h> 35#include <stddef.h> 36 37#include <a.out.h> 38 39#include <btxv86.h> 40 41#include "lib.h" 42#include "rbx.h" 43#include "drv.h" 44#include "edd.h" 45#include "cons.h" 46#include "bootargs.h" 47#include "paths.h" 48 49#include "libzfs.h" 50 51#define ARGS 0x900 52#define NOPT 14 53#define NDEV 3 54 55#define BIOS_NUMDRIVES 0x475 56#define DRV_HARD 0x80 57#define DRV_MASK 0x7f 58 59#define TYPE_AD 0 60#define TYPE_DA 1 61#define TYPE_MAXHARD TYPE_DA 62#define TYPE_FD 2 63 64#define DEV_GELIBOOT_BSIZE 4096 65 66extern uint32_t _end; 67 68#ifdef GPT 69static const uuid_t freebsd_zfs_uuid = GPT_ENT_TYPE_FREEBSD_ZFS; 70#endif 71static const char optstr[NOPT] = "DhaCcdgmnpqrsv"; /* Also 'P', 'S' */ 72static const unsigned char flags[NOPT] = { 73 RBX_DUAL, 74 RBX_SERIAL, 75 RBX_ASKNAME, 76 RBX_CDROM, 77 RBX_CONFIG, 78 RBX_KDB, 79 RBX_GDB, 80 RBX_MUTE, 81 RBX_NOINTR, 82 RBX_PAUSE, 83 RBX_QUIET, 84 RBX_DFLTROOT, 85 RBX_SINGLE, 86 RBX_VERBOSE 87}; 88uint32_t opts; 89 90/* 91 * Paths to try loading before falling back to the boot2 prompt. 
92 * 93 * /boot/zfsloader must be tried before /boot/loader in order to remain 94 * backward compatible with ZFS boot environments where /boot/loader exists 95 * but does not have ZFS support, which was the case before FreeBSD 12. 96 * 97 * If no loader is found, try to load a kernel directly instead. 98 */ 99static const struct string { 100 const char *p; 101 size_t len; 102} loadpath[] = { 103 { PATH_LOADER_ZFS, sizeof(PATH_LOADER_ZFS) }, 104 { PATH_LOADER, sizeof(PATH_LOADER) }, 105 { PATH_KERNEL, sizeof(PATH_KERNEL) }, 106}; 107 108static const unsigned char dev_maj[NDEV] = {30, 4, 2}; 109 110static char cmd[512]; 111static char cmddup[512]; 112static char kname[1024]; 113static char rootname[256]; 114static int comspeed = SIOSPD; 115static struct bootinfo bootinfo; 116static uint32_t bootdev; 117static struct zfs_boot_args zfsargs; 118 119vm_offset_t high_heap_base; 120uint32_t bios_basemem, bios_extmem, high_heap_size; 121 122static struct bios_smap smap; 123 124/* 125 * The minimum amount of memory to reserve in bios_extmem for the heap. 126 */ 127#define HEAP_MIN (64 * 1024 * 1024) 128 129static char *heap_next; 130static char *heap_end; 131 132/* Buffers that must not span a 64k boundary. */ 133#define READ_BUF_SIZE 8192 134struct dmadat { 135 char rdbuf[READ_BUF_SIZE]; /* for reading large things */ 136 char secbuf[READ_BUF_SIZE]; /* for MBR/disklabel */ 137}; 138static struct dmadat *dmadat; 139 140void exit(int); 141void reboot(void); 142static void load(void); 143static int parse_cmd(void); 144static void bios_getmem(void); 145int main(void); 146 147#ifdef LOADER_GELI_SUPPORT 148#include "geliboot.h" 149static char gelipw[GELI_PW_MAXLEN]; 150#endif 151 152struct zfsdsk { 153 struct dsk dsk; 154#ifdef LOADER_GELI_SUPPORT 155 struct geli_dev *gdev; 156#endif 157}; 158 159#include "zfsimpl.c" 160 161/* 162 * Read from a dnode (which must be from a ZPL filesystem). 
163 */ 164static int 165zfs_read(spa_t *spa, const dnode_phys_t *dnode, off_t *offp, void *start, size_t size) 166{ 167 const znode_phys_t *zp = (const znode_phys_t *) dnode->dn_bonus; 168 size_t n; 169 int rc; 170 171 n = size; 172 if (*offp + n > zp->zp_size) 173 n = zp->zp_size - *offp; 174 175 rc = dnode_read(spa, dnode, *offp, start, n); 176 if (rc) 177 return (-1); 178 *offp += n; 179 180 return (n); 181} 182 183/* 184 * Current ZFS pool 185 */ 186static spa_t *spa; 187static spa_t *primary_spa; 188static vdev_t *primary_vdev; 189 190/* 191 * A wrapper for dskread that doesn't have to worry about whether the 192 * buffer pointer crosses a 64k boundary. 193 */ 194static int 195vdev_read(void *xvdev, void *priv, off_t off, void *buf, size_t bytes) 196{ 197 char *p; 198 daddr_t lba, alignlba; 199 off_t diff; 200 unsigned int nb, alignnb; 201 struct zfsdsk *zdsk = (struct zfsdsk *) priv; 202 203 if ((off & (DEV_BSIZE - 1)) || (bytes & (DEV_BSIZE - 1))) 204 return -1; 205 206 p = buf; 207 lba = off / DEV_BSIZE; 208 lba += zdsk->dsk.start; 209 /* 210 * Align reads to 4k else 4k sector GELIs will not decrypt. 211 * Round LBA down to nearest multiple of DEV_GELIBOOT_BSIZE bytes. 212 */ 213 alignlba = rounddown2(off, DEV_GELIBOOT_BSIZE) / DEV_BSIZE; 214 /* 215 * The read must be aligned to DEV_GELIBOOT_BSIZE bytes relative to the 216 * start of the GELI partition, not the start of the actual disk. 217 */ 218 alignlba += zdsk->dsk.start; 219 diff = (lba - alignlba) * DEV_BSIZE; 220 221 while (bytes > 0) { 222 nb = bytes / DEV_BSIZE; 223 /* 224 * Ensure that the read size plus the leading offset does not 225 * exceed the size of the read buffer. 226 */ 227 if (nb > (READ_BUF_SIZE - diff) / DEV_BSIZE) 228 nb = (READ_BUF_SIZE - diff) / DEV_BSIZE; 229 /* 230 * Round the number of blocks to read up to the nearest multiple 231 * of DEV_GELIBOOT_BSIZE. 
232 */ 233 alignnb = roundup2(nb * DEV_BSIZE + diff, DEV_GELIBOOT_BSIZE) 234 / DEV_BSIZE; 235 236 if (zdsk->dsk.size > 0 && alignlba + alignnb > 237 zdsk->dsk.size + zdsk->dsk.start) { 238 printf("Shortening read at %lld from %d to %lld\n", 239 alignlba, alignnb, 240 (zdsk->dsk.size + zdsk->dsk.start) - alignlba); 241 alignnb = (zdsk->dsk.size + zdsk->dsk.start) - alignlba; 242 } 243 244 if (drvread(&zdsk->dsk, dmadat->rdbuf, alignlba, alignnb)) 245 return -1; 246#ifdef LOADER_GELI_SUPPORT 247 /* decrypt */ 248 if (zdsk->gdev != NULL) { 249 if (geli_read(zdsk->gdev, ((alignlba - zdsk->dsk.start) * 250 DEV_BSIZE), dmadat->rdbuf, alignnb * DEV_BSIZE)) 251 return (-1); 252 } 253#endif 254 memcpy(p, dmadat->rdbuf + diff, nb * DEV_BSIZE); 255 p += nb * DEV_BSIZE; 256 lba += nb; 257 alignlba += alignnb; 258 bytes -= nb * DEV_BSIZE; 259 /* Don't need the leading offset after the first block. */ 260 diff = 0; 261 } 262 263 return 0; 264} 265/* Match the signature exactly due to signature madness */ 266static int 267vdev_read2(vdev_t *vdev, void *priv, off_t off, void *buf, size_t bytes) 268{ 269 return vdev_read(vdev, priv, off, buf, bytes); 270} 271 272 273static int 274vdev_write(vdev_t *vdev, void *priv, off_t off, void *buf, size_t bytes) 275{ 276 char *p; 277 daddr_t lba; 278 unsigned int nb; 279 struct zfsdsk *zdsk = (struct zfsdsk *) priv; 280 281 if ((off & (DEV_BSIZE - 1)) || (bytes & (DEV_BSIZE - 1))) 282 return -1; 283 284 p = buf; 285 lba = off / DEV_BSIZE; 286 lba += zdsk->dsk.start; 287 while (bytes > 0) { 288 nb = bytes / DEV_BSIZE; 289 if (nb > READ_BUF_SIZE / DEV_BSIZE) 290 nb = READ_BUF_SIZE / DEV_BSIZE; 291 memcpy(dmadat->rdbuf, p, nb * DEV_BSIZE); 292 if (drvwrite(&zdsk->dsk, dmadat->rdbuf, lba, nb)) 293 return -1; 294 p += nb * DEV_BSIZE; 295 lba += nb; 296 bytes -= nb * DEV_BSIZE; 297 } 298 299 return 0; 300} 301 302static int 303xfsread(const dnode_phys_t *dnode, off_t *offp, void *buf, size_t nbyte) 304{ 305 if ((size_t)zfs_read(spa, dnode, offp, 
buf, nbyte) != nbyte) { 306 printf("Invalid format\n"); 307 return -1; 308 } 309 return 0; 310} 311 312/* 313 * Read Pad2 (formerly "Boot Block Header") area of the first 314 * vdev label of the given vdev. 315 */ 316static int 317vdev_read_pad2(vdev_t *vdev, char *buf, size_t size) 318{ 319 blkptr_t bp; 320 char *tmp = zap_scratch; 321 off_t off = offsetof(vdev_label_t, vl_pad2); 322 323 if (size > VDEV_PAD_SIZE) 324 size = VDEV_PAD_SIZE; 325 326 BP_ZERO(&bp); 327 BP_SET_LSIZE(&bp, VDEV_PAD_SIZE); 328 BP_SET_PSIZE(&bp, VDEV_PAD_SIZE); 329 BP_SET_CHECKSUM(&bp, ZIO_CHECKSUM_LABEL); 330 BP_SET_COMPRESS(&bp, ZIO_COMPRESS_OFF); 331 DVA_SET_OFFSET(BP_IDENTITY(&bp), off); 332 if (vdev_read_phys(vdev, &bp, tmp, off, 0)) 333 return (EIO); 334 memcpy(buf, tmp, size); 335 return (0); 336} 337 338static int 339vdev_clear_pad2(vdev_t *vdev) 340{ 341 char *zeroes = zap_scratch; 342 uint64_t *end; 343 off_t off = offsetof(vdev_label_t, vl_pad2); 344 345 memset(zeroes, 0, VDEV_PAD_SIZE); 346 end = (uint64_t *)(zeroes + VDEV_PAD_SIZE); 347 /* ZIO_CHECKSUM_LABEL magic and pre-calcualted checksum for all zeros */ 348 end[-5] = 0x0210da7ab10c7a11; 349 end[-4] = 0x97f48f807f6e2a3f; 350 end[-3] = 0xaf909f1658aacefc; 351 end[-2] = 0xcbd1ea57ff6db48b; 352 end[-1] = 0x6ec692db0d465fab; 353 if (vdev_write(vdev, vdev->v_read_priv, off, zeroes, VDEV_PAD_SIZE)) 354 return (EIO); 355 return (0); 356} 357 358static void 359bios_getmem(void) 360{ 361 uint64_t size; 362 363 /* Parse system memory map */ 364 v86.ebx = 0; 365 do { 366 v86.ctl = V86_FLAGS; 367 v86.addr = 0x15; /* int 0x15 function 0xe820*/ 368 v86.eax = 0xe820; 369 v86.ecx = sizeof(struct bios_smap); 370 v86.edx = SMAP_SIG; 371 v86.es = VTOPSEG(&smap); 372 v86.edi = VTOPOFF(&smap); 373 v86int(); 374 if (V86_CY(v86.efl) || (v86.eax != SMAP_SIG)) 375 break; 376 /* look for a low-memory segment that's large enough */ 377 if ((smap.type == SMAP_TYPE_MEMORY) && (smap.base == 0) && 378 (smap.length >= (512 * 1024))) 379 bios_basemem = 
smap.length; 380 /* look for the first segment in 'extended' memory */ 381 if ((smap.type == SMAP_TYPE_MEMORY) && (smap.base == 0x100000)) { 382 bios_extmem = smap.length; 383 } 384 385 /* 386 * Look for the largest segment in 'extended' memory beyond 387 * 1MB but below 4GB. 388 */ 389 if ((smap.type == SMAP_TYPE_MEMORY) && (smap.base > 0x100000) && 390 (smap.base < 0x100000000ull)) { 391 size = smap.length; 392 393 /* 394 * If this segment crosses the 4GB boundary, truncate it. 395 */ 396 if (smap.base + size > 0x100000000ull) 397 size = 0x100000000ull - smap.base; 398 399 if (size > high_heap_size) { 400 high_heap_size = size; 401 high_heap_base = smap.base; 402 } 403 } 404 } while (v86.ebx != 0); 405 406 /* Fall back to the old compatibility function for base memory */ 407 if (bios_basemem == 0) { 408 v86.ctl = 0; 409 v86.addr = 0x12; /* int 0x12 */ 410 v86int(); 411 412 bios_basemem = (v86.eax & 0xffff) * 1024; 413 } 414 415 /* Fall back through several compatibility functions for extended memory */ 416 if (bios_extmem == 0) { 417 v86.ctl = V86_FLAGS; 418 v86.addr = 0x15; /* int 0x15 function 0xe801*/ 419 v86.eax = 0xe801; 420 v86int(); 421 if (!V86_CY(v86.efl)) { 422 bios_extmem = ((v86.ecx & 0xffff) + ((v86.edx & 0xffff) * 64)) * 1024; 423 } 424 } 425 if (bios_extmem == 0) { 426 v86.ctl = 0; 427 v86.addr = 0x15; /* int 0x15 function 0x88*/ 428 v86.eax = 0x8800; 429 v86int(); 430 bios_extmem = (v86.eax & 0xffff) * 1024; 431 } 432 433 /* 434 * If we have extended memory and did not find a suitable heap 435 * region in the SMAP, use the last 3MB of 'extended' memory as a 436 * high heap candidate. 
437 */ 438 if (bios_extmem >= HEAP_MIN && high_heap_size < HEAP_MIN) { 439 high_heap_size = HEAP_MIN; 440 high_heap_base = bios_extmem + 0x100000 - HEAP_MIN; 441 } 442} 443 444/* 445 * Try to detect a device supported by the legacy int13 BIOS 446 */ 447static int 448int13probe(int drive) 449{ 450 v86.ctl = V86_FLAGS; 451 v86.addr = 0x13; 452 v86.eax = 0x800; 453 v86.edx = drive; 454 v86int(); 455 456 if (!V86_CY(v86.efl) && /* carry clear */ 457 ((v86.edx & 0xff) != (drive & DRV_MASK))) { /* unit # OK */ 458 if ((v86.ecx & 0x3f) == 0) { /* absurd sector size */ 459 return(0); /* skip device */ 460 } 461 return (1); 462 } 463 return(0); 464} 465 466/* 467 * We call this when we find a ZFS vdev - ZFS consumes the dsk 468 * structure so we must make a new one. 469 */ 470static struct zfsdsk * 471copy_dsk(struct zfsdsk *zdsk) 472{ 473 struct zfsdsk *newdsk; 474 475 newdsk = malloc(sizeof(struct zfsdsk)); 476 *newdsk = *zdsk; 477 return (newdsk); 478} 479 480/* 481 * Get disk size from eax=0x800 and 0x4800. We need to probe both 482 * because 0x4800 may not be available and we would like to get more 483 * or less correct disk size - if it is possible at all. 484 * Note we do not really want to touch drv.c because that code is shared 485 * with boot2 and we can not afford to grow that code. 
486 */ 487static uint64_t 488drvsize_ext(struct zfsdsk *zdsk) 489{ 490 struct dsk *dskp; 491 uint64_t size, tmp; 492 int cyl, hds, sec; 493 494 dskp = &zdsk->dsk; 495 496 v86.ctl = V86_FLAGS; 497 v86.addr = 0x13; 498 v86.eax = 0x800; 499 v86.edx = dskp->drive; 500 v86int(); 501 502 /* Don't error out if we get bad sector number, try EDD as well */ 503 if (V86_CY(v86.efl) || /* carry set */ 504 (v86.edx & 0xff) <= (unsigned)(dskp->drive & 0x7f)) /* unit # bad */ 505 return (0); 506 cyl = ((v86.ecx & 0xc0) << 2) + ((v86.ecx & 0xff00) >> 8) + 1; 507 /* Convert max head # -> # of heads */ 508 hds = ((v86.edx & 0xff00) >> 8) + 1; 509 sec = v86.ecx & 0x3f; 510 511 size = (uint64_t)cyl * hds * sec; 512 513 /* Determine if we can use EDD with this device. */ 514 v86.ctl = V86_FLAGS; 515 v86.addr = 0x13; 516 v86.eax = 0x4100; 517 v86.edx = dskp->drive; 518 v86.ebx = 0x55aa; 519 v86int(); 520 if (V86_CY(v86.efl) || /* carry set */ 521 (v86.ebx & 0xffff) != 0xaa55 || /* signature */ 522 (v86.ecx & EDD_INTERFACE_FIXED_DISK) == 0) 523 return (size); 524 525 tmp = drvsize(dskp); 526 if (tmp > size) 527 size = tmp; 528 529 return (size); 530} 531 532/* 533 * The "layered" ioctl to read disk/partition size. Unfortunately 534 * the zfsboot case is hardest, because we do not have full software 535 * stack available, so we need to do some manual work here. 536 */ 537uint64_t 538ldi_get_size(void *priv) 539{ 540 struct zfsdsk *zdsk = priv; 541 uint64_t size = zdsk->dsk.size; 542 543 if (zdsk->dsk.start == 0) 544 size = drvsize_ext(zdsk); 545 546 return (size * DEV_BSIZE); 547} 548 549static void 550probe_drive(struct zfsdsk *zdsk) 551{ 552#ifdef GPT 553 struct gpt_hdr hdr; 554 struct gpt_ent *ent; 555 unsigned part, entries_per_sec; 556 daddr_t slba; 557#endif 558#if defined(GPT) || defined(LOADER_GELI_SUPPORT) 559 daddr_t elba; 560#endif 561 562 struct dos_partition *dp; 563 char *sec; 564 unsigned i; 565 566 /* 567 * If we find a vdev on the whole disk, stop here. 
568 */ 569 if (vdev_probe(vdev_read2, zdsk, NULL) == 0) 570 return; 571 572#ifdef LOADER_GELI_SUPPORT 573 /* 574 * Taste the disk, if it is GELI encrypted, decrypt it and check to see if 575 * it is a usable vdev then. Otherwise dig 576 * out the partition table and probe each slice/partition 577 * in turn for a vdev or GELI encrypted vdev. 578 */ 579 elba = drvsize_ext(zdsk); 580 if (elba > 0) { 581 elba--; 582 } 583 zdsk->gdev = geli_taste(vdev_read, zdsk, elba, "disk%u:0:"); 584 if (zdsk->gdev != NULL) { 585 if (geli_havekey(zdsk->gdev) == 0 || 586 geli_passphrase(zdsk->gdev, gelipw) == 0) { 587 if (vdev_probe(vdev_read2, zdsk, NULL) == 0) { 588 return; 589 } 590 } 591 } 592#endif /* LOADER_GELI_SUPPORT */ 593 594 sec = dmadat->secbuf; 595 zdsk->dsk.start = 0; 596 597#ifdef GPT 598 /* 599 * First check for GPT. 600 */ 601 if (drvread(&zdsk->dsk, sec, 1, 1)) { 602 return; 603 } 604 memcpy(&hdr, sec, sizeof(hdr)); 605 if (memcmp(hdr.hdr_sig, GPT_HDR_SIG, sizeof(hdr.hdr_sig)) != 0 || 606 hdr.hdr_lba_self != 1 || hdr.hdr_revision < 0x00010000 || 607 hdr.hdr_entsz < sizeof(*ent) || DEV_BSIZE % hdr.hdr_entsz != 0) { 608 goto trymbr; 609 } 610 611 /* 612 * Probe all GPT partitions for the presence of ZFS pools. We 613 * return the spa_t for the first we find (if requested). This 614 * will have the effect of booting from the first pool on the 615 * disk. 
616 * 617 * If no vdev is found, GELI decrypting the device and try again 618 */ 619 entries_per_sec = DEV_BSIZE / hdr.hdr_entsz; 620 slba = hdr.hdr_lba_table; 621 elba = slba + hdr.hdr_entries / entries_per_sec; 622 while (slba < elba) { 623 zdsk->dsk.start = 0; 624 if (drvread(&zdsk->dsk, sec, slba, 1)) 625 return; 626 for (part = 0; part < entries_per_sec; part++) { 627 ent = (struct gpt_ent *)(sec + part * hdr.hdr_entsz); 628 if (memcmp(&ent->ent_type, &freebsd_zfs_uuid, 629 sizeof(uuid_t)) == 0) { 630 zdsk->dsk.start = ent->ent_lba_start; 631 zdsk->dsk.size = ent->ent_lba_end - ent->ent_lba_start + 1; 632 zdsk->dsk.slice = part + 1; 633 zdsk->dsk.part = 255; 634 if (vdev_probe(vdev_read2, zdsk, NULL) == 0) { 635 /* 636 * This slice had a vdev. We need a new dsk 637 * structure now since the vdev now owns this one. 638 */ 639 zdsk = copy_dsk(zdsk); 640 } 641#ifdef LOADER_GELI_SUPPORT 642 else if ((zdsk->gdev = geli_taste(vdev_read, zdsk, 643 ent->ent_lba_end - ent->ent_lba_start, "disk%up%u:", 644 zdsk->dsk.unit, zdsk->dsk.slice)) != NULL) { 645 if (geli_havekey(zdsk->gdev) == 0 || 646 geli_passphrase(zdsk->gdev, gelipw) == 0) { 647 /* 648 * This slice has GELI, check it for ZFS. 649 */ 650 if (vdev_probe(vdev_read2, zdsk, NULL) == 0) { 651 /* 652 * This slice had a vdev. We need a new dsk 653 * structure now since the vdev now owns this one. 
654 */ 655 zdsk = copy_dsk(zdsk); 656 } 657 break; 658 } 659 } 660#endif /* LOADER_GELI_SUPPORT */ 661 } 662 } 663 slba++; 664 } 665 return; 666trymbr: 667#endif /* GPT */ 668 669 if (drvread(&zdsk->dsk, sec, DOSBBSECTOR, 1)) 670 return; 671 dp = (void *)(sec + DOSPARTOFF); 672 673 for (i = 0; i < NDOSPART; i++) { 674 if (!dp[i].dp_typ) 675 continue; 676 zdsk->dsk.start = dp[i].dp_start; 677 zdsk->dsk.size = dp[i].dp_size; 678 zdsk->dsk.slice = i + 1; 679 if (vdev_probe(vdev_read2, zdsk, NULL) == 0) { 680 zdsk = copy_dsk(zdsk); 681 } 682#ifdef LOADER_GELI_SUPPORT 683 else if ((zdsk->gdev = geli_taste(vdev_read, zdsk, dp[i].dp_size - 684 dp[i].dp_start, "disk%us%u:")) != NULL) { 685 if (geli_havekey(zdsk->gdev) == 0 || 686 geli_passphrase(zdsk->gdev, gelipw) == 0) { 687 /* 688 * This slice has GELI, check it for ZFS. 689 */ 690 if (vdev_probe(vdev_read2, zdsk, NULL) == 0) { 691 /* 692 * This slice had a vdev. We need a new dsk 693 * structure now since the vdev now owns this one. 694 */ 695 zdsk = copy_dsk(zdsk); 696 } 697 break; 698 } 699 } 700#endif /* LOADER_GELI_SUPPORT */ 701 } 702} 703 704int 705main(void) 706{ 707 dnode_phys_t dn; 708 off_t off; 709 struct zfsdsk *zdsk; 710 int autoboot, i; 711 int nextboot; 712 int rc; 713 714 dmadat = (void *)(roundup2(__base + (int32_t)&_end, 0x10000) - __base); 715 716 bios_getmem(); 717 718 if (high_heap_size > 0) { 719 heap_end = PTOV(high_heap_base + high_heap_size); 720 heap_next = PTOV(high_heap_base); 721 } else { 722 heap_next = (char *)dmadat + sizeof(*dmadat); 723 heap_end = (char *)PTOV(bios_basemem); 724 } 725 setheap(heap_next, heap_end); 726 727 zdsk = calloc(1, sizeof(struct zfsdsk)); 728 zdsk->dsk.drive = *(uint8_t *)PTOV(ARGS); 729 zdsk->dsk.type = zdsk->dsk.drive & DRV_HARD ? 
TYPE_AD : TYPE_FD; 730 zdsk->dsk.unit = zdsk->dsk.drive & DRV_MASK; 731 zdsk->dsk.slice = *(uint8_t *)PTOV(ARGS + 1) + 1; 732 zdsk->dsk.part = 0; 733 zdsk->dsk.start = 0; 734 zdsk->dsk.size = drvsize_ext(zdsk); 735 736 bootinfo.bi_version = BOOTINFO_VERSION; 737 bootinfo.bi_size = sizeof(bootinfo); 738 bootinfo.bi_basemem = bios_basemem / 1024; 739 bootinfo.bi_extmem = bios_extmem / 1024; 740 bootinfo.bi_memsizes_valid++; 741 bootinfo.bi_bios_dev = zdsk->dsk.drive; 742 743 bootdev = MAKEBOOTDEV(dev_maj[zdsk->dsk.type], 744 zdsk->dsk.slice, zdsk->dsk.unit, zdsk->dsk.part); 745 746 /* Process configuration file */ 747 748 autoboot = 1; 749 750 zfs_init(); 751 752 /* 753 * Probe the boot drive first - we will try to boot from whatever 754 * pool we find on that drive. 755 */ 756 probe_drive(zdsk); 757 758 /* 759 * Probe the rest of the drives that the bios knows about. This 760 * will find any other available pools and it may fill in missing 761 * vdevs for the boot pool. 762 */ 763#ifndef VIRTUALBOX 764 for (i = 0; i < *(unsigned char *)PTOV(BIOS_NUMDRIVES); i++) 765#else 766 for (i = 0; i < MAXBDDEV; i++) 767#endif 768 { 769 if ((i | DRV_HARD) == *(uint8_t *)PTOV(ARGS)) 770 continue; 771 772 if (!int13probe(i | DRV_HARD)) 773 break; 774 775 zdsk = calloc(1, sizeof(struct zfsdsk)); 776 zdsk->dsk.drive = i | DRV_HARD; 777 zdsk->dsk.type = zdsk->dsk.drive & TYPE_AD; 778 zdsk->dsk.unit = i; 779 zdsk->dsk.slice = 0; 780 zdsk->dsk.part = 0; 781 zdsk->dsk.start = 0; 782 zdsk->dsk.size = drvsize_ext(zdsk); 783 probe_drive(zdsk); 784 } 785 786 /* 787 * The first discovered pool, if any, is the pool. 
788 */ 789 spa = spa_get_primary(); 790 if (!spa) { 791 printf("%s: No ZFS pools located, can't boot\n", BOOTPROG); 792 for (;;) 793 ; 794 } 795 796 primary_spa = spa; 797 primary_vdev = spa_get_primary_vdev(spa); 798 799 nextboot = 0; 800 rc = vdev_read_pad2(primary_vdev, cmd, sizeof(cmd)); 801 if (vdev_clear_pad2(primary_vdev)) 802 printf("failed to clear pad2 area of primary vdev\n"); 803 if (rc == 0) { 804 if (*cmd) { 805 /* 806 * We could find an old-style ZFS Boot Block header here. 807 * Simply ignore it. 808 */ 809 if (*(uint64_t *)cmd != 0x2f5b007b10c) { 810 /* 811 * Note that parse() is destructive to cmd[] and we also want 812 * to honor RBX_QUIET option that could be present in cmd[]. 813 */ 814 nextboot = 1; 815 memcpy(cmddup, cmd, sizeof(cmd)); 816 if (parse_cmd()) { 817 printf("failed to parse pad2 area of primary vdev\n"); 818 reboot(); 819 } 820 if (!OPT_CHECK(RBX_QUIET)) 821 printf("zfs nextboot: %s\n", cmddup); 822 } 823 /* Do not process this command twice */ 824 *cmd = 0; 825 } 826 } else 827 printf("failed to read pad2 area of primary vdev\n"); 828 829 /* Mount ZFS only if it's not already mounted via nextboot parsing. */ 830 if (zfsmount.spa == NULL && 831 (zfs_spa_init(spa) != 0 || zfs_mount(spa, 0, &zfsmount) != 0)) { 832 printf("%s: failed to mount default pool %s\n", 833 BOOTPROG, spa->spa_name); 834 autoboot = 0; 835 } else if (zfs_lookup(&zfsmount, PATH_CONFIG, &dn) == 0 || 836 zfs_lookup(&zfsmount, PATH_DOTCONFIG, &dn) == 0) { 837 off = 0; 838 zfs_read(spa, &dn, &off, cmd, sizeof(cmd)); 839 } 840 841 if (*cmd) { 842 /* 843 * Note that parse_cmd() is destructive to cmd[] and we also want 844 * to honor RBX_QUIET option that could be present in cmd[]. 845 */ 846 memcpy(cmddup, cmd, sizeof(cmd)); 847 if (parse_cmd()) 848 autoboot = 0; 849 if (!OPT_CHECK(RBX_QUIET)) 850 printf("%s: %s\n", PATH_CONFIG, cmddup); 851 /* Do not process this command twice */ 852 *cmd = 0; 853 } 854 855 /* Do not risk waiting at the prompt forever. 
*/ 856 if (nextboot && !autoboot) 857 reboot(); 858 859 if (autoboot && !*kname) { 860 /* 861 * Iterate through the list of loader and kernel paths, trying to load. 862 * If interrupted by a keypress, or in case of failure, drop the user 863 * to the boot2 prompt. 864 */ 865 for (i = 0; i < nitems(loadpath); i++) { 866 memcpy(kname, loadpath[i].p, loadpath[i].len); 867 if (keyhit(3)) 868 break; 869 load(); 870 } 871 } 872 873 /* Present the user with the boot2 prompt. */ 874 875 for (;;) { 876 if (!autoboot || !OPT_CHECK(RBX_QUIET)) { 877 printf("\nFreeBSD/x86 boot\n"); 878 if (zfs_rlookup(spa, zfsmount.rootobj, rootname) != 0) 879 printf("Default: %s/<0x%llx>:%s\n" 880 "boot: ", 881 spa->spa_name, zfsmount.rootobj, kname); 882 else if (rootname[0] != '\0') 883 printf("Default: %s/%s:%s\n" 884 "boot: ", 885 spa->spa_name, rootname, kname); 886 else 887 printf("Default: %s:%s\n" 888 "boot: ", 889 spa->spa_name, kname); 890 } 891 if (ioctrl & IO_SERIAL) 892 sio_flush(); 893 if (!autoboot || keyhit(5)) 894 getstr(cmd, sizeof(cmd)); 895 else if (!autoboot || !OPT_CHECK(RBX_QUIET)) 896 putchar('\n'); 897 autoboot = 0; 898 if (parse_cmd()) 899 putchar('\a'); 900 else 901 load(); 902 } 903} 904 905/* XXX - Needed for btxld to link the boot2 binary; do not remove. 
*/ 906void 907exit(int x) 908{ 909 __exit(x); 910} 911 912void 913reboot(void) 914{ 915 __exit(0); 916} 917 918static void 919load(void) 920{ 921 union { 922 struct exec ex; 923 Elf32_Ehdr eh; 924 } hdr; 925 static Elf32_Phdr ep[2]; 926 static Elf32_Shdr es[2]; 927 caddr_t p; 928 dnode_phys_t dn; 929 off_t off; 930 uint32_t addr, x; 931 int fmt, i, j; 932 933 if (zfs_lookup(&zfsmount, kname, &dn)) { 934 printf("\nCan't find %s\n", kname); 935 return; 936 } 937 off = 0; 938 if (xfsread(&dn, &off, &hdr, sizeof(hdr))) 939 return; 940 if (N_GETMAGIC(hdr.ex) == ZMAGIC) 941 fmt = 0; 942 else if (IS_ELF(hdr.eh)) 943 fmt = 1; 944 else { 945 printf("Invalid %s\n", "format"); 946 return; 947 } 948 if (fmt == 0) { 949 addr = hdr.ex.a_entry & 0xffffff; 950 p = PTOV(addr); 951 off = PAGE_SIZE; 952 if (xfsread(&dn, &off, p, hdr.ex.a_text)) 953 return; 954 p += roundup2(hdr.ex.a_text, PAGE_SIZE); 955 if (xfsread(&dn, &off, p, hdr.ex.a_data)) 956 return; 957 p += hdr.ex.a_data + roundup2(hdr.ex.a_bss, PAGE_SIZE); 958 bootinfo.bi_symtab = VTOP(p); 959 memcpy(p, &hdr.ex.a_syms, sizeof(hdr.ex.a_syms)); 960 p += sizeof(hdr.ex.a_syms); 961 if (hdr.ex.a_syms) { 962 if (xfsread(&dn, &off, p, hdr.ex.a_syms)) 963 return; 964 p += hdr.ex.a_syms; 965 if (xfsread(&dn, &off, p, sizeof(int))) 966 return; 967 x = *(uint32_t *)p; 968 p += sizeof(int); 969 x -= sizeof(int); 970 if (xfsread(&dn, &off, p, x)) 971 return; 972 p += x; 973 } 974 } else { 975 off = hdr.eh.e_phoff; 976 for (j = i = 0; i < hdr.eh.e_phnum && j < 2; i++) { 977 if (xfsread(&dn, &off, ep + j, sizeof(ep[0]))) 978 return; 979 if (ep[j].p_type == PT_LOAD) 980 j++; 981 } 982 for (i = 0; i < 2; i++) { 983 p = PTOV(ep[i].p_paddr & 0xffffff); 984 off = ep[i].p_offset; 985 if (xfsread(&dn, &off, p, ep[i].p_filesz)) 986 return; 987 } 988 p += roundup2(ep[1].p_memsz, PAGE_SIZE); 989 bootinfo.bi_symtab = VTOP(p); 990 if (hdr.eh.e_shnum == hdr.eh.e_shstrndx + 3) { 991 off = hdr.eh.e_shoff + sizeof(es[0]) * 992 (hdr.eh.e_shstrndx + 1); 
993 if (xfsread(&dn, &off, &es, sizeof(es))) 994 return; 995 for (i = 0; i < 2; i++) { 996 memcpy(p, &es[i].sh_size, sizeof(es[i].sh_size)); 997 p += sizeof(es[i].sh_size); 998 off = es[i].sh_offset; 999 if (xfsread(&dn, &off, p, es[i].sh_size)) 1000 return; 1001 p += es[i].sh_size; 1002 } 1003 } 1004 addr = hdr.eh.e_entry & 0xffffff; 1005 } 1006 bootinfo.bi_esymtab = VTOP(p); 1007 bootinfo.bi_kernelname = VTOP(kname); 1008 zfsargs.size = sizeof(zfsargs); 1009 zfsargs.pool = zfsmount.spa->spa_guid; 1010 zfsargs.root = zfsmount.rootobj; 1011 zfsargs.primary_pool = primary_spa->spa_guid; 1012#ifdef LOADER_GELI_SUPPORT 1013 explicit_bzero(gelipw, sizeof(gelipw)); 1014 export_geli_boot_data(&zfsargs.gelidata); 1015#endif 1016 if (primary_vdev != NULL) 1017 zfsargs.primary_vdev = primary_vdev->v_guid; 1018 else 1019 printf("failed to detect primary vdev\n"); 1020 /* 1021 * Note that the zfsargs struct is passed by value, not by pointer. Code in 1022 * btxldr.S copies the values from the entry stack to a fixed location 1023 * within loader(8) at startup due to the presence of KARGS_FLAGS_EXTARG. 
1024 */ 1025 __exec((caddr_t)addr, RB_BOOTINFO | (opts & RBX_MASK), 1026 bootdev, 1027 KARGS_FLAGS_ZFS | KARGS_FLAGS_EXTARG, 1028 (uint32_t) spa->spa_guid, 1029 (uint32_t) (spa->spa_guid >> 32), 1030 VTOP(&bootinfo), 1031 zfsargs); 1032} 1033 1034static int 1035zfs_mount_ds(char *dsname) 1036{ 1037 uint64_t newroot; 1038 spa_t *newspa; 1039 char *q; 1040 1041 q = strchr(dsname, '/'); 1042 if (q) 1043 *q++ = '\0'; 1044 newspa = spa_find_by_name(dsname); 1045 if (newspa == NULL) { 1046 printf("\nCan't find ZFS pool %s\n", dsname); 1047 return -1; 1048 } 1049 1050 if (zfs_spa_init(newspa)) 1051 return -1; 1052 1053 newroot = 0; 1054 if (q) { 1055 if (zfs_lookup_dataset(newspa, q, &newroot)) { 1056 printf("\nCan't find dataset %s in ZFS pool %s\n", 1057 q, newspa->spa_name); 1058 return -1; 1059 } 1060 } 1061 if (zfs_mount(newspa, newroot, &zfsmount)) { 1062 printf("\nCan't mount ZFS dataset\n"); 1063 return -1; 1064 } 1065 spa = newspa; 1066 return (0); 1067} 1068 1069static int 1070parse_cmd(void) 1071{ 1072 char *arg = cmd; 1073 char *ep, *p, *q; 1074 const char *cp; 1075 int c, i, j; 1076 1077 while ((c = *arg++)) { 1078 if (c == ' ' || c == '\t' || c == '\n') 1079 continue; 1080 for (p = arg; *p && *p != '\n' && *p != ' ' && *p != '\t'; p++); 1081 ep = p; 1082 if (*p) 1083 *p++ = 0; 1084 if (c == '-') { 1085 while ((c = *arg++)) { 1086 if (c == 'P') { 1087 if (*(uint8_t *)PTOV(0x496) & 0x10) { 1088 cp = "yes"; 1089 } else { 1090 opts |= OPT_SET(RBX_DUAL) | OPT_SET(RBX_SERIAL); 1091 cp = "no"; 1092 } 1093 printf("Keyboard: %s\n", cp); 1094 continue; 1095 } else if (c == 'S') { 1096 j = 0; 1097 while ((unsigned int)(i = *arg++ - '0') <= 9) 1098 j = j * 10 + i; 1099 if (j > 0 && i == -'0') { 1100 comspeed = j; 1101 break; 1102 } 1103 /* Fall through to error below ('S' not in optstr[]). */ 1104 } 1105 for (i = 0; c != optstr[i]; i++) 1106 if (i == NOPT - 1) 1107 return -1; 1108 opts ^= OPT_SET(flags[i]); 1109 } 1110 ioctrl = OPT_CHECK(RBX_DUAL) ? 
(IO_SERIAL|IO_KEYBOARD) : 1111 OPT_CHECK(RBX_SERIAL) ? IO_SERIAL : IO_KEYBOARD; 1112 if (ioctrl & IO_SERIAL) { 1113 if (sio_init(115200 / comspeed) != 0) 1114 ioctrl &= ~IO_SERIAL; 1115 } 1116 } if (c == '?') { 1117 dnode_phys_t dn; 1118 1119 if (zfs_lookup(&zfsmount, arg, &dn) == 0) { 1120 zap_list(spa, &dn); 1121 } 1122 return -1; 1123 } else { 1124 arg--; 1125 1126 /* 1127 * Report pool status if the comment is 'status'. Lets 1128 * hope no-one wants to load /status as a kernel. 1129 */ 1130 if (!strcmp(arg, "status")) { 1131 spa_all_status(); 1132 return -1; 1133 } 1134 1135 /* 1136 * If there is "zfs:" prefix simply ignore it. 1137 */ 1138 if (strncmp(arg, "zfs:", 4) == 0) 1139 arg += 4; 1140 1141 /* 1142 * If there is a colon, switch pools. 1143 */ 1144 q = strchr(arg, ':'); 1145 if (q) { 1146 *q++ = '\0'; 1147 if (zfs_mount_ds(arg) != 0) 1148 return -1; 1149 arg = q; 1150 } 1151 if ((i = ep - arg)) { 1152 if ((size_t)i >= sizeof(kname)) 1153 return -1; 1154 memcpy(kname, arg, i + 1); 1155 } 1156 } 1157 arg = p; 1158 } 1159 return 0; 1160} 1161