zfs.c revision 239292
1/*- 2 * Copyright (c) 2007 Doug Rabson 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 * 26 * $FreeBSD: head/sys/boot/zfs/zfs.c 239292 2012-08-15 09:18:49Z ae $ 27 */ 28 29#include <sys/cdefs.h> 30__FBSDID("$FreeBSD: head/sys/boot/zfs/zfs.c 239292 2012-08-15 09:18:49Z ae $"); 31 32/* 33 * Stand-alone file reading package. 
 */

#include <sys/disk.h>
#include <sys/param.h>
#include <sys/time.h>
#include <sys/queue.h>
#include <part.h>
#include <stddef.h>
#include <stdarg.h>
#include <string.h>
#include <stand.h>
#include <bootstrap.h>

#include "libzfs.h"

/* The ZFS implementation is included as source, not linked separately. */
#include "zfsimpl.c"

/* File-system entry points; they are wired into zfs_fsops below. */
static int	zfs_open(const char *path, struct open_file *f);
static int	zfs_write(struct open_file *f, void *buf, size_t size, size_t *resid);
static int	zfs_close(struct open_file *f);
static int	zfs_read(struct open_file *f, void *buf, size_t size, size_t *resid);
static off_t	zfs_seek(struct open_file *f, off_t offset, int where);
static int	zfs_stat(struct open_file *f, struct stat *sb);
static int	zfs_readdir(struct open_file *f, struct dirent *d);

/*
 * Tentative definition so that zfs_open() can compare against &zfs_dev;
 * the initialised definition appears further down in this file.
 */
struct devsw zfs_dev;

/*
 * File-system operations table for ZFS.  The initialiser is positional:
 * name, open, close, read, write, seek, stat, readdir.
 */
struct fs_ops zfs_fsops = {
	"zfs",
	zfs_open,
	zfs_close,
	zfs_read,
	zfs_write,
	zfs_seek,
	zfs_stat,
	zfs_readdir
};

/*
 * In-core open file.
 *
 * One instance is allocated (and zeroed) by zfs_open() and hung off
 * open_file.f_fsdata.  The three zap fields are only used by zfs_readdir().
 */
struct file {
	off_t		f_seekp;	/* seek pointer */
	dnode_phys_t	f_dnode;	/* dnode filled in by zfs_lookup() */
	uint64_t	f_zap_type;	/* zap type for readdir */
	uint64_t	f_num_leafs;	/* number of fzap leaf blocks */
	zap_leaf_phys_t	*f_zap_leaf;	/* zap leaf buffer */
};

/*
 * Open a file.
85 */ 86static int 87zfs_open(const char *upath, struct open_file *f) 88{ 89 struct zfsmount *mount = (struct zfsmount *)f->f_devdata; 90 struct file *fp; 91 int rc; 92 93 if (f->f_dev != &zfs_dev) 94 return (EINVAL); 95 96 /* allocate file system specific data structure */ 97 fp = malloc(sizeof(struct file)); 98 bzero(fp, sizeof(struct file)); 99 f->f_fsdata = (void *)fp; 100 101 rc = zfs_lookup(mount, upath, &fp->f_dnode); 102 fp->f_seekp = 0; 103 if (rc) { 104 f->f_fsdata = NULL; 105 free(fp); 106 } 107 return (rc); 108} 109 110static int 111zfs_close(struct open_file *f) 112{ 113 struct file *fp = (struct file *)f->f_fsdata; 114 115 dnode_cache_obj = 0; 116 f->f_fsdata = (void *)0; 117 if (fp == (struct file *)0) 118 return (0); 119 120 free(fp); 121 return (0); 122} 123 124/* 125 * Copy a portion of a file into kernel memory. 126 * Cross block boundaries when necessary. 127 */ 128static int 129zfs_read(struct open_file *f, void *start, size_t size, size_t *resid /* out */) 130{ 131 const spa_t *spa = ((struct zfsmount *)f->f_devdata)->spa; 132 struct file *fp = (struct file *)f->f_fsdata; 133 struct stat sb; 134 size_t n; 135 int rc; 136 137 rc = zfs_stat(f, &sb); 138 if (rc) 139 return (rc); 140 n = size; 141 if (fp->f_seekp + n > sb.st_size) 142 n = sb.st_size - fp->f_seekp; 143 144 rc = dnode_read(spa, &fp->f_dnode, fp->f_seekp, start, n); 145 if (rc) 146 return (rc); 147 148 if (0) { 149 int i; 150 for (i = 0; i < n; i++) 151 putchar(((char*) start)[i]); 152 } 153 fp->f_seekp += n; 154 if (resid) 155 *resid = size - n; 156 157 return (0); 158} 159 160/* 161 * Don't be silly - the bootstrap has no business writing anything. 
162 */ 163static int 164zfs_write(struct open_file *f, void *start, size_t size, size_t *resid /* out */) 165{ 166 167 return (EROFS); 168} 169 170static off_t 171zfs_seek(struct open_file *f, off_t offset, int where) 172{ 173 struct file *fp = (struct file *)f->f_fsdata; 174 175 switch (where) { 176 case SEEK_SET: 177 fp->f_seekp = offset; 178 break; 179 case SEEK_CUR: 180 fp->f_seekp += offset; 181 break; 182 case SEEK_END: 183 { 184 struct stat sb; 185 int error; 186 187 error = zfs_stat(f, &sb); 188 if (error != 0) { 189 errno = error; 190 return (-1); 191 } 192 fp->f_seekp = sb.st_size - offset; 193 break; 194 } 195 default: 196 errno = EINVAL; 197 return (-1); 198 } 199 return (fp->f_seekp); 200} 201 202static int 203zfs_stat(struct open_file *f, struct stat *sb) 204{ 205 const spa_t *spa = ((struct zfsmount *)f->f_devdata)->spa; 206 struct file *fp = (struct file *)f->f_fsdata; 207 208 return (zfs_dnode_stat(spa, &fp->f_dnode, sb)); 209} 210 211static int 212zfs_readdir(struct open_file *f, struct dirent *d) 213{ 214 const spa_t *spa = ((struct zfsmount *)f->f_devdata)->spa; 215 struct file *fp = (struct file *)f->f_fsdata; 216 mzap_ent_phys_t mze; 217 struct stat sb; 218 size_t bsize = fp->f_dnode.dn_datablkszsec << SPA_MINBLOCKSHIFT; 219 int rc; 220 221 rc = zfs_stat(f, &sb); 222 if (rc) 223 return (rc); 224 if (!S_ISDIR(sb.st_mode)) 225 return (ENOTDIR); 226 227 /* 228 * If this is the first read, get the zap type. 
229 */ 230 if (fp->f_seekp == 0) { 231 rc = dnode_read(spa, &fp->f_dnode, 232 0, &fp->f_zap_type, sizeof(fp->f_zap_type)); 233 if (rc) 234 return (rc); 235 236 if (fp->f_zap_type == ZBT_MICRO) { 237 fp->f_seekp = offsetof(mzap_phys_t, mz_chunk); 238 } else { 239 rc = dnode_read(spa, &fp->f_dnode, 240 offsetof(zap_phys_t, zap_num_leafs), 241 &fp->f_num_leafs, 242 sizeof(fp->f_num_leafs)); 243 if (rc) 244 return (rc); 245 246 fp->f_seekp = bsize; 247 fp->f_zap_leaf = (zap_leaf_phys_t *)malloc(bsize); 248 rc = dnode_read(spa, &fp->f_dnode, 249 fp->f_seekp, 250 fp->f_zap_leaf, 251 bsize); 252 if (rc) 253 return (rc); 254 } 255 } 256 257 if (fp->f_zap_type == ZBT_MICRO) { 258 mzap_next: 259 if (fp->f_seekp >= bsize) 260 return (ENOENT); 261 262 rc = dnode_read(spa, &fp->f_dnode, 263 fp->f_seekp, &mze, sizeof(mze)); 264 if (rc) 265 return (rc); 266 fp->f_seekp += sizeof(mze); 267 268 if (!mze.mze_name[0]) 269 goto mzap_next; 270 271 d->d_fileno = ZFS_DIRENT_OBJ(mze.mze_value); 272 d->d_type = ZFS_DIRENT_TYPE(mze.mze_value); 273 strcpy(d->d_name, mze.mze_name); 274 d->d_namlen = strlen(d->d_name); 275 return (0); 276 } else { 277 zap_leaf_t zl; 278 zap_leaf_chunk_t *zc, *nc; 279 int chunk; 280 size_t namelen; 281 char *p; 282 uint64_t value; 283 284 /* 285 * Initialise this so we can use the ZAP size 286 * calculating macros. 287 */ 288 zl.l_bs = ilog2(bsize); 289 zl.l_phys = fp->f_zap_leaf; 290 291 /* 292 * Figure out which chunk we are currently looking at 293 * and consider seeking to the next leaf. We use the 294 * low bits of f_seekp as a simple chunk index. 295 */ 296 fzap_next: 297 chunk = fp->f_seekp & (bsize - 1); 298 if (chunk == ZAP_LEAF_NUMCHUNKS(&zl)) { 299 fp->f_seekp = (fp->f_seekp & ~(bsize - 1)) + bsize; 300 chunk = 0; 301 302 /* 303 * Check for EOF and read the new leaf. 
304 */ 305 if (fp->f_seekp >= bsize * fp->f_num_leafs) 306 return (ENOENT); 307 308 rc = dnode_read(spa, &fp->f_dnode, 309 fp->f_seekp, 310 fp->f_zap_leaf, 311 bsize); 312 if (rc) 313 return (rc); 314 } 315 316 zc = &ZAP_LEAF_CHUNK(&zl, chunk); 317 fp->f_seekp++; 318 if (zc->l_entry.le_type != ZAP_CHUNK_ENTRY) 319 goto fzap_next; 320 321 namelen = zc->l_entry.le_name_length; 322 if (namelen > sizeof(d->d_name)) 323 namelen = sizeof(d->d_name); 324 325 /* 326 * Paste the name back together. 327 */ 328 nc = &ZAP_LEAF_CHUNK(&zl, zc->l_entry.le_name_chunk); 329 p = d->d_name; 330 while (namelen > 0) { 331 int len; 332 len = namelen; 333 if (len > ZAP_LEAF_ARRAY_BYTES) 334 len = ZAP_LEAF_ARRAY_BYTES; 335 memcpy(p, nc->l_array.la_array, len); 336 p += len; 337 namelen -= len; 338 nc = &ZAP_LEAF_CHUNK(&zl, nc->l_array.la_next); 339 } 340 d->d_name[sizeof(d->d_name) - 1] = 0; 341 342 /* 343 * Assume the first eight bytes of the value are 344 * a uint64_t. 345 */ 346 value = fzap_leaf_value(&zl, zc); 347 348 d->d_fileno = ZFS_DIRENT_OBJ(value); 349 d->d_type = ZFS_DIRENT_TYPE(value); 350 d->d_namlen = strlen(d->d_name); 351 352 return (0); 353 } 354} 355 356static int 357vdev_read(vdev_t *vdev, void *priv, off_t offset, void *buf, size_t size) 358{ 359 int fd; 360 361 fd = (uintptr_t) priv; 362 lseek(fd, offset, SEEK_SET); 363 if (read(fd, buf, size) == size) { 364 return 0; 365 } else { 366 return (EIO); 367 } 368} 369 370static int 371zfs_dev_init(void) 372{ 373 zfs_init(); 374 if (archsw.arch_zfs_probe == NULL) 375 return (ENXIO); 376 archsw.arch_zfs_probe(); 377 return (0); 378} 379 380struct zfs_probe_args { 381 int fd; 382 const char *devname; 383 uint64_t *pool_guid; 384 uint16_t secsz; 385}; 386 387static int 388zfs_diskread(void *arg, void *buf, size_t blocks, off_t offset) 389{ 390 struct zfs_probe_args *ppa; 391 392 ppa = (struct zfs_probe_args *)arg; 393 return (vdev_read(NULL, (void *)(uintptr_t)ppa->fd, 394 offset * ppa->secsz, buf, blocks * ppa->secsz)); 395} 
396 397static int 398zfs_probe(int fd, uint64_t *pool_guid) 399{ 400 spa_t *spa; 401 int ret; 402 403 ret = vdev_probe(vdev_read, (void *)(uintptr_t)fd, &spa); 404 if (ret == 0 && pool_guid != NULL) 405 *pool_guid = spa->spa_guid; 406 return (ret); 407} 408 409static void 410zfs_probe_partition(void *arg, const char *partname, 411 const struct ptable_entry *part) 412{ 413 struct zfs_probe_args *ppa, pa; 414 struct ptable *table; 415 char devname[32]; 416 int ret; 417 418 /* Probe only freebsd-zfs and freebsd partitions */ 419 if (part->type != PART_FREEBSD && 420 part->type != PART_FREEBSD_ZFS) 421 return; 422 423 ppa = (struct zfs_probe_args *)arg; 424 strncpy(devname, ppa->devname, strlen(ppa->devname) - 1); 425 devname[strlen(ppa->devname) - 1] = '\0'; 426 sprintf(devname, "%s%s:", devname, partname); 427 pa.fd = open(devname, O_RDONLY); 428 if (pa.fd == -1) 429 return; 430 ret = zfs_probe(pa.fd, ppa->pool_guid); 431 if (ret == 0) 432 return; 433 /* Do we have BSD label here? */ 434 if (part->type == PART_FREEBSD) { 435 pa.devname = devname; 436 pa.pool_guid = ppa->pool_guid; 437 pa.secsz = ppa->secsz; 438 table = ptable_open(&pa, part->end - part->start + 1, 439 ppa->secsz, zfs_diskread); 440 if (table != NULL) { 441 ptable_iterate(table, &pa, zfs_probe_partition); 442 ptable_close(table); 443 } 444 } 445 close(pa.fd); 446} 447 448int 449zfs_probe_dev(const char *devname, uint64_t *pool_guid) 450{ 451 struct ptable *table; 452 struct zfs_probe_args pa; 453 off_t mediasz; 454 int ret; 455 456 pa.fd = open(devname, O_RDONLY); 457 if (pa.fd == -1) 458 return (ENXIO); 459 /* Probe the whole disk */ 460 ret = zfs_probe(pa.fd, pool_guid); 461 if (ret == 0) 462 return (0); 463 /* Probe each partition */ 464 ret = ioctl(pa.fd, DIOCGMEDIASIZE, &mediasz); 465 if (ret == 0) 466 ret = ioctl(pa.fd, DIOCGSECTORSIZE, &pa.secsz); 467 if (ret == 0) { 468 pa.devname = devname; 469 pa.pool_guid = pool_guid; 470 table = ptable_open(&pa, mediasz / pa.secsz, pa.secsz, 471 
zfs_diskread); 472 if (table != NULL) { 473 ptable_iterate(table, &pa, zfs_probe_partition); 474 ptable_close(table); 475 } 476 } 477 close(pa.fd); 478 return (0); 479} 480 481/* 482 * Print information about ZFS pools 483 */ 484static void 485zfs_dev_print(int verbose) 486{ 487 spa_t *spa; 488 char line[80]; 489 490 if (verbose) { 491 spa_all_status(); 492 return; 493 } 494 STAILQ_FOREACH(spa, &zfs_pools, spa_link) { 495 sprintf(line, " zfs:%s\n", spa->spa_name); 496 pager_output(line); 497 } 498} 499 500/* 501 * Attempt to open the pool described by (dev) for use by (f). 502 */ 503static int 504zfs_dev_open(struct open_file *f, ...) 505{ 506 va_list args; 507 struct zfs_devdesc *dev; 508 struct zfsmount *mount; 509 spa_t *spa; 510 int rv; 511 512 va_start(args, f); 513 dev = va_arg(args, struct zfs_devdesc *); 514 va_end(args); 515 516 spa = spa_find_by_guid(dev->pool_guid); 517 if (!spa) 518 return (ENXIO); 519 rv = zfs_spa_init(spa); 520 if (rv != 0) 521 return (rv); 522 mount = malloc(sizeof(*mount)); 523 rv = zfs_mount(spa, dev->root_guid, mount); 524 if (rv != 0) { 525 free(mount); 526 return (rv); 527 } 528 if (mount->objset.os_type != DMU_OST_ZFS) { 529 printf("Unexpected object set type %ju\n", 530 (uintmax_t)mount->objset.os_type); 531 free(mount); 532 return (EIO); 533 } 534 f->f_devdata = mount; 535 free(dev); 536 return (0); 537} 538 539static int 540zfs_dev_close(struct open_file *f) 541{ 542 543 free(f->f_devdata); 544 f->f_devdata = NULL; 545 return (0); 546} 547 548static int 549zfs_dev_strategy(void *devdata, int rw, daddr_t dblk, size_t size, char *buf, size_t *rsize) 550{ 551 552 return (ENOSYS); 553} 554 555struct devsw zfs_dev = { 556 .dv_name = "zfs", 557 .dv_type = DEVT_ZFS, 558 .dv_init = zfs_dev_init, 559 .dv_strategy = zfs_dev_strategy, 560 .dv_open = zfs_dev_open, 561 .dv_close = zfs_dev_close, 562 .dv_ioctl = noioctl, 563 .dv_print = zfs_dev_print, 564 .dv_cleanup = NULL 565}; 566 567int 568zfs_parsedev(struct zfs_devdesc *dev, const 
char *devspec, const char **path) 569{ 570 static char rootname[ZFS_MAXNAMELEN]; 571 static char poolname[ZFS_MAXNAMELEN]; 572 spa_t *spa; 573 const char *end; 574 const char *np; 575 const char *sep; 576 int rv; 577 578 np = devspec; 579 if (*np != ':') 580 return (EINVAL); 581 np++; 582 end = strchr(np, ':'); 583 if (end == NULL) 584 return (EINVAL); 585 sep = strchr(np, '/'); 586 if (sep == NULL || sep >= end) 587 sep = end; 588 memcpy(poolname, np, sep - np); 589 poolname[sep - np] = '\0'; 590 if (sep < end) { 591 sep++; 592 memcpy(rootname, sep, end - sep); 593 rootname[end - sep] = '\0'; 594 } 595 else 596 rootname[0] = '\0'; 597 598 spa = spa_find_by_name(poolname); 599 if (!spa) 600 return (ENXIO); 601 rv = zfs_spa_init(spa); 602 if (rv != 0) 603 return (rv); 604 dev->pool_guid = spa->spa_guid; 605 if (rootname[0] != '\0') { 606 rv = zfs_lookup_dataset(spa, rootname, &dev->root_guid); 607 if (rv != 0) 608 return (rv); 609 } else 610 dev->root_guid = 0; 611 if (path != NULL) 612 *path = (*end == '\0') ? 
end : end + 1; 613 dev->d_dev = &zfs_dev; 614 dev->d_type = zfs_dev.dv_type; 615 return (0); 616} 617 618char * 619zfs_fmtdev(void *vdev) 620{ 621 static char rootname[ZFS_MAXNAMELEN]; 622 static char buf[2 * ZFS_MAXNAMELEN + 8]; 623 struct zfs_devdesc *dev = (struct zfs_devdesc *)vdev; 624 spa_t *spa; 625 626 buf[0] = '\0'; 627 if (dev->d_type != DEVT_ZFS) 628 return (buf); 629 630 spa = spa_find_by_guid(dev->pool_guid); 631 if (spa == NULL) { 632 printf("ZFS: can't find pool by guid\n"); 633 return (buf); 634 } 635 if (zfs_spa_init(spa) != 0) { 636 printf("ZFS: can't init pool\n"); 637 return (buf); 638 } 639 if (dev->root_guid == 0 && zfs_get_root(spa, &dev->root_guid)) { 640 printf("ZFS: can't find root filesystem\n"); 641 return (buf); 642 } 643 if (zfs_rlookup(spa, dev->root_guid, rootname)) { 644 printf("ZFS: can't find filesystem by guid\n"); 645 return (buf); 646 } 647 648 if (rootname[0] == '\0') 649 sprintf(buf, "%s:%s:", dev->d_dev->dv_name, spa->spa_name); 650 else 651 sprintf(buf, "%s:%s/%s:", dev->d_dev->dv_name, spa->spa_name, 652 rootname); 653 return (buf); 654} 655