/* zfs.c revision 208669 */
1/*- 2 * Copyright (c) 2007 Doug Rabson 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 * 26 * $FreeBSD: head/sys/boot/zfs/zfs.c 208669 2010-05-31 09:06:03Z avg $ 27 */ 28 29#include <sys/cdefs.h> 30__FBSDID("$FreeBSD: head/sys/boot/zfs/zfs.c 208669 2010-05-31 09:06:03Z avg $"); 31 32/* 33 * Stand-alone file reading package. 
34 */ 35 36#include <sys/param.h> 37#include <sys/disklabel.h> 38#include <sys/time.h> 39#include <sys/queue.h> 40#include <stddef.h> 41#include <stdarg.h> 42#include <string.h> 43#include <stand.h> 44#include <bootstrap.h> 45 46#include "zfsimpl.c" 47 48static int zfs_open(const char *path, struct open_file *f); 49static int zfs_write(struct open_file *f, void *buf, size_t size, size_t *resid); 50static int zfs_close(struct open_file *f); 51static int zfs_read(struct open_file *f, void *buf, size_t size, size_t *resid); 52static off_t zfs_seek(struct open_file *f, off_t offset, int where); 53static int zfs_stat(struct open_file *f, struct stat *sb); 54static int zfs_readdir(struct open_file *f, struct dirent *d); 55 56struct devsw zfs_dev; 57 58struct fs_ops zfs_fsops = { 59 "zfs", 60 zfs_open, 61 zfs_close, 62 zfs_read, 63 zfs_write, 64 zfs_seek, 65 zfs_stat, 66 zfs_readdir 67}; 68 69/* 70 * In-core open file. 71 */ 72struct file { 73 off_t f_seekp; /* seek pointer */ 74 dnode_phys_t f_dnode; 75 uint64_t f_zap_type; /* zap type for readdir */ 76 uint64_t f_num_leafs; /* number of fzap leaf blocks */ 77 zap_leaf_phys_t *f_zap_leaf; /* zap leaf buffer */ 78}; 79 80/* 81 * Open a file. 
82 */ 83static int 84zfs_open(const char *upath, struct open_file *f) 85{ 86 spa_t *spa = (spa_t *) f->f_devdata; 87 struct file *fp; 88 int rc; 89 90 if (f->f_dev != &zfs_dev) 91 return (EINVAL); 92 93 rc = zfs_mount_pool(spa); 94 if (rc) 95 return (rc); 96 97 /* allocate file system specific data structure */ 98 fp = malloc(sizeof(struct file)); 99 bzero(fp, sizeof(struct file)); 100 f->f_fsdata = (void *)fp; 101 102 if (spa->spa_root_objset.os_type != DMU_OST_ZFS) { 103 printf("Unexpected object set type %llu\n", 104 spa->spa_root_objset.os_type); 105 rc = EIO; 106 goto out; 107 } 108 109 rc = zfs_lookup(spa, upath, &fp->f_dnode); 110 if (rc) 111 goto out; 112 113 fp->f_seekp = 0; 114out: 115 if (rc) { 116 f->f_fsdata = NULL; 117 free(fp); 118 } 119 return (rc); 120} 121 122static int 123zfs_close(struct open_file *f) 124{ 125 struct file *fp = (struct file *)f->f_fsdata; 126 127 dnode_cache_obj = 0; 128 f->f_fsdata = (void *)0; 129 if (fp == (struct file *)0) 130 return (0); 131 132 free(fp); 133 return (0); 134} 135 136/* 137 * Copy a portion of a file into kernel memory. 138 * Cross block boundaries when necessary. 139 */ 140static int 141zfs_read(struct open_file *f, void *start, size_t size, size_t *resid /* out */) 142{ 143 spa_t *spa = (spa_t *) f->f_devdata; 144 struct file *fp = (struct file *)f->f_fsdata; 145 const znode_phys_t *zp = (const znode_phys_t *) fp->f_dnode.dn_bonus; 146 size_t n; 147 int rc; 148 149 n = size; 150 if (fp->f_seekp + n > zp->zp_size) 151 n = zp->zp_size - fp->f_seekp; 152 153 rc = dnode_read(spa, &fp->f_dnode, fp->f_seekp, start, n); 154 if (rc) 155 return (rc); 156 157 if (0) { 158 int i; 159 for (i = 0; i < n; i++) 160 putchar(((char*) start)[i]); 161 } 162 fp->f_seekp += n; 163 if (resid) 164 *resid = size - n; 165 166 return (0); 167} 168 169/* 170 * Don't be silly - the bootstrap has no business writing anything. 
171 */ 172static int 173zfs_write(struct open_file *f, void *start, size_t size, size_t *resid /* out */) 174{ 175 176 return (EROFS); 177} 178 179static off_t 180zfs_seek(struct open_file *f, off_t offset, int where) 181{ 182 struct file *fp = (struct file *)f->f_fsdata; 183 znode_phys_t *zp = (znode_phys_t *) fp->f_dnode.dn_bonus; 184 185 switch (where) { 186 case SEEK_SET: 187 fp->f_seekp = offset; 188 break; 189 case SEEK_CUR: 190 fp->f_seekp += offset; 191 break; 192 case SEEK_END: 193 fp->f_seekp = zp->zp_size - offset; 194 break; 195 default: 196 errno = EINVAL; 197 return (-1); 198 } 199 return (fp->f_seekp); 200} 201 202static int 203zfs_stat(struct open_file *f, struct stat *sb) 204{ 205 struct file *fp = (struct file *)f->f_fsdata; 206 znode_phys_t *zp = (znode_phys_t *) fp->f_dnode.dn_bonus; 207 208 /* only important stuff */ 209 sb->st_mode = zp->zp_mode; 210 sb->st_uid = zp->zp_uid; 211 sb->st_gid = zp->zp_gid; 212 sb->st_size = zp->zp_size; 213 214 return (0); 215} 216 217static int 218zfs_readdir(struct open_file *f, struct dirent *d) 219{ 220 spa_t *spa = (spa_t *) f->f_devdata; 221 struct file *fp = (struct file *)f->f_fsdata; 222 znode_phys_t *zp = (znode_phys_t *) fp->f_dnode.dn_bonus; 223 mzap_ent_phys_t mze; 224 size_t bsize = fp->f_dnode.dn_datablkszsec << SPA_MINBLOCKSHIFT; 225 int rc; 226 227 if ((zp->zp_mode >> 12) != 0x4) { 228 return (ENOTDIR); 229 } 230 231 /* 232 * If this is the first read, get the zap type. 
233 */ 234 if (fp->f_seekp == 0) { 235 rc = dnode_read(spa, &fp->f_dnode, 236 0, &fp->f_zap_type, sizeof(fp->f_zap_type)); 237 if (rc) 238 return (rc); 239 240 if (fp->f_zap_type == ZBT_MICRO) { 241 fp->f_seekp = offsetof(mzap_phys_t, mz_chunk); 242 } else { 243 rc = dnode_read(spa, &fp->f_dnode, 244 offsetof(zap_phys_t, zap_num_leafs), 245 &fp->f_num_leafs, 246 sizeof(fp->f_num_leafs)); 247 if (rc) 248 return (rc); 249 250 fp->f_seekp = bsize; 251 fp->f_zap_leaf = (zap_leaf_phys_t *)malloc(bsize); 252 rc = dnode_read(spa, &fp->f_dnode, 253 fp->f_seekp, 254 fp->f_zap_leaf, 255 bsize); 256 if (rc) 257 return (rc); 258 } 259 } 260 261 if (fp->f_zap_type == ZBT_MICRO) { 262 mzap_next: 263 if (fp->f_seekp >= bsize) 264 return (ENOENT); 265 266 rc = dnode_read(spa, &fp->f_dnode, 267 fp->f_seekp, &mze, sizeof(mze)); 268 if (rc) 269 return (rc); 270 fp->f_seekp += sizeof(mze); 271 272 if (!mze.mze_name[0]) 273 goto mzap_next; 274 275 d->d_fileno = ZFS_DIRENT_OBJ(mze.mze_value); 276 d->d_type = ZFS_DIRENT_TYPE(mze.mze_value); 277 strcpy(d->d_name, mze.mze_name); 278 d->d_namlen = strlen(d->d_name); 279 return (0); 280 } else { 281 zap_leaf_t zl; 282 zap_leaf_chunk_t *zc, *nc; 283 int chunk; 284 size_t namelen; 285 char *p; 286 uint64_t value; 287 288 /* 289 * Initialise this so we can use the ZAP size 290 * calculating macros. 291 */ 292 zl.l_bs = ilog2(bsize); 293 zl.l_phys = fp->f_zap_leaf; 294 295 /* 296 * Figure out which chunk we are currently looking at 297 * and consider seeking to the next leaf. We use the 298 * low bits of f_seekp as a simple chunk index. 299 */ 300 fzap_next: 301 chunk = fp->f_seekp & (bsize - 1); 302 if (chunk == ZAP_LEAF_NUMCHUNKS(&zl)) { 303 fp->f_seekp = (fp->f_seekp & ~(bsize - 1)) + bsize; 304 chunk = 0; 305 306 /* 307 * Check for EOF and read the new leaf. 
308 */ 309 if (fp->f_seekp >= bsize * fp->f_num_leafs) 310 return (ENOENT); 311 312 rc = dnode_read(spa, &fp->f_dnode, 313 fp->f_seekp, 314 fp->f_zap_leaf, 315 bsize); 316 if (rc) 317 return (rc); 318 } 319 320 zc = &ZAP_LEAF_CHUNK(&zl, chunk); 321 fp->f_seekp++; 322 if (zc->l_entry.le_type != ZAP_CHUNK_ENTRY) 323 goto fzap_next; 324 325 namelen = zc->l_entry.le_name_length; 326 if (namelen > sizeof(d->d_name)) 327 namelen = sizeof(d->d_name); 328 329 /* 330 * Paste the name back together. 331 */ 332 nc = &ZAP_LEAF_CHUNK(&zl, zc->l_entry.le_name_chunk); 333 p = d->d_name; 334 while (namelen > 0) { 335 int len; 336 len = namelen; 337 if (len > ZAP_LEAF_ARRAY_BYTES) 338 len = ZAP_LEAF_ARRAY_BYTES; 339 memcpy(p, nc->l_array.la_array, len); 340 p += len; 341 namelen -= len; 342 nc = &ZAP_LEAF_CHUNK(&zl, nc->l_array.la_next); 343 } 344 d->d_name[sizeof(d->d_name) - 1] = 0; 345 346 /* 347 * Assume the first eight bytes of the value are 348 * a uint64_t. 349 */ 350 value = fzap_leaf_value(&zl, zc); 351 352 d->d_fileno = ZFS_DIRENT_OBJ(value); 353 d->d_type = ZFS_DIRENT_TYPE(value); 354 d->d_namlen = strlen(d->d_name); 355 356 return (0); 357 } 358} 359 360static int 361vdev_read(vdev_t *vdev, void *priv, off_t offset, void *buf, size_t size) 362{ 363 int fd; 364 365 fd = (uintptr_t) priv; 366 lseek(fd, offset, SEEK_SET); 367 if (read(fd, buf, size) == size) { 368 return 0; 369 } else { 370 return (EIO); 371 } 372} 373 374/* 375 * Convert a pool guid to a 'unit number' suitable for use with zfs_dev_open. 376 */ 377int 378zfs_guid_to_unit(uint64_t guid) 379{ 380 spa_t *spa; 381 int unit; 382 383 unit = 0; 384 STAILQ_FOREACH(spa, &zfs_pools, spa_link) { 385 if (spa->spa_guid == guid) 386 return unit; 387 unit++; 388 } 389 return (-1); 390} 391 392static int 393zfs_dev_init(void) 394{ 395 char devname[512]; 396 int unit, slice; 397 int fd; 398 399 /* 400 * Open all the disks we can find and see if we can reconstruct 401 * ZFS pools from them. 
Bogusly assumes that the disks are named 402 * diskN, diskNpM or diskNsM. 403 */ 404 zfs_init(); 405 for (unit = 0; unit < 32 /* XXX */; unit++) { 406 sprintf(devname, "disk%d:", unit); 407 fd = open(devname, O_RDONLY); 408 if (fd == -1) 409 continue; 410 411 /* 412 * If we find a vdev, the zfs code will eat the fd, otherwise 413 * we close it. 414 */ 415 if (vdev_probe(vdev_read, (void*) (uintptr_t) fd, 0)) 416 close(fd); 417 418 for (slice = 1; slice <= 128; slice++) { 419 sprintf(devname, "disk%dp%d:", unit, slice); 420 fd = open(devname, O_RDONLY); 421 if (fd == -1) { 422 sprintf(devname, "disk%ds%d:", unit, slice); 423 fd = open(devname, O_RDONLY); 424 if (fd == -1) 425 continue; 426 } 427 if (vdev_probe(vdev_read, (void*) (uintptr_t) fd, 0)) 428 close(fd); 429 } 430 } 431 432 return (0); 433} 434 435/* 436 * Print information about ZFS pools 437 */ 438static void 439zfs_dev_print(int verbose) 440{ 441 spa_t *spa; 442 char line[80]; 443 int unit; 444 445 if (verbose) { 446 spa_all_status(); 447 return; 448 } 449 unit = 0; 450 STAILQ_FOREACH(spa, &zfs_pools, spa_link) { 451 sprintf(line, " zfs%d: %s\n", unit, spa->spa_name); 452 pager_output(line); 453 unit++; 454 } 455} 456 457/* 458 * Attempt to open the pool described by (dev) for use by (f). 459 */ 460static int 461zfs_dev_open(struct open_file *f, ...) 462{ 463 va_list args; 464 struct devdesc *dev; 465 int unit, i; 466 spa_t *spa; 467 468 va_start(args, f); 469 dev = va_arg(args, struct devdesc*); 470 va_end(args); 471 472 /* 473 * We mostly ignore the stuff that devopen sends us. For now, 474 * use the unit to find a pool - later we will override the 475 * devname parsing so that we can name a pool and a fs within 476 * the pool. 
477 */ 478 unit = dev->d_unit; 479 480 i = 0; 481 STAILQ_FOREACH(spa, &zfs_pools, spa_link) { 482 if (i == unit) 483 break; 484 i++; 485 } 486 if (!spa) { 487 return (ENXIO); 488 } 489 490 f->f_devdata = spa; 491 free(dev); 492 return (0); 493} 494 495static int 496zfs_dev_close(struct open_file *f) 497{ 498 499 f->f_devdata = NULL; 500 return (0); 501} 502 503static int 504zfs_dev_strategy(void *devdata, int rw, daddr_t dblk, size_t size, char *buf, size_t *rsize) 505{ 506 507 return (ENOSYS); 508} 509 510struct devsw zfs_dev = { 511 .dv_name = "zfs", 512 .dv_type = DEVT_ZFS, 513 .dv_init = zfs_dev_init, 514 .dv_strategy = zfs_dev_strategy, 515 .dv_open = zfs_dev_open, 516 .dv_close = zfs_dev_close, 517 .dv_ioctl = noioctl, 518 .dv_print = zfs_dev_print, 519 .dv_cleanup = NULL 520}; 521