/* zfs_vfsops.c revision 168839 */
1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21/* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26#pragma ident "%Z%%M% %I% %E% SMI" 27 28#include <sys/types.h> 29#include <sys/param.h> 30#include <sys/systm.h> 31#include <sys/kernel.h> 32#include <sys/sysmacros.h> 33#include <sys/kmem.h> 34#include <sys/acl.h> 35#include <sys/vnode.h> 36#include <sys/vfs.h> 37#include <sys/mntent.h> 38#include <sys/mount.h> 39#include <sys/cmn_err.h> 40#include <sys/zfs_znode.h> 41#include <sys/zfs_dir.h> 42#include <sys/zil.h> 43#include <sys/fs/zfs.h> 44#include <sys/dmu.h> 45#include <sys/dsl_prop.h> 46#include <sys/dsl_dataset.h> 47#include <sys/spa.h> 48#include <sys/zap.h> 49#include <sys/varargs.h> 50#include <sys/atomic.h> 51#include <sys/zfs_ioctl.h> 52#include <sys/zfs_ctldir.h> 53#include <sys/dnlc.h> 54 55struct mtx atomic_mtx; 56MTX_SYSINIT(atomic, &atomic_mtx, "atomic", MTX_DEF); 57 58struct mtx zfs_debug_mtx; 59MTX_SYSINIT(zfs_debug_mtx, &zfs_debug_mtx, "zfs_debug", MTX_DEF); 60SYSCTL_NODE(_vfs, OID_AUTO, zfs, CTLFLAG_RW, 0, "ZFS file system"); 61int zfs_debug_level = 0; 62TUNABLE_INT("vfs.zfs.debug", 
&zfs_debug_level); 63SYSCTL_INT(_vfs_zfs, OID_AUTO, debug, CTLFLAG_RW, &zfs_debug_level, 0, 64 "Debug level"); 65 66static int zfs_mount(vfs_t *vfsp, kthread_t *td); 67static int zfs_umount(vfs_t *vfsp, int fflag, kthread_t *td); 68static int zfs_root(vfs_t *vfsp, int flags, vnode_t **vpp, kthread_t *td); 69static int zfs_statfs(vfs_t *vfsp, struct statfs *statp, kthread_t *td); 70static int zfs_vget(vfs_t *vfsp, ino_t ino, int flags, vnode_t **vpp); 71static int zfs_sync(vfs_t *vfsp, int waitfor, kthread_t *td); 72static int zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, vnode_t **vpp); 73static void zfs_objset_close(zfsvfs_t *zfsvfs); 74static void zfs_freevfs(vfs_t *vfsp); 75 76static struct vfsops zfs_vfsops = { 77 .vfs_mount = zfs_mount, 78 .vfs_unmount = zfs_umount, 79 .vfs_root = zfs_root, 80 .vfs_statfs = zfs_statfs, 81 .vfs_vget = zfs_vget, 82 .vfs_sync = zfs_sync, 83 .vfs_fhtovp = zfs_fhtovp, 84}; 85 86VFS_SET(zfs_vfsops, zfs, VFCF_JAIL); 87 88/* 89 * We need to keep a count of active fs's. 90 * This is necessary to prevent our module 91 * from being unloaded after a umount -f 92 */ 93static uint32_t zfs_active_fs_count = 0; 94 95/*ARGSUSED*/ 96static int 97zfs_sync(vfs_t *vfsp, int waitfor, kthread_t *td) 98{ 99 100 /* 101 * Data integrity is job one. We don't want a compromised kernel 102 * writing to the storage pool, so we never sync during panic. 103 */ 104 if (panicstr) 105 return (0); 106 107 if (vfsp != NULL) { 108 /* 109 * Sync a specific filesystem. 110 */ 111 zfsvfs_t *zfsvfs = vfsp->vfs_data; 112 int error; 113 114 error = vfs_stdsync(vfsp, waitfor, td); 115 if (error != 0) 116 return (error); 117 118 ZFS_ENTER(zfsvfs); 119 if (zfsvfs->z_log != NULL) 120 zil_commit(zfsvfs->z_log, UINT64_MAX, 0); 121 else 122 txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0); 123 ZFS_EXIT(zfsvfs); 124 } else { 125 /* 126 * Sync all ZFS filesystems. This is what happens when you 127 * run sync(1M). 
Unlike other filesystems, ZFS honors the 128 * request by waiting for all pools to commit all dirty data. 129 */ 130 spa_sync_allpools(); 131 } 132 133 return (0); 134} 135 136static void 137atime_changed_cb(void *arg, uint64_t newval) 138{ 139 zfsvfs_t *zfsvfs = arg; 140 141 if (newval == TRUE) { 142 zfsvfs->z_atime = TRUE; 143 zfsvfs->z_vfs->vfs_flag &= ~MNT_NOATIME; 144 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME); 145 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_ATIME, NULL, 0); 146 } else { 147 zfsvfs->z_atime = FALSE; 148 zfsvfs->z_vfs->vfs_flag |= MNT_NOATIME; 149 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_ATIME); 150 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME, NULL, 0); 151 } 152} 153 154static void 155xattr_changed_cb(void *arg, uint64_t newval) 156{ 157 zfsvfs_t *zfsvfs = arg; 158 159 if (newval == TRUE) { 160 /* XXX locking on vfs_flag? */ 161#ifdef TODO 162 zfsvfs->z_vfs->vfs_flag |= VFS_XATTR; 163#endif 164 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOXATTR); 165 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_XATTR, NULL, 0); 166 } else { 167 /* XXX locking on vfs_flag? */ 168#ifdef TODO 169 zfsvfs->z_vfs->vfs_flag &= ~VFS_XATTR; 170#endif 171 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_XATTR); 172 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOXATTR, NULL, 0); 173 } 174} 175 176static void 177blksz_changed_cb(void *arg, uint64_t newval) 178{ 179 zfsvfs_t *zfsvfs = arg; 180 181 if (newval < SPA_MINBLOCKSIZE || 182 newval > SPA_MAXBLOCKSIZE || !ISP2(newval)) 183 newval = SPA_MAXBLOCKSIZE; 184 185 zfsvfs->z_max_blksz = newval; 186 zfsvfs->z_vfs->vfs_bsize = newval; 187} 188 189static void 190readonly_changed_cb(void *arg, uint64_t newval) 191{ 192 zfsvfs_t *zfsvfs = arg; 193 194 if (newval) { 195 /* XXX locking on vfs_flag? */ 196 zfsvfs->z_vfs->vfs_flag |= VFS_RDONLY; 197 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RW); 198 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RO, NULL, 0); 199 } else { 200 /* XXX locking on vfs_flag? 
*/ 201 zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY; 202 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RO); 203 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RW, NULL, 0); 204 } 205} 206 207static void 208setuid_changed_cb(void *arg, uint64_t newval) 209{ 210 zfsvfs_t *zfsvfs = arg; 211 212 if (newval == FALSE) { 213 zfsvfs->z_vfs->vfs_flag |= VFS_NOSETUID; 214 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_SETUID); 215 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID, NULL, 0); 216 } else { 217 zfsvfs->z_vfs->vfs_flag &= ~VFS_NOSETUID; 218 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID); 219 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_SETUID, NULL, 0); 220 } 221} 222 223static void 224exec_changed_cb(void *arg, uint64_t newval) 225{ 226 zfsvfs_t *zfsvfs = arg; 227 228 if (newval == FALSE) { 229 zfsvfs->z_vfs->vfs_flag |= VFS_NOEXEC; 230 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_EXEC); 231 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC, NULL, 0); 232 } else { 233 zfsvfs->z_vfs->vfs_flag &= ~VFS_NOEXEC; 234 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC); 235 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_EXEC, NULL, 0); 236 } 237} 238 239static void 240snapdir_changed_cb(void *arg, uint64_t newval) 241{ 242 zfsvfs_t *zfsvfs = arg; 243 244 zfsvfs->z_show_ctldir = newval; 245} 246 247static void 248acl_mode_changed_cb(void *arg, uint64_t newval) 249{ 250 zfsvfs_t *zfsvfs = arg; 251 252 zfsvfs->z_acl_mode = newval; 253} 254 255static void 256acl_inherit_changed_cb(void *arg, uint64_t newval) 257{ 258 zfsvfs_t *zfsvfs = arg; 259 260 zfsvfs->z_acl_inherit = newval; 261} 262 263static int 264zfs_refresh_properties(vfs_t *vfsp) 265{ 266 zfsvfs_t *zfsvfs = vfsp->vfs_data; 267 268 /* 269 * Remount operations default to "rw" unless "ro" is explicitly 270 * specified. 
271 */ 272 if (vfs_optionisset(vfsp, MNTOPT_RO, NULL)) { 273 readonly_changed_cb(zfsvfs, B_TRUE); 274 } else { 275 if (!dmu_objset_is_snapshot(zfsvfs->z_os)) 276 readonly_changed_cb(zfsvfs, B_FALSE); 277 else if (vfs_optionisset(vfsp, MNTOPT_RW, NULL)) 278 return (EROFS); 279 } 280 281 if (vfs_optionisset(vfsp, MNTOPT_NOSUID, NULL)) { 282 setuid_changed_cb(zfsvfs, B_FALSE); 283 } else { 284 if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) 285 setuid_changed_cb(zfsvfs, B_FALSE); 286 else if (vfs_optionisset(vfsp, MNTOPT_SETUID, NULL)) 287 setuid_changed_cb(zfsvfs, B_TRUE); 288 } 289 290 if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL)) 291 exec_changed_cb(zfsvfs, B_FALSE); 292 else if (vfs_optionisset(vfsp, MNTOPT_EXEC, NULL)) 293 exec_changed_cb(zfsvfs, B_TRUE); 294 295 if (vfs_optionisset(vfsp, MNTOPT_ATIME, NULL)) 296 atime_changed_cb(zfsvfs, B_TRUE); 297 else if (vfs_optionisset(vfsp, MNTOPT_NOATIME, NULL)) 298 atime_changed_cb(zfsvfs, B_FALSE); 299 300 if (vfs_optionisset(vfsp, MNTOPT_XATTR, NULL)) 301 xattr_changed_cb(zfsvfs, B_TRUE); 302 else if (vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL)) 303 xattr_changed_cb(zfsvfs, B_FALSE); 304 305 return (0); 306} 307 308static int 309zfs_register_callbacks(vfs_t *vfsp) 310{ 311 struct dsl_dataset *ds = NULL; 312 objset_t *os = NULL; 313 zfsvfs_t *zfsvfs = NULL; 314 int readonly, do_readonly = FALSE; 315 int setuid, do_setuid = FALSE; 316 int exec, do_exec = FALSE; 317 int xattr, do_xattr = FALSE; 318 int error = 0; 319 320 ASSERT(vfsp); 321 zfsvfs = vfsp->vfs_data; 322 ASSERT(zfsvfs); 323 os = zfsvfs->z_os; 324 325 /* 326 * The act of registering our callbacks will destroy any mount 327 * options we may have. In order to enable temporary overrides 328 * of mount options, we stash away the current values and 329 * restore them after we register the callbacks. 
330 */ 331 if (vfs_optionisset(vfsp, MNTOPT_RO, NULL)) { 332 readonly = B_TRUE; 333 do_readonly = B_TRUE; 334 } else if (vfs_optionisset(vfsp, MNTOPT_RW, NULL)) { 335 readonly = B_FALSE; 336 do_readonly = B_TRUE; 337 } 338 if (vfs_optionisset(vfsp, MNTOPT_NOSUID, NULL)) { 339 setuid = B_FALSE; 340 do_setuid = B_TRUE; 341 } else { 342 if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) { 343 setuid = B_FALSE; 344 do_setuid = B_TRUE; 345 } else if (vfs_optionisset(vfsp, MNTOPT_SETUID, NULL)) { 346 setuid = B_TRUE; 347 do_setuid = B_TRUE; 348 } 349 } 350 if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL)) { 351 exec = B_FALSE; 352 do_exec = B_TRUE; 353 } else if (vfs_optionisset(vfsp, MNTOPT_EXEC, NULL)) { 354 exec = B_TRUE; 355 do_exec = B_TRUE; 356 } 357 if (vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL)) { 358 xattr = B_FALSE; 359 do_xattr = B_TRUE; 360 } else if (vfs_optionisset(vfsp, MNTOPT_XATTR, NULL)) { 361 xattr = B_TRUE; 362 do_xattr = B_TRUE; 363 } 364 365 /* 366 * Register property callbacks. 367 * 368 * It would probably be fine to just check for i/o error from 369 * the first prop_register(), but I guess I like to go 370 * overboard... 371 */ 372 ds = dmu_objset_ds(os); 373 error = dsl_prop_register(ds, "atime", atime_changed_cb, zfsvfs); 374 error = error ? error : dsl_prop_register(ds, 375 "xattr", xattr_changed_cb, zfsvfs); 376 error = error ? error : dsl_prop_register(ds, 377 "recordsize", blksz_changed_cb, zfsvfs); 378 error = error ? error : dsl_prop_register(ds, 379 "readonly", readonly_changed_cb, zfsvfs); 380 error = error ? error : dsl_prop_register(ds, 381 "setuid", setuid_changed_cb, zfsvfs); 382 error = error ? error : dsl_prop_register(ds, 383 "exec", exec_changed_cb, zfsvfs); 384 error = error ? error : dsl_prop_register(ds, 385 "snapdir", snapdir_changed_cb, zfsvfs); 386 error = error ? error : dsl_prop_register(ds, 387 "aclmode", acl_mode_changed_cb, zfsvfs); 388 error = error ? 
error : dsl_prop_register(ds, 389 "aclinherit", acl_inherit_changed_cb, zfsvfs); 390 if (error) 391 goto unregister; 392 393 /* 394 * Invoke our callbacks to restore temporary mount options. 395 */ 396 if (do_readonly) 397 readonly_changed_cb(zfsvfs, readonly); 398 if (do_setuid) 399 setuid_changed_cb(zfsvfs, setuid); 400 if (do_exec) 401 exec_changed_cb(zfsvfs, exec); 402 if (do_xattr) 403 xattr_changed_cb(zfsvfs, xattr); 404 405 return (0); 406 407unregister: 408 /* 409 * We may attempt to unregister some callbacks that are not 410 * registered, but this is OK; it will simply return ENOMSG, 411 * which we will ignore. 412 */ 413 (void) dsl_prop_unregister(ds, "atime", atime_changed_cb, zfsvfs); 414 (void) dsl_prop_unregister(ds, "xattr", xattr_changed_cb, zfsvfs); 415 (void) dsl_prop_unregister(ds, "recordsize", blksz_changed_cb, zfsvfs); 416 (void) dsl_prop_unregister(ds, "readonly", readonly_changed_cb, zfsvfs); 417 (void) dsl_prop_unregister(ds, "setuid", setuid_changed_cb, zfsvfs); 418 (void) dsl_prop_unregister(ds, "exec", exec_changed_cb, zfsvfs); 419 (void) dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, zfsvfs); 420 (void) dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb, zfsvfs); 421 (void) dsl_prop_unregister(ds, "aclinherit", acl_inherit_changed_cb, 422 zfsvfs); 423 return (error); 424 425} 426 427static int 428zfs_domount(vfs_t *vfsp, char *osname, kthread_t *td) 429{ 430 cred_t *cr = td->td_ucred; 431 uint64_t recordsize, readonly; 432 int error = 0; 433 int mode; 434 zfsvfs_t *zfsvfs; 435 znode_t *zp = NULL; 436 437 ASSERT(vfsp); 438 ASSERT(osname); 439 440 /* 441 * Initialize the zfs-specific filesystem structure. 442 * Should probably make this a kmem cache, shuffle fields, 443 * and just bzero up to z_hold_mtx[]. 
444 */ 445 zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP); 446 zfsvfs->z_vfs = vfsp; 447 zfsvfs->z_parent = zfsvfs; 448 zfsvfs->z_assign = TXG_NOWAIT; 449 zfsvfs->z_max_blksz = SPA_MAXBLOCKSIZE; 450 zfsvfs->z_show_ctldir = ZFS_SNAPDIR_VISIBLE; 451 452 mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL); 453 list_create(&zfsvfs->z_all_znodes, sizeof (znode_t), 454 offsetof(znode_t, z_link_node)); 455 rw_init(&zfsvfs->z_um_lock, NULL, RW_DEFAULT, NULL); 456 457 if (error = dsl_prop_get_integer(osname, "recordsize", &recordsize, 458 NULL)) 459 goto out; 460 zfsvfs->z_vfs->vfs_bsize = recordsize; 461 462 vfsp->vfs_data = zfsvfs; 463 vfsp->mnt_flag |= MNT_LOCAL; 464 vfsp->mnt_kern_flag |= MNTK_MPSAFE; 465 vfsp->mnt_kern_flag |= MNTK_LOOKUP_SHARED; 466 467 if (error = dsl_prop_get_integer(osname, "readonly", &readonly, NULL)) 468 goto out; 469 470 if (readonly) 471 mode = DS_MODE_PRIMARY | DS_MODE_READONLY; 472 else 473 mode = DS_MODE_PRIMARY; 474 475 error = dmu_objset_open(osname, DMU_OST_ZFS, mode, &zfsvfs->z_os); 476 if (error == EROFS) { 477 mode = DS_MODE_PRIMARY | DS_MODE_READONLY; 478 error = dmu_objset_open(osname, DMU_OST_ZFS, mode, 479 &zfsvfs->z_os); 480 } 481 482 if (error) 483 goto out; 484 485 if (error = zfs_init_fs(zfsvfs, &zp, cr)) 486 goto out; 487 488 if (dmu_objset_is_snapshot(zfsvfs->z_os)) { 489 uint64_t xattr; 490 491 ASSERT(mode & DS_MODE_READONLY); 492 atime_changed_cb(zfsvfs, B_FALSE); 493 readonly_changed_cb(zfsvfs, B_TRUE); 494 if (error = dsl_prop_get_integer(osname, "xattr", &xattr, NULL)) 495 goto out; 496 xattr_changed_cb(zfsvfs, xattr); 497 zfsvfs->z_issnap = B_TRUE; 498 } else { 499 error = zfs_register_callbacks(vfsp); 500 if (error) 501 goto out; 502 503 zfs_unlinked_drain(zfsvfs); 504 505 /* 506 * Parse and replay the intent log. 
507 */ 508 zil_replay(zfsvfs->z_os, zfsvfs, &zfsvfs->z_assign, 509 zfs_replay_vector); 510 511 if (!zil_disable) 512 zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data); 513 } 514 515 vfs_mountedfrom(vfsp, osname); 516 517 if (!zfsvfs->z_issnap) 518 zfsctl_create(zfsvfs); 519out: 520 if (error) { 521 if (zfsvfs->z_os) 522 dmu_objset_close(zfsvfs->z_os); 523 rw_destroy(&zfsvfs->z_um_lock); 524 mutex_destroy(&zfsvfs->z_znodes_lock); 525 kmem_free(zfsvfs, sizeof (zfsvfs_t)); 526 } else { 527 atomic_add_32(&zfs_active_fs_count, 1); 528 } 529 530 return (error); 531 532} 533 534void 535zfs_unregister_callbacks(zfsvfs_t *zfsvfs) 536{ 537 objset_t *os = zfsvfs->z_os; 538 struct dsl_dataset *ds; 539 540 /* 541 * Unregister properties. 542 */ 543 if (!dmu_objset_is_snapshot(os)) { 544 ds = dmu_objset_ds(os); 545 VERIFY(dsl_prop_unregister(ds, "atime", atime_changed_cb, 546 zfsvfs) == 0); 547 548 VERIFY(dsl_prop_unregister(ds, "xattr", xattr_changed_cb, 549 zfsvfs) == 0); 550 551 VERIFY(dsl_prop_unregister(ds, "recordsize", blksz_changed_cb, 552 zfsvfs) == 0); 553 554 VERIFY(dsl_prop_unregister(ds, "readonly", readonly_changed_cb, 555 zfsvfs) == 0); 556 557 VERIFY(dsl_prop_unregister(ds, "setuid", setuid_changed_cb, 558 zfsvfs) == 0); 559 560 VERIFY(dsl_prop_unregister(ds, "exec", exec_changed_cb, 561 zfsvfs) == 0); 562 563 VERIFY(dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, 564 zfsvfs) == 0); 565 566 VERIFY(dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb, 567 zfsvfs) == 0); 568 569 VERIFY(dsl_prop_unregister(ds, "aclinherit", 570 acl_inherit_changed_cb, zfsvfs) == 0); 571 } 572} 573 574/*ARGSUSED*/ 575static int 576zfs_mount(vfs_t *vfsp, kthread_t *td) 577{ 578 char *from; 579 int error; 580 581 /* 582 * When doing a remount, we simply refresh our temporary properties 583 * according to those options set in the current VFS options. 
584 */ 585 if (vfsp->vfs_flag & MS_REMOUNT) 586 return (zfs_refresh_properties(vfsp)); 587 588 if (vfs_getopt(vfsp->mnt_optnew, "from", (void **)&from, NULL)) 589 return (EINVAL); 590 591 DROP_GIANT(); 592 error = zfs_domount(vfsp, from, td); 593 PICKUP_GIANT(); 594 return (error); 595} 596 597static int 598zfs_statfs(vfs_t *vfsp, struct statfs *statp, kthread_t *td) 599{ 600 zfsvfs_t *zfsvfs = vfsp->vfs_data; 601 uint64_t refdbytes, availbytes, usedobjs, availobjs; 602 603 statp->f_version = STATFS_VERSION; 604 605 ZFS_ENTER(zfsvfs); 606 607 dmu_objset_space(zfsvfs->z_os, 608 &refdbytes, &availbytes, &usedobjs, &availobjs); 609 610 /* 611 * The underlying storage pool actually uses multiple block sizes. 612 * We report the fragsize as the smallest block size we support, 613 * and we report our blocksize as the filesystem's maximum blocksize. 614 */ 615 statp->f_bsize = zfsvfs->z_vfs->vfs_bsize; 616 statp->f_iosize = zfsvfs->z_vfs->vfs_bsize; 617 618 /* 619 * The following report "total" blocks of various kinds in the 620 * file system, but reported in terms of f_frsize - the 621 * "fragment" size. 622 */ 623 624 statp->f_blocks = (refdbytes + availbytes) / statp->f_bsize; 625 statp->f_bfree = availbytes / statp->f_bsize; 626 statp->f_bavail = statp->f_bfree; /* no root reservation */ 627 628 /* 629 * statvfs() should really be called statufs(), because it assumes 630 * static metadata. ZFS doesn't preallocate files, so the best 631 * we can do is report the max that could possibly fit in f_files, 632 * and that minus the number actually used in f_ffree. 633 * For f_ffree, report the smaller of the number of object available 634 * and the number of blocks (each object will take at least a block). 635 */ 636 statp->f_ffree = MIN(availobjs, statp->f_bfree); 637 statp->f_files = statp->f_ffree + usedobjs; 638 639 /* 640 * We're a zfs filesystem. 
641 */ 642 (void) strlcpy(statp->f_fstypename, "zfs", sizeof(statp->f_fstypename)); 643 644 strlcpy(statp->f_mntfromname, vfsp->mnt_stat.f_mntfromname, 645 sizeof(statp->f_mntfromname)); 646 strlcpy(statp->f_mntonname, vfsp->mnt_stat.f_mntonname, 647 sizeof(statp->f_mntonname)); 648 649 statp->f_namemax = ZFS_MAXNAMELEN; 650 651 ZFS_EXIT(zfsvfs); 652 return (0); 653} 654 655static int 656zfs_root(vfs_t *vfsp, int flags, vnode_t **vpp, kthread_t *td) 657{ 658 zfsvfs_t *zfsvfs = vfsp->vfs_data; 659 znode_t *rootzp; 660 int error; 661 662 ZFS_ENTER(zfsvfs); 663 664 error = zfs_zget(zfsvfs, zfsvfs->z_root, &rootzp); 665 if (error == 0) { 666 *vpp = ZTOV(rootzp); 667 error = vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY, td); 668 (*vpp)->v_vflag |= VV_ROOT; 669 } 670 671 ZFS_EXIT(zfsvfs); 672 return (error); 673} 674 675/*ARGSUSED*/ 676static int 677zfs_umount(vfs_t *vfsp, int fflag, kthread_t *td) 678{ 679 zfsvfs_t *zfsvfs = vfsp->vfs_data; 680 cred_t *cr = td->td_ucred; 681 int ret; 682 683 if ((ret = secpolicy_fs_unmount(cr, vfsp)) != 0) 684 return (ret); 685 686 (void) dnlc_purge_vfsp(vfsp, 0); 687 688 /* 689 * Unmount any snapshots mounted under .zfs before unmounting the 690 * dataset itself. 691 */ 692 if (zfsvfs->z_ctldir != NULL) { 693 if ((ret = zfsctl_umount_snapshots(vfsp, fflag, cr)) != 0) 694 return (ret); 695 ret = vflush(vfsp, 0, 0, td); 696 ASSERT(ret == EBUSY); 697 if (!(fflag & MS_FORCE)) { 698 if (zfsvfs->z_ctldir->v_count > 1) 699 return (EBUSY); 700 ASSERT(zfsvfs->z_ctldir->v_count == 1); 701 } 702 zfsctl_destroy(zfsvfs); 703 ASSERT(zfsvfs->z_ctldir == NULL); 704 } 705 706 /* 707 * Flush all the files. 708 */ 709 ret = vflush(vfsp, 1, (fflag & MS_FORCE) ? 
FORCECLOSE : 0, td); 710 if (ret != 0) { 711 if (!zfsvfs->z_issnap) { 712 zfsctl_create(zfsvfs); 713 ASSERT(zfsvfs->z_ctldir != NULL); 714 } 715 return (ret); 716 } 717 718 if (fflag & MS_FORCE) { 719 MNT_ILOCK(vfsp); 720 vfsp->mnt_kern_flag |= MNTK_UNMOUNTF; 721 MNT_IUNLOCK(vfsp); 722 zfsvfs->z_unmounted1 = B_TRUE; 723 724 /* 725 * Wait for all zfs threads to leave zfs. 726 * Grabbing a rwlock as reader in all vops and 727 * as writer here doesn't work because it too easy to get 728 * multiple reader enters as zfs can re-enter itself. 729 * This can lead to deadlock if there is an intervening 730 * rw_enter as writer. 731 * So a file system threads ref count (z_op_cnt) is used. 732 * A polling loop on z_op_cnt may seem inefficient, but 733 * - this saves all threads on exit from having to grab a 734 * mutex in order to cv_signal 735 * - only occurs on forced unmount in the rare case when 736 * there are outstanding threads within the file system. 737 */ 738 while (zfsvfs->z_op_cnt) { 739 delay(1); 740 } 741 } 742 743 zfs_objset_close(zfsvfs); 744 VFS_RELE(vfsp); 745 zfs_freevfs(vfsp); 746 747 return (0); 748} 749 750static int 751zfs_vget(vfs_t *vfsp, ino_t ino, int flags, vnode_t **vpp) 752{ 753 zfsvfs_t *zfsvfs = vfsp->vfs_data; 754 znode_t *zp; 755 int err; 756 757 ZFS_ENTER(zfsvfs); 758 err = zfs_zget(zfsvfs, ino, &zp); 759 if (err == 0 && zp->z_unlinked) { 760 VN_RELE(ZTOV(zp)); 761 err = EINVAL; 762 } 763 if (err != 0) 764 *vpp = NULL; 765 else { 766 *vpp = ZTOV(zp); 767 vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY, curthread); 768 } 769 ZFS_EXIT(zfsvfs); 770 return (0); 771} 772 773static int 774zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, vnode_t **vpp) 775{ 776 kthread_t *td = curthread; 777 zfsvfs_t *zfsvfs = vfsp->vfs_data; 778 znode_t *zp; 779 uint64_t object = 0; 780 uint64_t fid_gen = 0; 781 uint64_t gen_mask; 782 uint64_t zp_gen; 783 int i, err; 784 785 *vpp = NULL; 786 787 ZFS_ENTER(zfsvfs); 788 789 if (fidp->fid_len == LONG_FID_LEN) { 790 zfid_long_t *zlfid = 
(zfid_long_t *)fidp; 791 uint64_t objsetid = 0; 792 uint64_t setgen = 0; 793 794 for (i = 0; i < sizeof (zlfid->zf_setid); i++) 795 objsetid |= ((uint64_t)zlfid->zf_setid[i]) << (8 * i); 796 797 for (i = 0; i < sizeof (zlfid->zf_setgen); i++) 798 setgen |= ((uint64_t)zlfid->zf_setgen[i]) << (8 * i); 799 800 ZFS_EXIT(zfsvfs); 801 802 err = zfsctl_lookup_objset(vfsp, objsetid, &zfsvfs); 803 if (err) 804 return (EINVAL); 805 ZFS_ENTER(zfsvfs); 806 } 807 808 if (fidp->fid_len == SHORT_FID_LEN || fidp->fid_len == LONG_FID_LEN) { 809 zfid_short_t *zfid = (zfid_short_t *)fidp; 810 811 for (i = 0; i < sizeof (zfid->zf_object); i++) 812 object |= ((uint64_t)zfid->zf_object[i]) << (8 * i); 813 814 for (i = 0; i < sizeof (zfid->zf_gen); i++) 815 fid_gen |= ((uint64_t)zfid->zf_gen[i]) << (8 * i); 816 } else { 817 ZFS_EXIT(zfsvfs); 818 return (EINVAL); 819 } 820 821 /* A zero fid_gen means we are in the .zfs control directories */ 822 if (fid_gen == 0 && 823 (object == ZFSCTL_INO_ROOT || object == ZFSCTL_INO_SNAPDIR)) { 824 *vpp = zfsvfs->z_ctldir; 825 ASSERT(*vpp != NULL); 826 if (object == ZFSCTL_INO_SNAPDIR) { 827 VERIFY(zfsctl_root_lookup(*vpp, "snapshot", vpp, NULL, 828 0, NULL, NULL) == 0); 829 } else { 830 VN_HOLD(*vpp); 831 } 832 ZFS_EXIT(zfsvfs); 833 vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY, td); 834 return (0); 835 } 836 837 gen_mask = -1ULL >> (64 - 8 * i); 838 839 dprintf("getting %llu [%u mask %llx]\n", object, fid_gen, gen_mask); 840 if (err = zfs_zget(zfsvfs, object, &zp)) { 841 ZFS_EXIT(zfsvfs); 842 return (err); 843 } 844 zp_gen = zp->z_phys->zp_gen & gen_mask; 845 if (zp_gen == 0) 846 zp_gen = 1; 847 if (zp->z_unlinked || zp_gen != fid_gen) { 848 dprintf("znode gen (%u) != fid gen (%u)\n", zp_gen, fid_gen); 849 VN_RELE(ZTOV(zp)); 850 ZFS_EXIT(zfsvfs); 851 return (EINVAL); 852 } 853 854 *vpp = ZTOV(zp); 855 vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY, td); 856 vnode_create_vobject(*vpp, zp->z_phys->zp_size, td); 857 ZFS_EXIT(zfsvfs); 858 return (0); 859} 860 861static 
void 862zfs_objset_close(zfsvfs_t *zfsvfs) 863{ 864 znode_t *zp, *nextzp; 865 objset_t *os = zfsvfs->z_os; 866 867 /* 868 * For forced unmount, at this point all vops except zfs_inactive 869 * are erroring EIO. We need to now suspend zfs_inactive threads 870 * while we are freeing dbufs before switching zfs_inactive 871 * to use behaviour without a objset. 872 */ 873 rw_enter(&zfsvfs->z_um_lock, RW_WRITER); 874 875 /* 876 * Release all holds on dbufs 877 * Note, although we have stopped all other vop threads and 878 * zfs_inactive(), the dmu can callback via znode_pageout_func() 879 * which can zfs_znode_free() the znode. 880 * So we lock z_all_znodes; search the list for a held 881 * dbuf; drop the lock (we know zp can't disappear if we hold 882 * a dbuf lock; then regrab the lock and restart. 883 */ 884 mutex_enter(&zfsvfs->z_znodes_lock); 885 for (zp = list_head(&zfsvfs->z_all_znodes); zp; zp = nextzp) { 886 nextzp = list_next(&zfsvfs->z_all_znodes, zp); 887 if (zp->z_dbuf_held) { 888 /* dbufs should only be held when force unmounting */ 889 zp->z_dbuf_held = 0; 890 mutex_exit(&zfsvfs->z_znodes_lock); 891 dmu_buf_rele(zp->z_dbuf, NULL); 892 /* Start again */ 893 mutex_enter(&zfsvfs->z_znodes_lock); 894 nextzp = list_head(&zfsvfs->z_all_znodes); 895 } 896 } 897 mutex_exit(&zfsvfs->z_znodes_lock); 898 899 /* 900 * Unregister properties. 901 */ 902 if (!dmu_objset_is_snapshot(os)) 903 zfs_unregister_callbacks(zfsvfs); 904 905 /* 906 * Switch zfs_inactive to behaviour without an objset. 907 * It just tosses cached pages and frees the znode & vnode. 908 * Then re-enable zfs_inactive threads in that new behaviour. 909 */ 910 zfsvfs->z_unmounted2 = B_TRUE; 911 rw_exit(&zfsvfs->z_um_lock); /* re-enable any zfs_inactive threads */ 912 913 /* 914 * Close the zil. Can't close the zil while zfs_inactive 915 * threads are blocked as zil_close can call zfs_inactive. 
916 */ 917 if (zfsvfs->z_log) { 918 zil_close(zfsvfs->z_log); 919 zfsvfs->z_log = NULL; 920 } 921 922 /* 923 * Evict all dbufs so that cached znodes will be freed 924 */ 925 if (dmu_objset_evict_dbufs(os, 1)) { 926 txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0); 927 (void) dmu_objset_evict_dbufs(os, 0); 928 } 929 930 /* 931 * Finally close the objset 932 */ 933 dmu_objset_close(os); 934} 935 936static void 937zfs_freevfs(vfs_t *vfsp) 938{ 939 zfsvfs_t *zfsvfs = vfsp->vfs_data; 940 int i; 941 942 for (i = 0; i != ZFS_OBJ_MTX_SZ; i++) 943 mutex_destroy(&zfsvfs->z_hold_mtx[i]); 944 rw_destroy(&zfsvfs->z_um_lock); 945 mutex_destroy(&zfsvfs->z_znodes_lock); 946 kmem_free(zfsvfs, sizeof (zfsvfs_t)); 947 948 atomic_add_32(&zfs_active_fs_count, -1); 949} 950 951void 952zfs_init(void) 953{ 954 955 printf("ZFS filesystem version " ZFS_VERSION_STRING "\n"); 956 957 /* 958 * Initialize .zfs directory structures 959 */ 960 zfsctl_init(); 961 962 /* 963 * Initialize znode cache, vnode ops, etc... 964 */ 965 zfs_znode_init(); 966} 967 968void 969zfs_fini(void) 970{ 971 zfsctl_fini(); 972 zfs_znode_fini(); 973} 974 975int 976zfs_busy(void) 977{ 978 return (zfs_active_fs_count != 0); 979} 980