Cross Reference: /freebsd-10.2-release/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c

Deleted Added

sdiff udiff text old ( 169194 ) new ( 170431 )

full compact

zfs_vfsops.c (169194)	zfs_vfsops.c (170431)
1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 / 21/ 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26#pragma ident "%Z%%M% %I% %E% SMI" 27 28#include <sys/types.h> 29#include <sys/param.h> 30#include <sys/systm.h> 31#include <sys/kernel.h> 32#include <sys/sysmacros.h> 33#include <sys/kmem.h> 34#include <sys/acl.h> 35#include <sys/vnode.h> 36#include <sys/vfs.h> 37#include <sys/mntent.h> 38#include <sys/mount.h> 39#include <sys/cmn_err.h> 40#include <sys/zfs_znode.h> 41#include <sys/zfs_dir.h> 42#include <sys/zil.h> 43#include <sys/fs/zfs.h> 44#include <sys/dmu.h> 45#include <sys/dsl_prop.h> 46#include <sys/dsl_dataset.h> 47#include <sys/spa.h> 48#include <sys/zap.h> 49#include <sys/varargs.h> 50#include <sys/policy.h> 51#include <sys/atomic.h> 52#include <sys/zfs_ioctl.h> 53#include <sys/zfs_ctldir.h> 54#include <sys/sunddi.h> 55#include <sys/dnlc.h> 56	1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 / 21/ 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26#pragma ident "%Z%%M% %I% %E% SMI" 27 28#include <sys/types.h> 29#include <sys/param.h> 30#include <sys/systm.h> 31#include <sys/kernel.h> 32#include <sys/sysmacros.h> 33#include <sys/kmem.h> 34#include <sys/acl.h> 35#include <sys/vnode.h> 36#include <sys/vfs.h> 37#include <sys/mntent.h> 38#include <sys/mount.h> 39#include <sys/cmn_err.h> 40#include <sys/zfs_znode.h> 41#include <sys/zfs_dir.h> 42#include <sys/zil.h> 43#include <sys/fs/zfs.h> 44#include <sys/dmu.h> 45#include <sys/dsl_prop.h> 46#include <sys/dsl_dataset.h> 47#include <sys/spa.h> 48#include <sys/zap.h> 49#include <sys/varargs.h> 50#include <sys/policy.h> 51#include <sys/atomic.h> 52#include <sys/zfs_ioctl.h> 53#include <sys/zfs_ctldir.h> 54#include <sys/sunddi.h> 55#include <sys/dnlc.h> 56
57struct mtx atomic_mtx; 58MTX_SYSINIT(atomic, &atomic_mtx, "atomic", MTX_DEF); 59
60struct mtx zfs_debug_mtx; 61MTX_SYSINIT(zfs_debug_mtx, &zfs_debug_mtx, "zfs_debug", MTX_DEF); 62SYSCTL_NODE(_vfs, OID_AUTO, zfs, CTLFLAG_RW, 0, "ZFS file system"); 63int zfs_debug_level = 0; 64TUNABLE_INT("vfs.zfs.debug", &zfs_debug_level); 65SYSCTL_INT(_vfs_zfs, OID_AUTO, debug, CTLFLAG_RW, &zfs_debug_level, 0, 66 "Debug level"); 67 68static int zfs_mount(vfs_t vfsp, kthread_t td); 69static int zfs_umount(vfs_t vfsp, int fflag, kthread_t td); 70static int zfs_root(vfs_t vfsp, int flags, vnode_t vpp, kthread_t td); 71static int zfs_statfs(vfs_t vfsp, struct statfs statp, kthread_t td); 72static int zfs_vget(vfs_t vfsp, ino_t ino, int flags, vnode_t *vpp); 73static int zfs_sync(vfs_t vfsp, int waitfor, kthread_t td); 74static int zfs_fhtovp(vfs_t vfsp, fid_t fidp, vnode_t vpp); 75static void zfs_objset_close(zfsvfs_t zfsvfs); 76static void zfs_freevfs(vfs_t vfsp); 77 78static struct vfsops zfs_vfsops = { 79 .vfs_mount = zfs_mount, 80 .vfs_unmount = zfs_umount, 81 .vfs_root = zfs_root, 82 .vfs_statfs = zfs_statfs, 83 .vfs_vget = zfs_vget, 84 .vfs_sync = zfs_sync, 85 .vfs_fhtovp = zfs_fhtovp, 86}; 87 88VFS_SET(zfs_vfsops, zfs, VFCF_JAIL); 89 90/ 91 * We need to keep a count of active fs's. 92 * This is necessary to prevent our module 93 * from being unloaded after a umount -f 94 / 95static uint32_t zfs_active_fs_count = 0; 96 97/ARGSUSED/ 98static int 99zfs_sync(vfs_t vfsp, int waitfor, kthread_t td) 100{ 101* 102 /* 103 * Data integrity is job one. We don't want a compromised kernel 104 * writing to the storage pool, so we never sync during panic. 105 / 106* if (panicstr) 107 return (0); 108 109 if (vfsp != NULL) { 110 /* 111 * Sync a specific filesystem. 112 / 113* zfsvfs_t zfsvfs = vfsp->vfs_data; 114* int error; 115 116 error = vfs_stdsync(vfsp, waitfor, td); 117 if (error != 0) 118 return (error); 119 120 ZFS_ENTER(zfsvfs); 121 if (zfsvfs->z_log != NULL) 122 zil_commit(zfsvfs->z_log, UINT64_MAX, 0); 123 else 124 txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0); 125 ZFS_EXIT(zfsvfs); 126 } else { 127 /* 128 * Sync all ZFS filesystems. This is what happens when you 129 * run sync(1M). Unlike other filesystems, ZFS honors the 130 * request by waiting for all pools to commit all dirty data. 131 / 132* spa_sync_allpools(); 133 } 134 135 return (0); 136} 137 138static void 139atime_changed_cb(void arg, uint64_t newval) 140{ 141* zfsvfs_t zfsvfs = arg; 142* 143 if (newval == TRUE) { 144 zfsvfs->z_atime = TRUE; 145 zfsvfs->z_vfs->vfs_flag &= ~MNT_NOATIME; 146 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME); 147 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_ATIME, NULL, 0); 148 } else { 149 zfsvfs->z_atime = FALSE; 150 zfsvfs->z_vfs->vfs_flag \|= MNT_NOATIME; 151 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_ATIME); 152 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME, NULL, 0); 153 } 154} 155 156static void 157xattr_changed_cb(void arg, uint64_t newval) 158{ 159* zfsvfs_t zfsvfs = arg; 160* 161 if (newval == TRUE) { 162 /* XXX locking on vfs_flag? / 163#ifdef TODO 164* zfsvfs->z_vfs->vfs_flag \|= VFS_XATTR; 165#endif 166 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOXATTR); 167 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_XATTR, NULL, 0); 168 } else { 169 /* XXX locking on vfs_flag? / 170#ifdef TODO 171* zfsvfs->z_vfs->vfs_flag &= ~VFS_XATTR; 172#endif 173 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_XATTR); 174 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOXATTR, NULL, 0); 175 } 176} 177 178static void 179blksz_changed_cb(void arg, uint64_t newval) 180{ 181* zfsvfs_t zfsvfs = arg; 182* 183 if (newval < SPA_MINBLOCKSIZE \|\| 184 newval > SPA_MAXBLOCKSIZE \|\| !ISP2(newval)) 185 newval = SPA_MAXBLOCKSIZE; 186 187 zfsvfs->z_max_blksz = newval; 188 zfsvfs->z_vfs->vfs_bsize = newval; 189} 190 191static void 192readonly_changed_cb(void arg, uint64_t newval) 193{ 194* zfsvfs_t zfsvfs = arg; 195* 196 if (newval) { 197 /* XXX locking on vfs_flag? / 198* zfsvfs->z_vfs->vfs_flag \|= VFS_RDONLY; 199 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RW); 200 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RO, NULL, 0); 201 } else { 202 /* XXX locking on vfs_flag? / 203* zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY; 204 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RO); 205 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RW, NULL, 0); 206 } 207} 208 209static void 210setuid_changed_cb(void arg, uint64_t newval) 211{ 212* zfsvfs_t zfsvfs = arg; 213* 214 if (newval == FALSE) { 215 zfsvfs->z_vfs->vfs_flag \|= VFS_NOSETUID; 216 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_SETUID); 217 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID, NULL, 0); 218 } else { 219 zfsvfs->z_vfs->vfs_flag &= ~VFS_NOSETUID; 220 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID); 221 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_SETUID, NULL, 0); 222 } 223} 224 225static void 226exec_changed_cb(void arg, uint64_t newval) 227{ 228* zfsvfs_t zfsvfs = arg; 229* 230 if (newval == FALSE) { 231 zfsvfs->z_vfs->vfs_flag \|= VFS_NOEXEC; 232 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_EXEC); 233 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC, NULL, 0); 234 } else { 235 zfsvfs->z_vfs->vfs_flag &= ~VFS_NOEXEC; 236 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC); 237 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_EXEC, NULL, 0); 238 } 239} 240 241static void 242snapdir_changed_cb(void arg, uint64_t newval) 243{ 244* zfsvfs_t zfsvfs = arg; 245* 246 zfsvfs->z_show_ctldir = newval; 247} 248 249static void 250acl_mode_changed_cb(void arg, uint64_t newval) 251{ 252* zfsvfs_t zfsvfs = arg; 253* 254 zfsvfs->z_acl_mode = newval; 255} 256 257static void 258acl_inherit_changed_cb(void arg, uint64_t newval) 259{ 260* zfsvfs_t zfsvfs = arg; 261* 262 zfsvfs->z_acl_inherit = newval; 263} 264 265static int 266zfs_refresh_properties(vfs_t vfsp) 267{ 268* zfsvfs_t zfsvfs = vfsp->vfs_data; 269* 270 /* 271 * Remount operations default to "rw" unless "ro" is explicitly 272 * specified. 273 / 274* if (vfs_optionisset(vfsp, MNTOPT_RO, NULL)) { 275 readonly_changed_cb(zfsvfs, B_TRUE); 276 } else { 277 if (!dmu_objset_is_snapshot(zfsvfs->z_os)) 278 readonly_changed_cb(zfsvfs, B_FALSE); 279 else if (vfs_optionisset(vfsp, MNTOPT_RW, NULL)) 280 return (EROFS); 281 } 282 283 if (vfs_optionisset(vfsp, MNTOPT_NOSUID, NULL)) { 284 setuid_changed_cb(zfsvfs, B_FALSE); 285 } else { 286 if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) 287 setuid_changed_cb(zfsvfs, B_FALSE); 288 else if (vfs_optionisset(vfsp, MNTOPT_SETUID, NULL)) 289 setuid_changed_cb(zfsvfs, B_TRUE); 290 } 291 292 if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL)) 293 exec_changed_cb(zfsvfs, B_FALSE); 294 else if (vfs_optionisset(vfsp, MNTOPT_EXEC, NULL)) 295 exec_changed_cb(zfsvfs, B_TRUE); 296 297 if (vfs_optionisset(vfsp, MNTOPT_ATIME, NULL)) 298 atime_changed_cb(zfsvfs, B_TRUE); 299 else if (vfs_optionisset(vfsp, MNTOPT_NOATIME, NULL)) 300 atime_changed_cb(zfsvfs, B_FALSE); 301 302 if (vfs_optionisset(vfsp, MNTOPT_XATTR, NULL)) 303 xattr_changed_cb(zfsvfs, B_TRUE); 304 else if (vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL)) 305 xattr_changed_cb(zfsvfs, B_FALSE); 306 307 return (0); 308} 309 310static int 311zfs_register_callbacks(vfs_t vfsp) 312{ 313* struct dsl_dataset ds = NULL; 314* objset_t os = NULL; 315* zfsvfs_t zfsvfs = NULL; 316* int readonly, do_readonly = FALSE; 317 int setuid, do_setuid = FALSE; 318 int exec, do_exec = FALSE; 319 int xattr, do_xattr = FALSE; 320 int error = 0; 321 322 ASSERT(vfsp); 323 zfsvfs = vfsp->vfs_data; 324 ASSERT(zfsvfs); 325 os = zfsvfs->z_os; 326 327 /* 328 * The act of registering our callbacks will destroy any mount 329 * options we may have. In order to enable temporary overrides 330 * of mount options, we stash away the current values and 331 * restore them after we register the callbacks. 332 / 333* if (vfs_optionisset(vfsp, MNTOPT_RO, NULL)) { 334 readonly = B_TRUE; 335 do_readonly = B_TRUE; 336 } else if (vfs_optionisset(vfsp, MNTOPT_RW, NULL)) { 337 readonly = B_FALSE; 338 do_readonly = B_TRUE; 339 } 340 if (vfs_optionisset(vfsp, MNTOPT_NOSUID, NULL)) { 341 setuid = B_FALSE; 342 do_setuid = B_TRUE; 343 } else { 344 if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) { 345 setuid = B_FALSE; 346 do_setuid = B_TRUE; 347 } else if (vfs_optionisset(vfsp, MNTOPT_SETUID, NULL)) { 348 setuid = B_TRUE; 349 do_setuid = B_TRUE; 350 } 351 } 352 if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL)) { 353 exec = B_FALSE; 354 do_exec = B_TRUE; 355 } else if (vfs_optionisset(vfsp, MNTOPT_EXEC, NULL)) { 356 exec = B_TRUE; 357 do_exec = B_TRUE; 358 } 359 if (vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL)) { 360 xattr = B_FALSE; 361 do_xattr = B_TRUE; 362 } else if (vfs_optionisset(vfsp, MNTOPT_XATTR, NULL)) { 363 xattr = B_TRUE; 364 do_xattr = B_TRUE; 365 } 366 367 /* 368 * Register property callbacks. 369 * 370 * It would probably be fine to just check for i/o error from 371 * the first prop_register(), but I guess I like to go 372 * overboard... 373 / 374* ds = dmu_objset_ds(os); 375 error = dsl_prop_register(ds, "atime", atime_changed_cb, zfsvfs); 376 error = error ? error : dsl_prop_register(ds, 377 "xattr", xattr_changed_cb, zfsvfs); 378 error = error ? error : dsl_prop_register(ds, 379 "recordsize", blksz_changed_cb, zfsvfs); 380 error = error ? error : dsl_prop_register(ds, 381 "readonly", readonly_changed_cb, zfsvfs); 382 error = error ? error : dsl_prop_register(ds, 383 "setuid", setuid_changed_cb, zfsvfs); 384 error = error ? error : dsl_prop_register(ds, 385 "exec", exec_changed_cb, zfsvfs); 386 error = error ? error : dsl_prop_register(ds, 387 "snapdir", snapdir_changed_cb, zfsvfs); 388 error = error ? error : dsl_prop_register(ds, 389 "aclmode", acl_mode_changed_cb, zfsvfs); 390 error = error ? error : dsl_prop_register(ds, 391 "aclinherit", acl_inherit_changed_cb, zfsvfs); 392 if (error) 393 goto unregister; 394 395 /* 396 * Invoke our callbacks to restore temporary mount options. 397 / 398* if (do_readonly) 399 readonly_changed_cb(zfsvfs, readonly); 400 if (do_setuid) 401 setuid_changed_cb(zfsvfs, setuid); 402 if (do_exec) 403 exec_changed_cb(zfsvfs, exec); 404 if (do_xattr) 405 xattr_changed_cb(zfsvfs, xattr); 406 407 return (0); 408 409unregister: 410 /* 411 * We may attempt to unregister some callbacks that are not 412 * registered, but this is OK; it will simply return ENOMSG, 413 * which we will ignore. 414 / 415* (void) dsl_prop_unregister(ds, "atime", atime_changed_cb, zfsvfs); 416 (void) dsl_prop_unregister(ds, "xattr", xattr_changed_cb, zfsvfs); 417 (void) dsl_prop_unregister(ds, "recordsize", blksz_changed_cb, zfsvfs); 418 (void) dsl_prop_unregister(ds, "readonly", readonly_changed_cb, zfsvfs); 419 (void) dsl_prop_unregister(ds, "setuid", setuid_changed_cb, zfsvfs); 420 (void) dsl_prop_unregister(ds, "exec", exec_changed_cb, zfsvfs); 421 (void) dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, zfsvfs); 422 (void) dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb, zfsvfs); 423 (void) dsl_prop_unregister(ds, "aclinherit", acl_inherit_changed_cb, 424 zfsvfs); 425 return (error); 426 427} 428 429static int 430zfs_domount(vfs_t vfsp, char osname, kthread_t td) 431{ 432* cred_t cr = td->td_ucred; 433* uint64_t recordsize, readonly; 434 int error = 0; 435 int mode; 436 zfsvfs_t zfsvfs; 437* znode_t zp = NULL; 438* 439 ASSERT(vfsp); 440 ASSERT(osname); 441 442 /* 443 * Initialize the zfs-specific filesystem structure. 444 * Should probably make this a kmem cache, shuffle fields, 445 * and just bzero up to z_hold_mtx[]. 446 / 447* zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP); 448 zfsvfs->z_vfs = vfsp; 449 zfsvfs->z_parent = zfsvfs; 450 zfsvfs->z_assign = TXG_NOWAIT; 451 zfsvfs->z_max_blksz = SPA_MAXBLOCKSIZE; 452 zfsvfs->z_show_ctldir = ZFS_SNAPDIR_VISIBLE; 453 454 mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL); 455 list_create(&zfsvfs->z_all_znodes, sizeof (znode_t), 456 offsetof(znode_t, z_link_node)); 457 rw_init(&zfsvfs->z_um_lock, NULL, RW_DEFAULT, NULL); 458 459 if (error = dsl_prop_get_integer(osname, "recordsize", &recordsize, 460 NULL)) 461 goto out; 462 zfsvfs->z_vfs->vfs_bsize = recordsize; 463 464 vfsp->vfs_data = zfsvfs; 465 vfsp->mnt_flag \|= MNT_LOCAL; 466 vfsp->mnt_kern_flag \|= MNTK_MPSAFE; 467 vfsp->mnt_kern_flag \|= MNTK_LOOKUP_SHARED; 468 469 if (error = dsl_prop_get_integer(osname, "readonly", &readonly, NULL)) 470 goto out; 471 472 if (readonly) 473 mode = DS_MODE_PRIMARY \| DS_MODE_READONLY; 474 else 475 mode = DS_MODE_PRIMARY; 476 477 error = dmu_objset_open(osname, DMU_OST_ZFS, mode, &zfsvfs->z_os); 478 if (error == EROFS) { 479 mode = DS_MODE_PRIMARY \| DS_MODE_READONLY; 480 error = dmu_objset_open(osname, DMU_OST_ZFS, mode, 481 &zfsvfs->z_os); 482 } 483 484 if (error) 485 goto out; 486 487 if (error = zfs_init_fs(zfsvfs, &zp, cr)) 488 goto out; 489 490 if (dmu_objset_is_snapshot(zfsvfs->z_os)) { 491 uint64_t xattr; 492 493 ASSERT(mode & DS_MODE_READONLY); 494 atime_changed_cb(zfsvfs, B_FALSE); 495 readonly_changed_cb(zfsvfs, B_TRUE); 496 if (error = dsl_prop_get_integer(osname, "xattr", &xattr, NULL)) 497 goto out; 498 xattr_changed_cb(zfsvfs, xattr); 499 zfsvfs->z_issnap = B_TRUE; 500 } else { 501 error = zfs_register_callbacks(vfsp); 502 if (error) 503 goto out; 504 505 zfs_unlinked_drain(zfsvfs); 506 507 /* 508 * Parse and replay the intent log. 509 / 510* zil_replay(zfsvfs->z_os, zfsvfs, &zfsvfs->z_assign, 511 zfs_replay_vector); 512 513 if (!zil_disable) 514 zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data); 515 } 516 517 vfs_mountedfrom(vfsp, osname); 518 519 if (!zfsvfs->z_issnap) 520 zfsctl_create(zfsvfs); 521out: 522 if (error) { 523 if (zfsvfs->z_os) 524 dmu_objset_close(zfsvfs->z_os); 525 rw_destroy(&zfsvfs->z_um_lock); 526 mutex_destroy(&zfsvfs->z_znodes_lock); 527 kmem_free(zfsvfs, sizeof (zfsvfs_t)); 528 } else { 529 atomic_add_32(&zfs_active_fs_count, 1); 530 } 531 532 return (error); 533 534} 535 536void 537zfs_unregister_callbacks(zfsvfs_t zfsvfs) 538{ 539* objset_t os = zfsvfs->z_os; 540* struct dsl_dataset ds; 541* 542 /* 543 * Unregister properties. 544 / 545* if (!dmu_objset_is_snapshot(os)) { 546 ds = dmu_objset_ds(os); 547 VERIFY(dsl_prop_unregister(ds, "atime", atime_changed_cb, 548 zfsvfs) == 0); 549 550 VERIFY(dsl_prop_unregister(ds, "xattr", xattr_changed_cb, 551 zfsvfs) == 0); 552 553 VERIFY(dsl_prop_unregister(ds, "recordsize", blksz_changed_cb, 554 zfsvfs) == 0); 555 556 VERIFY(dsl_prop_unregister(ds, "readonly", readonly_changed_cb, 557 zfsvfs) == 0); 558 559 VERIFY(dsl_prop_unregister(ds, "setuid", setuid_changed_cb, 560 zfsvfs) == 0); 561 562 VERIFY(dsl_prop_unregister(ds, "exec", exec_changed_cb, 563 zfsvfs) == 0); 564 565 VERIFY(dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, 566 zfsvfs) == 0); 567 568 VERIFY(dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb, 569 zfsvfs) == 0); 570 571 VERIFY(dsl_prop_unregister(ds, "aclinherit", 572 acl_inherit_changed_cb, zfsvfs) == 0); 573 } 574} 575 576/ARGSUSED/ 577static int 578zfs_mount(vfs_t vfsp, kthread_t td) 579{ 580 char from; 581* int error; 582 583 /* 584 * When doing a remount, we simply refresh our temporary properties 585 * according to those options set in the current VFS options. 586 / 587* if (vfsp->vfs_flag & MS_REMOUNT) 588 return (zfs_refresh_properties(vfsp)); 589 590 if (vfs_getopt(vfsp->mnt_optnew, "from", (void *)&from, NULL)) 591* return (EINVAL); 592 593 DROP_GIANT(); 594 error = zfs_domount(vfsp, from, td); 595 PICKUP_GIANT(); 596 return (error); 597} 598 599static int 600zfs_statfs(vfs_t vfsp, struct statfs statp, kthread_t td) 601{ 602* zfsvfs_t zfsvfs = vfsp->vfs_data; 603* uint64_t refdbytes, availbytes, usedobjs, availobjs; 604 605 statp->f_version = STATFS_VERSION; 606 607 ZFS_ENTER(zfsvfs); 608 609 dmu_objset_space(zfsvfs->z_os, 610 &refdbytes, &availbytes, &usedobjs, &availobjs); 611 612 /* 613 * The underlying storage pool actually uses multiple block sizes. 614 * We report the fragsize as the smallest block size we support, 615 * and we report our blocksize as the filesystem's maximum blocksize. 616 / 617* statp->f_bsize = zfsvfs->z_vfs->vfs_bsize; 618 statp->f_iosize = zfsvfs->z_vfs->vfs_bsize; 619 620 /* 621 * The following report "total" blocks of various kinds in the 622 * file system, but reported in terms of f_frsize - the 623 * "fragment" size. 624 / 625* 626 statp->f_blocks = (refdbytes + availbytes) / statp->f_bsize; 627 statp->f_bfree = availbytes / statp->f_bsize; 628 statp->f_bavail = statp->f_bfree; /* no root reservation / 629* 630 /* 631 * statvfs() should really be called statufs(), because it assumes 632 * static metadata. ZFS doesn't preallocate files, so the best 633 * we can do is report the max that could possibly fit in f_files, 634 * and that minus the number actually used in f_ffree. 635 * For f_ffree, report the smaller of the number of object available 636 * and the number of blocks (each object will take at least a block). 637 / 638* statp->f_ffree = MIN(availobjs, statp->f_bfree); 639 statp->f_files = statp->f_ffree + usedobjs; 640 641 /* 642 * We're a zfs filesystem. 643 / 644* (void) strlcpy(statp->f_fstypename, "zfs", sizeof(statp->f_fstypename)); 645 646 strlcpy(statp->f_mntfromname, vfsp->mnt_stat.f_mntfromname, 647 sizeof(statp->f_mntfromname)); 648 strlcpy(statp->f_mntonname, vfsp->mnt_stat.f_mntonname, 649 sizeof(statp->f_mntonname)); 650 651 statp->f_namemax = ZFS_MAXNAMELEN; 652 653 ZFS_EXIT(zfsvfs); 654 return (0); 655} 656 657static int 658zfs_root(vfs_t vfsp, int flags, vnode_t vpp, kthread_t td) 659{ 660 zfsvfs_t zfsvfs = vfsp->vfs_data; 661* znode_t rootzp; 662* int error; 663 664 ZFS_ENTER(zfsvfs); 665 666 error = zfs_zget(zfsvfs, zfsvfs->z_root, &rootzp); 667 if (error == 0) { 668 vpp = ZTOV(rootzp); 669* error = vn_lock(vpp, flags, td); 670* (vpp)->v_vflag \|= VV_ROOT; 671* } 672 673 ZFS_EXIT(zfsvfs); 674 return (error); 675} 676 677/ARGSUSED/ 678static int 679zfs_umount(vfs_t vfsp, int fflag, kthread_t td) 680{ 681 zfsvfs_t zfsvfs = vfsp->vfs_data; 682* cred_t cr = td->td_ucred; 683* int ret; 684 685 if ((ret = secpolicy_fs_unmount(cr, vfsp)) != 0) 686 return (ret); 687 688 (void) dnlc_purge_vfsp(vfsp, 0); 689 690 /* 691 * Unmount any snapshots mounted under .zfs before unmounting the 692 * dataset itself. 693 / 694* if (zfsvfs->z_ctldir != NULL) { 695 if ((ret = zfsctl_umount_snapshots(vfsp, fflag, cr)) != 0) 696 return (ret); 697 ret = vflush(vfsp, 0, 0, td); 698 ASSERT(ret == EBUSY); 699 if (!(fflag & MS_FORCE)) { 700 if (zfsvfs->z_ctldir->v_count > 1) 701 return (EBUSY); 702 ASSERT(zfsvfs->z_ctldir->v_count == 1); 703 } 704 zfsctl_destroy(zfsvfs); 705 ASSERT(zfsvfs->z_ctldir == NULL); 706 } 707 708 /* 709 * Flush all the files. 710 / 711* ret = vflush(vfsp, 1, (fflag & MS_FORCE) ? FORCECLOSE : 0, td); 712 if (ret != 0) { 713 if (!zfsvfs->z_issnap) { 714 zfsctl_create(zfsvfs); 715 ASSERT(zfsvfs->z_ctldir != NULL); 716 } 717 return (ret); 718 } 719 720 if (fflag & MS_FORCE) { 721 MNT_ILOCK(vfsp); 722 vfsp->mnt_kern_flag \|= MNTK_UNMOUNTF; 723 MNT_IUNLOCK(vfsp); 724 zfsvfs->z_unmounted1 = B_TRUE; 725 726 /* 727 * Wait for all zfs threads to leave zfs. 728 * Grabbing a rwlock as reader in all vops and 729 * as writer here doesn't work because it too easy to get 730 * multiple reader enters as zfs can re-enter itself. 731 * This can lead to deadlock if there is an intervening 732 * rw_enter as writer. 733 * So a file system threads ref count (z_op_cnt) is used. 734 * A polling loop on z_op_cnt may seem inefficient, but 735 * - this saves all threads on exit from having to grab a 736 * mutex in order to cv_signal 737 * - only occurs on forced unmount in the rare case when 738 * there are outstanding threads within the file system. 739 / 740* while (zfsvfs->z_op_cnt) { 741 delay(1); 742 } 743 } 744 745 zfs_objset_close(zfsvfs); 746 VFS_RELE(vfsp); 747 zfs_freevfs(vfsp); 748 749 return (0); 750} 751 752static int 753zfs_vget(vfs_t vfsp, ino_t ino, int flags, vnode_t vpp) 754{ 755* zfsvfs_t zfsvfs = vfsp->vfs_data; 756* znode_t zp; 757* int err; 758 759 ZFS_ENTER(zfsvfs); 760 err = zfs_zget(zfsvfs, ino, &zp); 761 if (err == 0 && zp->z_unlinked) { 762 VN_RELE(ZTOV(zp)); 763 err = EINVAL; 764 } 765 if (err != 0) 766 vpp = NULL; 767* else { 768 vpp = ZTOV(zp); 769* vn_lock(vpp, flags, curthread); 770* } 771 ZFS_EXIT(zfsvfs); 772 return (0); 773} 774 775static int 776zfs_fhtovp(vfs_t vfsp, fid_t fidp, vnode_t *vpp) 777{ 778* kthread_t td = curthread; 779* zfsvfs_t zfsvfs = vfsp->vfs_data; 780* znode_t zp; 781* uint64_t object = 0; 782 uint64_t fid_gen = 0; 783 uint64_t gen_mask; 784 uint64_t zp_gen; 785 int i, err; 786 787 vpp = NULL; 788* 789 ZFS_ENTER(zfsvfs); 790 791 if (fidp->fid_len == LONG_FID_LEN) { 792 zfid_long_t zlfid = (zfid_long_t )fidp; 793 uint64_t objsetid = 0; 794 uint64_t setgen = 0; 795 796 for (i = 0; i < sizeof (zlfid->zf_setid); i++) 797 objsetid \|= ((uint64_t)zlfid->zf_setid[i]) << (8 * i); 798 799 for (i = 0; i < sizeof (zlfid->zf_setgen); i++) 800 setgen \|= ((uint64_t)zlfid->zf_setgen[i]) << (8 * i); 801 802 ZFS_EXIT(zfsvfs); 803 804 err = zfsctl_lookup_objset(vfsp, objsetid, &zfsvfs); 805 if (err) 806 return (EINVAL); 807 ZFS_ENTER(zfsvfs); 808 } 809 810 if (fidp->fid_len == SHORT_FID_LEN \|\| fidp->fid_len == LONG_FID_LEN) { 811 zfid_short_t zfid = (zfid_short_t )fidp; 812 813 for (i = 0; i < sizeof (zfid->zf_object); i++) 814 object \|= ((uint64_t)zfid->zf_object[i]) << (8 * i); 815 816 for (i = 0; i < sizeof (zfid->zf_gen); i++) 817 fid_gen \|= ((uint64_t)zfid->zf_gen[i]) << (8 * i); 818 } else { 819 ZFS_EXIT(zfsvfs); 820 return (EINVAL); 821 } 822 823 /* A zero fid_gen means we are in the .zfs control directories / 824* if (fid_gen == 0 && 825 (object == ZFSCTL_INO_ROOT \|\| object == ZFSCTL_INO_SNAPDIR)) { 826 vpp = zfsvfs->z_ctldir; 827* ASSERT(vpp != NULL); 828* if (object == ZFSCTL_INO_SNAPDIR) { 829 VERIFY(zfsctl_root_lookup(vpp, "snapshot", vpp, NULL, 830* 0, NULL, NULL) == 0); 831 } else { 832 VN_HOLD(vpp); 833* } 834 ZFS_EXIT(zfsvfs); 835 /* XXX: LK_RETRY? / 836* vn_lock(vpp, LK_EXCLUSIVE \| LK_RETRY, td); 837* return (0); 838 } 839 840 gen_mask = -1ULL >> (64 - 8 * i); 841 842 dprintf("getting %llu [%u mask %llx]\n", object, fid_gen, gen_mask); 843 if (err = zfs_zget(zfsvfs, object, &zp)) { 844 ZFS_EXIT(zfsvfs); 845 return (err); 846 } 847 zp_gen = zp->z_phys->zp_gen & gen_mask; 848 if (zp_gen == 0) 849 zp_gen = 1; 850 if (zp->z_unlinked \|\| zp_gen != fid_gen) { 851 dprintf("znode gen (%u) != fid gen (%u)\n", zp_gen, fid_gen); 852 VN_RELE(ZTOV(zp)); 853 ZFS_EXIT(zfsvfs); 854 return (EINVAL); 855 } 856 857 vpp = ZTOV(zp); 858* /* XXX: LK_RETRY? / 859* vn_lock(vpp, LK_EXCLUSIVE \| LK_RETRY, td); 860* vnode_create_vobject(vpp, zp->z_phys->zp_size, td); 861* ZFS_EXIT(zfsvfs); 862 return (0); 863} 864 865static void 866zfs_objset_close(zfsvfs_t zfsvfs) 867{ 868* znode_t zp, nextzp; 869 objset_t os = zfsvfs->z_os; 870* 871 /* 872 * For forced unmount, at this point all vops except zfs_inactive 873 * are erroring EIO. We need to now suspend zfs_inactive threads 874 * while we are freeing dbufs before switching zfs_inactive 875 * to use behaviour without a objset. 876 / 877* rw_enter(&zfsvfs->z_um_lock, RW_WRITER); 878 879 /* 880 * Release all holds on dbufs 881 * Note, although we have stopped all other vop threads and 882 * zfs_inactive(), the dmu can callback via znode_pageout_func() 883 * which can zfs_znode_free() the znode. 884 * So we lock z_all_znodes; search the list for a held 885 * dbuf; drop the lock (we know zp can't disappear if we hold 886 * a dbuf lock; then regrab the lock and restart. 887 / 888* mutex_enter(&zfsvfs->z_znodes_lock); 889 for (zp = list_head(&zfsvfs->z_all_znodes); zp; zp = nextzp) { 890 nextzp = list_next(&zfsvfs->z_all_znodes, zp); 891 if (zp->z_dbuf_held) { 892 /* dbufs should only be held when force unmounting / 893* zp->z_dbuf_held = 0; 894 mutex_exit(&zfsvfs->z_znodes_lock); 895 dmu_buf_rele(zp->z_dbuf, NULL); 896 /* Start again / 897* mutex_enter(&zfsvfs->z_znodes_lock); 898 nextzp = list_head(&zfsvfs->z_all_znodes); 899 } 900 } 901 mutex_exit(&zfsvfs->z_znodes_lock); 902 903 /* 904 * Unregister properties. 905 / 906* if (!dmu_objset_is_snapshot(os)) 907 zfs_unregister_callbacks(zfsvfs); 908 909 /* 910 * Switch zfs_inactive to behaviour without an objset. 911 * It just tosses cached pages and frees the znode & vnode. 912 * Then re-enable zfs_inactive threads in that new behaviour. 913 / 914* zfsvfs->z_unmounted2 = B_TRUE; 915 rw_exit(&zfsvfs->z_um_lock); /* re-enable any zfs_inactive threads / 916* 917 /* 918 * Close the zil. Can't close the zil while zfs_inactive 919 * threads are blocked as zil_close can call zfs_inactive. 920 / 921* if (zfsvfs->z_log) { 922 zil_close(zfsvfs->z_log); 923 zfsvfs->z_log = NULL; 924 } 925 926 /* 927 * Evict all dbufs so that cached znodes will be freed 928 / 929* if (dmu_objset_evict_dbufs(os, 1)) { 930 txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0); 931 (void) dmu_objset_evict_dbufs(os, 0); 932 } 933 934 /* 935 * Finally close the objset 936 / 937* dmu_objset_close(os); 938} 939 940static void 941zfs_freevfs(vfs_t vfsp) 942{ 943* zfsvfs_t zfsvfs = vfsp->vfs_data; 944* int i; 945 946 for (i = 0; i != ZFS_OBJ_MTX_SZ; i++) 947 mutex_destroy(&zfsvfs->z_hold_mtx[i]); 948 rw_destroy(&zfsvfs->z_um_lock); 949 mutex_destroy(&zfsvfs->z_znodes_lock); 950 kmem_free(zfsvfs, sizeof (zfsvfs_t)); 951 952 atomic_add_32(&zfs_active_fs_count, -1); 953} 954 955void 956zfs_init(void) 957{ 958 959 printf("ZFS filesystem version " ZFS_VERSION_STRING "\n"); 960 961 /* 962 * Initialize .zfs directory structures 963 / 964* zfsctl_init(); 965 966 /* 967 * Initialize znode cache, vnode ops, etc... 968 / 969* zfs_znode_init(); 970} 971 972void 973zfs_fini(void) 974{ 975 zfsctl_fini(); 976 zfs_znode_fini(); 977} 978 979int 980zfs_busy(void) 981{ 982 return (zfs_active_fs_count != 0); 983}	57struct mtx zfs_debug_mtx; 58MTX_SYSINIT(zfs_debug_mtx, &zfs_debug_mtx, "zfs_debug", MTX_DEF); 59SYSCTL_NODE(_vfs, OID_AUTO, zfs, CTLFLAG_RW, 0, "ZFS file system"); 60int zfs_debug_level = 0; 61TUNABLE_INT("vfs.zfs.debug", &zfs_debug_level); 62SYSCTL_INT(_vfs_zfs, OID_AUTO, debug, CTLFLAG_RW, &zfs_debug_level, 0, 63 "Debug level"); 64 65static int zfs_mount(vfs_t vfsp, kthread_t td); 66static int zfs_umount(vfs_t vfsp, int fflag, kthread_t td); 67static int zfs_root(vfs_t vfsp, int flags, vnode_t vpp, kthread_t td); 68static int zfs_statfs(vfs_t vfsp, struct statfs statp, kthread_t td); 69static int zfs_vget(vfs_t vfsp, ino_t ino, int flags, vnode_t *vpp); 70static int zfs_sync(vfs_t vfsp, int waitfor, kthread_t td); 71static int zfs_fhtovp(vfs_t vfsp, fid_t fidp, vnode_t vpp); 72static void zfs_objset_close(zfsvfs_t zfsvfs); 73static void zfs_freevfs(vfs_t vfsp); 74 75static struct vfsops zfs_vfsops = { 76 .vfs_mount = zfs_mount, 77 .vfs_unmount = zfs_umount, 78 .vfs_root = zfs_root, 79 .vfs_statfs = zfs_statfs, 80 .vfs_vget = zfs_vget, 81 .vfs_sync = zfs_sync, 82 .vfs_fhtovp = zfs_fhtovp, 83}; 84 85VFS_SET(zfs_vfsops, zfs, VFCF_JAIL); 86 87/ 88 * We need to keep a count of active fs's. 89 * This is necessary to prevent our module 90 * from being unloaded after a umount -f 91 / 92static uint32_t zfs_active_fs_count = 0; 93 94/ARGSUSED/ 95static int 96zfs_sync(vfs_t vfsp, int waitfor, kthread_t td) 97{ 98 99 / 100 * Data integrity is job one. We don't want a compromised kernel 101 * writing to the storage pool, so we never sync during panic. 102 / 103* if (panicstr) 104 return (0); 105 106 if (vfsp != NULL) { 107 /* 108 * Sync a specific filesystem. 109 / 110* zfsvfs_t zfsvfs = vfsp->vfs_data; 111* int error; 112 113 error = vfs_stdsync(vfsp, waitfor, td); 114 if (error != 0) 115 return (error); 116 117 ZFS_ENTER(zfsvfs); 118 if (zfsvfs->z_log != NULL) 119 zil_commit(zfsvfs->z_log, UINT64_MAX, 0); 120 else 121 txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0); 122 ZFS_EXIT(zfsvfs); 123 } else { 124 /* 125 * Sync all ZFS filesystems. This is what happens when you 126 * run sync(1M). Unlike other filesystems, ZFS honors the 127 * request by waiting for all pools to commit all dirty data. 128 / 129* spa_sync_allpools(); 130 } 131 132 return (0); 133} 134 135static void 136atime_changed_cb(void arg, uint64_t newval) 137{ 138* zfsvfs_t zfsvfs = arg; 139* 140 if (newval == TRUE) { 141 zfsvfs->z_atime = TRUE; 142 zfsvfs->z_vfs->vfs_flag &= ~MNT_NOATIME; 143 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME); 144 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_ATIME, NULL, 0); 145 } else { 146 zfsvfs->z_atime = FALSE; 147 zfsvfs->z_vfs->vfs_flag \|= MNT_NOATIME; 148 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_ATIME); 149 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME, NULL, 0); 150 } 151} 152 153static void 154xattr_changed_cb(void arg, uint64_t newval) 155{ 156* zfsvfs_t zfsvfs = arg; 157* 158 if (newval == TRUE) { 159 /* XXX locking on vfs_flag? / 160#ifdef TODO 161* zfsvfs->z_vfs->vfs_flag \|= VFS_XATTR; 162#endif 163 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOXATTR); 164 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_XATTR, NULL, 0); 165 } else { 166 /* XXX locking on vfs_flag? / 167#ifdef TODO 168* zfsvfs->z_vfs->vfs_flag &= ~VFS_XATTR; 169#endif 170 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_XATTR); 171 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOXATTR, NULL, 0); 172 } 173} 174 175static void 176blksz_changed_cb(void arg, uint64_t newval) 177{ 178* zfsvfs_t zfsvfs = arg; 179* 180 if (newval < SPA_MINBLOCKSIZE \|\| 181 newval > SPA_MAXBLOCKSIZE \|\| !ISP2(newval)) 182 newval = SPA_MAXBLOCKSIZE; 183 184 zfsvfs->z_max_blksz = newval; 185 zfsvfs->z_vfs->vfs_bsize = newval; 186} 187 188static void 189readonly_changed_cb(void arg, uint64_t newval) 190{ 191* zfsvfs_t zfsvfs = arg; 192* 193 if (newval) { 194 /* XXX locking on vfs_flag? / 195* zfsvfs->z_vfs->vfs_flag \|= VFS_RDONLY; 196 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RW); 197 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RO, NULL, 0); 198 } else { 199 /* XXX locking on vfs_flag? / 200* zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY; 201 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RO); 202 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RW, NULL, 0); 203 } 204} 205 206static void 207setuid_changed_cb(void arg, uint64_t newval) 208{ 209* zfsvfs_t zfsvfs = arg; 210* 211 if (newval == FALSE) { 212 zfsvfs->z_vfs->vfs_flag \|= VFS_NOSETUID; 213 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_SETUID); 214 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID, NULL, 0); 215 } else { 216 zfsvfs->z_vfs->vfs_flag &= ~VFS_NOSETUID; 217 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID); 218 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_SETUID, NULL, 0); 219 } 220} 221 222static void 223exec_changed_cb(void arg, uint64_t newval) 224{ 225* zfsvfs_t zfsvfs = arg; 226* 227 if (newval == FALSE) { 228 zfsvfs->z_vfs->vfs_flag \|= VFS_NOEXEC; 229 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_EXEC); 230 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC, NULL, 0); 231 } else { 232 zfsvfs->z_vfs->vfs_flag &= ~VFS_NOEXEC; 233 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC); 234 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_EXEC, NULL, 0); 235 } 236} 237 238static void 239snapdir_changed_cb(void arg, uint64_t newval) 240{ 241* zfsvfs_t zfsvfs = arg; 242* 243 zfsvfs->z_show_ctldir = newval; 244} 245 246static void 247acl_mode_changed_cb(void arg, uint64_t newval) 248{ 249* zfsvfs_t zfsvfs = arg; 250* 251 zfsvfs->z_acl_mode = newval; 252} 253 254static void 255acl_inherit_changed_cb(void arg, uint64_t newval) 256{ 257* zfsvfs_t zfsvfs = arg; 258* 259 zfsvfs->z_acl_inherit = newval; 260} 261 262static int 263zfs_refresh_properties(vfs_t vfsp) 264{ 265* zfsvfs_t zfsvfs = vfsp->vfs_data; 266* 267 /* 268 * Remount operations default to "rw" unless "ro" is explicitly 269 * specified. 270 / 271* if (vfs_optionisset(vfsp, MNTOPT_RO, NULL)) { 272 readonly_changed_cb(zfsvfs, B_TRUE); 273 } else { 274 if (!dmu_objset_is_snapshot(zfsvfs->z_os)) 275 readonly_changed_cb(zfsvfs, B_FALSE); 276 else if (vfs_optionisset(vfsp, MNTOPT_RW, NULL)) 277 return (EROFS); 278 } 279 280 if (vfs_optionisset(vfsp, MNTOPT_NOSUID, NULL)) { 281 setuid_changed_cb(zfsvfs, B_FALSE); 282 } else { 283 if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) 284 setuid_changed_cb(zfsvfs, B_FALSE); 285 else if (vfs_optionisset(vfsp, MNTOPT_SETUID, NULL)) 286 setuid_changed_cb(zfsvfs, B_TRUE); 287 } 288 289 if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL)) 290 exec_changed_cb(zfsvfs, B_FALSE); 291 else if (vfs_optionisset(vfsp, MNTOPT_EXEC, NULL)) 292 exec_changed_cb(zfsvfs, B_TRUE); 293 294 if (vfs_optionisset(vfsp, MNTOPT_ATIME, NULL)) 295 atime_changed_cb(zfsvfs, B_TRUE); 296 else if (vfs_optionisset(vfsp, MNTOPT_NOATIME, NULL)) 297 atime_changed_cb(zfsvfs, B_FALSE); 298 299 if (vfs_optionisset(vfsp, MNTOPT_XATTR, NULL)) 300 xattr_changed_cb(zfsvfs, B_TRUE); 301 else if (vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL)) 302 xattr_changed_cb(zfsvfs, B_FALSE); 303 304 return (0); 305} 306 307static int 308zfs_register_callbacks(vfs_t vfsp) 309{ 310* struct dsl_dataset ds = NULL; 311* objset_t os = NULL; 312* zfsvfs_t zfsvfs = NULL; 313* int readonly, do_readonly = FALSE; 314 int setuid, do_setuid = FALSE; 315 int exec, do_exec = FALSE; 316 int xattr, do_xattr = FALSE; 317 int error = 0; 318 319 ASSERT(vfsp); 320 zfsvfs = vfsp->vfs_data; 321 ASSERT(zfsvfs); 322 os = zfsvfs->z_os; 323 324 /* 325 * The act of registering our callbacks will destroy any mount 326 * options we may have. In order to enable temporary overrides 327 * of mount options, we stash away the current values and 328 * restore them after we register the callbacks. 329 / 330* if (vfs_optionisset(vfsp, MNTOPT_RO, NULL)) { 331 readonly = B_TRUE; 332 do_readonly = B_TRUE; 333 } else if (vfs_optionisset(vfsp, MNTOPT_RW, NULL)) { 334 readonly = B_FALSE; 335 do_readonly = B_TRUE; 336 } 337 if (vfs_optionisset(vfsp, MNTOPT_NOSUID, NULL)) { 338 setuid = B_FALSE; 339 do_setuid = B_TRUE; 340 } else { 341 if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) { 342 setuid = B_FALSE; 343 do_setuid = B_TRUE; 344 } else if (vfs_optionisset(vfsp, MNTOPT_SETUID, NULL)) { 345 setuid = B_TRUE; 346 do_setuid = B_TRUE; 347 } 348 } 349 if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL)) { 350 exec = B_FALSE; 351 do_exec = B_TRUE; 352 } else if (vfs_optionisset(vfsp, MNTOPT_EXEC, NULL)) { 353 exec = B_TRUE; 354 do_exec = B_TRUE; 355 } 356 if (vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL)) { 357 xattr = B_FALSE; 358 do_xattr = B_TRUE; 359 } else if (vfs_optionisset(vfsp, MNTOPT_XATTR, NULL)) { 360 xattr = B_TRUE; 361 do_xattr = B_TRUE; 362 } 363 364 /* 365 * Register property callbacks. 366 * 367 * It would probably be fine to just check for i/o error from 368 * the first prop_register(), but I guess I like to go 369 * overboard... 370 / 371* ds = dmu_objset_ds(os); 372 error = dsl_prop_register(ds, "atime", atime_changed_cb, zfsvfs); 373 error = error ? error : dsl_prop_register(ds, 374 "xattr", xattr_changed_cb, zfsvfs); 375 error = error ? error : dsl_prop_register(ds, 376 "recordsize", blksz_changed_cb, zfsvfs); 377 error = error ? error : dsl_prop_register(ds, 378 "readonly", readonly_changed_cb, zfsvfs); 379 error = error ? error : dsl_prop_register(ds, 380 "setuid", setuid_changed_cb, zfsvfs); 381 error = error ? error : dsl_prop_register(ds, 382 "exec", exec_changed_cb, zfsvfs); 383 error = error ? error : dsl_prop_register(ds, 384 "snapdir", snapdir_changed_cb, zfsvfs); 385 error = error ? error : dsl_prop_register(ds, 386 "aclmode", acl_mode_changed_cb, zfsvfs); 387 error = error ? error : dsl_prop_register(ds, 388 "aclinherit", acl_inherit_changed_cb, zfsvfs); 389 if (error) 390 goto unregister; 391 392 /* 393 * Invoke our callbacks to restore temporary mount options. 394 / 395* if (do_readonly) 396 readonly_changed_cb(zfsvfs, readonly); 397 if (do_setuid) 398 setuid_changed_cb(zfsvfs, setuid); 399 if (do_exec) 400 exec_changed_cb(zfsvfs, exec); 401 if (do_xattr) 402 xattr_changed_cb(zfsvfs, xattr); 403 404 return (0); 405 406unregister: 407 /* 408 * We may attempt to unregister some callbacks that are not 409 * registered, but this is OK; it will simply return ENOMSG, 410 * which we will ignore. 411 / 412* (void) dsl_prop_unregister(ds, "atime", atime_changed_cb, zfsvfs); 413 (void) dsl_prop_unregister(ds, "xattr", xattr_changed_cb, zfsvfs); 414 (void) dsl_prop_unregister(ds, "recordsize", blksz_changed_cb, zfsvfs); 415 (void) dsl_prop_unregister(ds, "readonly", readonly_changed_cb, zfsvfs); 416 (void) dsl_prop_unregister(ds, "setuid", setuid_changed_cb, zfsvfs); 417 (void) dsl_prop_unregister(ds, "exec", exec_changed_cb, zfsvfs); 418 (void) dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, zfsvfs); 419 (void) dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb, zfsvfs); 420 (void) dsl_prop_unregister(ds, "aclinherit", acl_inherit_changed_cb, 421 zfsvfs); 422 return (error); 423 424} 425 426static int 427zfs_domount(vfs_t vfsp, char osname, kthread_t td) 428{ 429* cred_t cr = td->td_ucred; 430* uint64_t recordsize, readonly; 431 int error = 0; 432 int mode; 433 zfsvfs_t zfsvfs; 434* znode_t zp = NULL; 435* 436 ASSERT(vfsp); 437 ASSERT(osname); 438 439 /* 440 * Initialize the zfs-specific filesystem structure. 441 * Should probably make this a kmem cache, shuffle fields, 442 * and just bzero up to z_hold_mtx[]. 443 / 444* zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP); 445 zfsvfs->z_vfs = vfsp; 446 zfsvfs->z_parent = zfsvfs; 447 zfsvfs->z_assign = TXG_NOWAIT; 448 zfsvfs->z_max_blksz = SPA_MAXBLOCKSIZE; 449 zfsvfs->z_show_ctldir = ZFS_SNAPDIR_VISIBLE; 450 451 mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL); 452 list_create(&zfsvfs->z_all_znodes, sizeof (znode_t), 453 offsetof(znode_t, z_link_node)); 454 rw_init(&zfsvfs->z_um_lock, NULL, RW_DEFAULT, NULL); 455 456 if (error = dsl_prop_get_integer(osname, "recordsize", &recordsize, 457 NULL)) 458 goto out; 459 zfsvfs->z_vfs->vfs_bsize = recordsize; 460 461 vfsp->vfs_data = zfsvfs; 462 vfsp->mnt_flag \|= MNT_LOCAL; 463 vfsp->mnt_kern_flag \|= MNTK_MPSAFE; 464 vfsp->mnt_kern_flag \|= MNTK_LOOKUP_SHARED; 465 466 if (error = dsl_prop_get_integer(osname, "readonly", &readonly, NULL)) 467 goto out; 468 469 if (readonly) 470 mode = DS_MODE_PRIMARY \| DS_MODE_READONLY; 471 else 472 mode = DS_MODE_PRIMARY; 473 474 error = dmu_objset_open(osname, DMU_OST_ZFS, mode, &zfsvfs->z_os); 475 if (error == EROFS) { 476 mode = DS_MODE_PRIMARY \| DS_MODE_READONLY; 477 error = dmu_objset_open(osname, DMU_OST_ZFS, mode, 478 &zfsvfs->z_os); 479 } 480 481 if (error) 482 goto out; 483 484 if (error = zfs_init_fs(zfsvfs, &zp, cr)) 485 goto out; 486 487 if (dmu_objset_is_snapshot(zfsvfs->z_os)) { 488 uint64_t xattr; 489 490 ASSERT(mode & DS_MODE_READONLY); 491 atime_changed_cb(zfsvfs, B_FALSE); 492 readonly_changed_cb(zfsvfs, B_TRUE); 493 if (error = dsl_prop_get_integer(osname, "xattr", &xattr, NULL)) 494 goto out; 495 xattr_changed_cb(zfsvfs, xattr); 496 zfsvfs->z_issnap = B_TRUE; 497 } else { 498 error = zfs_register_callbacks(vfsp); 499 if (error) 500 goto out; 501 502 zfs_unlinked_drain(zfsvfs); 503 504 /* 505 * Parse and replay the intent log. 506 / 507* zil_replay(zfsvfs->z_os, zfsvfs, &zfsvfs->z_assign, 508 zfs_replay_vector); 509 510 if (!zil_disable) 511 zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data); 512 } 513 514 vfs_mountedfrom(vfsp, osname); 515 516 if (!zfsvfs->z_issnap) 517 zfsctl_create(zfsvfs); 518out: 519 if (error) { 520 if (zfsvfs->z_os) 521 dmu_objset_close(zfsvfs->z_os); 522 rw_destroy(&zfsvfs->z_um_lock); 523 mutex_destroy(&zfsvfs->z_znodes_lock); 524 kmem_free(zfsvfs, sizeof (zfsvfs_t)); 525 } else { 526 atomic_add_32(&zfs_active_fs_count, 1); 527 } 528 529 return (error); 530 531} 532 533void 534zfs_unregister_callbacks(zfsvfs_t zfsvfs) 535{ 536* objset_t os = zfsvfs->z_os; 537* struct dsl_dataset ds; 538* 539 /* 540 * Unregister properties. 541 / 542* if (!dmu_objset_is_snapshot(os)) { 543 ds = dmu_objset_ds(os); 544 VERIFY(dsl_prop_unregister(ds, "atime", atime_changed_cb, 545 zfsvfs) == 0); 546 547 VERIFY(dsl_prop_unregister(ds, "xattr", xattr_changed_cb, 548 zfsvfs) == 0); 549 550 VERIFY(dsl_prop_unregister(ds, "recordsize", blksz_changed_cb, 551 zfsvfs) == 0); 552 553 VERIFY(dsl_prop_unregister(ds, "readonly", readonly_changed_cb, 554 zfsvfs) == 0); 555 556 VERIFY(dsl_prop_unregister(ds, "setuid", setuid_changed_cb, 557 zfsvfs) == 0); 558 559 VERIFY(dsl_prop_unregister(ds, "exec", exec_changed_cb, 560 zfsvfs) == 0); 561 562 VERIFY(dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, 563 zfsvfs) == 0); 564 565 VERIFY(dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb, 566 zfsvfs) == 0); 567 568 VERIFY(dsl_prop_unregister(ds, "aclinherit", 569 acl_inherit_changed_cb, zfsvfs) == 0); 570 } 571} 572 573/ARGSUSED/ 574static int 575zfs_mount(vfs_t vfsp, kthread_t td) 576{ 577 char from; 578* int error; 579 580 /* 581 * When doing a remount, we simply refresh our temporary properties 582 * according to those options set in the current VFS options. 583 / 584* if (vfsp->vfs_flag & MS_REMOUNT) 585 return (zfs_refresh_properties(vfsp)); 586 587 if (vfs_getopt(vfsp->mnt_optnew, "from", (void *)&from, NULL)) 588* return (EINVAL); 589 590 DROP_GIANT(); 591 error = zfs_domount(vfsp, from, td); 592 PICKUP_GIANT(); 593 return (error); 594} 595 596static int 597zfs_statfs(vfs_t vfsp, struct statfs statp, kthread_t td) 598{ 599* zfsvfs_t zfsvfs = vfsp->vfs_data; 600* uint64_t refdbytes, availbytes, usedobjs, availobjs; 601 602 statp->f_version = STATFS_VERSION; 603 604 ZFS_ENTER(zfsvfs); 605 606 dmu_objset_space(zfsvfs->z_os, 607 &refdbytes, &availbytes, &usedobjs, &availobjs); 608 609 /* 610 * The underlying storage pool actually uses multiple block sizes. 611 * We report the fragsize as the smallest block size we support, 612 * and we report our blocksize as the filesystem's maximum blocksize. 613 / 614* statp->f_bsize = zfsvfs->z_vfs->vfs_bsize; 615 statp->f_iosize = zfsvfs->z_vfs->vfs_bsize; 616 617 /* 618 * The following report "total" blocks of various kinds in the 619 * file system, but reported in terms of f_frsize - the 620 * "fragment" size. 621 / 622* 623 statp->f_blocks = (refdbytes + availbytes) / statp->f_bsize; 624 statp->f_bfree = availbytes / statp->f_bsize; 625 statp->f_bavail = statp->f_bfree; /* no root reservation / 626* 627 /* 628 * statvfs() should really be called statufs(), because it assumes 629 * static metadata. ZFS doesn't preallocate files, so the best 630 * we can do is report the max that could possibly fit in f_files, 631 * and that minus the number actually used in f_ffree. 632 * For f_ffree, report the smaller of the number of object available 633 * and the number of blocks (each object will take at least a block). 634 / 635* statp->f_ffree = MIN(availobjs, statp->f_bfree); 636 statp->f_files = statp->f_ffree + usedobjs; 637 638 /* 639 * We're a zfs filesystem. 640 / 641* (void) strlcpy(statp->f_fstypename, "zfs", sizeof(statp->f_fstypename)); 642 643 strlcpy(statp->f_mntfromname, vfsp->mnt_stat.f_mntfromname, 644 sizeof(statp->f_mntfromname)); 645 strlcpy(statp->f_mntonname, vfsp->mnt_stat.f_mntonname, 646 sizeof(statp->f_mntonname)); 647 648 statp->f_namemax = ZFS_MAXNAMELEN; 649 650 ZFS_EXIT(zfsvfs); 651 return (0); 652} 653 654static int 655zfs_root(vfs_t vfsp, int flags, vnode_t vpp, kthread_t td) 656{ 657 zfsvfs_t zfsvfs = vfsp->vfs_data; 658* znode_t rootzp; 659* int error; 660 661 ZFS_ENTER(zfsvfs); 662 663 error = zfs_zget(zfsvfs, zfsvfs->z_root, &rootzp); 664 if (error == 0) { 665 vpp = ZTOV(rootzp); 666* error = vn_lock(vpp, flags, td); 667* (vpp)->v_vflag \|= VV_ROOT; 668* } 669 670 ZFS_EXIT(zfsvfs); 671 return (error); 672} 673 674/ARGSUSED/ 675static int 676zfs_umount(vfs_t vfsp, int fflag, kthread_t td) 677{ 678 zfsvfs_t zfsvfs = vfsp->vfs_data; 679* cred_t cr = td->td_ucred; 680* int ret; 681 682 if ((ret = secpolicy_fs_unmount(cr, vfsp)) != 0) 683 return (ret); 684 685 (void) dnlc_purge_vfsp(vfsp, 0); 686 687 /* 688 * Unmount any snapshots mounted under .zfs before unmounting the 689 * dataset itself. 690 / 691* if (zfsvfs->z_ctldir != NULL) { 692 if ((ret = zfsctl_umount_snapshots(vfsp, fflag, cr)) != 0) 693 return (ret); 694 ret = vflush(vfsp, 0, 0, td); 695 ASSERT(ret == EBUSY); 696 if (!(fflag & MS_FORCE)) { 697 if (zfsvfs->z_ctldir->v_count > 1) 698 return (EBUSY); 699 ASSERT(zfsvfs->z_ctldir->v_count == 1); 700 } 701 zfsctl_destroy(zfsvfs); 702 ASSERT(zfsvfs->z_ctldir == NULL); 703 } 704 705 /* 706 * Flush all the files. 707 / 708* ret = vflush(vfsp, 1, (fflag & MS_FORCE) ? FORCECLOSE : 0, td); 709 if (ret != 0) { 710 if (!zfsvfs->z_issnap) { 711 zfsctl_create(zfsvfs); 712 ASSERT(zfsvfs->z_ctldir != NULL); 713 } 714 return (ret); 715 } 716 717 if (fflag & MS_FORCE) { 718 MNT_ILOCK(vfsp); 719 vfsp->mnt_kern_flag \|= MNTK_UNMOUNTF; 720 MNT_IUNLOCK(vfsp); 721 zfsvfs->z_unmounted1 = B_TRUE; 722 723 /* 724 * Wait for all zfs threads to leave zfs. 725 * Grabbing a rwlock as reader in all vops and 726 * as writer here doesn't work because it too easy to get 727 * multiple reader enters as zfs can re-enter itself. 728 * This can lead to deadlock if there is an intervening 729 * rw_enter as writer. 730 * So a file system threads ref count (z_op_cnt) is used. 731 * A polling loop on z_op_cnt may seem inefficient, but 732 * - this saves all threads on exit from having to grab a 733 * mutex in order to cv_signal 734 * - only occurs on forced unmount in the rare case when 735 * there are outstanding threads within the file system. 736 / 737* while (zfsvfs->z_op_cnt) { 738 delay(1); 739 } 740 } 741 742 zfs_objset_close(zfsvfs); 743 VFS_RELE(vfsp); 744 zfs_freevfs(vfsp); 745 746 return (0); 747} 748 749static int 750zfs_vget(vfs_t vfsp, ino_t ino, int flags, vnode_t vpp) 751{ 752* zfsvfs_t zfsvfs = vfsp->vfs_data; 753* znode_t zp; 754* int err; 755 756 ZFS_ENTER(zfsvfs); 757 err = zfs_zget(zfsvfs, ino, &zp); 758 if (err == 0 && zp->z_unlinked) { 759 VN_RELE(ZTOV(zp)); 760 err = EINVAL; 761 } 762 if (err != 0) 763 vpp = NULL; 764* else { 765 vpp = ZTOV(zp); 766* vn_lock(vpp, flags, curthread); 767* } 768 ZFS_EXIT(zfsvfs); 769 return (0); 770} 771 772static int 773zfs_fhtovp(vfs_t vfsp, fid_t fidp, vnode_t *vpp) 774{ 775* kthread_t td = curthread; 776* zfsvfs_t zfsvfs = vfsp->vfs_data; 777* znode_t zp; 778* uint64_t object = 0; 779 uint64_t fid_gen = 0; 780 uint64_t gen_mask; 781 uint64_t zp_gen; 782 int i, err; 783 784 vpp = NULL; 785* 786 ZFS_ENTER(zfsvfs); 787 788 if (fidp->fid_len == LONG_FID_LEN) { 789 zfid_long_t zlfid = (zfid_long_t )fidp; 790 uint64_t objsetid = 0; 791 uint64_t setgen = 0; 792 793 for (i = 0; i < sizeof (zlfid->zf_setid); i++) 794 objsetid \|= ((uint64_t)zlfid->zf_setid[i]) << (8 * i); 795 796 for (i = 0; i < sizeof (zlfid->zf_setgen); i++) 797 setgen \|= ((uint64_t)zlfid->zf_setgen[i]) << (8 * i); 798 799 ZFS_EXIT(zfsvfs); 800 801 err = zfsctl_lookup_objset(vfsp, objsetid, &zfsvfs); 802 if (err) 803 return (EINVAL); 804 ZFS_ENTER(zfsvfs); 805 } 806 807 if (fidp->fid_len == SHORT_FID_LEN \|\| fidp->fid_len == LONG_FID_LEN) { 808 zfid_short_t zfid = (zfid_short_t )fidp; 809 810 for (i = 0; i < sizeof (zfid->zf_object); i++) 811 object \|= ((uint64_t)zfid->zf_object[i]) << (8 * i); 812 813 for (i = 0; i < sizeof (zfid->zf_gen); i++) 814 fid_gen \|= ((uint64_t)zfid->zf_gen[i]) << (8 * i); 815 } else { 816 ZFS_EXIT(zfsvfs); 817 return (EINVAL); 818 } 819 820 /* A zero fid_gen means we are in the .zfs control directories / 821* if (fid_gen == 0 && 822 (object == ZFSCTL_INO_ROOT \|\| object == ZFSCTL_INO_SNAPDIR)) { 823 vpp = zfsvfs->z_ctldir; 824* ASSERT(vpp != NULL); 825* if (object == ZFSCTL_INO_SNAPDIR) { 826 VERIFY(zfsctl_root_lookup(vpp, "snapshot", vpp, NULL, 827* 0, NULL, NULL) == 0); 828 } else { 829 VN_HOLD(vpp); 830* } 831 ZFS_EXIT(zfsvfs); 832 /* XXX: LK_RETRY? / 833* vn_lock(vpp, LK_EXCLUSIVE \| LK_RETRY, td); 834* return (0); 835 } 836 837 gen_mask = -1ULL >> (64 - 8 * i); 838 839 dprintf("getting %llu [%u mask %llx]\n", object, fid_gen, gen_mask); 840 if (err = zfs_zget(zfsvfs, object, &zp)) { 841 ZFS_EXIT(zfsvfs); 842 return (err); 843 } 844 zp_gen = zp->z_phys->zp_gen & gen_mask; 845 if (zp_gen == 0) 846 zp_gen = 1; 847 if (zp->z_unlinked \|\| zp_gen != fid_gen) { 848 dprintf("znode gen (%u) != fid gen (%u)\n", zp_gen, fid_gen); 849 VN_RELE(ZTOV(zp)); 850 ZFS_EXIT(zfsvfs); 851 return (EINVAL); 852 } 853 854 vpp = ZTOV(zp); 855* /* XXX: LK_RETRY? / 856* vn_lock(vpp, LK_EXCLUSIVE \| LK_RETRY, td); 857* vnode_create_vobject(vpp, zp->z_phys->zp_size, td); 858* ZFS_EXIT(zfsvfs); 859 return (0); 860} 861 862static void 863zfs_objset_close(zfsvfs_t zfsvfs) 864{ 865* znode_t zp, nextzp; 866 objset_t os = zfsvfs->z_os; 867* 868 /* 869 * For forced unmount, at this point all vops except zfs_inactive 870 * are erroring EIO. We need to now suspend zfs_inactive threads 871 * while we are freeing dbufs before switching zfs_inactive 872 * to use behaviour without a objset. 873 / 874* rw_enter(&zfsvfs->z_um_lock, RW_WRITER); 875 876 /* 877 * Release all holds on dbufs 878 * Note, although we have stopped all other vop threads and 879 * zfs_inactive(), the dmu can callback via znode_pageout_func() 880 * which can zfs_znode_free() the znode. 881 * So we lock z_all_znodes; search the list for a held 882 * dbuf; drop the lock (we know zp can't disappear if we hold 883 * a dbuf lock; then regrab the lock and restart. 884 / 885* mutex_enter(&zfsvfs->z_znodes_lock); 886 for (zp = list_head(&zfsvfs->z_all_znodes); zp; zp = nextzp) { 887 nextzp = list_next(&zfsvfs->z_all_znodes, zp); 888 if (zp->z_dbuf_held) { 889 /* dbufs should only be held when force unmounting / 890* zp->z_dbuf_held = 0; 891 mutex_exit(&zfsvfs->z_znodes_lock); 892 dmu_buf_rele(zp->z_dbuf, NULL); 893 /* Start again / 894* mutex_enter(&zfsvfs->z_znodes_lock); 895 nextzp = list_head(&zfsvfs->z_all_znodes); 896 } 897 } 898 mutex_exit(&zfsvfs->z_znodes_lock); 899 900 /* 901 * Unregister properties. 902 / 903* if (!dmu_objset_is_snapshot(os)) 904 zfs_unregister_callbacks(zfsvfs); 905 906 /* 907 * Switch zfs_inactive to behaviour without an objset. 908 * It just tosses cached pages and frees the znode & vnode. 909 * Then re-enable zfs_inactive threads in that new behaviour. 910 / 911* zfsvfs->z_unmounted2 = B_TRUE; 912 rw_exit(&zfsvfs->z_um_lock); /* re-enable any zfs_inactive threads / 913* 914 /* 915 * Close the zil. Can't close the zil while zfs_inactive 916 * threads are blocked as zil_close can call zfs_inactive. 917 / 918* if (zfsvfs->z_log) { 919 zil_close(zfsvfs->z_log); 920 zfsvfs->z_log = NULL; 921 } 922 923 /* 924 * Evict all dbufs so that cached znodes will be freed 925 / 926* if (dmu_objset_evict_dbufs(os, 1)) { 927 txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0); 928 (void) dmu_objset_evict_dbufs(os, 0); 929 } 930 931 /* 932 * Finally close the objset 933 / 934* dmu_objset_close(os); 935} 936 937static void 938zfs_freevfs(vfs_t vfsp) 939{ 940* zfsvfs_t zfsvfs = vfsp->vfs_data; 941* int i; 942 943 for (i = 0; i != ZFS_OBJ_MTX_SZ; i++) 944 mutex_destroy(&zfsvfs->z_hold_mtx[i]); 945 rw_destroy(&zfsvfs->z_um_lock); 946 mutex_destroy(&zfsvfs->z_znodes_lock); 947 kmem_free(zfsvfs, sizeof (zfsvfs_t)); 948 949 atomic_add_32(&zfs_active_fs_count, -1); 950} 951 952void 953zfs_init(void) 954{ 955 956 printf("ZFS filesystem version " ZFS_VERSION_STRING "\n"); 957 958 /* 959 * Initialize .zfs directory structures 960 / 961* zfsctl_init(); 962 963 /* 964 * Initialize znode cache, vnode ops, etc... 965 / 966* zfs_znode_init(); 967} 968 969void 970zfs_fini(void) 971{ 972 zfsctl_fini(); 973 zfs_znode_fini(); 974} 975 976int 977zfs_busy(void) 978{ 979 return (zfs_active_fs_count != 0); 980}