zfs_vfsops.c revision 196944
1158115Sume/* 2158115Sume * CDDL HEADER START 3158115Sume * 4158115Sume * The contents of this file are subject to the terms of the 5158115Sume * Common Development and Distribution License (the "License"). 6158115Sume * You may not use this file except in compliance with the License. 7158115Sume * 8158115Sume * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9158115Sume * or http://www.opensolaris.org/os/licensing. 10158115Sume * See the License for the specific language governing permissions 11158115Sume * and limitations under the License. 12158115Sume * 13158115Sume * When distributing Covered Code, include this CDDL HEADER in each 14158115Sume * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15158115Sume * If applicable, add the following below this CDDL HEADER, with the 16158115Sume * fields enclosed by brackets "[]" replaced with your own identifying 17158115Sume * information: Portions Copyright [yyyy] [name of copyright owner] 18158115Sume * 19158115Sume * CDDL HEADER END 20158115Sume */ 21158115Sume/* 22158115Sume * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23158115Sume * Use is subject to license terms. 
24158115Sume */ 25158115Sume 26158115Sume#include <sys/types.h> 27158115Sume#include <sys/param.h> 28158115Sume#include <sys/systm.h> 29158115Sume#include <sys/kernel.h> 30158115Sume#include <sys/sysmacros.h> 31194112Sdes#include <sys/kmem.h> 32194112Sdes#include <sys/acl.h> 33#include <sys/vnode.h> 34#include <sys/vfs.h> 35#include <sys/mntent.h> 36#include <sys/mount.h> 37#include <sys/cmn_err.h> 38#include <sys/zfs_znode.h> 39#include <sys/zfs_dir.h> 40#include <sys/zil.h> 41#include <sys/fs/zfs.h> 42#include <sys/dmu.h> 43#include <sys/dsl_prop.h> 44#include <sys/dsl_dataset.h> 45#include <sys/dsl_deleg.h> 46#include <sys/spa.h> 47#include <sys/zap.h> 48#include <sys/varargs.h> 49#include <sys/policy.h> 50#include <sys/atomic.h> 51#include <sys/zfs_ioctl.h> 52#include <sys/zfs_ctldir.h> 53#include <sys/zfs_fuid.h> 54#include <sys/sunddi.h> 55#include <sys/dnlc.h> 56#include <sys/dmu_objset.h> 57#include <sys/spa_boot.h> 58#include <sys/vdev_impl.h> /* VDEV_BOOT_VERSION */ 59 60struct mtx zfs_debug_mtx; 61MTX_SYSINIT(zfs_debug_mtx, &zfs_debug_mtx, "zfs_debug", MTX_DEF); 62 63SYSCTL_NODE(_vfs, OID_AUTO, zfs, CTLFLAG_RW, 0, "ZFS file system"); 64 65int zfs_super_owner = 0; 66SYSCTL_INT(_vfs_zfs, OID_AUTO, super_owner, CTLFLAG_RW, &zfs_super_owner, 0, 67 "File system owner can perform privileged operation on his file systems"); 68 69int zfs_debug_level = 0; 70TUNABLE_INT("vfs.zfs.debug", &zfs_debug_level); 71SYSCTL_INT(_vfs_zfs, OID_AUTO, debug, CTLFLAG_RW, &zfs_debug_level, 0, 72 "Debug level"); 73 74SYSCTL_NODE(_vfs_zfs, OID_AUTO, version, CTLFLAG_RD, 0, "ZFS versions"); 75static int zfs_version_acl = ZFS_ACL_VERSION; 76SYSCTL_INT(_vfs_zfs_version, OID_AUTO, acl, CTLFLAG_RD, &zfs_version_acl, 0, 77 "ZFS_ACL_VERSION"); 78static int zfs_version_dmu_backup_header = DMU_BACKUP_HEADER_VERSION; 79SYSCTL_INT(_vfs_zfs_version, OID_AUTO, dmu_backup_header, CTLFLAG_RD, 80 &zfs_version_dmu_backup_header, 0, "DMU_BACKUP_HEADER_VERSION"); 81static int 
zfs_version_dmu_backup_stream = DMU_BACKUP_STREAM_VERSION; 82SYSCTL_INT(_vfs_zfs_version, OID_AUTO, dmu_backup_stream, CTLFLAG_RD, 83 &zfs_version_dmu_backup_stream, 0, "DMU_BACKUP_STREAM_VERSION"); 84static int zfs_version_spa = SPA_VERSION; 85SYSCTL_INT(_vfs_zfs_version, OID_AUTO, spa, CTLFLAG_RD, &zfs_version_spa, 0, 86 "SPA_VERSION"); 87static int zfs_version_vdev_boot = VDEV_BOOT_VERSION; 88SYSCTL_INT(_vfs_zfs_version, OID_AUTO, vdev_boot, CTLFLAG_RD, 89 &zfs_version_vdev_boot, 0, "VDEV_BOOT_VERSION"); 90static int zfs_version_zpl = ZPL_VERSION; 91SYSCTL_INT(_vfs_zfs_version, OID_AUTO, zpl, CTLFLAG_RD, &zfs_version_zpl, 0, 92 "ZPL_VERSION"); 93 94static int zfs_mount(vfs_t *vfsp); 95static int zfs_umount(vfs_t *vfsp, int fflag); 96static int zfs_root(vfs_t *vfsp, int flags, vnode_t **vpp); 97static int zfs_statfs(vfs_t *vfsp, struct statfs *statp); 98static int zfs_vget(vfs_t *vfsp, ino_t ino, int flags, vnode_t **vpp); 99static int zfs_sync(vfs_t *vfsp, int waitfor); 100static int zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, vnode_t **vpp); 101static void zfs_objset_close(zfsvfs_t *zfsvfs); 102static void zfs_freevfs(vfs_t *vfsp); 103 104static struct vfsops zfs_vfsops = { 105 .vfs_mount = zfs_mount, 106 .vfs_unmount = zfs_umount, 107 .vfs_root = zfs_root, 108 .vfs_statfs = zfs_statfs, 109 .vfs_vget = zfs_vget, 110 .vfs_sync = zfs_sync, 111 .vfs_fhtovp = zfs_fhtovp, 112}; 113 114VFS_SET(zfs_vfsops, zfs, VFCF_JAIL | VFCF_DELEGADMIN); 115 116/* 117 * We need to keep a count of active fs's. 118 * This is necessary to prevent our module 119 * from being unloaded after a umount -f 120 */ 121static uint32_t zfs_active_fs_count = 0; 122 123/*ARGSUSED*/ 124static int 125zfs_sync(vfs_t *vfsp, int waitfor) 126{ 127 128 /* 129 * Data integrity is job one. We don't want a compromised kernel 130 * writing to the storage pool, so we never sync during panic. 131 */ 132 if (panicstr) 133 return (0); 134 135 if (vfsp != NULL) { 136 /* 137 * Sync a specific filesystem. 
138 */ 139 zfsvfs_t *zfsvfs = vfsp->vfs_data; 140 int error; 141 142 error = vfs_stdsync(vfsp, waitfor); 143 if (error != 0) 144 return (error); 145 146 ZFS_ENTER(zfsvfs); 147 if (zfsvfs->z_log != NULL) 148 zil_commit(zfsvfs->z_log, UINT64_MAX, 0); 149 else 150 txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0); 151 ZFS_EXIT(zfsvfs); 152 } else { 153 /* 154 * Sync all ZFS filesystems. This is what happens when you 155 * run sync(1M). Unlike other filesystems, ZFS honors the 156 * request by waiting for all pools to commit all dirty data. 157 */ 158 spa_sync_allpools(); 159 } 160 161 return (0); 162} 163 164static void 165atime_changed_cb(void *arg, uint64_t newval) 166{ 167 zfsvfs_t *zfsvfs = arg; 168 169 if (newval == TRUE) { 170 zfsvfs->z_atime = TRUE; 171 zfsvfs->z_vfs->vfs_flag &= ~MNT_NOATIME; 172 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME); 173 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_ATIME, NULL, 0); 174 } else { 175 zfsvfs->z_atime = FALSE; 176 zfsvfs->z_vfs->vfs_flag |= MNT_NOATIME; 177 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_ATIME); 178 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME, NULL, 0); 179 } 180} 181 182static void 183xattr_changed_cb(void *arg, uint64_t newval) 184{ 185 zfsvfs_t *zfsvfs = arg; 186 187 if (newval == TRUE) { 188 /* XXX locking on vfs_flag? */ 189#ifdef TODO 190 zfsvfs->z_vfs->vfs_flag |= VFS_XATTR; 191#endif 192 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOXATTR); 193 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_XATTR, NULL, 0); 194 } else { 195 /* XXX locking on vfs_flag? 
*/ 196#ifdef TODO 197 zfsvfs->z_vfs->vfs_flag &= ~VFS_XATTR; 198#endif 199 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_XATTR); 200 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOXATTR, NULL, 0); 201 } 202} 203 204static void 205blksz_changed_cb(void *arg, uint64_t newval) 206{ 207 zfsvfs_t *zfsvfs = arg; 208 209 if (newval < SPA_MINBLOCKSIZE || 210 newval > SPA_MAXBLOCKSIZE || !ISP2(newval)) 211 newval = SPA_MAXBLOCKSIZE; 212 213 zfsvfs->z_max_blksz = newval; 214 zfsvfs->z_vfs->vfs_bsize = newval; 215} 216 217static void 218readonly_changed_cb(void *arg, uint64_t newval) 219{ 220 zfsvfs_t *zfsvfs = arg; 221 222 if (newval) { 223 /* XXX locking on vfs_flag? */ 224 zfsvfs->z_vfs->vfs_flag |= VFS_RDONLY; 225 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RW); 226 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RO, NULL, 0); 227 } else { 228 /* XXX locking on vfs_flag? */ 229 zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY; 230 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RO); 231 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RW, NULL, 0); 232 } 233} 234 235static void 236setuid_changed_cb(void *arg, uint64_t newval) 237{ 238 zfsvfs_t *zfsvfs = arg; 239 240 if (newval == FALSE) { 241 zfsvfs->z_vfs->vfs_flag |= VFS_NOSETUID; 242 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_SETUID); 243 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID, NULL, 0); 244 } else { 245 zfsvfs->z_vfs->vfs_flag &= ~VFS_NOSETUID; 246 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID); 247 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_SETUID, NULL, 0); 248 } 249} 250 251static void 252exec_changed_cb(void *arg, uint64_t newval) 253{ 254 zfsvfs_t *zfsvfs = arg; 255 256 if (newval == FALSE) { 257 zfsvfs->z_vfs->vfs_flag |= VFS_NOEXEC; 258 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_EXEC); 259 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC, NULL, 0); 260 } else { 261 zfsvfs->z_vfs->vfs_flag &= ~VFS_NOEXEC; 262 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC); 263 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_EXEC, NULL, 0); 264 } 265} 266 267/* 268 * The nbmand mount option can be changed at mount time. 
269 * We can't allow it to be toggled on live file systems or incorrect 270 * behavior may be seen from cifs clients 271 * 272 * This property isn't registered via dsl_prop_register(), but this callback 273 * will be called when a file system is first mounted 274 */ 275static void 276nbmand_changed_cb(void *arg, uint64_t newval) 277{ 278 zfsvfs_t *zfsvfs = arg; 279 if (newval == FALSE) { 280 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NBMAND); 281 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NONBMAND, NULL, 0); 282 } else { 283 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NONBMAND); 284 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NBMAND, NULL, 0); 285 } 286} 287 288static void 289snapdir_changed_cb(void *arg, uint64_t newval) 290{ 291 zfsvfs_t *zfsvfs = arg; 292 293 zfsvfs->z_show_ctldir = newval; 294} 295 296static void 297vscan_changed_cb(void *arg, uint64_t newval) 298{ 299 zfsvfs_t *zfsvfs = arg; 300 301 zfsvfs->z_vscan = newval; 302} 303 304static void 305acl_mode_changed_cb(void *arg, uint64_t newval) 306{ 307 zfsvfs_t *zfsvfs = arg; 308 309 zfsvfs->z_acl_mode = newval; 310} 311 312static void 313acl_inherit_changed_cb(void *arg, uint64_t newval) 314{ 315 zfsvfs_t *zfsvfs = arg; 316 317 zfsvfs->z_acl_inherit = newval; 318} 319 320static int 321zfs_register_callbacks(vfs_t *vfsp) 322{ 323 struct dsl_dataset *ds = NULL; 324 objset_t *os = NULL; 325 zfsvfs_t *zfsvfs = NULL; 326 uint64_t nbmand; 327 int readonly, do_readonly = FALSE; 328 int setuid, do_setuid = FALSE; 329 int exec, do_exec = FALSE; 330 int xattr, do_xattr = FALSE; 331 int atime, do_atime = FALSE; 332 int error = 0; 333 334 ASSERT(vfsp); 335 zfsvfs = vfsp->vfs_data; 336 ASSERT(zfsvfs); 337 os = zfsvfs->z_os; 338 339 /* 340 * The act of registering our callbacks will destroy any mount 341 * options we may have. In order to enable temporary overrides 342 * of mount options, we stash away the current values and 343 * restore them after we register the callbacks. 
344 */ 345 if (vfs_optionisset(vfsp, MNTOPT_RO, NULL)) { 346 readonly = B_TRUE; 347 do_readonly = B_TRUE; 348 } else if (vfs_optionisset(vfsp, MNTOPT_RW, NULL)) { 349 readonly = B_FALSE; 350 do_readonly = B_TRUE; 351 } 352 if (vfs_optionisset(vfsp, MNTOPT_NOSUID, NULL)) { 353 setuid = B_FALSE; 354 do_setuid = B_TRUE; 355 } else { 356 if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) { 357 setuid = B_FALSE; 358 do_setuid = B_TRUE; 359 } else if (vfs_optionisset(vfsp, MNTOPT_SETUID, NULL)) { 360 setuid = B_TRUE; 361 do_setuid = B_TRUE; 362 } 363 } 364 if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL)) { 365 exec = B_FALSE; 366 do_exec = B_TRUE; 367 } else if (vfs_optionisset(vfsp, MNTOPT_EXEC, NULL)) { 368 exec = B_TRUE; 369 do_exec = B_TRUE; 370 } 371 if (vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL)) { 372 xattr = B_FALSE; 373 do_xattr = B_TRUE; 374 } else if (vfs_optionisset(vfsp, MNTOPT_XATTR, NULL)) { 375 xattr = B_TRUE; 376 do_xattr = B_TRUE; 377 } 378 if (vfs_optionisset(vfsp, MNTOPT_NOATIME, NULL)) { 379 atime = B_FALSE; 380 do_atime = B_TRUE; 381 } else if (vfs_optionisset(vfsp, MNTOPT_ATIME, NULL)) { 382 atime = B_TRUE; 383 do_atime = B_TRUE; 384 } 385 386 /* 387 * nbmand is a special property. It can only be changed at 388 * mount time. 389 * 390 * This is weird, but it is documented to only be changeable 391 * at mount time. 392 */ 393 if (vfs_optionisset(vfsp, MNTOPT_NONBMAND, NULL)) { 394 nbmand = B_FALSE; 395 } else if (vfs_optionisset(vfsp, MNTOPT_NBMAND, NULL)) { 396 nbmand = B_TRUE; 397 } else { 398 char osname[MAXNAMELEN]; 399 400 dmu_objset_name(os, osname); 401 if (error = dsl_prop_get_integer(osname, "nbmand", &nbmand, 402 NULL)) { 403 return (error); 404 } 405 } 406 407 /* 408 * Register property callbacks. 409 * 410 * It would probably be fine to just check for i/o error from 411 * the first prop_register(), but I guess I like to go 412 * overboard... 
413 */ 414 ds = dmu_objset_ds(os); 415 error = dsl_prop_register(ds, "atime", atime_changed_cb, zfsvfs); 416 error = error ? error : dsl_prop_register(ds, 417 "xattr", xattr_changed_cb, zfsvfs); 418 error = error ? error : dsl_prop_register(ds, 419 "recordsize", blksz_changed_cb, zfsvfs); 420 error = error ? error : dsl_prop_register(ds, 421 "readonly", readonly_changed_cb, zfsvfs); 422 error = error ? error : dsl_prop_register(ds, 423 "setuid", setuid_changed_cb, zfsvfs); 424 error = error ? error : dsl_prop_register(ds, 425 "exec", exec_changed_cb, zfsvfs); 426 error = error ? error : dsl_prop_register(ds, 427 "snapdir", snapdir_changed_cb, zfsvfs); 428 error = error ? error : dsl_prop_register(ds, 429 "aclmode", acl_mode_changed_cb, zfsvfs); 430 error = error ? error : dsl_prop_register(ds, 431 "aclinherit", acl_inherit_changed_cb, zfsvfs); 432 error = error ? error : dsl_prop_register(ds, 433 "vscan", vscan_changed_cb, zfsvfs); 434 if (error) 435 goto unregister; 436 437 /* 438 * Invoke our callbacks to restore temporary mount options. 439 */ 440 if (do_readonly) 441 readonly_changed_cb(zfsvfs, readonly); 442 if (do_setuid) 443 setuid_changed_cb(zfsvfs, setuid); 444 if (do_exec) 445 exec_changed_cb(zfsvfs, exec); 446 if (do_xattr) 447 xattr_changed_cb(zfsvfs, xattr); 448 if (do_atime) 449 atime_changed_cb(zfsvfs, atime); 450 451 nbmand_changed_cb(zfsvfs, nbmand); 452 453 return (0); 454 455unregister: 456 /* 457 * We may attempt to unregister some callbacks that are not 458 * registered, but this is OK; it will simply return ENOMSG, 459 * which we will ignore. 
460 */ 461 (void) dsl_prop_unregister(ds, "atime", atime_changed_cb, zfsvfs); 462 (void) dsl_prop_unregister(ds, "xattr", xattr_changed_cb, zfsvfs); 463 (void) dsl_prop_unregister(ds, "recordsize", blksz_changed_cb, zfsvfs); 464 (void) dsl_prop_unregister(ds, "readonly", readonly_changed_cb, zfsvfs); 465 (void) dsl_prop_unregister(ds, "setuid", setuid_changed_cb, zfsvfs); 466 (void) dsl_prop_unregister(ds, "exec", exec_changed_cb, zfsvfs); 467 (void) dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, zfsvfs); 468 (void) dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb, zfsvfs); 469 (void) dsl_prop_unregister(ds, "aclinherit", acl_inherit_changed_cb, 470 zfsvfs); 471 (void) dsl_prop_unregister(ds, "vscan", vscan_changed_cb, zfsvfs); 472 return (error); 473 474} 475 476static int 477zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting) 478{ 479 int error; 480 481 error = zfs_register_callbacks(zfsvfs->z_vfs); 482 if (error) 483 return (error); 484 485 /* 486 * Set the objset user_ptr to track its zfsvfs. 487 */ 488 mutex_enter(&zfsvfs->z_os->os->os_user_ptr_lock); 489 dmu_objset_set_user(zfsvfs->z_os, zfsvfs); 490 mutex_exit(&zfsvfs->z_os->os->os_user_ptr_lock); 491 492 /* 493 * If we are not mounting (ie: online recv), then we don't 494 * have to worry about replaying the log as we blocked all 495 * operations out since we closed the ZIL. 496 */ 497 if (mounting) { 498 boolean_t readonly; 499 500 /* 501 * During replay we remove the read only flag to 502 * allow replays to succeed. 503 */ 504 readonly = zfsvfs->z_vfs->vfs_flag & VFS_RDONLY; 505 zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY; 506 507 /* 508 * Parse and replay the intent log. 
509 */ 510 zil_replay(zfsvfs->z_os, zfsvfs, &zfsvfs->z_assign, 511 zfs_replay_vector, zfs_unlinked_drain); 512 513 zfs_unlinked_drain(zfsvfs); 514 zfsvfs->z_vfs->vfs_flag |= readonly; /* restore readonly bit */ 515 } 516 517 if (!zil_disable) 518 zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data); 519 520 return (0); 521} 522 523static void 524zfs_freezfsvfs(zfsvfs_t *zfsvfs) 525{ 526 mutex_destroy(&zfsvfs->z_znodes_lock); 527 mutex_destroy(&zfsvfs->z_online_recv_lock); 528 list_destroy(&zfsvfs->z_all_znodes); 529 rrw_destroy(&zfsvfs->z_teardown_lock); 530 rw_destroy(&zfsvfs->z_teardown_inactive_lock); 531 rw_destroy(&zfsvfs->z_fuid_lock); 532 kmem_free(zfsvfs, sizeof (zfsvfs_t)); 533} 534 535static int 536zfs_domount(vfs_t *vfsp, char *osname) 537{ 538 uint64_t recordsize, readonly; 539 int error = 0; 540 int mode; 541 zfsvfs_t *zfsvfs; 542 znode_t *zp = NULL; 543 544 ASSERT(vfsp); 545 ASSERT(osname); 546 547 /* 548 * Initialize the zfs-specific filesystem structure. 549 * Should probably make this a kmem cache, shuffle fields, 550 * and just bzero up to z_hold_mtx[]. 
551 */ 552 zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP); 553 zfsvfs->z_vfs = vfsp; 554 zfsvfs->z_parent = zfsvfs; 555 zfsvfs->z_assign = TXG_NOWAIT; 556 zfsvfs->z_max_blksz = SPA_MAXBLOCKSIZE; 557 zfsvfs->z_show_ctldir = ZFS_SNAPDIR_VISIBLE; 558 559 mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL); 560 mutex_init(&zfsvfs->z_online_recv_lock, NULL, MUTEX_DEFAULT, NULL); 561 list_create(&zfsvfs->z_all_znodes, sizeof (znode_t), 562 offsetof(znode_t, z_link_node)); 563 rrw_init(&zfsvfs->z_teardown_lock); 564 rw_init(&zfsvfs->z_teardown_inactive_lock, NULL, RW_DEFAULT, NULL); 565 rw_init(&zfsvfs->z_fuid_lock, NULL, RW_DEFAULT, NULL); 566 567 if (error = dsl_prop_get_integer(osname, "recordsize", &recordsize, 568 NULL)) 569 goto out; 570 zfsvfs->z_vfs->vfs_bsize = recordsize; 571 572 vfsp->vfs_data = zfsvfs; 573 vfsp->mnt_flag |= MNT_LOCAL; 574 vfsp->mnt_kern_flag |= MNTK_MPSAFE; 575 vfsp->mnt_kern_flag |= MNTK_LOOKUP_SHARED; 576 vfsp->mnt_kern_flag |= MNTK_SHARED_WRITES; 577 578 if (error = dsl_prop_get_integer(osname, "readonly", &readonly, NULL)) 579 goto out; 580 581 mode = DS_MODE_OWNER; 582 if (readonly) 583 mode |= DS_MODE_READONLY; 584 585 error = dmu_objset_open(osname, DMU_OST_ZFS, mode, &zfsvfs->z_os); 586 if (error == EROFS) { 587 mode = DS_MODE_OWNER | DS_MODE_READONLY; 588 error = dmu_objset_open(osname, DMU_OST_ZFS, mode, 589 &zfsvfs->z_os); 590 } 591 592 if (error) 593 goto out; 594 595 if (error = zfs_init_fs(zfsvfs, &zp)) 596 goto out; 597 598 /* 599 * Set features for file system. 
600 */ 601 zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os); 602 if (zfsvfs->z_use_fuids) { 603 vfs_set_feature(vfsp, VFSFT_XVATTR); 604 vfs_set_feature(vfsp, VFSFT_SYSATTR_VIEWS); 605 vfs_set_feature(vfsp, VFSFT_ACEMASKONACCESS); 606 vfs_set_feature(vfsp, VFSFT_ACLONCREATE); 607 } 608 if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE) { 609 vfs_set_feature(vfsp, VFSFT_DIRENTFLAGS); 610 vfs_set_feature(vfsp, VFSFT_CASEINSENSITIVE); 611 vfs_set_feature(vfsp, VFSFT_NOCASESENSITIVE); 612 } else if (zfsvfs->z_case == ZFS_CASE_MIXED) { 613 vfs_set_feature(vfsp, VFSFT_DIRENTFLAGS); 614 vfs_set_feature(vfsp, VFSFT_CASEINSENSITIVE); 615 } 616 617 if (dmu_objset_is_snapshot(zfsvfs->z_os)) { 618 uint64_t pval; 619 620 ASSERT(mode & DS_MODE_READONLY); 621 atime_changed_cb(zfsvfs, B_FALSE); 622 readonly_changed_cb(zfsvfs, B_TRUE); 623 if (error = dsl_prop_get_integer(osname, "xattr", &pval, NULL)) 624 goto out; 625 xattr_changed_cb(zfsvfs, pval); 626 zfsvfs->z_issnap = B_TRUE; 627 } else { 628 error = zfsvfs_setup(zfsvfs, B_TRUE); 629 } 630 631 vfs_mountedfrom(vfsp, osname); 632 633 if (!zfsvfs->z_issnap) 634 zfsctl_create(zfsvfs); 635out: 636 if (error) { 637 if (zfsvfs->z_os) 638 dmu_objset_close(zfsvfs->z_os); 639 zfs_freezfsvfs(zfsvfs); 640 } else { 641 atomic_add_32(&zfs_active_fs_count, 1); 642 } 643 644 return (error); 645} 646 647void 648zfs_unregister_callbacks(zfsvfs_t *zfsvfs) 649{ 650 objset_t *os = zfsvfs->z_os; 651 struct dsl_dataset *ds; 652 653 /* 654 * Unregister properties. 
655 */ 656 if (!dmu_objset_is_snapshot(os)) { 657 ds = dmu_objset_ds(os); 658 VERIFY(dsl_prop_unregister(ds, "atime", atime_changed_cb, 659 zfsvfs) == 0); 660 661 VERIFY(dsl_prop_unregister(ds, "xattr", xattr_changed_cb, 662 zfsvfs) == 0); 663 664 VERIFY(dsl_prop_unregister(ds, "recordsize", blksz_changed_cb, 665 zfsvfs) == 0); 666 667 VERIFY(dsl_prop_unregister(ds, "readonly", readonly_changed_cb, 668 zfsvfs) == 0); 669 670 VERIFY(dsl_prop_unregister(ds, "setuid", setuid_changed_cb, 671 zfsvfs) == 0); 672 673 VERIFY(dsl_prop_unregister(ds, "exec", exec_changed_cb, 674 zfsvfs) == 0); 675 676 VERIFY(dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, 677 zfsvfs) == 0); 678 679 VERIFY(dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb, 680 zfsvfs) == 0); 681 682 VERIFY(dsl_prop_unregister(ds, "aclinherit", 683 acl_inherit_changed_cb, zfsvfs) == 0); 684 685 VERIFY(dsl_prop_unregister(ds, "vscan", 686 vscan_changed_cb, zfsvfs) == 0); 687 } 688} 689 690/*ARGSUSED*/ 691static int 692zfs_mount(vfs_t *vfsp) 693{ 694 kthread_t *td = curthread; 695 vnode_t *mvp = vfsp->mnt_vnodecovered; 696 cred_t *cr = td->td_ucred; 697 char *osname; 698 int error = 0; 699 int canwrite; 700 701 if (vfs_getopt(vfsp->mnt_optnew, "from", (void **)&osname, NULL)) 702 return (EINVAL); 703 704 /* 705 * If full-owner-access is enabled and delegated administration is 706 * turned on, we must set nosuid. 707 */ 708 if (zfs_super_owner && 709 dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr) != ECANCELED) { 710 secpolicy_fs_mount_clearopts(cr, vfsp); 711 } 712 713 /* 714 * Check for mount privilege? 
715 * 716 * If we don't have privilege then see if 717 * we have local permission to allow it 718 */ 719 error = secpolicy_fs_mount(cr, mvp, vfsp); 720 if (error) { 721 error = dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr); 722 if (error != 0) 723 goto out; 724 725 if (!(vfsp->vfs_flag & MS_REMOUNT)) { 726 vattr_t vattr; 727 728 /* 729 * Make sure user is the owner of the mount point 730 * or has sufficient privileges. 731 */ 732 733 vattr.va_mask = AT_UID; 734 735 vn_lock(mvp, LK_SHARED | LK_RETRY); 736 if (error = VOP_GETATTR(mvp, &vattr, cr)) { 737 VOP_UNLOCK(mvp, 0); 738 goto out; 739 } 740 741#if 0 /* CHECK THIS! Is probably needed for zfs_suser. */ 742 if (secpolicy_vnode_owner(mvp, cr, vattr.va_uid) != 0 && 743 VOP_ACCESS(mvp, VWRITE, cr, td) != 0) { 744 error = EPERM; 745 goto out; 746 } 747#else 748 if (error = secpolicy_vnode_owner(mvp, cr, vattr.va_uid)) { 749 VOP_UNLOCK(mvp, 0); 750 goto out; 751 } 752 753 if (error = VOP_ACCESS(mvp, VWRITE, cr, td)) { 754 VOP_UNLOCK(mvp, 0); 755 goto out; 756 } 757 VOP_UNLOCK(mvp, 0); 758#endif 759 } 760 761 secpolicy_fs_mount_clearopts(cr, vfsp); 762 } 763 764 /* 765 * Refuse to mount a filesystem if we are in a local zone and the 766 * dataset is not visible. 767 */ 768 if (!INGLOBALZONE(curthread) && 769 (!zone_dataset_visible(osname, &canwrite) || !canwrite)) { 770 error = EPERM; 771 goto out; 772 } 773 774 /* 775 * When doing a remount, we simply refresh our temporary properties 776 * according to those options set in the current VFS options. 
777 */ 778 if (vfsp->vfs_flag & MS_REMOUNT) { 779 /* refresh mount options */ 780 zfs_unregister_callbacks(vfsp->vfs_data); 781 error = zfs_register_callbacks(vfsp); 782 goto out; 783 } 784 785 DROP_GIANT(); 786 error = zfs_domount(vfsp, osname); 787 PICKUP_GIANT(); 788out: 789 return (error); 790} 791 792static int 793zfs_statfs(vfs_t *vfsp, struct statfs *statp) 794{ 795 zfsvfs_t *zfsvfs = vfsp->vfs_data; 796 uint64_t refdbytes, availbytes, usedobjs, availobjs; 797 798 statp->f_version = STATFS_VERSION; 799 800 ZFS_ENTER(zfsvfs); 801 802 dmu_objset_space(zfsvfs->z_os, 803 &refdbytes, &availbytes, &usedobjs, &availobjs); 804 805 /* 806 * The underlying storage pool actually uses multiple block sizes. 807 * We report the fragsize as the smallest block size we support, 808 * and we report our blocksize as the filesystem's maximum blocksize. 809 */ 810 statp->f_bsize = zfsvfs->z_vfs->vfs_bsize; 811 statp->f_iosize = zfsvfs->z_vfs->vfs_bsize; 812 813 /* 814 * The following report "total" blocks of various kinds in the 815 * file system, but reported in terms of f_frsize - the 816 * "fragment" size. 817 */ 818 819 statp->f_blocks = (refdbytes + availbytes) / statp->f_bsize; 820 statp->f_bfree = availbytes / statp->f_bsize; 821 statp->f_bavail = statp->f_bfree; /* no root reservation */ 822 823 /* 824 * statvfs() should really be called statufs(), because it assumes 825 * static metadata. ZFS doesn't preallocate files, so the best 826 * we can do is report the max that could possibly fit in f_files, 827 * and that minus the number actually used in f_ffree. 828 * For f_ffree, report the smaller of the number of object available 829 * and the number of blocks (each object will take at least a block). 830 */ 831 statp->f_ffree = MIN(availobjs, statp->f_bfree); 832 statp->f_files = statp->f_ffree + usedobjs; 833 834 /* 835 * We're a zfs filesystem. 
836 */ 837 (void) strlcpy(statp->f_fstypename, "zfs", sizeof(statp->f_fstypename)); 838 839 strlcpy(statp->f_mntfromname, vfsp->mnt_stat.f_mntfromname, 840 sizeof(statp->f_mntfromname)); 841 strlcpy(statp->f_mntonname, vfsp->mnt_stat.f_mntonname, 842 sizeof(statp->f_mntonname)); 843 844 statp->f_namemax = ZFS_MAXNAMELEN; 845 846 ZFS_EXIT(zfsvfs); 847 return (0); 848} 849 850static int 851zfs_root(vfs_t *vfsp, int flags, vnode_t **vpp) 852{ 853 zfsvfs_t *zfsvfs = vfsp->vfs_data; 854 znode_t *rootzp; 855 int error; 856 857 ZFS_ENTER(zfsvfs); 858 859 error = zfs_zget(zfsvfs, zfsvfs->z_root, &rootzp); 860 if (error == 0) { 861 *vpp = ZTOV(rootzp); 862 error = vn_lock(*vpp, flags); 863 (*vpp)->v_vflag |= VV_ROOT; 864 } 865 866 ZFS_EXIT(zfsvfs); 867 return (error); 868} 869 870/* 871 * Teardown the zfsvfs::z_os. 872 * 873 * Note, if 'unmounting' if FALSE, we return with the 'z_teardown_lock' 874 * and 'z_teardown_inactive_lock' held. 875 */ 876static int 877zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting) 878{ 879 znode_t *zp; 880 881 rrw_enter(&zfsvfs->z_teardown_lock, RW_WRITER, FTAG); 882 883 if (!unmounting) { 884 /* 885 * We purge the parent filesystem's vfsp as the parent 886 * filesystem and all of its snapshots have their vnode's 887 * v_vfsp set to the parent's filesystem's vfsp. Note, 888 * 'z_parent' is self referential for non-snapshots. 889 */ 890 (void) dnlc_purge_vfsp(zfsvfs->z_parent->z_vfs, 0); 891 } 892 893 /* 894 * Close the zil. NB: Can't close the zil while zfs_inactive 895 * threads are blocked as zil_close can call zfs_inactive. 896 */ 897 if (zfsvfs->z_log) { 898 zil_close(zfsvfs->z_log); 899 zfsvfs->z_log = NULL; 900 } 901 902 rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_WRITER); 903 904 /* 905 * If we are not unmounting (ie: online recv) and someone already 906 * unmounted this file system while we were doing the switcheroo, 907 * or a reopen of z_os failed then just bail out now. 
908 */ 909 if (!unmounting && (zfsvfs->z_unmounted || zfsvfs->z_os == NULL)) { 910 rw_exit(&zfsvfs->z_teardown_inactive_lock); 911 rrw_exit(&zfsvfs->z_teardown_lock, FTAG); 912 return (EIO); 913 } 914 915 /* 916 * At this point there are no vops active, and any new vops will 917 * fail with EIO since we have z_teardown_lock for writer (only 918 * relavent for forced unmount). 919 * 920 * Release all holds on dbufs. 921 */ 922 mutex_enter(&zfsvfs->z_znodes_lock); 923 for (zp = list_head(&zfsvfs->z_all_znodes); zp != NULL; 924 zp = list_next(&zfsvfs->z_all_znodes, zp)) 925 if (zp->z_dbuf) { 926 ASSERT(ZTOV(zp)->v_count >= 0); 927 zfs_znode_dmu_fini(zp); 928 } 929 mutex_exit(&zfsvfs->z_znodes_lock); 930 931 /* 932 * If we are unmounting, set the unmounted flag and let new vops 933 * unblock. zfs_inactive will have the unmounted behavior, and all 934 * other vops will fail with EIO. 935 */ 936 if (unmounting) { 937 zfsvfs->z_unmounted = B_TRUE; 938 rrw_exit(&zfsvfs->z_teardown_lock, FTAG); 939 rw_exit(&zfsvfs->z_teardown_inactive_lock); 940 } 941 942 /* 943 * z_os will be NULL if there was an error in attempting to reopen 944 * zfsvfs, so just return as the properties had already been 945 * unregistered and cached data had been evicted before. 946 */ 947 if (zfsvfs->z_os == NULL) 948 return (0); 949 950 /* 951 * Unregister properties. 952 */ 953 zfs_unregister_callbacks(zfsvfs); 954 955 /* 956 * Evict cached data 957 */ 958 if (dmu_objset_evict_dbufs(zfsvfs->z_os)) { 959 txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0); 960 (void) dmu_objset_evict_dbufs(zfsvfs->z_os); 961 } 962 963 return (0); 964} 965 966/*ARGSUSED*/ 967static int 968zfs_umount(vfs_t *vfsp, int fflag) 969{ 970 zfsvfs_t *zfsvfs = vfsp->vfs_data; 971 objset_t *os; 972 cred_t *cr = curthread->td_ucred; 973 int ret; 974 975 if (fflag & MS_FORCE) { 976 /* TODO: Force unmount is not well implemented yet, so deny it. 
 */
		ZFS_LOG(0, "Force unmount is experimental - report any problems.");
	}

	ret = secpolicy_fs_unmount(cr, vfsp);
	if (ret) {
		/* Not privileged: fall back to the delegated "mount" permission. */
		ret = dsl_deleg_access((char *)refstr_value(vfsp->vfs_resource),
		    ZFS_DELEG_PERM_MOUNT, cr);
		if (ret)
			return (ret);
	}
	/*
	 * We purge the parent filesystem's vfsp as the parent filesystem
	 * and all of its snapshots have their vnode's v_vfsp set to the
	 * parent's filesystem's vfsp.  Note, 'z_parent' is self
	 * referential for non-snapshots.
	 */
	(void) dnlc_purge_vfsp(zfsvfs->z_parent->z_vfs, 0);

	/*
	 * Unmount any snapshots mounted under .zfs before unmounting the
	 * dataset itself.
	 */
	if (zfsvfs->z_ctldir != NULL) {
		if ((ret = zfsctl_umount_snapshots(vfsp, fflag, cr)) != 0)
			return (ret);
		/*
		 * A non-forced flush is expected to fail with EBUSY here
		 * because the '.zfs' control vnode is still held.
		 */
		ret = vflush(vfsp, 0, 0, curthread);
		ASSERT(ret == EBUSY);
		if (!(fflag & MS_FORCE)) {
			if (zfsvfs->z_ctldir->v_count > 1)
				return (EBUSY);
			ASSERT(zfsvfs->z_ctldir->v_count == 1);
		}
		zfsctl_destroy(zfsvfs);
		ASSERT(zfsvfs->z_ctldir == NULL);
	}

	/*
	 * Flush all the files.
	 */
	ret = vflush(vfsp, 1, (fflag & MS_FORCE) ? FORCECLOSE : 0, curthread);
	if (ret != 0) {
		/* Flush failed: recreate the '.zfs' node destroyed above. */
		if (!zfsvfs->z_issnap) {
			zfsctl_create(zfsvfs);
			ASSERT(zfsvfs->z_ctldir != NULL);
		}
		return (ret);
	}

	if (!(fflag & MS_FORCE)) {
		/*
		 * Check the number of active vnodes in the file system.
		 * Our count is maintained in the vfs structure, but the
		 * number is off by 1 to indicate a hold on the vfs
		 * structure itself.
		 *
		 * The '.zfs' directory maintains a reference of its
		 * own, and any active references underneath are
		 * reflected in the vnode count.
		 */
		if (zfsvfs->z_ctldir == NULL) {
			if (vfsp->vfs_count > 1)
				return (EBUSY);
		} else {
			if (vfsp->vfs_count > 2 ||
			    zfsvfs->z_ctldir->v_count > 1)
				return (EBUSY);
		}
	} else {
		/* Forced unmount: mark the mount so new references fail. */
		MNT_ILOCK(vfsp);
		vfsp->mnt_kern_flag |= MNTK_UNMOUNTF;
		MNT_IUNLOCK(vfsp);
	}

	VERIFY(zfsvfs_teardown(zfsvfs, B_TRUE) == 0);
	os = zfsvfs->z_os;

	/*
	 * z_os will be NULL if there was an error in
	 * attempting to reopen zfsvfs.
	 */
	if (os != NULL) {
		/*
		 * Unset the objset user_ptr.
		 */
		mutex_enter(&os->os->os_user_ptr_lock);
		dmu_objset_set_user(os, NULL);
		mutex_exit(&os->os->os_user_ptr_lock);

		/*
		 * Finally release the objset
		 */
		dmu_objset_close(os);
	}

	/*
	 * We can now safely destroy the '.zfs' directory node.
	 */
	if (zfsvfs->z_ctldir != NULL)
		zfsctl_destroy(zfsvfs);
	if (zfsvfs->z_issnap) {
		vnode_t *svp = vfsp->mnt_vnodecovered;

		/* Drop the hold the snapshot mount placed on the covered vnode. */
		ASSERT(svp->v_count == 2 || svp->v_count == 1);
		if (svp->v_count == 2)
			VN_RELE(svp);
	}
	zfs_freevfs(vfsp);

	return (0);
}

/*
 * Translate an inode number into a locked vnode via zfs_zget().
 * Unlinked-but-not-yet-reaped znodes are rejected with EINVAL so
 * stale handles cannot resurrect them.
 */
static int
zfs_vget(vfs_t *vfsp, ino_t ino, int flags, vnode_t **vpp)
{
	zfsvfs_t *zfsvfs = vfsp->vfs_data;
	znode_t *zp;
	int err;

	ZFS_ENTER(zfsvfs);
	err = zfs_zget(zfsvfs, ino, &zp);
	if (err == 0 && zp->z_unlinked) {
		VN_RELE(ZTOV(zp));
		err = EINVAL;
	}
	if (err != 0)
		*vpp = NULL;
	else {
		*vpp = ZTOV(zp);
		vn_lock(*vpp, flags);
	}
	ZFS_EXIT(zfsvfs);
	return (err);
}

/*
 * Translate a file handle (fid) into a referenced, exclusively locked
 * vnode.  Short fids encode (object, generation); long fids additionally
 * carry an objset id, used to redirect the lookup to a snapshot's zfsvfs
 * via zfsctl_lookup_objset().
 */
static int
zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, vnode_t **vpp)
{
	zfsvfs_t *zfsvfs = vfsp->vfs_data;
	znode_t *zp;
	uint64_t object = 0;
	uint64_t fid_gen = 0;
	uint64_t gen_mask;
	uint64_t zp_gen;
	int i, err;

	*vpp = NULL;

	ZFS_ENTER(zfsvfs);

	if (fidp->fid_len == LONG_FID_LEN) {
		zfid_long_t *zlfid = (zfid_long_t *)fidp;
		uint64_t objsetid = 0;
		uint64_t setgen = 0;

		/* Reassemble the little-endian byte arrays stored in the fid. */
		for (i = 0; i < sizeof (zlfid->zf_setid); i++)
			objsetid |= ((uint64_t)zlfid->zf_setid[i]) << (8 * i);

		/* NOTE(review): setgen is decoded but never checked below. */
		for (i = 0; i < sizeof (zlfid->zf_setgen); i++)
			setgen |= ((uint64_t)zlfid->zf_setgen[i]) << (8 * i);

		ZFS_EXIT(zfsvfs);

		/* Re-target 'zfsvfs' at the snapshot identified by objsetid. */
		err = zfsctl_lookup_objset(vfsp, objsetid, &zfsvfs);
		if (err)
			return (EINVAL);
		ZFS_ENTER(zfsvfs);
	}

	if (fidp->fid_len == SHORT_FID_LEN || fidp->fid_len == LONG_FID_LEN) {
		zfid_short_t *zfid = (zfid_short_t *)fidp;

		for (i = 0; i < sizeof (zfid->zf_object); i++)
			object |= ((uint64_t)zfid->zf_object[i]) << (8 * i);

		for (i = 0; i < sizeof (zfid->zf_gen); i++)
			fid_gen |= ((uint64_t)zfid->zf_gen[i]) << (8 * i);
	} else {
		ZFS_EXIT(zfsvfs);
		return (EINVAL);
	}

	/* A zero fid_gen means we are in the .zfs control directories */
	if (fid_gen == 0 &&
	    (object == ZFSCTL_INO_ROOT || object == ZFSCTL_INO_SNAPDIR)) {
		*vpp = zfsvfs->z_ctldir;
		ASSERT(*vpp != NULL);
		if (object == ZFSCTL_INO_SNAPDIR) {
			VERIFY(zfsctl_root_lookup(*vpp, "snapshot", vpp, NULL,
			    0, NULL, NULL, NULL, NULL, NULL) == 0);
		} else {
			VN_HOLD(*vpp);
		}
		ZFS_EXIT(zfsvfs);
		/* XXX: LK_RETRY? */
		vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY);
		return (0);
	}

	/* 'i' holds the zf_gen byte count from the loop above. */
	gen_mask = -1ULL >> (64 - 8 * i);

	/*
	 * NOTE(review): the %u/%llx specifiers do not match the uint64_t
	 * arguments — debug-only output, but worth confirming.
	 */
	dprintf("getting %llu [%u mask %llx]\n", object, fid_gen, gen_mask);
	if (err = zfs_zget(zfsvfs, object, &zp)) {
		ZFS_EXIT(zfsvfs);
		return (err);
	}
	/* A stored generation of 0 is normalized to 1, matching fid encoding. */
	zp_gen = zp->z_phys->zp_gen & gen_mask;
	if (zp_gen == 0)
		zp_gen = 1;
	if (zp->z_unlinked || zp_gen != fid_gen) {
		dprintf("znode gen (%u) != fid gen (%u)\n", zp_gen, fid_gen);
		VN_RELE(ZTOV(zp));
		ZFS_EXIT(zfsvfs);
		return (EINVAL);
	}

	*vpp = ZTOV(zp);
	/* XXX: LK_RETRY? */
	vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY);
	vnode_create_vobject(*vpp, zp->z_phys->zp_size, curthread);
	ZFS_EXIT(zfsvfs);
	return (0);
}

/*
 * Block out VOPs and close zfsvfs_t::z_os
 *
 * Note, if successful, then we return with the 'z_teardown_lock' and
 * 'z_teardown_inactive_lock' write held.
 */
int
zfs_suspend_fs(zfsvfs_t *zfsvfs, char *name, int *mode)
{
	int error;

	if ((error = zfsvfs_teardown(zfsvfs, B_FALSE)) != 0)
		return (error);

	/* Hand back the objset name and open mode so the caller can reopen. */
	*mode = zfsvfs->z_os->os_mode;
	dmu_objset_name(zfsvfs->z_os, name);
	dmu_objset_close(zfsvfs->z_os);

	return (0);
}

/*
 * Reopen zfsvfs_t::z_os and release VOPs.
 */
int
zfs_resume_fs(zfsvfs_t *zfsvfs, const char *osname, int mode)
{
	int err;

	ASSERT(RRW_WRITE_HELD(&zfsvfs->z_teardown_lock));
	ASSERT(RW_WRITE_HELD(&zfsvfs->z_teardown_inactive_lock));

	err = dmu_objset_open(osname, DMU_OST_ZFS, mode, &zfsvfs->z_os);
	if (err) {
		zfsvfs->z_os = NULL;
	} else {
		znode_t *zp;

		VERIFY(zfsvfs_setup(zfsvfs, B_FALSE) == 0);

		/*
		 * Attempt to re-establish all the active znodes with
		 * their dbufs.  If a zfs_rezget() fails, then we'll let
		 * any potential callers discover that via ZFS_ENTER_VERIFY_VP
		 * when they try to use their znode.
		 */
		mutex_enter(&zfsvfs->z_znodes_lock);
		for (zp = list_head(&zfsvfs->z_all_znodes); zp;
		    zp = list_next(&zfsvfs->z_all_znodes, zp)) {
			(void) zfs_rezget(zp);
		}
		mutex_exit(&zfsvfs->z_znodes_lock);

	}

	/* release the VOPs */
	rw_exit(&zfsvfs->z_teardown_inactive_lock);
	rrw_exit(&zfsvfs->z_teardown_lock, FTAG);

	if (err) {
		/*
		 * Since we couldn't reopen zfsvfs::z_os, force
		 * unmount this file system.
		 */
		if (vn_vfswlock(zfsvfs->z_vfs->vfs_vnodecovered) == 0)
			(void) dounmount(zfsvfs->z_vfs, MS_FORCE, curthread);
	}
	return (err);
}

/*
 * Final disposal of the zfsvfs_t: destroy the per-object hold mutexes
 * and FUID state, free the structure, and drop the active-fs count.
 */
static void
zfs_freevfs(vfs_t *vfsp)
{
	zfsvfs_t *zfsvfs = vfsp->vfs_data;
	int i;

	for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
		mutex_destroy(&zfsvfs->z_hold_mtx[i]);

	zfs_fuid_destroy(zfsvfs);
	zfs_freezfsvfs(zfsvfs);

	atomic_add_32(&zfs_active_fs_count, -1);
}

#ifdef __i386__
/* Saved system-wide vnode limit, restored by zfs_vnodes_adjust_back(). */
static int desiredvnodes_backup;
#endif

/*
 * On i386, shrink the global 'desiredvnodes' limit (only if the
 * administrator has not already tuned it away from the default).
 */
static void
zfs_vnodes_adjust(void)
{
#ifdef __i386__
	int newdesiredvnodes;

	desiredvnodes_backup = desiredvnodes;

	/*
	 * We calculate newdesiredvnodes the same way it is done in
	 * vntblinit().  If it is equal to desiredvnodes, it means that
	 * it wasn't tuned by the administrator and we can tune it down.
	 */
	newdesiredvnodes = min(maxproc + cnt.v_page_count / 4, 2 *
	    vm_kmem_size / (5 * (sizeof(struct vm_object) +
	    sizeof(struct vnode))));
	if (newdesiredvnodes == desiredvnodes)
		desiredvnodes = (3 * newdesiredvnodes) / 4;
#endif
}

/*
 * Undo zfs_vnodes_adjust(): restore the saved 'desiredvnodes' value.
 */
static void
zfs_vnodes_adjust_back(void)
{

#ifdef __i386__
	desiredvnodes = desiredvnodes_backup;
#endif
}

/*
 * Module initialization: znode cache, '.zfs' control directory state,
 * and the i386 vnode-limit adjustment.
 */
void
zfs_init(void)
{

	printf("ZFS filesystem version " SPA_VERSION_STRING "\n");

	/*
	 * Initialize znode cache, vnode ops, etc...
	 */
	zfs_znode_init();

	/*
	 * Initialize .zfs directory structures
	 */
	zfsctl_init();

	/*
	 * Reduce the number of vnodes.  Originally the number of vnodes is
	 * calculated with UFS inodes in mind.  We reduce it here, because
	 * it's too big for ZFS/i386.
	 */
	zfs_vnodes_adjust();
}

/*
 * Module teardown: mirror image of zfs_init().
 */
void
zfs_fini(void)
{
	zfsctl_fini();
	zfs_znode_fini();
	zfs_vnodes_adjust_back();
}

/*
 * Return non-zero while any ZFS filesystem is mounted (module busy check).
 */
int
zfs_busy(void)
{
	return (zfs_active_fs_count != 0);
}

/*
 * Upgrade the on-disk ZPL version of dataset 'name' to 'newvers'.
 * Downgrades and out-of-range versions fail with EINVAL.
 */
int
zfs_set_version(const char *name, uint64_t newvers)
{
	int error;
	objset_t *os;
	dmu_tx_t *tx;
	uint64_t curvers;

	/*
	 * XXX for now, require that the filesystem be unmounted.  Would
	 * be nice to find the zfsvfs_t and just update that if
	 * possible.
	 */

	if (newvers < ZPL_VERSION_INITIAL || newvers > ZPL_VERSION)
		return (EINVAL);

	error = dmu_objset_open(name, DMU_OST_ZFS, DS_MODE_OWNER, &os);
	if (error)
		return (error);

	error = zap_lookup(os, MASTER_NODE_OBJ, ZPL_VERSION_STR,
	    8, 1, &curvers);
	if (error)
		goto out;
	/* Downgrades are not permitted. */
	if (newvers < curvers) {
		error = EINVAL;
		goto out;
	}

	tx = dmu_tx_create(os);
	dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, 0, ZPL_VERSION_STR);
	error = dmu_tx_assign(tx, TXG_WAIT);
	if (error) {
		dmu_tx_abort(tx);
		goto out;
	}
	error = zap_update(os, MASTER_NODE_OBJ, ZPL_VERSION_STR, 8, 1,
	    &newvers, tx);

	/*
	 * NOTE(review): the upgrade is logged and the tx committed even when
	 * zap_update() above failed — confirm this is intentional.
	 */
	spa_history_internal_log(LOG_DS_UPGRADE,
	    dmu_objset_spa(os), tx, CRED(),
	    "oldver=%llu newver=%llu dataset = %llu", curvers, newvers,
	    dmu_objset_id(os));
	dmu_tx_commit(tx);

out:
	dmu_objset_close(os);
	return (error);
}
/*
 * Read a property stored within the master node.
 */
int
zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value)
{
	const char *pname;
	int error = ENOENT;

	/*
	 * Look up the file system's value for the property.  For the
	 * version property, we look up a slightly different string.
	 */
	if (prop == ZFS_PROP_VERSION)
		pname = ZPL_VERSION_STR;
	else
		pname = zfs_prop_to_name(prop);

	if (os != NULL)
		error = zap_lookup(os, MASTER_NODE_OBJ, pname, 8, 1, value);

	if (error == ENOENT) {
		/* No value set, use the default value */
		switch (prop) {
		case ZFS_PROP_VERSION:
			*value = ZPL_VERSION;
			break;
		case ZFS_PROP_NORMALIZE:
		case ZFS_PROP_UTF8ONLY:
			*value = 0;
			break;
		case ZFS_PROP_CASE:
			*value = ZFS_CASE_SENSITIVE;
			break;
		default:
			return (error);
		}
		error = 0;
	}
	return (error);
}