/* zfs_vfsops.c — FreeBSD ZFS VFS operations (revision 196965) */
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
241558Srgrimes */ 251558Srgrimes 261558Srgrimes#include <sys/types.h> 271558Srgrimes#include <sys/param.h> 281558Srgrimes#include <sys/systm.h> 291558Srgrimes#include <sys/kernel.h> 301558Srgrimes#include <sys/sysmacros.h> 311558Srgrimes#include <sys/kmem.h> 3250476Speter#include <sys/acl.h> 331558Srgrimes#include <sys/vnode.h> 34232977Sed#include <sys/vfs.h> 351558Srgrimes#include <sys/mntent.h> 3679530Sru#include <sys/mount.h> 371558Srgrimes#include <sys/cmn_err.h> 381558Srgrimes#include <sys/zfs_znode.h> 391558Srgrimes#include <sys/zfs_dir.h> 401558Srgrimes#include <sys/zil.h> 4168960Sru#include <sys/fs/zfs.h> 4268960Sru#include <sys/dmu.h> 4347998Sru#include <sys/dsl_prop.h> 4447998Sru#include <sys/dsl_dataset.h> 4547998Sru#include <sys/dsl_deleg.h> 4647998Sru#include <sys/spa.h> 471558Srgrimes#include <sys/zap.h> 481558Srgrimes#include <sys/varargs.h> 4937417Scharnier#include <sys/policy.h> 5099501Scharnier#include <sys/atomic.h> 511558Srgrimes#include <sys/zfs_ioctl.h> 521558Srgrimes#include <sys/zfs_ctldir.h> 5347998Sru#include <sys/zfs_fuid.h> 541558Srgrimes#include <sys/sunddi.h> 551558Srgrimes#include <sys/dnlc.h> 5637417Scharnier#include <sys/dmu_objset.h> 5747998Sru#include <sys/spa_boot.h> 581558Srgrimes#include <sys/vdev_impl.h> /* VDEV_BOOT_VERSION */ 591558Srgrimes 6037417Scharnierstruct mtx zfs_debug_mtx; 6199501ScharnierMTX_SYSINIT(zfs_debug_mtx, &zfs_debug_mtx, "zfs_debug", MTX_DEF); 621558Srgrimes 631558SrgrimesSYSCTL_NODE(_vfs, OID_AUTO, zfs, CTLFLAG_RW, 0, "ZFS file system"); 6447998Sru 651558Srgrimesint zfs_super_owner = 0; 661558SrgrimesSYSCTL_INT(_vfs_zfs, OID_AUTO, super_owner, CTLFLAG_RW, &zfs_super_owner, 0, 671558Srgrimes "File system owner can perform privileged operation on his file systems"); 681558Srgrimes 691558Srgrimesint zfs_debug_level = 0; 7037417ScharnierTUNABLE_INT("vfs.zfs.debug", &zfs_debug_level); 711558SrgrimesSYSCTL_INT(_vfs_zfs, OID_AUTO, debug, CTLFLAG_RW, &zfs_debug_level, 0, 721558Srgrimes "Debug level"); 
731558Srgrimes 741558SrgrimesSYSCTL_NODE(_vfs_zfs, OID_AUTO, version, CTLFLAG_RD, 0, "ZFS versions"); 751558Srgrimesstatic int zfs_version_acl = ZFS_ACL_VERSION; 7637417ScharnierSYSCTL_INT(_vfs_zfs_version, OID_AUTO, acl, CTLFLAG_RD, &zfs_version_acl, 0, 771558Srgrimes "ZFS_ACL_VERSION"); 781558Srgrimesstatic int zfs_version_dmu_backup_header = DMU_BACKUP_HEADER_VERSION; 7947998SruSYSCTL_INT(_vfs_zfs_version, OID_AUTO, dmu_backup_header, CTLFLAG_RD, 8047998Sru &zfs_version_dmu_backup_header, 0, "DMU_BACKUP_HEADER_VERSION"); 811558Srgrimesstatic int zfs_version_dmu_backup_stream = DMU_BACKUP_STREAM_VERSION; 8237417ScharnierSYSCTL_INT(_vfs_zfs_version, OID_AUTO, dmu_backup_stream, CTLFLAG_RD, 8347998Sru &zfs_version_dmu_backup_stream, 0, "DMU_BACKUP_STREAM_VERSION"); 841558Srgrimesstatic int zfs_version_spa = SPA_VERSION; 8579754SddSYSCTL_INT(_vfs_zfs_version, OID_AUTO, spa, CTLFLAG_RD, &zfs_version_spa, 0, 8637417Scharnier "SPA_VERSION"); 8747998Srustatic int zfs_version_vdev_boot = VDEV_BOOT_VERSION; 8847998SruSYSCTL_INT(_vfs_zfs_version, OID_AUTO, vdev_boot, CTLFLAG_RD, 891558Srgrimes &zfs_version_vdev_boot, 0, "VDEV_BOOT_VERSION"); 90149766Sgarysstatic int zfs_version_zpl = ZPL_VERSION; 91149766SgarysSYSCTL_INT(_vfs_zfs_version, OID_AUTO, zpl, CTLFLAG_RD, &zfs_version_zpl, 0, 92149766Sgarys "ZPL_VERSION"); 9337417Scharnier 9418882Sbdestatic int zfs_mount(vfs_t *vfsp); 9518882Sbdestatic int zfs_umount(vfs_t *vfsp, int fflag); 9637417Scharnierstatic int zfs_root(vfs_t *vfsp, int flags, vnode_t **vpp); 9765773Ssheldonhstatic int zfs_statfs(vfs_t *vfsp, struct statfs *statp); 98101810Srustatic int zfs_vget(vfs_t *vfsp, ino_t ino, int flags, vnode_t **vpp); 9965803Ssheldonhstatic int zfs_sync(vfs_t *vfsp, int waitfor); 10065773Ssheldonhstatic int zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, vnode_t **vpp); 10165773Ssheldonhstatic void zfs_objset_close(zfsvfs_t *zfsvfs); 10265773Ssheldonhstatic void zfs_freevfs(vfs_t *vfsp); 1031558Srgrimes 10465773Ssheldonhstatic struct 
vfsops zfs_vfsops = { 10579754Sdd .vfs_mount = zfs_mount, 106122506Skensmith .vfs_unmount = zfs_umount, 10765773Ssheldonh .vfs_root = zfs_root, 1081558Srgrimes .vfs_statfs = zfs_statfs, 109122506Skensmith .vfs_vget = zfs_vget, 110122506Skensmith .vfs_sync = zfs_sync, 111129327Sru .vfs_fhtovp = zfs_fhtovp, 112122506Skensmith}; 113165730Srse 114122506SkensmithVFS_SET(zfs_vfsops, zfs, VFCF_JAIL | VFCF_DELEGADMIN); 115149766Sgarys 116122506Skensmith/* 117122506Skensmith * We need to keep a count of active fs's. 118122506Skensmith * This is necessary to prevent our module 119122506Skensmith * from being unloaded after a umount -f 120122506Skensmith */ 121122506Skensmithstatic uint32_t zfs_active_fs_count = 0; 12279754Sdd 12337417Scharnier/*ARGSUSED*/ 1241558Srgrimesstatic int 1251558Srgrimeszfs_sync(vfs_t *vfsp, int waitfor) 1261558Srgrimes{ 12799501Scharnier 12899501Scharnier /* 12999501Scharnier * Data integrity is job one. We don't want a compromised kernel 13054199Ssheldonh * writing to the storage pool, so we never sync during panic. 13154199Ssheldonh */ 13254199Ssheldonh if (panicstr) 13354199Ssheldonh return (0); 1341558Srgrimes 1351558Srgrimes if (vfsp != NULL) { 13615135Smpp /* 1371558Srgrimes * Sync a specific filesystem. 1381558Srgrimes */ 1391558Srgrimes zfsvfs_t *zfsvfs = vfsp->vfs_data; 14015135Smpp int error; 1411558Srgrimes 1421558Srgrimes error = vfs_stdsync(vfsp, waitfor); 14315135Smpp if (error != 0) 1441558Srgrimes return (error); 145131488Sru 146131488Sru ZFS_ENTER(zfsvfs); 1471558Srgrimes if (zfsvfs->z_log != NULL) 1481558Srgrimes zil_commit(zfsvfs->z_log, UINT64_MAX, 0); 149231534Sed else 1501558Srgrimes txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0); 15115135Smpp ZFS_EXIT(zfsvfs); 1521558Srgrimes } else { 1531558Srgrimes /* 15499501Scharnier * Sync all ZFS filesystems. This is what happens when you 15599501Scharnier * run sync(1M). 
Unlike other filesystems, ZFS honors the 15699501Scharnier * request by waiting for all pools to commit all dirty data. 15754199Ssheldonh */ 15854199Ssheldonh spa_sync_allpools(); 15954199Ssheldonh } 16054199Ssheldonh 16154199Ssheldonh return (0); 16254199Ssheldonh} 16354199Ssheldonh 16454199Ssheldonhstatic void 16554199Ssheldonhatime_changed_cb(void *arg, uint64_t newval) 16654199Ssheldonh{ 1671558Srgrimes zfsvfs_t *zfsvfs = arg; 1681558Srgrimes 16915135Smpp if (newval == TRUE) { 1701558Srgrimes zfsvfs->z_atime = TRUE; 1711558Srgrimes zfsvfs->z_vfs->vfs_flag &= ~MNT_NOATIME; 1721558Srgrimes vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME); 17337417Scharnier vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_ATIME, NULL, 0); 1741558Srgrimes } else { 1751558Srgrimes zfsvfs->z_atime = FALSE; 1761558Srgrimes zfsvfs->z_vfs->vfs_flag |= MNT_NOATIME; 17737417Scharnier vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_ATIME); 1781558Srgrimes vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME, NULL, 0); 17915135Smpp } 1801558Srgrimes} 1811558Srgrimes 18215135Smppstatic void 18337417Scharnierxattr_changed_cb(void *arg, uint64_t newval) 1841558Srgrimes{ 1851558Srgrimes zfsvfs_t *zfsvfs = arg; 1861558Srgrimes 18715135Smpp if (newval == TRUE) { 1881558Srgrimes /* XXX locking on vfs_flag? */ 18937417Scharnier#ifdef TODO 19054199Ssheldonh zfsvfs->z_vfs->vfs_flag |= VFS_XATTR; 19154199Ssheldonh#endif 1921558Srgrimes vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOXATTR); 19379754Sdd vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_XATTR, NULL, 0); 19437417Scharnier } else { 1951558Srgrimes /* XXX locking on vfs_flag? 
*/ 1961558Srgrimes#ifdef TODO 19715135Smpp zfsvfs->z_vfs->vfs_flag &= ~VFS_XATTR; 19837417Scharnier#endif 1991558Srgrimes vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_XATTR); 2001558Srgrimes vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOXATTR, NULL, 0); 20199501Scharnier } 20299501Scharnier} 20399501Scharnier 2041558Srgrimesstatic void 2051558Srgrimesblksz_changed_cb(void *arg, uint64_t newval) 2061558Srgrimes{ 2071558Srgrimes zfsvfs_t *zfsvfs = arg; 2081558Srgrimes 2091558Srgrimes if (newval < SPA_MINBLOCKSIZE || 21037417Scharnier newval > SPA_MAXBLOCKSIZE || !ISP2(newval)) 2111558Srgrimes newval = SPA_MAXBLOCKSIZE; 2121558Srgrimes 2131558Srgrimes zfsvfs->z_max_blksz = newval; 21499501Scharnier zfsvfs->z_vfs->vfs_bsize = newval; 21599501Scharnier} 21699501Scharnier 2171558Srgrimesstatic void 2181558Srgrimesreadonly_changed_cb(void *arg, uint64_t newval) 21981449Sru{ 2201558Srgrimes zfsvfs_t *zfsvfs = arg; 2211558Srgrimes 22247998Sru if (newval) { 2231558Srgrimes /* XXX locking on vfs_flag? */ 2241558Srgrimes zfsvfs->z_vfs->vfs_flag |= VFS_RDONLY; 2251558Srgrimes vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RW); 2261558Srgrimes vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RO, NULL, 0); 2271558Srgrimes } else { 22899501Scharnier /* XXX locking on vfs_flag? 
*/ 22999501Scharnier zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY; 23099501Scharnier vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RO); 2312323Snate vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RW, NULL, 0); 2322323Snate } 23381449Sru} 2342323Snate 2352323Snatestatic void 23621945Sadamsetuid_changed_cb(void *arg, uint64_t newval) 2372323Snate{ 23899501Scharnier zfsvfs_t *zfsvfs = arg; 23999501Scharnier 24099501Scharnier if (newval == FALSE) { 24147962Sru zfsvfs->z_vfs->vfs_flag |= VFS_NOSETUID; 24247962Sru vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_SETUID); 24347998Sru vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID, NULL, 0); 24447962Sru } else { 24547962Sru zfsvfs->z_vfs->vfs_flag &= ~VFS_NOSETUID; 24647962Sru vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID); 24737055Sjkoshy vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_SETUID, NULL, 0); 24837417Scharnier } 24927837Sdavidn} 25027837Sdavidn 25157668Ssheldonhstatic void 25257668Ssheldonhexec_changed_cb(void *arg, uint64_t newval) 25327837Sdavidn{ 25427837Sdavidn zfsvfs_t *zfsvfs = arg; 25527837Sdavidn 256150154Srse if (newval == FALSE) { 257150154Srse zfsvfs->z_vfs->vfs_flag |= VFS_NOEXEC; 258152568Sru vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_EXEC); 259152568Sru vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC, NULL, 0); 260150154Srse } else { 261150154Srse zfsvfs->z_vfs->vfs_flag &= ~VFS_NOEXEC; 262150154Srse vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC); 26327837Sdavidn vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_EXEC, NULL, 0); 2641558Srgrimes } 26537417Scharnier} 2661558Srgrimes 2671558Srgrimes/* 2681558Srgrimes * The nbmand mount option can be changed at mount time. 
26937417Scharnier * We can't allow it to be toggled on live file systems or incorrect 2701558Srgrimes * behavior may be seen from cifs clients 27147998Sru * 27247998Sru * This property isn't registered via dsl_prop_register(), but this callback 27348395Sru * will be called when a file system is first mounted 27447998Sru */ 27547998Srustatic void 27647998Srunbmand_changed_cb(void *arg, uint64_t newval) 277131488Sru{ 27847998Sru zfsvfs_t *zfsvfs = arg; 27947998Sru if (newval == FALSE) { 28047998Sru vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NBMAND); 28147998Sru vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NONBMAND, NULL, 0); 28281251Sru } else { 28347998Sru vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NONBMAND); 28447998Sru vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NBMAND, NULL, 0); 28547998Sru } 286233510Sjoel} 28747998Sru 28847998Srustatic void 28947998Srusnapdir_changed_cb(void *arg, uint64_t newval) 29047998Sru{ 29147998Sru zfsvfs_t *zfsvfs = arg; 29247998Sru 29347998Sru zfsvfs->z_show_ctldir = newval; 29447998Sru} 2951558Srgrimes 296232977Sedstatic void 2971558Srgrimesvscan_changed_cb(void *arg, uint64_t newval) 29837417Scharnier{ 2991558Srgrimes zfsvfs_t *zfsvfs = arg; 30037417Scharnier 30137417Scharnier zfsvfs->z_vscan = newval; 3021558Srgrimes} 30337417Scharnier 3041558Srgrimesstatic void 30537417Scharnieracl_mode_changed_cb(void *arg, uint64_t newval) 30627837Sdavidn{ 30737417Scharnier zfsvfs_t *zfsvfs = arg; 308232977Sed 309232977Sed zfsvfs->z_acl_mode = newval; 310232977Sed} 311232977Sed 3121558Srgrimesstatic void 313140415Sruacl_inherit_changed_cb(void *arg, uint64_t newval) 314140415Sru{ 315140415Sru zfsvfs_t *zfsvfs = arg; 316140415Sru 317140415Sru zfsvfs->z_acl_inherit = newval; 318140415Sru} 319140415Sru 320140415Srustatic int 321140415Sruzfs_register_callbacks(vfs_t *vfsp) 322140415Sru{ 323140415Sru struct dsl_dataset *ds = NULL; 324140415Sru objset_t *os = NULL; 325140415Sru zfsvfs_t *zfsvfs = NULL; 326140415Sru uint64_t nbmand; 327140415Sru int readonly, do_readonly = FALSE; 
328140415Sru int setuid, do_setuid = FALSE; 329140415Sru int exec, do_exec = FALSE; 3301558Srgrimes int xattr, do_xattr = FALSE; 33121635Swosch int atime, do_atime = FALSE; 3321558Srgrimes int error = 0; 3331558Srgrimes 3341558Srgrimes ASSERT(vfsp); 335152568Sru zfsvfs = vfsp->vfs_data; 3361558Srgrimes ASSERT(zfsvfs); 33721635Swosch os = zfsvfs->z_os; 338122506Skensmith 3391558Srgrimes /* 3401558Srgrimes * This function can be called for a snapshot when we update snapshot's 34137055Sjkoshy * mount point, which isn't really supported. 34237055Sjkoshy */ 3431558Srgrimes if (dmu_objset_is_snapshot(os)) 34447962Sru return (EOPNOTSUPP); 3451558Srgrimes 34699501Scharnier /* 3471558Srgrimes * The act of registering our callbacks will destroy any mount 34837055Sjkoshy * options we may have. In order to enable temporary overrides 3491558Srgrimes * of mount options, we stash away the current values and 350116035Scharnier * restore them after we register the callbacks. 3511558Srgrimes */ 35237055Sjkoshy if (vfs_optionisset(vfsp, MNTOPT_RO, NULL)) { 35379754Sdd readonly = B_TRUE; 35479754Sdd do_readonly = B_TRUE; 35579754Sdd } else if (vfs_optionisset(vfsp, MNTOPT_RW, NULL)) { 356131488Sru readonly = B_FALSE; 357131488Sru do_readonly = B_TRUE; 35879754Sdd } 35979754Sdd if (vfs_optionisset(vfsp, MNTOPT_NOSUID, NULL)) { 36037055Sjkoshy setuid = B_FALSE; 361 do_setuid = B_TRUE; 362 } else { 363 if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) { 364 setuid = B_FALSE; 365 do_setuid = B_TRUE; 366 } else if (vfs_optionisset(vfsp, MNTOPT_SETUID, NULL)) { 367 setuid = B_TRUE; 368 do_setuid = B_TRUE; 369 } 370 } 371 if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL)) { 372 exec = B_FALSE; 373 do_exec = B_TRUE; 374 } else if (vfs_optionisset(vfsp, MNTOPT_EXEC, NULL)) { 375 exec = B_TRUE; 376 do_exec = B_TRUE; 377 } 378 if (vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL)) { 379 xattr = B_FALSE; 380 do_xattr = B_TRUE; 381 } else if (vfs_optionisset(vfsp, MNTOPT_XATTR, NULL)) { 382 xattr = B_TRUE; 
383 do_xattr = B_TRUE; 384 } 385 if (vfs_optionisset(vfsp, MNTOPT_NOATIME, NULL)) { 386 atime = B_FALSE; 387 do_atime = B_TRUE; 388 } else if (vfs_optionisset(vfsp, MNTOPT_ATIME, NULL)) { 389 atime = B_TRUE; 390 do_atime = B_TRUE; 391 } 392 393 /* 394 * nbmand is a special property. It can only be changed at 395 * mount time. 396 * 397 * This is weird, but it is documented to only be changeable 398 * at mount time. 399 */ 400 if (vfs_optionisset(vfsp, MNTOPT_NONBMAND, NULL)) { 401 nbmand = B_FALSE; 402 } else if (vfs_optionisset(vfsp, MNTOPT_NBMAND, NULL)) { 403 nbmand = B_TRUE; 404 } else { 405 char osname[MAXNAMELEN]; 406 407 dmu_objset_name(os, osname); 408 if (error = dsl_prop_get_integer(osname, "nbmand", &nbmand, 409 NULL)) { 410 return (error); 411 } 412 } 413 414 /* 415 * Register property callbacks. 416 * 417 * It would probably be fine to just check for i/o error from 418 * the first prop_register(), but I guess I like to go 419 * overboard... 420 */ 421 ds = dmu_objset_ds(os); 422 error = dsl_prop_register(ds, "atime", atime_changed_cb, zfsvfs); 423 error = error ? error : dsl_prop_register(ds, 424 "xattr", xattr_changed_cb, zfsvfs); 425 error = error ? error : dsl_prop_register(ds, 426 "recordsize", blksz_changed_cb, zfsvfs); 427 error = error ? error : dsl_prop_register(ds, 428 "readonly", readonly_changed_cb, zfsvfs); 429 error = error ? error : dsl_prop_register(ds, 430 "setuid", setuid_changed_cb, zfsvfs); 431 error = error ? error : dsl_prop_register(ds, 432 "exec", exec_changed_cb, zfsvfs); 433 error = error ? error : dsl_prop_register(ds, 434 "snapdir", snapdir_changed_cb, zfsvfs); 435 error = error ? error : dsl_prop_register(ds, 436 "aclmode", acl_mode_changed_cb, zfsvfs); 437 error = error ? error : dsl_prop_register(ds, 438 "aclinherit", acl_inherit_changed_cb, zfsvfs); 439 error = error ? 
error : dsl_prop_register(ds, 440 "vscan", vscan_changed_cb, zfsvfs); 441 if (error) 442 goto unregister; 443 444 /* 445 * Invoke our callbacks to restore temporary mount options. 446 */ 447 if (do_readonly) 448 readonly_changed_cb(zfsvfs, readonly); 449 if (do_setuid) 450 setuid_changed_cb(zfsvfs, setuid); 451 if (do_exec) 452 exec_changed_cb(zfsvfs, exec); 453 if (do_xattr) 454 xattr_changed_cb(zfsvfs, xattr); 455 if (do_atime) 456 atime_changed_cb(zfsvfs, atime); 457 458 nbmand_changed_cb(zfsvfs, nbmand); 459 460 return (0); 461 462unregister: 463 /* 464 * We may attempt to unregister some callbacks that are not 465 * registered, but this is OK; it will simply return ENOMSG, 466 * which we will ignore. 467 */ 468 (void) dsl_prop_unregister(ds, "atime", atime_changed_cb, zfsvfs); 469 (void) dsl_prop_unregister(ds, "xattr", xattr_changed_cb, zfsvfs); 470 (void) dsl_prop_unregister(ds, "recordsize", blksz_changed_cb, zfsvfs); 471 (void) dsl_prop_unregister(ds, "readonly", readonly_changed_cb, zfsvfs); 472 (void) dsl_prop_unregister(ds, "setuid", setuid_changed_cb, zfsvfs); 473 (void) dsl_prop_unregister(ds, "exec", exec_changed_cb, zfsvfs); 474 (void) dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, zfsvfs); 475 (void) dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb, zfsvfs); 476 (void) dsl_prop_unregister(ds, "aclinherit", acl_inherit_changed_cb, 477 zfsvfs); 478 (void) dsl_prop_unregister(ds, "vscan", vscan_changed_cb, zfsvfs); 479 return (error); 480 481} 482 483static int 484zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting) 485{ 486 int error; 487 488 error = zfs_register_callbacks(zfsvfs->z_vfs); 489 if (error) 490 return (error); 491 492 /* 493 * Set the objset user_ptr to track its zfsvfs. 
494 */ 495 mutex_enter(&zfsvfs->z_os->os->os_user_ptr_lock); 496 dmu_objset_set_user(zfsvfs->z_os, zfsvfs); 497 mutex_exit(&zfsvfs->z_os->os->os_user_ptr_lock); 498 499 /* 500 * If we are not mounting (ie: online recv), then we don't 501 * have to worry about replaying the log as we blocked all 502 * operations out since we closed the ZIL. 503 */ 504 if (mounting) { 505 boolean_t readonly; 506 507 /* 508 * During replay we remove the read only flag to 509 * allow replays to succeed. 510 */ 511 readonly = zfsvfs->z_vfs->vfs_flag & VFS_RDONLY; 512 zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY; 513 514 /* 515 * Parse and replay the intent log. 516 */ 517 zil_replay(zfsvfs->z_os, zfsvfs, &zfsvfs->z_assign, 518 zfs_replay_vector, zfs_unlinked_drain); 519 520 zfs_unlinked_drain(zfsvfs); 521 zfsvfs->z_vfs->vfs_flag |= readonly; /* restore readonly bit */ 522 } 523 524 if (!zil_disable) 525 zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data); 526 527 return (0); 528} 529 530static void 531zfs_freezfsvfs(zfsvfs_t *zfsvfs) 532{ 533 mutex_destroy(&zfsvfs->z_znodes_lock); 534 mutex_destroy(&zfsvfs->z_online_recv_lock); 535 list_destroy(&zfsvfs->z_all_znodes); 536 rrw_destroy(&zfsvfs->z_teardown_lock); 537 rw_destroy(&zfsvfs->z_teardown_inactive_lock); 538 rw_destroy(&zfsvfs->z_fuid_lock); 539 kmem_free(zfsvfs, sizeof (zfsvfs_t)); 540} 541 542static int 543zfs_domount(vfs_t *vfsp, char *osname) 544{ 545 uint64_t recordsize, readonly; 546 int error = 0; 547 int mode; 548 zfsvfs_t *zfsvfs; 549 znode_t *zp = NULL; 550 551 ASSERT(vfsp); 552 ASSERT(osname); 553 554 /* 555 * Initialize the zfs-specific filesystem structure. 556 * Should probably make this a kmem cache, shuffle fields, 557 * and just bzero up to z_hold_mtx[]. 
558 */ 559 zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP); 560 zfsvfs->z_vfs = vfsp; 561 zfsvfs->z_parent = zfsvfs; 562 zfsvfs->z_assign = TXG_NOWAIT; 563 zfsvfs->z_max_blksz = SPA_MAXBLOCKSIZE; 564 zfsvfs->z_show_ctldir = ZFS_SNAPDIR_VISIBLE; 565 566 mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL); 567 mutex_init(&zfsvfs->z_online_recv_lock, NULL, MUTEX_DEFAULT, NULL); 568 list_create(&zfsvfs->z_all_znodes, sizeof (znode_t), 569 offsetof(znode_t, z_link_node)); 570 rrw_init(&zfsvfs->z_teardown_lock); 571 rw_init(&zfsvfs->z_teardown_inactive_lock, NULL, RW_DEFAULT, NULL); 572 rw_init(&zfsvfs->z_fuid_lock, NULL, RW_DEFAULT, NULL); 573 574 if (error = dsl_prop_get_integer(osname, "recordsize", &recordsize, 575 NULL)) 576 goto out; 577 zfsvfs->z_vfs->vfs_bsize = recordsize; 578 579 vfsp->vfs_data = zfsvfs; 580 vfsp->mnt_flag |= MNT_LOCAL; 581 vfsp->mnt_kern_flag |= MNTK_MPSAFE; 582 vfsp->mnt_kern_flag |= MNTK_LOOKUP_SHARED; 583 vfsp->mnt_kern_flag |= MNTK_SHARED_WRITES; 584 585 if (error = dsl_prop_get_integer(osname, "readonly", &readonly, NULL)) 586 goto out; 587 588 mode = DS_MODE_OWNER; 589 if (readonly) 590 mode |= DS_MODE_READONLY; 591 592 error = dmu_objset_open(osname, DMU_OST_ZFS, mode, &zfsvfs->z_os); 593 if (error == EROFS) { 594 mode = DS_MODE_OWNER | DS_MODE_READONLY; 595 error = dmu_objset_open(osname, DMU_OST_ZFS, mode, 596 &zfsvfs->z_os); 597 } 598 599 if (error) 600 goto out; 601 602 if (error = zfs_init_fs(zfsvfs, &zp)) 603 goto out; 604 605 /* 606 * Set features for file system. 
607 */ 608 zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os); 609 if (zfsvfs->z_use_fuids) { 610 vfs_set_feature(vfsp, VFSFT_XVATTR); 611 vfs_set_feature(vfsp, VFSFT_SYSATTR_VIEWS); 612 vfs_set_feature(vfsp, VFSFT_ACEMASKONACCESS); 613 vfs_set_feature(vfsp, VFSFT_ACLONCREATE); 614 } 615 if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE) { 616 vfs_set_feature(vfsp, VFSFT_DIRENTFLAGS); 617 vfs_set_feature(vfsp, VFSFT_CASEINSENSITIVE); 618 vfs_set_feature(vfsp, VFSFT_NOCASESENSITIVE); 619 } else if (zfsvfs->z_case == ZFS_CASE_MIXED) { 620 vfs_set_feature(vfsp, VFSFT_DIRENTFLAGS); 621 vfs_set_feature(vfsp, VFSFT_CASEINSENSITIVE); 622 } 623 624 if (dmu_objset_is_snapshot(zfsvfs->z_os)) { 625 uint64_t pval; 626 627 ASSERT(mode & DS_MODE_READONLY); 628 atime_changed_cb(zfsvfs, B_FALSE); 629 readonly_changed_cb(zfsvfs, B_TRUE); 630 if (error = dsl_prop_get_integer(osname, "xattr", &pval, NULL)) 631 goto out; 632 xattr_changed_cb(zfsvfs, pval); 633 zfsvfs->z_issnap = B_TRUE; 634 } else { 635 error = zfsvfs_setup(zfsvfs, B_TRUE); 636 } 637 638 vfs_mountedfrom(vfsp, osname); 639 640 if (!zfsvfs->z_issnap) 641 zfsctl_create(zfsvfs); 642out: 643 if (error) { 644 if (zfsvfs->z_os) 645 dmu_objset_close(zfsvfs->z_os); 646 zfs_freezfsvfs(zfsvfs); 647 } else { 648 atomic_add_32(&zfs_active_fs_count, 1); 649 } 650 651 return (error); 652} 653 654void 655zfs_unregister_callbacks(zfsvfs_t *zfsvfs) 656{ 657 objset_t *os = zfsvfs->z_os; 658 struct dsl_dataset *ds; 659 660 /* 661 * Unregister properties. 
662 */ 663 if (!dmu_objset_is_snapshot(os)) { 664 ds = dmu_objset_ds(os); 665 VERIFY(dsl_prop_unregister(ds, "atime", atime_changed_cb, 666 zfsvfs) == 0); 667 668 VERIFY(dsl_prop_unregister(ds, "xattr", xattr_changed_cb, 669 zfsvfs) == 0); 670 671 VERIFY(dsl_prop_unregister(ds, "recordsize", blksz_changed_cb, 672 zfsvfs) == 0); 673 674 VERIFY(dsl_prop_unregister(ds, "readonly", readonly_changed_cb, 675 zfsvfs) == 0); 676 677 VERIFY(dsl_prop_unregister(ds, "setuid", setuid_changed_cb, 678 zfsvfs) == 0); 679 680 VERIFY(dsl_prop_unregister(ds, "exec", exec_changed_cb, 681 zfsvfs) == 0); 682 683 VERIFY(dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, 684 zfsvfs) == 0); 685 686 VERIFY(dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb, 687 zfsvfs) == 0); 688 689 VERIFY(dsl_prop_unregister(ds, "aclinherit", 690 acl_inherit_changed_cb, zfsvfs) == 0); 691 692 VERIFY(dsl_prop_unregister(ds, "vscan", 693 vscan_changed_cb, zfsvfs) == 0); 694 } 695} 696 697/*ARGSUSED*/ 698static int 699zfs_mount(vfs_t *vfsp) 700{ 701 kthread_t *td = curthread; 702 vnode_t *mvp = vfsp->mnt_vnodecovered; 703 cred_t *cr = td->td_ucred; 704 char *osname; 705 int error = 0; 706 int canwrite; 707 708 if (vfs_getopt(vfsp->mnt_optnew, "from", (void **)&osname, NULL)) 709 return (EINVAL); 710 711 /* 712 * If full-owner-access is enabled and delegated administration is 713 * turned on, we must set nosuid. 714 */ 715 if (zfs_super_owner && 716 dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr) != ECANCELED) { 717 secpolicy_fs_mount_clearopts(cr, vfsp); 718 } 719 720 /* 721 * Check for mount privilege? 
722 * 723 * If we don't have privilege then see if 724 * we have local permission to allow it 725 */ 726 error = secpolicy_fs_mount(cr, mvp, vfsp); 727 if (error) { 728 error = dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr); 729 if (error != 0) 730 goto out; 731 732 if (!(vfsp->vfs_flag & MS_REMOUNT)) { 733 vattr_t vattr; 734 735 /* 736 * Make sure user is the owner of the mount point 737 * or has sufficient privileges. 738 */ 739 740 vattr.va_mask = AT_UID; 741 742 vn_lock(mvp, LK_SHARED | LK_RETRY); 743 if (error = VOP_GETATTR(mvp, &vattr, cr)) { 744 VOP_UNLOCK(mvp, 0); 745 goto out; 746 } 747 748#if 0 /* CHECK THIS! Is probably needed for zfs_suser. */ 749 if (secpolicy_vnode_owner(mvp, cr, vattr.va_uid) != 0 && 750 VOP_ACCESS(mvp, VWRITE, cr, td) != 0) { 751 error = EPERM; 752 goto out; 753 } 754#else 755 if (error = secpolicy_vnode_owner(mvp, cr, vattr.va_uid)) { 756 VOP_UNLOCK(mvp, 0); 757 goto out; 758 } 759 760 if (error = VOP_ACCESS(mvp, VWRITE, cr, td)) { 761 VOP_UNLOCK(mvp, 0); 762 goto out; 763 } 764 VOP_UNLOCK(mvp, 0); 765#endif 766 } 767 768 secpolicy_fs_mount_clearopts(cr, vfsp); 769 } 770 771 /* 772 * Refuse to mount a filesystem if we are in a local zone and the 773 * dataset is not visible. 774 */ 775 if (!INGLOBALZONE(curthread) && 776 (!zone_dataset_visible(osname, &canwrite) || !canwrite)) { 777 error = EPERM; 778 goto out; 779 } 780 781 /* 782 * When doing a remount, we simply refresh our temporary properties 783 * according to those options set in the current VFS options. 
784 */ 785 if (vfsp->vfs_flag & MS_REMOUNT) { 786 /* refresh mount options */ 787 zfs_unregister_callbacks(vfsp->vfs_data); 788 error = zfs_register_callbacks(vfsp); 789 goto out; 790 } 791 792 DROP_GIANT(); 793 error = zfs_domount(vfsp, osname); 794 PICKUP_GIANT(); 795out: 796 return (error); 797} 798 799static int 800zfs_statfs(vfs_t *vfsp, struct statfs *statp) 801{ 802 zfsvfs_t *zfsvfs = vfsp->vfs_data; 803 uint64_t refdbytes, availbytes, usedobjs, availobjs; 804 805 statp->f_version = STATFS_VERSION; 806 807 ZFS_ENTER(zfsvfs); 808 809 dmu_objset_space(zfsvfs->z_os, 810 &refdbytes, &availbytes, &usedobjs, &availobjs); 811 812 /* 813 * The underlying storage pool actually uses multiple block sizes. 814 * We report the fragsize as the smallest block size we support, 815 * and we report our blocksize as the filesystem's maximum blocksize. 816 */ 817 statp->f_bsize = zfsvfs->z_vfs->vfs_bsize; 818 statp->f_iosize = zfsvfs->z_vfs->vfs_bsize; 819 820 /* 821 * The following report "total" blocks of various kinds in the 822 * file system, but reported in terms of f_frsize - the 823 * "fragment" size. 824 */ 825 826 statp->f_blocks = (refdbytes + availbytes) / statp->f_bsize; 827 statp->f_bfree = availbytes / statp->f_bsize; 828 statp->f_bavail = statp->f_bfree; /* no root reservation */ 829 830 /* 831 * statvfs() should really be called statufs(), because it assumes 832 * static metadata. ZFS doesn't preallocate files, so the best 833 * we can do is report the max that could possibly fit in f_files, 834 * and that minus the number actually used in f_ffree. 835 * For f_ffree, report the smaller of the number of object available 836 * and the number of blocks (each object will take at least a block). 837 */ 838 statp->f_ffree = MIN(availobjs, statp->f_bfree); 839 statp->f_files = statp->f_ffree + usedobjs; 840 841 /* 842 * We're a zfs filesystem. 
 */
	(void) strlcpy(statp->f_fstypename, "zfs", sizeof(statp->f_fstypename));

	strlcpy(statp->f_mntfromname, vfsp->mnt_stat.f_mntfromname,
	    sizeof(statp->f_mntfromname));
	strlcpy(statp->f_mntonname, vfsp->mnt_stat.f_mntonname,
	    sizeof(statp->f_mntonname));

	statp->f_namemax = ZFS_MAXNAMELEN;

	ZFS_EXIT(zfsvfs);
	return (0);
}

/*
 * VFS_ROOT(9) entry point: look up the root znode (zfsvfs->z_root),
 * return its vnode locked (with 'flags' passed to vn_lock()) in *vpp
 * and mark it VV_ROOT.
 *
 * Returns 0 on success or an errno from zfs_zget()/vn_lock().
 *
 * NOTE(review): if vn_lock() fails, the error is propagated but the
 * vnode is still tagged VV_ROOT and the hold taken by zfs_zget() is
 * not dropped here — confirm callers tolerate *vpp on failure.
 */
static int
zfs_root(vfs_t *vfsp, int flags, vnode_t **vpp)
{
	zfsvfs_t *zfsvfs = vfsp->vfs_data;
	znode_t *rootzp;
	int error;

	ZFS_ENTER(zfsvfs);

	error = zfs_zget(zfsvfs, zfsvfs->z_root, &rootzp);
	if (error == 0) {
		*vpp = ZTOV(rootzp);
		error = vn_lock(*vpp, flags);
		(*vpp)->v_vflag |= VV_ROOT;
	}

	ZFS_EXIT(zfsvfs);
	return (error);
}

/*
 * Teardown the zfsvfs::z_os.
 *
 * Note, if 'unmounting' is FALSE, we return with the 'z_teardown_lock'
 * and 'z_teardown_inactive_lock' held (write mode), so that the caller
 * (online recv via zfs_suspend_fs()) can later release them through
 * zfs_resume_fs().
 */
static int
zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting)
{
	znode_t	*zp;

	rrw_enter(&zfsvfs->z_teardown_lock, RW_WRITER, FTAG);

	if (!unmounting) {
		/*
		 * We purge the parent filesystem's vfsp as the parent
		 * filesystem and all of its snapshots have their vnode's
		 * v_vfsp set to the parent's filesystem's vfsp.  Note,
		 * 'z_parent' is self referential for non-snapshots.
		 */
		(void) dnlc_purge_vfsp(zfsvfs->z_parent->z_vfs, 0);
	}

	/*
	 * Close the zil.  NB: Can't close the zil while zfs_inactive
	 * threads are blocked as zil_close can call zfs_inactive.
	 */
	if (zfsvfs->z_log) {
		zil_close(zfsvfs->z_log);
		zfsvfs->z_log = NULL;
	}

	rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_WRITER);

	/*
	 * If we are not unmounting (ie: online recv) and someone already
	 * unmounted this file system while we were doing the switcheroo,
	 * or a reopen of z_os failed then just bail out now.
	 */
	if (!unmounting && (zfsvfs->z_unmounted || zfsvfs->z_os == NULL)) {
		rw_exit(&zfsvfs->z_teardown_inactive_lock);
		rrw_exit(&zfsvfs->z_teardown_lock, FTAG);
		return (EIO);
	}

	/*
	 * At this point there are no vops active, and any new vops will
	 * fail with EIO since we have z_teardown_lock for writer (only
	 * relevant for forced unmount).
	 *
	 * Release all holds on dbufs.
	 */
	mutex_enter(&zfsvfs->z_znodes_lock);
	for (zp = list_head(&zfsvfs->z_all_znodes); zp != NULL;
	    zp = list_next(&zfsvfs->z_all_znodes, zp))
		if (zp->z_dbuf) {
			ASSERT(ZTOV(zp)->v_count >= 0);
			zfs_znode_dmu_fini(zp);
		}
	mutex_exit(&zfsvfs->z_znodes_lock);

	/*
	 * If we are unmounting, set the unmounted flag and let new vops
	 * unblock.  zfs_inactive will have the unmounted behavior, and all
	 * other vops will fail with EIO.
	 */
	if (unmounting) {
		zfsvfs->z_unmounted = B_TRUE;
		rrw_exit(&zfsvfs->z_teardown_lock, FTAG);
		rw_exit(&zfsvfs->z_teardown_inactive_lock);
	}

	/*
	 * z_os will be NULL if there was an error in attempting to reopen
	 * zfsvfs, so just return as the properties had already been
	 * unregistered and cached data had been evicted before.
	 */
	if (zfsvfs->z_os == NULL)
		return (0);

	/*
	 * Unregister properties.
	 */
	zfs_unregister_callbacks(zfsvfs);

	/*
	 * Evict cached data.  If the first eviction pass fails (dbufs
	 * still dirty), wait for the current txg to sync and retry.
	 */
	if (dmu_objset_evict_dbufs(zfsvfs->z_os)) {
		txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0);
		(void) dmu_objset_evict_dbufs(zfsvfs->z_os);
	}

	return (0);
}

/*
 * VFS_UNMOUNT(9) entry point.  Checks unmount permission (owner may be
 * granted ZFS_DELEG_PERM_MOUNT delegation), unmounts .zfs snapshots,
 * flushes vnodes, tears down the objset and destroys the '.zfs'
 * control directory.  MS_FORCE is accepted but still experimental.
 */
/*ARGSUSED*/
static int
zfs_umount(vfs_t *vfsp, int fflag)
{
	zfsvfs_t *zfsvfs = vfsp->vfs_data;
	objset_t *os;
	cred_t *cr = curthread->td_ucred;
	int ret;

	if (fflag & MS_FORCE) {
		/* TODO: Force unmount is not well implemented yet, so deny it. */
		ZFS_LOG(0, "Force unmount is experimental - report any problems.");
	}

	ret = secpolicy_fs_unmount(cr, vfsp);
	if (ret) {
		/*
		 * Privilege check failed; fall back to the ZFS "mount"
		 * delegated permission on the dataset itself.
		 */
		ret = dsl_deleg_access((char *)refstr_value(vfsp->vfs_resource),
		    ZFS_DELEG_PERM_MOUNT, cr);
		if (ret)
			return (ret);
	}
	/*
	 * We purge the parent filesystem's vfsp as the parent filesystem
	 * and all of its snapshots have their vnode's v_vfsp set to the
	 * parent's filesystem's vfsp.  Note, 'z_parent' is self
	 * referential for non-snapshots.
	 */
	(void) dnlc_purge_vfsp(zfsvfs->z_parent->z_vfs, 0);

	/*
	 * Unmount any snapshots mounted under .zfs before unmounting the
	 * dataset itself.
	 */
	if (zfsvfs->z_ctldir != NULL) {
		if ((ret = zfsctl_umount_snapshots(vfsp, fflag, cr)) != 0)
			return (ret);
		/*
		 * Non-forced, non-closing vflush() is expected to fail
		 * with EBUSY while the .zfs vnode is still referenced.
		 */
		ret = vflush(vfsp, 0, 0, curthread);
		ASSERT(ret == EBUSY);
		if (!(fflag & MS_FORCE)) {
			if (zfsvfs->z_ctldir->v_count > 1)
				return (EBUSY);
			ASSERT(zfsvfs->z_ctldir->v_count == 1);
		}
		zfsctl_destroy(zfsvfs);
		ASSERT(zfsvfs->z_ctldir == NULL);
	}

	/*
	 * Flush all the files.
	 */
	ret = vflush(vfsp, 1, (fflag & MS_FORCE) ? FORCECLOSE : 0, curthread);
	if (ret != 0) {
		/* Re-create the control directory undone above. */
		if (!zfsvfs->z_issnap) {
			zfsctl_create(zfsvfs);
			ASSERT(zfsvfs->z_ctldir != NULL);
		}
		return (ret);
	}

	if (!(fflag & MS_FORCE)) {
		/*
		 * Check the number of active vnodes in the file system.
		 * Our count is maintained in the vfs structure, but the
		 * number is off by 1 to indicate a hold on the vfs
		 * structure itself.
		 *
		 * The '.zfs' directory maintains a reference of its
		 * own, and any active references underneath are
		 * reflected in the vnode count.
		 */
		if (zfsvfs->z_ctldir == NULL) {
			if (vfsp->vfs_count > 1)
				return (EBUSY);
		} else {
			if (vfsp->vfs_count > 2 ||
			    zfsvfs->z_ctldir->v_count > 1)
				return (EBUSY);
		}
	} else {
		MNT_ILOCK(vfsp);
		vfsp->mnt_kern_flag |= MNTK_UNMOUNTF;
		MNT_IUNLOCK(vfsp);
	}

	VERIFY(zfsvfs_teardown(zfsvfs, B_TRUE) == 0);
	os = zfsvfs->z_os;

	/*
	 * z_os will be NULL if there was an error in
	 * attempting to reopen zfsvfs.
	 */
	if (os != NULL) {
		/*
		 * Unset the objset user_ptr.
		 */
		mutex_enter(&os->os->os_user_ptr_lock);
		dmu_objset_set_user(os, NULL);
		mutex_exit(&os->os->os_user_ptr_lock);

		/*
		 * Finally release the objset.
		 */
		dmu_objset_close(os);
	}

	/*
	 * We can now safely destroy the '.zfs' directory node.
	 */
	if (zfsvfs->z_ctldir != NULL)
		zfsctl_destroy(zfsvfs);
	if (zfsvfs->z_issnap) {
		vnode_t *svp = vfsp->mnt_vnodecovered;

		/* Drop the snapshot's extra hold on the covered vnode. */
		ASSERT(svp->v_count == 2 || svp->v_count == 1);
		if (svp->v_count == 2)
			VN_RELE(svp);
	}
	zfs_freevfs(vfsp);

	return (0);
}

/*
 * VFS_VGET(9) entry point: translate an inode number into a locked,
 * referenced vnode.  Unlinked-but-open znodes are rejected with EINVAL.
 *
 * NOTE(review): the vn_lock() return value is ignored here — confirm
 * whether a lock failure should be propagated as in other VFS ops.
 */
static int
zfs_vget(vfs_t *vfsp, ino_t ino, int flags, vnode_t **vpp)
{
	zfsvfs_t *zfsvfs = vfsp->vfs_data;
	znode_t *zp;
	int err;

	ZFS_ENTER(zfsvfs);
	err = zfs_zget(zfsvfs, ino, &zp);
	if (err == 0 && zp->z_unlinked) {
		VN_RELE(ZTOV(zp));
		err = EINVAL;
	}
	if (err != 0)
		*vpp = NULL;
	else {
		*vpp = ZTOV(zp);
		vn_lock(*vpp, flags);
	}
	ZFS_EXIT(zfsvfs);
	return (err);
}

/*
 * VFS_FHTOVP(9) entry point: decode an NFS file handle into a locked,
 * referenced vnode.
 *
 * Two fid layouts are supported: SHORT_FID_LEN (object + generation,
 * both stored as little-endian byte arrays) and LONG_FID_LEN, which
 * additionally carries an objset id + generation used to redirect the
 * lookup into a snapshot filesystem via zfsctl_lookup_objset().
 * A generation of zero with a ZFSCTL_INO_* object denotes the '.zfs'
 * control directory namespace.
 */
static int
zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, vnode_t **vpp)
{
	zfsvfs_t *zfsvfs = vfsp->vfs_data;
	znode_t *zp;
	uint64_t object = 0;
	uint64_t fid_gen = 0;
	uint64_t gen_mask;
	uint64_t zp_gen;
	int i, err;

	*vpp = NULL;

	ZFS_ENTER(zfsvfs);

	if (fidp->fid_len == LONG_FID_LEN) {
		zfid_long_t *zlfid = (zfid_long_t *)fidp;
		uint64_t objsetid = 0;
		uint64_t setgen = 0;

		/* Reassemble the little-endian objset id and generation. */
		for (i = 0; i < sizeof (zlfid->zf_setid); i++)
			objsetid |= ((uint64_t)zlfid->zf_setid[i]) << (8 * i);

		for (i = 0; i < sizeof (zlfid->zf_setgen); i++)
			setgen |= ((uint64_t)zlfid->zf_setgen[i]) << (8 * i);

		ZFS_EXIT(zfsvfs);

		/* Switch 'zfsvfs' to the snapshot the fid points into. */
		err = zfsctl_lookup_objset(vfsp, objsetid, &zfsvfs);
		if (err)
			return (EINVAL);
		ZFS_ENTER(zfsvfs);
	}

	if (fidp->fid_len == SHORT_FID_LEN || fidp->fid_len == LONG_FID_LEN) {
		zfid_short_t *zfid = (zfid_short_t *)fidp;

		for (i = 0; i < sizeof (zfid->zf_object); i++)
			object |= ((uint64_t)zfid->zf_object[i]) << (8 * i);

		for (i = 0; i < sizeof (zfid->zf_gen); i++)
			fid_gen |= ((uint64_t)zfid->zf_gen[i]) << (8 * i);
	} else {
		ZFS_EXIT(zfsvfs);
		return (EINVAL);
	}

	/* A zero fid_gen means we are in the .zfs control directories */
	if (fid_gen == 0 &&
	    (object == ZFSCTL_INO_ROOT || object == ZFSCTL_INO_SNAPDIR)) {
		*vpp = zfsvfs->z_ctldir;
		ASSERT(*vpp != NULL);
		if (object == ZFSCTL_INO_SNAPDIR) {
			VERIFY(zfsctl_root_lookup(*vpp, "snapshot", vpp, NULL,
			    0, NULL, NULL, NULL, NULL, NULL) == 0);
		} else {
			VN_HOLD(*vpp);
		}
		ZFS_EXIT(zfsvfs);
		/* XXX: LK_RETRY? */
		vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY);
		return (0);
	}

	/*
	 * 'i' still holds sizeof (zf_gen) from the loop above, so this
	 * masks zp_gen down to the width actually stored in the fid.
	 */
	gen_mask = -1ULL >> (64 - 8 * i);

	dprintf("getting %llu [%u mask %llx]\n", object, fid_gen, gen_mask);
	if (err = zfs_zget(zfsvfs, object, &zp)) {
		ZFS_EXIT(zfsvfs);
		return (err);
	}
	zp_gen = zp->z_phys->zp_gen & gen_mask;
	if (zp_gen == 0)
		zp_gen = 1;
	if (zp->z_unlinked || zp_gen != fid_gen) {
		/* Stale handle: object was recycled or is being deleted. */
		dprintf("znode gen (%u) != fid gen (%u)\n", zp_gen, fid_gen);
		VN_RELE(ZTOV(zp));
		ZFS_EXIT(zfsvfs);
		return (EINVAL);
	}

	*vpp = ZTOV(zp);
	/* XXX: LK_RETRY? */
	vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY);
	vnode_create_vobject(*vpp, zp->z_phys->zp_size, curthread);
	ZFS_EXIT(zfsvfs);
	return (0);
}

/*
 * Block out VOPs and close zfsvfs_t::z_os (used by online recv).
 *
 * On success the dataset name and open mode are returned via 'name'
 * and 'mode' so zfs_resume_fs() can reopen the objset later.
 *
 * Note, if successful, then we return with the 'z_teardown_lock' and
 * 'z_teardown_inactive_lock' write held.
 */
int
zfs_suspend_fs(zfsvfs_t *zfsvfs, char *name, int *mode)
{
	int error;

	if ((error = zfsvfs_teardown(zfsvfs, B_FALSE)) != 0)
		return (error);

	*mode = zfsvfs->z_os->os_mode;
	dmu_objset_name(zfsvfs->z_os, name);
	dmu_objset_close(zfsvfs->z_os);

	return (0);
}

/*
 * Reopen zfsvfs_t::z_os and release VOPs.  Counterpart of
 * zfs_suspend_fs(); must be called with both teardown locks write
 * held.  If the objset cannot be reopened the file system is forcibly
 * unmounted.
 */
int
zfs_resume_fs(zfsvfs_t *zfsvfs, const char *osname, int mode)
{
	int err;

	ASSERT(RRW_WRITE_HELD(&zfsvfs->z_teardown_lock));
	ASSERT(RW_WRITE_HELD(&zfsvfs->z_teardown_inactive_lock));

	err = dmu_objset_open(osname, DMU_OST_ZFS, mode, &zfsvfs->z_os);
	if (err) {
		zfsvfs->z_os = NULL;
	} else {
		znode_t *zp;

		VERIFY(zfsvfs_setup(zfsvfs, B_FALSE) == 0);

		/*
		 * Attempt to re-establish all the active znodes with
		 * their dbufs.  If a zfs_rezget() fails, then we'll let
		 * any potential callers discover that via ZFS_ENTER_VERIFY_VP
		 * when they try to use their znode.
		 */
		mutex_enter(&zfsvfs->z_znodes_lock);
		for (zp = list_head(&zfsvfs->z_all_znodes); zp;
		    zp = list_next(&zfsvfs->z_all_znodes, zp)) {
			(void) zfs_rezget(zp);
		}
		mutex_exit(&zfsvfs->z_znodes_lock);

	}

	/* release the VOPs */
	rw_exit(&zfsvfs->z_teardown_inactive_lock);
	rrw_exit(&zfsvfs->z_teardown_lock, FTAG);

	if (err) {
		/*
		 * Since we couldn't reopen zfsvfs::z_os, force
		 * unmount this file system.
		 */
		if (vn_vfswlock(zfsvfs->z_vfs->vfs_vnodecovered) == 0)
			(void) dounmount(zfsvfs->z_vfs, MS_FORCE, curthread);
	}
	return (err);
}

/*
 * Release per-mount resources: object-hold mutexes, FUID tables and
 * the zfsvfs itself; drop the global active-filesystem count.
 */
static void
zfs_freevfs(vfs_t *vfsp)
{
	zfsvfs_t *zfsvfs = vfsp->vfs_data;
	int i;

	for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
		mutex_destroy(&zfsvfs->z_hold_mtx[i]);

	zfs_fuid_destroy(zfsvfs);
	zfs_freezfsvfs(zfsvfs);

	atomic_add_32(&zfs_active_fs_count, -1);
}

#ifdef __i386__
/* Saved pre-ZFS value of 'desiredvnodes' for zfs_vnodes_adjust_back(). */
static int desiredvnodes_backup;
#endif

/*
 * On i386, shrink the global vnode limit to leave more KVA for ZFS
 * (only if the administrator has not tuned 'desiredvnodes' manually).
 */
static void
zfs_vnodes_adjust(void)
{
#ifdef __i386__
	int newdesiredvnodes;

	desiredvnodes_backup = desiredvnodes;

	/*
	 * We calculate newdesiredvnodes the same way it is done in
	 * vntblinit().  If it is equal to desiredvnodes, it means that
	 * it wasn't tuned by the administrator and we can tune it down.
	 */
	newdesiredvnodes = min(maxproc + cnt.v_page_count / 4, 2 *
	    vm_kmem_size / (5 * (sizeof(struct vm_object) +
	    sizeof(struct vnode))));
	if (newdesiredvnodes == desiredvnodes)
		desiredvnodes = (3 * newdesiredvnodes) / 4;
#endif
}

/*
 * Undo zfs_vnodes_adjust() on module unload.
 */
static void
zfs_vnodes_adjust_back(void)
{

#ifdef __i386__
	desiredvnodes = desiredvnodes_backup;
#endif
}

/*
 * Module-load initialization: znode cache, '.zfs' control directory
 * structures and the i386 vnode-limit adjustment.
 */
void
zfs_init(void)
{

	printf("ZFS filesystem version " SPA_VERSION_STRING "\n");

	/*
	 * Initialize znode cache, vnode ops, etc...
	 */
	zfs_znode_init();

	/*
	 * Initialize .zfs directory structures
	 */
	zfsctl_init();

	/*
	 * Reduce number of vnodes.  Originally number of vnodes is calculated
	 * with UFS inode in mind.  We reduce it here, because it's too big for
	 * ZFS/i386.
	 */
	zfs_vnodes_adjust();
}

/*
 * Module-unload teardown; reverse order of zfs_init().
 */
void
zfs_fini(void)
{
	zfsctl_fini();
	zfs_znode_fini();
	zfs_vnodes_adjust_back();
}

/*
 * Returns non-zero while any ZFS file system is mounted (used to veto
 * module unload).
 */
int
zfs_busy(void)
{
	return (zfs_active_fs_count != 0);
}

/*
 * Upgrade the on-disk ZPL version of dataset 'name' to 'newvers'.
 * The version may only move forward and must be within
 * [ZPL_VERSION_INITIAL, ZPL_VERSION].  Returns 0 or an errno.
 */
int
zfs_set_version(const char *name, uint64_t newvers)
{
	int error;
	objset_t *os;
	dmu_tx_t *tx;
	uint64_t curvers;

	/*
	 * XXX for now, require that the filesystem be unmounted.  Would
	 * be nice to find the zfsvfs_t and just update that if
	 * possible.
	 */

	if (newvers < ZPL_VERSION_INITIAL || newvers > ZPL_VERSION)
		return (EINVAL);

	error = dmu_objset_open(name, DMU_OST_ZFS, DS_MODE_OWNER, &os);
	if (error)
		return (error);

	error = zap_lookup(os, MASTER_NODE_OBJ, ZPL_VERSION_STR,
	    8, 1, &curvers);
	if (error)
		goto out;
	if (newvers < curvers) {
		/* Downgrades are not allowed. */
		error = EINVAL;
		goto out;
	}

	tx = dmu_tx_create(os);
	dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, 0, ZPL_VERSION_STR);
	error = dmu_tx_assign(tx, TXG_WAIT);
	if (error) {
		dmu_tx_abort(tx);
		goto out;
	}
	error = zap_update(os, MASTER_NODE_OBJ, ZPL_VERSION_STR, 8, 1,
	    &newvers, tx);

	spa_history_internal_log(LOG_DS_UPGRADE,
	    dmu_objset_spa(os), tx, CRED(),
	    "oldver=%llu newver=%llu dataset = %llu", curvers, newvers,
	    dmu_objset_id(os));
	dmu_tx_commit(tx);

out:
	dmu_objset_close(os);
	return (error);
}
/*
 * Read a property stored within the master node.
 */
int
zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value)
{
	const char *pname;
	int error = ENOENT;

	/*
	 * Look up the file system's value for the property.  For the
	 * version property, we look up a slightly different string.
	 */
	if (prop == ZFS_PROP_VERSION)
		pname = ZPL_VERSION_STR;
	else
		pname = zfs_prop_to_name(prop);

	if (os != NULL)
		error = zap_lookup(os, MASTER_NODE_OBJ, pname, 8, 1, value);

	if (error == ENOENT) {
		/* No value set, use the default value */
		switch (prop) {
		case ZFS_PROP_VERSION:
			*value = ZPL_VERSION;
			break;
		case ZFS_PROP_NORMALIZE:
		case ZFS_PROP_UTF8ONLY:
			*value = 0;
			break;
		case ZFS_PROP_CASE:
			*value = ZFS_CASE_SENSITIVE;
			break;
		default:
			return (error);
		}
		error = 0;
	}
	return (error);
}