/* zfs_vfsops.c revision 206667 */
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
2451852Sbp */ 2551852Sbp 2651852Sbp#include <sys/types.h> 2751852Sbp#include <sys/param.h> 2851852Sbp#include <sys/systm.h> 2951852Sbp#include <sys/kernel.h> 3051852Sbp#include <sys/sysmacros.h> 3151852Sbp#include <sys/kmem.h> 3251852Sbp#include <sys/acl.h> 3351852Sbp#include <sys/vnode.h> 3451852Sbp#include <sys/vfs.h> 3551852Sbp#include <sys/mntent.h> 3651852Sbp#include <sys/mount.h> 3751852Sbp#include <sys/cmn_err.h> 3851852Sbp#include <sys/zfs_znode.h> 3951852Sbp#include <sys/zfs_dir.h> 4051852Sbp#include <sys/zil.h> 4151852Sbp#include <sys/fs/zfs.h> 4251852Sbp#include <sys/dmu.h> 4351852Sbp#include <sys/dsl_prop.h> 4451852Sbp#include <sys/dsl_dataset.h> 4551852Sbp#include <sys/dsl_deleg.h> 4651852Sbp#include <sys/spa.h> 4751852Sbp#include <sys/zap.h> 4860041Sphk#include <sys/varargs.h> 4951852Sbp#include <sys/policy.h> 5051852Sbp#include <sys/atomic.h> 5151852Sbp#include <sys/zfs_ioctl.h> 5251852Sbp#include <sys/zfs_ctldir.h> 5351852Sbp#include <sys/zfs_fuid.h> 5451852Sbp#include <sys/sunddi.h> 5551852Sbp#include <sys/dnlc.h> 5651852Sbp#include <sys/dmu_objset.h> 5777223Sru#include <sys/spa_boot.h> 5877223Sru#include <sys/vdev_impl.h> /* VDEV_BOOT_VERSION */ 5977223Sru 6051852Sbpstruct mtx zfs_debug_mtx; 6151852SbpMTX_SYSINIT(zfs_debug_mtx, &zfs_debug_mtx, "zfs_debug", MTX_DEF); 6251852Sbp 6351852SbpSYSCTL_NODE(_vfs, OID_AUTO, zfs, CTLFLAG_RW, 0, "ZFS file system"); 6451852Sbp 6551852Sbpint zfs_super_owner = 0; 6696755StrhodesSYSCTL_INT(_vfs_zfs, OID_AUTO, super_owner, CTLFLAG_RW, &zfs_super_owner, 0, 6751852Sbp "File system owner can perform privileged operation on his file systems"); 6851852Sbp 6951852Sbpint zfs_debug_level = 0; 7059755SpeterTUNABLE_INT("vfs.zfs.debug", &zfs_debug_level); 7174637SbpSYSCTL_INT(_vfs_zfs, OID_AUTO, debug, CTLFLAG_RW, &zfs_debug_level, 0, 7259755Speter "Debug level"); 73116271Sphk 74116271SphkSYSCTL_NODE(_vfs_zfs, OID_AUTO, version, CTLFLAG_RD, 0, "ZFS versions"); 75116271Sphkstatic int zfs_version_acl = ZFS_ACL_VERSION; 
76116271SphkSYSCTL_INT(_vfs_zfs_version, OID_AUTO, acl, CTLFLAG_RD, &zfs_version_acl, 0, 77116271Sphk "ZFS_ACL_VERSION"); 78116271Sphkstatic int zfs_version_dmu_backup_header = DMU_BACKUP_HEADER_VERSION; 79116271SphkSYSCTL_INT(_vfs_zfs_version, OID_AUTO, dmu_backup_header, CTLFLAG_RD, 80116271Sphk &zfs_version_dmu_backup_header, 0, "DMU_BACKUP_HEADER_VERSION"); 8151852Sbpstatic int zfs_version_dmu_backup_stream = DMU_BACKUP_STREAM_VERSION; 8251852SbpSYSCTL_INT(_vfs_zfs_version, OID_AUTO, dmu_backup_stream, CTLFLAG_RD, 83116271Sphk &zfs_version_dmu_backup_stream, 0, "DMU_BACKUP_STREAM_VERSION"); 84116271Sphkstatic int zfs_version_spa = SPA_VERSION; 85116271SphkSYSCTL_INT(_vfs_zfs_version, OID_AUTO, spa, CTLFLAG_RD, &zfs_version_spa, 0, 86116271Sphk "SPA_VERSION"); 87116271Sphkstatic int zfs_version_vdev_boot = VDEV_BOOT_VERSION; 88116271SphkSYSCTL_INT(_vfs_zfs_version, OID_AUTO, vdev_boot, CTLFLAG_RD, 89116271Sphk &zfs_version_vdev_boot, 0, "VDEV_BOOT_VERSION"); 90116271Sphkstatic int zfs_version_zpl = ZPL_VERSION; 91116271SphkSYSCTL_INT(_vfs_zfs_version, OID_AUTO, zpl, CTLFLAG_RD, &zfs_version_zpl, 0, 9251852Sbp "ZPL_VERSION"); 9351852Sbp 9451852Sbpstatic int zfs_mount(vfs_t *vfsp); 9551852Sbpstatic int zfs_umount(vfs_t *vfsp, int fflag); 9651852Sbpstatic int zfs_root(vfs_t *vfsp, int flags, vnode_t **vpp); 9751852Sbpstatic int zfs_statfs(vfs_t *vfsp, struct statfs *statp); 9851852Sbpstatic int zfs_vget(vfs_t *vfsp, ino_t ino, int flags, vnode_t **vpp); 9951852Sbpstatic int zfs_sync(vfs_t *vfsp, int waitfor); 10051852Sbpstatic int zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, int *extflagsp, 10151852Sbp struct ucred **credanonp, int *numsecflavors, int **secflavors); 10251852Sbpstatic int zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, vnode_t **vpp); 10351852Sbpstatic void zfs_objset_close(zfsvfs_t *zfsvfs); 10451852Sbpstatic void zfs_freevfs(vfs_t *vfsp); 10551852Sbp 10651852Sbpstatic struct vfsops zfs_vfsops = { 10751852Sbp .vfs_mount = zfs_mount, 10851852Sbp 
.vfs_unmount = zfs_umount, 10951852Sbp .vfs_root = zfs_root, 11051852Sbp .vfs_statfs = zfs_statfs, 11151852Sbp .vfs_vget = zfs_vget, 11251852Sbp .vfs_sync = zfs_sync, 11351852Sbp .vfs_checkexp = zfs_checkexp, 11451852Sbp .vfs_fhtovp = zfs_fhtovp, 11552814Sarchie}; 11652814Sarchie 11751852SbpVFS_SET(zfs_vfsops, zfs, VFCF_JAIL | VFCF_DELEGADMIN); 11851852Sbp 11951852Sbp/* 12051852Sbp * We need to keep a count of active fs's. 121111119Simp * This is necessary to prevent our module 12251852Sbp * from being unloaded after a umount -f 12351852Sbp */ 12452814Sarchiestatic uint32_t zfs_active_fs_count = 0; 12552814Sarchie 12651852Sbp/*ARGSUSED*/ 12751852Sbpstatic int 12851852Sbpzfs_sync(vfs_t *vfsp, int waitfor) 12951852Sbp{ 13051852Sbp 13151852Sbp /* 13251852Sbp * Data integrity is job one. We don't want a compromised kernel 13351852Sbp * writing to the storage pool, so we never sync during panic. 13451852Sbp */ 13551852Sbp if (panicstr) 13651852Sbp return (0); 13751852Sbp 13851852Sbp if (vfsp != NULL) { 13951852Sbp /* 14083366Sjulian * Sync a specific filesystem. 14151852Sbp */ 14251852Sbp zfsvfs_t *zfsvfs = vfsp->vfs_data; 14351852Sbp int error; 14451852Sbp 14551852Sbp error = vfs_stdsync(vfsp, waitfor); 14651852Sbp if (error != 0) 14751852Sbp return (error); 14851852Sbp 14951852Sbp ZFS_ENTER(zfsvfs); 15051852Sbp if (zfsvfs->z_log != NULL) 15151852Sbp zil_commit(zfsvfs->z_log, UINT64_MAX, 0); 15251852Sbp else 15351852Sbp txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0); 15451852Sbp ZFS_EXIT(zfsvfs); 15551852Sbp } else { 15651852Sbp /* 15751852Sbp * Sync all ZFS filesystems. This is what happens when you 15851852Sbp * run sync(1M). Unlike other filesystems, ZFS honors the 15951852Sbp * request by waiting for all pools to commit all dirty data. 
16051852Sbp */ 16151852Sbp spa_sync_allpools(); 16251852Sbp } 16351852Sbp 16451852Sbp return (0); 16591406Sjhb} 16651852Sbp 16751852Sbpstatic void 16851852Sbpatime_changed_cb(void *arg, uint64_t newval) 16951852Sbp{ 17051852Sbp zfsvfs_t *zfsvfs = arg; 17151852Sbp 17251852Sbp if (newval == TRUE) { 17351852Sbp zfsvfs->z_atime = TRUE; 17451852Sbp zfsvfs->z_vfs->vfs_flag &= ~MNT_NOATIME; 17583366Sjulian vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME); 17651852Sbp vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_ATIME, NULL, 0); 17751852Sbp } else { 178112916Stjr zfsvfs->z_atime = FALSE; 179112916Stjr zfsvfs->z_vfs->vfs_flag |= MNT_NOATIME; 18051852Sbp vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_ATIME); 18151852Sbp vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME, NULL, 0); 18251852Sbp } 18351852Sbp} 18451852Sbp 18551852Sbpstatic void 18651852Sbpxattr_changed_cb(void *arg, uint64_t newval) 18751852Sbp{ 18851852Sbp zfsvfs_t *zfsvfs = arg; 18951852Sbp 19051852Sbp if (newval == TRUE) { 19151852Sbp /* XXX locking on vfs_flag? */ 19251852Sbp#ifdef TODO 19351852Sbp zfsvfs->z_vfs->vfs_flag |= VFS_XATTR; 19451852Sbp#endif 19551852Sbp vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOXATTR); 19651852Sbp vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_XATTR, NULL, 0); 19751852Sbp } else { 19851852Sbp /* XXX locking on vfs_flag? 
*/ 19951852Sbp#ifdef TODO 20051852Sbp zfsvfs->z_vfs->vfs_flag &= ~VFS_XATTR; 20151852Sbp#endif 20251852Sbp vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_XATTR); 20351852Sbp vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOXATTR, NULL, 0); 20451852Sbp } 20551852Sbp} 20651852Sbp 20751852Sbpstatic void 20851852Sbpblksz_changed_cb(void *arg, uint64_t newval) 20951852Sbp{ 21051852Sbp zfsvfs_t *zfsvfs = arg; 211132023Salfred 21251852Sbp if (newval < SPA_MINBLOCKSIZE || 21351852Sbp newval > SPA_MAXBLOCKSIZE || !ISP2(newval)) 21451852Sbp newval = SPA_MAXBLOCKSIZE; 21551852Sbp 21651852Sbp zfsvfs->z_max_blksz = newval; 21783366Sjulian zfsvfs->z_vfs->mnt_stat.f_iosize = newval; 218103936Sjeff} 21951852Sbp 22051852Sbpstatic void 22151852Sbpreadonly_changed_cb(void *arg, uint64_t newval) 22251852Sbp{ 22351852Sbp zfsvfs_t *zfsvfs = arg; 22451852Sbp 22551852Sbp if (newval) { 22651852Sbp /* XXX locking on vfs_flag? */ 22751852Sbp zfsvfs->z_vfs->vfs_flag |= VFS_RDONLY; 22851852Sbp vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RW); 22951852Sbp vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RO, NULL, 0); 23083366Sjulian } else { 23151852Sbp /* XXX locking on vfs_flag? 
*/ 23251852Sbp zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY; 23351852Sbp vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RO); 23451852Sbp vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RW, NULL, 0); 23551852Sbp } 23651852Sbp} 23751852Sbp 23851852Sbpstatic void 23951852Sbpsetuid_changed_cb(void *arg, uint64_t newval) 24076688Siedowse{ 241132023Salfred zfsvfs_t *zfsvfs = arg; 24276688Siedowse 24351852Sbp if (newval == FALSE) { 24451852Sbp zfsvfs->z_vfs->vfs_flag |= VFS_NOSETUID; 24551852Sbp vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_SETUID); 24691406Sjhb vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID, NULL, 0); 24774062Sbp } else { 24883366Sjulian zfsvfs->z_vfs->vfs_flag &= ~VFS_NOSETUID; 24951852Sbp vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID); 25051852Sbp vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_SETUID, NULL, 0); 25151852Sbp } 25252814Sarchie} 25351852Sbp 25451852Sbpstatic void 25551852Sbpexec_changed_cb(void *arg, uint64_t newval) 25651852Sbp{ 25751852Sbp zfsvfs_t *zfsvfs = arg; 25851852Sbp 25951852Sbp if (newval == FALSE) { 260132023Salfred zfsvfs->z_vfs->vfs_flag |= VFS_NOEXEC; 26151852Sbp vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_EXEC); 26251852Sbp vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC, NULL, 0); 26351852Sbp } else { 26451852Sbp zfsvfs->z_vfs->vfs_flag &= ~VFS_NOEXEC; 26551852Sbp vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC); 26691406Sjhb vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_EXEC, NULL, 0); 26751852Sbp } 26851852Sbp} 26951852Sbp 27051852Sbp/* 27151852Sbp * The nbmand mount option can be changed at mount time. 
27251852Sbp * We can't allow it to be toggled on live file systems or incorrect 27351852Sbp * behavior may be seen from cifs clients 27483366Sjulian * 27566540Sbp * This property isn't registered via dsl_prop_register(), but this callback 27651852Sbp * will be called when a file system is first mounted 27751852Sbp */ 27851852Sbpstatic void 27983366Sjuliannbmand_changed_cb(void *arg, uint64_t newval) 28051852Sbp{ 28151852Sbp zfsvfs_t *zfsvfs = arg; 28251852Sbp if (newval == FALSE) { 28383366Sjulian vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NBMAND); 28451852Sbp vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NONBMAND, NULL, 0); 28551852Sbp } else { 28651852Sbp vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NONBMAND); 28751852Sbp vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NBMAND, NULL, 0); 28851852Sbp } 28951852Sbp} 29051852Sbp 29151852Sbpstatic void 29251852Sbpsnapdir_changed_cb(void *arg, uint64_t newval) 29351852Sbp{ 29451852Sbp zfsvfs_t *zfsvfs = arg; 29551852Sbp 29651852Sbp zfsvfs->z_show_ctldir = newval; 29751852Sbp} 29851852Sbp 29951852Sbpstatic void 30051852Sbpvscan_changed_cb(void *arg, uint64_t newval) 30151852Sbp{ 30251852Sbp zfsvfs_t *zfsvfs = arg; 30351852Sbp 30451852Sbp zfsvfs->z_vscan = newval; 30551852Sbp} 30651852Sbp 30751852Sbpstatic void 30851852Sbpacl_mode_changed_cb(void *arg, uint64_t newval) 30983366Sjulian{ 31051852Sbp zfsvfs_t *zfsvfs = arg; 31151852Sbp 31251852Sbp zfsvfs->z_acl_mode = newval; 31351852Sbp} 31451852Sbp 31551852Sbpstatic void 31651852Sbpacl_inherit_changed_cb(void *arg, uint64_t newval) 31783366Sjulian{ 31851852Sbp zfsvfs_t *zfsvfs = arg; 31951852Sbp 32051852Sbp zfsvfs->z_acl_inherit = newval; 32151852Sbp} 32251852Sbp 32351852Sbpstatic int 32451852Sbpzfs_register_callbacks(vfs_t *vfsp) 32583366Sjulian{ 32651852Sbp struct dsl_dataset *ds = NULL; 32751852Sbp objset_t *os = NULL; 32851852Sbp zfsvfs_t *zfsvfs = NULL; 32951852Sbp uint64_t nbmand; 33074064Sbp int readonly, do_readonly = FALSE; 33151852Sbp int setuid, do_setuid = FALSE; 33251852Sbp int exec, do_exec 
= FALSE; 33351852Sbp int xattr, do_xattr = FALSE; 33451852Sbp int atime, do_atime = FALSE; 33551852Sbp int error = 0; 336101308Sjeff 33751852Sbp ASSERT(vfsp); 33851852Sbp zfsvfs = vfsp->vfs_data; 33951852Sbp ASSERT(zfsvfs); 34051852Sbp os = zfsvfs->z_os; 34183366Sjulian 34251852Sbp /* 34351852Sbp * This function can be called for a snapshot when we update snapshot's 34451852Sbp * mount point, which isn't really supported. 34551852Sbp */ 34651852Sbp if (dmu_objset_is_snapshot(os)) 34751852Sbp return (EOPNOTSUPP); 34851852Sbp 34951852Sbp /* 35051852Sbp * The act of registering our callbacks will destroy any mount 35151852Sbp * options we may have. In order to enable temporary overrides 35251852Sbp * of mount options, we stash away the current values and 35351852Sbp * restore them after we register the callbacks. 35451852Sbp */ 35551852Sbp if (vfs_optionisset(vfsp, MNTOPT_RO, NULL)) { 35683366Sjulian readonly = B_TRUE; 35751852Sbp do_readonly = B_TRUE; 35851852Sbp } else if (vfs_optionisset(vfsp, MNTOPT_RW, NULL)) { 35983366Sjulian readonly = B_FALSE; 36051852Sbp do_readonly = B_TRUE; 36151852Sbp } 36251852Sbp if (vfs_optionisset(vfsp, MNTOPT_NOSUID, NULL)) { 36351852Sbp setuid = B_FALSE; 36451852Sbp do_setuid = B_TRUE; 36551852Sbp } else { 36651852Sbp if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) { 36751852Sbp setuid = B_FALSE; 36851852Sbp do_setuid = B_TRUE; 36951852Sbp } else if (vfs_optionisset(vfsp, MNTOPT_SETUID, NULL)) { 37083366Sjulian setuid = B_TRUE; 37151852Sbp do_setuid = B_TRUE; 37251852Sbp } 37351852Sbp } 37451852Sbp if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL)) { 37583366Sjulian exec = B_FALSE; 37651852Sbp do_exec = B_TRUE; 37751852Sbp } else if (vfs_optionisset(vfsp, MNTOPT_EXEC, NULL)) { 37851852Sbp exec = B_TRUE; 37951852Sbp do_exec = B_TRUE; 38051852Sbp } 38151852Sbp if (vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL)) { 38251852Sbp xattr = B_FALSE; 38351852Sbp do_xattr = B_TRUE; 38451852Sbp } else if (vfs_optionisset(vfsp, MNTOPT_XATTR, NULL)) { 
38551852Sbp xattr = B_TRUE; 38651852Sbp do_xattr = B_TRUE; 38751852Sbp } 38851852Sbp if (vfs_optionisset(vfsp, MNTOPT_NOATIME, NULL)) { 38951852Sbp atime = B_FALSE; 39051852Sbp do_atime = B_TRUE; 39151852Sbp } else if (vfs_optionisset(vfsp, MNTOPT_ATIME, NULL)) { 39251852Sbp atime = B_TRUE; 39351852Sbp do_atime = B_TRUE; 39451852Sbp } 39551852Sbp 39651852Sbp /* 39751852Sbp * nbmand is a special property. It can only be changed at 39851852Sbp * mount time. 39951852Sbp * 40051852Sbp * This is weird, but it is documented to only be changeable 40151852Sbp * at mount time. 40251852Sbp */ 40351852Sbp if (vfs_optionisset(vfsp, MNTOPT_NONBMAND, NULL)) { 40451852Sbp nbmand = B_FALSE; 40583366Sjulian } else if (vfs_optionisset(vfsp, MNTOPT_NBMAND, NULL)) { 40651852Sbp nbmand = B_TRUE; 40766539Sbp } else { 40883366Sjulian char osname[MAXNAMELEN]; 40951852Sbp 41051852Sbp dmu_objset_name(os, osname); 41151852Sbp if (error = dsl_prop_get_integer(osname, "nbmand", &nbmand, 41251852Sbp NULL)) { 41351852Sbp return (error); 41451852Sbp } 41551852Sbp } 41683366Sjulian 41791406Sjhb /* 41851852Sbp * Register property callbacks. 41951852Sbp * 42096755Strhodes * It would probably be fine to just check for i/o error from 42151852Sbp * the first prop_register(), but I guess I like to go 42251852Sbp * overboard... 42351852Sbp */ 42496755Strhodes ds = dmu_objset_ds(os); 42551852Sbp error = dsl_prop_register(ds, "atime", atime_changed_cb, zfsvfs); 42651852Sbp error = error ? error : dsl_prop_register(ds, 42751852Sbp "xattr", xattr_changed_cb, zfsvfs); 42851852Sbp error = error ? error : dsl_prop_register(ds, 42951852Sbp "recordsize", blksz_changed_cb, zfsvfs); 43096755Strhodes error = error ? error : dsl_prop_register(ds, 43151852Sbp "readonly", readonly_changed_cb, zfsvfs); 43251852Sbp error = error ? error : dsl_prop_register(ds, 43351852Sbp "setuid", setuid_changed_cb, zfsvfs); 43451852Sbp error = error ? 
error : dsl_prop_register(ds, 43551852Sbp "exec", exec_changed_cb, zfsvfs); 43696755Strhodes error = error ? error : dsl_prop_register(ds, 43751852Sbp "snapdir", snapdir_changed_cb, zfsvfs); 43851852Sbp error = error ? error : dsl_prop_register(ds, 43951852Sbp "aclmode", acl_mode_changed_cb, zfsvfs); 44051852Sbp error = error ? error : dsl_prop_register(ds, 44151852Sbp "aclinherit", acl_inherit_changed_cb, zfsvfs); 44251852Sbp error = error ? error : dsl_prop_register(ds, 44351852Sbp "vscan", vscan_changed_cb, zfsvfs); 44451852Sbp if (error) 445 goto unregister; 446 447 /* 448 * Invoke our callbacks to restore temporary mount options. 449 */ 450 if (do_readonly) 451 readonly_changed_cb(zfsvfs, readonly); 452 if (do_setuid) 453 setuid_changed_cb(zfsvfs, setuid); 454 if (do_exec) 455 exec_changed_cb(zfsvfs, exec); 456 if (do_xattr) 457 xattr_changed_cb(zfsvfs, xattr); 458 if (do_atime) 459 atime_changed_cb(zfsvfs, atime); 460 461 nbmand_changed_cb(zfsvfs, nbmand); 462 463 return (0); 464 465unregister: 466 /* 467 * We may attempt to unregister some callbacks that are not 468 * registered, but this is OK; it will simply return ENOMSG, 469 * which we will ignore. 
470 */ 471 (void) dsl_prop_unregister(ds, "atime", atime_changed_cb, zfsvfs); 472 (void) dsl_prop_unregister(ds, "xattr", xattr_changed_cb, zfsvfs); 473 (void) dsl_prop_unregister(ds, "recordsize", blksz_changed_cb, zfsvfs); 474 (void) dsl_prop_unregister(ds, "readonly", readonly_changed_cb, zfsvfs); 475 (void) dsl_prop_unregister(ds, "setuid", setuid_changed_cb, zfsvfs); 476 (void) dsl_prop_unregister(ds, "exec", exec_changed_cb, zfsvfs); 477 (void) dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, zfsvfs); 478 (void) dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb, zfsvfs); 479 (void) dsl_prop_unregister(ds, "aclinherit", acl_inherit_changed_cb, 480 zfsvfs); 481 (void) dsl_prop_unregister(ds, "vscan", vscan_changed_cb, zfsvfs); 482 return (error); 483 484} 485 486static int 487zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting) 488{ 489 int error; 490 491 error = zfs_register_callbacks(zfsvfs->z_vfs); 492 if (error) 493 return (error); 494 495 /* 496 * Set the objset user_ptr to track its zfsvfs. 497 */ 498 mutex_enter(&zfsvfs->z_os->os->os_user_ptr_lock); 499 dmu_objset_set_user(zfsvfs->z_os, zfsvfs); 500 mutex_exit(&zfsvfs->z_os->os->os_user_ptr_lock); 501 502 /* 503 * If we are not mounting (ie: online recv), then we don't 504 * have to worry about replaying the log as we blocked all 505 * operations out since we closed the ZIL. 506 */ 507 if (mounting) { 508 boolean_t readonly; 509 510 /* 511 * During replay we remove the read only flag to 512 * allow replays to succeed. 513 */ 514 readonly = zfsvfs->z_vfs->vfs_flag & VFS_RDONLY; 515 zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY; 516 517 /* 518 * Parse and replay the intent log. 
519 */ 520 zil_replay(zfsvfs->z_os, zfsvfs, &zfsvfs->z_assign, 521 zfs_replay_vector, zfs_unlinked_drain); 522 523 zfs_unlinked_drain(zfsvfs); 524 zfsvfs->z_vfs->vfs_flag |= readonly; /* restore readonly bit */ 525 } 526 527 if (!zil_disable) 528 zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data); 529 530 return (0); 531} 532 533static void 534zfs_freezfsvfs(zfsvfs_t *zfsvfs) 535{ 536 mutex_destroy(&zfsvfs->z_znodes_lock); 537 mutex_destroy(&zfsvfs->z_online_recv_lock); 538 list_destroy(&zfsvfs->z_all_znodes); 539 rrw_destroy(&zfsvfs->z_teardown_lock); 540 rw_destroy(&zfsvfs->z_teardown_inactive_lock); 541 rw_destroy(&zfsvfs->z_fuid_lock); 542 kmem_free(zfsvfs, sizeof (zfsvfs_t)); 543} 544 545static int 546zfs_domount(vfs_t *vfsp, char *osname) 547{ 548 uint64_t recordsize, readonly; 549 int error = 0; 550 int mode; 551 zfsvfs_t *zfsvfs; 552 znode_t *zp = NULL; 553 554 ASSERT(vfsp); 555 ASSERT(osname); 556 557 /* 558 * Initialize the zfs-specific filesystem structure. 559 * Should probably make this a kmem cache, shuffle fields, 560 * and just bzero up to z_hold_mtx[]. 
561 */ 562 zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP); 563 zfsvfs->z_vfs = vfsp; 564 zfsvfs->z_parent = zfsvfs; 565 zfsvfs->z_assign = TXG_NOWAIT; 566 zfsvfs->z_max_blksz = SPA_MAXBLOCKSIZE; 567 zfsvfs->z_show_ctldir = ZFS_SNAPDIR_VISIBLE; 568 569 mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL); 570 mutex_init(&zfsvfs->z_online_recv_lock, NULL, MUTEX_DEFAULT, NULL); 571 list_create(&zfsvfs->z_all_znodes, sizeof (znode_t), 572 offsetof(znode_t, z_link_node)); 573 rrw_init(&zfsvfs->z_teardown_lock); 574 rw_init(&zfsvfs->z_teardown_inactive_lock, NULL, RW_DEFAULT, NULL); 575 rw_init(&zfsvfs->z_fuid_lock, NULL, RW_DEFAULT, NULL); 576 577 if (error = dsl_prop_get_integer(osname, "recordsize", &recordsize, 578 NULL)) 579 goto out; 580 zfsvfs->z_vfs->vfs_bsize = SPA_MINBLOCKSIZE; 581 zfsvfs->z_vfs->mnt_stat.f_iosize = recordsize; 582 583 vfsp->vfs_data = zfsvfs; 584 vfsp->mnt_flag |= MNT_LOCAL; 585 vfsp->mnt_kern_flag |= MNTK_MPSAFE; 586 vfsp->mnt_kern_flag |= MNTK_LOOKUP_SHARED; 587 vfsp->mnt_kern_flag |= MNTK_SHARED_WRITES; 588 589 if (error = dsl_prop_get_integer(osname, "readonly", &readonly, NULL)) 590 goto out; 591 592 mode = DS_MODE_OWNER; 593 if (readonly) 594 mode |= DS_MODE_READONLY; 595 596 error = dmu_objset_open(osname, DMU_OST_ZFS, mode, &zfsvfs->z_os); 597 if (error == EROFS) { 598 mode = DS_MODE_OWNER | DS_MODE_READONLY; 599 error = dmu_objset_open(osname, DMU_OST_ZFS, mode, 600 &zfsvfs->z_os); 601 } 602 603 if (error) 604 goto out; 605 606 if (error = zfs_init_fs(zfsvfs, &zp)) 607 goto out; 608 609 /* 610 * Set features for file system. 
611 */ 612 zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os); 613 if (zfsvfs->z_use_fuids) { 614 vfs_set_feature(vfsp, VFSFT_XVATTR); 615 vfs_set_feature(vfsp, VFSFT_SYSATTR_VIEWS); 616 vfs_set_feature(vfsp, VFSFT_ACEMASKONACCESS); 617 vfs_set_feature(vfsp, VFSFT_ACLONCREATE); 618 } 619 if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE) { 620 vfs_set_feature(vfsp, VFSFT_DIRENTFLAGS); 621 vfs_set_feature(vfsp, VFSFT_CASEINSENSITIVE); 622 vfs_set_feature(vfsp, VFSFT_NOCASESENSITIVE); 623 } else if (zfsvfs->z_case == ZFS_CASE_MIXED) { 624 vfs_set_feature(vfsp, VFSFT_DIRENTFLAGS); 625 vfs_set_feature(vfsp, VFSFT_CASEINSENSITIVE); 626 } 627 628 if (dmu_objset_is_snapshot(zfsvfs->z_os)) { 629 uint64_t pval; 630 631 ASSERT(mode & DS_MODE_READONLY); 632 atime_changed_cb(zfsvfs, B_FALSE); 633 readonly_changed_cb(zfsvfs, B_TRUE); 634 if (error = dsl_prop_get_integer(osname, "xattr", &pval, NULL)) 635 goto out; 636 xattr_changed_cb(zfsvfs, pval); 637 zfsvfs->z_issnap = B_TRUE; 638 } else { 639 error = zfsvfs_setup(zfsvfs, B_TRUE); 640 } 641 642 vfs_mountedfrom(vfsp, osname); 643 644 if (!zfsvfs->z_issnap) 645 zfsctl_create(zfsvfs); 646out: 647 if (error) { 648 if (zfsvfs->z_os) 649 dmu_objset_close(zfsvfs->z_os); 650 zfs_freezfsvfs(zfsvfs); 651 } else { 652 atomic_add_32(&zfs_active_fs_count, 1); 653 } 654 655 return (error); 656} 657 658void 659zfs_unregister_callbacks(zfsvfs_t *zfsvfs) 660{ 661 objset_t *os = zfsvfs->z_os; 662 struct dsl_dataset *ds; 663 664 /* 665 * Unregister properties. 
666 */ 667 if (!dmu_objset_is_snapshot(os)) { 668 ds = dmu_objset_ds(os); 669 VERIFY(dsl_prop_unregister(ds, "atime", atime_changed_cb, 670 zfsvfs) == 0); 671 672 VERIFY(dsl_prop_unregister(ds, "xattr", xattr_changed_cb, 673 zfsvfs) == 0); 674 675 VERIFY(dsl_prop_unregister(ds, "recordsize", blksz_changed_cb, 676 zfsvfs) == 0); 677 678 VERIFY(dsl_prop_unregister(ds, "readonly", readonly_changed_cb, 679 zfsvfs) == 0); 680 681 VERIFY(dsl_prop_unregister(ds, "setuid", setuid_changed_cb, 682 zfsvfs) == 0); 683 684 VERIFY(dsl_prop_unregister(ds, "exec", exec_changed_cb, 685 zfsvfs) == 0); 686 687 VERIFY(dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, 688 zfsvfs) == 0); 689 690 VERIFY(dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb, 691 zfsvfs) == 0); 692 693 VERIFY(dsl_prop_unregister(ds, "aclinherit", 694 acl_inherit_changed_cb, zfsvfs) == 0); 695 696 VERIFY(dsl_prop_unregister(ds, "vscan", 697 vscan_changed_cb, zfsvfs) == 0); 698 } 699} 700 701/*ARGSUSED*/ 702static int 703zfs_mount(vfs_t *vfsp) 704{ 705 kthread_t *td = curthread; 706 vnode_t *mvp = vfsp->mnt_vnodecovered; 707 cred_t *cr = td->td_ucred; 708 char *osname; 709 int error = 0; 710 int canwrite; 711 712 if (vfs_getopt(vfsp->mnt_optnew, "from", (void **)&osname, NULL)) 713 return (EINVAL); 714 715 /* 716 * If full-owner-access is enabled and delegated administration is 717 * turned on, we must set nosuid. 718 */ 719 if (zfs_super_owner && 720 dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr) != ECANCELED) { 721 secpolicy_fs_mount_clearopts(cr, vfsp); 722 } 723 724 /* 725 * Check for mount privilege? 
726 * 727 * If we don't have privilege then see if 728 * we have local permission to allow it 729 */ 730 error = secpolicy_fs_mount(cr, mvp, vfsp); 731 if (error) { 732 error = dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr); 733 if (error != 0) 734 goto out; 735 736 if (!(vfsp->vfs_flag & MS_REMOUNT)) { 737 vattr_t vattr; 738 739 /* 740 * Make sure user is the owner of the mount point 741 * or has sufficient privileges. 742 */ 743 744 vattr.va_mask = AT_UID; 745 746 vn_lock(mvp, LK_SHARED | LK_RETRY); 747 if (error = VOP_GETATTR(mvp, &vattr, cr)) { 748 VOP_UNLOCK(mvp, 0); 749 goto out; 750 } 751 752#if 0 /* CHECK THIS! Is probably needed for zfs_suser. */ 753 if (secpolicy_vnode_owner(mvp, cr, vattr.va_uid) != 0 && 754 VOP_ACCESS(mvp, VWRITE, cr, td) != 0) { 755 error = EPERM; 756 goto out; 757 } 758#else 759 if (error = secpolicy_vnode_owner(mvp, cr, vattr.va_uid)) { 760 VOP_UNLOCK(mvp, 0); 761 goto out; 762 } 763 764 if (error = VOP_ACCESS(mvp, VWRITE, cr, td)) { 765 VOP_UNLOCK(mvp, 0); 766 goto out; 767 } 768 VOP_UNLOCK(mvp, 0); 769#endif 770 } 771 772 secpolicy_fs_mount_clearopts(cr, vfsp); 773 } 774 775 /* 776 * Refuse to mount a filesystem if we are in a local zone and the 777 * dataset is not visible. 778 */ 779 if (!INGLOBALZONE(curthread) && 780 (!zone_dataset_visible(osname, &canwrite) || !canwrite)) { 781 error = EPERM; 782 goto out; 783 } 784 785 /* 786 * When doing a remount, we simply refresh our temporary properties 787 * according to those options set in the current VFS options. 
788 */ 789 if (vfsp->vfs_flag & MS_REMOUNT) { 790 /* refresh mount options */ 791 zfs_unregister_callbacks(vfsp->vfs_data); 792 error = zfs_register_callbacks(vfsp); 793 goto out; 794 } 795 796 DROP_GIANT(); 797 error = zfs_domount(vfsp, osname); 798 PICKUP_GIANT(); 799out: 800 return (error); 801} 802 803static int 804zfs_statfs(vfs_t *vfsp, struct statfs *statp) 805{ 806 zfsvfs_t *zfsvfs = vfsp->vfs_data; 807 uint64_t refdbytes, availbytes, usedobjs, availobjs; 808 809 statp->f_version = STATFS_VERSION; 810 811 ZFS_ENTER(zfsvfs); 812 813 dmu_objset_space(zfsvfs->z_os, 814 &refdbytes, &availbytes, &usedobjs, &availobjs); 815 816 /* 817 * The underlying storage pool actually uses multiple block sizes. 818 * We report the fragsize as the smallest block size we support, 819 * and we report our blocksize as the filesystem's maximum blocksize. 820 */ 821 statp->f_bsize = SPA_MINBLOCKSIZE; 822 statp->f_iosize = zfsvfs->z_vfs->mnt_stat.f_iosize; 823 824 /* 825 * The following report "total" blocks of various kinds in the 826 * file system, but reported in terms of f_frsize - the 827 * "fragment" size. 828 */ 829 830 statp->f_blocks = (refdbytes + availbytes) >> SPA_MINBLOCKSHIFT; 831 statp->f_bfree = availbytes / statp->f_bsize; 832 statp->f_bavail = statp->f_bfree; /* no root reservation */ 833 834 /* 835 * statvfs() should really be called statufs(), because it assumes 836 * static metadata. ZFS doesn't preallocate files, so the best 837 * we can do is report the max that could possibly fit in f_files, 838 * and that minus the number actually used in f_ffree. 839 * For f_ffree, report the smaller of the number of object available 840 * and the number of blocks (each object will take at least a block). 841 */ 842 statp->f_ffree = MIN(availobjs, statp->f_bfree); 843 statp->f_files = statp->f_ffree + usedobjs; 844 845 /* 846 * We're a zfs filesystem. 
 */
	(void) strlcpy(statp->f_fstypename, "zfs", sizeof(statp->f_fstypename));

	/* Report the mount points recorded in the vfs mount statistics. */
	strlcpy(statp->f_mntfromname, vfsp->mnt_stat.f_mntfromname,
	    sizeof(statp->f_mntfromname));
	strlcpy(statp->f_mntonname, vfsp->mnt_stat.f_mntonname,
	    sizeof(statp->f_mntonname));

	statp->f_namemax = ZFS_MAXNAMELEN;

	ZFS_EXIT(zfsvfs);
	return (0);
}

/*
 * VFS_ROOT: look up the filesystem's root znode (z_root) and return its
 * vnode, locked with the caller-supplied lock flags and marked VV_ROOT.
 *
 * Uses ZFS_ENTER_NOERROR so the lookup proceeds even when the filesystem
 * is being torn down; zfs_zget() reports any error.
 *
 * NOTE(review): if vn_lock() fails, VV_ROOT is still set and the vnode
 * reference obtained via zfs_zget() does not appear to be released on
 * that path — confirm whether callers tolerate this.
 */
static int
zfs_root(vfs_t *vfsp, int flags, vnode_t **vpp)
{
	zfsvfs_t *zfsvfs = vfsp->vfs_data;
	znode_t *rootzp;
	int error;

	ZFS_ENTER_NOERROR(zfsvfs);

	error = zfs_zget(zfsvfs, zfsvfs->z_root, &rootzp);

	ZFS_EXIT(zfsvfs);

	if (error == 0) {
		*vpp = ZTOV(rootzp);
		error = vn_lock(*vpp, flags);
		(*vpp)->v_vflag |= VV_ROOT;
	}

	return (error);
}

/*
 * Teardown the zfsvfs::z_os.
 *
 * Note, if 'unmounting' is FALSE, we return with the 'z_teardown_lock'
 * and 'z_teardown_inactive_lock' held (write mode), so that further vops
 * stay blocked while the caller (e.g. zfs_suspend_fs) works on z_os.
 */
static int
zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting)
{
	znode_t	*zp;

	rrw_enter(&zfsvfs->z_teardown_lock, RW_WRITER, FTAG);

	if (!unmounting) {
		/*
		 * We purge the parent filesystem's vfsp as the parent
		 * filesystem and all of its snapshots have their vnode's
		 * v_vfsp set to the parent's filesystem's vfsp.  Note,
		 * 'z_parent' is self referential for non-snapshots.
		 */
		(void) dnlc_purge_vfsp(zfsvfs->z_parent->z_vfs, 0);
#ifdef FREEBSD_NAMECACHE
		/* Also flush the FreeBSD name cache for the parent vfs. */
		cache_purgevfs(zfsvfs->z_parent->z_vfs);
#endif
	}

	/*
	 * Close the zil. NB: Can't close the zil while zfs_inactive
	 * threads are blocked as zil_close can call zfs_inactive.
	 */
	if (zfsvfs->z_log) {
		zil_close(zfsvfs->z_log);
		zfsvfs->z_log = NULL;
	}

	rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_WRITER);

	/*
	 * If we are not unmounting (ie: online recv) and someone already
	 * unmounted this file system while we were doing the switcheroo,
	 * or a reopen of z_os failed then just bail out now.
	 */
	if (!unmounting && (zfsvfs->z_unmounted || zfsvfs->z_os == NULL)) {
		/* Drop in reverse order of acquisition. */
		rw_exit(&zfsvfs->z_teardown_inactive_lock);
		rrw_exit(&zfsvfs->z_teardown_lock, FTAG);
		return (EIO);
	}

	/*
	 * At this point there are no vops active, and any new vops will
	 * fail with EIO since we have z_teardown_lock for writer (only
	 * relevant for forced unmount).
	 *
	 * Release all holds on dbufs.
	 */
	mutex_enter(&zfsvfs->z_znodes_lock);
	for (zp = list_head(&zfsvfs->z_all_znodes); zp != NULL;
	    zp = list_next(&zfsvfs->z_all_znodes, zp))
		if (zp->z_dbuf) {
			ASSERT(ZTOV(zp)->v_count >= 0);
			zfs_znode_dmu_fini(zp);
		}
	mutex_exit(&zfsvfs->z_znodes_lock);

	/*
	 * If we are unmounting, set the unmounted flag and let new vops
	 * unblock.  zfs_inactive will have the unmounted behavior, and all
	 * other vops will fail with EIO.
	 */
	if (unmounting) {
		zfsvfs->z_unmounted = B_TRUE;
		rrw_exit(&zfsvfs->z_teardown_lock, FTAG);
		rw_exit(&zfsvfs->z_teardown_inactive_lock);

#ifdef __FreeBSD__
		/*
		 * Some znodes might not be fully reclaimed, wait for them.
		 * The reclaim path presumably wakes us via wakeup(zfsvfs)
		 * when the z_all_znodes list drains — not visible here;
		 * confirm against zfs_znode.c.
		 */
		mutex_enter(&zfsvfs->z_znodes_lock);
		while (list_head(&zfsvfs->z_all_znodes) != NULL) {
			msleep(zfsvfs, &zfsvfs->z_znodes_lock, 0,
			    "zteardown", 0);
		}
		mutex_exit(&zfsvfs->z_znodes_lock);
#endif
	}

	/*
	 * z_os will be NULL if there was an error in attempting to reopen
	 * zfsvfs, so just return as the properties had already been
	 * unregistered and cached data had been evicted before.
	 */
	if (zfsvfs->z_os == NULL)
		return (0);

	/*
	 * Unregister properties.
	 */
	zfs_unregister_callbacks(zfsvfs);

	/*
	 * Evict cached data.  If the first eviction pass fails, force a
	 * txg sync so remaining dirty data is written out, then retry.
	 */
	if (dmu_objset_evict_dbufs(zfsvfs->z_os)) {
		txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0);
		(void) dmu_objset_evict_dbufs(zfsvfs->z_os);
	}

	return (0);
}

/*
 * VFS_UNMOUNT: unmount the file system.
 *
 * Permission is granted either by secpolicy_fs_unmount() or, failing
 * that, by the ZFS 'mount' delegated permission on the dataset.
 * Handles the '.zfs' control directory, forced unmounts (MS_FORCE),
 * the active-vnode busy check, and final release of z_os.
 */
/*ARGSUSED*/
static int
zfs_umount(vfs_t *vfsp, int fflag)
{
	zfsvfs_t *zfsvfs = vfsp->vfs_data;
	objset_t *os;
	cred_t *cr = curthread->td_ucred;
	int ret;

	ret = secpolicy_fs_unmount(cr, vfsp);
	if (ret) {
		/* Fall back to the delegated ZFS 'mount' permission. */
		ret = dsl_deleg_access((char *)refstr_value(vfsp->vfs_resource),
		    ZFS_DELEG_PERM_MOUNT, cr);
		if (ret)
			return (ret);
	}
	/*
	 * We purge the parent filesystem's vfsp as the parent filesystem
	 * and all of its snapshots have their vnode's v_vfsp set to the
	 * parent's filesystem's vfsp.  Note, 'z_parent' is self
	 * referential for non-snapshots.
	 */
	(void) dnlc_purge_vfsp(zfsvfs->z_parent->z_vfs, 0);

	/*
	 * Unmount any snapshots mounted under .zfs before unmounting the
	 * dataset itself.
	 */
	if (zfsvfs->z_ctldir != NULL) {
		if ((ret = zfsctl_umount_snapshots(vfsp, fflag, cr)) != 0)
			return (ret);
		/*
		 * This vflush() cannot fully succeed while the ctldir
		 * vnode is still held; EBUSY is the expected outcome.
		 */
		ret = vflush(vfsp, 0, 0, curthread);
		ASSERT(ret == EBUSY);
		if (!(fflag & MS_FORCE)) {
			if (zfsvfs->z_ctldir->v_count > 1)
				return (EBUSY);
			ASSERT(zfsvfs->z_ctldir->v_count == 1);
		}
		zfsctl_destroy(zfsvfs);
		ASSERT(zfsvfs->z_ctldir == NULL);
	}

	if (fflag & MS_FORCE) {
		/*
		 * Mark file system as unmounted before calling
		 * vflush(FORCECLOSE). This way we ensure no future vnops
		 * will be called and risk operating on DOOMED vnodes.
		 */
		rrw_enter(&zfsvfs->z_teardown_lock, RW_WRITER, FTAG);
		zfsvfs->z_unmounted = B_TRUE;
		rrw_exit(&zfsvfs->z_teardown_lock, FTAG);
	}

	/*
	 * Flush all the files.  On failure, recreate the '.zfs' ctldir
	 * (destroyed above) so the still-mounted filesystem stays usable.
	 */
	ret = vflush(vfsp, 1, (fflag & MS_FORCE) ? FORCECLOSE : 0, curthread);
	if (ret != 0) {
		if (!zfsvfs->z_issnap) {
			zfsctl_create(zfsvfs);
			ASSERT(zfsvfs->z_ctldir != NULL);
		}
		return (ret);
	}

	if (!(fflag & MS_FORCE)) {
		/*
		 * Check the number of active vnodes in the file system.
		 * Our count is maintained in the vfs structure, but the
		 * number is off by 1 to indicate a hold on the vfs
		 * structure itself.
		 *
		 * The '.zfs' directory maintains a reference of its
		 * own, and any active references underneath are
		 * reflected in the vnode count.
		 */
		if (zfsvfs->z_ctldir == NULL) {
			if (vfsp->vfs_count > 1)
				return (EBUSY);
		} else {
			if (vfsp->vfs_count > 2 ||
			    zfsvfs->z_ctldir->v_count > 1)
				return (EBUSY);
		}
	} else {
		/* Forced unmount: advertise it on the mount point. */
		MNT_ILOCK(vfsp);
		vfsp->mnt_kern_flag |= MNTK_UNMOUNTF;
		MNT_IUNLOCK(vfsp);
	}

	VERIFY(zfsvfs_teardown(zfsvfs, B_TRUE) == 0);
	os = zfsvfs->z_os;

	/*
	 * z_os will be NULL if there was an error in
	 * attempting to reopen zfsvfs.
	 */
	if (os != NULL) {
		/*
		 * Unset the objset user_ptr.
		 */
		mutex_enter(&os->os->os_user_ptr_lock);
		dmu_objset_set_user(os, NULL);
		mutex_exit(&os->os->os_user_ptr_lock);

		/*
		 * Finally release the objset
		 */
		dmu_objset_close(os);
	}

	/*
	 * We can now safely destroy the '.zfs' directory node.
	 */
	if (zfsvfs->z_ctldir != NULL)
		zfsctl_destroy(zfsvfs);
	if (zfsvfs->z_issnap) {
		vnode_t *svp = vfsp->mnt_vnodecovered;

		/*
		 * Drop the extra reference on the covered vnode that a
		 * mounted snapshot apparently holds — TODO confirm where
		 * that reference is taken (zfsctl mount path).
		 */
		if (svp->v_count >= 2)
			VN_RELE(svp);
	}
	zfs_freevfs(vfsp);

	return (0);
}

/*
 * VFS_VGET: translate an inode number to a (locked) vnode, used by NFS.
 * Unlinked-but-open znodes are rejected with EINVAL so they cannot be
 * resurrected via a file handle.
 *
 * NOTE(review): the vn_lock() return value is ignored here, unlike in
 * zfs_root() — confirm whether a lock failure can leak the reference.
 */
static int
zfs_vget(vfs_t *vfsp, ino_t ino, int flags, vnode_t **vpp)
{
	zfsvfs_t *zfsvfs = vfsp->vfs_data;
	znode_t *zp;
	int err;

	/*
	 * XXXPJD: zfs_zget() can't operate on virtual entires like .zfs/ or
	 * .zfs/snapshot/ directories, so for now just return EOPNOTSUPP.
	 * This will make NFS to fall back to using READDIR instead of
	 * READDIRPLUS.
	 * Also snapshots are stored in AVL tree, but based on their names,
	 * not inode numbers, so it will be very inefficient to iterate
	 * over all snapshots to find the right one.
	 * Note that OpenSolaris READDIRPLUS implementation does LOOKUP on
	 * d_name, and not VGET on d_fileno as we do.
	 */
	if (ino == ZFSCTL_INO_ROOT || ino == ZFSCTL_INO_SNAPDIR)
		return (EOPNOTSUPP);

	ZFS_ENTER(zfsvfs);
	err = zfs_zget(zfsvfs, ino, &zp);
	if (err == 0 && zp->z_unlinked) {
		VN_RELE(ZTOV(zp));
		err = EINVAL;
	}
	ZFS_EXIT(zfsvfs);
	if (err != 0)
		*vpp = NULL;
	else {
		*vpp = ZTOV(zp);
		vn_lock(*vpp, flags);
	}
	return (err);
}

/*
 * VFS_CHECKEXP: verify an NFS client against the export list.
 * Always consults the parent filesystem's vfs (see comment below).
 */
static int
zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, int *extflagsp,
    struct ucred **credanonp, int *numsecflavors, int **secflavors)
{
	zfsvfs_t *zfsvfs = vfsp->vfs_data;

	/*
	 * If this is regular file system vfsp is the same as
	 * zfsvfs->z_parent->z_vfs, but if it is snapshot,
	 * zfsvfs->z_parent->z_vfs represents parent file system
	 * which we have to use here, because only this file system
	 * has mnt_export configured.
	 */
	vfsp = zfsvfs->z_parent->z_vfs;

	return (vfs_stdcheckexp(zfsvfs->z_parent->z_vfs, nam, extflagsp,
	    credanonp, numsecflavors, secflavors));
}

/* Both ZFS file-handle layouts must fit in the generic struct fid. */
CTASSERT(SHORT_FID_LEN <= sizeof(struct fid));
CTASSERT(LONG_FID_LEN <= sizeof(struct fid));

/*
 * VFS_FHTOVP: convert an NFS file handle back into a locked vnode.
 *
 * Long FIDs additionally encode the objset id of a snapshot; in that
 * case we first re-resolve zfsvfs to the snapshot's own filesystem via
 * zfsctl_lookup_objset().  The object number and generation are stored
 * little-endian, byte by byte, in the fid and reassembled here.
 */
static int
zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, vnode_t **vpp)
{
	zfsvfs_t *zfsvfs = vfsp->vfs_data;
	znode_t *zp;
	uint64_t object = 0;
	uint64_t fid_gen = 0;
	uint64_t gen_mask;
	uint64_t zp_gen;
	int i, err;

	*vpp = NULL;

	ZFS_ENTER(zfsvfs);

	/*
	 * On FreeBSD we can get snapshot's mount point or its parent file
	 * system mount point depending if snapshot is already mounted or not.
	 */
	if (zfsvfs->z_parent == zfsvfs && fidp->fid_len == LONG_FID_LEN) {
		zfid_long_t *zlfid = (zfid_long_t *)fidp;
		uint64_t objsetid = 0;
		uint64_t setgen = 0;

		for (i = 0; i < sizeof (zlfid->zf_setid); i++)
			objsetid |= ((uint64_t)zlfid->zf_setid[i]) << (8 * i);

		/* setgen is decoded but not otherwise used below. */
		for (i = 0; i < sizeof (zlfid->zf_setgen); i++)
			setgen |= ((uint64_t)zlfid->zf_setgen[i]) << (8 * i);

		ZFS_EXIT(zfsvfs);

		/* Switch zfsvfs to the snapshot filesystem. */
		err = zfsctl_lookup_objset(vfsp, objsetid, &zfsvfs);
		if (err)
			return (EINVAL);
		ZFS_ENTER(zfsvfs);
	}

	if (fidp->fid_len == SHORT_FID_LEN || fidp->fid_len == LONG_FID_LEN) {
		zfid_short_t *zfid = (zfid_short_t *)fidp;

		for (i = 0; i < sizeof (zfid->zf_object); i++)
			object |= ((uint64_t)zfid->zf_object[i]) << (8 * i);

		for (i = 0; i < sizeof (zfid->zf_gen); i++)
			fid_gen |= ((uint64_t)zfid->zf_gen[i]) << (8 * i);
	} else {
		ZFS_EXIT(zfsvfs);
		return (EINVAL);
	}

	/* A zero fid_gen means we are in the .zfs control directories */
	if (fid_gen == 0 &&
	    (object == ZFSCTL_INO_ROOT || object == ZFSCTL_INO_SNAPDIR)) {
		*vpp = zfsvfs->z_ctldir;
		ASSERT(*vpp != NULL);
		if (object == ZFSCTL_INO_SNAPDIR) {
			VERIFY(zfsctl_root_lookup(*vpp, "snapshot", vpp, NULL,
			    0, NULL, NULL, NULL, NULL, NULL) == 0);
		} else {
			VN_HOLD(*vpp);
		}
		ZFS_EXIT(zfsvfs);
		vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY);
		return (0);
	}

	/*
	 * gen_mask covers exactly the generation bits present in the fid;
	 * note it deliberately reuses 'i', left at sizeof (zf_gen) by the
	 * decode loop above.
	 */
	gen_mask = -1ULL >> (64 - 8 * i);

	dprintf("getting %llu [%u mask %llx]\n", object, fid_gen, gen_mask);
	/* Assignment in condition is intentional (Solaris idiom). */
	if (err = zfs_zget(zfsvfs, object, &zp)) {
		ZFS_EXIT(zfsvfs);
		return (err);
	}
	zp_gen = zp->z_phys->zp_gen & gen_mask;
	/* Generation 0 is reserved for the ctldir case handled above. */
	if (zp_gen == 0)
		zp_gen = 1;
	if (zp->z_unlinked || zp_gen != fid_gen) {
		dprintf("znode gen (%u) != fid gen (%u)\n", zp_gen, fid_gen);
		VN_RELE(ZTOV(zp));
		ZFS_EXIT(zfsvfs);
		return (EINVAL);
	}

	ZFS_EXIT(zfsvfs);

	*vpp = ZTOV(zp);
	vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY);
	vnode_create_vobject(*vpp, zp->z_phys->zp_size, curthread);
	return (0);
}

/*
 * Block out VOPs and close zfsvfs_t::z_os
 *
 * Note, if successful, then we return with the 'z_teardown_lock' and
 * 'z_teardown_inactive_lock' write held.
 *
 * 'name' receives the dataset name and 'mode' the objset open mode, so
 * that zfs_resume_fs() can reopen the same objset later.
 */
int
zfs_suspend_fs(zfsvfs_t *zfsvfs, char *name, int *mode)
{
	int error;

	if ((error = zfsvfs_teardown(zfsvfs, B_FALSE)) != 0)
		return (error);

	*mode = zfsvfs->z_os->os_mode;
	dmu_objset_name(zfsvfs->z_os, name);
	dmu_objset_close(zfsvfs->z_os);

	return (0);
}

/*
 * Reopen zfsvfs_t::z_os and release VOPs.
 *
 * Counterpart of zfs_suspend_fs(): expects both teardown locks to be
 * write-held on entry (asserted below) and always releases them.  If
 * the objset cannot be reopened the filesystem is force-unmounted.
 */
int
zfs_resume_fs(zfsvfs_t *zfsvfs, const char *osname, int mode)
{
	int err;

	ASSERT(RRW_WRITE_HELD(&zfsvfs->z_teardown_lock));
	ASSERT(RW_WRITE_HELD(&zfsvfs->z_teardown_inactive_lock));

	err = dmu_objset_open(osname, DMU_OST_ZFS, mode, &zfsvfs->z_os);
	if (err) {
		zfsvfs->z_os = NULL;
	} else {
		znode_t *zp;

		VERIFY(zfsvfs_setup(zfsvfs, B_FALSE) == 0);

		/*
		 * Attempt to re-establish all the active znodes with
		 * their dbufs. If a zfs_rezget() fails, then we'll let
		 * any potential callers discover that via ZFS_ENTER_VERIFY_VP
		 * when they try to use their znode.
		 */
		mutex_enter(&zfsvfs->z_znodes_lock);
		for (zp = list_head(&zfsvfs->z_all_znodes); zp;
		    zp = list_next(&zfsvfs->z_all_znodes, zp)) {
			(void) zfs_rezget(zp);
		}
		mutex_exit(&zfsvfs->z_znodes_lock);

	}

	/* release the VOPs */
	rw_exit(&zfsvfs->z_teardown_inactive_lock);
	rrw_exit(&zfsvfs->z_teardown_lock, FTAG);

	if (err) {
		/*
		 * Since we couldn't reopen zfsvfs::z_os, force
		 * unmount this file system.
		 */
		if (vn_vfswlock(zfsvfs->z_vfs->vfs_vnodecovered) == 0)
			(void) dounmount(zfsvfs->z_vfs, MS_FORCE, curthread);
	}
	return (err);
}

/*
 * Final destruction of a zfsvfs_t: destroy per-object mutexes and FUID
 * state, free the structure, and drop the active-filesystem count.
 * Called from zfs_umount() after the objset has been released.
 */
static void
zfs_freevfs(vfs_t *vfsp)
{
	zfsvfs_t *zfsvfs = vfsp->vfs_data;
	int i;

	for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
		mutex_destroy(&zfsvfs->z_hold_mtx[i]);

	zfs_fuid_destroy(zfsvfs);
	zfs_freezfsvfs(zfsvfs);

	atomic_add_32(&zfs_active_fs_count, -1);
}

#ifdef __i386__
/* Saved pre-adjustment desiredvnodes, restored in zfs_vnodes_adjust_back(). */
static int desiredvnodes_backup;
#endif

/*
 * On i386 only: shrink the system-wide desiredvnodes limit to 3/4 of the
 * default, but only if the administrator has not tuned it (i.e. it still
 * matches the vntblinit() formula).  No-op on other architectures.
 */
static void
zfs_vnodes_adjust(void)
{
#ifdef __i386__
	int newdesiredvnodes;

	desiredvnodes_backup = desiredvnodes;

	/*
	 * We calculate newdesiredvnodes the same way it is done in
	 * vntblinit(). If it is equal to desiredvnodes, it means that
	 * it wasn't tuned by the administrator and we can tune it down.
	 */
	newdesiredvnodes = min(maxproc + cnt.v_page_count / 4, 2 *
	    vm_kmem_size / (5 * (sizeof(struct vm_object) +
	    sizeof(struct vnode))));
	if (newdesiredvnodes == desiredvnodes)
		desiredvnodes = (3 * newdesiredvnodes) / 4;
#endif
}

/*
 * Undo zfs_vnodes_adjust(): restore the saved desiredvnodes value
 * (i386 only; no-op elsewhere).  Called from zfs_fini().
 */
static void
zfs_vnodes_adjust_back(void)
{

#ifdef __i386__
	desiredvnodes = desiredvnodes_backup;
#endif
}

/*
 * Module-wide initialization: znode cache, '.zfs' ctldir structures,
 * and the i386 vnode-limit adjustment.
 */
void
zfs_init(void)
{

	printf("ZFS filesystem version " ZPL_VERSION_STRING "\n");

	/*
	 * Initialize znode cache, vnode ops, etc...
	 */
	zfs_znode_init();

	/*
	 * Initialize .zfs directory structures
	 */
	zfsctl_init();

	/*
	 * Reduce number of vnodes. Originally number of vnodes is calculated
	 * with UFS inode in mind. We reduce it here, because it's too big for
	 * ZFS/i386.
	 */
	zfs_vnodes_adjust();
}

/*
 * Module-wide teardown, in reverse order of zfs_init().
 */
void
zfs_fini(void)
{
	zfsctl_fini();
	zfs_znode_fini();
	zfs_vnodes_adjust_back();
}

/*
 * Return non-zero while any ZFS filesystem is mounted (active).
 */
int
zfs_busy(void)
{
	return (zfs_active_fs_count != 0);
}

/*
 * Upgrade the on-disk ZPL version of dataset 'name' to 'newvers'.
 * The new version must be within [ZPL_VERSION_INITIAL, ZPL_VERSION] and
 * not lower than the current version (no downgrades).  The update is
 * committed in its own transaction and logged to pool history.
 */
int
zfs_set_version(const char *name, uint64_t newvers)
{
	int error;
	objset_t *os;
	dmu_tx_t *tx;
	uint64_t curvers;

	/*
	 * XXX for now, require that the filesystem be unmounted.  Would
	 * be nice to find the zfsvfs_t and just update that if
	 * possible.
	 */

	if (newvers < ZPL_VERSION_INITIAL || newvers > ZPL_VERSION)
		return (EINVAL);

	error = dmu_objset_open(name, DMU_OST_ZFS, DS_MODE_OWNER, &os);
	if (error)
		return (error);

	error = zap_lookup(os, MASTER_NODE_OBJ, ZPL_VERSION_STR,
	    8, 1, &curvers);
	if (error)
		goto out;
	if (newvers < curvers) {
		/* Downgrades are not allowed. */
		error = EINVAL;
		goto out;
	}

	tx = dmu_tx_create(os);
	dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, 0, ZPL_VERSION_STR);
	error = dmu_tx_assign(tx, TXG_WAIT);
	if (error) {
		dmu_tx_abort(tx);
		goto out;
	}
	error = zap_update(os, MASTER_NODE_OBJ, ZPL_VERSION_STR, 8, 1,
	    &newvers, tx);

	spa_history_internal_log(LOG_DS_UPGRADE,
	    dmu_objset_spa(os), tx, CRED(),
	    "oldver=%llu newver=%llu dataset = %llu", curvers, newvers,
	    dmu_objset_id(os));
	dmu_tx_commit(tx);

out:
	dmu_objset_close(os);
	return (error);
}
/*
 * Read a property stored within the master node.
 *
 * Returns 0 with *value set, falling back to the documented default when
 * the ZAP has no entry (ENOENT); other zap_lookup() errors, or ENOENT for
 * a property with no default here, are passed through to the caller.
 */
int
zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value)
{
	const char *pname;
	int error = ENOENT;

	/*
	 * Look up the file system's value for the property.  For the
	 * version property, we look up a slightly different string.
	 */
	if (prop == ZFS_PROP_VERSION)
		pname = ZPL_VERSION_STR;
	else
		pname = zfs_prop_to_name(prop);

	if (os != NULL)
		error = zap_lookup(os, MASTER_NODE_OBJ, pname, 8, 1, value);

	if (error == ENOENT) {
		/* No value set, use the default value */
		switch (prop) {
		case ZFS_PROP_VERSION:
			*value = ZPL_VERSION;
			break;
		case ZFS_PROP_NORMALIZE:
		case ZFS_PROP_UTF8ONLY:
			*value = 0;
			break;
		case ZFS_PROP_CASE:
			*value = ZFS_CASE_SENSITIVE;
			break;
		default:
			return (error);
		}
		error = 0;
	}
	return (error);
}