/* zfs_vfsops.c revision 197151 */
1168404Spjd/* 2168404Spjd * CDDL HEADER START 3168404Spjd * 4168404Spjd * The contents of this file are subject to the terms of the 5168404Spjd * Common Development and Distribution License (the "License"). 6168404Spjd * You may not use this file except in compliance with the License. 7168404Spjd * 8168404Spjd * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9168404Spjd * or http://www.opensolaris.org/os/licensing. 10168404Spjd * See the License for the specific language governing permissions 11168404Spjd * and limitations under the License. 12168404Spjd * 13168404Spjd * When distributing Covered Code, include this CDDL HEADER in each 14168404Spjd * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15168404Spjd * If applicable, add the following below this CDDL HEADER, with the 16168404Spjd * fields enclosed by brackets "[]" replaced with your own identifying 17168404Spjd * information: Portions Copyright [yyyy] [name of copyright owner] 18168404Spjd * 19168404Spjd * CDDL HEADER END 20168404Spjd */ 21168404Spjd/* 22185029Spjd * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23168404Spjd * Use is subject to license terms. 
24168404Spjd */ 25168404Spjd 26168404Spjd#include <sys/types.h> 27168404Spjd#include <sys/param.h> 28168404Spjd#include <sys/systm.h> 29168404Spjd#include <sys/kernel.h> 30168404Spjd#include <sys/sysmacros.h> 31168404Spjd#include <sys/kmem.h> 32168404Spjd#include <sys/acl.h> 33168404Spjd#include <sys/vnode.h> 34168404Spjd#include <sys/vfs.h> 35168404Spjd#include <sys/mntent.h> 36168404Spjd#include <sys/mount.h> 37168404Spjd#include <sys/cmn_err.h> 38168404Spjd#include <sys/zfs_znode.h> 39168404Spjd#include <sys/zfs_dir.h> 40168404Spjd#include <sys/zil.h> 41168404Spjd#include <sys/fs/zfs.h> 42168404Spjd#include <sys/dmu.h> 43168404Spjd#include <sys/dsl_prop.h> 44168404Spjd#include <sys/dsl_dataset.h> 45185029Spjd#include <sys/dsl_deleg.h> 46168404Spjd#include <sys/spa.h> 47168404Spjd#include <sys/zap.h> 48168404Spjd#include <sys/varargs.h> 49168962Spjd#include <sys/policy.h> 50168404Spjd#include <sys/atomic.h> 51168404Spjd#include <sys/zfs_ioctl.h> 52168404Spjd#include <sys/zfs_ctldir.h> 53185029Spjd#include <sys/zfs_fuid.h> 54168962Spjd#include <sys/sunddi.h> 55168404Spjd#include <sys/dnlc.h> 56185029Spjd#include <sys/dmu_objset.h> 57185029Spjd#include <sys/spa_boot.h> 58185029Spjd#include <sys/vdev_impl.h> /* VDEV_BOOT_VERSION */ 59168404Spjd 60168404Spjdstruct mtx zfs_debug_mtx; 61168404SpjdMTX_SYSINIT(zfs_debug_mtx, &zfs_debug_mtx, "zfs_debug", MTX_DEF); 62185029Spjd 63168404SpjdSYSCTL_NODE(_vfs, OID_AUTO, zfs, CTLFLAG_RW, 0, "ZFS file system"); 64185029Spjd 65185029Spjdint zfs_super_owner = 0; 66185029SpjdSYSCTL_INT(_vfs_zfs, OID_AUTO, super_owner, CTLFLAG_RW, &zfs_super_owner, 0, 67185029Spjd "File system owner can perform privileged operation on his file systems"); 68185029Spjd 69168404Spjdint zfs_debug_level = 0; 70168713SpjdTUNABLE_INT("vfs.zfs.debug", &zfs_debug_level); 71168404SpjdSYSCTL_INT(_vfs_zfs, OID_AUTO, debug, CTLFLAG_RW, &zfs_debug_level, 0, 72168404Spjd "Debug level"); 73168404Spjd 74185029SpjdSYSCTL_NODE(_vfs_zfs, OID_AUTO, version, CTLFLAG_RD, 
0, "ZFS versions"); 75185029Spjdstatic int zfs_version_acl = ZFS_ACL_VERSION; 76185029SpjdSYSCTL_INT(_vfs_zfs_version, OID_AUTO, acl, CTLFLAG_RD, &zfs_version_acl, 0, 77185029Spjd "ZFS_ACL_VERSION"); 78185029Spjdstatic int zfs_version_dmu_backup_header = DMU_BACKUP_HEADER_VERSION; 79185029SpjdSYSCTL_INT(_vfs_zfs_version, OID_AUTO, dmu_backup_header, CTLFLAG_RD, 80185029Spjd &zfs_version_dmu_backup_header, 0, "DMU_BACKUP_HEADER_VERSION"); 81185029Spjdstatic int zfs_version_dmu_backup_stream = DMU_BACKUP_STREAM_VERSION; 82185029SpjdSYSCTL_INT(_vfs_zfs_version, OID_AUTO, dmu_backup_stream, CTLFLAG_RD, 83185029Spjd &zfs_version_dmu_backup_stream, 0, "DMU_BACKUP_STREAM_VERSION"); 84185029Spjdstatic int zfs_version_spa = SPA_VERSION; 85185029SpjdSYSCTL_INT(_vfs_zfs_version, OID_AUTO, spa, CTLFLAG_RD, &zfs_version_spa, 0, 86185029Spjd "SPA_VERSION"); 87185029Spjdstatic int zfs_version_vdev_boot = VDEV_BOOT_VERSION; 88185029SpjdSYSCTL_INT(_vfs_zfs_version, OID_AUTO, vdev_boot, CTLFLAG_RD, 89185029Spjd &zfs_version_vdev_boot, 0, "VDEV_BOOT_VERSION"); 90185029Spjdstatic int zfs_version_zpl = ZPL_VERSION; 91185029SpjdSYSCTL_INT(_vfs_zfs_version, OID_AUTO, zpl, CTLFLAG_RD, &zfs_version_zpl, 0, 92185029Spjd "ZPL_VERSION"); 93185029Spjd 94191990Sattiliostatic int zfs_mount(vfs_t *vfsp); 95191990Sattiliostatic int zfs_umount(vfs_t *vfsp, int fflag); 96191990Sattiliostatic int zfs_root(vfs_t *vfsp, int flags, vnode_t **vpp); 97191990Sattiliostatic int zfs_statfs(vfs_t *vfsp, struct statfs *statp); 98168404Spjdstatic int zfs_vget(vfs_t *vfsp, ino_t ino, int flags, vnode_t **vpp); 99191990Sattiliostatic int zfs_sync(vfs_t *vfsp, int waitfor); 100196982Spjdstatic int zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, int *extflagsp, 101196982Spjd struct ucred **credanonp, int *numsecflavors, int **secflavors); 102168404Spjdstatic int zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, vnode_t **vpp); 103168404Spjdstatic void zfs_objset_close(zfsvfs_t *zfsvfs); 104168404Spjdstatic void 
zfs_freevfs(vfs_t *vfsp); 105168404Spjd 106168404Spjdstatic struct vfsops zfs_vfsops = { 107168404Spjd .vfs_mount = zfs_mount, 108168404Spjd .vfs_unmount = zfs_umount, 109168404Spjd .vfs_root = zfs_root, 110168404Spjd .vfs_statfs = zfs_statfs, 111168404Spjd .vfs_vget = zfs_vget, 112168404Spjd .vfs_sync = zfs_sync, 113196982Spjd .vfs_checkexp = zfs_checkexp, 114168404Spjd .vfs_fhtovp = zfs_fhtovp, 115168404Spjd}; 116168404Spjd 117185029SpjdVFS_SET(zfs_vfsops, zfs, VFCF_JAIL | VFCF_DELEGADMIN); 118168404Spjd 119168404Spjd/* 120168404Spjd * We need to keep a count of active fs's. 121168404Spjd * This is necessary to prevent our module 122168404Spjd * from being unloaded after a umount -f 123168404Spjd */ 124168404Spjdstatic uint32_t zfs_active_fs_count = 0; 125168404Spjd 126168404Spjd/*ARGSUSED*/ 127168404Spjdstatic int 128191990Sattiliozfs_sync(vfs_t *vfsp, int waitfor) 129168404Spjd{ 130168404Spjd 131168404Spjd /* 132168404Spjd * Data integrity is job one. We don't want a compromised kernel 133168404Spjd * writing to the storage pool, so we never sync during panic. 134168404Spjd */ 135168404Spjd if (panicstr) 136168404Spjd return (0); 137168404Spjd 138168404Spjd if (vfsp != NULL) { 139168404Spjd /* 140168404Spjd * Sync a specific filesystem. 141168404Spjd */ 142168404Spjd zfsvfs_t *zfsvfs = vfsp->vfs_data; 143168404Spjd int error; 144168404Spjd 145191990Sattilio error = vfs_stdsync(vfsp, waitfor); 146168404Spjd if (error != 0) 147168404Spjd return (error); 148168404Spjd 149168404Spjd ZFS_ENTER(zfsvfs); 150168404Spjd if (zfsvfs->z_log != NULL) 151168404Spjd zil_commit(zfsvfs->z_log, UINT64_MAX, 0); 152168404Spjd else 153168404Spjd txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0); 154168404Spjd ZFS_EXIT(zfsvfs); 155168404Spjd } else { 156168404Spjd /* 157168404Spjd * Sync all ZFS filesystems. This is what happens when you 158168404Spjd * run sync(1M). Unlike other filesystems, ZFS honors the 159168404Spjd * request by waiting for all pools to commit all dirty data. 
160168404Spjd */ 161168404Spjd spa_sync_allpools(); 162168404Spjd } 163168404Spjd 164168404Spjd return (0); 165168404Spjd} 166168404Spjd 167168404Spjdstatic void 168168404Spjdatime_changed_cb(void *arg, uint64_t newval) 169168404Spjd{ 170168404Spjd zfsvfs_t *zfsvfs = arg; 171168404Spjd 172168404Spjd if (newval == TRUE) { 173168404Spjd zfsvfs->z_atime = TRUE; 174168404Spjd zfsvfs->z_vfs->vfs_flag &= ~MNT_NOATIME; 175168404Spjd vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME); 176168404Spjd vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_ATIME, NULL, 0); 177168404Spjd } else { 178168404Spjd zfsvfs->z_atime = FALSE; 179168404Spjd zfsvfs->z_vfs->vfs_flag |= MNT_NOATIME; 180168404Spjd vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_ATIME); 181168404Spjd vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME, NULL, 0); 182168404Spjd } 183168404Spjd} 184168404Spjd 185168404Spjdstatic void 186168404Spjdxattr_changed_cb(void *arg, uint64_t newval) 187168404Spjd{ 188168404Spjd zfsvfs_t *zfsvfs = arg; 189168404Spjd 190168404Spjd if (newval == TRUE) { 191168404Spjd /* XXX locking on vfs_flag? */ 192168404Spjd#ifdef TODO 193168404Spjd zfsvfs->z_vfs->vfs_flag |= VFS_XATTR; 194168404Spjd#endif 195168404Spjd vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOXATTR); 196168404Spjd vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_XATTR, NULL, 0); 197168404Spjd } else { 198168404Spjd /* XXX locking on vfs_flag? 
*/ 199168404Spjd#ifdef TODO 200168404Spjd zfsvfs->z_vfs->vfs_flag &= ~VFS_XATTR; 201168404Spjd#endif 202168404Spjd vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_XATTR); 203168404Spjd vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOXATTR, NULL, 0); 204168404Spjd } 205168404Spjd} 206168404Spjd 207168404Spjdstatic void 208168404Spjdblksz_changed_cb(void *arg, uint64_t newval) 209168404Spjd{ 210168404Spjd zfsvfs_t *zfsvfs = arg; 211168404Spjd 212168404Spjd if (newval < SPA_MINBLOCKSIZE || 213168404Spjd newval > SPA_MAXBLOCKSIZE || !ISP2(newval)) 214168404Spjd newval = SPA_MAXBLOCKSIZE; 215168404Spjd 216168404Spjd zfsvfs->z_max_blksz = newval; 217168404Spjd zfsvfs->z_vfs->vfs_bsize = newval; 218168404Spjd} 219168404Spjd 220168404Spjdstatic void 221168404Spjdreadonly_changed_cb(void *arg, uint64_t newval) 222168404Spjd{ 223168404Spjd zfsvfs_t *zfsvfs = arg; 224168404Spjd 225168404Spjd if (newval) { 226168404Spjd /* XXX locking on vfs_flag? */ 227168404Spjd zfsvfs->z_vfs->vfs_flag |= VFS_RDONLY; 228168404Spjd vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RW); 229168404Spjd vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RO, NULL, 0); 230168404Spjd } else { 231168404Spjd /* XXX locking on vfs_flag? 
*/ 232168404Spjd zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY; 233168404Spjd vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RO); 234168404Spjd vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RW, NULL, 0); 235168404Spjd } 236168404Spjd} 237168404Spjd 238168404Spjdstatic void 239168404Spjdsetuid_changed_cb(void *arg, uint64_t newval) 240168404Spjd{ 241168404Spjd zfsvfs_t *zfsvfs = arg; 242168404Spjd 243168404Spjd if (newval == FALSE) { 244168404Spjd zfsvfs->z_vfs->vfs_flag |= VFS_NOSETUID; 245168404Spjd vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_SETUID); 246168404Spjd vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID, NULL, 0); 247168404Spjd } else { 248168404Spjd zfsvfs->z_vfs->vfs_flag &= ~VFS_NOSETUID; 249168404Spjd vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID); 250168404Spjd vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_SETUID, NULL, 0); 251168404Spjd } 252168404Spjd} 253168404Spjd 254168404Spjdstatic void 255168404Spjdexec_changed_cb(void *arg, uint64_t newval) 256168404Spjd{ 257168404Spjd zfsvfs_t *zfsvfs = arg; 258168404Spjd 259168404Spjd if (newval == FALSE) { 260168404Spjd zfsvfs->z_vfs->vfs_flag |= VFS_NOEXEC; 261168404Spjd vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_EXEC); 262168404Spjd vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC, NULL, 0); 263168404Spjd } else { 264168404Spjd zfsvfs->z_vfs->vfs_flag &= ~VFS_NOEXEC; 265168404Spjd vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC); 266168404Spjd vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_EXEC, NULL, 0); 267168404Spjd } 268168404Spjd} 269168404Spjd 270185029Spjd/* 271185029Spjd * The nbmand mount option can be changed at mount time. 
272185029Spjd * We can't allow it to be toggled on live file systems or incorrect 273185029Spjd * behavior may be seen from cifs clients 274185029Spjd * 275185029Spjd * This property isn't registered via dsl_prop_register(), but this callback 276185029Spjd * will be called when a file system is first mounted 277185029Spjd */ 278168404Spjdstatic void 279185029Spjdnbmand_changed_cb(void *arg, uint64_t newval) 280185029Spjd{ 281185029Spjd zfsvfs_t *zfsvfs = arg; 282185029Spjd if (newval == FALSE) { 283185029Spjd vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NBMAND); 284185029Spjd vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NONBMAND, NULL, 0); 285185029Spjd } else { 286185029Spjd vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NONBMAND); 287185029Spjd vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NBMAND, NULL, 0); 288185029Spjd } 289185029Spjd} 290185029Spjd 291185029Spjdstatic void 292168404Spjdsnapdir_changed_cb(void *arg, uint64_t newval) 293168404Spjd{ 294168404Spjd zfsvfs_t *zfsvfs = arg; 295168404Spjd 296168404Spjd zfsvfs->z_show_ctldir = newval; 297168404Spjd} 298168404Spjd 299168404Spjdstatic void 300185029Spjdvscan_changed_cb(void *arg, uint64_t newval) 301185029Spjd{ 302185029Spjd zfsvfs_t *zfsvfs = arg; 303185029Spjd 304185029Spjd zfsvfs->z_vscan = newval; 305185029Spjd} 306185029Spjd 307185029Spjdstatic void 308168404Spjdacl_mode_changed_cb(void *arg, uint64_t newval) 309168404Spjd{ 310168404Spjd zfsvfs_t *zfsvfs = arg; 311168404Spjd 312168404Spjd zfsvfs->z_acl_mode = newval; 313168404Spjd} 314168404Spjd 315168404Spjdstatic void 316168404Spjdacl_inherit_changed_cb(void *arg, uint64_t newval) 317168404Spjd{ 318168404Spjd zfsvfs_t *zfsvfs = arg; 319168404Spjd 320168404Spjd zfsvfs->z_acl_inherit = newval; 321168404Spjd} 322168404Spjd 323168404Spjdstatic int 324168404Spjdzfs_register_callbacks(vfs_t *vfsp) 325168404Spjd{ 326168404Spjd struct dsl_dataset *ds = NULL; 327168404Spjd objset_t *os = NULL; 328168404Spjd zfsvfs_t *zfsvfs = NULL; 329185029Spjd uint64_t nbmand; 330168404Spjd int readonly, 
do_readonly = FALSE; 331168404Spjd int setuid, do_setuid = FALSE; 332168404Spjd int exec, do_exec = FALSE; 333168404Spjd int xattr, do_xattr = FALSE; 334185029Spjd int atime, do_atime = FALSE; 335168404Spjd int error = 0; 336168404Spjd 337168404Spjd ASSERT(vfsp); 338168404Spjd zfsvfs = vfsp->vfs_data; 339168404Spjd ASSERT(zfsvfs); 340168404Spjd os = zfsvfs->z_os; 341168404Spjd 342168404Spjd /* 343196965Spjd * This function can be called for a snapshot when we update snapshot's 344196965Spjd * mount point, which isn't really supported. 345196965Spjd */ 346196965Spjd if (dmu_objset_is_snapshot(os)) 347196965Spjd return (EOPNOTSUPP); 348196965Spjd 349196965Spjd /* 350168404Spjd * The act of registering our callbacks will destroy any mount 351168404Spjd * options we may have. In order to enable temporary overrides 352168404Spjd * of mount options, we stash away the current values and 353168404Spjd * restore them after we register the callbacks. 354168404Spjd */ 355168404Spjd if (vfs_optionisset(vfsp, MNTOPT_RO, NULL)) { 356168404Spjd readonly = B_TRUE; 357168404Spjd do_readonly = B_TRUE; 358168404Spjd } else if (vfs_optionisset(vfsp, MNTOPT_RW, NULL)) { 359168404Spjd readonly = B_FALSE; 360168404Spjd do_readonly = B_TRUE; 361168404Spjd } 362168404Spjd if (vfs_optionisset(vfsp, MNTOPT_NOSUID, NULL)) { 363168404Spjd setuid = B_FALSE; 364168404Spjd do_setuid = B_TRUE; 365168404Spjd } else { 366168404Spjd if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) { 367168404Spjd setuid = B_FALSE; 368168404Spjd do_setuid = B_TRUE; 369168404Spjd } else if (vfs_optionisset(vfsp, MNTOPT_SETUID, NULL)) { 370168404Spjd setuid = B_TRUE; 371168404Spjd do_setuid = B_TRUE; 372168404Spjd } 373168404Spjd } 374168404Spjd if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL)) { 375168404Spjd exec = B_FALSE; 376168404Spjd do_exec = B_TRUE; 377168404Spjd } else if (vfs_optionisset(vfsp, MNTOPT_EXEC, NULL)) { 378168404Spjd exec = B_TRUE; 379168404Spjd do_exec = B_TRUE; 380168404Spjd } 381168404Spjd if 
(vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL)) { 382168404Spjd xattr = B_FALSE; 383168404Spjd do_xattr = B_TRUE; 384168404Spjd } else if (vfs_optionisset(vfsp, MNTOPT_XATTR, NULL)) { 385168404Spjd xattr = B_TRUE; 386168404Spjd do_xattr = B_TRUE; 387168404Spjd } 388185029Spjd if (vfs_optionisset(vfsp, MNTOPT_NOATIME, NULL)) { 389185029Spjd atime = B_FALSE; 390185029Spjd do_atime = B_TRUE; 391185029Spjd } else if (vfs_optionisset(vfsp, MNTOPT_ATIME, NULL)) { 392185029Spjd atime = B_TRUE; 393185029Spjd do_atime = B_TRUE; 394185029Spjd } 395168404Spjd 396168404Spjd /* 397185029Spjd * nbmand is a special property. It can only be changed at 398185029Spjd * mount time. 399185029Spjd * 400185029Spjd * This is weird, but it is documented to only be changeable 401185029Spjd * at mount time. 402185029Spjd */ 403185029Spjd if (vfs_optionisset(vfsp, MNTOPT_NONBMAND, NULL)) { 404185029Spjd nbmand = B_FALSE; 405185029Spjd } else if (vfs_optionisset(vfsp, MNTOPT_NBMAND, NULL)) { 406185029Spjd nbmand = B_TRUE; 407185029Spjd } else { 408185029Spjd char osname[MAXNAMELEN]; 409185029Spjd 410185029Spjd dmu_objset_name(os, osname); 411185029Spjd if (error = dsl_prop_get_integer(osname, "nbmand", &nbmand, 412185029Spjd NULL)) { 413185029Spjd return (error); 414185029Spjd } 415185029Spjd } 416185029Spjd 417185029Spjd /* 418168404Spjd * Register property callbacks. 419168404Spjd * 420168404Spjd * It would probably be fine to just check for i/o error from 421168404Spjd * the first prop_register(), but I guess I like to go 422168404Spjd * overboard... 423168404Spjd */ 424168404Spjd ds = dmu_objset_ds(os); 425168404Spjd error = dsl_prop_register(ds, "atime", atime_changed_cb, zfsvfs); 426168404Spjd error = error ? error : dsl_prop_register(ds, 427168404Spjd "xattr", xattr_changed_cb, zfsvfs); 428168404Spjd error = error ? error : dsl_prop_register(ds, 429168404Spjd "recordsize", blksz_changed_cb, zfsvfs); 430168404Spjd error = error ? 
error : dsl_prop_register(ds, 431168404Spjd "readonly", readonly_changed_cb, zfsvfs); 432168404Spjd error = error ? error : dsl_prop_register(ds, 433168404Spjd "setuid", setuid_changed_cb, zfsvfs); 434168404Spjd error = error ? error : dsl_prop_register(ds, 435168404Spjd "exec", exec_changed_cb, zfsvfs); 436168404Spjd error = error ? error : dsl_prop_register(ds, 437168404Spjd "snapdir", snapdir_changed_cb, zfsvfs); 438168404Spjd error = error ? error : dsl_prop_register(ds, 439168404Spjd "aclmode", acl_mode_changed_cb, zfsvfs); 440168404Spjd error = error ? error : dsl_prop_register(ds, 441168404Spjd "aclinherit", acl_inherit_changed_cb, zfsvfs); 442185029Spjd error = error ? error : dsl_prop_register(ds, 443185029Spjd "vscan", vscan_changed_cb, zfsvfs); 444168404Spjd if (error) 445168404Spjd goto unregister; 446168404Spjd 447168404Spjd /* 448168404Spjd * Invoke our callbacks to restore temporary mount options. 449168404Spjd */ 450168404Spjd if (do_readonly) 451168404Spjd readonly_changed_cb(zfsvfs, readonly); 452168404Spjd if (do_setuid) 453168404Spjd setuid_changed_cb(zfsvfs, setuid); 454168404Spjd if (do_exec) 455168404Spjd exec_changed_cb(zfsvfs, exec); 456168404Spjd if (do_xattr) 457168404Spjd xattr_changed_cb(zfsvfs, xattr); 458185029Spjd if (do_atime) 459185029Spjd atime_changed_cb(zfsvfs, atime); 460168404Spjd 461185029Spjd nbmand_changed_cb(zfsvfs, nbmand); 462185029Spjd 463168404Spjd return (0); 464168404Spjd 465168404Spjdunregister: 466168404Spjd /* 467168404Spjd * We may attempt to unregister some callbacks that are not 468168404Spjd * registered, but this is OK; it will simply return ENOMSG, 469168404Spjd * which we will ignore. 
470168404Spjd */ 471168404Spjd (void) dsl_prop_unregister(ds, "atime", atime_changed_cb, zfsvfs); 472168404Spjd (void) dsl_prop_unregister(ds, "xattr", xattr_changed_cb, zfsvfs); 473168404Spjd (void) dsl_prop_unregister(ds, "recordsize", blksz_changed_cb, zfsvfs); 474168404Spjd (void) dsl_prop_unregister(ds, "readonly", readonly_changed_cb, zfsvfs); 475168404Spjd (void) dsl_prop_unregister(ds, "setuid", setuid_changed_cb, zfsvfs); 476168404Spjd (void) dsl_prop_unregister(ds, "exec", exec_changed_cb, zfsvfs); 477168404Spjd (void) dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, zfsvfs); 478168404Spjd (void) dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb, zfsvfs); 479168404Spjd (void) dsl_prop_unregister(ds, "aclinherit", acl_inherit_changed_cb, 480168404Spjd zfsvfs); 481185029Spjd (void) dsl_prop_unregister(ds, "vscan", vscan_changed_cb, zfsvfs); 482168404Spjd return (error); 483168404Spjd 484168404Spjd} 485168404Spjd 486168404Spjdstatic int 487185029Spjdzfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting) 488168404Spjd{ 489185029Spjd int error; 490185029Spjd 491185029Spjd error = zfs_register_callbacks(zfsvfs->z_vfs); 492185029Spjd if (error) 493185029Spjd return (error); 494185029Spjd 495185029Spjd /* 496185029Spjd * Set the objset user_ptr to track its zfsvfs. 497185029Spjd */ 498185029Spjd mutex_enter(&zfsvfs->z_os->os->os_user_ptr_lock); 499185029Spjd dmu_objset_set_user(zfsvfs->z_os, zfsvfs); 500185029Spjd mutex_exit(&zfsvfs->z_os->os->os_user_ptr_lock); 501185029Spjd 502185029Spjd /* 503185029Spjd * If we are not mounting (ie: online recv), then we don't 504185029Spjd * have to worry about replaying the log as we blocked all 505185029Spjd * operations out since we closed the ZIL. 506185029Spjd */ 507185029Spjd if (mounting) { 508185029Spjd boolean_t readonly; 509185029Spjd 510185029Spjd /* 511185029Spjd * During replay we remove the read only flag to 512185029Spjd * allow replays to succeed. 
513185029Spjd */ 514185029Spjd readonly = zfsvfs->z_vfs->vfs_flag & VFS_RDONLY; 515185029Spjd zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY; 516185029Spjd 517185029Spjd /* 518185029Spjd * Parse and replay the intent log. 519185029Spjd */ 520185029Spjd zil_replay(zfsvfs->z_os, zfsvfs, &zfsvfs->z_assign, 521185029Spjd zfs_replay_vector, zfs_unlinked_drain); 522185029Spjd 523185029Spjd zfs_unlinked_drain(zfsvfs); 524185029Spjd zfsvfs->z_vfs->vfs_flag |= readonly; /* restore readonly bit */ 525185029Spjd } 526185029Spjd 527185029Spjd if (!zil_disable) 528185029Spjd zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data); 529185029Spjd 530185029Spjd return (0); 531185029Spjd} 532185029Spjd 533185029Spjdstatic void 534185029Spjdzfs_freezfsvfs(zfsvfs_t *zfsvfs) 535185029Spjd{ 536185029Spjd mutex_destroy(&zfsvfs->z_znodes_lock); 537185029Spjd mutex_destroy(&zfsvfs->z_online_recv_lock); 538185029Spjd list_destroy(&zfsvfs->z_all_znodes); 539185029Spjd rrw_destroy(&zfsvfs->z_teardown_lock); 540185029Spjd rw_destroy(&zfsvfs->z_teardown_inactive_lock); 541185029Spjd rw_destroy(&zfsvfs->z_fuid_lock); 542185029Spjd kmem_free(zfsvfs, sizeof (zfsvfs_t)); 543185029Spjd} 544185029Spjd 545185029Spjdstatic int 546185029Spjdzfs_domount(vfs_t *vfsp, char *osname) 547185029Spjd{ 548168404Spjd uint64_t recordsize, readonly; 549168404Spjd int error = 0; 550168404Spjd int mode; 551168404Spjd zfsvfs_t *zfsvfs; 552168404Spjd znode_t *zp = NULL; 553168404Spjd 554168404Spjd ASSERT(vfsp); 555168404Spjd ASSERT(osname); 556168404Spjd 557168404Spjd /* 558168404Spjd * Initialize the zfs-specific filesystem structure. 559168404Spjd * Should probably make this a kmem cache, shuffle fields, 560168404Spjd * and just bzero up to z_hold_mtx[]. 
561168404Spjd */ 562168404Spjd zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP); 563168404Spjd zfsvfs->z_vfs = vfsp; 564168404Spjd zfsvfs->z_parent = zfsvfs; 565168404Spjd zfsvfs->z_assign = TXG_NOWAIT; 566168404Spjd zfsvfs->z_max_blksz = SPA_MAXBLOCKSIZE; 567168404Spjd zfsvfs->z_show_ctldir = ZFS_SNAPDIR_VISIBLE; 568168404Spjd 569168404Spjd mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL); 570185029Spjd mutex_init(&zfsvfs->z_online_recv_lock, NULL, MUTEX_DEFAULT, NULL); 571168404Spjd list_create(&zfsvfs->z_all_znodes, sizeof (znode_t), 572168404Spjd offsetof(znode_t, z_link_node)); 573185029Spjd rrw_init(&zfsvfs->z_teardown_lock); 574185029Spjd rw_init(&zfsvfs->z_teardown_inactive_lock, NULL, RW_DEFAULT, NULL); 575185029Spjd rw_init(&zfsvfs->z_fuid_lock, NULL, RW_DEFAULT, NULL); 576168404Spjd 577168404Spjd if (error = dsl_prop_get_integer(osname, "recordsize", &recordsize, 578168404Spjd NULL)) 579168404Spjd goto out; 580168404Spjd zfsvfs->z_vfs->vfs_bsize = recordsize; 581168404Spjd 582168404Spjd vfsp->vfs_data = zfsvfs; 583168404Spjd vfsp->mnt_flag |= MNT_LOCAL; 584168404Spjd vfsp->mnt_kern_flag |= MNTK_MPSAFE; 585168404Spjd vfsp->mnt_kern_flag |= MNTK_LOOKUP_SHARED; 586193440Sps vfsp->mnt_kern_flag |= MNTK_SHARED_WRITES; 587168404Spjd 588168404Spjd if (error = dsl_prop_get_integer(osname, "readonly", &readonly, NULL)) 589168404Spjd goto out; 590168404Spjd 591185029Spjd mode = DS_MODE_OWNER; 592168404Spjd if (readonly) 593185029Spjd mode |= DS_MODE_READONLY; 594168404Spjd 595168404Spjd error = dmu_objset_open(osname, DMU_OST_ZFS, mode, &zfsvfs->z_os); 596168404Spjd if (error == EROFS) { 597185029Spjd mode = DS_MODE_OWNER | DS_MODE_READONLY; 598168404Spjd error = dmu_objset_open(osname, DMU_OST_ZFS, mode, 599168404Spjd &zfsvfs->z_os); 600168404Spjd } 601168404Spjd 602168404Spjd if (error) 603168404Spjd goto out; 604168404Spjd 605185029Spjd if (error = zfs_init_fs(zfsvfs, &zp)) 606168404Spjd goto out; 607168404Spjd 608185029Spjd /* 609185029Spjd * Set 
features for file system. 610185029Spjd */ 611185029Spjd zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os); 612185029Spjd if (zfsvfs->z_use_fuids) { 613185029Spjd vfs_set_feature(vfsp, VFSFT_XVATTR); 614185029Spjd vfs_set_feature(vfsp, VFSFT_SYSATTR_VIEWS); 615185029Spjd vfs_set_feature(vfsp, VFSFT_ACEMASKONACCESS); 616185029Spjd vfs_set_feature(vfsp, VFSFT_ACLONCREATE); 617185029Spjd } 618185029Spjd if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE) { 619185029Spjd vfs_set_feature(vfsp, VFSFT_DIRENTFLAGS); 620185029Spjd vfs_set_feature(vfsp, VFSFT_CASEINSENSITIVE); 621185029Spjd vfs_set_feature(vfsp, VFSFT_NOCASESENSITIVE); 622185029Spjd } else if (zfsvfs->z_case == ZFS_CASE_MIXED) { 623185029Spjd vfs_set_feature(vfsp, VFSFT_DIRENTFLAGS); 624185029Spjd vfs_set_feature(vfsp, VFSFT_CASEINSENSITIVE); 625185029Spjd } 626185029Spjd 627168404Spjd if (dmu_objset_is_snapshot(zfsvfs->z_os)) { 628185029Spjd uint64_t pval; 629168404Spjd 630168404Spjd ASSERT(mode & DS_MODE_READONLY); 631168404Spjd atime_changed_cb(zfsvfs, B_FALSE); 632168404Spjd readonly_changed_cb(zfsvfs, B_TRUE); 633185029Spjd if (error = dsl_prop_get_integer(osname, "xattr", &pval, NULL)) 634168404Spjd goto out; 635185029Spjd xattr_changed_cb(zfsvfs, pval); 636168404Spjd zfsvfs->z_issnap = B_TRUE; 637168404Spjd } else { 638185029Spjd error = zfsvfs_setup(zfsvfs, B_TRUE); 639168404Spjd } 640168404Spjd 641168404Spjd vfs_mountedfrom(vfsp, osname); 642168404Spjd 643168404Spjd if (!zfsvfs->z_issnap) 644168404Spjd zfsctl_create(zfsvfs); 645168404Spjdout: 646168404Spjd if (error) { 647168404Spjd if (zfsvfs->z_os) 648168404Spjd dmu_objset_close(zfsvfs->z_os); 649185029Spjd zfs_freezfsvfs(zfsvfs); 650168404Spjd } else { 651168404Spjd atomic_add_32(&zfs_active_fs_count, 1); 652168404Spjd } 653168404Spjd 654168404Spjd return (error); 655168404Spjd} 656168404Spjd 657168404Spjdvoid 658168404Spjdzfs_unregister_callbacks(zfsvfs_t *zfsvfs) 659168404Spjd{ 660168404Spjd objset_t *os = zfsvfs->z_os; 661168404Spjd 
struct dsl_dataset *ds; 662168404Spjd 663168404Spjd /* 664168404Spjd * Unregister properties. 665168404Spjd */ 666168404Spjd if (!dmu_objset_is_snapshot(os)) { 667168404Spjd ds = dmu_objset_ds(os); 668168404Spjd VERIFY(dsl_prop_unregister(ds, "atime", atime_changed_cb, 669168404Spjd zfsvfs) == 0); 670168404Spjd 671168404Spjd VERIFY(dsl_prop_unregister(ds, "xattr", xattr_changed_cb, 672168404Spjd zfsvfs) == 0); 673168404Spjd 674168404Spjd VERIFY(dsl_prop_unregister(ds, "recordsize", blksz_changed_cb, 675168404Spjd zfsvfs) == 0); 676168404Spjd 677168404Spjd VERIFY(dsl_prop_unregister(ds, "readonly", readonly_changed_cb, 678168404Spjd zfsvfs) == 0); 679168404Spjd 680168404Spjd VERIFY(dsl_prop_unregister(ds, "setuid", setuid_changed_cb, 681168404Spjd zfsvfs) == 0); 682168404Spjd 683168404Spjd VERIFY(dsl_prop_unregister(ds, "exec", exec_changed_cb, 684168404Spjd zfsvfs) == 0); 685168404Spjd 686168404Spjd VERIFY(dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, 687168404Spjd zfsvfs) == 0); 688168404Spjd 689168404Spjd VERIFY(dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb, 690168404Spjd zfsvfs) == 0); 691168404Spjd 692168404Spjd VERIFY(dsl_prop_unregister(ds, "aclinherit", 693168404Spjd acl_inherit_changed_cb, zfsvfs) == 0); 694185029Spjd 695185029Spjd VERIFY(dsl_prop_unregister(ds, "vscan", 696185029Spjd vscan_changed_cb, zfsvfs) == 0); 697168404Spjd } 698168404Spjd} 699168404Spjd 700168404Spjd/*ARGSUSED*/ 701168404Spjdstatic int 702191990Sattiliozfs_mount(vfs_t *vfsp) 703168404Spjd{ 704191990Sattilio kthread_t *td = curthread; 705185029Spjd vnode_t *mvp = vfsp->mnt_vnodecovered; 706185029Spjd cred_t *cr = td->td_ucred; 707185029Spjd char *osname; 708185029Spjd int error = 0; 709185029Spjd int canwrite; 710168404Spjd 711185029Spjd if (vfs_getopt(vfsp->mnt_optnew, "from", (void **)&osname, NULL)) 712185029Spjd return (EINVAL); 713185029Spjd 714168404Spjd /* 715185029Spjd * If full-owner-access is enabled and delegated administration is 716185029Spjd * turned 
on, we must set nosuid. 717185029Spjd */ 718185029Spjd if (zfs_super_owner && 719185029Spjd dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr) != ECANCELED) { 720185029Spjd secpolicy_fs_mount_clearopts(cr, vfsp); 721185029Spjd } 722185029Spjd 723185029Spjd /* 724185029Spjd * Check for mount privilege? 725185029Spjd * 726185029Spjd * If we don't have privilege then see if 727185029Spjd * we have local permission to allow it 728185029Spjd */ 729185029Spjd error = secpolicy_fs_mount(cr, mvp, vfsp); 730185029Spjd if (error) { 731185029Spjd error = dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr); 732196944Spjd if (error != 0) 733196944Spjd goto out; 734196944Spjd 735196944Spjd if (!(vfsp->vfs_flag & MS_REMOUNT)) { 736185029Spjd vattr_t vattr; 737185029Spjd 738185029Spjd /* 739185029Spjd * Make sure user is the owner of the mount point 740185029Spjd * or has sufficient privileges. 741185029Spjd */ 742185029Spjd 743185029Spjd vattr.va_mask = AT_UID; 744185029Spjd 745196662Spjd vn_lock(mvp, LK_SHARED | LK_RETRY); 746185029Spjd if (error = VOP_GETATTR(mvp, &vattr, cr)) { 747196662Spjd VOP_UNLOCK(mvp, 0); 748185029Spjd goto out; 749185029Spjd } 750185029Spjd 751185029Spjd#if 0 /* CHECK THIS! Is probably needed for zfs_suser. 
*/ 752185029Spjd if (secpolicy_vnode_owner(mvp, cr, vattr.va_uid) != 0 && 753185029Spjd VOP_ACCESS(mvp, VWRITE, cr, td) != 0) { 754185029Spjd error = EPERM; 755185029Spjd goto out; 756185029Spjd } 757185029Spjd#else 758185029Spjd if (error = secpolicy_vnode_owner(mvp, cr, vattr.va_uid)) { 759196662Spjd VOP_UNLOCK(mvp, 0); 760185029Spjd goto out; 761185029Spjd } 762185029Spjd 763185029Spjd if (error = VOP_ACCESS(mvp, VWRITE, cr, td)) { 764196662Spjd VOP_UNLOCK(mvp, 0); 765185029Spjd goto out; 766185029Spjd } 767196662Spjd VOP_UNLOCK(mvp, 0); 768185029Spjd#endif 769196944Spjd } 770185029Spjd 771196944Spjd secpolicy_fs_mount_clearopts(cr, vfsp); 772185029Spjd } 773185029Spjd 774185029Spjd /* 775185029Spjd * Refuse to mount a filesystem if we are in a local zone and the 776185029Spjd * dataset is not visible. 777185029Spjd */ 778185029Spjd if (!INGLOBALZONE(curthread) && 779185029Spjd (!zone_dataset_visible(osname, &canwrite) || !canwrite)) { 780185029Spjd error = EPERM; 781185029Spjd goto out; 782185029Spjd } 783185029Spjd 784185029Spjd /* 785168404Spjd * When doing a remount, we simply refresh our temporary properties 786168404Spjd * according to those options set in the current VFS options. 
787168404Spjd */ 788185029Spjd if (vfsp->vfs_flag & MS_REMOUNT) { 789185029Spjd /* refresh mount options */ 790185029Spjd zfs_unregister_callbacks(vfsp->vfs_data); 791185029Spjd error = zfs_register_callbacks(vfsp); 792185029Spjd goto out; 793185029Spjd } 794168404Spjd 795168510Spjd DROP_GIANT(); 796185029Spjd error = zfs_domount(vfsp, osname); 797168510Spjd PICKUP_GIANT(); 798185029Spjdout: 799168510Spjd return (error); 800168404Spjd} 801168404Spjd 802168404Spjdstatic int 803191990Sattiliozfs_statfs(vfs_t *vfsp, struct statfs *statp) 804169170Spjd{ 805168404Spjd zfsvfs_t *zfsvfs = vfsp->vfs_data; 806168404Spjd uint64_t refdbytes, availbytes, usedobjs, availobjs; 807168404Spjd 808168404Spjd statp->f_version = STATFS_VERSION; 809168404Spjd 810168404Spjd ZFS_ENTER(zfsvfs); 811168404Spjd 812168404Spjd dmu_objset_space(zfsvfs->z_os, 813168404Spjd &refdbytes, &availbytes, &usedobjs, &availobjs); 814168404Spjd 815168404Spjd /* 816168404Spjd * The underlying storage pool actually uses multiple block sizes. 817168404Spjd * We report the fragsize as the smallest block size we support, 818168404Spjd * and we report our blocksize as the filesystem's maximum blocksize. 819168404Spjd */ 820168404Spjd statp->f_bsize = zfsvfs->z_vfs->vfs_bsize; 821168404Spjd statp->f_iosize = zfsvfs->z_vfs->vfs_bsize; 822168404Spjd 823168404Spjd /* 824168404Spjd * The following report "total" blocks of various kinds in the 825168404Spjd * file system, but reported in terms of f_frsize - the 826168404Spjd * "fragment" size. 827168404Spjd */ 828168404Spjd 829168404Spjd statp->f_blocks = (refdbytes + availbytes) / statp->f_bsize; 830168404Spjd statp->f_bfree = availbytes / statp->f_bsize; 831168404Spjd statp->f_bavail = statp->f_bfree; /* no root reservation */ 832168404Spjd 833168404Spjd /* 834168404Spjd * statvfs() should really be called statufs(), because it assumes 835168404Spjd * static metadata. 
ZFS doesn't preallocate files, so the best 836168404Spjd * we can do is report the max that could possibly fit in f_files, 837168404Spjd * and that minus the number actually used in f_ffree. 838168404Spjd * For f_ffree, report the smaller of the number of object available 839168404Spjd * and the number of blocks (each object will take at least a block). 840168404Spjd */ 841168404Spjd statp->f_ffree = MIN(availobjs, statp->f_bfree); 842168404Spjd statp->f_files = statp->f_ffree + usedobjs; 843168404Spjd 844168404Spjd /* 845168404Spjd * We're a zfs filesystem. 846168404Spjd */ 847168404Spjd (void) strlcpy(statp->f_fstypename, "zfs", sizeof(statp->f_fstypename)); 848168404Spjd 849168404Spjd strlcpy(statp->f_mntfromname, vfsp->mnt_stat.f_mntfromname, 850168404Spjd sizeof(statp->f_mntfromname)); 851168404Spjd strlcpy(statp->f_mntonname, vfsp->mnt_stat.f_mntonname, 852168404Spjd sizeof(statp->f_mntonname)); 853168404Spjd 854168404Spjd statp->f_namemax = ZFS_MAXNAMELEN; 855168404Spjd 856168404Spjd ZFS_EXIT(zfsvfs); 857168404Spjd return (0); 858168404Spjd} 859168404Spjd 860168404Spjdstatic int 861191990Sattiliozfs_root(vfs_t *vfsp, int flags, vnode_t **vpp) 862168404Spjd{ 863168404Spjd zfsvfs_t *zfsvfs = vfsp->vfs_data; 864168404Spjd znode_t *rootzp; 865168404Spjd int error; 866168404Spjd 867168404Spjd ZFS_ENTER(zfsvfs); 868168404Spjd 869168404Spjd error = zfs_zget(zfsvfs, zfsvfs->z_root, &rootzp); 870168404Spjd if (error == 0) { 871168404Spjd *vpp = ZTOV(rootzp); 872175202Sattilio error = vn_lock(*vpp, flags); 873168404Spjd (*vpp)->v_vflag |= VV_ROOT; 874168404Spjd } 875168404Spjd 876168404Spjd ZFS_EXIT(zfsvfs); 877168404Spjd return (error); 878168404Spjd} 879168404Spjd 880185029Spjd/* 881185029Spjd * Teardown the zfsvfs::z_os. 882185029Spjd * 883185029Spjd * Note, if 'unmounting' if FALSE, we return with the 'z_teardown_lock' 884185029Spjd * and 'z_teardown_inactive_lock' held. 
885185029Spjd */ 886185029Spjdstatic int 887185029Spjdzfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting) 888185029Spjd{ 889185029Spjd znode_t *zp; 890185029Spjd 891185029Spjd rrw_enter(&zfsvfs->z_teardown_lock, RW_WRITER, FTAG); 892185029Spjd 893185029Spjd if (!unmounting) { 894185029Spjd /* 895185029Spjd * We purge the parent filesystem's vfsp as the parent 896185029Spjd * filesystem and all of its snapshots have their vnode's 897185029Spjd * v_vfsp set to the parent's filesystem's vfsp. Note, 898185029Spjd * 'z_parent' is self referential for non-snapshots. 899185029Spjd */ 900185029Spjd (void) dnlc_purge_vfsp(zfsvfs->z_parent->z_vfs, 0); 901185029Spjd } 902185029Spjd 903185029Spjd /* 904185029Spjd * Close the zil. NB: Can't close the zil while zfs_inactive 905185029Spjd * threads are blocked as zil_close can call zfs_inactive. 906185029Spjd */ 907185029Spjd if (zfsvfs->z_log) { 908185029Spjd zil_close(zfsvfs->z_log); 909185029Spjd zfsvfs->z_log = NULL; 910185029Spjd } 911185029Spjd 912185029Spjd rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_WRITER); 913185029Spjd 914185029Spjd /* 915185029Spjd * If we are not unmounting (ie: online recv) and someone already 916185029Spjd * unmounted this file system while we were doing the switcheroo, 917185029Spjd * or a reopen of z_os failed then just bail out now. 918185029Spjd */ 919185029Spjd if (!unmounting && (zfsvfs->z_unmounted || zfsvfs->z_os == NULL)) { 920185029Spjd rw_exit(&zfsvfs->z_teardown_inactive_lock); 921185029Spjd rrw_exit(&zfsvfs->z_teardown_lock, FTAG); 922185029Spjd return (EIO); 923185029Spjd } 924185029Spjd 925185029Spjd /* 926185029Spjd * At this point there are no vops active, and any new vops will 927185029Spjd * fail with EIO since we have z_teardown_lock for writer (only 928185029Spjd * relavent for forced unmount). 929185029Spjd * 930185029Spjd * Release all holds on dbufs. 
931185029Spjd */ 932185029Spjd mutex_enter(&zfsvfs->z_znodes_lock); 933185029Spjd for (zp = list_head(&zfsvfs->z_all_znodes); zp != NULL; 934185029Spjd zp = list_next(&zfsvfs->z_all_znodes, zp)) 935185029Spjd if (zp->z_dbuf) { 936196297Spjd ASSERT(ZTOV(zp)->v_count >= 0); 937185029Spjd zfs_znode_dmu_fini(zp); 938185029Spjd } 939185029Spjd mutex_exit(&zfsvfs->z_znodes_lock); 940185029Spjd 941185029Spjd /* 942185029Spjd * If we are unmounting, set the unmounted flag and let new vops 943185029Spjd * unblock. zfs_inactive will have the unmounted behavior, and all 944185029Spjd * other vops will fail with EIO. 945185029Spjd */ 946185029Spjd if (unmounting) { 947185029Spjd zfsvfs->z_unmounted = B_TRUE; 948185029Spjd rrw_exit(&zfsvfs->z_teardown_lock, FTAG); 949185029Spjd rw_exit(&zfsvfs->z_teardown_inactive_lock); 950197133Spjd 951197133Spjd#ifdef __FreeBSD__ 952197133Spjd /* 953197133Spjd * Some znodes might not be fully reclaimed, wait for them. 954197133Spjd */ 955197133Spjd mutex_enter(&zfsvfs->z_znodes_lock); 956197133Spjd while (list_head(&zfsvfs->z_all_znodes) != NULL) { 957197133Spjd msleep(zfsvfs, &zfsvfs->z_znodes_lock, 0, 958197133Spjd "zteardown", 0); 959197133Spjd } 960197133Spjd mutex_exit(&zfsvfs->z_znodes_lock); 961197133Spjd#endif 962185029Spjd } 963185029Spjd 964185029Spjd /* 965185029Spjd * z_os will be NULL if there was an error in attempting to reopen 966185029Spjd * zfsvfs, so just return as the properties had already been 967185029Spjd * unregistered and cached data had been evicted before. 968185029Spjd */ 969185029Spjd if (zfsvfs->z_os == NULL) 970185029Spjd return (0); 971185029Spjd 972185029Spjd /* 973185029Spjd * Unregister properties. 
974185029Spjd */ 975185029Spjd zfs_unregister_callbacks(zfsvfs); 976185029Spjd 977185029Spjd /* 978185029Spjd * Evict cached data 979185029Spjd */ 980185029Spjd if (dmu_objset_evict_dbufs(zfsvfs->z_os)) { 981185029Spjd txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0); 982185029Spjd (void) dmu_objset_evict_dbufs(zfsvfs->z_os); 983185029Spjd } 984185029Spjd 985185029Spjd return (0); 986185029Spjd} 987185029Spjd 988168404Spjd/*ARGSUSED*/ 989168404Spjdstatic int 990191990Sattiliozfs_umount(vfs_t *vfsp, int fflag) 991168404Spjd{ 992168404Spjd zfsvfs_t *zfsvfs = vfsp->vfs_data; 993185029Spjd objset_t *os; 994191990Sattilio cred_t *cr = curthread->td_ucred; 995168404Spjd int ret; 996168404Spjd 997185029Spjd if (fflag & MS_FORCE) { 998185029Spjd /* TODO: Force unmount is not well implemented yet, so deny it. */ 999192211Skmacy ZFS_LOG(0, "Force unmount is experimental - report any problems."); 1000185029Spjd } 1001168404Spjd 1002185029Spjd ret = secpolicy_fs_unmount(cr, vfsp); 1003185029Spjd if (ret) { 1004185029Spjd ret = dsl_deleg_access((char *)refstr_value(vfsp->vfs_resource), 1005185029Spjd ZFS_DELEG_PERM_MOUNT, cr); 1006185029Spjd if (ret) 1007185029Spjd return (ret); 1008185029Spjd } 1009185029Spjd /* 1010185029Spjd * We purge the parent filesystem's vfsp as the parent filesystem 1011185029Spjd * and all of its snapshots have their vnode's v_vfsp set to the 1012185029Spjd * parent's filesystem's vfsp. Note, 'z_parent' is self 1013185029Spjd * referential for non-snapshots. 1014185029Spjd */ 1015185029Spjd (void) dnlc_purge_vfsp(zfsvfs->z_parent->z_vfs, 0); 1016168404Spjd 1017168404Spjd /* 1018168404Spjd * Unmount any snapshots mounted under .zfs before unmounting the 1019168404Spjd * dataset itself. 
1020168404Spjd */ 1021169170Spjd if (zfsvfs->z_ctldir != NULL) { 1022168404Spjd if ((ret = zfsctl_umount_snapshots(vfsp, fflag, cr)) != 0) 1023168404Spjd return (ret); 1024191990Sattilio ret = vflush(vfsp, 0, 0, curthread); 1025168404Spjd ASSERT(ret == EBUSY); 1026168404Spjd if (!(fflag & MS_FORCE)) { 1027168404Spjd if (zfsvfs->z_ctldir->v_count > 1) 1028168404Spjd return (EBUSY); 1029168404Spjd ASSERT(zfsvfs->z_ctldir->v_count == 1); 1030168404Spjd } 1031168404Spjd zfsctl_destroy(zfsvfs); 1032168404Spjd ASSERT(zfsvfs->z_ctldir == NULL); 1033168404Spjd } 1034168404Spjd 1035168404Spjd /* 1036168404Spjd * Flush all the files. 1037168404Spjd */ 1038191990Sattilio ret = vflush(vfsp, 1, (fflag & MS_FORCE) ? FORCECLOSE : 0, curthread); 1039168404Spjd if (ret != 0) { 1040168404Spjd if (!zfsvfs->z_issnap) { 1041168404Spjd zfsctl_create(zfsvfs); 1042168404Spjd ASSERT(zfsvfs->z_ctldir != NULL); 1043168404Spjd } 1044168404Spjd return (ret); 1045168404Spjd } 1046168404Spjd 1047185029Spjd if (!(fflag & MS_FORCE)) { 1048185029Spjd /* 1049185029Spjd * Check the number of active vnodes in the file system. 1050185029Spjd * Our count is maintained in the vfs structure, but the 1051185029Spjd * number is off by 1 to indicate a hold on the vfs 1052185029Spjd * structure itself. 1053185029Spjd * 1054185029Spjd * The '.zfs' directory maintains a reference of its 1055185029Spjd * own, and any active references underneath are 1056185029Spjd * reflected in the vnode count. 
1057185029Spjd */ 1058185029Spjd if (zfsvfs->z_ctldir == NULL) { 1059185029Spjd if (vfsp->vfs_count > 1) 1060185029Spjd return (EBUSY); 1061185029Spjd } else { 1062185029Spjd if (vfsp->vfs_count > 2 || 1063185029Spjd zfsvfs->z_ctldir->v_count > 1) 1064185029Spjd return (EBUSY); 1065185029Spjd } 1066185029Spjd } else { 1067168404Spjd MNT_ILOCK(vfsp); 1068168404Spjd vfsp->mnt_kern_flag |= MNTK_UNMOUNTF; 1069168404Spjd MNT_IUNLOCK(vfsp); 1070185029Spjd } 1071168404Spjd 1072185029Spjd VERIFY(zfsvfs_teardown(zfsvfs, B_TRUE) == 0); 1073185029Spjd os = zfsvfs->z_os; 1074185029Spjd 1075185029Spjd /* 1076185029Spjd * z_os will be NULL if there was an error in 1077185029Spjd * attempting to reopen zfsvfs. 1078185029Spjd */ 1079185029Spjd if (os != NULL) { 1080168404Spjd /* 1081185029Spjd * Unset the objset user_ptr. 1082168404Spjd */ 1083185029Spjd mutex_enter(&os->os->os_user_ptr_lock); 1084185029Spjd dmu_objset_set_user(os, NULL); 1085185029Spjd mutex_exit(&os->os->os_user_ptr_lock); 1086185029Spjd 1087185029Spjd /* 1088185029Spjd * Finally release the objset 1089185029Spjd */ 1090185029Spjd dmu_objset_close(os); 1091168404Spjd } 1092168404Spjd 1093185029Spjd /* 1094185029Spjd * We can now safely destroy the '.zfs' directory node. 
1095185029Spjd */ 1096185029Spjd if (zfsvfs->z_ctldir != NULL) 1097185029Spjd zfsctl_destroy(zfsvfs); 1098185029Spjd if (zfsvfs->z_issnap) { 1099185029Spjd vnode_t *svp = vfsp->mnt_vnodecovered; 1100185029Spjd 1101192211Skmacy ASSERT(svp->v_count == 2 || svp->v_count == 1); 1102192211Skmacy if (svp->v_count == 2) 1103192211Skmacy VN_RELE(svp); 1104185029Spjd } 1105168404Spjd zfs_freevfs(vfsp); 1106168404Spjd 1107168404Spjd return (0); 1108168404Spjd} 1109168404Spjd 1110168404Spjdstatic int 1111168404Spjdzfs_vget(vfs_t *vfsp, ino_t ino, int flags, vnode_t **vpp) 1112168404Spjd{ 1113168404Spjd zfsvfs_t *zfsvfs = vfsp->vfs_data; 1114168404Spjd znode_t *zp; 1115168404Spjd int err; 1116168404Spjd 1117168404Spjd ZFS_ENTER(zfsvfs); 1118168404Spjd err = zfs_zget(zfsvfs, ino, &zp); 1119168404Spjd if (err == 0 && zp->z_unlinked) { 1120168404Spjd VN_RELE(ZTOV(zp)); 1121168404Spjd err = EINVAL; 1122168404Spjd } 1123168404Spjd if (err != 0) 1124168404Spjd *vpp = NULL; 1125168404Spjd else { 1126168404Spjd *vpp = ZTOV(zp); 1127175202Sattilio vn_lock(*vpp, flags); 1128168404Spjd } 1129168404Spjd ZFS_EXIT(zfsvfs); 1130171063Sdfr return (err); 1131168404Spjd} 1132168404Spjd 1133168404Spjdstatic int 1134196982Spjdzfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, int *extflagsp, 1135196982Spjd struct ucred **credanonp, int *numsecflavors, int **secflavors) 1136196982Spjd{ 1137196982Spjd zfsvfs_t *zfsvfs = vfsp->vfs_data; 1138196982Spjd 1139196982Spjd /* 1140196982Spjd * If this is regular file system vfsp is the same as 1141196982Spjd * zfsvfs->z_parent->z_vfs, but if it is snapshot, 1142196982Spjd * zfsvfs->z_parent->z_vfs represents parent file system 1143196982Spjd * which we have to use here, because only this file system 1144196982Spjd * has mnt_export configured. 
1145196982Spjd */ 1146196982Spjd vfsp = zfsvfs->z_parent->z_vfs; 1147196982Spjd 1148196982Spjd return (vfs_stdcheckexp(zfsvfs->z_parent->z_vfs, nam, extflagsp, 1149196982Spjd credanonp, numsecflavors, secflavors)); 1150196982Spjd} 1151196982Spjd 1152197151SpjdCTASSERT(SHORT_FID_LEN <= sizeof(struct fid)); 1153197151SpjdCTASSERT(LONG_FID_LEN <= sizeof(struct fid)); 1154196982Spjd 1155196982Spjdstatic int 1156168404Spjdzfs_fhtovp(vfs_t *vfsp, fid_t *fidp, vnode_t **vpp) 1157168404Spjd{ 1158168404Spjd zfsvfs_t *zfsvfs = vfsp->vfs_data; 1159168404Spjd znode_t *zp; 1160168404Spjd uint64_t object = 0; 1161168404Spjd uint64_t fid_gen = 0; 1162168404Spjd uint64_t gen_mask; 1163168404Spjd uint64_t zp_gen; 1164168404Spjd int i, err; 1165168404Spjd 1166168404Spjd *vpp = NULL; 1167168404Spjd 1168168404Spjd ZFS_ENTER(zfsvfs); 1169168404Spjd 1170196979Spjd /* 1171196979Spjd * On FreeBSD we are already called with snapshot's mount point 1172196979Spjd * and not the mount point of its parent. 1173196979Spjd */ 1174196979Spjd#ifndef __FreeBSD__ 1175168404Spjd if (fidp->fid_len == LONG_FID_LEN) { 1176168404Spjd zfid_long_t *zlfid = (zfid_long_t *)fidp; 1177168404Spjd uint64_t objsetid = 0; 1178168404Spjd uint64_t setgen = 0; 1179168404Spjd 1180168404Spjd for (i = 0; i < sizeof (zlfid->zf_setid); i++) 1181168404Spjd objsetid |= ((uint64_t)zlfid->zf_setid[i]) << (8 * i); 1182168404Spjd 1183168404Spjd for (i = 0; i < sizeof (zlfid->zf_setgen); i++) 1184168404Spjd setgen |= ((uint64_t)zlfid->zf_setgen[i]) << (8 * i); 1185168404Spjd 1186168404Spjd ZFS_EXIT(zfsvfs); 1187168404Spjd 1188168404Spjd err = zfsctl_lookup_objset(vfsp, objsetid, &zfsvfs); 1189168404Spjd if (err) 1190168404Spjd return (EINVAL); 1191168404Spjd ZFS_ENTER(zfsvfs); 1192168404Spjd } 1193196979Spjd#endif 1194168404Spjd 1195168404Spjd if (fidp->fid_len == SHORT_FID_LEN || fidp->fid_len == LONG_FID_LEN) { 1196168404Spjd zfid_short_t *zfid = (zfid_short_t *)fidp; 1197168404Spjd 1198168404Spjd for (i = 0; i < sizeof 
(zfid->zf_object); i++) 1199168404Spjd object |= ((uint64_t)zfid->zf_object[i]) << (8 * i); 1200168404Spjd 1201168404Spjd for (i = 0; i < sizeof (zfid->zf_gen); i++) 1202168404Spjd fid_gen |= ((uint64_t)zfid->zf_gen[i]) << (8 * i); 1203168404Spjd } else { 1204168404Spjd ZFS_EXIT(zfsvfs); 1205168404Spjd return (EINVAL); 1206168404Spjd } 1207168404Spjd 1208168404Spjd /* A zero fid_gen means we are in the .zfs control directories */ 1209168404Spjd if (fid_gen == 0 && 1210168404Spjd (object == ZFSCTL_INO_ROOT || object == ZFSCTL_INO_SNAPDIR)) { 1211168404Spjd *vpp = zfsvfs->z_ctldir; 1212168404Spjd ASSERT(*vpp != NULL); 1213168404Spjd if (object == ZFSCTL_INO_SNAPDIR) { 1214168404Spjd VERIFY(zfsctl_root_lookup(*vpp, "snapshot", vpp, NULL, 1215185029Spjd 0, NULL, NULL, NULL, NULL, NULL) == 0); 1216168404Spjd } else { 1217168404Spjd VN_HOLD(*vpp); 1218168404Spjd } 1219196978Spjd vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY); 1220168404Spjd ZFS_EXIT(zfsvfs); 1221168404Spjd return (0); 1222168404Spjd } 1223168404Spjd 1224168404Spjd gen_mask = -1ULL >> (64 - 8 * i); 1225168404Spjd 1226168404Spjd dprintf("getting %llu [%u mask %llx]\n", object, fid_gen, gen_mask); 1227168404Spjd if (err = zfs_zget(zfsvfs, object, &zp)) { 1228168404Spjd ZFS_EXIT(zfsvfs); 1229168404Spjd return (err); 1230168404Spjd } 1231168404Spjd zp_gen = zp->z_phys->zp_gen & gen_mask; 1232168404Spjd if (zp_gen == 0) 1233168404Spjd zp_gen = 1; 1234168404Spjd if (zp->z_unlinked || zp_gen != fid_gen) { 1235168404Spjd dprintf("znode gen (%u) != fid gen (%u)\n", zp_gen, fid_gen); 1236168404Spjd VN_RELE(ZTOV(zp)); 1237168404Spjd ZFS_EXIT(zfsvfs); 1238168404Spjd return (EINVAL); 1239168404Spjd } 1240168404Spjd 1241168404Spjd *vpp = ZTOV(zp); 1242175202Sattilio vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY); 1243185029Spjd vnode_create_vobject(*vpp, zp->z_phys->zp_size, curthread); 1244168404Spjd ZFS_EXIT(zfsvfs); 1245168404Spjd return (0); 1246168404Spjd} 1247168404Spjd 1248185029Spjd/* 1249185029Spjd * Block out VOPs and close 
zfsvfs_t::z_os 1250185029Spjd * 1251185029Spjd * Note, if successful, then we return with the 'z_teardown_lock' and 1252185029Spjd * 'z_teardown_inactive_lock' write held. 1253185029Spjd */ 1254185029Spjdint 1255185029Spjdzfs_suspend_fs(zfsvfs_t *zfsvfs, char *name, int *mode) 1256168404Spjd{ 1257185029Spjd int error; 1258168404Spjd 1259185029Spjd if ((error = zfsvfs_teardown(zfsvfs, B_FALSE)) != 0) 1260185029Spjd return (error); 1261168404Spjd 1262185029Spjd *mode = zfsvfs->z_os->os_mode; 1263185029Spjd dmu_objset_name(zfsvfs->z_os, name); 1264185029Spjd dmu_objset_close(zfsvfs->z_os); 1265168404Spjd 1266185029Spjd return (0); 1267185029Spjd} 1268168404Spjd 1269185029Spjd/* 1270185029Spjd * Reopen zfsvfs_t::z_os and release VOPs. 1271185029Spjd */ 1272185029Spjdint 1273185029Spjdzfs_resume_fs(zfsvfs_t *zfsvfs, const char *osname, int mode) 1274185029Spjd{ 1275185029Spjd int err; 1276168404Spjd 1277185029Spjd ASSERT(RRW_WRITE_HELD(&zfsvfs->z_teardown_lock)); 1278185029Spjd ASSERT(RW_WRITE_HELD(&zfsvfs->z_teardown_inactive_lock)); 1279185029Spjd 1280185029Spjd err = dmu_objset_open(osname, DMU_OST_ZFS, mode, &zfsvfs->z_os); 1281185029Spjd if (err) { 1282185029Spjd zfsvfs->z_os = NULL; 1283185029Spjd } else { 1284185029Spjd znode_t *zp; 1285185029Spjd 1286185029Spjd VERIFY(zfsvfs_setup(zfsvfs, B_FALSE) == 0); 1287185029Spjd 1288185029Spjd /* 1289185029Spjd * Attempt to re-establish all the active znodes with 1290185029Spjd * their dbufs. If a zfs_rezget() fails, then we'll let 1291185029Spjd * any potential callers discover that via ZFS_ENTER_VERIFY_VP 1292185029Spjd * when they try to use their znode. 
1293185029Spjd */ 1294185029Spjd mutex_enter(&zfsvfs->z_znodes_lock); 1295185029Spjd for (zp = list_head(&zfsvfs->z_all_znodes); zp; 1296185029Spjd zp = list_next(&zfsvfs->z_all_znodes, zp)) { 1297185029Spjd (void) zfs_rezget(zp); 1298185029Spjd } 1299185029Spjd mutex_exit(&zfsvfs->z_znodes_lock); 1300185029Spjd 1301168404Spjd } 1302168404Spjd 1303185029Spjd /* release the VOPs */ 1304185029Spjd rw_exit(&zfsvfs->z_teardown_inactive_lock); 1305185029Spjd rrw_exit(&zfsvfs->z_teardown_lock, FTAG); 1306185029Spjd 1307185029Spjd if (err) { 1308185029Spjd /* 1309185029Spjd * Since we couldn't reopen zfsvfs::z_os, force 1310185029Spjd * unmount this file system. 1311185029Spjd */ 1312185029Spjd if (vn_vfswlock(zfsvfs->z_vfs->vfs_vnodecovered) == 0) 1313185029Spjd (void) dounmount(zfsvfs->z_vfs, MS_FORCE, curthread); 1314168404Spjd } 1315185029Spjd return (err); 1316168404Spjd} 1317168404Spjd 1318168404Spjdstatic void 1319168404Spjdzfs_freevfs(vfs_t *vfsp) 1320168404Spjd{ 1321168404Spjd zfsvfs_t *zfsvfs = vfsp->vfs_data; 1322168404Spjd int i; 1323168404Spjd 1324168404Spjd for (i = 0; i != ZFS_OBJ_MTX_SZ; i++) 1325168404Spjd mutex_destroy(&zfsvfs->z_hold_mtx[i]); 1326168404Spjd 1327185029Spjd zfs_fuid_destroy(zfsvfs); 1328185029Spjd zfs_freezfsvfs(zfsvfs); 1329185029Spjd 1330168404Spjd atomic_add_32(&zfs_active_fs_count, -1); 1331168404Spjd} 1332168404Spjd 1333172135Spjd#ifdef __i386__ 1334172135Spjdstatic int desiredvnodes_backup; 1335172135Spjd#endif 1336172135Spjd 1337172135Spjdstatic void 1338172135Spjdzfs_vnodes_adjust(void) 1339172135Spjd{ 1340172135Spjd#ifdef __i386__ 1341185029Spjd int newdesiredvnodes; 1342172135Spjd 1343172135Spjd desiredvnodes_backup = desiredvnodes; 1344172135Spjd 1345172135Spjd /* 1346172135Spjd * We calculate newdesiredvnodes the same way it is done in 1347172135Spjd * vntblinit(). If it is equal to desiredvnodes, it means that 1348172135Spjd * it wasn't tuned by the administrator and we can tune it down. 
1349172135Spjd */ 1350185029Spjd newdesiredvnodes = min(maxproc + cnt.v_page_count / 4, 2 * 1351185029Spjd vm_kmem_size / (5 * (sizeof(struct vm_object) + 1352185029Spjd sizeof(struct vnode)))); 1353185029Spjd if (newdesiredvnodes == desiredvnodes) 1354185029Spjd desiredvnodes = (3 * newdesiredvnodes) / 4; 1355172135Spjd#endif 1356172135Spjd} 1357172135Spjd 1358172135Spjdstatic void 1359172135Spjdzfs_vnodes_adjust_back(void) 1360172135Spjd{ 1361172135Spjd 1362172135Spjd#ifdef __i386__ 1363172135Spjd desiredvnodes = desiredvnodes_backup; 1364172135Spjd#endif 1365172135Spjd} 1366172135Spjd 1367168404Spjdvoid 1368168404Spjdzfs_init(void) 1369168404Spjd{ 1370168404Spjd 1371185029Spjd printf("ZFS filesystem version " SPA_VERSION_STRING "\n"); 1372168404Spjd 1373168404Spjd /* 1374185029Spjd * Initialize znode cache, vnode ops, etc... 1375168404Spjd */ 1376185029Spjd zfs_znode_init(); 1377168404Spjd 1378168404Spjd /* 1379185029Spjd * Initialize .zfs directory structures 1380168404Spjd */ 1381185029Spjd zfsctl_init(); 1382172135Spjd 1383172135Spjd /* 1384185029Spjd * Reduce number of vnode. Originally number of vnodes is calculated 1385172135Spjd * with UFS inode in mind. We reduce it here, because it's too big for 1386172135Spjd * ZFS/i386. 
1387172135Spjd */ 1388172135Spjd zfs_vnodes_adjust(); 1389168404Spjd} 1390168404Spjd 1391168404Spjdvoid 1392168404Spjdzfs_fini(void) 1393168404Spjd{ 1394168404Spjd zfsctl_fini(); 1395168404Spjd zfs_znode_fini(); 1396172135Spjd zfs_vnodes_adjust_back(); 1397168404Spjd} 1398168404Spjd 1399168404Spjdint 1400168404Spjdzfs_busy(void) 1401168404Spjd{ 1402168404Spjd return (zfs_active_fs_count != 0); 1403168404Spjd} 1404185029Spjd 1405185029Spjdint 1406185029Spjdzfs_set_version(const char *name, uint64_t newvers) 1407185029Spjd{ 1408185029Spjd int error; 1409185029Spjd objset_t *os; 1410185029Spjd dmu_tx_t *tx; 1411185029Spjd uint64_t curvers; 1412185029Spjd 1413185029Spjd /* 1414185029Spjd * XXX for now, require that the filesystem be unmounted. Would 1415185029Spjd * be nice to find the zfsvfs_t and just update that if 1416185029Spjd * possible. 1417185029Spjd */ 1418185029Spjd 1419185029Spjd if (newvers < ZPL_VERSION_INITIAL || newvers > ZPL_VERSION) 1420185029Spjd return (EINVAL); 1421185029Spjd 1422185029Spjd error = dmu_objset_open(name, DMU_OST_ZFS, DS_MODE_OWNER, &os); 1423185029Spjd if (error) 1424185029Spjd return (error); 1425185029Spjd 1426185029Spjd error = zap_lookup(os, MASTER_NODE_OBJ, ZPL_VERSION_STR, 1427185029Spjd 8, 1, &curvers); 1428185029Spjd if (error) 1429185029Spjd goto out; 1430185029Spjd if (newvers < curvers) { 1431185029Spjd error = EINVAL; 1432185029Spjd goto out; 1433185029Spjd } 1434185029Spjd 1435185029Spjd tx = dmu_tx_create(os); 1436185029Spjd dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, 0, ZPL_VERSION_STR); 1437185029Spjd error = dmu_tx_assign(tx, TXG_WAIT); 1438185029Spjd if (error) { 1439185029Spjd dmu_tx_abort(tx); 1440185029Spjd goto out; 1441185029Spjd } 1442185029Spjd error = zap_update(os, MASTER_NODE_OBJ, ZPL_VERSION_STR, 8, 1, 1443185029Spjd &newvers, tx); 1444185029Spjd 1445185029Spjd spa_history_internal_log(LOG_DS_UPGRADE, 1446185029Spjd dmu_objset_spa(os), tx, CRED(), 1447185029Spjd "oldver=%llu newver=%llu dataset = %llu", 
curvers, newvers, 1448185029Spjd dmu_objset_id(os)); 1449185029Spjd dmu_tx_commit(tx); 1450185029Spjd 1451185029Spjdout: 1452185029Spjd dmu_objset_close(os); 1453185029Spjd return (error); 1454185029Spjd} 1455185029Spjd/* 1456185029Spjd * Read a property stored within the master node. 1457185029Spjd */ 1458185029Spjdint 1459185029Spjdzfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value) 1460185029Spjd{ 1461185029Spjd const char *pname; 1462185029Spjd int error = ENOENT; 1463185029Spjd 1464185029Spjd /* 1465185029Spjd * Look up the file system's value for the property. For the 1466185029Spjd * version property, we look up a slightly different string. 1467185029Spjd */ 1468185029Spjd if (prop == ZFS_PROP_VERSION) 1469185029Spjd pname = ZPL_VERSION_STR; 1470185029Spjd else 1471185029Spjd pname = zfs_prop_to_name(prop); 1472185029Spjd 1473185029Spjd if (os != NULL) 1474185029Spjd error = zap_lookup(os, MASTER_NODE_OBJ, pname, 8, 1, value); 1475185029Spjd 1476185029Spjd if (error == ENOENT) { 1477185029Spjd /* No value set, use the default value */ 1478185029Spjd switch (prop) { 1479185029Spjd case ZFS_PROP_VERSION: 1480185029Spjd *value = ZPL_VERSION; 1481185029Spjd break; 1482185029Spjd case ZFS_PROP_NORMALIZE: 1483185029Spjd case ZFS_PROP_UTF8ONLY: 1484185029Spjd *value = 0; 1485185029Spjd break; 1486185029Spjd case ZFS_PROP_CASE: 1487185029Spjd *value = ZFS_CASE_SENSITIVE; 1488185029Spjd break; 1489185029Spjd default: 1490185029Spjd return (error); 1491185029Spjd } 1492185029Spjd error = 0; 1493185029Spjd } 1494185029Spjd return (error); 1495185029Spjd} 1496