zfs_vfsops.c revision 206667
1168404Spjd/* 2168404Spjd * CDDL HEADER START 3168404Spjd * 4168404Spjd * The contents of this file are subject to the terms of the 5168404Spjd * Common Development and Distribution License (the "License"). 6168404Spjd * You may not use this file except in compliance with the License. 7168404Spjd * 8168404Spjd * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9168404Spjd * or http://www.opensolaris.org/os/licensing. 10168404Spjd * See the License for the specific language governing permissions 11168404Spjd * and limitations under the License. 12168404Spjd * 13168404Spjd * When distributing Covered Code, include this CDDL HEADER in each 14168404Spjd * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15168404Spjd * If applicable, add the following below this CDDL HEADER, with the 16168404Spjd * fields enclosed by brackets "[]" replaced with your own identifying 17168404Spjd * information: Portions Copyright [yyyy] [name of copyright owner] 18168404Spjd * 19168404Spjd * CDDL HEADER END 20168404Spjd */ 21168404Spjd/* 22185029Spjd * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23168404Spjd * Use is subject to license terms. 24168404Spjd */ 25168404Spjd 26168404Spjd#include <sys/types.h> 27168404Spjd#include <sys/param.h> 28168404Spjd#include <sys/systm.h> 29168404Spjd#include <sys/kernel.h> 30168404Spjd#include <sys/sysmacros.h> 31168404Spjd#include <sys/kmem.h> 32168404Spjd#include <sys/acl.h> 33168404Spjd#include <sys/vnode.h> 34168404Spjd#include <sys/vfs.h> 35168404Spjd#include <sys/mntent.h> 36168404Spjd#include <sys/mount.h> 37168404Spjd#include <sys/cmn_err.h> 38168404Spjd#include <sys/zfs_znode.h> 39168404Spjd#include <sys/zfs_dir.h> 40168404Spjd#include <sys/zil.h> 41168404Spjd#include <sys/fs/zfs.h> 42168404Spjd#include <sys/dmu.h> 43168404Spjd#include <sys/dsl_prop.h> 44168404Spjd#include <sys/dsl_dataset.h> 45185029Spjd#include <sys/dsl_deleg.h> 46168404Spjd#include <sys/spa.h> 47168404Spjd#include <sys/zap.h> 48168404Spjd#include <sys/varargs.h> 49168962Spjd#include <sys/policy.h> 50168404Spjd#include <sys/atomic.h> 51168404Spjd#include <sys/zfs_ioctl.h> 52168404Spjd#include <sys/zfs_ctldir.h> 53185029Spjd#include <sys/zfs_fuid.h> 54168962Spjd#include <sys/sunddi.h> 55168404Spjd#include <sys/dnlc.h> 56185029Spjd#include <sys/dmu_objset.h> 57185029Spjd#include <sys/spa_boot.h> 58185029Spjd#include <sys/vdev_impl.h> /* VDEV_BOOT_VERSION */ 59168404Spjd 60168404Spjdstruct mtx zfs_debug_mtx; 61168404SpjdMTX_SYSINIT(zfs_debug_mtx, &zfs_debug_mtx, "zfs_debug", MTX_DEF); 62185029Spjd 63168404SpjdSYSCTL_NODE(_vfs, OID_AUTO, zfs, CTLFLAG_RW, 0, "ZFS file system"); 64185029Spjd 65185029Spjdint zfs_super_owner = 0; 66185029SpjdSYSCTL_INT(_vfs_zfs, OID_AUTO, super_owner, CTLFLAG_RW, &zfs_super_owner, 0, 67185029Spjd "File system owner can perform privileged operation on his file systems"); 68185029Spjd 69168404Spjdint zfs_debug_level = 0; 70168713SpjdTUNABLE_INT("vfs.zfs.debug", &zfs_debug_level); 71168404SpjdSYSCTL_INT(_vfs_zfs, OID_AUTO, debug, CTLFLAG_RW, &zfs_debug_level, 0, 72168404Spjd "Debug level"); 73168404Spjd 74185029SpjdSYSCTL_NODE(_vfs_zfs, OID_AUTO, version, CTLFLAG_RD, 0, "ZFS versions"); 75185029Spjdstatic int zfs_version_acl = ZFS_ACL_VERSION; 76185029SpjdSYSCTL_INT(_vfs_zfs_version, OID_AUTO, acl, CTLFLAG_RD, &zfs_version_acl, 0, 77185029Spjd "ZFS_ACL_VERSION"); 78185029Spjdstatic int zfs_version_dmu_backup_header = DMU_BACKUP_HEADER_VERSION; 79185029SpjdSYSCTL_INT(_vfs_zfs_version, OID_AUTO, dmu_backup_header, CTLFLAG_RD, 80185029Spjd &zfs_version_dmu_backup_header, 0, "DMU_BACKUP_HEADER_VERSION"); 81185029Spjdstatic int zfs_version_dmu_backup_stream = DMU_BACKUP_STREAM_VERSION; 82185029SpjdSYSCTL_INT(_vfs_zfs_version, OID_AUTO, dmu_backup_stream, CTLFLAG_RD, 83185029Spjd &zfs_version_dmu_backup_stream, 0, "DMU_BACKUP_STREAM_VERSION"); 84185029Spjdstatic int zfs_version_spa = SPA_VERSION; 85185029SpjdSYSCTL_INT(_vfs_zfs_version, OID_AUTO, spa, CTLFLAG_RD, &zfs_version_spa, 0, 86185029Spjd "SPA_VERSION"); 87185029Spjdstatic int zfs_version_vdev_boot = VDEV_BOOT_VERSION; 88185029SpjdSYSCTL_INT(_vfs_zfs_version, OID_AUTO, vdev_boot, CTLFLAG_RD, 89185029Spjd &zfs_version_vdev_boot, 0, "VDEV_BOOT_VERSION"); 90185029Spjdstatic int zfs_version_zpl = ZPL_VERSION; 91185029SpjdSYSCTL_INT(_vfs_zfs_version, OID_AUTO, zpl, CTLFLAG_RD, &zfs_version_zpl, 0, 92185029Spjd "ZPL_VERSION"); 93185029Spjd 94191990Sattiliostatic int zfs_mount(vfs_t *vfsp); 95191990Sattiliostatic int zfs_umount(vfs_t *vfsp, int fflag); 96191990Sattiliostatic int zfs_root(vfs_t *vfsp, int flags, vnode_t **vpp); 97191990Sattiliostatic int zfs_statfs(vfs_t *vfsp, struct statfs *statp); 98168404Spjdstatic int zfs_vget(vfs_t *vfsp, ino_t ino, int flags, vnode_t **vpp); 99191990Sattiliostatic int zfs_sync(vfs_t *vfsp, int waitfor); 100196982Spjdstatic int zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, int *extflagsp, 101196982Spjd struct ucred **credanonp, int *numsecflavors, int **secflavors); 102168404Spjdstatic int zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, vnode_t **vpp); 103168404Spjdstatic void zfs_objset_close(zfsvfs_t *zfsvfs); 104168404Spjdstatic void zfs_freevfs(vfs_t *vfsp); 105168404Spjd 106168404Spjdstatic struct vfsops zfs_vfsops = { 107168404Spjd .vfs_mount = zfs_mount, 108168404Spjd .vfs_unmount = zfs_umount, 109168404Spjd .vfs_root = zfs_root, 110168404Spjd .vfs_statfs = zfs_statfs, 111168404Spjd .vfs_vget = zfs_vget, 112168404Spjd .vfs_sync = zfs_sync, 113196982Spjd .vfs_checkexp = zfs_checkexp, 114168404Spjd .vfs_fhtovp = zfs_fhtovp, 115168404Spjd}; 116168404Spjd 117185029SpjdVFS_SET(zfs_vfsops, zfs, VFCF_JAIL | VFCF_DELEGADMIN); 118168404Spjd 119168404Spjd/* 120168404Spjd * We need to keep a count of active fs's. 121168404Spjd * This is necessary to prevent our module 122168404Spjd * from being unloaded after a umount -f 123168404Spjd */ 124168404Spjdstatic uint32_t zfs_active_fs_count = 0; 125168404Spjd 126168404Spjd/*ARGSUSED*/ 127168404Spjdstatic int 128191990Sattiliozfs_sync(vfs_t *vfsp, int waitfor) 129168404Spjd{ 130168404Spjd 131168404Spjd /* 132168404Spjd * Data integrity is job one. We don't want a compromised kernel 133168404Spjd * writing to the storage pool, so we never sync during panic. 134168404Spjd */ 135168404Spjd if (panicstr) 136168404Spjd return (0); 137168404Spjd 138168404Spjd if (vfsp != NULL) { 139168404Spjd /* 140168404Spjd * Sync a specific filesystem. 141168404Spjd */ 142168404Spjd zfsvfs_t *zfsvfs = vfsp->vfs_data; 143168404Spjd int error; 144168404Spjd 145191990Sattilio error = vfs_stdsync(vfsp, waitfor); 146168404Spjd if (error != 0) 147168404Spjd return (error); 148168404Spjd 149168404Spjd ZFS_ENTER(zfsvfs); 150168404Spjd if (zfsvfs->z_log != NULL) 151168404Spjd zil_commit(zfsvfs->z_log, UINT64_MAX, 0); 152168404Spjd else 153168404Spjd txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0); 154168404Spjd ZFS_EXIT(zfsvfs); 155168404Spjd } else { 156168404Spjd /* 157168404Spjd * Sync all ZFS filesystems. This is what happens when you 158168404Spjd * run sync(1M). Unlike other filesystems, ZFS honors the 159168404Spjd * request by waiting for all pools to commit all dirty data. 160168404Spjd */ 161168404Spjd spa_sync_allpools(); 162168404Spjd } 163168404Spjd 164168404Spjd return (0); 165168404Spjd} 166168404Spjd 167168404Spjdstatic void 168168404Spjdatime_changed_cb(void *arg, uint64_t newval) 169168404Spjd{ 170168404Spjd zfsvfs_t *zfsvfs = arg; 171168404Spjd 172168404Spjd if (newval == TRUE) { 173168404Spjd zfsvfs->z_atime = TRUE; 174168404Spjd zfsvfs->z_vfs->vfs_flag &= ~MNT_NOATIME; 175168404Spjd vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME); 176168404Spjd vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_ATIME, NULL, 0); 177168404Spjd } else { 178168404Spjd zfsvfs->z_atime = FALSE; 179168404Spjd zfsvfs->z_vfs->vfs_flag |= MNT_NOATIME; 180168404Spjd vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_ATIME); 181168404Spjd vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME, NULL, 0); 182168404Spjd } 183168404Spjd} 184168404Spjd 185168404Spjdstatic void 186168404Spjdxattr_changed_cb(void *arg, uint64_t newval) 187168404Spjd{ 188168404Spjd zfsvfs_t *zfsvfs = arg; 189168404Spjd 190168404Spjd if (newval == TRUE) { 191168404Spjd /* XXX locking on vfs_flag? */ 192168404Spjd#ifdef TODO 193168404Spjd zfsvfs->z_vfs->vfs_flag |= VFS_XATTR; 194168404Spjd#endif 195168404Spjd vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOXATTR); 196168404Spjd vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_XATTR, NULL, 0); 197168404Spjd } else { 198168404Spjd /* XXX locking on vfs_flag? */ 199168404Spjd#ifdef TODO 200168404Spjd zfsvfs->z_vfs->vfs_flag &= ~VFS_XATTR; 201168404Spjd#endif 202168404Spjd vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_XATTR); 203168404Spjd vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOXATTR, NULL, 0); 204168404Spjd } 205168404Spjd} 206168404Spjd 207168404Spjdstatic void 208168404Spjdblksz_changed_cb(void *arg, uint64_t newval) 209168404Spjd{ 210168404Spjd zfsvfs_t *zfsvfs = arg; 211168404Spjd 212168404Spjd if (newval < SPA_MINBLOCKSIZE || 213168404Spjd newval > SPA_MAXBLOCKSIZE || !ISP2(newval)) 214168404Spjd newval = SPA_MAXBLOCKSIZE; 215168404Spjd 216168404Spjd zfsvfs->z_max_blksz = newval; 217204101Spjd zfsvfs->z_vfs->mnt_stat.f_iosize = newval; 218168404Spjd} 219168404Spjd 220168404Spjdstatic void 221168404Spjdreadonly_changed_cb(void *arg, uint64_t newval) 222168404Spjd{ 223168404Spjd zfsvfs_t *zfsvfs = arg; 224168404Spjd 225168404Spjd if (newval) { 226168404Spjd /* XXX locking on vfs_flag? */ 227168404Spjd zfsvfs->z_vfs->vfs_flag |= VFS_RDONLY; 228168404Spjd vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RW); 229168404Spjd vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RO, NULL, 0); 230168404Spjd } else { 231168404Spjd /* XXX locking on vfs_flag? */ 232168404Spjd zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY; 233168404Spjd vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RO); 234168404Spjd vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RW, NULL, 0); 235168404Spjd } 236168404Spjd} 237168404Spjd 238168404Spjdstatic void 239168404Spjdsetuid_changed_cb(void *arg, uint64_t newval) 240168404Spjd{ 241168404Spjd zfsvfs_t *zfsvfs = arg; 242168404Spjd 243168404Spjd if (newval == FALSE) { 244168404Spjd zfsvfs->z_vfs->vfs_flag |= VFS_NOSETUID; 245168404Spjd vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_SETUID); 246168404Spjd vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID, NULL, 0); 247168404Spjd } else { 248168404Spjd zfsvfs->z_vfs->vfs_flag &= ~VFS_NOSETUID; 249168404Spjd vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID); 250168404Spjd vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_SETUID, NULL, 0); 251168404Spjd } 252168404Spjd} 253168404Spjd 254168404Spjdstatic void 255168404Spjdexec_changed_cb(void *arg, uint64_t newval) 256168404Spjd{ 257168404Spjd zfsvfs_t *zfsvfs = arg; 258168404Spjd 259168404Spjd if (newval == FALSE) { 260168404Spjd zfsvfs->z_vfs->vfs_flag |= VFS_NOEXEC; 261168404Spjd vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_EXEC); 262168404Spjd vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC, NULL, 0); 263168404Spjd } else { 264168404Spjd zfsvfs->z_vfs->vfs_flag &= ~VFS_NOEXEC; 265168404Spjd vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC); 266168404Spjd vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_EXEC, NULL, 0); 267168404Spjd } 268168404Spjd} 269168404Spjd 270185029Spjd/* 271185029Spjd * The nbmand mount option can be changed at mount time. 272185029Spjd * We can't allow it to be toggled on live file systems or incorrect 273185029Spjd * behavior may be seen from cifs clients 274185029Spjd * 275185029Spjd * This property isn't registered via dsl_prop_register(), but this callback 276185029Spjd * will be called when a file system is first mounted 277185029Spjd */ 278168404Spjdstatic void 279185029Spjdnbmand_changed_cb(void *arg, uint64_t newval) 280185029Spjd{ 281185029Spjd zfsvfs_t *zfsvfs = arg; 282185029Spjd if (newval == FALSE) { 283185029Spjd vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NBMAND); 284185029Spjd vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NONBMAND, NULL, 0); 285185029Spjd } else { 286185029Spjd vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NONBMAND); 287185029Spjd vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NBMAND, NULL, 0); 288185029Spjd } 289185029Spjd} 290185029Spjd 291185029Spjdstatic void 292168404Spjdsnapdir_changed_cb(void *arg, uint64_t newval) 293168404Spjd{ 294168404Spjd zfsvfs_t *zfsvfs = arg; 295168404Spjd 296168404Spjd zfsvfs->z_show_ctldir = newval; 297168404Spjd} 298168404Spjd 299168404Spjdstatic void 300185029Spjdvscan_changed_cb(void *arg, uint64_t newval) 301185029Spjd{ 302185029Spjd zfsvfs_t *zfsvfs = arg; 303185029Spjd 304185029Spjd zfsvfs->z_vscan = newval; 305185029Spjd} 306185029Spjd 307185029Spjdstatic void 308168404Spjdacl_mode_changed_cb(void *arg, uint64_t newval) 309168404Spjd{ 310168404Spjd zfsvfs_t *zfsvfs = arg; 311168404Spjd 312168404Spjd zfsvfs->z_acl_mode = newval; 313168404Spjd} 314168404Spjd 315168404Spjdstatic void 316168404Spjdacl_inherit_changed_cb(void *arg, uint64_t newval) 317168404Spjd{ 318168404Spjd zfsvfs_t *zfsvfs = arg; 319168404Spjd 320168404Spjd zfsvfs->z_acl_inherit = newval; 321168404Spjd} 322168404Spjd 323168404Spjdstatic int 324168404Spjdzfs_register_callbacks(vfs_t *vfsp) 325168404Spjd{ 326168404Spjd struct dsl_dataset *ds = NULL; 327168404Spjd objset_t *os = NULL; 328168404Spjd zfsvfs_t *zfsvfs = NULL; 329185029Spjd uint64_t nbmand; 330168404Spjd int readonly, do_readonly = FALSE; 331168404Spjd int setuid, do_setuid = FALSE; 332168404Spjd int exec, do_exec = FALSE; 333168404Spjd int xattr, do_xattr = FALSE; 334185029Spjd int atime, do_atime = FALSE; 335168404Spjd int error = 0; 336168404Spjd 337168404Spjd ASSERT(vfsp); 338168404Spjd zfsvfs = vfsp->vfs_data; 339168404Spjd ASSERT(zfsvfs); 340168404Spjd os = zfsvfs->z_os; 341168404Spjd 342168404Spjd /* 343196965Spjd * This function can be called for a snapshot when we update snapshot's 344196965Spjd * mount point, which isn't really supported. 345196965Spjd */ 346196965Spjd if (dmu_objset_is_snapshot(os)) 347196965Spjd return (EOPNOTSUPP); 348196965Spjd 349196965Spjd /* 350168404Spjd * The act of registering our callbacks will destroy any mount 351168404Spjd * options we may have. In order to enable temporary overrides 352168404Spjd * of mount options, we stash away the current values and 353168404Spjd * restore them after we register the callbacks. 354168404Spjd */ 355168404Spjd if (vfs_optionisset(vfsp, MNTOPT_RO, NULL)) { 356168404Spjd readonly = B_TRUE; 357168404Spjd do_readonly = B_TRUE; 358168404Spjd } else if (vfs_optionisset(vfsp, MNTOPT_RW, NULL)) { 359168404Spjd readonly = B_FALSE; 360168404Spjd do_readonly = B_TRUE; 361168404Spjd } 362168404Spjd if (vfs_optionisset(vfsp, MNTOPT_NOSUID, NULL)) { 363168404Spjd setuid = B_FALSE; 364168404Spjd do_setuid = B_TRUE; 365168404Spjd } else { 366168404Spjd if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) { 367168404Spjd setuid = B_FALSE; 368168404Spjd do_setuid = B_TRUE; 369168404Spjd } else if (vfs_optionisset(vfsp, MNTOPT_SETUID, NULL)) { 370168404Spjd setuid = B_TRUE; 371168404Spjd do_setuid = B_TRUE; 372168404Spjd } 373168404Spjd } 374168404Spjd if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL)) { 375168404Spjd exec = B_FALSE; 376168404Spjd do_exec = B_TRUE; 377168404Spjd } else if (vfs_optionisset(vfsp, MNTOPT_EXEC, NULL)) { 378168404Spjd exec = B_TRUE; 379168404Spjd do_exec = B_TRUE; 380168404Spjd } 381168404Spjd if (vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL)) { 382168404Spjd xattr = B_FALSE; 383168404Spjd do_xattr = B_TRUE; 384168404Spjd } else if (vfs_optionisset(vfsp, MNTOPT_XATTR, NULL)) { 385168404Spjd xattr = B_TRUE; 386168404Spjd do_xattr = B_TRUE; 387168404Spjd } 388185029Spjd if (vfs_optionisset(vfsp, MNTOPT_NOATIME, NULL)) { 389185029Spjd atime = B_FALSE; 390185029Spjd do_atime = B_TRUE; 391185029Spjd } else if (vfs_optionisset(vfsp, MNTOPT_ATIME, NULL)) { 392185029Spjd atime = B_TRUE; 393185029Spjd do_atime = B_TRUE; 394185029Spjd } 395168404Spjd 396168404Spjd /* 397185029Spjd * nbmand is a special property. It can only be changed at 398185029Spjd * mount time. 399185029Spjd * 400185029Spjd * This is weird, but it is documented to only be changeable 401185029Spjd * at mount time. 402185029Spjd */ 403185029Spjd if (vfs_optionisset(vfsp, MNTOPT_NONBMAND, NULL)) { 404185029Spjd nbmand = B_FALSE; 405185029Spjd } else if (vfs_optionisset(vfsp, MNTOPT_NBMAND, NULL)) { 406185029Spjd nbmand = B_TRUE; 407185029Spjd } else { 408185029Spjd char osname[MAXNAMELEN]; 409185029Spjd 410185029Spjd dmu_objset_name(os, osname); 411185029Spjd if (error = dsl_prop_get_integer(osname, "nbmand", &nbmand, 412185029Spjd NULL)) { 413185029Spjd return (error); 414185029Spjd } 415185029Spjd } 416185029Spjd 417185029Spjd /* 418168404Spjd * Register property callbacks. 419168404Spjd * 420168404Spjd * It would probably be fine to just check for i/o error from 421168404Spjd * the first prop_register(), but I guess I like to go 422168404Spjd * overboard... 423168404Spjd */ 424168404Spjd ds = dmu_objset_ds(os); 425168404Spjd error = dsl_prop_register(ds, "atime", atime_changed_cb, zfsvfs); 426168404Spjd error = error ? error : dsl_prop_register(ds, 427168404Spjd "xattr", xattr_changed_cb, zfsvfs); 428168404Spjd error = error ? error : dsl_prop_register(ds, 429168404Spjd "recordsize", blksz_changed_cb, zfsvfs); 430168404Spjd error = error ? error : dsl_prop_register(ds, 431168404Spjd "readonly", readonly_changed_cb, zfsvfs); 432168404Spjd error = error ? error : dsl_prop_register(ds, 433168404Spjd "setuid", setuid_changed_cb, zfsvfs); 434168404Spjd error = error ? error : dsl_prop_register(ds, 435168404Spjd "exec", exec_changed_cb, zfsvfs); 436168404Spjd error = error ? error : dsl_prop_register(ds, 437168404Spjd "snapdir", snapdir_changed_cb, zfsvfs); 438168404Spjd error = error ? error : dsl_prop_register(ds, 439168404Spjd "aclmode", acl_mode_changed_cb, zfsvfs); 440168404Spjd error = error ? error : dsl_prop_register(ds, 441168404Spjd "aclinherit", acl_inherit_changed_cb, zfsvfs); 442185029Spjd error = error ? error : dsl_prop_register(ds, 443185029Spjd "vscan", vscan_changed_cb, zfsvfs); 444168404Spjd if (error) 445168404Spjd goto unregister; 446168404Spjd 447168404Spjd /* 448168404Spjd * Invoke our callbacks to restore temporary mount options. 449168404Spjd */ 450168404Spjd if (do_readonly) 451168404Spjd readonly_changed_cb(zfsvfs, readonly); 452168404Spjd if (do_setuid) 453168404Spjd setuid_changed_cb(zfsvfs, setuid); 454168404Spjd if (do_exec) 455168404Spjd exec_changed_cb(zfsvfs, exec); 456168404Spjd if (do_xattr) 457168404Spjd xattr_changed_cb(zfsvfs, xattr); 458185029Spjd if (do_atime) 459185029Spjd atime_changed_cb(zfsvfs, atime); 460168404Spjd 461185029Spjd nbmand_changed_cb(zfsvfs, nbmand); 462185029Spjd 463168404Spjd return (0); 464168404Spjd 465168404Spjdunregister: 466168404Spjd /* 467168404Spjd * We may attempt to unregister some callbacks that are not 468168404Spjd * registered, but this is OK; it will simply return ENOMSG, 469168404Spjd * which we will ignore. 470168404Spjd */ 471168404Spjd (void) dsl_prop_unregister(ds, "atime", atime_changed_cb, zfsvfs); 472168404Spjd (void) dsl_prop_unregister(ds, "xattr", xattr_changed_cb, zfsvfs); 473168404Spjd (void) dsl_prop_unregister(ds, "recordsize", blksz_changed_cb, zfsvfs); 474168404Spjd (void) dsl_prop_unregister(ds, "readonly", readonly_changed_cb, zfsvfs); 475168404Spjd (void) dsl_prop_unregister(ds, "setuid", setuid_changed_cb, zfsvfs); 476168404Spjd (void) dsl_prop_unregister(ds, "exec", exec_changed_cb, zfsvfs); 477168404Spjd (void) dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, zfsvfs); 478168404Spjd (void) dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb, zfsvfs); 479168404Spjd (void) dsl_prop_unregister(ds, "aclinherit", acl_inherit_changed_cb, 480168404Spjd zfsvfs); 481185029Spjd (void) dsl_prop_unregister(ds, "vscan", vscan_changed_cb, zfsvfs); 482168404Spjd return (error); 483168404Spjd 484168404Spjd} 485168404Spjd 486168404Spjdstatic int 487185029Spjdzfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting) 488168404Spjd{ 489185029Spjd int error; 490185029Spjd 491185029Spjd error = zfs_register_callbacks(zfsvfs->z_vfs); 492185029Spjd if (error) 493185029Spjd return (error); 494185029Spjd 495185029Spjd /* 496185029Spjd * Set the objset user_ptr to track its zfsvfs. 497185029Spjd */ 498185029Spjd mutex_enter(&zfsvfs->z_os->os->os_user_ptr_lock); 499185029Spjd dmu_objset_set_user(zfsvfs->z_os, zfsvfs); 500185029Spjd mutex_exit(&zfsvfs->z_os->os->os_user_ptr_lock); 501185029Spjd 502185029Spjd /* 503185029Spjd * If we are not mounting (ie: online recv), then we don't 504185029Spjd * have to worry about replaying the log as we blocked all 505185029Spjd * operations out since we closed the ZIL. 506185029Spjd */ 507185029Spjd if (mounting) { 508185029Spjd boolean_t readonly; 509185029Spjd 510185029Spjd /* 511185029Spjd * During replay we remove the read only flag to 512185029Spjd * allow replays to succeed. 513185029Spjd */ 514185029Spjd readonly = zfsvfs->z_vfs->vfs_flag & VFS_RDONLY; 515185029Spjd zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY; 516185029Spjd 517185029Spjd /* 518185029Spjd * Parse and replay the intent log. 519185029Spjd */ 520185029Spjd zil_replay(zfsvfs->z_os, zfsvfs, &zfsvfs->z_assign, 521185029Spjd zfs_replay_vector, zfs_unlinked_drain); 522185029Spjd 523185029Spjd zfs_unlinked_drain(zfsvfs); 524185029Spjd zfsvfs->z_vfs->vfs_flag |= readonly; /* restore readonly bit */ 525185029Spjd } 526185029Spjd 527185029Spjd if (!zil_disable) 528185029Spjd zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data); 529185029Spjd 530185029Spjd return (0); 531185029Spjd} 532185029Spjd 533185029Spjdstatic void 534185029Spjdzfs_freezfsvfs(zfsvfs_t *zfsvfs) 535185029Spjd{ 536185029Spjd mutex_destroy(&zfsvfs->z_znodes_lock); 537185029Spjd mutex_destroy(&zfsvfs->z_online_recv_lock); 538185029Spjd list_destroy(&zfsvfs->z_all_znodes); 539185029Spjd rrw_destroy(&zfsvfs->z_teardown_lock); 540185029Spjd rw_destroy(&zfsvfs->z_teardown_inactive_lock); 541185029Spjd rw_destroy(&zfsvfs->z_fuid_lock); 542185029Spjd kmem_free(zfsvfs, sizeof (zfsvfs_t)); 543185029Spjd} 544185029Spjd 545185029Spjdstatic int 546185029Spjdzfs_domount(vfs_t *vfsp, char *osname) 547185029Spjd{ 548168404Spjd uint64_t recordsize, readonly; 549168404Spjd int error = 0; 550168404Spjd int mode; 551168404Spjd zfsvfs_t *zfsvfs; 552168404Spjd znode_t *zp = NULL; 553168404Spjd 554168404Spjd ASSERT(vfsp); 555168404Spjd ASSERT(osname); 556168404Spjd 557168404Spjd /* 558168404Spjd * Initialize the zfs-specific filesystem structure. 559168404Spjd * Should probably make this a kmem cache, shuffle fields, 560168404Spjd * and just bzero up to z_hold_mtx[]. 561168404Spjd */ 562168404Spjd zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP); 563168404Spjd zfsvfs->z_vfs = vfsp; 564168404Spjd zfsvfs->z_parent = zfsvfs; 565168404Spjd zfsvfs->z_assign = TXG_NOWAIT; 566168404Spjd zfsvfs->z_max_blksz = SPA_MAXBLOCKSIZE; 567168404Spjd zfsvfs->z_show_ctldir = ZFS_SNAPDIR_VISIBLE; 568168404Spjd 569168404Spjd mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL); 570185029Spjd mutex_init(&zfsvfs->z_online_recv_lock, NULL, MUTEX_DEFAULT, NULL); 571168404Spjd list_create(&zfsvfs->z_all_znodes, sizeof (znode_t), 572168404Spjd offsetof(znode_t, z_link_node)); 573185029Spjd rrw_init(&zfsvfs->z_teardown_lock); 574185029Spjd rw_init(&zfsvfs->z_teardown_inactive_lock, NULL, RW_DEFAULT, NULL); 575185029Spjd rw_init(&zfsvfs->z_fuid_lock, NULL, RW_DEFAULT, NULL); 576168404Spjd 577168404Spjd if (error = dsl_prop_get_integer(osname, "recordsize", &recordsize, 578168404Spjd NULL)) 579168404Spjd goto out; 580204101Spjd zfsvfs->z_vfs->vfs_bsize = SPA_MINBLOCKSIZE; 581204101Spjd zfsvfs->z_vfs->mnt_stat.f_iosize = recordsize; 582168404Spjd 583168404Spjd vfsp->vfs_data = zfsvfs; 584168404Spjd vfsp->mnt_flag |= MNT_LOCAL; 585168404Spjd vfsp->mnt_kern_flag |= MNTK_MPSAFE; 586168404Spjd vfsp->mnt_kern_flag |= MNTK_LOOKUP_SHARED; 587193440Sps vfsp->mnt_kern_flag |= MNTK_SHARED_WRITES; 588168404Spjd 589168404Spjd if (error = dsl_prop_get_integer(osname, "readonly", &readonly, NULL)) 590168404Spjd goto out; 591168404Spjd 592185029Spjd mode = DS_MODE_OWNER; 593168404Spjd if (readonly) 594185029Spjd mode |= DS_MODE_READONLY; 595168404Spjd 596168404Spjd error = dmu_objset_open(osname, DMU_OST_ZFS, mode, &zfsvfs->z_os); 597168404Spjd if (error == EROFS) { 598185029Spjd mode = DS_MODE_OWNER | DS_MODE_READONLY; 599168404Spjd error = dmu_objset_open(osname, DMU_OST_ZFS, mode, 600168404Spjd &zfsvfs->z_os); 601168404Spjd } 602168404Spjd 603168404Spjd if (error) 604168404Spjd goto out; 605168404Spjd 606185029Spjd if (error = zfs_init_fs(zfsvfs, &zp)) 607168404Spjd goto out; 608168404Spjd 609185029Spjd /* 610185029Spjd * Set features for file system. 611185029Spjd */ 612185029Spjd zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os); 613185029Spjd if (zfsvfs->z_use_fuids) { 614185029Spjd vfs_set_feature(vfsp, VFSFT_XVATTR); 615185029Spjd vfs_set_feature(vfsp, VFSFT_SYSATTR_VIEWS); 616185029Spjd vfs_set_feature(vfsp, VFSFT_ACEMASKONACCESS); 617185029Spjd vfs_set_feature(vfsp, VFSFT_ACLONCREATE); 618185029Spjd } 619185029Spjd if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE) { 620185029Spjd vfs_set_feature(vfsp, VFSFT_DIRENTFLAGS); 621185029Spjd vfs_set_feature(vfsp, VFSFT_CASEINSENSITIVE); 622185029Spjd vfs_set_feature(vfsp, VFSFT_NOCASESENSITIVE); 623185029Spjd } else if (zfsvfs->z_case == ZFS_CASE_MIXED) { 624185029Spjd vfs_set_feature(vfsp, VFSFT_DIRENTFLAGS); 625185029Spjd vfs_set_feature(vfsp, VFSFT_CASEINSENSITIVE); 626185029Spjd } 627185029Spjd 628168404Spjd if (dmu_objset_is_snapshot(zfsvfs->z_os)) { 629185029Spjd uint64_t pval; 630168404Spjd 631168404Spjd ASSERT(mode & DS_MODE_READONLY); 632168404Spjd atime_changed_cb(zfsvfs, B_FALSE); 633168404Spjd readonly_changed_cb(zfsvfs, B_TRUE); 634185029Spjd if (error = dsl_prop_get_integer(osname, "xattr", &pval, NULL)) 635168404Spjd goto out; 636185029Spjd xattr_changed_cb(zfsvfs, pval); 637168404Spjd zfsvfs->z_issnap = B_TRUE; 638168404Spjd } else { 639185029Spjd error = zfsvfs_setup(zfsvfs, B_TRUE); 640168404Spjd } 641168404Spjd 642168404Spjd vfs_mountedfrom(vfsp, osname); 643168404Spjd 644168404Spjd if (!zfsvfs->z_issnap) 645168404Spjd zfsctl_create(zfsvfs); 646168404Spjdout: 647168404Spjd if (error) { 648168404Spjd if (zfsvfs->z_os) 649168404Spjd dmu_objset_close(zfsvfs->z_os); 650185029Spjd zfs_freezfsvfs(zfsvfs); 651168404Spjd } else { 652168404Spjd atomic_add_32(&zfs_active_fs_count, 1); 653168404Spjd } 654168404Spjd 655168404Spjd return (error); 656168404Spjd} 657168404Spjd 658168404Spjdvoid 659168404Spjdzfs_unregister_callbacks(zfsvfs_t *zfsvfs) 660168404Spjd{ 661168404Spjd objset_t *os = zfsvfs->z_os; 662168404Spjd struct dsl_dataset *ds; 663168404Spjd 664168404Spjd /* 665168404Spjd * Unregister properties. 666168404Spjd */ 667168404Spjd if (!dmu_objset_is_snapshot(os)) { 668168404Spjd ds = dmu_objset_ds(os); 669168404Spjd VERIFY(dsl_prop_unregister(ds, "atime", atime_changed_cb, 670168404Spjd zfsvfs) == 0); 671168404Spjd 672168404Spjd VERIFY(dsl_prop_unregister(ds, "xattr", xattr_changed_cb, 673168404Spjd zfsvfs) == 0); 674168404Spjd 675168404Spjd VERIFY(dsl_prop_unregister(ds, "recordsize", blksz_changed_cb, 676168404Spjd zfsvfs) == 0); 677168404Spjd 678168404Spjd VERIFY(dsl_prop_unregister(ds, "readonly", readonly_changed_cb, 679168404Spjd zfsvfs) == 0); 680168404Spjd 681168404Spjd VERIFY(dsl_prop_unregister(ds, "setuid", setuid_changed_cb, 682168404Spjd zfsvfs) == 0); 683168404Spjd 684168404Spjd VERIFY(dsl_prop_unregister(ds, "exec", exec_changed_cb, 685168404Spjd zfsvfs) == 0); 686168404Spjd 687168404Spjd VERIFY(dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, 688168404Spjd zfsvfs) == 0); 689168404Spjd 690168404Spjd VERIFY(dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb, 691168404Spjd zfsvfs) == 0); 692168404Spjd 693168404Spjd VERIFY(dsl_prop_unregister(ds, "aclinherit", 694168404Spjd acl_inherit_changed_cb, zfsvfs) == 0); 695185029Spjd 696185029Spjd VERIFY(dsl_prop_unregister(ds, "vscan", 697185029Spjd vscan_changed_cb, zfsvfs) == 0); 698168404Spjd } 699168404Spjd} 700168404Spjd 701168404Spjd/*ARGSUSED*/ 702168404Spjdstatic int 703191990Sattiliozfs_mount(vfs_t *vfsp) 704168404Spjd{ 705191990Sattilio kthread_t *td = curthread; 706185029Spjd vnode_t *mvp = vfsp->mnt_vnodecovered; 707185029Spjd cred_t *cr = td->td_ucred; 708185029Spjd char *osname; 709185029Spjd int error = 0; 710185029Spjd int canwrite; 711168404Spjd 712185029Spjd if (vfs_getopt(vfsp->mnt_optnew, "from", (void **)&osname, NULL)) 713185029Spjd return (EINVAL); 714185029Spjd 715168404Spjd /* 716185029Spjd * If full-owner-access is enabled and delegated administration is 717185029Spjd * turned on, we must set nosuid. 718185029Spjd */ 719185029Spjd if (zfs_super_owner && 720185029Spjd dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr) != ECANCELED) { 721185029Spjd secpolicy_fs_mount_clearopts(cr, vfsp); 722185029Spjd } 723185029Spjd 724185029Spjd /* 725185029Spjd * Check for mount privilege? 726185029Spjd * 727185029Spjd * If we don't have privilege then see if 728185029Spjd * we have local permission to allow it 729185029Spjd */ 730185029Spjd error = secpolicy_fs_mount(cr, mvp, vfsp); 731185029Spjd if (error) { 732185029Spjd error = dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr); 733196944Spjd if (error != 0) 734196944Spjd goto out; 735196944Spjd 736196944Spjd if (!(vfsp->vfs_flag & MS_REMOUNT)) { 737185029Spjd vattr_t vattr; 738185029Spjd 739185029Spjd /* 740185029Spjd * Make sure user is the owner of the mount point 741185029Spjd * or has sufficient privileges. 742185029Spjd */ 743185029Spjd 744185029Spjd vattr.va_mask = AT_UID; 745185029Spjd 746196662Spjd vn_lock(mvp, LK_SHARED | LK_RETRY); 747185029Spjd if (error = VOP_GETATTR(mvp, &vattr, cr)) { 748196662Spjd VOP_UNLOCK(mvp, 0); 749185029Spjd goto out; 750185029Spjd } 751185029Spjd 752185029Spjd#if 0 /* CHECK THIS! Is probably needed for zfs_suser. */ 753185029Spjd if (secpolicy_vnode_owner(mvp, cr, vattr.va_uid) != 0 && 754185029Spjd VOP_ACCESS(mvp, VWRITE, cr, td) != 0) { 755185029Spjd error = EPERM; 756185029Spjd goto out; 757185029Spjd } 758185029Spjd#else 759185029Spjd if (error = secpolicy_vnode_owner(mvp, cr, vattr.va_uid)) { 760196662Spjd VOP_UNLOCK(mvp, 0); 761185029Spjd goto out; 762185029Spjd } 763185029Spjd 764185029Spjd if (error = VOP_ACCESS(mvp, VWRITE, cr, td)) { 765196662Spjd VOP_UNLOCK(mvp, 0); 766185029Spjd goto out; 767185029Spjd } 768196662Spjd VOP_UNLOCK(mvp, 0); 769185029Spjd#endif 770196944Spjd } 771185029Spjd 772196944Spjd secpolicy_fs_mount_clearopts(cr, vfsp); 773185029Spjd } 774185029Spjd 775185029Spjd /* 776185029Spjd * Refuse to mount a filesystem if we are in a local zone and the 777185029Spjd * dataset is not visible. 778185029Spjd */ 779185029Spjd if (!INGLOBALZONE(curthread) && 780185029Spjd (!zone_dataset_visible(osname, &canwrite) || !canwrite)) { 781185029Spjd error = EPERM; 782185029Spjd goto out; 783185029Spjd } 784185029Spjd 785185029Spjd /* 786168404Spjd * When doing a remount, we simply refresh our temporary properties 787168404Spjd * according to those options set in the current VFS options. 788168404Spjd */ 789185029Spjd if (vfsp->vfs_flag & MS_REMOUNT) { 790185029Spjd /* refresh mount options */ 791185029Spjd zfs_unregister_callbacks(vfsp->vfs_data); 792185029Spjd error = zfs_register_callbacks(vfsp); 793185029Spjd goto out; 794185029Spjd } 795168404Spjd 796168510Spjd DROP_GIANT(); 797185029Spjd error = zfs_domount(vfsp, osname); 798168510Spjd PICKUP_GIANT(); 799185029Spjdout: 800168510Spjd return (error); 801168404Spjd} 802168404Spjd 803168404Spjdstatic int 804191990Sattiliozfs_statfs(vfs_t *vfsp, struct statfs *statp) 805169170Spjd{ 806168404Spjd zfsvfs_t *zfsvfs = vfsp->vfs_data; 807168404Spjd uint64_t refdbytes, availbytes, usedobjs, availobjs; 808168404Spjd 809168404Spjd statp->f_version = STATFS_VERSION; 810168404Spjd 811168404Spjd ZFS_ENTER(zfsvfs); 812168404Spjd 813168404Spjd dmu_objset_space(zfsvfs->z_os, 814168404Spjd &refdbytes, &availbytes, &usedobjs, &availobjs); 815168404Spjd 816168404Spjd /* 817168404Spjd * The underlying storage pool actually uses multiple block sizes. 818168404Spjd * We report the fragsize as the smallest block size we support, 819168404Spjd * and we report our blocksize as the filesystem's maximum blocksize. 820168404Spjd */ 821204101Spjd statp->f_bsize = SPA_MINBLOCKSIZE; 822204101Spjd statp->f_iosize = zfsvfs->z_vfs->mnt_stat.f_iosize; 823168404Spjd 824168404Spjd /* 825168404Spjd * The following report "total" blocks of various kinds in the 826168404Spjd * file system, but reported in terms of f_frsize - the 827168404Spjd * "fragment" size. 828168404Spjd */ 829168404Spjd 830204101Spjd statp->f_blocks = (refdbytes + availbytes) >> SPA_MINBLOCKSHIFT; 831168404Spjd statp->f_bfree = availbytes / statp->f_bsize; 832168404Spjd statp->f_bavail = statp->f_bfree; /* no root reservation */ 833168404Spjd 834168404Spjd /* 835168404Spjd * statvfs() should really be called statufs(), because it assumes 836168404Spjd * static metadata. ZFS doesn't preallocate files, so the best 837168404Spjd * we can do is report the max that could possibly fit in f_files, 838168404Spjd * and that minus the number actually used in f_ffree. 839168404Spjd * For f_ffree, report the smaller of the number of object available 840168404Spjd * and the number of blocks (each object will take at least a block). 841168404Spjd */ 842168404Spjd statp->f_ffree = MIN(availobjs, statp->f_bfree); 843168404Spjd statp->f_files = statp->f_ffree + usedobjs; 844168404Spjd 845168404Spjd /* 846168404Spjd * We're a zfs filesystem. 847168404Spjd */ 848168404Spjd (void) strlcpy(statp->f_fstypename, "zfs", sizeof(statp->f_fstypename)); 849168404Spjd 850168404Spjd strlcpy(statp->f_mntfromname, vfsp->mnt_stat.f_mntfromname, 851168404Spjd sizeof(statp->f_mntfromname)); 852168404Spjd strlcpy(statp->f_mntonname, vfsp->mnt_stat.f_mntonname, 853168404Spjd sizeof(statp->f_mntonname)); 854168404Spjd 855168404Spjd statp->f_namemax = ZFS_MAXNAMELEN; 856168404Spjd 857168404Spjd ZFS_EXIT(zfsvfs); 858168404Spjd return (0); 859168404Spjd} 860168404Spjd 861168404Spjdstatic int 862191990Sattiliozfs_root(vfs_t *vfsp, int flags, vnode_t **vpp) 863168404Spjd{ 864168404Spjd zfsvfs_t *zfsvfs = vfsp->vfs_data; 865168404Spjd znode_t *rootzp; 866168404Spjd int error; 867168404Spjd 868197459Spjd ZFS_ENTER_NOERROR(zfsvfs); 869168404Spjd 870168404Spjd error = zfs_zget(zfsvfs, zfsvfs->z_root, &rootzp); 871206667Spjd 872206667Spjd ZFS_EXIT(zfsvfs); 873206667Spjd 874168404Spjd if (error == 0) { 875168404Spjd *vpp = ZTOV(rootzp); 876175202Sattilio error = vn_lock(*vpp, flags); 877168404Spjd (*vpp)->v_vflag |= VV_ROOT; 878168404Spjd } 879168404Spjd 880168404Spjd return (error); 881168404Spjd} 882168404Spjd 883185029Spjd/* 884185029Spjd * Teardown the zfsvfs::z_os. 885185029Spjd * 886185029Spjd * Note, if 'unmounting' if FALSE, we return with the 'z_teardown_lock' 887185029Spjd * and 'z_teardown_inactive_lock' held. 888185029Spjd */ 889185029Spjdstatic int 890185029Spjdzfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting) 891185029Spjd{ 892185029Spjd znode_t *zp; 893185029Spjd 894185029Spjd rrw_enter(&zfsvfs->z_teardown_lock, RW_WRITER, FTAG); 895185029Spjd 896185029Spjd if (!unmounting) { 897185029Spjd /* 898185029Spjd * We purge the parent filesystem's vfsp as the parent 899185029Spjd * filesystem and all of its snapshots have their vnode's 900185029Spjd * v_vfsp set to the parent's filesystem's vfsp. Note, 901185029Spjd * 'z_parent' is self referential for non-snapshots. 902185029Spjd */ 903185029Spjd (void) dnlc_purge_vfsp(zfsvfs->z_parent->z_vfs, 0); 904197351Spjd#ifdef FREEBSD_NAMECACHE 905197351Spjd cache_purgevfs(zfsvfs->z_parent->z_vfs); 906197351Spjd#endif 907185029Spjd } 908185029Spjd 909185029Spjd /* 910185029Spjd * Close the zil. NB: Can't close the zil while zfs_inactive 911185029Spjd * threads are blocked as zil_close can call zfs_inactive. 912185029Spjd */ 913185029Spjd if (zfsvfs->z_log) { 914185029Spjd zil_close(zfsvfs->z_log); 915185029Spjd zfsvfs->z_log = NULL; 916185029Spjd } 917185029Spjd 918185029Spjd rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_WRITER); 919185029Spjd 920185029Spjd /* 921185029Spjd * If we are not unmounting (ie: online recv) and someone already 922185029Spjd * unmounted this file system while we were doing the switcheroo, 923185029Spjd * or a reopen of z_os failed then just bail out now. 924185029Spjd */ 925185029Spjd if (!unmounting && (zfsvfs->z_unmounted || zfsvfs->z_os == NULL)) { 926185029Spjd rw_exit(&zfsvfs->z_teardown_inactive_lock); 927185029Spjd rrw_exit(&zfsvfs->z_teardown_lock, FTAG); 928185029Spjd return (EIO); 929185029Spjd } 930185029Spjd 931185029Spjd /* 932185029Spjd * At this point there are no vops active, and any new vops will 933185029Spjd * fail with EIO since we have z_teardown_lock for writer (only 934185029Spjd * relavent for forced unmount). 935185029Spjd * 936185029Spjd * Release all holds on dbufs. 937185029Spjd */ 938185029Spjd mutex_enter(&zfsvfs->z_znodes_lock); 939185029Spjd for (zp = list_head(&zfsvfs->z_all_znodes); zp != NULL; 940185029Spjd zp = list_next(&zfsvfs->z_all_znodes, zp)) 941185029Spjd if (zp->z_dbuf) { 942196297Spjd ASSERT(ZTOV(zp)->v_count >= 0); 943185029Spjd zfs_znode_dmu_fini(zp); 944185029Spjd } 945185029Spjd mutex_exit(&zfsvfs->z_znodes_lock); 946185029Spjd 947185029Spjd /* 948185029Spjd * If we are unmounting, set the unmounted flag and let new vops 949185029Spjd * unblock. zfs_inactive will have the unmounted behavior, and all 950185029Spjd * other vops will fail with EIO. 951185029Spjd */ 952185029Spjd if (unmounting) { 953185029Spjd zfsvfs->z_unmounted = B_TRUE; 954185029Spjd rrw_exit(&zfsvfs->z_teardown_lock, FTAG); 955185029Spjd rw_exit(&zfsvfs->z_teardown_inactive_lock); 956197133Spjd 957197133Spjd#ifdef __FreeBSD__ 958197133Spjd /* 959197133Spjd * Some znodes might not be fully reclaimed, wait for them. 960197133Spjd */ 961197133Spjd mutex_enter(&zfsvfs->z_znodes_lock); 962197133Spjd while (list_head(&zfsvfs->z_all_znodes) != NULL) { 963197133Spjd msleep(zfsvfs, &zfsvfs->z_znodes_lock, 0, 964197133Spjd "zteardown", 0); 965197133Spjd } 966197133Spjd mutex_exit(&zfsvfs->z_znodes_lock); 967197133Spjd#endif 968185029Spjd } 969185029Spjd 970185029Spjd /* 971185029Spjd * z_os will be NULL if there was an error in attempting to reopen 972185029Spjd * zfsvfs, so just return as the properties had already been 973185029Spjd * unregistered and cached data had been evicted before. 974185029Spjd */ 975185029Spjd if (zfsvfs->z_os == NULL) 976185029Spjd return (0); 977185029Spjd 978185029Spjd /* 979185029Spjd * Unregister properties. 980185029Spjd */ 981185029Spjd zfs_unregister_callbacks(zfsvfs); 982185029Spjd 983185029Spjd /* 984185029Spjd * Evict cached data 985185029Spjd */ 986185029Spjd if (dmu_objset_evict_dbufs(zfsvfs->z_os)) { 987185029Spjd txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0); 988185029Spjd (void) dmu_objset_evict_dbufs(zfsvfs->z_os); 989185029Spjd } 990185029Spjd 991185029Spjd return (0); 992185029Spjd} 993185029Spjd 994168404Spjd/*ARGSUSED*/ 995168404Spjdstatic int 996191990Sattiliozfs_umount(vfs_t *vfsp, int fflag) 997168404Spjd{ 998168404Spjd zfsvfs_t *zfsvfs = vfsp->vfs_data; 999185029Spjd objset_t *os; 1000191990Sattilio cred_t *cr = curthread->td_ucred; 1001168404Spjd int ret; 1002168404Spjd 1003185029Spjd ret = secpolicy_fs_unmount(cr, vfsp); 1004185029Spjd if (ret) { 1005185029Spjd ret = dsl_deleg_access((char *)refstr_value(vfsp->vfs_resource), 1006185029Spjd ZFS_DELEG_PERM_MOUNT, cr); 1007185029Spjd if (ret) 1008185029Spjd return (ret); 1009185029Spjd } 1010185029Spjd /* 1011185029Spjd * We purge the parent filesystem's vfsp as the parent filesystem 1012185029Spjd * and all of its snapshots have their vnode's v_vfsp set to the 1013185029Spjd * parent's filesystem's vfsp. Note, 'z_parent' is self 1014185029Spjd * referential for non-snapshots. 1015185029Spjd */ 1016185029Spjd (void) dnlc_purge_vfsp(zfsvfs->z_parent->z_vfs, 0); 1017168404Spjd 1018168404Spjd /* 1019168404Spjd * Unmount any snapshots mounted under .zfs before unmounting the 1020168404Spjd * dataset itself. 1021168404Spjd */ 1022169170Spjd if (zfsvfs->z_ctldir != NULL) { 1023168404Spjd if ((ret = zfsctl_umount_snapshots(vfsp, fflag, cr)) != 0) 1024168404Spjd return (ret); 1025191990Sattilio ret = vflush(vfsp, 0, 0, curthread); 1026168404Spjd ASSERT(ret == EBUSY); 1027168404Spjd if (!(fflag & MS_FORCE)) { 1028168404Spjd if (zfsvfs->z_ctldir->v_count > 1) 1029168404Spjd return (EBUSY); 1030168404Spjd ASSERT(zfsvfs->z_ctldir->v_count == 1); 1031168404Spjd } 1032168404Spjd zfsctl_destroy(zfsvfs); 1033168404Spjd ASSERT(zfsvfs->z_ctldir == NULL); 1034168404Spjd } 1035168404Spjd 1036197459Spjd if (fflag & MS_FORCE) { 1037197459Spjd /* 1038197459Spjd * Mark file system as unmounted before calling 1039197459Spjd * vflush(FORCECLOSE). This way we ensure no future vnops 1040197459Spjd * will be called and risk operating on DOOMED vnodes. 1041197459Spjd */ 1042197459Spjd rrw_enter(&zfsvfs->z_teardown_lock, RW_WRITER, FTAG); 1043197459Spjd zfsvfs->z_unmounted = B_TRUE; 1044197459Spjd rrw_exit(&zfsvfs->z_teardown_lock, FTAG); 1045197459Spjd } 1046197459Spjd 1047168404Spjd /* 1048168404Spjd * Flush all the files. 1049168404Spjd */ 1050191990Sattilio ret = vflush(vfsp, 1, (fflag & MS_FORCE) ? FORCECLOSE : 0, curthread); 1051168404Spjd if (ret != 0) { 1052168404Spjd if (!zfsvfs->z_issnap) { 1053168404Spjd zfsctl_create(zfsvfs); 1054168404Spjd ASSERT(zfsvfs->z_ctldir != NULL); 1055168404Spjd } 1056168404Spjd return (ret); 1057168404Spjd } 1058168404Spjd 1059185029Spjd if (!(fflag & MS_FORCE)) { 1060185029Spjd /* 1061185029Spjd * Check the number of active vnodes in the file system. 1062185029Spjd * Our count is maintained in the vfs structure, but the 1063185029Spjd * number is off by 1 to indicate a hold on the vfs 1064185029Spjd * structure itself. 1065185029Spjd * 1066185029Spjd * The '.zfs' directory maintains a reference of its 1067185029Spjd * own, and any active references underneath are 1068185029Spjd * reflected in the vnode count. 1069185029Spjd */ 1070185029Spjd if (zfsvfs->z_ctldir == NULL) { 1071185029Spjd if (vfsp->vfs_count > 1) 1072185029Spjd return (EBUSY); 1073185029Spjd } else { 1074185029Spjd if (vfsp->vfs_count > 2 || 1075185029Spjd zfsvfs->z_ctldir->v_count > 1) 1076185029Spjd return (EBUSY); 1077185029Spjd } 1078185029Spjd } else { 1079168404Spjd MNT_ILOCK(vfsp); 1080168404Spjd vfsp->mnt_kern_flag |= MNTK_UNMOUNTF; 1081168404Spjd MNT_IUNLOCK(vfsp); 1082185029Spjd } 1083168404Spjd 1084185029Spjd VERIFY(zfsvfs_teardown(zfsvfs, B_TRUE) == 0); 1085185029Spjd os = zfsvfs->z_os; 1086185029Spjd 1087185029Spjd /* 1088185029Spjd * z_os will be NULL if there was an error in 1089185029Spjd * attempting to reopen zfsvfs. 1090185029Spjd */ 1091185029Spjd if (os != NULL) { 1092168404Spjd /* 1093185029Spjd * Unset the objset user_ptr. 1094168404Spjd */ 1095185029Spjd mutex_enter(&os->os->os_user_ptr_lock); 1096185029Spjd dmu_objset_set_user(os, NULL); 1097185029Spjd mutex_exit(&os->os->os_user_ptr_lock); 1098185029Spjd 1099185029Spjd /* 1100185029Spjd * Finally release the objset 1101185029Spjd */ 1102185029Spjd dmu_objset_close(os); 1103168404Spjd } 1104168404Spjd 1105185029Spjd /* 1106185029Spjd * We can now safely destroy the '.zfs' directory node. 1107185029Spjd */ 1108185029Spjd if (zfsvfs->z_ctldir != NULL) 1109185029Spjd zfsctl_destroy(zfsvfs); 1110185029Spjd if (zfsvfs->z_issnap) { 1111185029Spjd vnode_t *svp = vfsp->mnt_vnodecovered; 1112185029Spjd 1113197515Spjd if (svp->v_count >= 2) 1114192211Skmacy VN_RELE(svp); 1115185029Spjd } 1116168404Spjd zfs_freevfs(vfsp); 1117168404Spjd 1118168404Spjd return (0); 1119168404Spjd} 1120168404Spjd 1121168404Spjdstatic int 1122168404Spjdzfs_vget(vfs_t *vfsp, ino_t ino, int flags, vnode_t **vpp) 1123168404Spjd{ 1124168404Spjd zfsvfs_t *zfsvfs = vfsp->vfs_data; 1125168404Spjd znode_t *zp; 1126168404Spjd int err; 1127168404Spjd 1128197167Spjd /* 1129197167Spjd * XXXPJD: zfs_zget() can't operate on virtual entires like .zfs/ or 1130197167Spjd * .zfs/snapshot/ directories, so for now just return EOPNOTSUPP. 1131197167Spjd * This will make NFS to fall back to using READDIR instead of 1132197167Spjd * READDIRPLUS. 1133197167Spjd * Also snapshots are stored in AVL tree, but based on their names, 1134197167Spjd * not inode numbers, so it will be very inefficient to iterate 1135197167Spjd * over all snapshots to find the right one. 1136197167Spjd * Note that OpenSolaris READDIRPLUS implementation does LOOKUP on 1137197167Spjd * d_name, and not VGET on d_fileno as we do. 1138197167Spjd */ 1139197167Spjd if (ino == ZFSCTL_INO_ROOT || ino == ZFSCTL_INO_SNAPDIR) 1140197167Spjd return (EOPNOTSUPP); 1141197167Spjd 1142168404Spjd ZFS_ENTER(zfsvfs); 1143168404Spjd err = zfs_zget(zfsvfs, ino, &zp); 1144168404Spjd if (err == 0 && zp->z_unlinked) { 1145168404Spjd VN_RELE(ZTOV(zp)); 1146168404Spjd err = EINVAL; 1147168404Spjd } 1148206667Spjd ZFS_EXIT(zfsvfs); 1149168404Spjd if (err != 0) 1150168404Spjd *vpp = NULL; 1151168404Spjd else { 1152168404Spjd *vpp = ZTOV(zp); 1153175202Sattilio vn_lock(*vpp, flags); 1154168404Spjd } 1155171063Sdfr return (err); 1156168404Spjd} 1157168404Spjd 1158168404Spjdstatic int 1159196982Spjdzfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, int *extflagsp, 1160196982Spjd struct ucred **credanonp, int *numsecflavors, int **secflavors) 1161196982Spjd{ 1162196982Spjd zfsvfs_t *zfsvfs = vfsp->vfs_data; 1163196982Spjd 1164196982Spjd /* 1165196982Spjd * If this is regular file system vfsp is the same as 1166196982Spjd * zfsvfs->z_parent->z_vfs, but if it is snapshot, 1167196982Spjd * zfsvfs->z_parent->z_vfs represents parent file system 1168196982Spjd * which we have to use here, because only this file system 1169196982Spjd * has mnt_export configured. 1170196982Spjd */ 1171196982Spjd vfsp = zfsvfs->z_parent->z_vfs; 1172196982Spjd 1173196982Spjd return (vfs_stdcheckexp(zfsvfs->z_parent->z_vfs, nam, extflagsp, 1174196982Spjd credanonp, numsecflavors, secflavors)); 1175196982Spjd} 1176196982Spjd 1177197151SpjdCTASSERT(SHORT_FID_LEN <= sizeof(struct fid)); 1178197151SpjdCTASSERT(LONG_FID_LEN <= sizeof(struct fid)); 1179196982Spjd 1180196982Spjdstatic int 1181168404Spjdzfs_fhtovp(vfs_t *vfsp, fid_t *fidp, vnode_t **vpp) 1182168404Spjd{ 1183168404Spjd zfsvfs_t *zfsvfs = vfsp->vfs_data; 1184168404Spjd znode_t *zp; 1185168404Spjd uint64_t object = 0; 1186168404Spjd uint64_t fid_gen = 0; 1187168404Spjd uint64_t gen_mask; 1188168404Spjd uint64_t zp_gen; 1189168404Spjd int i, err; 1190168404Spjd 1191168404Spjd *vpp = NULL; 1192168404Spjd 1193168404Spjd ZFS_ENTER(zfsvfs); 1194168404Spjd 1195196979Spjd /* 1196197177Spjd * On FreeBSD we can get snapshot's mount point or its parent file 1197197177Spjd * system mount point depending if snapshot is already mounted or not. 1198196979Spjd */ 1199197177Spjd if (zfsvfs->z_parent == zfsvfs && fidp->fid_len == LONG_FID_LEN) { 1200168404Spjd zfid_long_t *zlfid = (zfid_long_t *)fidp; 1201168404Spjd uint64_t objsetid = 0; 1202168404Spjd uint64_t setgen = 0; 1203168404Spjd 1204168404Spjd for (i = 0; i < sizeof (zlfid->zf_setid); i++) 1205168404Spjd objsetid |= ((uint64_t)zlfid->zf_setid[i]) << (8 * i); 1206168404Spjd 1207168404Spjd for (i = 0; i < sizeof (zlfid->zf_setgen); i++) 1208168404Spjd setgen |= ((uint64_t)zlfid->zf_setgen[i]) << (8 * i); 1209168404Spjd 1210168404Spjd ZFS_EXIT(zfsvfs); 1211168404Spjd 1212168404Spjd err = zfsctl_lookup_objset(vfsp, objsetid, &zfsvfs); 1213168404Spjd if (err) 1214168404Spjd return (EINVAL); 1215168404Spjd ZFS_ENTER(zfsvfs); 1216168404Spjd } 1217168404Spjd 1218168404Spjd if (fidp->fid_len == SHORT_FID_LEN || fidp->fid_len == LONG_FID_LEN) { 1219168404Spjd zfid_short_t *zfid = (zfid_short_t *)fidp; 1220168404Spjd 1221168404Spjd for (i = 0; i < sizeof (zfid->zf_object); i++) 1222168404Spjd object |= ((uint64_t)zfid->zf_object[i]) << (8 * i); 1223168404Spjd 1224168404Spjd for (i = 0; i < sizeof (zfid->zf_gen); i++) 1225168404Spjd fid_gen |= ((uint64_t)zfid->zf_gen[i]) << (8 * i); 1226168404Spjd } else { 1227168404Spjd ZFS_EXIT(zfsvfs); 1228168404Spjd return (EINVAL); 1229168404Spjd } 1230168404Spjd 1231168404Spjd /* A zero fid_gen means we are in the .zfs control directories */ 1232168404Spjd if (fid_gen == 0 && 1233168404Spjd (object == ZFSCTL_INO_ROOT || object == ZFSCTL_INO_SNAPDIR)) { 1234168404Spjd *vpp = zfsvfs->z_ctldir; 1235168404Spjd ASSERT(*vpp != NULL); 1236168404Spjd if (object == ZFSCTL_INO_SNAPDIR) { 1237168404Spjd VERIFY(zfsctl_root_lookup(*vpp, "snapshot", vpp, NULL, 1238185029Spjd 0, NULL, NULL, NULL, NULL, NULL) == 0); 1239168404Spjd } else { 1240168404Spjd VN_HOLD(*vpp); 1241168404Spjd } 1242206667Spjd ZFS_EXIT(zfsvfs); 1243196978Spjd vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY); 1244168404Spjd return (0); 1245168404Spjd } 1246168404Spjd 1247168404Spjd gen_mask = -1ULL >> (64 - 8 * i); 1248168404Spjd 1249168404Spjd dprintf("getting %llu [%u mask %llx]\n", object, fid_gen, gen_mask); 1250168404Spjd if (err = zfs_zget(zfsvfs, object, &zp)) { 1251168404Spjd ZFS_EXIT(zfsvfs); 1252168404Spjd return (err); 1253168404Spjd } 1254168404Spjd zp_gen = zp->z_phys->zp_gen & gen_mask; 1255168404Spjd if (zp_gen == 0) 1256168404Spjd zp_gen = 1; 1257168404Spjd if (zp->z_unlinked || zp_gen != fid_gen) { 1258168404Spjd dprintf("znode gen (%u) != fid gen (%u)\n", zp_gen, fid_gen); 1259168404Spjd VN_RELE(ZTOV(zp)); 1260168404Spjd ZFS_EXIT(zfsvfs); 1261168404Spjd return (EINVAL); 1262168404Spjd } 1263168404Spjd 1264206667Spjd ZFS_EXIT(zfsvfs); 1265206667Spjd 1266168404Spjd *vpp = ZTOV(zp); 1267175202Sattilio vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY); 1268185029Spjd vnode_create_vobject(*vpp, zp->z_phys->zp_size, curthread); 1269168404Spjd return (0); 1270168404Spjd} 1271168404Spjd 1272185029Spjd/* 1273185029Spjd * Block out VOPs and close zfsvfs_t::z_os 1274185029Spjd * 1275185029Spjd * Note, if successful, then we return with the 'z_teardown_lock' and 1276185029Spjd * 'z_teardown_inactive_lock' write held. 1277185029Spjd */ 1278185029Spjdint 1279185029Spjdzfs_suspend_fs(zfsvfs_t *zfsvfs, char *name, int *mode) 1280168404Spjd{ 1281185029Spjd int error; 1282168404Spjd 1283185029Spjd if ((error = zfsvfs_teardown(zfsvfs, B_FALSE)) != 0) 1284185029Spjd return (error); 1285168404Spjd 1286185029Spjd *mode = zfsvfs->z_os->os_mode; 1287185029Spjd dmu_objset_name(zfsvfs->z_os, name); 1288185029Spjd dmu_objset_close(zfsvfs->z_os); 1289168404Spjd 1290185029Spjd return (0); 1291185029Spjd} 1292168404Spjd 1293185029Spjd/* 1294185029Spjd * Reopen zfsvfs_t::z_os and release VOPs. 1295185029Spjd */ 1296185029Spjdint 1297185029Spjdzfs_resume_fs(zfsvfs_t *zfsvfs, const char *osname, int mode) 1298185029Spjd{ 1299185029Spjd int err; 1300168404Spjd 1301185029Spjd ASSERT(RRW_WRITE_HELD(&zfsvfs->z_teardown_lock)); 1302185029Spjd ASSERT(RW_WRITE_HELD(&zfsvfs->z_teardown_inactive_lock)); 1303185029Spjd 1304185029Spjd err = dmu_objset_open(osname, DMU_OST_ZFS, mode, &zfsvfs->z_os); 1305185029Spjd if (err) { 1306185029Spjd zfsvfs->z_os = NULL; 1307185029Spjd } else { 1308185029Spjd znode_t *zp; 1309185029Spjd 1310185029Spjd VERIFY(zfsvfs_setup(zfsvfs, B_FALSE) == 0); 1311185029Spjd 1312185029Spjd /* 1313185029Spjd * Attempt to re-establish all the active znodes with 1314185029Spjd * their dbufs. If a zfs_rezget() fails, then we'll let 1315185029Spjd * any potential callers discover that via ZFS_ENTER_VERIFY_VP 1316185029Spjd * when they try to use their znode. 1317185029Spjd */ 1318185029Spjd mutex_enter(&zfsvfs->z_znodes_lock); 1319185029Spjd for (zp = list_head(&zfsvfs->z_all_znodes); zp; 1320185029Spjd zp = list_next(&zfsvfs->z_all_znodes, zp)) { 1321185029Spjd (void) zfs_rezget(zp); 1322185029Spjd } 1323185029Spjd mutex_exit(&zfsvfs->z_znodes_lock); 1324185029Spjd 1325168404Spjd } 1326168404Spjd 1327185029Spjd /* release the VOPs */ 1328185029Spjd rw_exit(&zfsvfs->z_teardown_inactive_lock); 1329185029Spjd rrw_exit(&zfsvfs->z_teardown_lock, FTAG); 1330185029Spjd 1331185029Spjd if (err) { 1332185029Spjd /* 1333185029Spjd * Since we couldn't reopen zfsvfs::z_os, force 1334185029Spjd * unmount this file system. 1335185029Spjd */ 1336185029Spjd if (vn_vfswlock(zfsvfs->z_vfs->vfs_vnodecovered) == 0) 1337185029Spjd (void) dounmount(zfsvfs->z_vfs, MS_FORCE, curthread); 1338168404Spjd } 1339185029Spjd return (err); 1340168404Spjd} 1341168404Spjd 1342168404Spjdstatic void 1343168404Spjdzfs_freevfs(vfs_t *vfsp) 1344168404Spjd{ 1345168404Spjd zfsvfs_t *zfsvfs = vfsp->vfs_data; 1346168404Spjd int i; 1347168404Spjd 1348168404Spjd for (i = 0; i != ZFS_OBJ_MTX_SZ; i++) 1349168404Spjd mutex_destroy(&zfsvfs->z_hold_mtx[i]); 1350168404Spjd 1351185029Spjd zfs_fuid_destroy(zfsvfs); 1352185029Spjd zfs_freezfsvfs(zfsvfs); 1353185029Spjd 1354168404Spjd atomic_add_32(&zfs_active_fs_count, -1); 1355168404Spjd} 1356168404Spjd 1357172135Spjd#ifdef __i386__ 1358172135Spjdstatic int desiredvnodes_backup; 1359172135Spjd#endif 1360172135Spjd 1361172135Spjdstatic void 1362172135Spjdzfs_vnodes_adjust(void) 1363172135Spjd{ 1364172135Spjd#ifdef __i386__ 1365185029Spjd int newdesiredvnodes; 1366172135Spjd 1367172135Spjd desiredvnodes_backup = desiredvnodes; 1368172135Spjd 1369172135Spjd /* 1370172135Spjd * We calculate newdesiredvnodes the same way it is done in 1371172135Spjd * vntblinit(). If it is equal to desiredvnodes, it means that 1372172135Spjd * it wasn't tuned by the administrator and we can tune it down. 1373172135Spjd */ 1374185029Spjd newdesiredvnodes = min(maxproc + cnt.v_page_count / 4, 2 * 1375185029Spjd vm_kmem_size / (5 * (sizeof(struct vm_object) + 1376185029Spjd sizeof(struct vnode)))); 1377185029Spjd if (newdesiredvnodes == desiredvnodes) 1378185029Spjd desiredvnodes = (3 * newdesiredvnodes) / 4; 1379172135Spjd#endif 1380172135Spjd} 1381172135Spjd 1382172135Spjdstatic void 1383172135Spjdzfs_vnodes_adjust_back(void) 1384172135Spjd{ 1385172135Spjd 1386172135Spjd#ifdef __i386__ 1387172135Spjd desiredvnodes = desiredvnodes_backup; 1388172135Spjd#endif 1389172135Spjd} 1390172135Spjd 1391168404Spjdvoid 1392168404Spjdzfs_init(void) 1393168404Spjd{ 1394168404Spjd 1395202129Sdelphij printf("ZFS filesystem version " ZPL_VERSION_STRING "\n"); 1396168404Spjd 1397168404Spjd /* 1398185029Spjd * Initialize znode cache, vnode ops, etc... 1399168404Spjd */ 1400185029Spjd zfs_znode_init(); 1401168404Spjd 1402168404Spjd /* 1403185029Spjd * Initialize .zfs directory structures 1404168404Spjd */ 1405185029Spjd zfsctl_init(); 1406172135Spjd 1407172135Spjd /* 1408185029Spjd * Reduce number of vnode. Originally number of vnodes is calculated 1409172135Spjd * with UFS inode in mind. We reduce it here, because it's too big for 1410172135Spjd * ZFS/i386. 1411172135Spjd */ 1412172135Spjd zfs_vnodes_adjust(); 1413168404Spjd} 1414168404Spjd 1415168404Spjdvoid 1416168404Spjdzfs_fini(void) 1417168404Spjd{ 1418168404Spjd zfsctl_fini(); 1419168404Spjd zfs_znode_fini(); 1420172135Spjd zfs_vnodes_adjust_back(); 1421168404Spjd} 1422168404Spjd 1423168404Spjdint 1424168404Spjdzfs_busy(void) 1425168404Spjd{ 1426168404Spjd return (zfs_active_fs_count != 0); 1427168404Spjd} 1428185029Spjd 1429185029Spjdint 1430185029Spjdzfs_set_version(const char *name, uint64_t newvers) 1431185029Spjd{ 1432185029Spjd int error; 1433185029Spjd objset_t *os; 1434185029Spjd dmu_tx_t *tx; 1435185029Spjd uint64_t curvers; 1436185029Spjd 1437185029Spjd /* 1438185029Spjd * XXX for now, require that the filesystem be unmounted. Would 1439185029Spjd * be nice to find the zfsvfs_t and just update that if 1440185029Spjd * possible. 1441185029Spjd */ 1442185029Spjd 1443185029Spjd if (newvers < ZPL_VERSION_INITIAL || newvers > ZPL_VERSION) 1444185029Spjd return (EINVAL); 1445185029Spjd 1446185029Spjd error = dmu_objset_open(name, DMU_OST_ZFS, DS_MODE_OWNER, &os); 1447185029Spjd if (error) 1448185029Spjd return (error); 1449185029Spjd 1450185029Spjd error = zap_lookup(os, MASTER_NODE_OBJ, ZPL_VERSION_STR, 1451185029Spjd 8, 1, &curvers); 1452185029Spjd if (error) 1453185029Spjd goto out; 1454185029Spjd if (newvers < curvers) { 1455185029Spjd error = EINVAL; 1456185029Spjd goto out; 1457185029Spjd } 1458185029Spjd 1459185029Spjd tx = dmu_tx_create(os); 1460185029Spjd dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, 0, ZPL_VERSION_STR); 1461185029Spjd error = dmu_tx_assign(tx, TXG_WAIT); 1462185029Spjd if (error) { 1463185029Spjd dmu_tx_abort(tx); 1464185029Spjd goto out; 1465185029Spjd } 1466185029Spjd error = zap_update(os, MASTER_NODE_OBJ, ZPL_VERSION_STR, 8, 1, 1467185029Spjd &newvers, tx); 1468185029Spjd 1469185029Spjd spa_history_internal_log(LOG_DS_UPGRADE, 1470185029Spjd dmu_objset_spa(os), tx, CRED(), 1471185029Spjd "oldver=%llu newver=%llu dataset = %llu", curvers, newvers, 1472185029Spjd dmu_objset_id(os)); 1473185029Spjd dmu_tx_commit(tx); 1474185029Spjd 1475185029Spjdout: 1476185029Spjd dmu_objset_close(os); 1477185029Spjd return (error); 1478185029Spjd} 1479185029Spjd/* 1480185029Spjd * Read a property stored within the master node. 1481185029Spjd */ 1482185029Spjdint 1483185029Spjdzfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value) 1484185029Spjd{ 1485185029Spjd const char *pname; 1486185029Spjd int error = ENOENT; 1487185029Spjd 1488185029Spjd /* 1489185029Spjd * Look up the file system's value for the property. For the 1490185029Spjd * version property, we look up a slightly different string. 1491185029Spjd */ 1492185029Spjd if (prop == ZFS_PROP_VERSION) 1493185029Spjd pname = ZPL_VERSION_STR; 1494185029Spjd else 1495185029Spjd pname = zfs_prop_to_name(prop); 1496185029Spjd 1497185029Spjd if (os != NULL) 1498185029Spjd error = zap_lookup(os, MASTER_NODE_OBJ, pname, 8, 1, value); 1499185029Spjd 1500185029Spjd if (error == ENOENT) { 1501185029Spjd /* No value set, use the default value */ 1502185029Spjd switch (prop) { 1503185029Spjd case ZFS_PROP_VERSION: 1504185029Spjd *value = ZPL_VERSION; 1505185029Spjd break; 1506185029Spjd case ZFS_PROP_NORMALIZE: 1507185029Spjd case ZFS_PROP_UTF8ONLY: 1508185029Spjd *value = 0; 1509185029Spjd break; 1510185029Spjd case ZFS_PROP_CASE: 1511185029Spjd *value = ZFS_CASE_SENSITIVE; 1512185029Spjd break; 1513185029Spjd default: 1514185029Spjd return (error); 1515185029Spjd } 1516185029Spjd error = 0; 1517185029Spjd } 1518185029Spjd return (error); 1519185029Spjd} 1520