zfs_vfsops.c revision 208689
1168404Spjd/* 2168404Spjd * CDDL HEADER START 3168404Spjd * 4168404Spjd * The contents of this file are subject to the terms of the 5168404Spjd * Common Development and Distribution License (the "License"). 6168404Spjd * You may not use this file except in compliance with the License. 7168404Spjd * 8168404Spjd * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9168404Spjd * or http://www.opensolaris.org/os/licensing. 10168404Spjd * See the License for the specific language governing permissions 11168404Spjd * and limitations under the License. 12168404Spjd * 13168404Spjd * When distributing Covered Code, include this CDDL HEADER in each 14168404Spjd * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15168404Spjd * If applicable, add the following below this CDDL HEADER, with the 16168404Spjd * fields enclosed by brackets "[]" replaced with your own identifying 17168404Spjd * information: Portions Copyright [yyyy] [name of copyright owner] 18168404Spjd * 19168404Spjd * CDDL HEADER END 20168404Spjd */ 21168404Spjd/* 22185029Spjd * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23168404Spjd * Use is subject to license terms. 
24168404Spjd */ 25168404Spjd 26168404Spjd#include <sys/types.h> 27168404Spjd#include <sys/param.h> 28168404Spjd#include <sys/systm.h> 29168404Spjd#include <sys/kernel.h> 30168404Spjd#include <sys/sysmacros.h> 31168404Spjd#include <sys/kmem.h> 32168404Spjd#include <sys/acl.h> 33168404Spjd#include <sys/vnode.h> 34168404Spjd#include <sys/vfs.h> 35168404Spjd#include <sys/mntent.h> 36168404Spjd#include <sys/mount.h> 37168404Spjd#include <sys/cmn_err.h> 38168404Spjd#include <sys/zfs_znode.h> 39168404Spjd#include <sys/zfs_dir.h> 40168404Spjd#include <sys/zil.h> 41168404Spjd#include <sys/fs/zfs.h> 42168404Spjd#include <sys/dmu.h> 43168404Spjd#include <sys/dsl_prop.h> 44168404Spjd#include <sys/dsl_dataset.h> 45185029Spjd#include <sys/dsl_deleg.h> 46168404Spjd#include <sys/spa.h> 47168404Spjd#include <sys/zap.h> 48168404Spjd#include <sys/varargs.h> 49168962Spjd#include <sys/policy.h> 50168404Spjd#include <sys/atomic.h> 51168404Spjd#include <sys/zfs_ioctl.h> 52168404Spjd#include <sys/zfs_ctldir.h> 53185029Spjd#include <sys/zfs_fuid.h> 54168962Spjd#include <sys/sunddi.h> 55168404Spjd#include <sys/dnlc.h> 56185029Spjd#include <sys/dmu_objset.h> 57185029Spjd#include <sys/spa_boot.h> 58185029Spjd#include <sys/vdev_impl.h> /* VDEV_BOOT_VERSION */ 59168404Spjd 60168404Spjdstruct mtx zfs_debug_mtx; 61168404SpjdMTX_SYSINIT(zfs_debug_mtx, &zfs_debug_mtx, "zfs_debug", MTX_DEF); 62185029Spjd 63168404SpjdSYSCTL_NODE(_vfs, OID_AUTO, zfs, CTLFLAG_RW, 0, "ZFS file system"); 64185029Spjd 65185029Spjdint zfs_super_owner = 0; 66185029SpjdSYSCTL_INT(_vfs_zfs, OID_AUTO, super_owner, CTLFLAG_RW, &zfs_super_owner, 0, 67185029Spjd "File system owner can perform privileged operation on his file systems"); 68185029Spjd 69168404Spjdint zfs_debug_level = 0; 70168713SpjdTUNABLE_INT("vfs.zfs.debug", &zfs_debug_level); 71168404SpjdSYSCTL_INT(_vfs_zfs, OID_AUTO, debug, CTLFLAG_RW, &zfs_debug_level, 0, 72168404Spjd "Debug level"); 73168404Spjd 74185029SpjdSYSCTL_NODE(_vfs_zfs, OID_AUTO, version, CTLFLAG_RD, 
0, "ZFS versions"); 75185029Spjdstatic int zfs_version_acl = ZFS_ACL_VERSION; 76185029SpjdSYSCTL_INT(_vfs_zfs_version, OID_AUTO, acl, CTLFLAG_RD, &zfs_version_acl, 0, 77185029Spjd "ZFS_ACL_VERSION"); 78185029Spjdstatic int zfs_version_dmu_backup_header = DMU_BACKUP_HEADER_VERSION; 79185029SpjdSYSCTL_INT(_vfs_zfs_version, OID_AUTO, dmu_backup_header, CTLFLAG_RD, 80185029Spjd &zfs_version_dmu_backup_header, 0, "DMU_BACKUP_HEADER_VERSION"); 81185029Spjdstatic int zfs_version_dmu_backup_stream = DMU_BACKUP_STREAM_VERSION; 82185029SpjdSYSCTL_INT(_vfs_zfs_version, OID_AUTO, dmu_backup_stream, CTLFLAG_RD, 83185029Spjd &zfs_version_dmu_backup_stream, 0, "DMU_BACKUP_STREAM_VERSION"); 84185029Spjdstatic int zfs_version_spa = SPA_VERSION; 85185029SpjdSYSCTL_INT(_vfs_zfs_version, OID_AUTO, spa, CTLFLAG_RD, &zfs_version_spa, 0, 86185029Spjd "SPA_VERSION"); 87185029Spjdstatic int zfs_version_vdev_boot = VDEV_BOOT_VERSION; 88185029SpjdSYSCTL_INT(_vfs_zfs_version, OID_AUTO, vdev_boot, CTLFLAG_RD, 89185029Spjd &zfs_version_vdev_boot, 0, "VDEV_BOOT_VERSION"); 90185029Spjdstatic int zfs_version_zpl = ZPL_VERSION; 91185029SpjdSYSCTL_INT(_vfs_zfs_version, OID_AUTO, zpl, CTLFLAG_RD, &zfs_version_zpl, 0, 92185029Spjd "ZPL_VERSION"); 93185029Spjd 94191990Sattiliostatic int zfs_mount(vfs_t *vfsp); 95191990Sattiliostatic int zfs_umount(vfs_t *vfsp, int fflag); 96191990Sattiliostatic int zfs_root(vfs_t *vfsp, int flags, vnode_t **vpp); 97191990Sattiliostatic int zfs_statfs(vfs_t *vfsp, struct statfs *statp); 98168404Spjdstatic int zfs_vget(vfs_t *vfsp, ino_t ino, int flags, vnode_t **vpp); 99191990Sattiliostatic int zfs_sync(vfs_t *vfsp, int waitfor); 100196982Spjdstatic int zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, int *extflagsp, 101196982Spjd struct ucred **credanonp, int *numsecflavors, int **secflavors); 102168404Spjdstatic int zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, vnode_t **vpp); 103168404Spjdstatic void zfs_objset_close(zfsvfs_t *zfsvfs); 104168404Spjdstatic void 
zfs_freevfs(vfs_t *vfsp); 105168404Spjd 106168404Spjdstatic struct vfsops zfs_vfsops = { 107168404Spjd .vfs_mount = zfs_mount, 108168404Spjd .vfs_unmount = zfs_umount, 109168404Spjd .vfs_root = zfs_root, 110168404Spjd .vfs_statfs = zfs_statfs, 111168404Spjd .vfs_vget = zfs_vget, 112168404Spjd .vfs_sync = zfs_sync, 113196982Spjd .vfs_checkexp = zfs_checkexp, 114168404Spjd .vfs_fhtovp = zfs_fhtovp, 115168404Spjd}; 116168404Spjd 117185029SpjdVFS_SET(zfs_vfsops, zfs, VFCF_JAIL | VFCF_DELEGADMIN); 118168404Spjd 119168404Spjd/* 120168404Spjd * We need to keep a count of active fs's. 121168404Spjd * This is necessary to prevent our module 122168404Spjd * from being unloaded after a umount -f 123168404Spjd */ 124168404Spjdstatic uint32_t zfs_active_fs_count = 0; 125168404Spjd 126168404Spjd/*ARGSUSED*/ 127168404Spjdstatic int 128191990Sattiliozfs_sync(vfs_t *vfsp, int waitfor) 129168404Spjd{ 130168404Spjd 131168404Spjd /* 132168404Spjd * Data integrity is job one. We don't want a compromised kernel 133168404Spjd * writing to the storage pool, so we never sync during panic. 134168404Spjd */ 135168404Spjd if (panicstr) 136168404Spjd return (0); 137168404Spjd 138168404Spjd if (vfsp != NULL) { 139168404Spjd /* 140168404Spjd * Sync a specific filesystem. 141168404Spjd */ 142168404Spjd zfsvfs_t *zfsvfs = vfsp->vfs_data; 143168404Spjd int error; 144168404Spjd 145191990Sattilio error = vfs_stdsync(vfsp, waitfor); 146168404Spjd if (error != 0) 147168404Spjd return (error); 148168404Spjd 149168404Spjd ZFS_ENTER(zfsvfs); 150168404Spjd if (zfsvfs->z_log != NULL) 151168404Spjd zil_commit(zfsvfs->z_log, UINT64_MAX, 0); 152168404Spjd else 153168404Spjd txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0); 154168404Spjd ZFS_EXIT(zfsvfs); 155168404Spjd } else { 156168404Spjd /* 157168404Spjd * Sync all ZFS filesystems. This is what happens when you 158168404Spjd * run sync(1M). Unlike other filesystems, ZFS honors the 159168404Spjd * request by waiting for all pools to commit all dirty data. 
160168404Spjd */ 161168404Spjd spa_sync_allpools(); 162168404Spjd } 163168404Spjd 164168404Spjd return (0); 165168404Spjd} 166168404Spjd 167168404Spjdstatic void 168168404Spjdatime_changed_cb(void *arg, uint64_t newval) 169168404Spjd{ 170168404Spjd zfsvfs_t *zfsvfs = arg; 171168404Spjd 172168404Spjd if (newval == TRUE) { 173168404Spjd zfsvfs->z_atime = TRUE; 174168404Spjd zfsvfs->z_vfs->vfs_flag &= ~MNT_NOATIME; 175168404Spjd vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME); 176168404Spjd vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_ATIME, NULL, 0); 177168404Spjd } else { 178168404Spjd zfsvfs->z_atime = FALSE; 179168404Spjd zfsvfs->z_vfs->vfs_flag |= MNT_NOATIME; 180168404Spjd vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_ATIME); 181168404Spjd vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME, NULL, 0); 182168404Spjd } 183168404Spjd} 184168404Spjd 185168404Spjdstatic void 186168404Spjdxattr_changed_cb(void *arg, uint64_t newval) 187168404Spjd{ 188168404Spjd zfsvfs_t *zfsvfs = arg; 189168404Spjd 190168404Spjd if (newval == TRUE) { 191168404Spjd /* XXX locking on vfs_flag? */ 192168404Spjd#ifdef TODO 193168404Spjd zfsvfs->z_vfs->vfs_flag |= VFS_XATTR; 194168404Spjd#endif 195168404Spjd vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOXATTR); 196168404Spjd vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_XATTR, NULL, 0); 197168404Spjd } else { 198168404Spjd /* XXX locking on vfs_flag? 
*/ 199168404Spjd#ifdef TODO 200168404Spjd zfsvfs->z_vfs->vfs_flag &= ~VFS_XATTR; 201168404Spjd#endif 202168404Spjd vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_XATTR); 203168404Spjd vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOXATTR, NULL, 0); 204168404Spjd } 205168404Spjd} 206168404Spjd 207168404Spjdstatic void 208168404Spjdblksz_changed_cb(void *arg, uint64_t newval) 209168404Spjd{ 210168404Spjd zfsvfs_t *zfsvfs = arg; 211168404Spjd 212168404Spjd if (newval < SPA_MINBLOCKSIZE || 213168404Spjd newval > SPA_MAXBLOCKSIZE || !ISP2(newval)) 214168404Spjd newval = SPA_MAXBLOCKSIZE; 215168404Spjd 216168404Spjd zfsvfs->z_max_blksz = newval; 217204101Spjd zfsvfs->z_vfs->mnt_stat.f_iosize = newval; 218168404Spjd} 219168404Spjd 220168404Spjdstatic void 221168404Spjdreadonly_changed_cb(void *arg, uint64_t newval) 222168404Spjd{ 223168404Spjd zfsvfs_t *zfsvfs = arg; 224168404Spjd 225168404Spjd if (newval) { 226168404Spjd /* XXX locking on vfs_flag? */ 227168404Spjd zfsvfs->z_vfs->vfs_flag |= VFS_RDONLY; 228168404Spjd vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RW); 229168404Spjd vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RO, NULL, 0); 230168404Spjd } else { 231168404Spjd /* XXX locking on vfs_flag? 
*/ 232168404Spjd zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY; 233168404Spjd vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RO); 234168404Spjd vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RW, NULL, 0); 235168404Spjd } 236168404Spjd} 237168404Spjd 238168404Spjdstatic void 239168404Spjdsetuid_changed_cb(void *arg, uint64_t newval) 240168404Spjd{ 241168404Spjd zfsvfs_t *zfsvfs = arg; 242168404Spjd 243168404Spjd if (newval == FALSE) { 244168404Spjd zfsvfs->z_vfs->vfs_flag |= VFS_NOSETUID; 245168404Spjd vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_SETUID); 246168404Spjd vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID, NULL, 0); 247168404Spjd } else { 248168404Spjd zfsvfs->z_vfs->vfs_flag &= ~VFS_NOSETUID; 249168404Spjd vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID); 250168404Spjd vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_SETUID, NULL, 0); 251168404Spjd } 252168404Spjd} 253168404Spjd 254168404Spjdstatic void 255168404Spjdexec_changed_cb(void *arg, uint64_t newval) 256168404Spjd{ 257168404Spjd zfsvfs_t *zfsvfs = arg; 258168404Spjd 259168404Spjd if (newval == FALSE) { 260168404Spjd zfsvfs->z_vfs->vfs_flag |= VFS_NOEXEC; 261168404Spjd vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_EXEC); 262168404Spjd vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC, NULL, 0); 263168404Spjd } else { 264168404Spjd zfsvfs->z_vfs->vfs_flag &= ~VFS_NOEXEC; 265168404Spjd vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC); 266168404Spjd vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_EXEC, NULL, 0); 267168404Spjd } 268168404Spjd} 269168404Spjd 270185029Spjd/* 271185029Spjd * The nbmand mount option can be changed at mount time. 
272185029Spjd * We can't allow it to be toggled on live file systems or incorrect 273185029Spjd * behavior may be seen from cifs clients 274185029Spjd * 275185029Spjd * This property isn't registered via dsl_prop_register(), but this callback 276185029Spjd * will be called when a file system is first mounted 277185029Spjd */ 278168404Spjdstatic void 279185029Spjdnbmand_changed_cb(void *arg, uint64_t newval) 280185029Spjd{ 281185029Spjd zfsvfs_t *zfsvfs = arg; 282185029Spjd if (newval == FALSE) { 283185029Spjd vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NBMAND); 284185029Spjd vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NONBMAND, NULL, 0); 285185029Spjd } else { 286185029Spjd vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NONBMAND); 287185029Spjd vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NBMAND, NULL, 0); 288185029Spjd } 289185029Spjd} 290185029Spjd 291185029Spjdstatic void 292168404Spjdsnapdir_changed_cb(void *arg, uint64_t newval) 293168404Spjd{ 294168404Spjd zfsvfs_t *zfsvfs = arg; 295168404Spjd 296168404Spjd zfsvfs->z_show_ctldir = newval; 297168404Spjd} 298168404Spjd 299168404Spjdstatic void 300185029Spjdvscan_changed_cb(void *arg, uint64_t newval) 301185029Spjd{ 302185029Spjd zfsvfs_t *zfsvfs = arg; 303185029Spjd 304185029Spjd zfsvfs->z_vscan = newval; 305185029Spjd} 306185029Spjd 307185029Spjdstatic void 308168404Spjdacl_mode_changed_cb(void *arg, uint64_t newval) 309168404Spjd{ 310168404Spjd zfsvfs_t *zfsvfs = arg; 311168404Spjd 312168404Spjd zfsvfs->z_acl_mode = newval; 313168404Spjd} 314168404Spjd 315168404Spjdstatic void 316168404Spjdacl_inherit_changed_cb(void *arg, uint64_t newval) 317168404Spjd{ 318168404Spjd zfsvfs_t *zfsvfs = arg; 319168404Spjd 320168404Spjd zfsvfs->z_acl_inherit = newval; 321168404Spjd} 322168404Spjd 323168404Spjdstatic int 324168404Spjdzfs_register_callbacks(vfs_t *vfsp) 325168404Spjd{ 326168404Spjd struct dsl_dataset *ds = NULL; 327168404Spjd objset_t *os = NULL; 328168404Spjd zfsvfs_t *zfsvfs = NULL; 329185029Spjd uint64_t nbmand; 330168404Spjd int readonly, 
do_readonly = FALSE; 331168404Spjd int setuid, do_setuid = FALSE; 332168404Spjd int exec, do_exec = FALSE; 333168404Spjd int xattr, do_xattr = FALSE; 334185029Spjd int atime, do_atime = FALSE; 335168404Spjd int error = 0; 336168404Spjd 337168404Spjd ASSERT(vfsp); 338168404Spjd zfsvfs = vfsp->vfs_data; 339168404Spjd ASSERT(zfsvfs); 340168404Spjd os = zfsvfs->z_os; 341168404Spjd 342168404Spjd /* 343196965Spjd * This function can be called for a snapshot when we update snapshot's 344196965Spjd * mount point, which isn't really supported. 345196965Spjd */ 346196965Spjd if (dmu_objset_is_snapshot(os)) 347196965Spjd return (EOPNOTSUPP); 348196965Spjd 349196965Spjd /* 350168404Spjd * The act of registering our callbacks will destroy any mount 351168404Spjd * options we may have. In order to enable temporary overrides 352168404Spjd * of mount options, we stash away the current values and 353168404Spjd * restore them after we register the callbacks. 354168404Spjd */ 355168404Spjd if (vfs_optionisset(vfsp, MNTOPT_RO, NULL)) { 356168404Spjd readonly = B_TRUE; 357168404Spjd do_readonly = B_TRUE; 358168404Spjd } else if (vfs_optionisset(vfsp, MNTOPT_RW, NULL)) { 359168404Spjd readonly = B_FALSE; 360168404Spjd do_readonly = B_TRUE; 361168404Spjd } 362168404Spjd if (vfs_optionisset(vfsp, MNTOPT_NOSUID, NULL)) { 363168404Spjd setuid = B_FALSE; 364168404Spjd do_setuid = B_TRUE; 365168404Spjd } else { 366168404Spjd if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) { 367168404Spjd setuid = B_FALSE; 368168404Spjd do_setuid = B_TRUE; 369168404Spjd } else if (vfs_optionisset(vfsp, MNTOPT_SETUID, NULL)) { 370168404Spjd setuid = B_TRUE; 371168404Spjd do_setuid = B_TRUE; 372168404Spjd } 373168404Spjd } 374168404Spjd if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL)) { 375168404Spjd exec = B_FALSE; 376168404Spjd do_exec = B_TRUE; 377168404Spjd } else if (vfs_optionisset(vfsp, MNTOPT_EXEC, NULL)) { 378168404Spjd exec = B_TRUE; 379168404Spjd do_exec = B_TRUE; 380168404Spjd } 381168404Spjd if 
(vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL)) { 382168404Spjd xattr = B_FALSE; 383168404Spjd do_xattr = B_TRUE; 384168404Spjd } else if (vfs_optionisset(vfsp, MNTOPT_XATTR, NULL)) { 385168404Spjd xattr = B_TRUE; 386168404Spjd do_xattr = B_TRUE; 387168404Spjd } 388185029Spjd if (vfs_optionisset(vfsp, MNTOPT_NOATIME, NULL)) { 389185029Spjd atime = B_FALSE; 390185029Spjd do_atime = B_TRUE; 391185029Spjd } else if (vfs_optionisset(vfsp, MNTOPT_ATIME, NULL)) { 392185029Spjd atime = B_TRUE; 393185029Spjd do_atime = B_TRUE; 394185029Spjd } 395168404Spjd 396168404Spjd /* 397185029Spjd * nbmand is a special property. It can only be changed at 398185029Spjd * mount time. 399185029Spjd * 400185029Spjd * This is weird, but it is documented to only be changeable 401185029Spjd * at mount time. 402185029Spjd */ 403185029Spjd if (vfs_optionisset(vfsp, MNTOPT_NONBMAND, NULL)) { 404185029Spjd nbmand = B_FALSE; 405185029Spjd } else if (vfs_optionisset(vfsp, MNTOPT_NBMAND, NULL)) { 406185029Spjd nbmand = B_TRUE; 407185029Spjd } else { 408185029Spjd char osname[MAXNAMELEN]; 409185029Spjd 410185029Spjd dmu_objset_name(os, osname); 411185029Spjd if (error = dsl_prop_get_integer(osname, "nbmand", &nbmand, 412185029Spjd NULL)) { 413185029Spjd return (error); 414185029Spjd } 415185029Spjd } 416185029Spjd 417185029Spjd /* 418168404Spjd * Register property callbacks. 419168404Spjd * 420168404Spjd * It would probably be fine to just check for i/o error from 421168404Spjd * the first prop_register(), but I guess I like to go 422168404Spjd * overboard... 423168404Spjd */ 424168404Spjd ds = dmu_objset_ds(os); 425168404Spjd error = dsl_prop_register(ds, "atime", atime_changed_cb, zfsvfs); 426168404Spjd error = error ? error : dsl_prop_register(ds, 427168404Spjd "xattr", xattr_changed_cb, zfsvfs); 428168404Spjd error = error ? error : dsl_prop_register(ds, 429168404Spjd "recordsize", blksz_changed_cb, zfsvfs); 430168404Spjd error = error ? 
error : dsl_prop_register(ds, 431168404Spjd "readonly", readonly_changed_cb, zfsvfs); 432168404Spjd error = error ? error : dsl_prop_register(ds, 433168404Spjd "setuid", setuid_changed_cb, zfsvfs); 434168404Spjd error = error ? error : dsl_prop_register(ds, 435168404Spjd "exec", exec_changed_cb, zfsvfs); 436168404Spjd error = error ? error : dsl_prop_register(ds, 437168404Spjd "snapdir", snapdir_changed_cb, zfsvfs); 438168404Spjd error = error ? error : dsl_prop_register(ds, 439168404Spjd "aclmode", acl_mode_changed_cb, zfsvfs); 440168404Spjd error = error ? error : dsl_prop_register(ds, 441168404Spjd "aclinherit", acl_inherit_changed_cb, zfsvfs); 442185029Spjd error = error ? error : dsl_prop_register(ds, 443185029Spjd "vscan", vscan_changed_cb, zfsvfs); 444168404Spjd if (error) 445168404Spjd goto unregister; 446168404Spjd 447168404Spjd /* 448168404Spjd * Invoke our callbacks to restore temporary mount options. 449168404Spjd */ 450168404Spjd if (do_readonly) 451168404Spjd readonly_changed_cb(zfsvfs, readonly); 452168404Spjd if (do_setuid) 453168404Spjd setuid_changed_cb(zfsvfs, setuid); 454168404Spjd if (do_exec) 455168404Spjd exec_changed_cb(zfsvfs, exec); 456168404Spjd if (do_xattr) 457168404Spjd xattr_changed_cb(zfsvfs, xattr); 458185029Spjd if (do_atime) 459185029Spjd atime_changed_cb(zfsvfs, atime); 460168404Spjd 461185029Spjd nbmand_changed_cb(zfsvfs, nbmand); 462185029Spjd 463168404Spjd return (0); 464168404Spjd 465168404Spjdunregister: 466168404Spjd /* 467168404Spjd * We may attempt to unregister some callbacks that are not 468168404Spjd * registered, but this is OK; it will simply return ENOMSG, 469168404Spjd * which we will ignore. 
470168404Spjd */ 471168404Spjd (void) dsl_prop_unregister(ds, "atime", atime_changed_cb, zfsvfs); 472168404Spjd (void) dsl_prop_unregister(ds, "xattr", xattr_changed_cb, zfsvfs); 473168404Spjd (void) dsl_prop_unregister(ds, "recordsize", blksz_changed_cb, zfsvfs); 474168404Spjd (void) dsl_prop_unregister(ds, "readonly", readonly_changed_cb, zfsvfs); 475168404Spjd (void) dsl_prop_unregister(ds, "setuid", setuid_changed_cb, zfsvfs); 476168404Spjd (void) dsl_prop_unregister(ds, "exec", exec_changed_cb, zfsvfs); 477168404Spjd (void) dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, zfsvfs); 478168404Spjd (void) dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb, zfsvfs); 479168404Spjd (void) dsl_prop_unregister(ds, "aclinherit", acl_inherit_changed_cb, 480168404Spjd zfsvfs); 481185029Spjd (void) dsl_prop_unregister(ds, "vscan", vscan_changed_cb, zfsvfs); 482168404Spjd return (error); 483168404Spjd 484168404Spjd} 485168404Spjd 486168404Spjdstatic int 487185029Spjdzfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting) 488168404Spjd{ 489185029Spjd int error; 490185029Spjd 491185029Spjd error = zfs_register_callbacks(zfsvfs->z_vfs); 492185029Spjd if (error) 493185029Spjd return (error); 494185029Spjd 495185029Spjd /* 496185029Spjd * Set the objset user_ptr to track its zfsvfs. 497185029Spjd */ 498185029Spjd mutex_enter(&zfsvfs->z_os->os->os_user_ptr_lock); 499185029Spjd dmu_objset_set_user(zfsvfs->z_os, zfsvfs); 500185029Spjd mutex_exit(&zfsvfs->z_os->os->os_user_ptr_lock); 501185029Spjd 502208689Smm zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data); 503208689Smm if (zil_disable) { 504208689Smm zil_destroy(zfsvfs->z_log, B_FALSE); 505208689Smm zfsvfs->z_log = NULL; 506208689Smm } 507208689Smm 508185029Spjd /* 509185029Spjd * If we are not mounting (ie: online recv), then we don't 510185029Spjd * have to worry about replaying the log as we blocked all 511185029Spjd * operations out since we closed the ZIL. 
512185029Spjd */ 513185029Spjd if (mounting) { 514185029Spjd boolean_t readonly; 515185029Spjd 516185029Spjd /* 517185029Spjd * During replay we remove the read only flag to 518185029Spjd * allow replays to succeed. 519185029Spjd */ 520185029Spjd readonly = zfsvfs->z_vfs->vfs_flag & VFS_RDONLY; 521208689Smm if (readonly != 0) 522208689Smm zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY; 523208689Smm else 524208689Smm zfs_unlinked_drain(zfsvfs); 525185029Spjd 526208689Smm if (zfsvfs->z_log) { 527208689Smm /* 528208689Smm * Parse and replay the intent log. 529208689Smm * 530208689Smm * Because of ziltest, this must be done after 531208689Smm * zfs_unlinked_drain(). (Further note: ziltest 532208689Smm * doesn't use readonly mounts, where 533208689Smm * zfs_unlinked_drain() isn't called.) This is because 534208689Smm * ziltest causes spa_sync() to think it's committed, 535208689Smm * but actually it is not, so the intent log contains 536208689Smm * many txg's worth of changes. 537208689Smm * 538208689Smm * In particular, if object N is in the unlinked set in 539208689Smm * the last txg to actually sync, then it could be 540208689Smm * actually freed in a later txg and then reallocated 541208689Smm * in a yet later txg. This would write a "create 542208689Smm * object N" record to the intent log. Normally, this 543208689Smm * would be fine because the spa_sync() would have 544208689Smm * written out the fact that object N is free, before 545208689Smm * we could write the "create object N" intent log 546208689Smm * record. 547208689Smm * 548208689Smm * But when we are in ziltest mode, we advance the "open 549208689Smm * txg" without actually spa_sync()-ing the changes to 550208689Smm * disk. So we would see that object N is still 551208689Smm * allocated and in the unlinked set, and there is an 552208689Smm * intent log record saying to allocate it. 
553208689Smm */ 554208689Smm zil_replay(zfsvfs->z_os, zfsvfs, &zfsvfs->z_assign, 555208689Smm zfs_replay_vector, zfs_unlinked_drain); 556208689Smm } 557185029Spjd zfsvfs->z_vfs->vfs_flag |= readonly; /* restore readonly bit */ 558185029Spjd } 559185029Spjd 560185029Spjd return (0); 561185029Spjd} 562185029Spjd 563185029Spjdstatic void 564185029Spjdzfs_freezfsvfs(zfsvfs_t *zfsvfs) 565185029Spjd{ 566185029Spjd mutex_destroy(&zfsvfs->z_znodes_lock); 567185029Spjd mutex_destroy(&zfsvfs->z_online_recv_lock); 568185029Spjd list_destroy(&zfsvfs->z_all_znodes); 569185029Spjd rrw_destroy(&zfsvfs->z_teardown_lock); 570185029Spjd rw_destroy(&zfsvfs->z_teardown_inactive_lock); 571185029Spjd rw_destroy(&zfsvfs->z_fuid_lock); 572185029Spjd kmem_free(zfsvfs, sizeof (zfsvfs_t)); 573185029Spjd} 574185029Spjd 575185029Spjdstatic int 576185029Spjdzfs_domount(vfs_t *vfsp, char *osname) 577185029Spjd{ 578168404Spjd uint64_t recordsize, readonly; 579168404Spjd int error = 0; 580168404Spjd int mode; 581168404Spjd zfsvfs_t *zfsvfs; 582168404Spjd znode_t *zp = NULL; 583168404Spjd 584168404Spjd ASSERT(vfsp); 585168404Spjd ASSERT(osname); 586168404Spjd 587168404Spjd /* 588168404Spjd * Initialize the zfs-specific filesystem structure. 589168404Spjd * Should probably make this a kmem cache, shuffle fields, 590168404Spjd * and just bzero up to z_hold_mtx[]. 
591168404Spjd */ 592168404Spjd zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP); 593168404Spjd zfsvfs->z_vfs = vfsp; 594168404Spjd zfsvfs->z_parent = zfsvfs; 595168404Spjd zfsvfs->z_assign = TXG_NOWAIT; 596168404Spjd zfsvfs->z_max_blksz = SPA_MAXBLOCKSIZE; 597168404Spjd zfsvfs->z_show_ctldir = ZFS_SNAPDIR_VISIBLE; 598168404Spjd 599168404Spjd mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL); 600185029Spjd mutex_init(&zfsvfs->z_online_recv_lock, NULL, MUTEX_DEFAULT, NULL); 601168404Spjd list_create(&zfsvfs->z_all_znodes, sizeof (znode_t), 602168404Spjd offsetof(znode_t, z_link_node)); 603185029Spjd rrw_init(&zfsvfs->z_teardown_lock); 604185029Spjd rw_init(&zfsvfs->z_teardown_inactive_lock, NULL, RW_DEFAULT, NULL); 605185029Spjd rw_init(&zfsvfs->z_fuid_lock, NULL, RW_DEFAULT, NULL); 606168404Spjd 607168404Spjd if (error = dsl_prop_get_integer(osname, "recordsize", &recordsize, 608168404Spjd NULL)) 609168404Spjd goto out; 610204101Spjd zfsvfs->z_vfs->vfs_bsize = SPA_MINBLOCKSIZE; 611204101Spjd zfsvfs->z_vfs->mnt_stat.f_iosize = recordsize; 612168404Spjd 613168404Spjd vfsp->vfs_data = zfsvfs; 614168404Spjd vfsp->mnt_flag |= MNT_LOCAL; 615168404Spjd vfsp->mnt_kern_flag |= MNTK_MPSAFE; 616168404Spjd vfsp->mnt_kern_flag |= MNTK_LOOKUP_SHARED; 617193440Sps vfsp->mnt_kern_flag |= MNTK_SHARED_WRITES; 618168404Spjd 619168404Spjd if (error = dsl_prop_get_integer(osname, "readonly", &readonly, NULL)) 620168404Spjd goto out; 621168404Spjd 622185029Spjd mode = DS_MODE_OWNER; 623168404Spjd if (readonly) 624185029Spjd mode |= DS_MODE_READONLY; 625168404Spjd 626168404Spjd error = dmu_objset_open(osname, DMU_OST_ZFS, mode, &zfsvfs->z_os); 627168404Spjd if (error == EROFS) { 628185029Spjd mode = DS_MODE_OWNER | DS_MODE_READONLY; 629168404Spjd error = dmu_objset_open(osname, DMU_OST_ZFS, mode, 630168404Spjd &zfsvfs->z_os); 631168404Spjd } 632168404Spjd 633168404Spjd if (error) 634168404Spjd goto out; 635168404Spjd 636185029Spjd if (error = zfs_init_fs(zfsvfs, &zp)) 
637168404Spjd goto out; 638168404Spjd 639185029Spjd /* 640185029Spjd * Set features for file system. 641185029Spjd */ 642185029Spjd zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os); 643185029Spjd if (zfsvfs->z_use_fuids) { 644185029Spjd vfs_set_feature(vfsp, VFSFT_XVATTR); 645185029Spjd vfs_set_feature(vfsp, VFSFT_SYSATTR_VIEWS); 646185029Spjd vfs_set_feature(vfsp, VFSFT_ACEMASKONACCESS); 647185029Spjd vfs_set_feature(vfsp, VFSFT_ACLONCREATE); 648185029Spjd } 649185029Spjd if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE) { 650185029Spjd vfs_set_feature(vfsp, VFSFT_DIRENTFLAGS); 651185029Spjd vfs_set_feature(vfsp, VFSFT_CASEINSENSITIVE); 652185029Spjd vfs_set_feature(vfsp, VFSFT_NOCASESENSITIVE); 653185029Spjd } else if (zfsvfs->z_case == ZFS_CASE_MIXED) { 654185029Spjd vfs_set_feature(vfsp, VFSFT_DIRENTFLAGS); 655185029Spjd vfs_set_feature(vfsp, VFSFT_CASEINSENSITIVE); 656185029Spjd } 657185029Spjd 658168404Spjd if (dmu_objset_is_snapshot(zfsvfs->z_os)) { 659185029Spjd uint64_t pval; 660168404Spjd 661168404Spjd ASSERT(mode & DS_MODE_READONLY); 662168404Spjd atime_changed_cb(zfsvfs, B_FALSE); 663168404Spjd readonly_changed_cb(zfsvfs, B_TRUE); 664185029Spjd if (error = dsl_prop_get_integer(osname, "xattr", &pval, NULL)) 665168404Spjd goto out; 666185029Spjd xattr_changed_cb(zfsvfs, pval); 667168404Spjd zfsvfs->z_issnap = B_TRUE; 668168404Spjd } else { 669185029Spjd error = zfsvfs_setup(zfsvfs, B_TRUE); 670168404Spjd } 671168404Spjd 672168404Spjd vfs_mountedfrom(vfsp, osname); 673168404Spjd 674168404Spjd if (!zfsvfs->z_issnap) 675168404Spjd zfsctl_create(zfsvfs); 676168404Spjdout: 677168404Spjd if (error) { 678168404Spjd if (zfsvfs->z_os) 679168404Spjd dmu_objset_close(zfsvfs->z_os); 680185029Spjd zfs_freezfsvfs(zfsvfs); 681168404Spjd } else { 682168404Spjd atomic_add_32(&zfs_active_fs_count, 1); 683168404Spjd } 684168404Spjd 685168404Spjd return (error); 686168404Spjd} 687168404Spjd 688168404Spjdvoid 689168404Spjdzfs_unregister_callbacks(zfsvfs_t *zfsvfs) 
690168404Spjd{ 691168404Spjd objset_t *os = zfsvfs->z_os; 692168404Spjd struct dsl_dataset *ds; 693168404Spjd 694168404Spjd /* 695168404Spjd * Unregister properties. 696168404Spjd */ 697168404Spjd if (!dmu_objset_is_snapshot(os)) { 698168404Spjd ds = dmu_objset_ds(os); 699168404Spjd VERIFY(dsl_prop_unregister(ds, "atime", atime_changed_cb, 700168404Spjd zfsvfs) == 0); 701168404Spjd 702168404Spjd VERIFY(dsl_prop_unregister(ds, "xattr", xattr_changed_cb, 703168404Spjd zfsvfs) == 0); 704168404Spjd 705168404Spjd VERIFY(dsl_prop_unregister(ds, "recordsize", blksz_changed_cb, 706168404Spjd zfsvfs) == 0); 707168404Spjd 708168404Spjd VERIFY(dsl_prop_unregister(ds, "readonly", readonly_changed_cb, 709168404Spjd zfsvfs) == 0); 710168404Spjd 711168404Spjd VERIFY(dsl_prop_unregister(ds, "setuid", setuid_changed_cb, 712168404Spjd zfsvfs) == 0); 713168404Spjd 714168404Spjd VERIFY(dsl_prop_unregister(ds, "exec", exec_changed_cb, 715168404Spjd zfsvfs) == 0); 716168404Spjd 717168404Spjd VERIFY(dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, 718168404Spjd zfsvfs) == 0); 719168404Spjd 720168404Spjd VERIFY(dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb, 721168404Spjd zfsvfs) == 0); 722168404Spjd 723168404Spjd VERIFY(dsl_prop_unregister(ds, "aclinherit", 724168404Spjd acl_inherit_changed_cb, zfsvfs) == 0); 725185029Spjd 726185029Spjd VERIFY(dsl_prop_unregister(ds, "vscan", 727185029Spjd vscan_changed_cb, zfsvfs) == 0); 728168404Spjd } 729168404Spjd} 730168404Spjd 731168404Spjd/*ARGSUSED*/ 732168404Spjdstatic int 733191990Sattiliozfs_mount(vfs_t *vfsp) 734168404Spjd{ 735191990Sattilio kthread_t *td = curthread; 736185029Spjd vnode_t *mvp = vfsp->mnt_vnodecovered; 737185029Spjd cred_t *cr = td->td_ucred; 738185029Spjd char *osname; 739185029Spjd int error = 0; 740185029Spjd int canwrite; 741168404Spjd 742185029Spjd if (vfs_getopt(vfsp->mnt_optnew, "from", (void **)&osname, NULL)) 743185029Spjd return (EINVAL); 744185029Spjd 745168404Spjd /* 746185029Spjd * If 
full-owner-access is enabled and delegated administration is 747185029Spjd * turned on, we must set nosuid. 748185029Spjd */ 749185029Spjd if (zfs_super_owner && 750185029Spjd dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr) != ECANCELED) { 751185029Spjd secpolicy_fs_mount_clearopts(cr, vfsp); 752185029Spjd } 753185029Spjd 754185029Spjd /* 755185029Spjd * Check for mount privilege? 756185029Spjd * 757185029Spjd * If we don't have privilege then see if 758185029Spjd * we have local permission to allow it 759185029Spjd */ 760185029Spjd error = secpolicy_fs_mount(cr, mvp, vfsp); 761185029Spjd if (error) { 762185029Spjd error = dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr); 763196944Spjd if (error != 0) 764196944Spjd goto out; 765196944Spjd 766196944Spjd if (!(vfsp->vfs_flag & MS_REMOUNT)) { 767185029Spjd vattr_t vattr; 768185029Spjd 769185029Spjd /* 770185029Spjd * Make sure user is the owner of the mount point 771185029Spjd * or has sufficient privileges. 772185029Spjd */ 773185029Spjd 774185029Spjd vattr.va_mask = AT_UID; 775185029Spjd 776196662Spjd vn_lock(mvp, LK_SHARED | LK_RETRY); 777185029Spjd if (error = VOP_GETATTR(mvp, &vattr, cr)) { 778196662Spjd VOP_UNLOCK(mvp, 0); 779185029Spjd goto out; 780185029Spjd } 781185029Spjd 782185029Spjd#if 0 /* CHECK THIS! Is probably needed for zfs_suser. 
*/ 783185029Spjd if (secpolicy_vnode_owner(mvp, cr, vattr.va_uid) != 0 && 784185029Spjd VOP_ACCESS(mvp, VWRITE, cr, td) != 0) { 785185029Spjd error = EPERM; 786185029Spjd goto out; 787185029Spjd } 788185029Spjd#else 789185029Spjd if (error = secpolicy_vnode_owner(mvp, cr, vattr.va_uid)) { 790196662Spjd VOP_UNLOCK(mvp, 0); 791185029Spjd goto out; 792185029Spjd } 793185029Spjd 794185029Spjd if (error = VOP_ACCESS(mvp, VWRITE, cr, td)) { 795196662Spjd VOP_UNLOCK(mvp, 0); 796185029Spjd goto out; 797185029Spjd } 798196662Spjd VOP_UNLOCK(mvp, 0); 799185029Spjd#endif 800196944Spjd } 801185029Spjd 802196944Spjd secpolicy_fs_mount_clearopts(cr, vfsp); 803185029Spjd } 804185029Spjd 805185029Spjd /* 806185029Spjd * Refuse to mount a filesystem if we are in a local zone and the 807185029Spjd * dataset is not visible. 808185029Spjd */ 809185029Spjd if (!INGLOBALZONE(curthread) && 810185029Spjd (!zone_dataset_visible(osname, &canwrite) || !canwrite)) { 811185029Spjd error = EPERM; 812185029Spjd goto out; 813185029Spjd } 814185029Spjd 815185029Spjd /* 816168404Spjd * When doing a remount, we simply refresh our temporary properties 817168404Spjd * according to those options set in the current VFS options. 
818168404Spjd */ 819185029Spjd if (vfsp->vfs_flag & MS_REMOUNT) { 820185029Spjd /* refresh mount options */ 821185029Spjd zfs_unregister_callbacks(vfsp->vfs_data); 822185029Spjd error = zfs_register_callbacks(vfsp); 823185029Spjd goto out; 824185029Spjd } 825168404Spjd 826168510Spjd DROP_GIANT(); 827185029Spjd error = zfs_domount(vfsp, osname); 828168510Spjd PICKUP_GIANT(); 829185029Spjdout: 830168510Spjd return (error); 831168404Spjd} 832168404Spjd 833168404Spjdstatic int 834191990Sattiliozfs_statfs(vfs_t *vfsp, struct statfs *statp) 835169170Spjd{ 836168404Spjd zfsvfs_t *zfsvfs = vfsp->vfs_data; 837168404Spjd uint64_t refdbytes, availbytes, usedobjs, availobjs; 838168404Spjd 839168404Spjd statp->f_version = STATFS_VERSION; 840168404Spjd 841168404Spjd ZFS_ENTER(zfsvfs); 842168404Spjd 843168404Spjd dmu_objset_space(zfsvfs->z_os, 844168404Spjd &refdbytes, &availbytes, &usedobjs, &availobjs); 845168404Spjd 846168404Spjd /* 847168404Spjd * The underlying storage pool actually uses multiple block sizes. 848168404Spjd * We report the fragsize as the smallest block size we support, 849168404Spjd * and we report our blocksize as the filesystem's maximum blocksize. 850168404Spjd */ 851204101Spjd statp->f_bsize = SPA_MINBLOCKSIZE; 852204101Spjd statp->f_iosize = zfsvfs->z_vfs->mnt_stat.f_iosize; 853168404Spjd 854168404Spjd /* 855168404Spjd * The following report "total" blocks of various kinds in the 856168404Spjd * file system, but reported in terms of f_frsize - the 857168404Spjd * "fragment" size. 858168404Spjd */ 859168404Spjd 860204101Spjd statp->f_blocks = (refdbytes + availbytes) >> SPA_MINBLOCKSHIFT; 861168404Spjd statp->f_bfree = availbytes / statp->f_bsize; 862168404Spjd statp->f_bavail = statp->f_bfree; /* no root reservation */ 863168404Spjd 864168404Spjd /* 865168404Spjd * statvfs() should really be called statufs(), because it assumes 866168404Spjd * static metadata. 
ZFS doesn't preallocate files, so the best 867168404Spjd * we can do is report the max that could possibly fit in f_files, 868168404Spjd * and that minus the number actually used in f_ffree. 869168404Spjd * For f_ffree, report the smaller of the number of object available 870168404Spjd * and the number of blocks (each object will take at least a block). 871168404Spjd */ 872168404Spjd statp->f_ffree = MIN(availobjs, statp->f_bfree); 873168404Spjd statp->f_files = statp->f_ffree + usedobjs; 874168404Spjd 875168404Spjd /* 876168404Spjd * We're a zfs filesystem. 877168404Spjd */ 878168404Spjd (void) strlcpy(statp->f_fstypename, "zfs", sizeof(statp->f_fstypename)); 879168404Spjd 880168404Spjd strlcpy(statp->f_mntfromname, vfsp->mnt_stat.f_mntfromname, 881168404Spjd sizeof(statp->f_mntfromname)); 882168404Spjd strlcpy(statp->f_mntonname, vfsp->mnt_stat.f_mntonname, 883168404Spjd sizeof(statp->f_mntonname)); 884168404Spjd 885168404Spjd statp->f_namemax = ZFS_MAXNAMELEN; 886168404Spjd 887168404Spjd ZFS_EXIT(zfsvfs); 888168404Spjd return (0); 889168404Spjd} 890168404Spjd 891168404Spjdstatic int 892191990Sattiliozfs_root(vfs_t *vfsp, int flags, vnode_t **vpp) 893168404Spjd{ 894168404Spjd zfsvfs_t *zfsvfs = vfsp->vfs_data; 895168404Spjd znode_t *rootzp; 896168404Spjd int error; 897168404Spjd 898197459Spjd ZFS_ENTER_NOERROR(zfsvfs); 899168404Spjd 900168404Spjd error = zfs_zget(zfsvfs, zfsvfs->z_root, &rootzp); 901206667Spjd 902206667Spjd ZFS_EXIT(zfsvfs); 903206667Spjd 904168404Spjd if (error == 0) { 905168404Spjd *vpp = ZTOV(rootzp); 906175202Sattilio error = vn_lock(*vpp, flags); 907168404Spjd (*vpp)->v_vflag |= VV_ROOT; 908168404Spjd } 909168404Spjd 910168404Spjd return (error); 911168404Spjd} 912168404Spjd 913185029Spjd/* 914185029Spjd * Teardown the zfsvfs::z_os. 915185029Spjd * 916185029Spjd * Note, if 'unmounting' if FALSE, we return with the 'z_teardown_lock' 917185029Spjd * and 'z_teardown_inactive_lock' held. 
/*
 * Tear down the zfsvfs::z_os.
 *
 * zfsvfs     - file system instance being torn down
 * unmounting - B_TRUE when called for unmount, B_FALSE when the objset is
 *              only being suspended
 *
 * Returns 0 on success.  Returns EIO, with both locks released, when
 * 'unmounting' is FALSE and the file system was already unmounted or its
 * z_os is NULL.
 *
 * Note, if 'unmounting' is FALSE and 0 is returned, we return with the
 * 'z_teardown_lock' and 'z_teardown_inactive_lock' held for writer.  If
 * 'unmounting' is TRUE both locks are released before returning.
 */
static int
zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting)
{
	znode_t *zp;

	/* Taken for writer first; z_teardown_inactive_lock is taken later. */
	rrw_enter(&zfsvfs->z_teardown_lock, RW_WRITER, FTAG);

	if (!unmounting) {
		/*
		 * We purge the parent filesystem's vfsp as the parent
		 * filesystem and all of its snapshots have their vnode's
		 * v_vfsp set to the parent's filesystem's vfsp.  Note,
		 * 'z_parent' is self referential for non-snapshots.
		 */
		(void) dnlc_purge_vfsp(zfsvfs->z_parent->z_vfs, 0);
#ifdef FREEBSD_NAMECACHE
		cache_purgevfs(zfsvfs->z_parent->z_vfs);
#endif
	}

	/*
	 * Close the zil. NB: Can't close the zil while zfs_inactive
	 * threads are blocked as zil_close can call zfs_inactive.
	 * This is why the zil is closed before z_teardown_inactive_lock
	 * is acquired below.
	 */
	if (zfsvfs->z_log) {
		zil_close(zfsvfs->z_log);
		zfsvfs->z_log = NULL;
	}

	rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_WRITER);

	/*
	 * If we are not unmounting (ie: online recv) and someone already
	 * unmounted this file system while we were doing the switcheroo,
	 * or a reopen of z_os failed then just bail out now.
	 * Both locks are dropped on this path.
	 */
	if (!unmounting && (zfsvfs->z_unmounted || zfsvfs->z_os == NULL)) {
		rw_exit(&zfsvfs->z_teardown_inactive_lock);
		rrw_exit(&zfsvfs->z_teardown_lock, FTAG);
		return (EIO);
	}

	/*
	 * At this point there are no vops active, and any new vops will
	 * fail with EIO since we have z_teardown_lock for writer (only
	 * relevant for forced unmount).
	 *
	 * Release all holds on dbufs.
	 */
	mutex_enter(&zfsvfs->z_znodes_lock);
	for (zp = list_head(&zfsvfs->z_all_znodes); zp != NULL;
	    zp = list_next(&zfsvfs->z_all_znodes, zp))
		if (zp->z_dbuf) {
			ASSERT(ZTOV(zp)->v_count >= 0);
			zfs_znode_dmu_fini(zp);
		}
	mutex_exit(&zfsvfs->z_znodes_lock);

	/*
	 * If we are unmounting, set the unmounted flag and let new vops
	 * unblock.  zfs_inactive will have the unmounted behavior, and all
	 * other vops will fail with EIO.
	 */
	if (unmounting) {
		zfsvfs->z_unmounted = B_TRUE;
		rrw_exit(&zfsvfs->z_teardown_lock, FTAG);
		rw_exit(&zfsvfs->z_teardown_inactive_lock);

#ifdef __FreeBSD__
		/*
		 * Some znodes might not be fully reclaimed, wait for them.
		 * We sleep on 'zfsvfs' with z_znodes_lock as the interlock
		 * until the list of all znodes is empty.
		 * NOTE(review): presumably the znode reclaim path issues the
		 * matching wakeup on 'zfsvfs' -- confirm against its caller.
		 */
		mutex_enter(&zfsvfs->z_znodes_lock);
		while (list_head(&zfsvfs->z_all_znodes) != NULL) {
			msleep(zfsvfs, &zfsvfs->z_znodes_lock, 0,
			    "zteardown", 0);
		}
		mutex_exit(&zfsvfs->z_znodes_lock);
#endif
	}

	/*
	 * z_os will be NULL if there was an error in attempting to reopen
	 * zfsvfs, so just return as the properties had already been
	 * unregistered and cached data had been evicted before.
	 * (Only reachable when unmounting; the !unmounting case with a NULL
	 * z_os already returned EIO above.)
	 */
	if (zfsvfs->z_os == NULL)
		return (0);

	/*
	 * Unregister properties.
	 */
	zfs_unregister_callbacks(zfsvfs);

	/*
	 * Evict cached data.  If the first eviction pass reports a non-zero
	 * result, wait for the pool to sync and try once more; the result
	 * of the second pass is deliberately ignored.
	 */
	if (dmu_objset_evict_dbufs(zfsvfs->z_os)) {
		txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0);
		(void) dmu_objset_evict_dbufs(zfsvfs->z_os);
	}

	return (0);
}
1051168404Spjd */ 1052169170Spjd if (zfsvfs->z_ctldir != NULL) { 1053168404Spjd if ((ret = zfsctl_umount_snapshots(vfsp, fflag, cr)) != 0) 1054168404Spjd return (ret); 1055191990Sattilio ret = vflush(vfsp, 0, 0, curthread); 1056168404Spjd ASSERT(ret == EBUSY); 1057168404Spjd if (!(fflag & MS_FORCE)) { 1058168404Spjd if (zfsvfs->z_ctldir->v_count > 1) 1059168404Spjd return (EBUSY); 1060168404Spjd ASSERT(zfsvfs->z_ctldir->v_count == 1); 1061168404Spjd } 1062168404Spjd zfsctl_destroy(zfsvfs); 1063168404Spjd ASSERT(zfsvfs->z_ctldir == NULL); 1064168404Spjd } 1065168404Spjd 1066197459Spjd if (fflag & MS_FORCE) { 1067197459Spjd /* 1068197459Spjd * Mark file system as unmounted before calling 1069197459Spjd * vflush(FORCECLOSE). This way we ensure no future vnops 1070197459Spjd * will be called and risk operating on DOOMED vnodes. 1071197459Spjd */ 1072197459Spjd rrw_enter(&zfsvfs->z_teardown_lock, RW_WRITER, FTAG); 1073197459Spjd zfsvfs->z_unmounted = B_TRUE; 1074197459Spjd rrw_exit(&zfsvfs->z_teardown_lock, FTAG); 1075197459Spjd } 1076197459Spjd 1077168404Spjd /* 1078168404Spjd * Flush all the files. 1079168404Spjd */ 1080191990Sattilio ret = vflush(vfsp, 1, (fflag & MS_FORCE) ? FORCECLOSE : 0, curthread); 1081168404Spjd if (ret != 0) { 1082168404Spjd if (!zfsvfs->z_issnap) { 1083168404Spjd zfsctl_create(zfsvfs); 1084168404Spjd ASSERT(zfsvfs->z_ctldir != NULL); 1085168404Spjd } 1086168404Spjd return (ret); 1087168404Spjd } 1088168404Spjd 1089185029Spjd if (!(fflag & MS_FORCE)) { 1090185029Spjd /* 1091185029Spjd * Check the number of active vnodes in the file system. 1092185029Spjd * Our count is maintained in the vfs structure, but the 1093185029Spjd * number is off by 1 to indicate a hold on the vfs 1094185029Spjd * structure itself. 1095185029Spjd * 1096185029Spjd * The '.zfs' directory maintains a reference of its 1097185029Spjd * own, and any active references underneath are 1098185029Spjd * reflected in the vnode count. 
1099185029Spjd */ 1100185029Spjd if (zfsvfs->z_ctldir == NULL) { 1101185029Spjd if (vfsp->vfs_count > 1) 1102185029Spjd return (EBUSY); 1103185029Spjd } else { 1104185029Spjd if (vfsp->vfs_count > 2 || 1105185029Spjd zfsvfs->z_ctldir->v_count > 1) 1106185029Spjd return (EBUSY); 1107185029Spjd } 1108185029Spjd } else { 1109168404Spjd MNT_ILOCK(vfsp); 1110168404Spjd vfsp->mnt_kern_flag |= MNTK_UNMOUNTF; 1111168404Spjd MNT_IUNLOCK(vfsp); 1112185029Spjd } 1113168404Spjd 1114185029Spjd VERIFY(zfsvfs_teardown(zfsvfs, B_TRUE) == 0); 1115185029Spjd os = zfsvfs->z_os; 1116185029Spjd 1117185029Spjd /* 1118185029Spjd * z_os will be NULL if there was an error in 1119185029Spjd * attempting to reopen zfsvfs. 1120185029Spjd */ 1121185029Spjd if (os != NULL) { 1122168404Spjd /* 1123185029Spjd * Unset the objset user_ptr. 1124168404Spjd */ 1125185029Spjd mutex_enter(&os->os->os_user_ptr_lock); 1126185029Spjd dmu_objset_set_user(os, NULL); 1127185029Spjd mutex_exit(&os->os->os_user_ptr_lock); 1128185029Spjd 1129185029Spjd /* 1130185029Spjd * Finally release the objset 1131185029Spjd */ 1132185029Spjd dmu_objset_close(os); 1133168404Spjd } 1134168404Spjd 1135185029Spjd /* 1136185029Spjd * We can now safely destroy the '.zfs' directory node. 
1137185029Spjd */ 1138185029Spjd if (zfsvfs->z_ctldir != NULL) 1139185029Spjd zfsctl_destroy(zfsvfs); 1140185029Spjd if (zfsvfs->z_issnap) { 1141185029Spjd vnode_t *svp = vfsp->mnt_vnodecovered; 1142185029Spjd 1143197515Spjd if (svp->v_count >= 2) 1144192211Skmacy VN_RELE(svp); 1145185029Spjd } 1146168404Spjd zfs_freevfs(vfsp); 1147168404Spjd 1148168404Spjd return (0); 1149168404Spjd} 1150168404Spjd 1151168404Spjdstatic int 1152168404Spjdzfs_vget(vfs_t *vfsp, ino_t ino, int flags, vnode_t **vpp) 1153168404Spjd{ 1154168404Spjd zfsvfs_t *zfsvfs = vfsp->vfs_data; 1155168404Spjd znode_t *zp; 1156168404Spjd int err; 1157168404Spjd 1158197167Spjd /* 1159197167Spjd * XXXPJD: zfs_zget() can't operate on virtual entires like .zfs/ or 1160197167Spjd * .zfs/snapshot/ directories, so for now just return EOPNOTSUPP. 1161197167Spjd * This will make NFS to fall back to using READDIR instead of 1162197167Spjd * READDIRPLUS. 1163197167Spjd * Also snapshots are stored in AVL tree, but based on their names, 1164197167Spjd * not inode numbers, so it will be very inefficient to iterate 1165197167Spjd * over all snapshots to find the right one. 1166197167Spjd * Note that OpenSolaris READDIRPLUS implementation does LOOKUP on 1167197167Spjd * d_name, and not VGET on d_fileno as we do. 
1168197167Spjd */ 1169197167Spjd if (ino == ZFSCTL_INO_ROOT || ino == ZFSCTL_INO_SNAPDIR) 1170197167Spjd return (EOPNOTSUPP); 1171197167Spjd 1172168404Spjd ZFS_ENTER(zfsvfs); 1173168404Spjd err = zfs_zget(zfsvfs, ino, &zp); 1174168404Spjd if (err == 0 && zp->z_unlinked) { 1175168404Spjd VN_RELE(ZTOV(zp)); 1176168404Spjd err = EINVAL; 1177168404Spjd } 1178206667Spjd ZFS_EXIT(zfsvfs); 1179168404Spjd if (err != 0) 1180168404Spjd *vpp = NULL; 1181168404Spjd else { 1182168404Spjd *vpp = ZTOV(zp); 1183175202Sattilio vn_lock(*vpp, flags); 1184168404Spjd } 1185171063Sdfr return (err); 1186168404Spjd} 1187168404Spjd 1188168404Spjdstatic int 1189196982Spjdzfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, int *extflagsp, 1190196982Spjd struct ucred **credanonp, int *numsecflavors, int **secflavors) 1191196982Spjd{ 1192196982Spjd zfsvfs_t *zfsvfs = vfsp->vfs_data; 1193196982Spjd 1194196982Spjd /* 1195196982Spjd * If this is regular file system vfsp is the same as 1196196982Spjd * zfsvfs->z_parent->z_vfs, but if it is snapshot, 1197196982Spjd * zfsvfs->z_parent->z_vfs represents parent file system 1198196982Spjd * which we have to use here, because only this file system 1199196982Spjd * has mnt_export configured. 
/*
 * Translate a file handle (fid) into a held, exclusively locked vnode.
 *
 * vfsp - file system the handle was presented to
 * fidp - file handle; fid_len must be SHORT_FID_LEN or LONG_FID_LEN
 * vpp  - receives the vnode on success; set to NULL on entry
 *
 * Returns 0 on success, EINVAL for a malformed or stale handle (unknown
 * length, objset lookup failure, unlinked znode or generation mismatch),
 * otherwise the error from ZFS_ENTER() or zfs_zget().
 */
static int
zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, vnode_t **vpp)
{
	zfsvfs_t *zfsvfs = vfsp->vfs_data;
	znode_t *zp;
	uint64_t object = 0;
	uint64_t fid_gen = 0;
	uint64_t gen_mask;
	uint64_t zp_gen;
	int i, err;

	*vpp = NULL;

	ZFS_ENTER(zfsvfs);

	/*
	 * On FreeBSD we can get snapshot's mount point or its parent file
	 * system mount point depending if snapshot is already mounted or not.
	 */
	if (zfsvfs->z_parent == zfsvfs && fidp->fid_len == LONG_FID_LEN) {
		zfid_long_t *zlfid = (zfid_long_t *)fidp;
		uint64_t objsetid = 0;
		uint64_t setgen = 0;

		/* Both ids are stored least-significant byte first. */
		for (i = 0; i < sizeof (zlfid->zf_setid); i++)
			objsetid |= ((uint64_t)zlfid->zf_setid[i]) << (8 * i);

		/* NOTE(review): setgen is decoded but not used below. */
		for (i = 0; i < sizeof (zlfid->zf_setgen); i++)
			setgen |= ((uint64_t)zlfid->zf_setgen[i]) << (8 * i);

		/*
		 * Leave the current zfsvfs before switching: the lookup
		 * below overwrites 'zfsvfs' and we re-enter on the new one.
		 */
		ZFS_EXIT(zfsvfs);

		err = zfsctl_lookup_objset(vfsp, objsetid, &zfsvfs);
		if (err)
			return (EINVAL);
		ZFS_ENTER(zfsvfs);
	}

	if (fidp->fid_len == SHORT_FID_LEN || fidp->fid_len == LONG_FID_LEN) {
		/* The object/gen fields are read through the short layout. */
		zfid_short_t *zfid = (zfid_short_t *)fidp;

		for (i = 0; i < sizeof (zfid->zf_object); i++)
			object |= ((uint64_t)zfid->zf_object[i]) << (8 * i);

		for (i = 0; i < sizeof (zfid->zf_gen); i++)
			fid_gen |= ((uint64_t)zfid->zf_gen[i]) << (8 * i);
	} else {
		ZFS_EXIT(zfsvfs);
		return (EINVAL);
	}

	/* A zero fid_gen means we are in the .zfs control directories */
	if (fid_gen == 0 &&
	    (object == ZFSCTL_INO_ROOT || object == ZFSCTL_INO_SNAPDIR)) {
		*vpp = zfsvfs->z_ctldir;
		ASSERT(*vpp != NULL);
		if (object == ZFSCTL_INO_SNAPDIR) {
			/* Replaces *vpp with the "snapshot" directory vnode. */
			VERIFY(zfsctl_root_lookup(*vpp, "snapshot", vpp, NULL,
			    0, NULL, NULL, NULL, NULL, NULL) == 0);
		} else {
			VN_HOLD(*vpp);
		}
		ZFS_EXIT(zfsvfs);
		vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY);
		return (0);
	}

	/*
	 * 'i' still holds sizeof (zfid->zf_gen) from the loop above, so the
	 * mask keeps exactly as many low-order bytes of the on-disk
	 * generation as the handle can carry.
	 */
	gen_mask = -1ULL >> (64 - 8 * i);

	/*
	 * NOTE(review): fid_gen is a uint64_t printed with %u here and in
	 * the dprintf() below -- confirm the format is acceptable.
	 */
	dprintf("getting %llu [%u mask %llx]\n", object, fid_gen, gen_mask);
	if (err = zfs_zget(zfsvfs, object, &zp)) {
		ZFS_EXIT(zfsvfs);
		return (err);
	}
	zp_gen = zp->z_phys->zp_gen & gen_mask;
	/* A masked generation of 0 is compared as 1. */
	if (zp_gen == 0)
		zp_gen = 1;
	if (zp->z_unlinked || zp_gen != fid_gen) {
		dprintf("znode gen (%u) != fid gen (%u)\n", zp_gen, fid_gen);
		VN_RELE(ZTOV(zp));
		ZFS_EXIT(zfsvfs);
		return (EINVAL);
	}

	/* Leave the file system before taking the vnode lock. */
	ZFS_EXIT(zfsvfs);

	*vpp = ZTOV(zp);
	vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY);
	vnode_create_vobject(*vpp, zp->z_phys->zp_size, curthread);
	return (0);
}
/*
 * Reopen zfsvfs_t::z_os and release VOPs.
 *
 * zfsvfs - suspended file system instance
 * osname - name of the objset to reopen
 * mode   - open mode passed to dmu_objset_open()
 *
 * Must be entered with 'z_teardown_lock' and 'z_teardown_inactive_lock'
 * held for writer (asserted below); both are released before returning,
 * whether or not the reopen succeeded.
 *
 * Returns 0 on success or the error from dmu_objset_open().  On error
 * z_os is set to NULL and a forced unmount of the file system is attempted.
 */
int
zfs_resume_fs(zfsvfs_t *zfsvfs, const char *osname, int mode)
{
	int err;

	ASSERT(RRW_WRITE_HELD(&zfsvfs->z_teardown_lock));
	ASSERT(RW_WRITE_HELD(&zfsvfs->z_teardown_inactive_lock));

	err = dmu_objset_open(osname, DMU_OST_ZFS, mode, &zfsvfs->z_os);
	if (err) {
		/* Record the failure; zfsvfs_teardown() checks for NULL. */
		zfsvfs->z_os = NULL;
	} else {
		znode_t *zp;

		VERIFY(zfsvfs_setup(zfsvfs, B_FALSE) == 0);

		/*
		 * Attempt to re-establish all the active znodes with
		 * their dbufs.  If a zfs_rezget() fails, then we'll let
		 * any potential callers discover that via ZFS_ENTER_VERIFY_VP
		 * when they try to use their znode.
		 */
		mutex_enter(&zfsvfs->z_znodes_lock);
		for (zp = list_head(&zfsvfs->z_all_znodes); zp;
		    zp = list_next(&zfsvfs->z_all_znodes, zp)) {
			(void) zfs_rezget(zp);
		}
		mutex_exit(&zfsvfs->z_znodes_lock);

	}

	/* release the VOPs */
	rw_exit(&zfsvfs->z_teardown_inactive_lock);
	rrw_exit(&zfsvfs->z_teardown_lock, FTAG);

	if (err) {
		/*
		 * Since we couldn't reopen zfsvfs::z_os, force
		 * unmount this file system.  The unmount is only attempted
		 * when vn_vfswlock() on the covered vnode returns 0, and
		 * its result is deliberately ignored.
		 */
		if (vn_vfswlock(zfsvfs->z_vfs->vfs_vnodecovered) == 0)
			(void) dounmount(zfsvfs->z_vfs, MS_FORCE, curthread);
	}
	return (err);
}
/*
 * Undo zfs_vnodes_adjust(): put back the desiredvnodes value that was
 * saved in desiredvnodes_backup.  Does nothing unless built for i386.
 */
static void
zfs_vnodes_adjust_back(void)
{
#if defined(__i386__)
	/* Restore the value remembered before ZFS tuned it. */
	desiredvnodes = desiredvnodes_backup;
#endif	/* __i386__ */
}
1441172135Spjd */ 1442172135Spjd zfs_vnodes_adjust(); 1443168404Spjd} 1444168404Spjd 1445168404Spjdvoid 1446168404Spjdzfs_fini(void) 1447168404Spjd{ 1448168404Spjd zfsctl_fini(); 1449168404Spjd zfs_znode_fini(); 1450172135Spjd zfs_vnodes_adjust_back(); 1451168404Spjd} 1452168404Spjd 1453168404Spjdint 1454168404Spjdzfs_busy(void) 1455168404Spjd{ 1456168404Spjd return (zfs_active_fs_count != 0); 1457168404Spjd} 1458185029Spjd 1459185029Spjdint 1460185029Spjdzfs_set_version(const char *name, uint64_t newvers) 1461185029Spjd{ 1462185029Spjd int error; 1463185029Spjd objset_t *os; 1464185029Spjd dmu_tx_t *tx; 1465185029Spjd uint64_t curvers; 1466185029Spjd 1467185029Spjd /* 1468185029Spjd * XXX for now, require that the filesystem be unmounted. Would 1469185029Spjd * be nice to find the zfsvfs_t and just update that if 1470185029Spjd * possible. 1471185029Spjd */ 1472185029Spjd 1473185029Spjd if (newvers < ZPL_VERSION_INITIAL || newvers > ZPL_VERSION) 1474185029Spjd return (EINVAL); 1475185029Spjd 1476185029Spjd error = dmu_objset_open(name, DMU_OST_ZFS, DS_MODE_OWNER, &os); 1477185029Spjd if (error) 1478185029Spjd return (error); 1479185029Spjd 1480185029Spjd error = zap_lookup(os, MASTER_NODE_OBJ, ZPL_VERSION_STR, 1481185029Spjd 8, 1, &curvers); 1482185029Spjd if (error) 1483185029Spjd goto out; 1484185029Spjd if (newvers < curvers) { 1485185029Spjd error = EINVAL; 1486185029Spjd goto out; 1487185029Spjd } 1488185029Spjd 1489185029Spjd tx = dmu_tx_create(os); 1490185029Spjd dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, 0, ZPL_VERSION_STR); 1491185029Spjd error = dmu_tx_assign(tx, TXG_WAIT); 1492185029Spjd if (error) { 1493185029Spjd dmu_tx_abort(tx); 1494185029Spjd goto out; 1495185029Spjd } 1496185029Spjd error = zap_update(os, MASTER_NODE_OBJ, ZPL_VERSION_STR, 8, 1, 1497185029Spjd &newvers, tx); 1498185029Spjd 1499185029Spjd spa_history_internal_log(LOG_DS_UPGRADE, 1500185029Spjd dmu_objset_spa(os), tx, CRED(), 1501185029Spjd "oldver=%llu newver=%llu dataset = %llu", 
curvers, newvers, 1502185029Spjd dmu_objset_id(os)); 1503185029Spjd dmu_tx_commit(tx); 1504185029Spjd 1505185029Spjdout: 1506185029Spjd dmu_objset_close(os); 1507185029Spjd return (error); 1508185029Spjd} 1509185029Spjd/* 1510185029Spjd * Read a property stored within the master node. 1511185029Spjd */ 1512185029Spjdint 1513185029Spjdzfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value) 1514185029Spjd{ 1515185029Spjd const char *pname; 1516185029Spjd int error = ENOENT; 1517185029Spjd 1518185029Spjd /* 1519185029Spjd * Look up the file system's value for the property. For the 1520185029Spjd * version property, we look up a slightly different string. 1521185029Spjd */ 1522185029Spjd if (prop == ZFS_PROP_VERSION) 1523185029Spjd pname = ZPL_VERSION_STR; 1524185029Spjd else 1525185029Spjd pname = zfs_prop_to_name(prop); 1526185029Spjd 1527185029Spjd if (os != NULL) 1528185029Spjd error = zap_lookup(os, MASTER_NODE_OBJ, pname, 8, 1, value); 1529185029Spjd 1530185029Spjd if (error == ENOENT) { 1531185029Spjd /* No value set, use the default value */ 1532185029Spjd switch (prop) { 1533185029Spjd case ZFS_PROP_VERSION: 1534185029Spjd *value = ZPL_VERSION; 1535185029Spjd break; 1536185029Spjd case ZFS_PROP_NORMALIZE: 1537185029Spjd case ZFS_PROP_UTF8ONLY: 1538185029Spjd *value = 0; 1539185029Spjd break; 1540185029Spjd case ZFS_PROP_CASE: 1541185029Spjd *value = ZFS_CASE_SENSITIVE; 1542185029Spjd break; 1543185029Spjd default: 1544185029Spjd return (error); 1545185029Spjd } 1546185029Spjd error = 0; 1547185029Spjd } 1548185029Spjd return (error); 1549185029Spjd} 1550