zfs_vfsops.c revision 249643
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2011 Pawel Jakub Dawidek <pawel@dawidek.net>.
 * All rights reserved.
 * Copyright (c) 2013 by Delphix. All rights reserved.
 */

/* Portions Copyright 2010 Robert Milkowski */
29279219Sken
30279219Sken#include <sys/types.h>
31279219Sken#include <sys/param.h>
32279219Sken#include <sys/systm.h>
33279219Sken#include <sys/kernel.h>
34279219Sken#include <sys/sysmacros.h>
35279219Sken#include <sys/kmem.h>
36279219Sken#include <sys/acl.h>
37279219Sken#include <sys/vnode.h>
38279219Sken#include <sys/vfs.h>
39279219Sken#include <sys/mntent.h>
40279219Sken#include <sys/mount.h>
41279219Sken#include <sys/cmn_err.h>
42279219Sken#include <sys/zfs_znode.h>
43279219Sken#include <sys/zfs_dir.h>
44279219Sken#include <sys/zil.h>
45279219Sken#include <sys/fs/zfs.h>
46279219Sken#include <sys/dmu.h>
47279219Sken#include <sys/dsl_prop.h>
48279219Sken#include <sys/dsl_dataset.h>
49279219Sken#include <sys/dsl_deleg.h>
50279219Sken#include <sys/spa.h>
51279219Sken#include <sys/zap.h>
52279219Sken#include <sys/sa.h>
53279219Sken#include <sys/sa_impl.h>
54279219Sken#include <sys/varargs.h>
55279219Sken#include <sys/policy.h>
56279219Sken#include <sys/atomic.h>
57279219Sken#include <sys/zfs_ioctl.h>
58279219Sken#include <sys/zfs_ctldir.h>
59279219Sken#include <sys/zfs_fuid.h>
601590Srgrimes#include <sys/sunddi.h>
611590Srgrimes#include <sys/dnlc.h>
6227752Scharnier#include <sys/dmu_objset.h>
631590Srgrimes#include <sys/spa_boot.h>
641590Srgrimes#include <sys/jail.h>
651590Srgrimes#include "zfs_comutil.h"
661590Srgrimes
671590Srgrimesstruct mtx zfs_debug_mtx;
6827752ScharnierMTX_SYSINIT(zfs_debug_mtx, &zfs_debug_mtx, "zfs_debug", MTX_DEF);
6923693Speter
7027752ScharnierSYSCTL_NODE(_vfs, OID_AUTO, zfs, CTLFLAG_RW, 0, "ZFS file system");
711590Srgrimes
721590Srgrimesint zfs_super_owner;
7394505ScharnierSYSCTL_INT(_vfs_zfs, OID_AUTO, super_owner, CTLFLAG_RW, &zfs_super_owner, 0,
7494505Scharnier    "File system owner can perform privileged operation on his file systems");
7594505Scharnier
761590Srgrimesint zfs_debug_level;
771590SrgrimesTUNABLE_INT("vfs.zfs.debug", &zfs_debug_level);
781590SrgrimesSYSCTL_INT(_vfs_zfs, OID_AUTO, debug, CTLFLAG_RW, &zfs_debug_level, 0,
791590Srgrimes    "Debug level");
801590Srgrimes
811590SrgrimesSYSCTL_NODE(_vfs_zfs, OID_AUTO, version, CTLFLAG_RD, 0, "ZFS versions");
821590Srgrimesstatic int zfs_version_acl = ZFS_ACL_VERSION;
83279219SkenSYSCTL_INT(_vfs_zfs_version, OID_AUTO, acl, CTLFLAG_RD, &zfs_version_acl, 0,
84279219Sken    "ZFS_ACL_VERSION");
8523693Speterstatic int zfs_version_spa = SPA_VERSION;
8623693SpeterSYSCTL_INT(_vfs_zfs_version, OID_AUTO, spa, CTLFLAG_RD, &zfs_version_spa, 0,
8727752Scharnier    "SPA_VERSION");
881590Srgrimesstatic int zfs_version_zpl = ZPL_VERSION;
8923693SpeterSYSCTL_INT(_vfs_zfs_version, OID_AUTO, zpl, CTLFLAG_RD, &zfs_version_zpl, 0,
901590Srgrimes    "ZPL_VERSION");
911590Srgrimes
9223693Speterstatic int zfs_mount(vfs_t *vfsp);
93279219Skenstatic int zfs_umount(vfs_t *vfsp, int fflag);
94279219Skenstatic int zfs_root(vfs_t *vfsp, int flags, vnode_t **vpp);
95279219Skenstatic int zfs_statfs(vfs_t *vfsp, struct statfs *statp);
96279219Skenstatic int zfs_vget(vfs_t *vfsp, ino_t ino, int flags, vnode_t **vpp);
971590Srgrimesstatic int zfs_sync(vfs_t *vfsp, int waitfor);
98279219Skenstatic int zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, int *extflagsp,
99279219Sken    struct ucred **credanonp, int *numsecflavors, int **secflavors);
100279219Skenstatic int zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, int flags, vnode_t **vpp);
101279219Skenstatic void zfs_objset_close(zfsvfs_t *zfsvfs);
102279219Skenstatic void zfs_freevfs(vfs_t *vfsp);
103279219Sken
1047913Sjoergstatic struct vfsops zfs_vfsops = {
1057913Sjoerg	.vfs_mount =		zfs_mount,
1067913Sjoerg	.vfs_unmount =		zfs_umount,
1077929Sjoerg	.vfs_root =		zfs_root,
1087929Sjoerg	.vfs_statfs =		zfs_statfs,
1099541Sjoerg	.vfs_vget =		zfs_vget,
11039260Sgibbs	.vfs_sync =		zfs_sync,
111279219Sken	.vfs_checkexp =		zfs_checkexp,
1127913Sjoerg	.vfs_fhtovp =		zfs_fhtovp,
11339260Sgibbs};
11439260Sgibbs
11539260SgibbsVFS_SET(zfs_vfsops, zfs, VFCF_JAIL | VFCF_DELEGADMIN);
11639260Sgibbs
11739260Sgibbs/*
11839260Sgibbs * We need to keep a count of active fs's.
119279219Sken * This is necessary to prevent our module
120279219Sken * from being unloaded after a umount -f
121279219Sken */
122280231Skenstatic uint32_t	zfs_active_fs_count = 0;
12339260Sgibbs
124279219Sken/*ARGSUSED*/
125279219Skenstatic int
126279219Skenzfs_sync(vfs_t *vfsp, int waitfor)
127279219Sken{
128279219Sken
129279219Sken	/*
130227174Sed	 * Data integrity is job one.  We don't want a compromised kernel
131152396Sdwmalone	 * writing to the storage pool, so we never sync during panic.
132228619Sdim	 */
1331590Srgrimes	if (panicstr)
1347913Sjoerg		return (0);
1351590Srgrimes
13694505Scharnier	if (vfsp != NULL) {
13794505Scharnier		/*
1389541Sjoerg		 * Sync a specific filesystem.
1399541Sjoerg		 */
1409541Sjoerg		zfsvfs_t *zfsvfs = vfsp->vfs_data;
14194505Scharnier		dsl_pool_t *dp;
14294505Scharnier		int error;
14394505Scharnier
144279219Sken		error = vfs_stdsync(vfsp, waitfor);
14594505Scharnier		if (error != 0)
14694505Scharnier			return (error);
147279219Sken
14841913Smjacob		ZFS_ENTER(zfsvfs);
149279219Sken		dp = dmu_objset_pool(zfsvfs->z_os);
15039260Sgibbs
1517913Sjoerg		/*
1527929Sjoerg		 * If the system is shutting down, then skip any
15394505Scharnier		 * filesystems which may exist on a suspended pool.
15494505Scharnier		 */
15594505Scharnier		if (sys_shutdown && spa_suspended(dp->dp_spa)) {
15694505Scharnier			ZFS_EXIT(zfsvfs);
15794505Scharnier			return (0);
15894505Scharnier		}
15939260Sgibbs
16094505Scharnier		if (zfsvfs->z_log != NULL)
16194505Scharnier			zil_commit(zfsvfs->z_log, 0);
16294505Scharnier
16341913Smjacob		ZFS_EXIT(zfsvfs);
16441913Smjacob	} else {
16594505Scharnier		/*
16646928Smjacob		 * Sync all ZFS filesystems.  This is what happens when you
16746928Smjacob		 * run sync(1M).  Unlike other filesystems, ZFS honors the
16894505Scharnier		 * request by waiting for all pools to commit all dirty data.
16994505Scharnier		 */
170279219Sken		spa_sync_allpools();
171279219Sken	}
172279219Sken
173279219Sken	return (0);
174279219Sken}
175279219Sken
17694505Scharnier#ifndef __FreeBSD__
1771590Srgrimesstatic int
1781590Srgrimeszfs_create_unique_device(dev_t *dev)
179279219Sken{
180227174Sed	major_t new_major;
181227174Sed
182227174Sed	do {
183227174Sed		ASSERT3U(zfs_minor, <=, MAXMIN32);
184279219Sken		minor_t start = zfs_minor;
185279219Sken		do {
186279219Sken			mutex_enter(&zfs_dev_mtx);
187279219Sken			if (zfs_minor >= MAXMIN32) {
188279219Sken				/*
189279219Sken				 * If we're still using the real major
190279219Sken				 * keep out of /dev/zfs and /dev/zvol minor
191279219Sken				 * number space.  If we're using a getudev()'ed
192279219Sken				 * major number, we can use all of its minors.
193279219Sken				 */
194279219Sken				if (zfs_major == ddi_name_to_major(ZFS_DRIVER))
195279219Sken					zfs_minor = ZFS_MIN_MINOR;
196279219Sken				else
197279219Sken					zfs_minor = 0;
198279219Sken			} else {
199279219Sken				zfs_minor++;
200279219Sken			}
201279219Sken			*dev = makedevice(zfs_major, zfs_minor);
202279219Sken			mutex_exit(&zfs_dev_mtx);
203227174Sed		} while (vfs_devismounted(*dev) && zfs_minor != start);
204227174Sed		if (zfs_minor == start) {
205227174Sed			/*
2061590Srgrimes			 * We are using all ~262,000 minor numbers for the
2071590Srgrimes			 * current major number.  Create a new major number.
208152396Sdwmalone			 */
2091590Srgrimes			if ((new_major = getudev()) == (major_t)-1) {
210227174Sed				cmn_err(CE_WARN,
2111590Srgrimes				    "zfs_mount: Can't get unique major "
2121590Srgrimes				    "device number.");
2131590Srgrimes				return (-1);
214152396Sdwmalone			}
2151590Srgrimes			mutex_enter(&zfs_dev_mtx);
216279261Sken			zfs_major = new_major;
217279261Sken			zfs_minor = 0;
2181590Srgrimes
2191590Srgrimes			mutex_exit(&zfs_dev_mtx);
2201590Srgrimes		} else {
22124360Simp			break;
2221590Srgrimes		}
2231590Srgrimes		/* CONSTANTCONDITION */
2241590Srgrimes	} while (1);
2251590Srgrimes
2261590Srgrimes	return (0);
2271590Srgrimes}
228279219Sken#endif	/* !__FreeBSD__ */
229279219Sken
2301590Srgrimesstatic void
231279219Skenatime_changed_cb(void *arg, uint64_t newval)
2321590Srgrimes{
2331590Srgrimes	zfsvfs_t *zfsvfs = arg;
2341590Srgrimes
2351590Srgrimes	if (newval == TRUE) {
236279219Sken		zfsvfs->z_atime = TRUE;
2371590Srgrimes		zfsvfs->z_vfs->vfs_flag &= ~MNT_NOATIME;
2381590Srgrimes		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME);
2391590Srgrimes		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_ATIME, NULL, 0);
2401590Srgrimes	} else {
2411590Srgrimes		zfsvfs->z_atime = FALSE;
24227752Scharnier		zfsvfs->z_vfs->vfs_flag |= MNT_NOATIME;
2431590Srgrimes		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_ATIME);
2441590Srgrimes		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME, NULL, 0);
2451590Srgrimes	}
2467913Sjoerg}
2477913Sjoerg
2489541Sjoergstatic void
2499541Sjoergxattr_changed_cb(void *arg, uint64_t newval)
2509541Sjoerg{
251279219Sken	zfsvfs_t *zfsvfs = arg;
252279219Sken
253279219Sken	if (newval == TRUE) {
254279219Sken		/* XXX locking on vfs_flag? */
255279219Sken#ifdef TODO
2561590Srgrimes		zfsvfs->z_vfs->vfs_flag |= VFS_XATTR;
25727752Scharnier#endif
2581590Srgrimes		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOXATTR);
2591590Srgrimes		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_XATTR, NULL, 0);
2601590Srgrimes	} else {
2617929Sjoerg		/* XXX locking on vfs_flag? */
26241925Smjacob#ifdef TODO
2637929Sjoerg		zfsvfs->z_vfs->vfs_flag &= ~VFS_XATTR;
264279219Sken#endif
2657929Sjoerg		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_XATTR);
26627752Scharnier		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOXATTR, NULL, 0);
2677929Sjoerg	}
2687929Sjoerg}
2697929Sjoerg
2707929Sjoergstatic void
2717929Sjoergblksz_changed_cb(void *arg, uint64_t newval)
2727929Sjoerg{
27339260Sgibbs	zfsvfs_t *zfsvfs = arg;
27441925Smjacob
27539260Sgibbs	if (newval < SPA_MINBLOCKSIZE ||
27639260Sgibbs	    newval > SPA_MAXBLOCKSIZE || !ISP2(newval))
27739260Sgibbs		newval = SPA_MAXBLOCKSIZE;
27839260Sgibbs
27939260Sgibbs	zfsvfs->z_max_blksz = newval;
28039260Sgibbs	zfsvfs->z_vfs->mnt_stat.f_iosize = newval;
281279219Sken}
282152396Sdwmalone
2837929Sjoergstatic void
284152396Sdwmalonereadonly_changed_cb(void *arg, uint64_t newval)
285152396Sdwmalone{
286152396Sdwmalone	zfsvfs_t *zfsvfs = arg;
287279219Sken
288279219Sken	if (newval) {
289279219Sken		/* XXX locking on vfs_flag? */
290279219Sken		zfsvfs->z_vfs->vfs_flag |= VFS_RDONLY;
291279219Sken		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RW);
29227752Scharnier		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RO, NULL, 0);
2931590Srgrimes	} else {
2941590Srgrimes		/* XXX locking on vfs_flag? */
2951590Srgrimes		zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY;
29641913Smjacob		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RO);
29741945Smjacob		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RW, NULL, 0);
29841945Smjacob	}
29994505Scharnier}
30041945Smjacob
30141945Smjacobstatic void
30241945Smjacobsetuid_changed_cb(void *arg, uint64_t newval)
30341945Smjacob{
30441945Smjacob	zfsvfs_t *zfsvfs = arg;
30541945Smjacob
30642010Smjacob	if (newval == FALSE) {
30742010Smjacob		zfsvfs->z_vfs->vfs_flag |= VFS_NOSETUID;
30842010Smjacob		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_SETUID);
30942010Smjacob		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID, NULL, 0);
31041945Smjacob	} else {
31141945Smjacob		zfsvfs->z_vfs->vfs_flag &= ~VFS_NOSETUID;
31241945Smjacob		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID);
31341945Smjacob		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_SETUID, NULL, 0);
31441945Smjacob	}
31541945Smjacob}
31641945Smjacob
31741945Smjacobstatic void
31841945Smjacobexec_changed_cb(void *arg, uint64_t newval)
31941945Smjacob{
32042010Smjacob	zfsvfs_t *zfsvfs = arg;
32142010Smjacob
32242010Smjacob	if (newval == FALSE) {
32342010Smjacob		zfsvfs->z_vfs->vfs_flag |= VFS_NOEXEC;
32441945Smjacob		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_EXEC);
32541945Smjacob		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC, NULL, 0);
32641945Smjacob	} else {
32741945Smjacob		zfsvfs->z_vfs->vfs_flag &= ~VFS_NOEXEC;
32841945Smjacob		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC);
32941945Smjacob		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_EXEC, NULL, 0);
33041945Smjacob	}
33141945Smjacob}
33241945Smjacob
33341945Smjacob/*
33441945Smjacob * The nbmand mount option can be changed at mount time.
33541913Smjacob * We can't allow it to be toggled on live file systems or incorrect
33641913Smjacob * behavior may be seen from cifs clients
33741925Smjacob *
33841925Smjacob * This property isn't registered via dsl_prop_register(), but this callback
33941925Smjacob * will be called when a file system is first mounted
34041913Smjacob */
34141945Smjacobstatic void
34241913Smjacobnbmand_changed_cb(void *arg, uint64_t newval)
34341925Smjacob{
34441925Smjacob	zfsvfs_t *zfsvfs = arg;
34541913Smjacob	if (newval == FALSE) {
34641925Smjacob		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NBMAND);
34741913Smjacob		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NONBMAND, NULL, 0);
34841913Smjacob	} else {
34941925Smjacob		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NONBMAND);
35041925Smjacob		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NBMAND, NULL, 0);
35141925Smjacob	}
35241913Smjacob}
35341925Smjacob
35441913Smjacobstatic void
35541925Smjacobsnapdir_changed_cb(void *arg, uint64_t newval)
35646928Smjacob{
35746928Smjacob	zfsvfs_t *zfsvfs = arg;
35846928Smjacob
35946928Smjacob	zfsvfs->z_show_ctldir = newval;
36046928Smjacob}
36146928Smjacob
36246928Smjacobstatic void
36346928Smjacobvscan_changed_cb(void *arg, uint64_t newval)
36446928Smjacob{
36546928Smjacob	zfsvfs_t *zfsvfs = arg;
36646928Smjacob
36746928Smjacob	zfsvfs->z_vscan = newval;
36846928Smjacob}
36946928Smjacob
37046928Smjacobstatic void
37146928Smjacobacl_mode_changed_cb(void *arg, uint64_t newval)
37246928Smjacob{
37346928Smjacob	zfsvfs_t *zfsvfs = arg;
37446928Smjacob
37546928Smjacob	zfsvfs->z_acl_mode = newval;
37646928Smjacob}
37746928Smjacob
37846928Smjacobstatic void
37946928Smjacobacl_inherit_changed_cb(void *arg, uint64_t newval)
380279219Sken{
381279219Sken	zfsvfs_t *zfsvfs = arg;
382279219Sken
383279219Sken	zfsvfs->z_acl_inherit = newval;
384279219Sken}
385279219Sken
386279219Skenstatic int
387279219Skenzfs_register_callbacks(vfs_t *vfsp)
388280231Sken{
389280231Sken	struct dsl_dataset *ds = NULL;
390280231Sken	objset_t *os = NULL;
391280231Sken	zfsvfs_t *zfsvfs = NULL;
392279219Sken	uint64_t nbmand;
393280231Sken	boolean_t readonly = B_FALSE;
394280231Sken	boolean_t do_readonly = B_FALSE;
395280231Sken	boolean_t setuid = B_FALSE;
396280231Sken	boolean_t do_setuid = B_FALSE;
397280231Sken	boolean_t exec = B_FALSE;
398279219Sken	boolean_t do_exec = B_FALSE;
399279219Sken#ifdef illumos
400279219Sken	boolean_t devices = B_FALSE;
401279219Sken	boolean_t do_devices = B_FALSE;
402279219Sken#endif
403279219Sken	boolean_t xattr = B_FALSE;
404279219Sken	boolean_t do_xattr = B_FALSE;
405279219Sken	boolean_t atime = B_FALSE;
406279219Sken	boolean_t do_atime = B_FALSE;
407279219Sken	int error = 0;
408279219Sken
409279219Sken	ASSERT(vfsp);
410279219Sken	zfsvfs = vfsp->vfs_data;
411279219Sken	ASSERT(zfsvfs);
412279219Sken	os = zfsvfs->z_os;
413279219Sken
414279219Sken	/*
415279219Sken	 * This function can be called for a snapshot when we update snapshot's
416279219Sken	 * mount point, which isn't really supported.
417279219Sken	 */
418279219Sken	if (dmu_objset_is_snapshot(os))
419279219Sken		return (EOPNOTSUPP);
420279219Sken
42141913Smjacob	/*
42241913Smjacob	 * The act of registering our callbacks will destroy any mount
42341913Smjacob	 * options we may have.  In order to enable temporary overrides
4241590Srgrimes	 * of mount options, we stash away the current values and
42527752Scharnier	 * restore them after we register the callbacks.
4261590Srgrimes	 */
4271590Srgrimes	if (vfs_optionisset(vfsp, MNTOPT_RO, NULL) ||
42827752Scharnier	    !spa_writeable(dmu_objset_spa(os))) {
4291590Srgrimes		readonly = B_TRUE;
4301590Srgrimes		do_readonly = B_TRUE;
43141925Smjacob	} else if (vfs_optionisset(vfsp, MNTOPT_RW, NULL)) {
4321590Srgrimes		readonly = B_FALSE;
4331590Srgrimes		do_readonly = B_TRUE;
4341590Srgrimes	}
435227174Sed	if (vfs_optionisset(vfsp, MNTOPT_NOSUID, NULL)) {
4361590Srgrimes		setuid = B_FALSE;
437152396Sdwmalone		do_setuid = B_TRUE;
438152396Sdwmalone	} else {
439152396Sdwmalone		if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) {
4401590Srgrimes			setuid = B_FALSE;
4417913Sjoerg			do_setuid = B_TRUE;
44294505Scharnier		} else if (vfs_optionisset(vfsp, MNTOPT_SETUID, NULL)) {
4431590Srgrimes			setuid = B_TRUE;
4441590Srgrimes			do_setuid = B_TRUE;
4451590Srgrimes		}
4461590Srgrimes	}
4471590Srgrimes	if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL)) {
448227174Sed		exec = B_FALSE;
449152396Sdwmalone		do_exec = B_TRUE;
4501590Srgrimes	} else if (vfs_optionisset(vfsp, MNTOPT_EXEC, NULL)) {
451227174Sed		exec = B_TRUE;
4521590Srgrimes		do_exec = B_TRUE;
4531590Srgrimes	}
4541590Srgrimes	if (vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL)) {
4551590Srgrimes		xattr = B_FALSE;
4561590Srgrimes		do_xattr = B_TRUE;
4571590Srgrimes	} else if (vfs_optionisset(vfsp, MNTOPT_XATTR, NULL)) {
4581590Srgrimes		xattr = B_TRUE;
4591590Srgrimes		do_xattr = B_TRUE;
4601590Srgrimes	}
4611590Srgrimes	if (vfs_optionisset(vfsp, MNTOPT_NOATIME, NULL)) {
4627913Sjoerg		atime = B_FALSE;
4637913Sjoerg		do_atime = B_TRUE;
4647913Sjoerg	} else if (vfs_optionisset(vfsp, MNTOPT_ATIME, NULL)) {
46592764Sphk		atime = B_TRUE;
46692764Sphk		do_atime = B_TRUE;
46792764Sphk	}
46892764Sphk
46992764Sphk	/*
4707913Sjoerg	 * nbmand is a special property.  It can only be changed at
4711590Srgrimes	 * mount time.
4721590Srgrimes	 *
4731590Srgrimes	 * This is weird, but it is documented to only be changeable
4741590Srgrimes	 * at mount time.
4751590Srgrimes	 */
476227174Sed	if (vfs_optionisset(vfsp, MNTOPT_NONBMAND, NULL)) {
477152396Sdwmalone		nbmand = B_FALSE;
4781590Srgrimes	} else if (vfs_optionisset(vfsp, MNTOPT_NBMAND, NULL)) {
479152396Sdwmalone		nbmand = B_TRUE;
480152396Sdwmalone	} else {
4811590Srgrimes		char osname[MAXNAMELEN];
4821590Srgrimes
4831590Srgrimes		dmu_objset_name(os, osname);
4841590Srgrimes		if (error = dsl_prop_get_integer(osname, "nbmand", &nbmand,
4851590Srgrimes		    NULL)) {
48611608Sbde			return (error);
48711608Sbde		}
4881590Srgrimes	}
4891590Srgrimes
4901590Srgrimes	/*
49127752Scharnier	 * Register property callbacks.
4921590Srgrimes	 *
4931590Srgrimes	 * It would probably be fine to just check for i/o error from
4941590Srgrimes	 * the first prop_register(), but I guess I like to go
4951590Srgrimes	 * overboard...
4961590Srgrimes	 */
4971590Srgrimes	ds = dmu_objset_ds(os);
4981590Srgrimes	dsl_pool_config_enter(dmu_objset_pool(os), FTAG);
4991590Srgrimes	error = dsl_prop_register(ds,
5001590Srgrimes	    zfs_prop_to_name(ZFS_PROP_ATIME), atime_changed_cb, zfsvfs);
5011590Srgrimes	error = error ? error : dsl_prop_register(ds,
5021590Srgrimes	    zfs_prop_to_name(ZFS_PROP_XATTR), xattr_changed_cb, zfsvfs);
5031590Srgrimes	error = error ? error : dsl_prop_register(ds,
5041590Srgrimes	    zfs_prop_to_name(ZFS_PROP_RECORDSIZE), blksz_changed_cb, zfsvfs);
5051590Srgrimes	error = error ? error : dsl_prop_register(ds,
506227174Sed	    zfs_prop_to_name(ZFS_PROP_READONLY), readonly_changed_cb, zfsvfs);
507152396Sdwmalone#ifdef illumos
5081590Srgrimes	error = error ? error : dsl_prop_register(ds,
50994505Scharnier	    zfs_prop_to_name(ZFS_PROP_DEVICES), devices_changed_cb, zfsvfs);
5101590Srgrimes#endif
5111590Srgrimes	error = error ? error : dsl_prop_register(ds,
5121590Srgrimes	    zfs_prop_to_name(ZFS_PROP_SETUID), setuid_changed_cb, zfsvfs);
513227174Sed	error = error ? error : dsl_prop_register(ds,
51439260Sgibbs	    zfs_prop_to_name(ZFS_PROP_EXEC), exec_changed_cb, zfsvfs);
51539260Sgibbs	error = error ? error : dsl_prop_register(ds,
51639260Sgibbs	    zfs_prop_to_name(ZFS_PROP_SNAPDIR), snapdir_changed_cb, zfsvfs);
51739260Sgibbs	error = error ? error : dsl_prop_register(ds,
51839260Sgibbs	    zfs_prop_to_name(ZFS_PROP_ACLMODE), acl_mode_changed_cb, zfsvfs);
51939260Sgibbs	error = error ? error : dsl_prop_register(ds,
52039260Sgibbs	    zfs_prop_to_name(ZFS_PROP_ACLINHERIT), acl_inherit_changed_cb,
52139260Sgibbs	    zfsvfs);
52239260Sgibbs	error = error ? error : dsl_prop_register(ds,
52339260Sgibbs	    zfs_prop_to_name(ZFS_PROP_VSCAN), vscan_changed_cb, zfsvfs);
52439260Sgibbs	dsl_pool_config_exit(dmu_objset_pool(os), FTAG);
52539260Sgibbs	if (error)
526227174Sed		goto unregister;
5277929Sjoerg
5287913Sjoerg	/*
5297913Sjoerg	 * Invoke our callbacks to restore temporary mount options.
530279219Sken	 */
5317913Sjoerg	if (do_readonly)
532279219Sken		readonly_changed_cb(zfsvfs, readonly);
5337929Sjoerg	if (do_setuid)
53444397Smjacob		setuid_changed_cb(zfsvfs, setuid);
535279219Sken	if (do_exec)
53644397Smjacob		exec_changed_cb(zfsvfs, exec);
5377913Sjoerg	if (do_xattr)
5387913Sjoerg		xattr_changed_cb(zfsvfs, xattr);
539227174Sed	if (do_atime)
5407913Sjoerg		atime_changed_cb(zfsvfs, atime);
5417913Sjoerg
5427913Sjoerg	nbmand_changed_cb(zfsvfs, nbmand);
5437913Sjoerg
5447913Sjoerg	return (0);
5457913Sjoerg
54639260Sgibbsunregister:
5477913Sjoerg	/*
5487913Sjoerg	 * We may attempt to unregister some callbacks that are not
5497913Sjoerg	 * registered, but this is OK; it will simply return ENOMSG,
5507913Sjoerg	 * which we will ignore.
551227174Sed	 */
55239260Sgibbs	(void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_ATIME),
55339260Sgibbs	    atime_changed_cb, zfsvfs);
55439260Sgibbs	(void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_XATTR),
555227174Sed	    xattr_changed_cb, zfsvfs);
5567913Sjoerg	(void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_RECORDSIZE),
55739260Sgibbs	    blksz_changed_cb, zfsvfs);
55839260Sgibbs	(void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_READONLY),
55939260Sgibbs	    readonly_changed_cb, zfsvfs);
56039260Sgibbs#ifdef illumos
56139260Sgibbs	(void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_DEVICES),
56239260Sgibbs	    devices_changed_cb, zfsvfs);
56339260Sgibbs#endif
56439260Sgibbs	(void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_SETUID),
56539260Sgibbs	    setuid_changed_cb, zfsvfs);
56639260Sgibbs	(void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_EXEC),
56744618Smjacob	    exec_changed_cb, zfsvfs);
56839260Sgibbs	(void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_SNAPDIR),
56939260Sgibbs	    snapdir_changed_cb, zfsvfs);
57039260Sgibbs	(void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_ACLMODE),
57139260Sgibbs	    acl_mode_changed_cb, zfsvfs);
57239260Sgibbs	(void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_ACLINHERIT),
573227174Sed	    acl_inherit_changed_cb, zfsvfs);
57439260Sgibbs	(void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_VSCAN),
57539260Sgibbs	    vscan_changed_cb, zfsvfs);
576227174Sed	return (error);
57739260Sgibbs}
57839260Sgibbs
57939260Sgibbsstatic int
58039260Sgibbszfs_space_delta_cb(dmu_object_type_t bonustype, void *data,
58139260Sgibbs    uint64_t *userp, uint64_t *groupp)
58239260Sgibbs{
58339260Sgibbs	/*
58439260Sgibbs	 * Is it a valid type of object to track?
58539260Sgibbs	 */
586279219Sken	if (bonustype != DMU_OT_ZNODE && bonustype != DMU_OT_SA)
587279219Sken		return (SET_ERROR(ENOENT));
588279219Sken
589279219Sken	/*
590279219Sken	 * If we have a NULL data pointer
591279219Sken	 * then assume the id's aren't changing and
592279219Sken	 * return EEXIST to the dmu to let it know to
593279219Sken	 * use the same ids
594279219Sken	 */
595279219Sken	if (data == NULL)
596279219Sken		return (SET_ERROR(EEXIST));
597279219Sken
598279219Sken	if (bonustype == DMU_OT_ZNODE) {
599279219Sken		znode_phys_t *znp = data;
600279219Sken		*userp = znp->zp_uid;
601279219Sken		*groupp = znp->zp_gid;
602279219Sken	} else {
603279219Sken		int hdrsize;
604279219Sken		sa_hdr_phys_t *sap = data;
605279219Sken		sa_hdr_phys_t sa = *sap;
606279219Sken		boolean_t swap = B_FALSE;
607279219Sken
608279219Sken		ASSERT(bonustype == DMU_OT_SA);
609279219Sken
610279219Sken		if (sa.sa_magic == 0) {
611279219Sken			/*
612279219Sken			 * This should only happen for newly created
613279219Sken			 * files that haven't had the znode data filled
614279219Sken			 * in yet.
615279219Sken			 */
616279219Sken			*userp = 0;
617279219Sken			*groupp = 0;
618227174Sed			return (0);
6197913Sjoerg		}
6207913Sjoerg		if (sa.sa_magic == BSWAP_32(SA_MAGIC)) {
62144644Smjacob			sa.sa_magic = SA_MAGIC;
62239260Sgibbs			sa.sa_layout_info = BSWAP_16(sa.sa_layout_info);
62344644Smjacob			swap = B_TRUE;
62439260Sgibbs		} else {
62544644Smjacob			VERIFY3U(sa.sa_magic, ==, SA_MAGIC);
62644644Smjacob		}
62744644Smjacob
62844644Smjacob		hdrsize = sa_hdrsize(&sa);
62939260Sgibbs		VERIFY3U(hdrsize, >=, sizeof (sa_hdr_phys_t));
630279219Sken		*userp = *((uint64_t *)((uintptr_t)data + hdrsize +
63139260Sgibbs		    SA_UID_OFFSET));
632279219Sken		*groupp = *((uint64_t *)((uintptr_t)data + hdrsize +
63339260Sgibbs		    SA_GID_OFFSET));
634279219Sken		if (swap) {
63539260Sgibbs			*userp = BSWAP_64(*userp);
636279219Sken			*groupp = BSWAP_64(*groupp);
63739260Sgibbs		}
638279219Sken	}
63943629Smjacob	return (0);
64043629Smjacob}
64169248Skris
64243629Smjacobstatic void
643279219Skenfuidstr_to_sid(zfsvfs_t *zfsvfs, const char *fuidstr,
644279219Sken    char *domainbuf, int buflen, uid_t *ridp)
645279219Sken{
646279219Sken	uint64_t fuid;
647279219Sken	const char *domain;
648279219Sken
649279219Sken	fuid = strtonum(fuidstr, NULL);
650279219Sken
651279219Sken	domain = zfs_fuid_find_by_idx(zfsvfs, FUID_INDEX(fuid));
652279219Sken	if (domain)
653279219Sken		(void) strlcpy(domainbuf, domain, buflen);
654279219Sken	else
655279219Sken		domainbuf[0] = '\0';
656279219Sken	*ridp = FUID_RID(fuid);
657279219Sken}
658279219Sken
659279219Skenstatic uint64_t
660279219Skenzfs_userquota_prop_to_obj(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type)
661279219Sken{
662279219Sken	switch (type) {
663279219Sken	case ZFS_PROP_USERUSED:
664279219Sken		return (DMU_USERUSED_OBJECT);
665279219Sken	case ZFS_PROP_GROUPUSED:
666279219Sken		return (DMU_GROUPUSED_OBJECT);
667279219Sken	case ZFS_PROP_USERQUOTA:
668279219Sken		return (zfsvfs->z_userquota_obj);
669279219Sken	case ZFS_PROP_GROUPQUOTA:
670279219Sken		return (zfsvfs->z_groupquota_obj);
671279219Sken	}
672279219Sken	return (0);
673279219Sken}
674279219Sken
675279219Skenint
676279219Skenzfs_userspace_many(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type,
677279219Sken    uint64_t *cookiep, void *vbuf, uint64_t *bufsizep)
678279219Sken{
679279219Sken	int error;
680279219Sken	zap_cursor_t zc;
681279219Sken	zap_attribute_t za;
682279219Sken	zfs_useracct_t *buf = vbuf;
68343629Smjacob	uint64_t obj;
684279219Sken
685279219Sken	if (!dmu_objset_userspace_present(zfsvfs->z_os))
686279219Sken		return (SET_ERROR(ENOTSUP));
687279219Sken
68843629Smjacob	obj = zfs_userquota_prop_to_obj(zfsvfs, type);
689279219Sken	if (obj == 0) {
690279219Sken		*bufsizep = 0;
691279219Sken		return (0);
692279219Sken	}
693279219Sken
694279219Sken	for (zap_cursor_init_serialized(&zc, zfsvfs->z_os, obj, *cookiep);
695279219Sken	    (error = zap_cursor_retrieve(&zc, &za)) == 0;
69643629Smjacob	    zap_cursor_advance(&zc)) {
697279219Sken		if ((uintptr_t)buf - (uintptr_t)vbuf + sizeof (zfs_useracct_t) >
698279219Sken		    *bufsizep)
699279219Sken			break;
700279219Sken
701279219Sken		fuidstr_to_sid(zfsvfs, za.za_name,
70243629Smjacob		    buf->zu_domain, sizeof (buf->zu_domain), &buf->zu_rid);
703279219Sken
704279219Sken		buf->zu_space = za.za_first_integer;
705279219Sken		buf++;
706279219Sken	}
707279219Sken	if (error == ENOENT)
708279219Sken		error = 0;
709279219Sken
71043629Smjacob	ASSERT3U((uintptr_t)buf - (uintptr_t)vbuf, <=, *bufsizep);
711279219Sken	*bufsizep = (uintptr_t)buf - (uintptr_t)vbuf;
712279219Sken	*cookiep = zap_cursor_serialize(&zc);
713279219Sken	zap_cursor_fini(&zc);
714279219Sken	return (error);
715279219Sken}
716279219Sken
71743629Smjacob/*
718279219Sken * buf must be big enough (eg, 32 bytes)
719279219Sken */
720279219Skenstatic int
721279219Skenid_to_fuidstr(zfsvfs_t *zfsvfs, const char *domain, uid_t rid,
722279219Sken    char *buf, boolean_t addok)
72343629Smjacob{
724279219Sken	uint64_t fuid;
72543629Smjacob	int domainid = 0;
726279219Sken
727279219Sken	if (domain && domain[0]) {
728279219Sken		domainid = zfs_fuid_find_by_domain(zfsvfs, domain, NULL, addok);
729279219Sken		if (domainid == -1)
730279219Sken			return (SET_ERROR(ENOENT));
731279219Sken	}
732279219Sken	fuid = FUID_ENCODE(domainid, rid);
733279219Sken	(void) sprintf(buf, "%llx", (longlong_t)fuid);
734279219Sken	return (0);
735279219Sken}
736279219Sken
737279219Skenint
738279219Skenzfs_userspace_one(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type,
739279219Sken    const char *domain, uint64_t rid, uint64_t *valp)
740279219Sken{
741279219Sken	char buf[32];
742279219Sken	int err;
743279219Sken	uint64_t obj;
744279219Sken
745279219Sken	*valp = 0;
746279219Sken
747279219Sken	if (!dmu_objset_userspace_present(zfsvfs->z_os))
748279219Sken		return (SET_ERROR(ENOTSUP));
749279219Sken
750279219Sken	obj = zfs_userquota_prop_to_obj(zfsvfs, type);
751279219Sken	if (obj == 0)
752279219Sken		return (0);
753279219Sken
754279219Sken	err = id_to_fuidstr(zfsvfs, domain, rid, buf, B_FALSE);
755279219Sken	if (err)
756279219Sken		return (err);
757279219Sken
758279219Sken	err = zap_lookup(zfsvfs->z_os, obj, buf, 8, 1, valp);
759279219Sken	if (err == ENOENT)
760279219Sken		err = 0;
761279219Sken	return (err);
762279219Sken}
763279219Sken
764279219Skenint
765279219Skenzfs_set_userquota(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type,
766279219Sken    const char *domain, uint64_t rid, uint64_t quota)
767279219Sken{
768279219Sken	char buf[32];
769279219Sken	int err;
770279219Sken	dmu_tx_t *tx;
771279219Sken	uint64_t *objp;
772279219Sken	boolean_t fuid_dirtied;
773279219Sken
774279219Sken	if (type != ZFS_PROP_USERQUOTA && type != ZFS_PROP_GROUPQUOTA)
775279219Sken		return (SET_ERROR(EINVAL));
776279219Sken
777279219Sken	if (zfsvfs->z_version < ZPL_VERSION_USERSPACE)
778279219Sken		return (SET_ERROR(ENOTSUP));
779279219Sken
780279219Sken	objp = (type == ZFS_PROP_USERQUOTA) ? &zfsvfs->z_userquota_obj :
781279219Sken	    &zfsvfs->z_groupquota_obj;
782279219Sken
783279219Sken	err = id_to_fuidstr(zfsvfs, domain, rid, buf, B_TRUE);
784279219Sken	if (err)
785279219Sken		return (err);
786279219Sken	fuid_dirtied = zfsvfs->z_fuid_dirty;
787279219Sken
788279219Sken	tx = dmu_tx_create(zfsvfs->z_os);
789279219Sken	dmu_tx_hold_zap(tx, *objp ? *objp : DMU_NEW_OBJECT, B_TRUE, NULL);
790279219Sken	if (*objp == 0) {
791279219Sken		dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_TRUE,
792279219Sken		    zfs_userquota_prop_prefixes[type]);
793279219Sken	}
794279219Sken	if (fuid_dirtied)
795279219Sken		zfs_fuid_txhold(zfsvfs, tx);
796279219Sken	err = dmu_tx_assign(tx, TXG_WAIT);
797279219Sken	if (err) {
798279219Sken		dmu_tx_abort(tx);
799279219Sken		return (err);
800279219Sken	}
801279219Sken
802279219Sken	mutex_enter(&zfsvfs->z_lock);
803279219Sken	if (*objp == 0) {
804279219Sken		*objp = zap_create(zfsvfs->z_os, DMU_OT_USERGROUP_QUOTA,
805279219Sken		    DMU_OT_NONE, 0, tx);
806279219Sken		VERIFY(0 == zap_add(zfsvfs->z_os, MASTER_NODE_OBJ,
807279219Sken		    zfs_userquota_prop_prefixes[type], 8, 1, objp, tx));
808279219Sken	}
809279219Sken	mutex_exit(&zfsvfs->z_lock);
810279219Sken
811279219Sken	if (quota == 0) {
812279219Sken		err = zap_remove(zfsvfs->z_os, *objp, buf, tx);
813279219Sken		if (err == ENOENT)
814279219Sken			err = 0;
815279219Sken	} else {
816279219Sken		err = zap_update(zfsvfs->z_os, *objp, buf, 8, 1, &quota, tx);
817279219Sken	}
818279219Sken	ASSERT(err == 0);
819279219Sken	if (fuid_dirtied)
820279219Sken		zfs_fuid_sync(zfsvfs, tx);
821279219Sken	dmu_tx_commit(tx);
822279219Sken	return (err);
823279219Sken}
824279219Sken
825279219Skenboolean_t
826279219Skenzfs_fuid_overquota(zfsvfs_t *zfsvfs, boolean_t isgroup, uint64_t fuid)
827279219Sken{
828279219Sken	char buf[32];
829279219Sken	uint64_t used, quota, usedobj, quotaobj;
830279219Sken	int err;
831279219Sken
832279219Sken	usedobj = isgroup ? DMU_GROUPUSED_OBJECT : DMU_USERUSED_OBJECT;
833279219Sken	quotaobj = isgroup ? zfsvfs->z_groupquota_obj : zfsvfs->z_userquota_obj;
834279219Sken
835279219Sken	if (quotaobj == 0 || zfsvfs->z_replay)
836279219Sken		return (B_FALSE);
837279219Sken
838279219Sken	(void) sprintf(buf, "%llx", (longlong_t)fuid);
839279219Sken	err = zap_lookup(zfsvfs->z_os, quotaobj, buf, 8, 1, &quota);
840279219Sken	if (err != 0)
841279219Sken		return (B_FALSE);
842279219Sken
843279219Sken	err = zap_lookup(zfsvfs->z_os, usedobj, buf, 8, 1, &used);
844279219Sken	if (err != 0)
845279219Sken		return (B_FALSE);
846279219Sken	return (used >= quota);
847279219Sken}
848279219Sken
849279219Skenboolean_t
850279219Skenzfs_owner_overquota(zfsvfs_t *zfsvfs, znode_t *zp, boolean_t isgroup)
85143629Smjacob{
852279219Sken	uint64_t fuid;
853279219Sken	uint64_t quotaobj;
85443629Smjacob
855279219Sken	quotaobj = isgroup ? zfsvfs->z_groupquota_obj : zfsvfs->z_userquota_obj;
85643629Smjacob
857279219Sken	fuid = isgroup ? zp->z_gid : zp->z_uid;
858279219Sken
859279219Sken	if (quotaobj == 0 || zfsvfs->z_replay)
860279219Sken		return (B_FALSE);
861279219Sken
862279219Sken	return (zfs_fuid_overquota(zfsvfs, isgroup, fuid));
863279219Sken}
864279219Sken
865279219Skenint
866279219Skenzfsvfs_create(const char *osname, zfsvfs_t **zfvp)
867279219Sken{
868279219Sken	objset_t *os;
869279219Sken	zfsvfs_t *zfsvfs;
870279219Sken	uint64_t zval;
871279219Sken	int i, error;
872279219Sken	uint64_t sa_obj;
873279219Sken
874279219Sken	zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP);
875279219Sken
876279219Sken	/*
877279219Sken	 * We claim to always be readonly so we can open snapshots;
878279219Sken	 * other ZPL code will prevent us from writing to snapshots.
879279219Sken	 */
880279219Sken	error = dmu_objset_own(osname, DMU_OST_ZFS, B_TRUE, zfsvfs, &os);
881279219Sken	if (error) {
882279219Sken		kmem_free(zfsvfs, sizeof (zfsvfs_t));
883279219Sken		return (error);
884279219Sken	}
885279219Sken
886279219Sken	/*
887279219Sken	 * Initialize the zfs-specific filesystem structure.
888279219Sken	 * Should probably make this a kmem cache, shuffle fields,
889279219Sken	 * and just bzero up to z_hold_mtx[].
890279219Sken	 */
891279219Sken	zfsvfs->z_vfs = NULL;
892279219Sken	zfsvfs->z_parent = zfsvfs;
893279219Sken	zfsvfs->z_max_blksz = SPA_MAXBLOCKSIZE;
894279219Sken	zfsvfs->z_show_ctldir = ZFS_SNAPDIR_VISIBLE;
895279219Sken	zfsvfs->z_os = os;
896279219Sken
897279219Sken	error = zfs_get_zplprop(os, ZFS_PROP_VERSION, &zfsvfs->z_version);
898279219Sken	if (error) {
899279219Sken		goto out;
900279219Sken	} else if (zfsvfs->z_version >
901279219Sken	    zfs_zpl_version_map(spa_version(dmu_objset_spa(os)))) {
902279219Sken		(void) printf("Can't mount a version %lld file system "
903279219Sken		    "on a version %lld pool\n. Pool must be upgraded to mount "
904279219Sken		    "this file system.", (u_longlong_t)zfsvfs->z_version,
905279219Sken		    (u_longlong_t)spa_version(dmu_objset_spa(os)));
906279219Sken		error = SET_ERROR(ENOTSUP);
907279219Sken		goto out;
908279219Sken	}
909279219Sken	if ((error = zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &zval)) != 0)
910279219Sken		goto out;
911279219Sken	zfsvfs->z_norm = (int)zval;
912279219Sken
913279219Sken	if ((error = zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &zval)) != 0)
914279219Sken		goto out;
915279219Sken	zfsvfs->z_utf8 = (zval != 0);
916279219Sken
917279219Sken	if ((error = zfs_get_zplprop(os, ZFS_PROP_CASE, &zval)) != 0)
918279219Sken		goto out;
919279219Sken	zfsvfs->z_case = (uint_t)zval;
920279219Sken
921279219Sken	/*
922279219Sken	 * Fold case on file systems that are always or sometimes case
923279219Sken	 * insensitive.
924279219Sken	 */
925279219Sken	if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE ||
926279219Sken	    zfsvfs->z_case == ZFS_CASE_MIXED)
927279219Sken		zfsvfs->z_norm |= U8_TEXTPREP_TOUPPER;
928279219Sken
929279219Sken	zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os);
930279219Sken	zfsvfs->z_use_sa = USE_SA(zfsvfs->z_version, zfsvfs->z_os);
931279219Sken
932279219Sken	if (zfsvfs->z_use_sa) {
933279219Sken		/* should either have both of these objects or none */
934279219Sken		error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1,
935279219Sken		    &sa_obj);
936279219Sken		if (error)
937279219Sken			return (error);
938279219Sken	} else {
939279219Sken		/*
940279219Sken		 * Pre SA versions file systems should never touch
941279219Sken		 * either the attribute registration or layout objects.
942279219Sken		 */
943279219Sken		sa_obj = 0;
944279219Sken	}
945279219Sken
946279219Sken	error = sa_setup(os, sa_obj, zfs_attr_table, ZPL_END,
947279219Sken	    &zfsvfs->z_attr_table);
948279219Sken	if (error)
949279219Sken		goto out;
950279219Sken
951279219Sken	if (zfsvfs->z_version >= ZPL_VERSION_SA)
952279219Sken		sa_register_update_callback(os, zfs_sa_upgrade);
953279219Sken
954279219Sken	error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_ROOT_OBJ, 8, 1,
955279219Sken	    &zfsvfs->z_root);
956279219Sken	if (error)
957279219Sken		goto out;
958279219Sken	ASSERT(zfsvfs->z_root != 0);
959279219Sken
960279219Sken	error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_UNLINKED_SET, 8, 1,
961279219Sken	    &zfsvfs->z_unlinkedobj);
962279219Sken	if (error)
963279219Sken		goto out;
964279219Sken
965279219Sken	error = zap_lookup(os, MASTER_NODE_OBJ,
966279219Sken	    zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA],
967279219Sken	    8, 1, &zfsvfs->z_userquota_obj);
968279219Sken	if (error && error != ENOENT)
969279219Sken		goto out;
970279219Sken
971279219Sken	error = zap_lookup(os, MASTER_NODE_OBJ,
972279219Sken	    zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA],
973279219Sken	    8, 1, &zfsvfs->z_groupquota_obj);
974279219Sken	if (error && error != ENOENT)
975279219Sken		goto out;
976279219Sken
977279219Sken	error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_FUID_TABLES, 8, 1,
978279219Sken	    &zfsvfs->z_fuid_obj);
979279219Sken	if (error && error != ENOENT)
980279219Sken		goto out;
981279219Sken
982279219Sken	error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SHARES_DIR, 8, 1,
983279219Sken	    &zfsvfs->z_shares_dir);
984279219Sken	if (error && error != ENOENT)
985279219Sken		goto out;
986279219Sken
987279219Sken	mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL);
988279219Sken	mutex_init(&zfsvfs->z_lock, NULL, MUTEX_DEFAULT, NULL);
989279219Sken	list_create(&zfsvfs->z_all_znodes, sizeof (znode_t),
990279219Sken	    offsetof(znode_t, z_link_node));
991279219Sken	rrw_init(&zfsvfs->z_teardown_lock, B_FALSE);
992279219Sken	rw_init(&zfsvfs->z_teardown_inactive_lock, NULL, RW_DEFAULT, NULL);
993279219Sken	rw_init(&zfsvfs->z_fuid_lock, NULL, RW_DEFAULT, NULL);
994279219Sken	for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
995279219Sken		mutex_init(&zfsvfs->z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL);
996279219Sken
997279219Sken	*zfvp = zfsvfs;
998279219Sken	return (0);
999279219Sken
1000279219Skenout:
1001279219Sken	dmu_objset_disown(os, zfsvfs);
1002279219Sken	*zfvp = NULL;
1003279219Sken	kmem_free(zfsvfs, sizeof (zfsvfs_t));
1004279219Sken	return (error);
1005279219Sken}
1006279219Sken
1007279219Skenstatic int
1008279219Skenzfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting)
1009279219Sken{
1010279219Sken	int error;
1011279219Sken
1012279219Sken	error = zfs_register_callbacks(zfsvfs->z_vfs);
1013279219Sken	if (error)
1014279219Sken		return (error);
1015279219Sken
1016279219Sken	/*
1017279219Sken	 * Set the objset user_ptr to track its zfsvfs.
1018279219Sken	 */
1019279219Sken	mutex_enter(&zfsvfs->z_os->os_user_ptr_lock);
1020279219Sken	dmu_objset_set_user(zfsvfs->z_os, zfsvfs);
1021279219Sken	mutex_exit(&zfsvfs->z_os->os_user_ptr_lock);
1022279219Sken
1023279219Sken	zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data);
1024279219Sken
1025279219Sken	/*
1026279219Sken	 * If we are not mounting (ie: online recv), then we don't
1027279219Sken	 * have to worry about replaying the log as we blocked all
1028279219Sken	 * operations out since we closed the ZIL.
1029279219Sken	 */
1030279219Sken	if (mounting) {
1031279219Sken		boolean_t readonly;
1032279219Sken
1033279219Sken		/*
1034279219Sken		 * During replay we remove the read only flag to
1035279219Sken		 * allow replays to succeed.
1036279219Sken		 */
1037279219Sken		readonly = zfsvfs->z_vfs->vfs_flag & VFS_RDONLY;
1038279219Sken		if (readonly != 0)
1039279219Sken			zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY;
1040279219Sken		else
1041279219Sken			zfs_unlinked_drain(zfsvfs);
1042279219Sken
1043279219Sken		/*
1044279219Sken		 * Parse and replay the intent log.
1045279219Sken		 *
1046279219Sken		 * Because of ziltest, this must be done after
1047279219Sken		 * zfs_unlinked_drain().  (Further note: ziltest
1048279219Sken		 * doesn't use readonly mounts, where
1049279219Sken		 * zfs_unlinked_drain() isn't called.)  This is because
1050279219Sken		 * ziltest causes spa_sync() to think it's committed,
1051279219Sken		 * but actually it is not, so the intent log contains
1052279219Sken		 * many txg's worth of changes.
1053279219Sken		 *
1054279219Sken		 * In particular, if object N is in the unlinked set in
1055279219Sken		 * the last txg to actually sync, then it could be
1056279219Sken		 * actually freed in a later txg and then reallocated
1057279219Sken		 * in a yet later txg.  This would write a "create
1058279219Sken		 * object N" record to the intent log.  Normally, this
1059279219Sken		 * would be fine because the spa_sync() would have
1060279219Sken		 * written out the fact that object N is free, before
1061279219Sken		 * we could write the "create object N" intent log
1062279219Sken		 * record.
1063279219Sken		 *
1064279219Sken		 * But when we are in ziltest mode, we advance the "open
1065279219Sken		 * txg" without actually spa_sync()-ing the changes to
1066279219Sken		 * disk.  So we would see that object N is still
1067279219Sken		 * allocated and in the unlinked set, and there is an
1068279219Sken		 * intent log record saying to allocate it.
1069279219Sken		 */
1070279219Sken		if (spa_writeable(dmu_objset_spa(zfsvfs->z_os))) {
1071279219Sken			if (zil_replay_disable) {
1072279219Sken				zil_destroy(zfsvfs->z_log, B_FALSE);
1073279219Sken			} else {
1074279219Sken				zfsvfs->z_replay = B_TRUE;
1075279219Sken				zil_replay(zfsvfs->z_os, zfsvfs,
1076279219Sken				    zfs_replay_vector);
1077279219Sken				zfsvfs->z_replay = B_FALSE;
1078279219Sken			}
1079279219Sken		}
1080279219Sken		zfsvfs->z_vfs->vfs_flag |= readonly; /* restore readonly bit */
1081279219Sken	}
1082279219Sken
1083279219Sken	return (0);
1084279219Sken}
1085279219Sken
1086279219Skenextern krwlock_t zfsvfs_lock; /* in zfs_znode.c */
1087279219Sken
1088279219Skenvoid
1089279219Skenzfsvfs_free(zfsvfs_t *zfsvfs)
1090279219Sken{
1091279219Sken	int i;
1092279219Sken
1093279219Sken	/*
1094279219Sken	 * This is a barrier to prevent the filesystem from going away in
1095279219Sken	 * zfs_znode_move() until we can safely ensure that the filesystem is
1096279219Sken	 * not unmounted. We consider the filesystem valid before the barrier
1097279219Sken	 * and invalid after the barrier.
1098279219Sken	 */
1099279219Sken	rw_enter(&zfsvfs_lock, RW_READER);
1100279219Sken	rw_exit(&zfsvfs_lock);
1101279219Sken
1102279219Sken	zfs_fuid_destroy(zfsvfs);
1103279219Sken
1104279219Sken	mutex_destroy(&zfsvfs->z_znodes_lock);
1105279219Sken	mutex_destroy(&zfsvfs->z_lock);
1106279219Sken	list_destroy(&zfsvfs->z_all_znodes);
1107279219Sken	rrw_destroy(&zfsvfs->z_teardown_lock);
1108279219Sken	rw_destroy(&zfsvfs->z_teardown_inactive_lock);
1109279219Sken	rw_destroy(&zfsvfs->z_fuid_lock);
1110279219Sken	for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
1111279219Sken		mutex_destroy(&zfsvfs->z_hold_mtx[i]);
1112279219Sken	kmem_free(zfsvfs, sizeof (zfsvfs_t));
1113279219Sken}
1114279219Sken
1115279219Skenstatic void
1116279219Skenzfs_set_fuid_feature(zfsvfs_t *zfsvfs)
1117279219Sken{
1118279219Sken	zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os);
1119279219Sken	if (zfsvfs->z_vfs) {
1120279219Sken		if (zfsvfs->z_use_fuids) {
1121279219Sken			vfs_set_feature(zfsvfs->z_vfs, VFSFT_XVATTR);
1122279219Sken			vfs_set_feature(zfsvfs->z_vfs, VFSFT_SYSATTR_VIEWS);
1123279219Sken			vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACEMASKONACCESS);
1124279219Sken			vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACLONCREATE);
1125279219Sken			vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACCESS_FILTER);
1126279219Sken			vfs_set_feature(zfsvfs->z_vfs, VFSFT_REPARSE);
1127279219Sken		} else {
1128279219Sken			vfs_clear_feature(zfsvfs->z_vfs, VFSFT_XVATTR);
1129279219Sken			vfs_clear_feature(zfsvfs->z_vfs, VFSFT_SYSATTR_VIEWS);
1130279219Sken			vfs_clear_feature(zfsvfs->z_vfs, VFSFT_ACEMASKONACCESS);
1131279219Sken			vfs_clear_feature(zfsvfs->z_vfs, VFSFT_ACLONCREATE);
1132279219Sken			vfs_clear_feature(zfsvfs->z_vfs, VFSFT_ACCESS_FILTER);
1133279219Sken			vfs_clear_feature(zfsvfs->z_vfs, VFSFT_REPARSE);
1134279219Sken		}
1135279219Sken	}
1136279219Sken	zfsvfs->z_use_sa = USE_SA(zfsvfs->z_version, zfsvfs->z_os);
1137279219Sken}
1138279219Sken
1139279219Skenstatic int
1140279219Skenzfs_domount(vfs_t *vfsp, char *osname)
1141279219Sken{
1142279219Sken	uint64_t recordsize, fsid_guid;
1143279219Sken	int error = 0;
1144279219Sken	zfsvfs_t *zfsvfs;
1145279219Sken	vnode_t *vp;
1146279219Sken
1147279219Sken	ASSERT(vfsp);
1148279219Sken	ASSERT(osname);
1149279219Sken
1150279219Sken	error = zfsvfs_create(osname, &zfsvfs);
1151279219Sken	if (error)
1152279219Sken		return (error);
1153279219Sken	zfsvfs->z_vfs = vfsp;
1154279219Sken
1155279219Sken#ifdef illumos
1156279219Sken	/* Initialize the generic filesystem structure. */
1157279219Sken	vfsp->vfs_bcount = 0;
1158279219Sken	vfsp->vfs_data = NULL;
1159279219Sken
1160279219Sken	if (zfs_create_unique_device(&mount_dev) == -1) {
1161279219Sken		error = SET_ERROR(ENODEV);
1162279219Sken		goto out;
1163279219Sken	}
1164279219Sken	ASSERT(vfs_devismounted(mount_dev) == 0);
1165279219Sken#endif
1166279219Sken
1167279219Sken	if (error = dsl_prop_get_integer(osname, "recordsize", &recordsize,
1168279219Sken	    NULL))
1169279219Sken		goto out;
1170279219Sken	zfsvfs->z_vfs->vfs_bsize = SPA_MINBLOCKSIZE;
1171279219Sken	zfsvfs->z_vfs->mnt_stat.f_iosize = recordsize;
1172279219Sken
1173279219Sken	vfsp->vfs_data = zfsvfs;
1174279219Sken	vfsp->mnt_flag |= MNT_LOCAL;
1175279219Sken	vfsp->mnt_kern_flag |= MNTK_MPSAFE;
1176279219Sken	vfsp->mnt_kern_flag |= MNTK_LOOKUP_SHARED;
1177279219Sken	vfsp->mnt_kern_flag |= MNTK_SHARED_WRITES;
117843629Smjacob	vfsp->mnt_kern_flag |= MNTK_EXTENDED_SHARED;
1179279219Sken
1180279219Sken	/*
118143629Smjacob	 * The fsid is 64 bits, composed of an 8-bit fs type, which
1182279219Sken	 * separates our fsid from any other filesystem types, and a
1183279219Sken	 * 56-bit objset unique ID.  The objset unique ID is unique to
1184279219Sken	 * all objsets open on this system, provided by unique_create().
1185279219Sken	 * The 8-bit fs type must be put in the low bits of fsid[1]
1186279219Sken	 * because that's where other Solaris filesystems put it.
1187279219Sken	 */
1188279219Sken	fsid_guid = dmu_objset_fsid_guid(zfsvfs->z_os);
1189279219Sken	ASSERT((fsid_guid & ~((1ULL<<56)-1)) == 0);
1190279219Sken	vfsp->vfs_fsid.val[0] = fsid_guid;
1191279219Sken	vfsp->vfs_fsid.val[1] = ((fsid_guid>>32) << 8) |
1192279219Sken	    vfsp->mnt_vfc->vfc_typenum & 0xFF;
1193279219Sken
1194279219Sken	/*
1195279219Sken	 * Set features for file system.
1196279219Sken	 */
1197279219Sken	zfs_set_fuid_feature(zfsvfs);
1198279219Sken	if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE) {
1199279219Sken		vfs_set_feature(vfsp, VFSFT_DIRENTFLAGS);
1200279219Sken		vfs_set_feature(vfsp, VFSFT_CASEINSENSITIVE);
1201279219Sken		vfs_set_feature(vfsp, VFSFT_NOCASESENSITIVE);
1202279219Sken	} else if (zfsvfs->z_case == ZFS_CASE_MIXED) {
1203279219Sken		vfs_set_feature(vfsp, VFSFT_DIRENTFLAGS);
1204279219Sken		vfs_set_feature(vfsp, VFSFT_CASEINSENSITIVE);
1205279219Sken	}
1206279219Sken	vfs_set_feature(vfsp, VFSFT_ZEROCOPY_SUPPORTED);
1207279219Sken
1208279219Sken	if (dmu_objset_is_snapshot(zfsvfs->z_os)) {
1209279219Sken		uint64_t pval;
1210279219Sken
1211279219Sken		atime_changed_cb(zfsvfs, B_FALSE);
1212279219Sken		readonly_changed_cb(zfsvfs, B_TRUE);
1213279219Sken		if (error = dsl_prop_get_integer(osname, "xattr", &pval, NULL))
1214279219Sken			goto out;
1215279219Sken		xattr_changed_cb(zfsvfs, pval);
1216279219Sken		zfsvfs->z_issnap = B_TRUE;
121743629Smjacob		zfsvfs->z_os->os_sync = ZFS_SYNC_DISABLED;
121843629Smjacob
121943629Smjacob		mutex_enter(&zfsvfs->z_os->os_user_ptr_lock);
122043629Smjacob		dmu_objset_set_user(zfsvfs->z_os, zfsvfs);
1221279219Sken		mutex_exit(&zfsvfs->z_os->os_user_ptr_lock);
1222279219Sken	} else {
1223279219Sken		error = zfsvfs_setup(zfsvfs, B_TRUE);
1224279219Sken	}
1225279219Sken
1226279219Sken	vfs_mountedfrom(vfsp, osname);
1227279219Sken	/* Grab extra reference. */
1228279219Sken	VERIFY(VFS_ROOT(vfsp, LK_EXCLUSIVE, &vp) == 0);
1229279219Sken	VOP_UNLOCK(vp, 0);
1230279219Sken
1231279219Sken	if (!zfsvfs->z_issnap)
1232279219Sken		zfsctl_create(zfsvfs);
1233279219Skenout:
1234279219Sken	if (error) {
1235279219Sken		dmu_objset_disown(zfsvfs->z_os, zfsvfs);
1236279219Sken		zfsvfs_free(zfsvfs);
1237279219Sken	} else {
1238279219Sken		atomic_add_32(&zfs_active_fs_count, 1);
1239279219Sken	}
1240279219Sken
1241279219Sken	return (error);
1242279219Sken}
1243279219Sken
1244279219Skenvoid
1245279219Skenzfs_unregister_callbacks(zfsvfs_t *zfsvfs)
1246279219Sken{
1247279219Sken	objset_t *os = zfsvfs->z_os;
1248279219Sken	struct dsl_dataset *ds;
1249279219Sken
1250279219Sken	/*
1251279219Sken	 * Unregister properties.
1252279219Sken	 */
1253279219Sken	if (!dmu_objset_is_snapshot(os)) {
1254279219Sken		ds = dmu_objset_ds(os);
1255279219Sken		VERIFY(dsl_prop_unregister(ds, "atime", atime_changed_cb,
1256279219Sken		    zfsvfs) == 0);
1257279219Sken
1258279219Sken		VERIFY(dsl_prop_unregister(ds, "xattr", xattr_changed_cb,
1259279219Sken		    zfsvfs) == 0);
1260279219Sken
1261279219Sken		VERIFY(dsl_prop_unregister(ds, "recordsize", blksz_changed_cb,
1262279219Sken		    zfsvfs) == 0);
1263279219Sken
1264279219Sken		VERIFY(dsl_prop_unregister(ds, "readonly", readonly_changed_cb,
1265279219Sken		    zfsvfs) == 0);
1266279219Sken
1267279219Sken		VERIFY(dsl_prop_unregister(ds, "setuid", setuid_changed_cb,
1268279219Sken		    zfsvfs) == 0);
1269279219Sken
1270279219Sken		VERIFY(dsl_prop_unregister(ds, "exec", exec_changed_cb,
1271279219Sken		    zfsvfs) == 0);
1272279219Sken
1273279219Sken		VERIFY(dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb,
1274279219Sken		    zfsvfs) == 0);
1275279219Sken
1276279219Sken		VERIFY(dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb,
1277279219Sken		    zfsvfs) == 0);
1278279219Sken
1279279219Sken		VERIFY(dsl_prop_unregister(ds, "aclinherit",
1280279219Sken		    acl_inherit_changed_cb, zfsvfs) == 0);
1281279219Sken
1282279219Sken		VERIFY(dsl_prop_unregister(ds, "vscan",
1283279219Sken		    vscan_changed_cb, zfsvfs) == 0);
1284279219Sken	}
1285279219Sken}
1286279219Sken
1287279219Sken#ifdef SECLABEL
/*
 * Convert a decimal digit string to a uint64_t integer.
 *
 * Returns 0 and stores the value in *objnum on success.  Returns EINVAL,
 * leaving *objnum untouched, if the string contains a character that is
 * not a decimal digit or if the value does not fit in 64 bits.  An empty
 * string converts to 0.
 */
static int
str_to_uint64(char *str, uint64_t *objnum)
{
	const uint64_t max = ~(uint64_t)0;
	uint64_t num = 0;

	for (; *str != '\0'; str++) {
		uint64_t digit;

		if (*str < '0' || *str > '9')
			return (SET_ERROR(EINVAL));
		digit = (uint64_t)(*str - '0');

		/* Reject the digit if num * 10 + digit would wrap around. */
		if (num > (max - digit) / 10)
			return (SET_ERROR(EINVAL));

		num = num * 10 + digit;
	}

	*objnum = num;
	return (0);
}
1306279219Sken
/*
 * The boot path handed over by the boot loader has the form
 * "rootpool-name/root-filesystem-object-number".  Convert it into a dataset
 * name of the form "rootpool-name/root-filesystem-name" in "outpath".
 *
 * "outpath" first receives a verbatim copy of "bpath"; that copy is the
 * result when the path has no '/' or when the part after the first '/' is
 * not a number.  Otherwise the object number is resolved through
 * dsl_dsobj_to_dsname() and its error code is returned.  "bpath" is
 * modified temporarily while the pool name is split off, and restored
 * before returning.
 *
 * Returns EINVAL for an empty path or one that starts with '/'.
 */
static int
zfs_parse_bootfs(char *bpath, char *outpath)
{
	uint64_t objnum;
	char *sep;
	int error;

	if (*bpath == '\0' || *bpath == '/')
		return (SET_ERROR(EINVAL));

	(void) strcpy(outpath, bpath);

	/* if no '/', just return the pool name */
	sep = strchr(bpath, '/');
	if (sep == NULL)
		return (0);

	/* if not a number, just return the root dataset name */
	if (str_to_uint64(sep + 1, &objnum) != 0)
		return (0);

	/* Cut bpath down to the pool name for the duration of the lookup. */
	*sep = '\0';
	error = dsl_dsobj_to_dsname(bpath, objnum, outpath);
	*sep = '/';

	return (error);
}
1342279219Sken
1343279219Sken/*
1344279219Sken * zfs_check_global_label:
1345279261Sken *	Check that the hex label string is appropriate for the dataset
1346279261Sken *	being mounted into the global_zone proper.
1347279261Sken *
1348279261Sken *	Return an error if the hex label string is not default or
1349279261Sken *	admin_low/admin_high.  For admin_low labels, the corresponding
1350279261Sken *	dataset must be readonly.
1351279219Sken */
1352279219Skenint
1353279219Skenzfs_check_global_label(const char *dsname, const char *hexsl)
1354279219Sken{
1355279219Sken	if (strcasecmp(hexsl, ZFS_MLSLABEL_DEFAULT) == 0)
1356279219Sken		return (0);
1357279261Sken	if (strcasecmp(hexsl, ADMIN_HIGH) == 0)
1358279261Sken		return (0);
1359279261Sken	if (strcasecmp(hexsl, ADMIN_LOW) == 0) {
1360279261Sken		/* must be readonly */
1361279261Sken		uint64_t rdonly;
1362279261Sken
1363279219Sken		if (dsl_prop_get_integer(dsname,
1364279219Sken		    zfs_prop_to_name(ZFS_PROP_READONLY), &rdonly, NULL))
1365279219Sken			return (SET_ERROR(EACCES));
1366279219Sken		return (rdonly ? 0 : EACCES);
1367279219Sken	}
1368279219Sken	return (SET_ERROR(EACCES));
1369279219Sken}
1370279219Sken
1371279219Sken/*
1372279219Sken * zfs_mount_label_policy:
1373279219Sken *	Determine whether the mount is allowed according to MAC check.
1374279261Sken *	by comparing (where appropriate) label of the dataset against
1375279261Sken *	the label of the zone being mounted into.  If the dataset has
1376279261Sken *	no label, create one.
1377279261Sken *
1378279261Sken *	Returns:
1379279219Sken *		 0 :	access allowed
1380279219Sken *		>0 :	error code, such as EACCES
1381279219Sken */
1382279261Skenstatic int
1383279261Skenzfs_mount_label_policy(vfs_t *vfsp, char *osname)
1384279219Sken{
1385279219Sken	int		error, retv;
1386279219Sken	zone_t		*mntzone = NULL;
1387279219Sken	ts_label_t	*mnt_tsl;
1388279219Sken	bslabel_t	*mnt_sl;
1389279219Sken	bslabel_t	ds_sl;
1390279219Sken	char		ds_hexsl[MAXNAMELEN];
1391279219Sken
1392279219Sken	retv = EACCES;				/* assume the worst */
1393279219Sken
1394279219Sken	/*
1395279261Sken	 * Start by getting the dataset label if it exists.
1396279261Sken	 */
1397279261Sken	error = dsl_prop_get(osname, zfs_prop_to_name(ZFS_PROP_MLSLABEL),
1398279219Sken	    1, sizeof (ds_hexsl), &ds_hexsl, NULL);
1399279219Sken	if (error)
1400279219Sken		return (SET_ERROR(EACCES));
1401279219Sken
1402279219Sken	/*
1403279219Sken	 * If labeling is NOT enabled, then disallow the mount of datasets
1404279219Sken	 * which have a non-default label already.  No other label checks
1405279219Sken	 * are needed.
1406279219Sken	 */
1407279219Sken	if (!is_system_labeled()) {
1408279219Sken		if (strcasecmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) == 0)
1409279219Sken			return (0);
1410279219Sken		return (SET_ERROR(EACCES));
1411279219Sken	}
1412279219Sken
1413279219Sken	/*
1414279219Sken	 * Get the label of the mountpoint.  If mounting into the global
1415279219Sken	 * zone (i.e. mountpoint is not within an active zone and the
1416280230Sken	 * zoned property is off), the label must be default or
1417280230Sken	 * admin_low/admin_high only; no other checks are needed.
1418279219Sken	 */
1419279219Sken	mntzone = zone_find_by_any_path(refstr_value(vfsp->vfs_mntpt), B_FALSE);
1420279219Sken	if (mntzone->zone_id == GLOBAL_ZONEID) {
1421279219Sken		uint64_t zoned;
1422279219Sken
1423279219Sken		zone_rele(mntzone);
1424279219Sken
1425279219Sken		if (dsl_prop_get_integer(osname,
1426279219Sken		    zfs_prop_to_name(ZFS_PROP_ZONED), &zoned, NULL))
1427279219Sken			return (SET_ERROR(EACCES));
1428279219Sken		if (!zoned)
1429279219Sken			return (zfs_check_global_label(osname, ds_hexsl));
1430279219Sken		else
1431279219Sken			/*
1432279219Sken			 * This is the case of a zone dataset being mounted
1433279219Sken			 * initially, before the zone has been fully created;
1434279219Sken			 * allow this mount into global zone.
1435279219Sken			 */
1436279219Sken			return (0);
1437279219Sken	}
1438279219Sken
1439279219Sken	mnt_tsl = mntzone->zone_slabel;
1440279219Sken	ASSERT(mnt_tsl != NULL);
1441279219Sken	label_hold(mnt_tsl);
1442279219Sken	mnt_sl = label2bslabel(mnt_tsl);
1443279219Sken
1444279219Sken	if (strcasecmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) == 0) {
1445279219Sken		/*
1446279219Sken		 * The dataset doesn't have a real label, so fabricate one.
1447279219Sken		 */
1448279219Sken		char *str = NULL;
1449279219Sken
1450279219Sken		if (l_to_str_internal(mnt_sl, &str) == 0 &&
1451279219Sken		    dsl_prop_set_string(osname,
1452279219Sken		    zfs_prop_to_name(ZFS_PROP_MLSLABEL),
1453279219Sken		    ZPROP_SRC_LOCAL, str) == 0)
1454279219Sken			retv = 0;
1455279219Sken		if (str != NULL)
1456279219Sken			kmem_free(str, strlen(str) + 1);
1457279219Sken	} else if (hexstr_to_label(ds_hexsl, &ds_sl) == 0) {
1458279219Sken		/*
1459279219Sken		 * Now compare labels to complete the MAC check.  If the
1460279219Sken		 * labels are equal then allow access.  If the mountpoint
1461279219Sken		 * label dominates the dataset label, allow readonly access.
1462279219Sken		 * Otherwise, access is denied.
1463279219Sken		 */
1464279219Sken		if (blequal(mnt_sl, &ds_sl))
1465279219Sken			retv = 0;
1466279219Sken		else if (bldominates(mnt_sl, &ds_sl)) {
1467279219Sken			vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0);
1468279219Sken			retv = 0;
1469279219Sken		}
1470279219Sken	}
1471279219Sken
1472279219Sken	label_rele(mnt_tsl);
1473279219Sken	zone_rele(mntzone);
1474279219Sken	return (retv);
1475279219Sken}
1476279219Sken#endif	/* SECLABEL */
1477279219Sken
1478279219Sken#ifdef OPENSOLARIS_MOUNTROOT
1479279219Skenstatic int
1480279219Skenzfs_mountroot(vfs_t *vfsp, enum whymountroot why)
1481279219Sken{
1482279219Sken	int error = 0;
1483279219Sken	static int zfsrootdone = 0;
1484279219Sken	zfsvfs_t *zfsvfs = NULL;
1485279219Sken	znode_t *zp = NULL;
1486279219Sken	vnode_t *vp = NULL;
1487279219Sken	char *zfs_bootfs;
1488279219Sken	char *zfs_devid;
1489279219Sken
1490279219Sken	ASSERT(vfsp);
1491279219Sken
1492279219Sken	/*
1493279219Sken	 * The filesystem that we mount as root is defined in the
1494279219Sken	 * boot property "zfs-bootfs" with a format of
1495279219Sken	 * "poolname/root-dataset-objnum".
1496279219Sken	 */
1497279219Sken	if (why == ROOT_INIT) {
1498279219Sken		if (zfsrootdone++)
1499279219Sken			return (SET_ERROR(EBUSY));
1500279219Sken		/*
1501279219Sken		 * the process of doing a spa_load will require the
1502279219Sken		 * clock to be set before we could (for example) do
1503279219Sken		 * something better by looking at the timestamp on
1504279219Sken		 * an uberblock, so just set it to -1.
1505279219Sken		 */
1506279219Sken		clkset(-1);
1507279219Sken
1508279219Sken		if ((zfs_bootfs = spa_get_bootprop("zfs-bootfs")) == NULL) {
1509279219Sken			cmn_err(CE_NOTE, "spa_get_bootfs: can not get "
1510279219Sken			    "bootfs name");
1511279219Sken			return (SET_ERROR(EINVAL));
1512279219Sken		}
1513279219Sken		zfs_devid = spa_get_bootprop("diskdevid");
1514279219Sken		error = spa_import_rootpool(rootfs.bo_name, zfs_devid);
1515279219Sken		if (zfs_devid)
1516279219Sken			spa_free_bootprop(zfs_devid);
1517279219Sken		if (error) {
1518279219Sken			spa_free_bootprop(zfs_bootfs);
1519279219Sken			cmn_err(CE_NOTE, "spa_import_rootpool: error %d",
1520279219Sken			    error);
1521279219Sken			return (error);
1522279219Sken		}
1523279219Sken		if (error = zfs_parse_bootfs(zfs_bootfs, rootfs.bo_name)) {
1524279219Sken			spa_free_bootprop(zfs_bootfs);
1525279219Sken			cmn_err(CE_NOTE, "zfs_parse_bootfs: error %d",
1526279219Sken			    error);
1527279219Sken			return (error);
1528279219Sken		}
1529279219Sken
1530279219Sken		spa_free_bootprop(zfs_bootfs);
1531279219Sken
1532279219Sken		if (error = vfs_lock(vfsp))
1533279219Sken			return (error);
1534279219Sken
1535279219Sken		if (error = zfs_domount(vfsp, rootfs.bo_name)) {
1536279219Sken			cmn_err(CE_NOTE, "zfs_domount: error %d", error);
1537279219Sken			goto out;
1538279219Sken		}
1539279219Sken
1540279219Sken		zfsvfs = (zfsvfs_t *)vfsp->vfs_data;
1541279219Sken		ASSERT(zfsvfs);
1542279219Sken		if (error = zfs_zget(zfsvfs, zfsvfs->z_root, &zp)) {
1543279219Sken			cmn_err(CE_NOTE, "zfs_zget: error %d", error);
1544279219Sken			goto out;
1545279219Sken		}
1546279219Sken
1547279219Sken		vp = ZTOV(zp);
1548279219Sken		mutex_enter(&vp->v_lock);
1549279219Sken		vp->v_flag |= VROOT;
1550279219Sken		mutex_exit(&vp->v_lock);
1551279219Sken		rootvp = vp;
1552279219Sken
1553279219Sken		/*
1554279219Sken		 * Leave rootvp held.  The root file system is never unmounted.
1555279219Sken		 */
1556279219Sken
1557279219Sken		vfs_add((struct vnode *)0, vfsp,
1558279219Sken		    (vfsp->vfs_flag & VFS_RDONLY) ? MS_RDONLY : 0);
1559279219Skenout:
1560279219Sken		vfs_unlock(vfsp);
1561279219Sken		return (error);
1562279219Sken	} else if (why == ROOT_REMOUNT) {
1563279219Sken		readonly_changed_cb(vfsp->vfs_data, B_FALSE);
1564279219Sken		vfsp->vfs_flag |= VFS_REMOUNT;
1565279219Sken
1566279219Sken		/* refresh mount options */
1567279219Sken		zfs_unregister_callbacks(vfsp->vfs_data);
1568279219Sken		return (zfs_register_callbacks(vfsp));
1569279219Sken
1570279219Sken	} else if (why == ROOT_UNMOUNT) {
1571279219Sken		zfs_unregister_callbacks((zfsvfs_t *)vfsp->vfs_data);
1572279219Sken		(void) zfs_sync(vfsp, 0, 0);
1573279219Sken		return (0);
1574279219Sken	}
1575279219Sken
1576279219Sken	/*
1577279219Sken	 * if "why" is equal to anything else other than ROOT_INIT,
1578279219Sken	 * ROOT_REMOUNT, or ROOT_UNMOUNT, we do not support it.
1579279219Sken	 */
1580279219Sken	return (SET_ERROR(ENOTSUP));
1581279219Sken}
1582279219Sken#endif	/* OPENSOLARIS_MOUNTROOT */
1583279219Sken
/*
 * Extract the pool component of the dataset name 'osname'.
 *
 * The pool name is everything up to (but not including) the first '/';
 * a name without any '/' is taken to be the pool name as a whole.  The
 * result is stored NUL-terminated in 'poolname', which therefore receives
 * at most MAXNAMELEN bytes.
 *
 * Returns 0 on success, or ENAMETOOLONG (without touching 'poolname') if
 * the pool component is MAXNAMELEN characters or longer.
 */
static int
getpoolname(const char *osname, char *poolname)
{
	const char *slash = strchr(osname, '/');
	size_t len;

	len = (slash != NULL) ? (size_t)(slash - osname) : strlen(osname);
	if (len >= MAXNAMELEN)
		return (ENAMETOOLONG);

	/* 'len' never reaches past the terminator, so terminate by hand. */
	(void) strncpy(poolname, osname, len);
	poolname[len] = '\0';
	return (0);
}
1602
1603/*ARGSUSED*/
1604static int
1605zfs_mount(vfs_t *vfsp)
1606{
1607	kthread_t	*td = curthread;
1608	vnode_t		*mvp = vfsp->mnt_vnodecovered;
1609	cred_t		*cr = td->td_ucred;
1610	char		*osname;
1611	int		error = 0;
1612	int		canwrite;
1613
1614#ifdef illumos
1615	if (mvp->v_type != VDIR)
1616		return (SET_ERROR(ENOTDIR));
1617
1618	mutex_enter(&mvp->v_lock);
1619	if ((uap->flags & MS_REMOUNT) == 0 &&
1620	    (uap->flags & MS_OVERLAY) == 0 &&
1621	    (mvp->v_count != 1 || (mvp->v_flag & VROOT))) {
1622		mutex_exit(&mvp->v_lock);
1623		return (SET_ERROR(EBUSY));
1624	}
1625	mutex_exit(&mvp->v_lock);
1626
1627	/*
1628	 * ZFS does not support passing unparsed data in via MS_DATA.
1629	 * Users should use the MS_OPTIONSTR interface; this means
1630	 * that all option parsing is already done and the options struct
1631	 * can be interrogated.
1632	 */
1633	if ((uap->flags & MS_DATA) && uap->datalen > 0)
1634#else
1635	if (!prison_allow(td->td_ucred, PR_ALLOW_MOUNT_ZFS))
1636		return (SET_ERROR(EPERM));
1637
1638	if (vfs_getopt(vfsp->mnt_optnew, "from", (void **)&osname, NULL))
1639		return (SET_ERROR(EINVAL));
1640#endif	/* ! illumos */
1641
1642	/*
1643	 * If full-owner-access is enabled and delegated administration is
1644	 * turned on, we must set nosuid.
1645	 */
1646	if (zfs_super_owner &&
1647	    dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr) != ECANCELED) {
1648		secpolicy_fs_mount_clearopts(cr, vfsp);
1649	}
1650
1651	/*
1652	 * Check for mount privilege?
1653	 *
1654	 * If we don't have privilege then see if
1655	 * we have local permission to allow it
1656	 */
1657	error = secpolicy_fs_mount(cr, mvp, vfsp);
1658	if (error) {
1659		if (dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr) != 0)
1660			goto out;
1661
1662		if (!(vfsp->vfs_flag & MS_REMOUNT)) {
1663			vattr_t		vattr;
1664
1665			/*
1666			 * Make sure user is the owner of the mount point
1667			 * or has sufficient privileges.
1668			 */
1669
1670			vattr.va_mask = AT_UID;
1671
1672			vn_lock(mvp, LK_SHARED | LK_RETRY);
1673			if (VOP_GETATTR(mvp, &vattr, cr)) {
1674				VOP_UNLOCK(mvp, 0);
1675				goto out;
1676			}
1677
1678			if (secpolicy_vnode_owner(mvp, cr, vattr.va_uid) != 0 &&
1679			    VOP_ACCESS(mvp, VWRITE, cr, td) != 0) {
1680				VOP_UNLOCK(mvp, 0);
1681				goto out;
1682			}
1683			VOP_UNLOCK(mvp, 0);
1684		}
1685
1686		secpolicy_fs_mount_clearopts(cr, vfsp);
1687	}
1688
1689	/*
1690	 * Refuse to mount a filesystem if we are in a local zone and the
1691	 * dataset is not visible.
1692	 */
1693	if (!INGLOBALZONE(curthread) &&
1694	    (!zone_dataset_visible(osname, &canwrite) || !canwrite)) {
1695		error = SET_ERROR(EPERM);
1696		goto out;
1697	}
1698
1699#ifdef SECLABEL
1700	error = zfs_mount_label_policy(vfsp, osname);
1701	if (error)
1702		goto out;
1703#endif
1704
1705	vfsp->vfs_flag |= MNT_NFS4ACLS;
1706
1707	/*
1708	 * When doing a remount, we simply refresh our temporary properties
1709	 * according to those options set in the current VFS options.
1710	 */
1711	if (vfsp->vfs_flag & MS_REMOUNT) {
1712		/* refresh mount options */
1713		zfs_unregister_callbacks(vfsp->vfs_data);
1714		error = zfs_register_callbacks(vfsp);
1715		goto out;
1716	}
1717
1718	/* Initial root mount: try hard to import the requested root pool. */
1719	if ((vfsp->vfs_flag & MNT_ROOTFS) != 0 &&
1720	    (vfsp->vfs_flag & MNT_UPDATE) == 0) {
1721		char pname[MAXNAMELEN];
1722
1723		error = getpoolname(osname, pname);
1724		if (error == 0)
1725			error = spa_import_rootpool(pname);
1726		if (error)
1727			goto out;
1728	}
1729	DROP_GIANT();
1730	error = zfs_domount(vfsp, osname);
1731	PICKUP_GIANT();
1732
1733#ifdef sun
1734	/*
1735	 * Add an extra VFS_HOLD on our parent vfs so that it can't
1736	 * disappear due to a forced unmount.
1737	 */
1738	if (error == 0 && ((zfsvfs_t *)vfsp->vfs_data)->z_issnap)
1739		VFS_HOLD(mvp->v_vfsp);
1740#endif	/* sun */
1741
1742out:
1743	return (error);
1744}
1745
1746static int
1747zfs_statfs(vfs_t *vfsp, struct statfs *statp)
1748{
1749	zfsvfs_t *zfsvfs = vfsp->vfs_data;
1750	uint64_t refdbytes, availbytes, usedobjs, availobjs;
1751
1752	statp->f_version = STATFS_VERSION;
1753
1754	ZFS_ENTER(zfsvfs);
1755
1756	dmu_objset_space(zfsvfs->z_os,
1757	    &refdbytes, &availbytes, &usedobjs, &availobjs);
1758
1759	/*
1760	 * The underlying storage pool actually uses multiple block sizes.
1761	 * We report the fragsize as the smallest block size we support,
1762	 * and we report our blocksize as the filesystem's maximum blocksize.
1763	 */
1764	statp->f_bsize = SPA_MINBLOCKSIZE;
1765	statp->f_iosize = zfsvfs->z_vfs->mnt_stat.f_iosize;
1766
1767	/*
1768	 * The following report "total" blocks of various kinds in the
1769	 * file system, but reported in terms of f_frsize - the
1770	 * "fragment" size.
1771	 */
1772
1773	statp->f_blocks = (refdbytes + availbytes) >> SPA_MINBLOCKSHIFT;
1774	statp->f_bfree = availbytes / statp->f_bsize;
1775	statp->f_bavail = statp->f_bfree; /* no root reservation */
1776
1777	/*
1778	 * statvfs() should really be called statufs(), because it assumes
1779	 * static metadata.  ZFS doesn't preallocate files, so the best
1780	 * we can do is report the max that could possibly fit in f_files,
1781	 * and that minus the number actually used in f_ffree.
1782	 * For f_ffree, report the smaller of the number of object available
1783	 * and the number of blocks (each object will take at least a block).
1784	 */
1785	statp->f_ffree = MIN(availobjs, statp->f_bfree);
1786	statp->f_files = statp->f_ffree + usedobjs;
1787
1788	/*
1789	 * We're a zfs filesystem.
1790	 */
1791	(void) strlcpy(statp->f_fstypename, "zfs", sizeof(statp->f_fstypename));
1792
1793	strlcpy(statp->f_mntfromname, vfsp->mnt_stat.f_mntfromname,
1794	    sizeof(statp->f_mntfromname));
1795	strlcpy(statp->f_mntonname, vfsp->mnt_stat.f_mntonname,
1796	    sizeof(statp->f_mntonname));
1797
1798	statp->f_namemax = ZFS_MAXNAMELEN;
1799
1800	ZFS_EXIT(zfsvfs);
1801	return (0);
1802}
1803
1804int
1805zfs_vnode_lock(vnode_t *vp, int flags)
1806{
1807	int error;
1808
1809	ASSERT(vp != NULL);
1810
1811	error = vn_lock(vp, flags);
1812	return (error);
1813}
1814
1815static int
1816zfs_root(vfs_t *vfsp, int flags, vnode_t **vpp)
1817{
1818	zfsvfs_t *zfsvfs = vfsp->vfs_data;
1819	znode_t *rootzp;
1820	int error;
1821
1822	ZFS_ENTER_NOERROR(zfsvfs);
1823
1824	error = zfs_zget(zfsvfs, zfsvfs->z_root, &rootzp);
1825	if (error == 0)
1826		*vpp = ZTOV(rootzp);
1827
1828	ZFS_EXIT(zfsvfs);
1829
1830	if (error == 0) {
1831		error = zfs_vnode_lock(*vpp, flags);
1832		if (error == 0)
1833			(*vpp)->v_vflag |= VV_ROOT;
1834	}
1835	if (error != 0)
1836		*vpp = NULL;
1837
1838	return (error);
1839}
1840
/*
 * Teardown the zfsvfs::z_os.
 *
 * Note, if 'unmounting' is FALSE, we return with the 'z_teardown_lock'
 * and 'z_teardown_inactive_lock' held.
 *
 * Returns 0 on success.  When not unmounting, returns EIO (with both
 * locks released again) if the file system was already unmounted or has
 * no open objset.
 */
static int
zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting)
{
	znode_t	*zp;

	rrw_enter(&zfsvfs->z_teardown_lock, RW_WRITER, FTAG);

	if (!unmounting) {
		/*
		 * We purge the parent filesystem's vfsp as the parent
		 * filesystem and all of its snapshots have their vnode's
		 * v_vfsp set to the parent's filesystem's vfsp.  Note,
		 * 'z_parent' is self referential for non-snapshots.
		 */
		(void) dnlc_purge_vfsp(zfsvfs->z_parent->z_vfs, 0);
#ifdef FREEBSD_NAMECACHE
		cache_purgevfs(zfsvfs->z_parent->z_vfs);
#endif
	}

	/*
	 * Close the zil. NB: Can't close the zil while zfs_inactive
	 * threads are blocked as zil_close can call zfs_inactive.
	 */
	if (zfsvfs->z_log) {
		zil_close(zfsvfs->z_log);
		zfsvfs->z_log = NULL;
	}

	rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_WRITER);

	/*
	 * If we are not unmounting (ie: online recv) and someone already
	 * unmounted this file system while we were doing the switcheroo,
	 * or a reopen of z_os failed then just bail out now.
	 */
	if (!unmounting && (zfsvfs->z_unmounted || zfsvfs->z_os == NULL)) {
		rw_exit(&zfsvfs->z_teardown_inactive_lock);
		rrw_exit(&zfsvfs->z_teardown_lock, FTAG);
		return (SET_ERROR(EIO));
	}

	/*
	 * At this point there are no vops active, and any new vops will
	 * fail with EIO since we have z_teardown_lock for writer (only
	 * relevant for forced unmount).
	 *
	 * Release all holds on dbufs.
	 */
	mutex_enter(&zfsvfs->z_znodes_lock);
	for (zp = list_head(&zfsvfs->z_all_znodes); zp != NULL;
	    zp = list_next(&zfsvfs->z_all_znodes, zp))
		if (zp->z_sa_hdl) {
			ASSERT(ZTOV(zp)->v_count >= 0);
			zfs_znode_dmu_fini(zp);
		}
	mutex_exit(&zfsvfs->z_znodes_lock);

	/*
	 * If we are unmounting, set the unmounted flag and let new vops
	 * unblock.  zfs_inactive will have the unmounted behavior, and all
	 * other vops will fail with EIO.
	 */
	if (unmounting) {
		zfsvfs->z_unmounted = B_TRUE;
		rrw_exit(&zfsvfs->z_teardown_lock, FTAG);
		rw_exit(&zfsvfs->z_teardown_inactive_lock);
	}

	/*
	 * z_os will be NULL if there was an error in attempting to reopen
	 * zfsvfs, so just return as the properties had already been
	 * unregistered and cached data had been evicted before.
	 */
	if (zfsvfs->z_os == NULL)
		return (0);

	/*
	 * Unregister properties.
	 */
	zfs_unregister_callbacks(zfsvfs);

	/*
	 * Evict cached data.  A dirty dataset on a writable mount is
	 * synced out first.
	 */
	if (dsl_dataset_is_dirty(dmu_objset_ds(zfsvfs->z_os)) &&
	    !(zfsvfs->z_vfs->vfs_flag & VFS_RDONLY))
		txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0);
	dmu_objset_evict_dbufs(zfsvfs->z_os);

	return (0);
}
1939
/*
 * Unmount the file system mounted on 'vfsp'.
 *
 * 'fflag' carries the unmount flags; MS_FORCE requests a forced unmount.
 * The caller needs unmount privilege or delegated ZFS_DELEG_PERM_MOUNT
 * permission on the dataset.
 *
 * Returns 0 on success.  On failure an errno value is returned: the
 * privilege error, an error from unmounting snapshots or from vflush(),
 * or EBUSY when a non-forced unmount finds the file system still in use.
 */
/*ARGSUSED*/
static int
zfs_umount(vfs_t *vfsp, int fflag)
{
	kthread_t *td = curthread;
	zfsvfs_t *zfsvfs = vfsp->vfs_data;
	objset_t *os;
	cred_t *cr = td->td_ucred;
	int ret;

	ret = secpolicy_fs_unmount(cr, vfsp);
	if (ret) {
		/* Not privileged: allow it only with delegated permission. */
		if (dsl_deleg_access((char *)refstr_value(vfsp->vfs_resource),
		    ZFS_DELEG_PERM_MOUNT, cr))
			return (ret);
	}

	/*
	 * We purge the parent filesystem's vfsp as the parent filesystem
	 * and all of its snapshots have their vnode's v_vfsp set to the
	 * parent's filesystem's vfsp.  Note, 'z_parent' is self
	 * referential for non-snapshots.
	 */
	(void) dnlc_purge_vfsp(zfsvfs->z_parent->z_vfs, 0);

	/*
	 * Unmount any snapshots mounted under .zfs before unmounting the
	 * dataset itself.
	 */
	if (zfsvfs->z_ctldir != NULL) {
		if ((ret = zfsctl_umount_snapshots(vfsp, fflag, cr)) != 0)
			return (ret);
		ret = vflush(vfsp, 0, 0, td);
		ASSERT(ret == EBUSY);
		if (!(fflag & MS_FORCE)) {
			/* More than one reference means '.zfs' is in use. */
			if (zfsvfs->z_ctldir->v_count > 1)
				return (EBUSY);
			ASSERT(zfsvfs->z_ctldir->v_count == 1);
		}
		zfsctl_destroy(zfsvfs);
		ASSERT(zfsvfs->z_ctldir == NULL);
	}

	if (fflag & MS_FORCE) {
		/*
		 * Mark file system as unmounted before calling
		 * vflush(FORCECLOSE). This way we ensure no future vnops
		 * will be called and risk operating on DOOMED vnodes.
		 */
		rrw_enter(&zfsvfs->z_teardown_lock, RW_WRITER, FTAG);
		zfsvfs->z_unmounted = B_TRUE;
		rrw_exit(&zfsvfs->z_teardown_lock, FTAG);
	}

	/*
	 * Flush all the files.
	 */
	ret = vflush(vfsp, 1, (fflag & MS_FORCE) ? FORCECLOSE : 0, td);
	if (ret != 0) {
		/*
		 * The unmount is being abandoned, so recreate the '.zfs'
		 * directory for non-snapshot file systems.
		 */
		if (!zfsvfs->z_issnap) {
			zfsctl_create(zfsvfs);
			ASSERT(zfsvfs->z_ctldir != NULL);
		}
		return (ret);
	}

	if (!(fflag & MS_FORCE)) {
		/*
		 * Check the number of active vnodes in the file system.
		 * Our count is maintained in the vfs structure, but the
		 * number is off by 1 to indicate a hold on the vfs
		 * structure itself.
		 *
		 * The '.zfs' directory maintains a reference of its
		 * own, and any active references underneath are
		 * reflected in the vnode count.
		 */
		if (zfsvfs->z_ctldir == NULL) {
			if (vfsp->vfs_count > 1)
				return (SET_ERROR(EBUSY));
		} else {
			if (vfsp->vfs_count > 2 ||
			    zfsvfs->z_ctldir->v_count > 1)
				return (SET_ERROR(EBUSY));
		}
	}

	VERIFY(zfsvfs_teardown(zfsvfs, B_TRUE) == 0);
	os = zfsvfs->z_os;

	/*
	 * z_os will be NULL if there was an error in
	 * attempting to reopen zfsvfs.
	 */
	if (os != NULL) {
		/*
		 * Unset the objset user_ptr.
		 */
		mutex_enter(&os->os_user_ptr_lock);
		dmu_objset_set_user(os, NULL);
		mutex_exit(&os->os_user_ptr_lock);

		/*
		 * Finally release the objset
		 */
		dmu_objset_disown(os, zfsvfs);
	}

	/*
	 * We can now safely destroy the '.zfs' directory node.
	 */
	if (zfsvfs->z_ctldir != NULL)
		zfsctl_destroy(zfsvfs);
	if (zfsvfs->z_issnap) {
		vnode_t *svp = vfsp->mnt_vnodecovered;

		/*
		 * NOTE(review): presumably this drops an extra hold taken
		 * on the covered vnode when the snapshot was mounted -
		 * confirm against the snapshot mount code.
		 */
		if (svp->v_count >= 2)
			VN_RELE(svp);
	}
	zfs_freevfs(vfsp);

	return (0);
}
2063
2064static int
2065zfs_vget(vfs_t *vfsp, ino_t ino, int flags, vnode_t **vpp)
2066{
2067	zfsvfs_t	*zfsvfs = vfsp->vfs_data;
2068	znode_t		*zp;
2069	int 		err;
2070
2071	/*
2072	 * zfs_zget() can't operate on virtual entries like .zfs/ or
2073	 * .zfs/snapshot/ directories, that's why we return EOPNOTSUPP.
2074	 * This will make NFS to switch to LOOKUP instead of using VGET.
2075	 */
2076	if (ino == ZFSCTL_INO_ROOT || ino == ZFSCTL_INO_SNAPDIR ||
2077	    (zfsvfs->z_shares_dir != 0 && ino == zfsvfs->z_shares_dir))
2078		return (EOPNOTSUPP);
2079
2080	ZFS_ENTER(zfsvfs);
2081	err = zfs_zget(zfsvfs, ino, &zp);
2082	if (err == 0 && zp->z_unlinked) {
2083		VN_RELE(ZTOV(zp));
2084		err = EINVAL;
2085	}
2086	if (err == 0)
2087		*vpp = ZTOV(zp);
2088	ZFS_EXIT(zfsvfs);
2089	if (err == 0)
2090		err = zfs_vnode_lock(*vpp, flags);
2091	if (err != 0)
2092		*vpp = NULL;
2093	else
2094		(*vpp)->v_hash = ino;
2095	return (err);
2096}
2097
2098static int
2099zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, int *extflagsp,
2100    struct ucred **credanonp, int *numsecflavors, int **secflavors)
2101{
2102	zfsvfs_t *zfsvfs = vfsp->vfs_data;
2103
2104	/*
2105	 * If this is regular file system vfsp is the same as
2106	 * zfsvfs->z_parent->z_vfs, but if it is snapshot,
2107	 * zfsvfs->z_parent->z_vfs represents parent file system
2108	 * which we have to use here, because only this file system
2109	 * has mnt_export configured.
2110	 */
2111	return (vfs_stdcheckexp(zfsvfs->z_parent->z_vfs, nam, extflagsp,
2112	    credanonp, numsecflavors, secflavors));
2113}
2114
/* Both fid layouts handled by zfs_fhtovp() must fit into a struct fid. */
CTASSERT(SHORT_FID_LEN <= sizeof(struct fid));
CTASSERT(LONG_FID_LEN <= sizeof(struct fid));

/*
 * Translate the file handle 'fidp' into a locked vnode.
 *
 * A short fid carries an object number and a generation; a long fid
 * additionally carries an objset id and objset generation, which is used
 * to switch to the matching objset when 'vfsp' is not itself a snapshot.
 * All of these fields are stored as byte arrays, least significant byte
 * first.
 *
 * On success *vpp is the vnode, locked according to 'flags', and 0 is
 * returned.  On failure *vpp is NULL and an errno value is returned;
 * EINVAL covers an unknown fid length, an objset that cannot be found,
 * an unlinked znode and a generation mismatch.
 */
static int
zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, int flags, vnode_t **vpp)
{
	zfsvfs_t	*zfsvfs = vfsp->vfs_data;
	znode_t		*zp;
	uint64_t	object = 0;
	uint64_t	fid_gen = 0;
	uint64_t	gen_mask;
	uint64_t	zp_gen;
	int 		i, err;

	*vpp = NULL;

	ZFS_ENTER(zfsvfs);

	/*
	 * On FreeBSD we can get snapshot's mount point or its parent file
	 * system mount point depending if snapshot is already mounted or not.
	 */
	if (zfsvfs->z_parent == zfsvfs && fidp->fid_len == LONG_FID_LEN) {
		zfid_long_t	*zlfid = (zfid_long_t *)fidp;
		uint64_t	objsetid = 0;
		uint64_t	setgen = 0;

		for (i = 0; i < sizeof (zlfid->zf_setid); i++)
			objsetid |= ((uint64_t)zlfid->zf_setid[i]) << (8 * i);

		/* setgen is decoded here but not used further below. */
		for (i = 0; i < sizeof (zlfid->zf_setgen); i++)
			setgen |= ((uint64_t)zlfid->zf_setgen[i]) << (8 * i);

		ZFS_EXIT(zfsvfs);

		/* From here on 'zfsvfs' refers to the objset named by the fid. */
		err = zfsctl_lookup_objset(vfsp, objsetid, &zfsvfs);
		if (err)
			return (SET_ERROR(EINVAL));
		ZFS_ENTER(zfsvfs);
	}

	if (fidp->fid_len == SHORT_FID_LEN || fidp->fid_len == LONG_FID_LEN) {
		zfid_short_t	*zfid = (zfid_short_t *)fidp;

		for (i = 0; i < sizeof (zfid->zf_object); i++)
			object |= ((uint64_t)zfid->zf_object[i]) << (8 * i);

		for (i = 0; i < sizeof (zfid->zf_gen); i++)
			fid_gen |= ((uint64_t)zfid->zf_gen[i]) << (8 * i);
	} else {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EINVAL));
	}

	/*
	 * A zero fid_gen means we are in .zfs or the .zfs/snapshot
	 * directory tree. If the object == zfsvfs->z_shares_dir, then
	 * we are in the .zfs/shares directory tree.
	 */
	if ((fid_gen == 0 &&
	     (object == ZFSCTL_INO_ROOT || object == ZFSCTL_INO_SNAPDIR)) ||
	    (zfsvfs->z_shares_dir != 0 && object == zfsvfs->z_shares_dir)) {
		*vpp = zfsvfs->z_ctldir;
		ASSERT(*vpp != NULL);
		if (object == ZFSCTL_INO_SNAPDIR) {
			VERIFY(zfsctl_root_lookup(*vpp, "snapshot", vpp, NULL,
			    0, NULL, NULL, NULL, NULL, NULL) == 0);
		} else if (object == zfsvfs->z_shares_dir) {
			VERIFY(zfsctl_root_lookup(*vpp, "shares", vpp, NULL,
			    0, NULL, NULL, NULL, NULL, NULL) == 0);
		} else {
			VN_HOLD(*vpp);
		}
		ZFS_EXIT(zfsvfs);
		/*
		 * NOTE(review): if the lock fails, the hold taken above
		 * does not appear to be dropped before *vpp is cleared -
		 * confirm whether a VN_RELE() is needed here.
		 */
		err = zfs_vnode_lock(*vpp, flags);
		if (err != 0)
			*vpp = NULL;
		return (err);
	}

	/*
	 * 'i' still holds sizeof (zfid->zf_gen) from the last loop above,
	 * so the mask keeps exactly as many low bits as the fid stores for
	 * the generation.
	 */
	gen_mask = -1ULL >> (64 - 8 * i);

	dprintf("getting %llu [%u mask %llx]\n", object, fid_gen, gen_mask);
	if (err = zfs_zget(zfsvfs, object, &zp)) {
		ZFS_EXIT(zfsvfs);
		return (err);
	}
	(void) sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs), &zp_gen,
	    sizeof (uint64_t));
	zp_gen = zp_gen & gen_mask;
	/* A masked generation of 0 is compared as 1. */
	if (zp_gen == 0)
		zp_gen = 1;
	if (zp->z_unlinked || zp_gen != fid_gen) {
		dprintf("znode gen (%u) != fid gen (%u)\n", zp_gen, fid_gen);
		VN_RELE(ZTOV(zp));
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EINVAL));
	}

	*vpp = ZTOV(zp);
	ZFS_EXIT(zfsvfs);
	err = zfs_vnode_lock(*vpp, flags | LK_RETRY);
	if (err == 0)
		vnode_create_vobject(*vpp, zp->z_size, curthread);
	else
		*vpp = NULL;
	return (err);
}
2223
2224/*
2225 * Block out VOPs and close zfsvfs_t::z_os
2226 *
2227 * Note, if successful, then we return with the 'z_teardown_lock' and
2228 * 'z_teardown_inactive_lock' write held.
2229 */
2230int
2231zfs_suspend_fs(zfsvfs_t *zfsvfs)
2232{
2233	int error;
2234
2235	if ((error = zfsvfs_teardown(zfsvfs, B_FALSE)) != 0)
2236		return (error);
2237	dmu_objset_disown(zfsvfs->z_os, zfsvfs);
2238
2239	return (0);
2240}
2241
/*
 * Reopen zfsvfs_t::z_os and release VOPs.
 *
 * Must be called with 'z_teardown_lock' and 'z_teardown_inactive_lock'
 * write held; both are released before returning.  'osname' names the
 * objset to own again.
 *
 * Returns 0 on success.  On failure the errno value is returned and a
 * forced unmount of the file system is attempted.
 */
int
zfs_resume_fs(zfsvfs_t *zfsvfs, const char *osname)
{
	int err;

	ASSERT(RRW_WRITE_HELD(&zfsvfs->z_teardown_lock));
	ASSERT(RW_WRITE_HELD(&zfsvfs->z_teardown_inactive_lock));

	err = dmu_objset_own(osname, DMU_OST_ZFS, B_FALSE, zfsvfs,
	    &zfsvfs->z_os);
	if (err) {
		/* Leave z_os NULL so later code can tell the reopen failed. */
		zfsvfs->z_os = NULL;
	} else {
		znode_t *zp;
		uint64_t sa_obj = 0;

		/*
		 * Make sure version hasn't changed
		 */

		err = zfs_get_zplprop(zfsvfs->z_os, ZFS_PROP_VERSION,
		    &zfsvfs->z_version);

		if (err)
			goto bail;

		err = zap_lookup(zfsvfs->z_os, MASTER_NODE_OBJ,
		    ZFS_SA_ATTRS, 8, 1, &sa_obj);

		/* A failed lookup only matters for SA-capable versions. */
		if (err && zfsvfs->z_version >= ZPL_VERSION_SA)
			goto bail;

		if ((err = sa_setup(zfsvfs->z_os, sa_obj,
		    zfs_attr_table,  ZPL_END, &zfsvfs->z_attr_table)) != 0)
			goto bail;

		if (zfsvfs->z_version >= ZPL_VERSION_SA)
			sa_register_update_callback(zfsvfs->z_os,
			    zfs_sa_upgrade);

		VERIFY(zfsvfs_setup(zfsvfs, B_FALSE) == 0);

		zfs_set_fuid_feature(zfsvfs);

		/*
		 * Attempt to re-establish all the active znodes with
		 * their dbufs.  If a zfs_rezget() fails, then we'll let
		 * any potential callers discover that via ZFS_ENTER_VERIFY_VP
		 * when they try to use their znode.
		 */
		mutex_enter(&zfsvfs->z_znodes_lock);
		for (zp = list_head(&zfsvfs->z_all_znodes); zp;
		    zp = list_next(&zfsvfs->z_all_znodes, zp)) {
			(void) zfs_rezget(zp);
		}
		mutex_exit(&zfsvfs->z_znodes_lock);
	}

bail:
	/* release the VOPs */
	rw_exit(&zfsvfs->z_teardown_inactive_lock);
	rrw_exit(&zfsvfs->z_teardown_lock, FTAG);

	if (err) {
		/*
		 * Since we couldn't reopen zfsvfs::z_os, or
		 * setup the sa framework force unmount this file system.
		 */
		if (vn_vfswlock(zfsvfs->z_vfs->vfs_vnodecovered) == 0)
			(void) dounmount(zfsvfs->z_vfs, MS_FORCE, curthread);
	}
	return (err);
}
2318
2319static void
2320zfs_freevfs(vfs_t *vfsp)
2321{
2322	zfsvfs_t *zfsvfs = vfsp->vfs_data;
2323
2324#ifdef sun
2325	/*
2326	 * If this is a snapshot, we have an extra VFS_HOLD on our parent
2327	 * from zfs_mount().  Release it here.  If we came through
2328	 * zfs_mountroot() instead, we didn't grab an extra hold, so
2329	 * skip the VFS_RELE for rootvfs.
2330	 */
2331	if (zfsvfs->z_issnap && (vfsp != rootvfs))
2332		VFS_RELE(zfsvfs->z_parent->z_vfs);
2333#endif	/* sun */
2334
2335	zfsvfs_free(zfsvfs);
2336
2337	atomic_add_32(&zfs_active_fs_count, -1);
2338}
2339
#ifdef __i386__
/* Value of desiredvnodes saved by zfs_vnodes_adjust(). */
static int desiredvnodes_backup;
#endif

/*
 * On i386, remember the current desiredvnodes and, if it still equals
 * the value computed by the formula below (i.e. it was not tuned by the
 * administrator), lower it to three quarters of that value.  Does
 * nothing on other architectures.
 */
static void
zfs_vnodes_adjust(void)
{
#ifdef __i386__
	int untuned;

	desiredvnodes_backup = desiredvnodes;

	/*
	 * Same calculation as the one done in vntblinit().
	 */
	untuned = min(maxproc + cnt.v_page_count / 4, 2 *
	    vm_kmem_size / (5 * (sizeof(struct vm_object) +
	    sizeof(struct vnode))));
	if (untuned != desiredvnodes)
		return;

	desiredvnodes = (3 * untuned) / 4;
#endif
}
2364
/*
 * On i386, restore desiredvnodes from the value saved in
 * desiredvnodes_backup.  Does nothing on other architectures.
 */
static void
zfs_vnodes_adjust_back(void)
{
#ifdef __i386__
	desiredvnodes = desiredvnodes_backup;
#endif
}
2373
2374void
2375zfs_init(void)
2376{
2377
2378	printf("ZFS filesystem version: " ZPL_VERSION_STRING "\n");
2379
2380	/*
2381	 * Initialize .zfs directory structures
2382	 */
2383	zfsctl_init();
2384
2385	/*
2386	 * Initialize znode cache, vnode ops, etc...
2387	 */
2388	zfs_znode_init();
2389
2390	/*
2391	 * Reduce number of vnodes. Originally number of vnodes is calculated
2392	 * with UFS inode in mind. We reduce it here, because it's too big for
2393	 * ZFS/i386.
2394	 */
2395	zfs_vnodes_adjust();
2396
2397	dmu_objset_register_type(DMU_OST_ZFS, zfs_space_delta_cb);
2398}
2399
/*
 * Tear down what zfs_init() set up: the '.zfs' directory and znode
 * subsystems, then the vnode limit adjustment.
 */
void
zfs_fini(void)
{
	zfsctl_fini();
	zfs_znode_fini();

	/* Undo zfs_vnodes_adjust(). */
	zfs_vnodes_adjust_back();
}
2407
2408int
2409zfs_busy(void)
2410{
2411	return (zfs_active_fs_count != 0);
2412}
2413
2414int
2415zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers)
2416{
2417	int error;
2418	objset_t *os = zfsvfs->z_os;
2419	dmu_tx_t *tx;
2420
2421	if (newvers < ZPL_VERSION_INITIAL || newvers > ZPL_VERSION)
2422		return (SET_ERROR(EINVAL));
2423
2424	if (newvers < zfsvfs->z_version)
2425		return (SET_ERROR(EINVAL));
2426
2427	if (zfs_spa_version_map(newvers) >
2428	    spa_version(dmu_objset_spa(zfsvfs->z_os)))
2429		return (SET_ERROR(ENOTSUP));
2430
2431	tx = dmu_tx_create(os);
2432	dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_FALSE, ZPL_VERSION_STR);
2433	if (newvers >= ZPL_VERSION_SA && !zfsvfs->z_use_sa) {
2434		dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_TRUE,
2435		    ZFS_SA_ATTRS);
2436		dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
2437	}
2438	error = dmu_tx_assign(tx, TXG_WAIT);
2439	if (error) {
2440		dmu_tx_abort(tx);
2441		return (error);
2442	}
2443
2444	error = zap_update(os, MASTER_NODE_OBJ, ZPL_VERSION_STR,
2445	    8, 1, &newvers, tx);
2446
2447	if (error) {
2448		dmu_tx_commit(tx);
2449		return (error);
2450	}
2451
2452	if (newvers >= ZPL_VERSION_SA && !zfsvfs->z_use_sa) {
2453		uint64_t sa_obj;
2454
2455		ASSERT3U(spa_version(dmu_objset_spa(zfsvfs->z_os)), >=,
2456		    SPA_VERSION_SA);
2457		sa_obj = zap_create(os, DMU_OT_SA_MASTER_NODE,
2458		    DMU_OT_NONE, 0, tx);
2459
2460		error = zap_add(os, MASTER_NODE_OBJ,
2461		    ZFS_SA_ATTRS, 8, 1, &sa_obj, tx);
2462		ASSERT0(error);
2463
2464		VERIFY(0 == sa_set_sa_object(os, sa_obj));
2465		sa_register_update_callback(os, zfs_sa_upgrade);
2466	}
2467
2468	spa_history_log_internal_ds(dmu_objset_ds(os), "upgrade", tx,
2469	    "from %llu to %llu", zfsvfs->z_version, newvers);
2470
2471	dmu_tx_commit(tx);
2472
2473	zfsvfs->z_version = newvers;
2474
2475	zfs_set_fuid_feature(zfsvfs);
2476
2477	return (0);
2478}
2479
2480/*
2481 * Read a property stored within the master node.
2482 */
2483int
2484zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value)
2485{
2486	const char *pname;
2487	int error = ENOENT;
2488
2489	/*
2490	 * Look up the file system's value for the property.  For the
2491	 * version property, we look up a slightly different string.
2492	 */
2493	if (prop == ZFS_PROP_VERSION)
2494		pname = ZPL_VERSION_STR;
2495	else
2496		pname = zfs_prop_to_name(prop);
2497
2498	if (os != NULL)
2499		error = zap_lookup(os, MASTER_NODE_OBJ, pname, 8, 1, value);
2500
2501	if (error == ENOENT) {
2502		/* No value set, use the default value */
2503		switch (prop) {
2504		case ZFS_PROP_VERSION:
2505			*value = ZPL_VERSION;
2506			break;
2507		case ZFS_PROP_NORMALIZE:
2508		case ZFS_PROP_UTF8ONLY:
2509			*value = 0;
2510			break;
2511		case ZFS_PROP_CASE:
2512			*value = ZFS_CASE_SENSITIVE;
2513			break;
2514		default:
2515			return (error);
2516		}
2517		error = 0;
2518	}
2519	return (error);
2520}
2521
#ifdef _KERNEL
/*
 * Rewrite f_mntfromname of every entry on the mount list after a rename
 * from 'oldname' to 'newname'.
 *
 * An entry whose name equals 'oldname' gets 'newname'.  An entry whose
 * name starts with 'oldname' followed by '/' or '@' has that leading
 * part replaced by 'newname'.  The list is walked with mountlist_mtx
 * held.
 */
void
zfsvfs_update_fromname(const char *oldname, const char *newname)
{
	char renamed[MAXPATHLEN];
	struct mount *mp;
	size_t prefixlen = strlen(oldname);

	mtx_lock(&mountlist_mtx);
	TAILQ_FOREACH(mp, &mountlist, mnt_list) {
		char *from = mp->mnt_stat.f_mntfromname;

		if (strcmp(from, oldname) == 0) {
			/* The renamed dataset itself. */
			(void)strlcpy(from, newname,
			    sizeof(mp->mnt_stat.f_mntfromname));
		} else if (strncmp(from, oldname, prefixlen) == 0 &&
		    (from[prefixlen] == '/' || from[prefixlen] == '@')) {
			/* A name below it: keep the tail, swap the prefix. */
			(void)snprintf(renamed, sizeof(renamed), "%s%s",
			    newname, from + prefixlen);
			(void)strlcpy(from, renamed,
			    sizeof(mp->mnt_stat.f_mntfromname));
		}
	}
	mtx_unlock(&mountlist_mtx);
}
#endif
2553