zfs_vfsops.c revision 242554
1168404Spjd/*
2168404Spjd * CDDL HEADER START
3168404Spjd *
4168404Spjd * The contents of this file are subject to the terms of the
5168404Spjd * Common Development and Distribution License (the "License").
6168404Spjd * You may not use this file except in compliance with the License.
7168404Spjd *
8168404Spjd * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9168404Spjd * or http://www.opensolaris.org/os/licensing.
10168404Spjd * See the License for the specific language governing permissions
11168404Spjd * and limitations under the License.
12168404Spjd *
13168404Spjd * When distributing Covered Code, include this CDDL HEADER in each
14168404Spjd * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15168404Spjd * If applicable, add the following below this CDDL HEADER, with the
16168404Spjd * fields enclosed by brackets "[]" replaced with your own identifying
17168404Spjd * information: Portions Copyright [yyyy] [name of copyright owner]
18168404Spjd *
19168404Spjd * CDDL HEADER END
20168404Spjd */
21168404Spjd/*
22219089Spjd * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23229565Smm * Copyright (c) 2011 Pawel Jakub Dawidek <pawel@dawidek.net>.
24229565Smm * All rights reserved.
25168404Spjd */
26168404Spjd
27219089Spjd/* Portions Copyright 2010 Robert Milkowski */
28219089Spjd
29168404Spjd#include <sys/types.h>
30168404Spjd#include <sys/param.h>
31168404Spjd#include <sys/systm.h>
32168404Spjd#include <sys/kernel.h>
33168404Spjd#include <sys/sysmacros.h>
34168404Spjd#include <sys/kmem.h>
35168404Spjd#include <sys/acl.h>
36168404Spjd#include <sys/vnode.h>
37168404Spjd#include <sys/vfs.h>
38168404Spjd#include <sys/mntent.h>
39168404Spjd#include <sys/mount.h>
40168404Spjd#include <sys/cmn_err.h>
41168404Spjd#include <sys/zfs_znode.h>
42168404Spjd#include <sys/zfs_dir.h>
43168404Spjd#include <sys/zil.h>
44168404Spjd#include <sys/fs/zfs.h>
45168404Spjd#include <sys/dmu.h>
46168404Spjd#include <sys/dsl_prop.h>
47168404Spjd#include <sys/dsl_dataset.h>
48185029Spjd#include <sys/dsl_deleg.h>
49168404Spjd#include <sys/spa.h>
50168404Spjd#include <sys/zap.h>
51219089Spjd#include <sys/sa.h>
52168404Spjd#include <sys/varargs.h>
53168962Spjd#include <sys/policy.h>
54168404Spjd#include <sys/atomic.h>
55168404Spjd#include <sys/zfs_ioctl.h>
56168404Spjd#include <sys/zfs_ctldir.h>
57185029Spjd#include <sys/zfs_fuid.h>
58168962Spjd#include <sys/sunddi.h>
59168404Spjd#include <sys/dnlc.h>
60185029Spjd#include <sys/dmu_objset.h>
61185029Spjd#include <sys/spa_boot.h>
62219089Spjd#include <sys/sa.h>
63232728Smm#include <sys/jail.h>
64219089Spjd#include "zfs_comutil.h"
65168404Spjd
66168404Spjdstruct mtx zfs_debug_mtx;
67168404SpjdMTX_SYSINIT(zfs_debug_mtx, &zfs_debug_mtx, "zfs_debug", MTX_DEF);
68185029Spjd
69168404SpjdSYSCTL_NODE(_vfs, OID_AUTO, zfs, CTLFLAG_RW, 0, "ZFS file system");
70185029Spjd
71219089Spjdint zfs_super_owner;
72185029SpjdSYSCTL_INT(_vfs_zfs, OID_AUTO, super_owner, CTLFLAG_RW, &zfs_super_owner, 0,
73185029Spjd    "File system owner can perform privileged operation on his file systems");
74185029Spjd
75219089Spjdint zfs_debug_level;
76168713SpjdTUNABLE_INT("vfs.zfs.debug", &zfs_debug_level);
77168404SpjdSYSCTL_INT(_vfs_zfs, OID_AUTO, debug, CTLFLAG_RW, &zfs_debug_level, 0,
78168404Spjd    "Debug level");
79168404Spjd
80185029SpjdSYSCTL_NODE(_vfs_zfs, OID_AUTO, version, CTLFLAG_RD, 0, "ZFS versions");
81185029Spjdstatic int zfs_version_acl = ZFS_ACL_VERSION;
82185029SpjdSYSCTL_INT(_vfs_zfs_version, OID_AUTO, acl, CTLFLAG_RD, &zfs_version_acl, 0,
83185029Spjd    "ZFS_ACL_VERSION");
84185029Spjdstatic int zfs_version_spa = SPA_VERSION;
85185029SpjdSYSCTL_INT(_vfs_zfs_version, OID_AUTO, spa, CTLFLAG_RD, &zfs_version_spa, 0,
86185029Spjd    "SPA_VERSION");
87185029Spjdstatic int zfs_version_zpl = ZPL_VERSION;
88185029SpjdSYSCTL_INT(_vfs_zfs_version, OID_AUTO, zpl, CTLFLAG_RD, &zfs_version_zpl, 0,
89185029Spjd    "ZPL_VERSION");
90185029Spjd
91191990Sattiliostatic int zfs_mount(vfs_t *vfsp);
92191990Sattiliostatic int zfs_umount(vfs_t *vfsp, int fflag);
93191990Sattiliostatic int zfs_root(vfs_t *vfsp, int flags, vnode_t **vpp);
94191990Sattiliostatic int zfs_statfs(vfs_t *vfsp, struct statfs *statp);
95168404Spjdstatic int zfs_vget(vfs_t *vfsp, ino_t ino, int flags, vnode_t **vpp);
96191990Sattiliostatic int zfs_sync(vfs_t *vfsp, int waitfor);
97196982Spjdstatic int zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, int *extflagsp,
98196982Spjd    struct ucred **credanonp, int *numsecflavors, int **secflavors);
99222167Srmacklemstatic int zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, int flags, vnode_t **vpp);
100168404Spjdstatic void zfs_objset_close(zfsvfs_t *zfsvfs);
101168404Spjdstatic void zfs_freevfs(vfs_t *vfsp);
102168404Spjd
103168404Spjdstatic struct vfsops zfs_vfsops = {
104168404Spjd	.vfs_mount =		zfs_mount,
105168404Spjd	.vfs_unmount =		zfs_umount,
106168404Spjd	.vfs_root =		zfs_root,
107168404Spjd	.vfs_statfs =		zfs_statfs,
108168404Spjd	.vfs_vget =		zfs_vget,
109168404Spjd	.vfs_sync =		zfs_sync,
110196982Spjd	.vfs_checkexp =		zfs_checkexp,
111168404Spjd	.vfs_fhtovp =		zfs_fhtovp,
112168404Spjd};
113168404Spjd
114185029SpjdVFS_SET(zfs_vfsops, zfs, VFCF_JAIL | VFCF_DELEGADMIN);
115168404Spjd
/*
 * Number of active file systems.  Tracked so that the module is not
 * unloaded after a forced unmount (umount -f).
 * File-scope static storage, so it starts out as zero.
 */
static uint32_t	zfs_active_fs_count;
122168404Spjd
123168404Spjd/*ARGSUSED*/
124168404Spjdstatic int
125191990Sattiliozfs_sync(vfs_t *vfsp, int waitfor)
126168404Spjd{
127168404Spjd
128168404Spjd	/*
129168404Spjd	 * Data integrity is job one.  We don't want a compromised kernel
130168404Spjd	 * writing to the storage pool, so we never sync during panic.
131168404Spjd	 */
132168404Spjd	if (panicstr)
133168404Spjd		return (0);
134168404Spjd
135168404Spjd	if (vfsp != NULL) {
136168404Spjd		/*
137168404Spjd		 * Sync a specific filesystem.
138168404Spjd		 */
139168404Spjd		zfsvfs_t *zfsvfs = vfsp->vfs_data;
140209962Smm		dsl_pool_t *dp;
141168404Spjd		int error;
142168404Spjd
143191990Sattilio		error = vfs_stdsync(vfsp, waitfor);
144168404Spjd		if (error != 0)
145168404Spjd			return (error);
146168404Spjd
147168404Spjd		ZFS_ENTER(zfsvfs);
148209962Smm		dp = dmu_objset_pool(zfsvfs->z_os);
149209962Smm
150209962Smm		/*
151209962Smm		 * If the system is shutting down, then skip any
152209962Smm		 * filesystems which may exist on a suspended pool.
153209962Smm		 */
154209962Smm		if (sys_shutdown && spa_suspended(dp->dp_spa)) {
155209962Smm			ZFS_EXIT(zfsvfs);
156209962Smm			return (0);
157209962Smm		}
158209962Smm
159168404Spjd		if (zfsvfs->z_log != NULL)
160219089Spjd			zil_commit(zfsvfs->z_log, 0);
161219089Spjd
162168404Spjd		ZFS_EXIT(zfsvfs);
163168404Spjd	} else {
164168404Spjd		/*
165168404Spjd		 * Sync all ZFS filesystems.  This is what happens when you
166168404Spjd		 * run sync(1M).  Unlike other filesystems, ZFS honors the
167168404Spjd		 * request by waiting for all pools to commit all dirty data.
168168404Spjd		 */
169168404Spjd		spa_sync_allpools();
170168404Spjd	}
171168404Spjd
172168404Spjd	return (0);
173168404Spjd}
174168404Spjd
175219089Spjd#ifndef __FreeBSD__
176219089Spjdstatic int
177219089Spjdzfs_create_unique_device(dev_t *dev)
178219089Spjd{
179219089Spjd	major_t new_major;
180219089Spjd
181219089Spjd	do {
182219089Spjd		ASSERT3U(zfs_minor, <=, MAXMIN32);
183219089Spjd		minor_t start = zfs_minor;
184219089Spjd		do {
185219089Spjd			mutex_enter(&zfs_dev_mtx);
186219089Spjd			if (zfs_minor >= MAXMIN32) {
187219089Spjd				/*
188219089Spjd				 * If we're still using the real major
189219089Spjd				 * keep out of /dev/zfs and /dev/zvol minor
190219089Spjd				 * number space.  If we're using a getudev()'ed
191219089Spjd				 * major number, we can use all of its minors.
192219089Spjd				 */
193219089Spjd				if (zfs_major == ddi_name_to_major(ZFS_DRIVER))
194219089Spjd					zfs_minor = ZFS_MIN_MINOR;
195219089Spjd				else
196219089Spjd					zfs_minor = 0;
197219089Spjd			} else {
198219089Spjd				zfs_minor++;
199219089Spjd			}
200219089Spjd			*dev = makedevice(zfs_major, zfs_minor);
201219089Spjd			mutex_exit(&zfs_dev_mtx);
202219089Spjd		} while (vfs_devismounted(*dev) && zfs_minor != start);
203219089Spjd		if (zfs_minor == start) {
204219089Spjd			/*
205219089Spjd			 * We are using all ~262,000 minor numbers for the
206219089Spjd			 * current major number.  Create a new major number.
207219089Spjd			 */
208219089Spjd			if ((new_major = getudev()) == (major_t)-1) {
209219089Spjd				cmn_err(CE_WARN,
210219089Spjd				    "zfs_mount: Can't get unique major "
211219089Spjd				    "device number.");
212219089Spjd				return (-1);
213219089Spjd			}
214219089Spjd			mutex_enter(&zfs_dev_mtx);
215219089Spjd			zfs_major = new_major;
216219089Spjd			zfs_minor = 0;
217219089Spjd
218219089Spjd			mutex_exit(&zfs_dev_mtx);
219219089Spjd		} else {
220219089Spjd			break;
221219089Spjd		}
222219089Spjd		/* CONSTANTCONDITION */
223219089Spjd	} while (1);
224219089Spjd
225219089Spjd	return (0);
226219089Spjd}
227219089Spjd#endif	/* !__FreeBSD__ */
228219089Spjd
229168404Spjdstatic void
230168404Spjdatime_changed_cb(void *arg, uint64_t newval)
231168404Spjd{
232168404Spjd	zfsvfs_t *zfsvfs = arg;
233168404Spjd
234168404Spjd	if (newval == TRUE) {
235168404Spjd		zfsvfs->z_atime = TRUE;
236168404Spjd		zfsvfs->z_vfs->vfs_flag &= ~MNT_NOATIME;
237168404Spjd		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME);
238168404Spjd		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_ATIME, NULL, 0);
239168404Spjd	} else {
240168404Spjd		zfsvfs->z_atime = FALSE;
241168404Spjd		zfsvfs->z_vfs->vfs_flag |= MNT_NOATIME;
242168404Spjd		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_ATIME);
243168404Spjd		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME, NULL, 0);
244168404Spjd	}
245168404Spjd}
246168404Spjd
247168404Spjdstatic void
248168404Spjdxattr_changed_cb(void *arg, uint64_t newval)
249168404Spjd{
250168404Spjd	zfsvfs_t *zfsvfs = arg;
251168404Spjd
252168404Spjd	if (newval == TRUE) {
253168404Spjd		/* XXX locking on vfs_flag? */
254168404Spjd#ifdef TODO
255168404Spjd		zfsvfs->z_vfs->vfs_flag |= VFS_XATTR;
256168404Spjd#endif
257168404Spjd		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOXATTR);
258168404Spjd		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_XATTR, NULL, 0);
259168404Spjd	} else {
260168404Spjd		/* XXX locking on vfs_flag? */
261168404Spjd#ifdef TODO
262168404Spjd		zfsvfs->z_vfs->vfs_flag &= ~VFS_XATTR;
263168404Spjd#endif
264168404Spjd		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_XATTR);
265168404Spjd		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOXATTR, NULL, 0);
266168404Spjd	}
267168404Spjd}
268168404Spjd
269168404Spjdstatic void
270168404Spjdblksz_changed_cb(void *arg, uint64_t newval)
271168404Spjd{
272168404Spjd	zfsvfs_t *zfsvfs = arg;
273168404Spjd
274168404Spjd	if (newval < SPA_MINBLOCKSIZE ||
275168404Spjd	    newval > SPA_MAXBLOCKSIZE || !ISP2(newval))
276168404Spjd		newval = SPA_MAXBLOCKSIZE;
277168404Spjd
278168404Spjd	zfsvfs->z_max_blksz = newval;
279204101Spjd	zfsvfs->z_vfs->mnt_stat.f_iosize = newval;
280168404Spjd}
281168404Spjd
282168404Spjdstatic void
283168404Spjdreadonly_changed_cb(void *arg, uint64_t newval)
284168404Spjd{
285168404Spjd	zfsvfs_t *zfsvfs = arg;
286168404Spjd
287168404Spjd	if (newval) {
288168404Spjd		/* XXX locking on vfs_flag? */
289168404Spjd		zfsvfs->z_vfs->vfs_flag |= VFS_RDONLY;
290168404Spjd		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RW);
291168404Spjd		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RO, NULL, 0);
292168404Spjd	} else {
293168404Spjd		/* XXX locking on vfs_flag? */
294168404Spjd		zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY;
295168404Spjd		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RO);
296168404Spjd		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RW, NULL, 0);
297168404Spjd	}
298168404Spjd}
299168404Spjd
300168404Spjdstatic void
301168404Spjdsetuid_changed_cb(void *arg, uint64_t newval)
302168404Spjd{
303168404Spjd	zfsvfs_t *zfsvfs = arg;
304168404Spjd
305168404Spjd	if (newval == FALSE) {
306168404Spjd		zfsvfs->z_vfs->vfs_flag |= VFS_NOSETUID;
307168404Spjd		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_SETUID);
308168404Spjd		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID, NULL, 0);
309168404Spjd	} else {
310168404Spjd		zfsvfs->z_vfs->vfs_flag &= ~VFS_NOSETUID;
311168404Spjd		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID);
312168404Spjd		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_SETUID, NULL, 0);
313168404Spjd	}
314168404Spjd}
315168404Spjd
316168404Spjdstatic void
317168404Spjdexec_changed_cb(void *arg, uint64_t newval)
318168404Spjd{
319168404Spjd	zfsvfs_t *zfsvfs = arg;
320168404Spjd
321168404Spjd	if (newval == FALSE) {
322168404Spjd		zfsvfs->z_vfs->vfs_flag |= VFS_NOEXEC;
323168404Spjd		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_EXEC);
324168404Spjd		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC, NULL, 0);
325168404Spjd	} else {
326168404Spjd		zfsvfs->z_vfs->vfs_flag &= ~VFS_NOEXEC;
327168404Spjd		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC);
328168404Spjd		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_EXEC, NULL, 0);
329168404Spjd	}
330168404Spjd}
331168404Spjd
332185029Spjd/*
333185029Spjd * The nbmand mount option can be changed at mount time.
334185029Spjd * We can't allow it to be toggled on live file systems or incorrect
335185029Spjd * behavior may be seen from cifs clients
336185029Spjd *
337185029Spjd * This property isn't registered via dsl_prop_register(), but this callback
338185029Spjd * will be called when a file system is first mounted
339185029Spjd */
340168404Spjdstatic void
341185029Spjdnbmand_changed_cb(void *arg, uint64_t newval)
342185029Spjd{
343185029Spjd	zfsvfs_t *zfsvfs = arg;
344185029Spjd	if (newval == FALSE) {
345185029Spjd		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NBMAND);
346185029Spjd		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NONBMAND, NULL, 0);
347185029Spjd	} else {
348185029Spjd		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NONBMAND);
349185029Spjd		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NBMAND, NULL, 0);
350185029Spjd	}
351185029Spjd}
352185029Spjd
353185029Spjdstatic void
354168404Spjdsnapdir_changed_cb(void *arg, uint64_t newval)
355168404Spjd{
356168404Spjd	zfsvfs_t *zfsvfs = arg;
357168404Spjd
358168404Spjd	zfsvfs->z_show_ctldir = newval;
359168404Spjd}
360168404Spjd
361168404Spjdstatic void
362185029Spjdvscan_changed_cb(void *arg, uint64_t newval)
363185029Spjd{
364185029Spjd	zfsvfs_t *zfsvfs = arg;
365185029Spjd
366185029Spjd	zfsvfs->z_vscan = newval;
367185029Spjd}
368185029Spjd
369185029Spjdstatic void
370224174Smmacl_mode_changed_cb(void *arg, uint64_t newval)
371224174Smm{
372224174Smm	zfsvfs_t *zfsvfs = arg;
373224174Smm
374224174Smm	zfsvfs->z_acl_mode = newval;
375224174Smm}
376224174Smm
377224174Smmstatic void
378168404Spjdacl_inherit_changed_cb(void *arg, uint64_t newval)
379168404Spjd{
380168404Spjd	zfsvfs_t *zfsvfs = arg;
381168404Spjd
382168404Spjd	zfsvfs->z_acl_inherit = newval;
383168404Spjd}
384168404Spjd
385168404Spjdstatic int
386168404Spjdzfs_register_callbacks(vfs_t *vfsp)
387168404Spjd{
388168404Spjd	struct dsl_dataset *ds = NULL;
389168404Spjd	objset_t *os = NULL;
390168404Spjd	zfsvfs_t *zfsvfs = NULL;
391185029Spjd	uint64_t nbmand;
392219089Spjd	int readonly, do_readonly = B_FALSE;
393219089Spjd	int setuid, do_setuid = B_FALSE;
394219089Spjd	int exec, do_exec = B_FALSE;
395219089Spjd	int xattr, do_xattr = B_FALSE;
396219089Spjd	int atime, do_atime = B_FALSE;
397168404Spjd	int error = 0;
398168404Spjd
399168404Spjd	ASSERT(vfsp);
400168404Spjd	zfsvfs = vfsp->vfs_data;
401168404Spjd	ASSERT(zfsvfs);
402168404Spjd	os = zfsvfs->z_os;
403168404Spjd
404168404Spjd	/*
405196965Spjd	 * This function can be called for a snapshot when we update snapshot's
406196965Spjd	 * mount point, which isn't really supported.
407196965Spjd	 */
408196965Spjd	if (dmu_objset_is_snapshot(os))
409196965Spjd		return (EOPNOTSUPP);
410196965Spjd
411196965Spjd	/*
412168404Spjd	 * The act of registering our callbacks will destroy any mount
413168404Spjd	 * options we may have.  In order to enable temporary overrides
414168404Spjd	 * of mount options, we stash away the current values and
415168404Spjd	 * restore them after we register the callbacks.
416168404Spjd	 */
417219089Spjd	if (vfs_optionisset(vfsp, MNTOPT_RO, NULL) ||
418219089Spjd	    !spa_writeable(dmu_objset_spa(os))) {
419168404Spjd		readonly = B_TRUE;
420168404Spjd		do_readonly = B_TRUE;
421168404Spjd	} else if (vfs_optionisset(vfsp, MNTOPT_RW, NULL)) {
422168404Spjd		readonly = B_FALSE;
423168404Spjd		do_readonly = B_TRUE;
424168404Spjd	}
425168404Spjd	if (vfs_optionisset(vfsp, MNTOPT_NOSUID, NULL)) {
426168404Spjd		setuid = B_FALSE;
427168404Spjd		do_setuid = B_TRUE;
428168404Spjd	} else {
429168404Spjd		if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) {
430168404Spjd			setuid = B_FALSE;
431168404Spjd			do_setuid = B_TRUE;
432168404Spjd		} else if (vfs_optionisset(vfsp, MNTOPT_SETUID, NULL)) {
433168404Spjd			setuid = B_TRUE;
434168404Spjd			do_setuid = B_TRUE;
435168404Spjd		}
436168404Spjd	}
437168404Spjd	if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL)) {
438168404Spjd		exec = B_FALSE;
439168404Spjd		do_exec = B_TRUE;
440168404Spjd	} else if (vfs_optionisset(vfsp, MNTOPT_EXEC, NULL)) {
441168404Spjd		exec = B_TRUE;
442168404Spjd		do_exec = B_TRUE;
443168404Spjd	}
444168404Spjd	if (vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL)) {
445168404Spjd		xattr = B_FALSE;
446168404Spjd		do_xattr = B_TRUE;
447168404Spjd	} else if (vfs_optionisset(vfsp, MNTOPT_XATTR, NULL)) {
448168404Spjd		xattr = B_TRUE;
449168404Spjd		do_xattr = B_TRUE;
450168404Spjd	}
451185029Spjd	if (vfs_optionisset(vfsp, MNTOPT_NOATIME, NULL)) {
452185029Spjd		atime = B_FALSE;
453185029Spjd		do_atime = B_TRUE;
454185029Spjd	} else if (vfs_optionisset(vfsp, MNTOPT_ATIME, NULL)) {
455185029Spjd		atime = B_TRUE;
456185029Spjd		do_atime = B_TRUE;
457185029Spjd	}
458168404Spjd
459168404Spjd	/*
460185029Spjd	 * nbmand is a special property.  It can only be changed at
461185029Spjd	 * mount time.
462185029Spjd	 *
463185029Spjd	 * This is weird, but it is documented to only be changeable
464185029Spjd	 * at mount time.
465185029Spjd	 */
466185029Spjd	if (vfs_optionisset(vfsp, MNTOPT_NONBMAND, NULL)) {
467185029Spjd		nbmand = B_FALSE;
468185029Spjd	} else if (vfs_optionisset(vfsp, MNTOPT_NBMAND, NULL)) {
469185029Spjd		nbmand = B_TRUE;
470185029Spjd	} else {
471185029Spjd		char osname[MAXNAMELEN];
472185029Spjd
473185029Spjd		dmu_objset_name(os, osname);
474185029Spjd		if (error = dsl_prop_get_integer(osname, "nbmand", &nbmand,
475185029Spjd		    NULL)) {
476185029Spjd			return (error);
477185029Spjd		}
478185029Spjd	}
479185029Spjd
480185029Spjd	/*
481168404Spjd	 * Register property callbacks.
482168404Spjd	 *
483168404Spjd	 * It would probably be fine to just check for i/o error from
484168404Spjd	 * the first prop_register(), but I guess I like to go
485168404Spjd	 * overboard...
486168404Spjd	 */
487168404Spjd	ds = dmu_objset_ds(os);
488168404Spjd	error = dsl_prop_register(ds, "atime", atime_changed_cb, zfsvfs);
489168404Spjd	error = error ? error : dsl_prop_register(ds,
490168404Spjd	    "xattr", xattr_changed_cb, zfsvfs);
491168404Spjd	error = error ? error : dsl_prop_register(ds,
492168404Spjd	    "recordsize", blksz_changed_cb, zfsvfs);
493168404Spjd	error = error ? error : dsl_prop_register(ds,
494168404Spjd	    "readonly", readonly_changed_cb, zfsvfs);
495168404Spjd	error = error ? error : dsl_prop_register(ds,
496168404Spjd	    "setuid", setuid_changed_cb, zfsvfs);
497168404Spjd	error = error ? error : dsl_prop_register(ds,
498168404Spjd	    "exec", exec_changed_cb, zfsvfs);
499168404Spjd	error = error ? error : dsl_prop_register(ds,
500168404Spjd	    "snapdir", snapdir_changed_cb, zfsvfs);
501168404Spjd	error = error ? error : dsl_prop_register(ds,
502224174Smm	    "aclmode", acl_mode_changed_cb, zfsvfs);
503224174Smm	error = error ? error : dsl_prop_register(ds,
504168404Spjd	    "aclinherit", acl_inherit_changed_cb, zfsvfs);
505185029Spjd	error = error ? error : dsl_prop_register(ds,
506185029Spjd	    "vscan", vscan_changed_cb, zfsvfs);
507168404Spjd	if (error)
508168404Spjd		goto unregister;
509168404Spjd
510168404Spjd	/*
511168404Spjd	 * Invoke our callbacks to restore temporary mount options.
512168404Spjd	 */
513168404Spjd	if (do_readonly)
514168404Spjd		readonly_changed_cb(zfsvfs, readonly);
515168404Spjd	if (do_setuid)
516168404Spjd		setuid_changed_cb(zfsvfs, setuid);
517168404Spjd	if (do_exec)
518168404Spjd		exec_changed_cb(zfsvfs, exec);
519168404Spjd	if (do_xattr)
520168404Spjd		xattr_changed_cb(zfsvfs, xattr);
521185029Spjd	if (do_atime)
522185029Spjd		atime_changed_cb(zfsvfs, atime);
523168404Spjd
524185029Spjd	nbmand_changed_cb(zfsvfs, nbmand);
525185029Spjd
526168404Spjd	return (0);
527168404Spjd
528168404Spjdunregister:
529168404Spjd	/*
530168404Spjd	 * We may attempt to unregister some callbacks that are not
531168404Spjd	 * registered, but this is OK; it will simply return ENOMSG,
532168404Spjd	 * which we will ignore.
533168404Spjd	 */
534168404Spjd	(void) dsl_prop_unregister(ds, "atime", atime_changed_cb, zfsvfs);
535168404Spjd	(void) dsl_prop_unregister(ds, "xattr", xattr_changed_cb, zfsvfs);
536168404Spjd	(void) dsl_prop_unregister(ds, "recordsize", blksz_changed_cb, zfsvfs);
537168404Spjd	(void) dsl_prop_unregister(ds, "readonly", readonly_changed_cb, zfsvfs);
538168404Spjd	(void) dsl_prop_unregister(ds, "setuid", setuid_changed_cb, zfsvfs);
539168404Spjd	(void) dsl_prop_unregister(ds, "exec", exec_changed_cb, zfsvfs);
540168404Spjd	(void) dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, zfsvfs);
541224174Smm	(void) dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb, zfsvfs);
542168404Spjd	(void) dsl_prop_unregister(ds, "aclinherit", acl_inherit_changed_cb,
543168404Spjd	    zfsvfs);
544185029Spjd	(void) dsl_prop_unregister(ds, "vscan", vscan_changed_cb, zfsvfs);
545168404Spjd	return (error);
546168404Spjd
547168404Spjd}
548168404Spjd
549219089Spjdstatic int
550219089Spjdzfs_space_delta_cb(dmu_object_type_t bonustype, void *data,
551219089Spjd    uint64_t *userp, uint64_t *groupp)
552209962Smm{
553219089Spjd	znode_phys_t *znp = data;
554219089Spjd	int error = 0;
555209962Smm
556219089Spjd	/*
557219089Spjd	 * Is it a valid type of object to track?
558219089Spjd	 */
559219089Spjd	if (bonustype != DMU_OT_ZNODE && bonustype != DMU_OT_SA)
560219089Spjd		return (ENOENT);
561209962Smm
562219089Spjd	/*
563219089Spjd	 * If we have a NULL data pointer
564219089Spjd	 * then assume the id's aren't changing and
565219089Spjd	 * return EEXIST to the dmu to let it know to
566219089Spjd	 * use the same ids
567219089Spjd	 */
568219089Spjd	if (data == NULL)
569219089Spjd		return (EEXIST);
570209962Smm
571219089Spjd	if (bonustype == DMU_OT_ZNODE) {
572219089Spjd		*userp = znp->zp_uid;
573219089Spjd		*groupp = znp->zp_gid;
574219089Spjd	} else {
575219089Spjd		int hdrsize;
576209962Smm
577219089Spjd		ASSERT(bonustype == DMU_OT_SA);
578219089Spjd		hdrsize = sa_hdrsize(data);
579209962Smm
580219089Spjd		if (hdrsize != 0) {
581219089Spjd			*userp = *((uint64_t *)((uintptr_t)data + hdrsize +
582219089Spjd			    SA_UID_OFFSET));
583219089Spjd			*groupp = *((uint64_t *)((uintptr_t)data + hdrsize +
584219089Spjd			    SA_GID_OFFSET));
585219089Spjd		} else {
586219089Spjd			/*
587219089Spjd			 * This should only happen for newly created
588219089Spjd			 * files that haven't had the znode data filled
589219089Spjd			 * in yet.
590219089Spjd			 */
591219089Spjd			*userp = 0;
592219089Spjd			*groupp = 0;
593219089Spjd		}
594209962Smm	}
595219089Spjd	return (error);
596209962Smm}
597209962Smm
598209962Smmstatic void
599209962Smmfuidstr_to_sid(zfsvfs_t *zfsvfs, const char *fuidstr,
600209962Smm    char *domainbuf, int buflen, uid_t *ridp)
601209962Smm{
602209962Smm	uint64_t fuid;
603209962Smm	const char *domain;
604209962Smm
605209962Smm	fuid = strtonum(fuidstr, NULL);
606209962Smm
607209962Smm	domain = zfs_fuid_find_by_idx(zfsvfs, FUID_INDEX(fuid));
608209962Smm	if (domain)
609209962Smm		(void) strlcpy(domainbuf, domain, buflen);
610209962Smm	else
611209962Smm		domainbuf[0] = '\0';
612209962Smm	*ridp = FUID_RID(fuid);
613209962Smm}
614209962Smm
615209962Smmstatic uint64_t
616209962Smmzfs_userquota_prop_to_obj(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type)
617209962Smm{
618209962Smm	switch (type) {
619209962Smm	case ZFS_PROP_USERUSED:
620209962Smm		return (DMU_USERUSED_OBJECT);
621209962Smm	case ZFS_PROP_GROUPUSED:
622209962Smm		return (DMU_GROUPUSED_OBJECT);
623209962Smm	case ZFS_PROP_USERQUOTA:
624209962Smm		return (zfsvfs->z_userquota_obj);
625209962Smm	case ZFS_PROP_GROUPQUOTA:
626209962Smm		return (zfsvfs->z_groupquota_obj);
627209962Smm	}
628209962Smm	return (0);
629209962Smm}
630209962Smm
631209962Smmint
632209962Smmzfs_userspace_many(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type,
633209962Smm    uint64_t *cookiep, void *vbuf, uint64_t *bufsizep)
634209962Smm{
635209962Smm	int error;
636209962Smm	zap_cursor_t zc;
637209962Smm	zap_attribute_t za;
638209962Smm	zfs_useracct_t *buf = vbuf;
639209962Smm	uint64_t obj;
640209962Smm
641209962Smm	if (!dmu_objset_userspace_present(zfsvfs->z_os))
642209962Smm		return (ENOTSUP);
643209962Smm
644209962Smm	obj = zfs_userquota_prop_to_obj(zfsvfs, type);
645209962Smm	if (obj == 0) {
646209962Smm		*bufsizep = 0;
647209962Smm		return (0);
648209962Smm	}
649209962Smm
650209962Smm	for (zap_cursor_init_serialized(&zc, zfsvfs->z_os, obj, *cookiep);
651209962Smm	    (error = zap_cursor_retrieve(&zc, &za)) == 0;
652209962Smm	    zap_cursor_advance(&zc)) {
653209962Smm		if ((uintptr_t)buf - (uintptr_t)vbuf + sizeof (zfs_useracct_t) >
654209962Smm		    *bufsizep)
655209962Smm			break;
656209962Smm
657209962Smm		fuidstr_to_sid(zfsvfs, za.za_name,
658209962Smm		    buf->zu_domain, sizeof (buf->zu_domain), &buf->zu_rid);
659209962Smm
660209962Smm		buf->zu_space = za.za_first_integer;
661209962Smm		buf++;
662209962Smm	}
663209962Smm	if (error == ENOENT)
664209962Smm		error = 0;
665209962Smm
666209962Smm	ASSERT3U((uintptr_t)buf - (uintptr_t)vbuf, <=, *bufsizep);
667209962Smm	*bufsizep = (uintptr_t)buf - (uintptr_t)vbuf;
668209962Smm	*cookiep = zap_cursor_serialize(&zc);
669209962Smm	zap_cursor_fini(&zc);
670209962Smm	return (error);
671209962Smm}
672209962Smm
673209962Smm/*
674209962Smm * buf must be big enough (eg, 32 bytes)
675209962Smm */
676168404Spjdstatic int
677209962Smmid_to_fuidstr(zfsvfs_t *zfsvfs, const char *domain, uid_t rid,
678209962Smm    char *buf, boolean_t addok)
679209962Smm{
680209962Smm	uint64_t fuid;
681209962Smm	int domainid = 0;
682209962Smm
683209962Smm	if (domain && domain[0]) {
684209962Smm		domainid = zfs_fuid_find_by_domain(zfsvfs, domain, NULL, addok);
685209962Smm		if (domainid == -1)
686209962Smm			return (ENOENT);
687209962Smm	}
688209962Smm	fuid = FUID_ENCODE(domainid, rid);
689209962Smm	(void) sprintf(buf, "%llx", (longlong_t)fuid);
690209962Smm	return (0);
691209962Smm}
692209962Smm
693209962Smmint
694209962Smmzfs_userspace_one(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type,
695209962Smm    const char *domain, uint64_t rid, uint64_t *valp)
696209962Smm{
697209962Smm	char buf[32];
698209962Smm	int err;
699209962Smm	uint64_t obj;
700209962Smm
701209962Smm	*valp = 0;
702209962Smm
703209962Smm	if (!dmu_objset_userspace_present(zfsvfs->z_os))
704209962Smm		return (ENOTSUP);
705209962Smm
706209962Smm	obj = zfs_userquota_prop_to_obj(zfsvfs, type);
707209962Smm	if (obj == 0)
708209962Smm		return (0);
709209962Smm
710209962Smm	err = id_to_fuidstr(zfsvfs, domain, rid, buf, B_FALSE);
711209962Smm	if (err)
712209962Smm		return (err);
713209962Smm
714209962Smm	err = zap_lookup(zfsvfs->z_os, obj, buf, 8, 1, valp);
715209962Smm	if (err == ENOENT)
716209962Smm		err = 0;
717209962Smm	return (err);
718209962Smm}
719209962Smm
720209962Smmint
721209962Smmzfs_set_userquota(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type,
722209962Smm    const char *domain, uint64_t rid, uint64_t quota)
723209962Smm{
724209962Smm	char buf[32];
725209962Smm	int err;
726209962Smm	dmu_tx_t *tx;
727209962Smm	uint64_t *objp;
728209962Smm	boolean_t fuid_dirtied;
729209962Smm
730209962Smm	if (type != ZFS_PROP_USERQUOTA && type != ZFS_PROP_GROUPQUOTA)
731209962Smm		return (EINVAL);
732209962Smm
733209962Smm	if (zfsvfs->z_version < ZPL_VERSION_USERSPACE)
734209962Smm		return (ENOTSUP);
735209962Smm
736209962Smm	objp = (type == ZFS_PROP_USERQUOTA) ? &zfsvfs->z_userquota_obj :
737209962Smm	    &zfsvfs->z_groupquota_obj;
738209962Smm
739209962Smm	err = id_to_fuidstr(zfsvfs, domain, rid, buf, B_TRUE);
740209962Smm	if (err)
741209962Smm		return (err);
742209962Smm	fuid_dirtied = zfsvfs->z_fuid_dirty;
743209962Smm
744209962Smm	tx = dmu_tx_create(zfsvfs->z_os);
745209962Smm	dmu_tx_hold_zap(tx, *objp ? *objp : DMU_NEW_OBJECT, B_TRUE, NULL);
746209962Smm	if (*objp == 0) {
747209962Smm		dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_TRUE,
748209962Smm		    zfs_userquota_prop_prefixes[type]);
749209962Smm	}
750209962Smm	if (fuid_dirtied)
751209962Smm		zfs_fuid_txhold(zfsvfs, tx);
752209962Smm	err = dmu_tx_assign(tx, TXG_WAIT);
753209962Smm	if (err) {
754209962Smm		dmu_tx_abort(tx);
755209962Smm		return (err);
756209962Smm	}
757209962Smm
758209962Smm	mutex_enter(&zfsvfs->z_lock);
759209962Smm	if (*objp == 0) {
760209962Smm		*objp = zap_create(zfsvfs->z_os, DMU_OT_USERGROUP_QUOTA,
761209962Smm		    DMU_OT_NONE, 0, tx);
762209962Smm		VERIFY(0 == zap_add(zfsvfs->z_os, MASTER_NODE_OBJ,
763209962Smm		    zfs_userquota_prop_prefixes[type], 8, 1, objp, tx));
764209962Smm	}
765209962Smm	mutex_exit(&zfsvfs->z_lock);
766209962Smm
767209962Smm	if (quota == 0) {
768209962Smm		err = zap_remove(zfsvfs->z_os, *objp, buf, tx);
769209962Smm		if (err == ENOENT)
770209962Smm			err = 0;
771209962Smm	} else {
772209962Smm		err = zap_update(zfsvfs->z_os, *objp, buf, 8, 1, &quota, tx);
773209962Smm	}
774209962Smm	ASSERT(err == 0);
775209962Smm	if (fuid_dirtied)
776209962Smm		zfs_fuid_sync(zfsvfs, tx);
777209962Smm	dmu_tx_commit(tx);
778209962Smm	return (err);
779209962Smm}
780209962Smm
781209962Smmboolean_t
782219089Spjdzfs_fuid_overquota(zfsvfs_t *zfsvfs, boolean_t isgroup, uint64_t fuid)
783209962Smm{
784209962Smm	char buf[32];
785209962Smm	uint64_t used, quota, usedobj, quotaobj;
786209962Smm	int err;
787209962Smm
788209962Smm	usedobj = isgroup ? DMU_GROUPUSED_OBJECT : DMU_USERUSED_OBJECT;
789209962Smm	quotaobj = isgroup ? zfsvfs->z_groupquota_obj : zfsvfs->z_userquota_obj;
790209962Smm
791209962Smm	if (quotaobj == 0 || zfsvfs->z_replay)
792209962Smm		return (B_FALSE);
793209962Smm
794209962Smm	(void) sprintf(buf, "%llx", (longlong_t)fuid);
795209962Smm	err = zap_lookup(zfsvfs->z_os, quotaobj, buf, 8, 1, &quota);
796209962Smm	if (err != 0)
797209962Smm		return (B_FALSE);
798209962Smm
799209962Smm	err = zap_lookup(zfsvfs->z_os, usedobj, buf, 8, 1, &used);
800209962Smm	if (err != 0)
801209962Smm		return (B_FALSE);
802209962Smm	return (used >= quota);
803209962Smm}
804209962Smm
805219089Spjdboolean_t
806219089Spjdzfs_owner_overquota(zfsvfs_t *zfsvfs, znode_t *zp, boolean_t isgroup)
807219089Spjd{
808219089Spjd	uint64_t fuid;
809219089Spjd	uint64_t quotaobj;
810219089Spjd
811219089Spjd	quotaobj = isgroup ? zfsvfs->z_groupquota_obj : zfsvfs->z_userquota_obj;
812219089Spjd
813219089Spjd	fuid = isgroup ? zp->z_gid : zp->z_uid;
814219089Spjd
815219089Spjd	if (quotaobj == 0 || zfsvfs->z_replay)
816219089Spjd		return (B_FALSE);
817219089Spjd
818219089Spjd	return (zfs_fuid_overquota(zfsvfs, isgroup, fuid));
819219089Spjd}
820219089Spjd
821209962Smmint
822219089Spjdzfsvfs_create(const char *osname, zfsvfs_t **zfvp)
823209962Smm{
824209962Smm	objset_t *os;
825209962Smm	zfsvfs_t *zfsvfs;
826209962Smm	uint64_t zval;
827209962Smm	int i, error;
828219089Spjd	uint64_t sa_obj;
829209962Smm
830219089Spjd	zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP);
831219089Spjd
832219089Spjd	/*
833219089Spjd	 * We claim to always be readonly so we can open snapshots;
834219089Spjd	 * other ZPL code will prevent us from writing to snapshots.
835219089Spjd	 */
836219089Spjd	error = dmu_objset_own(osname, DMU_OST_ZFS, B_TRUE, zfsvfs, &os);
837219089Spjd	if (error) {
838219089Spjd		kmem_free(zfsvfs, sizeof (zfsvfs_t));
839209962Smm		return (error);
840209962Smm	}
841209962Smm
842209962Smm	/*
843209962Smm	 * Initialize the zfs-specific filesystem structure.
844209962Smm	 * Should probably make this a kmem cache, shuffle fields,
845209962Smm	 * and just bzero up to z_hold_mtx[].
846209962Smm	 */
847209962Smm	zfsvfs->z_vfs = NULL;
848209962Smm	zfsvfs->z_parent = zfsvfs;
849209962Smm	zfsvfs->z_max_blksz = SPA_MAXBLOCKSIZE;
850209962Smm	zfsvfs->z_show_ctldir = ZFS_SNAPDIR_VISIBLE;
851209962Smm	zfsvfs->z_os = os;
852209962Smm
853209962Smm	error = zfs_get_zplprop(os, ZFS_PROP_VERSION, &zfsvfs->z_version);
854209962Smm	if (error) {
855209962Smm		goto out;
856219089Spjd	} else if (zfsvfs->z_version >
857219089Spjd	    zfs_zpl_version_map(spa_version(dmu_objset_spa(os)))) {
858219089Spjd		(void) printf("Can't mount a version %lld file system "
859219089Spjd		    "on a version %lld pool\n. Pool must be upgraded to mount "
860219089Spjd		    "this file system.", (u_longlong_t)zfsvfs->z_version,
861219089Spjd		    (u_longlong_t)spa_version(dmu_objset_spa(os)));
862209962Smm		error = ENOTSUP;
863209962Smm		goto out;
864209962Smm	}
865209962Smm	if ((error = zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &zval)) != 0)
866209962Smm		goto out;
867209962Smm	zfsvfs->z_norm = (int)zval;
868209962Smm
869209962Smm	if ((error = zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &zval)) != 0)
870209962Smm		goto out;
871209962Smm	zfsvfs->z_utf8 = (zval != 0);
872209962Smm
873209962Smm	if ((error = zfs_get_zplprop(os, ZFS_PROP_CASE, &zval)) != 0)
874209962Smm		goto out;
875209962Smm	zfsvfs->z_case = (uint_t)zval;
876209962Smm
877209962Smm	/*
878209962Smm	 * Fold case on file systems that are always or sometimes case
879209962Smm	 * insensitive.
880209962Smm	 */
881209962Smm	if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE ||
882209962Smm	    zfsvfs->z_case == ZFS_CASE_MIXED)
883209962Smm		zfsvfs->z_norm |= U8_TEXTPREP_TOUPPER;
884209962Smm
885209962Smm	zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os);
886219089Spjd	zfsvfs->z_use_sa = USE_SA(zfsvfs->z_version, zfsvfs->z_os);
887209962Smm
888219089Spjd	if (zfsvfs->z_use_sa) {
889219089Spjd		/* should either have both of these objects or none */
890219089Spjd		error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1,
891219089Spjd		    &sa_obj);
892219089Spjd		if (error)
893219089Spjd			return (error);
894219089Spjd	} else {
895219089Spjd		/*
896219089Spjd		 * Pre SA versions file systems should never touch
897219089Spjd		 * either the attribute registration or layout objects.
898219089Spjd		 */
899219089Spjd		sa_obj = 0;
900219089Spjd	}
901219089Spjd
902219089Spjd	error = sa_setup(os, sa_obj, zfs_attr_table, ZPL_END,
903219089Spjd	    &zfsvfs->z_attr_table);
904219089Spjd	if (error)
905219089Spjd		goto out;
906219089Spjd
907219089Spjd	if (zfsvfs->z_version >= ZPL_VERSION_SA)
908219089Spjd		sa_register_update_callback(os, zfs_sa_upgrade);
909219089Spjd
910209962Smm	error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_ROOT_OBJ, 8, 1,
911209962Smm	    &zfsvfs->z_root);
912209962Smm	if (error)
913209962Smm		goto out;
914209962Smm	ASSERT(zfsvfs->z_root != 0);
915209962Smm
916209962Smm	error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_UNLINKED_SET, 8, 1,
917209962Smm	    &zfsvfs->z_unlinkedobj);
918209962Smm	if (error)
919209962Smm		goto out;
920209962Smm
921209962Smm	error = zap_lookup(os, MASTER_NODE_OBJ,
922209962Smm	    zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA],
923209962Smm	    8, 1, &zfsvfs->z_userquota_obj);
924209962Smm	if (error && error != ENOENT)
925209962Smm		goto out;
926209962Smm
927209962Smm	error = zap_lookup(os, MASTER_NODE_OBJ,
928209962Smm	    zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA],
929209962Smm	    8, 1, &zfsvfs->z_groupquota_obj);
930209962Smm	if (error && error != ENOENT)
931209962Smm		goto out;
932209962Smm
933209962Smm	error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_FUID_TABLES, 8, 1,
934209962Smm	    &zfsvfs->z_fuid_obj);
935209962Smm	if (error && error != ENOENT)
936209962Smm		goto out;
937209962Smm
938209962Smm	error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SHARES_DIR, 8, 1,
939209962Smm	    &zfsvfs->z_shares_dir);
940209962Smm	if (error && error != ENOENT)
941209962Smm		goto out;
942209962Smm
943209962Smm	mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL);
944209962Smm	mutex_init(&zfsvfs->z_lock, NULL, MUTEX_DEFAULT, NULL);
945209962Smm	list_create(&zfsvfs->z_all_znodes, sizeof (znode_t),
946209962Smm	    offsetof(znode_t, z_link_node));
947209962Smm	rrw_init(&zfsvfs->z_teardown_lock);
948209962Smm	rw_init(&zfsvfs->z_teardown_inactive_lock, NULL, RW_DEFAULT, NULL);
949209962Smm	rw_init(&zfsvfs->z_fuid_lock, NULL, RW_DEFAULT, NULL);
950209962Smm	for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
951209962Smm		mutex_init(&zfsvfs->z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL);
952209962Smm
953219089Spjd	*zfvp = zfsvfs;
954209962Smm	return (0);
955209962Smm
956209962Smmout:
957219089Spjd	dmu_objset_disown(os, zfsvfs);
958219089Spjd	*zfvp = NULL;
959209962Smm	kmem_free(zfsvfs, sizeof (zfsvfs_t));
960209962Smm	return (error);
961209962Smm}
962209962Smm
963209962Smmstatic int
964185029Spjdzfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting)
965168404Spjd{
966185029Spjd	int error;
967185029Spjd
968185029Spjd	error = zfs_register_callbacks(zfsvfs->z_vfs);
969185029Spjd	if (error)
970185029Spjd		return (error);
971185029Spjd
972185029Spjd	/*
973185029Spjd	 * Set the objset user_ptr to track its zfsvfs.
974185029Spjd	 */
975219089Spjd	mutex_enter(&zfsvfs->z_os->os_user_ptr_lock);
976185029Spjd	dmu_objset_set_user(zfsvfs->z_os, zfsvfs);
977219089Spjd	mutex_exit(&zfsvfs->z_os->os_user_ptr_lock);
978185029Spjd
979208689Smm	zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data);
980208689Smm
981185029Spjd	/*
982185029Spjd	 * If we are not mounting (ie: online recv), then we don't
983185029Spjd	 * have to worry about replaying the log as we blocked all
984185029Spjd	 * operations out since we closed the ZIL.
985185029Spjd	 */
986185029Spjd	if (mounting) {
987185029Spjd		boolean_t readonly;
988185029Spjd
989185029Spjd		/*
990185029Spjd		 * During replay we remove the read only flag to
991185029Spjd		 * allow replays to succeed.
992185029Spjd		 */
993185029Spjd		readonly = zfsvfs->z_vfs->vfs_flag & VFS_RDONLY;
994208689Smm		if (readonly != 0)
995208689Smm			zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY;
996208689Smm		else
997208689Smm			zfs_unlinked_drain(zfsvfs);
998185029Spjd
999219089Spjd		/*
1000219089Spjd		 * Parse and replay the intent log.
1001219089Spjd		 *
1002219089Spjd		 * Because of ziltest, this must be done after
1003219089Spjd		 * zfs_unlinked_drain().  (Further note: ziltest
1004219089Spjd		 * doesn't use readonly mounts, where
1005219089Spjd		 * zfs_unlinked_drain() isn't called.)  This is because
1006219089Spjd		 * ziltest causes spa_sync() to think it's committed,
1007219089Spjd		 * but actually it is not, so the intent log contains
1008219089Spjd		 * many txg's worth of changes.
1009219089Spjd		 *
1010219089Spjd		 * In particular, if object N is in the unlinked set in
1011219089Spjd		 * the last txg to actually sync, then it could be
1012219089Spjd		 * actually freed in a later txg and then reallocated
1013219089Spjd		 * in a yet later txg.  This would write a "create
1014219089Spjd		 * object N" record to the intent log.  Normally, this
1015219089Spjd		 * would be fine because the spa_sync() would have
1016219089Spjd		 * written out the fact that object N is free, before
1017219089Spjd		 * we could write the "create object N" intent log
1018219089Spjd		 * record.
1019219089Spjd		 *
1020219089Spjd		 * But when we are in ziltest mode, we advance the "open
1021219089Spjd		 * txg" without actually spa_sync()-ing the changes to
1022219089Spjd		 * disk.  So we would see that object N is still
1023219089Spjd		 * allocated and in the unlinked set, and there is an
1024219089Spjd		 * intent log record saying to allocate it.
1025219089Spjd		 */
1026219089Spjd		if (spa_writeable(dmu_objset_spa(zfsvfs->z_os))) {
1027219089Spjd			if (zil_replay_disable) {
1028219089Spjd				zil_destroy(zfsvfs->z_log, B_FALSE);
1029219089Spjd			} else {
1030219089Spjd				zfsvfs->z_replay = B_TRUE;
1031219089Spjd				zil_replay(zfsvfs->z_os, zfsvfs,
1032219089Spjd				    zfs_replay_vector);
1033219089Spjd				zfsvfs->z_replay = B_FALSE;
1034219089Spjd			}
1035208689Smm		}
1036185029Spjd		zfsvfs->z_vfs->vfs_flag |= readonly; /* restore readonly bit */
1037185029Spjd	}
1038185029Spjd
1039185029Spjd	return (0);
1040185029Spjd}
1041185029Spjd
1042210470Smmextern krwlock_t zfsvfs_lock; /* in zfs_znode.c */
1043210470Smm
1044209962Smmvoid
1045209962Smmzfsvfs_free(zfsvfs_t *zfsvfs)
1046185029Spjd{
1047209962Smm	int i;
1048209962Smm
1049210470Smm	/*
1050210470Smm	 * This is a barrier to prevent the filesystem from going away in
1051210470Smm	 * zfs_znode_move() until we can safely ensure that the filesystem is
1052210470Smm	 * not unmounted. We consider the filesystem valid before the barrier
1053210470Smm	 * and invalid after the barrier.
1054210470Smm	 */
1055210470Smm	rw_enter(&zfsvfs_lock, RW_READER);
1056210470Smm	rw_exit(&zfsvfs_lock);
1057210470Smm
1058209962Smm	zfs_fuid_destroy(zfsvfs);
1059209962Smm
1060185029Spjd	mutex_destroy(&zfsvfs->z_znodes_lock);
1061209962Smm	mutex_destroy(&zfsvfs->z_lock);
1062185029Spjd	list_destroy(&zfsvfs->z_all_znodes);
1063185029Spjd	rrw_destroy(&zfsvfs->z_teardown_lock);
1064185029Spjd	rw_destroy(&zfsvfs->z_teardown_inactive_lock);
1065185029Spjd	rw_destroy(&zfsvfs->z_fuid_lock);
1066209962Smm	for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
1067209962Smm		mutex_destroy(&zfsvfs->z_hold_mtx[i]);
1068185029Spjd	kmem_free(zfsvfs, sizeof (zfsvfs_t));
1069185029Spjd}
1070185029Spjd
1071209962Smmstatic void
1072209962Smmzfs_set_fuid_feature(zfsvfs_t *zfsvfs)
1073209962Smm{
1074209962Smm	zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os);
1075219089Spjd	if (zfsvfs->z_vfs) {
1076219089Spjd		if (zfsvfs->z_use_fuids) {
1077219089Spjd			vfs_set_feature(zfsvfs->z_vfs, VFSFT_XVATTR);
1078219089Spjd			vfs_set_feature(zfsvfs->z_vfs, VFSFT_SYSATTR_VIEWS);
1079219089Spjd			vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACEMASKONACCESS);
1080219089Spjd			vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACLONCREATE);
1081219089Spjd			vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACCESS_FILTER);
1082219089Spjd			vfs_set_feature(zfsvfs->z_vfs, VFSFT_REPARSE);
1083219089Spjd		} else {
1084219089Spjd			vfs_clear_feature(zfsvfs->z_vfs, VFSFT_XVATTR);
1085219089Spjd			vfs_clear_feature(zfsvfs->z_vfs, VFSFT_SYSATTR_VIEWS);
1086219089Spjd			vfs_clear_feature(zfsvfs->z_vfs, VFSFT_ACEMASKONACCESS);
1087219089Spjd			vfs_clear_feature(zfsvfs->z_vfs, VFSFT_ACLONCREATE);
1088219089Spjd			vfs_clear_feature(zfsvfs->z_vfs, VFSFT_ACCESS_FILTER);
1089219089Spjd			vfs_clear_feature(zfsvfs->z_vfs, VFSFT_REPARSE);
1090219089Spjd		}
1091209962Smm	}
1092219089Spjd	zfsvfs->z_use_sa = USE_SA(zfsvfs->z_version, zfsvfs->z_os);
1093209962Smm}
1094209962Smm
1095185029Spjdstatic int
1096185029Spjdzfs_domount(vfs_t *vfsp, char *osname)
1097185029Spjd{
1098209962Smm	uint64_t recordsize, fsid_guid;
1099168404Spjd	int error = 0;
1100168404Spjd	zfsvfs_t *zfsvfs;
1101209962Smm	vnode_t *vp;
1102168404Spjd
1103168404Spjd	ASSERT(vfsp);
1104168404Spjd	ASSERT(osname);
1105168404Spjd
1106219089Spjd	error = zfsvfs_create(osname, &zfsvfs);
1107209962Smm	if (error)
1108209962Smm		return (error);
1109168404Spjd	zfsvfs->z_vfs = vfsp;
1110168404Spjd
1111168404Spjd	if (error = dsl_prop_get_integer(osname, "recordsize", &recordsize,
1112168404Spjd	    NULL))
1113168404Spjd		goto out;
1114204101Spjd	zfsvfs->z_vfs->vfs_bsize = SPA_MINBLOCKSIZE;
1115204101Spjd	zfsvfs->z_vfs->mnt_stat.f_iosize = recordsize;
1116168404Spjd
1117168404Spjd	vfsp->vfs_data = zfsvfs;
1118218386Strasz	vfsp->mnt_flag |= MNT_LOCAL;
1119168404Spjd	vfsp->mnt_kern_flag |= MNTK_MPSAFE;
1120168404Spjd	vfsp->mnt_kern_flag |= MNTK_LOOKUP_SHARED;
1121193440Sps	vfsp->mnt_kern_flag |= MNTK_SHARED_WRITES;
1122168404Spjd
1123209962Smm	/*
1124209962Smm	 * The fsid is 64 bits, composed of an 8-bit fs type, which
1125209962Smm	 * separates our fsid from any other filesystem types, and a
1126209962Smm	 * 56-bit objset unique ID.  The objset unique ID is unique to
1127209962Smm	 * all objsets open on this system, provided by unique_create().
1128209962Smm	 * The 8-bit fs type must be put in the low bits of fsid[1]
1129209962Smm	 * because that's where other Solaris filesystems put it.
1130209962Smm	 */
1131209962Smm	fsid_guid = dmu_objset_fsid_guid(zfsvfs->z_os);
1132209962Smm	ASSERT((fsid_guid & ~((1ULL<<56)-1)) == 0);
1133209962Smm	vfsp->vfs_fsid.val[0] = fsid_guid;
1134209962Smm	vfsp->vfs_fsid.val[1] = ((fsid_guid>>32) << 8) |
1135209962Smm	    vfsp->mnt_vfc->vfc_typenum & 0xFF;
1136168404Spjd
1137185029Spjd	/*
1138185029Spjd	 * Set features for file system.
1139185029Spjd	 */
1140209962Smm	zfs_set_fuid_feature(zfsvfs);
1141185029Spjd	if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE) {
1142185029Spjd		vfs_set_feature(vfsp, VFSFT_DIRENTFLAGS);
1143185029Spjd		vfs_set_feature(vfsp, VFSFT_CASEINSENSITIVE);
1144185029Spjd		vfs_set_feature(vfsp, VFSFT_NOCASESENSITIVE);
1145185029Spjd	} else if (zfsvfs->z_case == ZFS_CASE_MIXED) {
1146185029Spjd		vfs_set_feature(vfsp, VFSFT_DIRENTFLAGS);
1147185029Spjd		vfs_set_feature(vfsp, VFSFT_CASEINSENSITIVE);
1148185029Spjd	}
1149219089Spjd	vfs_set_feature(vfsp, VFSFT_ZEROCOPY_SUPPORTED);
1150185029Spjd
1151168404Spjd	if (dmu_objset_is_snapshot(zfsvfs->z_os)) {
1152185029Spjd		uint64_t pval;
1153168404Spjd
1154168404Spjd		atime_changed_cb(zfsvfs, B_FALSE);
1155168404Spjd		readonly_changed_cb(zfsvfs, B_TRUE);
1156185029Spjd		if (error = dsl_prop_get_integer(osname, "xattr", &pval, NULL))
1157168404Spjd			goto out;
1158185029Spjd		xattr_changed_cb(zfsvfs, pval);
1159168404Spjd		zfsvfs->z_issnap = B_TRUE;
1160219089Spjd		zfsvfs->z_os->os_sync = ZFS_SYNC_DISABLED;
1161209962Smm
1162219089Spjd		mutex_enter(&zfsvfs->z_os->os_user_ptr_lock);
1163209962Smm		dmu_objset_set_user(zfsvfs->z_os, zfsvfs);
1164219089Spjd		mutex_exit(&zfsvfs->z_os->os_user_ptr_lock);
1165168404Spjd	} else {
1166185029Spjd		error = zfsvfs_setup(zfsvfs, B_TRUE);
1167168404Spjd	}
1168168404Spjd
1169168404Spjd	vfs_mountedfrom(vfsp, osname);
1170209962Smm	/* Grab extra reference. */
1171209962Smm	VERIFY(VFS_ROOT(vfsp, LK_EXCLUSIVE, &vp) == 0);
1172209962Smm	VOP_UNLOCK(vp, 0);
1173168404Spjd
1174168404Spjd	if (!zfsvfs->z_issnap)
1175168404Spjd		zfsctl_create(zfsvfs);
1176168404Spjdout:
1177168404Spjd	if (error) {
1178219089Spjd		dmu_objset_disown(zfsvfs->z_os, zfsvfs);
1179209962Smm		zfsvfs_free(zfsvfs);
1180168404Spjd	} else {
1181168404Spjd		atomic_add_32(&zfs_active_fs_count, 1);
1182168404Spjd	}
1183168404Spjd
1184168404Spjd	return (error);
1185168404Spjd}
1186168404Spjd
1187168404Spjdvoid
1188168404Spjdzfs_unregister_callbacks(zfsvfs_t *zfsvfs)
1189168404Spjd{
1190168404Spjd	objset_t *os = zfsvfs->z_os;
1191168404Spjd	struct dsl_dataset *ds;
1192168404Spjd
1193168404Spjd	/*
1194168404Spjd	 * Unregister properties.
1195168404Spjd	 */
1196168404Spjd	if (!dmu_objset_is_snapshot(os)) {
1197168404Spjd		ds = dmu_objset_ds(os);
1198168404Spjd		VERIFY(dsl_prop_unregister(ds, "atime", atime_changed_cb,
1199168404Spjd		    zfsvfs) == 0);
1200168404Spjd
1201168404Spjd		VERIFY(dsl_prop_unregister(ds, "xattr", xattr_changed_cb,
1202168404Spjd		    zfsvfs) == 0);
1203168404Spjd
1204168404Spjd		VERIFY(dsl_prop_unregister(ds, "recordsize", blksz_changed_cb,
1205168404Spjd		    zfsvfs) == 0);
1206168404Spjd
1207168404Spjd		VERIFY(dsl_prop_unregister(ds, "readonly", readonly_changed_cb,
1208168404Spjd		    zfsvfs) == 0);
1209168404Spjd
1210168404Spjd		VERIFY(dsl_prop_unregister(ds, "setuid", setuid_changed_cb,
1211168404Spjd		    zfsvfs) == 0);
1212168404Spjd
1213168404Spjd		VERIFY(dsl_prop_unregister(ds, "exec", exec_changed_cb,
1214168404Spjd		    zfsvfs) == 0);
1215168404Spjd
1216168404Spjd		VERIFY(dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb,
1217168404Spjd		    zfsvfs) == 0);
1218168404Spjd
1219224174Smm		VERIFY(dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb,
1220224174Smm		    zfsvfs) == 0);
1221224174Smm
1222168404Spjd		VERIFY(dsl_prop_unregister(ds, "aclinherit",
1223168404Spjd		    acl_inherit_changed_cb, zfsvfs) == 0);
1224185029Spjd
1225185029Spjd		VERIFY(dsl_prop_unregister(ds, "vscan",
1226185029Spjd		    vscan_changed_cb, zfsvfs) == 0);
1227168404Spjd	}
1228168404Spjd}
1229168404Spjd
1230219089Spjd#ifdef SECLABEL
/*
 * Convert a decimal digit string to a uint64_t integer.
 *
 * Every character of str must be in '0'..'9'.  On success the value is
 * stored in *objnum and 0 is returned; an empty string yields 0.
 * Returns EINVAL, leaving *objnum untouched, if str contains a non-digit
 * or if the value does not fit in 64 bits.
 */
static int
str_to_uint64(char *str, uint64_t *objnum)
{
	const uint64_t max = ~(uint64_t)0;
	uint64_t num = 0;

	while (*str) {
		uint64_t digit;

		if (*str < '0' || *str > '9')
			return (EINVAL);

		digit = (uint64_t)(*str++ - '0');

		/* Reject input that would wrap past 2^64 - 1. */
		if (num > (max - digit) / 10)
			return (EINVAL);

		num = num * 10 + digit;
	}

	*objnum = num;
	return (0);
}
1249219089Spjd
/*
 * The boot path passed from the boot loader is in the form of
 * "rootpool-name/root-filesystem-object-number".  Convert this
 * string to a dataset name: "rootpool-name/root-filesystem-name".
 *
 * bpath is copied to outpath first.  If bpath has no '/' or the text
 * after the first '/' is not a decimal number, that copy is the result
 * and 0 is returned.  Otherwise the result of dsl_dsobj_to_dsname() for
 * the pool name and object number is returned.  Returns EINVAL if bpath
 * is empty or begins with '/'.
 */
static int
zfs_parse_bootfs(char *bpath, char *outpath)
{
	uint64_t dsobj;
	char *sep;
	int rc;

	if (*bpath == 0 || *bpath == '/')
		return (EINVAL);

	(void) strcpy(outpath, bpath);

	/*
	 * With no '/' the path is just the pool name; with a non-numeric
	 * tail it is already a dataset name.  Either way the copy made
	 * above is the answer.
	 */
	sep = strchr(bpath, '/');
	if (sep == NULL || str_to_uint64(sep + 1, &dsobj) != 0)
		return (0);

	/* Temporarily cut bpath at the '/' so it holds only the pool name. */
	*sep = '\0';
	rc = dsl_dsobj_to_dsname(bpath, dsobj, outpath);
	*sep = '/';

	return (rc);
}
1285219089Spjd
1286219089Spjd/*
1287219089Spjd * zfs_check_global_label:
1288219089Spjd *	Check that the hex label string is appropriate for the dataset
1289219089Spjd *	being mounted into the global_zone proper.
1290219089Spjd *
1291219089Spjd *	Return an error if the hex label string is not default or
1292219089Spjd *	admin_low/admin_high.  For admin_low labels, the corresponding
1293219089Spjd *	dataset must be readonly.
1294219089Spjd */
1295219089Spjdint
1296219089Spjdzfs_check_global_label(const char *dsname, const char *hexsl)
1297219089Spjd{
1298219089Spjd	if (strcasecmp(hexsl, ZFS_MLSLABEL_DEFAULT) == 0)
1299219089Spjd		return (0);
1300219089Spjd	if (strcasecmp(hexsl, ADMIN_HIGH) == 0)
1301219089Spjd		return (0);
1302219089Spjd	if (strcasecmp(hexsl, ADMIN_LOW) == 0) {
1303219089Spjd		/* must be readonly */
1304219089Spjd		uint64_t rdonly;
1305219089Spjd
1306219089Spjd		if (dsl_prop_get_integer(dsname,
1307219089Spjd		    zfs_prop_to_name(ZFS_PROP_READONLY), &rdonly, NULL))
1308219089Spjd			return (EACCES);
1309219089Spjd		return (rdonly ? 0 : EACCES);
1310219089Spjd	}
1311219089Spjd	return (EACCES);
1312219089Spjd}
1313219089Spjd
1314219089Spjd/*
1315219089Spjd * zfs_mount_label_policy:
1316219089Spjd *	Determine whether the mount is allowed according to MAC check.
1317219089Spjd *	by comparing (where appropriate) label of the dataset against
1318219089Spjd *	the label of the zone being mounted into.  If the dataset has
1319219089Spjd *	no label, create one.
1320219089Spjd *
1321219089Spjd *	Returns:
1322219089Spjd *		 0 :	access allowed
1323219089Spjd *		>0 :	error code, such as EACCES
1324219089Spjd */
1325219089Spjdstatic int
1326219089Spjdzfs_mount_label_policy(vfs_t *vfsp, char *osname)
1327219089Spjd{
1328219089Spjd	int		error, retv;
1329219089Spjd	zone_t		*mntzone = NULL;
1330219089Spjd	ts_label_t	*mnt_tsl;
1331219089Spjd	bslabel_t	*mnt_sl;
1332219089Spjd	bslabel_t	ds_sl;
1333219089Spjd	char		ds_hexsl[MAXNAMELEN];
1334219089Spjd
1335219089Spjd	retv = EACCES;				/* assume the worst */
1336219089Spjd
1337219089Spjd	/*
1338219089Spjd	 * Start by getting the dataset label if it exists.
1339219089Spjd	 */
1340219089Spjd	error = dsl_prop_get(osname, zfs_prop_to_name(ZFS_PROP_MLSLABEL),
1341219089Spjd	    1, sizeof (ds_hexsl), &ds_hexsl, NULL);
1342219089Spjd	if (error)
1343219089Spjd		return (EACCES);
1344219089Spjd
1345219089Spjd	/*
1346219089Spjd	 * If labeling is NOT enabled, then disallow the mount of datasets
1347219089Spjd	 * which have a non-default label already.  No other label checks
1348219089Spjd	 * are needed.
1349219089Spjd	 */
1350219089Spjd	if (!is_system_labeled()) {
1351219089Spjd		if (strcasecmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) == 0)
1352219089Spjd			return (0);
1353219089Spjd		return (EACCES);
1354219089Spjd	}
1355219089Spjd
1356219089Spjd	/*
1357219089Spjd	 * Get the label of the mountpoint.  If mounting into the global
1358219089Spjd	 * zone (i.e. mountpoint is not within an active zone and the
1359219089Spjd	 * zoned property is off), the label must be default or
1360219089Spjd	 * admin_low/admin_high only; no other checks are needed.
1361219089Spjd	 */
1362219089Spjd	mntzone = zone_find_by_any_path(refstr_value(vfsp->vfs_mntpt), B_FALSE);
1363219089Spjd	if (mntzone->zone_id == GLOBAL_ZONEID) {
1364219089Spjd		uint64_t zoned;
1365219089Spjd
1366219089Spjd		zone_rele(mntzone);
1367219089Spjd
1368219089Spjd		if (dsl_prop_get_integer(osname,
1369219089Spjd		    zfs_prop_to_name(ZFS_PROP_ZONED), &zoned, NULL))
1370219089Spjd			return (EACCES);
1371219089Spjd		if (!zoned)
1372219089Spjd			return (zfs_check_global_label(osname, ds_hexsl));
1373219089Spjd		else
1374219089Spjd			/*
1375219089Spjd			 * This is the case of a zone dataset being mounted
1376219089Spjd			 * initially, before the zone has been fully created;
1377219089Spjd			 * allow this mount into global zone.
1378219089Spjd			 */
1379219089Spjd			return (0);
1380219089Spjd	}
1381219089Spjd
1382219089Spjd	mnt_tsl = mntzone->zone_slabel;
1383219089Spjd	ASSERT(mnt_tsl != NULL);
1384219089Spjd	label_hold(mnt_tsl);
1385219089Spjd	mnt_sl = label2bslabel(mnt_tsl);
1386219089Spjd
1387219089Spjd	if (strcasecmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) == 0) {
1388219089Spjd		/*
1389219089Spjd		 * The dataset doesn't have a real label, so fabricate one.
1390219089Spjd		 */
1391219089Spjd		char *str = NULL;
1392219089Spjd
1393219089Spjd		if (l_to_str_internal(mnt_sl, &str) == 0 &&
1394219089Spjd		    dsl_prop_set(osname, zfs_prop_to_name(ZFS_PROP_MLSLABEL),
1395219089Spjd		    ZPROP_SRC_LOCAL, 1, strlen(str) + 1, str) == 0)
1396219089Spjd			retv = 0;
1397219089Spjd		if (str != NULL)
1398219089Spjd			kmem_free(str, strlen(str) + 1);
1399219089Spjd	} else if (hexstr_to_label(ds_hexsl, &ds_sl) == 0) {
1400219089Spjd		/*
1401219089Spjd		 * Now compare labels to complete the MAC check.  If the
1402219089Spjd		 * labels are equal then allow access.  If the mountpoint
1403219089Spjd		 * label dominates the dataset label, allow readonly access.
1404219089Spjd		 * Otherwise, access is denied.
1405219089Spjd		 */
1406219089Spjd		if (blequal(mnt_sl, &ds_sl))
1407219089Spjd			retv = 0;
1408219089Spjd		else if (bldominates(mnt_sl, &ds_sl)) {
1409219089Spjd			vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0);
1410219089Spjd			retv = 0;
1411219089Spjd		}
1412219089Spjd	}
1413219089Spjd
1414219089Spjd	label_rele(mnt_tsl);
1415219089Spjd	zone_rele(mntzone);
1416219089Spjd	return (retv);
1417219089Spjd}
1418219089Spjd#endif	/* SECLABEL */
1419219089Spjd
1420219089Spjd#ifdef OPENSOLARIS_MOUNTROOT
1421219089Spjdstatic int
1422219089Spjdzfs_mountroot(vfs_t *vfsp, enum whymountroot why)
1423219089Spjd{
1424219089Spjd	int error = 0;
1425219089Spjd	static int zfsrootdone = 0;
1426219089Spjd	zfsvfs_t *zfsvfs = NULL;
1427219089Spjd	znode_t *zp = NULL;
1428219089Spjd	vnode_t *vp = NULL;
1429219089Spjd	char *zfs_bootfs;
1430219089Spjd	char *zfs_devid;
1431219089Spjd
1432219089Spjd	ASSERT(vfsp);
1433219089Spjd
1434219089Spjd	/*
1435219089Spjd	 * The filesystem that we mount as root is defined in the
1436219089Spjd	 * boot property "zfs-bootfs" with a format of
1437219089Spjd	 * "poolname/root-dataset-objnum".
1438219089Spjd	 */
1439219089Spjd	if (why == ROOT_INIT) {
1440219089Spjd		if (zfsrootdone++)
1441219089Spjd			return (EBUSY);
1442219089Spjd		/*
1443219089Spjd		 * the process of doing a spa_load will require the
1444219089Spjd		 * clock to be set before we could (for example) do
1445219089Spjd		 * something better by looking at the timestamp on
1446219089Spjd		 * an uberblock, so just set it to -1.
1447219089Spjd		 */
1448219089Spjd		clkset(-1);
1449219089Spjd
1450219089Spjd		if ((zfs_bootfs = spa_get_bootprop("zfs-bootfs")) == NULL) {
1451219089Spjd			cmn_err(CE_NOTE, "spa_get_bootfs: can not get "
1452219089Spjd			    "bootfs name");
1453219089Spjd			return (EINVAL);
1454219089Spjd		}
1455219089Spjd		zfs_devid = spa_get_bootprop("diskdevid");
1456219089Spjd		error = spa_import_rootpool(rootfs.bo_name, zfs_devid);
1457219089Spjd		if (zfs_devid)
1458219089Spjd			spa_free_bootprop(zfs_devid);
1459219089Spjd		if (error) {
1460219089Spjd			spa_free_bootprop(zfs_bootfs);
1461219089Spjd			cmn_err(CE_NOTE, "spa_import_rootpool: error %d",
1462219089Spjd			    error);
1463219089Spjd			return (error);
1464219089Spjd		}
1465219089Spjd		if (error = zfs_parse_bootfs(zfs_bootfs, rootfs.bo_name)) {
1466219089Spjd			spa_free_bootprop(zfs_bootfs);
1467219089Spjd			cmn_err(CE_NOTE, "zfs_parse_bootfs: error %d",
1468219089Spjd			    error);
1469219089Spjd			return (error);
1470219089Spjd		}
1471219089Spjd
1472219089Spjd		spa_free_bootprop(zfs_bootfs);
1473219089Spjd
1474219089Spjd		if (error = vfs_lock(vfsp))
1475219089Spjd			return (error);
1476219089Spjd
1477219089Spjd		if (error = zfs_domount(vfsp, rootfs.bo_name)) {
1478219089Spjd			cmn_err(CE_NOTE, "zfs_domount: error %d", error);
1479219089Spjd			goto out;
1480219089Spjd		}
1481219089Spjd
1482219089Spjd		zfsvfs = (zfsvfs_t *)vfsp->vfs_data;
1483219089Spjd		ASSERT(zfsvfs);
1484219089Spjd		if (error = zfs_zget(zfsvfs, zfsvfs->z_root, &zp)) {
1485219089Spjd			cmn_err(CE_NOTE, "zfs_zget: error %d", error);
1486219089Spjd			goto out;
1487219089Spjd		}
1488219089Spjd
1489219089Spjd		vp = ZTOV(zp);
1490219089Spjd		mutex_enter(&vp->v_lock);
1491219089Spjd		vp->v_flag |= VROOT;
1492219089Spjd		mutex_exit(&vp->v_lock);
1493219089Spjd		rootvp = vp;
1494219089Spjd
1495219089Spjd		/*
1496219089Spjd		 * Leave rootvp held.  The root file system is never unmounted.
1497219089Spjd		 */
1498219089Spjd
1499219089Spjd		vfs_add((struct vnode *)0, vfsp,
1500219089Spjd		    (vfsp->vfs_flag & VFS_RDONLY) ? MS_RDONLY : 0);
1501219089Spjdout:
1502219089Spjd		vfs_unlock(vfsp);
1503219089Spjd		return (error);
1504219089Spjd	} else if (why == ROOT_REMOUNT) {
1505219089Spjd		readonly_changed_cb(vfsp->vfs_data, B_FALSE);
1506219089Spjd		vfsp->vfs_flag |= VFS_REMOUNT;
1507219089Spjd
1508219089Spjd		/* refresh mount options */
1509219089Spjd		zfs_unregister_callbacks(vfsp->vfs_data);
1510219089Spjd		return (zfs_register_callbacks(vfsp));
1511219089Spjd
1512219089Spjd	} else if (why == ROOT_UNMOUNT) {
1513219089Spjd		zfs_unregister_callbacks((zfsvfs_t *)vfsp->vfs_data);
1514219089Spjd		(void) zfs_sync(vfsp, 0, 0);
1515219089Spjd		return (0);
1516219089Spjd	}
1517219089Spjd
1518219089Spjd	/*
1519219089Spjd	 * if "why" is equal to anything else other than ROOT_INIT,
1520219089Spjd	 * ROOT_REMOUNT, or ROOT_UNMOUNT, we do not support it.
1521219089Spjd	 */
1522219089Spjd	return (ENOTSUP);
1523219089Spjd}
1524219089Spjd#endif	/* OPENSOLARIS_MOUNTROOT */
1525219089Spjd
1526242554Savgstatic int
1527242554Savggetpoolname(const char *osname, char *poolname)
1528242554Savg{
1529242554Savg	char *p;
1530242554Savg
1531242554Savg	p = strchr(osname, '/');
1532242554Savg	if (p == NULL) {
1533242554Savg		if (strlen(osname) >= MAXNAMELEN)
1534242554Savg			return (ENAMETOOLONG);
1535242554Savg		(void) strcpy(poolname, osname);
1536242554Savg	} else {
1537242554Savg		if (p - osname >= MAXNAMELEN)
1538242554Savg			return (ENAMETOOLONG);
1539242554Savg		(void) strncpy(poolname, osname, p - osname);
1540242554Savg		poolname[p - osname] = '\0';
1541242554Savg	}
1542242554Savg	return (0);
1543242554Savg}
1544242554Savg
1545168404Spjd/*ARGSUSED*/
1546168404Spjdstatic int
1547191990Sattiliozfs_mount(vfs_t *vfsp)
1548168404Spjd{
1549191990Sattilio	kthread_t	*td = curthread;
1550185029Spjd	vnode_t		*mvp = vfsp->mnt_vnodecovered;
1551185029Spjd	cred_t		*cr = td->td_ucred;
1552185029Spjd	char		*osname;
1553185029Spjd	int		error = 0;
1554185029Spjd	int		canwrite;
1555168404Spjd
1556232728Smm	if (!prison_allow(td->td_ucred, PR_ALLOW_MOUNT_ZFS))
1557232728Smm		return (EPERM);
1558232728Smm
1559185029Spjd	if (vfs_getopt(vfsp->mnt_optnew, "from", (void **)&osname, NULL))
1560185029Spjd		return (EINVAL);
1561185029Spjd
1562168404Spjd	/*
1563185029Spjd	 * If full-owner-access is enabled and delegated administration is
1564185029Spjd	 * turned on, we must set nosuid.
1565185029Spjd	 */
1566185029Spjd	if (zfs_super_owner &&
1567185029Spjd	    dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr) != ECANCELED) {
1568185029Spjd		secpolicy_fs_mount_clearopts(cr, vfsp);
1569185029Spjd	}
1570185029Spjd
1571185029Spjd	/*
1572185029Spjd	 * Check for mount privilege?
1573185029Spjd	 *
1574185029Spjd	 * If we don't have privilege then see if
1575185029Spjd	 * we have local permission to allow it
1576185029Spjd	 */
1577185029Spjd	error = secpolicy_fs_mount(cr, mvp, vfsp);
1578185029Spjd	if (error) {
1579212694Smm		if (dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr) != 0)
1580196944Spjd			goto out;
1581196944Spjd
1582196944Spjd		if (!(vfsp->vfs_flag & MS_REMOUNT)) {
1583185029Spjd			vattr_t		vattr;
1584185029Spjd
1585185029Spjd			/*
1586185029Spjd			 * Make sure user is the owner of the mount point
1587185029Spjd			 * or has sufficient privileges.
1588185029Spjd			 */
1589185029Spjd
1590185029Spjd			vattr.va_mask = AT_UID;
1591185029Spjd
1592196662Spjd			vn_lock(mvp, LK_SHARED | LK_RETRY);
1593212694Smm			if (VOP_GETATTR(mvp, &vattr, cr)) {
1594196662Spjd				VOP_UNLOCK(mvp, 0);
1595185029Spjd				goto out;
1596185029Spjd			}
1597185029Spjd
1598185029Spjd			if (secpolicy_vnode_owner(mvp, cr, vattr.va_uid) != 0 &&
1599185029Spjd			    VOP_ACCESS(mvp, VWRITE, cr, td) != 0) {
1600196662Spjd				VOP_UNLOCK(mvp, 0);
1601185029Spjd				goto out;
1602185029Spjd			}
1603196662Spjd			VOP_UNLOCK(mvp, 0);
1604196944Spjd		}
1605185029Spjd
1606196944Spjd		secpolicy_fs_mount_clearopts(cr, vfsp);
1607185029Spjd	}
1608185029Spjd
1609185029Spjd	/*
1610185029Spjd	 * Refuse to mount a filesystem if we are in a local zone and the
1611185029Spjd	 * dataset is not visible.
1612185029Spjd	 */
1613185029Spjd	if (!INGLOBALZONE(curthread) &&
1614185029Spjd	    (!zone_dataset_visible(osname, &canwrite) || !canwrite)) {
1615185029Spjd		error = EPERM;
1616185029Spjd		goto out;
1617185029Spjd	}
1618185029Spjd
1619219089Spjd#ifdef SECLABEL
1620219089Spjd	error = zfs_mount_label_policy(vfsp, osname);
1621219089Spjd	if (error)
1622219089Spjd		goto out;
1623219089Spjd#endif
1624219089Spjd
1625218386Strasz	vfsp->vfs_flag |= MNT_NFS4ACLS;
1626218386Strasz
1627185029Spjd	/*
1628168404Spjd	 * When doing a remount, we simply refresh our temporary properties
1629168404Spjd	 * according to those options set in the current VFS options.
1630168404Spjd	 */
1631185029Spjd	if (vfsp->vfs_flag & MS_REMOUNT) {
1632185029Spjd		/* refresh mount options */
1633185029Spjd		zfs_unregister_callbacks(vfsp->vfs_data);
1634185029Spjd		error = zfs_register_callbacks(vfsp);
1635185029Spjd		goto out;
1636185029Spjd	}
1637168404Spjd
1638242554Savg	/* Initial root mount: try hard to import the requested root pool. */
1639242554Savg	if ((vfsp->vfs_flag & MNT_ROOTFS) != 0 &&
1640242554Savg	    (vfsp->vfs_flag & MNT_UPDATE) == 0) {
1641242554Savg		char pname[MAXNAMELEN];
1642242554Savg		spa_t *spa;
1643242554Savg		int prefer_cache;
1644242554Savg
1645242554Savg		error = getpoolname(osname, pname);
1646242554Savg		if (error)
1647242554Savg			goto out;
1648242554Savg
1649242554Savg		prefer_cache = 1;
1650242554Savg		TUNABLE_INT_FETCH("vfs.zfs.rootpool.prefer_cached_config",
1651242554Savg		    &prefer_cache);
1652242554Savg		mutex_enter(&spa_namespace_lock);
1653242554Savg		spa = spa_lookup(pname);
1654242554Savg		mutex_exit(&spa_namespace_lock);
1655242554Savg		if (!prefer_cache || spa == NULL) {
1656242554Savg			error = spa_import_rootpool(pname);
1657242554Savg			if (error)
1658242554Savg				goto out;
1659242554Savg		}
1660242554Savg	}
1661168510Spjd	DROP_GIANT();
1662185029Spjd	error = zfs_domount(vfsp, osname);
1663168510Spjd	PICKUP_GIANT();
1664209962Smm
1665215260Smm#ifdef sun
1666209962Smm	/*
1667209962Smm	 * Add an extra VFS_HOLD on our parent vfs so that it can't
1668209962Smm	 * disappear due to a forced unmount.
1669209962Smm	 */
1670209962Smm	if (error == 0 && ((zfsvfs_t *)vfsp->vfs_data)->z_issnap)
1671209962Smm		VFS_HOLD(mvp->v_vfsp);
1672215260Smm#endif	/* sun */
1673209962Smm
1674185029Spjdout:
1675168510Spjd	return (error);
1676168404Spjd}
1677168404Spjd
1678168404Spjdstatic int
1679191990Sattiliozfs_statfs(vfs_t *vfsp, struct statfs *statp)
1680169170Spjd{
1681168404Spjd	zfsvfs_t *zfsvfs = vfsp->vfs_data;
1682168404Spjd	uint64_t refdbytes, availbytes, usedobjs, availobjs;
1683168404Spjd
1684168404Spjd	statp->f_version = STATFS_VERSION;
1685168404Spjd
1686168404Spjd	ZFS_ENTER(zfsvfs);
1687168404Spjd
1688168404Spjd	dmu_objset_space(zfsvfs->z_os,
1689168404Spjd	    &refdbytes, &availbytes, &usedobjs, &availobjs);
1690168404Spjd
1691168404Spjd	/*
1692168404Spjd	 * The underlying storage pool actually uses multiple block sizes.
1693168404Spjd	 * We report the fragsize as the smallest block size we support,
1694168404Spjd	 * and we report our blocksize as the filesystem's maximum blocksize.
1695168404Spjd	 */
1696204101Spjd	statp->f_bsize = SPA_MINBLOCKSIZE;
1697204101Spjd	statp->f_iosize = zfsvfs->z_vfs->mnt_stat.f_iosize;
1698168404Spjd
1699168404Spjd	/*
1700168404Spjd	 * The following report "total" blocks of various kinds in the
1701168404Spjd	 * file system, but reported in terms of f_frsize - the
1702168404Spjd	 * "fragment" size.
1703168404Spjd	 */
1704168404Spjd
1705204101Spjd	statp->f_blocks = (refdbytes + availbytes) >> SPA_MINBLOCKSHIFT;
1706168404Spjd	statp->f_bfree = availbytes / statp->f_bsize;
1707168404Spjd	statp->f_bavail = statp->f_bfree; /* no root reservation */
1708168404Spjd
1709168404Spjd	/*
1710168404Spjd	 * statvfs() should really be called statufs(), because it assumes
1711168404Spjd	 * static metadata.  ZFS doesn't preallocate files, so the best
1712168404Spjd	 * we can do is report the max that could possibly fit in f_files,
1713168404Spjd	 * and that minus the number actually used in f_ffree.
1714168404Spjd	 * For f_ffree, report the smaller of the number of object available
1715168404Spjd	 * and the number of blocks (each object will take at least a block).
1716168404Spjd	 */
1717168404Spjd	statp->f_ffree = MIN(availobjs, statp->f_bfree);
1718168404Spjd	statp->f_files = statp->f_ffree + usedobjs;
1719168404Spjd
1720168404Spjd	/*
1721168404Spjd	 * We're a zfs filesystem.
1722168404Spjd	 */
1723168404Spjd	(void) strlcpy(statp->f_fstypename, "zfs", sizeof(statp->f_fstypename));
1724168404Spjd
1725168404Spjd	strlcpy(statp->f_mntfromname, vfsp->mnt_stat.f_mntfromname,
1726168404Spjd	    sizeof(statp->f_mntfromname));
1727168404Spjd	strlcpy(statp->f_mntonname, vfsp->mnt_stat.f_mntonname,
1728168404Spjd	    sizeof(statp->f_mntonname));
1729168404Spjd
1730168404Spjd	statp->f_namemax = ZFS_MAXNAMELEN;
1731168404Spjd
1732168404Spjd	ZFS_EXIT(zfsvfs);
1733168404Spjd	return (0);
1734168404Spjd}
1735168404Spjd
1736219089Spjdint
1737219089Spjdzfs_vnode_lock(vnode_t *vp, int flags)
1738219089Spjd{
1739219089Spjd	int error;
1740219089Spjd
1741219089Spjd	ASSERT(vp != NULL);
1742219089Spjd
1743219089Spjd	/*
1744219089Spjd	 * Check if the file system wasn't forcibly unmounted in the meantime.
1745219089Spjd	 */
1746219089Spjd	error = vn_lock(vp, flags);
1747219089Spjd	if (error == 0 && (vp->v_iflag & VI_DOOMED) != 0) {
1748219089Spjd		VOP_UNLOCK(vp, 0);
1749219089Spjd		error = ENOENT;
1750219089Spjd	}
1751219089Spjd
1752219089Spjd	return (error);
1753219089Spjd}
1754219089Spjd
1755168404Spjdstatic int
1756191990Sattiliozfs_root(vfs_t *vfsp, int flags, vnode_t **vpp)
1757168404Spjd{
1758168404Spjd	zfsvfs_t *zfsvfs = vfsp->vfs_data;
1759168404Spjd	znode_t *rootzp;
1760168404Spjd	int error;
1761168404Spjd
1762197459Spjd	ZFS_ENTER_NOERROR(zfsvfs);
1763168404Spjd
1764168404Spjd	error = zfs_zget(zfsvfs, zfsvfs->z_root, &rootzp);
1765219089Spjd	if (error == 0)
1766219089Spjd		*vpp = ZTOV(rootzp);
1767206667Spjd
1768206667Spjd	ZFS_EXIT(zfsvfs);
1769206667Spjd
1770168404Spjd	if (error == 0) {
1771219089Spjd		error = zfs_vnode_lock(*vpp, flags);
1772219089Spjd		if (error == 0)
1773219089Spjd			(*vpp)->v_vflag |= VV_ROOT;
1774168404Spjd	}
1775219089Spjd	if (error != 0)
1776219089Spjd		*vpp = NULL;
1777168404Spjd
1778168404Spjd	return (error);
1779168404Spjd}
1780168404Spjd
/*
 * Teardown the zfsvfs::z_os.
 *
 *	zfsvfs		file system instance to tear down
 *	unmounting	B_TRUE when the file system is being unmounted,
 *			B_FALSE when it is only being suspended
 *
 * Note, if 'unmounting' is FALSE, we return with the 'z_teardown_lock'
 * and 'z_teardown_inactive_lock' held.
 *
 * Returns 0 on success.  Returns EIO (with both locks dropped) when
 * 'unmounting' is FALSE and the file system has already been unmounted
 * or has no open objset.
 */
static int
zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting)
{
	znode_t	*zp;

	/* Block out new vops for the duration of the teardown. */
	rrw_enter(&zfsvfs->z_teardown_lock, RW_WRITER, FTAG);

	if (!unmounting) {
		/*
		 * We purge the parent filesystem's vfsp as the parent
		 * filesystem and all of its snapshots have their vnode's
		 * v_vfsp set to the parent's filesystem's vfsp.  Note,
		 * 'z_parent' is self referential for non-snapshots.
		 */
		(void) dnlc_purge_vfsp(zfsvfs->z_parent->z_vfs, 0);
#ifdef FREEBSD_NAMECACHE
		cache_purgevfs(zfsvfs->z_parent->z_vfs);
#endif
	}

	/*
	 * Close the zil. NB: Can't close the zil while zfs_inactive
	 * threads are blocked as zil_close can call zfs_inactive.
	 */
	if (zfsvfs->z_log) {
		zil_close(zfsvfs->z_log);
		zfsvfs->z_log = NULL;
	}

	rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_WRITER);

	/*
	 * If we are not unmounting (ie: online recv) and someone already
	 * unmounted this file system while we were doing the switcheroo,
	 * or a reopen of z_os failed then just bail out now.
	 */
	if (!unmounting && (zfsvfs->z_unmounted || zfsvfs->z_os == NULL)) {
		rw_exit(&zfsvfs->z_teardown_inactive_lock);
		rrw_exit(&zfsvfs->z_teardown_lock, FTAG);
		return (EIO);
	}

	/*
	 * At this point there are no vops active, and any new vops will
	 * fail with EIO since we have z_teardown_lock for writer (only
	 * relevant for forced unmount).
	 *
	 * Release all holds on dbufs.
	 */
	mutex_enter(&zfsvfs->z_znodes_lock);
	for (zp = list_head(&zfsvfs->z_all_znodes); zp != NULL;
	    zp = list_next(&zfsvfs->z_all_znodes, zp))
		if (zp->z_sa_hdl) {
			ASSERT(ZTOV(zp)->v_count >= 0);
			zfs_znode_dmu_fini(zp);
		}
	mutex_exit(&zfsvfs->z_znodes_lock);

	/*
	 * If we are unmounting, set the unmounted flag and let new vops
	 * unblock.  zfs_inactive will have the unmounted behavior, and all
	 * other vops will fail with EIO.
	 */
	if (unmounting) {
		zfsvfs->z_unmounted = B_TRUE;
		rrw_exit(&zfsvfs->z_teardown_lock, FTAG);
		rw_exit(&zfsvfs->z_teardown_inactive_lock);

#ifdef __FreeBSD__
		/*
		 * Some znodes might not be fully reclaimed, wait for them.
		 * We sleep on 'zfsvfs' until the list of znodes is empty.
		 * NOTE(review): the matching wakeup is not in this
		 * function - presumably issued where znodes are removed
		 * from z_all_znodes; confirm.
		 */
		mutex_enter(&zfsvfs->z_znodes_lock);
		while (list_head(&zfsvfs->z_all_znodes) != NULL) {
			msleep(zfsvfs, &zfsvfs->z_znodes_lock, 0,
			    "zteardown", 0);
		}
		mutex_exit(&zfsvfs->z_znodes_lock);
#endif
	}

	/*
	 * z_os will be NULL if there was an error in attempting to reopen
	 * zfsvfs, so just return as the properties had already been
	 * unregistered and cached data had been evicted before.
	 */
	if (zfsvfs->z_os == NULL)
		return (0);

	/*
	 * Unregister properties.
	 */
	zfs_unregister_callbacks(zfsvfs);

	/*
	 * Evict cached data.  If the objset is dirty and the mount is not
	 * read-only, wait for the pool to sync first.
	 */
	if (dmu_objset_is_dirty_anywhere(zfsvfs->z_os))
		if (!(zfsvfs->z_vfs->vfs_flag & VFS_RDONLY))
			txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0);
	(void) dmu_objset_evict_dbufs(zfsvfs->z_os);

	return (0);
}
1891185029Spjd
/*
 * VFS_UNMOUNT handler: unmount the ZFS file system 'vfsp'.
 *
 *	vfsp	file system being unmounted
 *	fflag	unmount flags; MS_FORCE requests a forced unmount
 *
 * Returns 0 on success.  Fails with the secpolicy_fs_unmount() error when
 * the caller has neither the privilege nor the delegated "mount"
 * permission, with an error from zfsctl_umount_snapshots() or vflush(),
 * or with EBUSY when the unmount is not forced and references remain.
 */
/*ARGSUSED*/
static int
zfs_umount(vfs_t *vfsp, int fflag)
{
	kthread_t *td = curthread;
	zfsvfs_t *zfsvfs = vfsp->vfs_data;
	objset_t *os;
	cred_t *cr = td->td_ucred;
	int ret;

	/*
	 * A policy failure is only fatal if the delegated mount permission
	 * check fails as well.
	 */
	ret = secpolicy_fs_unmount(cr, vfsp);
	if (ret) {
		if (dsl_deleg_access((char *)refstr_value(vfsp->vfs_resource),
		    ZFS_DELEG_PERM_MOUNT, cr))
			return (ret);
	}

	/*
	 * We purge the parent filesystem's vfsp as the parent filesystem
	 * and all of its snapshots have their vnode's v_vfsp set to the
	 * parent's filesystem's vfsp.  Note, 'z_parent' is self
	 * referential for non-snapshots.
	 */
	(void) dnlc_purge_vfsp(zfsvfs->z_parent->z_vfs, 0);

	/*
	 * Unmount any snapshots mounted under .zfs before unmounting the
	 * dataset itself.
	 */
	if (zfsvfs->z_ctldir != NULL) {
		if ((ret = zfsctl_umount_snapshots(vfsp, fflag, cr)) != 0)
			return (ret);
		/*
		 * NOTE(review): this non-forced flush is expected to report
		 * EBUSY - presumably because the .zfs vnode is still
		 * referenced at this point; confirm.
		 */
		ret = vflush(vfsp, 0, 0, td);
		ASSERT(ret == EBUSY);
		if (!(fflag & MS_FORCE)) {
			if (zfsvfs->z_ctldir->v_count > 1)
				return (EBUSY);
			ASSERT(zfsvfs->z_ctldir->v_count == 1);
		}
		zfsctl_destroy(zfsvfs);
		ASSERT(zfsvfs->z_ctldir == NULL);
	}

	if (fflag & MS_FORCE) {
		/*
		 * Mark file system as unmounted before calling
		 * vflush(FORCECLOSE). This way we ensure no future vnops
		 * will be called and risk operating on DOOMED vnodes.
		 */
		rrw_enter(&zfsvfs->z_teardown_lock, RW_WRITER, FTAG);
		zfsvfs->z_unmounted = B_TRUE;
		rrw_exit(&zfsvfs->z_teardown_lock, FTAG);
	}

	/*
	 * Flush all the files.  On failure the '.zfs' directory destroyed
	 * above is recreated for non-snapshot file systems before the
	 * error is returned.
	 */
	ret = vflush(vfsp, 1, (fflag & MS_FORCE) ? FORCECLOSE : 0, td);
	if (ret != 0) {
		if (!zfsvfs->z_issnap) {
			zfsctl_create(zfsvfs);
			ASSERT(zfsvfs->z_ctldir != NULL);
		}
		return (ret);
	}

	if (!(fflag & MS_FORCE)) {
		/*
		 * Check the number of active vnodes in the file system.
		 * Our count is maintained in the vfs structure, but the
		 * number is off by 1 to indicate a hold on the vfs
		 * structure itself.
		 *
		 * The '.zfs' directory maintains a reference of its
		 * own, and any active references underneath are
		 * reflected in the vnode count.
		 */
		if (zfsvfs->z_ctldir == NULL) {
			if (vfsp->vfs_count > 1)
				return (EBUSY);
		} else {
			if (vfsp->vfs_count > 2 ||
			    zfsvfs->z_ctldir->v_count > 1)
				return (EBUSY);
		}
	} else {
		/* Forced unmount: flag the mount accordingly. */
		MNT_ILOCK(vfsp);
		vfsp->mnt_kern_flag |= MNTK_UNMOUNTF;
		MNT_IUNLOCK(vfsp);
	}

	VERIFY(zfsvfs_teardown(zfsvfs, B_TRUE) == 0);
	os = zfsvfs->z_os;

	/*
	 * z_os will be NULL if there was an error in
	 * attempting to reopen zfsvfs.
	 */
	if (os != NULL) {
		/*
		 * Unset the objset user_ptr.
		 */
		mutex_enter(&os->os_user_ptr_lock);
		dmu_objset_set_user(os, NULL);
		mutex_exit(&os->os_user_ptr_lock);

		/*
		 * Finally release the objset
		 */
		dmu_objset_disown(os, zfsvfs);
	}

	/*
	 * We can now safely destroy the '.zfs' directory node.
	 */
	if (zfsvfs->z_ctldir != NULL)
		zfsctl_destroy(zfsvfs);
	if (zfsvfs->z_issnap) {
		vnode_t *svp = vfsp->mnt_vnodecovered;

		/*
		 * For a snapshot, drop one reference on the covered vnode,
		 * but only when at least two are outstanding.
		 */
		if (svp->v_count >= 2)
			VN_RELE(svp);
	}
	zfs_freevfs(vfsp);

	return (0);
}
2019168404Spjd
2020168404Spjdstatic int
2021168404Spjdzfs_vget(vfs_t *vfsp, ino_t ino, int flags, vnode_t **vpp)
2022168404Spjd{
2023168404Spjd	zfsvfs_t	*zfsvfs = vfsp->vfs_data;
2024168404Spjd	znode_t		*zp;
2025168404Spjd	int 		err;
2026168404Spjd
2027197167Spjd	/*
2028215397Savg	 * zfs_zget() can't operate on virtual entries like .zfs/ or
2029211855Spjd	 * .zfs/snapshot/ directories, that's why we return EOPNOTSUPP.
2030211855Spjd	 * This will make NFS to switch to LOOKUP instead of using VGET.
2031197167Spjd	 */
2032197167Spjd	if (ino == ZFSCTL_INO_ROOT || ino == ZFSCTL_INO_SNAPDIR)
2033197167Spjd		return (EOPNOTSUPP);
2034197167Spjd
2035168404Spjd	ZFS_ENTER(zfsvfs);
2036168404Spjd	err = zfs_zget(zfsvfs, ino, &zp);
2037168404Spjd	if (err == 0 && zp->z_unlinked) {
2038168404Spjd		VN_RELE(ZTOV(zp));
2039168404Spjd		err = EINVAL;
2040168404Spjd	}
2041219089Spjd	if (err == 0)
2042219089Spjd		*vpp = ZTOV(zp);
2043206667Spjd	ZFS_EXIT(zfsvfs);
2044219089Spjd	if (err == 0)
2045219089Spjd		err = zfs_vnode_lock(*vpp, flags);
2046168404Spjd	if (err != 0)
2047168404Spjd		*vpp = NULL;
2048171063Sdfr	return (err);
2049168404Spjd}
2050168404Spjd
2051168404Spjdstatic int
2052196982Spjdzfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, int *extflagsp,
2053196982Spjd    struct ucred **credanonp, int *numsecflavors, int **secflavors)
2054196982Spjd{
2055196982Spjd	zfsvfs_t *zfsvfs = vfsp->vfs_data;
2056196982Spjd
2057196982Spjd	/*
2058196982Spjd	 * If this is regular file system vfsp is the same as
2059196982Spjd	 * zfsvfs->z_parent->z_vfs, but if it is snapshot,
2060196982Spjd	 * zfsvfs->z_parent->z_vfs represents parent file system
2061196982Spjd	 * which we have to use here, because only this file system
2062196982Spjd	 * has mnt_export configured.
2063196982Spjd	 */
2064196982Spjd	return (vfs_stdcheckexp(zfsvfs->z_parent->z_vfs, nam, extflagsp,
2065196982Spjd	    credanonp, numsecflavors, secflavors));
2066196982Spjd}
2067196982Spjd
2068197151SpjdCTASSERT(SHORT_FID_LEN <= sizeof(struct fid));
2069197151SpjdCTASSERT(LONG_FID_LEN <= sizeof(struct fid));
2070196982Spjd
2071196982Spjdstatic int
2072222167Srmacklemzfs_fhtovp(vfs_t *vfsp, fid_t *fidp, int flags, vnode_t **vpp)
2073168404Spjd{
2074168404Spjd	zfsvfs_t	*zfsvfs = vfsp->vfs_data;
2075168404Spjd	znode_t		*zp;
2076168404Spjd	uint64_t	object = 0;
2077168404Spjd	uint64_t	fid_gen = 0;
2078168404Spjd	uint64_t	gen_mask;
2079168404Spjd	uint64_t	zp_gen;
2080219089Spjd	int 		i, err;
2081168404Spjd
2082168404Spjd	*vpp = NULL;
2083168404Spjd
2084168404Spjd	ZFS_ENTER(zfsvfs);
2085168404Spjd
2086196979Spjd	/*
2087197177Spjd	 * On FreeBSD we can get snapshot's mount point or its parent file
2088197177Spjd	 * system mount point depending if snapshot is already mounted or not.
2089196979Spjd	 */
2090197177Spjd	if (zfsvfs->z_parent == zfsvfs && fidp->fid_len == LONG_FID_LEN) {
2091168404Spjd		zfid_long_t	*zlfid = (zfid_long_t *)fidp;
2092168404Spjd		uint64_t	objsetid = 0;
2093168404Spjd		uint64_t	setgen = 0;
2094168404Spjd
2095168404Spjd		for (i = 0; i < sizeof (zlfid->zf_setid); i++)
2096168404Spjd			objsetid |= ((uint64_t)zlfid->zf_setid[i]) << (8 * i);
2097168404Spjd
2098168404Spjd		for (i = 0; i < sizeof (zlfid->zf_setgen); i++)
2099168404Spjd			setgen |= ((uint64_t)zlfid->zf_setgen[i]) << (8 * i);
2100168404Spjd
2101168404Spjd		ZFS_EXIT(zfsvfs);
2102168404Spjd
2103168404Spjd		err = zfsctl_lookup_objset(vfsp, objsetid, &zfsvfs);
2104168404Spjd		if (err)
2105168404Spjd			return (EINVAL);
2106168404Spjd		ZFS_ENTER(zfsvfs);
2107168404Spjd	}
2108168404Spjd
2109168404Spjd	if (fidp->fid_len == SHORT_FID_LEN || fidp->fid_len == LONG_FID_LEN) {
2110168404Spjd		zfid_short_t	*zfid = (zfid_short_t *)fidp;
2111168404Spjd
2112168404Spjd		for (i = 0; i < sizeof (zfid->zf_object); i++)
2113168404Spjd			object |= ((uint64_t)zfid->zf_object[i]) << (8 * i);
2114168404Spjd
2115168404Spjd		for (i = 0; i < sizeof (zfid->zf_gen); i++)
2116168404Spjd			fid_gen |= ((uint64_t)zfid->zf_gen[i]) << (8 * i);
2117168404Spjd	} else {
2118168404Spjd		ZFS_EXIT(zfsvfs);
2119168404Spjd		return (EINVAL);
2120168404Spjd	}
2121168404Spjd
2122168404Spjd	/* A zero fid_gen means we are in the .zfs control directories */
2123168404Spjd	if (fid_gen == 0 &&
2124168404Spjd	    (object == ZFSCTL_INO_ROOT || object == ZFSCTL_INO_SNAPDIR)) {
2125168404Spjd		*vpp = zfsvfs->z_ctldir;
2126168404Spjd		ASSERT(*vpp != NULL);
2127168404Spjd		if (object == ZFSCTL_INO_SNAPDIR) {
2128168404Spjd			VERIFY(zfsctl_root_lookup(*vpp, "snapshot", vpp, NULL,
2129185029Spjd			    0, NULL, NULL, NULL, NULL, NULL) == 0);
2130168404Spjd		} else {
2131168404Spjd			VN_HOLD(*vpp);
2132168404Spjd		}
2133206667Spjd		ZFS_EXIT(zfsvfs);
2134222199Srmacklem		err = zfs_vnode_lock(*vpp, flags | LK_RETRY);
2135219089Spjd		if (err != 0)
2136219089Spjd			*vpp = NULL;
2137219089Spjd		return (err);
2138168404Spjd	}
2139168404Spjd
2140168404Spjd	gen_mask = -1ULL >> (64 - 8 * i);
2141168404Spjd
2142168404Spjd	dprintf("getting %llu [%u mask %llx]\n", object, fid_gen, gen_mask);
2143168404Spjd	if (err = zfs_zget(zfsvfs, object, &zp)) {
2144168404Spjd		ZFS_EXIT(zfsvfs);
2145168404Spjd		return (err);
2146168404Spjd	}
2147219089Spjd	(void) sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs), &zp_gen,
2148219089Spjd	    sizeof (uint64_t));
2149219089Spjd	zp_gen = zp_gen & gen_mask;
2150168404Spjd	if (zp_gen == 0)
2151168404Spjd		zp_gen = 1;
2152168404Spjd	if (zp->z_unlinked || zp_gen != fid_gen) {
2153168404Spjd		dprintf("znode gen (%u) != fid gen (%u)\n", zp_gen, fid_gen);
2154168404Spjd		VN_RELE(ZTOV(zp));
2155168404Spjd		ZFS_EXIT(zfsvfs);
2156168404Spjd		return (EINVAL);
2157168404Spjd	}
2158168404Spjd
2159219089Spjd	*vpp = ZTOV(zp);
2160206667Spjd	ZFS_EXIT(zfsvfs);
2161222199Srmacklem	err = zfs_vnode_lock(*vpp, flags | LK_RETRY);
2162219089Spjd	if (err == 0)
2163219089Spjd		vnode_create_vobject(*vpp, zp->z_size, curthread);
2164219089Spjd	else
2165219089Spjd		*vpp = NULL;
2166219089Spjd	return (err);
2167168404Spjd}
2168168404Spjd
2169185029Spjd/*
2170185029Spjd * Block out VOPs and close zfsvfs_t::z_os
2171185029Spjd *
2172185029Spjd * Note, if successful, then we return with the 'z_teardown_lock' and
2173185029Spjd * 'z_teardown_inactive_lock' write held.
2174185029Spjd */
2175185029Spjdint
2176219089Spjdzfs_suspend_fs(zfsvfs_t *zfsvfs)
2177168404Spjd{
2178185029Spjd	int error;
2179168404Spjd
2180185029Spjd	if ((error = zfsvfs_teardown(zfsvfs, B_FALSE)) != 0)
2181185029Spjd		return (error);
2182219089Spjd	dmu_objset_disown(zfsvfs->z_os, zfsvfs);
2183168404Spjd
2184185029Spjd	return (0);
2185185029Spjd}
2186168404Spjd
/*
 * Reopen zfsvfs_t::z_os and release VOPs.
 *
 *	zfsvfs	suspended file system instance
 *	osname	name of the objset to take ownership of again
 *
 * The caller must hold 'z_teardown_lock' and 'z_teardown_inactive_lock'
 * as writer (asserted); both are released before returning, whether or
 * not the resume succeeded.
 *
 * Returns 0 on success.  On any failure the error is returned and a
 * forced unmount of the file system is attempted.
 */
int
zfs_resume_fs(zfsvfs_t *zfsvfs, const char *osname)
{
	int err;

	ASSERT(RRW_WRITE_HELD(&zfsvfs->z_teardown_lock));
	ASSERT(RW_WRITE_HELD(&zfsvfs->z_teardown_inactive_lock));

	err = dmu_objset_own(osname, DMU_OST_ZFS, B_FALSE, zfsvfs,
	    &zfsvfs->z_os);
	if (err) {
		/* Leave a NULL z_os behind so later code can detect this. */
		zfsvfs->z_os = NULL;
	} else {
		znode_t *zp;
		uint64_t sa_obj = 0;

		/*
		 * Make sure version hasn't changed
		 */

		err = zfs_get_zplprop(zfsvfs->z_os, ZFS_PROP_VERSION,
		    &zfsvfs->z_version);

		if (err)
			goto bail;

		err = zap_lookup(zfsvfs->z_os, MASTER_NODE_OBJ,
		    ZFS_SA_ATTRS, 8, 1, &sa_obj);

		/*
		 * A failed lookup is only fatal for SA-capable versions;
		 * otherwise sa_obj stays 0.
		 */
		if (err && zfsvfs->z_version >= ZPL_VERSION_SA)
			goto bail;

		if ((err = sa_setup(zfsvfs->z_os, sa_obj,
		    zfs_attr_table,  ZPL_END, &zfsvfs->z_attr_table)) != 0)
			goto bail;

		if (zfsvfs->z_version >= ZPL_VERSION_SA)
			sa_register_update_callback(zfsvfs->z_os,
			    zfs_sa_upgrade);

		VERIFY(zfsvfs_setup(zfsvfs, B_FALSE) == 0);

		zfs_set_fuid_feature(zfsvfs);

		/*
		 * Attempt to re-establish all the active znodes with
		 * their dbufs.  If a zfs_rezget() fails, then we'll let
		 * any potential callers discover that via ZFS_ENTER_VERIFY_VP
		 * when they try to use their znode.
		 */
		mutex_enter(&zfsvfs->z_znodes_lock);
		for (zp = list_head(&zfsvfs->z_all_znodes); zp;
		    zp = list_next(&zfsvfs->z_all_znodes, zp)) {
			(void) zfs_rezget(zp);
		}
		mutex_exit(&zfsvfs->z_znodes_lock);
	}

bail:
	/* release the VOPs */
	rw_exit(&zfsvfs->z_teardown_inactive_lock);
	rrw_exit(&zfsvfs->z_teardown_lock, FTAG);

	if (err) {
		/*
		 * Since we couldn't reopen zfsvfs::z_os, or
		 * setup the sa framework force unmount this file system.
		 */
		if (vn_vfswlock(zfsvfs->z_vfs->vfs_vnodecovered) == 0)
			(void) dounmount(zfsvfs->z_vfs, MS_FORCE, curthread);
	}
	return (err);
}
2263168404Spjd
2264168404Spjdstatic void
2265168404Spjdzfs_freevfs(vfs_t *vfsp)
2266168404Spjd{
2267168404Spjd	zfsvfs_t *zfsvfs = vfsp->vfs_data;
2268168404Spjd
2269215260Smm#ifdef sun
2270209962Smm	/*
2271209962Smm	 * If this is a snapshot, we have an extra VFS_HOLD on our parent
2272219089Spjd	 * from zfs_mount().  Release it here.  If we came through
2273219089Spjd	 * zfs_mountroot() instead, we didn't grab an extra hold, so
2274219089Spjd	 * skip the VFS_RELE for rootvfs.
2275209962Smm	 */
2276219089Spjd	if (zfsvfs->z_issnap && (vfsp != rootvfs))
2277209962Smm		VFS_RELE(zfsvfs->z_parent->z_vfs);
2278215260Smm#endif	/* sun */
2279168404Spjd
2280209962Smm	zfsvfs_free(zfsvfs);
2281185029Spjd
2282168404Spjd	atomic_add_32(&zfs_active_fs_count, -1);
2283168404Spjd}
2284168404Spjd
#ifdef __i386__
/* desiredvnodes as it was before zfs_vnodes_adjust() touched it. */
static int desiredvnodes_backup;
#endif

/*
 * On i386, save the current desiredvnodes and, if it still has its
 * default value, lower it to three quarters of that.  Does nothing on
 * other architectures.
 */
static void
zfs_vnodes_adjust(void)
{
#ifdef __i386__
	int computed;

	desiredvnodes_backup = desiredvnodes;

	/*
	 * Recompute the default the same way vntblinit() does.  A value
	 * that differs from desiredvnodes means the administrator tuned
	 * it, in which case it is left alone.
	 */
	computed = min(maxproc + cnt.v_page_count / 4, 2 *
	    vm_kmem_size / (5 * (sizeof(struct vm_object) +
	    sizeof(struct vnode))));
	if (computed != desiredvnodes)
		return;

	desiredvnodes = (3 * computed) / 4;
#endif
}
2309172135Spjd
/*
 * Undo zfs_vnodes_adjust(): on i386, put back the desiredvnodes value
 * that was saved in desiredvnodes_backup.  Does nothing elsewhere.
 */
static void
zfs_vnodes_adjust_back(void)
{
#ifdef __i386__
	desiredvnodes = desiredvnodes_backup;
#endif
}
2318172135Spjd
2319168404Spjdvoid
2320168404Spjdzfs_init(void)
2321168404Spjd{
2322168404Spjd
2323202129Sdelphij	printf("ZFS filesystem version " ZPL_VERSION_STRING "\n");
2324168404Spjd
2325168404Spjd	/*
2326219089Spjd	 * Initialize .zfs directory structures
2327168404Spjd	 */
2328219089Spjd	zfsctl_init();
2329168404Spjd
2330168404Spjd	/*
2331219089Spjd	 * Initialize znode cache, vnode ops, etc...
2332168404Spjd	 */
2333219089Spjd	zfs_znode_init();
2334172135Spjd
2335172135Spjd	/*
2336219089Spjd	 * Reduce number of vnodes. Originally number of vnodes is calculated
2337172135Spjd	 * with UFS inode in mind. We reduce it here, because it's too big for
2338172135Spjd	 * ZFS/i386.
2339172135Spjd	 */
2340172135Spjd	zfs_vnodes_adjust();
2341209962Smm
2342209962Smm	dmu_objset_register_type(DMU_OST_ZFS, zfs_space_delta_cb);
2343168404Spjd}
2344168404Spjd
/*
 * Counterpart of zfs_init(): shut down the .zfs and znode subsystems and
 * restore the vnode limit saved by zfs_vnodes_adjust().
 */
void
zfs_fini(void)
{

	zfsctl_fini();
	zfs_znode_fini();
	zfs_vnodes_adjust_back();
}
2352168404Spjd
2353168404Spjdint
2354168404Spjdzfs_busy(void)
2355168404Spjd{
2356168404Spjd	return (zfs_active_fs_count != 0);
2357168404Spjd}
2358185029Spjd
/*
 * Upgrade the ZPL version of a file system to 'newvers'.
 *
 *	zfsvfs	file system to upgrade
 *	newvers	requested ZPL version
 *
 * Returns 0 on success; EINVAL if 'newvers' is outside
 * [ZPL_VERSION_INITIAL, ZPL_VERSION] or lower than the current version;
 * ENOTSUP if the pool's SPA version is too old for 'newvers'; otherwise
 * the error from dmu_tx_assign() or zap_update().
 */
int
zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers)
{
	int error;
	objset_t *os = zfsvfs->z_os;
	dmu_tx_t *tx;

	if (newvers < ZPL_VERSION_INITIAL || newvers > ZPL_VERSION)
		return (EINVAL);

	/* Downgrades are not allowed. */
	if (newvers < zfsvfs->z_version)
		return (EINVAL);

	if (zfs_spa_version_map(newvers) >
	    spa_version(dmu_objset_spa(zfsvfs->z_os)))
		return (ENOTSUP);

	/*
	 * Declare everything the transaction will touch: the version
	 * entry and, when SA is being switched on, the SA attribute entry
	 * plus a new object.
	 */
	tx = dmu_tx_create(os);
	dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_FALSE, ZPL_VERSION_STR);
	if (newvers >= ZPL_VERSION_SA && !zfsvfs->z_use_sa) {
		dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_TRUE,
		    ZFS_SA_ATTRS);
		dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
	}
	error = dmu_tx_assign(tx, TXG_WAIT);
	if (error) {
		dmu_tx_abort(tx);
		return (error);
	}

	error = zap_update(os, MASTER_NODE_OBJ, ZPL_VERSION_STR,
	    8, 1, &newvers, tx);

	/* The transaction is already assigned, so it is committed. */
	if (error) {
		dmu_tx_commit(tx);
		return (error);
	}

	if (newvers >= ZPL_VERSION_SA && !zfsvfs->z_use_sa) {
		uint64_t sa_obj;

		ASSERT3U(spa_version(dmu_objset_spa(zfsvfs->z_os)), >=,
		    SPA_VERSION_SA);
		sa_obj = zap_create(os, DMU_OT_SA_MASTER_NODE,
		    DMU_OT_NONE, 0, tx);

		/*
		 * NOTE(review): the zap_add() result is only checked by
		 * an ASSERT, so it goes unnoticed where assertions are
		 * compiled out - confirm this is intended.
		 */
		error = zap_add(os, MASTER_NODE_OBJ,
		    ZFS_SA_ATTRS, 8, 1, &sa_obj, tx);
		ASSERT3U(error, ==, 0);

		VERIFY(0 == sa_set_sa_object(os, sa_obj));
		sa_register_update_callback(os, zfs_sa_upgrade);
	}

	spa_history_log_internal(LOG_DS_UPGRADE,
	    dmu_objset_spa(os), tx, "oldver=%llu newver=%llu dataset = %llu",
	    zfsvfs->z_version, newvers, dmu_objset_id(os));

	dmu_tx_commit(tx);

	/* Only now does the in-core state reflect the new version. */
	zfsvfs->z_version = newvers;

	zfs_set_fuid_feature(zfsvfs);

	return (0);
}
2425219089Spjd
2426185029Spjd/*
2427185029Spjd * Read a property stored within the master node.
2428185029Spjd */
2429185029Spjdint
2430185029Spjdzfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value)
2431185029Spjd{
2432185029Spjd	const char *pname;
2433185029Spjd	int error = ENOENT;
2434185029Spjd
2435185029Spjd	/*
2436185029Spjd	 * Look up the file system's value for the property.  For the
2437185029Spjd	 * version property, we look up a slightly different string.
2438185029Spjd	 */
2439185029Spjd	if (prop == ZFS_PROP_VERSION)
2440185029Spjd		pname = ZPL_VERSION_STR;
2441185029Spjd	else
2442185029Spjd		pname = zfs_prop_to_name(prop);
2443185029Spjd
2444185029Spjd	if (os != NULL)
2445185029Spjd		error = zap_lookup(os, MASTER_NODE_OBJ, pname, 8, 1, value);
2446185029Spjd
2447185029Spjd	if (error == ENOENT) {
2448185029Spjd		/* No value set, use the default value */
2449185029Spjd		switch (prop) {
2450185029Spjd		case ZFS_PROP_VERSION:
2451185029Spjd			*value = ZPL_VERSION;
2452185029Spjd			break;
2453185029Spjd		case ZFS_PROP_NORMALIZE:
2454185029Spjd		case ZFS_PROP_UTF8ONLY:
2455185029Spjd			*value = 0;
2456185029Spjd			break;
2457185029Spjd		case ZFS_PROP_CASE:
2458185029Spjd			*value = ZFS_CASE_SENSITIVE;
2459185029Spjd			break;
2460185029Spjd		default:
2461185029Spjd			return (error);
2462185029Spjd		}
2463185029Spjd		error = 0;
2464185029Spjd	}
2465185029Spjd	return (error);
2466185029Spjd}
2467229565Smm
#ifdef _KERNEL
/*
 * Rewrite the f_mntfromname of every entry on the mount list that refers
 * to 'oldname' so that it refers to 'newname' instead.
 *
 * An entry matches when its from-name equals 'oldname' exactly, or when
 * it starts with 'oldname' immediately followed by '/' or '@'.  In the
 * second case only the leading 'oldname' part is replaced and the rest
 * of the name is kept.  The whole walk runs under mountlist_mtx.
 */
void
zfsvfs_update_fromname(const char *oldname, const char *newname)
{
	char renamed[MAXPATHLEN];
	struct mount *mp;
	char *from;
	size_t prefix_len;

	prefix_len = strlen(oldname);

	mtx_lock(&mountlist_mtx);
	TAILQ_FOREACH(mp, &mountlist, mnt_list) {
		from = mp->mnt_stat.f_mntfromname;

		if (strcmp(from, oldname) == 0) {
			/* Exact match: replace the whole name. */
			(void)strlcpy(from, newname,
			    sizeof(mp->mnt_stat.f_mntfromname));
		} else if (strncmp(from, oldname, prefix_len) == 0 &&
		    (from[prefix_len] == '/' || from[prefix_len] == '@')) {
			/*
			 * Prefix match: build the new name in a scratch
			 * buffer first, since the suffix lives in the
			 * buffer being overwritten.
			 */
			(void)snprintf(renamed, sizeof(renamed), "%s%s",
			    newname, from + prefix_len);
			(void)strlcpy(from, renamed,
			    sizeof(mp->mnt_stat.f_mntfromname));
		}
	}
	mtx_unlock(&mountlist_mtx);
}
#endif
2499