zfs_vfsops.c revision 196965
1168404Spjd/*
2168404Spjd * CDDL HEADER START
3168404Spjd *
4168404Spjd * The contents of this file are subject to the terms of the
5168404Spjd * Common Development and Distribution License (the "License").
6168404Spjd * You may not use this file except in compliance with the License.
7168404Spjd *
8168404Spjd * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9168404Spjd * or http://www.opensolaris.org/os/licensing.
10168404Spjd * See the License for the specific language governing permissions
11168404Spjd * and limitations under the License.
12168404Spjd *
13168404Spjd * When distributing Covered Code, include this CDDL HEADER in each
14168404Spjd * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15168404Spjd * If applicable, add the following below this CDDL HEADER, with the
16168404Spjd * fields enclosed by brackets "[]" replaced with your own identifying
17168404Spjd * information: Portions Copyright [yyyy] [name of copyright owner]
18168404Spjd *
19168404Spjd * CDDL HEADER END
20168404Spjd */
21168404Spjd/*
22185029Spjd * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23168404Spjd * Use is subject to license terms.
24168404Spjd */
25168404Spjd
26168404Spjd#include <sys/types.h>
27168404Spjd#include <sys/param.h>
28168404Spjd#include <sys/systm.h>
29168404Spjd#include <sys/kernel.h>
30168404Spjd#include <sys/sysmacros.h>
31168404Spjd#include <sys/kmem.h>
32168404Spjd#include <sys/acl.h>
33168404Spjd#include <sys/vnode.h>
34168404Spjd#include <sys/vfs.h>
35168404Spjd#include <sys/mntent.h>
36168404Spjd#include <sys/mount.h>
37168404Spjd#include <sys/cmn_err.h>
38168404Spjd#include <sys/zfs_znode.h>
39168404Spjd#include <sys/zfs_dir.h>
40168404Spjd#include <sys/zil.h>
41168404Spjd#include <sys/fs/zfs.h>
42168404Spjd#include <sys/dmu.h>
43168404Spjd#include <sys/dsl_prop.h>
44168404Spjd#include <sys/dsl_dataset.h>
45185029Spjd#include <sys/dsl_deleg.h>
46168404Spjd#include <sys/spa.h>
47168404Spjd#include <sys/zap.h>
48168404Spjd#include <sys/varargs.h>
49168962Spjd#include <sys/policy.h>
50168404Spjd#include <sys/atomic.h>
51168404Spjd#include <sys/zfs_ioctl.h>
52168404Spjd#include <sys/zfs_ctldir.h>
53185029Spjd#include <sys/zfs_fuid.h>
54168962Spjd#include <sys/sunddi.h>
55168404Spjd#include <sys/dnlc.h>
56185029Spjd#include <sys/dmu_objset.h>
57185029Spjd#include <sys/spa_boot.h>
58185029Spjd#include <sys/vdev_impl.h>	/* VDEV_BOOT_VERSION */
59168404Spjd
60168404Spjdstruct mtx zfs_debug_mtx;
61168404SpjdMTX_SYSINIT(zfs_debug_mtx, &zfs_debug_mtx, "zfs_debug", MTX_DEF);
62185029Spjd
63168404SpjdSYSCTL_NODE(_vfs, OID_AUTO, zfs, CTLFLAG_RW, 0, "ZFS file system");
64185029Spjd
65185029Spjdint zfs_super_owner = 0;
66185029SpjdSYSCTL_INT(_vfs_zfs, OID_AUTO, super_owner, CTLFLAG_RW, &zfs_super_owner, 0,
67185029Spjd    "File system owner can perform privileged operation on his file systems");
68185029Spjd
69168404Spjdint zfs_debug_level = 0;
70168713SpjdTUNABLE_INT("vfs.zfs.debug", &zfs_debug_level);
71168404SpjdSYSCTL_INT(_vfs_zfs, OID_AUTO, debug, CTLFLAG_RW, &zfs_debug_level, 0,
72168404Spjd    "Debug level");
73168404Spjd
74185029SpjdSYSCTL_NODE(_vfs_zfs, OID_AUTO, version, CTLFLAG_RD, 0, "ZFS versions");
75185029Spjdstatic int zfs_version_acl = ZFS_ACL_VERSION;
76185029SpjdSYSCTL_INT(_vfs_zfs_version, OID_AUTO, acl, CTLFLAG_RD, &zfs_version_acl, 0,
77185029Spjd    "ZFS_ACL_VERSION");
78185029Spjdstatic int zfs_version_dmu_backup_header = DMU_BACKUP_HEADER_VERSION;
79185029SpjdSYSCTL_INT(_vfs_zfs_version, OID_AUTO, dmu_backup_header, CTLFLAG_RD,
80185029Spjd    &zfs_version_dmu_backup_header, 0, "DMU_BACKUP_HEADER_VERSION");
81185029Spjdstatic int zfs_version_dmu_backup_stream = DMU_BACKUP_STREAM_VERSION;
82185029SpjdSYSCTL_INT(_vfs_zfs_version, OID_AUTO, dmu_backup_stream, CTLFLAG_RD,
83185029Spjd    &zfs_version_dmu_backup_stream, 0, "DMU_BACKUP_STREAM_VERSION");
84185029Spjdstatic int zfs_version_spa = SPA_VERSION;
85185029SpjdSYSCTL_INT(_vfs_zfs_version, OID_AUTO, spa, CTLFLAG_RD, &zfs_version_spa, 0,
86185029Spjd    "SPA_VERSION");
87185029Spjdstatic int zfs_version_vdev_boot = VDEV_BOOT_VERSION;
88185029SpjdSYSCTL_INT(_vfs_zfs_version, OID_AUTO, vdev_boot, CTLFLAG_RD,
89185029Spjd    &zfs_version_vdev_boot, 0, "VDEV_BOOT_VERSION");
90185029Spjdstatic int zfs_version_zpl = ZPL_VERSION;
91185029SpjdSYSCTL_INT(_vfs_zfs_version, OID_AUTO, zpl, CTLFLAG_RD, &zfs_version_zpl, 0,
92185029Spjd    "ZPL_VERSION");
93185029Spjd
94191990Sattiliostatic int zfs_mount(vfs_t *vfsp);
95191990Sattiliostatic int zfs_umount(vfs_t *vfsp, int fflag);
96191990Sattiliostatic int zfs_root(vfs_t *vfsp, int flags, vnode_t **vpp);
97191990Sattiliostatic int zfs_statfs(vfs_t *vfsp, struct statfs *statp);
98168404Spjdstatic int zfs_vget(vfs_t *vfsp, ino_t ino, int flags, vnode_t **vpp);
99191990Sattiliostatic int zfs_sync(vfs_t *vfsp, int waitfor);
100168404Spjdstatic int zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, vnode_t **vpp);
101168404Spjdstatic void zfs_objset_close(zfsvfs_t *zfsvfs);
102168404Spjdstatic void zfs_freevfs(vfs_t *vfsp);
103168404Spjd
104168404Spjdstatic struct vfsops zfs_vfsops = {
105168404Spjd	.vfs_mount =		zfs_mount,
106168404Spjd	.vfs_unmount =		zfs_umount,
107168404Spjd	.vfs_root =		zfs_root,
108168404Spjd	.vfs_statfs =		zfs_statfs,
109168404Spjd	.vfs_vget =		zfs_vget,
110168404Spjd	.vfs_sync =		zfs_sync,
111168404Spjd	.vfs_fhtovp =		zfs_fhtovp,
112168404Spjd};
113168404Spjd
114185029SpjdVFS_SET(zfs_vfsops, zfs, VFCF_JAIL | VFCF_DELEGADMIN);
115168404Spjd
116168404Spjd/*
117168404Spjd * We need to keep a count of active fs's.
118168404Spjd * This is necessary to prevent our module
119168404Spjd * from being unloaded after a umount -f
120168404Spjd */
121168404Spjdstatic uint32_t	zfs_active_fs_count = 0;
122168404Spjd
123168404Spjd/*ARGSUSED*/
124168404Spjdstatic int
125191990Sattiliozfs_sync(vfs_t *vfsp, int waitfor)
126168404Spjd{
127168404Spjd
128168404Spjd	/*
129168404Spjd	 * Data integrity is job one.  We don't want a compromised kernel
130168404Spjd	 * writing to the storage pool, so we never sync during panic.
131168404Spjd	 */
132168404Spjd	if (panicstr)
133168404Spjd		return (0);
134168404Spjd
135168404Spjd	if (vfsp != NULL) {
136168404Spjd		/*
137168404Spjd		 * Sync a specific filesystem.
138168404Spjd		 */
139168404Spjd		zfsvfs_t *zfsvfs = vfsp->vfs_data;
140168404Spjd		int error;
141168404Spjd
142191990Sattilio		error = vfs_stdsync(vfsp, waitfor);
143168404Spjd		if (error != 0)
144168404Spjd			return (error);
145168404Spjd
146168404Spjd		ZFS_ENTER(zfsvfs);
147168404Spjd		if (zfsvfs->z_log != NULL)
148168404Spjd			zil_commit(zfsvfs->z_log, UINT64_MAX, 0);
149168404Spjd		else
150168404Spjd			txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0);
151168404Spjd		ZFS_EXIT(zfsvfs);
152168404Spjd	} else {
153168404Spjd		/*
154168404Spjd		 * Sync all ZFS filesystems.  This is what happens when you
155168404Spjd		 * run sync(1M).  Unlike other filesystems, ZFS honors the
156168404Spjd		 * request by waiting for all pools to commit all dirty data.
157168404Spjd		 */
158168404Spjd		spa_sync_allpools();
159168404Spjd	}
160168404Spjd
161168404Spjd	return (0);
162168404Spjd}
163168404Spjd
164168404Spjdstatic void
165168404Spjdatime_changed_cb(void *arg, uint64_t newval)
166168404Spjd{
167168404Spjd	zfsvfs_t *zfsvfs = arg;
168168404Spjd
169168404Spjd	if (newval == TRUE) {
170168404Spjd		zfsvfs->z_atime = TRUE;
171168404Spjd		zfsvfs->z_vfs->vfs_flag &= ~MNT_NOATIME;
172168404Spjd		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME);
173168404Spjd		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_ATIME, NULL, 0);
174168404Spjd	} else {
175168404Spjd		zfsvfs->z_atime = FALSE;
176168404Spjd		zfsvfs->z_vfs->vfs_flag |= MNT_NOATIME;
177168404Spjd		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_ATIME);
178168404Spjd		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME, NULL, 0);
179168404Spjd	}
180168404Spjd}
181168404Spjd
182168404Spjdstatic void
183168404Spjdxattr_changed_cb(void *arg, uint64_t newval)
184168404Spjd{
185168404Spjd	zfsvfs_t *zfsvfs = arg;
186168404Spjd
187168404Spjd	if (newval == TRUE) {
188168404Spjd		/* XXX locking on vfs_flag? */
189168404Spjd#ifdef TODO
190168404Spjd		zfsvfs->z_vfs->vfs_flag |= VFS_XATTR;
191168404Spjd#endif
192168404Spjd		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOXATTR);
193168404Spjd		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_XATTR, NULL, 0);
194168404Spjd	} else {
195168404Spjd		/* XXX locking on vfs_flag? */
196168404Spjd#ifdef TODO
197168404Spjd		zfsvfs->z_vfs->vfs_flag &= ~VFS_XATTR;
198168404Spjd#endif
199168404Spjd		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_XATTR);
200168404Spjd		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOXATTR, NULL, 0);
201168404Spjd	}
202168404Spjd}
203168404Spjd
204168404Spjdstatic void
205168404Spjdblksz_changed_cb(void *arg, uint64_t newval)
206168404Spjd{
207168404Spjd	zfsvfs_t *zfsvfs = arg;
208168404Spjd
209168404Spjd	if (newval < SPA_MINBLOCKSIZE ||
210168404Spjd	    newval > SPA_MAXBLOCKSIZE || !ISP2(newval))
211168404Spjd		newval = SPA_MAXBLOCKSIZE;
212168404Spjd
213168404Spjd	zfsvfs->z_max_blksz = newval;
214168404Spjd	zfsvfs->z_vfs->vfs_bsize = newval;
215168404Spjd}
216168404Spjd
217168404Spjdstatic void
218168404Spjdreadonly_changed_cb(void *arg, uint64_t newval)
219168404Spjd{
220168404Spjd	zfsvfs_t *zfsvfs = arg;
221168404Spjd
222168404Spjd	if (newval) {
223168404Spjd		/* XXX locking on vfs_flag? */
224168404Spjd		zfsvfs->z_vfs->vfs_flag |= VFS_RDONLY;
225168404Spjd		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RW);
226168404Spjd		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RO, NULL, 0);
227168404Spjd	} else {
228168404Spjd		/* XXX locking on vfs_flag? */
229168404Spjd		zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY;
230168404Spjd		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RO);
231168404Spjd		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RW, NULL, 0);
232168404Spjd	}
233168404Spjd}
234168404Spjd
235168404Spjdstatic void
236168404Spjdsetuid_changed_cb(void *arg, uint64_t newval)
237168404Spjd{
238168404Spjd	zfsvfs_t *zfsvfs = arg;
239168404Spjd
240168404Spjd	if (newval == FALSE) {
241168404Spjd		zfsvfs->z_vfs->vfs_flag |= VFS_NOSETUID;
242168404Spjd		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_SETUID);
243168404Spjd		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID, NULL, 0);
244168404Spjd	} else {
245168404Spjd		zfsvfs->z_vfs->vfs_flag &= ~VFS_NOSETUID;
246168404Spjd		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID);
247168404Spjd		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_SETUID, NULL, 0);
248168404Spjd	}
249168404Spjd}
250168404Spjd
251168404Spjdstatic void
252168404Spjdexec_changed_cb(void *arg, uint64_t newval)
253168404Spjd{
254168404Spjd	zfsvfs_t *zfsvfs = arg;
255168404Spjd
256168404Spjd	if (newval == FALSE) {
257168404Spjd		zfsvfs->z_vfs->vfs_flag |= VFS_NOEXEC;
258168404Spjd		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_EXEC);
259168404Spjd		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC, NULL, 0);
260168404Spjd	} else {
261168404Spjd		zfsvfs->z_vfs->vfs_flag &= ~VFS_NOEXEC;
262168404Spjd		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC);
263168404Spjd		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_EXEC, NULL, 0);
264168404Spjd	}
265168404Spjd}
266168404Spjd
267185029Spjd/*
268185029Spjd * The nbmand mount option can be changed at mount time.
269185029Spjd * We can't allow it to be toggled on live file systems or incorrect
270185029Spjd * behavior may be seen from cifs clients
271185029Spjd *
272185029Spjd * This property isn't registered via dsl_prop_register(), but this callback
273185029Spjd * will be called when a file system is first mounted
274185029Spjd */
275168404Spjdstatic void
276185029Spjdnbmand_changed_cb(void *arg, uint64_t newval)
277185029Spjd{
278185029Spjd	zfsvfs_t *zfsvfs = arg;
279185029Spjd	if (newval == FALSE) {
280185029Spjd		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NBMAND);
281185029Spjd		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NONBMAND, NULL, 0);
282185029Spjd	} else {
283185029Spjd		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NONBMAND);
284185029Spjd		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NBMAND, NULL, 0);
285185029Spjd	}
286185029Spjd}
287185029Spjd
288185029Spjdstatic void
289168404Spjdsnapdir_changed_cb(void *arg, uint64_t newval)
290168404Spjd{
291168404Spjd	zfsvfs_t *zfsvfs = arg;
292168404Spjd
293168404Spjd	zfsvfs->z_show_ctldir = newval;
294168404Spjd}
295168404Spjd
296168404Spjdstatic void
297185029Spjdvscan_changed_cb(void *arg, uint64_t newval)
298185029Spjd{
299185029Spjd	zfsvfs_t *zfsvfs = arg;
300185029Spjd
301185029Spjd	zfsvfs->z_vscan = newval;
302185029Spjd}
303185029Spjd
304185029Spjdstatic void
305168404Spjdacl_mode_changed_cb(void *arg, uint64_t newval)
306168404Spjd{
307168404Spjd	zfsvfs_t *zfsvfs = arg;
308168404Spjd
309168404Spjd	zfsvfs->z_acl_mode = newval;
310168404Spjd}
311168404Spjd
312168404Spjdstatic void
313168404Spjdacl_inherit_changed_cb(void *arg, uint64_t newval)
314168404Spjd{
315168404Spjd	zfsvfs_t *zfsvfs = arg;
316168404Spjd
317168404Spjd	zfsvfs->z_acl_inherit = newval;
318168404Spjd}
319168404Spjd
320168404Spjdstatic int
321168404Spjdzfs_register_callbacks(vfs_t *vfsp)
322168404Spjd{
323168404Spjd	struct dsl_dataset *ds = NULL;
324168404Spjd	objset_t *os = NULL;
325168404Spjd	zfsvfs_t *zfsvfs = NULL;
326185029Spjd	uint64_t nbmand;
327168404Spjd	int readonly, do_readonly = FALSE;
328168404Spjd	int setuid, do_setuid = FALSE;
329168404Spjd	int exec, do_exec = FALSE;
330168404Spjd	int xattr, do_xattr = FALSE;
331185029Spjd	int atime, do_atime = FALSE;
332168404Spjd	int error = 0;
333168404Spjd
334168404Spjd	ASSERT(vfsp);
335168404Spjd	zfsvfs = vfsp->vfs_data;
336168404Spjd	ASSERT(zfsvfs);
337168404Spjd	os = zfsvfs->z_os;
338168404Spjd
339168404Spjd	/*
340196965Spjd	 * This function can be called for a snapshot when we update snapshot's
341196965Spjd	 * mount point, which isn't really supported.
342196965Spjd	 */
343196965Spjd	if (dmu_objset_is_snapshot(os))
344196965Spjd		return (EOPNOTSUPP);
345196965Spjd
346196965Spjd	/*
347168404Spjd	 * The act of registering our callbacks will destroy any mount
348168404Spjd	 * options we may have.  In order to enable temporary overrides
349168404Spjd	 * of mount options, we stash away the current values and
350168404Spjd	 * restore them after we register the callbacks.
351168404Spjd	 */
352168404Spjd	if (vfs_optionisset(vfsp, MNTOPT_RO, NULL)) {
353168404Spjd		readonly = B_TRUE;
354168404Spjd		do_readonly = B_TRUE;
355168404Spjd	} else if (vfs_optionisset(vfsp, MNTOPT_RW, NULL)) {
356168404Spjd		readonly = B_FALSE;
357168404Spjd		do_readonly = B_TRUE;
358168404Spjd	}
359168404Spjd	if (vfs_optionisset(vfsp, MNTOPT_NOSUID, NULL)) {
360168404Spjd		setuid = B_FALSE;
361168404Spjd		do_setuid = B_TRUE;
362168404Spjd	} else {
363168404Spjd		if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) {
364168404Spjd			setuid = B_FALSE;
365168404Spjd			do_setuid = B_TRUE;
366168404Spjd		} else if (vfs_optionisset(vfsp, MNTOPT_SETUID, NULL)) {
367168404Spjd			setuid = B_TRUE;
368168404Spjd			do_setuid = B_TRUE;
369168404Spjd		}
370168404Spjd	}
371168404Spjd	if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL)) {
372168404Spjd		exec = B_FALSE;
373168404Spjd		do_exec = B_TRUE;
374168404Spjd	} else if (vfs_optionisset(vfsp, MNTOPT_EXEC, NULL)) {
375168404Spjd		exec = B_TRUE;
376168404Spjd		do_exec = B_TRUE;
377168404Spjd	}
378168404Spjd	if (vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL)) {
379168404Spjd		xattr = B_FALSE;
380168404Spjd		do_xattr = B_TRUE;
381168404Spjd	} else if (vfs_optionisset(vfsp, MNTOPT_XATTR, NULL)) {
382168404Spjd		xattr = B_TRUE;
383168404Spjd		do_xattr = B_TRUE;
384168404Spjd	}
385185029Spjd	if (vfs_optionisset(vfsp, MNTOPT_NOATIME, NULL)) {
386185029Spjd		atime = B_FALSE;
387185029Spjd		do_atime = B_TRUE;
388185029Spjd	} else if (vfs_optionisset(vfsp, MNTOPT_ATIME, NULL)) {
389185029Spjd		atime = B_TRUE;
390185029Spjd		do_atime = B_TRUE;
391185029Spjd	}
392168404Spjd
393168404Spjd	/*
394185029Spjd	 * nbmand is a special property.  It can only be changed at
395185029Spjd	 * mount time.
396185029Spjd	 *
397185029Spjd	 * This is weird, but it is documented to only be changeable
398185029Spjd	 * at mount time.
399185029Spjd	 */
400185029Spjd	if (vfs_optionisset(vfsp, MNTOPT_NONBMAND, NULL)) {
401185029Spjd		nbmand = B_FALSE;
402185029Spjd	} else if (vfs_optionisset(vfsp, MNTOPT_NBMAND, NULL)) {
403185029Spjd		nbmand = B_TRUE;
404185029Spjd	} else {
405185029Spjd		char osname[MAXNAMELEN];
406185029Spjd
407185029Spjd		dmu_objset_name(os, osname);
408185029Spjd		if (error = dsl_prop_get_integer(osname, "nbmand", &nbmand,
409185029Spjd		    NULL)) {
410185029Spjd			return (error);
411185029Spjd		}
412185029Spjd	}
413185029Spjd
414185029Spjd	/*
415168404Spjd	 * Register property callbacks.
416168404Spjd	 *
417168404Spjd	 * It would probably be fine to just check for i/o error from
418168404Spjd	 * the first prop_register(), but I guess I like to go
419168404Spjd	 * overboard...
420168404Spjd	 */
421168404Spjd	ds = dmu_objset_ds(os);
422168404Spjd	error = dsl_prop_register(ds, "atime", atime_changed_cb, zfsvfs);
423168404Spjd	error = error ? error : dsl_prop_register(ds,
424168404Spjd	    "xattr", xattr_changed_cb, zfsvfs);
425168404Spjd	error = error ? error : dsl_prop_register(ds,
426168404Spjd	    "recordsize", blksz_changed_cb, zfsvfs);
427168404Spjd	error = error ? error : dsl_prop_register(ds,
428168404Spjd	    "readonly", readonly_changed_cb, zfsvfs);
429168404Spjd	error = error ? error : dsl_prop_register(ds,
430168404Spjd	    "setuid", setuid_changed_cb, zfsvfs);
431168404Spjd	error = error ? error : dsl_prop_register(ds,
432168404Spjd	    "exec", exec_changed_cb, zfsvfs);
433168404Spjd	error = error ? error : dsl_prop_register(ds,
434168404Spjd	    "snapdir", snapdir_changed_cb, zfsvfs);
435168404Spjd	error = error ? error : dsl_prop_register(ds,
436168404Spjd	    "aclmode", acl_mode_changed_cb, zfsvfs);
437168404Spjd	error = error ? error : dsl_prop_register(ds,
438168404Spjd	    "aclinherit", acl_inherit_changed_cb, zfsvfs);
439185029Spjd	error = error ? error : dsl_prop_register(ds,
440185029Spjd	    "vscan", vscan_changed_cb, zfsvfs);
441168404Spjd	if (error)
442168404Spjd		goto unregister;
443168404Spjd
444168404Spjd	/*
445168404Spjd	 * Invoke our callbacks to restore temporary mount options.
446168404Spjd	 */
447168404Spjd	if (do_readonly)
448168404Spjd		readonly_changed_cb(zfsvfs, readonly);
449168404Spjd	if (do_setuid)
450168404Spjd		setuid_changed_cb(zfsvfs, setuid);
451168404Spjd	if (do_exec)
452168404Spjd		exec_changed_cb(zfsvfs, exec);
453168404Spjd	if (do_xattr)
454168404Spjd		xattr_changed_cb(zfsvfs, xattr);
455185029Spjd	if (do_atime)
456185029Spjd		atime_changed_cb(zfsvfs, atime);
457168404Spjd
458185029Spjd	nbmand_changed_cb(zfsvfs, nbmand);
459185029Spjd
460168404Spjd	return (0);
461168404Spjd
462168404Spjdunregister:
463168404Spjd	/*
464168404Spjd	 * We may attempt to unregister some callbacks that are not
465168404Spjd	 * registered, but this is OK; it will simply return ENOMSG,
466168404Spjd	 * which we will ignore.
467168404Spjd	 */
468168404Spjd	(void) dsl_prop_unregister(ds, "atime", atime_changed_cb, zfsvfs);
469168404Spjd	(void) dsl_prop_unregister(ds, "xattr", xattr_changed_cb, zfsvfs);
470168404Spjd	(void) dsl_prop_unregister(ds, "recordsize", blksz_changed_cb, zfsvfs);
471168404Spjd	(void) dsl_prop_unregister(ds, "readonly", readonly_changed_cb, zfsvfs);
472168404Spjd	(void) dsl_prop_unregister(ds, "setuid", setuid_changed_cb, zfsvfs);
473168404Spjd	(void) dsl_prop_unregister(ds, "exec", exec_changed_cb, zfsvfs);
474168404Spjd	(void) dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, zfsvfs);
475168404Spjd	(void) dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb, zfsvfs);
476168404Spjd	(void) dsl_prop_unregister(ds, "aclinherit", acl_inherit_changed_cb,
477168404Spjd	    zfsvfs);
478185029Spjd	(void) dsl_prop_unregister(ds, "vscan", vscan_changed_cb, zfsvfs);
479168404Spjd	return (error);
480168404Spjd
481168404Spjd}
482168404Spjd
483168404Spjdstatic int
484185029Spjdzfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting)
485168404Spjd{
486185029Spjd	int error;
487185029Spjd
488185029Spjd	error = zfs_register_callbacks(zfsvfs->z_vfs);
489185029Spjd	if (error)
490185029Spjd		return (error);
491185029Spjd
492185029Spjd	/*
493185029Spjd	 * Set the objset user_ptr to track its zfsvfs.
494185029Spjd	 */
495185029Spjd	mutex_enter(&zfsvfs->z_os->os->os_user_ptr_lock);
496185029Spjd	dmu_objset_set_user(zfsvfs->z_os, zfsvfs);
497185029Spjd	mutex_exit(&zfsvfs->z_os->os->os_user_ptr_lock);
498185029Spjd
499185029Spjd	/*
500185029Spjd	 * If we are not mounting (ie: online recv), then we don't
501185029Spjd	 * have to worry about replaying the log as we blocked all
502185029Spjd	 * operations out since we closed the ZIL.
503185029Spjd	 */
504185029Spjd	if (mounting) {
505185029Spjd		boolean_t readonly;
506185029Spjd
507185029Spjd		/*
508185029Spjd		 * During replay we remove the read only flag to
509185029Spjd		 * allow replays to succeed.
510185029Spjd		 */
511185029Spjd		readonly = zfsvfs->z_vfs->vfs_flag & VFS_RDONLY;
512185029Spjd		zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY;
513185029Spjd
514185029Spjd		/*
515185029Spjd		 * Parse and replay the intent log.
516185029Spjd		 */
517185029Spjd		zil_replay(zfsvfs->z_os, zfsvfs, &zfsvfs->z_assign,
518185029Spjd		    zfs_replay_vector, zfs_unlinked_drain);
519185029Spjd
520185029Spjd		zfs_unlinked_drain(zfsvfs);
521185029Spjd		zfsvfs->z_vfs->vfs_flag |= readonly; /* restore readonly bit */
522185029Spjd	}
523185029Spjd
524185029Spjd	if (!zil_disable)
525185029Spjd		zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data);
526185029Spjd
527185029Spjd	return (0);
528185029Spjd}
529185029Spjd
530185029Spjdstatic void
531185029Spjdzfs_freezfsvfs(zfsvfs_t *zfsvfs)
532185029Spjd{
533185029Spjd	mutex_destroy(&zfsvfs->z_znodes_lock);
534185029Spjd	mutex_destroy(&zfsvfs->z_online_recv_lock);
535185029Spjd	list_destroy(&zfsvfs->z_all_znodes);
536185029Spjd	rrw_destroy(&zfsvfs->z_teardown_lock);
537185029Spjd	rw_destroy(&zfsvfs->z_teardown_inactive_lock);
538185029Spjd	rw_destroy(&zfsvfs->z_fuid_lock);
539185029Spjd	kmem_free(zfsvfs, sizeof (zfsvfs_t));
540185029Spjd}
541185029Spjd
542185029Spjdstatic int
543185029Spjdzfs_domount(vfs_t *vfsp, char *osname)
544185029Spjd{
545168404Spjd	uint64_t recordsize, readonly;
546168404Spjd	int error = 0;
547168404Spjd	int mode;
548168404Spjd	zfsvfs_t *zfsvfs;
549168404Spjd	znode_t *zp = NULL;
550168404Spjd
551168404Spjd	ASSERT(vfsp);
552168404Spjd	ASSERT(osname);
553168404Spjd
554168404Spjd	/*
555168404Spjd	 * Initialize the zfs-specific filesystem structure.
556168404Spjd	 * Should probably make this a kmem cache, shuffle fields,
557168404Spjd	 * and just bzero up to z_hold_mtx[].
558168404Spjd	 */
559168404Spjd	zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP);
560168404Spjd	zfsvfs->z_vfs = vfsp;
561168404Spjd	zfsvfs->z_parent = zfsvfs;
562168404Spjd	zfsvfs->z_assign = TXG_NOWAIT;
563168404Spjd	zfsvfs->z_max_blksz = SPA_MAXBLOCKSIZE;
564168404Spjd	zfsvfs->z_show_ctldir = ZFS_SNAPDIR_VISIBLE;
565168404Spjd
566168404Spjd	mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL);
567185029Spjd	mutex_init(&zfsvfs->z_online_recv_lock, NULL, MUTEX_DEFAULT, NULL);
568168404Spjd	list_create(&zfsvfs->z_all_znodes, sizeof (znode_t),
569168404Spjd	    offsetof(znode_t, z_link_node));
570185029Spjd	rrw_init(&zfsvfs->z_teardown_lock);
571185029Spjd	rw_init(&zfsvfs->z_teardown_inactive_lock, NULL, RW_DEFAULT, NULL);
572185029Spjd	rw_init(&zfsvfs->z_fuid_lock, NULL, RW_DEFAULT, NULL);
573168404Spjd
574168404Spjd	if (error = dsl_prop_get_integer(osname, "recordsize", &recordsize,
575168404Spjd	    NULL))
576168404Spjd		goto out;
577168404Spjd	zfsvfs->z_vfs->vfs_bsize = recordsize;
578168404Spjd
579168404Spjd	vfsp->vfs_data = zfsvfs;
580168404Spjd	vfsp->mnt_flag |= MNT_LOCAL;
581168404Spjd	vfsp->mnt_kern_flag |= MNTK_MPSAFE;
582168404Spjd	vfsp->mnt_kern_flag |= MNTK_LOOKUP_SHARED;
583193440Sps	vfsp->mnt_kern_flag |= MNTK_SHARED_WRITES;
584168404Spjd
585168404Spjd	if (error = dsl_prop_get_integer(osname, "readonly", &readonly, NULL))
586168404Spjd		goto out;
587168404Spjd
588185029Spjd	mode = DS_MODE_OWNER;
589168404Spjd	if (readonly)
590185029Spjd		mode |= DS_MODE_READONLY;
591168404Spjd
592168404Spjd	error = dmu_objset_open(osname, DMU_OST_ZFS, mode, &zfsvfs->z_os);
593168404Spjd	if (error == EROFS) {
594185029Spjd		mode = DS_MODE_OWNER | DS_MODE_READONLY;
595168404Spjd		error = dmu_objset_open(osname, DMU_OST_ZFS, mode,
596168404Spjd		    &zfsvfs->z_os);
597168404Spjd	}
598168404Spjd
599168404Spjd	if (error)
600168404Spjd		goto out;
601168404Spjd
602185029Spjd	if (error = zfs_init_fs(zfsvfs, &zp))
603168404Spjd		goto out;
604168404Spjd
605185029Spjd	/*
606185029Spjd	 * Set features for file system.
607185029Spjd	 */
608185029Spjd	zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os);
609185029Spjd	if (zfsvfs->z_use_fuids) {
610185029Spjd		vfs_set_feature(vfsp, VFSFT_XVATTR);
611185029Spjd		vfs_set_feature(vfsp, VFSFT_SYSATTR_VIEWS);
612185029Spjd		vfs_set_feature(vfsp, VFSFT_ACEMASKONACCESS);
613185029Spjd		vfs_set_feature(vfsp, VFSFT_ACLONCREATE);
614185029Spjd	}
615185029Spjd	if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE) {
616185029Spjd		vfs_set_feature(vfsp, VFSFT_DIRENTFLAGS);
617185029Spjd		vfs_set_feature(vfsp, VFSFT_CASEINSENSITIVE);
618185029Spjd		vfs_set_feature(vfsp, VFSFT_NOCASESENSITIVE);
619185029Spjd	} else if (zfsvfs->z_case == ZFS_CASE_MIXED) {
620185029Spjd		vfs_set_feature(vfsp, VFSFT_DIRENTFLAGS);
621185029Spjd		vfs_set_feature(vfsp, VFSFT_CASEINSENSITIVE);
622185029Spjd	}
623185029Spjd
624168404Spjd	if (dmu_objset_is_snapshot(zfsvfs->z_os)) {
625185029Spjd		uint64_t pval;
626168404Spjd
627168404Spjd		ASSERT(mode & DS_MODE_READONLY);
628168404Spjd		atime_changed_cb(zfsvfs, B_FALSE);
629168404Spjd		readonly_changed_cb(zfsvfs, B_TRUE);
630185029Spjd		if (error = dsl_prop_get_integer(osname, "xattr", &pval, NULL))
631168404Spjd			goto out;
632185029Spjd		xattr_changed_cb(zfsvfs, pval);
633168404Spjd		zfsvfs->z_issnap = B_TRUE;
634168404Spjd	} else {
635185029Spjd		error = zfsvfs_setup(zfsvfs, B_TRUE);
636168404Spjd	}
637168404Spjd
638168404Spjd	vfs_mountedfrom(vfsp, osname);
639168404Spjd
640168404Spjd	if (!zfsvfs->z_issnap)
641168404Spjd		zfsctl_create(zfsvfs);
642168404Spjdout:
643168404Spjd	if (error) {
644168404Spjd		if (zfsvfs->z_os)
645168404Spjd			dmu_objset_close(zfsvfs->z_os);
646185029Spjd		zfs_freezfsvfs(zfsvfs);
647168404Spjd	} else {
648168404Spjd		atomic_add_32(&zfs_active_fs_count, 1);
649168404Spjd	}
650168404Spjd
651168404Spjd	return (error);
652168404Spjd}
653168404Spjd
654168404Spjdvoid
655168404Spjdzfs_unregister_callbacks(zfsvfs_t *zfsvfs)
656168404Spjd{
657168404Spjd	objset_t *os = zfsvfs->z_os;
658168404Spjd	struct dsl_dataset *ds;
659168404Spjd
660168404Spjd	/*
661168404Spjd	 * Unregister properties.
662168404Spjd	 */
663168404Spjd	if (!dmu_objset_is_snapshot(os)) {
664168404Spjd		ds = dmu_objset_ds(os);
665168404Spjd		VERIFY(dsl_prop_unregister(ds, "atime", atime_changed_cb,
666168404Spjd		    zfsvfs) == 0);
667168404Spjd
668168404Spjd		VERIFY(dsl_prop_unregister(ds, "xattr", xattr_changed_cb,
669168404Spjd		    zfsvfs) == 0);
670168404Spjd
671168404Spjd		VERIFY(dsl_prop_unregister(ds, "recordsize", blksz_changed_cb,
672168404Spjd		    zfsvfs) == 0);
673168404Spjd
674168404Spjd		VERIFY(dsl_prop_unregister(ds, "readonly", readonly_changed_cb,
675168404Spjd		    zfsvfs) == 0);
676168404Spjd
677168404Spjd		VERIFY(dsl_prop_unregister(ds, "setuid", setuid_changed_cb,
678168404Spjd		    zfsvfs) == 0);
679168404Spjd
680168404Spjd		VERIFY(dsl_prop_unregister(ds, "exec", exec_changed_cb,
681168404Spjd		    zfsvfs) == 0);
682168404Spjd
683168404Spjd		VERIFY(dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb,
684168404Spjd		    zfsvfs) == 0);
685168404Spjd
686168404Spjd		VERIFY(dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb,
687168404Spjd		    zfsvfs) == 0);
688168404Spjd
689168404Spjd		VERIFY(dsl_prop_unregister(ds, "aclinherit",
690168404Spjd		    acl_inherit_changed_cb, zfsvfs) == 0);
691185029Spjd
692185029Spjd		VERIFY(dsl_prop_unregister(ds, "vscan",
693185029Spjd		    vscan_changed_cb, zfsvfs) == 0);
694168404Spjd	}
695168404Spjd}
696168404Spjd
697168404Spjd/*ARGSUSED*/
698168404Spjdstatic int
699191990Sattiliozfs_mount(vfs_t *vfsp)
700168404Spjd{
701191990Sattilio	kthread_t	*td = curthread;
702185029Spjd	vnode_t		*mvp = vfsp->mnt_vnodecovered;
703185029Spjd	cred_t		*cr = td->td_ucred;
704185029Spjd	char		*osname;
705185029Spjd	int		error = 0;
706185029Spjd	int		canwrite;
707168404Spjd
708185029Spjd	if (vfs_getopt(vfsp->mnt_optnew, "from", (void **)&osname, NULL))
709185029Spjd		return (EINVAL);
710185029Spjd
711168404Spjd	/*
712185029Spjd	 * If full-owner-access is enabled and delegated administration is
713185029Spjd	 * turned on, we must set nosuid.
714185029Spjd	 */
715185029Spjd	if (zfs_super_owner &&
716185029Spjd	    dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr) != ECANCELED) {
717185029Spjd		secpolicy_fs_mount_clearopts(cr, vfsp);
718185029Spjd	}
719185029Spjd
720185029Spjd	/*
721185029Spjd	 * Check for mount privilege?
722185029Spjd	 *
723185029Spjd	 * If we don't have privilege then see if
724185029Spjd	 * we have local permission to allow it
725185029Spjd	 */
726185029Spjd	error = secpolicy_fs_mount(cr, mvp, vfsp);
727185029Spjd	if (error) {
728185029Spjd		error = dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr);
729196944Spjd		if (error != 0)
730196944Spjd			goto out;
731196944Spjd
732196944Spjd		if (!(vfsp->vfs_flag & MS_REMOUNT)) {
733185029Spjd			vattr_t		vattr;
734185029Spjd
735185029Spjd			/*
736185029Spjd			 * Make sure user is the owner of the mount point
737185029Spjd			 * or has sufficient privileges.
738185029Spjd			 */
739185029Spjd
740185029Spjd			vattr.va_mask = AT_UID;
741185029Spjd
742196662Spjd			vn_lock(mvp, LK_SHARED | LK_RETRY);
743185029Spjd			if (error = VOP_GETATTR(mvp, &vattr, cr)) {
744196662Spjd				VOP_UNLOCK(mvp, 0);
745185029Spjd				goto out;
746185029Spjd			}
747185029Spjd
748185029Spjd#if 0 /* CHECK THIS! Is probably needed for zfs_suser. */
749185029Spjd			if (secpolicy_vnode_owner(mvp, cr, vattr.va_uid) != 0 &&
750185029Spjd			    VOP_ACCESS(mvp, VWRITE, cr, td) != 0) {
751185029Spjd				error = EPERM;
752185029Spjd				goto out;
753185029Spjd			}
754185029Spjd#else
755185029Spjd			if (error = secpolicy_vnode_owner(mvp, cr, vattr.va_uid)) {
756196662Spjd				VOP_UNLOCK(mvp, 0);
757185029Spjd				goto out;
758185029Spjd			}
759185029Spjd
760185029Spjd			if (error = VOP_ACCESS(mvp, VWRITE, cr, td)) {
761196662Spjd				VOP_UNLOCK(mvp, 0);
762185029Spjd				goto out;
763185029Spjd			}
764196662Spjd			VOP_UNLOCK(mvp, 0);
765185029Spjd#endif
766196944Spjd		}
767185029Spjd
768196944Spjd		secpolicy_fs_mount_clearopts(cr, vfsp);
769185029Spjd	}
770185029Spjd
771185029Spjd	/*
772185029Spjd	 * Refuse to mount a filesystem if we are in a local zone and the
773185029Spjd	 * dataset is not visible.
774185029Spjd	 */
775185029Spjd	if (!INGLOBALZONE(curthread) &&
776185029Spjd	    (!zone_dataset_visible(osname, &canwrite) || !canwrite)) {
777185029Spjd		error = EPERM;
778185029Spjd		goto out;
779185029Spjd	}
780185029Spjd
781185029Spjd	/*
782168404Spjd	 * When doing a remount, we simply refresh our temporary properties
783168404Spjd	 * according to those options set in the current VFS options.
784168404Spjd	 */
785185029Spjd	if (vfsp->vfs_flag & MS_REMOUNT) {
786185029Spjd		/* refresh mount options */
787185029Spjd		zfs_unregister_callbacks(vfsp->vfs_data);
788185029Spjd		error = zfs_register_callbacks(vfsp);
789185029Spjd		goto out;
790185029Spjd	}
791168404Spjd
792168510Spjd	DROP_GIANT();
793185029Spjd	error = zfs_domount(vfsp, osname);
794168510Spjd	PICKUP_GIANT();
795185029Spjdout:
796168510Spjd	return (error);
797168404Spjd}
798168404Spjd
799168404Spjdstatic int
800191990Sattiliozfs_statfs(vfs_t *vfsp, struct statfs *statp)
801169170Spjd{
802168404Spjd	zfsvfs_t *zfsvfs = vfsp->vfs_data;
803168404Spjd	uint64_t refdbytes, availbytes, usedobjs, availobjs;
804168404Spjd
805168404Spjd	statp->f_version = STATFS_VERSION;
806168404Spjd
807168404Spjd	ZFS_ENTER(zfsvfs);
808168404Spjd
809168404Spjd	dmu_objset_space(zfsvfs->z_os,
810168404Spjd	    &refdbytes, &availbytes, &usedobjs, &availobjs);
811168404Spjd
812168404Spjd	/*
813168404Spjd	 * The underlying storage pool actually uses multiple block sizes.
814168404Spjd	 * We report the fragsize as the smallest block size we support,
815168404Spjd	 * and we report our blocksize as the filesystem's maximum blocksize.
816168404Spjd	 */
817168404Spjd	statp->f_bsize = zfsvfs->z_vfs->vfs_bsize;
818168404Spjd	statp->f_iosize = zfsvfs->z_vfs->vfs_bsize;
819168404Spjd
820168404Spjd	/*
821168404Spjd	 * The following report "total" blocks of various kinds in the
822168404Spjd	 * file system, but reported in terms of f_frsize - the
823168404Spjd	 * "fragment" size.
824168404Spjd	 */
825168404Spjd
826168404Spjd	statp->f_blocks = (refdbytes + availbytes) / statp->f_bsize;
827168404Spjd	statp->f_bfree = availbytes / statp->f_bsize;
828168404Spjd	statp->f_bavail = statp->f_bfree; /* no root reservation */
829168404Spjd
830168404Spjd	/*
831168404Spjd	 * statvfs() should really be called statufs(), because it assumes
832168404Spjd	 * static metadata.  ZFS doesn't preallocate files, so the best
833168404Spjd	 * we can do is report the max that could possibly fit in f_files,
834168404Spjd	 * and that minus the number actually used in f_ffree.
835168404Spjd	 * For f_ffree, report the smaller of the number of object available
836168404Spjd	 * and the number of blocks (each object will take at least a block).
837168404Spjd	 */
838168404Spjd	statp->f_ffree = MIN(availobjs, statp->f_bfree);
839168404Spjd	statp->f_files = statp->f_ffree + usedobjs;
840168404Spjd
841168404Spjd	/*
842168404Spjd	 * We're a zfs filesystem.
843168404Spjd	 */
844168404Spjd	(void) strlcpy(statp->f_fstypename, "zfs", sizeof(statp->f_fstypename));
845168404Spjd
846168404Spjd	strlcpy(statp->f_mntfromname, vfsp->mnt_stat.f_mntfromname,
847168404Spjd	    sizeof(statp->f_mntfromname));
848168404Spjd	strlcpy(statp->f_mntonname, vfsp->mnt_stat.f_mntonname,
849168404Spjd	    sizeof(statp->f_mntonname));
850168404Spjd
851168404Spjd	statp->f_namemax = ZFS_MAXNAMELEN;
852168404Spjd
853168404Spjd	ZFS_EXIT(zfsvfs);
854168404Spjd	return (0);
855168404Spjd}
856168404Spjd
857168404Spjdstatic int
858191990Sattiliozfs_root(vfs_t *vfsp, int flags, vnode_t **vpp)
859168404Spjd{
860168404Spjd	zfsvfs_t *zfsvfs = vfsp->vfs_data;
861168404Spjd	znode_t *rootzp;
862168404Spjd	int error;
863168404Spjd
864168404Spjd	ZFS_ENTER(zfsvfs);
865168404Spjd
866168404Spjd	error = zfs_zget(zfsvfs, zfsvfs->z_root, &rootzp);
867168404Spjd	if (error == 0) {
868168404Spjd		*vpp = ZTOV(rootzp);
869175202Sattilio		error = vn_lock(*vpp, flags);
870168404Spjd		(*vpp)->v_vflag |= VV_ROOT;
871168404Spjd	}
872168404Spjd
873168404Spjd	ZFS_EXIT(zfsvfs);
874168404Spjd	return (error);
875168404Spjd}
876168404Spjd
877185029Spjd/*
878185029Spjd * Teardown the zfsvfs::z_os.
879185029Spjd *
880185029Spjd * Note, if 'unmounting' if FALSE, we return with the 'z_teardown_lock'
881185029Spjd * and 'z_teardown_inactive_lock' held.
882185029Spjd */
883185029Spjdstatic int
884185029Spjdzfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting)
885185029Spjd{
886185029Spjd	znode_t	*zp;
887185029Spjd
888185029Spjd	rrw_enter(&zfsvfs->z_teardown_lock, RW_WRITER, FTAG);
889185029Spjd
890185029Spjd	if (!unmounting) {
891185029Spjd		/*
892185029Spjd		 * We purge the parent filesystem's vfsp as the parent
893185029Spjd		 * filesystem and all of its snapshots have their vnode's
894185029Spjd		 * v_vfsp set to the parent's filesystem's vfsp.  Note,
895185029Spjd		 * 'z_parent' is self referential for non-snapshots.
896185029Spjd		 */
897185029Spjd		(void) dnlc_purge_vfsp(zfsvfs->z_parent->z_vfs, 0);
898185029Spjd	}
899185029Spjd
900185029Spjd	/*
901185029Spjd	 * Close the zil. NB: Can't close the zil while zfs_inactive
902185029Spjd	 * threads are blocked as zil_close can call zfs_inactive.
903185029Spjd	 */
904185029Spjd	if (zfsvfs->z_log) {
905185029Spjd		zil_close(zfsvfs->z_log);
906185029Spjd		zfsvfs->z_log = NULL;
907185029Spjd	}
908185029Spjd
909185029Spjd	rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_WRITER);
910185029Spjd
911185029Spjd	/*
912185029Spjd	 * If we are not unmounting (ie: online recv) and someone already
913185029Spjd	 * unmounted this file system while we were doing the switcheroo,
914185029Spjd	 * or a reopen of z_os failed then just bail out now.
915185029Spjd	 */
916185029Spjd	if (!unmounting && (zfsvfs->z_unmounted || zfsvfs->z_os == NULL)) {
917185029Spjd		rw_exit(&zfsvfs->z_teardown_inactive_lock);
918185029Spjd		rrw_exit(&zfsvfs->z_teardown_lock, FTAG);
919185029Spjd		return (EIO);
920185029Spjd	}
921185029Spjd
922185029Spjd	/*
923185029Spjd	 * At this point there are no vops active, and any new vops will
924185029Spjd	 * fail with EIO since we have z_teardown_lock for writer (only
925185029Spjd	 * relavent for forced unmount).
926185029Spjd	 *
927185029Spjd	 * Release all holds on dbufs.
928185029Spjd	 */
929185029Spjd	mutex_enter(&zfsvfs->z_znodes_lock);
930185029Spjd	for (zp = list_head(&zfsvfs->z_all_znodes); zp != NULL;
931185029Spjd	    zp = list_next(&zfsvfs->z_all_znodes, zp))
932185029Spjd		if (zp->z_dbuf) {
933196297Spjd			ASSERT(ZTOV(zp)->v_count >= 0);
934185029Spjd			zfs_znode_dmu_fini(zp);
935185029Spjd		}
936185029Spjd	mutex_exit(&zfsvfs->z_znodes_lock);
937185029Spjd
938185029Spjd	/*
939185029Spjd	 * If we are unmounting, set the unmounted flag and let new vops
940185029Spjd	 * unblock.  zfs_inactive will have the unmounted behavior, and all
941185029Spjd	 * other vops will fail with EIO.
942185029Spjd	 */
943185029Spjd	if (unmounting) {
944185029Spjd		zfsvfs->z_unmounted = B_TRUE;
945185029Spjd		rrw_exit(&zfsvfs->z_teardown_lock, FTAG);
946185029Spjd		rw_exit(&zfsvfs->z_teardown_inactive_lock);
947185029Spjd	}
948185029Spjd
949185029Spjd	/*
950185029Spjd	 * z_os will be NULL if there was an error in attempting to reopen
951185029Spjd	 * zfsvfs, so just return as the properties had already been
952185029Spjd	 * unregistered and cached data had been evicted before.
953185029Spjd	 */
954185029Spjd	if (zfsvfs->z_os == NULL)
955185029Spjd		return (0);
956185029Spjd
957185029Spjd	/*
958185029Spjd	 * Unregister properties.
959185029Spjd	 */
960185029Spjd	zfs_unregister_callbacks(zfsvfs);
961185029Spjd
962185029Spjd	/*
963185029Spjd	 * Evict cached data
964185029Spjd	 */
965185029Spjd	if (dmu_objset_evict_dbufs(zfsvfs->z_os)) {
966185029Spjd		txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0);
967185029Spjd		(void) dmu_objset_evict_dbufs(zfsvfs->z_os);
968185029Spjd	}
969185029Spjd
970185029Spjd	return (0);
971185029Spjd}
972185029Spjd
973168404Spjd/*ARGSUSED*/
974168404Spjdstatic int
975191990Sattiliozfs_umount(vfs_t *vfsp, int fflag)
976168404Spjd{
977168404Spjd	zfsvfs_t *zfsvfs = vfsp->vfs_data;
978185029Spjd	objset_t *os;
979191990Sattilio	cred_t *cr = curthread->td_ucred;
980168404Spjd	int ret;
981168404Spjd
982185029Spjd	if (fflag & MS_FORCE) {
983185029Spjd		/* TODO: Force unmount is not well implemented yet, so deny it. */
984192211Skmacy		ZFS_LOG(0, "Force unmount is experimental - report any problems.");
985185029Spjd	}
986168404Spjd
987185029Spjd	ret = secpolicy_fs_unmount(cr, vfsp);
988185029Spjd	if (ret) {
989185029Spjd		ret = dsl_deleg_access((char *)refstr_value(vfsp->vfs_resource),
990185029Spjd		    ZFS_DELEG_PERM_MOUNT, cr);
991185029Spjd		if (ret)
992185029Spjd			return (ret);
993185029Spjd	}
994185029Spjd	/*
995185029Spjd	 * We purge the parent filesystem's vfsp as the parent filesystem
996185029Spjd	 * and all of its snapshots have their vnode's v_vfsp set to the
997185029Spjd	 * parent's filesystem's vfsp.  Note, 'z_parent' is self
998185029Spjd	 * referential for non-snapshots.
999185029Spjd	 */
1000185029Spjd	(void) dnlc_purge_vfsp(zfsvfs->z_parent->z_vfs, 0);
1001168404Spjd
1002168404Spjd	/*
1003168404Spjd	 * Unmount any snapshots mounted under .zfs before unmounting the
1004168404Spjd	 * dataset itself.
1005168404Spjd	 */
1006169170Spjd	if (zfsvfs->z_ctldir != NULL) {
1007168404Spjd		if ((ret = zfsctl_umount_snapshots(vfsp, fflag, cr)) != 0)
1008168404Spjd			return (ret);
1009191990Sattilio		ret = vflush(vfsp, 0, 0, curthread);
1010168404Spjd		ASSERT(ret == EBUSY);
1011168404Spjd		if (!(fflag & MS_FORCE)) {
1012168404Spjd			if (zfsvfs->z_ctldir->v_count > 1)
1013168404Spjd				return (EBUSY);
1014168404Spjd			ASSERT(zfsvfs->z_ctldir->v_count == 1);
1015168404Spjd		}
1016168404Spjd		zfsctl_destroy(zfsvfs);
1017168404Spjd		ASSERT(zfsvfs->z_ctldir == NULL);
1018168404Spjd	}
1019168404Spjd
1020168404Spjd	/*
1021168404Spjd	 * Flush all the files.
1022168404Spjd	 */
1023191990Sattilio	ret = vflush(vfsp, 1, (fflag & MS_FORCE) ? FORCECLOSE : 0, curthread);
1024168404Spjd	if (ret != 0) {
1025168404Spjd		if (!zfsvfs->z_issnap) {
1026168404Spjd			zfsctl_create(zfsvfs);
1027168404Spjd			ASSERT(zfsvfs->z_ctldir != NULL);
1028168404Spjd		}
1029168404Spjd		return (ret);
1030168404Spjd	}
1031168404Spjd
1032185029Spjd	if (!(fflag & MS_FORCE)) {
1033185029Spjd		/*
1034185029Spjd		 * Check the number of active vnodes in the file system.
1035185029Spjd		 * Our count is maintained in the vfs structure, but the
1036185029Spjd		 * number is off by 1 to indicate a hold on the vfs
1037185029Spjd		 * structure itself.
1038185029Spjd		 *
1039185029Spjd		 * The '.zfs' directory maintains a reference of its
1040185029Spjd		 * own, and any active references underneath are
1041185029Spjd		 * reflected in the vnode count.
1042185029Spjd		 */
1043185029Spjd		if (zfsvfs->z_ctldir == NULL) {
1044185029Spjd			if (vfsp->vfs_count > 1)
1045185029Spjd				return (EBUSY);
1046185029Spjd		} else {
1047185029Spjd			if (vfsp->vfs_count > 2 ||
1048185029Spjd			    zfsvfs->z_ctldir->v_count > 1)
1049185029Spjd				return (EBUSY);
1050185029Spjd		}
1051185029Spjd	} else {
1052168404Spjd		MNT_ILOCK(vfsp);
1053168404Spjd		vfsp->mnt_kern_flag |= MNTK_UNMOUNTF;
1054168404Spjd		MNT_IUNLOCK(vfsp);
1055185029Spjd	}
1056168404Spjd
1057185029Spjd	VERIFY(zfsvfs_teardown(zfsvfs, B_TRUE) == 0);
1058185029Spjd	os = zfsvfs->z_os;
1059185029Spjd
1060185029Spjd	/*
1061185029Spjd	 * z_os will be NULL if there was an error in
1062185029Spjd	 * attempting to reopen zfsvfs.
1063185029Spjd	 */
1064185029Spjd	if (os != NULL) {
1065168404Spjd		/*
1066185029Spjd		 * Unset the objset user_ptr.
1067168404Spjd		 */
1068185029Spjd		mutex_enter(&os->os->os_user_ptr_lock);
1069185029Spjd		dmu_objset_set_user(os, NULL);
1070185029Spjd		mutex_exit(&os->os->os_user_ptr_lock);
1071185029Spjd
1072185029Spjd		/*
1073185029Spjd		 * Finally release the objset
1074185029Spjd		 */
1075185029Spjd		dmu_objset_close(os);
1076168404Spjd	}
1077168404Spjd
1078185029Spjd	/*
1079185029Spjd	 * We can now safely destroy the '.zfs' directory node.
1080185029Spjd	 */
1081185029Spjd	if (zfsvfs->z_ctldir != NULL)
1082185029Spjd		zfsctl_destroy(zfsvfs);
1083185029Spjd	if (zfsvfs->z_issnap) {
1084185029Spjd		vnode_t *svp = vfsp->mnt_vnodecovered;
1085185029Spjd
1086192211Skmacy		ASSERT(svp->v_count == 2 || svp->v_count == 1);
1087192211Skmacy		if (svp->v_count == 2)
1088192211Skmacy			VN_RELE(svp);
1089185029Spjd	}
1090168404Spjd	zfs_freevfs(vfsp);
1091168404Spjd
1092168404Spjd	return (0);
1093168404Spjd}
1094168404Spjd
1095168404Spjdstatic int
1096168404Spjdzfs_vget(vfs_t *vfsp, ino_t ino, int flags, vnode_t **vpp)
1097168404Spjd{
1098168404Spjd	zfsvfs_t	*zfsvfs = vfsp->vfs_data;
1099168404Spjd	znode_t		*zp;
1100168404Spjd	int 		err;
1101168404Spjd
1102168404Spjd	ZFS_ENTER(zfsvfs);
1103168404Spjd	err = zfs_zget(zfsvfs, ino, &zp);
1104168404Spjd	if (err == 0 && zp->z_unlinked) {
1105168404Spjd		VN_RELE(ZTOV(zp));
1106168404Spjd		err = EINVAL;
1107168404Spjd	}
1108168404Spjd	if (err != 0)
1109168404Spjd		*vpp = NULL;
1110168404Spjd	else {
1111168404Spjd		*vpp = ZTOV(zp);
1112175202Sattilio		vn_lock(*vpp, flags);
1113168404Spjd	}
1114168404Spjd	ZFS_EXIT(zfsvfs);
1115171063Sdfr	return (err);
1116168404Spjd}
1117168404Spjd
1118168404Spjdstatic int
1119168404Spjdzfs_fhtovp(vfs_t *vfsp, fid_t *fidp, vnode_t **vpp)
1120168404Spjd{
1121168404Spjd	zfsvfs_t	*zfsvfs = vfsp->vfs_data;
1122168404Spjd	znode_t		*zp;
1123168404Spjd	uint64_t	object = 0;
1124168404Spjd	uint64_t	fid_gen = 0;
1125168404Spjd	uint64_t	gen_mask;
1126168404Spjd	uint64_t	zp_gen;
1127168404Spjd	int		i, err;
1128168404Spjd
1129168404Spjd	*vpp = NULL;
1130168404Spjd
1131168404Spjd	ZFS_ENTER(zfsvfs);
1132168404Spjd
1133168404Spjd	if (fidp->fid_len == LONG_FID_LEN) {
1134168404Spjd		zfid_long_t	*zlfid = (zfid_long_t *)fidp;
1135168404Spjd		uint64_t	objsetid = 0;
1136168404Spjd		uint64_t	setgen = 0;
1137168404Spjd
1138168404Spjd		for (i = 0; i < sizeof (zlfid->zf_setid); i++)
1139168404Spjd			objsetid |= ((uint64_t)zlfid->zf_setid[i]) << (8 * i);
1140168404Spjd
1141168404Spjd		for (i = 0; i < sizeof (zlfid->zf_setgen); i++)
1142168404Spjd			setgen |= ((uint64_t)zlfid->zf_setgen[i]) << (8 * i);
1143168404Spjd
1144168404Spjd		ZFS_EXIT(zfsvfs);
1145168404Spjd
1146168404Spjd		err = zfsctl_lookup_objset(vfsp, objsetid, &zfsvfs);
1147168404Spjd		if (err)
1148168404Spjd			return (EINVAL);
1149168404Spjd		ZFS_ENTER(zfsvfs);
1150168404Spjd	}
1151168404Spjd
1152168404Spjd	if (fidp->fid_len == SHORT_FID_LEN || fidp->fid_len == LONG_FID_LEN) {
1153168404Spjd		zfid_short_t	*zfid = (zfid_short_t *)fidp;
1154168404Spjd
1155168404Spjd		for (i = 0; i < sizeof (zfid->zf_object); i++)
1156168404Spjd			object |= ((uint64_t)zfid->zf_object[i]) << (8 * i);
1157168404Spjd
1158168404Spjd		for (i = 0; i < sizeof (zfid->zf_gen); i++)
1159168404Spjd			fid_gen |= ((uint64_t)zfid->zf_gen[i]) << (8 * i);
1160168404Spjd	} else {
1161168404Spjd		ZFS_EXIT(zfsvfs);
1162168404Spjd		return (EINVAL);
1163168404Spjd	}
1164168404Spjd
1165168404Spjd	/* A zero fid_gen means we are in the .zfs control directories */
1166168404Spjd	if (fid_gen == 0 &&
1167168404Spjd	    (object == ZFSCTL_INO_ROOT || object == ZFSCTL_INO_SNAPDIR)) {
1168168404Spjd		*vpp = zfsvfs->z_ctldir;
1169168404Spjd		ASSERT(*vpp != NULL);
1170168404Spjd		if (object == ZFSCTL_INO_SNAPDIR) {
1171168404Spjd			VERIFY(zfsctl_root_lookup(*vpp, "snapshot", vpp, NULL,
1172185029Spjd			    0, NULL, NULL, NULL, NULL, NULL) == 0);
1173168404Spjd		} else {
1174168404Spjd			VN_HOLD(*vpp);
1175168404Spjd		}
1176168404Spjd		ZFS_EXIT(zfsvfs);
1177169194Spjd		/* XXX: LK_RETRY? */
1178175202Sattilio		vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY);
1179168404Spjd		return (0);
1180168404Spjd	}
1181168404Spjd
1182168404Spjd	gen_mask = -1ULL >> (64 - 8 * i);
1183168404Spjd
1184168404Spjd	dprintf("getting %llu [%u mask %llx]\n", object, fid_gen, gen_mask);
1185168404Spjd	if (err = zfs_zget(zfsvfs, object, &zp)) {
1186168404Spjd		ZFS_EXIT(zfsvfs);
1187168404Spjd		return (err);
1188168404Spjd	}
1189168404Spjd	zp_gen = zp->z_phys->zp_gen & gen_mask;
1190168404Spjd	if (zp_gen == 0)
1191168404Spjd		zp_gen = 1;
1192168404Spjd	if (zp->z_unlinked || zp_gen != fid_gen) {
1193168404Spjd		dprintf("znode gen (%u) != fid gen (%u)\n", zp_gen, fid_gen);
1194168404Spjd		VN_RELE(ZTOV(zp));
1195168404Spjd		ZFS_EXIT(zfsvfs);
1196168404Spjd		return (EINVAL);
1197168404Spjd	}
1198168404Spjd
1199168404Spjd	*vpp = ZTOV(zp);
1200169194Spjd	/* XXX: LK_RETRY? */
1201175202Sattilio	vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY);
1202185029Spjd	vnode_create_vobject(*vpp, zp->z_phys->zp_size, curthread);
1203168404Spjd	ZFS_EXIT(zfsvfs);
1204168404Spjd	return (0);
1205168404Spjd}
1206168404Spjd
1207185029Spjd/*
1208185029Spjd * Block out VOPs and close zfsvfs_t::z_os
1209185029Spjd *
1210185029Spjd * Note, if successful, then we return with the 'z_teardown_lock' and
1211185029Spjd * 'z_teardown_inactive_lock' write held.
1212185029Spjd */
1213185029Spjdint
1214185029Spjdzfs_suspend_fs(zfsvfs_t *zfsvfs, char *name, int *mode)
1215168404Spjd{
1216185029Spjd	int error;
1217168404Spjd
1218185029Spjd	if ((error = zfsvfs_teardown(zfsvfs, B_FALSE)) != 0)
1219185029Spjd		return (error);
1220168404Spjd
1221185029Spjd	*mode = zfsvfs->z_os->os_mode;
1222185029Spjd	dmu_objset_name(zfsvfs->z_os, name);
1223185029Spjd	dmu_objset_close(zfsvfs->z_os);
1224168404Spjd
1225185029Spjd	return (0);
1226185029Spjd}
1227168404Spjd
1228185029Spjd/*
1229185029Spjd * Reopen zfsvfs_t::z_os and release VOPs.
1230185029Spjd */
1231185029Spjdint
1232185029Spjdzfs_resume_fs(zfsvfs_t *zfsvfs, const char *osname, int mode)
1233185029Spjd{
1234185029Spjd	int err;
1235168404Spjd
1236185029Spjd	ASSERT(RRW_WRITE_HELD(&zfsvfs->z_teardown_lock));
1237185029Spjd	ASSERT(RW_WRITE_HELD(&zfsvfs->z_teardown_inactive_lock));
1238185029Spjd
1239185029Spjd	err = dmu_objset_open(osname, DMU_OST_ZFS, mode, &zfsvfs->z_os);
1240185029Spjd	if (err) {
1241185029Spjd		zfsvfs->z_os = NULL;
1242185029Spjd	} else {
1243185029Spjd		znode_t *zp;
1244185029Spjd
1245185029Spjd		VERIFY(zfsvfs_setup(zfsvfs, B_FALSE) == 0);
1246185029Spjd
1247185029Spjd		/*
1248185029Spjd		 * Attempt to re-establish all the active znodes with
1249185029Spjd		 * their dbufs.  If a zfs_rezget() fails, then we'll let
1250185029Spjd		 * any potential callers discover that via ZFS_ENTER_VERIFY_VP
1251185029Spjd		 * when they try to use their znode.
1252185029Spjd		 */
1253185029Spjd		mutex_enter(&zfsvfs->z_znodes_lock);
1254185029Spjd		for (zp = list_head(&zfsvfs->z_all_znodes); zp;
1255185029Spjd		    zp = list_next(&zfsvfs->z_all_znodes, zp)) {
1256185029Spjd			(void) zfs_rezget(zp);
1257185029Spjd		}
1258185029Spjd		mutex_exit(&zfsvfs->z_znodes_lock);
1259185029Spjd
1260168404Spjd	}
1261168404Spjd
1262185029Spjd	/* release the VOPs */
1263185029Spjd	rw_exit(&zfsvfs->z_teardown_inactive_lock);
1264185029Spjd	rrw_exit(&zfsvfs->z_teardown_lock, FTAG);
1265185029Spjd
1266185029Spjd	if (err) {
1267185029Spjd		/*
1268185029Spjd		 * Since we couldn't reopen zfsvfs::z_os, force
1269185029Spjd		 * unmount this file system.
1270185029Spjd		 */
1271185029Spjd		if (vn_vfswlock(zfsvfs->z_vfs->vfs_vnodecovered) == 0)
1272185029Spjd			(void) dounmount(zfsvfs->z_vfs, MS_FORCE, curthread);
1273168404Spjd	}
1274185029Spjd	return (err);
1275168404Spjd}
1276168404Spjd
1277168404Spjdstatic void
1278168404Spjdzfs_freevfs(vfs_t *vfsp)
1279168404Spjd{
1280168404Spjd	zfsvfs_t *zfsvfs = vfsp->vfs_data;
1281168404Spjd	int i;
1282168404Spjd
1283168404Spjd	for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
1284168404Spjd		mutex_destroy(&zfsvfs->z_hold_mtx[i]);
1285168404Spjd
1286185029Spjd	zfs_fuid_destroy(zfsvfs);
1287185029Spjd	zfs_freezfsvfs(zfsvfs);
1288185029Spjd
1289168404Spjd	atomic_add_32(&zfs_active_fs_count, -1);
1290168404Spjd}
1291168404Spjd
#ifdef __i386__
/* desiredvnodes as it was before zfs_vnodes_adjust() touched it. */
static int desiredvnodes_backup;
#endif

/*
 * On i386, remember the current desiredvnodes and, if it still has its
 * computed default, lower it to three quarters of that value.  Does
 * nothing on other architectures.
 */
static void
zfs_vnodes_adjust(void)
{
#ifdef __i386__
	u_int limit_by_pages, limit_by_kmem;
	int newdesiredvnodes;

	desiredvnodes_backup = desiredvnodes;

	/*
	 * We calculate newdesiredvnodes the same way it is done in
	 * vntblinit(). If it is equal to desiredvnodes, it means that
	 * it wasn't tuned by the administrator and we can tune it down.
	 */
	limit_by_pages = maxproc + cnt.v_page_count / 4;
	limit_by_kmem = 2 * vm_kmem_size /
	    (5 * (sizeof(struct vm_object) + sizeof(struct vnode)));
	newdesiredvnodes = min(limit_by_pages, limit_by_kmem);

	if (newdesiredvnodes != desiredvnodes)
		return;
	desiredvnodes = (3 * newdesiredvnodes) / 4;
#endif
}
1316172135Spjd
/*
 * Undo zfs_vnodes_adjust(): on i386, restore desiredvnodes from the saved
 * backup value.  Does nothing on other architectures.
 */
static void
zfs_vnodes_adjust_back(void)
{
#ifdef __i386__
	desiredvnodes = desiredvnodes_backup;
#endif
}
1325172135Spjd
1326168404Spjdvoid
1327168404Spjdzfs_init(void)
1328168404Spjd{
1329168404Spjd
1330185029Spjd	printf("ZFS filesystem version " SPA_VERSION_STRING "\n");
1331168404Spjd
1332168404Spjd	/*
1333185029Spjd	 * Initialize znode cache, vnode ops, etc...
1334168404Spjd	 */
1335185029Spjd	zfs_znode_init();
1336168404Spjd
1337168404Spjd	/*
1338185029Spjd	 * Initialize .zfs directory structures
1339168404Spjd	 */
1340185029Spjd	zfsctl_init();
1341172135Spjd
1342172135Spjd	/*
1343185029Spjd	 * Reduce number of vnode. Originally number of vnodes is calculated
1344172135Spjd	 * with UFS inode in mind. We reduce it here, because it's too big for
1345172135Spjd	 * ZFS/i386.
1346172135Spjd	 */
1347172135Spjd	zfs_vnodes_adjust();
1348168404Spjd}
1349168404Spjd
/*
 * Counterpart of zfs_init(): shut down the '.zfs' control directory
 * support and the znode layer, then restore the vnode limit.
 */
void
zfs_fini(void)
{
	/* Tear down the .zfs directory structures. */
	zfsctl_fini();

	/* Tear down the znode layer. */
	zfs_znode_fini();

	/* Put desiredvnodes back the way zfs_vnodes_adjust() found it. */
	zfs_vnodes_adjust_back();
}
1357168404Spjd
1358168404Spjdint
1359168404Spjdzfs_busy(void)
1360168404Spjd{
1361168404Spjd	return (zfs_active_fs_count != 0);
1362168404Spjd}
1363185029Spjd
1364185029Spjdint
1365185029Spjdzfs_set_version(const char *name, uint64_t newvers)
1366185029Spjd{
1367185029Spjd	int error;
1368185029Spjd	objset_t *os;
1369185029Spjd	dmu_tx_t *tx;
1370185029Spjd	uint64_t curvers;
1371185029Spjd
1372185029Spjd	/*
1373185029Spjd	 * XXX for now, require that the filesystem be unmounted.  Would
1374185029Spjd	 * be nice to find the zfsvfs_t and just update that if
1375185029Spjd	 * possible.
1376185029Spjd	 */
1377185029Spjd
1378185029Spjd	if (newvers < ZPL_VERSION_INITIAL || newvers > ZPL_VERSION)
1379185029Spjd		return (EINVAL);
1380185029Spjd
1381185029Spjd	error = dmu_objset_open(name, DMU_OST_ZFS, DS_MODE_OWNER, &os);
1382185029Spjd	if (error)
1383185029Spjd		return (error);
1384185029Spjd
1385185029Spjd	error = zap_lookup(os, MASTER_NODE_OBJ, ZPL_VERSION_STR,
1386185029Spjd	    8, 1, &curvers);
1387185029Spjd	if (error)
1388185029Spjd		goto out;
1389185029Spjd	if (newvers < curvers) {
1390185029Spjd		error = EINVAL;
1391185029Spjd		goto out;
1392185029Spjd	}
1393185029Spjd
1394185029Spjd	tx = dmu_tx_create(os);
1395185029Spjd	dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, 0, ZPL_VERSION_STR);
1396185029Spjd	error = dmu_tx_assign(tx, TXG_WAIT);
1397185029Spjd	if (error) {
1398185029Spjd		dmu_tx_abort(tx);
1399185029Spjd		goto out;
1400185029Spjd	}
1401185029Spjd	error = zap_update(os, MASTER_NODE_OBJ, ZPL_VERSION_STR, 8, 1,
1402185029Spjd	    &newvers, tx);
1403185029Spjd
1404185029Spjd	spa_history_internal_log(LOG_DS_UPGRADE,
1405185029Spjd	    dmu_objset_spa(os), tx, CRED(),
1406185029Spjd	    "oldver=%llu newver=%llu dataset = %llu", curvers, newvers,
1407185029Spjd	    dmu_objset_id(os));
1408185029Spjd	dmu_tx_commit(tx);
1409185029Spjd
1410185029Spjdout:
1411185029Spjd	dmu_objset_close(os);
1412185029Spjd	return (error);
1413185029Spjd}
1414185029Spjd/*
1415185029Spjd * Read a property stored within the master node.
1416185029Spjd */
1417185029Spjdint
1418185029Spjdzfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value)
1419185029Spjd{
1420185029Spjd	const char *pname;
1421185029Spjd	int error = ENOENT;
1422185029Spjd
1423185029Spjd	/*
1424185029Spjd	 * Look up the file system's value for the property.  For the
1425185029Spjd	 * version property, we look up a slightly different string.
1426185029Spjd	 */
1427185029Spjd	if (prop == ZFS_PROP_VERSION)
1428185029Spjd		pname = ZPL_VERSION_STR;
1429185029Spjd	else
1430185029Spjd		pname = zfs_prop_to_name(prop);
1431185029Spjd
1432185029Spjd	if (os != NULL)
1433185029Spjd		error = zap_lookup(os, MASTER_NODE_OBJ, pname, 8, 1, value);
1434185029Spjd
1435185029Spjd	if (error == ENOENT) {
1436185029Spjd		/* No value set, use the default value */
1437185029Spjd		switch (prop) {
1438185029Spjd		case ZFS_PROP_VERSION:
1439185029Spjd			*value = ZPL_VERSION;
1440185029Spjd			break;
1441185029Spjd		case ZFS_PROP_NORMALIZE:
1442185029Spjd		case ZFS_PROP_UTF8ONLY:
1443185029Spjd			*value = 0;
1444185029Spjd			break;
1445185029Spjd		case ZFS_PROP_CASE:
1446185029Spjd			*value = ZFS_CASE_SENSITIVE;
1447185029Spjd			break;
1448185029Spjd		default:
1449185029Spjd			return (error);
1450185029Spjd		}
1451185029Spjd		error = 0;
1452185029Spjd	}
1453185029Spjd	return (error);
1454185029Spjd}
1455