1/*
2 * Copyright (c) 2006-2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27#include <sys/types.h>
28#include <sys/param.h>
29#include <sys/kernel.h>
30#include <sys/systm.h>
31#include <sys/malloc.h>
32#include <sys/mount.h>
33#include <sys/cred.h>
34#include <sys/vfs.h>
35#include <sys/priv.h>
36#include <sys/libkern.h>
37
38#include <sys/mutex.h>
39#include <sys/vnode.h>
40#include <sys/taskq.h>
41
42#include <sys/ccompat.h>
43
/*
 * Declare the M_MOUNT malloc type.  Every malloc()/free() call in this file
 * tags its allocation with M_MOUNT; the type itself is presumably defined by
 * the kernel's VFS code rather than here (verify against the kernel sources).
 */
MALLOC_DECLARE(M_MOUNT);
45
46void
47vfs_setmntopt(vfs_t *vfsp, const char *name, const char *arg,
48    int flags __unused)
49{
50	struct vfsopt *opt;
51	size_t namesize;
52	int locked;
53
54	if (!(locked = mtx_owned(MNT_MTX(vfsp))))
55		MNT_ILOCK(vfsp);
56
57	if (vfsp->mnt_opt == NULL) {
58		void *opts;
59
60		MNT_IUNLOCK(vfsp);
61		opts = malloc(sizeof (*vfsp->mnt_opt), M_MOUNT, M_WAITOK);
62		MNT_ILOCK(vfsp);
63		if (vfsp->mnt_opt == NULL) {
64			vfsp->mnt_opt = opts;
65			TAILQ_INIT(vfsp->mnt_opt);
66		} else {
67			free(opts, M_MOUNT);
68		}
69	}
70
71	MNT_IUNLOCK(vfsp);
72
73	opt = malloc(sizeof (*opt), M_MOUNT, M_WAITOK);
74	namesize = strlen(name) + 1;
75	opt->name = malloc(namesize, M_MOUNT, M_WAITOK);
76	strlcpy(opt->name, name, namesize);
77	opt->pos = -1;
78	opt->seen = 1;
79	if (arg == NULL) {
80		opt->value = NULL;
81		opt->len = 0;
82	} else {
83		opt->len = strlen(arg) + 1;
84		opt->value = malloc(opt->len, M_MOUNT, M_WAITOK);
85		memcpy(opt->value, arg, opt->len);
86	}
87
88	MNT_ILOCK(vfsp);
89	TAILQ_INSERT_TAIL(vfsp->mnt_opt, opt, link);
90	if (!locked)
91		MNT_IUNLOCK(vfsp);
92}
93
94void
95vfs_clearmntopt(vfs_t *vfsp, const char *name)
96{
97	int locked;
98
99	if (!(locked = mtx_owned(MNT_MTX(vfsp))))
100		MNT_ILOCK(vfsp);
101	vfs_deleteopt(vfsp->mnt_opt, name);
102	if (!locked)
103		MNT_IUNLOCK(vfsp);
104}
105
106int
107vfs_optionisset(const vfs_t *vfsp, const char *opt, char **argp)
108{
109	struct vfsoptlist *opts = vfsp->mnt_optnew;
110	int error;
111
112	if (opts == NULL)
113		return (0);
114	error = vfs_getopt(opts, opt, (void **)argp, NULL);
115	return (error != 0 ? 0 : 1);
116}
117
/*
 * Mount a filesystem of type "fstype" at "fspath", covering the directory
 * vnode passed in through *vpp.
 *
 * td           thread handed to vfs_byname_kld() for the filesystem lookup.
 * vpp          in: the vnode to cover; it must be exclusively locked
 *              (asserted).  out: NULL on failure, otherwise the vnode
 *              returned by VFS_ROOT(), requested with LK_EXCLUSIVE.
 * fstype       filesystem type name; must be shorter than MFSNAMELEN.
 * fspath       mount point path; must be shorter than MNAMELEN.
 * fspec        stored as the value of the "from" mount option.
 * fsflags      mount flags; masked with MNT_UPDATEMASK, after which
 *              MNT_RDONLY, MNT_NOSUID and MNT_IGNORE are always set.
 * parent_vfsp  only used when VFS_SUPPORTS_EXJAIL_CLONE is defined, as the
 *              source for vfs_exjail_clone().
 *
 * Returns 0 on success.  On failure returns ENAMETOOLONG, ENODEV, ENOTDIR,
 * EBUSY, or the error from VFS_MOUNT(); on every failure path the covered
 * vnode is released with vput().  On success the covered vnode is unlocked
 * but not released here.
 */
int
mount_snapshot(kthread_t *td, vnode_t **vpp, const char *fstype, char *fspath,
    char *fspec, int fsflags, vfs_t *parent_vfsp)
{
	struct vfsconf *vfsp;
	struct mount *mp;
	vnode_t *vp, *mvp;
	int error;

	ASSERT_VOP_ELOCKED(*vpp, "mount_snapshot");

	/*
	 * Take over the caller's vnode; *vpp stays NULL unless the mount
	 * succeeds and it is replaced with the new root vnode.
	 */
	vp = *vpp;
	*vpp = NULL;
	error = 0;

	/*
	 * Be ultra-paranoid about making sure the type and fspath
	 * variables will fit in our mp buffers, including the
	 * terminating NUL.
	 */
	if (strlen(fstype) >= MFSNAMELEN || strlen(fspath) >= MNAMELEN)
		error = ENAMETOOLONG;
	/* Any lookup failure is reported to the caller as ENODEV. */
	if (error == 0 && (vfsp = vfs_byname_kld(fstype, td, &error)) == NULL)
		error = ENODEV;
	if (error == 0 && vp->v_type != VDIR)
		error = ENOTDIR;
	/*
	 * We need vnode lock to protect v_mountedhere and vnode interlock
	 * to protect v_iflag.
	 */
	if (error == 0) {
		/*
		 * Claim the vnode for this mount by setting VI_MOUNT; fail
		 * with EBUSY if the flag is already set or something is
		 * already mounted here.
		 */
		VI_LOCK(vp);
		if ((vp->v_iflag & VI_MOUNT) == 0 && vp->v_mountedhere == NULL)
			vp->v_iflag |= VI_MOUNT;
		else
			error = EBUSY;
		VI_UNLOCK(vp);
	}
	if (error != 0) {
		vput(vp);
		return (error);
	}
	/*
	 * The seqc write section opened here is closed on both the failure
	 * and the success path below.
	 */
	vn_seqc_write_begin(vp);
	VOP_UNLOCK1(vp);

	/*
	 * Allocate and initialize the filesystem.
	 * We don't want regular user that triggered snapshot mount to be able
	 * to unmount it, so pass credentials of the parent mount.
	 */
	mp = vfs_mount_alloc(vp, vfsp, fspath, vp->v_mount->mnt_cred);

	/*
	 * vfs_setmntopt() appends to mnt_opt, so build the option list there
	 * and then move it over to mnt_optnew, leaving mnt_opt empty.
	 */
	mp->mnt_optnew = NULL;
	vfs_setmntopt(mp, "from", fspec, 0);
	mp->mnt_optnew = mp->mnt_opt;
	mp->mnt_opt = NULL;

	/*
	 * Set the mount level flags.
	 */
	mp->mnt_flag = fsflags & MNT_UPDATEMASK;
	/*
	 * Snapshots are always read-only.
	 */
	mp->mnt_flag |= MNT_RDONLY;
	/*
	 * We don't want snapshots to allow access to vulnerable setuid
	 * programs, so we turn off setuid when mounting snapshots.
	 */
	mp->mnt_flag |= MNT_NOSUID;
	/*
	 * We don't want snapshots to be visible in regular
	 * mount(8) and df(1) output.
	 */
	mp->mnt_flag |= MNT_IGNORE;

	error = VFS_MOUNT(mp);
	if (error != 0) {
		/*
		 * Clear VI_MOUNT and decrement the use count "atomically",
		 * under the vnode lock.  This is not strictly required,
		 * but makes it easier to reason about the life-cycle and
		 * ownership of the covered vnode.
		 */
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		VI_LOCK(vp);
		vp->v_iflag &= ~VI_MOUNT;
		VI_UNLOCK(vp);
		vn_seqc_write_end(vp);
		vput(vp);
		/*
		 * Tear down the half-constructed mount: free the pending
		 * options and detach the covered vnode before destroying it.
		 */
		vfs_unbusy(mp);
		vfs_freeopts(mp->mnt_optnew);
		mp->mnt_vnodecovered = NULL;
		vfs_mount_destroy(mp);
		return (error);
	}

	/*
	 * The mount succeeded: the pending options become the active ones,
	 * replacing any list that was installed in mnt_opt in the meantime.
	 */
	if (mp->mnt_opt != NULL)
		vfs_freeopts(mp->mnt_opt);
	mp->mnt_opt = mp->mnt_optnew;
	(void) VFS_STATFS(mp, &mp->mnt_stat);

#ifdef VFS_SUPPORTS_EXJAIL_CLONE
	/*
	 * Clone the mnt_exjail credentials of the parent, as required.
	 */
	vfs_exjail_clone(parent_vfsp, mp);
#endif

	/*
	 * Prevent external consumers of mount options from reading
	 * mnt_optnew.
	 */
	mp->mnt_optnew = NULL;

	/*
	 * Re-lock the covered vnode and publish the mount on it: clear
	 * VI_MOUNT and set v_mountedhere under the vnode interlock.
	 */
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
#ifdef FREEBSD_NAMECACHE
	cache_purge(vp);
#endif
	VI_LOCK(vp);
	vp->v_iflag &= ~VI_MOUNT;
#ifdef VIRF_MOUNTPOINT
	vn_irflag_set_locked(vp, VIRF_MOUNTPOINT);
#endif
	vp->v_mountedhere = mp;
	VI_UNLOCK(vp);
	/* Put the new filesystem on the mount list. */
	mtx_lock(&mountlist_mtx);
	TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
	mtx_unlock(&mountlist_mtx);
	vfs_event_signal(NULL, VQ_MOUNT, 0);
	/* A mount whose root vnode cannot be obtained is treated as fatal. */
	if (VFS_ROOT(mp, LK_EXCLUSIVE, &mvp))
		panic("mount: lost mount");
	vn_seqc_write_end(vp);
	VOP_UNLOCK1(vp);
#if __FreeBSD_version >= 1300048
	vfs_op_exit(mp);
#endif
	vfs_unbusy(mp);
	/* Hand the new filesystem's root vnode back to the caller. */
	*vpp = mvp;
	return (0);
}
260
261/*
262 * Like vn_rele() except if we are going to call VOP_INACTIVE() then do it
263 * asynchronously using a taskq. This can avoid deadlocks caused by re-entering
264 * the file system as a result of releasing the vnode. Note, file systems
265 * already have to handle the race where the vnode is incremented before the
266 * inactive routine is called and does its locking.
267 *
268 * Warning: Excessive use of this routine can lead to performance problems.
269 * This is because taskqs throttle back allocation if too many are created.
270 */
271void
272vn_rele_async(vnode_t *vp, taskq_t *taskq)
273{
274	VERIFY3U(vp->v_usecount, >, 0);
275	if (refcount_release_if_not_last(&vp->v_usecount)) {
276#if __FreeBSD_version < 1300045
277		vdrop(vp);
278#endif
279		return;
280	}
281	VERIFY3U(taskq_dispatch((taskq_t *)taskq,
282	    (task_func_t *)vrele, vp, TQ_SLEEP), !=, 0);
283}
284