null_vfsops.c revision 250852
/*-
 * Copyright (c) 1992, 1993, 1995
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software donated to Berkeley by
 * Jan-Simon Pendry.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)null_vfsops.c	8.2 (Berkeley) 1/21/94
 *
 * @(#)lofs_vfsops.c	1.2 (Berkeley) 6/18/92
 * $FreeBSD: head/sys/fs/nullfs/null_vfsops.c 250852 2013-05-21 11:31:56Z kib $
 */
37169689Skan
/*
 * Null Layer
 * (See null_vnops.c for a description of what this does.)
 */
4218334Speter
43169689Skan#include <sys/param.h>
44169689Skan#include <sys/systm.h>
45169689Skan#include <sys/fcntl.h>
4618334Speter#include <sys/kernel.h>
47169689Skan#include <sys/lock.h>
48169689Skan#include <sys/malloc.h>
49169689Skan#include <sys/mount.h>
5018334Speter#include <sys/namei.h>
51169689Skan#include <sys/proc.h>
52169689Skan#include <sys/vnode.h>
53169689Skan#include <sys/jail.h>
5418334Speter
55169689Skan#include <fs/nullfs/null.h>
56169689Skan
57169689Skanstatic MALLOC_DEFINE(M_NULLFSMNT, "nullfs_mount", "NULLFS mount structure");
5818334Speter
59169689Skanstatic vfs_fhtovp_t	nullfs_fhtovp;
60169689Skanstatic vfs_mount_t	nullfs_mount;
61169689Skanstatic vfs_quotactl_t	nullfs_quotactl;
6218334Speterstatic vfs_root_t	nullfs_root;
63169689Skanstatic vfs_sync_t	nullfs_sync;
64169689Skanstatic vfs_statfs_t	nullfs_statfs;
65169689Skanstatic vfs_unmount_t	nullfs_unmount;
6618334Speterstatic vfs_vget_t	nullfs_vget;
67169689Skanstatic vfs_extattrctl_t	nullfs_extattrctl;
68169689Skan
69169689Skan/*
70169689Skan * Mount null layer
71169689Skan */
72169689Skanstatic int
73169689Skannullfs_mount(struct mount *mp)
74169689Skan{
75169689Skan	int error = 0;
7618334Speter	struct vnode *lowerrootvp, *vp;
77169689Skan	struct vnode *nullm_rootvp;
78169689Skan	struct null_mount *xmp;
79169689Skan	struct thread *td = curthread;
80169689Skan	char *target;
8118334Speter	int isvnunlocked = 0, len;
82169689Skan	struct nameidata nd, *ndp = &nd;
83169689Skan
84169689Skan	NULLFSDEBUG("nullfs_mount(mp = %p)\n", (void *)mp);
85169689Skan
8618334Speter	if (!prison_allow(td->td_ucred, PR_ALLOW_MOUNT_NULLFS))
87169689Skan		return (EPERM);
88169689Skan	if (mp->mnt_flag & MNT_ROOTFS)
89169689Skan		return (EOPNOTSUPP);
90169689Skan
91169689Skan	/*
9218334Speter	 * Update is a no-op
93169689Skan	 */
94169689Skan	if (mp->mnt_flag & MNT_UPDATE) {
95169689Skan		/*
96169689Skan		 * Only support update mounts for NFS export.
97169689Skan		 */
98169689Skan		if (vfs_flagopt(mp->mnt_optnew, "export", NULL, 0))
99169689Skan			return (0);
100169689Skan		else
101169689Skan			return (EOPNOTSUPP);
102169689Skan	}
103169689Skan
104169689Skan	/*
105169689Skan	 * Get argument
106169689Skan	 */
107169689Skan	error = vfs_getopt(mp->mnt_optnew, "target", (void **)&target, &len);
108169689Skan	if (error || target[len - 1] != '\0')
109169689Skan		return (EINVAL);
11018334Speter
111169689Skan	/*
112169689Skan	 * Unlock lower node to avoid possible deadlock.
113169689Skan	 */
114169689Skan	if ((mp->mnt_vnodecovered->v_op == &null_vnodeops) &&
115169689Skan	    VOP_ISLOCKED(mp->mnt_vnodecovered) == LK_EXCLUSIVE) {
116169689Skan		VOP_UNLOCK(mp->mnt_vnodecovered, 0);
117169689Skan		isvnunlocked = 1;
118169689Skan	}
119169689Skan	/*
120169689Skan	 * Find lower node
121169689Skan	 */
122169689Skan	NDINIT(ndp, LOOKUP, FOLLOW|LOCKLEAF, UIO_SYSSPACE, target, curthread);
123169689Skan	error = namei(ndp);
12418334Speter
125169689Skan	/*
126169689Skan	 * Re-lock vnode.
127169689Skan	 * XXXKIB This is deadlock-prone as well.
128169689Skan	 */
129169689Skan	if (isvnunlocked)
130169689Skan		vn_lock(mp->mnt_vnodecovered, LK_EXCLUSIVE | LK_RETRY);
13118334Speter
132169689Skan	if (error)
13318334Speter		return (error);
134169689Skan	NDFREE(ndp, NDF_ONLY_PNBUF);
135169689Skan
136169689Skan	/*
13718334Speter	 * Sanity check on lower vnode
138169689Skan	 */
139169689Skan	lowerrootvp = ndp->ni_vp;
140169689Skan
14118334Speter	/*
142169689Skan	 * Check multi null mount to avoid `lock against myself' panic.
143169689Skan	 */
144169689Skan	if (lowerrootvp == VTONULL(mp->mnt_vnodecovered)->null_lowervp) {
14518334Speter		NULLFSDEBUG("nullfs_mount: multi null mount?\n");
146169689Skan		vput(lowerrootvp);
147169689Skan		return (EDEADLK);
148169689Skan	}
14918334Speter
150169689Skan	xmp = (struct null_mount *) malloc(sizeof(struct null_mount),
151169689Skan	    M_NULLFSMNT, M_WAITOK | M_ZERO);
15218334Speter
153169689Skan	/*
154169689Skan	 * Save reference to underlying FS
155169689Skan	 */
156169689Skan	xmp->nullm_vfs = lowerrootvp->v_mount;
157169689Skan
158169689Skan	/*
159169689Skan	 * Save reference.  Each mount also holds
160169689Skan	 * a reference on the root vnode.
161169689Skan	 */
16218334Speter	error = null_nodeget(mp, lowerrootvp, &vp);
163169689Skan	/*
164169689Skan	 * Make sure the node alias worked
16518334Speter	 */
166169689Skan	if (error) {
167169689Skan		free(xmp, M_NULLFSMNT);
168169689Skan		return (error);
16918334Speter	}
170169689Skan
17118334Speter	/*
172169689Skan	 * Keep a held reference to the root vnode.
173169689Skan	 * It is vrele'd in nullfs_unmount.
174169689Skan	 */
17518334Speter	nullm_rootvp = vp;
176169689Skan	nullm_rootvp->v_vflag |= VV_ROOT;
177169689Skan	xmp->nullm_rootvp = nullm_rootvp;
178169689Skan
179	/*
180	 * Unlock the node (either the lower or the alias)
181	 */
182	VOP_UNLOCK(vp, 0);
183
184	if (NULLVPTOLOWERVP(nullm_rootvp)->v_mount->mnt_flag & MNT_LOCAL) {
185		MNT_ILOCK(mp);
186		mp->mnt_flag |= MNT_LOCAL;
187		MNT_IUNLOCK(mp);
188	}
189
190	xmp->nullm_flags |= NULLM_CACHE;
191	if (vfs_getopt(mp->mnt_optnew, "nocache", NULL, NULL) == 0)
192		xmp->nullm_flags &= ~NULLM_CACHE;
193
194	MNT_ILOCK(mp);
195	if ((xmp->nullm_flags & NULLM_CACHE) != 0) {
196		mp->mnt_kern_flag |= lowerrootvp->v_mount->mnt_kern_flag &
197		    (MNTK_SHARED_WRITES | MNTK_LOOKUP_SHARED |
198		    MNTK_EXTENDED_SHARED);
199	}
200	mp->mnt_kern_flag |= MNTK_LOOKUP_EXCL_DOTDOT;
201	MNT_IUNLOCK(mp);
202	mp->mnt_data = xmp;
203	vfs_getnewfsid(mp);
204	if ((xmp->nullm_flags & NULLM_CACHE) != 0) {
205		MNT_ILOCK(xmp->nullm_vfs);
206		TAILQ_INSERT_TAIL(&xmp->nullm_vfs->mnt_uppers, mp,
207		    mnt_upper_link);
208		MNT_IUNLOCK(xmp->nullm_vfs);
209	}
210
211	vfs_mountedfrom(mp, target);
212
213	NULLFSDEBUG("nullfs_mount: lower %s, alias at %s\n",
214		mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname);
215	return (0);
216}
217
218/*
219 * Free reference to null layer
220 */
221static int
222nullfs_unmount(mp, mntflags)
223	struct mount *mp;
224	int mntflags;
225{
226	struct null_mount *mntdata;
227	struct mount *ump;
228	int error, flags;
229
230	NULLFSDEBUG("nullfs_unmount: mp = %p\n", (void *)mp);
231
232	if (mntflags & MNT_FORCE)
233		flags = FORCECLOSE;
234	else
235		flags = 0;
236
237	/* There is 1 extra root vnode reference (nullm_rootvp). */
238	error = vflush(mp, 1, flags, curthread);
239	if (error)
240		return (error);
241
242	/*
243	 * Finally, throw away the null_mount structure
244	 */
245	mntdata = mp->mnt_data;
246	ump = mntdata->nullm_vfs;
247	if ((mntdata->nullm_flags & NULLM_CACHE) != 0) {
248		MNT_ILOCK(ump);
249		while ((ump->mnt_kern_flag & MNTK_VGONE_UPPER) != 0) {
250			ump->mnt_kern_flag |= MNTK_VGONE_WAITER;
251			msleep(&ump->mnt_uppers, &ump->mnt_mtx, 0, "vgnupw", 0);
252		}
253		TAILQ_REMOVE(&ump->mnt_uppers, mp, mnt_upper_link);
254		MNT_IUNLOCK(ump);
255	}
256	mp->mnt_data = NULL;
257	free(mntdata, M_NULLFSMNT);
258	return (0);
259}
260
261static int
262nullfs_root(mp, flags, vpp)
263	struct mount *mp;
264	int flags;
265	struct vnode **vpp;
266{
267	struct vnode *vp;
268
269	NULLFSDEBUG("nullfs_root(mp = %p, vp = %p->%p)\n", (void *)mp,
270	    (void *)MOUNTTONULLMOUNT(mp)->nullm_rootvp,
271	    (void *)NULLVPTOLOWERVP(MOUNTTONULLMOUNT(mp)->nullm_rootvp));
272
273	/*
274	 * Return locked reference to root.
275	 */
276	vp = MOUNTTONULLMOUNT(mp)->nullm_rootvp;
277	VREF(vp);
278
279	ASSERT_VOP_UNLOCKED(vp, "root vnode is locked");
280	vn_lock(vp, flags | LK_RETRY);
281	*vpp = vp;
282	return 0;
283}
284
285static int
286nullfs_quotactl(mp, cmd, uid, arg)
287	struct mount *mp;
288	int cmd;
289	uid_t uid;
290	void *arg;
291{
292	return VFS_QUOTACTL(MOUNTTONULLMOUNT(mp)->nullm_vfs, cmd, uid, arg);
293}
294
295static int
296nullfs_statfs(mp, sbp)
297	struct mount *mp;
298	struct statfs *sbp;
299{
300	int error;
301	struct statfs mstat;
302
303	NULLFSDEBUG("nullfs_statfs(mp = %p, vp = %p->%p)\n", (void *)mp,
304	    (void *)MOUNTTONULLMOUNT(mp)->nullm_rootvp,
305	    (void *)NULLVPTOLOWERVP(MOUNTTONULLMOUNT(mp)->nullm_rootvp));
306
307	bzero(&mstat, sizeof(mstat));
308
309	error = VFS_STATFS(MOUNTTONULLMOUNT(mp)->nullm_vfs, &mstat);
310	if (error)
311		return (error);
312
313	/* now copy across the "interesting" information and fake the rest */
314	sbp->f_type = mstat.f_type;
315	sbp->f_flags = (sbp->f_flags & (MNT_RDONLY | MNT_NOEXEC | MNT_NOSUID |
316	    MNT_UNION | MNT_NOSYMFOLLOW)) | (mstat.f_flags & ~MNT_ROOTFS);
317	sbp->f_bsize = mstat.f_bsize;
318	sbp->f_iosize = mstat.f_iosize;
319	sbp->f_blocks = mstat.f_blocks;
320	sbp->f_bfree = mstat.f_bfree;
321	sbp->f_bavail = mstat.f_bavail;
322	sbp->f_files = mstat.f_files;
323	sbp->f_ffree = mstat.f_ffree;
324	return (0);
325}
326
/*
 * Sync the null mount.  Nothing is done at this layer; both arguments
 * are ignored and 0 is always returned.
 */
static int
nullfs_sync(struct mount *mp, int waitfor)
{

	/*
	 * XXX - Assumes no data cached at null layer.
	 */
	return (0);
}
337
338static int
339nullfs_vget(mp, ino, flags, vpp)
340	struct mount *mp;
341	ino_t ino;
342	int flags;
343	struct vnode **vpp;
344{
345	int error;
346
347	KASSERT((flags & LK_TYPE_MASK) != 0,
348	    ("nullfs_vget: no lock requested"));
349
350	error = VFS_VGET(MOUNTTONULLMOUNT(mp)->nullm_vfs, ino, flags, vpp);
351	if (error != 0)
352		return (error);
353	return (null_nodeget(mp, *vpp, vpp));
354}
355
356static int
357nullfs_fhtovp(mp, fidp, flags, vpp)
358	struct mount *mp;
359	struct fid *fidp;
360	int flags;
361	struct vnode **vpp;
362{
363	int error;
364
365	error = VFS_FHTOVP(MOUNTTONULLMOUNT(mp)->nullm_vfs, fidp, flags,
366	    vpp);
367	if (error != 0)
368		return (error);
369	return (null_nodeget(mp, *vpp, vpp));
370}
371
372static int
373nullfs_extattrctl(mp, cmd, filename_vp, namespace, attrname)
374	struct mount *mp;
375	int cmd;
376	struct vnode *filename_vp;
377	int namespace;
378	const char *attrname;
379{
380
381	return (VFS_EXTATTRCTL(MOUNTTONULLMOUNT(mp)->nullm_vfs, cmd,
382	    filename_vp, namespace, attrname));
383}
384
385static void
386nullfs_reclaim_lowervp(struct mount *mp, struct vnode *lowervp)
387{
388	struct vnode *vp;
389
390	vp = null_hashget(mp, lowervp);
391	if (vp == NULL)
392		return;
393	VTONULL(vp)->null_flags |= NULLV_NOUNLOCK;
394	vgone(vp);
395	vput(vp);
396}
397
398static void
399nullfs_unlink_lowervp(struct mount *mp, struct vnode *lowervp)
400{
401	struct vnode *vp;
402	struct null_node *xp;
403
404	vp = null_hashget(mp, lowervp);
405	if (vp == NULL)
406		return;
407	xp = VTONULL(vp);
408	xp->null_flags |= NULLV_DROP | NULLV_NOUNLOCK;
409	vhold(vp);
410	vunref(vp);
411
412	if (vp->v_usecount == 0) {
413		/*
414		 * If vunref() dropped the last use reference on the
415		 * nullfs vnode, it must be reclaimed, and its lock
416		 * was split from the lower vnode lock.  Need to do
417		 * extra unlock before allowing the final vdrop() to
418		 * free the vnode.
419		 */
420		KASSERT((vp->v_iflag & VI_DOOMED) != 0,
421		    ("not reclaimed nullfs vnode %p", vp));
422		VOP_UNLOCK(vp, 0);
423	} else {
424		/*
425		 * Otherwise, the nullfs vnode still shares the lock
426		 * with the lower vnode, and must not be unlocked.
427		 * Also clear the NULLV_NOUNLOCK, the flag is not
428		 * relevant for future reclamations.
429		 */
430		ASSERT_VOP_ELOCKED(vp, "unlink_lowervp");
431		KASSERT((vp->v_iflag & VI_DOOMED) == 0,
432		    ("reclaimed nullfs vnode %p", vp));
433		xp->null_flags &= ~NULLV_NOUNLOCK;
434	}
435	vdrop(vp);
436}
437
438static struct vfsops null_vfsops = {
439	.vfs_extattrctl =	nullfs_extattrctl,
440	.vfs_fhtovp =		nullfs_fhtovp,
441	.vfs_init =		nullfs_init,
442	.vfs_mount =		nullfs_mount,
443	.vfs_quotactl =		nullfs_quotactl,
444	.vfs_root =		nullfs_root,
445	.vfs_statfs =		nullfs_statfs,
446	.vfs_sync =		nullfs_sync,
447	.vfs_uninit =		nullfs_uninit,
448	.vfs_unmount =		nullfs_unmount,
449	.vfs_vget =		nullfs_vget,
450	.vfs_reclaim_lowervp =	nullfs_reclaim_lowervp,
451	.vfs_unlink_lowervp =	nullfs_unlink_lowervp,
452};
453
454VFS_SET(null_vfsops, nullfs, VFCF_LOOPBACK | VFCF_JAIL);
455