1/*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright (c) 1992, 1993, 1995
5 *	The Regents of the University of California.  All rights reserved.
6 *
7 * This code is derived from software donated to Berkeley by
8 * Jan-Simon Pendry.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its contributors
19 *    may be used to endorse or promote products derived from this software
20 *    without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 */
34
35/*
36 * Null Layer
37 * (See null_vnops.c for a description of what this does.)
38 */
39
40#include <sys/param.h>
41#include <sys/systm.h>
42#include <sys/fcntl.h>
43#include <sys/kernel.h>
44#include <sys/lock.h>
45#include <sys/malloc.h>
46#include <sys/mount.h>
47#include <sys/namei.h>
48#include <sys/proc.h>
49#include <sys/sysctl.h>
50#include <sys/vnode.h>
51#include <sys/jail.h>
52
53#include <fs/nullfs/null.h>
54
55static MALLOC_DEFINE(M_NULLFSMNT, "nullfs_mount", "NULLFS mount structure");
56
57static vfs_fhtovp_t	nullfs_fhtovp;
58static vfs_mount_t	nullfs_mount;
59static vfs_quotactl_t	nullfs_quotactl;
60static vfs_root_t	nullfs_root;
61static vfs_sync_t	nullfs_sync;
62static vfs_statfs_t	nullfs_statfs;
63static vfs_unmount_t	nullfs_unmount;
64static vfs_vget_t	nullfs_vget;
65static vfs_extattrctl_t	nullfs_extattrctl;
66
67SYSCTL_NODE(_vfs, OID_AUTO, nullfs, CTLFLAG_RW, 0, "nullfs");
68
69static bool null_cache_vnodes = true;
70SYSCTL_BOOL(_vfs_nullfs, OID_AUTO, cache_vnodes, CTLFLAG_RWTUN,
71    &null_cache_vnodes, 0,
72    "cache free nullfs vnodes");
73
74/*
75 * Mount null layer
76 */
77static int
78nullfs_mount(struct mount *mp)
79{
80	struct vnode *lowerrootvp;
81	struct vnode *nullm_rootvp;
82	struct null_mount *xmp;
83	struct null_node *nn;
84	struct nameidata nd, *ndp;
85	char *target;
86	int error, len;
87	bool isvnunlocked;
88
89	NULLFSDEBUG("nullfs_mount(mp = %p)\n", (void *)mp);
90
91	if (mp->mnt_flag & MNT_ROOTFS)
92		return (EOPNOTSUPP);
93
94	/*
95	 * Update is a no-op
96	 */
97	if (mp->mnt_flag & MNT_UPDATE) {
98		/*
99		 * Only support update mounts for NFS export.
100		 */
101		if (vfs_flagopt(mp->mnt_optnew, "export", NULL, 0))
102			return (0);
103		else
104			return (EOPNOTSUPP);
105	}
106
107	/*
108	 * Get argument
109	 */
110	error = vfs_getopt(mp->mnt_optnew, "from", (void **)&target, &len);
111	if (error != 0)
112		error = vfs_getopt(mp->mnt_optnew, "target", (void **)&target, &len);
113	if (error || target[len - 1] != '\0')
114		return (EINVAL);
115
116	/*
117	 * Unlock lower node to avoid possible deadlock.
118	 */
119	if (mp->mnt_vnodecovered->v_op == &null_vnodeops &&
120	    VOP_ISLOCKED(mp->mnt_vnodecovered) == LK_EXCLUSIVE) {
121		VOP_UNLOCK(mp->mnt_vnodecovered);
122		isvnunlocked = true;
123	} else {
124		isvnunlocked = false;
125	}
126
127	/*
128	 * Find lower node
129	 */
130	ndp = &nd;
131	NDINIT(ndp, LOOKUP, FOLLOW|LOCKLEAF, UIO_SYSSPACE, target);
132	error = namei(ndp);
133
134	/*
135	 * Re-lock vnode.
136	 * XXXKIB This is deadlock-prone as well.
137	 */
138	if (isvnunlocked)
139		vn_lock(mp->mnt_vnodecovered, LK_EXCLUSIVE | LK_RETRY);
140
141	if (error)
142		return (error);
143	NDFREE_PNBUF(ndp);
144
145	/*
146	 * Sanity check on lower vnode
147	 */
148	lowerrootvp = ndp->ni_vp;
149
150	/*
151	 * Check multi null mount to avoid `lock against myself' panic.
152	 */
153	if (mp->mnt_vnodecovered->v_op == &null_vnodeops) {
154		nn = VTONULL(mp->mnt_vnodecovered);
155		if (nn == NULL || lowerrootvp == nn->null_lowervp) {
156			NULLFSDEBUG("nullfs_mount: multi null mount?\n");
157			vput(lowerrootvp);
158			return (EDEADLK);
159		}
160	}
161
162	/*
163	 * Lower vnode must be the same type as the covered vnode - we
164	 * don't allow mounting directories to files or vice versa.
165	 */
166	if ((lowerrootvp->v_type != VDIR && lowerrootvp->v_type != VREG) ||
167	    lowerrootvp->v_type != mp->mnt_vnodecovered->v_type) {
168		NULLFSDEBUG("nullfs_mount: target must be same type as fspath");
169		vput(lowerrootvp);
170		return (EINVAL);
171	}
172
173	xmp = malloc(sizeof(struct null_mount), M_NULLFSMNT,
174	    M_WAITOK | M_ZERO);
175
176	/*
177	 * Save pointer to underlying FS and the reference to the
178	 * lower root vnode.
179	 */
180	xmp->nullm_vfs = vfs_register_upper_from_vp(lowerrootvp, mp,
181	    &xmp->upper_node);
182	if (xmp->nullm_vfs == NULL) {
183		vput(lowerrootvp);
184		free(xmp, M_NULLFSMNT);
185		return (ENOENT);
186	}
187	vref(lowerrootvp);
188	xmp->nullm_lowerrootvp = lowerrootvp;
189	mp->mnt_data = xmp;
190
191	/*
192	 * Make sure the node alias worked.
193	 */
194	error = null_nodeget(mp, lowerrootvp, &nullm_rootvp);
195	if (error != 0) {
196		vfs_unregister_upper(xmp->nullm_vfs, &xmp->upper_node);
197		vrele(lowerrootvp);
198		free(xmp, M_NULLFSMNT);
199		return (error);
200	}
201
202	if (NULLVPTOLOWERVP(nullm_rootvp)->v_mount->mnt_flag & MNT_LOCAL) {
203		MNT_ILOCK(mp);
204		mp->mnt_flag |= MNT_LOCAL;
205		MNT_IUNLOCK(mp);
206	}
207
208	if (vfs_getopt(mp->mnt_optnew, "cache", NULL, NULL) == 0) {
209		xmp->nullm_flags |= NULLM_CACHE;
210	} else if (vfs_getopt(mp->mnt_optnew, "nocache", NULL, NULL) == 0) {
211		;
212	} else if (null_cache_vnodes &&
213	    (xmp->nullm_vfs->mnt_kern_flag & MNTK_NULL_NOCACHE) == 0) {
214		xmp->nullm_flags |= NULLM_CACHE;
215	}
216
217	if ((xmp->nullm_flags & NULLM_CACHE) != 0) {
218		vfs_register_for_notification(xmp->nullm_vfs, mp,
219		    &xmp->notify_node);
220	}
221
222	if (lowerrootvp == mp->mnt_vnodecovered) {
223		vn_lock(lowerrootvp, LK_EXCLUSIVE | LK_RETRY | LK_CANRECURSE);
224		lowerrootvp->v_vflag |= VV_CROSSLOCK;
225		VOP_UNLOCK(lowerrootvp);
226	}
227
228	MNT_ILOCK(mp);
229	if ((xmp->nullm_flags & NULLM_CACHE) != 0) {
230		mp->mnt_kern_flag |= lowerrootvp->v_mount->mnt_kern_flag &
231		    (MNTK_SHARED_WRITES | MNTK_LOOKUP_SHARED |
232		    MNTK_EXTENDED_SHARED);
233	}
234	mp->mnt_kern_flag |= MNTK_NOMSYNC | MNTK_UNLOCKED_INSMNTQUE;
235	mp->mnt_kern_flag |= lowerrootvp->v_mount->mnt_kern_flag &
236	    (MNTK_USES_BCACHE | MNTK_NO_IOPF | MNTK_UNMAPPED_BUFS);
237	MNT_IUNLOCK(mp);
238	vfs_getnewfsid(mp);
239	vfs_mountedfrom(mp, target);
240	vput(nullm_rootvp);
241
242	NULLFSDEBUG("nullfs_mount: lower %s, alias at %s\n",
243		mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname);
244	return (0);
245}
246
247/*
248 * Free reference to null layer
249 */
250static int
251nullfs_unmount(struct mount *mp, int mntflags)
252{
253	struct null_mount *mntdata;
254	int error, flags;
255
256	NULLFSDEBUG("nullfs_unmount: mp = %p\n", (void *)mp);
257
258	if (mntflags & MNT_FORCE)
259		flags = FORCECLOSE;
260	else
261		flags = 0;
262
263	for (;;) {
264		/* There is 1 extra root vnode reference (nullm_rootvp). */
265		error = vflush(mp, 0, flags, curthread);
266		if (error)
267			return (error);
268		MNT_ILOCK(mp);
269		if (mp->mnt_nvnodelistsize == 0) {
270			MNT_IUNLOCK(mp);
271			break;
272		}
273		MNT_IUNLOCK(mp);
274		if ((mntflags & MNT_FORCE) == 0)
275			return (EBUSY);
276	}
277
278	/*
279	 * Finally, throw away the null_mount structure
280	 */
281	mntdata = mp->mnt_data;
282	if ((mntdata->nullm_flags & NULLM_CACHE) != 0) {
283		vfs_unregister_for_notification(mntdata->nullm_vfs,
284		    &mntdata->notify_node);
285	}
286	if (mntdata->nullm_lowerrootvp == mp->mnt_vnodecovered) {
287		vn_lock(mp->mnt_vnodecovered, LK_EXCLUSIVE | LK_RETRY | LK_CANRECURSE);
288		mp->mnt_vnodecovered->v_vflag &= ~VV_CROSSLOCK;
289		VOP_UNLOCK(mp->mnt_vnodecovered);
290	}
291	vfs_unregister_upper(mntdata->nullm_vfs, &mntdata->upper_node);
292	vrele(mntdata->nullm_lowerrootvp);
293	mp->mnt_data = NULL;
294	free(mntdata, M_NULLFSMNT);
295	return (0);
296}
297
298static int
299nullfs_root(struct mount *mp, int flags, struct vnode **vpp)
300{
301	struct vnode *vp;
302	struct null_mount *mntdata;
303	int error;
304
305	mntdata = MOUNTTONULLMOUNT(mp);
306	NULLFSDEBUG("nullfs_root(mp = %p, vp = %p)\n", mp,
307	    mntdata->nullm_lowerrootvp);
308
309	error = vget(mntdata->nullm_lowerrootvp, flags);
310	if (error == 0) {
311		error = null_nodeget(mp, mntdata->nullm_lowerrootvp, &vp);
312		if (error == 0) {
313			*vpp = vp;
314		}
315	}
316	return (error);
317}
318
319static int
320nullfs_quotactl(struct mount *mp, int cmd, uid_t uid, void *arg, bool *mp_busy)
321{
322	struct mount *lowermp;
323	struct null_mount *mntdata;
324	int error;
325	bool unbusy;
326
327	mntdata = MOUNTTONULLMOUNT(mp);
328	lowermp = atomic_load_ptr(&mntdata->nullm_vfs);
329	KASSERT(*mp_busy == true, ("upper mount not busy"));
330	/*
331	 * See comment in sys_quotactl() for an explanation of why the
332	 * lower mount needs to be busied by the caller of VFS_QUOTACTL()
333	 * but may be unbusied by the implementation.  We must unbusy
334	 * the upper mount for the same reason; otherwise a namei lookup
335	 * issued by the VFS_QUOTACTL() implementation could traverse the
336	 * upper mount and deadlock.
337	 */
338	vfs_unbusy(mp);
339	*mp_busy = false;
340	unbusy = true;
341	error = vfs_busy(lowermp, 0);
342	if (error == 0)
343		error = VFS_QUOTACTL(lowermp, cmd, uid, arg, &unbusy);
344	if (unbusy)
345		vfs_unbusy(lowermp);
346
347	return (error);
348}
349
350static int
351nullfs_statfs(struct mount *mp, struct statfs *sbp)
352{
353	int error;
354	struct statfs *mstat;
355
356	NULLFSDEBUG("nullfs_statfs(mp = %p, vp = %p->%p)\n", (void *)mp,
357	    (void *)MOUNTTONULLMOUNT(mp)->nullm_rootvp,
358	    (void *)NULLVPTOLOWERVP(MOUNTTONULLMOUNT(mp)->nullm_rootvp));
359
360	mstat = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK | M_ZERO);
361
362	error = VFS_STATFS(MOUNTTONULLMOUNT(mp)->nullm_vfs, mstat);
363	if (error) {
364		free(mstat, M_STATFS);
365		return (error);
366	}
367
368	/* now copy across the "interesting" information and fake the rest */
369	sbp->f_type = mstat->f_type;
370	sbp->f_flags &= MNT_RDONLY | MNT_NOEXEC | MNT_NOSUID | MNT_UNION |
371	    MNT_NOSYMFOLLOW | MNT_AUTOMOUNTED | MNT_EXPORTED | MNT_IGNORE;
372	mstat->f_flags &= ~(MNT_ROOTFS | MNT_AUTOMOUNTED | MNT_EXPORTED);
373	sbp->f_flags |= mstat->f_flags;
374	sbp->f_bsize = mstat->f_bsize;
375	sbp->f_iosize = mstat->f_iosize;
376	sbp->f_blocks = mstat->f_blocks;
377	sbp->f_bfree = mstat->f_bfree;
378	sbp->f_bavail = mstat->f_bavail;
379	sbp->f_files = mstat->f_files;
380	sbp->f_ffree = mstat->f_ffree;
381
382	free(mstat, M_STATFS);
383	return (0);
384}
385
/*
 * Sync request for the null mount: nothing is done at this layer and
 * success is always reported, regardless of the arguments.
 */
static int
nullfs_sync(struct mount *mp, int waitfor)
{
	/*
	 * XXX - Assumes no data cached at null layer.
	 */
	(void)mp;
	(void)waitfor;

	return (0);
}
394
395static int
396nullfs_vget(struct mount *mp, ino_t ino, int flags, struct vnode **vpp)
397{
398	int error;
399
400	KASSERT((flags & LK_TYPE_MASK) != 0,
401	    ("nullfs_vget: no lock requested"));
402
403	error = VFS_VGET(MOUNTTONULLMOUNT(mp)->nullm_vfs, ino, flags, vpp);
404	if (error != 0)
405		return (error);
406	return (null_nodeget(mp, *vpp, vpp));
407}
408
409static int
410nullfs_fhtovp(struct mount *mp, struct fid *fidp, int flags, struct vnode **vpp)
411{
412	int error;
413
414	error = VFS_FHTOVP(MOUNTTONULLMOUNT(mp)->nullm_vfs, fidp, flags,
415	    vpp);
416	if (error != 0)
417		return (error);
418	return (null_nodeget(mp, *vpp, vpp));
419}
420
421static int
422nullfs_extattrctl(struct mount *mp, int cmd, struct vnode *filename_vp,
423    int namespace, const char *attrname)
424{
425
426	return (VFS_EXTATTRCTL(MOUNTTONULLMOUNT(mp)->nullm_vfs, cmd,
427	    filename_vp, namespace, attrname));
428}
429
430static void
431nullfs_reclaim_lowervp(struct mount *mp, struct vnode *lowervp)
432{
433	struct vnode *vp;
434
435	vp = null_hashget(mp, lowervp);
436	if (vp == NULL)
437		return;
438	VTONULL(vp)->null_flags |= NULLV_NOUNLOCK;
439	vgone(vp);
440	vput(vp);
441}
442
443static void
444nullfs_unlink_lowervp(struct mount *mp, struct vnode *lowervp)
445{
446	struct vnode *vp;
447	struct null_node *xp;
448
449	vp = null_hashget(mp, lowervp);
450	if (vp == NULL)
451		return;
452	xp = VTONULL(vp);
453	xp->null_flags |= NULLV_DROP | NULLV_NOUNLOCK;
454	vhold(vp);
455	vunref(vp);
456
457	if (vp->v_usecount == 0) {
458		/*
459		 * If vunref() dropped the last use reference on the
460		 * nullfs vnode, it must be reclaimed, and its lock
461		 * was split from the lower vnode lock.  Need to do
462		 * extra unlock before allowing the final vdrop() to
463		 * free the vnode.
464		 */
465		KASSERT(VN_IS_DOOMED(vp),
466		    ("not reclaimed nullfs vnode %p", vp));
467		VOP_UNLOCK(vp);
468	} else {
469		/*
470		 * Otherwise, the nullfs vnode still shares the lock
471		 * with the lower vnode, and must not be unlocked.
472		 * Also clear the NULLV_NOUNLOCK, the flag is not
473		 * relevant for future reclamations.
474		 */
475		ASSERT_VOP_ELOCKED(vp, "unlink_lowervp");
476		KASSERT(!VN_IS_DOOMED(vp),
477		    ("reclaimed nullfs vnode %p", vp));
478		xp->null_flags &= ~NULLV_NOUNLOCK;
479	}
480	vdrop(vp);
481}
482
483static struct vfsops null_vfsops = {
484	.vfs_extattrctl =	nullfs_extattrctl,
485	.vfs_fhtovp =		nullfs_fhtovp,
486	.vfs_init =		nullfs_init,
487	.vfs_mount =		nullfs_mount,
488	.vfs_quotactl =		nullfs_quotactl,
489	.vfs_root =		nullfs_root,
490	.vfs_statfs =		nullfs_statfs,
491	.vfs_sync =		nullfs_sync,
492	.vfs_uninit =		nullfs_uninit,
493	.vfs_unmount =		nullfs_unmount,
494	.vfs_vget =		nullfs_vget,
495	.vfs_reclaim_lowervp =	nullfs_reclaim_lowervp,
496	.vfs_unlink_lowervp =	nullfs_unlink_lowervp,
497};
498
499VFS_SET(null_vfsops, nullfs, VFCF_LOOPBACK | VFCF_JAIL | VFCF_FILEMOUNT);
500