1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
23 */
24
25#include <sys/param.h>
26#include <sys/errno.h>
27#include <sys/vfs.h>
28#include <sys/vfs_opreg.h>
29#include <sys/vnode.h>
30#include <sys/uio.h>
31#include <sys/pathname.h>
32#include <sys/kmem.h>
33#include <sys/cred.h>
34#include <sys/statvfs.h>
35#include <sys/fs/lofs_info.h>
36#include <sys/fs/lofs_node.h>
37#include <sys/mount.h>
38#include <sys/mntent.h>
39#include <sys/mkdev.h>
40#include <sys/priv.h>
41#include <sys/sysmacros.h>
42#include <sys/systm.h>
43#include <sys/cmn_err.h>
44#include <sys/policy.h>
45#include <sys/tsol/label.h>
46#include "fs/fs_subr.h"
47
48/*
49 * This is the loadable module wrapper.
50 */
51#include <sys/modctl.h>
52
/*
 * Forward declaration of the mount option prototype table.  The table is
 * defined further down, after the option array it wraps, but vfw needs
 * its address here.
 */
static mntopts_t lofs_mntopts;

/* File system initialization routine; registered through vfw below. */
static int lofsinit(int, char *);

/*
 * File system definition for lofs.  It is handed to the module framework
 * through modlfs and names the file system type ("lofs"), the init
 * routine, the VSW_* flags for the type and the mount option table.
 */
static vfsdef_t vfw = {
	VFSDEF_VERSION,
	"lofs",
	lofsinit,
	VSW_HASPROTO|VSW_STATS|VSW_ZMOUNT,
	&lofs_mntopts
};
64
/*
 * LOFS mount options table
 *
 * Each *_cancel array is a NULL-terminated list of the option names that
 * are cancelled by the option it is attached to in mntopts[] below:
 * xattr and noxattr cancel each other, as do the lofs "sub" and "nosub"
 * options.
 */
static char *xattr_cancel[] = { MNTOPT_NOXATTR, NULL };
static char *noxattr_cancel[] = { MNTOPT_XATTR, NULL };
static char *sub_cancel[] = { MNTOPT_LOFS_NOSUB, NULL };
static char *nosub_cancel[] = { MNTOPT_LOFS_SUB, NULL };

static mntopt_t mntopts[] = {
/*
 *	option name		cancel option	default arg	flags
 *		private data
 */
	{ MNTOPT_XATTR,		xattr_cancel,	NULL,		0,
		(void *)0 },
	{ MNTOPT_NOXATTR,	noxattr_cancel,	NULL,		0,
		(void *)0 },
	{ MNTOPT_LOFS_SUB,	sub_cancel,	NULL,		0,
		(void *)0 },
	{ MNTOPT_LOFS_NOSUB,	nosub_cancel,	NULL,		0,
		(void *)0 },
};

/*
 * Option table descriptor referenced by vfw: the number of entries in
 * mntopts[] followed by the array itself.
 */
static mntopts_t lofs_mntopts = {
	sizeof (mntopts) / sizeof (mntopt_t),
	mntopts
};
92
/*
 * Module linkage information for the kernel.
 *
 * modlfs describes this module as a file system (mod_fsops) and points at
 * the vfsdef_t above; modlinkage wraps modlfs in a NULL-terminated list
 * and is what _init() and _info() pass to mod_install() and mod_info().
 */

static struct modlfs modlfs = {
	&mod_fsops, "filesystem for lofs", &vfw
};

static struct modlinkage modlinkage = {
	MODREV_1, (void *)&modlfs, NULL
};
104
105/*
106 * This is the module initialization routine.
107 */
108
109int
110_init(void)
111{
112	int status;
113
114	lofs_subrinit();
115	status = mod_install(&modlinkage);
116	if (status != 0) {
117		/*
118		 * Cleanup previously initialized work.
119		 */
120		lofs_subrfini();
121	}
122
123	return (status);
124}
125
/*
 * Module unload entry point.
 *
 * Don't allow the lofs module to be unloaded for now.
 * There is a memory leak if it gets unloaded.
 *
 * Always returns EBUSY; no teardown is attempted.
 */

int
_fini(void)
{
	return (EBUSY);
}
136
137int
138_info(struct modinfo *modinfop)
139{
140	return (mod_info(&modlinkage, modinfop));
141}
142
143
/* File system type index passed to lofsinit(); stored into vfs_fstype. */
static int lofsfstype;
/* lofs vfs operations vector, filled in by vfs_setfsops() in lofsinit(). */
vfsops_t *lo_vfsops;
146
/*
 * lo mount vfsop
 * Set up mount info record and attach it to vfs struct.
 *
 *	vfsp	vfs being mounted; receives the loinfo in vfs_data
 *	vp	vnode being mounted on (checked for busy/root below)
 *	uap	mount arguments; only spec and flags are used here
 *	cr	credentials of the caller
 *
 * Returns 0 on success.  Failures are:
 *	EPERM	secpolicy_fs_mount() refused the mount
 *	EBUSY	MS_OVERLAY not given and vp is referenced elsewhere or is
 *		itself a root vnode
 *	EACCES	the labeled-system (MAC) checks failed
 * or whatever error lookupname(), VOP_ACCESS() or traverse() returned.
 *
 * The hold on realrootvp obtained from lookupname() is released on every
 * error path after the lookup; on success the vnode is handed to
 * makelonode().
 */
/*ARGSUSED*/
static int
lo_mount(struct vfs *vfsp,
	struct vnode *vp,
	struct mounta *uap,
	struct cred *cr)
{
	int error;
	struct vnode *srootvp = NULL;	/* the server's root */
	struct vnode *realrootvp;
	struct loinfo *li;
	int nodev;

	/*
	 * Remember whether "nodevices" was already set before the policy
	 * check, so that an option added by the check can be detected below.
	 */
	nodev = vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL);

	/*
	 * NOTE(review): the value stored in error is discarded here; every
	 * policy failure is reported as EPERM.
	 */
	if ((error = secpolicy_fs_mount(cr, vp, vfsp)) != 0)
		return (EPERM);

	/*
	 * Loopback devices which get "nodevices" added can be done without
	 * "nodevices" set because we cannot import devices into a zone
	 * with loopback.  Note that we have all zone privileges when
	 * this happens; if not, we'd have gotten "nosuid".
	 */
	if (!nodev && vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL))
		vfs_setmntopt(vfsp, MNTOPT_DEVICES, NULL, VFS_NODISPLAY);

	/*
	 * Unless this is an overlay mount, refuse to mount on a vnode that
	 * has other references or that is the root of a file system.
	 */
	mutex_enter(&vp->v_lock);
	if (!(uap->flags & MS_OVERLAY) &&
	    (vp->v_count != 1 || (vp->v_flag & VROOT))) {
		mutex_exit(&vp->v_lock);
		return (EBUSY);
	}
	mutex_exit(&vp->v_lock);

	/*
	 * Find real root, and make vfs point to real vfs
	 */

	if (error = lookupname(uap->spec, (uap->flags & MS_SYSSPACE) ?
	    UIO_SYSSPACE : UIO_USERSPACE, FOLLOW, NULLVPP, &realrootvp))
		return (error);

	/*
	 * Enforce MAC policy if needed.
	 *
	 * Loopback mounts must not allow writing up. The dominance test
	 * is intended to prevent a global zone caller from accidentally
	 * creating write-up conditions between two labeled zones.
	 * Local zones can't violate MAC on their own without help from
	 * the global zone because they can't name a pathname that
	 * they don't already have.
	 *
	 * The special case check for the NET_MAC_AWARE process flag is
	 * to support the case of the automounter in the global zone. We
	 * permit automounting of local zone directories such as home
	 * directories, into the global zone as required by setlabel,
	 * zonecopy, and saving of desktop sessions. Such mounts are
	 * trusted not to expose the contents of one zone's directories
	 * to another by leaking them through the global zone.
	 */
	if (is_system_labeled() && crgetzoneid(cr) == GLOBAL_ZONEID) {
		char	specname[MAXPATHLEN];
		zone_t	*from_zptr;
		zone_t	*to_zptr;

		/*
		 * NOTE(review): the path is resolved with CRED() rather
		 * than the cr argument -- confirm this is intentional.
		 */
		if (vnodetopath(NULL, realrootvp, specname,
		    sizeof (specname), CRED()) != 0) {
			VN_RELE(realrootvp);
			return (EACCES);
		}

		/*
		 * Both zone pointers are released with zone_rele() on every
		 * path out of this block.
		 */
		from_zptr = zone_find_by_path(specname);
		to_zptr = zone_find_by_path(refstr_value(vfsp->vfs_mntpt));

		/*
		 * Special case for scratch zones used for Live Upgrade:
		 * this is used to mount the zone's root from /root to /a in
		 * the scratch zone.  As with the other special case, this
		 * appears to be outside of the zone because it's not under
		 * the zone rootpath, which is $ZONEPATH/lu in the scratch
		 * zone case.
		 */

		if (from_zptr != to_zptr &&
		    !(to_zptr->zone_flags & ZF_IS_SCRATCH)) {
			/*
			 * We know at this point that the labels aren't equal
			 * because the zone pointers aren't equal, and zones
			 * can't share a label.
			 *
			 * If the source is the global zone then making
			 * it available to a local zone must be done in
			 * read-only mode as the label will become admin_low.
			 *
			 * If it is a mount between local zones then if
			 * the current process is in the global zone and has
			 * the NET_MAC_AWARE flag, then regular read-write
			 * access is allowed.  If it's in some other zone, but
			 * the label on the mount point dominates the original
			 * source, then allow the mount as read-only
			 * ("read-down").
			 */
			if (from_zptr->zone_id == GLOBAL_ZONEID) {
				/* make the mount read-only */
				vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0);
			} else { /* cross-zone mount */
				if (to_zptr->zone_id == GLOBAL_ZONEID &&
				    /* LINTED: no consequent */
				    getpflags(NET_MAC_AWARE, cr) != 0) {
					/* Allow the mount as read-write */
				} else if (bldominates(
				    label2bslabel(to_zptr->zone_slabel),
				    label2bslabel(from_zptr->zone_slabel))) {
					/* make the mount read-only */
					vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0);
				} else {
					/* neither exception applies: refuse */
					VN_RELE(realrootvp);
					zone_rele(to_zptr);
					zone_rele(from_zptr);
					return (EACCES);
				}
			}
		}
		zone_rele(to_zptr);
		zone_rele(from_zptr);
	}

	/*
	 * realrootvp may be an AUTOFS node, in which case we perform a
	 * VOP_ACCESS() to trigger the mount of the intended filesystem.
	 * This causes a loopback mount of the intended filesystem instead
	 * of the AUTOFS filesystem.
	 *
	 * If a lofs mount creates a mount loop (such that a lofs vfs is
	 * mounted on an autofs node and that lofs vfs points back to the
	 * autofs node which it is mounted on) then a VOP_ACCESS call will
	 * create a deadlock. Once this deadlock is released, VOP_ACCESS will
	 * return EINTR. In such a case we don't want the lofs vfs to be
	 * created as the loop could panic the system.
	 */
	if ((error = VOP_ACCESS(realrootvp, 0, 0, cr, NULL)) != 0) {
		VN_RELE(realrootvp);
		return (error);
	}

	/*
	 * We're interested in the top most filesystem.
	 * This is specially important when uap->spec is a trigger
	 * AUTOFS node, since we're really interested in mounting the
	 * filesystem AUTOFS mounted as result of the VOP_ACCESS()
	 * call not the AUTOFS node itself.
	 */
	if (vn_mountedvfs(realrootvp) != NULL) {
		if (error = traverse(&realrootvp)) {
			VN_RELE(realrootvp);
			return (error);
		}
	}

	/*
	 * Allocate a vfs info struct and attach it
	 */
	li = kmem_zalloc(sizeof (struct loinfo), KM_SLEEP);
	li->li_realvfs = realrootvp->v_vfsp;
	li->li_mountvfs = vfsp;

	/*
	 * Set mount flags to be inherited by loopback vfs's
	 */
	if (vfs_optionisset(vfsp, MNTOPT_RO, NULL)) {
		li->li_mflag |= VFS_RDONLY;
	}
	/* "nosuid" implies both nosetuid and nodevices */
	if (vfs_optionisset(vfsp, MNTOPT_NOSUID, NULL)) {
		li->li_mflag |= (VFS_NOSETUID|VFS_NODEVICES);
	}
	if (vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL)) {
		li->li_mflag |= VFS_NODEVICES;
	}
	if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) {
		li->li_mflag |= VFS_NOSETUID;
	}
	/*
	 * Permissive flags are added to the "deny" bitmap.
	 */
	if (vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL)) {
		li->li_dflag |= VFS_XATTR;
	}
	if (vfs_optionisset(vfsp, MNTOPT_NONBMAND, NULL)) {
		li->li_dflag |= VFS_NBMAND;
	}

	/*
	 * Propagate inheritable mount flags from the real vfs.
	 * Each restrictive flag set on the real vfs is turned into the
	 * matching (non-displayed) mount option unless already present.
	 */
	if ((li->li_realvfs->vfs_flag & VFS_RDONLY) &&
	    !vfs_optionisset(vfsp, MNTOPT_RO, NULL))
		vfs_setmntopt(vfsp, MNTOPT_RO, NULL,
		    VFS_NODISPLAY);
	if ((li->li_realvfs->vfs_flag & VFS_NOSETUID) &&
	    !vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL))
		vfs_setmntopt(vfsp, MNTOPT_NOSETUID, NULL,
		    VFS_NODISPLAY);
	if ((li->li_realvfs->vfs_flag & VFS_NODEVICES) &&
	    !vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL))
		vfs_setmntopt(vfsp, MNTOPT_NODEVICES, NULL,
		    VFS_NODISPLAY);
	/*
	 * Permissive flags such as VFS_XATTR, as opposed to restrictive flags
	 * such as VFS_RDONLY, are handled differently.  An explicit
	 * MNTOPT_NOXATTR should override the underlying filesystem's VFS_XATTR.
	 */
	if ((li->li_realvfs->vfs_flag & VFS_XATTR) &&
	    !vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL) &&
	    !vfs_optionisset(vfsp, MNTOPT_XATTR, NULL))
		vfs_setmntopt(vfsp, MNTOPT_XATTR, NULL,
		    VFS_NODISPLAY);
	/* same treatment for non-blocking mandatory locking */
	if ((li->li_realvfs->vfs_flag & VFS_NBMAND) &&
	    !vfs_optionisset(vfsp, MNTOPT_NBMAND, NULL) &&
	    !vfs_optionisset(vfsp, MNTOPT_NONBMAND, NULL))
		vfs_setmntopt(vfsp, MNTOPT_NBMAND, NULL,
		    VFS_NODISPLAY);

	/*
	 * Attach the loinfo to the vfs and mirror the block size, device
	 * and fsid of the real vfs in the loopback vfs.
	 */
	li->li_refct = 0;
	vfsp->vfs_data = (caddr_t)li;
	vfsp->vfs_bcount = 0;
	vfsp->vfs_fstype = lofsfstype;
	vfsp->vfs_bsize = li->li_realvfs->vfs_bsize;

	vfsp->vfs_dev = li->li_realvfs->vfs_dev;
	vfsp->vfs_fsid.val[0] = li->li_realvfs->vfs_fsid.val[0];
	vfsp->vfs_fsid.val[1] = li->li_realvfs->vfs_fsid.val[1];

	if (vfs_optionisset(vfsp, MNTOPT_LOFS_NOSUB, NULL)) {
		li->li_flag |= LO_NOSUB;
	}

	/*
	 * Propagate any VFS features
	 */

	vfs_propagate_features(li->li_realvfs, vfsp);

	/*
	 * Setup the hashtable. If the root of this mount isn't a directory,
	 * there's no point in allocating a large hashtable. A table with one
	 * bucket is sufficient.
	 */
	if (realrootvp->v_type != VDIR)
		lsetup(li, 1);
	else
		lsetup(li, 0);	/* presumably selects the default size */

	/*
	 * Make the root vnode
	 */
	srootvp = makelonode(realrootvp, li, 0);
	srootvp->v_flag |= VROOT;
	li->li_rootvp = srootvp;

#ifdef LODEBUG
	lo_dprint(4, "lo_mount: vfs %p realvfs %p root %p realroot %p li %p\n",
	    vfsp, li->li_realvfs, srootvp, realrootvp, li);
#endif
	return (0);
}
417
418/*
419 * Undo loopback mount
420 */
421static int
422lo_unmount(struct vfs *vfsp, int flag, struct cred *cr)
423{
424	struct loinfo *li;
425
426	if (secpolicy_fs_unmount(cr, vfsp) != 0)
427		return (EPERM);
428
429	/*
430	 * Forced unmount is not supported by this file system
431	 * and thus, ENOTSUP, is being returned.
432	 */
433	if (flag & MS_FORCE)
434		return (ENOTSUP);
435
436	li = vtoli(vfsp);
437#ifdef LODEBUG
438	lo_dprint(4, "lo_unmount(%p) li %p\n", vfsp, li);
439#endif
440	if (li->li_refct != 1 || li->li_rootvp->v_count != 1) {
441#ifdef LODEBUG
442		lo_dprint(4, "refct %d v_ct %d\n", li->li_refct,
443		    li->li_rootvp->v_count);
444#endif
445		return (EBUSY);
446	}
447	VN_RELE(li->li_rootvp);
448	return (0);
449}
450
451/*
452 * Find root of lofs mount.
453 */
454static int
455lo_root(struct vfs *vfsp, struct vnode **vpp)
456{
457	*vpp = vtoli(vfsp)->li_rootvp;
458#ifdef LODEBUG
459	lo_dprint(4, "lo_root(0x%p) = %p\n", vfsp, *vpp);
460#endif
461	/*
462	 * If the root of the filesystem is a special file, return the specvp
463	 * version of the vnode. We don't save the specvp vnode in our
464	 * hashtable since that's exclusively for lnodes.
465	 */
466	if (IS_DEVVP(*vpp)) {
467		struct vnode *svp;
468
469		svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, kcred);
470		if (svp == NULL)
471			return (ENOSYS);
472		*vpp = svp;
473	} else {
474		VN_HOLD(*vpp);
475	}
476
477	return (0);
478}
479
480/*
481 * Get file system statistics.
482 */
483static int
484lo_statvfs(register struct vfs *vfsp, struct statvfs64 *sbp)
485{
486	vnode_t *realrootvp;
487
488#ifdef LODEBUG
489	lo_dprint(4, "lostatvfs %p\n", vfsp);
490#endif
491	/*
492	 * Using realrootvp->v_vfsp (instead of the realvfsp that was
493	 * cached) is necessary to make lofs work woth forced UFS unmounts.
494	 * In the case of a forced unmount, UFS stores a set of dummy vfsops
495	 * in all the (i)vnodes in the filesystem. The dummy ops simply
496	 * returns back EIO.
497	 */
498	(void) lo_realvfs(vfsp, &realrootvp);
499	if (realrootvp != NULL)
500		return (VFS_STATVFS(realrootvp->v_vfsp, sbp));
501	else
502		return (EIO);
503}
504
/*
 * Sync vfsop.  There is nothing for LOFS to flush: it owns no data or
 * metadata, and I/O pending on the underlying filesystem is flushed when
 * that filesystem is synched.  Always returns 0.
 */
/* ARGSUSED */
static int
lo_sync(struct vfs *vfsp, short flag, struct cred *cr)
{
#ifdef LODEBUG
	lo_dprint(4, "lo_sync: %p\n", vfsp);
#endif

	return (0);
}
520
521/*
522 * Obtain the vnode from the underlying filesystem.
523 */
524static int
525lo_vget(struct vfs *vfsp, struct vnode **vpp, struct fid *fidp)
526{
527	vnode_t *realrootvp;
528
529#ifdef LODEBUG
530	lo_dprint(4, "lo_vget: %p\n", vfsp);
531#endif
532	(void) lo_realvfs(vfsp, &realrootvp);
533	if (realrootvp != NULL)
534		return (VFS_VGET(realrootvp->v_vfsp, vpp, fidp));
535	else
536		return (EIO);
537}
538
539/*
540 * Free mount-specific data.
541 */
542static void
543lo_freevfs(struct vfs *vfsp)
544{
545	struct loinfo *li = vtoli(vfsp);
546
547	ldestroy(li);
548	kmem_free(li, sizeof (struct loinfo));
549}
550
551static int
552lofsinit(int fstyp, char *name)
553{
554	static const fs_operation_def_t lo_vfsops_template[] = {
555		VFSNAME_MOUNT,		{ .vfs_mount = lo_mount },
556		VFSNAME_UNMOUNT,	{ .vfs_unmount = lo_unmount },
557		VFSNAME_ROOT,		{ .vfs_root = lo_root },
558		VFSNAME_STATVFS,	{ .vfs_statvfs = lo_statvfs },
559		VFSNAME_SYNC,		{ .vfs_sync = lo_sync },
560		VFSNAME_VGET,		{ .vfs_vget = lo_vget },
561		VFSNAME_FREEVFS,	{ .vfs_freevfs = lo_freevfs },
562		NULL,			NULL
563	};
564	int error;
565
566	error = vfs_setfsops(fstyp, lo_vfsops_template, &lo_vfsops);
567	if (error != 0) {
568		cmn_err(CE_WARN, "lofsinit: bad vfs ops template");
569		return (error);
570	}
571
572	error = vn_make_ops(name, lo_vnodeops_template, &lo_vnodeops);
573	if (error != 0) {
574		(void) vfs_freevfsops_by_type(fstyp);
575		cmn_err(CE_WARN, "lofsinit: bad vnode ops template");
576		return (error);
577	}
578
579	lofsfstype = fstyp;
580
581	return (0);
582}
583