1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26#pragma ident	"%Z%%M%	%I%	%E% SMI"
27
28#include <sys/param.h>
29#include <sys/systm.h>
30#include <sys/errno.h>
31#include <sys/vnode.h>
32#include <sys/vfs.h>
33#include <sys/vfs_opreg.h>
34#include <sys/uio.h>
35#include <sys/cred.h>
36#include <sys/pathname.h>
37#include <sys/debug.h>
38#include <sys/fs/lofs_node.h>
39#include <sys/fs/lofs_info.h>
40#include <fs/fs_subr.h>
41#include <vm/as.h>
42#include <vm/seg.h>
43
/*
 * These are the vnode ops routines which implement the vnode interface to
 * the looped-back file system.  These routines just take their parameters
 * and then call the appropriate real vnode routine(s) to do the work.
 */
49
50static int
51lo_open(vnode_t **vpp, int flag, struct cred *cr, caller_context_t *ct)
52{
53	vnode_t *vp = *vpp;
54	vnode_t *rvp;
55	vnode_t *oldvp;
56	int error;
57
58#ifdef LODEBUG
59	lo_dprint(4, "lo_open vp %p cnt=%d realvp %p cnt=%d\n",
60	    vp, vp->v_count, realvp(vp), realvp(vp)->v_count);
61#endif
62
63	oldvp = vp;
64	vp = rvp = realvp(vp);
65	/*
66	 * Need to hold new reference to vp since VOP_OPEN() may
67	 * decide to release it.
68	 */
69	VN_HOLD(vp);
70	error = VOP_OPEN(&rvp, flag, cr, ct);
71
72	if (!error && rvp != vp) {
73		/*
74		 * the FS which we called should have released the
75		 * new reference on vp
76		 */
77		*vpp = makelonode(rvp, vtoli(oldvp->v_vfsp), 0);
78		if ((*vpp)->v_type == VDIR) {
79			/*
80			 * Copy over any looping flags to the new lnode.
81			 */
82			(vtol(*vpp))->lo_looping |= (vtol(oldvp))->lo_looping;
83		}
84		if (IS_DEVVP(*vpp)) {
85			vnode_t *svp;
86
87			svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr);
88			VN_RELE(*vpp);
89			if (svp == NULL)
90				error = ENOSYS;
91			else
92				*vpp = svp;
93		}
94		VN_RELE(oldvp);
95	} else {
96		ASSERT(rvp->v_count > 1);
97		VN_RELE(rvp);
98	}
99
100	return (error);
101}
102
103static int
104lo_close(
105	vnode_t *vp,
106	int flag,
107	int count,
108	offset_t offset,
109	struct cred *cr,
110	caller_context_t *ct)
111{
112#ifdef LODEBUG
113	lo_dprint(4, "lo_close vp %p realvp %p\n", vp, realvp(vp));
114#endif
115	vp = realvp(vp);
116	return (VOP_CLOSE(vp, flag, count, offset, cr, ct));
117}
118
119static int
120lo_read(vnode_t *vp, struct uio *uiop, int ioflag, struct cred *cr,
121	caller_context_t *ct)
122{
123#ifdef LODEBUG
124	lo_dprint(4, "lo_read vp %p realvp %p\n", vp, realvp(vp));
125#endif
126	vp = realvp(vp);
127	return (VOP_READ(vp, uiop, ioflag, cr, ct));
128}
129
130static int
131lo_write(vnode_t *vp, struct uio *uiop, int ioflag, struct cred *cr,
132	caller_context_t *ct)
133{
134#ifdef LODEBUG
135	lo_dprint(4, "lo_write vp %p realvp %p\n", vp, realvp(vp));
136#endif
137	vp = realvp(vp);
138	return (VOP_WRITE(vp, uiop, ioflag, cr, ct));
139}
140
141static int
142lo_ioctl(
143	vnode_t *vp,
144	int cmd,
145	intptr_t arg,
146	int flag,
147	struct cred *cr,
148	int *rvalp,
149	caller_context_t *ct)
150{
151#ifdef LODEBUG
152	lo_dprint(4, "lo_ioctl vp %p realvp %p\n", vp, realvp(vp));
153#endif
154	vp = realvp(vp);
155	return (VOP_IOCTL(vp, cmd, arg, flag, cr, rvalp, ct));
156}
157
158static int
159lo_setfl(vnode_t *vp, int oflags, int nflags, cred_t *cr, caller_context_t *ct)
160{
161	vp = realvp(vp);
162	return (VOP_SETFL(vp, oflags, nflags, cr, ct));
163}
164
165static int
166lo_getattr(
167	vnode_t *vp,
168	struct vattr *vap,
169	int flags,
170	struct cred *cr,
171	caller_context_t *ct)
172{
173	int error;
174
175#ifdef LODEBUG
176	lo_dprint(4, "lo_getattr vp %p realvp %p\n", vp, realvp(vp));
177#endif
178	if (error = VOP_GETATTR(realvp(vp), vap, flags, cr, ct))
179		return (error);
180
181	return (0);
182}
183
184static int
185lo_setattr(
186	vnode_t *vp,
187	struct vattr *vap,
188	int flags,
189	struct cred *cr,
190	caller_context_t *ct)
191{
192#ifdef LODEBUG
193	lo_dprint(4, "lo_setattr vp %p realvp %p\n", vp, realvp(vp));
194#endif
195	vp = realvp(vp);
196	return (VOP_SETATTR(vp, vap, flags, cr, ct));
197}
198
199static int
200lo_access(
201	vnode_t *vp,
202	int mode,
203	int flags,
204	struct cred *cr,
205	caller_context_t *ct)
206{
207#ifdef LODEBUG
208	lo_dprint(4, "lo_access vp %p realvp %p\n", vp, realvp(vp));
209#endif
210	if (mode & VWRITE) {
211		if (vp->v_type == VREG && vn_is_readonly(vp))
212			return (EROFS);
213	}
214	vp = realvp(vp);
215	return (VOP_ACCESS(vp, mode, flags, cr, ct));
216}
217
218static int
219lo_fsync(vnode_t *vp, int syncflag, struct cred *cr, caller_context_t *ct)
220{
221#ifdef LODEBUG
222	lo_dprint(4, "lo_fsync vp %p realvp %p\n", vp, realvp(vp));
223#endif
224	vp = realvp(vp);
225	return (VOP_FSYNC(vp, syncflag, cr, ct));
226}
227
/*
 * Inactive: hand the lnode behind this lofs vnode to freelonode().
 * Unlike the other operations here this one is not forwarded to the
 * real vnode; cr and ct are unused.
 */
/*ARGSUSED*/
static void
lo_inactive(vnode_t *vp, struct cred *cr, caller_context_t *ct)
{
#ifdef LODEBUG
	lo_dprint(4, "lo_inactive %p, realvp %p\n", vp, realvp(vp));
#endif
	freelonode(vtol(vp));
}
237
238/* ARGSUSED */
239static int
240lo_fid(vnode_t *vp, struct fid *fidp, caller_context_t *ct)
241{
242#ifdef LODEBUG
243	lo_dprint(4, "lo_fid %p, realvp %p\n", vp, realvp(vp));
244#endif
245	vp = realvp(vp);
246	return (VOP_FID(vp, fidp, ct));
247}
248
249/*
250 * Given a vnode of lofs type, lookup nm name and
251 * return a shadow vnode (of lofs type) of the
252 * real vnode found.
253 *
254 * Due to the nature of lofs, there is a potential
255 * looping in path traversal.
256 *
257 * starting from the mount point of an lofs;
258 * a loop is defined to be a traversal path
259 * where the mount point or the real vnode of
260 * the root of this lofs is encountered twice.
261 * Once at the start of traversal and second
262 * when the looping is found.
263 *
264 * When a loop is encountered, a shadow of the
265 * covered vnode is returned to stop the looping.
266 *
267 * This normally works, but with the advent of
268 * the new automounter, returning the shadow of the
269 * covered vnode (autonode, in this case) does not
270 * stop the loop.  Because further lookup on this
271 * lonode will cause the autonode to call lo_lookup()
272 * on the lonode covering it.
273 *
274 * example "/net/jurassic/net/jurassic" is a loop.
275 * returning the shadow of the autonode corresponding to
276 * "/net/jurassic/net/jurassic" will not terminate the
277 * loop.   To solve this problem we allow the loop to go
278 * through one more level component lookup.  Whichever
279 * directory is then looked up in "/net/jurassic/net/jurassic"
280 * the vnode returned is the vnode covered by the autonode
281 * "net" and this will terminate the loop.
282 *
 * Lookup for dot dot has to be dealt with separately.
 * It would be nice to have a "one size fits all" kind
 * of solution, so that we don't have so many if statements
 * in lo_lookup() to handle dotdot.  But, since
 * there are so many special cases to handle the different
 * kinds of looping above, we need special code to handle
 * dotdot lookup as well.
 */
291static int
292lo_lookup(
293	vnode_t *dvp,
294	char *nm,
295	vnode_t **vpp,
296	struct pathname *pnp,
297	int flags,
298	vnode_t *rdir,
299	struct cred *cr,
300	caller_context_t *ct,
301	int *direntflags,
302	pathname_t *realpnp)
303{
304	vnode_t *vp = NULL, *tvp = NULL, *nonlovp;
305	int error, is_indirectloop;
306	vnode_t *realdvp = realvp(dvp);
307	struct loinfo *li = vtoli(dvp->v_vfsp);
308	int looping = 0;
309	int autoloop = 0;
310	int doingdotdot = 0;
311	int nosub = 0;
312	int mkflag = 0;
313
314	/*
315	 * If name is empty and no XATTR flags are set, then return
316	 * dvp (empty name == lookup ".").  If an XATTR flag is set
317	 * then we need to call VOP_LOOKUP to get the xattr dir.
318	 */
319	if (nm[0] == '\0' && ! (flags & (CREATE_XATTR_DIR|LOOKUP_XATTR))) {
320		VN_HOLD(dvp);
321		*vpp = dvp;
322		return (0);
323	}
324
325	if (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0') {
326		doingdotdot++;
327		/*
328		 * Handle ".." out of mounted filesystem
329		 */
330		while ((realdvp->v_flag & VROOT) && realdvp != rootdir) {
331			realdvp = realdvp->v_vfsp->vfs_vnodecovered;
332			ASSERT(realdvp != NULL);
333		}
334	}
335
336	*vpp = NULL;	/* default(error) case */
337
338	/*
339	 * Do the normal lookup
340	 */
341	if (error = VOP_LOOKUP(realdvp, nm, &vp, pnp, flags, rdir, cr,
342	    ct, direntflags, realpnp)) {
343		vp = NULL;
344		goto out;
345	}
346
347	/*
348	 * We do this check here to avoid returning a stale file handle to the
349	 * caller.
350	 */
351	if (nm[0] == '.' && nm[1] == '\0') {
352		ASSERT(vp == realdvp);
353		VN_HOLD(dvp);
354		VN_RELE(vp);
355		*vpp = dvp;
356		return (0);
357	}
358
359	if (doingdotdot) {
360		if ((vtol(dvp))->lo_looping & LO_LOOPING) {
361			vfs_t *vfsp;
362
363			error = vn_vfsrlock_wait(realdvp);
364			if (error)
365				goto out;
366			vfsp = vn_mountedvfs(realdvp);
367			/*
368			 * In the standard case if the looping flag is set and
369			 * performing dotdot we would be returning from a
370			 * covered vnode, implying vfsp could not be null. The
371			 * exceptions being if we have looping and overlay
372			 * mounts or looping and covered file systems.
373			 */
374			if (vfsp == NULL) {
375				/*
376				 * Overlay mount or covered file system,
377				 * so just make the shadow node.
378				 */
379				vn_vfsunlock(realdvp);
380				*vpp = makelonode(vp, li, 0);
381				(vtol(*vpp))->lo_looping |= LO_LOOPING;
382				return (0);
383			}
384			/*
385			 * When looping get the actual found vnode
386			 * instead of the vnode covered.
387			 * Here we have to hold the lock for realdvp
388			 * since an unmount during the traversal to the
389			 * root vnode would turn *vfsp into garbage
390			 * which would be fatal.
391			 */
392			error = VFS_ROOT(vfsp, &tvp);
393			vn_vfsunlock(realdvp);
394
395			if (error)
396				goto out;
397
398			if ((tvp == li->li_rootvp) && (vp == realvp(tvp))) {
399				/*
400				 * we're back at the real vnode
401				 * of the rootvp
402				 *
403				 * return the rootvp
404				 * Ex: /mnt/mnt/..
405				 * where / has been lofs-mounted
406				 * onto /mnt.  Return the lofs
407				 * node mounted at /mnt.
408				 */
409				*vpp = tvp;
410				VN_RELE(vp);
411				return (0);
412			} else {
413				/*
414				 * We are returning from a covered
415				 * node whose vfs_mountedhere is
416				 * not pointing to vfs of the current
417				 * root vnode.
418				 * This is a condn where in we
419				 * returned a covered node say Zc
420				 * but Zc is not the cover of current
421				 * root.
422				 * i.e.., if X is the root vnode
423				 * lookup(Zc,"..") is taking us to
424				 * X.
425				 * Ex: /net/X/net/X/Y
426				 *
427				 * If LO_AUTOLOOP (autofs/lofs looping detected)
428				 * has been set then we are encountering the
429				 * cover of Y (Y being any directory vnode
430				 * under /net/X/net/X/).
431				 * When performing a dotdot set the
432				 * returned vp to the vnode covered
433				 * by the mounted lofs, ie /net/X/net/X
434				 */
435				VN_RELE(tvp);
436				if ((vtol(dvp))->lo_looping & LO_AUTOLOOP) {
437					VN_RELE(vp);
438					vp = li->li_rootvp;
439					vp = vp->v_vfsp->vfs_vnodecovered;
440					VN_HOLD(vp);
441					*vpp = makelonode(vp, li, 0);
442					(vtol(*vpp))->lo_looping |= LO_LOOPING;
443					return (0);
444				}
445			}
446		} else {
447			/*
448			 * No frills just make the shadow node.
449			 */
450			*vpp = makelonode(vp, li, 0);
451			return (0);
452		}
453	}
454
455	nosub = (vtoli(dvp->v_vfsp)->li_flag & LO_NOSUB);
456
457	/*
458	 * If this vnode is mounted on, then we
459	 * traverse to the vnode which is the root of
460	 * the mounted file system.
461	 */
462	if (!nosub && (error = traverse(&vp)))
463		goto out;
464
465	/*
466	 * Make a lnode for the real vnode.
467	 */
468	if (vp->v_type != VDIR || nosub) {
469		*vpp = makelonode(vp, li, 0);
470		if (IS_DEVVP(*vpp)) {
471			vnode_t *svp;
472
473			svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr);
474			VN_RELE(*vpp);
475			if (svp == NULL)
476				error = ENOSYS;
477			else
478				*vpp = svp;
479		}
480		return (error);
481	}
482
483	/*
484	 * if the found vnode (vp) is not of type lofs
485	 * then we're just going to make a shadow of that
486	 * vp and get out.
487	 *
488	 * If the found vnode (vp) is of lofs type, and
489	 * we're not doing dotdot, check if we are
490	 * looping.
491	 */
492	if (!doingdotdot && vfs_matchops(vp->v_vfsp, lo_vfsops)) {
493		/*
494		 * Check if we're looping, i.e.
495		 * vp equals the root vp of the lofs, directly
496		 * or indirectly, return the covered node.
497		 */
498
499		if (!((vtol(dvp))->lo_looping & LO_LOOPING)) {
500			if (vp == li->li_rootvp) {
501				/*
502				 * Direct looping condn.
503				 * Ex:- X is / mounted directory so lookup of
504				 * /X/X is a direct looping condn.
505				 */
506				tvp = vp;
507				vp = vp->v_vfsp->vfs_vnodecovered;
508				VN_HOLD(vp);
509				VN_RELE(tvp);
510				looping++;
511			} else {
512				/*
513				 * Indirect looping can be defined as
514				 * real lookup returning rootvp of the current
515				 * tree in any level of recursion.
516				 *
517				 * This check is useful if there are multiple
518				 * levels of lofs indirections. Suppose vnode X
519				 * in the current lookup has as its real vnode
520				 * another lofs node. Y = realvp(X) Y should be
521				 * a lofs node for the check to continue or Y
522				 * is not the rootvp of X.
523				 * Ex:- say X and Y are two vnodes
524				 * say real(Y) is X and real(X) is Z
525				 * parent vnode for X and Y is Z
526				 * lookup(Y,"path") say we are looking for Y
527				 * again under Y and we have to return Yc.
528				 * but the lookup of Y under Y doesnot return
529				 * Y the root vnode again here is why.
530				 * 1. lookup(Y,"path of Y") will go to
531				 * 2. lookup(real(Y),"path of Y") and then to
532				 * 3. lookup(real(X),"path of Y").
533				 * and now what lookup level 1 sees is the
534				 * outcome of 2 but the vnode Y is due to
535				 * lookup(Z,"path of Y") so we have to skip
536				 * intermediate levels to find if in any level
537				 * there is a looping.
538				 */
539				is_indirectloop = 0;
540				nonlovp = vp;
541				while (
542				    vfs_matchops(nonlovp->v_vfsp, lo_vfsops) &&
543				    !(is_indirectloop)) {
544					if (li->li_rootvp  == nonlovp) {
545						is_indirectloop++;
546						break;
547					}
548					nonlovp = realvp(nonlovp);
549				}
550
551				if (is_indirectloop) {
552					VN_RELE(vp);
553					vp = nonlovp;
554					vp = vp->v_vfsp->vfs_vnodecovered;
555					VN_HOLD(vp);
556					looping++;
557				}
558			}
559		} else {
560			/*
561			 * come here only because of the interaction between
562			 * the autofs and lofs.
563			 *
564			 * Lookup of "/net/X/net/X" will return a shadow of
565			 * an autonode X_a which we call X_l.
566			 *
567			 * Lookup of anything under X_l, will trigger a call to
568			 * auto_lookup(X_a,nm) which will eventually call
569			 * lo_lookup(X_lr,nm) where X_lr is the root vnode of
570			 * the current lofs.
571			 *
572			 * We come here only when we are called with X_l as dvp
573			 * and look for something underneath.
574			 *
575			 * Now that an autofs/lofs looping condition has been
576			 * identified any directory vnode contained within
577			 * dvp will be set to the vnode covered by the
578			 * mounted autofs. Thus all directories within dvp
579			 * will appear empty hence teminating the looping.
580			 * The LO_AUTOLOOP flag is set on the returned lonode
581			 * to indicate the termination of the autofs/lofs
582			 * looping. This is required for the correct behaviour
583			 * when performing a dotdot.
584			 */
585			realdvp = realvp(dvp);
586			while (vfs_matchops(realdvp->v_vfsp, lo_vfsops)) {
587				realdvp = realvp(realdvp);
588			}
589
590			error = VFS_ROOT(realdvp->v_vfsp, &tvp);
591			if (error)
592				goto out;
593			/*
594			 * tvp now contains the rootvp of the vfs of the
595			 * real vnode of dvp. The directory vnode vp is set
596			 * to the covered vnode to terminate looping. No
597			 * distinction is made between any vp as all directory
598			 * vnodes contained in dvp are returned as the covered
599			 * vnode.
600			 */
601			VN_RELE(vp);
602			vp = tvp;	/* possibly is an autonode */
603
604			/*
605			 * Need to find the covered vnode
606			 */
607			if (vp->v_vfsp->vfs_vnodecovered == NULL) {
608				/*
609				 * We don't have a covered vnode so this isn't
610				 * an autonode. To find the autonode simply
611				 * find the vnode covered by the lofs rootvp.
612				 */
613				vp = li->li_rootvp;
614				vp = vp->v_vfsp->vfs_vnodecovered;
615				VN_RELE(tvp);
616				error = VFS_ROOT(vp->v_vfsp, &tvp);
617				if (error)
618					goto out;
619				vp = tvp;	/* now this is an autonode */
620				if (vp->v_vfsp->vfs_vnodecovered == NULL) {
621					/*
622					 * Still can't find a covered vnode.
623					 * Fail the lookup, or we'd loop.
624					 */
625					error = ENOENT;
626					goto out;
627				}
628			}
629			vp = vp->v_vfsp->vfs_vnodecovered;
630			VN_HOLD(vp);
631			VN_RELE(tvp);
632			/*
633			 * Force the creation of a new lnode even if the hash
634			 * table contains a lnode that references this vnode.
635			 */
636			mkflag = LOF_FORCE;
637			autoloop++;
638		}
639	}
640	*vpp = makelonode(vp, li, mkflag);
641
642	if ((looping) ||
643	    (((vtol(dvp))->lo_looping & LO_LOOPING) && !doingdotdot)) {
644		(vtol(*vpp))->lo_looping |= LO_LOOPING;
645	}
646
647	if (autoloop) {
648		(vtol(*vpp))->lo_looping |= LO_AUTOLOOP;
649	}
650
651out:
652	if (error != 0 && vp != NULL)
653		VN_RELE(vp);
654#ifdef LODEBUG
655	lo_dprint(4,
656	"lo_lookup dvp %x realdvp %x nm '%s' newvp %x real vp %x error %d\n",
657	    dvp, realvp(dvp), nm, *vpp, vp, error);
658#endif
659	return (error);
660}
661
662/*ARGSUSED*/
663static int
664lo_create(
665	vnode_t *dvp,
666	char *nm,
667	struct vattr *va,
668	enum vcexcl exclusive,
669	int mode,
670	vnode_t **vpp,
671	struct cred *cr,
672	int flag,
673	caller_context_t *ct,
674	vsecattr_t *vsecp)
675{
676	int error;
677	vnode_t *vp = NULL;
678
679#ifdef LODEBUG
680	lo_dprint(4, "lo_create vp %p realvp %p\n", dvp, realvp(dvp));
681#endif
682	if (*nm == '\0') {
683		ASSERT(vpp && dvp == *vpp);
684		vp = realvp(*vpp);
685	}
686
687	error = VOP_CREATE(realvp(dvp), nm, va, exclusive, mode, &vp, cr, flag,
688	    ct, vsecp);
689	if (!error) {
690		*vpp = makelonode(vp, vtoli(dvp->v_vfsp), 0);
691		if (IS_DEVVP(*vpp)) {
692			vnode_t *svp;
693
694			svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr);
695			VN_RELE(*vpp);
696			if (svp == NULL)
697				error = ENOSYS;
698			else
699				*vpp = svp;
700		}
701	}
702	return (error);
703}
704
705static int
706lo_remove(
707	vnode_t *dvp,
708	char *nm,
709	struct cred *cr,
710	caller_context_t *ct,
711	int flags)
712{
713#ifdef LODEBUG
714	lo_dprint(4, "lo_remove vp %p realvp %p\n", dvp, realvp(dvp));
715#endif
716	dvp = realvp(dvp);
717	return (VOP_REMOVE(dvp, nm, cr, ct, flags));
718}
719
720static int
721lo_link(
722	vnode_t *tdvp,
723	vnode_t *vp,
724	char *tnm,
725	struct cred *cr,
726	caller_context_t *ct,
727	int flags)
728{
729	vnode_t *realvp;
730
731#ifdef LODEBUG
732	lo_dprint(4, "lo_link vp %p realvp %p\n", vp, realvp(vp));
733#endif
734
735	/*
736	 * The source and destination vnodes may be in different lofs
737	 * filesystems sharing the same underlying filesystem, so we need to
738	 * make sure that the filesystem containing the source vnode is not
739	 * mounted read-only (vn_link() has already checked the target vnode).
740	 *
741	 * In a situation such as:
742	 *
743	 * /data	- regular filesystem
744	 * /foo		- lofs mount of /data/foo
745	 * /bar		- read-only lofs mount of /data/bar
746	 *
747	 * This disallows a link from /bar/somefile to /foo/somefile,
748	 * which would otherwise allow changes to somefile on the read-only
749	 * mounted /bar.
750	 */
751
752	if (vn_is_readonly(vp)) {
753		return (EROFS);
754	}
755	while (vn_matchops(vp, lo_vnodeops)) {
756		vp = realvp(vp);
757	}
758
759	/*
760	 * In the case where the source vnode is on another stacking
761	 * filesystem (such as specfs), the loop above will
762	 * terminate before finding the true underlying vnode.
763	 *
764	 * We use VOP_REALVP here to continue the search.
765	 */
766	if (VOP_REALVP(vp, &realvp, ct) == 0)
767		vp = realvp;
768
769	while (vn_matchops(tdvp, lo_vnodeops)) {
770		tdvp = realvp(tdvp);
771	}
772	if (vp->v_vfsp != tdvp->v_vfsp)
773		return (EXDEV);
774	return (VOP_LINK(tdvp, vp, tnm, cr, ct, flags));
775}
776
777static int
778lo_rename(
779	vnode_t *odvp,
780	char *onm,
781	vnode_t *ndvp,
782	char *nnm,
783	struct cred *cr,
784	caller_context_t *ct,
785	int flags)
786{
787	vnode_t *tnvp;
788
789#ifdef LODEBUG
790	lo_dprint(4, "lo_rename vp %p realvp %p\n", odvp, realvp(odvp));
791#endif
792	/*
793	 * If we are coming from a loop back mounted fs, that has been
794	 * mounted in the same filesystem as where we want to move to,
795	 * and that filesystem is read/write, but the lofs filesystem is
796	 * read only, we don't want to allow a rename of the file. The
797	 * vn_rename code checks to be sure the target is read/write already
798	 * so that is not necessary here. However, consider the following
799	 * example:
800	 *		/ - regular root fs
801	 *		/foo - directory in root
802	 *		/foo/bar - file in foo directory(in root fs)
803	 *		/baz - directory in root
804	 *		mount -F lofs -o ro /foo /baz - all still in root
805	 *			directory
806	 * The fact that we mounted /foo on /baz read only should stop us
807	 * from renaming the file /foo/bar /bar, but it doesn't since
808	 * / is read/write. We are still renaming here since we are still
809	 * in the same filesystem, it is just that we do not check to see
810	 * if the filesystem we are coming from in this case is read only.
811	 */
812	if (odvp->v_vfsp->vfs_flag & VFS_RDONLY)
813		return (EROFS);
814	/*
815	 * We need to make sure we're not trying to remove a mount point for a
816	 * filesystem mounted on top of lofs, which only we know about.
817	 */
818	if (vn_matchops(ndvp, lo_vnodeops))	/* Not our problem. */
819		goto rename;
820
821	/*
822	 * XXXci - Once case-insensitive behavior is implemented, it should
823	 * be added here.
824	 */
825	if (VOP_LOOKUP(ndvp, nnm, &tnvp, NULL, 0, NULL, cr,
826	    ct, NULL, NULL) != 0)
827		goto rename;
828	if (tnvp->v_type != VDIR) {
829		VN_RELE(tnvp);
830		goto rename;
831	}
832	if (vn_mountedvfs(tnvp)) {
833		VN_RELE(tnvp);
834		return (EBUSY);
835	}
836	VN_RELE(tnvp);
837rename:
838	/*
839	 * Since the case we're dealing with above can happen at any layer in
840	 * the stack of lofs filesystems, we need to recurse down the stack,
841	 * checking to see if there are any instances of a filesystem mounted on
842	 * top of lofs. In order to keep on using the lofs version of
843	 * VOP_RENAME(), we make sure that while the target directory is of type
844	 * lofs, the source directory (the one used for getting the fs-specific
845	 * version of VOP_RENAME()) is also of type lofs.
846	 */
847	if (vn_matchops(ndvp, lo_vnodeops)) {
848		ndvp = realvp(ndvp);	/* Check the next layer */
849	} else {
850		/*
851		 * We can go fast here
852		 */
853		while (vn_matchops(odvp, lo_vnodeops)) {
854			odvp = realvp(odvp);
855		}
856		if (odvp->v_vfsp != ndvp->v_vfsp)
857			return (EXDEV);
858	}
859	return (VOP_RENAME(odvp, onm, ndvp, nnm, cr, ct, flags));
860}
861
862static int
863lo_mkdir(
864	vnode_t *dvp,
865	char *nm,
866	struct vattr *va,
867	vnode_t **vpp,
868	struct cred *cr,
869	caller_context_t *ct,
870	int flags,
871	vsecattr_t *vsecp)
872{
873	int error;
874
875#ifdef LODEBUG
876	lo_dprint(4, "lo_mkdir vp %p realvp %p\n", dvp, realvp(dvp));
877#endif
878	error = VOP_MKDIR(realvp(dvp), nm, va, vpp, cr, ct, flags, vsecp);
879	if (!error)
880		*vpp = makelonode(*vpp, vtoli(dvp->v_vfsp), 0);
881	return (error);
882}
883
884static int
885lo_realvp(vnode_t *vp, vnode_t **vpp, caller_context_t *ct)
886{
887#ifdef LODEBUG
888	lo_dprint(4, "lo_realvp %p\n", vp);
889#endif
890	while (vn_matchops(vp, lo_vnodeops))
891		vp = realvp(vp);
892
893	if (VOP_REALVP(vp, vpp, ct) != 0)
894		*vpp = vp;
895	return (0);
896}
897
898static int
899lo_rmdir(
900	vnode_t *dvp,
901	char *nm,
902	vnode_t *cdir,
903	struct cred *cr,
904	caller_context_t *ct,
905	int flags)
906{
907	vnode_t *rvp = cdir;
908
909#ifdef LODEBUG
910	lo_dprint(4, "lo_rmdir vp %p realvp %p\n", dvp, realvp(dvp));
911#endif
912	/* if cdir is lofs vnode ptr get its real vnode ptr */
913	if (vn_matchops(dvp, vn_getops(rvp)))
914		(void) lo_realvp(cdir, &rvp, ct);
915	dvp = realvp(dvp);
916	return (VOP_RMDIR(dvp, nm, rvp, cr, ct, flags));
917}
918
919static int
920lo_symlink(
921	vnode_t *dvp,
922	char *lnm,
923	struct vattr *tva,
924	char *tnm,
925	struct cred *cr,
926	caller_context_t *ct,
927	int flags)
928{
929#ifdef LODEBUG
930	lo_dprint(4, "lo_symlink vp %p realvp %p\n", dvp, realvp(dvp));
931#endif
932	dvp = realvp(dvp);
933	return (VOP_SYMLINK(dvp, lnm, tva, tnm, cr, ct, flags));
934}
935
936static int
937lo_readlink(
938	vnode_t *vp,
939	struct uio *uiop,
940	struct cred *cr,
941	caller_context_t *ct)
942{
943	vp = realvp(vp);
944	return (VOP_READLINK(vp, uiop, cr, ct));
945}
946
947static int
948lo_readdir(
949	vnode_t *vp,
950	struct uio *uiop,
951	struct cred *cr,
952	int *eofp,
953	caller_context_t *ct,
954	int flags)
955{
956#ifdef LODEBUG
957	lo_dprint(4, "lo_readdir vp %p realvp %p\n", vp, realvp(vp));
958#endif
959	vp = realvp(vp);
960	return (VOP_READDIR(vp, uiop, cr, eofp, ct, flags));
961}
962
963static int
964lo_rwlock(vnode_t *vp, int write_lock, caller_context_t *ct)
965{
966	vp = realvp(vp);
967	return (VOP_RWLOCK(vp, write_lock, ct));
968}
969
970static void
971lo_rwunlock(vnode_t *vp, int write_lock, caller_context_t *ct)
972{
973	vp = realvp(vp);
974	VOP_RWUNLOCK(vp, write_lock, ct);
975}
976
977static int
978lo_seek(vnode_t *vp, offset_t ooff, offset_t *noffp, caller_context_t *ct)
979{
980	vp = realvp(vp);
981	return (VOP_SEEK(vp, ooff, noffp, ct));
982}
983
984static int
985lo_cmp(vnode_t *vp1, vnode_t *vp2, caller_context_t *ct)
986{
987	while (vn_matchops(vp1, lo_vnodeops))
988		vp1 = realvp(vp1);
989	while (vn_matchops(vp2, lo_vnodeops))
990		vp2 = realvp(vp2);
991	return (VOP_CMP(vp1, vp2, ct));
992}
993
994static int
995lo_frlock(
996	vnode_t *vp,
997	int cmd,
998	struct flock64 *bfp,
999	int flag,
1000	offset_t offset,
1001	struct flk_callback *flk_cbp,
1002	cred_t *cr,
1003	caller_context_t *ct)
1004{
1005	vp = realvp(vp);
1006	return (VOP_FRLOCK(vp, cmd, bfp, flag, offset, flk_cbp, cr, ct));
1007}
1008
1009static int
1010lo_space(
1011	vnode_t *vp,
1012	int cmd,
1013	struct flock64 *bfp,
1014	int flag,
1015	offset_t offset,
1016	struct cred *cr,
1017	caller_context_t *ct)
1018{
1019	vp = realvp(vp);
1020	return (VOP_SPACE(vp, cmd, bfp, flag, offset, cr, ct));
1021}
1022
1023static int
1024lo_getpage(
1025	vnode_t *vp,
1026	offset_t off,
1027	size_t len,
1028	uint_t *prot,
1029	struct page *parr[],
1030	size_t psz,
1031	struct seg *seg,
1032	caddr_t addr,
1033	enum seg_rw rw,
1034	struct cred *cr,
1035	caller_context_t *ct)
1036{
1037	vp = realvp(vp);
1038	return (VOP_GETPAGE(vp, off, len, prot, parr, psz, seg, addr, rw, cr,
1039	    ct));
1040}
1041
1042static int
1043lo_putpage(
1044	vnode_t *vp,
1045	offset_t off,
1046	size_t len,
1047	int flags,
1048	struct cred *cr,
1049	caller_context_t *ct)
1050{
1051	vp = realvp(vp);
1052	return (VOP_PUTPAGE(vp, off, len, flags, cr, ct));
1053}
1054
1055static int
1056lo_map(
1057	vnode_t *vp,
1058	offset_t off,
1059	struct as *as,
1060	caddr_t *addrp,
1061	size_t len,
1062	uchar_t prot,
1063	uchar_t maxprot,
1064	uint_t flags,
1065	struct cred *cr,
1066	caller_context_t *ct)
1067{
1068	vp = realvp(vp);
1069	return (VOP_MAP(vp, off, as, addrp, len, prot, maxprot, flags, cr, ct));
1070}
1071
1072static int
1073lo_addmap(
1074	vnode_t *vp,
1075	offset_t off,
1076	struct as *as,
1077	caddr_t addr,
1078	size_t len,
1079	uchar_t prot,
1080	uchar_t maxprot,
1081	uint_t flags,
1082	struct cred *cr,
1083	caller_context_t *ct)
1084{
1085	vp = realvp(vp);
1086	return (VOP_ADDMAP(vp, off, as, addr, len, prot, maxprot, flags, cr,
1087	    ct));
1088}
1089
1090static int
1091lo_delmap(
1092	vnode_t *vp,
1093	offset_t off,
1094	struct as *as,
1095	caddr_t addr,
1096	size_t len,
1097	uint_t prot,
1098	uint_t maxprot,
1099	uint_t flags,
1100	struct cred *cr,
1101	caller_context_t *ct)
1102{
1103	vp = realvp(vp);
1104	return (VOP_DELMAP(vp, off, as, addr, len, prot, maxprot, flags, cr,
1105	    ct));
1106}
1107
1108static int
1109lo_poll(
1110	vnode_t *vp,
1111	short events,
1112	int anyyet,
1113	short *reventsp,
1114	struct pollhead **phpp,
1115	caller_context_t *ct)
1116{
1117	vp = realvp(vp);
1118	return (VOP_POLL(vp, events, anyyet, reventsp, phpp, ct));
1119}
1120
1121static int
1122lo_dump(vnode_t *vp, caddr_t addr, offset_t bn, offset_t count,
1123    caller_context_t *ct)
1124{
1125	vp = realvp(vp);
1126	return (VOP_DUMP(vp, addr, bn, count, ct));
1127}
1128
1129static int
1130lo_pathconf(
1131	vnode_t *vp,
1132	int cmd,
1133	ulong_t *valp,
1134	struct cred *cr,
1135	caller_context_t *ct)
1136{
1137	vp = realvp(vp);
1138	return (VOP_PATHCONF(vp, cmd, valp, cr, ct));
1139}
1140
1141static int
1142lo_pageio(
1143	vnode_t *vp,
1144	struct page *pp,
1145	u_offset_t io_off,
1146	size_t io_len,
1147	int flags,
1148	cred_t *cr,
1149	caller_context_t *ct)
1150{
1151	vp = realvp(vp);
1152	return (VOP_PAGEIO(vp, pp, io_off, io_len, flags, cr, ct));
1153}
1154
1155static void
1156lo_dispose(
1157	vnode_t *vp,
1158	page_t *pp,
1159	int fl,
1160	int dn,
1161	cred_t *cr,
1162	caller_context_t *ct)
1163{
1164	vp = realvp(vp);
1165	if (vp != NULL && !VN_ISKAS(vp))
1166		VOP_DISPOSE(vp, pp, fl, dn, cr, ct);
1167}
1168
1169static int
1170lo_setsecattr(
1171	vnode_t *vp,
1172	vsecattr_t *secattr,
1173	int flags,
1174	struct cred *cr,
1175	caller_context_t *ct)
1176{
1177	if (vn_is_readonly(vp))
1178		return (EROFS);
1179	vp = realvp(vp);
1180	return (VOP_SETSECATTR(vp, secattr, flags, cr, ct));
1181}
1182
1183static int
1184lo_getsecattr(
1185	vnode_t *vp,
1186	vsecattr_t *secattr,
1187	int flags,
1188	struct cred *cr,
1189	caller_context_t *ct)
1190{
1191	vp = realvp(vp);
1192	return (VOP_GETSECATTR(vp, secattr, flags, cr, ct));
1193}
1194
1195static int
1196lo_shrlock(
1197	vnode_t *vp,
1198	int cmd,
1199	struct shrlock *shr,
1200	int flag,
1201	cred_t *cr,
1202	caller_context_t *ct)
1203{
1204	vp = realvp(vp);
1205	return (VOP_SHRLOCK(vp, cmd, shr, flag, cr, ct));
1206}
1207
/*
 * Loopback vnode operations vector.
 *
 * lo_vnodeops is the ops pointer the routines above compare against
 * (via vn_matchops()) to recognise lofs vnodes.  It is not assigned in
 * this file; presumably it is built from lo_vnodeops_template during
 * file system initialization -- TODO confirm against the lofs vfs code.
 *
 * lo_vnodeops_template pairs each vnode operation name with the lofs
 * routine implementing it.  Every entry names one of the lo_*()
 * functions defined above, except VOPNAME_DUMPCTL, which is mapped to
 * fs_error.  The table ends with a NULL, NULL entry.
 */

struct vnodeops *lo_vnodeops;

const fs_operation_def_t lo_vnodeops_template[] = {
	VOPNAME_OPEN,		{ .vop_open = lo_open },
	VOPNAME_CLOSE,		{ .vop_close = lo_close },
	VOPNAME_READ,		{ .vop_read = lo_read },
	VOPNAME_WRITE,		{ .vop_write = lo_write },
	VOPNAME_IOCTL,		{ .vop_ioctl = lo_ioctl },
	VOPNAME_SETFL,		{ .vop_setfl = lo_setfl },
	VOPNAME_GETATTR,	{ .vop_getattr = lo_getattr },
	VOPNAME_SETATTR,	{ .vop_setattr = lo_setattr },
	VOPNAME_ACCESS,		{ .vop_access = lo_access },
	VOPNAME_LOOKUP,		{ .vop_lookup = lo_lookup },
	VOPNAME_CREATE,		{ .vop_create = lo_create },
	VOPNAME_REMOVE,		{ .vop_remove = lo_remove },
	VOPNAME_LINK,		{ .vop_link = lo_link },
	VOPNAME_RENAME,		{ .vop_rename = lo_rename },
	VOPNAME_MKDIR,		{ .vop_mkdir = lo_mkdir },
	VOPNAME_RMDIR,		{ .vop_rmdir = lo_rmdir },
	VOPNAME_READDIR,	{ .vop_readdir = lo_readdir },
	VOPNAME_SYMLINK,	{ .vop_symlink = lo_symlink },
	VOPNAME_READLINK,	{ .vop_readlink = lo_readlink },
	VOPNAME_FSYNC,		{ .vop_fsync = lo_fsync },
	VOPNAME_INACTIVE,	{ .vop_inactive = lo_inactive },
	VOPNAME_FID,		{ .vop_fid = lo_fid },
	VOPNAME_RWLOCK,		{ .vop_rwlock = lo_rwlock },
	VOPNAME_RWUNLOCK,	{ .vop_rwunlock = lo_rwunlock },
	VOPNAME_SEEK,		{ .vop_seek = lo_seek },
	VOPNAME_CMP,		{ .vop_cmp = lo_cmp },
	VOPNAME_FRLOCK,		{ .vop_frlock = lo_frlock },
	VOPNAME_SPACE,		{ .vop_space = lo_space },
	VOPNAME_REALVP,		{ .vop_realvp = lo_realvp },
	VOPNAME_GETPAGE,	{ .vop_getpage = lo_getpage },
	VOPNAME_PUTPAGE,	{ .vop_putpage = lo_putpage },
	VOPNAME_MAP,		{ .vop_map = lo_map },
	VOPNAME_ADDMAP,		{ .vop_addmap = lo_addmap },
	VOPNAME_DELMAP,		{ .vop_delmap = lo_delmap },
	VOPNAME_POLL,		{ .vop_poll = lo_poll },
	VOPNAME_DUMP,		{ .vop_dump = lo_dump },
	/* The only operation not routed to a lo_*() routine. */
	VOPNAME_DUMPCTL,	{ .error = fs_error },	/* XXX - why? */
	VOPNAME_PATHCONF,	{ .vop_pathconf = lo_pathconf },
	VOPNAME_PAGEIO,		{ .vop_pageio = lo_pageio },
	VOPNAME_DISPOSE,	{ .vop_dispose = lo_dispose },
	VOPNAME_SETSECATTR,	{ .vop_setsecattr = lo_setsecattr },
	VOPNAME_GETSECATTR,	{ .vop_getsecattr = lo_getsecattr },
	VOPNAME_SHRLOCK,	{ .vop_shrlock = lo_shrlock },
	/* Terminating entry. */
	NULL,			NULL
};
1260