union_vnops.c revision 1.4
1/*	$NetBSD: union_vnops.c,v 1.4 2003/06/29 15:11:48 thorpej Exp $	*/
2
3/*
4 * Copyright (c) 1992, 1993, 1994, 1995 Jan-Simon Pendry.
5 * Copyright (c) 1992, 1993, 1994, 1995
6 *	The Regents of the University of California.  All rights reserved.
7 *
8 * This code is derived from software contributed to Berkeley by
9 * Jan-Simon Pendry.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 *    notice, this list of conditions and the following disclaimer in the
18 *    documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 *    must display the following acknowledgement:
21 *	This product includes software developed by the University of
22 *	California, Berkeley and its contributors.
23 * 4. Neither the name of the University nor the names of its contributors
24 *    may be used to endorse or promote products derived from this software
25 *    without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 * SUCH DAMAGE.
38 *
39 *	@(#)union_vnops.c	8.33 (Berkeley) 7/31/95
40 */
41
42#include <sys/cdefs.h>
43__KERNEL_RCSID(0, "$NetBSD: union_vnops.c,v 1.4 2003/06/29 15:11:48 thorpej Exp $");
44
45#include <sys/param.h>
46#include <sys/systm.h>
47#include <sys/proc.h>
48#include <sys/file.h>
49#include <sys/time.h>
50#include <sys/stat.h>
51#include <sys/vnode.h>
52#include <sys/mount.h>
53#include <sys/namei.h>
54#include <sys/malloc.h>
55#include <sys/buf.h>
56#include <sys/queue.h>
57#include <sys/lock.h>
58#include <fs/union/union.h>
59#include <miscfs/genfs/genfs.h>
60
/*
 * Vnode operation entry points for the union filesystem.  Every one
 * takes a single opaque pointer (the matching vop_*_args structure)
 * and is referenced from union_vnodeop_entries[] below.
 */
int union_lookup	__P((void *));
int union_create	__P((void *));
int union_whiteout	__P((void *));
int union_mknod		__P((void *));
int union_open		__P((void *));
int union_close		__P((void *));
int union_access	__P((void *));
int union_getattr	__P((void *));
int union_setattr	__P((void *));
int union_read		__P((void *));
int union_write		__P((void *));
int union_lease		__P((void *));
int union_ioctl		__P((void *));
int union_poll		__P((void *));
int union_revoke	__P((void *));
int union_mmap		__P((void *));
int union_fsync		__P((void *));
int union_seek		__P((void *));
int union_remove	__P((void *));
int union_link		__P((void *));
int union_rename	__P((void *));
int union_mkdir		__P((void *));
int union_rmdir		__P((void *));
int union_symlink	__P((void *));
int union_readdir	__P((void *));
int union_readlink	__P((void *));
int union_abortop	__P((void *));
int union_inactive	__P((void *));
int union_reclaim	__P((void *));
int union_lock		__P((void *));
int union_unlock	__P((void *));
int union_bmap		__P((void *));
int union_print		__P((void *));
int union_islocked	__P((void *));
int union_pathconf	__P((void *));
int union_advlock	__P((void *));
int union_strategy	__P((void *));
int union_getpages	__P((void *));
int union_putpages	__P((void *));
int union_kqfilter	__P((void *));

/*
 * Helpers private to this file: union_fixup() locks the upper vnode of
 * a union node, union_lookup1() performs a lookup in a single layer.
 */
static void union_fixup __P((struct union_node *));
static int union_lookup1 __P((struct vnode *, struct vnode **,
			      struct vnode **, struct componentname *));
105
106
/*
 * Global vfs data structures
 *
 * union_vnodeop_entries[] pairs each vnode operation descriptor with
 * the union filesystem routine that implements it; operations without
 * an explicit entry fall through to vn_default_error via the
 * vop_default_desc slot.  The table is terminated by a { NULL, NULL }
 * pair.  union_vnodeop_opv_desc ties the table to union_vnodeop_p, the
 * operations-vector pointer for this filesystem.
 */
int (**union_vnodeop_p) __P((void *));
const struct vnodeopv_entry_desc union_vnodeop_entries[] = {
	{ &vop_default_desc, vn_default_error },
	{ &vop_lookup_desc, union_lookup },		/* lookup */
	{ &vop_create_desc, union_create },		/* create */
	{ &vop_whiteout_desc, union_whiteout },		/* whiteout */
	{ &vop_mknod_desc, union_mknod },		/* mknod */
	{ &vop_open_desc, union_open },			/* open */
	{ &vop_close_desc, union_close },		/* close */
	{ &vop_access_desc, union_access },		/* access */
	{ &vop_getattr_desc, union_getattr },		/* getattr */
	{ &vop_setattr_desc, union_setattr },		/* setattr */
	{ &vop_read_desc, union_read },			/* read */
	{ &vop_write_desc, union_write },		/* write */
	{ &vop_lease_desc, union_lease },		/* lease */
	{ &vop_ioctl_desc, union_ioctl },		/* ioctl */
	{ &vop_poll_desc, union_poll },			/* poll */
	{ &vop_revoke_desc, union_revoke },		/* revoke */
	{ &vop_mmap_desc, union_mmap },			/* mmap */
	{ &vop_fsync_desc, union_fsync },		/* fsync */
	{ &vop_seek_desc, union_seek },			/* seek */
	{ &vop_remove_desc, union_remove },		/* remove */
	{ &vop_link_desc, union_link },			/* link */
	{ &vop_rename_desc, union_rename },		/* rename */
	{ &vop_mkdir_desc, union_mkdir },		/* mkdir */
	{ &vop_rmdir_desc, union_rmdir },		/* rmdir */
	{ &vop_symlink_desc, union_symlink },		/* symlink */
	{ &vop_readdir_desc, union_readdir },		/* readdir */
	{ &vop_readlink_desc, union_readlink },		/* readlink */
	{ &vop_abortop_desc, union_abortop },		/* abortop */
	{ &vop_inactive_desc, union_inactive },		/* inactive */
	{ &vop_reclaim_desc, union_reclaim },		/* reclaim */
	{ &vop_lock_desc, union_lock },			/* lock */
	{ &vop_unlock_desc, union_unlock },		/* unlock */
	{ &vop_bmap_desc, union_bmap },			/* bmap */
	{ &vop_strategy_desc, union_strategy },		/* strategy */
	{ &vop_print_desc, union_print },		/* print */
	{ &vop_islocked_desc, union_islocked },		/* islocked */
	{ &vop_pathconf_desc, union_pathconf },		/* pathconf */
	{ &vop_advlock_desc, union_advlock },		/* advlock */
	{ &vop_getpages_desc, union_getpages },		/* getpages */
	{ &vop_putpages_desc, union_putpages },		/* putpages */
	{ &vop_kqfilter_desc, union_kqfilter },		/* kqfilter */
#ifdef notdef
	/* Compiled out: no union_* implementations are declared for these. */
	{ &vop_blkatoff_desc, union_blkatoff },		/* blkatoff */
	{ &vop_valloc_desc, union_valloc },		/* valloc */
	{ &vop_vfree_desc, union_vfree },		/* vfree */
	{ &vop_truncate_desc, union_truncate },		/* truncate */
	{ &vop_update_desc, union_update },		/* update */
	{ &vop_bwrite_desc, union_bwrite },		/* bwrite */
#endif
	{ NULL, NULL }
};
const struct vnodeopv_desc union_vnodeop_opv_desc =
	{ &union_vnodeop_p, union_vnodeop_entries };
165
/*
 * FIXUP(un): make sure the upper vnode of union node `un' is locked.
 * When UN_ULOCK is not set in un->un_flags, union_fixup() is called to
 * take the lock and set the flag; otherwise nothing is done.
 *
 * The body is wrapped in do { } while (0) so that "FIXUP(x);" is exactly
 * one statement and can be used as the unbraced arm of an if/else.  The
 * argument is evaluated exactly once.
 */
#define FIXUP(un) do { \
	struct union_node *fixup_un_ = (un); \
	if ((fixup_un_->un_flags & UN_ULOCK) == 0) { \
		union_fixup(fixup_un_); \
	} \
} while (/*CONSTCOND*/ 0)
171
172static void
173union_fixup(un)
174	struct union_node *un;
175{
176
177	vn_lock(un->un_uppervp, LK_EXCLUSIVE | LK_RETRY);
178	un->un_flags |= UN_ULOCK;
179}
180
181static int
182union_lookup1(udvp, dvpp, vpp, cnp)
183	struct vnode *udvp;
184	struct vnode **dvpp;
185	struct vnode **vpp;
186	struct componentname *cnp;
187{
188	int error;
189	struct vnode *tdvp;
190	struct vnode *dvp;
191	struct mount *mp;
192
193	dvp = *dvpp;
194
195	/*
196	 * If stepping up the directory tree, check for going
197	 * back across the mount point, in which case do what
198	 * lookup would do by stepping back down the mount
199	 * hierarchy.
200	 */
201	if (cnp->cn_flags & ISDOTDOT) {
202		while ((dvp != udvp) && (dvp->v_flag & VROOT)) {
203			/*
204			 * Don't do the NOCROSSMOUNT check
205			 * at this level.  By definition,
206			 * union fs deals with namespaces, not
207			 * filesystems.
208			 */
209			tdvp = dvp;
210			*dvpp = dvp = dvp->v_mount->mnt_vnodecovered;
211			vput(tdvp);
212			VREF(dvp);
213			vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
214		}
215	}
216
217        error = VOP_LOOKUP(dvp, &tdvp, cnp);
218	if (error)
219		return (error);
220
221	/*
222	 * The parent directory will have been unlocked, unless lookup
223	 * found the last component.  In which case, re-lock the node
224	 * here to allow it to be unlocked again (phew) in union_lookup.
225	 */
226	if (dvp != tdvp && !(cnp->cn_flags & ISLASTCN))
227		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
228
229	dvp = tdvp;
230
231	/*
232	 * Lastly check if the current node is a mount point in
233	 * which case walk up the mount hierarchy making sure not to
234	 * bump into the root of the mount tree (ie. dvp != udvp).
235	 */
236	while (dvp != udvp && (dvp->v_type == VDIR) &&
237	       (mp = dvp->v_mountedhere)) {
238
239		if (vfs_busy(mp, 0, 0))
240			continue;
241
242		error = VFS_ROOT(mp, &tdvp, cnp->cn_lwp);
243		vfs_unbusy(mp);
244		if (error) {
245			vput(dvp);
246			return (error);
247		}
248
249		vput(dvp);
250		dvp = tdvp;
251	}
252
253	*vpp = dvp;
254	return (0);
255}
256
257int
258union_lookup(v)
259	void *v;
260{
261	struct vop_lookup_args /* {
262		struct vnodeop_desc *a_desc;
263		struct vnode *a_dvp;
264		struct vnode **a_vpp;
265		struct componentname *a_cnp;
266	} */ *ap = v;
267	int error;
268	int uerror, lerror;
269	struct vnode *uppervp, *lowervp;
270	struct vnode *upperdvp, *lowerdvp;
271	struct vnode *dvp = ap->a_dvp;
272	struct union_node *dun = VTOUNION(dvp);
273	struct componentname *cnp = ap->a_cnp;
274	int lockparent = cnp->cn_flags & LOCKPARENT;
275	struct union_mount *um = MOUNTTOUNIONMOUNT(dvp->v_mount);
276	struct ucred *saved_cred = NULL;
277	int iswhiteout;
278	struct vattr va;
279
280#ifdef notyet
281	if (cnp->cn_namelen == 3 &&
282			cnp->cn_nameptr[2] == '.' &&
283			cnp->cn_nameptr[1] == '.' &&
284			cnp->cn_nameptr[0] == '.') {
285		dvp = *ap->a_vpp = LOWERVP(ap->a_dvp);
286		if (dvp == NULLVP)
287			return (ENOENT);
288		VREF(dvp);
289		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
290		if (!lockparent || !(cnp->cn_flags & ISLASTCN))
291			VOP_UNLOCK(ap->a_dvp, 0);
292		return (0);
293	}
294#endif
295
296	if ((cnp->cn_flags & ISLASTCN) &&
297	    (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
298	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
299		return (EROFS);
300
301	cnp->cn_flags |= LOCKPARENT;
302
303	upperdvp = dun->un_uppervp;
304	lowerdvp = dun->un_lowervp;
305	uppervp = NULLVP;
306	lowervp = NULLVP;
307	iswhiteout = 0;
308
309	/*
310	 * do the lookup in the upper level.
311	 * if that level comsumes additional pathnames,
312	 * then assume that something special is going
313	 * on and just return that vnode.
314	 */
315	if (upperdvp != NULLVP) {
316		FIXUP(dun);
317		/*
318		 * If we're doing `..' in the underlying filesystem,
319		 * we must drop our lock on the union node before
320		 * going up the tree in the lower file system--if we block
321		 * on the lowervp lock, and that's held by someone else
322		 * coming down the tree and who's waiting for our lock,
323		 * we would be hosed.
324		 */
325		if (cnp->cn_flags & ISDOTDOT) {
326			/* retain lock on underlying VP */
327			dun->un_flags |= UN_KLOCK;
328			VOP_UNLOCK(dvp, 0);
329		}
330		uerror = union_lookup1(um->um_uppervp, &upperdvp,
331					&uppervp, cnp);
332
333		if (cnp->cn_flags & ISDOTDOT) {
334			if (dun->un_uppervp == upperdvp) {
335				/*
336				 * we got the underlying bugger back locked...
337				 * now take back the union node lock.  Since we
338				 *  hold the uppervp lock, we can diddle union
339				 * locking flags at will. :)
340				 */
341				dun->un_flags |= UN_ULOCK;
342			}
343			/*
344			 * if upperdvp got swapped out, it means we did
345			 * some mount point magic, and we do not have
346			 * dun->un_uppervp locked currently--so we get it
347			 * locked here (don't set the UN_ULOCK flag).
348			 */
349			vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
350		}
351		if (cnp->cn_consume != 0) {
352			*ap->a_vpp = uppervp;
353			if (!lockparent)
354				cnp->cn_flags &= ~LOCKPARENT;
355			return (uerror);
356		}
357		if (uerror == ENOENT || uerror == EJUSTRETURN) {
358			if (cnp->cn_flags & ISWHITEOUT) {
359				iswhiteout = 1;
360			} else if (lowerdvp != NULLVP) {
361				lerror = VOP_GETATTR(upperdvp, &va,
362					cnp->cn_cred, cnp->cn_lwp);
363				if (lerror == 0 && (va.va_flags & OPAQUE))
364					iswhiteout = 1;
365			}
366		}
367	} else {
368		uerror = ENOENT;
369	}
370
371	/*
372	 * in a similar way to the upper layer, do the lookup
373	 * in the lower layer.   this time, if there is some
374	 * component magic going on, then vput whatever we got
375	 * back from the upper layer and return the lower vnode
376	 * instead.
377	 */
378	if (lowerdvp != NULLVP && !iswhiteout) {
379		int nameiop;
380
381		vn_lock(lowerdvp, LK_EXCLUSIVE | LK_RETRY);
382
383		/*
384		 * Only do a LOOKUP on the bottom node, since
385		 * we won't be making changes to it anyway.
386		 */
387		nameiop = cnp->cn_nameiop;
388		cnp->cn_nameiop = LOOKUP;
389		if (um->um_op == UNMNT_BELOW) {
390			saved_cred = cnp->cn_cred;
391			cnp->cn_cred = um->um_cred;
392		}
393		/*
394		 * we shouldn't have to worry about locking interactions
395		 * between the lower layer and our union layer (w.r.t.
396		 * `..' processing) because we don't futz with lowervp
397		 * locks in the union-node instantiation code path.
398		 */
399		lerror = union_lookup1(um->um_lowervp, &lowerdvp,
400				&lowervp, cnp);
401		if (um->um_op == UNMNT_BELOW)
402			cnp->cn_cred = saved_cred;
403		cnp->cn_nameiop = nameiop;
404
405		if (lowervp != lowerdvp)
406			VOP_UNLOCK(lowerdvp, 0);
407
408		if (cnp->cn_consume != 0) {
409			if (uppervp != NULLVP) {
410				if (uppervp == upperdvp)
411					vrele(uppervp);
412				else
413					vput(uppervp);
414				uppervp = NULLVP;
415			}
416			*ap->a_vpp = lowervp;
417			if (!lockparent)
418				cnp->cn_flags &= ~LOCKPARENT;
419			return (lerror);
420		}
421	} else {
422		lerror = ENOENT;
423		if ((cnp->cn_flags & ISDOTDOT) && dun->un_pvp != NULLVP) {
424			lowervp = LOWERVP(dun->un_pvp);
425			if (lowervp != NULLVP) {
426				VREF(lowervp);
427				vn_lock(lowervp, LK_EXCLUSIVE | LK_RETRY);
428				lerror = 0;
429			}
430		}
431	}
432
433	if (!lockparent)
434		cnp->cn_flags &= ~LOCKPARENT;
435
436	/*
437	 * EJUSTRETURN is used by underlying filesystems to indicate that
438	 * a directory modification op was started successfully.
439	 * This will only happen in the upper layer, since
440	 * the lower layer only does LOOKUPs.
441	 * If this union is mounted read-only, bounce it now.
442	 */
443
444	if ((uerror == EJUSTRETURN) && (cnp->cn_flags & ISLASTCN) &&
445	    (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
446	    ((cnp->cn_nameiop == CREATE) || (cnp->cn_nameiop == RENAME)))
447		uerror = EROFS;
448
449	/*
450	 * at this point, we have uerror and lerror indicating
451	 * possible errors with the lookups in the upper and lower
452	 * layers.  additionally, uppervp and lowervp are (locked)
453	 * references to existing vnodes in the upper and lower layers.
454	 *
455	 * there are now three cases to consider.
456	 * 1. if both layers returned an error, then return whatever
457	 *    error the upper layer generated.
458	 *
459	 * 2. if the top layer failed and the bottom layer succeeded
460	 *    then two subcases occur.
461	 *    a.  the bottom vnode is not a directory, in which
462	 *	  case just return a new union vnode referencing
463	 *	  an empty top layer and the existing bottom layer.
464	 *    b.  the bottom vnode is a directory, in which case
465	 *	  create a new directory in the top-level and
466	 *	  continue as in case 3.
467	 *
468	 * 3. if the top layer succeeded then return a new union
469	 *    vnode referencing whatever the new top layer and
470	 *    whatever the bottom layer returned.
471	 */
472
473	*ap->a_vpp = NULLVP;
474
475
476	/* case 1. */
477	if ((uerror != 0) && (lerror != 0)) {
478		return (uerror);
479	}
480
481	/* case 2. */
482	if (uerror != 0 /* && (lerror == 0) */ ) {
483		if (lowervp->v_type == VDIR) { /* case 2b. */
484			/*
485			 * We may be racing another process to make the
486			 * upper-level shadow directory.  Be careful with
487			 * locks/etc!
488			 */
489			dun->un_flags &= ~UN_ULOCK;
490			VOP_UNLOCK(upperdvp, 0);
491			uerror = union_mkshadow(um, upperdvp, cnp, &uppervp);
492			vn_lock(upperdvp, LK_EXCLUSIVE | LK_RETRY);
493			dun->un_flags |= UN_ULOCK;
494
495			if (uerror) {
496				if (lowervp != NULLVP) {
497					vput(lowervp);
498					lowervp = NULLVP;
499				}
500				return (uerror);
501			}
502		}
503	}
504
505	if (lowervp != NULLVP)
506		VOP_UNLOCK(lowervp, 0);
507
508	error = union_allocvp(ap->a_vpp, dvp->v_mount, dvp, upperdvp, cnp,
509			      uppervp, lowervp, 1, cnp->cn_lwp);
510
511	if (error) {
512		if (uppervp != NULLVP)
513			vput(uppervp);
514		if (lowervp != NULLVP)
515			vrele(lowervp);
516	} else {
517		if (*ap->a_vpp != dvp)
518			if (!lockparent || !(cnp->cn_flags & ISLASTCN))
519				VOP_UNLOCK(dvp, 0);
520		if (cnp->cn_namelen == 1 &&
521		    cnp->cn_nameptr[0] == '.' &&
522		    *ap->a_vpp != dvp) {
523			panic("union_lookup -> . (%p) != startdir (%p)",
524			    ap->a_vpp, dvp);
525		}
526	}
527
528	return (error);
529}
530
531int
532union_create(v)
533	void *v;
534{
535	struct vop_create_args /* {
536		struct vnode *a_dvp;
537		struct vnode **a_vpp;
538		struct componentname *a_cnp;
539		struct vattr *a_vap;
540	} */ *ap = v;
541	struct union_node *un = VTOUNION(ap->a_dvp);
542	struct vnode *dvp = un->un_uppervp;
543	struct componentname *cnp = ap->a_cnp;
544
545	if (dvp != NULLVP) {
546		int error;
547		struct vnode *vp;
548		struct mount *mp;
549
550		FIXUP(un);
551
552		VREF(dvp);
553		un->un_flags |= UN_KLOCK;
554		mp = ap->a_dvp->v_mount;
555		vput(ap->a_dvp);
556		error = VOP_CREATE(dvp, &vp, cnp, ap->a_vap);
557		if (error)
558			return (error);
559
560		error = union_allocvp(ap->a_vpp, mp, NULLVP, NULLVP, cnp, vp,
561				NULLVP, 1, cnp->cn_lwp);
562		if (error)
563			vput(vp);
564		return (error);
565	}
566
567	vput(ap->a_dvp);
568	return (EROFS);
569}
570
571int
572union_whiteout(v)
573	void *v;
574{
575	struct vop_whiteout_args /* {
576		struct vnode *a_dvp;
577		struct componentname *a_cnp;
578		int a_flags;
579	} */ *ap = v;
580	struct union_node *un = VTOUNION(ap->a_dvp);
581	struct componentname *cnp = ap->a_cnp;
582
583	if (un->un_uppervp == NULLVP)
584		return (EOPNOTSUPP);
585
586	FIXUP(un);
587	return (VOP_WHITEOUT(un->un_uppervp, cnp, ap->a_flags));
588}
589
590int
591union_mknod(v)
592	void *v;
593{
594	struct vop_mknod_args /* {
595		struct vnode *a_dvp;
596		struct vnode **a_vpp;
597		struct componentname *a_cnp;
598		struct vattr *a_vap;
599	} */ *ap = v;
600	struct union_node *un = VTOUNION(ap->a_dvp);
601	struct vnode *dvp = un->un_uppervp;
602	struct componentname *cnp = ap->a_cnp;
603
604	if (dvp != NULLVP) {
605		int error;
606		struct vnode *vp;
607		struct mount *mp;
608
609		FIXUP(un);
610
611		VREF(dvp);
612		un->un_flags |= UN_KLOCK;
613		mp = ap->a_dvp->v_mount;
614		vput(ap->a_dvp);
615		error = VOP_MKNOD(dvp, &vp, cnp, ap->a_vap);
616		if (error)
617			return (error);
618
619		error = union_allocvp(ap->a_vpp, mp, NULLVP, NULLVP,
620				      cnp, vp, NULLVP, 1, cnp->cn_lwp);
621		if (error)
622		    vput(vp);
623		return (error);
624	}
625
626	vput(ap->a_dvp);
627	return (EROFS);
628}
629
630int
631union_open(v)
632	void *v;
633{
634	struct vop_open_args /* {
635		struct vnodeop_desc *a_desc;
636		struct vnode *a_vp;
637		int a_mode;
638		struct ucred *a_cred;
639		struct lwp *a_l;
640	} */ *ap = v;
641	struct union_node *un = VTOUNION(ap->a_vp);
642	struct vnode *tvp;
643	int mode = ap->a_mode;
644	struct ucred *cred = ap->a_cred;
645	struct lwp *l = ap->a_l;
646	int error;
647
648	/*
649	 * If there is an existing upper vp then simply open that.
650	 */
651	tvp = un->un_uppervp;
652	if (tvp == NULLVP) {
653		/*
654		 * If the lower vnode is being opened for writing, then
655		 * copy the file contents to the upper vnode and open that,
656		 * otherwise can simply open the lower vnode.
657		 */
658		tvp = un->un_lowervp;
659		if ((ap->a_mode & FWRITE) && (tvp->v_type == VREG)) {
660			error = union_copyup(un, (mode&O_TRUNC) == 0, cred, l);
661			if (error == 0)
662				error = VOP_OPEN(un->un_uppervp, mode, cred, l);
663			return (error);
664		}
665
666		/*
667		 * Just open the lower vnode, but check for nodev mount flag
668		 */
669		if ((tvp->v_type == VBLK || tvp->v_type == VCHR) &&
670		    (ap->a_vp->v_mount->mnt_flag & MNT_NODEV))
671			return ENXIO;
672		un->un_openl++;
673		vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY);
674		error = VOP_OPEN(tvp, mode, cred, l);
675		VOP_UNLOCK(tvp, 0);
676
677		return (error);
678	}
679	/*
680	 * Just open the upper vnode, checking for nodev mount flag first
681	 */
682	if ((tvp->v_type == VBLK || tvp->v_type == VCHR) &&
683	    (ap->a_vp->v_mount->mnt_flag & MNT_NODEV))
684		return ENXIO;
685
686	FIXUP(un);
687
688	error = VOP_OPEN(tvp, mode, cred, l);
689
690	return (error);
691}
692
693int
694union_close(v)
695	void *v;
696{
697	struct vop_close_args /* {
698		struct vnode *a_vp;
699		int  a_fflag;
700		struct ucred *a_cred;
701		struct lwp *a_l;
702	} */ *ap = v;
703	struct union_node *un = VTOUNION(ap->a_vp);
704	struct vnode *vp;
705
706	vp = un->un_uppervp;
707	if (vp == NULLVP) {
708#ifdef UNION_DIAGNOSTIC
709		if (un->un_openl <= 0)
710			panic("union: un_openl cnt");
711#endif
712		--un->un_openl;
713		vp = un->un_lowervp;
714	}
715
716#ifdef DIAGNOSTIC
717	if (vp == NULLVP) {
718		vprint("empty union vnode", vp);
719		panic("union_close empty vnode");
720	}
721#endif
722
723	ap->a_vp = vp;
724	return (VCALL(vp, VOFFSET(vop_close), ap));
725}
726
727/*
728 * Check access permission on the union vnode.
729 * The access check being enforced is to check
730 * against both the underlying vnode, and any
731 * copied vnode.  This ensures that no additional
732 * file permissions are given away simply because
733 * the user caused an implicit file copy.
734 */
735int
736union_access(v)
737	void *v;
738{
739	struct vop_access_args /* {
740		struct vnodeop_desc *a_desc;
741		struct vnode *a_vp;
742		int a_mode;
743		struct ucred *a_cred;
744		struct lwp *a_l;
745	} */ *ap = v;
746	struct vnode *vp = ap->a_vp;
747	struct union_node *un = VTOUNION(vp);
748	int error = EACCES;
749	struct union_mount *um = MOUNTTOUNIONMOUNT(vp->v_mount);
750
751	/*
752	 * Disallow write attempts on read-only file systems;
753	 * unless the file is a socket, fifo, or a block or
754	 * character device resident on the file system.
755	 */
756	if (ap->a_mode & VWRITE) {
757		switch (vp->v_type) {
758		case VDIR:
759		case VLNK:
760		case VREG:
761			if (vp->v_mount->mnt_flag & MNT_RDONLY)
762				return (EROFS);
763			break;
764		case VBAD:
765		case VBLK:
766		case VCHR:
767		case VSOCK:
768		case VFIFO:
769		case VNON:
770		default:
771			break;
772		}
773	}
774
775
776	if ((vp = un->un_uppervp) != NULLVP) {
777		FIXUP(un);
778		ap->a_vp = vp;
779		return (VCALL(vp, VOFFSET(vop_access), ap));
780	}
781
782	if ((vp = un->un_lowervp) != NULLVP) {
783		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
784		ap->a_vp = vp;
785		error = VCALL(vp, VOFFSET(vop_access), ap);
786		if (error == 0) {
787			if (um->um_op == UNMNT_BELOW) {
788				ap->a_cred = um->um_cred;
789				error = VCALL(vp, VOFFSET(vop_access), ap);
790			}
791		}
792		VOP_UNLOCK(vp, 0);
793		if (error)
794			return (error);
795	}
796
797	return (error);
798}
799
800/*
801 * We handle getattr only to change the fsid and
802 * track object sizes
803 */
804int
805union_getattr(v)
806	void *v;
807{
808	struct vop_getattr_args /* {
809		struct vnode *a_vp;
810		struct vattr *a_vap;
811		struct ucred *a_cred;
812		struct lwp *a_l;
813	} */ *ap = v;
814	int error;
815	struct union_node *un = VTOUNION(ap->a_vp);
816	struct vnode *vp = un->un_uppervp;
817	struct vattr *vap;
818	struct vattr va;
819
820
821	/*
822	 * Some programs walk the filesystem hierarchy by counting
823	 * links to directories to avoid stat'ing all the time.
824	 * This means the link count on directories needs to be "correct".
825	 * The only way to do that is to call getattr on both layers
826	 * and fix up the link count.  The link count will not necessarily
827	 * be accurate but will be large enough to defeat the tree walkers.
828	 *
829	 * To make life more interesting, some filesystems don't keep
830	 * track of link counts in the expected way, and return a
831	 * link count of `1' for those directories; if either of the
832	 * component directories returns a link count of `1', we return a 1.
833	 */
834
835	vap = ap->a_vap;
836
837	vp = un->un_uppervp;
838	if (vp != NULLVP) {
839		/*
840		 * It's not clear whether VOP_GETATTR is to be
841		 * called with the vnode locked or not.  stat() calls
842		 * it with (vp) locked, and fstat calls it with
843		 * (vp) unlocked.
844		 * In the mean time, compensate here by checking
845		 * the union_node's lock flag.
846		 */
847		if (un->un_flags & UN_LOCKED)
848			FIXUP(un);
849
850		error = VOP_GETATTR(vp, vap, ap->a_cred, ap->a_l);
851		if (error)
852			return (error);
853		union_newsize(ap->a_vp, vap->va_size, VNOVAL);
854	}
855
856	if (vp == NULLVP) {
857		vp = un->un_lowervp;
858	} else if (vp->v_type == VDIR) {
859		vp = un->un_lowervp;
860		if (vp != NULLVP)
861			vap = &va;
862	} else {
863		vp = NULLVP;
864	}
865
866	if (vp != NULLVP) {
867		error = VOP_GETATTR(vp, vap, ap->a_cred, ap->a_l);
868		if (error)
869			return (error);
870		union_newsize(ap->a_vp, VNOVAL, vap->va_size);
871	}
872
873	if ((vap != ap->a_vap) && (vap->va_type == VDIR)) {
874		/*
875		 * Link count manipulation:
876		 *	- If both return "2", return 2 (no subdirs)
877		 *	- If one or the other return "1", return "1" (ENOCLUE)
878		 */
879		if ((ap->a_vap->va_nlink == 2) &&
880		    (vap->va_nlink == 2))
881			;
882		else if (ap->a_vap->va_nlink != 1) {
883			if (vap->va_nlink == 1)
884				ap->a_vap->va_nlink = 1;
885			else
886				ap->a_vap->va_nlink += vap->va_nlink;
887		}
888	}
889	ap->a_vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsid.val[0];
890	return (0);
891}
892
893int
894union_setattr(v)
895	void *v;
896{
897	struct vop_setattr_args /* {
898		struct vnode *a_vp;
899		struct vattr *a_vap;
900		struct ucred *a_cred;
901		struct lwp *a_l;
902	} */ *ap = v;
903	struct vattr *vap = ap->a_vap;
904	struct vnode *vp = ap->a_vp;
905	struct union_node *un = VTOUNION(vp);
906	int error;
907
908  	if ((vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL ||
909	    vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL ||
910	    vap->va_mtime.tv_sec != VNOVAL || vap->va_mode != (mode_t)VNOVAL) &&
911	    (vp->v_mount->mnt_flag & MNT_RDONLY))
912		return (EROFS);
913	if (vap->va_size != VNOVAL) {
914 		switch (vp->v_type) {
915 		case VDIR:
916 			return (EISDIR);
917 		case VCHR:
918 		case VBLK:
919 		case VSOCK:
920 		case VFIFO:
921			break;
922		case VREG:
923		case VLNK:
924 		default:
925			/*
926			 * Disallow write attempts if the filesystem is
927			 * mounted read-only.
928			 */
929			if (vp->v_mount->mnt_flag & MNT_RDONLY)
930				return (EROFS);
931		}
932	}
933
934	/*
935	 * Handle case of truncating lower object to zero size,
936	 * by creating a zero length upper object.  This is to
937	 * handle the case of open with O_TRUNC and O_CREAT.
938	 */
939	if ((un->un_uppervp == NULLVP) &&
940	    /* assert(un->un_lowervp != NULLVP) */
941	    (un->un_lowervp->v_type == VREG)) {
942		error = union_copyup(un, (vap->va_size != 0),
943						ap->a_cred, ap->a_l);
944		if (error)
945			return (error);
946	}
947
948	/*
949	 * Try to set attributes in upper layer,
950	 * otherwise return read-only filesystem error.
951	 */
952	if (un->un_uppervp != NULLVP) {
953		FIXUP(un);
954		error = VOP_SETATTR(un->un_uppervp, vap,
955					ap->a_cred, ap->a_l);
956		if ((error == 0) && (vap->va_size != VNOVAL))
957			union_newsize(ap->a_vp, vap->va_size, VNOVAL);
958	} else {
959		error = EROFS;
960	}
961
962	return (error);
963}
964
965int
966union_read(v)
967	void *v;
968{
969	struct vop_read_args /* {
970		struct vnode *a_vp;
971		struct uio *a_uio;
972		int  a_ioflag;
973		struct ucred *a_cred;
974	} */ *ap = v;
975	int error;
976	struct vnode *vp = OTHERVP(ap->a_vp);
977	int dolock = (vp == LOWERVP(ap->a_vp));
978
979	if (dolock)
980		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
981	else
982		FIXUP(VTOUNION(ap->a_vp));
983	error = VOP_READ(vp, ap->a_uio, ap->a_ioflag, ap->a_cred);
984	if (dolock)
985		VOP_UNLOCK(vp, 0);
986
987	/*
988	 * XXX
989	 * perhaps the size of the underlying object has changed under
990	 * our feet.  take advantage of the offset information present
991	 * in the uio structure.
992	 */
993	if (error == 0) {
994		struct union_node *un = VTOUNION(ap->a_vp);
995		off_t cur = ap->a_uio->uio_offset;
996
997		if (vp == un->un_uppervp) {
998			if (cur > un->un_uppersz)
999				union_newsize(ap->a_vp, cur, VNOVAL);
1000		} else {
1001			if (cur > un->un_lowersz)
1002				union_newsize(ap->a_vp, VNOVAL, cur);
1003		}
1004	}
1005
1006	return (error);
1007}
1008
1009int
1010union_write(v)
1011	void *v;
1012{
1013	struct vop_read_args /* {
1014		struct vnode *a_vp;
1015		struct uio *a_uio;
1016		int  a_ioflag;
1017		struct ucred *a_cred;
1018	} */ *ap = v;
1019	int error;
1020	struct vnode *vp;
1021	struct union_node *un = VTOUNION(ap->a_vp);
1022
1023	vp = UPPERVP(ap->a_vp);
1024	if (vp == NULLVP)
1025		panic("union: missing upper layer in write");
1026
1027	FIXUP(un);
1028	error = VOP_WRITE(vp, ap->a_uio, ap->a_ioflag, ap->a_cred);
1029
1030	/*
1031	 * the size of the underlying object may be changed by the
1032	 * write.
1033	 */
1034	if (error == 0) {
1035		off_t cur = ap->a_uio->uio_offset;
1036
1037		if (cur > un->un_uppersz)
1038			union_newsize(ap->a_vp, cur, VNOVAL);
1039	}
1040
1041	return (error);
1042}
1043
1044int
1045union_lease(v)
1046	void *v;
1047{
1048	struct vop_lease_args /* {
1049		struct vnode *a_vp;
1050		struct lwp *a_l;
1051		struct ucred *a_cred;
1052		int a_flag;
1053	} */ *ap = v;
1054	struct vnode *ovp = OTHERVP(ap->a_vp);
1055
1056	ap->a_vp = ovp;
1057	return (VCALL(ovp, VOFFSET(vop_lease), ap));
1058}
1059
1060int
1061union_ioctl(v)
1062	void *v;
1063{
1064	struct vop_ioctl_args /* {
1065		struct vnode *a_vp;
1066		int  a_command;
1067		caddr_t  a_data;
1068		int  a_fflag;
1069		struct ucred *a_cred;
1070		struct lwp *a_l;
1071	} */ *ap = v;
1072	struct vnode *ovp = OTHERVP(ap->a_vp);
1073
1074	ap->a_vp = ovp;
1075	return (VCALL(ovp, VOFFSET(vop_ioctl), ap));
1076}
1077
1078int
1079union_poll(v)
1080	void *v;
1081{
1082	struct vop_poll_args /* {
1083		struct vnode *a_vp;
1084		int a_events;
1085		struct lwp *a_l;
1086	} */ *ap = v;
1087	struct vnode *ovp = OTHERVP(ap->a_vp);
1088
1089	ap->a_vp = ovp;
1090	return (VCALL(ovp, VOFFSET(vop_poll), ap));
1091}
1092
1093int
1094union_revoke(v)
1095	void *v;
1096{
1097	struct vop_revoke_args /* {
1098		struct vnode *a_vp;
1099		int a_flags;
1100		struct lwp *a_l;
1101	} */ *ap = v;
1102	struct vnode *vp = ap->a_vp;
1103
1104	if (UPPERVP(vp))
1105		VOP_REVOKE(UPPERVP(vp), ap->a_flags);
1106	if (LOWERVP(vp))
1107		VOP_REVOKE(LOWERVP(vp), ap->a_flags);
1108	vgone(vp);
1109	return (0);
1110}
1111
1112int
1113union_mmap(v)
1114	void *v;
1115{
1116	struct vop_mmap_args /* {
1117		struct vnode *a_vp;
1118		int  a_fflags;
1119		struct ucred *a_cred;
1120		struct lwp *a_l;
1121	} */ *ap = v;
1122	struct vnode *ovp = OTHERVP(ap->a_vp);
1123
1124	ap->a_vp = ovp;
1125	return (VCALL(ovp, VOFFSET(vop_mmap), ap));
1126}
1127
1128int
1129union_fsync(v)
1130	void *v;
1131{
1132	struct vop_fsync_args /* {
1133		struct vnode *a_vp;
1134		struct ucred *a_cred;
1135		int  a_flags;
1136		off_t offhi;
1137		off_t offlo;
1138		struct lwp *a_l;
1139	} */ *ap = v;
1140	int error = 0;
1141	struct vnode *targetvp;
1142
1143	/*
1144	 * If vinvalbuf is calling us, it's a "shallow fsync" -- don't
1145	 * bother syncing the underlying vnodes, since (a) they'll be
1146	 * fsync'ed when reclaimed and (b) we could deadlock if
1147	 * they're locked; otherwise, pass it through to the
1148	 * underlying layer.
1149	 */
1150	if (ap->a_flags & FSYNC_RECLAIM)
1151		return 0;
1152
1153	targetvp = OTHERVP(ap->a_vp);
1154
1155	if (targetvp != NULLVP) {
1156		int dolock = (targetvp == LOWERVP(ap->a_vp));
1157
1158		if (dolock)
1159			vn_lock(targetvp, LK_EXCLUSIVE | LK_RETRY);
1160		else
1161			FIXUP(VTOUNION(ap->a_vp));
1162		error = VOP_FSYNC(targetvp, ap->a_cred, ap->a_flags,
1163			    ap->a_offlo, ap->a_offhi, ap->a_l);
1164		if (dolock)
1165			VOP_UNLOCK(targetvp, 0);
1166	}
1167
1168	return (error);
1169}
1170
1171int
1172union_seek(v)
1173	void *v;
1174{
1175	struct vop_seek_args /* {
1176		struct vnode *a_vp;
1177		off_t  a_oldoff;
1178		off_t  a_newoff;
1179		struct ucred *a_cred;
1180	} */ *ap = v;
1181	struct vnode *ovp = OTHERVP(ap->a_vp);
1182
1183	ap->a_vp = ovp;
1184	return (VCALL(ovp, VOFFSET(vop_seek), ap));
1185}
1186
1187int
1188union_remove(v)
1189	void *v;
1190{
1191	struct vop_remove_args /* {
1192		struct vnode *a_dvp;
1193		struct vnode *a_vp;
1194		struct componentname *a_cnp;
1195	} */ *ap = v;
1196	int error;
1197	struct union_node *dun = VTOUNION(ap->a_dvp);
1198	struct union_node *un = VTOUNION(ap->a_vp);
1199	struct componentname *cnp = ap->a_cnp;
1200
1201	if (dun->un_uppervp == NULLVP)
1202		panic("union remove: null upper vnode");
1203
1204	if (un->un_uppervp != NULLVP) {
1205		struct vnode *dvp = dun->un_uppervp;
1206		struct vnode *vp = un->un_uppervp;
1207
1208		FIXUP(dun);
1209		VREF(dvp);
1210		dun->un_flags |= UN_KLOCK;
1211		vput(ap->a_dvp);
1212		FIXUP(un);
1213		VREF(vp);
1214		un->un_flags |= UN_KLOCK;
1215		vput(ap->a_vp);
1216
1217		if (union_dowhiteout(un, cnp->cn_cred, cnp->cn_lwp))
1218			cnp->cn_flags |= DOWHITEOUT;
1219		error = VOP_REMOVE(dvp, vp, cnp);
1220		if (!error)
1221			union_removed_upper(un);
1222	} else {
1223		FIXUP(dun);
1224		error = union_mkwhiteout(
1225			MOUNTTOUNIONMOUNT(UNIONTOV(dun)->v_mount),
1226			dun->un_uppervp, ap->a_cnp, un->un_path);
1227		vput(ap->a_dvp);
1228		vput(ap->a_vp);
1229	}
1230
1231	return (error);
1232}
1233
1234int
1235union_link(v)
1236	void *v;
1237{
1238	struct vop_link_args /* {
1239		struct vnode *a_dvp;
1240		struct vnode *a_vp;
1241		struct componentname *a_cnp;
1242	} */ *ap = v;
1243	int error = 0;
1244	struct componentname *cnp = ap->a_cnp;
1245	struct lwp *l = cnp->cn_lwp;
1246	struct union_node *dun;
1247	struct vnode *vp;
1248	struct vnode *dvp;
1249
1250	dun = VTOUNION(ap->a_dvp);
1251
1252#ifdef DIAGNOSTIC
1253	if (!(ap->a_cnp->cn_flags & LOCKPARENT)) {
1254		printf("union_link called without LOCKPARENT set!\n");
1255		error = EIO; /* need some error code for "caller is a bozo" */
1256	} else
1257#endif
1258
1259
1260	if (ap->a_dvp->v_op != ap->a_vp->v_op) {
1261		vp = ap->a_vp;
1262	} else {
1263		struct union_node *un = VTOUNION(ap->a_vp);
1264		if (un->un_uppervp == NULLVP) {
1265			/*
1266			 * Needs to be copied before we can link it.
1267			 */
1268			vn_lock(ap->a_vp, LK_EXCLUSIVE | LK_RETRY);
1269			if (dun->un_uppervp == un->un_dirvp) {
1270				dun->un_flags &= ~UN_ULOCK;
1271				VOP_UNLOCK(dun->un_uppervp, 0);
1272			}
1273			error = union_copyup(un, 1, cnp->cn_cred, l);
1274			if (dun->un_uppervp == un->un_dirvp) {
1275				/*
1276				 * During copyup, we dropped the lock on the
1277				 * dir and invalidated any saved namei lookup
1278				 * state for the directory we'll be entering
1279				 * the link in.  We need to re-run the lookup
1280				 * in that directory to reset any state needed
1281				 * for VOP_LINK.
1282				 * Call relookup on the union-layer to reset
1283				 * the state.
1284				 */
1285				vp  = NULLVP;
1286				if (dun->un_uppervp == NULLVP)
1287					 panic("union: null upperdvp?");
1288				/*
1289				 * relookup starts with an unlocked node,
1290				 * and since LOCKPARENT is set returns
1291				 * the starting directory locked.
1292				 */
1293				VOP_UNLOCK(ap->a_dvp, 0);
1294				error = relookup(ap->a_dvp, &vp, ap->a_cnp);
1295				if (error) {
1296					vrele(ap->a_dvp);
1297					VOP_UNLOCK(ap->a_vp, 0);
1298					return EROFS;	/* ? */
1299				}
1300				if (vp != NULLVP) {
1301					/*
1302					 * The name we want to create has
1303					 * mysteriously appeared (a race?)
1304					 */
1305					error = EEXIST;
1306					VOP_UNLOCK(ap->a_vp, 0);
1307					goto croak;
1308				}
1309			}
1310			VOP_UNLOCK(ap->a_vp, 0);
1311		}
1312		vp = un->un_uppervp;
1313	}
1314
1315	dvp = dun->un_uppervp;
1316	if (dvp == NULLVP)
1317		error = EROFS;
1318
1319	if (error) {
1320croak:
1321		vput(ap->a_dvp);
1322		return (error);
1323	}
1324
1325	FIXUP(dun);
1326	VREF(dvp);
1327	dun->un_flags |= UN_KLOCK;
1328	vput(ap->a_dvp);
1329
1330	return (VOP_LINK(dvp, vp, cnp));
1331}
1332
1333int
1334union_rename(v)
1335	void *v;
1336{
1337	struct vop_rename_args  /* {
1338		struct vnode *a_fdvp;
1339		struct vnode *a_fvp;
1340		struct componentname *a_fcnp;
1341		struct vnode *a_tdvp;
1342		struct vnode *a_tvp;
1343		struct componentname *a_tcnp;
1344	} */ *ap = v;
1345	int error;
1346
1347	struct vnode *fdvp = ap->a_fdvp;
1348	struct vnode *fvp = ap->a_fvp;
1349	struct vnode *tdvp = ap->a_tdvp;
1350	struct vnode *tvp = ap->a_tvp;
1351
1352	if (fdvp->v_op == union_vnodeop_p) {	/* always true */
1353		struct union_node *un = VTOUNION(fdvp);
1354		if (un->un_uppervp == NULLVP) {
1355			/*
1356			 * this should never happen in normal
1357			 * operation but might if there was
1358			 * a problem creating the top-level shadow
1359			 * directory.
1360			 */
1361			error = EXDEV;
1362			goto bad;
1363		}
1364
1365		fdvp = un->un_uppervp;
1366		VREF(fdvp);
1367		vrele(ap->a_fdvp);
1368	}
1369
1370	if (fvp->v_op == union_vnodeop_p) {	/* always true */
1371		struct union_node *un = VTOUNION(fvp);
1372		if (un->un_uppervp == NULLVP) {
1373			/* XXX: should do a copyup */
1374			error = EXDEV;
1375			goto bad;
1376		}
1377
1378		if (un->un_lowervp != NULLVP)
1379			ap->a_fcnp->cn_flags |= DOWHITEOUT;
1380
1381		fvp = un->un_uppervp;
1382		VREF(fvp);
1383		vrele(ap->a_fvp);
1384	}
1385
1386	if (tdvp->v_op == union_vnodeop_p) {
1387		struct union_node *un = VTOUNION(tdvp);
1388		if (un->un_uppervp == NULLVP) {
1389			/*
1390			 * this should never happen in normal
1391			 * operation but might if there was
1392			 * a problem creating the top-level shadow
1393			 * directory.
1394			 */
1395			error = EXDEV;
1396			goto bad;
1397		}
1398
1399		tdvp = un->un_uppervp;
1400		VREF(tdvp);
1401		un->un_flags |= UN_KLOCK;
1402		vput(ap->a_tdvp);
1403	}
1404
1405	if (tvp != NULLVP && tvp->v_op == union_vnodeop_p) {
1406		struct union_node *un = VTOUNION(tvp);
1407
1408		tvp = un->un_uppervp;
1409		if (tvp != NULLVP) {
1410			VREF(tvp);
1411			un->un_flags |= UN_KLOCK;
1412		}
1413		vput(ap->a_tvp);
1414	}
1415
1416	return (VOP_RENAME(fdvp, fvp, ap->a_fcnp, tdvp, tvp, ap->a_tcnp));
1417
1418bad:
1419	vrele(fdvp);
1420	vrele(fvp);
1421	vput(tdvp);
1422	if (tvp != NULLVP)
1423		vput(tvp);
1424
1425	return (error);
1426}
1427
1428int
1429union_mkdir(v)
1430	void *v;
1431{
1432	struct vop_mkdir_args /* {
1433		struct vnode *a_dvp;
1434		struct vnode **a_vpp;
1435		struct componentname *a_cnp;
1436		struct vattr *a_vap;
1437	} */ *ap = v;
1438	struct union_node *un = VTOUNION(ap->a_dvp);
1439	struct vnode *dvp = un->un_uppervp;
1440	struct componentname *cnp = ap->a_cnp;
1441
1442	if (dvp != NULLVP) {
1443		int error;
1444		struct vnode *vp;
1445
1446		FIXUP(un);
1447		VREF(dvp);
1448		un->un_flags |= UN_KLOCK;
1449		VOP_UNLOCK(ap->a_dvp, 0);
1450		error = VOP_MKDIR(dvp, &vp, cnp, ap->a_vap);
1451		if (error) {
1452			vrele(ap->a_dvp);
1453			return (error);
1454		}
1455
1456		error = union_allocvp(ap->a_vpp, ap->a_dvp->v_mount, ap->a_dvp,
1457				NULLVP, cnp, vp, NULLVP, 1, cnp->cn_lwp);
1458		vrele(ap->a_dvp);
1459		if (error)
1460			vput(vp);
1461		return (error);
1462	}
1463
1464	vput(ap->a_dvp);
1465	return (EROFS);
1466}
1467
1468int
1469union_rmdir(v)
1470	void *v;
1471{
1472	struct vop_rmdir_args /* {
1473		struct vnode *a_dvp;
1474		struct vnode *a_vp;
1475		struct componentname *a_cnp;
1476	} */ *ap = v;
1477	int error;
1478	struct union_node *dun = VTOUNION(ap->a_dvp);
1479	struct union_node *un = VTOUNION(ap->a_vp);
1480	struct componentname *cnp = ap->a_cnp;
1481
1482	if (dun->un_uppervp == NULLVP)
1483		panic("union rmdir: null upper vnode");
1484
1485	if (un->un_uppervp != NULLVP) {
1486		struct vnode *dvp = dun->un_uppervp;
1487		struct vnode *vp = un->un_uppervp;
1488
1489		FIXUP(dun);
1490		VREF(dvp);
1491		dun->un_flags |= UN_KLOCK;
1492		vput(ap->a_dvp);
1493		FIXUP(un);
1494		VREF(vp);
1495		un->un_flags |= UN_KLOCK;
1496		vput(ap->a_vp);
1497
1498		if (union_dowhiteout(un, cnp->cn_cred, cnp->cn_lwp))
1499			cnp->cn_flags |= DOWHITEOUT;
1500		error = VOP_RMDIR(dvp, vp, ap->a_cnp);
1501		if (!error)
1502			union_removed_upper(un);
1503	} else {
1504		FIXUP(dun);
1505		error = union_mkwhiteout(
1506			MOUNTTOUNIONMOUNT(UNIONTOV(dun)->v_mount),
1507			dun->un_uppervp, ap->a_cnp, un->un_path);
1508		vput(ap->a_dvp);
1509		vput(ap->a_vp);
1510	}
1511
1512	return (error);
1513}
1514
1515int
1516union_symlink(v)
1517	void *v;
1518{
1519	struct vop_symlink_args /* {
1520		struct vnode *a_dvp;
1521		struct vnode **a_vpp;
1522		struct componentname *a_cnp;
1523		struct vattr *a_vap;
1524		char *a_target;
1525	} */ *ap = v;
1526	struct union_node *un = VTOUNION(ap->a_dvp);
1527	struct vnode *dvp = un->un_uppervp;
1528	struct componentname *cnp = ap->a_cnp;
1529
1530	if (dvp != NULLVP) {
1531		int error;
1532
1533		FIXUP(un);
1534		VREF(dvp);
1535		un->un_flags |= UN_KLOCK;
1536		vput(ap->a_dvp);
1537		error = VOP_SYMLINK(dvp, ap->a_vpp, cnp, ap->a_vap,
1538				    ap->a_target);
1539		return (error);
1540	}
1541
1542	vput(ap->a_dvp);
1543	return (EROFS);
1544}
1545
1546/*
1547 * union_readdir works in concert with getdirentries and
1548 * readdir(3) to provide a list of entries in the unioned
1549 * directories.  getdirentries is responsible for walking
1550 * down the union stack.  readdir(3) is responsible for
1551 * eliminating duplicate names from the returned data stream.
1552 */
1553int
1554union_readdir(v)
1555	void *v;
1556{
1557	struct vop_readdir_args /* {
1558		struct vnodeop_desc *a_desc;
1559		struct vnode *a_vp;
1560		struct uio *a_uio;
1561		struct ucred *a_cred;
1562		int *a_eofflag;
1563		u_long *a_cookies;
1564		int a_ncookies;
1565	} */ *ap = v;
1566	struct union_node *un = VTOUNION(ap->a_vp);
1567	struct vnode *uvp = un->un_uppervp;
1568
1569	if (uvp == NULLVP)
1570		return (0);
1571
1572	FIXUP(un);
1573	ap->a_vp = uvp;
1574	return (VCALL(uvp, VOFFSET(vop_readdir), ap));
1575}
1576
1577int
1578union_readlink(v)
1579	void *v;
1580{
1581	struct vop_readlink_args /* {
1582		struct vnode *a_vp;
1583		struct uio *a_uio;
1584		struct ucred *a_cred;
1585	} */ *ap = v;
1586	int error;
1587	struct vnode *vp = OTHERVP(ap->a_vp);
1588	int dolock = (vp == LOWERVP(ap->a_vp));
1589
1590	if (dolock)
1591		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1592	else
1593		FIXUP(VTOUNION(ap->a_vp));
1594	ap->a_vp = vp;
1595	error = VCALL(vp, VOFFSET(vop_readlink), ap);
1596	if (dolock)
1597		VOP_UNLOCK(vp, 0);
1598
1599	return (error);
1600}
1601
1602int
1603union_abortop(v)
1604	void *v;
1605{
1606	struct vop_abortop_args /* {
1607		struct vnode *a_dvp;
1608		struct componentname *a_cnp;
1609	} */ *ap = v;
1610	int error;
1611	struct vnode *vp = OTHERVP(ap->a_dvp);
1612	struct union_node *un = VTOUNION(ap->a_dvp);
1613	int islocked = un->un_flags & UN_LOCKED;
1614	int dolock = (vp == LOWERVP(ap->a_dvp));
1615
1616	if (islocked) {
1617		if (dolock)
1618			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1619		else
1620			FIXUP(VTOUNION(ap->a_dvp));
1621	}
1622	ap->a_dvp = vp;
1623	error = VCALL(vp, VOFFSET(vop_abortop), ap);
1624	if (islocked && dolock)
1625		VOP_UNLOCK(vp, 0);
1626
1627	return (error);
1628}
1629
1630int
1631union_inactive(v)
1632	void *v;
1633{
1634	struct vop_inactive_args /* {
1635		struct vnode *a_vp;
1636		struct lwp *a_l;
1637	} */ *ap = v;
1638	struct vnode *vp = ap->a_vp;
1639	struct union_node *un = VTOUNION(vp);
1640	struct vnode **vpp;
1641
1642	/*
1643	 * Do nothing (and _don't_ bypass).
1644	 * Wait to vrele lowervp until reclaim,
1645	 * so that until then our union_node is in the
1646	 * cache and reusable.
1647	 *
1648	 * NEEDSWORK: Someday, consider inactive'ing
1649	 * the lowervp and then trying to reactivate it
1650	 * with capabilities (v_id)
1651	 * like they do in the name lookup cache code.
1652	 * That's too much work for now.
1653	 */
1654
1655	if (un->un_dircache != 0) {
1656		for (vpp = un->un_dircache; *vpp != NULLVP; vpp++)
1657			vrele(*vpp);
1658		free(un->un_dircache, M_TEMP);
1659		un->un_dircache = 0;
1660	}
1661
1662	VOP_UNLOCK(vp, 0);
1663
1664	if ((un->un_flags & UN_CACHED) == 0)
1665		vgone(vp);
1666
1667	return (0);
1668}
1669
1670int
1671union_reclaim(v)
1672	void *v;
1673{
1674	struct vop_reclaim_args /* {
1675		struct vnode *a_vp;
1676	} */ *ap = v;
1677
1678	union_freevp(ap->a_vp);
1679
1680	return (0);
1681}
1682
1683int
1684union_lock(v)
1685	void *v;
1686{
1687	struct vop_lock_args /* {
1688		struct vnode *a_vp;
1689		int a_flags;
1690	} */ *ap = v;
1691	struct vnode *vp = ap->a_vp;
1692	int flags = ap->a_flags;
1693	struct union_node *un;
1694	int error;
1695#ifdef DIAGNOSTIC
1696	int drain = 0;
1697#endif
1698
1699	genfs_nolock(ap);
1700	/*
1701	 * Need to do real lockmgr-style locking here.
1702	 * in the mean time, draining won't work quite right,
1703	 * which could lead to a few race conditions.
1704	 * the following test was here, but is not quite right, we
1705	 * still need to take the lock:
1706	if ((flags & LK_TYPE_MASK) == LK_DRAIN)
1707		return (0);
1708	 */
1709	flags &= ~LK_INTERLOCK;
1710
1711	un = VTOUNION(vp);
1712#ifdef DIAGNOSTIC
1713	if (un->un_flags & (UN_DRAINING|UN_DRAINED)) {
1714		if (un->un_flags & UN_DRAINED)
1715			panic("union: %p: warning: locking decommissioned lock", vp);
1716		if ((flags & LK_TYPE_MASK) != LK_RELEASE)
1717			panic("union: %p: non-release on draining lock: %d",
1718			    vp, flags & LK_TYPE_MASK);
1719		un->un_flags &= ~UN_DRAINING;
1720		if ((flags & LK_REENABLE) == 0)
1721			un->un_flags |= UN_DRAINED;
1722	}
1723#endif
1724
1725	/*
1726	 * Don't pass DRAIN through to sub-vnode lock; keep track of
1727	 * DRAIN state at this level, and just get an exclusive lock
1728	 * on the underlying vnode.
1729	 */
1730	if ((flags & LK_TYPE_MASK) == LK_DRAIN) {
1731#ifdef DIAGNOSTIC
1732		drain = 1;
1733#endif
1734		flags = LK_EXCLUSIVE | (flags & ~LK_TYPE_MASK);
1735	}
1736start:
1737	un = VTOUNION(vp);
1738
1739	if (un->un_uppervp != NULLVP) {
1740		if (((un->un_flags & UN_ULOCK) == 0) &&
1741		    (vp->v_usecount != 0)) {
1742			/*
1743			 * We MUST always use the order of: take upper
1744			 * vp lock, manipulate union node flags, drop
1745			 * upper vp lock.  This code must not be an
1746			 */
1747			error = vn_lock(un->un_uppervp, flags);
1748			if (error)
1749				return (error);
1750			un->un_flags |= UN_ULOCK;
1751		}
1752#ifdef DIAGNOSTIC
1753		if (un->un_flags & UN_KLOCK) {
1754			vprint("union: dangling klock", vp);
1755			panic("union: dangling upper lock (%p)", vp);
1756		}
1757#endif
1758	}
1759
1760	/* XXX ignores LK_NOWAIT */
1761	if (un->un_flags & UN_LOCKED) {
1762#ifdef DIAGNOSTIC
1763		if (curproc && un->un_pid == curproc->p_pid &&
1764			    un->un_pid > -1 && curproc->p_pid > -1)
1765			panic("union: locking against myself");
1766#endif
1767		un->un_flags |= UN_WANTED;
1768		tsleep((caddr_t)&un->un_flags, PINOD, "unionlk2", 0);
1769		goto start;
1770	}
1771
1772#ifdef DIAGNOSTIC
1773	if (curproc)
1774		un->un_pid = curproc->p_pid;
1775	else
1776		un->un_pid = -1;
1777	if (drain)
1778		un->un_flags |= UN_DRAINING;
1779#endif
1780
1781	un->un_flags |= UN_LOCKED;
1782	return (0);
1783}
1784
1785/*
1786 * When operations want to vput() a union node yet retain a lock on
1787 * the upper vnode (say, to do some further operations like link(),
1788 * mkdir(), ...), they set UN_KLOCK on the union node, then call
1789 * vput() which calls VOP_UNLOCK() and comes here.  union_unlock()
1790 * unlocks the union node (leaving the upper vnode alone), clears the
1791 * KLOCK flag, and then returns to vput().  The caller then does whatever
1792 * is left to do with the upper vnode, and ensures that it gets unlocked.
1793 *
1794 * If UN_KLOCK isn't set, then the upper vnode is unlocked here.
1795 */
1796int
1797union_unlock(v)
1798	void *v;
1799{
1800	struct vop_unlock_args /* {
1801		struct vnode *a_vp;
1802		int a_flags;
1803	} */ *ap = v;
1804	struct union_node *un = VTOUNION(ap->a_vp);
1805
1806#ifdef DIAGNOSTIC
1807	if ((un->un_flags & UN_LOCKED) == 0)
1808		panic("union: unlock unlocked node");
1809	if (curproc && un->un_pid != curproc->p_pid &&
1810			curproc->p_pid > -1 && un->un_pid > -1)
1811		panic("union: unlocking other process's union node");
1812	if (un->un_flags & UN_DRAINED)
1813		panic("union: %p: warning: unlocking decommissioned lock", ap->a_vp);
1814#endif
1815
1816	un->un_flags &= ~UN_LOCKED;
1817
1818	if ((un->un_flags & (UN_ULOCK|UN_KLOCK)) == UN_ULOCK)
1819		VOP_UNLOCK(un->un_uppervp, 0);
1820
1821	un->un_flags &= ~(UN_ULOCK|UN_KLOCK);
1822
1823	if (un->un_flags & UN_WANTED) {
1824		un->un_flags &= ~UN_WANTED;
1825		wakeup((caddr_t) &un->un_flags);
1826	}
1827
1828#ifdef DIAGNOSTIC
1829	un->un_pid = 0;
1830	if (un->un_flags & UN_DRAINING) {
1831		un->un_flags |= UN_DRAINED;
1832		un->un_flags &= ~UN_DRAINING;
1833	}
1834#endif
1835	genfs_nounlock(ap);
1836
1837	return (0);
1838}
1839
1840int
1841union_bmap(v)
1842	void *v;
1843{
1844	struct vop_bmap_args /* {
1845		struct vnode *a_vp;
1846		daddr_t  a_bn;
1847		struct vnode **a_vpp;
1848		daddr_t *a_bnp;
1849		int *a_runp;
1850	} */ *ap = v;
1851	int error;
1852	struct vnode *vp = OTHERVP(ap->a_vp);
1853	int dolock = (vp == LOWERVP(ap->a_vp));
1854
1855	if (dolock)
1856		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1857	else
1858		FIXUP(VTOUNION(ap->a_vp));
1859	ap->a_vp = vp;
1860	error = VCALL(vp, VOFFSET(vop_bmap), ap);
1861	if (dolock)
1862		VOP_UNLOCK(vp, 0);
1863
1864	return (error);
1865}
1866
1867int
1868union_print(v)
1869	void *v;
1870{
1871	struct vop_print_args /* {
1872		struct vnode *a_vp;
1873	} */ *ap = v;
1874	struct vnode *vp = ap->a_vp;
1875
1876	printf("\ttag VT_UNION, vp=%p, uppervp=%p, lowervp=%p\n",
1877			vp, UPPERVP(vp), LOWERVP(vp));
1878	if (UPPERVP(vp) != NULLVP)
1879		vprint("union: upper", UPPERVP(vp));
1880	if (LOWERVP(vp) != NULLVP)
1881		vprint("union: lower", LOWERVP(vp));
1882	if (VTOUNION(vp)->un_dircache) {
1883		struct vnode **vpp;
1884		for (vpp = VTOUNION(vp)->un_dircache; *vpp != NULLVP; vpp++)
1885			vprint("dircache:", *vpp);
1886	}
1887
1888	return (0);
1889}
1890
1891int
1892union_islocked(v)
1893	void *v;
1894{
1895	struct vop_islocked_args /* {
1896		struct vnode *a_vp;
1897	} */ *ap = v;
1898
1899	return ((VTOUNION(ap->a_vp)->un_flags & UN_LOCKED) ? 1 : 0);
1900}
1901
1902int
1903union_pathconf(v)
1904	void *v;
1905{
1906	struct vop_pathconf_args /* {
1907		struct vnode *a_vp;
1908		int a_name;
1909		int *a_retval;
1910	} */ *ap = v;
1911	int error;
1912	struct vnode *vp = OTHERVP(ap->a_vp);
1913	int dolock = (vp == LOWERVP(ap->a_vp));
1914
1915	if (dolock)
1916		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1917	else
1918		FIXUP(VTOUNION(ap->a_vp));
1919	ap->a_vp = vp;
1920	error = VCALL(vp, VOFFSET(vop_pathconf), ap);
1921	if (dolock)
1922		VOP_UNLOCK(vp, 0);
1923
1924	return (error);
1925}
1926
1927int
1928union_advlock(v)
1929	void *v;
1930{
1931	struct vop_advlock_args /* {
1932		struct vnode *a_vp;
1933		caddr_t  a_id;
1934		int  a_op;
1935		struct flock *a_fl;
1936		int  a_flags;
1937	} */ *ap = v;
1938	struct vnode *ovp = OTHERVP(ap->a_vp);
1939
1940	ap->a_vp = ovp;
1941	return (VCALL(ovp, VOFFSET(vop_advlock), ap));
1942}
1943
1944
1945/*
1946 * XXX - vop_strategy must be hand coded because it has no
1947 * vnode in its arguments.
1948 * This goes away with a merged VM/buffer cache.
1949 */
1950int
1951union_strategy(v)
1952	void *v;
1953{
1954	struct vop_strategy_args /* {
1955		struct buf *a_bp;
1956	} */ *ap = v;
1957	struct buf *bp = ap->a_bp;
1958	int error;
1959	struct vnode *savedvp;
1960
1961	savedvp = bp->b_vp;
1962	bp->b_vp = OTHERVP(bp->b_vp);
1963
1964#ifdef DIAGNOSTIC
1965	if (bp->b_vp == NULLVP)
1966		panic("union_strategy: nil vp");
1967	if (((bp->b_flags & B_READ) == 0) &&
1968	    (bp->b_vp == LOWERVP(savedvp)))
1969		panic("union_strategy: writing to lowervp");
1970#endif
1971
1972	error = VOP_STRATEGY(bp);
1973	bp->b_vp = savedvp;
1974
1975	return (error);
1976}
1977
1978int
1979union_getpages(v)
1980	void *v;
1981{
1982	struct vop_getpages_args /* {
1983		struct vnode *a_vp;
1984		voff_t a_offset;
1985		struct vm_page **a_m;
1986		int *a_count;
1987		int a_centeridx;
1988		vm_prot_t a_access_type;
1989		int a_advice;
1990		int a_flags;
1991	} */ *ap = v;
1992	struct vnode *vp = ap->a_vp;
1993	int error;
1994
1995	/*
1996	 * just pass the request on to the underlying layer.
1997	 */
1998
1999	if (ap->a_flags & PGO_LOCKED) {
2000		return EBUSY;
2001	}
2002	ap->a_vp = OTHERVP(vp);
2003	simple_unlock(&vp->v_interlock);
2004	simple_lock(&ap->a_vp->v_interlock);
2005	error = VCALL(ap->a_vp, VOFFSET(vop_getpages), ap);
2006	return error;
2007}
2008
2009int
2010union_putpages(v)
2011	void *v;
2012{
2013	struct vop_putpages_args /* {
2014		struct vnode *a_vp;
2015		voff_t a_offlo;
2016		voff_t a_offhi;
2017		int a_flags;
2018	} */ *ap = v;
2019	struct vnode *vp = ap->a_vp;
2020	int error;
2021
2022	/*
2023	 * just pass the request on to the underlying layer.
2024	 */
2025
2026	ap->a_vp = OTHERVP(vp);
2027	simple_unlock(&vp->v_interlock);
2028	simple_lock(&ap->a_vp->v_interlock);
2029	error = VCALL(ap->a_vp, VOFFSET(vop_putpages), ap);
2030	return error;
2031}
2032
2033int
2034union_kqfilter(void *v)
2035{
2036	struct vop_kqfilter_args /* {
2037		struct vnode	*a_vp;
2038		struct knote	*a_kn;
2039	} */ *ap = v;
2040	int error;
2041
2042	/*
2043	 * We watch either the upper layer file (if it already exists),
2044	 * or the lower layer one. If there is lower layer file only
2045	 * at this moment, we will keep watching that lower layer file
2046	 * even if upper layer file would be created later on.
2047	 */
2048	if (UPPERVP(ap->a_vp))
2049		error = VOP_KQFILTER(UPPERVP(ap->a_vp), ap->a_kn);
2050	else if (LOWERVP(ap->a_vp))
2051		error = VOP_KQFILTER(LOWERVP(ap->a_vp), ap->a_kn);
2052	else {
2053		/* panic? */
2054		error = EOPNOTSUPP;
2055	}
2056
2057	return (error);
2058}
2059