union_vnops.c revision 140779
1/*-
2 * Copyright (c) 1992, 1993, 1994, 1995 Jan-Simon Pendry.
3 * Copyright (c) 1992, 1993, 1994, 1995
4 *	The Regents of the University of California.  All rights reserved.
5 *
6 * This code is derived from software contributed to Berkeley by
7 * Jan-Simon Pendry.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 * 4. Neither the name of the University nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 *	@(#)union_vnops.c	8.32 (Berkeley) 6/23/95
34 * $FreeBSD: head/sys/fs/unionfs/union_vnops.c 140779 2005-01-24 23:53:54Z phk $
35 */
36
37#include <sys/param.h>
38#include <sys/systm.h>
39#include <sys/fcntl.h>
40#include <sys/stat.h>
41#include <sys/kernel.h>
42#include <sys/vnode.h>
43#include <sys/mount.h>
44#include <sys/namei.h>
45#include <sys/malloc.h>
46#include <sys/bio.h>
47#include <sys/buf.h>
48#include <sys/lock.h>
49#include <sys/sysctl.h>
50#include <sys/unistd.h>
51#include <sys/acl.h>
52#include <sys/event.h>
53#include <sys/extattr.h>
54#include <sys/mac.h>
55#include <fs/unionfs/union.h>
56
57#include <vm/vm.h>
58#include <vm/vnode_pager.h>
59
60#include <vm/vm_page.h>
61#include <vm/vm_object.h>
62
63int uniondebug = 0;
64
65#if UDEBUG_ENABLED
66SYSCTL_INT(_vfs, OID_AUTO, uniondebug, CTLFLAG_RW, &uniondebug, 0, "");
67#else
68SYSCTL_INT(_vfs, OID_AUTO, uniondebug, CTLFLAG_RD, &uniondebug, 0, "");
69#endif
70
71static vop_access_t	union_access;
72static vop_aclcheck_t	union_aclcheck;
73static vop_advlock_t	union_advlock;
74static vop_close_t	union_close;
75static vop_closeextattr_t	union_closeextattr;
76static vop_create_t	union_create;
77static vop_createvobject_t	union_createvobject;
78static vop_deleteextattr_t	union_deleteextattr;
79static vop_destroyvobject_t	union_destroyvobject;
80static vop_fsync_t	union_fsync;
81static vop_getacl_t	union_getacl;
82static vop_getattr_t	union_getattr;
83static vop_getextattr_t	union_getextattr;
84static vop_getvobject_t	union_getvobject;
85static vop_inactive_t	union_inactive;
86static vop_ioctl_t	union_ioctl;
87static vop_lease_t	union_lease;
88static vop_link_t	union_link;
89static vop_listextattr_t	union_listextattr;
90static vop_lookup_t	union_lookup;
91static int	union_lookup1(struct vnode *udvp, struct vnode **dvp,
92				   struct vnode **vpp,
93				   struct componentname *cnp);
94static vop_mkdir_t	union_mkdir;
95static vop_mknod_t	union_mknod;
96static vop_open_t	union_open;
97static vop_openextattr_t	union_openextattr;
98static vop_pathconf_t	union_pathconf;
99static vop_print_t	union_print;
100static vop_read_t	union_read;
101static vop_readdir_t	union_readdir;
102static vop_readlink_t	union_readlink;
103static vop_getwritemount_t	union_getwritemount;
104static vop_reclaim_t	union_reclaim;
105static vop_remove_t	union_remove;
106static vop_rename_t	union_rename;
107static vop_rmdir_t	union_rmdir;
108static vop_poll_t	union_poll;
109static vop_setacl_t	union_setacl;
110static vop_setattr_t	union_setattr;
111static vop_setlabel_t	union_setlabel;
112static vop_setextattr_t	union_setextattr;
113static vop_strategy_t	union_strategy;
114static vop_symlink_t	union_symlink;
115static vop_whiteout_t	union_whiteout;
116static vop_write_t	union_write;
117
118static __inline
119struct vnode *
120union_lock_upper(struct union_node *un, struct thread *td)
121{
122	struct vnode *uppervp;
123
124	if ((uppervp = un->un_uppervp) != NULL) {
125		VREF(uppervp);
126		vn_lock(uppervp, LK_EXCLUSIVE | LK_CANRECURSE | LK_RETRY, td);
127	}
128	KASSERT((uppervp == NULL || vrefcnt(uppervp) > 0), ("uppervp usecount is 0"));
129	return(uppervp);
130}
131
/*
 * Undo union_lock_upper(): vput() the vnode it returned.
 * The thread argument is accepted for symmetry with the lock routine but
 * is not used.
 */
static __inline void
union_unlock_upper(struct vnode *uppervp, struct thread *td)
{

	vput(uppervp);
}
138
139static __inline
140struct vnode *
141union_lock_other(struct union_node *un, struct thread *td)
142{
143	struct vnode *vp;
144
145	if (un->un_uppervp != NULL) {
146		vp = union_lock_upper(un, td);
147	} else if ((vp = un->un_lowervp) != NULL) {
148		VREF(vp);
149		vn_lock(vp, LK_EXCLUSIVE | LK_CANRECURSE | LK_RETRY, td);
150	}
151	return(vp);
152}
153
/*
 * Undo union_lock_other(): vput() the vnode it returned.
 * The thread argument is not used.
 */
static __inline void
union_unlock_other(struct vnode *vp, struct thread *td)
{

	vput(vp);
}
160
161/*
162 *	union_lookup:
163 *
164 *	udvp	must be exclusively locked on call and will remain
165 *		exclusively locked on return.  This is the mount point
166 *		for our filesystem.
167 *
168 *	dvp	Our base directory, locked and referenced.
169 *		The passed dvp will be dereferenced and unlocked on return
170 *		and a new dvp will be returned which is locked and
171 *		referenced in the same variable.
172 *
173 *	vpp	is filled in with the result if no error occured,
174 *		locked and ref'd.
175 *
176 *		If an error is returned, *vpp is set to NULLVP.  If no
177 *		error occurs, *vpp is returned with a reference and an
178 *		exclusive lock.
179 */
180
181static int
182union_lookup1(udvp, pdvp, vpp, cnp)
183	struct vnode *udvp;
184	struct vnode **pdvp;
185	struct vnode **vpp;
186	struct componentname *cnp;
187{
188	int error;
189	struct thread *td = cnp->cn_thread;
190	struct vnode *dvp = *pdvp;
191	struct vnode *tdvp;
192	struct mount *mp;
193
194	/*
195	 * If stepping up the directory tree, check for going
196	 * back across the mount point, in which case do what
197	 * lookup would do by stepping back down the mount
198	 * hierarchy.
199	 */
200	if (cnp->cn_flags & ISDOTDOT) {
201		while ((dvp != udvp) && (dvp->v_vflag & VV_ROOT)) {
202			/*
203			 * Don't do the NOCROSSMOUNT check
204			 * at this level.  By definition,
205			 * union fs deals with namespaces, not
206			 * filesystems.
207			 */
208			tdvp = dvp;
209			dvp = dvp->v_mount->mnt_vnodecovered;
210			VREF(dvp);
211			vput(tdvp);
212			vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, td);
213		}
214	}
215
216	/*
217	 * Set return dvp to be the upperdvp 'parent directory.
218	 */
219	*pdvp = dvp;
220
221	/*
222	 * If the VOP_LOOKUP() call generates an error, tdvp is invalid and
223	 * no changes will have been made to dvp, so we are set to return.
224	 */
225
226        error = VOP_LOOKUP(dvp, &tdvp, cnp);
227	if (error) {
228		UDEBUG(("dvp %p error %d flags %lx\n", dvp, error, cnp->cn_flags));
229		*vpp = NULL;
230		return (error);
231	}
232
233	/*
234	 * The parent directory will have been unlocked, unless lookup
235	 * found the last component or if dvp == tdvp (tdvp must be locked).
236	 *
237	 * We want our dvp to remain locked and ref'd.  We also want tdvp
238	 * to remain locked and ref'd.
239	 */
240	UDEBUG(("parentdir %p result %p flag %lx\n", dvp, tdvp, cnp->cn_flags));
241
242	if (dvp != tdvp && (cnp->cn_flags & ISLASTCN) == 0)
243		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, td);
244
245	/*
246	 * Lastly check if the current node is a mount point in
247	 * which case walk up the mount hierarchy making sure not to
248	 * bump into the root of the mount tree (ie. dvp != udvp).
249	 *
250	 * We use dvp as a temporary variable here, it is no longer related
251	 * to the dvp above.  However, we have to ensure that both *pdvp and
252	 * tdvp are locked on return.
253	 */
254
255	dvp = tdvp;
256	while (
257	    dvp != udvp &&
258	    (dvp->v_type == VDIR) &&
259	    (mp = dvp->v_mountedhere)
260	) {
261		int relock_pdvp = 0;
262
263		if (vfs_busy(mp, 0, 0, td))
264			continue;
265
266		if (dvp == *pdvp)
267			relock_pdvp = 1;
268		vput(dvp);
269		dvp = NULL;
270		error = VFS_ROOT(mp, &dvp, td);
271
272		vfs_unbusy(mp, td);
273
274		if (relock_pdvp)
275			vn_lock(*pdvp, LK_EXCLUSIVE | LK_RETRY, td);
276
277		if (error) {
278			*vpp = NULL;
279			return (error);
280		}
281	}
282	*vpp = dvp;
283	return (0);
284}
285
286static int
287union_lookup(ap)
288	struct vop_lookup_args /* {
289		struct vnodeop_desc *a_desc;
290		struct vnode *a_dvp;
291		struct vnode **a_vpp;
292		struct componentname *a_cnp;
293	} */ *ap;
294{
295	int error;
296	int uerror, lerror;
297	struct vnode *uppervp, *lowervp;
298	struct vnode *upperdvp, *lowerdvp;
299	struct vnode *dvp = ap->a_dvp;		/* starting dir */
300	struct union_node *dun = VTOUNION(dvp);	/* associated union node */
301	struct componentname *cnp = ap->a_cnp;
302	struct thread *td = cnp->cn_thread;
303	int lockparent = cnp->cn_flags & LOCKPARENT;
304	struct union_mount *um = MOUNTTOUNIONMOUNT(dvp->v_mount);
305	struct ucred *saved_cred = NULL;
306	int iswhiteout;
307	struct vattr va;
308
309	*ap->a_vpp = NULLVP;
310
311	/*
312	 * Disallow write attempts to the filesystem mounted read-only.
313	 */
314	if ((cnp->cn_flags & ISLASTCN) &&
315	    (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
316	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) {
317		return (EROFS);
318	}
319
320	/*
321	 * For any lookups we do, always return with the parent locked.
322	 */
323	cnp->cn_flags |= LOCKPARENT;
324
325	lowerdvp = dun->un_lowervp;
326	uppervp = NULLVP;
327	lowervp = NULLVP;
328	iswhiteout = 0;
329
330	uerror = ENOENT;
331	lerror = ENOENT;
332
333	/*
334	 * Get a private lock on uppervp and a reference, effectively
335	 * taking it out of the union_node's control.
336	 *
337	 * We must lock upperdvp while holding our lock on dvp
338	 * to avoid a deadlock.
339	 */
340	upperdvp = union_lock_upper(dun, td);
341
342	/*
343	 * Do the lookup in the upper level.
344	 * If that level consumes additional pathnames,
345	 * then assume that something special is going
346	 * on and just return that vnode.
347	 */
348	if (upperdvp != NULLVP) {
349		/*
350		 * We do not have to worry about the DOTDOT case, we've
351		 * already unlocked dvp.
352		 */
353		UDEBUG(("A %p\n", upperdvp));
354
355		/*
356		 * Do the lookup.   We must supply a locked and referenced
357		 * upperdvp to the function and will get a new locked and
358		 * referenced upperdvp back, with the old having been
359		 * dereferenced.
360		 *
361		 * If an error is returned, uppervp will be NULLVP.  If no
362		 * error occurs, uppervp will be the locked and referenced.
363		 * Return vnode, or possibly NULL, depending on what is being
364		 * requested.  It is possible that the returned uppervp
365		 * will be the same as upperdvp.
366		 */
367		uerror = union_lookup1(um->um_uppervp, &upperdvp, &uppervp, cnp);
368		UDEBUG((
369		    "uerror %d upperdvp %p %d/%d, uppervp %p ref=%d/lck=%d\n",
370		    uerror,
371		    upperdvp,
372		    vrefcnt(upperdvp),
373		    VOP_ISLOCKED(upperdvp, NULL),
374		    uppervp,
375		    (uppervp ? vrefcnt(uppervp) : -99),
376		    (uppervp ? VOP_ISLOCKED(uppervp, NULL) : -99)
377		));
378
379		/*
380		 * Disallow write attempts to the filesystem mounted read-only.
381		 */
382		if (uerror == EJUSTRETURN && (cnp->cn_flags & ISLASTCN) &&
383		    (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
384		    (cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME)) {
385			error = EROFS;
386			goto out;
387		}
388
389		/*
390		 * Special case: If cn_consume != 0 then skip out.  The result
391		 * of the lookup is transfered to our return variable.  If
392		 * an error occured we have to throw away the results.
393		 */
394
395		if (cnp->cn_consume != 0) {
396			if ((error = uerror) == 0) {
397				*ap->a_vpp = uppervp;
398				uppervp = NULL;
399			}
400			goto out;
401		}
402
403		/*
404		 * Calculate whiteout, fall through.
405		 */
406
407		if (uerror == ENOENT || uerror == EJUSTRETURN) {
408			if (cnp->cn_flags & ISWHITEOUT) {
409				iswhiteout = 1;
410			} else if (lowerdvp != NULLVP) {
411				int terror;
412
413				terror = VOP_GETATTR(upperdvp, &va,
414					cnp->cn_cred, cnp->cn_thread);
415				if (terror == 0 && (va.va_flags & OPAQUE))
416					iswhiteout = 1;
417			}
418		}
419	}
420
421	/*
422	 * In a similar way to the upper layer, do the lookup
423	 * in the lower layer.   This time, if there is some
424	 * component magic going on, then vput whatever we got
425	 * back from the upper layer and return the lower vnode
426	 * instead.
427	 */
428
429	if (lowerdvp != NULLVP && !iswhiteout) {
430		int nameiop;
431
432		UDEBUG(("B %p\n", lowerdvp));
433
434		/*
435		 * Force only LOOKUPs on the lower node, since
436		 * we won't be making changes to it anyway.
437		 */
438		nameiop = cnp->cn_nameiop;
439		cnp->cn_nameiop = LOOKUP;
440		if (um->um_op == UNMNT_BELOW) {
441			saved_cred = cnp->cn_cred;
442			cnp->cn_cred = um->um_cred;
443		}
444
445		/*
446		 * We shouldn't have to worry about locking interactions
447		 * between the lower layer and our union layer (w.r.t.
448		 * `..' processing) because we don't futz with lowervp
449		 * locks in the union-node instantiation code path.
450		 *
451		 * union_lookup1() requires lowervp to be locked on entry,
452		 * and it will be unlocked on return.  The ref count will
453		 * not change.  On return lowervp doesn't represent anything
454		 * to us so we NULL it out.
455		 */
456		VREF(lowerdvp);
457		vn_lock(lowerdvp, LK_EXCLUSIVE | LK_RETRY, td);
458		lerror = union_lookup1(um->um_lowervp, &lowerdvp, &lowervp, cnp);
459		if (lowerdvp == lowervp)
460			vrele(lowerdvp);
461		else
462			vput(lowerdvp);
463		lowerdvp = NULL;	/* lowerdvp invalid after vput */
464
465		if (um->um_op == UNMNT_BELOW)
466			cnp->cn_cred = saved_cred;
467		cnp->cn_nameiop = nameiop;
468
469		if (cnp->cn_consume != 0 || lerror == EACCES) {
470			if ((error = lerror) == 0) {
471				*ap->a_vpp = lowervp;
472				lowervp = NULL;
473			}
474			goto out;
475		}
476	} else {
477		UDEBUG(("C %p\n", lowerdvp));
478		if ((cnp->cn_flags & ISDOTDOT) && dun->un_pvp != NULLVP) {
479			if ((lowervp = LOWERVP(dun->un_pvp)) != NULL) {
480				VREF(lowervp);
481				vn_lock(lowervp, LK_EXCLUSIVE | LK_RETRY, td);
482				lerror = 0;
483			}
484		}
485	}
486
487	/*
488	 * Ok.  Now we have uerror, uppervp, upperdvp, lerror, and lowervp.
489	 *
490	 * 1. If both layers returned an error, select the upper layer.
491	 *
492	 * 2. If the upper layer failed and the bottom layer succeeded,
493	 *    two subcases occur:
494	 *
495	 *	a.  The bottom vnode is not a directory, in which case
496	 *	    just return a new union vnode referencing an
497	 *	    empty top layer and the existing bottom layer.
498	 *
499	 *	b.  The bottom vnode is a directory, in which case
500	 *	    create a new directory in the top layer and
501	 *	    and fall through to case 3.
502	 *
503	 * 3. If the top layer succeeded, then return a new union
504	 *    vnode referencing whatever the new top layer and
505	 *    whatever the bottom layer returned.
506	 */
507
508	/* case 1. */
509	if ((uerror != 0) && (lerror != 0)) {
510		error = uerror;
511		goto out;
512	}
513
514	/* case 2. */
515	if (uerror != 0 /* && (lerror == 0) */ ) {
516		if (lowervp->v_type == VDIR) { /* case 2b. */
517			KASSERT(uppervp == NULL, ("uppervp unexpectedly non-NULL"));
518			/*
519			 * Oops, uppervp has a problem, we may have to shadow.
520			 */
521			uerror = union_mkshadow(um, upperdvp, cnp, &uppervp);
522			if (uerror) {
523				error = uerror;
524				goto out;
525			}
526		}
527	}
528
529	/*
530	 * Must call union_allocvp() with both the upper and lower vnodes
531	 * referenced and the upper vnode locked.   ap->a_vpp is returned
532	 * referenced and locked.  lowervp, uppervp, and upperdvp are
533	 * absorbed by union_allocvp() whether it succeeds or fails.
534	 *
535	 * upperdvp is the parent directory of uppervp which may be
536	 * different, depending on the path, from dvp->un_uppervp.  That's
537	 * why it is a separate argument.  Note that it must be unlocked.
538	 *
539	 * dvp must be locked on entry to the call and will be locked on
540	 * return.
541	 */
542
543	if (uppervp && uppervp != upperdvp)
544		VOP_UNLOCK(uppervp, 0, td);
545	if (lowervp)
546		VOP_UNLOCK(lowervp, 0, td);
547	if (upperdvp)
548		VOP_UNLOCK(upperdvp, 0, td);
549
550	error = union_allocvp(ap->a_vpp, dvp->v_mount, dvp, upperdvp, cnp,
551			      uppervp, lowervp, 1);
552
553	UDEBUG(("Create %p = %p %p refs=%d\n", *ap->a_vpp, uppervp, lowervp, (*ap->a_vpp) ? vrefcnt(*ap->a_vpp) : -99));
554
555	uppervp = NULL;
556	upperdvp = NULL;
557	lowervp = NULL;
558
559	/*
560	 *	Termination Code
561	 *
562	 *	- put away any extra junk laying around.  Note that lowervp
563	 *	  (if not NULL) will never be the same as *ap->a_vp and
564	 *	  neither will uppervp, because when we set that state we
565	 *	  NULL-out lowervp or uppervp.  On the otherhand, upperdvp
566	 *	  may match uppervp or *ap->a_vpp.
567	 *
568	 *	- relock/unlock dvp if appropriate.
569	 */
570
571out:
572	if (upperdvp) {
573		if (upperdvp == uppervp || upperdvp == *ap->a_vpp)
574			vrele(upperdvp);
575		else
576			vput(upperdvp);
577	}
578
579	if (uppervp)
580		vput(uppervp);
581
582	if (lowervp)
583		vput(lowervp);
584
585	/*
586	 * Restore LOCKPARENT state
587	 */
588
589	if (!lockparent)
590		cnp->cn_flags &= ~LOCKPARENT;
591
592	UDEBUG(("Out %d vpp %p/%d lower %p upper %p\n", error, *ap->a_vpp,
593		((*ap->a_vpp) ? vrefcnt(*ap->a_vpp) : -99),
594		lowervp, uppervp));
595
596	if (error == 0 || error == EJUSTRETURN) {
597		/*
598		 * dvp lock state, determine whether to relock dvp.
599		 * We are expected to unlock dvp unless:
600		 *
601		 *	- there was an error (other than EJUSTRETURN), or
602		 *	- we hit the last component and lockparent is true
603		 */
604		if (*ap->a_vpp != dvp) {
605			if (!lockparent || (cnp->cn_flags & ISLASTCN) == 0)
606				VOP_UNLOCK(dvp, 0, td);
607		}
608
609		if (cnp->cn_namelen == 1 &&
610		    cnp->cn_nameptr[0] == '.' &&
611		    *ap->a_vpp != dvp) {
612#ifdef	DIAGNOSTIC
613			vprint("union_lookup: vp", *ap->a_vpp);
614			vprint("union_lookup: dvp", dvp);
615#endif
616			panic("union_lookup returning . (%p) != startdir (%p)",
617			    *ap->a_vpp, dvp);
618		}
619	}
620
621	return (error);
622}
623
624/*
625 * 	union_create:
626 *
627 * a_dvp is locked on entry and remains locked on return.  a_vpp is returned
628 * locked if no error occurs, otherwise it is garbage.
629 */
630
631static int
632union_create(ap)
633	struct vop_create_args /* {
634		struct vnode *a_dvp;
635		struct vnode **a_vpp;
636		struct componentname *a_cnp;
637		struct vattr *a_vap;
638	} */ *ap;
639{
640	struct union_node *dun = VTOUNION(ap->a_dvp);
641	struct componentname *cnp = ap->a_cnp;
642	struct thread *td = cnp->cn_thread;
643	struct vnode *dvp;
644	int error = EROFS;
645
646	if ((dvp = union_lock_upper(dun, td)) != NULL) {
647		struct vnode *vp;
648		struct mount *mp;
649
650		error = VOP_CREATE(dvp, &vp, cnp, ap->a_vap);
651		if (error == 0) {
652			mp = ap->a_dvp->v_mount;
653			VOP_UNLOCK(vp, 0, td);
654			UDEBUG(("ALLOCVP-1 FROM %p REFS %d\n", vp, vrefcnt(vp)));
655			error = union_allocvp(ap->a_vpp, mp, NULLVP, NULLVP,
656				cnp, vp, NULLVP, 1);
657			UDEBUG(("ALLOCVP-2B FROM %p REFS %d\n", *ap->a_vpp, vrefcnt(vp)));
658		}
659		union_unlock_upper(dvp, td);
660	}
661	return (error);
662}
663
664static int
665union_whiteout(ap)
666	struct vop_whiteout_args /* {
667		struct vnode *a_dvp;
668		struct componentname *a_cnp;
669		int a_flags;
670	} */ *ap;
671{
672	struct union_node *un = VTOUNION(ap->a_dvp);
673	struct componentname *cnp = ap->a_cnp;
674	struct vnode *uppervp;
675	int error;
676
677	switch (ap->a_flags) {
678	case CREATE:
679	case DELETE:
680		uppervp = union_lock_upper(un, cnp->cn_thread);
681		if (uppervp != NULLVP) {
682			error = VOP_WHITEOUT(un->un_uppervp, cnp, ap->a_flags);
683			union_unlock_upper(uppervp, cnp->cn_thread);
684		} else
685			error = EOPNOTSUPP;
686		break;
687	case LOOKUP:
688		error = EOPNOTSUPP;
689		break;
690	default:
691		panic("union_whiteout: unknown op");
692	}
693	return (error);
694}
695
696/*
697 * 	union_mknod:
698 *
699 *	a_dvp is locked on entry and should remain locked on return.
700 *	a_vpp is garbagre whether an error occurs or not.
701 */
702
703static int
704union_mknod(ap)
705	struct vop_mknod_args /* {
706		struct vnode *a_dvp;
707		struct vnode **a_vpp;
708		struct componentname *a_cnp;
709		struct vattr *a_vap;
710	} */ *ap;
711{
712	struct union_node *dun = VTOUNION(ap->a_dvp);
713	struct componentname *cnp = ap->a_cnp;
714	struct vnode *dvp;
715	int error = EROFS;
716
717	if ((dvp = union_lock_upper(dun, cnp->cn_thread)) != NULL) {
718		error = VOP_MKNOD(dvp, ap->a_vpp, cnp, ap->a_vap);
719		union_unlock_upper(dvp, cnp->cn_thread);
720	}
721	return (error);
722}
723
724/*
725 *	union_open:
726 *
727 *	run open VOP.  When opening the underlying vnode we have to mimic
728 *	vn_open().  What we *really* need to do to avoid screwups if the
729 *	open semantics change is to call vn_open().  For example, ufs blows
730 *	up if you open a file but do not vmio it prior to writing.
731 */
732
733static int
734union_open(ap)
735	struct vop_open_args /* {
736		struct vnodeop_desc *a_desc;
737		struct vnode *a_vp;
738		int a_mode;
739		struct ucred *a_cred;
740		struct thread *a_td;
741	} */ *ap;
742{
743	struct union_node *un = VTOUNION(ap->a_vp);
744	struct vnode *tvp;
745	int mode = ap->a_mode;
746	struct ucred *cred = ap->a_cred;
747	struct thread *td = ap->a_td;
748	int error = 0;
749	int tvpisupper = 1;
750
751	/*
752	 * If there is an existing upper vp then simply open that.
753	 * The upper vp takes precedence over the lower vp.  When opening
754	 * a lower vp for writing copy it to the uppervp and then open the
755	 * uppervp.
756	 *
757	 * At the end of this section tvp will be left locked.
758	 */
759	if ((tvp = union_lock_upper(un, td)) == NULLVP) {
760		/*
761		 * If the lower vnode is being opened for writing, then
762		 * copy the file contents to the upper vnode and open that,
763		 * otherwise can simply open the lower vnode.
764		 */
765		tvp = un->un_lowervp;
766		if ((ap->a_mode & FWRITE) && (tvp->v_type == VREG)) {
767			int docopy = !(mode & O_TRUNC);
768			error = union_copyup(un, docopy, cred, td);
769			tvp = union_lock_upper(un, td);
770		} else {
771			un->un_openl++;
772			VREF(tvp);
773			vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY, td);
774			tvpisupper = 0;
775		}
776	}
777
778	/*
779	 * We are holding the correct vnode, open it.
780	 */
781
782	if (error == 0)
783		error = VOP_OPEN(tvp, mode, cred, td, -1);
784
785	/*
786	 * Release any locks held.
787	 */
788	if (tvpisupper) {
789		if (tvp)
790			union_unlock_upper(tvp, td);
791	} else {
792		vput(tvp);
793	}
794	return (error);
795}
796
797/*
798 *	union_close:
799 *
800 *	It is unclear whether a_vp is passed locked or unlocked.  Whatever
801 *	the case we do not change it.
802 */
803
804static int
805union_close(ap)
806	struct vop_close_args /* {
807		struct vnode *a_vp;
808		int  a_fflag;
809		struct ucred *a_cred;
810		struct thread *a_td;
811	} */ *ap;
812{
813	struct union_node *un = VTOUNION(ap->a_vp);
814	struct vnode *vp;
815
816	if ((vp = un->un_uppervp) == NULLVP) {
817#ifdef UNION_DIAGNOSTIC
818		if (un->un_openl <= 0)
819			panic("union: un_openl cnt");
820#endif
821		--un->un_openl;
822		vp = un->un_lowervp;
823	}
824	ap->a_vp = vp;
825	return (VOP_CLOSE_AP(ap));
826}
827
828/*
829 * Check access permission on the union vnode.
830 * The access check being enforced is to check
831 * against both the underlying vnode, and any
832 * copied vnode.  This ensures that no additional
833 * file permissions are given away simply because
834 * the user caused an implicit file copy.
835 */
836static int
837union_access(ap)
838	struct vop_access_args /* {
839		struct vnodeop_desc *a_desc;
840		struct vnode *a_vp;
841		int a_mode;
842		struct ucred *a_cred;
843		struct thread *a_td;
844	} */ *ap;
845{
846	struct union_node *un = VTOUNION(ap->a_vp);
847	struct thread *td = ap->a_td;
848	int error = EACCES;
849	struct vnode *vp;
850
851	/*
852	 * Disallow write attempts on filesystems mounted read-only.
853	 */
854	if ((ap->a_mode & VWRITE) &&
855	    (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY)) {
856		switch (ap->a_vp->v_type) {
857		case VREG:
858		case VDIR:
859		case VLNK:
860			return (EROFS);
861		default:
862			break;
863		}
864	}
865
866	if ((vp = union_lock_upper(un, td)) != NULLVP) {
867		ap->a_vp = vp;
868		error = VOP_ACCESS_AP(ap);
869		union_unlock_upper(vp, td);
870		return(error);
871	}
872
873	if ((vp = un->un_lowervp) != NULLVP) {
874		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
875		ap->a_vp = vp;
876
877		/*
878		 * Remove VWRITE from a_mode if our mount point is RW, because
879		 * we want to allow writes and lowervp may be read-only.
880		 */
881		if ((un->un_vnode->v_mount->mnt_flag & MNT_RDONLY) == 0)
882			ap->a_mode &= ~VWRITE;
883
884		error = VOP_ACCESS_AP(ap);
885		if (error == 0) {
886			struct union_mount *um;
887
888			um = MOUNTTOUNIONMOUNT(un->un_vnode->v_mount);
889
890			if (um->um_op == UNMNT_BELOW) {
891				ap->a_cred = um->um_cred;
892				error = VOP_ACCESS_AP(ap);
893			}
894		}
895		VOP_UNLOCK(vp, 0, td);
896	}
897	return(error);
898}
899
900/*
901 * We handle getattr only to change the fsid and
902 * track object sizes
903 *
904 * It's not clear whether VOP_GETATTR is to be
905 * called with the vnode locked or not.  stat() calls
906 * it with (vp) locked, and fstat() calls it with
907 * (vp) unlocked.
908 *
909 * Because of this we cannot use our normal locking functions
910 * if we do not intend to lock the main a_vp node.  At the moment
911 * we are running without any specific locking at all, but beware
912 * to any programmer that care must be taken if locking is added
913 * to this function.
914 */
915
916static int
917union_getattr(ap)
918	struct vop_getattr_args /* {
919		struct vnode *a_vp;
920		struct vattr *a_vap;
921		struct ucred *a_cred;
922		struct thread *a_td;
923	} */ *ap;
924{
925	int error;
926	struct union_node *un = VTOUNION(ap->a_vp);
927	struct union_mount *um = MOUNTTOUNIONMOUNT(ap->a_vp->v_mount);
928	struct vnode *vp;
929	struct vattr *vap;
930	struct vattr va;
931
932	/*
933	 * Some programs walk the filesystem hierarchy by counting
934	 * links to directories to avoid stat'ing all the time.
935	 * This means the link count on directories needs to be "correct".
936	 * The only way to do that is to call getattr on both layers
937	 * and fix up the link count.  The link count will not necessarily
938	 * be accurate but will be large enough to defeat the tree walkers.
939	 */
940
941	vap = ap->a_vap;
942
943	if ((vp = un->un_uppervp) != NULLVP) {
944		error = VOP_GETATTR(vp, vap, ap->a_cred, ap->a_td);
945		if (error)
946			return (error);
947		/* XXX isn't this dangerous without a lock? */
948		union_newsize(ap->a_vp, vap->va_size, VNOVAL);
949	}
950
951	if (vp == NULLVP) {
952		vp = un->un_lowervp;
953	} else if (vp->v_type == VDIR && un->un_lowervp != NULLVP) {
954		vp = un->un_lowervp;
955		vap = &va;
956	} else {
957		vp = NULLVP;
958	}
959
960	if (vp != NULLVP) {
961		error = VOP_GETATTR(vp, vap, ap->a_cred, ap->a_td);
962		if (error)
963			return (error);
964		/* XXX isn't this dangerous without a lock? */
965		union_newsize(ap->a_vp, VNOVAL, vap->va_size);
966	}
967
968	if (ap->a_vap->va_fsid == um->um_upperdev)
969		ap->a_vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsid.val[0];
970
971	if ((vap != ap->a_vap) && (vap->va_type == VDIR))
972		ap->a_vap->va_nlink += vap->va_nlink;
973	return (0);
974}
975
976static int
977union_setattr(ap)
978	struct vop_setattr_args /* {
979		struct vnode *a_vp;
980		struct vattr *a_vap;
981		struct ucred *a_cred;
982		struct thread *a_td;
983	} */ *ap;
984{
985	struct union_node *un = VTOUNION(ap->a_vp);
986	struct thread *td = ap->a_td;
987	struct vattr *vap = ap->a_vap;
988	struct vnode *uppervp;
989	int error;
990
991	/*
992	 * Disallow write attempts on filesystems mounted read-only.
993	 */
994	if ((ap->a_vp->v_mount->mnt_flag & MNT_RDONLY) &&
995	    (vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL ||
996	     vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL ||
997	     vap->va_mtime.tv_sec != VNOVAL ||
998	     vap->va_mode != (mode_t)VNOVAL)) {
999		return (EROFS);
1000	}
1001
1002	/*
1003	 * Handle case of truncating lower object to zero size
1004	 * by creating a zero length upper object.  This is to
1005	 * handle the case of open with O_TRUNC and O_CREAT.
1006	 */
1007	if (un->un_uppervp == NULLVP && (un->un_lowervp->v_type == VREG)) {
1008		error = union_copyup(un, (ap->a_vap->va_size != 0),
1009			    ap->a_cred, ap->a_td);
1010		if (error)
1011			return (error);
1012	}
1013
1014	/*
1015	 * Try to set attributes in upper layer,
1016	 * otherwise return read-only filesystem error.
1017	 */
1018	error = EROFS;
1019	if ((uppervp = union_lock_upper(un, td)) != NULLVP) {
1020		error = VOP_SETATTR(un->un_uppervp, ap->a_vap,
1021					ap->a_cred, ap->a_td);
1022		if ((error == 0) && (ap->a_vap->va_size != VNOVAL))
1023			union_newsize(ap->a_vp, ap->a_vap->va_size, VNOVAL);
1024		union_unlock_upper(uppervp, td);
1025	}
1026	return (error);
1027}
1028
1029static int
1030union_read(ap)
1031	struct vop_read_args /* {
1032		struct vnode *a_vp;
1033		struct uio *a_uio;
1034		int  a_ioflag;
1035		struct ucred *a_cred;
1036	} */ *ap;
1037{
1038	struct union_node *un = VTOUNION(ap->a_vp);
1039	struct thread *td = ap->a_uio->uio_td;
1040	struct vnode *uvp;
1041	int error;
1042
1043	uvp = union_lock_other(un, td);
1044	KASSERT(uvp != NULL, ("union_read: backing vnode missing!"));
1045
1046	error = VOP_READ(uvp, ap->a_uio, ap->a_ioflag, ap->a_cred);
1047	union_unlock_other(uvp, td);
1048
1049	/*
1050	 * XXX
1051	 * Perhaps the size of the underlying object has changed under
1052	 * our feet.  Take advantage of the offset information present
1053	 * in the uio structure.
1054	 */
1055	if (error == 0) {
1056		struct union_node *un = VTOUNION(ap->a_vp);
1057		off_t cur = ap->a_uio->uio_offset;
1058
1059		if (uvp == un->un_uppervp) {
1060			if (cur > un->un_uppersz)
1061				union_newsize(ap->a_vp, cur, VNOVAL);
1062		} else {
1063			if (cur > un->un_lowersz)
1064				union_newsize(ap->a_vp, VNOVAL, cur);
1065		}
1066	}
1067	return (error);
1068}
1069
1070static int
1071union_write(ap)
1072	struct vop_write_args /* {
1073		struct vnode *a_vp;
1074		struct uio *a_uio;
1075		int  a_ioflag;
1076		struct ucred *a_cred;
1077	} */ *ap;
1078{
1079	struct union_node *un = VTOUNION(ap->a_vp);
1080	struct thread *td = ap->a_uio->uio_td;
1081	struct vnode *uppervp;
1082	int error;
1083
1084	if ((uppervp = union_lock_upper(un, td)) == NULLVP)
1085		panic("union: missing upper layer in write");
1086
1087	error = VOP_WRITE(uppervp, ap->a_uio, ap->a_ioflag, ap->a_cred);
1088
1089	/*
1090	 * The size of the underlying object may be changed by the
1091	 * write.
1092	 */
1093	if (error == 0) {
1094		off_t cur = ap->a_uio->uio_offset;
1095
1096		if (cur > un->un_uppersz)
1097			union_newsize(ap->a_vp, cur, VNOVAL);
1098	}
1099	union_unlock_upper(uppervp, td);
1100	return (error);
1101}
1102
1103static int
1104union_lease(ap)
1105	struct vop_lease_args /* {
1106		struct vnode *a_vp;
1107		struct thread *a_td;
1108		struct ucred *a_cred;
1109		int a_flag;
1110	} */ *ap;
1111{
1112	struct vnode *ovp = OTHERVP(ap->a_vp);
1113
1114	ap->a_vp = ovp;
1115	return (VOP_LEASE_AP(ap));
1116}
1117
1118static int
1119union_ioctl(ap)
1120	struct vop_ioctl_args /* {
1121		struct vnode *a_vp;
1122		u_long  a_command;
1123		caddr_t  a_data;
1124		int  a_fflag;
1125		struct ucred *a_cred;
1126		struct thread *a_td;
1127	} */ *ap;
1128{
1129	struct vnode *ovp = OTHERVP(ap->a_vp);
1130
1131	ap->a_vp = ovp;
1132	return (VOP_IOCTL_AP(ap));
1133}
1134
1135static int
1136union_poll(ap)
1137	struct vop_poll_args /* {
1138		struct vnode *a_vp;
1139		int  a_events;
1140		struct ucred *a_cred;
1141		struct thread *a_td;
1142	} */ *ap;
1143{
1144	struct vnode *ovp = OTHERVP(ap->a_vp);
1145
1146	ap->a_vp = ovp;
1147	return (VOP_POLL_AP(ap));
1148}
1149
1150static int
1151union_fsync(ap)
1152	struct vop_fsync_args /* {
1153		struct vnode *a_vp;
1154		struct ucred *a_cred;
1155		int  a_waitfor;
1156		struct thread *a_td;
1157	} */ *ap;
1158{
1159	int error = 0;
1160	struct thread *td = ap->a_td;
1161	struct vnode *targetvp;
1162	struct union_node *un = VTOUNION(ap->a_vp);
1163
1164	if ((targetvp = union_lock_other(un, td)) != NULLVP) {
1165		error = VOP_FSYNC(targetvp, ap->a_waitfor, td);
1166		union_unlock_other(targetvp, td);
1167	}
1168
1169	return (error);
1170}
1171
1172/*
1173 *	union_remove:
1174 *
1175 *	Remove the specified cnp.  The dvp and vp are passed to us locked
1176 *	and must remain locked on return.
1177 */
1178
1179static int
1180union_remove(ap)
1181	struct vop_remove_args /* {
1182		struct vnode *a_dvp;
1183		struct vnode *a_vp;
1184		struct componentname *a_cnp;
1185	} */ *ap;
1186{
1187	struct union_node *dun = VTOUNION(ap->a_dvp);
1188	struct union_node *un = VTOUNION(ap->a_vp);
1189	struct componentname *cnp = ap->a_cnp;
1190	struct thread *td = cnp->cn_thread;
1191	struct vnode *uppervp;
1192	struct vnode *upperdvp;
1193	int error;
1194
1195	if ((upperdvp = union_lock_upper(dun, td)) == NULLVP)
1196		panic("union remove: null upper vnode");
1197
1198	if ((uppervp = union_lock_upper(un, td)) != NULLVP) {
1199		if (union_dowhiteout(un, cnp->cn_cred, td))
1200			cnp->cn_flags |= DOWHITEOUT;
1201		if (cnp->cn_flags & DOWHITEOUT)		/* XXX fs corruption */
1202			error = EOPNOTSUPP;
1203		else
1204			error = VOP_REMOVE(upperdvp, uppervp, cnp);
1205		if (!error)
1206			union_removed_upper(un);
1207		union_unlock_upper(uppervp, td);
1208	} else {
1209		error = union_mkwhiteout(
1210			    MOUNTTOUNIONMOUNT(ap->a_dvp->v_mount),
1211			    upperdvp, ap->a_cnp, un->un_path);
1212	}
1213	union_unlock_upper(upperdvp, td);
1214	return (error);
1215}
1216
1217/*
1218 *	union_link:
1219 *
1220 *	tdvp and vp will be locked on entry.
1221 *	tdvp and vp should remain locked on return.
1222 */
1223
1224static int
1225union_link(ap)
1226	struct vop_link_args /* {
1227		struct vnode *a_tdvp;
1228		struct vnode *a_vp;
1229		struct componentname *a_cnp;
1230	} */ *ap;
1231{
1232	struct componentname *cnp = ap->a_cnp;
1233	struct thread *td = cnp->cn_thread;
1234	struct union_node *dun = VTOUNION(ap->a_tdvp);
1235	struct vnode *vp;
1236	struct vnode *tdvp;
1237	int error = 0;
1238
1239	if (ap->a_tdvp->v_op != ap->a_vp->v_op) {
1240		vp = ap->a_vp;
1241	} else {
1242		struct union_node *tun = VTOUNION(ap->a_vp);
1243
1244		if (tun->un_uppervp == NULLVP) {
1245#if 0
1246			if (dun->un_uppervp == tun->un_dirvp) {
1247				if (dun->un_flags & UN_ULOCK) {
1248					dun->un_flags &= ~UN_ULOCK;
1249					VOP_UNLOCK(dun->un_uppervp, 0, td);
1250				}
1251			}
1252#endif
1253			error = union_copyup(tun, 1, cnp->cn_cred, td);
1254#if 0
1255			if (dun->un_uppervp == tun->un_dirvp) {
1256				vn_lock(dun->un_uppervp,
1257					    LK_EXCLUSIVE | LK_RETRY, td);
1258				dun->un_flags |= UN_ULOCK;
1259			}
1260#endif
1261			if (error)
1262				return (error);
1263		}
1264		vp = tun->un_uppervp;
1265		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1266	}
1267
1268	/*
1269	 * Make sure upper is locked, then unlock the union directory we were
1270	 * called with to avoid a deadlock while we are calling VOP_LINK() on
1271	 * the upper (with tdvp locked and vp not locked).  Our ap->a_tdvp
1272	 * is expected to be locked on return.
1273	 */
1274
1275	if ((tdvp = union_lock_upper(dun, td)) == NULLVP)
1276		return (EROFS);
1277
1278	VOP_UNLOCK(ap->a_tdvp, 0, td);		/* unlock calling node */
1279	error = VOP_LINK(tdvp, vp, cnp);	/* call link on upper */
1280
1281	/*
1282	 * Unlock tun->un_uppervp if we locked it above.
1283	 */
1284	if (ap->a_tdvp->v_op == ap->a_vp->v_op)
1285		VOP_UNLOCK(vp, 0, td);
1286	/*
1287	 * We have to unlock tdvp prior to relocking our calling node in
1288	 * order to avoid a deadlock.  We also have to unlock ap->a_vp
1289	 * before relocking the directory, but then we have to relock
1290	 * ap->a_vp as our caller expects.
1291	 */
1292	VOP_UNLOCK(ap->a_vp, 0, td);
1293	union_unlock_upper(tdvp, td);
1294	vn_lock(ap->a_tdvp, LK_EXCLUSIVE | LK_RETRY, td);
1295	vn_lock(ap->a_vp, LK_EXCLUSIVE | LK_RETRY, td);
1296	return (error);
1297}
1298
1299static int
1300union_rename(ap)
1301	struct vop_rename_args  /* {
1302		struct vnode *a_fdvp;
1303		struct vnode *a_fvp;
1304		struct componentname *a_fcnp;
1305		struct vnode *a_tdvp;
1306		struct vnode *a_tvp;
1307		struct componentname *a_tcnp;
1308	} */ *ap;
1309{
1310	int error;
1311	struct vnode *fdvp = ap->a_fdvp;
1312	struct vnode *fvp = ap->a_fvp;
1313	struct vnode *tdvp = ap->a_tdvp;
1314	struct vnode *tvp = ap->a_tvp;
1315
1316	/*
1317	 * Figure out what fdvp to pass to our upper or lower vnode.  If we
1318	 * replace the fdvp, release the original one and ref the new one.
1319	 */
1320
1321	if (fdvp->v_op == &union_vnodeops) {	/* always true */
1322		struct union_node *un = VTOUNION(fdvp);
1323		if (un->un_uppervp == NULLVP) {
1324			/*
1325			 * this should never happen in normal
1326			 * operation but might if there was
1327			 * a problem creating the top-level shadow
1328			 * directory.
1329			 */
1330			error = EXDEV;
1331			goto bad;
1332		}
1333		fdvp = un->un_uppervp;
1334		VREF(fdvp);
1335		vrele(ap->a_fdvp);
1336	}
1337
1338	/*
1339	 * Figure out what fvp to pass to our upper or lower vnode.  If we
1340	 * replace the fvp, release the original one and ref the new one.
1341	 */
1342
1343	if (fvp->v_op == &union_vnodeops) {	/* always true */
1344		struct union_node *un = VTOUNION(fvp);
1345#if 0
1346		struct union_mount *um = MOUNTTOUNIONMOUNT(fvp->v_mount);
1347#endif
1348
1349		if (un->un_uppervp == NULLVP) {
1350			switch(fvp->v_type) {
1351			case VREG:
1352				vn_lock(un->un_vnode, LK_EXCLUSIVE | LK_RETRY, ap->a_fcnp->cn_thread);
1353				error = union_copyup(un, 1, ap->a_fcnp->cn_cred, ap->a_fcnp->cn_thread);
1354				VOP_UNLOCK(un->un_vnode, 0, ap->a_fcnp->cn_thread);
1355				if (error)
1356					goto bad;
1357				break;
1358			case VDIR:
1359				/*
1360				 * XXX not yet.
1361				 *
1362				 * There is only one way to rename a directory
1363				 * based in the lowervp, and that is to copy
1364				 * the entire directory hierarchy.  Otherwise
1365				 * it would not last across a reboot.
1366				 */
1367#if 0
1368				vrele(fvp);
1369				fvp = NULL;
1370				vn_lock(fdvp, LK_EXCLUSIVE | LK_RETRY, ap->a_fcnp->cn_thread);
1371				error = union_mkshadow(um, fdvp,
1372					    ap->a_fcnp, &un->un_uppervp);
1373				VOP_UNLOCK(fdvp, 0, ap->a_fcnp->cn_thread);
1374				if (un->un_uppervp)
1375					VOP_UNLOCK(un->un_uppervp, 0, ap->a_fcnp->cn_thread);
1376				if (error)
1377					goto bad;
1378				break;
1379#endif
1380			default:
1381				error = EXDEV;
1382				goto bad;
1383			}
1384		}
1385
1386		if (un->un_lowervp != NULLVP)
1387			ap->a_fcnp->cn_flags |= DOWHITEOUT;
1388		fvp = un->un_uppervp;
1389		VREF(fvp);
1390		vrele(ap->a_fvp);
1391	}
1392
1393	/*
1394	 * Figure out what tdvp (destination directory) to pass to the
1395	 * lower level.  If we replace it with uppervp, we need to vput the
1396	 * old one.  The exclusive lock is transfered to what we will pass
1397	 * down in the VOP_RENAME() and we replace uppervp with a simple
1398	 * reference.
1399	 */
1400
1401	if (tdvp->v_op == &union_vnodeops) {
1402		struct union_node *un = VTOUNION(tdvp);
1403
1404		if (un->un_uppervp == NULLVP) {
1405			/*
1406			 * This should never happen in normal
1407			 * operation but might if there was
1408			 * a problem creating the top-level shadow
1409			 * directory.
1410			 */
1411			error = EXDEV;
1412			goto bad;
1413		}
1414
1415		/*
1416		 * New tdvp is a lock and reference on uppervp.
1417		 * Put away the old tdvp.
1418		 */
1419		tdvp = union_lock_upper(un, ap->a_tcnp->cn_thread);
1420		vput(ap->a_tdvp);
1421	}
1422
1423	/*
1424	 * Figure out what tvp (destination file) to pass to the
1425	 * lower level.
1426	 *
1427	 * If the uppervp file does not exist, put away the (wrong)
1428	 * file and change tvp to NULL.
1429	 */
1430
1431	if (tvp != NULLVP && tvp->v_op == &union_vnodeops) {
1432		struct union_node *un = VTOUNION(tvp);
1433
1434		tvp = union_lock_upper(un, ap->a_tcnp->cn_thread);
1435		vput(ap->a_tvp);
1436		/* note: tvp may be NULL */
1437	}
1438
1439	/*
1440	 * VOP_RENAME() releases/vputs prior to returning, so we have no
1441	 * cleanup to do.
1442	 */
1443
1444	return (VOP_RENAME(fdvp, fvp, ap->a_fcnp, tdvp, tvp, ap->a_tcnp));
1445
1446	/*
1447	 * Error.  We still have to release / vput the various elements.
1448	 */
1449
1450bad:
1451	vrele(fdvp);
1452	if (fvp)
1453		vrele(fvp);
1454	vput(tdvp);
1455	if (tvp != NULLVP) {
1456		if (tvp != tdvp)
1457			vput(tvp);
1458		else
1459			vrele(tvp);
1460	}
1461	return (error);
1462}
1463
1464static int
1465union_mkdir(ap)
1466	struct vop_mkdir_args /* {
1467		struct vnode *a_dvp;
1468		struct vnode **a_vpp;
1469		struct componentname *a_cnp;
1470		struct vattr *a_vap;
1471	} */ *ap;
1472{
1473	struct union_node *dun = VTOUNION(ap->a_dvp);
1474	struct componentname *cnp = ap->a_cnp;
1475	struct thread *td = cnp->cn_thread;
1476	struct vnode *upperdvp;
1477	int error = EROFS;
1478
1479	if ((upperdvp = union_lock_upper(dun, td)) != NULLVP) {
1480		struct vnode *vp;
1481
1482		error = VOP_MKDIR(upperdvp, &vp, cnp, ap->a_vap);
1483		union_unlock_upper(upperdvp, td);
1484
1485		if (error == 0) {
1486			VOP_UNLOCK(vp, 0, td);
1487			UDEBUG(("ALLOCVP-2 FROM %p REFS %d\n", vp, vrefcnt(vp)));
1488			error = union_allocvp(ap->a_vpp, ap->a_dvp->v_mount,
1489				ap->a_dvp, NULLVP, cnp, vp, NULLVP, 1);
1490			UDEBUG(("ALLOCVP-2B FROM %p REFS %d\n", *ap->a_vpp, vrefcnt(vp)));
1491		}
1492	}
1493	return (error);
1494}
1495
1496static int
1497union_rmdir(ap)
1498	struct vop_rmdir_args /* {
1499		struct vnode *a_dvp;
1500		struct vnode *a_vp;
1501		struct componentname *a_cnp;
1502	} */ *ap;
1503{
1504	struct union_node *dun = VTOUNION(ap->a_dvp);
1505	struct union_node *un = VTOUNION(ap->a_vp);
1506	struct componentname *cnp = ap->a_cnp;
1507	struct thread *td = cnp->cn_thread;
1508	struct vnode *upperdvp;
1509	struct vnode *uppervp;
1510	int error;
1511
1512	if ((upperdvp = union_lock_upper(dun, td)) == NULLVP)
1513		panic("union rmdir: null upper vnode");
1514
1515	if ((uppervp = union_lock_upper(un, td)) != NULLVP) {
1516		if (union_dowhiteout(un, cnp->cn_cred, td))
1517			cnp->cn_flags |= DOWHITEOUT;
1518		if (cnp->cn_flags & DOWHITEOUT)		/* XXX fs corruption */
1519			error = EOPNOTSUPP;
1520		else
1521			error = VOP_RMDIR(upperdvp, uppervp, ap->a_cnp);
1522		if (!error)
1523			union_removed_upper(un);
1524		union_unlock_upper(uppervp, td);
1525	} else {
1526		error = union_mkwhiteout(
1527			    MOUNTTOUNIONMOUNT(ap->a_dvp->v_mount),
1528			    dun->un_uppervp, ap->a_cnp, un->un_path);
1529	}
1530	union_unlock_upper(upperdvp, td);
1531	return (error);
1532}
1533
1534/*
1535 *	union_symlink:
1536 *
1537 *	dvp is locked on entry and remains locked on return.  a_vpp is garbage
1538 *	(unused).
1539 */
1540
1541static int
1542union_symlink(ap)
1543	struct vop_symlink_args /* {
1544		struct vnode *a_dvp;
1545		struct vnode **a_vpp;
1546		struct componentname *a_cnp;
1547		struct vattr *a_vap;
1548		char *a_target;
1549	} */ *ap;
1550{
1551	struct union_node *dun = VTOUNION(ap->a_dvp);
1552	struct componentname *cnp = ap->a_cnp;
1553	struct thread *td = cnp->cn_thread;
1554	struct vnode *dvp;
1555	int error = EROFS;
1556
1557	if ((dvp = union_lock_upper(dun, td)) != NULLVP) {
1558		error = VOP_SYMLINK(dvp, ap->a_vpp, cnp, ap->a_vap,
1559			    ap->a_target);
1560		union_unlock_upper(dvp, td);
1561	}
1562	return (error);
1563}
1564
1565/*
1566 * union_readdir ()works in concert with getdirentries() and
1567 * readdir(3) to provide a list of entries in the unioned
1568 * directories.  getdirentries()  is responsible for walking
1569 * down the union stack.  readdir(3) is responsible for
1570 * eliminating duplicate names from the returned data stream.
1571 */
1572static int
1573union_readdir(ap)
1574	struct vop_readdir_args /* {
1575		struct vnode *a_vp;
1576		struct uio *a_uio;
1577		struct ucred *a_cred;
1578		int *a_eofflag;
1579		u_long *a_cookies;
1580		int a_ncookies;
1581	} */ *ap;
1582{
1583	struct union_node *un = VTOUNION(ap->a_vp);
1584	struct thread *td = ap->a_uio->uio_td;
1585	struct vnode *uvp;
1586	int error = 0;
1587
1588	if ((uvp = union_lock_upper(un, td)) != NULLVP) {
1589		ap->a_vp = uvp;
1590		error = VOP_READDIR_AP(ap);
1591		union_unlock_upper(uvp, td);
1592	}
1593	return(error);
1594}
1595
1596static int
1597union_readlink(ap)
1598	struct vop_readlink_args /* {
1599		struct vnode *a_vp;
1600		struct uio *a_uio;
1601		struct ucred *a_cred;
1602	} */ *ap;
1603{
1604	int error;
1605	struct union_node *un = VTOUNION(ap->a_vp);
1606	struct uio *uio = ap->a_uio;
1607	struct thread *td = uio->uio_td;
1608	struct vnode *vp;
1609
1610	vp = union_lock_other(un, td);
1611	KASSERT(vp != NULL, ("union_readlink: backing vnode missing!"));
1612
1613	ap->a_vp = vp;
1614	error = VOP_READLINK_AP(ap);
1615	union_unlock_other(vp, td);
1616
1617	return (error);
1618}
1619
1620static int
1621union_getwritemount(ap)
1622	struct vop_getwritemount_args /* {
1623		struct vnode *a_vp;
1624		struct mount **a_mpp;
1625	} */ *ap;
1626{
1627	struct vnode *vp = ap->a_vp;
1628	struct vnode *uvp = UPPERVP(vp);
1629
1630	if (uvp == NULL) {
1631		VI_LOCK(vp);
1632		if (vp->v_iflag & VI_FREE) {
1633			VI_UNLOCK(vp);
1634			return (EOPNOTSUPP);
1635		}
1636		VI_UNLOCK(vp);
1637		return (EACCES);
1638	}
1639	return(VOP_GETWRITEMOUNT(uvp, ap->a_mpp));
1640}
1641
1642/*
1643 *	union_inactive:
1644 *
1645 *	Called with the vnode locked.  We are expected to unlock the vnode.
1646 */
1647
1648static int
1649union_inactive(ap)
1650	struct vop_inactive_args /* {
1651		struct vnode *a_vp;
1652		struct thread *a_td;
1653	} */ *ap;
1654{
1655	struct vnode *vp = ap->a_vp;
1656	struct thread *td = ap->a_td;
1657	struct union_node *un = VTOUNION(vp);
1658
1659	/*
1660	 * Do nothing (and _don't_ bypass).
1661	 * Wait to vrele lowervp until reclaim,
1662	 * so that until then our union_node is in the
1663	 * cache and reusable.
1664	 *
1665	 */
1666
1667	if (un->un_dircache != NULL)
1668		union_dircache_free(un);
1669
1670#if 0
1671	if ((un->un_flags & UN_ULOCK) && un->un_uppervp) {
1672		un->un_flags &= ~UN_ULOCK;
1673		VOP_UNLOCK(un->un_uppervp, 0, td);
1674	}
1675#endif
1676
1677	VOP_UNLOCK(vp, 0, td);
1678
1679	if ((un->un_flags & UN_CACHED) == 0)
1680		vgone(vp);
1681
1682	return (0);
1683}
1684
1685static int
1686union_reclaim(ap)
1687	struct vop_reclaim_args /* {
1688		struct vnode *a_vp;
1689	} */ *ap;
1690{
1691	union_freevp(ap->a_vp);
1692
1693	return (0);
1694}
1695
/*
 * A union vnode does not hold a VM object of its own, and there is no
 * need to create one for the upper or lower vp here because that is
 * done in union_open().  This is therefore a successful no-op.
 *
 * Arguments: vp, cred, td (all unused).
 */
static int
union_createvobject(struct vop_createvobject_args *ap)
{

	(void)ap;
	return (0);
}
1711
1712/*
1713 * We have nothing to destroy and this operation shouldn't be bypassed.
1714 */
1715static int
1716union_destroyvobject(ap)
1717	struct vop_destroyvobject_args /* {
1718		struct vnode *vp;
1719	} */ *ap;
1720{
1721	struct vnode *vp = ap->a_vp;
1722
1723	vp->v_object = NULL;
1724	return (0);
1725}
1726
1727/*
1728 * Get VM object from the upper or lower vp
1729 */
1730static int
1731union_getvobject(ap)
1732	struct vop_getvobject_args /* {
1733		struct vnode *vp;
1734		struct vm_object **objpp;
1735	} */ *ap;
1736{
1737	struct vnode *ovp = OTHERVP(ap->a_vp);
1738
1739	if (ovp == NULL)
1740		return EINVAL;
1741	return (VOP_GETVOBJECT(ovp, ap->a_objpp));
1742}
1743
1744static int
1745union_print(ap)
1746	struct vop_print_args /* {
1747		struct vnode *a_vp;
1748	} */ *ap;
1749{
1750	struct vnode *vp = ap->a_vp;
1751
1752	printf("\tvp=%p, uppervp=%p, lowervp=%p\n",
1753	       vp, UPPERVP(vp), LOWERVP(vp));
1754	if (UPPERVP(vp) != NULLVP)
1755		vprint("union: upper", UPPERVP(vp));
1756	if (LOWERVP(vp) != NULLVP)
1757		vprint("union: lower", LOWERVP(vp));
1758
1759	return (0);
1760}
1761
1762static int
1763union_pathconf(ap)
1764	struct vop_pathconf_args /* {
1765		struct vnode *a_vp;
1766		int a_name;
1767		int *a_retval;
1768	} */ *ap;
1769{
1770	int error;
1771	struct thread *td = curthread;		/* XXX */
1772	struct union_node *un = VTOUNION(ap->a_vp);
1773	struct vnode *vp;
1774
1775	vp = union_lock_other(un, td);
1776	KASSERT(vp != NULL, ("union_pathconf: backing vnode missing!"));
1777
1778	ap->a_vp = vp;
1779	error = VOP_PATHCONF_AP(ap);
1780	union_unlock_other(vp, td);
1781
1782	return (error);
1783}
1784
1785static int
1786union_advlock(ap)
1787	struct vop_advlock_args /* {
1788		struct vnode *a_vp;
1789		caddr_t  a_id;
1790		int  a_op;
1791		struct flock *a_fl;
1792		int  a_flags;
1793	} */ *ap;
1794{
1795	register struct vnode *ovp = OTHERVP(ap->a_vp);
1796
1797	ap->a_vp = ovp;
1798	return (VOP_ADVLOCK_AP(ap));
1799}
1800
1801
1802/*
1803 * XXX - vop_strategy must be hand coded because it has no
1804 * YYY - and it is not coherent with anything
1805 *
1806 * vnode in its arguments.
1807 * This goes away with a merged VM/buffer cache.
1808 */
1809static int
1810union_strategy(ap)
1811	struct vop_strategy_args /* {
1812		struct vnode *a_vp;
1813		struct buf *a_bp;
1814	} */ *ap;
1815{
1816	struct buf *bp = ap->a_bp;
1817	struct vnode *othervp = OTHERVP(ap->a_vp);
1818
1819#ifdef DIAGNOSTIC
1820	if (othervp == NULLVP)
1821		panic("union_strategy: nil vp");
1822	if ((bp->b_iocmd == BIO_WRITE) &&
1823	    (othervp == LOWERVP(ap->a_vp)))
1824		panic("union_strategy: writing to lowervp");
1825#endif
1826	return (VOP_STRATEGY(othervp, bp));
1827}
1828
1829static int
1830union_getacl(ap)
1831	struct vop_getacl_args /* {
1832		struct vnode *a_vp;
1833		acl_type_t a_type;
1834		struct acl *a_aclp;
1835		struct ucred *a_cred;
1836		struct thread *a_td;
1837	} */ *ap;
1838{
1839	int error;
1840	struct union_node *un = VTOUNION(ap->a_vp);
1841	struct vnode *vp;
1842
1843	vp = union_lock_other(un, ap->a_td);
1844	ap->a_vp = vp;
1845	error = VOP_GETACL_AP(ap);
1846	union_unlock_other(vp, ap->a_td);
1847
1848	return (error);
1849}
1850
1851static int
1852union_setacl(ap)
1853	struct vop_setacl_args /* {
1854		struct vnode *a_vp;
1855		acl_type_t a_type;
1856		struct acl *a_aclp;
1857		struct ucred *a_cred;
1858		struct thread *a_td;
1859	} */ *ap;
1860{
1861	int error;
1862	struct union_node *un = VTOUNION(ap->a_vp);
1863	struct vnode *vp;
1864
1865	vp = union_lock_other(un, ap->a_td);
1866	ap->a_vp = vp;
1867	error = VOP_SETACL_AP(ap);
1868	union_unlock_other(vp, ap->a_td);
1869
1870	return (error);
1871}
1872
1873static int
1874union_aclcheck(ap)
1875	struct vop_aclcheck_args /* {
1876		struct vnode *a_vp;
1877		acl_type_t a_type;
1878		struct acl *a_aclp;
1879		struct ucred *a_cred;
1880		struct thread *a_td;
1881	} */ *ap;
1882{
1883	struct vnode *ovp = OTHERVP(ap->a_vp);
1884
1885	ap->a_vp = ovp;
1886	return (VOP_ACLCHECK_AP(ap));
1887}
1888
1889static int
1890union_closeextattr(ap)
1891	struct vop_closeextattr_args /* {
1892		struct vnode *a_vp;
1893		int a_commit;
1894		struct ucred *a_cred;
1895		struct thread *a_td;
1896	} */ *ap;
1897{
1898	int error;
1899	struct union_node *un = VTOUNION(ap->a_vp);
1900	struct vnode *vp;
1901
1902	vp = union_lock_other(un, ap->a_td);
1903	ap->a_vp = vp;
1904	error = VOP_CLOSEEXTATTR_AP(ap);
1905	union_unlock_other(vp, ap->a_td);
1906
1907	return (error);
1908}
1909
1910static int
1911union_getextattr(ap)
1912	struct vop_getextattr_args /* {
1913		struct vnode *a_vp;
1914		int a_attrnamespace;
1915		const char *a_name;
1916		struct uio *a_uio;
1917		size_t *a_size;
1918		struct ucred *a_cred;
1919		struct thread *a_td;
1920	} */ *ap;
1921{
1922	int error;
1923	struct union_node *un = VTOUNION(ap->a_vp);
1924	struct vnode *vp;
1925
1926	vp = union_lock_other(un, ap->a_td);
1927	ap->a_vp = vp;
1928	error = VOP_GETEXTATTR_AP(ap);
1929	union_unlock_other(vp, ap->a_td);
1930
1931	return (error);
1932}
1933
1934static int
1935union_listextattr(ap)
1936	struct vop_listextattr_args /* {
1937		struct vnode *a_vp;
1938		int a_attrnamespace;
1939		struct uio *a_uio;
1940		size_t *a_size;
1941		struct ucred *a_cred;
1942		struct thread *a_td;
1943	} */ *ap;
1944{
1945	int error;
1946	struct union_node *un = VTOUNION(ap->a_vp);
1947	struct vnode *vp;
1948
1949	vp = union_lock_other(un, ap->a_td);
1950	ap->a_vp = vp;
1951	error = VOP_LISTEXTATTR_AP(ap);
1952	union_unlock_other(vp, ap->a_td);
1953
1954	return (error);
1955}
1956
1957static int
1958union_openextattr(ap)
1959	struct vop_openextattr_args /* {
1960		struct vnode *a_vp;
1961		struct ucred *a_cred;
1962		struct thread *a_td;
1963	} */ *ap;
1964{
1965	int error;
1966	struct union_node *un = VTOUNION(ap->a_vp);
1967	struct vnode *vp;
1968
1969	vp = union_lock_other(un, ap->a_td);
1970	ap->a_vp = vp;
1971	error = VOP_OPENEXTATTR_AP(ap);
1972	union_unlock_other(vp, ap->a_td);
1973
1974	return (error);
1975}
1976
1977static int
1978union_deleteextattr(ap)
1979	struct vop_deleteextattr_args /* {
1980		struct vnode *a_vp;
1981		int a_attrnamespace;
1982		const char *a_name;
1983		struct ucred *a_cred;
1984		struct thread *a_td;
1985	} */ *ap;
1986{
1987	int error;
1988	struct union_node *un = VTOUNION(ap->a_vp);
1989	struct vnode *vp;
1990
1991	vp = union_lock_other(un, ap->a_td);
1992	ap->a_vp = vp;
1993	error = VOP_DELETEEXTATTR_AP(ap);
1994	union_unlock_other(vp, ap->a_td);
1995
1996	return (error);
1997}
1998
1999static int
2000union_setextattr(ap)
2001	struct vop_setextattr_args /* {
2002		struct vnode *a_vp;
2003		int a_attrnamespace;
2004		const char *a_name;
2005		struct uio *a_uio;
2006		struct ucred *a_cred;
2007		struct thread *a_td;
2008	} */ *ap;
2009{
2010	int error;
2011	struct union_node *un = VTOUNION(ap->a_vp);
2012	struct vnode *vp;
2013
2014	vp = union_lock_other(un, ap->a_td);
2015	ap->a_vp = vp;
2016	error = VOP_SETEXTATTR_AP(ap);
2017	union_unlock_other(vp, ap->a_td);
2018
2019	return (error);
2020}
2021
2022static int
2023union_setlabel(ap)
2024	struct vop_setlabel_args /* {
2025		struct vnode *a_vp;
2026		struct label *a_label;
2027		struct ucred *a_cred;
2028		struct thread *a_td;
2029	} */ *ap;
2030{
2031	int error;
2032	struct union_node *un = VTOUNION(ap->a_vp);
2033	struct vnode *vp;
2034
2035	vp = union_lock_other(un, ap->a_td);
2036	ap->a_vp = vp;
2037	error = VOP_SETLABEL_AP(ap);
2038	union_unlock_other(vp, ap->a_td);
2039
2040	return (error);
2041}
2042
2043/*
2044 * Global vfs data structures
2045 */
2046struct vop_vector union_vnodeops = {
2047	.vop_default =		&default_vnodeops,
2048
2049	.vop_access =		union_access,
2050	.vop_aclcheck =		union_aclcheck,
2051	.vop_advlock =		union_advlock,
2052	.vop_bmap =		VOP_EOPNOTSUPP,
2053	.vop_close =		union_close,
2054	.vop_closeextattr =	union_closeextattr,
2055	.vop_create =		union_create,
2056	.vop_createvobject =	union_createvobject,
2057	.vop_deleteextattr =	union_deleteextattr,
2058	.vop_destroyvobject =	union_destroyvobject,
2059	.vop_fsync =		union_fsync,
2060	.vop_getacl =		union_getacl,
2061	.vop_getattr =		union_getattr,
2062	.vop_getextattr =	union_getextattr,
2063	.vop_getvobject =	union_getvobject,
2064	.vop_getwritemount =	union_getwritemount,
2065	.vop_inactive =		union_inactive,
2066	.vop_ioctl =		union_ioctl,
2067	.vop_lease =		union_lease,
2068	.vop_link =		union_link,
2069	.vop_listextattr =	union_listextattr,
2070	.vop_lookup =		union_lookup,
2071	.vop_mkdir =		union_mkdir,
2072	.vop_mknod =		union_mknod,
2073	.vop_open =		union_open,
2074	.vop_openextattr =	union_openextattr,
2075	.vop_pathconf =		union_pathconf,
2076	.vop_poll =		union_poll,
2077	.vop_print =		union_print,
2078	.vop_read =		union_read,
2079	.vop_readdir =		union_readdir,
2080	.vop_readlink =		union_readlink,
2081	.vop_reclaim =		union_reclaim,
2082	.vop_remove =		union_remove,
2083	.vop_rename =		union_rename,
2084	.vop_rmdir =		union_rmdir,
2085	.vop_setacl =		union_setacl,
2086	.vop_setattr =		union_setattr,
2087	.vop_setextattr =	union_setextattr,
2088	.vop_setlabel =		union_setlabel,
2089	.vop_strategy =		union_strategy,
2090	.vop_symlink =		union_symlink,
2091	.vop_whiteout =		union_whiteout,
2092	.vop_write =		union_write,
2093};
2094