union_vnops.c revision 111841
1/*
2 * Copyright (c) 1992, 1993, 1994, 1995 Jan-Simon Pendry.
3 * Copyright (c) 1992, 1993, 1994, 1995
4 *	The Regents of the University of California.  All rights reserved.
5 *
6 * This code is derived from software contributed to Berkeley by
7 * Jan-Simon Pendry.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 *    must display the following acknowledgement:
19 *	This product includes software developed by the University of
20 *	California, Berkeley and its contributors.
21 * 4. Neither the name of the University nor the names of its contributors
22 *    may be used to endorse or promote products derived from this software
23 *    without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * SUCH DAMAGE.
36 *
37 *	@(#)union_vnops.c	8.32 (Berkeley) 6/23/95
38 * $FreeBSD: head/sys/fs/unionfs/union_vnops.c 111841 2003-03-03 19:15:40Z njl $
39 */
40
41#include <sys/param.h>
42#include <sys/systm.h>
43#include <sys/fcntl.h>
44#include <sys/stat.h>
45#include <sys/kernel.h>
46#include <sys/vnode.h>
47#include <sys/mount.h>
48#include <sys/namei.h>
49#include <sys/malloc.h>
50#include <sys/bio.h>
51#include <sys/buf.h>
52#include <sys/lock.h>
53#include <sys/sysctl.h>
54#include <fs/unionfs/union.h>
55
56#include <vm/vm.h>
57#include <vm/vnode_pager.h>
58
59#include <vm/vm_page.h>
60#include <vm/vm_object.h>
61
/*
 * Debug knob consumed by the UDEBUG() macro.  Exported as the
 * vfs.uniondebug sysctl: read-write when the debug code is compiled
 * in (UDEBUG_ENABLED), read-only otherwise.
 */
int uniondebug = 0;

#if UDEBUG_ENABLED
SYSCTL_INT(_vfs, OID_AUTO, uniondebug, CTLFLAG_RW, &uniondebug, 0, "");
#else
SYSCTL_INT(_vfs, OID_AUTO, uniondebug, CTLFLAG_RD, &uniondebug, 0, "");
#endif
69
70static int	union_access(struct vop_access_args *ap);
71static int	union_advlock(struct vop_advlock_args *ap);
72static int	union_close(struct vop_close_args *ap);
73static int	union_create(struct vop_create_args *ap);
74static int	union_createvobject(struct vop_createvobject_args *ap);
75static int	union_destroyvobject(struct vop_destroyvobject_args *ap);
76static int	union_fsync(struct vop_fsync_args *ap);
77static int	union_getattr(struct vop_getattr_args *ap);
78static int	union_getvobject(struct vop_getvobject_args *ap);
79static int	union_inactive(struct vop_inactive_args *ap);
80static int	union_ioctl(struct vop_ioctl_args *ap);
81static int	union_lease(struct vop_lease_args *ap);
82static int	union_link(struct vop_link_args *ap);
83static int	union_lookup(struct vop_lookup_args *ap);
84static int	union_lookup1(struct vnode *udvp, struct vnode **dvp,
85				   struct vnode **vpp,
86				   struct componentname *cnp);
87static int	union_mkdir(struct vop_mkdir_args *ap);
88static int	union_mknod(struct vop_mknod_args *ap);
89static int	union_open(struct vop_open_args *ap);
90static int	union_pathconf(struct vop_pathconf_args *ap);
91static int	union_print(struct vop_print_args *ap);
92static int	union_read(struct vop_read_args *ap);
93static int	union_readdir(struct vop_readdir_args *ap);
94static int	union_readlink(struct vop_readlink_args *ap);
95static int	union_getwritemount(struct vop_getwritemount_args *ap);
96static int	union_reclaim(struct vop_reclaim_args *ap);
97static int	union_remove(struct vop_remove_args *ap);
98static int	union_rename(struct vop_rename_args *ap);
99static int	union_revoke(struct vop_revoke_args *ap);
100static int	union_rmdir(struct vop_rmdir_args *ap);
101static int	union_poll(struct vop_poll_args *ap);
102static int	union_setattr(struct vop_setattr_args *ap);
103static int	union_strategy(struct vop_strategy_args *ap);
104static int	union_symlink(struct vop_symlink_args *ap);
105static int	union_whiteout(struct vop_whiteout_args *ap);
106static int	union_write(struct vop_read_args *ap);
107
/*
 * Return the union node's upper vnode referenced and exclusively
 * locked, or NULL if there is no upper layer.  The caller releases
 * both the reference and the lock with union_unlock_upper().
 */
static __inline
struct vnode *
union_lock_upper(struct union_node *un, struct thread *td)
{
	struct vnode *uppervp;

	if ((uppervp = un->un_uppervp) != NULL) {
		/*
		 * Take our own reference before locking.  LK_CANRECURSE
		 * permits relocking a vnode whose lock we may already hold.
		 */
		VREF(uppervp);
		vn_lock(uppervp, LK_EXCLUSIVE | LK_CANRECURSE | LK_RETRY, td);
	}
	KASSERT((uppervp == NULL || vrefcnt(uppervp) > 0), ("uppervp usecount is 0"));
	return(uppervp);
}
121
/*
 * Release the reference and lock acquired by union_lock_upper().
 */
static __inline
void
union_unlock_upper(struct vnode *uppervp, struct thread *td)
{
	vput(uppervp);
}
128
/*
 * Return the preferred backing vnode of the union node, referenced
 * and exclusively locked: the upper vnode if one exists, otherwise
 * the lower vnode.  Returns NULL only when both layers are absent
 * (vp is assigned NULL in the else-if condition in that case).
 */
static __inline
struct vnode *
union_lock_other(struct union_node *un, struct thread *td)
{
	struct vnode *vp;

	if (un->un_uppervp != NULL) {
		vp = union_lock_upper(un, td);
	} else if ((vp = un->un_lowervp) != NULL) {
		VREF(vp);
		vn_lock(vp, LK_EXCLUSIVE | LK_CANRECURSE | LK_RETRY, td);
	}
	return(vp);
}
143
/*
 * Release the reference and lock acquired by union_lock_other().
 */
static __inline
void
union_unlock_other(struct vnode *vp, struct thread *td)
{
	vput(vp);
}
150
/*
 *	union_lookup1:
 *
 *	udvp	must be exclusively locked on call and will remain
 *		exclusively locked on return.  This is the mount point
 *		for our filesystem.
 *
 *	pdvp	Our base directory, locked and referenced.
 *		The passed *pdvp will be dereferenced and unlocked on return
 *		and a new dvp will be returned which is locked and
 *		referenced in the same variable.
 *
 *	vpp	is filled in with the result if no error occurred,
 *		locked and ref'd.
 *
 *		If an error is returned, *vpp is set to NULLVP.  If no
 *		error occurs, *vpp is returned with a reference and an
 *		exclusive lock.
 */
170
static int
union_lookup1(udvp, pdvp, vpp, cnp)
	struct vnode *udvp;
	struct vnode **pdvp;
	struct vnode **vpp;
	struct componentname *cnp;
{
	int error;
	struct thread *td = cnp->cn_thread;
	struct vnode *dvp = *pdvp;
	struct vnode *tdvp;
	struct mount *mp;

	/*
	 * If stepping up the directory tree, check for going
	 * back across the mount point, in which case do what
	 * lookup would do by stepping back down the mount
	 * hierarchy.
	 */
	if (cnp->cn_flags & ISDOTDOT) {
		while ((dvp != udvp) && (dvp->v_vflag & VV_ROOT)) {
			/*
			 * Don't do the NOCROSSMOUNT check
			 * at this level.  By definition,
			 * union fs deals with namespaces, not
			 * filesystems.
			 */
			tdvp = dvp;
			dvp = dvp->v_mount->mnt_vnodecovered;
			VREF(dvp);
			vput(tdvp);
			vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, td);
		}
	}

	/*
	 * Set return dvp to be the upperdvp parent directory.
	 */
	*pdvp = dvp;

	/*
	 * If the VOP_LOOKUP() call generates an error, tdvp is invalid and
	 * no changes will have been made to dvp, so we are set to return.
	 */

        error = VOP_LOOKUP(dvp, &tdvp, cnp);
	if (error) {
		UDEBUG(("dvp %p error %d flags %lx\n", dvp, error, cnp->cn_flags));
		*vpp = NULL;
		return (error);
	}

	/*
	 * The parent directory will have been unlocked, unless lookup
	 * found the last component or if dvp == tdvp (tdvp must be locked).
	 *
	 * We want our dvp to remain locked and ref'd.  We also want tdvp
	 * to remain locked and ref'd.
	 */
	UDEBUG(("parentdir %p result %p flag %lx\n", dvp, tdvp, cnp->cn_flags));

	if (dvp != tdvp && (cnp->cn_flags & ISLASTCN) == 0)
		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, td);

	/*
	 * Lastly check if the current node is a mount point in
	 * which case walk up the mount hierarchy making sure not to
	 * bump into the root of the mount tree (ie. dvp != udvp).
	 *
	 * We use dvp as a temporary variable here, it is no longer related
	 * to the dvp above.  However, we have to ensure that both *pdvp and
	 * tdvp are locked on return.
	 */

	dvp = tdvp;
	while (
	    dvp != udvp &&
	    (dvp->v_type == VDIR) &&
	    (mp = dvp->v_mountedhere)
	) {
		int relock_pdvp = 0;

		/* If we cannot busy the mount, re-evaluate and retry. */
		if (vfs_busy(mp, 0, 0, td))
			continue;

		if (dvp == *pdvp)
			relock_pdvp = 1;
		vput(dvp);
		dvp = NULL;
		error = VFS_ROOT(mp, &dvp);

		vfs_unbusy(mp, td);

		if (relock_pdvp)
			vn_lock(*pdvp, LK_EXCLUSIVE | LK_RETRY, td);

		if (error) {
			*vpp = NULL;
			return (error);
		}
	}
	*vpp = dvp;
	return (0);
}
275
/*
 *	union_lookup:
 *
 *	Look the component up in the upper layer first and then, unless
 *	a whiteout hides it, in the lower layer, finally combining the
 *	results into a union vnode via union_allocvp().
 */
static int
union_lookup(ap)
	struct vop_lookup_args /* {
		struct vnodeop_desc *a_desc;
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
	} */ *ap;
{
	int error;
	int uerror, lerror;
	struct vnode *uppervp, *lowervp;
	struct vnode *upperdvp, *lowerdvp;
	struct vnode *dvp = ap->a_dvp;		/* starting dir */
	struct union_node *dun = VTOUNION(dvp);	/* associated union node */
	struct componentname *cnp = ap->a_cnp;
	struct thread *td = cnp->cn_thread;
	int lockparent = cnp->cn_flags & LOCKPARENT;
	struct union_mount *um = MOUNTTOUNIONMOUNT(dvp->v_mount);
	struct ucred *saved_cred = NULL;
	int iswhiteout;
	struct vattr va;

	*ap->a_vpp = NULLVP;

	/*
	 * Disallow write attempts to the filesystem mounted read-only.
	 */
	if ((cnp->cn_flags & ISLASTCN) &&
	    (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) {
		return (EROFS);
	}

	/*
	 * For any lookups we do, always return with the parent locked.
	 */
	cnp->cn_flags |= LOCKPARENT;

	lowerdvp = dun->un_lowervp;
	uppervp = NULLVP;
	lowervp = NULLVP;
	iswhiteout = 0;

	uerror = ENOENT;
	lerror = ENOENT;

	/*
	 * Get a private lock on uppervp and a reference, effectively
	 * taking it out of the union_node's control.
	 *
	 * We must lock upperdvp while holding our lock on dvp
	 * to avoid a deadlock.
	 */
	upperdvp = union_lock_upper(dun, td);

	/*
	 * Do the lookup in the upper level.
	 * If that level consumes additional pathnames,
	 * then assume that something special is going
	 * on and just return that vnode.
	 */
	if (upperdvp != NULLVP) {
		/*
		 * We do not have to worry about the DOTDOT case, we've
		 * already unlocked dvp.
		 */
		UDEBUG(("A %p\n", upperdvp));

		/*
		 * Do the lookup.   We must supply a locked and referenced
		 * upperdvp to the function and will get a new locked and
		 * referenced upperdvp back, with the old having been
		 * dereferenced.
		 *
		 * If an error is returned, uppervp will be NULLVP.  If no
		 * error occurs, uppervp will be the locked and referenced.
		 * Return vnode, or possibly NULL, depending on what is being
		 * requested.  It is possible that the returned uppervp
		 * will be the same as upperdvp.
		 */
		uerror = union_lookup1(um->um_uppervp, &upperdvp, &uppervp, cnp);
		UDEBUG((
		    "uerror %d upperdvp %p %d/%d, uppervp %p ref=%d/lck=%d\n",
		    uerror,
		    upperdvp,
		    vrefcnt(upperdvp),
		    VOP_ISLOCKED(upperdvp, NULL),
		    uppervp,
		    (uppervp ? vrefcnt(uppervp) : -99),
		    (uppervp ? VOP_ISLOCKED(uppervp, NULL) : -99)
		));

		/*
		 * Disallow write attempts to the filesystem mounted read-only.
		 */
		if (uerror == EJUSTRETURN && (cnp->cn_flags & ISLASTCN) &&
		    (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
		    (cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME)) {
			error = EROFS;
			goto out;
		}

		/*
		 * Special case: If cn_consume != 0 then skip out.  The result
		 * of the lookup is transferred to our return variable.  If
		 * an error occurred we have to throw away the results.
		 */

		if (cnp->cn_consume != 0) {
			if ((error = uerror) == 0) {
				*ap->a_vpp = uppervp;
				uppervp = NULL;
			}
			goto out;
		}

		/*
		 * Calculate whiteout, fall through.  A whiteout entry or
		 * an opaque directory in the upper layer hides everything
		 * in the lower layer.
		 */

		if (uerror == ENOENT || uerror == EJUSTRETURN) {
			if (cnp->cn_flags & ISWHITEOUT) {
				iswhiteout = 1;
			} else if (lowerdvp != NULLVP) {
				int terror;

				terror = VOP_GETATTR(upperdvp, &va,
					cnp->cn_cred, cnp->cn_thread);
				if (terror == 0 && (va.va_flags & OPAQUE))
					iswhiteout = 1;
			}
		}
	}

	/*
	 * In a similar way to the upper layer, do the lookup
	 * in the lower layer.   This time, if there is some
	 * component magic going on, then vput whatever we got
	 * back from the upper layer and return the lower vnode
	 * instead.
	 */

	if (lowerdvp != NULLVP && !iswhiteout) {
		int nameiop;

		UDEBUG(("B %p\n", lowerdvp));

		/*
		 * Force only LOOKUPs on the lower node, since
		 * we won't be making changes to it anyway.
		 */
		nameiop = cnp->cn_nameiop;
		cnp->cn_nameiop = LOOKUP;
		if (um->um_op == UNMNT_BELOW) {
			saved_cred = cnp->cn_cred;
			cnp->cn_cred = um->um_cred;
		}

		/*
		 * We shouldn't have to worry about locking interactions
		 * between the lower layer and our union layer (w.r.t.
		 * `..' processing) because we don't futz with lowervp
		 * locks in the union-node instantiation code path.
		 *
		 * union_lookup1() requires lowervp to be locked on entry,
		 * and it will be unlocked on return.  The ref count will
		 * not change.  On return lowervp doesn't represent anything
		 * to us so we NULL it out.
		 */
		VREF(lowerdvp);
		vn_lock(lowerdvp, LK_EXCLUSIVE | LK_RETRY, td);
		lerror = union_lookup1(um->um_lowervp, &lowerdvp, &lowervp, cnp);
		if (lowerdvp == lowervp)
			vrele(lowerdvp);
		else
			vput(lowerdvp);
		lowerdvp = NULL;	/* lowerdvp invalid after vput */

		if (um->um_op == UNMNT_BELOW)
			cnp->cn_cred = saved_cred;
		cnp->cn_nameiop = nameiop;

		if (cnp->cn_consume != 0 || lerror == EACCES) {
			if ((error = lerror) == 0) {
				*ap->a_vpp = lowervp;
				lowervp = NULL;
			}
			goto out;
		}
	} else {
		UDEBUG(("C %p\n", lowerdvp));
		if ((cnp->cn_flags & ISDOTDOT) && dun->un_pvp != NULLVP) {
			if ((lowervp = LOWERVP(dun->un_pvp)) != NULL) {
				VREF(lowervp);
				vn_lock(lowervp, LK_EXCLUSIVE | LK_RETRY, td);
				lerror = 0;
			}
		}
	}

	/*
	 * Ok.  Now we have uerror, uppervp, upperdvp, lerror, and lowervp.
	 *
	 * 1. If both layers returned an error, select the upper layer.
	 *
	 * 2. If the upper layer failed and the bottom layer succeeded,
	 *    two subcases occur:
	 *
	 *	a.  The bottom vnode is not a directory, in which case
	 *	    just return a new union vnode referencing an
	 *	    empty top layer and the existing bottom layer.
	 *
	 *	b.  The bottom vnode is a directory, in which case
	 *	    create a new directory in the top layer and
	 *	    and fall through to case 3.
	 *
	 * 3. If the top layer succeeded, then return a new union
	 *    vnode referencing whatever the new top layer and
	 *    whatever the bottom layer returned.
	 */

	/* case 1. */
	if ((uerror != 0) && (lerror != 0)) {
		error = uerror;
		goto out;
	}

	/* case 2. */
	if (uerror != 0 /* && (lerror == 0) */ ) {
		if (lowervp->v_type == VDIR) { /* case 2b. */
			KASSERT(uppervp == NULL, ("uppervp unexpectedly non-NULL"));
			/*
			 * Oops, uppervp has a problem, we may have to shadow.
			 */
			uerror = union_mkshadow(um, upperdvp, cnp, &uppervp);
			if (uerror) {
				error = uerror;
				goto out;
			}
		}
	}

	/*
	 * Must call union_allocvp() with both the upper and lower vnodes
	 * referenced and the upper vnode locked.   ap->a_vpp is returned
	 * referenced and locked.  lowervp, uppervp, and upperdvp are
	 * absorbed by union_allocvp() whether it succeeds or fails.
	 *
	 * upperdvp is the parent directory of uppervp which may be
	 * different, depending on the path, from dvp->un_uppervp.  That's
	 * why it is a separate argument.  Note that it must be unlocked.
	 *
	 * dvp must be locked on entry to the call and will be locked on
	 * return.
	 */

	if (uppervp && uppervp != upperdvp)
		VOP_UNLOCK(uppervp, 0, td);
	if (lowervp)
		VOP_UNLOCK(lowervp, 0, td);
	if (upperdvp)
		VOP_UNLOCK(upperdvp, 0, td);

	error = union_allocvp(ap->a_vpp, dvp->v_mount, dvp, upperdvp, cnp,
			      uppervp, lowervp, 1);

	UDEBUG(("Create %p = %p %p refs=%d\n", *ap->a_vpp, uppervp, lowervp, (*ap->a_vpp) ? vrefcnt(*ap->a_vpp) : -99));

	uppervp = NULL;
	upperdvp = NULL;
	lowervp = NULL;

	/*
	 *	Termination Code
	 *
	 *	- put away any extra junk laying around.  Note that lowervp
	 *	  (if not NULL) will never be the same as *ap->a_vp and
	 *	  neither will uppervp, because when we set that state we
	 *	  NULL-out lowervp or uppervp.  On the other hand, upperdvp
	 *	  may match uppervp or *ap->a_vpp.
	 *
	 *	- relock/unlock dvp if appropriate.
	 */

out:
	if (upperdvp) {
		if (upperdvp == uppervp || upperdvp == *ap->a_vpp)
			vrele(upperdvp);
		else
			vput(upperdvp);
	}

	if (uppervp)
		vput(uppervp);

	if (lowervp)
		vput(lowervp);

	/*
	 * Restore LOCKPARENT state
	 */

	if (!lockparent)
		cnp->cn_flags &= ~LOCKPARENT;

	UDEBUG(("Out %d vpp %p/%d lower %p upper %p\n", error, *ap->a_vpp,
		((*ap->a_vpp) ? vrefcnt(*ap->a_vpp) : -99),
		lowervp, uppervp));

	/*
	 * dvp lock state, determine whether to relock dvp.  dvp is expected
	 * to be locked on return if:
	 *
	 *	- there was an error (except not EJUSTRETURN), or
	 *	- we hit the last component and lockparent is true
	 *
	 * dvp_is_locked is the current state of the dvp lock, not counting
	 * the possibility that *ap->a_vpp == dvp (in which case it is locked
	 * anyway).  Note that *ap->a_vpp == dvp only if no error occurred.
	 */

	if (*ap->a_vpp != dvp) {
		if ((error == 0 || error == EJUSTRETURN) &&
		    (!lockparent || (cnp->cn_flags & ISLASTCN) == 0)) {
			VOP_UNLOCK(dvp, 0, td);
		}
	}

	/*
	 * Diagnostics
	 */

#ifdef DIAGNOSTIC
	if (cnp->cn_namelen == 1 &&
	    cnp->cn_nameptr[0] == '.' &&
	    *ap->a_vpp != dvp) {
		/*
		 * NOTE(review): the message prints ap->a_vpp (the pointer
		 * cell) rather than *ap->a_vpp (the vnode) -- confirm the
		 * intended value before relying on the output.
		 */
		panic("union_lookup returning . (%p) not same as startdir (%p)", ap->a_vpp, dvp);
	}
#endif

	return (error);
}
619
620/*
621 * 	union_create:
622 *
623 * a_dvp is locked on entry and remains locked on return.  a_vpp is returned
624 * locked if no error occurs, otherwise it is garbage.
625 */
626
static int
union_create(ap)
	struct vop_create_args /* {
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
		struct vattr *a_vap;
	} */ *ap;
{
	struct union_node *dun = VTOUNION(ap->a_dvp);
	struct componentname *cnp = ap->a_cnp;
	struct thread *td = cnp->cn_thread;
	struct vnode *dvp;
	int error = EROFS;	/* creation only possible in the upper layer */

	if ((dvp = union_lock_upper(dun, td)) != NULL) {
		struct vnode *vp;
		struct mount *mp;

		error = VOP_CREATE(dvp, &vp, cnp, ap->a_vap);
		if (error == 0) {
			mp = ap->a_dvp->v_mount;
			/*
			 * Unlock the new upper vnode and wrap it in a
			 * union node; union_allocvp() absorbs our reference
			 * to vp and returns *ap->a_vpp locked and ref'd.
			 */
			VOP_UNLOCK(vp, 0, td);
			UDEBUG(("ALLOCVP-1 FROM %p REFS %d\n", vp, vrefcnt(vp)));
			error = union_allocvp(ap->a_vpp, mp, NULLVP, NULLVP,
				cnp, vp, NULLVP, 1);
			UDEBUG(("ALLOCVP-2B FROM %p REFS %d\n", *ap->a_vpp, vrefcnt(vp)));
		}
		union_unlock_upper(dvp, td);
	}
	return (error);
}
659
660static int
661union_whiteout(ap)
662	struct vop_whiteout_args /* {
663		struct vnode *a_dvp;
664		struct componentname *a_cnp;
665		int a_flags;
666	} */ *ap;
667{
668	struct union_node *un = VTOUNION(ap->a_dvp);
669	struct componentname *cnp = ap->a_cnp;
670	struct vnode *uppervp;
671	int error = EOPNOTSUPP;
672
673	if ((uppervp = union_lock_upper(un, cnp->cn_thread)) != NULLVP) {
674		error = VOP_WHITEOUT(un->un_uppervp, cnp, ap->a_flags);
675		union_unlock_upper(uppervp, cnp->cn_thread);
676	}
677	return(error);
678}
679
/*
 * 	union_mknod:
 *
 *	a_dvp is locked on entry and should remain locked on return.
 *	a_vpp is garbage whether an error occurs or not.
 */
686
static int
union_mknod(ap)
	struct vop_mknod_args /* {
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
		struct vattr *a_vap;
	} */ *ap;
{
	struct union_node *dun = VTOUNION(ap->a_dvp);
	struct componentname *cnp = ap->a_cnp;
	struct vnode *dvp;
	int error = EROFS;	/* nodes can only be made in the upper layer */

	if ((dvp = union_lock_upper(dun, cnp->cn_thread)) != NULL) {
		/* Pass a_vpp straight through; it is not wrapped in a union node. */
		error = VOP_MKNOD(dvp, ap->a_vpp, cnp, ap->a_vap);
		union_unlock_upper(dvp, cnp->cn_thread);
	}
	return (error);
}
707
708/*
709 *	union_open:
710 *
711 *	run open VOP.  When opening the underlying vnode we have to mimic
712 *	vn_open().  What we *really* need to do to avoid screwups if the
713 *	open semantics change is to call vn_open().  For example, ufs blows
714 *	up if you open a file but do not vmio it prior to writing.
715 */
716
static int
union_open(ap)
	struct vop_open_args /* {
		struct vnodeop_desc *a_desc;
		struct vnode *a_vp;
		int a_mode;
		struct ucred *a_cred;
		struct thread *a_td;
	} */ *ap;
{
	struct union_node *un = VTOUNION(ap->a_vp);
	struct vnode *tvp;
	int mode = ap->a_mode;
	struct ucred *cred = ap->a_cred;
	struct thread *td = ap->a_td;
	int error = 0;
	int tvpisupper = 1;

	/*
	 * If there is an existing upper vp then simply open that.
	 * The upper vp takes precedence over the lower vp.  When opening
	 * a lower vp for writing copy it to the uppervp and then open the
	 * uppervp.
	 *
	 * At the end of this section tvp will be left locked.
	 */
	if ((tvp = union_lock_upper(un, td)) == NULLVP) {
		/*
		 * If the lower vnode is being opened for writing, then
		 * copy the file contents to the upper vnode and open that,
		 * otherwise can simply open the lower vnode.
		 */
		tvp = un->un_lowervp;
		if ((ap->a_mode & FWRITE) && (tvp->v_type == VREG)) {
			/* Only copy the contents when not truncating. */
			int docopy = !(mode & O_TRUNC);
			error = union_copyup(un, docopy, cred, td);
			/* tvp may be NULLVP here if the copyup failed. */
			tvp = union_lock_upper(un, td);
		} else {
			/* Track opens of the lower layer for union_close(). */
			un->un_openl++;
			VREF(tvp);
			vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY, td);
			tvpisupper = 0;
		}
	}

	/*
	 * We are holding the correct vnode, open it.
	 */

	if (error == 0)
		error = VOP_OPEN(tvp, mode, cred, td);

	/*
	 * This is absolutely necessary or UFS will blow up.
	 */
        if (error == 0 && vn_canvmio(tvp) == TRUE) {
                error = vfs_object_create(tvp, td, cred);
        }

	/*
	 * Release any locks held.
	 */
	if (tvpisupper) {
		if (tvp)
			union_unlock_upper(tvp, td);
	} else {
		vput(tvp);
	}
	return (error);
}
787
788/*
789 *	union_close:
790 *
791 *	It is unclear whether a_vp is passed locked or unlocked.  Whatever
792 *	the case we do not change it.
793 */
794
static int
union_close(ap)
	struct vop_close_args /* {
		struct vnode *a_vp;
		int  a_fflag;
		struct ucred *a_cred;
		struct thread *a_td;
	} */ *ap;
{
	struct union_node *un = VTOUNION(ap->a_vp);
	struct vnode *vp;

	/*
	 * Forward the close to the upper vnode if there is one, otherwise
	 * to the lower vnode (balancing the un_openl count bumped by
	 * union_open()).
	 */
	if ((vp = un->un_uppervp) == NULLVP) {
#ifdef UNION_DIAGNOSTIC
		if (un->un_openl <= 0)
			panic("union: un_openl cnt");
#endif
		--un->un_openl;
		vp = un->un_lowervp;
	}
	/* Rewrite the argument vnode and pass the call straight down. */
	ap->a_vp = vp;
	return (VCALL(vp, VOFFSET(vop_close), ap));
}
818
819/*
820 * Check access permission on the union vnode.
821 * The access check being enforced is to check
822 * against both the underlying vnode, and any
823 * copied vnode.  This ensures that no additional
824 * file permissions are given away simply because
825 * the user caused an implicit file copy.
826 */
static int
union_access(ap)
	struct vop_access_args /* {
		struct vnodeop_desc *a_desc;
		struct vnode *a_vp;
		int a_mode;
		struct ucred *a_cred;
		struct thread *a_td;
	} */ *ap;
{
	struct union_node *un = VTOUNION(ap->a_vp);
	struct thread *td = ap->a_td;
	int error = EACCES;
	struct vnode *vp;

	/*
	 * Disallow write attempts on filesystems mounted read-only.
	 */
	if ((ap->a_mode & VWRITE) &&
	    (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY)) {
		switch (ap->a_vp->v_type) {
		case VREG:
		case VDIR:
		case VLNK:
			return (EROFS);
		default:
			break;
		}
	}

	/*
	 * If an upper vnode exists its access check is authoritative.
	 */
	if ((vp = union_lock_upper(un, td)) != NULLVP) {
		ap->a_vp = vp;
		error = VCALL(vp, VOFFSET(vop_access), ap);
		union_unlock_upper(vp, td);
		return(error);
	}

	if ((vp = un->un_lowervp) != NULLVP) {
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
		ap->a_vp = vp;

		/*
		 * Remove VWRITE from a_mode if our mount point is RW, because
		 * we want to allow writes and lowervp may be read-only.
		 */
		if ((un->un_vnode->v_mount->mnt_flag & MNT_RDONLY) == 0)
			ap->a_mode &= ~VWRITE;

		error = VCALL(vp, VOFFSET(vop_access), ap);
		if (error == 0) {
			struct union_mount *um;

			um = MOUNTTOUNIONMOUNT(un->un_vnode->v_mount);

			/*
			 * For UNMNT_BELOW mounts, re-check with the mount
			 * owner's credentials as well.
			 */
			if (um->um_op == UNMNT_BELOW) {
				ap->a_cred = um->um_cred;
				error = VCALL(vp, VOFFSET(vop_access), ap);
			}
		}
		VOP_UNLOCK(vp, 0, td);
	}
	return(error);
}
890
891/*
892 * We handle getattr only to change the fsid and
893 * track object sizes
894 *
895 * It's not clear whether VOP_GETATTR is to be
896 * called with the vnode locked or not.  stat() calls
897 * it with (vp) locked, and fstat() calls it with
898 * (vp) unlocked.
899 *
900 * Because of this we cannot use our normal locking functions
901 * if we do not intend to lock the main a_vp node.  At the moment
902 * we are running without any specific locking at all, but beware
903 * to any programmer that care must be taken if locking is added
904 * to this function.
905 */
906
static int
union_getattr(ap)
	struct vop_getattr_args /* {
		struct vnode *a_vp;
		struct vattr *a_vap;
		struct ucred *a_cred;
		struct thread *a_td;
	} */ *ap;
{
	int error;
	struct union_node *un = VTOUNION(ap->a_vp);
	struct vnode *vp;
	struct vattr *vap;
	struct vattr va;

	/*
	 * Some programs walk the filesystem hierarchy by counting
	 * links to directories to avoid stat'ing all the time.
	 * This means the link count on directories needs to be "correct".
	 * The only way to do that is to call getattr on both layers
	 * and fix up the link count.  The link count will not necessarily
	 * be accurate but will be large enough to defeat the tree walkers.
	 */

	vap = ap->a_vap;

	if ((vp = un->un_uppervp) != NULLVP) {
		error = VOP_GETATTR(vp, vap, ap->a_cred, ap->a_td);
		if (error)
			return (error);
		/* XXX isn't this dangerous without a lock? */
		union_newsize(ap->a_vp, vap->va_size, VNOVAL);
	}

	/*
	 * Decide whether the lower layer must also be queried: always
	 * when there is no upper layer; for directories with both layers
	 * the lower attributes go into the scratch `va' so only the link
	 * count is merged below.
	 */
	if (vp == NULLVP) {
		vp = un->un_lowervp;
	} else if (vp->v_type == VDIR && un->un_lowervp != NULLVP) {
		vp = un->un_lowervp;
		vap = &va;
	} else {
		vp = NULLVP;
	}

	if (vp != NULLVP) {
		error = VOP_GETATTR(vp, vap, ap->a_cred, ap->a_td);
		if (error)
			return (error);
		/* XXX isn't this dangerous without a lock? */
		union_newsize(ap->a_vp, VNOVAL, vap->va_size);
	}

	/* Merge directory link counts from both layers. */
	if ((vap != ap->a_vap) && (vap->va_type == VDIR))
		ap->a_vap->va_nlink += vap->va_nlink;
	return (0);
}
962
963static int
964union_setattr(ap)
965	struct vop_setattr_args /* {
966		struct vnode *a_vp;
967		struct vattr *a_vap;
968		struct ucred *a_cred;
969		struct thread *a_td;
970	} */ *ap;
971{
972	struct union_node *un = VTOUNION(ap->a_vp);
973	struct thread *td = ap->a_td;
974	struct vattr *vap = ap->a_vap;
975	struct vnode *uppervp;
976	int error;
977
978	/*
979	 * Disallow write attempts on filesystems mounted read-only.
980	 */
981	if ((ap->a_vp->v_mount->mnt_flag & MNT_RDONLY) &&
982	    (vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL ||
983	     vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL ||
984	     vap->va_mtime.tv_sec != VNOVAL ||
985	     vap->va_mode != (mode_t)VNOVAL)) {
986		return (EROFS);
987	}
988
989	/*
990	 * Handle case of truncating lower object to zero size
991	 * by creating a zero length upper object.  This is to
992	 * handle the case of open with O_TRUNC and O_CREAT.
993	 */
994	if (un->un_uppervp == NULLVP && (un->un_lowervp->v_type == VREG)) {
995		error = union_copyup(un, (ap->a_vap->va_size != 0),
996			    ap->a_cred, ap->a_td);
997		if (error)
998			return (error);
999	}
1000
1001	/*
1002	 * Try to set attributes in upper layer,
1003	 * otherwise return read-only filesystem error.
1004	 */
1005	error = EROFS;
1006	if ((uppervp = union_lock_upper(un, td)) != NULLVP) {
1007		error = VOP_SETATTR(un->un_uppervp, ap->a_vap,
1008					ap->a_cred, ap->a_td);
1009		if ((error == 0) && (ap->a_vap->va_size != VNOVAL))
1010			union_newsize(ap->a_vp, ap->a_vap->va_size, VNOVAL);
1011		union_unlock_upper(uppervp, td);
1012	}
1013	return (error);
1014}
1015
1016static int
1017union_read(ap)
1018	struct vop_read_args /* {
1019		struct vnode *a_vp;
1020		struct uio *a_uio;
1021		int  a_ioflag;
1022		struct ucred *a_cred;
1023	} */ *ap;
1024{
1025	struct union_node *un = VTOUNION(ap->a_vp);
1026	struct thread *td = ap->a_uio->uio_td;
1027	struct vnode *uvp;
1028	int error;
1029
1030	uvp = union_lock_other(un, td);
1031	KASSERT(uvp != NULL, ("union_read: backing vnode missing!"));
1032
1033	error = VOP_READ(uvp, ap->a_uio, ap->a_ioflag, ap->a_cred);
1034	union_unlock_other(uvp, td);
1035
1036	/*
1037	 * XXX
1038	 * Perhaps the size of the underlying object has changed under
1039	 * our feet.  Take advantage of the offset information present
1040	 * in the uio structure.
1041	 */
1042	if (error == 0) {
1043		struct union_node *un = VTOUNION(ap->a_vp);
1044		off_t cur = ap->a_uio->uio_offset;
1045
1046		if (uvp == un->un_uppervp) {
1047			if (cur > un->un_uppersz)
1048				union_newsize(ap->a_vp, cur, VNOVAL);
1049		} else {
1050			if (cur > un->un_lowersz)
1051				union_newsize(ap->a_vp, VNOVAL, cur);
1052		}
1053	}
1054	return (error);
1055}
1056
/*
 *	union_write:
 *
 *	Write through to the upper layer, which must exist (enforced by
 *	the panic below), then propagate any size growth to the union node.
 *
 *	NOTE(review): the argument is declared as vop_read_args, matching
 *	the prototype at the top of the file; presumably its layout is
 *	compatible with vop_write_args -- confirm before changing.
 */
static int
union_write(ap)
	struct vop_read_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int  a_ioflag;
		struct ucred *a_cred;
	} */ *ap;
{
	struct union_node *un = VTOUNION(ap->a_vp);
	struct thread *td = ap->a_uio->uio_td;
	struct vnode *uppervp;
	int error;

	if ((uppervp = union_lock_upper(un, td)) == NULLVP)
		panic("union: missing upper layer in write");

	error = VOP_WRITE(uppervp, ap->a_uio, ap->a_ioflag, ap->a_cred);

	/*
	 * The size of the underlying object may be changed by the
	 * write.
	 */
	if (error == 0) {
		off_t cur = ap->a_uio->uio_offset;

		if (cur > un->un_uppersz)
			union_newsize(ap->a_vp, cur, VNOVAL);
	}
	union_unlock_upper(uppervp, td);
	return (error);
}
1089
1090static int
1091union_lease(ap)
1092	struct vop_lease_args /* {
1093		struct vnode *a_vp;
1094		struct thread *a_td;
1095		struct ucred *a_cred;
1096		int a_flag;
1097	} */ *ap;
1098{
1099	struct vnode *ovp = OTHERVP(ap->a_vp);
1100
1101	ap->a_vp = ovp;
1102	return (VCALL(ovp, VOFFSET(vop_lease), ap));
1103}
1104
1105static int
1106union_ioctl(ap)
1107	struct vop_ioctl_args /* {
1108		struct vnode *a_vp;
1109		u_long  a_command;
1110		caddr_t  a_data;
1111		int  a_fflag;
1112		struct ucred *a_cred;
1113		struct thread *a_td;
1114	} */ *ap;
1115{
1116	struct vnode *ovp = OTHERVP(ap->a_vp);
1117
1118	ap->a_vp = ovp;
1119	return (VCALL(ovp, VOFFSET(vop_ioctl), ap));
1120}
1121
1122static int
1123union_poll(ap)
1124	struct vop_poll_args /* {
1125		struct vnode *a_vp;
1126		int  a_events;
1127		struct ucred *a_cred;
1128		struct thread *a_td;
1129	} */ *ap;
1130{
1131	struct vnode *ovp = OTHERVP(ap->a_vp);
1132
1133	ap->a_vp = ovp;
1134	return (VCALL(ovp, VOFFSET(vop_poll), ap));
1135}
1136
1137static int
1138union_revoke(ap)
1139	struct vop_revoke_args /* {
1140		struct vnode *a_vp;
1141		int a_flags;
1142		struct thread *a_td;
1143	} */ *ap;
1144{
1145	struct vnode *vp = ap->a_vp;
1146
1147	if (UPPERVP(vp))
1148		VOP_REVOKE(UPPERVP(vp), ap->a_flags);
1149	if (LOWERVP(vp))
1150		VOP_REVOKE(LOWERVP(vp), ap->a_flags);
1151	vgone(vp);
1152	return (0);
1153}
1154
1155static int
1156union_fsync(ap)
1157	struct vop_fsync_args /* {
1158		struct vnode *a_vp;
1159		struct ucred *a_cred;
1160		int  a_waitfor;
1161		struct thread *a_td;
1162	} */ *ap;
1163{
1164	int error = 0;
1165	struct thread *td = ap->a_td;
1166	struct vnode *targetvp;
1167	struct union_node *un = VTOUNION(ap->a_vp);
1168
1169	if ((targetvp = union_lock_other(un, td)) != NULLVP) {
1170		error = VOP_FSYNC(targetvp, ap->a_cred, ap->a_waitfor, td);
1171		union_unlock_other(targetvp, td);
1172	}
1173
1174	return (error);
1175}
1176
1177/*
1178 *	union_remove:
1179 *
1180 *	Remove the specified cnp.  The dvp and vp are passed to us locked
1181 *	and must remain locked on return.
1182 */
1183
static int
union_remove(ap)
	struct vop_remove_args /* {
		struct vnode *a_dvp;
		struct vnode *a_vp;
		struct componentname *a_cnp;
	} */ *ap;
{
	struct union_node *dun = VTOUNION(ap->a_dvp);
	struct union_node *un = VTOUNION(ap->a_vp);
	struct componentname *cnp = ap->a_cnp;
	struct thread *td = cnp->cn_thread;
	struct vnode *uppervp;
	struct vnode *upperdvp;
	int error;

	/*
	 * Removal must be done in the upper layer; a union directory
	 * with no upper vnode should never get here.
	 */
	if ((upperdvp = union_lock_upper(dun, td)) == NULLVP)
		panic("union remove: null upper vnode");

	if ((uppervp = union_lock_upper(un, td)) != NULLVP) {
		/*
		 * The file exists in the upper layer: remove it there.
		 * If union_dowhiteout() says a lower-layer copy would
		 * show through, ask the upper fs to leave a whiteout.
		 */
		if (union_dowhiteout(un, cnp->cn_cred, td))
			cnp->cn_flags |= DOWHITEOUT;
		error = VOP_REMOVE(upperdvp, uppervp, cnp);
#if 0
		/* XXX */
		if (!error)
			union_removed_upper(un);
#endif
		union_unlock_upper(uppervp, td);
	} else {
		/*
		 * Only a lower-layer copy exists: mask it by creating
		 * a whiteout entry in the upper directory.
		 */
		error = union_mkwhiteout(
			    MOUNTTOUNIONMOUNT(ap->a_dvp->v_mount),
			    upperdvp, ap->a_cnp, un->un_path);
	}
	union_unlock_upper(upperdvp, td);
	return (error);
}
1221
1222/*
1223 *	union_link:
1224 *
1225 *	tdvp and vp will be locked on entry.
1226 *	tdvp and vp should remain locked on return.
1227 */
1228
static int
union_link(ap)
	struct vop_link_args /* {
		struct vnode *a_tdvp;
		struct vnode *a_vp;
		struct componentname *a_cnp;
	} */ *ap;
{
	struct componentname *cnp = ap->a_cnp;
	struct thread *td = cnp->cn_thread;
	struct union_node *dun = VTOUNION(ap->a_tdvp);
	struct vnode *vp;
	struct vnode *tdvp;
	int error = 0;

	/*
	 * Pick the source vnode for the link.  If a_vp is not a union
	 * vnode (different vop vector), use it as-is.  Otherwise the
	 * link source must reside in the upper layer, so copy it up
	 * first when only a lower vnode exists, then lock the upper.
	 */
	if (ap->a_tdvp->v_op != ap->a_vp->v_op) {
		vp = ap->a_vp;
	} else {
		struct union_node *tun = VTOUNION(ap->a_vp);

		if (tun->un_uppervp == NULLVP) {
#if 0
			if (dun->un_uppervp == tun->un_dirvp) {
				if (dun->un_flags & UN_ULOCK) {
					dun->un_flags &= ~UN_ULOCK;
					VOP_UNLOCK(dun->un_uppervp, 0, td);
				}
			}
#endif
			error = union_copyup(tun, 1, cnp->cn_cred, td);
#if 0
			if (dun->un_uppervp == tun->un_dirvp) {
				vn_lock(dun->un_uppervp,
					    LK_EXCLUSIVE | LK_RETRY, td);
				dun->un_flags |= UN_ULOCK;
			}
#endif
			if (error)
				return (error);
		}
		vp = tun->un_uppervp;
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
	}

	/*
	 * Make sure upper is locked, then unlock the union directory we were
	 * called with to avoid a deadlock while we are calling VOP_LINK() on
	 * the upper (with tdvp locked and vp not locked).  Our ap->a_tdvp
	 * is expected to be locked on return.
	 */

	if ((tdvp = union_lock_upper(dun, td)) == NULLVP)
		return (EROFS);

	VOP_UNLOCK(ap->a_tdvp, 0, td);		/* unlock calling node */
	error = VOP_LINK(tdvp, vp, cnp);	/* call link on upper */

	/*
	 * Unlock tun->un_uppervp if we locked it above.
	 */
	if (ap->a_tdvp->v_op == ap->a_vp->v_op)
		VOP_UNLOCK(vp, 0, td);
	/*
	 * We have to unlock tdvp prior to relocking our calling node in
	 * order to avoid a deadlock.  We also have to unlock ap->a_vp
	 * before relocking the directory, but then we have to relock
	 * ap->a_vp as our caller expects.
	 */
	VOP_UNLOCK(ap->a_vp, 0, td);
	union_unlock_upper(tdvp, td);
	vn_lock(ap->a_tdvp, LK_EXCLUSIVE | LK_RETRY, td);
	vn_lock(ap->a_vp, LK_EXCLUSIVE | LK_RETRY, td);
	return (error);
}
1303
/*
 *	union_rename:
 *
 *	Remap all four vnode arguments to their upper-layer equivalents
 *	before handing the operation to the real filesystem.  A regular
 *	file that exists only in the lower layer is copied up first;
 *	renaming a lower-layer directory is not supported and returns
 *	EXDEV.  On success the underlying VOP_RENAME() consumes all
 *	references; on error we release them ourselves at 'bad'.
 */
static int
union_rename(ap)
	struct vop_rename_args  /* {
		struct vnode *a_fdvp;
		struct vnode *a_fvp;
		struct componentname *a_fcnp;
		struct vnode *a_tdvp;
		struct vnode *a_tvp;
		struct componentname *a_tcnp;
	} */ *ap;
{
	int error;
	struct vnode *fdvp = ap->a_fdvp;
	struct vnode *fvp = ap->a_fvp;
	struct vnode *tdvp = ap->a_tdvp;
	struct vnode *tvp = ap->a_tvp;

	/*
	 * Figure out what fdvp to pass to our upper or lower vnode.  If we
	 * replace the fdvp, release the original one and ref the new one.
	 */

	if (fdvp->v_op == union_vnodeop_p) {	/* always true */
		struct union_node *un = VTOUNION(fdvp);
		if (un->un_uppervp == NULLVP) {
			/*
			 * this should never happen in normal
			 * operation but might if there was
			 * a problem creating the top-level shadow
			 * directory.
			 */
			error = EXDEV;
			goto bad;
		}
		fdvp = un->un_uppervp;
		VREF(fdvp);
		vrele(ap->a_fdvp);
	}

	/*
	 * Figure out what fvp to pass to our upper or lower vnode.  If we
	 * replace the fvp, release the original one and ref the new one.
	 */

	if (fvp->v_op == union_vnodeop_p) {	/* always true */
		struct union_node *un = VTOUNION(fvp);
#if 0
		struct union_mount *um = MOUNTTOUNIONMOUNT(fvp->v_mount);
#endif

		if (un->un_uppervp == NULLVP) {
			switch(fvp->v_type) {
			case VREG:
				/*
				 * Regular file exists only below: copy
				 * it to the upper layer so the rename
				 * can proceed there.
				 */
				vn_lock(un->un_vnode, LK_EXCLUSIVE | LK_RETRY, ap->a_fcnp->cn_thread);
				error = union_copyup(un, 1, ap->a_fcnp->cn_cred, ap->a_fcnp->cn_thread);
				VOP_UNLOCK(un->un_vnode, 0, ap->a_fcnp->cn_thread);
				if (error)
					goto bad;
				break;
			case VDIR:
				/*
				 * XXX not yet.
				 *
				 * There is only one way to rename a directory
				 * based in the lowervp, and that is to copy
				 * the entire directory hierarchy.  Otherwise
				 * it would not last across a reboot.
				 */
#if 0
				vrele(fvp);
				fvp = NULL;
				vn_lock(fdvp, LK_EXCLUSIVE | LK_RETRY, ap->a_fcnp->cn_thread);
				error = union_mkshadow(um, fdvp,
					    ap->a_fcnp, &un->un_uppervp);
				VOP_UNLOCK(fdvp, 0, ap->a_fcnp->cn_thread);
				if (un->un_uppervp)
					VOP_UNLOCK(un->un_uppervp, 0, ap->a_fcnp->cn_thread);
				if (error)
					goto bad;
				break;
#endif
			default:
				/* note: VDIR falls through here while the
				 * code above is disabled */
				error = EXDEV;
				goto bad;
			}
		}

		if (un->un_lowervp != NULLVP)
			ap->a_fcnp->cn_flags |= DOWHITEOUT;
		fvp = un->un_uppervp;
		VREF(fvp);
		vrele(ap->a_fvp);
	}

	/*
	 * Figure out what tdvp (destination directory) to pass to the
	 * lower level.  If we replace it with uppervp, we need to vput the
	 * old one.  The exclusive lock is transfered to what we will pass
	 * down in the VOP_RENAME() and we replace uppervp with a simple
	 * reference.
	 */

	if (tdvp->v_op == union_vnodeop_p) {
		struct union_node *un = VTOUNION(tdvp);

		if (un->un_uppervp == NULLVP) {
			/*
			 * This should never happen in normal
			 * operation but might if there was
			 * a problem creating the top-level shadow
			 * directory.
			 */
			error = EXDEV;
			goto bad;
		}

		/*
		 * New tdvp is a lock and reference on uppervp.
		 * Put away the old tdvp.
		 */
		tdvp = union_lock_upper(un, ap->a_tcnp->cn_thread);
		vput(ap->a_tdvp);
	}

	/*
	 * Figure out what tvp (destination file) to pass to the
	 * lower level.
	 *
	 * If the uppervp file does not exist, put away the (wrong)
	 * file and change tvp to NULL.
	 */

	if (tvp != NULLVP && tvp->v_op == union_vnodeop_p) {
		struct union_node *un = VTOUNION(tvp);

		tvp = union_lock_upper(un, ap->a_tcnp->cn_thread);
		vput(ap->a_tvp);
		/* note: tvp may be NULL */
	}

	/*
	 * VOP_RENAME() releases/vputs prior to returning, so we have no
	 * cleanup to do.
	 */

	return (VOP_RENAME(fdvp, fvp, ap->a_fcnp, tdvp, tvp, ap->a_tcnp));

	/*
	 * Error.  We still have to release / vput the various elements.
	 */

bad:
	vrele(fdvp);
	if (fvp)
		vrele(fvp);
	vput(tdvp);
	if (tvp != NULLVP) {
		if (tvp != tdvp)
			vput(tvp);
		else
			vrele(tvp);
	}
	return (error);
}
1468
1469static int
1470union_mkdir(ap)
1471	struct vop_mkdir_args /* {
1472		struct vnode *a_dvp;
1473		struct vnode **a_vpp;
1474		struct componentname *a_cnp;
1475		struct vattr *a_vap;
1476	} */ *ap;
1477{
1478	struct union_node *dun = VTOUNION(ap->a_dvp);
1479	struct componentname *cnp = ap->a_cnp;
1480	struct thread *td = cnp->cn_thread;
1481	struct vnode *upperdvp;
1482	int error = EROFS;
1483
1484	if ((upperdvp = union_lock_upper(dun, td)) != NULLVP) {
1485		struct vnode *vp;
1486
1487		error = VOP_MKDIR(upperdvp, &vp, cnp, ap->a_vap);
1488		union_unlock_upper(upperdvp, td);
1489
1490		if (error == 0) {
1491			VOP_UNLOCK(vp, 0, td);
1492			UDEBUG(("ALLOCVP-2 FROM %p REFS %d\n", vp, vrefcnt(vp)));
1493			error = union_allocvp(ap->a_vpp, ap->a_dvp->v_mount,
1494				ap->a_dvp, NULLVP, cnp, vp, NULLVP, 1);
1495			UDEBUG(("ALLOCVP-2B FROM %p REFS %d\n", *ap->a_vpp, vrefcnt(vp)));
1496		}
1497	}
1498	return (error);
1499}
1500
static int
union_rmdir(ap)
	struct vop_rmdir_args /* {
		struct vnode *a_dvp;
		struct vnode *a_vp;
		struct componentname *a_cnp;
	} */ *ap;
{
	struct union_node *dun = VTOUNION(ap->a_dvp);
	struct union_node *un = VTOUNION(ap->a_vp);
	struct componentname *cnp = ap->a_cnp;
	struct thread *td = cnp->cn_thread;
	struct vnode *upperdvp;
	struct vnode *uppervp;
	int error;

	/*
	 * Removal must happen in the upper layer; a union directory
	 * with no upper vnode should never get here.
	 */
	if ((upperdvp = union_lock_upper(dun, td)) == NULLVP)
		panic("union rmdir: null upper vnode");

	if ((uppervp = union_lock_upper(un, td)) != NULLVP) {
		/*
		 * Directory exists in the upper layer: remove it there,
		 * leaving a whiteout if a lower copy would show through.
		 */
		if (union_dowhiteout(un, cnp->cn_cred, td))
			cnp->cn_flags |= DOWHITEOUT;
		error = VOP_RMDIR(upperdvp, uppervp, ap->a_cnp);
		union_unlock_upper(uppervp, td);
	} else {
		/*
		 * Only a lower-layer directory exists: mask it with a
		 * whiteout in the upper directory.  (dun->un_uppervp is
		 * the same vnode as upperdvp, locked above.)
		 */
		error = union_mkwhiteout(
			    MOUNTTOUNIONMOUNT(ap->a_dvp->v_mount),
			    dun->un_uppervp, ap->a_cnp, un->un_path);
	}
	union_unlock_upper(upperdvp, td);
	return (error);
}
1533
1534/*
1535 *	union_symlink:
1536 *
1537 *	dvp is locked on entry and remains locked on return.  a_vpp is garbage
1538 *	(unused).
1539 */
1540
1541static int
1542union_symlink(ap)
1543	struct vop_symlink_args /* {
1544		struct vnode *a_dvp;
1545		struct vnode **a_vpp;
1546		struct componentname *a_cnp;
1547		struct vattr *a_vap;
1548		char *a_target;
1549	} */ *ap;
1550{
1551	struct union_node *dun = VTOUNION(ap->a_dvp);
1552	struct componentname *cnp = ap->a_cnp;
1553	struct thread *td = cnp->cn_thread;
1554	struct vnode *dvp;
1555	int error = EROFS;
1556
1557	if ((dvp = union_lock_upper(dun, td)) != NULLVP) {
1558		error = VOP_SYMLINK(dvp, ap->a_vpp, cnp, ap->a_vap,
1559			    ap->a_target);
1560		union_unlock_upper(dvp, td);
1561	}
1562	return (error);
1563}
1564
1565/*
1566 * union_readdir ()works in concert with getdirentries() and
1567 * readdir(3) to provide a list of entries in the unioned
1568 * directories.  getdirentries()  is responsible for walking
1569 * down the union stack.  readdir(3) is responsible for
1570 * eliminating duplicate names from the returned data stream.
1571 */
1572static int
1573union_readdir(ap)
1574	struct vop_readdir_args /* {
1575		struct vnode *a_vp;
1576		struct uio *a_uio;
1577		struct ucred *a_cred;
1578		int *a_eofflag;
1579		u_long *a_cookies;
1580		int a_ncookies;
1581	} */ *ap;
1582{
1583	struct union_node *un = VTOUNION(ap->a_vp);
1584	struct thread *td = ap->a_uio->uio_td;
1585	struct vnode *uvp;
1586	int error = 0;
1587
1588	if ((uvp = union_lock_upper(un, td)) != NULLVP) {
1589		ap->a_vp = uvp;
1590		error = VCALL(uvp, VOFFSET(vop_readdir), ap);
1591		union_unlock_upper(uvp, td);
1592	}
1593	return(error);
1594}
1595
1596static int
1597union_readlink(ap)
1598	struct vop_readlink_args /* {
1599		struct vnode *a_vp;
1600		struct uio *a_uio;
1601		struct ucred *a_cred;
1602	} */ *ap;
1603{
1604	int error;
1605	struct union_node *un = VTOUNION(ap->a_vp);
1606	struct uio *uio = ap->a_uio;
1607	struct thread *td = uio->uio_td;
1608	struct vnode *vp;
1609
1610	vp = union_lock_other(un, td);
1611	KASSERT(vp != NULL, ("union_readlink: backing vnode missing!"));
1612
1613	ap->a_vp = vp;
1614	error = VCALL(vp, VOFFSET(vop_readlink), ap);
1615	union_unlock_other(vp, td);
1616
1617	return (error);
1618}
1619
1620static int
1621union_getwritemount(ap)
1622	struct vop_getwritemount_args /* {
1623		struct vnode *a_vp;
1624		struct mount **a_mpp;
1625	} */ *ap;
1626{
1627	struct vnode *vp = ap->a_vp;
1628	struct vnode *uvp = UPPERVP(vp);
1629
1630	if (uvp == NULL) {
1631		VI_LOCK(vp);
1632		if (vp->v_iflag & VI_FREE) {
1633			VI_UNLOCK(vp);
1634			return (EOPNOTSUPP);
1635		}
1636		VI_UNLOCK(vp);
1637		return (EACCES);
1638	}
1639	return(VOP_GETWRITEMOUNT(uvp, ap->a_mpp));
1640}
1641
1642/*
1643 *	union_inactive:
1644 *
1645 *	Called with the vnode locked.  We are expected to unlock the vnode.
1646 */
1647
static int
union_inactive(ap)
	struct vop_inactive_args /* {
		struct vnode *a_vp;
		struct thread *a_td;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;
	struct thread *td = ap->a_td;
	struct union_node *un = VTOUNION(vp);
	struct vnode **vpp;

	/*
	 * Do nothing (and _don't_ bypass).
	 * Wait to vrele lowervp until reclaim,
	 * so that until then our union_node is in the
	 * cache and reusable.
	 *
	 */

	/*
	 * Release the cached directory vnode list (built for readdir
	 * traversal) and free the NULL-terminated array itself.
	 */
	if (un->un_dircache != 0) {
		for (vpp = un->un_dircache; *vpp != NULLVP; vpp++)
			vrele(*vpp);
		free (un->un_dircache, M_TEMP);
		un->un_dircache = 0;
	}

#if 0
	if ((un->un_flags & UN_ULOCK) && un->un_uppervp) {
		un->un_flags &= ~UN_ULOCK;
		VOP_UNLOCK(un->un_uppervp, 0, td);
	}
#endif

	/* We were called with the vnode locked and must unlock it. */
	VOP_UNLOCK(vp, 0, td);

	/* If the node is not worth caching, dispose of it now. */
	if ((un->un_flags & UN_CACHED) == 0)
		vgone(vp);

	return (0);
}
1689
static int
union_reclaim(ap)
	struct vop_reclaim_args /* {
		struct vnode *a_vp;
	} */ *ap;
{
	/* Tear down the union_node attached to this vnode. */
	union_freevp(ap->a_vp);

	return (0);
}
1700
1701/*
1702 * unionvp do not hold a VM object and there is no need to create one for
1703 * upper or lower vp because it is done in the union_open()
1704 */
1705static int
1706union_createvobject(ap)
1707	struct vop_createvobject_args /* {
1708		struct vnode *vp;
1709		struct ucred *cred;
1710		struct thread *td;
1711	} */ *ap;
1712{
1713	struct vnode *vp = ap->a_vp;
1714
1715	vp->v_vflag |= VV_OBJBUF;
1716	return (0);
1717}
1718
1719/*
1720 * We have nothing to destroy and this operation shouldn't be bypassed.
1721 */
1722static int
1723union_destroyvobject(ap)
1724	struct vop_destroyvobject_args /* {
1725		struct vnode *vp;
1726	} */ *ap;
1727{
1728	struct vnode *vp = ap->a_vp;
1729
1730	vp->v_vflag &= ~VV_OBJBUF;
1731	return (0);
1732}
1733
1734/*
1735 * Get VM object from the upper or lower vp
1736 */
1737static int
1738union_getvobject(ap)
1739	struct vop_getvobject_args /* {
1740		struct vnode *vp;
1741		struct vm_object **objpp;
1742	} */ *ap;
1743{
1744	struct vnode *ovp = OTHERVP(ap->a_vp);
1745
1746	if (ovp == NULL)
1747		return EINVAL;
1748	return (VOP_GETVOBJECT(ovp, ap->a_objpp));
1749}
1750
1751static int
1752union_print(ap)
1753	struct vop_print_args /* {
1754		struct vnode *a_vp;
1755	} */ *ap;
1756{
1757	struct vnode *vp = ap->a_vp;
1758
1759	printf("\tvp=%p, uppervp=%p, lowervp=%p\n",
1760	       vp, UPPERVP(vp), LOWERVP(vp));
1761	if (UPPERVP(vp) != NULLVP)
1762		vprint("union: upper", UPPERVP(vp));
1763	if (LOWERVP(vp) != NULLVP)
1764		vprint("union: lower", LOWERVP(vp));
1765
1766	return (0);
1767}
1768
1769static int
1770union_pathconf(ap)
1771	struct vop_pathconf_args /* {
1772		struct vnode *a_vp;
1773		int a_name;
1774		int *a_retval;
1775	} */ *ap;
1776{
1777	int error;
1778	struct thread *td = curthread;		/* XXX */
1779	struct union_node *un = VTOUNION(ap->a_vp);
1780	struct vnode *vp;
1781
1782	vp = union_lock_other(un, td);
1783	KASSERT(vp != NULL, ("union_pathconf: backing vnode missing!"));
1784
1785	ap->a_vp = vp;
1786	error = VCALL(vp, VOFFSET(vop_pathconf), ap);
1787	union_unlock_other(vp, td);
1788
1789	return (error);
1790}
1791
1792static int
1793union_advlock(ap)
1794	struct vop_advlock_args /* {
1795		struct vnode *a_vp;
1796		caddr_t  a_id;
1797		int  a_op;
1798		struct flock *a_fl;
1799		int  a_flags;
1800	} */ *ap;
1801{
1802	register struct vnode *ovp = OTHERVP(ap->a_vp);
1803
1804	ap->a_vp = ovp;
1805	return (VCALL(ovp, VOFFSET(vop_advlock), ap));
1806}
1807
1808
1809/*
1810 * XXX - vop_strategy must be hand coded because it has no
1811 * YYY - and it is not coherent with anything
1812 *
1813 * vnode in its arguments.
1814 * This goes away with a merged VM/buffer cache.
1815 */
static int
union_strategy(ap)
	struct vop_strategy_args /* {
		struct vnode *a_vp;
		struct buf *a_bp;
	} */ *ap;
{
	struct buf *bp = ap->a_bp;
	struct vnode *othervp = OTHERVP(bp->b_vp);

#ifdef DIAGNOSTIC
	/*
	 * A union node must always have a backing vnode, and writes
	 * must never be directed at the lower layer.
	 */
	if (othervp == NULLVP)
		panic("union_strategy: nil vp");
	if ((bp->b_iocmd == BIO_WRITE) &&
	    (othervp == LOWERVP(bp->b_vp)))
		panic("union_strategy: writing to lowervp");
#endif
	/* Pass the buffer straight through to the backing vnode. */
	return (VOP_STRATEGY(othervp, bp));
}
1835
1836/*
1837 * Global vfs data structures
1838 */
vop_t **union_vnodeop_p;
/*
 * Vnode operations vector for the union filesystem.  Entries are
 * listed alphabetically by operation name, except vop_getwritemount
 * which sits between readlink and reclaim.  Operations not listed
 * fall through to vop_defaultop.
 */
static struct vnodeopv_entry_desc union_vnodeop_entries[] = {
	{ &vop_default_desc,		(vop_t *) vop_defaultop },
	{ &vop_access_desc,		(vop_t *) union_access },
	{ &vop_advlock_desc,		(vop_t *) union_advlock },
	{ &vop_bmap_desc,		(vop_t *) vop_eopnotsupp },
	{ &vop_close_desc,		(vop_t *) union_close },
	{ &vop_create_desc,		(vop_t *) union_create },
	{ &vop_createvobject_desc,	(vop_t *) union_createvobject },
	{ &vop_destroyvobject_desc,	(vop_t *) union_destroyvobject },
	{ &vop_fsync_desc,		(vop_t *) union_fsync },
	{ &vop_getattr_desc,		(vop_t *) union_getattr },
	{ &vop_getvobject_desc,		(vop_t *) union_getvobject },
	{ &vop_inactive_desc,		(vop_t *) union_inactive },
	{ &vop_ioctl_desc,		(vop_t *) union_ioctl },
	{ &vop_lease_desc,		(vop_t *) union_lease },
	{ &vop_link_desc,		(vop_t *) union_link },
	{ &vop_lookup_desc,		(vop_t *) union_lookup },
	{ &vop_mkdir_desc,		(vop_t *) union_mkdir },
	{ &vop_mknod_desc,		(vop_t *) union_mknod },
	{ &vop_open_desc,		(vop_t *) union_open },
	{ &vop_pathconf_desc,		(vop_t *) union_pathconf },
	{ &vop_poll_desc,		(vop_t *) union_poll },
	{ &vop_print_desc,		(vop_t *) union_print },
	{ &vop_read_desc,		(vop_t *) union_read },
	{ &vop_readdir_desc,		(vop_t *) union_readdir },
	{ &vop_readlink_desc,		(vop_t *) union_readlink },
	{ &vop_getwritemount_desc,	(vop_t *) union_getwritemount },
	{ &vop_reclaim_desc,		(vop_t *) union_reclaim },
	{ &vop_remove_desc,		(vop_t *) union_remove },
	{ &vop_rename_desc,		(vop_t *) union_rename },
	{ &vop_revoke_desc,		(vop_t *) union_revoke },
	{ &vop_rmdir_desc,		(vop_t *) union_rmdir },
	{ &vop_setattr_desc,		(vop_t *) union_setattr },
	{ &vop_strategy_desc,		(vop_t *) union_strategy },
	{ &vop_symlink_desc,		(vop_t *) union_symlink },
	{ &vop_whiteout_desc,		(vop_t *) union_whiteout },
	{ &vop_write_desc,		(vop_t *) union_write },
	{ NULL, NULL }
};
static struct vnodeopv_desc union_vnodeop_opv_desc =
	{ &union_vnodeop_p, union_vnodeop_entries };

/* Register the vector with the VFS at module load. */
VNODEOP_SET(union_vnodeop_opv_desc);
1883