1/*	$NetBSD: union_vnops.c,v 1.13 2005/11/02 12:38:59 yamt Exp $	*/
2
3/*
4 * Copyright (c) 1992, 1993, 1994, 1995
5 *	The Regents of the University of California.  All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Jan-Simon Pendry.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its contributors
19 *    may be used to endorse or promote products derived from this software
20 *    without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 *
34 *	@(#)union_vnops.c	8.33 (Berkeley) 7/31/95
35 */
36
37/*
38 * Copyright (c) 1992, 1993, 1994, 1995 Jan-Simon Pendry.
39 *
40 * This code is derived from software contributed to Berkeley by
41 * Jan-Simon Pendry.
42 *
43 * Redistribution and use in source and binary forms, with or without
44 * modification, are permitted provided that the following conditions
45 * are met:
46 * 1. Redistributions of source code must retain the above copyright
47 *    notice, this list of conditions and the following disclaimer.
48 * 2. Redistributions in binary form must reproduce the above copyright
49 *    notice, this list of conditions and the following disclaimer in the
50 *    documentation and/or other materials provided with the distribution.
51 * 3. All advertising materials mentioning features or use of this software
52 *    must display the following acknowledgement:
53 *	This product includes software developed by the University of
54 *	California, Berkeley and its contributors.
55 * 4. Neither the name of the University nor the names of its contributors
56 *    may be used to endorse or promote products derived from this software
57 *    without specific prior written permission.
58 *
59 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
60 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
61 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
62 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
63 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
64 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
65 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
66 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
67 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
68 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
69 * SUCH DAMAGE.
70 *
71 *	@(#)union_vnops.c	8.33 (Berkeley) 7/31/95
72 */
73
74#include <sys/cdefs.h>
75__KERNEL_RCSID(0, "$NetBSD: union_vnops.c,v 1.13 2005/11/02 12:38:59 yamt Exp $");
76
77#include <sys/param.h>
78#include <sys/systm.h>
79#include <sys/proc.h>
80#include <sys/file.h>
81#include <sys/time.h>
82#include <sys/stat.h>
83#include <sys/vnode.h>
84#include <sys/mount.h>
85#include <sys/namei.h>
86#include <sys/malloc.h>
87#include <sys/buf.h>
88#include <sys/queue.h>
89#include <sys/lock.h>
90#include <fs/union/union.h>
91#include <miscfs/genfs/genfs.h>
92
93int union_lookup(void *);
94int union_create(void *);
95int union_whiteout(void *);
96int union_mknod(void *);
97int union_open(void *);
98int union_close(void *);
99int union_access(void *);
100int union_getattr(void *);
101int union_setattr(void *);
102int union_read(void *);
103int union_write(void *);
104int union_lease(void *);
105int union_ioctl(void *);
106int union_poll(void *);
107int union_revoke(void *);
108int union_mmap(void *);
109int union_fsync(void *);
110int union_seek(void *);
111int union_remove(void *);
112int union_link(void *);
113int union_rename(void *);
114int union_mkdir(void *);
115int union_rmdir(void *);
116int union_symlink(void *);
117int union_readdir(void *);
118int union_readlink(void *);
119int union_abortop(void *);
120int union_inactive(void *);
121int union_reclaim(void *);
122int union_lock(void *);
123int union_unlock(void *);
124int union_bmap(void *);
125int union_print(void *);
126int union_islocked(void *);
127int union_pathconf(void *);
128int union_advlock(void *);
129int union_strategy(void *);
130int union_getpages(void *);
131int union_putpages(void *);
132int union_kqfilter(void *);
133
134static void union_fixup(struct union_node *);
135static int union_lookup1(struct vnode *, struct vnode **,
136			      struct vnode **, struct componentname *);
137
138
139/*
140 * Global vfs data structures
141 */
142int (**union_vnodeop_p)(void *);
143const struct vnodeopv_entry_desc union_vnodeop_entries[] = {
144	{ &vop_default_desc, vn_default_error },
145	{ &vop_lookup_desc, union_lookup },		/* lookup */
146	{ &vop_create_desc, union_create },		/* create */
147	{ &vop_whiteout_desc, union_whiteout },		/* whiteout */
148	{ &vop_mknod_desc, union_mknod },		/* mknod */
149	{ &vop_open_desc, union_open },			/* open */
150	{ &vop_close_desc, union_close },		/* close */
151	{ &vop_access_desc, union_access },		/* access */
152	{ &vop_getattr_desc, union_getattr },		/* getattr */
153	{ &vop_setattr_desc, union_setattr },		/* setattr */
154	{ &vop_read_desc, union_read },			/* read */
155	{ &vop_write_desc, union_write },		/* write */
156	{ &vop_lease_desc, union_lease },		/* lease */
157	{ &vop_ioctl_desc, union_ioctl },		/* ioctl */
158	{ &vop_poll_desc, union_poll },			/* poll */
159	{ &vop_revoke_desc, union_revoke },		/* revoke */
160	{ &vop_mmap_desc, union_mmap },			/* mmap */
161	{ &vop_fsync_desc, union_fsync },		/* fsync */
162	{ &vop_seek_desc, union_seek },			/* seek */
163	{ &vop_remove_desc, union_remove },		/* remove */
164	{ &vop_link_desc, union_link },			/* link */
165	{ &vop_rename_desc, union_rename },		/* rename */
166	{ &vop_mkdir_desc, union_mkdir },		/* mkdir */
167	{ &vop_rmdir_desc, union_rmdir },		/* rmdir */
168	{ &vop_symlink_desc, union_symlink },		/* symlink */
169	{ &vop_readdir_desc, union_readdir },		/* readdir */
170	{ &vop_readlink_desc, union_readlink },		/* readlink */
171	{ &vop_abortop_desc, union_abortop },		/* abortop */
172	{ &vop_inactive_desc, union_inactive },		/* inactive */
173	{ &vop_reclaim_desc, union_reclaim },		/* reclaim */
174	{ &vop_lock_desc, union_lock },			/* lock */
175	{ &vop_unlock_desc, union_unlock },		/* unlock */
176	{ &vop_bmap_desc, union_bmap },			/* bmap */
177	{ &vop_strategy_desc, union_strategy },		/* strategy */
178	{ &vop_print_desc, union_print },		/* print */
179	{ &vop_islocked_desc, union_islocked },		/* islocked */
180	{ &vop_pathconf_desc, union_pathconf },		/* pathconf */
181	{ &vop_advlock_desc, union_advlock },		/* advlock */
182	{ &vop_getpages_desc, union_getpages },		/* getpages */
183	{ &vop_putpages_desc, union_putpages },		/* putpages */
184	{ &vop_kqfilter_desc, union_kqfilter },		/* kqfilter */
185#ifdef notdef
186	{ &vop_bwrite_desc, union_bwrite },		/* bwrite */
187#endif
188	{ NULL, NULL }
189};
190const struct vnodeopv_desc union_vnodeop_opv_desc =
191	{ &union_vnodeop_p, union_vnodeop_entries };
192
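/*
 * Locking notes (see union_lock()/union_unlock() below for the full
 * story): a union node carries its own flag-based lock (UN_LOCKED,
 * with UN_WANTED for sleepers) and, when an upper vnode exists, it
 * normally holds that vnode's lock as well, tracked by UN_ULOCK.
 * FIXUP() re-acquires the upper vnode lock if it has been dropped, so
 * that code about to call into the upper layer can rely on it being
 * locked.
 */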
193#define FIXUP(un) { \
194	if (((un)->un_flags & UN_ULOCK) == 0) { \
195		union_fixup(un); \
196	} \
197}
198
199static void
200union_fixup(un)
201	struct union_node *un;
202{
203
204	vn_lock(un->un_uppervp, LK_EXCLUSIVE | LK_RETRY);
205	un->un_flags |= UN_ULOCK;
206}
207
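/*
 * union_lookup1() looks up one component in a single (upper or lower)
 * layer.  It transparently steps back across mount points on `..' and
 * descends into filesystems mounted on the result, so that mounts
 * stacked underneath the union do not confuse the caller.  `udvp' is
 * the root vnode of that layer; the walk never goes above it.
 */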
208static int
209union_lookup1(udvp, dvpp, vpp, cnp)
210	struct vnode *udvp;
211	struct vnode **dvpp;
212	struct vnode **vpp;
213	struct componentname *cnp;
214{
215	int error;
216	struct vnode *tdvp;
217	struct vnode *dvp;
218	struct mount *mp;
219
220	dvp = *dvpp;
221
222	/*
223	 * If stepping up the directory tree, check for going
224	 * back across the mount point, in which case do what
225	 * lookup would do by stepping back down the mount
226	 * hierarchy.
227	 */
228	if (cnp->cn_flags & ISDOTDOT) {
229		while ((dvp != udvp) && (dvp->v_flag & VROOT)) {
230			/*
231			 * Don't do the NOCROSSMOUNT check
232			 * at this level.  By definition,
233			 * union fs deals with namespaces, not
234			 * filesystems.
235			 */
236			tdvp = dvp;
237			*dvpp = dvp = dvp->v_mount->mnt_vnodecovered;
238			vput(tdvp);
239			VREF(dvp);
240			vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
241		}
242	}
243
244	error = VOP_LOOKUP(dvp, &tdvp, cnp);
245	if (error)
246		return (error);
247
248	/*
249	 * The parent directory will have been unlocked, unless lookup
250	 * found the last component.  In which case, re-lock the node
251	 * found the last component, in which case we re-lock the node
252	 */
253	if (dvp != tdvp && !(cnp->cn_flags & ISLASTCN))
254		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
255
256	dvp = tdvp;
257
258	/*
259	 * Lastly check if the current node is a mount point in
260	 * which case walk up the mount hierarchy making sure not to
261	 * bump into the root of the mount tree (ie. dvp != udvp).
262	 */
263	while (dvp != udvp && (dvp->v_type == VDIR) &&
264	       (mp = dvp->v_mountedhere)) {
265
266		if (vfs_busy(mp, 0, 0))
267			continue;
268
269		error = VFS_ROOT(mp, &tdvp);
270		vfs_unbusy(mp);
271		if (error) {
272			vput(dvp);
273			return (error);
274		}
275
276		vput(dvp);
277		dvp = tdvp;
278	}
279
280	*vpp = dvp;
281	return (0);
282}
283
284int
285union_lookup(v)
286	void *v;
287{
288	struct vop_lookup_args /* {
289		struct vnodeop_desc *a_desc;
290		struct vnode *a_dvp;
291		struct vnode **a_vpp;
292		struct componentname *a_cnp;
293	} */ *ap = v;
294	int error;
295	int uerror, lerror;
296	struct vnode *uppervp, *lowervp;
297	struct vnode *upperdvp, *lowerdvp;
298	struct vnode *dvp = ap->a_dvp;
299	struct union_node *dun = VTOUNION(dvp);
300	struct componentname *cnp = ap->a_cnp;
301	int lockparent = cnp->cn_flags & LOCKPARENT;
302	struct union_mount *um = MOUNTTOUNIONMOUNT(dvp->v_mount);
303	struct ucred *saved_cred = NULL;
304	int iswhiteout;
305	struct vattr va;
306
307#ifdef notyet
308	if (cnp->cn_namelen == 3 &&
309			cnp->cn_nameptr[2] == '.' &&
310			cnp->cn_nameptr[1] == '.' &&
311			cnp->cn_nameptr[0] == '.') {
312		dvp = *ap->a_vpp = LOWERVP(ap->a_dvp);
313		if (dvp == NULLVP)
314			return (ENOENT);
315		VREF(dvp);
316		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
317		if (!lockparent || !(cnp->cn_flags & ISLASTCN))
318			VOP_UNLOCK(ap->a_dvp, 0);
319		return (0);
320	}
321#endif
322
323	if ((cnp->cn_flags & ISLASTCN) &&
324	    (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
325	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
326		return (EROFS);
327
328	cnp->cn_flags |= LOCKPARENT;
329
330	upperdvp = dun->un_uppervp;
331	lowerdvp = dun->un_lowervp;
332	uppervp = NULLVP;
333	lowervp = NULLVP;
334	iswhiteout = 0;
335
336	/*
337	 * do the lookup in the upper level.
338	 * if that level consumes additional pathnames,
339	 * then assume that something special is going
340	 * on and just return that vnode.
341	 */
342	if (upperdvp != NULLVP) {
343		FIXUP(dun);
344		/*
345		 * If we're doing `..' in the underlying filesystem,
346		 * we must drop our lock on the union node before
347		 * going up the tree in the lower file system--if we block
348		 * on the lowervp lock, and that's held by someone else
349		 * coming down the tree and who's waiting for our lock,
350		 * we would be hosed.
351		 */
352		if (cnp->cn_flags & ISDOTDOT) {
353			/* retain lock on underlying VP */
354			dun->un_flags |= UN_KLOCK;
355			VOP_UNLOCK(dvp, 0);
356		}
357		uerror = union_lookup1(um->um_uppervp, &upperdvp,
358					&uppervp, cnp);
359
360		if (cnp->cn_flags & ISDOTDOT) {
361			if (dun->un_uppervp == upperdvp) {
362				/*
363				 * we got the underlying bugger back locked...
364				 * now take back the union node lock.  Since we
365				 *  hold the uppervp lock, we can diddle union
366				 * locking flags at will. :)
367				 */
368				dun->un_flags |= UN_ULOCK;
369			}
370			/*
371			 * if upperdvp got swapped out, it means we did
372			 * some mount point magic, and we do not have
373			 * dun->un_uppervp locked currently--so we get it
374			 * locked here (don't set the UN_ULOCK flag).
375			 */
376			vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
377		}
378		if (cnp->cn_consume != 0) {
379			*ap->a_vpp = uppervp;
380			if (!lockparent)
381				cnp->cn_flags &= ~LOCKPARENT;
382			return (uerror);
383		}
384		if (uerror == ENOENT || uerror == EJUSTRETURN) {
385			if (cnp->cn_flags & ISWHITEOUT) {
386				iswhiteout = 1;
387			} else if (lowerdvp != NULLVP) {
388				lerror = VOP_GETATTR(upperdvp, &va,
389					cnp->cn_cred, cnp->cn_proc);
390				if (lerror == 0 && (va.va_flags & OPAQUE))
391					iswhiteout = 1;
392			}
393		}
394	} else {
395		uerror = ENOENT;
396	}
397
398	/*
399	 * in a similar way to the upper layer, do the lookup
400	 * in the lower layer.   this time, if there is some
401	 * component magic going on, then vput whatever we got
402	 * back from the upper layer and return the lower vnode
403	 * instead.
404	 */
405	if (lowerdvp != NULLVP && !iswhiteout) {
406		int nameiop;
407
408		vn_lock(lowerdvp, LK_EXCLUSIVE | LK_RETRY);
409
410		/*
411		 * Only do a LOOKUP on the bottom node, since
412		 * we won't be making changes to it anyway.
413		 */
414		nameiop = cnp->cn_nameiop;
415		cnp->cn_nameiop = LOOKUP;
416		if (um->um_op == UNMNT_BELOW) {
417			saved_cred = cnp->cn_cred;
418			cnp->cn_cred = um->um_cred;
419		}
420		/*
421		 * we shouldn't have to worry about locking interactions
422		 * between the lower layer and our union layer (w.r.t.
423		 * `..' processing) because we don't futz with lowervp
424		 * locks in the union-node instantiation code path.
425		 */
426		lerror = union_lookup1(um->um_lowervp, &lowerdvp,
427				&lowervp, cnp);
428		if (um->um_op == UNMNT_BELOW)
429			cnp->cn_cred = saved_cred;
430		cnp->cn_nameiop = nameiop;
431
432		if (lowervp != lowerdvp)
433			VOP_UNLOCK(lowerdvp, 0);
434
435		if (cnp->cn_consume != 0) {
436			if (uppervp != NULLVP) {
437				if (uppervp == upperdvp)
438					vrele(uppervp);
439				else
440					vput(uppervp);
441				uppervp = NULLVP;
442			}
443			*ap->a_vpp = lowervp;
444			if (!lockparent)
445				cnp->cn_flags &= ~LOCKPARENT;
446			return (lerror);
447		}
448	} else {
449		lerror = ENOENT;
450		if ((cnp->cn_flags & ISDOTDOT) && dun->un_pvp != NULLVP) {
451			lowervp = LOWERVP(dun->un_pvp);
452			if (lowervp != NULLVP) {
453				VREF(lowervp);
454				vn_lock(lowervp, LK_EXCLUSIVE | LK_RETRY);
455				lerror = 0;
456			}
457		}
458	}
459
460	if (!lockparent)
461		cnp->cn_flags &= ~LOCKPARENT;
462
463	/*
464	 * EJUSTRETURN is used by underlying filesystems to indicate that
465	 * a directory modification op was started successfully.
466	 * This will only happen in the upper layer, since
467	 * the lower layer only does LOOKUPs.
468	 * If this union is mounted read-only, bounce it now.
469	 */
470
471	if ((uerror == EJUSTRETURN) && (cnp->cn_flags & ISLASTCN) &&
472	    (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
473	    ((cnp->cn_nameiop == CREATE) || (cnp->cn_nameiop == RENAME)))
474		uerror = EROFS;
475
476	/*
477	 * at this point, we have uerror and lerror indicating
478	 * possible errors with the lookups in the upper and lower
479	 * layers.  additionally, uppervp and lowervp are (locked)
480	 * references to existing vnodes in the upper and lower layers.
481	 *
482	 * there are now three cases to consider.
483	 * 1. if both layers returned an error, then return whatever
484	 *    error the upper layer generated.
485	 *
486	 * 2. if the top layer failed and the bottom layer succeeded
487	 *    then two subcases occur.
488	 *    a.  the bottom vnode is not a directory, in which
489	 *	  case just return a new union vnode referencing
490	 *	  an empty top layer and the existing bottom layer.
491	 *    b.  the bottom vnode is a directory, in which case
492	 *	  create a new directory in the top-level and
493	 *	  continue as in case 3.
494	 *
495	 * 3. if the top layer succeeded then return a new union
496	 *    vnode referencing whatever the new top layer and
497	 *    whatever the bottom layer returned.
498	 */
499
500	*ap->a_vpp = NULLVP;
501
502
503	/* case 1. */
504	if ((uerror != 0) && (lerror != 0)) {
505		return (uerror);
506	}
507
508	/* case 2. */
509	if (uerror != 0 /* && (lerror == 0) */ ) {
510		if (lowervp->v_type == VDIR) { /* case 2b. */
511			/*
512			 * We may be racing another process to make the
513			 * upper-level shadow directory.  Be careful with
514			 * locks/etc!
515			 */
516			dun->un_flags &= ~UN_ULOCK;
517			VOP_UNLOCK(upperdvp, 0);
518			uerror = union_mkshadow(um, upperdvp, cnp, &uppervp);
519			vn_lock(upperdvp, LK_EXCLUSIVE | LK_RETRY);
520			dun->un_flags |= UN_ULOCK;
521
522			if (uerror) {
523				if (lowervp != NULLVP) {
524					vput(lowervp);
525					lowervp = NULLVP;
526				}
527				return (uerror);
528			}
529		}
530	}
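	/*
	 * Note that in case 2a (bottom vnode is not a directory) we fall
	 * through with uppervp still NULLVP, and union_allocvp() below
	 * builds a union node referencing only the lower vnode.
	 */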
531
532	if (lowervp != NULLVP)
533		VOP_UNLOCK(lowervp, 0);
534
535	error = union_allocvp(ap->a_vpp, dvp->v_mount, dvp, upperdvp, cnp,
536			      uppervp, lowervp, 1);
537
538	if (error) {
539		if (uppervp != NULLVP)
540			vput(uppervp);
541		if (lowervp != NULLVP)
542			vrele(lowervp);
543	} else {
544		if (*ap->a_vpp != dvp)
545			if (!lockparent || !(cnp->cn_flags & ISLASTCN))
546				VOP_UNLOCK(dvp, 0);
547		if (cnp->cn_namelen == 1 &&
548		    cnp->cn_nameptr[0] == '.' &&
549		    *ap->a_vpp != dvp) {
550			panic("union_lookup -> . (%p) != startdir (%p)",
551			    *ap->a_vpp, dvp);
552		}
553	}
554
555	return (error);
556}
557
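/*
 * The directory-modifying operations below (create, mknod, mkdir,
 * symlink, remove, rmdir, link, rename) share a common idiom for
 * handing the locked upper directory to the underlying filesystem
 * while releasing the union node itself.  A minimal sketch of the
 * sequence, as used in union_create():
 *
 *	FIXUP(un);			ensure un_uppervp is locked
 *	VREF(dvp);			extra ref; the VOP below releases dvp
 *	un->un_flags |= UN_KLOCK;	keep un_uppervp locked across vput
 *	vput(ap->a_dvp);		drop and unlock the union node
 *	error = VOP_CREATE(dvp, &vp, cnp, ap->a_vap);
 *
 * union_unlock() notices UN_KLOCK and leaves the upper vnode locked,
 * so the underlying VOP receives a locked, referenced directory and
 * disposes of it according to the usual 4.4BSD vnode conventions.
 */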
558int
559union_create(v)
560	void *v;
561{
562	struct vop_create_args /* {
563		struct vnode *a_dvp;
564		struct vnode **a_vpp;
565		struct componentname *a_cnp;
566		struct vattr *a_vap;
567	} */ *ap = v;
568	struct union_node *un = VTOUNION(ap->a_dvp);
569	struct vnode *dvp = un->un_uppervp;
570	struct componentname *cnp = ap->a_cnp;
571
572	if (dvp != NULLVP) {
573		int error;
574		struct vnode *vp;
575		struct mount *mp;
576
577		FIXUP(un);
578
579		VREF(dvp);
580		un->un_flags |= UN_KLOCK;
581		mp = ap->a_dvp->v_mount;
582		vput(ap->a_dvp);
583		error = VOP_CREATE(dvp, &vp, cnp, ap->a_vap);
584		if (error)
585			return (error);
586
587		error = union_allocvp(ap->a_vpp, mp, NULLVP, NULLVP, cnp, vp,
588				NULLVP, 1);
589		if (error)
590			vput(vp);
591		return (error);
592	}
593
594	vput(ap->a_dvp);
595	return (EROFS);
596}
597
598int
599union_whiteout(v)
600	void *v;
601{
602	struct vop_whiteout_args /* {
603		struct vnode *a_dvp;
604		struct componentname *a_cnp;
605		int a_flags;
606	} */ *ap = v;
607	struct union_node *un = VTOUNION(ap->a_dvp);
608	struct componentname *cnp = ap->a_cnp;
609
610	if (un->un_uppervp == NULLVP)
611		return (EOPNOTSUPP);
612
613	FIXUP(un);
614	return (VOP_WHITEOUT(un->un_uppervp, cnp, ap->a_flags));
615}
616
617int
618union_mknod(v)
619	void *v;
620{
621	struct vop_mknod_args /* {
622		struct vnode *a_dvp;
623		struct vnode **a_vpp;
624		struct componentname *a_cnp;
625		struct vattr *a_vap;
626	} */ *ap = v;
627	struct union_node *un = VTOUNION(ap->a_dvp);
628	struct vnode *dvp = un->un_uppervp;
629	struct componentname *cnp = ap->a_cnp;
630
631	if (dvp != NULLVP) {
632		int error;
633		struct vnode *vp;
634		struct mount *mp;
635
636		FIXUP(un);
637
638		VREF(dvp);
639		un->un_flags |= UN_KLOCK;
640		mp = ap->a_dvp->v_mount;
641		vput(ap->a_dvp);
642		error = VOP_MKNOD(dvp, &vp, cnp, ap->a_vap);
643		if (error)
644			return (error);
645
646		error = union_allocvp(ap->a_vpp, mp, NULLVP, NULLVP,
647				      cnp, vp, NULLVP, 1);
648		if (error)
649			vput(vp);
650		return (error);
651	}
652
653	vput(ap->a_dvp);
654	return (EROFS);
655}
656
657int
658union_open(v)
659	void *v;
660{
661	struct vop_open_args /* {
662		struct vnodeop_desc *a_desc;
663		struct vnode *a_vp;
664		int a_mode;
665		struct ucred *a_cred;
666		struct proc *a_p;
667	} */ *ap = v;
668	struct union_node *un = VTOUNION(ap->a_vp);
669	struct vnode *tvp;
670	int mode = ap->a_mode;
671	struct ucred *cred = ap->a_cred;
672	struct proc *p = ap->a_p;
673	int error;
674
675	/*
676	 * If there is an existing upper vp then simply open that.
677	 */
678	tvp = un->un_uppervp;
679	if (tvp == NULLVP) {
680		/*
681		 * If the lower vnode is being opened for writing, then
682		 * copy the file contents to the upper vnode and open that,
683		 * otherwise can simply open the lower vnode.
684		 */
685		tvp = un->un_lowervp;
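		/*
		 * Note: the data copy is skipped for O_TRUNC opens (the
		 * second argument to union_copyup() below), since the
		 * file contents are about to be discarded anyway.
		 */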
686		if ((ap->a_mode & FWRITE) && (tvp->v_type == VREG)) {
687			error = union_copyup(un, (mode&O_TRUNC) == 0, cred, p);
688			if (error == 0)
689				error = VOP_OPEN(un->un_uppervp, mode, cred, p);
690			return (error);
691		}
692
693		/*
694		 * Just open the lower vnode, but check for nodev mount flag
695		 */
696		if ((tvp->v_type == VBLK || tvp->v_type == VCHR) &&
697		    (ap->a_vp->v_mount->mnt_flag & MNT_NODEV))
698			return ENXIO;
699		un->un_openl++;
700		vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY);
701		error = VOP_OPEN(tvp, mode, cred, p);
702		VOP_UNLOCK(tvp, 0);
703
704		return (error);
705	}
706	/*
707	 * Just open the upper vnode, checking for nodev mount flag first
708	 */
709	if ((tvp->v_type == VBLK || tvp->v_type == VCHR) &&
710	    (ap->a_vp->v_mount->mnt_flag & MNT_NODEV))
711		return ENXIO;
712
713	FIXUP(un);
714
715	error = VOP_OPEN(tvp, mode, cred, p);
716
717	return (error);
718}
719
720int
721union_close(v)
722	void *v;
723{
724	struct vop_close_args /* {
725		struct vnode *a_vp;
726		int  a_fflag;
727		struct ucred *a_cred;
728		struct proc *a_p;
729	} */ *ap = v;
730	struct union_node *un = VTOUNION(ap->a_vp);
731	struct vnode *vp;
732
733	vp = un->un_uppervp;
734	if (vp == NULLVP) {
735#ifdef UNION_DIAGNOSTIC
736		if (un->un_openl <= 0)
737			panic("union: un_openl cnt");
738#endif
739		--un->un_openl;
740		vp = un->un_lowervp;
741	}
742
743#ifdef DIAGNOSTIC
744	if (vp == NULLVP) {
745		vprint("empty union vnode", vp);
746		panic("union_close empty vnode");
747	}
748#endif
749
750	ap->a_vp = vp;
751	return (VCALL(vp, VOFFSET(vop_close), ap));
752}
753
754/*
755 * Check access permission on the union vnode.
756 * The access check being enforced is to check
757 * against both the underlying vnode, and any
758 * copied vnode.  This ensures that no additional
759 * file permissions are given away simply because
760 * the user caused an implicit file copy.
761 */
762int
763union_access(v)
764	void *v;
765{
766	struct vop_access_args /* {
767		struct vnodeop_desc *a_desc;
768		struct vnode *a_vp;
769		int a_mode;
770		struct ucred *a_cred;
771		struct proc *a_p;
772	} */ *ap = v;
773	struct vnode *vp = ap->a_vp;
774	struct union_node *un = VTOUNION(vp);
775	int error = EACCES;
776	struct union_mount *um = MOUNTTOUNIONMOUNT(vp->v_mount);
777
778	/*
779	 * Disallow write attempts on read-only file systems;
780	 * unless the file is a socket, fifo, or a block or
781	 * character device resident on the file system.
782	 */
783	if (ap->a_mode & VWRITE) {
784		switch (vp->v_type) {
785		case VDIR:
786		case VLNK:
787		case VREG:
788			if (vp->v_mount->mnt_flag & MNT_RDONLY)
789				return (EROFS);
790			break;
791		case VBAD:
792		case VBLK:
793		case VCHR:
794		case VSOCK:
795		case VFIFO:
796		case VNON:
797		default:
798			break;
799		}
800	}
801
802
803	if ((vp = un->un_uppervp) != NULLVP) {
804		FIXUP(un);
805		ap->a_vp = vp;
806		return (VCALL(vp, VOFFSET(vop_access), ap));
807	}
808
809	if ((vp = un->un_lowervp) != NULLVP) {
810		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
811		ap->a_vp = vp;
812		error = VCALL(vp, VOFFSET(vop_access), ap);
813		if (error == 0) {
814			if (um->um_op == UNMNT_BELOW) {
815				ap->a_cred = um->um_cred;
816				error = VCALL(vp, VOFFSET(vop_access), ap);
817			}
818		}
819		VOP_UNLOCK(vp, 0);
820		if (error)
821			return (error);
822	}
823
824	return (error);
825}
826
827/*
828 * We handle getattr only to change the fsid and
829 * track object sizes
830 */
831int
832union_getattr(v)
833	void *v;
834{
835	struct vop_getattr_args /* {
836		struct vnode *a_vp;
837		struct vattr *a_vap;
838		struct ucred *a_cred;
839		struct proc *a_p;
840	} */ *ap = v;
841	int error;
842	struct union_node *un = VTOUNION(ap->a_vp);
843	struct vnode *vp = un->un_uppervp;
844	struct vattr *vap;
845	struct vattr va;
846
847
848	/*
849	 * Some programs walk the filesystem hierarchy by counting
850	 * links to directories to avoid stat'ing all the time.
851	 * This means the link count on directories needs to be "correct".
852	 * The only way to do that is to call getattr on both layers
853	 * and fix up the link count.  The link count will not necessarily
854	 * be accurate but will be large enough to defeat the tree walkers.
855	 *
856	 * To make life more interesting, some filesystems don't keep
857	 * track of link counts in the expected way, and return a
858	 * link count of `1' for those directories; if either of the
859	 * component directories returns a link count of `1', we return a 1.
860	 */
861
862	vap = ap->a_vap;
863
864	vp = un->un_uppervp;
865	if (vp != NULLVP) {
866		/*
867		 * It's not clear whether VOP_GETATTR is to be
868		 * called with the vnode locked or not.  stat() calls
869		 * it with (vp) locked, and fstat calls it with
870		 * (vp) unlocked.
871		 * In the mean time, compensate here by checking
872		 * In the meantime, compensate here by checking
873		 */
874		if (un->un_flags & UN_LOCKED)
875			FIXUP(un);
876
877		error = VOP_GETATTR(vp, vap, ap->a_cred, ap->a_p);
878		if (error)
879			return (error);
880		union_newsize(ap->a_vp, vap->va_size, VNOVAL);
881	}
882
883	if (vp == NULLVP) {
884		vp = un->un_lowervp;
885	} else if (vp->v_type == VDIR) {
886		vp = un->un_lowervp;
887		if (vp != NULLVP)
888			vap = &va;
889	} else {
890		vp = NULLVP;
891	}
892
893	if (vp != NULLVP) {
894		error = VOP_GETATTR(vp, vap, ap->a_cred, ap->a_p);
895		if (error)
896			return (error);
897		union_newsize(ap->a_vp, VNOVAL, vap->va_size);
898	}
899
900	if ((vap != ap->a_vap) && (vap->va_type == VDIR)) {
901		/*
902		 * Link count manipulation:
903		 *	- If both return "2", return 2 (no subdirs)
904		 *	- If one or the other return "1", return "1" (ENOCLUE)
905		 */
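		/*
		 * For example: upper nlink 5 and lower nlink 4 yields 9;
		 * 2 and 2 stays 2 (no subdirectories in either layer);
		 * a count of 1 from either layer yields 1.
		 */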
906		if ((ap->a_vap->va_nlink == 2) &&
907		    (vap->va_nlink == 2))
908			;
909		else if (ap->a_vap->va_nlink != 1) {
910			if (vap->va_nlink == 1)
911				ap->a_vap->va_nlink = 1;
912			else
913				ap->a_vap->va_nlink += vap->va_nlink;
914		}
915	}
916	ap->a_vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsidx.__fsid_val[0];
917	return (0);
918}
919
920int
921union_setattr(v)
922	void *v;
923{
924	struct vop_setattr_args /* {
925		struct vnode *a_vp;
926		struct vattr *a_vap;
927		struct ucred *a_cred;
928		struct proc *a_p;
929	} */ *ap = v;
930	struct vattr *vap = ap->a_vap;
931	struct vnode *vp = ap->a_vp;
932	struct union_node *un = VTOUNION(vp);
933	int error;
934
935  	if ((vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL ||
936	    vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL ||
937	    vap->va_mtime.tv_sec != VNOVAL || vap->va_mode != (mode_t)VNOVAL) &&
938	    (vp->v_mount->mnt_flag & MNT_RDONLY))
939		return (EROFS);
940	if (vap->va_size != VNOVAL) {
941 		switch (vp->v_type) {
942 		case VDIR:
943 			return (EISDIR);
944 		case VCHR:
945 		case VBLK:
946 		case VSOCK:
947 		case VFIFO:
948			break;
949		case VREG:
950		case VLNK:
951 		default:
952			/*
953			 * Disallow write attempts if the filesystem is
954			 * mounted read-only.
955			 */
956			if (vp->v_mount->mnt_flag & MNT_RDONLY)
957				return (EROFS);
958		}
959	}
960
961	/*
962	 * Handle case of truncating lower object to zero size,
963	 * by creating a zero length upper object.  This is to
964	 * handle the case of open with O_TRUNC and O_CREAT.
965	 */
966	if ((un->un_uppervp == NULLVP) &&
967	    /* assert(un->un_lowervp != NULLVP) */
968	    (un->un_lowervp->v_type == VREG)) {
969		error = union_copyup(un, (vap->va_size != 0),
970						ap->a_cred, ap->a_p);
971		if (error)
972			return (error);
973	}
974
975	/*
976	 * Try to set attributes in upper layer,
977	 * otherwise return read-only filesystem error.
978	 */
979	if (un->un_uppervp != NULLVP) {
980		FIXUP(un);
981		error = VOP_SETATTR(un->un_uppervp, vap,
982					ap->a_cred, ap->a_p);
983		if ((error == 0) && (vap->va_size != VNOVAL))
984			union_newsize(ap->a_vp, vap->va_size, VNOVAL);
985	} else {
986		error = EROFS;
987	}
988
989	return (error);
990}
991
992int
993union_read(v)
994	void *v;
995{
996	struct vop_read_args /* {
997		struct vnode *a_vp;
998		struct uio *a_uio;
999		int  a_ioflag;
1000		struct ucred *a_cred;
1001	} */ *ap = v;
1002	int error;
1003	struct vnode *vp = OTHERVP(ap->a_vp);
1004	int dolock = (vp == LOWERVP(ap->a_vp));
1005
1006	if (dolock)
1007		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1008	else
1009		FIXUP(VTOUNION(ap->a_vp));
1010	error = VOP_READ(vp, ap->a_uio, ap->a_ioflag, ap->a_cred);
1011	if (dolock)
1012		VOP_UNLOCK(vp, 0);
1013
1014	/*
1015	 * XXX
1016	 * perhaps the size of the underlying object has changed under
1017	 * our feet.  take advantage of the offset information present
1018	 * in the uio structure.
1019	 */
1020	if (error == 0) {
1021		struct union_node *un = VTOUNION(ap->a_vp);
1022		off_t cur = ap->a_uio->uio_offset;
1023
1024		if (vp == un->un_uppervp) {
1025			if (cur > un->un_uppersz)
1026				union_newsize(ap->a_vp, cur, VNOVAL);
1027		} else {
1028			if (cur > un->un_lowersz)
1029				union_newsize(ap->a_vp, VNOVAL, cur);
1030		}
1031	}
1032
1033	return (error);
1034}
1035
1036int
1037union_write(v)
1038	void *v;
1039{
1040	struct vop_write_args /* {
1041		struct vnode *a_vp;
1042		struct uio *a_uio;
1043		int  a_ioflag;
1044		struct ucred *a_cred;
1045	} */ *ap = v;
1046	int error;
1047	struct vnode *vp;
1048	struct union_node *un = VTOUNION(ap->a_vp);
1049
1050	vp = UPPERVP(ap->a_vp);
1051	if (vp == NULLVP)
1052		panic("union: missing upper layer in write");
1053
1054	FIXUP(un);
1055	error = VOP_WRITE(vp, ap->a_uio, ap->a_ioflag, ap->a_cred);
1056
1057	/*
1058	 * the size of the underlying object may be changed by the
1059	 * write.
1060	 */
1061	if (error == 0) {
1062		off_t cur = ap->a_uio->uio_offset;
1063
1064		if (cur > un->un_uppersz)
1065			union_newsize(ap->a_vp, cur, VNOVAL);
1066	}
1067
1068	return (error);
1069}
1070
1071int
1072union_lease(v)
1073	void *v;
1074{
1075	struct vop_lease_args /* {
1076		struct vnode *a_vp;
1077		struct proc *a_p;
1078		struct ucred *a_cred;
1079		int a_flag;
1080	} */ *ap = v;
1081	struct vnode *ovp = OTHERVP(ap->a_vp);
1082
1083	ap->a_vp = ovp;
1084	return (VCALL(ovp, VOFFSET(vop_lease), ap));
1085}
1086
1087int
1088union_ioctl(v)
1089	void *v;
1090{
1091	struct vop_ioctl_args /* {
1092		struct vnode *a_vp;
1093		int  a_command;
1094		void *a_data;
1095		int  a_fflag;
1096		struct ucred *a_cred;
1097		struct proc *a_p;
1098	} */ *ap = v;
1099	struct vnode *ovp = OTHERVP(ap->a_vp);
1100
1101	ap->a_vp = ovp;
1102	return (VCALL(ovp, VOFFSET(vop_ioctl), ap));
1103}
1104
1105int
1106union_poll(v)
1107	void *v;
1108{
1109	struct vop_poll_args /* {
1110		struct vnode *a_vp;
1111		int a_events;
1112		struct proc *a_p;
1113	} */ *ap = v;
1114	struct vnode *ovp = OTHERVP(ap->a_vp);
1115
1116	ap->a_vp = ovp;
1117	return (VCALL(ovp, VOFFSET(vop_poll), ap));
1118}
1119
1120int
1121union_revoke(v)
1122	void *v;
1123{
1124	struct vop_revoke_args /* {
1125		struct vnode *a_vp;
1126		int a_flags;
1127		struct proc *a_p;
1128	} */ *ap = v;
1129	struct vnode *vp = ap->a_vp;
1130
1131	if (UPPERVP(vp))
1132		VOP_REVOKE(UPPERVP(vp), ap->a_flags);
1133	if (LOWERVP(vp))
1134		VOP_REVOKE(LOWERVP(vp), ap->a_flags);
1135	vgone(vp);
1136	return (0);
1137}
1138
1139int
1140union_mmap(v)
1141	void *v;
1142{
1143	struct vop_mmap_args /* {
1144		struct vnode *a_vp;
1145		int  a_fflags;
1146		struct ucred *a_cred;
1147		struct proc *a_p;
1148	} */ *ap = v;
1149	struct vnode *ovp = OTHERVP(ap->a_vp);
1150
1151	ap->a_vp = ovp;
1152	return (VCALL(ovp, VOFFSET(vop_mmap), ap));
1153}
1154
1155int
1156union_fsync(v)
1157	void *v;
1158{
1159	struct vop_fsync_args /* {
1160		struct vnode *a_vp;
1161		struct ucred *a_cred;
1162		int  a_flags;
1163		off_t a_offlo;
1164		off_t a_offhi;
1165		struct proc *a_p;
1166	} */ *ap = v;
1167	int error = 0;
1168	struct proc *p;
1169	struct vnode *targetvp;
1170
1171	/*
1172	 * If vinvalbuf is calling us, it's a "shallow fsync" -- don't
1173	 * bother syncing the underlying vnodes, since (a) they'll be
1174	 * fsync'ed when reclaimed and (b) we could deadlock if
1175	 * they're locked; otherwise, pass it through to the
1176	 * underlying layer.
1177	 */
1178	if (ap->a_flags & FSYNC_RECLAIM)
1179		return 0;
1180
1181	targetvp = OTHERVP(ap->a_vp);
1182	p = ap->a_p;
1183
1184	if (targetvp != NULLVP) {
1185		int dolock = (targetvp == LOWERVP(ap->a_vp));
1186
1187		if (dolock)
1188			vn_lock(targetvp, LK_EXCLUSIVE | LK_RETRY);
1189		else
1190			FIXUP(VTOUNION(ap->a_vp));
1191		error = VOP_FSYNC(targetvp, ap->a_cred, ap->a_flags,
1192			    ap->a_offlo, ap->a_offhi, p);
1193		if (dolock)
1194			VOP_UNLOCK(targetvp, 0);
1195	}
1196
1197	return (error);
1198}
1199
1200int
1201union_seek(v)
1202	void *v;
1203{
1204	struct vop_seek_args /* {
1205		struct vnode *a_vp;
1206		off_t  a_oldoff;
1207		off_t  a_newoff;
1208		struct ucred *a_cred;
1209	} */ *ap = v;
1210	struct vnode *ovp = OTHERVP(ap->a_vp);
1211
1212	ap->a_vp = ovp;
1213	return (VCALL(ovp, VOFFSET(vop_seek), ap));
1214}
1215
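/*
 * Remove an object.  If it exists in the upper layer, remove it there;
 * union_dowhiteout() decides whether a whiteout must also be left
 * behind (DOWHITEOUT) to hide a lower-layer object of the same name.
 * If the object exists only in the lower layer, we cannot touch it,
 * so we just create a whiteout entry in the upper directory instead.
 */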
1216int
1217union_remove(v)
1218	void *v;
1219{
1220	struct vop_remove_args /* {
1221		struct vnode *a_dvp;
1222		struct vnode *a_vp;
1223		struct componentname *a_cnp;
1224	} */ *ap = v;
1225	int error;
1226	struct union_node *dun = VTOUNION(ap->a_dvp);
1227	struct union_node *un = VTOUNION(ap->a_vp);
1228	struct componentname *cnp = ap->a_cnp;
1229
1230	if (dun->un_uppervp == NULLVP)
1231		panic("union remove: null upper vnode");
1232
1233	if (un->un_uppervp != NULLVP) {
1234		struct vnode *dvp = dun->un_uppervp;
1235		struct vnode *vp = un->un_uppervp;
1236
1237		FIXUP(dun);
1238		VREF(dvp);
1239		dun->un_flags |= UN_KLOCK;
1240		vput(ap->a_dvp);
1241		FIXUP(un);
1242		VREF(vp);
1243		un->un_flags |= UN_KLOCK;
1244		vput(ap->a_vp);
1245
1246		if (union_dowhiteout(un, cnp->cn_cred, cnp->cn_proc))
1247			cnp->cn_flags |= DOWHITEOUT;
1248		error = VOP_REMOVE(dvp, vp, cnp);
1249		if (!error)
1250			union_removed_upper(un);
1251	} else {
1252		FIXUP(dun);
1253		error = union_mkwhiteout(
1254			MOUNTTOUNIONMOUNT(UNIONTOV(dun)->v_mount),
1255			dun->un_uppervp, ap->a_cnp, un->un_path);
1256		vput(ap->a_dvp);
1257		vput(ap->a_vp);
1258	}
1259
1260	return (error);
1261}
1262
1263int
1264union_link(v)
1265	void *v;
1266{
1267	struct vop_link_args /* {
1268		struct vnode *a_dvp;
1269		struct vnode *a_vp;
1270		struct componentname *a_cnp;
1271	} */ *ap = v;
1272	int error = 0;
1273	struct componentname *cnp = ap->a_cnp;
1274	struct proc *p = cnp->cn_proc;
1275	struct union_node *dun;
1276	struct vnode *vp;
1277	struct vnode *dvp;
1278
1279	dun = VTOUNION(ap->a_dvp);
1280
1281#ifdef DIAGNOSTIC
1282	if (!(ap->a_cnp->cn_flags & LOCKPARENT)) {
1283		printf("union_link called without LOCKPARENT set!\n");
1284		error = EIO; /* need some error code for "caller is a bozo" */
1285	} else
1286#endif
1287
1288
1289	if (ap->a_dvp->v_op != ap->a_vp->v_op) {
1290		vp = ap->a_vp;
1291	} else {
1292		struct union_node *un = VTOUNION(ap->a_vp);
1293		if (un->un_uppervp == NULLVP) {
1294			/*
1295			 * Needs to be copied before we can link it.
1296			 */
1297			vn_lock(ap->a_vp, LK_EXCLUSIVE | LK_RETRY);
1298			if (dun->un_uppervp == un->un_dirvp) {
1299				dun->un_flags &= ~UN_ULOCK;
1300				VOP_UNLOCK(dun->un_uppervp, 0);
1301			}
1302			error = union_copyup(un, 1, cnp->cn_cred, p);
1303			if (dun->un_uppervp == un->un_dirvp) {
1304				/*
1305				 * During copyup, we dropped the lock on the
1306				 * dir and invalidated any saved namei lookup
1307				 * state for the directory we'll be entering
1308				 * the link in.  We need to re-run the lookup
1309				 * in that directory to reset any state needed
1310				 * for VOP_LINK.
1311				 * Call relookup on the union-layer to reset
1312				 * the state.
1313				 */
1314				vp  = NULLVP;
1315				if (dun->un_uppervp == NULLVP)
1316					 panic("union: null upperdvp?");
1317				/*
1318				 * relookup starts with an unlocked node,
1319				 * and since LOCKPARENT is set returns
1320				 * the starting directory locked.
1321				 */
1322				VOP_UNLOCK(ap->a_dvp, 0);
1323				error = relookup(ap->a_dvp, &vp, ap->a_cnp);
1324				if (error) {
1325					vrele(ap->a_dvp);
1326					VOP_UNLOCK(ap->a_vp, 0);
1327					return EROFS;	/* ? */
1328				}
1329				if (vp != NULLVP) {
1330					/*
1331					 * The name we want to create has
1332					 * mysteriously appeared (a race?)
1333					 */
1334					error = EEXIST;
1335					VOP_UNLOCK(ap->a_vp, 0);
1336					goto croak;
1337				}
1338			}
1339			VOP_UNLOCK(ap->a_vp, 0);
1340		}
1341		vp = un->un_uppervp;
1342	}
1343
1344	dvp = dun->un_uppervp;
1345	if (dvp == NULLVP)
1346		error = EROFS;
1347
1348	if (error) {
1349croak:
1350		vput(ap->a_dvp);
1351		return (error);
1352	}
1353
1354	FIXUP(dun);
1355	VREF(dvp);
1356	dun->un_flags |= UN_KLOCK;
1357	vput(ap->a_dvp);
1358
1359	return (VOP_LINK(dvp, vp, cnp));
1360}
1361
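/*
 * Rename.  All four vnodes are mapped to their upper-layer
 * counterparts before the rename is passed down.  If the source has a
 * lower-layer copy, DOWHITEOUT is set so the old name gets whited out;
 * if a required upper vnode is missing we return EXDEV, which makes
 * userland (e.g. mv(1)) fall back to a copy-and-remove.
 */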
1362int
1363union_rename(v)
1364	void *v;
1365{
1366	struct vop_rename_args  /* {
1367		struct vnode *a_fdvp;
1368		struct vnode *a_fvp;
1369		struct componentname *a_fcnp;
1370		struct vnode *a_tdvp;
1371		struct vnode *a_tvp;
1372		struct componentname *a_tcnp;
1373	} */ *ap = v;
1374	int error;
1375
1376	struct vnode *fdvp = ap->a_fdvp;
1377	struct vnode *fvp = ap->a_fvp;
1378	struct vnode *tdvp = ap->a_tdvp;
1379	struct vnode *tvp = ap->a_tvp;
1380
1381	if (fdvp->v_op == union_vnodeop_p) {	/* always true */
1382		struct union_node *un = VTOUNION(fdvp);
1383		if (un->un_uppervp == NULLVP) {
1384			/*
1385			 * this should never happen in normal
1386			 * operation but might if there was
1387			 * a problem creating the top-level shadow
1388			 * directory.
1389			 */
1390			error = EXDEV;
1391			goto bad;
1392		}
1393
1394		fdvp = un->un_uppervp;
1395		VREF(fdvp);
1396		vrele(ap->a_fdvp);
1397	}
1398
1399	if (fvp->v_op == union_vnodeop_p) {	/* always true */
1400		struct union_node *un = VTOUNION(fvp);
1401		if (un->un_uppervp == NULLVP) {
1402			/* XXX: should do a copyup */
1403			error = EXDEV;
1404			goto bad;
1405		}
1406
1407		if (un->un_lowervp != NULLVP)
1408			ap->a_fcnp->cn_flags |= DOWHITEOUT;
1409
1410		fvp = un->un_uppervp;
1411		VREF(fvp);
1412		vrele(ap->a_fvp);
1413	}
1414
1415	if (tdvp->v_op == union_vnodeop_p) {
1416		struct union_node *un = VTOUNION(tdvp);
1417		if (un->un_uppervp == NULLVP) {
1418			/*
1419			 * this should never happen in normal
1420			 * operation but might if there was
1421			 * a problem creating the top-level shadow
1422			 * directory.
1423			 */
1424			error = EXDEV;
1425			goto bad;
1426		}
1427
1428		tdvp = un->un_uppervp;
1429		VREF(tdvp);
1430		un->un_flags |= UN_KLOCK;
1431		vput(ap->a_tdvp);
1432	}
1433
1434	if (tvp != NULLVP && tvp->v_op == union_vnodeop_p) {
1435		struct union_node *un = VTOUNION(tvp);
1436
1437		tvp = un->un_uppervp;
1438		if (tvp != NULLVP) {
1439			VREF(tvp);
1440			un->un_flags |= UN_KLOCK;
1441		}
1442		vput(ap->a_tvp);
1443	}
1444
1445	return (VOP_RENAME(fdvp, fvp, ap->a_fcnp, tdvp, tvp, ap->a_tcnp));
1446
1447bad:
1448	vrele(fdvp);
1449	vrele(fvp);
1450	vput(tdvp);
1451	if (tvp != NULLVP)
1452		vput(tvp);
1453
1454	return (error);
1455}
1456
1457int
1458union_mkdir(v)
1459	void *v;
1460{
1461	struct vop_mkdir_args /* {
1462		struct vnode *a_dvp;
1463		struct vnode **a_vpp;
1464		struct componentname *a_cnp;
1465		struct vattr *a_vap;
1466	} */ *ap = v;
1467	struct union_node *un = VTOUNION(ap->a_dvp);
1468	struct vnode *dvp = un->un_uppervp;
1469	struct componentname *cnp = ap->a_cnp;
1470
1471	if (dvp != NULLVP) {
1472		int error;
1473		struct vnode *vp;
1474
1475		FIXUP(un);
1476		VREF(dvp);
1477		un->un_flags |= UN_KLOCK;
1478		VOP_UNLOCK(ap->a_dvp, 0);
1479		error = VOP_MKDIR(dvp, &vp, cnp, ap->a_vap);
1480		if (error) {
1481			vrele(ap->a_dvp);
1482			return (error);
1483		}
1484
1485		error = union_allocvp(ap->a_vpp, ap->a_dvp->v_mount, ap->a_dvp,
1486				NULLVP, cnp, vp, NULLVP, 1);
1487		vrele(ap->a_dvp);
1488		if (error)
1489			vput(vp);
1490		return (error);
1491	}
1492
1493	vput(ap->a_dvp);
1494	return (EROFS);
1495}
1496
1497int
1498union_rmdir(v)
1499	void *v;
1500{
1501	struct vop_rmdir_args /* {
1502		struct vnode *a_dvp;
1503		struct vnode *a_vp;
1504		struct componentname *a_cnp;
1505	} */ *ap = v;
1506	int error;
1507	struct union_node *dun = VTOUNION(ap->a_dvp);
1508	struct union_node *un = VTOUNION(ap->a_vp);
1509	struct componentname *cnp = ap->a_cnp;
1510
1511	if (dun->un_uppervp == NULLVP)
1512		panic("union rmdir: null upper vnode");
1513
1514	if (un->un_uppervp != NULLVP) {
1515		struct vnode *dvp = dun->un_uppervp;
1516		struct vnode *vp = un->un_uppervp;
1517
1518		FIXUP(dun);
1519		VREF(dvp);
1520		dun->un_flags |= UN_KLOCK;
1521		vput(ap->a_dvp);
1522		FIXUP(un);
1523		VREF(vp);
1524		un->un_flags |= UN_KLOCK;
1525		vput(ap->a_vp);
1526
1527		if (union_dowhiteout(un, cnp->cn_cred, cnp->cn_proc))
1528			cnp->cn_flags |= DOWHITEOUT;
1529		error = VOP_RMDIR(dvp, vp, ap->a_cnp);
1530		if (!error)
1531			union_removed_upper(un);
1532	} else {
1533		FIXUP(dun);
1534		error = union_mkwhiteout(
1535			MOUNTTOUNIONMOUNT(UNIONTOV(dun)->v_mount),
1536			dun->un_uppervp, ap->a_cnp, un->un_path);
1537		vput(ap->a_dvp);
1538		vput(ap->a_vp);
1539	}
1540
1541	return (error);
1542}
1543
1544int
1545union_symlink(v)
1546	void *v;
1547{
1548	struct vop_symlink_args /* {
1549		struct vnode *a_dvp;
1550		struct vnode **a_vpp;
1551		struct componentname *a_cnp;
1552		struct vattr *a_vap;
1553		char *a_target;
1554	} */ *ap = v;
1555	struct union_node *un = VTOUNION(ap->a_dvp);
1556	struct vnode *dvp = un->un_uppervp;
1557	struct componentname *cnp = ap->a_cnp;
1558
1559	if (dvp != NULLVP) {
1560		int error;
1561
1562		FIXUP(un);
1563		VREF(dvp);
1564		un->un_flags |= UN_KLOCK;
1565		vput(ap->a_dvp);
1566		error = VOP_SYMLINK(dvp, ap->a_vpp, cnp, ap->a_vap,
1567				    ap->a_target);
1568		return (error);
1569	}
1570
1571	vput(ap->a_dvp);
1572	return (EROFS);
1573}
1574
1575/*
1576 * union_readdir works in concert with getdirentries and
1577 * readdir(3) to provide a list of entries in the unioned
1578 * directories.  getdirentries is responsible for walking
1579 * down the union stack.  readdir(3) is responsible for
1580 * eliminating duplicate names from the returned data stream.
1581 */
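/*
 * Consequently this routine only reads the upper directory; when no
 * upper vnode exists it simply returns success with no entries, and
 * the callers above take care of the lower layer(s).
 */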
1582int
1583union_readdir(v)
1584	void *v;
1585{
1586	struct vop_readdir_args /* {
1587		struct vnodeop_desc *a_desc;
1588		struct vnode *a_vp;
1589		struct uio *a_uio;
1590		struct ucred *a_cred;
1591		int *a_eofflag;
1592		u_long *a_cookies;
1593		int a_ncookies;
1594	} */ *ap = v;
1595	struct union_node *un = VTOUNION(ap->a_vp);
1596	struct vnode *uvp = un->un_uppervp;
1597
1598	if (uvp == NULLVP)
1599		return (0);
1600
1601	FIXUP(un);
1602	ap->a_vp = uvp;
1603	return (VCALL(uvp, VOFFSET(vop_readdir), ap));
1604}
1605
1606int
1607union_readlink(v)
1608	void *v;
1609{
1610	struct vop_readlink_args /* {
1611		struct vnode *a_vp;
1612		struct uio *a_uio;
1613		struct ucred *a_cred;
1614	} */ *ap = v;
1615	int error;
1616	struct vnode *vp = OTHERVP(ap->a_vp);
1617	int dolock = (vp == LOWERVP(ap->a_vp));
1618
1619	if (dolock)
1620		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1621	else
1622		FIXUP(VTOUNION(ap->a_vp));
1623	ap->a_vp = vp;
1624	error = VCALL(vp, VOFFSET(vop_readlink), ap);
1625	if (dolock)
1626		VOP_UNLOCK(vp, 0);
1627
1628	return (error);
1629}
1630
1631int
1632union_abortop(v)
1633	void *v;
1634{
1635	struct vop_abortop_args /* {
1636		struct vnode *a_dvp;
1637		struct componentname *a_cnp;
1638	} */ *ap = v;
1639	int error;
1640	struct vnode *vp = OTHERVP(ap->a_dvp);
1641	struct union_node *un = VTOUNION(ap->a_dvp);
1642	int islocked = un->un_flags & UN_LOCKED;
1643	int dolock = (vp == LOWERVP(ap->a_dvp));
1644
1645	if (islocked) {
1646		if (dolock)
1647			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1648		else
1649			FIXUP(VTOUNION(ap->a_dvp));
1650	}
1651	ap->a_dvp = vp;
1652	error = VCALL(vp, VOFFSET(vop_abortop), ap);
1653	if (islocked && dolock)
1654		VOP_UNLOCK(vp, 0);
1655
1656	return (error);
1657}
1658
1659int
1660union_inactive(v)
1661	void *v;
1662{
1663	struct vop_inactive_args /* {
1664		struct vnode *a_vp;
1665		struct proc *a_p;
1666	} */ *ap = v;
1667	struct vnode *vp = ap->a_vp;
1668	struct union_node *un = VTOUNION(vp);
1669	struct vnode **vpp;
1670
1671	/*
1672	 * Do nothing (and _don't_ bypass).
1673	 * Wait to vrele lowervp until reclaim,
1674	 * so that until then our union_node is in the
1675	 * cache and reusable.
1676	 *
1677	 * NEEDSWORK: Someday, consider inactive'ing
1678	 * the lowervp and then trying to reactivate it
1679	 * with capabilities (v_id)
1680	 * like they do in the name lookup cache code.
1681	 * That's too much work for now.
1682	 */
1683
1684	if (un->un_dircache != 0) {
1685		for (vpp = un->un_dircache; *vpp != NULLVP; vpp++)
1686			vrele(*vpp);
1687		free(un->un_dircache, M_TEMP);
1688		un->un_dircache = 0;
1689	}
1690
1691	VOP_UNLOCK(vp, 0);
1692
1693	if ((un->un_flags & UN_CACHED) == 0)
1694		vgone(vp);
1695
1696	return (0);
1697}
1698
1699int
1700union_reclaim(v)
1701	void *v;
1702{
1703	struct vop_reclaim_args /* {
1704		struct vnode *a_vp;
1705	} */ *ap = v;
1706
1707	union_freevp(ap->a_vp);
1708
1709	return (0);
1710}
1711
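/*
 * Lock a union node.  The node itself is locked with the flag-based
 * UN_LOCKED/UN_WANTED scheme (tsleep/wakeup on un_flags); in addition,
 * if an upper vnode exists its lock is taken and tracked via UN_ULOCK,
 * so that holders of the union lock can operate on the upper layer
 * directly.  LK_DRAIN is not passed down; it is remembered here (see
 * the DIAGNOSTIC UN_DRAINING/UN_DRAINED flags) and the underlying
 * vnode is locked exclusively instead.
 */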
1712int
1713union_lock(v)
1714	void *v;
1715{
1716	struct vop_lock_args /* {
1717		struct vnode *a_vp;
1718		int a_flags;
1719	} */ *ap = v;
1720	struct vnode *vp = ap->a_vp;
1721	int flags = ap->a_flags;
1722	struct union_node *un;
1723	int error;
1724#ifdef DIAGNOSTIC
1725	int drain = 0;
1726#endif
1727
1728	genfs_nolock(ap);
1729	/*
1730	 * Need to do real lockmgr-style locking here.
1731	 * In the meantime, draining won't work quite right,
1732	 * which could lead to a few race conditions.
1733	 * The following test used to be here, but is not quite right;
1734	 * we still need to take the lock:
1735	if ((flags & LK_TYPE_MASK) == LK_DRAIN)
1736		return (0);
1737	 */
1738	flags &= ~LK_INTERLOCK;
1739
1740	un = VTOUNION(vp);
1741#ifdef DIAGNOSTIC
1742	if (un->un_flags & (UN_DRAINING|UN_DRAINED)) {
1743		if (un->un_flags & UN_DRAINED)
1744			panic("union: %p: warning: locking decommissioned lock", vp);
1745		if ((flags & LK_TYPE_MASK) != LK_RELEASE)
1746			panic("union: %p: non-release on draining lock: %d",
1747			    vp, flags & LK_TYPE_MASK);
1748		un->un_flags &= ~UN_DRAINING;
1749		if ((flags & LK_REENABLE) == 0)
1750			un->un_flags |= UN_DRAINED;
1751	}
1752#endif
1753
1754	/*
1755	 * Don't pass DRAIN through to sub-vnode lock; keep track of
1756	 * DRAIN state at this level, and just get an exclusive lock
1757	 * on the underlying vnode.
1758	 */
1759	if ((flags & LK_TYPE_MASK) == LK_DRAIN) {
1760#ifdef DIAGNOSTIC
1761		drain = 1;
1762#endif
1763		flags = LK_EXCLUSIVE | (flags & ~LK_TYPE_MASK);
1764	}
1765start:
1766	un = VTOUNION(vp);
1767
1768	if (un->un_uppervp != NULLVP) {
1769		if (((un->un_flags & UN_ULOCK) == 0) &&
1770		    (vp->v_usecount != 0)) {
1771			/*
1772			 * We MUST always use the order of: take upper
1773			 * vp lock, manipulate union node flags, drop
1774			 * upper vp lock.  This code must not be an exception.
1775			 */
1776			error = vn_lock(un->un_uppervp, flags);
1777			if (error)
1778				return (error);
1779			un->un_flags |= UN_ULOCK;
1780		}
1781#ifdef DIAGNOSTIC
1782		if (un->un_flags & UN_KLOCK) {
1783			vprint("union: dangling klock", vp);
1784			panic("union: dangling upper lock (%p)", vp);
1785		}
1786#endif
1787	}
1788
1789	/* XXX ignores LK_NOWAIT */
1790	if (un->un_flags & UN_LOCKED) {
1791#ifdef DIAGNOSTIC
1792		if (curproc && un->un_pid == curproc->p_pid &&
1793			    un->un_pid > -1 && curproc->p_pid > -1)
1794			panic("union: locking against myself");
1795#endif
1796		un->un_flags |= UN_WANTED;
1797		tsleep(&un->un_flags, PINOD, "unionlk2", 0);
1798		goto start;
1799	}
1800
1801#ifdef DIAGNOSTIC
1802	if (curproc)
1803		un->un_pid = curproc->p_pid;
1804	else
1805		un->un_pid = -1;
1806	if (drain)
1807		un->un_flags |= UN_DRAINING;
1808#endif
1809
1810	un->un_flags |= UN_LOCKED;
1811	return (0);
1812}
1813
1814/*
1815 * When operations want to vput() a union node yet retain a lock on
1816 * the upper vnode (say, to do some further operations like link(),
1817 * mkdir(), ...), they set UN_KLOCK on the union node, then call
1818 * vput() which calls VOP_UNLOCK() and comes here.  union_unlock()
1819 * unlocks the union node (leaving the upper vnode alone), clears the
1820 * KLOCK flag, and then returns to vput().  The caller then does whatever
1821 * is left to do with the upper vnode, and ensures that it gets unlocked.
1822 *
1823 * If UN_KLOCK isn't set, then the upper vnode is unlocked here.
1824 */
1825int
1826union_unlock(v)
1827	void *v;
1828{
1829	struct vop_unlock_args /* {
1830		struct vnode *a_vp;
1831		int a_flags;
1832	} */ *ap = v;
1833	struct union_node *un = VTOUNION(ap->a_vp);
1834
1835#ifdef DIAGNOSTIC
1836	if ((un->un_flags & UN_LOCKED) == 0)
1837		panic("union: unlock unlocked node");
1838	if (curproc && un->un_pid != curproc->p_pid &&
1839			curproc->p_pid > -1 && un->un_pid > -1)
1840		panic("union: unlocking other process's union node");
1841	if (un->un_flags & UN_DRAINED)
1842		panic("union: %p: warning: unlocking decommissioned lock", ap->a_vp);
1843#endif
1844
1845	un->un_flags &= ~UN_LOCKED;
1846
1847	if ((un->un_flags & (UN_ULOCK|UN_KLOCK)) == UN_ULOCK)
1848		VOP_UNLOCK(un->un_uppervp, 0);
1849
1850	un->un_flags &= ~(UN_ULOCK|UN_KLOCK);
1851
1852	if (un->un_flags & UN_WANTED) {
1853		un->un_flags &= ~UN_WANTED;
1854		wakeup(&un->un_flags);
1855	}
1856
1857#ifdef DIAGNOSTIC
1858	un->un_pid = 0;
1859	if (un->un_flags & UN_DRAINING) {
1860		un->un_flags |= UN_DRAINED;
1861		un->un_flags &= ~UN_DRAINING;
1862	}
1863#endif
1864	genfs_nounlock(ap);
1865
1866	return (0);
1867}
1868
1869int
1870union_bmap(v)
1871	void *v;
1872{
1873	struct vop_bmap_args /* {
1874		struct vnode *a_vp;
1875		daddr_t  a_bn;
1876		struct vnode **a_vpp;
1877		daddr_t *a_bnp;
1878		int *a_runp;
1879	} */ *ap = v;
1880	int error;
1881	struct vnode *vp = OTHERVP(ap->a_vp);
1882	int dolock = (vp == LOWERVP(ap->a_vp));
1883
1884	if (dolock)
1885		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1886	else
1887		FIXUP(VTOUNION(ap->a_vp));
1888	ap->a_vp = vp;
1889	error = VCALL(vp, VOFFSET(vop_bmap), ap);
1890	if (dolock)
1891		VOP_UNLOCK(vp, 0);
1892
1893	return (error);
1894}
1895
1896int
1897union_print(v)
1898	void *v;
1899{
1900	struct vop_print_args /* {
1901		struct vnode *a_vp;
1902	} */ *ap = v;
1903	struct vnode *vp = ap->a_vp;
1904
1905	printf("\ttag VT_UNION, vp=%p, uppervp=%p, lowervp=%p\n",
1906			vp, UPPERVP(vp), LOWERVP(vp));
1907	if (UPPERVP(vp) != NULLVP)
1908		vprint("union: upper", UPPERVP(vp));
1909	if (LOWERVP(vp) != NULLVP)
1910		vprint("union: lower", LOWERVP(vp));
1911	if (VTOUNION(vp)->un_dircache) {
1912		struct vnode **vpp;
1913		for (vpp = VTOUNION(vp)->un_dircache; *vpp != NULLVP; vpp++)
1914			vprint("dircache:", *vpp);
1915	}
1916
1917	return (0);
1918}
1919
1920int
1921union_islocked(v)
1922	void *v;
1923{
1924	struct vop_islocked_args /* {
1925		struct vnode *a_vp;
1926	} */ *ap = v;
1927
1928	return ((VTOUNION(ap->a_vp)->un_flags & UN_LOCKED) ? 1 : 0);
1929}
1930
1931int
1932union_pathconf(v)
1933	void *v;
1934{
1935	struct vop_pathconf_args /* {
1936		struct vnode *a_vp;
1937		int a_name;
1938		int *a_retval;
1939	} */ *ap = v;
1940	int error;
1941	struct vnode *vp = OTHERVP(ap->a_vp);
1942	int dolock = (vp == LOWERVP(ap->a_vp));
1943
1944	if (dolock)
1945		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1946	else
1947		FIXUP(VTOUNION(ap->a_vp));
1948	ap->a_vp = vp;
1949	error = VCALL(vp, VOFFSET(vop_pathconf), ap);
1950	if (dolock)
1951		VOP_UNLOCK(vp, 0);
1952
1953	return (error);
1954}
1955
1956int
1957union_advlock(v)
1958	void *v;
1959{
1960	struct vop_advlock_args /* {
1961		struct vnode *a_vp;
1962		void *a_id;
1963		int  a_op;
1964		struct flock *a_fl;
1965		int  a_flags;
1966	} */ *ap = v;
1967	struct vnode *ovp = OTHERVP(ap->a_vp);
1968
1969	ap->a_vp = ovp;
1970	return (VCALL(ovp, VOFFSET(vop_advlock), ap));
1971}
1972
1973
1974/*
1975 * XXX - vop_strategy must be hand coded because it has no
1976 * vnode in its arguments.
1977 * This goes away with a merged VM/buffer cache.
1978 */
1979int
1980union_strategy(v)
1981	void *v;
1982{
1983	struct vop_strategy_args /* {
1984		struct vnode *a_vp;
1985		struct buf *a_bp;
1986	} */ *ap = v;
1987	struct vnode *ovp = OTHERVP(ap->a_vp);
1988	struct buf *bp = ap->a_bp;
1989
1990#ifdef DIAGNOSTIC
1991	if (ovp == NULLVP)
1992		panic("union_strategy: nil vp");
1993	if (((bp->b_flags & B_READ) == 0) &&
1994	    (ovp == LOWERVP(bp->b_vp)))
1995		panic("union_strategy: writing to lowervp");
1996#endif
1997
1998	return (VOP_STRATEGY(ovp, bp));
1999}
2000
2001int
2002union_getpages(v)
2003	void *v;
2004{
2005	struct vop_getpages_args /* {
2006		struct vnode *a_vp;
2007		voff_t a_offset;
2008		struct vm_page **a_m;
2009		int *a_count;
2010		int a_centeridx;
2011		vm_prot_t a_access_type;
2012		int a_advice;
2013		int a_flags;
2014	} */ *ap = v;
2015	struct vnode *vp = ap->a_vp;
2016	int error;
2017
2018	/*
2019	 * just pass the request on to the underlying layer.
2020	 */
2021
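	/*
	 * PGO_LOCKED requests cannot be passed down (the caller may not
	 * sleep and holds its own locks), so return EBUSY and let the
	 * caller retry without PGO_LOCKED.  Otherwise swap the interlock
	 * held on entry for the underlying vnode's v_interlock, since
	 * that is what its getpages routine expects to be held.
	 */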
2022	if (ap->a_flags & PGO_LOCKED) {
2023		return EBUSY;
2024	}
2025	ap->a_vp = OTHERVP(vp);
2026	simple_unlock(&vp->v_interlock);
2027	simple_lock(&ap->a_vp->v_interlock);
2028	error = VCALL(ap->a_vp, VOFFSET(vop_getpages), ap);
2029	return error;
2030}
2031
2032int
2033union_putpages(v)
2034	void *v;
2035{
2036	struct vop_putpages_args /* {
2037		struct vnode *a_vp;
2038		voff_t a_offlo;
2039		voff_t a_offhi;
2040		int a_flags;
2041	} */ *ap = v;
2042	struct vnode *vp = ap->a_vp;
2043	int error;
2044
2045	/*
2046	 * just pass the request on to the underlying layer.
2047	 */
2048
2049	ap->a_vp = OTHERVP(vp);
2050	simple_unlock(&vp->v_interlock);
2051	simple_lock(&ap->a_vp->v_interlock);
2052	error = VCALL(ap->a_vp, VOFFSET(vop_putpages), ap);
2053	return error;
2054}
2055
2056int
2057union_kqfilter(void *v)
2058{
2059	struct vop_kqfilter_args /* {
2060		struct vnode	*a_vp;
2061		struct knote	*a_kn;
2062	} */ *ap = v;
2063	int error;
2064
2065	/*
2066	 * We watch either the upper layer file (if it already exists),
2067	 * or the lower layer one. If only the lower layer file exists
2068	 * at this moment, we will keep watching that lower layer file
2069	 * even if an upper layer file is created later on.
2070	 */
2071	if (UPPERVP(ap->a_vp))
2072		error = VOP_KQFILTER(UPPERVP(ap->a_vp), ap->a_kn);
2073	else if (LOWERVP(ap->a_vp))
2074		error = VOP_KQFILTER(LOWERVP(ap->a_vp), ap->a_kn);
2075	else {
2076		/* panic? */
2077		error = EOPNOTSUPP;
2078	}
2079
2080	return (error);
2081}
2082