union_vnops.c revision 1.40
1/*	$NetBSD: union_vnops.c,v 1.40 2011/06/12 03:35:55 rmind Exp $	*/
2
3/*
4 * Copyright (c) 1992, 1993, 1994, 1995
5 *	The Regents of the University of California.  All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Jan-Simon Pendry.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its contributors
19 *    may be used to endorse or promote products derived from this software
20 *    without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 *
34 *	@(#)union_vnops.c	8.33 (Berkeley) 7/31/95
35 */
36
37/*
38 * Copyright (c) 1992, 1993, 1994, 1995 Jan-Simon Pendry.
39 *
40 * This code is derived from software contributed to Berkeley by
41 * Jan-Simon Pendry.
42 *
43 * Redistribution and use in source and binary forms, with or without
44 * modification, are permitted provided that the following conditions
45 * are met:
46 * 1. Redistributions of source code must retain the above copyright
47 *    notice, this list of conditions and the following disclaimer.
48 * 2. Redistributions in binary form must reproduce the above copyright
49 *    notice, this list of conditions and the following disclaimer in the
50 *    documentation and/or other materials provided with the distribution.
51 * 3. All advertising materials mentioning features or use of this software
52 *    must display the following acknowledgement:
53 *	This product includes software developed by the University of
54 *	California, Berkeley and its contributors.
55 * 4. Neither the name of the University nor the names of its contributors
56 *    may be used to endorse or promote products derived from this software
57 *    without specific prior written permission.
58 *
59 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
60 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
61 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
62 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
63 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
64 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
65 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
66 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
67 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
68 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
69 * SUCH DAMAGE.
70 *
71 *	@(#)union_vnops.c	8.33 (Berkeley) 7/31/95
72 */
73
74#include <sys/cdefs.h>
75__KERNEL_RCSID(0, "$NetBSD: union_vnops.c,v 1.40 2011/06/12 03:35:55 rmind Exp $");
76
77#include <sys/param.h>
78#include <sys/systm.h>
79#include <sys/proc.h>
80#include <sys/file.h>
81#include <sys/time.h>
82#include <sys/stat.h>
83#include <sys/vnode.h>
84#include <sys/mount.h>
85#include <sys/namei.h>
86#include <sys/malloc.h>
87#include <sys/buf.h>
88#include <sys/queue.h>
89#include <sys/lock.h>
90#include <sys/kauth.h>
91
92#include <fs/union/union.h>
93#include <miscfs/genfs/genfs.h>
94
/*
 * Vnode operation entry points.  Each one receives its operation's
 * argument structure through an untyped pointer and returns an int.
 */
int union_lookup(void *v);
int union_create(void *v);
int union_whiteout(void *v);
int union_mknod(void *v);
int union_open(void *v);
int union_close(void *v);
int union_access(void *v);
int union_getattr(void *v);
int union_setattr(void *v);
int union_read(void *v);
int union_write(void *v);
int union_ioctl(void *v);
int union_poll(void *v);
int union_revoke(void *v);
int union_mmap(void *v);
int union_fsync(void *v);
int union_seek(void *v);
int union_remove(void *v);
int union_link(void *v);
int union_rename(void *v);
int union_mkdir(void *v);
int union_rmdir(void *v);
int union_symlink(void *v);
int union_readdir(void *v);
int union_readlink(void *v);
int union_abortop(void *v);
int union_inactive(void *v);
int union_reclaim(void *v);
int union_lock(void *v);
int union_unlock(void *v);
int union_bmap(void *v);
int union_print(void *v);
int union_islocked(void *v);
int union_pathconf(void *v);
int union_advlock(void *v);
int union_strategy(void *v);
int union_getpages(void *v);
int union_putpages(void *v);
int union_kqfilter(void *v);

/* File-local helpers. */
static void union_fixup(struct union_node *un);
static int union_lookup1(struct vnode *udvp, struct vnode **dvpp,
    struct vnode **vpp, struct componentname *cnp);
138
139
140/*
141 * Global vfs data structures
142 */
143int (**union_vnodeop_p)(void *);
144const struct vnodeopv_entry_desc union_vnodeop_entries[] = {
145	{ &vop_default_desc, vn_default_error },
146	{ &vop_lookup_desc, union_lookup },		/* lookup */
147	{ &vop_create_desc, union_create },		/* create */
148	{ &vop_whiteout_desc, union_whiteout },		/* whiteout */
149	{ &vop_mknod_desc, union_mknod },		/* mknod */
150	{ &vop_open_desc, union_open },			/* open */
151	{ &vop_close_desc, union_close },		/* close */
152	{ &vop_access_desc, union_access },		/* access */
153	{ &vop_getattr_desc, union_getattr },		/* getattr */
154	{ &vop_setattr_desc, union_setattr },		/* setattr */
155	{ &vop_read_desc, union_read },			/* read */
156	{ &vop_write_desc, union_write },		/* write */
157	{ &vop_ioctl_desc, union_ioctl },		/* ioctl */
158	{ &vop_poll_desc, union_poll },			/* select */
159	{ &vop_revoke_desc, union_revoke },		/* revoke */
160	{ &vop_mmap_desc, union_mmap },			/* mmap */
161	{ &vop_fsync_desc, union_fsync },		/* fsync */
162	{ &vop_seek_desc, union_seek },			/* seek */
163	{ &vop_remove_desc, union_remove },		/* remove */
164	{ &vop_link_desc, union_link },			/* link */
165	{ &vop_rename_desc, union_rename },		/* rename */
166	{ &vop_mkdir_desc, union_mkdir },		/* mkdir */
167	{ &vop_rmdir_desc, union_rmdir },		/* rmdir */
168	{ &vop_symlink_desc, union_symlink },		/* symlink */
169	{ &vop_readdir_desc, union_readdir },		/* readdir */
170	{ &vop_readlink_desc, union_readlink },		/* readlink */
171	{ &vop_abortop_desc, union_abortop },		/* abortop */
172	{ &vop_inactive_desc, union_inactive },		/* inactive */
173	{ &vop_reclaim_desc, union_reclaim },		/* reclaim */
174	{ &vop_lock_desc, union_lock },			/* lock */
175	{ &vop_unlock_desc, union_unlock },		/* unlock */
176	{ &vop_bmap_desc, union_bmap },			/* bmap */
177	{ &vop_strategy_desc, union_strategy },		/* strategy */
178	{ &vop_print_desc, union_print },		/* print */
179	{ &vop_islocked_desc, union_islocked },		/* islocked */
180	{ &vop_pathconf_desc, union_pathconf },		/* pathconf */
181	{ &vop_advlock_desc, union_advlock },		/* advlock */
182	{ &vop_getpages_desc, union_getpages },		/* getpages */
183	{ &vop_putpages_desc, union_putpages },		/* putpages */
184	{ &vop_kqfilter_desc, union_kqfilter },		/* kqfilter */
185#ifdef notdef
186	{ &vop_bwrite_desc, union_bwrite },		/* bwrite */
187#endif
188	{ NULL, NULL }
189};
190const struct vnodeopv_desc union_vnodeop_opv_desc =
191	{ &union_vnodeop_p, union_vnodeop_entries };
192
/*
 * FIXUP(un): make sure the upper vnode of union node `un' is locked.
 * When UN_ULOCK is not set in un->un_flags, union_fixup() is called,
 * which locks un->un_uppervp and sets the flag.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to
 * exactly one statement and `FIXUP(x);' is safe in every context,
 * including the branch of an if that is followed by an else.
 *
 * `un' is evaluated more than once; pass only side-effect-free
 * expressions.
 */
#define FIXUP(un) do { \
	if (((un)->un_flags & UN_ULOCK) == 0) { \
		union_fixup(un); \
	} \
} while (/*CONSTCOND*/ 0)
198
199static void
200union_fixup(struct union_node *un)
201{
202
203	vn_lock(un->un_uppervp, LK_EXCLUSIVE | LK_RETRY);
204	un->un_flags |= UN_ULOCK;
205}
206
207static int
208union_lookup1(struct vnode *udvp, struct vnode **dvpp, struct vnode **vpp,
209	struct componentname *cnp)
210{
211	int error;
212	struct vnode *tdvp;
213	struct vnode *dvp;
214	struct mount *mp;
215
216	dvp = *dvpp;
217
218	/*
219	 * If stepping up the directory tree, check for going
220	 * back across the mount point, in which case do what
221	 * lookup would do by stepping back down the mount
222	 * hierarchy.
223	 */
224	if (cnp->cn_flags & ISDOTDOT) {
225		while ((dvp != udvp) && (dvp->v_vflag & VV_ROOT)) {
226			/*
227			 * Don't do the NOCROSSMOUNT check
228			 * at this level.  By definition,
229			 * union fs deals with namespaces, not
230			 * filesystems.
231			 */
232			tdvp = dvp;
233			*dvpp = dvp = dvp->v_mount->mnt_vnodecovered;
234			VOP_UNLOCK(tdvp);
235			vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
236		}
237	}
238
239        error = VOP_LOOKUP(dvp, &tdvp, cnp);
240	if (error)
241		return (error);
242
243	dvp = tdvp;
244
245	/*
246	 * Lastly check if the current node is a mount point in
247	 * which case walk up the mount hierarchy making sure not to
248	 * bump into the root of the mount tree (ie. dvp != udvp).
249	 */
250	while (dvp != udvp && (dvp->v_type == VDIR) &&
251	       (mp = dvp->v_mountedhere)) {
252		if (vfs_busy(mp, NULL))
253			continue;
254		vput(dvp);
255		error = VFS_ROOT(mp, &tdvp);
256		vfs_unbusy(mp, false, NULL);
257		if (error) {
258			return (error);
259		}
260		dvp = tdvp;
261	}
262
263	*vpp = dvp;
264	return (0);
265}
266
267int
268union_lookup(void *v)
269{
270	struct vop_lookup_args /* {
271		struct vnodeop_desc *a_desc;
272		struct vnode *a_dvp;
273		struct vnode **a_vpp;
274		struct componentname *a_cnp;
275	} */ *ap = v;
276	int error;
277	int uerror, lerror;
278	struct vnode *uppervp, *lowervp;
279	struct vnode *upperdvp, *lowerdvp;
280	struct vnode *dvp = ap->a_dvp;
281	struct union_node *dun = VTOUNION(dvp);
282	struct componentname *cnp = ap->a_cnp;
283	struct union_mount *um = MOUNTTOUNIONMOUNT(dvp->v_mount);
284	kauth_cred_t saved_cred = NULL;
285	int iswhiteout;
286	struct vattr va;
287
288#ifdef notyet
289	if (cnp->cn_namelen == 3 &&
290			cnp->cn_nameptr[2] == '.' &&
291			cnp->cn_nameptr[1] == '.' &&
292			cnp->cn_nameptr[0] == '.') {
293		dvp = *ap->a_vpp = LOWERVP(ap->a_dvp);
294		if (dvp == NULLVP)
295			return (ENOENT);
296		vref(dvp);
297		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
298		return (0);
299	}
300#endif
301
302	if ((cnp->cn_flags & ISLASTCN) &&
303	    (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
304	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
305		return (EROFS);
306
307	upperdvp = dun->un_uppervp;
308	lowerdvp = dun->un_lowervp;
309	uppervp = NULLVP;
310	lowervp = NULLVP;
311	iswhiteout = 0;
312
313	/*
314	 * do the lookup in the upper level.
315	 * if that level comsumes additional pathnames,
316	 * then assume that something special is going
317	 * on and just return that vnode.
318	 */
319	if (upperdvp != NULLVP) {
320		FIXUP(dun);
321		/*
322		 * If we're doing `..' in the underlying filesystem,
323		 * we must drop our lock on the union node before
324		 * going up the tree in the lower file system--if we block
325		 * on the lowervp lock, and that's held by someone else
326		 * coming down the tree and who's waiting for our lock,
327		 * we would be hosed.
328		 */
329		if (cnp->cn_flags & ISDOTDOT) {
330			/* retain lock on underlying VP */
331			dun->un_flags |= UN_KLOCK;
332			VOP_UNLOCK(dvp);
333		}
334		uerror = union_lookup1(um->um_uppervp, &upperdvp,
335					&uppervp, cnp);
336
337		if (cnp->cn_flags & ISDOTDOT) {
338			if (dun->un_uppervp == upperdvp) {
339				/*
340				 * we got the underlying bugger back locked...
341				 * now take back the union node lock.  Since we
342				 *  hold the uppervp lock, we can diddle union
343				 * locking flags at will. :)
344				 */
345				dun->un_flags |= UN_ULOCK;
346			}
347			/*
348			 * if upperdvp got swapped out, it means we did
349			 * some mount point magic, and we do not have
350			 * dun->un_uppervp locked currently--so we get it
351			 * locked here (don't set the UN_ULOCK flag).
352			 */
353			vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
354		}
355		if (cnp->cn_consume != 0) {
356			*ap->a_vpp = uppervp;
357			return (uerror);
358		}
359		if (uerror == ENOENT || uerror == EJUSTRETURN) {
360			if (cnp->cn_flags & ISWHITEOUT) {
361				iswhiteout = 1;
362			} else if (lowerdvp != NULLVP) {
363				lerror = VOP_GETATTR(upperdvp, &va,
364					cnp->cn_cred);
365				if (lerror == 0 && (va.va_flags & OPAQUE))
366					iswhiteout = 1;
367			}
368		}
369	} else {
370		uerror = ENOENT;
371	}
372
373	/*
374	 * in a similar way to the upper layer, do the lookup
375	 * in the lower layer.   this time, if there is some
376	 * component magic going on, then vput whatever we got
377	 * back from the upper layer and return the lower vnode
378	 * instead.
379	 */
380	if (lowerdvp != NULLVP && !iswhiteout) {
381		int nameiop;
382
383		vn_lock(lowerdvp, LK_EXCLUSIVE | LK_RETRY);
384
385		/*
386		 * Only do a LOOKUP on the bottom node, since
387		 * we won't be making changes to it anyway.
388		 */
389		nameiop = cnp->cn_nameiop;
390		cnp->cn_nameiop = LOOKUP;
391		if (um->um_op == UNMNT_BELOW) {
392			saved_cred = cnp->cn_cred;
393			cnp->cn_cred = um->um_cred;
394		}
395
396		/*
397		 * we shouldn't have to worry about locking interactions
398		 * between the lower layer and our union layer (w.r.t.
399		 * `..' processing) because we don't futz with lowervp
400		 * locks in the union-node instantiation code path.
401		 */
402		lerror = union_lookup1(um->um_lowervp, &lowerdvp,
403				&lowervp, cnp);
404		if (um->um_op == UNMNT_BELOW)
405			cnp->cn_cred = saved_cred;
406		cnp->cn_nameiop = nameiop;
407
408		if (lowervp != lowerdvp)
409			VOP_UNLOCK(lowerdvp);
410
411		if (cnp->cn_consume != 0) {
412			if (uppervp != NULLVP) {
413				if (uppervp == upperdvp)
414					vrele(uppervp);
415				else
416					vput(uppervp);
417				uppervp = NULLVP;
418			}
419			*ap->a_vpp = lowervp;
420			return (lerror);
421		}
422	} else {
423		lerror = ENOENT;
424		if ((cnp->cn_flags & ISDOTDOT) && dun->un_pvp != NULLVP) {
425			lowervp = LOWERVP(dun->un_pvp);
426			if (lowervp != NULLVP) {
427				vref(lowervp);
428				vn_lock(lowervp, LK_EXCLUSIVE | LK_RETRY);
429				lerror = 0;
430			}
431		}
432	}
433
434	/*
435	 * EJUSTRETURN is used by underlying filesystems to indicate that
436	 * a directory modification op was started successfully.
437	 * This will only happen in the upper layer, since
438	 * the lower layer only does LOOKUPs.
439	 * If this union is mounted read-only, bounce it now.
440	 */
441
442	if ((uerror == EJUSTRETURN) && (cnp->cn_flags & ISLASTCN) &&
443	    (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
444	    ((cnp->cn_nameiop == CREATE) || (cnp->cn_nameiop == RENAME)))
445		uerror = EROFS;
446
447	/*
448	 * at this point, we have uerror and lerror indicating
449	 * possible errors with the lookups in the upper and lower
450	 * layers.  additionally, uppervp and lowervp are (locked)
451	 * references to existing vnodes in the upper and lower layers.
452	 *
453	 * there are now three cases to consider.
454	 * 1. if both layers returned an error, then return whatever
455	 *    error the upper layer generated.
456	 *
457	 * 2. if the top layer failed and the bottom layer succeeded
458	 *    then two subcases occur.
459	 *    a.  the bottom vnode is not a directory, in which
460	 *	  case just return a new union vnode referencing
461	 *	  an empty top layer and the existing bottom layer.
462	 *    b.  the bottom vnode is a directory, in which case
463	 *	  create a new directory in the top-level and
464	 *	  continue as in case 3.
465	 *
466	 * 3. if the top layer succeeded then return a new union
467	 *    vnode referencing whatever the new top layer and
468	 *    whatever the bottom layer returned.
469	 */
470
471	*ap->a_vpp = NULLVP;
472
473
474	/* case 1. */
475	if ((uerror != 0) && (lerror != 0)) {
476		return (uerror);
477	}
478
479	/* case 2. */
480	if (uerror != 0 /* && (lerror == 0) */ ) {
481		if (lowervp->v_type == VDIR) { /* case 2b. */
482			/*
483			 * We may be racing another process to make the
484			 * upper-level shadow directory.  Be careful with
485			 * locks/etc!
486			 */
487			if (upperdvp) {
488				dun->un_flags &= ~UN_ULOCK;
489				VOP_UNLOCK(upperdvp);
490				uerror = union_mkshadow(um, upperdvp, cnp,
491				    &uppervp);
492				vn_lock(upperdvp, LK_EXCLUSIVE | LK_RETRY);
493				dun->un_flags |= UN_ULOCK;
494			}
495			if (uerror) {
496				if (lowervp != NULLVP) {
497					vput(lowervp);
498					lowervp = NULLVP;
499				}
500				return (uerror);
501			}
502		}
503	}
504
505	if (lowervp != NULLVP)
506		VOP_UNLOCK(lowervp);
507
508	error = union_allocvp(ap->a_vpp, dvp->v_mount, dvp, upperdvp, cnp,
509			      uppervp, lowervp, 1);
510
511	if (error) {
512		if (uppervp != NULLVP)
513			vput(uppervp);
514		if (lowervp != NULLVP)
515			vrele(lowervp);
516	}
517
518	return (error);
519}
520
521int
522union_create(void *v)
523{
524	struct vop_create_args /* {
525		struct vnode *a_dvp;
526		struct vnode **a_vpp;
527		struct componentname *a_cnp;
528		struct vattr *a_vap;
529	} */ *ap = v;
530	struct union_node *un = VTOUNION(ap->a_dvp);
531	struct vnode *dvp = un->un_uppervp;
532	struct componentname *cnp = ap->a_cnp;
533
534	if (dvp != NULLVP) {
535		int error;
536		struct vnode *vp;
537		struct mount *mp;
538
539		FIXUP(un);
540
541		vref(dvp);
542		un->un_flags |= UN_KLOCK;
543		mp = ap->a_dvp->v_mount;
544		vput(ap->a_dvp);
545		error = VOP_CREATE(dvp, &vp, cnp, ap->a_vap);
546		if (error)
547			return (error);
548
549		error = union_allocvp(ap->a_vpp, mp, NULLVP, NULLVP, cnp, vp,
550				NULLVP, 1);
551		if (error)
552			vput(vp);
553		return (error);
554	}
555
556	vput(ap->a_dvp);
557	return (EROFS);
558}
559
560int
561union_whiteout(void *v)
562{
563	struct vop_whiteout_args /* {
564		struct vnode *a_dvp;
565		struct componentname *a_cnp;
566		int a_flags;
567	} */ *ap = v;
568	struct union_node *un = VTOUNION(ap->a_dvp);
569	struct componentname *cnp = ap->a_cnp;
570
571	if (un->un_uppervp == NULLVP)
572		return (EOPNOTSUPP);
573
574	FIXUP(un);
575	return (VOP_WHITEOUT(un->un_uppervp, cnp, ap->a_flags));
576}
577
578int
579union_mknod(void *v)
580{
581	struct vop_mknod_args /* {
582		struct vnode *a_dvp;
583		struct vnode **a_vpp;
584		struct componentname *a_cnp;
585		struct vattr *a_vap;
586	} */ *ap = v;
587	struct union_node *un = VTOUNION(ap->a_dvp);
588	struct vnode *dvp = un->un_uppervp;
589	struct componentname *cnp = ap->a_cnp;
590
591	if (dvp != NULLVP) {
592		int error;
593		struct vnode *vp;
594		struct mount *mp;
595
596		FIXUP(un);
597
598		vref(dvp);
599		un->un_flags |= UN_KLOCK;
600		mp = ap->a_dvp->v_mount;
601		vput(ap->a_dvp);
602		error = VOP_MKNOD(dvp, &vp, cnp, ap->a_vap);
603		if (error)
604			return (error);
605
606		error = union_allocvp(ap->a_vpp, mp, NULLVP, NULLVP,
607				      cnp, vp, NULLVP, 1);
608		if (error)
609		    vput(vp);
610		return (error);
611	}
612
613	vput(ap->a_dvp);
614	return (EROFS);
615}
616
617int
618union_open(void *v)
619{
620	struct vop_open_args /* {
621		struct vnodeop_desc *a_desc;
622		struct vnode *a_vp;
623		int a_mode;
624		kauth_cred_t a_cred;
625	} */ *ap = v;
626	struct union_node *un = VTOUNION(ap->a_vp);
627	struct vnode *tvp;
628	int mode = ap->a_mode;
629	kauth_cred_t cred = ap->a_cred;
630	struct lwp *l = curlwp;
631	int error;
632
633	/*
634	 * If there is an existing upper vp then simply open that.
635	 */
636	tvp = un->un_uppervp;
637	if (tvp == NULLVP) {
638		/*
639		 * If the lower vnode is being opened for writing, then
640		 * copy the file contents to the upper vnode and open that,
641		 * otherwise can simply open the lower vnode.
642		 */
643		tvp = un->un_lowervp;
644		if ((ap->a_mode & FWRITE) && (tvp->v_type == VREG)) {
645			error = union_copyup(un, (mode&O_TRUNC) == 0, cred, l);
646			if (error == 0)
647				error = VOP_OPEN(un->un_uppervp, mode, cred);
648			return (error);
649		}
650
651		/*
652		 * Just open the lower vnode, but check for nodev mount flag
653		 */
654		if ((tvp->v_type == VBLK || tvp->v_type == VCHR) &&
655		    (ap->a_vp->v_mount->mnt_flag & MNT_NODEV))
656			return ENXIO;
657		un->un_openl++;
658		vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY);
659		error = VOP_OPEN(tvp, mode, cred);
660		VOP_UNLOCK(tvp);
661
662		return (error);
663	}
664	/*
665	 * Just open the upper vnode, checking for nodev mount flag first
666	 */
667	if ((tvp->v_type == VBLK || tvp->v_type == VCHR) &&
668	    (ap->a_vp->v_mount->mnt_flag & MNT_NODEV))
669		return ENXIO;
670
671	FIXUP(un);
672
673	error = VOP_OPEN(tvp, mode, cred);
674
675	return (error);
676}
677
678int
679union_close(void *v)
680{
681	struct vop_close_args /* {
682		struct vnode *a_vp;
683		int  a_fflag;
684		kauth_cred_t a_cred;
685	} */ *ap = v;
686	struct union_node *un = VTOUNION(ap->a_vp);
687	struct vnode *vp;
688
689	vp = un->un_uppervp;
690	if (vp == NULLVP) {
691#ifdef UNION_DIAGNOSTIC
692		if (un->un_openl <= 0)
693			panic("union: un_openl cnt");
694#endif
695		--un->un_openl;
696		vp = un->un_lowervp;
697	}
698
699#ifdef DIAGNOSTIC
700	if (vp == NULLVP)
701		panic("union_close empty union vnode");
702#endif
703
704	ap->a_vp = vp;
705	return (VCALL(vp, VOFFSET(vop_close), ap));
706}
707
708/*
709 * Check access permission on the union vnode.
710 * The access check being enforced is to check
711 * against both the underlying vnode, and any
712 * copied vnode.  This ensures that no additional
713 * file permissions are given away simply because
714 * the user caused an implicit file copy.
715 */
716int
717union_access(void *v)
718{
719	struct vop_access_args /* {
720		struct vnodeop_desc *a_desc;
721		struct vnode *a_vp;
722		int a_mode;
723		kauth_cred_t a_cred;
724	} */ *ap = v;
725	struct vnode *vp = ap->a_vp;
726	struct union_node *un = VTOUNION(vp);
727	int error = EACCES;
728	struct union_mount *um = MOUNTTOUNIONMOUNT(vp->v_mount);
729
730	/*
731	 * Disallow write attempts on read-only file systems;
732	 * unless the file is a socket, fifo, or a block or
733	 * character device resident on the file system.
734	 */
735	if (ap->a_mode & VWRITE) {
736		switch (vp->v_type) {
737		case VDIR:
738		case VLNK:
739		case VREG:
740			if (vp->v_mount->mnt_flag & MNT_RDONLY)
741				return (EROFS);
742			break;
743		case VBAD:
744		case VBLK:
745		case VCHR:
746		case VSOCK:
747		case VFIFO:
748		case VNON:
749		default:
750			break;
751		}
752	}
753
754
755	if ((vp = un->un_uppervp) != NULLVP) {
756		FIXUP(un);
757		ap->a_vp = vp;
758		return (VCALL(vp, VOFFSET(vop_access), ap));
759	}
760
761	if ((vp = un->un_lowervp) != NULLVP) {
762		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
763		ap->a_vp = vp;
764		error = VCALL(vp, VOFFSET(vop_access), ap);
765		if (error == 0) {
766			if (um->um_op == UNMNT_BELOW) {
767				ap->a_cred = um->um_cred;
768				error = VCALL(vp, VOFFSET(vop_access), ap);
769			}
770		}
771		VOP_UNLOCK(vp);
772		if (error)
773			return (error);
774	}
775
776	return (error);
777}
778
779/*
780 * We handle getattr only to change the fsid and
781 * track object sizes
782 */
783int
784union_getattr(void *v)
785{
786	struct vop_getattr_args /* {
787		struct vnode *a_vp;
788		struct vattr *a_vap;
789		kauth_cred_t a_cred;
790	} */ *ap = v;
791	int error;
792	struct union_node *un = VTOUNION(ap->a_vp);
793	struct vnode *vp = un->un_uppervp;
794	struct vattr *vap;
795	struct vattr va;
796
797
798	/*
799	 * Some programs walk the filesystem hierarchy by counting
800	 * links to directories to avoid stat'ing all the time.
801	 * This means the link count on directories needs to be "correct".
802	 * The only way to do that is to call getattr on both layers
803	 * and fix up the link count.  The link count will not necessarily
804	 * be accurate but will be large enough to defeat the tree walkers.
805	 *
806	 * To make life more interesting, some filesystems don't keep
807	 * track of link counts in the expected way, and return a
808	 * link count of `1' for those directories; if either of the
809	 * component directories returns a link count of `1', we return a 1.
810	 */
811
812	vap = ap->a_vap;
813
814	vp = un->un_uppervp;
815	if (vp != NULLVP) {
816		/*
817		 * It's not clear whether VOP_GETATTR is to be
818		 * called with the vnode locked or not.  stat() calls
819		 * it with (vp) locked, and fstat calls it with
820		 * (vp) unlocked.
821		 * In the mean time, compensate here by checking
822		 * the union_node's lock flag.
823		 */
824		if (un->un_flags & UN_LOCKED)
825			FIXUP(un);
826
827		error = VOP_GETATTR(vp, vap, ap->a_cred);
828		if (error)
829			return (error);
830		union_newsize(ap->a_vp, vap->va_size, VNOVAL);
831	}
832
833	if (vp == NULLVP) {
834		vp = un->un_lowervp;
835	} else if (vp->v_type == VDIR) {
836		vp = un->un_lowervp;
837		if (vp != NULLVP)
838			vap = &va;
839	} else {
840		vp = NULLVP;
841	}
842
843	if (vp != NULLVP) {
844		error = VOP_GETATTR(vp, vap, ap->a_cred);
845		if (error)
846			return (error);
847		union_newsize(ap->a_vp, VNOVAL, vap->va_size);
848	}
849
850	if ((vap != ap->a_vap) && (vap->va_type == VDIR)) {
851		/*
852		 * Link count manipulation:
853		 *	- If both return "2", return 2 (no subdirs)
854		 *	- If one or the other return "1", return "1" (ENOCLUE)
855		 */
856		if ((ap->a_vap->va_nlink == 2) &&
857		    (vap->va_nlink == 2))
858			;
859		else if (ap->a_vap->va_nlink != 1) {
860			if (vap->va_nlink == 1)
861				ap->a_vap->va_nlink = 1;
862			else
863				ap->a_vap->va_nlink += vap->va_nlink;
864		}
865	}
866	ap->a_vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsidx.__fsid_val[0];
867	return (0);
868}
869
870int
871union_setattr(void *v)
872{
873	struct vop_setattr_args /* {
874		struct vnode *a_vp;
875		struct vattr *a_vap;
876		kauth_cred_t a_cred;
877	} */ *ap = v;
878	struct vattr *vap = ap->a_vap;
879	struct vnode *vp = ap->a_vp;
880	struct union_node *un = VTOUNION(vp);
881	int error;
882
883  	if ((vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL ||
884	    vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL ||
885	    vap->va_mtime.tv_sec != VNOVAL || vap->va_mode != (mode_t)VNOVAL) &&
886	    (vp->v_mount->mnt_flag & MNT_RDONLY))
887		return (EROFS);
888	if (vap->va_size != VNOVAL) {
889 		switch (vp->v_type) {
890 		case VDIR:
891 			return (EISDIR);
892 		case VCHR:
893 		case VBLK:
894 		case VSOCK:
895 		case VFIFO:
896			break;
897		case VREG:
898		case VLNK:
899 		default:
900			/*
901			 * Disallow write attempts if the filesystem is
902			 * mounted read-only.
903			 */
904			if (vp->v_mount->mnt_flag & MNT_RDONLY)
905				return (EROFS);
906		}
907	}
908
909	/*
910	 * Handle case of truncating lower object to zero size,
911	 * by creating a zero length upper object.  This is to
912	 * handle the case of open with O_TRUNC and O_CREAT.
913	 */
914	if ((un->un_uppervp == NULLVP) &&
915	    /* assert(un->un_lowervp != NULLVP) */
916	    (un->un_lowervp->v_type == VREG)) {
917		error = union_copyup(un, (vap->va_size != 0),
918						ap->a_cred, curlwp);
919		if (error)
920			return (error);
921	}
922
923	/*
924	 * Try to set attributes in upper layer,
925	 * otherwise return read-only filesystem error.
926	 */
927	if (un->un_uppervp != NULLVP) {
928		FIXUP(un);
929		error = VOP_SETATTR(un->un_uppervp, vap, ap->a_cred);
930		if ((error == 0) && (vap->va_size != VNOVAL))
931			union_newsize(ap->a_vp, vap->va_size, VNOVAL);
932	} else {
933		error = EROFS;
934	}
935
936	return (error);
937}
938
939int
940union_read(void *v)
941{
942	struct vop_read_args /* {
943		struct vnode *a_vp;
944		struct uio *a_uio;
945		int  a_ioflag;
946		kauth_cred_t a_cred;
947	} */ *ap = v;
948	int error;
949	struct vnode *vp = OTHERVP(ap->a_vp);
950	int dolock = (vp == LOWERVP(ap->a_vp));
951
952	if (dolock)
953		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
954	else
955		FIXUP(VTOUNION(ap->a_vp));
956	error = VOP_READ(vp, ap->a_uio, ap->a_ioflag, ap->a_cred);
957	if (dolock)
958		VOP_UNLOCK(vp);
959
960	/*
961	 * XXX
962	 * perhaps the size of the underlying object has changed under
963	 * our feet.  take advantage of the offset information present
964	 * in the uio structure.
965	 */
966	if (error == 0) {
967		struct union_node *un = VTOUNION(ap->a_vp);
968		off_t cur = ap->a_uio->uio_offset;
969
970		if (vp == un->un_uppervp) {
971			if (cur > un->un_uppersz)
972				union_newsize(ap->a_vp, cur, VNOVAL);
973		} else {
974			if (cur > un->un_lowersz)
975				union_newsize(ap->a_vp, VNOVAL, cur);
976		}
977	}
978
979	return (error);
980}
981
982int
983union_write(void *v)
984{
985	struct vop_read_args /* {
986		struct vnode *a_vp;
987		struct uio *a_uio;
988		int  a_ioflag;
989		kauth_cred_t a_cred;
990	} */ *ap = v;
991	int error;
992	struct vnode *vp;
993	struct union_node *un = VTOUNION(ap->a_vp);
994
995	vp = UPPERVP(ap->a_vp);
996	if (vp == NULLVP)
997		panic("union: missing upper layer in write");
998
999	FIXUP(un);
1000	error = VOP_WRITE(vp, ap->a_uio, ap->a_ioflag, ap->a_cred);
1001
1002	/*
1003	 * the size of the underlying object may be changed by the
1004	 * write.
1005	 */
1006	if (error == 0) {
1007		off_t cur = ap->a_uio->uio_offset;
1008
1009		if (cur > un->un_uppersz)
1010			union_newsize(ap->a_vp, cur, VNOVAL);
1011	}
1012
1013	return (error);
1014}
1015
1016int
1017union_ioctl(void *v)
1018{
1019	struct vop_ioctl_args /* {
1020		struct vnode *a_vp;
1021		int  a_command;
1022		void *a_data;
1023		int  a_fflag;
1024		kauth_cred_t a_cred;
1025	} */ *ap = v;
1026	struct vnode *ovp = OTHERVP(ap->a_vp);
1027
1028	ap->a_vp = ovp;
1029	return (VCALL(ovp, VOFFSET(vop_ioctl), ap));
1030}
1031
1032int
1033union_poll(void *v)
1034{
1035	struct vop_poll_args /* {
1036		struct vnode *a_vp;
1037		int a_events;
1038	} */ *ap = v;
1039	struct vnode *ovp = OTHERVP(ap->a_vp);
1040
1041	ap->a_vp = ovp;
1042	return (VCALL(ovp, VOFFSET(vop_poll), ap));
1043}
1044
1045int
1046union_revoke(void *v)
1047{
1048	struct vop_revoke_args /* {
1049		struct vnode *a_vp;
1050		int a_flags;
1051		struct proc *a_p;
1052	} */ *ap = v;
1053	struct vnode *vp = ap->a_vp;
1054
1055	if (UPPERVP(vp))
1056		VOP_REVOKE(UPPERVP(vp), ap->a_flags);
1057	if (LOWERVP(vp))
1058		VOP_REVOKE(LOWERVP(vp), ap->a_flags);
1059	vgone(vp);	/* XXXAD?? */
1060	return (0);
1061}
1062
1063int
1064union_mmap(void *v)
1065{
1066	struct vop_mmap_args /* {
1067		struct vnode *a_vp;
1068		vm_prot_t a_prot;
1069		kauth_cred_t a_cred;
1070	} */ *ap = v;
1071	struct vnode *ovp = OTHERVP(ap->a_vp);
1072
1073	ap->a_vp = ovp;
1074	return (VCALL(ovp, VOFFSET(vop_mmap), ap));
1075}
1076
1077int
1078union_fsync(void *v)
1079{
1080	struct vop_fsync_args /* {
1081		struct vnode *a_vp;
1082		kauth_cred_t a_cred;
1083		int  a_flags;
1084		off_t offhi;
1085		off_t offlo;
1086	} */ *ap = v;
1087	int error = 0;
1088	struct vnode *targetvp;
1089
1090	/*
1091	 * If vinvalbuf is calling us, it's a "shallow fsync" -- don't
1092	 * bother syncing the underlying vnodes, since (a) they'll be
1093	 * fsync'ed when reclaimed and (b) we could deadlock if
1094	 * they're locked; otherwise, pass it through to the
1095	 * underlying layer.
1096	 */
1097	if (ap->a_flags & FSYNC_RECLAIM)
1098		return 0;
1099
1100	targetvp = OTHERVP(ap->a_vp);
1101
1102	if (targetvp != NULLVP) {
1103		int dolock = (targetvp == LOWERVP(ap->a_vp));
1104
1105		if (dolock)
1106			vn_lock(targetvp, LK_EXCLUSIVE | LK_RETRY);
1107		else
1108			FIXUP(VTOUNION(ap->a_vp));
1109		error = VOP_FSYNC(targetvp, ap->a_cred, ap->a_flags,
1110			    ap->a_offlo, ap->a_offhi);
1111		if (dolock)
1112			VOP_UNLOCK(targetvp);
1113	}
1114
1115	return (error);
1116}
1117
1118int
1119union_seek(void *v)
1120{
1121	struct vop_seek_args /* {
1122		struct vnode *a_vp;
1123		off_t  a_oldoff;
1124		off_t  a_newoff;
1125		kauth_cred_t a_cred;
1126	} */ *ap = v;
1127	struct vnode *ovp = OTHERVP(ap->a_vp);
1128
1129	ap->a_vp = ovp;
1130	return (VCALL(ovp, VOFFSET(vop_seek), ap));
1131}
1132
1133int
1134union_remove(void *v)
1135{
1136	struct vop_remove_args /* {
1137		struct vnode *a_dvp;
1138		struct vnode *a_vp;
1139		struct componentname *a_cnp;
1140	} */ *ap = v;
1141	int error;
1142	struct union_node *dun = VTOUNION(ap->a_dvp);
1143	struct union_node *un = VTOUNION(ap->a_vp);
1144	struct componentname *cnp = ap->a_cnp;
1145
1146	if (dun->un_uppervp == NULLVP)
1147		panic("union remove: null upper vnode");
1148
1149	if (un->un_uppervp != NULLVP) {
1150		struct vnode *dvp = dun->un_uppervp;
1151		struct vnode *vp = un->un_uppervp;
1152
1153		FIXUP(dun);
1154		vref(dvp);
1155		dun->un_flags |= UN_KLOCK;
1156		vput(ap->a_dvp);
1157		FIXUP(un);
1158		vref(vp);
1159		un->un_flags |= UN_KLOCK;
1160		vput(ap->a_vp);
1161
1162		if (union_dowhiteout(un, cnp->cn_cred))
1163			cnp->cn_flags |= DOWHITEOUT;
1164		error = VOP_REMOVE(dvp, vp, cnp);
1165		if (!error)
1166			union_removed_upper(un);
1167	} else {
1168		FIXUP(dun);
1169		error = union_mkwhiteout(
1170			MOUNTTOUNIONMOUNT(UNIONTOV(dun)->v_mount),
1171			dun->un_uppervp, ap->a_cnp, un->un_path);
1172		vput(ap->a_dvp);
1173		vput(ap->a_vp);
1174	}
1175
1176	return (error);
1177}
1178
1179int
1180union_link(void *v)
1181{
1182	struct vop_link_args /* {
1183		struct vnode *a_dvp;
1184		struct vnode *a_vp;
1185		struct componentname *a_cnp;
1186	} */ *ap = v;
1187	int error = 0;
1188	struct componentname *cnp = ap->a_cnp;
1189	struct union_node *dun;
1190	struct vnode *vp;
1191	struct vnode *dvp;
1192
1193	dun = VTOUNION(ap->a_dvp);
1194
1195#ifdef DIAGNOSTIC
1196	if (!(ap->a_cnp->cn_flags & LOCKPARENT)) {
1197		printf("union_link called without LOCKPARENT set!\n");
1198		error = EIO; /* need some error code for "caller is a bozo" */
1199	} else
1200#endif
1201
1202
1203	if (ap->a_dvp->v_op != ap->a_vp->v_op) {
1204		vp = ap->a_vp;
1205	} else {
1206		struct union_node *un = VTOUNION(ap->a_vp);
1207		if (un->un_uppervp == NULLVP) {
1208			/*
1209			 * Needs to be copied before we can link it.
1210			 */
1211			vn_lock(ap->a_vp, LK_EXCLUSIVE | LK_RETRY);
1212			if (dun->un_uppervp == un->un_dirvp) {
1213				dun->un_flags &= ~UN_ULOCK;
1214				VOP_UNLOCK(dun->un_uppervp);
1215			}
1216			error = union_copyup(un, 1, cnp->cn_cred, curlwp);
1217			if (dun->un_uppervp == un->un_dirvp) {
1218				/*
1219				 * During copyup, we dropped the lock on the
1220				 * dir and invalidated any saved namei lookup
1221				 * state for the directory we'll be entering
1222				 * the link in.  We need to re-run the lookup
1223				 * in that directory to reset any state needed
1224				 * for VOP_LINK.
1225				 * Call relookup on the union-layer to reset
1226				 * the state.
1227				 */
1228				vp  = NULLVP;
1229				if (dun->un_uppervp == NULLVP)
1230					 panic("union: null upperdvp?");
1231				error = relookup(ap->a_dvp, &vp, ap->a_cnp, 0);
1232				if (error) {
1233					VOP_UNLOCK(ap->a_vp);
1234					return EROFS;	/* ? */
1235				}
1236				if (vp != NULLVP) {
1237					/*
1238					 * The name we want to create has
1239					 * mysteriously appeared (a race?)
1240					 */
1241					error = EEXIST;
1242					VOP_UNLOCK(ap->a_vp);
1243					vput(ap->a_dvp);
1244					vput(vp);
1245					return (error);
1246				}
1247			}
1248			VOP_UNLOCK(ap->a_vp);
1249		}
1250		vp = un->un_uppervp;
1251	}
1252
1253	dvp = dun->un_uppervp;
1254	if (dvp == NULLVP)
1255		error = EROFS;
1256
1257	if (error) {
1258		vput(ap->a_dvp);
1259		return (error);
1260	}
1261
1262	FIXUP(dun);
1263	vref(dvp);
1264	dun->un_flags |= UN_KLOCK;
1265	vput(ap->a_dvp);
1266
1267	return (VOP_LINK(dvp, vp, cnp));
1268}
1269
1270int
1271union_rename(void *v)
1272{
1273	struct vop_rename_args  /* {
1274		struct vnode *a_fdvp;
1275		struct vnode *a_fvp;
1276		struct componentname *a_fcnp;
1277		struct vnode *a_tdvp;
1278		struct vnode *a_tvp;
1279		struct componentname *a_tcnp;
1280	} */ *ap = v;
1281	int error;
1282
1283	struct vnode *fdvp = ap->a_fdvp;
1284	struct vnode *fvp = ap->a_fvp;
1285	struct vnode *tdvp = ap->a_tdvp;
1286	struct vnode *tvp = ap->a_tvp;
1287
1288	if (fdvp->v_op == union_vnodeop_p) {	/* always true */
1289		struct union_node *un = VTOUNION(fdvp);
1290		if (un->un_uppervp == NULLVP) {
1291			/*
1292			 * this should never happen in normal
1293			 * operation but might if there was
1294			 * a problem creating the top-level shadow
1295			 * directory.
1296			 */
1297			error = EXDEV;
1298			goto bad;
1299		}
1300
1301		fdvp = un->un_uppervp;
1302		vref(fdvp);
1303	}
1304
1305	if (fvp->v_op == union_vnodeop_p) {	/* always true */
1306		struct union_node *un = VTOUNION(fvp);
1307		if (un->un_uppervp == NULLVP) {
1308			/* XXX: should do a copyup */
1309			error = EXDEV;
1310			goto bad;
1311		}
1312
1313		if (un->un_lowervp != NULLVP)
1314			ap->a_fcnp->cn_flags |= DOWHITEOUT;
1315
1316		fvp = un->un_uppervp;
1317		vref(fvp);
1318	}
1319
1320	if (tdvp->v_op == union_vnodeop_p) {
1321		struct union_node *un = VTOUNION(tdvp);
1322		if (un->un_uppervp == NULLVP) {
1323			/*
1324			 * this should never happen in normal
1325			 * operation but might if there was
1326			 * a problem creating the top-level shadow
1327			 * directory.
1328			 */
1329			error = EXDEV;
1330			goto bad;
1331		}
1332
1333		tdvp = un->un_uppervp;
1334		vref(tdvp);
1335		un->un_flags |= UN_KLOCK;
1336		vput(ap->a_tdvp);
1337	}
1338
1339	if (tvp != NULLVP && tvp->v_op == union_vnodeop_p) {
1340		struct union_node *un = VTOUNION(tvp);
1341
1342		tvp = un->un_uppervp;
1343		if (tvp != NULLVP) {
1344			vref(tvp);
1345			un->un_flags |= UN_KLOCK;
1346		}
1347		vput(ap->a_tvp);
1348	}
1349
1350	error = VOP_RENAME(fdvp, fvp, ap->a_fcnp, tdvp, tvp, ap->a_tcnp);
1351	goto out;
1352
1353bad:
1354	vput(tdvp);
1355	if (tvp != NULLVP)
1356		vput(tvp);
1357	vrele(fdvp);
1358	vrele(fvp);
1359
1360out:
1361	if (fdvp != ap->a_fdvp) {
1362		vrele(ap->a_fdvp);
1363	}
1364	if (fvp != ap->a_fvp) {
1365		vrele(ap->a_fvp);
1366	}
1367	return (error);
1368}
1369
1370int
1371union_mkdir(void *v)
1372{
1373	struct vop_mkdir_args /* {
1374		struct vnode *a_dvp;
1375		struct vnode **a_vpp;
1376		struct componentname *a_cnp;
1377		struct vattr *a_vap;
1378	} */ *ap = v;
1379	struct union_node *un = VTOUNION(ap->a_dvp);
1380	struct vnode *dvp = un->un_uppervp;
1381	struct componentname *cnp = ap->a_cnp;
1382
1383	if (dvp != NULLVP) {
1384		int error;
1385		struct vnode *vp;
1386
1387		FIXUP(un);
1388		vref(dvp);
1389		un->un_flags |= UN_KLOCK;
1390		VOP_UNLOCK(ap->a_dvp);
1391		error = VOP_MKDIR(dvp, &vp, cnp, ap->a_vap);
1392		if (error) {
1393			vrele(ap->a_dvp);
1394			return (error);
1395		}
1396
1397		error = union_allocvp(ap->a_vpp, ap->a_dvp->v_mount, ap->a_dvp,
1398				NULLVP, cnp, vp, NULLVP, 1);
1399		if (error)
1400			vput(vp);
1401		vrele(ap->a_dvp);
1402		return (error);
1403	}
1404
1405	vput(ap->a_dvp);
1406	return (EROFS);
1407}
1408
1409int
1410union_rmdir(void *v)
1411{
1412	struct vop_rmdir_args /* {
1413		struct vnode *a_dvp;
1414		struct vnode *a_vp;
1415		struct componentname *a_cnp;
1416	} */ *ap = v;
1417	int error;
1418	struct union_node *dun = VTOUNION(ap->a_dvp);
1419	struct union_node *un = VTOUNION(ap->a_vp);
1420	struct componentname *cnp = ap->a_cnp;
1421
1422	if (dun->un_uppervp == NULLVP)
1423		panic("union rmdir: null upper vnode");
1424
1425	if (un->un_uppervp != NULLVP) {
1426		struct vnode *dvp = dun->un_uppervp;
1427		struct vnode *vp = un->un_uppervp;
1428
1429		FIXUP(dun);
1430		vref(dvp);
1431		dun->un_flags |= UN_KLOCK;
1432		vput(ap->a_dvp);
1433		FIXUP(un);
1434		vref(vp);
1435		un->un_flags |= UN_KLOCK;
1436		vput(ap->a_vp);
1437
1438		if (union_dowhiteout(un, cnp->cn_cred))
1439			cnp->cn_flags |= DOWHITEOUT;
1440		error = VOP_RMDIR(dvp, vp, ap->a_cnp);
1441		if (!error)
1442			union_removed_upper(un);
1443	} else {
1444		FIXUP(dun);
1445		error = union_mkwhiteout(
1446			MOUNTTOUNIONMOUNT(UNIONTOV(dun)->v_mount),
1447			dun->un_uppervp, ap->a_cnp, un->un_path);
1448		vput(ap->a_dvp);
1449		vput(ap->a_vp);
1450	}
1451
1452	return (error);
1453}
1454
1455int
1456union_symlink(void *v)
1457{
1458	struct vop_symlink_args /* {
1459		struct vnode *a_dvp;
1460		struct vnode **a_vpp;
1461		struct componentname *a_cnp;
1462		struct vattr *a_vap;
1463		char *a_target;
1464	} */ *ap = v;
1465	struct union_node *un = VTOUNION(ap->a_dvp);
1466	struct vnode *dvp = un->un_uppervp;
1467	struct componentname *cnp = ap->a_cnp;
1468
1469	if (dvp != NULLVP) {
1470		int error;
1471
1472		FIXUP(un);
1473		vref(dvp);
1474		un->un_flags |= UN_KLOCK;
1475		vput(ap->a_dvp);
1476		error = VOP_SYMLINK(dvp, ap->a_vpp, cnp, ap->a_vap,
1477				    ap->a_target);
1478		return (error);
1479	}
1480
1481	vput(ap->a_dvp);
1482	return (EROFS);
1483}
1484
1485/*
1486 * union_readdir works in concert with getdirentries and
1487 * readdir(3) to provide a list of entries in the unioned
1488 * directories.  getdirentries is responsible for walking
1489 * down the union stack.  readdir(3) is responsible for
1490 * eliminating duplicate names from the returned data stream.
1491 */
1492int
1493union_readdir(void *v)
1494{
1495	struct vop_readdir_args /* {
1496		struct vnodeop_desc *a_desc;
1497		struct vnode *a_vp;
1498		struct uio *a_uio;
1499		kauth_cred_t a_cred;
1500		int *a_eofflag;
1501		u_long *a_cookies;
1502		int a_ncookies;
1503	} */ *ap = v;
1504	struct union_node *un = VTOUNION(ap->a_vp);
1505	struct vnode *uvp = un->un_uppervp;
1506
1507	if (uvp == NULLVP)
1508		return (0);
1509
1510	FIXUP(un);
1511	ap->a_vp = uvp;
1512	return (VCALL(uvp, VOFFSET(vop_readdir), ap));
1513}
1514
1515int
1516union_readlink(void *v)
1517{
1518	struct vop_readlink_args /* {
1519		struct vnode *a_vp;
1520		struct uio *a_uio;
1521		kauth_cred_t a_cred;
1522	} */ *ap = v;
1523	int error;
1524	struct vnode *vp = OTHERVP(ap->a_vp);
1525	int dolock = (vp == LOWERVP(ap->a_vp));
1526
1527	if (dolock)
1528		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1529	else
1530		FIXUP(VTOUNION(ap->a_vp));
1531	ap->a_vp = vp;
1532	error = VCALL(vp, VOFFSET(vop_readlink), ap);
1533	if (dolock)
1534		VOP_UNLOCK(vp);
1535
1536	return (error);
1537}
1538
1539int
1540union_abortop(void *v)
1541{
1542	struct vop_abortop_args /* {
1543		struct vnode *a_dvp;
1544		struct componentname *a_cnp;
1545	} */ *ap = v;
1546	int error;
1547	struct vnode *vp = OTHERVP(ap->a_dvp);
1548	struct union_node *un = VTOUNION(ap->a_dvp);
1549	int islocked = un->un_flags & UN_LOCKED;
1550	int dolock = (vp == LOWERVP(ap->a_dvp));
1551
1552	if (islocked) {
1553		if (dolock)
1554			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1555		else
1556			FIXUP(VTOUNION(ap->a_dvp));
1557	}
1558	ap->a_dvp = vp;
1559	error = VCALL(vp, VOFFSET(vop_abortop), ap);
1560	if (islocked && dolock)
1561		VOP_UNLOCK(vp);
1562
1563	return (error);
1564}
1565
1566int
1567union_inactive(void *v)
1568{
1569	struct vop_inactive_args /* {
1570		const struct vnodeop_desc *a_desc;
1571		struct vnode *a_vp;
1572		bool *a_recycle;
1573	} */ *ap = v;
1574	struct vnode *vp = ap->a_vp;
1575	struct union_node *un = VTOUNION(vp);
1576	struct vnode **vpp;
1577
1578	/*
1579	 * Do nothing (and _don't_ bypass).
1580	 * Wait to vrele lowervp until reclaim,
1581	 * so that until then our union_node is in the
1582	 * cache and reusable.
1583	 *
1584	 * NEEDSWORK: Someday, consider inactive'ing
1585	 * the lowervp and then trying to reactivate it
1586	 * with capabilities (v_id)
1587	 * like they do in the name lookup cache code.
1588	 * That's too much work for now.
1589	 */
1590
1591	if (un->un_dircache != 0) {
1592		for (vpp = un->un_dircache; *vpp != NULLVP; vpp++)
1593			vrele(*vpp);
1594		free(un->un_dircache, M_TEMP);
1595		un->un_dircache = 0;
1596	}
1597
1598	*ap->a_recycle = ((un->un_flags & UN_CACHED) == 0);
1599	VOP_UNLOCK(vp);
1600
1601	return (0);
1602}
1603
1604int
1605union_reclaim(void *v)
1606{
1607	struct vop_reclaim_args /* {
1608		struct vnode *a_vp;
1609	} */ *ap = v;
1610
1611	union_freevp(ap->a_vp);
1612
1613	return (0);
1614}
1615
1616int
1617union_lock(void *v)
1618{
1619	struct vop_lock_args /* {
1620		struct vnode *a_vp;
1621		int a_flags;
1622	} */ *ap = v;
1623	struct vnode *vp = ap->a_vp;
1624	int flags = ap->a_flags;
1625	struct union_node *un;
1626	int error;
1627
1628	/* XXX unionfs can't handle shared locks yet */
1629	if ((flags & LK_SHARED) != 0) {
1630		flags = (flags & ~LK_SHARED) | LK_EXCLUSIVE;
1631	}
1632
1633	/*
1634	 * Need to do real lockmgr-style locking here.
1635	 * in the mean time, draining won't work quite right,
1636	 * which could lead to a few race conditions.
1637	 * the following test was here, but is not quite right, we
1638	 * still need to take the lock:
1639	if ((flags & LK_TYPE_MASK) == LK_DRAIN)
1640		return (0);
1641	 */
1642
1643	un = VTOUNION(vp);
1644start:
1645	un = VTOUNION(vp);
1646
1647	if (un->un_uppervp != NULLVP) {
1648		if (((un->un_flags & UN_ULOCK) == 0) &&
1649		    (vp->v_usecount != 0)) {
1650			/*
1651			 * We MUST always use the order of: take upper
1652			 * vp lock, manipulate union node flags, drop
1653			 * upper vp lock.  This code must not be an
1654			 * exception.
1655			 */
1656			error = vn_lock(un->un_uppervp, flags);
1657			if (error)
1658				return (error);
1659			un->un_flags |= UN_ULOCK;
1660		}
1661#ifdef DIAGNOSTIC
1662		if (un->un_flags & UN_KLOCK) {
1663			vprint("union: dangling klock", vp);
1664			panic("union: dangling upper lock (%p)", vp);
1665		}
1666#endif
1667	}
1668
1669	/* XXX ignores LK_NOWAIT */
1670	if (un->un_flags & UN_LOCKED) {
1671#ifdef DIAGNOSTIC
1672		if (curproc && un->un_pid == curproc->p_pid &&
1673			    un->un_pid > -1 && curproc->p_pid > -1)
1674			panic("union: locking against myself");
1675#endif
1676		un->un_flags |= UN_WANTED;
1677		tsleep(&un->un_flags, PINOD, "unionlk2", 0);
1678		goto start;
1679	}
1680
1681#ifdef DIAGNOSTIC
1682	if (curproc)
1683		un->un_pid = curproc->p_pid;
1684	else
1685		un->un_pid = -1;
1686#endif
1687
1688	un->un_flags |= UN_LOCKED;
1689	return (0);
1690}
1691
1692/*
1693 * When operations want to vput() a union node yet retain a lock on
1694 * the upper vnode (say, to do some further operations like link(),
1695 * mkdir(), ...), they set UN_KLOCK on the union node, then call
1696 * vput() which calls VOP_UNLOCK() and comes here.  union_unlock()
1697 * unlocks the union node (leaving the upper vnode alone), clears the
1698 * KLOCK flag, and then returns to vput().  The caller then does whatever
1699 * is left to do with the upper vnode, and ensures that it gets unlocked.
1700 *
1701 * If UN_KLOCK isn't set, then the upper vnode is unlocked here.
1702 */
1703int
1704union_unlock(void *v)
1705{
1706	struct vop_unlock_args /* {
1707		struct vnode *a_vp;
1708		int a_flags;
1709	} */ *ap = v;
1710	struct union_node *un = VTOUNION(ap->a_vp);
1711
1712#ifdef DIAGNOSTIC
1713	if ((un->un_flags & UN_LOCKED) == 0)
1714		panic("union: unlock unlocked node");
1715	if (curproc && un->un_pid != curproc->p_pid &&
1716			curproc->p_pid > -1 && un->un_pid > -1)
1717		panic("union: unlocking other process's union node");
1718#endif
1719
1720	un->un_flags &= ~UN_LOCKED;
1721
1722	if ((un->un_flags & (UN_ULOCK|UN_KLOCK)) == UN_ULOCK)
1723		VOP_UNLOCK(un->un_uppervp);
1724
1725	un->un_flags &= ~(UN_ULOCK|UN_KLOCK);
1726
1727	if (un->un_flags & UN_WANTED) {
1728		un->un_flags &= ~UN_WANTED;
1729		wakeup( &un->un_flags);
1730	}
1731
1732#ifdef DIAGNOSTIC
1733	un->un_pid = 0;
1734#endif
1735
1736	return (0);
1737}
1738
1739int
1740union_bmap(void *v)
1741{
1742	struct vop_bmap_args /* {
1743		struct vnode *a_vp;
1744		daddr_t  a_bn;
1745		struct vnode **a_vpp;
1746		daddr_t *a_bnp;
1747		int *a_runp;
1748	} */ *ap = v;
1749	int error;
1750	struct vnode *vp = OTHERVP(ap->a_vp);
1751	int dolock = (vp == LOWERVP(ap->a_vp));
1752
1753	if (dolock)
1754		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1755	else
1756		FIXUP(VTOUNION(ap->a_vp));
1757	ap->a_vp = vp;
1758	error = VCALL(vp, VOFFSET(vop_bmap), ap);
1759	if (dolock)
1760		VOP_UNLOCK(vp);
1761
1762	return (error);
1763}
1764
1765int
1766union_print(void *v)
1767{
1768	struct vop_print_args /* {
1769		struct vnode *a_vp;
1770	} */ *ap = v;
1771	struct vnode *vp = ap->a_vp;
1772
1773	printf("\ttag VT_UNION, vp=%p, uppervp=%p, lowervp=%p\n",
1774			vp, UPPERVP(vp), LOWERVP(vp));
1775	if (UPPERVP(vp) != NULLVP)
1776		vprint("union: upper", UPPERVP(vp));
1777	if (LOWERVP(vp) != NULLVP)
1778		vprint("union: lower", LOWERVP(vp));
1779	if (VTOUNION(vp)->un_dircache) {
1780		struct vnode **vpp;
1781		for (vpp = VTOUNION(vp)->un_dircache; *vpp != NULLVP; vpp++)
1782			vprint("dircache:", *vpp);
1783	}
1784
1785	return (0);
1786}
1787
1788int
1789union_islocked(void *v)
1790{
1791	struct vop_islocked_args /* {
1792		struct vnode *a_vp;
1793	} */ *ap = v;
1794
1795	return ((VTOUNION(ap->a_vp)->un_flags & UN_LOCKED) ? LK_EXCLUSIVE : 0);
1796}
1797
1798int
1799union_pathconf(void *v)
1800{
1801	struct vop_pathconf_args /* {
1802		struct vnode *a_vp;
1803		int a_name;
1804		int *a_retval;
1805	} */ *ap = v;
1806	int error;
1807	struct vnode *vp = OTHERVP(ap->a_vp);
1808	int dolock = (vp == LOWERVP(ap->a_vp));
1809
1810	if (dolock)
1811		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1812	else
1813		FIXUP(VTOUNION(ap->a_vp));
1814	ap->a_vp = vp;
1815	error = VCALL(vp, VOFFSET(vop_pathconf), ap);
1816	if (dolock)
1817		VOP_UNLOCK(vp);
1818
1819	return (error);
1820}
1821
1822int
1823union_advlock(void *v)
1824{
1825	struct vop_advlock_args /* {
1826		struct vnode *a_vp;
1827		void *a_id;
1828		int  a_op;
1829		struct flock *a_fl;
1830		int  a_flags;
1831	} */ *ap = v;
1832	struct vnode *ovp = OTHERVP(ap->a_vp);
1833
1834	ap->a_vp = ovp;
1835	return (VCALL(ovp, VOFFSET(vop_advlock), ap));
1836}
1837
1838
1839/*
1840 * XXX - vop_strategy must be hand coded because it has no
1841 * vnode in its arguments.
1842 * This goes away with a merged VM/buffer cache.
1843 */
1844int
1845union_strategy(void *v)
1846{
1847	struct vop_strategy_args /* {
1848		struct vnode *a_vp;
1849		struct buf *a_bp;
1850	} */ *ap = v;
1851	struct vnode *ovp = OTHERVP(ap->a_vp);
1852	struct buf *bp = ap->a_bp;
1853
1854#ifdef DIAGNOSTIC
1855	if (ovp == NULLVP)
1856		panic("union_strategy: nil vp");
1857	if (((bp->b_flags & B_READ) == 0) &&
1858	    (ovp == LOWERVP(bp->b_vp)))
1859		panic("union_strategy: writing to lowervp");
1860#endif
1861
1862	return (VOP_STRATEGY(ovp, bp));
1863}
1864
1865int
1866union_getpages(void *v)
1867{
1868	struct vop_getpages_args /* {
1869		struct vnode *a_vp;
1870		voff_t a_offset;
1871		struct vm_page **a_m;
1872		int *a_count;
1873		int a_centeridx;
1874		vm_prot_t a_access_type;
1875		int a_advice;
1876		int a_flags;
1877	} */ *ap = v;
1878	struct vnode *vp = ap->a_vp;
1879
1880	KASSERT(mutex_owned(vp->v_interlock));
1881
1882	if (ap->a_flags & PGO_LOCKED) {
1883		return EBUSY;
1884	}
1885	ap->a_vp = OTHERVP(vp);
1886	KASSERT(vp->v_interlock == ap->a_vp->v_interlock);
1887
1888	/* Just pass the request on to the underlying layer. */
1889	return VCALL(ap->a_vp, VOFFSET(vop_getpages), ap);
1890}
1891
1892int
1893union_putpages(void *v)
1894{
1895	struct vop_putpages_args /* {
1896		struct vnode *a_vp;
1897		voff_t a_offlo;
1898		voff_t a_offhi;
1899		int a_flags;
1900	} */ *ap = v;
1901	struct vnode *vp = ap->a_vp;
1902
1903	KASSERT(mutex_owned(vp->v_interlock));
1904
1905	ap->a_vp = OTHERVP(vp);
1906	KASSERT(vp->v_interlock == ap->a_vp->v_interlock);
1907
1908	if (ap->a_flags & PGO_RECLAIM) {
1909		mutex_exit(vp->v_interlock);
1910		return 0;
1911	}
1912
1913	/* Just pass the request on to the underlying layer. */
1914	return VCALL(ap->a_vp, VOFFSET(vop_putpages), ap);
1915}
1916
1917int
1918union_kqfilter(void *v)
1919{
1920	struct vop_kqfilter_args /* {
1921		struct vnode	*a_vp;
1922		struct knote	*a_kn;
1923	} */ *ap = v;
1924	int error;
1925
1926	/*
1927	 * We watch either the upper layer file (if it already exists),
1928	 * or the lower layer one. If there is lower layer file only
1929	 * at this moment, we will keep watching that lower layer file
1930	 * even if upper layer file would be created later on.
1931	 */
1932	if (UPPERVP(ap->a_vp))
1933		error = VOP_KQFILTER(UPPERVP(ap->a_vp), ap->a_kn);
1934	else if (LOWERVP(ap->a_vp))
1935		error = VOP_KQFILTER(LOWERVP(ap->a_vp), ap->a_kn);
1936	else {
1937		/* panic? */
1938		error = EOPNOTSUPP;
1939	}
1940
1941	return (error);
1942}
1943