union_vnops.c revision 1.77
1/*	$NetBSD: union_vnops.c,v 1.77 2021/06/29 22:39:20 dholland Exp $	*/
2
3/*
4 * Copyright (c) 1992, 1993, 1994, 1995
5 *	The Regents of the University of California.  All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Jan-Simon Pendry.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its contributors
19 *    may be used to endorse or promote products derived from this software
20 *    without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 *
34 *	@(#)union_vnops.c	8.33 (Berkeley) 7/31/95
35 */
36
37/*
38 * Copyright (c) 1992, 1993, 1994, 1995 Jan-Simon Pendry.
39 *
40 * This code is derived from software contributed to Berkeley by
41 * Jan-Simon Pendry.
42 *
43 * Redistribution and use in source and binary forms, with or without
44 * modification, are permitted provided that the following conditions
45 * are met:
46 * 1. Redistributions of source code must retain the above copyright
47 *    notice, this list of conditions and the following disclaimer.
48 * 2. Redistributions in binary form must reproduce the above copyright
49 *    notice, this list of conditions and the following disclaimer in the
50 *    documentation and/or other materials provided with the distribution.
51 * 3. All advertising materials mentioning features or use of this software
52 *    must display the following acknowledgement:
53 *	This product includes software developed by the University of
54 *	California, Berkeley and its contributors.
55 * 4. Neither the name of the University nor the names of its contributors
56 *    may be used to endorse or promote products derived from this software
57 *    without specific prior written permission.
58 *
59 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
60 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
61 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
62 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
63 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
64 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
65 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
66 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
67 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
68 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
69 * SUCH DAMAGE.
70 *
71 *	@(#)union_vnops.c	8.33 (Berkeley) 7/31/95
72 */
73
74#include <sys/cdefs.h>
75__KERNEL_RCSID(0, "$NetBSD: union_vnops.c,v 1.77 2021/06/29 22:39:20 dholland Exp $");
76
77#include <sys/param.h>
78#include <sys/systm.h>
79#include <sys/proc.h>
80#include <sys/file.h>
81#include <sys/time.h>
82#include <sys/stat.h>
83#include <sys/vnode.h>
84#include <sys/mount.h>
85#include <sys/namei.h>
86#include <sys/malloc.h>
87#include <sys/buf.h>
88#include <sys/queue.h>
89#include <sys/lock.h>
90#include <sys/kauth.h>
91
92#include <fs/union/union.h>
93#include <miscfs/genfs/genfs.h>
94#include <miscfs/specfs/specdev.h>
95
/*
 * Vnode operation entry points for the union file system.  Every one
 * takes its operation-specific argument structure through a generic
 * pointer; they are wired up in union_vnodeop_entries[] below.
 */
int union_parsepath(void *);
int union_lookup(void *);
int union_create(void *);
int union_whiteout(void *);
int union_mknod(void *);
int union_open(void *);
int union_close(void *);
int union_access(void *);
int union_getattr(void *);
int union_setattr(void *);
int union_read(void *);
int union_write(void *);
int union_ioctl(void *);
int union_poll(void *);
int union_revoke(void *);
int union_mmap(void *);
int union_fsync(void *);
int union_seek(void *);
int union_remove(void *);
int union_link(void *);
int union_rename(void *);
int union_mkdir(void *);
int union_rmdir(void *);
int union_symlink(void *);
int union_readdir(void *);
int union_readlink(void *);
int union_abortop(void *);
int union_inactive(void *);
int union_reclaim(void *);
int union_lock(void *);
int union_unlock(void *);
int union_bmap(void *);
int union_print(void *);
int union_islocked(void *);
int union_pathconf(void *);
int union_advlock(void *);
int union_strategy(void *);
int union_bwrite(void *);
int union_getpages(void *);
int union_putpages(void *);
int union_kqfilter(void *);

/* Single-layer lookup helper used by union_lookup(). */
static int union_lookup1(struct vnode *, struct vnode **,
			      struct vnode **, struct componentname *);
140
141
142/*
143 * Global vfs data structures
144 */
/*
 * Operations vector for union vnodes.  Each entry pairs a vnode
 * operation descriptor with the function implementing it; the table is
 * terminated by a { NULL, NULL } entry.  The first entry names
 * vn_default_error for vop_default_desc (presumably the fallback for
 * operations not listed here -- confirm against the vfs init code).
 */
int (**union_vnodeop_p)(void *);
const struct vnodeopv_entry_desc union_vnodeop_entries[] = {
	{ &vop_default_desc, vn_default_error },
	{ &vop_parsepath_desc, union_parsepath },	/* parsepath */
	{ &vop_lookup_desc, union_lookup },		/* lookup */
	{ &vop_create_desc, union_create },		/* create */
	{ &vop_whiteout_desc, union_whiteout },		/* whiteout */
	{ &vop_mknod_desc, union_mknod },		/* mknod */
	{ &vop_open_desc, union_open },			/* open */
	{ &vop_close_desc, union_close },		/* close */
	{ &vop_access_desc, union_access },		/* access */
	{ &vop_accessx_desc, genfs_accessx },		/* accessx */
	{ &vop_getattr_desc, union_getattr },		/* getattr */
	{ &vop_setattr_desc, union_setattr },		/* setattr */
	{ &vop_read_desc, union_read },			/* read */
	{ &vop_write_desc, union_write },		/* write */
	{ &vop_fallocate_desc, genfs_eopnotsupp },	/* fallocate */
	{ &vop_fdiscard_desc, genfs_eopnotsupp },	/* fdiscard */
	{ &vop_ioctl_desc, union_ioctl },		/* ioctl */
	{ &vop_poll_desc, union_poll },			/* poll */
	{ &vop_revoke_desc, union_revoke },		/* revoke */
	{ &vop_mmap_desc, union_mmap },			/* mmap */
	{ &vop_fsync_desc, union_fsync },		/* fsync */
	{ &vop_seek_desc, union_seek },			/* seek */
	{ &vop_remove_desc, union_remove },		/* remove */
	{ &vop_link_desc, union_link },			/* link */
	{ &vop_rename_desc, union_rename },		/* rename */
	{ &vop_mkdir_desc, union_mkdir },		/* mkdir */
	{ &vop_rmdir_desc, union_rmdir },		/* rmdir */
	{ &vop_symlink_desc, union_symlink },		/* symlink */
	{ &vop_readdir_desc, union_readdir },		/* readdir */
	{ &vop_readlink_desc, union_readlink },		/* readlink */
	{ &vop_abortop_desc, union_abortop },		/* abortop */
	{ &vop_inactive_desc, union_inactive },		/* inactive */
	{ &vop_reclaim_desc, union_reclaim },		/* reclaim */
	{ &vop_lock_desc, union_lock },			/* lock */
	{ &vop_unlock_desc, union_unlock },		/* unlock */
	{ &vop_bmap_desc, union_bmap },			/* bmap */
	{ &vop_strategy_desc, union_strategy },		/* strategy */
	{ &vop_bwrite_desc, union_bwrite },		/* bwrite */
	{ &vop_print_desc, union_print },		/* print */
	{ &vop_islocked_desc, union_islocked },		/* islocked */
	{ &vop_pathconf_desc, union_pathconf },		/* pathconf */
	{ &vop_advlock_desc, union_advlock },		/* advlock */
	{ &vop_getpages_desc, union_getpages },		/* getpages */
	{ &vop_putpages_desc, union_putpages },		/* putpages */
	{ &vop_kqfilter_desc, union_kqfilter },		/* kqfilter */
	{ NULL, NULL }
};
/* Ties the entry table above to the union_vnodeop_p vector pointer. */
const struct vnodeopv_desc union_vnodeop_opv_desc =
	{ &union_vnodeop_p, union_vnodeop_entries };
196
/*
 * True if the vnode is a block or character device, a socket or a
 * fifo.  The argument is evaluated more than once, so it must not
 * have side effects.
 */
#define NODE_IS_SPECIAL(vp)						\
	((vp)->v_type == VBLK || (vp)->v_type == VCHR ||		\
	 (vp)->v_type == VSOCK || (vp)->v_type == VFIFO)
200
201int
202union_parsepath(void *v)
203{
204	struct vop_parsepath_args /* {
205		struct vnode *a_dvp;
206		const char *a_name;
207		size_t *a_retval;
208	} */ *ap = v;
209	struct vnode *upperdvp, *lowerdvp;
210	size_t upper, lower;
211	int error;
212
213	upperdvp = UPPERVP(ap->a_dvp);
214	lowerdvp = LOWERVP(ap->a_dvp);
215
216	error = VOP_PARSEPATH(upperdvp, ap->a_name, &upper);
217	if (error) {
218		return error;
219	}
220
221	error = VOP_PARSEPATH(lowerdvp, ap->a_name, &lower);
222	if (error) {
223		return error;
224	}
225
226	/*
227	 * If they're different, use the larger one. This is not a
228	 * comprehensive solution, but it's sufficient for the
229	 * non-default cases of parsepath that currently exist.
230	 */
231	*ap->a_retval = MAX(upper, lower);
232	return 0;
233}
234
/*
 * union_lookup1: look up one pathname component within a single layer.
 *
 * udvp is the root vnode of the layer being searched (union_lookup
 * passes um->um_uppervp or um->um_lowervp), *dvpp is the directory to
 * search and cnp describes the component.
 *
 * For a `..' lookup the directory is first stepped back across mount
 * points, in which case *dvpp is updated to the directory actually
 * searched.  After the lookup the result is stepped forward across any
 * file system mounted on it.
 *
 * Returns 0 with the result stored in *vpp, or the errno value from
 * VOP_LOOKUP, vn_lock or VFS_ROOT.  A result different from the
 * searched directory is locked exclusively here.
 */
static int
union_lookup1(struct vnode *udvp, struct vnode **dvpp, struct vnode **vpp,
	struct componentname *cnp)
{
	int error;
	struct vnode *tdvp;
	struct vnode *dvp;
	struct mount *mp;

	dvp = *dvpp;

	/*
	 * If stepping up the directory tree, check for going
	 * back across the mount point, in which case do what
	 * lookup would do by stepping back down the mount
	 * hierarchy.
	 */
	if (cnp->cn_flags & ISDOTDOT) {
		while ((dvp != udvp) && (dvp->v_vflag & VV_ROOT)) {
			/*
			 * Don't do the NOCROSSMOUNT check
			 * at this level.  By definition,
			 * union fs deals with namespaces, not
			 * filesystems.
			 */
			tdvp = dvp;
			/* Move to the covered vnode; the lock moves with us. */
			*dvpp = dvp = dvp->v_mount->mnt_vnodecovered;
			VOP_UNLOCK(tdvp);
			vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
		}
	}

        error = VOP_LOOKUP(dvp, &tdvp, cnp);
	if (error)
		return (error);
	if (dvp != tdvp) {
		/*
		 * Lock the result.  For `..' the directory is unlocked
		 * while the result is locked and then locked again.
		 */
		if (cnp->cn_flags & ISDOTDOT)
			VOP_UNLOCK(dvp);
		error = vn_lock(tdvp, LK_EXCLUSIVE);
		if (cnp->cn_flags & ISDOTDOT)
			vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
		if (error) {
			/* Could not lock the result: drop its reference. */
			vrele(tdvp);
			return error;
		}
		dvp = tdvp;
	}

	/*
	 * Lastly check if the current node is a mount point in
	 * which case walk up the mount hierarchy making sure not to
	 * bump into the root of the mount tree (ie. dvp != udvp).
	 */
	while (dvp != udvp && (dvp->v_type == VDIR) &&
	       (mp = dvp->v_mountedhere)) {
		/* If the mount cannot be busied, re-test the loop condition. */
		if (vfs_busy(mp))
			continue;
		vput(dvp);
		/* Replace the covered directory by the mounted root vnode. */
		error = VFS_ROOT(mp, LK_EXCLUSIVE, &tdvp);
		vfs_unbusy(mp);
		if (error) {
			return (error);
		}
		dvp = tdvp;
	}

	*vpp = dvp;
	return (0);
}
304
/*
 * union_lookup: VOP_LOOKUP for a union directory.
 *
 * Looks the component up in the upper and then in the lower directory
 * of a_dvp (each via union_lookup1) and passes whatever was found to
 * union_allocvp, which supplies the union vnode returned in *a_vpp.
 *
 * When the upper lookup finds nothing, a whiteout entry or an upper
 * directory carrying the OPAQUE flag suppresses the lower lookup.  If
 * only the lower layer has the name and it is a directory, a shadow
 * directory is created in the upper layer with union_mkshadow.
 *
 * Returns 0 on success or an errno value.  On a read-only mount,
 * DELETE/RENAME of the last component fails with EROFS up front, and
 * an EJUSTRETURN from the upper layer for CREATE/RENAME is turned
 * into EROFS.
 */
int
union_lookup(void *v)
{
	struct vop_lookup_v2_args /* {
		struct vnodeop_desc *a_desc;
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
	} */ *ap = v;
	int error;
	int uerror, lerror;
	struct vnode *uppervp, *lowervp;
	struct vnode *upperdvp, *lowerdvp;
	struct vnode *dvp = ap->a_dvp;
	struct union_node *dun = VTOUNION(dvp);
	struct componentname *cnp = ap->a_cnp;
	struct union_mount *um = MOUNTTOUNIONMOUNT(dvp->v_mount);
	kauth_cred_t saved_cred = NULL;
	int iswhiteout;
	struct vattr va;

#ifdef notyet
	if (cnp->cn_namelen == 3 &&
			cnp->cn_nameptr[2] == '.' &&
			cnp->cn_nameptr[1] == '.' &&
			cnp->cn_nameptr[0] == '.') {
		dvp = *ap->a_vpp = LOWERVP(ap->a_dvp);
		if (dvp == NULLVP)
			return (ENOENT);
		vref(dvp);
		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
		return (0);
	}
#endif

	if ((cnp->cn_flags & ISLASTCN) &&
	    (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
		return (EROFS);

	/* The lookup restarts here after a shadow directory was created. */
start:
	upperdvp = dun->un_uppervp;
	lowerdvp = dun->un_lowervp;
	uppervp = NULLVP;
	lowervp = NULLVP;
	iswhiteout = 0;

	/*
	 * do the lookup in the upper level.
	 * if that level consumes additional pathnames,
	 * then assume that something special is going
	 * on and just return that vnode.
	 */
	if (upperdvp != NULLVP) {
		uerror = union_lookup1(um->um_uppervp, &upperdvp,
					&uppervp, cnp);
		if (uerror == ENOENT || uerror == EJUSTRETURN) {
			if (cnp->cn_flags & ISWHITEOUT) {
				iswhiteout = 1;
			} else if (lowerdvp != NULLVP) {
				/*
				 * An opaque upper directory hides the
				 * lower layer just like a whiteout.
				 */
				lerror = VOP_GETATTR(upperdvp, &va,
					cnp->cn_cred);
				if (lerror == 0 && (va.va_flags & OPAQUE))
					iswhiteout = 1;
			}
		}
	} else {
		uerror = ENOENT;
	}

	/*
	 * in a similar way to the upper layer, do the lookup
	 * in the lower layer.   this time, if there is some
	 * component magic going on, then vput whatever we got
	 * back from the upper layer and return the lower vnode
	 * instead.
	 */
	if (lowerdvp != NULLVP && !iswhiteout) {
		int nameiop;

		vn_lock(lowerdvp, LK_EXCLUSIVE | LK_RETRY);

		/*
		 * Only do a LOOKUP on the bottom node, since
		 * we won't be making changes to it anyway.
		 */
		nameiop = cnp->cn_nameiop;
		cnp->cn_nameiop = LOOKUP;
		/* For UNMNT_BELOW mounts the lookup runs with um_cred. */
		if (um->um_op == UNMNT_BELOW) {
			saved_cred = cnp->cn_cred;
			cnp->cn_cred = um->um_cred;
		}

		/*
		 * we shouldn't have to worry about locking interactions
		 * between the lower layer and our union layer (w.r.t.
		 * `..' processing) because we don't futz with lowervp
		 * locks in the union-node instantiation code path.
		 */
		lerror = union_lookup1(um->um_lowervp, &lowerdvp,
				&lowervp, cnp);
		/* Restore the caller's credentials and operation. */
		if (um->um_op == UNMNT_BELOW)
			cnp->cn_cred = saved_cred;
		cnp->cn_nameiop = nameiop;

		if (lowervp != lowerdvp)
			VOP_UNLOCK(lowerdvp);
	} else {
		lerror = ENOENT;
		/*
		 * No lower lookup was done; for `..' use the lower vnode
		 * of the recorded parent union node, if there is one.
		 */
		if ((cnp->cn_flags & ISDOTDOT) && dun->un_pvp != NULLVP) {
			lowervp = LOWERVP(dun->un_pvp);
			if (lowervp != NULLVP) {
				vref(lowervp);
				vn_lock(lowervp, LK_EXCLUSIVE | LK_RETRY);
				lerror = 0;
			}
		}
	}

	/*
	 * EJUSTRETURN is used by underlying filesystems to indicate that
	 * a directory modification op was started successfully.
	 * This will only happen in the upper layer, since
	 * the lower layer only does LOOKUPs.
	 * If this union is mounted read-only, bounce it now.
	 */

	if ((uerror == EJUSTRETURN) && (cnp->cn_flags & ISLASTCN) &&
	    (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
	    ((cnp->cn_nameiop == CREATE) || (cnp->cn_nameiop == RENAME)))
		uerror = EROFS;

	/*
	 * at this point, we have uerror and lerror indicating
	 * possible errors with the lookups in the upper and lower
	 * layers.  additionally, uppervp and lowervp are (locked)
	 * references to existing vnodes in the upper and lower layers.
	 *
	 * there are now three cases to consider.
	 * 1. if both layers returned an error, then return whatever
	 *    error the upper layer generated.
	 *
	 * 2. if the top layer failed and the bottom layer succeeded
	 *    then two subcases occur.
	 *    a.  the bottom vnode is not a directory, in which
	 *	  case just return a new union vnode referencing
	 *	  an empty top layer and the existing bottom layer.
	 *    b.  the bottom vnode is a directory, in which case
	 *	  create a new directory in the top-level and
	 *	  continue as in case 3.
	 *
	 * 3. if the top layer succeeded then return a new union
	 *    vnode referencing whatever the new top layer and
	 *    whatever the bottom layer returned.
	 */

	*ap->a_vpp = NULLVP;


	/* case 1. */
	if ((uerror != 0) && (lerror != 0)) {
		return (uerror);
	}

	/* case 2. */
	if (uerror != 0 /* && (lerror == 0) */ ) {
		if (lowervp->v_type == VDIR) { /* case 2b. */
			/*
			 * We may be racing another process to make the
			 * upper-level shadow directory.  Be careful with
			 * locks/etc!
			 * If we have to create a shadow directory and want
			 * to commit the node we have to restart the lookup
			 * to get the componentname right.
			 */
			if (upperdvp) {
				VOP_UNLOCK(upperdvp);
				uerror = union_mkshadow(um, upperdvp, cnp,
				    &uppervp);
				vn_lock(upperdvp, LK_EXCLUSIVE | LK_RETRY);
				if (uerror == 0 && cnp->cn_nameiop != LOOKUP) {
					/* Release both results and start over. */
					vrele(uppervp);
					if (lowervp != NULLVP)
						vput(lowervp);
					goto start;
				}
			}
			if (uerror) {
				if (lowervp != NULLVP) {
					vput(lowervp);
					lowervp = NULLVP;
				}
				return (uerror);
			}
		}
	} else { /* uerror == 0 */
		if (uppervp != upperdvp)
			VOP_UNLOCK(uppervp);
	}

	if (lowervp != NULLVP)
		VOP_UNLOCK(lowervp);

	error = union_allocvp(ap->a_vpp, dvp->v_mount, dvp, upperdvp, cnp,
			      uppervp, lowervp, 1);

	if (error) {
		/* No union vnode was produced: drop the layer references. */
		if (uppervp != NULLVP)
			vrele(uppervp);
		if (lowervp != NULLVP)
			vrele(lowervp);
		return error;
	}

	return 0;
}
521
522int
523union_create(void *v)
524{
525	struct vop_create_v3_args /* {
526		struct vnode *a_dvp;
527		struct vnode **a_vpp;
528		struct componentname *a_cnp;
529		struct vattr *a_vap;
530	} */ *ap = v;
531	struct union_node *un = VTOUNION(ap->a_dvp);
532	struct vnode *dvp = un->un_uppervp;
533	struct componentname *cnp = ap->a_cnp;
534
535	if (dvp != NULLVP) {
536		int error;
537		struct vnode *vp;
538		struct mount *mp;
539
540		mp = ap->a_dvp->v_mount;
541
542		vp = NULL;
543		error = VOP_CREATE(dvp, &vp, cnp, ap->a_vap);
544		if (error)
545			return (error);
546
547		error = union_allocvp(ap->a_vpp, mp, NULLVP, NULLVP, cnp, vp,
548				NULLVP, 1);
549		if (error)
550			vrele(vp);
551		return (error);
552	}
553
554	return (EROFS);
555}
556
557int
558union_whiteout(void *v)
559{
560	struct vop_whiteout_args /* {
561		struct vnode *a_dvp;
562		struct componentname *a_cnp;
563		int a_flags;
564	} */ *ap = v;
565	struct union_node *un = VTOUNION(ap->a_dvp);
566	struct componentname *cnp = ap->a_cnp;
567
568	if (un->un_uppervp == NULLVP)
569		return (EOPNOTSUPP);
570
571	return (VOP_WHITEOUT(un->un_uppervp, cnp, ap->a_flags));
572}
573
574int
575union_mknod(void *v)
576{
577	struct vop_mknod_v3_args /* {
578		struct vnode *a_dvp;
579		struct vnode **a_vpp;
580		struct componentname *a_cnp;
581		struct vattr *a_vap;
582	} */ *ap = v;
583	struct union_node *un = VTOUNION(ap->a_dvp);
584	struct vnode *dvp = un->un_uppervp;
585	struct componentname *cnp = ap->a_cnp;
586
587	if (dvp != NULLVP) {
588		int error;
589		struct vnode *vp;
590		struct mount *mp;
591
592		mp = ap->a_dvp->v_mount;
593		error = VOP_MKNOD(dvp, &vp, cnp, ap->a_vap);
594		if (error)
595			return (error);
596
597		error = union_allocvp(ap->a_vpp, mp, NULLVP, NULLVP,
598				      cnp, vp, NULLVP, 1);
599		if (error)
600			vrele(vp);
601		return (error);
602	}
603
604	return (EROFS);
605}
606
607int
608union_open(void *v)
609{
610	struct vop_open_args /* {
611		struct vnodeop_desc *a_desc;
612		struct vnode *a_vp;
613		int a_mode;
614		kauth_cred_t a_cred;
615	} */ *ap = v;
616	struct union_node *un = VTOUNION(ap->a_vp);
617	struct vnode *tvp;
618	int mode = ap->a_mode;
619	kauth_cred_t cred = ap->a_cred;
620	struct lwp *l = curlwp;
621	int error;
622
623	/*
624	 * If there is an existing upper vp then simply open that.
625	 */
626	tvp = un->un_uppervp;
627	if (tvp == NULLVP) {
628		/*
629		 * If the lower vnode is being opened for writing, then
630		 * copy the file contents to the upper vnode and open that,
631		 * otherwise can simply open the lower vnode.
632		 */
633		tvp = un->un_lowervp;
634		if ((ap->a_mode & FWRITE) && (tvp->v_type == VREG)) {
635			error = union_copyup(un, (mode&O_TRUNC) == 0, cred, l);
636			if (error == 0)
637				error = VOP_OPEN(un->un_uppervp, mode, cred);
638			if (error == 0) {
639				mutex_enter(un->un_uppervp->v_interlock);
640				un->un_uppervp->v_writecount++;
641				mutex_exit(un->un_uppervp->v_interlock);
642			}
643			return (error);
644		}
645
646		/*
647		 * Just open the lower vnode, but check for nodev mount flag
648		 */
649		if ((tvp->v_type == VBLK || tvp->v_type == VCHR) &&
650		    (ap->a_vp->v_mount->mnt_flag & MNT_NODEV))
651			return ENXIO;
652		un->un_openl++;
653		vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY);
654		error = VOP_OPEN(tvp, mode, cred);
655		VOP_UNLOCK(tvp);
656
657		return (error);
658	}
659	/*
660	 * Just open the upper vnode, checking for nodev mount flag first
661	 */
662	if ((tvp->v_type == VBLK || tvp->v_type == VCHR) &&
663	    (ap->a_vp->v_mount->mnt_flag & MNT_NODEV))
664		return ENXIO;
665
666	error = VOP_OPEN(tvp, mode, cred);
667	if (error == 0 && (ap->a_mode & FWRITE)) {
668		mutex_enter(tvp->v_interlock);
669		tvp->v_writecount++;
670		mutex_exit(tvp->v_interlock);
671	}
672
673	return (error);
674}
675
676int
677union_close(void *v)
678{
679	struct vop_close_args /* {
680		struct vnode *a_vp;
681		int  a_fflag;
682		kauth_cred_t a_cred;
683	} */ *ap = v;
684	struct union_node *un = VTOUNION(ap->a_vp);
685	struct vnode *vp;
686	int error;
687	bool do_lock;
688
689	vp = un->un_uppervp;
690	if (vp != NULLVP) {
691		do_lock = false;
692	} else {
693		KASSERT(un->un_openl > 0);
694		--un->un_openl;
695		vp = un->un_lowervp;
696		do_lock = true;
697	}
698
699	KASSERT(vp != NULLVP);
700	ap->a_vp = vp;
701	if ((ap->a_fflag & FWRITE)) {
702		KASSERT(vp == un->un_uppervp);
703		mutex_enter(vp->v_interlock);
704		vp->v_writecount--;
705		mutex_exit(vp->v_interlock);
706	}
707	if (do_lock)
708		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
709	error = VCALL(vp, VOFFSET(vop_close), ap);
710	if (do_lock)
711		VOP_UNLOCK(vp);
712
713	return error;
714}
715
716/*
717 * Check access permission on the union vnode.
718 * The access check being enforced is to check
719 * against both the underlying vnode, and any
720 * copied vnode.  This ensures that no additional
721 * file permissions are given away simply because
722 * the user caused an implicit file copy.
723 */
724int
725union_access(void *v)
726{
727	struct vop_access_args /* {
728		struct vnodeop_desc *a_desc;
729		struct vnode *a_vp;
730		accmode_t a_accmode;
731		kauth_cred_t a_cred;
732	} */ *ap = v;
733	struct vnode *vp = ap->a_vp;
734	struct union_node *un = VTOUNION(vp);
735	int error = EACCES;
736	struct union_mount *um = MOUNTTOUNIONMOUNT(vp->v_mount);
737
738	/*
739	 * Disallow write attempts on read-only file systems;
740	 * unless the file is a socket, fifo, or a block or
741	 * character device resident on the file system.
742	 */
743	if (ap->a_accmode & VWRITE) {
744		switch (vp->v_type) {
745		case VDIR:
746		case VLNK:
747		case VREG:
748			if (vp->v_mount->mnt_flag & MNT_RDONLY)
749				return (EROFS);
750			break;
751		case VBAD:
752		case VBLK:
753		case VCHR:
754		case VSOCK:
755		case VFIFO:
756		case VNON:
757		default:
758			break;
759		}
760	}
761
762
763	if ((vp = un->un_uppervp) != NULLVP) {
764		ap->a_vp = vp;
765		return (VCALL(vp, VOFFSET(vop_access), ap));
766	}
767
768	if ((vp = un->un_lowervp) != NULLVP) {
769		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
770		ap->a_vp = vp;
771		error = VCALL(vp, VOFFSET(vop_access), ap);
772		if (error == 0) {
773			if (um->um_op == UNMNT_BELOW) {
774				ap->a_cred = um->um_cred;
775				error = VCALL(vp, VOFFSET(vop_access), ap);
776			}
777		}
778		VOP_UNLOCK(vp);
779		if (error)
780			return (error);
781	}
782
783	return (error);
784}
785
786/*
787 * We handle getattr only to change the fsid and
788 * track object sizes
789 */
790int
791union_getattr(void *v)
792{
793	struct vop_getattr_args /* {
794		struct vnode *a_vp;
795		struct vattr *a_vap;
796		kauth_cred_t a_cred;
797	} */ *ap = v;
798	int error;
799	struct union_node *un = VTOUNION(ap->a_vp);
800	struct vnode *vp = un->un_uppervp;
801	struct vattr *vap;
802	struct vattr va;
803
804
805	/*
806	 * Some programs walk the filesystem hierarchy by counting
807	 * links to directories to avoid stat'ing all the time.
808	 * This means the link count on directories needs to be "correct".
809	 * The only way to do that is to call getattr on both layers
810	 * and fix up the link count.  The link count will not necessarily
811	 * be accurate but will be large enough to defeat the tree walkers.
812	 *
813	 * To make life more interesting, some filesystems don't keep
814	 * track of link counts in the expected way, and return a
815	 * link count of `1' for those directories; if either of the
816	 * component directories returns a link count of `1', we return a 1.
817	 */
818
819	vap = ap->a_vap;
820
821	vp = un->un_uppervp;
822	if (vp != NULLVP) {
823		error = VOP_GETATTR(vp, vap, ap->a_cred);
824		if (error)
825			return (error);
826		mutex_enter(&un->un_lock);
827		union_newsize(ap->a_vp, vap->va_size, VNOVAL);
828	}
829
830	if (vp == NULLVP) {
831		vp = un->un_lowervp;
832	} else if (vp->v_type == VDIR) {
833		vp = un->un_lowervp;
834		if (vp != NULLVP)
835			vap = &va;
836	} else {
837		vp = NULLVP;
838	}
839
840	if (vp != NULLVP) {
841		if (vp == un->un_lowervp)
842			vn_lock(vp, LK_SHARED | LK_RETRY);
843		error = VOP_GETATTR(vp, vap, ap->a_cred);
844		if (vp == un->un_lowervp)
845			VOP_UNLOCK(vp);
846		if (error)
847			return (error);
848		mutex_enter(&un->un_lock);
849		union_newsize(ap->a_vp, VNOVAL, vap->va_size);
850	}
851
852	if ((vap != ap->a_vap) && (vap->va_type == VDIR)) {
853		/*
854		 * Link count manipulation:
855		 *	- If both return "2", return 2 (no subdirs)
856		 *	- If one or the other return "1", return "1" (ENOCLUE)
857		 */
858		if ((ap->a_vap->va_nlink == 2) &&
859		    (vap->va_nlink == 2))
860			;
861		else if (ap->a_vap->va_nlink != 1) {
862			if (vap->va_nlink == 1)
863				ap->a_vap->va_nlink = 1;
864			else
865				ap->a_vap->va_nlink += vap->va_nlink;
866		}
867	}
868	ap->a_vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsidx.__fsid_val[0];
869	return (0);
870}
871
872int
873union_setattr(void *v)
874{
875	struct vop_setattr_args /* {
876		struct vnode *a_vp;
877		struct vattr *a_vap;
878		kauth_cred_t a_cred;
879	} */ *ap = v;
880	struct vattr *vap = ap->a_vap;
881	struct vnode *vp = ap->a_vp;
882	struct union_node *un = VTOUNION(vp);
883	bool size_only;		/* All but va_size are VNOVAL. */
884	int error;
885
886	size_only = (vap->va_flags == VNOVAL && vap->va_uid == (uid_t)VNOVAL &&
887	    vap->va_gid == (gid_t)VNOVAL && vap->va_atime.tv_sec == VNOVAL &&
888	    vap->va_mtime.tv_sec == VNOVAL && vap->va_mode == (mode_t)VNOVAL);
889
890	if (!size_only && (vp->v_mount->mnt_flag & MNT_RDONLY))
891		return (EROFS);
892	if (vap->va_size != VNOVAL) {
893 		switch (vp->v_type) {
894 		case VDIR:
895 			return (EISDIR);
896 		case VCHR:
897 		case VBLK:
898 		case VSOCK:
899 		case VFIFO:
900			break;
901		case VREG:
902		case VLNK:
903 		default:
904			/*
905			 * Disallow write attempts if the filesystem is
906			 * mounted read-only.
907			 */
908			if (vp->v_mount->mnt_flag & MNT_RDONLY)
909				return (EROFS);
910		}
911	}
912
913	/*
914	 * Handle case of truncating lower object to zero size,
915	 * by creating a zero length upper object.  This is to
916	 * handle the case of open with O_TRUNC and O_CREAT.
917	 */
918	if ((un->un_uppervp == NULLVP) &&
919	    /* assert(un->un_lowervp != NULLVP) */
920	    (un->un_lowervp->v_type == VREG)) {
921		error = union_copyup(un, (vap->va_size != 0),
922						ap->a_cred, curlwp);
923		if (error)
924			return (error);
925	}
926
927	/*
928	 * Try to set attributes in upper layer, ignore size change to zero
929	 * for devices to handle O_TRUNC and return read-only filesystem error
930	 * otherwise.
931	 */
932	if (un->un_uppervp != NULLVP) {
933		error = VOP_SETATTR(un->un_uppervp, vap, ap->a_cred);
934		if ((error == 0) && (vap->va_size != VNOVAL)) {
935			mutex_enter(&un->un_lock);
936			union_newsize(ap->a_vp, vap->va_size, VNOVAL);
937		}
938	} else {
939		KASSERT(un->un_lowervp != NULLVP);
940		if (NODE_IS_SPECIAL(un->un_lowervp)) {
941			if (size_only &&
942			    (vap->va_size == 0 || vap->va_size == VNOVAL))
943				error = 0;
944			else
945				error = EROFS;
946		} else {
947			error = EROFS;
948		}
949	}
950
951	return (error);
952}
953
954int
955union_read(void *v)
956{
957	struct vop_read_args /* {
958		struct vnode *a_vp;
959		struct uio *a_uio;
960		int  a_ioflag;
961		kauth_cred_t a_cred;
962	} */ *ap = v;
963	int error;
964	struct vnode *vp = OTHERVP(ap->a_vp);
965	int dolock = (vp == LOWERVP(ap->a_vp));
966
967	if (dolock)
968		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
969	error = VOP_READ(vp, ap->a_uio, ap->a_ioflag, ap->a_cred);
970	if (dolock)
971		VOP_UNLOCK(vp);
972
973	/*
974	 * XXX
975	 * perhaps the size of the underlying object has changed under
976	 * our feet.  take advantage of the offset information present
977	 * in the uio structure.
978	 */
979	if (error == 0) {
980		struct union_node *un = VTOUNION(ap->a_vp);
981		off_t cur = ap->a_uio->uio_offset;
982		off_t usz = VNOVAL, lsz = VNOVAL;
983
984		mutex_enter(&un->un_lock);
985		if (vp == un->un_uppervp) {
986			if (cur > un->un_uppersz)
987				usz = cur;
988		} else {
989			if (cur > un->un_lowersz)
990				lsz = cur;
991		}
992
993		if (usz != VNOVAL || lsz != VNOVAL)
994			union_newsize(ap->a_vp, usz, lsz);
995		else
996			mutex_exit(&un->un_lock);
997	}
998
999	return (error);
1000}
1001
1002int
1003union_write(void *v)
1004{
1005	struct vop_read_args /* {
1006		struct vnode *a_vp;
1007		struct uio *a_uio;
1008		int  a_ioflag;
1009		kauth_cred_t a_cred;
1010	} */ *ap = v;
1011	int error;
1012	struct vnode *vp;
1013	struct union_node *un = VTOUNION(ap->a_vp);
1014
1015	vp = UPPERVP(ap->a_vp);
1016	if (vp == NULLVP) {
1017		vp = LOWERVP(ap->a_vp);
1018		if (NODE_IS_SPECIAL(vp)) {
1019			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1020			error = VOP_WRITE(vp, ap->a_uio, ap->a_ioflag,
1021			    ap->a_cred);
1022			VOP_UNLOCK(vp);
1023			return error;
1024		}
1025		panic("union: missing upper layer in write");
1026	}
1027
1028	error = VOP_WRITE(vp, ap->a_uio, ap->a_ioflag, ap->a_cred);
1029
1030	/*
1031	 * the size of the underlying object may be changed by the
1032	 * write.
1033	 */
1034	if (error == 0) {
1035		off_t cur = ap->a_uio->uio_offset;
1036
1037		mutex_enter(&un->un_lock);
1038		if (cur > un->un_uppersz)
1039			union_newsize(ap->a_vp, cur, VNOVAL);
1040		else
1041			mutex_exit(&un->un_lock);
1042	}
1043
1044	return (error);
1045}
1046
1047int
1048union_ioctl(void *v)
1049{
1050	struct vop_ioctl_args /* {
1051		struct vnode *a_vp;
1052		int  a_command;
1053		void *a_data;
1054		int  a_fflag;
1055		kauth_cred_t a_cred;
1056	} */ *ap = v;
1057	struct vnode *ovp = OTHERVP(ap->a_vp);
1058
1059	ap->a_vp = ovp;
1060	return (VCALL(ovp, VOFFSET(vop_ioctl), ap));
1061}
1062
1063int
1064union_poll(void *v)
1065{
1066	struct vop_poll_args /* {
1067		struct vnode *a_vp;
1068		int a_events;
1069	} */ *ap = v;
1070	struct vnode *ovp = OTHERVP(ap->a_vp);
1071
1072	ap->a_vp = ovp;
1073	return (VCALL(ovp, VOFFSET(vop_poll), ap));
1074}
1075
1076int
1077union_revoke(void *v)
1078{
1079	struct vop_revoke_args /* {
1080		struct vnode *a_vp;
1081		int a_flags;
1082		struct proc *a_p;
1083	} */ *ap = v;
1084	struct vnode *vp = ap->a_vp;
1085
1086	if (UPPERVP(vp))
1087		VOP_REVOKE(UPPERVP(vp), ap->a_flags);
1088	if (LOWERVP(vp))
1089		VOP_REVOKE(LOWERVP(vp), ap->a_flags);
1090	vgone(vp);	/* XXXAD?? */
1091	return (0);
1092}
1093
1094int
1095union_mmap(void *v)
1096{
1097	struct vop_mmap_args /* {
1098		struct vnode *a_vp;
1099		vm_prot_t a_prot;
1100		kauth_cred_t a_cred;
1101	} */ *ap = v;
1102	struct vnode *ovp = OTHERVP(ap->a_vp);
1103
1104	ap->a_vp = ovp;
1105	return (VCALL(ovp, VOFFSET(vop_mmap), ap));
1106}
1107
1108int
1109union_fsync(void *v)
1110{
1111	struct vop_fsync_args /* {
1112		struct vnode *a_vp;
1113		kauth_cred_t a_cred;
1114		int  a_flags;
1115		off_t offhi;
1116		off_t offlo;
1117	} */ *ap = v;
1118	int error = 0;
1119	struct vnode *targetvp;
1120
1121	/*
1122	 * If vinvalbuf is calling us, it's a "shallow fsync" -- don't
1123	 * bother syncing the underlying vnodes, since (a) they'll be
1124	 * fsync'ed when reclaimed and (b) we could deadlock if
1125	 * they're locked; otherwise, pass it through to the
1126	 * underlying layer.
1127	 */
1128	if (ap->a_vp->v_type == VBLK || ap->a_vp->v_type == VCHR) {
1129		error = spec_fsync(v);
1130		if (error)
1131			return error;
1132	}
1133
1134	if (ap->a_flags & FSYNC_RECLAIM)
1135		return 0;
1136
1137	targetvp = OTHERVP(ap->a_vp);
1138	if (targetvp != NULLVP) {
1139		int dolock = (targetvp == LOWERVP(ap->a_vp));
1140
1141		if (dolock)
1142			vn_lock(targetvp, LK_EXCLUSIVE | LK_RETRY);
1143		error = VOP_FSYNC(targetvp, ap->a_cred, ap->a_flags,
1144			    ap->a_offlo, ap->a_offhi);
1145		if (dolock)
1146			VOP_UNLOCK(targetvp);
1147	}
1148
1149	return (error);
1150}
1151
1152int
1153union_seek(void *v)
1154{
1155	struct vop_seek_args /* {
1156		struct vnode *a_vp;
1157		off_t  a_oldoff;
1158		off_t  a_newoff;
1159		kauth_cred_t a_cred;
1160	} */ *ap = v;
1161	struct vnode *ovp = OTHERVP(ap->a_vp);
1162
1163	ap->a_vp = ovp;
1164	return (VCALL(ovp, VOFFSET(vop_seek), ap));
1165}
1166
1167int
1168union_remove(void *v)
1169{
1170	struct vop_remove_v2_args /* {
1171		struct vnode *a_dvp;
1172		struct vnode *a_vp;
1173		struct componentname *a_cnp;
1174	} */ *ap = v;
1175	int error;
1176	struct union_node *dun = VTOUNION(ap->a_dvp);
1177	struct union_node *un = VTOUNION(ap->a_vp);
1178	struct componentname *cnp = ap->a_cnp;
1179
1180	if (dun->un_uppervp == NULLVP)
1181		panic("union remove: null upper vnode");
1182
1183	if (un->un_uppervp != NULLVP) {
1184		struct vnode *dvp = dun->un_uppervp;
1185		struct vnode *vp = un->un_uppervp;
1186
1187		/* Account for VOP_REMOVE to vrele vp.  */
1188		vref(vp);
1189		if (union_dowhiteout(un, cnp->cn_cred))
1190			cnp->cn_flags |= DOWHITEOUT;
1191		error = VOP_REMOVE(dvp, vp, cnp);
1192		if (!error)
1193			union_removed_upper(un);
1194		vrele(ap->a_vp);
1195	} else {
1196		error = union_mkwhiteout(
1197			MOUNTTOUNIONMOUNT(UNIONTOV(dun)->v_mount),
1198			dun->un_uppervp, ap->a_cnp, un);
1199		vput(ap->a_vp);
1200	}
1201
1202	return (error);
1203}
1204
1205int
1206union_link(void *v)
1207{
1208	struct vop_link_v2_args /* {
1209		struct vnode *a_dvp;
1210		struct vnode *a_vp;
1211		struct componentname *a_cnp;
1212	} */ *ap = v;
1213	int error = 0;
1214	struct componentname *cnp = ap->a_cnp;
1215	struct union_node *dun;
1216	struct vnode *vp;
1217	struct vnode *dvp;
1218
1219	dun = VTOUNION(ap->a_dvp);
1220
1221	KASSERT((ap->a_cnp->cn_flags & LOCKPARENT) != 0);
1222
1223	if (ap->a_dvp->v_op != ap->a_vp->v_op) {
1224		vp = ap->a_vp;
1225	} else {
1226		struct union_node *un = VTOUNION(ap->a_vp);
1227		if (un->un_uppervp == NULLVP) {
1228			const bool droplock = (dun->un_uppervp == un->un_dirvp);
1229
1230			/*
1231			 * Needs to be copied before we can link it.
1232			 */
1233			vn_lock(ap->a_vp, LK_EXCLUSIVE | LK_RETRY);
1234			if (droplock)
1235				VOP_UNLOCK(dun->un_uppervp);
1236			error = union_copyup(un, 1, cnp->cn_cred, curlwp);
1237			if (droplock) {
1238				vn_lock(dun->un_uppervp,
1239				    LK_EXCLUSIVE | LK_RETRY);
1240				/*
1241				 * During copyup, we dropped the lock on the
1242				 * dir and invalidated any saved namei lookup
1243				 * state for the directory we'll be entering
1244				 * the link in.  We need to re-run the lookup
1245				 * in that directory to reset any state needed
1246				 * for VOP_LINK.
1247				 * Call relookup on the union-layer to reset
1248				 * the state.
1249				 */
1250				vp  = NULLVP;
1251				if (dun->un_uppervp == NULLVP)
1252					 panic("union: null upperdvp?");
1253				error = relookup(ap->a_dvp, &vp, ap->a_cnp, 0);
1254				if (error) {
1255					VOP_UNLOCK(ap->a_vp);
1256					return EROFS;	/* ? */
1257				}
1258				if (vp != NULLVP) {
1259					/*
1260					 * The name we want to create has
1261					 * mysteriously appeared (a race?)
1262					 */
1263					error = EEXIST;
1264					VOP_UNLOCK(ap->a_vp);
1265					vput(vp);
1266					return (error);
1267				}
1268			}
1269			VOP_UNLOCK(ap->a_vp);
1270		}
1271		vp = un->un_uppervp;
1272	}
1273
1274	dvp = dun->un_uppervp;
1275	if (dvp == NULLVP)
1276		error = EROFS;
1277
1278	if (error)
1279		return (error);
1280
1281	return VOP_LINK(dvp, vp, cnp);
1282}
1283
1284int
1285union_rename(void *v)
1286{
1287	struct vop_rename_args  /* {
1288		struct vnode *a_fdvp;
1289		struct vnode *a_fvp;
1290		struct componentname *a_fcnp;
1291		struct vnode *a_tdvp;
1292		struct vnode *a_tvp;
1293		struct componentname *a_tcnp;
1294	} */ *ap = v;
1295	int error;
1296
1297	struct vnode *fdvp = ap->a_fdvp;
1298	struct vnode *fvp = ap->a_fvp;
1299	struct vnode *tdvp = ap->a_tdvp;
1300	struct vnode *tvp = ap->a_tvp;
1301
1302	/*
1303	 * Account for VOP_RENAME to vrele all nodes.
1304	 * Note: VOP_RENAME will unlock tdvp.
1305	 */
1306
1307	if (fdvp->v_op == union_vnodeop_p) {	/* always true */
1308		struct union_node *un = VTOUNION(fdvp);
1309		if (un->un_uppervp == NULLVP) {
1310			/*
1311			 * this should never happen in normal
1312			 * operation but might if there was
1313			 * a problem creating the top-level shadow
1314			 * directory.
1315			 */
1316			error = EXDEV;
1317			goto bad;
1318		}
1319
1320		fdvp = un->un_uppervp;
1321		vref(fdvp);
1322	}
1323
1324	if (fvp->v_op == union_vnodeop_p) {	/* always true */
1325		struct union_node *un = VTOUNION(fvp);
1326		if (un->un_uppervp == NULLVP) {
1327			/* XXX: should do a copyup */
1328			error = EXDEV;
1329			goto bad;
1330		}
1331
1332		if (un->un_lowervp != NULLVP)
1333			ap->a_fcnp->cn_flags |= DOWHITEOUT;
1334
1335		fvp = un->un_uppervp;
1336		vref(fvp);
1337	}
1338
1339	if (tdvp->v_op == union_vnodeop_p) {
1340		struct union_node *un = VTOUNION(tdvp);
1341		if (un->un_uppervp == NULLVP) {
1342			/*
1343			 * this should never happen in normal
1344			 * operation but might if there was
1345			 * a problem creating the top-level shadow
1346			 * directory.
1347			 */
1348			error = EXDEV;
1349			goto bad;
1350		}
1351
1352		tdvp = un->un_uppervp;
1353		vref(tdvp);
1354	}
1355
1356	if (tvp != NULLVP && tvp->v_op == union_vnodeop_p) {
1357		struct union_node *un = VTOUNION(tvp);
1358
1359		tvp = un->un_uppervp;
1360		if (tvp != NULLVP) {
1361			vref(tvp);
1362		}
1363	}
1364
1365	error = VOP_RENAME(fdvp, fvp, ap->a_fcnp, tdvp, tvp, ap->a_tcnp);
1366	goto out;
1367
1368bad:
1369	vput(tdvp);
1370	if (tvp != NULLVP)
1371		vput(tvp);
1372	vrele(fdvp);
1373	vrele(fvp);
1374
1375out:
1376	if (fdvp != ap->a_fdvp) {
1377		vrele(ap->a_fdvp);
1378	}
1379	if (fvp != ap->a_fvp) {
1380		vrele(ap->a_fvp);
1381	}
1382	if (tdvp != ap->a_tdvp) {
1383		vrele(ap->a_tdvp);
1384	}
1385	if (tvp != ap->a_tvp) {
1386		vrele(ap->a_tvp);
1387	}
1388	return (error);
1389}
1390
1391int
1392union_mkdir(void *v)
1393{
1394	struct vop_mkdir_v3_args /* {
1395		struct vnode *a_dvp;
1396		struct vnode **a_vpp;
1397		struct componentname *a_cnp;
1398		struct vattr *a_vap;
1399	} */ *ap = v;
1400	struct union_node *un = VTOUNION(ap->a_dvp);
1401	struct vnode *dvp = un->un_uppervp;
1402	struct componentname *cnp = ap->a_cnp;
1403
1404	if (dvp != NULLVP) {
1405		int error;
1406		struct vnode *vp;
1407
1408		vp = NULL;
1409		error = VOP_MKDIR(dvp, &vp, cnp, ap->a_vap);
1410		if (error) {
1411			vrele(ap->a_dvp);
1412			return (error);
1413		}
1414
1415		error = union_allocvp(ap->a_vpp, ap->a_dvp->v_mount, ap->a_dvp,
1416				NULLVP, cnp, vp, NULLVP, 1);
1417		if (error)
1418			vrele(vp);
1419		return (error);
1420	}
1421
1422	return (EROFS);
1423}
1424
1425int
1426union_rmdir(void *v)
1427{
1428	struct vop_rmdir_v2_args /* {
1429		struct vnode *a_dvp;
1430		struct vnode *a_vp;
1431		struct componentname *a_cnp;
1432	} */ *ap = v;
1433	int error;
1434	struct union_node *dun = VTOUNION(ap->a_dvp);
1435	struct union_node *un = VTOUNION(ap->a_vp);
1436	struct componentname *cnp = ap->a_cnp;
1437
1438	if (dun->un_uppervp == NULLVP)
1439		panic("union rmdir: null upper vnode");
1440
1441	error = union_check_rmdir(un, cnp->cn_cred);
1442	if (error) {
1443		vput(ap->a_vp);
1444		return error;
1445	}
1446
1447	if (un->un_uppervp != NULLVP) {
1448		struct vnode *dvp = dun->un_uppervp;
1449		struct vnode *vp = un->un_uppervp;
1450
1451		/* Account for VOP_RMDIR to vrele vp.  */
1452		vref(vp);
1453		if (union_dowhiteout(un, cnp->cn_cred))
1454			cnp->cn_flags |= DOWHITEOUT;
1455		error = VOP_RMDIR(dvp, vp, ap->a_cnp);
1456		if (!error)
1457			union_removed_upper(un);
1458		vrele(ap->a_vp);
1459	} else {
1460		error = union_mkwhiteout(
1461			MOUNTTOUNIONMOUNT(UNIONTOV(dun)->v_mount),
1462			dun->un_uppervp, ap->a_cnp, un);
1463		vput(ap->a_vp);
1464	}
1465
1466	return (error);
1467}
1468
1469int
1470union_symlink(void *v)
1471{
1472	struct vop_symlink_v3_args /* {
1473		struct vnode *a_dvp;
1474		struct vnode **a_vpp;
1475		struct componentname *a_cnp;
1476		struct vattr *a_vap;
1477		char *a_target;
1478	} */ *ap = v;
1479	struct union_node *un = VTOUNION(ap->a_dvp);
1480	struct vnode *dvp = un->un_uppervp;
1481	struct componentname *cnp = ap->a_cnp;
1482
1483	if (dvp != NULLVP) {
1484		int error;
1485
1486		error = VOP_SYMLINK(dvp, ap->a_vpp, cnp, ap->a_vap,
1487				    ap->a_target);
1488		return (error);
1489	}
1490
1491	return (EROFS);
1492}
1493
1494/*
1495 * union_readdir works in concert with getdirentries and
1496 * readdir(3) to provide a list of entries in the unioned
1497 * directories.  getdirentries is responsible for walking
1498 * down the union stack.  readdir(3) is responsible for
1499 * eliminating duplicate names from the returned data stream.
1500 */
1501int
1502union_readdir(void *v)
1503{
1504	struct vop_readdir_args /* {
1505		struct vnodeop_desc *a_desc;
1506		struct vnode *a_vp;
1507		struct uio *a_uio;
1508		kauth_cred_t a_cred;
1509		int *a_eofflag;
1510		u_long *a_cookies;
1511		int a_ncookies;
1512	} */ *ap = v;
1513	struct union_node *un = VTOUNION(ap->a_vp);
1514	struct vnode *vp;
1515	int dolock, error;
1516
1517	if (un->un_hooknode) {
1518		KASSERT(un->un_uppervp == NULLVP);
1519		KASSERT(un->un_lowervp != NULLVP);
1520		vp = un->un_lowervp;
1521		dolock = 1;
1522	} else {
1523		vp = un->un_uppervp;
1524		dolock = 0;
1525	}
1526	if (vp == NULLVP)
1527		return 0;
1528
1529	if (dolock)
1530		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1531	ap->a_vp = vp;
1532	error = VCALL(vp, VOFFSET(vop_readdir), ap);
1533	if (dolock)
1534		VOP_UNLOCK(vp);
1535
1536	return error;
1537}
1538
1539int
1540union_readlink(void *v)
1541{
1542	struct vop_readlink_args /* {
1543		struct vnode *a_vp;
1544		struct uio *a_uio;
1545		kauth_cred_t a_cred;
1546	} */ *ap = v;
1547	int error;
1548	struct vnode *vp = OTHERVP(ap->a_vp);
1549	int dolock = (vp == LOWERVP(ap->a_vp));
1550
1551	if (dolock)
1552		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1553	ap->a_vp = vp;
1554	error = VCALL(vp, VOFFSET(vop_readlink), ap);
1555	if (dolock)
1556		VOP_UNLOCK(vp);
1557
1558	return (error);
1559}
1560
1561int
1562union_abortop(void *v)
1563{
1564	struct vop_abortop_args /* {
1565		struct vnode *a_dvp;
1566		struct componentname *a_cnp;
1567	} */ *ap = v;
1568
1569	KASSERT(UPPERVP(ap->a_dvp) != NULL);
1570
1571	ap->a_dvp = UPPERVP(ap->a_dvp);
1572	return VCALL(ap->a_dvp, VOFFSET(vop_abortop), ap);
1573}
1574
1575int
1576union_inactive(void *v)
1577{
1578	struct vop_inactive_v2_args /* {
1579		const struct vnodeop_desc *a_desc;
1580		struct vnode *a_vp;
1581		bool *a_recycle;
1582	} */ *ap = v;
1583	struct vnode *vp = ap->a_vp;
1584	struct union_node *un = VTOUNION(vp);
1585	struct vnode **vpp;
1586
1587	/*
1588	 * Do nothing (and _don't_ bypass).
1589	 * Wait to vrele lowervp until reclaim,
1590	 * so that until then our union_node is in the
1591	 * cache and reusable.
1592	 *
1593	 * NEEDSWORK: Someday, consider inactive'ing
1594	 * the lowervp and then trying to reactivate it
1595	 * with capabilities (v_id)
1596	 * like they do in the name lookup cache code.
1597	 * That's too much work for now.
1598	 */
1599
1600	if (un->un_dircache != 0) {
1601		for (vpp = un->un_dircache; *vpp != NULLVP; vpp++)
1602			vrele(*vpp);
1603		free(un->un_dircache, M_TEMP);
1604		un->un_dircache = 0;
1605	}
1606
1607	*ap->a_recycle = ((un->un_cflags & UN_CACHED) == 0);
1608
1609	return (0);
1610}
1611
1612int
1613union_reclaim(void *v)
1614{
1615	struct vop_reclaim_v2_args /* {
1616		struct vnode *a_vp;
1617	} */ *ap = v;
1618	struct vnode *vp = ap->a_vp;
1619	struct vnode *uvp = UPPERVP(vp);
1620
1621	VOP_UNLOCK(vp);
1622
1623	if (uvp != NULL) {
1624		mutex_enter(uvp->v_interlock);
1625		KASSERT(vp->v_interlock == uvp->v_interlock);
1626		uvp->v_writecount -= vp->v_writecount;
1627		mutex_exit(uvp->v_interlock);
1628	}
1629
1630	union_freevp(vp);
1631
1632	return (0);
1633}
1634
1635static int
1636union_lock1(struct vnode *vp, struct vnode *lockvp, int flags)
1637{
1638	struct vop_lock_args ap;
1639
1640	ap.a_desc = VDESC(vop_lock);
1641	ap.a_vp = lockvp;
1642	ap.a_flags = flags;
1643
1644	if (lockvp == vp)
1645		return genfs_lock(&ap);
1646	else
1647		return VCALL(ap.a_vp, VOFFSET(vop_lock), &ap);
1648}
1649
1650static int
1651union_unlock1(struct vnode *vp, struct vnode *lockvp)
1652{
1653	struct vop_unlock_args ap;
1654
1655	ap.a_desc = VDESC(vop_unlock);
1656	ap.a_vp = lockvp;
1657
1658	if (lockvp == vp)
1659		return genfs_unlock(&ap);
1660	else
1661		return VCALL(ap.a_vp, VOFFSET(vop_unlock), &ap);
1662}
1663
1664int
1665union_lock(void *v)
1666{
1667	struct vop_lock_args /* {
1668		struct vnode *a_vp;
1669		int a_flags;
1670	} */ *ap = v;
1671	struct vnode *vp = ap->a_vp, *lockvp;
1672	struct union_node *un = VTOUNION(vp);
1673	int flags = ap->a_flags;
1674	int error;
1675
1676	if ((flags & LK_NOWAIT) != 0) {
1677		if (!mutex_tryenter(&un->un_lock))
1678			return EBUSY;
1679		lockvp = LOCKVP(vp);
1680		error = union_lock1(vp, lockvp, flags);
1681		mutex_exit(&un->un_lock);
1682		if (error)
1683			return error;
1684		if (mutex_tryenter(vp->v_interlock)) {
1685			error = vdead_check(vp, VDEAD_NOWAIT);
1686			mutex_exit(vp->v_interlock);
1687		} else
1688			error = EBUSY;
1689		if (error)
1690			union_unlock1(vp, lockvp);
1691		return error;
1692	}
1693
1694	mutex_enter(&un->un_lock);
1695	for (;;) {
1696		lockvp = LOCKVP(vp);
1697		mutex_exit(&un->un_lock);
1698		error = union_lock1(vp, lockvp, flags);
1699		if (error != 0)
1700			return error;
1701		mutex_enter(&un->un_lock);
1702		if (lockvp == LOCKVP(vp))
1703			break;
1704		union_unlock1(vp, lockvp);
1705	}
1706	mutex_exit(&un->un_lock);
1707
1708	mutex_enter(vp->v_interlock);
1709	error = vdead_check(vp, VDEAD_NOWAIT);
1710	if (error) {
1711		union_unlock1(vp, lockvp);
1712		error = vdead_check(vp, 0);
1713		KASSERT(error == ENOENT);
1714	}
1715	mutex_exit(vp->v_interlock);
1716	return error;
1717}
1718
1719int
1720union_unlock(void *v)
1721{
1722	struct vop_unlock_args /* {
1723		struct vnode *a_vp;
1724		int a_flags;
1725	} */ *ap = v;
1726	struct vnode *vp = ap->a_vp, *lockvp;
1727
1728	lockvp = LOCKVP(vp);
1729	union_unlock1(vp, lockvp);
1730
1731	return 0;
1732}
1733
1734int
1735union_bmap(void *v)
1736{
1737	struct vop_bmap_args /* {
1738		struct vnode *a_vp;
1739		daddr_t  a_bn;
1740		struct vnode **a_vpp;
1741		daddr_t *a_bnp;
1742		int *a_runp;
1743	} */ *ap = v;
1744	int error;
1745	struct vnode *vp = OTHERVP(ap->a_vp);
1746	int dolock = (vp == LOWERVP(ap->a_vp));
1747
1748	if (dolock)
1749		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1750	ap->a_vp = vp;
1751	error = VCALL(vp, VOFFSET(vop_bmap), ap);
1752	if (dolock)
1753		VOP_UNLOCK(vp);
1754
1755	return (error);
1756}
1757
1758int
1759union_print(void *v)
1760{
1761	struct vop_print_args /* {
1762		struct vnode *a_vp;
1763	} */ *ap = v;
1764	struct vnode *vp = ap->a_vp;
1765
1766	printf("\ttag VT_UNION, vp=%p, uppervp=%p, lowervp=%p\n",
1767			vp, UPPERVP(vp), LOWERVP(vp));
1768	if (UPPERVP(vp) != NULLVP)
1769		vprint("union: upper", UPPERVP(vp));
1770	if (LOWERVP(vp) != NULLVP)
1771		vprint("union: lower", LOWERVP(vp));
1772	if (VTOUNION(vp)->un_dircache) {
1773		struct vnode **vpp;
1774		for (vpp = VTOUNION(vp)->un_dircache; *vpp != NULLVP; vpp++)
1775			vprint("dircache:", *vpp);
1776	}
1777
1778	return (0);
1779}
1780
1781int
1782union_islocked(void *v)
1783{
1784	struct vop_islocked_args /* {
1785		struct vnode *a_vp;
1786	} */ *ap = v;
1787	struct vnode *vp;
1788	struct union_node *un;
1789
1790	un = VTOUNION(ap->a_vp);
1791	mutex_enter(&un->un_lock);
1792	vp = LOCKVP(ap->a_vp);
1793	mutex_exit(&un->un_lock);
1794
1795	if (vp == ap->a_vp)
1796		return genfs_islocked(ap);
1797	else
1798		return VOP_ISLOCKED(vp);
1799}
1800
1801int
1802union_pathconf(void *v)
1803{
1804	struct vop_pathconf_args /* {
1805		struct vnode *a_vp;
1806		int a_name;
1807		int *a_retval;
1808	} */ *ap = v;
1809	int error;
1810	struct vnode *vp = OTHERVP(ap->a_vp);
1811	int dolock = (vp == LOWERVP(ap->a_vp));
1812
1813	if (dolock)
1814		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1815	ap->a_vp = vp;
1816	error = VCALL(vp, VOFFSET(vop_pathconf), ap);
1817	if (dolock)
1818		VOP_UNLOCK(vp);
1819
1820	return (error);
1821}
1822
1823int
1824union_advlock(void *v)
1825{
1826	struct vop_advlock_args /* {
1827		struct vnode *a_vp;
1828		void *a_id;
1829		int  a_op;
1830		struct flock *a_fl;
1831		int  a_flags;
1832	} */ *ap = v;
1833	struct vnode *ovp = OTHERVP(ap->a_vp);
1834
1835	ap->a_vp = ovp;
1836	return (VCALL(ovp, VOFFSET(vop_advlock), ap));
1837}
1838
1839int
1840union_strategy(void *v)
1841{
1842	struct vop_strategy_args /* {
1843		struct vnode *a_vp;
1844		struct buf *a_bp;
1845	} */ *ap = v;
1846	struct vnode *ovp = OTHERVP(ap->a_vp);
1847	struct buf *bp = ap->a_bp;
1848
1849	KASSERT(ovp != NULLVP);
1850	if (!NODE_IS_SPECIAL(ovp))
1851		KASSERT((bp->b_flags & B_READ) || ovp != LOWERVP(bp->b_vp));
1852
1853	return (VOP_STRATEGY(ovp, bp));
1854}
1855
1856int
1857union_bwrite(void *v)
1858{
1859	struct vop_bwrite_args /* {
1860		struct vnode *a_vp;
1861		struct buf *a_bp;
1862	} */ *ap = v;
1863	struct vnode *ovp = OTHERVP(ap->a_vp);
1864	struct buf *bp = ap->a_bp;
1865
1866	KASSERT(ovp != NULLVP);
1867	if (!NODE_IS_SPECIAL(ovp))
1868		KASSERT((bp->b_flags & B_READ) || ovp != LOWERVP(bp->b_vp));
1869
1870	return (VOP_BWRITE(ovp, bp));
1871}
1872
1873int
1874union_getpages(void *v)
1875{
1876	struct vop_getpages_args /* {
1877		struct vnode *a_vp;
1878		voff_t a_offset;
1879		struct vm_page **a_m;
1880		int *a_count;
1881		int a_centeridx;
1882		vm_prot_t a_access_type;
1883		int a_advice;
1884		int a_flags;
1885	} */ *ap = v;
1886	struct vnode *vp = ap->a_vp;
1887
1888	KASSERT(rw_lock_held(vp->v_uobj.vmobjlock));
1889
1890	if (ap->a_flags & PGO_LOCKED) {
1891		return EBUSY;
1892	}
1893	ap->a_vp = OTHERVP(vp);
1894	KASSERT(vp->v_uobj.vmobjlock == ap->a_vp->v_uobj.vmobjlock);
1895
1896	/* Just pass the request on to the underlying layer. */
1897	return VCALL(ap->a_vp, VOFFSET(vop_getpages), ap);
1898}
1899
1900int
1901union_putpages(void *v)
1902{
1903	struct vop_putpages_args /* {
1904		struct vnode *a_vp;
1905		voff_t a_offlo;
1906		voff_t a_offhi;
1907		int a_flags;
1908	} */ *ap = v;
1909	struct vnode *vp = ap->a_vp;
1910
1911	KASSERT(rw_lock_held(vp->v_uobj.vmobjlock));
1912
1913	ap->a_vp = OTHERVP(vp);
1914	KASSERT(vp->v_uobj.vmobjlock == ap->a_vp->v_uobj.vmobjlock);
1915
1916	if (ap->a_flags & PGO_RECLAIM) {
1917		rw_exit(vp->v_uobj.vmobjlock);
1918		return 0;
1919	}
1920
1921	/* Just pass the request on to the underlying layer. */
1922	return VCALL(ap->a_vp, VOFFSET(vop_putpages), ap);
1923}
1924
1925int
1926union_kqfilter(void *v)
1927{
1928	struct vop_kqfilter_args /* {
1929		struct vnode	*a_vp;
1930		struct knote	*a_kn;
1931	} */ *ap = v;
1932	int error;
1933
1934	/*
1935	 * We watch either the upper layer file (if it already exists),
1936	 * or the lower layer one. If there is lower layer file only
1937	 * at this moment, we will keep watching that lower layer file
1938	 * even if upper layer file would be created later on.
1939	 */
1940	if (UPPERVP(ap->a_vp))
1941		error = VOP_KQFILTER(UPPERVP(ap->a_vp), ap->a_kn);
1942	else if (LOWERVP(ap->a_vp))
1943		error = VOP_KQFILTER(LOWERVP(ap->a_vp), ap->a_kn);
1944	else {
1945		/* panic? */
1946		error = EOPNOTSUPP;
1947	}
1948
1949	return (error);
1950}
1951