1/*	$NetBSD: union_vnops.c,v 1.83 2022/03/19 13:48:04 hannken Exp $	*/
2
3/*
4 * Copyright (c) 1992, 1993, 1994, 1995
5 *	The Regents of the University of California.  All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Jan-Simon Pendry.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its contributors
19 *    may be used to endorse or promote products derived from this software
20 *    without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 *
34 *	@(#)union_vnops.c	8.33 (Berkeley) 7/31/95
35 */
36
37/*
38 * Copyright (c) 1992, 1993, 1994, 1995 Jan-Simon Pendry.
39 *
40 * This code is derived from software contributed to Berkeley by
41 * Jan-Simon Pendry.
42 *
43 * Redistribution and use in source and binary forms, with or without
44 * modification, are permitted provided that the following conditions
45 * are met:
46 * 1. Redistributions of source code must retain the above copyright
47 *    notice, this list of conditions and the following disclaimer.
48 * 2. Redistributions in binary form must reproduce the above copyright
49 *    notice, this list of conditions and the following disclaimer in the
50 *    documentation and/or other materials provided with the distribution.
51 * 3. All advertising materials mentioning features or use of this software
52 *    must display the following acknowledgement:
53 *	This product includes software developed by the University of
54 *	California, Berkeley and its contributors.
55 * 4. Neither the name of the University nor the names of its contributors
56 *    may be used to endorse or promote products derived from this software
57 *    without specific prior written permission.
58 *
59 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
60 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
61 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
62 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
63 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
64 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
65 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
66 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
67 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
68 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
69 * SUCH DAMAGE.
70 *
71 *	@(#)union_vnops.c	8.33 (Berkeley) 7/31/95
72 */
73
74#include <sys/cdefs.h>
75__KERNEL_RCSID(0, "$NetBSD: union_vnops.c,v 1.83 2022/03/19 13:48:04 hannken Exp $");
76
77#include <sys/param.h>
78#include <sys/systm.h>
79#include <sys/proc.h>
80#include <sys/file.h>
81#include <sys/time.h>
82#include <sys/stat.h>
83#include <sys/vnode.h>
84#include <sys/mount.h>
85#include <sys/namei.h>
86#include <sys/malloc.h>
87#include <sys/buf.h>
88#include <sys/queue.h>
89#include <sys/lock.h>
90#include <sys/kauth.h>
91
92#include <fs/union/union.h>
93#include <miscfs/genfs/genfs.h>
94#include <miscfs/specfs/specdev.h>
95
/*
 * Forward declarations for the union layer's implementations of the
 * vnode operations; each takes the generic VOP argument pointer.
 */
int union_parsepath(void *);
int union_lookup(void *);
int union_create(void *);
int union_whiteout(void *);
int union_mknod(void *);
int union_open(void *);
int union_close(void *);
int union_access(void *);
int union_getattr(void *);
int union_setattr(void *);
int union_read(void *);
int union_write(void *);
int union_ioctl(void *);
int union_poll(void *);
int union_revoke(void *);
int union_mmap(void *);
int union_fsync(void *);
int union_seek(void *);
int union_remove(void *);
int union_link(void *);
int union_rename(void *);
int union_mkdir(void *);
int union_rmdir(void *);
int union_symlink(void *);
int union_readdir(void *);
int union_readlink(void *);
int union_abortop(void *);
int union_inactive(void *);
int union_reclaim(void *);
int union_lock(void *);
int union_unlock(void *);
int union_bmap(void *);
int union_print(void *);
int union_islocked(void *);
int union_pathconf(void *);
int union_advlock(void *);
int union_strategy(void *);
int union_bwrite(void *);
int union_getpages(void *);
int union_putpages(void *);
int union_kqfilter(void *);

/* Internal helper: single-layer lookup with mount-point traversal. */
static int union_lookup1(struct vnode *, struct vnode **,
			      struct vnode **, struct componentname *);
140
141
142/*
143 * Global vfs data structures
144 */
int (**union_vnodeop_p)(void *);
/*
 * Operations vector: maps each VOP descriptor to its union-layer
 * implementation.  fallocate/fdiscard are not supported, and accessx
 * falls back to the genfs implementation (which calls VOP_ACCESS).
 */
const struct vnodeopv_entry_desc union_vnodeop_entries[] = {
	{ &vop_default_desc, vn_default_error },
	{ &vop_parsepath_desc, union_parsepath },	/* parsepath */
	{ &vop_lookup_desc, union_lookup },		/* lookup */
	{ &vop_create_desc, union_create },		/* create */
	{ &vop_whiteout_desc, union_whiteout },		/* whiteout */
	{ &vop_mknod_desc, union_mknod },		/* mknod */
	{ &vop_open_desc, union_open },			/* open */
	{ &vop_close_desc, union_close },		/* close */
	{ &vop_access_desc, union_access },		/* access */
	{ &vop_accessx_desc, genfs_accessx },		/* accessx */
	{ &vop_getattr_desc, union_getattr },		/* getattr */
	{ &vop_setattr_desc, union_setattr },		/* setattr */
	{ &vop_read_desc, union_read },			/* read */
	{ &vop_write_desc, union_write },		/* write */
	{ &vop_fallocate_desc, genfs_eopnotsupp },	/* fallocate */
	{ &vop_fdiscard_desc, genfs_eopnotsupp },	/* fdiscard */
	{ &vop_ioctl_desc, union_ioctl },		/* ioctl */
	{ &vop_poll_desc, union_poll },			/* select */
	{ &vop_revoke_desc, union_revoke },		/* revoke */
	{ &vop_mmap_desc, union_mmap },			/* mmap */
	{ &vop_fsync_desc, union_fsync },		/* fsync */
	{ &vop_seek_desc, union_seek },			/* seek */
	{ &vop_remove_desc, union_remove },		/* remove */
	{ &vop_link_desc, union_link },			/* link */
	{ &vop_rename_desc, union_rename },		/* rename */
	{ &vop_mkdir_desc, union_mkdir },		/* mkdir */
	{ &vop_rmdir_desc, union_rmdir },		/* rmdir */
	{ &vop_symlink_desc, union_symlink },		/* symlink */
	{ &vop_readdir_desc, union_readdir },		/* readdir */
	{ &vop_readlink_desc, union_readlink },		/* readlink */
	{ &vop_abortop_desc, union_abortop },		/* abortop */
	{ &vop_inactive_desc, union_inactive },		/* inactive */
	{ &vop_reclaim_desc, union_reclaim },		/* reclaim */
	{ &vop_lock_desc, union_lock },			/* lock */
	{ &vop_unlock_desc, union_unlock },		/* unlock */
	{ &vop_bmap_desc, union_bmap },			/* bmap */
	{ &vop_strategy_desc, union_strategy },		/* strategy */
	{ &vop_bwrite_desc, union_bwrite },		/* bwrite */
	{ &vop_print_desc, union_print },		/* print */
	{ &vop_islocked_desc, union_islocked },		/* islocked */
	{ &vop_pathconf_desc, union_pathconf },		/* pathconf */
	{ &vop_advlock_desc, union_advlock },		/* advlock */
	{ &vop_getpages_desc, union_getpages },		/* getpages */
	{ &vop_putpages_desc, union_putpages },		/* putpages */
	{ &vop_kqfilter_desc, union_kqfilter },		/* kqfilter */
	{ NULL, NULL }
};
const struct vnodeopv_desc union_vnodeop_opv_desc =
	{ &union_vnodeop_p, union_vnodeop_entries };
196
/* True if the vnode is a device, socket or fifo (nothing to copy up). */
#define NODE_IS_SPECIAL(vp) \
	((vp)->v_type == VBLK || (vp)->v_type == VCHR || \
	(vp)->v_type == VSOCK || (vp)->v_type == VFIFO)
200
201int
202union_parsepath(void *v)
203{
204	struct vop_parsepath_args /* {
205		struct vnode *a_dvp;
206		const char *a_name;
207		size_t *a_retval;
208	} */ *ap = v;
209	struct vnode *upperdvp, *lowerdvp;
210	size_t upper, lower;
211	int error;
212
213	upperdvp = UPPERVP(ap->a_dvp);
214	lowerdvp = LOWERVP(ap->a_dvp);
215
216	if (upperdvp != NULLVP) {
217		error = VOP_PARSEPATH(upperdvp, ap->a_name, &upper);
218		if (error) {
219			return error;
220		}
221	} else {
222		upper = 0;
223	}
224
225	if (lowerdvp != NULLVP) {
226		error = VOP_PARSEPATH(lowerdvp, ap->a_name, &lower);
227		if (error) {
228			return error;
229		}
230	} else {
231		lower = 0;
232	}
233
234	if (upper == 0 && lower == 0) {
235		panic("%s: missing both layers", __func__);
236	}
237
238	/*
239	 * If they're different, use the larger one. This is not a
240	 * comprehensive solution, but it's sufficient for the
241	 * non-default cases of parsepath that currently exist.
242	 */
243	*ap->a_retval = MAX(upper, lower);
244	return 0;
245}
246
/*
 * Look up one pathname component in a single (upper or lower) layer.
 * udvp is that layer's root vnode, *dvpp the locked directory to search
 * (updated if ".." crosses a mount boundary), and on success the locked
 * result is returned in *vpp.  Handles stepping back up across mount
 * points for ".." and descending into filesystems mounted on the result.
 */
static int
union_lookup1(struct vnode *udvp, struct vnode **dvpp, struct vnode **vpp,
	struct componentname *cnp)
{
	int error;
	struct vnode *tdvp;
	struct vnode *dvp;
	struct mount *mp;

	dvp = *dvpp;

	/*
	 * If stepping up the directory tree, check for going
	 * back across the mount point, in which case do what
	 * lookup would do by stepping back down the mount
	 * hierarchy.
	 */
	if (cnp->cn_flags & ISDOTDOT) {
		while ((dvp != udvp) && (dvp->v_vflag & VV_ROOT)) {
			/*
			 * Don't do the NOCROSSMOUNT check
			 * at this level.  By definition,
			 * union fs deals with namespaces, not
			 * filesystems.
			 */
			tdvp = dvp;
			*dvpp = dvp = dvp->v_mount->mnt_vnodecovered;
			VOP_UNLOCK(tdvp);
			vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
		}
	}

        error = VOP_LOOKUP(dvp, &tdvp, cnp);
	if (error)
		return (error);
	if (dvp != tdvp) {
		/*
		 * Lock the result.  For ".." the parent is unlocked
		 * around the child lock, presumably to avoid a
		 * lock-order reversal -- TODO confirm.
		 */
		if (cnp->cn_flags & ISDOTDOT)
			VOP_UNLOCK(dvp);
		error = vn_lock(tdvp, LK_EXCLUSIVE);
		if (cnp->cn_flags & ISDOTDOT)
			vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
		if (error) {
			vrele(tdvp);
			return error;
		}
		dvp = tdvp;
	}

	/*
	 * Lastly check if the current node is a mount point in
	 * which case walk up the mount hierarchy making sure not to
	 * bump into the root of the mount tree (ie. dvp != udvp).
	 */
	while (dvp != udvp && (dvp->v_type == VDIR) &&
	       (mp = dvp->v_mountedhere)) {
		/* A busy mount is in transition; retry the check. */
		if (vfs_busy(mp))
			continue;
		vput(dvp);
		error = VFS_ROOT(mp, LK_EXCLUSIVE, &tdvp);
		vfs_unbusy(mp);
		if (error) {
			return (error);
		}
		dvp = tdvp;
	}

	*vpp = dvp;
	return (0);
}
316
/*
 * union_lookup: look up a component in both layers of the union and
 * merge the results into a single union vnode in *ap->a_vpp.
 * The per-layer work is done by union_lookup1(); this routine handles
 * whiteouts and opaque directories, creates shadow directories in the
 * upper layer when a lower directory must become writable, and
 * combines the (up to two) resulting vnodes via union_allocvp().
 */
int
union_lookup(void *v)
{
	struct vop_lookup_v2_args /* {
		struct vnodeop_desc *a_desc;
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
	} */ *ap = v;
	int error;
	int uerror, lerror;
	struct vnode *uppervp, *lowervp;
	struct vnode *upperdvp, *lowerdvp;
	struct vnode *dvp = ap->a_dvp;
	struct union_node *dun = VTOUNION(dvp);
	struct componentname *cnp = ap->a_cnp;
	struct union_mount *um = MOUNTTOUNIONMOUNT(dvp->v_mount);
	kauth_cred_t saved_cred = NULL;
	int iswhiteout;
	struct vattr va;

#ifdef notyet
	/* Disabled: "..." would name the lower layer directly. */
	if (cnp->cn_namelen == 3 &&
			cnp->cn_nameptr[2] == '.' &&
			cnp->cn_nameptr[1] == '.' &&
			cnp->cn_nameptr[0] == '.') {
		dvp = *ap->a_vpp = LOWERVP(ap->a_dvp);
		if (dvp == NULLVP)
			return (ENOENT);
		vref(dvp);
		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
		return (0);
	}
#endif

	/* Refuse destructive last-component lookups on a read-only mount. */
	if ((cnp->cn_flags & ISLASTCN) &&
	    (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
		return (EROFS);

start:
	upperdvp = dun->un_uppervp;
	lowerdvp = dun->un_lowervp;
	uppervp = NULLVP;
	lowervp = NULLVP;
	iswhiteout = 0;

	/*
	 * do the lookup in the upper level.
	 * if that level comsumes additional pathnames,
	 * then assume that something special is going
	 * on and just return that vnode.
	 */
	if (upperdvp != NULLVP) {
		uerror = union_lookup1(um->um_uppervp, &upperdvp,
					&uppervp, cnp);
		if (uerror == ENOENT || uerror == EJUSTRETURN) {
			if (cnp->cn_flags & ISWHITEOUT) {
				iswhiteout = 1;
			} else if (lowerdvp != NULLVP) {
				/*
				 * Name not in the upper layer: an opaque
				 * upper directory hides the whole lower
				 * layer just like a whiteout would.
				 * (lerror is only a scratch here; it is
				 * re-set in both branches below.)
				 */
				lerror = VOP_GETATTR(upperdvp, &va,
					cnp->cn_cred);
				if (lerror == 0 && (va.va_flags & OPAQUE))
					iswhiteout = 1;
			}
		}
	} else {
		uerror = ENOENT;
	}

	/*
	 * in a similar way to the upper layer, do the lookup
	 * in the lower layer.   this time, if there is some
	 * component magic going on, then vput whatever we got
	 * back from the upper layer and return the lower vnode
	 * instead.
	 */
	if (lowerdvp != NULLVP && !iswhiteout) {
		int nameiop;

		vn_lock(lowerdvp, LK_EXCLUSIVE | LK_RETRY);

		/*
		 * Only do a LOOKUP on the bottom node, since
		 * we won't be making changes to it anyway.
		 */
		nameiop = cnp->cn_nameiop;
		cnp->cn_nameiop = LOOKUP;
		if (um->um_op == UNMNT_BELOW) {
			/* Use the mounter's credentials in the lower layer. */
			saved_cred = cnp->cn_cred;
			cnp->cn_cred = um->um_cred;
		}

		/*
		 * we shouldn't have to worry about locking interactions
		 * between the lower layer and our union layer (w.r.t.
		 * `..' processing) because we don't futz with lowervp
		 * locks in the union-node instantiation code path.
		 */
		lerror = union_lookup1(um->um_lowervp, &lowerdvp,
				&lowervp, cnp);
		if (um->um_op == UNMNT_BELOW)
			cnp->cn_cred = saved_cred;
		cnp->cn_nameiop = nameiop;

		if (lowervp != lowerdvp)
			VOP_UNLOCK(lowerdvp);
	} else {
		lerror = ENOENT;
		if ((cnp->cn_flags & ISDOTDOT) && dun->un_pvp != NULLVP) {
			/* ".." of a node with no lower dir: use parent's. */
			lowervp = LOWERVP(dun->un_pvp);
			if (lowervp != NULLVP) {
				vref(lowervp);
				vn_lock(lowervp, LK_EXCLUSIVE | LK_RETRY);
				lerror = 0;
			}
		}
	}

	/*
	 * EJUSTRETURN is used by underlying filesystems to indicate that
	 * a directory modification op was started successfully.
	 * This will only happen in the upper layer, since
	 * the lower layer only does LOOKUPs.
	 * If this union is mounted read-only, bounce it now.
	 */

	if ((uerror == EJUSTRETURN) && (cnp->cn_flags & ISLASTCN) &&
	    (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
	    ((cnp->cn_nameiop == CREATE) || (cnp->cn_nameiop == RENAME)))
		uerror = EROFS;

	/*
	 * at this point, we have uerror and lerror indicating
	 * possible errors with the lookups in the upper and lower
	 * layers.  additionally, uppervp and lowervp are (locked)
	 * references to existing vnodes in the upper and lower layers.
	 *
	 * there are now three cases to consider.
	 * 1. if both layers returned an error, then return whatever
	 *    error the upper layer generated.
	 *
	 * 2. if the top layer failed and the bottom layer succeeded
	 *    then two subcases occur.
	 *    a.  the bottom vnode is not a directory, in which
	 *	  case just return a new union vnode referencing
	 *	  an empty top layer and the existing bottom layer.
	 *    b.  the bottom vnode is a directory, in which case
	 *	  create a new directory in the top-level and
	 *	  continue as in case 3.
	 *
	 * 3. if the top layer succeeded then return a new union
	 *    vnode referencing whatever the new top layer and
	 *    whatever the bottom layer returned.
	 */

	*ap->a_vpp = NULLVP;


	/* case 1. */
	if ((uerror != 0) && (lerror != 0)) {
		return (uerror);
	}

	/* case 2. */
	if (uerror != 0 /* && (lerror == 0) */ ) {
		if (lowervp->v_type == VDIR) { /* case 2b. */
			/*
			 * We may be racing another process to make the
			 * upper-level shadow directory.  Be careful with
			 * locks/etc!
			 * If we have to create a shadow directory and want
			 * to commit the node we have to restart the lookup
			 * to get the componentname right.
			 */
			if (upperdvp) {
				VOP_UNLOCK(upperdvp);
				uerror = union_mkshadow(um, upperdvp, cnp,
				    &uppervp);
				vn_lock(upperdvp, LK_EXCLUSIVE | LK_RETRY);
				if (uerror == 0 && cnp->cn_nameiop != LOOKUP) {
					vrele(uppervp);
					if (lowervp != NULLVP)
						vput(lowervp);
					/* Redo both lookups from scratch. */
					goto start;
				}
			}
			if (uerror) {
				if (lowervp != NULLVP) {
					vput(lowervp);
					lowervp = NULLVP;
				}
				return (uerror);
			}
		}
	} else { /* uerror == 0 */
		if (uppervp != upperdvp)
			VOP_UNLOCK(uppervp);
	}

	/* union_allocvp() wants the layer vnodes referenced but unlocked. */
	if (lowervp != NULLVP)
		VOP_UNLOCK(lowervp);

	error = union_allocvp(ap->a_vpp, dvp->v_mount, dvp, upperdvp, cnp,
			      uppervp, lowervp, 1);

	if (error) {
		if (uppervp != NULLVP)
			vrele(uppervp);
		if (lowervp != NULLVP)
			vrele(lowervp);
		return error;
	}

	return 0;
}
533
534int
535union_create(void *v)
536{
537	struct vop_create_v3_args /* {
538		struct vnode *a_dvp;
539		struct vnode **a_vpp;
540		struct componentname *a_cnp;
541		struct vattr *a_vap;
542	} */ *ap = v;
543	struct union_node *un = VTOUNION(ap->a_dvp);
544	struct vnode *dvp = un->un_uppervp;
545	struct componentname *cnp = ap->a_cnp;
546
547	if (dvp != NULLVP) {
548		int error;
549		struct vnode *vp;
550		struct mount *mp;
551
552		mp = ap->a_dvp->v_mount;
553
554		vp = NULL;
555		error = VOP_CREATE(dvp, &vp, cnp, ap->a_vap);
556		if (error)
557			return (error);
558
559		error = union_allocvp(ap->a_vpp, mp, NULLVP, NULLVP, cnp, vp,
560				NULLVP, 1);
561		if (error)
562			vrele(vp);
563		return (error);
564	}
565
566	return (EROFS);
567}
568
569int
570union_whiteout(void *v)
571{
572	struct vop_whiteout_args /* {
573		struct vnode *a_dvp;
574		struct componentname *a_cnp;
575		int a_flags;
576	} */ *ap = v;
577	struct union_node *un = VTOUNION(ap->a_dvp);
578	struct componentname *cnp = ap->a_cnp;
579
580	if (un->un_uppervp == NULLVP)
581		return (EOPNOTSUPP);
582
583	return (VOP_WHITEOUT(un->un_uppervp, cnp, ap->a_flags));
584}
585
586int
587union_mknod(void *v)
588{
589	struct vop_mknod_v3_args /* {
590		struct vnode *a_dvp;
591		struct vnode **a_vpp;
592		struct componentname *a_cnp;
593		struct vattr *a_vap;
594	} */ *ap = v;
595	struct union_node *un = VTOUNION(ap->a_dvp);
596	struct vnode *dvp = un->un_uppervp;
597	struct componentname *cnp = ap->a_cnp;
598
599	if (dvp != NULLVP) {
600		int error;
601		struct vnode *vp;
602		struct mount *mp;
603
604		mp = ap->a_dvp->v_mount;
605		error = VOP_MKNOD(dvp, &vp, cnp, ap->a_vap);
606		if (error)
607			return (error);
608
609		error = union_allocvp(ap->a_vpp, mp, NULLVP, NULLVP,
610				      cnp, vp, NULLVP, 1);
611		if (error)
612			vrele(vp);
613		return (error);
614	}
615
616	return (EROFS);
617}
618
/*
 * union_open: open the backing vnode.  Opens the upper layer when it
 * exists; otherwise opens the lower layer, first copying a regular
 * file up if it is being opened for writing.  MNT_NODEV is enforced
 * for device nodes in either layer.  v_writecount is bumped on the
 * upper vnode for FWRITE opens (undone in union_close()).
 */
int
union_open(void *v)
{
	struct vop_open_args /* {
		struct vnodeop_desc *a_desc;
		struct vnode *a_vp;
		int a_mode;
		kauth_cred_t a_cred;
	} */ *ap = v;
	struct union_node *un = VTOUNION(ap->a_vp);
	struct vnode *tvp;
	int mode = ap->a_mode;
	kauth_cred_t cred = ap->a_cred;
	struct lwp *l = curlwp;
	int error;

	/*
	 * If there is an existing upper vp then simply open that.
	 */
	tvp = un->un_uppervp;
	if (tvp == NULLVP) {
		/*
		 * If the lower vnode is being opened for writing, then
		 * copy the file contents to the upper vnode and open that,
		 * otherwise can simply open the lower vnode.
		 */
		tvp = un->un_lowervp;
		if ((ap->a_mode & FWRITE) && (tvp->v_type == VREG)) {
			/* Skip copying the data when O_TRUNC discards it. */
			error = union_copyup(un, (mode&O_TRUNC) == 0, cred, l);
			if (error == 0)
				error = VOP_OPEN(un->un_uppervp, mode, cred);
			if (error == 0) {
				/* Account the writer on the upper vnode. */
				mutex_enter(un->un_uppervp->v_interlock);
				un->un_uppervp->v_writecount++;
				mutex_exit(un->un_uppervp->v_interlock);
			}
			return (error);
		}

		/*
		 * Just open the lower vnode, but check for nodev mount flag
		 */
		if ((tvp->v_type == VBLK || tvp->v_type == VCHR) &&
		    (ap->a_vp->v_mount->mnt_flag & MNT_NODEV))
			return ENXIO;
		/* un_openl counts lower-layer opens, balanced in close. */
		un->un_openl++;
		vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY);
		error = VOP_OPEN(tvp, mode, cred);
		VOP_UNLOCK(tvp);

		return (error);
	}
	/*
	 * Just open the upper vnode, checking for nodev mount flag first
	 */
	if ((tvp->v_type == VBLK || tvp->v_type == VCHR) &&
	    (ap->a_vp->v_mount->mnt_flag & MNT_NODEV))
		return ENXIO;

	error = VOP_OPEN(tvp, mode, cred);
	if (error == 0 && (ap->a_mode & FWRITE)) {
		mutex_enter(tvp->v_interlock);
		tvp->v_writecount++;
		mutex_exit(tvp->v_interlock);
	}

	return (error);
}
687
/*
 * union_close: close the backing vnode by forwarding the call via
 * VCALL.  An upper vnode arrives locked; a lower-only vnode must be
 * locked here.  Drops the v_writecount taken in union_open() for
 * FWRITE opens (writers always have an upper vnode, per the copy-up
 * in union_open()) and balances un_openl for lower-layer opens.
 */
int
union_close(void *v)
{
	struct vop_close_args /* {
		struct vnode *a_vp;
		int  a_fflag;
		kauth_cred_t a_cred;
	} */ *ap = v;
	struct union_node *un = VTOUNION(ap->a_vp);
	struct vnode *vp;
	int error;
	bool do_lock;

	vp = un->un_uppervp;
	if (vp != NULLVP) {
		do_lock = false;
	} else {
		KASSERT(un->un_openl > 0);
		--un->un_openl;
		vp = un->un_lowervp;
		do_lock = true;
	}

	KASSERT(vp != NULLVP);
	/* Redirect the args to the backing vnode and bypass to it. */
	ap->a_vp = vp;
	if ((ap->a_fflag & FWRITE)) {
		KASSERT(vp == un->un_uppervp);
		mutex_enter(vp->v_interlock);
		vp->v_writecount--;
		mutex_exit(vp->v_interlock);
	}
	if (do_lock)
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	error = VCALL(vp, VOFFSET(vop_close), ap);
	if (do_lock)
		VOP_UNLOCK(vp);

	return error;
}
727
728/*
729 * Check access permission on the union vnode.
730 * The access check being enforced is to check
731 * against both the underlying vnode, and any
732 * copied vnode.  This ensures that no additional
733 * file permissions are given away simply because
734 * the user caused an implicit file copy.
735 */
int
union_access(void *v)
{
	struct vop_access_args /* {
		struct vnodeop_desc *a_desc;
		struct vnode *a_vp;
		accmode_t a_accmode;
		kauth_cred_t a_cred;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct union_node *un = VTOUNION(vp);
	int error = EACCES;
	struct union_mount *um = MOUNTTOUNIONMOUNT(vp->v_mount);

	/*
	 * Disallow write attempts on read-only file systems;
	 * unless the file is a socket, fifo, or a block or
	 * character device resident on the file system.
	 */
	if (ap->a_accmode & VWRITE) {
		switch (vp->v_type) {
		case VDIR:
		case VLNK:
		case VREG:
			if (vp->v_mount->mnt_flag & MNT_RDONLY)
				return (EROFS);
			break;
		case VBAD:
		case VBLK:
		case VCHR:
		case VSOCK:
		case VFIFO:
		case VNON:
		default:
			break;
		}
	}

	/*
	 * Copy up to prevent checking (and failing) against
	 * underlying file system mounted read only.
	 * Check for read access first to prevent implicit
	 * copy of inaccessible underlying vnode.
	 */
	if (un->un_uppervp == NULLVP &&
	    (un->un_lowervp->v_type == VREG) &&
	    (ap->a_accmode & VWRITE)) {
		vn_lock(un->un_lowervp, LK_EXCLUSIVE | LK_RETRY);
		error = VOP_ACCESS(un->un_lowervp, VREAD, ap->a_cred);
		VOP_UNLOCK(un->un_lowervp);
		if (error == 0)
			error = union_copyup(un, 1, ap->a_cred, curlwp);
		if (error)
			return error;
	}

	/* With an upper vnode the check is delegated entirely to it. */
	if ((vp = un->un_uppervp) != NULLVP) {
		ap->a_vp = vp;
		return (VCALL(vp, VOFFSET(vop_access), ap));
	}

	if ((vp = un->un_lowervp) != NULLVP) {
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		ap->a_vp = vp;
		error = VCALL(vp, VOFFSET(vop_access), ap);
		if (error == 0) {
			/*
			 * For below-mounts also require that the mounter's
			 * credentials pass the same check.
			 */
			if (um->um_op == UNMNT_BELOW) {
				ap->a_cred = um->um_cred;
				error = VCALL(vp, VOFFSET(vop_access), ap);
			}
		}
		VOP_UNLOCK(vp);
		if (error)
			return (error);
	}

	return (error);
}
814
815/*
816 * We handle getattr only to change the fsid and
817 * track object sizes
818 */
int
union_getattr(void *v)
{
	struct vop_getattr_args /* {
		struct vnode *a_vp;
		struct vattr *a_vap;
		kauth_cred_t a_cred;
	} */ *ap = v;
	int error;
	struct union_node *un = VTOUNION(ap->a_vp);
	struct vnode *vp = un->un_uppervp;
	struct vattr *vap;
	struct vattr va;


	/*
	 * Some programs walk the filesystem hierarchy by counting
	 * links to directories to avoid stat'ing all the time.
	 * This means the link count on directories needs to be "correct".
	 * The only way to do that is to call getattr on both layers
	 * and fix up the link count.  The link count will not necessarily
	 * be accurate but will be large enough to defeat the tree walkers.
	 *
	 * To make life more interesting, some filesystems don't keep
	 * track of link counts in the expected way, and return a
	 * link count of `1' for those directories; if either of the
	 * component directories returns a link count of `1', we return a 1.
	 */

	vap = ap->a_vap;

	vp = un->un_uppervp;
	if (vp != NULLVP) {
		error = VOP_GETATTR(vp, vap, ap->a_cred);
		if (error)
			return (error);
		/* un_lock is taken here and released by union_newsize()
		 * (same pattern as in union_read()). */
		mutex_enter(&un->un_lock);
		union_newsize(ap->a_vp, vap->va_size, VNOVAL);
	}

	/*
	 * Decide whether to consult the lower layer as well: always when
	 * there is no upper layer; for directories both layers are read
	 * so the link counts can be merged below (the lower result then
	 * goes into the scratch vattr va).
	 */
	if (vp == NULLVP) {
		vp = un->un_lowervp;
	} else if (vp->v_type == VDIR) {
		vp = un->un_lowervp;
		if (vp != NULLVP)
			vap = &va;
	} else {
		vp = NULLVP;
	}

	if (vp != NULLVP) {
		/* Lower vnodes are not locked by our caller; lock here. */
		if (vp == un->un_lowervp)
			vn_lock(vp, LK_SHARED | LK_RETRY);
		error = VOP_GETATTR(vp, vap, ap->a_cred);
		if (vp == un->un_lowervp)
			VOP_UNLOCK(vp);
		if (error)
			return (error);
		mutex_enter(&un->un_lock);
		union_newsize(ap->a_vp, VNOVAL, vap->va_size);
	}

	if ((vap != ap->a_vap) && (vap->va_type == VDIR)) {
		/*
		 * Link count manipulation:
		 *	- If both return "2", return 2 (no subdirs)
		 *	- If one or the other return "1", return "1" (ENOCLUE)
		 */
		if ((ap->a_vap->va_nlink == 2) &&
		    (vap->va_nlink == 2))
			;
		else if (ap->a_vap->va_nlink != 1) {
			if (vap->va_nlink == 1)
				ap->a_vap->va_nlink = 1;
			else
				ap->a_vap->va_nlink += vap->va_nlink;
		}
	}
	/* Report the union mount's fsid, not the backing layer's. */
	ap->a_vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsidx.__fsid_val[0];
	return (0);
}
900
/*
 * union_setattr: apply attribute changes to the upper layer, copying a
 * lower regular file up first when needed.  A size-only truncation of
 * a special file in the lower layer is silently accepted (for O_TRUNC
 * opens of devices); all other lower-only changes return EROFS.
 */
int
union_setattr(void *v)
{
	struct vop_setattr_args /* {
		struct vnode *a_vp;
		struct vattr *a_vap;
		kauth_cred_t a_cred;
	} */ *ap = v;
	struct vattr *vap = ap->a_vap;
	struct vnode *vp = ap->a_vp;
	struct union_node *un = VTOUNION(vp);
	bool size_only;		/* All but va_size are VNOVAL. */
	int error;

	size_only = (vap->va_flags == VNOVAL && vap->va_uid == (uid_t)VNOVAL &&
	    vap->va_gid == (gid_t)VNOVAL && vap->va_atime.tv_sec == VNOVAL &&
	    vap->va_mtime.tv_sec == VNOVAL && vap->va_mode == (mode_t)VNOVAL);

	if (!size_only && (vp->v_mount->mnt_flag & MNT_RDONLY))
		return (EROFS);
	if (vap->va_size != VNOVAL) {
 		switch (vp->v_type) {
 		case VDIR:
 			return (EISDIR);
 		case VCHR:
 		case VBLK:
 		case VSOCK:
 		case VFIFO:
			break;
		case VREG:
		case VLNK:
 		default:
			/*
			 * Disallow write attempts if the filesystem is
			 * mounted read-only.
			 */
			if (vp->v_mount->mnt_flag & MNT_RDONLY)
				return (EROFS);
		}
	}

	/*
	 * Handle case of truncating lower object to zero size,
	 * by creating a zero length upper object.  This is to
	 * handle the case of open with O_TRUNC and O_CREAT.
	 */
	if ((un->un_uppervp == NULLVP) &&
	    /* assert(un->un_lowervp != NULLVP) */
	    (un->un_lowervp->v_type == VREG)) {
		/* Copy the data only when the new size is non-zero. */
		error = union_copyup(un, (vap->va_size != 0),
						ap->a_cred, curlwp);
		if (error)
			return (error);
	}

	/*
	 * Try to set attributes in upper layer, ignore size change to zero
	 * for devices to handle O_TRUNC and return read-only filesystem error
	 * otherwise.
	 */
	if (un->un_uppervp != NULLVP) {
		error = VOP_SETATTR(un->un_uppervp, vap, ap->a_cred);
		if ((error == 0) && (vap->va_size != VNOVAL)) {
			/* un_lock is released by union_newsize(). */
			mutex_enter(&un->un_lock);
			union_newsize(ap->a_vp, vap->va_size, VNOVAL);
		}
	} else {
		KASSERT(un->un_lowervp != NULLVP);
		if (NODE_IS_SPECIAL(un->un_lowervp)) {
			if (size_only &&
			    (vap->va_size == 0 || vap->va_size == VNOVAL))
				error = 0;
			else
				error = EROFS;
		} else {
			error = EROFS;
		}
	}

	return (error);
}
982
/*
 * union_read: read from whichever layer backs this node, locking the
 * lower vnode if that is the one used.  Afterwards resynchronize the
 * cached layer size from the uio offset in case the underlying object
 * grew behind our back.
 */
int
union_read(void *v)
{
	struct vop_read_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int  a_ioflag;
		kauth_cred_t a_cred;
	} */ *ap = v;
	int error;
	struct vnode *vp = OTHERVP(ap->a_vp);
	int dolock = (vp == LOWERVP(ap->a_vp));

	if (dolock)
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	error = VOP_READ(vp, ap->a_uio, ap->a_ioflag, ap->a_cred);
	if (dolock)
		VOP_UNLOCK(vp);

	/*
	 * XXX
	 * perhaps the size of the underlying object has changed under
	 * our feet.  take advantage of the offset information present
	 * in the uio structure.
	 */
	if (error == 0) {
		struct union_node *un = VTOUNION(ap->a_vp);
		off_t cur = ap->a_uio->uio_offset;
		off_t usz = VNOVAL, lsz = VNOVAL;

		/* un_lock is released by union_newsize() if we call it. */
		mutex_enter(&un->un_lock);
		if (vp == un->un_uppervp) {
			if (cur > un->un_uppersz)
				usz = cur;
		} else {
			if (cur > un->un_lowersz)
				lsz = cur;
		}

		if (usz != VNOVAL || lsz != VNOVAL)
			union_newsize(ap->a_vp, usz, lsz);
		else
			mutex_exit(&un->un_lock);
	}

	return (error);
}
1030
1031int
1032union_write(void *v)
1033{
1034	struct vop_read_args /* {
1035		struct vnode *a_vp;
1036		struct uio *a_uio;
1037		int  a_ioflag;
1038		kauth_cred_t a_cred;
1039	} */ *ap = v;
1040	int error;
1041	struct vnode *vp;
1042	struct union_node *un = VTOUNION(ap->a_vp);
1043
1044	vp = UPPERVP(ap->a_vp);
1045	if (vp == NULLVP) {
1046		vp = LOWERVP(ap->a_vp);
1047		if (NODE_IS_SPECIAL(vp)) {
1048			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1049			error = VOP_WRITE(vp, ap->a_uio, ap->a_ioflag,
1050			    ap->a_cred);
1051			VOP_UNLOCK(vp);
1052			return error;
1053		}
1054		panic("union: missing upper layer in write");
1055	}
1056
1057	error = VOP_WRITE(vp, ap->a_uio, ap->a_ioflag, ap->a_cred);
1058
1059	/*
1060	 * the size of the underlying object may be changed by the
1061	 * write.
1062	 */
1063	if (error == 0) {
1064		off_t cur = ap->a_uio->uio_offset;
1065
1066		mutex_enter(&un->un_lock);
1067		if (cur > un->un_uppersz)
1068			union_newsize(ap->a_vp, cur, VNOVAL);
1069		else
1070			mutex_exit(&un->un_lock);
1071	}
1072
1073	return (error);
1074}
1075
1076int
1077union_ioctl(void *v)
1078{
1079	struct vop_ioctl_args /* {
1080		struct vnode *a_vp;
1081		int  a_command;
1082		void *a_data;
1083		int  a_fflag;
1084		kauth_cred_t a_cred;
1085	} */ *ap = v;
1086	struct vnode *ovp = OTHERVP(ap->a_vp);
1087
1088	ap->a_vp = ovp;
1089	return (VCALL(ovp, VOFFSET(vop_ioctl), ap));
1090}
1091
1092int
1093union_poll(void *v)
1094{
1095	struct vop_poll_args /* {
1096		struct vnode *a_vp;
1097		int a_events;
1098	} */ *ap = v;
1099	struct vnode *ovp = OTHERVP(ap->a_vp);
1100
1101	ap->a_vp = ovp;
1102	return (VCALL(ovp, VOFFSET(vop_poll), ap));
1103}
1104
1105int
1106union_revoke(void *v)
1107{
1108	struct vop_revoke_args /* {
1109		struct vnode *a_vp;
1110		int a_flags;
1111		struct proc *a_p;
1112	} */ *ap = v;
1113	struct vnode *vp = ap->a_vp;
1114
1115	if (UPPERVP(vp))
1116		VOP_REVOKE(UPPERVP(vp), ap->a_flags);
1117	if (LOWERVP(vp))
1118		VOP_REVOKE(LOWERVP(vp), ap->a_flags);
1119	vgone(vp);	/* XXXAD?? */
1120	return (0);
1121}
1122
1123int
1124union_mmap(void *v)
1125{
1126	struct vop_mmap_args /* {
1127		struct vnode *a_vp;
1128		vm_prot_t a_prot;
1129		kauth_cred_t a_cred;
1130	} */ *ap = v;
1131	struct vnode *ovp = OTHERVP(ap->a_vp);
1132
1133	ap->a_vp = ovp;
1134	return (VCALL(ovp, VOFFSET(vop_mmap), ap));
1135}
1136
int
union_fsync(void *v)
{
	/* NOTE(review): the arg-struct comment previously listed offhi
	 * before offlo; vop_fsync_args is (vp, cred, flags, offlo, offhi),
	 * matching the VOP_FSYNC call below. */
	struct vop_fsync_args /* {
		struct vnode *a_vp;
		kauth_cred_t a_cred;
		int  a_flags;
		off_t a_offlo;
		off_t a_offhi;
	} */ *ap = v;
	int error = 0;
	struct vnode *targetvp;

	/*
	 * If vinvalbuf is calling us, it's a "shallow fsync" -- don't
	 * bother syncing the underlying vnodes, since (a) they'll be
	 * fsync'ed when reclaimed and (b) we could deadlock if
	 * they're locked; otherwise, pass it through to the
	 * underlying layer.
	 */
	if (ap->a_vp->v_type == VBLK || ap->a_vp->v_type == VCHR) {
		/* Special files: flush through the spec layer first. */
		error = spec_fsync(v);
		if (error)
			return error;
	}

	if (ap->a_flags & FSYNC_RECLAIM)
		return 0;

	targetvp = OTHERVP(ap->a_vp);
	if (targetvp != NULLVP) {
		/*
		 * Only a lower vnode is locked explicitly here; the
		 * upper vnode is presumably covered by the caller's
		 * lock on the union vnode (LOCKVP convention) --
		 * NOTE(review): confirm.
		 */
		int dolock = (targetvp == LOWERVP(ap->a_vp));

		if (dolock)
			vn_lock(targetvp, LK_EXCLUSIVE | LK_RETRY);
		error = VOP_FSYNC(targetvp, ap->a_cred, ap->a_flags,
			    ap->a_offlo, ap->a_offhi);
		if (dolock)
			VOP_UNLOCK(targetvp);
	}

	return (error);
}
1180
1181int
1182union_seek(void *v)
1183{
1184	struct vop_seek_args /* {
1185		struct vnode *a_vp;
1186		off_t  a_oldoff;
1187		off_t  a_newoff;
1188		kauth_cred_t a_cred;
1189	} */ *ap = v;
1190	struct vnode *ovp = OTHERVP(ap->a_vp);
1191
1192	ap->a_vp = ovp;
1193	return (VCALL(ovp, VOFFSET(vop_seek), ap));
1194}
1195
int
union_remove(void *v)
{
	struct vop_remove_v3_args /* {
		struct vnode *a_dvp;
		struct vnode *a_vp;
		struct componentname *a_cnp;
		nlink_t ctx_vp_new_nlink;
	} */ *ap = v;
	int error;
	struct union_node *dun = VTOUNION(ap->a_dvp);
	struct union_node *un = VTOUNION(ap->a_vp);
	struct componentname *cnp = ap->a_cnp;

	/* The directory must have an upper layer for us to modify. */
	if (dun->un_uppervp == NULLVP)
		panic("union remove: null upper vnode");

	if (un->un_uppervp != NULLVP) {
		/*
		 * The file exists in the upper layer: remove it there,
		 * requesting a whiteout when union_dowhiteout() says a
		 * lower-layer name would otherwise show through.
		 */
		struct vnode *dvp = dun->un_uppervp;
		struct vnode *vp = un->un_uppervp;

		/* Account for VOP_REMOVE to vrele vp.  */
		vref(vp);
		if (union_dowhiteout(un, cnp->cn_cred))
			cnp->cn_flags |= DOWHITEOUT;
		error = VOP_REMOVE(dvp, vp, cnp);
		if (!error)
			union_removed_upper(un);
		vrele(ap->a_vp);
	} else {
		/*
		 * Lower layer only: we cannot remove the file itself,
		 * so create a whiteout in the upper layer to mask it.
		 */
		error = union_mkwhiteout(
			MOUNTTOUNIONMOUNT(UNIONTOV(dun)->v_mount),
			dun->un_uppervp, ap->a_cnp, un);
		vput(ap->a_vp);
	}

	return (error);
}
1234
int
union_link(void *v)
{
	struct vop_link_v2_args /* {
		struct vnode *a_dvp;
		struct vnode *a_vp;
		struct componentname *a_cnp;
	} */ *ap = v;
	int error = 0;
	struct componentname *cnp = ap->a_cnp;
	struct union_node *dun;
	struct vnode *vp;
	struct vnode *dvp;

	dun = VTOUNION(ap->a_dvp);

	KASSERT((ap->a_cnp->cn_flags & LOCKPARENT) != 0);

	if (ap->a_dvp->v_op != ap->a_vp->v_op) {
		/* Source is not a union vnode; link it as-is. */
		vp = ap->a_vp;
	} else {
		struct union_node *un = VTOUNION(ap->a_vp);
		if (un->un_uppervp == NULLVP) {
			/* True when the copyup target directory is the
			 * very directory we hold locked, so its lock
			 * must be dropped around union_copyup(). */
			const bool droplock = (dun->un_uppervp == un->un_dirvp);

			/*
			 * Needs to be copied before we can link it.
			 */
			vn_lock(ap->a_vp, LK_EXCLUSIVE | LK_RETRY);
			if (droplock)
				VOP_UNLOCK(dun->un_uppervp);
			error = union_copyup(un, 1, cnp->cn_cred, curlwp);
			if (droplock) {
				vn_lock(dun->un_uppervp,
				    LK_EXCLUSIVE | LK_RETRY);
				/*
				 * During copyup, we dropped the lock on the
				 * dir and invalidated any saved namei lookup
				 * state for the directory we'll be entering
				 * the link in.  We need to re-run the lookup
				 * in that directory to reset any state needed
				 * for VOP_LINK.
				 * Call relookup on the union-layer to reset
				 * the state.
				 */
				vp  = NULLVP;
				if (dun->un_uppervp == NULLVP)
					 panic("union: null upperdvp?");
				error = relookup(ap->a_dvp, &vp, ap->a_cnp, 0);
				if (error) {
					VOP_UNLOCK(ap->a_vp);
					return EROFS;	/* ? */
				}
				if (vp != NULLVP) {
					/*
					 * The name we want to create has
					 * mysteriously appeared (a race?)
					 */
					error = EEXIST;
					VOP_UNLOCK(ap->a_vp);
					vput(vp);
					return (error);
				}
			}
			VOP_UNLOCK(ap->a_vp);
		}
		/* Link against the (possibly just copied-up) upper vnode. */
		vp = un->un_uppervp;
	}

	/* Links can only be made in a writable upper directory. */
	dvp = dun->un_uppervp;
	if (dvp == NULLVP)
		error = EROFS;

	if (error)
		return (error);

	return VOP_LINK(dvp, vp, cnp);
}
1313
int
union_rename(void *v)
{
	struct vop_rename_args /* {
		struct vnode *a_fdvp;
		struct vnode *a_fvp;
		struct componentname *a_fcnp;
		struct vnode *a_tdvp;
		struct vnode *a_tvp;
		struct componentname *a_tcnp;
	} */ *ap = v;
	int error;

	struct vnode *fdvp = ap->a_fdvp;
	struct vnode *fvp = ap->a_fvp;
	struct vnode *tdvp = ap->a_tdvp;
	struct vnode *tvp = ap->a_tvp;

	/*
	 * Substitute each union vnode with its upper-layer vnode and
	 * pass the whole rename down to the upper filesystem.
	 *
	 * Account for VOP_RENAME to vrele all nodes.
	 * Note: VOP_RENAME will unlock tdvp.
	 */

	if (fdvp->v_op == union_vnodeop_p) {	/* always true */
		struct union_node *un = VTOUNION(fdvp);
		if (un->un_uppervp == NULLVP) {
			/*
			 * this should never happen in normal
			 * operation but might if there was
			 * a problem creating the top-level shadow
			 * directory.
			 */
			error = EXDEV;
			goto bad;
		}

		fdvp = un->un_uppervp;
		vref(fdvp);
	}

	if (fvp->v_op == union_vnodeop_p) {	/* always true */
		struct union_node *un = VTOUNION(fvp);
		if (un->un_uppervp == NULLVP) {
			/* XXX: should do a copyup */
			error = EXDEV;
			goto bad;
		}

		/* Keep a lower-layer file of the same name hidden. */
		if (un->un_lowervp != NULLVP)
			ap->a_fcnp->cn_flags |= DOWHITEOUT;

		fvp = un->un_uppervp;
		vref(fvp);
	}

	if (tdvp->v_op == union_vnodeop_p) {
		struct union_node *un = VTOUNION(tdvp);
		if (un->un_uppervp == NULLVP) {
			/*
			 * this should never happen in normal
			 * operation but might if there was
			 * a problem creating the top-level shadow
			 * directory.
			 */
			error = EXDEV;
			goto bad;
		}

		tdvp = un->un_uppervp;
		vref(tdvp);
	}

	if (tvp != NULLVP && tvp->v_op == union_vnodeop_p) {
		struct union_node *un = VTOUNION(tvp);

		/* May be NULLVP when the target exists only below. */
		tvp = un->un_uppervp;
		if (tvp != NULLVP) {
			vref(tvp);
		}
	}

	error = VOP_RENAME(fdvp, fvp, ap->a_fcnp, tdvp, tvp, ap->a_tcnp);
	goto out;

bad:
	/* Failure before VOP_RENAME: drop the caller's references. */
	vput(tdvp);
	if (tvp != NULLVP)
		vput(tvp);
	vrele(fdvp);
	vrele(fvp);

out:
	/* Drop references to any union vnodes we substituted away. */
	if (fdvp != ap->a_fdvp) {
		vrele(ap->a_fdvp);
	}
	if (fvp != ap->a_fvp) {
		vrele(ap->a_fvp);
	}
	if (tdvp != ap->a_tdvp) {
		vrele(ap->a_tdvp);
	}
	if (tvp != ap->a_tvp) {
		vrele(ap->a_tvp);
	}
	return (error);
}
1420
int
union_mkdir(void *v)
{
	struct vop_mkdir_v3_args /* {
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
		struct vattr *a_vap;
	} */ *ap = v;
	struct union_node *un = VTOUNION(ap->a_dvp);
	struct vnode *dvp = un->un_uppervp;
	struct componentname *cnp = ap->a_cnp;

	if (dvp != NULLVP) {
		int error;
		struct vnode *vp;

		/* Create the directory in the upper layer... */
		vp = NULL;
		error = VOP_MKDIR(dvp, &vp, cnp, ap->a_vap);
		if (error) {
			vrele(ap->a_dvp);
			return (error);
		}

		/* ...and wrap it in a union vnode for the caller. */
		error = union_allocvp(ap->a_vpp, ap->a_dvp->v_mount, ap->a_dvp,
				NULLVP, cnp, vp, NULLVP, 1);
		if (error)
			vrele(vp);
		return (error);
	}

	/* No writable upper layer: cannot create directories. */
	return (EROFS);
}
1454
int
union_rmdir(void *v)
{
	struct vop_rmdir_v2_args /* {
		struct vnode *a_dvp;
		struct vnode *a_vp;
		struct componentname *a_cnp;
	} */ *ap = v;
	int error;
	struct union_node *dun = VTOUNION(ap->a_dvp);
	struct union_node *un = VTOUNION(ap->a_vp);
	struct componentname *cnp = ap->a_cnp;

	/* The parent must have an upper layer for us to modify. */
	if (dun->un_uppervp == NULLVP)
		panic("union rmdir: null upper vnode");

	/* Let union_check_rmdir() veto the removal (e.g. not empty). */
	error = union_check_rmdir(un, cnp->cn_cred);
	if (error) {
		vput(ap->a_vp);
		return error;
	}

	if (un->un_uppervp != NULLVP) {
		/*
		 * Remove the upper-layer directory, requesting a
		 * whiteout when needed to keep a lower directory of
		 * the same name hidden.
		 */
		struct vnode *dvp = dun->un_uppervp;
		struct vnode *vp = un->un_uppervp;

		/* Account for VOP_RMDIR to vrele vp.  */
		vref(vp);
		if (union_dowhiteout(un, cnp->cn_cred))
			cnp->cn_flags |= DOWHITEOUT;
		error = VOP_RMDIR(dvp, vp, ap->a_cnp);
		if (!error)
			union_removed_upper(un);
		vrele(ap->a_vp);
	} else {
		/* Lower layer only: mask it with a whiteout. */
		error = union_mkwhiteout(
			MOUNTTOUNIONMOUNT(UNIONTOV(dun)->v_mount),
			dun->un_uppervp, ap->a_cnp, un);
		vput(ap->a_vp);
	}

	return (error);
}
1498
1499int
1500union_symlink(void *v)
1501{
1502	struct vop_symlink_v3_args /* {
1503		struct vnode *a_dvp;
1504		struct vnode **a_vpp;
1505		struct componentname *a_cnp;
1506		struct vattr *a_vap;
1507		char *a_target;
1508	} */ *ap = v;
1509	struct union_node *un = VTOUNION(ap->a_dvp);
1510	struct vnode *dvp = un->un_uppervp;
1511	struct componentname *cnp = ap->a_cnp;
1512
1513	if (dvp != NULLVP) {
1514		int error;
1515
1516		error = VOP_SYMLINK(dvp, ap->a_vpp, cnp, ap->a_vap,
1517				    ap->a_target);
1518		return (error);
1519	}
1520
1521	return (EROFS);
1522}
1523
1524/*
1525 * union_readdir works in concert with getdirentries and
1526 * readdir(3) to provide a list of entries in the unioned
1527 * directories.  getdirentries is responsible for walking
1528 * down the union stack.  readdir(3) is responsible for
1529 * eliminating duplicate names from the returned data stream.
1530 */
1531int
1532union_readdir(void *v)
1533{
1534	struct vop_readdir_args /* {
1535		struct vnodeop_desc *a_desc;
1536		struct vnode *a_vp;
1537		struct uio *a_uio;
1538		kauth_cred_t a_cred;
1539		int *a_eofflag;
1540		u_long *a_cookies;
1541		int a_ncookies;
1542	} */ *ap = v;
1543	struct union_node *un = VTOUNION(ap->a_vp);
1544	struct vnode *vp;
1545	int dolock, error;
1546
1547	if (un->un_hooknode) {
1548		KASSERT(un->un_uppervp == NULLVP);
1549		KASSERT(un->un_lowervp != NULLVP);
1550		vp = un->un_lowervp;
1551		dolock = 1;
1552	} else {
1553		vp = un->un_uppervp;
1554		dolock = 0;
1555	}
1556	if (vp == NULLVP)
1557		return 0;
1558
1559	if (dolock)
1560		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1561	ap->a_vp = vp;
1562	error = VCALL(vp, VOFFSET(vop_readdir), ap);
1563	if (dolock)
1564		VOP_UNLOCK(vp);
1565
1566	return error;
1567}
1568
1569int
1570union_readlink(void *v)
1571{
1572	struct vop_readlink_args /* {
1573		struct vnode *a_vp;
1574		struct uio *a_uio;
1575		kauth_cred_t a_cred;
1576	} */ *ap = v;
1577	int error;
1578	struct vnode *vp = OTHERVP(ap->a_vp);
1579	int dolock = (vp == LOWERVP(ap->a_vp));
1580
1581	if (dolock)
1582		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1583	ap->a_vp = vp;
1584	error = VCALL(vp, VOFFSET(vop_readlink), ap);
1585	if (dolock)
1586		VOP_UNLOCK(vp);
1587
1588	return (error);
1589}
1590
1591int
1592union_abortop(void *v)
1593{
1594	struct vop_abortop_args /* {
1595		struct vnode *a_dvp;
1596		struct componentname *a_cnp;
1597	} */ *ap = v;
1598
1599	KASSERT(UPPERVP(ap->a_dvp) != NULL);
1600
1601	ap->a_dvp = UPPERVP(ap->a_dvp);
1602	return VCALL(ap->a_dvp, VOFFSET(vop_abortop), ap);
1603}
1604
int
union_inactive(void *v)
{
	struct vop_inactive_v2_args /* {
		const struct vnodeop_desc *a_desc;
		struct vnode *a_vp;
		bool *a_recycle;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct union_node *un = VTOUNION(vp);
	struct vnode **vpp;

	/*
	 * Do nothing (and _don't_ bypass).
	 * Wait to vrele lowervp until reclaim,
	 * so that until then our union_node is in the
	 * cache and reusable.
	 *
	 * NEEDSWORK: Someday, consider inactive'ing
	 * the lowervp and then trying to reactivate it
	 * with capabilities (v_id)
	 * like they do in the name lookup cache code.
	 * That's too much work for now.
	 */

	/* Release the NULLVP-terminated cache of directory vnodes. */
	if (un->un_dircache != 0) {
		for (vpp = un->un_dircache; *vpp != NULLVP; vpp++)
			vrele(*vpp);
		free(un->un_dircache, M_TEMP);
		un->un_dircache = 0;
	}

	/* Ask for recycling unless the node is still in the union cache. */
	*ap->a_recycle = ((un->un_cflags & UN_CACHED) == 0);

	return (0);
}
1641
int
union_reclaim(void *v)
{
	struct vop_reclaim_v2_args /* {
		struct vnode *a_vp;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct vnode *uvp = UPPERVP(vp);

	VOP_UNLOCK(vp);

	if (uvp != NULL) {
		/*
		 * Give back the write counts charged to the upper
		 * vnode on behalf of this union vnode.  Both vnodes
		 * share one interlock (asserted below).
		 */
		mutex_enter(uvp->v_interlock);
		KASSERT(vp->v_interlock == uvp->v_interlock);
		uvp->v_writecount -= vp->v_writecount;
		mutex_exit(uvp->v_interlock);
	}

	/* Tear down and free the union node itself. */
	union_freevp(vp);

	return (0);
}
1664
1665static int
1666union_lock1(struct vnode *vp, struct vnode *lockvp, int flags)
1667{
1668	struct vop_lock_args ap;
1669
1670	ap.a_desc = VDESC(vop_lock);
1671	ap.a_vp = lockvp;
1672	ap.a_flags = flags;
1673
1674	if (lockvp == vp)
1675		return genfs_lock(&ap);
1676	else
1677		return VCALL(ap.a_vp, VOFFSET(vop_lock), &ap);
1678}
1679
1680static int
1681union_unlock1(struct vnode *vp, struct vnode *lockvp)
1682{
1683	struct vop_unlock_args ap;
1684
1685	ap.a_desc = VDESC(vop_unlock);
1686	ap.a_vp = lockvp;
1687
1688	if (lockvp == vp)
1689		return genfs_unlock(&ap);
1690	else
1691		return VCALL(ap.a_vp, VOFFSET(vop_unlock), &ap);
1692}
1693
int
union_lock(void *v)
{
	struct vop_lock_args /* {
		struct vnode *a_vp;
		int a_flags;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp, *lockvp;
	struct union_node *un = VTOUNION(vp);
	int flags = ap->a_flags;
	int error;

	/*
	 * Non-blocking case: take un_lock opportunistically and try
	 * the vnode lock exactly once.
	 */
	if ((flags & LK_NOWAIT) != 0) {
		if (!mutex_tryenter(&un->un_lock))
			return EBUSY;
		lockvp = LOCKVP(vp);
		error = union_lock1(vp, lockvp, flags);
		mutex_exit(&un->un_lock);
		return error;
	}

	/*
	 * Blocking case: the lock vnode (LOCKVP) can change while we
	 * sleep, so loop until the vnode we locked is still the
	 * current lock vnode.  Downgrades/upgrades are returned
	 * without re-checking.
	 */
	mutex_enter(&un->un_lock);
	for (;;) {
		lockvp = LOCKVP(vp);
		mutex_exit(&un->un_lock);
		error = union_lock1(vp, lockvp, flags);
		if (error != 0 || (flags & (LK_DOWNGRADE | LK_UPGRADE)) != 0)
			return error;
		mutex_enter(&un->un_lock);
		if (lockvp == LOCKVP(vp))
			break;
		/* Lost a race with a lock-vnode change: undo, retry. */
		union_unlock1(vp, lockvp);
	}
	mutex_exit(&un->un_lock);

	return error;
}
1731
1732int
1733union_unlock(void *v)
1734{
1735	struct vop_unlock_args /* {
1736		struct vnode *a_vp;
1737		int a_flags;
1738	} */ *ap = v;
1739	struct vnode *vp = ap->a_vp, *lockvp;
1740
1741	lockvp = LOCKVP(vp);
1742	union_unlock1(vp, lockvp);
1743
1744	return 0;
1745}
1746
1747int
1748union_bmap(void *v)
1749{
1750	struct vop_bmap_args /* {
1751		struct vnode *a_vp;
1752		daddr_t  a_bn;
1753		struct vnode **a_vpp;
1754		daddr_t *a_bnp;
1755		int *a_runp;
1756	} */ *ap = v;
1757	int error;
1758	struct vnode *vp = OTHERVP(ap->a_vp);
1759	int dolock = (vp == LOWERVP(ap->a_vp));
1760
1761	if (dolock)
1762		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1763	ap->a_vp = vp;
1764	error = VCALL(vp, VOFFSET(vop_bmap), ap);
1765	if (dolock)
1766		VOP_UNLOCK(vp);
1767
1768	return (error);
1769}
1770
1771int
1772union_print(void *v)
1773{
1774	struct vop_print_args /* {
1775		struct vnode *a_vp;
1776	} */ *ap = v;
1777	struct vnode *vp = ap->a_vp;
1778
1779	printf("\ttag VT_UNION, vp=%p, uppervp=%p, lowervp=%p\n",
1780			vp, UPPERVP(vp), LOWERVP(vp));
1781	if (UPPERVP(vp) != NULLVP)
1782		vprint("union: upper", UPPERVP(vp));
1783	if (LOWERVP(vp) != NULLVP)
1784		vprint("union: lower", LOWERVP(vp));
1785	if (VTOUNION(vp)->un_dircache) {
1786		struct vnode **vpp;
1787		for (vpp = VTOUNION(vp)->un_dircache; *vpp != NULLVP; vpp++)
1788			vprint("dircache:", *vpp);
1789	}
1790
1791	return (0);
1792}
1793
1794int
1795union_islocked(void *v)
1796{
1797	struct vop_islocked_args /* {
1798		struct vnode *a_vp;
1799	} */ *ap = v;
1800	struct vnode *vp;
1801	struct union_node *un;
1802
1803	un = VTOUNION(ap->a_vp);
1804	mutex_enter(&un->un_lock);
1805	vp = LOCKVP(ap->a_vp);
1806	mutex_exit(&un->un_lock);
1807
1808	if (vp == ap->a_vp)
1809		return genfs_islocked(ap);
1810	else
1811		return VOP_ISLOCKED(vp);
1812}
1813
1814int
1815union_pathconf(void *v)
1816{
1817	struct vop_pathconf_args /* {
1818		struct vnode *a_vp;
1819		int a_name;
1820		int *a_retval;
1821	} */ *ap = v;
1822	int error;
1823	struct vnode *vp = OTHERVP(ap->a_vp);
1824	int dolock = (vp == LOWERVP(ap->a_vp));
1825
1826	if (dolock)
1827		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1828	ap->a_vp = vp;
1829	error = VCALL(vp, VOFFSET(vop_pathconf), ap);
1830	if (dolock)
1831		VOP_UNLOCK(vp);
1832
1833	return (error);
1834}
1835
1836int
1837union_advlock(void *v)
1838{
1839	struct vop_advlock_args /* {
1840		struct vnode *a_vp;
1841		void *a_id;
1842		int  a_op;
1843		struct flock *a_fl;
1844		int  a_flags;
1845	} */ *ap = v;
1846	struct vnode *ovp = OTHERVP(ap->a_vp);
1847
1848	ap->a_vp = ovp;
1849	return (VCALL(ovp, VOFFSET(vop_advlock), ap));
1850}
1851
int
union_strategy(void *v)
{
	struct vop_strategy_args /* {
		struct vnode *a_vp;
		struct buf *a_bp;
	} */ *ap = v;
	struct vnode *ovp = OTHERVP(ap->a_vp);
	struct buf *bp = ap->a_bp;

	KASSERT(ovp != NULLVP);
	/* Sanity: a write buffer must never target the lower layer
	 * (except for special files, which live there legitimately). */
	if (!NODE_IS_SPECIAL(ovp))
		KASSERT((bp->b_flags & B_READ) || ovp != LOWERVP(bp->b_vp));

	return (VOP_STRATEGY(ovp, bp));
}
1868
int
union_bwrite(void *v)
{
	struct vop_bwrite_args /* {
		struct vnode *a_vp;
		struct buf *a_bp;
	} */ *ap = v;
	struct vnode *ovp = OTHERVP(ap->a_vp);
	struct buf *bp = ap->a_bp;

	KASSERT(ovp != NULLVP);
	/* Sanity: a write buffer must never target the lower layer
	 * (except for special files, which live there legitimately). */
	if (!NODE_IS_SPECIAL(ovp))
		KASSERT((bp->b_flags & B_READ) || ovp != LOWERVP(bp->b_vp));

	return (VOP_BWRITE(ovp, bp));
}
1885
int
union_getpages(void *v)
{
	struct vop_getpages_args /* {
		struct vnode *a_vp;
		voff_t a_offset;
		struct vm_page **a_m;
		int *a_count;
		int a_centeridx;
		vm_prot_t a_access_type;
		int a_advice;
		int a_flags;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;

	KASSERT(rw_lock_held(vp->v_uobj.vmobjlock));

	/*
	 * PGO_LOCKED requests cannot be serviced from this layer;
	 * EBUSY makes the caller retry without the locked constraint.
	 */
	if (ap->a_flags & PGO_LOCKED) {
		return EBUSY;
	}
	ap->a_vp = OTHERVP(vp);
	/* Union and backing vnode share the same vm object lock. */
	KASSERT(vp->v_uobj.vmobjlock == ap->a_vp->v_uobj.vmobjlock);

	/* Just pass the request on to the underlying layer. */
	return VCALL(ap->a_vp, VOFFSET(vop_getpages), ap);
}
1912
int
union_putpages(void *v)
{
	struct vop_putpages_args /* {
		struct vnode *a_vp;
		voff_t a_offlo;
		voff_t a_offhi;
		int a_flags;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;

	KASSERT(rw_lock_held(vp->v_uobj.vmobjlock));

	ap->a_vp = OTHERVP(vp);
	/* Union and backing vnode share the same vm object lock. */
	KASSERT(vp->v_uobj.vmobjlock == ap->a_vp->v_uobj.vmobjlock);

	/*
	 * On reclaim there is nothing of our own to flush; just drop
	 * the object lock, as VOP_PUTPAGES is expected to consume it.
	 */
	if (ap->a_flags & PGO_RECLAIM) {
		rw_exit(vp->v_uobj.vmobjlock);
		return 0;
	}

	/* Just pass the request on to the underlying layer. */
	return VCALL(ap->a_vp, VOFFSET(vop_putpages), ap);
}
1937
1938int
1939union_kqfilter(void *v)
1940{
1941	struct vop_kqfilter_args /* {
1942		struct vnode	*a_vp;
1943		struct knote	*a_kn;
1944	} */ *ap = v;
1945	int error;
1946
1947	/*
1948	 * We watch either the upper layer file (if it already exists),
1949	 * or the lower layer one. If there is lower layer file only
1950	 * at this moment, we will keep watching that lower layer file
1951	 * even if upper layer file would be created later on.
1952	 */
1953	if (UPPERVP(ap->a_vp))
1954		error = VOP_KQFILTER(UPPERVP(ap->a_vp), ap->a_kn);
1955	else if (LOWERVP(ap->a_vp))
1956		error = VOP_KQFILTER(LOWERVP(ap->a_vp), ap->a_kn);
1957	else {
1958		/* panic? */
1959		error = EOPNOTSUPP;
1960	}
1961
1962	return (error);
1963}
1964