/* union_vnops.c revision 1.73 */
1/*	$NetBSD: union_vnops.c,v 1.73 2020/05/16 18:31:50 christos Exp $	*/
2
3/*
4 * Copyright (c) 1992, 1993, 1994, 1995
5 *	The Regents of the University of California.  All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Jan-Simon Pendry.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its contributors
19 *    may be used to endorse or promote products derived from this software
20 *    without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 *
34 *	@(#)union_vnops.c	8.33 (Berkeley) 7/31/95
35 */
36
37/*
38 * Copyright (c) 1992, 1993, 1994, 1995 Jan-Simon Pendry.
39 *
40 * This code is derived from software contributed to Berkeley by
41 * Jan-Simon Pendry.
42 *
43 * Redistribution and use in source and binary forms, with or without
44 * modification, are permitted provided that the following conditions
45 * are met:
46 * 1. Redistributions of source code must retain the above copyright
47 *    notice, this list of conditions and the following disclaimer.
48 * 2. Redistributions in binary form must reproduce the above copyright
49 *    notice, this list of conditions and the following disclaimer in the
50 *    documentation and/or other materials provided with the distribution.
51 * 3. All advertising materials mentioning features or use of this software
52 *    must display the following acknowledgement:
53 *	This product includes software developed by the University of
54 *	California, Berkeley and its contributors.
55 * 4. Neither the name of the University nor the names of its contributors
56 *    may be used to endorse or promote products derived from this software
57 *    without specific prior written permission.
58 *
59 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
60 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
61 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
62 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
63 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
64 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
65 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
66 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
67 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
68 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
69 * SUCH DAMAGE.
70 *
71 *	@(#)union_vnops.c	8.33 (Berkeley) 7/31/95
72 */
73
74#include <sys/cdefs.h>
75__KERNEL_RCSID(0, "$NetBSD: union_vnops.c,v 1.73 2020/05/16 18:31:50 christos Exp $");
76
77#include <sys/param.h>
78#include <sys/systm.h>
79#include <sys/proc.h>
80#include <sys/file.h>
81#include <sys/time.h>
82#include <sys/stat.h>
83#include <sys/vnode.h>
84#include <sys/mount.h>
85#include <sys/namei.h>
86#include <sys/malloc.h>
87#include <sys/buf.h>
88#include <sys/queue.h>
89#include <sys/lock.h>
90#include <sys/kauth.h>
91
92#include <fs/union/union.h>
93#include <miscfs/genfs/genfs.h>
94#include <miscfs/specfs/specdev.h>
95
/*
 * Forward declarations for the union layer's vnode operations;
 * each is wired into union_vnodeop_entries[] below.
 */
int union_lookup(void *);
int union_create(void *);
int union_whiteout(void *);
int union_mknod(void *);
int union_open(void *);
int union_close(void *);
int union_access(void *);
int union_getattr(void *);
int union_setattr(void *);
int union_read(void *);
int union_write(void *);
int union_ioctl(void *);
int union_poll(void *);
int union_revoke(void *);
int union_mmap(void *);
int union_fsync(void *);
int union_seek(void *);
int union_remove(void *);
int union_link(void *);
int union_rename(void *);
int union_mkdir(void *);
int union_rmdir(void *);
int union_symlink(void *);
int union_readdir(void *);
int union_readlink(void *);
int union_abortop(void *);
int union_inactive(void *);
int union_reclaim(void *);
int union_lock(void *);
int union_unlock(void *);
int union_bmap(void *);
int union_print(void *);
int union_islocked(void *);
int union_pathconf(void *);
int union_advlock(void *);
int union_strategy(void *);
int union_bwrite(void *);
int union_getpages(void *);
int union_putpages(void *);
int union_kqfilter(void *);

/* Single-layer lookup helper shared by union_lookup(). */
static int union_lookup1(struct vnode *, struct vnode **,
			      struct vnode **, struct componentname *);
139
140
141/*
142 * Global vfs data structures
143 */
144int (**union_vnodeop_p)(void *);
145const struct vnodeopv_entry_desc union_vnodeop_entries[] = {
146	{ &vop_default_desc, vn_default_error },
147	{ &vop_lookup_desc, union_lookup },		/* lookup */
148	{ &vop_create_desc, union_create },		/* create */
149	{ &vop_whiteout_desc, union_whiteout },		/* whiteout */
150	{ &vop_mknod_desc, union_mknod },		/* mknod */
151	{ &vop_open_desc, union_open },			/* open */
152	{ &vop_close_desc, union_close },		/* close */
153	{ &vop_access_desc, union_access },		/* access */
154	{ &vop_accessx_desc, genfs_accessx },		/* accessx */
155	{ &vop_getattr_desc, union_getattr },		/* getattr */
156	{ &vop_setattr_desc, union_setattr },		/* setattr */
157	{ &vop_read_desc, union_read },			/* read */
158	{ &vop_write_desc, union_write },		/* write */
159	{ &vop_fallocate_desc, genfs_eopnotsupp },	/* fallocate */
160	{ &vop_fdiscard_desc, genfs_eopnotsupp },	/* fdiscard */
161	{ &vop_ioctl_desc, union_ioctl },		/* ioctl */
162	{ &vop_poll_desc, union_poll },			/* select */
163	{ &vop_revoke_desc, union_revoke },		/* revoke */
164	{ &vop_mmap_desc, union_mmap },			/* mmap */
165	{ &vop_fsync_desc, union_fsync },		/* fsync */
166	{ &vop_seek_desc, union_seek },			/* seek */
167	{ &vop_remove_desc, union_remove },		/* remove */
168	{ &vop_link_desc, union_link },			/* link */
169	{ &vop_rename_desc, union_rename },		/* rename */
170	{ &vop_mkdir_desc, union_mkdir },		/* mkdir */
171	{ &vop_rmdir_desc, union_rmdir },		/* rmdir */
172	{ &vop_symlink_desc, union_symlink },		/* symlink */
173	{ &vop_readdir_desc, union_readdir },		/* readdir */
174	{ &vop_readlink_desc, union_readlink },		/* readlink */
175	{ &vop_abortop_desc, union_abortop },		/* abortop */
176	{ &vop_inactive_desc, union_inactive },		/* inactive */
177	{ &vop_reclaim_desc, union_reclaim },		/* reclaim */
178	{ &vop_lock_desc, union_lock },			/* lock */
179	{ &vop_unlock_desc, union_unlock },		/* unlock */
180	{ &vop_bmap_desc, union_bmap },			/* bmap */
181	{ &vop_strategy_desc, union_strategy },		/* strategy */
182	{ &vop_bwrite_desc, union_bwrite },		/* bwrite */
183	{ &vop_print_desc, union_print },		/* print */
184	{ &vop_islocked_desc, union_islocked },		/* islocked */
185	{ &vop_pathconf_desc, union_pathconf },		/* pathconf */
186	{ &vop_advlock_desc, union_advlock },		/* advlock */
187	{ &vop_getpages_desc, union_getpages },		/* getpages */
188	{ &vop_putpages_desc, union_putpages },		/* putpages */
189	{ &vop_kqfilter_desc, union_kqfilter },		/* kqfilter */
190	{ NULL, NULL }
191};
192const struct vnodeopv_desc union_vnodeop_opv_desc =
193	{ &union_vnodeop_p, union_vnodeop_entries };
194
195#define NODE_IS_SPECIAL(vp) \
196	((vp)->v_type == VBLK || (vp)->v_type == VCHR || \
197	(vp)->v_type == VSOCK || (vp)->v_type == VFIFO)
198
/*
 * union_lookup1:
 *	Look up one component within a single layer (upper or lower),
 *	crossing mount points the way namei would.
 *
 *	udvp is the layer's root vnode and acts as a stop marker so
 *	`..' never climbs above this union mount.  *dvpp is the locked
 *	directory to search; it may be replaced when `..' steps back
 *	across a mount boundary.  On success the result is returned
 *	locked in *vpp.
 */
static int
union_lookup1(struct vnode *udvp, struct vnode **dvpp, struct vnode **vpp,
	struct componentname *cnp)
{
	int error;
	struct vnode *tdvp;
	struct vnode *dvp;
	struct mount *mp;

	dvp = *dvpp;

	/*
	 * If stepping up the directory tree, check for going
	 * back across the mount point, in which case do what
	 * lookup would do by stepping back down the mount
	 * hierarchy.
	 */
	if (cnp->cn_flags & ISDOTDOT) {
		while ((dvp != udvp) && (dvp->v_vflag & VV_ROOT)) {
			/*
			 * Don't do the NOCROSSMOUNT check
			 * at this level.  By definition,
			 * union fs deals with namespaces, not
			 * filesystems.
			 */
			tdvp = dvp;
			*dvpp = dvp = dvp->v_mount->mnt_vnodecovered;
			VOP_UNLOCK(tdvp);
			vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
		}
	}

        error = VOP_LOOKUP(dvp, &tdvp, cnp);
	if (error)
		return (error);
	if (dvp != tdvp) {
		/*
		 * Lock the looked-up vnode.  For `..' drop the
		 * parent's lock first and re-take it afterwards,
		 * preserving parent-before-child lock ordering.
		 */
		if (cnp->cn_flags & ISDOTDOT)
			VOP_UNLOCK(dvp);
		error = vn_lock(tdvp, LK_EXCLUSIVE);
		if (cnp->cn_flags & ISDOTDOT)
			vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
		if (error) {
			vrele(tdvp);
			return error;
		}
		dvp = tdvp;
	}

	/*
	 * Lastly check if the current node is a mount point in
	 * which case walk up the mount hierarchy making sure not to
	 * bump into the root of the mount tree (ie. dvp != udvp).
	 */
	while (dvp != udvp && (dvp->v_type == VDIR) &&
	       (mp = dvp->v_mountedhere)) {
		/* Retry until the covering mount can be busied. */
		if (vfs_busy(mp))
			continue;
		vput(dvp);
		error = VFS_ROOT(mp, LK_EXCLUSIVE, &tdvp);
		vfs_unbusy(mp);
		if (error) {
			return (error);
		}
		dvp = tdvp;
	}

	*vpp = dvp;
	return (0);
}
268
/*
 * union_lookup:
 *	Look the component up in both layers and combine the results
 *	into a single union vnode.  The lower layer is only searched
 *	with nameiop LOOKUP, and a whiteout entry or an opaque upper
 *	directory hides lower-layer entries.  May restart (goto start)
 *	after creating a shadow directory in the upper layer.
 */
int
union_lookup(void *v)
{
	struct vop_lookup_v2_args /* {
		struct vnodeop_desc *a_desc;
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
	} */ *ap = v;
	int error;
	int uerror, lerror;
	struct vnode *uppervp, *lowervp;
	struct vnode *upperdvp, *lowerdvp;
	struct vnode *dvp = ap->a_dvp;
	struct union_node *dun = VTOUNION(dvp);
	struct componentname *cnp = ap->a_cnp;
	struct union_mount *um = MOUNTTOUNIONMOUNT(dvp->v_mount);
	kauth_cred_t saved_cred = NULL;
	int iswhiteout;
	struct vattr va;

#ifdef notyet
	if (cnp->cn_namelen == 3 &&
			cnp->cn_nameptr[2] == '.' &&
			cnp->cn_nameptr[1] == '.' &&
			cnp->cn_nameptr[0] == '.') {
		dvp = *ap->a_vpp = LOWERVP(ap->a_dvp);
		if (dvp == NULLVP)
			return (ENOENT);
		vref(dvp);
		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
		return (0);
	}
#endif

	/* Refuse delete/rename of the last component on a read-only mount. */
	if ((cnp->cn_flags & ISLASTCN) &&
	    (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
		return (EROFS);

start:
	upperdvp = dun->un_uppervp;
	lowerdvp = dun->un_lowervp;
	uppervp = NULLVP;
	lowervp = NULLVP;
	iswhiteout = 0;

	/*
	 * do the lookup in the upper level.
	 * if that level consumes additional pathnames,
	 * then assume that something special is going
	 * on and just return that vnode.
	 */
	if (upperdvp != NULLVP) {
		uerror = union_lookup1(um->um_uppervp, &upperdvp,
					&uppervp, cnp);
		if (cnp->cn_consume != 0) {
			if (uppervp != upperdvp)
				VOP_UNLOCK(uppervp);
			*ap->a_vpp = uppervp;
			return (uerror);
		}
		if (uerror == ENOENT || uerror == EJUSTRETURN) {
			/*
			 * The name is absent from the upper layer.  A
			 * whiteout entry, or an opaque parent directory,
			 * hides any lower-layer entry of the same name.
			 */
			if (cnp->cn_flags & ISWHITEOUT) {
				iswhiteout = 1;
			} else if (lowerdvp != NULLVP) {
				lerror = VOP_GETATTR(upperdvp, &va,
					cnp->cn_cred);
				if (lerror == 0 && (va.va_flags & OPAQUE))
					iswhiteout = 1;
			}
		}
	} else {
		uerror = ENOENT;
	}

	/*
	 * in a similar way to the upper layer, do the lookup
	 * in the lower layer.   this time, if there is some
	 * component magic going on, then vput whatever we got
	 * back from the upper layer and return the lower vnode
	 * instead.
	 */
	if (lowerdvp != NULLVP && !iswhiteout) {
		int nameiop;

		vn_lock(lowerdvp, LK_EXCLUSIVE | LK_RETRY);

		/*
		 * Only do a LOOKUP on the bottom node, since
		 * we won't be making changes to it anyway.
		 */
		nameiop = cnp->cn_nameiop;
		cnp->cn_nameiop = LOOKUP;
		if (um->um_op == UNMNT_BELOW) {
			/* "below" mounts search the lower layer with the
			 * mount owner's credentials. */
			saved_cred = cnp->cn_cred;
			cnp->cn_cred = um->um_cred;
		}

		/*
		 * we shouldn't have to worry about locking interactions
		 * between the lower layer and our union layer (w.r.t.
		 * `..' processing) because we don't futz with lowervp
		 * locks in the union-node instantiation code path.
		 */
		lerror = union_lookup1(um->um_lowervp, &lowerdvp,
				&lowervp, cnp);
		if (um->um_op == UNMNT_BELOW)
			cnp->cn_cred = saved_cred;
		cnp->cn_nameiop = nameiop;

		if (lowervp != lowerdvp)
			VOP_UNLOCK(lowerdvp);

		if (cnp->cn_consume != 0) {
			if (uppervp != NULLVP) {
				if (uppervp == upperdvp)
					vrele(uppervp);
				else
					vput(uppervp);
				uppervp = NULLVP;
			}
			*ap->a_vpp = lowervp;
			return (lerror);
		}
	} else {
		lerror = ENOENT;
		if ((cnp->cn_flags & ISDOTDOT) && dun->un_pvp != NULLVP) {
			/* For `..' fall back to the parent union node's
			 * lower vnode when this node lacks a lower layer. */
			lowervp = LOWERVP(dun->un_pvp);
			if (lowervp != NULLVP) {
				vref(lowervp);
				vn_lock(lowervp, LK_EXCLUSIVE | LK_RETRY);
				lerror = 0;
			}
		}
	}

	/*
	 * EJUSTRETURN is used by underlying filesystems to indicate that
	 * a directory modification op was started successfully.
	 * This will only happen in the upper layer, since
	 * the lower layer only does LOOKUPs.
	 * If this union is mounted read-only, bounce it now.
	 */

	if ((uerror == EJUSTRETURN) && (cnp->cn_flags & ISLASTCN) &&
	    (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
	    ((cnp->cn_nameiop == CREATE) || (cnp->cn_nameiop == RENAME)))
		uerror = EROFS;

	/*
	 * at this point, we have uerror and lerror indicating
	 * possible errors with the lookups in the upper and lower
	 * layers.  additionally, uppervp and lowervp are (locked)
	 * references to existing vnodes in the upper and lower layers.
	 *
	 * there are now three cases to consider.
	 * 1. if both layers returned an error, then return whatever
	 *    error the upper layer generated.
	 *
	 * 2. if the top layer failed and the bottom layer succeeded
	 *    then two subcases occur.
	 *    a.  the bottom vnode is not a directory, in which
	 *	  case just return a new union vnode referencing
	 *	  an empty top layer and the existing bottom layer.
	 *    b.  the bottom vnode is a directory, in which case
	 *	  create a new directory in the top-level and
	 *	  continue as in case 3.
	 *
	 * 3. if the top layer succeeded then return a new union
	 *    vnode referencing whatever the new top layer and
	 *    whatever the bottom layer returned.
	 */

	*ap->a_vpp = NULLVP;


	/* case 1. */
	if ((uerror != 0) && (lerror != 0)) {
		return (uerror);
	}

	/* case 2. */
	if (uerror != 0 /* && (lerror == 0) */ ) {
		if (lowervp->v_type == VDIR) { /* case 2b. */
			/*
			 * We may be racing another process to make the
			 * upper-level shadow directory.  Be careful with
			 * locks/etc!
			 * If we have to create a shadow directory and want
			 * to commit the node we have to restart the lookup
			 * to get the componentname right.
			 */
			if (upperdvp) {
				VOP_UNLOCK(upperdvp);
				uerror = union_mkshadow(um, upperdvp, cnp,
				    &uppervp);
				vn_lock(upperdvp, LK_EXCLUSIVE | LK_RETRY);
				if (uerror == 0 && cnp->cn_nameiop != LOOKUP) {
					vrele(uppervp);
					if (lowervp != NULLVP)
						vput(lowervp);
					goto start;
				}
			}
			if (uerror) {
				if (lowervp != NULLVP) {
					vput(lowervp);
					lowervp = NULLVP;
				}
				return (uerror);
			}
		}
	} else { /* uerror == 0 */
		if (uppervp != upperdvp)
			VOP_UNLOCK(uppervp);
	}

	if (lowervp != NULLVP)
		VOP_UNLOCK(lowervp);

	/* Combine the layer vnodes (references are donated on success). */
	error = union_allocvp(ap->a_vpp, dvp->v_mount, dvp, upperdvp, cnp,
			      uppervp, lowervp, 1);

	if (error) {
		if (uppervp != NULLVP)
			vrele(uppervp);
		if (lowervp != NULLVP)
			vrele(lowervp);
		return error;
	}

	return 0;
}
503
504int
505union_create(void *v)
506{
507	struct vop_create_v3_args /* {
508		struct vnode *a_dvp;
509		struct vnode **a_vpp;
510		struct componentname *a_cnp;
511		struct vattr *a_vap;
512	} */ *ap = v;
513	struct union_node *un = VTOUNION(ap->a_dvp);
514	struct vnode *dvp = un->un_uppervp;
515	struct componentname *cnp = ap->a_cnp;
516
517	if (dvp != NULLVP) {
518		int error;
519		struct vnode *vp;
520		struct mount *mp;
521
522		mp = ap->a_dvp->v_mount;
523
524		vp = NULL;
525		error = VOP_CREATE(dvp, &vp, cnp, ap->a_vap);
526		if (error)
527			return (error);
528
529		error = union_allocvp(ap->a_vpp, mp, NULLVP, NULLVP, cnp, vp,
530				NULLVP, 1);
531		if (error)
532			vrele(vp);
533		return (error);
534	}
535
536	return (EROFS);
537}
538
539int
540union_whiteout(void *v)
541{
542	struct vop_whiteout_args /* {
543		struct vnode *a_dvp;
544		struct componentname *a_cnp;
545		int a_flags;
546	} */ *ap = v;
547	struct union_node *un = VTOUNION(ap->a_dvp);
548	struct componentname *cnp = ap->a_cnp;
549
550	if (un->un_uppervp == NULLVP)
551		return (EOPNOTSUPP);
552
553	return (VOP_WHITEOUT(un->un_uppervp, cnp, ap->a_flags));
554}
555
556int
557union_mknod(void *v)
558{
559	struct vop_mknod_v3_args /* {
560		struct vnode *a_dvp;
561		struct vnode **a_vpp;
562		struct componentname *a_cnp;
563		struct vattr *a_vap;
564	} */ *ap = v;
565	struct union_node *un = VTOUNION(ap->a_dvp);
566	struct vnode *dvp = un->un_uppervp;
567	struct componentname *cnp = ap->a_cnp;
568
569	if (dvp != NULLVP) {
570		int error;
571		struct vnode *vp;
572		struct mount *mp;
573
574		mp = ap->a_dvp->v_mount;
575		error = VOP_MKNOD(dvp, &vp, cnp, ap->a_vap);
576		if (error)
577			return (error);
578
579		error = union_allocvp(ap->a_vpp, mp, NULLVP, NULLVP,
580				      cnp, vp, NULLVP, 1);
581		if (error)
582			vrele(vp);
583		return (error);
584	}
585
586	return (EROFS);
587}
588
/*
 * union_open:
 *	Open the union node.  An existing upper vnode is opened
 *	directly.  Otherwise the lower vnode is used, except that a
 *	regular file opened for writing is first copied up to the
 *	upper layer.  Writable opens of the upper vnode bump its
 *	v_writecount (union_close drops it again).
 */
int
union_open(void *v)
{
	struct vop_open_args /* {
		struct vnodeop_desc *a_desc;
		struct vnode *a_vp;
		int a_mode;
		kauth_cred_t a_cred;
	} */ *ap = v;
	struct union_node *un = VTOUNION(ap->a_vp);
	struct vnode *tvp;
	int mode = ap->a_mode;
	kauth_cred_t cred = ap->a_cred;
	struct lwp *l = curlwp;
	int error;

	/*
	 * If there is an existing upper vp then simply open that.
	 */
	tvp = un->un_uppervp;
	if (tvp == NULLVP) {
		/*
		 * If the lower vnode is being opened for writing, then
		 * copy the file contents to the upper vnode and open that,
		 * otherwise can simply open the lower vnode.
		 */
		tvp = un->un_lowervp;
		if ((ap->a_mode & FWRITE) && (tvp->v_type == VREG)) {
			/* Skip copying the data when O_TRUNC discards it. */
			error = union_copyup(un, (mode&O_TRUNC) == 0, cred, l);
			if (error == 0)
				error = VOP_OPEN(un->un_uppervp, mode, cred);
			if (error == 0) {
				mutex_enter(un->un_uppervp->v_interlock);
				un->un_uppervp->v_writecount++;
				mutex_exit(un->un_uppervp->v_interlock);
			}
			return (error);
		}

		/*
		 * Just open the lower vnode, but check for nodev mount flag
		 */
		if ((tvp->v_type == VBLK || tvp->v_type == VCHR) &&
		    (ap->a_vp->v_mount->mnt_flag & MNT_NODEV))
			return ENXIO;
		/* un_openl counts lower-layer opens; union_close decrements. */
		un->un_openl++;
		vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY);
		error = VOP_OPEN(tvp, mode, cred);
		VOP_UNLOCK(tvp);

		return (error);
	}
	/*
	 * Just open the upper vnode, checking for nodev mount flag first
	 */
	if ((tvp->v_type == VBLK || tvp->v_type == VCHR) &&
	    (ap->a_vp->v_mount->mnt_flag & MNT_NODEV))
		return ENXIO;

	error = VOP_OPEN(tvp, mode, cred);
	if (error == 0 && (ap->a_mode & FWRITE)) {
		mutex_enter(tvp->v_interlock);
		tvp->v_writecount++;
		mutex_exit(tvp->v_interlock);
	}

	return (error);
}
657
658int
659union_close(void *v)
660{
661	struct vop_close_args /* {
662		struct vnode *a_vp;
663		int  a_fflag;
664		kauth_cred_t a_cred;
665	} */ *ap = v;
666	struct union_node *un = VTOUNION(ap->a_vp);
667	struct vnode *vp;
668	int error;
669	bool do_lock;
670
671	vp = un->un_uppervp;
672	if (vp != NULLVP) {
673		do_lock = false;
674	} else {
675		KASSERT(un->un_openl > 0);
676		--un->un_openl;
677		vp = un->un_lowervp;
678		do_lock = true;
679	}
680
681	KASSERT(vp != NULLVP);
682	ap->a_vp = vp;
683	if ((ap->a_fflag & FWRITE)) {
684		KASSERT(vp == un->un_uppervp);
685		mutex_enter(vp->v_interlock);
686		vp->v_writecount--;
687		mutex_exit(vp->v_interlock);
688	}
689	if (do_lock)
690		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
691	error = VCALL(vp, VOFFSET(vop_close), ap);
692	if (do_lock)
693		VOP_UNLOCK(vp);
694
695	return error;
696}
697
698/*
699 * Check access permission on the union vnode.
700 * The access check being enforced is to check
701 * against both the underlying vnode, and any
702 * copied vnode.  This ensures that no additional
703 * file permissions are given away simply because
704 * the user caused an implicit file copy.
705 */
706int
707union_access(void *v)
708{
709	struct vop_access_args /* {
710		struct vnodeop_desc *a_desc;
711		struct vnode *a_vp;
712		accmode_t a_accmode;
713		kauth_cred_t a_cred;
714	} */ *ap = v;
715	struct vnode *vp = ap->a_vp;
716	struct union_node *un = VTOUNION(vp);
717	int error = EACCES;
718	struct union_mount *um = MOUNTTOUNIONMOUNT(vp->v_mount);
719
720	/*
721	 * Disallow write attempts on read-only file systems;
722	 * unless the file is a socket, fifo, or a block or
723	 * character device resident on the file system.
724	 */
725	if (ap->a_accmode & VWRITE) {
726		switch (vp->v_type) {
727		case VDIR:
728		case VLNK:
729		case VREG:
730			if (vp->v_mount->mnt_flag & MNT_RDONLY)
731				return (EROFS);
732			break;
733		case VBAD:
734		case VBLK:
735		case VCHR:
736		case VSOCK:
737		case VFIFO:
738		case VNON:
739		default:
740			break;
741		}
742	}
743
744
745	if ((vp = un->un_uppervp) != NULLVP) {
746		ap->a_vp = vp;
747		return (VCALL(vp, VOFFSET(vop_access), ap));
748	}
749
750	if ((vp = un->un_lowervp) != NULLVP) {
751		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
752		ap->a_vp = vp;
753		error = VCALL(vp, VOFFSET(vop_access), ap);
754		if (error == 0) {
755			if (um->um_op == UNMNT_BELOW) {
756				ap->a_cred = um->um_cred;
757				error = VCALL(vp, VOFFSET(vop_access), ap);
758			}
759		}
760		VOP_UNLOCK(vp);
761		if (error)
762			return (error);
763	}
764
765	return (error);
766}
767
768/*
769 * We handle getattr only to change the fsid and
770 * track object sizes
771 */
772int
773union_getattr(void *v)
774{
775	struct vop_getattr_args /* {
776		struct vnode *a_vp;
777		struct vattr *a_vap;
778		kauth_cred_t a_cred;
779	} */ *ap = v;
780	int error;
781	struct union_node *un = VTOUNION(ap->a_vp);
782	struct vnode *vp = un->un_uppervp;
783	struct vattr *vap;
784	struct vattr va;
785
786
787	/*
788	 * Some programs walk the filesystem hierarchy by counting
789	 * links to directories to avoid stat'ing all the time.
790	 * This means the link count on directories needs to be "correct".
791	 * The only way to do that is to call getattr on both layers
792	 * and fix up the link count.  The link count will not necessarily
793	 * be accurate but will be large enough to defeat the tree walkers.
794	 *
795	 * To make life more interesting, some filesystems don't keep
796	 * track of link counts in the expected way, and return a
797	 * link count of `1' for those directories; if either of the
798	 * component directories returns a link count of `1', we return a 1.
799	 */
800
801	vap = ap->a_vap;
802
803	vp = un->un_uppervp;
804	if (vp != NULLVP) {
805		error = VOP_GETATTR(vp, vap, ap->a_cred);
806		if (error)
807			return (error);
808		mutex_enter(&un->un_lock);
809		union_newsize(ap->a_vp, vap->va_size, VNOVAL);
810	}
811
812	if (vp == NULLVP) {
813		vp = un->un_lowervp;
814	} else if (vp->v_type == VDIR) {
815		vp = un->un_lowervp;
816		if (vp != NULLVP)
817			vap = &va;
818	} else {
819		vp = NULLVP;
820	}
821
822	if (vp != NULLVP) {
823		if (vp == un->un_lowervp)
824			vn_lock(vp, LK_SHARED | LK_RETRY);
825		error = VOP_GETATTR(vp, vap, ap->a_cred);
826		if (vp == un->un_lowervp)
827			VOP_UNLOCK(vp);
828		if (error)
829			return (error);
830		mutex_enter(&un->un_lock);
831		union_newsize(ap->a_vp, VNOVAL, vap->va_size);
832	}
833
834	if ((vap != ap->a_vap) && (vap->va_type == VDIR)) {
835		/*
836		 * Link count manipulation:
837		 *	- If both return "2", return 2 (no subdirs)
838		 *	- If one or the other return "1", return "1" (ENOCLUE)
839		 */
840		if ((ap->a_vap->va_nlink == 2) &&
841		    (vap->va_nlink == 2))
842			;
843		else if (ap->a_vap->va_nlink != 1) {
844			if (vap->va_nlink == 1)
845				ap->a_vap->va_nlink = 1;
846			else
847				ap->a_vap->va_nlink += vap->va_nlink;
848		}
849	}
850	ap->a_vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsidx.__fsid_val[0];
851	return (0);
852}
853
/*
 * union_setattr:
 *	Set attributes on the union node.  Attributes can only be
 *	changed in the upper layer; truncating a lower-layer regular
 *	file triggers a copy-up first (handles open with O_TRUNC and
 *	O_CREAT).  Size-only changes to special files backed solely
 *	by the lower layer are tolerated; anything else without an
 *	upper vnode fails with EROFS.
 */
int
union_setattr(void *v)
{
	struct vop_setattr_args /* {
		struct vnode *a_vp;
		struct vattr *a_vap;
		kauth_cred_t a_cred;
	} */ *ap = v;
	struct vattr *vap = ap->a_vap;
	struct vnode *vp = ap->a_vp;
	struct union_node *un = VTOUNION(vp);
	bool size_only;		/* All but va_size are VNOVAL. */
	int error;

	size_only = (vap->va_flags == VNOVAL && vap->va_uid == (uid_t)VNOVAL &&
	    vap->va_gid == (gid_t)VNOVAL && vap->va_atime.tv_sec == VNOVAL &&
	    vap->va_mtime.tv_sec == VNOVAL && vap->va_mode == (mode_t)VNOVAL);

	if (!size_only && (vp->v_mount->mnt_flag & MNT_RDONLY))
		return (EROFS);
	if (vap->va_size != VNOVAL) {
 		switch (vp->v_type) {
 		case VDIR:
 			return (EISDIR);
 		case VCHR:
 		case VBLK:
 		case VSOCK:
 		case VFIFO:
			break;
		case VREG:
		case VLNK:
 		default:
			/*
			 * Disallow write attempts if the filesystem is
			 * mounted read-only.
			 */
			if (vp->v_mount->mnt_flag & MNT_RDONLY)
				return (EROFS);
		}
	}

	/*
	 * Handle case of truncating lower object to zero size,
	 * by creating a zero length upper object.  This is to
	 * handle the case of open with O_TRUNC and O_CREAT.
	 */
	if ((un->un_uppervp == NULLVP) &&
	    /* assert(un->un_lowervp != NULLVP) */
	    (un->un_lowervp->v_type == VREG)) {
		error = union_copyup(un, (vap->va_size != 0),
						ap->a_cred, curlwp);
		if (error)
			return (error);
	}

	/*
	 * Try to set attributes in upper layer, ignore size change to zero
	 * for devices to handle O_TRUNC and return read-only filesystem error
	 * otherwise.
	 */
	if (un->un_uppervp != NULLVP) {
		error = VOP_SETATTR(un->un_uppervp, vap, ap->a_cred);
		if ((error == 0) && (vap->va_size != VNOVAL)) {
			/* union_newsize() releases un_lock. */
			mutex_enter(&un->un_lock);
			union_newsize(ap->a_vp, vap->va_size, VNOVAL);
		}
	} else {
		KASSERT(un->un_lowervp != NULLVP);
		if (NODE_IS_SPECIAL(un->un_lowervp)) {
			if (size_only &&
			    (vap->va_size == 0 || vap->va_size == VNOVAL))
				error = 0;
			else
				error = EROFS;
		} else {
			error = EROFS;
		}
	}

	return (error);
}
935
/*
 * union_read:
 *	Read from whichever layer backs this node.  The lower vnode
 *	is not kept locked by the union layer, so it is locked around
 *	the call; an upper vnode needs no extra locking here.  On
 *	success the cached layer size is grown if the uio offset moved
 *	past it.
 */
int
union_read(void *v)
{
	struct vop_read_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int  a_ioflag;
		kauth_cred_t a_cred;
	} */ *ap = v;
	int error;
	struct vnode *vp = OTHERVP(ap->a_vp);
	int dolock = (vp == LOWERVP(ap->a_vp));

	if (dolock)
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	error = VOP_READ(vp, ap->a_uio, ap->a_ioflag, ap->a_cred);
	if (dolock)
		VOP_UNLOCK(vp);

	/*
	 * XXX
	 * perhaps the size of the underlying object has changed under
	 * our feet.  take advantage of the offset information present
	 * in the uio structure.
	 */
	if (error == 0) {
		struct union_node *un = VTOUNION(ap->a_vp);
		off_t cur = ap->a_uio->uio_offset;
		off_t usz = VNOVAL, lsz = VNOVAL;

		mutex_enter(&un->un_lock);
		if (vp == un->un_uppervp) {
			if (cur > un->un_uppersz)
				usz = cur;
		} else {
			if (cur > un->un_lowersz)
				lsz = cur;
		}

		/*
		 * union_newsize() releases un_lock; when there is
		 * nothing to update, drop the lock ourselves.
		 */
		if (usz != VNOVAL || lsz != VNOVAL)
			union_newsize(ap->a_vp, usz, lsz);
		else
			mutex_exit(&un->un_lock);
	}

	return (error);
}
983
984int
985union_write(void *v)
986{
987	struct vop_read_args /* {
988		struct vnode *a_vp;
989		struct uio *a_uio;
990		int  a_ioflag;
991		kauth_cred_t a_cred;
992	} */ *ap = v;
993	int error;
994	struct vnode *vp;
995	struct union_node *un = VTOUNION(ap->a_vp);
996
997	vp = UPPERVP(ap->a_vp);
998	if (vp == NULLVP) {
999		vp = LOWERVP(ap->a_vp);
1000		if (NODE_IS_SPECIAL(vp)) {
1001			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1002			error = VOP_WRITE(vp, ap->a_uio, ap->a_ioflag,
1003			    ap->a_cred);
1004			VOP_UNLOCK(vp);
1005			return error;
1006		}
1007		panic("union: missing upper layer in write");
1008	}
1009
1010	error = VOP_WRITE(vp, ap->a_uio, ap->a_ioflag, ap->a_cred);
1011
1012	/*
1013	 * the size of the underlying object may be changed by the
1014	 * write.
1015	 */
1016	if (error == 0) {
1017		off_t cur = ap->a_uio->uio_offset;
1018
1019		mutex_enter(&un->un_lock);
1020		if (cur > un->un_uppersz)
1021			union_newsize(ap->a_vp, cur, VNOVAL);
1022		else
1023			mutex_exit(&un->un_lock);
1024	}
1025
1026	return (error);
1027}
1028
1029int
1030union_ioctl(void *v)
1031{
1032	struct vop_ioctl_args /* {
1033		struct vnode *a_vp;
1034		int  a_command;
1035		void *a_data;
1036		int  a_fflag;
1037		kauth_cred_t a_cred;
1038	} */ *ap = v;
1039	struct vnode *ovp = OTHERVP(ap->a_vp);
1040
1041	ap->a_vp = ovp;
1042	return (VCALL(ovp, VOFFSET(vop_ioctl), ap));
1043}
1044
1045int
1046union_poll(void *v)
1047{
1048	struct vop_poll_args /* {
1049		struct vnode *a_vp;
1050		int a_events;
1051	} */ *ap = v;
1052	struct vnode *ovp = OTHERVP(ap->a_vp);
1053
1054	ap->a_vp = ovp;
1055	return (VCALL(ovp, VOFFSET(vop_poll), ap));
1056}
1057
1058int
1059union_revoke(void *v)
1060{
1061	struct vop_revoke_args /* {
1062		struct vnode *a_vp;
1063		int a_flags;
1064		struct proc *a_p;
1065	} */ *ap = v;
1066	struct vnode *vp = ap->a_vp;
1067
1068	if (UPPERVP(vp))
1069		VOP_REVOKE(UPPERVP(vp), ap->a_flags);
1070	if (LOWERVP(vp))
1071		VOP_REVOKE(LOWERVP(vp), ap->a_flags);
1072	vgone(vp);	/* XXXAD?? */
1073	return (0);
1074}
1075
1076int
1077union_mmap(void *v)
1078{
1079	struct vop_mmap_args /* {
1080		struct vnode *a_vp;
1081		vm_prot_t a_prot;
1082		kauth_cred_t a_cred;
1083	} */ *ap = v;
1084	struct vnode *ovp = OTHERVP(ap->a_vp);
1085
1086	ap->a_vp = ovp;
1087	return (VCALL(ovp, VOFFSET(vop_mmap), ap));
1088}
1089
/*
 * union_fsync:
 *	Sync the vnode backing this union node.  Device special files
 *	are flushed via spec_fsync() first; a reclaim-time fsync stops
 *	there (see comment below).  The lower vnode, which the union
 *	layer does not keep locked, is locked around the call.
 */
int
union_fsync(void *v)
{
	struct vop_fsync_args /* {
		struct vnode *a_vp;
		kauth_cred_t a_cred;
		int  a_flags;
		off_t a_offlo;
		off_t a_offhi;
	} */ *ap = v;
	int error = 0;
	struct vnode *targetvp;

	/*
	 * If vinvalbuf is calling us, it's a "shallow fsync" -- don't
	 * bother syncing the underlying vnodes, since (a) they'll be
	 * fsync'ed when reclaimed and (b) we could deadlock if
	 * they're locked; otherwise, pass it through to the
	 * underlying layer.
	 */
	if (ap->a_vp->v_type == VBLK || ap->a_vp->v_type == VCHR) {
		error = spec_fsync(v);
		if (error)
			return error;
	}

	if (ap->a_flags & FSYNC_RECLAIM)
		return 0;

	targetvp = OTHERVP(ap->a_vp);
	if (targetvp != NULLVP) {
		int dolock = (targetvp == LOWERVP(ap->a_vp));

		if (dolock)
			vn_lock(targetvp, LK_EXCLUSIVE | LK_RETRY);
		error = VOP_FSYNC(targetvp, ap->a_cred, ap->a_flags,
			    ap->a_offlo, ap->a_offhi);
		if (dolock)
			VOP_UNLOCK(targetvp);
	}

	return (error);
}
1133
1134int
1135union_seek(void *v)
1136{
1137	struct vop_seek_args /* {
1138		struct vnode *a_vp;
1139		off_t  a_oldoff;
1140		off_t  a_newoff;
1141		kauth_cred_t a_cred;
1142	} */ *ap = v;
1143	struct vnode *ovp = OTHERVP(ap->a_vp);
1144
1145	ap->a_vp = ovp;
1146	return (VCALL(ovp, VOFFSET(vop_seek), ap));
1147}
1148
int
union_remove(void *v)
{
	struct vop_remove_v2_args /* {
		struct vnode *a_dvp;
		struct vnode *a_vp;
		struct componentname *a_cnp;
	} */ *ap = v;
	int error;
	struct union_node *dun = VTOUNION(ap->a_dvp);
	struct union_node *un = VTOUNION(ap->a_vp);
	struct componentname *cnp = ap->a_cnp;

	/*
	 * Removal happens in the upper (writable) layer, so the
	 * parent directory must have an upper vnode.
	 */
	if (dun->un_uppervp == NULLVP)
		panic("union remove: null upper vnode");

	if (un->un_uppervp != NULLVP) {
		/*
		 * The file exists in the upper layer: remove it there,
		 * requesting a whiteout when union_dowhiteout() says a
		 * lower copy would otherwise become visible again.
		 */
		struct vnode *dvp = dun->un_uppervp;
		struct vnode *vp = un->un_uppervp;

		/* Account for VOP_REMOVE to vrele vp.  */
		vref(vp);
		if (union_dowhiteout(un, cnp->cn_cred))
			cnp->cn_flags |= DOWHITEOUT;
		error = VOP_REMOVE(dvp, vp, cnp);
		if (!error)
			union_removed_upper(un);
		vrele(ap->a_vp);
	} else {
		/*
		 * Lower layer only: nothing to physically remove, so
		 * mask the name with a whiteout in the upper directory.
		 */
		error = union_mkwhiteout(
			MOUNTTOUNIONMOUNT(UNIONTOV(dun)->v_mount),
			dun->un_uppervp, ap->a_cnp, un);
		vput(ap->a_vp);
	}

	return (error);
}
1186
int
union_link(void *v)
{
	struct vop_link_v2_args /* {
		struct vnode *a_dvp;
		struct vnode *a_vp;
		struct componentname *a_cnp;
	} */ *ap = v;
	int error = 0;
	struct componentname *cnp = ap->a_cnp;
	struct union_node *dun;
	struct vnode *vp;
	struct vnode *dvp;

	dun = VTOUNION(ap->a_dvp);

	KASSERT((ap->a_cnp->cn_flags & LOCKPARENT) != 0);

	/*
	 * Links are always made in the upper layer.  A source vnode
	 * with a different vop vector is not a union vnode and is
	 * used as-is; a union source must have (or acquire, via
	 * copyup) an upper vnode.
	 */
	if (ap->a_dvp->v_op != ap->a_vp->v_op) {
		vp = ap->a_vp;
	} else {
		struct union_node *un = VTOUNION(ap->a_vp);
		if (un->un_uppervp == NULLVP) {
			/*
			 * If the target directory's upper vnode is the
			 * directory the source sits in, copyup would
			 * deadlock on it: drop it for the duration.
			 */
			const bool droplock = (dun->un_uppervp == un->un_dirvp);

			/*
			 * Needs to be copied before we can link it.
			 */
			vn_lock(ap->a_vp, LK_EXCLUSIVE | LK_RETRY);
			if (droplock)
				VOP_UNLOCK(dun->un_uppervp);
			error = union_copyup(un, 1, cnp->cn_cred, curlwp);
			if (droplock) {
				vn_lock(dun->un_uppervp,
				    LK_EXCLUSIVE | LK_RETRY);
				/*
				 * During copyup, we dropped the lock on the
				 * dir and invalidated any saved namei lookup
				 * state for the directory we'll be entering
				 * the link in.  We need to re-run the lookup
				 * in that directory to reset any state needed
				 * for VOP_LINK.
				 * Call relookup on the union-layer to reset
				 * the state.
				 */
				vp  = NULLVP;
				if (dun->un_uppervp == NULLVP)
					 panic("union: null upperdvp?");
				error = relookup(ap->a_dvp, &vp, ap->a_cnp, 0);
				if (error) {
					VOP_UNLOCK(ap->a_vp);
					return EROFS;	/* ? */
				}
				if (vp != NULLVP) {
					/*
					 * The name we want to create has
					 * mysteriously appeared (a race?)
					 */
					error = EEXIST;
					VOP_UNLOCK(ap->a_vp);
					vput(vp);
					return (error);
				}
			}
			VOP_UNLOCK(ap->a_vp);
		}
		/*
		 * NOTE(review): if union_copyup() failed above, a
		 * successful relookup() has overwritten "error" and
		 * un_uppervp may still be NULLVP here -- confirm this
		 * path is intended.
		 */
		vp = un->un_uppervp;
	}

	dvp = dun->un_uppervp;
	if (dvp == NULLVP)
		error = EROFS;

	if (error)
		return (error);

	return VOP_LINK(dvp, vp, cnp);
}
1265
int
union_rename(void *v)
{
	struct vop_rename_args  /* {
		struct vnode *a_fdvp;
		struct vnode *a_fvp;
		struct componentname *a_fcnp;
		struct vnode *a_tdvp;
		struct vnode *a_tvp;
		struct componentname *a_tcnp;
	} */ *ap = v;
	int error;

	struct vnode *fdvp = ap->a_fdvp;
	struct vnode *fvp = ap->a_fvp;
	struct vnode *tdvp = ap->a_tdvp;
	struct vnode *tvp = ap->a_tvp;

	/*
	 * Rename happens entirely in the upper layer: substitute the
	 * upper vnode for each union vnode involved (taking an extra
	 * reference on each substitute), then hand the operation to
	 * the upper file system.
	 *
	 * Account for VOP_RENAME to vrele all nodes.
	 * Note: VOP_RENAME will unlock tdvp.
	 */

	if (fdvp->v_op == union_vnodeop_p) {	/* always true */
		struct union_node *un = VTOUNION(fdvp);
		if (un->un_uppervp == NULLVP) {
			/*
			 * this should never happen in normal
			 * operation but might if there was
			 * a problem creating the top-level shadow
			 * directory.
			 */
			error = EXDEV;
			goto bad;
		}

		fdvp = un->un_uppervp;
		vref(fdvp);
	}

	if (fvp->v_op == union_vnodeop_p) {	/* always true */
		struct union_node *un = VTOUNION(fvp);
		if (un->un_uppervp == NULLVP) {
			/* XXX: should do a copyup */
			error = EXDEV;
			goto bad;
		}

		/* Mask any lower copy with a whiteout at the old name. */
		if (un->un_lowervp != NULLVP)
			ap->a_fcnp->cn_flags |= DOWHITEOUT;

		fvp = un->un_uppervp;
		vref(fvp);
	}

	if (tdvp->v_op == union_vnodeop_p) {
		struct union_node *un = VTOUNION(tdvp);
		if (un->un_uppervp == NULLVP) {
			/*
			 * this should never happen in normal
			 * operation but might if there was
			 * a problem creating the top-level shadow
			 * directory.
			 */
			error = EXDEV;
			goto bad;
		}

		tdvp = un->un_uppervp;
		vref(tdvp);
	}

	if (tvp != NULLVP && tvp->v_op == union_vnodeop_p) {
		struct union_node *un = VTOUNION(tvp);

		/* A lower-only target is simply not passed down. */
		tvp = un->un_uppervp;
		if (tvp != NULLVP) {
			vref(tvp);
		}
	}

	error = VOP_RENAME(fdvp, fvp, ap->a_fcnp, tdvp, tvp, ap->a_tcnp);
	goto out;

bad:
	/* VOP_RENAME was never called: release its references ourselves. */
	vput(tdvp);
	if (tvp != NULLVP)
		vput(tvp);
	vrele(fdvp);
	vrele(fvp);

out:
	/*
	 * Wherever an upper vnode was substituted, the original union
	 * vnode's reference is still outstanding -- drop it now.
	 */
	if (fdvp != ap->a_fdvp) {
		vrele(ap->a_fdvp);
	}
	if (fvp != ap->a_fvp) {
		vrele(ap->a_fvp);
	}
	if (tdvp != ap->a_tdvp) {
		vrele(ap->a_tdvp);
	}
	if (tvp != ap->a_tvp) {
		vrele(ap->a_tvp);
	}
	return (error);
}
1372
int
union_mkdir(void *v)
{
	struct vop_mkdir_v3_args /* {
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
		struct vattr *a_vap;
	} */ *ap = v;
	struct union_node *un = VTOUNION(ap->a_dvp);
	struct vnode *dvp = un->un_uppervp;
	struct componentname *cnp = ap->a_cnp;

	/* Directories can only be created in the upper (writable) layer. */
	if (dvp != NULLVP) {
		int error;
		struct vnode *vp;

		vp = NULL;
		error = VOP_MKDIR(dvp, &vp, cnp, ap->a_vap);
		if (error) {
			/*
			 * NOTE(review): the union directory is released
			 * on this error path but not on the EROFS path
			 * below -- presumably to match the vop_mkdir_v3
			 * reference protocol; confirm against callers.
			 */
			vrele(ap->a_dvp);
			return (error);
		}

		/* Wrap the new upper-layer directory in a union node. */
		error = union_allocvp(ap->a_vpp, ap->a_dvp->v_mount, ap->a_dvp,
				NULLVP, cnp, vp, NULLVP, 1);
		if (error)
			vrele(vp);
		return (error);
	}

	return (EROFS);
}
1406
int
union_rmdir(void *v)
{
	struct vop_rmdir_v2_args /* {
		struct vnode *a_dvp;
		struct vnode *a_vp;
		struct componentname *a_cnp;
	} */ *ap = v;
	int error;
	struct union_node *dun = VTOUNION(ap->a_dvp);
	struct union_node *un = VTOUNION(ap->a_vp);
	struct componentname *cnp = ap->a_cnp;

	/* Removal happens in the upper layer; the parent must have one. */
	if (dun->un_uppervp == NULLVP)
		panic("union rmdir: null upper vnode");

	/* Let union_check_rmdir() veto the removal (e.g. non-empty dir). */
	error = union_check_rmdir(un, cnp->cn_cred);
	if (error) {
		vput(ap->a_vp);
		return error;
	}

	if (un->un_uppervp != NULLVP) {
		/*
		 * Remove the upper directory, whiting it out when a
		 * lower copy would otherwise become visible again.
		 */
		struct vnode *dvp = dun->un_uppervp;
		struct vnode *vp = un->un_uppervp;

		/* Account for VOP_RMDIR to vrele vp.  */
		vref(vp);
		if (union_dowhiteout(un, cnp->cn_cred))
			cnp->cn_flags |= DOWHITEOUT;
		error = VOP_RMDIR(dvp, vp, ap->a_cnp);
		if (!error)
			union_removed_upper(un);
		vrele(ap->a_vp);
	} else {
		/* Lower layer only: mask the name with a whiteout. */
		error = union_mkwhiteout(
			MOUNTTOUNIONMOUNT(UNIONTOV(dun)->v_mount),
			dun->un_uppervp, ap->a_cnp, un);
		vput(ap->a_vp);
	}

	return (error);
}
1450
1451int
1452union_symlink(void *v)
1453{
1454	struct vop_symlink_v3_args /* {
1455		struct vnode *a_dvp;
1456		struct vnode **a_vpp;
1457		struct componentname *a_cnp;
1458		struct vattr *a_vap;
1459		char *a_target;
1460	} */ *ap = v;
1461	struct union_node *un = VTOUNION(ap->a_dvp);
1462	struct vnode *dvp = un->un_uppervp;
1463	struct componentname *cnp = ap->a_cnp;
1464
1465	if (dvp != NULLVP) {
1466		int error;
1467
1468		error = VOP_SYMLINK(dvp, ap->a_vpp, cnp, ap->a_vap,
1469				    ap->a_target);
1470		return (error);
1471	}
1472
1473	return (EROFS);
1474}
1475
1476/*
1477 * union_readdir works in concert with getdirentries and
1478 * readdir(3) to provide a list of entries in the unioned
1479 * directories.  getdirentries is responsible for walking
1480 * down the union stack.  readdir(3) is responsible for
1481 * eliminating duplicate names from the returned data stream.
1482 */
1483int
1484union_readdir(void *v)
1485{
1486	struct vop_readdir_args /* {
1487		struct vnodeop_desc *a_desc;
1488		struct vnode *a_vp;
1489		struct uio *a_uio;
1490		kauth_cred_t a_cred;
1491		int *a_eofflag;
1492		u_long *a_cookies;
1493		int a_ncookies;
1494	} */ *ap = v;
1495	struct union_node *un = VTOUNION(ap->a_vp);
1496	struct vnode *uvp = un->un_uppervp;
1497
1498	if (uvp == NULLVP)
1499		return (0);
1500
1501	ap->a_vp = uvp;
1502	return (VCALL(uvp, VOFFSET(vop_readdir), ap));
1503}
1504
1505int
1506union_readlink(void *v)
1507{
1508	struct vop_readlink_args /* {
1509		struct vnode *a_vp;
1510		struct uio *a_uio;
1511		kauth_cred_t a_cred;
1512	} */ *ap = v;
1513	int error;
1514	struct vnode *vp = OTHERVP(ap->a_vp);
1515	int dolock = (vp == LOWERVP(ap->a_vp));
1516
1517	if (dolock)
1518		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1519	ap->a_vp = vp;
1520	error = VCALL(vp, VOFFSET(vop_readlink), ap);
1521	if (dolock)
1522		VOP_UNLOCK(vp);
1523
1524	return (error);
1525}
1526
1527int
1528union_abortop(void *v)
1529{
1530	struct vop_abortop_args /* {
1531		struct vnode *a_dvp;
1532		struct componentname *a_cnp;
1533	} */ *ap = v;
1534
1535	KASSERT(UPPERVP(ap->a_dvp) != NULL);
1536
1537	ap->a_dvp = UPPERVP(ap->a_dvp);
1538	return VCALL(ap->a_dvp, VOFFSET(vop_abortop), ap);
1539}
1540
1541int
1542union_inactive(void *v)
1543{
1544	struct vop_inactive_v2_args /* {
1545		const struct vnodeop_desc *a_desc;
1546		struct vnode *a_vp;
1547		bool *a_recycle;
1548	} */ *ap = v;
1549	struct vnode *vp = ap->a_vp;
1550	struct union_node *un = VTOUNION(vp);
1551	struct vnode **vpp;
1552
1553	/*
1554	 * Do nothing (and _don't_ bypass).
1555	 * Wait to vrele lowervp until reclaim,
1556	 * so that until then our union_node is in the
1557	 * cache and reusable.
1558	 *
1559	 * NEEDSWORK: Someday, consider inactive'ing
1560	 * the lowervp and then trying to reactivate it
1561	 * with capabilities (v_id)
1562	 * like they do in the name lookup cache code.
1563	 * That's too much work for now.
1564	 */
1565
1566	if (un->un_dircache != 0) {
1567		for (vpp = un->un_dircache; *vpp != NULLVP; vpp++)
1568			vrele(*vpp);
1569		free(un->un_dircache, M_TEMP);
1570		un->un_dircache = 0;
1571	}
1572
1573	*ap->a_recycle = ((un->un_cflags & UN_CACHED) == 0);
1574
1575	return (0);
1576}
1577
int
union_reclaim(void *v)
{
	struct vop_reclaim_v2_args /* {
		struct vnode *a_vp;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct vnode *uvp = UPPERVP(vp);

	VOP_UNLOCK(vp);

	if (uvp != NULL) {
		/*
		 * The union vnode shares its interlock with the upper
		 * vnode (asserted below); give back the write counts
		 * we accumulated on the upper vnode's behalf.
		 */
		mutex_enter(uvp->v_interlock);
		KASSERT(vp->v_interlock == uvp->v_interlock);
		uvp->v_writecount -= vp->v_writecount;
		mutex_exit(uvp->v_interlock);
	}

	/* Release the underlying vnodes and free the union node. */
	union_freevp(vp);

	return (0);
}
1600
1601static int
1602union_lock1(struct vnode *vp, struct vnode *lockvp, int flags)
1603{
1604	struct vop_lock_args ap;
1605
1606	ap.a_desc = VDESC(vop_lock);
1607	ap.a_vp = lockvp;
1608	ap.a_flags = flags;
1609
1610	if (lockvp == vp)
1611		return genfs_lock(&ap);
1612	else
1613		return VCALL(ap.a_vp, VOFFSET(vop_lock), &ap);
1614}
1615
1616static int
1617union_unlock1(struct vnode *vp, struct vnode *lockvp)
1618{
1619	struct vop_unlock_args ap;
1620
1621	ap.a_desc = VDESC(vop_unlock);
1622	ap.a_vp = lockvp;
1623
1624	if (lockvp == vp)
1625		return genfs_unlock(&ap);
1626	else
1627		return VCALL(ap.a_vp, VOFFSET(vop_unlock), &ap);
1628}
1629
int
union_lock(void *v)
{
	struct vop_lock_args /* {
		struct vnode *a_vp;
		int a_flags;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp, *lockvp;
	struct union_node *un = VTOUNION(vp);
	int flags = ap->a_flags;
	int error;

	/*
	 * Lock whichever vnode currently carries our lock (LOCKVP).
	 * That vnode can change while we sleep -- e.g. when a copyup
	 * attaches an upper vnode -- so the blocking path below must
	 * re-check LOCKVP after acquiring the lock and retry.
	 */
	if ((flags & LK_NOWAIT) != 0) {
		/* Non-blocking path: every acquisition must be a trylock. */
		if (!mutex_tryenter(&un->un_lock))
			return EBUSY;
		lockvp = LOCKVP(vp);
		error = union_lock1(vp, lockvp, flags);
		mutex_exit(&un->un_lock);
		if (error)
			return error;
		/* Back out if the vnode is being revoked/reclaimed. */
		if (mutex_tryenter(vp->v_interlock)) {
			error = vdead_check(vp, VDEAD_NOWAIT);
			mutex_exit(vp->v_interlock);
		} else
			error = EBUSY;
		if (error)
			union_unlock1(vp, lockvp);
		return error;
	}

	mutex_enter(&un->un_lock);
	for (;;) {
		lockvp = LOCKVP(vp);
		mutex_exit(&un->un_lock);
		error = union_lock1(vp, lockvp, flags);
		if (error != 0)
			return error;
		mutex_enter(&un->un_lock);
		/* Done only if the lock vnode did not change meanwhile. */
		if (lockvp == LOCKVP(vp))
			break;
		union_unlock1(vp, lockvp);
	}
	mutex_exit(&un->un_lock);

	/*
	 * If the vnode started dying, drop our lock and re-run the
	 * check without VDEAD_NOWAIT, which is expected to settle on
	 * ENOENT once the vnode is fully dead.
	 */
	mutex_enter(vp->v_interlock);
	error = vdead_check(vp, VDEAD_NOWAIT);
	if (error) {
		union_unlock1(vp, lockvp);
		error = vdead_check(vp, 0);
		KASSERT(error == ENOENT);
	}
	mutex_exit(vp->v_interlock);
	return error;
}
1684
1685int
1686union_unlock(void *v)
1687{
1688	struct vop_unlock_args /* {
1689		struct vnode *a_vp;
1690		int a_flags;
1691	} */ *ap = v;
1692	struct vnode *vp = ap->a_vp, *lockvp;
1693
1694	lockvp = LOCKVP(vp);
1695	union_unlock1(vp, lockvp);
1696
1697	return 0;
1698}
1699
1700int
1701union_bmap(void *v)
1702{
1703	struct vop_bmap_args /* {
1704		struct vnode *a_vp;
1705		daddr_t  a_bn;
1706		struct vnode **a_vpp;
1707		daddr_t *a_bnp;
1708		int *a_runp;
1709	} */ *ap = v;
1710	int error;
1711	struct vnode *vp = OTHERVP(ap->a_vp);
1712	int dolock = (vp == LOWERVP(ap->a_vp));
1713
1714	if (dolock)
1715		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1716	ap->a_vp = vp;
1717	error = VCALL(vp, VOFFSET(vop_bmap), ap);
1718	if (dolock)
1719		VOP_UNLOCK(vp);
1720
1721	return (error);
1722}
1723
1724int
1725union_print(void *v)
1726{
1727	struct vop_print_args /* {
1728		struct vnode *a_vp;
1729	} */ *ap = v;
1730	struct vnode *vp = ap->a_vp;
1731
1732	printf("\ttag VT_UNION, vp=%p, uppervp=%p, lowervp=%p\n",
1733			vp, UPPERVP(vp), LOWERVP(vp));
1734	if (UPPERVP(vp) != NULLVP)
1735		vprint("union: upper", UPPERVP(vp));
1736	if (LOWERVP(vp) != NULLVP)
1737		vprint("union: lower", LOWERVP(vp));
1738	if (VTOUNION(vp)->un_dircache) {
1739		struct vnode **vpp;
1740		for (vpp = VTOUNION(vp)->un_dircache; *vpp != NULLVP; vpp++)
1741			vprint("dircache:", *vpp);
1742	}
1743
1744	return (0);
1745}
1746
1747int
1748union_islocked(void *v)
1749{
1750	struct vop_islocked_args /* {
1751		struct vnode *a_vp;
1752	} */ *ap = v;
1753	struct vnode *vp;
1754	struct union_node *un;
1755
1756	un = VTOUNION(ap->a_vp);
1757	mutex_enter(&un->un_lock);
1758	vp = LOCKVP(ap->a_vp);
1759	mutex_exit(&un->un_lock);
1760
1761	if (vp == ap->a_vp)
1762		return genfs_islocked(ap);
1763	else
1764		return VOP_ISLOCKED(vp);
1765}
1766
1767int
1768union_pathconf(void *v)
1769{
1770	struct vop_pathconf_args /* {
1771		struct vnode *a_vp;
1772		int a_name;
1773		int *a_retval;
1774	} */ *ap = v;
1775	int error;
1776	struct vnode *vp = OTHERVP(ap->a_vp);
1777	int dolock = (vp == LOWERVP(ap->a_vp));
1778
1779	if (dolock)
1780		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1781	ap->a_vp = vp;
1782	error = VCALL(vp, VOFFSET(vop_pathconf), ap);
1783	if (dolock)
1784		VOP_UNLOCK(vp);
1785
1786	return (error);
1787}
1788
1789int
1790union_advlock(void *v)
1791{
1792	struct vop_advlock_args /* {
1793		struct vnode *a_vp;
1794		void *a_id;
1795		int  a_op;
1796		struct flock *a_fl;
1797		int  a_flags;
1798	} */ *ap = v;
1799	struct vnode *ovp = OTHERVP(ap->a_vp);
1800
1801	ap->a_vp = ovp;
1802	return (VCALL(ovp, VOFFSET(vop_advlock), ap));
1803}
1804
1805int
1806union_strategy(void *v)
1807{
1808	struct vop_strategy_args /* {
1809		struct vnode *a_vp;
1810		struct buf *a_bp;
1811	} */ *ap = v;
1812	struct vnode *ovp = OTHERVP(ap->a_vp);
1813	struct buf *bp = ap->a_bp;
1814
1815	KASSERT(ovp != NULLVP);
1816	if (!NODE_IS_SPECIAL(ovp))
1817		KASSERT((bp->b_flags & B_READ) || ovp != LOWERVP(bp->b_vp));
1818
1819	return (VOP_STRATEGY(ovp, bp));
1820}
1821
1822int
1823union_bwrite(void *v)
1824{
1825	struct vop_bwrite_args /* {
1826		struct vnode *a_vp;
1827		struct buf *a_bp;
1828	} */ *ap = v;
1829	struct vnode *ovp = OTHERVP(ap->a_vp);
1830	struct buf *bp = ap->a_bp;
1831
1832	KASSERT(ovp != NULLVP);
1833	if (!NODE_IS_SPECIAL(ovp))
1834		KASSERT((bp->b_flags & B_READ) || ovp != LOWERVP(bp->b_vp));
1835
1836	return (VOP_BWRITE(ovp, bp));
1837}
1838
1839int
1840union_getpages(void *v)
1841{
1842	struct vop_getpages_args /* {
1843		struct vnode *a_vp;
1844		voff_t a_offset;
1845		struct vm_page **a_m;
1846		int *a_count;
1847		int a_centeridx;
1848		vm_prot_t a_access_type;
1849		int a_advice;
1850		int a_flags;
1851	} */ *ap = v;
1852	struct vnode *vp = ap->a_vp;
1853
1854	KASSERT(rw_lock_held(vp->v_uobj.vmobjlock));
1855
1856	if (ap->a_flags & PGO_LOCKED) {
1857		return EBUSY;
1858	}
1859	ap->a_vp = OTHERVP(vp);
1860	KASSERT(vp->v_uobj.vmobjlock == ap->a_vp->v_uobj.vmobjlock);
1861
1862	/* Just pass the request on to the underlying layer. */
1863	return VCALL(ap->a_vp, VOFFSET(vop_getpages), ap);
1864}
1865
1866int
1867union_putpages(void *v)
1868{
1869	struct vop_putpages_args /* {
1870		struct vnode *a_vp;
1871		voff_t a_offlo;
1872		voff_t a_offhi;
1873		int a_flags;
1874	} */ *ap = v;
1875	struct vnode *vp = ap->a_vp;
1876
1877	KASSERT(rw_lock_held(vp->v_uobj.vmobjlock));
1878
1879	ap->a_vp = OTHERVP(vp);
1880	KASSERT(vp->v_uobj.vmobjlock == ap->a_vp->v_uobj.vmobjlock);
1881
1882	if (ap->a_flags & PGO_RECLAIM) {
1883		rw_exit(vp->v_uobj.vmobjlock);
1884		return 0;
1885	}
1886
1887	/* Just pass the request on to the underlying layer. */
1888	return VCALL(ap->a_vp, VOFFSET(vop_putpages), ap);
1889}
1890
1891int
1892union_kqfilter(void *v)
1893{
1894	struct vop_kqfilter_args /* {
1895		struct vnode	*a_vp;
1896		struct knote	*a_kn;
1897	} */ *ap = v;
1898	int error;
1899
1900	/*
1901	 * We watch either the upper layer file (if it already exists),
1902	 * or the lower layer one. If there is lower layer file only
1903	 * at this moment, we will keep watching that lower layer file
1904	 * even if upper layer file would be created later on.
1905	 */
1906	if (UPPERVP(ap->a_vp))
1907		error = VOP_KQFILTER(UPPERVP(ap->a_vp), ap->a_kn);
1908	else if (LOWERVP(ap->a_vp))
1909		error = VOP_KQFILTER(LOWERVP(ap->a_vp), ap->a_kn);
1910	else {
1911		/* panic? */
1912		error = EOPNOTSUPP;
1913	}
1914
1915	return (error);
1916}
1917