union_vnops.c revision 1.61
1/*	$NetBSD: union_vnops.c,v 1.61 2014/05/17 04:07:15 dholland Exp $	*/
2
3/*
4 * Copyright (c) 1992, 1993, 1994, 1995
5 *	The Regents of the University of California.  All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Jan-Simon Pendry.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its contributors
19 *    may be used to endorse or promote products derived from this software
20 *    without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 *
34 *	@(#)union_vnops.c	8.33 (Berkeley) 7/31/95
35 */
36
37/*
38 * Copyright (c) 1992, 1993, 1994, 1995 Jan-Simon Pendry.
39 *
40 * This code is derived from software contributed to Berkeley by
41 * Jan-Simon Pendry.
42 *
43 * Redistribution and use in source and binary forms, with or without
44 * modification, are permitted provided that the following conditions
45 * are met:
46 * 1. Redistributions of source code must retain the above copyright
47 *    notice, this list of conditions and the following disclaimer.
48 * 2. Redistributions in binary form must reproduce the above copyright
49 *    notice, this list of conditions and the following disclaimer in the
50 *    documentation and/or other materials provided with the distribution.
51 * 3. All advertising materials mentioning features or use of this software
52 *    must display the following acknowledgement:
53 *	This product includes software developed by the University of
54 *	California, Berkeley and its contributors.
55 * 4. Neither the name of the University nor the names of its contributors
56 *    may be used to endorse or promote products derived from this software
57 *    without specific prior written permission.
58 *
59 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
60 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
61 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
62 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
63 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
64 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
65 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
66 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
67 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
68 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
69 * SUCH DAMAGE.
70 *
71 *	@(#)union_vnops.c	8.33 (Berkeley) 7/31/95
72 */
73
74#include <sys/cdefs.h>
75__KERNEL_RCSID(0, "$NetBSD: union_vnops.c,v 1.61 2014/05/17 04:07:15 dholland Exp $");
76
77#include <sys/param.h>
78#include <sys/systm.h>
79#include <sys/proc.h>
80#include <sys/file.h>
81#include <sys/time.h>
82#include <sys/stat.h>
83#include <sys/vnode.h>
84#include <sys/mount.h>
85#include <sys/namei.h>
86#include <sys/malloc.h>
87#include <sys/buf.h>
88#include <sys/queue.h>
89#include <sys/lock.h>
90#include <sys/kauth.h>
91
92#include <fs/union/union.h>
93#include <miscfs/genfs/genfs.h>
94#include <miscfs/specfs/specdev.h>
95
96int union_lookup(void *);
97int union_create(void *);
98int union_whiteout(void *);
99int union_mknod(void *);
100int union_open(void *);
101int union_close(void *);
102int union_access(void *);
103int union_getattr(void *);
104int union_setattr(void *);
105int union_read(void *);
106int union_write(void *);
107int union_ioctl(void *);
108int union_poll(void *);
109int union_revoke(void *);
110int union_mmap(void *);
111int union_fsync(void *);
112int union_seek(void *);
113int union_remove(void *);
114int union_link(void *);
115int union_rename(void *);
116int union_mkdir(void *);
117int union_rmdir(void *);
118int union_symlink(void *);
119int union_readdir(void *);
120int union_readlink(void *);
121int union_abortop(void *);
122int union_inactive(void *);
123int union_reclaim(void *);
124int union_lock(void *);
125int union_unlock(void *);
126int union_bmap(void *);
127int union_print(void *);
128int union_islocked(void *);
129int union_pathconf(void *);
130int union_advlock(void *);
131int union_strategy(void *);
132int union_bwrite(void *);
133int union_getpages(void *);
134int union_putpages(void *);
135int union_kqfilter(void *);
136
137static int union_lookup1(struct vnode *, struct vnode **,
138			      struct vnode **, struct componentname *);
139
140
141/*
142 * Global vfs data structures
143 */
int (**union_vnodeop_p)(void *);
const struct vnodeopv_entry_desc union_vnodeop_entries[] = {
	{ &vop_default_desc, vn_default_error },
	{ &vop_lookup_desc, union_lookup },		/* lookup */
	{ &vop_create_desc, union_create },		/* create */
	{ &vop_whiteout_desc, union_whiteout },		/* whiteout */
	{ &vop_mknod_desc, union_mknod },		/* mknod */
	{ &vop_open_desc, union_open },			/* open */
	{ &vop_close_desc, union_close },		/* close */
	{ &vop_access_desc, union_access },		/* access */
	{ &vop_getattr_desc, union_getattr },		/* getattr */
	{ &vop_setattr_desc, union_setattr },		/* setattr */
	{ &vop_read_desc, union_read },			/* read */
	{ &vop_write_desc, union_write },		/* write */
	{ &vop_ioctl_desc, union_ioctl },		/* ioctl */
	{ &vop_poll_desc, union_poll },			/* poll */
	{ &vop_revoke_desc, union_revoke },		/* revoke */
	{ &vop_mmap_desc, union_mmap },			/* mmap */
	{ &vop_fsync_desc, union_fsync },		/* fsync */
	{ &vop_seek_desc, union_seek },			/* seek */
	{ &vop_remove_desc, union_remove },		/* remove */
	{ &vop_link_desc, union_link },			/* link */
	{ &vop_rename_desc, union_rename },		/* rename */
	{ &vop_mkdir_desc, union_mkdir },		/* mkdir */
	{ &vop_rmdir_desc, union_rmdir },		/* rmdir */
	{ &vop_symlink_desc, union_symlink },		/* symlink */
	{ &vop_readdir_desc, union_readdir },		/* readdir */
	{ &vop_readlink_desc, union_readlink },		/* readlink */
	{ &vop_abortop_desc, union_abortop },		/* abortop */
	{ &vop_inactive_desc, union_inactive },		/* inactive */
	{ &vop_reclaim_desc, union_reclaim },		/* reclaim */
	{ &vop_lock_desc, union_lock },			/* lock */
	{ &vop_unlock_desc, union_unlock },		/* unlock */
	{ &vop_bmap_desc, union_bmap },			/* bmap */
	{ &vop_strategy_desc, union_strategy },		/* strategy */
	{ &vop_bwrite_desc, union_bwrite },		/* bwrite */
	{ &vop_print_desc, union_print },		/* print */
	{ &vop_islocked_desc, union_islocked },		/* islocked */
	{ &vop_pathconf_desc, union_pathconf },		/* pathconf */
	{ &vop_advlock_desc, union_advlock },		/* advlock */
	{ &vop_getpages_desc, union_getpages },		/* getpages */
	{ &vop_putpages_desc, union_putpages },		/* putpages */
	{ &vop_kqfilter_desc, union_kqfilter },		/* kqfilter */
	{ NULL, NULL }
};
/* Registered with the VFS layer to fill in union_vnodeop_p. */
const struct vnodeopv_desc union_vnodeop_opv_desc =
	{ &union_vnodeop_p, union_vnodeop_entries };
191
/*
 * True if the vnode is a device, socket or fifo: a node type with no
 * regular file contents, which therefore cannot be copied up and is
 * handled specially by setattr/write below.
 */
#define NODE_IS_SPECIAL(vp) \
	((vp)->v_type == VBLK || (vp)->v_type == VCHR || \
	(vp)->v_type == VSOCK || (vp)->v_type == VFIFO)
195
/*
 * union_lookup1: look up one pathname component in a single layer
 * (upper or lower), transparently crossing mount points in both
 * directions.
 *
 * udvp is the root vnode of this layer's tree; the ".." walk never
 * steps above it.  *dvpp is the (locked) directory to search and may
 * be rewritten when ".." crosses back over a mount point.  On success
 * *vpp holds the resulting locked, referenced vnode and 0 is returned;
 * otherwise an errno from VOP_LOOKUP/vn_lock/VFS_ROOT is returned.
 */
static int
union_lookup1(struct vnode *udvp, struct vnode **dvpp, struct vnode **vpp,
	struct componentname *cnp)
{
	int error;
	struct vnode *tdvp;
	struct vnode *dvp;
	struct mount *mp;

	dvp = *dvpp;

	/*
	 * If stepping up the directory tree, check for going
	 * back across the mount point, in which case do what
	 * lookup would do by stepping back down the mount
	 * hierarchy.
	 */
	if (cnp->cn_flags & ISDOTDOT) {
		while ((dvp != udvp) && (dvp->v_vflag & VV_ROOT)) {
			/*
			 * Don't do the NOCROSSMOUNT check
			 * at this level.  By definition,
			 * union fs deals with namespaces, not
			 * filesystems.
			 */
			tdvp = dvp;
			*dvpp = dvp = dvp->v_mount->mnt_vnodecovered;
			VOP_UNLOCK(tdvp);
			vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
		}
	}

        error = VOP_LOOKUP(dvp, &tdvp, cnp);
	if (error)
		return (error);
	if (dvp != tdvp) {
		/*
		 * Lock the result.  For "..", drop the parent lock first
		 * to preserve the child-before-parent locking order and
		 * reacquire it afterwards.
		 */
		if (cnp->cn_flags & ISDOTDOT)
			VOP_UNLOCK(dvp);
		error = vn_lock(tdvp, LK_EXCLUSIVE);
		if (cnp->cn_flags & ISDOTDOT)
			vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
		if (error) {
			vrele(tdvp);
			return error;
		}
		dvp = tdvp;
	}

	/*
	 * Lastly check if the current node is a mount point in
	 * which case walk up the mount hierarchy making sure not to
	 * bump into the root of the mount tree (ie. dvp != udvp).
	 */
	while (dvp != udvp && (dvp->v_type == VDIR) &&
	       (mp = dvp->v_mountedhere)) {
		/* If the mount is being unmounted/busy, retry the test. */
		if (vfs_busy(mp, NULL))
			continue;
		vput(dvp);
		error = VFS_ROOT(mp, &tdvp);
		vfs_unbusy(mp, false, NULL);
		if (error) {
			return (error);
		}
		/* VFS_ROOT returns the covered root locked and referenced. */
		dvp = tdvp;
	}

	*vpp = dvp;
	return (0);
}
265
/*
 * union_lookup: look a component up in both the upper and lower layer
 * and combine the results into a single union vnode via
 * union_allocvp().  Handles whiteouts/opaque directories (which hide
 * the lower layer) and transparently creates an upper-layer shadow
 * directory when a lower directory is found with no upper counterpart.
 */
int
union_lookup(void *v)
{
	struct vop_lookup_v2_args /* {
		struct vnodeop_desc *a_desc;
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
	} */ *ap = v;
	int error;
	int uerror, lerror;
	struct vnode *uppervp, *lowervp;
	struct vnode *upperdvp, *lowerdvp;
	struct vnode *dvp = ap->a_dvp;
	struct union_node *dun = VTOUNION(dvp);
	struct componentname *cnp = ap->a_cnp;
	struct union_mount *um = MOUNTTOUNIONMOUNT(dvp->v_mount);
	kauth_cred_t saved_cred = NULL;
	int iswhiteout;
	struct vattr va;

#ifdef notyet
	/* "..." would name the lower layer's view of this directory. */
	if (cnp->cn_namelen == 3 &&
			cnp->cn_nameptr[2] == '.' &&
			cnp->cn_nameptr[1] == '.' &&
			cnp->cn_nameptr[0] == '.') {
		dvp = *ap->a_vpp = LOWERVP(ap->a_dvp);
		if (dvp == NULLVP)
			return (ENOENT);
		vref(dvp);
		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
		return (0);
	}
#endif

	/* Refuse to start a delete/rename on a read-only union mount. */
	if ((cnp->cn_flags & ISLASTCN) &&
	    (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
		return (EROFS);

	/* Restart point after creating a shadow directory (case 2b). */
start:
	upperdvp = dun->un_uppervp;
	lowerdvp = dun->un_lowervp;
	uppervp = NULLVP;
	lowervp = NULLVP;
	iswhiteout = 0;

	/*
	 * do the lookup in the upper level.
	 * if that level consumes additional pathnames,
	 * then assume that something special is going
	 * on and just return that vnode.
	 */
	if (upperdvp != NULLVP) {
		uerror = union_lookup1(um->um_uppervp, &upperdvp,
					&uppervp, cnp);
		if (cnp->cn_consume != 0) {
			if (uppervp != upperdvp)
				VOP_UNLOCK(uppervp);
			*ap->a_vpp = uppervp;
			return (uerror);
		}
		if (uerror == ENOENT || uerror == EJUSTRETURN) {
			/*
			 * Not found in the upper layer.  The lower layer
			 * entry is hidden either by an explicit whiteout
			 * or by the upper directory being opaque.
			 * NB: lerror is only a scratch variable here; it
			 * is reassigned by the lower-layer lookup below.
			 */
			if (cnp->cn_flags & ISWHITEOUT) {
				iswhiteout = 1;
			} else if (lowerdvp != NULLVP) {
				lerror = VOP_GETATTR(upperdvp, &va,
					cnp->cn_cred);
				if (lerror == 0 && (va.va_flags & OPAQUE))
					iswhiteout = 1;
			}
		}
	} else {
		uerror = ENOENT;
	}

	/*
	 * in a similar way to the upper layer, do the lookup
	 * in the lower layer.   this time, if there is some
	 * component magic going on, then vput whatever we got
	 * back from the upper layer and return the lower vnode
	 * instead.
	 */
	if (lowerdvp != NULLVP && !iswhiteout) {
		int nameiop;

		vn_lock(lowerdvp, LK_EXCLUSIVE | LK_RETRY);

		/*
		 * Only do a LOOKUP on the bottom node, since
		 * we won't be making changes to it anyway.
		 */
		nameiop = cnp->cn_nameiop;
		cnp->cn_nameiop = LOOKUP;
		if (um->um_op == UNMNT_BELOW) {
			/* Lower layer is accessed with the mount credential. */
			saved_cred = cnp->cn_cred;
			cnp->cn_cred = um->um_cred;
		}

		/*
		 * we shouldn't have to worry about locking interactions
		 * between the lower layer and our union layer (w.r.t.
		 * `..' processing) because we don't futz with lowervp
		 * locks in the union-node instantiation code path.
		 */
		lerror = union_lookup1(um->um_lowervp, &lowerdvp,
				&lowervp, cnp);
		if (um->um_op == UNMNT_BELOW)
			cnp->cn_cred = saved_cred;
		cnp->cn_nameiop = nameiop;

		if (lowervp != lowerdvp)
			VOP_UNLOCK(lowerdvp);

		if (cnp->cn_consume != 0) {
			if (uppervp != NULLVP) {
				if (uppervp == upperdvp)
					vrele(uppervp);
				else
					vput(uppervp);
				uppervp = NULLVP;
			}
			*ap->a_vpp = lowervp;
			return (lerror);
		}
	} else {
		lerror = ENOENT;
		if ((cnp->cn_flags & ISDOTDOT) && dun->un_pvp != NULLVP) {
			/* ".." of an upper-only dir: use the parent's lower vp. */
			lowervp = LOWERVP(dun->un_pvp);
			if (lowervp != NULLVP) {
				vref(lowervp);
				vn_lock(lowervp, LK_EXCLUSIVE | LK_RETRY);
				lerror = 0;
			}
		}
	}

	/*
	 * EJUSTRETURN is used by underlying filesystems to indicate that
	 * a directory modification op was started successfully.
	 * This will only happen in the upper layer, since
	 * the lower layer only does LOOKUPs.
	 * If this union is mounted read-only, bounce it now.
	 */

	if ((uerror == EJUSTRETURN) && (cnp->cn_flags & ISLASTCN) &&
	    (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
	    ((cnp->cn_nameiop == CREATE) || (cnp->cn_nameiop == RENAME)))
		uerror = EROFS;

	/*
	 * at this point, we have uerror and lerror indicating
	 * possible errors with the lookups in the upper and lower
	 * layers.  additionally, uppervp and lowervp are (locked)
	 * references to existing vnodes in the upper and lower layers.
	 *
	 * there are now three cases to consider.
	 * 1. if both layers returned an error, then return whatever
	 *    error the upper layer generated.
	 *
	 * 2. if the top layer failed and the bottom layer succeeded
	 *    then two subcases occur.
	 *    a.  the bottom vnode is not a directory, in which
	 *	  case just return a new union vnode referencing
	 *	  an empty top layer and the existing bottom layer.
	 *    b.  the bottom vnode is a directory, in which case
	 *	  create a new directory in the top-level and
	 *	  continue as in case 3.
	 *
	 * 3. if the top layer succeeded then return a new union
	 *    vnode referencing whatever the new top layer and
	 *    whatever the bottom layer returned.
	 */

	*ap->a_vpp = NULLVP;


	/* case 1. */
	if ((uerror != 0) && (lerror != 0)) {
		return (uerror);
	}

	/* case 2. */
	if (uerror != 0 /* && (lerror == 0) */ ) {
		if (lowervp->v_type == VDIR) { /* case 2b. */
			/*
			 * We may be racing another process to make the
			 * upper-level shadow directory.  Be careful with
			 * locks/etc!
			 * If we have to create a shadow directory and want
			 * to commit the node we have to restart the lookup
			 * to get the componentname right.
			 */
			if (upperdvp) {
				VOP_UNLOCK(upperdvp);
				uerror = union_mkshadow(um, upperdvp, cnp,
				    &uppervp);
				vn_lock(upperdvp, LK_EXCLUSIVE | LK_RETRY);
				if (uerror == 0 && cnp->cn_nameiop != LOOKUP) {
					vrele(uppervp);
					if (lowervp != NULLVP)
						vput(lowervp);
					goto start;
				}
			}
			if (uerror) {
				if (lowervp != NULLVP) {
					vput(lowervp);
					lowervp = NULLVP;
				}
				return (uerror);
			}
		}
	} else { /* uerror == 0 */
		if (uppervp != upperdvp)
			VOP_UNLOCK(uppervp);
	}

	/* union_allocvp() expects its layer vnodes referenced but unlocked. */
	if (lowervp != NULLVP)
		VOP_UNLOCK(lowervp);

	error = union_allocvp(ap->a_vpp, dvp->v_mount, dvp, upperdvp, cnp,
			      uppervp, lowervp, 1);

	if (error) {
		if (uppervp != NULLVP)
			vrele(uppervp);
		if (lowervp != NULLVP)
			vrele(lowervp);
		return error;
	}

	return 0;
}
500
501int
502union_create(void *v)
503{
504	struct vop_create_v3_args /* {
505		struct vnode *a_dvp;
506		struct vnode **a_vpp;
507		struct componentname *a_cnp;
508		struct vattr *a_vap;
509	} */ *ap = v;
510	struct union_node *un = VTOUNION(ap->a_dvp);
511	struct vnode *dvp = un->un_uppervp;
512	struct componentname *cnp = ap->a_cnp;
513
514	if (dvp != NULLVP) {
515		int error;
516		struct vnode *vp;
517		struct mount *mp;
518
519		mp = ap->a_dvp->v_mount;
520
521		vp = NULL;
522		error = VOP_CREATE(dvp, &vp, cnp, ap->a_vap);
523		if (error)
524			return (error);
525
526		error = union_allocvp(ap->a_vpp, mp, NULLVP, NULLVP, cnp, vp,
527				NULLVP, 1);
528		if (error)
529			vrele(vp);
530		return (error);
531	}
532
533	return (EROFS);
534}
535
536int
537union_whiteout(void *v)
538{
539	struct vop_whiteout_args /* {
540		struct vnode *a_dvp;
541		struct componentname *a_cnp;
542		int a_flags;
543	} */ *ap = v;
544	struct union_node *un = VTOUNION(ap->a_dvp);
545	struct componentname *cnp = ap->a_cnp;
546
547	if (un->un_uppervp == NULLVP)
548		return (EOPNOTSUPP);
549
550	return (VOP_WHITEOUT(un->un_uppervp, cnp, ap->a_flags));
551}
552
553int
554union_mknod(void *v)
555{
556	struct vop_mknod_v3_args /* {
557		struct vnode *a_dvp;
558		struct vnode **a_vpp;
559		struct componentname *a_cnp;
560		struct vattr *a_vap;
561	} */ *ap = v;
562	struct union_node *un = VTOUNION(ap->a_dvp);
563	struct vnode *dvp = un->un_uppervp;
564	struct componentname *cnp = ap->a_cnp;
565
566	if (dvp != NULLVP) {
567		int error;
568		struct vnode *vp;
569		struct mount *mp;
570
571		mp = ap->a_dvp->v_mount;
572		error = VOP_MKNOD(dvp, &vp, cnp, ap->a_vap);
573		if (error)
574			return (error);
575
576		error = union_allocvp(ap->a_vpp, mp, NULLVP, NULLVP,
577				      cnp, vp, NULLVP, 1);
578		if (error)
579			vrele(vp);
580		return (error);
581	}
582
583	return (EROFS);
584}
585
/*
 * union_open: open the appropriate layer vnode.  An existing upper
 * vnode is opened directly.  A lower regular file being opened for
 * writing is first copied up to the upper layer, and the upper copy is
 * opened instead; otherwise the lower vnode itself is opened and
 * un_openl is bumped so union_close() can balance it.
 */
int
union_open(void *v)
{
	struct vop_open_args /* {
		struct vnodeop_desc *a_desc;
		struct vnode *a_vp;
		int a_mode;
		kauth_cred_t a_cred;
	} */ *ap = v;
	struct union_node *un = VTOUNION(ap->a_vp);
	struct vnode *tvp;
	int mode = ap->a_mode;
	kauth_cred_t cred = ap->a_cred;
	struct lwp *l = curlwp;
	int error;

	/*
	 * If there is an existing upper vp then simply open that.
	 */
	tvp = un->un_uppervp;
	if (tvp == NULLVP) {
		/*
		 * If the lower vnode is being opened for writing, then
		 * copy the file contents to the upper vnode and open that,
		 * otherwise can simply open the lower vnode.
		 */
		tvp = un->un_lowervp;
		if ((ap->a_mode & FWRITE) && (tvp->v_type == VREG)) {
			/* Skip copying the data when O_TRUNC discards it. */
			error = union_copyup(un, (mode&O_TRUNC) == 0, cred, l);
			if (error == 0)
				error = VOP_OPEN(un->un_uppervp, mode, cred);
			return (error);
		}

		/*
		 * Just open the lower vnode, but check for nodev mount flag
		 */
		if ((tvp->v_type == VBLK || tvp->v_type == VCHR) &&
		    (ap->a_vp->v_mount->mnt_flag & MNT_NODEV))
			return ENXIO;
		/* Count lower-layer opens; union_close() decrements. */
		un->un_openl++;
		vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY);
		error = VOP_OPEN(tvp, mode, cred);
		VOP_UNLOCK(tvp);

		return (error);
	}
	/*
	 * Just open the upper vnode, checking for nodev mount flag first
	 */
	if ((tvp->v_type == VBLK || tvp->v_type == VCHR) &&
	    (ap->a_vp->v_mount->mnt_flag & MNT_NODEV))
		return ENXIO;

	error = VOP_OPEN(tvp, mode, cred);

	return (error);
}
644
645int
646union_close(void *v)
647{
648	struct vop_close_args /* {
649		struct vnode *a_vp;
650		int  a_fflag;
651		kauth_cred_t a_cred;
652	} */ *ap = v;
653	struct union_node *un = VTOUNION(ap->a_vp);
654	struct vnode *vp;
655	int error;
656	bool do_lock;
657
658	vp = un->un_uppervp;
659	if (vp != NULLVP) {
660		do_lock = false;
661	} else {
662		KASSERT(un->un_openl > 0);
663		--un->un_openl;
664		vp = un->un_lowervp;
665		do_lock = true;
666	}
667
668	KASSERT(vp != NULLVP);
669	ap->a_vp = vp;
670	if (do_lock)
671		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
672	error = VCALL(vp, VOFFSET(vop_close), ap);
673	if (do_lock)
674		VOP_UNLOCK(vp);
675
676	return error;
677}
678
679/*
680 * Check access permission on the union vnode.
681 * The access check being enforced is to check
682 * against both the underlying vnode, and any
683 * copied vnode.  This ensures that no additional
684 * file permissions are given away simply because
685 * the user caused an implicit file copy.
686 */
687int
688union_access(void *v)
689{
690	struct vop_access_args /* {
691		struct vnodeop_desc *a_desc;
692		struct vnode *a_vp;
693		int a_mode;
694		kauth_cred_t a_cred;
695	} */ *ap = v;
696	struct vnode *vp = ap->a_vp;
697	struct union_node *un = VTOUNION(vp);
698	int error = EACCES;
699	struct union_mount *um = MOUNTTOUNIONMOUNT(vp->v_mount);
700
701	/*
702	 * Disallow write attempts on read-only file systems;
703	 * unless the file is a socket, fifo, or a block or
704	 * character device resident on the file system.
705	 */
706	if (ap->a_mode & VWRITE) {
707		switch (vp->v_type) {
708		case VDIR:
709		case VLNK:
710		case VREG:
711			if (vp->v_mount->mnt_flag & MNT_RDONLY)
712				return (EROFS);
713			break;
714		case VBAD:
715		case VBLK:
716		case VCHR:
717		case VSOCK:
718		case VFIFO:
719		case VNON:
720		default:
721			break;
722		}
723	}
724
725
726	if ((vp = un->un_uppervp) != NULLVP) {
727		ap->a_vp = vp;
728		return (VCALL(vp, VOFFSET(vop_access), ap));
729	}
730
731	if ((vp = un->un_lowervp) != NULLVP) {
732		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
733		ap->a_vp = vp;
734		error = VCALL(vp, VOFFSET(vop_access), ap);
735		if (error == 0) {
736			if (um->um_op == UNMNT_BELOW) {
737				ap->a_cred = um->um_cred;
738				error = VCALL(vp, VOFFSET(vop_access), ap);
739			}
740		}
741		VOP_UNLOCK(vp);
742		if (error)
743			return (error);
744	}
745
746	return (error);
747}
748
749/*
750 * We handle getattr only to change the fsid and
751 * track object sizes
752 */
753int
754union_getattr(void *v)
755{
756	struct vop_getattr_args /* {
757		struct vnode *a_vp;
758		struct vattr *a_vap;
759		kauth_cred_t a_cred;
760	} */ *ap = v;
761	int error;
762	struct union_node *un = VTOUNION(ap->a_vp);
763	struct vnode *vp = un->un_uppervp;
764	struct vattr *vap;
765	struct vattr va;
766
767
768	/*
769	 * Some programs walk the filesystem hierarchy by counting
770	 * links to directories to avoid stat'ing all the time.
771	 * This means the link count on directories needs to be "correct".
772	 * The only way to do that is to call getattr on both layers
773	 * and fix up the link count.  The link count will not necessarily
774	 * be accurate but will be large enough to defeat the tree walkers.
775	 *
776	 * To make life more interesting, some filesystems don't keep
777	 * track of link counts in the expected way, and return a
778	 * link count of `1' for those directories; if either of the
779	 * component directories returns a link count of `1', we return a 1.
780	 */
781
782	vap = ap->a_vap;
783
784	vp = un->un_uppervp;
785	if (vp != NULLVP) {
786		error = VOP_GETATTR(vp, vap, ap->a_cred);
787		if (error)
788			return (error);
789		mutex_enter(&un->un_lock);
790		union_newsize(ap->a_vp, vap->va_size, VNOVAL);
791	}
792
793	if (vp == NULLVP) {
794		vp = un->un_lowervp;
795	} else if (vp->v_type == VDIR) {
796		vp = un->un_lowervp;
797		if (vp != NULLVP)
798			vap = &va;
799	} else {
800		vp = NULLVP;
801	}
802
803	if (vp != NULLVP) {
804		if (vp == un->un_lowervp)
805			vn_lock(vp, LK_SHARED | LK_RETRY);
806		error = VOP_GETATTR(vp, vap, ap->a_cred);
807		if (vp == un->un_lowervp)
808			VOP_UNLOCK(vp);
809		if (error)
810			return (error);
811		mutex_enter(&un->un_lock);
812		union_newsize(ap->a_vp, VNOVAL, vap->va_size);
813	}
814
815	if ((vap != ap->a_vap) && (vap->va_type == VDIR)) {
816		/*
817		 * Link count manipulation:
818		 *	- If both return "2", return 2 (no subdirs)
819		 *	- If one or the other return "1", return "1" (ENOCLUE)
820		 */
821		if ((ap->a_vap->va_nlink == 2) &&
822		    (vap->va_nlink == 2))
823			;
824		else if (ap->a_vap->va_nlink != 1) {
825			if (vap->va_nlink == 1)
826				ap->a_vap->va_nlink = 1;
827			else
828				ap->a_vap->va_nlink += vap->va_nlink;
829		}
830	}
831	ap->a_vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsidx.__fsid_val[0];
832	return (0);
833}
834
/*
 * union_setattr: apply attribute changes to the upper layer, copying
 * the file up from the lower layer first when needed (O_TRUNC/O_CREAT
 * on a lower regular file).  Special (device-like) lower nodes accept
 * a no-op size change so opens with O_TRUNC succeed; everything else
 * without an upper vnode is read-only.
 */
int
union_setattr(void *v)
{
	struct vop_setattr_args /* {
		struct vnode *a_vp;
		struct vattr *a_vap;
		kauth_cred_t a_cred;
	} */ *ap = v;
	struct vattr *vap = ap->a_vap;
	struct vnode *vp = ap->a_vp;
	struct union_node *un = VTOUNION(vp);
	bool size_only;		/* All but va_size are VNOVAL. */
	int error;

	size_only = (vap->va_flags == VNOVAL && vap->va_uid == (uid_t)VNOVAL &&
	    vap->va_gid == (gid_t)VNOVAL && vap->va_atime.tv_sec == VNOVAL &&
	    vap->va_mtime.tv_sec == VNOVAL && vap->va_mode == (mode_t)VNOVAL);

	if (!size_only && (vp->v_mount->mnt_flag & MNT_RDONLY))
		return (EROFS);
	if (vap->va_size != VNOVAL) {
 		switch (vp->v_type) {
 		case VDIR:
 			return (EISDIR);
 		case VCHR:
 		case VBLK:
 		case VSOCK:
 		case VFIFO:
			break;
		case VREG:
		case VLNK:
 		default:
			/*
			 * Disallow write attempts if the filesystem is
			 * mounted read-only.
			 */
			if (vp->v_mount->mnt_flag & MNT_RDONLY)
				return (EROFS);
		}
	}

	/*
	 * Handle case of truncating lower object to zero size,
	 * by creating a zero length upper object.  This is to
	 * handle the case of open with O_TRUNC and O_CREAT.
	 */
	if ((un->un_uppervp == NULLVP) &&
	    /* assert(un->un_lowervp != NULLVP) */
	    (un->un_lowervp->v_type == VREG)) {
		/* Copy data only when not truncating to zero anyway. */
		error = union_copyup(un, (vap->va_size != 0),
						ap->a_cred, curlwp);
		if (error)
			return (error);
	}

	/*
	 * Try to set attributes in upper layer, ignore size change to zero
	 * for devices to handle O_TRUNC and return read-only filesystem error
	 * otherwise.
	 */
	if (un->un_uppervp != NULLVP) {
		error = VOP_SETATTR(un->un_uppervp, vap, ap->a_cred);
		if ((error == 0) && (vap->va_size != VNOVAL)) {
			/* union_newsize() is entered with un_lock held. */
			mutex_enter(&un->un_lock);
			union_newsize(ap->a_vp, vap->va_size, VNOVAL);
		}
	} else {
		KASSERT(un->un_lowervp != NULLVP);
		if (NODE_IS_SPECIAL(un->un_lowervp)) {
			if (size_only &&
			    (vap->va_size == 0 || vap->va_size == VNOVAL))
				error = 0;
			else
				error = EROFS;
		} else {
			error = EROFS;
		}
	}

	return (error);
}
916
/*
 * union_read: read from whichever layer backs the union node (upper if
 * present, else lower), locking only the lower vnode, then fold any
 * size growth observed via the uio offset back into the union node.
 */
int
union_read(void *v)
{
	struct vop_read_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int  a_ioflag;
		kauth_cred_t a_cred;
	} */ *ap = v;
	int error;
	struct vnode *vp = OTHERVP(ap->a_vp);
	int dolock = (vp == LOWERVP(ap->a_vp));

	if (dolock)
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	error = VOP_READ(vp, ap->a_uio, ap->a_ioflag, ap->a_cred);
	if (dolock)
		VOP_UNLOCK(vp);

	/*
	 * XXX
	 * perhaps the size of the underlying object has changed under
	 * our feet.  take advantage of the offset information present
	 * in the uio structure.
	 */
	if (error == 0) {
		struct union_node *un = VTOUNION(ap->a_vp);
		off_t cur = ap->a_uio->uio_offset;
		off_t usz = VNOVAL, lsz = VNOVAL;

		mutex_enter(&un->un_lock);
		if (vp == un->un_uppervp) {
			if (cur > un->un_uppersz)
				usz = cur;
		} else {
			if (cur > un->un_lowersz)
				lsz = cur;
		}

		/* union_newsize() consumes un_lock; otherwise drop it here. */
		if (usz != VNOVAL || lsz != VNOVAL)
			union_newsize(ap->a_vp, usz, lsz);
		else
			mutex_exit(&un->un_lock);
	}

	return (error);
}
964
965int
966union_write(void *v)
967{
968	struct vop_read_args /* {
969		struct vnode *a_vp;
970		struct uio *a_uio;
971		int  a_ioflag;
972		kauth_cred_t a_cred;
973	} */ *ap = v;
974	int error;
975	struct vnode *vp;
976	struct union_node *un = VTOUNION(ap->a_vp);
977
978	vp = UPPERVP(ap->a_vp);
979	if (vp == NULLVP) {
980		vp = LOWERVP(ap->a_vp);
981		if (NODE_IS_SPECIAL(vp)) {
982			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
983			error = VOP_WRITE(vp, ap->a_uio, ap->a_ioflag,
984			    ap->a_cred);
985			VOP_UNLOCK(vp);
986			return error;
987		}
988		panic("union: missing upper layer in write");
989	}
990
991	error = VOP_WRITE(vp, ap->a_uio, ap->a_ioflag, ap->a_cred);
992
993	/*
994	 * the size of the underlying object may be changed by the
995	 * write.
996	 */
997	if (error == 0) {
998		off_t cur = ap->a_uio->uio_offset;
999
1000		mutex_enter(&un->un_lock);
1001		if (cur > un->un_uppersz)
1002			union_newsize(ap->a_vp, cur, VNOVAL);
1003		else
1004			mutex_exit(&un->un_lock);
1005	}
1006
1007	return (error);
1008}
1009
1010int
1011union_ioctl(void *v)
1012{
1013	struct vop_ioctl_args /* {
1014		struct vnode *a_vp;
1015		int  a_command;
1016		void *a_data;
1017		int  a_fflag;
1018		kauth_cred_t a_cred;
1019	} */ *ap = v;
1020	struct vnode *ovp = OTHERVP(ap->a_vp);
1021
1022	ap->a_vp = ovp;
1023	return (VCALL(ovp, VOFFSET(vop_ioctl), ap));
1024}
1025
1026int
1027union_poll(void *v)
1028{
1029	struct vop_poll_args /* {
1030		struct vnode *a_vp;
1031		int a_events;
1032	} */ *ap = v;
1033	struct vnode *ovp = OTHERVP(ap->a_vp);
1034
1035	ap->a_vp = ovp;
1036	return (VCALL(ovp, VOFFSET(vop_poll), ap));
1037}
1038
1039int
1040union_revoke(void *v)
1041{
1042	struct vop_revoke_args /* {
1043		struct vnode *a_vp;
1044		int a_flags;
1045		struct proc *a_p;
1046	} */ *ap = v;
1047	struct vnode *vp = ap->a_vp;
1048
1049	if (UPPERVP(vp))
1050		VOP_REVOKE(UPPERVP(vp), ap->a_flags);
1051	if (LOWERVP(vp))
1052		VOP_REVOKE(LOWERVP(vp), ap->a_flags);
1053	vgone(vp);	/* XXXAD?? */
1054	return (0);
1055}
1056
1057int
1058union_mmap(void *v)
1059{
1060	struct vop_mmap_args /* {
1061		struct vnode *a_vp;
1062		vm_prot_t a_prot;
1063		kauth_cred_t a_cred;
1064	} */ *ap = v;
1065	struct vnode *ovp = OTHERVP(ap->a_vp);
1066
1067	ap->a_vp = ovp;
1068	return (VCALL(ovp, VOFFSET(vop_mmap), ap));
1069}
1070
/*
 * union_fsync: sync the backing layer vnode, except for "shallow"
 * fsyncs issued during reclaim (see comment below).  Device/character
 * union nodes are first flushed via spec_fsync().
 */
int
union_fsync(void *v)
{
	struct vop_fsync_args /* {
		struct vnode *a_vp;
		kauth_cred_t a_cred;
		int  a_flags;
		off_t a_offlo;
		off_t a_offhi;
	} */ *ap = v;
	int error = 0;
	struct vnode *targetvp;

	/*
	 * If vinvalbuf is calling us, it's a "shallow fsync" -- don't
	 * bother syncing the underlying vnodes, since (a) they'll be
	 * fsync'ed when reclaimed and (b) we could deadlock if
	 * they're locked; otherwise, pass it through to the
	 * underlying layer.
	 */
	if (ap->a_vp->v_type == VBLK || ap->a_vp->v_type == VCHR) {
		error = spec_fsync(v);
		if (error)
			return error;
	}

	if (ap->a_flags & FSYNC_RECLAIM)
		return 0;

	targetvp = OTHERVP(ap->a_vp);
	if (targetvp != NULLVP) {
		/* Only the lower layer vnode needs explicit locking. */
		int dolock = (targetvp == LOWERVP(ap->a_vp));

		if (dolock)
			vn_lock(targetvp, LK_EXCLUSIVE | LK_RETRY);
		error = VOP_FSYNC(targetvp, ap->a_cred, ap->a_flags,
			    ap->a_offlo, ap->a_offhi);
		if (dolock)
			VOP_UNLOCK(targetvp);
	}

	return (error);
}
1114
1115int
1116union_seek(void *v)
1117{
1118	struct vop_seek_args /* {
1119		struct vnode *a_vp;
1120		off_t  a_oldoff;
1121		off_t  a_newoff;
1122		kauth_cred_t a_cred;
1123	} */ *ap = v;
1124	struct vnode *ovp = OTHERVP(ap->a_vp);
1125
1126	ap->a_vp = ovp;
1127	return (VCALL(ovp, VOFFSET(vop_seek), ap));
1128}
1129
/*
 * Remove a file from the union.  If the file exists in the upper
 * layer it is removed there (possibly leaving a whiteout); if it
 * exists only in the lower layer, a whiteout entry is created in
 * the upper directory to mask it.
 */
int
union_remove(void *v)
{
	struct vop_remove_args /* {
		struct vnode *a_dvp;
		struct vnode *a_vp;
		struct componentname *a_cnp;
	} */ *ap = v;
	int error;
	struct union_node *dun = VTOUNION(ap->a_dvp);
	struct union_node *un = VTOUNION(ap->a_vp);
	struct componentname *cnp = ap->a_cnp;

	/* The directory must have an upper layer or lookup was wrong. */
	if (dun->un_uppervp == NULLVP)
		panic("union remove: null upper vnode");

	if (un->un_uppervp != NULLVP) {
		struct vnode *dvp = dun->un_uppervp;
		struct vnode *vp = un->un_uppervp;

		/*
		 * Account for VOP_REMOVE to vrele dvp and vp.
		 * Note: VOP_REMOVE will unlock dvp and vp.
		 */
		vref(dvp);
		vref(vp);
		/* Leave a whiteout if a lower copy would show through. */
		if (union_dowhiteout(un, cnp->cn_cred))
			cnp->cn_flags |= DOWHITEOUT;
		error = VOP_REMOVE(dvp, vp, cnp);
		if (!error)
			union_removed_upper(un);
		/* Drop the references held on the union-layer vnodes. */
		vrele(ap->a_dvp);
		vrele(ap->a_vp);
	} else {
		/* Lower-only file: mask it with a whiteout entry. */
		error = union_mkwhiteout(
			MOUNTTOUNIONMOUNT(UNIONTOV(dun)->v_mount),
			dun->un_uppervp, ap->a_cnp, un);
		vput(ap->a_dvp);
		vput(ap->a_vp);
	}

	return (error);
}
1173
/*
 * Create a hard link.  The link target must live in the upper layer;
 * if it currently exists only in the lower layer it is copied up
 * first, which requires a delicate unlock/relookup dance on the
 * directory to keep namei state consistent.
 */
int
union_link(void *v)
{
	struct vop_link_args /* {
		struct vnode *a_dvp;
		struct vnode *a_vp;
		struct componentname *a_cnp;
	} */ *ap = v;
	int error = 0;
	struct componentname *cnp = ap->a_cnp;
	struct union_node *dun;
	struct vnode *vp;
	struct vnode *dvp;

	dun = VTOUNION(ap->a_dvp);

	KASSERT((ap->a_cnp->cn_flags & LOCKPARENT) != 0);

	if (ap->a_dvp->v_op != ap->a_vp->v_op) {
		/* Target is not a union vnode; link to it directly. */
		vp = ap->a_vp;
	} else {
		struct union_node *un = VTOUNION(ap->a_vp);
		if (un->un_uppervp == NULLVP) {
			/*
			 * Avoid a lock-order problem when the directory's
			 * upper vnode is the same vnode copyup will lock.
			 */
			const bool droplock = (dun->un_uppervp == un->un_dirvp);

			/*
			 * Needs to be copied before we can link it.
			 */
			vn_lock(ap->a_vp, LK_EXCLUSIVE | LK_RETRY);
			if (droplock)
				VOP_UNLOCK(dun->un_uppervp);
			error = union_copyup(un, 1, cnp->cn_cred, curlwp);
			if (droplock) {
				vn_lock(dun->un_uppervp,
				    LK_EXCLUSIVE | LK_RETRY);
				/*
				 * During copyup, we dropped the lock on the
				 * dir and invalidated any saved namei lookup
				 * state for the directory we'll be entering
				 * the link in.  We need to re-run the lookup
				 * in that directory to reset any state needed
				 * for VOP_LINK.
				 * Call relookup on the union-layer to reset
				 * the state.
				 */
				vp  = NULLVP;
				if (dun->un_uppervp == NULLVP)
					 panic("union: null upperdvp?");
				/* Re-run the directory lookup for VOP_LINK. */
				error = relookup(ap->a_dvp, &vp, ap->a_cnp, 0);
				if (error) {
					VOP_UNLOCK(ap->a_vp);
					return EROFS;	/* ? */
				}
				if (vp != NULLVP) {
					/*
					 * The name we want to create has
					 * mysteriously appeared (a race?)
					 */
					error = EEXIST;
					VOP_UNLOCK(ap->a_vp);
					vput(ap->a_dvp);
					vput(vp);
					return (error);
				}
			}
			VOP_UNLOCK(ap->a_vp);
		}
		/* Link to the (possibly just copied-up) upper vnode. */
		vp = un->un_uppervp;
	}

	dvp = dun->un_uppervp;
	if (dvp == NULLVP)
		error = EROFS;

	if (error) {
		vput(ap->a_dvp);
		return (error);
	}

	/*
	 * Account for VOP_LINK to vrele dvp.
	 * Note: VOP_LINK will unlock dvp.
	 */
	vref(dvp);
	error = VOP_LINK(dvp, vp, cnp);
	vrele(ap->a_dvp);

	return error;
}
1263
/*
 * Rename within the union.  All four vnodes are mapped down to their
 * upper-layer counterparts (taking extra references so the out: path
 * can release the union-layer references uniformly), then the rename
 * is passed to the upper filesystem.  Cross-layer renames where a
 * source has no upper vnode fail with EXDEV.
 */
int
union_rename(void *v)
{
	struct vop_rename_args  /* {
		struct vnode *a_fdvp;
		struct vnode *a_fvp;
		struct componentname *a_fcnp;
		struct vnode *a_tdvp;
		struct vnode *a_tvp;
		struct componentname *a_tcnp;
	} */ *ap = v;
	int error;

	struct vnode *fdvp = ap->a_fdvp;
	struct vnode *fvp = ap->a_fvp;
	struct vnode *tdvp = ap->a_tdvp;
	struct vnode *tvp = ap->a_tvp;

	/*
	 * Account for VOP_RENAME to vrele all nodes.
	 * Note: VOP_RENAME will unlock tdvp.
	 */

	if (fdvp->v_op == union_vnodeop_p) {	/* always true */
		struct union_node *un = VTOUNION(fdvp);
		if (un->un_uppervp == NULLVP) {
			/*
			 * this should never happen in normal
			 * operation but might if there was
			 * a problem creating the top-level shadow
			 * directory.
			 */
			error = EXDEV;
			goto bad;
		}

		/* Substitute the upper vnode; extra ref dropped at out:. */
		fdvp = un->un_uppervp;
		vref(fdvp);
	}

	if (fvp->v_op == union_vnodeop_p) {	/* always true */
		struct union_node *un = VTOUNION(fvp);
		if (un->un_uppervp == NULLVP) {
			/* XXX: should do a copyup */
			error = EXDEV;
			goto bad;
		}

		/* A lower copy would show through: whiteout the old name. */
		if (un->un_lowervp != NULLVP)
			ap->a_fcnp->cn_flags |= DOWHITEOUT;

		fvp = un->un_uppervp;
		vref(fvp);
	}

	if (tdvp->v_op == union_vnodeop_p) {
		struct union_node *un = VTOUNION(tdvp);
		if (un->un_uppervp == NULLVP) {
			/*
			 * this should never happen in normal
			 * operation but might if there was
			 * a problem creating the top-level shadow
			 * directory.
			 */
			error = EXDEV;
			goto bad;
		}

		tdvp = un->un_uppervp;
		vref(tdvp);
	}

	if (tvp != NULLVP && tvp->v_op == union_vnodeop_p) {
		struct union_node *un = VTOUNION(tvp);

		/* tvp may legitimately lack an upper vnode (lower-only). */
		tvp = un->un_uppervp;
		if (tvp != NULLVP) {
			vref(tvp);
		}
	}

	error = VOP_RENAME(fdvp, fvp, ap->a_fcnp, tdvp, tvp, ap->a_tcnp);
	goto out;

bad:
	/* Failure before the rename: release per VOP_RENAME's contract. */
	vput(tdvp);
	if (tvp != NULLVP)
		vput(tvp);
	vrele(fdvp);
	vrele(fvp);

out:
	/* Drop the union-layer refs wherever we substituted an upper vnode. */
	if (fdvp != ap->a_fdvp) {
		vrele(ap->a_fdvp);
	}
	if (fvp != ap->a_fvp) {
		vrele(ap->a_fvp);
	}
	if (tdvp != ap->a_tdvp) {
		vrele(ap->a_tdvp);
	}
	if (tvp != ap->a_tvp) {
		vrele(ap->a_tvp);
	}
	return (error);
}
1370
/*
 * Create a directory in the upper layer and wrap the result in a
 * union node.  Fails with EROFS when the parent has no upper layer.
 */
int
union_mkdir(void *v)
{
	struct vop_mkdir_v3_args /* {
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
		struct vattr *a_vap;
	} */ *ap = v;
	struct union_node *un = VTOUNION(ap->a_dvp);
	struct vnode *dvp = un->un_uppervp;
	struct componentname *cnp = ap->a_cnp;

	if (dvp != NULLVP) {
		int error;
		struct vnode *vp;

		vp = NULL;
		error = VOP_MKDIR(dvp, &vp, cnp, ap->a_vap);
		if (error) {
			/*
			 * NOTE(review): releasing a_dvp here but not on
			 * success looks asymmetric for the mkdir_v3
			 * protocol -- confirm against the vnodeops(9)
			 * reference-counting contract.
			 */
			vrele(ap->a_dvp);
			return (error);
		}

		/* Wrap the new upper-layer directory in a union node. */
		error = union_allocvp(ap->a_vpp, ap->a_dvp->v_mount, ap->a_dvp,
				NULLVP, cnp, vp, NULLVP, 1);
		if (error)
			vrele(vp);
		return (error);
	}

	return (EROFS);
}
1404
/*
 * Remove a directory.  Like union_remove(): remove from the upper
 * layer when present (possibly whiteing out), otherwise create a
 * whiteout to mask the lower-layer directory.  The directory must
 * first pass the union-specific emptiness check.
 */
int
union_rmdir(void *v)
{
	struct vop_rmdir_args /* {
		struct vnode *a_dvp;
		struct vnode *a_vp;
		struct componentname *a_cnp;
	} */ *ap = v;
	int error;
	struct union_node *dun = VTOUNION(ap->a_dvp);
	struct union_node *un = VTOUNION(ap->a_vp);
	struct componentname *cnp = ap->a_cnp;

	if (dun->un_uppervp == NULLVP)
		panic("union rmdir: null upper vnode");

	/* Check emptiness across both layers before removing anything. */
	error = union_check_rmdir(un, cnp->cn_cred);
	if (error) {
		vput(ap->a_dvp);
		vput(ap->a_vp);
		return error;
	}

	if (un->un_uppervp != NULLVP) {
		struct vnode *dvp = dun->un_uppervp;
		struct vnode *vp = un->un_uppervp;

		/*
		 * Account for VOP_RMDIR to vrele dvp and vp.
		 * Note: VOP_RMDIR will unlock dvp and vp.
		 */
		vref(dvp);
		vref(vp);
		/* Leave a whiteout if a lower copy would show through. */
		if (union_dowhiteout(un, cnp->cn_cred))
			cnp->cn_flags |= DOWHITEOUT;
		error = VOP_RMDIR(dvp, vp, ap->a_cnp);
		if (!error)
			union_removed_upper(un);
		vrele(ap->a_dvp);
		vrele(ap->a_vp);
	} else {
		/* Lower-only directory: mask it with a whiteout entry. */
		error = union_mkwhiteout(
			MOUNTTOUNIONMOUNT(UNIONTOV(dun)->v_mount),
			dun->un_uppervp, ap->a_cnp, un);
		vput(ap->a_dvp);
		vput(ap->a_vp);
	}

	return (error);
}
1455
1456int
1457union_symlink(void *v)
1458{
1459	struct vop_symlink_v3_args /* {
1460		struct vnode *a_dvp;
1461		struct vnode **a_vpp;
1462		struct componentname *a_cnp;
1463		struct vattr *a_vap;
1464		char *a_target;
1465	} */ *ap = v;
1466	struct union_node *un = VTOUNION(ap->a_dvp);
1467	struct vnode *dvp = un->un_uppervp;
1468	struct componentname *cnp = ap->a_cnp;
1469
1470	if (dvp != NULLVP) {
1471		int error;
1472
1473		error = VOP_SYMLINK(dvp, ap->a_vpp, cnp, ap->a_vap,
1474				    ap->a_target);
1475		return (error);
1476	}
1477
1478	return (EROFS);
1479}
1480
1481/*
1482 * union_readdir works in concert with getdirentries and
1483 * readdir(3) to provide a list of entries in the unioned
1484 * directories.  getdirentries is responsible for walking
1485 * down the union stack.  readdir(3) is responsible for
1486 * eliminating duplicate names from the returned data stream.
1487 */
1488int
1489union_readdir(void *v)
1490{
1491	struct vop_readdir_args /* {
1492		struct vnodeop_desc *a_desc;
1493		struct vnode *a_vp;
1494		struct uio *a_uio;
1495		kauth_cred_t a_cred;
1496		int *a_eofflag;
1497		u_long *a_cookies;
1498		int a_ncookies;
1499	} */ *ap = v;
1500	struct union_node *un = VTOUNION(ap->a_vp);
1501	struct vnode *uvp = un->un_uppervp;
1502
1503	if (uvp == NULLVP)
1504		return (0);
1505
1506	ap->a_vp = uvp;
1507	return (VCALL(uvp, VOFFSET(vop_readdir), ap));
1508}
1509
1510int
1511union_readlink(void *v)
1512{
1513	struct vop_readlink_args /* {
1514		struct vnode *a_vp;
1515		struct uio *a_uio;
1516		kauth_cred_t a_cred;
1517	} */ *ap = v;
1518	int error;
1519	struct vnode *vp = OTHERVP(ap->a_vp);
1520	int dolock = (vp == LOWERVP(ap->a_vp));
1521
1522	if (dolock)
1523		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1524	ap->a_vp = vp;
1525	error = VCALL(vp, VOFFSET(vop_readlink), ap);
1526	if (dolock)
1527		VOP_UNLOCK(vp);
1528
1529	return (error);
1530}
1531
1532int
1533union_abortop(void *v)
1534{
1535	struct vop_abortop_args /* {
1536		struct vnode *a_dvp;
1537		struct componentname *a_cnp;
1538	} */ *ap = v;
1539
1540	KASSERT(UPPERVP(ap->a_dvp) != NULL);
1541
1542	ap->a_dvp = UPPERVP(ap->a_dvp);
1543	return VCALL(ap->a_dvp, VOFFSET(vop_abortop), ap);
1544}
1545
/*
 * Inactivate a union vnode: release any cached directory vnode
 * list and tell the caller whether the node should be recycled.
 * The underlying vnodes are deliberately kept until reclaim.
 */
int
union_inactive(void *v)
{
	struct vop_inactive_args /* {
		const struct vnodeop_desc *a_desc;
		struct vnode *a_vp;
		bool *a_recycle;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct union_node *un = VTOUNION(vp);
	struct vnode **vpp;

	/*
	 * Do nothing (and _don't_ bypass).
	 * Wait to vrele lowervp until reclaim,
	 * so that until then our union_node is in the
	 * cache and reusable.
	 *
	 * NEEDSWORK: Someday, consider inactive'ing
	 * the lowervp and then trying to reactivate it
	 * with capabilities (v_id)
	 * like they do in the name lookup cache code.
	 * That's too much work for now.
	 */

	/* Release the NULLVP-terminated dircache vnode array, if any. */
	if (un->un_dircache != 0) {
		for (vpp = un->un_dircache; *vpp != NULLVP; vpp++)
			vrele(*vpp);
		free(un->un_dircache, M_TEMP);
		un->un_dircache = 0;
	}

	/* Recycle immediately unless the node is held in the cache. */
	*ap->a_recycle = ((un->un_cflags & UN_CACHED) == 0);
	VOP_UNLOCK(vp);

	return (0);
}
1583
1584int
1585union_reclaim(void *v)
1586{
1587	struct vop_reclaim_args /* {
1588		struct vnode *a_vp;
1589	} */ *ap = v;
1590
1591	union_freevp(ap->a_vp);
1592
1593	return (0);
1594}
1595
1596static int
1597union_lock1(struct vnode *vp, struct vnode *lockvp, int flags)
1598{
1599	struct vop_lock_args ap;
1600
1601	if (lockvp == vp) {
1602		ap.a_vp = vp;
1603		ap.a_flags = flags;
1604		return genfs_lock(&ap);
1605	} else
1606		return VOP_LOCK(lockvp, flags);
1607}
1608
1609static int
1610union_unlock1(struct vnode *vp, struct vnode *lockvp)
1611{
1612	struct vop_unlock_args ap;
1613
1614	if (lockvp == vp) {
1615		ap.a_vp = vp;
1616		return genfs_unlock(&ap);
1617	} else
1618		return VOP_UNLOCK(lockvp);
1619}
1620
/*
 * Lock a union vnode.  The vnode that carries the lock (LOCKVP) can
 * change while we sleep (e.g. after a copyup), so the blocking path
 * retries until the lock we obtained still belongs to the node.
 * After locking, the vnode is checked for revocation.
 */
int
union_lock(void *v)
{
	struct vop_lock_args /* {
		struct vnode *a_vp;
		int a_flags;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp, *lockvp;
	struct union_node *un = VTOUNION(vp);
	int flags = ap->a_flags;
	int error;

	if ((flags & LK_NOWAIT) != 0) {
		/* Non-blocking: every step may only try, never sleep. */
		if (!mutex_tryenter(&un->un_lock))
			return EBUSY;
		lockvp = LOCKVP(vp);
		error = union_lock1(vp, lockvp, flags);
		mutex_exit(&un->un_lock);
		if (error)
			return error;
		/* Check for a dead (revoked) vnode without sleeping. */
		if (mutex_tryenter(vp->v_interlock)) {
			error = vdead_check(vp, VDEAD_NOWAIT);
			mutex_exit(vp->v_interlock);
		} else
			error = EBUSY;
		if (error)
			union_unlock1(vp, lockvp);
		return error;
	}

	/*
	 * Blocking path: take the lock, then verify LOCKVP did not
	 * change while we slept; if it did, drop and retry.
	 */
	mutex_enter(&un->un_lock);
	for (;;) {
		lockvp = LOCKVP(vp);
		mutex_exit(&un->un_lock);
		error = union_lock1(vp, lockvp, flags);
		if (error != 0)
			return error;
		mutex_enter(&un->un_lock);
		if (lockvp == LOCKVP(vp))
			break;
		union_unlock1(vp, lockvp);
	}
	mutex_exit(&un->un_lock);

	/* Fail with ENOENT if the vnode was revoked while locking. */
	mutex_enter(vp->v_interlock);
	error = vdead_check(vp, VDEAD_NOWAIT);
	if (error) {
		union_unlock1(vp, lockvp);
		error = vdead_check(vp, 0);
		KASSERT(error == ENOENT);
	}
	mutex_exit(vp->v_interlock);
	return error;
}
1675
1676int
1677union_unlock(void *v)
1678{
1679	struct vop_unlock_args /* {
1680		struct vnode *a_vp;
1681		int a_flags;
1682	} */ *ap = v;
1683	struct vnode *vp = ap->a_vp, *lockvp;
1684
1685	lockvp = LOCKVP(vp);
1686	union_unlock1(vp, lockvp);
1687
1688	return 0;
1689}
1690
1691int
1692union_bmap(void *v)
1693{
1694	struct vop_bmap_args /* {
1695		struct vnode *a_vp;
1696		daddr_t  a_bn;
1697		struct vnode **a_vpp;
1698		daddr_t *a_bnp;
1699		int *a_runp;
1700	} */ *ap = v;
1701	int error;
1702	struct vnode *vp = OTHERVP(ap->a_vp);
1703	int dolock = (vp == LOWERVP(ap->a_vp));
1704
1705	if (dolock)
1706		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1707	ap->a_vp = vp;
1708	error = VCALL(vp, VOFFSET(vop_bmap), ap);
1709	if (dolock)
1710		VOP_UNLOCK(vp);
1711
1712	return (error);
1713}
1714
1715int
1716union_print(void *v)
1717{
1718	struct vop_print_args /* {
1719		struct vnode *a_vp;
1720	} */ *ap = v;
1721	struct vnode *vp = ap->a_vp;
1722
1723	printf("\ttag VT_UNION, vp=%p, uppervp=%p, lowervp=%p\n",
1724			vp, UPPERVP(vp), LOWERVP(vp));
1725	if (UPPERVP(vp) != NULLVP)
1726		vprint("union: upper", UPPERVP(vp));
1727	if (LOWERVP(vp) != NULLVP)
1728		vprint("union: lower", LOWERVP(vp));
1729	if (VTOUNION(vp)->un_dircache) {
1730		struct vnode **vpp;
1731		for (vpp = VTOUNION(vp)->un_dircache; *vpp != NULLVP; vpp++)
1732			vprint("dircache:", *vpp);
1733	}
1734
1735	return (0);
1736}
1737
1738int
1739union_islocked(void *v)
1740{
1741	struct vop_islocked_args /* {
1742		struct vnode *a_vp;
1743	} */ *ap = v;
1744	struct vnode *vp;
1745	struct union_node *un;
1746
1747	un = VTOUNION(ap->a_vp);
1748	mutex_enter(&un->un_lock);
1749	vp = LOCKVP(ap->a_vp);
1750	mutex_exit(&un->un_lock);
1751
1752	if (vp == ap->a_vp)
1753		return genfs_islocked(ap);
1754	else
1755		return VOP_ISLOCKED(vp);
1756}
1757
1758int
1759union_pathconf(void *v)
1760{
1761	struct vop_pathconf_args /* {
1762		struct vnode *a_vp;
1763		int a_name;
1764		int *a_retval;
1765	} */ *ap = v;
1766	int error;
1767	struct vnode *vp = OTHERVP(ap->a_vp);
1768	int dolock = (vp == LOWERVP(ap->a_vp));
1769
1770	if (dolock)
1771		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1772	ap->a_vp = vp;
1773	error = VCALL(vp, VOFFSET(vop_pathconf), ap);
1774	if (dolock)
1775		VOP_UNLOCK(vp);
1776
1777	return (error);
1778}
1779
1780int
1781union_advlock(void *v)
1782{
1783	struct vop_advlock_args /* {
1784		struct vnode *a_vp;
1785		void *a_id;
1786		int  a_op;
1787		struct flock *a_fl;
1788		int  a_flags;
1789	} */ *ap = v;
1790	struct vnode *ovp = OTHERVP(ap->a_vp);
1791
1792	ap->a_vp = ovp;
1793	return (VCALL(ovp, VOFFSET(vop_advlock), ap));
1794}
1795
1796int
1797union_strategy(void *v)
1798{
1799	struct vop_strategy_args /* {
1800		struct vnode *a_vp;
1801		struct buf *a_bp;
1802	} */ *ap = v;
1803	struct vnode *ovp = OTHERVP(ap->a_vp);
1804	struct buf *bp = ap->a_bp;
1805
1806	KASSERT(ovp != NULLVP);
1807	if (!NODE_IS_SPECIAL(ovp))
1808		KASSERT((bp->b_flags & B_READ) || ovp != LOWERVP(bp->b_vp));
1809
1810	return (VOP_STRATEGY(ovp, bp));
1811}
1812
1813int
1814union_bwrite(void *v)
1815{
1816	struct vop_bwrite_args /* {
1817		struct vnode *a_vp;
1818		struct buf *a_bp;
1819	} */ *ap = v;
1820	struct vnode *ovp = OTHERVP(ap->a_vp);
1821	struct buf *bp = ap->a_bp;
1822
1823	KASSERT(ovp != NULLVP);
1824	if (!NODE_IS_SPECIAL(ovp))
1825		KASSERT((bp->b_flags & B_READ) || ovp != LOWERVP(bp->b_vp));
1826
1827	return (VOP_BWRITE(ovp, bp));
1828}
1829
1830int
1831union_getpages(void *v)
1832{
1833	struct vop_getpages_args /* {
1834		struct vnode *a_vp;
1835		voff_t a_offset;
1836		struct vm_page **a_m;
1837		int *a_count;
1838		int a_centeridx;
1839		vm_prot_t a_access_type;
1840		int a_advice;
1841		int a_flags;
1842	} */ *ap = v;
1843	struct vnode *vp = ap->a_vp;
1844
1845	KASSERT(mutex_owned(vp->v_interlock));
1846
1847	if (ap->a_flags & PGO_LOCKED) {
1848		return EBUSY;
1849	}
1850	ap->a_vp = OTHERVP(vp);
1851	KASSERT(vp->v_interlock == ap->a_vp->v_interlock);
1852
1853	/* Just pass the request on to the underlying layer. */
1854	return VCALL(ap->a_vp, VOFFSET(vop_getpages), ap);
1855}
1856
1857int
1858union_putpages(void *v)
1859{
1860	struct vop_putpages_args /* {
1861		struct vnode *a_vp;
1862		voff_t a_offlo;
1863		voff_t a_offhi;
1864		int a_flags;
1865	} */ *ap = v;
1866	struct vnode *vp = ap->a_vp;
1867
1868	KASSERT(mutex_owned(vp->v_interlock));
1869
1870	ap->a_vp = OTHERVP(vp);
1871	KASSERT(vp->v_interlock == ap->a_vp->v_interlock);
1872
1873	if (ap->a_flags & PGO_RECLAIM) {
1874		mutex_exit(vp->v_interlock);
1875		return 0;
1876	}
1877
1878	/* Just pass the request on to the underlying layer. */
1879	return VCALL(ap->a_vp, VOFFSET(vop_putpages), ap);
1880}
1881
1882int
1883union_kqfilter(void *v)
1884{
1885	struct vop_kqfilter_args /* {
1886		struct vnode	*a_vp;
1887		struct knote	*a_kn;
1888	} */ *ap = v;
1889	int error;
1890
1891	/*
1892	 * We watch either the upper layer file (if it already exists),
1893	 * or the lower layer one. If there is lower layer file only
1894	 * at this moment, we will keep watching that lower layer file
1895	 * even if upper layer file would be created later on.
1896	 */
1897	if (UPPERVP(ap->a_vp))
1898		error = VOP_KQFILTER(UPPERVP(ap->a_vp), ap->a_kn);
1899	else if (LOWERVP(ap->a_vp))
1900		error = VOP_KQFILTER(LOWERVP(ap->a_vp), ap->a_kn);
1901	else {
1902		/* panic? */
1903		error = EOPNOTSUPP;
1904	}
1905
1906	return (error);
1907}
1908