/*	$NetBSD: union_vnops.c,v 1.67 2017/04/26 03:02:48 riastradh Exp $	*/
2
3/*
4 * Copyright (c) 1992, 1993, 1994, 1995
5 *	The Regents of the University of California.  All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Jan-Simon Pendry.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its contributors
19 *    may be used to endorse or promote products derived from this software
20 *    without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 *
34 *	@(#)union_vnops.c	8.33 (Berkeley) 7/31/95
 */

/*
38 * Copyright (c) 1992, 1993, 1994, 1995 Jan-Simon Pendry.
39 *
40 * This code is derived from software contributed to Berkeley by
41 * Jan-Simon Pendry.
42 *
43 * Redistribution and use in source and binary forms, with or without
44 * modification, are permitted provided that the following conditions
45 * are met:
46 * 1. Redistributions of source code must retain the above copyright
47 *    notice, this list of conditions and the following disclaimer.
48 * 2. Redistributions in binary form must reproduce the above copyright
49 *    notice, this list of conditions and the following disclaimer in the
50 *    documentation and/or other materials provided with the distribution.
51 * 3. All advertising materials mentioning features or use of this software
52 *    must display the following acknowledgement:
53 *	This product includes software developed by the University of
54 *	California, Berkeley and its contributors.
55 * 4. Neither the name of the University nor the names of its contributors
56 *    may be used to endorse or promote products derived from this software
57 *    without specific prior written permission.
58 *
59 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
60 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
61 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
62 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
63 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
64 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
65 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
66 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
67 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
68 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
69 * SUCH DAMAGE.
70 *
71 *	@(#)union_vnops.c	8.33 (Berkeley) 7/31/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: union_vnops.c,v 1.67 2017/04/26 03:02:48 riastradh Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/file.h>
#include <sys/time.h>
#include <sys/stat.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/namei.h>
#include <sys/malloc.h>
#include <sys/buf.h>
#include <sys/queue.h>
#include <sys/lock.h>
#include <sys/kauth.h>

#include <fs/union/union.h>
#include <miscfs/genfs/genfs.h>
#include <miscfs/specfs/specdev.h>

/*
 * Forward declarations of the union filesystem vnode operations
 * implemented in this file.
 */
int union_lookup(void *);
int union_create(void *);
int union_whiteout(void *);
int union_mknod(void *);
int union_open(void *);
int union_close(void *);
int union_access(void *);
int union_getattr(void *);
int union_setattr(void *);
int union_read(void *);
int union_write(void *);
int union_ioctl(void *);
int union_poll(void *);
int union_revoke(void *);
int union_mmap(void *);
int union_fsync(void *);
int union_seek(void *);
int union_remove(void *);
int union_link(void *);
int union_rename(void *);
int union_mkdir(void *);
int union_rmdir(void *);
int union_symlink(void *);
int union_readdir(void *);
int union_readlink(void *);
int union_abortop(void *);
int union_inactive(void *);
int union_reclaim(void *);
int union_lock(void *);
int union_unlock(void *);
int union_bmap(void *);
int union_print(void *);
int union_islocked(void *);
int union_pathconf(void *);
int union_advlock(void *);
int union_strategy(void *);
int union_bwrite(void *);
int union_getpages(void *);
int union_putpages(void *);
int union_kqfilter(void *);

/* Internal lookup helper; see the definition below for details. */
static int union_lookup1(struct vnode *, struct vnode **,
			      struct vnode **, struct componentname *);
139
140
141/*
142 * Global vfs data structures
143 */
144int (**union_vnodeop_p)(void *);
145const struct vnodeopv_entry_desc union_vnodeop_entries[] = {
146	{ &vop_default_desc, vn_default_error },
147	{ &vop_lookup_desc, union_lookup },		/* lookup */
148	{ &vop_create_desc, union_create },		/* create */
149	{ &vop_whiteout_desc, union_whiteout },		/* whiteout */
150	{ &vop_mknod_desc, union_mknod },		/* mknod */
151	{ &vop_open_desc, union_open },			/* open */
152	{ &vop_close_desc, union_close },		/* close */
153	{ &vop_access_desc, union_access },		/* access */
154	{ &vop_getattr_desc, union_getattr },		/* getattr */
155	{ &vop_setattr_desc, union_setattr },		/* setattr */
156	{ &vop_read_desc, union_read },			/* read */
157	{ &vop_write_desc, union_write },		/* write */
158	{ &vop_fallocate_desc, genfs_eopnotsupp },	/* fallocate */
159	{ &vop_fdiscard_desc, genfs_eopnotsupp },	/* fdiscard */
160	{ &vop_ioctl_desc, union_ioctl },		/* ioctl */
161	{ &vop_poll_desc, union_poll },			/* select */
162	{ &vop_revoke_desc, union_revoke },		/* revoke */
163	{ &vop_mmap_desc, union_mmap },			/* mmap */
164	{ &vop_fsync_desc, union_fsync },		/* fsync */
165	{ &vop_seek_desc, union_seek },			/* seek */
166	{ &vop_remove_desc, union_remove },		/* remove */
167	{ &vop_link_desc, union_link },			/* link */
168	{ &vop_rename_desc, union_rename },		/* rename */
169	{ &vop_mkdir_desc, union_mkdir },		/* mkdir */
170	{ &vop_rmdir_desc, union_rmdir },		/* rmdir */
171	{ &vop_symlink_desc, union_symlink },		/* symlink */
172	{ &vop_readdir_desc, union_readdir },		/* readdir */
173	{ &vop_readlink_desc, union_readlink },		/* readlink */
174	{ &vop_abortop_desc, union_abortop },		/* abortop */
175	{ &vop_inactive_desc, union_inactive },		/* inactive */
176	{ &vop_reclaim_desc, union_reclaim },		/* reclaim */
177	{ &vop_lock_desc, union_lock },			/* lock */
178	{ &vop_unlock_desc, union_unlock },		/* unlock */
179	{ &vop_bmap_desc, union_bmap },			/* bmap */
180	{ &vop_strategy_desc, union_strategy },		/* strategy */
181	{ &vop_bwrite_desc, union_bwrite },		/* bwrite */
182	{ &vop_print_desc, union_print },		/* print */
183	{ &vop_islocked_desc, union_islocked },		/* islocked */
184	{ &vop_pathconf_desc, union_pathconf },		/* pathconf */
185	{ &vop_advlock_desc, union_advlock },		/* advlock */
186	{ &vop_getpages_desc, union_getpages },		/* getpages */
187	{ &vop_putpages_desc, union_putpages },		/* putpages */
188	{ &vop_kqfilter_desc, union_kqfilter },		/* kqfilter */
189	{ NULL, NULL }
190};
191const struct vnodeopv_desc union_vnodeop_opv_desc =
192	{ &union_vnodeop_p, union_vnodeop_entries };
193
194#define NODE_IS_SPECIAL(vp) \
195	((vp)->v_type == VBLK || (vp)->v_type == VCHR || \
196	(vp)->v_type == VSOCK || (vp)->v_type == VFIFO)
197
198static int
199union_lookup1(struct vnode *udvp, struct vnode **dvpp, struct vnode **vpp,
200	struct componentname *cnp)
201{
202	int error;
203	struct vnode *tdvp;
204	struct vnode *dvp;
205	struct mount *mp;
206
207	dvp = *dvpp;
208
209	/*
210	 * If stepping up the directory tree, check for going
211	 * back across the mount point, in which case do what
212	 * lookup would do by stepping back down the mount
213	 * hierarchy.
214	 */
215	if (cnp->cn_flags & ISDOTDOT) {
216		while ((dvp != udvp) && (dvp->v_vflag & VV_ROOT)) {
217			/*
218			 * Don't do the NOCROSSMOUNT check
219			 * at this level.  By definition,
220			 * union fs deals with namespaces, not
221			 * filesystems.
222			 */
223			tdvp = dvp;
224			*dvpp = dvp = dvp->v_mount->mnt_vnodecovered;
225			VOP_UNLOCK(tdvp);
226			vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
227		}
228	}
229
230        error = VOP_LOOKUP(dvp, &tdvp, cnp);
231	if (error)
232		return (error);
233	if (dvp != tdvp) {
234		if (cnp->cn_flags & ISDOTDOT)
235			VOP_UNLOCK(dvp);
236		error = vn_lock(tdvp, LK_EXCLUSIVE);
237		if (cnp->cn_flags & ISDOTDOT)
238			vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
239		if (error) {
240			vrele(tdvp);
241			return error;
242		}
243		dvp = tdvp;
244	}
245
246	/*
247	 * Lastly check if the current node is a mount point in
248	 * which case walk up the mount hierarchy making sure not to
249	 * bump into the root of the mount tree (ie. dvp != udvp).
250	 */
251	while (dvp != udvp && (dvp->v_type == VDIR) &&
252	       (mp = dvp->v_mountedhere)) {
253		if (vfs_busy(mp))
254			continue;
255		vput(dvp);
256		error = VFS_ROOT(mp, &tdvp);
257		vfs_unbusy(mp);
258		if (error) {
259			return (error);
260		}
261		dvp = tdvp;
262	}
263
264	*vpp = dvp;
265	return (0);
266}
267
268int
269union_lookup(void *v)
270{
271	struct vop_lookup_v2_args /* {
272		struct vnodeop_desc *a_desc;
273		struct vnode *a_dvp;
274		struct vnode **a_vpp;
275		struct componentname *a_cnp;
276	} */ *ap = v;
277	int error;
278	int uerror, lerror;
279	struct vnode *uppervp, *lowervp;
280	struct vnode *upperdvp, *lowerdvp;
281	struct vnode *dvp = ap->a_dvp;
282	struct union_node *dun = VTOUNION(dvp);
283	struct componentname *cnp = ap->a_cnp;
284	struct union_mount *um = MOUNTTOUNIONMOUNT(dvp->v_mount);
285	kauth_cred_t saved_cred = NULL;
286	int iswhiteout;
287	struct vattr va;
288
289#ifdef notyet
290	if (cnp->cn_namelen == 3 &&
291			cnp->cn_nameptr[2] == '.' &&
292			cnp->cn_nameptr[1] == '.' &&
293			cnp->cn_nameptr[0] == '.') {
294		dvp = *ap->a_vpp = LOWERVP(ap->a_dvp);
295		if (dvp == NULLVP)
296			return (ENOENT);
297		vref(dvp);
298		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
299		return (0);
300	}
301#endif
302
303	if ((cnp->cn_flags & ISLASTCN) &&
304	    (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
305	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
306		return (EROFS);
307
308start:
309	upperdvp = dun->un_uppervp;
310	lowerdvp = dun->un_lowervp;
311	uppervp = NULLVP;
312	lowervp = NULLVP;
313	iswhiteout = 0;
314
315	/*
316	 * do the lookup in the upper level.
317	 * if that level comsumes additional pathnames,
318	 * then assume that something special is going
319	 * on and just return that vnode.
320	 */
321	if (upperdvp != NULLVP) {
322		uerror = union_lookup1(um->um_uppervp, &upperdvp,
323					&uppervp, cnp);
324		if (cnp->cn_consume != 0) {
325			if (uppervp != upperdvp)
326				VOP_UNLOCK(uppervp);
327			*ap->a_vpp = uppervp;
328			return (uerror);
329		}
330		if (uerror == ENOENT || uerror == EJUSTRETURN) {
331			if (cnp->cn_flags & ISWHITEOUT) {
332				iswhiteout = 1;
333			} else if (lowerdvp != NULLVP) {
334				lerror = VOP_GETATTR(upperdvp, &va,
335					cnp->cn_cred);
336				if (lerror == 0 && (va.va_flags & OPAQUE))
337					iswhiteout = 1;
338			}
339		}
340	} else {
341		uerror = ENOENT;
342	}
343
344	/*
345	 * in a similar way to the upper layer, do the lookup
346	 * in the lower layer.   this time, if there is some
347	 * component magic going on, then vput whatever we got
348	 * back from the upper layer and return the lower vnode
349	 * instead.
350	 */
351	if (lowerdvp != NULLVP && !iswhiteout) {
352		int nameiop;
353
354		vn_lock(lowerdvp, LK_EXCLUSIVE | LK_RETRY);
355
356		/*
357		 * Only do a LOOKUP on the bottom node, since
358		 * we won't be making changes to it anyway.
359		 */
360		nameiop = cnp->cn_nameiop;
361		cnp->cn_nameiop = LOOKUP;
362		if (um->um_op == UNMNT_BELOW) {
363			saved_cred = cnp->cn_cred;
364			cnp->cn_cred = um->um_cred;
365		}
366
367		/*
368		 * we shouldn't have to worry about locking interactions
369		 * between the lower layer and our union layer (w.r.t.
370		 * `..' processing) because we don't futz with lowervp
371		 * locks in the union-node instantiation code path.
372		 */
373		lerror = union_lookup1(um->um_lowervp, &lowerdvp,
374				&lowervp, cnp);
375		if (um->um_op == UNMNT_BELOW)
376			cnp->cn_cred = saved_cred;
377		cnp->cn_nameiop = nameiop;
378
379		if (lowervp != lowerdvp)
380			VOP_UNLOCK(lowerdvp);
381
382		if (cnp->cn_consume != 0) {
383			if (uppervp != NULLVP) {
384				if (uppervp == upperdvp)
385					vrele(uppervp);
386				else
387					vput(uppervp);
388				uppervp = NULLVP;
389			}
390			*ap->a_vpp = lowervp;
391			return (lerror);
392		}
393	} else {
394		lerror = ENOENT;
395		if ((cnp->cn_flags & ISDOTDOT) && dun->un_pvp != NULLVP) {
396			lowervp = LOWERVP(dun->un_pvp);
397			if (lowervp != NULLVP) {
398				vref(lowervp);
399				vn_lock(lowervp, LK_EXCLUSIVE | LK_RETRY);
400				lerror = 0;
401			}
402		}
403	}
404
405	/*
406	 * EJUSTRETURN is used by underlying filesystems to indicate that
407	 * a directory modification op was started successfully.
408	 * This will only happen in the upper layer, since
409	 * the lower layer only does LOOKUPs.
410	 * If this union is mounted read-only, bounce it now.
411	 */
412
413	if ((uerror == EJUSTRETURN) && (cnp->cn_flags & ISLASTCN) &&
414	    (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
415	    ((cnp->cn_nameiop == CREATE) || (cnp->cn_nameiop == RENAME)))
416		uerror = EROFS;
417
418	/*
419	 * at this point, we have uerror and lerror indicating
420	 * possible errors with the lookups in the upper and lower
421	 * layers.  additionally, uppervp and lowervp are (locked)
422	 * references to existing vnodes in the upper and lower layers.
423	 *
424	 * there are now three cases to consider.
425	 * 1. if both layers returned an error, then return whatever
426	 *    error the upper layer generated.
427	 *
428	 * 2. if the top layer failed and the bottom layer succeeded
429	 *    then two subcases occur.
430	 *    a.  the bottom vnode is not a directory, in which
431	 *	  case just return a new union vnode referencing
432	 *	  an empty top layer and the existing bottom layer.
433	 *    b.  the bottom vnode is a directory, in which case
434	 *	  create a new directory in the top-level and
435	 *	  continue as in case 3.
436	 *
437	 * 3. if the top layer succeeded then return a new union
438	 *    vnode referencing whatever the new top layer and
439	 *    whatever the bottom layer returned.
440	 */
441
442	*ap->a_vpp = NULLVP;
443
444
445	/* case 1. */
446	if ((uerror != 0) && (lerror != 0)) {
447		return (uerror);
448	}
449
450	/* case 2. */
451	if (uerror != 0 /* && (lerror == 0) */ ) {
452		if (lowervp->v_type == VDIR) { /* case 2b. */
453			/*
454			 * We may be racing another process to make the
455			 * upper-level shadow directory.  Be careful with
456			 * locks/etc!
457			 * If we have to create a shadow directory and want
458			 * to commit the node we have to restart the lookup
459			 * to get the componentname right.
460			 */
461			if (upperdvp) {
462				VOP_UNLOCK(upperdvp);
463				uerror = union_mkshadow(um, upperdvp, cnp,
464				    &uppervp);
465				vn_lock(upperdvp, LK_EXCLUSIVE | LK_RETRY);
466				if (uerror == 0 && cnp->cn_nameiop != LOOKUP) {
467					vrele(uppervp);
468					if (lowervp != NULLVP)
469						vput(lowervp);
470					goto start;
471				}
472			}
473			if (uerror) {
474				if (lowervp != NULLVP) {
475					vput(lowervp);
476					lowervp = NULLVP;
477				}
478				return (uerror);
479			}
480		}
481	} else { /* uerror == 0 */
482		if (uppervp != upperdvp)
483			VOP_UNLOCK(uppervp);
484	}
485
486	if (lowervp != NULLVP)
487		VOP_UNLOCK(lowervp);
488
489	error = union_allocvp(ap->a_vpp, dvp->v_mount, dvp, upperdvp, cnp,
490			      uppervp, lowervp, 1);
491
492	if (error) {
493		if (uppervp != NULLVP)
494			vrele(uppervp);
495		if (lowervp != NULLVP)
496			vrele(lowervp);
497		return error;
498	}
499
500	return 0;
501}
502
503int
504union_create(void *v)
505{
506	struct vop_create_v3_args /* {
507		struct vnode *a_dvp;
508		struct vnode **a_vpp;
509		struct componentname *a_cnp;
510		struct vattr *a_vap;
511	} */ *ap = v;
512	struct union_node *un = VTOUNION(ap->a_dvp);
513	struct vnode *dvp = un->un_uppervp;
514	struct componentname *cnp = ap->a_cnp;
515
516	if (dvp != NULLVP) {
517		int error;
518		struct vnode *vp;
519		struct mount *mp;
520
521		mp = ap->a_dvp->v_mount;
522
523		vp = NULL;
524		error = VOP_CREATE(dvp, &vp, cnp, ap->a_vap);
525		if (error)
526			return (error);
527
528		error = union_allocvp(ap->a_vpp, mp, NULLVP, NULLVP, cnp, vp,
529				NULLVP, 1);
530		if (error)
531			vrele(vp);
532		return (error);
533	}
534
535	return (EROFS);
536}
537
538int
539union_whiteout(void *v)
540{
541	struct vop_whiteout_args /* {
542		struct vnode *a_dvp;
543		struct componentname *a_cnp;
544		int a_flags;
545	} */ *ap = v;
546	struct union_node *un = VTOUNION(ap->a_dvp);
547	struct componentname *cnp = ap->a_cnp;
548
549	if (un->un_uppervp == NULLVP)
550		return (EOPNOTSUPP);
551
552	return (VOP_WHITEOUT(un->un_uppervp, cnp, ap->a_flags));
553}
554
555int
556union_mknod(void *v)
557{
558	struct vop_mknod_v3_args /* {
559		struct vnode *a_dvp;
560		struct vnode **a_vpp;
561		struct componentname *a_cnp;
562		struct vattr *a_vap;
563	} */ *ap = v;
564	struct union_node *un = VTOUNION(ap->a_dvp);
565	struct vnode *dvp = un->un_uppervp;
566	struct componentname *cnp = ap->a_cnp;
567
568	if (dvp != NULLVP) {
569		int error;
570		struct vnode *vp;
571		struct mount *mp;
572
573		mp = ap->a_dvp->v_mount;
574		error = VOP_MKNOD(dvp, &vp, cnp, ap->a_vap);
575		if (error)
576			return (error);
577
578		error = union_allocvp(ap->a_vpp, mp, NULLVP, NULLVP,
579				      cnp, vp, NULLVP, 1);
580		if (error)
581			vrele(vp);
582		return (error);
583	}
584
585	return (EROFS);
586}
587
588int
589union_open(void *v)
590{
591	struct vop_open_args /* {
592		struct vnodeop_desc *a_desc;
593		struct vnode *a_vp;
594		int a_mode;
595		kauth_cred_t a_cred;
596	} */ *ap = v;
597	struct union_node *un = VTOUNION(ap->a_vp);
598	struct vnode *tvp;
599	int mode = ap->a_mode;
600	kauth_cred_t cred = ap->a_cred;
601	struct lwp *l = curlwp;
602	int error;
603
604	/*
605	 * If there is an existing upper vp then simply open that.
606	 */
607	tvp = un->un_uppervp;
608	if (tvp == NULLVP) {
609		/*
610		 * If the lower vnode is being opened for writing, then
611		 * copy the file contents to the upper vnode and open that,
612		 * otherwise can simply open the lower vnode.
613		 */
614		tvp = un->un_lowervp;
615		if ((ap->a_mode & FWRITE) && (tvp->v_type == VREG)) {
616			error = union_copyup(un, (mode&O_TRUNC) == 0, cred, l);
617			if (error == 0)
618				error = VOP_OPEN(un->un_uppervp, mode, cred);
619			if (error == 0) {
620				mutex_enter(un->un_uppervp->v_interlock);
621				un->un_uppervp->v_writecount++;
622				mutex_exit(un->un_uppervp->v_interlock);
623			}
624			return (error);
625		}
626
627		/*
628		 * Just open the lower vnode, but check for nodev mount flag
629		 */
630		if ((tvp->v_type == VBLK || tvp->v_type == VCHR) &&
631		    (ap->a_vp->v_mount->mnt_flag & MNT_NODEV))
632			return ENXIO;
633		un->un_openl++;
634		vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY);
635		error = VOP_OPEN(tvp, mode, cred);
636		VOP_UNLOCK(tvp);
637
638		return (error);
639	}
640	/*
641	 * Just open the upper vnode, checking for nodev mount flag first
642	 */
643	if ((tvp->v_type == VBLK || tvp->v_type == VCHR) &&
644	    (ap->a_vp->v_mount->mnt_flag & MNT_NODEV))
645		return ENXIO;
646
647	error = VOP_OPEN(tvp, mode, cred);
648	if (error == 0 && (ap->a_mode & FWRITE)) {
649		mutex_enter(tvp->v_interlock);
650		tvp->v_writecount++;
651		mutex_exit(tvp->v_interlock);
652	}
653
654	return (error);
655}
656
657int
658union_close(void *v)
659{
660	struct vop_close_args /* {
661		struct vnode *a_vp;
662		int  a_fflag;
663		kauth_cred_t a_cred;
664	} */ *ap = v;
665	struct union_node *un = VTOUNION(ap->a_vp);
666	struct vnode *vp;
667	int error;
668	bool do_lock;
669
670	vp = un->un_uppervp;
671	if (vp != NULLVP) {
672		do_lock = false;
673	} else {
674		KASSERT(un->un_openl > 0);
675		--un->un_openl;
676		vp = un->un_lowervp;
677		do_lock = true;
678	}
679
680	KASSERT(vp != NULLVP);
681	ap->a_vp = vp;
682	if ((ap->a_fflag & FWRITE)) {
683		KASSERT(vp == un->un_uppervp);
684		mutex_enter(vp->v_interlock);
685		vp->v_writecount--;
686		mutex_exit(vp->v_interlock);
687	}
688	if (do_lock)
689		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
690	error = VCALL(vp, VOFFSET(vop_close), ap);
691	if (do_lock)
692		VOP_UNLOCK(vp);
693
694	return error;
695}
696
697/*
698 * Check access permission on the union vnode.
699 * The access check being enforced is to check
700 * against both the underlying vnode, and any
701 * copied vnode.  This ensures that no additional
702 * file permissions are given away simply because
703 * the user caused an implicit file copy.
704 */
705int
706union_access(void *v)
707{
708	struct vop_access_args /* {
709		struct vnodeop_desc *a_desc;
710		struct vnode *a_vp;
711		int a_mode;
712		kauth_cred_t a_cred;
713	} */ *ap = v;
714	struct vnode *vp = ap->a_vp;
715	struct union_node *un = VTOUNION(vp);
716	int error = EACCES;
717	struct union_mount *um = MOUNTTOUNIONMOUNT(vp->v_mount);
718
719	/*
720	 * Disallow write attempts on read-only file systems;
721	 * unless the file is a socket, fifo, or a block or
722	 * character device resident on the file system.
723	 */
724	if (ap->a_mode & VWRITE) {
725		switch (vp->v_type) {
726		case VDIR:
727		case VLNK:
728		case VREG:
729			if (vp->v_mount->mnt_flag & MNT_RDONLY)
730				return (EROFS);
731			break;
732		case VBAD:
733		case VBLK:
734		case VCHR:
735		case VSOCK:
736		case VFIFO:
737		case VNON:
738		default:
739			break;
740		}
741	}
742
743
744	if ((vp = un->un_uppervp) != NULLVP) {
745		ap->a_vp = vp;
746		return (VCALL(vp, VOFFSET(vop_access), ap));
747	}
748
749	if ((vp = un->un_lowervp) != NULLVP) {
750		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
751		ap->a_vp = vp;
752		error = VCALL(vp, VOFFSET(vop_access), ap);
753		if (error == 0) {
754			if (um->um_op == UNMNT_BELOW) {
755				ap->a_cred = um->um_cred;
756				error = VCALL(vp, VOFFSET(vop_access), ap);
757			}
758		}
759		VOP_UNLOCK(vp);
760		if (error)
761			return (error);
762	}
763
764	return (error);
765}
766
767/*
768 * We handle getattr only to change the fsid and
769 * track object sizes
770 */
771int
772union_getattr(void *v)
773{
774	struct vop_getattr_args /* {
775		struct vnode *a_vp;
776		struct vattr *a_vap;
777		kauth_cred_t a_cred;
778	} */ *ap = v;
779	int error;
780	struct union_node *un = VTOUNION(ap->a_vp);
781	struct vnode *vp = un->un_uppervp;
782	struct vattr *vap;
783	struct vattr va;
784
785
786	/*
787	 * Some programs walk the filesystem hierarchy by counting
788	 * links to directories to avoid stat'ing all the time.
789	 * This means the link count on directories needs to be "correct".
790	 * The only way to do that is to call getattr on both layers
791	 * and fix up the link count.  The link count will not necessarily
792	 * be accurate but will be large enough to defeat the tree walkers.
793	 *
794	 * To make life more interesting, some filesystems don't keep
795	 * track of link counts in the expected way, and return a
796	 * link count of `1' for those directories; if either of the
797	 * component directories returns a link count of `1', we return a 1.
798	 */
799
800	vap = ap->a_vap;
801
802	vp = un->un_uppervp;
803	if (vp != NULLVP) {
804		error = VOP_GETATTR(vp, vap, ap->a_cred);
805		if (error)
806			return (error);
807		mutex_enter(&un->un_lock);
808		union_newsize(ap->a_vp, vap->va_size, VNOVAL);
809	}
810
811	if (vp == NULLVP) {
812		vp = un->un_lowervp;
813	} else if (vp->v_type == VDIR) {
814		vp = un->un_lowervp;
815		if (vp != NULLVP)
816			vap = &va;
817	} else {
818		vp = NULLVP;
819	}
820
821	if (vp != NULLVP) {
822		if (vp == un->un_lowervp)
823			vn_lock(vp, LK_SHARED | LK_RETRY);
824		error = VOP_GETATTR(vp, vap, ap->a_cred);
825		if (vp == un->un_lowervp)
826			VOP_UNLOCK(vp);
827		if (error)
828			return (error);
829		mutex_enter(&un->un_lock);
830		union_newsize(ap->a_vp, VNOVAL, vap->va_size);
831	}
832
833	if ((vap != ap->a_vap) && (vap->va_type == VDIR)) {
834		/*
835		 * Link count manipulation:
836		 *	- If both return "2", return 2 (no subdirs)
837		 *	- If one or the other return "1", return "1" (ENOCLUE)
838		 */
839		if ((ap->a_vap->va_nlink == 2) &&
840		    (vap->va_nlink == 2))
841			;
842		else if (ap->a_vap->va_nlink != 1) {
843			if (vap->va_nlink == 1)
844				ap->a_vap->va_nlink = 1;
845			else
846				ap->a_vap->va_nlink += vap->va_nlink;
847		}
848	}
849	ap->a_vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsidx.__fsid_val[0];
850	return (0);
851}
852
853int
854union_setattr(void *v)
855{
856	struct vop_setattr_args /* {
857		struct vnode *a_vp;
858		struct vattr *a_vap;
859		kauth_cred_t a_cred;
860	} */ *ap = v;
861	struct vattr *vap = ap->a_vap;
862	struct vnode *vp = ap->a_vp;
863	struct union_node *un = VTOUNION(vp);
864	bool size_only;		/* All but va_size are VNOVAL. */
865	int error;
866
867	size_only = (vap->va_flags == VNOVAL && vap->va_uid == (uid_t)VNOVAL &&
868	    vap->va_gid == (gid_t)VNOVAL && vap->va_atime.tv_sec == VNOVAL &&
869	    vap->va_mtime.tv_sec == VNOVAL && vap->va_mode == (mode_t)VNOVAL);
870
871	if (!size_only && (vp->v_mount->mnt_flag & MNT_RDONLY))
872		return (EROFS);
873	if (vap->va_size != VNOVAL) {
874 		switch (vp->v_type) {
875 		case VDIR:
876 			return (EISDIR);
877 		case VCHR:
878 		case VBLK:
879 		case VSOCK:
880 		case VFIFO:
881			break;
882		case VREG:
883		case VLNK:
884 		default:
885			/*
886			 * Disallow write attempts if the filesystem is
887			 * mounted read-only.
888			 */
889			if (vp->v_mount->mnt_flag & MNT_RDONLY)
890				return (EROFS);
891		}
892	}
893
894	/*
895	 * Handle case of truncating lower object to zero size,
896	 * by creating a zero length upper object.  This is to
897	 * handle the case of open with O_TRUNC and O_CREAT.
898	 */
899	if ((un->un_uppervp == NULLVP) &&
900	    /* assert(un->un_lowervp != NULLVP) */
901	    (un->un_lowervp->v_type == VREG)) {
902		error = union_copyup(un, (vap->va_size != 0),
903						ap->a_cred, curlwp);
904		if (error)
905			return (error);
906	}
907
908	/*
909	 * Try to set attributes in upper layer, ignore size change to zero
910	 * for devices to handle O_TRUNC and return read-only filesystem error
911	 * otherwise.
912	 */
913	if (un->un_uppervp != NULLVP) {
914		error = VOP_SETATTR(un->un_uppervp, vap, ap->a_cred);
915		if ((error == 0) && (vap->va_size != VNOVAL)) {
916			mutex_enter(&un->un_lock);
917			union_newsize(ap->a_vp, vap->va_size, VNOVAL);
918		}
919	} else {
920		KASSERT(un->un_lowervp != NULLVP);
921		if (NODE_IS_SPECIAL(un->un_lowervp)) {
922			if (size_only &&
923			    (vap->va_size == 0 || vap->va_size == VNOVAL))
924				error = 0;
925			else
926				error = EROFS;
927		} else {
928			error = EROFS;
929		}
930	}
931
932	return (error);
933}
934
935int
936union_read(void *v)
937{
938	struct vop_read_args /* {
939		struct vnode *a_vp;
940		struct uio *a_uio;
941		int  a_ioflag;
942		kauth_cred_t a_cred;
943	} */ *ap = v;
944	int error;
945	struct vnode *vp = OTHERVP(ap->a_vp);
946	int dolock = (vp == LOWERVP(ap->a_vp));
947
948	if (dolock)
949		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
950	error = VOP_READ(vp, ap->a_uio, ap->a_ioflag, ap->a_cred);
951	if (dolock)
952		VOP_UNLOCK(vp);
953
954	/*
955	 * XXX
956	 * perhaps the size of the underlying object has changed under
957	 * our feet.  take advantage of the offset information present
958	 * in the uio structure.
959	 */
960	if (error == 0) {
961		struct union_node *un = VTOUNION(ap->a_vp);
962		off_t cur = ap->a_uio->uio_offset;
963		off_t usz = VNOVAL, lsz = VNOVAL;
964
965		mutex_enter(&un->un_lock);
966		if (vp == un->un_uppervp) {
967			if (cur > un->un_uppersz)
968				usz = cur;
969		} else {
970			if (cur > un->un_lowersz)
971				lsz = cur;
972		}
973
974		if (usz != VNOVAL || lsz != VNOVAL)
975			union_newsize(ap->a_vp, usz, lsz);
976		else
977			mutex_exit(&un->un_lock);
978	}
979
980	return (error);
981}
982
983int
984union_write(void *v)
985{
986	struct vop_read_args /* {
987		struct vnode *a_vp;
988		struct uio *a_uio;
989		int  a_ioflag;
990		kauth_cred_t a_cred;
991	} */ *ap = v;
992	int error;
993	struct vnode *vp;
994	struct union_node *un = VTOUNION(ap->a_vp);
995
996	vp = UPPERVP(ap->a_vp);
997	if (vp == NULLVP) {
998		vp = LOWERVP(ap->a_vp);
999		if (NODE_IS_SPECIAL(vp)) {
1000			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1001			error = VOP_WRITE(vp, ap->a_uio, ap->a_ioflag,
1002			    ap->a_cred);
1003			VOP_UNLOCK(vp);
1004			return error;
1005		}
1006		panic("union: missing upper layer in write");
1007	}
1008
1009	error = VOP_WRITE(vp, ap->a_uio, ap->a_ioflag, ap->a_cred);
1010
1011	/*
1012	 * the size of the underlying object may be changed by the
1013	 * write.
1014	 */
1015	if (error == 0) {
1016		off_t cur = ap->a_uio->uio_offset;
1017
1018		mutex_enter(&un->un_lock);
1019		if (cur > un->un_uppersz)
1020			union_newsize(ap->a_vp, cur, VNOVAL);
1021		else
1022			mutex_exit(&un->un_lock);
1023	}
1024
1025	return (error);
1026}
1027
1028int
1029union_ioctl(void *v)
1030{
1031	struct vop_ioctl_args /* {
1032		struct vnode *a_vp;
1033		int  a_command;
1034		void *a_data;
1035		int  a_fflag;
1036		kauth_cred_t a_cred;
1037	} */ *ap = v;
1038	struct vnode *ovp = OTHERVP(ap->a_vp);
1039
1040	ap->a_vp = ovp;
1041	return (VCALL(ovp, VOFFSET(vop_ioctl), ap));
1042}
1043
1044int
1045union_poll(void *v)
1046{
1047	struct vop_poll_args /* {
1048		struct vnode *a_vp;
1049		int a_events;
1050	} */ *ap = v;
1051	struct vnode *ovp = OTHERVP(ap->a_vp);
1052
1053	ap->a_vp = ovp;
1054	return (VCALL(ovp, VOFFSET(vop_poll), ap));
1055}
1056
1057int
1058union_revoke(void *v)
1059{
1060	struct vop_revoke_args /* {
1061		struct vnode *a_vp;
1062		int a_flags;
1063		struct proc *a_p;
1064	} */ *ap = v;
1065	struct vnode *vp = ap->a_vp;
1066
1067	if (UPPERVP(vp)) {
1068		mutex_enter(UPPERVP(vp)->v_interlock);
1069		KASSERT(vp->v_interlock == UPPERVP(vp)->v_interlock);
1070		UPPERVP(vp)->v_writecount -= vp->v_writecount;
1071		mutex_exit(UPPERVP(vp)->v_interlock);
1072		VOP_REVOKE(UPPERVP(vp), ap->a_flags);
1073	}
1074	if (LOWERVP(vp))
1075		VOP_REVOKE(LOWERVP(vp), ap->a_flags);
1076	vgone(vp);	/* XXXAD?? */
1077	return (0);
1078}
1079
1080int
1081union_mmap(void *v)
1082{
1083	struct vop_mmap_args /* {
1084		struct vnode *a_vp;
1085		vm_prot_t a_prot;
1086		kauth_cred_t a_cred;
1087	} */ *ap = v;
1088	struct vnode *ovp = OTHERVP(ap->a_vp);
1089
1090	ap->a_vp = ovp;
1091	return (VCALL(ovp, VOFFSET(vop_mmap), ap));
1092}
1093
/*
 * Sync the union vnode by syncing the underlying vnode it is
 * backed by.  Device special files are flushed through the spec
 * layer first.
 */
int
union_fsync(void *v)
{
	struct vop_fsync_args /* {
		struct vnode *a_vp;
		kauth_cred_t a_cred;
		int  a_flags;
		off_t a_offlo;
		off_t a_offhi;
	} */ *ap = v;
	int error = 0;
	struct vnode *targetvp;

	/*
	 * If vinvalbuf is calling us, it's a "shallow fsync" -- don't
	 * bother syncing the underlying vnodes, since (a) they'll be
	 * fsync'ed when reclaimed and (b) we could deadlock if
	 * they're locked; otherwise, pass it through to the
	 * underlying layer.
	 */
	if (ap->a_vp->v_type == VBLK || ap->a_vp->v_type == VCHR) {
		error = spec_fsync(v);
		if (error)
			return error;
	}

	/* Shallow fsync (see comment above): stop here. */
	if (ap->a_flags & FSYNC_RECLAIM)
		return 0;

	targetvp = OTHERVP(ap->a_vp);
	if (targetvp != NULLVP) {
		/*
		 * Only the lower vnode gets an explicit lock here;
		 * presumably the upper vnode is already covered by
		 * the union vnode's own lock (LOCKVP) -- confirm.
		 */
		int dolock = (targetvp == LOWERVP(ap->a_vp));

		if (dolock)
			vn_lock(targetvp, LK_EXCLUSIVE | LK_RETRY);
		error = VOP_FSYNC(targetvp, ap->a_cred, ap->a_flags,
			    ap->a_offlo, ap->a_offhi);
		if (dolock)
			VOP_UNLOCK(targetvp);
	}

	return (error);
}
1137
1138int
1139union_seek(void *v)
1140{
1141	struct vop_seek_args /* {
1142		struct vnode *a_vp;
1143		off_t  a_oldoff;
1144		off_t  a_newoff;
1145		kauth_cred_t a_cred;
1146	} */ *ap = v;
1147	struct vnode *ovp = OTHERVP(ap->a_vp);
1148
1149	ap->a_vp = ovp;
1150	return (VCALL(ovp, VOFFSET(vop_seek), ap));
1151}
1152
/*
 * Remove a file from the union.  If the file exists in the upper
 * layer it is removed there (optionally leaving a whiteout); if it
 * exists only in the lower layer, a whiteout is created in the
 * upper directory to mask it.
 */
int
union_remove(void *v)
{
	struct vop_remove_v2_args /* {
		struct vnode *a_dvp;
		struct vnode *a_vp;
		struct componentname *a_cnp;
	} */ *ap = v;
	int error;
	struct union_node *dun = VTOUNION(ap->a_dvp);
	struct union_node *un = VTOUNION(ap->a_vp);
	struct componentname *cnp = ap->a_cnp;

	/* The directory must have an upper layer to remove into. */
	if (dun->un_uppervp == NULLVP)
		panic("union remove: null upper vnode");

	if (un->un_uppervp != NULLVP) {
		/* File present in the upper layer: remove it there. */
		struct vnode *dvp = dun->un_uppervp;
		struct vnode *vp = un->un_uppervp;

		/* Account for VOP_REMOVE to vrele vp.  */
		vref(vp);
		if (union_dowhiteout(un, cnp->cn_cred))
			cnp->cn_flags |= DOWHITEOUT;
		error = VOP_REMOVE(dvp, vp, cnp);
		if (!error)
			union_removed_upper(un);
		vrele(ap->a_vp);
	} else {
		/* Lower layer only: mask the name with a whiteout. */
		error = union_mkwhiteout(
			MOUNTTOUNIONMOUNT(UNIONTOV(dun)->v_mount),
			dun->un_uppervp, ap->a_cnp, un);
		vput(ap->a_vp);
	}

	return (error);
}
1190
/*
 * Create a hard link.  The link target must live in the upper layer:
 * if it currently exists only in the lower layer it is copied up
 * first, which requires dropping and re-taking locks and re-running
 * the directory lookup.
 */
int
union_link(void *v)
{
	struct vop_link_v2_args /* {
		struct vnode *a_dvp;
		struct vnode *a_vp;
		struct componentname *a_cnp;
	} */ *ap = v;
	int error = 0;
	struct componentname *cnp = ap->a_cnp;
	struct union_node *dun;
	struct vnode *vp;
	struct vnode *dvp;

	dun = VTOUNION(ap->a_dvp);

	KASSERT((ap->a_cnp->cn_flags & LOCKPARENT) != 0);

	/*
	 * If the target is not a union vnode (different vop vector),
	 * link it as-is; otherwise use its upper-layer vnode,
	 * copying it up first if necessary.
	 */
	if (ap->a_dvp->v_op != ap->a_vp->v_op) {
		vp = ap->a_vp;
	} else {
		struct union_node *un = VTOUNION(ap->a_vp);
		if (un->un_uppervp == NULLVP) {
			/*
			 * Drop the directory's upper lock around the
			 * copyup when it is the same vnode as the
			 * target's upper directory (un_dirvp) --
			 * presumably to avoid a lock conflict during
			 * copyup; confirm.
			 */
			const bool droplock = (dun->un_uppervp == un->un_dirvp);

			/*
			 * Needs to be copied before we can link it.
			 */
			vn_lock(ap->a_vp, LK_EXCLUSIVE | LK_RETRY);
			if (droplock)
				VOP_UNLOCK(dun->un_uppervp);
			error = union_copyup(un, 1, cnp->cn_cred, curlwp);
			if (droplock) {
				vn_lock(dun->un_uppervp,
				    LK_EXCLUSIVE | LK_RETRY);
				/*
				 * During copyup, we dropped the lock on the
				 * dir and invalidated any saved namei lookup
				 * state for the directory we'll be entering
				 * the link in.  We need to re-run the lookup
				 * in that directory to reset any state needed
				 * for VOP_LINK.
				 * Call relookup on the union-layer to reset
				 * the state.
				 */
				vp  = NULLVP;
				if (dun->un_uppervp == NULLVP)
					 panic("union: null upperdvp?");
				/*
				 * NOTE(review): a relookup failure is
				 * reported to the caller as EROFS
				 * regardless of the actual error.
				 */
				error = relookup(ap->a_dvp, &vp, ap->a_cnp, 0);
				if (error) {
					VOP_UNLOCK(ap->a_vp);
					return EROFS;	/* ? */
				}
				if (vp != NULLVP) {
					/*
					 * The name we want to create has
					 * mysteriously appeared (a race?)
					 */
					error = EEXIST;
					VOP_UNLOCK(ap->a_vp);
					vput(vp);
					return (error);
				}
			}
			VOP_UNLOCK(ap->a_vp);
		}
		vp = un->un_uppervp;
	}

	/* Links can only be made in the writable upper layer. */
	dvp = dun->un_uppervp;
	if (dvp == NULLVP)
		error = EROFS;

	if (error)
		return (error);

	return VOP_LINK(dvp, vp, cnp);
}
1269
/*
 * Rename within the union.  Each union vnode involved is unwrapped
 * to its upper-layer vnode; if a required upper vnode is missing the
 * rename fails with EXDEV rather than crossing layers.
 */
int
union_rename(void *v)
{
	struct vop_rename_args  /* {
		struct vnode *a_fdvp;
		struct vnode *a_fvp;
		struct componentname *a_fcnp;
		struct vnode *a_tdvp;
		struct vnode *a_tvp;
		struct componentname *a_tcnp;
	} */ *ap = v;
	int error;

	struct vnode *fdvp = ap->a_fdvp;
	struct vnode *fvp = ap->a_fvp;
	struct vnode *tdvp = ap->a_tdvp;
	struct vnode *tvp = ap->a_tvp;

	/*
	 * Account for VOP_RENAME to vrele all nodes.
	 * Note: VOP_RENAME will unlock tdvp.
	 */

	if (fdvp->v_op == union_vnodeop_p) {	/* always true */
		struct union_node *un = VTOUNION(fdvp);
		if (un->un_uppervp == NULLVP) {
			/*
			 * this should never happen in normal
			 * operation but might if there was
			 * a problem creating the top-level shadow
			 * directory.
			 */
			error = EXDEV;
			goto bad;
		}

		/* Substitute the upper source directory (extra ref). */
		fdvp = un->un_uppervp;
		vref(fdvp);
	}

	if (fvp->v_op == union_vnodeop_p) {	/* always true */
		struct union_node *un = VTOUNION(fvp);
		if (un->un_uppervp == NULLVP) {
			/* XXX: should do a copyup */
			error = EXDEV;
			goto bad;
		}

		/*
		 * A lower-layer copy exists too: whiteout the old name
		 * so the lower object does not reappear after rename.
		 */
		if (un->un_lowervp != NULLVP)
			ap->a_fcnp->cn_flags |= DOWHITEOUT;

		fvp = un->un_uppervp;
		vref(fvp);
	}

	if (tdvp->v_op == union_vnodeop_p) {
		struct union_node *un = VTOUNION(tdvp);
		if (un->un_uppervp == NULLVP) {
			/*
			 * this should never happen in normal
			 * operation but might if there was
			 * a problem creating the top-level shadow
			 * directory.
			 */
			error = EXDEV;
			goto bad;
		}

		/* Substitute the upper target directory (extra ref). */
		tdvp = un->un_uppervp;
		vref(tdvp);
	}

	if (tvp != NULLVP && tvp->v_op == union_vnodeop_p) {
		struct union_node *un = VTOUNION(tvp);

		/* The target may legitimately not exist in the upper layer. */
		tvp = un->un_uppervp;
		if (tvp != NULLVP) {
			vref(tvp);
		}
	}

	error = VOP_RENAME(fdvp, fvp, ap->a_fcnp, tdvp, tvp, ap->a_tcnp);
	goto out;

bad:
	/* Error before VOP_RENAME: release the caller's nodes ourselves. */
	vput(tdvp);
	if (tvp != NULLVP)
		vput(tvp);
	vrele(fdvp);
	vrele(fvp);

out:
	/*
	 * Where an upper vnode was substituted above, the original
	 * union vnode still holds the caller's reference; drop it.
	 */
	if (fdvp != ap->a_fdvp) {
		vrele(ap->a_fdvp);
	}
	if (fvp != ap->a_fvp) {
		vrele(ap->a_fvp);
	}
	if (tdvp != ap->a_tdvp) {
		vrele(ap->a_tdvp);
	}
	if (tvp != ap->a_tvp) {
		vrele(ap->a_tvp);
	}
	return (error);
}
1376
/*
 * Create a directory in the upper layer and wrap the result in a
 * new union vnode.  Fails with EROFS if the parent has no upper
 * layer to create into.
 */
int
union_mkdir(void *v)
{
	struct vop_mkdir_v3_args /* {
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
		struct vattr *a_vap;
	} */ *ap = v;
	struct union_node *un = VTOUNION(ap->a_dvp);
	struct vnode *dvp = un->un_uppervp;
	struct componentname *cnp = ap->a_cnp;

	if (dvp != NULLVP) {
		int error;
		struct vnode *vp;

		vp = NULL;
		error = VOP_MKDIR(dvp, &vp, cnp, ap->a_vap);
		if (error) {
			/*
			 * NOTE(review): the reference on a_dvp is
			 * dropped here on error -- presumably
			 * union_allocvp() consumes it on success;
			 * confirm against its contract.
			 */
			vrele(ap->a_dvp);
			return (error);
		}

		/* Wrap the new upper-layer directory in a union vnode. */
		error = union_allocvp(ap->a_vpp, ap->a_dvp->v_mount, ap->a_dvp,
				NULLVP, cnp, vp, NULLVP, 1);
		if (error)
			vrele(vp);
		return (error);
	}

	return (EROFS);
}
1410
/*
 * Remove a directory from the union.  Mirrors union_remove(): remove
 * from the upper layer when present (optionally whiting out), or
 * create a whiteout when the directory exists only in the lower layer.
 */
int
union_rmdir(void *v)
{
	struct vop_rmdir_v2_args /* {
		struct vnode *a_dvp;
		struct vnode *a_vp;
		struct componentname *a_cnp;
	} */ *ap = v;
	int error;
	struct union_node *dun = VTOUNION(ap->a_dvp);
	struct union_node *un = VTOUNION(ap->a_vp);
	struct componentname *cnp = ap->a_cnp;

	/* The parent must have an upper layer to remove from. */
	if (dun->un_uppervp == NULLVP)
		panic("union rmdir: null upper vnode");

	/* Refuse the rmdir if the union directory is not removable. */
	error = union_check_rmdir(un, cnp->cn_cred);
	if (error) {
		vput(ap->a_vp);
		return error;
	}

	if (un->un_uppervp != NULLVP) {
		/* Directory present in the upper layer: remove it there. */
		struct vnode *dvp = dun->un_uppervp;
		struct vnode *vp = un->un_uppervp;

		/* Account for VOP_RMDIR to vrele vp.  */
		vref(vp);
		if (union_dowhiteout(un, cnp->cn_cred))
			cnp->cn_flags |= DOWHITEOUT;
		error = VOP_RMDIR(dvp, vp, ap->a_cnp);
		if (!error)
			union_removed_upper(un);
		vrele(ap->a_vp);
	} else {
		/* Lower layer only: mask the name with a whiteout. */
		error = union_mkwhiteout(
			MOUNTTOUNIONMOUNT(UNIONTOV(dun)->v_mount),
			dun->un_uppervp, ap->a_cnp, un);
		vput(ap->a_vp);
	}

	return (error);
}
1454
1455int
1456union_symlink(void *v)
1457{
1458	struct vop_symlink_v3_args /* {
1459		struct vnode *a_dvp;
1460		struct vnode **a_vpp;
1461		struct componentname *a_cnp;
1462		struct vattr *a_vap;
1463		char *a_target;
1464	} */ *ap = v;
1465	struct union_node *un = VTOUNION(ap->a_dvp);
1466	struct vnode *dvp = un->un_uppervp;
1467	struct componentname *cnp = ap->a_cnp;
1468
1469	if (dvp != NULLVP) {
1470		int error;
1471
1472		error = VOP_SYMLINK(dvp, ap->a_vpp, cnp, ap->a_vap,
1473				    ap->a_target);
1474		return (error);
1475	}
1476
1477	return (EROFS);
1478}
1479
1480/*
1481 * union_readdir works in concert with getdirentries and
1482 * readdir(3) to provide a list of entries in the unioned
1483 * directories.  getdirentries is responsible for walking
1484 * down the union stack.  readdir(3) is responsible for
1485 * eliminating duplicate names from the returned data stream.
1486 */
1487int
1488union_readdir(void *v)
1489{
1490	struct vop_readdir_args /* {
1491		struct vnodeop_desc *a_desc;
1492		struct vnode *a_vp;
1493		struct uio *a_uio;
1494		kauth_cred_t a_cred;
1495		int *a_eofflag;
1496		u_long *a_cookies;
1497		int a_ncookies;
1498	} */ *ap = v;
1499	struct union_node *un = VTOUNION(ap->a_vp);
1500	struct vnode *uvp = un->un_uppervp;
1501
1502	if (uvp == NULLVP)
1503		return (0);
1504
1505	ap->a_vp = uvp;
1506	return (VCALL(uvp, VOFFSET(vop_readdir), ap));
1507}
1508
1509int
1510union_readlink(void *v)
1511{
1512	struct vop_readlink_args /* {
1513		struct vnode *a_vp;
1514		struct uio *a_uio;
1515		kauth_cred_t a_cred;
1516	} */ *ap = v;
1517	int error;
1518	struct vnode *vp = OTHERVP(ap->a_vp);
1519	int dolock = (vp == LOWERVP(ap->a_vp));
1520
1521	if (dolock)
1522		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1523	ap->a_vp = vp;
1524	error = VCALL(vp, VOFFSET(vop_readlink), ap);
1525	if (dolock)
1526		VOP_UNLOCK(vp);
1527
1528	return (error);
1529}
1530
1531int
1532union_abortop(void *v)
1533{
1534	struct vop_abortop_args /* {
1535		struct vnode *a_dvp;
1536		struct componentname *a_cnp;
1537	} */ *ap = v;
1538
1539	KASSERT(UPPERVP(ap->a_dvp) != NULL);
1540
1541	ap->a_dvp = UPPERVP(ap->a_dvp);
1542	return VCALL(ap->a_dvp, VOFFSET(vop_abortop), ap);
1543}
1544
/*
 * Inactivate the union vnode.  Underlying vnodes are deliberately
 * kept referenced until reclaim (see comment below); only the
 * directory cache built for readdir is torn down here.
 */
int
union_inactive(void *v)
{
	struct vop_inactive_v2_args /* {
		const struct vnodeop_desc *a_desc;
		struct vnode *a_vp;
		bool *a_recycle;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct union_node *un = VTOUNION(vp);
	struct vnode **vpp;

	/*
	 * Do nothing (and _don't_ bypass).
	 * Wait to vrele lowervp until reclaim,
	 * so that until then our union_node is in the
	 * cache and reusable.
	 *
	 * NEEDSWORK: Someday, consider inactive'ing
	 * the lowervp and then trying to reactivate it
	 * with capabilities (v_id)
	 * like they do in the name lookup cache code.
	 * That's too much work for now.
	 */

	/* Release the NULLVP-terminated dircache vnode array, if any. */
	if (un->un_dircache != 0) {
		for (vpp = un->un_dircache; *vpp != NULLVP; vpp++)
			vrele(*vpp);
		free(un->un_dircache, M_TEMP);
		un->un_dircache = 0;
	}

	/* Ask for recycling unless the node is still cached. */
	*ap->a_recycle = ((un->un_cflags & UN_CACHED) == 0);

	return (0);
}
1581
1582int
1583union_reclaim(void *v)
1584{
1585	struct vop_reclaim_args /* {
1586		struct vnode *a_vp;
1587	} */ *ap = v;
1588
1589	union_freevp(ap->a_vp);
1590
1591	return (0);
1592}
1593
1594static int
1595union_lock1(struct vnode *vp, struct vnode *lockvp, int flags)
1596{
1597	struct vop_lock_args ap;
1598
1599	if (lockvp == vp) {
1600		ap.a_vp = vp;
1601		ap.a_flags = flags;
1602		return genfs_lock(&ap);
1603	} else
1604		return VOP_LOCK(lockvp, flags);
1605}
1606
1607static int
1608union_unlock1(struct vnode *vp, struct vnode *lockvp)
1609{
1610	struct vop_unlock_args ap;
1611
1612	if (lockvp == vp) {
1613		ap.a_vp = vp;
1614		return genfs_unlock(&ap);
1615	} else
1616		return VOP_UNLOCK(lockvp);
1617}
1618
/*
 * Lock the union vnode.  The vnode actually carrying the lock
 * (LOCKVP) can change while we sleep (e.g. on copyup), so retry
 * until the lock we took is still the current one, then make sure
 * the vnode has not been revoked in the meantime.
 */
int
union_lock(void *v)
{
	struct vop_lock_args /* {
		struct vnode *a_vp;
		int a_flags;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp, *lockvp;
	struct union_node *un = VTOUNION(vp);
	int flags = ap->a_flags;
	int error;

	if ((flags & LK_NOWAIT) != 0) {
		/* Non-blocking path: every acquisition must try-lock. */
		if (!mutex_tryenter(&un->un_lock))
			return EBUSY;
		lockvp = LOCKVP(vp);
		error = union_lock1(vp, lockvp, flags);
		mutex_exit(&un->un_lock);
		if (error)
			return error;
		/* Check for a dead vnode without blocking on the interlock. */
		if (mutex_tryenter(vp->v_interlock)) {
			error = vdead_check(vp, VDEAD_NOWAIT);
			mutex_exit(vp->v_interlock);
		} else
			error = EBUSY;
		if (error)
			union_unlock1(vp, lockvp);
		return error;
	}

	/*
	 * Blocking path: take the lock, then re-check under un_lock
	 * that LOCKVP did not change while we slept; retry if it did.
	 */
	mutex_enter(&un->un_lock);
	for (;;) {
		lockvp = LOCKVP(vp);
		mutex_exit(&un->un_lock);
		error = union_lock1(vp, lockvp, flags);
		if (error != 0)
			return error;
		mutex_enter(&un->un_lock);
		if (lockvp == LOCKVP(vp))
			break;
		union_unlock1(vp, lockvp);
	}
	mutex_exit(&un->un_lock);

	/* Fail with ENOENT if the vnode died while we were locking. */
	mutex_enter(vp->v_interlock);
	error = vdead_check(vp, VDEAD_NOWAIT);
	if (error) {
		union_unlock1(vp, lockvp);
		error = vdead_check(vp, 0);
		KASSERT(error == ENOENT);
	}
	mutex_exit(vp->v_interlock);
	return error;
}
1673
1674int
1675union_unlock(void *v)
1676{
1677	struct vop_unlock_args /* {
1678		struct vnode *a_vp;
1679		int a_flags;
1680	} */ *ap = v;
1681	struct vnode *vp = ap->a_vp, *lockvp;
1682
1683	lockvp = LOCKVP(vp);
1684	union_unlock1(vp, lockvp);
1685
1686	return 0;
1687}
1688
1689int
1690union_bmap(void *v)
1691{
1692	struct vop_bmap_args /* {
1693		struct vnode *a_vp;
1694		daddr_t  a_bn;
1695		struct vnode **a_vpp;
1696		daddr_t *a_bnp;
1697		int *a_runp;
1698	} */ *ap = v;
1699	int error;
1700	struct vnode *vp = OTHERVP(ap->a_vp);
1701	int dolock = (vp == LOWERVP(ap->a_vp));
1702
1703	if (dolock)
1704		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1705	ap->a_vp = vp;
1706	error = VCALL(vp, VOFFSET(vop_bmap), ap);
1707	if (dolock)
1708		VOP_UNLOCK(vp);
1709
1710	return (error);
1711}
1712
1713int
1714union_print(void *v)
1715{
1716	struct vop_print_args /* {
1717		struct vnode *a_vp;
1718	} */ *ap = v;
1719	struct vnode *vp = ap->a_vp;
1720
1721	printf("\ttag VT_UNION, vp=%p, uppervp=%p, lowervp=%p\n",
1722			vp, UPPERVP(vp), LOWERVP(vp));
1723	if (UPPERVP(vp) != NULLVP)
1724		vprint("union: upper", UPPERVP(vp));
1725	if (LOWERVP(vp) != NULLVP)
1726		vprint("union: lower", LOWERVP(vp));
1727	if (VTOUNION(vp)->un_dircache) {
1728		struct vnode **vpp;
1729		for (vpp = VTOUNION(vp)->un_dircache; *vpp != NULLVP; vpp++)
1730			vprint("dircache:", *vpp);
1731	}
1732
1733	return (0);
1734}
1735
1736int
1737union_islocked(void *v)
1738{
1739	struct vop_islocked_args /* {
1740		struct vnode *a_vp;
1741	} */ *ap = v;
1742	struct vnode *vp;
1743	struct union_node *un;
1744
1745	un = VTOUNION(ap->a_vp);
1746	mutex_enter(&un->un_lock);
1747	vp = LOCKVP(ap->a_vp);
1748	mutex_exit(&un->un_lock);
1749
1750	if (vp == ap->a_vp)
1751		return genfs_islocked(ap);
1752	else
1753		return VOP_ISLOCKED(vp);
1754}
1755
1756int
1757union_pathconf(void *v)
1758{
1759	struct vop_pathconf_args /* {
1760		struct vnode *a_vp;
1761		int a_name;
1762		int *a_retval;
1763	} */ *ap = v;
1764	int error;
1765	struct vnode *vp = OTHERVP(ap->a_vp);
1766	int dolock = (vp == LOWERVP(ap->a_vp));
1767
1768	if (dolock)
1769		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1770	ap->a_vp = vp;
1771	error = VCALL(vp, VOFFSET(vop_pathconf), ap);
1772	if (dolock)
1773		VOP_UNLOCK(vp);
1774
1775	return (error);
1776}
1777
1778int
1779union_advlock(void *v)
1780{
1781	struct vop_advlock_args /* {
1782		struct vnode *a_vp;
1783		void *a_id;
1784		int  a_op;
1785		struct flock *a_fl;
1786		int  a_flags;
1787	} */ *ap = v;
1788	struct vnode *ovp = OTHERVP(ap->a_vp);
1789
1790	ap->a_vp = ovp;
1791	return (VCALL(ovp, VOFFSET(vop_advlock), ap));
1792}
1793
1794int
1795union_strategy(void *v)
1796{
1797	struct vop_strategy_args /* {
1798		struct vnode *a_vp;
1799		struct buf *a_bp;
1800	} */ *ap = v;
1801	struct vnode *ovp = OTHERVP(ap->a_vp);
1802	struct buf *bp = ap->a_bp;
1803
1804	KASSERT(ovp != NULLVP);
1805	if (!NODE_IS_SPECIAL(ovp))
1806		KASSERT((bp->b_flags & B_READ) || ovp != LOWERVP(bp->b_vp));
1807
1808	return (VOP_STRATEGY(ovp, bp));
1809}
1810
1811int
1812union_bwrite(void *v)
1813{
1814	struct vop_bwrite_args /* {
1815		struct vnode *a_vp;
1816		struct buf *a_bp;
1817	} */ *ap = v;
1818	struct vnode *ovp = OTHERVP(ap->a_vp);
1819	struct buf *bp = ap->a_bp;
1820
1821	KASSERT(ovp != NULLVP);
1822	if (!NODE_IS_SPECIAL(ovp))
1823		KASSERT((bp->b_flags & B_READ) || ovp != LOWERVP(bp->b_vp));
1824
1825	return (VOP_BWRITE(ovp, bp));
1826}
1827
/*
 * Get pages by delegating to the backing vnode, which shares our
 * v_interlock (asserted below).
 */
int
union_getpages(void *v)
{
	struct vop_getpages_args /* {
		struct vnode *a_vp;
		voff_t a_offset;
		struct vm_page **a_m;
		int *a_count;
		int a_centeridx;
		vm_prot_t a_access_type;
		int a_advice;
		int a_flags;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;

	KASSERT(mutex_owned(vp->v_interlock));

	/*
	 * NOTE(review): PGO_LOCKED requests are rejected with EBUSY;
	 * presumably the caller then retries without PGO_LOCKED.
	 */
	if (ap->a_flags & PGO_LOCKED) {
		return EBUSY;
	}
	ap->a_vp = OTHERVP(vp);
	KASSERT(vp->v_interlock == ap->a_vp->v_interlock);

	/* Just pass the request on to the underlying layer. */
	return VCALL(ap->a_vp, VOFFSET(vop_getpages), ap);
}
1854
/*
 * Put pages by delegating to the backing vnode, which shares our
 * v_interlock (asserted below).
 */
int
union_putpages(void *v)
{
	struct vop_putpages_args /* {
		struct vnode *a_vp;
		voff_t a_offlo;
		voff_t a_offhi;
		int a_flags;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;

	KASSERT(mutex_owned(vp->v_interlock));

	ap->a_vp = OTHERVP(vp);
	KASSERT(vp->v_interlock == ap->a_vp->v_interlock);

	/*
	 * On reclaim just drop the interlock and succeed --
	 * presumably the pages belong to the underlying vnode and
	 * are flushed when it is reclaimed; confirm.
	 */
	if (ap->a_flags & PGO_RECLAIM) {
		mutex_exit(vp->v_interlock);
		return 0;
	}

	/* Just pass the request on to the underlying layer. */
	return VCALL(ap->a_vp, VOFFSET(vop_putpages), ap);
}
1879
1880int
1881union_kqfilter(void *v)
1882{
1883	struct vop_kqfilter_args /* {
1884		struct vnode	*a_vp;
1885		struct knote	*a_kn;
1886	} */ *ap = v;
1887	int error;
1888
1889	/*
1890	 * We watch either the upper layer file (if it already exists),
1891	 * or the lower layer one. If there is lower layer file only
1892	 * at this moment, we will keep watching that lower layer file
1893	 * even if upper layer file would be created later on.
1894	 */
1895	if (UPPERVP(ap->a_vp))
1896		error = VOP_KQFILTER(UPPERVP(ap->a_vp), ap->a_kn);
1897	else if (LOWERVP(ap->a_vp))
1898		error = VOP_KQFILTER(LOWERVP(ap->a_vp), ap->a_kn);
1899	else {
1900		/* panic? */
1901		error = EOPNOTSUPP;
1902	}
1903
1904	return (error);
1905}
1906