union_vnops.c revision 1.58
1/*	$NetBSD: union_vnops.c,v 1.58 2014/03/12 09:40:05 hannken Exp $	*/
2
3/*
4 * Copyright (c) 1992, 1993, 1994, 1995
5 *	The Regents of the University of California.  All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Jan-Simon Pendry.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its contributors
19 *    may be used to endorse or promote products derived from this software
20 *    without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 *
34 *	@(#)union_vnops.c	8.33 (Berkeley) 7/31/95
35 */
36
37/*
38 * Copyright (c) 1992, 1993, 1994, 1995 Jan-Simon Pendry.
39 *
40 * This code is derived from software contributed to Berkeley by
41 * Jan-Simon Pendry.
42 *
43 * Redistribution and use in source and binary forms, with or without
44 * modification, are permitted provided that the following conditions
45 * are met:
46 * 1. Redistributions of source code must retain the above copyright
47 *    notice, this list of conditions and the following disclaimer.
48 * 2. Redistributions in binary form must reproduce the above copyright
49 *    notice, this list of conditions and the following disclaimer in the
50 *    documentation and/or other materials provided with the distribution.
51 * 3. All advertising materials mentioning features or use of this software
52 *    must display the following acknowledgement:
53 *	This product includes software developed by the University of
54 *	California, Berkeley and its contributors.
55 * 4. Neither the name of the University nor the names of its contributors
56 *    may be used to endorse or promote products derived from this software
57 *    without specific prior written permission.
58 *
59 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
60 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
61 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
62 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
63 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
64 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
65 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
66 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
67 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
68 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
69 * SUCH DAMAGE.
70 *
71 *	@(#)union_vnops.c	8.33 (Berkeley) 7/31/95
72 */
73
74#include <sys/cdefs.h>
75__KERNEL_RCSID(0, "$NetBSD: union_vnops.c,v 1.58 2014/03/12 09:40:05 hannken Exp $");
76
77#include <sys/param.h>
78#include <sys/systm.h>
79#include <sys/proc.h>
80#include <sys/file.h>
81#include <sys/time.h>
82#include <sys/stat.h>
83#include <sys/vnode.h>
84#include <sys/mount.h>
85#include <sys/namei.h>
86#include <sys/malloc.h>
87#include <sys/buf.h>
88#include <sys/queue.h>
89#include <sys/lock.h>
90#include <sys/kauth.h>
91
92#include <fs/union/union.h>
93#include <miscfs/genfs/genfs.h>
94#include <miscfs/specfs/specdev.h>
95
96int union_lookup(void *);
97int union_create(void *);
98int union_whiteout(void *);
99int union_mknod(void *);
100int union_open(void *);
101int union_close(void *);
102int union_access(void *);
103int union_getattr(void *);
104int union_setattr(void *);
105int union_read(void *);
106int union_write(void *);
107int union_ioctl(void *);
108int union_poll(void *);
109int union_revoke(void *);
110int union_mmap(void *);
111int union_fsync(void *);
112int union_seek(void *);
113int union_remove(void *);
114int union_link(void *);
115int union_rename(void *);
116int union_mkdir(void *);
117int union_rmdir(void *);
118int union_symlink(void *);
119int union_readdir(void *);
120int union_readlink(void *);
121int union_abortop(void *);
122int union_inactive(void *);
123int union_reclaim(void *);
124int union_lock(void *);
125int union_unlock(void *);
126int union_bmap(void *);
127int union_print(void *);
128int union_islocked(void *);
129int union_pathconf(void *);
130int union_advlock(void *);
131int union_strategy(void *);
132int union_bwrite(void *);
133int union_getpages(void *);
134int union_putpages(void *);
135int union_kqfilter(void *);
136
137static int union_lookup1(struct vnode *, struct vnode **,
138			      struct vnode **, struct componentname *);
139
140
/*
 * Global vfs data structures
 */

/* Operations vector pointer, filled in when the opv_desc is attached. */
int (**union_vnodeop_p)(void *);

/* Map each vnode operation onto its unionfs implementation. */
const struct vnodeopv_entry_desc union_vnodeop_entries[] = {
	{ &vop_default_desc, vn_default_error },
	{ &vop_lookup_desc, union_lookup },		/* lookup */
	{ &vop_create_desc, union_create },		/* create */
	{ &vop_whiteout_desc, union_whiteout },		/* whiteout */
	{ &vop_mknod_desc, union_mknod },		/* mknod */
	{ &vop_open_desc, union_open },			/* open */
	{ &vop_close_desc, union_close },		/* close */
	{ &vop_access_desc, union_access },		/* access */
	{ &vop_getattr_desc, union_getattr },		/* getattr */
	{ &vop_setattr_desc, union_setattr },		/* setattr */
	{ &vop_read_desc, union_read },			/* read */
	{ &vop_write_desc, union_write },		/* write */
	{ &vop_ioctl_desc, union_ioctl },		/* ioctl */
	{ &vop_poll_desc, union_poll },			/* poll */
	{ &vop_revoke_desc, union_revoke },		/* revoke */
	{ &vop_mmap_desc, union_mmap },			/* mmap */
	{ &vop_fsync_desc, union_fsync },		/* fsync */
	{ &vop_seek_desc, union_seek },			/* seek */
	{ &vop_remove_desc, union_remove },		/* remove */
	{ &vop_link_desc, union_link },			/* link */
	{ &vop_rename_desc, union_rename },		/* rename */
	{ &vop_mkdir_desc, union_mkdir },		/* mkdir */
	{ &vop_rmdir_desc, union_rmdir },		/* rmdir */
	{ &vop_symlink_desc, union_symlink },		/* symlink */
	{ &vop_readdir_desc, union_readdir },		/* readdir */
	{ &vop_readlink_desc, union_readlink },		/* readlink */
	{ &vop_abortop_desc, union_abortop },		/* abortop */
	{ &vop_inactive_desc, union_inactive },		/* inactive */
	{ &vop_reclaim_desc, union_reclaim },		/* reclaim */
	{ &vop_lock_desc, union_lock },			/* lock */
	{ &vop_unlock_desc, union_unlock },		/* unlock */
	{ &vop_bmap_desc, union_bmap },			/* bmap */
	{ &vop_strategy_desc, union_strategy },		/* strategy */
	{ &vop_bwrite_desc, union_bwrite },		/* bwrite */
	{ &vop_print_desc, union_print },		/* print */
	{ &vop_islocked_desc, union_islocked },		/* islocked */
	{ &vop_pathconf_desc, union_pathconf },		/* pathconf */
	{ &vop_advlock_desc, union_advlock },		/* advlock */
	{ &vop_getpages_desc, union_getpages },		/* getpages */
	{ &vop_putpages_desc, union_putpages },		/* putpages */
	{ &vop_kqfilter_desc, union_kqfilter },		/* kqfilter */
	{ NULL, NULL }
};
const struct vnodeopv_desc union_vnodeop_opv_desc =
	{ &union_vnodeop_p, union_vnodeop_entries };

/* True when the vnode is a device, socket or fifo node. */
#define NODE_IS_SPECIAL(vp) \
	((vp)->v_type == VBLK || (vp)->v_type == VCHR || \
	(vp)->v_type == VSOCK || (vp)->v_type == VFIFO)
195
/*
 * union_lookup1: look up one pathname component within a single layer
 * (upper or lower) of the union.
 *
 * udvp is the root vnode of that layer's hierarchy and acts as a
 * barrier: ".." traversal and mount-point crossing never step past it.
 * *dvpp is the locked directory to search; it may be replaced when
 * ".." crosses back over a mount point.  On success *vpp holds the
 * locked result (possibly the same vnode as *dvpp).
 *
 * Returns 0 or an errno from VOP_LOOKUP/vn_lock/VFS_ROOT.
 */
static int
union_lookup1(struct vnode *udvp, struct vnode **dvpp, struct vnode **vpp,
	struct componentname *cnp)
{
	int error;
	struct vnode *tdvp;
	struct vnode *dvp;
	struct mount *mp;

	dvp = *dvpp;

	/*
	 * If stepping up the directory tree, check for going
	 * back across the mount point, in which case do what
	 * lookup would do by stepping back down the mount
	 * hierarchy.
	 */
	if (cnp->cn_flags & ISDOTDOT) {
		while ((dvp != udvp) && (dvp->v_vflag & VV_ROOT)) {
			/*
			 * Don't do the NOCROSSMOUNT check
			 * at this level.  By definition,
			 * union fs deals with namespaces, not
			 * filesystems.
			 */
			tdvp = dvp;
			*dvpp = dvp = dvp->v_mount->mnt_vnodecovered;
			VOP_UNLOCK(tdvp);
			vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
		}
	}

	error = VOP_LOOKUP(dvp, &tdvp, cnp);
	if (error)
		return (error);
	if (dvp != tdvp) {
		/*
		 * Lock the result.  For ".." the directory is unlocked
		 * first and relocked afterwards so the ancestor is never
		 * locked after its descendant (deadlock avoidance).
		 */
		if (cnp->cn_flags & ISDOTDOT)
			VOP_UNLOCK(dvp);
		error = vn_lock(tdvp, LK_EXCLUSIVE);
		if (cnp->cn_flags & ISDOTDOT)
			vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
		if (error) {
			vrele(tdvp);
			return error;
		}
		dvp = tdvp;
	}

	/*
	 * Lastly check if the current node is a mount point in
	 * which case walk up the mount hierarchy making sure not to
	 * bump into the root of the mount tree (ie. dvp != udvp).
	 */
	while (dvp != udvp && (dvp->v_type == VDIR) &&
	       (mp = dvp->v_mountedhere)) {
		/* vfs_busy failure means the mount is going away; retry. */
		if (vfs_busy(mp, NULL))
			continue;
		vput(dvp);
		error = VFS_ROOT(mp, &tdvp);
		vfs_unbusy(mp, false, NULL);
		if (error) {
			return (error);
		}
		dvp = tdvp;
	}

	*vpp = dvp;
	return (0);
}
265
/*
 * union_lookup: look up cnp in the union directory a_dvp.
 *
 * The name is first looked up in the upper layer; unless a whiteout
 * entry or an opaque directory hides it, it is then looked up in the
 * lower layer as well.  The two results are merged into a single
 * union vnode via union_allocvp().  When a modifying operation needs
 * an upper directory that does not exist yet, a shadow directory is
 * created in the upper layer and the lookup restarts.
 */
int
union_lookup(void *v)
{
	struct vop_lookup_v2_args /* {
		struct vnodeop_desc *a_desc;
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
	} */ *ap = v;
	int error;
	int uerror, lerror;
	struct vnode *uppervp, *lowervp;
	struct vnode *upperdvp, *lowerdvp;
	struct vnode *dvp = ap->a_dvp;
	struct union_node *dun = VTOUNION(dvp);
	struct componentname *cnp = ap->a_cnp;
	struct union_mount *um = MOUNTTOUNIONMOUNT(dvp->v_mount);
	kauth_cred_t saved_cred = NULL;
	int iswhiteout;
	struct vattr va;

#ifdef notyet
	/* "..." would name the lower layer directly; not enabled. */
	if (cnp->cn_namelen == 3 &&
			cnp->cn_nameptr[2] == '.' &&
			cnp->cn_nameptr[1] == '.' &&
			cnp->cn_nameptr[0] == '.') {
		dvp = *ap->a_vpp = LOWERVP(ap->a_dvp);
		if (dvp == NULLVP)
			return (ENOENT);
		vref(dvp);
		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
		return (0);
	}
#endif

	/* Refuse delete/rename of the last component on a r/o mount. */
	if ((cnp->cn_flags & ISLASTCN) &&
	    (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
		return (EROFS);

start:
	upperdvp = dun->un_uppervp;
	lowerdvp = dun->un_lowervp;
	uppervp = NULLVP;
	lowervp = NULLVP;
	iswhiteout = 0;

	/*
	 * do the lookup in the upper level.
	 * if that level consumes additional pathnames,
	 * then assume that something special is going
	 * on and just return that vnode.
	 */
	if (upperdvp != NULLVP) {
		uerror = union_lookup1(um->um_uppervp, &upperdvp,
					&uppervp, cnp);
		if (cnp->cn_consume != 0) {
			if (uppervp != upperdvp)
				VOP_UNLOCK(uppervp);
			*ap->a_vpp = uppervp;
			return (uerror);
		}
		if (uerror == ENOENT || uerror == EJUSTRETURN) {
			/*
			 * Name is absent in the upper layer; a whiteout
			 * entry or an opaque parent directory hides any
			 * lower-layer object of the same name.
			 */
			if (cnp->cn_flags & ISWHITEOUT) {
				iswhiteout = 1;
			} else if (lowerdvp != NULLVP) {
				lerror = VOP_GETATTR(upperdvp, &va,
					cnp->cn_cred);
				if (lerror == 0 && (va.va_flags & OPAQUE))
					iswhiteout = 1;
			}
		}
	} else {
		uerror = ENOENT;
	}

	/*
	 * in a similar way to the upper layer, do the lookup
	 * in the lower layer.   this time, if there is some
	 * component magic going on, then vput whatever we got
	 * back from the upper layer and return the lower vnode
	 * instead.
	 */
	if (lowerdvp != NULLVP && !iswhiteout) {
		int nameiop;

		vn_lock(lowerdvp, LK_EXCLUSIVE | LK_RETRY);

		/*
		 * Only do a LOOKUP on the bottom node, since
		 * we won't be making changes to it anyway.
		 */
		nameiop = cnp->cn_nameiop;
		cnp->cn_nameiop = LOOKUP;
		if (um->um_op == UNMNT_BELOW) {
			/* Look below with the mount owner's credentials. */
			saved_cred = cnp->cn_cred;
			cnp->cn_cred = um->um_cred;
		}

		/*
		 * we shouldn't have to worry about locking interactions
		 * between the lower layer and our union layer (w.r.t.
		 * `..' processing) because we don't futz with lowervp
		 * locks in the union-node instantiation code path.
		 */
		lerror = union_lookup1(um->um_lowervp, &lowerdvp,
				&lowervp, cnp);
		if (um->um_op == UNMNT_BELOW)
			cnp->cn_cred = saved_cred;
		cnp->cn_nameiop = nameiop;

		if (lowervp != lowerdvp)
			VOP_UNLOCK(lowerdvp);

		if (cnp->cn_consume != 0) {
			if (uppervp != NULLVP) {
				if (uppervp == upperdvp)
					vrele(uppervp);
				else
					vput(uppervp);
				uppervp = NULLVP;
			}
			*ap->a_vpp = lowervp;
			return (lerror);
		}
	} else {
		lerror = ENOENT;
		/* For "..", fall back to the lower vnode of the parent. */
		if ((cnp->cn_flags & ISDOTDOT) && dun->un_pvp != NULLVP) {
			lowervp = LOWERVP(dun->un_pvp);
			if (lowervp != NULLVP) {
				vref(lowervp);
				vn_lock(lowervp, LK_EXCLUSIVE | LK_RETRY);
				lerror = 0;
			}
		}
	}

	/*
	 * EJUSTRETURN is used by underlying filesystems to indicate that
	 * a directory modification op was started successfully.
	 * This will only happen in the upper layer, since
	 * the lower layer only does LOOKUPs.
	 * If this union is mounted read-only, bounce it now.
	 */

	if ((uerror == EJUSTRETURN) && (cnp->cn_flags & ISLASTCN) &&
	    (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
	    ((cnp->cn_nameiop == CREATE) || (cnp->cn_nameiop == RENAME)))
		uerror = EROFS;

	/*
	 * at this point, we have uerror and lerror indicating
	 * possible errors with the lookups in the upper and lower
	 * layers.  additionally, uppervp and lowervp are (locked)
	 * references to existing vnodes in the upper and lower layers.
	 *
	 * there are now three cases to consider.
	 * 1. if both layers returned an error, then return whatever
	 *    error the upper layer generated.
	 *
	 * 2. if the top layer failed and the bottom layer succeeded
	 *    then two subcases occur.
	 *    a.  the bottom vnode is not a directory, in which
	 *	  case just return a new union vnode referencing
	 *	  an empty top layer and the existing bottom layer.
	 *    b.  the bottom vnode is a directory, in which case
	 *	  create a new directory in the top-level and
	 *	  continue as in case 3.
	 *
	 * 3. if the top layer succeeded then return a new union
	 *    vnode referencing whatever the new top layer and
	 *    whatever the bottom layer returned.
	 */

	*ap->a_vpp = NULLVP;


	/* case 1. */
	if ((uerror != 0) && (lerror != 0)) {
		return (uerror);
	}

	/* case 2. */
	if (uerror != 0 /* && (lerror == 0) */ ) {
		if (lowervp->v_type == VDIR) { /* case 2b. */
			/*
			 * We may be racing another process to make the
			 * upper-level shadow directory.  Be careful with
			 * locks/etc!
			 * If we have to create a shadow directory and want
			 * to commit the node we have to restart the lookup
			 * to get the componentname right.
			 */
			if (upperdvp) {
				VOP_UNLOCK(upperdvp);
				uerror = union_mkshadow(um, upperdvp, cnp,
				    &uppervp);
				vn_lock(upperdvp, LK_EXCLUSIVE | LK_RETRY);
				if (uerror == 0 && cnp->cn_nameiop != LOOKUP) {
					vrele(uppervp);
					if (lowervp != NULLVP)
						vput(lowervp);
					goto start;
				}
			}
			if (uerror) {
				if (lowervp != NULLVP) {
					vput(lowervp);
					lowervp = NULLVP;
				}
				return (uerror);
			}
		}
	} else { /* uerror == 0 */
		if (uppervp != upperdvp)
			VOP_UNLOCK(uppervp);
	}

	if (lowervp != NULLVP)
		VOP_UNLOCK(lowervp);

	/* Merge the layer vnodes into one union vnode (takes the refs). */
	error = union_allocvp(ap->a_vpp, dvp->v_mount, dvp, upperdvp, cnp,
			      uppervp, lowervp, 1);

	if (error) {
		if (uppervp != NULLVP)
			vrele(uppervp);
		if (lowervp != NULLVP)
			vrele(lowervp);
		return error;
	}

	return 0;
}
500
501int
502union_create(void *v)
503{
504	struct vop_create_v3_args /* {
505		struct vnode *a_dvp;
506		struct vnode **a_vpp;
507		struct componentname *a_cnp;
508		struct vattr *a_vap;
509	} */ *ap = v;
510	struct union_node *un = VTOUNION(ap->a_dvp);
511	struct vnode *dvp = un->un_uppervp;
512	struct componentname *cnp = ap->a_cnp;
513
514	if (dvp != NULLVP) {
515		int error;
516		struct vnode *vp;
517		struct mount *mp;
518
519		mp = ap->a_dvp->v_mount;
520		error = VOP_CREATE(dvp, &vp, cnp, ap->a_vap);
521		if (error)
522			return (error);
523
524		error = union_allocvp(ap->a_vpp, mp, NULLVP, NULLVP, cnp, vp,
525				NULLVP, 1);
526		if (error)
527			vrele(vp);
528		return (error);
529	}
530
531	return (EROFS);
532}
533
534int
535union_whiteout(void *v)
536{
537	struct vop_whiteout_args /* {
538		struct vnode *a_dvp;
539		struct componentname *a_cnp;
540		int a_flags;
541	} */ *ap = v;
542	struct union_node *un = VTOUNION(ap->a_dvp);
543	struct componentname *cnp = ap->a_cnp;
544
545	if (un->un_uppervp == NULLVP)
546		return (EOPNOTSUPP);
547
548	return (VOP_WHITEOUT(un->un_uppervp, cnp, ap->a_flags));
549}
550
551int
552union_mknod(void *v)
553{
554	struct vop_mknod_v3_args /* {
555		struct vnode *a_dvp;
556		struct vnode **a_vpp;
557		struct componentname *a_cnp;
558		struct vattr *a_vap;
559	} */ *ap = v;
560	struct union_node *un = VTOUNION(ap->a_dvp);
561	struct vnode *dvp = un->un_uppervp;
562	struct componentname *cnp = ap->a_cnp;
563
564	if (dvp != NULLVP) {
565		int error;
566		struct vnode *vp;
567		struct mount *mp;
568
569		mp = ap->a_dvp->v_mount;
570		error = VOP_MKNOD(dvp, &vp, cnp, ap->a_vap);
571		if (error)
572			return (error);
573
574		error = union_allocvp(ap->a_vpp, mp, NULLVP, NULLVP,
575				      cnp, vp, NULLVP, 1);
576		if (error)
577			vrele(vp);
578		return (error);
579	}
580
581	return (EROFS);
582}
583
584int
585union_open(void *v)
586{
587	struct vop_open_args /* {
588		struct vnodeop_desc *a_desc;
589		struct vnode *a_vp;
590		int a_mode;
591		kauth_cred_t a_cred;
592	} */ *ap = v;
593	struct union_node *un = VTOUNION(ap->a_vp);
594	struct vnode *tvp;
595	int mode = ap->a_mode;
596	kauth_cred_t cred = ap->a_cred;
597	struct lwp *l = curlwp;
598	int error;
599
600	/*
601	 * If there is an existing upper vp then simply open that.
602	 */
603	tvp = un->un_uppervp;
604	if (tvp == NULLVP) {
605		/*
606		 * If the lower vnode is being opened for writing, then
607		 * copy the file contents to the upper vnode and open that,
608		 * otherwise can simply open the lower vnode.
609		 */
610		tvp = un->un_lowervp;
611		if ((ap->a_mode & FWRITE) && (tvp->v_type == VREG)) {
612			error = union_copyup(un, (mode&O_TRUNC) == 0, cred, l);
613			if (error == 0)
614				error = VOP_OPEN(un->un_uppervp, mode, cred);
615			return (error);
616		}
617
618		/*
619		 * Just open the lower vnode, but check for nodev mount flag
620		 */
621		if ((tvp->v_type == VBLK || tvp->v_type == VCHR) &&
622		    (ap->a_vp->v_mount->mnt_flag & MNT_NODEV))
623			return ENXIO;
624		un->un_openl++;
625		vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY);
626		error = VOP_OPEN(tvp, mode, cred);
627		VOP_UNLOCK(tvp);
628
629		return (error);
630	}
631	/*
632	 * Just open the upper vnode, checking for nodev mount flag first
633	 */
634	if ((tvp->v_type == VBLK || tvp->v_type == VCHR) &&
635	    (ap->a_vp->v_mount->mnt_flag & MNT_NODEV))
636		return ENXIO;
637
638	error = VOP_OPEN(tvp, mode, cred);
639
640	return (error);
641}
642
643int
644union_close(void *v)
645{
646	struct vop_close_args /* {
647		struct vnode *a_vp;
648		int  a_fflag;
649		kauth_cred_t a_cred;
650	} */ *ap = v;
651	struct union_node *un = VTOUNION(ap->a_vp);
652	struct vnode *vp;
653	int error;
654	bool do_lock;
655
656	vp = un->un_uppervp;
657	if (vp != NULLVP) {
658		do_lock = false;
659	} else {
660		KASSERT(un->un_openl > 0);
661		--un->un_openl;
662		vp = un->un_lowervp;
663		do_lock = true;
664	}
665
666	KASSERT(vp != NULLVP);
667	ap->a_vp = vp;
668	if (do_lock)
669		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
670	error = VCALL(vp, VOFFSET(vop_close), ap);
671	if (do_lock)
672		VOP_UNLOCK(vp);
673
674	return error;
675}
676
677/*
678 * Check access permission on the union vnode.
679 * The access check being enforced is to check
680 * against both the underlying vnode, and any
681 * copied vnode.  This ensures that no additional
682 * file permissions are given away simply because
683 * the user caused an implicit file copy.
684 */
685int
686union_access(void *v)
687{
688	struct vop_access_args /* {
689		struct vnodeop_desc *a_desc;
690		struct vnode *a_vp;
691		int a_mode;
692		kauth_cred_t a_cred;
693	} */ *ap = v;
694	struct vnode *vp = ap->a_vp;
695	struct union_node *un = VTOUNION(vp);
696	int error = EACCES;
697	struct union_mount *um = MOUNTTOUNIONMOUNT(vp->v_mount);
698
699	/*
700	 * Disallow write attempts on read-only file systems;
701	 * unless the file is a socket, fifo, or a block or
702	 * character device resident on the file system.
703	 */
704	if (ap->a_mode & VWRITE) {
705		switch (vp->v_type) {
706		case VDIR:
707		case VLNK:
708		case VREG:
709			if (vp->v_mount->mnt_flag & MNT_RDONLY)
710				return (EROFS);
711			break;
712		case VBAD:
713		case VBLK:
714		case VCHR:
715		case VSOCK:
716		case VFIFO:
717		case VNON:
718		default:
719			break;
720		}
721	}
722
723
724	if ((vp = un->un_uppervp) != NULLVP) {
725		ap->a_vp = vp;
726		return (VCALL(vp, VOFFSET(vop_access), ap));
727	}
728
729	if ((vp = un->un_lowervp) != NULLVP) {
730		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
731		ap->a_vp = vp;
732		error = VCALL(vp, VOFFSET(vop_access), ap);
733		if (error == 0) {
734			if (um->um_op == UNMNT_BELOW) {
735				ap->a_cred = um->um_cred;
736				error = VCALL(vp, VOFFSET(vop_access), ap);
737			}
738		}
739		VOP_UNLOCK(vp);
740		if (error)
741			return (error);
742	}
743
744	return (error);
745}
746
/*
 * We handle getattr only to change the fsid and
 * track object sizes.
 *
 * Attributes come from the upper vnode when present, otherwise from
 * the lower one.  For directories both layers are queried so the link
 * count can be merged (see the long comment below).
 */
int
union_getattr(void *v)
{
	struct vop_getattr_args /* {
		struct vnode *a_vp;
		struct vattr *a_vap;
		kauth_cred_t a_cred;
	} */ *ap = v;
	int error;
	struct union_node *un = VTOUNION(ap->a_vp);
	struct vnode *vp = un->un_uppervp;
	struct vattr *vap;
	struct vattr va;


	/*
	 * Some programs walk the filesystem hierarchy by counting
	 * links to directories to avoid stat'ing all the time.
	 * This means the link count on directories needs to be "correct".
	 * The only way to do that is to call getattr on both layers
	 * and fix up the link count.  The link count will not necessarily
	 * be accurate but will be large enough to defeat the tree walkers.
	 *
	 * To make life more interesting, some filesystems don't keep
	 * track of link counts in the expected way, and return a
	 * link count of `1' for those directories; if either of the
	 * component directories returns a link count of `1', we return a 1.
	 */

	vap = ap->a_vap;

	vp = un->un_uppervp;
	if (vp != NULLVP) {
		error = VOP_GETATTR(vp, vap, ap->a_cred);
		if (error)
			return (error);
		/* union_newsize() releases un_lock (cf. union_read). */
		mutex_enter(&un->un_lock);
		union_newsize(ap->a_vp, vap->va_size, VNOVAL);
	}

	/*
	 * Decide whether the lower layer must be queried too: always
	 * when there is no upper vnode, and for directories so the
	 * link counts can be merged.
	 */
	if (vp == NULLVP) {
		vp = un->un_lowervp;
	} else if (vp->v_type == VDIR) {
		vp = un->un_lowervp;
		if (vp != NULLVP)
			vap = &va;	/* keep upper attrs; use scratch */
	} else {
		vp = NULLVP;
	}

	if (vp != NULLVP) {
		/* The lower vnode is not kept locked by the union layer. */
		if (vp == un->un_lowervp)
			vn_lock(vp, LK_SHARED | LK_RETRY);
		error = VOP_GETATTR(vp, vap, ap->a_cred);
		if (vp == un->un_lowervp)
			VOP_UNLOCK(vp);
		if (error)
			return (error);
		/* union_newsize() releases un_lock (cf. union_read). */
		mutex_enter(&un->un_lock);
		union_newsize(ap->a_vp, VNOVAL, vap->va_size);
	}

	if ((vap != ap->a_vap) && (vap->va_type == VDIR)) {
		/*
		 * Link count manipulation:
		 *	- If both return "2", return 2 (no subdirs)
		 *	- If one or the other return "1", return "1" (ENOCLUE)
		 */
		if ((ap->a_vap->va_nlink == 2) &&
		    (vap->va_nlink == 2))
			;
		else if (ap->a_vap->va_nlink != 1) {
			if (vap->va_nlink == 1)
				ap->a_vap->va_nlink = 1;
			else
				ap->a_vap->va_nlink += vap->va_nlink;
		}
	}
	/* Report the union mount's fsid, not the underlying layer's. */
	ap->a_vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsidx.__fsid_val[0];
	return (0);
}
832
/*
 * union_setattr: set attributes on a union node.
 *
 * Attribute changes can only be made in the upper layer; a lower
 * regular file is copied up first when its size is being changed
 * (the O_TRUNC|O_CREAT open path).  Lower special files tolerate a
 * size-only "truncate to 0" so that O_TRUNC opens succeed; anything
 * else on a lower-only node fails with EROFS.
 */
int
union_setattr(void *v)
{
	struct vop_setattr_args /* {
		struct vnode *a_vp;
		struct vattr *a_vap;
		kauth_cred_t a_cred;
	} */ *ap = v;
	struct vattr *vap = ap->a_vap;
	struct vnode *vp = ap->a_vp;
	struct union_node *un = VTOUNION(vp);
	bool size_only;		/* All but va_size are VNOVAL. */
	int error;

	size_only = (vap->va_flags == VNOVAL && vap->va_uid == (uid_t)VNOVAL &&
	    vap->va_gid == (gid_t)VNOVAL && vap->va_atime.tv_sec == VNOVAL &&
	    vap->va_mtime.tv_sec == VNOVAL && vap->va_mode == (mode_t)VNOVAL);

	if (!size_only && (vp->v_mount->mnt_flag & MNT_RDONLY))
		return (EROFS);
	if (vap->va_size != VNOVAL) {
		switch (vp->v_type) {
		case VDIR:
			return (EISDIR);
		case VCHR:
		case VBLK:
		case VSOCK:
		case VFIFO:
			break;
		case VREG:
		case VLNK:
		default:
			/*
			 * Disallow write attempts if the filesystem is
			 * mounted read-only.
			 */
			if (vp->v_mount->mnt_flag & MNT_RDONLY)
				return (EROFS);
		}
	}

	/*
	 * Handle case of truncating lower object to zero size,
	 * by creating a zero length upper object.  This is to
	 * handle the case of open with O_TRUNC and O_CREAT.
	 */
	if ((un->un_uppervp == NULLVP) &&
	    /* assert(un->un_lowervp != NULLVP) */
	    (un->un_lowervp->v_type == VREG)) {
		error = union_copyup(un, (vap->va_size != 0),
						ap->a_cred, curlwp);
		if (error)
			return (error);
	}

	/*
	 * Try to set attributes in upper layer, ignore size change to zero
	 * for devices to handle O_TRUNC and return read-only filesystem error
	 * otherwise.
	 */
	if (un->un_uppervp != NULLVP) {
		error = VOP_SETATTR(un->un_uppervp, vap, ap->a_cred);
		if ((error == 0) && (vap->va_size != VNOVAL)) {
			/* union_newsize() releases un_lock. */
			mutex_enter(&un->un_lock);
			union_newsize(ap->a_vp, vap->va_size, VNOVAL);
		}
	} else {
		KASSERT(un->un_lowervp != NULLVP);
		if (NODE_IS_SPECIAL(un->un_lowervp)) {
			if (size_only &&
			    (vap->va_size == 0 || vap->va_size == VNOVAL))
				error = 0;
			else
				error = EROFS;
		} else {
			error = EROFS;
		}
	}

	return (error);
}
914
/*
 * union_read: read from whichever layer backs this node (upper when it
 * exists, else lower).  The lower vnode must be locked explicitly
 * around the call; the upper vnode needs no extra locking here
 * (NOTE(review): presumably covered by the caller's vnode lock —
 * confirm against union_lock()).
 */
int
union_read(void *v)
{
	struct vop_read_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int  a_ioflag;
		kauth_cred_t a_cred;
	} */ *ap = v;
	int error;
	struct vnode *vp = OTHERVP(ap->a_vp);
	int dolock = (vp == LOWERVP(ap->a_vp));

	if (dolock)
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	error = VOP_READ(vp, ap->a_uio, ap->a_ioflag, ap->a_cred);
	if (dolock)
		VOP_UNLOCK(vp);

	/*
	 * XXX
	 * perhaps the size of the underlying object has changed under
	 * our feet.  take advantage of the offset information present
	 * in the uio structure.
	 */
	if (error == 0) {
		struct union_node *un = VTOUNION(ap->a_vp);
		off_t cur = ap->a_uio->uio_offset;
		off_t usz = VNOVAL, lsz = VNOVAL;

		/*
		 * un_lock is released either by union_newsize() or by
		 * the explicit mutex_exit() below.
		 */
		mutex_enter(&un->un_lock);
		if (vp == un->un_uppervp) {
			if (cur > un->un_uppersz)
				usz = cur;
		} else {
			if (cur > un->un_lowersz)
				lsz = cur;
		}

		if (usz != VNOVAL || lsz != VNOVAL)
			union_newsize(ap->a_vp, usz, lsz);
		else
			mutex_exit(&un->un_lock);
	}

	return (error);
}
962
963int
964union_write(void *v)
965{
966	struct vop_read_args /* {
967		struct vnode *a_vp;
968		struct uio *a_uio;
969		int  a_ioflag;
970		kauth_cred_t a_cred;
971	} */ *ap = v;
972	int error;
973	struct vnode *vp;
974	struct union_node *un = VTOUNION(ap->a_vp);
975
976	vp = UPPERVP(ap->a_vp);
977	if (vp == NULLVP) {
978		vp = LOWERVP(ap->a_vp);
979		if (NODE_IS_SPECIAL(vp)) {
980			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
981			error = VOP_WRITE(vp, ap->a_uio, ap->a_ioflag,
982			    ap->a_cred);
983			VOP_UNLOCK(vp);
984			return error;
985		}
986		panic("union: missing upper layer in write");
987	}
988
989	error = VOP_WRITE(vp, ap->a_uio, ap->a_ioflag, ap->a_cred);
990
991	/*
992	 * the size of the underlying object may be changed by the
993	 * write.
994	 */
995	if (error == 0) {
996		off_t cur = ap->a_uio->uio_offset;
997
998		mutex_enter(&un->un_lock);
999		if (cur > un->un_uppersz)
1000			union_newsize(ap->a_vp, cur, VNOVAL);
1001		else
1002			mutex_exit(&un->un_lock);
1003	}
1004
1005	return (error);
1006}
1007
1008int
1009union_ioctl(void *v)
1010{
1011	struct vop_ioctl_args /* {
1012		struct vnode *a_vp;
1013		int  a_command;
1014		void *a_data;
1015		int  a_fflag;
1016		kauth_cred_t a_cred;
1017	} */ *ap = v;
1018	struct vnode *ovp = OTHERVP(ap->a_vp);
1019
1020	ap->a_vp = ovp;
1021	return (VCALL(ovp, VOFFSET(vop_ioctl), ap));
1022}
1023
1024int
1025union_poll(void *v)
1026{
1027	struct vop_poll_args /* {
1028		struct vnode *a_vp;
1029		int a_events;
1030	} */ *ap = v;
1031	struct vnode *ovp = OTHERVP(ap->a_vp);
1032
1033	ap->a_vp = ovp;
1034	return (VCALL(ovp, VOFFSET(vop_poll), ap));
1035}
1036
1037int
1038union_revoke(void *v)
1039{
1040	struct vop_revoke_args /* {
1041		struct vnode *a_vp;
1042		int a_flags;
1043		struct proc *a_p;
1044	} */ *ap = v;
1045	struct vnode *vp = ap->a_vp;
1046
1047	if (UPPERVP(vp))
1048		VOP_REVOKE(UPPERVP(vp), ap->a_flags);
1049	if (LOWERVP(vp))
1050		VOP_REVOKE(LOWERVP(vp), ap->a_flags);
1051	vgone(vp);	/* XXXAD?? */
1052	return (0);
1053}
1054
1055int
1056union_mmap(void *v)
1057{
1058	struct vop_mmap_args /* {
1059		struct vnode *a_vp;
1060		vm_prot_t a_prot;
1061		kauth_cred_t a_cred;
1062	} */ *ap = v;
1063	struct vnode *ovp = OTHERVP(ap->a_vp);
1064
1065	ap->a_vp = ovp;
1066	return (VCALL(ovp, VOFFSET(vop_mmap), ap));
1067}
1068
/*
 * union_fsync: sync a union node to stable storage.
 *
 * Device nodes are first flushed through the special-file layer.
 * Reclaim-time fsync stops there; otherwise the request is passed to
 * whichever underlying layer backs the node.
 */
int
union_fsync(void *v)
{
	struct vop_fsync_args /* {
		struct vnode *a_vp;
		kauth_cred_t a_cred;
		int  a_flags;
		off_t offhi;
		off_t offlo;
	} */ *ap = v;
	int error = 0;
	struct vnode *targetvp;

	/*
	 * If vinvalbuf is calling us, it's a "shallow fsync" -- don't
	 * bother syncing the underlying vnodes, since (a) they'll be
	 * fsync'ed when reclaimed and (b) we could deadlock if
	 * they're locked; otherwise, pass it through to the
	 * underlying layer.
	 */
	if (ap->a_vp->v_type == VBLK || ap->a_vp->v_type == VCHR) {
		error = spec_fsync(v);
		if (error)
			return error;
	}

	if (ap->a_flags & FSYNC_RECLAIM)
		return 0;

	targetvp = OTHERVP(ap->a_vp);
	if (targetvp != NULLVP) {
		/* The lower vnode must be locked around the call. */
		int dolock = (targetvp == LOWERVP(ap->a_vp));

		if (dolock)
			vn_lock(targetvp, LK_EXCLUSIVE | LK_RETRY);
		error = VOP_FSYNC(targetvp, ap->a_cred, ap->a_flags,
			    ap->a_offlo, ap->a_offhi);
		if (dolock)
			VOP_UNLOCK(targetvp);
	}

	return (error);
}
1112
1113int
1114union_seek(void *v)
1115{
1116	struct vop_seek_args /* {
1117		struct vnode *a_vp;
1118		off_t  a_oldoff;
1119		off_t  a_newoff;
1120		kauth_cred_t a_cred;
1121	} */ *ap = v;
1122	struct vnode *ovp = OTHERVP(ap->a_vp);
1123
1124	ap->a_vp = ovp;
1125	return (VCALL(ovp, VOFFSET(vop_seek), ap));
1126}
1127
/*
 * Remove a file.  If it is present in the upper layer it is removed
 * there (a whiteout may be requested as well); if it exists only in
 * the lower layer, a whiteout entry is made in the upper directory
 * instead so the lower file no longer shows through.
 */
int
union_remove(void *v)
{
	struct vop_remove_args /* {
		struct vnode *a_dvp;
		struct vnode *a_vp;
		struct componentname *a_cnp;
	} */ *ap = v;
	int error;
	struct union_node *dun = VTOUNION(ap->a_dvp);
	struct union_node *un = VTOUNION(ap->a_vp);
	struct componentname *cnp = ap->a_cnp;

	/* The directory must have an upper layer for us to modify. */
	if (dun->un_uppervp == NULLVP)
		panic("union remove: null upper vnode");

	if (un->un_uppervp != NULLVP) {
		struct vnode *dvp = dun->un_uppervp;
		struct vnode *vp = un->un_uppervp;

		/*
		 * Account for VOP_REMOVE to vrele dvp and vp.
		 * Note: VOP_REMOVE will unlock dvp and vp.
		 */
		vref(dvp);
		vref(vp);
		/* Request a whiteout when union_dowhiteout says so. */
		if (union_dowhiteout(un, cnp->cn_cred))
			cnp->cn_flags |= DOWHITEOUT;
		error = VOP_REMOVE(dvp, vp, cnp);
		if (!error)
			union_removed_upper(un);
		vrele(ap->a_dvp);
		vrele(ap->a_vp);
	} else {
		/* Lower-layer only: mask the name with a whiteout. */
		error = union_mkwhiteout(
			MOUNTTOUNIONMOUNT(UNIONTOV(dun)->v_mount),
			dun->un_uppervp, ap->a_cnp, un);
		vput(ap->a_dvp);
		vput(ap->a_vp);
	}

	return (error);
}
1171
/*
 * Create a hard link.  The link is always made in the upper layer;
 * if the source still lives only in the lower layer it is copied up
 * first, which can force a relookup of the directory entry because
 * the directory lock is dropped during the copyup.
 */
int
union_link(void *v)
{
	struct vop_link_args /* {
		struct vnode *a_dvp;
		struct vnode *a_vp;
		struct componentname *a_cnp;
	} */ *ap = v;
	int error = 0;
	struct componentname *cnp = ap->a_cnp;
	struct union_node *dun;
	struct vnode *vp;
	struct vnode *dvp;

	dun = VTOUNION(ap->a_dvp);

	KASSERT((ap->a_cnp->cn_flags & LOCKPARENT) != 0);

	if (ap->a_dvp->v_op != ap->a_vp->v_op) {
		/* a_vp is not a union vnode; link the naked vnode. */
		vp = ap->a_vp;
	} else {
		struct union_node *un = VTOUNION(ap->a_vp);
		if (un->un_uppervp == NULLVP) {
			/*
			 * If the upper directory is the one the source's
			 * copyup will go through, we must drop its lock
			 * around the copyup to avoid self-deadlock.
			 */
			const bool droplock = (dun->un_uppervp == un->un_dirvp);

			/*
			 * Needs to be copied before we can link it.
			 */
			vn_lock(ap->a_vp, LK_EXCLUSIVE | LK_RETRY);
			if (droplock)
				VOP_UNLOCK(dun->un_uppervp);
			error = union_copyup(un, 1, cnp->cn_cred, curlwp);
			if (droplock) {
				vn_lock(dun->un_uppervp,
				    LK_EXCLUSIVE | LK_RETRY);
				/*
				 * During copyup, we dropped the lock on the
				 * dir and invalidated any saved namei lookup
				 * state for the directory we'll be entering
				 * the link in.  We need to re-run the lookup
				 * in that directory to reset any state needed
				 * for VOP_LINK.
				 * Call relookup on the union-layer to reset
				 * the state.
				 */
				vp  = NULLVP;
				if (dun->un_uppervp == NULLVP)
					 panic("union: null upperdvp?");
				error = relookup(ap->a_dvp, &vp, ap->a_cnp, 0);
				if (error) {
					VOP_UNLOCK(ap->a_vp);
					return EROFS;	/* ? */
				}
				if (vp != NULLVP) {
					/*
					 * The name we want to create has
					 * mysteriously appeared (a race?)
					 */
					error = EEXIST;
					VOP_UNLOCK(ap->a_vp);
					vput(ap->a_dvp);
					vput(vp);
					return (error);
				}
			}
			VOP_UNLOCK(ap->a_vp);
		}
		vp = un->un_uppervp;
	}

	/* Links can only be made in a writable upper directory. */
	dvp = dun->un_uppervp;
	if (dvp == NULLVP)
		error = EROFS;

	if (error) {
		vput(ap->a_dvp);
		return (error);
	}

	/*
	 * Account for VOP_LINK to vrele dvp.
	 * Note: VOP_LINK will unlock dvp.
	 */
	vref(dvp);
	error = VOP_LINK(dvp, vp, cnp);
	vrele(ap->a_dvp);

	return error;
}
1261
/*
 * Rename.  Each of the four vnodes is mapped to its upper-layer
 * counterpart and the rename is carried out entirely in the upper
 * layer.  A source that exists only in the lower layer fails with
 * EXDEV; a target that exists only in the lower layer is simply
 * treated as absent (tvp becomes NULLVP).
 */
int
union_rename(void *v)
{
	struct vop_rename_args  /* {
		struct vnode *a_fdvp;
		struct vnode *a_fvp;
		struct componentname *a_fcnp;
		struct vnode *a_tdvp;
		struct vnode *a_tvp;
		struct componentname *a_tcnp;
	} */ *ap = v;
	int error;

	struct vnode *fdvp = ap->a_fdvp;
	struct vnode *fvp = ap->a_fvp;
	struct vnode *tdvp = ap->a_tdvp;
	struct vnode *tvp = ap->a_tvp;

	/*
	 * Account for VOP_RENAME to vrele all nodes.
	 * Note: VOP_RENAME will unlock tdvp.
	 */

	if (fdvp->v_op == union_vnodeop_p) {	/* always true */
		struct union_node *un = VTOUNION(fdvp);
		if (un->un_uppervp == NULLVP) {
			/*
			 * this should never happen in normal
			 * operation but might if there was
			 * a problem creating the top-level shadow
			 * directory.
			 */
			error = EXDEV;
			goto bad;
		}

		fdvp = un->un_uppervp;
		vref(fdvp);
	}

	if (fvp->v_op == union_vnodeop_p) {	/* always true */
		struct union_node *un = VTOUNION(fvp);
		if (un->un_uppervp == NULLVP) {
			/* XXX: should do a copyup */
			error = EXDEV;
			goto bad;
		}

		/*
		 * A lower-layer file also exists under the old name;
		 * request a whiteout so it does not show through.
		 */
		if (un->un_lowervp != NULLVP)
			ap->a_fcnp->cn_flags |= DOWHITEOUT;

		fvp = un->un_uppervp;
		vref(fvp);
	}

	if (tdvp->v_op == union_vnodeop_p) {
		struct union_node *un = VTOUNION(tdvp);
		if (un->un_uppervp == NULLVP) {
			/*
			 * this should never happen in normal
			 * operation but might if there was
			 * a problem creating the top-level shadow
			 * directory.
			 */
			error = EXDEV;
			goto bad;
		}

		tdvp = un->un_uppervp;
		vref(tdvp);
	}

	if (tvp != NULLVP && tvp->v_op == union_vnodeop_p) {
		struct union_node *un = VTOUNION(tvp);

		/* May be NULLVP: target exists only in the lower layer. */
		tvp = un->un_uppervp;
		if (tvp != NULLVP) {
			vref(tvp);
		}
	}

	error = VOP_RENAME(fdvp, fvp, ap->a_fcnp, tdvp, tvp, ap->a_tcnp);
	goto out;

bad:
	vput(tdvp);
	if (tvp != NULLVP)
		vput(tvp);
	vrele(fdvp);
	vrele(fvp);

out:
	/* Drop the extra references taken on any remapped vnodes. */
	if (fdvp != ap->a_fdvp) {
		vrele(ap->a_fdvp);
	}
	if (fvp != ap->a_fvp) {
		vrele(ap->a_fvp);
	}
	if (tdvp != ap->a_tdvp) {
		vrele(ap->a_tdvp);
	}
	if (tvp != ap->a_tvp) {
		vrele(ap->a_tvp);
	}
	return (error);
}
1368
/*
 * Make a directory in the upper layer and wrap the result in a
 * new union vnode; fails with EROFS if there is no upper layer.
 */
int
union_mkdir(void *v)
{
	struct vop_mkdir_v3_args /* {
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
		struct vattr *a_vap;
	} */ *ap = v;
	struct union_node *un = VTOUNION(ap->a_dvp);
	struct vnode *dvp = un->un_uppervp;
	struct componentname *cnp = ap->a_cnp;

	if (dvp != NULLVP) {
		int error;
		struct vnode *vp;

		error = VOP_MKDIR(dvp, &vp, cnp, ap->a_vap);
		if (error) {
			vrele(ap->a_dvp);
			return (error);
		}

		/* Wrap the new upper-layer directory in a union vnode. */
		error = union_allocvp(ap->a_vpp, ap->a_dvp->v_mount, ap->a_dvp,
				NULLVP, cnp, vp, NULLVP, 1);
		if (error)
			vrele(vp);
		return (error);
	}

	return (EROFS);
}
1401
/*
 * Remove a directory.  Mirrors union_remove: remove in the upper
 * layer when present (possibly requesting a whiteout), otherwise
 * mask a lower-only directory with a whiteout entry.
 */
int
union_rmdir(void *v)
{
	struct vop_rmdir_args /* {
		struct vnode *a_dvp;
		struct vnode *a_vp;
		struct componentname *a_cnp;
	} */ *ap = v;
	int error;
	struct union_node *dun = VTOUNION(ap->a_dvp);
	struct union_node *un = VTOUNION(ap->a_vp);
	struct componentname *cnp = ap->a_cnp;

	/* The parent must have an upper layer for us to modify. */
	if (dun->un_uppervp == NULLVP)
		panic("union rmdir: null upper vnode");

	/* Verify the union directory may be removed at all. */
	error = union_check_rmdir(un, cnp->cn_cred);
	if (error) {
		vput(ap->a_dvp);
		vput(ap->a_vp);
		return error;
	}

	if (un->un_uppervp != NULLVP) {
		struct vnode *dvp = dun->un_uppervp;
		struct vnode *vp = un->un_uppervp;

		/*
		 * Account for VOP_RMDIR to vrele dvp and vp.
		 * Note: VOP_RMDIR will unlock dvp and vp.
		 */
		vref(dvp);
		vref(vp);
		/* Request a whiteout when union_dowhiteout says so. */
		if (union_dowhiteout(un, cnp->cn_cred))
			cnp->cn_flags |= DOWHITEOUT;
		error = VOP_RMDIR(dvp, vp, ap->a_cnp);
		if (!error)
			union_removed_upper(un);
		vrele(ap->a_dvp);
		vrele(ap->a_vp);
	} else {
		/* Lower-layer only: mask the name with a whiteout. */
		error = union_mkwhiteout(
			MOUNTTOUNIONMOUNT(UNIONTOV(dun)->v_mount),
			dun->un_uppervp, ap->a_cnp, un);
		vput(ap->a_dvp);
		vput(ap->a_vp);
	}

	return (error);
}
1452
1453int
1454union_symlink(void *v)
1455{
1456	struct vop_symlink_v3_args /* {
1457		struct vnode *a_dvp;
1458		struct vnode **a_vpp;
1459		struct componentname *a_cnp;
1460		struct vattr *a_vap;
1461		char *a_target;
1462	} */ *ap = v;
1463	struct union_node *un = VTOUNION(ap->a_dvp);
1464	struct vnode *dvp = un->un_uppervp;
1465	struct componentname *cnp = ap->a_cnp;
1466
1467	if (dvp != NULLVP) {
1468		int error;
1469
1470		error = VOP_SYMLINK(dvp, ap->a_vpp, cnp, ap->a_vap,
1471				    ap->a_target);
1472		return (error);
1473	}
1474
1475	return (EROFS);
1476}
1477
1478/*
1479 * union_readdir works in concert with getdirentries and
1480 * readdir(3) to provide a list of entries in the unioned
1481 * directories.  getdirentries is responsible for walking
1482 * down the union stack.  readdir(3) is responsible for
1483 * eliminating duplicate names from the returned data stream.
1484 */
1485int
1486union_readdir(void *v)
1487{
1488	struct vop_readdir_args /* {
1489		struct vnodeop_desc *a_desc;
1490		struct vnode *a_vp;
1491		struct uio *a_uio;
1492		kauth_cred_t a_cred;
1493		int *a_eofflag;
1494		u_long *a_cookies;
1495		int a_ncookies;
1496	} */ *ap = v;
1497	struct union_node *un = VTOUNION(ap->a_vp);
1498	struct vnode *uvp = un->un_uppervp;
1499
1500	if (uvp == NULLVP)
1501		return (0);
1502
1503	ap->a_vp = uvp;
1504	return (VCALL(uvp, VOFFSET(vop_readdir), ap));
1505}
1506
1507int
1508union_readlink(void *v)
1509{
1510	struct vop_readlink_args /* {
1511		struct vnode *a_vp;
1512		struct uio *a_uio;
1513		kauth_cred_t a_cred;
1514	} */ *ap = v;
1515	int error;
1516	struct vnode *vp = OTHERVP(ap->a_vp);
1517	int dolock = (vp == LOWERVP(ap->a_vp));
1518
1519	if (dolock)
1520		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1521	ap->a_vp = vp;
1522	error = VCALL(vp, VOFFSET(vop_readlink), ap);
1523	if (dolock)
1524		VOP_UNLOCK(vp);
1525
1526	return (error);
1527}
1528
1529int
1530union_abortop(void *v)
1531{
1532	struct vop_abortop_args /* {
1533		struct vnode *a_dvp;
1534		struct componentname *a_cnp;
1535	} */ *ap = v;
1536
1537	KASSERT(UPPERVP(ap->a_dvp) != NULL);
1538
1539	ap->a_dvp = UPPERVP(ap->a_dvp);
1540	return VCALL(ap->a_dvp, VOFFSET(vop_abortop), ap);
1541}
1542
/*
 * Inactivate a union vnode: flush the directory vnode cache
 * (un_dircache) and tell the caller whether the vnode can be
 * recycled.  The underlying vnodes are deliberately held until
 * reclaim.
 */
int
union_inactive(void *v)
{
	struct vop_inactive_args /* {
		const struct vnodeop_desc *a_desc;
		struct vnode *a_vp;
		bool *a_recycle;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct union_node *un = VTOUNION(vp);
	struct vnode **vpp;

	/*
	 * Do nothing (and _don't_ bypass).
	 * Wait to vrele lowervp until reclaim,
	 * so that until then our union_node is in the
	 * cache and reusable.
	 *
	 * NEEDSWORK: Someday, consider inactive'ing
	 * the lowervp and then trying to reactivate it
	 * with capabilities (v_id)
	 * like they do in the name lookup cache code.
	 * That's too much work for now.
	 */

	/* Release and free the NULLVP-terminated dircache array. */
	if (un->un_dircache != 0) {
		for (vpp = un->un_dircache; *vpp != NULLVP; vpp++)
			vrele(*vpp);
		free(un->un_dircache, M_TEMP);
		un->un_dircache = 0;
	}

	/* Recycle immediately unless the node is still cached. */
	*ap->a_recycle = ((un->un_cflags & UN_CACHED) == 0);
	VOP_UNLOCK(vp);

	return (0);
}
1580
1581int
1582union_reclaim(void *v)
1583{
1584	struct vop_reclaim_args /* {
1585		struct vnode *a_vp;
1586	} */ *ap = v;
1587
1588	union_freevp(ap->a_vp);
1589
1590	return (0);
1591}
1592
1593static int
1594union_lock1(struct vnode *vp, struct vnode *lockvp, int flags)
1595{
1596	struct vop_lock_args ap;
1597
1598	if (lockvp == vp) {
1599		ap.a_vp = vp;
1600		ap.a_flags = flags;
1601		return genfs_lock(&ap);
1602	} else
1603		return VOP_LOCK(lockvp, flags);
1604}
1605
1606static int
1607union_unlock1(struct vnode *vp, struct vnode *lockvp)
1608{
1609	struct vop_unlock_args ap;
1610
1611	if (lockvp == vp) {
1612		ap.a_vp = vp;
1613		return genfs_unlock(&ap);
1614	} else
1615		return VOP_UNLOCK(lockvp);
1616}
1617
/*
 * Lock a union vnode.  The lock is supplied by whichever vnode
 * LOCKVP() selects, and that choice can change while we sleep, so
 * retry until it is stable; afterwards check the interlock flags
 * to detect a vnode that was revoked or cleaned in the meantime.
 */
int
union_lock(void *v)
{
	struct vop_lock_args /* {
		struct vnode *a_vp;
		int a_flags;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp, *lockvp;
	struct union_node *un = VTOUNION(vp);
	int flags = ap->a_flags;
	int error;

	/* Non-blocking attempt: fail with EBUSY rather than sleep. */
	if ((flags & LK_NOWAIT) != 0) {
		if (!mutex_tryenter(&un->un_lock))
			return EBUSY;
		lockvp = LOCKVP(vp);
		error = union_lock1(vp, lockvp, flags);
		mutex_exit(&un->un_lock);
		if (error)
			return error;
		/* Check for revoke/clean without sleeping on the interlock. */
		if (mutex_tryenter(vp->v_interlock)) {
			if (ISSET(vp->v_iflag, VI_XLOCK))
				error = EBUSY;
			else if (ISSET(vp->v_iflag, VI_CLEAN))
				error = ENOENT;
			else
				error = 0;
			mutex_exit(vp->v_interlock);
		} else
			error = EBUSY;
		if (error)
			union_unlock1(vp, lockvp);
		return error;
	}

	/* Loop until the lock vnode is stable across the sleep. */
	mutex_enter(&un->un_lock);
	for (;;) {
		lockvp = LOCKVP(vp);
		mutex_exit(&un->un_lock);
		error = union_lock1(vp, lockvp, flags);
		if (error != 0)
			return error;
		mutex_enter(&un->un_lock);
		if (lockvp == LOCKVP(vp))
			break;
		union_unlock1(vp, lockvp);
	}
	mutex_exit(&un->un_lock);

	/* The vnode may have been revoked or cleaned while we slept. */
	mutex_enter(vp->v_interlock);
	if (ISSET(vp->v_iflag, VI_XLOCK) || ISSET(vp->v_iflag, VI_CLEAN)) {
		union_unlock1(vp, lockvp);
		/* Wait for the revoke to finish, then report ENOENT. */
		vwait(vp, VI_XLOCK);
		KASSERT(ISSET(vp->v_iflag, VI_CLEAN));
		mutex_exit(vp->v_interlock);
		return ENOENT;
	}
	mutex_exit(vp->v_interlock);
	return 0;
}
1678
1679int
1680union_unlock(void *v)
1681{
1682	struct vop_unlock_args /* {
1683		struct vnode *a_vp;
1684		int a_flags;
1685	} */ *ap = v;
1686	struct vnode *vp = ap->a_vp, *lockvp;
1687
1688	lockvp = LOCKVP(vp);
1689	union_unlock1(vp, lockvp);
1690
1691	return 0;
1692}
1693
1694int
1695union_bmap(void *v)
1696{
1697	struct vop_bmap_args /* {
1698		struct vnode *a_vp;
1699		daddr_t  a_bn;
1700		struct vnode **a_vpp;
1701		daddr_t *a_bnp;
1702		int *a_runp;
1703	} */ *ap = v;
1704	int error;
1705	struct vnode *vp = OTHERVP(ap->a_vp);
1706	int dolock = (vp == LOWERVP(ap->a_vp));
1707
1708	if (dolock)
1709		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1710	ap->a_vp = vp;
1711	error = VCALL(vp, VOFFSET(vop_bmap), ap);
1712	if (dolock)
1713		VOP_UNLOCK(vp);
1714
1715	return (error);
1716}
1717
1718int
1719union_print(void *v)
1720{
1721	struct vop_print_args /* {
1722		struct vnode *a_vp;
1723	} */ *ap = v;
1724	struct vnode *vp = ap->a_vp;
1725
1726	printf("\ttag VT_UNION, vp=%p, uppervp=%p, lowervp=%p\n",
1727			vp, UPPERVP(vp), LOWERVP(vp));
1728	if (UPPERVP(vp) != NULLVP)
1729		vprint("union: upper", UPPERVP(vp));
1730	if (LOWERVP(vp) != NULLVP)
1731		vprint("union: lower", LOWERVP(vp));
1732	if (VTOUNION(vp)->un_dircache) {
1733		struct vnode **vpp;
1734		for (vpp = VTOUNION(vp)->un_dircache; *vpp != NULLVP; vpp++)
1735			vprint("dircache:", *vpp);
1736	}
1737
1738	return (0);
1739}
1740
1741int
1742union_islocked(void *v)
1743{
1744	struct vop_islocked_args /* {
1745		struct vnode *a_vp;
1746	} */ *ap = v;
1747	struct vnode *vp;
1748	struct union_node *un;
1749
1750	un = VTOUNION(ap->a_vp);
1751	mutex_enter(&un->un_lock);
1752	vp = LOCKVP(ap->a_vp);
1753	mutex_exit(&un->un_lock);
1754
1755	if (vp == ap->a_vp)
1756		return genfs_islocked(ap);
1757	else
1758		return VOP_ISLOCKED(vp);
1759}
1760
1761int
1762union_pathconf(void *v)
1763{
1764	struct vop_pathconf_args /* {
1765		struct vnode *a_vp;
1766		int a_name;
1767		int *a_retval;
1768	} */ *ap = v;
1769	int error;
1770	struct vnode *vp = OTHERVP(ap->a_vp);
1771	int dolock = (vp == LOWERVP(ap->a_vp));
1772
1773	if (dolock)
1774		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1775	ap->a_vp = vp;
1776	error = VCALL(vp, VOFFSET(vop_pathconf), ap);
1777	if (dolock)
1778		VOP_UNLOCK(vp);
1779
1780	return (error);
1781}
1782
1783int
1784union_advlock(void *v)
1785{
1786	struct vop_advlock_args /* {
1787		struct vnode *a_vp;
1788		void *a_id;
1789		int  a_op;
1790		struct flock *a_fl;
1791		int  a_flags;
1792	} */ *ap = v;
1793	struct vnode *ovp = OTHERVP(ap->a_vp);
1794
1795	ap->a_vp = ovp;
1796	return (VCALL(ovp, VOFFSET(vop_advlock), ap));
1797}
1798
1799int
1800union_strategy(void *v)
1801{
1802	struct vop_strategy_args /* {
1803		struct vnode *a_vp;
1804		struct buf *a_bp;
1805	} */ *ap = v;
1806	struct vnode *ovp = OTHERVP(ap->a_vp);
1807	struct buf *bp = ap->a_bp;
1808
1809	KASSERT(ovp != NULLVP);
1810	if (!NODE_IS_SPECIAL(ovp))
1811		KASSERT((bp->b_flags & B_READ) || ovp != LOWERVP(bp->b_vp));
1812
1813	return (VOP_STRATEGY(ovp, bp));
1814}
1815
1816int
1817union_bwrite(void *v)
1818{
1819	struct vop_bwrite_args /* {
1820		struct vnode *a_vp;
1821		struct buf *a_bp;
1822	} */ *ap = v;
1823	struct vnode *ovp = OTHERVP(ap->a_vp);
1824	struct buf *bp = ap->a_bp;
1825
1826	KASSERT(ovp != NULLVP);
1827	if (!NODE_IS_SPECIAL(ovp))
1828		KASSERT((bp->b_flags & B_READ) || ovp != LOWERVP(bp->b_vp));
1829
1830	return (VOP_BWRITE(ovp, bp));
1831}
1832
/*
 * Page in: pass the request to the backing vnode, which shares
 * our interlock (asserted below).
 */
int
union_getpages(void *v)
{
	struct vop_getpages_args /* {
		struct vnode *a_vp;
		voff_t a_offset;
		struct vm_page **a_m;
		int *a_count;
		int a_centeridx;
		vm_prot_t a_access_type;
		int a_advice;
		int a_flags;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;

	/* Caller must hold the vnode interlock on entry. */
	KASSERT(mutex_owned(vp->v_interlock));

	/* Refuse locked (non-sleeping) requests with EBUSY. */
	if (ap->a_flags & PGO_LOCKED) {
		return EBUSY;
	}
	ap->a_vp = OTHERVP(vp);
	KASSERT(vp->v_interlock == ap->a_vp->v_interlock);

	/* Just pass the request on to the underlying layer. */
	return VCALL(ap->a_vp, VOFFSET(vop_getpages), ap);
}
1859
/*
 * Page out: pass the request to the backing vnode, which shares
 * our interlock (asserted below).
 */
int
union_putpages(void *v)
{
	struct vop_putpages_args /* {
		struct vnode *a_vp;
		voff_t a_offlo;
		voff_t a_offhi;
		int a_flags;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;

	/* Caller must hold the vnode interlock on entry. */
	KASSERT(mutex_owned(vp->v_interlock));

	ap->a_vp = OTHERVP(vp);
	KASSERT(vp->v_interlock == ap->a_vp->v_interlock);

	/*
	 * Nothing to do at this layer for reclaim; release the
	 * interlock (the op drops it) and report success.
	 */
	if (ap->a_flags & PGO_RECLAIM) {
		mutex_exit(vp->v_interlock);
		return 0;
	}

	/* Just pass the request on to the underlying layer. */
	return VCALL(ap->a_vp, VOFFSET(vop_putpages), ap);
}
1884
1885int
1886union_kqfilter(void *v)
1887{
1888	struct vop_kqfilter_args /* {
1889		struct vnode	*a_vp;
1890		struct knote	*a_kn;
1891	} */ *ap = v;
1892	int error;
1893
1894	/*
1895	 * We watch either the upper layer file (if it already exists),
1896	 * or the lower layer one. If there is lower layer file only
1897	 * at this moment, we will keep watching that lower layer file
1898	 * even if upper layer file would be created later on.
1899	 */
1900	if (UPPERVP(ap->a_vp))
1901		error = VOP_KQFILTER(UPPERVP(ap->a_vp), ap->a_kn);
1902	else if (LOWERVP(ap->a_vp))
1903		error = VOP_KQFILTER(LOWERVP(ap->a_vp), ap->a_kn);
1904	else {
1905		/* panic? */
1906		error = EOPNOTSUPP;
1907	}
1908
1909	return (error);
1910}
1911