/* union_vnops.c revision 1.51 */
1/*	$NetBSD: union_vnops.c,v 1.51 2014/01/23 10:13:56 hannken Exp $	*/
2
3/*
4 * Copyright (c) 1992, 1993, 1994, 1995
5 *	The Regents of the University of California.  All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Jan-Simon Pendry.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its contributors
19 *    may be used to endorse or promote products derived from this software
20 *    without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 *
34 *	@(#)union_vnops.c	8.33 (Berkeley) 7/31/95
35 */
36
37/*
38 * Copyright (c) 1992, 1993, 1994, 1995 Jan-Simon Pendry.
39 *
40 * This code is derived from software contributed to Berkeley by
41 * Jan-Simon Pendry.
42 *
43 * Redistribution and use in source and binary forms, with or without
44 * modification, are permitted provided that the following conditions
45 * are met:
46 * 1. Redistributions of source code must retain the above copyright
47 *    notice, this list of conditions and the following disclaimer.
48 * 2. Redistributions in binary form must reproduce the above copyright
49 *    notice, this list of conditions and the following disclaimer in the
50 *    documentation and/or other materials provided with the distribution.
51 * 3. All advertising materials mentioning features or use of this software
52 *    must display the following acknowledgement:
53 *	This product includes software developed by the University of
54 *	California, Berkeley and its contributors.
55 * 4. Neither the name of the University nor the names of its contributors
56 *    may be used to endorse or promote products derived from this software
57 *    without specific prior written permission.
58 *
59 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
60 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
61 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
62 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
63 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
64 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
65 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
66 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
67 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
68 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
69 * SUCH DAMAGE.
70 *
71 *	@(#)union_vnops.c	8.33 (Berkeley) 7/31/95
72 */
73
74#include <sys/cdefs.h>
75__KERNEL_RCSID(0, "$NetBSD: union_vnops.c,v 1.51 2014/01/23 10:13:56 hannken Exp $");
76
77#include <sys/param.h>
78#include <sys/systm.h>
79#include <sys/proc.h>
80#include <sys/file.h>
81#include <sys/time.h>
82#include <sys/stat.h>
83#include <sys/vnode.h>
84#include <sys/mount.h>
85#include <sys/namei.h>
86#include <sys/malloc.h>
87#include <sys/buf.h>
88#include <sys/queue.h>
89#include <sys/lock.h>
90#include <sys/kauth.h>
91
92#include <fs/union/union.h>
93#include <miscfs/genfs/genfs.h>
94#include <miscfs/specfs/specdev.h>
95
96int union_lookup(void *);
97int union_create(void *);
98int union_whiteout(void *);
99int union_mknod(void *);
100int union_open(void *);
101int union_close(void *);
102int union_access(void *);
103int union_getattr(void *);
104int union_setattr(void *);
105int union_read(void *);
106int union_write(void *);
107int union_ioctl(void *);
108int union_poll(void *);
109int union_revoke(void *);
110int union_mmap(void *);
111int union_fsync(void *);
112int union_seek(void *);
113int union_remove(void *);
114int union_link(void *);
115int union_rename(void *);
116int union_mkdir(void *);
117int union_rmdir(void *);
118int union_symlink(void *);
119int union_readdir(void *);
120int union_readlink(void *);
121int union_abortop(void *);
122int union_inactive(void *);
123int union_reclaim(void *);
124int union_lock(void *);
125int union_unlock(void *);
126int union_bmap(void *);
127int union_print(void *);
128int union_islocked(void *);
129int union_pathconf(void *);
130int union_advlock(void *);
131int union_strategy(void *);
132int union_bwrite(void *);
133int union_getpages(void *);
134int union_putpages(void *);
135int union_kqfilter(void *);
136
137static int union_lookup1(struct vnode *, struct vnode **,
138			      struct vnode **, struct componentname *);
139
140
141/*
142 * Global vfs data structures
143 */
144int (**union_vnodeop_p)(void *);
145const struct vnodeopv_entry_desc union_vnodeop_entries[] = {
146	{ &vop_default_desc, vn_default_error },
147	{ &vop_lookup_desc, union_lookup },		/* lookup */
148	{ &vop_create_desc, union_create },		/* create */
149	{ &vop_whiteout_desc, union_whiteout },		/* whiteout */
150	{ &vop_mknod_desc, union_mknod },		/* mknod */
151	{ &vop_open_desc, union_open },			/* open */
152	{ &vop_close_desc, union_close },		/* close */
153	{ &vop_access_desc, union_access },		/* access */
154	{ &vop_getattr_desc, union_getattr },		/* getattr */
155	{ &vop_setattr_desc, union_setattr },		/* setattr */
156	{ &vop_read_desc, union_read },			/* read */
157	{ &vop_write_desc, union_write },		/* write */
158	{ &vop_ioctl_desc, union_ioctl },		/* ioctl */
159	{ &vop_poll_desc, union_poll },			/* select */
160	{ &vop_revoke_desc, union_revoke },		/* revoke */
161	{ &vop_mmap_desc, union_mmap },			/* mmap */
162	{ &vop_fsync_desc, union_fsync },		/* fsync */
163	{ &vop_seek_desc, union_seek },			/* seek */
164	{ &vop_remove_desc, union_remove },		/* remove */
165	{ &vop_link_desc, union_link },			/* link */
166	{ &vop_rename_desc, union_rename },		/* rename */
167	{ &vop_mkdir_desc, union_mkdir },		/* mkdir */
168	{ &vop_rmdir_desc, union_rmdir },		/* rmdir */
169	{ &vop_symlink_desc, union_symlink },		/* symlink */
170	{ &vop_readdir_desc, union_readdir },		/* readdir */
171	{ &vop_readlink_desc, union_readlink },		/* readlink */
172	{ &vop_abortop_desc, union_abortop },		/* abortop */
173	{ &vop_inactive_desc, union_inactive },		/* inactive */
174	{ &vop_reclaim_desc, union_reclaim },		/* reclaim */
175	{ &vop_lock_desc, union_lock },			/* lock */
176	{ &vop_unlock_desc, union_unlock },		/* unlock */
177	{ &vop_bmap_desc, union_bmap },			/* bmap */
178	{ &vop_strategy_desc, union_strategy },		/* strategy */
179	{ &vop_bwrite_desc, union_bwrite },		/* bwrite */
180	{ &vop_print_desc, union_print },		/* print */
181	{ &vop_islocked_desc, union_islocked },		/* islocked */
182	{ &vop_pathconf_desc, union_pathconf },		/* pathconf */
183	{ &vop_advlock_desc, union_advlock },		/* advlock */
184	{ &vop_getpages_desc, union_getpages },		/* getpages */
185	{ &vop_putpages_desc, union_putpages },		/* putpages */
186	{ &vop_kqfilter_desc, union_kqfilter },		/* kqfilter */
187	{ NULL, NULL }
188};
189const struct vnodeopv_desc union_vnodeop_opv_desc =
190	{ &union_vnodeop_p, union_vnodeop_entries };
191
/*
 * True for device, socket and fifo vnodes.  union_write() and
 * union_setattr() pass such nodes straight through to the lower
 * layer instead of copying them up.
 */
#define NODE_IS_SPECIAL(vp) \
	((vp)->v_type == VBLK || (vp)->v_type == VCHR || \
	(vp)->v_type == VSOCK || (vp)->v_type == VFIFO)
195
/*
 * union_lookup1:
 *
 *	Look up one pathname component within a single layer.
 *	`udvp' is the root vnode of that layer; `*dvpp' is the
 *	(locked) directory to search and may be replaced here when
 *	the lookup steps across a mount point.  On success the
 *	resulting vnode is returned in `*vpp'.
 */
static int
union_lookup1(struct vnode *udvp, struct vnode **dvpp, struct vnode **vpp,
	struct componentname *cnp)
{
	int error;
	struct vnode *tdvp;
	struct vnode *dvp;
	struct mount *mp;

	dvp = *dvpp;

	/*
	 * If stepping up the directory tree, check for going
	 * back across the mount point, in which case do what
	 * lookup would do by stepping back down the mount
	 * hierarchy.
	 */
	if (cnp->cn_flags & ISDOTDOT) {
		while ((dvp != udvp) && (dvp->v_vflag & VV_ROOT)) {
			/*
			 * Don't do the NOCROSSMOUNT check
			 * at this level.  By definition,
			 * union fs deals with namespaces, not
			 * filesystems.
			 */
			tdvp = dvp;
			*dvpp = dvp = dvp->v_mount->mnt_vnodecovered;
			VOP_UNLOCK(tdvp);
			vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
		}
	}

        error = VOP_LOOKUP(dvp, &tdvp, cnp);
	if (error)
		return (error);

	dvp = tdvp;

	/*
	 * Lastly check if the current node is a mount point in
	 * which case walk up the mount hierarchy making sure not to
	 * bump into the root of the mount tree (ie. dvp != udvp).
	 */
	while (dvp != udvp && (dvp->v_type == VDIR) &&
	       (mp = dvp->v_mountedhere)) {
		/* Mount is busy (e.g. being unmounted): retry the test. */
		if (vfs_busy(mp, NULL))
			continue;
		vput(dvp);
		error = VFS_ROOT(mp, &tdvp);
		vfs_unbusy(mp, false, NULL);
		if (error) {
			return (error);
		}
		dvp = tdvp;
	}

	*vpp = dvp;
	return (0);
}
255
/*
 * union_lookup:
 *
 *	Look the component up in the upper layer, then in the lower
 *	layer, and hand the results to union_allocvp() to build the
 *	combined union vnode.  Whiteout entries and opaque upper
 *	directories suppress the lower-layer lookup; a missing upper
 *	object over an existing lower directory triggers creation of
 *	a shadow directory in the upper layer.
 */
int
union_lookup(void *v)
{
	struct vop_lookup_args /* {
		struct vnodeop_desc *a_desc;
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
	} */ *ap = v;
	int error;
	int uerror, lerror;
	struct vnode *uppervp, *lowervp;
	struct vnode *upperdvp, *lowerdvp;
	struct vnode *dvp = ap->a_dvp;
	struct union_node *dun = VTOUNION(dvp);
	struct componentname *cnp = ap->a_cnp;
	struct union_mount *um = MOUNTTOUNIONMOUNT(dvp->v_mount);
	kauth_cred_t saved_cred = NULL;
	int iswhiteout;
	struct vattr va;

#ifdef notyet
	/* Disabled: "..." would name the lower layer's directory. */
	if (cnp->cn_namelen == 3 &&
			cnp->cn_nameptr[2] == '.' &&
			cnp->cn_nameptr[1] == '.' &&
			cnp->cn_nameptr[0] == '.') {
		dvp = *ap->a_vpp = LOWERVP(ap->a_dvp);
		if (dvp == NULLVP)
			return (ENOENT);
		vref(dvp);
		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
		return (0);
	}
#endif

	/* Refuse delete/rename lookups on a read-only union mount. */
	if ((cnp->cn_flags & ISLASTCN) &&
	    (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
		return (EROFS);

start:
	upperdvp = dun->un_uppervp;
	lowerdvp = dun->un_lowervp;
	uppervp = NULLVP;
	lowervp = NULLVP;
	iswhiteout = 0;

	/*
	 * do the lookup in the upper level.
	 * if that level consumes additional pathnames,
	 * then assume that something special is going
	 * on and just return that vnode.
	 */
	if (upperdvp != NULLVP) {
		uerror = union_lookup1(um->um_uppervp, &upperdvp,
					&uppervp, cnp);
		if (cnp->cn_consume != 0) {
			*ap->a_vpp = uppervp;
			return (uerror);
		}
		if (uerror == ENOENT || uerror == EJUSTRETURN) {
			/*
			 * Name missing in the upper layer: a whiteout
			 * entry or an opaque upper directory hides any
			 * lower object of the same name.
			 */
			if (cnp->cn_flags & ISWHITEOUT) {
				iswhiteout = 1;
			} else if (lowerdvp != NULLVP) {
				lerror = VOP_GETATTR(upperdvp, &va,
					cnp->cn_cred);
				if (lerror == 0 && (va.va_flags & OPAQUE))
					iswhiteout = 1;
			}
		}
	} else {
		uerror = ENOENT;
	}

	/*
	 * in a similar way to the upper layer, do the lookup
	 * in the lower layer.   this time, if there is some
	 * component magic going on, then vput whatever we got
	 * back from the upper layer and return the lower vnode
	 * instead.
	 */
	if (lowerdvp != NULLVP && !iswhiteout) {
		int nameiop;

		vn_lock(lowerdvp, LK_EXCLUSIVE | LK_RETRY);

		/*
		 * Only do a LOOKUP on the bottom node, since
		 * we won't be making changes to it anyway.
		 */
		nameiop = cnp->cn_nameiop;
		cnp->cn_nameiop = LOOKUP;
		/* UNMNT_BELOW: do the lower lookup with the mount creds. */
		if (um->um_op == UNMNT_BELOW) {
			saved_cred = cnp->cn_cred;
			cnp->cn_cred = um->um_cred;
		}

		/*
		 * we shouldn't have to worry about locking interactions
		 * between the lower layer and our union layer (w.r.t.
		 * `..' processing) because we don't futz with lowervp
		 * locks in the union-node instantiation code path.
		 */
		lerror = union_lookup1(um->um_lowervp, &lowerdvp,
				&lowervp, cnp);
		if (um->um_op == UNMNT_BELOW)
			cnp->cn_cred = saved_cred;
		cnp->cn_nameiop = nameiop;

		if (lowervp != lowerdvp)
			VOP_UNLOCK(lowerdvp);

		if (cnp->cn_consume != 0) {
			if (uppervp != NULLVP) {
				if (uppervp == upperdvp)
					vrele(uppervp);
				else
					vput(uppervp);
				uppervp = NULLVP;
			}
			*ap->a_vpp = lowervp;
			return (lerror);
		}
	} else {
		lerror = ENOENT;
		if ((cnp->cn_flags & ISDOTDOT) && dun->un_pvp != NULLVP) {
			lowervp = LOWERVP(dun->un_pvp);
			if (lowervp != NULLVP) {
				vref(lowervp);
				vn_lock(lowervp, LK_EXCLUSIVE | LK_RETRY);
				lerror = 0;
			}
		}
	}

	/*
	 * EJUSTRETURN is used by underlying filesystems to indicate that
	 * a directory modification op was started successfully.
	 * This will only happen in the upper layer, since
	 * the lower layer only does LOOKUPs.
	 * If this union is mounted read-only, bounce it now.
	 */

	if ((uerror == EJUSTRETURN) && (cnp->cn_flags & ISLASTCN) &&
	    (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
	    ((cnp->cn_nameiop == CREATE) || (cnp->cn_nameiop == RENAME)))
		uerror = EROFS;

	/*
	 * at this point, we have uerror and lerror indicating
	 * possible errors with the lookups in the upper and lower
	 * layers.  additionally, uppervp and lowervp are (locked)
	 * references to existing vnodes in the upper and lower layers.
	 *
	 * there are now three cases to consider.
	 * 1. if both layers returned an error, then return whatever
	 *    error the upper layer generated.
	 *
	 * 2. if the top layer failed and the bottom layer succeeded
	 *    then two subcases occur.
	 *    a.  the bottom vnode is not a directory, in which
	 *	  case just return a new union vnode referencing
	 *	  an empty top layer and the existing bottom layer.
	 *    b.  the bottom vnode is a directory, in which case
	 *	  create a new directory in the top-level and
	 *	  continue as in case 3.
	 *
	 * 3. if the top layer succeeded then return a new union
	 *    vnode referencing whatever the new top layer and
	 *    whatever the bottom layer returned.
	 */

	*ap->a_vpp = NULLVP;


	/* case 1. */
	if ((uerror != 0) && (lerror != 0)) {
		return (uerror);
	}

	/* case 2. */
	if (uerror != 0 /* && (lerror == 0) */ ) {
		if (lowervp->v_type == VDIR) { /* case 2b. */
			/*
			 * We may be racing another process to make the
			 * upper-level shadow directory.  Be careful with
			 * locks/etc!
			 * If we have to create a shadow directory and want
			 * to commit the node we have to restart the lookup
			 * to get the componentname right.
			 */
			if (upperdvp) {
				VOP_UNLOCK(upperdvp);
				uerror = union_mkshadow(um, upperdvp, cnp,
				    &uppervp);
				vn_lock(upperdvp, LK_EXCLUSIVE | LK_RETRY);
				if (uerror == 0 && cnp->cn_nameiop != LOOKUP) {
					vrele(uppervp);
					if (lowervp != NULLVP)
						vput(lowervp);
					goto start;
				}
				/*
				 * XXX: lock upper node until lookup returns
				 * unlocked nodes.
				 */
				vn_lock(uppervp, LK_EXCLUSIVE | LK_RETRY);
			}
			if (uerror) {
				if (lowervp != NULLVP) {
					vput(lowervp);
					lowervp = NULLVP;
				}
				return (uerror);
			}
		}
	}

	if (lowervp != NULLVP)
		VOP_UNLOCK(lowervp);

	error = union_allocvp(ap->a_vpp, dvp->v_mount, dvp, upperdvp, cnp,
			      uppervp, lowervp, 1);

	if (error) {
		if (uppervp != NULLVP)
			vput(uppervp);
		if (lowervp != NULLVP)
			vrele(lowervp);
	}

	return (error);
}
489
490int
491union_create(void *v)
492{
493	struct vop_create_v3_args /* {
494		struct vnode *a_dvp;
495		struct vnode **a_vpp;
496		struct componentname *a_cnp;
497		struct vattr *a_vap;
498	} */ *ap = v;
499	struct union_node *un = VTOUNION(ap->a_dvp);
500	struct vnode *dvp = un->un_uppervp;
501	struct componentname *cnp = ap->a_cnp;
502
503	if (dvp != NULLVP) {
504		int error;
505		struct vnode *vp;
506		struct mount *mp;
507
508		mp = ap->a_dvp->v_mount;
509		error = VOP_CREATE(dvp, &vp, cnp, ap->a_vap);
510		if (error)
511			return (error);
512
513		/* XXX: lock upper node until lookup returns unlocked nodes. */
514		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
515		error = union_allocvp(ap->a_vpp, mp, NULLVP, NULLVP, cnp, vp,
516				NULLVP, 1);
517		VOP_UNLOCK(vp);
518		if (error)
519			vrele(vp);
520		return (error);
521	}
522
523	return (EROFS);
524}
525
526int
527union_whiteout(void *v)
528{
529	struct vop_whiteout_args /* {
530		struct vnode *a_dvp;
531		struct componentname *a_cnp;
532		int a_flags;
533	} */ *ap = v;
534	struct union_node *un = VTOUNION(ap->a_dvp);
535	struct componentname *cnp = ap->a_cnp;
536
537	if (un->un_uppervp == NULLVP)
538		return (EOPNOTSUPP);
539
540	return (VOP_WHITEOUT(un->un_uppervp, cnp, ap->a_flags));
541}
542
543int
544union_mknod(void *v)
545{
546	struct vop_mknod_v3_args /* {
547		struct vnode *a_dvp;
548		struct vnode **a_vpp;
549		struct componentname *a_cnp;
550		struct vattr *a_vap;
551	} */ *ap = v;
552	struct union_node *un = VTOUNION(ap->a_dvp);
553	struct vnode *dvp = un->un_uppervp;
554	struct componentname *cnp = ap->a_cnp;
555
556	if (dvp != NULLVP) {
557		int error;
558		struct vnode *vp;
559		struct mount *mp;
560
561		mp = ap->a_dvp->v_mount;
562		error = VOP_MKNOD(dvp, &vp, cnp, ap->a_vap);
563		if (error)
564			return (error);
565
566		/* XXX: lock upper node until lookup returns unlocked nodes. */
567		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
568		error = union_allocvp(ap->a_vpp, mp, NULLVP, NULLVP,
569				      cnp, vp, NULLVP, 1);
570		VOP_UNLOCK(vp);
571		if (error)
572			vrele(vp);
573		return (error);
574	}
575
576	return (EROFS);
577}
578
/*
 * union_open:
 *
 *	Open the upper vnode when one exists.  Otherwise open the
 *	lower vnode, first copying a regular file up to the upper
 *	layer when it is being opened for writing.
 */
int
union_open(void *v)
{
	struct vop_open_args /* {
		struct vnodeop_desc *a_desc;
		struct vnode *a_vp;
		int a_mode;
		kauth_cred_t a_cred;
	} */ *ap = v;
	struct union_node *un = VTOUNION(ap->a_vp);
	struct vnode *tvp;
	int mode = ap->a_mode;
	kauth_cred_t cred = ap->a_cred;
	struct lwp *l = curlwp;
	int error;

	/*
	 * If there is an existing upper vp then simply open that.
	 */
	tvp = un->un_uppervp;
	if (tvp == NULLVP) {
		/*
		 * If the lower vnode is being opened for writing, then
		 * copy the file contents to the upper vnode and open that,
		 * otherwise can simply open the lower vnode.
		 */
		tvp = un->un_lowervp;
		if ((ap->a_mode & FWRITE) && (tvp->v_type == VREG)) {
			/* O_TRUNC: no need to copy data about to be thrown away */
			error = union_copyup(un, (mode&O_TRUNC) == 0, cred, l);
			if (error == 0)
				error = VOP_OPEN(un->un_uppervp, mode, cred);
			return (error);
		}

		/*
		 * Just open the lower vnode, but check for nodev mount flag
		 */
		if ((tvp->v_type == VBLK || tvp->v_type == VCHR) &&
		    (ap->a_vp->v_mount->mnt_flag & MNT_NODEV))
			return ENXIO;
		/* un_openl counts lower-layer opens; see union_close(). */
		un->un_openl++;
		vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY);
		error = VOP_OPEN(tvp, mode, cred);
		VOP_UNLOCK(tvp);

		return (error);
	}
	/*
	 * Just open the upper vnode, checking for nodev mount flag first
	 */
	if ((tvp->v_type == VBLK || tvp->v_type == VCHR) &&
	    (ap->a_vp->v_mount->mnt_flag & MNT_NODEV))
		return ENXIO;

	error = VOP_OPEN(tvp, mode, cred);

	return (error);
}
637
638int
639union_close(void *v)
640{
641	struct vop_close_args /* {
642		struct vnode *a_vp;
643		int  a_fflag;
644		kauth_cred_t a_cred;
645	} */ *ap = v;
646	struct union_node *un = VTOUNION(ap->a_vp);
647	struct vnode *vp;
648	int error;
649	bool do_lock;
650
651	vp = un->un_uppervp;
652	if (vp != NULLVP) {
653		do_lock = false;
654	} else {
655		KASSERT(un->un_openl > 0);
656		--un->un_openl;
657		vp = un->un_lowervp;
658		do_lock = true;
659	}
660
661	KASSERT(vp != NULLVP);
662	ap->a_vp = vp;
663	if (do_lock)
664		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
665	error = VCALL(vp, VOFFSET(vop_close), ap);
666	if (do_lock)
667		VOP_UNLOCK(vp);
668
669	return error;
670}
671
672/*
673 * Check access permission on the union vnode.
674 * The access check being enforced is to check
675 * against both the underlying vnode, and any
676 * copied vnode.  This ensures that no additional
677 * file permissions are given away simply because
678 * the user caused an implicit file copy.
679 */
680int
681union_access(void *v)
682{
683	struct vop_access_args /* {
684		struct vnodeop_desc *a_desc;
685		struct vnode *a_vp;
686		int a_mode;
687		kauth_cred_t a_cred;
688	} */ *ap = v;
689	struct vnode *vp = ap->a_vp;
690	struct union_node *un = VTOUNION(vp);
691	int error = EACCES;
692	struct union_mount *um = MOUNTTOUNIONMOUNT(vp->v_mount);
693
694	/*
695	 * Disallow write attempts on read-only file systems;
696	 * unless the file is a socket, fifo, or a block or
697	 * character device resident on the file system.
698	 */
699	if (ap->a_mode & VWRITE) {
700		switch (vp->v_type) {
701		case VDIR:
702		case VLNK:
703		case VREG:
704			if (vp->v_mount->mnt_flag & MNT_RDONLY)
705				return (EROFS);
706			break;
707		case VBAD:
708		case VBLK:
709		case VCHR:
710		case VSOCK:
711		case VFIFO:
712		case VNON:
713		default:
714			break;
715		}
716	}
717
718
719	if ((vp = un->un_uppervp) != NULLVP) {
720		ap->a_vp = vp;
721		return (VCALL(vp, VOFFSET(vop_access), ap));
722	}
723
724	if ((vp = un->un_lowervp) != NULLVP) {
725		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
726		ap->a_vp = vp;
727		error = VCALL(vp, VOFFSET(vop_access), ap);
728		if (error == 0) {
729			if (um->um_op == UNMNT_BELOW) {
730				ap->a_cred = um->um_cred;
731				error = VCALL(vp, VOFFSET(vop_access), ap);
732			}
733		}
734		VOP_UNLOCK(vp);
735		if (error)
736			return (error);
737	}
738
739	return (error);
740}
741
742/*
743 * We handle getattr only to change the fsid and
744 * track object sizes
745 */
746int
747union_getattr(void *v)
748{
749	struct vop_getattr_args /* {
750		struct vnode *a_vp;
751		struct vattr *a_vap;
752		kauth_cred_t a_cred;
753	} */ *ap = v;
754	int error;
755	struct union_node *un = VTOUNION(ap->a_vp);
756	struct vnode *vp = un->un_uppervp;
757	struct vattr *vap;
758	struct vattr va;
759
760
761	/*
762	 * Some programs walk the filesystem hierarchy by counting
763	 * links to directories to avoid stat'ing all the time.
764	 * This means the link count on directories needs to be "correct".
765	 * The only way to do that is to call getattr on both layers
766	 * and fix up the link count.  The link count will not necessarily
767	 * be accurate but will be large enough to defeat the tree walkers.
768	 *
769	 * To make life more interesting, some filesystems don't keep
770	 * track of link counts in the expected way, and return a
771	 * link count of `1' for those directories; if either of the
772	 * component directories returns a link count of `1', we return a 1.
773	 */
774
775	vap = ap->a_vap;
776
777	vp = un->un_uppervp;
778	if (vp != NULLVP) {
779		error = VOP_GETATTR(vp, vap, ap->a_cred);
780		if (error)
781			return (error);
782		mutex_enter(&un->un_lock);
783		union_newsize(ap->a_vp, vap->va_size, VNOVAL);
784	}
785
786	if (vp == NULLVP) {
787		vp = un->un_lowervp;
788	} else if (vp->v_type == VDIR) {
789		vp = un->un_lowervp;
790		if (vp != NULLVP)
791			vap = &va;
792	} else {
793		vp = NULLVP;
794	}
795
796	if (vp != NULLVP) {
797		if (vp == un->un_lowervp)
798			vn_lock(vp, LK_SHARED | LK_RETRY);
799		error = VOP_GETATTR(vp, vap, ap->a_cred);
800		if (vp == un->un_lowervp)
801			VOP_UNLOCK(vp);
802		if (error)
803			return (error);
804		mutex_enter(&un->un_lock);
805		union_newsize(ap->a_vp, VNOVAL, vap->va_size);
806	}
807
808	if ((vap != ap->a_vap) && (vap->va_type == VDIR)) {
809		/*
810		 * Link count manipulation:
811		 *	- If both return "2", return 2 (no subdirs)
812		 *	- If one or the other return "1", return "1" (ENOCLUE)
813		 */
814		if ((ap->a_vap->va_nlink == 2) &&
815		    (vap->va_nlink == 2))
816			;
817		else if (ap->a_vap->va_nlink != 1) {
818			if (vap->va_nlink == 1)
819				ap->a_vap->va_nlink = 1;
820			else
821				ap->a_vap->va_nlink += vap->va_nlink;
822		}
823	}
824	ap->a_vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsidx.__fsid_val[0];
825	return (0);
826}
827
/*
 * union_setattr:
 *
 *	Apply attribute changes to the upper vnode, copying a lower
 *	regular file up first so truncation (O_TRUNC/O_CREAT) works.
 *	With no upper vnode, only a size-to-zero change on a special
 *	node is tolerated; everything else is EROFS.
 */
int
union_setattr(void *v)
{
	struct vop_setattr_args /* {
		struct vnode *a_vp;
		struct vattr *a_vap;
		kauth_cred_t a_cred;
	} */ *ap = v;
	struct vattr *vap = ap->a_vap;
	struct vnode *vp = ap->a_vp;
	struct union_node *un = VTOUNION(vp);
	bool size_only;		/* All but va_size are VNOVAL. */
	int error;

	size_only = (vap->va_flags == VNOVAL && vap->va_uid == (uid_t)VNOVAL &&
	    vap->va_gid == (gid_t)VNOVAL && vap->va_atime.tv_sec == VNOVAL &&
	    vap->va_mtime.tv_sec == VNOVAL && vap->va_mode == (mode_t)VNOVAL);

	if (!size_only && (vp->v_mount->mnt_flag & MNT_RDONLY))
		return (EROFS);
	if (vap->va_size != VNOVAL) {
 		switch (vp->v_type) {
 		case VDIR:
 			return (EISDIR);
 		case VCHR:
 		case VBLK:
 		case VSOCK:
 		case VFIFO:
			break;
		case VREG:
		case VLNK:
 		default:
			/*
			 * Disallow write attempts if the filesystem is
			 * mounted read-only.
			 */
			if (vp->v_mount->mnt_flag & MNT_RDONLY)
				return (EROFS);
		}
	}

	/*
	 * Handle case of truncating lower object to zero size,
	 * by creating a zero length upper object.  This is to
	 * handle the case of open with O_TRUNC and O_CREAT.
	 */
	if ((un->un_uppervp == NULLVP) &&
	    /* assert(un->un_lowervp != NULLVP) */
	    (un->un_lowervp->v_type == VREG)) {
		error = union_copyup(un, (vap->va_size != 0),
						ap->a_cred, curlwp);
		if (error)
			return (error);
	}

	/*
	 * Try to set attributes in upper layer, ignore size change to zero
	 * for devices to handle O_TRUNC and return read-only filesystem error
	 * otherwise.
	 */
	if (un->un_uppervp != NULLVP) {
		error = VOP_SETATTR(un->un_uppervp, vap, ap->a_cred);
		if ((error == 0) && (vap->va_size != VNOVAL)) {
			/* un_lock is released by union_newsize(). */
			mutex_enter(&un->un_lock);
			union_newsize(ap->a_vp, vap->va_size, VNOVAL);
		}
	} else {
		KASSERT(un->un_lowervp != NULLVP);
		if (NODE_IS_SPECIAL(un->un_lowervp)) {
			if (size_only &&
			    (vap->va_size == 0 || vap->va_size == VNOVAL))
				error = 0;
			else
				error = EROFS;
		} else {
			error = EROFS;
		}
	}

	return (error);
}
909
/*
 * union_read:
 *
 *	Read from whichever layer backs this node, locking the lower
 *	vnode around the call (the upper vnode shares our lock), and
 *	record any size growth observed via the uio offset.
 */
int
union_read(void *v)
{
	struct vop_read_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int  a_ioflag;
		kauth_cred_t a_cred;
	} */ *ap = v;
	int error;
	struct vnode *vp = OTHERVP(ap->a_vp);
	/* Only the lower vnode needs explicit locking here. */
	int dolock = (vp == LOWERVP(ap->a_vp));

	if (dolock)
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	error = VOP_READ(vp, ap->a_uio, ap->a_ioflag, ap->a_cred);
	if (dolock)
		VOP_UNLOCK(vp);

	/*
	 * XXX
	 * perhaps the size of the underlying object has changed under
	 * our feet.  take advantage of the offset information present
	 * in the uio structure.
	 */
	if (error == 0) {
		struct union_node *un = VTOUNION(ap->a_vp);
		off_t cur = ap->a_uio->uio_offset;
		off_t usz = VNOVAL, lsz = VNOVAL;

		/* union_newsize() releases un_lock; exit it ourselves
		 * only when nothing changed. */
		mutex_enter(&un->un_lock);
		if (vp == un->un_uppervp) {
			if (cur > un->un_uppersz)
				usz = cur;
		} else {
			if (cur > un->un_lowersz)
				lsz = cur;
		}

		if (usz != VNOVAL || lsz != VNOVAL)
			union_newsize(ap->a_vp, usz, lsz);
		else
			mutex_exit(&un->un_lock);
	}

	return (error);
}
957
958int
959union_write(void *v)
960{
961	struct vop_read_args /* {
962		struct vnode *a_vp;
963		struct uio *a_uio;
964		int  a_ioflag;
965		kauth_cred_t a_cred;
966	} */ *ap = v;
967	int error;
968	struct vnode *vp;
969	struct union_node *un = VTOUNION(ap->a_vp);
970
971	vp = UPPERVP(ap->a_vp);
972	if (vp == NULLVP) {
973		vp = LOWERVP(ap->a_vp);
974		if (NODE_IS_SPECIAL(vp)) {
975			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
976			error = VOP_WRITE(vp, ap->a_uio, ap->a_ioflag,
977			    ap->a_cred);
978			VOP_UNLOCK(vp);
979			return error;
980		}
981		panic("union: missing upper layer in write");
982	}
983
984	error = VOP_WRITE(vp, ap->a_uio, ap->a_ioflag, ap->a_cred);
985
986	/*
987	 * the size of the underlying object may be changed by the
988	 * write.
989	 */
990	if (error == 0) {
991		off_t cur = ap->a_uio->uio_offset;
992
993		mutex_enter(&un->un_lock);
994		if (cur > un->un_uppersz)
995			union_newsize(ap->a_vp, cur, VNOVAL);
996		else
997			mutex_exit(&un->un_lock);
998	}
999
1000	return (error);
1001}
1002
1003int
1004union_ioctl(void *v)
1005{
1006	struct vop_ioctl_args /* {
1007		struct vnode *a_vp;
1008		int  a_command;
1009		void *a_data;
1010		int  a_fflag;
1011		kauth_cred_t a_cred;
1012	} */ *ap = v;
1013	struct vnode *ovp = OTHERVP(ap->a_vp);
1014
1015	ap->a_vp = ovp;
1016	return (VCALL(ovp, VOFFSET(vop_ioctl), ap));
1017}
1018
1019int
1020union_poll(void *v)
1021{
1022	struct vop_poll_args /* {
1023		struct vnode *a_vp;
1024		int a_events;
1025	} */ *ap = v;
1026	struct vnode *ovp = OTHERVP(ap->a_vp);
1027
1028	ap->a_vp = ovp;
1029	return (VCALL(ovp, VOFFSET(vop_poll), ap));
1030}
1031
1032int
1033union_revoke(void *v)
1034{
1035	struct vop_revoke_args /* {
1036		struct vnode *a_vp;
1037		int a_flags;
1038		struct proc *a_p;
1039	} */ *ap = v;
1040	struct vnode *vp = ap->a_vp;
1041
1042	if (UPPERVP(vp))
1043		VOP_REVOKE(UPPERVP(vp), ap->a_flags);
1044	if (LOWERVP(vp))
1045		VOP_REVOKE(LOWERVP(vp), ap->a_flags);
1046	vgone(vp);	/* XXXAD?? */
1047	return (0);
1048}
1049
1050int
1051union_mmap(void *v)
1052{
1053	struct vop_mmap_args /* {
1054		struct vnode *a_vp;
1055		vm_prot_t a_prot;
1056		kauth_cred_t a_cred;
1057	} */ *ap = v;
1058	struct vnode *ovp = OTHERVP(ap->a_vp);
1059
1060	ap->a_vp = ovp;
1061	return (VCALL(ovp, VOFFSET(vop_mmap), ap));
1062}
1063
/*
 * union_fsync:
 *
 *	Sync the backing vnode, locking the lower vnode around the
 *	call when it is the backing layer.  Device nodes get their
 *	buffers flushed via spec_fsync() first.
 */
int
union_fsync(void *v)
{
	struct vop_fsync_args /* {
		struct vnode *a_vp;
		kauth_cred_t a_cred;
		int  a_flags;
		off_t a_offlo;
		off_t a_offhi;
	} */ *ap = v;
	int error = 0;
	struct vnode *targetvp;

	/*
	 * If vinvalbuf is calling us, it's a "shallow fsync" -- don't
	 * bother syncing the underlying vnodes, since (a) they'll be
	 * fsync'ed when reclaimed and (b) we could deadlock if
	 * they're locked; otherwise, pass it through to the
	 * underlying layer.
	 */
	if (ap->a_vp->v_type == VBLK || ap->a_vp->v_type == VCHR) {
		error = spec_fsync(v);
		if (error)
			return error;
	}

	if (ap->a_flags & FSYNC_RECLAIM)
		return 0;

	targetvp = OTHERVP(ap->a_vp);
	if (targetvp != NULLVP) {
		/* Only the lower vnode needs explicit locking. */
		int dolock = (targetvp == LOWERVP(ap->a_vp));

		if (dolock)
			vn_lock(targetvp, LK_EXCLUSIVE | LK_RETRY);
		error = VOP_FSYNC(targetvp, ap->a_cred, ap->a_flags,
			    ap->a_offlo, ap->a_offhi);
		if (dolock)
			VOP_UNLOCK(targetvp);
	}

	return (error);
}
1107
1108int
1109union_seek(void *v)
1110{
1111	struct vop_seek_args /* {
1112		struct vnode *a_vp;
1113		off_t  a_oldoff;
1114		off_t  a_newoff;
1115		kauth_cred_t a_cred;
1116	} */ *ap = v;
1117	struct vnode *ovp = OTHERVP(ap->a_vp);
1118
1119	ap->a_vp = ovp;
1120	return (VCALL(ovp, VOFFSET(vop_seek), ap));
1121}
1122
/*
 * union_remove: remove the object a_vp from directory a_dvp.  The
 * operation itself always happens in the upper (writable) layer; a
 * whiteout covers any lower-layer object that would show through.
 */
int
union_remove(void *v)
{
	struct vop_remove_args /* {
		struct vnode *a_dvp;
		struct vnode *a_vp;
		struct componentname *a_cnp;
	} */ *ap = v;
	int error;
	struct union_node *dun = VTOUNION(ap->a_dvp);
	struct union_node *un = VTOUNION(ap->a_vp);
	struct componentname *cnp = ap->a_cnp;

	if (dun->un_uppervp == NULLVP)
		panic("union remove: null upper vnode");

	if (un->un_uppervp != NULLVP) {
		struct vnode *dvp = dun->un_uppervp;
		struct vnode *vp = un->un_uppervp;

		/*
		 * Trade our references on the union nodes for
		 * references on the underlying upper vnodes.
		 * Setting UN_KLOCK makes the vput() below release
		 * only the union node while leaving the upper vnode
		 * locked (see union_unlock()), so dvp/vp stay locked
		 * for VOP_REMOVE(), which consumes them.
		 */
		vref(dvp);
		dun->un_flags |= UN_KLOCK;
		vput(ap->a_dvp);
		vref(vp);
		un->un_flags |= UN_KLOCK;
		vput(ap->a_vp);

		/* Whiteout if a lower-layer object would show through. */
		if (union_dowhiteout(un, cnp->cn_cred))
			cnp->cn_flags |= DOWHITEOUT;
		error = VOP_REMOVE(dvp, vp, cnp);
		if (!error)
			union_removed_upper(un);
	} else {
		/*
		 * The object exists only in the lower layer: just
		 * cover it with a whiteout in the upper directory.
		 */
		error = union_mkwhiteout(
			MOUNTTOUNIONMOUNT(UNIONTOV(dun)->v_mount),
			dun->un_uppervp, ap->a_cnp, un);
		vput(ap->a_dvp);
		vput(ap->a_vp);
	}

	return (error);
}
1165
/*
 * union_link: create a hard link to a_vp in directory a_dvp.  If the
 * source object lives only in the lower layer it is first copied up,
 * so the link can be made in the upper (writable) layer.
 */
int
union_link(void *v)
{
	struct vop_link_args /* {
		struct vnode *a_dvp;
		struct vnode *a_vp;
		struct componentname *a_cnp;
	} */ *ap = v;
	int error = 0;
	struct componentname *cnp = ap->a_cnp;
	struct union_node *dun;
	struct vnode *vp;
	struct vnode *dvp;

	dun = VTOUNION(ap->a_dvp);

	KASSERT((ap->a_cnp->cn_flags & LOCKPARENT) != 0);

	if (ap->a_dvp->v_op != ap->a_vp->v_op) {
		/* Source is not a union vnode; link to it directly. */
		vp = ap->a_vp;
	} else {
		struct union_node *un = VTOUNION(ap->a_vp);
		if (un->un_uppervp == NULLVP) {
			/*
			 * If the copyup target directory is the same
			 * upper vnode we hold locked, drop that lock
			 * around the copyup to avoid self-deadlock.
			 */
			const bool droplock = (dun->un_uppervp == un->un_dirvp);

			/*
			 * Needs to be copied before we can link it.
			 */
			vn_lock(ap->a_vp, LK_EXCLUSIVE | LK_RETRY);
			if (droplock)
				VOP_UNLOCK(dun->un_uppervp);
			error = union_copyup(un, 1, cnp->cn_cred, curlwp);
			if (droplock) {
				vn_lock(dun->un_uppervp,
				    LK_EXCLUSIVE | LK_RETRY);
				/*
				 * During copyup, we dropped the lock on the
				 * dir and invalidated any saved namei lookup
				 * state for the directory we'll be entering
				 * the link in.  We need to re-run the lookup
				 * in that directory to reset any state needed
				 * for VOP_LINK.
				 * Call relookup on the union-layer to reset
				 * the state.
				 */
				vp  = NULLVP;
				if (dun->un_uppervp == NULLVP)
					 panic("union: null upperdvp?");
				error = relookup(ap->a_dvp, &vp, ap->a_cnp, 0);
				if (error) {
					VOP_UNLOCK(ap->a_vp);
					return EROFS;	/* ? */
				}
				if (vp != NULLVP) {
					/*
					 * The name we want to create has
					 * mysteriously appeared (a race?)
					 */
					error = EEXIST;
					VOP_UNLOCK(ap->a_vp);
					vput(ap->a_dvp);
					vput(vp);
					return (error);
				}
			}
			VOP_UNLOCK(ap->a_vp);
		}
		/* Link against the (possibly just copied-up) upper vnode. */
		vp = un->un_uppervp;
	}

	dvp = dun->un_uppervp;
	if (dvp == NULLVP)
		error = EROFS;

	if (error) {
		vput(ap->a_dvp);
		return (error);
	}

	/*
	 * Hand the locked upper directory to VOP_LINK: UN_KLOCK makes
	 * the vput() release only the union node, keeping dvp locked
	 * (see union_unlock()).  VOP_LINK consumes dvp.
	 */
	vref(dvp);
	dun->un_flags |= UN_KLOCK;
	vput(ap->a_dvp);

	return (VOP_LINK(dvp, vp, cnp));
}
1251
/*
 * union_rename: map all four vnodes down to their upper-layer
 * counterparts and perform the rename there.  A rename that would
 * have to modify the read-only lower layer fails with EXDEV.
 */
int
union_rename(void *v)
{
	struct vop_rename_args  /* {
		struct vnode *a_fdvp;
		struct vnode *a_fvp;
		struct componentname *a_fcnp;
		struct vnode *a_tdvp;
		struct vnode *a_tvp;
		struct componentname *a_tcnp;
	} */ *ap = v;
	int error;

	struct vnode *fdvp = ap->a_fdvp;
	struct vnode *fvp = ap->a_fvp;
	struct vnode *tdvp = ap->a_tdvp;
	struct vnode *tvp = ap->a_tvp;

	if (fdvp->v_op == union_vnodeop_p) {	/* always true */
		struct union_node *un = VTOUNION(fdvp);
		if (un->un_uppervp == NULLVP) {
			/*
			 * this should never happen in normal
			 * operation but might if there was
			 * a problem creating the top-level shadow
			 * directory.
			 */
			error = EXDEV;
			goto bad;
		}

		/* Extra reference; the original is dropped at "out". */
		fdvp = un->un_uppervp;
		vref(fdvp);
	}

	if (fvp->v_op == union_vnodeop_p) {	/* always true */
		struct union_node *un = VTOUNION(fvp);
		if (un->un_uppervp == NULLVP) {
			/* XXX: should do a copyup */
			error = EXDEV;
			goto bad;
		}

		/* A lower object would show through; whiteout old name. */
		if (un->un_lowervp != NULLVP)
			ap->a_fcnp->cn_flags |= DOWHITEOUT;

		fvp = un->un_uppervp;
		vref(fvp);
	}

	if (tdvp->v_op == union_vnodeop_p) {
		struct union_node *un = VTOUNION(tdvp);
		if (un->un_uppervp == NULLVP) {
			/*
			 * this should never happen in normal
			 * operation but might if there was
			 * a problem creating the top-level shadow
			 * directory.
			 */
			error = EXDEV;
			goto bad;
		}

		/*
		 * Swap the locked union node for its upper vnode:
		 * UN_KLOCK makes vput() release only the union node
		 * while leaving the upper vnode locked (see
		 * union_unlock()).
		 */
		tdvp = un->un_uppervp;
		vref(tdvp);
		un->un_flags |= UN_KLOCK;
		vput(ap->a_tdvp);
	}

	if (tvp != NULLVP && tvp->v_op == union_vnodeop_p) {
		struct union_node *un = VTOUNION(tvp);

		/* tvp may have no upper vnode; VOP_RENAME takes NULLVP. */
		tvp = un->un_uppervp;
		if (tvp != NULLVP) {
			vref(tvp);
			un->un_flags |= UN_KLOCK;
		}
		vput(ap->a_tvp);
	}

	/* VOP_RENAME consumes the references/locks on all four vnodes. */
	error = VOP_RENAME(fdvp, fvp, ap->a_fcnp, tdvp, tvp, ap->a_tcnp);
	goto out;

bad:
	/* Reachable only before tdvp/tvp were swapped: still locked. */
	vput(tdvp);
	if (tvp != NULLVP)
		vput(tvp);
	vrele(fdvp);
	vrele(fvp);

out:
	/* Drop the union-node references replaced by upper vnodes. */
	if (fdvp != ap->a_fdvp) {
		vrele(ap->a_fdvp);
	}
	if (fvp != ap->a_fvp) {
		vrele(ap->a_fvp);
	}
	return (error);
}
1351
/*
 * union_mkdir: create a directory in the upper layer and wrap the
 * result in a new union node returned via *a_vpp.
 */
int
union_mkdir(void *v)
{
	struct vop_mkdir_v3_args /* {
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
		struct vattr *a_vap;
	} */ *ap = v;
	struct union_node *un = VTOUNION(ap->a_dvp);
	struct vnode *dvp = un->un_uppervp;
	struct componentname *cnp = ap->a_cnp;

	if (dvp != NULLVP) {
		int error;
		struct vnode *vp;

		error = VOP_MKDIR(dvp, &vp, cnp, ap->a_vap);
		if (error) {
			vrele(ap->a_dvp);
			return (error);
		}

		/* XXX: lock upper node until lookup returns unlocked nodes. */
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		/* Wrap the new upper directory in a union node. */
		error = union_allocvp(ap->a_vpp, ap->a_dvp->v_mount, ap->a_dvp,
				NULLVP, cnp, vp, NULLVP, 1);
		VOP_UNLOCK(vp);
		if (error)
			vrele(vp);
		return (error);
	}

	/* No upper directory: this part of the union is read-only. */
	return (EROFS);
}
1387
/*
 * union_rmdir: remove directory a_vp from a_dvp.  Mirrors
 * union_remove(): act on the upper layer, whiteout anything that
 * would show through from the lower layer.
 */
int
union_rmdir(void *v)
{
	struct vop_rmdir_args /* {
		struct vnode *a_dvp;
		struct vnode *a_vp;
		struct componentname *a_cnp;
	} */ *ap = v;
	int error;
	struct union_node *dun = VTOUNION(ap->a_dvp);
	struct union_node *un = VTOUNION(ap->a_vp);
	struct componentname *cnp = ap->a_cnp;

	if (dun->un_uppervp == NULLVP)
		panic("union rmdir: null upper vnode");

	/* Refuse to remove a directory that is not logically empty. */
	error = union_check_rmdir(un, cnp->cn_cred);
	if (error) {
		vput(ap->a_dvp);
		vput(ap->a_vp);
		return error;
	}

	if (un->un_uppervp != NULLVP) {
		struct vnode *dvp = dun->un_uppervp;
		struct vnode *vp = un->un_uppervp;

		/*
		 * Trade union-node references for upper-vnode
		 * references; UN_KLOCK keeps the upper vnodes locked
		 * across vput() (see union_unlock()), and VOP_RMDIR
		 * consumes them.
		 */
		vref(dvp);
		dun->un_flags |= UN_KLOCK;
		vput(ap->a_dvp);
		vref(vp);
		un->un_flags |= UN_KLOCK;
		vput(ap->a_vp);

		/* Whiteout if a lower-layer directory would show through. */
		if (union_dowhiteout(un, cnp->cn_cred))
			cnp->cn_flags |= DOWHITEOUT;
		error = VOP_RMDIR(dvp, vp, ap->a_cnp);
		if (!error)
			union_removed_upper(un);
	} else {
		/* Lower-only directory: cover it with a whiteout. */
		error = union_mkwhiteout(
			MOUNTTOUNIONMOUNT(UNIONTOV(dun)->v_mount),
			dun->un_uppervp, ap->a_cnp, un);
		vput(ap->a_dvp);
		vput(ap->a_vp);
	}

	return (error);
}
1437
1438int
1439union_symlink(void *v)
1440{
1441	struct vop_symlink_v3_args /* {
1442		struct vnode *a_dvp;
1443		struct vnode **a_vpp;
1444		struct componentname *a_cnp;
1445		struct vattr *a_vap;
1446		char *a_target;
1447	} */ *ap = v;
1448	struct union_node *un = VTOUNION(ap->a_dvp);
1449	struct vnode *dvp = un->un_uppervp;
1450	struct componentname *cnp = ap->a_cnp;
1451
1452	if (dvp != NULLVP) {
1453		int error;
1454
1455		error = VOP_SYMLINK(dvp, ap->a_vpp, cnp, ap->a_vap,
1456				    ap->a_target);
1457		return (error);
1458	}
1459
1460	return (EROFS);
1461}
1462
1463/*
1464 * union_readdir works in concert with getdirentries and
1465 * readdir(3) to provide a list of entries in the unioned
1466 * directories.  getdirentries is responsible for walking
1467 * down the union stack.  readdir(3) is responsible for
1468 * eliminating duplicate names from the returned data stream.
1469 */
1470int
1471union_readdir(void *v)
1472{
1473	struct vop_readdir_args /* {
1474		struct vnodeop_desc *a_desc;
1475		struct vnode *a_vp;
1476		struct uio *a_uio;
1477		kauth_cred_t a_cred;
1478		int *a_eofflag;
1479		u_long *a_cookies;
1480		int a_ncookies;
1481	} */ *ap = v;
1482	struct union_node *un = VTOUNION(ap->a_vp);
1483	struct vnode *uvp = un->un_uppervp;
1484
1485	if (uvp == NULLVP)
1486		return (0);
1487
1488	ap->a_vp = uvp;
1489	return (VCALL(uvp, VOFFSET(vop_readdir), ap));
1490}
1491
1492int
1493union_readlink(void *v)
1494{
1495	struct vop_readlink_args /* {
1496		struct vnode *a_vp;
1497		struct uio *a_uio;
1498		kauth_cred_t a_cred;
1499	} */ *ap = v;
1500	int error;
1501	struct vnode *vp = OTHERVP(ap->a_vp);
1502	int dolock = (vp == LOWERVP(ap->a_vp));
1503
1504	if (dolock)
1505		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1506	ap->a_vp = vp;
1507	error = VCALL(vp, VOFFSET(vop_readlink), ap);
1508	if (dolock)
1509		VOP_UNLOCK(vp);
1510
1511	return (error);
1512}
1513
1514int
1515union_abortop(void *v)
1516{
1517	struct vop_abortop_args /* {
1518		struct vnode *a_dvp;
1519		struct componentname *a_cnp;
1520	} */ *ap = v;
1521
1522	KASSERT(UPPERVP(ap->a_dvp) != NULL);
1523
1524	ap->a_dvp = UPPERVP(ap->a_dvp);
1525	return VCALL(ap->a_dvp, VOFFSET(vop_abortop), ap);
1526}
1527
int
union_inactive(void *v)
{
	struct vop_inactive_args /* {
		const struct vnodeop_desc *a_desc;
		struct vnode *a_vp;
		bool *a_recycle;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct union_node *un = VTOUNION(vp);
	struct vnode **vpp;

	/*
	 * Do nothing (and _don't_ bypass).
	 * Wait to vrele lowervp until reclaim,
	 * so that until then our union_node is in the
	 * cache and reusable.
	 *
	 * NEEDSWORK: Someday, consider inactive'ing
	 * the lowervp and then trying to reactivate it
	 * with capabilities (v_id)
	 * like they do in the name lookup cache code.
	 * That's too much work for now.
	 */

	/*
	 * Release the NULLVP-terminated list of cached directory
	 * vnodes (un_dircache) and the list itself.
	 */
	if (un->un_dircache != 0) {
		for (vpp = un->un_dircache; *vpp != NULLVP; vpp++)
			vrele(*vpp);
		free(un->un_dircache, M_TEMP);
		un->un_dircache = 0;
	}

	/* Ask for recycling only once the node left the union cache. */
	*ap->a_recycle = ((un->un_cflags & UN_CACHED) == 0);
	VOP_UNLOCK(vp);

	return (0);
}
1565
1566int
1567union_reclaim(void *v)
1568{
1569	struct vop_reclaim_args /* {
1570		struct vnode *a_vp;
1571	} */ *ap = v;
1572
1573	union_freevp(ap->a_vp);
1574
1575	return (0);
1576}
1577
int
union_lock(void *v)
{
	struct vop_lock_args /* {
		struct vnode *a_vp;
		int a_flags;
	} */ *ap = v;
	struct vnode *vp;
	struct union_node *un;
	int error;

	un = VTOUNION(ap->a_vp);
	mutex_enter(&un->un_lock);
	for (;;) {
		/*
		 * LOCKVP() selects the vnode that carries this node's
		 * lock.  That choice can change while we sleep
		 * acquiring the lock (e.g. if an upper vnode gets
		 * attached), so after locking we re-check under
		 * un_lock and retry if the carrier moved.
		 */
		vp = LOCKVP(ap->a_vp);
		mutex_exit(&un->un_lock);
		if (vp == ap->a_vp)
			error = genfs_lock(ap);
		else
			error = VOP_LOCK(vp, ap->a_flags);
		if (error != 0)
			return error;
		mutex_enter(&un->un_lock);
		if (vp == LOCKVP(ap->a_vp))
			break;
		/* Carrier changed underneath us: undo and try again. */
		if (vp == ap->a_vp)
			genfs_unlock(ap);
		else
			VOP_UNLOCK(vp);
	}
	/* A freshly locked node cannot have a pending keep-locked vput. */
	KASSERT((un->un_flags & UN_KLOCK) == 0);
	mutex_exit(&un->un_lock);

	return error;
}
1613
1614/*
1615 * When operations want to vput() a union node yet retain a lock on
1616 * the upper vnode (say, to do some further operations like link(),
1617 * mkdir(), ...), they set UN_KLOCK on the union node, then call
1618 * vput() which calls VOP_UNLOCK() and comes here.  union_unlock()
1619 * unlocks the union node (leaving the upper vnode alone), clears the
1620 * KLOCK flag, and then returns to vput().  The caller then does whatever
1621 * is left to do with the upper vnode, and ensures that it gets unlocked.
1622 *
1623 * If UN_KLOCK isn't set, then the upper vnode is unlocked here.
1624 */
int
union_unlock(void *v)
{
	struct vop_unlock_args /* {
		struct vnode *a_vp;
		int a_flags;
	} */ *ap = v;
	struct vnode *vp;
	struct union_node *un;

	un = VTOUNION(ap->a_vp);
	vp = LOCKVP(ap->a_vp);
	if ((un->un_flags & UN_KLOCK) == UN_KLOCK) {
		/*
		 * Keep the underlying vnode locked for the caller
		 * (see the block comment above).  UN_KLOCK only
		 * makes sense when the lock lives on a vnode other
		 * than the union vnode itself.
		 */
		KASSERT(vp != ap->a_vp);
		un->un_flags &= ~UN_KLOCK;
		return 0;
	}
	/* Unlock whichever vnode actually carries the lock. */
	if (vp == ap->a_vp)
		genfs_unlock(ap);
	else
		VOP_UNLOCK(vp);

	return 0;
}
1649
1650int
1651union_bmap(void *v)
1652{
1653	struct vop_bmap_args /* {
1654		struct vnode *a_vp;
1655		daddr_t  a_bn;
1656		struct vnode **a_vpp;
1657		daddr_t *a_bnp;
1658		int *a_runp;
1659	} */ *ap = v;
1660	int error;
1661	struct vnode *vp = OTHERVP(ap->a_vp);
1662	int dolock = (vp == LOWERVP(ap->a_vp));
1663
1664	if (dolock)
1665		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1666	ap->a_vp = vp;
1667	error = VCALL(vp, VOFFSET(vop_bmap), ap);
1668	if (dolock)
1669		VOP_UNLOCK(vp);
1670
1671	return (error);
1672}
1673
1674int
1675union_print(void *v)
1676{
1677	struct vop_print_args /* {
1678		struct vnode *a_vp;
1679	} */ *ap = v;
1680	struct vnode *vp = ap->a_vp;
1681
1682	printf("\ttag VT_UNION, vp=%p, uppervp=%p, lowervp=%p\n",
1683			vp, UPPERVP(vp), LOWERVP(vp));
1684	if (UPPERVP(vp) != NULLVP)
1685		vprint("union: upper", UPPERVP(vp));
1686	if (LOWERVP(vp) != NULLVP)
1687		vprint("union: lower", LOWERVP(vp));
1688	if (VTOUNION(vp)->un_dircache) {
1689		struct vnode **vpp;
1690		for (vpp = VTOUNION(vp)->un_dircache; *vpp != NULLVP; vpp++)
1691			vprint("dircache:", *vpp);
1692	}
1693
1694	return (0);
1695}
1696
1697int
1698union_islocked(void *v)
1699{
1700	struct vop_islocked_args /* {
1701		struct vnode *a_vp;
1702	} */ *ap = v;
1703	struct vnode *vp;
1704	struct union_node *un;
1705
1706	un = VTOUNION(ap->a_vp);
1707	mutex_enter(&un->un_lock);
1708	vp = LOCKVP(ap->a_vp);
1709	mutex_exit(&un->un_lock);
1710
1711	if (vp == ap->a_vp)
1712		return genfs_islocked(ap);
1713	else
1714		return VOP_ISLOCKED(vp);
1715}
1716
1717int
1718union_pathconf(void *v)
1719{
1720	struct vop_pathconf_args /* {
1721		struct vnode *a_vp;
1722		int a_name;
1723		int *a_retval;
1724	} */ *ap = v;
1725	int error;
1726	struct vnode *vp = OTHERVP(ap->a_vp);
1727	int dolock = (vp == LOWERVP(ap->a_vp));
1728
1729	if (dolock)
1730		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1731	ap->a_vp = vp;
1732	error = VCALL(vp, VOFFSET(vop_pathconf), ap);
1733	if (dolock)
1734		VOP_UNLOCK(vp);
1735
1736	return (error);
1737}
1738
1739int
1740union_advlock(void *v)
1741{
1742	struct vop_advlock_args /* {
1743		struct vnode *a_vp;
1744		void *a_id;
1745		int  a_op;
1746		struct flock *a_fl;
1747		int  a_flags;
1748	} */ *ap = v;
1749	struct vnode *ovp = OTHERVP(ap->a_vp);
1750
1751	ap->a_vp = ovp;
1752	return (VCALL(ovp, VOFFSET(vop_advlock), ap));
1753}
1754
1755int
1756union_strategy(void *v)
1757{
1758	struct vop_strategy_args /* {
1759		struct vnode *a_vp;
1760		struct buf *a_bp;
1761	} */ *ap = v;
1762	struct vnode *ovp = OTHERVP(ap->a_vp);
1763	struct buf *bp = ap->a_bp;
1764
1765	KASSERT(ovp != NULLVP);
1766	if (!NODE_IS_SPECIAL(ovp))
1767		KASSERT((bp->b_flags & B_READ) || ovp != LOWERVP(bp->b_vp));
1768
1769	return (VOP_STRATEGY(ovp, bp));
1770}
1771
1772int
1773union_bwrite(void *v)
1774{
1775	struct vop_bwrite_args /* {
1776		struct vnode *a_vp;
1777		struct buf *a_bp;
1778	} */ *ap = v;
1779	struct vnode *ovp = OTHERVP(ap->a_vp);
1780	struct buf *bp = ap->a_bp;
1781
1782	KASSERT(ovp != NULLVP);
1783	if (!NODE_IS_SPECIAL(ovp))
1784		KASSERT((bp->b_flags & B_READ) || ovp != LOWERVP(bp->b_vp));
1785
1786	return (VOP_BWRITE(ovp, bp));
1787}
1788
int
union_getpages(void *v)
{
	struct vop_getpages_args /* {
		struct vnode *a_vp;
		voff_t a_offset;
		struct vm_page **a_m;
		int *a_count;
		int a_centeridx;
		vm_prot_t a_access_type;
		int a_advice;
		int a_flags;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;

	KASSERT(mutex_owned(vp->v_interlock));

	/* PGO_LOCKED requests are refused; EBUSY asks for a retry. */
	if (ap->a_flags & PGO_LOCKED) {
		return EBUSY;
	}
	ap->a_vp = OTHERVP(vp);
	/* The union vnode shares its interlock with the backing vnode. */
	KASSERT(vp->v_interlock == ap->a_vp->v_interlock);

	/* Just pass the request on to the underlying layer. */
	return VCALL(ap->a_vp, VOFFSET(vop_getpages), ap);
}
1815
int
union_putpages(void *v)
{
	struct vop_putpages_args /* {
		struct vnode *a_vp;
		voff_t a_offlo;
		voff_t a_offhi;
		int a_flags;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;

	KASSERT(mutex_owned(vp->v_interlock));

	ap->a_vp = OTHERVP(vp);
	/* The union vnode shares its interlock with the backing vnode. */
	KASSERT(vp->v_interlock == ap->a_vp->v_interlock);

	if (ap->a_flags & PGO_RECLAIM) {
		/*
		 * Skip the flush on reclaim; presumably the backing
		 * vnode flushes its own pages when it is reclaimed
		 * (interlock must still be released on this path).
		 */
		mutex_exit(vp->v_interlock);
		return 0;
	}

	/* Just pass the request on to the underlying layer. */
	return VCALL(ap->a_vp, VOFFSET(vop_putpages), ap);
}
1840
1841int
1842union_kqfilter(void *v)
1843{
1844	struct vop_kqfilter_args /* {
1845		struct vnode	*a_vp;
1846		struct knote	*a_kn;
1847	} */ *ap = v;
1848	int error;
1849
1850	/*
1851	 * We watch either the upper layer file (if it already exists),
1852	 * or the lower layer one. If there is lower layer file only
1853	 * at this moment, we will keep watching that lower layer file
1854	 * even if upper layer file would be created later on.
1855	 */
1856	if (UPPERVP(ap->a_vp))
1857		error = VOP_KQFILTER(UPPERVP(ap->a_vp), ap->a_kn);
1858	else if (LOWERVP(ap->a_vp))
1859		error = VOP_KQFILTER(LOWERVP(ap->a_vp), ap->a_kn);
1860	else {
1861		/* panic? */
1862		error = EOPNOTSUPP;
1863	}
1864
1865	return (error);
1866}
1867