/*	$NetBSD: union_vnops.c,v 1.62 2014/07/25 08:20:52 dholland Exp $	*/

/*
 * Copyright (c) 1992, 1993, 1994, 1995
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Jan-Simon Pendry.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)union_vnops.c	8.33 (Berkeley) 7/31/95
 */

/*
 * Copyright (c) 1992, 1993, 1994, 1995 Jan-Simon Pendry.
 *
 * This code is derived from software contributed to Berkeley by
 * Jan-Simon Pendry.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)union_vnops.c	8.33 (Berkeley) 7/31/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: union_vnops.c,v 1.62 2014/07/25 08:20:52 dholland Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/file.h>
#include <sys/time.h>
#include <sys/stat.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/namei.h>
#include <sys/malloc.h>
#include <sys/buf.h>
#include <sys/queue.h>
#include <sys/lock.h>
#include <sys/kauth.h>

#include <fs/union/union.h>
#include <miscfs/genfs/genfs.h>
#include <miscfs/specfs/specdev.h>

int union_lookup(void *);
int union_create(void *);
int union_whiteout(void *);
int union_mknod(void *);
int union_open(void *);
int union_close(void *);
int union_access(void *);
int union_getattr(void *);
int union_setattr(void *);
int union_read(void *);
int union_write(void *);
int union_ioctl(void *);
int union_poll(void *);
int union_revoke(void *);
int union_mmap(void *);
int union_fsync(void *);
int union_seek(void *);
int union_remove(void *);
int union_link(void *);
int union_rename(void *);
int union_mkdir(void *);
int union_rmdir(void *);
int union_symlink(void *);
int union_readdir(void *);
int union_readlink(void *);
int union_abortop(void *);
int union_inactive(void *);
int union_reclaim(void *);
int union_lock(void *);
int union_unlock(void *);
int union_bmap(void *);
int union_print(void *);
int union_islocked(void *);
int union_pathconf(void *);
int union_advlock(void *);
int union_strategy(void *);
int union_bwrite(void *);
int union_getpages(void *);
int union_putpages(void *);
int union_kqfilter(void *);

static int union_lookup1(struct vnode *, struct vnode **,
			      struct vnode **, struct componentname *);


/*
 * Global vfs data structures
 */
int (**union_vnodeop_p)(void *);
const struct vnodeopv_entry_desc union_vnodeop_entries[] = {
	{ &vop_default_desc, vn_default_error },
	{ &vop_lookup_desc, union_lookup },		/* lookup */
	{ &vop_create_desc, union_create },		/* create */
	{ &vop_whiteout_desc, union_whiteout },		/* whiteout */
	{ &vop_mknod_desc, union_mknod },		/* mknod */
	{ &vop_open_desc, union_open },			/* open */
	{ &vop_close_desc, union_close },		/* close */
	{ &vop_access_desc, union_access },		/* access */
	{ &vop_getattr_desc, union_getattr },		/* getattr */
	{ &vop_setattr_desc, union_setattr },		/* setattr */
	{ &vop_read_desc, union_read },			/* read */
	{ &vop_write_desc, union_write },		/* write */
	{ &vop_fallocate_desc, genfs_eopnotsupp },	/* fallocate */
	{ &vop_fdiscard_desc, genfs_eopnotsupp },	/* fdiscard */
	{ &vop_ioctl_desc, union_ioctl },		/* ioctl */
	{ &vop_poll_desc, union_poll },			/* select */
	{ &vop_revoke_desc, union_revoke },		/* revoke */
	{ &vop_mmap_desc, union_mmap },			/* mmap */
	{ &vop_fsync_desc, union_fsync },		/* fsync */
	{ &vop_seek_desc, union_seek },			/* seek */
	{ &vop_remove_desc, union_remove },		/* remove */
	{ &vop_link_desc, union_link },			/* link */
	{ &vop_rename_desc, union_rename },		/* rename */
	{ &vop_mkdir_desc, union_mkdir },		/* mkdir */
	{ &vop_rmdir_desc, union_rmdir },		/* rmdir */
	{ &vop_symlink_desc, union_symlink },		/* symlink */
	{ &vop_readdir_desc, union_readdir },		/* readdir */
	{ &vop_readlink_desc, union_readlink },		/* readlink */
	{ &vop_abortop_desc, union_abortop },		/* abortop */
	{ &vop_inactive_desc, union_inactive },		/* inactive */
	{ &vop_reclaim_desc, union_reclaim },		/* reclaim */
	{ &vop_lock_desc, union_lock },			/* lock */
	{ &vop_unlock_desc, union_unlock },		/* unlock */
	{ &vop_bmap_desc, union_bmap },			/* bmap */
	{ &vop_strategy_desc, union_strategy },		/* strategy */
	{ &vop_bwrite_desc, union_bwrite },		/* bwrite */
	{ &vop_print_desc, union_print },		/* print */
	{ &vop_islocked_desc, union_islocked },		/* islocked */
	{ &vop_pathconf_desc, union_pathconf },		/* pathconf */
	{ &vop_advlock_desc, union_advlock },		/* advlock */
	{ &vop_getpages_desc, union_getpages },		/* getpages */
	{ &vop_putpages_desc, union_putpages },		/* putpages */
	{ &vop_kqfilter_desc, union_kqfilter },		/* kqfilter */
	{ NULL, NULL }
};
const struct vnodeopv_desc union_vnodeop_opv_desc =
	{ &union_vnodeop_p, union_vnodeop_entries };

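/*
 * Device, socket and fifo nodes are never copied up; several operations
 * below pass them straight through to the lower vnode instead.
 */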
#define NODE_IS_SPECIAL(vp) \
	((vp)->v_type == VBLK || (vp)->v_type == VCHR || \
	(vp)->v_type == VSOCK || (vp)->v_type == VFIFO)

static int
union_lookup1(struct vnode *udvp, struct vnode **dvpp, struct vnode **vpp,
	struct componentname *cnp)
{
	int error;
	struct vnode *tdvp;
	struct vnode *dvp;
	struct mount *mp;

	dvp = *dvpp;

	/*
	 * If stepping up the directory tree, check for going
	 * back across the mount point, in which case do what
	 * lookup would do by stepping back down the mount
	 * hierarchy.
	 */
	if (cnp->cn_flags & ISDOTDOT) {
		while ((dvp != udvp) && (dvp->v_vflag & VV_ROOT)) {
			/*
			 * Don't do the NOCROSSMOUNT check
			 * at this level.  By definition,
			 * union fs deals with namespaces, not
			 * filesystems.
			 */
			tdvp = dvp;
			*dvpp = dvp = dvp->v_mount->mnt_vnodecovered;
			VOP_UNLOCK(tdvp);
			vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
		}
	}

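	/*
	 * Do the lookup in the underlying filesystem.  The result comes
	 * back referenced but unlocked, so lock it here; for `..' the
	 * directory we searched is unlocked while locking the result
	 * (its parent) to keep the parent-before-child lock order.
	 */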
	error = VOP_LOOKUP(dvp, &tdvp, cnp);
	if (error)
		return (error);
	if (dvp != tdvp) {
		if (cnp->cn_flags & ISDOTDOT)
			VOP_UNLOCK(dvp);
		error = vn_lock(tdvp, LK_EXCLUSIVE);
		if (cnp->cn_flags & ISDOTDOT)
			vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
		if (error) {
			vrele(tdvp);
			return error;
		}
		dvp = tdvp;
	}

	/*
	 * Lastly check if the current node is a mount point in
	 * which case walk up the mount hierarchy making sure not to
	 * bump into the root of the mount tree (ie. dvp != udvp).
	 */
	while (dvp != udvp && (dvp->v_type == VDIR) &&
	       (mp = dvp->v_mountedhere)) {
		if (vfs_busy(mp, NULL))
			continue;
		vput(dvp);
		error = VFS_ROOT(mp, &tdvp);
		vfs_unbusy(mp, false, NULL);
		if (error) {
			return (error);
		}
		dvp = tdvp;
	}

	*vpp = dvp;
	return (0);
}

int
union_lookup(void *v)
{
	struct vop_lookup_v2_args /* {
		struct vnodeop_desc *a_desc;
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
	} */ *ap = v;
	int error;
	int uerror, lerror;
	struct vnode *uppervp, *lowervp;
	struct vnode *upperdvp, *lowerdvp;
	struct vnode *dvp = ap->a_dvp;
	struct union_node *dun = VTOUNION(dvp);
	struct componentname *cnp = ap->a_cnp;
	struct union_mount *um = MOUNTTOUNIONMOUNT(dvp->v_mount);
	kauth_cred_t saved_cred = NULL;
	int iswhiteout;
	struct vattr va;

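	/*
	 * "notyet": if enabled, looking up the magic name "..." would
	 * return the lower layer's view of this directory.
	 */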
#ifdef notyet
	if (cnp->cn_namelen == 3 &&
			cnp->cn_nameptr[2] == '.' &&
			cnp->cn_nameptr[1] == '.' &&
			cnp->cn_nameptr[0] == '.') {
		dvp = *ap->a_vpp = LOWERVP(ap->a_dvp);
		if (dvp == NULLVP)
			return (ENOENT);
		vref(dvp);
		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
		return (0);
	}
#endif

	if ((cnp->cn_flags & ISLASTCN) &&
	    (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
		return (EROFS);

start:
	upperdvp = dun->un_uppervp;
	lowerdvp = dun->un_lowervp;
	uppervp = NULLVP;
	lowervp = NULLVP;
	iswhiteout = 0;

	/*
	 * do the lookup in the upper level.
	 * if that level consumes additional pathnames,
	 * then assume that something special is going
	 * on and just return that vnode.
	 */
	if (upperdvp != NULLVP) {
		uerror = union_lookup1(um->um_uppervp, &upperdvp,
					&uppervp, cnp);
		if (cnp->cn_consume != 0) {
			if (uppervp != upperdvp)
				VOP_UNLOCK(uppervp);
			*ap->a_vpp = uppervp;
			return (uerror);
		}
		if (uerror == ENOENT || uerror == EJUSTRETURN) {
			if (cnp->cn_flags & ISWHITEOUT) {
				iswhiteout = 1;
			} else if (lowerdvp != NULLVP) {
				lerror = VOP_GETATTR(upperdvp, &va,
					cnp->cn_cred);
				if (lerror == 0 && (va.va_flags & OPAQUE))
					iswhiteout = 1;
			}
		}
	} else {
		uerror = ENOENT;
	}

	/*
	 * in a similar way to the upper layer, do the lookup
	 * in the lower layer.   this time, if there is some
	 * component magic going on, then vput whatever we got
	 * back from the upper layer and return the lower vnode
	 * instead.
	 */
	if (lowerdvp != NULLVP && !iswhiteout) {
		int nameiop;

		vn_lock(lowerdvp, LK_EXCLUSIVE | LK_RETRY);

		/*
		 * Only do a LOOKUP on the bottom node, since
		 * we won't be making changes to it anyway.
		 */
		nameiop = cnp->cn_nameiop;
		cnp->cn_nameiop = LOOKUP;
		if (um->um_op == UNMNT_BELOW) {
			saved_cred = cnp->cn_cred;
			cnp->cn_cred = um->um_cred;
		}

		/*
		 * we shouldn't have to worry about locking interactions
		 * between the lower layer and our union layer (w.r.t.
		 * `..' processing) because we don't futz with lowervp
		 * locks in the union-node instantiation code path.
		 */
		lerror = union_lookup1(um->um_lowervp, &lowerdvp,
				&lowervp, cnp);
		if (um->um_op == UNMNT_BELOW)
			cnp->cn_cred = saved_cred;
		cnp->cn_nameiop = nameiop;

		if (lowervp != lowerdvp)
			VOP_UNLOCK(lowerdvp);

		if (cnp->cn_consume != 0) {
			if (uppervp != NULLVP) {
				if (uppervp == upperdvp)
					vrele(uppervp);
				else
					vput(uppervp);
				uppervp = NULLVP;
			}
			*ap->a_vpp = lowervp;
			return (lerror);
		}
	} else {
		lerror = ENOENT;
		if ((cnp->cn_flags & ISDOTDOT) && dun->un_pvp != NULLVP) {
			lowervp = LOWERVP(dun->un_pvp);
			if (lowervp != NULLVP) {
				vref(lowervp);
				vn_lock(lowervp, LK_EXCLUSIVE | LK_RETRY);
				lerror = 0;
			}
		}
	}

	/*
	 * EJUSTRETURN is used by underlying filesystems to indicate that
	 * a directory modification op was started successfully.
	 * This will only happen in the upper layer, since
	 * the lower layer only does LOOKUPs.
	 * If this union is mounted read-only, bounce it now.
	 */

	if ((uerror == EJUSTRETURN) && (cnp->cn_flags & ISLASTCN) &&
	    (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
	    ((cnp->cn_nameiop == CREATE) || (cnp->cn_nameiop == RENAME)))
		uerror = EROFS;

	/*
	 * at this point, we have uerror and lerror indicating
	 * possible errors with the lookups in the upper and lower
	 * layers.  additionally, uppervp and lowervp are (locked)
	 * references to existing vnodes in the upper and lower layers.
	 *
	 * there are now three cases to consider.
	 * 1. if both layers returned an error, then return whatever
	 *    error the upper layer generated.
	 *
	 * 2. if the top layer failed and the bottom layer succeeded
	 *    then two subcases occur.
	 *    a.  the bottom vnode is not a directory, in which
	 *	  case just return a new union vnode referencing
	 *	  an empty top layer and the existing bottom layer.
	 *    b.  the bottom vnode is a directory, in which case
	 *	  create a new directory in the top-level and
	 *	  continue as in case 3.
	 *
	 * 3. if the top layer succeeded then return a new union
	 *    vnode referencing whatever the new top layer and
	 *    whatever the bottom layer returned.
	 */

	*ap->a_vpp = NULLVP;


	/* case 1. */
	if ((uerror != 0) && (lerror != 0)) {
		return (uerror);
	}

	/* case 2. */
	if (uerror != 0 /* && (lerror == 0) */ ) {
		if (lowervp->v_type == VDIR) { /* case 2b. */
			/*
			 * We may be racing another process to make the
			 * upper-level shadow directory.  Be careful with
			 * locks/etc!
			 * If we have to create a shadow directory and want
			 * to commit the node we have to restart the lookup
			 * to get the componentname right.
			 */
			if (upperdvp) {
				VOP_UNLOCK(upperdvp);
				uerror = union_mkshadow(um, upperdvp, cnp,
				    &uppervp);
				vn_lock(upperdvp, LK_EXCLUSIVE | LK_RETRY);
				if (uerror == 0 && cnp->cn_nameiop != LOOKUP) {
					vrele(uppervp);
					if (lowervp != NULLVP)
						vput(lowervp);
					goto start;
				}
			}
			if (uerror) {
				if (lowervp != NULLVP) {
					vput(lowervp);
					lowervp = NULLVP;
				}
				return (uerror);
			}
		}
	} else { /* uerror == 0 */
		if (uppervp != upperdvp)
			VOP_UNLOCK(uppervp);
	}

	if (lowervp != NULLVP)
		VOP_UNLOCK(lowervp);

	error = union_allocvp(ap->a_vpp, dvp->v_mount, dvp, upperdvp, cnp,
			      uppervp, lowervp, 1);

	if (error) {
		if (uppervp != NULLVP)
			vrele(uppervp);
		if (lowervp != NULLVP)
			vrele(lowervp);
		return error;
	}

	return 0;
}

int
union_create(void *v)
{
	struct vop_create_v3_args /* {
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
		struct vattr *a_vap;
	} */ *ap = v;
	struct union_node *un = VTOUNION(ap->a_dvp);
	struct vnode *dvp = un->un_uppervp;
	struct componentname *cnp = ap->a_cnp;

	if (dvp != NULLVP) {
		int error;
		struct vnode *vp;
		struct mount *mp;

		mp = ap->a_dvp->v_mount;

		vp = NULL;
		error = VOP_CREATE(dvp, &vp, cnp, ap->a_vap);
		if (error)
			return (error);

		error = union_allocvp(ap->a_vpp, mp, NULLVP, NULLVP, cnp, vp,
				NULLVP, 1);
		if (error)
			vrele(vp);
		return (error);
	}

	return (EROFS);
}

int
union_whiteout(void *v)
{
	struct vop_whiteout_args /* {
		struct vnode *a_dvp;
		struct componentname *a_cnp;
		int a_flags;
	} */ *ap = v;
	struct union_node *un = VTOUNION(ap->a_dvp);
	struct componentname *cnp = ap->a_cnp;

	if (un->un_uppervp == NULLVP)
		return (EOPNOTSUPP);

	return (VOP_WHITEOUT(un->un_uppervp, cnp, ap->a_flags));
}

int
union_mknod(void *v)
{
	struct vop_mknod_v3_args /* {
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
		struct vattr *a_vap;
	} */ *ap = v;
	struct union_node *un = VTOUNION(ap->a_dvp);
	struct vnode *dvp = un->un_uppervp;
	struct componentname *cnp = ap->a_cnp;

	if (dvp != NULLVP) {
		int error;
		struct vnode *vp;
		struct mount *mp;

		mp = ap->a_dvp->v_mount;
		error = VOP_MKNOD(dvp, &vp, cnp, ap->a_vap);
		if (error)
			return (error);

		error = union_allocvp(ap->a_vpp, mp, NULLVP, NULLVP,
				      cnp, vp, NULLVP, 1);
		if (error)
			vrele(vp);
		return (error);
	}

	return (EROFS);
}

int
union_open(void *v)
{
	struct vop_open_args /* {
		struct vnodeop_desc *a_desc;
		struct vnode *a_vp;
		int a_mode;
		kauth_cred_t a_cred;
	} */ *ap = v;
	struct union_node *un = VTOUNION(ap->a_vp);
	struct vnode *tvp;
	int mode = ap->a_mode;
	kauth_cred_t cred = ap->a_cred;
	struct lwp *l = curlwp;
	int error;

	/*
	 * If there is an existing upper vp then simply open that.
	 */
	tvp = un->un_uppervp;
	if (tvp == NULLVP) {
		/*
		 * If the lower vnode is being opened for writing, then
		 * copy the file contents to the upper vnode and open that,
		 * otherwise can simply open the lower vnode.
		 */
		tvp = un->un_lowervp;
		if ((ap->a_mode & FWRITE) && (tvp->v_type == VREG)) {
			error = union_copyup(un, (mode&O_TRUNC) == 0, cred, l);
			if (error == 0)
				error = VOP_OPEN(un->un_uppervp, mode, cred);
			return (error);
		}

		/*
		 * Just open the lower vnode, but check for nodev mount flag
		 */
		if ((tvp->v_type == VBLK || tvp->v_type == VCHR) &&
		    (ap->a_vp->v_mount->mnt_flag & MNT_NODEV))
			return ENXIO;
		un->un_openl++;
		vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY);
		error = VOP_OPEN(tvp, mode, cred);
		VOP_UNLOCK(tvp);

		return (error);
	}
	/*
	 * Just open the upper vnode, checking for nodev mount flag first
	 */
	if ((tvp->v_type == VBLK || tvp->v_type == VCHR) &&
	    (ap->a_vp->v_mount->mnt_flag & MNT_NODEV))
		return ENXIO;

	error = VOP_OPEN(tvp, mode, cred);

	return (error);
}

int
union_close(void *v)
{
	struct vop_close_args /* {
		struct vnode *a_vp;
		int  a_fflag;
		kauth_cred_t a_cred;
	} */ *ap = v;
	struct union_node *un = VTOUNION(ap->a_vp);
	struct vnode *vp;
	int error;
	bool do_lock;

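	/*
	 * Forward the close to whichever layer was actually opened.  The
	 * lower vnode does not carry the union node's lock, so it has to
	 * be locked explicitly around the call.
	 */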
	vp = un->un_uppervp;
	if (vp != NULLVP) {
		do_lock = false;
	} else {
		KASSERT(un->un_openl > 0);
		--un->un_openl;
		vp = un->un_lowervp;
		do_lock = true;
	}

	KASSERT(vp != NULLVP);
	ap->a_vp = vp;
	if (do_lock)
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	error = VCALL(vp, VOFFSET(vop_close), ap);
	if (do_lock)
		VOP_UNLOCK(vp);

	return error;
}

/*
 * Check access permission on the union vnode.
 * The access check being enforced is to check
 * against both the underlying vnode, and any
 * copied vnode.  This ensures that no additional
 * file permissions are given away simply because
 * the user caused an implicit file copy.
 */
int
union_access(void *v)
{
	struct vop_access_args /* {
		struct vnodeop_desc *a_desc;
		struct vnode *a_vp;
		int a_mode;
		kauth_cred_t a_cred;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct union_node *un = VTOUNION(vp);
	int error = EACCES;
	struct union_mount *um = MOUNTTOUNIONMOUNT(vp->v_mount);

	/*
	 * Disallow write attempts on read-only file systems;
	 * unless the file is a socket, fifo, or a block or
	 * character device resident on the file system.
	 */
	if (ap->a_mode & VWRITE) {
		switch (vp->v_type) {
		case VDIR:
		case VLNK:
		case VREG:
			if (vp->v_mount->mnt_flag & MNT_RDONLY)
				return (EROFS);
			break;
		case VBAD:
		case VBLK:
		case VCHR:
		case VSOCK:
		case VFIFO:
		case VNON:
		default:
			break;
		}
	}


	if ((vp = un->un_uppervp) != NULLVP) {
		ap->a_vp = vp;
		return (VCALL(vp, VOFFSET(vop_access), ap));
	}

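	/*
	 * No upper vnode: check the lower one instead.  When the union is
	 * mounted below, the check is repeated with the credentials saved
	 * at mount time, so access is granted only if both checks pass.
	 */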
	if ((vp = un->un_lowervp) != NULLVP) {
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		ap->a_vp = vp;
		error = VCALL(vp, VOFFSET(vop_access), ap);
		if (error == 0) {
			if (um->um_op == UNMNT_BELOW) {
				ap->a_cred = um->um_cred;
				error = VCALL(vp, VOFFSET(vop_access), ap);
			}
		}
		VOP_UNLOCK(vp);
		if (error)
			return (error);
	}

	return (error);
}

/*
 * We handle getattr only to change the fsid and
 * track object sizes
 */
int
union_getattr(void *v)
{
	struct vop_getattr_args /* {
		struct vnode *a_vp;
		struct vattr *a_vap;
		kauth_cred_t a_cred;
	} */ *ap = v;
	int error;
	struct union_node *un = VTOUNION(ap->a_vp);
	struct vnode *vp = un->un_uppervp;
	struct vattr *vap;
	struct vattr va;


	/*
	 * Some programs walk the filesystem hierarchy by counting
	 * links to directories to avoid stat'ing all the time.
	 * This means the link count on directories needs to be "correct".
	 * The only way to do that is to call getattr on both layers
	 * and fix up the link count.  The link count will not necessarily
	 * be accurate but will be large enough to defeat the tree walkers.
	 *
	 * To make life more interesting, some filesystems don't keep
	 * track of link counts in the expected way, and return a
	 * link count of `1' for those directories; if either of the
	 * component directories returns a link count of `1', we return a 1.
	 */

	vap = ap->a_vap;

	vp = un->un_uppervp;
	if (vp != NULLVP) {
		error = VOP_GETATTR(vp, vap, ap->a_cred);
		if (error)
			return (error);
		mutex_enter(&un->un_lock);
		union_newsize(ap->a_vp, vap->va_size, VNOVAL);
	}

	if (vp == NULLVP) {
		vp = un->un_lowervp;
	} else if (vp->v_type == VDIR) {
		vp = un->un_lowervp;
		if (vp != NULLVP)
			vap = &va;
	} else {
		vp = NULLVP;
	}

	if (vp != NULLVP) {
		if (vp == un->un_lowervp)
			vn_lock(vp, LK_SHARED | LK_RETRY);
		error = VOP_GETATTR(vp, vap, ap->a_cred);
		if (vp == un->un_lowervp)
			VOP_UNLOCK(vp);
		if (error)
			return (error);
		mutex_enter(&un->un_lock);
		union_newsize(ap->a_vp, VNOVAL, vap->va_size);
	}

	if ((vap != ap->a_vap) && (vap->va_type == VDIR)) {
		/*
		 * Link count manipulation:
		 *	- If both return "2", return 2 (no subdirs)
		 *	- If one or the other return "1", return "1" (ENOCLUE)
		 */
		if ((ap->a_vap->va_nlink == 2) &&
		    (vap->va_nlink == 2))
			;
		else if (ap->a_vap->va_nlink != 1) {
			if (vap->va_nlink == 1)
				ap->a_vap->va_nlink = 1;
			else
				ap->a_vap->va_nlink += vap->va_nlink;
		}
	}
	ap->a_vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsidx.__fsid_val[0];
	return (0);
}

int
union_setattr(void *v)
{
	struct vop_setattr_args /* {
		struct vnode *a_vp;
		struct vattr *a_vap;
		kauth_cred_t a_cred;
	} */ *ap = v;
	struct vattr *vap = ap->a_vap;
	struct vnode *vp = ap->a_vp;
	struct union_node *un = VTOUNION(vp);
	bool size_only;		/* All but va_size are VNOVAL. */
	int error;

	size_only = (vap->va_flags == VNOVAL && vap->va_uid == (uid_t)VNOVAL &&
	    vap->va_gid == (gid_t)VNOVAL && vap->va_atime.tv_sec == VNOVAL &&
	    vap->va_mtime.tv_sec == VNOVAL && vap->va_mode == (mode_t)VNOVAL);

	if (!size_only && (vp->v_mount->mnt_flag & MNT_RDONLY))
		return (EROFS);
	if (vap->va_size != VNOVAL) {
 		switch (vp->v_type) {
 		case VDIR:
 			return (EISDIR);
 		case VCHR:
 		case VBLK:
 		case VSOCK:
 		case VFIFO:
			break;
		case VREG:
		case VLNK:
 		default:
			/*
			 * Disallow write attempts if the filesystem is
			 * mounted read-only.
			 */
			if (vp->v_mount->mnt_flag & MNT_RDONLY)
				return (EROFS);
		}
	}

	/*
	 * Handle case of truncating lower object to zero size,
	 * by creating a zero length upper object.  This is to
	 * handle the case of open with O_TRUNC and O_CREAT.
	 */
	if ((un->un_uppervp == NULLVP) &&
	    /* assert(un->un_lowervp != NULLVP) */
	    (un->un_lowervp->v_type == VREG)) {
		error = union_copyup(un, (vap->va_size != 0),
						ap->a_cred, curlwp);
		if (error)
			return (error);
	}

	/*
	 * Try to set attributes in upper layer, ignore size change to zero
	 * for devices to handle O_TRUNC and return read-only filesystem error
	 * otherwise.
	 */
	if (un->un_uppervp != NULLVP) {
		error = VOP_SETATTR(un->un_uppervp, vap, ap->a_cred);
		if ((error == 0) && (vap->va_size != VNOVAL)) {
			mutex_enter(&un->un_lock);
			union_newsize(ap->a_vp, vap->va_size, VNOVAL);
		}
	} else {
		KASSERT(un->un_lowervp != NULLVP);
		if (NODE_IS_SPECIAL(un->un_lowervp)) {
			if (size_only &&
			    (vap->va_size == 0 || vap->va_size == VNOVAL))
				error = 0;
			else
				error = EROFS;
		} else {
			error = EROFS;
		}
	}

	return (error);
}

int
union_read(void *v)
{
	struct vop_read_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int  a_ioflag;
		kauth_cred_t a_cred;
	} */ *ap = v;
	int error;
	struct vnode *vp = OTHERVP(ap->a_vp);
	int dolock = (vp == LOWERVP(ap->a_vp));

	if (dolock)
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	error = VOP_READ(vp, ap->a_uio, ap->a_ioflag, ap->a_cred);
	if (dolock)
		VOP_UNLOCK(vp);

	/*
	 * XXX
	 * perhaps the size of the underlying object has changed under
	 * our feet.  take advantage of the offset information present
	 * in the uio structure.
	 */
	if (error == 0) {
		struct union_node *un = VTOUNION(ap->a_vp);
		off_t cur = ap->a_uio->uio_offset;
		off_t usz = VNOVAL, lsz = VNOVAL;

		mutex_enter(&un->un_lock);
		if (vp == un->un_uppervp) {
			if (cur > un->un_uppersz)
				usz = cur;
		} else {
			if (cur > un->un_lowersz)
				lsz = cur;
		}

		if (usz != VNOVAL || lsz != VNOVAL)
			union_newsize(ap->a_vp, usz, lsz);
		else
			mutex_exit(&un->un_lock);
	}

	return (error);
}

int
union_write(void *v)
{
	struct vop_read_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int  a_ioflag;
		kauth_cred_t a_cred;
	} */ *ap = v;
	int error;
	struct vnode *vp;
	struct union_node *un = VTOUNION(ap->a_vp);

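	/*
	 * Writes go to the upper layer.  The only legitimate case with no
	 * upper vnode here is a special file, which is passed through to
	 * the lower layer unchanged.
	 */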
	vp = UPPERVP(ap->a_vp);
	if (vp == NULLVP) {
		vp = LOWERVP(ap->a_vp);
		if (NODE_IS_SPECIAL(vp)) {
			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
			error = VOP_WRITE(vp, ap->a_uio, ap->a_ioflag,
			    ap->a_cred);
			VOP_UNLOCK(vp);
			return error;
		}
		panic("union: missing upper layer in write");
	}

	error = VOP_WRITE(vp, ap->a_uio, ap->a_ioflag, ap->a_cred);

	/*
	 * the size of the underlying object may be changed by the
	 * write.
	 */
	if (error == 0) {
		off_t cur = ap->a_uio->uio_offset;

		mutex_enter(&un->un_lock);
		if (cur > un->un_uppersz)
			union_newsize(ap->a_vp, cur, VNOVAL);
		else
			mutex_exit(&un->un_lock);
	}

	return (error);
}

int
union_ioctl(void *v)
{
	struct vop_ioctl_args /* {
		struct vnode *a_vp;
		int  a_command;
		void *a_data;
		int  a_fflag;
		kauth_cred_t a_cred;
	} */ *ap = v;
	struct vnode *ovp = OTHERVP(ap->a_vp);

	ap->a_vp = ovp;
	return (VCALL(ovp, VOFFSET(vop_ioctl), ap));
}

int
union_poll(void *v)
{
	struct vop_poll_args /* {
		struct vnode *a_vp;
		int a_events;
	} */ *ap = v;
	struct vnode *ovp = OTHERVP(ap->a_vp);

	ap->a_vp = ovp;
	return (VCALL(ovp, VOFFSET(vop_poll), ap));
}

int
union_revoke(void *v)
{
	struct vop_revoke_args /* {
		struct vnode *a_vp;
		int a_flags;
		struct proc *a_p;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;

	if (UPPERVP(vp))
		VOP_REVOKE(UPPERVP(vp), ap->a_flags);
	if (LOWERVP(vp))
		VOP_REVOKE(LOWERVP(vp), ap->a_flags);
	vgone(vp);	/* XXXAD?? */
	return (0);
}

int
union_mmap(void *v)
{
	struct vop_mmap_args /* {
		struct vnode *a_vp;
		vm_prot_t a_prot;
		kauth_cred_t a_cred;
	} */ *ap = v;
	struct vnode *ovp = OTHERVP(ap->a_vp);

	ap->a_vp = ovp;
	return (VCALL(ovp, VOFFSET(vop_mmap), ap));
}

int
union_fsync(void *v)
{
	struct vop_fsync_args /* {
		struct vnode *a_vp;
		kauth_cred_t a_cred;
		int  a_flags;
		off_t offhi;
		off_t offlo;
	} */ *ap = v;
	int error = 0;
	struct vnode *targetvp;

	/*
	 * If vinvalbuf is calling us, it's a "shallow fsync" -- don't
	 * bother syncing the underlying vnodes, since (a) they'll be
	 * fsync'ed when reclaimed and (b) we could deadlock if
	 * they're locked; otherwise, pass it through to the
	 * underlying layer.
	 */
	if (ap->a_vp->v_type == VBLK || ap->a_vp->v_type == VCHR) {
		error = spec_fsync(v);
		if (error)
			return error;
	}

	if (ap->a_flags & FSYNC_RECLAIM)
		return 0;

	targetvp = OTHERVP(ap->a_vp);
	if (targetvp != NULLVP) {
		int dolock = (targetvp == LOWERVP(ap->a_vp));

		if (dolock)
			vn_lock(targetvp, LK_EXCLUSIVE | LK_RETRY);
		error = VOP_FSYNC(targetvp, ap->a_cred, ap->a_flags,
			    ap->a_offlo, ap->a_offhi);
		if (dolock)
			VOP_UNLOCK(targetvp);
	}

	return (error);
}

int
union_seek(void *v)
{
	struct vop_seek_args /* {
		struct vnode *a_vp;
		off_t  a_oldoff;
		off_t  a_newoff;
		kauth_cred_t a_cred;
	} */ *ap = v;
	struct vnode *ovp = OTHERVP(ap->a_vp);

	ap->a_vp = ovp;
	return (VCALL(ovp, VOFFSET(vop_seek), ap));
}

int
union_remove(void *v)
{
	struct vop_remove_args /* {
		struct vnode *a_dvp;
		struct vnode *a_vp;
		struct componentname *a_cnp;
	} */ *ap = v;
	int error;
	struct union_node *dun = VTOUNION(ap->a_dvp);
	struct union_node *un = VTOUNION(ap->a_vp);
	struct componentname *cnp = ap->a_cnp;

	if (dun->un_uppervp == NULLVP)
		panic("union remove: null upper vnode");

	if (un->un_uppervp != NULLVP) {
		struct vnode *dvp = dun->un_uppervp;
		struct vnode *vp = un->un_uppervp;

		/*
		 * Account for VOP_REMOVE to vrele dvp and vp.
		 * Note: VOP_REMOVE will unlock dvp and vp.
		 */
		vref(dvp);
		vref(vp);
		if (union_dowhiteout(un, cnp->cn_cred))
			cnp->cn_flags |= DOWHITEOUT;
		error = VOP_REMOVE(dvp, vp, cnp);
		if (!error)
			union_removed_upper(un);
		vrele(ap->a_dvp);
		vrele(ap->a_vp);
	} else {
		error = union_mkwhiteout(
			MOUNTTOUNIONMOUNT(UNIONTOV(dun)->v_mount),
			dun->un_uppervp, ap->a_cnp, un);
		vput(ap->a_dvp);
		vput(ap->a_vp);
	}

	return (error);
}

int
union_link(void *v)
{
	struct vop_link_args /* {
		struct vnode *a_dvp;
		struct vnode *a_vp;
		struct componentname *a_cnp;
	} */ *ap = v;
	int error = 0;
	struct componentname *cnp = ap->a_cnp;
	struct union_node *dun;
	struct vnode *vp;
	struct vnode *dvp;

	dun = VTOUNION(ap->a_dvp);

	KASSERT((ap->a_cnp->cn_flags & LOCKPARENT) != 0);

	if (ap->a_dvp->v_op != ap->a_vp->v_op) {
		vp = ap->a_vp;
	} else {
		struct union_node *un = VTOUNION(ap->a_vp);
		if (un->un_uppervp == NULLVP) {
			const bool droplock = (dun->un_uppervp == un->un_dirvp);

			/*
			 * Needs to be copied before we can link it.
			 */
			vn_lock(ap->a_vp, LK_EXCLUSIVE | LK_RETRY);
			if (droplock)
				VOP_UNLOCK(dun->un_uppervp);
			error = union_copyup(un, 1, cnp->cn_cred, curlwp);
			if (droplock) {
				vn_lock(dun->un_uppervp,
				    LK_EXCLUSIVE | LK_RETRY);
				/*
				 * During copyup, we dropped the lock on the
				 * dir and invalidated any saved namei lookup
				 * state for the directory we'll be entering
				 * the link in.  We need to re-run the lookup
				 * in that directory to reset any state needed
				 * for VOP_LINK.
				 * Call relookup on the union-layer to reset
				 * the state.
				 */
				vp  = NULLVP;
				if (dun->un_uppervp == NULLVP)
					 panic("union: null upperdvp?");
				error = relookup(ap->a_dvp, &vp, ap->a_cnp, 0);
				if (error) {
					VOP_UNLOCK(ap->a_vp);
					return EROFS;	/* ? */
				}
				if (vp != NULLVP) {
					/*
					 * The name we want to create has
					 * mysteriously appeared (a race?)
					 */
					error = EEXIST;
					VOP_UNLOCK(ap->a_vp);
					vput(ap->a_dvp);
					vput(vp);
					return (error);
				}
			}
			VOP_UNLOCK(ap->a_vp);
		}
		vp = un->un_uppervp;
	}

	dvp = dun->un_uppervp;
	if (dvp == NULLVP)
		error = EROFS;

	if (error) {
		vput(ap->a_dvp);
		return (error);
	}

	/*
	 * Account for VOP_LINK to vrele dvp.
	 * Note: VOP_LINK will unlock dvp.
	 */
	vref(dvp);
	error = VOP_LINK(dvp, vp, cnp);
	vrele(ap->a_dvp);

	return error;
}

int
union_rename(void *v)
{
	struct vop_rename_args  /* {
		struct vnode *a_fdvp;
		struct vnode *a_fvp;
		struct componentname *a_fcnp;
		struct vnode *a_tdvp;
		struct vnode *a_tvp;
		struct componentname *a_tcnp;
	} */ *ap = v;
	int error;

	struct vnode *fdvp = ap->a_fdvp;
	struct vnode *fvp = ap->a_fvp;
	struct vnode *tdvp = ap->a_tdvp;
	struct vnode *tvp = ap->a_tvp;

	/*
	 * Account for VOP_RENAME to vrele all nodes.
	 * Note: VOP_RENAME will unlock tdvp.
	 */

	if (fdvp->v_op == union_vnodeop_p) {	/* always true */
		struct union_node *un = VTOUNION(fdvp);
		if (un->un_uppervp == NULLVP) {
			/*
			 * this should never happen in normal
			 * operation but might if there was
			 * a problem creating the top-level shadow
			 * directory.
			 */
			error = EXDEV;
			goto bad;
		}

		fdvp = un->un_uppervp;
		vref(fdvp);
	}

	if (fvp->v_op == union_vnodeop_p) {	/* always true */
		struct union_node *un = VTOUNION(fvp);
		if (un->un_uppervp == NULLVP) {
			/* XXX: should do a copyup */
			error = EXDEV;
			goto bad;
		}

		if (un->un_lowervp != NULLVP)
			ap->a_fcnp->cn_flags |= DOWHITEOUT;

		fvp = un->un_uppervp;
		vref(fvp);
	}

	if (tdvp->v_op == union_vnodeop_p) {
		struct union_node *un = VTOUNION(tdvp);
		if (un->un_uppervp == NULLVP) {
			/*
			 * this should never happen in normal
			 * operation but might if there was
			 * a problem creating the top-level shadow
			 * directory.
			 */
			error = EXDEV;
			goto bad;
		}

		tdvp = un->un_uppervp;
		vref(tdvp);
	}

	if (tvp != NULLVP && tvp->v_op == union_vnodeop_p) {
		struct union_node *un = VTOUNION(tvp);

		tvp = un->un_uppervp;
		if (tvp != NULLVP) {
			vref(tvp);
		}
	}

	error = VOP_RENAME(fdvp, fvp, ap->a_fcnp, tdvp, tvp, ap->a_tcnp);
	goto out;

bad:
	vput(tdvp);
	if (tvp != NULLVP)
		vput(tvp);
	vrele(fdvp);
	vrele(fvp);

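	/*
	 * Drop the references on any union vnodes that were replaced by
	 * their upper-layer counterparts above.
	 */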
out:
	if (fdvp != ap->a_fdvp) {
		vrele(ap->a_fdvp);
	}
	if (fvp != ap->a_fvp) {
		vrele(ap->a_fvp);
	}
	if (tdvp != ap->a_tdvp) {
		vrele(ap->a_tdvp);
	}
	if (tvp != ap->a_tvp) {
		vrele(ap->a_tvp);
	}
	return (error);
}

int
union_mkdir(void *v)
{
	struct vop_mkdir_v3_args /* {
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
		struct vattr *a_vap;
	} */ *ap = v;
	struct union_node *un = VTOUNION(ap->a_dvp);
	struct vnode *dvp = un->un_uppervp;
	struct componentname *cnp = ap->a_cnp;

	if (dvp != NULLVP) {
		int error;
		struct vnode *vp;

		vp = NULL;
		error = VOP_MKDIR(dvp, &vp, cnp, ap->a_vap);
		if (error) {
			vrele(ap->a_dvp);
			return (error);
		}

		error = union_allocvp(ap->a_vpp, ap->a_dvp->v_mount, ap->a_dvp,
				NULLVP, cnp, vp, NULLVP, 1);
		if (error)
			vrele(vp);
		return (error);
	}

	return (EROFS);
}

int
union_rmdir(void *v)
{
	struct vop_rmdir_args /* {
		struct vnode *a_dvp;
		struct vnode *a_vp;
		struct componentname *a_cnp;
	} */ *ap = v;
	int error;
	struct union_node *dun = VTOUNION(ap->a_dvp);
	struct union_node *un = VTOUNION(ap->a_vp);
	struct componentname *cnp = ap->a_cnp;

	if (dun->un_uppervp == NULLVP)
		panic("union rmdir: null upper vnode");

	error = union_check_rmdir(un, cnp->cn_cred);
	if (error) {
		vput(ap->a_dvp);
		vput(ap->a_vp);
		return error;
	}

	if (un->un_uppervp != NULLVP) {
		struct vnode *dvp = dun->un_uppervp;
		struct vnode *vp = un->un_uppervp;

		/*
		 * Account for VOP_RMDIR to vrele dvp and vp.
		 * Note: VOP_RMDIR will unlock dvp and vp.
		 */
		vref(dvp);
		vref(vp);
		if (union_dowhiteout(un, cnp->cn_cred))
			cnp->cn_flags |= DOWHITEOUT;
		error = VOP_RMDIR(dvp, vp, ap->a_cnp);
		if (!error)
			union_removed_upper(un);
		vrele(ap->a_dvp);
		vrele(ap->a_vp);
	} else {
		error = union_mkwhiteout(
			MOUNTTOUNIONMOUNT(UNIONTOV(dun)->v_mount),
			dun->un_uppervp, ap->a_cnp, un);
		vput(ap->a_dvp);
		vput(ap->a_vp);
	}

	return (error);
}

int
union_symlink(void *v)
{
	struct vop_symlink_v3_args /* {
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
		struct vattr *a_vap;
		char *a_target;
	} */ *ap = v;
	struct union_node *un = VTOUNION(ap->a_dvp);
	struct vnode *dvp = un->un_uppervp;
	struct componentname *cnp = ap->a_cnp;

	if (dvp != NULLVP) {
		int error;

		error = VOP_SYMLINK(dvp, ap->a_vpp, cnp, ap->a_vap,
				    ap->a_target);
		return (error);
	}

	return (EROFS);
}

/*
 * union_readdir works in concert with getdirentries and
 * readdir(3) to provide a list of entries in the unioned
 * directories.  getdirentries is responsible for walking
 * down the union stack.  readdir(3) is responsible for
 * eliminating duplicate names from the returned data stream.
 */
int
union_readdir(void *v)
{
	struct vop_readdir_args /* {
		struct vnodeop_desc *a_desc;
		struct vnode *a_vp;
		struct uio *a_uio;
		kauth_cred_t a_cred;
		int *a_eofflag;
		u_long *a_cookies;
		int a_ncookies;
	} */ *ap = v;
	struct union_node *un = VTOUNION(ap->a_vp);
	struct vnode *uvp = un->un_uppervp;

	if (uvp == NULLVP)
		return (0);

	ap->a_vp = uvp;
	return (VCALL(uvp, VOFFSET(vop_readdir), ap));
}

int
union_readlink(void *v)
{
	struct vop_readlink_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		kauth_cred_t a_cred;
	} */ *ap = v;
	int error;
	struct vnode *vp = OTHERVP(ap->a_vp);
	int dolock = (vp == LOWERVP(ap->a_vp));

	if (dolock)
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	ap->a_vp = vp;
	error = VCALL(vp, VOFFSET(vop_readlink), ap);
	if (dolock)
		VOP_UNLOCK(vp);

	return (error);
}

int
union_abortop(void *v)
{
	struct vop_abortop_args /* {
		struct vnode *a_dvp;
		struct componentname *a_cnp;
	} */ *ap = v;

	KASSERT(UPPERVP(ap->a_dvp) != NULL);

	ap->a_dvp = UPPERVP(ap->a_dvp);
	return VCALL(ap->a_dvp, VOFFSET(vop_abortop), ap);
}

int
union_inactive(void *v)
{
	struct vop_inactive_args /* {
		const struct vnodeop_desc *a_desc;
		struct vnode *a_vp;
		bool *a_recycle;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct union_node *un = VTOUNION(vp);
	struct vnode **vpp;

	/*
	 * Do nothing (and _don't_ bypass).
	 * Wait to vrele lowervp until reclaim,
	 * so that until then our union_node is in the
	 * cache and reusable.
	 *
	 * NEEDSWORK: Someday, consider inactive'ing
	 * the lowervp and then trying to reactivate it
	 * with capabilities (v_id)
	 * like they do in the name lookup cache code.
	 * That's too much work for now.
	 */

	if (un->un_dircache != 0) {
		for (vpp = un->un_dircache; *vpp != NULLVP; vpp++)
			vrele(*vpp);
		free(un->un_dircache, M_TEMP);
		un->un_dircache = 0;
	}

	*ap->a_recycle = ((un->un_cflags & UN_CACHED) == 0);
	VOP_UNLOCK(vp);

	return (0);
}

int
union_reclaim(void *v)
{
	struct vop_reclaim_args /* {
		struct vnode *a_vp;
	} */ *ap = v;

	union_freevp(ap->a_vp);

	return (0);
}

static int
union_lock1(struct vnode *vp, struct vnode *lockvp, int flags)
{
	struct vop_lock_args ap;

	if (lockvp == vp) {
		ap.a_vp = vp;
		ap.a_flags = flags;
		return genfs_lock(&ap);
	} else
		return VOP_LOCK(lockvp, flags);
}

static int
union_unlock1(struct vnode *vp, struct vnode *lockvp)
{
	struct vop_unlock_args ap;

	if (lockvp == vp) {
		ap.a_vp = vp;
		return genfs_unlock(&ap);
	} else
		return VOP_UNLOCK(lockvp);
}

int
union_lock(void *v)
{
	struct vop_lock_args /* {
		struct vnode *a_vp;
		int a_flags;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp, *lockvp;
	struct union_node *un = VTOUNION(vp);
	int flags = ap->a_flags;
	int error;

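	/*
	 * LK_NOWAIT: try to take the union mutex and the underlying lock
	 * without sleeping, then make sure the vnode has not been revoked
	 * in the meantime.
	 */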
	if ((flags & LK_NOWAIT) != 0) {
		if (!mutex_tryenter(&un->un_lock))
			return EBUSY;
		lockvp = LOCKVP(vp);
		error = union_lock1(vp, lockvp, flags);
		mutex_exit(&un->un_lock);
		if (error)
			return error;
		if (mutex_tryenter(vp->v_interlock)) {
			error = vdead_check(vp, VDEAD_NOWAIT);
			mutex_exit(vp->v_interlock);
		} else
			error = EBUSY;
		if (error)
			union_unlock1(vp, lockvp);
		return error;
	}

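	/*
	 * The vnode that carries our lock (LOCKVP) can change while we
	 * sleep, e.g. when a copyup installs an upper vnode, so lock it
	 * and then re-check under un_lock, retrying until it is stable.
	 */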
	mutex_enter(&un->un_lock);
	for (;;) {
		lockvp = LOCKVP(vp);
		mutex_exit(&un->un_lock);
		error = union_lock1(vp, lockvp, flags);
		if (error != 0)
			return error;
		mutex_enter(&un->un_lock);
		if (lockvp == LOCKVP(vp))
			break;
		union_unlock1(vp, lockvp);
	}
	mutex_exit(&un->un_lock);

	mutex_enter(vp->v_interlock);
	error = vdead_check(vp, VDEAD_NOWAIT);
	if (error) {
		union_unlock1(vp, lockvp);
		error = vdead_check(vp, 0);
		KASSERT(error == ENOENT);
	}
	mutex_exit(vp->v_interlock);
	return error;
}

int
union_unlock(void *v)
{
	struct vop_unlock_args /* {
		struct vnode *a_vp;
		int a_flags;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp, *lockvp;

	lockvp = LOCKVP(vp);
	union_unlock1(vp, lockvp);

	return 0;
}

int
union_bmap(void *v)
{
	struct vop_bmap_args /* {
		struct vnode *a_vp;
		daddr_t  a_bn;
		struct vnode **a_vpp;
		daddr_t *a_bnp;
		int *a_runp;
	} */ *ap = v;
	int error;
	struct vnode *vp = OTHERVP(ap->a_vp);
	int dolock = (vp == LOWERVP(ap->a_vp));

	if (dolock)
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	ap->a_vp = vp;
	error = VCALL(vp, VOFFSET(vop_bmap), ap);
	if (dolock)
		VOP_UNLOCK(vp);

	return (error);
}

int
union_print(void *v)
{
	struct vop_print_args /* {
		struct vnode *a_vp;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;

	printf("\ttag VT_UNION, vp=%p, uppervp=%p, lowervp=%p\n",
			vp, UPPERVP(vp), LOWERVP(vp));
	if (UPPERVP(vp) != NULLVP)
		vprint("union: upper", UPPERVP(vp));
	if (LOWERVP(vp) != NULLVP)
		vprint("union: lower", LOWERVP(vp));
	if (VTOUNION(vp)->un_dircache) {
		struct vnode **vpp;
		for (vpp = VTOUNION(vp)->un_dircache; *vpp != NULLVP; vpp++)
			vprint("dircache:", *vpp);
	}

	return (0);
}

int
union_islocked(void *v)
{
	struct vop_islocked_args /* {
		struct vnode *a_vp;
	} */ *ap = v;
	struct vnode *vp;
	struct union_node *un;

	un = VTOUNION(ap->a_vp);
	mutex_enter(&un->un_lock);
	vp = LOCKVP(ap->a_vp);
	mutex_exit(&un->un_lock);

	if (vp == ap->a_vp)
		return genfs_islocked(ap);
	else
		return VOP_ISLOCKED(vp);
}

int
union_pathconf(void *v)
{
	struct vop_pathconf_args /* {
		struct vnode *a_vp;
		int a_name;
		int *a_retval;
	} */ *ap = v;
	int error;
	struct vnode *vp = OTHERVP(ap->a_vp);
	int dolock = (vp == LOWERVP(ap->a_vp));

	if (dolock)
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	ap->a_vp = vp;
	error = VCALL(vp, VOFFSET(vop_pathconf), ap);
	if (dolock)
		VOP_UNLOCK(vp);

	return (error);
}

int
union_advlock(void *v)
{
	struct vop_advlock_args /* {
		struct vnode *a_vp;
		void *a_id;
		int  a_op;
		struct flock *a_fl;
		int  a_flags;
	} */ *ap = v;
	struct vnode *ovp = OTHERVP(ap->a_vp);

	ap->a_vp = ovp;
	return (VCALL(ovp, VOFFSET(vop_advlock), ap));
}

int
union_strategy(void *v)
{
	struct vop_strategy_args /* {
		struct vnode *a_vp;
		struct buf *a_bp;
	} */ *ap = v;
	struct vnode *ovp = OTHERVP(ap->a_vp);
	struct buf *bp = ap->a_bp;

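	/*
	 * Except for special (device) nodes, a write buffer must never be
	 * directed at the lower layer; only the upper layer is writable.
	 */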
	KASSERT(ovp != NULLVP);
	if (!NODE_IS_SPECIAL(ovp))
		KASSERT((bp->b_flags & B_READ) || ovp != LOWERVP(bp->b_vp));

	return (VOP_STRATEGY(ovp, bp));
}

int
union_bwrite(void *v)
{
	struct vop_bwrite_args /* {
		struct vnode *a_vp;
		struct buf *a_bp;
	} */ *ap = v;
	struct vnode *ovp = OTHERVP(ap->a_vp);
	struct buf *bp = ap->a_bp;

	KASSERT(ovp != NULLVP);
	if (!NODE_IS_SPECIAL(ovp))
		KASSERT((bp->b_flags & B_READ) || ovp != LOWERVP(bp->b_vp));

	return (VOP_BWRITE(ovp, bp));
}

int
union_getpages(void *v)
{
	struct vop_getpages_args /* {
		struct vnode *a_vp;
		voff_t a_offset;
		struct vm_page **a_m;
		int *a_count;
		int a_centeridx;
		vm_prot_t a_access_type;
		int a_advice;
		int a_flags;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;

	KASSERT(mutex_owned(vp->v_interlock));

	if (ap->a_flags & PGO_LOCKED) {
		return EBUSY;
	}
	ap->a_vp = OTHERVP(vp);
	KASSERT(vp->v_interlock == ap->a_vp->v_interlock);

	/* Just pass the request on to the underlying layer. */
	return VCALL(ap->a_vp, VOFFSET(vop_getpages), ap);
}

int
union_putpages(void *v)
{
	struct vop_putpages_args /* {
		struct vnode *a_vp;
		voff_t a_offlo;
		voff_t a_offhi;
		int a_flags;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;

	KASSERT(mutex_owned(vp->v_interlock));

	ap->a_vp = OTHERVP(vp);
	KASSERT(vp->v_interlock == ap->a_vp->v_interlock);

	if (ap->a_flags & PGO_RECLAIM) {
		mutex_exit(vp->v_interlock);
		return 0;
	}

	/* Just pass the request on to the underlying layer. */
	return VCALL(ap->a_vp, VOFFSET(vop_putpages), ap);
}

int
union_kqfilter(void *v)
{
	struct vop_kqfilter_args /* {
		struct vnode	*a_vp;
		struct knote	*a_kn;
	} */ *ap = v;
	int error;

	/*
	 * We watch either the upper layer file (if it already exists),
	 * or the lower layer one.  If only the lower layer file exists
	 * at this moment, we keep watching that lower layer file even
	 * if an upper layer file is created later on.
	 */
	if (UPPERVP(ap->a_vp))
		error = VOP_KQFILTER(UPPERVP(ap->a_vp), ap->a_kn);
	else if (LOWERVP(ap->a_vp))
		error = VOP_KQFILTER(LOWERVP(ap->a_vp), ap->a_kn);
	else {
		/* panic? */
		error = EOPNOTSUPP;
	}

	return (error);
}
