/* union_vnops.c revision 1.80 */
1/*	$NetBSD: union_vnops.c,v 1.80 2021/12/05 16:16:58 hannken Exp $	*/
2
3/*
4 * Copyright (c) 1992, 1993, 1994, 1995
5 *	The Regents of the University of California.  All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Jan-Simon Pendry.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its contributors
19 *    may be used to endorse or promote products derived from this software
20 *    without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 *
34 *	@(#)union_vnops.c	8.33 (Berkeley) 7/31/95
35 */
36
37/*
38 * Copyright (c) 1992, 1993, 1994, 1995 Jan-Simon Pendry.
39 *
40 * This code is derived from software contributed to Berkeley by
41 * Jan-Simon Pendry.
42 *
43 * Redistribution and use in source and binary forms, with or without
44 * modification, are permitted provided that the following conditions
45 * are met:
46 * 1. Redistributions of source code must retain the above copyright
47 *    notice, this list of conditions and the following disclaimer.
48 * 2. Redistributions in binary form must reproduce the above copyright
49 *    notice, this list of conditions and the following disclaimer in the
50 *    documentation and/or other materials provided with the distribution.
51 * 3. All advertising materials mentioning features or use of this software
52 *    must display the following acknowledgement:
53 *	This product includes software developed by the University of
54 *	California, Berkeley and its contributors.
55 * 4. Neither the name of the University nor the names of its contributors
56 *    may be used to endorse or promote products derived from this software
57 *    without specific prior written permission.
58 *
59 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
60 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
61 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
62 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
63 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
64 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
65 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
66 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
67 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
68 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
69 * SUCH DAMAGE.
70 *
71 *	@(#)union_vnops.c	8.33 (Berkeley) 7/31/95
72 */
73
74#include <sys/cdefs.h>
75__KERNEL_RCSID(0, "$NetBSD: union_vnops.c,v 1.80 2021/12/05 16:16:58 hannken Exp $");
76
77#include <sys/param.h>
78#include <sys/systm.h>
79#include <sys/proc.h>
80#include <sys/file.h>
81#include <sys/time.h>
82#include <sys/stat.h>
83#include <sys/vnode.h>
84#include <sys/mount.h>
85#include <sys/namei.h>
86#include <sys/malloc.h>
87#include <sys/buf.h>
88#include <sys/queue.h>
89#include <sys/lock.h>
90#include <sys/kauth.h>
91
92#include <fs/union/union.h>
93#include <miscfs/genfs/genfs.h>
94#include <miscfs/specfs/specdev.h>
95
96int union_parsepath(void *);
97int union_lookup(void *);
98int union_create(void *);
99int union_whiteout(void *);
100int union_mknod(void *);
101int union_open(void *);
102int union_close(void *);
103int union_access(void *);
104int union_getattr(void *);
105int union_setattr(void *);
106int union_read(void *);
107int union_write(void *);
108int union_ioctl(void *);
109int union_poll(void *);
110int union_revoke(void *);
111int union_mmap(void *);
112int union_fsync(void *);
113int union_seek(void *);
114int union_remove(void *);
115int union_link(void *);
116int union_rename(void *);
117int union_mkdir(void *);
118int union_rmdir(void *);
119int union_symlink(void *);
120int union_readdir(void *);
121int union_readlink(void *);
122int union_abortop(void *);
123int union_inactive(void *);
124int union_reclaim(void *);
125int union_lock(void *);
126int union_unlock(void *);
127int union_bmap(void *);
128int union_print(void *);
129int union_islocked(void *);
130int union_pathconf(void *);
131int union_advlock(void *);
132int union_strategy(void *);
133int union_bwrite(void *);
134int union_getpages(void *);
135int union_putpages(void *);
136int union_kqfilter(void *);
137
138static int union_lookup1(struct vnode *, struct vnode **,
139			      struct vnode **, struct componentname *);
140
141
142/*
143 * Global vfs data structures
144 */
145int (**union_vnodeop_p)(void *);
146const struct vnodeopv_entry_desc union_vnodeop_entries[] = {
147	{ &vop_default_desc, vn_default_error },
148	{ &vop_parsepath_desc, union_parsepath },	/* parsepath */
149	{ &vop_lookup_desc, union_lookup },		/* lookup */
150	{ &vop_create_desc, union_create },		/* create */
151	{ &vop_whiteout_desc, union_whiteout },		/* whiteout */
152	{ &vop_mknod_desc, union_mknod },		/* mknod */
153	{ &vop_open_desc, union_open },			/* open */
154	{ &vop_close_desc, union_close },		/* close */
155	{ &vop_access_desc, union_access },		/* access */
156	{ &vop_accessx_desc, genfs_accessx },		/* accessx */
157	{ &vop_getattr_desc, union_getattr },		/* getattr */
158	{ &vop_setattr_desc, union_setattr },		/* setattr */
159	{ &vop_read_desc, union_read },			/* read */
160	{ &vop_write_desc, union_write },		/* write */
161	{ &vop_fallocate_desc, genfs_eopnotsupp },	/* fallocate */
162	{ &vop_fdiscard_desc, genfs_eopnotsupp },	/* fdiscard */
163	{ &vop_ioctl_desc, union_ioctl },		/* ioctl */
164	{ &vop_poll_desc, union_poll },			/* select */
165	{ &vop_revoke_desc, union_revoke },		/* revoke */
166	{ &vop_mmap_desc, union_mmap },			/* mmap */
167	{ &vop_fsync_desc, union_fsync },		/* fsync */
168	{ &vop_seek_desc, union_seek },			/* seek */
169	{ &vop_remove_desc, union_remove },		/* remove */
170	{ &vop_link_desc, union_link },			/* link */
171	{ &vop_rename_desc, union_rename },		/* rename */
172	{ &vop_mkdir_desc, union_mkdir },		/* mkdir */
173	{ &vop_rmdir_desc, union_rmdir },		/* rmdir */
174	{ &vop_symlink_desc, union_symlink },		/* symlink */
175	{ &vop_readdir_desc, union_readdir },		/* readdir */
176	{ &vop_readlink_desc, union_readlink },		/* readlink */
177	{ &vop_abortop_desc, union_abortop },		/* abortop */
178	{ &vop_inactive_desc, union_inactive },		/* inactive */
179	{ &vop_reclaim_desc, union_reclaim },		/* reclaim */
180	{ &vop_lock_desc, union_lock },			/* lock */
181	{ &vop_unlock_desc, union_unlock },		/* unlock */
182	{ &vop_bmap_desc, union_bmap },			/* bmap */
183	{ &vop_strategy_desc, union_strategy },		/* strategy */
184	{ &vop_bwrite_desc, union_bwrite },		/* bwrite */
185	{ &vop_print_desc, union_print },		/* print */
186	{ &vop_islocked_desc, union_islocked },		/* islocked */
187	{ &vop_pathconf_desc, union_pathconf },		/* pathconf */
188	{ &vop_advlock_desc, union_advlock },		/* advlock */
189	{ &vop_getpages_desc, union_getpages },		/* getpages */
190	{ &vop_putpages_desc, union_putpages },		/* putpages */
191	{ &vop_kqfilter_desc, union_kqfilter },		/* kqfilter */
192	{ NULL, NULL }
193};
194const struct vnodeopv_desc union_vnodeop_opv_desc =
195	{ &union_vnodeop_p, union_vnodeop_entries };
196
/*
 * True if vp is a device, socket or fifo -- nodes whose data lives
 * outside the filesystem and therefore is never copied up.
 */
#define NODE_IS_SPECIAL(vp) \
	((vp)->v_type == VBLK || (vp)->v_type == VCHR || \
	(vp)->v_type == VSOCK || (vp)->v_type == VFIFO)
200
201int
202union_parsepath(void *v)
203{
204	struct vop_parsepath_args /* {
205		struct vnode *a_dvp;
206		const char *a_name;
207		size_t *a_retval;
208	} */ *ap = v;
209	struct vnode *upperdvp, *lowerdvp;
210	size_t upper, lower;
211	int error;
212
213	upperdvp = UPPERVP(ap->a_dvp);
214	lowerdvp = LOWERVP(ap->a_dvp);
215
216	if (upperdvp != NULLVP) {
217		error = VOP_PARSEPATH(upperdvp, ap->a_name, &upper);
218		if (error) {
219			return error;
220		}
221	} else {
222		upper = 0;
223	}
224
225	if (lowerdvp != NULLVP) {
226		error = VOP_PARSEPATH(lowerdvp, ap->a_name, &lower);
227		if (error) {
228			return error;
229		}
230	} else {
231		lower = 0;
232	}
233
234	if (upper == 0 && lower == 0) {
235		panic("%s: missing both layers", __func__);
236	}
237
238	/*
239	 * If they're different, use the larger one. This is not a
240	 * comprehensive solution, but it's sufficient for the
241	 * non-default cases of parsepath that currently exist.
242	 */
243	*ap->a_retval = MAX(upper, lower);
244	return 0;
245}
246
/*
 * Perform a lookup of cnp in *dvpp, applying the same mount-point
 * semantics lookup() itself would: on ISDOTDOT, step back down across
 * any mount points first; afterwards, step up onto any filesystem
 * mounted over the result.  udvp is the root vnode of this layer and
 * bounds both traversals so we never walk above the layer's root.
 * On success *vpp holds a locked, referenced vnode, and *dvpp may have
 * been replaced by the covered vnode (ISDOTDOT across a mount).
 */
static int
union_lookup1(struct vnode *udvp, struct vnode **dvpp, struct vnode **vpp,
	struct componentname *cnp)
{
	int error;
	struct vnode *tdvp;
	struct vnode *dvp;
	struct mount *mp;

	dvp = *dvpp;

	/*
	 * If stepping up the directory tree, check for going
	 * back across the mount point, in which case do what
	 * lookup would do by stepping back down the mount
	 * hierarchy.
	 */
	if (cnp->cn_flags & ISDOTDOT) {
		while ((dvp != udvp) && (dvp->v_vflag & VV_ROOT)) {
			/*
			 * Don't do the NOCROSSMOUNT check
			 * at this level.  By definition,
			 * union fs deals with namespaces, not
			 * filesystems.
			 */
			tdvp = dvp;
			*dvpp = dvp = dvp->v_mount->mnt_vnodecovered;
			VOP_UNLOCK(tdvp);
			vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
		}
	}

        error = VOP_LOOKUP(dvp, &tdvp, cnp);
	if (error)
		return (error);
	if (dvp != tdvp) {
		/*
		 * Lock the result.  For ISDOTDOT the parent is
		 * unlocked first so we never hold the child's lock
		 * while taking the parent's, then relocked after.
		 */
		if (cnp->cn_flags & ISDOTDOT)
			VOP_UNLOCK(dvp);
		error = vn_lock(tdvp, LK_EXCLUSIVE);
		if (cnp->cn_flags & ISDOTDOT)
			vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
		if (error) {
			vrele(tdvp);
			return error;
		}
		dvp = tdvp;
	}

	/*
	 * Lastly check if the current node is a mount point in
	 * which case walk up the mount hierarchy making sure not to
	 * bump into the root of the mount tree (ie. dvp != udvp).
	 */
	while (dvp != udvp && (dvp->v_type == VDIR) &&
	       (mp = dvp->v_mountedhere)) {
		if (vfs_busy(mp))
			continue;	/* lost a race; re-evaluate */
		vput(dvp);
		error = VFS_ROOT(mp, LK_EXCLUSIVE, &tdvp);
		vfs_unbusy(mp);
		if (error) {
			return (error);
		}
		dvp = tdvp;
	}

	*vpp = dvp;
	return (0);
}
316
/*
 * Look up a component in a union directory: try the upper layer first
 * (honouring whiteout entries and the OPAQUE directory flag), then the
 * lower layer with the nameiop downgraded to LOOKUP, and finally
 * combine the results into a single union vnode via union_allocvp().
 * Missing upper shadow directories are created on demand when the
 * lower hit is a directory.
 */
int
union_lookup(void *v)
{
	struct vop_lookup_v2_args /* {
		struct vnodeop_desc *a_desc;
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
	} */ *ap = v;
	int error;
	int uerror, lerror;
	struct vnode *uppervp, *lowervp;
	struct vnode *upperdvp, *lowerdvp;
	struct vnode *dvp = ap->a_dvp;
	struct union_node *dun = VTOUNION(dvp);
	struct componentname *cnp = ap->a_cnp;
	struct union_mount *um = MOUNTTOUNIONMOUNT(dvp->v_mount);
	kauth_cred_t saved_cred = NULL;
	int iswhiteout;
	struct vattr va;

#ifdef notyet
	/* "..." would name the lower-layer view of this directory. */
	if (cnp->cn_namelen == 3 &&
			cnp->cn_nameptr[2] == '.' &&
			cnp->cn_nameptr[1] == '.' &&
			cnp->cn_nameptr[0] == '.') {
		dvp = *ap->a_vpp = LOWERVP(ap->a_dvp);
		if (dvp == NULLVP)
			return (ENOENT);
		vref(dvp);
		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
		return (0);
	}
#endif

	/* Refuse delete/rename preparation on a read-only union mount. */
	if ((cnp->cn_flags & ISLASTCN) &&
	    (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
		return (EROFS);

start:
	upperdvp = dun->un_uppervp;
	lowerdvp = dun->un_lowervp;
	uppervp = NULLVP;
	lowervp = NULLVP;
	iswhiteout = 0;

	/*
	 * do the lookup in the upper level.
	 * if that level consumes additional pathnames,
	 * then assume that something special is going
	 * on and just return that vnode.
	 */
	if (upperdvp != NULLVP) {
		uerror = union_lookup1(um->um_uppervp, &upperdvp,
					&uppervp, cnp);
		if (uerror == ENOENT || uerror == EJUSTRETURN) {
			if (cnp->cn_flags & ISWHITEOUT) {
				iswhiteout = 1;
			} else if (lowerdvp != NULLVP) {
				/*
				 * An OPAQUE upper directory hides the
				 * lower layer just like an explicit
				 * whiteout.  (lerror is only a scratch
				 * variable here; it is re-set below.)
				 */
				lerror = VOP_GETATTR(upperdvp, &va,
					cnp->cn_cred);
				if (lerror == 0 && (va.va_flags & OPAQUE))
					iswhiteout = 1;
			}
		}
	} else {
		uerror = ENOENT;
	}

	/*
	 * in a similar way to the upper layer, do the lookup
	 * in the lower layer.   this time, if there is some
	 * component magic going on, then vput whatever we got
	 * back from the upper layer and return the lower vnode
	 * instead.
	 */
	if (lowerdvp != NULLVP && !iswhiteout) {
		int nameiop;

		vn_lock(lowerdvp, LK_EXCLUSIVE | LK_RETRY);

		/*
		 * Only do a LOOKUP on the bottom node, since
		 * we won't be making changes to it anyway.
		 */
		nameiop = cnp->cn_nameiop;
		cnp->cn_nameiop = LOOKUP;
		if (um->um_op == UNMNT_BELOW) {
			/* "below" mounts look up with the mount's creds. */
			saved_cred = cnp->cn_cred;
			cnp->cn_cred = um->um_cred;
		}

		/*
		 * we shouldn't have to worry about locking interactions
		 * between the lower layer and our union layer (w.r.t.
		 * `..' processing) because we don't futz with lowervp
		 * locks in the union-node instantiation code path.
		 */
		lerror = union_lookup1(um->um_lowervp, &lowerdvp,
				&lowervp, cnp);
		if (um->um_op == UNMNT_BELOW)
			cnp->cn_cred = saved_cred;
		cnp->cn_nameiop = nameiop;

		if (lowervp != lowerdvp)
			VOP_UNLOCK(lowerdvp);
	} else {
		lerror = ENOENT;
		/*
		 * No lower directory (or it is whited out): for `..'
		 * fall back to the lower vnode of the parent union
		 * node, if there is one.
		 */
		if ((cnp->cn_flags & ISDOTDOT) && dun->un_pvp != NULLVP) {
			lowervp = LOWERVP(dun->un_pvp);
			if (lowervp != NULLVP) {
				vref(lowervp);
				vn_lock(lowervp, LK_EXCLUSIVE | LK_RETRY);
				lerror = 0;
			}
		}
	}

	/*
	 * EJUSTRETURN is used by underlying filesystems to indicate that
	 * a directory modification op was started successfully.
	 * This will only happen in the upper layer, since
	 * the lower layer only does LOOKUPs.
	 * If this union is mounted read-only, bounce it now.
	 */

	if ((uerror == EJUSTRETURN) && (cnp->cn_flags & ISLASTCN) &&
	    (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
	    ((cnp->cn_nameiop == CREATE) || (cnp->cn_nameiop == RENAME)))
		uerror = EROFS;

	/*
	 * at this point, we have uerror and lerror indicating
	 * possible errors with the lookups in the upper and lower
	 * layers.  additionally, uppervp and lowervp are (locked)
	 * references to existing vnodes in the upper and lower layers.
	 *
	 * there are now three cases to consider.
	 * 1. if both layers returned an error, then return whatever
	 *    error the upper layer generated.
	 *
	 * 2. if the top layer failed and the bottom layer succeeded
	 *    then two subcases occur.
	 *    a.  the bottom vnode is not a directory, in which
	 *	  case just return a new union vnode referencing
	 *	  an empty top layer and the existing bottom layer.
	 *    b.  the bottom vnode is a directory, in which case
	 *	  create a new directory in the top-level and
	 *	  continue as in case 3.
	 *
	 * 3. if the top layer succeeded then return a new union
	 *    vnode referencing whatever the new top layer and
	 *    whatever the bottom layer returned.
	 */

	*ap->a_vpp = NULLVP;


	/* case 1. */
	if ((uerror != 0) && (lerror != 0)) {
		return (uerror);
	}

	/* case 2. */
	if (uerror != 0 /* && (lerror == 0) */ ) {
		if (lowervp->v_type == VDIR) { /* case 2b. */
			/*
			 * We may be racing another process to make the
			 * upper-level shadow directory.  Be careful with
			 * locks/etc!
			 * If we have to create a shadow directory and want
			 * to commit the node we have to restart the lookup
			 * to get the componentname right.
			 */
			if (upperdvp) {
				VOP_UNLOCK(upperdvp);
				uerror = union_mkshadow(um, upperdvp, cnp,
				    &uppervp);
				vn_lock(upperdvp, LK_EXCLUSIVE | LK_RETRY);
				if (uerror == 0 && cnp->cn_nameiop != LOOKUP) {
					vrele(uppervp);
					if (lowervp != NULLVP)
						vput(lowervp);
					goto start;
				}
			}
			if (uerror) {
				if (lowervp != NULLVP) {
					vput(lowervp);
					lowervp = NULLVP;
				}
				return (uerror);
			}
		}
	} else { /* uerror == 0 */
		if (uppervp != upperdvp)
			VOP_UNLOCK(uppervp);
	}

	if (lowervp != NULLVP)
		VOP_UNLOCK(lowervp);

	/*
	 * union_allocvp() takes over our references on success; on
	 * failure we must drop them ourselves.
	 */
	error = union_allocvp(ap->a_vpp, dvp->v_mount, dvp, upperdvp, cnp,
			      uppervp, lowervp, 1);

	if (error) {
		if (uppervp != NULLVP)
			vrele(uppervp);
		if (lowervp != NULLVP)
			vrele(lowervp);
		return error;
	}

	return 0;
}
533
534int
535union_create(void *v)
536{
537	struct vop_create_v3_args /* {
538		struct vnode *a_dvp;
539		struct vnode **a_vpp;
540		struct componentname *a_cnp;
541		struct vattr *a_vap;
542	} */ *ap = v;
543	struct union_node *un = VTOUNION(ap->a_dvp);
544	struct vnode *dvp = un->un_uppervp;
545	struct componentname *cnp = ap->a_cnp;
546
547	if (dvp != NULLVP) {
548		int error;
549		struct vnode *vp;
550		struct mount *mp;
551
552		mp = ap->a_dvp->v_mount;
553
554		vp = NULL;
555		error = VOP_CREATE(dvp, &vp, cnp, ap->a_vap);
556		if (error)
557			return (error);
558
559		error = union_allocvp(ap->a_vpp, mp, NULLVP, NULLVP, cnp, vp,
560				NULLVP, 1);
561		if (error)
562			vrele(vp);
563		return (error);
564	}
565
566	return (EROFS);
567}
568
569int
570union_whiteout(void *v)
571{
572	struct vop_whiteout_args /* {
573		struct vnode *a_dvp;
574		struct componentname *a_cnp;
575		int a_flags;
576	} */ *ap = v;
577	struct union_node *un = VTOUNION(ap->a_dvp);
578	struct componentname *cnp = ap->a_cnp;
579
580	if (un->un_uppervp == NULLVP)
581		return (EOPNOTSUPP);
582
583	return (VOP_WHITEOUT(un->un_uppervp, cnp, ap->a_flags));
584}
585
586int
587union_mknod(void *v)
588{
589	struct vop_mknod_v3_args /* {
590		struct vnode *a_dvp;
591		struct vnode **a_vpp;
592		struct componentname *a_cnp;
593		struct vattr *a_vap;
594	} */ *ap = v;
595	struct union_node *un = VTOUNION(ap->a_dvp);
596	struct vnode *dvp = un->un_uppervp;
597	struct componentname *cnp = ap->a_cnp;
598
599	if (dvp != NULLVP) {
600		int error;
601		struct vnode *vp;
602		struct mount *mp;
603
604		mp = ap->a_dvp->v_mount;
605		error = VOP_MKNOD(dvp, &vp, cnp, ap->a_vap);
606		if (error)
607			return (error);
608
609		error = union_allocvp(ap->a_vpp, mp, NULLVP, NULLVP,
610				      cnp, vp, NULLVP, 1);
611		if (error)
612			vrele(vp);
613		return (error);
614	}
615
616	return (EROFS);
617}
618
/*
 * Open a union node: open the upper vnode if one exists; otherwise
 * open the lower vnode, first copying the file up when a regular file
 * is opened for writing.  Device nodes are refused on MNT_NODEV
 * mounts in either layer.
 */
int
union_open(void *v)
{
	struct vop_open_args /* {
		struct vnodeop_desc *a_desc;
		struct vnode *a_vp;
		int a_mode;
		kauth_cred_t a_cred;
	} */ *ap = v;
	struct union_node *un = VTOUNION(ap->a_vp);
	struct vnode *tvp;
	int mode = ap->a_mode;
	kauth_cred_t cred = ap->a_cred;
	struct lwp *l = curlwp;
	int error;

	/*
	 * If there is an existing upper vp then simply open that.
	 */
	tvp = un->un_uppervp;
	if (tvp == NULLVP) {
		/*
		 * If the lower vnode is being opened for writing, then
		 * copy the file contents to the upper vnode and open that,
		 * otherwise can simply open the lower vnode.
		 */
		tvp = un->un_lowervp;
		if ((ap->a_mode & FWRITE) && (tvp->v_type == VREG)) {
			/* No need to copy data O_TRUNC will discard. */
			error = union_copyup(un, (mode&O_TRUNC) == 0, cred, l);
			if (error == 0)
				error = VOP_OPEN(un->un_uppervp, mode, cred);
			if (error == 0) {
				/* Account the writer on the upper vnode. */
				mutex_enter(un->un_uppervp->v_interlock);
				un->un_uppervp->v_writecount++;
				mutex_exit(un->un_uppervp->v_interlock);
			}
			return (error);
		}

		/*
		 * Just open the lower vnode, but check for nodev mount flag
		 */
		if ((tvp->v_type == VBLK || tvp->v_type == VCHR) &&
		    (ap->a_vp->v_mount->mnt_flag & MNT_NODEV))
			return ENXIO;
		/* Count lower-layer opens; union_close() balances this. */
		un->un_openl++;
		vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY);
		error = VOP_OPEN(tvp, mode, cred);
		VOP_UNLOCK(tvp);

		return (error);
	}
	/*
	 * Just open the upper vnode, checking for nodev mount flag first
	 */
	if ((tvp->v_type == VBLK || tvp->v_type == VCHR) &&
	    (ap->a_vp->v_mount->mnt_flag & MNT_NODEV))
		return ENXIO;

	error = VOP_OPEN(tvp, mode, cred);
	if (error == 0 && (ap->a_mode & FWRITE)) {
		/* Account the writer on the upper vnode. */
		mutex_enter(tvp->v_interlock);
		tvp->v_writecount++;
		mutex_exit(tvp->v_interlock);
	}

	return (error);
}
687
/*
 * Close a union node by forwarding VOP_CLOSE() to whichever layer was
 * opened: the upper vnode when present, otherwise the lower one
 * (balancing the un_openl count taken in union_open()).  The writer
 * count bumped at open time is undone here for FWRITE closes.
 */
int
union_close(void *v)
{
	struct vop_close_args /* {
		struct vnode *a_vp;
		int  a_fflag;
		kauth_cred_t a_cred;
	} */ *ap = v;
	struct union_node *un = VTOUNION(ap->a_vp);
	struct vnode *vp;
	int error;
	bool do_lock;

	vp = un->un_uppervp;
	if (vp != NULLVP) {
		/* Upper layer: no extra locking done here — presumably
		 * covered by the union node's own lock (NOTE(review):
		 * confirm the shared-lock arrangement). */
		do_lock = false;
	} else {
		KASSERT(un->un_openl > 0);
		--un->un_openl;
		vp = un->un_lowervp;
		do_lock = true;
	}

	KASSERT(vp != NULLVP);
	/* Redirect the call so VCALL() dispatches on the chosen layer. */
	ap->a_vp = vp;
	if ((ap->a_fflag & FWRITE)) {
		/* Writes are only ever opened against the upper layer. */
		KASSERT(vp == un->un_uppervp);
		mutex_enter(vp->v_interlock);
		vp->v_writecount--;
		mutex_exit(vp->v_interlock);
	}
	if (do_lock)
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	error = VCALL(vp, VOFFSET(vop_close), ap);
	if (do_lock)
		VOP_UNLOCK(vp);

	return error;
}
727
728/*
729 * Check access permission on the union vnode.
730 * The access check being enforced is to check
731 * against both the underlying vnode, and any
732 * copied vnode.  This ensures that no additional
733 * file permissions are given away simply because
734 * the user caused an implicit file copy.
735 */
736int
737union_access(void *v)
738{
739	struct vop_access_args /* {
740		struct vnodeop_desc *a_desc;
741		struct vnode *a_vp;
742		accmode_t a_accmode;
743		kauth_cred_t a_cred;
744	} */ *ap = v;
745	struct vnode *vp = ap->a_vp;
746	struct union_node *un = VTOUNION(vp);
747	int error = EACCES;
748	struct union_mount *um = MOUNTTOUNIONMOUNT(vp->v_mount);
749
750	/*
751	 * Disallow write attempts on read-only file systems;
752	 * unless the file is a socket, fifo, or a block or
753	 * character device resident on the file system.
754	 */
755	if (ap->a_accmode & VWRITE) {
756		switch (vp->v_type) {
757		case VDIR:
758		case VLNK:
759		case VREG:
760			if (vp->v_mount->mnt_flag & MNT_RDONLY)
761				return (EROFS);
762			break;
763		case VBAD:
764		case VBLK:
765		case VCHR:
766		case VSOCK:
767		case VFIFO:
768		case VNON:
769		default:
770			break;
771		}
772	}
773
774	if (un->un_uppervp == NULLVP &&
775	    (un->un_lowervp->v_type == VREG) &&
776	    (ap->a_accmode & VWRITE)) {
777		error = union_copyup(un, 1, ap->a_cred, curlwp);
778		if (error)
779			return error;
780	}
781
782	if ((vp = un->un_uppervp) != NULLVP) {
783		ap->a_vp = vp;
784		return (VCALL(vp, VOFFSET(vop_access), ap));
785	}
786
787	if ((vp = un->un_lowervp) != NULLVP) {
788		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
789		ap->a_vp = vp;
790		error = VCALL(vp, VOFFSET(vop_access), ap);
791		if (error == 0) {
792			if (um->um_op == UNMNT_BELOW) {
793				ap->a_cred = um->um_cred;
794				error = VCALL(vp, VOFFSET(vop_access), ap);
795			}
796		}
797		VOP_UNLOCK(vp);
798		if (error)
799			return (error);
800	}
801
802	return (error);
803}
804
805/*
806 * We handle getattr only to change the fsid and
807 * track object sizes
808 */
809int
810union_getattr(void *v)
811{
812	struct vop_getattr_args /* {
813		struct vnode *a_vp;
814		struct vattr *a_vap;
815		kauth_cred_t a_cred;
816	} */ *ap = v;
817	int error;
818	struct union_node *un = VTOUNION(ap->a_vp);
819	struct vnode *vp = un->un_uppervp;
820	struct vattr *vap;
821	struct vattr va;
822
823
824	/*
825	 * Some programs walk the filesystem hierarchy by counting
826	 * links to directories to avoid stat'ing all the time.
827	 * This means the link count on directories needs to be "correct".
828	 * The only way to do that is to call getattr on both layers
829	 * and fix up the link count.  The link count will not necessarily
830	 * be accurate but will be large enough to defeat the tree walkers.
831	 *
832	 * To make life more interesting, some filesystems don't keep
833	 * track of link counts in the expected way, and return a
834	 * link count of `1' for those directories; if either of the
835	 * component directories returns a link count of `1', we return a 1.
836	 */
837
838	vap = ap->a_vap;
839
840	vp = un->un_uppervp;
841	if (vp != NULLVP) {
842		error = VOP_GETATTR(vp, vap, ap->a_cred);
843		if (error)
844			return (error);
845		mutex_enter(&un->un_lock);
846		union_newsize(ap->a_vp, vap->va_size, VNOVAL);
847	}
848
849	if (vp == NULLVP) {
850		vp = un->un_lowervp;
851	} else if (vp->v_type == VDIR) {
852		vp = un->un_lowervp;
853		if (vp != NULLVP)
854			vap = &va;
855	} else {
856		vp = NULLVP;
857	}
858
859	if (vp != NULLVP) {
860		if (vp == un->un_lowervp)
861			vn_lock(vp, LK_SHARED | LK_RETRY);
862		error = VOP_GETATTR(vp, vap, ap->a_cred);
863		if (vp == un->un_lowervp)
864			VOP_UNLOCK(vp);
865		if (error)
866			return (error);
867		mutex_enter(&un->un_lock);
868		union_newsize(ap->a_vp, VNOVAL, vap->va_size);
869	}
870
871	if ((vap != ap->a_vap) && (vap->va_type == VDIR)) {
872		/*
873		 * Link count manipulation:
874		 *	- If both return "2", return 2 (no subdirs)
875		 *	- If one or the other return "1", return "1" (ENOCLUE)
876		 */
877		if ((ap->a_vap->va_nlink == 2) &&
878		    (vap->va_nlink == 2))
879			;
880		else if (ap->a_vap->va_nlink != 1) {
881			if (vap->va_nlink == 1)
882				ap->a_vap->va_nlink = 1;
883			else
884				ap->a_vap->va_nlink += vap->va_nlink;
885		}
886	}
887	ap->a_vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsidx.__fsid_val[0];
888	return (0);
889}
890
/*
 * Set attributes on a union node.  Attribute changes must land in the
 * upper layer; truncating a lower regular file forces a copy-up first
 * (this implements the O_TRUNC|O_CREAT open path).  Special nodes in
 * the lower layer tolerate a size-0 "truncate"; everything else
 * without an upper vnode fails with EROFS.
 */
int
union_setattr(void *v)
{
	struct vop_setattr_args /* {
		struct vnode *a_vp;
		struct vattr *a_vap;
		kauth_cred_t a_cred;
	} */ *ap = v;
	struct vattr *vap = ap->a_vap;
	struct vnode *vp = ap->a_vp;
	struct union_node *un = VTOUNION(vp);
	bool size_only;		/* All but va_size are VNOVAL. */
	int error;

	size_only = (vap->va_flags == VNOVAL && vap->va_uid == (uid_t)VNOVAL &&
	    vap->va_gid == (gid_t)VNOVAL && vap->va_atime.tv_sec == VNOVAL &&
	    vap->va_mtime.tv_sec == VNOVAL && vap->va_mode == (mode_t)VNOVAL);

	if (!size_only && (vp->v_mount->mnt_flag & MNT_RDONLY))
		return (EROFS);
	if (vap->va_size != VNOVAL) {
 		switch (vp->v_type) {
 		case VDIR:
 			return (EISDIR);
 		case VCHR:
 		case VBLK:
 		case VSOCK:
 		case VFIFO:
			break;
		case VREG:
		case VLNK:
 		default:
			/*
			 * Disallow write attempts if the filesystem is
			 * mounted read-only.
			 */
			if (vp->v_mount->mnt_flag & MNT_RDONLY)
				return (EROFS);
		}
	}

	/*
	 * Handle case of truncating lower object to zero size,
	 * by creating a zero length upper object.  This is to
	 * handle the case of open with O_TRUNC and O_CREAT.
	 */
	if ((un->un_uppervp == NULLVP) &&
	    /* assert(un->un_lowervp != NULLVP) */
	    (un->un_lowervp->v_type == VREG)) {
		/* Only copy the data across when not truncating to 0. */
		error = union_copyup(un, (vap->va_size != 0),
						ap->a_cred, curlwp);
		if (error)
			return (error);
	}

	/*
	 * Try to set attributes in upper layer, ignore size change to zero
	 * for devices to handle O_TRUNC and return read-only filesystem error
	 * otherwise.
	 */
	if (un->un_uppervp != NULLVP) {
		error = VOP_SETATTR(un->un_uppervp, vap, ap->a_cred);
		if ((error == 0) && (vap->va_size != VNOVAL)) {
			/* un_lock is released by union_newsize(). */
			mutex_enter(&un->un_lock);
			union_newsize(ap->a_vp, vap->va_size, VNOVAL);
		}
	} else {
		KASSERT(un->un_lowervp != NULLVP);
		if (NODE_IS_SPECIAL(un->un_lowervp)) {
			if (size_only &&
			    (vap->va_size == 0 || vap->va_size == VNOVAL))
				error = 0;
			else
				error = EROFS;
		} else {
			error = EROFS;
		}
	}

	return (error);
}
972
/*
 * Read from whichever layer backs this union node (upper preferred,
 * per OTHERVP()), locking the lower vnode explicitly when it is used.
 * Afterwards the cached layer sizes are grown to cover the new uio
 * offset if the underlying object turned out to be larger.
 */
int
union_read(void *v)
{
	struct vop_read_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int  a_ioflag;
		kauth_cred_t a_cred;
	} */ *ap = v;
	int error;
	struct vnode *vp = OTHERVP(ap->a_vp);
	int dolock = (vp == LOWERVP(ap->a_vp));

	/* Only the lower vnode needs explicit locking here. */
	if (dolock)
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	error = VOP_READ(vp, ap->a_uio, ap->a_ioflag, ap->a_cred);
	if (dolock)
		VOP_UNLOCK(vp);

	/*
	 * XXX
	 * perhaps the size of the underlying object has changed under
	 * our feet.  take advantage of the offset information present
	 * in the uio structure.
	 */
	if (error == 0) {
		struct union_node *un = VTOUNION(ap->a_vp);
		off_t cur = ap->a_uio->uio_offset;
		off_t usz = VNOVAL, lsz = VNOVAL;

		mutex_enter(&un->un_lock);
		if (vp == un->un_uppervp) {
			if (cur > un->un_uppersz)
				usz = cur;
		} else {
			if (cur > un->un_lowersz)
				lsz = cur;
		}

		/* union_newsize() releases un_lock; otherwise drop it. */
		if (usz != VNOVAL || lsz != VNOVAL)
			union_newsize(ap->a_vp, usz, lsz);
		else
			mutex_exit(&un->un_lock);
	}

	return (error);
}
1020
/*
 * Write to the upper layer, which must exist by the time a write
 * reaches us (the file is copied up at open/setattr time) -- except
 * for special nodes, which are written in place in the lower layer.
 * The cached upper size is grown to cover the new uio offset.
 *
 * NOTE(review): this historically declares vop_read_args rather than
 * vop_write_args; the field layouts are used identically here, but
 * confirm before "fixing" the declaration.
 */
int
union_write(void *v)
{
	struct vop_read_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int  a_ioflag;
		kauth_cred_t a_cred;
	} */ *ap = v;
	int error;
	struct vnode *vp;
	struct union_node *un = VTOUNION(ap->a_vp);

	vp = UPPERVP(ap->a_vp);
	if (vp == NULLVP) {
		vp = LOWERVP(ap->a_vp);
		if (NODE_IS_SPECIAL(vp)) {
			/* Devices/sockets/fifos: write the lower vnode. */
			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
			error = VOP_WRITE(vp, ap->a_uio, ap->a_ioflag,
			    ap->a_cred);
			VOP_UNLOCK(vp);
			return error;
		}
		panic("union: missing upper layer in write");
	}

	error = VOP_WRITE(vp, ap->a_uio, ap->a_ioflag, ap->a_cred);

	/*
	 * the size of the underlying object may be changed by the
	 * write.
	 */
	if (error == 0) {
		off_t cur = ap->a_uio->uio_offset;

		/* union_newsize() releases un_lock; otherwise drop it. */
		mutex_enter(&un->un_lock);
		if (cur > un->un_uppersz)
			union_newsize(ap->a_vp, cur, VNOVAL);
		else
			mutex_exit(&un->un_lock);
	}

	return (error);
}
1065
1066int
1067union_ioctl(void *v)
1068{
1069	struct vop_ioctl_args /* {
1070		struct vnode *a_vp;
1071		int  a_command;
1072		void *a_data;
1073		int  a_fflag;
1074		kauth_cred_t a_cred;
1075	} */ *ap = v;
1076	struct vnode *ovp = OTHERVP(ap->a_vp);
1077
1078	ap->a_vp = ovp;
1079	return (VCALL(ovp, VOFFSET(vop_ioctl), ap));
1080}
1081
1082int
1083union_poll(void *v)
1084{
1085	struct vop_poll_args /* {
1086		struct vnode *a_vp;
1087		int a_events;
1088	} */ *ap = v;
1089	struct vnode *ovp = OTHERVP(ap->a_vp);
1090
1091	ap->a_vp = ovp;
1092	return (VCALL(ovp, VOFFSET(vop_poll), ap));
1093}
1094
1095int
1096union_revoke(void *v)
1097{
1098	struct vop_revoke_args /* {
1099		struct vnode *a_vp;
1100		int a_flags;
1101		struct proc *a_p;
1102	} */ *ap = v;
1103	struct vnode *vp = ap->a_vp;
1104
1105	if (UPPERVP(vp))
1106		VOP_REVOKE(UPPERVP(vp), ap->a_flags);
1107	if (LOWERVP(vp))
1108		VOP_REVOKE(LOWERVP(vp), ap->a_flags);
1109	vgone(vp);	/* XXXAD?? */
1110	return (0);
1111}
1112
1113int
1114union_mmap(void *v)
1115{
1116	struct vop_mmap_args /* {
1117		struct vnode *a_vp;
1118		vm_prot_t a_prot;
1119		kauth_cred_t a_cred;
1120	} */ *ap = v;
1121	struct vnode *ovp = OTHERVP(ap->a_vp);
1122
1123	ap->a_vp = ovp;
1124	return (VCALL(ovp, VOFFSET(vop_mmap), ap));
1125}
1126
int
union_fsync(void *v)
{
	/*
	 * Sync a union vnode: flush special (device) nodes via
	 * spec_fsync() first, then pass the request to the backing
	 * layer, locking the lower vnode when it is the target.
	 *
	 * Note: field names corrected to match the code below
	 * (a_offlo precedes a_offhi in vop_fsync_args).
	 */
	struct vop_fsync_args /* {
		struct vnode *a_vp;
		kauth_cred_t a_cred;
		int  a_flags;
		off_t a_offlo;
		off_t a_offhi;
	} */ *ap = v;
	int error = 0;
	struct vnode *targetvp;

	/*
	 * If vinvalbuf is calling us, it's a "shallow fsync" -- don't
	 * bother syncing the underlying vnodes, since (a) they'll be
	 * fsync'ed when reclaimed and (b) we could deadlock if
	 * they're locked; otherwise, pass it through to the
	 * underlying layer.
	 */
	if (ap->a_vp->v_type == VBLK || ap->a_vp->v_type == VCHR) {
		error = spec_fsync(v);
		if (error)
			return error;
	}

	/* Shallow fsync (see comment above): stop here. */
	if (ap->a_flags & FSYNC_RECLAIM)
		return 0;

	targetvp = OTHERVP(ap->a_vp);
	if (targetvp != NULLVP) {
		/* Only the lower vnode needs explicit locking here. */
		int dolock = (targetvp == LOWERVP(ap->a_vp));

		if (dolock)
			vn_lock(targetvp, LK_EXCLUSIVE | LK_RETRY);
		error = VOP_FSYNC(targetvp, ap->a_cred, ap->a_flags,
			    ap->a_offlo, ap->a_offhi);
		if (dolock)
			VOP_UNLOCK(targetvp);
	}

	return (error);
}
1170
1171int
1172union_seek(void *v)
1173{
1174	struct vop_seek_args /* {
1175		struct vnode *a_vp;
1176		off_t  a_oldoff;
1177		off_t  a_newoff;
1178		kauth_cred_t a_cred;
1179	} */ *ap = v;
1180	struct vnode *ovp = OTHERVP(ap->a_vp);
1181
1182	ap->a_vp = ovp;
1183	return (VCALL(ovp, VOFFSET(vop_seek), ap));
1184}
1185
int
union_remove(void *v)
{
	/*
	 * Remove a file from the union.  If the object exists in the
	 * upper layer, remove it there (setting DOWHITEOUT when a
	 * lower copy must stay hidden); if it exists only in the
	 * lower layer, just create a whiteout in the upper directory.
	 */
	struct vop_remove_v3_args /* {
		struct vnode *a_dvp;
		struct vnode *a_vp;
		struct componentname *a_cnp;
		nlink_t ctx_vp_new_nlink;
	} */ *ap = v;
	int error;
	struct union_node *dun = VTOUNION(ap->a_dvp);
	struct union_node *un = VTOUNION(ap->a_vp);
	struct componentname *cnp = ap->a_cnp;

	/* The directory must have an upper layer to modify. */
	if (dun->un_uppervp == NULLVP)
		panic("union remove: null upper vnode");

	if (un->un_uppervp != NULLVP) {
		struct vnode *dvp = dun->un_uppervp;
		struct vnode *vp = un->un_uppervp;

		/* Account for VOP_REMOVE to vrele vp.  */
		vref(vp);
		/* Ask whether a whiteout is needed to mask a lower copy. */
		if (union_dowhiteout(un, cnp->cn_cred))
			cnp->cn_flags |= DOWHITEOUT;
		error = VOP_REMOVE(dvp, vp, cnp);
		if (!error)
			union_removed_upper(un);
		vrele(ap->a_vp);
	} else {
		/* Lower-layer only: mask the name with a whiteout. */
		error = union_mkwhiteout(
			MOUNTTOUNIONMOUNT(UNIONTOV(dun)->v_mount),
			dun->un_uppervp, ap->a_cnp, un);
		vput(ap->a_vp);
	}

	return (error);
}
1224
int
union_link(void *v)
{
	/*
	 * Hard-link a_vp into directory a_dvp.  Links can only be made
	 * in the upper layer, so a lower-only source is copied up
	 * first; the copyup may require dropping the upper directory
	 * lock, which invalidates the lookup state and forces the
	 * relookup dance below.
	 */
	struct vop_link_v2_args /* {
		struct vnode *a_dvp;
		struct vnode *a_vp;
		struct componentname *a_cnp;
	} */ *ap = v;
	int error = 0;
	struct componentname *cnp = ap->a_cnp;
	struct union_node *dun;
	struct vnode *vp;
	struct vnode *dvp;

	dun = VTOUNION(ap->a_dvp);

	KASSERT((ap->a_cnp->cn_flags & LOCKPARENT) != 0);

	if (ap->a_dvp->v_op != ap->a_vp->v_op) {
		/* Source is not a union vnode; link it as-is. */
		vp = ap->a_vp;
	} else {
		struct union_node *un = VTOUNION(ap->a_vp);
		if (un->un_uppervp == NULLVP) {
			/*
			 * Drop the directory lock during copyup only when
			 * it is the same vnode copyup will work in.
			 */
			const bool droplock = (dun->un_uppervp == un->un_dirvp);

			/*
			 * Needs to be copied before we can link it.
			 */
			vn_lock(ap->a_vp, LK_EXCLUSIVE | LK_RETRY);
			if (droplock)
				VOP_UNLOCK(dun->un_uppervp);
			error = union_copyup(un, 1, cnp->cn_cred, curlwp);
			if (droplock) {
				vn_lock(dun->un_uppervp,
				    LK_EXCLUSIVE | LK_RETRY);
				/*
				 * During copyup, we dropped the lock on the
				 * dir and invalidated any saved namei lookup
				 * state for the directory we'll be entering
				 * the link in.  We need to re-run the lookup
				 * in that directory to reset any state needed
				 * for VOP_LINK.
				 * Call relookup on the union-layer to reset
				 * the state.
				 */
				vp  = NULLVP;
				if (dun->un_uppervp == NULLVP)
					 panic("union: null upperdvp?");
				error = relookup(ap->a_dvp, &vp, ap->a_cnp, 0);
				if (error) {
					VOP_UNLOCK(ap->a_vp);
					return EROFS;	/* ? */
				}
				if (vp != NULLVP) {
					/*
					 * The name we want to create has
					 * mysteriously appeared (a race?)
					 */
					error = EEXIST;
					VOP_UNLOCK(ap->a_vp);
					vput(vp);
					return (error);
				}
			}
			VOP_UNLOCK(ap->a_vp);
		}
		/* After copyup the source now has an upper vnode. */
		vp = un->un_uppervp;
	}

	dvp = dun->un_uppervp;
	if (dvp == NULLVP)
		error = EROFS;

	if (error)
		return (error);

	/* Perform the link in the upper layer. */
	return VOP_LINK(dvp, vp, cnp);
}
1303
int
union_rename(void *v)
{
	/*
	 * Rename.  Each union vnode argument is mapped to its
	 * upper-layer counterpart, since a rename can only happen in
	 * the writable upper layer.  A source directory or source
	 * vnode that exists only in the lower layer fails with EXDEV;
	 * the target vnode may legitimately be absent from the upper
	 * layer.
	 */
	struct vop_rename_args /* {
		struct vnode *a_fdvp;
		struct vnode *a_fvp;
		struct componentname *a_fcnp;
		struct vnode *a_tdvp;
		struct vnode *a_tvp;
		struct componentname *a_tcnp;
	} */ *ap = v;
	int error;

	struct vnode *fdvp = ap->a_fdvp;
	struct vnode *fvp = ap->a_fvp;
	struct vnode *tdvp = ap->a_tdvp;
	struct vnode *tvp = ap->a_tvp;

	/*
	 * Account for VOP_RENAME to vrele all nodes.
	 * Note: VOP_RENAME will unlock tdvp.
	 */

	if (fdvp->v_op == union_vnodeop_p) {	/* always true */
		struct union_node *un = VTOUNION(fdvp);
		if (un->un_uppervp == NULLVP) {
			/*
			 * this should never happen in normal
			 * operation but might if there was
			 * a problem creating the top-level shadow
			 * directory.
			 */
			error = EXDEV;
			goto bad;
		}

		/* Substitute the upper vnode; take our own reference. */
		fdvp = un->un_uppervp;
		vref(fdvp);
	}

	if (fvp->v_op == union_vnodeop_p) {	/* always true */
		struct union_node *un = VTOUNION(fvp);
		if (un->un_uppervp == NULLVP) {
			/* XXX: should do a copyup */
			error = EXDEV;
			goto bad;
		}

		/* A lower copy exists: mask the old name with a whiteout. */
		if (un->un_lowervp != NULLVP)
			ap->a_fcnp->cn_flags |= DOWHITEOUT;

		fvp = un->un_uppervp;
		vref(fvp);
	}

	if (tdvp->v_op == union_vnodeop_p) {
		struct union_node *un = VTOUNION(tdvp);
		if (un->un_uppervp == NULLVP) {
			/*
			 * this should never happen in normal
			 * operation but might if there was
			 * a problem creating the top-level shadow
			 * directory.
			 */
			error = EXDEV;
			goto bad;
		}

		tdvp = un->un_uppervp;
		vref(tdvp);
	}

	if (tvp != NULLVP && tvp->v_op == union_vnodeop_p) {
		struct union_node *un = VTOUNION(tvp);

		/* Target may have no upper vnode; that is allowed. */
		tvp = un->un_uppervp;
		if (tvp != NULLVP) {
			vref(tvp);
		}
	}

	error = VOP_RENAME(fdvp, fvp, ap->a_fcnp, tdvp, tvp, ap->a_tcnp);
	goto out;

bad:
	/* Undo the caller's references; tdvp/tvp arrive locked. */
	vput(tdvp);
	if (tvp != NULLVP)
		vput(tvp);
	vrele(fdvp);
	vrele(fvp);

out:
	/* Drop the union-layer references we replaced above. */
	if (fdvp != ap->a_fdvp) {
		vrele(ap->a_fdvp);
	}
	if (fvp != ap->a_fvp) {
		vrele(ap->a_fvp);
	}
	if (tdvp != ap->a_tdvp) {
		vrele(ap->a_tdvp);
	}
	if (tvp != ap->a_tvp) {
		vrele(ap->a_tvp);
	}
	return (error);
}
1410
int
union_mkdir(void *v)
{
	/*
	 * Create a directory: the mkdir is performed in the upper
	 * layer and a fresh union vnode is allocated over the result.
	 * Fails with EROFS when there is no upper layer.
	 */
	struct vop_mkdir_v3_args /* {
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
		struct vattr *a_vap;
	} */ *ap = v;
	struct union_node *un = VTOUNION(ap->a_dvp);
	struct vnode *dvp = un->un_uppervp;
	struct componentname *cnp = ap->a_cnp;

	if (dvp != NULLVP) {
		int error;
		struct vnode *vp;

		vp = NULL;
		error = VOP_MKDIR(dvp, &vp, cnp, ap->a_vap);
		if (error) {
			/*
			 * NOTE(review): releases the union-layer directory
			 * on failure -- confirm this matches the
			 * vop_mkdir_v3 reference-counting contract.
			 */
			vrele(ap->a_dvp);
			return (error);
		}

		/* Wrap the new upper directory in a union vnode. */
		error = union_allocvp(ap->a_vpp, ap->a_dvp->v_mount, ap->a_dvp,
				NULLVP, cnp, vp, NULLVP, 1);
		if (error)
			vrele(vp);
		return (error);
	}

	return (EROFS);
}
1444
int
union_rmdir(void *v)
{
	/*
	 * Remove a directory from the union.  Mirrors union_remove():
	 * a directory present in the upper layer is removed there
	 * (with a whiteout if a lower copy must stay hidden); a
	 * lower-only directory is masked with a whiteout.  The
	 * union-level emptiness check happens first.
	 */
	struct vop_rmdir_v2_args /* {
		struct vnode *a_dvp;
		struct vnode *a_vp;
		struct componentname *a_cnp;
	} */ *ap = v;
	int error;
	struct union_node *dun = VTOUNION(ap->a_dvp);
	struct union_node *un = VTOUNION(ap->a_vp);
	struct componentname *cnp = ap->a_cnp;

	/* The directory must have an upper layer to modify. */
	if (dun->un_uppervp == NULLVP)
		panic("union rmdir: null upper vnode");

	/* Refuse to remove a directory that is not empty union-wide. */
	error = union_check_rmdir(un, cnp->cn_cred);
	if (error) {
		vput(ap->a_vp);
		return error;
	}

	if (un->un_uppervp != NULLVP) {
		struct vnode *dvp = dun->un_uppervp;
		struct vnode *vp = un->un_uppervp;

		/* Account for VOP_RMDIR to vrele vp.  */
		vref(vp);
		/* Ask whether a whiteout is needed to mask a lower copy. */
		if (union_dowhiteout(un, cnp->cn_cred))
			cnp->cn_flags |= DOWHITEOUT;
		error = VOP_RMDIR(dvp, vp, ap->a_cnp);
		if (!error)
			union_removed_upper(un);
		vrele(ap->a_vp);
	} else {
		/* Lower-layer only: mask the name with a whiteout. */
		error = union_mkwhiteout(
			MOUNTTOUNIONMOUNT(UNIONTOV(dun)->v_mount),
			dun->un_uppervp, ap->a_cnp, un);
		vput(ap->a_vp);
	}

	return (error);
}
1488
1489int
1490union_symlink(void *v)
1491{
1492	struct vop_symlink_v3_args /* {
1493		struct vnode *a_dvp;
1494		struct vnode **a_vpp;
1495		struct componentname *a_cnp;
1496		struct vattr *a_vap;
1497		char *a_target;
1498	} */ *ap = v;
1499	struct union_node *un = VTOUNION(ap->a_dvp);
1500	struct vnode *dvp = un->un_uppervp;
1501	struct componentname *cnp = ap->a_cnp;
1502
1503	if (dvp != NULLVP) {
1504		int error;
1505
1506		error = VOP_SYMLINK(dvp, ap->a_vpp, cnp, ap->a_vap,
1507				    ap->a_target);
1508		return (error);
1509	}
1510
1511	return (EROFS);
1512}
1513
1514/*
1515 * union_readdir works in concert with getdirentries and
1516 * readdir(3) to provide a list of entries in the unioned
1517 * directories.  getdirentries is responsible for walking
1518 * down the union stack.  readdir(3) is responsible for
1519 * eliminating duplicate names from the returned data stream.
1520 */
1521int
1522union_readdir(void *v)
1523{
1524	struct vop_readdir_args /* {
1525		struct vnodeop_desc *a_desc;
1526		struct vnode *a_vp;
1527		struct uio *a_uio;
1528		kauth_cred_t a_cred;
1529		int *a_eofflag;
1530		u_long *a_cookies;
1531		int a_ncookies;
1532	} */ *ap = v;
1533	struct union_node *un = VTOUNION(ap->a_vp);
1534	struct vnode *vp;
1535	int dolock, error;
1536
1537	if (un->un_hooknode) {
1538		KASSERT(un->un_uppervp == NULLVP);
1539		KASSERT(un->un_lowervp != NULLVP);
1540		vp = un->un_lowervp;
1541		dolock = 1;
1542	} else {
1543		vp = un->un_uppervp;
1544		dolock = 0;
1545	}
1546	if (vp == NULLVP)
1547		return 0;
1548
1549	if (dolock)
1550		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1551	ap->a_vp = vp;
1552	error = VCALL(vp, VOFFSET(vop_readdir), ap);
1553	if (dolock)
1554		VOP_UNLOCK(vp);
1555
1556	return error;
1557}
1558
1559int
1560union_readlink(void *v)
1561{
1562	struct vop_readlink_args /* {
1563		struct vnode *a_vp;
1564		struct uio *a_uio;
1565		kauth_cred_t a_cred;
1566	} */ *ap = v;
1567	int error;
1568	struct vnode *vp = OTHERVP(ap->a_vp);
1569	int dolock = (vp == LOWERVP(ap->a_vp));
1570
1571	if (dolock)
1572		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1573	ap->a_vp = vp;
1574	error = VCALL(vp, VOFFSET(vop_readlink), ap);
1575	if (dolock)
1576		VOP_UNLOCK(vp);
1577
1578	return (error);
1579}
1580
1581int
1582union_abortop(void *v)
1583{
1584	struct vop_abortop_args /* {
1585		struct vnode *a_dvp;
1586		struct componentname *a_cnp;
1587	} */ *ap = v;
1588
1589	KASSERT(UPPERVP(ap->a_dvp) != NULL);
1590
1591	ap->a_dvp = UPPERVP(ap->a_dvp);
1592	return VCALL(ap->a_dvp, VOFFSET(vop_abortop), ap);
1593}
1594
int
union_inactive(void *v)
{
	/*
	 * Deactivate a union vnode: release any cached directory
	 * vnodes and tell the VFS whether the node can be recycled
	 * (only when it has dropped out of the union cache).
	 */
	struct vop_inactive_v2_args /* {
		const struct vnodeop_desc *a_desc;
		struct vnode *a_vp;
		bool *a_recycle;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct union_node *un = VTOUNION(vp);
	struct vnode **vpp;

	/*
	 * Do nothing (and _don't_ bypass).
	 * Wait to vrele lowervp until reclaim,
	 * so that until then our union_node is in the
	 * cache and reusable.
	 *
	 * NEEDSWORK: Someday, consider inactive'ing
	 * the lowervp and then trying to reactivate it
	 * with capabilities (v_id)
	 * like they do in the name lookup cache code.
	 * That's too much work for now.
	 */

	/* Drop references on the cached dircache vnodes, if any. */
	if (un->un_dircache != 0) {
		for (vpp = un->un_dircache; *vpp != NULLVP; vpp++)
			vrele(*vpp);
		free(un->un_dircache, M_TEMP);
		un->un_dircache = 0;
	}

	/* Recycle only nodes no longer held in the union cache. */
	*ap->a_recycle = ((un->un_cflags & UN_CACHED) == 0);

	return (0);
}
1631
int
union_reclaim(void *v)
{
	/*
	 * Reclaim a union vnode: transfer any write-count we have been
	 * accounting on behalf of the upper vnode back to it, then
	 * free the union node.
	 */
	struct vop_reclaim_v2_args /* {
		struct vnode *a_vp;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct vnode *uvp = UPPERVP(vp);

	VOP_UNLOCK(vp);

	if (uvp != NULL) {
		/* The union vnode shares its interlock with the upper. */
		mutex_enter(uvp->v_interlock);
		KASSERT(vp->v_interlock == uvp->v_interlock);
		uvp->v_writecount -= vp->v_writecount;
		mutex_exit(uvp->v_interlock);
	}

	/* Release the union node and its layer references. */
	union_freevp(vp);

	return (0);
}
1654
1655static int
1656union_lock1(struct vnode *vp, struct vnode *lockvp, int flags)
1657{
1658	struct vop_lock_args ap;
1659
1660	ap.a_desc = VDESC(vop_lock);
1661	ap.a_vp = lockvp;
1662	ap.a_flags = flags;
1663
1664	if (lockvp == vp)
1665		return genfs_lock(&ap);
1666	else
1667		return VCALL(ap.a_vp, VOFFSET(vop_lock), &ap);
1668}
1669
1670static int
1671union_unlock1(struct vnode *vp, struct vnode *lockvp)
1672{
1673	struct vop_unlock_args ap;
1674
1675	ap.a_desc = VDESC(vop_unlock);
1676	ap.a_vp = lockvp;
1677
1678	if (lockvp == vp)
1679		return genfs_unlock(&ap);
1680	else
1681		return VCALL(ap.a_vp, VOFFSET(vop_unlock), &ap);
1682}
1683
int
union_lock(void *v)
{
	/*
	 * Lock a union vnode.  The lock actually lives on LOCKVP(vp),
	 * which may change while we sleep (e.g. on copyup), so the
	 * blocking path retries until the lock vnode is stable.  After
	 * locking, a dead (revoked/reclaimed) vnode makes the lock
	 * fail.
	 */
	struct vop_lock_args /* {
		struct vnode *a_vp;
		int a_flags;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp, *lockvp;
	struct union_node *un = VTOUNION(vp);
	int flags = ap->a_flags;
	int error;

	/* Non-blocking path: any contention returns EBUSY. */
	if ((flags & LK_NOWAIT) != 0) {
		if (!mutex_tryenter(&un->un_lock))
			return EBUSY;
		lockvp = LOCKVP(vp);
		error = union_lock1(vp, lockvp, flags);
		mutex_exit(&un->un_lock);
		if (error)
			return error;
		/* Check for a dead vnode without blocking. */
		if (mutex_tryenter(vp->v_interlock)) {
			error = vdead_check(vp, VDEAD_NOWAIT);
			mutex_exit(vp->v_interlock);
		} else
			error = EBUSY;
		if (error)
			union_unlock1(vp, lockvp);
		return error;
	}

	/* Blocking path: retry until LOCKVP() is stable across sleep. */
	mutex_enter(&un->un_lock);
	for (;;) {
		lockvp = LOCKVP(vp);
		mutex_exit(&un->un_lock);
		error = union_lock1(vp, lockvp, flags);
		if (error != 0)
			return error;
		mutex_enter(&un->un_lock);
		if (lockvp == LOCKVP(vp))
			break;
		/* Lock vnode changed while we slept; undo and retry. */
		union_unlock1(vp, lockvp);
	}
	mutex_exit(&un->un_lock);

	/* Fail the lock if the vnode died while we acquired it. */
	mutex_enter(vp->v_interlock);
	error = vdead_check(vp, VDEAD_NOWAIT);
	if (error) {
		union_unlock1(vp, lockvp);
		error = vdead_check(vp, 0);
		KASSERT(error == ENOENT);
	}
	mutex_exit(vp->v_interlock);
	return error;
}
1738
1739int
1740union_unlock(void *v)
1741{
1742	struct vop_unlock_args /* {
1743		struct vnode *a_vp;
1744		int a_flags;
1745	} */ *ap = v;
1746	struct vnode *vp = ap->a_vp, *lockvp;
1747
1748	lockvp = LOCKVP(vp);
1749	union_unlock1(vp, lockvp);
1750
1751	return 0;
1752}
1753
1754int
1755union_bmap(void *v)
1756{
1757	struct vop_bmap_args /* {
1758		struct vnode *a_vp;
1759		daddr_t  a_bn;
1760		struct vnode **a_vpp;
1761		daddr_t *a_bnp;
1762		int *a_runp;
1763	} */ *ap = v;
1764	int error;
1765	struct vnode *vp = OTHERVP(ap->a_vp);
1766	int dolock = (vp == LOWERVP(ap->a_vp));
1767
1768	if (dolock)
1769		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1770	ap->a_vp = vp;
1771	error = VCALL(vp, VOFFSET(vop_bmap), ap);
1772	if (dolock)
1773		VOP_UNLOCK(vp);
1774
1775	return (error);
1776}
1777
1778int
1779union_print(void *v)
1780{
1781	struct vop_print_args /* {
1782		struct vnode *a_vp;
1783	} */ *ap = v;
1784	struct vnode *vp = ap->a_vp;
1785
1786	printf("\ttag VT_UNION, vp=%p, uppervp=%p, lowervp=%p\n",
1787			vp, UPPERVP(vp), LOWERVP(vp));
1788	if (UPPERVP(vp) != NULLVP)
1789		vprint("union: upper", UPPERVP(vp));
1790	if (LOWERVP(vp) != NULLVP)
1791		vprint("union: lower", LOWERVP(vp));
1792	if (VTOUNION(vp)->un_dircache) {
1793		struct vnode **vpp;
1794		for (vpp = VTOUNION(vp)->un_dircache; *vpp != NULLVP; vpp++)
1795			vprint("dircache:", *vpp);
1796	}
1797
1798	return (0);
1799}
1800
1801int
1802union_islocked(void *v)
1803{
1804	struct vop_islocked_args /* {
1805		struct vnode *a_vp;
1806	} */ *ap = v;
1807	struct vnode *vp;
1808	struct union_node *un;
1809
1810	un = VTOUNION(ap->a_vp);
1811	mutex_enter(&un->un_lock);
1812	vp = LOCKVP(ap->a_vp);
1813	mutex_exit(&un->un_lock);
1814
1815	if (vp == ap->a_vp)
1816		return genfs_islocked(ap);
1817	else
1818		return VOP_ISLOCKED(vp);
1819}
1820
1821int
1822union_pathconf(void *v)
1823{
1824	struct vop_pathconf_args /* {
1825		struct vnode *a_vp;
1826		int a_name;
1827		int *a_retval;
1828	} */ *ap = v;
1829	int error;
1830	struct vnode *vp = OTHERVP(ap->a_vp);
1831	int dolock = (vp == LOWERVP(ap->a_vp));
1832
1833	if (dolock)
1834		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1835	ap->a_vp = vp;
1836	error = VCALL(vp, VOFFSET(vop_pathconf), ap);
1837	if (dolock)
1838		VOP_UNLOCK(vp);
1839
1840	return (error);
1841}
1842
1843int
1844union_advlock(void *v)
1845{
1846	struct vop_advlock_args /* {
1847		struct vnode *a_vp;
1848		void *a_id;
1849		int  a_op;
1850		struct flock *a_fl;
1851		int  a_flags;
1852	} */ *ap = v;
1853	struct vnode *ovp = OTHERVP(ap->a_vp);
1854
1855	ap->a_vp = ovp;
1856	return (VCALL(ovp, VOFFSET(vop_advlock), ap));
1857}
1858
1859int
1860union_strategy(void *v)
1861{
1862	struct vop_strategy_args /* {
1863		struct vnode *a_vp;
1864		struct buf *a_bp;
1865	} */ *ap = v;
1866	struct vnode *ovp = OTHERVP(ap->a_vp);
1867	struct buf *bp = ap->a_bp;
1868
1869	KASSERT(ovp != NULLVP);
1870	if (!NODE_IS_SPECIAL(ovp))
1871		KASSERT((bp->b_flags & B_READ) || ovp != LOWERVP(bp->b_vp));
1872
1873	return (VOP_STRATEGY(ovp, bp));
1874}
1875
1876int
1877union_bwrite(void *v)
1878{
1879	struct vop_bwrite_args /* {
1880		struct vnode *a_vp;
1881		struct buf *a_bp;
1882	} */ *ap = v;
1883	struct vnode *ovp = OTHERVP(ap->a_vp);
1884	struct buf *bp = ap->a_bp;
1885
1886	KASSERT(ovp != NULLVP);
1887	if (!NODE_IS_SPECIAL(ovp))
1888		KASSERT((bp->b_flags & B_READ) || ovp != LOWERVP(bp->b_vp));
1889
1890	return (VOP_BWRITE(ovp, bp));
1891}
1892
1893int
1894union_getpages(void *v)
1895{
1896	struct vop_getpages_args /* {
1897		struct vnode *a_vp;
1898		voff_t a_offset;
1899		struct vm_page **a_m;
1900		int *a_count;
1901		int a_centeridx;
1902		vm_prot_t a_access_type;
1903		int a_advice;
1904		int a_flags;
1905	} */ *ap = v;
1906	struct vnode *vp = ap->a_vp;
1907
1908	KASSERT(rw_lock_held(vp->v_uobj.vmobjlock));
1909
1910	if (ap->a_flags & PGO_LOCKED) {
1911		return EBUSY;
1912	}
1913	ap->a_vp = OTHERVP(vp);
1914	KASSERT(vp->v_uobj.vmobjlock == ap->a_vp->v_uobj.vmobjlock);
1915
1916	/* Just pass the request on to the underlying layer. */
1917	return VCALL(ap->a_vp, VOFFSET(vop_getpages), ap);
1918}
1919
1920int
1921union_putpages(void *v)
1922{
1923	struct vop_putpages_args /* {
1924		struct vnode *a_vp;
1925		voff_t a_offlo;
1926		voff_t a_offhi;
1927		int a_flags;
1928	} */ *ap = v;
1929	struct vnode *vp = ap->a_vp;
1930
1931	KASSERT(rw_lock_held(vp->v_uobj.vmobjlock));
1932
1933	ap->a_vp = OTHERVP(vp);
1934	KASSERT(vp->v_uobj.vmobjlock == ap->a_vp->v_uobj.vmobjlock);
1935
1936	if (ap->a_flags & PGO_RECLAIM) {
1937		rw_exit(vp->v_uobj.vmobjlock);
1938		return 0;
1939	}
1940
1941	/* Just pass the request on to the underlying layer. */
1942	return VCALL(ap->a_vp, VOFFSET(vop_putpages), ap);
1943}
1944
1945int
1946union_kqfilter(void *v)
1947{
1948	struct vop_kqfilter_args /* {
1949		struct vnode	*a_vp;
1950		struct knote	*a_kn;
1951	} */ *ap = v;
1952	int error;
1953
1954	/*
1955	 * We watch either the upper layer file (if it already exists),
1956	 * or the lower layer one. If there is lower layer file only
1957	 * at this moment, we will keep watching that lower layer file
1958	 * even if upper layer file would be created later on.
1959	 */
1960	if (UPPERVP(ap->a_vp))
1961		error = VOP_KQFILTER(UPPERVP(ap->a_vp), ap->a_kn);
1962	else if (LOWERVP(ap->a_vp))
1963		error = VOP_KQFILTER(LOWERVP(ap->a_vp), ap->a_kn);
1964	else {
1965		/* panic? */
1966		error = EOPNOTSUPP;
1967	}
1968
1969	return (error);
1970}
1971