/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 1994 Jan-Simon Pendry
 * Copyright (c) 1994
 *	The Regents of the University of California.  All rights reserved.
 * Copyright (c) 2005, 2006, 2012 Masanori Ozawa <ozawa@ongs.co.jp>, ONGS Inc.
 * Copyright (c) 2006, 2012 Daichi Goto <daichi@freebsd.org>
 *
 * This code is derived from software contributed to Berkeley by
 * Jan-Simon Pendry.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)union_subr.c	8.20 (Berkeley) 5/20/95
 * $FreeBSD: stable/11/sys/fs/unionfs/union_subr.c 330897 2018-03-14 03:19:51Z eadler $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/namei.h>
#include <sys/proc.h>
#include <sys/vnode.h>
#include <sys/dirent.h>
#include <sys/fcntl.h>
#include <sys/filedesc.h>
#include <sys/stat.h>
#include <sys/resourcevar.h>

#include <security/mac/mac_framework.h>

#include <vm/uma.h>

#include <fs/unionfs/union.h>

#define NUNIONFSNODECACHE 16

static MALLOC_DEFINE(M_UNIONFSHASH, "UNIONFS hash", "UNIONFS hash table");
MALLOC_DEFINE(M_UNIONFSNODE, "UNIONFS node", "UNIONFS vnode private part");
MALLOC_DEFINE(M_UNIONFSPATH, "UNIONFS path", "UNIONFS path private part");

/*
 * Initialize
 */
int
unionfs_init(struct vfsconf *vfsp)
{
	UNIONFSDEBUG("unionfs_init\n");	/* printed during system boot */
	return (0);
}

/*
 * Uninitialize
 */
int
unionfs_uninit(struct vfsconf *vfsp)
{
	return (0);
}

static struct unionfs_node_hashhead *
unionfs_get_hashhead(struct vnode *dvp, char *path)
{
	int		count;
	char		hash;
	struct unionfs_node *unp;

	hash = 0;
	unp = VTOUNIONFS(dvp);
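	/*
	 * Compute a simple additive hash of the name and mask it to the
	 * size of the per-directory hash table.
	 */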
	if (path != NULL) {
		for (count = 0; path[count]; count++)
			hash += path[count];
	}

	return (&(unp->un_hashtbl[hash & (unp->un_hashmask)]));
}

/*
 * Get the cached vnode.
 */
static struct vnode *
unionfs_get_cached_vnode(struct vnode *uvp, struct vnode *lvp,
			struct vnode *dvp, char *path)
{
	struct unionfs_node_hashhead *hd;
	struct unionfs_node *unp;
	struct vnode   *vp;

	KASSERT((uvp == NULLVP || uvp->v_type == VDIR),
	    ("unionfs_get_cached_vnode: v_type != VDIR"));
	KASSERT((lvp == NULLVP || lvp->v_type == VDIR),
	    ("unionfs_get_cached_vnode: v_type != VDIR"));

	VI_LOCK(dvp);
	hd = unionfs_get_hashhead(dvp, path);
	LIST_FOREACH(unp, hd, un_hash) {
		if (!strcmp(unp->un_path, path)) {
			vp = UNIONFSTOV(unp);
			VI_LOCK_FLAGS(vp, MTX_DUPOK);
			VI_UNLOCK(dvp);
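			/*
			 * Do not return a vnode that is already doomed
			 * or currently being inactivated.
			 */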
			vp->v_iflag &= ~VI_OWEINACT;
			if ((vp->v_iflag & (VI_DOOMED | VI_DOINGINACT)) != 0) {
				VI_UNLOCK(vp);
				vp = NULLVP;
			} else
				VI_UNLOCK(vp);
			return (vp);
		}
	}
	VI_UNLOCK(dvp);

	return (NULLVP);
}

/*
 * Add the new vnode into cache.
 */
static struct vnode *
unionfs_ins_cached_vnode(struct unionfs_node *uncp,
			struct vnode *dvp, char *path)
{
	struct unionfs_node_hashhead *hd;
	struct unionfs_node *unp;
	struct vnode   *vp;

	KASSERT((uncp->un_uppervp==NULLVP || uncp->un_uppervp->v_type==VDIR),
	    ("unionfs_ins_cached_vnode: v_type != VDIR"));
	KASSERT((uncp->un_lowervp==NULLVP || uncp->un_lowervp->v_type==VDIR),
	    ("unionfs_ins_cached_vnode: v_type != VDIR"));

	VI_LOCK(dvp);
	hd = unionfs_get_hashhead(dvp, path);
	LIST_FOREACH(unp, hd, un_hash) {
		if (!strcmp(unp->un_path, path)) {
			vp = UNIONFSTOV(unp);
			VI_LOCK_FLAGS(vp, MTX_DUPOK);
			vp->v_iflag &= ~VI_OWEINACT;
			if ((vp->v_iflag & (VI_DOOMED | VI_DOINGINACT)) != 0) {
				LIST_INSERT_HEAD(hd, uncp, un_hash);
				VI_UNLOCK(vp);
				vp = NULLVP;
			} else
				VI_UNLOCK(vp);
			VI_UNLOCK(dvp);
			return (vp);
		}
	}

	LIST_INSERT_HEAD(hd, uncp, un_hash);
	VI_UNLOCK(dvp);

	return (NULLVP);
}

/*
 * Remove the vnode.
 */
static void
unionfs_rem_cached_vnode(struct unionfs_node *unp, struct vnode *dvp)
{
	KASSERT((unp != NULL), ("unionfs_rem_cached_vnode: null node"));
	KASSERT((dvp != NULLVP),
	    ("unionfs_rem_cached_vnode: null parent vnode"));
	KASSERT((unp->un_hash.le_prev != NULL),
	    ("unionfs_rem_cached_vnode: null hash"));

	VI_LOCK(dvp);
	LIST_REMOVE(unp, un_hash);
	unp->un_hash.le_next = NULL;
	unp->un_hash.le_prev = NULL;
	VI_UNLOCK(dvp);
}

/*
 * Make a new unionfs node or get an existing one.
 *
 * uppervp and lowervp should be unlocked: when the new unionfs vnode is
 * locked, uppervp or lowervp is locked as well.  To prevent a deadlock,
 * do not lock more than one of them at the same time.
 */
int
unionfs_nodeget(struct mount *mp, struct vnode *uppervp,
		struct vnode *lowervp, struct vnode *dvp,
		struct vnode **vpp, struct componentname *cnp,
		struct thread *td)
{
	struct unionfs_mount *ump;
	struct unionfs_node *unp;
	struct vnode   *vp;
	int		error;
	int		lkflags;
	enum vtype	vt;
	char	       *path;

	ump = MOUNTTOUNIONFSMOUNT(mp);
	lkflags = (cnp ? cnp->cn_lkflags : 0);
	path = (cnp ? cnp->cn_nameptr : NULL);
	*vpp = NULLVP;

	if (uppervp == NULLVP && lowervp == NULLVP)
		panic("unionfs_nodeget: upper and lower is null");

	vt = (uppervp != NULLVP ? uppervp->v_type : lowervp->v_type);

	/* If the ISLASTCN flag is not set, the path check is skipped. */
	if (cnp && !(cnp->cn_flags & ISLASTCN))
		path = NULL;

	/* check the cache */
	if (path != NULL && dvp != NULLVP && vt == VDIR) {
		vp = unionfs_get_cached_vnode(uppervp, lowervp, dvp, path);
		if (vp != NULLVP) {
			vref(vp);
			*vpp = vp;
			goto unionfs_nodeget_out;
		}
	}

	if ((uppervp == NULLVP || ump->um_uppervp != uppervp) ||
	    (lowervp == NULLVP || ump->um_lowervp != lowervp)) {
		/* dvp will be NULLVP only in case of root vnode. */
		if (dvp == NULLVP)
			return (EINVAL);
	}
	unp = malloc(sizeof(struct unionfs_node),
	    M_UNIONFSNODE, M_WAITOK | M_ZERO);

	error = getnewvnode("unionfs", mp, &unionfs_vnodeops, &vp);
	if (error != 0) {
		free(unp, M_UNIONFSNODE);
		return (error);
	}
	error = insmntque(vp, mp);	/* XXX: Too early for mpsafe fs */
	if (error != 0) {
		free(unp, M_UNIONFSNODE);
		return (error);
	}
	if (dvp != NULLVP)
		vref(dvp);
	if (uppervp != NULLVP)
		vref(uppervp);
	if (lowervp != NULLVP)
		vref(lowervp);

	if (vt == VDIR)
		unp->un_hashtbl = hashinit(NUNIONFSNODECACHE, M_UNIONFSHASH,
		    &(unp->un_hashmask));

	unp->un_vnode = vp;
	unp->un_uppervp = uppervp;
	unp->un_lowervp = lowervp;
	unp->un_dvp = dvp;
	if (uppervp != NULLVP)
		vp->v_vnlock = uppervp->v_vnlock;
	else
		vp->v_vnlock = lowervp->v_vnlock;

	if (path != NULL) {
		unp->un_path = (char *)
		    malloc(cnp->cn_namelen +1, M_UNIONFSPATH, M_WAITOK|M_ZERO);
		bcopy(cnp->cn_nameptr, unp->un_path, cnp->cn_namelen);
		unp->un_path[cnp->cn_namelen] = '\0';
	}
	vp->v_type = vt;
	vp->v_data = unp;

	if ((uppervp != NULLVP && ump->um_uppervp == uppervp) &&
	    (lowervp != NULLVP && ump->um_lowervp == lowervp))
		vp->v_vflag |= VV_ROOT;

	if (path != NULL && dvp != NULLVP && vt == VDIR)
		*vpp = unionfs_ins_cached_vnode(unp, dvp, path);
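	/*
	 * If an equivalent directory node was already in the cache,
	 * drop the references taken for the new node and use the
	 * cached vnode instead.
	 */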
	if ((*vpp) != NULLVP) {
		if (dvp != NULLVP)
			vrele(dvp);
		if (uppervp != NULLVP)
			vrele(uppervp);
		if (lowervp != NULLVP)
			vrele(lowervp);

		unp->un_uppervp = NULLVP;
		unp->un_lowervp = NULLVP;
		unp->un_dvp = NULLVP;
		vrele(vp);
		vp = *vpp;
		vref(vp);
	} else
		*vpp = vp;

unionfs_nodeget_out:
	if (lkflags & LK_TYPE_MASK)
		vn_lock(vp, lkflags | LK_RETRY);

	return (0);
}

/*
 * Clean up the unionfs node.
 */
void
unionfs_noderem(struct vnode *vp, struct thread *td)
{
	int		count;
	struct unionfs_node *unp, *unp_t1, *unp_t2;
	struct unionfs_node_hashhead *hd;
	struct unionfs_node_status *unsp, *unsp_tmp;
	struct vnode   *lvp;
	struct vnode   *uvp;
	struct vnode   *dvp;

	/*
	 * Use the interlock to protect the clearing of v_data to
	 * prevent faults in unionfs_lock().
	 */
	VI_LOCK(vp);
	unp = VTOUNIONFS(vp);
	lvp = unp->un_lowervp;
	uvp = unp->un_uppervp;
	dvp = unp->un_dvp;
	unp->un_lowervp = unp->un_uppervp = NULLVP;
	vp->v_vnlock = &(vp->v_lock);
	vp->v_data = NULL;
	vp->v_object = NULL;
	VI_UNLOCK(vp);

	if (lvp != NULLVP)
		VOP_UNLOCK(lvp, LK_RELEASE);
	if (uvp != NULLVP)
		VOP_UNLOCK(uvp, LK_RELEASE);

	if (dvp != NULLVP && unp->un_hash.le_prev != NULL)
		unionfs_rem_cached_vnode(unp, dvp);

	if (lockmgr(vp->v_vnlock, LK_EXCLUSIVE, VI_MTX(vp)) != 0)
		panic("the lock for deletion is unacquirable.");

	if (lvp != NULLVP)
		vrele(lvp);
	if (uvp != NULLVP)
		vrele(uvp);
	if (dvp != NULLVP) {
		vrele(dvp);
		unp->un_dvp = NULLVP;
	}
	if (unp->un_path != NULL) {
		free(unp->un_path, M_UNIONFSPATH);
		unp->un_path = NULL;
	}

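	/*
	 * Empty the directory-name cache hash table before destroying it.
	 */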
	if (unp->un_hashtbl != NULL) {
		for (count = 0; count <= unp->un_hashmask; count++) {
			hd = unp->un_hashtbl + count;
			LIST_FOREACH_SAFE(unp_t1, hd, un_hash, unp_t2) {
				LIST_REMOVE(unp_t1, un_hash);
				unp_t1->un_hash.le_next = NULL;
				unp_t1->un_hash.le_prev = NULL;
			}
		}
		hashdestroy(unp->un_hashtbl, M_UNIONFSHASH, unp->un_hashmask);
	}

	LIST_FOREACH_SAFE(unsp, &(unp->un_unshead), uns_list, unsp_tmp) {
		LIST_REMOVE(unsp, uns_list);
		free(unsp, M_TEMP);
	}
	free(unp, M_UNIONFSNODE);
}

/*
 * Get the unionfs node status.
 * The vnode must be locked exclusively.
 */
void
unionfs_get_node_status(struct unionfs_node *unp, struct thread *td,
			struct unionfs_node_status **unspp)
{
	struct unionfs_node_status *unsp;
	pid_t pid = td->td_proc->p_pid;

	KASSERT(NULL != unspp, ("null pointer"));
	ASSERT_VOP_ELOCKED(UNIONFSTOV(unp), "unionfs_get_node_status");

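	/* Search for an existing status entry for this process. */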
	LIST_FOREACH(unsp, &(unp->un_unshead), uns_list) {
		if (unsp->uns_pid == pid) {
			*unspp = unsp;
			return;
		}
	}

	/* create a new unionfs node status */
	unsp = malloc(sizeof(struct unionfs_node_status),
	    M_TEMP, M_WAITOK | M_ZERO);

	unsp->uns_pid = pid;
	LIST_INSERT_HEAD(&(unp->un_unshead), unsp, uns_list);

	*unspp = unsp;
}

/*
 * Remove the unionfs node status, if possible.
 * The vnode must be locked exclusively.
 */
void
unionfs_tryrem_node_status(struct unionfs_node *unp,
			   struct unionfs_node_status *unsp)
{
	KASSERT(NULL != unsp, ("null pointer"));
	ASSERT_VOP_ELOCKED(UNIONFSTOV(unp), "unionfs_tryrem_node_status");

	if (0 < unsp->uns_lower_opencnt || 0 < unsp->uns_upper_opencnt)
		return;

	LIST_REMOVE(unsp, uns_list);
	free(unsp, M_TEMP);
}

/*
 * Create upper node attr.
 */
void
unionfs_create_uppervattr_core(struct unionfs_mount *ump,
			       struct vattr *lva,
			       struct vattr *uva,
			       struct thread *td)
{
	VATTR_NULL(uva);
	uva->va_type = lva->va_type;
	uva->va_atime = lva->va_atime;
	uva->va_mtime = lva->va_mtime;
	uva->va_ctime = lva->va_ctime;

	switch (ump->um_copymode) {
	case UNIONFS_TRANSPARENT:
		uva->va_mode = lva->va_mode;
		uva->va_uid = lva->va_uid;
		uva->va_gid = lva->va_gid;
		break;
	case UNIONFS_MASQUERADE:
		if (ump->um_uid == lva->va_uid) {
			uva->va_mode = lva->va_mode & 077077;
			uva->va_mode |= (lva->va_type == VDIR ? ump->um_udir : ump->um_ufile) & 0700;
			uva->va_uid = lva->va_uid;
			uva->va_gid = lva->va_gid;
		} else {
			uva->va_mode = (lva->va_type == VDIR ? ump->um_udir : ump->um_ufile);
			uva->va_uid = ump->um_uid;
			uva->va_gid = ump->um_gid;
		}
		break;
	default:		/* UNIONFS_TRADITIONAL */
		uva->va_mode = 0777 & ~td->td_proc->p_fd->fd_cmask;
		uva->va_uid = ump->um_uid;
		uva->va_gid = ump->um_gid;
		break;
	}
}

/*
 * Create upper node attr.
 */
int
unionfs_create_uppervattr(struct unionfs_mount *ump,
			  struct vnode *lvp,
			  struct vattr *uva,
			  struct ucred *cred,
			  struct thread *td)
{
	int		error;
	struct vattr	lva;

	if ((error = VOP_GETATTR(lvp, &lva, cred)))
		return (error);

	unionfs_create_uppervattr_core(ump, &lva, uva, td);

	return (error);
}

/*
 * relookup
 *
 * dvp should be locked on entry and will be locked on return.
 *
 * If an error is returned, *vpp will be invalid, otherwise it will hold a
 * locked, referenced vnode. If *vpp == dvp then remember that only one
 * LK_EXCLUSIVE lock is held.
 */
int
unionfs_relookup(struct vnode *dvp, struct vnode **vpp,
		 struct componentname *cnp, struct componentname *cn,
		 struct thread *td, char *path, int pathlen, u_long nameiop)
{
	int	error;

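	/*
	 * Build a private componentname with its own pathname buffer so
	 * that the caller's cnp is left untouched by the relookup.
	 */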
	cn->cn_namelen = pathlen;
	cn->cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK);
	bcopy(path, cn->cn_pnbuf, pathlen);
	cn->cn_pnbuf[pathlen] = '\0';

	cn->cn_nameiop = nameiop;
	cn->cn_flags = (LOCKPARENT | LOCKLEAF | HASBUF | SAVENAME | ISLASTCN);
	cn->cn_lkflags = LK_EXCLUSIVE;
	cn->cn_thread = td;
	cn->cn_cred = cnp->cn_cred;

	cn->cn_nameptr = cn->cn_pnbuf;

	if (nameiop == DELETE)
		cn->cn_flags |= (cnp->cn_flags & (DOWHITEOUT | SAVESTART));
	else if (RENAME == nameiop)
		cn->cn_flags |= (cnp->cn_flags & SAVESTART);
	else if (nameiop == CREATE)
		cn->cn_flags |= NOCACHE;

	vref(dvp);
	VOP_UNLOCK(dvp, LK_RELEASE);

	if ((error = relookup(dvp, vpp, cn))) {
		uma_zfree(namei_zone, cn->cn_pnbuf);
		cn->cn_flags &= ~HASBUF;
		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
	} else
		vrele(dvp);

	return (error);
}

/*
 * relookup for CREATE namei operation.
 *
 * dvp is a unionfs vnode.  dvp should be locked.
 *
 * If unionfs_copyfile() has been called (e.g. from unionfs_link()),
 * the earlier VOP_LOOKUP information is stale, so a relookup is
 * needed before the link etc. can be created.
 */
int
unionfs_relookup_for_create(struct vnode *dvp, struct componentname *cnp,
			    struct thread *td)
{
	int	error;
	struct vnode *udvp;
	struct vnode *vp;
	struct componentname cn;

	udvp = UNIONFSVPTOUPPERVP(dvp);
	vp = NULLVP;

	error = unionfs_relookup(udvp, &vp, cnp, &cn, td, cnp->cn_nameptr,
	    strlen(cnp->cn_nameptr), CREATE);
	if (error)
		return (error);

	if (vp != NULLVP) {
		if (udvp == vp)
			vrele(vp);
		else
			vput(vp);

		error = EEXIST;
	}

	if (cn.cn_flags & HASBUF) {
		uma_zfree(namei_zone, cn.cn_pnbuf);
		cn.cn_flags &= ~HASBUF;
	}

	if (!error) {
		cn.cn_flags |= (cnp->cn_flags & HASBUF);
		cnp->cn_flags = cn.cn_flags;
	}

	return (error);
}

/*
 * relookup for DELETE namei operation.
 *
 * dvp is a unionfs vnode.  dvp should be locked.
 */
int
unionfs_relookup_for_delete(struct vnode *dvp, struct componentname *cnp,
			    struct thread *td)
{
	int	error;
	struct vnode *udvp;
	struct vnode *vp;
	struct componentname cn;

	udvp = UNIONFSVPTOUPPERVP(dvp);
	vp = NULLVP;

	error = unionfs_relookup(udvp, &vp, cnp, &cn, td, cnp->cn_nameptr,
	    strlen(cnp->cn_nameptr), DELETE);
	if (error)
		return (error);

	if (vp == NULLVP)
		error = ENOENT;
	else {
		if (udvp == vp)
			vrele(vp);
		else
			vput(vp);
	}

	if (cn.cn_flags & HASBUF) {
		uma_zfree(namei_zone, cn.cn_pnbuf);
		cn.cn_flags &= ~HASBUF;
	}

	if (!error) {
		cn.cn_flags |= (cnp->cn_flags & HASBUF);
		cnp->cn_flags = cn.cn_flags;
	}

	return (error);
}

/*
 * relookup for RENAME namei operation.
 *
 * dvp is a unionfs vnode.  dvp should be locked.
 */
int
unionfs_relookup_for_rename(struct vnode *dvp, struct componentname *cnp,
			    struct thread *td)
{
	int error;
	struct vnode *udvp;
	struct vnode *vp;
	struct componentname cn;

	udvp = UNIONFSVPTOUPPERVP(dvp);
	vp = NULLVP;

	error = unionfs_relookup(udvp, &vp, cnp, &cn, td, cnp->cn_nameptr,
	    strlen(cnp->cn_nameptr), RENAME);
	if (error)
		return (error);

	if (vp != NULLVP) {
		if (udvp == vp)
			vrele(vp);
		else
			vput(vp);
	}

	if (cn.cn_flags & HASBUF) {
		uma_zfree(namei_zone, cn.cn_pnbuf);
		cn.cn_flags &= ~HASBUF;
	}

	if (!error) {
		cn.cn_flags |= (cnp->cn_flags & HASBUF);
		cnp->cn_flags = cn.cn_flags;
	}

	return (error);

}

/*
 * Update the unionfs_node.
 *
 * uvp is the new, locked upper vnode.  The unionfs vnode's lock is
 * switched to uvp's lock, and the lower vnode's lock is released.
 */
static void
unionfs_node_update(struct unionfs_node *unp, struct vnode *uvp,
		    struct thread *td)
{
	unsigned	count, lockrec;
	struct vnode   *vp;
	struct vnode   *lvp;
	struct vnode   *dvp;

	vp = UNIONFSTOV(unp);
	lvp = unp->un_lowervp;
	ASSERT_VOP_ELOCKED(lvp, "unionfs_node_update");
	dvp = unp->un_dvp;

	/*
	 * lock update
	 */
	VI_LOCK(vp);
	unp->un_uppervp = uvp;
	vp->v_vnlock = uvp->v_vnlock;
	VI_UNLOCK(vp);
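	/*
	 * Carry over any recursive lock count held on the lower vnode's
	 * lock to the newly installed upper vnode lock.
	 */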
	lockrec = lvp->v_vnlock->lk_recurse;
	for (count = 0; count < lockrec; count++)
		vn_lock(uvp, LK_EXCLUSIVE | LK_CANRECURSE | LK_RETRY);

	/*
	 * cache update
	 */
	if (unp->un_path != NULL && dvp != NULLVP && vp->v_type == VDIR) {
		struct unionfs_node_hashhead *hd;

		VI_LOCK(dvp);
		hd = unionfs_get_hashhead(dvp, unp->un_path);
		LIST_REMOVE(unp, un_hash);
		LIST_INSERT_HEAD(hd, unp, un_hash);
		VI_UNLOCK(dvp);
	}
}

/*
 * Create a new shadow dir.
 *
 * udvp should be locked on entry and will be locked on return.
 *
 * If no error is returned, unp will be updated.
 */
int
unionfs_mkshadowdir(struct unionfs_mount *ump, struct vnode *udvp,
		    struct unionfs_node *unp, struct componentname *cnp,
		    struct thread *td)
{
	int		error;
	struct vnode   *lvp;
	struct vnode   *uvp;
	struct vattr	va;
	struct vattr	lva;
	struct componentname cn;
	struct mount   *mp;
	struct ucred   *cred;
	struct ucred   *credbk;
	struct uidinfo *rootinfo;

	if (unp->un_uppervp != NULLVP)
		return (EEXIST);

	lvp = unp->un_lowervp;
	uvp = NULLVP;
	credbk = cnp->cn_cred;

	/* Switch credentials to root. */
	rootinfo = uifind((uid_t)0);
	cred = crdup(cnp->cn_cred);
	/*
	 * The calls to chgproccnt() are needed to compensate for change_ruid()
	 * calling chgproccnt().
	 */
	chgproccnt(cred->cr_ruidinfo, 1, 0);
	change_euid(cred, rootinfo);
	change_ruid(cred, rootinfo);
	change_svuid(cred, (uid_t)0);
	uifree(rootinfo);
	cnp->cn_cred = cred;

	memset(&cn, 0, sizeof(cn));

	if ((error = VOP_GETATTR(lvp, &lva, cnp->cn_cred)))
		goto unionfs_mkshadowdir_abort;

	if ((error = unionfs_relookup(udvp, &uvp, cnp, &cn, td, cnp->cn_nameptr, cnp->cn_namelen, CREATE)))
		goto unionfs_mkshadowdir_abort;
	if (uvp != NULLVP) {
		if (udvp == uvp)
			vrele(uvp);
		else
			vput(uvp);

		error = EEXIST;
		goto unionfs_mkshadowdir_free_out;
	}

	if ((error = vn_start_write(udvp, &mp, V_WAIT | PCATCH)))
		goto unionfs_mkshadowdir_free_out;
	unionfs_create_uppervattr_core(ump, &lva, &va, td);

	error = VOP_MKDIR(udvp, &uvp, &cn, &va);

	if (!error) {
		unionfs_node_update(unp, uvp, td);

		/*
		 * XXX The bug that prevented the uid/gid from being set
		 * here has been corrected.  Ignore errors.
		 */
		va.va_type = VNON;
		VOP_SETATTR(uvp, &va, cn.cn_cred);
	}
	vn_finished_write(mp);

unionfs_mkshadowdir_free_out:
	if (cn.cn_flags & HASBUF) {
		uma_zfree(namei_zone, cn.cn_pnbuf);
		cn.cn_flags &= ~HASBUF;
	}

unionfs_mkshadowdir_abort:
	cnp->cn_cred = credbk;
	chgproccnt(cred->cr_ruidinfo, -1, 0);
	crfree(cred);

	return (error);
}

/*
 * Create a new whiteout.
 *
 * dvp should be locked on entry and will be locked on return.
 */
int
unionfs_mkwhiteout(struct vnode *dvp, struct componentname *cnp,
		   struct thread *td, char *path)
{
	int		error;
	struct vnode   *wvp;
	struct componentname cn;
	struct mount   *mp;

	if (path == NULL)
		path = cnp->cn_nameptr;

	wvp = NULLVP;
	if ((error = unionfs_relookup(dvp, &wvp, cnp, &cn, td, path, strlen(path), CREATE)))
		return (error);
	if (wvp != NULLVP) {
		if (cn.cn_flags & HASBUF) {
			uma_zfree(namei_zone, cn.cn_pnbuf);
			cn.cn_flags &= ~HASBUF;
		}
		if (dvp == wvp)
			vrele(wvp);
		else
			vput(wvp);

		return (EEXIST);
	}

	if ((error = vn_start_write(dvp, &mp, V_WAIT | PCATCH)))
		goto unionfs_mkwhiteout_free_out;
	error = VOP_WHITEOUT(dvp, &cn, CREATE);

	vn_finished_write(mp);

unionfs_mkwhiteout_free_out:
	if (cn.cn_flags & HASBUF) {
		uma_zfree(namei_zone, cn.cn_pnbuf);
		cn.cn_flags &= ~HASBUF;
	}

	return (error);
}

/*
 * Create a new vnode for creating a new shadow file.
 *
 * If an error is returned, *vpp will be invalid, otherwise it will hold a
 * locked, referenced and opened vnode.
 *
 * unp is never updated.
 */
static int
unionfs_vn_create_on_upper(struct vnode **vpp, struct vnode *udvp,
			   struct unionfs_node *unp, struct vattr *uvap,
			   struct thread *td)
{
	struct unionfs_mount *ump;
	struct vnode   *vp;
	struct vnode   *lvp;
	struct ucred   *cred;
	struct vattr	lva;
	int		fmode;
	int		error;
	struct componentname cn;

	ump = MOUNTTOUNIONFSMOUNT(UNIONFSTOV(unp)->v_mount);
	vp = NULLVP;
	lvp = unp->un_lowervp;
	cred = td->td_ucred;
	fmode = FFLAGS(O_WRONLY | O_CREAT | O_TRUNC | O_EXCL);
	error = 0;

	if ((error = VOP_GETATTR(lvp, &lva, cred)) != 0)
		return (error);
	unionfs_create_uppervattr_core(ump, &lva, uvap, td);

	if (unp->un_path == NULL)
		panic("unionfs: un_path is null");

	cn.cn_namelen = strlen(unp->un_path);
	cn.cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK);
	bcopy(unp->un_path, cn.cn_pnbuf, cn.cn_namelen + 1);
	cn.cn_nameiop = CREATE;
	cn.cn_flags = (LOCKPARENT | LOCKLEAF | HASBUF | SAVENAME | ISLASTCN);
	cn.cn_lkflags = LK_EXCLUSIVE;
	cn.cn_thread = td;
	cn.cn_cred = cred;
	cn.cn_nameptr = cn.cn_pnbuf;

	vref(udvp);
	if ((error = relookup(udvp, &vp, &cn)) != 0)
		goto unionfs_vn_create_on_upper_free_out2;
	vrele(udvp);

	if (vp != NULLVP) {
		if (vp == udvp)
			vrele(vp);
		else
			vput(vp);
		error = EEXIST;
		goto unionfs_vn_create_on_upper_free_out1;
	}

	if ((error = VOP_CREATE(udvp, &vp, &cn, uvap)) != 0)
		goto unionfs_vn_create_on_upper_free_out1;

	if ((error = VOP_OPEN(vp, fmode, cred, td, NULL)) != 0) {
		vput(vp);
		goto unionfs_vn_create_on_upper_free_out1;
	}
	VOP_ADD_WRITECOUNT(vp, 1);
	CTR3(KTR_VFS, "%s: vp %p v_writecount increased to %d",  __func__, vp,
	    vp->v_writecount);
	*vpp = vp;

unionfs_vn_create_on_upper_free_out1:
	VOP_UNLOCK(udvp, LK_RELEASE);

unionfs_vn_create_on_upper_free_out2:
	if (cn.cn_flags & HASBUF) {
		uma_zfree(namei_zone, cn.cn_pnbuf);
		cn.cn_flags &= ~HASBUF;
	}

	return (error);
}

/*
 * Copy from lvp to uvp.
 *
 * lvp and uvp should be locked and opened on entry and will be locked and
 * opened on return.
 */
static int
unionfs_copyfile_core(struct vnode *lvp, struct vnode *uvp,
		      struct ucred *cred, struct thread *td)
{
	int		error;
	off_t		offset;
	int		count;
	int		bufoffset;
	char           *buf;
	struct uio	uio;
	struct iovec	iov;

	error = 0;
	memset(&uio, 0, sizeof(uio));

	uio.uio_td = td;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_offset = 0;

	buf = malloc(MAXBSIZE, M_TEMP, M_WAITOK);

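	/*
	 * Read the lower file in MAXBSIZE chunks and write each chunk to
	 * the upper file, retrying short writes until the whole chunk
	 * has been written.
	 */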
	while (error == 0) {
		offset = uio.uio_offset;

		uio.uio_iov = &iov;
		uio.uio_iovcnt = 1;
		iov.iov_base = buf;
		iov.iov_len = MAXBSIZE;
		uio.uio_resid = iov.iov_len;
		uio.uio_rw = UIO_READ;

		if ((error = VOP_READ(lvp, &uio, 0, cred)) != 0)
			break;
		if ((count = MAXBSIZE - uio.uio_resid) == 0)
			break;

		bufoffset = 0;
		while (bufoffset < count) {
			uio.uio_iov = &iov;
			uio.uio_iovcnt = 1;
			iov.iov_base = buf + bufoffset;
			iov.iov_len = count - bufoffset;
			uio.uio_offset = offset + bufoffset;
			uio.uio_resid = iov.iov_len;
			uio.uio_rw = UIO_WRITE;

			if ((error = VOP_WRITE(uvp, &uio, 0, cred)) != 0)
				break;

			bufoffset += (count - bufoffset) - uio.uio_resid;
		}

		uio.uio_offset = offset + bufoffset;
	}

	free(buf, M_TEMP);

	return (error);
}

/*
 * Copy a file from the lower layer to the upper layer.
 *
 * If the contents need to be copied as well, set docopy to 1; otherwise
 * set it to 0.
 *
 * If no error is returned, unp will be updated.
 */
int
unionfs_copyfile(struct unionfs_node *unp, int docopy, struct ucred *cred,
		 struct thread *td)
{
	int		error;
	struct mount   *mp;
	struct vnode   *udvp;
	struct vnode   *lvp;
	struct vnode   *uvp;
	struct vattr	uva;

	lvp = unp->un_lowervp;
	uvp = NULLVP;

	if ((UNIONFSTOV(unp)->v_mount->mnt_flag & MNT_RDONLY))
		return (EROFS);
	if (unp->un_dvp == NULLVP)
		return (EINVAL);
	if (unp->un_uppervp != NULLVP)
		return (EEXIST);
	udvp = VTOUNIONFS(unp->un_dvp)->un_uppervp;
	if (udvp == NULLVP)
		return (EROFS);
	if ((udvp->v_mount->mnt_flag & MNT_RDONLY))
		return (EROFS);

	error = VOP_ACCESS(lvp, VREAD, cred, td);
	if (error != 0)
		return (error);

	if ((error = vn_start_write(udvp, &mp, V_WAIT | PCATCH)) != 0)
		return (error);
	error = unionfs_vn_create_on_upper(&uvp, udvp, unp, &uva, td);
	if (error != 0) {
		vn_finished_write(mp);
		return (error);
	}

	if (docopy != 0) {
		error = VOP_OPEN(lvp, FREAD, cred, td, NULL);
		if (error == 0) {
			error = unionfs_copyfile_core(lvp, uvp, cred, td);
			VOP_CLOSE(lvp, FREAD, cred, td);
		}
	}
	VOP_CLOSE(uvp, FWRITE, cred, td);
	VOP_ADD_WRITECOUNT(uvp, -1);
	CTR3(KTR_VFS, "%s: vp %p v_writecount decreased to %d", __func__, uvp,
	    uvp->v_writecount);

	vn_finished_write(mp);

	if (error == 0) {
		/* Reset the attributes. Ignore errors. */
		uva.va_type = VNON;
		VOP_SETATTR(uvp, &uva, cred);
	}

	unionfs_node_update(unp, uvp, td);

	return (error);
}

/*
 * Check whether vp can be removed by rmdir, i.e. whether it is empty.
 *
 * vp is a unionfs vnode.
 * vp should be locked.
 */
int
unionfs_check_rmdir(struct vnode *vp, struct ucred *cred, struct thread *td)
{
	int		error;
	int		eofflag;
	int		lookuperr;
	struct vnode   *uvp;
	struct vnode   *lvp;
	struct vnode   *tvp;
	struct vattr	va;
	struct componentname cn;
	/*
	 * The size of buf needs to be larger than DIRBLKSIZ.
	 */
	char		buf[256 * 6];
	struct dirent  *dp;
	struct dirent  *edp;
	struct uio	uio;
	struct iovec	iov;

	ASSERT_VOP_ELOCKED(vp, "unionfs_check_rmdir");

	eofflag = 0;
	uvp = UNIONFSVPTOUPPERVP(vp);
	lvp = UNIONFSVPTOLOWERVP(vp);

	/* check opaque */
	if ((error = VOP_GETATTR(uvp, &va, cred)) != 0)
		return (error);
	if (va.va_flags & OPAQUE)
		return (0);

	/* open vnode */
#ifdef MAC
	if ((error = mac_vnode_check_open(cred, vp, VEXEC|VREAD)) != 0)
		return (error);
#endif
	if ((error = VOP_ACCESS(vp, VEXEC|VREAD, cred, td)) != 0)
		return (error);
	if ((error = VOP_OPEN(vp, FREAD, cred, td, NULL)) != 0)
		return (error);

	uio.uio_rw = UIO_READ;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_td = td;
	uio.uio_offset = 0;

#ifdef MAC
	error = mac_vnode_check_readdir(td->td_ucred, lvp);
#endif
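	/*
	 * Scan the lower directory; for each real entry found there,
	 * check whether the upper layer has a matching entry or a
	 * whiteout.  If it has neither, the directory is not empty.
	 */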
	while (!error && !eofflag) {
		iov.iov_base = buf;
		iov.iov_len = sizeof(buf);
		uio.uio_iov = &iov;
		uio.uio_iovcnt = 1;
		uio.uio_resid = iov.iov_len;

		error = VOP_READDIR(lvp, &uio, cred, &eofflag, NULL, NULL);
		if (error != 0)
			break;
		if (eofflag == 0 && uio.uio_resid == sizeof(buf)) {
#ifdef DIAGNOSTIC
			panic("bad readdir response from lower FS.");
#endif
			break;
		}

		edp = (struct dirent*)&buf[sizeof(buf) - uio.uio_resid];
		for (dp = (struct dirent*)buf; !error && dp < edp;
		     dp = (struct dirent*)((caddr_t)dp + dp->d_reclen)) {
			if (dp->d_type == DT_WHT || dp->d_fileno == 0 ||
			    (dp->d_namlen == 1 && dp->d_name[0] == '.') ||
			    (dp->d_namlen == 2 && !bcmp(dp->d_name, "..", 2)))
				continue;

			cn.cn_namelen = dp->d_namlen;
			cn.cn_pnbuf = NULL;
			cn.cn_nameptr = dp->d_name;
			cn.cn_nameiop = LOOKUP;
			cn.cn_flags = (LOCKPARENT | LOCKLEAF | SAVENAME | RDONLY | ISLASTCN);
			cn.cn_lkflags = LK_EXCLUSIVE;
			cn.cn_thread = td;
			cn.cn_cred = cred;

			/*
			 * Verify that the entry really exists in the
			 * lower layer; readdir sometimes returns stale
			 * entries.
			 */
			lookuperr = VOP_LOOKUP(lvp, &tvp, &cn);

			if (!lookuperr)
				vput(tvp);
			else
				continue; /* skip entry */

			/*
			 * Check the upper layer: if it has neither an
			 * existing entry nor a whiteout for this name,
			 * the directory is not empty.
			 */
			cn.cn_flags = (LOCKPARENT | LOCKLEAF | SAVENAME | RDONLY | ISLASTCN);
			lookuperr = VOP_LOOKUP(uvp, &tvp, &cn);

			if (!lookuperr)
				vput(tvp);

			/* ignore exist or whiteout entry */
			if (!lookuperr ||
			    (lookuperr == ENOENT && (cn.cn_flags & ISWHITEOUT)))
				continue;

			error = ENOTEMPTY;
		}
	}

	/* close vnode */
	VOP_CLOSE(vp, FREAD, cred, td);

	return (error);
}

#ifdef DIAGNOSTIC

struct vnode   *
unionfs_checkuppervp(struct vnode *vp, char *fil, int lno)
{
	struct unionfs_node *unp;

	unp = VTOUNIONFS(vp);

#ifdef notyet
	if (vp->v_op != unionfs_vnodeop_p) {
		printf("unionfs_checkuppervp: on non-unionfs-node.\n");
#ifdef KDB
		kdb_enter(KDB_WHY_UNIONFS,
		    "unionfs_checkuppervp: on non-unionfs-node.\n");
#endif
		panic("unionfs_checkuppervp");
	}
#endif
	return (unp->un_uppervp);
}

struct vnode   *
unionfs_checklowervp(struct vnode *vp, char *fil, int lno)
{
	struct unionfs_node *unp;

	unp = VTOUNIONFS(vp);

#ifdef notyet
	if (vp->v_op != unionfs_vnodeop_p) {
		printf("unionfs_checklowervp: on non-unionfs-node.\n");
#ifdef KDB
		kdb_enter(KDB_WHY_UNIONFS,
		    "unionfs_checklowervp: on non-unionfs-node.\n");
#endif
		panic("unionfs_checklowervp");
	}
#endif
	return (unp->un_lowervp);
}
#endif
