union_subr.c revision 185283
1/*-
2 * Copyright (c) 1994 Jan-Simon Pendry
3 * Copyright (c) 1994
4 *	The Regents of the University of California.  All rights reserved.
5 * Copyright (c) 2005, 2006 Masanori Ozawa <ozawa@ongs.co.jp>, ONGS Inc.
6 * Copyright (c) 2006 Daichi Goto <daichi@freebsd.org>
7 *
8 * This code is derived from software contributed to Berkeley by
9 * Jan-Simon Pendry.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 *    notice, this list of conditions and the following disclaimer in the
18 *    documentation and/or other materials provided with the distribution.
19 * 4. Neither the name of the University nor the names of its contributors
20 *    may be used to endorse or promote products derived from this software
21 *    without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 *
35 *	@(#)union_subr.c	8.20 (Berkeley) 5/20/95
36 * $FreeBSD: head/sys/fs/unionfs/union_subr.c 185283 2008-11-25 03:18:35Z daichi $
37 */
38
39#include <sys/param.h>
40#include <sys/systm.h>
41#include <sys/kernel.h>
42#include <sys/lock.h>
43#include <sys/mutex.h>
44#include <sys/malloc.h>
45#include <sys/mount.h>
46#include <sys/namei.h>
47#include <sys/proc.h>
48#include <sys/vnode.h>
49#include <sys/dirent.h>
50#include <sys/fcntl.h>
51#include <sys/filedesc.h>
52#include <sys/stat.h>
53#include <sys/resourcevar.h>
54
55#ifdef MAC
56#include <sys/mac.h>
57#endif
58
59#include <vm/uma.h>
60
61#include <fs/unionfs/union.h>
62
/* Bucket count handed to hashinit() for each directory node's child cache. */
#define NUNIONFSNODECACHE 16

/*
 * Malloc types used by unionfs.  Only the hash-table type is private to
 * this file; the node and path types have external linkage.
 */
static MALLOC_DEFINE(M_UNIONFSHASH, "UNIONFS hash", "UNIONFS hash table");
MALLOC_DEFINE(M_UNIONFSNODE, "UNIONFS node", "UNIONFS vnode private part");
MALLOC_DEFINE(M_UNIONFSPATH, "UNIONFS path", "UNIONFS path private part");
68
/*
 * Initialize unionfs.
 *
 * Only emits a debug trace; there is no other setup to perform.
 * Always returns 0.
 */
int
unionfs_init(struct vfsconf *vfsp)
{
	/* This trace is printed during system boot. */
	UNIONFSDEBUG("unionfs_init\n");

	return (0);
}
78
/*
 * Uninitialize unionfs.
 *
 * Nothing needs to be torn down; always returns 0.
 */
int
unionfs_uninit(struct vfsconf *vfsp)
{

	return (0);
}
87
88static struct unionfs_node_hashhead *
89unionfs_get_hashhead(struct vnode *dvp, char *path)
90{
91	int		count;
92	char		hash;
93	struct unionfs_node *unp;
94
95	hash = 0;
96	unp = VTOUNIONFS(dvp);
97	if (path != NULL) {
98		for (count = 0; path[count]; count++)
99			hash += path[count];
100	}
101
102	return (&(unp->un_hashtbl[hash & (unp->un_hashmask)]));
103}
104
105/*
106 * Get the cached vnode.
107 */
108static struct vnode *
109unionfs_get_cached_vnode(struct vnode *uvp, struct vnode *lvp,
110			struct vnode *dvp, char *path)
111{
112	struct unionfs_node_hashhead *hd;
113	struct unionfs_node *unp;
114	struct vnode   *vp;
115
116	KASSERT((uvp == NULLVP || uvp->v_type == VDIR),
117	    ("unionfs_get_cached_vnode: v_type != VDIR"));
118	KASSERT((lvp == NULLVP || lvp->v_type == VDIR),
119	    ("unionfs_get_cached_vnode: v_type != VDIR"));
120
121	VI_LOCK(dvp);
122	hd = unionfs_get_hashhead(dvp, path);
123	LIST_FOREACH(unp, hd, un_hash) {
124		if (!strcmp(unp->un_path, path)) {
125			vp = UNIONFSTOV(unp);
126			VI_LOCK_FLAGS(vp, MTX_DUPOK);
127			VI_UNLOCK(dvp);
128			vp->v_iflag &= ~VI_OWEINACT;
129			if ((vp->v_iflag & (VI_DOOMED | VI_DOINGINACT)) != 0) {
130				VI_UNLOCK(vp);
131				vp = NULLVP;
132			} else
133				VI_UNLOCK(vp);
134			return (vp);
135		}
136	}
137	VI_UNLOCK(dvp);
138
139	return (NULLVP);
140}
141
142/*
143 * Add the new vnode into cache.
144 */
145static struct vnode *
146unionfs_ins_cached_vnode(struct unionfs_node *uncp,
147			struct vnode *dvp, char *path)
148{
149	struct unionfs_node_hashhead *hd;
150	struct unionfs_node *unp;
151	struct vnode   *vp;
152
153	KASSERT((uncp->un_uppervp==NULLVP || uncp->un_uppervp->v_type==VDIR),
154	    ("unionfs_ins_cached_vnode: v_type != VDIR"));
155	KASSERT((uncp->un_lowervp==NULLVP || uncp->un_lowervp->v_type==VDIR),
156	    ("unionfs_ins_cached_vnode: v_type != VDIR"));
157
158	VI_LOCK(dvp);
159	hd = unionfs_get_hashhead(dvp, path);
160	LIST_FOREACH(unp, hd, un_hash) {
161		if (!strcmp(unp->un_path, path)) {
162			vp = UNIONFSTOV(unp);
163			VI_LOCK_FLAGS(vp, MTX_DUPOK);
164			vp->v_iflag &= ~VI_OWEINACT;
165			if ((vp->v_iflag & (VI_DOOMED | VI_DOINGINACT)) != 0) {
166				LIST_INSERT_HEAD(hd, uncp, un_hash);
167				VI_UNLOCK(vp);
168				vp = NULLVP;
169			} else
170				VI_UNLOCK(vp);
171			VI_UNLOCK(dvp);
172			return (vp);
173		}
174	}
175
176	LIST_INSERT_HEAD(hd, uncp, un_hash);
177	VI_UNLOCK(dvp);
178
179	return (NULLVP);
180}
181
182/*
183 * Remove the vnode.
184 */
185static void
186unionfs_rem_cached_vnode(struct unionfs_node *unp, struct vnode *dvp)
187{
188	KASSERT((unp != NULL), ("unionfs_rem_cached_vnode: null node"));
189	KASSERT((dvp != NULLVP),
190	    ("unionfs_rem_cached_vnode: null parent vnode"));
191	KASSERT((unp->un_hash.le_prev != NULL),
192	    ("unionfs_rem_cached_vnode: null hash"));
193
194	VI_LOCK(dvp);
195	LIST_REMOVE(unp, un_hash);
196	unp->un_hash.le_next = NULL;
197	unp->un_hash.le_prev = NULL;
198	VI_UNLOCK(dvp);
199}
200
201/*
202 * Make a new or get existing unionfs node.
203 *
204 * uppervp and lowervp should be unlocked. Because if new unionfs vnode is
205 * locked, uppervp or lowervp is locked too. In order to prevent dead lock,
206 * you should not lock plurality simultaneously.
207 */
208int
209unionfs_nodeget(struct mount *mp, struct vnode *uppervp,
210		struct vnode *lowervp, struct vnode *dvp,
211		struct vnode **vpp, struct componentname *cnp,
212		struct thread *td)
213{
214	struct unionfs_mount *ump;
215	struct unionfs_node *unp;
216	struct vnode   *vp;
217	int		error;
218	int		lkflags;
219	enum vtype	vt;
220	char	       *path;
221
222	ump = MOUNTTOUNIONFSMOUNT(mp);
223	lkflags = (cnp ? cnp->cn_lkflags : 0);
224	path = (cnp ? cnp->cn_nameptr : NULL);
225	*vpp = NULLVP;
226
227	if (uppervp == NULLVP && lowervp == NULLVP)
228		panic("unionfs_nodeget: upper and lower is null");
229
230	vt = (uppervp != NULLVP ? uppervp->v_type : lowervp->v_type);
231
232	/* If it has no ISLASTCN flag, path check is skipped. */
233	if (cnp && !(cnp->cn_flags & ISLASTCN))
234		path = NULL;
235
236	/* check the cache */
237	if (path != NULL && dvp != NULLVP && vt == VDIR) {
238		vp = unionfs_get_cached_vnode(uppervp, lowervp, dvp, path);
239		if (vp != NULLVP) {
240			vref(vp);
241			*vpp = vp;
242			goto unionfs_nodeget_out;
243		}
244	}
245
246	if ((uppervp == NULLVP || ump->um_uppervp != uppervp) ||
247	    (lowervp == NULLVP || ump->um_lowervp != lowervp)) {
248		/* dvp will be NULLVP only in case of root vnode. */
249		if (dvp == NULLVP)
250			return (EINVAL);
251	}
252
253	/*
254	 * Do the MALLOC before the getnewvnode since doing so afterward
255	 * might cause a bogus v_data pointer to get dereferenced elsewhere
256	 * if MALLOC should block.
257	 */
258	unp = malloc(sizeof(struct unionfs_node),
259	    M_UNIONFSNODE, M_WAITOK | M_ZERO);
260
261	error = getnewvnode("unionfs", mp, &unionfs_vnodeops, &vp);
262	if (error != 0) {
263		free(unp, M_UNIONFSNODE);
264		return (error);
265	}
266	error = insmntque(vp, mp);	/* XXX: Too early for mpsafe fs */
267	if (error != 0) {
268		free(unp, M_UNIONFSNODE);
269		return (error);
270	}
271	if (dvp != NULLVP)
272		vref(dvp);
273	if (uppervp != NULLVP)
274		vref(uppervp);
275	if (lowervp != NULLVP)
276		vref(lowervp);
277
278	if (vt == VDIR)
279		unp->un_hashtbl = hashinit(NUNIONFSNODECACHE, M_UNIONFSHASH,
280		    &(unp->un_hashmask));
281
282	unp->un_vnode = vp;
283	unp->un_uppervp = uppervp;
284	unp->un_lowervp = lowervp;
285	unp->un_dvp = dvp;
286	if (uppervp != NULLVP)
287		vp->v_vnlock = uppervp->v_vnlock;
288	else
289		vp->v_vnlock = lowervp->v_vnlock;
290
291	if (path != NULL) {
292		unp->un_path = (char *)
293		    malloc(cnp->cn_namelen +1, M_UNIONFSPATH, M_WAITOK|M_ZERO);
294		bcopy(cnp->cn_nameptr, unp->un_path, cnp->cn_namelen);
295		unp->un_path[cnp->cn_namelen] = '\0';
296	}
297	vp->v_type = vt;
298	vp->v_data = unp;
299
300	if ((uppervp != NULLVP && ump->um_uppervp == uppervp) &&
301	    (lowervp != NULLVP && ump->um_lowervp == lowervp))
302		vp->v_vflag |= VV_ROOT;
303
304	if (path != NULL && dvp != NULLVP && vt == VDIR)
305		*vpp = unionfs_ins_cached_vnode(unp, dvp, path);
306	if ((*vpp) != NULLVP) {
307		if (dvp != NULLVP)
308			vrele(dvp);
309		if (uppervp != NULLVP)
310			vrele(uppervp);
311		if (lowervp != NULLVP)
312			vrele(lowervp);
313
314		unp->un_uppervp = NULLVP;
315		unp->un_lowervp = NULLVP;
316		unp->un_dvp = NULLVP;
317		vrele(vp);
318		vp = *vpp;
319		vref(vp);
320	} else
321		*vpp = vp;
322
323unionfs_nodeget_out:
324	if (lkflags & LK_TYPE_MASK)
325		vn_lock(vp, lkflags | LK_RETRY);
326
327	return (0);
328}
329
330/*
331 * Clean up the unionfs node.
332 */
333void
334unionfs_noderem(struct vnode *vp, struct thread *td)
335{
336	int		vfslocked;
337	int		count;
338	struct unionfs_node *unp, *unp_t1, *unp_t2;
339	struct unionfs_node_hashhead *hd;
340	struct unionfs_node_status *unsp, *unsp_tmp;
341	struct vnode   *lvp;
342	struct vnode   *uvp;
343	struct vnode   *dvp;
344
345	/*
346	 * Use the interlock to protect the clearing of v_data to
347	 * prevent faults in unionfs_lock().
348	 */
349	VI_LOCK(vp);
350	unp = VTOUNIONFS(vp);
351	lvp = unp->un_lowervp;
352	uvp = unp->un_uppervp;
353	dvp = unp->un_dvp;
354	unp->un_lowervp = unp->un_uppervp = NULLVP;
355
356	vp->v_vnlock = &(vp->v_lock);
357	vp->v_data = NULL;
358	lockmgr(vp->v_vnlock, LK_EXCLUSIVE | LK_INTERLOCK, VI_MTX(vp));
359	if (lvp != NULLVP)
360		VOP_UNLOCK(lvp, 0);
361	if (uvp != NULLVP)
362		VOP_UNLOCK(uvp, 0);
363	vp->v_object = NULL;
364
365	if (dvp != NULLVP && unp->un_hash.le_prev != NULL)
366		unionfs_rem_cached_vnode(unp, dvp);
367
368	if (lvp != NULLVP) {
369		vfslocked = VFS_LOCK_GIANT(lvp->v_mount);
370		vrele(lvp);
371		VFS_UNLOCK_GIANT(vfslocked);
372	}
373	if (uvp != NULLVP) {
374		vfslocked = VFS_LOCK_GIANT(uvp->v_mount);
375		vrele(uvp);
376		VFS_UNLOCK_GIANT(vfslocked);
377	}
378	if (dvp != NULLVP) {
379		vfslocked = VFS_LOCK_GIANT(dvp->v_mount);
380		vrele(dvp);
381		VFS_UNLOCK_GIANT(vfslocked);
382		unp->un_dvp = NULLVP;
383	}
384	if (unp->un_path != NULL) {
385		free(unp->un_path, M_UNIONFSPATH);
386		unp->un_path = NULL;
387	}
388
389	if (unp->un_hashtbl != NULL) {
390		for (count = 0; count <= unp->un_hashmask; count++) {
391			hd = unp->un_hashtbl + count;
392			LIST_FOREACH_SAFE(unp_t1, hd, un_hash, unp_t2) {
393				LIST_REMOVE(unp_t1, un_hash);
394				unp_t1->un_hash.le_next = NULL;
395				unp_t1->un_hash.le_prev = NULL;
396			}
397		}
398		hashdestroy(unp->un_hashtbl, M_UNIONFSHASH, unp->un_hashmask);
399	}
400
401	LIST_FOREACH_SAFE(unsp, &(unp->un_unshead), uns_list, unsp_tmp) {
402		LIST_REMOVE(unsp, uns_list);
403		free(unsp, M_TEMP);
404	}
405	free(unp, M_UNIONFSNODE);
406}
407
408/*
409 * Get the unionfs node status.
410 * You need exclusive lock this vnode.
411 */
412void
413unionfs_get_node_status(struct unionfs_node *unp, struct thread *td,
414			struct unionfs_node_status **unspp)
415{
416	struct unionfs_node_status *unsp;
417	pid_t pid = td->td_proc->p_pid;
418
419	KASSERT(NULL != unspp, ("null pointer"));
420	ASSERT_VOP_ELOCKED(UNIONFSTOV(unp), "unionfs_get_node_status");
421
422	LIST_FOREACH(unsp, &(unp->un_unshead), uns_list) {
423		if (unsp->uns_pid == pid) {
424			*unspp = unsp;
425			return;
426		}
427	}
428
429	/* create a new unionfs node status */
430	unsp = malloc(sizeof(struct unionfs_node_status),
431	    M_TEMP, M_WAITOK | M_ZERO);
432
433	unsp->uns_pid = pid;
434	LIST_INSERT_HEAD(&(unp->un_unshead), unsp, uns_list);
435
436	*unspp = unsp;
437}
438
439/*
440 * Remove the unionfs node status, if you can.
441 * You need exclusive lock this vnode.
442 */
443void
444unionfs_tryrem_node_status(struct unionfs_node *unp,
445			   struct unionfs_node_status *unsp)
446{
447	KASSERT(NULL != unsp, ("null pointer"));
448	ASSERT_VOP_ELOCKED(UNIONFSTOV(unp), "unionfs_get_node_status");
449
450	if (0 < unsp->uns_lower_opencnt || 0 < unsp->uns_upper_opencnt)
451		return;
452
453	LIST_REMOVE(unsp, uns_list);
454	free(unsp, M_TEMP);
455}
456
457/*
458 * Create upper node attr.
459 */
460void
461unionfs_create_uppervattr_core(struct unionfs_mount *ump,
462			       struct vattr *lva,
463			       struct vattr *uva,
464			       struct thread *td)
465{
466	VATTR_NULL(uva);
467	uva->va_type = lva->va_type;
468	uva->va_atime = lva->va_atime;
469	uva->va_mtime = lva->va_mtime;
470	uva->va_ctime = lva->va_ctime;
471
472	switch (ump->um_copymode) {
473	case UNIONFS_TRANSPARENT:
474		uva->va_mode = lva->va_mode;
475		uva->va_uid = lva->va_uid;
476		uva->va_gid = lva->va_gid;
477		break;
478	case UNIONFS_MASQUERADE:
479		if (ump->um_uid == lva->va_uid) {
480			uva->va_mode = lva->va_mode & 077077;
481			uva->va_mode |= (lva->va_type == VDIR ? ump->um_udir : ump->um_ufile) & 0700;
482			uva->va_uid = lva->va_uid;
483			uva->va_gid = lva->va_gid;
484		} else {
485			uva->va_mode = (lva->va_type == VDIR ? ump->um_udir : ump->um_ufile);
486			uva->va_uid = ump->um_uid;
487			uva->va_gid = ump->um_gid;
488		}
489		break;
490	default:		/* UNIONFS_TRADITIONAL */
491		FILEDESC_SLOCK(td->td_proc->p_fd);
492		uva->va_mode = 0777 & ~td->td_proc->p_fd->fd_cmask;
493		FILEDESC_SUNLOCK(td->td_proc->p_fd);
494		uva->va_uid = ump->um_uid;
495		uva->va_gid = ump->um_gid;
496		break;
497	}
498}
499
500/*
501 * Create upper node attr.
502 */
503int
504unionfs_create_uppervattr(struct unionfs_mount *ump,
505			  struct vnode *lvp,
506			  struct vattr *uva,
507			  struct ucred *cred,
508			  struct thread *td)
509{
510	int		error;
511	struct vattr	lva;
512
513	if ((error = VOP_GETATTR(lvp, &lva, cred)))
514		return (error);
515
516	unionfs_create_uppervattr_core(ump, &lva, uva, td);
517
518	return (error);
519}
520
521/*
522 * relookup
523 *
524 * dvp should be locked on entry and will be locked on return.
525 *
526 * If an error is returned, *vpp will be invalid, otherwise it will hold a
527 * locked, referenced vnode. If *vpp == dvp then remember that only one
528 * LK_EXCLUSIVE lock is held.
529 */
530int
531unionfs_relookup(struct vnode *dvp, struct vnode **vpp,
532		 struct componentname *cnp, struct componentname *cn,
533		 struct thread *td, char *path, int pathlen, u_long nameiop)
534{
535	int	error;
536
537	cn->cn_namelen = pathlen;
538	cn->cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK);
539	bcopy(path, cn->cn_pnbuf, pathlen);
540	cn->cn_pnbuf[pathlen] = '\0';
541
542	cn->cn_nameiop = nameiop;
543	cn->cn_flags = (LOCKPARENT | LOCKLEAF | HASBUF | SAVENAME | ISLASTCN);
544	cn->cn_lkflags = LK_EXCLUSIVE;
545	cn->cn_thread = td;
546	cn->cn_cred = cnp->cn_cred;
547
548	cn->cn_nameptr = cn->cn_pnbuf;
549	cn->cn_consume = cnp->cn_consume;
550
551	if (nameiop == DELETE)
552		cn->cn_flags |= (cnp->cn_flags & (DOWHITEOUT | SAVESTART));
553	else if (RENAME == nameiop)
554		cn->cn_flags |= (cnp->cn_flags & SAVESTART);
555
556	vref(dvp);
557	VOP_UNLOCK(dvp, 0);
558
559	if ((error = relookup(dvp, vpp, cn))) {
560		uma_zfree(namei_zone, cn->cn_pnbuf);
561		cn->cn_flags &= ~HASBUF;
562		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
563	} else
564		vrele(dvp);
565
566	return (error);
567}
568
569/*
570 * relookup for CREATE namei operation.
571 *
572 * dvp is unionfs vnode. dvp should be locked.
573 *
574 * If it called 'unionfs_copyfile' function by unionfs_link etc,
575 * VOP_LOOKUP information is broken.
576 * So it need relookup in order to create link etc.
577 */
578int
579unionfs_relookup_for_create(struct vnode *dvp, struct componentname *cnp,
580			    struct thread *td)
581{
582	int	error;
583	struct vnode *udvp;
584	struct vnode *vp;
585	struct componentname cn;
586
587	udvp = UNIONFSVPTOUPPERVP(dvp);
588	vp = NULLVP;
589
590	error = unionfs_relookup(udvp, &vp, cnp, &cn, td, cnp->cn_nameptr,
591	    strlen(cnp->cn_nameptr), CREATE);
592	if (error)
593		return (error);
594
595	if (vp != NULLVP) {
596		if (udvp == vp)
597			vrele(vp);
598		else
599			vput(vp);
600
601		error = EEXIST;
602	}
603
604	if (cn.cn_flags & HASBUF) {
605		uma_zfree(namei_zone, cn.cn_pnbuf);
606		cn.cn_flags &= ~HASBUF;
607	}
608
609	if (!error) {
610		cn.cn_flags |= (cnp->cn_flags & HASBUF);
611		cnp->cn_flags = cn.cn_flags;
612	}
613
614	return (error);
615}
616
617/*
618 * relookup for DELETE namei operation.
619 *
620 * dvp is unionfs vnode. dvp should be locked.
621 */
622int
623unionfs_relookup_for_delete(struct vnode *dvp, struct componentname *cnp,
624			    struct thread *td)
625{
626	int	error;
627	struct vnode *udvp;
628	struct vnode *vp;
629	struct componentname cn;
630
631	udvp = UNIONFSVPTOUPPERVP(dvp);
632	vp = NULLVP;
633
634	error = unionfs_relookup(udvp, &vp, cnp, &cn, td, cnp->cn_nameptr,
635	    strlen(cnp->cn_nameptr), DELETE);
636	if (error)
637		return (error);
638
639	if (vp == NULLVP)
640		error = ENOENT;
641	else {
642		if (udvp == vp)
643			vrele(vp);
644		else
645			vput(vp);
646	}
647
648	if (cn.cn_flags & HASBUF) {
649		uma_zfree(namei_zone, cn.cn_pnbuf);
650		cn.cn_flags &= ~HASBUF;
651	}
652
653	if (!error) {
654		cn.cn_flags |= (cnp->cn_flags & HASBUF);
655		cnp->cn_flags = cn.cn_flags;
656	}
657
658	return (error);
659}
660
661/*
662 * relookup for RENAME namei operation.
663 *
664 * dvp is unionfs vnode. dvp should be locked.
665 */
666int
667unionfs_relookup_for_rename(struct vnode *dvp, struct componentname *cnp,
668			    struct thread *td)
669{
670	int error;
671	struct vnode *udvp;
672	struct vnode *vp;
673	struct componentname cn;
674
675	udvp = UNIONFSVPTOUPPERVP(dvp);
676	vp = NULLVP;
677
678	error = unionfs_relookup(udvp, &vp, cnp, &cn, td, cnp->cn_nameptr,
679	    strlen(cnp->cn_nameptr), RENAME);
680	if (error)
681		return (error);
682
683	if (vp != NULLVP) {
684		if (udvp == vp)
685			vrele(vp);
686		else
687			vput(vp);
688	}
689
690	if (cn.cn_flags & HASBUF) {
691		uma_zfree(namei_zone, cn.cn_pnbuf);
692		cn.cn_flags &= ~HASBUF;
693	}
694
695	if (!error) {
696		cn.cn_flags |= (cnp->cn_flags & HASBUF);
697		cnp->cn_flags = cn.cn_flags;
698	}
699
700	return (error);
701
702}
703
704/*
705 * Update the unionfs_node.
706 *
707 * uvp is new locked upper vnode. unionfs vnode's lock will be exchanged to the
708 * uvp's lock and lower's lock will be unlocked.
709 */
710static void
711unionfs_node_update(struct unionfs_node *unp, struct vnode *uvp,
712		    struct thread *td)
713{
714	unsigned	count, lockrec;
715	struct vnode   *vp;
716	struct vnode   *lvp;
717	struct vnode   *dvp;
718
719	vp = UNIONFSTOV(unp);
720	lvp = unp->un_lowervp;
721	ASSERT_VOP_ELOCKED(lvp, "unionfs_node_update");
722	dvp = unp->un_dvp;
723
724	/*
725	 * lock update
726	 */
727	VI_LOCK(vp);
728	unp->un_uppervp = uvp;
729	vp->v_vnlock = uvp->v_vnlock;
730	VI_UNLOCK(vp);
731	lockrec = lvp->v_vnlock->lk_recurse;
732	for (count = 0; count < lockrec; count++)
733		vn_lock(uvp, LK_EXCLUSIVE | LK_CANRECURSE | LK_RETRY);
734
735	/*
736	 * cache update
737	 */
738	if (unp->un_path != NULL && dvp != NULLVP && vp->v_type == VDIR) {
739		static struct unionfs_node_hashhead *hd;
740
741		VI_LOCK(dvp);
742		hd = unionfs_get_hashhead(dvp, unp->un_path);
743		LIST_REMOVE(unp, un_hash);
744		LIST_INSERT_HEAD(hd, unp, un_hash);
745		VI_UNLOCK(dvp);
746	}
747}
748
749/*
750 * Create a new shadow dir.
751 *
752 * udvp should be locked on entry and will be locked on return.
753 *
754 * If no error returned, unp will be updated.
755 */
756int
757unionfs_mkshadowdir(struct unionfs_mount *ump, struct vnode *udvp,
758		    struct unionfs_node *unp, struct componentname *cnp,
759		    struct thread *td)
760{
761	int		error;
762	struct vnode   *lvp;
763	struct vnode   *uvp;
764	struct vattr	va;
765	struct vattr	lva;
766	struct componentname cn;
767	struct mount   *mp;
768	struct ucred   *cred;
769	struct ucred   *credbk;
770	struct uidinfo *rootinfo;
771
772	if (unp->un_uppervp != NULLVP)
773		return (EEXIST);
774
775	lvp = unp->un_lowervp;
776	uvp = NULLVP;
777	credbk = cnp->cn_cred;
778
779	/* Authority change to root */
780	rootinfo = uifind((uid_t)0);
781	cred = crdup(cnp->cn_cred);
782	chgproccnt(cred->cr_ruidinfo, 1, 0);
783	change_euid(cred, rootinfo);
784	change_ruid(cred, rootinfo);
785	change_svuid(cred, (uid_t)0);
786	uifree(rootinfo);
787	cnp->cn_cred = cred;
788
789	memset(&cn, 0, sizeof(cn));
790
791	if ((error = VOP_GETATTR(lvp, &lva, cnp->cn_cred)))
792		goto unionfs_mkshadowdir_abort;
793
794	if ((error = unionfs_relookup(udvp, &uvp, cnp, &cn, td, cnp->cn_nameptr, cnp->cn_namelen, CREATE)))
795		goto unionfs_mkshadowdir_abort;
796	if (uvp != NULLVP) {
797		if (udvp == uvp)
798			vrele(uvp);
799		else
800			vput(uvp);
801
802		error = EEXIST;
803		goto unionfs_mkshadowdir_free_out;
804	}
805
806	if ((error = vn_start_write(udvp, &mp, V_WAIT | PCATCH)))
807		goto unionfs_mkshadowdir_free_out;
808	if ((error = VOP_LEASE(udvp, td, cn.cn_cred, LEASE_WRITE))) {
809		vn_finished_write(mp);
810		goto unionfs_mkshadowdir_free_out;
811	}
812	unionfs_create_uppervattr_core(ump, &lva, &va, td);
813
814	error = VOP_MKDIR(udvp, &uvp, &cn, &va);
815
816	if (!error) {
817		unionfs_node_update(unp, uvp, td);
818
819		/*
820		 * XXX The bug which cannot set uid/gid was corrected.
821		 * Ignore errors.
822		 */
823		va.va_type = VNON;
824		VOP_SETATTR(uvp, &va, cn.cn_cred);
825	}
826	vn_finished_write(mp);
827
828unionfs_mkshadowdir_free_out:
829	if (cn.cn_flags & HASBUF) {
830		uma_zfree(namei_zone, cn.cn_pnbuf);
831		cn.cn_flags &= ~HASBUF;
832	}
833
834unionfs_mkshadowdir_abort:
835	cnp->cn_cred = credbk;
836	chgproccnt(cred->cr_ruidinfo, -1, 0);
837	crfree(cred);
838
839	return (error);
840}
841
842/*
843 * Create a new whiteout.
844 *
845 * dvp should be locked on entry and will be locked on return.
846 */
847int
848unionfs_mkwhiteout(struct vnode *dvp, struct componentname *cnp,
849		   struct thread *td, char *path)
850{
851	int		error;
852	struct vnode   *wvp;
853	struct componentname cn;
854	struct mount   *mp;
855
856	if (path == NULL)
857		path = cnp->cn_nameptr;
858
859	wvp = NULLVP;
860	if ((error = unionfs_relookup(dvp, &wvp, cnp, &cn, td, path, strlen(path), CREATE)))
861		return (error);
862	if (wvp != NULLVP) {
863		if (cn.cn_flags & HASBUF) {
864			uma_zfree(namei_zone, cn.cn_pnbuf);
865			cn.cn_flags &= ~HASBUF;
866		}
867		if (dvp == wvp)
868			vrele(wvp);
869		else
870			vput(wvp);
871
872		return (EEXIST);
873	}
874
875	if ((error = vn_start_write(dvp, &mp, V_WAIT | PCATCH)))
876		goto unionfs_mkwhiteout_free_out;
877	if (!(error = VOP_LEASE(dvp, td, td->td_ucred, LEASE_WRITE)))
878		error = VOP_WHITEOUT(dvp, &cn, CREATE);
879
880	vn_finished_write(mp);
881
882unionfs_mkwhiteout_free_out:
883	if (cn.cn_flags & HASBUF) {
884		uma_zfree(namei_zone, cn.cn_pnbuf);
885		cn.cn_flags &= ~HASBUF;
886	}
887
888	return (error);
889}
890
891/*
892 * Create a new vnode for create a new shadow file.
893 *
894 * If an error is returned, *vpp will be invalid, otherwise it will hold a
895 * locked, referenced and opened vnode.
896 *
897 * unp is never updated.
898 */
899static int
900unionfs_vn_create_on_upper(struct vnode **vpp, struct vnode *udvp,
901			   struct unionfs_node *unp, struct vattr *uvap,
902			   struct thread *td)
903{
904	struct unionfs_mount *ump;
905	struct vnode   *vp;
906	struct vnode   *lvp;
907	struct ucred   *cred;
908	struct vattr	lva;
909	int		fmode;
910	int		error;
911	struct componentname cn;
912
913	ump = MOUNTTOUNIONFSMOUNT(UNIONFSTOV(unp)->v_mount);
914	vp = NULLVP;
915	lvp = unp->un_lowervp;
916	cred = td->td_ucred;
917	fmode = FFLAGS(O_WRONLY | O_CREAT | O_TRUNC | O_EXCL);
918	error = 0;
919
920	if ((error = VOP_GETATTR(lvp, &lva, cred)) != 0)
921		return (error);
922	unionfs_create_uppervattr_core(ump, &lva, uvap, td);
923
924	if (unp->un_path == NULL)
925		panic("unionfs: un_path is null");
926
927	cn.cn_namelen = strlen(unp->un_path);
928	cn.cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK);
929	bcopy(unp->un_path, cn.cn_pnbuf, cn.cn_namelen + 1);
930	cn.cn_nameiop = CREATE;
931	cn.cn_flags = (LOCKPARENT | LOCKLEAF | HASBUF | SAVENAME | ISLASTCN);
932	cn.cn_lkflags = LK_EXCLUSIVE;
933	cn.cn_thread = td;
934	cn.cn_cred = cred;
935	cn.cn_nameptr = cn.cn_pnbuf;
936	cn.cn_consume = 0;
937
938	vref(udvp);
939	if ((error = relookup(udvp, &vp, &cn)) != 0)
940		goto unionfs_vn_create_on_upper_free_out2;
941	vrele(udvp);
942
943	if (vp != NULLVP) {
944		if (vp == udvp)
945			vrele(vp);
946		else
947			vput(vp);
948		error = EEXIST;
949		goto unionfs_vn_create_on_upper_free_out1;
950	}
951
952	if ((error = VOP_LEASE(udvp, td, cred, LEASE_WRITE)) != 0)
953		goto unionfs_vn_create_on_upper_free_out1;
954
955	if ((error = VOP_CREATE(udvp, &vp, &cn, uvap)) != 0)
956		goto unionfs_vn_create_on_upper_free_out1;
957
958	if ((error = VOP_OPEN(vp, fmode, cred, td, NULL)) != 0) {
959		vput(vp);
960		goto unionfs_vn_create_on_upper_free_out1;
961	}
962	vp->v_writecount++;
963	*vpp = vp;
964
965unionfs_vn_create_on_upper_free_out1:
966	VOP_UNLOCK(udvp, 0);
967
968unionfs_vn_create_on_upper_free_out2:
969	if (cn.cn_flags & HASBUF) {
970		uma_zfree(namei_zone, cn.cn_pnbuf);
971		cn.cn_flags &= ~HASBUF;
972	}
973
974	return (error);
975}
976
977/*
978 * Copy from lvp to uvp.
979 *
980 * lvp and uvp should be locked and opened on entry and will be locked and
981 * opened on return.
982 */
983static int
984unionfs_copyfile_core(struct vnode *lvp, struct vnode *uvp,
985		      struct ucred *cred, struct thread *td)
986{
987	int		error;
988	off_t		offset;
989	int		count;
990	int		bufoffset;
991	char           *buf;
992	struct uio	uio;
993	struct iovec	iov;
994
995	error = 0;
996	memset(&uio, 0, sizeof(uio));
997
998	uio.uio_td = td;
999	uio.uio_segflg = UIO_SYSSPACE;
1000	uio.uio_offset = 0;
1001
1002	if ((error = VOP_LEASE(lvp, td, cred, LEASE_READ)) != 0)
1003		return (error);
1004	if ((error = VOP_LEASE(uvp, td, cred, LEASE_WRITE)) != 0)
1005		return (error);
1006	buf = malloc(MAXBSIZE, M_TEMP, M_WAITOK);
1007
1008	while (error == 0) {
1009		offset = uio.uio_offset;
1010
1011		uio.uio_iov = &iov;
1012		uio.uio_iovcnt = 1;
1013		iov.iov_base = buf;
1014		iov.iov_len = MAXBSIZE;
1015		uio.uio_resid = iov.iov_len;
1016		uio.uio_rw = UIO_READ;
1017
1018		if ((error = VOP_READ(lvp, &uio, 0, cred)) != 0)
1019			break;
1020		if ((count = MAXBSIZE - uio.uio_resid) == 0)
1021			break;
1022
1023		bufoffset = 0;
1024		while (bufoffset < count) {
1025			uio.uio_iov = &iov;
1026			uio.uio_iovcnt = 1;
1027			iov.iov_base = buf + bufoffset;
1028			iov.iov_len = count - bufoffset;
1029			uio.uio_offset = offset + bufoffset;
1030			uio.uio_resid = iov.iov_len;
1031			uio.uio_rw = UIO_WRITE;
1032
1033			if ((error = VOP_WRITE(uvp, &uio, 0, cred)) != 0)
1034				break;
1035
1036			bufoffset += (count - bufoffset) - uio.uio_resid;
1037		}
1038
1039		uio.uio_offset = offset + bufoffset;
1040	}
1041
1042	free(buf, M_TEMP);
1043
1044	return (error);
1045}
1046
1047/*
1048 * Copy file from lower to upper.
1049 *
1050 * If you need copy of the contents, set 1 to docopy. Otherwise, set 0 to
1051 * docopy.
1052 *
1053 * If no error returned, unp will be updated.
1054 */
1055int
1056unionfs_copyfile(struct unionfs_node *unp, int docopy, struct ucred *cred,
1057		 struct thread *td)
1058{
1059	int		error;
1060	struct mount   *mp;
1061	struct vnode   *udvp;
1062	struct vnode   *lvp;
1063	struct vnode   *uvp;
1064	struct vattr	uva;
1065
1066	lvp = unp->un_lowervp;
1067	uvp = NULLVP;
1068
1069	if ((UNIONFSTOV(unp)->v_mount->mnt_flag & MNT_RDONLY))
1070		return (EROFS);
1071	if (unp->un_dvp == NULLVP)
1072		return (EINVAL);
1073	if (unp->un_uppervp != NULLVP)
1074		return (EEXIST);
1075	udvp = VTOUNIONFS(unp->un_dvp)->un_uppervp;
1076	if (udvp == NULLVP)
1077		return (EROFS);
1078	if ((udvp->v_mount->mnt_flag & MNT_RDONLY))
1079		return (EROFS);
1080
1081	error = VOP_ACCESS(lvp, VREAD, cred, td);
1082	if (error != 0)
1083		return (error);
1084
1085	if ((error = vn_start_write(udvp, &mp, V_WAIT | PCATCH)) != 0)
1086		return (error);
1087	error = unionfs_vn_create_on_upper(&uvp, udvp, unp, &uva, td);
1088	if (error != 0) {
1089		vn_finished_write(mp);
1090		return (error);
1091	}
1092
1093	if (docopy != 0) {
1094		error = VOP_OPEN(lvp, FREAD, cred, td, NULL);
1095		if (error == 0) {
1096			error = unionfs_copyfile_core(lvp, uvp, cred, td);
1097			VOP_CLOSE(lvp, FREAD, cred, td);
1098		}
1099	}
1100	VOP_CLOSE(uvp, FWRITE, cred, td);
1101	uvp->v_writecount--;
1102
1103	vn_finished_write(mp);
1104
1105	if (error == 0) {
1106		/* Reset the attributes. Ignore errors. */
1107		uva.va_type = VNON;
1108		VOP_SETATTR(uvp, &uva, cred);
1109	}
1110
1111	unionfs_node_update(unp, uvp, td);
1112
1113	return (error);
1114}
1115
1116/*
1117 * It checks whether vp can rmdir. (check empty)
1118 *
1119 * vp is unionfs vnode.
1120 * vp should be locked.
1121 */
1122int
1123unionfs_check_rmdir(struct vnode *vp, struct ucred *cred, struct thread *td)
1124{
1125	int		error;
1126	int		eofflag;
1127	int		lookuperr;
1128	struct vnode   *uvp;
1129	struct vnode   *lvp;
1130	struct vnode   *tvp;
1131	struct vattr	va;
1132	struct componentname cn;
1133	/*
1134	 * The size of buf needs to be larger than DIRBLKSIZ.
1135	 */
1136	char		buf[256 * 6];
1137	struct dirent  *dp;
1138	struct dirent  *edp;
1139	struct uio	uio;
1140	struct iovec	iov;
1141
1142	ASSERT_VOP_ELOCKED(vp, "unionfs_check_rmdir");
1143
1144	eofflag = 0;
1145	uvp = UNIONFSVPTOUPPERVP(vp);
1146	lvp = UNIONFSVPTOLOWERVP(vp);
1147
1148	/* check opaque */
1149	if ((error = VOP_GETATTR(uvp, &va, cred)) != 0)
1150		return (error);
1151	if (va.va_flags & OPAQUE)
1152		return (0);
1153
1154	/* open vnode */
1155#ifdef MAC
1156	if ((error = mac_vnode_check_open(cred, vp, VEXEC|VREAD)) != 0)
1157		return (error);
1158#endif
1159	if ((error = VOP_ACCESS(vp, VEXEC|VREAD, cred, td)) != 0)
1160		return (error);
1161	if ((error = VOP_OPEN(vp, FREAD, cred, td, NULL)) != 0)
1162		return (error);
1163
1164	uio.uio_rw = UIO_READ;
1165	uio.uio_segflg = UIO_SYSSPACE;
1166	uio.uio_td = td;
1167	uio.uio_offset = 0;
1168
1169#ifdef MAC
1170	error = mac_vnode_check_readdir(td->td_ucred, lvp);
1171#endif
1172	while (!error && !eofflag) {
1173		iov.iov_base = buf;
1174		iov.iov_len = sizeof(buf);
1175		uio.uio_iov = &iov;
1176		uio.uio_iovcnt = 1;
1177		uio.uio_resid = iov.iov_len;
1178
1179		error = VOP_READDIR(lvp, &uio, cred, &eofflag, NULL, NULL);
1180		if (error != 0)
1181			break;
1182		if (eofflag == 0 && uio.uio_resid == sizeof(buf)) {
1183#ifdef DIAGNOSTIC
1184			panic("bad readdir response from lower FS.");
1185#endif
1186			break;
1187		}
1188
1189		edp = (struct dirent*)&buf[sizeof(buf) - uio.uio_resid];
1190		for (dp = (struct dirent*)buf; !error && dp < edp;
1191		     dp = (struct dirent*)((caddr_t)dp + dp->d_reclen)) {
1192			if (dp->d_type == DT_WHT ||
1193			    (dp->d_namlen == 1 && dp->d_name[0] == '.') ||
1194			    (dp->d_namlen == 2 && !bcmp(dp->d_name, "..", 2)))
1195				continue;
1196
1197			cn.cn_namelen = dp->d_namlen;
1198			cn.cn_pnbuf = NULL;
1199			cn.cn_nameptr = dp->d_name;
1200			cn.cn_nameiop = LOOKUP;
1201			cn.cn_flags = (LOCKPARENT | LOCKLEAF | SAVENAME | RDONLY | ISLASTCN);
1202			cn.cn_lkflags = LK_EXCLUSIVE;
1203			cn.cn_thread = td;
1204			cn.cn_cred = cred;
1205			cn.cn_consume = 0;
1206
1207			/*
1208			 * check entry in lower.
1209			 * Sometimes, readdir function returns
1210			 * wrong entry.
1211			 */
1212			lookuperr = VOP_LOOKUP(lvp, &tvp, &cn);
1213
1214			if (!lookuperr)
1215				vput(tvp);
1216			else
1217				continue; /* skip entry */
1218
1219			/*
1220			 * check entry
1221			 * If it has no exist/whiteout entry in upper,
1222			 * directory is not empty.
1223			 */
1224			cn.cn_flags = (LOCKPARENT | LOCKLEAF | SAVENAME | RDONLY | ISLASTCN);
1225			lookuperr = VOP_LOOKUP(uvp, &tvp, &cn);
1226
1227			if (!lookuperr)
1228				vput(tvp);
1229
1230			/* ignore exist or whiteout entry */
1231			if (!lookuperr ||
1232			    (lookuperr == ENOENT && (cn.cn_flags & ISWHITEOUT)))
1233				continue;
1234
1235			error = ENOTEMPTY;
1236		}
1237	}
1238
1239	/* close vnode */
1240	VOP_CLOSE(vp, FREAD, cred, td);
1241
1242	return (error);
1243}
1244
1245#ifdef DIAGNOSTIC
1246
1247struct vnode   *
1248unionfs_checkuppervp(struct vnode *vp, char *fil, int lno)
1249{
1250	struct unionfs_node *unp;
1251
1252	unp = VTOUNIONFS(vp);
1253
1254#ifdef notyet
1255	if (vp->v_op != unionfs_vnodeop_p) {
1256		printf("unionfs_checkuppervp: on non-unionfs-node.\n");
1257#ifdef KDB
1258		kdb_enter(KDB_WHY_UNIONFS,
1259		    "unionfs_checkuppervp: on non-unionfs-node.\n");
1260#endif
1261		panic("unionfs_checkuppervp");
1262	};
1263#endif
1264	return (unp->un_uppervp);
1265}
1266
1267struct vnode   *
1268unionfs_checklowervp(struct vnode *vp, char *fil, int lno)
1269{
1270	struct unionfs_node *unp;
1271
1272	unp = VTOUNIONFS(vp);
1273
1274#ifdef notyet
1275	if (vp->v_op != unionfs_vnodeop_p) {
1276		printf("unionfs_checklowervp: on non-unionfs-node.\n");
1277#ifdef KDB
1278		kdb_enter(KDB_WHY_UNIONFS,
1279		    "unionfs_checklowervp: on non-unionfs-node.\n");
1280#endif
1281		panic("unionfs_checklowervp");
1282	};
1283#endif
1284	return (unp->un_lowervp);
1285}
1286#endif
1287