union_subr.c revision 190888
1/*-
2 * Copyright (c) 1994 Jan-Simon Pendry
3 * Copyright (c) 1994
4 *	The Regents of the University of California.  All rights reserved.
5 * Copyright (c) 2005, 2006 Masanori Ozawa <ozawa@ongs.co.jp>, ONGS Inc.
6 * Copyright (c) 2006 Daichi Goto <daichi@freebsd.org>
7 *
8 * This code is derived from software contributed to Berkeley by
9 * Jan-Simon Pendry.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 *    notice, this list of conditions and the following disclaimer in the
18 *    documentation and/or other materials provided with the distribution.
19 * 4. Neither the name of the University nor the names of its contributors
20 *    may be used to endorse or promote products derived from this software
21 *    without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 *
35 *	@(#)union_subr.c	8.20 (Berkeley) 5/20/95
36 * $FreeBSD: head/sys/fs/unionfs/union_subr.c 190888 2009-04-10 10:52:19Z rwatson $
37 */
38
39#include <sys/param.h>
40#include <sys/systm.h>
41#include <sys/kernel.h>
42#include <sys/lock.h>
43#include <sys/mutex.h>
44#include <sys/malloc.h>
45#include <sys/mount.h>
46#include <sys/namei.h>
47#include <sys/proc.h>
48#include <sys/vnode.h>
49#include <sys/dirent.h>
50#include <sys/fcntl.h>
51#include <sys/filedesc.h>
52#include <sys/stat.h>
53#include <sys/resourcevar.h>
54
55#ifdef MAC
56#include <sys/mac.h>
57#endif
58
59#include <vm/uma.h>
60
61#include <fs/unionfs/union.h>
62
/* Size hint handed to hashinit() for each directory node's child hash. */
#define NUNIONFSNODECACHE 16

/*
 * Malloc types used by unionfs: per-directory hash tables, the
 * unionfs_node attached to each vnode, and the copied component names.
 */
static MALLOC_DEFINE(M_UNIONFSHASH, "UNIONFS hash", "UNIONFS hash table");
MALLOC_DEFINE(M_UNIONFSNODE, "UNIONFS node", "UNIONFS vnode private part");
MALLOC_DEFINE(M_UNIONFSPATH, "UNIONFS path", "UNIONFS path private part");
68
/*
 * Initialize unionfs.
 *
 * Nothing needs to be set up here; the function only emits a debug
 * message and reports success (0).  vfsp is not used.
 */
int
unionfs_init(struct vfsconf *vfsp)
{

	UNIONFSDEBUG("unionfs_init\n");	/* printed during system boot */

	return (0);
}
78
/*
 * Uninitialize unionfs.
 *
 * There is nothing to tear down; always returns 0.  vfsp is not used.
 */
int
unionfs_uninit(struct vfsconf *vfsp)
{

	return (0);
}
87
88static struct unionfs_node_hashhead *
89unionfs_get_hashhead(struct vnode *dvp, char *path)
90{
91	int		count;
92	char		hash;
93	struct unionfs_node *unp;
94
95	hash = 0;
96	unp = VTOUNIONFS(dvp);
97	if (path != NULL) {
98		for (count = 0; path[count]; count++)
99			hash += path[count];
100	}
101
102	return (&(unp->un_hashtbl[hash & (unp->un_hashmask)]));
103}
104
105/*
106 * Get the cached vnode.
107 */
108static struct vnode *
109unionfs_get_cached_vnode(struct vnode *uvp, struct vnode *lvp,
110			struct vnode *dvp, char *path)
111{
112	struct unionfs_node_hashhead *hd;
113	struct unionfs_node *unp;
114	struct vnode   *vp;
115
116	KASSERT((uvp == NULLVP || uvp->v_type == VDIR),
117	    ("unionfs_get_cached_vnode: v_type != VDIR"));
118	KASSERT((lvp == NULLVP || lvp->v_type == VDIR),
119	    ("unionfs_get_cached_vnode: v_type != VDIR"));
120
121	VI_LOCK(dvp);
122	hd = unionfs_get_hashhead(dvp, path);
123	LIST_FOREACH(unp, hd, un_hash) {
124		if (!strcmp(unp->un_path, path)) {
125			vp = UNIONFSTOV(unp);
126			VI_LOCK_FLAGS(vp, MTX_DUPOK);
127			VI_UNLOCK(dvp);
128			vp->v_iflag &= ~VI_OWEINACT;
129			if ((vp->v_iflag & (VI_DOOMED | VI_DOINGINACT)) != 0) {
130				VI_UNLOCK(vp);
131				vp = NULLVP;
132			} else
133				VI_UNLOCK(vp);
134			return (vp);
135		}
136	}
137	VI_UNLOCK(dvp);
138
139	return (NULLVP);
140}
141
142/*
143 * Add the new vnode into cache.
144 */
145static struct vnode *
146unionfs_ins_cached_vnode(struct unionfs_node *uncp,
147			struct vnode *dvp, char *path)
148{
149	struct unionfs_node_hashhead *hd;
150	struct unionfs_node *unp;
151	struct vnode   *vp;
152
153	KASSERT((uncp->un_uppervp==NULLVP || uncp->un_uppervp->v_type==VDIR),
154	    ("unionfs_ins_cached_vnode: v_type != VDIR"));
155	KASSERT((uncp->un_lowervp==NULLVP || uncp->un_lowervp->v_type==VDIR),
156	    ("unionfs_ins_cached_vnode: v_type != VDIR"));
157
158	VI_LOCK(dvp);
159	hd = unionfs_get_hashhead(dvp, path);
160	LIST_FOREACH(unp, hd, un_hash) {
161		if (!strcmp(unp->un_path, path)) {
162			vp = UNIONFSTOV(unp);
163			VI_LOCK_FLAGS(vp, MTX_DUPOK);
164			vp->v_iflag &= ~VI_OWEINACT;
165			if ((vp->v_iflag & (VI_DOOMED | VI_DOINGINACT)) != 0) {
166				LIST_INSERT_HEAD(hd, uncp, un_hash);
167				VI_UNLOCK(vp);
168				vp = NULLVP;
169			} else
170				VI_UNLOCK(vp);
171			VI_UNLOCK(dvp);
172			return (vp);
173		}
174	}
175
176	LIST_INSERT_HEAD(hd, uncp, un_hash);
177	VI_UNLOCK(dvp);
178
179	return (NULLVP);
180}
181
182/*
183 * Remove the vnode.
184 */
185static void
186unionfs_rem_cached_vnode(struct unionfs_node *unp, struct vnode *dvp)
187{
188	KASSERT((unp != NULL), ("unionfs_rem_cached_vnode: null node"));
189	KASSERT((dvp != NULLVP),
190	    ("unionfs_rem_cached_vnode: null parent vnode"));
191	KASSERT((unp->un_hash.le_prev != NULL),
192	    ("unionfs_rem_cached_vnode: null hash"));
193
194	VI_LOCK(dvp);
195	LIST_REMOVE(unp, un_hash);
196	unp->un_hash.le_next = NULL;
197	unp->un_hash.le_prev = NULL;
198	VI_UNLOCK(dvp);
199}
200
/*
 * Make a new or get existing unionfs node.
 *
 * uppervp and lowervp should be unlocked. Because if new unionfs vnode is
 * locked, uppervp or lowervp is locked too. In order to prevent dead lock,
 * you should not lock plurality simultaneously.
 *
 * mp       - the unionfs mount the vnode belongs to.
 * uppervp  - upper layer vnode, or NULLVP.
 * lowervp  - lower layer vnode, or NULLVP (at least one must be set,
 *            otherwise the function panics).
 * dvp      - unionfs parent directory vnode; may be NULLVP only for the
 *            mount's root pair, otherwise EINVAL is returned.
 * vpp      - receives the referenced unionfs vnode on success.
 * cnp      - optional component name; supplies the lock flags and, when
 *            ISLASTCN is set, the name used for the directory cache.
 * td       - not used by this function.
 *
 * Returns 0 on success, EINVAL as described above, or the error from
 * getnewvnode()/insmntque().  If cnp requests a lock type, the returned
 * vnode is locked with those flags.
 */
int
unionfs_nodeget(struct mount *mp, struct vnode *uppervp,
		struct vnode *lowervp, struct vnode *dvp,
		struct vnode **vpp, struct componentname *cnp,
		struct thread *td)
{
	struct unionfs_mount *ump;
	struct unionfs_node *unp;
	struct vnode   *vp;
	int		error;
	int		lkflags;
	enum vtype	vt;
	char	       *path;

	ump = MOUNTTOUNIONFSMOUNT(mp);
	lkflags = (cnp ? cnp->cn_lkflags : 0);
	path = (cnp ? cnp->cn_nameptr : NULL);
	*vpp = NULLVP;

	if (uppervp == NULLVP && lowervp == NULLVP)
		panic("unionfs_nodeget: upper and lower is null");

	/* The upper layer, when present, determines the vnode type. */
	vt = (uppervp != NULLVP ? uppervp->v_type : lowervp->v_type);

	/* If it has no ISLASTCN flag, path check is skipped. */
	if (cnp && !(cnp->cn_flags & ISLASTCN))
		path = NULL;

	/* check the cache */
	if (path != NULL && dvp != NULLVP && vt == VDIR) {
		vp = unionfs_get_cached_vnode(uppervp, lowervp, dvp, path);
		if (vp != NULLVP) {
			/* Cache hit: reference it and skip the allocation. */
			vref(vp);
			*vpp = vp;
			goto unionfs_nodeget_out;
		}
	}

	/* Anything other than the mount's own upper/lower pair needs dvp. */
	if ((uppervp == NULLVP || ump->um_uppervp != uppervp) ||
	    (lowervp == NULLVP || ump->um_lowervp != lowervp)) {
		/* dvp will be NULLVP only in case of root vnode. */
		if (dvp == NULLVP)
			return (EINVAL);
	}

	/*
	 * Do the MALLOC before the getnewvnode since doing so afterward
	 * might cause a bogus v_data pointer to get dereferenced elsewhere
	 * if MALLOC should block.
	 */
	unp = malloc(sizeof(struct unionfs_node),
	    M_UNIONFSNODE, M_WAITOK | M_ZERO);

	error = getnewvnode("unionfs", mp, &unionfs_vnodeops, &vp);
	if (error != 0) {
		free(unp, M_UNIONFSNODE);
		return (error);
	}
	/*
	 * NOTE(review): on failure only unp is freed here; presumably
	 * insmntque() disposes of vp itself -- confirm.
	 */
	error = insmntque(vp, mp);	/* XXX: Too early for mpsafe fs */
	if (error != 0) {
		free(unp, M_UNIONFSNODE);
		return (error);
	}
	/* The new node holds a reference on its parent and on each layer. */
	if (dvp != NULLVP)
		vref(dvp);
	if (uppervp != NULLVP)
		vref(uppervp);
	if (lowervp != NULLVP)
		vref(lowervp);

	/* Only directories carry a hash table for their children. */
	if (vt == VDIR)
		unp->un_hashtbl = hashinit(NUNIONFSNODECACHE, M_UNIONFSHASH,
		    &(unp->un_hashmask));

	unp->un_vnode = vp;
	unp->un_uppervp = uppervp;
	unp->un_lowervp = lowervp;
	unp->un_dvp = dvp;
	/* Share the lock of the upper vnode, or of the lower if no upper. */
	if (uppervp != NULLVP)
		vp->v_vnlock = uppervp->v_vnlock;
	else
		vp->v_vnlock = lowervp->v_vnlock;

	/* Keep a private NUL-terminated copy of the component name. */
	if (path != NULL) {
		unp->un_path = (char *)
		    malloc(cnp->cn_namelen +1, M_UNIONFSPATH, M_WAITOK|M_ZERO);
		bcopy(cnp->cn_nameptr, unp->un_path, cnp->cn_namelen);
		unp->un_path[cnp->cn_namelen] = '\0';
	}
	vp->v_type = vt;
	vp->v_data = unp;

	/* The pair that matches the mount's own layers is the root vnode. */
	if ((uppervp != NULLVP && ump->um_uppervp == uppervp) &&
	    (lowervp != NULLVP && ump->um_lowervp == lowervp))
		vp->v_vflag |= VV_ROOT;

	if (path != NULL && dvp != NULLVP && vt == VDIR)
		*vpp = unionfs_ins_cached_vnode(unp, dvp, path);
	if ((*vpp) != NULLVP) {
		/*
		 * Another vnode is already cached under this name: undo the
		 * references taken above, release the vnode just built and
		 * return the cached one instead.
		 */
		if (dvp != NULLVP)
			vrele(dvp);
		if (uppervp != NULLVP)
			vrele(uppervp);
		if (lowervp != NULLVP)
			vrele(lowervp);

		unp->un_uppervp = NULLVP;
		unp->un_lowervp = NULLVP;
		unp->un_dvp = NULLVP;
		vrele(vp);
		vp = *vpp;
		vref(vp);
	} else
		*vpp = vp;

unionfs_nodeget_out:
	/* Lock the result only when the caller asked for a lock type. */
	if (lkflags & LK_TYPE_MASK)
		vn_lock(vp, lkflags | LK_RETRY);

	return (0);
}
329
/*
 * Clean up the unionfs node.
 *
 * Detaches the unionfs_node from vp, drops the locks and references held
 * on the lower, upper and parent vnodes, removes the node from its
 * parent's name cache, empties and destroys its own child hash table,
 * frees all per-process status records and finally frees the node.
 * td is not used by this function.
 */
void
unionfs_noderem(struct vnode *vp, struct thread *td)
{
	int		vfslocked;
	int		count;
	struct unionfs_node *unp, *unp_t1, *unp_t2;
	struct unionfs_node_hashhead *hd;
	struct unionfs_node_status *unsp, *unsp_tmp;
	struct vnode   *lvp;
	struct vnode   *uvp;
	struct vnode   *dvp;

	/*
	 * Use the interlock to protect the clearing of v_data to
	 * prevent faults in unionfs_lock().
	 */
	VI_LOCK(vp);
	unp = VTOUNIONFS(vp);
	lvp = unp->un_lowervp;
	uvp = unp->un_uppervp;
	dvp = unp->un_dvp;
	unp->un_lowervp = unp->un_uppervp = NULLVP;

	/* Point vp back at its own lock and take it, passing the interlock. */
	vp->v_vnlock = &(vp->v_lock);
	vp->v_data = NULL;
	lockmgr(vp->v_vnlock, LK_EXCLUSIVE | LK_INTERLOCK, VI_MTX(vp));
	/* The layer vnodes no longer need to stay locked on vp's behalf. */
	if (lvp != NULLVP)
		VOP_UNLOCK(lvp, 0);
	if (uvp != NULLVP)
		VOP_UNLOCK(uvp, 0);
	vp->v_object = NULL;

	/* Leave the parent's name cache if this node is still hashed. */
	if (dvp != NULLVP && unp->un_hash.le_prev != NULL)
		unionfs_rem_cached_vnode(unp, dvp);

	/* Drop the references that unionfs_nodeget() took. */
	if (lvp != NULLVP) {
		vfslocked = VFS_LOCK_GIANT(lvp->v_mount);
		vrele(lvp);
		VFS_UNLOCK_GIANT(vfslocked);
	}
	if (uvp != NULLVP) {
		vfslocked = VFS_LOCK_GIANT(uvp->v_mount);
		vrele(uvp);
		VFS_UNLOCK_GIANT(vfslocked);
	}
	if (dvp != NULLVP) {
		vfslocked = VFS_LOCK_GIANT(dvp->v_mount);
		vrele(dvp);
		VFS_UNLOCK_GIANT(vfslocked);
		unp->un_dvp = NULLVP;
	}
	if (unp->un_path != NULL) {
		free(unp->un_path, M_UNIONFSPATH);
		unp->un_path = NULL;
	}

	/*
	 * Unhash every child still on this directory's chains.  Clearing
	 * the link pointers marks the child as unhashed, so its own
	 * teardown skips unionfs_rem_cached_vnode().
	 */
	if (unp->un_hashtbl != NULL) {
		for (count = 0; count <= unp->un_hashmask; count++) {
			hd = unp->un_hashtbl + count;
			LIST_FOREACH_SAFE(unp_t1, hd, un_hash, unp_t2) {
				LIST_REMOVE(unp_t1, un_hash);
				unp_t1->un_hash.le_next = NULL;
				unp_t1->un_hash.le_prev = NULL;
			}
		}
		hashdestroy(unp->un_hashtbl, M_UNIONFSHASH, unp->un_hashmask);
	}

	/* Free the per-process status records attached to the node. */
	LIST_FOREACH_SAFE(unsp, &(unp->un_unshead), uns_list, unsp_tmp) {
		LIST_REMOVE(unsp, uns_list);
		free(unsp, M_TEMP);
	}
	free(unp, M_UNIONFSNODE);
}
407
408/*
409 * Get the unionfs node status.
410 * You need exclusive lock this vnode.
411 */
412void
413unionfs_get_node_status(struct unionfs_node *unp, struct thread *td,
414			struct unionfs_node_status **unspp)
415{
416	struct unionfs_node_status *unsp;
417	pid_t pid = td->td_proc->p_pid;
418
419	KASSERT(NULL != unspp, ("null pointer"));
420	ASSERT_VOP_ELOCKED(UNIONFSTOV(unp), "unionfs_get_node_status");
421
422	LIST_FOREACH(unsp, &(unp->un_unshead), uns_list) {
423		if (unsp->uns_pid == pid) {
424			*unspp = unsp;
425			return;
426		}
427	}
428
429	/* create a new unionfs node status */
430	unsp = malloc(sizeof(struct unionfs_node_status),
431	    M_TEMP, M_WAITOK | M_ZERO);
432
433	unsp->uns_pid = pid;
434	LIST_INSERT_HEAD(&(unp->un_unshead), unsp, uns_list);
435
436	*unspp = unsp;
437}
438
439/*
440 * Remove the unionfs node status, if you can.
441 * You need exclusive lock this vnode.
442 */
443void
444unionfs_tryrem_node_status(struct unionfs_node *unp,
445			   struct unionfs_node_status *unsp)
446{
447	KASSERT(NULL != unsp, ("null pointer"));
448	ASSERT_VOP_ELOCKED(UNIONFSTOV(unp), "unionfs_get_node_status");
449
450	if (0 < unsp->uns_lower_opencnt || 0 < unsp->uns_upper_opencnt)
451		return;
452
453	LIST_REMOVE(unsp, uns_list);
454	free(unsp, M_TEMP);
455}
456
457/*
458 * Create upper node attr.
459 */
460void
461unionfs_create_uppervattr_core(struct unionfs_mount *ump,
462			       struct vattr *lva,
463			       struct vattr *uva,
464			       struct thread *td)
465{
466	VATTR_NULL(uva);
467	uva->va_type = lva->va_type;
468	uva->va_atime = lva->va_atime;
469	uva->va_mtime = lva->va_mtime;
470	uva->va_ctime = lva->va_ctime;
471
472	switch (ump->um_copymode) {
473	case UNIONFS_TRANSPARENT:
474		uva->va_mode = lva->va_mode;
475		uva->va_uid = lva->va_uid;
476		uva->va_gid = lva->va_gid;
477		break;
478	case UNIONFS_MASQUERADE:
479		if (ump->um_uid == lva->va_uid) {
480			uva->va_mode = lva->va_mode & 077077;
481			uva->va_mode |= (lva->va_type == VDIR ? ump->um_udir : ump->um_ufile) & 0700;
482			uva->va_uid = lva->va_uid;
483			uva->va_gid = lva->va_gid;
484		} else {
485			uva->va_mode = (lva->va_type == VDIR ? ump->um_udir : ump->um_ufile);
486			uva->va_uid = ump->um_uid;
487			uva->va_gid = ump->um_gid;
488		}
489		break;
490	default:		/* UNIONFS_TRADITIONAL */
491		FILEDESC_SLOCK(td->td_proc->p_fd);
492		uva->va_mode = 0777 & ~td->td_proc->p_fd->fd_cmask;
493		FILEDESC_SUNLOCK(td->td_proc->p_fd);
494		uva->va_uid = ump->um_uid;
495		uva->va_gid = ump->um_gid;
496		break;
497	}
498}
499
500/*
501 * Create upper node attr.
502 */
503int
504unionfs_create_uppervattr(struct unionfs_mount *ump,
505			  struct vnode *lvp,
506			  struct vattr *uva,
507			  struct ucred *cred,
508			  struct thread *td)
509{
510	int		error;
511	struct vattr	lva;
512
513	if ((error = VOP_GETATTR(lvp, &lva, cred)))
514		return (error);
515
516	unionfs_create_uppervattr_core(ump, &lva, uva, td);
517
518	return (error);
519}
520
521/*
522 * relookup
523 *
524 * dvp should be locked on entry and will be locked on return.
525 *
526 * If an error is returned, *vpp will be invalid, otherwise it will hold a
527 * locked, referenced vnode. If *vpp == dvp then remember that only one
528 * LK_EXCLUSIVE lock is held.
529 */
530int
531unionfs_relookup(struct vnode *dvp, struct vnode **vpp,
532		 struct componentname *cnp, struct componentname *cn,
533		 struct thread *td, char *path, int pathlen, u_long nameiop)
534{
535	int	error;
536
537	cn->cn_namelen = pathlen;
538	cn->cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK);
539	bcopy(path, cn->cn_pnbuf, pathlen);
540	cn->cn_pnbuf[pathlen] = '\0';
541
542	cn->cn_nameiop = nameiop;
543	cn->cn_flags = (LOCKPARENT | LOCKLEAF | HASBUF | SAVENAME | ISLASTCN);
544	cn->cn_lkflags = LK_EXCLUSIVE;
545	cn->cn_thread = td;
546	cn->cn_cred = cnp->cn_cred;
547
548	cn->cn_nameptr = cn->cn_pnbuf;
549	cn->cn_consume = cnp->cn_consume;
550
551	if (nameiop == DELETE)
552		cn->cn_flags |= (cnp->cn_flags & (DOWHITEOUT | SAVESTART));
553	else if (RENAME == nameiop)
554		cn->cn_flags |= (cnp->cn_flags & SAVESTART);
555
556	vref(dvp);
557	VOP_UNLOCK(dvp, 0);
558
559	if ((error = relookup(dvp, vpp, cn))) {
560		uma_zfree(namei_zone, cn->cn_pnbuf);
561		cn->cn_flags &= ~HASBUF;
562		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
563	} else
564		vrele(dvp);
565
566	return (error);
567}
568
569/*
570 * relookup for CREATE namei operation.
571 *
572 * dvp is unionfs vnode. dvp should be locked.
573 *
574 * If it called 'unionfs_copyfile' function by unionfs_link etc,
575 * VOP_LOOKUP information is broken.
576 * So it need relookup in order to create link etc.
577 */
578int
579unionfs_relookup_for_create(struct vnode *dvp, struct componentname *cnp,
580			    struct thread *td)
581{
582	int	error;
583	struct vnode *udvp;
584	struct vnode *vp;
585	struct componentname cn;
586
587	udvp = UNIONFSVPTOUPPERVP(dvp);
588	vp = NULLVP;
589
590	error = unionfs_relookup(udvp, &vp, cnp, &cn, td, cnp->cn_nameptr,
591	    strlen(cnp->cn_nameptr), CREATE);
592	if (error)
593		return (error);
594
595	if (vp != NULLVP) {
596		if (udvp == vp)
597			vrele(vp);
598		else
599			vput(vp);
600
601		error = EEXIST;
602	}
603
604	if (cn.cn_flags & HASBUF) {
605		uma_zfree(namei_zone, cn.cn_pnbuf);
606		cn.cn_flags &= ~HASBUF;
607	}
608
609	if (!error) {
610		cn.cn_flags |= (cnp->cn_flags & HASBUF);
611		cnp->cn_flags = cn.cn_flags;
612	}
613
614	return (error);
615}
616
617/*
618 * relookup for DELETE namei operation.
619 *
620 * dvp is unionfs vnode. dvp should be locked.
621 */
622int
623unionfs_relookup_for_delete(struct vnode *dvp, struct componentname *cnp,
624			    struct thread *td)
625{
626	int	error;
627	struct vnode *udvp;
628	struct vnode *vp;
629	struct componentname cn;
630
631	udvp = UNIONFSVPTOUPPERVP(dvp);
632	vp = NULLVP;
633
634	error = unionfs_relookup(udvp, &vp, cnp, &cn, td, cnp->cn_nameptr,
635	    strlen(cnp->cn_nameptr), DELETE);
636	if (error)
637		return (error);
638
639	if (vp == NULLVP)
640		error = ENOENT;
641	else {
642		if (udvp == vp)
643			vrele(vp);
644		else
645			vput(vp);
646	}
647
648	if (cn.cn_flags & HASBUF) {
649		uma_zfree(namei_zone, cn.cn_pnbuf);
650		cn.cn_flags &= ~HASBUF;
651	}
652
653	if (!error) {
654		cn.cn_flags |= (cnp->cn_flags & HASBUF);
655		cnp->cn_flags = cn.cn_flags;
656	}
657
658	return (error);
659}
660
661/*
662 * relookup for RENAME namei operation.
663 *
664 * dvp is unionfs vnode. dvp should be locked.
665 */
666int
667unionfs_relookup_for_rename(struct vnode *dvp, struct componentname *cnp,
668			    struct thread *td)
669{
670	int error;
671	struct vnode *udvp;
672	struct vnode *vp;
673	struct componentname cn;
674
675	udvp = UNIONFSVPTOUPPERVP(dvp);
676	vp = NULLVP;
677
678	error = unionfs_relookup(udvp, &vp, cnp, &cn, td, cnp->cn_nameptr,
679	    strlen(cnp->cn_nameptr), RENAME);
680	if (error)
681		return (error);
682
683	if (vp != NULLVP) {
684		if (udvp == vp)
685			vrele(vp);
686		else
687			vput(vp);
688	}
689
690	if (cn.cn_flags & HASBUF) {
691		uma_zfree(namei_zone, cn.cn_pnbuf);
692		cn.cn_flags &= ~HASBUF;
693	}
694
695	if (!error) {
696		cn.cn_flags |= (cnp->cn_flags & HASBUF);
697		cnp->cn_flags = cn.cn_flags;
698	}
699
700	return (error);
701
702}
703
704/*
705 * Update the unionfs_node.
706 *
707 * uvp is new locked upper vnode. unionfs vnode's lock will be exchanged to the
708 * uvp's lock and lower's lock will be unlocked.
709 */
710static void
711unionfs_node_update(struct unionfs_node *unp, struct vnode *uvp,
712		    struct thread *td)
713{
714	unsigned	count, lockrec;
715	struct vnode   *vp;
716	struct vnode   *lvp;
717	struct vnode   *dvp;
718
719	vp = UNIONFSTOV(unp);
720	lvp = unp->un_lowervp;
721	ASSERT_VOP_ELOCKED(lvp, "unionfs_node_update");
722	dvp = unp->un_dvp;
723
724	/*
725	 * lock update
726	 */
727	VI_LOCK(vp);
728	unp->un_uppervp = uvp;
729	vp->v_vnlock = uvp->v_vnlock;
730	VI_UNLOCK(vp);
731	lockrec = lvp->v_vnlock->lk_recurse;
732	for (count = 0; count < lockrec; count++)
733		vn_lock(uvp, LK_EXCLUSIVE | LK_CANRECURSE | LK_RETRY);
734
735	/*
736	 * cache update
737	 */
738	if (unp->un_path != NULL && dvp != NULLVP && vp->v_type == VDIR) {
739		static struct unionfs_node_hashhead *hd;
740
741		VI_LOCK(dvp);
742		hd = unionfs_get_hashhead(dvp, unp->un_path);
743		LIST_REMOVE(unp, un_hash);
744		LIST_INSERT_HEAD(hd, unp, un_hash);
745		VI_UNLOCK(dvp);
746	}
747}
748
/*
 * Create a new shadow dir.
 *
 * Creates, in the upper directory udvp, a directory named by cnp that
 * shadows the lower directory of unp, and attaches it to unp.
 *
 * udvp should be locked on entry and will be locked on return.
 *
 * If no error returned, unp will be updated.
 *
 * Returns EEXIST if unp already has an upper vnode or the name already
 * exists in udvp; otherwise 0 or the error of the failing step.
 * cnp->cn_cred is replaced temporarily and restored before returning.
 */
int
unionfs_mkshadowdir(struct unionfs_mount *ump, struct vnode *udvp,
		    struct unionfs_node *unp, struct componentname *cnp,
		    struct thread *td)
{
	int		error;
	struct vnode   *lvp;
	struct vnode   *uvp;
	struct vattr	va;
	struct vattr	lva;
	struct componentname cn;
	struct mount   *mp;
	struct ucred   *cred;
	struct ucred   *credbk;
	struct uidinfo *rootinfo;

	if (unp->un_uppervp != NULLVP)
		return (EEXIST);

	lvp = unp->un_lowervp;
	uvp = NULLVP;
	credbk = cnp->cn_cred;

	/*
	 * Authority change to root: work on a duplicate of the caller's
	 * credential whose effective, real and saved uid are 0.
	 * NOTE(review): the chgproccnt(+1) here and the chgproccnt(-1) at
	 * the end presumably balance the accounting done by change_ruid()
	 * -- confirm.
	 */
	rootinfo = uifind((uid_t)0);
	cred = crdup(cnp->cn_cred);
	chgproccnt(cred->cr_ruidinfo, 1, 0);
	change_euid(cred, rootinfo);
	change_ruid(cred, rootinfo);
	change_svuid(cred, (uid_t)0);
	uifree(rootinfo);
	cnp->cn_cred = cred;

	/* Zeroed so that cn.cn_flags is well defined in the cleanup code. */
	memset(&cn, 0, sizeof(cn));

	if ((error = VOP_GETATTR(lvp, &lva, cnp->cn_cred)))
		goto unionfs_mkshadowdir_abort;

	/* On failure unionfs_relookup() has already freed cn's buffer. */
	if ((error = unionfs_relookup(udvp, &uvp, cnp, &cn, td, cnp->cn_nameptr, cnp->cn_namelen, CREATE)))
		goto unionfs_mkshadowdir_abort;
	if (uvp != NULLVP) {
		/* The name already exists in the upper directory. */
		if (udvp == uvp)
			vrele(uvp);
		else
			vput(uvp);

		error = EEXIST;
		goto unionfs_mkshadowdir_free_out;
	}

	if ((error = vn_start_write(udvp, &mp, V_WAIT | PCATCH)))
		goto unionfs_mkshadowdir_free_out;
	unionfs_create_uppervattr_core(ump, &lva, &va, td);

	error = VOP_MKDIR(udvp, &uvp, &cn, &va);

	if (!error) {
		unionfs_node_update(unp, uvp, td);

		/*
		 * XXX The bug which cannot set uid/gid was corrected.
		 * Ignore errors.
		 */
		va.va_type = VNON;
		VOP_SETATTR(uvp, &va, cn.cn_cred);
	}
	vn_finished_write(mp);

unionfs_mkshadowdir_free_out:
	if (cn.cn_flags & HASBUF) {
		uma_zfree(namei_zone, cn.cn_pnbuf);
		cn.cn_flags &= ~HASBUF;
	}

unionfs_mkshadowdir_abort:
	/* Restore the caller's credential and drop the temporary one. */
	cnp->cn_cred = credbk;
	chgproccnt(cred->cr_ruidinfo, -1, 0);
	crfree(cred);

	return (error);
}
837
838/*
839 * Create a new whiteout.
840 *
841 * dvp should be locked on entry and will be locked on return.
842 */
843int
844unionfs_mkwhiteout(struct vnode *dvp, struct componentname *cnp,
845		   struct thread *td, char *path)
846{
847	int		error;
848	struct vnode   *wvp;
849	struct componentname cn;
850	struct mount   *mp;
851
852	if (path == NULL)
853		path = cnp->cn_nameptr;
854
855	wvp = NULLVP;
856	if ((error = unionfs_relookup(dvp, &wvp, cnp, &cn, td, path, strlen(path), CREATE)))
857		return (error);
858	if (wvp != NULLVP) {
859		if (cn.cn_flags & HASBUF) {
860			uma_zfree(namei_zone, cn.cn_pnbuf);
861			cn.cn_flags &= ~HASBUF;
862		}
863		if (dvp == wvp)
864			vrele(wvp);
865		else
866			vput(wvp);
867
868		return (EEXIST);
869	}
870
871	if ((error = vn_start_write(dvp, &mp, V_WAIT | PCATCH)))
872		goto unionfs_mkwhiteout_free_out;
873	error = VOP_WHITEOUT(dvp, &cn, CREATE);
874
875	vn_finished_write(mp);
876
877unionfs_mkwhiteout_free_out:
878	if (cn.cn_flags & HASBUF) {
879		uma_zfree(namei_zone, cn.cn_pnbuf);
880		cn.cn_flags &= ~HASBUF;
881	}
882
883	return (error);
884}
885
/*
 * Create a new vnode for create a new shadow file.
 *
 * The file is created in the upper directory udvp under the node's
 * un_path, with attributes derived from the lower vnode (stored in
 * *uvap), and is opened for writing.
 *
 * If an error is returned, *vpp will be invalid, otherwise it will hold a
 * locked, referenced and opened vnode.
 *
 * unp is never updated.
 *
 * Returns EEXIST if the name already exists in udvp; panics if the node
 * has no un_path.
 */
static int
unionfs_vn_create_on_upper(struct vnode **vpp, struct vnode *udvp,
			   struct unionfs_node *unp, struct vattr *uvap,
			   struct thread *td)
{
	struct unionfs_mount *ump;
	struct vnode   *vp;
	struct vnode   *lvp;
	struct ucred   *cred;
	struct vattr	lva;
	int		fmode;
	int		error;
	struct componentname cn;

	ump = MOUNTTOUNIONFSMOUNT(UNIONFSTOV(unp)->v_mount);
	vp = NULLVP;
	lvp = unp->un_lowervp;
	cred = td->td_ucred;
	fmode = FFLAGS(O_WRONLY | O_CREAT | O_TRUNC | O_EXCL);
	error = 0;

	if ((error = VOP_GETATTR(lvp, &lva, cred)) != 0)
		return (error);
	unionfs_create_uppervattr_core(ump, &lva, uvap, td);

	if (unp->un_path == NULL)
		panic("unionfs: un_path is null");

	/* Hand-build a CREATE component name for the node's own name. */
	cn.cn_namelen = strlen(unp->un_path);
	cn.cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK);
	bcopy(unp->un_path, cn.cn_pnbuf, cn.cn_namelen + 1);
	cn.cn_nameiop = CREATE;
	cn.cn_flags = (LOCKPARENT | LOCKLEAF | HASBUF | SAVENAME | ISLASTCN);
	cn.cn_lkflags = LK_EXCLUSIVE;
	cn.cn_thread = td;
	cn.cn_cred = cred;
	cn.cn_nameptr = cn.cn_pnbuf;
	cn.cn_consume = 0;

	/*
	 * NOTE(review): the error path skips both vrele() and the unlock of
	 * udvp; presumably relookup() has released them on failure --
	 * confirm.
	 */
	vref(udvp);
	if ((error = relookup(udvp, &vp, &cn)) != 0)
		goto unionfs_vn_create_on_upper_free_out2;
	vrele(udvp);

	if (vp != NULLVP) {
		/* The name already exists in the upper directory. */
		if (vp == udvp)
			vrele(vp);
		else
			vput(vp);
		error = EEXIST;
		goto unionfs_vn_create_on_upper_free_out1;
	}

	if ((error = VOP_CREATE(udvp, &vp, &cn, uvap)) != 0)
		goto unionfs_vn_create_on_upper_free_out1;

	if ((error = VOP_OPEN(vp, fmode, cred, td, NULL)) != 0) {
		vput(vp);
		goto unionfs_vn_create_on_upper_free_out1;
	}
	/* Account for the write open; unionfs_copyfile() undoes it. */
	vp->v_writecount++;
	*vpp = vp;

unionfs_vn_create_on_upper_free_out1:
	VOP_UNLOCK(udvp, 0);

unionfs_vn_create_on_upper_free_out2:
	if (cn.cn_flags & HASBUF) {
		uma_zfree(namei_zone, cn.cn_pnbuf);
		cn.cn_flags &= ~HASBUF;
	}

	return (error);
}
968
969/*
970 * Copy from lvp to uvp.
971 *
972 * lvp and uvp should be locked and opened on entry and will be locked and
973 * opened on return.
974 */
975static int
976unionfs_copyfile_core(struct vnode *lvp, struct vnode *uvp,
977		      struct ucred *cred, struct thread *td)
978{
979	int		error;
980	off_t		offset;
981	int		count;
982	int		bufoffset;
983	char           *buf;
984	struct uio	uio;
985	struct iovec	iov;
986
987	error = 0;
988	memset(&uio, 0, sizeof(uio));
989
990	uio.uio_td = td;
991	uio.uio_segflg = UIO_SYSSPACE;
992	uio.uio_offset = 0;
993
994	buf = malloc(MAXBSIZE, M_TEMP, M_WAITOK);
995
996	while (error == 0) {
997		offset = uio.uio_offset;
998
999		uio.uio_iov = &iov;
1000		uio.uio_iovcnt = 1;
1001		iov.iov_base = buf;
1002		iov.iov_len = MAXBSIZE;
1003		uio.uio_resid = iov.iov_len;
1004		uio.uio_rw = UIO_READ;
1005
1006		if ((error = VOP_READ(lvp, &uio, 0, cred)) != 0)
1007			break;
1008		if ((count = MAXBSIZE - uio.uio_resid) == 0)
1009			break;
1010
1011		bufoffset = 0;
1012		while (bufoffset < count) {
1013			uio.uio_iov = &iov;
1014			uio.uio_iovcnt = 1;
1015			iov.iov_base = buf + bufoffset;
1016			iov.iov_len = count - bufoffset;
1017			uio.uio_offset = offset + bufoffset;
1018			uio.uio_resid = iov.iov_len;
1019			uio.uio_rw = UIO_WRITE;
1020
1021			if ((error = VOP_WRITE(uvp, &uio, 0, cred)) != 0)
1022				break;
1023
1024			bufoffset += (count - bufoffset) - uio.uio_resid;
1025		}
1026
1027		uio.uio_offset = offset + bufoffset;
1028	}
1029
1030	free(buf, M_TEMP);
1031
1032	return (error);
1033}
1034
1035/*
1036 * Copy file from lower to upper.
1037 *
1038 * If you need copy of the contents, set 1 to docopy. Otherwise, set 0 to
1039 * docopy.
1040 *
1041 * If no error returned, unp will be updated.
1042 */
1043int
1044unionfs_copyfile(struct unionfs_node *unp, int docopy, struct ucred *cred,
1045		 struct thread *td)
1046{
1047	int		error;
1048	struct mount   *mp;
1049	struct vnode   *udvp;
1050	struct vnode   *lvp;
1051	struct vnode   *uvp;
1052	struct vattr	uva;
1053
1054	lvp = unp->un_lowervp;
1055	uvp = NULLVP;
1056
1057	if ((UNIONFSTOV(unp)->v_mount->mnt_flag & MNT_RDONLY))
1058		return (EROFS);
1059	if (unp->un_dvp == NULLVP)
1060		return (EINVAL);
1061	if (unp->un_uppervp != NULLVP)
1062		return (EEXIST);
1063	udvp = VTOUNIONFS(unp->un_dvp)->un_uppervp;
1064	if (udvp == NULLVP)
1065		return (EROFS);
1066	if ((udvp->v_mount->mnt_flag & MNT_RDONLY))
1067		return (EROFS);
1068
1069	error = VOP_ACCESS(lvp, VREAD, cred, td);
1070	if (error != 0)
1071		return (error);
1072
1073	if ((error = vn_start_write(udvp, &mp, V_WAIT | PCATCH)) != 0)
1074		return (error);
1075	error = unionfs_vn_create_on_upper(&uvp, udvp, unp, &uva, td);
1076	if (error != 0) {
1077		vn_finished_write(mp);
1078		return (error);
1079	}
1080
1081	if (docopy != 0) {
1082		error = VOP_OPEN(lvp, FREAD, cred, td, NULL);
1083		if (error == 0) {
1084			error = unionfs_copyfile_core(lvp, uvp, cred, td);
1085			VOP_CLOSE(lvp, FREAD, cred, td);
1086		}
1087	}
1088	VOP_CLOSE(uvp, FWRITE, cred, td);
1089	uvp->v_writecount--;
1090
1091	vn_finished_write(mp);
1092
1093	if (error == 0) {
1094		/* Reset the attributes. Ignore errors. */
1095		uva.va_type = VNON;
1096		VOP_SETATTR(uvp, &uva, cred);
1097	}
1098
1099	unionfs_node_update(unp, uvp, td);
1100
1101	return (error);
1102}
1103
/*
 * It checks whether vp can rmdir. (check empty)
 *
 * vp is unionfs vnode.
 * vp should be locked.
 *
 * An opaque upper directory is accepted at once.  Otherwise every entry
 * of the lower directory that can actually be looked up there must
 * either exist in the upper directory or be whited out in it.
 *
 * Returns 0 if the directory may be removed, ENOTEMPTY if a lower entry
 * is still visible, or the error of a failing attribute, access, open or
 * readdir call.
 */
int
unionfs_check_rmdir(struct vnode *vp, struct ucred *cred, struct thread *td)
{
	int		error;
	int		eofflag;
	int		lookuperr;
	struct vnode   *uvp;
	struct vnode   *lvp;
	struct vnode   *tvp;
	struct vattr	va;
	struct componentname cn;
	/*
	 * The size of buf needs to be larger than DIRBLKSIZ.
	 */
	char		buf[256 * 6];
	struct dirent  *dp;
	struct dirent  *edp;
	struct uio	uio;
	struct iovec	iov;

	ASSERT_VOP_ELOCKED(vp, "unionfs_check_rmdir");

	eofflag = 0;
	uvp = UNIONFSVPTOUPPERVP(vp);
	lvp = UNIONFSVPTOLOWERVP(vp);

	/* check opaque */
	if ((error = VOP_GETATTR(uvp, &va, cred)) != 0)
		return (error);
	if (va.va_flags & OPAQUE)
		return (0);

	/* open vnode */
#ifdef MAC
	if ((error = mac_vnode_check_open(cred, vp, VEXEC|VREAD)) != 0)
		return (error);
#endif
	if ((error = VOP_ACCESS(vp, VEXEC|VREAD, cred, td)) != 0)
		return (error);
	if ((error = VOP_OPEN(vp, FREAD, cred, td, NULL)) != 0)
		return (error);

	uio.uio_rw = UIO_READ;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_td = td;
	uio.uio_offset = 0;

#ifdef MAC
	error = mac_vnode_check_readdir(td->td_ucred, lvp);
#endif
	/* Walk the lower directory one buffer of entries at a time. */
	while (!error && !eofflag) {
		iov.iov_base = buf;
		iov.iov_len = sizeof(buf);
		uio.uio_iov = &iov;
		uio.uio_iovcnt = 1;
		uio.uio_resid = iov.iov_len;

		error = VOP_READDIR(lvp, &uio, cred, &eofflag, NULL, NULL);
		if (error != 0)
			break;
		/* Nothing was returned although EOF was not reported. */
		if (eofflag == 0 && uio.uio_resid == sizeof(buf)) {
#ifdef DIAGNOSTIC
			panic("bad readdir response from lower FS.");
#endif
			break;
		}

		/* edp marks the end of the data VOP_READDIR() stored. */
		edp = (struct dirent*)&buf[sizeof(buf) - uio.uio_resid];
		for (dp = (struct dirent*)buf; !error && dp < edp;
		     dp = (struct dirent*)((caddr_t)dp + dp->d_reclen)) {
			/* Whiteouts, "." and ".." never block the removal. */
			if (dp->d_type == DT_WHT ||
			    (dp->d_namlen == 1 && dp->d_name[0] == '.') ||
			    (dp->d_namlen == 2 && !bcmp(dp->d_name, "..", 2)))
				continue;

			cn.cn_namelen = dp->d_namlen;
			cn.cn_pnbuf = NULL;
			cn.cn_nameptr = dp->d_name;
			cn.cn_nameiop = LOOKUP;
			cn.cn_flags = (LOCKPARENT | LOCKLEAF | SAVENAME | RDONLY | ISLASTCN);
			cn.cn_lkflags = LK_EXCLUSIVE;
			cn.cn_thread = td;
			cn.cn_cred = cred;
			cn.cn_consume = 0;

			/*
			 * check entry in lower.
			 * Sometimes, readdir function returns
			 * wrong entry.
			 */
			lookuperr = VOP_LOOKUP(lvp, &tvp, &cn);

			if (!lookuperr)
				vput(tvp);
			else
				continue; /* skip entry */

			/*
			 * check entry
			 * If it has no exist/whiteout entry in upper,
			 * directory is not empty.
			 */
			cn.cn_flags = (LOCKPARENT | LOCKLEAF | SAVENAME | RDONLY | ISLASTCN);
			lookuperr = VOP_LOOKUP(uvp, &tvp, &cn);

			if (!lookuperr)
				vput(tvp);

			/* ignore exist or whiteout entry */
			if (!lookuperr ||
			    (lookuperr == ENOENT && (cn.cn_flags & ISWHITEOUT)))
				continue;

			/* Setting error also ends both loops. */
			error = ENOTEMPTY;
		}
	}

	/* close vnode */
	VOP_CLOSE(vp, FREAD, cred, td);

	return (error);
}
1232
1233#ifdef DIAGNOSTIC
1234
1235struct vnode   *
1236unionfs_checkuppervp(struct vnode *vp, char *fil, int lno)
1237{
1238	struct unionfs_node *unp;
1239
1240	unp = VTOUNIONFS(vp);
1241
1242#ifdef notyet
1243	if (vp->v_op != unionfs_vnodeop_p) {
1244		printf("unionfs_checkuppervp: on non-unionfs-node.\n");
1245#ifdef KDB
1246		kdb_enter(KDB_WHY_UNIONFS,
1247		    "unionfs_checkuppervp: on non-unionfs-node.\n");
1248#endif
1249		panic("unionfs_checkuppervp");
1250	};
1251#endif
1252	return (unp->un_uppervp);
1253}
1254
1255struct vnode   *
1256unionfs_checklowervp(struct vnode *vp, char *fil, int lno)
1257{
1258	struct unionfs_node *unp;
1259
1260	unp = VTOUNIONFS(vp);
1261
1262#ifdef notyet
1263	if (vp->v_op != unionfs_vnodeop_p) {
1264		printf("unionfs_checklowervp: on non-unionfs-node.\n");
1265#ifdef KDB
1266		kdb_enter(KDB_WHY_UNIONFS,
1267		    "unionfs_checklowervp: on non-unionfs-node.\n");
1268#endif
1269		panic("unionfs_checklowervp");
1270	};
1271#endif
1272	return (unp->un_lowervp);
1273}
1274#endif
1275