union_subr.c revision 193511
1/*-
2 * Copyright (c) 1994 Jan-Simon Pendry
3 * Copyright (c) 1994
4 *	The Regents of the University of California.  All rights reserved.
5 * Copyright (c) 2005, 2006 Masanori Ozawa <ozawa@ongs.co.jp>, ONGS Inc.
6 * Copyright (c) 2006 Daichi Goto <daichi@freebsd.org>
7 *
8 * This code is derived from software contributed to Berkeley by
9 * Jan-Simon Pendry.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 *    notice, this list of conditions and the following disclaimer in the
18 *    documentation and/or other materials provided with the distribution.
19 * 4. Neither the name of the University nor the names of its contributors
20 *    may be used to endorse or promote products derived from this software
21 *    without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 *
35 *	@(#)union_subr.c	8.20 (Berkeley) 5/20/95
36 * $FreeBSD: head/sys/fs/unionfs/union_subr.c 193511 2009-06-05 14:55:22Z rwatson $
37 */
38
39#include <sys/param.h>
40#include <sys/systm.h>
41#include <sys/kernel.h>
42#include <sys/lock.h>
43#include <sys/mutex.h>
44#include <sys/malloc.h>
45#include <sys/mount.h>
46#include <sys/namei.h>
47#include <sys/proc.h>
48#include <sys/vnode.h>
49#include <sys/dirent.h>
50#include <sys/fcntl.h>
51#include <sys/filedesc.h>
52#include <sys/stat.h>
53#include <sys/resourcevar.h>
54
55#include <security/mac/mac_framework.h>
56
57#include <vm/uma.h>
58
59#include <fs/unionfs/union.h>
60
61#define NUNIONFSNODECACHE 16
62
63static MALLOC_DEFINE(M_UNIONFSHASH, "UNIONFS hash", "UNIONFS hash table");
64MALLOC_DEFINE(M_UNIONFSNODE, "UNIONFS node", "UNIONFS vnode private part");
65MALLOC_DEFINE(M_UNIONFSPATH, "UNIONFS path", "UNIONFS path private part");
66
/*
 * Filesystem initialization hook.
 *
 * There is no state to set up: the routine only emits a debug trace and
 * always reports success (0).  vfsp is unused.
 */
int
unionfs_init(struct vfsconf *vfsp)
{

	/* This trace is printed during system boot. */
	UNIONFSDEBUG("unionfs_init\n");

	return (0);
}
76
/*
 * Filesystem teardown hook.
 *
 * Nothing was allocated by unionfs_init(), so there is nothing to release;
 * always returns 0.  vfsp is unused.
 */
int
unionfs_uninit(struct vfsconf *vfsp)
{

	return (0);
}
85
86static struct unionfs_node_hashhead *
87unionfs_get_hashhead(struct vnode *dvp, char *path)
88{
89	int		count;
90	char		hash;
91	struct unionfs_node *unp;
92
93	hash = 0;
94	unp = VTOUNIONFS(dvp);
95	if (path != NULL) {
96		for (count = 0; path[count]; count++)
97			hash += path[count];
98	}
99
100	return (&(unp->un_hashtbl[hash & (unp->un_hashmask)]));
101}
102
103/*
104 * Get the cached vnode.
105 */
106static struct vnode *
107unionfs_get_cached_vnode(struct vnode *uvp, struct vnode *lvp,
108			struct vnode *dvp, char *path)
109{
110	struct unionfs_node_hashhead *hd;
111	struct unionfs_node *unp;
112	struct vnode   *vp;
113
114	KASSERT((uvp == NULLVP || uvp->v_type == VDIR),
115	    ("unionfs_get_cached_vnode: v_type != VDIR"));
116	KASSERT((lvp == NULLVP || lvp->v_type == VDIR),
117	    ("unionfs_get_cached_vnode: v_type != VDIR"));
118
119	VI_LOCK(dvp);
120	hd = unionfs_get_hashhead(dvp, path);
121	LIST_FOREACH(unp, hd, un_hash) {
122		if (!strcmp(unp->un_path, path)) {
123			vp = UNIONFSTOV(unp);
124			VI_LOCK_FLAGS(vp, MTX_DUPOK);
125			VI_UNLOCK(dvp);
126			vp->v_iflag &= ~VI_OWEINACT;
127			if ((vp->v_iflag & (VI_DOOMED | VI_DOINGINACT)) != 0) {
128				VI_UNLOCK(vp);
129				vp = NULLVP;
130			} else
131				VI_UNLOCK(vp);
132			return (vp);
133		}
134	}
135	VI_UNLOCK(dvp);
136
137	return (NULLVP);
138}
139
140/*
141 * Add the new vnode into cache.
142 */
143static struct vnode *
144unionfs_ins_cached_vnode(struct unionfs_node *uncp,
145			struct vnode *dvp, char *path)
146{
147	struct unionfs_node_hashhead *hd;
148	struct unionfs_node *unp;
149	struct vnode   *vp;
150
151	KASSERT((uncp->un_uppervp==NULLVP || uncp->un_uppervp->v_type==VDIR),
152	    ("unionfs_ins_cached_vnode: v_type != VDIR"));
153	KASSERT((uncp->un_lowervp==NULLVP || uncp->un_lowervp->v_type==VDIR),
154	    ("unionfs_ins_cached_vnode: v_type != VDIR"));
155
156	VI_LOCK(dvp);
157	hd = unionfs_get_hashhead(dvp, path);
158	LIST_FOREACH(unp, hd, un_hash) {
159		if (!strcmp(unp->un_path, path)) {
160			vp = UNIONFSTOV(unp);
161			VI_LOCK_FLAGS(vp, MTX_DUPOK);
162			vp->v_iflag &= ~VI_OWEINACT;
163			if ((vp->v_iflag & (VI_DOOMED | VI_DOINGINACT)) != 0) {
164				LIST_INSERT_HEAD(hd, uncp, un_hash);
165				VI_UNLOCK(vp);
166				vp = NULLVP;
167			} else
168				VI_UNLOCK(vp);
169			VI_UNLOCK(dvp);
170			return (vp);
171		}
172	}
173
174	LIST_INSERT_HEAD(hd, uncp, un_hash);
175	VI_UNLOCK(dvp);
176
177	return (NULLVP);
178}
179
180/*
181 * Remove the vnode.
182 */
183static void
184unionfs_rem_cached_vnode(struct unionfs_node *unp, struct vnode *dvp)
185{
186	KASSERT((unp != NULL), ("unionfs_rem_cached_vnode: null node"));
187	KASSERT((dvp != NULLVP),
188	    ("unionfs_rem_cached_vnode: null parent vnode"));
189	KASSERT((unp->un_hash.le_prev != NULL),
190	    ("unionfs_rem_cached_vnode: null hash"));
191
192	VI_LOCK(dvp);
193	LIST_REMOVE(unp, un_hash);
194	unp->un_hash.le_next = NULL;
195	unp->un_hash.le_prev = NULL;
196	VI_UNLOCK(dvp);
197}
198
199/*
200 * Make a new or get existing unionfs node.
201 *
202 * uppervp and lowervp should be unlocked. Because if new unionfs vnode is
203 * locked, uppervp or lowervp is locked too. In order to prevent dead lock,
204 * you should not lock plurality simultaneously.
205 */
206int
207unionfs_nodeget(struct mount *mp, struct vnode *uppervp,
208		struct vnode *lowervp, struct vnode *dvp,
209		struct vnode **vpp, struct componentname *cnp,
210		struct thread *td)
211{
212	struct unionfs_mount *ump;
213	struct unionfs_node *unp;
214	struct vnode   *vp;
215	int		error;
216	int		lkflags;
217	enum vtype	vt;
218	char	       *path;
219
220	ump = MOUNTTOUNIONFSMOUNT(mp);
221	lkflags = (cnp ? cnp->cn_lkflags : 0);
222	path = (cnp ? cnp->cn_nameptr : NULL);
223	*vpp = NULLVP;
224
225	if (uppervp == NULLVP && lowervp == NULLVP)
226		panic("unionfs_nodeget: upper and lower is null");
227
228	vt = (uppervp != NULLVP ? uppervp->v_type : lowervp->v_type);
229
230	/* If it has no ISLASTCN flag, path check is skipped. */
231	if (cnp && !(cnp->cn_flags & ISLASTCN))
232		path = NULL;
233
234	/* check the cache */
235	if (path != NULL && dvp != NULLVP && vt == VDIR) {
236		vp = unionfs_get_cached_vnode(uppervp, lowervp, dvp, path);
237		if (vp != NULLVP) {
238			vref(vp);
239			*vpp = vp;
240			goto unionfs_nodeget_out;
241		}
242	}
243
244	if ((uppervp == NULLVP || ump->um_uppervp != uppervp) ||
245	    (lowervp == NULLVP || ump->um_lowervp != lowervp)) {
246		/* dvp will be NULLVP only in case of root vnode. */
247		if (dvp == NULLVP)
248			return (EINVAL);
249	}
250
251	/*
252	 * Do the MALLOC before the getnewvnode since doing so afterward
253	 * might cause a bogus v_data pointer to get dereferenced elsewhere
254	 * if MALLOC should block.
255	 */
256	unp = malloc(sizeof(struct unionfs_node),
257	    M_UNIONFSNODE, M_WAITOK | M_ZERO);
258
259	error = getnewvnode("unionfs", mp, &unionfs_vnodeops, &vp);
260	if (error != 0) {
261		free(unp, M_UNIONFSNODE);
262		return (error);
263	}
264	error = insmntque(vp, mp);	/* XXX: Too early for mpsafe fs */
265	if (error != 0) {
266		free(unp, M_UNIONFSNODE);
267		return (error);
268	}
269	if (dvp != NULLVP)
270		vref(dvp);
271	if (uppervp != NULLVP)
272		vref(uppervp);
273	if (lowervp != NULLVP)
274		vref(lowervp);
275
276	if (vt == VDIR)
277		unp->un_hashtbl = hashinit(NUNIONFSNODECACHE, M_UNIONFSHASH,
278		    &(unp->un_hashmask));
279
280	unp->un_vnode = vp;
281	unp->un_uppervp = uppervp;
282	unp->un_lowervp = lowervp;
283	unp->un_dvp = dvp;
284	if (uppervp != NULLVP)
285		vp->v_vnlock = uppervp->v_vnlock;
286	else
287		vp->v_vnlock = lowervp->v_vnlock;
288
289	if (path != NULL) {
290		unp->un_path = (char *)
291		    malloc(cnp->cn_namelen +1, M_UNIONFSPATH, M_WAITOK|M_ZERO);
292		bcopy(cnp->cn_nameptr, unp->un_path, cnp->cn_namelen);
293		unp->un_path[cnp->cn_namelen] = '\0';
294	}
295	vp->v_type = vt;
296	vp->v_data = unp;
297
298	if ((uppervp != NULLVP && ump->um_uppervp == uppervp) &&
299	    (lowervp != NULLVP && ump->um_lowervp == lowervp))
300		vp->v_vflag |= VV_ROOT;
301
302	if (path != NULL && dvp != NULLVP && vt == VDIR)
303		*vpp = unionfs_ins_cached_vnode(unp, dvp, path);
304	if ((*vpp) != NULLVP) {
305		if (dvp != NULLVP)
306			vrele(dvp);
307		if (uppervp != NULLVP)
308			vrele(uppervp);
309		if (lowervp != NULLVP)
310			vrele(lowervp);
311
312		unp->un_uppervp = NULLVP;
313		unp->un_lowervp = NULLVP;
314		unp->un_dvp = NULLVP;
315		vrele(vp);
316		vp = *vpp;
317		vref(vp);
318	} else
319		*vpp = vp;
320
321unionfs_nodeget_out:
322	if (lkflags & LK_TYPE_MASK)
323		vn_lock(vp, lkflags | LK_RETRY);
324
325	return (0);
326}
327
328/*
329 * Clean up the unionfs node.
330 */
331void
332unionfs_noderem(struct vnode *vp, struct thread *td)
333{
334	int		vfslocked;
335	int		count;
336	struct unionfs_node *unp, *unp_t1, *unp_t2;
337	struct unionfs_node_hashhead *hd;
338	struct unionfs_node_status *unsp, *unsp_tmp;
339	struct vnode   *lvp;
340	struct vnode   *uvp;
341	struct vnode   *dvp;
342
343	/*
344	 * Use the interlock to protect the clearing of v_data to
345	 * prevent faults in unionfs_lock().
346	 */
347	VI_LOCK(vp);
348	unp = VTOUNIONFS(vp);
349	lvp = unp->un_lowervp;
350	uvp = unp->un_uppervp;
351	dvp = unp->un_dvp;
352	unp->un_lowervp = unp->un_uppervp = NULLVP;
353
354	vp->v_vnlock = &(vp->v_lock);
355	vp->v_data = NULL;
356	lockmgr(vp->v_vnlock, LK_EXCLUSIVE | LK_INTERLOCK, VI_MTX(vp));
357	if (lvp != NULLVP)
358		VOP_UNLOCK(lvp, 0);
359	if (uvp != NULLVP)
360		VOP_UNLOCK(uvp, 0);
361	vp->v_object = NULL;
362
363	if (dvp != NULLVP && unp->un_hash.le_prev != NULL)
364		unionfs_rem_cached_vnode(unp, dvp);
365
366	if (lvp != NULLVP) {
367		vfslocked = VFS_LOCK_GIANT(lvp->v_mount);
368		vrele(lvp);
369		VFS_UNLOCK_GIANT(vfslocked);
370	}
371	if (uvp != NULLVP) {
372		vfslocked = VFS_LOCK_GIANT(uvp->v_mount);
373		vrele(uvp);
374		VFS_UNLOCK_GIANT(vfslocked);
375	}
376	if (dvp != NULLVP) {
377		vfslocked = VFS_LOCK_GIANT(dvp->v_mount);
378		vrele(dvp);
379		VFS_UNLOCK_GIANT(vfslocked);
380		unp->un_dvp = NULLVP;
381	}
382	if (unp->un_path != NULL) {
383		free(unp->un_path, M_UNIONFSPATH);
384		unp->un_path = NULL;
385	}
386
387	if (unp->un_hashtbl != NULL) {
388		for (count = 0; count <= unp->un_hashmask; count++) {
389			hd = unp->un_hashtbl + count;
390			LIST_FOREACH_SAFE(unp_t1, hd, un_hash, unp_t2) {
391				LIST_REMOVE(unp_t1, un_hash);
392				unp_t1->un_hash.le_next = NULL;
393				unp_t1->un_hash.le_prev = NULL;
394			}
395		}
396		hashdestroy(unp->un_hashtbl, M_UNIONFSHASH, unp->un_hashmask);
397	}
398
399	LIST_FOREACH_SAFE(unsp, &(unp->un_unshead), uns_list, unsp_tmp) {
400		LIST_REMOVE(unsp, uns_list);
401		free(unsp, M_TEMP);
402	}
403	free(unp, M_UNIONFSNODE);
404}
405
406/*
407 * Get the unionfs node status.
408 * You need exclusive lock this vnode.
409 */
410void
411unionfs_get_node_status(struct unionfs_node *unp, struct thread *td,
412			struct unionfs_node_status **unspp)
413{
414	struct unionfs_node_status *unsp;
415	pid_t pid = td->td_proc->p_pid;
416
417	KASSERT(NULL != unspp, ("null pointer"));
418	ASSERT_VOP_ELOCKED(UNIONFSTOV(unp), "unionfs_get_node_status");
419
420	LIST_FOREACH(unsp, &(unp->un_unshead), uns_list) {
421		if (unsp->uns_pid == pid) {
422			*unspp = unsp;
423			return;
424		}
425	}
426
427	/* create a new unionfs node status */
428	unsp = malloc(sizeof(struct unionfs_node_status),
429	    M_TEMP, M_WAITOK | M_ZERO);
430
431	unsp->uns_pid = pid;
432	LIST_INSERT_HEAD(&(unp->un_unshead), unsp, uns_list);
433
434	*unspp = unsp;
435}
436
437/*
438 * Remove the unionfs node status, if you can.
439 * You need exclusive lock this vnode.
440 */
441void
442unionfs_tryrem_node_status(struct unionfs_node *unp,
443			   struct unionfs_node_status *unsp)
444{
445	KASSERT(NULL != unsp, ("null pointer"));
446	ASSERT_VOP_ELOCKED(UNIONFSTOV(unp), "unionfs_get_node_status");
447
448	if (0 < unsp->uns_lower_opencnt || 0 < unsp->uns_upper_opencnt)
449		return;
450
451	LIST_REMOVE(unsp, uns_list);
452	free(unsp, M_TEMP);
453}
454
455/*
456 * Create upper node attr.
457 */
458void
459unionfs_create_uppervattr_core(struct unionfs_mount *ump,
460			       struct vattr *lva,
461			       struct vattr *uva,
462			       struct thread *td)
463{
464	VATTR_NULL(uva);
465	uva->va_type = lva->va_type;
466	uva->va_atime = lva->va_atime;
467	uva->va_mtime = lva->va_mtime;
468	uva->va_ctime = lva->va_ctime;
469
470	switch (ump->um_copymode) {
471	case UNIONFS_TRANSPARENT:
472		uva->va_mode = lva->va_mode;
473		uva->va_uid = lva->va_uid;
474		uva->va_gid = lva->va_gid;
475		break;
476	case UNIONFS_MASQUERADE:
477		if (ump->um_uid == lva->va_uid) {
478			uva->va_mode = lva->va_mode & 077077;
479			uva->va_mode |= (lva->va_type == VDIR ? ump->um_udir : ump->um_ufile) & 0700;
480			uva->va_uid = lva->va_uid;
481			uva->va_gid = lva->va_gid;
482		} else {
483			uva->va_mode = (lva->va_type == VDIR ? ump->um_udir : ump->um_ufile);
484			uva->va_uid = ump->um_uid;
485			uva->va_gid = ump->um_gid;
486		}
487		break;
488	default:		/* UNIONFS_TRADITIONAL */
489		FILEDESC_SLOCK(td->td_proc->p_fd);
490		uva->va_mode = 0777 & ~td->td_proc->p_fd->fd_cmask;
491		FILEDESC_SUNLOCK(td->td_proc->p_fd);
492		uva->va_uid = ump->um_uid;
493		uva->va_gid = ump->um_gid;
494		break;
495	}
496}
497
498/*
499 * Create upper node attr.
500 */
501int
502unionfs_create_uppervattr(struct unionfs_mount *ump,
503			  struct vnode *lvp,
504			  struct vattr *uva,
505			  struct ucred *cred,
506			  struct thread *td)
507{
508	int		error;
509	struct vattr	lva;
510
511	if ((error = VOP_GETATTR(lvp, &lva, cred)))
512		return (error);
513
514	unionfs_create_uppervattr_core(ump, &lva, uva, td);
515
516	return (error);
517}
518
519/*
520 * relookup
521 *
522 * dvp should be locked on entry and will be locked on return.
523 *
524 * If an error is returned, *vpp will be invalid, otherwise it will hold a
525 * locked, referenced vnode. If *vpp == dvp then remember that only one
526 * LK_EXCLUSIVE lock is held.
527 */
528int
529unionfs_relookup(struct vnode *dvp, struct vnode **vpp,
530		 struct componentname *cnp, struct componentname *cn,
531		 struct thread *td, char *path, int pathlen, u_long nameiop)
532{
533	int	error;
534
535	cn->cn_namelen = pathlen;
536	cn->cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK);
537	bcopy(path, cn->cn_pnbuf, pathlen);
538	cn->cn_pnbuf[pathlen] = '\0';
539
540	cn->cn_nameiop = nameiop;
541	cn->cn_flags = (LOCKPARENT | LOCKLEAF | HASBUF | SAVENAME | ISLASTCN);
542	cn->cn_lkflags = LK_EXCLUSIVE;
543	cn->cn_thread = td;
544	cn->cn_cred = cnp->cn_cred;
545
546	cn->cn_nameptr = cn->cn_pnbuf;
547	cn->cn_consume = cnp->cn_consume;
548
549	if (nameiop == DELETE)
550		cn->cn_flags |= (cnp->cn_flags & (DOWHITEOUT | SAVESTART));
551	else if (RENAME == nameiop)
552		cn->cn_flags |= (cnp->cn_flags & SAVESTART);
553
554	vref(dvp);
555	VOP_UNLOCK(dvp, 0);
556
557	if ((error = relookup(dvp, vpp, cn))) {
558		uma_zfree(namei_zone, cn->cn_pnbuf);
559		cn->cn_flags &= ~HASBUF;
560		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
561	} else
562		vrele(dvp);
563
564	return (error);
565}
566
567/*
568 * relookup for CREATE namei operation.
569 *
570 * dvp is unionfs vnode. dvp should be locked.
571 *
572 * If it called 'unionfs_copyfile' function by unionfs_link etc,
573 * VOP_LOOKUP information is broken.
574 * So it need relookup in order to create link etc.
575 */
576int
577unionfs_relookup_for_create(struct vnode *dvp, struct componentname *cnp,
578			    struct thread *td)
579{
580	int	error;
581	struct vnode *udvp;
582	struct vnode *vp;
583	struct componentname cn;
584
585	udvp = UNIONFSVPTOUPPERVP(dvp);
586	vp = NULLVP;
587
588	error = unionfs_relookup(udvp, &vp, cnp, &cn, td, cnp->cn_nameptr,
589	    strlen(cnp->cn_nameptr), CREATE);
590	if (error)
591		return (error);
592
593	if (vp != NULLVP) {
594		if (udvp == vp)
595			vrele(vp);
596		else
597			vput(vp);
598
599		error = EEXIST;
600	}
601
602	if (cn.cn_flags & HASBUF) {
603		uma_zfree(namei_zone, cn.cn_pnbuf);
604		cn.cn_flags &= ~HASBUF;
605	}
606
607	if (!error) {
608		cn.cn_flags |= (cnp->cn_flags & HASBUF);
609		cnp->cn_flags = cn.cn_flags;
610	}
611
612	return (error);
613}
614
615/*
616 * relookup for DELETE namei operation.
617 *
618 * dvp is unionfs vnode. dvp should be locked.
619 */
620int
621unionfs_relookup_for_delete(struct vnode *dvp, struct componentname *cnp,
622			    struct thread *td)
623{
624	int	error;
625	struct vnode *udvp;
626	struct vnode *vp;
627	struct componentname cn;
628
629	udvp = UNIONFSVPTOUPPERVP(dvp);
630	vp = NULLVP;
631
632	error = unionfs_relookup(udvp, &vp, cnp, &cn, td, cnp->cn_nameptr,
633	    strlen(cnp->cn_nameptr), DELETE);
634	if (error)
635		return (error);
636
637	if (vp == NULLVP)
638		error = ENOENT;
639	else {
640		if (udvp == vp)
641			vrele(vp);
642		else
643			vput(vp);
644	}
645
646	if (cn.cn_flags & HASBUF) {
647		uma_zfree(namei_zone, cn.cn_pnbuf);
648		cn.cn_flags &= ~HASBUF;
649	}
650
651	if (!error) {
652		cn.cn_flags |= (cnp->cn_flags & HASBUF);
653		cnp->cn_flags = cn.cn_flags;
654	}
655
656	return (error);
657}
658
659/*
660 * relookup for RENAME namei operation.
661 *
662 * dvp is unionfs vnode. dvp should be locked.
663 */
664int
665unionfs_relookup_for_rename(struct vnode *dvp, struct componentname *cnp,
666			    struct thread *td)
667{
668	int error;
669	struct vnode *udvp;
670	struct vnode *vp;
671	struct componentname cn;
672
673	udvp = UNIONFSVPTOUPPERVP(dvp);
674	vp = NULLVP;
675
676	error = unionfs_relookup(udvp, &vp, cnp, &cn, td, cnp->cn_nameptr,
677	    strlen(cnp->cn_nameptr), RENAME);
678	if (error)
679		return (error);
680
681	if (vp != NULLVP) {
682		if (udvp == vp)
683			vrele(vp);
684		else
685			vput(vp);
686	}
687
688	if (cn.cn_flags & HASBUF) {
689		uma_zfree(namei_zone, cn.cn_pnbuf);
690		cn.cn_flags &= ~HASBUF;
691	}
692
693	if (!error) {
694		cn.cn_flags |= (cnp->cn_flags & HASBUF);
695		cnp->cn_flags = cn.cn_flags;
696	}
697
698	return (error);
699
700}
701
702/*
703 * Update the unionfs_node.
704 *
705 * uvp is new locked upper vnode. unionfs vnode's lock will be exchanged to the
706 * uvp's lock and lower's lock will be unlocked.
707 */
708static void
709unionfs_node_update(struct unionfs_node *unp, struct vnode *uvp,
710		    struct thread *td)
711{
712	unsigned	count, lockrec;
713	struct vnode   *vp;
714	struct vnode   *lvp;
715	struct vnode   *dvp;
716
717	vp = UNIONFSTOV(unp);
718	lvp = unp->un_lowervp;
719	ASSERT_VOP_ELOCKED(lvp, "unionfs_node_update");
720	dvp = unp->un_dvp;
721
722	/*
723	 * lock update
724	 */
725	VI_LOCK(vp);
726	unp->un_uppervp = uvp;
727	vp->v_vnlock = uvp->v_vnlock;
728	VI_UNLOCK(vp);
729	lockrec = lvp->v_vnlock->lk_recurse;
730	for (count = 0; count < lockrec; count++)
731		vn_lock(uvp, LK_EXCLUSIVE | LK_CANRECURSE | LK_RETRY);
732
733	/*
734	 * cache update
735	 */
736	if (unp->un_path != NULL && dvp != NULLVP && vp->v_type == VDIR) {
737		static struct unionfs_node_hashhead *hd;
738
739		VI_LOCK(dvp);
740		hd = unionfs_get_hashhead(dvp, unp->un_path);
741		LIST_REMOVE(unp, un_hash);
742		LIST_INSERT_HEAD(hd, unp, un_hash);
743		VI_UNLOCK(dvp);
744	}
745}
746
747/*
748 * Create a new shadow dir.
749 *
750 * udvp should be locked on entry and will be locked on return.
751 *
752 * If no error returned, unp will be updated.
753 */
754int
755unionfs_mkshadowdir(struct unionfs_mount *ump, struct vnode *udvp,
756		    struct unionfs_node *unp, struct componentname *cnp,
757		    struct thread *td)
758{
759	int		error;
760	struct vnode   *lvp;
761	struct vnode   *uvp;
762	struct vattr	va;
763	struct vattr	lva;
764	struct componentname cn;
765	struct mount   *mp;
766	struct ucred   *cred;
767	struct ucred   *credbk;
768	struct uidinfo *rootinfo;
769
770	if (unp->un_uppervp != NULLVP)
771		return (EEXIST);
772
773	lvp = unp->un_lowervp;
774	uvp = NULLVP;
775	credbk = cnp->cn_cred;
776
777	/* Authority change to root */
778	rootinfo = uifind((uid_t)0);
779	cred = crdup(cnp->cn_cred);
780	chgproccnt(cred->cr_ruidinfo, 1, 0);
781	change_euid(cred, rootinfo);
782	change_ruid(cred, rootinfo);
783	change_svuid(cred, (uid_t)0);
784	uifree(rootinfo);
785	cnp->cn_cred = cred;
786
787	memset(&cn, 0, sizeof(cn));
788
789	if ((error = VOP_GETATTR(lvp, &lva, cnp->cn_cred)))
790		goto unionfs_mkshadowdir_abort;
791
792	if ((error = unionfs_relookup(udvp, &uvp, cnp, &cn, td, cnp->cn_nameptr, cnp->cn_namelen, CREATE)))
793		goto unionfs_mkshadowdir_abort;
794	if (uvp != NULLVP) {
795		if (udvp == uvp)
796			vrele(uvp);
797		else
798			vput(uvp);
799
800		error = EEXIST;
801		goto unionfs_mkshadowdir_free_out;
802	}
803
804	if ((error = vn_start_write(udvp, &mp, V_WAIT | PCATCH)))
805		goto unionfs_mkshadowdir_free_out;
806	unionfs_create_uppervattr_core(ump, &lva, &va, td);
807
808	error = VOP_MKDIR(udvp, &uvp, &cn, &va);
809
810	if (!error) {
811		unionfs_node_update(unp, uvp, td);
812
813		/*
814		 * XXX The bug which cannot set uid/gid was corrected.
815		 * Ignore errors.
816		 */
817		va.va_type = VNON;
818		VOP_SETATTR(uvp, &va, cn.cn_cred);
819	}
820	vn_finished_write(mp);
821
822unionfs_mkshadowdir_free_out:
823	if (cn.cn_flags & HASBUF) {
824		uma_zfree(namei_zone, cn.cn_pnbuf);
825		cn.cn_flags &= ~HASBUF;
826	}
827
828unionfs_mkshadowdir_abort:
829	cnp->cn_cred = credbk;
830	chgproccnt(cred->cr_ruidinfo, -1, 0);
831	crfree(cred);
832
833	return (error);
834}
835
836/*
837 * Create a new whiteout.
838 *
839 * dvp should be locked on entry and will be locked on return.
840 */
841int
842unionfs_mkwhiteout(struct vnode *dvp, struct componentname *cnp,
843		   struct thread *td, char *path)
844{
845	int		error;
846	struct vnode   *wvp;
847	struct componentname cn;
848	struct mount   *mp;
849
850	if (path == NULL)
851		path = cnp->cn_nameptr;
852
853	wvp = NULLVP;
854	if ((error = unionfs_relookup(dvp, &wvp, cnp, &cn, td, path, strlen(path), CREATE)))
855		return (error);
856	if (wvp != NULLVP) {
857		if (cn.cn_flags & HASBUF) {
858			uma_zfree(namei_zone, cn.cn_pnbuf);
859			cn.cn_flags &= ~HASBUF;
860		}
861		if (dvp == wvp)
862			vrele(wvp);
863		else
864			vput(wvp);
865
866		return (EEXIST);
867	}
868
869	if ((error = vn_start_write(dvp, &mp, V_WAIT | PCATCH)))
870		goto unionfs_mkwhiteout_free_out;
871	error = VOP_WHITEOUT(dvp, &cn, CREATE);
872
873	vn_finished_write(mp);
874
875unionfs_mkwhiteout_free_out:
876	if (cn.cn_flags & HASBUF) {
877		uma_zfree(namei_zone, cn.cn_pnbuf);
878		cn.cn_flags &= ~HASBUF;
879	}
880
881	return (error);
882}
883
884/*
885 * Create a new vnode for create a new shadow file.
886 *
887 * If an error is returned, *vpp will be invalid, otherwise it will hold a
888 * locked, referenced and opened vnode.
889 *
890 * unp is never updated.
891 */
892static int
893unionfs_vn_create_on_upper(struct vnode **vpp, struct vnode *udvp,
894			   struct unionfs_node *unp, struct vattr *uvap,
895			   struct thread *td)
896{
897	struct unionfs_mount *ump;
898	struct vnode   *vp;
899	struct vnode   *lvp;
900	struct ucred   *cred;
901	struct vattr	lva;
902	int		fmode;
903	int		error;
904	struct componentname cn;
905
906	ump = MOUNTTOUNIONFSMOUNT(UNIONFSTOV(unp)->v_mount);
907	vp = NULLVP;
908	lvp = unp->un_lowervp;
909	cred = td->td_ucred;
910	fmode = FFLAGS(O_WRONLY | O_CREAT | O_TRUNC | O_EXCL);
911	error = 0;
912
913	if ((error = VOP_GETATTR(lvp, &lva, cred)) != 0)
914		return (error);
915	unionfs_create_uppervattr_core(ump, &lva, uvap, td);
916
917	if (unp->un_path == NULL)
918		panic("unionfs: un_path is null");
919
920	cn.cn_namelen = strlen(unp->un_path);
921	cn.cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK);
922	bcopy(unp->un_path, cn.cn_pnbuf, cn.cn_namelen + 1);
923	cn.cn_nameiop = CREATE;
924	cn.cn_flags = (LOCKPARENT | LOCKLEAF | HASBUF | SAVENAME | ISLASTCN);
925	cn.cn_lkflags = LK_EXCLUSIVE;
926	cn.cn_thread = td;
927	cn.cn_cred = cred;
928	cn.cn_nameptr = cn.cn_pnbuf;
929	cn.cn_consume = 0;
930
931	vref(udvp);
932	if ((error = relookup(udvp, &vp, &cn)) != 0)
933		goto unionfs_vn_create_on_upper_free_out2;
934	vrele(udvp);
935
936	if (vp != NULLVP) {
937		if (vp == udvp)
938			vrele(vp);
939		else
940			vput(vp);
941		error = EEXIST;
942		goto unionfs_vn_create_on_upper_free_out1;
943	}
944
945	if ((error = VOP_CREATE(udvp, &vp, &cn, uvap)) != 0)
946		goto unionfs_vn_create_on_upper_free_out1;
947
948	if ((error = VOP_OPEN(vp, fmode, cred, td, NULL)) != 0) {
949		vput(vp);
950		goto unionfs_vn_create_on_upper_free_out1;
951	}
952	vp->v_writecount++;
953	*vpp = vp;
954
955unionfs_vn_create_on_upper_free_out1:
956	VOP_UNLOCK(udvp, 0);
957
958unionfs_vn_create_on_upper_free_out2:
959	if (cn.cn_flags & HASBUF) {
960		uma_zfree(namei_zone, cn.cn_pnbuf);
961		cn.cn_flags &= ~HASBUF;
962	}
963
964	return (error);
965}
966
967/*
968 * Copy from lvp to uvp.
969 *
970 * lvp and uvp should be locked and opened on entry and will be locked and
971 * opened on return.
972 */
973static int
974unionfs_copyfile_core(struct vnode *lvp, struct vnode *uvp,
975		      struct ucred *cred, struct thread *td)
976{
977	int		error;
978	off_t		offset;
979	int		count;
980	int		bufoffset;
981	char           *buf;
982	struct uio	uio;
983	struct iovec	iov;
984
985	error = 0;
986	memset(&uio, 0, sizeof(uio));
987
988	uio.uio_td = td;
989	uio.uio_segflg = UIO_SYSSPACE;
990	uio.uio_offset = 0;
991
992	buf = malloc(MAXBSIZE, M_TEMP, M_WAITOK);
993
994	while (error == 0) {
995		offset = uio.uio_offset;
996
997		uio.uio_iov = &iov;
998		uio.uio_iovcnt = 1;
999		iov.iov_base = buf;
1000		iov.iov_len = MAXBSIZE;
1001		uio.uio_resid = iov.iov_len;
1002		uio.uio_rw = UIO_READ;
1003
1004		if ((error = VOP_READ(lvp, &uio, 0, cred)) != 0)
1005			break;
1006		if ((count = MAXBSIZE - uio.uio_resid) == 0)
1007			break;
1008
1009		bufoffset = 0;
1010		while (bufoffset < count) {
1011			uio.uio_iov = &iov;
1012			uio.uio_iovcnt = 1;
1013			iov.iov_base = buf + bufoffset;
1014			iov.iov_len = count - bufoffset;
1015			uio.uio_offset = offset + bufoffset;
1016			uio.uio_resid = iov.iov_len;
1017			uio.uio_rw = UIO_WRITE;
1018
1019			if ((error = VOP_WRITE(uvp, &uio, 0, cred)) != 0)
1020				break;
1021
1022			bufoffset += (count - bufoffset) - uio.uio_resid;
1023		}
1024
1025		uio.uio_offset = offset + bufoffset;
1026	}
1027
1028	free(buf, M_TEMP);
1029
1030	return (error);
1031}
1032
1033/*
1034 * Copy file from lower to upper.
1035 *
1036 * If you need copy of the contents, set 1 to docopy. Otherwise, set 0 to
1037 * docopy.
1038 *
1039 * If no error returned, unp will be updated.
1040 */
1041int
1042unionfs_copyfile(struct unionfs_node *unp, int docopy, struct ucred *cred,
1043		 struct thread *td)
1044{
1045	int		error;
1046	struct mount   *mp;
1047	struct vnode   *udvp;
1048	struct vnode   *lvp;
1049	struct vnode   *uvp;
1050	struct vattr	uva;
1051
1052	lvp = unp->un_lowervp;
1053	uvp = NULLVP;
1054
1055	if ((UNIONFSTOV(unp)->v_mount->mnt_flag & MNT_RDONLY))
1056		return (EROFS);
1057	if (unp->un_dvp == NULLVP)
1058		return (EINVAL);
1059	if (unp->un_uppervp != NULLVP)
1060		return (EEXIST);
1061	udvp = VTOUNIONFS(unp->un_dvp)->un_uppervp;
1062	if (udvp == NULLVP)
1063		return (EROFS);
1064	if ((udvp->v_mount->mnt_flag & MNT_RDONLY))
1065		return (EROFS);
1066
1067	error = VOP_ACCESS(lvp, VREAD, cred, td);
1068	if (error != 0)
1069		return (error);
1070
1071	if ((error = vn_start_write(udvp, &mp, V_WAIT | PCATCH)) != 0)
1072		return (error);
1073	error = unionfs_vn_create_on_upper(&uvp, udvp, unp, &uva, td);
1074	if (error != 0) {
1075		vn_finished_write(mp);
1076		return (error);
1077	}
1078
1079	if (docopy != 0) {
1080		error = VOP_OPEN(lvp, FREAD, cred, td, NULL);
1081		if (error == 0) {
1082			error = unionfs_copyfile_core(lvp, uvp, cred, td);
1083			VOP_CLOSE(lvp, FREAD, cred, td);
1084		}
1085	}
1086	VOP_CLOSE(uvp, FWRITE, cred, td);
1087	uvp->v_writecount--;
1088
1089	vn_finished_write(mp);
1090
1091	if (error == 0) {
1092		/* Reset the attributes. Ignore errors. */
1093		uva.va_type = VNON;
1094		VOP_SETATTR(uvp, &uva, cred);
1095	}
1096
1097	unionfs_node_update(unp, uvp, td);
1098
1099	return (error);
1100}
1101
1102/*
1103 * It checks whether vp can rmdir. (check empty)
1104 *
1105 * vp is unionfs vnode.
1106 * vp should be locked.
1107 */
1108int
1109unionfs_check_rmdir(struct vnode *vp, struct ucred *cred, struct thread *td)
1110{
1111	int		error;
1112	int		eofflag;
1113	int		lookuperr;
1114	struct vnode   *uvp;
1115	struct vnode   *lvp;
1116	struct vnode   *tvp;
1117	struct vattr	va;
1118	struct componentname cn;
1119	/*
1120	 * The size of buf needs to be larger than DIRBLKSIZ.
1121	 */
1122	char		buf[256 * 6];
1123	struct dirent  *dp;
1124	struct dirent  *edp;
1125	struct uio	uio;
1126	struct iovec	iov;
1127
1128	ASSERT_VOP_ELOCKED(vp, "unionfs_check_rmdir");
1129
1130	eofflag = 0;
1131	uvp = UNIONFSVPTOUPPERVP(vp);
1132	lvp = UNIONFSVPTOLOWERVP(vp);
1133
1134	/* check opaque */
1135	if ((error = VOP_GETATTR(uvp, &va, cred)) != 0)
1136		return (error);
1137	if (va.va_flags & OPAQUE)
1138		return (0);
1139
1140	/* open vnode */
1141#ifdef MAC
1142	if ((error = mac_vnode_check_open(cred, vp, VEXEC|VREAD)) != 0)
1143		return (error);
1144#endif
1145	if ((error = VOP_ACCESS(vp, VEXEC|VREAD, cred, td)) != 0)
1146		return (error);
1147	if ((error = VOP_OPEN(vp, FREAD, cred, td, NULL)) != 0)
1148		return (error);
1149
1150	uio.uio_rw = UIO_READ;
1151	uio.uio_segflg = UIO_SYSSPACE;
1152	uio.uio_td = td;
1153	uio.uio_offset = 0;
1154
1155#ifdef MAC
1156	error = mac_vnode_check_readdir(td->td_ucred, lvp);
1157#endif
1158	while (!error && !eofflag) {
1159		iov.iov_base = buf;
1160		iov.iov_len = sizeof(buf);
1161		uio.uio_iov = &iov;
1162		uio.uio_iovcnt = 1;
1163		uio.uio_resid = iov.iov_len;
1164
1165		error = VOP_READDIR(lvp, &uio, cred, &eofflag, NULL, NULL);
1166		if (error != 0)
1167			break;
1168		if (eofflag == 0 && uio.uio_resid == sizeof(buf)) {
1169#ifdef DIAGNOSTIC
1170			panic("bad readdir response from lower FS.");
1171#endif
1172			break;
1173		}
1174
1175		edp = (struct dirent*)&buf[sizeof(buf) - uio.uio_resid];
1176		for (dp = (struct dirent*)buf; !error && dp < edp;
1177		     dp = (struct dirent*)((caddr_t)dp + dp->d_reclen)) {
1178			if (dp->d_type == DT_WHT ||
1179			    (dp->d_namlen == 1 && dp->d_name[0] == '.') ||
1180			    (dp->d_namlen == 2 && !bcmp(dp->d_name, "..", 2)))
1181				continue;
1182
1183			cn.cn_namelen = dp->d_namlen;
1184			cn.cn_pnbuf = NULL;
1185			cn.cn_nameptr = dp->d_name;
1186			cn.cn_nameiop = LOOKUP;
1187			cn.cn_flags = (LOCKPARENT | LOCKLEAF | SAVENAME | RDONLY | ISLASTCN);
1188			cn.cn_lkflags = LK_EXCLUSIVE;
1189			cn.cn_thread = td;
1190			cn.cn_cred = cred;
1191			cn.cn_consume = 0;
1192
1193			/*
1194			 * check entry in lower.
1195			 * Sometimes, readdir function returns
1196			 * wrong entry.
1197			 */
1198			lookuperr = VOP_LOOKUP(lvp, &tvp, &cn);
1199
1200			if (!lookuperr)
1201				vput(tvp);
1202			else
1203				continue; /* skip entry */
1204
1205			/*
1206			 * check entry
1207			 * If it has no exist/whiteout entry in upper,
1208			 * directory is not empty.
1209			 */
1210			cn.cn_flags = (LOCKPARENT | LOCKLEAF | SAVENAME | RDONLY | ISLASTCN);
1211			lookuperr = VOP_LOOKUP(uvp, &tvp, &cn);
1212
1213			if (!lookuperr)
1214				vput(tvp);
1215
1216			/* ignore exist or whiteout entry */
1217			if (!lookuperr ||
1218			    (lookuperr == ENOENT && (cn.cn_flags & ISWHITEOUT)))
1219				continue;
1220
1221			error = ENOTEMPTY;
1222		}
1223	}
1224
1225	/* close vnode */
1226	VOP_CLOSE(vp, FREAD, cred, td);
1227
1228	return (error);
1229}
1230
1231#ifdef DIAGNOSTIC
1232
1233struct vnode   *
1234unionfs_checkuppervp(struct vnode *vp, char *fil, int lno)
1235{
1236	struct unionfs_node *unp;
1237
1238	unp = VTOUNIONFS(vp);
1239
1240#ifdef notyet
1241	if (vp->v_op != unionfs_vnodeop_p) {
1242		printf("unionfs_checkuppervp: on non-unionfs-node.\n");
1243#ifdef KDB
1244		kdb_enter(KDB_WHY_UNIONFS,
1245		    "unionfs_checkuppervp: on non-unionfs-node.\n");
1246#endif
1247		panic("unionfs_checkuppervp");
1248	};
1249#endif
1250	return (unp->un_uppervp);
1251}
1252
1253struct vnode   *
1254unionfs_checklowervp(struct vnode *vp, char *fil, int lno)
1255{
1256	struct unionfs_node *unp;
1257
1258	unp = VTOUNIONFS(vp);
1259
1260#ifdef notyet
1261	if (vp->v_op != unionfs_vnodeop_p) {
1262		printf("unionfs_checklowervp: on non-unionfs-node.\n");
1263#ifdef KDB
1264		kdb_enter(KDB_WHY_UNIONFS,
1265		    "unionfs_checklowervp: on non-unionfs-node.\n");
1266#endif
1267		panic("unionfs_checklowervp");
1268	};
1269#endif
1270	return (unp->un_lowervp);
1271}
1272#endif
1273