union_subr.c revision 184214
1/*-
2 * Copyright (c) 1994 Jan-Simon Pendry
3 * Copyright (c) 1994
4 *	The Regents of the University of California.  All rights reserved.
5 * Copyright (c) 2005, 2006 Masanori Ozawa <ozawa@ongs.co.jp>, ONGS Inc.
6 * Copyright (c) 2006 Daichi Goto <daichi@freebsd.org>
7 *
8 * This code is derived from software contributed to Berkeley by
9 * Jan-Simon Pendry.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 *    notice, this list of conditions and the following disclaimer in the
18 *    documentation and/or other materials provided with the distribution.
19 * 4. Neither the name of the University nor the names of its contributors
20 *    may be used to endorse or promote products derived from this software
21 *    without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 *
35 *	@(#)union_subr.c	8.20 (Berkeley) 5/20/95
36 * $FreeBSD: head/sys/fs/unionfs/union_subr.c 184214 2008-10-23 20:26:15Z des $
37 */
38
39#include <sys/param.h>
40#include <sys/systm.h>
41#include <sys/kernel.h>
42#include <sys/lock.h>
43#include <sys/mutex.h>
44#include <sys/malloc.h>
45#include <sys/mount.h>
46#include <sys/namei.h>
47#include <sys/proc.h>
48#include <sys/vnode.h>
49#include <sys/dirent.h>
50#include <sys/fcntl.h>
51#include <sys/filedesc.h>
52#include <sys/stat.h>
53#include <sys/resourcevar.h>
54
55#ifdef MAC
56#include <sys/mac.h>
57#endif
58
59#include <vm/uma.h>
60
61#include <fs/unionfs/union.h>
62
/* Size argument handed to hashinit() when a directory node's child cache is built. */
#define NUNIONFSNODECACHE 16

/*
 * malloc(9) types used by unionfs: per-directory hash tables (file-local),
 * the per-vnode private structure, and saved path-name components.
 */
static MALLOC_DEFINE(M_UNIONFSHASH, "UNIONFS hash", "UNIONFS hash table");
MALLOC_DEFINE(M_UNIONFSNODE, "UNIONFS node", "UNIONFS vnode private part");
MALLOC_DEFINE(M_UNIONFSPATH, "UNIONFS path", "UNIONFS path private part");
68
/*
 * Filesystem-type initialization hook.
 *
 * unionfs keeps no global state that needs setting up here, so this only
 * emits a debug trace.  vfsp is unused.  Always returns 0.
 */
int
unionfs_init(struct vfsconf *vfsp)
{
	/* This trace is emitted while the system boots. */
	UNIONFSDEBUG("unionfs_init\n");

	return (0);
}
78
/*
 * Filesystem-type teardown hook.
 *
 * There is nothing to release; vfsp is unused.  Always returns 0.
 */
int
unionfs_uninit(struct vfsconf *vfsp)
{

	return (0);
}
87
88static struct unionfs_node_hashhead *
89unionfs_get_hashhead(struct vnode *dvp, char *path)
90{
91	int		count;
92	char		hash;
93	struct unionfs_node *unp;
94
95	hash = 0;
96	unp = VTOUNIONFS(dvp);
97	if (path != NULL) {
98		for (count = 0; path[count]; count++)
99			hash += path[count];
100	}
101
102	return (&(unp->un_hashtbl[hash & (unp->un_hashmask)]));
103}
104
105/*
106 * Get the cached vnode.
107 */
108static struct vnode *
109unionfs_get_cached_vnode(struct vnode *uvp, struct vnode *lvp,
110			struct vnode *dvp, char *path)
111{
112	struct unionfs_node_hashhead *hd;
113	struct unionfs_node *unp;
114	struct vnode   *vp;
115
116	KASSERT((uvp == NULLVP || uvp->v_type == VDIR || uvp->v_type == VSOCK),
117	    ("unionfs_get_cached_vnode: v_type != VDIR/VSOCK"));
118	KASSERT((lvp == NULLVP || lvp->v_type == VDIR || lvp->v_type == VSOCK),
119	    ("unionfs_get_cached_vnode: v_type != VDIR/VSOCK"));
120
121	VI_LOCK(dvp);
122	hd = unionfs_get_hashhead(dvp, path);
123	LIST_FOREACH(unp, hd, un_hash) {
124		if (!strcmp(unp->un_path, path)) {
125			vp = UNIONFSTOV(unp);
126			VI_LOCK_FLAGS(vp, MTX_DUPOK);
127			VI_UNLOCK(dvp);
128			vp->v_iflag &= ~VI_OWEINACT;
129			if ((vp->v_iflag & (VI_DOOMED | VI_DOINGINACT)) != 0) {
130				VI_UNLOCK(vp);
131				vp = NULLVP;
132			} else
133				VI_UNLOCK(vp);
134			return (vp);
135		}
136	}
137	VI_UNLOCK(dvp);
138
139	return (NULLVP);
140}
141
142/*
143 * Add the new vnode into cache.
144 */
145static struct vnode *
146unionfs_ins_cached_vnode(struct unionfs_node *uncp,
147			struct vnode *dvp, char *path)
148{
149	struct unionfs_node_hashhead *hd;
150	struct unionfs_node *unp;
151	struct vnode   *vp;
152
153	KASSERT((uncp->un_uppervp==NULLVP || uncp->un_uppervp->v_type==VDIR ||
154	    uncp->un_uppervp->v_type==VSOCK),
155	    ("unionfs_ins_cached_vnode: v_type != VDIR/VSOCK"));
156	KASSERT((uncp->un_lowervp==NULLVP || uncp->un_lowervp->v_type==VDIR ||
157	    uncp->un_lowervp->v_type==VSOCK),
158	    ("unionfs_ins_cached_vnode: v_type != VDIR/VSOCK"));
159
160	VI_LOCK(dvp);
161	hd = unionfs_get_hashhead(dvp, path);
162	LIST_FOREACH(unp, hd, un_hash) {
163		if (!strcmp(unp->un_path, path)) {
164			vp = UNIONFSTOV(unp);
165			VI_LOCK_FLAGS(vp, MTX_DUPOK);
166			vp->v_iflag &= ~VI_OWEINACT;
167			if ((vp->v_iflag & (VI_DOOMED | VI_DOINGINACT)) != 0) {
168				LIST_INSERT_HEAD(hd, uncp, un_hash);
169				VI_UNLOCK(vp);
170				vp = NULLVP;
171			} else
172				VI_UNLOCK(vp);
173			VI_UNLOCK(dvp);
174			return (vp);
175		}
176	}
177
178	LIST_INSERT_HEAD(hd, uncp, un_hash);
179	VI_UNLOCK(dvp);
180
181	return (NULLVP);
182}
183
184/*
185 * Remove the vnode.
186 */
187static void
188unionfs_rem_cached_vnode(struct unionfs_node *unp, struct vnode *dvp)
189{
190	KASSERT((unp != NULL), ("unionfs_rem_cached_vnode: null node"));
191	KASSERT((dvp != NULLVP),
192	    ("unionfs_rem_cached_vnode: null parent vnode"));
193	KASSERT((unp->un_hash.le_prev != NULL),
194	    ("unionfs_rem_cached_vnode: null hash"));
195
196	VI_LOCK(dvp);
197	LIST_REMOVE(unp, un_hash);
198	unp->un_hash.le_next = NULL;
199	unp->un_hash.le_prev = NULL;
200	VI_UNLOCK(dvp);
201}
202
/*
 * Make a new or get existing unionfs node.
 *
 * mp      - the unionfs mount the node belongs to.
 * uppervp - upper-layer vnode, or NULLVP.
 * lowervp - lower-layer vnode, or NULLVP.  At least one of uppervp and
 *           lowervp must be non-NULL (the function panics otherwise).
 * dvp     - unionfs vnode of the parent directory; NULLVP is accepted only
 *           when uppervp/lowervp are the mount's own root pair.
 * vpp     - receives the resulting unionfs vnode; set to NULLVP on entry.
 * cnp     - optional component name.  Its cn_lkflags select how the result
 *           is locked, and when ISLASTCN is set its name is used as the
 *           key for the parent's child cache and saved in un_path.
 * td      - not used by this function.
 *
 * Returns 0 on success, EINVAL when dvp is NULLVP for a non-root node, or
 * the error from getnewvnode()/insmntque().  On success *vpp is referenced,
 * and locked if cn_lkflags names a lock type.
 *
 * uppervp and lowervp should be unlocked on entry: the new unionfs vnode
 * shares its lock (v_vnlock) with uppervp or lowervp, so locking the new
 * vnode locks that vnode as well.  To avoid deadlock, do not hold several
 * of these locks at the same time.
 */
int
unionfs_nodeget(struct mount *mp, struct vnode *uppervp,
		struct vnode *lowervp, struct vnode *dvp,
		struct vnode **vpp, struct componentname *cnp,
		struct thread *td)
{
	struct unionfs_mount *ump;
	struct unionfs_node *unp;
	struct vnode   *vp;
	int		error;
	int		lkflags;
	enum vtype	vt;
	char	       *path;

	ump = MOUNTTOUNIONFSMOUNT(mp);
	lkflags = (cnp ? cnp->cn_lkflags : 0);
	path = (cnp ? cnp->cn_nameptr : NULL);
	*vpp = NULLVP;

	if (uppervp == NULLVP && lowervp == NULLVP)
		panic("unionfs_nodeget: upper and lower is null");

	/* The upper layer's type wins when both layers are present. */
	vt = (uppervp != NULLVP ? uppervp->v_type : lowervp->v_type);

	/* If it has no ISLASTCN flag, path check is skipped. */
	if (cnp && !(cnp->cn_flags & ISLASTCN))
		path = NULL;

	/* check the cache (only directories and sockets are cached) */
	if (path != NULL && dvp != NULLVP && (vt == VDIR || vt == VSOCK)) {
		vp = unionfs_get_cached_vnode(uppervp, lowervp, dvp, path);
		if (vp != NULLVP) {
			vref(vp);
			*vpp = vp;
			goto unionfs_nodeget_out;
		}
	}

	/* Anything other than the mount's exact root pair needs a parent. */
	if ((uppervp == NULLVP || ump->um_uppervp != uppervp) ||
	    (lowervp == NULLVP || ump->um_lowervp != lowervp)) {
		/* dvp will be NULLVP only in case of root vnode. */
		if (dvp == NULLVP)
			return (EINVAL);
	}

	/*
	 * Do the MALLOC before the getnewvnode since doing so afterward
	 * might cause a bogus v_data pointer to get dereferenced elsewhere
	 * if MALLOC should block.
	 */
	unp = malloc(sizeof(struct unionfs_node),
	    M_UNIONFSNODE, M_WAITOK | M_ZERO);

	error = getnewvnode("unionfs", mp, &unionfs_vnodeops, &vp);
	if (error != 0) {
		free(unp, M_UNIONFSNODE);
		return (error);
	}
	/*
	 * NOTE(review): on insmntque() failure only unp is freed here;
	 * presumably insmntque() disposes of vp itself - confirm.
	 */
	error = insmntque(vp, mp);	/* XXX: Too early for mpsafe fs */
	if (error != 0) {
		free(unp, M_UNIONFSNODE);
		return (error);
	}
	/* The node holds a reference on its parent and on each layer vnode. */
	if (dvp != NULLVP)
		vref(dvp);
	if (uppervp != NULLVP)
		vref(uppervp);
	if (lowervp != NULLVP)
		vref(lowervp);

	switch (vt) {
	case VDIR:
		/* Directories carry the hash table that caches their children. */
		unp->un_hashtbl = hashinit(NUNIONFSNODECACHE, M_UNIONFSHASH,
		    &(unp->un_hashmask));
		break;
	case VSOCK:
		/* Expose the underlying socket, preferring the upper layer. */
		if (uppervp != NULLVP)
			vp->v_socket = uppervp->v_socket;
		else
			vp->v_socket = lowervp->v_socket;
		break;
	default:
		break;
	}

	unp->un_vnode = vp;
	unp->un_uppervp = uppervp;
	unp->un_lowervp = lowervp;
	unp->un_dvp = dvp;
	/* Share the lock of the upper vnode, or of the lower one if no upper. */
	if (uppervp != NULLVP)
		vp->v_vnlock = uppervp->v_vnlock;
	else
		vp->v_vnlock = lowervp->v_vnlock;

	/* Keep a private, NUL-terminated copy of the component name. */
	if (path != NULL) {
		unp->un_path = (char *)
		    malloc(cnp->cn_namelen +1, M_UNIONFSPATH, M_WAITOK|M_ZERO);
		bcopy(cnp->cn_nameptr, unp->un_path, cnp->cn_namelen);
		unp->un_path[cnp->cn_namelen] = '\0';
	}
	vp->v_type = vt;
	vp->v_data = unp;

	/* The node built from the mount's own upper and lower roots is the root. */
	if ((uppervp != NULLVP && ump->um_uppervp == uppervp) &&
	    (lowervp != NULLVP && ump->um_lowervp == lowervp))
		vp->v_vflag |= VV_ROOT;

	if (path != NULL && dvp != NULLVP && (vt == VDIR || vt == VSOCK))
		*vpp = unionfs_ins_cached_vnode(unp, dvp, path);
	if ((*vpp) != NULLVP) {
		/*
		 * The cache already held a live vnode for this name, so the
		 * node built above is discarded: drop the references taken
		 * for it, detach the layer/parent pointers, release the new
		 * vnode and use the cached one instead.
		 */
		if (dvp != NULLVP)
			vrele(dvp);
		if (uppervp != NULLVP)
			vrele(uppervp);
		if (lowervp != NULLVP)
			vrele(lowervp);

		unp->un_uppervp = NULLVP;
		unp->un_lowervp = NULLVP;
		unp->un_dvp = NULLVP;
		vrele(vp);
		vp = *vpp;
		vref(vp);
	} else
		*vpp = vp;

unionfs_nodeget_out:
	/* Lock the result only if the caller's lock flags name a lock type. */
	if (lkflags & LK_TYPE_MASK)
		vn_lock(vp, lkflags | LK_RETRY);

	return (0);
}
342
/*
 * Clean up the unionfs node.
 *
 * Detaches the private unionfs_node from vp, switches vp back to its own
 * lock, unlocks and releases the lower, upper and parent vnodes, removes
 * the node from its parent's child cache, empties and destroys its own
 * child cache, frees the saved path and all per-process status records,
 * and finally frees the node itself.
 *
 * td is not used by this function.
 */
void
unionfs_noderem(struct vnode *vp, struct thread *td)
{
	int		vfslocked;
	int		count;
	struct unionfs_node *unp, *unp_t1, *unp_t2;
	struct unionfs_node_hashhead *hd;
	struct unionfs_node_status *unsp, *unsp_tmp;
	struct vnode   *lvp;
	struct vnode   *uvp;
	struct vnode   *dvp;

	/*
	 * Use the interlock to protect the clearing of v_data to
	 * prevent faults in unionfs_lock().
	 */
	VI_LOCK(vp);
	unp = VTOUNIONFS(vp);
	lvp = unp->un_lowervp;
	uvp = unp->un_uppervp;
	dvp = unp->un_dvp;
	unp->un_lowervp = unp->un_uppervp = NULLVP;

	/* Stop sharing the layer vnode's lock: fall back to vp's own lock. */
	vp->v_vnlock = &(vp->v_lock);
	vp->v_data = NULL;
	/* Take vp's own lock; the interlock is passed along via LK_INTERLOCK. */
	lockmgr(vp->v_vnlock, LK_EXCLUSIVE | LK_INTERLOCK, VI_MTX(vp));
	if (lvp != NULLVP)
		VOP_UNLOCK(lvp, 0);
	if (uvp != NULLVP)
		VOP_UNLOCK(uvp, 0);
	vp->v_object = NULL;

	/* A non-NULL le_prev means the node is still linked in dvp's cache. */
	if (dvp != NULLVP && unp->un_hash.le_prev != NULL)
		unionfs_rem_cached_vnode(unp, dvp);

	/* Drop the references that unionfs_nodeget() took for this node. */
	if (lvp != NULLVP) {
		vfslocked = VFS_LOCK_GIANT(lvp->v_mount);
		vrele(lvp);
		VFS_UNLOCK_GIANT(vfslocked);
	}
	if (uvp != NULLVP) {
		vfslocked = VFS_LOCK_GIANT(uvp->v_mount);
		vrele(uvp);
		VFS_UNLOCK_GIANT(vfslocked);
	}
	if (dvp != NULLVP) {
		vfslocked = VFS_LOCK_GIANT(dvp->v_mount);
		vrele(dvp);
		VFS_UNLOCK_GIANT(vfslocked);
		unp->un_dvp = NULLVP;
	}
	if (unp->un_path != NULL) {
		free(unp->un_path, M_UNIONFSPATH);
		unp->un_path = NULL;
	}

	if (unp->un_hashtbl != NULL) {
		/*
		 * Unlink every child still cached here and clear its link
		 * fields, so the child later sees le_prev == NULL and does
		 * not try to remove itself from the destroyed table.
		 */
		for (count = 0; count <= unp->un_hashmask; count++) {
			hd = unp->un_hashtbl + count;
			LIST_FOREACH_SAFE(unp_t1, hd, un_hash, unp_t2) {
				LIST_REMOVE(unp_t1, un_hash);
				unp_t1->un_hash.le_next = NULL;
				unp_t1->un_hash.le_prev = NULL;
			}
		}
		hashdestroy(unp->un_hashtbl, M_UNIONFSHASH, unp->un_hashmask);
	}

	/* Free any per-process status records still attached to the node. */
	LIST_FOREACH_SAFE(unsp, &(unp->un_unshead), uns_list, unsp_tmp) {
		LIST_REMOVE(unsp, uns_list);
		free(unsp, M_TEMP);
	}
	free(unp, M_UNIONFSNODE);
}
420
421/*
422 * Get the unionfs node status.
423 * You need exclusive lock this vnode.
424 */
425void
426unionfs_get_node_status(struct unionfs_node *unp, struct thread *td,
427			struct unionfs_node_status **unspp)
428{
429	struct unionfs_node_status *unsp;
430	pid_t pid = td->td_proc->p_pid;
431
432	KASSERT(NULL != unspp, ("null pointer"));
433	ASSERT_VOP_ELOCKED(UNIONFSTOV(unp), "unionfs_get_node_status");
434
435	LIST_FOREACH(unsp, &(unp->un_unshead), uns_list) {
436		if (unsp->uns_pid == pid) {
437			*unspp = unsp;
438			return;
439		}
440	}
441
442	/* create a new unionfs node status */
443	unsp = malloc(sizeof(struct unionfs_node_status),
444	    M_TEMP, M_WAITOK | M_ZERO);
445
446	unsp->uns_pid = pid;
447	LIST_INSERT_HEAD(&(unp->un_unshead), unsp, uns_list);
448
449	*unspp = unsp;
450}
451
452/*
453 * Remove the unionfs node status, if you can.
454 * You need exclusive lock this vnode.
455 */
456void
457unionfs_tryrem_node_status(struct unionfs_node *unp,
458			   struct unionfs_node_status *unsp)
459{
460	KASSERT(NULL != unsp, ("null pointer"));
461	ASSERT_VOP_ELOCKED(UNIONFSTOV(unp), "unionfs_get_node_status");
462
463	if (0 < unsp->uns_lower_opencnt || 0 < unsp->uns_upper_opencnt)
464		return;
465
466	LIST_REMOVE(unsp, uns_list);
467	free(unsp, M_TEMP);
468}
469
470/*
471 * Create upper node attr.
472 */
473void
474unionfs_create_uppervattr_core(struct unionfs_mount *ump,
475			       struct vattr *lva,
476			       struct vattr *uva,
477			       struct thread *td)
478{
479	VATTR_NULL(uva);
480	uva->va_type = lva->va_type;
481	uva->va_atime = lva->va_atime;
482	uva->va_mtime = lva->va_mtime;
483	uva->va_ctime = lva->va_ctime;
484
485	switch (ump->um_copymode) {
486	case UNIONFS_TRANSPARENT:
487		uva->va_mode = lva->va_mode;
488		uva->va_uid = lva->va_uid;
489		uva->va_gid = lva->va_gid;
490		break;
491	case UNIONFS_MASQUERADE:
492		if (ump->um_uid == lva->va_uid) {
493			uva->va_mode = lva->va_mode & 077077;
494			uva->va_mode |= (lva->va_type == VDIR ? ump->um_udir : ump->um_ufile) & 0700;
495			uva->va_uid = lva->va_uid;
496			uva->va_gid = lva->va_gid;
497		} else {
498			uva->va_mode = (lva->va_type == VDIR ? ump->um_udir : ump->um_ufile);
499			uva->va_uid = ump->um_uid;
500			uva->va_gid = ump->um_gid;
501		}
502		break;
503	default:		/* UNIONFS_TRADITIONAL */
504		FILEDESC_SLOCK(td->td_proc->p_fd);
505		uva->va_mode = 0777 & ~td->td_proc->p_fd->fd_cmask;
506		FILEDESC_SUNLOCK(td->td_proc->p_fd);
507		uva->va_uid = ump->um_uid;
508		uva->va_gid = ump->um_gid;
509		break;
510	}
511}
512
513/*
514 * Create upper node attr.
515 */
516int
517unionfs_create_uppervattr(struct unionfs_mount *ump,
518			  struct vnode *lvp,
519			  struct vattr *uva,
520			  struct ucred *cred,
521			  struct thread *td)
522{
523	int		error;
524	struct vattr	lva;
525
526	if ((error = VOP_GETATTR(lvp, &lva, cred)))
527		return (error);
528
529	unionfs_create_uppervattr_core(ump, &lva, uva, td);
530
531	return (error);
532}
533
534/*
535 * relookup
536 *
537 * dvp should be locked on entry and will be locked on return.
538 *
539 * If an error is returned, *vpp will be invalid, otherwise it will hold a
540 * locked, referenced vnode. If *vpp == dvp then remember that only one
541 * LK_EXCLUSIVE lock is held.
542 */
543static int
544unionfs_relookup(struct vnode *dvp, struct vnode **vpp,
545		 struct componentname *cnp, struct componentname *cn,
546		 struct thread *td, char *path, int pathlen, u_long nameiop)
547{
548	int	error;
549
550	cn->cn_namelen = pathlen;
551	cn->cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK);
552	bcopy(path, cn->cn_pnbuf, pathlen);
553	cn->cn_pnbuf[pathlen] = '\0';
554
555	cn->cn_nameiop = nameiop;
556	cn->cn_flags = (LOCKPARENT | LOCKLEAF | HASBUF | SAVENAME | ISLASTCN);
557	cn->cn_lkflags = LK_EXCLUSIVE;
558	cn->cn_thread = td;
559	cn->cn_cred = cnp->cn_cred;
560
561	cn->cn_nameptr = cn->cn_pnbuf;
562	cn->cn_consume = cnp->cn_consume;
563
564	if (nameiop == DELETE)
565		cn->cn_flags |= (cnp->cn_flags & (DOWHITEOUT | SAVESTART));
566	else if (RENAME == nameiop)
567		cn->cn_flags |= (cnp->cn_flags & SAVESTART);
568
569	vref(dvp);
570	VOP_UNLOCK(dvp, 0);
571
572	if ((error = relookup(dvp, vpp, cn))) {
573		uma_zfree(namei_zone, cn->cn_pnbuf);
574		cn->cn_flags &= ~HASBUF;
575		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
576	} else
577		vrele(dvp);
578
579	return (error);
580}
581
582/*
583 * relookup for CREATE namei operation.
584 *
585 * dvp is unionfs vnode. dvp should be locked.
586 *
587 * If it called 'unionfs_copyfile' function by unionfs_link etc,
588 * VOP_LOOKUP information is broken.
589 * So it need relookup in order to create link etc.
590 */
591int
592unionfs_relookup_for_create(struct vnode *dvp, struct componentname *cnp,
593			    struct thread *td)
594{
595	int	error;
596	struct vnode *udvp;
597	struct vnode *vp;
598	struct componentname cn;
599
600	udvp = UNIONFSVPTOUPPERVP(dvp);
601	vp = NULLVP;
602
603	error = unionfs_relookup(udvp, &vp, cnp, &cn, td, cnp->cn_nameptr,
604	    strlen(cnp->cn_nameptr), CREATE);
605	if (error)
606		return (error);
607
608	if (vp != NULLVP) {
609		if (udvp == vp)
610			vrele(vp);
611		else
612			vput(vp);
613
614		error = EEXIST;
615	}
616
617	if (cn.cn_flags & HASBUF) {
618		uma_zfree(namei_zone, cn.cn_pnbuf);
619		cn.cn_flags &= ~HASBUF;
620	}
621
622	if (!error) {
623		cn.cn_flags |= (cnp->cn_flags & HASBUF);
624		cnp->cn_flags = cn.cn_flags;
625	}
626
627	return (error);
628}
629
630/*
631 * relookup for DELETE namei operation.
632 *
633 * dvp is unionfs vnode. dvp should be locked.
634 */
635int
636unionfs_relookup_for_delete(struct vnode *dvp, struct componentname *cnp,
637			    struct thread *td)
638{
639	int	error;
640	struct vnode *udvp;
641	struct vnode *vp;
642	struct componentname cn;
643
644	udvp = UNIONFSVPTOUPPERVP(dvp);
645	vp = NULLVP;
646
647	error = unionfs_relookup(udvp, &vp, cnp, &cn, td, cnp->cn_nameptr,
648	    strlen(cnp->cn_nameptr), DELETE);
649	if (error)
650		return (error);
651
652	if (vp == NULLVP)
653		error = ENOENT;
654	else {
655		if (udvp == vp)
656			vrele(vp);
657		else
658			vput(vp);
659	}
660
661	if (cn.cn_flags & HASBUF) {
662		uma_zfree(namei_zone, cn.cn_pnbuf);
663		cn.cn_flags &= ~HASBUF;
664	}
665
666	if (!error) {
667		cn.cn_flags |= (cnp->cn_flags & HASBUF);
668		cnp->cn_flags = cn.cn_flags;
669	}
670
671	return (error);
672}
673
674/*
675 * relookup for RENAME namei operation.
676 *
677 * dvp is unionfs vnode. dvp should be locked.
678 */
679int
680unionfs_relookup_for_rename(struct vnode *dvp, struct componentname *cnp,
681			    struct thread *td)
682{
683	int error;
684	struct vnode *udvp;
685	struct vnode *vp;
686	struct componentname cn;
687
688	udvp = UNIONFSVPTOUPPERVP(dvp);
689	vp = NULLVP;
690
691	error = unionfs_relookup(udvp, &vp, cnp, &cn, td, cnp->cn_nameptr,
692	    strlen(cnp->cn_nameptr), RENAME);
693	if (error)
694		return (error);
695
696	if (vp != NULLVP) {
697		if (udvp == vp)
698			vrele(vp);
699		else
700			vput(vp);
701	}
702
703	if (cn.cn_flags & HASBUF) {
704		uma_zfree(namei_zone, cn.cn_pnbuf);
705		cn.cn_flags &= ~HASBUF;
706	}
707
708	if (!error) {
709		cn.cn_flags |= (cnp->cn_flags & HASBUF);
710		cnp->cn_flags = cn.cn_flags;
711	}
712
713	return (error);
714
715}
716
717/*
718 * Update the unionfs_node.
719 *
720 * uvp is new locked upper vnode. unionfs vnode's lock will be exchanged to the
721 * uvp's lock and lower's lock will be unlocked.
722 */
723static void
724unionfs_node_update(struct unionfs_node *unp, struct vnode *uvp,
725		    struct thread *td)
726{
727	unsigned	count, lockrec;
728	struct vnode   *vp;
729	struct vnode   *lvp;
730	struct vnode   *dvp;
731
732	vp = UNIONFSTOV(unp);
733	lvp = unp->un_lowervp;
734	ASSERT_VOP_ELOCKED(lvp, "unionfs_node_update");
735	dvp = unp->un_dvp;
736
737	/*
738	 * lock update
739	 */
740	VI_LOCK(vp);
741	unp->un_uppervp = uvp;
742	vp->v_vnlock = uvp->v_vnlock;
743	VI_UNLOCK(vp);
744	lockrec = lvp->v_vnlock->lk_recurse;
745	for (count = 0; count < lockrec; count++)
746		vn_lock(uvp, LK_EXCLUSIVE | LK_CANRECURSE | LK_RETRY);
747
748	/*
749	 * cache update
750	 */
751	if (unp->un_path != NULL && dvp != NULLVP &&
752	    (vp->v_type == VDIR || vp->v_type == VSOCK)) {
753		static struct unionfs_node_hashhead *hd;
754
755		VI_LOCK(dvp);
756		hd = unionfs_get_hashhead(dvp, unp->un_path);
757		LIST_REMOVE(unp, un_hash);
758		LIST_INSERT_HEAD(hd, unp, un_hash);
759		VI_UNLOCK(dvp);
760	}
761}
762
/*
 * Create a new shadow dir.
 *
 * Creates, in the upper directory udvp, a directory that shadows the
 * lower directory of node unp, using the name in cnp.
 *
 * udvp should be locked on entry and will be locked on return.
 *
 * Returns EEXIST if unp already has an upper vnode or the name already
 * exists in udvp; otherwise 0 or the error of the failing step.
 * If no error returned, unp will be updated.
 *
 * While this function runs, cnp->cn_cred is temporarily replaced by a
 * copy of the caller's credential whose effective, real and saved uid are
 * 0; the original credential is put back before returning.
 */
int
unionfs_mkshadowdir(struct unionfs_mount *ump, struct vnode *udvp,
		    struct unionfs_node *unp, struct componentname *cnp,
		    struct thread *td)
{
	int		error;
	struct vnode   *lvp;
	struct vnode   *uvp;
	struct vattr	va;
	struct vattr	lva;
	struct componentname cn;
	struct mount   *mp;
	struct ucred   *cred;
	struct ucred   *credbk;
	struct uidinfo *rootinfo;

	if (unp->un_uppervp != NULLVP)
		return (EEXIST);

	lvp = unp->un_lowervp;
	uvp = NULLVP;
	credbk = cnp->cn_cred;

	/* Switch to a root credential for the duration of the operation. */
	rootinfo = uifind((uid_t)0);
	cred = crdup(cnp->cn_cred);
	/* Balanced by the chgproccnt(..., -1, 0) on the way out. */
	chgproccnt(cred->cr_ruidinfo, 1, 0);
	change_euid(cred, rootinfo);
	change_ruid(cred, rootinfo);
	change_svuid(cred, (uid_t)0);
	uifree(rootinfo);
	cnp->cn_cred = cred;

	/* Zeroed so the HASBUF test at the exit label is safe on early abort. */
	memset(&cn, 0, sizeof(cn));

	if ((error = VOP_GETATTR(lvp, &lva, cnp->cn_cred)))
		goto unionfs_mkshadowdir_abort;

	if ((error = unionfs_relookup(udvp, &uvp, cnp, &cn, td, cnp->cn_nameptr, cnp->cn_namelen, CREATE)))
		goto unionfs_mkshadowdir_abort;
	if (uvp != NULLVP) {
		/* The name is already present in the upper directory. */
		if (udvp == uvp)
			vrele(uvp);
		else
			vput(uvp);

		error = EEXIST;
		goto unionfs_mkshadowdir_free_out;
	}

	if ((error = vn_start_write(udvp, &mp, V_WAIT | PCATCH)))
		goto unionfs_mkshadowdir_free_out;
	if ((error = VOP_LEASE(udvp, td, cn.cn_cred, LEASE_WRITE))) {
		vn_finished_write(mp);
		goto unionfs_mkshadowdir_free_out;
	}
	/* Derive the new directory's attributes from the lower directory. */
	unionfs_create_uppervattr_core(ump, &lva, &va, td);

	error = VOP_MKDIR(udvp, &uvp, &cn, &va);

	if (!error) {
		unionfs_node_update(unp, uvp, td);

		/*
		 * XXX Work around uid/gid not being set by the mkdir itself:
		 * apply the attributes again with VOP_SETATTR (va_type is
		 * reset to VNON first).  Errors are deliberately ignored.
		 */
		va.va_type = VNON;
		VOP_SETATTR(uvp, &va, cn.cn_cred);
	}
	vn_finished_write(mp);

unionfs_mkshadowdir_free_out:
	/* Release the name buffer allocated by unionfs_relookup(), if any. */
	if (cn.cn_flags & HASBUF) {
		uma_zfree(namei_zone, cn.cn_pnbuf);
		cn.cn_flags &= ~HASBUF;
	}

unionfs_mkshadowdir_abort:
	/* Restore the caller's credential and drop the temporary one. */
	cnp->cn_cred = credbk;
	chgproccnt(cred->cr_ruidinfo, -1, 0);
	crfree(cred);

	return (error);
}
855
856/*
857 * Create a new whiteout.
858 *
859 * dvp should be locked on entry and will be locked on return.
860 */
861int
862unionfs_mkwhiteout(struct vnode *dvp, struct componentname *cnp,
863		   struct thread *td, char *path)
864{
865	int		error;
866	struct vnode   *wvp;
867	struct componentname cn;
868	struct mount   *mp;
869
870	if (path == NULL)
871		path = cnp->cn_nameptr;
872
873	wvp = NULLVP;
874	if ((error = unionfs_relookup(dvp, &wvp, cnp, &cn, td, path, strlen(path), CREATE)))
875		return (error);
876	if (wvp != NULLVP) {
877		if (cn.cn_flags & HASBUF) {
878			uma_zfree(namei_zone, cn.cn_pnbuf);
879			cn.cn_flags &= ~HASBUF;
880		}
881		if (dvp == wvp)
882			vrele(wvp);
883		else
884			vput(wvp);
885
886		return (EEXIST);
887	}
888
889	if ((error = vn_start_write(dvp, &mp, V_WAIT | PCATCH)))
890		goto unionfs_mkwhiteout_free_out;
891	if (!(error = VOP_LEASE(dvp, td, td->td_ucred, LEASE_WRITE)))
892		error = VOP_WHITEOUT(dvp, &cn, CREATE);
893
894	vn_finished_write(mp);
895
896unionfs_mkwhiteout_free_out:
897	if (cn.cn_flags & HASBUF) {
898		uma_zfree(namei_zone, cn.cn_pnbuf);
899		cn.cn_flags &= ~HASBUF;
900	}
901
902	return (error);
903}
904
/*
 * Create a new vnode for a new shadow file.
 *
 * A regular file named unp->un_path is created in the upper directory
 * udvp with attributes derived from unp's lower vnode (also returned in
 * *uvap), and is opened write-only.
 *
 * If an error is returned, *vpp will be invalid, otherwise it will hold a
 * locked, referenced and opened vnode whose v_writecount has been raised.
 * EEXIST is returned when the name already exists in udvp.
 *
 * unp is never updated.  The function panics if unp->un_path is NULL.
 *
 * NOTE(review): relookup() is called on udvp directly and udvp is
 * unlocked on every exit taken after a successful relookup; this looks
 * like it expects udvp to be unlocked on entry - confirm against callers.
 */
static int
unionfs_vn_create_on_upper(struct vnode **vpp, struct vnode *udvp,
			   struct unionfs_node *unp, struct vattr *uvap,
			   struct thread *td)
{
	struct unionfs_mount *ump;
	struct vnode   *vp;
	struct vnode   *lvp;
	struct ucred   *cred;
	struct vattr	lva;
	int		fmode;
	int		error;
	struct componentname cn;

	ump = MOUNTTOUNIONFSMOUNT(UNIONFSTOV(unp)->v_mount);
	vp = NULLVP;
	lvp = unp->un_lowervp;
	cred = td->td_ucred;
	fmode = FFLAGS(O_WRONLY | O_CREAT | O_TRUNC | O_EXCL);
	error = 0;

	/* Attributes of the new file are derived from the lower file. */
	if ((error = VOP_GETATTR(lvp, &lva, cred)) != 0)
		return (error);
	unionfs_create_uppervattr_core(ump, &lva, uvap, td);

	if (unp->un_path == NULL)
		panic("unionfs: un_path is null");

	/* Build a CREATE component name from the node's saved name. */
	cn.cn_namelen = strlen(unp->un_path);
	cn.cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK);
	/* The "+ 1" copies the terminating NUL as well. */
	bcopy(unp->un_path, cn.cn_pnbuf, cn.cn_namelen + 1);
	cn.cn_nameiop = CREATE;
	cn.cn_flags = (LOCKPARENT | LOCKLEAF | HASBUF | SAVENAME | ISLASTCN);
	cn.cn_lkflags = LK_EXCLUSIVE;
	cn.cn_thread = td;
	cn.cn_cred = cred;
	cn.cn_nameptr = cn.cn_pnbuf;
	cn.cn_consume = 0;

	/* Extra reference for relookup(); released below on success. */
	vref(udvp);
	if ((error = relookup(udvp, &vp, &cn)) != 0)
		goto unionfs_vn_create_on_upper_free_out2;
	vrele(udvp);

	if (vp != NULLVP) {
		/* The name already exists in the upper directory. */
		if (vp == udvp)
			vrele(vp);
		else
			vput(vp);
		error = EEXIST;
		goto unionfs_vn_create_on_upper_free_out1;
	}

	if ((error = VOP_LEASE(udvp, td, cred, LEASE_WRITE)) != 0)
		goto unionfs_vn_create_on_upper_free_out1;

	if ((error = VOP_CREATE(udvp, &vp, &cn, uvap)) != 0)
		goto unionfs_vn_create_on_upper_free_out1;

	if ((error = VOP_OPEN(vp, fmode, cred, td, NULL)) != 0) {
		/* NOTE(review): the file just created is not removed here. */
		vput(vp);
		goto unionfs_vn_create_on_upper_free_out1;
	}
	/* Account for the write-only open performed above. */
	vp->v_writecount++;
	*vpp = vp;

unionfs_vn_create_on_upper_free_out1:
	VOP_UNLOCK(udvp, 0);

unionfs_vn_create_on_upper_free_out2:
	if (cn.cn_flags & HASBUF) {
		uma_zfree(namei_zone, cn.cn_pnbuf);
		cn.cn_flags &= ~HASBUF;
	}

	return (error);
}
990
991/*
992 * Copy from lvp to uvp.
993 *
994 * lvp and uvp should be locked and opened on entry and will be locked and
995 * opened on return.
996 */
997static int
998unionfs_copyfile_core(struct vnode *lvp, struct vnode *uvp,
999		      struct ucred *cred, struct thread *td)
1000{
1001	int		error;
1002	off_t		offset;
1003	int		count;
1004	int		bufoffset;
1005	char           *buf;
1006	struct uio	uio;
1007	struct iovec	iov;
1008
1009	error = 0;
1010	memset(&uio, 0, sizeof(uio));
1011
1012	uio.uio_td = td;
1013	uio.uio_segflg = UIO_SYSSPACE;
1014	uio.uio_offset = 0;
1015
1016	if ((error = VOP_LEASE(lvp, td, cred, LEASE_READ)) != 0)
1017		return (error);
1018	if ((error = VOP_LEASE(uvp, td, cred, LEASE_WRITE)) != 0)
1019		return (error);
1020	buf = malloc(MAXBSIZE, M_TEMP, M_WAITOK);
1021
1022	while (error == 0) {
1023		offset = uio.uio_offset;
1024
1025		uio.uio_iov = &iov;
1026		uio.uio_iovcnt = 1;
1027		iov.iov_base = buf;
1028		iov.iov_len = MAXBSIZE;
1029		uio.uio_resid = iov.iov_len;
1030		uio.uio_rw = UIO_READ;
1031
1032		if ((error = VOP_READ(lvp, &uio, 0, cred)) != 0)
1033			break;
1034		if ((count = MAXBSIZE - uio.uio_resid) == 0)
1035			break;
1036
1037		bufoffset = 0;
1038		while (bufoffset < count) {
1039			uio.uio_iov = &iov;
1040			uio.uio_iovcnt = 1;
1041			iov.iov_base = buf + bufoffset;
1042			iov.iov_len = count - bufoffset;
1043			uio.uio_offset = offset + bufoffset;
1044			uio.uio_resid = iov.iov_len;
1045			uio.uio_rw = UIO_WRITE;
1046
1047			if ((error = VOP_WRITE(uvp, &uio, 0, cred)) != 0)
1048				break;
1049
1050			bufoffset += (count - bufoffset) - uio.uio_resid;
1051		}
1052
1053		uio.uio_offset = offset + bufoffset;
1054	}
1055
1056	free(buf, M_TEMP);
1057
1058	return (error);
1059}
1060
/*
 * Copy file from lower to upper.
 *
 * Creates, in the upper directory of unp's parent, a file that shadows
 * unp's lower file.
 *
 * If you need copy of the contents, set 1 to docopy. Otherwise, set 0 to
 * docopy.
 *
 * Returns:
 *   EROFS  - the unionfs mount or the upper directory's mount is
 *            read-only, or the parent has no upper vnode;
 *   EINVAL - unp has no parent;
 *   EEXIST - unp already has an upper vnode;
 *   otherwise 0 or the error of the failing step.
 *
 * If no error returned, unp will be updated.
 */
int
unionfs_copyfile(struct unionfs_node *unp, int docopy, struct ucred *cred,
		 struct thread *td)
{
	int		error;
	struct mount   *mp;
	struct vnode   *udvp;
	struct vnode   *lvp;
	struct vnode   *uvp;
	struct vattr	uva;

	lvp = unp->un_lowervp;
	uvp = NULLVP;

	if ((UNIONFSTOV(unp)->v_mount->mnt_flag & MNT_RDONLY))
		return (EROFS);
	if (unp->un_dvp == NULLVP)
		return (EINVAL);
	if (unp->un_uppervp != NULLVP)
		return (EEXIST);
	/* The new file goes into the upper directory of the parent node. */
	udvp = VTOUNIONFS(unp->un_dvp)->un_uppervp;
	if (udvp == NULLVP)
		return (EROFS);
	if ((udvp->v_mount->mnt_flag & MNT_RDONLY))
		return (EROFS);

	/* The caller must be allowed to read the lower file. */
	error = VOP_ACCESS(lvp, VREAD, cred, td);
	if (error != 0)
		return (error);

	if ((error = vn_start_write(udvp, &mp, V_WAIT | PCATCH)) != 0)
		return (error);
	error = unionfs_vn_create_on_upper(&uvp, udvp, unp, &uva, td);
	if (error != 0) {
		vn_finished_write(mp);
		return (error);
	}

	if (docopy != 0) {
		error = VOP_OPEN(lvp, FREAD, cred, td, NULL);
		if (error == 0) {
			error = unionfs_copyfile_core(lvp, uvp, cred, td);
			VOP_CLOSE(lvp, FREAD, cred, td);
		}
	}
	/* Undo the open and write count from unionfs_vn_create_on_upper(). */
	VOP_CLOSE(uvp, FWRITE, cred, td);
	uvp->v_writecount--;

	vn_finished_write(mp);

	if (error == 0) {
		/* Reset the attributes. Ignore errors. */
		uva.va_type = VNON;
		VOP_SETATTR(uvp, &uva, cred);
	}

	/*
	 * NOTE(review): the node is switched to the new upper vnode even
	 * when the copy above failed (error != 0), which does not match the
	 * "If no error returned" wording in the header - confirm intent.
	 */
	unionfs_node_update(unp, uvp, td);

	return (error);
}
1129
/*
 * Check whether the directory vp can be removed, i.e. whether it is empty
 * from the union's point of view.
 *
 * vp is unionfs vnode.
 * vp should be locked (exclusively).
 *
 * If the upper directory is flagged OPAQUE the answer is immediately 0.
 * Otherwise every entry of the lower directory (other than whiteouts,
 * "." and "..") that can really be looked up in the lower layer must be
 * covered in the upper directory, either by an existing entry or by a
 * whiteout.
 *
 * Returns 0 if the directory may be removed, ENOTEMPTY if an uncovered
 * lower entry was found, or the error of a failing access check, open or
 * readdir.
 */
int
unionfs_check_rmdir(struct vnode *vp, struct ucred *cred, struct thread *td)
{
	int		error;
	int		eofflag;
	int		lookuperr;
	struct vnode   *uvp;
	struct vnode   *lvp;
	struct vnode   *tvp;
	struct vattr	va;
	struct componentname cn;
	/*
	 * The size of buf needs to be larger than DIRBLKSIZ.
	 */
	char		buf[256 * 6];
	struct dirent  *dp;
	struct dirent  *edp;
	struct uio	uio;
	struct iovec	iov;

	ASSERT_VOP_ELOCKED(vp, "unionfs_check_rmdir");

	eofflag = 0;
	uvp = UNIONFSVPTOUPPERVP(vp);
	lvp = UNIONFSVPTOLOWERVP(vp);

	/* check opaque: an opaque upper directory hides the lower one */
	if ((error = VOP_GETATTR(uvp, &va, cred)) != 0)
		return (error);
	if (va.va_flags & OPAQUE)
		return (0);

	/* open vnode */
#ifdef MAC
	if ((error = mac_vnode_check_open(cred, vp, VEXEC|VREAD)) != 0)
		return (error);
#endif
	if ((error = VOP_ACCESS(vp, VEXEC|VREAD, cred, td)) != 0)
		return (error);
	if ((error = VOP_OPEN(vp, FREAD, cred, td, NULL)) != 0)
		return (error);

	uio.uio_rw = UIO_READ;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_td = td;
	uio.uio_offset = 0;

#ifdef MAC
	/* A failure here skips the loop and falls through to the close. */
	error = mac_vnode_check_readdir(td->td_ucred, lvp);
#endif
	/* Read the lower directory one buffer at a time. */
	while (!error && !eofflag) {
		iov.iov_base = buf;
		iov.iov_len = sizeof(buf);
		uio.uio_iov = &iov;
		uio.uio_iovcnt = 1;
		uio.uio_resid = iov.iov_len;

		error = VOP_READDIR(lvp, &uio, cred, &eofflag, NULL, NULL);
		if (error != 0)
			break;
		/* Nothing returned although EOF was not reported: give up. */
		if (eofflag == 0 && uio.uio_resid == sizeof(buf)) {
#ifdef DIAGNOSTIC
			panic("bad readdir response from lower FS.");
#endif
			break;
		}

		/* edp marks the end of the data actually returned. */
		edp = (struct dirent*)&buf[sizeof(buf) - uio.uio_resid];
		for (dp = (struct dirent*)buf; !error && dp < edp;
		     dp = (struct dirent*)((caddr_t)dp + dp->d_reclen)) {
			/* Whiteouts, "." and ".." never make it non-empty. */
			if (dp->d_type == DT_WHT ||
			    (dp->d_namlen == 1 && dp->d_name[0] == '.') ||
			    (dp->d_namlen == 2 && !bcmp(dp->d_name, "..", 2)))
				continue;

			cn.cn_namelen = dp->d_namlen;
			cn.cn_pnbuf = NULL;
			cn.cn_nameptr = dp->d_name;
			cn.cn_nameiop = LOOKUP;
			cn.cn_flags = (LOCKPARENT | LOCKLEAF | SAVENAME | RDONLY | ISLASTCN);
			cn.cn_lkflags = LK_EXCLUSIVE;
			cn.cn_thread = td;
			cn.cn_cred = cred;
			cn.cn_consume = 0;

			/*
			 * check entry in lower.
			 * Sometimes, readdir function returns
			 * wrong entry.
			 */
			lookuperr = VOP_LOOKUP(lvp, &tvp, &cn);

			if (!lookuperr)
				vput(tvp);
			else
				continue; /* skip entry */

			/*
			 * check entry
			 * If it has no exist/whiteout entry in upper,
			 * directory is not empty.
			 */
			/* Reset the flags: the lookup above may have changed them. */
			cn.cn_flags = (LOCKPARENT | LOCKLEAF | SAVENAME | RDONLY | ISLASTCN);
			lookuperr = VOP_LOOKUP(uvp, &tvp, &cn);

			if (!lookuperr)
				vput(tvp);

			/* ignore exist or whiteout entry */
			if (!lookuperr ||
			    (lookuperr == ENOENT && (cn.cn_flags & ISWHITEOUT)))
				continue;

			/* Setting error also terminates both loops. */
			error = ENOTEMPTY;
		}
	}

	/* close vnode */
	VOP_CLOSE(vp, FREAD, cred, td);

	return (error);
}
1258
1259#ifdef DIAGNOSTIC
1260
1261struct vnode   *
1262unionfs_checkuppervp(struct vnode *vp, char *fil, int lno)
1263{
1264	struct unionfs_node *unp;
1265
1266	unp = VTOUNIONFS(vp);
1267
1268#ifdef notyet
1269	if (vp->v_op != unionfs_vnodeop_p) {
1270		printf("unionfs_checkuppervp: on non-unionfs-node.\n");
1271#ifdef KDB
1272		kdb_enter(KDB_WHY_UNIONFS,
1273		    "unionfs_checkuppervp: on non-unionfs-node.\n");
1274#endif
1275		panic("unionfs_checkuppervp");
1276	};
1277#endif
1278	return (unp->un_uppervp);
1279}
1280
1281struct vnode   *
1282unionfs_checklowervp(struct vnode *vp, char *fil, int lno)
1283{
1284	struct unionfs_node *unp;
1285
1286	unp = VTOUNIONFS(vp);
1287
1288#ifdef notyet
1289	if (vp->v_op != unionfs_vnodeop_p) {
1290		printf("unionfs_checklowervp: on non-unionfs-node.\n");
1291#ifdef KDB
1292		kdb_enter(KDB_WHY_UNIONFS,
1293		    "unionfs_checklowervp: on non-unionfs-node.\n");
1294#endif
1295		panic("unionfs_checklowervp");
1296	};
1297#endif
1298	return (unp->un_lowervp);
1299}
1300#endif
1301