1/*-
2 * Copyright (c) 1994 Jan-Simon Pendry
3 * Copyright (c) 1994
4 *	The Regents of the University of California.  All rights reserved.
5 * Copyright (c) 2005, 2006 Masanori Ozawa <ozawa@ongs.co.jp>, ONGS Inc.
6 * Copyright (c) 2006 Daichi Goto <daichi@freebsd.org>
7 *
8 * This code is derived from software contributed to Berkeley by
9 * Jan-Simon Pendry.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 *    notice, this list of conditions and the following disclaimer in the
18 *    documentation and/or other materials provided with the distribution.
19 * 4. Neither the name of the University nor the names of its contributors
20 *    may be used to endorse or promote products derived from this software
21 *    without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 *
35 *	@(#)union_subr.c	8.20 (Berkeley) 5/20/95
36 * $FreeBSD: head/sys/fs/unionfs/union_subr.c 165033 2006-12-09 16:27:50Z rodrigc $
37 */
38
39#include <sys/param.h>
40#include <sys/systm.h>
41#include <sys/kernel.h>
42#include <sys/lock.h>
43#include <sys/mutex.h>
44#include <sys/malloc.h>
45#include <sys/mount.h>
46#include <sys/namei.h>
47#include <sys/proc.h>
48#include <sys/vnode.h>
49#include <sys/dirent.h>
50#include <sys/fcntl.h>
51#include <sys/filedesc.h>
52#include <sys/stat.h>
53#include <sys/resourcevar.h>
54
55#ifdef MAC
56#include <sys/mac.h>
57#endif
58
59#include <vm/uma.h>
60
61#include <fs/unionfs/union.h>
62
63#define	NUNIONFSNODECACHE 32
64
65#define	UNIONFS_NHASH(upper, lower) \
66	(&unionfs_node_hashtbl[(((uintptr_t)upper + (uintptr_t)lower) >> 8) & unionfs_node_hash])
67
68static LIST_HEAD(unionfs_node_hashhead, unionfs_node) *unionfs_node_hashtbl;
69static u_long	unionfs_node_hash;
70struct mtx	unionfs_hashmtx;
71
72static MALLOC_DEFINE(M_UNIONFSHASH, "UNIONFS hash", "UNIONFS hash table");
73MALLOC_DEFINE(M_UNIONFSNODE, "UNIONFS node", "UNIONFS vnode private part");
74MALLOC_DEFINE(M_UNIONFSPATH, "UNIONFS path", "UNIONFS path private part");
75
76/*
77 * Initialize cache headers
78 */
79int
80unionfs_init(struct vfsconf *vfsp)
81{
82	UNIONFSDEBUG("unionfs_init\n");	/* printed during system boot */
83	unionfs_node_hashtbl = hashinit(NUNIONFSNODECACHE, M_UNIONFSHASH, &unionfs_node_hash);
84	mtx_init(&unionfs_hashmtx, "unionfs", NULL, MTX_DEF);
85
86	return (0);
87}
88
89/*
90 * Destroy cache headers
91 */
92int
93unionfs_uninit(struct vfsconf *vfsp)
94{
95	mtx_destroy(&unionfs_hashmtx);
96	free(unionfs_node_hashtbl, M_UNIONFSHASH);
97	return (0);
98}
99
100/*
101 * Return a VREF'ed alias for the unionfs vnode if one exists, else NULLVP.
102 */
103static struct vnode *
104unionfs_hashget(struct mount *mp, struct vnode *uppervp,
105		struct vnode *lowervp, struct vnode *dvp, char *path,
106		int lkflags, struct thread *td)
107{
108	struct unionfs_node_hashhead *hd;
109	struct unionfs_node *unp;
110	struct vnode   *vp;
111
112	if (lkflags & LK_TYPE_MASK)
113		lkflags |= LK_RETRY;
114	hd = UNIONFS_NHASH(uppervp, lowervp);
115
116loop:
117	mtx_lock(&unionfs_hashmtx);
118	LIST_FOREACH(unp, hd, un_hash) {
119		if (unp->un_uppervp == uppervp &&
120		    unp->un_lowervp == lowervp &&
121		    unp->un_dvp == dvp &&
122		    UNIONFSTOV(unp)->v_mount == mp &&
123		    (!path || !(unp->un_path) || !strcmp(unp->un_path, path))) {
124			vp = UNIONFSTOV(unp);
125			VI_LOCK(vp);
126
127			/*
128			 * If the unionfs node is being recycled we have to
129			 * wait until it finishes prior to scanning again.
130			 */
131			mtx_unlock(&unionfs_hashmtx);
132			if (vp->v_iflag & VI_DOOMED) {
133				/* Wait for recycling to finish. */
134				vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK, td);
135				VOP_UNLOCK(vp, 0, td);
136				goto loop;
137			}
138			/*
139			 * We need to clear the OWEINACT flag here as this
140			 * may lead vget() to try to lock our vnode which is
141			 * already locked via vp.
142			 */
143			vp->v_iflag &= ~VI_OWEINACT;
144			vget(vp, lkflags | LK_INTERLOCK, td);
145
146			return (vp);
147		}
148	}
149
150	mtx_unlock(&unionfs_hashmtx);
151
152	return (NULLVP);
153}
154
155/*
156 * Act like unionfs_hashget, but insert the passed unionfs_node into the hash
157 * if no existing node is found.
158 */
159static struct vnode *
160unionfs_hashins(struct mount *mp, struct unionfs_node *uncp,
161		char *path, int lkflags, struct thread *td)
162{
163	struct unionfs_node_hashhead *hd;
164	struct unionfs_node *unp;
165	struct vnode   *vp;
166
167	if (lkflags & LK_TYPE_MASK)
168		lkflags |= LK_RETRY;
169	hd = UNIONFS_NHASH(uncp->un_uppervp, uncp->un_lowervp);
170
171loop:
172	mtx_lock(&unionfs_hashmtx);
173	LIST_FOREACH(unp, hd, un_hash) {
174		if (unp->un_uppervp == uncp->un_uppervp &&
175		    unp->un_lowervp == uncp->un_lowervp &&
176		    unp->un_dvp == uncp->un_dvp &&
177		    UNIONFSTOV(unp)->v_mount == mp &&
178		    (!path || !(unp->un_path) || !strcmp(unp->un_path, path))) {
179			vp = UNIONFSTOV(unp);
180			VI_LOCK(vp);
181
182			mtx_unlock(&unionfs_hashmtx);
183			if (vp->v_iflag & VI_DOOMED) {
184				/* Wait for recycling to finish. */
185				vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK, td);
186				VOP_UNLOCK(vp, 0, td);
187				goto loop;
188			}
189			vp->v_iflag &= ~VI_OWEINACT;
190			vget(vp, lkflags | LK_INTERLOCK, td);
191
192			return (vp);
193		}
194	}
195
196	LIST_INSERT_HEAD(hd, uncp, un_hash);
197	uncp->un_flag |= UNIONFS_CACHED;
198	mtx_unlock(&unionfs_hashmtx);
199
200	return (NULLVP);
201}
202
203/*
204 * Make a new or get existing unionfs node.
205 *
206 * uppervp and lowervp should be unlocked, because locking the new unionfs
207 * vnode also locks uppervp or lowervp. To prevent deadlock, never hold more
208 * than one of these vnode locks at the same time.
209 */
210int
211unionfs_nodeget(struct mount *mp, struct vnode *uppervp,
212		struct vnode *lowervp, struct vnode *dvp,
213		struct vnode **vpp, struct componentname *cnp,
214		struct thread *td)
215{
216	struct unionfs_mount *ump;
217	struct unionfs_node *unp;
218	struct vnode   *vp;
219	int		error;
220	int		lkflags;
221	char	       *path;
222
223	ump = MOUNTTOUNIONFSMOUNT(mp);
224	lkflags = (cnp ? cnp->cn_lkflags : 0);
225	path = (cnp ? cnp->cn_nameptr : "");
226
227	if (uppervp == NULLVP && lowervp == NULLVP)
228		panic("unionfs_nodeget: upper and lower is null");
229
230	/* If the ISLASTCN flag is not set, the path check is skipped. */
231	if (!cnp || !(cnp->cn_flags & ISLASTCN))
232		path = NULL;
233
234	/* Lookup the hash first. */
235	*vpp = unionfs_hashget(mp, uppervp, lowervp, dvp, path, lkflags, td);
236	if (*vpp != NULLVP)
237		return (0);
238
239	if ((uppervp == NULLVP || ump->um_uppervp != uppervp) ||
240	    (lowervp == NULLVP || ump->um_lowervp != lowervp)) {
241		if (dvp == NULLVP)
242			return (EINVAL);
243	}
244
245	/*
246	 * Do the MALLOC before the getnewvnode since doing so afterward
247	 * might cause a bogus v_data pointer to get dereferenced elsewhere
248	 * if MALLOC should block.
249	 */
250	MALLOC(unp, struct unionfs_node *, sizeof(struct unionfs_node),
251	    M_UNIONFSNODE, M_WAITOK | M_ZERO);
252
253	error = getnewvnode("unionfs", mp, &unionfs_vnodeops, &vp);
254	if (error) {
255		FREE(unp, M_UNIONFSNODE);
256		return (error);
257	}
258	if (dvp != NULLVP)
259		vref(dvp);
260	if (uppervp != NULLVP)
261		vref(uppervp);
262	if (lowervp != NULLVP)
263		vref(lowervp);
264
265	unp->un_vnode = vp;
266	unp->un_uppervp = uppervp;
267	unp->un_lowervp = lowervp;
268	unp->un_dvp = dvp;
269	if (uppervp != NULLVP)
270		vp->v_vnlock = uppervp->v_vnlock;
271	else
272		vp->v_vnlock = lowervp->v_vnlock;
273
274	if (cnp) {
275		unp->un_path = (char *)
276		    malloc(cnp->cn_namelen +1, M_UNIONFSPATH, M_WAITOK | M_ZERO);
277		bcopy(cnp->cn_nameptr, unp->un_path, cnp->cn_namelen);
278		unp->un_path[cnp->cn_namelen] = '\0';
279	}
280	vp->v_type = (uppervp != NULLVP ? uppervp->v_type : lowervp->v_type);
281	vp->v_data = unp;
282
283	if ((uppervp != NULLVP && ump->um_uppervp == uppervp) &&
284	    (lowervp != NULLVP && ump->um_lowervp == lowervp))
285		vp->v_vflag |= VV_ROOT;
286
287	*vpp = unionfs_hashins(mp, unp, path, lkflags, td);
288	if (*vpp != NULLVP) {
289		if (dvp != NULLVP)
290			vrele(dvp);
291		if (uppervp != NULLVP)
292			vrele(uppervp);
293		if (lowervp != NULLVP)
294			vrele(lowervp);
295
296		unp->un_uppervp = NULLVP;
297		unp->un_lowervp = NULLVP;
298		unp->un_dvp = NULLVP;
299		vrele(vp);
300
301		return (0);
302	}
303
304	if (lkflags & LK_TYPE_MASK)
305		vn_lock(vp, lkflags | LK_RETRY, td);
306
307	*vpp = vp;
308
309	return (0);
310}
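
/*
 * Usage sketch (illustrative only, not copied from unionfs_vnops.c): a
 * lookup-style caller is expected to drop the layer vnode locks before
 * asking for the union vnode, since locking the union vnode also locks
 * the layer vnode backing it.  "uvp", "lvp", "dvp" and "vp" below are
 * placeholders.
 *
 *	VOP_UNLOCK(uvp, 0, td);
 *	VOP_UNLOCK(lvp, 0, td);
 *	error = unionfs_nodeget(dvp->v_mount, uvp, lvp, dvp, &vp, cnp, td);
 *
 * When cnp->cn_lkflags requests a lock type, the returned vnode is
 * already locked.
 */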
311
312/*
313 * Remove the unionfs node from the hash and release its resources.
314 */
315void
316unionfs_hashrem(struct vnode *vp, struct thread *td)
317{
318	int		vfslocked;
319	struct unionfs_node *unp;
320	struct unionfs_node_status *unsp, *unsp_tmp;
321	struct vnode   *lvp;
322	struct vnode   *uvp;
323
324	/*
325	 * Use the interlock to protect the clearing of v_data to
326	 * prevent faults in unionfs_lock().
327	 */
328	VI_LOCK(vp);
329	unp = VTOUNIONFS(vp);
330	lvp = unp->un_lowervp;
331	uvp = unp->un_uppervp;
332	unp->un_lowervp = unp->un_uppervp = NULLVP;
333
334	vp->v_vnlock = &(vp->v_lock);
335	vp->v_data = NULL;
336	lockmgr(vp->v_vnlock, LK_EXCLUSIVE | LK_INTERLOCK, VI_MTX(vp), td);
337	if (lvp != NULLVP)
338		VOP_UNLOCK(lvp, 0, td);
339	if (uvp != NULLVP)
340		VOP_UNLOCK(uvp, 0, td);
341
342	mtx_lock(&unionfs_hashmtx);
343	if (unp->un_flag & UNIONFS_CACHED) {
344		LIST_REMOVE(unp, un_hash);
345		unp->un_flag &= ~UNIONFS_CACHED;
346	}
347	mtx_unlock(&unionfs_hashmtx);
348	vp->v_object = NULL;
349
350	if (lvp != NULLVP) {
351		vfslocked = VFS_LOCK_GIANT(lvp->v_mount);
352		vrele(lvp);
353		VFS_UNLOCK_GIANT(vfslocked);
354	}
355	if (uvp != NULLVP) {
356		vfslocked = VFS_LOCK_GIANT(uvp->v_mount);
357		vrele(uvp);
358		VFS_UNLOCK_GIANT(vfslocked);
359	}
360	if (unp->un_dvp != NULLVP) {
361		vfslocked = VFS_LOCK_GIANT(unp->un_dvp->v_mount);
362		vrele(unp->un_dvp);
363		VFS_UNLOCK_GIANT(vfslocked);
364		unp->un_dvp = NULLVP;
365	}
366	if (unp->un_path) {
367		free(unp->un_path, M_UNIONFSPATH);
368		unp->un_path = NULL;
369	}
370
371	LIST_FOREACH_SAFE(unsp, &(unp->un_unshead), uns_list, unsp_tmp) {
372		LIST_REMOVE(unsp, uns_list);
373		free(unsp, M_TEMP);
374	}
375	FREE(unp, M_UNIONFSNODE);
376}
377
378/*
379 * Get the unionfs node status.
380 * The vnode must be exclusively locked.
381 */
382void
383unionfs_get_node_status(struct unionfs_node *unp, struct thread *td,
384			struct unionfs_node_status **unspp)
385{
386	struct unionfs_node_status *unsp;
387
388	KASSERT(NULL != unspp, ("null pointer"));
389	ASSERT_VOP_ELOCKED(UNIONFSTOV(unp), "unionfs_get_node_status");
390
391	LIST_FOREACH(unsp, &(unp->un_unshead), uns_list) {
392		if (unsp->uns_tid == td->td_tid) {
393			*unspp = unsp;
394			return;
395		}
396	}
397
398	/* create a new unionfs node status */
399	MALLOC(unsp, struct unionfs_node_status *,
400	    sizeof(struct unionfs_node_status), M_TEMP, M_WAITOK | M_ZERO);
401
402	unsp->uns_tid = td->td_tid;
403	LIST_INSERT_HEAD(&(unp->un_unshead), unsp, uns_list);
404
405	*unspp = unsp;
406}
407
408/*
409 * Remove the unionfs node status if it is no longer in use.
410 * The vnode must be exclusively locked.
411 */
412void
413unionfs_tryrem_node_status(struct unionfs_node *unp, struct thread *td,
414			   struct unionfs_node_status *unsp)
415{
416	KASSERT(NULL != unsp, ("null pointer"));
417	ASSERT_VOP_ELOCKED(UNIONFSTOV(unp), "unionfs_tryrem_node_status");
418
419	if (0 < unsp->uns_lower_opencnt || 0 < unsp->uns_upper_opencnt)
420		return;
421
422	LIST_REMOVE(unsp, uns_list);
423	free(unsp, M_TEMP);
424}
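
/*
 * Usage sketch (illustrative only): an open/close path looks up the
 * per-thread status and adjusts the open counts while holding the
 * unionfs vnode lock exclusively; variable names are placeholders.
 *
 *	struct unionfs_node_status *unsp;
 *
 *	unionfs_get_node_status(unp, td, &unsp);
 *	unsp->uns_upper_opencnt++;
 *	...
 *	unsp->uns_upper_opencnt--;
 *	unionfs_tryrem_node_status(unp, td, unsp);
 */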
425
426/*
427 * Create upper node attr.
428 */
429void
430unionfs_create_uppervattr_core(struct unionfs_mount *ump,
431			       struct vattr *lva,
432			       struct vattr *uva,
433			       struct thread *td)
434{
435	VATTR_NULL(uva);
436	uva->va_type = lva->va_type;
437	uva->va_atime = lva->va_atime;
438	uva->va_mtime = lva->va_mtime;
439	uva->va_ctime = lva->va_ctime;
440
441	switch (ump->um_copymode) {
442	case UNIONFS_TRANSPARENT:
443		uva->va_mode = lva->va_mode;
444		uva->va_uid = lva->va_uid;
445		uva->va_gid = lva->va_gid;
446		break;
447	case UNIONFS_MASQUERADE:
448		if (ump->um_uid == lva->va_uid) {
449			uva->va_mode = lva->va_mode & 077077;
450			uva->va_mode |= (lva->va_type == VDIR ? ump->um_udir : ump->um_ufile) & 0700;
451			uva->va_uid = lva->va_uid;
452			uva->va_gid = lva->va_gid;
453		} else {
454			uva->va_mode = (lva->va_type == VDIR ? ump->um_udir : ump->um_ufile);
455			uva->va_uid = ump->um_uid;
456			uva->va_gid = ump->um_gid;
457		}
458		break;
459	default:		/* UNIONFS_TRADITIONAL */
460		FILEDESC_LOCK_FAST(td->td_proc->p_fd);
461		uva->va_mode = 0777 & ~td->td_proc->p_fd->fd_cmask;
462		FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
463		uva->va_uid = ump->um_uid;
464		uva->va_gid = ump->um_gid;
465		break;
466	}
467}
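
/*
 * Worked example (illustrative only) for UNIONFS_MASQUERADE: a lower
 * file with mode 0644 owned by the mounting user and um_ufile 0600
 * gets (0644 & 077077) | (0600 & 0700) = 0044 | 0600 = 0644 and keeps
 * the lower uid/gid; a file owned by anyone else gets mode um_ufile
 * and the mount's um_uid/um_gid instead.
 */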
468
469/*
470 * Create upper node attr.
471 */
472int
473unionfs_create_uppervattr(struct unionfs_mount *ump,
474			  struct vnode *lvp,
475			  struct vattr *uva,
476			  struct ucred *cred,
477			  struct thread *td)
478{
479	int		error;
480	struct vattr	lva;
481
482	if ((error = VOP_GETATTR(lvp, &lva, cred, td)))
483		return (error);
484
485	unionfs_create_uppervattr_core(ump, &lva, uva, td);
486
487	return (error);
488}
489
490/*
491 * relookup
492 *
493 * dvp should be locked on entry and will be locked on return.
494 *
495 * If an error is returned, *vpp will be invalid, otherwise it will hold a
496 * locked, referenced vnode. If *vpp == dvp then remember that only one
497 * LK_EXCLUSIVE lock is held.
498 */
499static int
500unionfs_relookup(struct vnode *dvp, struct vnode **vpp,
501		 struct componentname *cnp, struct componentname *cn,
502		 struct thread *td, char *path, int pathlen, u_long nameiop)
503{
504	int	error;
505
506	cn->cn_namelen = pathlen;
507	cn->cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK);
508	bcopy(path, cn->cn_pnbuf, pathlen);
509	cn->cn_pnbuf[pathlen] = '\0';
510
511	cn->cn_nameiop = nameiop;
512	cn->cn_flags = (LOCKPARENT | LOCKLEAF | HASBUF | SAVENAME | ISLASTCN);
513	cn->cn_lkflags = LK_EXCLUSIVE;
514	cn->cn_thread = td;
515	cn->cn_cred = cnp->cn_cred;
516
517	cn->cn_nameptr = cn->cn_pnbuf;
518	cn->cn_consume = cnp->cn_consume;
519
520	if (nameiop == DELETE)
521		cn->cn_flags |= (cnp->cn_flags & (DOWHITEOUT | SAVESTART));
522	else if (RENAME == nameiop)
523		cn->cn_flags |= (cnp->cn_flags & SAVESTART);
524
525	vref(dvp);
526	VOP_UNLOCK(dvp, 0, td);
527
528	if ((error = relookup(dvp, vpp, cn))) {
529		uma_zfree(namei_zone, cn->cn_pnbuf);
530		cn->cn_flags &= ~HASBUF;
531		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, td);
532	} else
533		vrele(dvp);
534
535	return (error);
536}
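
/*
 * Usage sketch (illustrative only): on success the caller owns
 * cn.cn_pnbuf and must release it when done; on failure the buffer has
 * already been freed here.
 *
 *	error = unionfs_relookup(udvp, &vp, cnp, &cn, td,
 *	    cnp->cn_nameptr, strlen(cnp->cn_nameptr), CREATE);
 *	...
 *	if (cn.cn_flags & HASBUF) {
 *		uma_zfree(namei_zone, cn.cn_pnbuf);
 *		cn.cn_flags &= ~HASBUF;
 *	}
 */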
537
538/*
539 * relookup for CREATE namei operation.
540 *
541 * dvp is the unionfs vnode; it should be locked.
542 *
543 * If unionfs_link() or a similar caller has run unionfs_copyfile(), the
544 * information gathered by the earlier VOP_LOOKUP is stale, so a relookup
545 * is needed before the link (or similar object) can be created.
546 */
547int
548unionfs_relookup_for_create(struct vnode *dvp, struct componentname *cnp,
549			    struct thread *td)
550{
551	int	error;
552	struct vnode *udvp;
553	struct vnode *vp;
554	struct componentname cn;
555
556	udvp = UNIONFSVPTOUPPERVP(dvp);
557	vp = NULLVP;
558
559	error = unionfs_relookup(udvp, &vp, cnp, &cn, td, cnp->cn_nameptr,
560	    strlen(cnp->cn_nameptr), CREATE);
561	if (error)
562		return (error);
563
564	if (vp != NULLVP) {
565		if (udvp == vp)
566			vrele(vp);
567		else
568			vput(vp);
569
570		error = EEXIST;
571	}
572
573	if (cn.cn_flags & HASBUF) {
574		uma_zfree(namei_zone, cn.cn_pnbuf);
575		cn.cn_flags &= ~HASBUF;
576	}
577
578	if (!error) {
579		cn.cn_flags |= (cnp->cn_flags & HASBUF);
580		cnp->cn_flags = cn.cn_flags;
581	}
582
583	return (error);
584}
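
/*
 * Usage sketch (illustrative only; the real flow in unionfs_vnops.c may
 * differ): a link-style operation copies the file up if needed, redoes
 * the lookup on the upper directory, and only then creates the new name
 * on the upper layer.  "udvp" and "uvp" stand for the upper parent and
 * target vnodes.
 *
 *	error = 0;
 *	if (unp->un_uppervp == NULLVP)
 *		error = unionfs_copyfile(unp, 1, cnp->cn_cred, td);
 *	if (error == 0)
 *		error = unionfs_relookup_for_create(dvp, cnp, td);
 *	if (error == 0)
 *		error = VOP_LINK(udvp, uvp, cnp);
 */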
585
586/*
587 * relookup for DELETE namei operation.
588 *
589 * dvp is the unionfs vnode; it should be locked.
590 */
591int
592unionfs_relookup_for_delete(struct vnode *dvp, struct componentname *cnp,
593			    struct thread *td)
594{
595	int	error;
596	struct vnode *udvp;
597	struct vnode *vp;
598	struct componentname cn;
599
600	udvp = UNIONFSVPTOUPPERVP(dvp);
601	vp = NULLVP;
602
603	error = unionfs_relookup(udvp, &vp, cnp, &cn, td, cnp->cn_nameptr,
604	    strlen(cnp->cn_nameptr), DELETE);
605	if (error)
606		return (error);
607
608	if (vp == NULLVP)
609		error = ENOENT;
610	else {
611		if (udvp == vp)
612			vrele(vp);
613		else
614			vput(vp);
615	}
616
617	if (cn.cn_flags & HASBUF) {
618		uma_zfree(namei_zone, cn.cn_pnbuf);
619		cn.cn_flags &= ~HASBUF;
620	}
621
622	if (!error) {
623		cn.cn_flags |= (cnp->cn_flags & HASBUF);
624		cnp->cn_flags = cn.cn_flags;
625	}
626
627	return (error);
628}
629
630/*
631 * relookup for RENAME namei operation.
632 *
633 * dvp is the unionfs vnode; it should be locked.
634 */
635int
636unionfs_relookup_for_rename(struct vnode *dvp, struct componentname *cnp,
637			    struct thread *td)
638{
639	int error;
640	struct vnode *udvp;
641	struct vnode *vp;
642	struct componentname cn;
643
644	udvp = UNIONFSVPTOUPPERVP(dvp);
645	vp = NULLVP;
646
647	error = unionfs_relookup(udvp, &vp, cnp, &cn, td, cnp->cn_nameptr,
648	    strlen(cnp->cn_nameptr), RENAME);
649	if (error)
650		return (error);
651
652	if (vp != NULLVP) {
653		if (udvp == vp)
654			vrele(vp);
655		else
656			vput(vp);
657	}
658
659	if (cn.cn_flags & HASBUF) {
660		uma_zfree(namei_zone, cn.cn_pnbuf);
661		cn.cn_flags &= ~HASBUF;
662	}
663
664	if (!error) {
665		cn.cn_flags |= (cnp->cn_flags & HASBUF);
666		cnp->cn_flags = cn.cn_flags;
667	}
668
669	return (error);
670
671}
672
673/*
674 * Update the unionfs_node.
675 *
676 * uvp is the new, locked upper vnode. The unionfs vnode's lock is exchanged
677 * for uvp's lock, and the lower vnode's lock is released.
678 */
679static void
680unionfs_node_update(struct unionfs_node *unp, struct vnode *uvp,
681		    struct thread *td)
682{
683	int		count, lockcnt;
684	struct vnode   *vp;
685	struct vnode   *lvp;
686
687	vp = UNIONFSTOV(unp);
688	lvp = unp->un_lowervp;
689
690	/*
691	 * lock update
692	 */
693	VI_LOCK(vp);
694	unp->un_uppervp = uvp;
695	vp->v_vnlock = uvp->v_vnlock;
696	lockcnt = lvp->v_vnlock->lk_exclusivecount;
697	if (lockcnt <= 0)
698		panic("unionfs: no exclusive lock");
699	VI_UNLOCK(vp);
700	for (count = 1; count < lockcnt; count++)
701		vn_lock(uvp, LK_EXCLUSIVE | LK_CANRECURSE | LK_RETRY, td);
702
703	/*
704	 * cache update
705	 */
706	mtx_lock(&unionfs_hashmtx);
707	if (unp->un_flag & UNIONFS_CACHED)
708		LIST_REMOVE(unp, un_hash);
709	LIST_INSERT_HEAD(UNIONFS_NHASH(uvp, lvp), unp, un_hash);
710	unp->un_flag |= UNIONFS_CACHED;
711	mtx_unlock(&unionfs_hashmtx);
712}
713
714/*
715 * Create a new shadow dir.
716 *
717 * udvp should be locked on entry and will be locked on return.
718 *
719 * If no error is returned, unp will be updated.
720 */
721int
722unionfs_mkshadowdir(struct unionfs_mount *ump, struct vnode *udvp,
723		    struct unionfs_node *unp, struct componentname *cnp,
724		    struct thread *td)
725{
726	int		error;
727	struct vnode   *lvp;
728	struct vnode   *uvp;
729	struct vattr	va;
730	struct vattr	lva;
731	struct componentname cn;
732	struct mount   *mp;
733	struct ucred   *cred;
734	struct ucred   *credbk;
735	struct uidinfo *rootinfo;
736
737	if (unp->un_uppervp != NULLVP)
738		return (EEXIST);
739
740	lvp = unp->un_lowervp;
741	uvp = NULLVP;
742	credbk = cnp->cn_cred;
743
744	/* Temporarily switch credentials to root. */
745	rootinfo = uifind((uid_t)0);
746	cred = crdup(cnp->cn_cred);
747	chgproccnt(cred->cr_ruidinfo, 1, 0);
748	change_euid(cred, rootinfo);
749	change_ruid(cred, rootinfo);
750	change_svuid(cred, (uid_t)0);
751	uifree(rootinfo);
752	cnp->cn_cred = cred;
753
754	memset(&cn, 0, sizeof(cn));
755
756	if ((error = VOP_GETATTR(lvp, &lva, cnp->cn_cred, td)))
757		goto unionfs_mkshadowdir_abort;
758
759	if ((error = unionfs_relookup(udvp, &uvp, cnp, &cn, td, cnp->cn_nameptr, cnp->cn_namelen, CREATE)))
760		goto unionfs_mkshadowdir_abort;
761	if (uvp != NULLVP) {
762		if (udvp == uvp)
763			vrele(uvp);
764		else
765			vput(uvp);
766
767		error = EEXIST;
768		goto unionfs_mkshadowdir_free_out;
769	}
770
771	if ((error = vn_start_write(udvp, &mp, V_WAIT | PCATCH)))
772		goto unionfs_mkshadowdir_free_out;
773	if ((error = VOP_LEASE(udvp, td, cn.cn_cred, LEASE_WRITE))) {
774		vn_finished_write(mp);
775		goto unionfs_mkshadowdir_free_out;
776	}
777	unionfs_create_uppervattr_core(ump, &lva, &va, td);
778
779	error = VOP_MKDIR(udvp, &uvp, &cn, &va);
780
781	if (!error) {
782		unionfs_node_update(unp, uvp, td);
783
784		/*
785		 * XXX This corrects the old bug where uid/gid could not be set.
786		 * Ignore errors.
787		 */
788		va.va_type = VNON;
789		VOP_SETATTR(uvp, &va, cn.cn_cred, td);
790	}
791	vn_finished_write(mp);
792
793unionfs_mkshadowdir_free_out:
794	if (cn.cn_flags & HASBUF) {
795		uma_zfree(namei_zone, cn.cn_pnbuf);
796		cn.cn_flags &= ~HASBUF;
797	}
798
799unionfs_mkshadowdir_abort:
800	cnp->cn_cred = credbk;
801	chgproccnt(cred->cr_ruidinfo, -1, 0);
802	crfree(cred);
803
804	return (error);
805}
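
/*
 * Usage sketch (illustrative only): before creating anything beneath a
 * directory that exists only in the lower layer, its shadow directory
 * must be created on the upper layer first; "udvp" stands for the
 * parent's upper vnode.
 *
 *	if (unp->un_uppervp == NULLVP)
 *		error = unionfs_mkshadowdir(ump, udvp, unp, cnp, td);
 *
 * On success unp->un_uppervp is valid and the node has been rehashed by
 * unionfs_node_update().
 */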
806
807/*
808 * Create a new whiteout.
809 *
810 * dvp should be locked on entry and will be locked on return.
811 */
812int
813unionfs_mkwhiteout(struct vnode *dvp, struct componentname *cnp,
814		   struct thread *td, char *path)
815{
816	int		error;
817	struct vnode   *wvp;
818	struct componentname cn;
819	struct mount   *mp;
820
821	if (path == NULL)
822		path = cnp->cn_nameptr;
823
824	wvp = NULLVP;
825	if ((error = unionfs_relookup(dvp, &wvp, cnp, &cn, td, path, strlen(path), CREATE)))
826		return (error);
827	if (wvp != NULLVP) {
828		if (cn.cn_flags & HASBUF) {
829			uma_zfree(namei_zone, cn.cn_pnbuf);
830			cn.cn_flags &= ~HASBUF;
831		}
832		if (dvp == wvp)
833			vrele(wvp);
834		else
835			vput(wvp);
836
837		return (EEXIST);
838	}
839
840	if ((error = vn_start_write(dvp, &mp, V_WAIT | PCATCH)))
841		goto unionfs_mkwhiteout_free_out;
842	if (!(error = VOP_LEASE(dvp, td, td->td_ucred, LEASE_WRITE)))
843		error = VOP_WHITEOUT(dvp, &cn, CREATE);
844
845	vn_finished_write(mp);
846
847unionfs_mkwhiteout_free_out:
848	if (cn.cn_flags & HASBUF) {
849		uma_zfree(namei_zone, cn.cn_pnbuf);
850		cn.cn_flags &= ~HASBUF;
851	}
852
853	return (error);
854}
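
/*
 * Usage sketch (illustrative only): removing a name that exists only in
 * the lower layer cannot touch the lower file system, so a remove path
 * hides the name with a whiteout instead; "udvp" stands for the
 * parent's upper vnode.
 *
 *	if (unp->un_uppervp == NULLVP)
 *		error = unionfs_mkwhiteout(udvp, cnp, td, unp->un_path);
 */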
855
856/*
857 * Create a new vnode used to create a new shadow file.
858 *
859 * If an error is returned, *vpp will be invalid, otherwise it will hold a
860 * locked, referenced and opened vnode.
861 *
862 * unp is never updated.
863 */
864static int
865unionfs_vn_create_on_upper(struct vnode **vpp, struct vnode *udvp,
866			   struct unionfs_node *unp, struct vattr *uvap,
867			   struct thread *td)
868{
869	struct unionfs_mount *ump;
870	struct vnode   *vp;
871	struct vnode   *lvp;
872	struct ucred   *cred;
873	struct vattr	lva;
874	int		fmode;
875	int		error;
876	struct componentname cn;
877
878	ump = MOUNTTOUNIONFSMOUNT(UNIONFSTOV(unp)->v_mount);
879	vp = NULLVP;
880	lvp = unp->un_lowervp;
881	cred = td->td_ucred;
882	fmode = FFLAGS(O_WRONLY | O_CREAT | O_TRUNC | O_EXCL);
883	error = 0;
884
885	if ((error = VOP_GETATTR(lvp, &lva, cred, td)) != 0)
886		return (error);
887	unionfs_create_uppervattr_core(ump, &lva, uvap, td);
888
889	if (unp->un_path == NULL)
890		panic("unionfs: un_path is null");
891
892	cn.cn_namelen = strlen(unp->un_path);
893	cn.cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK);
894	bcopy(unp->un_path, cn.cn_pnbuf, cn.cn_namelen + 1);
895	cn.cn_nameiop = CREATE;
896	cn.cn_flags = (LOCKPARENT | LOCKLEAF | HASBUF | SAVENAME | ISLASTCN);
897	cn.cn_lkflags = LK_EXCLUSIVE;
898	cn.cn_thread = td;
899	cn.cn_cred = cred;
900	cn.cn_nameptr = cn.cn_pnbuf;
901	cn.cn_consume = 0;
902
903	vref(udvp);
904	if ((error = relookup(udvp, &vp, &cn)) != 0)
905		goto unionfs_vn_create_on_upper_free_out2;
906	vrele(udvp);
907
908	if (vp != NULLVP) {
909		if (vp == udvp)
910			vrele(vp);
911		else
912			vput(vp);
913		error = EEXIST;
914		goto unionfs_vn_create_on_upper_free_out1;
915	}
916
917	if ((error = VOP_LEASE(udvp, td, cred, LEASE_WRITE)) != 0)
918		goto unionfs_vn_create_on_upper_free_out1;
919
920	if ((error = VOP_CREATE(udvp, &vp, &cn, uvap)) != 0)
921		goto unionfs_vn_create_on_upper_free_out1;
922
923	if ((error = VOP_OPEN(vp, fmode, cred, td, -1)) != 0) {
924		vput(vp);
925		goto unionfs_vn_create_on_upper_free_out1;
926	}
927	vp->v_writecount++;
928	*vpp = vp;
929
930unionfs_vn_create_on_upper_free_out1:
931	VOP_UNLOCK(udvp, 0, td);
932
933unionfs_vn_create_on_upper_free_out2:
934	if (cn.cn_flags & HASBUF) {
935		uma_zfree(namei_zone, cn.cn_pnbuf);
936		cn.cn_flags &= ~HASBUF;
937	}
938
939	return (error);
940}
941
942/*
943 * Copy from lvp to uvp.
944 *
945 * lvp and uvp should be locked and opened on entry and will be locked and
946 * opened on return.
947 */
948static int
949unionfs_copyfile_core(struct vnode *lvp, struct vnode *uvp,
950		      struct ucred *cred, struct thread *td)
951{
952	int		error;
953	off_t		offset;
954	int		count;
955	int		bufoffset;
956	char           *buf;
957	struct uio	uio;
958	struct iovec	iov;
959
960	error = 0;
961	memset(&uio, 0, sizeof(uio));
962
963	uio.uio_td = td;
964	uio.uio_segflg = UIO_SYSSPACE;
965	uio.uio_offset = 0;
966
967	if ((error = VOP_LEASE(lvp, td, cred, LEASE_READ)) != 0)
968		return (error);
969	if ((error = VOP_LEASE(uvp, td, cred, LEASE_WRITE)) != 0)
970		return (error);
971	buf = malloc(MAXBSIZE, M_TEMP, M_WAITOK);
972
973	while (error == 0) {
974		offset = uio.uio_offset;
975
976		uio.uio_iov = &iov;
977		uio.uio_iovcnt = 1;
978		iov.iov_base = buf;
979		iov.iov_len = MAXBSIZE;
980		uio.uio_resid = iov.iov_len;
981		uio.uio_rw = UIO_READ;
982
983		if ((error = VOP_READ(lvp, &uio, 0, cred)) != 0)
984			break;
985		if ((count = MAXBSIZE - uio.uio_resid) == 0)
986			break;
987
988		bufoffset = 0;
989		while (bufoffset < count) {
990			uio.uio_iov = &iov;
991			uio.uio_iovcnt = 1;
992			iov.iov_base = buf + bufoffset;
993			iov.iov_len = count - bufoffset;
994			uio.uio_offset = offset + bufoffset;
995			uio.uio_resid = iov.iov_len;
996			uio.uio_rw = UIO_WRITE;
997
998			if ((error = VOP_WRITE(uvp, &uio, 0, cred)) != 0)
999				break;
1000
1001			bufoffset += (count - bufoffset) - uio.uio_resid;
1002		}
1003
1004		uio.uio_offset = offset + bufoffset;
1005	}
1006
1007	free(buf, M_TEMP);
1008
1009	return (error);
1010}
1011
1012/*
1013 * Copy file from lower to upper.
1014 *
1015 * If the file contents should be copied as well, pass 1 for docopy;
1016 * otherwise pass 0.
1017 *
1018 * If no error is returned, unp will be updated.
1019 */
1020int
1021unionfs_copyfile(struct unionfs_node *unp, int docopy, struct ucred *cred,
1022		 struct thread *td)
1023{
1024	int		error;
1025	struct mount   *mp;
1026	struct vnode   *udvp;
1027	struct vnode   *lvp;
1028	struct vnode   *uvp;
1029	struct vattr	uva;
1030
1031	lvp = unp->un_lowervp;
1032	uvp = NULLVP;
1033
1034	if ((UNIONFSTOV(unp)->v_mount->mnt_flag & MNT_RDONLY))
1035		return (EROFS);
1036	if (unp->un_dvp == NULLVP)
1037		return (EINVAL);
1038	if (unp->un_uppervp != NULLVP)
1039		return (EEXIST);
1040	udvp = VTOUNIONFS(unp->un_dvp)->un_uppervp;
1041	if (udvp == NULLVP)
1042		return (EROFS);
1043	if ((udvp->v_mount->mnt_flag & MNT_RDONLY))
1044		return (EROFS);
1045
1046	error = VOP_ACCESS(lvp, VREAD, cred, td);
1047	if (error != 0)
1048		return (error);
1049
1050	if ((error = vn_start_write(udvp, &mp, V_WAIT | PCATCH)) != 0)
1051		return (error);
1052	error = unionfs_vn_create_on_upper(&uvp, udvp, unp, &uva, td);
1053	if (error != 0) {
1054		vn_finished_write(mp);
1055		return (error);
1056	}
1057
1058	if (docopy != 0) {
1059		error = VOP_OPEN(lvp, FREAD, cred, td, -1);
1060		if (error == 0) {
1061			error = unionfs_copyfile_core(lvp, uvp, cred, td);
1062			VOP_CLOSE(lvp, FREAD, cred, td);
1063		}
1064	}
1065	VOP_CLOSE(uvp, FWRITE, cred, td);
1066	uvp->v_writecount--;
1067
1068	vn_finished_write(mp);
1069
1070	if (error == 0) {
1071		/* Reset the attributes. Ignore errors. */
1072		uva.va_type = VNON;
1073		VOP_SETATTR(uvp, &uva, cred, td);
1074	}
1075
1076	unionfs_node_update(unp, uvp, td);
1077
1078	return (error);
1079}
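
/*
 * Usage sketch (illustrative only): opening a lower-only file for
 * writing triggers a copy-up first, roughly:
 *
 *	if (unp->un_uppervp == NULLVP && (mode & FWRITE))
 *		error = unionfs_copyfile(unp, 1, cred, td);
 *
 * Passing docopy == 0 creates an empty upper file without copying the
 * contents, which is enough when the open will truncate the file
 * anyway.
 */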
1080
1081/*
1082 * Check whether vp can be removed with rmdir (i.e. whether it is empty).
1083 *
1084 * vp is the unionfs vnode.
1085 * vp should be locked.
1086 */
1087int
1088unionfs_check_rmdir(struct vnode *vp, struct ucred *cred, struct thread *td)
1089{
1090	int		error;
1091	int		eofflag;
1092	int		lookuperr;
1093	struct vnode   *uvp;
1094	struct vnode   *lvp;
1095	struct vnode   *tvp;
1096	struct vattr	va;
1097	struct componentname cn;
1098	/*
1099	 * The size of buf needs to be larger than DIRBLKSIZ.
1100	 */
1101	char		buf[256 * 6];
1102	struct dirent  *dp;
1103	struct dirent  *edp;
1104	struct uio	uio;
1105	struct iovec	iov;
1106
1107	ASSERT_VOP_ELOCKED(vp, "unionfs_check_rmdir");
1108
1109	eofflag = 0;
1110	uvp = UNIONFSVPTOUPPERVP(vp);
1111	lvp = UNIONFSVPTOLOWERVP(vp);
1112
1113	/* check opaque */
1114	if ((error = VOP_GETATTR(uvp, &va, cred, td)) != 0)
1115		return (error);
1116	if (va.va_flags & OPAQUE)
1117		return (0);
1118
1119	/* open vnode */
1120	if ((error = VOP_OPEN(vp, FREAD, cred, td, -1)) != 0)
1121		return (error);
1122
1123	uio.uio_rw = UIO_READ;
1124	uio.uio_segflg = UIO_SYSSPACE;
1125	uio.uio_td = td;
1126	uio.uio_offset = 0;
1127
1128#ifdef MAC
1129	error = mac_check_vnode_readdir(td->td_ucred, lvp);
1130#endif
1131	while (!error && !eofflag) {
1132		iov.iov_base = buf;
1133		iov.iov_len = sizeof(buf);
1134		uio.uio_iov = &iov;
1135		uio.uio_iovcnt = 1;
1136		uio.uio_resid = iov.iov_len;
1137
1138		error = VOP_READDIR(lvp, &uio, cred, &eofflag, NULL, NULL);
1139		if (error)
1140			break;
1141
1142		edp = (struct dirent*)&buf[sizeof(buf) - uio.uio_resid];
1143		for (dp = (struct dirent*)buf; !error && dp < edp;
1144		     dp = (struct dirent*)((caddr_t)dp + dp->d_reclen)) {
1145			if (dp->d_type == DT_WHT ||
1146			    (dp->d_namlen == 1 && dp->d_name[0] == '.') ||
1147			    (dp->d_namlen == 2 && !bcmp(dp->d_name, "..", 2)))
1148				continue;
1149
1150			cn.cn_namelen = dp->d_namlen;
1151			cn.cn_pnbuf = NULL;
1152			cn.cn_nameptr = dp->d_name;
1153			cn.cn_nameiop = LOOKUP;
1154			cn.cn_flags = (LOCKPARENT | LOCKLEAF | SAVENAME | RDONLY | ISLASTCN);
1155			cn.cn_lkflags = LK_EXCLUSIVE;
1156			cn.cn_thread = td;
1157			cn.cn_cred = cred;
1158			cn.cn_consume = 0;
1159
1160			/*
1161			 * Check that the entry exists in the lower
1162			 * layer; the readdir routine sometimes returns
1163			 * stale or bogus entries.
1164			 */
1165			lookuperr = VOP_LOOKUP(lvp, &tvp, &cn);
1166
1167			if (!lookuperr)
1168				vput(tvp);
1169			else
1170				continue; /* skip entry */
1171
1172			/*
1173			 * Check the upper layer: if the entry neither
1174			 * exists nor is whited out there, the directory
1175			 * is not empty.
1176			 */
1177			cn.cn_flags = (LOCKPARENT | LOCKLEAF | SAVENAME | RDONLY | ISLASTCN);
1178			lookuperr = VOP_LOOKUP(uvp, &tvp, &cn);
1179
1180			if (!lookuperr)
1181				vput(tvp);
1182
1183			/* ignore exist or whiteout entry */
1184			if (!lookuperr ||
1185			    (lookuperr == ENOENT && (cn.cn_flags & ISWHITEOUT)))
1186				continue;
1187
1188			error = ENOTEMPTY;
1189		}
1190	}
1191
1192	/* close vnode */
1193	VOP_CLOSE(vp, FREAD, cred, td);
1194
1195	return (error);
1196}
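
/*
 * Usage sketch (illustrative only): a rmdir path verifies that the
 * union view of the directory is empty before removing it; "udvp" and
 * "uvp" stand for the upper parent and target vnodes.
 *
 *	error = unionfs_check_rmdir(vp, cnp->cn_cred, td);
 *	if (error == 0)
 *		error = VOP_RMDIR(udvp, uvp, cnp);
 *
 * A directory present only in the lower layer would instead be hidden
 * with unionfs_mkwhiteout().
 */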
1197
1198#ifdef DIAGNOSTIC
1199
1200struct vnode   *
1201unionfs_checkuppervp(struct vnode *vp, char *fil, int lno)
1202{
1203	struct unionfs_node *unp;
1204
1205	unp = VTOUNIONFS(vp);
1206
1207#ifdef notyet
1208	if (vp->v_op != unionfs_vnodeop_p) {
1209		printf("unionfs_checkuppervp: on non-unionfs-node.\n");
1210#ifdef KDB
1211		kdb_enter("unionfs_checkuppervp: on non-unionfs-node.\n");
1212#endif
1213		panic("unionfs_checkuppervp");
1214	};
1215#endif
1216	return (unp->un_uppervp);
1217}
1218
1219struct vnode   *
1220unionfs_checklowervp(struct vnode *vp, char *fil, int lno)
1221{
1222	struct unionfs_node *unp;
1223
1224	unp = VTOUNIONFS(vp);
1225
1226#ifdef notyet
1227	if (vp->v_op != unionfs_vnodeop_p) {
1228		printf("unionfs_checklowervp: on non-unionfs-node.\n");
1229#ifdef KDB
1230		kdb_enter("unionfs_checklowervp: on non-unionfs-node.\n");
1231#endif
1232		panic("unionfs_checklowervp");
1233	};
1234#endif
1235	return (unp->un_lowervp);
1236}
1237#endif
1238