union_subr.c revision 175635
1/*-
2 * Copyright (c) 1994 Jan-Simon Pendry
3 * Copyright (c) 1994
4 *	The Regents of the University of California.  All rights reserved.
5 * Copyright (c) 2005, 2006 Masanori Ozawa <ozawa@ongs.co.jp>, ONGS Inc.
6 * Copyright (c) 2006 Daichi Goto <daichi@freebsd.org>
7 *
8 * This code is derived from software contributed to Berkeley by
9 * Jan-Simon Pendry.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 *    notice, this list of conditions and the following disclaimer in the
18 *    documentation and/or other materials provided with the distribution.
19 * 4. Neither the name of the University nor the names of its contributors
20 *    may be used to endorse or promote products derived from this software
21 *    without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 *
35 *	@(#)union_subr.c	8.20 (Berkeley) 5/20/95
36 * $FreeBSD: head/sys/fs/unionfs/union_subr.c 175635 2008-01-24 12:34:30Z attilio $
37 */
38
39#include <sys/param.h>
40#include <sys/systm.h>
41#include <sys/kernel.h>
42#include <sys/lock.h>
43#include <sys/mutex.h>
44#include <sys/malloc.h>
45#include <sys/mount.h>
46#include <sys/namei.h>
47#include <sys/proc.h>
48#include <sys/vnode.h>
49#include <sys/dirent.h>
50#include <sys/fcntl.h>
51#include <sys/filedesc.h>
52#include <sys/stat.h>
53#include <sys/resourcevar.h>
54
55#ifdef MAC
56#include <sys/mac.h>
57#endif
58
59#include <vm/uma.h>
60
61#include <fs/unionfs/union.h>
62
/*
 * Malloc types used in this file: M_UNIONFSNODE tags the per-vnode
 * private data (struct unionfs_node) and M_UNIONFSPATH tags the name
 * string saved in un_path.
 */
63MALLOC_DEFINE(M_UNIONFSNODE, "UNIONFS node", "UNIONFS vnode private part");
64MALLOC_DEFINE(M_UNIONFSPATH, "UNIONFS path", "UNIONFS path private part");
65
/*
 * Filesystem-type initialization hook.
 *
 * There is nothing to set up; the routine only emits a debug trace and
 * reports success.  vfsp is not used.
 */
int
unionfs_init(struct vfsconf *vfsp)
{
	int	status = 0;

	/* This trace is printed during system boot. */
	UNIONFSDEBUG("unionfs_init\n");

	return (status);
}
75
/*
 * Filesystem-type teardown hook.
 *
 * No state is released here; the routine always reports success.
 */
int
unionfs_uninit(struct vfsconf *vfsp)
{
	(void)vfsp;		/* not used */

	return (0);
}
84
85/*
86 * Make a new or get existing unionfs node.
87 *
88 * uppervp and lowervp should be unlocked. Because if new unionfs vnode is
89 * locked, uppervp or lowervp is locked too. In order to prevent dead lock,
90 * you should not lock plurality simultaneously.
91 */
92int
93unionfs_nodeget(struct mount *mp, struct vnode *uppervp,
94		struct vnode *lowervp, struct vnode *dvp,
95		struct vnode **vpp, struct componentname *cnp,
96		struct thread *td)
97{
98	struct unionfs_mount *ump;
99	struct unionfs_node *unp;
100	struct vnode   *vp;
101	int		error;
102	int		lkflags;
103	char	       *path;
104
105	ump = MOUNTTOUNIONFSMOUNT(mp);
106	lkflags = (cnp ? cnp->cn_lkflags : 0);
107	path = (cnp ? cnp->cn_nameptr : NULL);
108
109	if (uppervp == NULLVP && lowervp == NULLVP)
110		panic("unionfs_nodeget: upper and lower is null");
111
112	/* If it has no ISLASTCN flag, path check is skipped. */
113	if (cnp && !(cnp->cn_flags & ISLASTCN))
114		path = NULL;
115
116	if ((uppervp == NULLVP || ump->um_uppervp != uppervp) ||
117	    (lowervp == NULLVP || ump->um_lowervp != lowervp)) {
118		if (dvp == NULLVP)
119			return (EINVAL);
120	}
121
122	/*
123	 * Do the MALLOC before the getnewvnode since doing so afterward
124	 * might cause a bogus v_data pointer to get dereferenced elsewhere
125	 * if MALLOC should block.
126	 */
127	MALLOC(unp, struct unionfs_node *, sizeof(struct unionfs_node),
128	    M_UNIONFSNODE, M_WAITOK | M_ZERO);
129
130	error = getnewvnode("unionfs", mp, &unionfs_vnodeops, &vp);
131	if (error != 0) {
132		FREE(unp, M_UNIONFSNODE);
133		return (error);
134	}
135	error = insmntque(vp, mp);	/* XXX: Too early for mpsafe fs */
136	if (error != 0) {
137		FREE(unp, M_UNIONFSNODE);
138		return (error);
139	}
140	if (dvp != NULLVP)
141		vref(dvp);
142	if (uppervp != NULLVP)
143		vref(uppervp);
144	if (lowervp != NULLVP)
145		vref(lowervp);
146
147	unp->un_vnode = vp;
148	unp->un_uppervp = uppervp;
149	unp->un_lowervp = lowervp;
150	unp->un_dvp = dvp;
151	if (uppervp != NULLVP)
152		vp->v_vnlock = uppervp->v_vnlock;
153	else
154		vp->v_vnlock = lowervp->v_vnlock;
155
156	if (path != NULL) {
157		unp->un_path = (char *)
158		    malloc(cnp->cn_namelen +1, M_UNIONFSPATH, M_WAITOK|M_ZERO);
159		bcopy(cnp->cn_nameptr, unp->un_path, cnp->cn_namelen);
160		unp->un_path[cnp->cn_namelen] = '\0';
161	}
162	vp->v_type = (uppervp != NULLVP ? uppervp->v_type : lowervp->v_type);
163	vp->v_data = unp;
164
165	if ((uppervp != NULLVP && ump->um_uppervp == uppervp) &&
166	    (lowervp != NULLVP && ump->um_lowervp == lowervp))
167		vp->v_vflag |= VV_ROOT;
168
169	if (lkflags & LK_TYPE_MASK)
170		vn_lock(vp, lkflags | LK_RETRY);
171
172	*vpp = vp;
173
174	return (0);
175}
176
177/*
178 * Clean up the unionfs node.
179 */
180void
181unionfs_noderem(struct vnode *vp, struct thread *td)
182{
183	int		vfslocked;
184	struct unionfs_node *unp;
185	struct unionfs_node_status *unsp, *unsp_tmp;
186	struct vnode   *lvp;
187	struct vnode   *uvp;
188
189	/*
190	 * Use the interlock to protect the clearing of v_data to
191	 * prevent faults in unionfs_lock().
192	 */
193	VI_LOCK(vp);
194	unp = VTOUNIONFS(vp);
195	lvp = unp->un_lowervp;
196	uvp = unp->un_uppervp;
197	unp->un_lowervp = unp->un_uppervp = NULLVP;
198
199	vp->v_vnlock = &(vp->v_lock);
200	vp->v_data = NULL;
201	lockmgr(vp->v_vnlock, LK_EXCLUSIVE | LK_INTERLOCK, VI_MTX(vp));
202	if (lvp != NULLVP)
203		VOP_UNLOCK(lvp, 0);
204	if (uvp != NULLVP)
205		VOP_UNLOCK(uvp, 0);
206	vp->v_object = NULL;
207
208	if (lvp != NULLVP) {
209		vfslocked = VFS_LOCK_GIANT(lvp->v_mount);
210		vrele(lvp);
211		VFS_UNLOCK_GIANT(vfslocked);
212	}
213	if (uvp != NULLVP) {
214		vfslocked = VFS_LOCK_GIANT(uvp->v_mount);
215		vrele(uvp);
216		VFS_UNLOCK_GIANT(vfslocked);
217	}
218	if (unp->un_dvp != NULLVP) {
219		vfslocked = VFS_LOCK_GIANT(unp->un_dvp->v_mount);
220		vrele(unp->un_dvp);
221		VFS_UNLOCK_GIANT(vfslocked);
222		unp->un_dvp = NULLVP;
223	}
224	if (unp->un_path) {
225		free(unp->un_path, M_UNIONFSPATH);
226		unp->un_path = NULL;
227	}
228
229	LIST_FOREACH_SAFE(unsp, &(unp->un_unshead), uns_list, unsp_tmp) {
230		LIST_REMOVE(unsp, uns_list);
231		free(unsp, M_TEMP);
232	}
233	FREE(unp, M_UNIONFSNODE);
234}
235
236/*
237 * Get the unionfs node status.
238 * You need exclusive lock this vnode.
239 */
240void
241unionfs_get_node_status(struct unionfs_node *unp, struct thread *td,
242			struct unionfs_node_status **unspp)
243{
244	struct unionfs_node_status *unsp;
245
246	KASSERT(NULL != unspp, ("null pointer"));
247	ASSERT_VOP_ELOCKED(UNIONFSTOV(unp), "unionfs_get_node_status");
248
249	LIST_FOREACH(unsp, &(unp->un_unshead), uns_list) {
250		if (unsp->uns_tid == td->td_tid) {
251			*unspp = unsp;
252			return;
253		}
254	}
255
256	/* create a new unionfs node status */
257	MALLOC(unsp, struct unionfs_node_status *,
258	    sizeof(struct unionfs_node_status), M_TEMP, M_WAITOK | M_ZERO);
259
260	unsp->uns_tid = td->td_tid;
261	LIST_INSERT_HEAD(&(unp->un_unshead), unsp, uns_list);
262
263	*unspp = unsp;
264}
265
266/*
267 * Remove the unionfs node status, if you can.
268 * You need exclusive lock this vnode.
269 */
270void
271unionfs_tryrem_node_status(struct unionfs_node *unp, struct thread *td,
272			   struct unionfs_node_status *unsp)
273{
274	KASSERT(NULL != unsp, ("null pointer"));
275	ASSERT_VOP_ELOCKED(UNIONFSTOV(unp), "unionfs_get_node_status");
276
277	if (0 < unsp->uns_lower_opencnt || 0 < unsp->uns_upper_opencnt)
278		return;
279
280	LIST_REMOVE(unsp, uns_list);
281	free(unsp, M_TEMP);
282}
283
284/*
285 * Create upper node attr.
286 */
287void
288unionfs_create_uppervattr_core(struct unionfs_mount *ump,
289			       struct vattr *lva,
290			       struct vattr *uva,
291			       struct thread *td)
292{
293	VATTR_NULL(uva);
294	uva->va_type = lva->va_type;
295	uva->va_atime = lva->va_atime;
296	uva->va_mtime = lva->va_mtime;
297	uva->va_ctime = lva->va_ctime;
298
299	switch (ump->um_copymode) {
300	case UNIONFS_TRANSPARENT:
301		uva->va_mode = lva->va_mode;
302		uva->va_uid = lva->va_uid;
303		uva->va_gid = lva->va_gid;
304		break;
305	case UNIONFS_MASQUERADE:
306		if (ump->um_uid == lva->va_uid) {
307			uva->va_mode = lva->va_mode & 077077;
308			uva->va_mode |= (lva->va_type == VDIR ? ump->um_udir : ump->um_ufile) & 0700;
309			uva->va_uid = lva->va_uid;
310			uva->va_gid = lva->va_gid;
311		} else {
312			uva->va_mode = (lva->va_type == VDIR ? ump->um_udir : ump->um_ufile);
313			uva->va_uid = ump->um_uid;
314			uva->va_gid = ump->um_gid;
315		}
316		break;
317	default:		/* UNIONFS_TRADITIONAL */
318		FILEDESC_SLOCK(td->td_proc->p_fd);
319		uva->va_mode = 0777 & ~td->td_proc->p_fd->fd_cmask;
320		FILEDESC_SUNLOCK(td->td_proc->p_fd);
321		uva->va_uid = ump->um_uid;
322		uva->va_gid = ump->um_gid;
323		break;
324	}
325}
326
327/*
328 * Create upper node attr.
329 */
330int
331unionfs_create_uppervattr(struct unionfs_mount *ump,
332			  struct vnode *lvp,
333			  struct vattr *uva,
334			  struct ucred *cred,
335			  struct thread *td)
336{
337	int		error;
338	struct vattr	lva;
339
340	if ((error = VOP_GETATTR(lvp, &lva, cred, td)))
341		return (error);
342
343	unionfs_create_uppervattr_core(ump, &lva, uva, td);
344
345	return (error);
346}
347
348/*
349 * relookup
350 *
351 * dvp should be locked on entry and will be locked on return.
352 *
353 * If an error is returned, *vpp will be invalid, otherwise it will hold a
354 * locked, referenced vnode. If *vpp == dvp then remember that only one
355 * LK_EXCLUSIVE lock is held.
356 */
357static int
358unionfs_relookup(struct vnode *dvp, struct vnode **vpp,
359		 struct componentname *cnp, struct componentname *cn,
360		 struct thread *td, char *path, int pathlen, u_long nameiop)
361{
362	int	error;
363
364	cn->cn_namelen = pathlen;
365	cn->cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK);
366	bcopy(path, cn->cn_pnbuf, pathlen);
367	cn->cn_pnbuf[pathlen] = '\0';
368
369	cn->cn_nameiop = nameiop;
370	cn->cn_flags = (LOCKPARENT | LOCKLEAF | HASBUF | SAVENAME | ISLASTCN);
371	cn->cn_lkflags = LK_EXCLUSIVE;
372	cn->cn_thread = td;
373	cn->cn_cred = cnp->cn_cred;
374
375	cn->cn_nameptr = cn->cn_pnbuf;
376	cn->cn_consume = cnp->cn_consume;
377
378	if (nameiop == DELETE)
379		cn->cn_flags |= (cnp->cn_flags & (DOWHITEOUT | SAVESTART));
380	else if (RENAME == nameiop)
381		cn->cn_flags |= (cnp->cn_flags & SAVESTART);
382
383	vref(dvp);
384	VOP_UNLOCK(dvp, 0);
385
386	if ((error = relookup(dvp, vpp, cn))) {
387		uma_zfree(namei_zone, cn->cn_pnbuf);
388		cn->cn_flags &= ~HASBUF;
389		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
390	} else
391		vrele(dvp);
392
393	return (error);
394}
395
396/*
397 * relookup for CREATE namei operation.
398 *
399 * dvp is unionfs vnode. dvp should be locked.
400 *
401 * If it called 'unionfs_copyfile' function by unionfs_link etc,
402 * VOP_LOOKUP information is broken.
403 * So it need relookup in order to create link etc.
404 */
405int
406unionfs_relookup_for_create(struct vnode *dvp, struct componentname *cnp,
407			    struct thread *td)
408{
409	int	error;
410	struct vnode *udvp;
411	struct vnode *vp;
412	struct componentname cn;
413
414	udvp = UNIONFSVPTOUPPERVP(dvp);
415	vp = NULLVP;
416
417	error = unionfs_relookup(udvp, &vp, cnp, &cn, td, cnp->cn_nameptr,
418	    strlen(cnp->cn_nameptr), CREATE);
419	if (error)
420		return (error);
421
422	if (vp != NULLVP) {
423		if (udvp == vp)
424			vrele(vp);
425		else
426			vput(vp);
427
428		error = EEXIST;
429	}
430
431	if (cn.cn_flags & HASBUF) {
432		uma_zfree(namei_zone, cn.cn_pnbuf);
433		cn.cn_flags &= ~HASBUF;
434	}
435
436	if (!error) {
437		cn.cn_flags |= (cnp->cn_flags & HASBUF);
438		cnp->cn_flags = cn.cn_flags;
439	}
440
441	return (error);
442}
443
444/*
445 * relookup for DELETE namei operation.
446 *
447 * dvp is unionfs vnode. dvp should be locked.
448 */
449int
450unionfs_relookup_for_delete(struct vnode *dvp, struct componentname *cnp,
451			    struct thread *td)
452{
453	int	error;
454	struct vnode *udvp;
455	struct vnode *vp;
456	struct componentname cn;
457
458	udvp = UNIONFSVPTOUPPERVP(dvp);
459	vp = NULLVP;
460
461	error = unionfs_relookup(udvp, &vp, cnp, &cn, td, cnp->cn_nameptr,
462	    strlen(cnp->cn_nameptr), DELETE);
463	if (error)
464		return (error);
465
466	if (vp == NULLVP)
467		error = ENOENT;
468	else {
469		if (udvp == vp)
470			vrele(vp);
471		else
472			vput(vp);
473	}
474
475	if (cn.cn_flags & HASBUF) {
476		uma_zfree(namei_zone, cn.cn_pnbuf);
477		cn.cn_flags &= ~HASBUF;
478	}
479
480	if (!error) {
481		cn.cn_flags |= (cnp->cn_flags & HASBUF);
482		cnp->cn_flags = cn.cn_flags;
483	}
484
485	return (error);
486}
487
488/*
489 * relookup for RENAME namei operation.
490 *
491 * dvp is unionfs vnode. dvp should be locked.
492 */
493int
494unionfs_relookup_for_rename(struct vnode *dvp, struct componentname *cnp,
495			    struct thread *td)
496{
497	int error;
498	struct vnode *udvp;
499	struct vnode *vp;
500	struct componentname cn;
501
502	udvp = UNIONFSVPTOUPPERVP(dvp);
503	vp = NULLVP;
504
505	error = unionfs_relookup(udvp, &vp, cnp, &cn, td, cnp->cn_nameptr,
506	    strlen(cnp->cn_nameptr), RENAME);
507	if (error)
508		return (error);
509
510	if (vp != NULLVP) {
511		if (udvp == vp)
512			vrele(vp);
513		else
514			vput(vp);
515	}
516
517	if (cn.cn_flags & HASBUF) {
518		uma_zfree(namei_zone, cn.cn_pnbuf);
519		cn.cn_flags &= ~HASBUF;
520	}
521
522	if (!error) {
523		cn.cn_flags |= (cnp->cn_flags & HASBUF);
524		cnp->cn_flags = cn.cn_flags;
525	}
526
527	return (error);
528
529}
530
531/*
532 * Update the unionfs_node.
533 *
534 * uvp is new locked upper vnode. unionfs vnode's lock will be exchanged to the
535 * uvp's lock and lower's lock will be unlocked.
536 */
537static void
538unionfs_node_update(struct unionfs_node *unp, struct vnode *uvp,
539		    struct thread *td)
540{
541	int		count, lockcnt;
542	struct vnode   *vp;
543	struct vnode   *lvp;
544
545	vp = UNIONFSTOV(unp);
546	lvp = unp->un_lowervp;
547
548	/*
549	 * lock update
550	 */
551	VI_LOCK(vp);
552	unp->un_uppervp = uvp;
553	vp->v_vnlock = uvp->v_vnlock;
554	lockcnt = lvp->v_vnlock->lk_exclusivecount;
555	if (lockcnt <= 0)
556		panic("unionfs: no exclusive lock");
557	VI_UNLOCK(vp);
558	for (count = 1; count < lockcnt; count++)
559		vn_lock(uvp, LK_EXCLUSIVE | LK_CANRECURSE | LK_RETRY);
560}
561
562/*
563 * Create a new shadow dir.
564 *
565 * udvp should be locked on entry and will be locked on return.
566 *
567 * If no error returned, unp will be updated.
568 */
569int
570unionfs_mkshadowdir(struct unionfs_mount *ump, struct vnode *udvp,
571		    struct unionfs_node *unp, struct componentname *cnp,
572		    struct thread *td)
573{
574	int		error;
575	struct vnode   *lvp;
576	struct vnode   *uvp;
577	struct vattr	va;
578	struct vattr	lva;
579	struct componentname cn;
580	struct mount   *mp;
581	struct ucred   *cred;
582	struct ucred   *credbk;
583	struct uidinfo *rootinfo;
584
585	if (unp->un_uppervp != NULLVP)
586		return (EEXIST);
587
588	lvp = unp->un_lowervp;
589	uvp = NULLVP;
590	credbk = cnp->cn_cred;
591
592	/* Authority change to root */
593	rootinfo = uifind((uid_t)0);
594	cred = crdup(cnp->cn_cred);
595	chgproccnt(cred->cr_ruidinfo, 1, 0);
596	change_euid(cred, rootinfo);
597	change_ruid(cred, rootinfo);
598	change_svuid(cred, (uid_t)0);
599	uifree(rootinfo);
600	cnp->cn_cred = cred;
601
602	memset(&cn, 0, sizeof(cn));
603
604	if ((error = VOP_GETATTR(lvp, &lva, cnp->cn_cred, td)))
605		goto unionfs_mkshadowdir_abort;
606
607	if ((error = unionfs_relookup(udvp, &uvp, cnp, &cn, td, cnp->cn_nameptr, cnp->cn_namelen, CREATE)))
608		goto unionfs_mkshadowdir_abort;
609	if (uvp != NULLVP) {
610		if (udvp == uvp)
611			vrele(uvp);
612		else
613			vput(uvp);
614
615		error = EEXIST;
616		goto unionfs_mkshadowdir_free_out;
617	}
618
619	if ((error = vn_start_write(udvp, &mp, V_WAIT | PCATCH)))
620		goto unionfs_mkshadowdir_free_out;
621	if ((error = VOP_LEASE(udvp, td, cn.cn_cred, LEASE_WRITE))) {
622		vn_finished_write(mp);
623		goto unionfs_mkshadowdir_free_out;
624	}
625	unionfs_create_uppervattr_core(ump, &lva, &va, td);
626
627	error = VOP_MKDIR(udvp, &uvp, &cn, &va);
628
629	if (!error) {
630		unionfs_node_update(unp, uvp, td);
631
632		/*
633		 * XXX The bug which cannot set uid/gid was corrected.
634		 * Ignore errors.
635		 */
636		va.va_type = VNON;
637		VOP_SETATTR(uvp, &va, cn.cn_cred, td);
638	}
639	vn_finished_write(mp);
640
641unionfs_mkshadowdir_free_out:
642	if (cn.cn_flags & HASBUF) {
643		uma_zfree(namei_zone, cn.cn_pnbuf);
644		cn.cn_flags &= ~HASBUF;
645	}
646
647unionfs_mkshadowdir_abort:
648	cnp->cn_cred = credbk;
649	chgproccnt(cred->cr_ruidinfo, -1, 0);
650	crfree(cred);
651
652	return (error);
653}
654
655/*
656 * Create a new whiteout.
657 *
658 * dvp should be locked on entry and will be locked on return.
659 */
660int
661unionfs_mkwhiteout(struct vnode *dvp, struct componentname *cnp,
662		   struct thread *td, char *path)
663{
664	int		error;
665	struct vnode   *wvp;
666	struct componentname cn;
667	struct mount   *mp;
668
669	if (path == NULL)
670		path = cnp->cn_nameptr;
671
672	wvp = NULLVP;
673	if ((error = unionfs_relookup(dvp, &wvp, cnp, &cn, td, path, strlen(path), CREATE)))
674		return (error);
675	if (wvp != NULLVP) {
676		if (cn.cn_flags & HASBUF) {
677			uma_zfree(namei_zone, cn.cn_pnbuf);
678			cn.cn_flags &= ~HASBUF;
679		}
680		if (dvp == wvp)
681			vrele(wvp);
682		else
683			vput(wvp);
684
685		return (EEXIST);
686	}
687
688	if ((error = vn_start_write(dvp, &mp, V_WAIT | PCATCH)))
689		goto unionfs_mkwhiteout_free_out;
690	if (!(error = VOP_LEASE(dvp, td, td->td_ucred, LEASE_WRITE)))
691		error = VOP_WHITEOUT(dvp, &cn, CREATE);
692
693	vn_finished_write(mp);
694
695unionfs_mkwhiteout_free_out:
696	if (cn.cn_flags & HASBUF) {
697		uma_zfree(namei_zone, cn.cn_pnbuf);
698		cn.cn_flags &= ~HASBUF;
699	}
700
701	return (error);
702}
703
704/*
705 * Create a new vnode for create a new shadow file.
706 *
707 * If an error is returned, *vpp will be invalid, otherwise it will hold a
708 * locked, referenced and opened vnode.
709 *
710 * unp is never updated.
711 */
712static int
713unionfs_vn_create_on_upper(struct vnode **vpp, struct vnode *udvp,
714			   struct unionfs_node *unp, struct vattr *uvap,
715			   struct thread *td)
716{
717	struct unionfs_mount *ump;
718	struct vnode   *vp;
719	struct vnode   *lvp;
720	struct ucred   *cred;
721	struct vattr	lva;
722	int		fmode;
723	int		error;
724	struct componentname cn;
725
726	ump = MOUNTTOUNIONFSMOUNT(UNIONFSTOV(unp)->v_mount);
727	vp = NULLVP;
728	lvp = unp->un_lowervp;
729	cred = td->td_ucred;
730	fmode = FFLAGS(O_WRONLY | O_CREAT | O_TRUNC | O_EXCL);
731	error = 0;
732
733	if ((error = VOP_GETATTR(lvp, &lva, cred, td)) != 0)
734		return (error);
735	unionfs_create_uppervattr_core(ump, &lva, uvap, td);
736
737	if (unp->un_path == NULL)
738		panic("unionfs: un_path is null");
739
740	cn.cn_namelen = strlen(unp->un_path);
741	cn.cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK);
742	bcopy(unp->un_path, cn.cn_pnbuf, cn.cn_namelen + 1);
743	cn.cn_nameiop = CREATE;
744	cn.cn_flags = (LOCKPARENT | LOCKLEAF | HASBUF | SAVENAME | ISLASTCN);
745	cn.cn_lkflags = LK_EXCLUSIVE;
746	cn.cn_thread = td;
747	cn.cn_cred = cred;
748	cn.cn_nameptr = cn.cn_pnbuf;
749	cn.cn_consume = 0;
750
751	vref(udvp);
752	if ((error = relookup(udvp, &vp, &cn)) != 0)
753		goto unionfs_vn_create_on_upper_free_out2;
754	vrele(udvp);
755
756	if (vp != NULLVP) {
757		if (vp == udvp)
758			vrele(vp);
759		else
760			vput(vp);
761		error = EEXIST;
762		goto unionfs_vn_create_on_upper_free_out1;
763	}
764
765	if ((error = VOP_LEASE(udvp, td, cred, LEASE_WRITE)) != 0)
766		goto unionfs_vn_create_on_upper_free_out1;
767
768	if ((error = VOP_CREATE(udvp, &vp, &cn, uvap)) != 0)
769		goto unionfs_vn_create_on_upper_free_out1;
770
771	if ((error = VOP_OPEN(vp, fmode, cred, td, NULL)) != 0) {
772		vput(vp);
773		goto unionfs_vn_create_on_upper_free_out1;
774	}
775	vp->v_writecount++;
776	*vpp = vp;
777
778unionfs_vn_create_on_upper_free_out1:
779	VOP_UNLOCK(udvp, 0);
780
781unionfs_vn_create_on_upper_free_out2:
782	if (cn.cn_flags & HASBUF) {
783		uma_zfree(namei_zone, cn.cn_pnbuf);
784		cn.cn_flags &= ~HASBUF;
785	}
786
787	return (error);
788}
789
790/*
791 * Copy from lvp to uvp.
792 *
793 * lvp and uvp should be locked and opened on entry and will be locked and
794 * opened on return.
795 */
796static int
797unionfs_copyfile_core(struct vnode *lvp, struct vnode *uvp,
798		      struct ucred *cred, struct thread *td)
799{
800	int		error;
801	off_t		offset;
802	int		count;
803	int		bufoffset;
804	char           *buf;
805	struct uio	uio;
806	struct iovec	iov;
807
808	error = 0;
809	memset(&uio, 0, sizeof(uio));
810
811	uio.uio_td = td;
812	uio.uio_segflg = UIO_SYSSPACE;
813	uio.uio_offset = 0;
814
815	if ((error = VOP_LEASE(lvp, td, cred, LEASE_READ)) != 0)
816		return (error);
817	if ((error = VOP_LEASE(uvp, td, cred, LEASE_WRITE)) != 0)
818		return (error);
819	buf = malloc(MAXBSIZE, M_TEMP, M_WAITOK);
820
821	while (error == 0) {
822		offset = uio.uio_offset;
823
824		uio.uio_iov = &iov;
825		uio.uio_iovcnt = 1;
826		iov.iov_base = buf;
827		iov.iov_len = MAXBSIZE;
828		uio.uio_resid = iov.iov_len;
829		uio.uio_rw = UIO_READ;
830
831		if ((error = VOP_READ(lvp, &uio, 0, cred)) != 0)
832			break;
833		if ((count = MAXBSIZE - uio.uio_resid) == 0)
834			break;
835
836		bufoffset = 0;
837		while (bufoffset < count) {
838			uio.uio_iov = &iov;
839			uio.uio_iovcnt = 1;
840			iov.iov_base = buf + bufoffset;
841			iov.iov_len = count - bufoffset;
842			uio.uio_offset = offset + bufoffset;
843			uio.uio_resid = iov.iov_len;
844			uio.uio_rw = UIO_WRITE;
845
846			if ((error = VOP_WRITE(uvp, &uio, 0, cred)) != 0)
847				break;
848
849			bufoffset += (count - bufoffset) - uio.uio_resid;
850		}
851
852		uio.uio_offset = offset + bufoffset;
853	}
854
855	free(buf, M_TEMP);
856
857	return (error);
858}
859
860/*
861 * Copy file from lower to upper.
862 *
863 * If you need copy of the contents, set 1 to docopy. Otherwise, set 0 to
864 * docopy.
865 *
866 * If no error returned, unp will be updated.
867 */
868int
869unionfs_copyfile(struct unionfs_node *unp, int docopy, struct ucred *cred,
870		 struct thread *td)
871{
872	int		error;
873	struct mount   *mp;
874	struct vnode   *udvp;
875	struct vnode   *lvp;
876	struct vnode   *uvp;
877	struct vattr	uva;
878
879	lvp = unp->un_lowervp;
880	uvp = NULLVP;
881
882	if ((UNIONFSTOV(unp)->v_mount->mnt_flag & MNT_RDONLY))
883		return (EROFS);
884	if (unp->un_dvp == NULLVP)
885		return (EINVAL);
886	if (unp->un_uppervp != NULLVP)
887		return (EEXIST);
888	udvp = VTOUNIONFS(unp->un_dvp)->un_uppervp;
889	if (udvp == NULLVP)
890		return (EROFS);
891	if ((udvp->v_mount->mnt_flag & MNT_RDONLY))
892		return (EROFS);
893
894	error = VOP_ACCESS(lvp, VREAD, cred, td);
895	if (error != 0)
896		return (error);
897
898	if ((error = vn_start_write(udvp, &mp, V_WAIT | PCATCH)) != 0)
899		return (error);
900	error = unionfs_vn_create_on_upper(&uvp, udvp, unp, &uva, td);
901	if (error != 0) {
902		vn_finished_write(mp);
903		return (error);
904	}
905
906	if (docopy != 0) {
907		error = VOP_OPEN(lvp, FREAD, cred, td, NULL);
908		if (error == 0) {
909			error = unionfs_copyfile_core(lvp, uvp, cred, td);
910			VOP_CLOSE(lvp, FREAD, cred, td);
911		}
912	}
913	VOP_CLOSE(uvp, FWRITE, cred, td);
914	uvp->v_writecount--;
915
916	vn_finished_write(mp);
917
918	if (error == 0) {
919		/* Reset the attributes. Ignore errors. */
920		uva.va_type = VNON;
921		VOP_SETATTR(uvp, &uva, cred, td);
922	}
923
924	unionfs_node_update(unp, uvp, td);
925
926	return (error);
927}
928
929/*
930 * It checks whether vp can rmdir. (check empty)
931 *
932 * vp is unionfs vnode.
933 * vp should be locked.
934 */
935int
936unionfs_check_rmdir(struct vnode *vp, struct ucred *cred, struct thread *td)
937{
938	int		error;
939	int		eofflag;
940	int		lookuperr;
941	struct vnode   *uvp;
942	struct vnode   *lvp;
943	struct vnode   *tvp;
944	struct vattr	va;
945	struct componentname cn;
946	/*
947	 * The size of buf needs to be larger than DIRBLKSIZ.
948	 */
949	char		buf[256 * 6];
950	struct dirent  *dp;
951	struct dirent  *edp;
952	struct uio	uio;
953	struct iovec	iov;
954
955	ASSERT_VOP_ELOCKED(vp, "unionfs_check_rmdir");
956
957	eofflag = 0;
958	uvp = UNIONFSVPTOUPPERVP(vp);
959	lvp = UNIONFSVPTOLOWERVP(vp);
960
961	/* check opaque */
962	if ((error = VOP_GETATTR(uvp, &va, cred, td)) != 0)
963		return (error);
964	if (va.va_flags & OPAQUE)
965		return (0);
966
967	/* open vnode */
968#ifdef MAC
969	if ((error = mac_vnode_check_open(cred, vp, VEXEC|VREAD)) != 0)
970		return (error);
971#endif
972	if ((error = VOP_ACCESS(vp, VEXEC|VREAD, cred, td)) != 0)
973		return (error);
974	if ((error = VOP_OPEN(vp, FREAD, cred, td, NULL)) != 0)
975		return (error);
976
977	uio.uio_rw = UIO_READ;
978	uio.uio_segflg = UIO_SYSSPACE;
979	uio.uio_td = td;
980	uio.uio_offset = 0;
981
982#ifdef MAC
983	error = mac_vnode_check_readdir(td->td_ucred, lvp);
984#endif
985	while (!error && !eofflag) {
986		iov.iov_base = buf;
987		iov.iov_len = sizeof(buf);
988		uio.uio_iov = &iov;
989		uio.uio_iovcnt = 1;
990		uio.uio_resid = iov.iov_len;
991
992		error = VOP_READDIR(lvp, &uio, cred, &eofflag, NULL, NULL);
993		if (error)
994			break;
995
996		edp = (struct dirent*)&buf[sizeof(buf) - uio.uio_resid];
997		for (dp = (struct dirent*)buf; !error && dp < edp;
998		     dp = (struct dirent*)((caddr_t)dp + dp->d_reclen)) {
999			if (dp->d_type == DT_WHT ||
1000			    (dp->d_namlen == 1 && dp->d_name[0] == '.') ||
1001			    (dp->d_namlen == 2 && !bcmp(dp->d_name, "..", 2)))
1002				continue;
1003
1004			cn.cn_namelen = dp->d_namlen;
1005			cn.cn_pnbuf = NULL;
1006			cn.cn_nameptr = dp->d_name;
1007			cn.cn_nameiop = LOOKUP;
1008			cn.cn_flags = (LOCKPARENT | LOCKLEAF | SAVENAME | RDONLY | ISLASTCN);
1009			cn.cn_lkflags = LK_EXCLUSIVE;
1010			cn.cn_thread = td;
1011			cn.cn_cred = cred;
1012			cn.cn_consume = 0;
1013
1014			/*
1015			 * check entry in lower.
1016			 * Sometimes, readdir function returns
1017			 * wrong entry.
1018			 */
1019			lookuperr = VOP_LOOKUP(lvp, &tvp, &cn);
1020
1021			if (!lookuperr)
1022				vput(tvp);
1023			else
1024				continue; /* skip entry */
1025
1026			/*
1027			 * check entry
1028			 * If it has no exist/whiteout entry in upper,
1029			 * directory is not empty.
1030			 */
1031			cn.cn_flags = (LOCKPARENT | LOCKLEAF | SAVENAME | RDONLY | ISLASTCN);
1032			lookuperr = VOP_LOOKUP(uvp, &tvp, &cn);
1033
1034			if (!lookuperr)
1035				vput(tvp);
1036
1037			/* ignore exist or whiteout entry */
1038			if (!lookuperr ||
1039			    (lookuperr == ENOENT && (cn.cn_flags & ISWHITEOUT)))
1040				continue;
1041
1042			error = ENOTEMPTY;
1043		}
1044	}
1045
1046	/* close vnode */
1047	VOP_CLOSE(vp, FREAD, cred, td);
1048
1049	return (error);
1050}
1051
1052#ifdef DIAGNOSTIC
1053
1054struct vnode   *
1055unionfs_checkuppervp(struct vnode *vp, char *fil, int lno)
1056{
1057	struct unionfs_node *unp;
1058
1059	unp = VTOUNIONFS(vp);
1060
1061#ifdef notyet
1062	if (vp->v_op != unionfs_vnodeop_p) {
1063		printf("unionfs_checkuppervp: on non-unionfs-node.\n");
1064#ifdef KDB
1065		kdb_enter(KDB_WHY_UNIONFS,
1066		    "unionfs_checkuppervp: on non-unionfs-node.\n");
1067#endif
1068		panic("unionfs_checkuppervp");
1069	};
1070#endif
1071	return (unp->un_uppervp);
1072}
1073
1074struct vnode   *
1075unionfs_checklowervp(struct vnode *vp, char *fil, int lno)
1076{
1077	struct unionfs_node *unp;
1078
1079	unp = VTOUNIONFS(vp);
1080
1081#ifdef notyet
1082	if (vp->v_op != unionfs_vnodeop_p) {
1083		printf("unionfs_checklowervp: on non-unionfs-node.\n");
1084#ifdef KDB
1085		kdb_enter(KDB_WHY_UNIONFS,
1086		    "unionfs_checklowervp: on non-unionfs-node.\n");
1087#endif
1088		panic("unionfs_checklowervp");
1089	};
1090#endif
1091	return (unp->un_lowervp);
1092}
1093#endif
1094