union_subr.c revision 177957
1/*-
2 * Copyright (c) 1994 Jan-Simon Pendry
3 * Copyright (c) 1994
4 *	The Regents of the University of California.  All rights reserved.
5 * Copyright (c) 2005, 2006 Masanori Ozawa <ozawa@ongs.co.jp>, ONGS Inc.
6 * Copyright (c) 2006 Daichi Goto <daichi@freebsd.org>
7 *
8 * This code is derived from software contributed to Berkeley by
9 * Jan-Simon Pendry.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 *    notice, this list of conditions and the following disclaimer in the
18 *    documentation and/or other materials provided with the distribution.
19 * 4. Neither the name of the University nor the names of its contributors
20 *    may be used to endorse or promote products derived from this software
21 *    without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 *
35 *	@(#)union_subr.c	8.20 (Berkeley) 5/20/95
36 * $FreeBSD: head/sys/fs/unionfs/union_subr.c 177957 2008-04-06 20:08:51Z attilio $
37 */
38
39#include <sys/param.h>
40#include <sys/systm.h>
41#include <sys/kernel.h>
42#include <sys/lock.h>
43#include <sys/mutex.h>
44#include <sys/malloc.h>
45#include <sys/mount.h>
46#include <sys/namei.h>
47#include <sys/proc.h>
48#include <sys/vnode.h>
49#include <sys/dirent.h>
50#include <sys/fcntl.h>
51#include <sys/filedesc.h>
52#include <sys/stat.h>
53#include <sys/resourcevar.h>
54
55#ifdef MAC
56#include <sys/mac.h>
57#endif
58
59#include <vm/uma.h>
60
61#include <fs/unionfs/union.h>
62
/*
 * malloc(9) types used in this file: M_UNIONFSNODE tags the
 * struct unionfs_node allocations and M_UNIONFSPATH tags the per-node
 * name strings (un_path).
 */
MALLOC_DEFINE(M_UNIONFSNODE, "UNIONFS node", "UNIONFS vnode private part");
MALLOC_DEFINE(M_UNIONFSPATH, "UNIONFS path", "UNIONFS path private part");
65
/*
 * Filesystem-type initialization hook.
 *
 * Only emits a debug trace; vfsp is not examined.  Always returns 0.
 */
int
unionfs_init(struct vfsconf *vfsp)
{

	/* printed during system boot */
	UNIONFSDEBUG("unionfs_init\n");

	return (0);
}
75
/*
 * Filesystem-type teardown hook.
 *
 * There is nothing to release here; vfsp is not examined.
 * Always returns 0.
 */
int
unionfs_uninit(struct vfsconf *vfsp)
{

	return (0);
}
84
85/*
86 * Make a new or get existing unionfs node.
87 *
88 * uppervp and lowervp should be unlocked. Because if new unionfs vnode is
89 * locked, uppervp or lowervp is locked too. In order to prevent dead lock,
90 * you should not lock plurality simultaneously.
91 */
92int
93unionfs_nodeget(struct mount *mp, struct vnode *uppervp,
94		struct vnode *lowervp, struct vnode *dvp,
95		struct vnode **vpp, struct componentname *cnp,
96		struct thread *td)
97{
98	struct unionfs_mount *ump;
99	struct unionfs_node *unp;
100	struct vnode   *vp;
101	int		error;
102	int		lkflags;
103	char	       *path;
104
105	ump = MOUNTTOUNIONFSMOUNT(mp);
106	lkflags = (cnp ? cnp->cn_lkflags : 0);
107	path = (cnp ? cnp->cn_nameptr : NULL);
108
109	if (uppervp == NULLVP && lowervp == NULLVP)
110		panic("unionfs_nodeget: upper and lower is null");
111
112	/* If it has no ISLASTCN flag, path check is skipped. */
113	if (cnp && !(cnp->cn_flags & ISLASTCN))
114		path = NULL;
115
116	if ((uppervp == NULLVP || ump->um_uppervp != uppervp) ||
117	    (lowervp == NULLVP || ump->um_lowervp != lowervp)) {
118		if (dvp == NULLVP)
119			return (EINVAL);
120	}
121
122	/*
123	 * Do the MALLOC before the getnewvnode since doing so afterward
124	 * might cause a bogus v_data pointer to get dereferenced elsewhere
125	 * if MALLOC should block.
126	 */
127	MALLOC(unp, struct unionfs_node *, sizeof(struct unionfs_node),
128	    M_UNIONFSNODE, M_WAITOK | M_ZERO);
129
130	error = getnewvnode("unionfs", mp, &unionfs_vnodeops, &vp);
131	if (error != 0) {
132		FREE(unp, M_UNIONFSNODE);
133		return (error);
134	}
135	error = insmntque(vp, mp);	/* XXX: Too early for mpsafe fs */
136	if (error != 0) {
137		FREE(unp, M_UNIONFSNODE);
138		return (error);
139	}
140	if (dvp != NULLVP)
141		vref(dvp);
142	if (uppervp != NULLVP)
143		vref(uppervp);
144	if (lowervp != NULLVP)
145		vref(lowervp);
146
147	unp->un_vnode = vp;
148	unp->un_uppervp = uppervp;
149	unp->un_lowervp = lowervp;
150	unp->un_dvp = dvp;
151	if (uppervp != NULLVP)
152		vp->v_vnlock = uppervp->v_vnlock;
153	else
154		vp->v_vnlock = lowervp->v_vnlock;
155
156	if (path != NULL) {
157		unp->un_path = (char *)
158		    malloc(cnp->cn_namelen +1, M_UNIONFSPATH, M_WAITOK|M_ZERO);
159		bcopy(cnp->cn_nameptr, unp->un_path, cnp->cn_namelen);
160		unp->un_path[cnp->cn_namelen] = '\0';
161	}
162	vp->v_type = (uppervp != NULLVP ? uppervp->v_type : lowervp->v_type);
163	vp->v_data = unp;
164
165	if ((uppervp != NULLVP && ump->um_uppervp == uppervp) &&
166	    (lowervp != NULLVP && ump->um_lowervp == lowervp))
167		vp->v_vflag |= VV_ROOT;
168
169	if (lkflags & LK_TYPE_MASK)
170		vn_lock(vp, lkflags | LK_RETRY);
171
172	*vpp = vp;
173
174	return (0);
175}
176
177/*
178 * Clean up the unionfs node.
179 */
180void
181unionfs_noderem(struct vnode *vp, struct thread *td)
182{
183	int		vfslocked;
184	struct unionfs_node *unp;
185	struct unionfs_node_status *unsp, *unsp_tmp;
186	struct vnode   *lvp;
187	struct vnode   *uvp;
188
189	/*
190	 * Use the interlock to protect the clearing of v_data to
191	 * prevent faults in unionfs_lock().
192	 */
193	VI_LOCK(vp);
194	unp = VTOUNIONFS(vp);
195	lvp = unp->un_lowervp;
196	uvp = unp->un_uppervp;
197	unp->un_lowervp = unp->un_uppervp = NULLVP;
198
199	vp->v_vnlock = &(vp->v_lock);
200	vp->v_data = NULL;
201	lockmgr(vp->v_vnlock, LK_EXCLUSIVE | LK_INTERLOCK, VI_MTX(vp));
202	if (lvp != NULLVP)
203		VOP_UNLOCK(lvp, 0);
204	if (uvp != NULLVP)
205		VOP_UNLOCK(uvp, 0);
206	vp->v_object = NULL;
207
208	if (lvp != NULLVP) {
209		vfslocked = VFS_LOCK_GIANT(lvp->v_mount);
210		vrele(lvp);
211		VFS_UNLOCK_GIANT(vfslocked);
212	}
213	if (uvp != NULLVP) {
214		vfslocked = VFS_LOCK_GIANT(uvp->v_mount);
215		vrele(uvp);
216		VFS_UNLOCK_GIANT(vfslocked);
217	}
218	if (unp->un_dvp != NULLVP) {
219		vfslocked = VFS_LOCK_GIANT(unp->un_dvp->v_mount);
220		vrele(unp->un_dvp);
221		VFS_UNLOCK_GIANT(vfslocked);
222		unp->un_dvp = NULLVP;
223	}
224	if (unp->un_path) {
225		free(unp->un_path, M_UNIONFSPATH);
226		unp->un_path = NULL;
227	}
228
229	LIST_FOREACH_SAFE(unsp, &(unp->un_unshead), uns_list, unsp_tmp) {
230		LIST_REMOVE(unsp, uns_list);
231		free(unsp, M_TEMP);
232	}
233	FREE(unp, M_UNIONFSNODE);
234}
235
236/*
237 * Get the unionfs node status.
238 * You need exclusive lock this vnode.
239 */
240void
241unionfs_get_node_status(struct unionfs_node *unp, struct thread *td,
242			struct unionfs_node_status **unspp)
243{
244	struct unionfs_node_status *unsp;
245
246	KASSERT(NULL != unspp, ("null pointer"));
247	ASSERT_VOP_ELOCKED(UNIONFSTOV(unp), "unionfs_get_node_status");
248
249	LIST_FOREACH(unsp, &(unp->un_unshead), uns_list) {
250		if (unsp->uns_tid == td->td_tid) {
251			*unspp = unsp;
252			return;
253		}
254	}
255
256	/* create a new unionfs node status */
257	MALLOC(unsp, struct unionfs_node_status *,
258	    sizeof(struct unionfs_node_status), M_TEMP, M_WAITOK | M_ZERO);
259
260	unsp->uns_tid = td->td_tid;
261	LIST_INSERT_HEAD(&(unp->un_unshead), unsp, uns_list);
262
263	*unspp = unsp;
264}
265
266/*
267 * Remove the unionfs node status, if you can.
268 * You need exclusive lock this vnode.
269 */
270void
271unionfs_tryrem_node_status(struct unionfs_node *unp, struct thread *td,
272			   struct unionfs_node_status *unsp)
273{
274	KASSERT(NULL != unsp, ("null pointer"));
275	ASSERT_VOP_ELOCKED(UNIONFSTOV(unp), "unionfs_get_node_status");
276
277	if (0 < unsp->uns_lower_opencnt || 0 < unsp->uns_upper_opencnt)
278		return;
279
280	LIST_REMOVE(unsp, uns_list);
281	free(unsp, M_TEMP);
282}
283
284/*
285 * Create upper node attr.
286 */
287void
288unionfs_create_uppervattr_core(struct unionfs_mount *ump,
289			       struct vattr *lva,
290			       struct vattr *uva,
291			       struct thread *td)
292{
293	VATTR_NULL(uva);
294	uva->va_type = lva->va_type;
295	uva->va_atime = lva->va_atime;
296	uva->va_mtime = lva->va_mtime;
297	uva->va_ctime = lva->va_ctime;
298
299	switch (ump->um_copymode) {
300	case UNIONFS_TRANSPARENT:
301		uva->va_mode = lva->va_mode;
302		uva->va_uid = lva->va_uid;
303		uva->va_gid = lva->va_gid;
304		break;
305	case UNIONFS_MASQUERADE:
306		if (ump->um_uid == lva->va_uid) {
307			uva->va_mode = lva->va_mode & 077077;
308			uva->va_mode |= (lva->va_type == VDIR ? ump->um_udir : ump->um_ufile) & 0700;
309			uva->va_uid = lva->va_uid;
310			uva->va_gid = lva->va_gid;
311		} else {
312			uva->va_mode = (lva->va_type == VDIR ? ump->um_udir : ump->um_ufile);
313			uva->va_uid = ump->um_uid;
314			uva->va_gid = ump->um_gid;
315		}
316		break;
317	default:		/* UNIONFS_TRADITIONAL */
318		FILEDESC_SLOCK(td->td_proc->p_fd);
319		uva->va_mode = 0777 & ~td->td_proc->p_fd->fd_cmask;
320		FILEDESC_SUNLOCK(td->td_proc->p_fd);
321		uva->va_uid = ump->um_uid;
322		uva->va_gid = ump->um_gid;
323		break;
324	}
325}
326
327/*
328 * Create upper node attr.
329 */
330int
331unionfs_create_uppervattr(struct unionfs_mount *ump,
332			  struct vnode *lvp,
333			  struct vattr *uva,
334			  struct ucred *cred,
335			  struct thread *td)
336{
337	int		error;
338	struct vattr	lva;
339
340	if ((error = VOP_GETATTR(lvp, &lva, cred, td)))
341		return (error);
342
343	unionfs_create_uppervattr_core(ump, &lva, uva, td);
344
345	return (error);
346}
347
348/*
349 * relookup
350 *
351 * dvp should be locked on entry and will be locked on return.
352 *
353 * If an error is returned, *vpp will be invalid, otherwise it will hold a
354 * locked, referenced vnode. If *vpp == dvp then remember that only one
355 * LK_EXCLUSIVE lock is held.
356 */
357static int
358unionfs_relookup(struct vnode *dvp, struct vnode **vpp,
359		 struct componentname *cnp, struct componentname *cn,
360		 struct thread *td, char *path, int pathlen, u_long nameiop)
361{
362	int	error;
363
364	cn->cn_namelen = pathlen;
365	cn->cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK);
366	bcopy(path, cn->cn_pnbuf, pathlen);
367	cn->cn_pnbuf[pathlen] = '\0';
368
369	cn->cn_nameiop = nameiop;
370	cn->cn_flags = (LOCKPARENT | LOCKLEAF | HASBUF | SAVENAME | ISLASTCN);
371	cn->cn_lkflags = LK_EXCLUSIVE;
372	cn->cn_thread = td;
373	cn->cn_cred = cnp->cn_cred;
374
375	cn->cn_nameptr = cn->cn_pnbuf;
376	cn->cn_consume = cnp->cn_consume;
377
378	if (nameiop == DELETE)
379		cn->cn_flags |= (cnp->cn_flags & (DOWHITEOUT | SAVESTART));
380	else if (RENAME == nameiop)
381		cn->cn_flags |= (cnp->cn_flags & SAVESTART);
382
383	vref(dvp);
384	VOP_UNLOCK(dvp, 0);
385
386	if ((error = relookup(dvp, vpp, cn))) {
387		uma_zfree(namei_zone, cn->cn_pnbuf);
388		cn->cn_flags &= ~HASBUF;
389		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
390	} else
391		vrele(dvp);
392
393	return (error);
394}
395
396/*
397 * relookup for CREATE namei operation.
398 *
399 * dvp is unionfs vnode. dvp should be locked.
400 *
401 * If it called 'unionfs_copyfile' function by unionfs_link etc,
402 * VOP_LOOKUP information is broken.
403 * So it need relookup in order to create link etc.
404 */
405int
406unionfs_relookup_for_create(struct vnode *dvp, struct componentname *cnp,
407			    struct thread *td)
408{
409	int	error;
410	struct vnode *udvp;
411	struct vnode *vp;
412	struct componentname cn;
413
414	udvp = UNIONFSVPTOUPPERVP(dvp);
415	vp = NULLVP;
416
417	error = unionfs_relookup(udvp, &vp, cnp, &cn, td, cnp->cn_nameptr,
418	    strlen(cnp->cn_nameptr), CREATE);
419	if (error)
420		return (error);
421
422	if (vp != NULLVP) {
423		if (udvp == vp)
424			vrele(vp);
425		else
426			vput(vp);
427
428		error = EEXIST;
429	}
430
431	if (cn.cn_flags & HASBUF) {
432		uma_zfree(namei_zone, cn.cn_pnbuf);
433		cn.cn_flags &= ~HASBUF;
434	}
435
436	if (!error) {
437		cn.cn_flags |= (cnp->cn_flags & HASBUF);
438		cnp->cn_flags = cn.cn_flags;
439	}
440
441	return (error);
442}
443
444/*
445 * relookup for DELETE namei operation.
446 *
447 * dvp is unionfs vnode. dvp should be locked.
448 */
449int
450unionfs_relookup_for_delete(struct vnode *dvp, struct componentname *cnp,
451			    struct thread *td)
452{
453	int	error;
454	struct vnode *udvp;
455	struct vnode *vp;
456	struct componentname cn;
457
458	udvp = UNIONFSVPTOUPPERVP(dvp);
459	vp = NULLVP;
460
461	error = unionfs_relookup(udvp, &vp, cnp, &cn, td, cnp->cn_nameptr,
462	    strlen(cnp->cn_nameptr), DELETE);
463	if (error)
464		return (error);
465
466	if (vp == NULLVP)
467		error = ENOENT;
468	else {
469		if (udvp == vp)
470			vrele(vp);
471		else
472			vput(vp);
473	}
474
475	if (cn.cn_flags & HASBUF) {
476		uma_zfree(namei_zone, cn.cn_pnbuf);
477		cn.cn_flags &= ~HASBUF;
478	}
479
480	if (!error) {
481		cn.cn_flags |= (cnp->cn_flags & HASBUF);
482		cnp->cn_flags = cn.cn_flags;
483	}
484
485	return (error);
486}
487
488/*
489 * relookup for RENAME namei operation.
490 *
491 * dvp is unionfs vnode. dvp should be locked.
492 */
493int
494unionfs_relookup_for_rename(struct vnode *dvp, struct componentname *cnp,
495			    struct thread *td)
496{
497	int error;
498	struct vnode *udvp;
499	struct vnode *vp;
500	struct componentname cn;
501
502	udvp = UNIONFSVPTOUPPERVP(dvp);
503	vp = NULLVP;
504
505	error = unionfs_relookup(udvp, &vp, cnp, &cn, td, cnp->cn_nameptr,
506	    strlen(cnp->cn_nameptr), RENAME);
507	if (error)
508		return (error);
509
510	if (vp != NULLVP) {
511		if (udvp == vp)
512			vrele(vp);
513		else
514			vput(vp);
515	}
516
517	if (cn.cn_flags & HASBUF) {
518		uma_zfree(namei_zone, cn.cn_pnbuf);
519		cn.cn_flags &= ~HASBUF;
520	}
521
522	if (!error) {
523		cn.cn_flags |= (cnp->cn_flags & HASBUF);
524		cnp->cn_flags = cn.cn_flags;
525	}
526
527	return (error);
528
529}
530
531/*
532 * Update the unionfs_node.
533 *
534 * uvp is new locked upper vnode. unionfs vnode's lock will be exchanged to the
535 * uvp's lock and lower's lock will be unlocked.
536 */
537static void
538unionfs_node_update(struct unionfs_node *unp, struct vnode *uvp,
539		    struct thread *td)
540{
541	unsigned	count, lockrec;
542	struct vnode   *vp;
543	struct vnode   *lvp;
544
545	vp = UNIONFSTOV(unp);
546	lvp = unp->un_lowervp;
547	ASSERT_VOP_ELOCKED(lvp, "unionfs_node_update");
548
549	/*
550	 * lock update
551	 */
552	VI_LOCK(vp);
553	unp->un_uppervp = uvp;
554	vp->v_vnlock = uvp->v_vnlock;
555	VI_UNLOCK(vp);
556	lockrec = lvp->v_vnlock->lk_recurse;
557	for (count = 0; count < lockrec; count++)
558		vn_lock(uvp, LK_EXCLUSIVE | LK_CANRECURSE | LK_RETRY);
559}
560
561/*
562 * Create a new shadow dir.
563 *
564 * udvp should be locked on entry and will be locked on return.
565 *
566 * If no error returned, unp will be updated.
567 */
568int
569unionfs_mkshadowdir(struct unionfs_mount *ump, struct vnode *udvp,
570		    struct unionfs_node *unp, struct componentname *cnp,
571		    struct thread *td)
572{
573	int		error;
574	struct vnode   *lvp;
575	struct vnode   *uvp;
576	struct vattr	va;
577	struct vattr	lva;
578	struct componentname cn;
579	struct mount   *mp;
580	struct ucred   *cred;
581	struct ucred   *credbk;
582	struct uidinfo *rootinfo;
583
584	if (unp->un_uppervp != NULLVP)
585		return (EEXIST);
586
587	lvp = unp->un_lowervp;
588	uvp = NULLVP;
589	credbk = cnp->cn_cred;
590
591	/* Authority change to root */
592	rootinfo = uifind((uid_t)0);
593	cred = crdup(cnp->cn_cred);
594	chgproccnt(cred->cr_ruidinfo, 1, 0);
595	change_euid(cred, rootinfo);
596	change_ruid(cred, rootinfo);
597	change_svuid(cred, (uid_t)0);
598	uifree(rootinfo);
599	cnp->cn_cred = cred;
600
601	memset(&cn, 0, sizeof(cn));
602
603	if ((error = VOP_GETATTR(lvp, &lva, cnp->cn_cred, td)))
604		goto unionfs_mkshadowdir_abort;
605
606	if ((error = unionfs_relookup(udvp, &uvp, cnp, &cn, td, cnp->cn_nameptr, cnp->cn_namelen, CREATE)))
607		goto unionfs_mkshadowdir_abort;
608	if (uvp != NULLVP) {
609		if (udvp == uvp)
610			vrele(uvp);
611		else
612			vput(uvp);
613
614		error = EEXIST;
615		goto unionfs_mkshadowdir_free_out;
616	}
617
618	if ((error = vn_start_write(udvp, &mp, V_WAIT | PCATCH)))
619		goto unionfs_mkshadowdir_free_out;
620	if ((error = VOP_LEASE(udvp, td, cn.cn_cred, LEASE_WRITE))) {
621		vn_finished_write(mp);
622		goto unionfs_mkshadowdir_free_out;
623	}
624	unionfs_create_uppervattr_core(ump, &lva, &va, td);
625
626	error = VOP_MKDIR(udvp, &uvp, &cn, &va);
627
628	if (!error) {
629		unionfs_node_update(unp, uvp, td);
630
631		/*
632		 * XXX The bug which cannot set uid/gid was corrected.
633		 * Ignore errors.
634		 */
635		va.va_type = VNON;
636		VOP_SETATTR(uvp, &va, cn.cn_cred, td);
637	}
638	vn_finished_write(mp);
639
640unionfs_mkshadowdir_free_out:
641	if (cn.cn_flags & HASBUF) {
642		uma_zfree(namei_zone, cn.cn_pnbuf);
643		cn.cn_flags &= ~HASBUF;
644	}
645
646unionfs_mkshadowdir_abort:
647	cnp->cn_cred = credbk;
648	chgproccnt(cred->cr_ruidinfo, -1, 0);
649	crfree(cred);
650
651	return (error);
652}
653
654/*
655 * Create a new whiteout.
656 *
657 * dvp should be locked on entry and will be locked on return.
658 */
659int
660unionfs_mkwhiteout(struct vnode *dvp, struct componentname *cnp,
661		   struct thread *td, char *path)
662{
663	int		error;
664	struct vnode   *wvp;
665	struct componentname cn;
666	struct mount   *mp;
667
668	if (path == NULL)
669		path = cnp->cn_nameptr;
670
671	wvp = NULLVP;
672	if ((error = unionfs_relookup(dvp, &wvp, cnp, &cn, td, path, strlen(path), CREATE)))
673		return (error);
674	if (wvp != NULLVP) {
675		if (cn.cn_flags & HASBUF) {
676			uma_zfree(namei_zone, cn.cn_pnbuf);
677			cn.cn_flags &= ~HASBUF;
678		}
679		if (dvp == wvp)
680			vrele(wvp);
681		else
682			vput(wvp);
683
684		return (EEXIST);
685	}
686
687	if ((error = vn_start_write(dvp, &mp, V_WAIT | PCATCH)))
688		goto unionfs_mkwhiteout_free_out;
689	if (!(error = VOP_LEASE(dvp, td, td->td_ucred, LEASE_WRITE)))
690		error = VOP_WHITEOUT(dvp, &cn, CREATE);
691
692	vn_finished_write(mp);
693
694unionfs_mkwhiteout_free_out:
695	if (cn.cn_flags & HASBUF) {
696		uma_zfree(namei_zone, cn.cn_pnbuf);
697		cn.cn_flags &= ~HASBUF;
698	}
699
700	return (error);
701}
702
703/*
704 * Create a new vnode for create a new shadow file.
705 *
706 * If an error is returned, *vpp will be invalid, otherwise it will hold a
707 * locked, referenced and opened vnode.
708 *
709 * unp is never updated.
710 */
711static int
712unionfs_vn_create_on_upper(struct vnode **vpp, struct vnode *udvp,
713			   struct unionfs_node *unp, struct vattr *uvap,
714			   struct thread *td)
715{
716	struct unionfs_mount *ump;
717	struct vnode   *vp;
718	struct vnode   *lvp;
719	struct ucred   *cred;
720	struct vattr	lva;
721	int		fmode;
722	int		error;
723	struct componentname cn;
724
725	ump = MOUNTTOUNIONFSMOUNT(UNIONFSTOV(unp)->v_mount);
726	vp = NULLVP;
727	lvp = unp->un_lowervp;
728	cred = td->td_ucred;
729	fmode = FFLAGS(O_WRONLY | O_CREAT | O_TRUNC | O_EXCL);
730	error = 0;
731
732	if ((error = VOP_GETATTR(lvp, &lva, cred, td)) != 0)
733		return (error);
734	unionfs_create_uppervattr_core(ump, &lva, uvap, td);
735
736	if (unp->un_path == NULL)
737		panic("unionfs: un_path is null");
738
739	cn.cn_namelen = strlen(unp->un_path);
740	cn.cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK);
741	bcopy(unp->un_path, cn.cn_pnbuf, cn.cn_namelen + 1);
742	cn.cn_nameiop = CREATE;
743	cn.cn_flags = (LOCKPARENT | LOCKLEAF | HASBUF | SAVENAME | ISLASTCN);
744	cn.cn_lkflags = LK_EXCLUSIVE;
745	cn.cn_thread = td;
746	cn.cn_cred = cred;
747	cn.cn_nameptr = cn.cn_pnbuf;
748	cn.cn_consume = 0;
749
750	vref(udvp);
751	if ((error = relookup(udvp, &vp, &cn)) != 0)
752		goto unionfs_vn_create_on_upper_free_out2;
753	vrele(udvp);
754
755	if (vp != NULLVP) {
756		if (vp == udvp)
757			vrele(vp);
758		else
759			vput(vp);
760		error = EEXIST;
761		goto unionfs_vn_create_on_upper_free_out1;
762	}
763
764	if ((error = VOP_LEASE(udvp, td, cred, LEASE_WRITE)) != 0)
765		goto unionfs_vn_create_on_upper_free_out1;
766
767	if ((error = VOP_CREATE(udvp, &vp, &cn, uvap)) != 0)
768		goto unionfs_vn_create_on_upper_free_out1;
769
770	if ((error = VOP_OPEN(vp, fmode, cred, td, NULL)) != 0) {
771		vput(vp);
772		goto unionfs_vn_create_on_upper_free_out1;
773	}
774	vp->v_writecount++;
775	*vpp = vp;
776
777unionfs_vn_create_on_upper_free_out1:
778	VOP_UNLOCK(udvp, 0);
779
780unionfs_vn_create_on_upper_free_out2:
781	if (cn.cn_flags & HASBUF) {
782		uma_zfree(namei_zone, cn.cn_pnbuf);
783		cn.cn_flags &= ~HASBUF;
784	}
785
786	return (error);
787}
788
789/*
790 * Copy from lvp to uvp.
791 *
792 * lvp and uvp should be locked and opened on entry and will be locked and
793 * opened on return.
794 */
795static int
796unionfs_copyfile_core(struct vnode *lvp, struct vnode *uvp,
797		      struct ucred *cred, struct thread *td)
798{
799	int		error;
800	off_t		offset;
801	int		count;
802	int		bufoffset;
803	char           *buf;
804	struct uio	uio;
805	struct iovec	iov;
806
807	error = 0;
808	memset(&uio, 0, sizeof(uio));
809
810	uio.uio_td = td;
811	uio.uio_segflg = UIO_SYSSPACE;
812	uio.uio_offset = 0;
813
814	if ((error = VOP_LEASE(lvp, td, cred, LEASE_READ)) != 0)
815		return (error);
816	if ((error = VOP_LEASE(uvp, td, cred, LEASE_WRITE)) != 0)
817		return (error);
818	buf = malloc(MAXBSIZE, M_TEMP, M_WAITOK);
819
820	while (error == 0) {
821		offset = uio.uio_offset;
822
823		uio.uio_iov = &iov;
824		uio.uio_iovcnt = 1;
825		iov.iov_base = buf;
826		iov.iov_len = MAXBSIZE;
827		uio.uio_resid = iov.iov_len;
828		uio.uio_rw = UIO_READ;
829
830		if ((error = VOP_READ(lvp, &uio, 0, cred)) != 0)
831			break;
832		if ((count = MAXBSIZE - uio.uio_resid) == 0)
833			break;
834
835		bufoffset = 0;
836		while (bufoffset < count) {
837			uio.uio_iov = &iov;
838			uio.uio_iovcnt = 1;
839			iov.iov_base = buf + bufoffset;
840			iov.iov_len = count - bufoffset;
841			uio.uio_offset = offset + bufoffset;
842			uio.uio_resid = iov.iov_len;
843			uio.uio_rw = UIO_WRITE;
844
845			if ((error = VOP_WRITE(uvp, &uio, 0, cred)) != 0)
846				break;
847
848			bufoffset += (count - bufoffset) - uio.uio_resid;
849		}
850
851		uio.uio_offset = offset + bufoffset;
852	}
853
854	free(buf, M_TEMP);
855
856	return (error);
857}
858
859/*
860 * Copy file from lower to upper.
861 *
862 * If you need copy of the contents, set 1 to docopy. Otherwise, set 0 to
863 * docopy.
864 *
865 * If no error returned, unp will be updated.
866 */
867int
868unionfs_copyfile(struct unionfs_node *unp, int docopy, struct ucred *cred,
869		 struct thread *td)
870{
871	int		error;
872	struct mount   *mp;
873	struct vnode   *udvp;
874	struct vnode   *lvp;
875	struct vnode   *uvp;
876	struct vattr	uva;
877
878	lvp = unp->un_lowervp;
879	uvp = NULLVP;
880
881	if ((UNIONFSTOV(unp)->v_mount->mnt_flag & MNT_RDONLY))
882		return (EROFS);
883	if (unp->un_dvp == NULLVP)
884		return (EINVAL);
885	if (unp->un_uppervp != NULLVP)
886		return (EEXIST);
887	udvp = VTOUNIONFS(unp->un_dvp)->un_uppervp;
888	if (udvp == NULLVP)
889		return (EROFS);
890	if ((udvp->v_mount->mnt_flag & MNT_RDONLY))
891		return (EROFS);
892
893	error = VOP_ACCESS(lvp, VREAD, cred, td);
894	if (error != 0)
895		return (error);
896
897	if ((error = vn_start_write(udvp, &mp, V_WAIT | PCATCH)) != 0)
898		return (error);
899	error = unionfs_vn_create_on_upper(&uvp, udvp, unp, &uva, td);
900	if (error != 0) {
901		vn_finished_write(mp);
902		return (error);
903	}
904
905	if (docopy != 0) {
906		error = VOP_OPEN(lvp, FREAD, cred, td, NULL);
907		if (error == 0) {
908			error = unionfs_copyfile_core(lvp, uvp, cred, td);
909			VOP_CLOSE(lvp, FREAD, cred, td);
910		}
911	}
912	VOP_CLOSE(uvp, FWRITE, cred, td);
913	uvp->v_writecount--;
914
915	vn_finished_write(mp);
916
917	if (error == 0) {
918		/* Reset the attributes. Ignore errors. */
919		uva.va_type = VNON;
920		VOP_SETATTR(uvp, &uva, cred, td);
921	}
922
923	unionfs_node_update(unp, uvp, td);
924
925	return (error);
926}
927
928/*
929 * It checks whether vp can rmdir. (check empty)
930 *
931 * vp is unionfs vnode.
932 * vp should be locked.
933 */
934int
935unionfs_check_rmdir(struct vnode *vp, struct ucred *cred, struct thread *td)
936{
937	int		error;
938	int		eofflag;
939	int		lookuperr;
940	struct vnode   *uvp;
941	struct vnode   *lvp;
942	struct vnode   *tvp;
943	struct vattr	va;
944	struct componentname cn;
945	/*
946	 * The size of buf needs to be larger than DIRBLKSIZ.
947	 */
948	char		buf[256 * 6];
949	struct dirent  *dp;
950	struct dirent  *edp;
951	struct uio	uio;
952	struct iovec	iov;
953
954	ASSERT_VOP_ELOCKED(vp, "unionfs_check_rmdir");
955
956	eofflag = 0;
957	uvp = UNIONFSVPTOUPPERVP(vp);
958	lvp = UNIONFSVPTOLOWERVP(vp);
959
960	/* check opaque */
961	if ((error = VOP_GETATTR(uvp, &va, cred, td)) != 0)
962		return (error);
963	if (va.va_flags & OPAQUE)
964		return (0);
965
966	/* open vnode */
967#ifdef MAC
968	if ((error = mac_vnode_check_open(cred, vp, VEXEC|VREAD)) != 0)
969		return (error);
970#endif
971	if ((error = VOP_ACCESS(vp, VEXEC|VREAD, cred, td)) != 0)
972		return (error);
973	if ((error = VOP_OPEN(vp, FREAD, cred, td, NULL)) != 0)
974		return (error);
975
976	uio.uio_rw = UIO_READ;
977	uio.uio_segflg = UIO_SYSSPACE;
978	uio.uio_td = td;
979	uio.uio_offset = 0;
980
981#ifdef MAC
982	error = mac_vnode_check_readdir(td->td_ucred, lvp);
983#endif
984	while (!error && !eofflag) {
985		iov.iov_base = buf;
986		iov.iov_len = sizeof(buf);
987		uio.uio_iov = &iov;
988		uio.uio_iovcnt = 1;
989		uio.uio_resid = iov.iov_len;
990
991		error = VOP_READDIR(lvp, &uio, cred, &eofflag, NULL, NULL);
992		if (error)
993			break;
994
995		edp = (struct dirent*)&buf[sizeof(buf) - uio.uio_resid];
996		for (dp = (struct dirent*)buf; !error && dp < edp;
997		     dp = (struct dirent*)((caddr_t)dp + dp->d_reclen)) {
998			if (dp->d_type == DT_WHT ||
999			    (dp->d_namlen == 1 && dp->d_name[0] == '.') ||
1000			    (dp->d_namlen == 2 && !bcmp(dp->d_name, "..", 2)))
1001				continue;
1002
1003			cn.cn_namelen = dp->d_namlen;
1004			cn.cn_pnbuf = NULL;
1005			cn.cn_nameptr = dp->d_name;
1006			cn.cn_nameiop = LOOKUP;
1007			cn.cn_flags = (LOCKPARENT | LOCKLEAF | SAVENAME | RDONLY | ISLASTCN);
1008			cn.cn_lkflags = LK_EXCLUSIVE;
1009			cn.cn_thread = td;
1010			cn.cn_cred = cred;
1011			cn.cn_consume = 0;
1012
1013			/*
1014			 * check entry in lower.
1015			 * Sometimes, readdir function returns
1016			 * wrong entry.
1017			 */
1018			lookuperr = VOP_LOOKUP(lvp, &tvp, &cn);
1019
1020			if (!lookuperr)
1021				vput(tvp);
1022			else
1023				continue; /* skip entry */
1024
1025			/*
1026			 * check entry
1027			 * If it has no exist/whiteout entry in upper,
1028			 * directory is not empty.
1029			 */
1030			cn.cn_flags = (LOCKPARENT | LOCKLEAF | SAVENAME | RDONLY | ISLASTCN);
1031			lookuperr = VOP_LOOKUP(uvp, &tvp, &cn);
1032
1033			if (!lookuperr)
1034				vput(tvp);
1035
1036			/* ignore exist or whiteout entry */
1037			if (!lookuperr ||
1038			    (lookuperr == ENOENT && (cn.cn_flags & ISWHITEOUT)))
1039				continue;
1040
1041			error = ENOTEMPTY;
1042		}
1043	}
1044
1045	/* close vnode */
1046	VOP_CLOSE(vp, FREAD, cred, td);
1047
1048	return (error);
1049}
1050
1051#ifdef DIAGNOSTIC
1052
1053struct vnode   *
1054unionfs_checkuppervp(struct vnode *vp, char *fil, int lno)
1055{
1056	struct unionfs_node *unp;
1057
1058	unp = VTOUNIONFS(vp);
1059
1060#ifdef notyet
1061	if (vp->v_op != unionfs_vnodeop_p) {
1062		printf("unionfs_checkuppervp: on non-unionfs-node.\n");
1063#ifdef KDB
1064		kdb_enter(KDB_WHY_UNIONFS,
1065		    "unionfs_checkuppervp: on non-unionfs-node.\n");
1066#endif
1067		panic("unionfs_checkuppervp");
1068	};
1069#endif
1070	return (unp->un_uppervp);
1071}
1072
1073struct vnode   *
1074unionfs_checklowervp(struct vnode *vp, char *fil, int lno)
1075{
1076	struct unionfs_node *unp;
1077
1078	unp = VTOUNIONFS(vp);
1079
1080#ifdef notyet
1081	if (vp->v_op != unionfs_vnodeop_p) {
1082		printf("unionfs_checklowervp: on non-unionfs-node.\n");
1083#ifdef KDB
1084		kdb_enter(KDB_WHY_UNIONFS,
1085		    "unionfs_checklowervp: on non-unionfs-node.\n");
1086#endif
1087		panic("unionfs_checklowervp");
1088	};
1089#endif
1090	return (unp->un_lowervp);
1091}
1092#endif
1093