/*-
 * Copyright (c) 1994 Jan-Simon Pendry
 * Copyright (c) 1994
 *	The Regents of the University of California.  All rights reserved.
 * Copyright (c) 2005, 2006, 2012 Masanori Ozawa <ozawa@ongs.co.jp>, ONGS Inc.
 * Copyright (c) 2006, 2012 Daichi Goto <daichi@freebsd.org>
 *
 * This code is derived from software contributed to Berkeley by
 * Jan-Simon Pendry.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)union_subr.c	8.20 (Berkeley) 5/20/95
 * $FreeBSD$
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/namei.h>
#include <sys/proc.h>
#include <sys/vnode.h>
#include <sys/dirent.h>
#include <sys/fcntl.h>
#include <sys/filedesc.h>
#include <sys/stat.h>
#include <sys/resourcevar.h>

#include <security/mac/mac_framework.h>

#include <vm/uma.h>

#include <fs/unionfs/union.h>

#define NUNIONFSNODECACHE 16

static MALLOC_DEFINE(M_UNIONFSHASH, "UNIONFS hash", "UNIONFS hash table");
MALLOC_DEFINE(M_UNIONFSNODE, "UNIONFS node", "UNIONFS vnode private part");
MALLOC_DEFINE(M_UNIONFSPATH, "UNIONFS path", "UNIONFS path private part");

/*
 * Initialize
 */
int
unionfs_init(struct vfsconf *vfsp)
{
	UNIONFSDEBUG("unionfs_init\n");	/* printed during system boot */
	return (0);
}

/*
 * Uninitialize
 */
int
unionfs_uninit(struct vfsconf *vfsp)
{
	return (0);
}

static struct unionfs_node_hashhead *
unionfs_get_hashhead(struct vnode *dvp, char *path)
{
	int		count;
	char		hash;
	struct unionfs_node *unp;

	hash = 0;
	unp = VTOUNIONFS(dvp);
	if (path != NULL) {
		for (count = 0; path[count]; count++)
			hash += path[count];
	}

	return (&(unp->un_hashtbl[hash & (unp->un_hashmask)]));
}

/*
 * Get the cached vnode.
 */
static struct vnode *
unionfs_get_cached_vnode(struct vnode *uvp, struct vnode *lvp,
			struct vnode *dvp, char *path)
{
	struct unionfs_node_hashhead *hd;
	struct unionfs_node *unp;
	struct vnode   *vp;

	KASSERT((uvp == NULLVP || uvp->v_type == VDIR),
	    ("unionfs_get_cached_vnode: v_type != VDIR"));
	KASSERT((lvp == NULLVP || lvp->v_type == VDIR),
	    ("unionfs_get_cached_vnode: v_type != VDIR"));

	VI_LOCK(dvp);
	hd = unionfs_get_hashhead(dvp, path);
	LIST_FOREACH(unp, hd, un_hash) {
		if (!strcmp(unp->un_path, path)) {
			vp = UNIONFSTOV(unp);
			VI_LOCK_FLAGS(vp, MTX_DUPOK);
			VI_UNLOCK(dvp);
			vp->v_iflag &= ~VI_OWEINACT;
			if ((vp->v_iflag & (VI_DOOMED | VI_DOINGINACT)) != 0) {
				VI_UNLOCK(vp);
				vp = NULLVP;
			} else
				VI_UNLOCK(vp);
			return (vp);
		}
	}
	VI_UNLOCK(dvp);

	return (NULLVP);
}

/*
 * Add the new vnode to the cache.
 */
static struct vnode *
unionfs_ins_cached_vnode(struct unionfs_node *uncp,
			struct vnode *dvp, char *path)
{
	struct unionfs_node_hashhead *hd;
	struct unionfs_node *unp;
	struct vnode   *vp;

	KASSERT((uncp->un_uppervp==NULLVP || uncp->un_uppervp->v_type==VDIR),
	    ("unionfs_ins_cached_vnode: v_type != VDIR"));
	KASSERT((uncp->un_lowervp==NULLVP || uncp->un_lowervp->v_type==VDIR),
	    ("unionfs_ins_cached_vnode: v_type != VDIR"));

	VI_LOCK(dvp);
	hd = unionfs_get_hashhead(dvp, path);
	LIST_FOREACH(unp, hd, un_hash) {
		if (!strcmp(unp->un_path, path)) {
			vp = UNIONFSTOV(unp);
			VI_LOCK_FLAGS(vp, MTX_DUPOK);
			vp->v_iflag &= ~VI_OWEINACT;
			if ((vp->v_iflag & (VI_DOOMED | VI_DOINGINACT)) != 0) {
				LIST_INSERT_HEAD(hd, uncp, un_hash);
				VI_UNLOCK(vp);
				vp = NULLVP;
			} else
				VI_UNLOCK(vp);
			VI_UNLOCK(dvp);
			return (vp);
		}
	}

	LIST_INSERT_HEAD(hd, uncp, un_hash);
	VI_UNLOCK(dvp);

	return (NULLVP);
}

/*
 * Remove the vnode from the cache.
 */
static void
unionfs_rem_cached_vnode(struct unionfs_node *unp, struct vnode *dvp)
{
	KASSERT((unp != NULL), ("unionfs_rem_cached_vnode: null node"));
	KASSERT((dvp != NULLVP),
	    ("unionfs_rem_cached_vnode: null parent vnode"));
	KASSERT((unp->un_hash.le_prev != NULL),
	    ("unionfs_rem_cached_vnode: null hash"));

	VI_LOCK(dvp);
	LIST_REMOVE(unp, un_hash);
	unp->un_hash.le_next = NULL;
	unp->un_hash.le_prev = NULL;
	VI_UNLOCK(dvp);
}
/*
 * Make a new unionfs node or return an existing one.
 *
 * uppervp and lowervp should be unlocked, because locking the new unionfs
 * vnode also locks uppervp or lowervp.  To avoid deadlock, never hold more
 * than one of these vnode locks at the same time.
 */
int
unionfs_nodeget(struct mount *mp, struct vnode *uppervp,
		struct vnode *lowervp, struct vnode *dvp,
		struct vnode **vpp, struct componentname *cnp,
		struct thread *td)
{
	struct unionfs_mount *ump;
	struct unionfs_node *unp;
	struct vnode   *vp;
	int		error;
	int		lkflags;
	enum vtype	vt;
	char	       *path;

	ump = MOUNTTOUNIONFSMOUNT(mp);
	lkflags = (cnp ? cnp->cn_lkflags : 0);
	path = (cnp ? cnp->cn_nameptr : NULL);
	*vpp = NULLVP;

	if (uppervp == NULLVP && lowervp == NULLVP)
		panic("unionfs_nodeget: upper and lower is null");

	vt = (uppervp != NULLVP ? uppervp->v_type : lowervp->v_type);

	/* If the ISLASTCN flag is not set, the path check is skipped. */
	if (cnp && !(cnp->cn_flags & ISLASTCN))
		path = NULL;

	/* check the cache */
	if (path != NULL && dvp != NULLVP && vt == VDIR) {
		vp = unionfs_get_cached_vnode(uppervp, lowervp, dvp, path);
		if (vp != NULLVP) {
			vref(vp);
			*vpp = vp;
			goto unionfs_nodeget_out;
		}
	}

	if ((uppervp == NULLVP || ump->um_uppervp != uppervp) ||
	    (lowervp == NULLVP || ump->um_lowervp != lowervp)) {
		/* dvp will be NULLVP only for the root vnode. */
		if (dvp == NULLVP)
			return (EINVAL);
	}

	/*
	 * Do the malloc() before the getnewvnode() since doing so afterward
	 * might cause a bogus v_data pointer to get dereferenced elsewhere
	 * if malloc() should block.
	 */
	unp = malloc(sizeof(struct unionfs_node),
	    M_UNIONFSNODE, M_WAITOK | M_ZERO);

	error = getnewvnode("unionfs", mp, &unionfs_vnodeops, &vp);
	if (error != 0) {
		free(unp, M_UNIONFSNODE);
		return (error);
	}
	error = insmntque(vp, mp);	/* XXX: Too early for mpsafe fs */
	if (error != 0) {
		free(unp, M_UNIONFSNODE);
		return (error);
	}
	if (dvp != NULLVP)
		vref(dvp);
	if (uppervp != NULLVP)
		vref(uppervp);
	if (lowervp != NULLVP)
		vref(lowervp);

	if (vt == VDIR)
		unp->un_hashtbl = hashinit(NUNIONFSNODECACHE, M_UNIONFSHASH,
		    &(unp->un_hashmask));

	unp->un_vnode = vp;
	unp->un_uppervp = uppervp;
	unp->un_lowervp = lowervp;
	unp->un_dvp = dvp;
	if (uppervp != NULLVP)
		vp->v_vnlock = uppervp->v_vnlock;
	else
		vp->v_vnlock = lowervp->v_vnlock;

	if (path != NULL) {
		unp->un_path = (char *)
		    malloc(cnp->cn_namelen +1, M_UNIONFSPATH, M_WAITOK|M_ZERO);
		bcopy(cnp->cn_nameptr, unp->un_path, cnp->cn_namelen);
		unp->un_path[cnp->cn_namelen] = '\0';
	}
	vp->v_type = vt;
	vp->v_data = unp;

	if ((uppervp != NULLVP && ump->um_uppervp == uppervp) &&
	    (lowervp != NULLVP && ump->um_lowervp == lowervp))
		vp->v_vflag |= VV_ROOT;

	if (path != NULL && dvp != NULLVP && vt == VDIR)
		*vpp = unionfs_ins_cached_vnode(unp, dvp, path);
	if ((*vpp) != NULLVP) {
		if (dvp != NULLVP)
			vrele(dvp);
		if (uppervp != NULLVP)
			vrele(uppervp);
		if (lowervp != NULLVP)
			vrele(lowervp);

		unp->un_uppervp = NULLVP;
		unp->un_lowervp = NULLVP;
		unp->un_dvp = NULLVP;
		vrele(vp);
		vp = *vpp;
		vref(vp);
	} else
		*vpp = vp;

unionfs_nodeget_out:
	if (lkflags & LK_TYPE_MASK)
		vn_lock(vp, lkflags | LK_RETRY);

	return (0);
}
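
/*
 * A minimal sketch of how a lookup path is expected to call
 * unionfs_nodeget() (the surrounding function is hypothetical): both layer
 * vnodes are passed in unlocked, and the desired lock is requested through
 * cnp->cn_lkflags, so the returned unionfs vnode comes back locked only
 * when the caller asked for that.
 */
#if 0
static int
example_lookup_alloc(struct mount *mp, struct vnode *dvp,
    struct vnode *uppervp, struct vnode *lowervp,
    struct componentname *cnp, struct thread *td, struct vnode **vpp)
{
	int error;

	/* uppervp and lowervp must be unlocked here to avoid deadlock. */
	error = unionfs_nodeget(mp, uppervp, lowervp, dvp, vpp, cnp, td);
	if (error != 0)
		return (error);
	/* *vpp is referenced, and locked iff cnp->cn_lkflags asked for it. */
	return (0);
}
#endif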

/*
 * Clean up the unionfs node.
 */
void
unionfs_noderem(struct vnode *vp, struct thread *td)
{
	int		vfslocked;
	int		count;
	struct unionfs_node *unp, *unp_t1, *unp_t2;
	struct unionfs_node_hashhead *hd;
	struct unionfs_node_status *unsp, *unsp_tmp;
	struct vnode   *lvp;
	struct vnode   *uvp;
	struct vnode   *dvp;

	/*
	 * Use the interlock to protect the clearing of v_data to
	 * prevent faults in unionfs_lock().
	 */
	VI_LOCK(vp);
	unp = VTOUNIONFS(vp);
	lvp = unp->un_lowervp;
	uvp = unp->un_uppervp;
	dvp = unp->un_dvp;
	unp->un_lowervp = unp->un_uppervp = NULLVP;
	vp->v_vnlock = &(vp->v_lock);
	vp->v_data = NULL;
	vp->v_object = NULL;
	VI_UNLOCK(vp);

	if (lvp != NULLVP)
		VOP_UNLOCK(lvp, LK_RELEASE);
	if (uvp != NULLVP)
		VOP_UNLOCK(uvp, LK_RELEASE);

	if (dvp != NULLVP && unp->un_hash.le_prev != NULL)
		unionfs_rem_cached_vnode(unp, dvp);

	if (lockmgr(vp->v_vnlock, LK_EXCLUSIVE, VI_MTX(vp)) != 0)
		panic("the lock for deletion is unacquirable.");

	if (lvp != NULLVP) {
		vfslocked = VFS_LOCK_GIANT(lvp->v_mount);
		vrele(lvp);
		VFS_UNLOCK_GIANT(vfslocked);
	}
	if (uvp != NULLVP) {
		vfslocked = VFS_LOCK_GIANT(uvp->v_mount);
		vrele(uvp);
		VFS_UNLOCK_GIANT(vfslocked);
	}
	if (dvp != NULLVP) {
		vfslocked = VFS_LOCK_GIANT(dvp->v_mount);
		vrele(dvp);
		VFS_UNLOCK_GIANT(vfslocked);
		unp->un_dvp = NULLVP;
	}
	if (unp->un_path != NULL) {
		free(unp->un_path, M_UNIONFSPATH);
		unp->un_path = NULL;
	}

	if (unp->un_hashtbl != NULL) {
		for (count = 0; count <= unp->un_hashmask; count++) {
			hd = unp->un_hashtbl + count;
			LIST_FOREACH_SAFE(unp_t1, hd, un_hash, unp_t2) {
				LIST_REMOVE(unp_t1, un_hash);
				unp_t1->un_hash.le_next = NULL;
				unp_t1->un_hash.le_prev = NULL;
			}
		}
		hashdestroy(unp->un_hashtbl, M_UNIONFSHASH, unp->un_hashmask);
	}

	LIST_FOREACH_SAFE(unsp, &(unp->un_unshead), uns_list, unsp_tmp) {
		LIST_REMOVE(unsp, uns_list);
		free(unsp, M_TEMP);
	}
	free(unp, M_UNIONFSNODE);
}

/*
 * Get the unionfs node status.
 * The vnode must be locked exclusively.
 */
void
unionfs_get_node_status(struct unionfs_node *unp, struct thread *td,
			struct unionfs_node_status **unspp)
{
	struct unionfs_node_status *unsp;
	pid_t pid = td->td_proc->p_pid;

	KASSERT(NULL != unspp, ("null pointer"));
	ASSERT_VOP_ELOCKED(UNIONFSTOV(unp), "unionfs_get_node_status");

	LIST_FOREACH(unsp, &(unp->un_unshead), uns_list) {
		if (unsp->uns_pid == pid) {
			*unspp = unsp;
			return;
		}
	}

	/* create a new unionfs node status */
	unsp = malloc(sizeof(struct unionfs_node_status),
	    M_TEMP, M_WAITOK | M_ZERO);

	unsp->uns_pid = pid;
	LIST_INSERT_HEAD(&(unp->un_unshead), unsp, uns_list);

	*unspp = unsp;
}
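
/*
 * A sketch of the intended pairing (the helper below is hypothetical): an
 * open/close path looks the status record up while holding the unionfs
 * vnode lock exclusively, adjusts its per-process open counts, and lets
 * unionfs_tryrem_node_status() reclaim the record once both counts reach
 * zero.
 */
#if 0
static void
example_track_upper_open(struct unionfs_node *unp, struct thread *td,
    int opening)
{
	struct unionfs_node_status *unsp;

	ASSERT_VOP_ELOCKED(UNIONFSTOV(unp), "example_track_upper_open");
	unionfs_get_node_status(unp, td, &unsp);
	unsp->uns_upper_opencnt += (opening ? 1 : -1);
	if (!opening)
		unionfs_tryrem_node_status(unp, unsp);
}
#endif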

/*
 * Remove the unionfs node status if it is no longer in use.
 * The vnode must be locked exclusively.
 */
void
unionfs_tryrem_node_status(struct unionfs_node *unp,
			   struct unionfs_node_status *unsp)
{
	KASSERT(NULL != unsp, ("null pointer"));
	ASSERT_VOP_ELOCKED(UNIONFSTOV(unp), "unionfs_tryrem_node_status");

	if (0 < unsp->uns_lower_opencnt || 0 < unsp->uns_upper_opencnt)
		return;

	LIST_REMOVE(unsp, uns_list);
	free(unsp, M_TEMP);
}

/*
 * Create upper node attr.
 */
void
unionfs_create_uppervattr_core(struct unionfs_mount *ump,
			       struct vattr *lva,
			       struct vattr *uva,
			       struct thread *td)
{
	VATTR_NULL(uva);
	uva->va_type = lva->va_type;
	uva->va_atime = lva->va_atime;
	uva->va_mtime = lva->va_mtime;
	uva->va_ctime = lva->va_ctime;

	switch (ump->um_copymode) {
	case UNIONFS_TRANSPARENT:
		uva->va_mode = lva->va_mode;
		uva->va_uid = lva->va_uid;
		uva->va_gid = lva->va_gid;
		break;
	case UNIONFS_MASQUERADE:
		if (ump->um_uid == lva->va_uid) {
			uva->va_mode = lva->va_mode & 077077;
			uva->va_mode |= (lva->va_type == VDIR ? ump->um_udir : ump->um_ufile) & 0700;
			uva->va_uid = lva->va_uid;
			uva->va_gid = lva->va_gid;
		} else {
			uva->va_mode = (lva->va_type == VDIR ? ump->um_udir : ump->um_ufile);
			uva->va_uid = ump->um_uid;
			uva->va_gid = ump->um_gid;
		}
		break;
	default:		/* UNIONFS_TRADITIONAL */
		uva->va_mode = 0777 & ~td->td_proc->p_fd->fd_cmask;
		uva->va_uid = ump->um_uid;
		uva->va_gid = ump->um_gid;
		break;
	}
}
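
/*
 * Worked example for UNIONFS_MASQUERADE (a sketch; the concrete values are
 * made up): suppose um_udir is 0700 and the lower directory has mode 0755.
 * If the lower directory is not owned by um_uid, the upper copy simply gets
 * mode 0700 with the mount's uid/gid.  If it is owned by um_uid, then
 * 0755 & 077077 keeps the group, other and set-id bits (0055) while
 * um_udir & 0700 supplies the owner bits, giving 0755 again, with the
 * owner bits controlled by the mount option rather than the lower layer.
 */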

/*
 * Create upper node attr from the attributes of the lower vnode.
 */
int
unionfs_create_uppervattr(struct unionfs_mount *ump,
			  struct vnode *lvp,
			  struct vattr *uva,
			  struct ucred *cred,
			  struct thread *td)
{
	int		error;
	struct vattr	lva;

	if ((error = VOP_GETATTR(lvp, &lva, cred)))
		return (error);

	unionfs_create_uppervattr_core(ump, &lva, uva, td);

	return (error);
}

/*
 * relookup
 *
 * dvp should be locked on entry and will be locked on return.
 *
 * If an error is returned, *vpp will be invalid, otherwise it will hold a
 * locked, referenced vnode. If *vpp == dvp then remember that only one
 * LK_EXCLUSIVE lock is held.
 */
int
unionfs_relookup(struct vnode *dvp, struct vnode **vpp,
		 struct componentname *cnp, struct componentname *cn,
		 struct thread *td, char *path, int pathlen, u_long nameiop)
{
	int	error;

	cn->cn_namelen = pathlen;
	cn->cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK);
	bcopy(path, cn->cn_pnbuf, pathlen);
	cn->cn_pnbuf[pathlen] = '\0';

	cn->cn_nameiop = nameiop;
	cn->cn_flags = (LOCKPARENT | LOCKLEAF | HASBUF | SAVENAME | ISLASTCN);
	cn->cn_lkflags = LK_EXCLUSIVE;
	cn->cn_thread = td;
	cn->cn_cred = cnp->cn_cred;

	cn->cn_nameptr = cn->cn_pnbuf;
	cn->cn_consume = cnp->cn_consume;

	if (nameiop == DELETE)
		cn->cn_flags |= (cnp->cn_flags & (DOWHITEOUT | SAVESTART));
	else if (RENAME == nameiop)
		cn->cn_flags |= (cnp->cn_flags & SAVESTART);

	vref(dvp);
	VOP_UNLOCK(dvp, LK_RELEASE);

	if ((error = relookup(dvp, vpp, cn))) {
		uma_zfree(namei_zone, cn->cn_pnbuf);
		cn->cn_flags &= ~HASBUF;
		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
	} else
		vrele(dvp);

	return (error);
}

/*
 * relookup for CREATE namei operation.
 *
 * dvp is a unionfs vnode; it should be locked.
 *
 * When unionfs_link() and friends call unionfs_copyfile(), the name
 * information saved by the earlier VOP_LOOKUP() becomes stale, so the name
 * has to be looked up again before the link (or similar) can be created.
 */
int
unionfs_relookup_for_create(struct vnode *dvp, struct componentname *cnp,
			    struct thread *td)
{
	int	error;
	struct vnode *udvp;
	struct vnode *vp;
	struct componentname cn;

	udvp = UNIONFSVPTOUPPERVP(dvp);
	vp = NULLVP;

	error = unionfs_relookup(udvp, &vp, cnp, &cn, td, cnp->cn_nameptr,
	    strlen(cnp->cn_nameptr), CREATE);
	if (error)
		return (error);

	if (vp != NULLVP) {
		if (udvp == vp)
			vrele(vp);
		else
			vput(vp);

		error = EEXIST;
	}

	if (cn.cn_flags & HASBUF) {
		uma_zfree(namei_zone, cn.cn_pnbuf);
		cn.cn_flags &= ~HASBUF;
	}

	if (!error) {
		cn.cn_flags |= (cnp->cn_flags & HASBUF);
		cnp->cn_flags = cn.cn_flags;
	}

	return (error);
}
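
/*
 * A sketch of why this CREATE-time relookup exists (the caller below is
 * hypothetical): once unionfs_copyfile() or unionfs_mkshadowdir() has run,
 * the name information gathered by the earlier VOP_LOOKUP() no longer
 * matches the upper layer, so an operation such as link must look the name
 * up again on the upper directory before creating anything.
 */
#if 0
static int
example_link_prepare(struct vnode *dvp, struct componentname *cnp,
    struct thread *td)
{
	/* dvp is the locked unionfs directory vnode. */
	return (unionfs_relookup_for_create(dvp, cnp, td));
}
#endif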

/*
 * relookup for DELETE namei operation.
 *
 * dvp is a unionfs vnode; it should be locked.
 */
int
unionfs_relookup_for_delete(struct vnode *dvp, struct componentname *cnp,
			    struct thread *td)
{
	int	error;
	struct vnode *udvp;
	struct vnode *vp;
	struct componentname cn;

	udvp = UNIONFSVPTOUPPERVP(dvp);
	vp = NULLVP;

	error = unionfs_relookup(udvp, &vp, cnp, &cn, td, cnp->cn_nameptr,
	    strlen(cnp->cn_nameptr), DELETE);
	if (error)
		return (error);

	if (vp == NULLVP)
		error = ENOENT;
	else {
		if (udvp == vp)
			vrele(vp);
		else
			vput(vp);
	}

	if (cn.cn_flags & HASBUF) {
		uma_zfree(namei_zone, cn.cn_pnbuf);
		cn.cn_flags &= ~HASBUF;
	}

	if (!error) {
		cn.cn_flags |= (cnp->cn_flags & HASBUF);
		cnp->cn_flags = cn.cn_flags;
	}

	return (error);
}

/*
 * relookup for RENAME namei operation.
 *
 * dvp is a unionfs vnode; it should be locked.
 */
int
unionfs_relookup_for_rename(struct vnode *dvp, struct componentname *cnp,
			    struct thread *td)
{
	int error;
	struct vnode *udvp;
	struct vnode *vp;
	struct componentname cn;

	udvp = UNIONFSVPTOUPPERVP(dvp);
	vp = NULLVP;

	error = unionfs_relookup(udvp, &vp, cnp, &cn, td, cnp->cn_nameptr,
	    strlen(cnp->cn_nameptr), RENAME);
	if (error)
		return (error);

	if (vp != NULLVP) {
		if (udvp == vp)
			vrele(vp);
		else
			vput(vp);
	}

	if (cn.cn_flags & HASBUF) {
		uma_zfree(namei_zone, cn.cn_pnbuf);
		cn.cn_flags &= ~HASBUF;
	}

	if (!error) {
		cn.cn_flags |= (cnp->cn_flags & HASBUF);
		cnp->cn_flags = cn.cn_flags;
	}

	return (error);
}

/*
 * Update the unionfs_node.
 *
 * uvp is the new, locked upper vnode.  The unionfs vnode's lock will be
 * switched to uvp's lock, and the lower vnode's lock will be released.
 */
static void
unionfs_node_update(struct unionfs_node *unp, struct vnode *uvp,
		    struct thread *td)
{
	unsigned	count, lockrec;
	struct vnode   *vp;
	struct vnode   *lvp;
	struct vnode   *dvp;

	vp = UNIONFSTOV(unp);
	lvp = unp->un_lowervp;
	ASSERT_VOP_ELOCKED(lvp, "unionfs_node_update");
	dvp = unp->un_dvp;

	/*
	 * lock update
	 */
	VI_LOCK(vp);
	unp->un_uppervp = uvp;
	vp->v_vnlock = uvp->v_vnlock;
	VI_UNLOCK(vp);
	lockrec = lvp->v_vnlock->lk_recurse;
	for (count = 0; count < lockrec; count++)
		vn_lock(uvp, LK_EXCLUSIVE | LK_CANRECURSE | LK_RETRY);

	/*
	 * cache update
	 */
	if (unp->un_path != NULL && dvp != NULLVP && vp->v_type == VDIR) {
		struct unionfs_node_hashhead *hd;

		VI_LOCK(dvp);
		hd = unionfs_get_hashhead(dvp, unp->un_path);
		LIST_REMOVE(unp, un_hash);
		LIST_INSERT_HEAD(hd, unp, un_hash);
		VI_UNLOCK(dvp);
	}
}

/*
 * Create a new shadow dir.
 *
 * udvp should be locked on entry and will be locked on return.
 *
 * If no error is returned, unp will be updated.
 */
int
unionfs_mkshadowdir(struct unionfs_mount *ump, struct vnode *udvp,
		    struct unionfs_node *unp, struct componentname *cnp,
		    struct thread *td)
{
	int		error;
	struct vnode   *lvp;
	struct vnode   *uvp;
	struct vattr	va;
	struct vattr	lva;
	struct componentname cn;
	struct mount   *mp;
	struct ucred   *cred;
	struct ucred   *credbk;
	struct uidinfo *rootinfo;

	if (unp->un_uppervp != NULLVP)
		return (EEXIST);

	lvp = unp->un_lowervp;
	uvp = NULLVP;
	credbk = cnp->cn_cred;

	/* Authority change to root */
	rootinfo = uifind((uid_t)0);
	cred = crdup(cnp->cn_cred);
	/*
	 * The calls to chgproccnt() are needed to compensate for change_ruid()
	 * calling chgproccnt().
	 */
	chgproccnt(cred->cr_ruidinfo, 1, 0);
	change_euid(cred, rootinfo);
	change_ruid(cred, rootinfo);
	change_svuid(cred, (uid_t)0);
	uifree(rootinfo);
	cnp->cn_cred = cred;

	memset(&cn, 0, sizeof(cn));

	if ((error = VOP_GETATTR(lvp, &lva, cnp->cn_cred)))
		goto unionfs_mkshadowdir_abort;

	if ((error = unionfs_relookup(udvp, &uvp, cnp, &cn, td, cnp->cn_nameptr, cnp->cn_namelen, CREATE)))
		goto unionfs_mkshadowdir_abort;
	if (uvp != NULLVP) {
		if (udvp == uvp)
			vrele(uvp);
		else
			vput(uvp);

		error = EEXIST;
		goto unionfs_mkshadowdir_free_out;
	}

	if ((error = vn_start_write(udvp, &mp, V_WAIT | PCATCH)))
		goto unionfs_mkshadowdir_free_out;
	unionfs_create_uppervattr_core(ump, &lva, &va, td);

	error = VOP_MKDIR(udvp, &uvp, &cn, &va);

	if (!error) {
		unionfs_node_update(unp, uvp, td);

		/*
		 * XXX Apply the intended uid/gid now that the directory
		 * has been created with root credentials.  Ignore errors.
		 */
		va.va_type = VNON;
		VOP_SETATTR(uvp, &va, cn.cn_cred);
	}
	vn_finished_write(mp);

unionfs_mkshadowdir_free_out:
	if (cn.cn_flags & HASBUF) {
		uma_zfree(namei_zone, cn.cn_pnbuf);
		cn.cn_flags &= ~HASBUF;
	}

unionfs_mkshadowdir_abort:
	cnp->cn_cred = credbk;
	chgproccnt(cred->cr_ruidinfo, -1, 0);
	crfree(cred);

	return (error);
}
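
/*
 * A sketch of when unionfs_mkshadowdir() comes into play (the helper below
 * is hypothetical and not part of this file): before anything can be
 * created under a directory that exists only in the lower layer, that
 * directory first has to be shadowed on the upper layer.
 */
#if 0
static int
example_ensure_upper_dir(struct unionfs_mount *ump, struct vnode *udvp,
    struct unionfs_node *dunp, struct componentname *cnp, struct thread *td)
{
	if (dunp->un_uppervp != NULLVP)
		return (0);		/* already shadowed */
	return (unionfs_mkshadowdir(ump, udvp, dunp, cnp, td));
}
#endif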

/*
 * Create a new whiteout.
 *
 * dvp should be locked on entry and will be locked on return.
 */
int
unionfs_mkwhiteout(struct vnode *dvp, struct componentname *cnp,
		   struct thread *td, char *path)
{
	int		error;
	struct vnode   *wvp;
	struct componentname cn;
	struct mount   *mp;

	if (path == NULL)
		path = cnp->cn_nameptr;

	wvp = NULLVP;
	if ((error = unionfs_relookup(dvp, &wvp, cnp, &cn, td, path, strlen(path), CREATE)))
		return (error);
	if (wvp != NULLVP) {
		if (cn.cn_flags & HASBUF) {
			uma_zfree(namei_zone, cn.cn_pnbuf);
			cn.cn_flags &= ~HASBUF;
		}
		if (dvp == wvp)
			vrele(wvp);
		else
			vput(wvp);

		return (EEXIST);
	}

	if ((error = vn_start_write(dvp, &mp, V_WAIT | PCATCH)))
		goto unionfs_mkwhiteout_free_out;
	error = VOP_WHITEOUT(dvp, &cn, CREATE);

	vn_finished_write(mp);

unionfs_mkwhiteout_free_out:
	if (cn.cn_flags & HASBUF) {
		uma_zfree(namei_zone, cn.cn_pnbuf);
		cn.cn_flags &= ~HASBUF;
	}

	return (error);
}

/*
 * Create a new vnode for creating a new shadow file.
 *
 * If an error is returned, *vpp will be invalid, otherwise it will hold a
 * locked, referenced and opened vnode.
 *
 * unp is never updated.
 */
static int
unionfs_vn_create_on_upper(struct vnode **vpp, struct vnode *udvp,
			   struct unionfs_node *unp, struct vattr *uvap,
			   struct thread *td)
{
	struct unionfs_mount *ump;
	struct vnode   *vp;
	struct vnode   *lvp;
	struct ucred   *cred;
	struct vattr	lva;
	int		fmode;
	int		error;
	struct componentname cn;

	ump = MOUNTTOUNIONFSMOUNT(UNIONFSTOV(unp)->v_mount);
	vp = NULLVP;
	lvp = unp->un_lowervp;
	cred = td->td_ucred;
	fmode = FFLAGS(O_WRONLY | O_CREAT | O_TRUNC | O_EXCL);
	error = 0;

	if ((error = VOP_GETATTR(lvp, &lva, cred)) != 0)
		return (error);
	unionfs_create_uppervattr_core(ump, &lva, uvap, td);

	if (unp->un_path == NULL)
		panic("unionfs: un_path is null");

	cn.cn_namelen = strlen(unp->un_path);
	cn.cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK);
	bcopy(unp->un_path, cn.cn_pnbuf, cn.cn_namelen + 1);
	cn.cn_nameiop = CREATE;
	cn.cn_flags = (LOCKPARENT | LOCKLEAF | HASBUF | SAVENAME | ISLASTCN);
	cn.cn_lkflags = LK_EXCLUSIVE;
	cn.cn_thread = td;
	cn.cn_cred = cred;
	cn.cn_nameptr = cn.cn_pnbuf;
	cn.cn_consume = 0;

	vref(udvp);
	if ((error = relookup(udvp, &vp, &cn)) != 0)
		goto unionfs_vn_create_on_upper_free_out2;
	vrele(udvp);

	if (vp != NULLVP) {
		if (vp == udvp)
			vrele(vp);
		else
			vput(vp);
		error = EEXIST;
		goto unionfs_vn_create_on_upper_free_out1;
	}

	if ((error = VOP_CREATE(udvp, &vp, &cn, uvap)) != 0)
		goto unionfs_vn_create_on_upper_free_out1;

	if ((error = VOP_OPEN(vp, fmode, cred, td, NULL)) != 0) {
		vput(vp);
		goto unionfs_vn_create_on_upper_free_out1;
	}
	VOP_ADD_WRITECOUNT(vp, 1);
	*vpp = vp;

unionfs_vn_create_on_upper_free_out1:
	VOP_UNLOCK(udvp, LK_RELEASE);

unionfs_vn_create_on_upper_free_out2:
	if (cn.cn_flags & HASBUF) {
		uma_zfree(namei_zone, cn.cn_pnbuf);
		cn.cn_flags &= ~HASBUF;
	}

	return (error);
}

/*
 * Copy from lvp to uvp.
 *
 * lvp and uvp should be locked and opened on entry and will be locked and
 * opened on return.
 */
static int
unionfs_copyfile_core(struct vnode *lvp, struct vnode *uvp,
		      struct ucred *cred, struct thread *td)
{
	int		error;
	off_t		offset;
	int		count;
	int		bufoffset;
	char           *buf;
	struct uio	uio;
	struct iovec	iov;

	error = 0;
	memset(&uio, 0, sizeof(uio));

	uio.uio_td = td;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_offset = 0;

	buf = malloc(MAXBSIZE, M_TEMP, M_WAITOK);

	while (error == 0) {
		offset = uio.uio_offset;

		uio.uio_iov = &iov;
		uio.uio_iovcnt = 1;
		iov.iov_base = buf;
		iov.iov_len = MAXBSIZE;
		uio.uio_resid = iov.iov_len;
		uio.uio_rw = UIO_READ;

		if ((error = VOP_READ(lvp, &uio, 0, cred)) != 0)
			break;
		if ((count = MAXBSIZE - uio.uio_resid) == 0)
			break;

		bufoffset = 0;
		while (bufoffset < count) {
			uio.uio_iov = &iov;
			uio.uio_iovcnt = 1;
			iov.iov_base = buf + bufoffset;
			iov.iov_len = count - bufoffset;
			uio.uio_offset = offset + bufoffset;
			uio.uio_resid = iov.iov_len;
			uio.uio_rw = UIO_WRITE;

			if ((error = VOP_WRITE(uvp, &uio, 0, cred)) != 0)
				break;

			bufoffset += (count - bufoffset) - uio.uio_resid;
		}

		uio.uio_offset = offset + bufoffset;
	}

	free(buf, M_TEMP);

	return (error);
}

/*
 * Copy a file from the lower layer to the upper layer.
 *
 * Set docopy to 1 to copy the file contents as well; set it to 0 to create
 * only an empty upper file.
 *
 * If no error is returned, unp will be updated.
 */
int
unionfs_copyfile(struct unionfs_node *unp, int docopy, struct ucred *cred,
		 struct thread *td)
{
	int		error;
	struct mount   *mp;
	struct vnode   *udvp;
	struct vnode   *lvp;
	struct vnode   *uvp;
	struct vattr	uva;

	lvp = unp->un_lowervp;
	uvp = NULLVP;

	if ((UNIONFSTOV(unp)->v_mount->mnt_flag & MNT_RDONLY))
		return (EROFS);
	if (unp->un_dvp == NULLVP)
		return (EINVAL);
	if (unp->un_uppervp != NULLVP)
		return (EEXIST);
	udvp = VTOUNIONFS(unp->un_dvp)->un_uppervp;
	if (udvp == NULLVP)
		return (EROFS);
	if ((udvp->v_mount->mnt_flag & MNT_RDONLY))
		return (EROFS);

	error = VOP_ACCESS(lvp, VREAD, cred, td);
	if (error != 0)
		return (error);

	if ((error = vn_start_write(udvp, &mp, V_WAIT | PCATCH)) != 0)
		return (error);
	error = unionfs_vn_create_on_upper(&uvp, udvp, unp, &uva, td);
	if (error != 0) {
		vn_finished_write(mp);
		return (error);
	}

	if (docopy != 0) {
		error = VOP_OPEN(lvp, FREAD, cred, td, NULL);
		if (error == 0) {
			error = unionfs_copyfile_core(lvp, uvp, cred, td);
			VOP_CLOSE(lvp, FREAD, cred, td);
		}
	}
	VOP_CLOSE(uvp, FWRITE, cred, td);
	VOP_ADD_WRITECOUNT(uvp, -1);

	vn_finished_write(mp);

	if (error == 0) {
		/* Reset the attributes. Ignore errors. */
		uva.va_type = VNON;
		VOP_SETATTR(uvp, &uva, cred);
	}

	unionfs_node_update(unp, uvp, td);

	return (error);
}
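
/*
 * A sketch of how the docopy argument is meant to be used (the helper below
 * is hypothetical): a write-open of a lower-only file copies the data up,
 * while an open with O_TRUNC only needs the upper vnode to exist, so the
 * data copy can be skipped.
 */
#if 0
static int
example_copy_up(struct unionfs_node *unp, int fmode, struct ucred *cred,
    struct thread *td)
{
	int docopy;

	/* Copy the contents unless they are about to be thrown away. */
	docopy = ((fmode & O_TRUNC) == 0);
	return (unionfs_copyfile(unp, docopy, cred, td));
}
#endif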

/*
 * Check whether vp may be removed with rmdir, i.e. whether the directory
 * is empty once whiteouts are taken into account.
 *
 * vp is a unionfs vnode.
 * vp should be locked.
 */
int
unionfs_check_rmdir(struct vnode *vp, struct ucred *cred, struct thread *td)
{
	int		error;
	int		eofflag;
	int		lookuperr;
	struct vnode   *uvp;
	struct vnode   *lvp;
	struct vnode   *tvp;
	struct vattr	va;
	struct componentname cn;
	/*
	 * The size of buf needs to be larger than DIRBLKSIZ.
	 */
	char		buf[256 * 6];
	struct dirent  *dp;
	struct dirent  *edp;
	struct uio	uio;
	struct iovec	iov;

	ASSERT_VOP_ELOCKED(vp, "unionfs_check_rmdir");

	eofflag = 0;
	uvp = UNIONFSVPTOUPPERVP(vp);
	lvp = UNIONFSVPTOLOWERVP(vp);

	/* check opaque */
	if ((error = VOP_GETATTR(uvp, &va, cred)) != 0)
		return (error);
	if (va.va_flags & OPAQUE)
		return (0);

	/* open vnode */
#ifdef MAC
	if ((error = mac_vnode_check_open(cred, vp, VEXEC|VREAD)) != 0)
		return (error);
#endif
	if ((error = VOP_ACCESS(vp, VEXEC|VREAD, cred, td)) != 0)
		return (error);
	if ((error = VOP_OPEN(vp, FREAD, cred, td, NULL)) != 0)
		return (error);

	uio.uio_rw = UIO_READ;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_td = td;
	uio.uio_offset = 0;

#ifdef MAC
	error = mac_vnode_check_readdir(td->td_ucred, lvp);
#endif
	while (!error && !eofflag) {
		iov.iov_base = buf;
		iov.iov_len = sizeof(buf);
		uio.uio_iov = &iov;
		uio.uio_iovcnt = 1;
		uio.uio_resid = iov.iov_len;

		error = VOP_READDIR(lvp, &uio, cred, &eofflag, NULL, NULL);
		if (error != 0)
			break;
		if (eofflag == 0 && uio.uio_resid == sizeof(buf)) {
#ifdef DIAGNOSTIC
			panic("bad readdir response from lower FS.");
#endif
			break;
		}

		edp = (struct dirent*)&buf[sizeof(buf) - uio.uio_resid];
		for (dp = (struct dirent*)buf; !error && dp < edp;
		     dp = (struct dirent*)((caddr_t)dp + dp->d_reclen)) {
			if (dp->d_type == DT_WHT ||
			    (dp->d_namlen == 1 && dp->d_name[0] == '.') ||
			    (dp->d_namlen == 2 && !bcmp(dp->d_name, "..", 2)))
				continue;

			cn.cn_namelen = dp->d_namlen;
			cn.cn_pnbuf = NULL;
			cn.cn_nameptr = dp->d_name;
			cn.cn_nameiop = LOOKUP;
			cn.cn_flags = (LOCKPARENT | LOCKLEAF | SAVENAME | RDONLY | ISLASTCN);
			cn.cn_lkflags = LK_EXCLUSIVE;
			cn.cn_thread = td;
			cn.cn_cred = cred;
			cn.cn_consume = 0;

			/*
			 * Check that the entry really exists in the lower
			 * layer; readdir sometimes returns stale entries.
			 */
			lookuperr = VOP_LOOKUP(lvp, &tvp, &cn);

			if (!lookuperr)
				vput(tvp);
			else
				continue; /* skip entry */

			/*
			 * Check the upper layer: if the name has neither a
			 * real entry nor a whiteout there, the directory is
			 * not empty.
			 */
			cn.cn_flags = (LOCKPARENT | LOCKLEAF | SAVENAME | RDONLY | ISLASTCN);
			lookuperr = VOP_LOOKUP(uvp, &tvp, &cn);

			if (!lookuperr)
				vput(tvp);

			/* ignore exist or whiteout entry */
			if (!lookuperr ||
			    (lookuperr == ENOENT && (cn.cn_flags & ISWHITEOUT)))
				continue;

			error = ENOTEMPTY;
		}
	}

	/* close vnode */
	VOP_CLOSE(vp, FREAD, cred, td);

	return (error);
}
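
/*
 * A sketch of the expected rmdir-side usage (the surrounding logic is
 * hypothetical): when the directory exists in both layers, it may only be
 * removed if nothing remains visible once whiteouts are accounted for,
 * which is what unionfs_check_rmdir() verifies.
 */
#if 0
static int
example_rmdir_check(struct vnode *vp, struct ucred *cred, struct thread *td)
{
	struct unionfs_node *unp;

	unp = VTOUNIONFS(vp);
	if (unp->un_uppervp == NULLVP || unp->un_lowervp == NULLVP)
		return (0);	/* only one layer; nothing extra to check */
	return (unionfs_check_rmdir(vp, cred, td));
}
#endif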

#ifdef DIAGNOSTIC

struct vnode   *
unionfs_checkuppervp(struct vnode *vp, char *fil, int lno)
{
	struct unionfs_node *unp;

	unp = VTOUNIONFS(vp);

#ifdef notyet
	if (vp->v_op != unionfs_vnodeop_p) {
		printf("unionfs_checkuppervp: on non-unionfs-node.\n");
#ifdef KDB
		kdb_enter(KDB_WHY_UNIONFS,
		    "unionfs_checkuppervp: on non-unionfs-node.\n");
#endif
		panic("unionfs_checkuppervp");
	}
#endif
	return (unp->un_uppervp);
}

struct vnode   *
unionfs_checklowervp(struct vnode *vp, char *fil, int lno)
{
	struct unionfs_node *unp;

	unp = VTOUNIONFS(vp);

#ifdef notyet
	if (vp->v_op != unionfs_vnodeop_p) {
		printf("unionfs_checklowervp: on non-unionfs-node.\n");
#ifdef KDB
		kdb_enter(KDB_WHY_UNIONFS,
		    "unionfs_checklowervp: on non-unionfs-node.\n");
#endif
		panic("unionfs_checklowervp");
	}
#endif
	return (unp->un_lowervp);
}
#endif