1/*-
2 * Copyright (c) 1994 Jan-Simon Pendry
3 * Copyright (c) 1994
4 *	The Regents of the University of California.  All rights reserved.
5 * Copyright (c) 2005, 2006, 2012 Masanori Ozawa <ozawa@ongs.co.jp>, ONGS Inc.
6 * Copyright (c) 2006, 2012 Daichi Goto <daichi@freebsd.org>
7 *
8 * This code is derived from software contributed to Berkeley by
9 * Jan-Simon Pendry.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 *    notice, this list of conditions and the following disclaimer in the
18 *    documentation and/or other materials provided with the distribution.
19 * 4. Neither the name of the University nor the names of its contributors
20 *    may be used to endorse or promote products derived from this software
21 *    without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 *
35 *	@(#)union_subr.c	8.20 (Berkeley) 5/20/95
36 * $FreeBSD$
37 */
38
39#include <sys/param.h>
40#include <sys/systm.h>
41#include <sys/kernel.h>
42#include <sys/lock.h>
43#include <sys/mutex.h>
44#include <sys/malloc.h>
45#include <sys/mount.h>
46#include <sys/namei.h>
47#include <sys/proc.h>
48#include <sys/vnode.h>
49#include <sys/dirent.h>
50#include <sys/fcntl.h>
51#include <sys/filedesc.h>
52#include <sys/stat.h>
53#include <sys/resourcevar.h>
54
55#include <security/mac/mac_framework.h>
56
57#include <vm/uma.h>
58
59#include <fs/unionfs/union.h>
60
/*
 * Size argument handed to hashinit() when the child cache of a directory
 * node is created in unionfs_nodeget().
 */
#define NUNIONFSNODECACHE 16

/*
 * malloc(9) types used in this file:
 *   M_UNIONFSHASH - the per-directory hash tables (hashinit/hashdestroy),
 *   M_UNIONFSNODE - struct unionfs_node, the private part of a vnode,
 *   M_UNIONFSPATH - the saved component name (un_path) of a node.
 */
static MALLOC_DEFINE(M_UNIONFSHASH, "UNIONFS hash", "UNIONFS hash table");
MALLOC_DEFINE(M_UNIONFSNODE, "UNIONFS node", "UNIONFS vnode private part");
MALLOC_DEFINE(M_UNIONFSPATH, "UNIONFS path", "UNIONFS path private part");
66
/*
 * File system initialization hook.
 *
 * Emits a debug trace and returns 0; vfsp is not examined.
 */
int
unionfs_init(struct vfsconf *vfsp)
{

	/* This trace is printed during system boot. */
	UNIONFSDEBUG("unionfs_init\n");

	return (0);
}
76
/*
 * File system teardown hook.
 *
 * Performs no work and always returns 0; vfsp is not examined.
 */
int
unionfs_uninit(struct vfsconf *vfsp)
{

	return (0);
}
85
86static struct unionfs_node_hashhead *
87unionfs_get_hashhead(struct vnode *dvp, char *path)
88{
89	int		count;
90	char		hash;
91	struct unionfs_node *unp;
92
93	hash = 0;
94	unp = VTOUNIONFS(dvp);
95	if (path != NULL) {
96		for (count = 0; path[count]; count++)
97			hash += path[count];
98	}
99
100	return (&(unp->un_hashtbl[hash & (unp->un_hashmask)]));
101}
102
103/*
104 * Get the cached vnode.
105 */
106static struct vnode *
107unionfs_get_cached_vnode(struct vnode *uvp, struct vnode *lvp,
108			struct vnode *dvp, char *path)
109{
110	struct unionfs_node_hashhead *hd;
111	struct unionfs_node *unp;
112	struct vnode   *vp;
113
114	KASSERT((uvp == NULLVP || uvp->v_type == VDIR),
115	    ("unionfs_get_cached_vnode: v_type != VDIR"));
116	KASSERT((lvp == NULLVP || lvp->v_type == VDIR),
117	    ("unionfs_get_cached_vnode: v_type != VDIR"));
118
119	VI_LOCK(dvp);
120	hd = unionfs_get_hashhead(dvp, path);
121	LIST_FOREACH(unp, hd, un_hash) {
122		if (!strcmp(unp->un_path, path)) {
123			vp = UNIONFSTOV(unp);
124			VI_LOCK_FLAGS(vp, MTX_DUPOK);
125			VI_UNLOCK(dvp);
126			vp->v_iflag &= ~VI_OWEINACT;
127			if ((vp->v_iflag & (VI_DOOMED | VI_DOINGINACT)) != 0) {
128				VI_UNLOCK(vp);
129				vp = NULLVP;
130			} else
131				VI_UNLOCK(vp);
132			return (vp);
133		}
134	}
135	VI_UNLOCK(dvp);
136
137	return (NULLVP);
138}
139
140/*
141 * Add the new vnode into cache.
142 */
143static struct vnode *
144unionfs_ins_cached_vnode(struct unionfs_node *uncp,
145			struct vnode *dvp, char *path)
146{
147	struct unionfs_node_hashhead *hd;
148	struct unionfs_node *unp;
149	struct vnode   *vp;
150
151	KASSERT((uncp->un_uppervp==NULLVP || uncp->un_uppervp->v_type==VDIR),
152	    ("unionfs_ins_cached_vnode: v_type != VDIR"));
153	KASSERT((uncp->un_lowervp==NULLVP || uncp->un_lowervp->v_type==VDIR),
154	    ("unionfs_ins_cached_vnode: v_type != VDIR"));
155
156	VI_LOCK(dvp);
157	hd = unionfs_get_hashhead(dvp, path);
158	LIST_FOREACH(unp, hd, un_hash) {
159		if (!strcmp(unp->un_path, path)) {
160			vp = UNIONFSTOV(unp);
161			VI_LOCK_FLAGS(vp, MTX_DUPOK);
162			vp->v_iflag &= ~VI_OWEINACT;
163			if ((vp->v_iflag & (VI_DOOMED | VI_DOINGINACT)) != 0) {
164				LIST_INSERT_HEAD(hd, uncp, un_hash);
165				VI_UNLOCK(vp);
166				vp = NULLVP;
167			} else
168				VI_UNLOCK(vp);
169			VI_UNLOCK(dvp);
170			return (vp);
171		}
172	}
173
174	LIST_INSERT_HEAD(hd, uncp, un_hash);
175	VI_UNLOCK(dvp);
176
177	return (NULLVP);
178}
179
180/*
181 * Remove the vnode.
182 */
183static void
184unionfs_rem_cached_vnode(struct unionfs_node *unp, struct vnode *dvp)
185{
186	KASSERT((unp != NULL), ("unionfs_rem_cached_vnode: null node"));
187	KASSERT((dvp != NULLVP),
188	    ("unionfs_rem_cached_vnode: null parent vnode"));
189	KASSERT((unp->un_hash.le_prev != NULL),
190	    ("unionfs_rem_cached_vnode: null hash"));
191
192	VI_LOCK(dvp);
193	LIST_REMOVE(unp, un_hash);
194	unp->un_hash.le_next = NULL;
195	unp->un_hash.le_prev = NULL;
196	VI_UNLOCK(dvp);
197}
198
/*
 * Make a new or get existing unionfs node.
 *
 * uppervp and lowervp should be unlocked on entry: the new unionfs vnode
 * shares its lock with uppervp (or with lowervp when there is no upper
 * vnode), so locking the unionfs vnode locks that vnode as well.  To
 * avoid deadlock, do not hold more than one of these locks at a time.
 *
 * mp       - the unionfs mount the vnode belongs to.
 * uppervp  - upper layer vnode, or NULLVP.
 * lowervp  - lower layer vnode, or NULLVP.  At least one of uppervp and
 *            lowervp must be non-NULL (otherwise this panics).
 * dvp      - parent unionfs directory vnode, or NULLVP.  NULLVP is only
 *            accepted when uppervp and lowervp are the mount's own upper
 *            and lower vnodes (the root case).
 * vpp      - receives the resulting unionfs vnode.
 * cnp      - optional component name; supplies the name used for the
 *            directory cache and the lock flags applied to the result.
 * td       - not used in this function.
 *
 * Returns 0 with a referenced vnode in *vpp (locked according to
 * cnp->cn_lkflags when a lock type is requested), EINVAL when dvp is
 * missing for a non-root node, or the error from getnewvnode() or
 * insmntque().
 */
int
unionfs_nodeget(struct mount *mp, struct vnode *uppervp,
		struct vnode *lowervp, struct vnode *dvp,
		struct vnode **vpp, struct componentname *cnp,
		struct thread *td)
{
	struct unionfs_mount *ump;
	struct unionfs_node *unp;
	struct vnode   *vp;
	int		error;
	int		lkflags;
	enum vtype	vt;
	char	       *path;

	ump = MOUNTTOUNIONFSMOUNT(mp);
	lkflags = (cnp ? cnp->cn_lkflags : 0);
	path = (cnp ? cnp->cn_nameptr : NULL);
	*vpp = NULLVP;

	if (uppervp == NULLVP && lowervp == NULLVP)
		panic("unionfs_nodeget: upper and lower is null");

	/* The upper vnode, when present, decides the type of the new vnode. */
	vt = (uppervp != NULLVP ? uppervp->v_type : lowervp->v_type);

	/* If it has no ISLASTCN flag, path check is skipped. */
	if (cnp && !(cnp->cn_flags & ISLASTCN))
		path = NULL;

	/* check the cache */
	/* Only directories that have both a name and a parent are cached. */
	if (path != NULL && dvp != NULLVP && vt == VDIR) {
		vp = unionfs_get_cached_vnode(uppervp, lowervp, dvp, path);
		if (vp != NULLVP) {
			vref(vp);
			*vpp = vp;
			goto unionfs_nodeget_out;
		}
	}

	if ((uppervp == NULLVP || ump->um_uppervp != uppervp) ||
	    (lowervp == NULLVP || ump->um_lowervp != lowervp)) {
		/* dvp will be NULLVP only in case of root vnode. */
		if (dvp == NULLVP)
			return (EINVAL);
	}
	unp = malloc(sizeof(struct unionfs_node),
	    M_UNIONFSNODE, M_WAITOK | M_ZERO);

	error = getnewvnode("unionfs", mp, &unionfs_vnodeops, &vp);
	if (error != 0) {
		free(unp, M_UNIONFSNODE);
		return (error);
	}
	error = insmntque(vp, mp);	/* XXX: Too early for mpsafe fs */
	if (error != 0) {
		/*
		 * NOTE(review): vp is not released explicitly on this path;
		 * presumably insmntque() disposes of it on failure - confirm.
		 */
		free(unp, M_UNIONFSNODE);
		return (error);
	}
	/* The node keeps its own references to parent, upper and lower. */
	if (dvp != NULLVP)
		vref(dvp);
	if (uppervp != NULLVP)
		vref(uppervp);
	if (lowervp != NULLVP)
		vref(lowervp);

	/* Directories get a hash table in which their children are cached. */
	if (vt == VDIR)
		unp->un_hashtbl = hashinit(NUNIONFSNODECACHE, M_UNIONFSHASH,
		    &(unp->un_hashmask));

	unp->un_vnode = vp;
	unp->un_uppervp = uppervp;
	unp->un_lowervp = lowervp;
	unp->un_dvp = dvp;
	/* Share the lock of the upper vnode, or of the lower one if none. */
	if (uppervp != NULLVP)
		vp->v_vnlock = uppervp->v_vnlock;
	else
		vp->v_vnlock = lowervp->v_vnlock;

	/* Keep a private NUL-terminated copy of the component name. */
	if (path != NULL) {
		unp->un_path = (char *)
		    malloc(cnp->cn_namelen +1, M_UNIONFSPATH, M_WAITOK|M_ZERO);
		bcopy(cnp->cn_nameptr, unp->un_path, cnp->cn_namelen);
		unp->un_path[cnp->cn_namelen] = '\0';
	}
	vp->v_type = vt;
	vp->v_data = unp;

	/* The pair of mount-level upper and lower vnodes is the root. */
	if ((uppervp != NULLVP && ump->um_uppervp == uppervp) &&
	    (lowervp != NULLVP && ump->um_lowervp == lowervp))
		vp->v_vflag |= VV_ROOT;

	if (path != NULL && dvp != NULLVP && vt == VDIR)
		*vpp = unionfs_ins_cached_vnode(unp, dvp, path);
	if ((*vpp) != NULLVP) {
		/*
		 * A usable vnode with the same name was already in the cache,
		 * so the one built above is not needed: give back the
		 * references taken for it, detach its layer pointers, release
		 * it, and return the cached vnode with a new reference.
		 */
		if (dvp != NULLVP)
			vrele(dvp);
		if (uppervp != NULLVP)
			vrele(uppervp);
		if (lowervp != NULLVP)
			vrele(lowervp);

		unp->un_uppervp = NULLVP;
		unp->un_lowervp = NULLVP;
		unp->un_dvp = NULLVP;
		vrele(vp);
		vp = *vpp;
		vref(vp);
	} else
		*vpp = vp;

unionfs_nodeget_out:
	/* Lock the result only when the caller asked for a lock type. */
	if (lkflags & LK_TYPE_MASK)
		vn_lock(vp, lkflags | LK_RETRY);

	return (0);
}
321
/*
 * Clean up the unionfs node.
 *
 * Detaches the private unionfs_node from vp, unlocks and releases the
 * lower, upper and parent vnodes it referenced, removes the node from its
 * parent's child cache, and frees the saved path, the child hash table,
 * all per-process status entries and finally the node itself.
 *
 * On return vp uses its own v_lock again, which has been acquired
 * exclusively here, and vp->v_data is NULL.  td is not used.
 */
void
unionfs_noderem(struct vnode *vp, struct thread *td)
{
	int		count;
	struct unionfs_node *unp, *unp_t1, *unp_t2;
	struct unionfs_node_hashhead *hd;
	struct unionfs_node_status *unsp, *unsp_tmp;
	struct vnode   *lvp;
	struct vnode   *uvp;
	struct vnode   *dvp;

	/*
	 * Use the interlock to protect the clearing of v_data to
	 * prevent faults in unionfs_lock().
	 */
	VI_LOCK(vp);
	unp = VTOUNIONFS(vp);
	lvp = unp->un_lowervp;
	uvp = unp->un_uppervp;
	dvp = unp->un_dvp;
	unp->un_lowervp = unp->un_uppervp = NULLVP;
	/* Stop sharing the layer's lock; fall back to the vnode's own lock. */
	vp->v_vnlock = &(vp->v_lock);
	vp->v_data = NULL;
	vp->v_object = NULL;
	VI_UNLOCK(vp);

	if (lvp != NULLVP)
		VOP_UNLOCK(lvp, LK_RELEASE);
	if (uvp != NULLVP)
		VOP_UNLOCK(uvp, LK_RELEASE);

	/* A non-NULL le_prev means the node is linked in dvp's child cache. */
	if (dvp != NULLVP && unp->un_hash.le_prev != NULL)
		unionfs_rem_cached_vnode(unp, dvp);

	/* v_vnlock now refers to vp's own v_lock (set above). */
	if (lockmgr(vp->v_vnlock, LK_EXCLUSIVE, VI_MTX(vp)) != 0)
		panic("the lock for deletion is unacquirable.");

	/* Drop the references held by the node on the other vnodes. */
	if (lvp != NULLVP)
		vrele(lvp);
	if (uvp != NULLVP)
		vrele(uvp);
	if (dvp != NULLVP) {
		vrele(dvp);
		unp->un_dvp = NULLVP;
	}
	if (unp->un_path != NULL) {
		free(unp->un_path, M_UNIONFSPATH);
		unp->un_path = NULL;
	}

	if (unp->un_hashtbl != NULL) {
		/*
		 * Unlink every child still cached in this directory and clear
		 * its link pointers, so that the child's own cleanup sees a
		 * NULL le_prev and skips the cache removal; then destroy the
		 * table.
		 */
		for (count = 0; count <= unp->un_hashmask; count++) {
			hd = unp->un_hashtbl + count;
			LIST_FOREACH_SAFE(unp_t1, hd, un_hash, unp_t2) {
				LIST_REMOVE(unp_t1, un_hash);
				unp_t1->un_hash.le_next = NULL;
				unp_t1->un_hash.le_prev = NULL;
			}
		}
		hashdestroy(unp->un_hashtbl, M_UNIONFSHASH, unp->un_hashmask);
	}

	/* Free the per-process status entries attached to the node. */
	LIST_FOREACH_SAFE(unsp, &(unp->un_unshead), uns_list, unsp_tmp) {
		LIST_REMOVE(unsp, uns_list);
		free(unsp, M_TEMP);
	}
	free(unp, M_UNIONFSNODE);
}
393
394/*
395 * Get the unionfs node status.
396 * You need exclusive lock this vnode.
397 */
398void
399unionfs_get_node_status(struct unionfs_node *unp, struct thread *td,
400			struct unionfs_node_status **unspp)
401{
402	struct unionfs_node_status *unsp;
403	pid_t pid = td->td_proc->p_pid;
404
405	KASSERT(NULL != unspp, ("null pointer"));
406	ASSERT_VOP_ELOCKED(UNIONFSTOV(unp), "unionfs_get_node_status");
407
408	LIST_FOREACH(unsp, &(unp->un_unshead), uns_list) {
409		if (unsp->uns_pid == pid) {
410			*unspp = unsp;
411			return;
412		}
413	}
414
415	/* create a new unionfs node status */
416	unsp = malloc(sizeof(struct unionfs_node_status),
417	    M_TEMP, M_WAITOK | M_ZERO);
418
419	unsp->uns_pid = pid;
420	LIST_INSERT_HEAD(&(unp->un_unshead), unsp, uns_list);
421
422	*unspp = unsp;
423}
424
425/*
426 * Remove the unionfs node status, if you can.
427 * You need exclusive lock this vnode.
428 */
429void
430unionfs_tryrem_node_status(struct unionfs_node *unp,
431			   struct unionfs_node_status *unsp)
432{
433	KASSERT(NULL != unsp, ("null pointer"));
434	ASSERT_VOP_ELOCKED(UNIONFSTOV(unp), "unionfs_get_node_status");
435
436	if (0 < unsp->uns_lower_opencnt || 0 < unsp->uns_upper_opencnt)
437		return;
438
439	LIST_REMOVE(unsp, uns_list);
440	free(unsp, M_TEMP);
441}
442
443/*
444 * Create upper node attr.
445 */
446void
447unionfs_create_uppervattr_core(struct unionfs_mount *ump,
448			       struct vattr *lva,
449			       struct vattr *uva,
450			       struct thread *td)
451{
452	VATTR_NULL(uva);
453	uva->va_type = lva->va_type;
454	uva->va_atime = lva->va_atime;
455	uva->va_mtime = lva->va_mtime;
456	uva->va_ctime = lva->va_ctime;
457
458	switch (ump->um_copymode) {
459	case UNIONFS_TRANSPARENT:
460		uva->va_mode = lva->va_mode;
461		uva->va_uid = lva->va_uid;
462		uva->va_gid = lva->va_gid;
463		break;
464	case UNIONFS_MASQUERADE:
465		if (ump->um_uid == lva->va_uid) {
466			uva->va_mode = lva->va_mode & 077077;
467			uva->va_mode |= (lva->va_type == VDIR ? ump->um_udir : ump->um_ufile) & 0700;
468			uva->va_uid = lva->va_uid;
469			uva->va_gid = lva->va_gid;
470		} else {
471			uva->va_mode = (lva->va_type == VDIR ? ump->um_udir : ump->um_ufile);
472			uva->va_uid = ump->um_uid;
473			uva->va_gid = ump->um_gid;
474		}
475		break;
476	default:		/* UNIONFS_TRADITIONAL */
477		uva->va_mode = 0777 & ~td->td_proc->p_fd->fd_cmask;
478		uva->va_uid = ump->um_uid;
479		uva->va_gid = ump->um_gid;
480		break;
481	}
482}
483
484/*
485 * Create upper node attr.
486 */
487int
488unionfs_create_uppervattr(struct unionfs_mount *ump,
489			  struct vnode *lvp,
490			  struct vattr *uva,
491			  struct ucred *cred,
492			  struct thread *td)
493{
494	int		error;
495	struct vattr	lva;
496
497	if ((error = VOP_GETATTR(lvp, &lva, cred)))
498		return (error);
499
500	unionfs_create_uppervattr_core(ump, &lva, uva, td);
501
502	return (error);
503}
504
505/*
506 * relookup
507 *
508 * dvp should be locked on entry and will be locked on return.
509 *
510 * If an error is returned, *vpp will be invalid, otherwise it will hold a
511 * locked, referenced vnode. If *vpp == dvp then remember that only one
512 * LK_EXCLUSIVE lock is held.
513 */
514int
515unionfs_relookup(struct vnode *dvp, struct vnode **vpp,
516		 struct componentname *cnp, struct componentname *cn,
517		 struct thread *td, char *path, int pathlen, u_long nameiop)
518{
519	int	error;
520
521	cn->cn_namelen = pathlen;
522	cn->cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK);
523	bcopy(path, cn->cn_pnbuf, pathlen);
524	cn->cn_pnbuf[pathlen] = '\0';
525
526	cn->cn_nameiop = nameiop;
527	cn->cn_flags = (LOCKPARENT | LOCKLEAF | HASBUF | SAVENAME | ISLASTCN);
528	cn->cn_lkflags = LK_EXCLUSIVE;
529	cn->cn_thread = td;
530	cn->cn_cred = cnp->cn_cred;
531
532	cn->cn_nameptr = cn->cn_pnbuf;
533	cn->cn_consume = cnp->cn_consume;
534
535	if (nameiop == DELETE)
536		cn->cn_flags |= (cnp->cn_flags & (DOWHITEOUT | SAVESTART));
537	else if (RENAME == nameiop)
538		cn->cn_flags |= (cnp->cn_flags & SAVESTART);
539
540	vref(dvp);
541	VOP_UNLOCK(dvp, LK_RELEASE);
542
543	if ((error = relookup(dvp, vpp, cn))) {
544		uma_zfree(namei_zone, cn->cn_pnbuf);
545		cn->cn_flags &= ~HASBUF;
546		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
547	} else
548		vrele(dvp);
549
550	return (error);
551}
552
553/*
554 * relookup for CREATE namei operation.
555 *
556 * dvp is unionfs vnode. dvp should be locked.
557 *
558 * If it called 'unionfs_copyfile' function by unionfs_link etc,
559 * VOP_LOOKUP information is broken.
560 * So it need relookup in order to create link etc.
561 */
562int
563unionfs_relookup_for_create(struct vnode *dvp, struct componentname *cnp,
564			    struct thread *td)
565{
566	int	error;
567	struct vnode *udvp;
568	struct vnode *vp;
569	struct componentname cn;
570
571	udvp = UNIONFSVPTOUPPERVP(dvp);
572	vp = NULLVP;
573
574	error = unionfs_relookup(udvp, &vp, cnp, &cn, td, cnp->cn_nameptr,
575	    strlen(cnp->cn_nameptr), CREATE);
576	if (error)
577		return (error);
578
579	if (vp != NULLVP) {
580		if (udvp == vp)
581			vrele(vp);
582		else
583			vput(vp);
584
585		error = EEXIST;
586	}
587
588	if (cn.cn_flags & HASBUF) {
589		uma_zfree(namei_zone, cn.cn_pnbuf);
590		cn.cn_flags &= ~HASBUF;
591	}
592
593	if (!error) {
594		cn.cn_flags |= (cnp->cn_flags & HASBUF);
595		cnp->cn_flags = cn.cn_flags;
596	}
597
598	return (error);
599}
600
601/*
602 * relookup for DELETE namei operation.
603 *
604 * dvp is unionfs vnode. dvp should be locked.
605 */
606int
607unionfs_relookup_for_delete(struct vnode *dvp, struct componentname *cnp,
608			    struct thread *td)
609{
610	int	error;
611	struct vnode *udvp;
612	struct vnode *vp;
613	struct componentname cn;
614
615	udvp = UNIONFSVPTOUPPERVP(dvp);
616	vp = NULLVP;
617
618	error = unionfs_relookup(udvp, &vp, cnp, &cn, td, cnp->cn_nameptr,
619	    strlen(cnp->cn_nameptr), DELETE);
620	if (error)
621		return (error);
622
623	if (vp == NULLVP)
624		error = ENOENT;
625	else {
626		if (udvp == vp)
627			vrele(vp);
628		else
629			vput(vp);
630	}
631
632	if (cn.cn_flags & HASBUF) {
633		uma_zfree(namei_zone, cn.cn_pnbuf);
634		cn.cn_flags &= ~HASBUF;
635	}
636
637	if (!error) {
638		cn.cn_flags |= (cnp->cn_flags & HASBUF);
639		cnp->cn_flags = cn.cn_flags;
640	}
641
642	return (error);
643}
644
645/*
646 * relookup for RENAME namei operation.
647 *
648 * dvp is unionfs vnode. dvp should be locked.
649 */
650int
651unionfs_relookup_for_rename(struct vnode *dvp, struct componentname *cnp,
652			    struct thread *td)
653{
654	int error;
655	struct vnode *udvp;
656	struct vnode *vp;
657	struct componentname cn;
658
659	udvp = UNIONFSVPTOUPPERVP(dvp);
660	vp = NULLVP;
661
662	error = unionfs_relookup(udvp, &vp, cnp, &cn, td, cnp->cn_nameptr,
663	    strlen(cnp->cn_nameptr), RENAME);
664	if (error)
665		return (error);
666
667	if (vp != NULLVP) {
668		if (udvp == vp)
669			vrele(vp);
670		else
671			vput(vp);
672	}
673
674	if (cn.cn_flags & HASBUF) {
675		uma_zfree(namei_zone, cn.cn_pnbuf);
676		cn.cn_flags &= ~HASBUF;
677	}
678
679	if (!error) {
680		cn.cn_flags |= (cnp->cn_flags & HASBUF);
681		cnp->cn_flags = cn.cn_flags;
682	}
683
684	return (error);
685
686}
687
688/*
689 * Update the unionfs_node.
690 *
691 * uvp is new locked upper vnode. unionfs vnode's lock will be exchanged to the
692 * uvp's lock and lower's lock will be unlocked.
693 */
694static void
695unionfs_node_update(struct unionfs_node *unp, struct vnode *uvp,
696		    struct thread *td)
697{
698	unsigned	count, lockrec;
699	struct vnode   *vp;
700	struct vnode   *lvp;
701	struct vnode   *dvp;
702
703	vp = UNIONFSTOV(unp);
704	lvp = unp->un_lowervp;
705	ASSERT_VOP_ELOCKED(lvp, "unionfs_node_update");
706	dvp = unp->un_dvp;
707
708	/*
709	 * lock update
710	 */
711	VI_LOCK(vp);
712	unp->un_uppervp = uvp;
713	vp->v_vnlock = uvp->v_vnlock;
714	VI_UNLOCK(vp);
715	lockrec = lvp->v_vnlock->lk_recurse;
716	for (count = 0; count < lockrec; count++)
717		vn_lock(uvp, LK_EXCLUSIVE | LK_CANRECURSE | LK_RETRY);
718
719	/*
720	 * cache update
721	 */
722	if (unp->un_path != NULL && dvp != NULLVP && vp->v_type == VDIR) {
723		static struct unionfs_node_hashhead *hd;
724
725		VI_LOCK(dvp);
726		hd = unionfs_get_hashhead(dvp, unp->un_path);
727		LIST_REMOVE(unp, un_hash);
728		LIST_INSERT_HEAD(hd, unp, un_hash);
729		VI_UNLOCK(dvp);
730	}
731}
732
/*
 * Create a new shadow dir.
 *
 * A directory named by cnp is created in the upper directory udvp, with
 * attributes derived from the lower directory of unp, and unp is then
 * switched over to it.
 *
 * udvp should be locked on entry and will be locked on return.
 *
 * If no error returned, unp will be updated.
 *
 * Returns EEXIST when unp already has an upper vnode or when the name
 * already exists in udvp; otherwise 0 or the error of the failing step
 * (VOP_GETATTR, unionfs_relookup, vn_start_write or VOP_MKDIR).
 */
int
unionfs_mkshadowdir(struct unionfs_mount *ump, struct vnode *udvp,
		    struct unionfs_node *unp, struct componentname *cnp,
		    struct thread *td)
{
	int		error;
	struct vnode   *lvp;
	struct vnode   *uvp;
	struct vattr	va;
	struct vattr	lva;
	struct componentname cn;
	struct mount   *mp;
	struct ucred   *cred;
	struct ucred   *credbk;
	struct uidinfo *rootinfo;

	if (unp->un_uppervp != NULLVP)
		return (EEXIST);

	lvp = unp->un_lowervp;
	uvp = NULLVP;
	/* Remember the caller's credentials; they are restored on exit. */
	credbk = cnp->cn_cred;

	/* Authority change to root */
	/* Work on a duplicate whose effective, real and saved uid are 0. */
	rootinfo = uifind((uid_t)0);
	cred = crdup(cnp->cn_cred);
	/*
	 * The calls to chgproccnt() are needed to compensate for change_ruid()
	 * calling chgproccnt().
	 */
	chgproccnt(cred->cr_ruidinfo, 1, 0);
	change_euid(cred, rootinfo);
	change_ruid(cred, rootinfo);
	change_svuid(cred, (uid_t)0);
	uifree(rootinfo);
	cnp->cn_cred = cred;

	/* Zero cn so that the HASBUF test in the cleanup path is well defined. */
	memset(&cn, 0, sizeof(cn));

	if ((error = VOP_GETATTR(lvp, &lva, cnp->cn_cred)))
		goto unionfs_mkshadowdir_abort;

	/* Look the name up in the upper directory; it must not exist yet. */
	if ((error = unionfs_relookup(udvp, &uvp, cnp, &cn, td, cnp->cn_nameptr, cnp->cn_namelen, CREATE)))
		goto unionfs_mkshadowdir_abort;
	if (uvp != NULLVP) {
		if (udvp == uvp)
			vrele(uvp);
		else
			vput(uvp);

		error = EEXIST;
		goto unionfs_mkshadowdir_free_out;
	}

	if ((error = vn_start_write(udvp, &mp, V_WAIT | PCATCH)))
		goto unionfs_mkshadowdir_free_out;
	unionfs_create_uppervattr_core(ump, &lva, &va, td);

	error = VOP_MKDIR(udvp, &uvp, &cn, &va);

	if (!error) {
		unionfs_node_update(unp, uvp, td);

		/*
		 * XXX The bug which cannot set uid/gid was corrected.
		 * Ignore errors.
		 */
		va.va_type = VNON;
		VOP_SETATTR(uvp, &va, cn.cn_cred);
	}
	vn_finished_write(mp);

unionfs_mkshadowdir_free_out:
	/* Release the name buffer allocated by unionfs_relookup(). */
	if (cn.cn_flags & HASBUF) {
		uma_zfree(namei_zone, cn.cn_pnbuf);
		cn.cn_flags &= ~HASBUF;
	}

unionfs_mkshadowdir_abort:
	/* Put the caller's credentials back and drop the root duplicate. */
	cnp->cn_cred = credbk;
	chgproccnt(cred->cr_ruidinfo, -1, 0);
	crfree(cred);

	return (error);
}
825
826/*
827 * Create a new whiteout.
828 *
829 * dvp should be locked on entry and will be locked on return.
830 */
831int
832unionfs_mkwhiteout(struct vnode *dvp, struct componentname *cnp,
833		   struct thread *td, char *path)
834{
835	int		error;
836	struct vnode   *wvp;
837	struct componentname cn;
838	struct mount   *mp;
839
840	if (path == NULL)
841		path = cnp->cn_nameptr;
842
843	wvp = NULLVP;
844	if ((error = unionfs_relookup(dvp, &wvp, cnp, &cn, td, path, strlen(path), CREATE)))
845		return (error);
846	if (wvp != NULLVP) {
847		if (cn.cn_flags & HASBUF) {
848			uma_zfree(namei_zone, cn.cn_pnbuf);
849			cn.cn_flags &= ~HASBUF;
850		}
851		if (dvp == wvp)
852			vrele(wvp);
853		else
854			vput(wvp);
855
856		return (EEXIST);
857	}
858
859	if ((error = vn_start_write(dvp, &mp, V_WAIT | PCATCH)))
860		goto unionfs_mkwhiteout_free_out;
861	error = VOP_WHITEOUT(dvp, &cn, CREATE);
862
863	vn_finished_write(mp);
864
865unionfs_mkwhiteout_free_out:
866	if (cn.cn_flags & HASBUF) {
867		uma_zfree(namei_zone, cn.cn_pnbuf);
868		cn.cn_flags &= ~HASBUF;
869	}
870
871	return (error);
872}
873
874/*
875 * Create a new vnode for create a new shadow file.
876 *
877 * If an error is returned, *vpp will be invalid, otherwise it will hold a
878 * locked, referenced and opened vnode.
879 *
880 * unp is never updated.
881 */
882static int
883unionfs_vn_create_on_upper(struct vnode **vpp, struct vnode *udvp,
884			   struct unionfs_node *unp, struct vattr *uvap,
885			   struct thread *td)
886{
887	struct unionfs_mount *ump;
888	struct vnode   *vp;
889	struct vnode   *lvp;
890	struct ucred   *cred;
891	struct vattr	lva;
892	int		fmode;
893	int		error;
894	struct componentname cn;
895
896	ump = MOUNTTOUNIONFSMOUNT(UNIONFSTOV(unp)->v_mount);
897	vp = NULLVP;
898	lvp = unp->un_lowervp;
899	cred = td->td_ucred;
900	fmode = FFLAGS(O_WRONLY | O_CREAT | O_TRUNC | O_EXCL);
901	error = 0;
902
903	if ((error = VOP_GETATTR(lvp, &lva, cred)) != 0)
904		return (error);
905	unionfs_create_uppervattr_core(ump, &lva, uvap, td);
906
907	if (unp->un_path == NULL)
908		panic("unionfs: un_path is null");
909
910	cn.cn_namelen = strlen(unp->un_path);
911	cn.cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK);
912	bcopy(unp->un_path, cn.cn_pnbuf, cn.cn_namelen + 1);
913	cn.cn_nameiop = CREATE;
914	cn.cn_flags = (LOCKPARENT | LOCKLEAF | HASBUF | SAVENAME | ISLASTCN);
915	cn.cn_lkflags = LK_EXCLUSIVE;
916	cn.cn_thread = td;
917	cn.cn_cred = cred;
918	cn.cn_nameptr = cn.cn_pnbuf;
919	cn.cn_consume = 0;
920
921	vref(udvp);
922	if ((error = relookup(udvp, &vp, &cn)) != 0)
923		goto unionfs_vn_create_on_upper_free_out2;
924	vrele(udvp);
925
926	if (vp != NULLVP) {
927		if (vp == udvp)
928			vrele(vp);
929		else
930			vput(vp);
931		error = EEXIST;
932		goto unionfs_vn_create_on_upper_free_out1;
933	}
934
935	if ((error = VOP_CREATE(udvp, &vp, &cn, uvap)) != 0)
936		goto unionfs_vn_create_on_upper_free_out1;
937
938	if ((error = VOP_OPEN(vp, fmode, cred, td, NULL)) != 0) {
939		vput(vp);
940		goto unionfs_vn_create_on_upper_free_out1;
941	}
942	VOP_ADD_WRITECOUNT(vp, 1);
943	CTR3(KTR_VFS, "%s: vp %p v_writecount increased to %d",  __func__, vp,
944	    vp->v_writecount);
945	*vpp = vp;
946
947unionfs_vn_create_on_upper_free_out1:
948	VOP_UNLOCK(udvp, LK_RELEASE);
949
950unionfs_vn_create_on_upper_free_out2:
951	if (cn.cn_flags & HASBUF) {
952		uma_zfree(namei_zone, cn.cn_pnbuf);
953		cn.cn_flags &= ~HASBUF;
954	}
955
956	return (error);
957}
958
959/*
960 * Copy from lvp to uvp.
961 *
962 * lvp and uvp should be locked and opened on entry and will be locked and
963 * opened on return.
964 */
965static int
966unionfs_copyfile_core(struct vnode *lvp, struct vnode *uvp,
967		      struct ucred *cred, struct thread *td)
968{
969	int		error;
970	off_t		offset;
971	int		count;
972	int		bufoffset;
973	char           *buf;
974	struct uio	uio;
975	struct iovec	iov;
976
977	error = 0;
978	memset(&uio, 0, sizeof(uio));
979
980	uio.uio_td = td;
981	uio.uio_segflg = UIO_SYSSPACE;
982	uio.uio_offset = 0;
983
984	buf = malloc(MAXBSIZE, M_TEMP, M_WAITOK);
985
986	while (error == 0) {
987		offset = uio.uio_offset;
988
989		uio.uio_iov = &iov;
990		uio.uio_iovcnt = 1;
991		iov.iov_base = buf;
992		iov.iov_len = MAXBSIZE;
993		uio.uio_resid = iov.iov_len;
994		uio.uio_rw = UIO_READ;
995
996		if ((error = VOP_READ(lvp, &uio, 0, cred)) != 0)
997			break;
998		if ((count = MAXBSIZE - uio.uio_resid) == 0)
999			break;
1000
1001		bufoffset = 0;
1002		while (bufoffset < count) {
1003			uio.uio_iov = &iov;
1004			uio.uio_iovcnt = 1;
1005			iov.iov_base = buf + bufoffset;
1006			iov.iov_len = count - bufoffset;
1007			uio.uio_offset = offset + bufoffset;
1008			uio.uio_resid = iov.iov_len;
1009			uio.uio_rw = UIO_WRITE;
1010
1011			if ((error = VOP_WRITE(uvp, &uio, 0, cred)) != 0)
1012				break;
1013
1014			bufoffset += (count - bufoffset) - uio.uio_resid;
1015		}
1016
1017		uio.uio_offset = offset + bufoffset;
1018	}
1019
1020	free(buf, M_TEMP);
1021
1022	return (error);
1023}
1024
1025/*
1026 * Copy file from lower to upper.
1027 *
1028 * If you need copy of the contents, set 1 to docopy. Otherwise, set 0 to
1029 * docopy.
1030 *
1031 * If no error returned, unp will be updated.
1032 */
1033int
1034unionfs_copyfile(struct unionfs_node *unp, int docopy, struct ucred *cred,
1035		 struct thread *td)
1036{
1037	int		error;
1038	struct mount   *mp;
1039	struct vnode   *udvp;
1040	struct vnode   *lvp;
1041	struct vnode   *uvp;
1042	struct vattr	uva;
1043
1044	lvp = unp->un_lowervp;
1045	uvp = NULLVP;
1046
1047	if ((UNIONFSTOV(unp)->v_mount->mnt_flag & MNT_RDONLY))
1048		return (EROFS);
1049	if (unp->un_dvp == NULLVP)
1050		return (EINVAL);
1051	if (unp->un_uppervp != NULLVP)
1052		return (EEXIST);
1053	udvp = VTOUNIONFS(unp->un_dvp)->un_uppervp;
1054	if (udvp == NULLVP)
1055		return (EROFS);
1056	if ((udvp->v_mount->mnt_flag & MNT_RDONLY))
1057		return (EROFS);
1058
1059	error = VOP_ACCESS(lvp, VREAD, cred, td);
1060	if (error != 0)
1061		return (error);
1062
1063	if ((error = vn_start_write(udvp, &mp, V_WAIT | PCATCH)) != 0)
1064		return (error);
1065	error = unionfs_vn_create_on_upper(&uvp, udvp, unp, &uva, td);
1066	if (error != 0) {
1067		vn_finished_write(mp);
1068		return (error);
1069	}
1070
1071	if (docopy != 0) {
1072		error = VOP_OPEN(lvp, FREAD, cred, td, NULL);
1073		if (error == 0) {
1074			error = unionfs_copyfile_core(lvp, uvp, cred, td);
1075			VOP_CLOSE(lvp, FREAD, cred, td);
1076		}
1077	}
1078	VOP_CLOSE(uvp, FWRITE, cred, td);
1079	VOP_ADD_WRITECOUNT(uvp, -1);
1080	CTR3(KTR_VFS, "%s: vp %p v_writecount decreased to %d", __func__, uvp,
1081	    uvp->v_writecount);
1082
1083	vn_finished_write(mp);
1084
1085	if (error == 0) {
1086		/* Reset the attributes. Ignore errors. */
1087		uva.va_type = VNON;
1088		VOP_SETATTR(uvp, &uva, cred);
1089	}
1090
1091	unionfs_node_update(unp, uvp, td);
1092
1093	return (error);
1094}
1095
/*
 * It checks whether vp can rmdir. (check empty)
 *
 * vp is unionfs vnode.
 * vp should be locked.
 *
 * An upper directory flagged OPAQUE is accepted immediately.  Otherwise
 * the entries of the lower directory are read, and every entry that can
 * actually be looked up in the lower directory must either exist in the
 * upper directory or be covered by a whiteout there.
 *
 * Returns 0 when the directory may be removed, ENOTEMPTY when a lower
 * entry is still visible, or the error of a failing access check, open
 * or readdir.
 *
 * NOTE(review): uvp is used without a NULL check, so this assumes vp
 * has an upper vnode - confirm against the callers.
 */
int
unionfs_check_rmdir(struct vnode *vp, struct ucred *cred, struct thread *td)
{
	int		error;
	int		eofflag;
	int		lookuperr;
	struct vnode   *uvp;
	struct vnode   *lvp;
	struct vnode   *tvp;
	struct vattr	va;
	struct componentname cn;
	/*
	 * The size of buf needs to be larger than DIRBLKSIZ.
	 */
	char		buf[256 * 6];
	struct dirent  *dp;
	struct dirent  *edp;
	struct uio	uio;
	struct iovec	iov;

	ASSERT_VOP_ELOCKED(vp, "unionfs_check_rmdir");

	eofflag = 0;
	uvp = UNIONFSVPTOUPPERVP(vp);
	lvp = UNIONFSVPTOLOWERVP(vp);

	/* check opaque */
	if ((error = VOP_GETATTR(uvp, &va, cred)) != 0)
		return (error);
	if (va.va_flags & OPAQUE)
		return (0);

	/* open vnode */
#ifdef MAC
	if ((error = mac_vnode_check_open(cred, vp, VEXEC|VREAD)) != 0)
		return (error);
#endif
	if ((error = VOP_ACCESS(vp, VEXEC|VREAD, cred, td)) != 0)
		return (error);
	if ((error = VOP_OPEN(vp, FREAD, cred, td, NULL)) != 0)
		return (error);

	uio.uio_rw = UIO_READ;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_td = td;
	uio.uio_offset = 0;

	/* error is 0 here unless the MAC check below fails. */
#ifdef MAC
	error = mac_vnode_check_readdir(td->td_ucred, lvp);
#endif
	while (!error && !eofflag) {
		/* Read the next batch of lower directory entries into buf. */
		iov.iov_base = buf;
		iov.iov_len = sizeof(buf);
		uio.uio_iov = &iov;
		uio.uio_iovcnt = 1;
		uio.uio_resid = iov.iov_len;

		error = VOP_READDIR(lvp, &uio, cred, &eofflag, NULL, NULL);
		if (error != 0)
			break;
		/* Nothing was returned although EOF was not signalled. */
		if (eofflag == 0 && uio.uio_resid == sizeof(buf)) {
#ifdef DIAGNOSTIC
			panic("bad readdir response from lower FS.");
#endif
			break;
		}

		/* edp marks the end of the data that readdir produced. */
		edp = (struct dirent*)&buf[sizeof(buf) - uio.uio_resid];
		for (dp = (struct dirent*)buf; !error && dp < edp;
		     dp = (struct dirent*)((caddr_t)dp + dp->d_reclen)) {
			/* Skip whiteouts, unused slots, "." and "..". */
			if (dp->d_type == DT_WHT || dp->d_fileno == 0 ||
			    (dp->d_namlen == 1 && dp->d_name[0] == '.') ||
			    (dp->d_namlen == 2 && !bcmp(dp->d_name, "..", 2)))
				continue;

			cn.cn_namelen = dp->d_namlen;
			cn.cn_pnbuf = NULL;
			cn.cn_nameptr = dp->d_name;
			cn.cn_nameiop = LOOKUP;
			cn.cn_flags = (LOCKPARENT | LOCKLEAF | SAVENAME | RDONLY | ISLASTCN);
			cn.cn_lkflags = LK_EXCLUSIVE;
			cn.cn_thread = td;
			cn.cn_cred = cred;
			cn.cn_consume = 0;

			/*
			 * check entry in lower.
			 * Sometimes, readdir function returns
			 * wrong entry.
			 */
			lookuperr = VOP_LOOKUP(lvp, &tvp, &cn);

			if (!lookuperr)
				vput(tvp);
			else
				continue; /* skip entry */

			/*
			 * check entry
			 * If it has no exist/whiteout entry in upper,
			 * directory is not empty.
			 */
			/* Reset the flags before reusing cn for the upper lookup. */
			cn.cn_flags = (LOCKPARENT | LOCKLEAF | SAVENAME | RDONLY | ISLASTCN);
			lookuperr = VOP_LOOKUP(uvp, &tvp, &cn);

			if (!lookuperr)
				vput(tvp);

			/* ignore exist or whiteout entry */
			if (!lookuperr ||
			    (lookuperr == ENOENT && (cn.cn_flags & ISWHITEOUT)))
				continue;

			/* Setting error ends both the inner and the outer loop. */
			error = ENOTEMPTY;
		}
	}

	/* close vnode */
	VOP_CLOSE(vp, FREAD, cred, td);

	return (error);
}
1224
1225#ifdef DIAGNOSTIC
1226
1227struct vnode   *
1228unionfs_checkuppervp(struct vnode *vp, char *fil, int lno)
1229{
1230	struct unionfs_node *unp;
1231
1232	unp = VTOUNIONFS(vp);
1233
1234#ifdef notyet
1235	if (vp->v_op != unionfs_vnodeop_p) {
1236		printf("unionfs_checkuppervp: on non-unionfs-node.\n");
1237#ifdef KDB
1238		kdb_enter(KDB_WHY_UNIONFS,
1239		    "unionfs_checkuppervp: on non-unionfs-node.\n");
1240#endif
1241		panic("unionfs_checkuppervp");
1242	};
1243#endif
1244	return (unp->un_uppervp);
1245}
1246
1247struct vnode   *
1248unionfs_checklowervp(struct vnode *vp, char *fil, int lno)
1249{
1250	struct unionfs_node *unp;
1251
1252	unp = VTOUNIONFS(vp);
1253
1254#ifdef notyet
1255	if (vp->v_op != unionfs_vnodeop_p) {
1256		printf("unionfs_checklowervp: on non-unionfs-node.\n");
1257#ifdef KDB
1258		kdb_enter(KDB_WHY_UNIONFS,
1259		    "unionfs_checklowervp: on non-unionfs-node.\n");
1260#endif
1261		panic("unionfs_checklowervp");
1262	};
1263#endif
1264	return (unp->un_lowervp);
1265}
1266#endif
1267