union_subr.c revision 22975
1176669Spiso/*
2176669Spiso * Copyright (c) 1994 Jan-Simon Pendry
3176669Spiso * Copyright (c) 1994
4176669Spiso *	The Regents of the University of California.  All rights reserved.
5176669Spiso *
6176669Spiso * This code is derived from software contributed to Berkeley by
7176669Spiso * Jan-Simon Pendry.
8176669Spiso *
9176669Spiso * Redistribution and use in source and binary forms, with or without
10176669Spiso * modification, are permitted provided that the following conditions
11176669Spiso * are met:
12176669Spiso * 1. Redistributions of source code must retain the above copyright
13176669Spiso *    notice, this list of conditions and the following disclaimer.
14176669Spiso * 2. Redistributions in binary form must reproduce the above copyright
15176669Spiso *    notice, this list of conditions and the following disclaimer in the
16176669Spiso *    documentation and/or other materials provided with the distribution.
17176669Spiso * 3. All advertising materials mentioning features or use of this software
18176669Spiso *    must display the following acknowledgement:
19176669Spiso *	This product includes software developed by the University of
20176669Spiso *	California, Berkeley and its contributors.
21176669Spiso * 4. Neither the name of the University nor the names of its contributors
22176669Spiso *    may be used to endorse or promote products derived from this software
23176669Spiso *    without specific prior written permission.
24176669Spiso *
25176669Spiso * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26176669Spiso * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27176669Spiso * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28176669Spiso * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29176669Spiso * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30176669Spiso * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31176669Spiso * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32176669Spiso * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33176669Spiso * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34176669Spiso * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35176669Spiso * SUCH DAMAGE.
36176669Spiso *
37176669Spiso *	@(#)union_subr.c	8.20 (Berkeley) 5/20/95
38176669Spiso * $Id$
39200580Sluigi */
40200580Sluigi
41176669Spiso#include <sys/param.h>
42176669Spiso#include <sys/systm.h>
43176669Spiso#include <sys/time.h>
44176669Spiso#include <sys/kernel.h>
45176669Spiso#include <sys/vnode.h>
46176669Spiso#include <sys/namei.h>
47176669Spiso#include <sys/malloc.h>
48176669Spiso#include <sys/file.h>
49176669Spiso#include <sys/filedesc.h>
50176669Spiso#include <sys/queue.h>
51176669Spiso#include <sys/mount.h>
52243401Sglebius#include <sys/stat.h>
53243401Sglebius#include <vm/vm.h>
54176669Spiso#include <vm/vm_extern.h>	/* for vnode_pager_setsize */
55176669Spiso#include <miscfs/union/union.h>
56255395Strociny
57176669Spiso#include <sys/proc.h>
58200909Sluigi
59176669Spisoextern int	union_init __P((void));
60176669Spiso
/*
 * Number of hash chains in the union_node cache.
 * Must be a power of two, otherwise change UNION_HASH().
 */
#define NHASH 32

/*
 * Map an (upper, lower) pair to a chain index in [0, NHASH).
 * Both arguments are converted to unsigned long and summed; the low
 * 8 bits of the sum are discarded before masking with (NHASH-1).
 * Both arguments are fully parenthesized so that an expression
 * argument is converted as a whole.
 */
#define UNION_HASH(u, l) \
	(((((unsigned long) (u)) + ((unsigned long) (l))) >> 8) & (NHASH-1))
67200897Sluigi
68200909Sluigistatic LIST_HEAD(unhead, union_node) unhead[NHASH];
69176669Spisostatic int unvplock[NHASH];
70200897Sluigi
71176669Spisostatic void	union_dircache_r __P((struct vnode *vp, struct vnode ***vppp,
72200909Sluigi				      int *cntp));
73200909Sluigistatic int	union_list_lock __P((int ix));
74200909Sluigistatic void	union_list_unlock __P((int ix));
75200909Sluigistatic int	union_relookup __P((struct union_mount *um, struct vnode *dvp,
76200909Sluigi				    struct vnode **vpp,
77200909Sluigi				    struct componentname *cnp,
78200909Sluigi				    struct componentname *cn, char *path,
79200909Sluigi				    int pathlen));
80200909Sluigiextern void	union_updatevp __P((struct union_node *un,
81200909Sluigi				    struct vnode *uppervp,
82200909Sluigi				    struct vnode *lowervp));
83176669Spiso
84200909Sluigiint
85176669Spisounion_init()
86200909Sluigi{
87176669Spiso	int i;
88176669Spiso
89200897Sluigi	for (i = 0; i < NHASH; i++)
90200897Sluigi		LIST_INIT(&unhead[i]);
91200897Sluigi	bzero((caddr_t) unvplock, sizeof(unvplock));
92176669Spiso	return (0);
93200897Sluigi}
94176669Spiso
95200897Sluigistatic int
96200909Sluigiunion_list_lock(ix)
97176669Spiso	int ix;
98200897Sluigi{
99200897Sluigi
100200909Sluigi	if (unvplock[ix] & UN_LOCKED) {
101200909Sluigi		unvplock[ix] |= UN_WANT;
102200897Sluigi		(void) tsleep((caddr_t) &unvplock[ix], PINOD, "unllck", 0);
103200897Sluigi		return (1);
104176669Spiso	}
105176669Spiso
106176669Spiso	unvplock[ix] |= UN_LOCKED;
107176669Spiso
108176669Spiso	return (0);
109176669Spiso}
110176669Spiso
111176669Spisostatic void
112176669Spisounion_list_unlock(ix)
113176669Spiso	int ix;
114176669Spiso{
115176669Spiso
116176669Spiso	unvplock[ix] &= ~UN_LOCKED;
117176669Spiso
118176669Spiso	if (unvplock[ix] & UN_WANT) {
119176669Spiso		unvplock[ix] &= ~UN_WANT;
120176669Spiso		wakeup((caddr_t) &unvplock[ix]);
121176669Spiso	}
122176669Spiso}
123176669Spiso
124176669Spisovoid
125176669Spisounion_updatevp(un, uppervp, lowervp)
126176669Spiso	struct union_node *un;
127176669Spiso	struct vnode *uppervp;
128176669Spiso	struct vnode *lowervp;
129176669Spiso{
130176669Spiso	int ohash = UNION_HASH(un->un_uppervp, un->un_lowervp);
131176669Spiso	int nhash = UNION_HASH(uppervp, lowervp);
132176669Spiso	int docache = (lowervp != NULLVP || uppervp != NULLVP);
133176669Spiso	int lhash, hhash, uhash;
134176669Spiso
135176669Spiso	/*
136200909Sluigi	 * Ensure locking is ordered from lower to higher
137176669Spiso	 * to avoid deadlocks.
138200909Sluigi	 */
139176669Spiso	if (nhash < ohash) {
140176669Spiso		lhash = nhash;
141176669Spiso		uhash = ohash;
142176669Spiso	} else {
143220837Sglebius		lhash = ohash;
144176669Spiso		uhash = nhash;
145176669Spiso	}
146176669Spiso
147176669Spiso	if (lhash != uhash)
148176669Spiso		while (union_list_lock(lhash))
149176669Spiso			continue;
150176669Spiso
151176669Spiso	while (union_list_lock(uhash))
152176669Spiso		continue;
153176669Spiso
154176669Spiso	if (ohash != nhash || !docache) {
155176669Spiso		if (un->un_flags & UN_CACHED) {
156176669Spiso			un->un_flags &= ~UN_CACHED;
157176669Spiso			LIST_REMOVE(un, un_cache);
158176669Spiso		}
159176669Spiso	}
160176669Spiso
161176669Spiso	if (ohash != nhash)
162176669Spiso		union_list_unlock(ohash);
163176669Spiso
164176669Spiso	if (un->un_lowervp != lowervp) {
165176669Spiso		if (un->un_lowervp) {
166176669Spiso			vrele(un->un_lowervp);
167176669Spiso			if (un->un_path) {
168176669Spiso				free(un->un_path, M_TEMP);
169176669Spiso				un->un_path = 0;
170176669Spiso			}
171200909Sluigi			if (un->un_dirvp) {
172176669Spiso				vrele(un->un_dirvp);
173176669Spiso				un->un_dirvp = NULLVP;
174176669Spiso			}
175176669Spiso		}
176176669Spiso		un->un_lowervp = lowervp;
177176669Spiso		un->un_lowersz = VNOVAL;
178176669Spiso	}
179176669Spiso
180176669Spiso	if (un->un_uppervp != uppervp) {
181176669Spiso		if (un->un_uppervp)
182176669Spiso			vrele(un->un_uppervp);
183176669Spiso
184176669Spiso		un->un_uppervp = uppervp;
185200909Sluigi		un->un_uppersz = VNOVAL;
186176669Spiso	}
187200909Sluigi
188200909Sluigi	if (docache && (ohash != nhash)) {
189200909Sluigi		LIST_INSERT_HEAD(&unhead[nhash], un, un_cache);
190200909Sluigi		un->un_flags |= UN_CACHED;
191200909Sluigi	}
192200909Sluigi
193200909Sluigi	union_list_unlock(nhash);
194200909Sluigi}
195200909Sluigi
196200909Sluigivoid
197200909Sluigiunion_newlower(un, lowervp)
198200909Sluigi	struct union_node *un;
199200909Sluigi	struct vnode *lowervp;
200200909Sluigi{
201176669Spiso
202200897Sluigi	union_updatevp(un, un->un_uppervp, lowervp);
203176669Spiso}
204176669Spiso
205176669Spisovoid
206244569Smelifarounion_newupper(un, uppervp)
207244569Smelifaro	struct union_node *un;
208244569Smelifaro	struct vnode *uppervp;
209244569Smelifaro{
210244569Smelifaro
211244569Smelifaro	union_updatevp(un, uppervp, un->un_lowervp);
212244569Smelifaro}
213176669Spiso
214176669Spiso/*
215176669Spiso * Keep track of size changes in the underlying vnodes.
216176669Spiso * If the size changes, then callback to the vm layer
217176669Spiso * giving priority to the upper layer size.
218176669Spiso */
219223080Saevoid
220223080Saeunion_newsize(vp, uppersz, lowersz)
221176669Spiso	struct vnode *vp;
222176669Spiso	off_t uppersz, lowersz;
223176669Spiso{
224176669Spiso	struct union_node *un;
225200909Sluigi	off_t sz;
226200909Sluigi
227200909Sluigi	/* only interested in regular files */
228200909Sluigi	if (vp->v_type != VREG)
229200909Sluigi		return;
230176669Spiso
231176669Spiso	un = VTOUNION(vp);
232200909Sluigi	sz = VNOVAL;
233176669Spiso
234176669Spiso	if ((uppersz != VNOVAL) && (un->un_uppersz != uppersz)) {
235200909Sluigi		un->un_uppersz = uppersz;
236200909Sluigi		if (sz == VNOVAL)
237200909Sluigi			sz = un->un_uppersz;
238200909Sluigi	}
239176669Spiso
240200909Sluigi	if ((lowersz != VNOVAL) && (un->un_lowersz != lowersz)) {
241200909Sluigi		un->un_lowersz = lowersz;
242200909Sluigi		if (sz == VNOVAL)
243200909Sluigi			sz = un->un_lowersz;
244200909Sluigi	}
245200909Sluigi
246176669Spiso	if (sz != VNOVAL) {
247200909Sluigi#ifdef UNION_DIAGNOSTIC
248200909Sluigi		printf("union: %s size now %ld\n",
249200909Sluigi			uppersz != VNOVAL ? "upper" : "lower", (long) sz);
250200909Sluigi#endif
251200909Sluigi		vnode_pager_setsize(vp, sz);
252200909Sluigi	}
253200909Sluigi}
254200909Sluigi
255200909Sluigi/*
256200909Sluigi * allocate a union_node/vnode pair.  the vnode is
257200909Sluigi * referenced and locked.  the new vnode is returned
258200909Sluigi * via (vpp).  (mp) is the mountpoint of the union filesystem,
259200909Sluigi * (dvp) is the parent directory where the upper layer object
260176669Spiso * should exist (but doesn't) and (cnp) is the componentname
261176669Spiso * information which is partially copied to allow the upper
262176669Spiso * layer object to be created at a later time.  (uppervp)
263176669Spiso * and (lowervp) reference the upper and lower layer objects
264200909Sluigi * being mapped.  either, but not both, can be nil.
265200909Sluigi * if supplied, (uppervp) is locked.
266176669Spiso * the reference is either maintained in the new union_node
267176669Spiso * object which is allocated, or they are vrele'd.
268176669Spiso *
269222806Sae * all union_nodes are maintained on a singly-linked
270223080Sae * list.  new nodes are only allocated when they cannot
271223080Sae * be found on this list.  entries on the list are
272223080Sae * removed when the vfs reclaim entry is called.
273223080Sae *
274223080Sae * a single lock is kept for the entire list.  this is
275223080Sae * needed because the getnewvnode() function can block
276223080Sae * waiting for a vnode to become free, in which case there
277223080Sae * may be more than one process trying to get the same
278223080Sae * vnode.  this lock is only taken if we are going to
279223080Sae * call getnewvnode, since the kernel itself is single-threaded.
280244569Smelifaro *
281223080Sae * if an entry is found on the list, then call vget() to
282223080Sae * take a reference.  this is done because there may be
283223080Sae * zero references to it and so it needs to removed from
284223080Sae * the vnode free list.
285223080Sae */
286223080Saeint
287223080Saeunion_allocvp(vpp, mp, undvp, dvp, cnp, uppervp, lowervp, docache)
288223080Sae	struct vnode **vpp;
289223080Sae	struct mount *mp;
290223080Sae	struct vnode *undvp;		/* parent union vnode */
291223080Sae	struct vnode *dvp;		/* may be null */
292223080Sae	struct componentname *cnp;	/* may be null */
293223080Sae	struct vnode *uppervp;		/* may be null */
294223080Sae	struct vnode *lowervp;		/* may be null */
295223080Sae	int docache;
296223080Sae{
297223080Sae	int error;
298223080Sae	struct union_node *un = 0;
299223080Sae	struct vnode *xlowervp = NULLVP;
300223080Sae	struct union_mount *um = MOUNTTOUNIONMOUNT(mp);
301223080Sae	int hash;
302223080Sae	int vflag;
303223080Sae	int try;
304223080Sae
305223080Sae	if (uppervp == NULLVP && lowervp == NULLVP)
306223080Sae		panic("union: unidentifiable allocation");
307222806Sae
308222806Sae	if (uppervp && lowervp && (uppervp->v_type != lowervp->v_type)) {
309222806Sae		xlowervp = lowervp;
310222806Sae		lowervp = NULLVP;
311222806Sae	}
312222806Sae
313222806Sae	/* detect the root vnode (and aliases) */
314222806Sae	vflag = 0;
315222806Sae	if ((uppervp == um->um_uppervp) &&
316222806Sae	    ((lowervp == NULLVP) || lowervp == um->um_lowervp)) {
317222806Sae		if (lowervp == NULLVP) {
318223080Sae			lowervp = um->um_lowervp;
319176669Spiso			if (lowervp != NULLVP)
320176669Spiso				VREF(lowervp);
321176669Spiso		}
322176669Spiso		vflag = VROOT;
323176669Spiso	}
324222806Sae
325222806Saeloop:
326232171Sae	if (!docache) {
327200909Sluigi		un = 0;
328176669Spiso	} else for (try = 0; try < 3; try++) {
329200909Sluigi		switch (try) {
330200909Sluigi		case 0:
331200909Sluigi			if (lowervp == NULLVP)
332176669Spiso				continue;
333176669Spiso			hash = UNION_HASH(uppervp, lowervp);
334200909Sluigi			break;
335176669Spiso
336200909Sluigi		case 1:
337176669Spiso			if (uppervp == NULLVP)
338176669Spiso				continue;
339200909Sluigi			hash = UNION_HASH(uppervp, NULLVP);
340176669Spiso			break;
341176669Spiso
342176669Spiso		case 2:
343176669Spiso			if (lowervp == NULLVP)
344176669Spiso				continue;
345176669Spiso			hash = UNION_HASH(NULLVP, lowervp);
346176669Spiso			break;
347176669Spiso		}
348212256Sglebius
349212256Sglebius		while (union_list_lock(hash))
350212256Sglebius			continue;
351200909Sluigi
352176669Spiso		for (un = unhead[hash].lh_first; un != 0;
353176669Spiso					un = un->un_cache.le_next) {
354176669Spiso			if ((un->un_lowervp == lowervp ||
355200909Sluigi			     un->un_lowervp == NULLVP) &&
356200909Sluigi			    (un->un_uppervp == uppervp ||
357200909Sluigi			     un->un_uppervp == NULLVP) &&
358176669Spiso			    (UNIONTOV(un)->v_mount == mp)) {
359176669Spiso				if (vget(UNIONTOV(un), 0,
360176669Spiso				    cnp ? cnp->cn_proc : NULL)) {
361200909Sluigi					union_list_unlock(hash);
362176669Spiso					goto loop;
363176669Spiso				}
364176669Spiso				break;
365176669Spiso			}
366176669Spiso		}
367200909Sluigi
368176669Spiso		union_list_unlock(hash);
369200909Sluigi
370176669Spiso		if (un)
371200909Sluigi			break;
372200909Sluigi	}
373176669Spiso
374200909Sluigi	if (un) {
375176669Spiso		/*
376212256Sglebius		 * Obtain a lock on the union_node.
377176669Spiso		 * uppervp is locked, though un->un_uppervp
378176669Spiso		 * may not be.  this doesn't break the locking
379176669Spiso		 * hierarchy since in the case that un->un_uppervp
380176669Spiso		 * is not yet locked it will be vrele'd and replaced
381176669Spiso		 * with uppervp.
382200580Sluigi		 */
383200580Sluigi
384200580Sluigi		if ((dvp != NULLVP) && (uppervp == dvp)) {
385200580Sluigi			/*
386200580Sluigi			 * Access ``.'', so (un) will already
387200909Sluigi			 * be locked.  Since this process has
388200909Sluigi			 * the lock on (uppervp) no other
389200909Sluigi			 * process can hold the lock on (un).
390200909Sluigi			 */
391200580Sluigi#ifdef DIAGNOSTIC
392200580Sluigi			if ((un->un_flags & UN_LOCKED) == 0)
393200580Sluigi				panic("union: . not locked");
394200909Sluigi			else if (curproc && un->un_pid != curproc->p_pid &&
395176669Spiso				    un->un_pid > -1 && curproc->p_pid > -1)
396176669Spiso				panic("union: allocvp not lock owner");
397220837Sglebius#endif
398176669Spiso		} else {
399200897Sluigi			if (un->un_flags & UN_LOCKED) {
400220914Sglebius				vrele(UNIONTOV(un));
401220914Sglebius				un->un_flags |= UN_WANT;
402176669Spiso				(void) tsleep((caddr_t) &un->un_flags, PINOD, "unalvp", 0);
403220837Sglebius				goto loop;
404220837Sglebius			}
405220837Sglebius			un->un_flags |= UN_LOCKED;
406220837Sglebius
407176669Spiso#ifdef DIAGNOSTIC
408220837Sglebius			if (curproc)
409220837Sglebius				un->un_pid = curproc->p_pid;
410220837Sglebius			else
411220837Sglebius				un->un_pid = -1;
412220837Sglebius#endif
413220837Sglebius		}
414200909Sluigi
415176669Spiso		/*
416176669Spiso		 * At this point, the union_node is locked,
417200897Sluigi		 * un->un_uppervp may not be locked, and uppervp
418220837Sglebius		 * is locked or nil.
419220837Sglebius		 */
420176669Spiso
421220800Sglebius		/*
422176669Spiso		 * Save information about the upper layer.
423220800Sglebius		 */
424176669Spiso		if (uppervp != un->un_uppervp) {
425176669Spiso			union_newupper(un, uppervp);
426176669Spiso		} else if (uppervp) {
427220837Sglebius			vrele(uppervp);
428200897Sluigi		}
429220837Sglebius
430220800Sglebius		if (un->un_uppervp) {
431176669Spiso			un->un_flags |= UN_ULOCK;
432176669Spiso			un->un_flags &= ~UN_KLOCK;
433200909Sluigi		}
434176669Spiso
435176669Spiso		/*
436220837Sglebius		 * Save information about the lower layer.
437200909Sluigi		 * This needs to keep track of pathname
438200909Sluigi		 * and directory information which union_vn_create
439200909Sluigi		 * might need.
440176669Spiso		 */
441176669Spiso		if (lowervp != un->un_lowervp) {
442220837Sglebius			union_newlower(un, lowervp);
443220837Sglebius			if (cnp && (lowervp != NULLVP)) {
444220837Sglebius				un->un_hash = cnp->cn_hash;
445220837Sglebius				un->un_path = malloc(cnp->cn_namelen+1,
446176669Spiso						M_TEMP, M_WAITOK);
447220837Sglebius				bcopy(cnp->cn_nameptr, un->un_path,
448176669Spiso						cnp->cn_namelen);
449200909Sluigi				un->un_path[cnp->cn_namelen] = '\0';
450176669Spiso				VREF(dvp);
451176669Spiso				un->un_dirvp = dvp;
452176669Spiso			}
453176669Spiso		} else if (lowervp) {
454176669Spiso			vrele(lowervp);
455176669Spiso		}
456220837Sglebius		*vpp = UNIONTOV(un);
457200897Sluigi		return (0);
458220837Sglebius	}
459220837Sglebius
460220837Sglebius	if (docache) {
461220837Sglebius		/*
462200897Sluigi		 * otherwise lock the vp list while we call getnewvnode
463220837Sglebius		 * since that can block.
464200897Sluigi		 */
465220837Sglebius		hash = UNION_HASH(uppervp, lowervp);
466220837Sglebius
467220837Sglebius		if (union_list_lock(hash))
468220837Sglebius			goto loop;
469176669Spiso	}
470176669Spiso
471176669Spiso	error = getnewvnode(VT_UNION, mp, union_vnodeop_p, vpp);
472176669Spiso	if (error) {
473176669Spiso		if (uppervp) {
474176669Spiso			if (dvp == uppervp)
475200897Sluigi				vrele(uppervp);
476176669Spiso			else
477200909Sluigi				vput(uppervp);
478176669Spiso		}
479200909Sluigi		if (lowervp)
480200897Sluigi			vrele(lowervp);
481200909Sluigi
482176669Spiso		goto out;
483200897Sluigi	}
484176669Spiso
485176669Spiso	MALLOC((*vpp)->v_data, void *, sizeof(struct union_node),
486200897Sluigi		M_TEMP, M_WAITOK);
487200897Sluigi
488200897Sluigi	(*vpp)->v_flag |= vflag;
489176669Spiso	if (uppervp)
490176669Spiso		(*vpp)->v_type = uppervp->v_type;
491176669Spiso	else
492176669Spiso		(*vpp)->v_type = lowervp->v_type;
493176669Spiso	un = VTOUNION(*vpp);
494176669Spiso	un->un_vnode = *vpp;
495176669Spiso	un->un_uppervp = uppervp;
496176669Spiso	un->un_uppersz = VNOVAL;
497200909Sluigi	un->un_lowervp = lowervp;
498220837Sglebius	un->un_lowersz = VNOVAL;
499176669Spiso	un->un_pvp = undvp;
500176669Spiso	if (undvp != NULLVP)
501176669Spiso		VREF(undvp);
502220837Sglebius	un->un_dircache = 0;
503220837Sglebius	un->un_openl = 0;
504200897Sluigi	un->un_flags = UN_LOCKED;
505176669Spiso	if (un->un_uppervp)
506220837Sglebius		un->un_flags |= UN_ULOCK;
507176669Spiso#ifdef DIAGNOSTIC
508200897Sluigi	if (curproc)
509220837Sglebius		un->un_pid = curproc->p_pid;
510220837Sglebius	else
511220837Sglebius		un->un_pid = -1;
512200897Sluigi#endif
513176669Spiso	if (cnp && (lowervp != NULLVP)) {
514220837Sglebius		un->un_hash = cnp->cn_hash;
515200909Sluigi		un->un_path = malloc(cnp->cn_namelen+1, M_TEMP, M_WAITOK);
516220837Sglebius		bcopy(cnp->cn_nameptr, un->un_path, cnp->cn_namelen);
517220837Sglebius		un->un_path[cnp->cn_namelen] = '\0';
518220837Sglebius		VREF(dvp);
519220837Sglebius		un->un_dirvp = dvp;
520220837Sglebius	} else {
521220837Sglebius		un->un_hash = 0;
522220837Sglebius		un->un_path = 0;
523220837Sglebius		un->un_dirvp = 0;
524220837Sglebius	}
525220837Sglebius
526220837Sglebius	if (docache) {
527220837Sglebius		LIST_INSERT_HEAD(&unhead[hash], un, un_cache);
528220837Sglebius		un->un_flags |= UN_CACHED;
529220837Sglebius	}
530220837Sglebius
531220837Sglebius	if (xlowervp)
532220837Sglebius		vrele(xlowervp);
533220837Sglebius
534220837Sglebiusout:
535220837Sglebius	if (docache)
536220837Sglebius		union_list_unlock(hash);
537220837Sglebius
538220837Sglebius	return (error);
539220837Sglebius}
540220837Sglebius
541200909Sluigiint
542220837Sglebiusunion_freevp(vp)
543220837Sglebius	struct vnode *vp;
544176669Spiso{
545200909Sluigi	struct union_node *un = VTOUNION(vp);
546176669Spiso
547200897Sluigi	if (un->un_flags & UN_CACHED) {
548220837Sglebius		un->un_flags &= ~UN_CACHED;
549220837Sglebius		LIST_REMOVE(un, un_cache);
550220837Sglebius	}
551220837Sglebius
552220837Sglebius	if (un->un_pvp != NULLVP)
553176669Spiso		vrele(un->un_pvp);
554176669Spiso	if (un->un_uppervp != NULLVP)
555176669Spiso		vrele(un->un_uppervp);
556176669Spiso	if (un->un_lowervp != NULLVP)
557176669Spiso		vrele(un->un_lowervp);
558176669Spiso	if (un->un_dirvp != NULLVP)
559176669Spiso		vrele(un->un_dirvp);
560200909Sluigi	if (un->un_path)
561200897Sluigi		free(un->un_path, M_TEMP);
562176669Spiso
563200897Sluigi	FREE(vp->v_data, M_TEMP);
564176669Spiso	vp->v_data = 0;
565200897Sluigi
566200909Sluigi	return (0);
567200909Sluigi}
568200897Sluigi
569200909Sluigi/*
570176669Spiso * copyfile.  copy the vnode (fvp) to the vnode (tvp)
571200909Sluigi * using a sequence of reads and writes.  both (fvp)
572200909Sluigi * and (tvp) are locked on entry and exit.
573200909Sluigi */
574200909Sluigiint
575200909Sluigiunion_copyfile(fvp, tvp, cred, p)
576200909Sluigi	struct vnode *fvp;
577200909Sluigi	struct vnode *tvp;
578200909Sluigi	struct ucred *cred;
579200909Sluigi	struct proc *p;
580200909Sluigi{
581200909Sluigi	char *buf;
582200909Sluigi	struct uio uio;
583176669Spiso	struct iovec iov;
584176669Spiso	int error = 0;
585200909Sluigi
586200909Sluigi	/*
587176669Spiso	 * strategy:
588200897Sluigi	 * allocate a buffer of size MAXBSIZE.
589176669Spiso	 * loop doing reads and writes, keeping track
590176669Spiso	 * of the current uio offset.
591176669Spiso	 * give up at the first sign of trouble.
592176669Spiso	 */
593176669Spiso
594255395Strociny	uio.uio_procp = p;
595255395Strociny	uio.uio_segflg = UIO_SYSSPACE;
596255395Strociny	uio.uio_offset = 0;
597255395Strociny
598255395Strociny	VOP_UNLOCK(fvp, 0, p);				/* XXX */
599255395Strociny	VOP_LEASE(fvp, p, cred, LEASE_READ);
600255395Strociny	vn_lock(fvp, LK_EXCLUSIVE | LK_RETRY, p);	/* XXX */
601255395Strociny	VOP_UNLOCK(tvp, 0, p);				/* XXX */
602255395Strociny	VOP_LEASE(tvp, p, cred, LEASE_WRITE);
603255395Strociny	vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY, p);	/* XXX */
604255395Strociny
605255395Strociny	buf = malloc(MAXBSIZE, M_TEMP, M_WAITOK);
606255395Strociny
607255395Strociny	/* ugly loop follows... */
608255395Strociny	do {
609255395Strociny		off_t offset = uio.uio_offset;
610255395Strociny
611255395Strociny		uio.uio_iov = &iov;
612255395Strociny		uio.uio_iovcnt = 1;
613255395Strociny		iov.iov_base = buf;
614255395Strociny		iov.iov_len = MAXBSIZE;
615255395Strociny		uio.uio_resid = iov.iov_len;
616255395Strociny		uio.uio_rw = UIO_READ;
617255395Strociny		error = VOP_READ(fvp, &uio, 0, cred);
618255395Strociny
619255395Strociny		if (error == 0) {
620255395Strociny			uio.uio_iov = &iov;
621255395Strociny			uio.uio_iovcnt = 1;
622176669Spiso			iov.iov_base = buf;
623176669Spiso			iov.iov_len = MAXBSIZE - uio.uio_resid;
624176669Spiso			uio.uio_offset = offset;
625176669Spiso			uio.uio_rw = UIO_WRITE;
626176669Spiso			uio.uio_resid = iov.iov_len;
627176669Spiso
628200580Sluigi			if (uio.uio_resid == 0)
629176669Spiso				break;
630176669Spiso
631176669Spiso			do {
632176669Spiso				error = VOP_WRITE(tvp, &uio, 0, cred);
633255395Strociny			} while ((uio.uio_resid > 0) && (error == 0));
634255395Strociny		}
635176669Spiso
636176669Spiso	} while (error == 0);
637176669Spiso
638176669Spiso	free(buf, M_TEMP);
639176669Spiso	return (error);
640176669Spiso}
641200897Sluigi
642255395Strociny/*
643176669Spiso * (un) is assumed to be locked on entry and remains
644176669Spiso * locked on exit.
645200580Sluigi */
646200580Sluigiint
647200580Sluigiunion_copyup(un, docopy, cred, p)
648200580Sluigi	struct union_node *un;
649200580Sluigi	int docopy;
650176669Spiso	struct ucred *cred;
651176669Spiso	struct proc *p;
652176669Spiso{
653176669Spiso	int error;
654176669Spiso	struct vnode *lvp, *uvp;
655176669Spiso
656176669Spiso	error = union_vn_create(&uvp, un, p);
657176669Spiso	if (error)
658176669Spiso		return (error);
659176669Spiso
660176669Spiso	/* at this point, uppervp is locked */
661176669Spiso	union_newupper(un, uvp);
662176669Spiso	un->un_flags |= UN_ULOCK;
663176669Spiso
664176669Spiso	lvp = un->un_lowervp;
665176669Spiso
666176669Spiso	if (docopy) {
667176669Spiso		/*
668176669Spiso		 * XX - should not ignore errors
669176669Spiso		 * from VOP_CLOSE
670176669Spiso		 */
671176669Spiso		vn_lock(lvp, LK_EXCLUSIVE | LK_RETRY, p);
672176669Spiso		error = VOP_OPEN(lvp, FREAD, cred, p);
673176669Spiso		if (error == 0) {
674176669Spiso			error = union_copyfile(lvp, uvp, cred, p);
675176669Spiso			VOP_UNLOCK(lvp, 0, p);
676176669Spiso			(void) VOP_CLOSE(lvp, FREAD, cred, p);
677255395Strociny		}
678255395Strociny#ifdef UNION_DIAGNOSTIC
679255395Strociny		if (error == 0)
680255395Strociny			uprintf("union: copied up %s\n", un->un_path);
681255395Strociny#endif
682255395Strociny
683255395Strociny	}
684176669Spiso	un->un_flags &= ~UN_ULOCK;
685176669Spiso	VOP_UNLOCK(uvp, 0, p);
686176669Spiso	union_vn_close(uvp, FWRITE, cred, p);
687255395Strociny	vn_lock(uvp, LK_EXCLUSIVE | LK_RETRY, p);
688255395Strociny	un->un_flags |= UN_ULOCK;
689255395Strociny
690255395Strociny	/*
691255395Strociny	 * Subsequent IOs will go to the top layer, so
692255395Strociny	 * call close on the lower vnode and open on the
693255395Strociny	 * upper vnode to ensure that the filesystem keeps
694255395Strociny	 * its references counts right.  This doesn't do
695255395Strociny	 * the right thing with (cred) and (FREAD) though.
696255395Strociny	 * Ignoring error returns is not right, either.
697255395Strociny	 */
698200601Sluigi	if (error == 0) {
699		int i;
700
701		for (i = 0; i < un->un_openl; i++) {
702			(void) VOP_CLOSE(lvp, FREAD, cred, p);
703			(void) VOP_OPEN(uvp, FREAD, cred, p);
704		}
705		un->un_openl = 0;
706	}
707
708	return (error);
709
710}
711
712static int
713union_relookup(um, dvp, vpp, cnp, cn, path, pathlen)
714	struct union_mount *um;
715	struct vnode *dvp;
716	struct vnode **vpp;
717	struct componentname *cnp;
718	struct componentname *cn;
719	char *path;
720	int pathlen;
721{
722	int error;
723
724	/*
725	 * A new componentname structure must be faked up because
726	 * there is no way to know where the upper level cnp came
727	 * from or what it is being used for.  This must duplicate
728	 * some of the work done by NDINIT, some of the work done
729	 * by namei, some of the work done by lookup and some of
730	 * the work done by VOP_LOOKUP when given a CREATE flag.
731	 * Conclusion: Horrible.
732	 *
733	 * The pathname buffer will be FREEed by VOP_MKDIR.
734	 */
735	cn->cn_namelen = pathlen;
736	cn->cn_pnbuf = malloc(cn->cn_namelen+1, M_NAMEI, M_WAITOK);
737	bcopy(path, cn->cn_pnbuf, cn->cn_namelen);
738	cn->cn_pnbuf[cn->cn_namelen] = '\0';
739
740	cn->cn_nameiop = CREATE;
741	cn->cn_flags = (LOCKPARENT|HASBUF|SAVENAME|SAVESTART|ISLASTCN);
742	cn->cn_proc = cnp->cn_proc;
743	if (um->um_op == UNMNT_ABOVE)
744		cn->cn_cred = cnp->cn_cred;
745	else
746		cn->cn_cred = um->um_cred;
747	cn->cn_nameptr = cn->cn_pnbuf;
748	cn->cn_hash = cnp->cn_hash;
749	cn->cn_consume = cnp->cn_consume;
750
751	VREF(dvp);
752	error = relookup(dvp, vpp, cn);
753	if (!error)
754		vrele(dvp);
755
756	return (error);
757}
758
759/*
760 * Create a shadow directory in the upper layer.
761 * The new vnode is returned locked.
762 *
763 * (um) points to the union mount structure for access to the
764 * the mounting process's credentials.
765 * (dvp) is the directory in which to create the shadow directory.
766 * it is unlocked on entry and exit.
767 * (cnp) is the componentname to be created.
768 * (vpp) is the returned newly created shadow directory, which
769 * is returned locked.
770 */
771int
772union_mkshadow(um, dvp, cnp, vpp)
773	struct union_mount *um;
774	struct vnode *dvp;
775	struct componentname *cnp;
776	struct vnode **vpp;
777{
778	int error;
779	struct vattr va;
780	struct proc *p = cnp->cn_proc;
781	struct componentname cn;
782
783	error = union_relookup(um, dvp, vpp, cnp, &cn,
784			cnp->cn_nameptr, cnp->cn_namelen);
785	if (error)
786		return (error);
787
788	if (*vpp) {
789		VOP_ABORTOP(dvp, &cn);
790		VOP_UNLOCK(dvp, 0, p);
791		vrele(*vpp);
792		*vpp = NULLVP;
793		return (EEXIST);
794	}
795
796	/*
797	 * policy: when creating the shadow directory in the
798	 * upper layer, create it owned by the user who did
799	 * the mount, group from parent directory, and mode
800	 * 777 modified by umask (ie mostly identical to the
801	 * mkdir syscall).  (jsp, kb)
802	 */
803
804	VATTR_NULL(&va);
805	va.va_type = VDIR;
806	va.va_mode = um->um_cmode;
807
808	/* VOP_LEASE: dvp is locked */
809	VOP_LEASE(dvp, p, cn.cn_cred, LEASE_WRITE);
810
811	error = VOP_MKDIR(dvp, vpp, &cn, &va);
812	return (error);
813}
814
815/*
816 * Create a whiteout entry in the upper layer.
817 *
818 * (um) points to the union mount structure for access to the
819 * the mounting process's credentials.
820 * (dvp) is the directory in which to create the whiteout.
821 * it is locked on entry and exit.
822 * (cnp) is the componentname to be created.
823 */
824int
825union_mkwhiteout(um, dvp, cnp, path)
826	struct union_mount *um;
827	struct vnode *dvp;
828	struct componentname *cnp;
829	char *path;
830{
831	int error;
832	struct vattr va;
833	struct proc *p = cnp->cn_proc;
834	struct vnode *wvp;
835	struct componentname cn;
836
837	VOP_UNLOCK(dvp, 0, p);
838	error = union_relookup(um, dvp, &wvp, cnp, &cn, path, strlen(path));
839	if (error) {
840		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, p);
841		return (error);
842	}
843
844	if (wvp) {
845		VOP_ABORTOP(dvp, &cn);
846		vrele(dvp);
847		vrele(wvp);
848		return (EEXIST);
849	}
850
851	/* VOP_LEASE: dvp is locked */
852	VOP_LEASE(dvp, p, p->p_ucred, LEASE_WRITE);
853
854	error = VOP_WHITEOUT(dvp, &cn, CREATE);
855	if (error)
856		VOP_ABORTOP(dvp, &cn);
857
858	vrele(dvp);
859
860	return (error);
861}
862
863/*
864 * union_vn_create: creates and opens a new shadow file
865 * on the upper union layer.  this function is similar
866 * in spirit to calling vn_open but it avoids calling namei().
867 * the problem with calling namei is that a) it locks too many
868 * things, and b) it doesn't start at the "right" directory,
869 * whereas relookup is told where to start.
870 */
871int
872union_vn_create(vpp, un, p)
873	struct vnode **vpp;
874	struct union_node *un;
875	struct proc *p;
876{
877	struct vnode *vp;
878	struct ucred *cred = p->p_ucred;
879	struct vattr vat;
880	struct vattr *vap = &vat;
881	int fmode = FFLAGS(O_WRONLY|O_CREAT|O_TRUNC|O_EXCL);
882	int error;
883	int cmode = UN_FILEMODE & ~p->p_fd->fd_cmask;
884	struct componentname cn;
885
886	*vpp = NULLVP;
887
888	/*
889	 * Build a new componentname structure (for the same
890	 * reasons outlines in union_mkshadow).
891	 * The difference here is that the file is owned by
892	 * the current user, rather than by the person who
893	 * did the mount, since the current user needs to be
894	 * able to write the file (that's why it is being
895	 * copied in the first place).
896	 */
897	cn.cn_namelen = strlen(un->un_path);
898	cn.cn_pnbuf = (caddr_t) malloc(cn.cn_namelen, M_NAMEI, M_WAITOK);
899	bcopy(un->un_path, cn.cn_pnbuf, cn.cn_namelen+1);
900	cn.cn_nameiop = CREATE;
901	cn.cn_flags = (LOCKPARENT|HASBUF|SAVENAME|SAVESTART|ISLASTCN);
902	cn.cn_proc = p;
903	cn.cn_cred = p->p_ucred;
904	cn.cn_nameptr = cn.cn_pnbuf;
905	cn.cn_hash = un->un_hash;
906	cn.cn_consume = 0;
907
908	VREF(un->un_dirvp);
909	error = relookup(un->un_dirvp, &vp, &cn);
910	if (error)
911		return (error);
912	vrele(un->un_dirvp);
913
914	if (vp) {
915		VOP_ABORTOP(un->un_dirvp, &cn);
916		if (un->un_dirvp == vp)
917			vrele(un->un_dirvp);
918		else
919			vput(un->un_dirvp);
920		vrele(vp);
921		return (EEXIST);
922	}
923
924	/*
925	 * Good - there was no race to create the file
926	 * so go ahead and create it.  The permissions
927	 * on the file will be 0666 modified by the
928	 * current user's umask.  Access to the file, while
929	 * it is unioned, will require access to the top *and*
930	 * bottom files.  Access when not unioned will simply
931	 * require access to the top-level file.
932	 * TODO: confirm choice of access permissions.
933	 */
934	VATTR_NULL(vap);
935	vap->va_type = VREG;
936	vap->va_mode = cmode;
937	VOP_LEASE(un->un_dirvp, p, cred, LEASE_WRITE);
938	if (error = VOP_CREATE(un->un_dirvp, &vp, &cn, vap))
939		return (error);
940
941	error = VOP_OPEN(vp, fmode, cred, p);
942	if (error) {
943		vput(vp);
944		return (error);
945	}
946
947	vp->v_writecount++;
948	*vpp = vp;
949	return (0);
950}
951
952int
953union_vn_close(vp, fmode, cred, p)
954	struct vnode *vp;
955	int fmode;
956	struct ucred *cred;
957	struct proc *p;
958{
959
960	if (fmode & FWRITE)
961		--vp->v_writecount;
962	return (VOP_CLOSE(vp, fmode, cred, p));
963}
964
965void
966union_removed_upper(un)
967	struct union_node *un;
968{
969	struct proc *p = curproc;	/* XXX */
970
971	union_newupper(un, NULLVP);
972	if (un->un_flags & UN_CACHED) {
973		un->un_flags &= ~UN_CACHED;
974		LIST_REMOVE(un, un_cache);
975	}
976
977	if (un->un_flags & UN_ULOCK) {
978		un->un_flags &= ~UN_ULOCK;
979		VOP_UNLOCK(un->un_uppervp, 0, p);
980	}
981}
982
#if 0
/*
 * union_lowervp: (compiled out) return the lower-layer vnode of
 * (vp) after a successful vget, provided one exists and has the
 * same v_type as (vp); otherwise return NULLVP.
 */
struct vnode *
union_lowervp(vp)
	struct vnode *vp;
{
	struct union_node *un = VTOUNION(vp);

	if (un->un_lowervp == NULLVP)
		return (NULLVP);

	if (vp->v_type != un->un_lowervp->v_type)
		return (NULLVP);

	if (vget(un->un_lowervp, 0) != 0)
		return (NULLVP);

	return (un->un_lowervp);
}
#endif
999
1000/*
1001 * determine whether a whiteout is needed
1002 * during a remove/rmdir operation.
1003 */
1004int
1005union_dowhiteout(un, cred, p)
1006	struct union_node *un;
1007	struct ucred *cred;
1008	struct proc *p;
1009{
1010	struct vattr va;
1011
1012	if (un->un_lowervp != NULLVP)
1013		return (1);
1014
1015	if (VOP_GETATTR(un->un_uppervp, &va, cred, p) == 0 &&
1016	    (va.va_flags & OPAQUE))
1017		return (1);
1018
1019	return (0);
1020}
1021
1022static void
1023union_dircache_r(vp, vppp, cntp)
1024	struct vnode *vp;
1025	struct vnode ***vppp;
1026	int *cntp;
1027{
1028	struct union_node *un;
1029
1030	if (vp->v_op != union_vnodeop_p) {
1031		if (vppp) {
1032			VREF(vp);
1033			*(*vppp)++ = vp;
1034			if (--(*cntp) == 0)
1035				panic("union: dircache table too small");
1036		} else {
1037			(*cntp)++;
1038		}
1039
1040		return;
1041	}
1042
1043	un = VTOUNION(vp);
1044	if (un->un_uppervp != NULLVP)
1045		union_dircache_r(un->un_uppervp, vppp, cntp);
1046	if (un->un_lowervp != NULLVP)
1047		union_dircache_r(un->un_lowervp, vppp, cntp);
1048}
1049
1050struct vnode *
1051union_dircache(vp, p)
1052	struct vnode *vp;
1053	struct proc *p;
1054{
1055	int cnt;
1056	struct vnode *nvp;
1057	struct vnode **vpp;
1058	struct vnode **dircache;
1059	struct union_node *un;
1060	int error;
1061
1062	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
1063	dircache = VTOUNION(vp)->un_dircache;
1064
1065	nvp = NULLVP;
1066
1067	if (dircache == 0) {
1068		cnt = 0;
1069		union_dircache_r(vp, 0, &cnt);
1070		cnt++;
1071		dircache = (struct vnode **)
1072				malloc(cnt * sizeof(struct vnode *),
1073					M_TEMP, M_WAITOK);
1074		vpp = dircache;
1075		union_dircache_r(vp, &vpp, &cnt);
1076		*vpp = NULLVP;
1077		vpp = dircache + 1;
1078	} else {
1079		vpp = dircache;
1080		do {
1081			if (*vpp++ == VTOUNION(vp)->un_uppervp)
1082				break;
1083		} while (*vpp != NULLVP);
1084	}
1085
1086	if (*vpp == NULLVP)
1087		goto out;
1088
1089	vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY, p);
1090	VREF(*vpp);
1091	error = union_allocvp(&nvp, vp->v_mount, NULLVP, NULLVP, 0, *vpp, NULLVP, 0);
1092	if (error)
1093		goto out;
1094
1095	VTOUNION(vp)->un_dircache = 0;
1096	un = VTOUNION(nvp);
1097	un->un_dircache = dircache;
1098
1099out:
1100	VOP_UNLOCK(vp, 0, p);
1101	return (nvp);
1102}
1103