/*
 * Copyright (c) 1992, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software donated to Berkeley by
 * Jan-Simon Pendry.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28218822Sdim * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 2933965Sjdp * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 3033965Sjdp * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 3133965Sjdp * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32218822Sdim * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33218822Sdim * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34218822Sdim * SUCH DAMAGE. 35218822Sdim * 36218822Sdim * @(#)null_subr.c 8.7 (Berkeley) 5/14/95 37218822Sdim * 38218822Sdim * $FreeBSD: head/sys/fs/nullfs/null_subr.c 103314 2002-09-14 09:02:28Z njl $ 39218822Sdim */ 40218822Sdim 41218822Sdim#include <sys/param.h> 42218822Sdim#include <sys/systm.h> 43218822Sdim#include <sys/kernel.h> 44218822Sdim#include <sys/lock.h> 45218822Sdim#include <sys/mutex.h> 46218822Sdim#include <sys/malloc.h> 47218822Sdim#include <sys/mount.h> 48218822Sdim#include <sys/proc.h> 49218822Sdim#include <sys/vnode.h> 5033965Sjdp 5133965Sjdp#include <fs/nullfs/null.h> 5233965Sjdp 5333965Sjdp#define LOG2_SIZEVNODE 7 /* log2(sizeof struct vnode) */ 5433965Sjdp#define NNULLNODECACHE 16 5533965Sjdp 5633965Sjdp/* 5733965Sjdp * Null layer cache: 5833965Sjdp * Each cache entry holds a reference to the lower vnode 5933965Sjdp * along with a pointer to the alias vnode. When an 6033965Sjdp * entry is added the lower vnode is VREF'd. When the 6133965Sjdp * alias is removed the lower vnode is vrele'd. 
6233965Sjdp */ 6333965Sjdp 6433965Sjdp#define NULL_NHASH(vp) \ 6533965Sjdp (&null_node_hashtbl[(((uintptr_t)vp)>>LOG2_SIZEVNODE) & null_node_hash]) 6633965Sjdp 6733965Sjdpstatic LIST_HEAD(null_node_hashhead, null_node) *null_node_hashtbl; 6877298Sobrienstatic u_long null_node_hash; 6933965Sjdpstruct mtx null_hashmtx; 7033965Sjdp 7133965Sjdpstatic MALLOC_DEFINE(M_NULLFSHASH, "NULLFS hash", "NULLFS hash table"); 7233965SjdpMALLOC_DEFINE(M_NULLFSNODE, "NULLFS node", "NULLFS vnode private part"); 7333965Sjdp 7433965Sjdpstatic struct vnode * null_hashget(struct vnode *); 7533965Sjdpstatic struct vnode * null_hashins(struct null_node *); 7633965Sjdp 7733965Sjdp/* 7833965Sjdp * Initialise cache headers 7933965Sjdp */ 8033965Sjdpint 8133965Sjdpnullfs_init(vfsp) 8233965Sjdp struct vfsconf *vfsp; 8333965Sjdp{ 8433965Sjdp 8533965Sjdp NULLFSDEBUG("nullfs_init\n"); /* printed during system boot */ 8633965Sjdp null_node_hashtbl = hashinit(NNULLNODECACHE, M_NULLFSHASH, &null_node_hash); 8733965Sjdp mtx_init(&null_hashmtx, "nullhs", NULL, MTX_DEF); 8833965Sjdp return (0); 8933965Sjdp} 9033965Sjdp 9133965Sjdpint 9233965Sjdpnullfs_uninit(vfsp) 9333965Sjdp struct vfsconf *vfsp; 9433965Sjdp{ 9533965Sjdp 9633965Sjdp mtx_destroy(&null_hashmtx); 9733965Sjdp free(null_node_hashtbl, M_NULLFSHASH); 9833965Sjdp return (0); 9933965Sjdp} 10033965Sjdp 10133965Sjdp/* 10233965Sjdp * Return a VREF'ed alias for lower vnode if already exists, else 0. 10333965Sjdp * Lower vnode should be locked on entry and will be left locked on exit. 10433965Sjdp */ 10533965Sjdpstatic struct vnode * 10633965Sjdpnull_hashget(lowervp) 107 struct vnode *lowervp; 108{ 109 struct thread *td = curthread; /* XXX */ 110 struct null_node_hashhead *hd; 111 struct null_node *a; 112 struct vnode *vp; 113 114 /* 115 * Find hash base, and then search the (two-way) linked 116 * list looking for a null_node structure which is referencing 117 * the lower vnode. 
If found, the increment the null_node 118 * reference count (but NOT the lower vnode's VREF counter). 119 */ 120 hd = NULL_NHASH(lowervp); 121loop: 122 mtx_lock(&null_hashmtx); 123 LIST_FOREACH(a, hd, null_hash) { 124 if (a->null_lowervp == lowervp) { 125 vp = NULLTOV(a); 126 mtx_lock(&vp->v_interlock); 127 mtx_unlock(&null_hashmtx); 128 /* 129 * We need vget for the VXLOCK 130 * stuff, but we don't want to lock 131 * the lower node. 132 */ 133 if (vget(vp, LK_EXCLUSIVE | LK_THISLAYER | LK_INTERLOCK, td)) 134 goto loop; 135 136 return (vp); 137 } 138 } 139 mtx_unlock(&null_hashmtx); 140 return (NULLVP); 141} 142 143/* 144 * Act like null_hashget, but add passed null_node to hash if no existing 145 * node found. 146 */ 147static struct vnode * 148null_hashins(xp) 149 struct null_node *xp; 150{ 151 struct thread *td = curthread; /* XXX */ 152 struct null_node_hashhead *hd; 153 struct null_node *oxp; 154 struct vnode *ovp; 155 156 hd = NULL_NHASH(xp->null_lowervp); 157loop: 158 mtx_lock(&null_hashmtx); 159 LIST_FOREACH(oxp, hd, null_hash) { 160 if (oxp->null_lowervp == xp->null_lowervp) { 161 ovp = NULLTOV(oxp); 162 mtx_lock(&ovp->v_interlock); 163 mtx_unlock(&null_hashmtx); 164 if (vget(ovp, LK_EXCLUSIVE | LK_THISLAYER | LK_INTERLOCK, td)) 165 goto loop; 166 167 return (ovp); 168 } 169 } 170 LIST_INSERT_HEAD(hd, xp, null_hash); 171 mtx_unlock(&null_hashmtx); 172 return (NULLVP); 173} 174 175/* 176 * Make a new or get existing nullfs node. 177 * Vp is the alias vnode, lowervp is the lower vnode. 178 * 179 * The lowervp assumed to be locked and having "spare" reference. This routine 180 * vrele lowervp if nullfs node was taken from hash. Otherwise it "transfers" 181 * the caller's "spare" reference to created nullfs vnode. 
182 */ 183int 184null_nodeget(mp, lowervp, vpp) 185 struct mount *mp; 186 struct vnode *lowervp; 187 struct vnode **vpp; 188{ 189 struct thread *td = curthread; /* XXX */ 190 struct null_node *xp; 191 struct vnode *vp; 192 int error; 193 194 /* Lookup the hash firstly */ 195 *vpp = null_hashget(lowervp); 196 if (*vpp != NULL) { 197 vrele(lowervp); 198 return (0); 199 } 200 201 /* 202 * We do not serialize vnode creation, instead we will check for 203 * duplicates later, when adding new vnode to hash. 204 * 205 * Note that duplicate can only appear in hash if the lowervp is 206 * locked LK_SHARED. 207 */ 208 209 /* 210 * Do the MALLOC before the getnewvnode since doing so afterward 211 * might cause a bogus v_data pointer to get dereferenced 212 * elsewhere if MALLOC should block. 213 */ 214 MALLOC(xp, struct null_node *, sizeof(struct null_node), 215 M_NULLFSNODE, M_WAITOK); 216 217 error = getnewvnode("null", mp, null_vnodeop_p, &vp); 218 if (error) { 219 FREE(xp, M_NULLFSNODE); 220 return (error); 221 } 222 223 xp->null_vnode = vp; 224 xp->null_lowervp = lowervp; 225 226 vp->v_type = lowervp->v_type; 227 vp->v_data = xp; 228 229 /* Though v_lock is inited by getnewvnode(), we want our own wmesg */ 230 lockinit(&vp->v_lock, PVFS, "nunode", VLKTIMEOUT, LK_NOPAUSE); 231 232 /* 233 * From NetBSD: 234 * Now lock the new node. We rely on the fact that we were passed 235 * a locked vnode. If the lower node is exporting a struct lock 236 * (v_vnlock != NULL) then we just set the upper v_vnlock to the 237 * lower one, and both are now locked. If the lower node is exporting 238 * NULL, then we copy that up and manually lock the new vnode. 239 */ 240 241 vp->v_vnlock = lowervp->v_vnlock; 242 error = VOP_LOCK(vp, LK_EXCLUSIVE | LK_THISLAYER, td); 243 if (error) 244 panic("null_nodeget: can't lock new vnode\n"); 245 246 /* 247 * Atomically insert our new node into the hash or vget existing 248 * if someone else has beaten us to it. 
249 */ 250 *vpp = null_hashins(xp); 251 if (*vpp != NULL) { 252 vrele(lowervp); 253 VOP_UNLOCK(vp, LK_THISLAYER, td); 254 vp->v_vnlock = NULL; 255 xp->null_lowervp = NULL; 256 vrele(vp); 257 return (0); 258 } 259 260 /* 261 * XXX We take extra vref just to workaround UFS's XXX: 262 * UFS can vrele() vnode in VOP_CLOSE() in some cases. Luckily, this 263 * can only happen if v_usecount == 1. To workaround, we just don't 264 * let v_usecount be 1, it will be 2 or more. 265 */ 266 VREF(lowervp); 267 268 *vpp = vp; 269 270 return (0); 271} 272 273/* 274 * Remove node from hash. 275 */ 276void 277null_hashrem(xp) 278 struct null_node *xp; 279{ 280 281 mtx_lock(&null_hashmtx); 282 LIST_REMOVE(xp, null_hash); 283 mtx_unlock(&null_hashmtx); 284} 285 286#ifdef DIAGNOSTIC 287#include "opt_ddb.h" 288 289#ifdef DDB 290#define null_checkvp_barrier 1 291#else 292#define null_checkvp_barrier 0 293#endif 294 295struct vnode * 296null_checkvp(vp, fil, lno) 297 struct vnode *vp; 298 char *fil; 299 int lno; 300{ 301 struct null_node *a = VTONULL(vp); 302#ifdef notyet 303 /* 304 * Can't do this check because vop_reclaim runs 305 * with a funny vop vector. 
306 */ 307 if (vp->v_op != null_vnodeop_p) { 308 printf ("null_checkvp: on non-null-node\n"); 309 while (null_checkvp_barrier) /*WAIT*/ ; 310 panic("null_checkvp"); 311 }; 312#endif 313 if (a->null_lowervp == NULLVP) { 314 /* Should never happen */ 315 int i; u_long *p; 316 printf("vp = %p, ZERO ptr\n", (void *)vp); 317 for (p = (u_long *) a, i = 0; i < 8; i++) 318 printf(" %lx", p[i]); 319 printf("\n"); 320 /* wait for debugger */ 321 while (null_checkvp_barrier) /*WAIT*/ ; 322 panic("null_checkvp"); 323 } 324 if (a->null_lowervp->v_usecount < 1) { 325 int i; u_long *p; 326 printf("vp = %p, unref'ed lowervp\n", (void *)vp); 327 for (p = (u_long *) a, i = 0; i < 8; i++) 328 printf(" %lx", p[i]); 329 printf("\n"); 330 /* wait for debugger */ 331 while (null_checkvp_barrier) /*WAIT*/ ; 332 panic ("null with unref'ed lowervp"); 333 }; 334#ifdef notyet 335 printf("null %x/%d -> %x/%d [%s, %d]\n", 336 NULLTOV(a), NULLTOV(a)->v_usecount, 337 a->null_lowervp, a->null_lowervp->v_usecount, 338 fil, lno); 339#endif 340 return a->null_lowervp; 341} 342#endif 343