/* vfs_init.c revision 116182 */
1/* 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed 6 * to Berkeley by John Heidemann of the UCLA Ficus project. 7 * 8 * Source: * @(#)i405_init.c 2.10 92/04/27 UCLA Ficus project 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the University of 21 * California, Berkeley and its contributors. 22 * 4. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 * 38 * @(#)vfs_init.c 8.3 (Berkeley) 1/4/94 39 */ 40 41#include <sys/cdefs.h> 42__FBSDID("$FreeBSD: head/sys/kern/vfs_init.c 116182 2003-06-11 00:56:59Z obrien $"); 43 44#include <sys/param.h> 45#include <sys/systm.h> 46#include <sys/kernel.h> 47#include <sys/mount.h> 48#include <sys/sysctl.h> 49#include <sys/vnode.h> 50#include <sys/malloc.h> 51 52 53MALLOC_DEFINE(M_VNODE, "vnodes", "Dynamically allocated vnodes"); 54 55/* 56 * The highest defined VFS number. 57 */ 58int maxvfsconf = VFS_GENERIC + 1; 59 60/* 61 * Single-linked list of configured VFSes. 62 * New entries are added/deleted by vfs_register()/vfs_unregister() 63 */ 64struct vfsconf *vfsconf; 65 66/* 67 * vfs_init.c 68 * 69 * Allocate and fill in operations vectors. 70 * 71 * An undocumented feature of this approach to defining operations is that 72 * there can be multiple entries in vfs_opv_descs for the same operations 73 * vector. This allows third parties to extend the set of operations 74 * supported by another layer in a binary compatibile way. For example, 75 * assume that NFS needed to be modified to support Ficus. NFS has an entry 76 * (probably nfs_vnopdeop_decls) declaring all the operations NFS supports by 77 * default. Ficus could add another entry (ficus_nfs_vnodeop_decl_entensions) 78 * listing those new operations Ficus adds to NFS, all without modifying the 79 * NFS code. 
(Of couse, the OTW NFS protocol still needs to be munged, but 80 * that is a(whole)nother story.) This is a feature. 81 */ 82 83/* Table of known vnodeop vectors (list of VFS vnode vectors) */ 84static const struct vnodeopv_desc **vnodeopv_descs; 85static int vnodeopv_num; 86 87/* Table of known descs (list of vnode op handlers "vop_access_desc") */ 88static struct vnodeop_desc **vfs_op_descs; 89/* Reference counts for vfs_op_descs */ 90static int *vfs_op_desc_refs; 91/* Number of descriptions */ 92static int num_op_descs; 93/* Number of entries in each description */ 94static int vfs_opv_numops = 64; 95 96/* Allow this number to be tuned at boot */ 97TUNABLE_INT("vfs.opv_numops", &vfs_opv_numops); 98SYSCTL_INT(_vfs, OID_AUTO, opv_numops, CTLFLAG_RD, &vfs_opv_numops, 99 0, "Maximum number of operations in vop_t vector"); 100 101static int int_cmp(const void *a, const void *b); 102 103static int 104int_cmp(const void *a, const void *b) 105{ 106 return(*(const int *)a - *(const int *)b); 107} 108 109/* 110 * Recalculate the operations vector/description (those parts of it that can 111 * be recalculated, that is.) 112 * Always allocate operations vector large enough to hold vfs_opv_numops 113 * entries. The vector is never freed or deallocated once it is initialized, 114 * so that vnodes might safely reference it through their v_op pointer without 115 * vector changing suddenly from under them. 116 */ 117static void 118vfs_opv_recalc(void) 119{ 120 int i, j, k; 121 int *vfs_op_offsets; 122 vop_t ***opv_desc_vector_p; 123 vop_t **opv_desc_vector; 124 struct vnodeopv_entry_desc *opve_descp; 125 const struct vnodeopv_desc *opv; 126 127 if (vfs_op_descs == NULL) 128 panic("vfs_opv_recalc called with null vfs_op_descs"); 129 130 /* 131 * Allocate and initialize temporary array to store 132 * offsets. Sort it to put all uninitialized entries 133 * first and to make holes in existing offset sequence 134 * detectable. 
135 */ 136 MALLOC(vfs_op_offsets, int *, 137 num_op_descs * sizeof(int), M_TEMP, M_WAITOK); 138 if (vfs_op_offsets == NULL) 139 panic("vfs_opv_recalc: no memory"); 140 for (i = 0; i < num_op_descs; i++) 141 vfs_op_offsets[i] = vfs_op_descs[i]->vdesc_offset; 142 qsort(vfs_op_offsets, num_op_descs, sizeof(int), int_cmp); 143 144 /* 145 * Run through and make sure all known descs have an offset. 146 * Use vfs_op_offsets to locate holes in offset sequence and 147 * reuse them. 148 * vop_default_desc is hardwired at offset 1, and offset 0 149 * is a panic sanity check. 150 */ 151 j = 1; k = 1; 152 for (i = 0; i < num_op_descs; i++) { 153 if (vfs_op_descs[i]->vdesc_offset != 0) 154 continue; 155 /* 156 * Look at two adjacent entries vfs_op_offsets[j - 1] and 157 * vfs_op_offsets[j] and see if we can fit a new offset 158 * number in between. If not, look at the next pair until 159 * hole is found or the end of the vfs_op_offsets vector is 160 * reached. j has been initialized to 1 above so that 161 * referencing (j-1)-th element is safe and the loop will 162 * never execute if num_op_descs is 1. For each new value s 163 * of i the j loop pick up from where previous iteration has 164 * left off. When the last hole has been consumed or if no 165 * hole has been found, we will start allocating new numbers 166 * starting from the biggest already available offset + 1. 167 */ 168 for (; j < num_op_descs; j++) { 169 if (vfs_op_offsets[j - 1] < k && vfs_op_offsets[j] > k) 170 break; 171 k = vfs_op_offsets[j] + 1; 172 } 173 vfs_op_descs[i]->vdesc_offset = k++; 174 } 175 FREE(vfs_op_offsets, M_TEMP); 176 177 /* Panic if new vops will cause vector overflow */ 178 if (k > vfs_opv_numops) 179 panic("VFS: Ran out of vop_t vector entries. 
%d entries required, only %d available.\n", k, vfs_opv_numops); 180 181 /* 182 * Allocate and fill in the vectors 183 */ 184 for (i = 0; i < vnodeopv_num; i++) { 185 opv = vnodeopv_descs[i]; 186 opv_desc_vector_p = opv->opv_desc_vector_p; 187 if (*opv_desc_vector_p == NULL) 188 MALLOC(*opv_desc_vector_p, vop_t **, 189 vfs_opv_numops * sizeof(vop_t *), M_VNODE, 190 M_WAITOK | M_ZERO); 191 192 /* Fill in, with slot 0 being to return EOPNOTSUPP */ 193 opv_desc_vector = *opv_desc_vector_p; 194 opv_desc_vector[0] = (vop_t *)vop_eopnotsupp; 195 for (j = 0; opv->opv_desc_ops[j].opve_op; j++) { 196 opve_descp = &(opv->opv_desc_ops[j]); 197 opv_desc_vector[opve_descp->opve_op->vdesc_offset] = 198 opve_descp->opve_impl; 199 } 200 201 /* Replace unfilled routines with their default (slot 1). */ 202 opv_desc_vector = *(opv->opv_desc_vector_p); 203 if (opv_desc_vector[1] == NULL) 204 panic("vfs_opv_recalc: vector without a default."); 205 for (j = 0; j < vfs_opv_numops; j++) 206 if (opv_desc_vector[j] == NULL) 207 opv_desc_vector[j] = opv_desc_vector[1]; 208 } 209} 210 211/* Add a set of vnode operations (a description) to the table above. 
*/ 212void 213vfs_add_vnodeops(const void *data) 214{ 215 const struct vnodeopv_desc *opv; 216 const struct vnodeopv_desc **newopv; 217 struct vnodeop_desc **newop; 218 int *newref; 219 vop_t **opv_desc_vector; 220 struct vnodeop_desc *desc; 221 int i, j; 222 223 opv = (const struct vnodeopv_desc *)data; 224 MALLOC(newopv, const struct vnodeopv_desc **, 225 (vnodeopv_num + 1) * sizeof(*newopv), M_VNODE, M_WAITOK); 226 if (vnodeopv_descs) { 227 bcopy(vnodeopv_descs, newopv, vnodeopv_num * sizeof(*newopv)); 228 FREE(vnodeopv_descs, M_VNODE); 229 } 230 newopv[vnodeopv_num] = opv; 231 vnodeopv_descs = newopv; 232 vnodeopv_num++; 233 234 /* See if we have turned up a new vnode op desc */ 235 opv_desc_vector = *(opv->opv_desc_vector_p); 236 for (i = 0; (desc = opv->opv_desc_ops[i].opve_op); i++) { 237 for (j = 0; j < num_op_descs; j++) { 238 if (desc == vfs_op_descs[j]) { 239 /* found it, increase reference count */ 240 vfs_op_desc_refs[j]++; 241 break; 242 } 243 } 244 if (j == num_op_descs) { 245 /* not found, new entry */ 246 MALLOC(newop, struct vnodeop_desc **, 247 (num_op_descs + 1) * sizeof(*newop), 248 M_VNODE, M_WAITOK); 249 /* new reference count (for unload) */ 250 MALLOC(newref, int *, 251 (num_op_descs + 1) * sizeof(*newref), 252 M_VNODE, M_WAITOK); 253 if (vfs_op_descs) { 254 bcopy(vfs_op_descs, newop, 255 num_op_descs * sizeof(*newop)); 256 FREE(vfs_op_descs, M_VNODE); 257 } 258 if (vfs_op_desc_refs) { 259 bcopy(vfs_op_desc_refs, newref, 260 num_op_descs * sizeof(*newref)); 261 FREE(vfs_op_desc_refs, M_VNODE); 262 } 263 newop[num_op_descs] = desc; 264 newref[num_op_descs] = 1; 265 vfs_op_descs = newop; 266 vfs_op_desc_refs = newref; 267 num_op_descs++; 268 } 269 } 270 vfs_opv_recalc(); 271} 272 273/* Remove a vnode type from the vnode description table above. 
*/ 274void 275vfs_rm_vnodeops(const void *data) 276{ 277 const struct vnodeopv_desc *opv; 278 const struct vnodeopv_desc **newopv; 279 struct vnodeop_desc **newop; 280 int *newref; 281 vop_t **opv_desc_vector; 282 struct vnodeop_desc *desc; 283 int i, j, k; 284 285 opv = (const struct vnodeopv_desc *)data; 286 /* Lower ref counts on descs in the table and release if zero */ 287 for (i = 0; (desc = opv->opv_desc_ops[i].opve_op); i++) { 288 for (j = 0; j < num_op_descs; j++) { 289 if (desc == vfs_op_descs[j]) { 290 /* found it, decrease reference count */ 291 vfs_op_desc_refs[j]--; 292 break; 293 } 294 } 295 for (j = 0; j < num_op_descs; j++) { 296 if (vfs_op_desc_refs[j] > 0) 297 continue; 298 if (vfs_op_desc_refs[j] < 0) 299 panic("vfs_remove_vnodeops: negative refcnt"); 300 /* Entry is going away - replace it with defaultop */ 301 for (k = 0; k < vnodeopv_num; k++) { 302 opv_desc_vector = 303 *(vnodeopv_descs[k]->opv_desc_vector_p); 304 if (opv_desc_vector != NULL) 305 opv_desc_vector[desc->vdesc_offset] = 306 opv_desc_vector[1]; 307 } 308 MALLOC(newop, struct vnodeop_desc **, 309 (num_op_descs - 1) * sizeof(*newop), 310 M_VNODE, M_WAITOK); 311 /* new reference count (for unload) */ 312 MALLOC(newref, int *, 313 (num_op_descs - 1) * sizeof(*newref), 314 M_VNODE, M_WAITOK); 315 for (k = j; k < (num_op_descs - 1); k++) { 316 vfs_op_descs[k] = vfs_op_descs[k + 1]; 317 vfs_op_desc_refs[k] = vfs_op_desc_refs[k + 1]; 318 } 319 bcopy(vfs_op_descs, newop, 320 (num_op_descs - 1) * sizeof(*newop)); 321 bcopy(vfs_op_desc_refs, newref, 322 (num_op_descs - 1) * sizeof(*newref)); 323 FREE(vfs_op_descs, M_VNODE); 324 FREE(vfs_op_desc_refs, M_VNODE); 325 vfs_op_descs = newop; 326 vfs_op_desc_refs = newref; 327 num_op_descs--; 328 } 329 } 330 331 for (i = 0; i < vnodeopv_num; i++) { 332 if (vnodeopv_descs[i] == opv) { 333 for (j = i; j < (vnodeopv_num - 1); j++) 334 vnodeopv_descs[j] = vnodeopv_descs[j + 1]; 335 break; 336 } 337 } 338 if (i == vnodeopv_num) 339 
panic("vfs_remove_vnodeops: opv not found"); 340 opv_desc_vector = *(opv->opv_desc_vector_p); 341 if (opv_desc_vector != NULL) 342 FREE(opv_desc_vector, M_VNODE); 343 MALLOC(newopv, const struct vnodeopv_desc **, 344 (vnodeopv_num - 1) * sizeof(*newopv), M_VNODE, M_WAITOK); 345 bcopy(vnodeopv_descs, newopv, (vnodeopv_num - 1) * sizeof(*newopv)); 346 FREE(vnodeopv_descs, M_VNODE); 347 vnodeopv_descs = newopv; 348 vnodeopv_num--; 349 350 vfs_opv_recalc(); 351} 352 353/* 354 * Routines having to do with the management of the vnode table. 355 */ 356struct vattr va_null; 357 358/* 359 * Initialize the vnode structures and initialize each filesystem type. 360 */ 361/* ARGSUSED*/ 362static void 363vfsinit(void *dummy) 364{ 365 366 vattr_null(&va_null); 367} 368SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_FIRST, vfsinit, NULL) 369 370/* Register a new filesystem type in the global table */ 371int 372vfs_register(struct vfsconf *vfc) 373{ 374 struct sysctl_oid *oidp; 375 struct vfsconf *vfsp; 376 377 vfsp = NULL; 378 if (vfsconf) 379 for (vfsp = vfsconf; vfsp->vfc_next; vfsp = vfsp->vfc_next) 380 if (strcmp(vfc->vfc_name, vfsp->vfc_name) == 0) 381 return EEXIST; 382 383 vfc->vfc_typenum = maxvfsconf++; 384 if (vfsp) 385 vfsp->vfc_next = vfc; 386 else 387 vfsconf = vfc; 388 vfc->vfc_next = NULL; 389 390 /* 391 * If this filesystem has a sysctl node under vfs 392 * (i.e. vfs.xxfs), then change the oid number of that node to 393 * match the filesystem's type number. This allows user code 394 * which uses the type number to read sysctl variables defined 395 * by the filesystem to continue working. Since the oids are 396 * in a sorted list, we need to make sure the order is 397 * preserved by re-registering the oid after modifying its 398 * number. 
399 */ 400 SLIST_FOREACH(oidp, &sysctl__vfs_children, oid_link) 401 if (strcmp(oidp->oid_name, vfc->vfc_name) == 0) { 402 sysctl_unregister_oid(oidp); 403 oidp->oid_number = vfc->vfc_typenum; 404 sysctl_register_oid(oidp); 405 } 406 407 /* 408 * Call init function for this VFS... 409 */ 410 (*(vfc->vfc_vfsops->vfs_init))(vfc); 411 412 return 0; 413} 414 415 416/* Remove registration of a filesystem type */ 417int 418vfs_unregister(struct vfsconf *vfc) 419{ 420 struct vfsconf *vfsp, *prev_vfsp; 421 int error, i, maxtypenum; 422 423 i = vfc->vfc_typenum; 424 425 prev_vfsp = NULL; 426 for (vfsp = vfsconf; vfsp; 427 prev_vfsp = vfsp, vfsp = vfsp->vfc_next) { 428 if (!strcmp(vfc->vfc_name, vfsp->vfc_name)) 429 break; 430 } 431 if (vfsp == NULL) 432 return EINVAL; 433 if (vfsp->vfc_refcount) 434 return EBUSY; 435 if (vfc->vfc_vfsops->vfs_uninit != NULL) { 436 error = (*vfc->vfc_vfsops->vfs_uninit)(vfsp); 437 if (error) 438 return (error); 439 } 440 if (prev_vfsp) 441 prev_vfsp->vfc_next = vfsp->vfc_next; 442 else 443 vfsconf = vfsp->vfc_next; 444 maxtypenum = VFS_GENERIC; 445 for (vfsp = vfsconf; vfsp != NULL; vfsp = vfsp->vfc_next) 446 if (maxtypenum < vfsp->vfc_typenum) 447 maxtypenum = vfsp->vfc_typenum; 448 maxvfsconf = maxtypenum + 1; 449 return 0; 450} 451 452/* 453 * Standard kernel module handling code for filesystem modules. 454 * Referenced from VFS_SET(). 455 */ 456int 457vfs_modevent(module_t mod, int type, void *data) 458{ 459 struct vfsconf *vfc; 460 int error = 0; 461 462 vfc = (struct vfsconf *)data; 463 464 switch (type) { 465 case MOD_LOAD: 466 if (vfc) 467 error = vfs_register(vfc); 468 break; 469 470 case MOD_UNLOAD: 471 if (vfc) 472 error = vfs_unregister(vfc); 473 break; 474 default: /* including MOD_SHUTDOWN */ 475 break; 476 } 477 return (error); 478} 479