vfs_init.c revision 135279
1/* 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed 6 * to Berkeley by John Heidemann of the UCLA Ficus project. 7 * 8 * Source: * @(#)i405_init.c 2.10 92/04/27 UCLA Ficus project 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 4. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 
33 * 34 * @(#)vfs_init.c 8.3 (Berkeley) 1/4/94 35 */ 36 37#include <sys/cdefs.h> 38__FBSDID("$FreeBSD: head/sys/kern/vfs_init.c 135279 2004-09-15 21:42:03Z phk $"); 39 40#include <sys/param.h> 41#include <sys/systm.h> 42#include <sys/kernel.h> 43#include <sys/mount.h> 44#include <sys/sysctl.h> 45#include <sys/vnode.h> 46#include <sys/malloc.h> 47 48 49MALLOC_DEFINE(M_VNODE, "vnodes", "Dynamically allocated vnodes"); 50 51/* 52 * The highest defined VFS number. 53 */ 54int maxvfsconf = VFS_GENERIC + 1; 55 56/* 57 * Tail queue (TAILQ) of configured VFSes. 58 * New entries are added/deleted by vfs_register()/vfs_unregister() 59 */ 60struct vfsconfhead vfsconf = TAILQ_HEAD_INITIALIZER(vfsconf); 61 62/* 63 * A Zen vnode attribute structure. 64 * 65 * Initialized when the first filesystem registers by vfs_register(). 66 */ 67struct vattr va_null; 68 69/* 70 * vfs_init.c 71 * 72 * Allocate and fill in operations vectors. 73 * 74 * An undocumented feature of this approach to defining operations is that 75 * there can be multiple entries in vnodeopv_descs for the same operations 76 * vector. This allows third parties to extend the set of operations 77 * supported by another layer in a binary compatible way. For example, 78 * assume that NFS needed to be modified to support Ficus. NFS has an entry 79 * (probably nfs_vnodeop_decls) declaring all the operations NFS supports by 80 * default. Ficus could add another entry (ficus_nfs_vnodeop_decl_extensions) 81 * listing those new operations Ficus adds to NFS, all without modifying the 82 * NFS code. (Of course, the OTW NFS protocol still needs to be munged, but 83 * that is a(whole)nother story.) This is a feature. 
84 */ 85 86/* Table of known vnodeop vectors (list of VFS vnode vectors) */ 87static const struct vnodeopv_desc **vnodeopv_descs; 88static int vnodeopv_num; 89 90/* Table of known descs (list of vnode op handlers "vop_access_desc") */ 91static struct vnodeop_desc **vfs_op_descs; 92/* Reference counts for vfs_op_descs */ 93static int *vfs_op_desc_refs; 94/* Number of descriptions */ 95static int num_op_descs; 96/* Number of entries in each description */ 97static int vfs_opv_numops = 64; 98 99/* Allow this number to be tuned at boot */ 100TUNABLE_INT("vfs.opv_numops", &vfs_opv_numops); 101SYSCTL_INT(_vfs, OID_AUTO, opv_numops, CTLFLAG_RDTUN, &vfs_opv_numops, 102 0, "Maximum number of operations in vop_t vector"); 103 104static int int_cmp(const void *a, const void *b); 105 106static int 107int_cmp(const void *a, const void *b) 108{ 109 return(*(const int *)a - *(const int *)b); 110} 111 112/* 113 * Recalculate the operations vector/description (those parts of it that can 114 * be recalculated, that is.) 115 * Always allocate operations vector large enough to hold vfs_opv_numops 116 * entries. The vector is never freed or deallocated once it is initialized, 117 * so that vnodes might safely reference it through their v_op pointer without 118 * vector changing suddenly from under them. 119 */ 120static void 121vfs_opv_recalc(void) 122{ 123 int i, j, k; 124 int *vfs_op_offsets; 125 vop_t ***opv_desc_vector_p; 126 vop_t **opv_desc_vector; 127 struct vnodeopv_entry_desc *opve_descp; 128 const struct vnodeopv_desc *opv; 129 130 if (vfs_op_descs == NULL) 131 panic("vfs_opv_recalc called with null vfs_op_descs"); 132 133 /* 134 * Allocate and initialize temporary array to store 135 * offsets. Sort it to put all uninitialized entries 136 * first and to make holes in existing offset sequence 137 * detectable. 
138 */ 139 MALLOC(vfs_op_offsets, int *, 140 num_op_descs * sizeof(int), M_TEMP, M_WAITOK); 141 if (vfs_op_offsets == NULL) 142 panic("vfs_opv_recalc: no memory"); 143 for (i = 0; i < num_op_descs; i++) 144 vfs_op_offsets[i] = vfs_op_descs[i]->vdesc_offset; 145 qsort(vfs_op_offsets, num_op_descs, sizeof(int), int_cmp); 146 147 /* 148 * Run through and make sure all known descs have an offset. 149 * Use vfs_op_offsets to locate holes in offset sequence and 150 * reuse them. 151 * vop_default_desc is hardwired at offset 1, and offset 0 152 * is a panic sanity check. 153 */ 154 j = 1; k = 1; 155 for (i = 0; i < num_op_descs; i++) { 156 if (vfs_op_descs[i]->vdesc_offset != 0) 157 continue; 158 /* 159 * Look at two adjacent entries vfs_op_offsets[j - 1] and 160 * vfs_op_offsets[j] and see if we can fit a new offset 161 * number in between. If not, look at the next pair until 162 * hole is found or the end of the vfs_op_offsets vector is 163 * reached. j has been initialized to 1 above so that 164 * referencing (j-1)-th element is safe and the loop will 165 * never execute if num_op_descs is 1. For each new value s 166 * of i the j loop pick up from where previous iteration has 167 * left off. When the last hole has been consumed or if no 168 * hole has been found, we will start allocating new numbers 169 * starting from the biggest already available offset + 1. 170 */ 171 for (; j < num_op_descs; j++) { 172 if (vfs_op_offsets[j - 1] < k && vfs_op_offsets[j] > k) 173 break; 174 k = vfs_op_offsets[j] + 1; 175 } 176 vfs_op_descs[i]->vdesc_offset = k++; 177 } 178 FREE(vfs_op_offsets, M_TEMP); 179 180 /* Panic if new vops will cause vector overflow */ 181 if (k > vfs_opv_numops) 182 panic("VFS: Ran out of vop_t vector entries. 
%d entries required, only %d available.\n", k, vfs_opv_numops); 183 184 /* 185 * Allocate and fill in the vectors 186 */ 187 for (i = 0; i < vnodeopv_num; i++) { 188 opv = vnodeopv_descs[i]; 189 opv_desc_vector_p = opv->opv_desc_vector_p; 190 if (*opv_desc_vector_p == NULL) 191 MALLOC(*opv_desc_vector_p, vop_t **, 192 vfs_opv_numops * sizeof(vop_t *), M_VNODE, 193 M_WAITOK | M_ZERO); 194 195 /* Fill in, with slot 0 being to return EOPNOTSUPP */ 196 opv_desc_vector = *opv_desc_vector_p; 197 opv_desc_vector[0] = (vop_t *)vop_eopnotsupp; 198 for (j = 0; opv->opv_desc_ops[j].opve_op; j++) { 199 opve_descp = &(opv->opv_desc_ops[j]); 200 opv_desc_vector[opve_descp->opve_op->vdesc_offset] = 201 opve_descp->opve_impl; 202 } 203 204 /* Replace unfilled routines with their default (slot 1). */ 205 opv_desc_vector = *(opv->opv_desc_vector_p); 206 if (opv_desc_vector[1] == NULL) 207 panic("vfs_opv_recalc: vector without a default."); 208 for (j = 0; j < vfs_opv_numops; j++) 209 if (opv_desc_vector[j] == NULL) 210 opv_desc_vector[j] = opv_desc_vector[1]; 211 } 212} 213 214/* Add a set of vnode operations (a description) to the table above. 
*/ 215void 216vfs_add_vnodeops(const void *data) 217{ 218 const struct vnodeopv_desc *opv; 219 const struct vnodeopv_desc **newopv; 220 struct vnodeop_desc **newop; 221 int *newref; 222 struct vnodeop_desc *desc; 223 int i, j; 224 225 opv = (const struct vnodeopv_desc *)data; 226 MALLOC(newopv, const struct vnodeopv_desc **, 227 (vnodeopv_num + 1) * sizeof(*newopv), M_VNODE, M_WAITOK); 228 if (vnodeopv_descs) { 229 bcopy(vnodeopv_descs, newopv, vnodeopv_num * sizeof(*newopv)); 230 FREE(vnodeopv_descs, M_VNODE); 231 } 232 newopv[vnodeopv_num] = opv; 233 vnodeopv_descs = newopv; 234 vnodeopv_num++; 235 236 /* See if we have turned up a new vnode op desc */ 237 for (i = 0; (desc = opv->opv_desc_ops[i].opve_op); i++) { 238 for (j = 0; j < num_op_descs; j++) { 239 if (desc == vfs_op_descs[j]) { 240 /* found it, increase reference count */ 241 vfs_op_desc_refs[j]++; 242 break; 243 } 244 } 245 if (j == num_op_descs) { 246 /* not found, new entry */ 247 MALLOC(newop, struct vnodeop_desc **, 248 (num_op_descs + 1) * sizeof(*newop), 249 M_VNODE, M_WAITOK); 250 /* new reference count (for unload) */ 251 MALLOC(newref, int *, 252 (num_op_descs + 1) * sizeof(*newref), 253 M_VNODE, M_WAITOK); 254 if (vfs_op_descs) { 255 bcopy(vfs_op_descs, newop, 256 num_op_descs * sizeof(*newop)); 257 FREE(vfs_op_descs, M_VNODE); 258 } 259 if (vfs_op_desc_refs) { 260 bcopy(vfs_op_desc_refs, newref, 261 num_op_descs * sizeof(*newref)); 262 FREE(vfs_op_desc_refs, M_VNODE); 263 } 264 newop[num_op_descs] = desc; 265 newref[num_op_descs] = 1; 266 vfs_op_descs = newop; 267 vfs_op_desc_refs = newref; 268 num_op_descs++; 269 } 270 } 271 vfs_opv_recalc(); 272} 273 274/* Remove a vnode type from the vnode description table above. 
*/ 275void 276vfs_rm_vnodeops(const void *data) 277{ 278 const struct vnodeopv_desc *opv; 279 const struct vnodeopv_desc **newopv; 280 struct vnodeop_desc **newop; 281 int *newref; 282 vop_t **opv_desc_vector; 283 struct vnodeop_desc *desc; 284 int i, j, k; 285 286 opv = (const struct vnodeopv_desc *)data; 287 /* Lower ref counts on descs in the table and release if zero */ 288 for (i = 0; (desc = opv->opv_desc_ops[i].opve_op); i++) { 289 for (j = 0; j < num_op_descs; j++) { 290 if (desc == vfs_op_descs[j]) { 291 /* found it, decrease reference count */ 292 vfs_op_desc_refs[j]--; 293 break; 294 } 295 } 296 for (j = 0; j < num_op_descs; j++) { 297 if (vfs_op_desc_refs[j] > 0) 298 continue; 299 if (vfs_op_desc_refs[j] < 0) 300 panic("vfs_remove_vnodeops: negative refcnt"); 301 /* Entry is going away - replace it with defaultop */ 302 for (k = 0; k < vnodeopv_num; k++) { 303 opv_desc_vector = 304 *(vnodeopv_descs[k]->opv_desc_vector_p); 305 if (opv_desc_vector != NULL) 306 opv_desc_vector[desc->vdesc_offset] = 307 opv_desc_vector[1]; 308 } 309 MALLOC(newop, struct vnodeop_desc **, 310 (num_op_descs - 1) * sizeof(*newop), 311 M_VNODE, M_WAITOK); 312 /* new reference count (for unload) */ 313 MALLOC(newref, int *, 314 (num_op_descs - 1) * sizeof(*newref), 315 M_VNODE, M_WAITOK); 316 for (k = j; k < (num_op_descs - 1); k++) { 317 vfs_op_descs[k] = vfs_op_descs[k + 1]; 318 vfs_op_desc_refs[k] = vfs_op_desc_refs[k + 1]; 319 } 320 bcopy(vfs_op_descs, newop, 321 (num_op_descs - 1) * sizeof(*newop)); 322 bcopy(vfs_op_desc_refs, newref, 323 (num_op_descs - 1) * sizeof(*newref)); 324 FREE(vfs_op_descs, M_VNODE); 325 FREE(vfs_op_desc_refs, M_VNODE); 326 vfs_op_descs = newop; 327 vfs_op_desc_refs = newref; 328 num_op_descs--; 329 } 330 } 331 332 for (i = 0; i < vnodeopv_num; i++) { 333 if (vnodeopv_descs[i] == opv) { 334 for (j = i; j < (vnodeopv_num - 1); j++) 335 vnodeopv_descs[j] = vnodeopv_descs[j + 1]; 336 break; 337 } 338 } 339 if (i == vnodeopv_num) 340 
panic("vfs_remove_vnodeops: opv not found"); 341 opv_desc_vector = *(opv->opv_desc_vector_p); 342 if (opv_desc_vector != NULL) 343 FREE(opv_desc_vector, M_VNODE); 344 MALLOC(newopv, const struct vnodeopv_desc **, 345 (vnodeopv_num - 1) * sizeof(*newopv), M_VNODE, M_WAITOK); 346 bcopy(vnodeopv_descs, newopv, (vnodeopv_num - 1) * sizeof(*newopv)); 347 FREE(vnodeopv_descs, M_VNODE); 348 vnodeopv_descs = newopv; 349 vnodeopv_num--; 350 351 vfs_opv_recalc(); 352} 353 354/* 355 * Routines having to do with the management of the vnode table. 356 */ 357 358struct vfsconf * 359vfs_byname(const char *name) 360{ 361 struct vfsconf *vfsp; 362 363 TAILQ_FOREACH(vfsp, &vfsconf, vfc_list) 364 if (!strcmp(name, vfsp->vfc_name)) 365 return (vfsp); 366 return (NULL); 367} 368 369/* Register a new filesystem type in the global table */ 370int 371vfs_register(struct vfsconf *vfc) 372{ 373 struct sysctl_oid *oidp; 374 struct vfsops *vfsops; 375 static int once; 376 377 if (!once) { 378 vattr_null(&va_null); 379 once = 1; 380 } 381 382 if (vfc->vfc_version != VFS_VERSION) { 383 printf("ERROR: filesystem %s, unsupported ABI version %x\n", 384 vfc->vfc_name, vfc->vfc_version); 385 return (EINVAL); 386 } 387 if (vfs_byname(vfc->vfc_name) != NULL) 388 return EEXIST; 389 390 vfc->vfc_typenum = maxvfsconf++; 391 TAILQ_INSERT_TAIL(&vfsconf, vfc, vfc_list); 392 393 /* 394 * If this filesystem has a sysctl node under vfs 395 * (i.e. vfs.xxfs), then change the oid number of that node to 396 * match the filesystem's type number. This allows user code 397 * which uses the type number to read sysctl variables defined 398 * by the filesystem to continue working. Since the oids are 399 * in a sorted list, we need to make sure the order is 400 * preserved by re-registering the oid after modifying its 401 * number. 
402 */ 403 SLIST_FOREACH(oidp, &sysctl__vfs_children, oid_link) 404 if (strcmp(oidp->oid_name, vfc->vfc_name) == 0) { 405 sysctl_unregister_oid(oidp); 406 oidp->oid_number = vfc->vfc_typenum; 407 sysctl_register_oid(oidp); 408 } 409 410 /* 411 * Initialise unused ``struct vfsops'' fields, to use 412 * the vfs_std*() functions. Note, we need the mount 413 * and unmount operations, at the least. The check 414 * for vfsops available is just a debugging aid. 415 */ 416 KASSERT(vfc->vfc_vfsops != NULL, 417 ("Filesystem %s has no vfsops", vfc->vfc_name)); 418 /* 419 * Check the mount and unmount operations. 420 */ 421 vfsops = vfc->vfc_vfsops; 422 KASSERT(vfsops->vfs_mount != NULL || vfsops->vfs_omount != NULL, 423 ("Filesystem %s has no (o)mount op", vfc->vfc_name)); 424 KASSERT(vfsops->vfs_unmount != NULL, 425 ("Filesystem %s has no unmount op", vfc->vfc_name)); 426 427 if (vfsops->vfs_start == NULL) 428 /* make a file system operational */ 429 vfsops->vfs_start = vfs_stdstart; 430 if (vfsops->vfs_root == NULL) 431 /* return file system's root vnode */ 432 vfsops->vfs_root = vfs_stdroot; 433 if (vfsops->vfs_quotactl == NULL) 434 /* quota control */ 435 vfsops->vfs_quotactl = vfs_stdquotactl; 436 if (vfsops->vfs_statfs == NULL) 437 /* return file system's status */ 438 vfsops->vfs_statfs = vfs_stdstatfs; 439 if (vfsops->vfs_sync == NULL) 440 /* 441 * flush unwritten data (nosync) 442 * file systems can use vfs_stdsync 443 * explicitly by setting it in the 444 * vfsop vector. 
445 */ 446 vfsops->vfs_sync = vfs_stdnosync; 447 if (vfsops->vfs_vget == NULL) 448 /* convert an inode number to a vnode */ 449 vfsops->vfs_vget = vfs_stdvget; 450 if (vfsops->vfs_fhtovp == NULL) 451 /* turn an NFS file handle into a vnode */ 452 vfsops->vfs_fhtovp = vfs_stdfhtovp; 453 if (vfsops->vfs_checkexp == NULL) 454 /* check if file system is exported */ 455 vfsops->vfs_checkexp = vfs_stdcheckexp; 456 if (vfsops->vfs_vptofh == NULL) 457 /* turn a vnode into an NFS file handle */ 458 vfsops->vfs_vptofh = vfs_stdvptofh; 459 if (vfsops->vfs_init == NULL) 460 /* file system specific initialisation */ 461 vfsops->vfs_init = vfs_stdinit; 462 if (vfsops->vfs_uninit == NULL) 463 /* file system specific uninitialisation */ 464 vfsops->vfs_uninit = vfs_stduninit; 465 if (vfsops->vfs_extattrctl == NULL) 466 /* extended attribute control */ 467 vfsops->vfs_extattrctl = vfs_stdextattrctl; 468 if (vfsops->vfs_sysctl == NULL) 469 vfsops->vfs_sysctl = vfs_stdsysctl; 470 471 /* 472 * Call init function for this VFS... 473 */ 474 (*(vfc->vfc_vfsops->vfs_init))(vfc); 475 476 return 0; 477} 478 479 480/* Remove registration of a filesystem type */ 481int 482vfs_unregister(struct vfsconf *vfc) 483{ 484 struct vfsconf *vfsp; 485 int error, i, maxtypenum; 486 487 i = vfc->vfc_typenum; 488 489 vfsp = vfs_byname(vfc->vfc_name); 490 if (vfsp == NULL) 491 return EINVAL; 492 if (vfsp->vfc_refcount) 493 return EBUSY; 494 if (vfc->vfc_vfsops->vfs_uninit != NULL) { 495 error = (*vfc->vfc_vfsops->vfs_uninit)(vfsp); 496 if (error) 497 return (error); 498 } 499 TAILQ_REMOVE(&vfsconf, vfsp, vfc_list); 500 maxtypenum = VFS_GENERIC; 501 TAILQ_FOREACH(vfsp, &vfsconf, vfc_list) 502 if (maxtypenum < vfsp->vfc_typenum) 503 maxtypenum = vfsp->vfc_typenum; 504 maxvfsconf = maxtypenum + 1; 505 return 0; 506} 507 508/* 509 * Standard kernel module handling code for filesystem modules. 510 * Referenced from VFS_SET(). 
511 */ 512int 513vfs_modevent(module_t mod, int type, void *data) 514{ 515 struct vfsconf *vfc; 516 int error = 0; 517 518 vfc = (struct vfsconf *)data; 519 520 switch (type) { 521 case MOD_LOAD: 522 if (vfc) 523 error = vfs_register(vfc); 524 break; 525 526 case MOD_UNLOAD: 527 if (vfc) 528 error = vfs_unregister(vfc); 529 break; 530 default: 531 error = EOPNOTSUPP; 532 break; 533 } 534 return (error); 535} 536