1/* 2 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 */ 28/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */ 29/* 30 * Copyright (c) 1992, 1993 31 * The Regents of the University of California. All rights reserved. 32 * 33 * This code is derived from software contributed to Berkeley by 34 * John Heidemann of the UCLA Ficus project. 35 * 36 * Redistribution and use in source and binary forms, with or without 37 * modification, are permitted provided that the following conditions 38 * are met: 39 * 1. Redistributions of source code must retain the above copyright 40 * notice, this list of conditions and the following disclaimer. 41 * 2. 
Redistributions in binary form must reproduce the above copyright 42 * notice, this list of conditions and the following disclaimer in the 43 * documentation and/or other materials provided with the distribution. 44 * 3. All advertising materials mentioning features or use of this software 45 * must display the following acknowledgement: 46 * This product includes software developed by the University of 47 * California, Berkeley and its contributors. 48 * 4. Neither the name of the University nor the names of its contributors 49 * may be used to endorse or promote products derived from this software 50 * without specific prior written permission. 51 * 52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 62 * SUCH DAMAGE. 63 * 64 * @(#)null_vnops.c 8.6 (Berkeley) 5/27/95 65 * 66 * Ancestors: 67 * @(#)lofs_vnops.c 1.2 (Berkeley) 6/18/92 68 * ...and... 69 * @(#)null_vnodeops.c 1.20 92/07/07 UCLA Ficus project 70 */ 71 72/* 73 * Null Layer 74 * 75 * (See mount_null(8) for more information.) 76 * 77 * The null layer duplicates a portion of the file system 78 * name space under a new name. In this respect, it is 79 * similar to the loopback file system. 
It differs from 80 * the loopback fs in two respects: it is implemented using 81 * a stackable layers techniques, and it's "null-node"s stack above 82 * all lower-layer vnodes, not just over directory vnodes. 83 * 84 * The null layer has two purposes. First, it serves as a demonstration 85 * of layering by proving a layer which does nothing. (It actually 86 * does everything the loopback file system does, which is slightly 87 * more than nothing.) Second, the null layer can serve as a prototype 88 * layer. Since it provides all necessary layer framework, 89 * new file system layers can be created very easily be starting 90 * with a null layer. 91 * 92 * The remainder of this man page examines the null layer as a basis 93 * for constructing new layers. 94 * 95 * 96 * INSTANTIATING NEW NULL LAYERS 97 * 98 * New null layers are created with mount_null(8). 99 * Mount_null(8) takes two arguments, the pathname 100 * of the lower vfs (target-pn) and the pathname where the null 101 * layer will appear in the namespace (alias-pn). After 102 * the null layer is put into place, the contents 103 * of target-pn subtree will be aliased under alias-pn. 104 * 105 * 106 * OPERATION OF A NULL LAYER 107 * 108 * The null layer is the minimum file system layer, 109 * simply bypassing all possible operations to the lower layer 110 * for processing there. The majority of its activity centers 111 * on the bypass routine, though which nearly all vnode operations 112 * pass. 113 * 114 * The bypass routine accepts arbitrary vnode operations for 115 * handling by the lower layer. It begins by examing vnode 116 * operation arguments and replacing any null-nodes by their 117 * lower-layer equivlants. It then invokes the operation 118 * on the lower layer. Finally, it replaces the null-nodes 119 * in the arguments and, if a vnode is return by the operation, 120 * stacks a null-node on top of the returned vnode. 
121 * 122 * Although bypass handles most operations, vnop_getattr, vnop_lock, 123 * vnop_unlock, vnop_inactive, vnop_reclaim, and vnop_print are not 124 * bypassed. Vop_getattr must change the fsid being returned. 125 * Vop_lock and vnop_unlock must handle any locking for the 126 * current vnode as well as pass the lock request down. 127 * Vop_inactive and vnop_reclaim are not bypassed so that 128 * they can handle freeing null-layer specific data. Vop_print 129 * is not bypassed to avoid excessive debugging information. 130 * Also, certain vnode operations change the locking state within 131 * the operation (create, mknod, remove, link, rename, mkdir, rmdir, 132 * and symlink). Ideally these operations should not change the 133 * lock state, but should be changed to let the caller of the 134 * function unlock them. Otherwise all intermediate vnode layers 135 * (such as union, umapfs, etc) must catch these functions to do 136 * the necessary locking at their layer. 137 * 138 * 139 * INSTANTIATING VNODE STACKS 140 * 141 * Mounting associates the null layer with a lower layer, 142 * effect stacking two VFSes. Vnode stacks are instead 143 * created on demand as files are accessed. 144 * 145 * The initial mount creates a single vnode stack for the 146 * root of the new null layer. All other vnode stacks 147 * are created as a result of vnode operations on 148 * this or other null vnode stacks. 149 * 150 * New vnode stacks come into existance as a result of 151 * an operation which returns a vnode. 152 * The bypass routine stacks a null-node above the new 153 * vnode before returning it to the caller. 154 * 155 * For example, imagine mounting a null layer with 156 * "mount_null /usr/include /dev/layer/null". 157 * Changing directory to /dev/layer/null will assign 158 * the root null-node (which was created when the null layer was mounted). 159 * Now consider opening "sys". A vnop_lookup would be 160 * done on the root null-node. 
This operation would bypass through
 * to the lower layer which would return a vnode representing
 * the UFS "sys".  Null_bypass then builds a null-node
 * aliasing the UFS "sys" and returns this to the caller.
 * Later operations on the null-node "sys" will repeat this
 * process when constructing other vnode stacks.
 *
 *
 * CREATING OTHER FILE SYSTEM LAYERS
 *
 * One of the easiest ways to construct new file system layers is to make
 * a copy of the null layer, rename all files and variables, and
 * then begin modifying the copy.  Sed can be used to easily rename
 * all variables.
 *
 * The umap layer is an example of a layer descended from the
 * null layer.
 *
 *
 * INVOKING OPERATIONS ON LOWER LAYERS
 *
 * There are two techniques to invoke operations on a lower layer
 * when the operation cannot be completely bypassed.  Each method
 * is appropriate in different situations.  In both cases,
 * it is the responsibility of the aliasing layer to make
 * the operation arguments "correct" for the lower layer
 * by mapping any vnode arguments to the lower layer.
 *
 * The first approach is to call the aliasing layer's bypass routine.
 * This method is most suitable when you wish to invoke the operation
 * currently being handled on the lower layer.  It has the advantage
 * that the bypass routine already must do argument mapping.
 * An example of this is null_getattr in the null layer.
 *
 * A second approach is to directly invoke vnode operations on
 * the lower layer with the VOP_OPERATIONNAME interface.
 * The advantage of this method is that it is easy to invoke
 * arbitrary operations on the lower layer.  The disadvantage
 * is that vnode arguments must be manually mapped.
199 * 200 */ 201 202#include <sys/param.h> 203#include <sys/systm.h> 204#include <sys/proc.h> 205#include <sys/kauth.h> 206#include <sys/time.h> 207#include <sys/types.h> 208#include <sys/vnode.h> 209#include <sys/mount_internal.h> 210#include <sys/namei.h> 211#include <sys/malloc.h> 212#include <sys/buf.h> 213#include <miscfs/nullfs/null.h> 214 215 216int null_bug_bypass = 0; /* for debugging: enables bypass printf'ing */ 217 218/* 219 * This is the 10-Apr-92 bypass routine. 220 * This version has been optimized for speed, throwing away some 221 * safety checks. It should still always work, but it's not as 222 * robust to programmer errors. 223 * Define SAFETY to include some error checking code. 224 * 225 * In general, we map all vnodes going down and unmap them on the way back. 226 * As an exception to this, vnodes can be marked "unmapped" by setting 227 * the Nth bit in operation's vdesc_flags. 228 * 229 * Also, some BSD vnode operations have the side effect of node_put'ing 230 * their arguments. With stacking, the reference counts are held 231 * by the upper node, not the lower one, so we must handle these 232 * side-effects here. This is not of concern in Sun-derived systems 233 * since there are no such side-effects. 234 * 235 * This makes the following assumptions: 236 * - only one returned vpp 237 * - no INOUT vpp's (Sun's vnop_open has one of these) 238 * - the vnode operation vector of the first vnode should be used 239 * to determine what implementation of the op should be invoked 240 * - all mapped vnodes are of our vnode-type (NEEDSWORK: 241 * problems on rmdir'ing mount points and renaming?) 
242 */ 243int 244null_bypass(ap) 245 struct vnop_generic_args /* { 246 struct vnodeop_desc *a_desc; 247 <other random data follows, presumably> 248 } */ *ap; 249{ 250 extern int (**null_vnodeop_p)(void *); /* not extern, really "forward" */ 251 register struct vnode **this_vp_p; 252 int error; 253 struct vnode *old_vps[VDESC_MAX_VPS]; 254 struct vnode **vps_p[VDESC_MAX_VPS]; 255 struct vnode ***vppp; 256 struct vnodeop_desc *descp = ap->a_desc; 257 int reles, i; 258 259 if (null_bug_bypass) 260 printf ("null_bypass: %s\n", descp->vdesc_name); 261 262#ifdef SAFETY 263 /* 264 * We require at least one vp. 265 */ 266 if (descp->vdesc_vp_offsets == NULL || 267 descp->vdesc_vp_offsets[0] == VDESC_NO_OFFSET) 268 panic ("null_bypass: no vp's in map.\n"); 269#endif 270 271 /* 272 * Map the vnodes going in. 273 * Later, we'll invoke the operation based on 274 * the first mapped vnode's operation vector. 275 */ 276 reles = descp->vdesc_flags; 277 for (i = 0; i < VDESC_MAX_VPS; reles >>= 1, i++) { 278 if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET) 279 break; /* bail out at end of list */ 280 vps_p[i] = this_vp_p = 281 VOPARG_OFFSETTO(struct vnode**,descp->vdesc_vp_offsets[i],ap); 282 /* 283 * We're not guaranteed that any but the first vnode 284 * are of our type. Check for and don't map any 285 * that aren't. (We must always map first vp or vclean fails.) 286 */ 287 if (i && (*this_vp_p == NULL || 288 (*this_vp_p)->v_op != null_vnodeop_p)) { 289 old_vps[i] = NULL; 290 } else { 291 old_vps[i] = *this_vp_p; 292 *(vps_p[i]) = NULLVPTOLOWERVP(*this_vp_p); 293 /* 294 * XXX - Several operations have the side effect 295 * of vnode_put'ing their vp's. We must account for 296 * that. (This should go away in the future.) 297 */ 298 if (reles & 1) 299 vnode_get(*this_vp_p); 300 } 301 302 } 303 304 /* 305 * Call the operation on the lower layer 306 * with the modified argument structure. 
307 */ 308 error = VCALL(*(vps_p[0]), descp->vdesc_offset, ap); 309 310 /* 311 * Maintain the illusion of call-by-value 312 * by restoring vnodes in the argument structure 313 * to their original value. 314 */ 315 reles = descp->vdesc_flags; 316 for (i = 0; i < VDESC_MAX_VPS; reles >>= 1, i++) { 317 if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET) 318 break; /* bail out at end of list */ 319 if (old_vps[i]) { 320 *(vps_p[i]) = old_vps[i]; 321 if (reles & 1) 322 vnode_put(*(vps_p[i])); 323 } 324 } 325 326 /* 327 * Map the possible out-going vpp 328 * (Assumes that the lower layer always returns 329 * a vnode_get'ed vpp unless it gets an error.) 330 */ 331 if (descp->vdesc_vpp_offset != VDESC_NO_OFFSET && 332 !(descp->vdesc_flags & VDESC_NOMAP_VPP) && 333 !error) { 334 /* 335 * XXX - even though some ops have vpp returned vp's, 336 * several ops actually vnode_put this before returning. 337 * We must avoid these ops. 338 * (This should go away when these ops are regularized.) 339 */ 340 if (descp->vdesc_flags & VDESC_VPP_WILLRELE) 341 goto out; 342 vppp = VOPARG_OFFSETTO(struct vnode***, 343 descp->vdesc_vpp_offset,ap); 344 error = null_node_create(old_vps[0]->v_mount, **vppp, *vppp); 345 } 346 347 out: 348 return (error); 349} 350 351/* 352 * We have to carry on the locking protocol on the null layer vnodes 353 * as we progress through the tree. We also have to enforce read-only 354 * if this layer is mounted read-only. 355 */ 356null_lookup(ap) 357 struct vnop_lookup_args /* { 358 struct vnode * a_dvp; 359 struct vnode ** a_vpp; 360 struct componentname * a_cnp; 361 vfs_context_t a_context; 362 } */ *ap; 363{ 364 struct componentname *cnp = ap->a_cnp; 365 struct proc *p = cnp->cn_proc; 366 int flags = cnp->cn_flags; 367 struct vnode *dvp, *vp; 368 int error; 369 370 error = null_bypass(ap); 371 372 /* 373 * We must do the same locking and unlocking at this layer as 374 * is done in the layers below us. 
We could figure this out 375 * based on the error return and the LASTCN, LOCKPARENT, and 376 * LOCKLEAF flags. However, it is more expidient to just find 377 * out the state of the lower level vnodes and set ours to the 378 * same state. 379 */ 380 dvp = ap->a_dvp; 381 vp = *ap->a_vpp; 382 if (dvp == vp) 383 return (error); 384 return (error); 385} 386 387/* 388 * Setattr call. 389 */ 390int 391null_setattr( 392 struct vnop_setattr_args /* { 393 struct vnodeop_desc *a_desc; 394 struct vnode *a_vp; 395 struct vnode_attr *a_vap; 396 kauth_cred_t a_cred; 397 struct proc *a_p; 398 } */ *ap) 399{ 400 struct vnode *vp = ap->a_vp; 401 struct vnode_attr *vap = ap->a_vap; 402 403 if (VATTR_IS_ACTIVE(vap, va_data_size)) { 404 switch (vp->v_type) { 405 case VDIR: 406 return (EISDIR); 407 case VCHR: 408 case VBLK: 409 case VSOCK: 410 case VFIFO: 411 return (0); 412 case VREG: 413 case VLNK: 414 default: 415 } 416 } 417 return (null_bypass(ap)); 418} 419 420/* 421 * We handle getattr only to change the fsid. 422 */ 423int 424null_getattr(ap) 425 struct vnop_getattr_args /* { 426 struct vnode *a_vp; 427 struct vnode_attr *a_vap; 428 vfs_context_t a_context; 429 } */ *ap; 430{ 431 int error; 432 433 if (error = null_bypass(ap)) 434 return (error); 435 /* Requires that arguments be restored. */ 436 VATTR_RETURN(ap->a_vap, va_fsid, ap->a_vp->v_mount->mnt_vfsstat.f_fsid.val[0]); 437 return (0); 438} 439 440int 441null_access(ap) 442 struct vnop_access_args /* { 443 struct vnode *a_vp; 444 int a_action; 445 vfs_context_t a_context; 446 } */ *ap; 447{ 448 return (null_bypass(ap)); 449} 450 451int 452null_inactive(ap) 453 struct vnop_inactive_args /* { 454 struct vnode *a_vp; 455 vfs_context_t a_context; 456 } */ *ap; 457{ 458 /* 459 * Do nothing (and _don't_ bypass). 460 * Wait to vnode_put lowervp until reclaim, 461 * so that until then our null_node is in the 462 * cache and reusable. 
463 * 464 * NEEDSWORK: Someday, consider inactive'ing 465 * the lowervp and then trying to reactivate it 466 * with capabilities (v_id) 467 * like they do in the name lookup cache code. 468 * That's too much work for now. 469 */ 470 return (0); 471} 472 473int 474null_reclaim(ap) 475 struct vnop_reclaim_args /* { 476 struct vnode *a_vp; 477 vfs_context_t a_context; 478 } */ *ap; 479{ 480 struct vnode *vp = ap->a_vp; 481 struct null_node *xp = VTONULL(vp); 482 struct vnode *lowervp = xp->null_lowervp; 483 484 /* 485 * Note: in vnop_reclaim, vp->v_op == dead_vnodeop_p, 486 * so we can't call VOPs on ourself. 487 */ 488 /* After this assignment, this node will not be re-used. */ 489 xp->null_lowervp = NULL; 490 LIST_REMOVE(xp, null_hash); 491 FREE(vp->v_data, M_TEMP); 492 vp->v_data = NULL; 493 vnode_put (lowervp); 494 return (0); 495} 496 497/* 498 * XXX - vnop_strategy must be hand coded because it has no 499 * vnode in its arguments. 500 * This goes away with a merged VM/buffer cache. 501 */ 502int 503null_strategy(ap) 504 struct vnop_strategy_args /* { 505 struct buf *a_bp; 506 } */ *ap; 507{ 508 struct buf *bp = ap->a_bp; 509 int error; 510 struct vnode *savedvp; 511 512 savedvp = vnode(bp); 513 buf_setvnode(bp, NULLVPTOLOWERVP(savedvp)); 514 515 error = VNOP_STRATEGY(bp); 516 517 buf_setvnode(bp, savedvp); 518 519 return (error); 520} 521 522/* 523 * XXX - like vnop_strategy, vnop_bwrite must be hand coded because it has no 524 * vnode in its arguments. 525 * This goes away with a merged VM/buffer cache. 
526 */ 527int 528null_bwrite(ap) 529 struct vnop_bwrite_args /* { 530 struct buf *a_bp; 531 } */ *ap; 532{ 533 struct buf *bp = ap->a_bp; 534 int error; 535 struct vnode *savedvp; 536 537 savedvp = buf_vnode(bp); 538 buf_setvnode(bp, NULLVPTOLOWERVP(savedvp)); 539 540 error = VNOP_BWRITE(bp); 541 542 buf_setvnode(bp, savedvp); 543 544 return (error); 545} 546 547/* 548 * Global vfs data structures 549 */ 550 551#define VOPFUNC int (*)(void *) 552 553int (**null_vnodeop_p)(void *); 554struct vnodeopv_entry_desc null_vnodeop_entries[] = { 555 { &vnop_default_desc, (VOPFUNC)null_bypass }, 556 557 { &vnop_lookup_desc, (VOPFUNC)null_lookup }, 558 { &vnop_setattr_desc, (VOPFUNC)null_setattr }, 559 { &vnop_getattr_desc, (VOPFUNC)null_getattr }, 560 { &vnop_access_desc, (VOPFUNC)null_access }, 561 { &vnop_inactive_desc, (VOPFUNC)null_inactive }, 562 { &vnop_reclaim_desc, (VOPFUNC)null_reclaim }, 563 564 { &vnop_strategy_desc, (VOPFUNC)null_strategy }, 565 { &vnop_bwrite_desc, (VOPFUNC)null_bwrite }, 566 567 { (struct vnodeop_desc*)NULL, (int(*)())NULL } 568}; 569struct vnodeopv_desc null_vnodeop_opv_desc = 570 { &null_vnodeop_p, null_vnodeop_entries }; 571