null_vnops.c revision 108470
11541Srgrimes/* 21541Srgrimes * Copyright (c) 1992, 1993 31541Srgrimes * The Regents of the University of California. All rights reserved. 41541Srgrimes * 51541Srgrimes * This code is derived from software contributed to Berkeley by 61541Srgrimes * John Heidemann of the UCLA Ficus project. 71541Srgrimes * 81541Srgrimes * Redistribution and use in source and binary forms, with or without 91541Srgrimes * modification, are permitted provided that the following conditions 101541Srgrimes * are met: 111541Srgrimes * 1. Redistributions of source code must retain the above copyright 121541Srgrimes * notice, this list of conditions and the following disclaimer. 131541Srgrimes * 2. Redistributions in binary form must reproduce the above copyright 141541Srgrimes * notice, this list of conditions and the following disclaimer in the 151541Srgrimes * documentation and/or other materials provided with the distribution. 161541Srgrimes * 3. All advertising materials mentioning features or use of this software 171541Srgrimes * must display the following acknowledgement: 181541Srgrimes * This product includes software developed by the University of 191541Srgrimes * California, Berkeley and its contributors. 201541Srgrimes * 4. Neither the name of the University nor the names of its contributors 211541Srgrimes * may be used to endorse or promote products derived from this software 221541Srgrimes * without specific prior written permission. 231541Srgrimes * 241541Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 251541Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 261541Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 271541Srgrimes * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 281541Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 291541Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 301541Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 311541Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 321541Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 331541Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 341541Srgrimes * SUCH DAMAGE. 351541Srgrimes * 3622521Sdyson * @(#)null_vnops.c 8.6 (Berkeley) 5/27/95 371541Srgrimes * 3822521Sdyson * Ancestors: 3922521Sdyson * @(#)lofs_vnops.c 1.2 (Berkeley) 6/18/92 4022521Sdyson * ...and... 4122521Sdyson * @(#)null_vnodeops.c 1.20 92/07/07 UCLA Ficus project 4222521Sdyson * 4350477Speter * $FreeBSD: head/sys/fs/nullfs/null_vnops.c 108470 2002-12-30 21:18:15Z schweikh $ 441541Srgrimes */ 451541Srgrimes 461541Srgrimes/* 471541Srgrimes * Null Layer 481541Srgrimes * 4977130Sru * (See mount_nullfs(8) for more information.) 501541Srgrimes * 5196755Strhodes * The null layer duplicates a portion of the filesystem 521541Srgrimes * name space under a new name. In this respect, it is 5396755Strhodes * similar to the loopback filesystem. It differs from 541541Srgrimes * the loopback fs in two respects: it is implemented using 5535256Sdes * a stackable layers techniques, and its "null-node"s stack above 561541Srgrimes * all lower-layer vnodes, not just over directory vnodes. 571541Srgrimes * 581541Srgrimes * The null layer has two purposes. First, it serves as a demonstration 591541Srgrimes * of layering by proving a layer which does nothing. (It actually 6096755Strhodes * does everything the loopback filesystem does, which is slightly 611541Srgrimes * more than nothing.) Second, the null layer can serve as a prototype 621541Srgrimes * layer. 
Since it provides all necessary layer framework, 6396755Strhodes * new filesystem layers can be created very easily be starting 641541Srgrimes * with a null layer. 651541Srgrimes * 661541Srgrimes * The remainder of this man page examines the null layer as a basis 671541Srgrimes * for constructing new layers. 681541Srgrimes * 691541Srgrimes * 701541Srgrimes * INSTANTIATING NEW NULL LAYERS 711541Srgrimes * 7277130Sru * New null layers are created with mount_nullfs(8). 7377130Sru * Mount_nullfs(8) takes two arguments, the pathname 741541Srgrimes * of the lower vfs (target-pn) and the pathname where the null 751541Srgrimes * layer will appear in the namespace (alias-pn). After 761541Srgrimes * the null layer is put into place, the contents 771541Srgrimes * of target-pn subtree will be aliased under alias-pn. 781541Srgrimes * 791541Srgrimes * 801541Srgrimes * OPERATION OF A NULL LAYER 811541Srgrimes * 8296755Strhodes * The null layer is the minimum filesystem layer, 831541Srgrimes * simply bypassing all possible operations to the lower layer 841541Srgrimes * for processing there. The majority of its activity centers 8526963Salex * on the bypass routine, through which nearly all vnode operations 861541Srgrimes * pass. 871541Srgrimes * 881541Srgrimes * The bypass routine accepts arbitrary vnode operations for 891541Srgrimes * handling by the lower layer. It begins by examing vnode 901541Srgrimes * operation arguments and replacing any null-nodes by their 911541Srgrimes * lower-layer equivlants. It then invokes the operation 921541Srgrimes * on the lower layer. Finally, it replaces the null-nodes 931541Srgrimes * in the arguments and, if a vnode is return by the operation, 941541Srgrimes * stacks a null-node on top of the returned vnode. 951541Srgrimes * 9622521Sdyson * Although bypass handles most operations, vop_getattr, vop_lock, 9722521Sdyson * vop_unlock, vop_inactive, vop_reclaim, and vop_print are not 9822521Sdyson * bypassed. 
Vop_getattr must change the fsid being returned. 9922521Sdyson * Vop_lock and vop_unlock must handle any locking for the 10022521Sdyson * current vnode as well as pass the lock request down. 1011541Srgrimes * Vop_inactive and vop_reclaim are not bypassed so that 10222521Sdyson * they can handle freeing null-layer specific data. Vop_print 10322521Sdyson * is not bypassed to avoid excessive debugging information. 10422521Sdyson * Also, certain vnode operations change the locking state within 10522521Sdyson * the operation (create, mknod, remove, link, rename, mkdir, rmdir, 10622521Sdyson * and symlink). Ideally these operations should not change the 10722521Sdyson * lock state, but should be changed to let the caller of the 10822521Sdyson * function unlock them. Otherwise all intermediate vnode layers 10922521Sdyson * (such as union, umapfs, etc) must catch these functions to do 11022521Sdyson * the necessary locking at their layer. 1111541Srgrimes * 1121541Srgrimes * 1131541Srgrimes * INSTANTIATING VNODE STACKS 1141541Srgrimes * 1151541Srgrimes * Mounting associates the null layer with a lower layer, 1161541Srgrimes * effect stacking two VFSes. Vnode stacks are instead 1171541Srgrimes * created on demand as files are accessed. 1181541Srgrimes * 1191541Srgrimes * The initial mount creates a single vnode stack for the 1201541Srgrimes * root of the new null layer. All other vnode stacks 1211541Srgrimes * are created as a result of vnode operations on 1221541Srgrimes * this or other null vnode stacks. 1231541Srgrimes * 1241541Srgrimes * New vnode stacks come into existance as a result of 1258876Srgrimes * an operation which returns a vnode. 1261541Srgrimes * The bypass routine stacks a null-node above the new 1271541Srgrimes * vnode before returning it to the caller. 1281541Srgrimes * 1291541Srgrimes * For example, imagine mounting a null layer with 13077130Sru * "mount_nullfs /usr/include /dev/layer/null". 
1311541Srgrimes * Changing directory to /dev/layer/null will assign 1321541Srgrimes * the root null-node (which was created when the null layer was mounted). 1331541Srgrimes * Now consider opening "sys". A vop_lookup would be 1341541Srgrimes * done on the root null-node. This operation would bypass through 1358876Srgrimes * to the lower layer which would return a vnode representing 1361541Srgrimes * the UFS "sys". Null_bypass then builds a null-node 1371541Srgrimes * aliasing the UFS "sys" and returns this to the caller. 1381541Srgrimes * Later operations on the null-node "sys" will repeat this 1391541Srgrimes * process when constructing other vnode stacks. 1401541Srgrimes * 1411541Srgrimes * 1421541Srgrimes * CREATING OTHER FILE SYSTEM LAYERS 1431541Srgrimes * 14496755Strhodes * One of the easiest ways to construct new filesystem layers is to make 1451541Srgrimes * a copy of the null layer, rename all files and variables, and 1461541Srgrimes * then begin modifing the copy. Sed can be used to easily rename 1471541Srgrimes * all variables. 1481541Srgrimes * 1498876Srgrimes * The umap layer is an example of a layer descended from the 1501541Srgrimes * null layer. 1511541Srgrimes * 1521541Srgrimes * 1531541Srgrimes * INVOKING OPERATIONS ON LOWER LAYERS 1541541Srgrimes * 1558876Srgrimes * There are two techniques to invoke operations on a lower layer 1561541Srgrimes * when the operation cannot be completely bypassed. Each method 1571541Srgrimes * is appropriate in different situations. In both cases, 1581541Srgrimes * it is the responsibility of the aliasing layer to make 1591541Srgrimes * the operation arguments "correct" for the lower layer 160108470Sschweikh * by mapping a vnode arguments to the lower layer. 1611541Srgrimes * 1621541Srgrimes * The first approach is to call the aliasing layer's bypass routine. 1631541Srgrimes * This method is most suitable when you wish to invoke the operation 16426964Salex * currently being handled on the lower layer. 
It has the advantage 1651541Srgrimes * that the bypass routine already must do argument mapping. 1661541Srgrimes * An example of this is null_getattrs in the null layer. 1671541Srgrimes * 16826964Salex * A second approach is to directly invoke vnode operations on 1691541Srgrimes * the lower layer with the VOP_OPERATIONNAME interface. 1701541Srgrimes * The advantage of this method is that it is easy to invoke 1711541Srgrimes * arbitrary operations on the lower layer. The disadvantage 17226964Salex * is that vnode arguments must be manualy mapped. 1731541Srgrimes * 1741541Srgrimes */ 1751541Srgrimes 1761541Srgrimes#include <sys/param.h> 1771541Srgrimes#include <sys/systm.h> 17876166Smarkm#include <sys/conf.h> 1792960Swollman#include <sys/kernel.h> 18076166Smarkm#include <sys/lock.h> 18176166Smarkm#include <sys/malloc.h> 18276166Smarkm#include <sys/mount.h> 18376166Smarkm#include <sys/mutex.h> 18476166Smarkm#include <sys/namei.h> 18512769Sphk#include <sys/sysctl.h> 1861541Srgrimes#include <sys/vnode.h> 18776166Smarkm 18877031Sru#include <fs/nullfs/null.h> 1891541Srgrimes 19066356Sbp#include <vm/vm.h> 19166356Sbp#include <vm/vm_extern.h> 19266356Sbp#include <vm/vm_object.h> 19366356Sbp#include <vm/vnode_pager.h> 19466356Sbp 19512769Sphkstatic int null_bug_bypass = 0; /* for debugging: enables bypass printf'ing */ 19612769SphkSYSCTL_INT(_debug, OID_AUTO, nullfs_bug_bypass, CTLFLAG_RW, 19712769Sphk &null_bug_bypass, 0, ""); 1981541Srgrimes 19965464Sbpstatic int null_access(struct vop_access_args *ap); 20066356Sbpstatic int null_createvobject(struct vop_createvobject_args *ap); 20166356Sbpstatic int null_destroyvobject(struct vop_destroyvobject_args *ap); 20265464Sbpstatic int null_getattr(struct vop_getattr_args *ap); 20366356Sbpstatic int null_getvobject(struct vop_getvobject_args *ap); 20465464Sbpstatic int null_inactive(struct vop_inactive_args *ap); 20566356Sbpstatic int null_islocked(struct vop_islocked_args *ap); 20665464Sbpstatic int null_lock(struct vop_lock_args 
*ap); 20765464Sbpstatic int null_lookup(struct vop_lookup_args *ap); 20865467Sbpstatic int null_open(struct vop_open_args *ap); 20965464Sbpstatic int null_print(struct vop_print_args *ap); 21065464Sbpstatic int null_reclaim(struct vop_reclaim_args *ap); 21165467Sbpstatic int null_rename(struct vop_rename_args *ap); 21265464Sbpstatic int null_setattr(struct vop_setattr_args *ap); 21365464Sbpstatic int null_unlock(struct vop_unlock_args *ap); 21412595Sbde 2151541Srgrimes/* 2161541Srgrimes * This is the 10-Apr-92 bypass routine. 2171541Srgrimes * This version has been optimized for speed, throwing away some 2181541Srgrimes * safety checks. It should still always work, but it's not as 2191541Srgrimes * robust to programmer errors. 2201541Srgrimes * 2211541Srgrimes * In general, we map all vnodes going down and unmap them on the way back. 2221541Srgrimes * As an exception to this, vnodes can be marked "unmapped" by setting 2231541Srgrimes * the Nth bit in operation's vdesc_flags. 2241541Srgrimes * 2251541Srgrimes * Also, some BSD vnode operations have the side effect of vrele'ing 2261541Srgrimes * their arguments. With stacking, the reference counts are held 2271541Srgrimes * by the upper node, not the lower one, so we must handle these 2281541Srgrimes * side-effects here. This is not of concern in Sun-derived systems 2291541Srgrimes * since there are no such side-effects. 2301541Srgrimes * 2311541Srgrimes * This makes the following assumptions: 2321541Srgrimes * - only one returned vpp 2331541Srgrimes * - no INOUT vpp's (Sun's vop_open has one of these) 2341541Srgrimes * - the vnode operation vector of the first vnode should be used 2351541Srgrimes * to determine what implementation of the op should be invoked 2361541Srgrimes * - all mapped vnodes are of our vnode-type (NEEDSWORK: 2371541Srgrimes * problems on rmdir'ing mount points and renaming?) 
2388876Srgrimes */ 23922521Sdysonint 2401541Srgrimesnull_bypass(ap) 2411541Srgrimes struct vop_generic_args /* { 2421541Srgrimes struct vnodeop_desc *a_desc; 2431541Srgrimes <other random data follows, presumably> 2441541Srgrimes } */ *ap; 2451541Srgrimes{ 2461541Srgrimes register struct vnode **this_vp_p; 2471541Srgrimes int error; 2481541Srgrimes struct vnode *old_vps[VDESC_MAX_VPS]; 2491541Srgrimes struct vnode **vps_p[VDESC_MAX_VPS]; 2501541Srgrimes struct vnode ***vppp; 2511541Srgrimes struct vnodeop_desc *descp = ap->a_desc; 2521541Srgrimes int reles, i; 2531541Srgrimes 2541541Srgrimes if (null_bug_bypass) 2551541Srgrimes printf ("null_bypass: %s\n", descp->vdesc_name); 2561541Srgrimes 25750616Sbde#ifdef DIAGNOSTIC 2581541Srgrimes /* 2591541Srgrimes * We require at least one vp. 2601541Srgrimes */ 2611541Srgrimes if (descp->vdesc_vp_offsets == NULL || 2621541Srgrimes descp->vdesc_vp_offsets[0] == VDESC_NO_OFFSET) 26350616Sbde panic ("null_bypass: no vp's in map"); 2641541Srgrimes#endif 2651541Srgrimes 2661541Srgrimes /* 2671541Srgrimes * Map the vnodes going in. 2681541Srgrimes * Later, we'll invoke the operation based on 2691541Srgrimes * the first mapped vnode's operation vector. 2701541Srgrimes */ 2711541Srgrimes reles = descp->vdesc_flags; 2721541Srgrimes for (i = 0; i < VDESC_MAX_VPS; reles >>= 1, i++) { 2731541Srgrimes if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET) 2741541Srgrimes break; /* bail out at end of list */ 2758876Srgrimes vps_p[i] = this_vp_p = 2761541Srgrimes VOPARG_OFFSETTO(struct vnode**,descp->vdesc_vp_offsets[i],ap); 2771541Srgrimes /* 2781541Srgrimes * We're not guaranteed that any but the first vnode 2791541Srgrimes * are of our type. Check for and don't map any 2801541Srgrimes * that aren't. (We must always map first vp or vclean fails.) 
2811541Srgrimes */ 28224987Skato if (i && (*this_vp_p == NULLVP || 28322521Sdyson (*this_vp_p)->v_op != null_vnodeop_p)) { 28424987Skato old_vps[i] = NULLVP; 2851541Srgrimes } else { 2861541Srgrimes old_vps[i] = *this_vp_p; 2871541Srgrimes *(vps_p[i]) = NULLVPTOLOWERVP(*this_vp_p); 2881541Srgrimes /* 2891541Srgrimes * XXX - Several operations have the side effect 2901541Srgrimes * of vrele'ing their vp's. We must account for 2911541Srgrimes * that. (This should go away in the future.) 2921541Srgrimes */ 29366356Sbp if (reles & VDESC_VP0_WILLRELE) 2941541Srgrimes VREF(*this_vp_p); 2951541Srgrimes } 2968876Srgrimes 2971541Srgrimes } 2981541Srgrimes 2991541Srgrimes /* 3001541Srgrimes * Call the operation on the lower layer 3011541Srgrimes * with the modified argument structure. 3021541Srgrimes */ 30366356Sbp if (vps_p[0] && *vps_p[0]) 30466356Sbp error = VCALL(*(vps_p[0]), descp->vdesc_offset, ap); 30566356Sbp else { 30666356Sbp printf("null_bypass: no map for %s\n", descp->vdesc_name); 30766356Sbp error = EINVAL; 30866356Sbp } 3091541Srgrimes 3101541Srgrimes /* 3111541Srgrimes * Maintain the illusion of call-by-value 3121541Srgrimes * by restoring vnodes in the argument structure 3131541Srgrimes * to their original value. 
3141541Srgrimes */ 3151541Srgrimes reles = descp->vdesc_flags; 3161541Srgrimes for (i = 0; i < VDESC_MAX_VPS; reles >>= 1, i++) { 3171541Srgrimes if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET) 3181541Srgrimes break; /* bail out at end of list */ 3191541Srgrimes if (old_vps[i]) { 3201541Srgrimes *(vps_p[i]) = old_vps[i]; 32166356Sbp#if 0 32266356Sbp if (reles & VDESC_VP0_WILLUNLOCK) 32383366Sjulian VOP_UNLOCK(*(vps_p[i]), LK_THISLAYER, curthread); 32466356Sbp#endif 32566356Sbp if (reles & VDESC_VP0_WILLRELE) 3261541Srgrimes vrele(*(vps_p[i])); 3271541Srgrimes } 3281541Srgrimes } 3291541Srgrimes 3301541Srgrimes /* 3311541Srgrimes * Map the possible out-going vpp 3321541Srgrimes * (Assumes that the lower layer always returns 3331541Srgrimes * a VREF'ed vpp unless it gets an error.) 3341541Srgrimes */ 3351541Srgrimes if (descp->vdesc_vpp_offset != VDESC_NO_OFFSET && 3361541Srgrimes !(descp->vdesc_flags & VDESC_NOMAP_VPP) && 3371541Srgrimes !error) { 3381541Srgrimes /* 3391541Srgrimes * XXX - even though some ops have vpp returned vp's, 3401541Srgrimes * several ops actually vrele this before returning. 3411541Srgrimes * We must avoid these ops. 3421541Srgrimes * (This should go away when these ops are regularized.) 3431541Srgrimes */ 3441541Srgrimes if (descp->vdesc_flags & VDESC_VPP_WILLRELE) 3451541Srgrimes goto out; 3461541Srgrimes vppp = VOPARG_OFFSETTO(struct vnode***, 3471541Srgrimes descp->vdesc_vpp_offset,ap); 34829584Sphk if (*vppp) 34998183Ssemenu error = null_nodeget(old_vps[0]->v_mount, **vppp, *vppp); 3501541Srgrimes } 3511541Srgrimes 3521541Srgrimes out: 3531541Srgrimes return (error); 3541541Srgrimes} 3551541Srgrimes 35622521Sdyson/* 35722521Sdyson * We have to carry on the locking protocol on the null layer vnodes 35822521Sdyson * as we progress through the tree. We also have to enforce read-only 35922521Sdyson * if this layer is mounted read-only. 
36022521Sdyson */ 36122521Sdysonstatic int 36222521Sdysonnull_lookup(ap) 36322521Sdyson struct vop_lookup_args /* { 36422521Sdyson struct vnode * a_dvp; 36522521Sdyson struct vnode ** a_vpp; 36622521Sdyson struct componentname * a_cnp; 36722521Sdyson } */ *ap; 36822521Sdyson{ 36922521Sdyson struct componentname *cnp = ap->a_cnp; 37066356Sbp struct vnode *dvp = ap->a_dvp; 37183366Sjulian struct thread *td = cnp->cn_thread; 37222521Sdyson int flags = cnp->cn_flags; 37366356Sbp struct vnode *vp, *ldvp, *lvp; 37422521Sdyson int error; 3751541Srgrimes 37666356Sbp if ((flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) && 37722521Sdyson (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) 37822521Sdyson return (EROFS); 37966356Sbp /* 38066356Sbp * Although it is possible to call null_bypass(), we'll do 38166356Sbp * a direct call to reduce overhead 38266356Sbp */ 38366356Sbp ldvp = NULLVPTOLOWERVP(dvp); 38466356Sbp vp = lvp = NULL; 38566356Sbp error = VOP_LOOKUP(ldvp, &lvp, cnp); 38622521Sdyson if (error == EJUSTRETURN && (flags & ISLASTCN) && 38766356Sbp (dvp->v_mount->mnt_flag & MNT_RDONLY) && 38822521Sdyson (cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME)) 38922521Sdyson error = EROFS; 39066356Sbp 39122521Sdyson /* 39266356Sbp * Rely only on the PDIRUNLOCK flag which should be carefully 39366356Sbp * tracked by underlying filesystem. 39422521Sdyson */ 39566356Sbp if (cnp->cn_flags & PDIRUNLOCK) 39683366Sjulian VOP_UNLOCK(dvp, LK_THISLAYER, td); 39766356Sbp if ((error == 0 || error == EJUSTRETURN) && lvp != NULL) { 39866356Sbp if (ldvp == lvp) { 39966356Sbp *ap->a_vpp = dvp; 40066356Sbp VREF(dvp); 40166356Sbp vrele(lvp); 40266356Sbp } else { 40398183Ssemenu error = null_nodeget(dvp->v_mount, lvp, &vp); 40498183Ssemenu if (error) { 40598183Ssemenu /* XXX Cleanup needed... 
*/ 40698183Ssemenu panic("null_nodeget failed"); 40798183Ssemenu } 40898183Ssemenu *ap->a_vpp = vp; 40966356Sbp } 41022521Sdyson } 41122521Sdyson return (error); 41222521Sdyson} 41322521Sdyson 4141541Srgrimes/* 41522521Sdyson * Setattr call. Disallow write attempts if the layer is mounted read-only. 41622521Sdyson */ 417105211Sphkstatic int 41822521Sdysonnull_setattr(ap) 41922521Sdyson struct vop_setattr_args /* { 42022521Sdyson struct vnodeop_desc *a_desc; 42122521Sdyson struct vnode *a_vp; 42222521Sdyson struct vattr *a_vap; 42322521Sdyson struct ucred *a_cred; 42483366Sjulian struct thread *a_td; 42522521Sdyson } */ *ap; 42622521Sdyson{ 42722521Sdyson struct vnode *vp = ap->a_vp; 42822521Sdyson struct vattr *vap = ap->a_vap; 42922521Sdyson 43022521Sdyson if ((vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL || 43122597Smpp vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL || 43222597Smpp vap->va_mtime.tv_sec != VNOVAL || vap->va_mode != (mode_t)VNOVAL) && 43322521Sdyson (vp->v_mount->mnt_flag & MNT_RDONLY)) 43422521Sdyson return (EROFS); 43522521Sdyson if (vap->va_size != VNOVAL) { 43622521Sdyson switch (vp->v_type) { 43722521Sdyson case VDIR: 43822521Sdyson return (EISDIR); 43922521Sdyson case VCHR: 44022521Sdyson case VBLK: 44122521Sdyson case VSOCK: 44222521Sdyson case VFIFO: 44336840Speter if (vap->va_flags != VNOVAL) 44436840Speter return (EOPNOTSUPP); 44522521Sdyson return (0); 44622521Sdyson case VREG: 44722521Sdyson case VLNK: 44822521Sdyson default: 44922521Sdyson /* 45022521Sdyson * Disallow write attempts if the filesystem is 45122521Sdyson * mounted read-only. 45222521Sdyson */ 45322521Sdyson if (vp->v_mount->mnt_flag & MNT_RDONLY) 45422521Sdyson return (EROFS); 45522521Sdyson } 45622521Sdyson } 45766356Sbp 45822607Smpp return (null_bypass((struct vop_generic_args *)ap)); 45922521Sdyson} 46022521Sdyson 46122521Sdyson/* 4621541Srgrimes * We handle getattr only to change the fsid. 
4631541Srgrimes */ 46412769Sphkstatic int 4651541Srgrimesnull_getattr(ap) 4661541Srgrimes struct vop_getattr_args /* { 4671541Srgrimes struct vnode *a_vp; 4681541Srgrimes struct vattr *a_vap; 4691541Srgrimes struct ucred *a_cred; 47083366Sjulian struct thread *a_td; 4711541Srgrimes } */ *ap; 4721541Srgrimes{ 4731541Srgrimes int error; 47422521Sdyson 47543311Sdillon if ((error = null_bypass((struct vop_generic_args *)ap)) != 0) 4761541Srgrimes return (error); 47765467Sbp 47865467Sbp ap->a_vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsid.val[0]; 4791541Srgrimes return (0); 4801541Srgrimes} 4811541Srgrimes 48266356Sbp/* 48366356Sbp * Handle to disallow write access if mounted read-only. 48466356Sbp */ 48522521Sdysonstatic int 48622521Sdysonnull_access(ap) 48722521Sdyson struct vop_access_args /* { 48822521Sdyson struct vnode *a_vp; 48922521Sdyson int a_mode; 49022521Sdyson struct ucred *a_cred; 49183366Sjulian struct thread *a_td; 49222521Sdyson } */ *ap; 49322521Sdyson{ 49422521Sdyson struct vnode *vp = ap->a_vp; 49522521Sdyson mode_t mode = ap->a_mode; 4961541Srgrimes 49722521Sdyson /* 49822521Sdyson * Disallow write attempts on read-only layers; 49922521Sdyson * unless the file is a socket, fifo, or a block or 50096755Strhodes * character device resident on the filesystem. 50122521Sdyson */ 50222521Sdyson if (mode & VWRITE) { 50322521Sdyson switch (vp->v_type) { 50422521Sdyson case VDIR: 50522521Sdyson case VLNK: 50622521Sdyson case VREG: 50722521Sdyson if (vp->v_mount->mnt_flag & MNT_RDONLY) 50822521Sdyson return (EROFS); 50922521Sdyson break; 51043305Sdillon default: 51143305Sdillon break; 51222521Sdyson } 51322521Sdyson } 51422607Smpp return (null_bypass((struct vop_generic_args *)ap)); 51522521Sdyson} 51622521Sdyson 51722521Sdyson/* 51865467Sbp * We must handle open to be able to catch MNT_NODEV and friends. 
51965467Sbp */ 52065467Sbpstatic int 52165467Sbpnull_open(ap) 52265467Sbp struct vop_open_args /* { 52365467Sbp struct vnode *a_vp; 52465467Sbp int a_mode; 52565467Sbp struct ucred *a_cred; 52683366Sjulian struct thread *a_td; 52765467Sbp } */ *ap; 52865467Sbp{ 52965467Sbp struct vnode *vp = ap->a_vp; 53065467Sbp struct vnode *lvp = NULLVPTOLOWERVP(ap->a_vp); 53165467Sbp 53265467Sbp if ((vp->v_mount->mnt_flag & MNT_NODEV) && 53365467Sbp (lvp->v_type == VBLK || lvp->v_type == VCHR)) 53465467Sbp return ENXIO; 53565467Sbp 53665467Sbp return (null_bypass((struct vop_generic_args *)ap)); 53765467Sbp} 53865467Sbp 53965467Sbp/* 54065467Sbp * We handle this to eliminate null FS to lower FS 54165467Sbp * file moving. Don't know why we don't allow this, 54265467Sbp * possibly we should. 54365467Sbp */ 54465467Sbpstatic int 54565467Sbpnull_rename(ap) 54665467Sbp struct vop_rename_args /* { 54765467Sbp struct vnode *a_fdvp; 54865467Sbp struct vnode *a_fvp; 54965467Sbp struct componentname *a_fcnp; 55065467Sbp struct vnode *a_tdvp; 55165467Sbp struct vnode *a_tvp; 55265467Sbp struct componentname *a_tcnp; 55365467Sbp } */ *ap; 55465467Sbp{ 55565467Sbp struct vnode *tdvp = ap->a_tdvp; 55665467Sbp struct vnode *fvp = ap->a_fvp; 55765467Sbp struct vnode *fdvp = ap->a_fdvp; 55865467Sbp struct vnode *tvp = ap->a_tvp; 55965467Sbp 56065467Sbp /* Check for cross-device rename. 
*/ 56165467Sbp if ((fvp->v_mount != tdvp->v_mount) || 56265467Sbp (tvp && (fvp->v_mount != tvp->v_mount))) { 56365467Sbp if (tdvp == tvp) 56465467Sbp vrele(tdvp); 56565467Sbp else 56665467Sbp vput(tdvp); 56765467Sbp if (tvp) 56865467Sbp vput(tvp); 56965467Sbp vrele(fdvp); 57065467Sbp vrele(fvp); 57165467Sbp return (EXDEV); 57265467Sbp } 57365467Sbp 57465467Sbp return (null_bypass((struct vop_generic_args *)ap)); 57565467Sbp} 57665467Sbp 57765467Sbp/* 57822521Sdyson * We need to process our own vnode lock and then clear the 57922521Sdyson * interlock flag as it applies only to our vnode, not the 58022521Sdyson * vnodes below us on the stack. 58122521Sdyson */ 58222597Smppstatic int 58322521Sdysonnull_lock(ap) 58422521Sdyson struct vop_lock_args /* { 58522521Sdyson struct vnode *a_vp; 58622521Sdyson int a_flags; 58783366Sjulian struct thread *a_td; 58822521Sdyson } */ *ap; 58922521Sdyson{ 59066356Sbp struct vnode *vp = ap->a_vp; 59166356Sbp int flags = ap->a_flags; 59283366Sjulian struct thread *td = ap->a_td; 59366356Sbp struct vnode *lvp; 59466356Sbp int error; 59522521Sdyson 59666356Sbp if (flags & LK_THISLAYER) { 59797072Ssemenu if (vp->v_vnlock != NULL) { 59897072Ssemenu /* lock is shared across layers */ 59997072Ssemenu if (flags & LK_INTERLOCK) 60097072Ssemenu mtx_unlock(&vp->v_interlock); 60197072Ssemenu return 0; 60297072Ssemenu } 60366356Sbp error = lockmgr(&vp->v_lock, flags & ~LK_THISLAYER, 60483366Sjulian &vp->v_interlock, td); 60566356Sbp return (error); 60666356Sbp } 60766356Sbp 60866356Sbp if (vp->v_vnlock != NULL) { 60966356Sbp /* 61066356Sbp * The lower level has exported a struct lock to us. Use 61166356Sbp * it so that all vnodes in the stack lock and unlock 61266356Sbp * simultaneously. Note: we don't DRAIN the lock as DRAIN 61366356Sbp * decommissions the lock - just because our vnode is 61466356Sbp * going away doesn't mean the struct lock below us is. 61566356Sbp * LK_EXCLUSIVE is fine. 
61666356Sbp */ 61766356Sbp if ((flags & LK_TYPE_MASK) == LK_DRAIN) { 61866356Sbp NULLFSDEBUG("null_lock: avoiding LK_DRAIN\n"); 61966356Sbp return(lockmgr(vp->v_vnlock, 62066356Sbp (flags & ~LK_TYPE_MASK) | LK_EXCLUSIVE, 62183366Sjulian &vp->v_interlock, td)); 62266356Sbp } 62383366Sjulian return(lockmgr(vp->v_vnlock, flags, &vp->v_interlock, td)); 62466356Sbp } else { 62566356Sbp /* 62666356Sbp * To prevent race conditions involving doing a lookup 62766356Sbp * on "..", we have to lock the lower node, then lock our 62866356Sbp * node. Most of the time it won't matter that we lock our 62966356Sbp * node (as any locking would need the lower one locked 63066356Sbp * first). But we can LK_DRAIN the upper lock as a step 63166356Sbp * towards decomissioning it. 63266356Sbp */ 63366356Sbp lvp = NULLVPTOLOWERVP(vp); 63466570Sbp if (lvp == NULL) 63583366Sjulian return (lockmgr(&vp->v_lock, flags, &vp->v_interlock, td)); 63666356Sbp if (flags & LK_INTERLOCK) { 63772200Sbmilekic mtx_unlock(&vp->v_interlock); 63866356Sbp flags &= ~LK_INTERLOCK; 63966356Sbp } 64066356Sbp if ((flags & LK_TYPE_MASK) == LK_DRAIN) { 64166356Sbp error = VOP_LOCK(lvp, 64283366Sjulian (flags & ~LK_TYPE_MASK) | LK_EXCLUSIVE, td); 64366356Sbp } else 64483366Sjulian error = VOP_LOCK(lvp, flags, td); 64566356Sbp if (error) 64666356Sbp return (error); 64783366Sjulian error = lockmgr(&vp->v_lock, flags, &vp->v_interlock, td); 64866356Sbp if (error) 64983366Sjulian VOP_UNLOCK(lvp, 0, td); 65066356Sbp return (error); 65166356Sbp } 65222521Sdyson} 65322521Sdyson 65422521Sdyson/* 65522521Sdyson * We need to process our own vnode unlock and then clear the 65622521Sdyson * interlock flag as it applies only to our vnode, not the 65722521Sdyson * vnodes below us on the stack. 
65822521Sdyson */ 65922597Smppstatic int 66022521Sdysonnull_unlock(ap) 66122521Sdyson struct vop_unlock_args /* { 66222521Sdyson struct vnode *a_vp; 66322521Sdyson int a_flags; 66483366Sjulian struct thread *a_td; 66522521Sdyson } */ *ap; 66622521Sdyson{ 66766356Sbp struct vnode *vp = ap->a_vp; 66866356Sbp int flags = ap->a_flags; 66983366Sjulian struct thread *td = ap->a_td; 67066570Sbp struct vnode *lvp; 67166356Sbp 67266356Sbp if (vp->v_vnlock != NULL) { 67366356Sbp if (flags & LK_THISLAYER) 67466356Sbp return 0; /* the lock is shared across layers */ 67566356Sbp flags &= ~LK_THISLAYER; 67666356Sbp return (lockmgr(vp->v_vnlock, flags | LK_RELEASE, 67783366Sjulian &vp->v_interlock, td)); 67866356Sbp } 67966570Sbp lvp = NULLVPTOLOWERVP(vp); 68066570Sbp if (lvp == NULL) 68183366Sjulian return (lockmgr(&vp->v_lock, flags | LK_RELEASE, &vp->v_interlock, td)); 68266356Sbp if ((flags & LK_THISLAYER) == 0) { 68367145Sbp if (flags & LK_INTERLOCK) { 68472200Sbmilekic mtx_unlock(&vp->v_interlock); 68567145Sbp flags &= ~LK_INTERLOCK; 68667145Sbp } 68783366Sjulian VOP_UNLOCK(lvp, flags & ~LK_INTERLOCK, td); 68866356Sbp } else 68966356Sbp flags &= ~LK_THISLAYER; 69083366Sjulian return (lockmgr(&vp->v_lock, flags | LK_RELEASE, &vp->v_interlock, td)); 69122521Sdyson} 69222521Sdyson 69322597Smppstatic int 69466356Sbpnull_islocked(ap) 69566356Sbp struct vop_islocked_args /* { 69666356Sbp struct vnode *a_vp; 69783366Sjulian struct thread *a_td; 69866356Sbp } */ *ap; 69966356Sbp{ 70066356Sbp struct vnode *vp = ap->a_vp; 70183366Sjulian struct thread *td = ap->a_td; 70266356Sbp 70366356Sbp if (vp->v_vnlock != NULL) 70483366Sjulian return (lockstatus(vp->v_vnlock, td)); 70583366Sjulian return (lockstatus(&vp->v_lock, td)); 70666356Sbp} 70766356Sbp 70866356Sbp/* 70966356Sbp * There is no way to tell that someone issued remove/rmdir operation 71066356Sbp * on the underlying filesystem. For now we just have to release lowevrp 71166356Sbp * as soon as possible. 
71298183Ssemenu * 71398183Ssemenu * Note, we can't release any resources nor remove vnode from hash before 71498183Ssemenu * appropriate VXLOCK stuff is is done because other process can find this 71598183Ssemenu * vnode in hash during inactivation and may be sitting in vget() and waiting 71698183Ssemenu * for null_inactive to unlock vnode. Thus we will do all those in VOP_RECLAIM. 71766356Sbp */ 71866356Sbpstatic int 7191541Srgrimesnull_inactive(ap) 7201541Srgrimes struct vop_inactive_args /* { 7211541Srgrimes struct vnode *a_vp; 72283366Sjulian struct thread *a_td; 7231541Srgrimes } */ *ap; 7241541Srgrimes{ 72530636Sroberto struct vnode *vp = ap->a_vp; 72698175Ssemenu struct thread *td = ap->a_td; 72792540Smckusick 72898175Ssemenu VOP_UNLOCK(vp, 0, td); 72998175Ssemenu 73092540Smckusick /* 73192540Smckusick * If this is the last reference, then free up the vnode 73292540Smckusick * so as not to tie up the lower vnodes. 73392540Smckusick */ 73498175Ssemenu vrecycle(vp, NULL, td); 73598175Ssemenu 73692540Smckusick return (0); 73792540Smckusick} 73892540Smckusick 73992540Smckusick/* 74098183Ssemenu * Now, the VXLOCK is in force and we're free to destroy the null vnode. 
 */
static int
null_reclaim(ap)
	struct vop_reclaim_args /* {
		struct vnode *a_vp;
		struct thread *a_td;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;
	struct null_node *xp = VTONULL(vp);
	struct vnode *lowervp = xp->null_lowervp;

	if (lowervp) {
		/* Remove from the hash before dropping the lower vnode. */
		null_hashrem(xp);

		/*
		 * Two references on the lower vnode are dropped here.
		 * NOTE(review): presumably one was taken when the null
		 * node was created and one by the lookup/hash machinery
		 * — confirm against null_nodeget() in null_subr.c.
		 */
		vrele(lowervp);
		vrele(lowervp);
	}

	vp->v_data = NULL;
	/*
	 * Point v_vnlock back at our private lock so later lock
	 * operations no longer touch the (now released) lower vnode.
	 */
	vp->v_vnlock = &vp->v_lock;
	FREE(xp, M_NULLFSNODE);

	return (0);
}

/*
 * Print a one-line description of this null node (tag plus upper and
 * lower vnode pointers) for diagnostic output.
 */
static int
null_print(ap)
	struct vop_print_args /* {
		struct vnode *a_vp;
	} */ *ap;
{
	register struct vnode *vp = ap->a_vp;
	printf("\ttag %s, vp=%p, lowervp=%p\n", vp->v_tag, vp,
	    NULLVPTOLOWERVP(vp));
	return (0);
}

/*
 * Let an underlying filesystem do the work
 *
 * On success the upper vnode is flagged VV_OBJBUF so that later
 * object-related operations know a VM object exists below.
 */
static int
null_createvobject(ap)
	struct vop_createvobject_args /* {
		struct vnode *vp;
		struct ucred *cred;
		struct thread *td;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;
	struct vnode *lowervp = VTONULL(vp) ? NULLVPTOLOWERVP(vp) : NULL;
	int error;

	/* Nothing to do without a type or a lower vnode to delegate to. */
	if (vp->v_type == VNON || lowervp == NULL)
		return 0;
	error = VOP_CREATEVOBJECT(lowervp, ap->a_cred, ap->a_td);
	if (error)
		return (error);
	vp->v_vflag |= VV_OBJBUF;
	return (0);
}

/*
 * We have nothing to destroy and this operation shouldn't be bypassed.
 * Just clear the flag set by null_createvobject(); the lower layer
 * owns the actual VM object.
 */
static int
null_destroyvobject(ap)
	struct vop_destroyvobject_args /* {
		struct vnode *vp;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;

	vp->v_vflag &= ~VV_OBJBUF;
	return (0);
}

/*
 * Hand back the lower vnode's VM object; the null layer never owns
 * one of its own.
 */
static int
null_getvobject(ap)
	struct vop_getvobject_args /* {
		struct vnode *vp;
		struct vm_object **objpp;
	} */ *ap;
{
	struct vnode *lvp = NULLVPTOLOWERVP(ap->a_vp);

	if (lvp == NULL)
		return EINVAL;
	return (VOP_GETVOBJECT(lvp, ap->a_objpp));
}

/*
 * Global vfs data structures
 *
 * Any operation without an explicit entry below falls through to
 * null_bypass() via vop_default_desc and is forwarded to the lower
 * layer.
 */
vop_t **null_vnodeop_p;
static struct vnodeopv_entry_desc null_vnodeop_entries[] = {
	{ &vop_default_desc,		(vop_t *) null_bypass },

	{ &vop_access_desc,		(vop_t *) null_access },
	{ &vop_bmap_desc,		(vop_t *) vop_eopnotsupp },
	{ &vop_createvobject_desc,	(vop_t *) null_createvobject },
	{ &vop_destroyvobject_desc,	(vop_t *) null_destroyvobject },
	{ &vop_getattr_desc,		(vop_t *) null_getattr },
	{ &vop_getvobject_desc,		(vop_t *) null_getvobject },
	{ &vop_getwritemount_desc,	(vop_t *) vop_stdgetwritemount},
	{ &vop_inactive_desc,		(vop_t *) null_inactive },
	{ &vop_islocked_desc,		(vop_t *) null_islocked },
	{ &vop_lock_desc,		(vop_t *) null_lock },
	{ &vop_lookup_desc,		(vop_t *) null_lookup },
	{ &vop_open_desc,		(vop_t *) null_open },
	{ &vop_print_desc,		(vop_t *) null_print },
	{ &vop_reclaim_desc,		(vop_t *) null_reclaim },
	{ &vop_rename_desc,		(vop_t *) null_rename },
	{ &vop_setattr_desc,		(vop_t *) null_setattr },
	{ &vop_strategy_desc,		(vop_t *) vop_eopnotsupp },
	{ &vop_unlock_desc,		(vop_t *) null_unlock },
	{ NULL, NULL }
};
static struct vnodeopv_desc null_vnodeop_opv_desc =
	{ &null_vnodeop_p, null_vnodeop_entries };

VNODEOP_SET(null_vnodeop_opv_desc);