null_vnops.c revision 98176
1139776Simp/* 21541Srgrimes * Copyright (c) 1992, 1993 31541Srgrimes * The Regents of the University of California. All rights reserved. 41541Srgrimes * 51541Srgrimes * This code is derived from software contributed to Berkeley by 61541Srgrimes * John Heidemann of the UCLA Ficus project. 71541Srgrimes * 81541Srgrimes * Redistribution and use in source and binary forms, with or without 91541Srgrimes * modification, are permitted provided that the following conditions 101541Srgrimes * are met: 111541Srgrimes * 1. Redistributions of source code must retain the above copyright 121541Srgrimes * notice, this list of conditions and the following disclaimer. 131541Srgrimes * 2. Redistributions in binary form must reproduce the above copyright 141541Srgrimes * notice, this list of conditions and the following disclaimer in the 151541Srgrimes * documentation and/or other materials provided with the distribution. 161541Srgrimes * 3. All advertising materials mentioning features or use of this software 171541Srgrimes * must display the following acknowledgement: 181541Srgrimes * This product includes software developed by the University of 191541Srgrimes * California, Berkeley and its contributors. 201541Srgrimes * 4. Neither the name of the University nor the names of its contributors 211541Srgrimes * may be used to endorse or promote products derived from this software 221541Srgrimes * without specific prior written permission. 231541Srgrimes * 241541Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 251541Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 261541Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 271541Srgrimes * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 281541Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 291541Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 301541Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 311541Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 3222521Sdyson * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 331541Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 3422521Sdyson * SUCH DAMAGE. 3522521Sdyson * 3622521Sdyson * @(#)null_vnops.c 8.6 (Berkeley) 5/27/95 3722521Sdyson * 3822521Sdyson * Ancestors: 3950477Speter * @(#)lofs_vnops.c 1.2 (Berkeley) 6/18/92 401541Srgrimes * ...and... 411541Srgrimes * @(#)null_vnodeops.c 1.20 92/07/07 UCLA Ficus project 421541Srgrimes * 431541Srgrimes * $FreeBSD: head/sys/fs/nullfs/null_vnops.c 98176 2002-06-13 18:25:06Z semenu $ 441541Srgrimes */ 4577130Sru 461541Srgrimes/* 4796755Strhodes * Null Layer 481541Srgrimes * 4996755Strhodes * (See mount_nullfs(8) for more information.) 501541Srgrimes * 5135256Sdes * The null layer duplicates a portion of the filesystem 521541Srgrimes * name space under a new name. In this respect, it is 531541Srgrimes * similar to the loopback filesystem. It differs from 541541Srgrimes * the loopback fs in two respects: it is implemented using 551541Srgrimes * a stackable layers techniques, and its "null-node"s stack above 5696755Strhodes * all lower-layer vnodes, not just over directory vnodes. 571541Srgrimes * 581541Srgrimes * The null layer has two purposes. First, it serves as a demonstration 5996755Strhodes * of layering by proving a layer which does nothing. (It actually 601541Srgrimes * does everything the loopback filesystem does, which is slightly 611541Srgrimes * more than nothing.) Second, the null layer can serve as a prototype 621541Srgrimes * layer. 
Since it provides all necessary layer framework, 631541Srgrimes * new filesystem layers can be created very easily be starting 641541Srgrimes * with a null layer. 651541Srgrimes * 661541Srgrimes * The remainder of this man page examines the null layer as a basis 671541Srgrimes * for constructing new layers. 6877130Sru * 6977130Sru * 701541Srgrimes * INSTANTIATING NEW NULL LAYERS 711541Srgrimes * 721541Srgrimes * New null layers are created with mount_nullfs(8). 731541Srgrimes * Mount_nullfs(8) takes two arguments, the pathname 741541Srgrimes * of the lower vfs (target-pn) and the pathname where the null 751541Srgrimes * layer will appear in the namespace (alias-pn). After 761541Srgrimes * the null layer is put into place, the contents 771541Srgrimes * of target-pn subtree will be aliased under alias-pn. 7896755Strhodes * 791541Srgrimes * 801541Srgrimes * OPERATION OF A NULL LAYER 8126963Salex * 821541Srgrimes * The null layer is the minimum filesystem layer, 831541Srgrimes * simply bypassing all possible operations to the lower layer 841541Srgrimes * for processing there. The majority of its activity centers 851541Srgrimes * on the bypass routine, through which nearly all vnode operations 861541Srgrimes * pass. 871541Srgrimes * 881541Srgrimes * The bypass routine accepts arbitrary vnode operations for 891541Srgrimes * handling by the lower layer. It begins by examing vnode 901541Srgrimes * operation arguments and replacing any null-nodes by their 911541Srgrimes * lower-layer equivlants. It then invokes the operation 9222521Sdyson * on the lower layer. Finally, it replaces the null-nodes 9322521Sdyson * in the arguments and, if a vnode is return by the operation, 9422521Sdyson * stacks a null-node on top of the returned vnode. 9522521Sdyson * 9622521Sdyson * Although bypass handles most operations, vop_getattr, vop_lock, 971541Srgrimes * vop_unlock, vop_inactive, vop_reclaim, and vop_print are not 9822521Sdyson * bypassed. 
Vop_getattr must change the fsid being returned. 9922521Sdyson * Vop_lock and vop_unlock must handle any locking for the 10022521Sdyson * current vnode as well as pass the lock request down. 10122521Sdyson * Vop_inactive and vop_reclaim are not bypassed so that 10222521Sdyson * they can handle freeing null-layer specific data. Vop_print 10322521Sdyson * is not bypassed to avoid excessive debugging information. 10422521Sdyson * Also, certain vnode operations change the locking state within 10522521Sdyson * the operation (create, mknod, remove, link, rename, mkdir, rmdir, 10622521Sdyson * and symlink). Ideally these operations should not change the 1071541Srgrimes * lock state, but should be changed to let the caller of the 1081541Srgrimes * function unlock them. Otherwise all intermediate vnode layers 1091541Srgrimes * (such as union, umapfs, etc) must catch these functions to do 1101541Srgrimes * the necessary locking at their layer. 1111541Srgrimes * 1121541Srgrimes * 1131541Srgrimes * INSTANTIATING VNODE STACKS 1141541Srgrimes * 1151541Srgrimes * Mounting associates the null layer with a lower layer, 1161541Srgrimes * effect stacking two VFSes. Vnode stacks are instead 1171541Srgrimes * created on demand as files are accessed. 1181541Srgrimes * 1191541Srgrimes * The initial mount creates a single vnode stack for the 1201541Srgrimes * root of the new null layer. All other vnode stacks 1218876Srgrimes * are created as a result of vnode operations on 1221541Srgrimes * this or other null vnode stacks. 1231541Srgrimes * 1241541Srgrimes * New vnode stacks come into existance as a result of 1251541Srgrimes * an operation which returns a vnode. 12677130Sru * The bypass routine stacks a null-node above the new 1271541Srgrimes * vnode before returning it to the caller. 1281541Srgrimes * 1291541Srgrimes * For example, imagine mounting a null layer with 1301541Srgrimes * "mount_nullfs /usr/include /dev/layer/null". 
1318876Srgrimes * Changing directory to /dev/layer/null will assign 1321541Srgrimes * the root null-node (which was created when the null layer was mounted). 1331541Srgrimes * Now consider opening "sys". A vop_lookup would be 1341541Srgrimes * done on the root null-node. This operation would bypass through 1351541Srgrimes * to the lower layer which would return a vnode representing 1361541Srgrimes * the UFS "sys". Null_bypass then builds a null-node 1371541Srgrimes * aliasing the UFS "sys" and returns this to the caller. 1381541Srgrimes * Later operations on the null-node "sys" will repeat this 1391541Srgrimes * process when constructing other vnode stacks. 14096755Strhodes * 1411541Srgrimes * 1421541Srgrimes * CREATING OTHER FILE SYSTEM LAYERS 1431541Srgrimes * 1441541Srgrimes * One of the easiest ways to construct new filesystem layers is to make 1458876Srgrimes * a copy of the null layer, rename all files and variables, and 1461541Srgrimes * then begin modifing the copy. Sed can be used to easily rename 1471541Srgrimes * all variables. 1481541Srgrimes * 1491541Srgrimes * The umap layer is an example of a layer descended from the 1501541Srgrimes * null layer. 1518876Srgrimes * 1521541Srgrimes * 1531541Srgrimes * INVOKING OPERATIONS ON LOWER LAYERS 1541541Srgrimes * 1551541Srgrimes * There are two techniques to invoke operations on a lower layer 156108470Sschweikh * when the operation cannot be completely bypassed. Each method 1571541Srgrimes * is appropriate in different situations. In both cases, 1581541Srgrimes * it is the responsibility of the aliasing layer to make 1591541Srgrimes * the operation arguments "correct" for the lower layer 16026964Salex * by mapping an vnode arguments to the lower layer. 1611541Srgrimes * 1621541Srgrimes * The first approach is to call the aliasing layer's bypass routine. 1631541Srgrimes * This method is most suitable when you wish to invoke the operation 16426964Salex * currently being handled on the lower layer. 
It has the advantage
 * that the bypass routine already must do argument mapping.
 * An example of this is null_getattrs in the null layer.
 *
 * A second approach is to directly invoke vnode operations on
 * the lower layer with the VOP_OPERATIONNAME interface.
 * The advantage of this method is that it is easy to invoke
 * arbitrary operations on the lower layer.  The disadvantage
 * is that vnode arguments must be manually mapped.
 *
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/conf.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/mutex.h>
#include <sys/namei.h>
#include <sys/sysctl.h>
#include <sys/vnode.h>

#include <fs/nullfs/null.h>

#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_object.h>
#include <vm/vnode_pager.h>

/*
 * Debug knob: when non-zero, null_bypass() printf's the name of every
 * vnode operation it forwards.  Exposed as debug.nullfs_bug_bypass.
 */
static int null_bug_bypass = 0;   /* for debugging: enables bypass printf'ing */
SYSCTL_INT(_debug, OID_AUTO, nullfs_bug_bypass, CTLFLAG_RW, 
	&null_bug_bypass, 0, "");

/* Forward declarations for the vnode operations implemented below. */
static int	null_access(struct vop_access_args *ap);
static int	null_createvobject(struct vop_createvobject_args *ap);
static int	null_destroyvobject(struct vop_destroyvobject_args *ap);
static int	null_getattr(struct vop_getattr_args *ap);
static int	null_getvobject(struct vop_getvobject_args *ap);
static int	null_inactive(struct vop_inactive_args *ap);
static int	null_islocked(struct vop_islocked_args *ap);
static int	null_lock(struct vop_lock_args *ap);
static int	null_lookup(struct vop_lookup_args *ap);
static int	null_open(struct vop_open_args *ap);
static int	null_print(struct vop_print_args *ap);
static int	null_reclaim(struct vop_reclaim_args *ap);
static int	null_rename(struct vop_rename_args *ap);
static int	null_setattr(struct vop_setattr_args *ap);
static int	null_unlock(struct vop_unlock_args *ap);

/*
 * This is the 10-Apr-92 bypass routine.
 * This version has been optimized for speed, throwing away some
 * safety checks.  It should still always work, but it's not as
 * robust to programmer errors.
 *
 * In general, we map all vnodes going down and unmap them on the way back.
 * As an exception to this, vnodes can be marked "unmapped" by setting
 * the Nth bit in operation's vdesc_flags.
 *
 * Also, some BSD vnode operations have the side effect of vrele'ing
 * their arguments.  With stacking, the reference counts are held
 * by the upper node, not the lower one, so we must handle these
 * side-effects here.  This is not of concern in Sun-derived systems
 * since there are no such side-effects.
 *
 * This makes the following assumptions:
 * - only one returned vpp
 * - no INOUT vpp's (Sun's vop_open has one of these)
 * - the vnode operation vector of the first vnode should be used
 *   to determine what implementation of the op should be invoked
 * - all mapped vnodes are of our vnode-type (NEEDSWORK:
 *   problems on rmdir'ing mount points and renaming?)
 */
int
null_bypass(ap)
	struct vop_generic_args /* {
		struct vnodeop_desc *a_desc;
		<other random data follows, presumably>
	} */ *ap;
{
	register struct vnode **this_vp_p;
	int error;
	struct vnode *old_vps[VDESC_MAX_VPS];
	struct vnode **vps_p[VDESC_MAX_VPS];
	struct vnode ***vppp;
	struct vnodeop_desc *descp = ap->a_desc;
	int reles, i;

	if (null_bug_bypass)
		printf ("null_bypass: %s\n", descp->vdesc_name);

#ifdef DIAGNOSTIC
	/*
	 * We require at least one vp.
	 */
	if (descp->vdesc_vp_offsets == NULL ||
	    descp->vdesc_vp_offsets[0] == VDESC_NO_OFFSET)
		panic ("null_bypass: no vp's in map");
#endif

	/*
	 * Map the vnodes going in.
	 * Later, we'll invoke the operation based on
	 * the first mapped vnode's operation vector.
	 *
	 * 'reles' starts as the op's vdesc_flags and is shifted right
	 * once per vnode slot, so the VDESC_VP0_WILLRELE bit tested
	 * below always refers to the current slot's vnode.
	 */
	reles = descp->vdesc_flags;
	for (i = 0; i < VDESC_MAX_VPS; reles >>= 1, i++) {
		if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET)
			break;   /* bail out at end of list */
		vps_p[i] = this_vp_p =
			VOPARG_OFFSETTO(struct vnode**,descp->vdesc_vp_offsets[i],ap);
		/*
		 * We're not guaranteed that any but the first vnode
		 * are of our type.  Check for and don't map any
		 * that aren't.  (We must always map first vp or vclean fails.)
		 */
		if (i && (*this_vp_p == NULLVP ||
		    (*this_vp_p)->v_op != null_vnodeop_p)) {
			old_vps[i] = NULLVP;
		} else {
			/* Remember the null vnode, substitute the lower one. */
			old_vps[i] = *this_vp_p;
			*(vps_p[i]) = NULLVPTOLOWERVP(*this_vp_p);
			/*
			 * XXX - Several operations have the side effect
			 * of vrele'ing their vp's.  We must account for
			 * that.  (This should go away in the future.)
			 */
			if (reles & VDESC_VP0_WILLRELE)
				VREF(*this_vp_p);
		}

	}

	/*
	 * Call the operation on the lower layer
	 * with the modified argument structure.
	 */
	if (vps_p[0] && *vps_p[0])
		error = VCALL(*(vps_p[0]), descp->vdesc_offset, ap);
	else {
		printf("null_bypass: no map for %s\n", descp->vdesc_name);
		error = EINVAL;
	}

	/*
	 * Maintain the illusion of call-by-value
	 * by restoring vnodes in the argument structure
	 * to their original value.
	 */
	reles = descp->vdesc_flags;
	for (i = 0; i < VDESC_MAX_VPS; reles >>= 1, i++) {
		if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET)
			break;   /* bail out at end of list */
		if (old_vps[i]) {
			*(vps_p[i]) = old_vps[i];
#if 0
			if (reles & VDESC_VP0_WILLUNLOCK)
				VOP_UNLOCK(*(vps_p[i]), LK_THISLAYER, curthread);
#endif
			/* Drop the reference taken above for WILLRELE ops. */
			if (reles & VDESC_VP0_WILLRELE)
				vrele(*(vps_p[i]));
		}
	}

	/*
	 * Map the possible out-going vpp
	 * (Assumes that the lower layer always returns
	 * a VREF'ed vpp unless it gets an error.)
	 */
	if (descp->vdesc_vpp_offset != VDESC_NO_OFFSET &&
	    !(descp->vdesc_flags & VDESC_NOMAP_VPP) &&
	    !error) {
		/*
		 * XXX - even though some ops have vpp returned vp's,
		 * several ops actually vrele this before returning.
		 * We must avoid these ops.
		 * (This should go away when these ops are regularized.)
		 */
		if (descp->vdesc_flags & VDESC_VPP_WILLRELE)
			goto out;
		vppp = VOPARG_OFFSETTO(struct vnode***,
				 descp->vdesc_vpp_offset,ap);
		/* Stack a null node over the vnode the lower layer returned. */
		if (*vppp)
			error = null_node_create(old_vps[0]->v_mount, **vppp, *vppp);
	}

 out:
	return (error);
}

/*
 * We have to carry on the locking protocol on the null layer vnodes
 * as we progress through the tree. We also have to enforce read-only
 * if this layer is mounted read-only.
 */
static int
null_lookup(ap)
	struct vop_lookup_args /* {
		struct vnode * a_dvp;
		struct vnode ** a_vpp;
		struct componentname * a_cnp;
	} */ *ap;
{
	struct componentname *cnp = ap->a_cnp;
	struct vnode *dvp = ap->a_dvp;
	struct thread *td = cnp->cn_thread;
	int flags = cnp->cn_flags;
	struct vnode *vp, *ldvp, *lvp;
	int error;

	/* Refuse DELETE/RENAME of the last component on a read-only layer. */
	if ((flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
		return (EROFS);
	/*
	 * Although it is possible to call null_bypass(), we'll do
	 * a direct call to reduce overhead
	 */
	ldvp = NULLVPTOLOWERVP(dvp);
	vp = lvp = NULL;
	error = VOP_LOOKUP(ldvp, &lvp, cnp);
	/*
	 * EJUSTRETURN means "name not found but may be created";
	 * creation is also refused on a read-only layer.
	 */
	if (error == EJUSTRETURN && (flags & ISLASTCN) &&
	    (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
	    (cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME))
		error = EROFS;

	/*
	 * Rely only on the PDIRUNLOCK flag which should be carefully
	 * tracked by underlying filesystem.
	 */
	if (cnp->cn_flags & PDIRUNLOCK)
		VOP_UNLOCK(dvp, LK_THISLAYER, td);
	if ((error == 0 || error == EJUSTRETURN) && lvp != NULL) {
		if (ldvp == lvp) {
			/* Lookup of "." - return the upper directory itself. */
			*ap->a_vpp = dvp;
			VREF(dvp);
			vrele(lvp);
		} else {
			/* Stack a null node over the lower vnode found. */
			error = null_node_create(dvp->v_mount, lvp, &vp);
			if (error == 0)
				*ap->a_vpp = vp;
		}
	}
	return (error);
}

/*
 * Setattr call.
Disallow write attempts if the layer is mounted read-only. 41322521Sdyson */ 41436840Speterint 41536840Speternull_setattr(ap) 41622521Sdyson struct vop_setattr_args /* { 41722521Sdyson struct vnodeop_desc *a_desc; 41822521Sdyson struct vnode *a_vp; 41922521Sdyson struct vattr *a_vap; 42022521Sdyson struct ucred *a_cred; 42122521Sdyson struct thread *a_td; 42222521Sdyson } */ *ap; 42322521Sdyson{ 42422521Sdyson struct vnode *vp = ap->a_vp; 42522521Sdyson struct vattr *vap = ap->a_vap; 42622521Sdyson 42722521Sdyson if ((vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL || 42866356Sbp vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL || 42922607Smpp vap->va_mtime.tv_sec != VNOVAL || vap->va_mode != (mode_t)VNOVAL) && 43022521Sdyson (vp->v_mount->mnt_flag & MNT_RDONLY)) 43122521Sdyson return (EROFS); 43222521Sdyson if (vap->va_size != VNOVAL) { 4331541Srgrimes switch (vp->v_type) { 4341541Srgrimes case VDIR: 43512769Sphk return (EISDIR); 436140728Sphk case VCHR: 4371541Srgrimes case VBLK: 4381541Srgrimes case VSOCK: 43922521Sdyson case VFIFO: 44043311Sdillon if (vap->va_flags != VNOVAL) 4411541Srgrimes return (EOPNOTSUPP); 44265467Sbp return (0); 44365467Sbp case VREG: 4441541Srgrimes case VLNK: 4451541Srgrimes default: 4461541Srgrimes /* 44766356Sbp * Disallow write attempts if the filesystem is 44866356Sbp * mounted read-only. 44966356Sbp */ 45022521Sdyson if (vp->v_mount->mnt_flag & MNT_RDONLY) 451140728Sphk return (EROFS); 45222521Sdyson } 45322521Sdyson } 45422521Sdyson 4551541Srgrimes return (null_bypass((struct vop_generic_args *)ap)); 45622521Sdyson} 45722521Sdyson 45822521Sdyson/* 45996755Strhodes * We handle getattr only to change the fsid. 
46022521Sdyson */ 46122521Sdysonstatic int 46222521Sdysonnull_getattr(ap) 46322521Sdyson struct vop_getattr_args /* { 46422521Sdyson struct vnode *a_vp; 46522521Sdyson struct vattr *a_vap; 46622521Sdyson struct ucred *a_cred; 46722521Sdyson struct thread *a_td; 46822521Sdyson } */ *ap; 46943305Sdillon{ 47043305Sdillon int error; 47122521Sdyson 47222521Sdyson if ((error = null_bypass((struct vop_generic_args *)ap)) != 0) 47322607Smpp return (error); 47422521Sdyson 47522521Sdyson ap->a_vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsid.val[0]; 47622521Sdyson return (0); 47765467Sbp} 47865467Sbp 47965467Sbp/* 48065467Sbp * Handle to disallow write access if mounted read-only. 48165467Sbp */ 482140728Sphkstatic int 48365467Sbpnull_access(ap) 48465467Sbp struct vop_access_args /* { 48565467Sbp struct vnode *a_vp; 48665467Sbp int a_mode; 48765467Sbp struct ucred *a_cred; 48865467Sbp struct thread *a_td; 48965467Sbp } */ *ap; 49065467Sbp{ 49165467Sbp struct vnode *vp = ap->a_vp; 49265467Sbp mode_t mode = ap->a_mode; 49365467Sbp 49465467Sbp /* 49565467Sbp * Disallow write attempts on read-only layers; 49665467Sbp * unless the file is a socket, fifo, or a block or 49765467Sbp * character device resident on the filesystem. 49865467Sbp */ 49965467Sbp if (mode & VWRITE) { 50065467Sbp switch (vp->v_type) { 50165467Sbp case VDIR: 50265467Sbp case VLNK: 50365467Sbp case VREG: 50465467Sbp if (vp->v_mount->mnt_flag & MNT_RDONLY) 50565467Sbp return (EROFS); 50665467Sbp break; 50722521Sdyson default: 50822521Sdyson break; 50922521Sdyson } 51022521Sdyson } 51122597Smpp return (null_bypass((struct vop_generic_args *)ap)); 512169671Skib} 51322521Sdyson 51466356Sbp/* 51566356Sbp * We must handle open to be able to catch MNT_NODEV and friends. 
 */
static int
null_open(ap)
	struct vop_open_args /* {
		struct vnode *a_vp;
		int  a_mode;
		struct ucred *a_cred;
		struct thread *a_td;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;
	struct vnode *lvp = NULLVPTOLOWERVP(ap->a_vp);

	/* Honor MNT_NODEV: refuse to open devices through this mount. */
	if ((vp->v_mount->mnt_flag & MNT_NODEV) &&
	    (lvp->v_type == VBLK || lvp->v_type == VCHR))
		return ENXIO;

	return (null_bypass((struct vop_generic_args *)ap));
}

/*
 * We handle this to eliminate null FS to lower FS
 * file moving. Don't know why we don't allow this,
 * possibly we should.
 */
static int
null_rename(ap)
	struct vop_rename_args /* {
		struct vnode *a_fdvp;
		struct vnode *a_fvp;
		struct componentname *a_fcnp;
		struct vnode *a_tdvp;
		struct vnode *a_tvp;
		struct componentname *a_tcnp;
	} */ *ap;
{
	struct vnode *tdvp = ap->a_tdvp;
	struct vnode *fvp = ap->a_fvp;
	struct vnode *fdvp = ap->a_fdvp;
	struct vnode *tvp = ap->a_tvp;

	/*
	 * Check for cross-device rename.
	 * On failure we must release the references/locks VOP_RENAME
	 * callers expect this op to consume (see vop_rename contract).
	 */
	if ((fvp->v_mount != tdvp->v_mount) ||
	    (tvp && (fvp->v_mount != tvp->v_mount))) {
		if (tdvp == tvp)
			vrele(tdvp);
		else
			vput(tdvp);
		if (tvp)
			vput(tvp);
		vrele(fdvp);
		vrele(fvp);
		return (EXDEV);
	}

	return (null_bypass((struct vop_generic_args *)ap));
}

/*
 * We need to process our own vnode lock and then clear the
 * interlock flag as it applies only to our vnode, not the
 * vnodes below us on the stack.
 */
static int
null_lock(ap)
	struct vop_lock_args /* {
		struct vnode *a_vp;
		int a_flags;
		struct thread *a_td;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;
	int flags = ap->a_flags;
	struct thread *td = ap->a_td;
	struct vnode *lvp;
	int error;

	/* LK_THISLAYER: lock only this layer, never propagate down. */
	if (flags & LK_THISLAYER) {
		if (vp->v_vnlock != NULL) {
			/* lock is shared across layers */
			if (flags & LK_INTERLOCK)
				mtx_unlock(&vp->v_interlock);
			return 0;
		}
		error = lockmgr(&vp->v_lock, flags & ~LK_THISLAYER,
		    &vp->v_interlock, td);
		return (error);
	}

	if (vp->v_vnlock != NULL) {
		/*
		 * The lower level has exported a struct lock to us. Use
		 * it so that all vnodes in the stack lock and unlock
		 * simultaneously. Note: we don't DRAIN the lock as DRAIN
		 * decommissions the lock - just because our vnode is
		 * going away doesn't mean the struct lock below us is.
		 * LK_EXCLUSIVE is fine.
		 */
		if ((flags & LK_TYPE_MASK) == LK_DRAIN) {
			NULLFSDEBUG("null_lock: avoiding LK_DRAIN\n");
			return(lockmgr(vp->v_vnlock,
				(flags & ~LK_TYPE_MASK) | LK_EXCLUSIVE,
				&vp->v_interlock, td));
		}
		return(lockmgr(vp->v_vnlock, flags, &vp->v_interlock, td));
	} else {
		/*
		 * To prevent race conditions involving doing a lookup
		 * on "..", we have to lock the lower node, then lock our
		 * node. Most of the time it won't matter that we lock our
		 * node (as any locking would need the lower one locked
		 * first). But we can LK_DRAIN the upper lock as a step
		 * towards decomissioning it.
		 */
		lvp = NULLVPTOLOWERVP(vp);
		if (lvp == NULL)
			return (lockmgr(&vp->v_lock, flags, &vp->v_interlock, td));
		if (flags & LK_INTERLOCK) {
			mtx_unlock(&vp->v_interlock);
			flags &= ~LK_INTERLOCK;
		}
		if ((flags & LK_TYPE_MASK) == LK_DRAIN) {
			/* DRAIN only our own lock; take the lower exclusive. */
			error = VOP_LOCK(lvp,
				(flags & ~LK_TYPE_MASK) | LK_EXCLUSIVE, td);
		} else
			error = VOP_LOCK(lvp, flags, td);
		if (error)
			return (error);
		error = lockmgr(&vp->v_lock, flags, &vp->v_interlock, td);
		/* Back out the lower lock if our own lock failed. */
		if (error)
			VOP_UNLOCK(lvp, 0, td);
		return (error);
	}
}

/*
 * We need to process our own vnode unlock and then clear the
 * interlock flag as it applies only to our vnode, not the
 * vnodes below us on the stack.
 */
static int
null_unlock(ap)
	struct vop_unlock_args /* {
		struct vnode *a_vp;
		int a_flags;
		struct thread *a_td;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;
	int flags = ap->a_flags;
	struct thread *td = ap->a_td;
	struct vnode *lvp;

	if (vp->v_vnlock != NULL) {
		if (flags & LK_THISLAYER)
			return 0;	/* the lock is shared across layers */
		flags &= ~LK_THISLAYER;
		return (lockmgr(vp->v_vnlock, flags | LK_RELEASE,
			&vp->v_interlock, td));
	}
	lvp = NULLVPTOLOWERVP(vp);
	if (lvp == NULL)
		return (lockmgr(&vp->v_lock, flags | LK_RELEASE, &vp->v_interlock, td));
	if ((flags & LK_THISLAYER) == 0) {
		/* Not layer-local: release the lower layer's lock as well. */
		if (flags & LK_INTERLOCK) {
			mtx_unlock(&vp->v_interlock);
			flags &= ~LK_INTERLOCK;
		}
		VOP_UNLOCK(lvp, flags & ~LK_INTERLOCK, td);
	} else
		flags &= ~LK_THISLAYER;
	return (lockmgr(&vp->v_lock, flags | LK_RELEASE, &vp->v_interlock, td));
}

/*
 * Report the lock status of whichever lock this vnode actually
 * uses: the lock shared with the lower layer, or our private one.
 */
static int
null_islocked(ap)
	struct vop_islocked_args /* {
		struct vnode *a_vp;
		struct thread *a_td;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;
	struct thread *td = ap->a_td;

	if (vp->v_vnlock != NULL)
		return (lockstatus(vp->v_vnlock, td));
	return (lockstatus(&vp->v_lock, td));
}

/*
 * There is no way to tell that someone issued remove/rmdir operation
 * on the underlying
   filesystem. For now we just have to release lowevrp
 * as soon as possible.
 */
static int
null_inactive(ap)
	struct vop_inactive_args /* {
		struct vnode *a_vp;
		struct thread *a_td;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;
	struct thread *td = ap->a_td;

	VOP_UNLOCK(vp, 0, td);

	/*
	 * If this is the last reference, then free up the vnode
	 * so as not to tie up the lower vnodes.
	 */
	vrecycle(vp, NULL, td);

	return (0);
}

/*
 * We can free memory in null_inactive, but we do this
 * here. (Possible to guard vp->v_data to point somewhere)
 */
static int
null_reclaim(ap)
	struct vop_reclaim_args /* {
		struct vnode *a_vp;
		struct thread *a_td;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;
	struct null_node *xp = VTONULL(vp);
	struct vnode *lowervp = xp->null_lowervp;

	if (lowervp) {
		null_hashrem(xp);

		/*
		 * Two vrele's: the null node holds two references on the
		 * lower vnode in this revision.  NOTE(review): intentional,
		 * do not "fix" as a double-release.
		 */
		vrele(lowervp);
		vrele(lowervp);
	}

	vp->v_data = NULL;
	FREE(xp, M_NULLFSNODE);

	return (0);
}

/*
 * Print a one-line description of this null node for vprint(9).
 */
static int
null_print(ap)
	struct vop_print_args /* {
		struct vnode *a_vp;
	} */ *ap;
{
	register struct vnode *vp = ap->a_vp;
	printf ("\ttag VT_NULLFS, vp=%p, lowervp=%p\n", vp, NULLVPTOLOWERVP(vp));
	return (0);
}

/*
 * Let an underlying filesystem do the work
 */
static int
null_createvobject(ap)
	struct vop_createvobject_args /* {
		struct vnode *vp;
		struct ucred *cred;
		struct thread *td;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;
	struct vnode *lowervp = VTONULL(vp) ? NULLVPTOLOWERVP(vp) : NULL;
	int error;

	if (vp->v_type == VNON || lowervp == NULL)
		return 0;
	error = VOP_CREATEVOBJECT(lowervp, ap->a_cred, ap->a_td);
	if (error)
		return (error);
	/* Mark the upper vnode as having a (lower) VM object. */
	vp->v_flag |= VOBJBUF;
	return (0);
}

/*
 * We have nothing to destroy and this operation shouldn't be bypassed.
 */
static int
null_destroyvobject(ap)
	struct vop_destroyvobject_args /* {
		struct vnode *vp;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;

	vp->v_flag &= ~VOBJBUF;
	return (0);
}

/*
 * The VM object lives on the lower vnode; hand the query down.
 */
static int
null_getvobject(ap)
	struct vop_getvobject_args /* {
		struct vnode *vp;
		struct vm_object **objpp;
	} */ *ap;
{
	struct vnode *lvp = NULLVPTOLOWERVP(ap->a_vp);

	if (lvp == NULL)
		return EINVAL;
	return (VOP_GETVOBJECT(lvp, ap->a_objpp));
}

/*
 * Global vfs data structures
 *
 * Everything not listed explicitly below is forwarded to the lower
 * layer through null_bypass() (the vop_default entry).
 */
vop_t **null_vnodeop_p;
static struct vnodeopv_entry_desc null_vnodeop_entries[] = {
	{ &vop_default_desc,		(vop_t *) null_bypass },
	{ &vop_access_desc,		(vop_t *) null_access },
	{ &vop_bmap_desc,		(vop_t *) vop_eopnotsupp },
	{ &vop_createvobject_desc,	(vop_t *) null_createvobject },
	{ &vop_destroyvobject_desc,	(vop_t *) null_destroyvobject },
	{ &vop_getattr_desc,		(vop_t *) null_getattr },
	{ &vop_getvobject_desc,		(vop_t *) null_getvobject },
	{ &vop_getwritemount_desc,	(vop_t *) vop_stdgetwritemount},
	{ &vop_inactive_desc,		(vop_t *) null_inactive },
	{ &vop_islocked_desc,		(vop_t *) null_islocked },
	{ &vop_lock_desc,		(vop_t *) null_lock },
	{ &vop_lookup_desc,		(vop_t *) null_lookup },
	{ &vop_open_desc,		(vop_t *) null_open },
	{ &vop_print_desc,		(vop_t *) null_print },
	{ &vop_reclaim_desc,		(vop_t *) null_reclaim },
	{ &vop_rename_desc,		(vop_t *) null_rename },
	{ &vop_setattr_desc,		(vop_t *) null_setattr },
	{ &vop_strategy_desc,		(vop_t *) vop_eopnotsupp },
	{ &vop_unlock_desc,		(vop_t *) null_unlock },
	{ NULL, NULL }
};
static struct vnodeopv_desc null_vnodeop_opv_desc =
	{ &null_vnodeop_p, null_vnodeop_entries };

VNODEOP_SET(null_vnodeop_opv_desc);