nfs_bio.c revision 8876
11541Srgrimes/* 21541Srgrimes * Copyright (c) 1989, 1993 31541Srgrimes * The Regents of the University of California. All rights reserved. 41541Srgrimes * 51541Srgrimes * This code is derived from software contributed to Berkeley by 61541Srgrimes * Rick Macklem at The University of Guelph. 71541Srgrimes * 81541Srgrimes * Redistribution and use in source and binary forms, with or without 91541Srgrimes * modification, are permitted provided that the following conditions 101541Srgrimes * are met: 111541Srgrimes * 1. Redistributions of source code must retain the above copyright 121541Srgrimes * notice, this list of conditions and the following disclaimer. 131541Srgrimes * 2. Redistributions in binary form must reproduce the above copyright 141541Srgrimes * notice, this list of conditions and the following disclaimer in the 151541Srgrimes * documentation and/or other materials provided with the distribution. 161541Srgrimes * 3. All advertising materials mentioning features or use of this software 171541Srgrimes * must display the following acknowledgement: 181541Srgrimes * This product includes software developed by the University of 191541Srgrimes * California, Berkeley and its contributors. 201541Srgrimes * 4. Neither the name of the University nor the names of its contributors 211541Srgrimes * may be used to endorse or promote products derived from this software 221541Srgrimes * without specific prior written permission. 231541Srgrimes * 241541Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 251541Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 261541Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 271541Srgrimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 281541Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 291541Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 301541Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 311541Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 321541Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 331541Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 341541Srgrimes * SUCH DAMAGE. 351541Srgrimes * 361541Srgrimes * @(#)nfs_bio.c 8.5 (Berkeley) 1/4/94 378876Srgrimes * $Id: nfs_bio.c,v 1.13 1995/05/21 21:39:21 davidg Exp $ 381541Srgrimes */ 391541Srgrimes 401541Srgrimes#include <sys/param.h> 411541Srgrimes#include <sys/systm.h> 421541Srgrimes#include <sys/resourcevar.h> 433305Sphk#include <sys/signalvar.h> 441541Srgrimes#include <sys/proc.h> 451541Srgrimes#include <sys/buf.h> 461541Srgrimes#include <sys/vnode.h> 471541Srgrimes#include <sys/mount.h> 481541Srgrimes#include <sys/kernel.h> 491541Srgrimes 501541Srgrimes#include <vm/vm.h> 511541Srgrimes 521541Srgrimes#include <nfs/nfsnode.h> 531541Srgrimes#include <nfs/rpcv2.h> 541541Srgrimes#include <nfs/nfsv2.h> 551541Srgrimes#include <nfs/nfs.h> 561541Srgrimes#include <nfs/nfsmount.h> 571541Srgrimes#include <nfs/nqnfs.h> 581541Srgrimes 592112Swollmanstruct buf *nfs_getcacheblk(); 601541Srgrimesextern struct proc *nfs_iodwant[NFS_MAXASYNCDAEMON]; 611541Srgrimesextern int nfs_numasync; 621541Srgrimes 631541Srgrimes/* 641541Srgrimes * Vnode op for read using bio 651541Srgrimes * Any similarity to readip() is purely coincidental 661541Srgrimes */ 671549Srgrimesint 681541Srgrimesnfs_bioread(vp, uio, ioflag, cred) 691541Srgrimes register struct vnode *vp; 701541Srgrimes register struct uio *uio; 711541Srgrimes int ioflag; 721541Srgrimes struct ucred *cred; 731541Srgrimes{ 741541Srgrimes register struct nfsnode *np = VTONFS(vp); 751541Srgrimes register int biosize, diff; 761549Srgrimes struct buf *bp = 0, *rabp; 771541Srgrimes struct vattr vattr; 781541Srgrimes struct proc *p; 791541Srgrimes struct nfsmount *nmp; 805455Sdg daddr_t lbn, rabn; 818692Sdg int bufsize; 827871Sdg int nra, error = 0, n = 0, on = 0, not_readin; 831541Srgrimes 841541Srgrimes#ifdef lint 851541Srgrimes ioflag = ioflag; 861541Srgrimes#endif /* lint */ 871541Srgrimes#ifdef DIAGNOSTIC 881541Srgrimes if (uio->uio_rw != UIO_READ) 891541Srgrimes panic("nfs_read mode"); 901541Srgrimes#endif 911541Srgrimes if (uio->uio_resid == 0) 921541Srgrimes return (0); 931541Srgrimes if (uio->uio_offset < 0 && vp->v_type != VDIR) 941541Srgrimes return (EINVAL); 951541Srgrimes nmp = VFSTONFS(vp->v_mount); 965455Sdg biosize = NFS_MAXDGRAMDATA; 971541Srgrimes p = uio->uio_procp; 981541Srgrimes /* 991541Srgrimes * For nfs, cache consistency can only be maintained approximately. 1001541Srgrimes * Although RFC1094 does not specify the criteria, the following is 1011541Srgrimes * believed to be compatible with the reference port. 1021541Srgrimes * For nqnfs, full cache consistency is maintained within the loop. 1031541Srgrimes * For nfs: 1041541Srgrimes * If the file's modify time on the server has changed since the 1051541Srgrimes * last read rpc or you have written to the file, 1061541Srgrimes * you may have lost data cache consistency with the 1071541Srgrimes * server, so flush all of the file's data out of the cache. 1081541Srgrimes * Then force a getattr rpc to ensure that you have up to date 1091541Srgrimes * attributes. 1101541Srgrimes * The mount flag NFSMNT_MYWRITE says "Assume that my writes are 1111541Srgrimes * the ones changing the modify time. 1121541Srgrimes * NB: This implies that cache data can be read when up to 1131541Srgrimes * NFS_ATTRTIMEO seconds out of date. If you find that you need current 1141541Srgrimes * attributes this could be forced by setting n_attrstamp to 0 before 1151541Srgrimes * the VOP_GETATTR() call. 1161541Srgrimes */ 1171541Srgrimes if ((nmp->nm_flag & NFSMNT_NQNFS) == 0 && vp->v_type != VLNK) { 1181541Srgrimes if (np->n_flag & NMODIFIED) { 1191541Srgrimes if ((nmp->nm_flag & NFSMNT_MYWRITE) == 0 || 1201541Srgrimes vp->v_type != VREG) { 1213305Sphk error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1); 1223305Sphk if (error) 1231541Srgrimes return (error); 1241541Srgrimes } 1251541Srgrimes np->n_attrstamp = 0; 1261541Srgrimes np->n_direofoffset = 0; 1273305Sphk error = VOP_GETATTR(vp, &vattr, cred, p); 1283305Sphk if (error) 1291541Srgrimes return (error); 1301541Srgrimes np->n_mtime = vattr.va_mtime.ts_sec; 1311541Srgrimes } else { 1323305Sphk error = VOP_GETATTR(vp, &vattr, cred, p); 1333305Sphk if (error) 1341541Srgrimes return (error); 1351541Srgrimes if (np->n_mtime != vattr.va_mtime.ts_sec) { 1361541Srgrimes np->n_direofoffset = 0; 1373305Sphk error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1); 1383305Sphk if (error) 1391541Srgrimes return (error); 1401541Srgrimes np->n_mtime = vattr.va_mtime.ts_sec; 1411541Srgrimes } 1421541Srgrimes } 1431541Srgrimes } 1441541Srgrimes do { 1451541Srgrimes 1461541Srgrimes /* 1471541Srgrimes * Get a valid lease. If cached data is stale, flush it. 1481541Srgrimes */ 1491541Srgrimes if (nmp->nm_flag & NFSMNT_NQNFS) { 1501541Srgrimes if (NQNFS_CKINVALID(vp, np, NQL_READ)) { 1511541Srgrimes do { 1521541Srgrimes error = nqnfs_getlease(vp, NQL_READ, cred, p); 1531541Srgrimes } while (error == NQNFS_EXPIRED); 1541541Srgrimes if (error) 1551541Srgrimes return (error); 1561541Srgrimes if (np->n_lrev != np->n_brev || 1571541Srgrimes (np->n_flag & NQNFSNONCACHE) || 1581541Srgrimes ((np->n_flag & NMODIFIED) && vp->v_type == VDIR)) { 1591541Srgrimes if (vp->v_type == VDIR) { 1601541Srgrimes np->n_direofoffset = 0; 1611541Srgrimes cache_purge(vp); 1621541Srgrimes } 1633305Sphk error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1); 1643305Sphk if (error) 1651541Srgrimes return (error); 1661541Srgrimes np->n_brev = np->n_lrev; 1671541Srgrimes } 1681541Srgrimes } else if (vp->v_type == VDIR && (np->n_flag & NMODIFIED)) { 1691541Srgrimes np->n_direofoffset = 0; 1701541Srgrimes cache_purge(vp); 1713305Sphk error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1); 1723305Sphk if (error) 1731541Srgrimes return (error); 1741541Srgrimes } 1751541Srgrimes } 1761541Srgrimes if (np->n_flag & NQNFSNONCACHE) { 1771541Srgrimes switch (vp->v_type) { 1781541Srgrimes case VREG: 1791541Srgrimes error = nfs_readrpc(vp, uio, cred); 1801541Srgrimes break; 1811541Srgrimes case VLNK: 1821541Srgrimes error = nfs_readlinkrpc(vp, uio, cred); 1831541Srgrimes break; 1841541Srgrimes case VDIR: 1851541Srgrimes error = nfs_readdirrpc(vp, uio, cred); 1861541Srgrimes break; 1873305Sphk default: 1888876Srgrimes printf(" NQNFSNONCACHE: type %x unexpected\n", 1893305Sphk vp->v_type); 1903305Sphk break; 1911541Srgrimes }; 1921541Srgrimes return (error); 1931541Srgrimes } 1941541Srgrimes switch (vp->v_type) { 1951541Srgrimes case VREG: 1961541Srgrimes nfsstats.biocache_reads++; 1971541Srgrimes lbn = uio->uio_offset / biosize; 1981541Srgrimes on = uio->uio_offset & (biosize-1); 1991541Srgrimes not_readin = 1; 2001541Srgrimes 2011541Srgrimes /* 2021541Srgrimes * Start the read ahead(s), as required. 2031541Srgrimes */ 2041541Srgrimes if (nfs_numasync > 0 && nmp->nm_readahead > 0 && 2051541Srgrimes lbn == vp->v_lastr + 1) { 2061541Srgrimes for (nra = 0; nra < nmp->nm_readahead && 2071541Srgrimes (lbn + 1 + nra) * biosize < np->n_size; nra++) { 2085455Sdg rabn = lbn + 1 + nra; 2091541Srgrimes if (!incore(vp, rabn)) { 2101541Srgrimes rabp = nfs_getcacheblk(vp, rabn, biosize, p); 2111541Srgrimes if (!rabp) 2121541Srgrimes return (EINTR); 2138692Sdg if ((rabp->b_flags & (B_CACHE|B_DELWRI)) == 0) { 2141541Srgrimes rabp->b_flags |= (B_READ | B_ASYNC); 2155455Sdg vfs_busy_pages(rabp, 0); 2161541Srgrimes if (nfs_asyncio(rabp, cred)) { 2175455Sdg rabp->b_flags |= B_INVAL|B_ERROR; 2185455Sdg vfs_unbusy_pages(rabp); 2191541Srgrimes brelse(rabp); 2201541Srgrimes } 2215471Sdg } else { 2225471Sdg brelse(rabp); 2231541Srgrimes } 2241541Srgrimes } 2251541Srgrimes } 2261541Srgrimes } 2271541Srgrimes 2281541Srgrimes /* 2291541Srgrimes * If the block is in the cache and has the required data 2301541Srgrimes * in a valid region, just copy it out. 2311541Srgrimes * Otherwise, get the block and write back/read in, 2321541Srgrimes * as required. 2331541Srgrimes */ 2341541Srgrimesagain: 2358692Sdg bufsize = biosize; 2368692Sdg if ((lbn + 1) * biosize > np->n_size) { 2378692Sdg bufsize = np->n_size - lbn * biosize; 2388692Sdg bufsize = (bufsize + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1); 2398692Sdg } 2408692Sdg bp = nfs_getcacheblk(vp, lbn, bufsize, p); 2417871Sdg if (!bp) 2427871Sdg return (EINTR); 2437871Sdg if ((bp->b_flags & B_CACHE) == 0) { 2447871Sdg bp->b_flags |= B_READ; 2457871Sdg not_readin = 0; 2467871Sdg vfs_busy_pages(bp, 0); 2477871Sdg error = nfs_doio(bp, cred, p); 2487871Sdg if (error) { 2497871Sdg brelse(bp); 2507871Sdg return (error); 2511541Srgrimes } 2521541Srgrimes } 2538692Sdg if (bufsize > on) { 2548692Sdg n = min((unsigned)(bufsize - on), uio->uio_resid); 2558692Sdg } else { 2568692Sdg n = 0; 2578692Sdg } 2581541Srgrimes diff = np->n_size - uio->uio_offset; 2591541Srgrimes if (diff < n) 2601541Srgrimes n = diff; 2611541Srgrimes if (not_readin && n > 0) { 2621541Srgrimes if (on < bp->b_validoff || (on + n) > bp->b_validend) { 2636148Sdg bp->b_flags |= B_NOCACHE; 2641541Srgrimes if (bp->b_dirtyend > 0) { 2651541Srgrimes if ((bp->b_flags & B_DELWRI) == 0) 2661541Srgrimes panic("nfsbioread"); 2671541Srgrimes if (VOP_BWRITE(bp) == EINTR) 2681541Srgrimes return (EINTR); 2691541Srgrimes } else 2701541Srgrimes brelse(bp); 2711541Srgrimes goto again; 2721541Srgrimes } 2731541Srgrimes } 2741541Srgrimes vp->v_lastr = lbn; 2751541Srgrimes diff = (on >= bp->b_validend) ? 0 : (bp->b_validend - on); 2761541Srgrimes if (diff < n) 2771541Srgrimes n = diff; 2781541Srgrimes break; 2791541Srgrimes case VLNK: 2801541Srgrimes nfsstats.biocache_readlinks++; 2811541Srgrimes bp = nfs_getcacheblk(vp, (daddr_t)0, NFS_MAXPATHLEN, p); 2821541Srgrimes if (!bp) 2831541Srgrimes return (EINTR); 2847871Sdg if ((bp->b_flags & B_CACHE) == 0) { 2851541Srgrimes bp->b_flags |= B_READ; 2865455Sdg vfs_busy_pages(bp, 0); 2873305Sphk error = nfs_doio(bp, cred, p); 2883305Sphk if (error) { 2895455Sdg bp->b_flags |= B_ERROR; 2901541Srgrimes brelse(bp); 2911541Srgrimes return (error); 2921541Srgrimes } 2931541Srgrimes } 2941541Srgrimes n = min(uio->uio_resid, NFS_MAXPATHLEN - bp->b_resid); 2951541Srgrimes on = 0; 2961541Srgrimes break; 2971541Srgrimes case VDIR: 2981541Srgrimes nfsstats.biocache_readdirs++; 2995455Sdg lbn = (daddr_t)uio->uio_offset; 3005455Sdg bp = nfs_getcacheblk(vp, lbn, NFS_DIRBLKSIZ, p); 3011541Srgrimes if (!bp) 3021541Srgrimes return (EINTR); 3035455Sdg 3047871Sdg if ((bp->b_flags & B_CACHE) == 0) { 3051541Srgrimes bp->b_flags |= B_READ; 3065455Sdg vfs_busy_pages(bp, 0); 3073305Sphk error = nfs_doio(bp, cred, p); 3083305Sphk if (error) { 3095455Sdg bp->b_flags |= B_ERROR; 3101541Srgrimes brelse(bp); 3111541Srgrimes return (error); 3121541Srgrimes } 3131541Srgrimes } 3141541Srgrimes 3151541Srgrimes /* 3161541Srgrimes * If not eof and read aheads are enabled, start one. 3171541Srgrimes * (You need the current block first, so that you have the 3181541Srgrimes * directory offset cookie of the next block. 3191541Srgrimes */ 3201541Srgrimes rabn = bp->b_blkno; 3211541Srgrimes if (nfs_numasync > 0 && nmp->nm_readahead > 0 && 3221541Srgrimes rabn != 0 && rabn != np->n_direofoffset && 3231541Srgrimes !incore(vp, rabn)) { 3241541Srgrimes rabp = nfs_getcacheblk(vp, rabn, NFS_DIRBLKSIZ, p); 3251541Srgrimes if (rabp) { 3268692Sdg if ((rabp->b_flags & (B_CACHE|B_DELWRI)) == 0) { 3271541Srgrimes rabp->b_flags |= (B_READ | B_ASYNC); 3285455Sdg vfs_busy_pages(rabp, 0); 3291541Srgrimes if (nfs_asyncio(rabp, cred)) { 3306148Sdg rabp->b_flags |= B_INVAL|B_ERROR; 3315455Sdg vfs_unbusy_pages(rabp); 3321541Srgrimes brelse(rabp); 3331541Srgrimes } 3345471Sdg } else { 3355471Sdg brelse(rabp); 3361541Srgrimes } 3371541Srgrimes } 3381541Srgrimes } 3391541Srgrimes on = 0; 3401541Srgrimes n = min(uio->uio_resid, NFS_DIRBLKSIZ - bp->b_resid); 3411541Srgrimes break; 3423305Sphk default: 3433305Sphk printf(" nfsbioread: type %x unexpected\n",vp->v_type); 3443305Sphk break; 3451541Srgrimes }; 3461541Srgrimes 3471541Srgrimes if (n > 0) { 3487871Sdg error = uiomove(bp->b_data + on, (int)n, uio); 3491541Srgrimes } 3501541Srgrimes switch (vp->v_type) { 3511541Srgrimes case VREG: 3521541Srgrimes break; 3531541Srgrimes case VLNK: 3541541Srgrimes n = 0; 3551541Srgrimes break; 3561541Srgrimes case VDIR: 3571541Srgrimes uio->uio_offset = bp->b_blkno; 3581541Srgrimes break; 3593305Sphk default: 3603305Sphk printf(" nfsbioread: type %x unexpected\n",vp->v_type); 3613305Sphk break; 3623305Sphk } 3637871Sdg brelse(bp); 3641541Srgrimes } while (error == 0 && uio->uio_resid > 0 && n > 0); 3651541Srgrimes return (error); 3661541Srgrimes} 3671541Srgrimes 3681541Srgrimes/* 3691541Srgrimes * Vnode op for write using bio 3701541Srgrimes */ 3711549Srgrimesint 3721541Srgrimesnfs_write(ap) 3731541Srgrimes struct vop_write_args /* { 3741541Srgrimes struct vnode *a_vp; 3751541Srgrimes struct uio *a_uio; 3761541Srgrimes int a_ioflag; 3771541Srgrimes struct ucred *a_cred; 3781541Srgrimes } */ *ap; 3791541Srgrimes{ 3801541Srgrimes register int biosize; 3811541Srgrimes register struct uio *uio = ap->a_uio; 3821541Srgrimes struct proc *p = uio->uio_procp; 3831541Srgrimes register struct vnode *vp = ap->a_vp; 3841541Srgrimes struct nfsnode *np = VTONFS(vp); 3851541Srgrimes register struct ucred *cred = ap->a_cred; 3861541Srgrimes int ioflag = ap->a_ioflag; 3871541Srgrimes struct buf *bp; 3881541Srgrimes struct vattr vattr; 3891541Srgrimes struct nfsmount *nmp; 3905455Sdg daddr_t lbn; 3918692Sdg int bufsize; 3921541Srgrimes int n, on, error = 0; 3931541Srgrimes 3941541Srgrimes#ifdef DIAGNOSTIC 3951541Srgrimes if (uio->uio_rw != UIO_WRITE) 3961541Srgrimes panic("nfs_write mode"); 3971541Srgrimes if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc) 3981541Srgrimes panic("nfs_write proc"); 3991541Srgrimes#endif 4001541Srgrimes if (vp->v_type != VREG) 4011541Srgrimes return (EIO); 4021541Srgrimes if (np->n_flag & NWRITEERR) { 4031541Srgrimes np->n_flag &= ~NWRITEERR; 4041541Srgrimes return (np->n_error); 4051541Srgrimes } 4061541Srgrimes if (ioflag & (IO_APPEND | IO_SYNC)) { 4071541Srgrimes if (np->n_flag & NMODIFIED) { 4081541Srgrimes np->n_attrstamp = 0; 4093305Sphk error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1); 4103305Sphk if (error) 4111541Srgrimes return (error); 4121541Srgrimes } 4131541Srgrimes if (ioflag & IO_APPEND) { 4141541Srgrimes np->n_attrstamp = 0; 4153305Sphk error = VOP_GETATTR(vp, &vattr, cred, p); 4163305Sphk if (error) 4171541Srgrimes return (error); 4181541Srgrimes uio->uio_offset = np->n_size; 4191541Srgrimes } 4201541Srgrimes } 4211541Srgrimes nmp = VFSTONFS(vp->v_mount); 4221541Srgrimes if (uio->uio_offset < 0) 4231541Srgrimes return (EINVAL); 4241541Srgrimes if (uio->uio_resid == 0) 4251541Srgrimes return (0); 4261541Srgrimes /* 4271541Srgrimes * Maybe this should be above the vnode op call, but so long as 4281541Srgrimes * file servers have no limits, i don't think it matters 4291541Srgrimes */ 4301541Srgrimes if (p && uio->uio_offset + uio->uio_resid > 4311541Srgrimes p->p_rlimit[RLIMIT_FSIZE].rlim_cur) { 4321541Srgrimes psignal(p, SIGXFSZ); 4331541Srgrimes return (EFBIG); 4341541Srgrimes } 4351541Srgrimes /* 4361541Srgrimes * I use nm_rsize, not nm_wsize so that all buffer cache blocks 4371541Srgrimes * will be the same size within a filesystem. nfs_writerpc will 4381541Srgrimes * still use nm_wsize when sizing the rpc's. 4391541Srgrimes */ 4405455Sdg biosize = NFS_MAXDGRAMDATA; 4411541Srgrimes do { 4421541Srgrimes 4431541Srgrimes /* 4443664Sphk * XXX make sure we aren't cached in the VM page cache 4453664Sphk */ 4463664Sphk /* 4471541Srgrimes * Check for a valid write lease. 4481541Srgrimes * If non-cachable, just do the rpc 4491541Srgrimes */ 4501541Srgrimes if ((nmp->nm_flag & NFSMNT_NQNFS) && 4511541Srgrimes NQNFS_CKINVALID(vp, np, NQL_WRITE)) { 4521541Srgrimes do { 4531541Srgrimes error = nqnfs_getlease(vp, NQL_WRITE, cred, p); 4541541Srgrimes } while (error == NQNFS_EXPIRED); 4551541Srgrimes if (error) 4561541Srgrimes return (error); 4571541Srgrimes if (np->n_lrev != np->n_brev || 4581541Srgrimes (np->n_flag & NQNFSNONCACHE)) { 4593305Sphk error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1); 4603305Sphk if (error) 4611541Srgrimes return (error); 4621541Srgrimes np->n_brev = np->n_lrev; 4631541Srgrimes } 4641541Srgrimes } 4651541Srgrimes if (np->n_flag & NQNFSNONCACHE) 4661541Srgrimes return (nfs_writerpc(vp, uio, cred, ioflag)); 4671541Srgrimes nfsstats.biocache_writes++; 4681541Srgrimes lbn = uio->uio_offset / biosize; 4691541Srgrimes on = uio->uio_offset & (biosize-1); 4701541Srgrimes n = min((unsigned)(biosize - on), uio->uio_resid); 4711541Srgrimesagain: 4728692Sdg if (uio->uio_offset + n > np->n_size) { 4738692Sdg np->n_size = uio->uio_offset + n; 4748692Sdg vnode_pager_setsize(vp, (u_long)np->n_size); 4758692Sdg } 4768692Sdg bufsize = biosize; 4778692Sdg if ((lbn + 1) * biosize > np->n_size) { 4788692Sdg bufsize = np->n_size - lbn * biosize; 4798692Sdg bufsize = (bufsize + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1); 4808692Sdg } 4818692Sdg bp = nfs_getcacheblk(vp, lbn, bufsize, p); 4821541Srgrimes if (!bp) 4831541Srgrimes return (EINTR); 4841541Srgrimes if (bp->b_wcred == NOCRED) { 4851541Srgrimes crhold(cred); 4861541Srgrimes bp->b_wcred = cred; 4871541Srgrimes } 4881541Srgrimes np->n_flag |= NMODIFIED; 4898692Sdg 4908692Sdg if ((bp->b_blkno * DEV_BSIZE) + bp->b_dirtyend > np->n_size) { 4918692Sdg bp->b_dirtyend = np->n_size - (bp->b_blkno * DEV_BSIZE); 4921541Srgrimes } 4931541Srgrimes 4941541Srgrimes /* 4951541Srgrimes * If the new write will leave a contiguous dirty 4961541Srgrimes * area, just update the b_dirtyoff and b_dirtyend, 4971541Srgrimes * otherwise force a write rpc of the old dirty area. 4981541Srgrimes */ 4991541Srgrimes if (bp->b_dirtyend > 0 && 5001541Srgrimes (on > bp->b_dirtyend || (on + n) < bp->b_dirtyoff)) { 5011541Srgrimes bp->b_proc = p; 5021541Srgrimes if (VOP_BWRITE(bp) == EINTR) 5031541Srgrimes return (EINTR); 5041541Srgrimes goto again; 5051541Srgrimes } 5061541Srgrimes 5071541Srgrimes /* 5081541Srgrimes * Check for valid write lease and get one as required. 5091541Srgrimes * In case getblk() and/or bwrite() delayed us. 5101541Srgrimes */ 5111541Srgrimes if ((nmp->nm_flag & NFSMNT_NQNFS) && 5121541Srgrimes NQNFS_CKINVALID(vp, np, NQL_WRITE)) { 5131541Srgrimes do { 5141541Srgrimes error = nqnfs_getlease(vp, NQL_WRITE, cred, p); 5151541Srgrimes } while (error == NQNFS_EXPIRED); 5161541Srgrimes if (error) { 5171541Srgrimes brelse(bp); 5181541Srgrimes return (error); 5191541Srgrimes } 5201541Srgrimes if (np->n_lrev != np->n_brev || 5211541Srgrimes (np->n_flag & NQNFSNONCACHE)) { 5221541Srgrimes brelse(bp); 5233305Sphk error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1); 5243305Sphk if (error) 5251541Srgrimes return (error); 5261541Srgrimes np->n_brev = np->n_lrev; 5271541Srgrimes goto again; 5281541Srgrimes } 5291541Srgrimes } 5303305Sphk error = uiomove((char *)bp->b_data + on, n, uio); 5313305Sphk if (error) { 5321541Srgrimes bp->b_flags |= B_ERROR; 5331541Srgrimes brelse(bp); 5341541Srgrimes return (error); 5351541Srgrimes } 5361541Srgrimes if (bp->b_dirtyend > 0) { 5371541Srgrimes bp->b_dirtyoff = min(on, bp->b_dirtyoff); 5381541Srgrimes bp->b_dirtyend = max((on + n), bp->b_dirtyend); 5391541Srgrimes } else { 5401541Srgrimes bp->b_dirtyoff = on; 5411541Srgrimes bp->b_dirtyend = on + n; 5421541Srgrimes } 5431541Srgrimes#ifndef notdef 5441541Srgrimes if (bp->b_validend == 0 || bp->b_validend < bp->b_dirtyoff || 5451541Srgrimes bp->b_validoff > bp->b_dirtyend) { 5461541Srgrimes bp->b_validoff = bp->b_dirtyoff; 5471541Srgrimes bp->b_validend = bp->b_dirtyend; 5481541Srgrimes } else { 5491541Srgrimes bp->b_validoff = min(bp->b_validoff, bp->b_dirtyoff); 5501541Srgrimes bp->b_validend = max(bp->b_validend, bp->b_dirtyend); 5511541Srgrimes } 5521541Srgrimes#else 5531541Srgrimes bp->b_validoff = bp->b_dirtyoff; 5541541Srgrimes bp->b_validend = bp->b_dirtyend; 5551541Srgrimes#endif 5561541Srgrimes if (ioflag & IO_APPEND) 5571541Srgrimes bp->b_flags |= B_APPENDWRITE; 5581541Srgrimes 5591541Srgrimes /* 5601541Srgrimes * If the lease is non-cachable or IO_SYNC do bwrite(). 5611541Srgrimes */ 5621541Srgrimes if ((np->n_flag & NQNFSNONCACHE) || (ioflag & IO_SYNC)) { 5631541Srgrimes bp->b_proc = p; 5643305Sphk error = VOP_BWRITE(bp); 5653305Sphk if (error) 5661541Srgrimes return (error); 5671541Srgrimes } else if ((n + on) == biosize && 5681541Srgrimes (nmp->nm_flag & NFSMNT_NQNFS) == 0) { 5691541Srgrimes bp->b_proc = (struct proc *)0; 5701541Srgrimes bawrite(bp); 5711541Srgrimes } else 5721541Srgrimes bdwrite(bp); 5731541Srgrimes } while (uio->uio_resid > 0 && n > 0); 5741541Srgrimes return (0); 5751541Srgrimes} 5761541Srgrimes 5771541Srgrimes/* 5781541Srgrimes * Get an nfs cache block. 5791541Srgrimes * Allocate a new one if the block isn't currently in the cache 5801541Srgrimes * and return the block marked busy. If the calling process is 5811541Srgrimes * interrupted by a signal for an interruptible mount point, return 5821541Srgrimes * NULL. 5831541Srgrimes */ 5841541Srgrimesstruct buf * 5851541Srgrimesnfs_getcacheblk(vp, bn, size, p) 5861541Srgrimes struct vnode *vp; 5871541Srgrimes daddr_t bn; 5881541Srgrimes int size; 5891541Srgrimes struct proc *p; 5901541Srgrimes{ 5911541Srgrimes register struct buf *bp; 5921541Srgrimes struct nfsmount *nmp = VFSTONFS(vp->v_mount); 5931541Srgrimes 5941541Srgrimes if (nmp->nm_flag & NFSMNT_INT) { 5951541Srgrimes bp = getblk(vp, bn, size, PCATCH, 0); 5961541Srgrimes while (bp == (struct buf *)0) { 5971541Srgrimes if (nfs_sigintr(nmp, (struct nfsreq *)0, p)) 5981541Srgrimes return ((struct buf *)0); 5991541Srgrimes bp = getblk(vp, bn, size, 0, 2 * hz); 6001541Srgrimes } 6011541Srgrimes } else 6021541Srgrimes bp = getblk(vp, bn, size, 0, 0); 6035455Sdg 6045455Sdg if( vp->v_type == VREG) 6055455Sdg bp->b_blkno = (bn * NFS_MAXDGRAMDATA) / DEV_BSIZE; 6065455Sdg 6071541Srgrimes return (bp); 6081541Srgrimes} 6091541Srgrimes 6101541Srgrimes/* 6111541Srgrimes * Flush and invalidate all dirty buffers. If another process is already 6121541Srgrimes * doing the flush, just wait for completion. 6131541Srgrimes */ 6141549Srgrimesint 6151541Srgrimesnfs_vinvalbuf(vp, flags, cred, p, intrflg) 6161541Srgrimes struct vnode *vp; 6171541Srgrimes int flags; 6181541Srgrimes struct ucred *cred; 6191541Srgrimes struct proc *p; 6201541Srgrimes int intrflg; 6211541Srgrimes{ 6221541Srgrimes register struct nfsnode *np = VTONFS(vp); 6231541Srgrimes struct nfsmount *nmp = VFSTONFS(vp->v_mount); 6241541Srgrimes int error = 0, slpflag, slptimeo; 6251541Srgrimes 6261541Srgrimes if ((nmp->nm_flag & NFSMNT_INT) == 0) 6271541Srgrimes intrflg = 0; 6281541Srgrimes if (intrflg) { 6291541Srgrimes slpflag = PCATCH; 6301541Srgrimes slptimeo = 2 * hz; 6311541Srgrimes } else { 6321541Srgrimes slpflag = 0; 6331541Srgrimes slptimeo = 0; 6341541Srgrimes } 6351541Srgrimes /* 6361541Srgrimes * First wait for any other process doing a flush to complete. 6371541Srgrimes */ 6381541Srgrimes while (np->n_flag & NFLUSHINPROG) { 6391541Srgrimes np->n_flag |= NFLUSHWANT; 6401541Srgrimes error = tsleep((caddr_t)&np->n_flag, PRIBIO + 2, "nfsvinval", 6411541Srgrimes slptimeo); 6421541Srgrimes if (error && intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p)) 6431541Srgrimes return (EINTR); 6441541Srgrimes } 6451541Srgrimes 6461541Srgrimes /* 6471541Srgrimes * Now, flush as required. 6481541Srgrimes */ 6491541Srgrimes np->n_flag |= NFLUSHINPROG; 6501541Srgrimes error = vinvalbuf(vp, flags, cred, p, slpflag, 0); 6511541Srgrimes while (error) { 6521541Srgrimes if (intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p)) { 6531541Srgrimes np->n_flag &= ~NFLUSHINPROG; 6541541Srgrimes if (np->n_flag & NFLUSHWANT) { 6551541Srgrimes np->n_flag &= ~NFLUSHWANT; 6561541Srgrimes wakeup((caddr_t)&np->n_flag); 6571541Srgrimes } 6581541Srgrimes return (EINTR); 6591541Srgrimes } 6601541Srgrimes error = vinvalbuf(vp, flags, cred, p, 0, slptimeo); 6611541Srgrimes } 6621541Srgrimes np->n_flag &= ~(NMODIFIED | NFLUSHINPROG); 6631541Srgrimes if (np->n_flag & NFLUSHWANT) { 6641541Srgrimes np->n_flag &= ~NFLUSHWANT; 6651541Srgrimes wakeup((caddr_t)&np->n_flag); 6661541Srgrimes } 6671541Srgrimes return (0); 6681541Srgrimes} 6691541Srgrimes 6701541Srgrimes/* 6711541Srgrimes * Initiate asynchronous I/O. Return an error if no nfsiods are available. 6721541Srgrimes * This is mainly to avoid queueing async I/O requests when the nfsiods 6731541Srgrimes * are all hung on a dead server. 6741541Srgrimes */ 6751549Srgrimesint 6761541Srgrimesnfs_asyncio(bp, cred) 6771541Srgrimes register struct buf *bp; 6781541Srgrimes struct ucred *cred; 6791541Srgrimes{ 6801541Srgrimes register int i; 6811541Srgrimes 6821541Srgrimes if (nfs_numasync == 0) 6831541Srgrimes return (EIO); 6841541Srgrimes for (i = 0; i < NFS_MAXASYNCDAEMON; i++) 6851541Srgrimes if (nfs_iodwant[i]) { 6861541Srgrimes if (bp->b_flags & B_READ) { 6871541Srgrimes if (bp->b_rcred == NOCRED && cred != NOCRED) { 6881541Srgrimes crhold(cred); 6891541Srgrimes bp->b_rcred = cred; 6901541Srgrimes } 6911541Srgrimes } else { 6921541Srgrimes if (bp->b_wcred == NOCRED && cred != NOCRED) { 6931541Srgrimes crhold(cred); 6941541Srgrimes bp->b_wcred = cred; 6951541Srgrimes } 6961541Srgrimes } 6978876Srgrimes 6981541Srgrimes TAILQ_INSERT_TAIL(&nfs_bufq, bp, b_freelist); 6991541Srgrimes nfs_iodwant[i] = (struct proc *)0; 7001541Srgrimes wakeup((caddr_t)&nfs_iodwant[i]); 7011541Srgrimes return (0); 7021541Srgrimes } 7031541Srgrimes return (EIO); 7041541Srgrimes} 7051541Srgrimes 7061541Srgrimes/* 7071541Srgrimes * Do an I/O operation to/from a cache block. This may be called 7081541Srgrimes * synchronously or from an nfsiod. 7091541Srgrimes */ 7101541Srgrimesint 7111541Srgrimesnfs_doio(bp, cr, p) 7121541Srgrimes register struct buf *bp; 7133305Sphk struct ucred *cr; 7141541Srgrimes struct proc *p; 7151541Srgrimes{ 7161541Srgrimes register struct uio *uiop; 7171541Srgrimes register struct vnode *vp; 7181541Srgrimes struct nfsnode *np; 7191541Srgrimes struct nfsmount *nmp; 7201549Srgrimes int error = 0, diff, len; 7211541Srgrimes struct uio uio; 7221541Srgrimes struct iovec io; 7231541Srgrimes 7241541Srgrimes vp = bp->b_vp; 7251541Srgrimes np = VTONFS(vp); 7261541Srgrimes nmp = VFSTONFS(vp->v_mount); 7271541Srgrimes uiop = &uio; 7281541Srgrimes uiop->uio_iov = &io; 7291541Srgrimes uiop->uio_iovcnt = 1; 7301541Srgrimes uiop->uio_segflg = UIO_SYSSPACE; 7311541Srgrimes uiop->uio_procp = p; 7321541Srgrimes 7331541Srgrimes /* 7341541Srgrimes * Historically, paging was done with physio, but no more. 7351541Srgrimes */ 7363664Sphk if (bp->b_flags & B_PHYS) { 7373664Sphk /* 7383664Sphk * ...though reading /dev/drum still gets us here. 7393664Sphk */ 7401541Srgrimes io.iov_len = uiop->uio_resid = bp->b_bcount; 7413664Sphk /* mapping was done by vmapbuf() */ 7421541Srgrimes io.iov_base = bp->b_data; 7433664Sphk uiop->uio_offset = bp->b_blkno * DEV_BSIZE; 7443664Sphk if (bp->b_flags & B_READ) { 7453664Sphk uiop->uio_rw = UIO_READ; 7463664Sphk nfsstats.read_physios++; 7473664Sphk error = nfs_readrpc(vp, uiop, cr); 7483664Sphk } else { 7493664Sphk uiop->uio_rw = UIO_WRITE; 7503664Sphk nfsstats.write_physios++; 7513664Sphk error = nfs_writerpc(vp, uiop, cr,0); 7523664Sphk } 7533664Sphk if (error) { 7543664Sphk bp->b_flags |= B_ERROR; 7553664Sphk bp->b_error = error; 7563664Sphk } 7573664Sphk } else if (bp->b_flags & B_READ) { 7583664Sphk io.iov_len = uiop->uio_resid = bp->b_bcount; 7593664Sphk io.iov_base = bp->b_data; 7601541Srgrimes uiop->uio_rw = UIO_READ; 7611541Srgrimes switch (vp->v_type) { 7621541Srgrimes case VREG: 7631541Srgrimes uiop->uio_offset = bp->b_blkno * DEV_BSIZE; 7641541Srgrimes nfsstats.read_bios++; 7651541Srgrimes error = nfs_readrpc(vp, uiop, cr); 7661541Srgrimes if (!error) { 7671541Srgrimes bp->b_validoff = 0; 7681541Srgrimes if (uiop->uio_resid) { 7691541Srgrimes /* 7701541Srgrimes * If len > 0, there is a hole in the file and 7711541Srgrimes * no writes after the hole have been pushed to 7721541Srgrimes * the server yet. 7731541Srgrimes * Just zero fill the rest of the valid area. 7741541Srgrimes */ 7751541Srgrimes diff = bp->b_bcount - uiop->uio_resid; 7761541Srgrimes len = np->n_size - (bp->b_blkno * DEV_BSIZE 7771541Srgrimes + diff); 7781541Srgrimes if (len > 0) { 7791541Srgrimes len = min(len, uiop->uio_resid); 7801541Srgrimes bzero((char *)bp->b_data + diff, len); 7811541Srgrimes bp->b_validend = diff + len; 7821541Srgrimes } else 7831541Srgrimes bp->b_validend = diff; 7841541Srgrimes } else 7851541Srgrimes bp->b_validend = bp->b_bcount; 7861541Srgrimes } 7871541Srgrimes if (p && (vp->v_flag & VTEXT) && 7881541Srgrimes (((nmp->nm_flag & NFSMNT_NQNFS) && 7893664Sphk NQNFS_CKINVALID(vp, np, NQL_READ) && 7901541Srgrimes np->n_lrev != np->n_brev) || 7911541Srgrimes (!(nmp->nm_flag & NFSMNT_NQNFS) && 7921541Srgrimes np->n_mtime != np->n_vattr.va_mtime.ts_sec))) { 7931541Srgrimes uprintf("Process killed due to text file modification\n"); 7941541Srgrimes psignal(p, SIGKILL); 7951541Srgrimes p->p_flag |= P_NOSWAP; 7961541Srgrimes } 7971541Srgrimes break; 7981541Srgrimes case VLNK: 7991541Srgrimes uiop->uio_offset = 0; 8001541Srgrimes nfsstats.readlink_bios++; 8011541Srgrimes error = nfs_readlinkrpc(vp, uiop, cr); 8021541Srgrimes break; 8031541Srgrimes case VDIR: 8041541Srgrimes uiop->uio_offset = bp->b_lblkno; 8051541Srgrimes nfsstats.readdir_bios++; 8061541Srgrimes if (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS) 8071541Srgrimes error = nfs_readdirlookrpc(vp, uiop, cr); 8081541Srgrimes else 8091541Srgrimes error = nfs_readdirrpc(vp, uiop, cr); 8101541Srgrimes /* 8111541Srgrimes * Save offset cookie in b_blkno. 8121541Srgrimes */ 8131541Srgrimes bp->b_blkno = uiop->uio_offset; 8141541Srgrimes break; 8153305Sphk default: 8163305Sphk printf("nfs_doio: type %x unexpected\n",vp->v_type); 8173305Sphk break; 8181541Srgrimes }; 8191541Srgrimes if (error) { 8201541Srgrimes bp->b_flags |= B_ERROR; 8211541Srgrimes bp->b_error = error; 8221541Srgrimes } 8231541Srgrimes } else { 8241541Srgrimes 8258692Sdg if (((bp->b_blkno * DEV_BSIZE) + bp->b_dirtyend) > np->n_size) 8268692Sdg bp->b_dirtyend = np->n_size - (bp->b_blkno * DEV_BSIZE); 8278692Sdg 8288692Sdg if (bp->b_dirtyend > bp->b_dirtyoff) { 8298692Sdg io.iov_len = uiop->uio_resid = bp->b_dirtyend 8308692Sdg - bp->b_dirtyoff; 8318692Sdg uiop->uio_offset = (bp->b_blkno * DEV_BSIZE) 8328692Sdg + bp->b_dirtyoff; 8338692Sdg io.iov_base = (char *)bp->b_data + bp->b_dirtyoff; 8348692Sdg uiop->uio_rw = UIO_WRITE; 8358692Sdg nfsstats.write_bios++; 8368692Sdg if (bp->b_flags & B_APPENDWRITE) 8378692Sdg error = nfs_writerpc(vp, uiop, cr, IO_APPEND); 8388692Sdg else 8398692Sdg error = nfs_writerpc(vp, uiop, cr, 0); 8408692Sdg bp->b_flags &= ~(B_WRITEINPROG | B_APPENDWRITE); 8418692Sdg 8421541Srgrimes /* 8431541Srgrimes * For an interrupted write, the buffer is still valid and the 8441541Srgrimes * write hasn't been pushed to the server yet, so we can't set 8451541Srgrimes * B_ERROR and report the interruption by setting B_EINTR. For 8461541Srgrimes * the B_ASYNC case, B_EINTR is not relevant, so the rpc attempt 8471541Srgrimes * is essentially a noop. 8481541Srgrimes */ 8498692Sdg if (error == EINTR) { 8508692Sdg bp->b_flags &= ~(B_INVAL|B_NOCACHE); 8518692Sdg bp->b_flags |= B_DELWRI; 8521541Srgrimes 8531541Srgrimes /* 8541541Srgrimes * Since for the B_ASYNC case, nfs_bwrite() has reassigned the 8551541Srgrimes * buffer to the clean list, we have to reassign it back to the 8561541Srgrimes * dirty one. Ugh. 8571541Srgrimes */ 8588692Sdg if (bp->b_flags & B_ASYNC) 8598692Sdg reassignbuf(bp, vp); 8608692Sdg else 8618692Sdg bp->b_flags |= B_EINTR; 8628692Sdg } else { 8638692Sdg if (error) { 8648692Sdg bp->b_flags |= B_ERROR; 8658692Sdg bp->b_error = np->n_error = error; 8668692Sdg np->n_flag |= NWRITEERR; 8678692Sdg } 8688692Sdg bp->b_dirtyoff = bp->b_dirtyend = 0; 8698692Sdg } 8701541Srgrimes } else { 8718692Sdg bp->b_resid = 0; 8728692Sdg biodone(bp); 8738692Sdg return (0); 8741541Srgrimes } 8751541Srgrimes } 8761541Srgrimes bp->b_resid = uiop->uio_resid; 8771541Srgrimes biodone(bp); 8781541Srgrimes return (error); 8791541Srgrimes} 880