nfs_bio.c revision 8876
/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Rick Macklem at The University of Guelph.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)nfs_bio.c	8.5 (Berkeley) 1/4/94
 * $Id: nfs_bio.c,v 1.13 1995/05/21 21:39:21 davidg Exp $
 */
391541Srgrimes
401541Srgrimes#include <sys/param.h>
411541Srgrimes#include <sys/systm.h>
421541Srgrimes#include <sys/resourcevar.h>
433305Sphk#include <sys/signalvar.h>
441541Srgrimes#include <sys/proc.h>
451541Srgrimes#include <sys/buf.h>
461541Srgrimes#include <sys/vnode.h>
471541Srgrimes#include <sys/mount.h>
481541Srgrimes#include <sys/kernel.h>
491541Srgrimes
501541Srgrimes#include <vm/vm.h>
511541Srgrimes
521541Srgrimes#include <nfs/nfsnode.h>
531541Srgrimes#include <nfs/rpcv2.h>
541541Srgrimes#include <nfs/nfsv2.h>
551541Srgrimes#include <nfs/nfs.h>
561541Srgrimes#include <nfs/nfsmount.h>
571541Srgrimes#include <nfs/nqnfs.h>
581541Srgrimes
592112Swollmanstruct buf *nfs_getcacheblk();
601541Srgrimesextern struct proc *nfs_iodwant[NFS_MAXASYNCDAEMON];
611541Srgrimesextern int nfs_numasync;
621541Srgrimes
631541Srgrimes/*
641541Srgrimes * Vnode op for read using bio
651541Srgrimes * Any similarity to readip() is purely coincidental
661541Srgrimes */
671549Srgrimesint
681541Srgrimesnfs_bioread(vp, uio, ioflag, cred)
691541Srgrimes	register struct vnode *vp;
701541Srgrimes	register struct uio *uio;
711541Srgrimes	int ioflag;
721541Srgrimes	struct ucred *cred;
731541Srgrimes{
741541Srgrimes	register struct nfsnode *np = VTONFS(vp);
751541Srgrimes	register int biosize, diff;
761549Srgrimes	struct buf *bp = 0, *rabp;
771541Srgrimes	struct vattr vattr;
781541Srgrimes	struct proc *p;
791541Srgrimes	struct nfsmount *nmp;
805455Sdg	daddr_t lbn, rabn;
818692Sdg	int bufsize;
827871Sdg	int nra, error = 0, n = 0, on = 0, not_readin;
831541Srgrimes
841541Srgrimes#ifdef lint
851541Srgrimes	ioflag = ioflag;
861541Srgrimes#endif /* lint */
871541Srgrimes#ifdef DIAGNOSTIC
881541Srgrimes	if (uio->uio_rw != UIO_READ)
891541Srgrimes		panic("nfs_read mode");
901541Srgrimes#endif
911541Srgrimes	if (uio->uio_resid == 0)
921541Srgrimes		return (0);
931541Srgrimes	if (uio->uio_offset < 0 && vp->v_type != VDIR)
941541Srgrimes		return (EINVAL);
951541Srgrimes	nmp = VFSTONFS(vp->v_mount);
965455Sdg	biosize = NFS_MAXDGRAMDATA;
971541Srgrimes	p = uio->uio_procp;
981541Srgrimes	/*
991541Srgrimes	 * For nfs, cache consistency can only be maintained approximately.
1001541Srgrimes	 * Although RFC1094 does not specify the criteria, the following is
1011541Srgrimes	 * believed to be compatible with the reference port.
1021541Srgrimes	 * For nqnfs, full cache consistency is maintained within the loop.
1031541Srgrimes	 * For nfs:
1041541Srgrimes	 * If the file's modify time on the server has changed since the
1051541Srgrimes	 * last read rpc or you have written to the file,
1061541Srgrimes	 * you may have lost data cache consistency with the
1071541Srgrimes	 * server, so flush all of the file's data out of the cache.
1081541Srgrimes	 * Then force a getattr rpc to ensure that you have up to date
1091541Srgrimes	 * attributes.
1101541Srgrimes	 * The mount flag NFSMNT_MYWRITE says "Assume that my writes are
1111541Srgrimes	 * the ones changing the modify time.
1121541Srgrimes	 * NB: This implies that cache data can be read when up to
1131541Srgrimes	 * NFS_ATTRTIMEO seconds out of date. If you find that you need current
1141541Srgrimes	 * attributes this could be forced by setting n_attrstamp to 0 before
1151541Srgrimes	 * the VOP_GETATTR() call.
1161541Srgrimes	 */
1171541Srgrimes	if ((nmp->nm_flag & NFSMNT_NQNFS) == 0 && vp->v_type != VLNK) {
1181541Srgrimes		if (np->n_flag & NMODIFIED) {
1191541Srgrimes			if ((nmp->nm_flag & NFSMNT_MYWRITE) == 0 ||
1201541Srgrimes			     vp->v_type != VREG) {
1213305Sphk				error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
1223305Sphk				if (error)
1231541Srgrimes					return (error);
1241541Srgrimes			}
1251541Srgrimes			np->n_attrstamp = 0;
1261541Srgrimes			np->n_direofoffset = 0;
1273305Sphk			error = VOP_GETATTR(vp, &vattr, cred, p);
1283305Sphk			if (error)
1291541Srgrimes				return (error);
1301541Srgrimes			np->n_mtime = vattr.va_mtime.ts_sec;
1311541Srgrimes		} else {
1323305Sphk			error = VOP_GETATTR(vp, &vattr, cred, p);
1333305Sphk			if (error)
1341541Srgrimes				return (error);
1351541Srgrimes			if (np->n_mtime != vattr.va_mtime.ts_sec) {
1361541Srgrimes				np->n_direofoffset = 0;
1373305Sphk				error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
1383305Sphk				if (error)
1391541Srgrimes					return (error);
1401541Srgrimes				np->n_mtime = vattr.va_mtime.ts_sec;
1411541Srgrimes			}
1421541Srgrimes		}
1431541Srgrimes	}
1441541Srgrimes	do {
1451541Srgrimes
1461541Srgrimes	    /*
1471541Srgrimes	     * Get a valid lease. If cached data is stale, flush it.
1481541Srgrimes	     */
1491541Srgrimes	    if (nmp->nm_flag & NFSMNT_NQNFS) {
1501541Srgrimes		if (NQNFS_CKINVALID(vp, np, NQL_READ)) {
1511541Srgrimes		    do {
1521541Srgrimes			error = nqnfs_getlease(vp, NQL_READ, cred, p);
1531541Srgrimes		    } while (error == NQNFS_EXPIRED);
1541541Srgrimes		    if (error)
1551541Srgrimes			return (error);
1561541Srgrimes		    if (np->n_lrev != np->n_brev ||
1571541Srgrimes			(np->n_flag & NQNFSNONCACHE) ||
1581541Srgrimes			((np->n_flag & NMODIFIED) && vp->v_type == VDIR)) {
1591541Srgrimes			if (vp->v_type == VDIR) {
1601541Srgrimes			    np->n_direofoffset = 0;
1611541Srgrimes			    cache_purge(vp);
1621541Srgrimes			}
1633305Sphk			error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
1643305Sphk			if (error)
1651541Srgrimes			    return (error);
1661541Srgrimes			np->n_brev = np->n_lrev;
1671541Srgrimes		    }
1681541Srgrimes		} else if (vp->v_type == VDIR && (np->n_flag & NMODIFIED)) {
1691541Srgrimes		    np->n_direofoffset = 0;
1701541Srgrimes		    cache_purge(vp);
1713305Sphk		    error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
1723305Sphk		    if (error)
1731541Srgrimes			return (error);
1741541Srgrimes		}
1751541Srgrimes	    }
1761541Srgrimes	    if (np->n_flag & NQNFSNONCACHE) {
1771541Srgrimes		switch (vp->v_type) {
1781541Srgrimes		case VREG:
1791541Srgrimes			error = nfs_readrpc(vp, uio, cred);
1801541Srgrimes			break;
1811541Srgrimes		case VLNK:
1821541Srgrimes			error = nfs_readlinkrpc(vp, uio, cred);
1831541Srgrimes			break;
1841541Srgrimes		case VDIR:
1851541Srgrimes			error = nfs_readdirrpc(vp, uio, cred);
1861541Srgrimes			break;
1873305Sphk		default:
1888876Srgrimes			printf(" NQNFSNONCACHE: type %x unexpected\n",
1893305Sphk				vp->v_type);
1903305Sphk			break;
1911541Srgrimes		};
1921541Srgrimes		return (error);
1931541Srgrimes	    }
1941541Srgrimes	    switch (vp->v_type) {
1951541Srgrimes	    case VREG:
1961541Srgrimes		nfsstats.biocache_reads++;
1971541Srgrimes		lbn = uio->uio_offset / biosize;
1981541Srgrimes		on = uio->uio_offset & (biosize-1);
1991541Srgrimes		not_readin = 1;
2001541Srgrimes
2011541Srgrimes		/*
2021541Srgrimes		 * Start the read ahead(s), as required.
2031541Srgrimes		 */
2041541Srgrimes		if (nfs_numasync > 0 && nmp->nm_readahead > 0 &&
2051541Srgrimes		    lbn == vp->v_lastr + 1) {
2061541Srgrimes		    for (nra = 0; nra < nmp->nm_readahead &&
2071541Srgrimes			(lbn + 1 + nra) * biosize < np->n_size; nra++) {
2085455Sdg			rabn = lbn + 1 + nra;
2091541Srgrimes			if (!incore(vp, rabn)) {
2101541Srgrimes			    rabp = nfs_getcacheblk(vp, rabn, biosize, p);
2111541Srgrimes			    if (!rabp)
2121541Srgrimes				return (EINTR);
2138692Sdg			    if ((rabp->b_flags & (B_CACHE|B_DELWRI)) == 0) {
2141541Srgrimes				rabp->b_flags |= (B_READ | B_ASYNC);
2155455Sdg				vfs_busy_pages(rabp, 0);
2161541Srgrimes				if (nfs_asyncio(rabp, cred)) {
2175455Sdg				    rabp->b_flags |= B_INVAL|B_ERROR;
2185455Sdg				    vfs_unbusy_pages(rabp);
2191541Srgrimes				    brelse(rabp);
2201541Srgrimes				}
2215471Sdg			    } else {
2225471Sdg				brelse(rabp);
2231541Srgrimes			    }
2241541Srgrimes			}
2251541Srgrimes		    }
2261541Srgrimes		}
2271541Srgrimes
2281541Srgrimes		/*
2291541Srgrimes		 * If the block is in the cache and has the required data
2301541Srgrimes		 * in a valid region, just copy it out.
2311541Srgrimes		 * Otherwise, get the block and write back/read in,
2321541Srgrimes		 * as required.
2331541Srgrimes		 */
2341541Srgrimesagain:
2358692Sdg		bufsize = biosize;
2368692Sdg		if ((lbn + 1) * biosize > np->n_size) {
2378692Sdg			bufsize = np->n_size - lbn * biosize;
2388692Sdg			bufsize = (bufsize + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1);
2398692Sdg		}
2408692Sdg		bp = nfs_getcacheblk(vp, lbn, bufsize, p);
2417871Sdg		if (!bp)
2427871Sdg			return (EINTR);
2437871Sdg		if ((bp->b_flags & B_CACHE) == 0) {
2447871Sdg			bp->b_flags |= B_READ;
2457871Sdg			not_readin = 0;
2467871Sdg			vfs_busy_pages(bp, 0);
2477871Sdg			error = nfs_doio(bp, cred, p);
2487871Sdg			if (error) {
2497871Sdg			    brelse(bp);
2507871Sdg			    return (error);
2511541Srgrimes			}
2521541Srgrimes		}
2538692Sdg		if (bufsize > on) {
2548692Sdg			n = min((unsigned)(bufsize - on), uio->uio_resid);
2558692Sdg		} else {
2568692Sdg			n = 0;
2578692Sdg		}
2581541Srgrimes		diff = np->n_size - uio->uio_offset;
2591541Srgrimes		if (diff < n)
2601541Srgrimes			n = diff;
2611541Srgrimes		if (not_readin && n > 0) {
2621541Srgrimes			if (on < bp->b_validoff || (on + n) > bp->b_validend) {
2636148Sdg				bp->b_flags |= B_NOCACHE;
2641541Srgrimes				if (bp->b_dirtyend > 0) {
2651541Srgrimes				    if ((bp->b_flags & B_DELWRI) == 0)
2661541Srgrimes					panic("nfsbioread");
2671541Srgrimes				    if (VOP_BWRITE(bp) == EINTR)
2681541Srgrimes					return (EINTR);
2691541Srgrimes				} else
2701541Srgrimes				    brelse(bp);
2711541Srgrimes				goto again;
2721541Srgrimes			}
2731541Srgrimes		}
2741541Srgrimes		vp->v_lastr = lbn;
2751541Srgrimes		diff = (on >= bp->b_validend) ? 0 : (bp->b_validend - on);
2761541Srgrimes		if (diff < n)
2771541Srgrimes			n = diff;
2781541Srgrimes		break;
2791541Srgrimes	    case VLNK:
2801541Srgrimes		nfsstats.biocache_readlinks++;
2811541Srgrimes		bp = nfs_getcacheblk(vp, (daddr_t)0, NFS_MAXPATHLEN, p);
2821541Srgrimes		if (!bp)
2831541Srgrimes			return (EINTR);
2847871Sdg		if ((bp->b_flags & B_CACHE) == 0) {
2851541Srgrimes			bp->b_flags |= B_READ;
2865455Sdg			vfs_busy_pages(bp, 0);
2873305Sphk			error = nfs_doio(bp, cred, p);
2883305Sphk			if (error) {
2895455Sdg				bp->b_flags |= B_ERROR;
2901541Srgrimes				brelse(bp);
2911541Srgrimes				return (error);
2921541Srgrimes			}
2931541Srgrimes		}
2941541Srgrimes		n = min(uio->uio_resid, NFS_MAXPATHLEN - bp->b_resid);
2951541Srgrimes		on = 0;
2961541Srgrimes		break;
2971541Srgrimes	    case VDIR:
2981541Srgrimes		nfsstats.biocache_readdirs++;
2995455Sdg		lbn = (daddr_t)uio->uio_offset;
3005455Sdg		bp = nfs_getcacheblk(vp, lbn, NFS_DIRBLKSIZ, p);
3011541Srgrimes		if (!bp)
3021541Srgrimes			return (EINTR);
3035455Sdg
3047871Sdg		if ((bp->b_flags & B_CACHE) == 0) {
3051541Srgrimes			bp->b_flags |= B_READ;
3065455Sdg			vfs_busy_pages(bp, 0);
3073305Sphk			error = nfs_doio(bp, cred, p);
3083305Sphk			if (error) {
3095455Sdg				bp->b_flags |= B_ERROR;
3101541Srgrimes				brelse(bp);
3111541Srgrimes				return (error);
3121541Srgrimes			}
3131541Srgrimes		}
3141541Srgrimes
3151541Srgrimes		/*
3161541Srgrimes		 * If not eof and read aheads are enabled, start one.
3171541Srgrimes		 * (You need the current block first, so that you have the
3181541Srgrimes		 *  directory offset cookie of the next block.
3191541Srgrimes		 */
3201541Srgrimes		rabn = bp->b_blkno;
3211541Srgrimes		if (nfs_numasync > 0 && nmp->nm_readahead > 0 &&
3221541Srgrimes		    rabn != 0 && rabn != np->n_direofoffset &&
3231541Srgrimes		    !incore(vp, rabn)) {
3241541Srgrimes			rabp = nfs_getcacheblk(vp, rabn, NFS_DIRBLKSIZ, p);
3251541Srgrimes			if (rabp) {
3268692Sdg			    if ((rabp->b_flags & (B_CACHE|B_DELWRI)) == 0) {
3271541Srgrimes				rabp->b_flags |= (B_READ | B_ASYNC);
3285455Sdg				vfs_busy_pages(rabp, 0);
3291541Srgrimes				if (nfs_asyncio(rabp, cred)) {
3306148Sdg				    rabp->b_flags |= B_INVAL|B_ERROR;
3315455Sdg				    vfs_unbusy_pages(rabp);
3321541Srgrimes				    brelse(rabp);
3331541Srgrimes				}
3345471Sdg			    } else {
3355471Sdg				brelse(rabp);
3361541Srgrimes			    }
3371541Srgrimes			}
3381541Srgrimes		}
3391541Srgrimes		on = 0;
3401541Srgrimes		n = min(uio->uio_resid, NFS_DIRBLKSIZ - bp->b_resid);
3411541Srgrimes		break;
3423305Sphk	    default:
3433305Sphk		printf(" nfsbioread: type %x unexpected\n",vp->v_type);
3443305Sphk		break;
3451541Srgrimes	    };
3461541Srgrimes
3471541Srgrimes	    if (n > 0) {
3487871Sdg		error = uiomove(bp->b_data + on, (int)n, uio);
3491541Srgrimes	    }
3501541Srgrimes	    switch (vp->v_type) {
3511541Srgrimes	    case VREG:
3521541Srgrimes		break;
3531541Srgrimes	    case VLNK:
3541541Srgrimes		n = 0;
3551541Srgrimes		break;
3561541Srgrimes	    case VDIR:
3571541Srgrimes		uio->uio_offset = bp->b_blkno;
3581541Srgrimes		break;
3593305Sphk	    default:
3603305Sphk		printf(" nfsbioread: type %x unexpected\n",vp->v_type);
3613305Sphk		break;
3623305Sphk	    }
3637871Sdg 	    brelse(bp);
3641541Srgrimes	} while (error == 0 && uio->uio_resid > 0 && n > 0);
3651541Srgrimes	return (error);
3661541Srgrimes}
3671541Srgrimes
3681541Srgrimes/*
3691541Srgrimes * Vnode op for write using bio
3701541Srgrimes */
3711549Srgrimesint
3721541Srgrimesnfs_write(ap)
3731541Srgrimes	struct vop_write_args /* {
3741541Srgrimes		struct vnode *a_vp;
3751541Srgrimes		struct uio *a_uio;
3761541Srgrimes		int  a_ioflag;
3771541Srgrimes		struct ucred *a_cred;
3781541Srgrimes	} */ *ap;
3791541Srgrimes{
3801541Srgrimes	register int biosize;
3811541Srgrimes	register struct uio *uio = ap->a_uio;
3821541Srgrimes	struct proc *p = uio->uio_procp;
3831541Srgrimes	register struct vnode *vp = ap->a_vp;
3841541Srgrimes	struct nfsnode *np = VTONFS(vp);
3851541Srgrimes	register struct ucred *cred = ap->a_cred;
3861541Srgrimes	int ioflag = ap->a_ioflag;
3871541Srgrimes	struct buf *bp;
3881541Srgrimes	struct vattr vattr;
3891541Srgrimes	struct nfsmount *nmp;
3905455Sdg	daddr_t lbn;
3918692Sdg	int bufsize;
3921541Srgrimes	int n, on, error = 0;
3931541Srgrimes
3941541Srgrimes#ifdef DIAGNOSTIC
3951541Srgrimes	if (uio->uio_rw != UIO_WRITE)
3961541Srgrimes		panic("nfs_write mode");
3971541Srgrimes	if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc)
3981541Srgrimes		panic("nfs_write proc");
3991541Srgrimes#endif
4001541Srgrimes	if (vp->v_type != VREG)
4011541Srgrimes		return (EIO);
4021541Srgrimes	if (np->n_flag & NWRITEERR) {
4031541Srgrimes		np->n_flag &= ~NWRITEERR;
4041541Srgrimes		return (np->n_error);
4051541Srgrimes	}
4061541Srgrimes	if (ioflag & (IO_APPEND | IO_SYNC)) {
4071541Srgrimes		if (np->n_flag & NMODIFIED) {
4081541Srgrimes			np->n_attrstamp = 0;
4093305Sphk			error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
4103305Sphk			if (error)
4111541Srgrimes				return (error);
4121541Srgrimes		}
4131541Srgrimes		if (ioflag & IO_APPEND) {
4141541Srgrimes			np->n_attrstamp = 0;
4153305Sphk			error = VOP_GETATTR(vp, &vattr, cred, p);
4163305Sphk			if (error)
4171541Srgrimes				return (error);
4181541Srgrimes			uio->uio_offset = np->n_size;
4191541Srgrimes		}
4201541Srgrimes	}
4211541Srgrimes	nmp = VFSTONFS(vp->v_mount);
4221541Srgrimes	if (uio->uio_offset < 0)
4231541Srgrimes		return (EINVAL);
4241541Srgrimes	if (uio->uio_resid == 0)
4251541Srgrimes		return (0);
4261541Srgrimes	/*
4271541Srgrimes	 * Maybe this should be above the vnode op call, but so long as
4281541Srgrimes	 * file servers have no limits, i don't think it matters
4291541Srgrimes	 */
4301541Srgrimes	if (p && uio->uio_offset + uio->uio_resid >
4311541Srgrimes	      p->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
4321541Srgrimes		psignal(p, SIGXFSZ);
4331541Srgrimes		return (EFBIG);
4341541Srgrimes	}
4351541Srgrimes	/*
4361541Srgrimes	 * I use nm_rsize, not nm_wsize so that all buffer cache blocks
4371541Srgrimes	 * will be the same size within a filesystem. nfs_writerpc will
4381541Srgrimes	 * still use nm_wsize when sizing the rpc's.
4391541Srgrimes	 */
4405455Sdg	biosize = NFS_MAXDGRAMDATA;
4411541Srgrimes	do {
4421541Srgrimes
4431541Srgrimes		/*
4443664Sphk		 * XXX make sure we aren't cached in the VM page cache
4453664Sphk		 */
4463664Sphk		/*
4471541Srgrimes		 * Check for a valid write lease.
4481541Srgrimes		 * If non-cachable, just do the rpc
4491541Srgrimes		 */
4501541Srgrimes		if ((nmp->nm_flag & NFSMNT_NQNFS) &&
4511541Srgrimes		    NQNFS_CKINVALID(vp, np, NQL_WRITE)) {
4521541Srgrimes			do {
4531541Srgrimes				error = nqnfs_getlease(vp, NQL_WRITE, cred, p);
4541541Srgrimes			} while (error == NQNFS_EXPIRED);
4551541Srgrimes			if (error)
4561541Srgrimes				return (error);
4571541Srgrimes			if (np->n_lrev != np->n_brev ||
4581541Srgrimes			    (np->n_flag & NQNFSNONCACHE)) {
4593305Sphk				error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
4603305Sphk				if (error)
4611541Srgrimes					return (error);
4621541Srgrimes				np->n_brev = np->n_lrev;
4631541Srgrimes			}
4641541Srgrimes		}
4651541Srgrimes		if (np->n_flag & NQNFSNONCACHE)
4661541Srgrimes			return (nfs_writerpc(vp, uio, cred, ioflag));
4671541Srgrimes		nfsstats.biocache_writes++;
4681541Srgrimes		lbn = uio->uio_offset / biosize;
4691541Srgrimes		on = uio->uio_offset & (biosize-1);
4701541Srgrimes		n = min((unsigned)(biosize - on), uio->uio_resid);
4711541Srgrimesagain:
4728692Sdg		if (uio->uio_offset + n > np->n_size) {
4738692Sdg			np->n_size = uio->uio_offset + n;
4748692Sdg			vnode_pager_setsize(vp, (u_long)np->n_size);
4758692Sdg		}
4768692Sdg		bufsize = biosize;
4778692Sdg		if ((lbn + 1) * biosize > np->n_size) {
4788692Sdg			bufsize = np->n_size - lbn * biosize;
4798692Sdg			bufsize = (bufsize + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1);
4808692Sdg		}
4818692Sdg		bp = nfs_getcacheblk(vp, lbn, bufsize, p);
4821541Srgrimes		if (!bp)
4831541Srgrimes			return (EINTR);
4841541Srgrimes		if (bp->b_wcred == NOCRED) {
4851541Srgrimes			crhold(cred);
4861541Srgrimes			bp->b_wcred = cred;
4871541Srgrimes		}
4881541Srgrimes		np->n_flag |= NMODIFIED;
4898692Sdg
4908692Sdg		if ((bp->b_blkno * DEV_BSIZE) + bp->b_dirtyend > np->n_size) {
4918692Sdg			bp->b_dirtyend = np->n_size - (bp->b_blkno * DEV_BSIZE);
4921541Srgrimes		}
4931541Srgrimes
4941541Srgrimes		/*
4951541Srgrimes		 * If the new write will leave a contiguous dirty
4961541Srgrimes		 * area, just update the b_dirtyoff and b_dirtyend,
4971541Srgrimes		 * otherwise force a write rpc of the old dirty area.
4981541Srgrimes		 */
4991541Srgrimes		if (bp->b_dirtyend > 0 &&
5001541Srgrimes		    (on > bp->b_dirtyend || (on + n) < bp->b_dirtyoff)) {
5011541Srgrimes			bp->b_proc = p;
5021541Srgrimes			if (VOP_BWRITE(bp) == EINTR)
5031541Srgrimes				return (EINTR);
5041541Srgrimes			goto again;
5051541Srgrimes		}
5061541Srgrimes
5071541Srgrimes		/*
5081541Srgrimes		 * Check for valid write lease and get one as required.
5091541Srgrimes		 * In case getblk() and/or bwrite() delayed us.
5101541Srgrimes		 */
5111541Srgrimes		if ((nmp->nm_flag & NFSMNT_NQNFS) &&
5121541Srgrimes		    NQNFS_CKINVALID(vp, np, NQL_WRITE)) {
5131541Srgrimes			do {
5141541Srgrimes				error = nqnfs_getlease(vp, NQL_WRITE, cred, p);
5151541Srgrimes			} while (error == NQNFS_EXPIRED);
5161541Srgrimes			if (error) {
5171541Srgrimes				brelse(bp);
5181541Srgrimes				return (error);
5191541Srgrimes			}
5201541Srgrimes			if (np->n_lrev != np->n_brev ||
5211541Srgrimes			    (np->n_flag & NQNFSNONCACHE)) {
5221541Srgrimes				brelse(bp);
5233305Sphk				error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
5243305Sphk				if (error)
5251541Srgrimes					return (error);
5261541Srgrimes				np->n_brev = np->n_lrev;
5271541Srgrimes				goto again;
5281541Srgrimes			}
5291541Srgrimes		}
5303305Sphk		error = uiomove((char *)bp->b_data + on, n, uio);
5313305Sphk		if (error) {
5321541Srgrimes			bp->b_flags |= B_ERROR;
5331541Srgrimes			brelse(bp);
5341541Srgrimes			return (error);
5351541Srgrimes		}
5361541Srgrimes		if (bp->b_dirtyend > 0) {
5371541Srgrimes			bp->b_dirtyoff = min(on, bp->b_dirtyoff);
5381541Srgrimes			bp->b_dirtyend = max((on + n), bp->b_dirtyend);
5391541Srgrimes		} else {
5401541Srgrimes			bp->b_dirtyoff = on;
5411541Srgrimes			bp->b_dirtyend = on + n;
5421541Srgrimes		}
5431541Srgrimes#ifndef notdef
5441541Srgrimes		if (bp->b_validend == 0 || bp->b_validend < bp->b_dirtyoff ||
5451541Srgrimes		    bp->b_validoff > bp->b_dirtyend) {
5461541Srgrimes			bp->b_validoff = bp->b_dirtyoff;
5471541Srgrimes			bp->b_validend = bp->b_dirtyend;
5481541Srgrimes		} else {
5491541Srgrimes			bp->b_validoff = min(bp->b_validoff, bp->b_dirtyoff);
5501541Srgrimes			bp->b_validend = max(bp->b_validend, bp->b_dirtyend);
5511541Srgrimes		}
5521541Srgrimes#else
5531541Srgrimes		bp->b_validoff = bp->b_dirtyoff;
5541541Srgrimes		bp->b_validend = bp->b_dirtyend;
5551541Srgrimes#endif
5561541Srgrimes		if (ioflag & IO_APPEND)
5571541Srgrimes			bp->b_flags |= B_APPENDWRITE;
5581541Srgrimes
5591541Srgrimes		/*
5601541Srgrimes		 * If the lease is non-cachable or IO_SYNC do bwrite().
5611541Srgrimes		 */
5621541Srgrimes		if ((np->n_flag & NQNFSNONCACHE) || (ioflag & IO_SYNC)) {
5631541Srgrimes			bp->b_proc = p;
5643305Sphk			error = VOP_BWRITE(bp);
5653305Sphk			if (error)
5661541Srgrimes				return (error);
5671541Srgrimes		} else if ((n + on) == biosize &&
5681541Srgrimes			(nmp->nm_flag & NFSMNT_NQNFS) == 0) {
5691541Srgrimes			bp->b_proc = (struct proc *)0;
5701541Srgrimes			bawrite(bp);
5711541Srgrimes		} else
5721541Srgrimes			bdwrite(bp);
5731541Srgrimes	} while (uio->uio_resid > 0 && n > 0);
5741541Srgrimes	return (0);
5751541Srgrimes}
5761541Srgrimes
5771541Srgrimes/*
5781541Srgrimes * Get an nfs cache block.
5791541Srgrimes * Allocate a new one if the block isn't currently in the cache
5801541Srgrimes * and return the block marked busy. If the calling process is
5811541Srgrimes * interrupted by a signal for an interruptible mount point, return
5821541Srgrimes * NULL.
5831541Srgrimes */
5841541Srgrimesstruct buf *
5851541Srgrimesnfs_getcacheblk(vp, bn, size, p)
5861541Srgrimes	struct vnode *vp;
5871541Srgrimes	daddr_t bn;
5881541Srgrimes	int size;
5891541Srgrimes	struct proc *p;
5901541Srgrimes{
5911541Srgrimes	register struct buf *bp;
5921541Srgrimes	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
5931541Srgrimes
5941541Srgrimes	if (nmp->nm_flag & NFSMNT_INT) {
5951541Srgrimes		bp = getblk(vp, bn, size, PCATCH, 0);
5961541Srgrimes		while (bp == (struct buf *)0) {
5971541Srgrimes			if (nfs_sigintr(nmp, (struct nfsreq *)0, p))
5981541Srgrimes				return ((struct buf *)0);
5991541Srgrimes			bp = getblk(vp, bn, size, 0, 2 * hz);
6001541Srgrimes		}
6011541Srgrimes	} else
6021541Srgrimes		bp = getblk(vp, bn, size, 0, 0);
6035455Sdg
6045455Sdg	if( vp->v_type == VREG)
6055455Sdg		bp->b_blkno = (bn * NFS_MAXDGRAMDATA) / DEV_BSIZE;
6065455Sdg
6071541Srgrimes	return (bp);
6081541Srgrimes}
6091541Srgrimes
6101541Srgrimes/*
6111541Srgrimes * Flush and invalidate all dirty buffers. If another process is already
6121541Srgrimes * doing the flush, just wait for completion.
6131541Srgrimes */
6141549Srgrimesint
6151541Srgrimesnfs_vinvalbuf(vp, flags, cred, p, intrflg)
6161541Srgrimes	struct vnode *vp;
6171541Srgrimes	int flags;
6181541Srgrimes	struct ucred *cred;
6191541Srgrimes	struct proc *p;
6201541Srgrimes	int intrflg;
6211541Srgrimes{
6221541Srgrimes	register struct nfsnode *np = VTONFS(vp);
6231541Srgrimes	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
6241541Srgrimes	int error = 0, slpflag, slptimeo;
6251541Srgrimes
6261541Srgrimes	if ((nmp->nm_flag & NFSMNT_INT) == 0)
6271541Srgrimes		intrflg = 0;
6281541Srgrimes	if (intrflg) {
6291541Srgrimes		slpflag = PCATCH;
6301541Srgrimes		slptimeo = 2 * hz;
6311541Srgrimes	} else {
6321541Srgrimes		slpflag = 0;
6331541Srgrimes		slptimeo = 0;
6341541Srgrimes	}
6351541Srgrimes	/*
6361541Srgrimes	 * First wait for any other process doing a flush to complete.
6371541Srgrimes	 */
6381541Srgrimes	while (np->n_flag & NFLUSHINPROG) {
6391541Srgrimes		np->n_flag |= NFLUSHWANT;
6401541Srgrimes		error = tsleep((caddr_t)&np->n_flag, PRIBIO + 2, "nfsvinval",
6411541Srgrimes			slptimeo);
6421541Srgrimes		if (error && intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p))
6431541Srgrimes			return (EINTR);
6441541Srgrimes	}
6451541Srgrimes
6461541Srgrimes	/*
6471541Srgrimes	 * Now, flush as required.
6481541Srgrimes	 */
6491541Srgrimes	np->n_flag |= NFLUSHINPROG;
6501541Srgrimes	error = vinvalbuf(vp, flags, cred, p, slpflag, 0);
6511541Srgrimes	while (error) {
6521541Srgrimes		if (intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p)) {
6531541Srgrimes			np->n_flag &= ~NFLUSHINPROG;
6541541Srgrimes			if (np->n_flag & NFLUSHWANT) {
6551541Srgrimes				np->n_flag &= ~NFLUSHWANT;
6561541Srgrimes				wakeup((caddr_t)&np->n_flag);
6571541Srgrimes			}
6581541Srgrimes			return (EINTR);
6591541Srgrimes		}
6601541Srgrimes		error = vinvalbuf(vp, flags, cred, p, 0, slptimeo);
6611541Srgrimes	}
6621541Srgrimes	np->n_flag &= ~(NMODIFIED | NFLUSHINPROG);
6631541Srgrimes	if (np->n_flag & NFLUSHWANT) {
6641541Srgrimes		np->n_flag &= ~NFLUSHWANT;
6651541Srgrimes		wakeup((caddr_t)&np->n_flag);
6661541Srgrimes	}
6671541Srgrimes	return (0);
6681541Srgrimes}
6691541Srgrimes
6701541Srgrimes/*
6711541Srgrimes * Initiate asynchronous I/O. Return an error if no nfsiods are available.
6721541Srgrimes * This is mainly to avoid queueing async I/O requests when the nfsiods
6731541Srgrimes * are all hung on a dead server.
6741541Srgrimes */
6751549Srgrimesint
6761541Srgrimesnfs_asyncio(bp, cred)
6771541Srgrimes	register struct buf *bp;
6781541Srgrimes	struct ucred *cred;
6791541Srgrimes{
6801541Srgrimes	register int i;
6811541Srgrimes
6821541Srgrimes	if (nfs_numasync == 0)
6831541Srgrimes		return (EIO);
6841541Srgrimes	for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
6851541Srgrimes	    if (nfs_iodwant[i]) {
6861541Srgrimes		if (bp->b_flags & B_READ) {
6871541Srgrimes			if (bp->b_rcred == NOCRED && cred != NOCRED) {
6881541Srgrimes				crhold(cred);
6891541Srgrimes				bp->b_rcred = cred;
6901541Srgrimes			}
6911541Srgrimes		} else {
6921541Srgrimes			if (bp->b_wcred == NOCRED && cred != NOCRED) {
6931541Srgrimes				crhold(cred);
6941541Srgrimes				bp->b_wcred = cred;
6951541Srgrimes			}
6961541Srgrimes		}
6978876Srgrimes
6981541Srgrimes		TAILQ_INSERT_TAIL(&nfs_bufq, bp, b_freelist);
6991541Srgrimes		nfs_iodwant[i] = (struct proc *)0;
7001541Srgrimes		wakeup((caddr_t)&nfs_iodwant[i]);
7011541Srgrimes		return (0);
7021541Srgrimes	    }
7031541Srgrimes	return (EIO);
7041541Srgrimes}
7051541Srgrimes
7061541Srgrimes/*
7071541Srgrimes * Do an I/O operation to/from a cache block. This may be called
7081541Srgrimes * synchronously or from an nfsiod.
7091541Srgrimes */
7101541Srgrimesint
7111541Srgrimesnfs_doio(bp, cr, p)
7121541Srgrimes	register struct buf *bp;
7133305Sphk	struct ucred *cr;
7141541Srgrimes	struct proc *p;
7151541Srgrimes{
7161541Srgrimes	register struct uio *uiop;
7171541Srgrimes	register struct vnode *vp;
7181541Srgrimes	struct nfsnode *np;
7191541Srgrimes	struct nfsmount *nmp;
7201549Srgrimes	int error = 0, diff, len;
7211541Srgrimes	struct uio uio;
7221541Srgrimes	struct iovec io;
7231541Srgrimes
7241541Srgrimes	vp = bp->b_vp;
7251541Srgrimes	np = VTONFS(vp);
7261541Srgrimes	nmp = VFSTONFS(vp->v_mount);
7271541Srgrimes	uiop = &uio;
7281541Srgrimes	uiop->uio_iov = &io;
7291541Srgrimes	uiop->uio_iovcnt = 1;
7301541Srgrimes	uiop->uio_segflg = UIO_SYSSPACE;
7311541Srgrimes	uiop->uio_procp = p;
7321541Srgrimes
7331541Srgrimes	/*
7341541Srgrimes	 * Historically, paging was done with physio, but no more.
7351541Srgrimes	 */
7363664Sphk	if (bp->b_flags & B_PHYS) {
7373664Sphk	    /*
7383664Sphk	     * ...though reading /dev/drum still gets us here.
7393664Sphk	     */
7401541Srgrimes	    io.iov_len = uiop->uio_resid = bp->b_bcount;
7413664Sphk	    /* mapping was done by vmapbuf() */
7421541Srgrimes	    io.iov_base = bp->b_data;
7433664Sphk	    uiop->uio_offset = bp->b_blkno * DEV_BSIZE;
7443664Sphk	    if (bp->b_flags & B_READ) {
7453664Sphk		uiop->uio_rw = UIO_READ;
7463664Sphk		nfsstats.read_physios++;
7473664Sphk		error = nfs_readrpc(vp, uiop, cr);
7483664Sphk	    } else {
7493664Sphk		uiop->uio_rw = UIO_WRITE;
7503664Sphk		nfsstats.write_physios++;
7513664Sphk		error = nfs_writerpc(vp, uiop, cr,0);
7523664Sphk	    }
7533664Sphk	    if (error) {
7543664Sphk		bp->b_flags |= B_ERROR;
7553664Sphk		bp->b_error = error;
7563664Sphk	    }
7573664Sphk	} else if (bp->b_flags & B_READ) {
7583664Sphk	    io.iov_len = uiop->uio_resid = bp->b_bcount;
7593664Sphk	    io.iov_base = bp->b_data;
7601541Srgrimes	    uiop->uio_rw = UIO_READ;
7611541Srgrimes	    switch (vp->v_type) {
7621541Srgrimes	    case VREG:
7631541Srgrimes		uiop->uio_offset = bp->b_blkno * DEV_BSIZE;
7641541Srgrimes		nfsstats.read_bios++;
7651541Srgrimes		error = nfs_readrpc(vp, uiop, cr);
7661541Srgrimes		if (!error) {
7671541Srgrimes		    bp->b_validoff = 0;
7681541Srgrimes		    if (uiop->uio_resid) {
7691541Srgrimes			/*
7701541Srgrimes			 * If len > 0, there is a hole in the file and
7711541Srgrimes			 * no writes after the hole have been pushed to
7721541Srgrimes			 * the server yet.
7731541Srgrimes			 * Just zero fill the rest of the valid area.
7741541Srgrimes			 */
7751541Srgrimes			diff = bp->b_bcount - uiop->uio_resid;
7761541Srgrimes			len = np->n_size - (bp->b_blkno * DEV_BSIZE
7771541Srgrimes				+ diff);
7781541Srgrimes			if (len > 0) {
7791541Srgrimes			    len = min(len, uiop->uio_resid);
7801541Srgrimes			    bzero((char *)bp->b_data + diff, len);
7811541Srgrimes			    bp->b_validend = diff + len;
7821541Srgrimes			} else
7831541Srgrimes			    bp->b_validend = diff;
7841541Srgrimes		    } else
7851541Srgrimes			bp->b_validend = bp->b_bcount;
7861541Srgrimes		}
7871541Srgrimes		if (p && (vp->v_flag & VTEXT) &&
7881541Srgrimes			(((nmp->nm_flag & NFSMNT_NQNFS) &&
7893664Sphk			  NQNFS_CKINVALID(vp, np, NQL_READ) &&
7901541Srgrimes			  np->n_lrev != np->n_brev) ||
7911541Srgrimes			 (!(nmp->nm_flag & NFSMNT_NQNFS) &&
7921541Srgrimes			  np->n_mtime != np->n_vattr.va_mtime.ts_sec))) {
7931541Srgrimes			uprintf("Process killed due to text file modification\n");
7941541Srgrimes			psignal(p, SIGKILL);
7951541Srgrimes			p->p_flag |= P_NOSWAP;
7961541Srgrimes		}
7971541Srgrimes		break;
7981541Srgrimes	    case VLNK:
7991541Srgrimes		uiop->uio_offset = 0;
8001541Srgrimes		nfsstats.readlink_bios++;
8011541Srgrimes		error = nfs_readlinkrpc(vp, uiop, cr);
8021541Srgrimes		break;
8031541Srgrimes	    case VDIR:
8041541Srgrimes		uiop->uio_offset = bp->b_lblkno;
8051541Srgrimes		nfsstats.readdir_bios++;
8061541Srgrimes		if (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS)
8071541Srgrimes		    error = nfs_readdirlookrpc(vp, uiop, cr);
8081541Srgrimes		else
8091541Srgrimes		    error = nfs_readdirrpc(vp, uiop, cr);
8101541Srgrimes		/*
8111541Srgrimes		 * Save offset cookie in b_blkno.
8121541Srgrimes		 */
8131541Srgrimes		bp->b_blkno = uiop->uio_offset;
8141541Srgrimes		break;
8153305Sphk	    default:
8163305Sphk		printf("nfs_doio:  type %x unexpected\n",vp->v_type);
8173305Sphk		break;
8181541Srgrimes	    };
8191541Srgrimes	    if (error) {
8201541Srgrimes		bp->b_flags |= B_ERROR;
8211541Srgrimes		bp->b_error = error;
8221541Srgrimes	    }
8231541Srgrimes	} else {
8241541Srgrimes
8258692Sdg	    if (((bp->b_blkno * DEV_BSIZE) + bp->b_dirtyend) > np->n_size)
8268692Sdg		bp->b_dirtyend = np->n_size - (bp->b_blkno * DEV_BSIZE);
8278692Sdg
8288692Sdg	    if (bp->b_dirtyend > bp->b_dirtyoff) {
8298692Sdg		io.iov_len = uiop->uio_resid = bp->b_dirtyend
8308692Sdg			- bp->b_dirtyoff;
8318692Sdg		uiop->uio_offset = (bp->b_blkno * DEV_BSIZE)
8328692Sdg			+ bp->b_dirtyoff;
8338692Sdg		io.iov_base = (char *)bp->b_data + bp->b_dirtyoff;
8348692Sdg		uiop->uio_rw = UIO_WRITE;
8358692Sdg		nfsstats.write_bios++;
8368692Sdg		if (bp->b_flags & B_APPENDWRITE)
8378692Sdg			error = nfs_writerpc(vp, uiop, cr, IO_APPEND);
8388692Sdg		else
8398692Sdg			error = nfs_writerpc(vp, uiop, cr, 0);
8408692Sdg		bp->b_flags &= ~(B_WRITEINPROG | B_APPENDWRITE);
8418692Sdg
8421541Srgrimes	    /*
8431541Srgrimes	     * For an interrupted write, the buffer is still valid and the
8441541Srgrimes	     * write hasn't been pushed to the server yet, so we can't set
8451541Srgrimes	     * B_ERROR and report the interruption by setting B_EINTR. For
8461541Srgrimes	     * the B_ASYNC case, B_EINTR is not relevant, so the rpc attempt
8471541Srgrimes	     * is essentially a noop.
8481541Srgrimes	     */
8498692Sdg		if (error == EINTR) {
8508692Sdg			bp->b_flags &= ~(B_INVAL|B_NOCACHE);
8518692Sdg			bp->b_flags |= B_DELWRI;
8521541Srgrimes
8531541Srgrimes		/*
8541541Srgrimes		 * Since for the B_ASYNC case, nfs_bwrite() has reassigned the
8551541Srgrimes		 * buffer to the clean list, we have to reassign it back to the
8561541Srgrimes		 * dirty one. Ugh.
8571541Srgrimes		 */
8588692Sdg			if (bp->b_flags & B_ASYNC)
8598692Sdg				reassignbuf(bp, vp);
8608692Sdg			else
8618692Sdg				bp->b_flags |= B_EINTR;
8628692Sdg	    	} else {
8638692Sdg			if (error) {
8648692Sdg				bp->b_flags |= B_ERROR;
8658692Sdg				bp->b_error = np->n_error = error;
8668692Sdg				np->n_flag |= NWRITEERR;
8678692Sdg			}
8688692Sdg			bp->b_dirtyoff = bp->b_dirtyend = 0;
8698692Sdg		}
8701541Srgrimes	    } else {
8718692Sdg		bp->b_resid = 0;
8728692Sdg		biodone(bp);
8738692Sdg		return (0);
8741541Srgrimes	    }
8751541Srgrimes	}
8761541Srgrimes	bp->b_resid = uiop->uio_resid;
8771541Srgrimes	biodone(bp);
8781541Srgrimes	return (error);
8791541Srgrimes}
880