nfs_bio.c revision 32912
11541Srgrimes/*
21541Srgrimes * Copyright (c) 1989, 1993
31541Srgrimes *	The Regents of the University of California.  All rights reserved.
41541Srgrimes *
51541Srgrimes * This code is derived from software contributed to Berkeley by
61541Srgrimes * Rick Macklem at The University of Guelph.
71541Srgrimes *
81541Srgrimes * Redistribution and use in source and binary forms, with or without
91541Srgrimes * modification, are permitted provided that the following conditions
101541Srgrimes * are met:
111541Srgrimes * 1. Redistributions of source code must retain the above copyright
121541Srgrimes *    notice, this list of conditions and the following disclaimer.
131541Srgrimes * 2. Redistributions in binary form must reproduce the above copyright
141541Srgrimes *    notice, this list of conditions and the following disclaimer in the
151541Srgrimes *    documentation and/or other materials provided with the distribution.
161541Srgrimes * 3. All advertising materials mentioning features or use of this software
171541Srgrimes *    must display the following acknowledgement:
181541Srgrimes *	This product includes software developed by the University of
191541Srgrimes *	California, Berkeley and its contributors.
201541Srgrimes * 4. Neither the name of the University nor the names of its contributors
211541Srgrimes *    may be used to endorse or promote products derived from this software
221541Srgrimes *    without specific prior written permission.
231541Srgrimes *
241541Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
251541Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
261541Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
271541Srgrimes * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
281541Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
291541Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
301541Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
311541Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
321541Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
331541Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
341541Srgrimes * SUCH DAMAGE.
351541Srgrimes *
3622521Sdyson *	@(#)nfs_bio.c	8.9 (Berkeley) 3/30/95
3732912Stegge * $Id: nfs_bio.c,v 1.47 1998/01/25 06:24:09 dyson Exp $
381541Srgrimes */
391541Srgrimes
4022521Sdyson
411541Srgrimes#include <sys/param.h>
421541Srgrimes#include <sys/systm.h>
431541Srgrimes#include <sys/resourcevar.h>
443305Sphk#include <sys/signalvar.h>
451541Srgrimes#include <sys/proc.h>
461541Srgrimes#include <sys/buf.h>
471541Srgrimes#include <sys/vnode.h>
481541Srgrimes#include <sys/mount.h>
491541Srgrimes#include <sys/kernel.h>
501541Srgrimes
511541Srgrimes#include <vm/vm.h>
5212662Sdg#include <vm/vm_extern.h>
5325930Sdfr#include <vm/vm_prot.h>
5425930Sdfr#include <vm/vm_page.h>
5525930Sdfr#include <vm/vm_object.h>
5625930Sdfr#include <vm/vm_pager.h>
5725930Sdfr#include <vm/vnode_pager.h>
581541Srgrimes
591541Srgrimes#include <nfs/rpcv2.h>
609336Sdfr#include <nfs/nfsproto.h>
611541Srgrimes#include <nfs/nfs.h>
621541Srgrimes#include <nfs/nfsmount.h>
631541Srgrimes#include <nfs/nqnfs.h>
649336Sdfr#include <nfs/nfsnode.h>
651541Srgrimes
6612911Sphkstatic struct buf *nfs_getcacheblk __P((struct vnode *vp, daddr_t bn, int size,
6712588Sbde					struct proc *p));
6812588Sbde
691541Srgrimesextern int nfs_numasync;
709336Sdfrextern struct nfsstats nfsstats;
711541Srgrimes
721541Srgrimes/*
7325930Sdfr * Vnode op for VM getpages.
7425930Sdfr */
7525930Sdfrint
7625930Sdfrnfs_getpages(ap)
7725930Sdfr	struct vop_getpages_args *ap;
7825930Sdfr{
7932755Sdyson	int i, pcount, error;
8032755Sdyson	struct uio uio;
8132755Sdyson	struct iovec iov;
8225930Sdfr	vm_page_t m;
8332755Sdyson	vm_offset_t kva;
8425930Sdfr
8532286Sdyson	if ((ap->a_vp->v_object) == NULL) {
8632286Sdyson		printf("nfs_getpages: called with non-merged cache vnode??\n");
8725930Sdfr		return EOPNOTSUPP;
8825930Sdfr	}
8925930Sdfr
9032755Sdyson	m = ap->a_m[ap->a_reqpage];
9132755Sdyson	kva = vm_pager_map_page(m);
9225930Sdfr
9332755Sdyson	iov.iov_base = (caddr_t) kva;
9432755Sdyson	iov.iov_len = PAGE_SIZE;
9532755Sdyson	uio.uio_iov = &iov;
9632755Sdyson	uio.uio_iovcnt = 1;
9732755Sdyson	uio.uio_offset = IDX_TO_OFF(m->pindex);
9832755Sdyson	uio.uio_resid = PAGE_SIZE;
9932755Sdyson	uio.uio_segflg = UIO_SYSSPACE;
10032755Sdyson	uio.uio_rw = UIO_READ;
10132755Sdyson	uio.uio_procp = curproc;
10225930Sdfr
10332755Sdyson	error = nfs_readrpc(ap->a_vp, &uio, curproc->p_ucred);
10432755Sdyson	vm_pager_unmap_page(kva);
10532755Sdyson
10632755Sdyson	pcount = round_page(ap->a_count) / PAGE_SIZE;
10725930Sdfr	for (i = 0; i < pcount; i++) {
10825930Sdfr		if (i != ap->a_reqpage) {
10925930Sdfr			vnode_pager_freepage(ap->a_m[i]);
11025930Sdfr		}
11125930Sdfr	}
11225930Sdfr
11332755Sdyson	if (error && (uio.uio_resid == PAGE_SIZE))
11425930Sdfr		return VM_PAGER_ERROR;
11525930Sdfr	return 0;
11625930Sdfr}
11725930Sdfr
11825930Sdfr/*
1191541Srgrimes * Vnode op for read using bio
1201541Srgrimes * Any similarity to readip() is purely coincidental
1211541Srgrimes */
1221549Srgrimesint
12325930Sdfrnfs_bioread(vp, uio, ioflag, cred, getpages)
1241541Srgrimes	register struct vnode *vp;
1251541Srgrimes	register struct uio *uio;
1261541Srgrimes	int ioflag;
1271541Srgrimes	struct ucred *cred;
12825930Sdfr	int getpages;
1291541Srgrimes{
1301541Srgrimes	register struct nfsnode *np = VTONFS(vp);
1319336Sdfr	register int biosize, diff, i;
1321549Srgrimes	struct buf *bp = 0, *rabp;
1331541Srgrimes	struct vattr vattr;
1341541Srgrimes	struct proc *p;
1359336Sdfr	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
1365455Sdg	daddr_t lbn, rabn;
1378692Sdg	int bufsize;
1387871Sdg	int nra, error = 0, n = 0, on = 0, not_readin;
1391541Srgrimes
1401541Srgrimes#ifdef DIAGNOSTIC
1411541Srgrimes	if (uio->uio_rw != UIO_READ)
1421541Srgrimes		panic("nfs_read mode");
1431541Srgrimes#endif
1441541Srgrimes	if (uio->uio_resid == 0)
1451541Srgrimes		return (0);
1469336Sdfr	if (uio->uio_offset < 0)
1471541Srgrimes		return (EINVAL);
1481541Srgrimes	p = uio->uio_procp;
1499336Sdfr	if ((nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_GOTFSINFO)) == NFSMNT_NFSV3)
1509336Sdfr		(void)nfs_fsinfo(nmp, vp, cred, p);
1519428Sdfr	biosize = vp->v_mount->mnt_stat.f_iosize;
1521541Srgrimes	/*
1531541Srgrimes	 * For nfs, cache consistency can only be maintained approximately.
1541541Srgrimes	 * Although RFC1094 does not specify the criteria, the following is
1551541Srgrimes	 * believed to be compatible with the reference port.
1561541Srgrimes	 * For nqnfs, full cache consistency is maintained within the loop.
1571541Srgrimes	 * For nfs:
1581541Srgrimes	 * If the file's modify time on the server has changed since the
1591541Srgrimes	 * last read rpc or you have written to the file,
1601541Srgrimes	 * you may have lost data cache consistency with the
1611541Srgrimes	 * server, so flush all of the file's data out of the cache.
1621541Srgrimes	 * Then force a getattr rpc to ensure that you have up to date
1631541Srgrimes	 * attributes.
1641541Srgrimes	 * NB: This implies that cache data can be read when up to
1651541Srgrimes	 * NFS_ATTRTIMEO seconds out of date. If you find that you need current
1661541Srgrimes	 * attributes this could be forced by setting n_attrstamp to 0 before
1671541Srgrimes	 * the VOP_GETATTR() call.
1681541Srgrimes	 */
16910219Sdfr	if ((nmp->nm_flag & NFSMNT_NQNFS) == 0) {
1701541Srgrimes		if (np->n_flag & NMODIFIED) {
1719336Sdfr			if (vp->v_type != VREG) {
1729336Sdfr				if (vp->v_type != VDIR)
1739336Sdfr					panic("nfs: bioread, not dir");
1749336Sdfr				nfs_invaldir(vp);
1753305Sphk				error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
1763305Sphk				if (error)
1771541Srgrimes					return (error);
1781541Srgrimes			}
1791541Srgrimes			np->n_attrstamp = 0;
1803305Sphk			error = VOP_GETATTR(vp, &vattr, cred, p);
1813305Sphk			if (error)
1821541Srgrimes				return (error);
18318397Snate			np->n_mtime = vattr.va_mtime.tv_sec;
1841541Srgrimes		} else {
1853305Sphk			error = VOP_GETATTR(vp, &vattr, cred, p);
1863305Sphk			if (error)
1871541Srgrimes				return (error);
18818397Snate			if (np->n_mtime != vattr.va_mtime.tv_sec) {
1899336Sdfr				if (vp->v_type == VDIR)
1909336Sdfr					nfs_invaldir(vp);
1913305Sphk				error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
1923305Sphk				if (error)
1931541Srgrimes					return (error);
19418397Snate				np->n_mtime = vattr.va_mtime.tv_sec;
1951541Srgrimes			}
1961541Srgrimes		}
1971541Srgrimes	}
1981541Srgrimes	do {
1991541Srgrimes
2001541Srgrimes	    /*
2011541Srgrimes	     * Get a valid lease. If cached data is stale, flush it.
2021541Srgrimes	     */
2031541Srgrimes	    if (nmp->nm_flag & NFSMNT_NQNFS) {
2049336Sdfr		if (NQNFS_CKINVALID(vp, np, ND_READ)) {
2051541Srgrimes		    do {
2069336Sdfr			error = nqnfs_getlease(vp, ND_READ, cred, p);
2071541Srgrimes		    } while (error == NQNFS_EXPIRED);
2081541Srgrimes		    if (error)
2091541Srgrimes			return (error);
2101541Srgrimes		    if (np->n_lrev != np->n_brev ||
2111541Srgrimes			(np->n_flag & NQNFSNONCACHE) ||
2121541Srgrimes			((np->n_flag & NMODIFIED) && vp->v_type == VDIR)) {
2139336Sdfr			if (vp->v_type == VDIR)
2149336Sdfr			    nfs_invaldir(vp);
2153305Sphk			error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
2163305Sphk			if (error)
2171541Srgrimes			    return (error);
2181541Srgrimes			np->n_brev = np->n_lrev;
2191541Srgrimes		    }
2201541Srgrimes		} else if (vp->v_type == VDIR && (np->n_flag & NMODIFIED)) {
2219336Sdfr		    nfs_invaldir(vp);
2223305Sphk		    error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
2233305Sphk		    if (error)
2241541Srgrimes			return (error);
2251541Srgrimes		}
2261541Srgrimes	    }
2271541Srgrimes	    if (np->n_flag & NQNFSNONCACHE) {
2281541Srgrimes		switch (vp->v_type) {
2291541Srgrimes		case VREG:
2309336Sdfr			return (nfs_readrpc(vp, uio, cred));
2311541Srgrimes		case VLNK:
2329336Sdfr			return (nfs_readlinkrpc(vp, uio, cred));
2331541Srgrimes		case VDIR:
2341541Srgrimes			break;
2353305Sphk		default:
23622521Sdyson			printf(" NQNFSNONCACHE: type %x unexpected\n",
2373305Sphk				vp->v_type);
2381541Srgrimes		};
2391541Srgrimes	    }
2401541Srgrimes	    switch (vp->v_type) {
2411541Srgrimes	    case VREG:
2421541Srgrimes		nfsstats.biocache_reads++;
2431541Srgrimes		lbn = uio->uio_offset / biosize;
2449336Sdfr		on = uio->uio_offset & (biosize - 1);
2451541Srgrimes		not_readin = 1;
2461541Srgrimes
2471541Srgrimes		/*
2481541Srgrimes		 * Start the read ahead(s), as required.
2491541Srgrimes		 */
2509336Sdfr		if (nfs_numasync > 0 && nmp->nm_readahead > 0) {
2511541Srgrimes		    for (nra = 0; nra < nmp->nm_readahead &&
25213612Smpp			(off_t)(lbn + 1 + nra) * biosize < np->n_size; nra++) {
2535455Sdg			rabn = lbn + 1 + nra;
2541541Srgrimes			if (!incore(vp, rabn)) {
2551541Srgrimes			    rabp = nfs_getcacheblk(vp, rabn, biosize, p);
2561541Srgrimes			    if (!rabp)
2571541Srgrimes				return (EINTR);
2588692Sdg			    if ((rabp->b_flags & (B_CACHE|B_DELWRI)) == 0) {
2591541Srgrimes				rabp->b_flags |= (B_READ | B_ASYNC);
2605455Sdg				vfs_busy_pages(rabp, 0);
2611541Srgrimes				if (nfs_asyncio(rabp, cred)) {
2625455Sdg				    rabp->b_flags |= B_INVAL|B_ERROR;
2635455Sdg				    vfs_unbusy_pages(rabp);
2641541Srgrimes				    brelse(rabp);
2651541Srgrimes				}
26622521Sdyson			    } else
2675471Sdg				brelse(rabp);
2681541Srgrimes			}
2691541Srgrimes		    }
2701541Srgrimes		}
2711541Srgrimes
2721541Srgrimes		/*
2731541Srgrimes		 * If the block is in the cache and has the required data
2741541Srgrimes		 * in a valid region, just copy it out.
2751541Srgrimes		 * Otherwise, get the block and write back/read in,
2761541Srgrimes		 * as required.
2771541Srgrimes		 */
2781541Srgrimesagain:
2798692Sdg		bufsize = biosize;
28013612Smpp		if ((off_t)(lbn + 1) * biosize > np->n_size &&
28113612Smpp		    (off_t)(lbn + 1) * biosize - np->n_size < biosize) {
2828692Sdg			bufsize = np->n_size - lbn * biosize;
2838692Sdg			bufsize = (bufsize + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1);
2848692Sdg		}
2858692Sdg		bp = nfs_getcacheblk(vp, lbn, bufsize, p);
2867871Sdg		if (!bp)
2877871Sdg			return (EINTR);
28825930Sdfr		/*
28925930Sdfr		 * If we are being called from nfs_getpages, we must
29025930Sdfr		 * make sure the buffer is a vmio buffer.  The vp will
29125930Sdfr		 * already be setup for vmio but there may be some old
29225930Sdfr		 * non-vmio buffers attached to it.
29325930Sdfr		 */
29425930Sdfr		if (getpages && !(bp->b_flags & B_VMIO)) {
29525930Sdfr#ifdef DIAGNOSTIC
29625930Sdfr			printf("nfs_bioread: non vmio buf found, discarding\n");
29725930Sdfr#endif
29825930Sdfr			bp->b_flags |= B_NOCACHE;
29925930Sdfr			bp->b_flags |= B_INVAFTERWRITE;
30025930Sdfr			if (bp->b_dirtyend > 0) {
30125930Sdfr				if ((bp->b_flags & B_DELWRI) == 0)
30225930Sdfr					panic("nfsbioread");
30325930Sdfr				if (VOP_BWRITE(bp) == EINTR)
30425930Sdfr					return (EINTR);
30525930Sdfr			} else
30625930Sdfr				brelse(bp);
30725930Sdfr			goto again;
30825930Sdfr		}
3097871Sdg		if ((bp->b_flags & B_CACHE) == 0) {
31032755Sdyson		    bp->b_flags |= B_READ;
31132755Sdyson		    bp->b_flags &= ~(B_DONE | B_ERROR | B_INVAL);
31232755Sdyson		    not_readin = 0;
31332755Sdyson		    vfs_busy_pages(bp, 0);
31432755Sdyson		    error = nfs_doio(bp, cred, p);
31532755Sdyson		    if (error) {
31632755Sdyson			brelse(bp);
31732755Sdyson			return (error);
31832755Sdyson		    }
3191541Srgrimes		}
3208692Sdg		if (bufsize > on) {
3218692Sdg			n = min((unsigned)(bufsize - on), uio->uio_resid);
3228692Sdg		} else {
3238692Sdg			n = 0;
3248692Sdg		}
3251541Srgrimes		diff = np->n_size - uio->uio_offset;
3261541Srgrimes		if (diff < n)
3271541Srgrimes			n = diff;
3281541Srgrimes		if (not_readin && n > 0) {
3291541Srgrimes			if (on < bp->b_validoff || (on + n) > bp->b_validend) {
3306148Sdg				bp->b_flags |= B_NOCACHE;
33122521Sdyson				bp->b_flags |= B_INVAFTERWRITE;
3321541Srgrimes				if (bp->b_dirtyend > 0) {
3331541Srgrimes				    if ((bp->b_flags & B_DELWRI) == 0)
3341541Srgrimes					panic("nfsbioread");
3351541Srgrimes				    if (VOP_BWRITE(bp) == EINTR)
3361541Srgrimes					return (EINTR);
3371541Srgrimes				} else
3381541Srgrimes				    brelse(bp);
3391541Srgrimes				goto again;
3401541Srgrimes			}
3411541Srgrimes		}
3421541Srgrimes		vp->v_lastr = lbn;
3431541Srgrimes		diff = (on >= bp->b_validend) ? 0 : (bp->b_validend - on);
3441541Srgrimes		if (diff < n)
3451541Srgrimes			n = diff;
3461541Srgrimes		break;
3471541Srgrimes	    case VLNK:
3481541Srgrimes		nfsstats.biocache_readlinks++;
3491541Srgrimes		bp = nfs_getcacheblk(vp, (daddr_t)0, NFS_MAXPATHLEN, p);
3501541Srgrimes		if (!bp)
3511541Srgrimes			return (EINTR);
3527871Sdg		if ((bp->b_flags & B_CACHE) == 0) {
35332755Sdyson		    bp->b_flags |= B_READ;
35432755Sdyson		    vfs_busy_pages(bp, 0);
35532755Sdyson		    error = nfs_doio(bp, cred, p);
35632755Sdyson		    if (error) {
35732755Sdyson			bp->b_flags |= B_ERROR;
35832755Sdyson			brelse(bp);
35932755Sdyson			return (error);
36032755Sdyson		    }
3611541Srgrimes		}
3621541Srgrimes		n = min(uio->uio_resid, NFS_MAXPATHLEN - bp->b_resid);
3631541Srgrimes		on = 0;
3641541Srgrimes		break;
3651541Srgrimes	    case VDIR:
3661541Srgrimes		nfsstats.biocache_readdirs++;
36724577Sdfr		if (np->n_direofoffset
36824577Sdfr		    && uio->uio_offset >= np->n_direofoffset) {
36924577Sdfr		    return (0);
37024577Sdfr		}
3719336Sdfr		lbn = uio->uio_offset / NFS_DIRBLKSIZ;
3729336Sdfr		on = uio->uio_offset & (NFS_DIRBLKSIZ - 1);
3735455Sdg		bp = nfs_getcacheblk(vp, lbn, NFS_DIRBLKSIZ, p);
3741541Srgrimes		if (!bp)
3759336Sdfr		    return (EINTR);
3767871Sdg		if ((bp->b_flags & B_CACHE) == 0) {
3779336Sdfr		    bp->b_flags |= B_READ;
3789336Sdfr		    vfs_busy_pages(bp, 0);
3799336Sdfr		    error = nfs_doio(bp, cred, p);
38032912Stegge		    if (error) {
38132912Stegge			    brelse(bp);
38232912Stegge		    }
38332755Sdyson		    while (error == NFSERR_BAD_COOKIE) {
38432755Sdyson			nfs_invaldir(vp);
38532755Sdyson			error = nfs_vinvalbuf(vp, 0, cred, p, 1);
38632755Sdyson			/*
38732755Sdyson			 * Yuck! The directory has been modified on the
38832755Sdyson			 * server. The only way to get the block is by
38932755Sdyson			 * reading from the beginning to get all the
39032755Sdyson			 * offset cookies.
39132755Sdyson			 */
39232755Sdyson			for (i = 0; i <= lbn && !error; i++) {
39332755Sdyson			    if (np->n_direofoffset
39432755Sdyson				&& (i * NFS_DIRBLKSIZ) >= np->n_direofoffset)
39524577Sdfr				    return (0);
39632755Sdyson			    bp = nfs_getcacheblk(vp, i, NFS_DIRBLKSIZ, p);
39732755Sdyson			    if (!bp)
39832755Sdyson				return (EINTR);
39932755Sdyson			    if ((bp->b_flags & B_DONE) == 0) {
40032755Sdyson				bp->b_flags |= B_READ;
40132755Sdyson				vfs_busy_pages(bp, 0);
40232755Sdyson				error = nfs_doio(bp, cred, p);
40332755Sdyson				if (error) {
40432755Sdyson				    brelse(bp);
40532755Sdyson				} else if (i < lbn) {
40632755Sdyson				    brelse(bp);
4079336Sdfr				}
4089336Sdfr			    }
4091541Srgrimes			}
41032912Stegge		    }
41132912Stegge		    if (error)
4129336Sdfr			    return (error);
4131541Srgrimes		}
4141541Srgrimes
4151541Srgrimes		/*
4161541Srgrimes		 * If not eof and read aheads are enabled, start one.
4171541Srgrimes		 * (You need the current block first, so that you have the
4189336Sdfr		 *  directory offset cookie of the next block.)
4191541Srgrimes		 */
4201541Srgrimes		if (nfs_numasync > 0 && nmp->nm_readahead > 0 &&
4219336Sdfr		    (np->n_direofoffset == 0 ||
4229336Sdfr		    (lbn + 1) * NFS_DIRBLKSIZ < np->n_direofoffset) &&
4239336Sdfr		    !(np->n_flag & NQNFSNONCACHE) &&
4249336Sdfr		    !incore(vp, lbn + 1)) {
4259336Sdfr			rabp = nfs_getcacheblk(vp, lbn + 1, NFS_DIRBLKSIZ, p);
4261541Srgrimes			if (rabp) {
4278692Sdg			    if ((rabp->b_flags & (B_CACHE|B_DELWRI)) == 0) {
4281541Srgrimes				rabp->b_flags |= (B_READ | B_ASYNC);
4295455Sdg				vfs_busy_pages(rabp, 0);
4301541Srgrimes				if (nfs_asyncio(rabp, cred)) {
4316148Sdg				    rabp->b_flags |= B_INVAL|B_ERROR;
4325455Sdg				    vfs_unbusy_pages(rabp);
4331541Srgrimes				    brelse(rabp);
4341541Srgrimes				}
4355471Sdg			    } else {
4365471Sdg				brelse(rabp);
4371541Srgrimes			    }
4381541Srgrimes			}
4391541Srgrimes		}
44026469Sdfr		/*
44126469Sdfr		 * Make sure we use a signed variant of min() since
44226469Sdfr		 * the second term may be negative.
44326469Sdfr		 */
44426469Sdfr		n = lmin(uio->uio_resid, NFS_DIRBLKSIZ - bp->b_resid - on);
4451541Srgrimes		break;
4463305Sphk	    default:
4479336Sdfr		printf(" nfs_bioread: type %x unexpected\n",vp->v_type);
4483305Sphk		break;
4491541Srgrimes	    };
4501541Srgrimes
4511541Srgrimes	    if (n > 0) {
4527871Sdg		error = uiomove(bp->b_data + on, (int)n, uio);
4531541Srgrimes	    }
4541541Srgrimes	    switch (vp->v_type) {
4551541Srgrimes	    case VREG:
4561541Srgrimes		break;
4571541Srgrimes	    case VLNK:
4581541Srgrimes		n = 0;
4591541Srgrimes		break;
4601541Srgrimes	    case VDIR:
4619336Sdfr		if (np->n_flag & NQNFSNONCACHE)
4629336Sdfr			bp->b_flags |= B_INVAL;
4631541Srgrimes		break;
4643305Sphk	    default:
4659336Sdfr		printf(" nfs_bioread: type %x unexpected\n",vp->v_type);
4663305Sphk	    }
46732755Sdyson	    brelse(bp);
4681541Srgrimes	} while (error == 0 && uio->uio_resid > 0 && n > 0);
4691541Srgrimes	return (error);
4701541Srgrimes}
4711541Srgrimes
4721541Srgrimes/*
4731541Srgrimes * Vnode op for write using bio
4741541Srgrimes */
4751549Srgrimesint
4761541Srgrimesnfs_write(ap)
4771541Srgrimes	struct vop_write_args /* {
4781541Srgrimes		struct vnode *a_vp;
4791541Srgrimes		struct uio *a_uio;
4801541Srgrimes		int  a_ioflag;
4811541Srgrimes		struct ucred *a_cred;
4821541Srgrimes	} */ *ap;
4831541Srgrimes{
4841541Srgrimes	register int biosize;
4851541Srgrimes	register struct uio *uio = ap->a_uio;
4861541Srgrimes	struct proc *p = uio->uio_procp;
4871541Srgrimes	register struct vnode *vp = ap->a_vp;
4881541Srgrimes	struct nfsnode *np = VTONFS(vp);
4891541Srgrimes	register struct ucred *cred = ap->a_cred;
4901541Srgrimes	int ioflag = ap->a_ioflag;
4911541Srgrimes	struct buf *bp;
4921541Srgrimes	struct vattr vattr;
4939336Sdfr	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
49411921Sphk	daddr_t lbn;
4958692Sdg	int bufsize;
4969336Sdfr	int n, on, error = 0, iomode, must_commit;
4971541Srgrimes
4981541Srgrimes#ifdef DIAGNOSTIC
4991541Srgrimes	if (uio->uio_rw != UIO_WRITE)
5001541Srgrimes		panic("nfs_write mode");
5011541Srgrimes	if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc)
5021541Srgrimes		panic("nfs_write proc");
5031541Srgrimes#endif
5041541Srgrimes	if (vp->v_type != VREG)
5051541Srgrimes		return (EIO);
5061541Srgrimes	if (np->n_flag & NWRITEERR) {
5071541Srgrimes		np->n_flag &= ~NWRITEERR;
5081541Srgrimes		return (np->n_error);
5091541Srgrimes	}
5109336Sdfr	if ((nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_GOTFSINFO)) == NFSMNT_NFSV3)
5119336Sdfr		(void)nfs_fsinfo(nmp, vp, cred, p);
5121541Srgrimes	if (ioflag & (IO_APPEND | IO_SYNC)) {
5131541Srgrimes		if (np->n_flag & NMODIFIED) {
5141541Srgrimes			np->n_attrstamp = 0;
5153305Sphk			error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
5163305Sphk			if (error)
5171541Srgrimes				return (error);
5181541Srgrimes		}
5191541Srgrimes		if (ioflag & IO_APPEND) {
5201541Srgrimes			np->n_attrstamp = 0;
5213305Sphk			error = VOP_GETATTR(vp, &vattr, cred, p);
5223305Sphk			if (error)
5231541Srgrimes				return (error);
5241541Srgrimes			uio->uio_offset = np->n_size;
5251541Srgrimes		}
5261541Srgrimes	}
5271541Srgrimes	if (uio->uio_offset < 0)
5281541Srgrimes		return (EINVAL);
5291541Srgrimes	if (uio->uio_resid == 0)
5301541Srgrimes		return (0);
5311541Srgrimes	/*
5321541Srgrimes	 * Maybe this should be above the vnode op call, but so long as
5331541Srgrimes	 * file servers have no limits, i don't think it matters
5341541Srgrimes	 */
5351541Srgrimes	if (p && uio->uio_offset + uio->uio_resid >
5361541Srgrimes	      p->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
5371541Srgrimes		psignal(p, SIGXFSZ);
5381541Srgrimes		return (EFBIG);
5391541Srgrimes	}
5401541Srgrimes	/*
5411541Srgrimes	 * I use nm_rsize, not nm_wsize so that all buffer cache blocks
5421541Srgrimes	 * will be the same size within a filesystem. nfs_writerpc will
5431541Srgrimes	 * still use nm_wsize when sizing the rpc's.
5441541Srgrimes	 */
5459428Sdfr	biosize = vp->v_mount->mnt_stat.f_iosize;
5461541Srgrimes	do {
5471541Srgrimes		/*
5481541Srgrimes		 * Check for a valid write lease.
5491541Srgrimes		 */
5501541Srgrimes		if ((nmp->nm_flag & NFSMNT_NQNFS) &&
5519336Sdfr		    NQNFS_CKINVALID(vp, np, ND_WRITE)) {
5521541Srgrimes			do {
5539336Sdfr				error = nqnfs_getlease(vp, ND_WRITE, cred, p);
5541541Srgrimes			} while (error == NQNFS_EXPIRED);
5551541Srgrimes			if (error)
5561541Srgrimes				return (error);
5571541Srgrimes			if (np->n_lrev != np->n_brev ||
5581541Srgrimes			    (np->n_flag & NQNFSNONCACHE)) {
5593305Sphk				error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
5603305Sphk				if (error)
5611541Srgrimes					return (error);
5621541Srgrimes				np->n_brev = np->n_lrev;
5631541Srgrimes			}
5641541Srgrimes		}
5659336Sdfr		if ((np->n_flag & NQNFSNONCACHE) && uio->uio_iovcnt == 1) {
5669336Sdfr		    iomode = NFSV3WRITE_FILESYNC;
5679336Sdfr		    error = nfs_writerpc(vp, uio, cred, &iomode, &must_commit);
5689336Sdfr		    if (must_commit)
5699336Sdfr			nfs_clearcommit(vp->v_mount);
5709336Sdfr		    return (error);
5719336Sdfr		}
5721541Srgrimes		nfsstats.biocache_writes++;
5731541Srgrimes		lbn = uio->uio_offset / biosize;
5741541Srgrimes		on = uio->uio_offset & (biosize-1);
5751541Srgrimes		n = min((unsigned)(biosize - on), uio->uio_resid);
5761541Srgrimesagain:
5778692Sdg		if (uio->uio_offset + n > np->n_size) {
5788692Sdg			np->n_size = uio->uio_offset + n;
57925023Sdfr			np->n_flag |= NMODIFIED;
5808692Sdg			vnode_pager_setsize(vp, (u_long)np->n_size);
5818692Sdg		}
5828692Sdg		bufsize = biosize;
5838692Sdg		if ((lbn + 1) * biosize > np->n_size) {
5848692Sdg			bufsize = np->n_size - lbn * biosize;
5858692Sdg			bufsize = (bufsize + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1);
5868692Sdg		}
58731617Sdyson		bp = nfs_getcacheblk(vp, lbn, bufsize, p);
5881541Srgrimes		if (!bp)
5891541Srgrimes			return (EINTR);
5901541Srgrimes		if (bp->b_wcred == NOCRED) {
5911541Srgrimes			crhold(cred);
5921541Srgrimes			bp->b_wcred = cred;
5931541Srgrimes		}
5941541Srgrimes		np->n_flag |= NMODIFIED;
5958692Sdg
59631617Sdyson		if ((bp->b_blkno * DEV_BSIZE) + bp->b_dirtyend > np->n_size) {
59731617Sdyson			bp->b_dirtyend = np->n_size - (bp->b_blkno * DEV_BSIZE);
59831617Sdyson		}
59931617Sdyson
6001541Srgrimes		/*
60131617Sdyson		 * If the new write will leave a contiguous dirty
60231617Sdyson		 * area, just update the b_dirtyoff and b_dirtyend,
60331617Sdyson		 * otherwise force a write rpc of the old dirty area.
60431617Sdyson		 */
60531617Sdyson		if (bp->b_dirtyend > 0 &&
60631617Sdyson		    (on > bp->b_dirtyend || (on + n) < bp->b_dirtyoff)) {
60731617Sdyson			bp->b_proc = p;
60831617Sdyson			if (VOP_BWRITE(bp) == EINTR)
60931617Sdyson				return (EINTR);
61031617Sdyson			goto again;
61131617Sdyson		}
61231617Sdyson
61331617Sdyson		/*
6141541Srgrimes		 * Check for valid write lease and get one as required.
6151541Srgrimes		 * In case getblk() and/or bwrite() delayed us.
6161541Srgrimes		 */
6171541Srgrimes		if ((nmp->nm_flag & NFSMNT_NQNFS) &&
6189336Sdfr		    NQNFS_CKINVALID(vp, np, ND_WRITE)) {
6191541Srgrimes			do {
6209336Sdfr				error = nqnfs_getlease(vp, ND_WRITE, cred, p);
6211541Srgrimes			} while (error == NQNFS_EXPIRED);
6221541Srgrimes			if (error) {
6231541Srgrimes				brelse(bp);
6241541Srgrimes				return (error);
6251541Srgrimes			}
6261541Srgrimes			if (np->n_lrev != np->n_brev ||
6271541Srgrimes			    (np->n_flag & NQNFSNONCACHE)) {
6281541Srgrimes				brelse(bp);
6293305Sphk				error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
6303305Sphk				if (error)
6311541Srgrimes					return (error);
6321541Srgrimes				np->n_brev = np->n_lrev;
6331541Srgrimes				goto again;
6341541Srgrimes			}
6351541Srgrimes		}
6363305Sphk		error = uiomove((char *)bp->b_data + on, n, uio);
6373305Sphk		if (error) {
6381541Srgrimes			bp->b_flags |= B_ERROR;
6391541Srgrimes			brelse(bp);
6401541Srgrimes			return (error);
6411541Srgrimes		}
6421541Srgrimes		if (bp->b_dirtyend > 0) {
6431541Srgrimes			bp->b_dirtyoff = min(on, bp->b_dirtyoff);
6441541Srgrimes			bp->b_dirtyend = max((on + n), bp->b_dirtyend);
6451541Srgrimes		} else {
6461541Srgrimes			bp->b_dirtyoff = on;
6471541Srgrimes			bp->b_dirtyend = on + n;
6481541Srgrimes		}
6491541Srgrimes		if (bp->b_validend == 0 || bp->b_validend < bp->b_dirtyoff ||
6501541Srgrimes		    bp->b_validoff > bp->b_dirtyend) {
6511541Srgrimes			bp->b_validoff = bp->b_dirtyoff;
6521541Srgrimes			bp->b_validend = bp->b_dirtyend;
6531541Srgrimes		} else {
6541541Srgrimes			bp->b_validoff = min(bp->b_validoff, bp->b_dirtyoff);
6551541Srgrimes			bp->b_validend = max(bp->b_validend, bp->b_dirtyend);
6561541Srgrimes		}
65717186Sdfr
6581541Srgrimes		/*
65917186Sdfr		 * Since this block is being modified, it must be written
66017186Sdfr		 * again and not just committed.
66117186Sdfr		 */
66217186Sdfr		bp->b_flags &= ~B_NEEDCOMMIT;
66317186Sdfr
66417186Sdfr		/*
6651541Srgrimes		 * If the lease is non-cachable or IO_SYNC do bwrite().
6661541Srgrimes		 */
6671541Srgrimes		if ((np->n_flag & NQNFSNONCACHE) || (ioflag & IO_SYNC)) {
6681541Srgrimes			bp->b_proc = p;
6693305Sphk			error = VOP_BWRITE(bp);
6703305Sphk			if (error)
6711541Srgrimes				return (error);
6729336Sdfr			if (np->n_flag & NQNFSNONCACHE) {
6739336Sdfr				error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
6749336Sdfr				if (error)
6759336Sdfr					return (error);
6769336Sdfr			}
6771541Srgrimes		} else if ((n + on) == biosize &&
6781541Srgrimes			(nmp->nm_flag & NFSMNT_NQNFS) == 0) {
6791541Srgrimes			bp->b_proc = (struct proc *)0;
6809336Sdfr			bp->b_flags |= B_ASYNC;
6819336Sdfr			(void)nfs_writebp(bp, 0);
6821541Srgrimes		} else
6831541Srgrimes			bdwrite(bp);
6841541Srgrimes	} while (uio->uio_resid > 0 && n > 0);
6851541Srgrimes	return (0);
6861541Srgrimes}
6871541Srgrimes
6881541Srgrimes/*
6891541Srgrimes * Get an nfs cache block.
6901541Srgrimes * Allocate a new one if the block isn't currently in the cache
6911541Srgrimes * and return the block marked busy. If the calling process is
6921541Srgrimes * interrupted by a signal for an interruptible mount point, return
6931541Srgrimes * NULL.
6941541Srgrimes */
69512911Sphkstatic struct buf *
6961541Srgrimesnfs_getcacheblk(vp, bn, size, p)
6971541Srgrimes	struct vnode *vp;
6981541Srgrimes	daddr_t bn;
6991541Srgrimes	int size;
7001541Srgrimes	struct proc *p;
7011541Srgrimes{
7021541Srgrimes	register struct buf *bp;
70332755Sdyson	struct mount *mp;
70432755Sdyson	struct nfsmount *nmp;
7051541Srgrimes
70632755Sdyson	mp = vp->v_mount;
70732755Sdyson	nmp = VFSTONFS(mp);
70832755Sdyson
7091541Srgrimes	if (nmp->nm_flag & NFSMNT_INT) {
7101541Srgrimes		bp = getblk(vp, bn, size, PCATCH, 0);
7111541Srgrimes		while (bp == (struct buf *)0) {
7121541Srgrimes			if (nfs_sigintr(nmp, (struct nfsreq *)0, p))
7131541Srgrimes				return ((struct buf *)0);
7141541Srgrimes			bp = getblk(vp, bn, size, 0, 2 * hz);
7151541Srgrimes		}
7161541Srgrimes	} else
7171541Srgrimes		bp = getblk(vp, bn, size, 0, 0);
7185455Sdg
71932755Sdyson	if( vp->v_type == VREG) {
72032755Sdyson		int biosize;
72132755Sdyson		biosize = mp->mnt_stat.f_iosize;
7229336Sdfr		bp->b_blkno = (bn * biosize) / DEV_BSIZE;
72332755Sdyson	}
7245455Sdg
7251541Srgrimes	return (bp);
7261541Srgrimes}
7271541Srgrimes
7281541Srgrimes/*
7291541Srgrimes * Flush and invalidate all dirty buffers. If another process is already
7301541Srgrimes * doing the flush, just wait for completion.
7311541Srgrimes */
7321549Srgrimesint
7331541Srgrimesnfs_vinvalbuf(vp, flags, cred, p, intrflg)
7341541Srgrimes	struct vnode *vp;
7351541Srgrimes	int flags;
7361541Srgrimes	struct ucred *cred;
7371541Srgrimes	struct proc *p;
7381541Srgrimes	int intrflg;
7391541Srgrimes{
7401541Srgrimes	register struct nfsnode *np = VTONFS(vp);
7411541Srgrimes	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
7421541Srgrimes	int error = 0, slpflag, slptimeo;
7431541Srgrimes
74432755Sdyson	if (vp->v_flag & VXLOCK) {
74532755Sdyson		return (0);
74632755Sdyson	}
74732755Sdyson
7481541Srgrimes	if ((nmp->nm_flag & NFSMNT_INT) == 0)
7491541Srgrimes		intrflg = 0;
7501541Srgrimes	if (intrflg) {
7511541Srgrimes		slpflag = PCATCH;
7521541Srgrimes		slptimeo = 2 * hz;
7531541Srgrimes	} else {
7541541Srgrimes		slpflag = 0;
7551541Srgrimes		slptimeo = 0;
7561541Srgrimes	}
7571541Srgrimes	/*
7581541Srgrimes	 * First wait for any other process doing a flush to complete.
7591541Srgrimes	 */
7601541Srgrimes	while (np->n_flag & NFLUSHINPROG) {
7611541Srgrimes		np->n_flag |= NFLUSHWANT;
7621541Srgrimes		error = tsleep((caddr_t)&np->n_flag, PRIBIO + 2, "nfsvinval",
7631541Srgrimes			slptimeo);
7641541Srgrimes		if (error && intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p))
7651541Srgrimes			return (EINTR);
7661541Srgrimes	}
7671541Srgrimes
7681541Srgrimes	/*
7691541Srgrimes	 * Now, flush as required.
7701541Srgrimes	 */
7711541Srgrimes	np->n_flag |= NFLUSHINPROG;
7721541Srgrimes	error = vinvalbuf(vp, flags, cred, p, slpflag, 0);
7731541Srgrimes	while (error) {
7741541Srgrimes		if (intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p)) {
7751541Srgrimes			np->n_flag &= ~NFLUSHINPROG;
7761541Srgrimes			if (np->n_flag & NFLUSHWANT) {
7771541Srgrimes				np->n_flag &= ~NFLUSHWANT;
7781541Srgrimes				wakeup((caddr_t)&np->n_flag);
7791541Srgrimes			}
7801541Srgrimes			return (EINTR);
7811541Srgrimes		}
7821541Srgrimes		error = vinvalbuf(vp, flags, cred, p, 0, slptimeo);
7831541Srgrimes	}
7841541Srgrimes	np->n_flag &= ~(NMODIFIED | NFLUSHINPROG);
7851541Srgrimes	if (np->n_flag & NFLUSHWANT) {
7861541Srgrimes		np->n_flag &= ~NFLUSHWANT;
7871541Srgrimes		wakeup((caddr_t)&np->n_flag);
7881541Srgrimes	}
7891541Srgrimes	return (0);
7901541Srgrimes}
7911541Srgrimes
7921541Srgrimes/*
7931541Srgrimes * Initiate asynchronous I/O. Return an error if no nfsiods are available.
7941541Srgrimes * This is mainly to avoid queueing async I/O requests when the nfsiods
7951541Srgrimes * are all hung on a dead server.
7961541Srgrimes */
/*
 * Parameters:
 *	bp	buffer to hand to an nfsiod; its mount is found through
 *		bp->b_vp->v_mount.
 *	cred	credential recorded in bp->b_rcred (reads) or bp->b_wcred
 *		(writes) when the buffer has none yet; a reference is taken
 *		with crhold() in that case.
 *
 * Returns:
 *	0	the buffer was appended to the mount's nm_bufq.
 *	EIO	nfs_numasync is zero, or no iod is idle and none is already
 *		serving this mount; the caller is expected to do the i/o
 *		synchronously (see the comment before the final return).
 *	EINTR	the sleep waiting for the queue to drain failed and
 *		nfs_sigintr() reported a pending interrupt.
 */
7971549Srgrimesint
7981541Srgrimesnfs_asyncio(bp, cred)
7991541Srgrimes	register struct buf *bp;
8001541Srgrimes	struct ucred *cred;
8011541Srgrimes{
80219449Sdfr	struct nfsmount *nmp;
80319449Sdfr	int i;
80419449Sdfr	int gotiod;
80519449Sdfr	int slpflag = 0;
80619449Sdfr	int slptimeo = 0;
80719449Sdfr	int error;
8081541Srgrimes
8091541Srgrimes	if (nfs_numasync == 0)
8101541Srgrimes		return (EIO);
81119449Sdfr
81219449Sdfr	nmp = VFSTONFS(bp->b_vp->v_mount);
81319449Sdfragain:
	/*
	 * Re-entered via "goto again" below.  Note that slpflag is set back
	 * to PCATCH here on an interruptible mount, while slptimeo keeps
	 * whatever value the drain loop last gave it.
	 */
81419449Sdfr	if (nmp->nm_flag & NFSMNT_INT)
81519449Sdfr		slpflag = PCATCH;
81619449Sdfr	gotiod = FALSE;
81719449Sdfr
81819449Sdfr	/*
81919449Sdfr	 * Find a free iod to process this request.
82019449Sdfr	 */
8211541Srgrimes	for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
82219449Sdfr		if (nfs_iodwant[i]) {
82319449Sdfr			/*
82419449Sdfr			 * Found one, so wake it up and tell it which
82519449Sdfr			 * mount to process.
82619449Sdfr			 */
82719449Sdfr			NFS_DPF(ASYNCIO,
82819449Sdfr				("nfs_asyncio: waking iod %d for mount %p\n",
82919449Sdfr				 i, nmp));
83019449Sdfr			nfs_iodwant[i] = (struct proc *)0;
83119449Sdfr			nfs_iodmount[i] = nmp;
83219449Sdfr			nmp->nm_bufqiods++;
83319449Sdfr			wakeup((caddr_t)&nfs_iodwant[i]);
83419449Sdfr			gotiod = TRUE;
83525023Sdfr			break;
83619449Sdfr		}
83719449Sdfr
83819449Sdfr	/*
83919449Sdfr	 * If none are free, we may already have an iod working on this mount
84019449Sdfr	 * point.  If so, it will process our request.
84119449Sdfr	 */
84219449Sdfr	if (!gotiod) {
84319449Sdfr		if (nmp->nm_bufqiods > 0) {
84419449Sdfr			NFS_DPF(ASYNCIO,
84519449Sdfr				("nfs_asyncio: %d iods are already processing mount %p\n",
84619449Sdfr				 nmp->nm_bufqiods, nmp));
84719449Sdfr			gotiod = TRUE;
84819449Sdfr		}
84919449Sdfr	}
85019449Sdfr
85119449Sdfr	/*
85219449Sdfr	 * If we have an iod which can process the request, then queue
85319449Sdfr	 * the buffer.
85419449Sdfr	 */
85519449Sdfr	if (gotiod) {
85619449Sdfr		/*
85719449Sdfr		 * Ensure that the queue never grows too large.
85819449Sdfr		 */
85919449Sdfr		while (nmp->nm_bufqlen >= 2*nfs_numasync) {
86019449Sdfr			NFS_DPF(ASYNCIO,
86119449Sdfr				("nfs_asyncio: waiting for mount %p queue to drain\n", nmp));
86219449Sdfr			nmp->nm_bufqwant = TRUE;
86319449Sdfr			error = tsleep(&nmp->nm_bufq, slpflag | PRIBIO,
86419449Sdfr				       "nfsaio", slptimeo);
86519449Sdfr			if (error) {
				/*
				 * NOTE(review): this return leaves the buffer
				 * unqueued even though an iod may have been
				 * woken and nm_bufqiods bumped above;
				 * presumably the iod copes with finding
				 * nothing to do -- confirm.
				 */
86619449Sdfr				if (nfs_sigintr(nmp, NULL, bp->b_proc))
86719449Sdfr					return (EINTR);
				/*
				 * The sleep failed but nfs_sigintr() did not
				 * ask us to give up: stop catching signals
				 * and poll with a 2 second timeout instead.
				 */
86819449Sdfr				if (slpflag == PCATCH) {
86919449Sdfr					slpflag = 0;
87019449Sdfr					slptimeo = 2 * hz;
87119449Sdfr				}
87219449Sdfr			}
87319449Sdfr			/*
87419449Sdfr			 * We might have lost our iod while sleeping,
87519449Sdfr			 * so check and loop if necessary.
87619449Sdfr			 */
87719449Sdfr			if (nmp->nm_bufqiods == 0) {
87819449Sdfr				NFS_DPF(ASYNCIO,
87919449Sdfr					("nfs_asyncio: no iods after mount %p queue was drained, looping\n", nmp));
88019449Sdfr				goto again;
88119449Sdfr			}
88219449Sdfr		}
88319449Sdfr
		/*
		 * Attach the caller's credential to the buffer if it does
		 * not already carry one for this direction, taking a
		 * reference on it.  Writes are also marked B_WRITEINPROG.
		 */
8841541Srgrimes		if (bp->b_flags & B_READ) {
8851541Srgrimes			if (bp->b_rcred == NOCRED && cred != NOCRED) {
8861541Srgrimes				crhold(cred);
8871541Srgrimes				bp->b_rcred = cred;
8881541Srgrimes			}
8891541Srgrimes		} else {
8909336Sdfr			bp->b_flags |= B_WRITEINPROG;
8911541Srgrimes			if (bp->b_wcred == NOCRED && cred != NOCRED) {
8921541Srgrimes				crhold(cred);
8931541Srgrimes				bp->b_wcred = cred;
8941541Srgrimes			}
8951541Srgrimes		}
8968876Srgrimes
		/* Append to the per-mount queue and account for it. */
89719449Sdfr		TAILQ_INSERT_TAIL(&nmp->nm_bufq, bp, b_freelist);
89819449Sdfr		nmp->nm_bufqlen++;
8991541Srgrimes		return (0);
90019449Sdfr	}
9019336Sdfr
9029336Sdfr	/*
90319449Sdfr	 * All the iods are busy on other mounts, so return EIO to
90419449Sdfr	 * force the caller to process the i/o synchronously.
9059336Sdfr	 */
90619449Sdfr	NFS_DPF(ASYNCIO, ("nfs_asyncio: no iods available, i/o is synchronous\n"));
90719449Sdfr	return (EIO);
9081541Srgrimes}
9091541Srgrimes
9101541Srgrimes/*
9111541Srgrimes * Do an I/O operation to/from a cache block. This may be called
9121541Srgrimes * synchronously or from an nfsiod.
9131541Srgrimes */
9141541Srgrimesint
9151541Srgrimesnfs_doio(bp, cr, p)
9161541Srgrimes	register struct buf *bp;
9173305Sphk	struct ucred *cr;
9181541Srgrimes	struct proc *p;
9191541Srgrimes{
9201541Srgrimes	register struct uio *uiop;
9211541Srgrimes	register struct vnode *vp;
9221541Srgrimes	struct nfsnode *np;
9231541Srgrimes	struct nfsmount *nmp;
9249336Sdfr	int error = 0, diff, len, iomode, must_commit = 0;
9251541Srgrimes	struct uio uio;
9261541Srgrimes	struct iovec io;
9271541Srgrimes
9281541Srgrimes	vp = bp->b_vp;
9291541Srgrimes	np = VTONFS(vp);
9301541Srgrimes	nmp = VFSTONFS(vp->v_mount);
9311541Srgrimes	uiop = &uio;
9321541Srgrimes	uiop->uio_iov = &io;
9331541Srgrimes	uiop->uio_iovcnt = 1;
9341541Srgrimes	uiop->uio_segflg = UIO_SYSSPACE;
9351541Srgrimes	uiop->uio_procp = p;
9361541Srgrimes
9371541Srgrimes	/*
9381541Srgrimes	 * Historically, paging was done with physio, but no more.
9391541Srgrimes	 */
9403664Sphk	if (bp->b_flags & B_PHYS) {
9413664Sphk	    /*
9423664Sphk	     * ...though reading /dev/drum still gets us here.
9433664Sphk	     */
9441541Srgrimes	    io.iov_len = uiop->uio_resid = bp->b_bcount;
9453664Sphk	    /* mapping was done by vmapbuf() */
9461541Srgrimes	    io.iov_base = bp->b_data;
9479336Sdfr	    uiop->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE;
9483664Sphk	    if (bp->b_flags & B_READ) {
9493664Sphk		uiop->uio_rw = UIO_READ;
9503664Sphk		nfsstats.read_physios++;
9513664Sphk		error = nfs_readrpc(vp, uiop, cr);
9523664Sphk	    } else {
9539336Sdfr		int com;
9549336Sdfr
9559336Sdfr		iomode = NFSV3WRITE_DATASYNC;
9563664Sphk		uiop->uio_rw = UIO_WRITE;
9573664Sphk		nfsstats.write_physios++;
9589336Sdfr		error = nfs_writerpc(vp, uiop, cr, &iomode, &com);
9593664Sphk	    }
9603664Sphk	    if (error) {
9613664Sphk		bp->b_flags |= B_ERROR;
9623664Sphk		bp->b_error = error;
9633664Sphk	    }
9643664Sphk	} else if (bp->b_flags & B_READ) {
9653664Sphk	    io.iov_len = uiop->uio_resid = bp->b_bcount;
9663664Sphk	    io.iov_base = bp->b_data;
9671541Srgrimes	    uiop->uio_rw = UIO_READ;
9681541Srgrimes	    switch (vp->v_type) {
9691541Srgrimes	    case VREG:
9709336Sdfr		uiop->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE;
9711541Srgrimes		nfsstats.read_bios++;
9721541Srgrimes		error = nfs_readrpc(vp, uiop, cr);
9731541Srgrimes		if (!error) {
9741541Srgrimes		    bp->b_validoff = 0;
9751541Srgrimes		    if (uiop->uio_resid) {
9761541Srgrimes			/*
9771541Srgrimes			 * If len > 0, there is a hole in the file and
9781541Srgrimes			 * no writes after the hole have been pushed to
9791541Srgrimes			 * the server yet.
9801541Srgrimes			 * Just zero fill the rest of the valid area.
9811541Srgrimes			 */
9821541Srgrimes			diff = bp->b_bcount - uiop->uio_resid;
9839336Sdfr			len = np->n_size - (((u_quad_t)bp->b_blkno) * DEV_BSIZE
9841541Srgrimes				+ diff);
9851541Srgrimes			if (len > 0) {
9861541Srgrimes			    len = min(len, uiop->uio_resid);
9871541Srgrimes			    bzero((char *)bp->b_data + diff, len);
9881541Srgrimes			    bp->b_validend = diff + len;
9891541Srgrimes			} else
9901541Srgrimes			    bp->b_validend = diff;
9911541Srgrimes		    } else
9921541Srgrimes			bp->b_validend = bp->b_bcount;
9931541Srgrimes		}
9941541Srgrimes		if (p && (vp->v_flag & VTEXT) &&
9951541Srgrimes			(((nmp->nm_flag & NFSMNT_NQNFS) &&
9969336Sdfr			  NQNFS_CKINVALID(vp, np, ND_READ) &&
9971541Srgrimes			  np->n_lrev != np->n_brev) ||
9981541Srgrimes			 (!(nmp->nm_flag & NFSMNT_NQNFS) &&
99918397Snate			  np->n_mtime != np->n_vattr.va_mtime.tv_sec))) {
10001541Srgrimes			uprintf("Process killed due to text file modification\n");
10011541Srgrimes			psignal(p, SIGKILL);
10021541Srgrimes			p->p_flag |= P_NOSWAP;
10031541Srgrimes		}
10041541Srgrimes		break;
10051541Srgrimes	    case VLNK:
10069336Sdfr		uiop->uio_offset = (off_t)0;
10071541Srgrimes		nfsstats.readlink_bios++;
10081541Srgrimes		error = nfs_readlinkrpc(vp, uiop, cr);
10091541Srgrimes		break;
10101541Srgrimes	    case VDIR:
10111541Srgrimes		nfsstats.readdir_bios++;
10129336Sdfr		uiop->uio_offset = ((u_quad_t)bp->b_lblkno) * NFS_DIRBLKSIZ;
10139336Sdfr		if (nmp->nm_flag & NFSMNT_RDIRPLUS) {
10149336Sdfr			error = nfs_readdirplusrpc(vp, uiop, cr);
10159336Sdfr			if (error == NFSERR_NOTSUPP)
10169336Sdfr				nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
10179336Sdfr		}
10189336Sdfr		if ((nmp->nm_flag & NFSMNT_RDIRPLUS) == 0)
10199336Sdfr			error = nfs_readdirrpc(vp, uiop, cr);
10201541Srgrimes		break;
10213305Sphk	    default:
10223305Sphk		printf("nfs_doio:  type %x unexpected\n",vp->v_type);
10233305Sphk		break;
10241541Srgrimes	    };
10251541Srgrimes	    if (error) {
10261541Srgrimes		bp->b_flags |= B_ERROR;
10271541Srgrimes		bp->b_error = error;
10281541Srgrimes	    }
10291541Srgrimes	} else {
10308692Sdg	    if (((bp->b_blkno * DEV_BSIZE) + bp->b_dirtyend) > np->n_size)
10318692Sdg		bp->b_dirtyend = np->n_size - (bp->b_blkno * DEV_BSIZE);
10328692Sdg
10338692Sdg	    if (bp->b_dirtyend > bp->b_dirtyoff) {
10348692Sdg		io.iov_len = uiop->uio_resid = bp->b_dirtyend
10359336Sdfr		    - bp->b_dirtyoff;
10369336Sdfr		uiop->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE
10379336Sdfr		    + bp->b_dirtyoff;
10388692Sdg		io.iov_base = (char *)bp->b_data + bp->b_dirtyoff;
10398692Sdg		uiop->uio_rw = UIO_WRITE;
10408692Sdg		nfsstats.write_bios++;
104125785Sdfr		if ((bp->b_flags & (B_ASYNC | B_NEEDCOMMIT | B_NOCACHE | B_CLUSTER)) == B_ASYNC)
10429336Sdfr		    iomode = NFSV3WRITE_UNSTABLE;
10438692Sdg		else
10449336Sdfr		    iomode = NFSV3WRITE_FILESYNC;
10459336Sdfr		bp->b_flags |= B_WRITEINPROG;
10469336Sdfr		error = nfs_writerpc(vp, uiop, cr, &iomode, &must_commit);
104725003Sdfr		if (!error && iomode == NFSV3WRITE_UNSTABLE) {
104825003Sdfr		    bp->b_flags |= B_NEEDCOMMIT;
104925003Sdfr		    if (bp->b_dirtyoff == 0
105025003Sdfr			&& bp->b_dirtyend == bp->b_bufsize)
105125003Sdfr			bp->b_flags |= B_CLUSTEROK;
105225003Sdfr		} else
10539336Sdfr		    bp->b_flags &= ~B_NEEDCOMMIT;
10549336Sdfr		bp->b_flags &= ~B_WRITEINPROG;
10558692Sdg
10569336Sdfr		/*
10579336Sdfr		 * For an interrupted write, the buffer is still valid
10589336Sdfr		 * and the write hasn't been pushed to the server yet,
10599336Sdfr		 * so we can't set B_ERROR and report the interruption
10609336Sdfr		 * by setting B_EINTR. For the B_ASYNC case, B_EINTR
10619336Sdfr		 * is not relevant, so the rpc attempt is essentially
10629336Sdfr		 * a noop.  For the case of a V3 write rpc not being
10639336Sdfr		 * committed to stable storage, the block is still
10649336Sdfr		 * dirty and requires either a commit rpc or another
10659336Sdfr		 * write rpc with iomode == NFSV3WRITE_FILESYNC before
10669336Sdfr		 * the block is reused. This is indicated by setting
10679336Sdfr		 * the B_DELWRI and B_NEEDCOMMIT flags.
10689336Sdfr		 */
10699336Sdfr    		if (error == EINTR
10709336Sdfr		    || (!error && (bp->b_flags & B_NEEDCOMMIT))) {
10718692Sdg			bp->b_flags &= ~(B_INVAL|B_NOCACHE);
107226669Sdyson			++numdirtybuffers;
10738692Sdg			bp->b_flags |= B_DELWRI;
107432755Sdyson			reassignbuf(bp, vp);
107532755Sdyson			if ((bp->b_flags & B_ASYNC) == 0)
107632755Sdyson			    bp->b_flags |= B_EINTR;
10778692Sdg	    	} else {
10788692Sdg			if (error) {
10798692Sdg				bp->b_flags |= B_ERROR;
10808692Sdg				bp->b_error = np->n_error = error;
10818692Sdg				np->n_flag |= NWRITEERR;
10828692Sdg			}
10838692Sdg			bp->b_dirtyoff = bp->b_dirtyend = 0;
10848692Sdg		}
10851541Srgrimes	    } else {
10868692Sdg		bp->b_resid = 0;
10878692Sdg		biodone(bp);
10888692Sdg		return (0);
10891541Srgrimes	    }
10901541Srgrimes	}
10911541Srgrimes	bp->b_resid = uiop->uio_resid;
10929336Sdfr	if (must_commit)
10939336Sdfr		nfs_clearcommit(vp->v_mount);
10941541Srgrimes	biodone(bp);
10951541Srgrimes	return (error);
10961541Srgrimes}
1097