/* nfs_bio.c -- revision 79224 */
1139823Simp/*
21541Srgrimes * Copyright (c) 1989, 1993
31541Srgrimes *	The Regents of the University of California.  All rights reserved.
41541Srgrimes *
51541Srgrimes * This code is derived from software contributed to Berkeley by
61541Srgrimes * Rick Macklem at The University of Guelph.
71541Srgrimes *
81541Srgrimes * Redistribution and use in source and binary forms, with or without
91541Srgrimes * modification, are permitted provided that the following conditions
101541Srgrimes * are met:
111541Srgrimes * 1. Redistributions of source code must retain the above copyright
121541Srgrimes *    notice, this list of conditions and the following disclaimer.
131541Srgrimes * 2. Redistributions in binary form must reproduce the above copyright
141541Srgrimes *    notice, this list of conditions and the following disclaimer in the
151541Srgrimes *    documentation and/or other materials provided with the distribution.
161541Srgrimes * 3. All advertising materials mentioning features or use of this software
171541Srgrimes *    must display the following acknowledgement:
181541Srgrimes *	This product includes software developed by the University of
191541Srgrimes *	California, Berkeley and its contributors.
201541Srgrimes * 4. Neither the name of the University nor the names of its contributors
211541Srgrimes *    may be used to endorse or promote products derived from this software
221541Srgrimes *    without specific prior written permission.
231541Srgrimes *
241541Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
251541Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
261541Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
271541Srgrimes * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
281541Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
291541Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
301541Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
311541Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
3222521Sdyson * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
331541Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
341541Srgrimes * SUCH DAMAGE.
3583651Speter *
3683654Speter *	@(#)nfs_bio.c	8.9 (Berkeley) 3/30/95
3722521Sdyson * $FreeBSD: head/sys/nfsclient/nfs_bio.c 79224 2001-07-04 16:20:28Z dillon $
38190380Srwatson */
39190380Srwatson
401541Srgrimes
411541Srgrimes#include <sys/param.h>
4279247Sjhb#include <sys/systm.h>
4379247Sjhb#include <sys/resourcevar.h>
4479247Sjhb#include <sys/signalvar.h>
45192578Srwatson#include <sys/proc.h>
4679247Sjhb#include <sys/bio.h>
4779247Sjhb#include <sys/buf.h>
481541Srgrimes#include <sys/vnode.h>
493305Sphk#include <sys/mount.h>
5079247Sjhb#include <sys/kernel.h>
511541Srgrimes
521541Srgrimes#include <vm/vm.h>
531541Srgrimes#include <vm/vm_extern.h>
5412662Sdg#include <vm/vm_page.h>
5525930Sdfr#include <vm/vm_object.h>
5625930Sdfr#include <vm/vm_pager.h>
5725930Sdfr#include <vm/vnode_pager.h>
5825930Sdfr
591541Srgrimes#include <nfs/rpcv2.h>
601541Srgrimes#include <nfs/nfsproto.h>
619336Sdfr#include <nfs/nfs.h>
6283651Speter#include <nfs/nfsmount.h>
6383651Speter#include <nfs/nqnfs.h>
6483651Speter#include <nfs/nfsnode.h>
65190380Srwatson
661541Srgrimes/*
6783651Speter * Just call nfs_writebp() with the force argument set to 1.
6883651Speter *
69138899Sps * NOTE: B_DONE may or may not be set in a_bp on call.
70138899Sps */
/*
 * bwrite() entry point installed in buf_ops_nfs below.
 *
 * Simply delegates to nfs_writebp() with the force argument set to 1,
 * running on behalf of the current process.  See the comment above:
 * B_DONE may or may not be set in bp on entry.
 */
static int
nfs_bwrite(struct buf *bp)
{
	return (nfs_writebp(bp, 1, curproc));
}
7625930Sdfr
/*
 * Buffer operations vector for NFS vnodes: routes bwrite() through
 * nfs_bwrite() so that NFS buffers are flushed via nfs_writebp().
 */
struct buf_ops buf_ops_nfs = {
	"buf_ops_nfs",
	nfs_bwrite
};
8146349Salc
8232755Sdyson
8332755Sdysonstatic struct buf *nfs_getcacheblk __P((struct vnode *vp, daddr_t bn, int size,
8432755Sdyson					struct proc *p));
8534206Sdyson
8636563Speterextern int nfs_numasync;
8783366Sjulianextern int nfs_pbuf_freecnt;
8836563Speterextern struct nfsstats nfsstats;
8936563Speter
90116461Salc/*
9136563Speter * Vnode op for VM getpages.
92138899Sps */
/*
 * Vnode op for VM getpages (VOP_GETPAGES).
 *
 * Fills the pages in ap->a_m by issuing a single nfs_readrpc() for the
 * whole run, using a pageable buffer (pbuf) purely as a source of KVA to
 * map the pages into.  On success, each fully-read page is marked
 * completely valid and clean; a trailing partially-read page is marked
 * valid/clean only up to the number of bytes actually returned.
 *
 * Returns 0 on success or VM_PAGER_ERROR on failure.  Pages other than
 * ap->a_reqpage are freed on the error paths; the requested page itself
 * is left for the caller (the pager) to dispose of.
 */
int
nfs_getpages(ap)
	struct vop_getpages_args /* {
		struct vnode *a_vp;
		vm_page_t *a_m;
		int a_count;
		int a_reqpage;
		vm_ooffset_t a_offset;
	} */ *ap;
{
	int i, error, nextoff, size, toff, count, npages;
	struct uio uio;
	struct iovec iov;
	vm_offset_t kva;
	struct buf *bp;
	struct vnode *vp;
	struct proc *p;
	struct ucred *cred;
	struct nfsmount *nmp;
	vm_page_t *pages;

	GIANT_REQUIRED;

	vp = ap->a_vp;
	p = curproc;				/* XXX */
	cred = curproc->p_ucred;		/* XXX */
	nmp = VFSTONFS(vp->v_mount);
	pages = ap->a_m;
	count = ap->a_count;

	if (vp->v_object == NULL) {
		printf("nfs_getpages: called with non-merged cache vnode??\n");
		return VM_PAGER_ERROR;
	}

	/* For NFSv3, fetch server fsinfo once before doing I/O. */
	if ((nmp->nm_flag & NFSMNT_NFSV3) != 0 &&
	    (nmp->nm_state & NFSSTA_GOTFSINFO) == 0) {
		(void)nfs_fsinfo(nmp, vp, cred, p);
	}

	npages = btoc(count);

	/*
	 * If the requested page is partially valid, just return it and
	 * allow the pager to zero-out the blanks.  Partially valid pages
	 * can only occur at the file EOF.
	 */

	{
		vm_page_t m = pages[ap->a_reqpage];

		if (m->valid != 0) {
			/* handled by vm_fault now	  */
			/* vm_page_zero_invalid(m, TRUE); */
			for (i = 0; i < npages; ++i) {
				if (i != ap->a_reqpage)
					vm_page_free(pages[i]);
			}
			return(0);
		}
	}

	/*
	 * We use only the kva address for the buffer, but this is extremely
	 * convienient and fast.
	 */
	bp = getpbuf(&nfs_pbuf_freecnt);

	kva = (vm_offset_t) bp->b_data;
	pmap_qenter(kva, pages, npages);

	/* Describe the whole page run as one contiguous read. */
	iov.iov_base = (caddr_t) kva;
	iov.iov_len = count;
	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_offset = IDX_TO_OFF(pages[0]->pindex);
	uio.uio_resid = count;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_rw = UIO_READ;
	uio.uio_procp = p;

	error = nfs_readrpc(vp, &uio, cred);
	pmap_qremove(kva, npages);

	relpbuf(bp, &nfs_pbuf_freecnt);

	/* Total failure only if the RPC errored AND moved no data at all. */
	if (error && (uio.uio_resid == count)) {
		printf("nfs_getpages: error %d\n", error);
		for (i = 0; i < npages; ++i) {
			if (i != ap->a_reqpage)
				vm_page_free(pages[i]);
		}
		return VM_PAGER_ERROR;
	}

	/*
	 * Calculate the number of bytes read and validate only that number
	 * of bytes.  Note that due to pending writes, size may be 0.  This
	 * does not mean that the remaining data is invalid!
	 */

	size = count - uio.uio_resid;

	for (i = 0, toff = 0; i < npages; i++, toff = nextoff) {
		vm_page_t m;
		nextoff = toff + PAGE_SIZE;
		m = pages[i];

		m->flags &= ~PG_ZERO;

		if (nextoff <= size) {
			/*
			 * Read operation filled an entire page
			 */
			m->valid = VM_PAGE_BITS_ALL;
			vm_page_undirty(m);
		} else if (size > toff) {
			/*
			 * Read operation filled a partial page.
			 */
			m->valid = 0;
			vm_page_set_validclean(m, 0, size - toff);
			/* handled by vm_fault now	  */
			/* vm_page_zero_invalid(m, TRUE); */
		}
		
		if (i != ap->a_reqpage) {
			/*
			 * Whether or not to leave the page activated is up in
			 * the air, but we should put the page on a page queue
			 * somewhere (it already is in the object).  Result:
			 * It appears that emperical results show that
			 * deactivating pages is best.
			 */

			/*
			 * Just in case someone was asking for this page we
			 * now tell them that it is ok to use.
			 */
			if (!error) {
				if (m->flags & PG_WANTED)
					vm_page_activate(m);
				else
					vm_page_deactivate(m);
				vm_page_wakeup(m);
			} else {
				vm_page_free(m);
			}
		}
	}
	return 0;
}
24538799Sdfr
24634206Sdyson/*
24775692Salfred * Vnode op for VM putpages.
24834206Sdyson */
24925930Sdfrint
25025930Sdfrnfs_putpages(ap)
251100450Salc	struct vop_putpages_args /* {
252116461Salc		struct vnode *a_vp;
25325930Sdfr		vm_page_t *a_m;
25425930Sdfr		int a_count;
25525930Sdfr		int a_sync;
25625930Sdfr		int *a_rtvals;
25734206Sdyson		vm_ooffset_t a_offset;
25834096Smsmith	} */ *ap;
25934096Smsmith{
26083651Speter	struct uio uio;
26134096Smsmith	struct iovec iov;
26234206Sdyson	vm_offset_t kva;
26334206Sdyson	struct buf *bp;
26434206Sdyson	int iomode, must_commit, i, error, npages, count;
26534206Sdyson	off_t offset;
26636563Speter	int *rtvals;
26746349Salc	struct vnode *vp;
26834206Sdyson	struct proc *p;
26936563Speter	struct ucred *cred;
27083366Sjulian	struct nfsmount *nmp;
27136563Speter	struct nfsnode *np;
27236563Speter	vm_page_t *pages;
27346349Salc
27436563Speter	GIANT_REQUIRED;
27534206Sdyson
27636563Speter	vp = ap->a_vp;
27746349Salc	np = VTONFS(vp);
27883366Sjulian	p = curproc;				/* XXX */
27991406Sjhb	cred = curproc->p_ucred;		/* XXX */
28036563Speter	nmp = VFSTONFS(vp->v_mount);
28136563Speter	pages = ap->a_m;
28236563Speter	count = ap->a_count;
28334206Sdyson	rtvals = ap->a_rtvals;
28436563Speter	npages = btoc(count);
28546349Salc	offset = IDX_TO_OFF(pages[0]->pindex);
286158739Smohans
287158739Smohans	GIANT_REQUIRED;
28836563Speter
28976827Salfred	if ((nmp->nm_flag & NFSMNT_NFSV3) != 0 &&
290158739Smohans	    (nmp->nm_state & NFSSTA_GOTFSINFO) == 0) {
29183366Sjulian		(void)nfs_fsinfo(nmp, vp, cred, p);
292158739Smohans	}
293158739Smohans
29434206Sdyson	for (i = 0; i < npages; i++) {
295158739Smohans		rtvals[i] = VM_PAGER_AGAIN;
296157557Smohans	}
297158739Smohans
298158739Smohans	/*
299158739Smohans	 * When putting pages, do not extend file past EOF.
300158739Smohans	 */
301158739Smohans
302138899Sps	if (offset + count > np->n_size) {
30383651Speter		count = np->n_size - offset;
30434206Sdyson		if (count < 0)
30534206Sdyson			count = 0;
30634206Sdyson	}
30746349Salc
30846349Salc	/*
30946349Salc	 * We use only the kva address for the buffer, but this is extremely
31046349Salc	 * convienient and fast.
31146349Salc	 */
31246349Salc	bp = getpbuf(&nfs_pbuf_freecnt);
31346349Salc
314158739Smohans	kva = (vm_offset_t) bp->b_data;
31546349Salc	pmap_qenter(kva, pages, npages);
31646349Salc
31734206Sdyson	iov.iov_base = (caddr_t) kva;
31834206Sdyson	iov.iov_len = count;
31934206Sdyson	uio.uio_iov = &iov;
32042957Sdillon	uio.uio_iovcnt = 1;
32134206Sdyson	uio.uio_offset = offset;
32234206Sdyson	uio.uio_resid = count;
32336563Speter	uio.uio_segflg = UIO_SYSSPACE;
324170292Sattilio	uio.uio_rw = UIO_WRITE;
325170292Sattilio	uio.uio_procp = p;
32634206Sdyson
32734206Sdyson	if ((ap->a_sync & VM_PAGER_PUT_SYNC) == 0)
32836563Speter	    iomode = NFSV3WRITE_UNSTABLE;
32934206Sdyson	else
33034206Sdyson	    iomode = NFSV3WRITE_FILESYNC;
33146349Salc
33236563Speter	error = nfs_writerpc(vp, &uio, cred, &iomode, &must_commit);
33334206Sdyson
33434206Sdyson	pmap_qremove(kva, npages);
33583366Sjulian	relpbuf(bp, &nfs_pbuf_freecnt);
33634206Sdyson
33734206Sdyson	if (!error) {
33834206Sdyson		int nwritten = round_page(count - uio.uio_resid) / PAGE_SIZE;
33934206Sdyson		for (i = 0; i < nwritten; i++) {
34034206Sdyson			rtvals[i] = VM_PAGER_OK;
34134206Sdyson			vm_page_undirty(pages[i]);
342122953Salfred		}
34334206Sdyson		if (must_commit) {
34434206Sdyson			nfs_clearcommit(vp->v_mount);
34542957Sdillon		}
34634206Sdyson	}
34734206Sdyson	return rtvals[0];
34836563Speter}
34934206Sdyson
35034206Sdyson/*
35149945Salc * Vnode op for read using bio
35234206Sdyson */
/*
 * Common buffered-read path for regular files, symlinks, and directories.
 *
 * Maintains approximate NFS cache consistency (see the big comment below),
 * handles NQNFS leases and the no-cache case, performs read-ahead for
 * VREG and VDIR, and copies data out of buffer-cache blocks with uiomove().
 * Loops until the uio is satisfied, EOF is hit (n == 0), or an error occurs.
 */
int
nfs_bioread(vp, uio, ioflag, cred)
	register struct vnode *vp;
	register struct uio *uio;
	int ioflag;
	struct ucred *cred;
{
	register struct nfsnode *np = VTONFS(vp);
	register int biosize, i;
	struct buf *bp = 0, *rabp;
	struct vattr vattr;
	struct proc *p;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	daddr_t lbn, rabn;
	int bcount;
	int seqcount;
	int nra, error = 0, n = 0, on = 0;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_READ)
		panic("nfs_read mode");
#endif
	if (uio->uio_resid == 0)
		return (0);
	if (uio->uio_offset < 0)	/* XXX VDIR cookies can be negative */
		return (EINVAL);
	p = uio->uio_procp;

	if ((nmp->nm_flag & NFSMNT_NFSV3) != 0 &&
	    (nmp->nm_state & NFSSTA_GOTFSINFO) == 0)
		(void)nfs_fsinfo(nmp, vp, cred, p);
	if (vp->v_type != VDIR &&
	    (uio->uio_offset + uio->uio_resid) > nmp->nm_maxfilesize)
		return (EFBIG);
	biosize = vp->v_mount->mnt_stat.f_iosize;
	/* Sequential-access hint encoded in the upper bits of ioflag. */
	seqcount = (int)((off_t)(ioflag >> 16) * biosize / BKVASIZE);
	/*
	 * For nfs, cache consistency can only be maintained approximately.
	 * Although RFC1094 does not specify the criteria, the following is
	 * believed to be compatible with the reference port.
	 * For nqnfs, full cache consistency is maintained within the loop.
	 * For nfs:
	 * If the file's modify time on the server has changed since the
	 * last read rpc or you have written to the file,
	 * you may have lost data cache consistency with the
	 * server, so flush all of the file's data out of the cache.
	 * Then force a getattr rpc to ensure that you have up to date
	 * attributes.
	 * NB: This implies that cache data can be read when up to
	 * NFS_ATTRTIMEO seconds out of date. If you find that you need current
	 * attributes this could be forced by setting n_attrstamp to 0 before
	 * the VOP_GETATTR() call.
	 */
	if ((nmp->nm_flag & NFSMNT_NQNFS) == 0) {
		if (np->n_flag & NMODIFIED) {
			if (vp->v_type != VREG) {
				if (vp->v_type != VDIR)
					panic("nfs: bioread, not dir");
				nfs_invaldir(vp);
				error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
				if (error)
					return (error);
			}
			np->n_attrstamp = 0;
			error = VOP_GETATTR(vp, &vattr, cred, p);
			if (error)
				return (error);
			np->n_mtime = vattr.va_mtime.tv_sec;
		} else {
			error = VOP_GETATTR(vp, &vattr, cred, p);
			if (error)
				return (error);
			if (np->n_mtime != vattr.va_mtime.tv_sec) {
				if (vp->v_type == VDIR)
					nfs_invaldir(vp);
				error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
				if (error)
					return (error);
				np->n_mtime = vattr.va_mtime.tv_sec;
			}
		}
	}
	do {

	    /*
	     * Get a valid lease. If cached data is stale, flush it.
	     */
	    if (nmp->nm_flag & NFSMNT_NQNFS) {
		if (NQNFS_CKINVALID(vp, np, ND_READ)) {
		    do {
			error = nqnfs_getlease(vp, ND_READ, cred, p);
		    } while (error == NQNFS_EXPIRED);
		    if (error)
			return (error);
		    if (np->n_lrev != np->n_brev ||
			(np->n_flag & NQNFSNONCACHE) ||
			((np->n_flag & NMODIFIED) && vp->v_type == VDIR)) {
			if (vp->v_type == VDIR)
			    nfs_invaldir(vp);
			error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
			if (error)
			    return (error);
			np->n_brev = np->n_lrev;
		    }
		} else if (vp->v_type == VDIR && (np->n_flag & NMODIFIED)) {
		    nfs_invaldir(vp);
		    error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
		    if (error)
			return (error);
		}
	    }
	    /* No-cache mode: go straight to the wire, bypass buffer cache. */
	    if (np->n_flag & NQNFSNONCACHE) {
		switch (vp->v_type) {
		case VREG:
			return (nfs_readrpc(vp, uio, cred));
		case VLNK:
			return (nfs_readlinkrpc(vp, uio, cred));
		case VDIR:
			break;
		default:
			printf(" NQNFSNONCACHE: type %x unexpected\n",
				vp->v_type);
		};
	    }
	    switch (vp->v_type) {
	    case VREG:
		nfsstats.biocache_reads++;
		lbn = uio->uio_offset / biosize;
		on = uio->uio_offset & (biosize - 1);

		/*
		 * Start the read ahead(s), as required.
		 */
		if (nfs_numasync > 0 && nmp->nm_readahead > 0) {
		    for (nra = 0; nra < nmp->nm_readahead && nra < seqcount &&
			(off_t)(lbn + 1 + nra) * biosize < np->n_size; nra++) {
			rabn = lbn + 1 + nra;
			if (!incore(vp, rabn)) {
			    rabp = nfs_getcacheblk(vp, rabn, biosize, p);
			    if (!rabp)
				return (EINTR);
			    if ((rabp->b_flags & (B_CACHE|B_DELWRI)) == 0) {
				rabp->b_flags |= B_ASYNC;
				rabp->b_iocmd = BIO_READ;
				vfs_busy_pages(rabp, 0);
				if (nfs_asyncio(rabp, cred, p)) {
				    rabp->b_flags |= B_INVAL;
				    rabp->b_ioflags |= BIO_ERROR;
				    vfs_unbusy_pages(rabp);
				    brelse(rabp);
				    break;
				}
			    } else {
				brelse(rabp);
			    }
			}
		    }
		}

		/*
		 * Obtain the buffer cache block.  Figure out the buffer size
		 * when we are at EOF.  If we are modifying the size of the
		 * buffer based on an EOF condition we need to hold
		 * nfs_rslock() through obtaining the buffer to prevent
		 * a potential writer-appender from messing with n_size.
		 * Otherwise we may accidently truncate the buffer and
		 * lose dirty data.
		 *
		 * Note that bcount is *not* DEV_BSIZE aligned.
		 */

again:
		bcount = biosize;
		if ((off_t)lbn * biosize >= np->n_size) {
			bcount = 0;
		} else if ((off_t)(lbn + 1) * biosize > np->n_size) {
			bcount = np->n_size - (off_t)lbn * biosize;
		}
		if (bcount != biosize) {
			switch(nfs_rslock(np, p)) {
			case ENOLCK:
				goto again;
				/* not reached */
			case EINTR:
			case ERESTART:
				return(EINTR);
				/* not reached */
			default:
				break;
			}
		}

		bp = nfs_getcacheblk(vp, lbn, bcount, p);

		if (bcount != biosize)
			nfs_rsunlock(np, p);
		if (!bp)
			return (EINTR);

		/*
		 * If B_CACHE is not set, we must issue the read.  If this
		 * fails, we return an error.
		 */

		if ((bp->b_flags & B_CACHE) == 0) {
		    bp->b_iocmd = BIO_READ;
		    vfs_busy_pages(bp, 0);
		    error = nfs_doio(bp, cred, p);
		    if (error) {
			brelse(bp);
			return (error);
		    }
		}

		/*
		 * on is the offset into the current bp.  Figure out how many
		 * bytes we can copy out of the bp.  Note that bcount is
		 * NOT DEV_BSIZE aligned.
		 *
		 * Then figure out how many bytes we can copy into the uio.
		 */

		n = 0;
		if (on < bcount)
			n = min((unsigned)(bcount - on), uio->uio_resid);
		break;
	    case VLNK:
		nfsstats.biocache_readlinks++;
		/* Whole symlink target lives in one block at lbn 0. */
		bp = nfs_getcacheblk(vp, (daddr_t)0, NFS_MAXPATHLEN, p);
		if (!bp)
			return (EINTR);
		if ((bp->b_flags & B_CACHE) == 0) {
		    bp->b_iocmd = BIO_READ;
		    vfs_busy_pages(bp, 0);
		    error = nfs_doio(bp, cred, p);
		    if (error) {
			bp->b_ioflags |= BIO_ERROR;
			brelse(bp);
			return (error);
		    }
		}
		n = min(uio->uio_resid, NFS_MAXPATHLEN - bp->b_resid);
		on = 0;
		break;
	    case VDIR:
		nfsstats.biocache_readdirs++;
		if (np->n_direofoffset
		    && uio->uio_offset >= np->n_direofoffset) {
		    return (0);
		}
		lbn = (uoff_t)uio->uio_offset / NFS_DIRBLKSIZ;
		on = uio->uio_offset & (NFS_DIRBLKSIZ - 1);
		bp = nfs_getcacheblk(vp, lbn, NFS_DIRBLKSIZ, p);
		if (!bp)
		    return (EINTR);
		if ((bp->b_flags & B_CACHE) == 0) {
		    bp->b_iocmd = BIO_READ;
		    vfs_busy_pages(bp, 0);
		    error = nfs_doio(bp, cred, p);
		    if (error) {
			    brelse(bp);
		    }
		    while (error == NFSERR_BAD_COOKIE) {
			printf("got bad cookie vp %p bp %p\n", vp, bp);
			nfs_invaldir(vp);
			error = nfs_vinvalbuf(vp, 0, cred, p, 1);
			/*
			 * Yuck! The directory has been modified on the
			 * server. The only way to get the block is by
			 * reading from the beginning to get all the
			 * offset cookies.
			 *
			 * Leave the last bp intact unless there is an error.
			 * Loop back up to the while if the error is another
			 * NFSERR_BAD_COOKIE (double yuch!).
			 */
			for (i = 0; i <= lbn && !error; i++) {
			    if (np->n_direofoffset
				&& (i * NFS_DIRBLKSIZ) >= np->n_direofoffset)
				    return (0);
			    bp = nfs_getcacheblk(vp, i, NFS_DIRBLKSIZ, p);
			    if (!bp)
				return (EINTR);
			    if ((bp->b_flags & B_CACHE) == 0) {
				    bp->b_iocmd = BIO_READ;
				    vfs_busy_pages(bp, 0);
				    error = nfs_doio(bp, cred, p);
				    /*
				     * no error + B_INVAL == directory EOF,
				     * use the block.
				     */
				    if (error == 0 && (bp->b_flags & B_INVAL))
					    break;
			    }
			    /*
			     * An error will throw away the block and the
			     * for loop will break out.  If no error and this
			     * is not the block we want, we throw away the
			     * block and go for the next one via the for loop.
			     */
			    if (error || i < lbn)
				    brelse(bp);
			}
		    }
		    /*
		     * The above while is repeated if we hit another cookie
		     * error.  If we hit an error and it wasn't a cookie error,
		     * we give up.
		     */
		    if (error)
			    return (error);
		}

		/*
		 * If not eof and read aheads are enabled, start one.
		 * (You need the current block first, so that you have the
		 *  directory offset cookie of the next block.)
		 */
		if (nfs_numasync > 0 && nmp->nm_readahead > 0 &&
		    (bp->b_flags & B_INVAL) == 0 &&
		    (np->n_direofoffset == 0 ||
		    (lbn + 1) * NFS_DIRBLKSIZ < np->n_direofoffset) &&
		    !(np->n_flag & NQNFSNONCACHE) &&
		    !incore(vp, lbn + 1)) {
			rabp = nfs_getcacheblk(vp, lbn + 1, NFS_DIRBLKSIZ, p);
			if (rabp) {
			    if ((rabp->b_flags & (B_CACHE|B_DELWRI)) == 0) {
				rabp->b_flags |= B_ASYNC;
				rabp->b_iocmd = BIO_READ;
				vfs_busy_pages(rabp, 0);
				if (nfs_asyncio(rabp, cred, p)) {
				    rabp->b_flags |= B_INVAL;
				    rabp->b_ioflags |= BIO_ERROR;
				    vfs_unbusy_pages(rabp);
				    brelse(rabp);
				}
			    } else {
				brelse(rabp);
			    }
			}
		}
		/*
		 * Unlike VREG files, whos buffer size ( bp->b_bcount ) is
		 * chopped for the EOF condition, we cannot tell how large
		 * NFS directories are going to be until we hit EOF.  So
		 * an NFS directory buffer is *not* chopped to its EOF.  Now,
		 * it just so happens that b_resid will effectively chop it
		 * to EOF.  *BUT* this information is lost if the buffer goes
		 * away and is reconstituted into a B_CACHE state ( due to
		 * being VMIO ) later.  So we keep track of the directory eof
		 * in np->n_direofoffset and chop it off as an extra step
		 * right here.
		 */
		n = lmin(uio->uio_resid, NFS_DIRBLKSIZ - bp->b_resid - on);
		if (np->n_direofoffset && n > np->n_direofoffset - uio->uio_offset)
			n = np->n_direofoffset - uio->uio_offset;
		break;
	    default:
		printf(" nfs_bioread: type %x unexpected\n",vp->v_type);
		break;
	    };

	    /* Copy the computed span out of the buffer into the caller's uio. */
	    if (n > 0) {
		    error = uiomove(bp->b_data + on, (int)n, uio);
	    }
	    switch (vp->v_type) {
	    case VREG:
		break;
	    case VLNK:
		/* Symlinks are single-shot: force loop termination. */
		n = 0;
		break;
	    case VDIR:
		/*
		 * Invalidate buffer if caching is disabled, forcing a
		 * re-read from the remote later.
		 */
		if (np->n_flag & NQNFSNONCACHE)
			bp->b_flags |= B_INVAL;
		break;
	    default:
		printf(" nfs_bioread: type %x unexpected\n",vp->v_type);
	    }
	    brelse(bp);
	} while (error == 0 && uio->uio_resid > 0 && n > 0);
	return (error);
}
739138899Sps
740138899Sps/*
741138899Sps * Vnode op for write using bio
742138899Sps */
743138899Spsint
744158739Smohansnfs_write(ap)
745158739Smohans	struct vop_write_args /* {
746158739Smohans		struct vnode *a_vp;
747158739Smohans		struct uio *a_uio;
748158739Smohans		int  a_ioflag;
749138899Sps		struct ucred *a_cred;
750138899Sps	} */ *ap;
751138899Sps{
752138899Sps	int biosize;
753138899Sps	struct uio *uio = ap->a_uio;
754138899Sps	struct proc *p = uio->uio_procp;
755158739Smohans	struct vnode *vp = ap->a_vp;
756138899Sps	struct nfsnode *np = VTONFS(vp);
757138899Sps	struct ucred *cred = ap->a_cred;
758138899Sps	int ioflag = ap->a_ioflag;
759138899Sps	struct buf *bp;
760138899Sps	struct vattr vattr;
761138899Sps	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
762138899Sps	daddr_t lbn;
763138899Sps	int bcount;
764138899Sps	int n, on, error = 0, iomode, must_commit;
765138899Sps	int haverslock = 0;
766138899Sps
767138899Sps	GIANT_REQUIRED;
768138899Sps
769138899Sps#ifdef DIAGNOSTIC
770138899Sps	if (uio->uio_rw != UIO_WRITE)
771138899Sps		panic("nfs_write mode");
772138899Sps	if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc)
773138899Sps		panic("nfs_write proc");
774138899Sps#endif
775138899Sps	if (vp->v_type != VREG)
776138899Sps		return (EIO);
777138899Sps	if (np->n_flag & NWRITEERR) {
778138899Sps		np->n_flag &= ~NWRITEERR;
779138899Sps		return (np->n_error);
780138899Sps	}
781138899Sps	if ((nmp->nm_flag & NFSMNT_NFSV3) != 0 &&
782138899Sps	    (nmp->nm_state & NFSSTA_GOTFSINFO) == 0)
783138899Sps		(void)nfs_fsinfo(nmp, vp, cred, p);
784138899Sps
785138899Sps	/*
786138899Sps	 * Synchronously flush pending buffers if we are in synchronous
787138899Sps	 * mode or if we are appending.
788138899Sps	 */
789138899Sps	if (ioflag & (IO_APPEND | IO_SYNC)) {
790138899Sps		if (np->n_flag & NMODIFIED) {
791138899Sps			np->n_attrstamp = 0;
792138899Sps			error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
793138899Sps			if (error)
794138899Sps				return (error);
795138899Sps		}
796138899Sps	}
797138899Sps
798138899Sps	/*
799138899Sps	 * If IO_APPEND then load uio_offset.  We restart here if we cannot
800138899Sps	 * get the append lock.
801138899Sps	 */
802138899Spsrestart:
803138899Sps	if (ioflag & IO_APPEND) {
804158739Smohans		np->n_attrstamp = 0;
805138899Sps		error = VOP_GETATTR(vp, &vattr, cred, p);
806138899Sps		if (error)
807138899Sps			return (error);
808138899Sps		uio->uio_offset = np->n_size;
809138899Sps	}
810138899Sps
811138899Sps	if (uio->uio_offset < 0)
812138899Sps		return (EINVAL);
813138899Sps	if ((uio->uio_offset + uio->uio_resid) > nmp->nm_maxfilesize)
814138899Sps		return (EFBIG);
815138899Sps	if (uio->uio_resid == 0)
816138899Sps		return (0);
817138899Sps
818138899Sps	/*
819138899Sps	 * We need to obtain the rslock if we intend to modify np->n_size
820138899Sps	 * in order to guarentee the append point with multiple contending
821138899Sps	 * writers, to guarentee that no other appenders modify n_size
822138899Sps	 * while we are trying to obtain a truncated buffer (i.e. to avoid
823138899Sps	 * accidently truncating data written by another appender due to
824138899Sps	 * the race), and to ensure that the buffer is populated prior to
825138899Sps	 * our extending of the file.  We hold rslock through the entire
826138899Sps	 * operation.
827138899Sps	 *
828138899Sps	 * Note that we do not synchronize the case where someone truncates
829138899Sps	 * the file while we are appending to it because attempting to lock
830138899Sps	 * this case may deadlock other parts of the system unexpectedly.
831138899Sps	 */
832138899Sps	if ((ioflag & IO_APPEND) ||
833138899Sps	    uio->uio_offset + uio->uio_resid > np->n_size) {
834138899Sps		switch(nfs_rslock(np, p)) {
835138899Sps		case ENOLCK:
836138899Sps			goto restart;
837138899Sps			/* not reached */
838138899Sps		case EINTR:
839138899Sps		case ERESTART:
840138899Sps			return(EINTR);
841138899Sps			/* not reached */
842138899Sps		default:
843138899Sps			break;
844138899Sps		}
845138899Sps		haverslock = 1;
846138899Sps	}
847138899Sps
848138899Sps	/*
849138899Sps	 * Maybe this should be above the vnode op call, but so long as
850138899Sps	 * file servers have no limits, i don't think it matters
851138899Sps	 */
852138899Sps	if (p && uio->uio_offset + uio->uio_resid >
853138899Sps	      p->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
854138899Sps		PROC_LOCK(p);
8551541Srgrimes		psignal(p, SIGXFSZ);
8561541Srgrimes		PROC_UNLOCK(p);
8571549Srgrimes		if (haverslock)
85883651Speter			nfs_rsunlock(np, p);
8591541Srgrimes		return (EFBIG);
86046349Salc	}
86146349Salc
86283366Sjulian	biosize = vp->v_mount->mnt_stat.f_iosize;
86346349Salc
8641541Srgrimes	do {
86546349Salc		/*
8661541Srgrimes		 * Check for a valid write lease.
8671541Srgrimes		 */
8681541Srgrimes		if ((nmp->nm_flag & NFSMNT_NQNFS) &&
8699336Sdfr		    NQNFS_CKINVALID(vp, np, ND_WRITE)) {
87011921Sphk			do {
87146349Salc				error = nqnfs_getlease(vp, ND_WRITE, cred, p);
87283651Speter			} while (error == NQNFS_EXPIRED);
87383366Sjulian			if (error)
8741541Srgrimes				break;
8751541Srgrimes			if (np->n_lrev != np->n_brev ||
8761541Srgrimes			    (np->n_flag & NQNFSNONCACHE)) {
8771541Srgrimes				error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
87883366Sjulian				if (error)
8791541Srgrimes					break;
8801541Srgrimes				np->n_brev = np->n_lrev;
8811541Srgrimes			}
8821541Srgrimes		}
883158739Smohans		if ((np->n_flag & NQNFSNONCACHE) && uio->uio_iovcnt == 1) {
8841541Srgrimes		    iomode = NFSV3WRITE_FILESYNC;
8851541Srgrimes		    error = nfs_writerpc(vp, uio, cred, &iomode, &must_commit);
886158739Smohans		    if (must_commit)
8871541Srgrimes			    nfs_clearcommit(vp->v_mount);
888158739Smohans		    break;
889158739Smohans		}
890158739Smohans		nfsstats.biocache_writes++;
89136176Speter		lbn = uio->uio_offset / biosize;
892158739Smohans		on = uio->uio_offset & (biosize-1);
893158739Smohans		n = min((unsigned)(biosize - on), uio->uio_resid);
89483366Sjulianagain:
895158739Smohans		/*
896158739Smohans		 * Handle direct append and file extension cases, calculate
89754605Sdillon		 * unaligned buffer size.
89854605Sdillon		 */
89954605Sdillon
90054605Sdillon		if (uio->uio_offset == np->n_size && n) {
90154605Sdillon			/*
9021541Srgrimes			 * Get the buffer (in its pre-append state to maintain
903158739Smohans			 * B_CACHE if it was previously set).  Resize the
9041541Srgrimes			 * nfsnode after we have locked the buffer to prevent
905158739Smohans			 * readers from reading garbage.
906147420Sgreen			 */
907147280Sgreen			bcount = on;
908147280Sgreen			bp = nfs_getcacheblk(vp, lbn, bcount, p);
909147280Sgreen
910147280Sgreen			if (bp != NULL) {
911147280Sgreen				long save;
912147280Sgreen
913147280Sgreen				np->n_size = uio->uio_offset + n;
914147420Sgreen				np->n_flag |= NMODIFIED;
915147280Sgreen				vnode_pager_setsize(vp, np->n_size);
9161541Srgrimes
917190380Srwatson				save = bp->b_flags & B_CACHE;
918140731Sphk				bcount += n;
9193305Sphk				allocbuf(bp, bcount);
9201541Srgrimes				bp->b_flags |= save;
921158739Smohans				bp->b_magic = B_MAGIC_NFS;
922158739Smohans				bp->b_op = &buf_ops_nfs;
9231541Srgrimes			}
92454605Sdillon		} else {
92554605Sdillon			/*
92654605Sdillon			 * Obtain the locked cache block first, and then
92754605Sdillon			 * adjust the file's size as appropriate.
92854605Sdillon			 */
92954605Sdillon			bcount = on + n;
93054605Sdillon			if ((off_t)lbn * biosize + bcount < np->n_size) {
931190380Srwatson				if ((off_t)(lbn + 1) * biosize < np->n_size)
932182371Sattilio					bcount = biosize;
93354605Sdillon				else
93454605Sdillon					bcount = np->n_size - (off_t)lbn * biosize;
935158739Smohans			}
93654605Sdillon
937158739Smohans			bp = nfs_getcacheblk(vp, lbn, bcount, p);
93854605Sdillon
93954605Sdillon			if (uio->uio_offset + n > np->n_size) {
9401541Srgrimes				np->n_size = uio->uio_offset + n;
9411541Srgrimes				np->n_flag |= NMODIFIED;
94236473Speter				vnode_pager_setsize(vp, np->n_size);
94336473Speter			}
9441541Srgrimes		}
9451541Srgrimes
94654605Sdillon		if (!bp) {
947138899Sps			error = EINTR;
948138899Sps			break;
949138899Sps		}
9501541Srgrimes
9511541Srgrimes		/*
9521541Srgrimes		 * Issue a READ if B_CACHE is not set.  In special-append
9531541Srgrimes		 * mode, B_CACHE is based on the buffer prior to the write
954125454Sjhb		 * op and is typically set, avoiding the read.  If a read
95573929Sjhb		 * is required in special append mode, the server will
956125454Sjhb		 * probably send us a short-read since we extended the file
957125454Sjhb		 * on our end, resulting in b_resid == 0 and, thusly,
958125454Sjhb		 * B_CACHE getting set.
959125454Sjhb		 *
960125454Sjhb		 * We can also avoid issuing the read if the write covers
961125454Sjhb		 * the entire buffer.  We have to make sure the buffer state
96273929Sjhb		 * is reasonable in this case since we will not be initiating
9631541Srgrimes		 * I/O.  See the comments in kern/vfs_bio.c's getblk() for
96446349Salc		 * more information.
9659428Sdfr		 *
966147280Sgreen		 * B_CACHE may also be set due to the buffer being cached
967147280Sgreen		 * normally.
968147280Sgreen		 */
969147280Sgreen
970147280Sgreen		if (on == 0 && n == bcount) {
971147280Sgreen			bp->b_flags |= B_CACHE;
972147280Sgreen			bp->b_flags &= ~B_INVAL;
973147280Sgreen			bp->b_ioflags &= ~BIO_ERROR;
974147280Sgreen		}
975158739Smohans
976158739Smohans		if ((bp->b_flags & B_CACHE) == 0) {
977158739Smohans			bp->b_iocmd = BIO_READ;
978158739Smohans			vfs_busy_pages(bp, 0);
979158739Smohans			error = nfs_doio(bp, cred, p);
980147280Sgreen			if (error) {
981147280Sgreen				brelse(bp);
982147280Sgreen				break;
983147280Sgreen			}
984147280Sgreen		}
985147280Sgreen		if (!bp) {
986147280Sgreen			error = EINTR;
987147280Sgreen			break;
988147280Sgreen		}
989147280Sgreen		if (bp->b_wcred == NOCRED) {
990147280Sgreen			crhold(cred);
991158739Smohans			bp->b_wcred = cred;
992147280Sgreen		}
993158739Smohans		np->n_flag |= NMODIFIED;
994147280Sgreen
995147280Sgreen		/*
996147280Sgreen		 * If dirtyend exceeds file size, chop it down.  This should
997147280Sgreen		 * not normally occur but there is an append race where it
998147280Sgreen		 * might occur XXX, so we log it.
999147280Sgreen		 *
1000147280Sgreen		 * If the chopping creates a reverse-indexed or degenerate
1001147280Sgreen		 * situation with dirtyoff/end, we 0 both of them.
1002147280Sgreen		 */
1003147280Sgreen
1004147280Sgreen		if (bp->b_dirtyend > bcount) {
1005147280Sgreen			printf("NFS append race @%lx:%d\n",
1006147280Sgreen			    (long)bp->b_blkno * DEV_BSIZE,
1007147280Sgreen			    bp->b_dirtyend - bcount);
1008147280Sgreen			bp->b_dirtyend = bcount;
1009147280Sgreen		}
1010147280Sgreen
1011147280Sgreen		if (bp->b_dirtyoff >= bp->b_dirtyend)
1012147280Sgreen			bp->b_dirtyoff = bp->b_dirtyend = 0;
1013147280Sgreen
1014147280Sgreen		/*
1015147280Sgreen		 * If the new write will leave a contiguous dirty
1016147280Sgreen		 * area, just update the b_dirtyoff and b_dirtyend,
1017147280Sgreen		 * otherwise force a write rpc of the old dirty area.
1018147280Sgreen		 *
1019147280Sgreen		 * While it is possible to merge discontiguous writes due to
1020148268Sps		 * our having a B_CACHE buffer ( and thus valid read data
1021147280Sgreen		 * for the hole), we don't because it could lead to
1022147280Sgreen		 * significant cache coherency problems with multiple clients,
102346349Salc		 * especially if locking is implemented later on.
10241541Srgrimes		 *
10251541Srgrimes		 * as an optimization we could theoretically maintain
10261541Srgrimes		 * a linked list of discontinuous areas, but we would still
10271541Srgrimes		 * have to commit them separately so there isn't much
10281541Srgrimes		 * advantage to it except perhaps a bit of asynchronization.
10291541Srgrimes		 */
103046349Salc
103146349Salc		if (bp->b_dirtyend > 0 &&
103246349Salc		    (on > bp->b_dirtyend || (on + n) < bp->b_dirtyoff)) {
103346349Salc			if (BUF_WRITE(bp) == EINTR)
1034158739Smohans				return (EINTR);
103546349Salc			goto again;
1036158739Smohans		}
103746349Salc
103854605Sdillon		/*
103954605Sdillon		 * Check for valid write lease and get one as required.
104054605Sdillon		 * In case getblk() and/or bwrite() delayed us.
104154605Sdillon		 */
104246349Salc		if ((nmp->nm_flag & NFSMNT_NQNFS) &&
104346349Salc		    NQNFS_CKINVALID(vp, np, ND_WRITE)) {
104483366Sjulian			do {
104546349Salc				error = nqnfs_getlease(vp, ND_WRITE, cred, p);
104654605Sdillon			} while (error == NQNFS_EXPIRED);
104754605Sdillon			if (error) {
104846349Salc				brelse(bp);
1049158739Smohans				break;
105054605Sdillon			}
105154605Sdillon			if (np->n_lrev != np->n_brev ||
105254605Sdillon			    (np->n_flag & NQNFSNONCACHE)) {
1053158739Smohans				brelse(bp);
105454605Sdillon				error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
105554605Sdillon				if (error)
105654605Sdillon					break;
105754605Sdillon				np->n_brev = np->n_lrev;
105854605Sdillon				goto again;
105954605Sdillon			}
106046349Salc		}
106154605Sdillon
106283651Speter		error = uiomove((char *)bp->b_data + on, n, uio);
106354605Sdillon
106454605Sdillon		/*
106554605Sdillon		 * Since this block is being modified, it must be written
106654605Sdillon		 * again and not just committed.  Since write clustering does
106754605Sdillon		 * not work for the stage 1 data write, only the stage 2
106854605Sdillon		 * commit rpc, we have to clear B_CLUSTEROK as well.
106954605Sdillon		 */
107054605Sdillon		bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK);
107154605Sdillon
1072158739Smohans		if (error) {
107383366Sjulian			bp->b_ioflags |= BIO_ERROR;
1074158739Smohans			brelse(bp);
107546349Salc			break;
107646349Salc		}
107746349Salc
107846349Salc		/*
107946349Salc		 * Only update dirtyoff/dirtyend if not a degenerate
1080158739Smohans		 * condition.
10818692Sdg		 */
108246349Salc		if (n) {
108354605Sdillon			if (bp->b_dirtyend > 0) {
1084131691Salfred				bp->b_dirtyoff = min(on, bp->b_dirtyoff);
1085131691Salfred				bp->b_dirtyend = max((on + n), bp->b_dirtyend);
1086131691Salfred			} else {
108754605Sdillon				bp->b_dirtyoff = on;
108854605Sdillon				bp->b_dirtyend = on + n;
108954605Sdillon			}
109046349Salc			vfs_bio_set_validclean(bp, on, n);
109146349Salc		}
109246349Salc
109346349Salc		/*
109446349Salc		 * If the lease is non-cachable or IO_SYNC do bwrite().
109546349Salc		 *
109683651Speter		 * IO_INVAL appears to be unused.  The idea appears to be
109746349Salc		 * to turn off caching in this case.  Very odd.  XXX
109846349Salc		 */
109946349Salc		if ((np->n_flag & NQNFSNONCACHE) || (ioflag & IO_SYNC)) {
110046349Salc			if (ioflag & IO_INVAL)
110146349Salc				bp->b_flags |= B_NOCACHE;
110246349Salc			error = BUF_WRITE(bp);
110346349Salc			if (error)
110446349Salc				break;
110546349Salc			if (np->n_flag & NQNFSNONCACHE) {
110646349Salc				error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
110746349Salc				if (error)
110846349Salc					break;
110946349Salc			}
111046349Salc		} else if ((n + on) == biosize &&
111158934Sphk			(nmp->nm_flag & NFSMNT_NQNFS) == 0) {
111258934Sphk			bp->b_flags |= B_ASYNC;
11138692Sdg			(void)nfs_writebp(bp, 0, 0);
111446349Salc		} else {
111546349Salc			bdwrite(bp);
111658345Sphk		}
111746349Salc	} while (uio->uio_resid > 0 && n > 0);
1118134898Sphk
111946349Salc	if (haverslock)
112046349Salc		nfs_rsunlock(np, p);
112154605Sdillon
112246349Salc	return (error);
112346349Salc}
112484827Sjhb
112584827Sjhb/*
1126158739Smohans * Get an nfs cache block.
11271541Srgrimes *
1128158739Smohans * Allocate a new one if the block isn't currently in the cache
11298692Sdg * and return the block marked busy. If the calling process is
113045347Sjulian * interrupted by a signal for an interruptible mount point, return
113154605Sdillon * NULL.
113254605Sdillon *
113383651Speter * The caller must carefully deal with the possible B_INVAL state of
113454605Sdillon * the buffer.  nfs_doio() clears B_INVAL (and nfs_asyncio() clears it
113554605Sdillon * indirectly), so synchronous reads can be issued without worrying about
113654605Sdillon * the B_INVAL state.  We have to be a little more careful when dealing
113745347Sjulian * with writes (see comments in nfs_write()) when extending a file past
113845347Sjulian * its EOF.
113954605Sdillon */
1140158739Smohansstatic struct buf *
114183651Speternfs_getcacheblk(vp, bn, size, p)
114254605Sdillon	struct vnode *vp;
114354605Sdillon	daddr_t bn;
114454605Sdillon	int size;
114554605Sdillon	struct proc *p;
114645347Sjulian{
114745347Sjulian	register struct buf *bp;
114831617Sdyson	struct mount *mp;
11491541Srgrimes	struct nfsmount *nmp;
115031617Sdyson
115131617Sdyson	mp = vp->v_mount;
115231617Sdyson	nmp = VFSTONFS(mp);
115346349Salc
115483651Speter	if (nmp->nm_flag & NFSMNT_INT) {
115546349Salc		bp = getblk(vp, bn, size, PCATCH, 0);
115683651Speter		while (bp == (struct buf *)0) {
115746349Salc			if (nfs_sigintr(nmp, (struct nfsreq *)0, p))
115846349Salc				return ((struct buf *)0);
115946349Salc			bp = getblk(vp, bn, size, 0, 2 * hz);
116046349Salc		}
116146349Salc	} else {
116246349Salc		bp = getblk(vp, bn, size, 0, 0);
116346349Salc	}
116431617Sdyson
116542957Sdillon	if (vp->v_type == VREG) {
116631617Sdyson		int biosize;
116731617Sdyson
1168126853Sphk		biosize = mp->mnt_stat.f_iosize;
1169100194Sdillon		bp->b_blkno = bn * (biosize / DEV_BSIZE);
1170100194Sdillon	}
1171100194Sdillon	return (bp);
117231617Sdyson}
117331617Sdyson
117431617Sdyson/*
11753305Sphk * Flush and invalidate all dirty buffers. If another process is already
117654480Sdillon * doing the flush, just wait for completion.
117754480Sdillon */
117854480Sdillonint
117954480Sdillonnfs_vinvalbuf(vp, flags, cred, p, intrflg)
118054480Sdillon	struct vnode *vp;
118154480Sdillon	int flags;
118254480Sdillon	struct ucred *cred;
118354480Sdillon	struct proc *p;
118454480Sdillon	int intrflg;
11853305Sphk{
118658934Sphk	register struct nfsnode *np = VTONFS(vp);
11871541Srgrimes	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
118854605Sdillon	int error = 0, slpflag, slptimeo;
11891541Srgrimes
119034206Sdyson	if (vp->v_flag & VXLOCK) {
119134206Sdyson		return (0);
119283651Speter	}
119345347Sjulian
119445347Sjulian	if ((nmp->nm_flag & NFSMNT_INT) == 0)
119545347Sjulian		intrflg = 0;
119645347Sjulian	if (intrflg) {
119745347Sjulian		slpflag = PCATCH;
119845347Sjulian		slptimeo = 2 * hz;
119945347Sjulian	} else {
120045347Sjulian		slpflag = 0;
120145347Sjulian		slptimeo = 0;
120245347Sjulian	}
120346349Salc	/*
12041541Srgrimes	 * First wait for any other process doing a flush to complete.
120545347Sjulian	 */
120644679Sjulian	while (np->n_flag & NFLUSHINPROG) {
120783651Speter		np->n_flag |= NFLUSHWANT;
120846349Salc		error = tsleep((caddr_t)&np->n_flag, PRIBIO + 2, "nfsvinval",
120946349Salc			slptimeo);
121046349Salc		if (error && intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p))
12111541Srgrimes			return (EINTR);
121283651Speter	}
121334206Sdyson
121446349Salc	/*
1215126853Sphk	 * Now, flush as required.
12163305Sphk	 */
121754605Sdillon	np->n_flag |= NFLUSHINPROG;
121883651Speter	error = vinvalbuf(vp, flags, cred, p, slpflag, 0);
12199336Sdfr	while (error) {
1220158739Smohans		if (intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p)) {
122146349Salc			np->n_flag &= ~NFLUSHINPROG;
12221541Srgrimes			if (np->n_flag & NFLUSHWANT) {
122346349Salc				np->n_flag &= ~NFLUSHWANT;
12241541Srgrimes				wakeup((caddr_t)&np->n_flag);
122554605Sdillon			}
122654605Sdillon			return (EINTR);
12271541Srgrimes		}
12281541Srgrimes		error = vinvalbuf(vp, flags, cred, p, 0, slptimeo);
12291541Srgrimes	}
12301541Srgrimes	np->n_flag &= ~(NMODIFIED | NFLUSHINPROG);
123154480Sdillon	if (np->n_flag & NFLUSHWANT) {
12321541Srgrimes		np->n_flag &= ~NFLUSHWANT;
12331541Srgrimes		wakeup((caddr_t)&np->n_flag);
12341541Srgrimes	}
12351541Srgrimes	return (0);
123654480Sdillon}
123754480Sdillon
123854480Sdillon/*
123954480Sdillon * Initiate asynchronous I/O. Return an error if no nfsiods are available.
124054480Sdillon * This is mainly to avoid queueing async I/O requests when the nfsiods
124154480Sdillon * are all hung on a dead server.
124254480Sdillon *
12431541Srgrimes * Note: nfs_asyncio() does not clear (BIO_ERROR|B_INVAL) but when the bp
124412911Sphk * is eventually dequeued by the async daemon, nfs_doio() *will*.
124583651Speter */
12461541Srgrimesint
124783651Speternfs_asyncio(bp, cred, procp)
124832755Sdyson	register struct buf *bp;
124932755Sdyson	struct ucred *cred;
12501541Srgrimes	struct proc *procp;
125132755Sdyson{
125232755Sdyson	struct nfsmount *nmp;
125332755Sdyson	int i;
12541541Srgrimes	int gotiod;
1255138496Sps	int slpflag = 0;
1256138496Sps	int slptimeo = 0;
1257138496Sps	int error;
1258111856Sjeff
1259138496Sps	/*
126099797Sdillon	 * If no async daemons then return EIO to force caller to run the rpc
126199797Sdillon	 * synchronously.
126299797Sdillon	 */
1263111856Sjeff	if (nfs_numasync == 0)
12641541Srgrimes		return (EIO);
126546349Salc
1266111856Sjeff	nmp = VFSTONFS(bp->b_vp->v_mount);
126746349Salc
12685455Sdg	/*
126941791Sdt	 * Commits are usually short and sweet so lets save some cpu and
127032755Sdyson	 * leave the async daemons for more important rpc's (such as reads
127146349Salc	 * and writes).
127232755Sdyson	 */
127341791Sdt	if (bp->b_iocmd == BIO_WRITE && (bp->b_flags & B_NEEDCOMMIT) &&
127432755Sdyson	    (nmp->nm_bufqiods > nfs_numasync / 2)) {
12751541Srgrimes		return(EIO);
12761541Srgrimes	}
12771541Srgrimes
12781541Srgrimesagain:
12791541Srgrimes	if (nmp->nm_flag & NFSMNT_INT)
12801541Srgrimes		slpflag = PCATCH;
12811541Srgrimes	gotiod = FALSE;
12821549Srgrimes
1283140731Sphk	/*
12841541Srgrimes	 * Find a free iod to process this request.
128583651Speter	 */
12861541Srgrimes	for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
12871541Srgrimes		if (nfs_iodwant[i]) {
1288138469Sps			/*
12891541Srgrimes			 * Found one, so wake it up and tell it which
1290115041Srwatson			 * mount to process.
1291115041Srwatson			 */
1292120730Sjeff			NFS_DPF(ASYNCIO,
1293120730Sjeff				("nfs_asyncio: waking iod %d for mount %p\n",
1294120730Sjeff				 i, nmp));
1295120730Sjeff			nfs_iodwant[i] = (struct proc *)0;
1296120730Sjeff			nfs_iodmount[i] = nmp;
1297143510Sjeff			nmp->nm_bufqiods++;
129832755Sdyson			wakeup((caddr_t)&nfs_iodwant[i]);
129932755Sdyson			gotiod = TRUE;
13001541Srgrimes			break;
13011541Srgrimes		}
13021541Srgrimes
13031541Srgrimes	/*
13041541Srgrimes	 * If none are free, we may already have an iod working on this mount
13051541Srgrimes	 * point.  If so, it will process our request.
13061541Srgrimes	 */
13071541Srgrimes	if (!gotiod) {
13081541Srgrimes		if (nmp->nm_bufqiods > 0) {
13091541Srgrimes			NFS_DPF(ASYNCIO,
1310176134Sattilio				("nfs_asyncio: %d iods are already processing mount %p\n",
13111541Srgrimes				 nmp->nm_bufqiods, nmp));
13121541Srgrimes			gotiod = TRUE;
13131541Srgrimes		}
1314158906Sups	}
1315158915Sups
1316158906Sups	/*
1317158915Sups	 * If we have an iod which can process the request, then queue
1318158906Sups	 * the buffer.
1319158906Sups	 */
1320158906Sups	if (gotiod) {
1321158906Sups		/*
1322158906Sups		 * Ensure that the queue never grows too large.  We still want
1323158906Sups		 * to asynchronize so we block rather then return EIO.
1324158906Sups		 */
1325158906Sups		while (nmp->nm_bufqlen >= 2*nfs_numasync) {
1326158906Sups			NFS_DPF(ASYNCIO,
1327183754Sattilio				("nfs_asyncio: waiting for mount %p queue to drain\n", nmp));
13281541Srgrimes			nmp->nm_bufqwant = TRUE;
1329138469Sps			error = tsleep(&nmp->nm_bufq, slpflag | PRIBIO,
1330138469Sps				       "nfsaio", slptimeo);
1331183754Sattilio			if (error) {
13321541Srgrimes				if (nfs_sigintr(nmp, NULL, procp))
1333158739Smohans					return (EINTR);
1334157557Smohans				if (slpflag == PCATCH) {
1335157557Smohans					slpflag = 0;
1336158739Smohans					slptimeo = 2 * hz;
1337138469Sps				}
1338176134Sattilio			}
1339138469Sps			/*
13401541Srgrimes			 * We might have lost our iod while sleeping,
13411541Srgrimes			 * so check and loop if nescessary.
13421541Srgrimes			 */
13431541Srgrimes			if (nmp->nm_bufqiods == 0) {
13441541Srgrimes				NFS_DPF(ASYNCIO,
13451541Srgrimes					("nfs_asyncio: no iods after mount %p queue was drained, looping\n", nmp));
134646349Salc				goto again;
134758934Sphk			}
134846349Salc		}
13491541Srgrimes
13501549Srgrimes		if (bp->b_iocmd == BIO_READ) {
1351134898Sphk			if (bp->b_rcred == NOCRED && cred != NOCRED) {
13521541Srgrimes				crhold(cred);
135389324Speter				bp->b_rcred = cred;
135419449Sdfr			}
135519449Sdfr		} else {
135619449Sdfr			bp->b_flags |= B_WRITEINPROG;
1357131691Salfred			if (bp->b_wcred == NOCRED && cred != NOCRED) {
13581541Srgrimes				crhold(cred);
135955431Sdillon				bp->b_wcred = cred;
136083651Speter			}
136155431Sdillon		}
136255431Sdillon
136355431Sdillon		BUF_KERNPROC(bp);
1364158739Smohans		TAILQ_INSERT_TAIL(&nmp->nm_bufq, bp, b_freelist);
136558345Sphk		nmp->nm_bufqlen++;
136655431Sdillon		return (0);
1367158739Smohans	}
136855431Sdillon
136955431Sdillon	/*
137019449Sdfr	 * All the iods are busy on other mounts, so return EIO to
137119449Sdfr	 * force the caller to process the i/o synchronously.
137219449Sdfr	 */
137319449Sdfr	NFS_DPF(ASYNCIO, ("nfs_asyncio: no iods available, i/o is synchronous\n"));
137419449Sdfr	return (EIO);
137519449Sdfr}
137619449Sdfr
137719449Sdfr/*
137889407Speter * Do an I/O operation to/from a cache block. This may be called
137989324Speter * synchronously or from an nfsiod.
138019449Sdfr */
138125023Sdfrint
138219449Sdfrnfs_doio(bp, cr, p)
138319449Sdfr	struct buf *bp;
138419449Sdfr	struct ucred *cr;
138589324Speter	struct proc *p;
138689324Speter{
138789324Speter	struct uio *uiop;
138889324Speter	struct vnode *vp;
138989324Speter	struct nfsnode *np;
139089324Speter	struct nfsmount *nmp;
139189324Speter	int error = 0, iomode, must_commit = 0;
139289324Speter	struct uio uio;
139389407Speter	struct iovec io;
139489407Speter
139589407Speter	vp = bp->b_vp;
139689407Speter	np = VTONFS(vp);
139789407Speter	nmp = VFSTONFS(vp->v_mount);
139889407Speter	uiop = &uio;
139989407Speter	uiop->uio_iov = &io;
140099797Sdillon	uiop->uio_iovcnt = 1;
140189407Speter	uiop->uio_segflg = UIO_SYSSPACE;
140289407Speter	uiop->uio_procp = p;
1403111748Sdes
140489407Speter	/*
140589407Speter	 * clear BIO_ERROR and B_INVAL state prior to initiating the I/O.  We
140689324Speter	 * do this here so we do not have to do it in all the code that
140719449Sdfr	 * calls us.
140819449Sdfr	 */
140919449Sdfr	bp->b_flags &= ~B_INVAL;
141019449Sdfr	bp->b_ioflags &= ~BIO_ERROR;
141119449Sdfr
141219449Sdfr	KASSERT(!(bp->b_flags & B_DONE), ("nfs_doio: bp %p already marked done", bp));
141319449Sdfr
141419449Sdfr	/*
141519449Sdfr	 * Historically, paging was done with physio, but no more.
141619449Sdfr	 */
141719449Sdfr	if (bp->b_flags & B_PHYS) {
141819449Sdfr	    /*
141919449Sdfr	     * ...though reading /dev/drum still gets us here.
142019449Sdfr	     */
142119449Sdfr	    io.iov_len = uiop->uio_resid = bp->b_bcount;
142219449Sdfr	    /* mapping was done by vmapbuf() */
142319449Sdfr	    io.iov_base = bp->b_data;
142419449Sdfr	    uiop->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE;
142555431Sdillon	    if (bp->b_iocmd == BIO_READ) {
142655431Sdillon		uiop->uio_rw = UIO_READ;
142719449Sdfr		nfsstats.read_physios++;
142819449Sdfr		error = nfs_readrpc(vp, uiop, cr);
142919449Sdfr	    } else {
143019449Sdfr		int com;
143119449Sdfr
1432158739Smohans		iomode = NFSV3WRITE_DATASYNC;
1433158739Smohans		uiop->uio_rw = UIO_WRITE;
1434138496Sps		nfsstats.write_physios++;
143519449Sdfr		error = nfs_writerpc(vp, uiop, cr, &iomode, &com);
1436131691Salfred	    }
1437158739Smohans	    if (error) {
1438158739Smohans		bp->b_ioflags |= BIO_ERROR;
1439131691Salfred		bp->b_error = error;
1440158739Smohans	    }
144119449Sdfr	} else if (bp->b_iocmd == BIO_READ) {
144219449Sdfr	    io.iov_len = uiop->uio_resid = bp->b_bcount;
144319449Sdfr	    io.iov_base = bp->b_data;
144419449Sdfr	    uiop->uio_rw = UIO_READ;
144519449Sdfr	    switch (vp->v_type) {
144619449Sdfr	    case VREG:
144719449Sdfr		uiop->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE;
144819449Sdfr		nfsstats.read_bios++;
144919449Sdfr		error = nfs_readrpc(vp, uiop, cr);
145019449Sdfr		if (!error) {
145119449Sdfr		    if (uiop->uio_resid) {
145219449Sdfr			/*
145319449Sdfr			 * If we had a short read with no error, we must have
145419449Sdfr			 * hit a file hole.  We should zero-fill the remainder.
145519449Sdfr			 * This can also occur if the server hits the file EOF.
145619449Sdfr			 *
1457172324Smohans			 * Holes used to be able to occur due to pending
1458172324Smohans			 * writes, but that is not possible any longer.
1459172324Smohans			 */
1460172324Smohans			int nread = bp->b_bcount - uiop->uio_resid;
1461172324Smohans			int left  = bp->b_bcount - nread;
1462172324Smohans
1463172324Smohans			if (left > 0)
146458345Sphk				bzero((char *)bp->b_data + nread, left);
146584827Sjhb			uiop->uio_resid = 0;
146684827Sjhb		    }
14671541Srgrimes		}
146884827Sjhb		if (p && (vp->v_flag & VTEXT) &&
146984827Sjhb			(((nmp->nm_flag & NFSMNT_NQNFS) &&
14701541Srgrimes			  NQNFS_CKINVALID(vp, np, ND_READ) &&
14718876Srgrimes			  np->n_lrev != np->n_brev) ||
1472137846Sjeff			 (!(nmp->nm_flag & NFSMNT_NQNFS) &&
1473137846Sjeff			  np->n_mtime != np->n_vattr.va_mtime.tv_sec))) {
147448225Smckusick			uprintf("Process killed due to text file modification\n");
147519449Sdfr			PROC_LOCK(p);
147619449Sdfr			psignal(p, SIGKILL);
1477158739Smohans			_PHOLD(p);
1478158739Smohans			PROC_UNLOCK(p);
1479169043Sjhb		}
1480157557Smohans		break;
1481158739Smohans	    case VLNK:
1482158739Smohans		uiop->uio_offset = (off_t)0;
1483158739Smohans		nfsstats.readlink_bios++;
14841541Srgrimes		error = nfs_readlinkrpc(vp, uiop, cr);
148519449Sdfr		break;
14869336Sdfr	    case VDIR:
1487158739Smohans		nfsstats.readdir_bios++;
1488158739Smohans		uiop->uio_offset = ((u_quad_t)bp->b_lblkno) * NFS_DIRBLKSIZ;
14899336Sdfr		if (nmp->nm_flag & NFSMNT_RDIRPLUS) {
149019449Sdfr			error = nfs_readdirplusrpc(vp, uiop, cr);
149119449Sdfr			if (error == NFSERR_NOTSUPP)
14929336Sdfr				nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
149319449Sdfr		}
149419449Sdfr		if ((nmp->nm_flag & NFSMNT_RDIRPLUS) == 0)
14951541Srgrimes			error = nfs_readdirrpc(vp, uiop, cr);
14961541Srgrimes		/*
1497138899Sps		 * end-of-directory sets B_INVAL but does not generate an
1498138899Sps		 * error.
1499138899Sps		 */
1500138899Sps		if (error == 0 && uiop->uio_resid == bp->b_bcount)
1501138899Sps			bp->b_flags |= B_INVAL;
1502138899Sps		break;
1503138899Sps	    default:
1504138899Sps		printf("nfs_doio:  type %x unexpected\n",vp->v_type);
1505138899Sps		break;
1506138899Sps	    };
1507138899Sps	    if (error) {
1508138899Sps		bp->b_ioflags |= BIO_ERROR;
1509138899Sps		bp->b_error = error;
1510138899Sps	    }
1511138899Sps	} else {
1512157557Smohans	    /*
1513157557Smohans	     * If we only need to commit, try to commit
1514158739Smohans	     */
1515157557Smohans	    if (bp->b_flags & B_NEEDCOMMIT) {
1516169043Sjhb		    int retv;
1517169043Sjhb		    off_t off;
1518169043Sjhb
1519169043Sjhb		    off = ((u_quad_t)bp->b_blkno) * DEV_BSIZE + bp->b_dirtyoff;
1520169043Sjhb		    bp->b_flags |= B_WRITEINPROG;
1521169043Sjhb		    retv = nfs_commit(
1522157557Smohans				bp->b_vp, off, bp->b_dirtyend-bp->b_dirtyoff,
1523158739Smohans				bp->b_wcred, p);
1524157557Smohans		    bp->b_flags &= ~B_WRITEINPROG;
1525138899Sps		    if (retv == 0) {
1526138899Sps			    bp->b_dirtyoff = bp->b_dirtyend = 0;
1527138899Sps			    bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK);
1528138899Sps			    bp->b_resid = 0;
15291541Srgrimes			    bufdone(bp);
15301541Srgrimes			    return (0);
15311541Srgrimes		    }
15321541Srgrimes		    if (retv == NFSERR_STALEWRITEVERF) {
15331541Srgrimes			    nfs_clearcommit(bp->b_vp->v_mount);
1534134898Sphk		    }
15351541Srgrimes	    }
153644679Sjulian
15371541Srgrimes	    /*
15381541Srgrimes	     * Setup for actual write
153946349Salc	     */
15401541Srgrimes
15411541Srgrimes	    if ((off_t)bp->b_blkno * DEV_BSIZE + bp->b_dirtyend > np->n_size)
154283651Speter		bp->b_dirtyend = np->n_size - (off_t)bp->b_blkno * DEV_BSIZE;
1543158739Smohans
1544158739Smohans	    if (bp->b_dirtyend > bp->b_dirtyoff) {
15451541Srgrimes		io.iov_len = uiop->uio_resid = bp->b_dirtyend
15461541Srgrimes		    - bp->b_dirtyoff;
15471541Srgrimes		uiop->uio_offset = (off_t)bp->b_blkno * DEV_BSIZE
15481541Srgrimes		    + bp->b_dirtyoff;
15491541Srgrimes		io.iov_base = (char *)bp->b_data + bp->b_dirtyoff;
15501541Srgrimes		uiop->uio_rw = UIO_WRITE;
155183366Sjulian		nfsstats.write_bios++;
15521541Srgrimes
155346349Salc		if ((bp->b_flags & (B_ASYNC | B_NEEDCOMMIT | B_NOCACHE | B_CLUSTER)) == B_ASYNC)
155458934Sphk		    iomode = NFSV3WRITE_UNSTABLE;
155546349Salc		else
155646349Salc		    iomode = NFSV3WRITE_FILESYNC;
155746349Salc
155858934Sphk		bp->b_flags |= B_WRITEINPROG;
155958934Sphk		error = nfs_writerpc(vp, uiop, cr, &iomode, &must_commit);
156046349Salc
156144679Sjulian		/*
1562158739Smohans		 * When setting B_NEEDCOMMIT also set B_CLUSTEROK to try
1563158739Smohans		 * to cluster the buffers needing commit.  This will allow
15643664Sphk		 * the system to submit a single commit rpc for the whole
15653664Sphk		 * cluster.  We can do this even if the buffer is not 100%
15661541Srgrimes		 * dirty (relative to the NFS blocksize), so we optimize the
156787834Sdillon		 * append-to-file-case.
15681541Srgrimes		 *
15691541Srgrimes		 * (when clearing B_NEEDCOMMIT, B_CLUSTEROK must also be
15709336Sdfr		 * cleared because write clustering only works for commit
15711541Srgrimes		 * rpc's, not for the data portion of the write).
1572122953Salfred		 */
157387834Sdillon
15741541Srgrimes		if (!error && iomode == NFSV3WRITE_UNSTABLE) {
15751541Srgrimes		    bp->b_flags |= B_NEEDCOMMIT;
15761541Srgrimes		    if (bp->b_dirtyoff == 0
157746349Salc			&& bp->b_dirtyend == bp->b_bcount)
157846349Salc			bp->b_flags |= B_CLUSTEROK;
157946349Salc		} else {
158046349Salc		    bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK);
158183651Speter		}
158246349Salc		bp->b_flags &= ~B_WRITEINPROG;
15831541Srgrimes
158446349Salc		/*
158587834Sdillon		 * For an interrupted write, the buffer is still valid
158646349Salc		 * and the write hasn't been pushed to the server yet,
158746349Salc		 * so we can't set BIO_ERROR and report the interruption
158846349Salc		 * by setting B_EINTR. For the B_ASYNC case, B_EINTR
158946349Salc		 * is not relevant, so the rpc attempt is essentially
159046349Salc		 * a noop.  For the case of a V3 write rpc not being
15911541Srgrimes		 * committed to stable storage, the block is still
1592115041Srwatson		 * dirty and requires either a commit rpc or another
1593158739Smohans		 * write rpc with iomode == NFSV3WRITE_FILESYNC before
1594158739Smohans		 * the block is reused. This is indicated by setting
1595158739Smohans		 * the B_DELWRI and B_NEEDCOMMIT flags.
1596158739Smohans		 *
1597158739Smohans		 * If the buffer is marked B_PAGING, it does not reside on
1598158739Smohans		 * the vp's paging queues so we cannot call bdirty().  The
1599158739Smohans		 * bp in this case is not an NFS cache block so we should
1600158739Smohans		 * be safe. XXX
1601158739Smohans		 */
16021541Srgrimes    		if (error == EINTR
16031541Srgrimes		    || (!error && (bp->b_flags & B_NEEDCOMMIT))) {
16041541Srgrimes			int s;
16059336Sdfr
16061541Srgrimes			s = splbio();
1607122953Salfred			bp->b_flags &= ~(B_INVAL|B_NOCACHE);
16081541Srgrimes			if ((bp->b_flags & B_PAGING) == 0) {
16091541Srgrimes			    bdirty(bp);
16101541Srgrimes			    bp->b_flags &= ~B_DONE;
16119336Sdfr			}
1612192578Srwatson			if (error && (bp->b_flags & B_ASYNC) == 0)
1613192578Srwatson			    bp->b_flags |= B_EINTR;
1614192578Srwatson			splx(s);
1615192578Srwatson	    	} else {
16169336Sdfr		    if (error) {
1617192578Srwatson			bp->b_ioflags |= BIO_ERROR;
1618192578Srwatson			bp->b_error = np->n_error = error;
161946349Salc			np->n_flag |= NWRITEERR;
162046349Salc		    }
162146349Salc		    bp->b_dirtyoff = bp->b_dirtyend = 0;
162246349Salc		}
162339782Smckusick	    } else {
162439782Smckusick		bp->b_resid = 0;
16251541Srgrimes		bufdone(bp);
16263305Sphk		return (0);
1627158739Smohans	    }
16283305Sphk	}
16291541Srgrimes	bp->b_resid = uiop->uio_resid;
16301541Srgrimes	if (must_commit)
163158934Sphk	    nfs_clearcommit(vp->v_mount);
16321541Srgrimes	bufdone(bp);
16331541Srgrimes	return (error);
16341541Srgrimes}
163583651Speter