nfs_bio.c revision 79224
/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Rick Macklem at The University of Guelph.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)nfs_bio.c	8.9 (Berkeley) 3/30/95
 * $FreeBSD: head/sys/nfsclient/nfs_bio.c 79224 2001-07-04 16:20:28Z dillon $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/proc.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/kernel.h>

#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_page.h>
#include <vm/vm_object.h>
#include <vm/vm_pager.h>
#include <vm/vnode_pager.h>

#include <nfs/rpcv2.h>
#include <nfs/nfsproto.h>
#include <nfs/nfs.h>
#include <nfs/nfsmount.h>
#include <nfs/nqnfs.h>
#include <nfs/nfsnode.h>

/*
 * Just call nfs_writebp() with the force argument set to 1.
 *
 * NOTE: B_DONE may or may not be set in a_bp on call.
70138899Sps */ 7175580Sphkstatic int 72138899Spsnfs_bwrite(struct buf *bp) 73138899Sps{ 74158739Smohans return (nfs_writebp(bp, 1, curproc)); 751541Srgrimes} 7625930Sdfr 7725930Sdfrstruct buf_ops buf_ops_nfs = { 7825930Sdfr "buf_ops_nfs", 7983651Speter nfs_bwrite 8025930Sdfr}; 8146349Salc 8232755Sdyson 8332755Sdysonstatic struct buf *nfs_getcacheblk __P((struct vnode *vp, daddr_t bn, int size, 8432755Sdyson struct proc *p)); 8534206Sdyson 8636563Speterextern int nfs_numasync; 8783366Sjulianextern int nfs_pbuf_freecnt; 8836563Speterextern struct nfsstats nfsstats; 8936563Speter 90116461Salc/* 9136563Speter * Vnode op for VM getpages. 92138899Sps */ 9325930Sdfrint 9436563Speternfs_getpages(ap) 95138899Sps struct vop_getpages_args /* { 9683366Sjulian struct vnode *a_vp; 9791406Sjhb vm_page_t *a_m; 9836563Speter int a_count; 9936563Speter int a_reqpage; 10036563Speter vm_ooffset_t a_offset; 10136563Speter } */ *ap; 102116461Salc{ 103158739Smohans int i, error, nextoff, size, toff, count, npages; 10436563Speter struct uio uio; 10525930Sdfr struct iovec iov; 10625930Sdfr vm_offset_t kva; 107158739Smohans struct buf *bp; 108158739Smohans struct vnode *vp; 109158739Smohans struct proc *p; 110158739Smohans struct ucred *cred; 111158739Smohans struct nfsmount *nmp; 112158739Smohans vm_page_t *pages; 113158739Smohans 114158739Smohans GIANT_REQUIRED; 115138899Sps 116138899Sps vp = ap->a_vp; 117158739Smohans p = curproc; /* XXX */ 11836563Speter cred = curproc->p_ucred; /* XXX */ 119158739Smohans nmp = VFSTONFS(vp->v_mount); 120158739Smohans pages = ap->a_m; 121122698Salfred count = ap->a_count; 12283366Sjulian 123158739Smohans if (vp->v_object == NULL) { 124158739Smohans printf("nfs_getpages: called with non-merged cache vnode??\n"); 12546349Salc return VM_PAGER_ERROR; 12646349Salc } 12746349Salc 12834206Sdyson if ((nmp->nm_flag & NFSMNT_NFSV3) != 0 && 12946349Salc (nmp->nm_state & NFSSTA_GOTFSINFO) == 0) { 13046349Salc (void)nfs_fsinfo(nmp, vp, cred, p); 13146349Salc } 
13246349Salc 13346349Salc npages = btoc(count); 13446349Salc 13546349Salc /* 13646349Salc * If the requested page is partially valid, just return it and 137116461Salc * allow the pager to zero-out the blanks. Partially valid pages 13846349Salc * can only occur at the file EOF. 139191964Salc */ 14046349Salc 14146349Salc { 14275692Salfred vm_page_t m = pages[ap->a_reqpage]; 14346349Salc 144100450Salc if (m->valid != 0) { 145116461Salc /* handled by vm_fault now */ 14646349Salc /* vm_page_zero_invalid(m, TRUE); */ 14746349Salc for (i = 0; i < npages; ++i) { 148116461Salc if (i != ap->a_reqpage) 14946349Salc vm_page_free(pages[i]); 15046349Salc } 15146349Salc return(0); 15234206Sdyson } 15334206Sdyson } 15434206Sdyson 15542957Sdillon /* 15625930Sdfr * We use only the kva address for the buffer, but this is extremely 15734206Sdyson * convienient and fast. 15836563Speter */ 159170292Sattilio bp = getpbuf(&nfs_pbuf_freecnt); 160170292Sattilio 16134206Sdyson kva = (vm_offset_t) bp->b_data; 16232755Sdyson pmap_qenter(kva, pages, npages); 16336563Speter 16432755Sdyson iov.iov_base = (caddr_t) kva; 16532755Sdyson iov.iov_len = count; 16636563Speter uio.uio_iov = &iov; 16736563Speter uio.uio_iovcnt = 1; 16832755Sdyson uio.uio_offset = IDX_TO_OFF(pages[0]->pindex); 16932755Sdyson uio.uio_resid = count; 17083366Sjulian uio.uio_segflg = UIO_SYSSPACE; 17125930Sdfr uio.uio_rw = UIO_READ; 172122953Salfred uio.uio_procp = p; 17334206Sdyson 17432755Sdyson error = nfs_readrpc(vp, &uio, cred); 17542957Sdillon pmap_qremove(kva, npages); 17634206Sdyson 17742957Sdillon relpbuf(bp, &nfs_pbuf_freecnt); 178158739Smohans 179116461Salc if (error && (uio.uio_resid == count)) { 180100450Salc printf("nfs_getpages: error %d\n", error); 18142957Sdillon for (i = 0; i < npages; ++i) { 18242957Sdillon if (i != ap->a_reqpage) 18375692Salfred vm_page_free(pages[i]); 18442957Sdillon } 185100450Salc return VM_PAGER_ERROR; 186116461Salc } 18734206Sdyson 18842957Sdillon /* 18934206Sdyson * Calculate the 
number of bytes read and validate only that number 19045347Sjulian * of bytes. Note that due to pending writes, size may be 0. This 19145347Sjulian * does not mean that the remaining data is invalid! 19245347Sjulian */ 19345347Sjulian 19445347Sjulian size = count - uio.uio_resid; 19545347Sjulian 19636563Speter for (i = 0, toff = 0; i < npages; i++, toff = nextoff) { 197116461Salc vm_page_t m; 198100450Salc nextoff = toff + PAGE_SIZE; 19934206Sdyson m = pages[i]; 20034206Sdyson 20134206Sdyson m->flags &= ~PG_ZERO; 20236563Speter 20334206Sdyson if (nextoff <= size) { 20434206Sdyson /* 20545347Sjulian * Read operation filled an entire page 20645347Sjulian */ 20745347Sjulian m->valid = VM_PAGE_BITS_ALL; 20834206Sdyson vm_page_undirty(m); 209192010Salc } else if (size > toff) { 210192010Salc /* 21145347Sjulian * Read operation filled a partial page. 21245347Sjulian */ 21346349Salc m->valid = 0; 21445347Sjulian vm_page_set_validclean(m, 0, size - toff); 21546349Salc /* handled by vm_fault now */ 216192134Salc /* vm_page_zero_invalid(m, TRUE); */ 217192134Salc } 218192134Salc 21987834Sdillon if (i != ap->a_reqpage) { 22087834Sdillon /* 22187834Sdillon * Whether or not to leave the page activated is up in 22287834Sdillon * the air, but we should put the page on a page queue 22387834Sdillon * somewhere (it already is in the object). Result: 22487834Sdillon * It appears that emperical results show that 22587834Sdillon * deactivating pages is best. 22634206Sdyson */ 22725930Sdfr 22834206Sdyson /* 22934206Sdyson * Just in case someone was asking for this page we 23034206Sdyson * now tell them that it is ok to use. 
23134206Sdyson */ 23234206Sdyson if (!error) { 23334206Sdyson if (m->flags & PG_WANTED) 23434206Sdyson vm_page_activate(m); 23534206Sdyson else 23634206Sdyson vm_page_deactivate(m); 23734206Sdyson vm_page_wakeup(m); 23834206Sdyson } else { 23934206Sdyson vm_page_free(m); 24034206Sdyson } 241161125Salc } 24234206Sdyson } 24334206Sdyson return 0; 24434206Sdyson} 24538799Sdfr 24634206Sdyson/* 24775692Salfred * Vnode op for VM putpages. 24834206Sdyson */ 24925930Sdfrint 25025930Sdfrnfs_putpages(ap) 251100450Salc struct vop_putpages_args /* { 252116461Salc struct vnode *a_vp; 25325930Sdfr vm_page_t *a_m; 25425930Sdfr int a_count; 25525930Sdfr int a_sync; 25625930Sdfr int *a_rtvals; 25734206Sdyson vm_ooffset_t a_offset; 25834096Smsmith } */ *ap; 25934096Smsmith{ 26083651Speter struct uio uio; 26134096Smsmith struct iovec iov; 26234206Sdyson vm_offset_t kva; 26334206Sdyson struct buf *bp; 26434206Sdyson int iomode, must_commit, i, error, npages, count; 26534206Sdyson off_t offset; 26636563Speter int *rtvals; 26746349Salc struct vnode *vp; 26834206Sdyson struct proc *p; 26936563Speter struct ucred *cred; 27083366Sjulian struct nfsmount *nmp; 27136563Speter struct nfsnode *np; 27236563Speter vm_page_t *pages; 27346349Salc 27436563Speter GIANT_REQUIRED; 27534206Sdyson 27636563Speter vp = ap->a_vp; 27746349Salc np = VTONFS(vp); 27883366Sjulian p = curproc; /* XXX */ 27991406Sjhb cred = curproc->p_ucred; /* XXX */ 28036563Speter nmp = VFSTONFS(vp->v_mount); 28136563Speter pages = ap->a_m; 28236563Speter count = ap->a_count; 28334206Sdyson rtvals = ap->a_rtvals; 28436563Speter npages = btoc(count); 28546349Salc offset = IDX_TO_OFF(pages[0]->pindex); 286158739Smohans 287158739Smohans GIANT_REQUIRED; 28836563Speter 28976827Salfred if ((nmp->nm_flag & NFSMNT_NFSV3) != 0 && 290158739Smohans (nmp->nm_state & NFSSTA_GOTFSINFO) == 0) { 29183366Sjulian (void)nfs_fsinfo(nmp, vp, cred, p); 292158739Smohans } 293158739Smohans 29434206Sdyson for (i = 0; i < npages; i++) { 295158739Smohans 
rtvals[i] = VM_PAGER_AGAIN; 296157557Smohans } 297158739Smohans 298158739Smohans /* 299158739Smohans * When putting pages, do not extend file past EOF. 300158739Smohans */ 301158739Smohans 302138899Sps if (offset + count > np->n_size) { 30383651Speter count = np->n_size - offset; 30434206Sdyson if (count < 0) 30534206Sdyson count = 0; 30634206Sdyson } 30746349Salc 30846349Salc /* 30946349Salc * We use only the kva address for the buffer, but this is extremely 31046349Salc * convienient and fast. 31146349Salc */ 31246349Salc bp = getpbuf(&nfs_pbuf_freecnt); 31346349Salc 314158739Smohans kva = (vm_offset_t) bp->b_data; 31546349Salc pmap_qenter(kva, pages, npages); 31646349Salc 31734206Sdyson iov.iov_base = (caddr_t) kva; 31834206Sdyson iov.iov_len = count; 31934206Sdyson uio.uio_iov = &iov; 32042957Sdillon uio.uio_iovcnt = 1; 32134206Sdyson uio.uio_offset = offset; 32234206Sdyson uio.uio_resid = count; 32336563Speter uio.uio_segflg = UIO_SYSSPACE; 324170292Sattilio uio.uio_rw = UIO_WRITE; 325170292Sattilio uio.uio_procp = p; 32634206Sdyson 32734206Sdyson if ((ap->a_sync & VM_PAGER_PUT_SYNC) == 0) 32836563Speter iomode = NFSV3WRITE_UNSTABLE; 32934206Sdyson else 33034206Sdyson iomode = NFSV3WRITE_FILESYNC; 33146349Salc 33236563Speter error = nfs_writerpc(vp, &uio, cred, &iomode, &must_commit); 33334206Sdyson 33434206Sdyson pmap_qremove(kva, npages); 33583366Sjulian relpbuf(bp, &nfs_pbuf_freecnt); 33634206Sdyson 33734206Sdyson if (!error) { 33834206Sdyson int nwritten = round_page(count - uio.uio_resid) / PAGE_SIZE; 33934206Sdyson for (i = 0; i < nwritten; i++) { 34034206Sdyson rtvals[i] = VM_PAGER_OK; 34134206Sdyson vm_page_undirty(pages[i]); 342122953Salfred } 34334206Sdyson if (must_commit) { 34434206Sdyson nfs_clearcommit(vp->v_mount); 34542957Sdillon } 34634206Sdyson } 34734206Sdyson return rtvals[0]; 34836563Speter} 34934206Sdyson 35034206Sdyson/* 35149945Salc * Vnode op for read using bio 35234206Sdyson */ 35376827Salfredint 35436563Speternfs_bioread(vp, uio, 
ioflag, cred) 35576827Salfred register struct vnode *vp; 35634206Sdyson register struct uio *uio; 35736563Speter int ioflag; 35834096Smsmith struct ucred *cred; 35934096Smsmith{ 36034096Smsmith register struct nfsnode *np = VTONFS(vp); 361158739Smohans register int biosize, i; 362158739Smohans struct buf *bp = 0, *rabp; 363158739Smohans struct vattr vattr; 364158739Smohans struct proc *p; 365158739Smohans struct nfsmount *nmp = VFSTONFS(vp->v_mount); 366158739Smohans daddr_t lbn, rabn; 367158739Smohans int bcount; 368158739Smohans int seqcount; 369158739Smohans int nra, error = 0, n = 0, on = 0; 370158739Smohans 371158739Smohans#ifdef DIAGNOSTIC 372158739Smohans if (uio->uio_rw != UIO_READ) 373158739Smohans panic("nfs_read mode"); 374158739Smohans#endif 375158739Smohans if (uio->uio_resid == 0) 376158739Smohans return (0); 377158739Smohans if (uio->uio_offset < 0) /* XXX VDIR cookies can be negative */ 378158739Smohans return (EINVAL); 379158739Smohans p = uio->uio_procp; 380158739Smohans 381158739Smohans if ((nmp->nm_flag & NFSMNT_NFSV3) != 0 && 382158739Smohans (nmp->nm_state & NFSSTA_GOTFSINFO) == 0) 383158739Smohans (void)nfs_fsinfo(nmp, vp, cred, p); 384158739Smohans if (vp->v_type != VDIR && 385158739Smohans (uio->uio_offset + uio->uio_resid) > nmp->nm_maxfilesize) 386158739Smohans return (EFBIG); 387158739Smohans biosize = vp->v_mount->mnt_stat.f_iosize; 388158739Smohans seqcount = (int)((off_t)(ioflag >> 16) * biosize / BKVASIZE); 389158739Smohans /* 390158739Smohans * For nfs, cache consistency can only be maintained approximately. 391176134Sattilio * Although RFC1094 does not specify the criteria, the following is 392158739Smohans * believed to be compatible with the reference port. 393158739Smohans * For nqnfs, full cache consistency is maintained within the loop. 
394158739Smohans * For nfs: 395158739Smohans * If the file's modify time on the server has changed since the 396158739Smohans * last read rpc or you have written to the file, 397158739Smohans * you may have lost data cache consistency with the 398158739Smohans * server, so flush all of the file's data out of the cache. 399158739Smohans * Then force a getattr rpc to ensure that you have up to date 400158739Smohans * attributes. 401158739Smohans * NB: This implies that cache data can be read when up to 402158739Smohans * NFS_ATTRTIMEO seconds out of date. If you find that you need current 403158739Smohans * attributes this could be forced by setting n_attrstamp to 0 before 404190380Srwatson * the VOP_GETATTR() call. 405182371Sattilio */ 406158739Smohans if ((nmp->nm_flag & NFSMNT_NQNFS) == 0) { 407158739Smohans if (np->n_flag & NMODIFIED) { 408158739Smohans if (vp->v_type != VREG) { 409158739Smohans if (vp->v_type != VDIR) 410158739Smohans panic("nfs: bioread, not dir"); 411158739Smohans nfs_invaldir(vp); 412158739Smohans error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1); 413182371Sattilio if (error) 414158739Smohans return (error); 415158739Smohans } 416158739Smohans np->n_attrstamp = 0; 417158739Smohans error = VOP_GETATTR(vp, &vattr, cred, p); 418158739Smohans if (error) 419158739Smohans return (error); 420158739Smohans np->n_mtime = vattr.va_mtime.tv_sec; 421158739Smohans } else { 422158739Smohans error = VOP_GETATTR(vp, &vattr, cred, p); 423158739Smohans if (error) 424158739Smohans return (error); 425158739Smohans if (np->n_mtime != vattr.va_mtime.tv_sec) { 426158739Smohans if (vp->v_type == VDIR) 427158739Smohans nfs_invaldir(vp); 428158739Smohans error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1); 429158739Smohans if (error) 430158739Smohans return (error); 431158739Smohans np->n_mtime = vattr.va_mtime.tv_sec; 432176134Sattilio } 433158739Smohans } 434158739Smohans } 435158739Smohans do { 436158739Smohans 4371541Srgrimes /* 4381541Srgrimes * Get a valid lease. 
If cached data is stale, flush it. 4391549Srgrimes */ 44083651Speter if (nmp->nm_flag & NFSMNT_NQNFS) { 4411541Srgrimes if (NQNFS_CKINVALID(vp, np, ND_READ)) { 44283651Speter do { 44383651Speter error = nqnfs_getlease(vp, ND_READ, cred, p); 444143822Sdas } while (error == NQNFS_EXPIRED); 44583366Sjulian if (error) 4469336Sdfr return (error); 4475455Sdg if (np->n_lrev != np->n_brev || 44846349Salc (np->n_flag & NQNFSNONCACHE) || 44951344Sdillon ((np->n_flag & NMODIFIED) && vp->v_type == VDIR)) { 45046349Salc if (vp->v_type == VDIR) 4511541Srgrimes nfs_invaldir(vp); 4521541Srgrimes error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1); 4531541Srgrimes if (error) 4541541Srgrimes return (error); 4551541Srgrimes np->n_brev = np->n_lrev; 4561541Srgrimes } 4571541Srgrimes } else if (vp->v_type == VDIR && (np->n_flag & NMODIFIED)) { 45836473Speter nfs_invaldir(vp); 4591541Srgrimes error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1); 46083366Sjulian if (error) 46151344Sdillon return (error); 462158739Smohans } 46336176Speter } 464158739Smohans if (np->n_flag & NQNFSNONCACHE) { 465158739Smohans switch (vp->v_type) { 46683366Sjulian case VREG: 467158739Smohans return (nfs_readrpc(vp, uio, cred)); 468158739Smohans case VLNK: 469158739Smohans return (nfs_readlinkrpc(vp, uio, cred)); 47036473Speter case VDIR: 47136473Speter break; 47236473Speter default: 473138899Sps printf(" NQNFSNONCACHE: type %x unexpected\n", 474138899Sps vp->v_type); 475138899Sps }; 476138899Sps } 477138899Sps switch (vp->v_type) { 4789428Sdfr case VREG: 479108357Sdillon nfsstats.biocache_reads++; 480158739Smohans lbn = uio->uio_offset / biosize; 481158739Smohans on = uio->uio_offset & (biosize - 1); 482158739Smohans 483158739Smohans /* 484158739Smohans * Start the read ahead(s), as required. 
48583651Speter */ 486158739Smohans if (nfs_numasync > 0 && nmp->nm_readahead > 0) { 487158739Smohans for (nra = 0; nra < nmp->nm_readahead && nra < seqcount && 488158739Smohans (off_t)(lbn + 1 + nra) * biosize < np->n_size; nra++) { 489158739Smohans rabn = lbn + 1 + nra; 490158739Smohans if (!incore(vp, rabn)) { 491158739Smohans rabp = nfs_getcacheblk(vp, rabn, biosize, p); 4921541Srgrimes if (!rabp) 4931541Srgrimes return (EINTR); 4941541Srgrimes if ((rabp->b_flags & (B_CACHE|B_DELWRI)) == 0) { 4951541Srgrimes rabp->b_flags |= B_ASYNC; 4969336Sdfr rabp->b_iocmd = BIO_READ; 4971541Srgrimes vfs_busy_pages(rabp, 0); 4981541Srgrimes if (nfs_asyncio(rabp, cred, p)) { 4991541Srgrimes rabp->b_flags |= B_INVAL; 5001541Srgrimes rabp->b_ioflags |= BIO_ERROR; 501158739Smohans vfs_unbusy_pages(rabp); 50251344Sdillon brelse(rabp); 503158739Smohans break; 5045455Sdg } 505136767Sphk } else { 50683366Sjulian brelse(rabp); 507131691Salfred } 508131691Salfred } 509131691Salfred } 510131691Salfred } 5118692Sdg 51258345Sphk /* 51358345Sphk * Obtain the buffer cache block. Figure out the buffer size 5145455Sdg * when we are at EOF. If we are modifying the size of the 515134898Sphk * buffer based on an EOF condition we need to hold 51658934Sphk * nfs_rslock() through obtaining the buffer to prevent 51758934Sphk * a potential writer-appender from messing with n_size. 5185455Sdg * Otherwise we may accidently truncate the buffer and 5191541Srgrimes * lose dirty data. 52055431Sdillon * 5211541Srgrimes * Note that bcount is *not* DEV_BSIZE aligned. 
52255431Sdillon */ 5235471Sdg 52455431Sdillonagain: 5251541Srgrimes bcount = biosize; 5261541Srgrimes if ((off_t)lbn * biosize >= np->n_size) { 5271541Srgrimes bcount = 0; 5281541Srgrimes } else if ((off_t)(lbn + 1) * biosize > np->n_size) { 529148268Sps bcount = np->n_size - (off_t)lbn * biosize; 53046349Salc } 531158739Smohans if (bcount != biosize) { 53246349Salc switch(nfs_rslock(np, p)) { 533158739Smohans case ENOLCK: 534158739Smohans goto again; 5358692Sdg /* not reached */ 53683366Sjulian case EINTR: 53754605Sdillon case ERESTART: 538131691Salfred return(EINTR); 539131691Salfred /* not reached */ 540131691Salfred default: 541131691Salfred break; 54242957Sdillon } 54325930Sdfr } 54446349Salc 54546349Salc bp = nfs_getcacheblk(vp, lbn, bcount, p); 54625930Sdfr 54746349Salc if (bcount != biosize) 5487871Sdg nfs_rsunlock(np, p); 54958345Sphk if (!bp) 55032755Sdyson return (EINTR); 551134898Sphk 55232755Sdyson /* 55332755Sdyson * If B_CACHE is not set, we must issue the read. If this 55432755Sdyson * fails, we return an error. 55532755Sdyson */ 5561541Srgrimes 55746349Salc if ((bp->b_flags & B_CACHE) == 0) { 55846349Salc bp->b_iocmd = BIO_READ; 55946349Salc vfs_busy_pages(bp, 0); 56046349Salc error = nfs_doio(bp, cred, p); 56146349Salc if (error) { 56246349Salc brelse(bp); 56346349Salc return (error); 56446349Salc } 56546349Salc } 56646349Salc 56746349Salc /* 56846349Salc * on is the offset into the current bp. Figure out how many 5691541Srgrimes * bytes we can copy out of the bp. Note that bcount is 5701541Srgrimes * NOT DEV_BSIZE aligned. 5711541Srgrimes * 57283366Sjulian * Then figure out how many bytes we can copy into the uio. 
573131691Salfred */ 574131691Salfred 575131691Salfred n = 0; 576131691Salfred if (on < bcount) 5777871Sdg n = min((unsigned)(bcount - on), uio->uio_resid); 57858345Sphk break; 57932755Sdyson case VLNK: 580134898Sphk nfsstats.biocache_readlinks++; 58132755Sdyson bp = nfs_getcacheblk(vp, (daddr_t)0, NFS_MAXPATHLEN, p); 58258934Sphk if (!bp) 58332755Sdyson return (EINTR); 58432755Sdyson if ((bp->b_flags & B_CACHE) == 0) { 58532755Sdyson bp->b_iocmd = BIO_READ; 5861541Srgrimes vfs_busy_pages(bp, 0); 5871541Srgrimes error = nfs_doio(bp, cred, p); 5881541Srgrimes if (error) { 5891541Srgrimes bp->b_ioflags |= BIO_ERROR; 5901541Srgrimes brelse(bp); 5911541Srgrimes return (error); 59224577Sdfr } 59324577Sdfr } 59424577Sdfr n = min(uio->uio_resid, NFS_MAXPATHLEN - bp->b_resid); 59524577Sdfr on = 0; 59636979Sbde break; 5979336Sdfr case VDIR: 59883366Sjulian nfsstats.biocache_readdirs++; 599131691Salfred if (np->n_direofoffset 600131691Salfred && uio->uio_offset >= np->n_direofoffset) { 601131691Salfred return (0); 602131691Salfred } 6037871Sdg lbn = (uoff_t)uio->uio_offset / NFS_DIRBLKSIZ; 60458345Sphk on = uio->uio_offset & (NFS_DIRBLKSIZ - 1); 6059336Sdfr bp = nfs_getcacheblk(vp, lbn, NFS_DIRBLKSIZ, p); 606134898Sphk if (!bp) 60732912Stegge return (EINTR); 60832912Stegge if ((bp->b_flags & B_CACHE) == 0) { 60932912Stegge bp->b_iocmd = BIO_READ; 61032755Sdyson vfs_busy_pages(bp, 0); 611122953Salfred error = nfs_doio(bp, cred, p); 612140731Sphk if (error) { 61332755Sdyson brelse(bp); 61432755Sdyson } 61532755Sdyson while (error == NFSERR_BAD_COOKIE) { 61632755Sdyson printf("got bad cookie vp %p bp %p\n", vp, bp); 61732755Sdyson nfs_invaldir(vp); 61846349Salc error = nfs_vinvalbuf(vp, 0, cred, p, 1); 61946349Salc /* 62046349Salc * Yuck! The directory has been modified on the 62146349Salc * server. The only way to get the block is by 62232755Sdyson * reading from the beginning to get all the 62332755Sdyson * offset cookies. 
62432755Sdyson * 62532755Sdyson * Leave the last bp intact unless there is an error. 62624577Sdfr * Loop back up to the while if the error is another 62783366Sjulian * NFSERR_BAD_COOKIE (double yuch!). 628131691Salfred */ 629131691Salfred for (i = 0; i <= lbn && !error; i++) { 630131691Salfred if (np->n_direofoffset 631131691Salfred && (i * NFS_DIRBLKSIZ) >= np->n_direofoffset) 63246349Salc return (0); 63358345Sphk bp = nfs_getcacheblk(vp, i, NFS_DIRBLKSIZ, p); 63446349Salc if (!bp) 635134898Sphk return (EINTR); 63646349Salc if ((bp->b_flags & B_CACHE) == 0) { 63746349Salc bp->b_iocmd = BIO_READ; 63846349Salc vfs_busy_pages(bp, 0); 63946349Salc error = nfs_doio(bp, cred, p); 64046349Salc /* 64146349Salc * no error + B_INVAL == directory EOF, 64246349Salc * use the block. 64346349Salc */ 64446349Salc if (error == 0 && (bp->b_flags & B_INVAL)) 64546349Salc break; 64646349Salc } 64746349Salc /* 64846349Salc * An error will throw away the block and the 64946349Salc * for loop will break out. If no error and this 65032755Sdyson * is not the block we want, we throw away the 6511541Srgrimes * block and go for the next one via the for loop. 65232912Stegge */ 65346349Salc if (error || i < lbn) 65446349Salc brelse(bp); 65546349Salc } 65646349Salc } 65746349Salc /* 65832912Stegge * The above while is repeated if we hit another cookie 6599336Sdfr * error. If we hit an error and it wasn't a cookie error, 6601541Srgrimes * we give up. 6611541Srgrimes */ 6621541Srgrimes if (error) 6631541Srgrimes return (error); 6641541Srgrimes } 6659336Sdfr 6661541Srgrimes /* 66789324Speter * If not eof and read aheads are enabled, start one. 66839782Smckusick * (You need the current block first, so that you have the 6699336Sdfr * directory offset cookie of the next block.) 
6709336Sdfr */ 671136767Sphk if (nfs_numasync > 0 && nmp->nm_readahead > 0 && 67283366Sjulian (bp->b_flags & B_INVAL) == 0 && 6731541Srgrimes (np->n_direofoffset == 0 || 6748692Sdg (lbn + 1) * NFS_DIRBLKSIZ < np->n_direofoffset) && 67558345Sphk !(np->n_flag & NQNFSNONCACHE) && 67658345Sphk !incore(vp, lbn + 1)) { 6775455Sdg rabp = nfs_getcacheblk(vp, lbn + 1, NFS_DIRBLKSIZ, p); 678134898Sphk if (rabp) { 67958934Sphk if ((rabp->b_flags & (B_CACHE|B_DELWRI)) == 0) { 68058934Sphk rabp->b_flags |= B_ASYNC; 6815455Sdg rabp->b_iocmd = BIO_READ; 6821541Srgrimes vfs_busy_pages(rabp, 0); 6831541Srgrimes if (nfs_asyncio(rabp, cred, p)) { 6845471Sdg rabp->b_flags |= B_INVAL; 6855471Sdg rabp->b_ioflags |= BIO_ERROR; 6861541Srgrimes vfs_unbusy_pages(rabp); 6871541Srgrimes brelse(rabp); 6881541Srgrimes } 68926469Sdfr } else { 69046349Salc brelse(rabp); 69146349Salc } 69246349Salc } 69346349Salc } 69446349Salc /* 69546349Salc * Unlike VREG files, whos buffer size ( bp->b_bcount ) is 69646349Salc * chopped for the EOF condition, we cannot tell how large 69746349Salc * NFS directories are going to be until we hit EOF. So 69883651Speter * an NFS directory buffer is *not* chopped to its EOF. Now, 69946349Salc * it just so happens that b_resid will effectively chop it 70026469Sdfr * to EOF. *BUT* this information is lost if the buffer goes 70126469Sdfr * away and is reconstituted into a B_CACHE state ( due to 70246349Salc * being VMIO ) later. So we keep track of the directory eof 70346349Salc * in np->n_direofoffset and chop it off as an extra step 7041541Srgrimes * right here. 
7053305Sphk */ 706158739Smohans n = lmin(uio->uio_resid, NFS_DIRBLKSIZ - bp->b_resid - on); 707143822Sdas if (np->n_direofoffset && n > np->n_direofoffset - uio->uio_offset) 7083305Sphk n = np->n_direofoffset - uio->uio_offset; 7091541Srgrimes break; 7101541Srgrimes default: 7111541Srgrimes printf(" nfs_bioread: type %x unexpected\n",vp->v_type); 71234206Sdyson break; 7131541Srgrimes }; 714143822Sdas 7151541Srgrimes if (n > 0) { 716143822Sdas error = uiomove(bp->b_data + on, (int)n, uio); 717143822Sdas } 7181541Srgrimes switch (vp->v_type) { 7191541Srgrimes case VREG: 7201541Srgrimes break; 7211541Srgrimes case VLNK: 7221541Srgrimes n = 0; 723138899Sps break; 724138899Sps case VDIR: 725138899Sps /* 726138899Sps * Invalidate buffer if caching is disabled, forcing a 727138899Sps * re-read from the remote later. 728138899Sps */ 729138899Sps if (np->n_flag & NQNFSNONCACHE) 730138899Sps bp->b_flags |= B_INVAL; 731138899Sps break; 732138899Sps default: 733138899Sps printf(" nfs_bioread: type %x unexpected\n",vp->v_type); 734138899Sps } 735138899Sps brelse(bp); 736138899Sps } while (error == 0 && uio->uio_resid > 0 && n > 0); 737138899Sps return (error); 738138899Sps} 739138899Sps 740138899Sps/* 741138899Sps * Vnode op for write using bio 742138899Sps */ 743138899Spsint 744158739Smohansnfs_write(ap) 745158739Smohans struct vop_write_args /* { 746158739Smohans struct vnode *a_vp; 747158739Smohans struct uio *a_uio; 748158739Smohans int a_ioflag; 749138899Sps struct ucred *a_cred; 750138899Sps } */ *ap; 751138899Sps{ 752138899Sps int biosize; 753138899Sps struct uio *uio = ap->a_uio; 754138899Sps struct proc *p = uio->uio_procp; 755158739Smohans struct vnode *vp = ap->a_vp; 756138899Sps struct nfsnode *np = VTONFS(vp); 757138899Sps struct ucred *cred = ap->a_cred; 758138899Sps int ioflag = ap->a_ioflag; 759138899Sps struct buf *bp; 760138899Sps struct vattr vattr; 761138899Sps struct nfsmount *nmp = VFSTONFS(vp->v_mount); 762138899Sps daddr_t lbn; 763138899Sps int bcount; 
764138899Sps int n, on, error = 0, iomode, must_commit; 765138899Sps int haverslock = 0; 766138899Sps 767138899Sps GIANT_REQUIRED; 768138899Sps 769138899Sps#ifdef DIAGNOSTIC 770138899Sps if (uio->uio_rw != UIO_WRITE) 771138899Sps panic("nfs_write mode"); 772138899Sps if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc) 773138899Sps panic("nfs_write proc"); 774138899Sps#endif 775138899Sps if (vp->v_type != VREG) 776138899Sps return (EIO); 777138899Sps if (np->n_flag & NWRITEERR) { 778138899Sps np->n_flag &= ~NWRITEERR; 779138899Sps return (np->n_error); 780138899Sps } 781138899Sps if ((nmp->nm_flag & NFSMNT_NFSV3) != 0 && 782138899Sps (nmp->nm_state & NFSSTA_GOTFSINFO) == 0) 783138899Sps (void)nfs_fsinfo(nmp, vp, cred, p); 784138899Sps 785138899Sps /* 786138899Sps * Synchronously flush pending buffers if we are in synchronous 787138899Sps * mode or if we are appending. 788138899Sps */ 789138899Sps if (ioflag & (IO_APPEND | IO_SYNC)) { 790138899Sps if (np->n_flag & NMODIFIED) { 791138899Sps np->n_attrstamp = 0; 792138899Sps error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1); 793138899Sps if (error) 794138899Sps return (error); 795138899Sps } 796138899Sps } 797138899Sps 798138899Sps /* 799138899Sps * If IO_APPEND then load uio_offset. We restart here if we cannot 800138899Sps * get the append lock. 
801138899Sps */ 802138899Spsrestart: 803138899Sps if (ioflag & IO_APPEND) { 804158739Smohans np->n_attrstamp = 0; 805138899Sps error = VOP_GETATTR(vp, &vattr, cred, p); 806138899Sps if (error) 807138899Sps return (error); 808138899Sps uio->uio_offset = np->n_size; 809138899Sps } 810138899Sps 811138899Sps if (uio->uio_offset < 0) 812138899Sps return (EINVAL); 813138899Sps if ((uio->uio_offset + uio->uio_resid) > nmp->nm_maxfilesize) 814138899Sps return (EFBIG); 815138899Sps if (uio->uio_resid == 0) 816138899Sps return (0); 817138899Sps 818138899Sps /* 819138899Sps * We need to obtain the rslock if we intend to modify np->n_size 820138899Sps * in order to guarentee the append point with multiple contending 821138899Sps * writers, to guarentee that no other appenders modify n_size 822138899Sps * while we are trying to obtain a truncated buffer (i.e. to avoid 823138899Sps * accidently truncating data written by another appender due to 824138899Sps * the race), and to ensure that the buffer is populated prior to 825138899Sps * our extending of the file. We hold rslock through the entire 826138899Sps * operation. 827138899Sps * 828138899Sps * Note that we do not synchronize the case where someone truncates 829138899Sps * the file while we are appending to it because attempting to lock 830138899Sps * this case may deadlock other parts of the system unexpectedly. 
831138899Sps */ 832138899Sps if ((ioflag & IO_APPEND) || 833138899Sps uio->uio_offset + uio->uio_resid > np->n_size) { 834138899Sps switch(nfs_rslock(np, p)) { 835138899Sps case ENOLCK: 836138899Sps goto restart; 837138899Sps /* not reached */ 838138899Sps case EINTR: 839138899Sps case ERESTART: 840138899Sps return(EINTR); 841138899Sps /* not reached */ 842138899Sps default: 843138899Sps break; 844138899Sps } 845138899Sps haverslock = 1; 846138899Sps } 847138899Sps 848138899Sps /* 849138899Sps * Maybe this should be above the vnode op call, but so long as 850138899Sps * file servers have no limits, i don't think it matters 851138899Sps */ 852138899Sps if (p && uio->uio_offset + uio->uio_resid > 853138899Sps p->p_rlimit[RLIMIT_FSIZE].rlim_cur) { 854138899Sps PROC_LOCK(p); 8551541Srgrimes psignal(p, SIGXFSZ); 8561541Srgrimes PROC_UNLOCK(p); 8571549Srgrimes if (haverslock) 85883651Speter nfs_rsunlock(np, p); 8591541Srgrimes return (EFBIG); 86046349Salc } 86146349Salc 86283366Sjulian biosize = vp->v_mount->mnt_stat.f_iosize; 86346349Salc 8641541Srgrimes do { 86546349Salc /* 8661541Srgrimes * Check for a valid write lease. 
8671541Srgrimes */ 8681541Srgrimes if ((nmp->nm_flag & NFSMNT_NQNFS) && 8699336Sdfr NQNFS_CKINVALID(vp, np, ND_WRITE)) { 87011921Sphk do { 87146349Salc error = nqnfs_getlease(vp, ND_WRITE, cred, p); 87283651Speter } while (error == NQNFS_EXPIRED); 87383366Sjulian if (error) 8741541Srgrimes break; 8751541Srgrimes if (np->n_lrev != np->n_brev || 8761541Srgrimes (np->n_flag & NQNFSNONCACHE)) { 8771541Srgrimes error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1); 87883366Sjulian if (error) 8791541Srgrimes break; 8801541Srgrimes np->n_brev = np->n_lrev; 8811541Srgrimes } 8821541Srgrimes } 883158739Smohans if ((np->n_flag & NQNFSNONCACHE) && uio->uio_iovcnt == 1) { 8841541Srgrimes iomode = NFSV3WRITE_FILESYNC; 8851541Srgrimes error = nfs_writerpc(vp, uio, cred, &iomode, &must_commit); 886158739Smohans if (must_commit) 8871541Srgrimes nfs_clearcommit(vp->v_mount); 888158739Smohans break; 889158739Smohans } 890158739Smohans nfsstats.biocache_writes++; 89136176Speter lbn = uio->uio_offset / biosize; 892158739Smohans on = uio->uio_offset & (biosize-1); 893158739Smohans n = min((unsigned)(biosize - on), uio->uio_resid); 89483366Sjulianagain: 895158739Smohans /* 896158739Smohans * Handle direct append and file extension cases, calculate 89754605Sdillon * unaligned buffer size. 89854605Sdillon */ 89954605Sdillon 90054605Sdillon if (uio->uio_offset == np->n_size && n) { 90154605Sdillon /* 9021541Srgrimes * Get the buffer (in its pre-append state to maintain 903158739Smohans * B_CACHE if it was previously set). Resize the 9041541Srgrimes * nfsnode after we have locked the buffer to prevent 905158739Smohans * readers from reading garbage. 
906147420Sgreen */ 907147280Sgreen bcount = on; 908147280Sgreen bp = nfs_getcacheblk(vp, lbn, bcount, p); 909147280Sgreen 910147280Sgreen if (bp != NULL) { 911147280Sgreen long save; 912147280Sgreen 913147280Sgreen np->n_size = uio->uio_offset + n; 914147420Sgreen np->n_flag |= NMODIFIED; 915147280Sgreen vnode_pager_setsize(vp, np->n_size); 9161541Srgrimes 917190380Srwatson save = bp->b_flags & B_CACHE; 918140731Sphk bcount += n; 9193305Sphk allocbuf(bp, bcount); 9201541Srgrimes bp->b_flags |= save; 921158739Smohans bp->b_magic = B_MAGIC_NFS; 922158739Smohans bp->b_op = &buf_ops_nfs; 9231541Srgrimes } 92454605Sdillon } else { 92554605Sdillon /* 92654605Sdillon * Obtain the locked cache block first, and then 92754605Sdillon * adjust the file's size as appropriate. 92854605Sdillon */ 92954605Sdillon bcount = on + n; 93054605Sdillon if ((off_t)lbn * biosize + bcount < np->n_size) { 931190380Srwatson if ((off_t)(lbn + 1) * biosize < np->n_size) 932182371Sattilio bcount = biosize; 93354605Sdillon else 93454605Sdillon bcount = np->n_size - (off_t)lbn * biosize; 935158739Smohans } 93654605Sdillon 937158739Smohans bp = nfs_getcacheblk(vp, lbn, bcount, p); 93854605Sdillon 93954605Sdillon if (uio->uio_offset + n > np->n_size) { 9401541Srgrimes np->n_size = uio->uio_offset + n; 9411541Srgrimes np->n_flag |= NMODIFIED; 94236473Speter vnode_pager_setsize(vp, np->n_size); 94336473Speter } 9441541Srgrimes } 9451541Srgrimes 94654605Sdillon if (!bp) { 947138899Sps error = EINTR; 948138899Sps break; 949138899Sps } 9501541Srgrimes 9511541Srgrimes /* 9521541Srgrimes * Issue a READ if B_CACHE is not set. In special-append 9531541Srgrimes * mode, B_CACHE is based on the buffer prior to the write 954125454Sjhb * op and is typically set, avoiding the read. 
If a read 95573929Sjhb * is required in special append mode, the server will 956125454Sjhb * probably send us a short-read since we extended the file 957125454Sjhb * on our end, resulting in b_resid == 0 and, thusly, 958125454Sjhb * B_CACHE getting set. 959125454Sjhb * 960125454Sjhb * We can also avoid issuing the read if the write covers 961125454Sjhb * the entire buffer. We have to make sure the buffer state 96273929Sjhb * is reasonable in this case since we will not be initiating 9631541Srgrimes * I/O. See the comments in kern/vfs_bio.c's getblk() for 96446349Salc * more information. 9659428Sdfr * 966147280Sgreen * B_CACHE may also be set due to the buffer being cached 967147280Sgreen * normally. 968147280Sgreen */ 969147280Sgreen 970147280Sgreen if (on == 0 && n == bcount) { 971147280Sgreen bp->b_flags |= B_CACHE; 972147280Sgreen bp->b_flags &= ~B_INVAL; 973147280Sgreen bp->b_ioflags &= ~BIO_ERROR; 974147280Sgreen } 975158739Smohans 976158739Smohans if ((bp->b_flags & B_CACHE) == 0) { 977158739Smohans bp->b_iocmd = BIO_READ; 978158739Smohans vfs_busy_pages(bp, 0); 979158739Smohans error = nfs_doio(bp, cred, p); 980147280Sgreen if (error) { 981147280Sgreen brelse(bp); 982147280Sgreen break; 983147280Sgreen } 984147280Sgreen } 985147280Sgreen if (!bp) { 986147280Sgreen error = EINTR; 987147280Sgreen break; 988147280Sgreen } 989147280Sgreen if (bp->b_wcred == NOCRED) { 990147280Sgreen crhold(cred); 991158739Smohans bp->b_wcred = cred; 992147280Sgreen } 993158739Smohans np->n_flag |= NMODIFIED; 994147280Sgreen 995147280Sgreen /* 996147280Sgreen * If dirtyend exceeds file size, chop it down. This should 997147280Sgreen * not normally occur but there is an append race where it 998147280Sgreen * might occur XXX, so we log it. 999147280Sgreen * 1000147280Sgreen * If the chopping creates a reverse-indexed or degenerate 1001147280Sgreen * situation with dirtyoff/end, we 0 both of them. 
1002147280Sgreen */ 1003147280Sgreen 1004147280Sgreen if (bp->b_dirtyend > bcount) { 1005147280Sgreen printf("NFS append race @%lx:%d\n", 1006147280Sgreen (long)bp->b_blkno * DEV_BSIZE, 1007147280Sgreen bp->b_dirtyend - bcount); 1008147280Sgreen bp->b_dirtyend = bcount; 1009147280Sgreen } 1010147280Sgreen 1011147280Sgreen if (bp->b_dirtyoff >= bp->b_dirtyend) 1012147280Sgreen bp->b_dirtyoff = bp->b_dirtyend = 0; 1013147280Sgreen 1014147280Sgreen /* 1015147280Sgreen * If the new write will leave a contiguous dirty 1016147280Sgreen * area, just update the b_dirtyoff and b_dirtyend, 1017147280Sgreen * otherwise force a write rpc of the old dirty area. 1018147280Sgreen * 1019147280Sgreen * While it is possible to merge discontiguous writes due to 1020148268Sps * our having a B_CACHE buffer ( and thus valid read data 1021147280Sgreen * for the hole), we don't because it could lead to 1022147280Sgreen * significant cache coherency problems with multiple clients, 102346349Salc * especially if locking is implemented later on. 10241541Srgrimes * 10251541Srgrimes * as an optimization we could theoretically maintain 10261541Srgrimes * a linked list of discontinuous areas, but we would still 10271541Srgrimes * have to commit them separately so there isn't much 10281541Srgrimes * advantage to it except perhaps a bit of asynchronization. 10291541Srgrimes */ 103046349Salc 103146349Salc if (bp->b_dirtyend > 0 && 103246349Salc (on > bp->b_dirtyend || (on + n) < bp->b_dirtyoff)) { 103346349Salc if (BUF_WRITE(bp) == EINTR) 1034158739Smohans return (EINTR); 103546349Salc goto again; 1036158739Smohans } 103746349Salc 103854605Sdillon /* 103954605Sdillon * Check for valid write lease and get one as required. 104054605Sdillon * In case getblk() and/or bwrite() delayed us. 
104154605Sdillon */ 104246349Salc if ((nmp->nm_flag & NFSMNT_NQNFS) && 104346349Salc NQNFS_CKINVALID(vp, np, ND_WRITE)) { 104483366Sjulian do { 104546349Salc error = nqnfs_getlease(vp, ND_WRITE, cred, p); 104654605Sdillon } while (error == NQNFS_EXPIRED); 104754605Sdillon if (error) { 104846349Salc brelse(bp); 1049158739Smohans break; 105054605Sdillon } 105154605Sdillon if (np->n_lrev != np->n_brev || 105254605Sdillon (np->n_flag & NQNFSNONCACHE)) { 1053158739Smohans brelse(bp); 105454605Sdillon error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1); 105554605Sdillon if (error) 105654605Sdillon break; 105754605Sdillon np->n_brev = np->n_lrev; 105854605Sdillon goto again; 105954605Sdillon } 106046349Salc } 106154605Sdillon 106283651Speter error = uiomove((char *)bp->b_data + on, n, uio); 106354605Sdillon 106454605Sdillon /* 106554605Sdillon * Since this block is being modified, it must be written 106654605Sdillon * again and not just committed. Since write clustering does 106754605Sdillon * not work for the stage 1 data write, only the stage 2 106854605Sdillon * commit rpc, we have to clear B_CLUSTEROK as well. 106954605Sdillon */ 107054605Sdillon bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK); 107154605Sdillon 1072158739Smohans if (error) { 107383366Sjulian bp->b_ioflags |= BIO_ERROR; 1074158739Smohans brelse(bp); 107546349Salc break; 107646349Salc } 107746349Salc 107846349Salc /* 107946349Salc * Only update dirtyoff/dirtyend if not a degenerate 1080158739Smohans * condition. 10818692Sdg */ 108246349Salc if (n) { 108354605Sdillon if (bp->b_dirtyend > 0) { 1084131691Salfred bp->b_dirtyoff = min(on, bp->b_dirtyoff); 1085131691Salfred bp->b_dirtyend = max((on + n), bp->b_dirtyend); 1086131691Salfred } else { 108754605Sdillon bp->b_dirtyoff = on; 108854605Sdillon bp->b_dirtyend = on + n; 108954605Sdillon } 109046349Salc vfs_bio_set_validclean(bp, on, n); 109146349Salc } 109246349Salc 109346349Salc /* 109446349Salc * If the lease is non-cachable or IO_SYNC do bwrite(). 
109546349Salc * 109683651Speter * IO_INVAL appears to be unused. The idea appears to be 109746349Salc * to turn off caching in this case. Very odd. XXX 109846349Salc */ 109946349Salc if ((np->n_flag & NQNFSNONCACHE) || (ioflag & IO_SYNC)) { 110046349Salc if (ioflag & IO_INVAL) 110146349Salc bp->b_flags |= B_NOCACHE; 110246349Salc error = BUF_WRITE(bp); 110346349Salc if (error) 110446349Salc break; 110546349Salc if (np->n_flag & NQNFSNONCACHE) { 110646349Salc error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1); 110746349Salc if (error) 110846349Salc break; 110946349Salc } 111046349Salc } else if ((n + on) == biosize && 111158934Sphk (nmp->nm_flag & NFSMNT_NQNFS) == 0) { 111258934Sphk bp->b_flags |= B_ASYNC; 11138692Sdg (void)nfs_writebp(bp, 0, 0); 111446349Salc } else { 111546349Salc bdwrite(bp); 111658345Sphk } 111746349Salc } while (uio->uio_resid > 0 && n > 0); 1118134898Sphk 111946349Salc if (haverslock) 112046349Salc nfs_rsunlock(np, p); 112154605Sdillon 112246349Salc return (error); 112346349Salc} 112484827Sjhb 112584827Sjhb/* 1126158739Smohans * Get an nfs cache block. 11271541Srgrimes * 1128158739Smohans * Allocate a new one if the block isn't currently in the cache 11298692Sdg * and return the block marked busy. If the calling process is 113045347Sjulian * interrupted by a signal for an interruptible mount point, return 113154605Sdillon * NULL. 113254605Sdillon * 113383651Speter * The caller must carefully deal with the possible B_INVAL state of 113454605Sdillon * the buffer. nfs_doio() clears B_INVAL (and nfs_asyncio() clears it 113554605Sdillon * indirectly), so synchronous reads can be issued without worrying about 113654605Sdillon * the B_INVAL state. We have to be a little more careful when dealing 113745347Sjulian * with writes (see comments in nfs_write()) when extending a file past 113845347Sjulian * its EOF. 
113954605Sdillon */ 1140158739Smohansstatic struct buf * 114183651Speternfs_getcacheblk(vp, bn, size, p) 114254605Sdillon struct vnode *vp; 114354605Sdillon daddr_t bn; 114454605Sdillon int size; 114554605Sdillon struct proc *p; 114645347Sjulian{ 114745347Sjulian register struct buf *bp; 114831617Sdyson struct mount *mp; 11491541Srgrimes struct nfsmount *nmp; 115031617Sdyson 115131617Sdyson mp = vp->v_mount; 115231617Sdyson nmp = VFSTONFS(mp); 115346349Salc 115483651Speter if (nmp->nm_flag & NFSMNT_INT) { 115546349Salc bp = getblk(vp, bn, size, PCATCH, 0); 115683651Speter while (bp == (struct buf *)0) { 115746349Salc if (nfs_sigintr(nmp, (struct nfsreq *)0, p)) 115846349Salc return ((struct buf *)0); 115946349Salc bp = getblk(vp, bn, size, 0, 2 * hz); 116046349Salc } 116146349Salc } else { 116246349Salc bp = getblk(vp, bn, size, 0, 0); 116346349Salc } 116431617Sdyson 116542957Sdillon if (vp->v_type == VREG) { 116631617Sdyson int biosize; 116731617Sdyson 1168126853Sphk biosize = mp->mnt_stat.f_iosize; 1169100194Sdillon bp->b_blkno = bn * (biosize / DEV_BSIZE); 1170100194Sdillon } 1171100194Sdillon return (bp); 117231617Sdyson} 117331617Sdyson 117431617Sdyson/* 11753305Sphk * Flush and invalidate all dirty buffers. If another process is already 117654480Sdillon * doing the flush, just wait for completion. 
 */
int
nfs_vinvalbuf(vp, flags, cred, p, intrflg)
	struct vnode *vp;
	int flags;
	struct ucred *cred;
	struct proc *p;
	int intrflg;
{
	register struct nfsnode *np = VTONFS(vp);
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	int error = 0, slpflag, slptimeo;

	/*
	 * NOTE(review): vnode is locked for revocation/reclaim (VXLOCK);
	 * presumably there is nothing useful left to flush — confirm.
	 */
	if (vp->v_flag & VXLOCK) {
		return (0);
	}

	/* Interruptible sleeps only make sense on interruptible mounts. */
	if ((nmp->nm_flag & NFSMNT_INT) == 0)
		intrflg = 0;
	if (intrflg) {
		slpflag = PCATCH;
		slptimeo = 2 * hz;
	} else {
		slpflag = 0;
		slptimeo = 0;
	}
	/*
	 * First wait for any other process doing a flush to complete.
	 * NFLUSHWANT asks the in-progress flusher to wakeup() us when done.
	 */
	while (np->n_flag & NFLUSHINPROG) {
		np->n_flag |= NFLUSHWANT;
		error = tsleep((caddr_t)&np->n_flag, PRIBIO + 2, "nfsvinval",
			slptimeo);
		if (error && intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p))
			return (EINTR);
	}

	/*
	 * Now, flush as required.  Retry vinvalbuf() until it succeeds,
	 * bailing out with EINTR if a signal arrives on an interruptible
	 * mount.
	 */
	np->n_flag |= NFLUSHINPROG;
	error = vinvalbuf(vp, flags, cred, p, slpflag, 0);
	while (error) {
		if (intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p)) {
			/* Abandon the flush; wake anyone waiting on us. */
			np->n_flag &= ~NFLUSHINPROG;
			if (np->n_flag & NFLUSHWANT) {
				np->n_flag &= ~NFLUSHWANT;
				wakeup((caddr_t)&np->n_flag);
			}
			return (EINTR);
		}
		error = vinvalbuf(vp, flags, cred, p, 0, slptimeo);
	}
	/* Flush done: the node no longer has dirty buffers cached. */
	np->n_flag &= ~(NMODIFIED | NFLUSHINPROG);
	if (np->n_flag & NFLUSHWANT) {
		np->n_flag &= ~NFLUSHWANT;
		wakeup((caddr_t)&np->n_flag);
	}
	return (0);
}

/*
 * Initiate asynchronous I/O. Return an error if no nfsiods are available.
 * This is mainly to avoid queueing async I/O requests when the nfsiods
 * are all hung on a dead server.
 *
 * Note: nfs_asyncio() does not clear (BIO_ERROR|B_INVAL) but when the bp
 * is eventually dequeued by the async daemon, nfs_doio() *will*.
 */
int
nfs_asyncio(bp, cred, procp)
	register struct buf *bp;
	struct ucred *cred;
	struct proc *procp;
{
	struct nfsmount *nmp;
	int i;
	int gotiod;
	int slpflag = 0;
	int slptimeo = 0;
	int error;

	/*
	 * If no async daemons then return EIO to force caller to run the rpc
	 * synchronously.
	 */
	if (nfs_numasync == 0)
		return (EIO);

	nmp = VFSTONFS(bp->b_vp->v_mount);

	/*
	 * Commits are usually short and sweet so lets save some cpu and
	 * leave the async daemons for more important rpc's (such as reads
	 * and writes).
	 */
	if (bp->b_iocmd == BIO_WRITE && (bp->b_flags & B_NEEDCOMMIT) &&
	    (nmp->nm_bufqiods > nfs_numasync / 2)) {
		return(EIO);
	}

again:
	if (nmp->nm_flag & NFSMNT_INT)
		slpflag = PCATCH;
	gotiod = FALSE;

	/*
	 * Find a free iod to process this request.
	 */
	for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
		if (nfs_iodwant[i]) {
			/*
			 * Found one, so wake it up and tell it which
			 * mount to process.
			 */
			NFS_DPF(ASYNCIO,
				("nfs_asyncio: waking iod %d for mount %p\n",
				 i, nmp));
			nfs_iodwant[i] = (struct proc *)0;
			nfs_iodmount[i] = nmp;
			nmp->nm_bufqiods++;
			wakeup((caddr_t)&nfs_iodwant[i]);
			gotiod = TRUE;
			break;
		}

	/*
	 * If none are free, we may already have an iod working on this mount
	 * point. If so, it will process our request.
	 */
	if (!gotiod) {
		if (nmp->nm_bufqiods > 0) {
			NFS_DPF(ASYNCIO,
				("nfs_asyncio: %d iods are already processing mount %p\n",
				 nmp->nm_bufqiods, nmp));
			gotiod = TRUE;
		}
	}

	/*
	 * If we have an iod which can process the request, then queue
	 * the buffer.
	 */
	if (gotiod) {
		/*
		 * Ensure that the queue never grows too large.  We still want
		 * to asynchronize so we block rather then return EIO.
		 */
		while (nmp->nm_bufqlen >= 2*nfs_numasync) {
			NFS_DPF(ASYNCIO,
				("nfs_asyncio: waiting for mount %p queue to drain\n", nmp));
			nmp->nm_bufqwant = TRUE;
			error = tsleep(&nmp->nm_bufq, slpflag | PRIBIO,
				       "nfsaio", slptimeo);
			if (error) {
				if (nfs_sigintr(nmp, NULL, procp))
					return (EINTR);
				/*
				 * After a caught signal, fall back to an
				 * uninterruptible timed sleep so we still
				 * make progress.
				 */
				if (slpflag == PCATCH) {
					slpflag = 0;
					slptimeo = 2 * hz;
				}
			}
			/*
			 * We might have lost our iod while sleeping,
			 * so check and loop if nescessary.
			 */
			if (nmp->nm_bufqiods == 0) {
				NFS_DPF(ASYNCIO,
					("nfs_asyncio: no iods after mount %p queue was drained, looping\n", nmp));
				goto again;
			}
		}

		/* Hold the credential the daemon will do the I/O under. */
		if (bp->b_iocmd == BIO_READ) {
			if (bp->b_rcred == NOCRED && cred != NOCRED) {
				crhold(cred);
				bp->b_rcred = cred;
			}
		} else {
			bp->b_flags |= B_WRITEINPROG;
			if (bp->b_wcred == NOCRED && cred != NOCRED) {
				crhold(cred);
				bp->b_wcred = cred;
			}
		}

		BUF_KERNPROC(bp);
		TAILQ_INSERT_TAIL(&nmp->nm_bufq, bp, b_freelist);
		nmp->nm_bufqlen++;
		return (0);
	}

	/*
	 * All the iods are busy on other mounts, so return EIO to
	 * force the caller to process the i/o synchronously.
	 */
	NFS_DPF(ASYNCIO, ("nfs_asyncio: no iods available, i/o is synchronous\n"));
	return (EIO);
}

/*
 * Do an I/O operation to/from a cache block. This may be called
 * synchronously or from an nfsiod.
 */
int
nfs_doio(bp, cr, p)
	struct buf *bp;
	struct ucred *cr;
	struct proc *p;
{
	struct uio *uiop;
	struct vnode *vp;
	struct nfsnode *np;
	struct nfsmount *nmp;
	int error = 0, iomode, must_commit = 0;
	struct uio uio;
	struct iovec io;

	vp = bp->b_vp;
	np = VTONFS(vp);
	nmp = VFSTONFS(vp->v_mount);
	uiop = &uio;
	uiop->uio_iov = &io;
	uiop->uio_iovcnt = 1;
	uiop->uio_segflg = UIO_SYSSPACE;
	uiop->uio_procp = p;

	/*
	 * clear BIO_ERROR and B_INVAL state prior to initiating the I/O. We
	 * do this here so we do not have to do it in all the code that
	 * calls us.
	 */
	bp->b_flags &= ~B_INVAL;
	bp->b_ioflags &= ~BIO_ERROR;

	KASSERT(!(bp->b_flags & B_DONE), ("nfs_doio: bp %p already marked done", bp));

	/*
	 * Historically, paging was done with physio, but no more.
	 */
	if (bp->b_flags & B_PHYS) {
		/*
		 * ...though reading /dev/drum still gets us here.
		 */
		io.iov_len = uiop->uio_resid = bp->b_bcount;
		/* mapping was done by vmapbuf() */
		io.iov_base = bp->b_data;
		uiop->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE;
		if (bp->b_iocmd == BIO_READ) {
			uiop->uio_rw = UIO_READ;
			nfsstats.read_physios++;
			error = nfs_readrpc(vp, uiop, cr);
		} else {
			int com;

			iomode = NFSV3WRITE_DATASYNC;
			uiop->uio_rw = UIO_WRITE;
			nfsstats.write_physios++;
			error = nfs_writerpc(vp, uiop, cr, &iomode, &com);
		}
		if (error) {
			bp->b_ioflags |= BIO_ERROR;
			bp->b_error = error;
		}
	} else if (bp->b_iocmd == BIO_READ) {
		/* Cache-block read: dispatch by vnode type. */
		io.iov_len = uiop->uio_resid = bp->b_bcount;
		io.iov_base = bp->b_data;
		uiop->uio_rw = UIO_READ;
		switch (vp->v_type) {
		case VREG:
			uiop->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE;
			nfsstats.read_bios++;
			error = nfs_readrpc(vp, uiop, cr);
			if (!error) {
				if (uiop->uio_resid) {
					/*
					 * If we had a short read with no error, we must have
					 * hit a file hole. We should zero-fill the remainder.
					 * This can also occur if the server hits the file EOF.
					 *
					 * Holes used to be able to occur due to pending
					 * writes, but that is not possible any longer.
					 */
					int nread = bp->b_bcount - uiop->uio_resid;
					int left  = bp->b_bcount - nread;

					if (left > 0)
						bzero((char *)bp->b_data + nread, left);
					uiop->uio_resid = 0;
				}
			}
			/*
			 * Kill the process if a mapped executable changed
			 * underneath it (mtime/lease revision mismatch).
			 */
			if (p && (vp->v_flag & VTEXT) &&
				(((nmp->nm_flag & NFSMNT_NQNFS) &&
				  NQNFS_CKINVALID(vp, np, ND_READ) &&
				  np->n_lrev != np->n_brev) ||
				 (!(nmp->nm_flag & NFSMNT_NQNFS) &&
				  np->n_mtime != np->n_vattr.va_mtime.tv_sec))) {
				uprintf("Process killed due to text file modification\n");
				PROC_LOCK(p);
				psignal(p, SIGKILL);
				_PHOLD(p);
				PROC_UNLOCK(p);
			}
			break;
		case VLNK:
			uiop->uio_offset = (off_t)0;
			nfsstats.readlink_bios++;
			error = nfs_readlinkrpc(vp, uiop, cr);
			break;
		case VDIR:
			nfsstats.readdir_bios++;
			uiop->uio_offset = ((u_quad_t)bp->b_lblkno) * NFS_DIRBLKSIZ;
			/* Try READDIRPLUS first; fall back if unsupported. */
			if (nmp->nm_flag & NFSMNT_RDIRPLUS) {
				error = nfs_readdirplusrpc(vp, uiop, cr);
				if (error == NFSERR_NOTSUPP)
					nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
			}
			if ((nmp->nm_flag & NFSMNT_RDIRPLUS) == 0)
				error = nfs_readdirrpc(vp, uiop, cr);
			/*
			 * end-of-directory sets B_INVAL but does not generate an
			 * error.
			 */
			if (error == 0 && uiop->uio_resid == bp->b_bcount)
				bp->b_flags |= B_INVAL;
			break;
		default:
			printf("nfs_doio: type %x unexpected\n",vp->v_type);
			break;
		};
		if (error) {
			bp->b_ioflags |= BIO_ERROR;
			bp->b_error = error;
		}
	} else {
		/*
		 * If we only need to commit, try to commit
		 */
		if (bp->b_flags & B_NEEDCOMMIT) {
			int retv;
			off_t off;

			off = ((u_quad_t)bp->b_blkno) * DEV_BSIZE + bp->b_dirtyoff;
			bp->b_flags |= B_WRITEINPROG;
			retv = nfs_commit(
				bp->b_vp, off, bp->b_dirtyend-bp->b_dirtyoff,
				bp->b_wcred, p);
			bp->b_flags &= ~B_WRITEINPROG;
			if (retv == 0) {
				/* Commit succeeded: buffer is clean. */
				bp->b_dirtyoff = bp->b_dirtyend = 0;
				bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK);
				bp->b_resid = 0;
				bufdone(bp);
				return (0);
			}
			if (retv == NFSERR_STALEWRITEVERF) {
				/* Server rebooted: all pending commits invalid. */
				nfs_clearcommit(bp->b_vp->v_mount);
			}
		}

		/*
		 * Setup for actual write.  Clamp the dirty region to the
		 * current file size first.
		 */

		if ((off_t)bp->b_blkno * DEV_BSIZE + bp->b_dirtyend > np->n_size)
			bp->b_dirtyend = np->n_size - (off_t)bp->b_blkno * DEV_BSIZE;

		if (bp->b_dirtyend > bp->b_dirtyoff) {
			io.iov_len = uiop->uio_resid = bp->b_dirtyend
			    - bp->b_dirtyoff;
			uiop->uio_offset = (off_t)bp->b_blkno * DEV_BSIZE
			    + bp->b_dirtyoff;
			io.iov_base = (char *)bp->b_data + bp->b_dirtyoff;
			uiop->uio_rw = UIO_WRITE;
			nfsstats.write_bios++;

			if ((bp->b_flags & (B_ASYNC | B_NEEDCOMMIT | B_NOCACHE | B_CLUSTER)) == B_ASYNC)
				iomode = NFSV3WRITE_UNSTABLE;
			else
				iomode = NFSV3WRITE_FILESYNC;

			bp->b_flags |= B_WRITEINPROG;
			error = nfs_writerpc(vp, uiop, cr, &iomode, &must_commit);

			/*
			 * When setting B_NEEDCOMMIT also set B_CLUSTEROK to try
			 * to cluster the buffers needing commit. This will allow
			 * the system to submit a single commit rpc for the whole
			 * cluster. We can do this even if the buffer is not 100%
			 * dirty (relative to the NFS blocksize), so we optimize the
			 * append-to-file-case.
			 *
			 * (when clearing B_NEEDCOMMIT, B_CLUSTEROK must also be
			 * cleared because write clustering only works for commit
			 * rpc's, not for the data portion of the write).
			 */

			if (!error && iomode == NFSV3WRITE_UNSTABLE) {
				bp->b_flags |= B_NEEDCOMMIT;
				if (bp->b_dirtyoff == 0
				    && bp->b_dirtyend == bp->b_bcount)
					bp->b_flags |= B_CLUSTEROK;
			} else {
				bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK);
			}
			bp->b_flags &= ~B_WRITEINPROG;

			/*
			 * For an interrupted write, the buffer is still valid
			 * and the write hasn't been pushed to the server yet,
			 * so we can't set BIO_ERROR and report the interruption
			 * by setting B_EINTR. For the B_ASYNC case, B_EINTR
			 * is not relevant, so the rpc attempt is essentially
			 * a noop. For the case of a V3 write rpc not being
			 * committed to stable storage, the block is still
			 * dirty and requires either a commit rpc or another
			 * write rpc with iomode == NFSV3WRITE_FILESYNC before
			 * the block is reused. This is indicated by setting
			 * the B_DELWRI and B_NEEDCOMMIT flags.
			 *
			 * If the buffer is marked B_PAGING, it does not reside on
			 * the vp's paging queues so we cannot call bdirty(). The
			 * bp in this case is not an NFS cache block so we should
			 * be safe. XXX
			 */
			if (error == EINTR
			    || (!error && (bp->b_flags & B_NEEDCOMMIT))) {
				int s;

				s = splbio();
				bp->b_flags &= ~(B_INVAL|B_NOCACHE);
				if ((bp->b_flags & B_PAGING) == 0) {
					bdirty(bp);
					bp->b_flags &= ~B_DONE;
				}
				if (error && (bp->b_flags & B_ASYNC) == 0)
					bp->b_flags |= B_EINTR;
				splx(s);
			} else {
				if (error) {
					/* Latch the write error on the nfsnode. */
					bp->b_ioflags |= BIO_ERROR;
					bp->b_error = np->n_error = error;
					np->n_flag |= NWRITEERR;
				}
				bp->b_dirtyoff = bp->b_dirtyend = 0;
			}
		} else {
			/* Nothing dirty: complete the buffer immediately. */
			bp->b_resid = 0;
			bufdone(bp);
			return (0);
		}
	}
	bp->b_resid = uiop->uio_resid;
	if (must_commit)
		nfs_clearcommit(vp->v_mount);
	bufdone(bp);
	return (error);
}
155183366Sjulian nfsstats.write_bios++; 15521541Srgrimes 155346349Salc if ((bp->b_flags & (B_ASYNC | B_NEEDCOMMIT | B_NOCACHE | B_CLUSTER)) == B_ASYNC) 155458934Sphk iomode = NFSV3WRITE_UNSTABLE; 155546349Salc else 155646349Salc iomode = NFSV3WRITE_FILESYNC; 155746349Salc 155858934Sphk bp->b_flags |= B_WRITEINPROG; 155958934Sphk error = nfs_writerpc(vp, uiop, cr, &iomode, &must_commit); 156046349Salc 156144679Sjulian /* 1562158739Smohans * When setting B_NEEDCOMMIT also set B_CLUSTEROK to try 1563158739Smohans * to cluster the buffers needing commit. This will allow 15643664Sphk * the system to submit a single commit rpc for the whole 15653664Sphk * cluster. We can do this even if the buffer is not 100% 15661541Srgrimes * dirty (relative to the NFS blocksize), so we optimize the 156787834Sdillon * append-to-file-case. 15681541Srgrimes * 15691541Srgrimes * (when clearing B_NEEDCOMMIT, B_CLUSTEROK must also be 15709336Sdfr * cleared because write clustering only works for commit 15711541Srgrimes * rpc's, not for the data portion of the write). 1572122953Salfred */ 157387834Sdillon 15741541Srgrimes if (!error && iomode == NFSV3WRITE_UNSTABLE) { 15751541Srgrimes bp->b_flags |= B_NEEDCOMMIT; 15761541Srgrimes if (bp->b_dirtyoff == 0 157746349Salc && bp->b_dirtyend == bp->b_bcount) 157846349Salc bp->b_flags |= B_CLUSTEROK; 157946349Salc } else { 158046349Salc bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK); 158183651Speter } 158246349Salc bp->b_flags &= ~B_WRITEINPROG; 15831541Srgrimes 158446349Salc /* 158587834Sdillon * For an interrupted write, the buffer is still valid 158646349Salc * and the write hasn't been pushed to the server yet, 158746349Salc * so we can't set BIO_ERROR and report the interruption 158846349Salc * by setting B_EINTR. For the B_ASYNC case, B_EINTR 158946349Salc * is not relevant, so the rpc attempt is essentially 159046349Salc * a noop. 
For the case of a V3 write rpc not being 15911541Srgrimes * committed to stable storage, the block is still 1592115041Srwatson * dirty and requires either a commit rpc or another 1593158739Smohans * write rpc with iomode == NFSV3WRITE_FILESYNC before 1594158739Smohans * the block is reused. This is indicated by setting 1595158739Smohans * the B_DELWRI and B_NEEDCOMMIT flags. 1596158739Smohans * 1597158739Smohans * If the buffer is marked B_PAGING, it does not reside on 1598158739Smohans * the vp's paging queues so we cannot call bdirty(). The 1599158739Smohans * bp in this case is not an NFS cache block so we should 1600158739Smohans * be safe. XXX 1601158739Smohans */ 16021541Srgrimes if (error == EINTR 16031541Srgrimes || (!error && (bp->b_flags & B_NEEDCOMMIT))) { 16041541Srgrimes int s; 16059336Sdfr 16061541Srgrimes s = splbio(); 1607122953Salfred bp->b_flags &= ~(B_INVAL|B_NOCACHE); 16081541Srgrimes if ((bp->b_flags & B_PAGING) == 0) { 16091541Srgrimes bdirty(bp); 16101541Srgrimes bp->b_flags &= ~B_DONE; 16119336Sdfr } 1612192578Srwatson if (error && (bp->b_flags & B_ASYNC) == 0) 1613192578Srwatson bp->b_flags |= B_EINTR; 1614192578Srwatson splx(s); 1615192578Srwatson } else { 16169336Sdfr if (error) { 1617192578Srwatson bp->b_ioflags |= BIO_ERROR; 1618192578Srwatson bp->b_error = np->n_error = error; 161946349Salc np->n_flag |= NWRITEERR; 162046349Salc } 162146349Salc bp->b_dirtyoff = bp->b_dirtyend = 0; 162246349Salc } 162339782Smckusick } else { 162439782Smckusick bp->b_resid = 0; 16251541Srgrimes bufdone(bp); 16263305Sphk return (0); 1627158739Smohans } 16283305Sphk } 16291541Srgrimes bp->b_resid = uiop->uio_resid; 16301541Srgrimes if (must_commit) 163158934Sphk nfs_clearcommit(vp->v_mount); 16321541Srgrimes bufdone(bp); 16331541Srgrimes return (error); 16341541Srgrimes} 163583651Speter