nfs_bio.c revision 7871
/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Rick Macklem at The University of Guelph.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)nfs_bio.c	8.5 (Berkeley) 1/4/94
 * $Id: nfs_bio.c,v 1.11 1995/03/04 03:24:34 davidg Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/kernel.h>

#include <vm/vm.h>

#include <nfs/nfsnode.h>
#include <nfs/rpcv2.h>
#include <nfs/nfsv2.h>
#include <nfs/nfs.h>
#include <nfs/nfsmount.h>
#include <nfs/nqnfs.h>

/* Forward declaration (old-style; no prototype). */
struct buf *nfs_getcacheblk();
extern struct proc *nfs_iodwant[NFS_MAXASYNCDAEMON];
extern int nfs_numasync;

/*
 * Vnode op for read using bio
 * Any similarity to readip() is purely coincidental.
 *
 * Reads regular files, symlinks and directories through the buffer
 * cache, issuing read RPCs (via nfs_doio()/nfs_asyncio()) to fill
 * cache blocks as needed.  Returns 0 or an errno value; EINTR is
 * returned when a cache-block allocation is interrupted by a signal
 * on an interruptible mount.
 */
int
nfs_bioread(vp, uio, ioflag, cred)
	register struct vnode *vp;
	register struct uio *uio;
	int ioflag;
	struct ucred *cred;
{
	register struct nfsnode *np = VTONFS(vp);
	register int biosize, diff;
	struct buf *bp = 0, *rabp;
	struct vattr vattr;
	struct proc *p;
	struct nfsmount *nmp;
	daddr_t lbn, rabn;
	int nra, error = 0, n = 0, on = 0, not_readin;

#ifdef lint
	ioflag = ioflag;
#endif /* lint */
#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_READ)
		panic("nfs_read mode");
#endif
	if (uio->uio_resid == 0)
		return (0);
	if (uio->uio_offset < 0 && vp->v_type != VDIR)
		return (EINVAL);
	nmp = VFSTONFS(vp->v_mount);
	/* Fixed cache block size; also used as the VREG block size below. */
	biosize = NFS_MAXDGRAMDATA;
	p = uio->uio_procp;
	/*
	 * For nfs, cache consistency can only be maintained approximately.
	 * Although RFC1094 does not specify the criteria, the following is
	 * believed to be compatible with the reference port.
	 * For nqnfs, full cache consistency is maintained within the loop.
	 * For nfs:
	 * If the file's modify time on the server has changed since the
	 * last read rpc or you have written to the file,
	 * you may have lost data cache consistency with the
	 * server, so flush all of the file's data out of the cache.
	 * Then force a getattr rpc to ensure that you have up to date
	 * attributes.
	 * The mount flag NFSMNT_MYWRITE says "Assume that my writes are
	 * the ones changing the modify time."
	 * NB: This implies that cache data can be read when up to
	 * NFS_ATTRTIMEO seconds out of date. If you find that you need current
	 * attributes this could be forced by setting n_attrstamp to 0 before
	 * the VOP_GETATTR() call.
	 */
	if ((nmp->nm_flag & NFSMNT_NQNFS) == 0 && vp->v_type != VLNK) {
		if (np->n_flag & NMODIFIED) {
			/* We wrote the file: flush unless MYWRITE says our
			 * own writes are the only source of mtime changes. */
			if ((nmp->nm_flag & NFSMNT_MYWRITE) == 0 ||
			    vp->v_type != VREG) {
				error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
				if (error)
					return (error);
			}
			np->n_attrstamp = 0;	/* force a fresh getattr */
			np->n_direofoffset = 0;
			error = VOP_GETATTR(vp, &vattr, cred, p);
			if (error)
				return (error);
			np->n_mtime = vattr.va_mtime.ts_sec;
		} else {
			error = VOP_GETATTR(vp, &vattr, cred, p);
			if (error)
				return (error);
			if (np->n_mtime != vattr.va_mtime.ts_sec) {
				/* Server-side change detected: toss cache. */
				np->n_direofoffset = 0;
				error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
				if (error)
					return (error);
				np->n_mtime = vattr.va_mtime.ts_sec;
			}
		}
	}
	do {

	    /*
	     * Get a valid lease. If cached data is stale, flush it.
	     */
	    if (nmp->nm_flag & NFSMNT_NQNFS) {
		if (NQNFS_CKINVALID(vp, np, NQL_READ)) {
		    do {
			error = nqnfs_getlease(vp, NQL_READ, cred, p);
		    } while (error == NQNFS_EXPIRED);
		    if (error)
			return (error);
		    if (np->n_lrev != np->n_brev ||
			(np->n_flag & NQNFSNONCACHE) ||
			((np->n_flag & NMODIFIED) && vp->v_type == VDIR)) {
			if (vp->v_type == VDIR) {
			    np->n_direofoffset = 0;
			    cache_purge(vp);
			}
			error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
			if (error)
			    return (error);
			/* Cache is now in sync with lease revision. */
			np->n_brev = np->n_lrev;
		    }
		} else if (vp->v_type == VDIR && (np->n_flag & NMODIFIED)) {
		    np->n_direofoffset = 0;
		    cache_purge(vp);
		    error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
		    if (error)
			return (error);
		}
	    }
	    if (np->n_flag & NQNFSNONCACHE) {
		/* Non-cachable lease: bypass the cache, one direct RPC. */
		switch (vp->v_type) {
		case VREG:
			error = nfs_readrpc(vp, uio, cred);
			break;
		case VLNK:
			error = nfs_readlinkrpc(vp, uio, cred);
			break;
		case VDIR:
			error = nfs_readdirrpc(vp, uio, cred);
			break;
		default:
			printf(" NQNFSNONCACHE: type %x unexpected\n",
				vp->v_type);
			break;
		};
		return (error);
	    }
	    switch (vp->v_type) {
	    case VREG:
		nfsstats.biocache_reads++;
		lbn = uio->uio_offset / biosize;
		on = uio->uio_offset & (biosize-1);
		/*
		 * not_readin == 1 means we may get the block from the cache
		 * without doing a read here, so its valid region might not
		 * cover [on, on + n) and must be checked below.
		 */
		not_readin = 1;

		/*
		 * Start the read ahead(s), as required.
		 */
		if (nfs_numasync > 0 && nmp->nm_readahead > 0 &&
		    lbn == vp->v_lastr + 1) {
		    for (nra = 0; nra < nmp->nm_readahead &&
			(lbn + 1 + nra) * biosize < np->n_size; nra++) {
			rabn = lbn + 1 + nra;
			if (!incore(vp, rabn)) {
			    rabp = nfs_getcacheblk(vp, rabn, biosize, p);
			    if (!rabp)
				return (EINTR);
			    if ((rabp->b_flags & B_DELWRI) == 0) {
				rabp->b_flags |= (B_READ | B_ASYNC);
				vfs_busy_pages(rabp, 0);
				if (nfs_asyncio(rabp, cred)) {
				    /* No nfsiod available: discard. */
				    rabp->b_flags |= B_INVAL|B_ERROR;
				    vfs_unbusy_pages(rabp);
				    brelse(rabp);
				}
			    } else {
				brelse(rabp);
			    }
			}
		    }
		}

		/*
		 * If the block is in the cache and has the required data
		 * in a valid region, just copy it out.
		 * Otherwise, get the block and write back/read in,
		 * as required.
		 */
again:
		bp = nfs_getcacheblk(vp, lbn, biosize, p);
		if (!bp)
			return (EINTR);
		if ((bp->b_flags & B_CACHE) == 0) {
			bp->b_flags |= B_READ;
			not_readin = 0;	/* we are reading it ourselves */
			vfs_busy_pages(bp, 0);
			error = nfs_doio(bp, cred, p);
			if (error) {
			    brelse(bp);
			    return (error);
			}
		}
		n = min((unsigned)(biosize - on), uio->uio_resid);
		/* Clamp to end of file. */
		diff = np->n_size - uio->uio_offset;
		if (diff < n)
			n = diff;
		if (not_readin && n > 0) {
			if (on < bp->b_validoff || (on + n) > bp->b_validend) {
				/*
				 * Cached block does not cover the region we
				 * need: push any dirty data, toss the block
				 * and retry with a fresh read.
				 * NOTE(review): no brelse() after VOP_BWRITE()
				 * — bwrite is expected to consume bp; confirm.
				 */
				bp->b_flags |= B_NOCACHE;
				if (bp->b_dirtyend > 0) {
				    if ((bp->b_flags & B_DELWRI) == 0)
					panic("nfsbioread");
				    if (VOP_BWRITE(bp) == EINTR)
					return (EINTR);
				} else
				    brelse(bp);
				goto again;
			}
		}
		vp->v_lastr = lbn;	/* remember for sequential detection */
		/* Clamp n to the buffer's valid region. */
		diff = (on >= bp->b_validend) ? 0 : (bp->b_validend - on);
		if (diff < n)
			n = diff;
		break;
	    case VLNK:
		nfsstats.biocache_readlinks++;
		/* Whole link target is cached in block 0. */
		bp = nfs_getcacheblk(vp, (daddr_t)0, NFS_MAXPATHLEN, p);
		if (!bp)
			return (EINTR);
		if ((bp->b_flags & B_CACHE) == 0) {
			bp->b_flags |= B_READ;
			vfs_busy_pages(bp, 0);
			error = nfs_doio(bp, cred, p);
			if (error) {
				bp->b_flags |= B_ERROR;
				brelse(bp);
				return (error);
			}
		}
		n = min(uio->uio_resid, NFS_MAXPATHLEN - bp->b_resid);
		on = 0;
		break;
	    case VDIR:
		nfsstats.biocache_readdirs++;
		/* Directory "block numbers" are NFS directory cookies. */
		lbn = (daddr_t)uio->uio_offset;
		bp = nfs_getcacheblk(vp, lbn, NFS_DIRBLKSIZ, p);
		if (!bp)
			return (EINTR);

		if ((bp->b_flags & B_CACHE) == 0) {
			bp->b_flags |= B_READ;
			vfs_busy_pages(bp, 0);
			error = nfs_doio(bp, cred, p);
			if (error) {
				bp->b_flags |= B_ERROR;
				brelse(bp);
				return (error);
			}
		}

		/*
		 * If not eof and read aheads are enabled, start one.
		 * (You need the current block first, so that you have the
		 * directory offset cookie of the next block.)
		 * nfs_doio() saved that cookie in b_blkno.
		 */
		rabn = bp->b_blkno;
		if (nfs_numasync > 0 && nmp->nm_readahead > 0 &&
		    rabn != 0 && rabn != np->n_direofoffset &&
		    !incore(vp, rabn)) {
			rabp = nfs_getcacheblk(vp, rabn, NFS_DIRBLKSIZ, p);
			if (rabp) {
			    if ((rabp->b_flags & B_CACHE) == 0) {
				rabp->b_flags |= (B_READ | B_ASYNC);
				vfs_busy_pages(rabp, 0);
				if (nfs_asyncio(rabp, cred)) {
				    rabp->b_flags |= B_INVAL|B_ERROR;
				    vfs_unbusy_pages(rabp);
				    brelse(rabp);
				}
			    } else {
				brelse(rabp);
			    }
			}
		}
		on = 0;
		n = min(uio->uio_resid, NFS_DIRBLKSIZ - bp->b_resid);
		break;
	    default:
		printf(" nfsbioread: type %x unexpected\n",vp->v_type);
		break;
	    };

	    if (n > 0) {
		error = uiomove(bp->b_data + on, (int)n, uio);
	    }
	    switch (vp->v_type) {
	    case VREG:
		break;
	    case VLNK:
		n = 0;	/* a symlink is read in one pass; end the loop */
		break;
	    case VDIR:
		/* Advance to the next block's directory cookie. */
		uio->uio_offset = bp->b_blkno;
		break;
	    default:
		printf(" nfsbioread: type %x unexpected\n",vp->v_type);
		break;
	    }
	    brelse(bp);
	} while (error == 0 && uio->uio_resid > 0 && n > 0);
	return (error);
}

/*
 * Vnode op for write using bio
 *
 * Writes VREG data through the buffer cache, merging contiguous dirty
 * regions per block and pushing blocks synchronously (IO_SYNC or a
 * non-cachable lease), asynchronously (full block), or as delayed
 * writes.  Returns 0 or an errno value.
 */
int
nfs_write(ap)
	struct vop_write_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int a_ioflag;
		struct ucred *a_cred;
	} */ *ap;
{
	register int biosize;
	register struct uio *uio = ap->a_uio;
	struct proc *p = uio->uio_procp;
	register struct vnode *vp = ap->a_vp;
	struct nfsnode *np = VTONFS(vp);
	register struct ucred *cred = ap->a_cred;
	int ioflag = ap->a_ioflag;
	struct buf *bp;
	struct vattr vattr;
	struct nfsmount *nmp;
	daddr_t lbn;
	int n, on, error = 0;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_WRITE)
		panic("nfs_write mode");
	if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc)
		panic("nfs_write proc");
#endif
	if (vp->v_type != VREG)
		return (EIO);
	if (np->n_flag & NWRITEERR) {
		/* Report (once) an error from an earlier async write. */
		np->n_flag &= ~NWRITEERR;
		return (np->n_error);
	}
	if (ioflag & (IO_APPEND | IO_SYNC)) {
		if (np->n_flag & NMODIFIED) {
			np->n_attrstamp = 0;
			error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
			if (error)
				return (error);
		}
		if (ioflag & IO_APPEND) {
			/* Refresh attributes so n_size is current, then
			 * position at end of file. */
			np->n_attrstamp = 0;
			error = VOP_GETATTR(vp, &vattr, cred, p);
			if (error)
				return (error);
			uio->uio_offset = np->n_size;
		}
	}
	nmp = VFSTONFS(vp->v_mount);
	if (uio->uio_offset < 0)
		return (EINVAL);
	if (uio->uio_resid == 0)
		return (0);
	/*
	 * Maybe this should be above the vnode op call, but so long as
	 * file servers have no limits, i don't think it matters
	 */
	if (p && uio->uio_offset + uio->uio_resid >
	      p->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
		psignal(p, SIGXFSZ);
		return (EFBIG);
	}
	/*
	 * I use nm_rsize, not nm_wsize so that all buffer cache blocks
	 * will be the same size within a filesystem. nfs_writerpc will
	 * still use nm_wsize when sizing the rpc's.
	 */
	biosize = NFS_MAXDGRAMDATA;
	do {

		/*
		 * XXX make sure we aren't cached in the VM page cache
		 */
		/*
		 * Check for a valid write lease.
		 * If non-cachable, just do the rpc
		 */
		if ((nmp->nm_flag & NFSMNT_NQNFS) &&
		    NQNFS_CKINVALID(vp, np, NQL_WRITE)) {
			do {
				error = nqnfs_getlease(vp, NQL_WRITE, cred, p);
			} while (error == NQNFS_EXPIRED);
			if (error)
				return (error);
			if (np->n_lrev != np->n_brev ||
			    (np->n_flag & NQNFSNONCACHE)) {
				error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
				if (error)
					return (error);
				np->n_brev = np->n_lrev;
			}
		}
		if (np->n_flag & NQNFSNONCACHE)
			/* Non-cachable: go straight to the write RPC. */
			return (nfs_writerpc(vp, uio, cred, ioflag));
		nfsstats.biocache_writes++;
		lbn = uio->uio_offset / biosize;
		on = uio->uio_offset & (biosize-1);
		n = min((unsigned)(biosize - on), uio->uio_resid);
again:
		bp = nfs_getcacheblk(vp, lbn, biosize, p);
		if (!bp)
			return (EINTR);
		if (bp->b_wcred == NOCRED) {
			/* Attach (and hold) write credentials to the buf. */
			crhold(cred);
			bp->b_wcred = cred;
		}
		np->n_flag |= NMODIFIED;
		if (uio->uio_offset + n > np->n_size) {
			np->n_size = uio->uio_offset + n;
			vnode_pager_setsize(vp, (u_long)np->n_size);
		}

		/*
		 * If the new write will leave a contiguous dirty
		 * area, just update the b_dirtyoff and b_dirtyend,
		 * otherwise force a write rpc of the old dirty area.
		 * NOTE(review): VOP_BWRITE() is expected to consume bp,
		 * so no brelse() before the retry — confirm.
		 */
		if (bp->b_dirtyend > 0 &&
		    (on > bp->b_dirtyend || (on + n) < bp->b_dirtyoff)) {
			bp->b_proc = p;
			if (VOP_BWRITE(bp) == EINTR)
				return (EINTR);
			goto again;
		}

		/*
		 * Check for valid write lease and get one as required.
		 * In case getblk() and/or bwrite() delayed us.
		 */
		if ((nmp->nm_flag & NFSMNT_NQNFS) &&
		    NQNFS_CKINVALID(vp, np, NQL_WRITE)) {
			do {
				error = nqnfs_getlease(vp, NQL_WRITE, cred, p);
			} while (error == NQNFS_EXPIRED);
			if (error) {
				brelse(bp);
				return (error);
			}
			if (np->n_lrev != np->n_brev ||
			    (np->n_flag & NQNFSNONCACHE)) {
				brelse(bp);
				error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
				if (error)
					return (error);
				np->n_brev = np->n_lrev;
				goto again;
			}
		}
		error = uiomove((char *)bp->b_data + on, n, uio);
		if (error) {
			bp->b_flags |= B_ERROR;
			brelse(bp);
			return (error);
		}
		/* Extend (or start) the dirty region to cover [on, on+n). */
		if (bp->b_dirtyend > 0) {
			bp->b_dirtyoff = min(on, bp->b_dirtyoff);
			bp->b_dirtyend = max((on + n), bp->b_dirtyend);
		} else {
			bp->b_dirtyoff = on;
			bp->b_dirtyend = on + n;
		}
#ifndef notdef
		/* Grow the valid region to include the new dirty data,
		 * unless they are disjoint, in which case only the dirty
		 * region is known valid. */
		if (bp->b_validend == 0 || bp->b_validend < bp->b_dirtyoff ||
		    bp->b_validoff > bp->b_dirtyend) {
			bp->b_validoff = bp->b_dirtyoff;
			bp->b_validend = bp->b_dirtyend;
		} else {
			bp->b_validoff = min(bp->b_validoff, bp->b_dirtyoff);
			bp->b_validend = max(bp->b_validend, bp->b_dirtyend);
		}
#else
		bp->b_validoff = bp->b_dirtyoff;
		bp->b_validend = bp->b_dirtyend;
#endif
		if (ioflag & IO_APPEND)
			bp->b_flags |= B_APPENDWRITE;

		/*
		 * If the lease is non-cachable or IO_SYNC do bwrite().
		 * Otherwise a full block goes out asynchronously (non-NQNFS
		 * only) and a partial block becomes a delayed write.
		 */
		if ((np->n_flag & NQNFSNONCACHE) || (ioflag & IO_SYNC)) {
			bp->b_proc = p;
			error = VOP_BWRITE(bp);
			if (error)
				return (error);
		} else if ((n + on) == biosize &&
			(nmp->nm_flag & NFSMNT_NQNFS) == 0) {
			bp->b_proc = (struct proc *)0;
			bawrite(bp);
		} else
			bdwrite(bp);
	} while (uio->uio_resid > 0 && n > 0);
	return (0);
}

/*
 * Get an nfs cache block.
 * Allocate a new one if the block isn't currently in the cache
 * and return the block marked busy.
 If the calling process is
 * interrupted by a signal for an interruptible mount point, return
 * NULL.
 */
struct buf *
nfs_getcacheblk(vp, bn, size, p)
	struct vnode *vp;
	daddr_t bn;
	int size;
	struct proc *p;
{
	register struct buf *bp;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);

	if (nmp->nm_flag & NFSMNT_INT) {
		/*
		 * Interruptible mount: ask getblk() to catch signals
		 * (PCATCH); on a NULL return, bail if a signal is pending,
		 * otherwise retry with a 2 second timeout.
		 */
		bp = getblk(vp, bn, size, PCATCH, 0);
		while (bp == (struct buf *)0) {
			if (nfs_sigintr(nmp, (struct nfsreq *)0, p))
				return ((struct buf *)0);
			bp = getblk(vp, bn, size, 0, 2 * hz);
		}
	} else
		bp = getblk(vp, bn, size, 0, 0);

	/* For regular files, map the logical block number to a device
	 * block address using the fixed NFS block size. */
	if( vp->v_type == VREG)
		bp->b_blkno = (bn * NFS_MAXDGRAMDATA) / DEV_BSIZE;

	return (bp);
}

/*
 * Flush and invalidate all dirty buffers. If another process is already
 * doing the flush, just wait for completion.
 *
 * NFLUSHINPROG serializes flushers; NFLUSHWANT marks waiters, who sleep
 * on &np->n_flag and are woken when the flush finishes.  Returns 0, or
 * EINTR if interrupted on an interruptible mount.
 */
int
nfs_vinvalbuf(vp, flags, cred, p, intrflg)
	struct vnode *vp;
	int flags;
	struct ucred *cred;
	struct proc *p;
	int intrflg;
{
	register struct nfsnode *np = VTONFS(vp);
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	int error = 0, slpflag, slptimeo;

	/* Only honor interruptibility on NFSMNT_INT mounts. */
	if ((nmp->nm_flag & NFSMNT_INT) == 0)
		intrflg = 0;
	if (intrflg) {
		slpflag = PCATCH;
		slptimeo = 2 * hz;
	} else {
		slpflag = 0;
		slptimeo = 0;
	}
	/*
	 * First wait for any other process doing a flush to complete.
	 */
	while (np->n_flag & NFLUSHINPROG) {
		np->n_flag |= NFLUSHWANT;
		error = tsleep((caddr_t)&np->n_flag, PRIBIO + 2, "nfsvinval",
			slptimeo);
		if (error && intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p))
			return (EINTR);
	}

	/*
	 * Now, flush as required.  Retry vinvalbuf() until it succeeds,
	 * giving up only when a signal interrupts us.
	 */
	np->n_flag |= NFLUSHINPROG;
	error = vinvalbuf(vp, flags, cred, p, slpflag, 0);
	while (error) {
		if (intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p)) {
			/* Interrupted: drop the in-progress flag and wake
			 * any waiters before bailing out. */
			np->n_flag &= ~NFLUSHINPROG;
			if (np->n_flag & NFLUSHWANT) {
				np->n_flag &= ~NFLUSHWANT;
				wakeup((caddr_t)&np->n_flag);
			}
			return (EINTR);
		}
		error = vinvalbuf(vp, flags, cred, p, 0, slptimeo);
	}
	np->n_flag &= ~(NMODIFIED | NFLUSHINPROG);
	if (np->n_flag & NFLUSHWANT) {
		np->n_flag &= ~NFLUSHWANT;
		wakeup((caddr_t)&np->n_flag);
	}
	return (0);
}

/*
 * Initiate asynchronous I/O. Return an error if no nfsiods are available.
 * This is mainly to avoid queueing async I/O requests when the nfsiods
 * are all hung on a dead server.
 *
 * On success the buffer is queued on nfs_bufq with credentials attached,
 * and one idle nfsiod is woken to service it.
 */
int
nfs_asyncio(bp, cred)
	register struct buf *bp;
	struct ucred *cred;
{
	register int i;

	if (nfs_numasync == 0)
		return (EIO);
	/* Find an idle nfsiod (one parked in nfs_iodwant[]). */
	for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
	    if (nfs_iodwant[i]) {
		/* Hold the credentials on the buf for the daemon's RPC. */
		if (bp->b_flags & B_READ) {
			if (bp->b_rcred == NOCRED && cred != NOCRED) {
				crhold(cred);
				bp->b_rcred = cred;
			}
		} else {
			if (bp->b_wcred == NOCRED && cred != NOCRED) {
				crhold(cred);
				bp->b_wcred = cred;
			}
		}

		TAILQ_INSERT_TAIL(&nfs_bufq, bp, b_freelist);
		nfs_iodwant[i] = (struct proc *)0;
		wakeup((caddr_t)&nfs_iodwant[i]);
		return (0);
	    }
	/* All nfsiods busy (or hung): caller must do the I/O itself. */
	return (EIO);
}

/*
 * Do an I/O operation to/from a cache block. This may be called
 * synchronously or from an nfsiod.
 */
int
nfs_doio(bp, cr, p)
	register struct buf *bp;
	struct ucred *cr;
	struct proc *p;
{
	register struct uio *uiop;
	register struct vnode *vp;
	struct nfsnode *np;
	struct nfsmount *nmp;
	int error = 0, diff, len;
	struct uio uio;
	struct iovec io;

	vp = bp->b_vp;
	np = VTONFS(vp);
	nmp = VFSTONFS(vp->v_mount);
	/* Build a single-iovec sysspace uio describing the buffer. */
	uiop = &uio;
	uiop->uio_iov = &io;
	uiop->uio_iovcnt = 1;
	uiop->uio_segflg = UIO_SYSSPACE;
	uiop->uio_procp = p;

	/*
	 * Historically, paging was done with physio, but no more.
	 */
	if (bp->b_flags & B_PHYS) {
		/*
		 * ...though reading /dev/drum still gets us here.
		 */
		io.iov_len = uiop->uio_resid = bp->b_bcount;
		/* mapping was done by vmapbuf() */
		io.iov_base = bp->b_data;
		uiop->uio_offset = bp->b_blkno * DEV_BSIZE;
		if (bp->b_flags & B_READ) {
			uiop->uio_rw = UIO_READ;
			nfsstats.read_physios++;
			error = nfs_readrpc(vp, uiop, cr);
		} else {
			uiop->uio_rw = UIO_WRITE;
			nfsstats.write_physios++;
			error = nfs_writerpc(vp, uiop, cr,0);
		}
		if (error) {
			bp->b_flags |= B_ERROR;
			bp->b_error = error;
		}
	} else if (bp->b_flags & B_READ) {
		io.iov_len = uiop->uio_resid = bp->b_bcount;
		io.iov_base = bp->b_data;
		uiop->uio_rw = UIO_READ;
		switch (vp->v_type) {
		case VREG:
			uiop->uio_offset = bp->b_blkno * DEV_BSIZE;
			nfsstats.read_bios++;
			error = nfs_readrpc(vp, uiop, cr);
			if (!error) {
				bp->b_validoff = 0;
				if (uiop->uio_resid) {
					/*
					 * If len > 0, there is a hole in the file and
					 * no writes after the hole have been pushed to
					 * the server yet.
					 * Just zero fill the rest of the valid area.
					 */
					diff = bp->b_bcount - uiop->uio_resid;
					len = np->n_size - (bp->b_blkno * DEV_BSIZE
						+ diff);
					if (len > 0) {
						len = min(len, uiop->uio_resid);
						bzero((char *)bp->b_data + diff, len);
						bp->b_validend = diff + len;
					} else
						bp->b_validend = diff;
				} else
					bp->b_validend = bp->b_bcount;
			}
			/*
			 * Kill the process if it is executing this file and
			 * the file changed on the server (lease revision for
			 * nqnfs, mtime otherwise) — its text is now stale.
			 */
			if (p && (vp->v_flag & VTEXT) &&
				(((nmp->nm_flag & NFSMNT_NQNFS) &&
				  NQNFS_CKINVALID(vp, np, NQL_READ) &&
				  np->n_lrev != np->n_brev) ||
				 (!(nmp->nm_flag & NFSMNT_NQNFS) &&
				  np->n_mtime != np->n_vattr.va_mtime.ts_sec))) {
				uprintf("Process killed due to text file modification\n");
				psignal(p, SIGKILL);
				p->p_flag |= P_NOSWAP;
			}
			break;
		case VLNK:
			uiop->uio_offset = 0;
			nfsstats.readlink_bios++;
			error = nfs_readlinkrpc(vp, uiop, cr);
			break;
		case VDIR:
			/* b_lblkno is the directory offset cookie. */
			uiop->uio_offset = bp->b_lblkno;
			nfsstats.readdir_bios++;
			if (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS)
				error = nfs_readdirlookrpc(vp, uiop, cr);
			else
				error = nfs_readdirrpc(vp, uiop, cr);
			/*
			 * Save offset cookie in b_blkno.
			 */
			bp->b_blkno = uiop->uio_offset;
			break;
		default:
			printf("nfs_doio: type %x unexpected\n",vp->v_type);
			break;
		};
		if (error) {
			bp->b_flags |= B_ERROR;
			bp->b_error = error;
		}
	} else {
		/* Write path: push only the dirty region of the block. */
		io.iov_len = uiop->uio_resid = bp->b_dirtyend
			- bp->b_dirtyoff;
		uiop->uio_offset = (bp->b_blkno * DEV_BSIZE)
			+ bp->b_dirtyoff;
		io.iov_base = (char *)bp->b_data + bp->b_dirtyoff;
		uiop->uio_rw = UIO_WRITE;
		nfsstats.write_bios++;
		if (bp->b_flags & B_APPENDWRITE)
			error = nfs_writerpc(vp, uiop, cr, IO_APPEND);
		else
			error = nfs_writerpc(vp, uiop, cr, 0);
		bp->b_flags &= ~(B_WRITEINPROG | B_APPENDWRITE);

		/*
		 * For an interrupted write, the buffer is still valid and the
		 * write hasn't been pushed to the server yet, so we can't set
		 * B_ERROR and report the interruption by setting B_EINTR. For
		 * the B_ASYNC case, B_EINTR is not relevant, so the rpc attempt
		 * is essentially a noop.
		 */
		if (error == EINTR) {
			/* Keep the data: re-mark delayed-write. */
			bp->b_flags &= ~B_INVAL;
			bp->b_flags |= B_DELWRI;

			/*
			 * Since for the B_ASYNC case, nfs_bwrite() has reassigned the
			 * buffer to the clean list, we have to reassign it back to the
			 * dirty one. Ugh.
			 */
			if (bp->b_flags & B_ASYNC)
				reassignbuf(bp, vp);
			else
				bp->b_flags |= B_EINTR;
		} else {
			if (error) {
				/* Record the error on the nfsnode so the next
				 * nfs_write() can report it (NWRITEERR). */
				bp->b_flags |= B_ERROR;
				bp->b_error = np->n_error = error;
				np->n_flag |= NWRITEERR;
			}
			bp->b_dirtyoff = bp->b_dirtyend = 0;
		}
	}
	bp->b_resid = uiop->uio_resid;
	biodone(bp);
	return (error);
}