/* vnode_pager.c revision 7162 */
1/* 2 * Copyright (c) 1990 University of Utah. 3 * Copyright (c) 1991 The Regents of the University of California. 4 * All rights reserved. 5 * Copyright (c) 1993,1994 John S. Dyson 6 * 7 * This code is derived from software contributed to Berkeley by 8 * the Systems Programming Group of the University of Utah Computer 9 * Science Department. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. All advertising materials mentioning features or use of this software 20 * must display the following acknowledgement: 21 * This product includes software developed by the University of 22 * California, Berkeley and its contributors. 23 * 4. Neither the name of the University nor the names of its contributors 24 * may be used to endorse or promote products derived from this software 25 * without specific prior written permission. 26 * 27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 30 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 37 * SUCH DAMAGE. 38 * 39 * from: @(#)vnode_pager.c 7.5 (Berkeley) 4/20/91 40 * $Id: vnode_pager.c,v 1.30 1995/03/16 18:17:34 bde Exp $ 41 */ 42 43/* 44 * Page to/from files (vnodes). 45 * 46 * TODO: 47 * pageouts 48 * fix credential use (uses current process credentials now) 49 */ 50 51/* 52 * MODIFICATIONS: 53 * John S. Dyson 08 Dec 93 54 * 55 * This file in conjunction with some vm_fault mods, eliminate the performance 56 * advantage for using the buffer cache and minimize memory copies. 
57 * 58 * 1) Supports multiple - block reads 59 * 2) Bypasses buffer cache for reads 60 * 61 * TODO: 62 * 63 * 1) Totally bypass buffer cache for reads 64 * (Currently will still sometimes use buffer cache for reads) 65 * 2) Bypass buffer cache for writes 66 * (Code does not support it, but mods are simple) 67 */ 68 69#include <sys/param.h> 70#include <sys/systm.h> 71#include <sys/kernel.h> 72#include <sys/proc.h> 73#include <sys/malloc.h> 74#include <sys/vnode.h> 75#include <sys/uio.h> 76#include <sys/mount.h> 77 78#include <vm/vm.h> 79#include <vm/vm_page.h> 80#include <vm/vnode_pager.h> 81 82#include <sys/buf.h> 83#include <miscfs/specfs/specdev.h> 84 85int vnode_pager_putmulti(); 86 87void vnode_pager_init(); 88void vnode_pager_dealloc(); 89int vnode_pager_getpage(); 90int vnode_pager_getmulti(); 91int vnode_pager_putpage(); 92boolean_t vnode_pager_haspage(); 93 94struct pagerops vnodepagerops = { 95 vnode_pager_init, 96 vnode_pager_alloc, 97 vnode_pager_dealloc, 98 vnode_pager_getpage, 99 vnode_pager_getmulti, 100 vnode_pager_putpage, 101 vnode_pager_putmulti, 102 vnode_pager_haspage 103}; 104 105 106 107static int vnode_pager_input(vn_pager_t vnp, vm_page_t * m, int count, int reqpage); 108static int vnode_pager_output(vn_pager_t vnp, vm_page_t * m, int count, int *rtvals); 109 110extern vm_map_t pager_map; 111 112struct pagerlst vnode_pager_list; /* list of managed vnodes */ 113 114#define MAXBP (PAGE_SIZE/DEV_BSIZE); 115 116void 117vnode_pager_init() 118{ 119 TAILQ_INIT(&vnode_pager_list); 120} 121 122/* 123 * Allocate (or lookup) pager for a vnode. 124 * Handle is a vnode pointer. 
 */
vm_pager_t
vnode_pager_alloc(handle, size, prot, offset)
	caddr_t handle;
	vm_size_t size;
	vm_prot_t prot;
	vm_offset_t offset;
{
	register vm_pager_t pager;
	register vn_pager_t vnp;
	vm_object_t object, tobject;	/* NOTE(review): tobject is unused */
	struct vattr vattr;
	struct vnode *vp;
	struct proc *p = curproc;	/* XXX */
	int rtval;

	/*
	 * Pageout to vnode, no can do yet.
	 */
	if (handle == NULL)
		return (NULL);

	/*
	 * Vnodes keep a pointer to any associated pager so no need to lookup
	 * with vm_pager_lookup.  Wait out a concurrent teardown of a dying
	 * object before (re)using the vnode's VM state.
	 */
	vp = (struct vnode *) handle;
	while ((object = (vm_object_t) vp->v_vmdata) && (object->flags & OBJ_DEAD))
		tsleep((caddr_t) object, PVM, "vadead", 0);

	pager = NULL;
	if (object != NULL)
		pager = object->pager;
	if (pager == NULL) {
		/*
		 * Allocate pager structures
		 */
		pager = (vm_pager_t) malloc(sizeof *pager, M_VMPAGER, M_WAITOK);
		if (pager == NULL)
			return (NULL);
		vnp = (vn_pager_t) malloc(sizeof *vnp, M_VMPGDATA, M_WAITOK);
		if (vnp == NULL) {
			free((caddr_t) pager, M_VMPAGER);
			return (NULL);
		}
		/*
		 * And an object of the appropriate size, sized from the
		 * vnode's current attributes (not the 'size' argument).
		 */
		if ((rtval = VOP_GETATTR(vp, &vattr, p->p_ucred, p)) == 0) {
			object = vm_object_allocate(round_page(vattr.va_size));
			object->flags = OBJ_CANPERSIST;
			vm_object_enter(object, pager);
			object->pager = pager;
		} else {
			printf("Error in getattr: %d\n", rtval);
			free((caddr_t) vnp, M_VMPGDATA);
			free((caddr_t) pager, M_VMPAGER);
			return (NULL);
		}

		/*
		 * Hold a reference to the vnode and initialize pager data.
		 */
		VREF(vp);
		vnp->vnp_flags = 0;
		vnp->vnp_vp = vp;
		vnp->vnp_size = vattr.va_size;

		TAILQ_INSERT_TAIL(&vnode_pager_list, pager, pg_list);
		pager->pg_handle = handle;
		pager->pg_type = PG_VNODE;
		pager->pg_ops = &vnodepagerops;
		pager->pg_data = (caddr_t) vnp;
		vp->v_vmdata = (caddr_t) object;
	} else {
		/*
		 * vm_object_lookup() will remove the object from the cache if
		 * found and also gain a reference to the object.
		 */
		(void) vm_object_lookup(pager);
	}
	return (pager);
}

/*
 * Tear down a vnode pager: wait for pending paging activity to drain,
 * detach the VM object from the vnode, drop our vnode reference and
 * free the pager structures allocated in vnode_pager_alloc().
 */
void
vnode_pager_dealloc(pager)
	vm_pager_t pager;
{
	register vn_pager_t vnp = (vn_pager_t) pager->pg_data;
	register struct vnode *vp;
	vm_object_t object;

	vp = vnp->vnp_vp;
	if (vp) {
		int s = splbio();

		object = (vm_object_t) vp->v_vmdata;
		if (object) {
			/* sleep until in-flight pageins/pageouts complete */
			while (object->paging_in_progress) {
				object->flags |= OBJ_PIPWNT;
				tsleep(object, PVM, "vnpdea", 0);
			}
		}
		splx(s);

		vp->v_vmdata = NULL;
		vp->v_flag &= ~(VTEXT | VVMIO);
		vp->v_flag |= VAGE;
		vrele(vp);
	}
	TAILQ_REMOVE(&vnode_pager_list, pager, pg_list);
	free((caddr_t) vnp, M_VMPGDATA);
	free((caddr_t) pager, M_VMPAGER);
}

/*
 * Multi-page pagein entry point; 'sync' is ignored and the work is
 * delegated to vnode_pager_input().
 */
int
vnode_pager_getmulti(pager, m, count, reqpage, sync)
	vm_pager_t pager;
	vm_page_t *m;
	int count;
	int reqpage;
	boolean_t sync;
{
	return vnode_pager_input((vn_pager_t) pager->pg_data, m, count, reqpage);
}

/*
 * Single-page pagein: wrap the page in a one-element array and delegate
 * to vnode_pager_input().
 */
int
vnode_pager_getpage(pager, m, sync)
	vm_pager_t pager;
	vm_page_t m;
	boolean_t sync;
{
	vm_page_t marray[1];

	if (pager == NULL)
		return FALSE;
	marray[0] = m;

	return vnode_pager_input((vn_pager_t) pager->pg_data, marray, 1, 0);
}

/*
 * Single-page pageout: wrap the page in a one-element array and delegate
 * to vnode_pager_output(); returns the per-page VM_PAGER_* status.
 * NOTE(review): declared boolean_t here but forward-declared int above —
 * harmless under K&R promotion rules, but inconsistent.
 */
boolean_t
vnode_pager_putpage(pager, m, sync)
	vm_pager_t pager;
	vm_page_t m;
	boolean_t sync;
{
	vm_page_t marray[1];
	int rtvals[1];

	if (pager == NULL)
		return FALSE;
	marray[0] = m;
	vnode_pager_output((vn_pager_t) pager->pg_data, marray, 1, rtvals);
	return rtvals[0];
}

/*
 * Multi-page pageout entry point; per-page status is returned through
 * rtvals[].  'sync' is ignored.
 */
int
vnode_pager_putmulti(pager, m, c, sync, rtvals)
	vm_pager_t pager;
	vm_page_t *m;
	int c;
	boolean_t sync;
	int *rtvals;
{
	return vnode_pager_output((vn_pager_t) pager->pg_data, m, c, rtvals);
}

/*
 * Report whether backing store exists for the page at 'offset':
 * FALSE if the fs is unmounted or offset is past EOF; TRUE if the
 * block is in the buffer cache or bmap yields a real disk block.
 */
boolean_t
vnode_pager_haspage(pager, offset)
	vm_pager_t pager;
	vm_offset_t offset;
{
	register vn_pager_t vnp = (vn_pager_t) pager->pg_data;
	register struct vnode *vp = vnp->vnp_vp;
	daddr_t bn;
	int err;
	daddr_t block;

	/*
	 * If filesystem no longer mounted or offset beyond end of file we do
	 * not have the page.
	 */
	if ((vp->v_mount == NULL) || (offset >= vnp->vnp_size))
		return FALSE;

	block = offset / vp->v_mount->mnt_stat.f_iosize;
	if (incore(vp, block))
		return TRUE;
	/*
	 * Read the index to find the disk block to read from. If there is no
	 * block, report that we don't have this data.
	 *
	 * Assumes that the vnode has whole page or nothing.
	 *
	 * NOTE(review): a VOP_BMAP failure optimistically returns TRUE
	 * ("assume we have it") rather than FALSE — presumably deliberate,
	 * but worth confirming.
	 */
	err = VOP_BMAP(vp, block, (struct vnode **) 0, &bn, 0);
	if (err)
		return (TRUE);
	return ((long) bn < 0 ? FALSE : TRUE);
}

/*
 * Lets the VM system know about a change in size for a file.
 * If this vnode is mapped into some address space (i.e. we have a pager
 * for it) we adjust our own internal size and flush any cached pages in
 * the associated object that are affected by the size change.
 *
 * Note: this routine may be invoked as a result of a pager put
 * operation (possibly at object termination time), so we must be careful.
 */
void
vnode_pager_setsize(vp, nsize)
	struct vnode *vp;
	u_long nsize;
{
	register vn_pager_t vnp;
	register vm_object_t object;
	vm_pager_t pager;

	/*
	 * Not a mapped vnode
	 */
	if (vp == NULL || vp->v_type != VREG || vp->v_vmdata == NULL)
		return;

	/*
	 * Hasn't changed size
	 */
	object = (vm_object_t) vp->v_vmdata;
	if (object == NULL)
		return;
	if ((pager = object->pager) == NULL)
		return;
	vnp = (vn_pager_t) pager->pg_data;
	if (nsize == vnp->vnp_size)
		return;

	/*
	 * No object. This can happen during object termination since
	 * vm_object_page_clean is called after the object has been removed
	 * from the hash table, and clean may cause vnode write operations
	 * which can wind up back here.
	 */
	object = vm_object_lookup(pager);
	if (object == NULL)
		return;

	/*
	 * File has shrunk. Toss any cached pages beyond the new EOF.
	 */
	if (nsize < vnp->vnp_size) {
		if (round_page((vm_offset_t) nsize) < vnp->vnp_size) {
			vm_object_lock(object);
			vm_object_page_remove(object,
			    round_page((vm_offset_t) nsize), vnp->vnp_size);
			vm_object_unlock(object);
		}
		/*
		 * this gets rid of garbage at the end of a page that is now
		 * only partially backed by the vnode...  zero the tail of
		 * the page straddling the new EOF.
		 */
		if (nsize & PAGE_MASK) {
			vm_offset_t kva;
			vm_page_t m;

			m = vm_page_lookup(object, trunc_page((vm_offset_t) nsize));
			if (m) {
				kva = vm_pager_map_page(m);
				bzero((caddr_t) kva + (nsize & PAGE_MASK),
				    round_page(nsize) - nsize);
				vm_pager_unmap_page(kva);
			}
		}
	}
	vnp->vnp_size = (vm_offset_t) nsize;
	object->size = round_page(nsize);

	/* drop the reference gained by vm_object_lookup() above */
	vm_object_deallocate(object);
}

/*
 * Uncache every vnode pager belonging to mount point 'mp'
 * (or all of them when mp is NULL).  Called at unmount time.
 */
void
vnode_pager_umount(mp)
	register struct mount *mp;
{
	register vm_pager_t pager, npager;
	struct vnode *vp;

	for (pager = vnode_pager_list.tqh_first; pager != NULL; pager = npager) {
		/*
		 * Save the next pointer now since uncaching may terminate the
		 * object and render pager invalid
		 */
		npager = pager->pg_list.tqe_next;
		vp = ((vn_pager_t) pager->pg_data)->vnp_vp;
		if (mp == (struct mount *) 0 || vp->v_mount == mp) {
			VOP_LOCK(vp);
			(void) vnode_pager_uncache(vp);
			VOP_UNLOCK(vp);
		}
	}
}

/*
 * Remove vnode associated object from the object cache.
 * This routine must be called with the vnode locked.
 *
 * XXX unlock the vnode.
 * We must do this since uncaching the object may result in its
 * destruction which may initiate paging activity which may necessitate
 * re-locking the vnode.
 */
boolean_t
vnode_pager_uncache(vp)
	register struct vnode *vp;
{
	register vm_object_t object;
	boolean_t uncached;
	vm_pager_t pager;

	/*
	 * Not a mapped vnode
	 */
	object = (vm_object_t) vp->v_vmdata;
	if (object == NULL)
		return (TRUE);

	pager = object->pager;
	if (pager == NULL)
		return (TRUE);

#ifdef DEBUG
	if (!VOP_ISLOCKED(vp)) {
		extern int (**nfsv2_vnodeop_p)();

		/* NFS vnodes are exempt from the lock assertion */
		if (vp->v_op != nfsv2_vnodeop_p)
			panic("vnode_pager_uncache: vnode not locked!");
	}
#endif
	/*
	 * Must use vm_object_lookup() as it actually removes the object from
	 * the cache list.
	 */
	object = vm_object_lookup(pager);
	if (object) {
		uncached = (object->ref_count <= 1);
		VOP_UNLOCK(vp);
		pager_cache(object, FALSE);
		VOP_LOCK(vp);
	} else
		uncached = TRUE;
	return (uncached);
}

/*
 * Wake any waiters on the page and free it back to the VM system.
 */
void
vnode_pager_freepage(m)
	vm_page_t m;
{
	PAGE_WAKEUP(m);
	vm_page_free(m);
}

/*
 * calculate the linear (byte) disk address of specified virtual
 * file address.  Returns -1 if the file block has no backing store.
 * If 'run' is non-NULL it is filled with the length (in pages) of the
 * contiguous on-disk run starting at 'address'.
 */
vm_offset_t
vnode_pager_addr(vp, address, run)
	struct vnode *vp;
	vm_offset_t address;
	int *run;
{
	int rtaddress;
	int bsize;
	vm_offset_t block;
	struct vnode *rtvp;
	int err;
	int vblock, voffset;

	if ((int) address < 0)
		return -1;

	bsize = vp->v_mount->mnt_stat.f_iosize;
	vblock = address / bsize;
	voffset = address % bsize;

	err = VOP_BMAP(vp, vblock, &rtvp, &block, run);

	if (err || (block == -1))
		rtaddress = -1;
	else {
		/* disk address in DEV_BSIZE units */
		rtaddress = block + voffset / DEV_BSIZE;
		if( run) {
			/* convert run from fs blocks to pages past 'address' */
			*run += 1;
			*run *= bsize/PAGE_SIZE;
			*run -= voffset/PAGE_SIZE;
		}
	}

	return rtaddress;
}

/*
 * interrupt routine for I/O completion.  For B_ASYNC buffers this also
 * finishes the pagein: zero-fills any short read, marks the pages valid,
 * unmaps them and releases the pbuf.
 */
void
vnode_pager_iodone(bp)
	struct buf *bp;
{
	bp->b_flags |= B_DONE;
	wakeup((caddr_t) bp);
	if (bp->b_flags & B_ASYNC) {
		vm_offset_t paddr;
		vm_page_t m;
		vm_object_t obj = 0;
		int i;
		int npages;

		paddr = (vm_offset_t) bp->b_data;
		/* zero the tail of a short transfer */
		if (bp->b_bufsize != bp->b_bcount)
			bzero(bp->b_data + bp->b_bcount,
			    bp->b_bufsize - bp->b_bcount);

		npages = (bp->b_bufsize + PAGE_SIZE - 1) / PAGE_SIZE;
		for (i = 0; i < npages; i++) {
			m = PHYS_TO_VM_PAGE(pmap_kextract(paddr + i * PAGE_SIZE));
			/*
			 * NOTE(review): m is dereferenced here BEFORE the
			 * "if (m)" null check below — if m could ever be
			 * NULL this is a fault before the intended panic.
			 */
			obj = m->object;
			if (m) {
				m->dirty = 0;
				m->valid = VM_PAGE_BITS_ALL;
				if (m->flags & PG_WANTED)
					m->flags |= PG_REFERENCED;
				PAGE_WAKEUP(m);
			} else {
				panic("vnode_pager_iodone: page is gone!!!");
			}
		}
		pmap_qremove(paddr, npages);
		if (obj) {
			vm_object_pip_wakeup(obj);
		} else {
			panic("vnode_pager_iodone: object is gone???");
		}
		relpbuf(bp);
	}
}

/*
 * small block file system vnode pager input:
 * read a page one fs-block at a time when the fs block size is smaller
 * than a page, skipping sub-ranges that are already valid and
 * zero-filling holes.
 */
int
vnode_pager_input_smlfs(vnp, m)
	vn_pager_t vnp;
	vm_page_t m;
{
	int i;
	int s;
	struct vnode *dp, *vp;
	struct buf *bp;
	vm_offset_t kva;
	int fileaddr;
	int block;	/* NOTE(review): unused */
	vm_offset_t bsize;
	int error = 0;

	vp = vnp->vnp_vp;
	bsize = vp->v_mount->mnt_stat.f_iosize;

	/* get the underlying device vnode */
	VOP_BMAP(vp, 0, &dp, 0, 0);

	kva = vm_pager_map_page(m);

	for (i = 0; i < PAGE_SIZE / bsize; i++) {

		/* skip sub-blocks that are already valid */
		if ((vm_page_bits(m->offset + i * bsize, bsize) & m->valid))
			continue;

		fileaddr = vnode_pager_addr(vp, m->offset + i * bsize, (int *)0);
		if (fileaddr != -1) {
			bp = getpbuf();

			/* build a minimal buffer header */
			bp->b_flags = B_BUSY | B_READ | B_CALL;
			bp->b_iodone = vnode_pager_iodone;
			bp->b_proc = curproc;
			bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
			if (bp->b_rcred != NOCRED)
				crhold(bp->b_rcred);
			if (bp->b_wcred != NOCRED)
				crhold(bp->b_wcred);
			bp->b_un.b_addr = (caddr_t) kva + i * bsize;
			bp->b_blkno = fileaddr;
			pbgetvp(dp, bp);
			bp->b_bcount = bsize;
			bp->b_bufsize = bsize;

			/* do the input */
			VOP_STRATEGY(bp);

			/* we definitely need to be at splbio here */
			s = splbio();
			while ((bp->b_flags & B_DONE) == 0) {
				tsleep((caddr_t) bp, PVM, "vnsrd", 0);
			}
			splx(s);
			if ((bp->b_flags & B_ERROR) != 0)
				error = EIO;

			/*
			 * free the buffer header back to the swap buffer pool
			 */
			relpbuf(bp);
			if (error)
				break;

			vm_page_set_clean(m, i * bsize, bsize);
			vm_page_set_valid(m, i * bsize, bsize);
		} else {
			/* hole in the file: zero-fill this sub-block */
			vm_page_set_clean(m, i * bsize, bsize);
			bzero((caddr_t) kva + i * bsize, bsize);
		}
/*
 * NOTE(review): label with no following statement — invalid ISO C
 * (accepted by old gcc); should be "nextblock: ;" or removed (unused).
 */
nextblock:
	}

	vm_pager_unmap_page(kva);
	pmap_clear_modify(VM_PAGE_TO_PHYS(m));
	if (error) {
		return VM_PAGER_ERROR;
	}
	return VM_PAGER_OK;
}

/*
 * old style vnode pager input routine (comment said "output", but this
 * reads): fall back to VOP_READ through a transient kernel mapping when
 * the filesystem cannot bmap.
 */
int
vnode_pager_input_old(vnp, m)
	vn_pager_t vnp;
	vm_page_t m;
{
	struct uio auio;
	struct iovec aiov;
	int error;
	int size;
	vm_offset_t kva;

	error = 0;

	/*
	 * Return failure if beyond current EOF
	 */
	if (m->offset >= vnp->vnp_size) {
		return VM_PAGER_BAD;
	} else {
		size = PAGE_SIZE;
		if (m->offset + size > vnp->vnp_size)
			size = vnp->vnp_size - m->offset;
		/*
		 * Allocate a kernel virtual address and initialize so that
		 * we can use VOP_READ/WRITE routines.
		 */
		kva = vm_pager_map_page(m);
		aiov.iov_base = (caddr_t) kva;
		aiov.iov_len = size;
		auio.uio_iov = &aiov;
		auio.uio_iovcnt = 1;
		auio.uio_offset = m->offset;
		auio.uio_segflg = UIO_SYSSPACE;
		auio.uio_rw = UIO_READ;
		auio.uio_resid = size;
		auio.uio_procp = (struct proc *) 0;

		error = VOP_READ(vnp->vnp_vp, &auio, 0, curproc->p_ucred);
		if (!error) {
			register int count = size - auio.uio_resid;

			if (count == 0)
				error = EINVAL;
			else if (count != PAGE_SIZE)
				/* zero-fill the unread tail of the page */
				bzero((caddr_t) kva + count, PAGE_SIZE - count);
		}
		vm_pager_unmap_page(kva);
	}
	pmap_clear_modify(VM_PAGE_TO_PHYS(m));
	m->dirty = 0;
	return error ?
	    VM_PAGER_ERROR : VM_PAGER_OK;
}

/*
 * generic vnode pager input routine: cluster read of 'count' pages with
 * m[reqpage] the page the fault actually needs.  Falls back to the
 * old VOP_READ path or the small-block path as appropriate, otherwise
 * issues direct device I/O (optionally split into a sync buffer plus an
 * async read-ahead buffer).
 */
int
vnode_pager_input(vnp, m, count, reqpage)
	register vn_pager_t vnp;
	vm_page_t *m;
	int count, reqpage;
{
	int i;
	vm_offset_t kva, foff;
	int size, sizea;
	vm_object_t object;
	struct vnode *dp, *vp;
	int bsize;

	int first, last;	/* NOTE(review): 'last' is unused */
	int firstaddr;
	int block, offset;	/* NOTE(review): both unused */
	int runpg;
	int runend;

	struct buf *bp, *bpa;
	int counta;
	int s;
	int failflag;		/* NOTE(review): unused */

	int error = 0;

	object = m[reqpage]->object;	/* all vm_page_t items are in same object */

	vp = vnp->vnp_vp;
	bsize = vp->v_mount->mnt_stat.f_iosize;

	/* get the UNDERLYING device for the file with VOP_BMAP() */

	/*
	 * originally, we did not check for an error return value -- assuming
	 * an fs always has a bmap entry point -- that assumption is wrong!!!
	 */
	foff = m[reqpage]->offset;

	/*
	 * if we can't bmap, use old VOP code
	 */
	if (VOP_BMAP(vp, 0, &dp, 0, 0)) {
		for (i = 0; i < count; i++) {
			if (i != reqpage) {
				vnode_pager_freepage(m[i]);
			}
		}
		cnt.v_vnodein++;
		cnt.v_vnodepgsin++;
		return vnode_pager_input_old(vnp, m[reqpage]);

		/*
		 * if the blocksize is smaller than a page size, then use
		 * special small filesystem code. NFS sometimes has a small
		 * blocksize, but it can handle large reads itself.
		 */
	} else if ((PAGE_SIZE / bsize) > 1 &&
	    (vp->v_mount->mnt_stat.f_type != MOUNT_NFS)) {

		for (i = 0; i < count; i++) {
			if (i != reqpage) {
				vnode_pager_freepage(m[i]);
			}
		}
		cnt.v_vnodein++;
		cnt.v_vnodepgsin++;
		return vnode_pager_input_smlfs(vnp, m[reqpage]);
	}
	/*
	 * if ANY DEV_BSIZE blocks are valid on a large filesystem block
	 * then, the entire page is valid --
	 */
	if (m[reqpage]->valid) {
		m[reqpage]->valid = VM_PAGE_BITS_ALL;
		for (i = 0; i < count; i++) {
			if (i != reqpage)
				vnode_pager_freepage(m[i]);
		}
		return VM_PAGER_OK;
	}
	/*
	 * here on direct device I/O
	 */

	firstaddr = -1;
	/*
	 * calculate the run that includes the required page: free pages
	 * before/after the contiguous on-disk run and trim 'count' to it.
	 */
	for(first = 0, i = 0; i < count; i = runend) {
		firstaddr = vnode_pager_addr(vp, m[i]->offset, &runpg);
		if (firstaddr == -1) {
			/* the required page must be backed unless past EOF */
			if( i == reqpage && foff < vnp->vnp_size) {
				printf("vnode_pager_input: unexpected missing page: firstaddr: %d, foff: %d, vnp_size: %d\n",
				    firstaddr, foff, vnp->vnp_size);
				panic("vnode_pager_input:...");
			}
			vnode_pager_freepage(m[i]);
			runend = i + 1;
			first = runend;
			continue;
		}
		runend = i + runpg;
		if( runend <= reqpage) {
			int j;
			for(j = i; j < runend; j++) {
				vnode_pager_freepage(m[j]);
			}
		} else {
			if( runpg < (count - first)) {
				for(i=first + runpg; i < count; i++)
					vnode_pager_freepage(m[i]);
				count = first + runpg;
			}
			break;
		}
		first = runend;
	}

	/*
	 * the first and last page have been calculated now, move input pages
	 * to be zero based...
	 */
	if (first != 0) {
		for (i = first; i < count; i++) {
			m[i - first] = m[i];
		}
		count -= first;
		reqpage -= first;
	}

	/*
	 * calculate the file virtual address for the transfer
	 */
	foff = m[0]->offset;
#if 0
	printf("foff: 0x%lx, firstaddr: 0x%lx\n",
	    foff, firstaddr);
	DELAY(6000000);
#endif

	/*
	 * calculate the size of the transfer
	 */
	size = count * PAGE_SIZE;
	if ((foff + size) > vnp->vnp_size)
		size = vnp->vnp_size - foff;

	/*
	 * round up physical size for real devices
	 */
	if (dp->v_type == VBLK || dp->v_type == VCHR)
		size = (size + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1);

	/*
	 * split off pages past the requested one into a second, async
	 * read-ahead buffer when the transfer spans fs blocks.
	 */
	counta = 0;
	if (count * PAGE_SIZE > bsize)
		counta = (count - reqpage) - 1;
	bpa = 0;
	sizea = 0;
	bp = getpbuf();
	if (counta) {
		bpa = (struct buf *) trypbuf();
		if (bpa) {
			count -= counta;
			sizea = size - count * PAGE_SIZE;
			size = count * PAGE_SIZE;
		}
	}
	kva = (vm_offset_t) bp->b_data;

	/*
	 * and map the pages to be read into the kva
	 */
	pmap_qenter(kva, m, count);

	/* build a minimal buffer header */
	bp->b_flags = B_BUSY | B_READ | B_CALL;
	bp->b_iodone = vnode_pager_iodone;
	/* B_PHYS is not set, but it is nice to fill this in */
	bp->b_proc = curproc;
	bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
	if (bp->b_rcred != NOCRED)
		crhold(bp->b_rcred);
	if (bp->b_wcred != NOCRED)
		crhold(bp->b_wcred);
	bp->b_blkno = firstaddr;
	pbgetvp(dp, bp);
	bp->b_bcount = size;
	bp->b_bufsize = size;

	cnt.v_vnodein++;
	cnt.v_vnodepgsin += count;

	/* do the input */
	VOP_STRATEGY(bp);

	if (counta) {
		/* fire off the async read-ahead; completion is handled
		 * entirely by vnode_pager_iodone (B_ASYNC path). */
		for (i = 0; i < counta; i++) {
			vm_page_deactivate(m[count + i]);
		}
		pmap_qenter((vm_offset_t) bpa->b_data, &m[count], counta);
		++m[count]->object->paging_in_progress;
		bpa->b_flags = B_BUSY | B_READ | B_CALL | B_ASYNC;
		bpa->b_iodone = vnode_pager_iodone;
		/* B_PHYS is not set, but it is nice to fill this in */
		bpa->b_proc = curproc;
		bpa->b_rcred = bpa->b_wcred = bpa->b_proc->p_ucred;
		if (bpa->b_rcred != NOCRED)
			crhold(bpa->b_rcred);
		if (bpa->b_wcred != NOCRED)
			crhold(bpa->b_wcred);
		bpa->b_blkno = firstaddr + count * (PAGE_SIZE / DEV_BSIZE);
		pbgetvp(dp, bpa);
		bpa->b_bcount = sizea;
		bpa->b_bufsize = counta * PAGE_SIZE;

		cnt.v_vnodepgsin += counta;
		VOP_STRATEGY(bpa);
	}
	s = splbio();
	/* we definitely need to be at splbio here */
	while ((bp->b_flags & B_DONE) == 0) {
		tsleep((caddr_t) bp, PVM, "vnread", 0);
	}
	splx(s);
	if ((bp->b_flags & B_ERROR) != 0)
		error = EIO;

	if (!error) {
		/* zero-fill past EOF within the last page */
		if (size != count * PAGE_SIZE)
			bzero((caddr_t) kva + size, PAGE_SIZE * count - size);
	}
	pmap_qremove(kva, count);

	/*
	 * free the buffer header back to the swap buffer pool
	 */
	relpbuf(bp);

finishup:	/* NOTE(review): label is unused */
	for (i = 0; i < count; i++) {
		pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
		m[i]->dirty = 0;
		m[i]->valid = VM_PAGE_BITS_ALL;
		if (i != reqpage) {
			/*
			 * whether or not to leave the page activated is up in
			 * the air, but we should put the page on a page queue
			 * somewhere. (it already is in the object). Result:
			 * It appears that emperical results show that
			 * deactivating pages is best.
			 */

			/*
			 * just in case someone was asking for this page we
			 * now tell them that it is ok to use
			 */
			if (!error) {
				vm_page_deactivate(m[i]);
				PAGE_WAKEUP(m[i]);
			} else {
				vnode_pager_freepage(m[i]);
			}
		}
	}
	if (error) {
		printf("vnode_pager_input: I/O read error\n");
	}
	return (error ?
	    VM_PAGER_ERROR : VM_PAGER_OK);
}

/*
 * old-style vnode pager output routine: copy the page into a transient
 * pager_map buffer and push it out with VOP_WRITE.  Used when the fs
 * cannot bmap.
 */
int
vnode_pager_output_old(vnp, m)
	register vn_pager_t vnp;
	vm_page_t m;
{
	vm_offset_t kva, kva2;
	vm_offset_t size;
	struct iovec aiov;
	struct uio auio;
	struct vnode *vp;
	int error;

	vp = vnp->vnp_vp;

	/*
	 * Dont return failure if beyond current EOF placate the VM system.
	 */
	if (m->offset >= vnp->vnp_size) {
		return VM_PAGER_OK;
	} else {
		size = PAGE_SIZE;
		if (m->offset + size > vnp->vnp_size)
			size = vnp->vnp_size - m->offset;

		kva2 = kmem_alloc(pager_map, PAGE_SIZE);
		/*
		 * Allocate a kernel virtual address and initialize so that
		 * we can use VOP_WRITE routines.  The page is copied to a
		 * bounce buffer (kva2) so it can be released before the
		 * (possibly slow) write completes.
		 */
		kva = vm_pager_map_page(m);
		bcopy((caddr_t) kva, (caddr_t) kva2, size);
		vm_pager_unmap_page(kva);
		pmap_clear_modify(VM_PAGE_TO_PHYS(m));
		PAGE_WAKEUP(m);

		aiov.iov_base = (caddr_t) kva2;
		aiov.iov_len = size;
		auio.uio_iov = &aiov;
		auio.uio_iovcnt = 1;
		auio.uio_offset = m->offset;
		auio.uio_segflg = UIO_SYSSPACE;
		auio.uio_rw = UIO_WRITE;
		auio.uio_resid = size;
		auio.uio_procp = (struct proc *) 0;

		error = VOP_WRITE(vp, &auio, 0, curproc->p_ucred);

		kmem_free_wakeup(pager_map, kva2, PAGE_SIZE);
		if (!error) {
			/* a zero-length transfer counts as a failure */
			if ((size - auio.uio_resid) == 0) {
				error = EINVAL;
			}
		}
		return error ?
		    VM_PAGER_ERROR : VM_PAGER_OK;
	}
}

/*
 * vnode pager output on a small-block file system: write back the dirty
 * fs-blocks of a page one block at a time.
 */
int
vnode_pager_output_smlfs(vnp, m)
	vn_pager_t vnp;
	vm_page_t m;
{
	int i;
	int s;
	struct vnode *dp, *vp;
	struct buf *bp;
	vm_offset_t kva;
	int fileaddr;
	vm_offset_t bsize;
	int error = 0;

	vp = vnp->vnp_vp;
	bsize = vp->v_mount->mnt_stat.f_iosize;

	/* get the underlying device vnode */
	VOP_BMAP(vp, 0, &dp, 0, 0);
	kva = vm_pager_map_page(m);
	for (i = 0; !error && i < (PAGE_SIZE / bsize); i++) {

		/* skip sub-blocks that are not both valid and dirty */
		if ((vm_page_bits(m->offset + i * bsize, bsize) & m->valid & m->dirty) == 0)
			continue;
		/*
		 * calculate logical block and offset
		 */
		fileaddr = vnode_pager_addr(vp, m->offset + i * bsize, (int *)0);
		if (fileaddr != -1) {

			bp = getpbuf();

			/* build a minimal buffer header */
			bp->b_flags = B_BUSY | B_CALL | B_WRITE;
			bp->b_iodone = vnode_pager_iodone;
			bp->b_proc = curproc;
			bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
			if (bp->b_rcred != NOCRED)
				crhold(bp->b_rcred);
			if (bp->b_wcred != NOCRED)
				crhold(bp->b_wcred);
			bp->b_un.b_addr = (caddr_t) kva + i * bsize;
			bp->b_blkno = fileaddr;
			pbgetvp(dp, bp);
			++dp->v_numoutput;
			/* for NFS */
			bp->b_dirtyoff = 0;
			bp->b_dirtyend = bsize;
			bp->b_bcount = bsize;
			bp->b_bufsize = bsize;

			/* do the input (sic: this is a write) */
			VOP_STRATEGY(bp);

			/* we definitely need to be at splbio here */
			s = splbio();
			while ((bp->b_flags & B_DONE) == 0) {
				tsleep((caddr_t) bp, PVM, "vnswrt", 0);
			}
			splx(s);
			if ((bp->b_flags & B_ERROR) != 0)
				error = EIO;

			vm_page_set_clean(m, i * bsize, bsize);
			/*
			 * free the buffer header back to the swap buffer pool
			 */
			relpbuf(bp);
		}
	}
	vm_pager_unmap_page(kva);
	if (error)
		return VM_PAGER_ERROR;
	else
		return VM_PAGER_OK;
}

/*
 * generic vnode pager output routine: cluster write of 'count' dirty
 * pages.  Per-page status is reported in rtvals[].  Falls back to the
 * old VOP_WRITE path or the small-block path as appropriate, otherwise
 * issues direct device I/O; on error the write is retried one page at
 * a time (see retryoutput).
 */
int
vnode_pager_output(vnp, m, count, rtvals)
	vn_pager_t vnp;
	vm_page_t *m;
	int count;
	int *rtvals;
{
	int i, j;
	vm_offset_t kva, foff;
	int size;
	vm_object_t object;
	struct vnode *dp, *vp;
	struct buf *bp;
	vm_offset_t reqaddr;
	int bsize;
	int s;
	daddr_t block;
	struct timeval tv;
	int runpg;

	int error = 0;

retryoutput:
	object = m[0]->object;	/* all vm_page_t items are in same object */

	vp = vnp->vnp_vp;

	/*
	 * Make sure underlying filesystem is still mounted.
	 */
	if (vp->v_mount == NULL)
		return VM_PAGER_FAIL;

	bsize = vp->v_mount->mnt_stat.f_iosize;

	for (i = 0; i < count; i++)
		rtvals[i] = VM_PAGER_AGAIN;

	/* refuse to page out at a negative offset (metadata) */
	if ((int) m[0]->offset < 0) {
		printf("vnode_pager_output: attempt to write meta-data!!! -- 0x%x\n", m[0]->offset);
		m[0]->dirty = 0;
		rtvals[0] = VM_PAGER_OK;
		return VM_PAGER_OK;
	}
	/*
	 * if the filesystem does not have a bmap, then use the old code
	 */
	if (VOP_BMAP(vp, (m[0]->offset / bsize), &dp, &block, 0) ||
	    (block == -1)) {

		rtvals[0] = vnode_pager_output_old(vnp, m[0]);

		m[0]->dirty = 0;
		cnt.v_vnodeout++;
		cnt.v_vnodepgsout++;
		return rtvals[0];
	}
	/* update the vnode's timestamps before writing */
	tv = time;
	VOP_UPDATE(vp, &tv, &tv, 0);

	/*
	 * if the filesystem has a small blocksize, then use the small block
	 * filesystem output code
	 */
	if ((bsize < PAGE_SIZE) &&
	    (vp->v_mount->mnt_stat.f_type != MOUNT_NFS)) {

		for (i = 0; i < count; i++) {
			rtvals[i] = vnode_pager_output_smlfs(vnp, m[i]);
			if (rtvals[i] == VM_PAGER_OK) {
				pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
			}
		}
		cnt.v_vnodeout++;
		cnt.v_vnodepgsout += count;
		return rtvals[0];
	}
	/* drop pages at or beyond EOF from the cluster */
	for (i = 0; i < count; i++) {
		foff = m[i]->offset;
		if (foff >= vnp->vnp_size) {
			for (j = i; j < count; j++)
				rtvals[j] = VM_PAGER_BAD;
			count = i;
			break;
		}
	}
	if (count == 0) {
		return rtvals[0];
	}
	foff = m[0]->offset;
	reqaddr = vnode_pager_addr(vp, foff, &runpg);
	/* limit the transfer to the contiguous on-disk run */
	if( runpg < count)
		count = runpg;

	/*
	 * calculate the size of the transfer
	 */
	size = count * PAGE_SIZE;
	if ((foff + size) > vnp->vnp_size)
		size = vnp->vnp_size - foff;

	/*
	 * round up physical size for real devices
	 */
	if (dp->v_type == VBLK || dp->v_type == VCHR)
		size = (size + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1);

	bp = getpbuf();
	kva = (vm_offset_t) bp->b_data;
	/*
	 * and map the pages to be read into the kva
	 */
	pmap_qenter(kva, m, count);

	/* build a minimal buffer header */
	bp->b_flags = B_BUSY | B_WRITE | B_CALL;
	bp->b_iodone = vnode_pager_iodone;
	/* B_PHYS is not set, but it is nice to fill this in */
	bp->b_proc = curproc;
	bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;

	if (bp->b_rcred != NOCRED)
		crhold(bp->b_rcred);
	if (bp->b_wcred != NOCRED)
		crhold(bp->b_wcred);
	bp->b_blkno = reqaddr;
	pbgetvp(dp, bp);
	++dp->v_numoutput;

	/* for NFS */
	bp->b_dirtyoff = 0;
	bp->b_dirtyend = size;

	bp->b_bcount = size;
	bp->b_bufsize = size;

	cnt.v_vnodeout++;
	cnt.v_vnodepgsout += count;

	/* do the output */
	VOP_STRATEGY(bp);

	s = splbio();
	/* we definitely need to be at splbio here */
	while ((bp->b_flags & B_DONE) == 0) {
		tsleep((caddr_t) bp, PVM, "vnwrite", 0);
	}
	splx(s);

	if ((bp->b_flags & B_ERROR) != 0)
		error = EIO;

	pmap_qremove(kva, count);

	/*
	 * free the buffer header back to the swap buffer pool
	 */
	relpbuf(bp);

	if (!error) {
		for (i = 0; i < count; i++) {
			pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
			m[i]->dirty = 0;
			rtvals[i] = VM_PAGER_OK;
		}
	} else if (count != 1) {
		/* cluster write failed: retry the first page alone */
		error = 0;
		count = 1;
		goto retryoutput;
	}
	if (error) {
		printf("vnode_pager_output: I/O write error\n");
	}
	return (error ? VM_PAGER_ERROR : VM_PAGER_OK);
}