vnode_pager.c revision 1827
/*
 * Copyright (c) 1990 University of Utah.
 * Copyright (c) 1991 The Regents of the University of California.
 * All rights reserved.
 * Copyright (c) 1993,1994 John S. Dyson
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)vnode_pager.c	7.5 (Berkeley) 4/20/91
 *	$Id: vnode_pager.c,v 1.2 1994/05/25 09:21:11 rgrimes Exp $
 */

/*
 * Page to/from files (vnodes).
 *
 * TODO:
 *	pageouts
 *	fix credential use (uses current process credentials now)
 */

/*
 * MODIFICATIONS:
 * John S. Dyson  08 Dec 93
 *
 * This file in conjunction with some vm_fault mods, eliminate the performance
 * advantage for using the buffer cache and minimize memory copies.
57 * 58 * 1) Supports multiple - block reads 59 * 2) Bypasses buffer cache for reads 60 * 61 * TODO: 62 * 63 * 1) Totally bypass buffer cache for reads 64 * (Currently will still sometimes use buffer cache for reads) 65 * 2) Bypass buffer cache for writes 66 * (Code does not support it, but mods are simple) 67 */ 68 69#include <sys/param.h> 70#include <sys/systm.h> 71#include <sys/proc.h> 72#include <sys/malloc.h> 73#include <sys/vnode.h> 74#include <sys/uio.h> 75#include <sys/mount.h> 76 77#include <vm/vm.h> 78#include <vm/vm_page.h> 79#include <vm/vnode_pager.h> 80 81#include <sys/buf.h> 82#include <miscfs/specfs/specdev.h> 83 84int vnode_pager_putmulti(); 85 86void vnode_pager_init(); 87vm_pager_t vnode_pager_alloc(caddr_t, vm_offset_t, vm_prot_t, vm_offset_t); 88void vnode_pager_dealloc(); 89int vnode_pager_getpage(); 90int vnode_pager_getmulti(); 91int vnode_pager_putpage(); 92boolean_t vnode_pager_haspage(); 93 94struct pagerops vnodepagerops = { 95 vnode_pager_init, 96 vnode_pager_alloc, 97 vnode_pager_dealloc, 98 vnode_pager_getpage, 99 vnode_pager_getmulti, 100 vnode_pager_putpage, 101 vnode_pager_putmulti, 102 vnode_pager_haspage 103}; 104 105static int vnode_pager_input(vn_pager_t vnp, vm_page_t * m, int count, int reqpage); 106static int vnode_pager_output(vn_pager_t vnp, vm_page_t * m, int count, int *rtvals); 107struct buf * getpbuf(); 108void relpbuf(struct buf * bp); 109 110extern vm_map_t pager_map; 111 112struct pagerlst vnode_pager_list; /* list of managed vnodes */ 113 114#define MAXBP (PAGE_SIZE/DEV_BSIZE); 115 116void 117vnode_pager_init() 118{ 119 TAILQ_INIT(&vnode_pager_list); 120} 121 122/* 123 * Allocate (or lookup) pager for a vnode. 124 * Handle is a vnode pointer. 
125 */ 126vm_pager_t 127vnode_pager_alloc(handle, size, prot, offset) 128 caddr_t handle; 129 vm_size_t size; 130 vm_prot_t prot; 131 vm_offset_t offset; 132{ 133 register vm_pager_t pager; 134 register vn_pager_t vnp; 135 vm_object_t object; 136 struct vattr vattr; 137 struct vnode *vp; 138 struct proc *p = curproc; /* XXX */ 139 140 /* 141 * Pageout to vnode, no can do yet. 142 */ 143 if (handle == NULL) 144 return (NULL); 145 146 /* 147 * Vnodes keep a pointer to any associated pager so no need to lookup 148 * with vm_pager_lookup. 149 */ 150 vp = (struct vnode *) handle; 151 pager = (vm_pager_t) vp->v_vmdata; 152 if (pager == NULL) { 153 154 /* 155 * Allocate pager structures 156 */ 157 pager = (vm_pager_t) malloc(sizeof *pager, M_VMPAGER, M_WAITOK); 158 if (pager == NULL) 159 return (NULL); 160 vnp = (vn_pager_t) malloc(sizeof *vnp, M_VMPGDATA, M_WAITOK); 161 if (vnp == NULL) { 162 free((caddr_t) pager, M_VMPAGER); 163 return (NULL); 164 } 165 166 /* 167 * And an object of the appropriate size 168 */ 169 if (VOP_GETATTR(vp, &vattr, p->p_ucred, p) == 0) { 170 object = vm_object_allocate(round_page(vattr.va_size)); 171 vm_object_enter(object, pager); 172 vm_object_setpager(object, pager, 0, TRUE); 173 } else { 174 free((caddr_t) vnp, M_VMPGDATA); 175 free((caddr_t) pager, M_VMPAGER); 176 return (NULL); 177 } 178 179 /* 180 * Hold a reference to the vnode and initialize pager data. 181 */ 182 VREF(vp); 183 vnp->vnp_flags = 0; 184 vnp->vnp_vp = vp; 185 vnp->vnp_size = vattr.va_size; 186 187 TAILQ_INSERT_TAIL(&vnode_pager_list, pager, pg_list); 188 pager->pg_handle = handle; 189 pager->pg_type = PG_VNODE; 190 pager->pg_ops = &vnodepagerops; 191 pager->pg_data = (caddr_t) vnp; 192 vp->v_vmdata = (caddr_t) pager; 193 } else { 194 195 /* 196 * vm_object_lookup() will remove the object from the cache if 197 * found and also gain a reference to the object. 
198 */ 199 object = vm_object_lookup(pager); 200 } 201 return (pager); 202} 203 204void 205vnode_pager_dealloc(pager) 206 vm_pager_t pager; 207{ 208 register vn_pager_t vnp = (vn_pager_t) pager->pg_data; 209 register struct vnode *vp; 210 struct proc *p = curproc; /* XXX */ 211 212 if (vp = vnp->vnp_vp) { 213 vp->v_vmdata = NULL; 214 vp->v_flag &= ~VTEXT; 215#if 0 216 /* can hang if done at reboot on NFS FS */ 217 (void) VOP_FSYNC(vp, p->p_ucred, p); 218#endif 219 vrele(vp); 220 } 221 TAILQ_REMOVE(&vnode_pager_list, pager, pg_list); 222 free((caddr_t) vnp, M_VMPGDATA); 223 free((caddr_t) pager, M_VMPAGER); 224} 225 226int 227vnode_pager_getmulti(pager, m, count, reqpage, sync) 228 vm_pager_t pager; 229 vm_page_t *m; 230 int count; 231 int reqpage; 232 boolean_t sync; 233{ 234 235 return vnode_pager_input((vn_pager_t) pager->pg_data, m, count, reqpage); 236} 237 238int 239vnode_pager_getpage(pager, m, sync) 240 vm_pager_t pager; 241 vm_page_t m; 242 boolean_t sync; 243{ 244 245 int err; 246 vm_page_t marray[1]; 247 248 if (pager == NULL) 249 return FALSE; 250 marray[0] = m; 251 252 return vnode_pager_input((vn_pager_t) pager->pg_data, marray, 1, 0); 253} 254 255boolean_t 256vnode_pager_putpage(pager, m, sync) 257 vm_pager_t pager; 258 vm_page_t m; 259 boolean_t sync; 260{ 261 int err; 262 vm_page_t marray[1]; 263 int rtvals[1]; 264 265 if (pager == NULL) 266 return FALSE; 267 marray[0] = m; 268 vnode_pager_output((vn_pager_t) pager->pg_data, marray, 1, rtvals); 269 return rtvals[0]; 270} 271 272int 273vnode_pager_putmulti(pager, m, c, sync, rtvals) 274 vm_pager_t pager; 275 vm_page_t *m; 276 int c; 277 boolean_t sync; 278 int *rtvals; 279{ 280 return vnode_pager_output((vn_pager_t) pager->pg_data, m, c, rtvals); 281} 282 283 284boolean_t 285vnode_pager_haspage(pager, offset) 286 vm_pager_t pager; 287 vm_offset_t offset; 288{ 289 register vn_pager_t vnp = (vn_pager_t) pager->pg_data; 290 daddr_t bn; 291 int err; 292 293 /* 294 * Offset beyond end of file, do not have 
the page 295 */ 296 if (offset >= vnp->vnp_size) { 297 return (FALSE); 298 } 299 300 /* 301 * Read the index to find the disk block to read from. If there is no 302 * block, report that we don't have this data. 303 * 304 * Assumes that the vnode has whole page or nothing. 305 */ 306 err = VOP_BMAP(vnp->vnp_vp, 307 offset / vnp->vnp_vp->v_mount->mnt_stat.f_iosize, 308 (struct vnode **) 0, &bn, 0); 309 if (err) { 310 return (TRUE); 311 } 312 return ((long) bn < 0 ? FALSE : TRUE); 313} 314 315/* 316 * Lets the VM system know about a change in size for a file. 317 * If this vnode is mapped into some address space (i.e. we have a pager 318 * for it) we adjust our own internal size and flush any cached pages in 319 * the associated object that are affected by the size change. 320 * 321 * Note: this routine may be invoked as a result of a pager put 322 * operation (possibly at object termination time), so we must be careful. 323 */ 324void 325vnode_pager_setsize(vp, nsize) 326 struct vnode *vp; 327 u_long nsize; 328{ 329 register vn_pager_t vnp; 330 register vm_object_t object; 331 vm_pager_t pager; 332 333 /* 334 * Not a mapped vnode 335 */ 336 if (vp == NULL || vp->v_type != VREG || vp->v_vmdata == NULL) 337 return; 338 339 /* 340 * Hasn't changed size 341 */ 342 pager = (vm_pager_t) vp->v_vmdata; 343 vnp = (vn_pager_t) pager->pg_data; 344 if (nsize == vnp->vnp_size) 345 return; 346 347 /* 348 * No object. This can happen during object termination since 349 * vm_object_page_clean is called after the object has been removed 350 * from the hash table, and clean may cause vnode write operations 351 * which can wind up back here. 352 */ 353 object = vm_object_lookup(pager); 354 if (object == NULL) 355 return; 356 357 /* 358 * File has shrunk. Toss any cached pages beyond the new EOF. 
359 */ 360 if (nsize < vnp->vnp_size) { 361 vm_object_lock(object); 362 vm_object_page_remove(object, 363 round_page((vm_offset_t) nsize), vnp->vnp_size); 364 vm_object_unlock(object); 365 366 /* 367 * this gets rid of garbage at the end of a page that is now 368 * only partially backed by the vnode... 369 */ 370 if (nsize & PAGE_MASK) { 371 vm_offset_t kva; 372 vm_page_t m; 373 374 m = vm_page_lookup(object, trunc_page((vm_offset_t) nsize)); 375 if (m) { 376 kva = vm_pager_map_page(m); 377 bzero((caddr_t) kva + (nsize & PAGE_MASK), 378 round_page(nsize) - nsize); 379 vm_pager_unmap_page(kva); 380 } 381 } 382 } else { 383 384 /* 385 * this allows the filesystem and VM cache to stay in sync if 386 * the VM page hasn't been modified... After the page is 387 * removed -- it will be faulted back in from the filesystem 388 * cache. 389 */ 390 if (vnp->vnp_size & PAGE_MASK) { 391 vm_page_t m; 392 393 m = vm_page_lookup(object, trunc_page(vnp->vnp_size)); 394 if (m && (m->flags & PG_CLEAN)) { 395 vm_object_lock(object); 396 vm_object_page_remove(object, 397 vnp->vnp_size, vnp->vnp_size); 398 vm_object_unlock(object); 399 } 400 } 401 } 402 vnp->vnp_size = (vm_offset_t) nsize; 403 object->size = round_page(nsize); 404 405 vm_object_deallocate(object); 406} 407 408void 409vnode_pager_umount(mp) 410 register struct mount *mp; 411{ 412 register vm_pager_t pager, npager; 413 struct vnode *vp; 414 415 pager = vnode_pager_list.tqh_first; 416 while (pager) { 417 418 /* 419 * Save the next pointer now since uncaching may terminate the 420 * object and render pager invalid 421 */ 422 vp = ((vn_pager_t) pager->pg_data)->vnp_vp; 423 npager = pager->pg_list.tqe_next; 424 if (mp == (struct mount *) 0 || vp->v_mount == mp) 425 (void) vnode_pager_uncache(vp); 426 pager = npager; 427 } 428} 429 430/* 431 * Remove vnode associated object from the object cache. 
432 * 433 * Note: this routine may be invoked as a result of a pager put 434 * operation (possibly at object termination time), so we must be careful. 435 */ 436boolean_t 437vnode_pager_uncache(vp) 438 register struct vnode *vp; 439{ 440 register vm_object_t object; 441 boolean_t uncached, locked; 442 vm_pager_t pager; 443 444 /* 445 * Not a mapped vnode 446 */ 447 pager = (vm_pager_t) vp->v_vmdata; 448 if (pager == NULL) 449 return (TRUE); 450 451 /* 452 * Unlock the vnode if it is currently locked. We do this since 453 * uncaching the object may result in its destruction which may 454 * initiate paging activity which may necessitate locking the vnode. 455 */ 456 locked = VOP_ISLOCKED(vp); 457 if (locked) 458 VOP_UNLOCK(vp); 459 460 /* 461 * Must use vm_object_lookup() as it actually removes the object from 462 * the cache list. 463 */ 464 object = vm_object_lookup(pager); 465 if (object) { 466 uncached = (object->ref_count <= 1); 467 pager_cache(object, FALSE); 468 } else 469 uncached = TRUE; 470 if (locked) 471 VOP_LOCK(vp); 472 return (uncached); 473} 474 475 476void 477vnode_pager_freepage(m) 478 vm_page_t m; 479{ 480 PAGE_WAKEUP(m); 481 vm_page_free(m); 482} 483 484/* 485 * calculate the linear (byte) disk address of specified virtual 486 * file address 487 */ 488vm_offset_t 489vnode_pager_addr(vp, address) 490 struct vnode *vp; 491 vm_offset_t address; 492{ 493 int rtaddress; 494 int bsize; 495 vm_offset_t block; 496 struct vnode *rtvp; 497 int err; 498 int vblock, voffset; 499 500 bsize = vp->v_mount->mnt_stat.f_iosize; 501 vblock = address / bsize; 502 voffset = address % bsize; 503 504 err = VOP_BMAP(vp, vblock, &rtvp, &block, 0); 505 506 if (err) 507 rtaddress = -1; 508 else 509 rtaddress = block * DEV_BSIZE + voffset; 510 511 return rtaddress; 512} 513 514/* 515 * interrupt routine for I/O completion 516 */ 517void 518vnode_pager_iodone(bp) 519 struct buf *bp; 520{ 521 bp->b_flags |= B_DONE; 522 wakeup((caddr_t) bp); 523} 524 525/* 526 * small block 
file system vnode pager input 527 */ 528int 529vnode_pager_input_smlfs(vnp, m) 530 vn_pager_t vnp; 531 vm_page_t m; 532{ 533 int i; 534 int s; 535 vm_offset_t paging_offset; 536 struct vnode *dp, *vp; 537 struct buf *bp; 538 vm_offset_t mapsize; 539 vm_offset_t foff; 540 vm_offset_t kva; 541 int fileaddr; 542 int block; 543 vm_offset_t bsize; 544 int error = 0; 545 546 paging_offset = m->object->paging_offset; 547 vp = vnp->vnp_vp; 548 bsize = vp->v_mount->mnt_stat.f_iosize; 549 foff = m->offset + paging_offset; 550 551 VOP_BMAP(vp, foff, &dp, 0, 0); 552 553 kva = vm_pager_map_page(m); 554 555 for (i = 0; i < PAGE_SIZE / bsize; i++) { 556 557 /* 558 * calculate logical block and offset 559 */ 560 block = foff / bsize + i; 561 s = splbio(); 562 while (bp = incore(vp, block)) { 563 int amount; 564 565 /* 566 * wait until the buffer is avail or gone 567 */ 568 if (bp->b_flags & B_BUSY) { 569 bp->b_flags |= B_WANTED; 570 tsleep((caddr_t) bp, PVM, "vnwblk", 0); 571 continue; 572 } 573 amount = bsize; 574 if ((foff + bsize) > vnp->vnp_size) 575 amount = vnp->vnp_size - foff; 576 577 /* 578 * make sure that this page is in the buffer 579 */ 580 if ((amount > 0) && amount <= bp->b_bcount) { 581 bp->b_flags |= B_BUSY; 582 splx(s); 583 584 /* 585 * copy the data from the buffer 586 */ 587 bcopy(bp->b_un.b_addr, (caddr_t) kva + i * bsize, amount); 588 if (amount < bsize) { 589 bzero((caddr_t) kva + amount, bsize - amount); 590 } 591 bp->b_flags &= ~B_BUSY; 592 wakeup((caddr_t) bp); 593 goto nextblock; 594 } 595 break; 596 } 597 splx(s); 598 fileaddr = vnode_pager_addr(vp, foff + i * bsize); 599 if (fileaddr != -1) { 600 bp = getpbuf(); 601 VHOLD(vp); 602 603 /* build a minimal buffer header */ 604 bp->b_flags = B_BUSY | B_READ | B_CALL; 605 bp->b_iodone = vnode_pager_iodone; 606 bp->b_proc = curproc; 607 bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred; 608 if (bp->b_rcred != NOCRED) 609 crhold(bp->b_rcred); 610 if (bp->b_wcred != NOCRED) 611 crhold(bp->b_wcred); 612 
bp->b_un.b_addr = (caddr_t) kva + i * bsize; 613 bp->b_blkno = fileaddr / DEV_BSIZE; 614 bgetvp(dp, bp); 615 bp->b_bcount = bsize; 616 bp->b_bufsize = bsize; 617 618 /* do the input */ 619 VOP_STRATEGY(bp); 620 621 /* we definitely need to be at splbio here */ 622 623 s = splbio(); 624 while ((bp->b_flags & B_DONE) == 0) { 625 tsleep((caddr_t) bp, PVM, "vnsrd", 0); 626 } 627 splx(s); 628 if ((bp->b_flags & B_ERROR) != 0) 629 error = EIO; 630 631 /* 632 * free the buffer header back to the swap buffer pool 633 */ 634 relpbuf(bp); 635 HOLDRELE(vp); 636 if (error) 637 break; 638 } else { 639 bzero((caddr_t) kva + i * bsize, bsize); 640 } 641nextblock: 642 } 643 vm_pager_unmap_page(kva); 644 if (error) { 645 return VM_PAGER_FAIL; 646 } 647 pmap_clear_modify(VM_PAGE_TO_PHYS(m)); 648 m->flags |= PG_CLEAN; 649 m->flags &= ~PG_LAUNDRY; 650 return VM_PAGER_OK; 651 652} 653 654 655/* 656 * old style vnode pager output routine 657 */ 658int 659vnode_pager_input_old(vnp, m) 660 vn_pager_t vnp; 661 vm_page_t m; 662{ 663 int i; 664 struct uio auio; 665 struct iovec aiov; 666 int error; 667 int size; 668 vm_offset_t foff; 669 vm_offset_t kva; 670 671 error = 0; 672 foff = m->offset + m->object->paging_offset; 673 674 /* 675 * Return failure if beyond current EOF 676 */ 677 if (foff >= vnp->vnp_size) { 678 return VM_PAGER_BAD; 679 } else { 680 size = PAGE_SIZE; 681 if (foff + size > vnp->vnp_size) 682 size = vnp->vnp_size - foff; 683/* 684 * Allocate a kernel virtual address and initialize so that 685 * we can use VOP_READ/WRITE routines. 
686 */ 687 kva = vm_pager_map_page(m); 688 aiov.iov_base = (caddr_t) kva; 689 aiov.iov_len = size; 690 auio.uio_iov = &aiov; 691 auio.uio_iovcnt = 1; 692 auio.uio_offset = foff; 693 auio.uio_segflg = UIO_SYSSPACE; 694 auio.uio_rw = UIO_READ; 695 auio.uio_resid = size; 696 auio.uio_procp = (struct proc *) 0; 697 698 error = VOP_READ(vnp->vnp_vp, &auio, 0, curproc->p_ucred); 699 if (!error) { 700 register int count = size - auio.uio_resid; 701 702 if (count == 0) 703 error = EINVAL; 704 else if (count != PAGE_SIZE) 705 bzero((caddr_t) kva + count, PAGE_SIZE - count); 706 } 707 vm_pager_unmap_page(kva); 708 } 709 pmap_clear_modify(VM_PAGE_TO_PHYS(m)); 710 m->flags |= PG_CLEAN; 711 m->flags &= ~PG_LAUNDRY; 712 return error ? VM_PAGER_FAIL : VM_PAGER_OK; 713} 714 715/* 716 * generic vnode pager input routine 717 */ 718int 719vnode_pager_input(vnp, m, count, reqpage) 720 register vn_pager_t vnp; 721 vm_page_t *m; 722 int count, reqpage; 723{ 724 int i, j; 725 vm_offset_t kva, foff; 726 int size; 727 struct proc *p = curproc; /* XXX */ 728 vm_object_t object; 729 vm_offset_t paging_offset; 730 struct vnode *dp, *vp; 731 vm_offset_t mapsize; 732 int bsize; 733 734 int first, last; 735 int reqaddr, firstaddr; 736 int block, offset; 737 738 int nbp; 739 struct buf *bp; 740 int s; 741 int failflag; 742 743 int errtype = 0; /* 0 is file type otherwise vm type */ 744 int error = 0; 745 746 object = m[reqpage]->object; /* all vm_page_t items are in same 747 * object */ 748 paging_offset = object->paging_offset; 749 750 vp = vnp->vnp_vp; 751 bsize = vp->v_mount->mnt_stat.f_iosize; 752 753 /* get the UNDERLYING device for the file with VOP_BMAP() */ 754 755 /* 756 * originally, we did not check for an error return value -- assuming 757 * an fs always has a bmap entry point -- that assumption is wrong!!! 
758 */ 759 kva = 0; 760 mapsize = 0; 761 foff = m[reqpage]->offset + paging_offset; 762 if (!VOP_BMAP(vp, foff, &dp, 0, 0)) { 763 764 /* 765 * we do not block for a kva, notice we default to a kva 766 * conservative behavior 767 */ 768 kva = kmem_alloc_pageable(pager_map, (mapsize = count * PAGE_SIZE)); 769 if (!kva) { 770 for (i = 0; i < count; i++) { 771 if (i != reqpage) { 772 vnode_pager_freepage(m[i]); 773 } 774 } 775 m[0] = m[reqpage]; 776 kva = kmem_alloc_wait(pager_map, mapsize = PAGE_SIZE); 777 reqpage = 0; 778 count = 1; 779 } 780 } 781 782 /* 783 * if we can't get a kva or we can't bmap, use old VOP code 784 */ 785 if (!kva) { 786 for (i = 0; i < count; i++) { 787 if (i != reqpage) { 788 vnode_pager_freepage(m[i]); 789 } 790 } 791 return vnode_pager_input_old(vnp, m[reqpage]); 792 793 /* 794 * if the blocksize is smaller than a page size, then use 795 * special small filesystem code. NFS sometimes has a small 796 * blocksize, but it can handle large reads itself. 797 */ 798 } else if ((PAGE_SIZE / bsize) > 1 && 799 (vp->v_mount->mnt_stat.f_type != MOUNT_NFS)) { 800 801 kmem_free_wakeup(pager_map, kva, mapsize); 802 803 for (i = 0; i < count; i++) { 804 if (i != reqpage) { 805 vnode_pager_freepage(m[i]); 806 } 807 } 808 return vnode_pager_input_smlfs(vnp, m[reqpage]); 809 } 810/* 811 * here on direct device I/O 812 */ 813 814 815 /* 816 * This pathetic hack gets data from the buffer cache, if it's there. 817 * I believe that this is not really necessary, and the ends can be 818 * gotten by defaulting to the normal vfs read behavior, but this 819 * might be more efficient, because the will NOT invoke read-aheads 820 * and one of the purposes of this code is to bypass the buffer cache 821 * and keep from flushing it by reading in a program. 
822 */ 823 824 /* 825 * calculate logical block and offset 826 */ 827 block = foff / bsize; 828 offset = foff % bsize; 829 s = splbio(); 830 831 /* 832 * if we have a buffer in core, then try to use it 833 */ 834 while (bp = incore(vp, block)) { 835 int amount; 836 837 /* 838 * wait until the buffer is avail or gone 839 */ 840 if (bp->b_flags & B_BUSY) { 841 bp->b_flags |= B_WANTED; 842 tsleep((caddr_t) bp, PVM, "vnwblk", 0); 843 continue; 844 } 845 amount = PAGE_SIZE; 846 if ((foff + amount) > vnp->vnp_size) 847 amount = vnp->vnp_size - foff; 848 849 /* 850 * make sure that this page is in the buffer 851 */ 852 if ((amount > 0) && (offset + amount) <= bp->b_bcount) { 853 bp->b_flags |= B_BUSY; 854 splx(s); 855 856 /* 857 * map the requested page 858 */ 859 pmap_kenter(kva, VM_PAGE_TO_PHYS(m[reqpage])); 860 pmap_update(); 861 862 /* 863 * copy the data from the buffer 864 */ 865 bcopy(bp->b_un.b_addr + offset, (caddr_t) kva, amount); 866 if (amount < PAGE_SIZE) { 867 bzero((caddr_t) kva + amount, PAGE_SIZE - amount); 868 } 869 870 /* 871 * unmap the page and free the kva 872 */ 873 pmap_remove(vm_map_pmap(pager_map), kva, kva + PAGE_SIZE); 874 kmem_free_wakeup(pager_map, kva, mapsize); 875 876 /* 877 * release the buffer back to the block subsystem 878 */ 879 bp->b_flags &= ~B_BUSY; 880 wakeup((caddr_t) bp); 881 882 /* 883 * we did not have to do any work to get the requested 884 * page, the read behind/ahead does not justify a read 885 */ 886 for (i = 0; i < count; i++) { 887 if (i != reqpage) { 888 vnode_pager_freepage(m[i]); 889 } 890 } 891 count = 1; 892 reqpage = 0; 893 m[0] = m[reqpage]; 894 895 /* 896 * sorry for the goto 897 */ 898 goto finishup; 899 } 900 901 /* 902 * buffer is nowhere to be found, read from the disk 903 */ 904 break; 905 } 906 splx(s); 907 908 reqaddr = vnode_pager_addr(vp, foff); 909 s = splbio(); 910 911 /* 912 * Make sure that our I/O request is contiguous. 
Scan backward and 913 * stop for the first discontiguous entry or stop for a page being in 914 * buffer cache. 915 */ 916 failflag = 0; 917 first = reqpage; 918 for (i = reqpage - 1; i >= 0; --i) { 919 if (failflag || 920 incore(vp, (foff + (i - reqpage) * PAGE_SIZE) / bsize) || 921 (vnode_pager_addr(vp, m[i]->offset + paging_offset)) 922 != reqaddr + (i - reqpage) * PAGE_SIZE) { 923 vnode_pager_freepage(m[i]); 924 failflag = 1; 925 } else { 926 first = i; 927 } 928 } 929 930 /* 931 * Scan forward and stop for the first non-contiguous entry or stop 932 * for a page being in buffer cache. 933 */ 934 failflag = 0; 935 last = reqpage + 1; 936 for (i = reqpage + 1; i < count; i++) { 937 if (failflag || 938 incore(vp, (foff + (i - reqpage) * PAGE_SIZE) / bsize) || 939 (vnode_pager_addr(vp, m[i]->offset + paging_offset)) 940 != reqaddr + (i - reqpage) * PAGE_SIZE) { 941 vnode_pager_freepage(m[i]); 942 failflag = 1; 943 } else { 944 last = i + 1; 945 } 946 } 947 splx(s); 948 949 /* 950 * the first and last page have been calculated now, move input pages 951 * to be zero based... 
952 */ 953 count = last; 954 if (first != 0) { 955 for (i = first; i < count; i++) { 956 m[i - first] = m[i]; 957 } 958 count -= first; 959 reqpage -= first; 960 } 961 962 /* 963 * calculate the file virtual address for the transfer 964 */ 965 foff = m[0]->offset + paging_offset; 966 967 /* 968 * and get the disk physical address (in bytes) 969 */ 970 firstaddr = vnode_pager_addr(vp, foff); 971 972 /* 973 * calculate the size of the transfer 974 */ 975 size = count * PAGE_SIZE; 976 if ((foff + size) > vnp->vnp_size) 977 size = vnp->vnp_size - foff; 978 979 /* 980 * round up physical size for real devices 981 */ 982 if (dp->v_type == VBLK || dp->v_type == VCHR) 983 size = (size + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1); 984 985 /* 986 * and map the pages to be read into the kva 987 */ 988 for (i = 0; i < count; i++) 989 pmap_kenter(kva + PAGE_SIZE * i, VM_PAGE_TO_PHYS(m[i])); 990 991 pmap_update(); 992 bp = getpbuf(); 993 VHOLD(vp); 994 995 /* build a minimal buffer header */ 996 bp->b_flags = B_BUSY | B_READ | B_CALL; 997 bp->b_iodone = vnode_pager_iodone; 998 /* B_PHYS is not set, but it is nice to fill this in */ 999 bp->b_proc = curproc; 1000 bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred; 1001 if (bp->b_rcred != NOCRED) 1002 crhold(bp->b_rcred); 1003 if (bp->b_wcred != NOCRED) 1004 crhold(bp->b_wcred); 1005 bp->b_un.b_addr = (caddr_t) kva; 1006 bp->b_blkno = firstaddr / DEV_BSIZE; 1007 bgetvp(dp, bp); 1008 bp->b_bcount = size; 1009 bp->b_bufsize = size; 1010 1011 /* do the input */ 1012 VOP_STRATEGY(bp); 1013 1014 s = splbio(); 1015 /* we definitely need to be at splbio here */ 1016 1017 while ((bp->b_flags & B_DONE) == 0) { 1018 tsleep((caddr_t) bp, PVM, "vnread", 0); 1019 } 1020 splx(s); 1021 if ((bp->b_flags & B_ERROR) != 0) 1022 error = EIO; 1023 1024 if (!error) { 1025 if (size != count * PAGE_SIZE) 1026 bzero((caddr_t) kva + size, PAGE_SIZE * count - size); 1027 } 1028 pmap_remove(vm_map_pmap(pager_map), kva, kva + PAGE_SIZE * count); 1029 
kmem_free_wakeup(pager_map, kva, mapsize); 1030 1031 /* 1032 * free the buffer header back to the swap buffer pool 1033 */ 1034 relpbuf(bp); 1035 HOLDRELE(vp); 1036 1037finishup: 1038 for (i = 0; i < count; i++) { 1039 pmap_clear_modify(VM_PAGE_TO_PHYS(m[i])); 1040 m[i]->flags |= PG_CLEAN; 1041 m[i]->flags &= ~PG_LAUNDRY; 1042 if (i != reqpage) { 1043 1044 /* 1045 * whether or not to leave the page activated is up in 1046 * the air, but we should put the page on a page queue 1047 * somewhere. (it already is in the object). Result: 1048 * It appears that emperical results show that 1049 * deactivating pages is best. 1050 */ 1051 1052 /* 1053 * just in case someone was asking for this page we 1054 * now tell them that it is ok to use 1055 */ 1056 if (!error) { 1057 vm_page_deactivate(m[i]); 1058 PAGE_WAKEUP(m[i]); 1059 m[i]->flags &= ~PG_FAKE; 1060 } else { 1061 vnode_pager_freepage(m[i]); 1062 } 1063 } 1064 } 1065 if (error) { 1066 printf("vnode pager read error: %d\n", error); 1067 } 1068 if (errtype) 1069 return error; 1070 return (error ? VM_PAGER_FAIL : VM_PAGER_OK); 1071} 1072 1073/* 1074 * old-style vnode pager output routine 1075 */ 1076int 1077vnode_pager_output_old(vnp, m) 1078 register vn_pager_t vnp; 1079 vm_page_t m; 1080{ 1081 vm_offset_t foff; 1082 vm_offset_t kva; 1083 vm_offset_t size; 1084 struct iovec aiov; 1085 struct uio auio; 1086 struct vnode *vp; 1087 int error; 1088 1089 vp = vnp->vnp_vp; 1090 foff = m->offset + m->object->paging_offset; 1091 1092 /* 1093 * Return failure if beyond current EOF 1094 */ 1095 if (foff >= vnp->vnp_size) { 1096 return VM_PAGER_BAD; 1097 } else { 1098 size = PAGE_SIZE; 1099 if (foff + size > vnp->vnp_size) 1100 size = vnp->vnp_size - foff; 1101/* 1102 * Allocate a kernel virtual address and initialize so that 1103 * we can use VOP_WRITE routines. 
1104 */ 1105 kva = vm_pager_map_page(m); 1106 aiov.iov_base = (caddr_t) kva; 1107 aiov.iov_len = size; 1108 auio.uio_iov = &aiov; 1109 auio.uio_iovcnt = 1; 1110 auio.uio_offset = foff; 1111 auio.uio_segflg = UIO_SYSSPACE; 1112 auio.uio_rw = UIO_WRITE; 1113 auio.uio_resid = size; 1114 auio.uio_procp = (struct proc *) 0; 1115 1116 error = VOP_WRITE(vp, &auio, 0, curproc->p_ucred); 1117 1118 if (!error) { 1119 if ((size - auio.uio_resid) == 0) { 1120 error = EINVAL; 1121 } 1122 } 1123 vm_pager_unmap_page(kva); 1124 return error ? VM_PAGER_FAIL : VM_PAGER_OK; 1125 } 1126} 1127 1128/* 1129 * vnode pager output on a small-block file system 1130 */ 1131int 1132vnode_pager_output_smlfs(vnp, m) 1133 vn_pager_t vnp; 1134 vm_page_t m; 1135{ 1136 int i; 1137 int s; 1138 vm_offset_t paging_offset; 1139 struct vnode *dp, *vp; 1140 struct buf *bp; 1141 vm_offset_t mapsize; 1142 vm_offset_t foff; 1143 vm_offset_t kva; 1144 int fileaddr; 1145 int block; 1146 vm_offset_t bsize; 1147 int error = 0; 1148 1149 paging_offset = m->object->paging_offset; 1150 vp = vnp->vnp_vp; 1151 bsize = vp->v_mount->mnt_stat.f_iosize; 1152 foff = m->offset + paging_offset; 1153 1154 VOP_BMAP(vp, foff, &dp, 0, 0); 1155 kva = vm_pager_map_page(m); 1156 for (i = 0; !error && i < (PAGE_SIZE / bsize); i++) { 1157 1158 /* 1159 * calculate logical block and offset 1160 */ 1161 fileaddr = vnode_pager_addr(vp, foff + i * bsize); 1162 if (fileaddr != -1) { 1163 s = splbio(); 1164 if (bp = incore(vp, (foff / bsize) + i)) { 1165 bp = getblk(vp, (foff / bsize) + i, bp->b_bufsize, 0, 0); 1166 bp->b_flags |= B_INVAL; 1167 brelse(bp); 1168 } 1169 splx(s); 1170 1171 bp = getpbuf(); 1172 VHOLD(vp); 1173 1174 /* build a minimal buffer header */ 1175 bp->b_flags = B_BUSY | B_CALL | B_WRITE; 1176 bp->b_iodone = vnode_pager_iodone; 1177 bp->b_proc = curproc; 1178 bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred; 1179 if (bp->b_rcred != NOCRED) 1180 crhold(bp->b_rcred); 1181 if (bp->b_wcred != NOCRED) 1182 
crhold(bp->b_wcred); 1183 bp->b_un.b_addr = (caddr_t) kva + i * bsize; 1184 bp->b_blkno = fileaddr / DEV_BSIZE; 1185 bgetvp(dp, bp); 1186 ++dp->v_numoutput; 1187 /* for NFS */ 1188 bp->b_dirtyoff = 0; 1189 bp->b_dirtyend = bsize; 1190 bp->b_bcount = bsize; 1191 bp->b_bufsize = bsize; 1192 1193 /* do the input */ 1194 VOP_STRATEGY(bp); 1195 1196 /* we definitely need to be at splbio here */ 1197 1198 s = splbio(); 1199 while ((bp->b_flags & B_DONE) == 0) { 1200 tsleep((caddr_t) bp, PVM, "vnswrt", 0); 1201 } 1202 splx(s); 1203 if ((bp->b_flags & B_ERROR) != 0) 1204 error = EIO; 1205 1206 /* 1207 * free the buffer header back to the swap buffer pool 1208 */ 1209 relpbuf(bp); 1210 HOLDRELE(vp); 1211 } 1212 } 1213 vm_pager_unmap_page(kva); 1214 if (error) 1215 return VM_PAGER_FAIL; 1216 else 1217 return VM_PAGER_OK; 1218} 1219 1220/* 1221 * generic vnode pager output routine 1222 */ 1223int 1224vnode_pager_output(vnp, m, count, rtvals) 1225 vn_pager_t vnp; 1226 vm_page_t *m; 1227 int count; 1228 int *rtvals; 1229{ 1230 int i, j; 1231 vm_offset_t kva, foff; 1232 int size; 1233 struct proc *p = curproc; /* XXX */ 1234 vm_object_t object; 1235 vm_offset_t paging_offset; 1236 struct vnode *dp, *vp; 1237 struct buf *bp; 1238 vm_offset_t mapsize; 1239 vm_offset_t reqaddr; 1240 int bsize; 1241 int s; 1242 1243 int error = 0; 1244 1245retryoutput: 1246 object = m[0]->object; /* all vm_page_t items are in same object */ 1247 paging_offset = object->paging_offset; 1248 1249 vp = vnp->vnp_vp; 1250 bsize = vp->v_mount->mnt_stat.f_iosize; 1251 1252 for (i = 0; i < count; i++) 1253 rtvals[i] = VM_PAGER_AGAIN; 1254 1255 /* 1256 * if the filesystem does not have a bmap, then use the old code 1257 */ 1258 if (VOP_BMAP(vp, m[0]->offset + paging_offset, &dp, 0, 0)) { 1259 1260 rtvals[0] = vnode_pager_output_old(vnp, m[0]); 1261 1262 pmap_clear_modify(VM_PAGE_TO_PHYS(m[0])); 1263 m[0]->flags |= PG_CLEAN; 1264 m[0]->flags &= ~PG_LAUNDRY; 1265 return rtvals[0]; 1266 } 1267 1268 /* 1269 * if 
the filesystem has a small blocksize, then use the small block 1270 * filesystem output code 1271 */ 1272 if ((bsize < PAGE_SIZE) && 1273 (vp->v_mount->mnt_stat.f_type != MOUNT_NFS)) { 1274 1275 for (i = 0; i < count; i++) { 1276 rtvals[i] = vnode_pager_output_smlfs(vnp, m[i]); 1277 if (rtvals[i] == VM_PAGER_OK) { 1278 pmap_clear_modify(VM_PAGE_TO_PHYS(m[i])); 1279 m[i]->flags |= PG_CLEAN; 1280 m[i]->flags &= ~PG_LAUNDRY; 1281 } 1282 } 1283 return rtvals[0]; 1284 } 1285 1286 /* 1287 * get some kva for the output 1288 */ 1289 kva = kmem_alloc_pageable(pager_map, (mapsize = count * PAGE_SIZE)); 1290 if (!kva) { 1291 kva = kmem_alloc_pageable(pager_map, (mapsize = PAGE_SIZE)); 1292 count = 1; 1293 if (!kva) 1294 return rtvals[0]; 1295 } 1296 for (i = 0; i < count; i++) { 1297 foff = m[i]->offset + paging_offset; 1298 if (foff >= vnp->vnp_size) { 1299 for (j = i; j < count; j++) 1300 rtvals[j] = VM_PAGER_BAD; 1301 count = i; 1302 break; 1303 } 1304 } 1305 if (count == 0) { 1306 return rtvals[0]; 1307 } 1308 foff = m[0]->offset + paging_offset; 1309 reqaddr = vnode_pager_addr(vp, foff); 1310 1311 /* 1312 * Scan forward and stop for the first non-contiguous entry or stop 1313 * for a page being in buffer cache. 
1314 */ 1315 for (i = 1; i < count; i++) { 1316 if (vnode_pager_addr(vp, m[i]->offset + paging_offset) 1317 != reqaddr + i * PAGE_SIZE) { 1318 count = i; 1319 break; 1320 } 1321 } 1322 1323 /* 1324 * calculate the size of the transfer 1325 */ 1326 size = count * PAGE_SIZE; 1327 if ((foff + size) > vnp->vnp_size) 1328 size = vnp->vnp_size - foff; 1329 1330 /* 1331 * round up physical size for real devices 1332 */ 1333 if (dp->v_type == VBLK || dp->v_type == VCHR) 1334 size = (size + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1); 1335 1336 /* 1337 * and map the pages to be read into the kva 1338 */ 1339 for (i = 0; i < count; i++) 1340 pmap_kenter(kva + PAGE_SIZE * i, VM_PAGE_TO_PHYS(m[i])); 1341 pmap_update(); 1342/* 1343 printf("vnode: writing foff: %d, devoff: %d, size: %d\n", 1344 foff, reqaddr, size); 1345*/ 1346 1347 /* 1348 * next invalidate the incore vfs_bio data 1349 */ 1350 for (i = 0; i < count; i++) { 1351 int filblock = (foff + i * PAGE_SIZE) / bsize; 1352 struct buf *fbp; 1353 1354 s = splbio(); 1355 if (fbp = incore(vp, filblock)) { 1356 fbp = getblk(vp, filblock, fbp->b_bufsize, 0, 0); 1357 if (fbp->b_flags & B_DELWRI) { 1358 if (fbp->b_bufsize <= PAGE_SIZE) 1359 fbp->b_flags &= ~B_DELWRI; 1360 else { 1361 bwrite(fbp); 1362 fbp = getblk(vp, filblock, 1363 fbp->b_bufsize, 0, 0); 1364 } 1365 } 1366 fbp->b_flags |= B_INVAL; 1367 brelse(fbp); 1368 } 1369 splx(s); 1370 } 1371 1372 1373 bp = getpbuf(); 1374 VHOLD(vp); 1375 /* build a minimal buffer header */ 1376 bp->b_flags = B_BUSY | B_WRITE | B_CALL; 1377 bp->b_iodone = vnode_pager_iodone; 1378 /* B_PHYS is not set, but it is nice to fill this in */ 1379 bp->b_proc = curproc; 1380 bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred; 1381 1382 if (bp->b_rcred != NOCRED) 1383 crhold(bp->b_rcred); 1384 if (bp->b_wcred != NOCRED) 1385 crhold(bp->b_wcred); 1386 bp->b_un.b_addr = (caddr_t) kva; 1387 bp->b_blkno = reqaddr / DEV_BSIZE; 1388 bgetvp(dp, bp); 1389 ++dp->v_numoutput; 1390 1391 /* for NFS */ 1392 bp->b_dirtyoff = 
0; 1393 bp->b_dirtyend = size; 1394 1395 bp->b_bcount = size; 1396 bp->b_bufsize = size; 1397 1398 /* do the output */ 1399 VOP_STRATEGY(bp); 1400 1401 s = splbio(); 1402 1403 /* we definitely need to be at splbio here */ 1404 1405 while ((bp->b_flags & B_DONE) == 0) { 1406 tsleep((caddr_t) bp, PVM, "vnwrite", 0); 1407 } 1408 splx(s); 1409 1410 if ((bp->b_flags & B_ERROR) != 0) 1411 error = EIO; 1412 1413 pmap_remove(vm_map_pmap(pager_map), kva, kva + PAGE_SIZE * count); 1414 kmem_free_wakeup(pager_map, kva, mapsize); 1415 1416 /* 1417 * free the buffer header back to the swap buffer pool 1418 */ 1419 relpbuf(bp); 1420 HOLDRELE(vp); 1421 1422 if (!error) { 1423 for (i = 0; i < count; i++) { 1424 pmap_clear_modify(VM_PAGE_TO_PHYS(m[i])); 1425 m[i]->flags |= PG_CLEAN; 1426 m[i]->flags &= ~PG_LAUNDRY; 1427 rtvals[i] = VM_PAGER_OK; 1428 } 1429 } else if (count != 1) { 1430 error = 0; 1431 count = 1; 1432 goto retryoutput; 1433 } 1434 if (error) { 1435 printf("vnode pager write error: %d\n", error); 1436 } 1437 return (error ? VM_PAGER_FAIL : VM_PAGER_OK); 1438} 1439