vnode_pager.c revision 9507
11541Srgrimes/* 21541Srgrimes * Copyright (c) 1990 University of Utah. 31549Srgrimes * Copyright (c) 1991 The Regents of the University of California. 41549Srgrimes * All rights reserved. 59507Sdg * Copyright (c) 1993, 1994 John S. Dyson 69507Sdg * Copyright (c) 1995, David Greenman 71541Srgrimes * 81541Srgrimes * This code is derived from software contributed to Berkeley by 91541Srgrimes * the Systems Programming Group of the University of Utah Computer 101541Srgrimes * Science Department. 111541Srgrimes * 121541Srgrimes * Redistribution and use in source and binary forms, with or without 131541Srgrimes * modification, are permitted provided that the following conditions 141541Srgrimes * are met: 151541Srgrimes * 1. Redistributions of source code must retain the above copyright 161541Srgrimes * notice, this list of conditions and the following disclaimer. 171541Srgrimes * 2. Redistributions in binary form must reproduce the above copyright 181541Srgrimes * notice, this list of conditions and the following disclaimer in the 191541Srgrimes * documentation and/or other materials provided with the distribution. 201541Srgrimes * 3. All advertising materials mentioning features or use of this software 211541Srgrimes * must display the following acknowledgement: 221541Srgrimes * This product includes software developed by the University of 231541Srgrimes * California, Berkeley and its contributors. 241541Srgrimes * 4. Neither the name of the University nor the names of its contributors 251541Srgrimes * may be used to endorse or promote products derived from this software 261541Srgrimes * without specific prior written permission. 271541Srgrimes * 281541Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 291541Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 301541Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 311541Srgrimes * ARE DISCLAIMED. 
 * IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: @(#)vnode_pager.c	7.5 (Berkeley) 4/20/91
 * $Id: vnode_pager.c,v 1.43 1995/07/09 06:58:03 davidg Exp $
 */

/*
 * Page to/from files (vnodes).
 */

/*
 * TODO:
 *	Implement VOP_GETPAGES/PUTPAGES interface for filesystems.  Will
 *	greatly re-simplify the vnode_pager.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/vnode.h>
#include <sys/uio.h>
#include <sys/mount.h>
#include <sys/buf.h>

#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vnode_pager.h>

/*
 * Pager operations vector for vnode-backed VM objects.  The NULL slots
 * are optional pager hooks that this pager does not implement.
 */
struct pagerops vnodepagerops = {
	NULL,
	vnode_pager_alloc,
	vnode_pager_dealloc,
	vnode_pager_getpages,
	vnode_pager_putpages,
	vnode_pager_haspage,
	NULL
};

/*
 * Allocate (or lookup) pager for a vnode.
 * Handle is a vnode pointer.
 */
vm_object_t
vnode_pager_alloc(handle, size, prot, offset)
	void *handle;
	vm_size_t size;
	vm_prot_t prot;
	vm_offset_t offset;
{
	vm_object_t object;
	struct vnode *vp;

	/*
	 * Pageout to vnode, no can do yet.
	 */
	if (handle == NULL)
		return (NULL);

	vp = (struct vnode *) handle;

	/*
	 * Prevent race condition when allocating the object. This
	 * can happen with NFS vnodes since the nfsnode isn't locked.
	 * VOLOCK serializes concurrent allocators; VOWANT marks that
	 * someone is waiting for the lock and needs a wakeup.
	 */
	while (vp->v_flag & VOLOCK) {
		vp->v_flag |= VOWANT;
		tsleep(vp, PVM, "vnpobj", 0);
	}
	vp->v_flag |= VOLOCK;

	/*
	 * If the object is being terminated, wait for it to
	 * go away.
	 */
	while (((object = vp->v_object) != NULL) && (object->flags & OBJ_DEAD)) {
		tsleep(object, PVM, "vadead", 0);
	}

	if (object == NULL) {
		/*
		 * And an object of the appropriate size
		 */
		object = vm_object_allocate(OBJT_VNODE, round_page(size));
		object->flags = OBJ_CANPERSIST;

		/*
		 * Hold a reference to the vnode and initialize object data.
		 */
		VREF(vp);
		object->un_pager.vnp.vnp_size = size;

		object->handle = handle;
		vp->v_object = object;
	} else {
		/*
		 * vm_object_reference() will remove the object from the cache if
		 * found and gain a reference to the object.
		 */
		vm_object_reference(object);
	}

	/* Regular files get merged VM/buffer-cache I/O semantics. */
	if (vp->v_type == VREG)
		vp->v_flag |= VVMIO;

	/* Release the allocation lock and wake any waiting allocator. */
	vp->v_flag &= ~VOLOCK;
	if (vp->v_flag & VOWANT) {
		vp->v_flag &= ~VOWANT;
		wakeup(vp);
	}
	return (object);
}

/*
 * Tear down the pager association for a vnode-backed object: wait for
 * pending paging I/O to drain, detach object and vnode from each other,
 * and drop the vnode reference taken at allocation time.
 */
void
vnode_pager_dealloc(object)
	vm_object_t object;
{
	register struct vnode *vp = object->handle;

	if (vp == NULL)
		panic("vnode_pager_dealloc: pager already dealloced");

	if (object->paging_in_progress) {
		int s = splbio();
		while (object->paging_in_progress) {
			/* OBJ_PIPWNT asks the I/O completion path to wake us. */
			object->flags |= OBJ_PIPWNT;
			tsleep(object, PVM, "vnpdea", 0);
		}
		splx(s);
	}

	object->handle = NULL;

	vp->v_object = NULL;
	vp->v_flag &= ~(VTEXT | VVMIO);
	/* VAGE: reclaim this vnode sooner rather than later. */
	vp->v_flag |= VAGE;
	vrele(vp);
}

/*
 * Report whether the backing store has the page at `offset', and
 * optionally how many contiguous blocks exist before/after it.
 */
boolean_t
vnode_pager_haspage(object, offset, before, after)
	vm_object_t object;
	vm_offset_t offset;
	int *before;
	int *after;
{
	struct vnode *vp = object->handle;
	daddr_t bn;
	int err, run;
	daddr_t startblock, reqblock;

	/*
	 * If filesystem no longer mounted or offset beyond end of file we do
	 * not have the page.
1941541Srgrimes */ 1959507Sdg if ((vp->v_mount == NULL) || (offset >= object->un_pager.vnp.vnp_size)) 1964797Sdg return FALSE; 1971541Srgrimes 1989507Sdg startblock = reqblock = offset / vp->v_mount->mnt_stat.f_iosize; 1999507Sdg if (startblock > PFCLUSTER_BEHIND) 2009507Sdg startblock -= PFCLUSTER_BEHIND; 2019507Sdg else 2029507Sdg startblock = 0;; 2037178Sdg 2049507Sdg if (before != NULL) { 2059507Sdg /* 2069507Sdg * Loop looking for a contiguous chunk that includes the 2079507Sdg * requested page. 2089507Sdg */ 2099507Sdg while (TRUE) { 2109507Sdg err = VOP_BMAP(vp, startblock, (struct vnode **) 0, &bn, &run); 2119507Sdg if (err || bn == -1) { 2129507Sdg if (startblock < reqblock) { 2139507Sdg startblock++; 2149507Sdg continue; 2159507Sdg } 2169507Sdg *before = 0; 2179507Sdg if (after != NULL) 2189507Sdg *after = 0; 2199507Sdg return err ? TRUE : FALSE; 2209507Sdg } 2219507Sdg if ((startblock + run) < reqblock) { 2229507Sdg startblock += run + 1; 2239507Sdg continue; 2249507Sdg } 2259507Sdg *before = reqblock - startblock; 2269507Sdg if (after != NULL) 2279507Sdg *after = run; 2289507Sdg return TRUE; 2299507Sdg } 2309507Sdg } 2319507Sdg 2329507Sdg err = VOP_BMAP(vp, reqblock, (struct vnode **) 0, &bn, after); 2338876Srgrimes if (err) 2349507Sdg return TRUE; 2351827Sdg return ((long) bn < 0 ? FALSE : TRUE); 2361541Srgrimes} 2371541Srgrimes 2381541Srgrimes/* 2391541Srgrimes * Lets the VM system know about a change in size for a file. 2409507Sdg * We adjust our own internal size and flush any cached pages in 2411541Srgrimes * the associated object that are affected by the size change. 2421541Srgrimes * 2431541Srgrimes * Note: this routine may be invoked as a result of a pager put 2441541Srgrimes * operation (possibly at object termination time), so we must be careful. 
 */
void
vnode_pager_setsize(vp, nsize)
	struct vnode *vp;
	u_long nsize;
{
	vm_object_t object = vp->v_object;

	/* Vnode has no pager object; nothing to adjust. */
	if (object == NULL)
		return;

	/*
	 * Hasn't changed size
	 */
	if (nsize == object->un_pager.vnp.vnp_size)
		return;

	/*
	 * File has shrunk. Toss any cached pages beyond the new EOF.
	 */
	if (nsize < object->un_pager.vnp.vnp_size) {
		if (round_page((vm_offset_t) nsize) < object->un_pager.vnp.vnp_size) {
			vm_object_page_remove(object,
			    round_page((vm_offset_t) nsize), object->un_pager.vnp.vnp_size, FALSE);
		}
		/*
		 * this gets rid of garbage at the end of a page that is now
		 * only partially backed by the vnode...
		 */
		if (nsize & PAGE_MASK) {
			vm_offset_t kva;
			vm_page_t m;

			m = vm_page_lookup(object, trunc_page((vm_offset_t) nsize));
			if (m) {
				/* Zero from new EOF to the end of its page. */
				kva = vm_pager_map_page(m);
				bzero((caddr_t) kva + (nsize & PAGE_MASK),
				    round_page(nsize) - nsize);
				vm_pager_unmap_page(kva);
			}
		}
	}
	object->un_pager.vnp.vnp_size = (vm_offset_t) nsize;
	object->size = round_page(nsize);
}

/*
 * Walk a mount point's vnode list and uncache the pager object of every
 * vnode that has one, in preparation for unmount.
 */
void
vnode_pager_umount(mp)
	register struct mount *mp;
{
	struct vnode *vp, *nvp;

loop:
	for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) {
		/*
		 * Vnode can be reclaimed by getnewvnode() while we
		 * traverse the list.
		 */
		if (vp->v_mount != mp)
			goto loop;	/* list changed under us; restart scan */

		/*
		 * Save the next pointer now since uncaching may terminate the
		 * object and render vnode invalid
		 */
		nvp = vp->v_mntvnodes.le_next;

		if (vp->v_object != NULL) {
			VOP_LOCK(vp);
			vnode_pager_uncache(vp);
			VOP_UNLOCK(vp);
		}
	}
}

/*
 * Remove vnode associated object from the object cache.
 * This routine must be called with the vnode locked.
 *
 * XXX unlock the vnode.
 * We must do this since uncaching the object may result in its
 * destruction which may initiate paging activity which may necessitate
 * re-locking the vnode.
 */
void
vnode_pager_uncache(vp)
	struct vnode *vp;
{
	vm_object_t object;

	/*
	 * Not a mapped vnode
	 */
	object = vp->v_object;
	if (object == NULL)
		return;

	/* Pin the object across the unlocked window, then evict it. */
	vm_object_reference(object);
	VOP_UNLOCK(vp);
	pager_cache(object, FALSE);
	VOP_LOCK(vp);
	return;
}


/*
 * Wake any waiter on the page and free it.
 */
void
vnode_pager_freepage(m)
	vm_page_t m;
{
	PAGE_WAKEUP(m);
	vm_page_free(m);
}

/*
 * calculate the linear (byte) disk address of specified virtual
 * file address
 */
vm_offset_t
vnode_pager_addr(vp, address, run)
	struct vnode *vp;
	vm_offset_t address;
	int *run;
{
	int rtaddress;
	int bsize;
	vm_offset_t block;
	struct vnode *rtvp;
	int err;
	int vblock, voffset;

	/* Negative offsets (metadata addresses) cannot be mapped. */
	if ((int) address < 0)
		return -1;

	bsize = vp->v_mount->mnt_stat.f_iosize;
	vblock = address / bsize;
	voffset = address % bsize;

	err = VOP_BMAP(vp, vblock, &rtvp, &block, run);

	if (err || (block == -1))
		rtaddress = -1;	/* error or hole: no disk address */
	else {
		rtaddress = block + voffset / DEV_BSIZE;
		if( run) {
			/* Convert the block run to pages past `address'. */
			*run += 1;
			*run *= bsize/PAGE_SIZE;
			*run -= voffset/PAGE_SIZE;
		}
	}

	return rtaddress;
}

/*
 * interrupt routine for I/O completion
 */
void
vnode_pager_iodone(bp)
	struct buf *bp;
{
	bp->b_flags |= B_DONE;
	wakeup(bp);
}

/*
 * small block file system vnode pager input
 */
int
vnode_pager_input_smlfs(object, m)
	vm_object_t object;
	vm_page_t m;
{
	int i;
	int s;
	struct vnode *dp, *vp;
	struct buf *bp;
	vm_offset_t kva;
	int fileaddr;
	vm_offset_t bsize;
	int error = 0;

	vp = object->handle;
	bsize = vp->v_mount->mnt_stat.f_iosize;


	/* Find the underlying device vnode for the filesystem. */
	VOP_BMAP(vp, 0, &dp, 0, 0);

	kva = vm_pager_map_page(m);

	/* Read the page one filesystem block at a time. */
	for (i = 0; i < PAGE_SIZE / bsize; i++) {

		/* Skip sub-blocks that are already valid. */
		if ((vm_page_bits(m->offset + i * bsize, bsize) & m->valid))
			continue;

		fileaddr = vnode_pager_addr(vp, m->offset + i * bsize, (int *)0);
		if (fileaddr != -1) {
			bp = getpbuf();

			/* build a minimal buffer header */
			bp->b_flags = B_BUSY | B_READ | B_CALL;
			bp->b_iodone = vnode_pager_iodone;
			bp->b_proc = curproc;
			bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
			if (bp->b_rcred != NOCRED)
				crhold(bp->b_rcred);
			if (bp->b_wcred != NOCRED)
				crhold(bp->b_wcred);
			bp->b_un.b_addr = (caddr_t) kva + i * bsize;
			bp->b_blkno = fileaddr;
			pbgetvp(dp, bp);
			bp->b_bcount = bsize;
			bp->b_bufsize = bsize;

			/* do the input */
			VOP_STRATEGY(bp);

			/* we definitely need to be at splbio here */

			s = splbio();
			while ((bp->b_flags & B_DONE) == 0) {
				tsleep(bp, PVM, "vnsrd", 0);
			}
			splx(s);
			if ((bp->b_flags & B_ERROR) != 0)
				error = EIO;

			/*
			 * free the buffer header back to the swap buffer pool
			 */
			relpbuf(bp);
			if (error)
				break;

			/* Mark the sub-block clean and valid. */
			vm_page_set_clean(m, (i * bsize) & (PAGE_SIZE-1), bsize);
			vm_page_set_valid(m, (i * bsize) & (PAGE_SIZE-1), bsize);
		} else {
			/* Hole in the file: supply zeroes instead of I/O. */
			vm_page_set_clean(m, (i * bsize) & (PAGE_SIZE-1), bsize);
			bzero((caddr_t) kva + i * bsize, bsize);
		}
	}
	vm_pager_unmap_page(kva);
	pmap_clear_modify(VM_PAGE_TO_PHYS(m));
	if (error) {
		return VM_PAGER_ERROR;
	}
	return VM_PAGER_OK;

}


/*
 * old style
 * vnode pager input routine (reads via VOP_READ; "output" in the
 * original comment was a misnomer — this is the read path)
 */
int
vnode_pager_input_old(object, m)
	vm_object_t object;
	vm_page_t m;
{
	struct uio auio;
	struct iovec aiov;
	int error;
	int size;
	vm_offset_t kva;

	error = 0;

	/*
	 * Return failure if beyond current EOF
	 */
	if (m->offset >= object->un_pager.vnp.vnp_size) {
		return VM_PAGER_BAD;
	} else {
		size = PAGE_SIZE;
		/* Clamp the read so it does not extend past EOF. */
		if (m->offset + size > object->un_pager.vnp.vnp_size)
			size = object->un_pager.vnp.vnp_size - m->offset;

		/*
		 * Allocate a kernel virtual address and initialize so that
		 * we can use VOP_READ/WRITE routines.
		 */
		kva = vm_pager_map_page(m);

		aiov.iov_base = (caddr_t) kva;
		aiov.iov_len = size;
		auio.uio_iov = &aiov;
		auio.uio_iovcnt = 1;
		auio.uio_offset = m->offset;
		auio.uio_segflg = UIO_SYSSPACE;
		auio.uio_rw = UIO_READ;
		auio.uio_resid = size;
		auio.uio_procp = (struct proc *) 0;

		error = VOP_READ(object->handle, &auio, 0, curproc->p_ucred);
		if (!error) {
			register int count = size - auio.uio_resid;

			if (count == 0)
				error = EINVAL;
			else if (count != PAGE_SIZE)
				/* Zero-fill the unread tail of the page. */
				bzero((caddr_t) kva + count, PAGE_SIZE - count);
		}
		vm_pager_unmap_page(kva);
	}
	pmap_clear_modify(VM_PAGE_TO_PHYS(m));
	m->dirty = 0;
	return error ?
	    VM_PAGER_ERROR : VM_PAGER_OK;
}

/*
 * generic vnode pager input routine
 */
int
vnode_pager_getpages(object, m, count, reqpage)
	vm_object_t object;
	vm_page_t *m;
	int count;
	int reqpage;
{
	vm_offset_t kva, foff;
	int i, size, bsize, first, firstaddr;
	struct vnode *dp, *vp;
	int runpg;
	int runend;
	struct buf *bp;
	int s;
	int error = 0;

	vp = object->handle;
	bsize = vp->v_mount->mnt_stat.f_iosize;

	/* get the UNDERLYING device for the file with VOP_BMAP() */

	/*
	 * originally, we did not check for an error return value -- assuming
	 * an fs always has a bmap entry point -- that assumption is wrong!!!
	 */
	foff = m[reqpage]->offset;

	/*
	 * if we can't bmap, use old VOP code
	 */
	if (VOP_BMAP(vp, 0, &dp, 0, 0)) {
		/* Free everything but the requested page. */
		for (i = 0; i < count; i++) {
			if (i != reqpage) {
				vnode_pager_freepage(m[i]);
			}
		}
		cnt.v_vnodein++;
		cnt.v_vnodepgsin++;
		return vnode_pager_input_old(object, m[reqpage]);

		/*
		 * if the blocksize is smaller than a page size, then use
		 * special small filesystem code. NFS sometimes has a small
		 * blocksize, but it can handle large reads itself.
6001827Sdg */ 6011827Sdg } else if ((PAGE_SIZE / bsize) > 1 && 6025455Sdg (vp->v_mount->mnt_stat.f_type != MOUNT_NFS)) { 6031827Sdg 6041549Srgrimes for (i = 0; i < count; i++) { 6051549Srgrimes if (i != reqpage) { 6061549Srgrimes vnode_pager_freepage(m[i]); 6071549Srgrimes } 6081549Srgrimes } 6093612Sdg cnt.v_vnodein++; 6103612Sdg cnt.v_vnodepgsin++; 6119507Sdg return vnode_pager_input_smlfs(object, m[reqpage]); 6121549Srgrimes } 6131549Srgrimes /* 6145455Sdg * if ANY DEV_BSIZE blocks are valid on a large filesystem block 6155455Sdg * then, the entire page is valid -- 6161549Srgrimes */ 6175455Sdg if (m[reqpage]->valid) { 6185455Sdg m[reqpage]->valid = VM_PAGE_BITS_ALL; 6195455Sdg for (i = 0; i < count; i++) { 6205455Sdg if (i != reqpage) 6215455Sdg vnode_pager_freepage(m[i]); 6221549Srgrimes } 6235455Sdg return VM_PAGER_OK; 6241549Srgrimes } 6257178Sdg 6265455Sdg /* 6275455Sdg * here on direct device I/O 6285455Sdg */ 6291549Srgrimes 6306151Sdg firstaddr = -1; 6311549Srgrimes /* 6326151Sdg * calculate the run that includes the required page 6331549Srgrimes */ 6346151Sdg for(first = 0, i = 0; i < count; i = runend) { 6356151Sdg firstaddr = vnode_pager_addr(vp, m[i]->offset, &runpg); 6366151Sdg if (firstaddr == -1) { 6379507Sdg if (i == reqpage && foff < object->un_pager.vnp.vnp_size) { 6389507Sdg panic("vnode_pager_putpages: unexpected missing page: firstaddr: %d, foff: %ld, vnp_size: %d", 6399507Sdg firstaddr, foff, object->un_pager.vnp.vnp_size); 6406151Sdg } 6411549Srgrimes vnode_pager_freepage(m[i]); 6426151Sdg runend = i + 1; 6436151Sdg first = runend; 6446151Sdg continue; 6451549Srgrimes } 6466151Sdg runend = i + runpg; 6479507Sdg if (runend <= reqpage) { 6486151Sdg int j; 6499507Sdg for (j = i; j < runend; j++) { 6506151Sdg vnode_pager_freepage(m[j]); 6516151Sdg } 6521549Srgrimes } else { 6539507Sdg if (runpg < (count - first)) { 6549507Sdg for (i = first + runpg; i < count; i++) 6556151Sdg vnode_pager_freepage(m[i]); 6566151Sdg count = first + runpg; 
			}
			break;
		}
		first = runend;
	}

	/*
	 * the first and last page have been calculated now, move input pages
	 * to be zero based...
	 */
	if (first != 0) {
		for (i = first; i < count; i++) {
			m[i - first] = m[i];
		}
		count -= first;
		reqpage -= first;
	}

	/*
	 * calculate the file virtual address for the transfer
	 */
	foff = m[0]->offset;

	/*
	 * calculate the size of the transfer
	 */
	size = count * PAGE_SIZE;
	if ((foff + size) > object->un_pager.vnp.vnp_size)
		size = object->un_pager.vnp.vnp_size - foff;

	/*
	 * round up physical size for real devices
	 */
	if (dp->v_type == VBLK || dp->v_type == VCHR)
		size = (size + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1);

	bp = getpbuf();
	kva = (vm_offset_t) bp->b_data;

	/*
	 * and map the pages to be read into the kva
	 */
	pmap_qenter(kva, m, count);

	/* build a minimal buffer header */
	bp->b_flags = B_BUSY | B_READ | B_CALL;
	bp->b_iodone = vnode_pager_iodone;
	/* B_PHYS is not set, but it is nice to fill this in */
	bp->b_proc = curproc;
	bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
	if (bp->b_rcred != NOCRED)
		crhold(bp->b_rcred);
	if (bp->b_wcred != NOCRED)
		crhold(bp->b_wcred);
	bp->b_blkno = firstaddr;
	pbgetvp(dp, bp);
	bp->b_bcount = size;
	bp->b_bufsize =
	    size;

	cnt.v_vnodein++;
	cnt.v_vnodepgsin += count;

	/* do the input */
	VOP_STRATEGY(bp);

	s = splbio();
	/* we definitely need to be at splbio here */

	while ((bp->b_flags & B_DONE) == 0) {
		tsleep(bp, PVM, "vnread", 0);
	}
	splx(s);
	if ((bp->b_flags & B_ERROR) != 0)
		error = EIO;

	if (!error) {
		/* Zero-fill any tail of the mapping the read did not cover. */
		if (size != count * PAGE_SIZE)
			bzero((caddr_t) kva + size, PAGE_SIZE * count - size);
	}
	pmap_qremove(kva, count);

	/*
	 * free the buffer header back to the swap buffer pool
	 */
	relpbuf(bp);

	for (i = 0; i < count; i++) {
		pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
		m[i]->dirty = 0;
		m[i]->valid = VM_PAGE_BITS_ALL;
		if (i != reqpage) {

			/*
			 * whether or not to leave the page activated is up in
			 * the air, but we should put the page on a page queue
			 * somewhere. (it already is in the object). Result:
			 * It appears that emperical results show that
			 * deactivating pages is best.
			 */

			/*
			 * just in case someone was asking for this page we
			 * now tell them that it is ok to use
			 */
			if (!error) {
				vm_page_deactivate(m[i]);
				PAGE_WAKEUP(m[i]);
			} else {
				vnode_pager_freepage(m[i]);
			}
		}
	}
	if (error) {
		printf("vnode_pager_getpages: I/O read error\n");
	}
	return (error ?
VM_PAGER_ERROR : VM_PAGER_OK); 7731549Srgrimes} 7741549Srgrimes 7751549Srgrimes/* 7761549Srgrimes * generic vnode pager output routine 7771549Srgrimes */ 7781549Srgrimesint 7799507Sdgvnode_pager_putpages(object, m, count, sync, rtvals) 7809507Sdg vm_object_t object; 7811549Srgrimes vm_page_t *m; 7825455Sdg int count; 7839507Sdg boolean_t sync; 7845455Sdg int *rtvals; 7851549Srgrimes{ 7867695Sdg int i; 7871549Srgrimes 7887695Sdg struct vnode *vp; 7897695Sdg int maxsize, ncount; 7907695Sdg struct uio auio; 7917695Sdg struct iovec aiov; 7927695Sdg int error; 7931549Srgrimes 7949507Sdg vp = object->handle;; 7951827Sdg for (i = 0; i < count; i++) 7961549Srgrimes rtvals[i] = VM_PAGER_AGAIN; 7971549Srgrimes 7985455Sdg if ((int) m[0]->offset < 0) { 7999507Sdg printf("vnode_pager_putpages: attempt to write meta-data!!! -- 0x%x(%x)\n", m[0]->offset, m[0]->dirty); 8007695Sdg rtvals[0] = VM_PAGER_BAD; 8017695Sdg return VM_PAGER_BAD; 8025455Sdg } 8037178Sdg 8047695Sdg maxsize = count * PAGE_SIZE; 8057695Sdg ncount = count; 8061549Srgrimes 8079507Sdg if (maxsize + m[0]->offset > object->un_pager.vnp.vnp_size) { 8089507Sdg if (object->un_pager.vnp.vnp_size > m[0]->offset) 8099507Sdg maxsize = object->un_pager.vnp.vnp_size - m[0]->offset; 8108585Sdg else 8118585Sdg maxsize = 0; 8127695Sdg ncount = (maxsize + PAGE_SIZE - 1) / PAGE_SIZE; 8138585Sdg if (ncount < count) { 8148585Sdg for (i = ncount; i < count; i++) { 8157695Sdg rtvals[i] = VM_PAGER_BAD; 8161549Srgrimes } 8178585Sdg if (ncount == 0) { 8189507Sdg printf("vnode_pager_putpages: write past end of file: %d, %d\n", 8199507Sdg m[0]->offset, object->un_pager.vnp.vnp_size); 8207695Sdg return rtvals[0]; 8217695Sdg } 8221549Srgrimes } 8231541Srgrimes } 8247695Sdg 8258585Sdg for (i = 0; i < count; i++) { 8268585Sdg m[i]->busy++; 8277695Sdg m[i]->flags &= ~PG_BUSY; 8281549Srgrimes } 8291827Sdg 8307695Sdg aiov.iov_base = (caddr_t) 0; 8317695Sdg aiov.iov_len = maxsize; 8327695Sdg auio.uio_iov = &aiov; 8337695Sdg auio.uio_iovcnt = 1; 
	auio.uio_offset = m[0]->offset;
	auio.uio_segflg = UIO_NOCOPY;	/* data comes from the pages themselves */
	auio.uio_rw = UIO_WRITE;
	auio.uio_resid = maxsize;
	auio.uio_procp = (struct proc *) 0;
	error = VOP_WRITE(vp, &auio, IO_VMIO, curproc->p_ucred);
	cnt.v_vnodeout++;
	cnt.v_vnodepgsout += ncount;

	if (error) {
		printf("vnode_pager_putpages: I/O error %d\n", error);
	}
	if (auio.uio_resid) {
		printf("vnode_pager_putpages: residual I/O %d at %d\n", auio.uio_resid, m[0]->offset);
	}
	for (i = 0; i < count; i++) {
		/* Drop our busy hold; report success for pages we wrote. */
		m[i]->busy--;
		if (i < ncount) {
			rtvals[i] = VM_PAGER_OK;
		}
		if ((m[i]->busy == 0) && (m[i]->flags & PG_WANTED))
			wakeup(m[i]);
	}
	return rtvals[0];
}

/*
 * Walk the backing-object chain looking for a vnode-backed object;
 * lock and return its vnode, or NULL if none is found.
 * (The `continue' advances via the loop's backing_object step.)
 */
struct vnode *
vnode_pager_lock(object)
	vm_object_t object;
{
	for (; object != NULL; object = object->backing_object) {
		if (object->type != OBJT_VNODE)
			continue;

		VOP_LOCK(object->handle);
		return object->handle;
	}
	return NULL;
}