/* vnode_pager.c revision 12767 */
/*
 * Copyright (c) 1990 University of Utah.
 * Copyright (c) 1991 The Regents of the University of California.
 * All rights reserved.
 * Copyright (c) 1993, 1994 John S. Dyson
 * Copyright (c) 1995, David Greenman
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)vnode_pager.c	7.5 (Berkeley) 4/20/91
 *	$Id: vnode_pager.c,v 1.54 1995/12/07 12:48:31 davidg Exp $
 */

/*
 * Page to/from files (vnodes).
 */

/*
 * TODO:
 *	Implement VOP_GETPAGES/PUTPAGES interface for filesystems.  Will
 *	greatly re-simplify the vnode_pager.
521549Srgrimes */ 531549Srgrimes 541541Srgrimes#include <sys/param.h> 551541Srgrimes#include <sys/systm.h> 565455Sdg#include <sys/kernel.h> 571541Srgrimes#include <sys/proc.h> 581541Srgrimes#include <sys/malloc.h> 591541Srgrimes#include <sys/vnode.h> 601541Srgrimes#include <sys/uio.h> 611541Srgrimes#include <sys/mount.h> 629507Sdg#include <sys/buf.h> 6312662Sdg#include <sys/vmmeter.h> 641541Srgrimes 651541Srgrimes#include <vm/vm.h> 6612662Sdg#include <vm/vm_param.h> 6712662Sdg#include <vm/vm_prot.h> 6812662Sdg#include <vm/vm_object.h> 691541Srgrimes#include <vm/vm_page.h> 709507Sdg#include <vm/vm_pager.h> 711541Srgrimes#include <vm/vnode_pager.h> 7212662Sdg#include <vm/vm_extern.h> 731541Srgrimes 7412767Sdysonextern vm_offset_t vnode_pager_addr __P((struct vnode *vp, vm_ooffset_t address, 7511943Sbde int *run)); 7611943Sbdeextern void vnode_pager_iodone __P((struct buf *bp)); 7711943Sbdeextern int vnode_pager_input_smlfs __P((vm_object_t object, vm_page_t m)); 7811943Sbdeextern int vnode_pager_input_old __P((vm_object_t object, vm_page_t m)); 7911943Sbde 801541Srgrimesstruct pagerops vnodepagerops = { 819507Sdg NULL, 821541Srgrimes vnode_pager_alloc, 831541Srgrimes vnode_pager_dealloc, 849507Sdg vnode_pager_getpages, 859507Sdg vnode_pager_putpages, 869507Sdg vnode_pager_haspage, 879507Sdg NULL 881541Srgrimes}; 891541Srgrimes 9011943Sbdestatic int vnode_pager_leaf_getpages __P((vm_object_t object, vm_page_t *m, 9111943Sbde int count, int reqpage)); 9211943Sbdestatic int vnode_pager_leaf_putpages __P((vm_object_t object, vm_page_t *m, 9311943Sbde int count, boolean_t sync, 9411943Sbde int *rtvals)); 9510556Sdyson 961541Srgrimes/* 971541Srgrimes * Allocate (or lookup) pager for a vnode. 981541Srgrimes * Handle is a vnode pointer. 
991541Srgrimes */ 1009507Sdgvm_object_t 1011549Srgrimesvnode_pager_alloc(handle, size, prot, offset) 1028416Sdg void *handle; 1031541Srgrimes vm_size_t size; 1041541Srgrimes vm_prot_t prot; 10512767Sdyson vm_ooffset_t offset; 1061541Srgrimes{ 1079456Sdg vm_object_t object; 1081541Srgrimes struct vnode *vp; 1091541Srgrimes 1101541Srgrimes /* 1111541Srgrimes * Pageout to vnode, no can do yet. 1121541Srgrimes */ 1131541Srgrimes if (handle == NULL) 1141827Sdg return (NULL); 1151541Srgrimes 1169411Sdg vp = (struct vnode *) handle; 1179411Sdg 1181541Srgrimes /* 1199411Sdg * Prevent race condition when allocating the object. This 1209411Sdg * can happen with NFS vnodes since the nfsnode isn't locked. 1211541Srgrimes */ 1229411Sdg while (vp->v_flag & VOLOCK) { 1239411Sdg vp->v_flag |= VOWANT; 1249411Sdg tsleep(vp, PVM, "vnpobj", 0); 1259411Sdg } 1269411Sdg vp->v_flag |= VOLOCK; 1279411Sdg 1289411Sdg /* 1299411Sdg * If the object is being terminated, wait for it to 1309411Sdg * go away. 1319411Sdg */ 1329507Sdg while (((object = vp->v_object) != NULL) && (object->flags & OBJ_DEAD)) { 1339356Sdg tsleep(object, PVM, "vadead", 0); 1349507Sdg } 1355455Sdg 1369507Sdg if (object == NULL) { 1371541Srgrimes /* 1381541Srgrimes * And an object of the appropriate size 1391541Srgrimes */ 14012767Sdyson object = vm_object_allocate(OBJT_VNODE, size); 1419456Sdg object->flags = OBJ_CANPERSIST; 1421827Sdg 1431541Srgrimes /* 1449507Sdg * Hold a reference to the vnode and initialize object data. 1451541Srgrimes */ 1461541Srgrimes VREF(vp); 14712767Sdyson object->un_pager.vnp.vnp_size = (vm_ooffset_t) size * PAGE_SIZE; 1481549Srgrimes 1499507Sdg object->handle = handle; 1509507Sdg vp->v_object = object; 1511541Srgrimes } else { 1521541Srgrimes /* 1539507Sdg * vm_object_reference() will remove the object from the cache if 1549507Sdg * found and gain a reference to the object. 
1551541Srgrimes */ 1569507Sdg vm_object_reference(object); 1571541Srgrimes } 1589411Sdg 1599411Sdg if (vp->v_type == VREG) 1607695Sdg vp->v_flag |= VVMIO; 1619411Sdg 1629411Sdg vp->v_flag &= ~VOLOCK; 1639411Sdg if (vp->v_flag & VOWANT) { 1649411Sdg vp->v_flag &= ~VOWANT; 1659411Sdg wakeup(vp); 1669411Sdg } 1679507Sdg return (object); 1681541Srgrimes} 1691541Srgrimes 1701549Srgrimesvoid 1719507Sdgvnode_pager_dealloc(object) 1729507Sdg vm_object_t object; 1731541Srgrimes{ 1749507Sdg register struct vnode *vp = object->handle; 1751541Srgrimes 1769507Sdg if (vp == NULL) 1779507Sdg panic("vnode_pager_dealloc: pager already dealloced"); 1789507Sdg 1799507Sdg if (object->paging_in_progress) { 1805455Sdg int s = splbio(); 1819507Sdg while (object->paging_in_progress) { 1829507Sdg object->flags |= OBJ_PIPWNT; 1839507Sdg tsleep(object, PVM, "vnpdea", 0); 1845455Sdg } 1855455Sdg splx(s); 1861541Srgrimes } 1871541Srgrimes 1889507Sdg object->handle = NULL; 1891827Sdg 1909507Sdg vp->v_object = NULL; 1919507Sdg vp->v_flag &= ~(VTEXT | VVMIO); 1929507Sdg vp->v_flag |= VAGE; 1939507Sdg vrele(vp); 1941549Srgrimes} 1951541Srgrimes 1961549Srgrimesboolean_t 19712767Sdysonvnode_pager_haspage(object, pindex, before, after) 1989507Sdg vm_object_t object; 19912767Sdyson vm_pindex_t pindex; 2009507Sdg int *before; 2019507Sdg int *after; 2021541Srgrimes{ 2039507Sdg struct vnode *vp = object->handle; 2041541Srgrimes daddr_t bn; 20512423Sphk int err; 20610556Sdyson daddr_t reqblock; 20711701Sdyson int poff; 20811701Sdyson int bsize; 20911701Sdyson int pagesperblock; 2101541Srgrimes 2111541Srgrimes /* 2125455Sdg * If filesystem no longer mounted or offset beyond end of file we do 2135455Sdg * not have the page. 
2141541Srgrimes */ 21512767Sdyson if ((vp->v_mount == NULL) || 21612767Sdyson (IDX_TO_OFF(pindex) >= object->un_pager.vnp.vnp_size)) 2174797Sdg return FALSE; 2181541Srgrimes 21911576Sdg bsize = vp->v_mount->mnt_stat.f_iosize; 22010556Sdyson pagesperblock = bsize / PAGE_SIZE; 22112767Sdyson reqblock = pindex / pagesperblock; 22210556Sdyson err = VOP_BMAP(vp, reqblock, (struct vnode **) 0, &bn, 22310556Sdyson after, before); 2248876Srgrimes if (err) 2259507Sdg return TRUE; 22610702Sdyson if ( bn == -1) 22710576Sdyson return FALSE; 22812767Sdyson poff = pindex - (reqblock * pagesperblock); 22910556Sdyson if (before) { 23010556Sdyson *before *= pagesperblock; 23110556Sdyson *before += poff; 23210556Sdyson } 23310556Sdyson if (after) { 23410669Sdyson int numafter; 23510556Sdyson *after *= pagesperblock; 23610669Sdyson numafter = pagesperblock - (poff + 1); 23712767Sdyson if (IDX_TO_OFF(pindex + numafter) > object->un_pager.vnp.vnp_size) { 23812767Sdyson numafter = OFF_TO_IDX((object->un_pager.vnp.vnp_size - IDX_TO_OFF(pindex))); 23910669Sdyson } 24010669Sdyson *after += numafter; 24110556Sdyson } 24210576Sdyson return TRUE; 2431541Srgrimes} 2441541Srgrimes 2451541Srgrimes/* 2461541Srgrimes * Lets the VM system know about a change in size for a file. 2479507Sdg * We adjust our own internal size and flush any cached pages in 2481541Srgrimes * the associated object that are affected by the size change. 2491541Srgrimes * 2501541Srgrimes * Note: this routine may be invoked as a result of a pager put 2511541Srgrimes * operation (possibly at object termination time), so we must be careful. 
2521541Srgrimes */ 2531541Srgrimesvoid 2541541Srgrimesvnode_pager_setsize(vp, nsize) 2551541Srgrimes struct vnode *vp; 25612767Sdyson vm_ooffset_t nsize; 2571541Srgrimes{ 2589507Sdg vm_object_t object = vp->v_object; 2591541Srgrimes 2609507Sdg if (object == NULL) 2611541Srgrimes return; 2621827Sdg 2631541Srgrimes /* 2641541Srgrimes * Hasn't changed size 2651541Srgrimes */ 2669507Sdg if (nsize == object->un_pager.vnp.vnp_size) 2673374Sdg return; 2681827Sdg 2691541Srgrimes /* 2701827Sdg * File has shrunk. Toss any cached pages beyond the new EOF. 2711541Srgrimes */ 2729507Sdg if (nsize < object->un_pager.vnp.vnp_size) { 27312767Sdyson vm_ooffset_t nsizerounded; 27412767Sdyson nsizerounded = IDX_TO_OFF(OFF_TO_IDX(nsize + PAGE_SIZE - 1)); 27512767Sdyson if (nsizerounded < object->un_pager.vnp.vnp_size) { 2765455Sdg vm_object_page_remove(object, 27712767Sdyson OFF_TO_IDX(nsize + PAGE_SIZE - 1), 27812767Sdyson OFF_TO_IDX(object->un_pager.vnp.vnp_size), 27912767Sdyson FALSE); 2805455Sdg } 2811827Sdg /* 2821827Sdg * this gets rid of garbage at the end of a page that is now 2831827Sdg * only partially backed by the vnode... 
2841827Sdg */ 2851827Sdg if (nsize & PAGE_MASK) { 2861827Sdg vm_offset_t kva; 2871827Sdg vm_page_t m; 2881827Sdg 28912767Sdyson m = vm_page_lookup(object, OFF_TO_IDX(nsize)); 2901827Sdg if (m) { 2911827Sdg kva = vm_pager_map_page(m); 2921827Sdg bzero((caddr_t) kva + (nsize & PAGE_MASK), 29312767Sdyson (int) (round_page(nsize) - nsize)); 2941827Sdg vm_pager_unmap_page(kva); 2951827Sdg } 2961827Sdg } 2971541Srgrimes } 29812767Sdyson object->un_pager.vnp.vnp_size = nsize; 29912767Sdyson object->size = OFF_TO_IDX(nsize + PAGE_SIZE - 1); 3001541Srgrimes} 3011541Srgrimes 3021541Srgrimesvoid 3031541Srgrimesvnode_pager_umount(mp) 3041541Srgrimes register struct mount *mp; 3051541Srgrimes{ 3069507Sdg struct vnode *vp, *nvp; 3071541Srgrimes 3089507Sdgloop: 3099507Sdg for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) { 3101541Srgrimes /* 3119507Sdg * Vnode can be reclaimed by getnewvnode() while we 3129507Sdg * traverse the list. 3139507Sdg */ 3149507Sdg if (vp->v_mount != mp) 3159507Sdg goto loop; 3169507Sdg 3179507Sdg /* 3181827Sdg * Save the next pointer now since uncaching may terminate the 3199507Sdg * object and render vnode invalid 3201541Srgrimes */ 3219507Sdg nvp = vp->v_mntvnodes.le_next; 3229507Sdg 3239507Sdg if (vp->v_object != NULL) { 3247162Sdg VOP_LOCK(vp); 3259507Sdg vnode_pager_uncache(vp); 3267162Sdg VOP_UNLOCK(vp); 3277162Sdg } 3281541Srgrimes } 3291541Srgrimes} 3301541Srgrimes 3311541Srgrimes/* 3321541Srgrimes * Remove vnode associated object from the object cache. 3337162Sdg * This routine must be called with the vnode locked. 3341541Srgrimes * 3357162Sdg * XXX unlock the vnode. 3367162Sdg * We must do this since uncaching the object may result in its 3377162Sdg * destruction which may initiate paging activity which may necessitate 3387162Sdg * re-locking the vnode. 
3391549Srgrimes */ 3409507Sdgvoid 3411549Srgrimesvnode_pager_uncache(vp) 3429507Sdg struct vnode *vp; 3431549Srgrimes{ 3449507Sdg vm_object_t object; 3451549Srgrimes 3461549Srgrimes /* 3471549Srgrimes * Not a mapped vnode 3481549Srgrimes */ 3499356Sdg object = vp->v_object; 3505455Sdg if (object == NULL) 3519507Sdg return; 3525455Sdg 3539507Sdg vm_object_reference(object); 3549507Sdg VOP_UNLOCK(vp); 3559507Sdg pager_cache(object, FALSE); 3569507Sdg VOP_LOCK(vp); 3579507Sdg return; 3581549Srgrimes} 3591541Srgrimes 3601541Srgrimes 3611549Srgrimesvoid 3621549Srgrimesvnode_pager_freepage(m) 3631549Srgrimes vm_page_t m; 3641541Srgrimes{ 3651549Srgrimes PAGE_WAKEUP(m); 3661549Srgrimes vm_page_free(m); 3671549Srgrimes} 3681549Srgrimes 3691549Srgrimes/* 3701549Srgrimes * calculate the linear (byte) disk address of specified virtual 3711549Srgrimes * file address 3721549Srgrimes */ 3731549Srgrimesvm_offset_t 3746151Sdgvnode_pager_addr(vp, address, run) 3751549Srgrimes struct vnode *vp; 37612767Sdyson vm_ooffset_t address; 3776151Sdg int *run; 3781549Srgrimes{ 3795455Sdg int rtaddress; 3805455Sdg int bsize; 38112767Sdyson daddr_t block; 3821549Srgrimes struct vnode *rtvp; 3835455Sdg int err; 38412767Sdyson daddr_t vblock; 38512767Sdyson int voffset; 3861549Srgrimes 3875455Sdg if ((int) address < 0) 3885455Sdg return -1; 3895455Sdg 39011701Sdyson if (vp->v_mount == NULL) 39111701Sdyson return -1; 39211701Sdyson 3931549Srgrimes bsize = vp->v_mount->mnt_stat.f_iosize; 3941549Srgrimes vblock = address / bsize; 3951549Srgrimes voffset = address % bsize; 3961549Srgrimes 39710551Sdyson err = VOP_BMAP(vp, vblock, &rtvp, &block, run, NULL); 3981549Srgrimes 3996151Sdg if (err || (block == -1)) 4001549Srgrimes rtaddress = -1; 4016151Sdg else { 4026626Sdg rtaddress = block + voffset / DEV_BSIZE; 4036151Sdg if( run) { 4046151Sdg *run += 1; 4056151Sdg *run *= bsize/PAGE_SIZE; 4066151Sdg *run -= voffset/PAGE_SIZE; 4076151Sdg } 4086151Sdg } 4091549Srgrimes 4101549Srgrimes return rtaddress; 
4111549Srgrimes} 4121549Srgrimes 4131549Srgrimes/* 4141549Srgrimes * interrupt routine for I/O completion 4151549Srgrimes */ 4161549Srgrimesvoid 4171549Srgrimesvnode_pager_iodone(bp) 4181549Srgrimes struct buf *bp; 4191549Srgrimes{ 4201549Srgrimes bp->b_flags |= B_DONE; 4219507Sdg wakeup(bp); 4221549Srgrimes} 4231549Srgrimes 4241549Srgrimes/* 4251549Srgrimes * small block file system vnode pager input 4261549Srgrimes */ 4271549Srgrimesint 4289507Sdgvnode_pager_input_smlfs(object, m) 4299507Sdg vm_object_t object; 4301549Srgrimes vm_page_t m; 4311549Srgrimes{ 4325455Sdg int i; 4335455Sdg int s; 4341549Srgrimes struct vnode *dp, *vp; 4351549Srgrimes struct buf *bp; 4361549Srgrimes vm_offset_t kva; 4375455Sdg int fileaddr; 4381549Srgrimes vm_offset_t bsize; 4395455Sdg int error = 0; 4401549Srgrimes 4419507Sdg vp = object->handle; 44211701Sdyson if (vp->v_mount == NULL) 44311701Sdyson return VM_PAGER_BAD; 44411701Sdyson 4451549Srgrimes bsize = vp->v_mount->mnt_stat.f_iosize; 4461549Srgrimes 4477178Sdg 44810551Sdyson VOP_BMAP(vp, 0, &dp, 0, NULL, NULL); 4491549Srgrimes 4501549Srgrimes kva = vm_pager_map_page(m); 4511549Srgrimes 4521827Sdg for (i = 0; i < PAGE_SIZE / bsize; i++) { 4531827Sdg 45412767Sdyson if ((vm_page_bits(IDX_TO_OFF(m->pindex) + i * bsize, bsize) & m->valid)) 4555455Sdg continue; 4561549Srgrimes 45712767Sdyson fileaddr = vnode_pager_addr(vp, 45812767Sdyson IDX_TO_OFF(m->pindex) + i * bsize, (int *)0); 4591827Sdg if (fileaddr != -1) { 4601549Srgrimes bp = getpbuf(); 4611549Srgrimes 4621827Sdg /* build a minimal buffer header */ 4631549Srgrimes bp->b_flags = B_BUSY | B_READ | B_CALL; 4641549Srgrimes bp->b_iodone = vnode_pager_iodone; 4651549Srgrimes bp->b_proc = curproc; 4661549Srgrimes bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred; 4671827Sdg if (bp->b_rcred != NOCRED) 4681549Srgrimes crhold(bp->b_rcred); 4691827Sdg if (bp->b_wcred != NOCRED) 4701549Srgrimes crhold(bp->b_wcred); 4711549Srgrimes bp->b_un.b_addr = (caddr_t) kva + i * bsize; 4726626Sdg 
bp->b_blkno = fileaddr; 4735455Sdg pbgetvp(dp, bp); 4741549Srgrimes bp->b_bcount = bsize; 4751549Srgrimes bp->b_bufsize = bsize; 4761827Sdg 4771827Sdg /* do the input */ 4781549Srgrimes VOP_STRATEGY(bp); 4791549Srgrimes 4801827Sdg /* we definitely need to be at splbio here */ 4811549Srgrimes 4821549Srgrimes s = splbio(); 4831549Srgrimes while ((bp->b_flags & B_DONE) == 0) { 4849356Sdg tsleep(bp, PVM, "vnsrd", 0); 4851549Srgrimes } 4861549Srgrimes splx(s); 4871549Srgrimes if ((bp->b_flags & B_ERROR) != 0) 4881549Srgrimes error = EIO; 4891549Srgrimes 4901827Sdg /* 4911827Sdg * free the buffer header back to the swap buffer pool 4921827Sdg */ 4931549Srgrimes relpbuf(bp); 4941827Sdg if (error) 4951549Srgrimes break; 4965455Sdg 49710556Sdyson vm_page_set_validclean(m, (i * bsize) & (PAGE_SIZE-1), bsize); 4981549Srgrimes } else { 49910669Sdyson vm_page_set_validclean(m, (i * bsize) & (PAGE_SIZE-1), bsize); 5001549Srgrimes bzero((caddr_t) kva + i * bsize, bsize); 5011549Srgrimes } 5021549Srgrimes } 5031549Srgrimes vm_pager_unmap_page(kva); 5045455Sdg pmap_clear_modify(VM_PAGE_TO_PHYS(m)); 50510669Sdyson m->flags &= ~PG_ZERO; 5061827Sdg if (error) { 5074207Sdg return VM_PAGER_ERROR; 5081549Srgrimes } 5091549Srgrimes return VM_PAGER_OK; 5101549Srgrimes 5111549Srgrimes} 5121549Srgrimes 5131549Srgrimes 5141549Srgrimes/* 5151549Srgrimes * old style vnode pager output routine 5161549Srgrimes */ 5171549Srgrimesint 5189507Sdgvnode_pager_input_old(object, m) 5199507Sdg vm_object_t object; 5201549Srgrimes vm_page_t m; 5211549Srgrimes{ 5221541Srgrimes struct uio auio; 5231541Srgrimes struct iovec aiov; 5245455Sdg int error; 5255455Sdg int size; 5261549Srgrimes vm_offset_t kva; 5271549Srgrimes 5281549Srgrimes error = 0; 5291827Sdg 5301549Srgrimes /* 5311549Srgrimes * Return failure if beyond current EOF 5321549Srgrimes */ 53312767Sdyson if (IDX_TO_OFF(m->pindex) >= object->un_pager.vnp.vnp_size) { 5341549Srgrimes return VM_PAGER_BAD; 5351549Srgrimes } else { 5361549Srgrimes size = 
PAGE_SIZE; 53712767Sdyson if (IDX_TO_OFF(m->pindex) + size > object->un_pager.vnp.vnp_size) 53812767Sdyson size = object->un_pager.vnp.vnp_size - IDX_TO_OFF(m->pindex); 5397178Sdg 5405455Sdg /* 5415455Sdg * Allocate a kernel virtual address and initialize so that 5425455Sdg * we can use VOP_READ/WRITE routines. 5435455Sdg */ 5441549Srgrimes kva = vm_pager_map_page(m); 5457178Sdg 5461827Sdg aiov.iov_base = (caddr_t) kva; 5471549Srgrimes aiov.iov_len = size; 5481549Srgrimes auio.uio_iov = &aiov; 5491549Srgrimes auio.uio_iovcnt = 1; 55012767Sdyson auio.uio_offset = IDX_TO_OFF(m->pindex); 5511549Srgrimes auio.uio_segflg = UIO_SYSSPACE; 5521549Srgrimes auio.uio_rw = UIO_READ; 5531549Srgrimes auio.uio_resid = size; 5541827Sdg auio.uio_procp = (struct proc *) 0; 5551549Srgrimes 5569507Sdg error = VOP_READ(object->handle, &auio, 0, curproc->p_ucred); 5571549Srgrimes if (!error) { 5581549Srgrimes register int count = size - auio.uio_resid; 5591549Srgrimes 5601549Srgrimes if (count == 0) 5611549Srgrimes error = EINVAL; 5621549Srgrimes else if (count != PAGE_SIZE) 5631827Sdg bzero((caddr_t) kva + count, PAGE_SIZE - count); 5641549Srgrimes } 5651549Srgrimes vm_pager_unmap_page(kva); 5661549Srgrimes } 5671549Srgrimes pmap_clear_modify(VM_PAGE_TO_PHYS(m)); 5685455Sdg m->dirty = 0; 56910669Sdyson m->flags &= ~PG_ZERO; 5704207Sdg return error ? 
VM_PAGER_ERROR : VM_PAGER_OK; 5711549Srgrimes} 5721549Srgrimes 5731549Srgrimes/* 5741549Srgrimes * generic vnode pager input routine 5751549Srgrimes */ 57610556Sdyson 5771549Srgrimesint 5789507Sdgvnode_pager_getpages(object, m, count, reqpage) 5799507Sdg vm_object_t object; 5801549Srgrimes vm_page_t *m; 5819507Sdg int count; 5829507Sdg int reqpage; 5831549Srgrimes{ 58410556Sdyson int rtval; 58510556Sdyson struct vnode *vp; 58610556Sdyson vp = object->handle; 58711701Sdyson rtval = VOP_GETPAGES(vp, m, count*PAGE_SIZE, reqpage, 0); 58810556Sdyson if (rtval == EOPNOTSUPP) 58911943Sbde return vnode_pager_leaf_getpages(object, m, count, reqpage); 59010556Sdyson else 59110556Sdyson return rtval; 59210556Sdyson} 59310556Sdyson 59410556Sdysonstatic int 59510556Sdysonvnode_pager_leaf_getpages(object, m, count, reqpage) 59610556Sdyson vm_object_t object; 59710556Sdyson vm_page_t *m; 59810556Sdyson int count; 59910556Sdyson int reqpage; 60010556Sdyson{ 60112767Sdyson vm_offset_t kva; 60212767Sdyson off_t foff; 6039507Sdg int i, size, bsize, first, firstaddr; 6041549Srgrimes struct vnode *dp, *vp; 6056151Sdg int runpg; 6066151Sdg int runend; 6077178Sdg struct buf *bp; 6085455Sdg int s; 6095455Sdg int error = 0; 6101549Srgrimes 6119507Sdg vp = object->handle; 61211701Sdyson if (vp->v_mount == NULL) 61311701Sdyson return VM_PAGER_BAD; 61411701Sdyson 6151549Srgrimes bsize = vp->v_mount->mnt_stat.f_iosize; 6161549Srgrimes 6171549Srgrimes /* get the UNDERLYING device for the file with VOP_BMAP() */ 6181827Sdg 6191549Srgrimes /* 6201827Sdg * originally, we did not check for an error return value -- assuming 6211827Sdg * an fs always has a bmap entry point -- that assumption is wrong!!! 
6221549Srgrimes */ 62312767Sdyson foff = IDX_TO_OFF(m[reqpage]->pindex); 6241827Sdg 6251549Srgrimes /* 6261887Sdg * if we can't bmap, use old VOP code 6271549Srgrimes */ 62810551Sdyson if (VOP_BMAP(vp, 0, &dp, 0, NULL, NULL)) { 6291549Srgrimes for (i = 0; i < count; i++) { 6301549Srgrimes if (i != reqpage) { 6311549Srgrimes vnode_pager_freepage(m[i]); 6321549Srgrimes } 6331549Srgrimes } 6343612Sdg cnt.v_vnodein++; 6353612Sdg cnt.v_vnodepgsin++; 6369507Sdg return vnode_pager_input_old(object, m[reqpage]); 6371549Srgrimes 6381827Sdg /* 6391827Sdg * if the blocksize is smaller than a page size, then use 6401827Sdg * special small filesystem code. NFS sometimes has a small 6411827Sdg * blocksize, but it can handle large reads itself. 6421827Sdg */ 6431827Sdg } else if ((PAGE_SIZE / bsize) > 1 && 6445455Sdg (vp->v_mount->mnt_stat.f_type != MOUNT_NFS)) { 6451827Sdg 6461549Srgrimes for (i = 0; i < count; i++) { 6471549Srgrimes if (i != reqpage) { 6481549Srgrimes vnode_pager_freepage(m[i]); 6491549Srgrimes } 6501549Srgrimes } 6513612Sdg cnt.v_vnodein++; 6523612Sdg cnt.v_vnodepgsin++; 6539507Sdg return vnode_pager_input_smlfs(object, m[reqpage]); 6541549Srgrimes } 6551549Srgrimes /* 6565455Sdg * if ANY DEV_BSIZE blocks are valid on a large filesystem block 6575455Sdg * then, the entire page is valid -- 6581549Srgrimes */ 6595455Sdg if (m[reqpage]->valid) { 6605455Sdg m[reqpage]->valid = VM_PAGE_BITS_ALL; 6615455Sdg for (i = 0; i < count; i++) { 6625455Sdg if (i != reqpage) 6635455Sdg vnode_pager_freepage(m[i]); 6641549Srgrimes } 6655455Sdg return VM_PAGER_OK; 6661549Srgrimes } 6677178Sdg 6685455Sdg /* 6695455Sdg * here on direct device I/O 6705455Sdg */ 6711549Srgrimes 6726151Sdg firstaddr = -1; 6731549Srgrimes /* 6746151Sdg * calculate the run that includes the required page 6751549Srgrimes */ 6766151Sdg for(first = 0, i = 0; i < count; i = runend) { 67712767Sdyson firstaddr = vnode_pager_addr(vp, 67812767Sdyson IDX_TO_OFF(m[i]->pindex), &runpg); 6796151Sdg if (firstaddr 
== -1) { 6809507Sdg if (i == reqpage && foff < object->un_pager.vnp.vnp_size) { 6819507Sdg panic("vnode_pager_putpages: unexpected missing page: firstaddr: %d, foff: %ld, vnp_size: %d", 6829507Sdg firstaddr, foff, object->un_pager.vnp.vnp_size); 6836151Sdg } 6841549Srgrimes vnode_pager_freepage(m[i]); 6856151Sdg runend = i + 1; 6866151Sdg first = runend; 6876151Sdg continue; 6881549Srgrimes } 6896151Sdg runend = i + runpg; 6909507Sdg if (runend <= reqpage) { 6916151Sdg int j; 6929507Sdg for (j = i; j < runend; j++) { 6936151Sdg vnode_pager_freepage(m[j]); 6946151Sdg } 6951549Srgrimes } else { 6969507Sdg if (runpg < (count - first)) { 6979507Sdg for (i = first + runpg; i < count; i++) 6986151Sdg vnode_pager_freepage(m[i]); 6996151Sdg count = first + runpg; 7006151Sdg } 7016151Sdg break; 7021549Srgrimes } 7036151Sdg first = runend; 7041549Srgrimes } 7051549Srgrimes 7061549Srgrimes /* 7071827Sdg * the first and last page have been calculated now, move input pages 7081827Sdg * to be zero based... 
7091549Srgrimes */ 7101549Srgrimes if (first != 0) { 7111549Srgrimes for (i = first; i < count; i++) { 7121549Srgrimes m[i - first] = m[i]; 7131549Srgrimes } 7141549Srgrimes count -= first; 7151549Srgrimes reqpage -= first; 7161549Srgrimes } 7176151Sdg 7181549Srgrimes /* 7191549Srgrimes * calculate the file virtual address for the transfer 7201549Srgrimes */ 72112767Sdyson foff = IDX_TO_OFF(m[0]->pindex); 7221827Sdg 7231549Srgrimes /* 7241549Srgrimes * calculate the size of the transfer 7251549Srgrimes */ 7261549Srgrimes size = count * PAGE_SIZE; 7279507Sdg if ((foff + size) > object->un_pager.vnp.vnp_size) 7289507Sdg size = object->un_pager.vnp.vnp_size - foff; 7291549Srgrimes 7301549Srgrimes /* 7311549Srgrimes * round up physical size for real devices 7321549Srgrimes */ 7331827Sdg if (dp->v_type == VBLK || dp->v_type == VCHR) 7341549Srgrimes size = (size + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1); 7351549Srgrimes 7365841Sdg bp = getpbuf(); 7375455Sdg kva = (vm_offset_t) bp->b_data; 7381887Sdg 7391549Srgrimes /* 7401549Srgrimes * and map the pages to be read into the kva 7411549Srgrimes */ 7421887Sdg pmap_qenter(kva, m, count); 7431549Srgrimes 7441549Srgrimes /* build a minimal buffer header */ 7451549Srgrimes bp->b_flags = B_BUSY | B_READ | B_CALL; 7461549Srgrimes bp->b_iodone = vnode_pager_iodone; 7471549Srgrimes /* B_PHYS is not set, but it is nice to fill this in */ 7481549Srgrimes bp->b_proc = curproc; 7491549Srgrimes bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred; 7501827Sdg if (bp->b_rcred != NOCRED) 7511549Srgrimes crhold(bp->b_rcred); 7521827Sdg if (bp->b_wcred != NOCRED) 7531549Srgrimes crhold(bp->b_wcred); 7546626Sdg bp->b_blkno = firstaddr; 7555455Sdg pbgetvp(dp, bp); 7561549Srgrimes bp->b_bcount = size; 7571549Srgrimes bp->b_bufsize = size; 7581549Srgrimes 7593612Sdg cnt.v_vnodein++; 7603612Sdg cnt.v_vnodepgsin += count; 7613612Sdg 7621549Srgrimes /* do the input */ 7631549Srgrimes VOP_STRATEGY(bp); 7643612Sdg 7651549Srgrimes s = splbio(); 7661549Srgrimes 
/* we definitely need to be at splbio here */ 7671549Srgrimes 7681549Srgrimes while ((bp->b_flags & B_DONE) == 0) { 7699356Sdg tsleep(bp, PVM, "vnread", 0); 7701549Srgrimes } 7711549Srgrimes splx(s); 7721549Srgrimes if ((bp->b_flags & B_ERROR) != 0) 7731549Srgrimes error = EIO; 7741549Srgrimes 7751549Srgrimes if (!error) { 7761549Srgrimes if (size != count * PAGE_SIZE) 7771827Sdg bzero((caddr_t) kva + size, PAGE_SIZE * count - size); 7781549Srgrimes } 7795455Sdg pmap_qremove(kva, count); 7801549Srgrimes 7811549Srgrimes /* 7821549Srgrimes * free the buffer header back to the swap buffer pool 7831549Srgrimes */ 7841549Srgrimes relpbuf(bp); 7851549Srgrimes 7861549Srgrimes for (i = 0; i < count; i++) { 7872386Sdg pmap_clear_modify(VM_PAGE_TO_PHYS(m[i])); 7885455Sdg m[i]->dirty = 0; 7895455Sdg m[i]->valid = VM_PAGE_BITS_ALL; 79010669Sdyson m[i]->flags &= ~PG_ZERO; 7911549Srgrimes if (i != reqpage) { 7921827Sdg 7931549Srgrimes /* 7941827Sdg * whether or not to leave the page activated is up in 7951827Sdg * the air, but we should put the page on a page queue 7961827Sdg * somewhere. (it already is in the object). Result: 7971827Sdg * It appears that emperical results show that 7981827Sdg * deactivating pages is best. 7991549Srgrimes */ 8001827Sdg 8011549Srgrimes /* 8021827Sdg * just in case someone was asking for this page we 8031827Sdg * now tell them that it is ok to use 8041549Srgrimes */ 8051549Srgrimes if (!error) { 8065841Sdg vm_page_deactivate(m[i]); 8071549Srgrimes PAGE_WAKEUP(m[i]); 8081549Srgrimes } else { 8091549Srgrimes vnode_pager_freepage(m[i]); 8101549Srgrimes } 8111549Srgrimes } 8121549Srgrimes } 8131549Srgrimes if (error) { 8149507Sdg printf("vnode_pager_getpages: I/O read error\n"); 8151549Srgrimes } 8164207Sdg return (error ? 
VM_PAGER_ERROR : VM_PAGER_OK); 8171549Srgrimes} 8181549Srgrimes 81910556Sdysonint 82010556Sdysonvnode_pager_putpages(object, m, count, sync, rtvals) 82110556Sdyson vm_object_t object; 82210556Sdyson vm_page_t *m; 82310556Sdyson int count; 82410556Sdyson boolean_t sync; 82510556Sdyson int *rtvals; 82610556Sdyson{ 82710556Sdyson int rtval; 82810556Sdyson struct vnode *vp; 82910556Sdyson vp = object->handle; 83011701Sdyson rtval = VOP_PUTPAGES(vp, m, count*PAGE_SIZE, sync, rtvals, 0); 83110556Sdyson if (rtval == EOPNOTSUPP) 83211943Sbde return vnode_pager_leaf_putpages(object, m, count, sync, rtvals); 83310556Sdyson else 83410556Sdyson return rtval; 83510556Sdyson} 83610556Sdyson 8371549Srgrimes/* 8381549Srgrimes * generic vnode pager output routine 8391549Srgrimes */ 84010556Sdysonstatic int 84110556Sdysonvnode_pager_leaf_putpages(object, m, count, sync, rtvals) 8429507Sdg vm_object_t object; 8431549Srgrimes vm_page_t *m; 8445455Sdg int count; 8459507Sdg boolean_t sync; 8465455Sdg int *rtvals; 8471549Srgrimes{ 8487695Sdg int i; 8491549Srgrimes 8507695Sdg struct vnode *vp; 8517695Sdg int maxsize, ncount; 85212767Sdyson vm_ooffset_t poffset; 8537695Sdg struct uio auio; 8547695Sdg struct iovec aiov; 8557695Sdg int error; 8561549Srgrimes 8579507Sdg vp = object->handle;; 8581827Sdg for (i = 0; i < count; i++) 8591549Srgrimes rtvals[i] = VM_PAGER_AGAIN; 8601549Srgrimes 86112767Sdyson if ((int) m[0]->pindex < 0) { 86212767Sdyson printf("vnode_pager_putpages: attempt to write meta-data!!! 
-- 0x%x(%x)\n", m[0]->pindex, m[0]->dirty); 8637695Sdg rtvals[0] = VM_PAGER_BAD; 8647695Sdg return VM_PAGER_BAD; 8655455Sdg } 8667178Sdg 8677695Sdg maxsize = count * PAGE_SIZE; 8687695Sdg ncount = count; 8691549Srgrimes 87012767Sdyson poffset = IDX_TO_OFF(m[0]->pindex); 87112767Sdyson if (maxsize + poffset > object->un_pager.vnp.vnp_size) { 87212767Sdyson if (object->un_pager.vnp.vnp_size > poffset) 87312767Sdyson maxsize = object->un_pager.vnp.vnp_size - poffset; 8748585Sdg else 8758585Sdg maxsize = 0; 8767695Sdg ncount = (maxsize + PAGE_SIZE - 1) / PAGE_SIZE; 8778585Sdg if (ncount < count) { 8788585Sdg for (i = ncount; i < count; i++) { 8797695Sdg rtvals[i] = VM_PAGER_BAD; 8801549Srgrimes } 88112767Sdyson#ifdef BOGUS 8828585Sdg if (ncount == 0) { 88312767Sdyson printf("vnode_pager_putpages: write past end of file: %d, %lu\n", 88412767Sdyson poffset, 88512767Sdyson (unsigned long) object->un_pager.vnp.vnp_size); 8867695Sdg return rtvals[0]; 8877695Sdg } 88812767Sdyson#endif 8891549Srgrimes } 8901541Srgrimes } 8917695Sdg 8928585Sdg for (i = 0; i < count; i++) { 8938585Sdg m[i]->busy++; 8947695Sdg m[i]->flags &= ~PG_BUSY; 8951549Srgrimes } 8961827Sdg 8977695Sdg aiov.iov_base = (caddr_t) 0; 8987695Sdg aiov.iov_len = maxsize; 8997695Sdg auio.uio_iov = &aiov; 9007695Sdg auio.uio_iovcnt = 1; 90112767Sdyson auio.uio_offset = poffset; 9027695Sdg auio.uio_segflg = UIO_NOCOPY; 9037695Sdg auio.uio_rw = UIO_WRITE; 9047695Sdg auio.uio_resid = maxsize; 9057695Sdg auio.uio_procp = (struct proc *) 0; 90612767Sdyson error = VOP_WRITE(vp, &auio, IO_VMIO|(sync?IO_SYNC:0), curproc->p_ucred); 9073612Sdg cnt.v_vnodeout++; 9087695Sdg cnt.v_vnodepgsout += ncount; 9093612Sdg 9108585Sdg if (error) { 9119507Sdg printf("vnode_pager_putpages: I/O error %d\n", error); 9127695Sdg } 9138585Sdg if (auio.uio_resid) { 91412767Sdyson printf("vnode_pager_putpages: residual I/O %d at %d\n", 91512767Sdyson auio.uio_resid, m[0]->pindex); 9167695Sdg } 9178585Sdg for (i = 0; i < count; i++) { 9188585Sdg 
m[i]->busy--; 9198585Sdg if (i < ncount) { 9207695Sdg rtvals[i] = VM_PAGER_OK; 9217695Sdg } 9228585Sdg if ((m[i]->busy == 0) && (m[i]->flags & PG_WANTED)) 9239507Sdg wakeup(m[i]); 9247695Sdg } 9257695Sdg return rtvals[0]; 9267695Sdg} 9271549Srgrimes 9287695Sdgstruct vnode * 9299507Sdgvnode_pager_lock(object) 9309507Sdg vm_object_t object; 9319507Sdg{ 9329507Sdg for (; object != NULL; object = object->backing_object) { 9339507Sdg if (object->type != OBJT_VNODE) 9347695Sdg continue; 9351549Srgrimes 9369507Sdg VOP_LOCK(object->handle); 9379507Sdg return object->handle; 9381549Srgrimes } 9399507Sdg return NULL; 9407695Sdg} 941