vnode_pager.c revision 8585
/*
 * Copyright (c) 1990 University of Utah.
 * Copyright (c) 1991 The Regents of the University of California.
 * All rights reserved.
 * Copyright (c) 1993,1994 John S. Dyson
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)vnode_pager.c	7.5 (Berkeley) 4/20/91
 *	$Id: vnode_pager.c,v 1.38 1995/05/10 18:56:09 davidg Exp $
 */

/*
 * Page to/from files (vnodes).
 *
 * TODO:
 *	pageouts
 *	fix credential use (uses current process credentials now)
 */

/*
 * 1) Supports multiple - block reads/writes
 * 2) Bypasses buffer cache for reads
 *
 * TODO:
 *	Implement getpage/putpage interface for filesystems.  Should
 *	greatly re-simplify the vnode_pager.
581549Srgrimes * 591549Srgrimes */ 601549Srgrimes 611541Srgrimes#include <sys/param.h> 621541Srgrimes#include <sys/systm.h> 635455Sdg#include <sys/kernel.h> 641541Srgrimes#include <sys/proc.h> 651541Srgrimes#include <sys/malloc.h> 661541Srgrimes#include <sys/vnode.h> 671541Srgrimes#include <sys/uio.h> 681541Srgrimes#include <sys/mount.h> 691541Srgrimes 701541Srgrimes#include <vm/vm.h> 711541Srgrimes#include <vm/vm_page.h> 721541Srgrimes#include <vm/vnode_pager.h> 731541Srgrimes 741549Srgrimes#include <sys/buf.h> 751549Srgrimes#include <miscfs/specfs/specdev.h> 761541Srgrimes 775455Sdgint vnode_pager_putmulti(); 781541Srgrimes 795455Sdgvoid vnode_pager_init(); 805455Sdgvoid vnode_pager_dealloc(); 815455Sdgint vnode_pager_getpage(); 825455Sdgint vnode_pager_getmulti(); 835455Sdgint vnode_pager_putpage(); 841827Sdgboolean_t vnode_pager_haspage(); 851541Srgrimes 861541Srgrimesstruct pagerops vnodepagerops = { 871541Srgrimes vnode_pager_init, 881541Srgrimes vnode_pager_alloc, 891541Srgrimes vnode_pager_dealloc, 901541Srgrimes vnode_pager_getpage, 911549Srgrimes vnode_pager_getmulti, 921541Srgrimes vnode_pager_putpage, 931549Srgrimes vnode_pager_putmulti, 941549Srgrimes vnode_pager_haspage 951541Srgrimes}; 961541Srgrimes 971887Sdg 981887Sdg 991827Sdgstatic int vnode_pager_input(vn_pager_t vnp, vm_page_t * m, int count, int reqpage); 1001827Sdgstatic int vnode_pager_output(vn_pager_t vnp, vm_page_t * m, int count, int *rtvals); 1011549Srgrimes 1021549Srgrimesextern vm_map_t pager_map; 1031549Srgrimes 1041549Srgrimesstruct pagerlst vnode_pager_list; /* list of managed vnodes */ 1051549Srgrimes 1061549Srgrimes#define MAXBP (PAGE_SIZE/DEV_BSIZE); 1071549Srgrimes 1081549Srgrimesvoid 1091541Srgrimesvnode_pager_init() 1101541Srgrimes{ 1111541Srgrimes TAILQ_INIT(&vnode_pager_list); 1121541Srgrimes} 1131541Srgrimes 1141541Srgrimes/* 1151541Srgrimes * Allocate (or lookup) pager for a vnode. 1161541Srgrimes * Handle is a vnode pointer. 
1171541Srgrimes */ 1181549Srgrimesvm_pager_t 1191549Srgrimesvnode_pager_alloc(handle, size, prot, offset) 1208416Sdg void *handle; 1211541Srgrimes vm_size_t size; 1221541Srgrimes vm_prot_t prot; 1231549Srgrimes vm_offset_t offset; 1241541Srgrimes{ 1251541Srgrimes register vm_pager_t pager; 1261541Srgrimes register vn_pager_t vnp; 1275455Sdg vm_object_t object, tobject; 1281541Srgrimes struct vattr vattr; 1291541Srgrimes struct vnode *vp; 1301541Srgrimes struct proc *p = curproc; /* XXX */ 1315455Sdg int rtval; 1321541Srgrimes 1331541Srgrimes /* 1341541Srgrimes * Pageout to vnode, no can do yet. 1351541Srgrimes */ 1361541Srgrimes if (handle == NULL) 1371827Sdg return (NULL); 1381541Srgrimes 1391541Srgrimes /* 1401827Sdg * Vnodes keep a pointer to any associated pager so no need to lookup 1411827Sdg * with vm_pager_lookup. 1421541Srgrimes */ 1431827Sdg vp = (struct vnode *) handle; 1447695Sdg while ((object = (vm_object_t) vp->v_vmdata) && 1457695Sdg (object->flags & OBJ_DEAD)) 1465455Sdg tsleep((caddr_t) object, PVM, "vadead", 0); 1475455Sdg 1483374Sdg pager = NULL; 1495455Sdg if (object != NULL) 1503374Sdg pager = object->pager; 1511541Srgrimes if (pager == NULL) { 1521827Sdg 1531541Srgrimes /* 1541541Srgrimes * Allocate pager structures 1551541Srgrimes */ 1561827Sdg pager = (vm_pager_t) malloc(sizeof *pager, M_VMPAGER, M_WAITOK); 1571541Srgrimes if (pager == NULL) 1581827Sdg return (NULL); 1591827Sdg vnp = (vn_pager_t) malloc(sizeof *vnp, M_VMPGDATA, M_WAITOK); 1601541Srgrimes if (vnp == NULL) { 1611827Sdg free((caddr_t) pager, M_VMPAGER); 1621827Sdg return (NULL); 1631541Srgrimes } 1641541Srgrimes /* 1651541Srgrimes * And an object of the appropriate size 1661541Srgrimes */ 1675455Sdg if ((rtval = VOP_GETATTR(vp, &vattr, p->p_ucred, p)) == 0) { 1681541Srgrimes object = vm_object_allocate(round_page(vattr.va_size)); 1697014Sdg object->flags = OBJ_CANPERSIST; 1701541Srgrimes vm_object_enter(object, pager); 1716585Sdg object->pager = pager; 1721541Srgrimes } else { 
1731827Sdg free((caddr_t) vnp, M_VMPGDATA); 1741827Sdg free((caddr_t) pager, M_VMPAGER); 1751827Sdg return (NULL); 1761541Srgrimes } 1771827Sdg 1781541Srgrimes /* 1791541Srgrimes * Hold a reference to the vnode and initialize pager data. 1801541Srgrimes */ 1811541Srgrimes VREF(vp); 1821541Srgrimes vnp->vnp_flags = 0; 1831541Srgrimes vnp->vnp_vp = vp; 1841541Srgrimes vnp->vnp_size = vattr.va_size; 1851549Srgrimes 1861541Srgrimes TAILQ_INSERT_TAIL(&vnode_pager_list, pager, pg_list); 1871541Srgrimes pager->pg_handle = handle; 1881541Srgrimes pager->pg_type = PG_VNODE; 1891541Srgrimes pager->pg_ops = &vnodepagerops; 1901827Sdg pager->pg_data = (caddr_t) vnp; 1913374Sdg vp->v_vmdata = (caddr_t) object; 1921541Srgrimes } else { 1931827Sdg 1941541Srgrimes /* 1951827Sdg * vm_object_lookup() will remove the object from the cache if 1961827Sdg * found and also gain a reference to the object. 1971541Srgrimes */ 1983374Sdg (void) vm_object_lookup(pager); 1991541Srgrimes } 2007695Sdg if( vp->v_type == VREG) 2017695Sdg vp->v_flag |= VVMIO; 2021827Sdg return (pager); 2031541Srgrimes} 2041541Srgrimes 2051549Srgrimesvoid 2061541Srgrimesvnode_pager_dealloc(pager) 2071541Srgrimes vm_pager_t pager; 2081541Srgrimes{ 2091827Sdg register vn_pager_t vnp = (vn_pager_t) pager->pg_data; 2101541Srgrimes register struct vnode *vp; 2115455Sdg vm_object_t object; 2121541Srgrimes 2133449Sphk vp = vnp->vnp_vp; 2143449Sphk if (vp) { 2155455Sdg int s = splbio(); 2165455Sdg 2175455Sdg object = (vm_object_t) vp->v_vmdata; 2185455Sdg if (object) { 2195455Sdg while (object->paging_in_progress) { 2206618Sdg object->flags |= OBJ_PIPWNT; 2215455Sdg tsleep(object, PVM, "vnpdea", 0); 2225455Sdg } 2235455Sdg } 2245455Sdg splx(s); 2255455Sdg 2261541Srgrimes vp->v_vmdata = NULL; 2275455Sdg vp->v_flag &= ~(VTEXT | VVMIO); 2286947Sdg vp->v_flag |= VAGE; 2291541Srgrimes vrele(vp); 2301541Srgrimes } 2311541Srgrimes TAILQ_REMOVE(&vnode_pager_list, pager, pg_list); 2321827Sdg free((caddr_t) vnp, M_VMPGDATA); 
2331827Sdg free((caddr_t) pager, M_VMPAGER); 2341541Srgrimes} 2351541Srgrimes 2361549Srgrimesint 2371549Srgrimesvnode_pager_getmulti(pager, m, count, reqpage, sync) 2381541Srgrimes vm_pager_t pager; 2391549Srgrimes vm_page_t *m; 2405455Sdg int count; 2415455Sdg int reqpage; 2421541Srgrimes boolean_t sync; 2431541Srgrimes{ 2441827Sdg 2451549Srgrimes return vnode_pager_input((vn_pager_t) pager->pg_data, m, count, reqpage); 2461549Srgrimes} 2471541Srgrimes 2481549Srgrimesint 2491549Srgrimesvnode_pager_getpage(pager, m, sync) 2501549Srgrimes vm_pager_t pager; 2511549Srgrimes vm_page_t m; 2521549Srgrimes boolean_t sync; 2531549Srgrimes{ 2541549Srgrimes 2551549Srgrimes vm_page_t marray[1]; 2561827Sdg 2571549Srgrimes if (pager == NULL) 2581549Srgrimes return FALSE; 2591549Srgrimes marray[0] = m; 2601549Srgrimes 2611827Sdg return vnode_pager_input((vn_pager_t) pager->pg_data, marray, 1, 0); 2621541Srgrimes} 2631541Srgrimes 2641549Srgrimesboolean_t 2651549Srgrimesvnode_pager_putpage(pager, m, sync) 2661541Srgrimes vm_pager_t pager; 2671549Srgrimes vm_page_t m; 2681541Srgrimes boolean_t sync; 2691541Srgrimes{ 2701549Srgrimes vm_page_t marray[1]; 2715455Sdg int rtvals[1]; 2721541Srgrimes 2731541Srgrimes if (pager == NULL) 2741549Srgrimes return FALSE; 2751549Srgrimes marray[0] = m; 2761827Sdg vnode_pager_output((vn_pager_t) pager->pg_data, marray, 1, rtvals); 2771549Srgrimes return rtvals[0]; 2781541Srgrimes} 2791541Srgrimes 2801549Srgrimesint 2811549Srgrimesvnode_pager_putmulti(pager, m, c, sync, rtvals) 2821549Srgrimes vm_pager_t pager; 2831549Srgrimes vm_page_t *m; 2845455Sdg int c; 2851549Srgrimes boolean_t sync; 2865455Sdg int *rtvals; 2871549Srgrimes{ 2881827Sdg return vnode_pager_output((vn_pager_t) pager->pg_data, m, c, rtvals); 2891549Srgrimes} 2901549Srgrimes 2911549Srgrimes 2921549Srgrimesboolean_t 2931541Srgrimesvnode_pager_haspage(pager, offset) 2941541Srgrimes vm_pager_t pager; 2951541Srgrimes vm_offset_t offset; 2961541Srgrimes{ 2971827Sdg register vn_pager_t 
vnp = (vn_pager_t) pager->pg_data; 2984797Sdg register struct vnode *vp = vnp->vnp_vp; 2991541Srgrimes daddr_t bn; 3005455Sdg int err; 3014446Sdg daddr_t block; 3021541Srgrimes 3031541Srgrimes /* 3045455Sdg * If filesystem no longer mounted or offset beyond end of file we do 3055455Sdg * not have the page. 3061541Srgrimes */ 3074797Sdg if ((vp->v_mount == NULL) || (offset >= vnp->vnp_size)) 3084797Sdg return FALSE; 3091541Srgrimes 3104797Sdg block = offset / vp->v_mount->mnt_stat.f_iosize; 3114797Sdg if (incore(vp, block)) 3124446Sdg return TRUE; 3137178Sdg 3141541Srgrimes /* 3151827Sdg * Read the index to find the disk block to read from. If there is no 3161827Sdg * block, report that we don't have this data. 3171827Sdg * 3181541Srgrimes * Assumes that the vnode has whole page or nothing. 3191541Srgrimes */ 3204797Sdg err = VOP_BMAP(vp, block, (struct vnode **) 0, &bn, 0); 3217178Sdg if (err) 3221827Sdg return (TRUE); 3231827Sdg return ((long) bn < 0 ? FALSE : TRUE); 3241541Srgrimes} 3251541Srgrimes 3261541Srgrimes/* 3271541Srgrimes * Lets the VM system know about a change in size for a file. 3281541Srgrimes * If this vnode is mapped into some address space (i.e. we have a pager 3291541Srgrimes * for it) we adjust our own internal size and flush any cached pages in 3301541Srgrimes * the associated object that are affected by the size change. 3311541Srgrimes * 3321541Srgrimes * Note: this routine may be invoked as a result of a pager put 3331541Srgrimes * operation (possibly at object termination time), so we must be careful. 
3341541Srgrimes */ 3351541Srgrimesvoid 3361541Srgrimesvnode_pager_setsize(vp, nsize) 3371541Srgrimes struct vnode *vp; 3385455Sdg u_long nsize; 3391541Srgrimes{ 3401541Srgrimes register vn_pager_t vnp; 3411541Srgrimes register vm_object_t object; 3421541Srgrimes vm_pager_t pager; 3431541Srgrimes 3441541Srgrimes /* 3451541Srgrimes * Not a mapped vnode 3461541Srgrimes */ 3471541Srgrimes if (vp == NULL || vp->v_type != VREG || vp->v_vmdata == NULL) 3481541Srgrimes return; 3491827Sdg 3501541Srgrimes /* 3511541Srgrimes * Hasn't changed size 3521541Srgrimes */ 3533374Sdg object = (vm_object_t) vp->v_vmdata; 3545455Sdg if (object == NULL) 3553374Sdg return; 3565455Sdg if ((pager = object->pager) == NULL) 3573374Sdg return; 3581827Sdg vnp = (vn_pager_t) pager->pg_data; 3591541Srgrimes if (nsize == vnp->vnp_size) 3601541Srgrimes return; 3611827Sdg 3621541Srgrimes /* 3631827Sdg * File has shrunk. Toss any cached pages beyond the new EOF. 3641541Srgrimes */ 3651827Sdg if (nsize < vnp->vnp_size) { 3665455Sdg if (round_page((vm_offset_t) nsize) < vnp->vnp_size) { 3675455Sdg vm_object_lock(object); 3685455Sdg vm_object_page_remove(object, 3697204Sdg round_page((vm_offset_t) nsize), vnp->vnp_size, FALSE); 3705455Sdg vm_object_unlock(object); 3715455Sdg } 3721827Sdg /* 3731827Sdg * this gets rid of garbage at the end of a page that is now 3741827Sdg * only partially backed by the vnode... 
3751827Sdg */ 3761827Sdg if (nsize & PAGE_MASK) { 3771827Sdg vm_offset_t kva; 3781827Sdg vm_page_t m; 3791827Sdg 3801827Sdg m = vm_page_lookup(object, trunc_page((vm_offset_t) nsize)); 3811827Sdg if (m) { 3821827Sdg kva = vm_pager_map_page(m); 3831827Sdg bzero((caddr_t) kva + (nsize & PAGE_MASK), 3845455Sdg round_page(nsize) - nsize); 3851827Sdg vm_pager_unmap_page(kva); 3861827Sdg } 3871827Sdg } 3881541Srgrimes } 3891827Sdg vnp->vnp_size = (vm_offset_t) nsize; 3901827Sdg object->size = round_page(nsize); 3911541Srgrimes} 3921541Srgrimes 3931541Srgrimesvoid 3941541Srgrimesvnode_pager_umount(mp) 3951541Srgrimes register struct mount *mp; 3961541Srgrimes{ 3971541Srgrimes register vm_pager_t pager, npager; 3981541Srgrimes struct vnode *vp; 3991541Srgrimes 4007162Sdg for (pager = vnode_pager_list.tqh_first; pager != NULL; pager = npager) { 4011541Srgrimes /* 4021827Sdg * Save the next pointer now since uncaching may terminate the 4031827Sdg * object and render pager invalid 4041541Srgrimes */ 4057162Sdg npager = pager->pg_list.tqe_next; 4061827Sdg vp = ((vn_pager_t) pager->pg_data)->vnp_vp; 4077162Sdg if (mp == (struct mount *) 0 || vp->v_mount == mp) { 4087162Sdg VOP_LOCK(vp); 4091541Srgrimes (void) vnode_pager_uncache(vp); 4107162Sdg VOP_UNLOCK(vp); 4117162Sdg } 4121541Srgrimes } 4131541Srgrimes} 4141541Srgrimes 4151541Srgrimes/* 4161541Srgrimes * Remove vnode associated object from the object cache. 4177162Sdg * This routine must be called with the vnode locked. 4181541Srgrimes * 4197162Sdg * XXX unlock the vnode. 4207162Sdg * We must do this since uncaching the object may result in its 4217162Sdg * destruction which may initiate paging activity which may necessitate 4227162Sdg * re-locking the vnode. 
4231549Srgrimes */ 4241549Srgrimesboolean_t 4251549Srgrimesvnode_pager_uncache(vp) 4261549Srgrimes register struct vnode *vp; 4271549Srgrimes{ 4281549Srgrimes register vm_object_t object; 4297162Sdg boolean_t uncached; 4301549Srgrimes vm_pager_t pager; 4311549Srgrimes 4321549Srgrimes /* 4331549Srgrimes * Not a mapped vnode 4341549Srgrimes */ 4353374Sdg object = (vm_object_t) vp->v_vmdata; 4365455Sdg if (object == NULL) 4375455Sdg return (TRUE); 4385455Sdg 4393374Sdg pager = object->pager; 4401549Srgrimes if (pager == NULL) 4411549Srgrimes return (TRUE); 4421827Sdg 4437162Sdg#ifdef DEBUG 4447162Sdg if (!VOP_ISLOCKED(vp)) { 4457162Sdg extern int (**nfsv2_vnodeop_p)(); 4461827Sdg 4477162Sdg if (vp->v_op != nfsv2_vnodeop_p) 4487162Sdg panic("vnode_pager_uncache: vnode not locked!"); 4497162Sdg } 4507162Sdg#endif 4511549Srgrimes /* 4521827Sdg * Must use vm_object_lookup() as it actually removes the object from 4531827Sdg * the cache list. 4541549Srgrimes */ 4551549Srgrimes object = vm_object_lookup(pager); 4561549Srgrimes if (object) { 4571549Srgrimes uncached = (object->ref_count <= 1); 4587162Sdg VOP_UNLOCK(vp); 4591549Srgrimes pager_cache(object, FALSE); 4607162Sdg VOP_LOCK(vp); 4611549Srgrimes } else 4621549Srgrimes uncached = TRUE; 4631827Sdg return (uncached); 4641549Srgrimes} 4651541Srgrimes 4661541Srgrimes 4671549Srgrimesvoid 4681549Srgrimesvnode_pager_freepage(m) 4691549Srgrimes vm_page_t m; 4701541Srgrimes{ 4711549Srgrimes PAGE_WAKEUP(m); 4721549Srgrimes vm_page_free(m); 4731549Srgrimes} 4741549Srgrimes 4751549Srgrimes/* 4761549Srgrimes * calculate the linear (byte) disk address of specified virtual 4771549Srgrimes * file address 4781549Srgrimes */ 4791549Srgrimesvm_offset_t 4806151Sdgvnode_pager_addr(vp, address, run) 4811549Srgrimes struct vnode *vp; 4821549Srgrimes vm_offset_t address; 4836151Sdg int *run; 4841549Srgrimes{ 4855455Sdg int rtaddress; 4865455Sdg int bsize; 4871549Srgrimes vm_offset_t block; 4881549Srgrimes struct vnode *rtvp; 4895455Sdg int 
err; 4905455Sdg int vblock, voffset; 4911549Srgrimes 4925455Sdg if ((int) address < 0) 4935455Sdg return -1; 4945455Sdg 4951549Srgrimes bsize = vp->v_mount->mnt_stat.f_iosize; 4961549Srgrimes vblock = address / bsize; 4971549Srgrimes voffset = address % bsize; 4981549Srgrimes 4996151Sdg err = VOP_BMAP(vp, vblock, &rtvp, &block, run); 5001549Srgrimes 5016151Sdg if (err || (block == -1)) 5021549Srgrimes rtaddress = -1; 5036151Sdg else { 5046626Sdg rtaddress = block + voffset / DEV_BSIZE; 5056151Sdg if( run) { 5066151Sdg *run += 1; 5076151Sdg *run *= bsize/PAGE_SIZE; 5086151Sdg *run -= voffset/PAGE_SIZE; 5096151Sdg } 5106151Sdg } 5111549Srgrimes 5121549Srgrimes return rtaddress; 5131549Srgrimes} 5141549Srgrimes 5151549Srgrimes/* 5161549Srgrimes * interrupt routine for I/O completion 5171549Srgrimes */ 5181549Srgrimesvoid 5191549Srgrimesvnode_pager_iodone(bp) 5201549Srgrimes struct buf *bp; 5211549Srgrimes{ 5221549Srgrimes bp->b_flags |= B_DONE; 5231827Sdg wakeup((caddr_t) bp); 5241549Srgrimes} 5251549Srgrimes 5261549Srgrimes/* 5271549Srgrimes * small block file system vnode pager input 5281549Srgrimes */ 5291549Srgrimesint 5301549Srgrimesvnode_pager_input_smlfs(vnp, m) 5311549Srgrimes vn_pager_t vnp; 5321549Srgrimes vm_page_t m; 5331549Srgrimes{ 5345455Sdg int i; 5355455Sdg int s; 5361549Srgrimes struct vnode *dp, *vp; 5371549Srgrimes struct buf *bp; 5381549Srgrimes vm_offset_t kva; 5395455Sdg int fileaddr; 5405455Sdg int block; 5411549Srgrimes vm_offset_t bsize; 5425455Sdg int error = 0; 5431549Srgrimes 5441549Srgrimes vp = vnp->vnp_vp; 5451549Srgrimes bsize = vp->v_mount->mnt_stat.f_iosize; 5461549Srgrimes 5477178Sdg 5485455Sdg VOP_BMAP(vp, 0, &dp, 0, 0); 5491549Srgrimes 5501549Srgrimes kva = vm_pager_map_page(m); 5511549Srgrimes 5521827Sdg for (i = 0; i < PAGE_SIZE / bsize; i++) { 5531827Sdg 5545455Sdg if ((vm_page_bits(m->offset + i * bsize, bsize) & m->valid)) 5555455Sdg continue; 5561549Srgrimes 5576151Sdg fileaddr = vnode_pager_addr(vp, m->offset + i * bsize, 
(int *)0); 5581827Sdg if (fileaddr != -1) { 5591549Srgrimes bp = getpbuf(); 5601549Srgrimes 5611827Sdg /* build a minimal buffer header */ 5621549Srgrimes bp->b_flags = B_BUSY | B_READ | B_CALL; 5631549Srgrimes bp->b_iodone = vnode_pager_iodone; 5641549Srgrimes bp->b_proc = curproc; 5651549Srgrimes bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred; 5661827Sdg if (bp->b_rcred != NOCRED) 5671549Srgrimes crhold(bp->b_rcred); 5681827Sdg if (bp->b_wcred != NOCRED) 5691549Srgrimes crhold(bp->b_wcred); 5701549Srgrimes bp->b_un.b_addr = (caddr_t) kva + i * bsize; 5716626Sdg bp->b_blkno = fileaddr; 5725455Sdg pbgetvp(dp, bp); 5731549Srgrimes bp->b_bcount = bsize; 5741549Srgrimes bp->b_bufsize = bsize; 5751827Sdg 5761827Sdg /* do the input */ 5771549Srgrimes VOP_STRATEGY(bp); 5781549Srgrimes 5791827Sdg /* we definitely need to be at splbio here */ 5801549Srgrimes 5811549Srgrimes s = splbio(); 5821549Srgrimes while ((bp->b_flags & B_DONE) == 0) { 5831827Sdg tsleep((caddr_t) bp, PVM, "vnsrd", 0); 5841549Srgrimes } 5851549Srgrimes splx(s); 5861549Srgrimes if ((bp->b_flags & B_ERROR) != 0) 5871549Srgrimes error = EIO; 5881549Srgrimes 5891827Sdg /* 5901827Sdg * free the buffer header back to the swap buffer pool 5911827Sdg */ 5921549Srgrimes relpbuf(bp); 5931827Sdg if (error) 5941549Srgrimes break; 5955455Sdg 5967695Sdg vm_page_set_clean(m, (i * bsize) & (PAGE_SIZE-1), bsize); 5977695Sdg vm_page_set_valid(m, (i * bsize) & (PAGE_SIZE-1), bsize); 5981549Srgrimes } else { 5997695Sdg vm_page_set_clean(m, (i * bsize) & (PAGE_SIZE-1), bsize); 6001549Srgrimes bzero((caddr_t) kva + i * bsize, bsize); 6011549Srgrimes } 6021549Srgrimesnextblock: 6031549Srgrimes } 6041549Srgrimes vm_pager_unmap_page(kva); 6055455Sdg pmap_clear_modify(VM_PAGE_TO_PHYS(m)); 6061827Sdg if (error) { 6074207Sdg return VM_PAGER_ERROR; 6081549Srgrimes } 6091549Srgrimes return VM_PAGER_OK; 6101549Srgrimes 6111549Srgrimes} 6121549Srgrimes 6131549Srgrimes 6141549Srgrimes/* 6151549Srgrimes * old style vnode pager output 
routine 6161549Srgrimes */ 6171549Srgrimesint 6181549Srgrimesvnode_pager_input_old(vnp, m) 6191549Srgrimes vn_pager_t vnp; 6201549Srgrimes vm_page_t m; 6211549Srgrimes{ 6221541Srgrimes struct uio auio; 6231541Srgrimes struct iovec aiov; 6245455Sdg int error; 6255455Sdg int size; 6261549Srgrimes vm_offset_t kva; 6271549Srgrimes 6281549Srgrimes error = 0; 6291827Sdg 6301549Srgrimes /* 6311549Srgrimes * Return failure if beyond current EOF 6321549Srgrimes */ 6335455Sdg if (m->offset >= vnp->vnp_size) { 6341549Srgrimes return VM_PAGER_BAD; 6351549Srgrimes } else { 6361549Srgrimes size = PAGE_SIZE; 6375455Sdg if (m->offset + size > vnp->vnp_size) 6385455Sdg size = vnp->vnp_size - m->offset; 6397178Sdg 6405455Sdg /* 6415455Sdg * Allocate a kernel virtual address and initialize so that 6425455Sdg * we can use VOP_READ/WRITE routines. 6435455Sdg */ 6441549Srgrimes kva = vm_pager_map_page(m); 6457178Sdg 6461827Sdg aiov.iov_base = (caddr_t) kva; 6471549Srgrimes aiov.iov_len = size; 6481549Srgrimes auio.uio_iov = &aiov; 6491549Srgrimes auio.uio_iovcnt = 1; 6505455Sdg auio.uio_offset = m->offset; 6511549Srgrimes auio.uio_segflg = UIO_SYSSPACE; 6521549Srgrimes auio.uio_rw = UIO_READ; 6531549Srgrimes auio.uio_resid = size; 6541827Sdg auio.uio_procp = (struct proc *) 0; 6551549Srgrimes 6561549Srgrimes error = VOP_READ(vnp->vnp_vp, &auio, 0, curproc->p_ucred); 6571549Srgrimes if (!error) { 6581549Srgrimes register int count = size - auio.uio_resid; 6591549Srgrimes 6601549Srgrimes if (count == 0) 6611549Srgrimes error = EINVAL; 6621549Srgrimes else if (count != PAGE_SIZE) 6631827Sdg bzero((caddr_t) kva + count, PAGE_SIZE - count); 6641549Srgrimes } 6651549Srgrimes vm_pager_unmap_page(kva); 6661549Srgrimes } 6671549Srgrimes pmap_clear_modify(VM_PAGE_TO_PHYS(m)); 6685455Sdg m->dirty = 0; 6694207Sdg return error ? 
VM_PAGER_ERROR : VM_PAGER_OK; 6701549Srgrimes} 6711549Srgrimes 6721549Srgrimes/* 6731549Srgrimes * generic vnode pager input routine 6741549Srgrimes */ 6751549Srgrimesint 6761549Srgrimesvnode_pager_input(vnp, m, count, reqpage) 6771549Srgrimes register vn_pager_t vnp; 6781549Srgrimes vm_page_t *m; 6795455Sdg int count, reqpage; 6801549Srgrimes{ 6815455Sdg int i; 6821541Srgrimes vm_offset_t kva, foff; 6837178Sdg int size; 6841549Srgrimes vm_object_t object; 6851549Srgrimes struct vnode *dp, *vp; 6865455Sdg int bsize; 6871541Srgrimes 6885455Sdg int first, last; 6896151Sdg int firstaddr; 6905455Sdg int block, offset; 6916151Sdg int runpg; 6926151Sdg int runend; 6931549Srgrimes 6947178Sdg struct buf *bp; 6955455Sdg int s; 6965455Sdg int failflag; 6971549Srgrimes 6985455Sdg int error = 0; 6991549Srgrimes 7001827Sdg object = m[reqpage]->object; /* all vm_page_t items are in same 7011827Sdg * object */ 7021549Srgrimes 7031549Srgrimes vp = vnp->vnp_vp; 7041549Srgrimes bsize = vp->v_mount->mnt_stat.f_iosize; 7051549Srgrimes 7061549Srgrimes /* get the UNDERLYING device for the file with VOP_BMAP() */ 7071827Sdg 7081549Srgrimes /* 7091827Sdg * originally, we did not check for an error return value -- assuming 7101827Sdg * an fs always has a bmap entry point -- that assumption is wrong!!! 7111549Srgrimes */ 7125455Sdg foff = m[reqpage]->offset; 7131827Sdg 7141549Srgrimes /* 7151887Sdg * if we can't bmap, use old VOP code 7161549Srgrimes */ 7175455Sdg if (VOP_BMAP(vp, 0, &dp, 0, 0)) { 7181549Srgrimes for (i = 0; i < count; i++) { 7191549Srgrimes if (i != reqpage) { 7201549Srgrimes vnode_pager_freepage(m[i]); 7211549Srgrimes } 7221549Srgrimes } 7233612Sdg cnt.v_vnodein++; 7243612Sdg cnt.v_vnodepgsin++; 7251549Srgrimes return vnode_pager_input_old(vnp, m[reqpage]); 7261549Srgrimes 7271827Sdg /* 7281827Sdg * if the blocksize is smaller than a page size, then use 7291827Sdg * special small filesystem code. 
NFS sometimes has a small 7301827Sdg * blocksize, but it can handle large reads itself. 7311827Sdg */ 7321827Sdg } else if ((PAGE_SIZE / bsize) > 1 && 7335455Sdg (vp->v_mount->mnt_stat.f_type != MOUNT_NFS)) { 7341827Sdg 7351549Srgrimes for (i = 0; i < count; i++) { 7361549Srgrimes if (i != reqpage) { 7371549Srgrimes vnode_pager_freepage(m[i]); 7381549Srgrimes } 7391549Srgrimes } 7403612Sdg cnt.v_vnodein++; 7413612Sdg cnt.v_vnodepgsin++; 7421549Srgrimes return vnode_pager_input_smlfs(vnp, m[reqpage]); 7431549Srgrimes } 7441549Srgrimes /* 7455455Sdg * if ANY DEV_BSIZE blocks are valid on a large filesystem block 7465455Sdg * then, the entire page is valid -- 7471549Srgrimes */ 7485455Sdg if (m[reqpage]->valid) { 7495455Sdg m[reqpage]->valid = VM_PAGE_BITS_ALL; 7505455Sdg for (i = 0; i < count; i++) { 7515455Sdg if (i != reqpage) 7525455Sdg vnode_pager_freepage(m[i]); 7531549Srgrimes } 7545455Sdg return VM_PAGER_OK; 7551549Srgrimes } 7567178Sdg 7575455Sdg /* 7585455Sdg * here on direct device I/O 7595455Sdg */ 7601549Srgrimes 7616151Sdg firstaddr = -1; 7621549Srgrimes /* 7636151Sdg * calculate the run that includes the required page 7641549Srgrimes */ 7656151Sdg for(first = 0, i = 0; i < count; i = runend) { 7666151Sdg firstaddr = vnode_pager_addr(vp, m[i]->offset, &runpg); 7676151Sdg if (firstaddr == -1) { 7686151Sdg if( i == reqpage && foff < vnp->vnp_size) { 7696151Sdg printf("vnode_pager_input: unexpected missing page: firstaddr: %d, foff: %d, vnp_size: %d\n", 7706151Sdg firstaddr, foff, vnp->vnp_size); 7716151Sdg panic("vnode_pager_input:..."); 7726151Sdg } 7731549Srgrimes vnode_pager_freepage(m[i]); 7746151Sdg runend = i + 1; 7756151Sdg first = runend; 7766151Sdg continue; 7771549Srgrimes } 7786151Sdg runend = i + runpg; 7796151Sdg if( runend <= reqpage) { 7806151Sdg int j; 7816151Sdg for(j = i; j < runend; j++) { 7826151Sdg vnode_pager_freepage(m[j]); 7836151Sdg } 7841549Srgrimes } else { 7856151Sdg if( runpg < (count - first)) { 7866151Sdg for(i=first + runpg; 
i < count; i++) 7876151Sdg vnode_pager_freepage(m[i]); 7886151Sdg count = first + runpg; 7896151Sdg } 7906151Sdg break; 7911549Srgrimes } 7926151Sdg first = runend; 7931549Srgrimes } 7941549Srgrimes 7951549Srgrimes /* 7961827Sdg * the first and last page have been calculated now, move input pages 7971827Sdg * to be zero based... 7981549Srgrimes */ 7991549Srgrimes if (first != 0) { 8001549Srgrimes for (i = first; i < count; i++) { 8011549Srgrimes m[i - first] = m[i]; 8021549Srgrimes } 8031549Srgrimes count -= first; 8041549Srgrimes reqpage -= first; 8051549Srgrimes } 8066151Sdg 8071549Srgrimes /* 8081549Srgrimes * calculate the file virtual address for the transfer 8091549Srgrimes */ 8105455Sdg foff = m[0]->offset; 8111827Sdg 8121549Srgrimes /* 8131549Srgrimes * calculate the size of the transfer 8141549Srgrimes */ 8151549Srgrimes size = count * PAGE_SIZE; 8161549Srgrimes if ((foff + size) > vnp->vnp_size) 8171549Srgrimes size = vnp->vnp_size - foff; 8181549Srgrimes 8191549Srgrimes /* 8201549Srgrimes * round up physical size for real devices 8211549Srgrimes */ 8221827Sdg if (dp->v_type == VBLK || dp->v_type == VCHR) 8231549Srgrimes size = (size + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1); 8241549Srgrimes 8255841Sdg bp = getpbuf(); 8265455Sdg kva = (vm_offset_t) bp->b_data; 8271887Sdg 8281549Srgrimes /* 8291549Srgrimes * and map the pages to be read into the kva 8301549Srgrimes */ 8311887Sdg pmap_qenter(kva, m, count); 8321549Srgrimes 8331549Srgrimes /* build a minimal buffer header */ 8341549Srgrimes bp->b_flags = B_BUSY | B_READ | B_CALL; 8351549Srgrimes bp->b_iodone = vnode_pager_iodone; 8361549Srgrimes /* B_PHYS is not set, but it is nice to fill this in */ 8371549Srgrimes bp->b_proc = curproc; 8381549Srgrimes bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred; 8391827Sdg if (bp->b_rcred != NOCRED) 8401549Srgrimes crhold(bp->b_rcred); 8411827Sdg if (bp->b_wcred != NOCRED) 8421549Srgrimes crhold(bp->b_wcred); 8436626Sdg bp->b_blkno = firstaddr; 8445455Sdg pbgetvp(dp, bp); 
8451549Srgrimes bp->b_bcount = size; 8461549Srgrimes bp->b_bufsize = size; 8471549Srgrimes 8483612Sdg cnt.v_vnodein++; 8493612Sdg cnt.v_vnodepgsin += count; 8503612Sdg 8511549Srgrimes /* do the input */ 8521549Srgrimes VOP_STRATEGY(bp); 8533612Sdg 8541549Srgrimes s = splbio(); 8551549Srgrimes /* we definitely need to be at splbio here */ 8561549Srgrimes 8571549Srgrimes while ((bp->b_flags & B_DONE) == 0) { 8581827Sdg tsleep((caddr_t) bp, PVM, "vnread", 0); 8591549Srgrimes } 8601549Srgrimes splx(s); 8611549Srgrimes if ((bp->b_flags & B_ERROR) != 0) 8621549Srgrimes error = EIO; 8631549Srgrimes 8641549Srgrimes if (!error) { 8651549Srgrimes if (size != count * PAGE_SIZE) 8661827Sdg bzero((caddr_t) kva + size, PAGE_SIZE * count - size); 8671549Srgrimes } 8685455Sdg pmap_qremove(kva, count); 8691549Srgrimes 8701549Srgrimes /* 8711549Srgrimes * free the buffer header back to the swap buffer pool 8721549Srgrimes */ 8731549Srgrimes relpbuf(bp); 8741549Srgrimes 8751549Srgrimesfinishup: 8761549Srgrimes for (i = 0; i < count; i++) { 8772386Sdg pmap_clear_modify(VM_PAGE_TO_PHYS(m[i])); 8785455Sdg m[i]->dirty = 0; 8795455Sdg m[i]->valid = VM_PAGE_BITS_ALL; 8801549Srgrimes if (i != reqpage) { 8811827Sdg 8821549Srgrimes /* 8831827Sdg * whether or not to leave the page activated is up in 8841827Sdg * the air, but we should put the page on a page queue 8851827Sdg * somewhere. (it already is in the object). Result: 8861827Sdg * It appears that emperical results show that 8871827Sdg * deactivating pages is best. 
8881549Srgrimes */ 8891827Sdg 8901549Srgrimes /* 8911827Sdg * just in case someone was asking for this page we 8921827Sdg * now tell them that it is ok to use 8931549Srgrimes */ 8941549Srgrimes if (!error) { 8955841Sdg vm_page_deactivate(m[i]); 8961549Srgrimes PAGE_WAKEUP(m[i]); 8971549Srgrimes } else { 8981549Srgrimes vnode_pager_freepage(m[i]); 8991549Srgrimes } 9001549Srgrimes } 9011549Srgrimes } 9021549Srgrimes if (error) { 9034207Sdg printf("vnode_pager_input: I/O read error\n"); 9041549Srgrimes } 9054207Sdg return (error ? VM_PAGER_ERROR : VM_PAGER_OK); 9061549Srgrimes} 9071549Srgrimes 9081549Srgrimes/* 9091549Srgrimes * generic vnode pager output routine 9101549Srgrimes */ 9111549Srgrimesint 9121549Srgrimesvnode_pager_output(vnp, m, count, rtvals) 9131549Srgrimes vn_pager_t vnp; 9141549Srgrimes vm_page_t *m; 9155455Sdg int count; 9165455Sdg int *rtvals; 9171549Srgrimes{ 9187695Sdg int i; 9191549Srgrimes 9207695Sdg struct vnode *vp; 9217695Sdg int maxsize, ncount; 9227695Sdg struct uio auio; 9237695Sdg struct iovec aiov; 9247695Sdg int error; 9251549Srgrimes 9261549Srgrimes vp = vnp->vnp_vp; 9271827Sdg for (i = 0; i < count; i++) 9281549Srgrimes rtvals[i] = VM_PAGER_AGAIN; 9291549Srgrimes 9305455Sdg if ((int) m[0]->offset < 0) { 9317695Sdg printf("vnode_pager_output: attempt to write meta-data!!! 
-- 0x%x(%x)\n", m[0]->offset, m[0]->dirty); 9327695Sdg rtvals[0] = VM_PAGER_BAD; 9337695Sdg return VM_PAGER_BAD; 9345455Sdg } 9357178Sdg 9367695Sdg maxsize = count * PAGE_SIZE; 9377695Sdg ncount = count; 9381549Srgrimes 9398585Sdg if (maxsize + m[0]->offset > vnp->vnp_size) { 9408585Sdg if (vnp->vnp_size > m[0]->offset) 9418585Sdg maxsize = vnp->vnp_size - m[0]->offset; 9428585Sdg else 9438585Sdg maxsize = 0; 9447695Sdg ncount = (maxsize + PAGE_SIZE - 1) / PAGE_SIZE; 9458585Sdg if (ncount < count) { 9468585Sdg for (i = ncount; i < count; i++) { 9477695Sdg rtvals[i] = VM_PAGER_BAD; 9481549Srgrimes } 9498585Sdg if (ncount == 0) { 9507695Sdg printf("vnode_pager_output: write past end of file: %d, %d\n", 9517695Sdg m[0]->offset, vnp->vnp_size); 9527695Sdg return rtvals[0]; 9537695Sdg } 9541549Srgrimes } 9551541Srgrimes } 9567695Sdg 9578585Sdg for (i = 0; i < count; i++) { 9588585Sdg m[i]->busy++; 9597695Sdg m[i]->flags &= ~PG_BUSY; 9601549Srgrimes } 9611827Sdg 9627695Sdg aiov.iov_base = (caddr_t) 0; 9637695Sdg aiov.iov_len = maxsize; 9647695Sdg auio.uio_iov = &aiov; 9657695Sdg auio.uio_iovcnt = 1; 9667695Sdg auio.uio_offset = m[0]->offset; 9677695Sdg auio.uio_segflg = UIO_NOCOPY; 9687695Sdg auio.uio_rw = UIO_WRITE; 9697695Sdg auio.uio_resid = maxsize; 9707695Sdg auio.uio_procp = (struct proc *) 0; 9717695Sdg error = VOP_WRITE(vp, &auio, IO_VMIO, curproc->p_ucred); 9723612Sdg cnt.v_vnodeout++; 9737695Sdg cnt.v_vnodepgsout += ncount; 9743612Sdg 9758585Sdg if (error) { 9767695Sdg printf("vnode_pager_output: I/O error %d\n", error); 9777695Sdg } 9788585Sdg if (auio.uio_resid) { 9797695Sdg printf("vnode_pager_output: residual I/O %d at %d\n", auio.uio_resid, m[0]->offset); 9807695Sdg } 9818585Sdg for (i = 0; i < count; i++) { 9828585Sdg m[i]->busy--; 9838585Sdg if (i < ncount) { 9847695Sdg rtvals[i] = VM_PAGER_OK; 9857695Sdg } 9868585Sdg if ((m[i]->busy == 0) && (m[i]->flags & PG_WANTED)) 9877695Sdg wakeup((caddr_t) m[i]); 9887695Sdg } 9897695Sdg return rtvals[0]; 
9907695Sdg} 9911549Srgrimes 9927695Sdgstruct vnode * 9937695Sdgvnode_pager_lock(vm_object_t object) { 9941549Srgrimes 9957695Sdg for(;object;object=object->shadow) { 9967695Sdg vn_pager_t vnp; 9977695Sdg if( !object->pager || (object->pager->pg_type != PG_VNODE)) 9987695Sdg continue; 9991549Srgrimes 10007695Sdg vnp = (vn_pager_t) object->pager->pg_data; 10017695Sdg VOP_LOCK(vnp->vnp_vp); 10027695Sdg return vnp->vnp_vp; 10031549Srgrimes } 10047695Sdg return (struct vnode *)NULL; 10057695Sdg} 10061549Srgrimes 10077695Sdgvoid 10087695Sdgvnode_pager_unlock(struct vnode *vp) { 10097178Sdg VOP_UNLOCK(vp); 10107695Sdg} 10117178Sdg 1012