/*
 * Copyright (c) 1990 University of Utah.
 * Copyright (c) 1991 The Regents of the University of California.
 * All rights reserved.
 * Copyright (c) 1993,1994 John S. Dyson
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: @(#)vnode_pager.c	7.5 (Berkeley) 4/20/91
 *	$Id: vnode_pager.c,v 1.41 1995/06/28 12:01:13 davidg Exp $
 */

/*
 * Page to/from files (vnodes).
 *
 * TODO:
 *	pageouts
 *	fix credential use (uses current process credentials now)
 */

/*
 * 1) Supports multiple - block reads/writes
 * 2) Bypasses buffer cache for reads
 *
 * TODO:
 *	Implement getpage/putpage interface for filesystems.  Should
 *	greatly re-simplify the vnode_pager.
581549Srgrimes * 591549Srgrimes */ 601549Srgrimes 611541Srgrimes#include <sys/param.h> 621541Srgrimes#include <sys/systm.h> 635455Sdg#include <sys/kernel.h> 641541Srgrimes#include <sys/proc.h> 651541Srgrimes#include <sys/malloc.h> 661541Srgrimes#include <sys/vnode.h> 671541Srgrimes#include <sys/uio.h> 681541Srgrimes#include <sys/mount.h> 691541Srgrimes 701541Srgrimes#include <vm/vm.h> 711541Srgrimes#include <vm/vm_page.h> 721541Srgrimes#include <vm/vnode_pager.h> 731541Srgrimes 741549Srgrimes#include <sys/buf.h> 751549Srgrimes#include <miscfs/specfs/specdev.h> 761541Srgrimes 775455Sdgint vnode_pager_putmulti(); 781541Srgrimes 795455Sdgvoid vnode_pager_init(); 805455Sdgvoid vnode_pager_dealloc(); 815455Sdgint vnode_pager_getpage(); 825455Sdgint vnode_pager_getmulti(); 835455Sdgint vnode_pager_putpage(); 841827Sdgboolean_t vnode_pager_haspage(); 851541Srgrimes 861541Srgrimesstruct pagerops vnodepagerops = { 871541Srgrimes vnode_pager_init, 881541Srgrimes vnode_pager_alloc, 891541Srgrimes vnode_pager_dealloc, 901541Srgrimes vnode_pager_getpage, 911549Srgrimes vnode_pager_getmulti, 921541Srgrimes vnode_pager_putpage, 931549Srgrimes vnode_pager_putmulti, 941549Srgrimes vnode_pager_haspage 951541Srgrimes}; 961541Srgrimes 971887Sdg 981887Sdg 991827Sdgstatic int vnode_pager_input(vn_pager_t vnp, vm_page_t * m, int count, int reqpage); 1001827Sdgstatic int vnode_pager_output(vn_pager_t vnp, vm_page_t * m, int count, int *rtvals); 1011549Srgrimes 1021549Srgrimesextern vm_map_t pager_map; 1031549Srgrimes 1041549Srgrimesstruct pagerlst vnode_pager_list; /* list of managed vnodes */ 1051549Srgrimes 1061549Srgrimes#define MAXBP (PAGE_SIZE/DEV_BSIZE); 1071549Srgrimes 1081549Srgrimesvoid 1091541Srgrimesvnode_pager_init() 1101541Srgrimes{ 1111541Srgrimes TAILQ_INIT(&vnode_pager_list); 1121541Srgrimes} 1131541Srgrimes 1141541Srgrimes/* 1151541Srgrimes * Allocate (or lookup) pager for a vnode. 1161541Srgrimes * Handle is a vnode pointer. 
1171541Srgrimes */ 1181549Srgrimesvm_pager_t 1191549Srgrimesvnode_pager_alloc(handle, size, prot, offset) 1208416Sdg void *handle; 1211541Srgrimes vm_size_t size; 1221541Srgrimes vm_prot_t prot; 1231549Srgrimes vm_offset_t offset; 1241541Srgrimes{ 1251541Srgrimes register vm_pager_t pager; 1261541Srgrimes register vn_pager_t vnp; 1275455Sdg vm_object_t object, tobject; 1281541Srgrimes struct vattr vattr; 1291541Srgrimes struct vnode *vp; 1301541Srgrimes struct proc *p = curproc; /* XXX */ 1315455Sdg int rtval; 1321541Srgrimes 1331541Srgrimes /* 1341541Srgrimes * Pageout to vnode, no can do yet. 1351541Srgrimes */ 1361541Srgrimes if (handle == NULL) 1371827Sdg return (NULL); 1381541Srgrimes 1399411Sdg vp = (struct vnode *) handle; 1409411Sdg 1411541Srgrimes /* 1429411Sdg * Prevent race condition when allocating the object. This 1439411Sdg * can happen with NFS vnodes since the nfsnode isn't locked. 1441541Srgrimes */ 1459411Sdg while (vp->v_flag & VOLOCK) { 1469411Sdg vp->v_flag |= VOWANT; 1479411Sdg tsleep(vp, PVM, "vnpobj", 0); 1489411Sdg } 1499411Sdg vp->v_flag |= VOLOCK; 1509411Sdg 1519411Sdg /* 1529411Sdg * If the object is being terminated, wait for it to 1539411Sdg * go away. 
1549411Sdg */ 1559411Sdg while (((object = vp->v_object) != NULL) && (object->flags & OBJ_DEAD)) 1569356Sdg tsleep(object, PVM, "vadead", 0); 1575455Sdg 1583374Sdg pager = NULL; 1595455Sdg if (object != NULL) 1603374Sdg pager = object->pager; 1611541Srgrimes if (pager == NULL) { 1621827Sdg 1631541Srgrimes /* 1641541Srgrimes * Allocate pager structures 1651541Srgrimes */ 1661827Sdg pager = (vm_pager_t) malloc(sizeof *pager, M_VMPAGER, M_WAITOK); 1671827Sdg vnp = (vn_pager_t) malloc(sizeof *vnp, M_VMPGDATA, M_WAITOK); 1689411Sdg 1691541Srgrimes /* 1701541Srgrimes * And an object of the appropriate size 1711541Srgrimes */ 1725455Sdg if ((rtval = VOP_GETATTR(vp, &vattr, p->p_ucred, p)) == 0) { 1731541Srgrimes object = vm_object_allocate(round_page(vattr.va_size)); 1747014Sdg object->flags = OBJ_CANPERSIST; 1751541Srgrimes vm_object_enter(object, pager); 1766585Sdg object->pager = pager; 1771541Srgrimes } else { 1789411Sdg /* 1799411Sdg * The VOP_GETATTR failed... 1809411Sdg * Unlock, wakeup any waiters, free pagers, and exit. 1819411Sdg */ 1829411Sdg vp->v_flag &= ~VOLOCK; 1839411Sdg if (vp->v_flag & VOWANT) { 1849411Sdg vp->v_flag &= ~VOWANT; 1859411Sdg wakeup(vp); 1869411Sdg } 1871827Sdg free((caddr_t) vnp, M_VMPGDATA); 1881827Sdg free((caddr_t) pager, M_VMPAGER); 1891827Sdg return (NULL); 1901541Srgrimes } 1911827Sdg 1921541Srgrimes /* 1931541Srgrimes * Hold a reference to the vnode and initialize pager data. 
1941541Srgrimes */ 1951541Srgrimes VREF(vp); 1961541Srgrimes vnp->vnp_flags = 0; 1971541Srgrimes vnp->vnp_vp = vp; 1981541Srgrimes vnp->vnp_size = vattr.va_size; 1991549Srgrimes 2001541Srgrimes TAILQ_INSERT_TAIL(&vnode_pager_list, pager, pg_list); 2011541Srgrimes pager->pg_handle = handle; 2021541Srgrimes pager->pg_type = PG_VNODE; 2031541Srgrimes pager->pg_ops = &vnodepagerops; 2041827Sdg pager->pg_data = (caddr_t) vnp; 2059356Sdg vp->v_object = (caddr_t) object; 2061541Srgrimes } else { 2071827Sdg 2081541Srgrimes /* 2091827Sdg * vm_object_lookup() will remove the object from the cache if 2101827Sdg * found and also gain a reference to the object. 2111541Srgrimes */ 2123374Sdg (void) vm_object_lookup(pager); 2131541Srgrimes } 2149411Sdg 2159411Sdg if (vp->v_type == VREG) 2167695Sdg vp->v_flag |= VVMIO; 2179411Sdg 2189411Sdg vp->v_flag &= ~VOLOCK; 2199411Sdg if (vp->v_flag & VOWANT) { 2209411Sdg vp->v_flag &= ~VOWANT; 2219411Sdg wakeup(vp); 2229411Sdg } 2231827Sdg return (pager); 2241541Srgrimes} 2251541Srgrimes 2261549Srgrimesvoid 2271541Srgrimesvnode_pager_dealloc(pager) 2281541Srgrimes vm_pager_t pager; 2291541Srgrimes{ 2301827Sdg register vn_pager_t vnp = (vn_pager_t) pager->pg_data; 2311541Srgrimes register struct vnode *vp; 2325455Sdg vm_object_t object; 2331541Srgrimes 2343449Sphk vp = vnp->vnp_vp; 2353449Sphk if (vp) { 2365455Sdg int s = splbio(); 2375455Sdg 2389356Sdg object = vp->v_object; 2395455Sdg if (object) { 2405455Sdg while (object->paging_in_progress) { 2416618Sdg object->flags |= OBJ_PIPWNT; 2425455Sdg tsleep(object, PVM, "vnpdea", 0); 2435455Sdg } 2445455Sdg } 2455455Sdg splx(s); 2465455Sdg 2479356Sdg vp->v_object = NULL; 2485455Sdg vp->v_flag &= ~(VTEXT | VVMIO); 2496947Sdg vp->v_flag |= VAGE; 2501541Srgrimes vrele(vp); 2511541Srgrimes } 2521541Srgrimes TAILQ_REMOVE(&vnode_pager_list, pager, pg_list); 2531827Sdg free((caddr_t) vnp, M_VMPGDATA); 2541827Sdg free((caddr_t) pager, M_VMPAGER); 2551541Srgrimes} 2561541Srgrimes 2571549Srgrimesint 
2581549Srgrimesvnode_pager_getmulti(pager, m, count, reqpage, sync) 2591541Srgrimes vm_pager_t pager; 2601549Srgrimes vm_page_t *m; 2615455Sdg int count; 2625455Sdg int reqpage; 2631541Srgrimes boolean_t sync; 2641541Srgrimes{ 2651827Sdg 2661549Srgrimes return vnode_pager_input((vn_pager_t) pager->pg_data, m, count, reqpage); 2671549Srgrimes} 2681541Srgrimes 2691549Srgrimesint 2701549Srgrimesvnode_pager_getpage(pager, m, sync) 2711549Srgrimes vm_pager_t pager; 2721549Srgrimes vm_page_t m; 2731549Srgrimes boolean_t sync; 2741549Srgrimes{ 2751549Srgrimes 2761549Srgrimes vm_page_t marray[1]; 2771827Sdg 2781549Srgrimes if (pager == NULL) 2791549Srgrimes return FALSE; 2801549Srgrimes marray[0] = m; 2811549Srgrimes 2821827Sdg return vnode_pager_input((vn_pager_t) pager->pg_data, marray, 1, 0); 2831541Srgrimes} 2841541Srgrimes 2851549Srgrimesboolean_t 2861549Srgrimesvnode_pager_putpage(pager, m, sync) 2871541Srgrimes vm_pager_t pager; 2881549Srgrimes vm_page_t m; 2891541Srgrimes boolean_t sync; 2901541Srgrimes{ 2911549Srgrimes vm_page_t marray[1]; 2925455Sdg int rtvals[1]; 2931541Srgrimes 2941541Srgrimes if (pager == NULL) 2951549Srgrimes return FALSE; 2961549Srgrimes marray[0] = m; 2971827Sdg vnode_pager_output((vn_pager_t) pager->pg_data, marray, 1, rtvals); 2981549Srgrimes return rtvals[0]; 2991541Srgrimes} 3001541Srgrimes 3011549Srgrimesint 3021549Srgrimesvnode_pager_putmulti(pager, m, c, sync, rtvals) 3031549Srgrimes vm_pager_t pager; 3041549Srgrimes vm_page_t *m; 3055455Sdg int c; 3061549Srgrimes boolean_t sync; 3075455Sdg int *rtvals; 3081549Srgrimes{ 3091827Sdg return vnode_pager_output((vn_pager_t) pager->pg_data, m, c, rtvals); 3101549Srgrimes} 3111549Srgrimes 3121549Srgrimes 3131549Srgrimesboolean_t 3141541Srgrimesvnode_pager_haspage(pager, offset) 3151541Srgrimes vm_pager_t pager; 3161541Srgrimes vm_offset_t offset; 3171541Srgrimes{ 3181827Sdg register vn_pager_t vnp = (vn_pager_t) pager->pg_data; 3194797Sdg register struct vnode *vp = vnp->vnp_vp; 
3201541Srgrimes daddr_t bn; 3215455Sdg int err; 3224446Sdg daddr_t block; 3231541Srgrimes 3241541Srgrimes /* 3255455Sdg * If filesystem no longer mounted or offset beyond end of file we do 3265455Sdg * not have the page. 3271541Srgrimes */ 3284797Sdg if ((vp->v_mount == NULL) || (offset >= vnp->vnp_size)) 3294797Sdg return FALSE; 3301541Srgrimes 3314797Sdg block = offset / vp->v_mount->mnt_stat.f_iosize; 3324797Sdg if (incore(vp, block)) 3334446Sdg return TRUE; 3347178Sdg 3351541Srgrimes /* 3361827Sdg * Read the index to find the disk block to read from. If there is no 3371827Sdg * block, report that we don't have this data. 3388876Srgrimes * 3391541Srgrimes * Assumes that the vnode has whole page or nothing. 3401541Srgrimes */ 3414797Sdg err = VOP_BMAP(vp, block, (struct vnode **) 0, &bn, 0); 3428876Srgrimes if (err) 3431827Sdg return (TRUE); 3441827Sdg return ((long) bn < 0 ? FALSE : TRUE); 3451541Srgrimes} 3461541Srgrimes 3471541Srgrimes/* 3481541Srgrimes * Lets the VM system know about a change in size for a file. 3491541Srgrimes * If this vnode is mapped into some address space (i.e. we have a pager 3501541Srgrimes * for it) we adjust our own internal size and flush any cached pages in 3511541Srgrimes * the associated object that are affected by the size change. 3521541Srgrimes * 3531541Srgrimes * Note: this routine may be invoked as a result of a pager put 3541541Srgrimes * operation (possibly at object termination time), so we must be careful. 
3551541Srgrimes */ 3561541Srgrimesvoid 3571541Srgrimesvnode_pager_setsize(vp, nsize) 3581541Srgrimes struct vnode *vp; 3595455Sdg u_long nsize; 3601541Srgrimes{ 3611541Srgrimes register vn_pager_t vnp; 3621541Srgrimes register vm_object_t object; 3631541Srgrimes vm_pager_t pager; 3641541Srgrimes 3651541Srgrimes /* 3661541Srgrimes * Not a mapped vnode 3671541Srgrimes */ 3689356Sdg if (vp == NULL || vp->v_type != VREG || vp->v_object == NULL) 3691541Srgrimes return; 3701827Sdg 3711541Srgrimes /* 3721541Srgrimes * Hasn't changed size 3731541Srgrimes */ 3749356Sdg object = vp->v_object; 3755455Sdg if (object == NULL) 3763374Sdg return; 3775455Sdg if ((pager = object->pager) == NULL) 3783374Sdg return; 3791827Sdg vnp = (vn_pager_t) pager->pg_data; 3801541Srgrimes if (nsize == vnp->vnp_size) 3811541Srgrimes return; 3821827Sdg 3831541Srgrimes /* 3841827Sdg * File has shrunk. Toss any cached pages beyond the new EOF. 3851541Srgrimes */ 3861827Sdg if (nsize < vnp->vnp_size) { 3875455Sdg if (round_page((vm_offset_t) nsize) < vnp->vnp_size) { 3885455Sdg vm_object_lock(object); 3895455Sdg vm_object_page_remove(object, 3907204Sdg round_page((vm_offset_t) nsize), vnp->vnp_size, FALSE); 3915455Sdg vm_object_unlock(object); 3925455Sdg } 3931827Sdg /* 3941827Sdg * this gets rid of garbage at the end of a page that is now 3951827Sdg * only partially backed by the vnode... 
3961827Sdg */ 3971827Sdg if (nsize & PAGE_MASK) { 3981827Sdg vm_offset_t kva; 3991827Sdg vm_page_t m; 4001827Sdg 4011827Sdg m = vm_page_lookup(object, trunc_page((vm_offset_t) nsize)); 4021827Sdg if (m) { 4031827Sdg kva = vm_pager_map_page(m); 4041827Sdg bzero((caddr_t) kva + (nsize & PAGE_MASK), 4055455Sdg round_page(nsize) - nsize); 4061827Sdg vm_pager_unmap_page(kva); 4071827Sdg } 4081827Sdg } 4091541Srgrimes } 4101827Sdg vnp->vnp_size = (vm_offset_t) nsize; 4111827Sdg object->size = round_page(nsize); 4121541Srgrimes} 4131541Srgrimes 4141541Srgrimesvoid 4151541Srgrimesvnode_pager_umount(mp) 4161541Srgrimes register struct mount *mp; 4171541Srgrimes{ 4181541Srgrimes register vm_pager_t pager, npager; 4191541Srgrimes struct vnode *vp; 4201541Srgrimes 4217162Sdg for (pager = vnode_pager_list.tqh_first; pager != NULL; pager = npager) { 4221541Srgrimes /* 4231827Sdg * Save the next pointer now since uncaching may terminate the 4241827Sdg * object and render pager invalid 4251541Srgrimes */ 4267162Sdg npager = pager->pg_list.tqe_next; 4271827Sdg vp = ((vn_pager_t) pager->pg_data)->vnp_vp; 4287162Sdg if (mp == (struct mount *) 0 || vp->v_mount == mp) { 4297162Sdg VOP_LOCK(vp); 4301541Srgrimes (void) vnode_pager_uncache(vp); 4317162Sdg VOP_UNLOCK(vp); 4327162Sdg } 4331541Srgrimes } 4341541Srgrimes} 4351541Srgrimes 4361541Srgrimes/* 4371541Srgrimes * Remove vnode associated object from the object cache. 4387162Sdg * This routine must be called with the vnode locked. 4391541Srgrimes * 4407162Sdg * XXX unlock the vnode. 4417162Sdg * We must do this since uncaching the object may result in its 4427162Sdg * destruction which may initiate paging activity which may necessitate 4437162Sdg * re-locking the vnode. 
4441549Srgrimes */ 4451549Srgrimesboolean_t 4461549Srgrimesvnode_pager_uncache(vp) 4471549Srgrimes register struct vnode *vp; 4481549Srgrimes{ 4491549Srgrimes register vm_object_t object; 4507162Sdg boolean_t uncached; 4511549Srgrimes vm_pager_t pager; 4521549Srgrimes 4531549Srgrimes /* 4541549Srgrimes * Not a mapped vnode 4551549Srgrimes */ 4569356Sdg object = vp->v_object; 4575455Sdg if (object == NULL) 4585455Sdg return (TRUE); 4595455Sdg 4603374Sdg pager = object->pager; 4611549Srgrimes if (pager == NULL) 4621549Srgrimes return (TRUE); 4631827Sdg 4647162Sdg#ifdef DEBUG 4657162Sdg if (!VOP_ISLOCKED(vp)) { 4667162Sdg extern int (**nfsv2_vnodeop_p)(); 4671827Sdg 4687162Sdg if (vp->v_op != nfsv2_vnodeop_p) 4697162Sdg panic("vnode_pager_uncache: vnode not locked!"); 4707162Sdg } 4717162Sdg#endif 4721549Srgrimes /* 4731827Sdg * Must use vm_object_lookup() as it actually removes the object from 4741827Sdg * the cache list. 4751549Srgrimes */ 4761549Srgrimes object = vm_object_lookup(pager); 4771549Srgrimes if (object) { 4781549Srgrimes uncached = (object->ref_count <= 1); 4797162Sdg VOP_UNLOCK(vp); 4801549Srgrimes pager_cache(object, FALSE); 4817162Sdg VOP_LOCK(vp); 4821549Srgrimes } else 4831549Srgrimes uncached = TRUE; 4841827Sdg return (uncached); 4851549Srgrimes} 4861541Srgrimes 4871541Srgrimes 4881549Srgrimesvoid 4891549Srgrimesvnode_pager_freepage(m) 4901549Srgrimes vm_page_t m; 4911541Srgrimes{ 4921549Srgrimes PAGE_WAKEUP(m); 4931549Srgrimes vm_page_free(m); 4941549Srgrimes} 4951549Srgrimes 4961549Srgrimes/* 4971549Srgrimes * calculate the linear (byte) disk address of specified virtual 4981549Srgrimes * file address 4991549Srgrimes */ 5001549Srgrimesvm_offset_t 5016151Sdgvnode_pager_addr(vp, address, run) 5021549Srgrimes struct vnode *vp; 5031549Srgrimes vm_offset_t address; 5046151Sdg int *run; 5051549Srgrimes{ 5065455Sdg int rtaddress; 5075455Sdg int bsize; 5081549Srgrimes vm_offset_t block; 5091549Srgrimes struct vnode *rtvp; 5105455Sdg int err; 5115455Sdg 
int vblock, voffset; 5121549Srgrimes 5135455Sdg if ((int) address < 0) 5145455Sdg return -1; 5155455Sdg 5161549Srgrimes bsize = vp->v_mount->mnt_stat.f_iosize; 5171549Srgrimes vblock = address / bsize; 5181549Srgrimes voffset = address % bsize; 5191549Srgrimes 5206151Sdg err = VOP_BMAP(vp, vblock, &rtvp, &block, run); 5211549Srgrimes 5226151Sdg if (err || (block == -1)) 5231549Srgrimes rtaddress = -1; 5246151Sdg else { 5256626Sdg rtaddress = block + voffset / DEV_BSIZE; 5266151Sdg if( run) { 5276151Sdg *run += 1; 5286151Sdg *run *= bsize/PAGE_SIZE; 5296151Sdg *run -= voffset/PAGE_SIZE; 5306151Sdg } 5316151Sdg } 5321549Srgrimes 5331549Srgrimes return rtaddress; 5341549Srgrimes} 5351549Srgrimes 5361549Srgrimes/* 5371549Srgrimes * interrupt routine for I/O completion 5381549Srgrimes */ 5391549Srgrimesvoid 5401549Srgrimesvnode_pager_iodone(bp) 5411549Srgrimes struct buf *bp; 5421549Srgrimes{ 5431549Srgrimes bp->b_flags |= B_DONE; 5441827Sdg wakeup((caddr_t) bp); 5451549Srgrimes} 5461549Srgrimes 5471549Srgrimes/* 5481549Srgrimes * small block file system vnode pager input 5491549Srgrimes */ 5501549Srgrimesint 5511549Srgrimesvnode_pager_input_smlfs(vnp, m) 5521549Srgrimes vn_pager_t vnp; 5531549Srgrimes vm_page_t m; 5541549Srgrimes{ 5555455Sdg int i; 5565455Sdg int s; 5571549Srgrimes struct vnode *dp, *vp; 5581549Srgrimes struct buf *bp; 5591549Srgrimes vm_offset_t kva; 5605455Sdg int fileaddr; 5615455Sdg int block; 5621549Srgrimes vm_offset_t bsize; 5635455Sdg int error = 0; 5641549Srgrimes 5651549Srgrimes vp = vnp->vnp_vp; 5661549Srgrimes bsize = vp->v_mount->mnt_stat.f_iosize; 5671549Srgrimes 5687178Sdg 5695455Sdg VOP_BMAP(vp, 0, &dp, 0, 0); 5701549Srgrimes 5711549Srgrimes kva = vm_pager_map_page(m); 5721549Srgrimes 5731827Sdg for (i = 0; i < PAGE_SIZE / bsize; i++) { 5741827Sdg 5755455Sdg if ((vm_page_bits(m->offset + i * bsize, bsize) & m->valid)) 5765455Sdg continue; 5771549Srgrimes 5786151Sdg fileaddr = vnode_pager_addr(vp, m->offset + i * bsize, (int *)0); 
5791827Sdg if (fileaddr != -1) { 5801549Srgrimes bp = getpbuf(); 5811549Srgrimes 5821827Sdg /* build a minimal buffer header */ 5831549Srgrimes bp->b_flags = B_BUSY | B_READ | B_CALL; 5841549Srgrimes bp->b_iodone = vnode_pager_iodone; 5851549Srgrimes bp->b_proc = curproc; 5861549Srgrimes bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred; 5871827Sdg if (bp->b_rcred != NOCRED) 5881549Srgrimes crhold(bp->b_rcred); 5891827Sdg if (bp->b_wcred != NOCRED) 5901549Srgrimes crhold(bp->b_wcred); 5911549Srgrimes bp->b_un.b_addr = (caddr_t) kva + i * bsize; 5926626Sdg bp->b_blkno = fileaddr; 5935455Sdg pbgetvp(dp, bp); 5941549Srgrimes bp->b_bcount = bsize; 5951549Srgrimes bp->b_bufsize = bsize; 5961827Sdg 5971827Sdg /* do the input */ 5981549Srgrimes VOP_STRATEGY(bp); 5991549Srgrimes 6001827Sdg /* we definitely need to be at splbio here */ 6011549Srgrimes 6021549Srgrimes s = splbio(); 6031549Srgrimes while ((bp->b_flags & B_DONE) == 0) { 6049356Sdg tsleep(bp, PVM, "vnsrd", 0); 6051549Srgrimes } 6061549Srgrimes splx(s); 6071549Srgrimes if ((bp->b_flags & B_ERROR) != 0) 6081549Srgrimes error = EIO; 6091549Srgrimes 6101827Sdg /* 6111827Sdg * free the buffer header back to the swap buffer pool 6121827Sdg */ 6131549Srgrimes relpbuf(bp); 6141827Sdg if (error) 6151549Srgrimes break; 6165455Sdg 6177695Sdg vm_page_set_clean(m, (i * bsize) & (PAGE_SIZE-1), bsize); 6187695Sdg vm_page_set_valid(m, (i * bsize) & (PAGE_SIZE-1), bsize); 6191549Srgrimes } else { 6207695Sdg vm_page_set_clean(m, (i * bsize) & (PAGE_SIZE-1), bsize); 6211549Srgrimes bzero((caddr_t) kva + i * bsize, bsize); 6221549Srgrimes } 6231549Srgrimesnextblock: 6241549Srgrimes } 6251549Srgrimes vm_pager_unmap_page(kva); 6265455Sdg pmap_clear_modify(VM_PAGE_TO_PHYS(m)); 6271827Sdg if (error) { 6284207Sdg return VM_PAGER_ERROR; 6291549Srgrimes } 6301549Srgrimes return VM_PAGER_OK; 6311549Srgrimes 6321549Srgrimes} 6331549Srgrimes 6341549Srgrimes 6351549Srgrimes/* 6361549Srgrimes * old style vnode pager output routine 
6371549Srgrimes */ 6381549Srgrimesint 6391549Srgrimesvnode_pager_input_old(vnp, m) 6401549Srgrimes vn_pager_t vnp; 6411549Srgrimes vm_page_t m; 6421549Srgrimes{ 6431541Srgrimes struct uio auio; 6441541Srgrimes struct iovec aiov; 6455455Sdg int error; 6465455Sdg int size; 6471549Srgrimes vm_offset_t kva; 6481549Srgrimes 6491549Srgrimes error = 0; 6501827Sdg 6511549Srgrimes /* 6521549Srgrimes * Return failure if beyond current EOF 6531549Srgrimes */ 6545455Sdg if (m->offset >= vnp->vnp_size) { 6551549Srgrimes return VM_PAGER_BAD; 6561549Srgrimes } else { 6571549Srgrimes size = PAGE_SIZE; 6585455Sdg if (m->offset + size > vnp->vnp_size) 6595455Sdg size = vnp->vnp_size - m->offset; 6607178Sdg 6615455Sdg /* 6625455Sdg * Allocate a kernel virtual address and initialize so that 6635455Sdg * we can use VOP_READ/WRITE routines. 6645455Sdg */ 6651549Srgrimes kva = vm_pager_map_page(m); 6667178Sdg 6671827Sdg aiov.iov_base = (caddr_t) kva; 6681549Srgrimes aiov.iov_len = size; 6691549Srgrimes auio.uio_iov = &aiov; 6701549Srgrimes auio.uio_iovcnt = 1; 6715455Sdg auio.uio_offset = m->offset; 6721549Srgrimes auio.uio_segflg = UIO_SYSSPACE; 6731549Srgrimes auio.uio_rw = UIO_READ; 6741549Srgrimes auio.uio_resid = size; 6751827Sdg auio.uio_procp = (struct proc *) 0; 6761549Srgrimes 6771549Srgrimes error = VOP_READ(vnp->vnp_vp, &auio, 0, curproc->p_ucred); 6781549Srgrimes if (!error) { 6791549Srgrimes register int count = size - auio.uio_resid; 6801549Srgrimes 6811549Srgrimes if (count == 0) 6821549Srgrimes error = EINVAL; 6831549Srgrimes else if (count != PAGE_SIZE) 6841827Sdg bzero((caddr_t) kva + count, PAGE_SIZE - count); 6851549Srgrimes } 6861549Srgrimes vm_pager_unmap_page(kva); 6871549Srgrimes } 6881549Srgrimes pmap_clear_modify(VM_PAGE_TO_PHYS(m)); 6895455Sdg m->dirty = 0; 6904207Sdg return error ? 
VM_PAGER_ERROR : VM_PAGER_OK; 6911549Srgrimes} 6921549Srgrimes 6931549Srgrimes/* 6941549Srgrimes * generic vnode pager input routine 6951549Srgrimes */ 6961549Srgrimesint 6971549Srgrimesvnode_pager_input(vnp, m, count, reqpage) 6981549Srgrimes register vn_pager_t vnp; 6991549Srgrimes vm_page_t *m; 7005455Sdg int count, reqpage; 7011549Srgrimes{ 7025455Sdg int i; 7031541Srgrimes vm_offset_t kva, foff; 7047178Sdg int size; 7051549Srgrimes vm_object_t object; 7061549Srgrimes struct vnode *dp, *vp; 7075455Sdg int bsize; 7081541Srgrimes 7095455Sdg int first, last; 7106151Sdg int firstaddr; 7115455Sdg int block, offset; 7126151Sdg int runpg; 7136151Sdg int runend; 7141549Srgrimes 7157178Sdg struct buf *bp; 7165455Sdg int s; 7175455Sdg int failflag; 7181549Srgrimes 7195455Sdg int error = 0; 7201549Srgrimes 7211827Sdg object = m[reqpage]->object; /* all vm_page_t items are in same 7221827Sdg * object */ 7231549Srgrimes 7241549Srgrimes vp = vnp->vnp_vp; 7251549Srgrimes bsize = vp->v_mount->mnt_stat.f_iosize; 7261549Srgrimes 7271549Srgrimes /* get the UNDERLYING device for the file with VOP_BMAP() */ 7281827Sdg 7291549Srgrimes /* 7301827Sdg * originally, we did not check for an error return value -- assuming 7311827Sdg * an fs always has a bmap entry point -- that assumption is wrong!!! 7321549Srgrimes */ 7335455Sdg foff = m[reqpage]->offset; 7341827Sdg 7351549Srgrimes /* 7361887Sdg * if we can't bmap, use old VOP code 7371549Srgrimes */ 7385455Sdg if (VOP_BMAP(vp, 0, &dp, 0, 0)) { 7391549Srgrimes for (i = 0; i < count; i++) { 7401549Srgrimes if (i != reqpage) { 7411549Srgrimes vnode_pager_freepage(m[i]); 7421549Srgrimes } 7431549Srgrimes } 7443612Sdg cnt.v_vnodein++; 7453612Sdg cnt.v_vnodepgsin++; 7461549Srgrimes return vnode_pager_input_old(vnp, m[reqpage]); 7471549Srgrimes 7481827Sdg /* 7491827Sdg * if the blocksize is smaller than a page size, then use 7501827Sdg * special small filesystem code. 
NFS sometimes has a small 7511827Sdg * blocksize, but it can handle large reads itself. 7521827Sdg */ 7531827Sdg } else if ((PAGE_SIZE / bsize) > 1 && 7545455Sdg (vp->v_mount->mnt_stat.f_type != MOUNT_NFS)) { 7551827Sdg 7561549Srgrimes for (i = 0; i < count; i++) { 7571549Srgrimes if (i != reqpage) { 7581549Srgrimes vnode_pager_freepage(m[i]); 7591549Srgrimes } 7601549Srgrimes } 7613612Sdg cnt.v_vnodein++; 7623612Sdg cnt.v_vnodepgsin++; 7631549Srgrimes return vnode_pager_input_smlfs(vnp, m[reqpage]); 7641549Srgrimes } 7651549Srgrimes /* 7665455Sdg * if ANY DEV_BSIZE blocks are valid on a large filesystem block 7675455Sdg * then, the entire page is valid -- 7681549Srgrimes */ 7695455Sdg if (m[reqpage]->valid) { 7705455Sdg m[reqpage]->valid = VM_PAGE_BITS_ALL; 7715455Sdg for (i = 0; i < count; i++) { 7725455Sdg if (i != reqpage) 7735455Sdg vnode_pager_freepage(m[i]); 7741549Srgrimes } 7755455Sdg return VM_PAGER_OK; 7761549Srgrimes } 7777178Sdg 7785455Sdg /* 7795455Sdg * here on direct device I/O 7805455Sdg */ 7811549Srgrimes 7826151Sdg firstaddr = -1; 7831549Srgrimes /* 7846151Sdg * calculate the run that includes the required page 7851549Srgrimes */ 7866151Sdg for(first = 0, i = 0; i < count; i = runend) { 7876151Sdg firstaddr = vnode_pager_addr(vp, m[i]->offset, &runpg); 7886151Sdg if (firstaddr == -1) { 7896151Sdg if( i == reqpage && foff < vnp->vnp_size) { 7906151Sdg printf("vnode_pager_input: unexpected missing page: firstaddr: %d, foff: %d, vnp_size: %d\n", 7916151Sdg firstaddr, foff, vnp->vnp_size); 7926151Sdg panic("vnode_pager_input:..."); 7936151Sdg } 7941549Srgrimes vnode_pager_freepage(m[i]); 7956151Sdg runend = i + 1; 7966151Sdg first = runend; 7976151Sdg continue; 7981549Srgrimes } 7996151Sdg runend = i + runpg; 8006151Sdg if( runend <= reqpage) { 8016151Sdg int j; 8026151Sdg for(j = i; j < runend; j++) { 8036151Sdg vnode_pager_freepage(m[j]); 8046151Sdg } 8051549Srgrimes } else { 8066151Sdg if( runpg < (count - first)) { 8076151Sdg for(i=first + runpg; 
i < count; i++) 8086151Sdg vnode_pager_freepage(m[i]); 8096151Sdg count = first + runpg; 8106151Sdg } 8116151Sdg break; 8121549Srgrimes } 8136151Sdg first = runend; 8141549Srgrimes } 8151549Srgrimes 8161549Srgrimes /* 8171827Sdg * the first and last page have been calculated now, move input pages 8181827Sdg * to be zero based... 8191549Srgrimes */ 8201549Srgrimes if (first != 0) { 8211549Srgrimes for (i = first; i < count; i++) { 8221549Srgrimes m[i - first] = m[i]; 8231549Srgrimes } 8241549Srgrimes count -= first; 8251549Srgrimes reqpage -= first; 8261549Srgrimes } 8276151Sdg 8281549Srgrimes /* 8291549Srgrimes * calculate the file virtual address for the transfer 8301549Srgrimes */ 8315455Sdg foff = m[0]->offset; 8321827Sdg 8331549Srgrimes /* 8341549Srgrimes * calculate the size of the transfer 8351549Srgrimes */ 8361549Srgrimes size = count * PAGE_SIZE; 8371549Srgrimes if ((foff + size) > vnp->vnp_size) 8381549Srgrimes size = vnp->vnp_size - foff; 8391549Srgrimes 8401549Srgrimes /* 8411549Srgrimes * round up physical size for real devices 8421549Srgrimes */ 8431827Sdg if (dp->v_type == VBLK || dp->v_type == VCHR) 8441549Srgrimes size = (size + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1); 8451549Srgrimes 8465841Sdg bp = getpbuf(); 8475455Sdg kva = (vm_offset_t) bp->b_data; 8481887Sdg 8491549Srgrimes /* 8501549Srgrimes * and map the pages to be read into the kva 8511549Srgrimes */ 8521887Sdg pmap_qenter(kva, m, count); 8531549Srgrimes 8541549Srgrimes /* build a minimal buffer header */ 8551549Srgrimes bp->b_flags = B_BUSY | B_READ | B_CALL; 8561549Srgrimes bp->b_iodone = vnode_pager_iodone; 8571549Srgrimes /* B_PHYS is not set, but it is nice to fill this in */ 8581549Srgrimes bp->b_proc = curproc; 8591549Srgrimes bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred; 8601827Sdg if (bp->b_rcred != NOCRED) 8611549Srgrimes crhold(bp->b_rcred); 8621827Sdg if (bp->b_wcred != NOCRED) 8631549Srgrimes crhold(bp->b_wcred); 8646626Sdg bp->b_blkno = firstaddr; 8655455Sdg pbgetvp(dp, bp); 
8661549Srgrimes bp->b_bcount = size; 8671549Srgrimes bp->b_bufsize = size; 8681549Srgrimes 8693612Sdg cnt.v_vnodein++; 8703612Sdg cnt.v_vnodepgsin += count; 8713612Sdg 8721549Srgrimes /* do the input */ 8731549Srgrimes VOP_STRATEGY(bp); 8743612Sdg 8751549Srgrimes s = splbio(); 8761549Srgrimes /* we definitely need to be at splbio here */ 8771549Srgrimes 8781549Srgrimes while ((bp->b_flags & B_DONE) == 0) { 8799356Sdg tsleep(bp, PVM, "vnread", 0); 8801549Srgrimes } 8811549Srgrimes splx(s); 8821549Srgrimes if ((bp->b_flags & B_ERROR) != 0) 8831549Srgrimes error = EIO; 8841549Srgrimes 8851549Srgrimes if (!error) { 8861549Srgrimes if (size != count * PAGE_SIZE) 8871827Sdg bzero((caddr_t) kva + size, PAGE_SIZE * count - size); 8881549Srgrimes } 8895455Sdg pmap_qremove(kva, count); 8901549Srgrimes 8911549Srgrimes /* 8921549Srgrimes * free the buffer header back to the swap buffer pool 8931549Srgrimes */ 8941549Srgrimes relpbuf(bp); 8951549Srgrimes 8961549Srgrimesfinishup: 8971549Srgrimes for (i = 0; i < count; i++) { 8982386Sdg pmap_clear_modify(VM_PAGE_TO_PHYS(m[i])); 8995455Sdg m[i]->dirty = 0; 9005455Sdg m[i]->valid = VM_PAGE_BITS_ALL; 9011549Srgrimes if (i != reqpage) { 9021827Sdg 9031549Srgrimes /* 9041827Sdg * whether or not to leave the page activated is up in 9051827Sdg * the air, but we should put the page on a page queue 9061827Sdg * somewhere. (it already is in the object). Result: 9071827Sdg * It appears that emperical results show that 9081827Sdg * deactivating pages is best. 
9091549Srgrimes */ 9101827Sdg 9111549Srgrimes /* 9121827Sdg * just in case someone was asking for this page we 9131827Sdg * now tell them that it is ok to use 9141549Srgrimes */ 9151549Srgrimes if (!error) { 9165841Sdg vm_page_deactivate(m[i]); 9171549Srgrimes PAGE_WAKEUP(m[i]); 9181549Srgrimes } else { 9191549Srgrimes vnode_pager_freepage(m[i]); 9201549Srgrimes } 9211549Srgrimes } 9221549Srgrimes } 9231549Srgrimes if (error) { 9244207Sdg printf("vnode_pager_input: I/O read error\n"); 9251549Srgrimes } 9264207Sdg return (error ? VM_PAGER_ERROR : VM_PAGER_OK); 9271549Srgrimes} 9281549Srgrimes 9291549Srgrimes/* 9301549Srgrimes * generic vnode pager output routine 9311549Srgrimes */ 9321549Srgrimesint 9331549Srgrimesvnode_pager_output(vnp, m, count, rtvals) 9341549Srgrimes vn_pager_t vnp; 9351549Srgrimes vm_page_t *m; 9365455Sdg int count; 9375455Sdg int *rtvals; 9381549Srgrimes{ 9397695Sdg int i; 9401549Srgrimes 9417695Sdg struct vnode *vp; 9427695Sdg int maxsize, ncount; 9437695Sdg struct uio auio; 9447695Sdg struct iovec aiov; 9457695Sdg int error; 9461549Srgrimes 9471549Srgrimes vp = vnp->vnp_vp; 9481827Sdg for (i = 0; i < count; i++) 9491549Srgrimes rtvals[i] = VM_PAGER_AGAIN; 9501549Srgrimes 9515455Sdg if ((int) m[0]->offset < 0) { 9527695Sdg printf("vnode_pager_output: attempt to write meta-data!!! 
-- 0x%x(%x)\n", m[0]->offset, m[0]->dirty); 9537695Sdg rtvals[0] = VM_PAGER_BAD; 9547695Sdg return VM_PAGER_BAD; 9555455Sdg } 9567178Sdg 9577695Sdg maxsize = count * PAGE_SIZE; 9587695Sdg ncount = count; 9591549Srgrimes 9608585Sdg if (maxsize + m[0]->offset > vnp->vnp_size) { 9618585Sdg if (vnp->vnp_size > m[0]->offset) 9628585Sdg maxsize = vnp->vnp_size - m[0]->offset; 9638585Sdg else 9648585Sdg maxsize = 0; 9657695Sdg ncount = (maxsize + PAGE_SIZE - 1) / PAGE_SIZE; 9668585Sdg if (ncount < count) { 9678585Sdg for (i = ncount; i < count; i++) { 9687695Sdg rtvals[i] = VM_PAGER_BAD; 9691549Srgrimes } 9708585Sdg if (ncount == 0) { 9717695Sdg printf("vnode_pager_output: write past end of file: %d, %d\n", 9727695Sdg m[0]->offset, vnp->vnp_size); 9737695Sdg return rtvals[0]; 9747695Sdg } 9751549Srgrimes } 9761541Srgrimes } 9777695Sdg 9788585Sdg for (i = 0; i < count; i++) { 9798585Sdg m[i]->busy++; 9807695Sdg m[i]->flags &= ~PG_BUSY; 9811549Srgrimes } 9821827Sdg 9837695Sdg aiov.iov_base = (caddr_t) 0; 9847695Sdg aiov.iov_len = maxsize; 9857695Sdg auio.uio_iov = &aiov; 9867695Sdg auio.uio_iovcnt = 1; 9877695Sdg auio.uio_offset = m[0]->offset; 9887695Sdg auio.uio_segflg = UIO_NOCOPY; 9897695Sdg auio.uio_rw = UIO_WRITE; 9907695Sdg auio.uio_resid = maxsize; 9917695Sdg auio.uio_procp = (struct proc *) 0; 9927695Sdg error = VOP_WRITE(vp, &auio, IO_VMIO, curproc->p_ucred); 9933612Sdg cnt.v_vnodeout++; 9947695Sdg cnt.v_vnodepgsout += ncount; 9953612Sdg 9968585Sdg if (error) { 9977695Sdg printf("vnode_pager_output: I/O error %d\n", error); 9987695Sdg } 9998585Sdg if (auio.uio_resid) { 10007695Sdg printf("vnode_pager_output: residual I/O %d at %d\n", auio.uio_resid, m[0]->offset); 10017695Sdg } 10028585Sdg for (i = 0; i < count; i++) { 10038585Sdg m[i]->busy--; 10048585Sdg if (i < ncount) { 10057695Sdg rtvals[i] = VM_PAGER_OK; 10067695Sdg } 10078585Sdg if ((m[i]->busy == 0) && (m[i]->flags & PG_WANTED)) 10087695Sdg wakeup((caddr_t) m[i]); 10097695Sdg } 10107695Sdg return 
rtvals[0]; 10117695Sdg} 10121549Srgrimes 10137695Sdgstruct vnode * 10147695Sdgvnode_pager_lock(vm_object_t object) { 10151549Srgrimes 10167695Sdg for(;object;object=object->shadow) { 10177695Sdg vn_pager_t vnp; 10187695Sdg if( !object->pager || (object->pager->pg_type != PG_VNODE)) 10197695Sdg continue; 10201549Srgrimes 10217695Sdg vnp = (vn_pager_t) object->pager->pg_data; 10227695Sdg VOP_LOCK(vnp->vnp_vp); 10237695Sdg return vnp->vnp_vp; 10241549Srgrimes } 10257695Sdg return (struct vnode *)NULL; 10267695Sdg} 10271549Srgrimes 10287695Sdgvoid 10297695Sdgvnode_pager_unlock(struct vnode *vp) { 10307178Sdg VOP_UNLOCK(vp); 10317695Sdg} 10327178Sdg 1033