/* vnode_pager.c — revision 6626 */
11541Srgrimes/* 21541Srgrimes * Copyright (c) 1990 University of Utah. 31549Srgrimes * Copyright (c) 1991 The Regents of the University of California. 41549Srgrimes * All rights reserved. 51549Srgrimes * Copyright (c) 1993,1994 John S. Dyson 61541Srgrimes * 71541Srgrimes * This code is derived from software contributed to Berkeley by 81541Srgrimes * the Systems Programming Group of the University of Utah Computer 91541Srgrimes * Science Department. 101541Srgrimes * 111541Srgrimes * Redistribution and use in source and binary forms, with or without 121541Srgrimes * modification, are permitted provided that the following conditions 131541Srgrimes * are met: 141541Srgrimes * 1. Redistributions of source code must retain the above copyright 151541Srgrimes * notice, this list of conditions and the following disclaimer. 161541Srgrimes * 2. Redistributions in binary form must reproduce the above copyright 171541Srgrimes * notice, this list of conditions and the following disclaimer in the 181541Srgrimes * documentation and/or other materials provided with the distribution. 191541Srgrimes * 3. All advertising materials mentioning features or use of this software 201541Srgrimes * must display the following acknowledgement: 211541Srgrimes * This product includes software developed by the University of 221541Srgrimes * California, Berkeley and its contributors. 231541Srgrimes * 4. Neither the name of the University nor the names of its contributors 241541Srgrimes * may be used to endorse or promote products derived from this software 251541Srgrimes * without specific prior written permission. 261541Srgrimes * 271541Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 281541Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 291541Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 301541Srgrimes * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 311541Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 321541Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 331541Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 341541Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 351541Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 361541Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 371541Srgrimes * SUCH DAMAGE. 381541Srgrimes * 391549Srgrimes * from: @(#)vnode_pager.c 7.5 (Berkeley) 4/20/91 406626Sdg * $Id: vnode_pager.c,v 1.24 1995/02/22 09:15:35 davidg Exp $ 411541Srgrimes */ 421541Srgrimes 431541Srgrimes/* 441541Srgrimes * Page to/from files (vnodes). 451541Srgrimes * 461541Srgrimes * TODO: 471541Srgrimes * pageouts 481541Srgrimes * fix credential use (uses current process credentials now) 491541Srgrimes */ 501541Srgrimes 511549Srgrimes/* 521549Srgrimes * MODIFICATIONS: 531549Srgrimes * John S. Dyson 08 Dec 93 541549Srgrimes * 551549Srgrimes * This file in conjunction with some vm_fault mods, eliminate the performance 561549Srgrimes * advantage for using the buffer cache and minimize memory copies. 
571549Srgrimes * 581549Srgrimes * 1) Supports multiple - block reads 591549Srgrimes * 2) Bypasses buffer cache for reads 601827Sdg * 611549Srgrimes * TODO: 621549Srgrimes * 631549Srgrimes * 1) Totally bypass buffer cache for reads 641549Srgrimes * (Currently will still sometimes use buffer cache for reads) 651549Srgrimes * 2) Bypass buffer cache for writes 661549Srgrimes * (Code does not support it, but mods are simple) 671549Srgrimes */ 681549Srgrimes 691541Srgrimes#include <sys/param.h> 701541Srgrimes#include <sys/systm.h> 715455Sdg#include <sys/kernel.h> 721541Srgrimes#include <sys/proc.h> 731541Srgrimes#include <sys/malloc.h> 741541Srgrimes#include <sys/vnode.h> 751541Srgrimes#include <sys/uio.h> 761541Srgrimes#include <sys/mount.h> 771541Srgrimes 781541Srgrimes#include <vm/vm.h> 791541Srgrimes#include <vm/vm_page.h> 801541Srgrimes#include <vm/vnode_pager.h> 811541Srgrimes 821549Srgrimes#include <sys/buf.h> 831549Srgrimes#include <miscfs/specfs/specdev.h> 841541Srgrimes 855455Sdgint vnode_pager_putmulti(); 861541Srgrimes 875455Sdgvoid vnode_pager_init(); 881827Sdgvm_pager_t vnode_pager_alloc(caddr_t, vm_offset_t, vm_prot_t, vm_offset_t); 895455Sdgvoid vnode_pager_dealloc(); 905455Sdgint vnode_pager_getpage(); 915455Sdgint vnode_pager_getmulti(); 925455Sdgint vnode_pager_putpage(); 931827Sdgboolean_t vnode_pager_haspage(); 941541Srgrimes 951541Srgrimesstruct pagerops vnodepagerops = { 961541Srgrimes vnode_pager_init, 971541Srgrimes vnode_pager_alloc, 981541Srgrimes vnode_pager_dealloc, 991541Srgrimes vnode_pager_getpage, 1001549Srgrimes vnode_pager_getmulti, 1011541Srgrimes vnode_pager_putpage, 1021549Srgrimes vnode_pager_putmulti, 1031549Srgrimes vnode_pager_haspage 1041541Srgrimes}; 1051541Srgrimes 1061887Sdg 1071887Sdg 1081827Sdgstatic int vnode_pager_input(vn_pager_t vnp, vm_page_t * m, int count, int reqpage); 1091827Sdgstatic int vnode_pager_output(vn_pager_t vnp, vm_page_t * m, int count, int *rtvals); 1101549Srgrimes 1111549Srgrimesextern vm_map_t 
pager_map; 1121549Srgrimes 1131549Srgrimesstruct pagerlst vnode_pager_list; /* list of managed vnodes */ 1141549Srgrimes 1151549Srgrimes#define MAXBP (PAGE_SIZE/DEV_BSIZE); 1161549Srgrimes 1171549Srgrimesvoid 1181541Srgrimesvnode_pager_init() 1191541Srgrimes{ 1201541Srgrimes TAILQ_INIT(&vnode_pager_list); 1211541Srgrimes} 1221541Srgrimes 1231541Srgrimes/* 1241541Srgrimes * Allocate (or lookup) pager for a vnode. 1251541Srgrimes * Handle is a vnode pointer. 1261541Srgrimes */ 1271549Srgrimesvm_pager_t 1281549Srgrimesvnode_pager_alloc(handle, size, prot, offset) 1291541Srgrimes caddr_t handle; 1301541Srgrimes vm_size_t size; 1311541Srgrimes vm_prot_t prot; 1321549Srgrimes vm_offset_t offset; 1331541Srgrimes{ 1341541Srgrimes register vm_pager_t pager; 1351541Srgrimes register vn_pager_t vnp; 1365455Sdg vm_object_t object, tobject; 1371541Srgrimes struct vattr vattr; 1381541Srgrimes struct vnode *vp; 1391541Srgrimes struct proc *p = curproc; /* XXX */ 1405455Sdg int rtval; 1411541Srgrimes 1421541Srgrimes /* 1431541Srgrimes * Pageout to vnode, no can do yet. 1441541Srgrimes */ 1451541Srgrimes if (handle == NULL) 1461827Sdg return (NULL); 1471541Srgrimes 1481541Srgrimes /* 1491827Sdg * Vnodes keep a pointer to any associated pager so no need to lookup 1501827Sdg * with vm_pager_lookup. 
1511541Srgrimes */ 1521827Sdg vp = (struct vnode *) handle; 1535455Sdg while ((object = (vm_object_t) vp->v_vmdata) && (object->flags & OBJ_DEAD)) 1545455Sdg tsleep((caddr_t) object, PVM, "vadead", 0); 1555455Sdg 1563374Sdg pager = NULL; 1575455Sdg if (object != NULL) 1583374Sdg pager = object->pager; 1591541Srgrimes if (pager == NULL) { 1601827Sdg 1611541Srgrimes /* 1621541Srgrimes * Allocate pager structures 1631541Srgrimes */ 1641827Sdg pager = (vm_pager_t) malloc(sizeof *pager, M_VMPAGER, M_WAITOK); 1651541Srgrimes if (pager == NULL) 1661827Sdg return (NULL); 1671827Sdg vnp = (vn_pager_t) malloc(sizeof *vnp, M_VMPGDATA, M_WAITOK); 1681541Srgrimes if (vnp == NULL) { 1691827Sdg free((caddr_t) pager, M_VMPAGER); 1701827Sdg return (NULL); 1711541Srgrimes } 1721541Srgrimes /* 1731541Srgrimes * And an object of the appropriate size 1741541Srgrimes */ 1755455Sdg if ((rtval = VOP_GETATTR(vp, &vattr, p->p_ucred, p)) == 0) { 1761541Srgrimes object = vm_object_allocate(round_page(vattr.va_size)); 1775519Sdg object->flags &= ~OBJ_INTERNAL; 1786585Sdg object->flags |= OBJ_CANPERSIST; 1791541Srgrimes vm_object_enter(object, pager); 1806585Sdg object->pager = pager; 1811541Srgrimes } else { 1825455Sdg printf("Error in getattr: %d\n", rtval); 1831827Sdg free((caddr_t) vnp, M_VMPGDATA); 1841827Sdg free((caddr_t) pager, M_VMPAGER); 1851827Sdg return (NULL); 1861541Srgrimes } 1871827Sdg 1881541Srgrimes /* 1891541Srgrimes * Hold a reference to the vnode and initialize pager data. 
1901541Srgrimes */ 1911541Srgrimes VREF(vp); 1921541Srgrimes vnp->vnp_flags = 0; 1931541Srgrimes vnp->vnp_vp = vp; 1941541Srgrimes vnp->vnp_size = vattr.va_size; 1951549Srgrimes 1961541Srgrimes TAILQ_INSERT_TAIL(&vnode_pager_list, pager, pg_list); 1971541Srgrimes pager->pg_handle = handle; 1981541Srgrimes pager->pg_type = PG_VNODE; 1991541Srgrimes pager->pg_ops = &vnodepagerops; 2001827Sdg pager->pg_data = (caddr_t) vnp; 2013374Sdg vp->v_vmdata = (caddr_t) object; 2021541Srgrimes } else { 2031827Sdg 2041541Srgrimes /* 2051827Sdg * vm_object_lookup() will remove the object from the cache if 2061827Sdg * found and also gain a reference to the object. 2071541Srgrimes */ 2083374Sdg (void) vm_object_lookup(pager); 2091541Srgrimes } 2101827Sdg return (pager); 2111541Srgrimes} 2121541Srgrimes 2131549Srgrimesvoid 2141541Srgrimesvnode_pager_dealloc(pager) 2151541Srgrimes vm_pager_t pager; 2161541Srgrimes{ 2171827Sdg register vn_pager_t vnp = (vn_pager_t) pager->pg_data; 2181541Srgrimes register struct vnode *vp; 2195455Sdg vm_object_t object; 2201541Srgrimes 2213449Sphk vp = vnp->vnp_vp; 2223449Sphk if (vp) { 2235455Sdg int s = splbio(); 2245455Sdg 2255455Sdg object = (vm_object_t) vp->v_vmdata; 2265455Sdg if (object) { 2275455Sdg while (object->paging_in_progress) { 2286618Sdg object->flags |= OBJ_PIPWNT; 2295455Sdg tsleep(object, PVM, "vnpdea", 0); 2305455Sdg } 2315455Sdg } 2325455Sdg splx(s); 2335455Sdg 2341541Srgrimes vp->v_vmdata = NULL; 2355455Sdg vp->v_flag &= ~(VTEXT | VVMIO); 2361541Srgrimes vrele(vp); 2371541Srgrimes } 2381541Srgrimes TAILQ_REMOVE(&vnode_pager_list, pager, pg_list); 2391827Sdg free((caddr_t) vnp, M_VMPGDATA); 2401827Sdg free((caddr_t) pager, M_VMPAGER); 2411541Srgrimes} 2421541Srgrimes 2431549Srgrimesint 2441549Srgrimesvnode_pager_getmulti(pager, m, count, reqpage, sync) 2451541Srgrimes vm_pager_t pager; 2461549Srgrimes vm_page_t *m; 2475455Sdg int count; 2485455Sdg int reqpage; 2491541Srgrimes boolean_t sync; 2501541Srgrimes{ 2511827Sdg 
2521549Srgrimes return vnode_pager_input((vn_pager_t) pager->pg_data, m, count, reqpage); 2531549Srgrimes} 2541541Srgrimes 2551549Srgrimesint 2561549Srgrimesvnode_pager_getpage(pager, m, sync) 2571549Srgrimes vm_pager_t pager; 2581549Srgrimes vm_page_t m; 2591549Srgrimes boolean_t sync; 2601549Srgrimes{ 2611549Srgrimes 2621549Srgrimes vm_page_t marray[1]; 2631827Sdg 2641549Srgrimes if (pager == NULL) 2651549Srgrimes return FALSE; 2661549Srgrimes marray[0] = m; 2671549Srgrimes 2681827Sdg return vnode_pager_input((vn_pager_t) pager->pg_data, marray, 1, 0); 2691541Srgrimes} 2701541Srgrimes 2711549Srgrimesboolean_t 2721549Srgrimesvnode_pager_putpage(pager, m, sync) 2731541Srgrimes vm_pager_t pager; 2741549Srgrimes vm_page_t m; 2751541Srgrimes boolean_t sync; 2761541Srgrimes{ 2771549Srgrimes vm_page_t marray[1]; 2785455Sdg int rtvals[1]; 2791541Srgrimes 2801541Srgrimes if (pager == NULL) 2811549Srgrimes return FALSE; 2821549Srgrimes marray[0] = m; 2831827Sdg vnode_pager_output((vn_pager_t) pager->pg_data, marray, 1, rtvals); 2841549Srgrimes return rtvals[0]; 2851541Srgrimes} 2861541Srgrimes 2871549Srgrimesint 2881549Srgrimesvnode_pager_putmulti(pager, m, c, sync, rtvals) 2891549Srgrimes vm_pager_t pager; 2901549Srgrimes vm_page_t *m; 2915455Sdg int c; 2921549Srgrimes boolean_t sync; 2935455Sdg int *rtvals; 2941549Srgrimes{ 2951827Sdg return vnode_pager_output((vn_pager_t) pager->pg_data, m, c, rtvals); 2961549Srgrimes} 2971549Srgrimes 2981549Srgrimes 2991549Srgrimesboolean_t 3001541Srgrimesvnode_pager_haspage(pager, offset) 3011541Srgrimes vm_pager_t pager; 3021541Srgrimes vm_offset_t offset; 3031541Srgrimes{ 3041827Sdg register vn_pager_t vnp = (vn_pager_t) pager->pg_data; 3054797Sdg register struct vnode *vp = vnp->vnp_vp; 3061541Srgrimes daddr_t bn; 3075455Sdg int err; 3084446Sdg daddr_t block; 3091541Srgrimes 3101541Srgrimes /* 3115455Sdg * If filesystem no longer mounted or offset beyond end of file we do 3125455Sdg * not have the page. 
3131541Srgrimes */ 3144797Sdg if ((vp->v_mount == NULL) || (offset >= vnp->vnp_size)) 3154797Sdg return FALSE; 3161541Srgrimes 3174797Sdg block = offset / vp->v_mount->mnt_stat.f_iosize; 3184797Sdg if (incore(vp, block)) 3194446Sdg return TRUE; 3201541Srgrimes /* 3211827Sdg * Read the index to find the disk block to read from. If there is no 3221827Sdg * block, report that we don't have this data. 3231827Sdg * 3241541Srgrimes * Assumes that the vnode has whole page or nothing. 3251541Srgrimes */ 3264797Sdg err = VOP_BMAP(vp, block, (struct vnode **) 0, &bn, 0); 3275455Sdg if (err) 3281827Sdg return (TRUE); 3291827Sdg return ((long) bn < 0 ? FALSE : TRUE); 3301541Srgrimes} 3311541Srgrimes 3321541Srgrimes/* 3331541Srgrimes * Lets the VM system know about a change in size for a file. 3341541Srgrimes * If this vnode is mapped into some address space (i.e. we have a pager 3351541Srgrimes * for it) we adjust our own internal size and flush any cached pages in 3361541Srgrimes * the associated object that are affected by the size change. 3371541Srgrimes * 3381541Srgrimes * Note: this routine may be invoked as a result of a pager put 3391541Srgrimes * operation (possibly at object termination time), so we must be careful. 
3401541Srgrimes */ 3411541Srgrimesvoid 3421541Srgrimesvnode_pager_setsize(vp, nsize) 3431541Srgrimes struct vnode *vp; 3445455Sdg u_long nsize; 3451541Srgrimes{ 3461541Srgrimes register vn_pager_t vnp; 3471541Srgrimes register vm_object_t object; 3481541Srgrimes vm_pager_t pager; 3491541Srgrimes 3501541Srgrimes /* 3511541Srgrimes * Not a mapped vnode 3521541Srgrimes */ 3531541Srgrimes if (vp == NULL || vp->v_type != VREG || vp->v_vmdata == NULL) 3541541Srgrimes return; 3551827Sdg 3561541Srgrimes /* 3571541Srgrimes * Hasn't changed size 3581541Srgrimes */ 3593374Sdg object = (vm_object_t) vp->v_vmdata; 3605455Sdg if (object == NULL) 3613374Sdg return; 3625455Sdg if ((pager = object->pager) == NULL) 3633374Sdg return; 3641827Sdg vnp = (vn_pager_t) pager->pg_data; 3651541Srgrimes if (nsize == vnp->vnp_size) 3661541Srgrimes return; 3671827Sdg 3681541Srgrimes /* 3691827Sdg * No object. This can happen during object termination since 3701827Sdg * vm_object_page_clean is called after the object has been removed 3711827Sdg * from the hash table, and clean may cause vnode write operations 3721827Sdg * which can wind up back here. 3731541Srgrimes */ 3741541Srgrimes object = vm_object_lookup(pager); 3751541Srgrimes if (object == NULL) 3761541Srgrimes return; 3771541Srgrimes 3781541Srgrimes /* 3791827Sdg * File has shrunk. Toss any cached pages beyond the new EOF. 3801541Srgrimes */ 3811827Sdg if (nsize < vnp->vnp_size) { 3825455Sdg if (round_page((vm_offset_t) nsize) < vnp->vnp_size) { 3835455Sdg vm_object_lock(object); 3845455Sdg vm_object_page_remove(object, 3855455Sdg round_page((vm_offset_t) nsize), vnp->vnp_size); 3865455Sdg vm_object_unlock(object); 3875455Sdg } 3881827Sdg /* 3891827Sdg * this gets rid of garbage at the end of a page that is now 3901827Sdg * only partially backed by the vnode... 
3911827Sdg */ 3921827Sdg if (nsize & PAGE_MASK) { 3931827Sdg vm_offset_t kva; 3941827Sdg vm_page_t m; 3951827Sdg 3961827Sdg m = vm_page_lookup(object, trunc_page((vm_offset_t) nsize)); 3971827Sdg if (m) { 3981827Sdg kva = vm_pager_map_page(m); 3991827Sdg bzero((caddr_t) kva + (nsize & PAGE_MASK), 4005455Sdg round_page(nsize) - nsize); 4011827Sdg vm_pager_unmap_page(kva); 4021827Sdg } 4031827Sdg } 4041541Srgrimes } 4051827Sdg vnp->vnp_size = (vm_offset_t) nsize; 4061827Sdg object->size = round_page(nsize); 4071827Sdg 4081541Srgrimes vm_object_deallocate(object); 4091541Srgrimes} 4101541Srgrimes 4111541Srgrimesvoid 4121541Srgrimesvnode_pager_umount(mp) 4131541Srgrimes register struct mount *mp; 4141541Srgrimes{ 4151541Srgrimes register vm_pager_t pager, npager; 4161541Srgrimes struct vnode *vp; 4171541Srgrimes 4181549Srgrimes pager = vnode_pager_list.tqh_first; 4191827Sdg while (pager) { 4201827Sdg 4211541Srgrimes /* 4221827Sdg * Save the next pointer now since uncaching may terminate the 4231827Sdg * object and render pager invalid 4241541Srgrimes */ 4251827Sdg vp = ((vn_pager_t) pager->pg_data)->vnp_vp; 4261541Srgrimes npager = pager->pg_list.tqe_next; 4271827Sdg if (mp == (struct mount *) 0 || vp->v_mount == mp) 4281541Srgrimes (void) vnode_pager_uncache(vp); 4291549Srgrimes pager = npager; 4301541Srgrimes } 4311541Srgrimes} 4321541Srgrimes 4331541Srgrimes/* 4341541Srgrimes * Remove vnode associated object from the object cache. 4351541Srgrimes * 4361549Srgrimes * Note: this routine may be invoked as a result of a pager put 4371549Srgrimes * operation (possibly at object termination time), so we must be careful. 
4381549Srgrimes */ 4391549Srgrimesboolean_t 4401549Srgrimesvnode_pager_uncache(vp) 4411549Srgrimes register struct vnode *vp; 4421549Srgrimes{ 4431549Srgrimes register vm_object_t object; 4441549Srgrimes boolean_t uncached, locked; 4451549Srgrimes vm_pager_t pager; 4461549Srgrimes 4471549Srgrimes /* 4481549Srgrimes * Not a mapped vnode 4491549Srgrimes */ 4503374Sdg object = (vm_object_t) vp->v_vmdata; 4515455Sdg if (object == NULL) 4525455Sdg return (TRUE); 4535455Sdg 4543374Sdg pager = object->pager; 4551549Srgrimes if (pager == NULL) 4561549Srgrimes return (TRUE); 4571827Sdg 4581549Srgrimes /* 4591827Sdg * Unlock the vnode if it is currently locked. We do this since 4601827Sdg * uncaching the object may result in its destruction which may 4611827Sdg * initiate paging activity which may necessitate locking the vnode. 4621549Srgrimes */ 4631549Srgrimes locked = VOP_ISLOCKED(vp); 4641549Srgrimes if (locked) 4651549Srgrimes VOP_UNLOCK(vp); 4661827Sdg 4671549Srgrimes /* 4681827Sdg * Must use vm_object_lookup() as it actually removes the object from 4691827Sdg * the cache list. 
4701549Srgrimes */ 4711549Srgrimes object = vm_object_lookup(pager); 4721549Srgrimes if (object) { 4731549Srgrimes uncached = (object->ref_count <= 1); 4741549Srgrimes pager_cache(object, FALSE); 4751549Srgrimes } else 4761549Srgrimes uncached = TRUE; 4771549Srgrimes if (locked) 4781549Srgrimes VOP_LOCK(vp); 4791827Sdg return (uncached); 4801549Srgrimes} 4811541Srgrimes 4821541Srgrimes 4831549Srgrimesvoid 4841549Srgrimesvnode_pager_freepage(m) 4851549Srgrimes vm_page_t m; 4861541Srgrimes{ 4871549Srgrimes PAGE_WAKEUP(m); 4881549Srgrimes vm_page_free(m); 4891549Srgrimes} 4901549Srgrimes 4911549Srgrimes/* 4921549Srgrimes * calculate the linear (byte) disk address of specified virtual 4931549Srgrimes * file address 4941549Srgrimes */ 4951549Srgrimesvm_offset_t 4966151Sdgvnode_pager_addr(vp, address, run) 4971549Srgrimes struct vnode *vp; 4981549Srgrimes vm_offset_t address; 4996151Sdg int *run; 5001549Srgrimes{ 5015455Sdg int rtaddress; 5025455Sdg int bsize; 5031549Srgrimes vm_offset_t block; 5041549Srgrimes struct vnode *rtvp; 5055455Sdg int err; 5065455Sdg int vblock, voffset; 5071549Srgrimes 5085455Sdg if ((int) address < 0) 5095455Sdg return -1; 5105455Sdg 5111549Srgrimes bsize = vp->v_mount->mnt_stat.f_iosize; 5121549Srgrimes vblock = address / bsize; 5131549Srgrimes voffset = address % bsize; 5141549Srgrimes 5156151Sdg err = VOP_BMAP(vp, vblock, &rtvp, &block, run); 5161549Srgrimes 5176151Sdg if (err || (block == -1)) 5181549Srgrimes rtaddress = -1; 5196151Sdg else { 5206626Sdg rtaddress = block + voffset / DEV_BSIZE; 5216151Sdg if( run) { 5226151Sdg *run += 1; 5236151Sdg *run *= bsize/PAGE_SIZE; 5246151Sdg *run -= voffset/PAGE_SIZE; 5256151Sdg } 5266151Sdg } 5271549Srgrimes 5281549Srgrimes return rtaddress; 5291549Srgrimes} 5301549Srgrimes 5311549Srgrimes/* 5321549Srgrimes * interrupt routine for I/O completion 5331549Srgrimes */ 5341549Srgrimesvoid 5351549Srgrimesvnode_pager_iodone(bp) 5361549Srgrimes struct buf *bp; 5371549Srgrimes{ 5381549Srgrimes bp->b_flags 
|= B_DONE; 5391827Sdg wakeup((caddr_t) bp); 5405455Sdg if (bp->b_flags & B_ASYNC) { 5411887Sdg vm_offset_t paddr; 5421887Sdg vm_page_t m; 5431887Sdg vm_object_t obj = 0; 5441887Sdg int i; 5451887Sdg int npages; 5461887Sdg 5471887Sdg paddr = (vm_offset_t) bp->b_data; 5485455Sdg if (bp->b_bufsize != bp->b_bcount) 5495455Sdg bzero(bp->b_data + bp->b_bcount, 5505455Sdg bp->b_bufsize - bp->b_bcount); 5511887Sdg 5521887Sdg npages = (bp->b_bufsize + PAGE_SIZE - 1) / PAGE_SIZE; 5535455Sdg for (i = 0; i < npages; i++) { 5541887Sdg m = PHYS_TO_VM_PAGE(pmap_kextract(paddr + i * PAGE_SIZE)); 5551887Sdg obj = m->object; 5565455Sdg if (m) { 5575455Sdg m->dirty = 0; 5585455Sdg m->valid = VM_PAGE_BITS_ALL; 5595455Sdg if (m->flags & PG_WANTED) 5605455Sdg m->flags |= PG_REFERENCED; 5611887Sdg PAGE_WAKEUP(m); 5621887Sdg } else { 5631887Sdg panic("vnode_pager_iodone: page is gone!!!"); 5641887Sdg } 5651887Sdg } 5665455Sdg pmap_qremove(paddr, npages); 5675455Sdg if (obj) { 5681887Sdg --obj->paging_in_progress; 5696618Sdg if (obj->paging_in_progress == 0 && 5706618Sdg (obj->flags & OBJ_PIPWNT)) { 5716618Sdg obj->flags &= ~OBJ_PIPWNT; 5721887Sdg wakeup((caddr_t) obj); 5736618Sdg } 5741887Sdg } else { 5751887Sdg panic("vnode_pager_iodone: object is gone???"); 5761887Sdg } 5771887Sdg relpbuf(bp); 5781887Sdg } 5791549Srgrimes} 5801549Srgrimes 5811549Srgrimes/* 5821549Srgrimes * small block file system vnode pager input 5831549Srgrimes */ 5841549Srgrimesint 5851549Srgrimesvnode_pager_input_smlfs(vnp, m) 5861549Srgrimes vn_pager_t vnp; 5871549Srgrimes vm_page_t m; 5881549Srgrimes{ 5895455Sdg int i; 5905455Sdg int s; 5911549Srgrimes struct vnode *dp, *vp; 5921549Srgrimes struct buf *bp; 5931549Srgrimes vm_offset_t kva; 5945455Sdg int fileaddr; 5955455Sdg int block; 5961549Srgrimes vm_offset_t bsize; 5975455Sdg int error = 0; 5981549Srgrimes 5991549Srgrimes vp = vnp->vnp_vp; 6001549Srgrimes bsize = vp->v_mount->mnt_stat.f_iosize; 6011549Srgrimes 6025455Sdg VOP_BMAP(vp, 0, &dp, 0, 0); 
6031549Srgrimes 6041549Srgrimes kva = vm_pager_map_page(m); 6051549Srgrimes 6061827Sdg for (i = 0; i < PAGE_SIZE / bsize; i++) { 6071827Sdg 6085455Sdg if ((vm_page_bits(m->offset + i * bsize, bsize) & m->valid)) 6095455Sdg continue; 6101549Srgrimes 6116151Sdg fileaddr = vnode_pager_addr(vp, m->offset + i * bsize, (int *)0); 6121827Sdg if (fileaddr != -1) { 6131549Srgrimes bp = getpbuf(); 6141549Srgrimes 6151827Sdg /* build a minimal buffer header */ 6161549Srgrimes bp->b_flags = B_BUSY | B_READ | B_CALL; 6171549Srgrimes bp->b_iodone = vnode_pager_iodone; 6181549Srgrimes bp->b_proc = curproc; 6191549Srgrimes bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred; 6201827Sdg if (bp->b_rcred != NOCRED) 6211549Srgrimes crhold(bp->b_rcred); 6221827Sdg if (bp->b_wcred != NOCRED) 6231549Srgrimes crhold(bp->b_wcred); 6241549Srgrimes bp->b_un.b_addr = (caddr_t) kva + i * bsize; 6256626Sdg bp->b_blkno = fileaddr; 6265455Sdg pbgetvp(dp, bp); 6271549Srgrimes bp->b_bcount = bsize; 6281549Srgrimes bp->b_bufsize = bsize; 6291827Sdg 6301827Sdg /* do the input */ 6311549Srgrimes VOP_STRATEGY(bp); 6321549Srgrimes 6331827Sdg /* we definitely need to be at splbio here */ 6341549Srgrimes 6351549Srgrimes s = splbio(); 6361549Srgrimes while ((bp->b_flags & B_DONE) == 0) { 6371827Sdg tsleep((caddr_t) bp, PVM, "vnsrd", 0); 6381549Srgrimes } 6391549Srgrimes splx(s); 6401549Srgrimes if ((bp->b_flags & B_ERROR) != 0) 6411549Srgrimes error = EIO; 6421549Srgrimes 6431827Sdg /* 6441827Sdg * free the buffer header back to the swap buffer pool 6451827Sdg */ 6461549Srgrimes relpbuf(bp); 6471549Srgrimes HOLDRELE(vp); 6481827Sdg if (error) 6491549Srgrimes break; 6505455Sdg 6515455Sdg vm_page_set_clean(m, i * bsize, bsize); 6525455Sdg vm_page_set_valid(m, i * bsize, bsize); 6531549Srgrimes } else { 6545455Sdg vm_page_set_clean(m, i * bsize, bsize); 6551549Srgrimes bzero((caddr_t) kva + i * bsize, bsize); 6561549Srgrimes } 6571549Srgrimesnextblock: 6581549Srgrimes } 6591549Srgrimes vm_pager_unmap_page(kva); 
6605455Sdg pmap_clear_modify(VM_PAGE_TO_PHYS(m)); 6611827Sdg if (error) { 6624207Sdg return VM_PAGER_ERROR; 6631549Srgrimes } 6641549Srgrimes return VM_PAGER_OK; 6651549Srgrimes 6661549Srgrimes} 6671549Srgrimes 6681549Srgrimes 6691549Srgrimes/* 6701549Srgrimes * old style vnode pager output routine 6711549Srgrimes */ 6721549Srgrimesint 6731549Srgrimesvnode_pager_input_old(vnp, m) 6741549Srgrimes vn_pager_t vnp; 6751549Srgrimes vm_page_t m; 6761549Srgrimes{ 6771541Srgrimes struct uio auio; 6781541Srgrimes struct iovec aiov; 6795455Sdg int error; 6805455Sdg int size; 6811549Srgrimes vm_offset_t kva; 6821549Srgrimes 6831549Srgrimes error = 0; 6841827Sdg 6851549Srgrimes /* 6861549Srgrimes * Return failure if beyond current EOF 6871549Srgrimes */ 6885455Sdg if (m->offset >= vnp->vnp_size) { 6891549Srgrimes return VM_PAGER_BAD; 6901549Srgrimes } else { 6911549Srgrimes size = PAGE_SIZE; 6925455Sdg if (m->offset + size > vnp->vnp_size) 6935455Sdg size = vnp->vnp_size - m->offset; 6945455Sdg /* 6955455Sdg * Allocate a kernel virtual address and initialize so that 6965455Sdg * we can use VOP_READ/WRITE routines. 
6975455Sdg */ 6981549Srgrimes kva = vm_pager_map_page(m); 6991827Sdg aiov.iov_base = (caddr_t) kva; 7001549Srgrimes aiov.iov_len = size; 7011549Srgrimes auio.uio_iov = &aiov; 7021549Srgrimes auio.uio_iovcnt = 1; 7035455Sdg auio.uio_offset = m->offset; 7041549Srgrimes auio.uio_segflg = UIO_SYSSPACE; 7051549Srgrimes auio.uio_rw = UIO_READ; 7061549Srgrimes auio.uio_resid = size; 7071827Sdg auio.uio_procp = (struct proc *) 0; 7081549Srgrimes 7091549Srgrimes error = VOP_READ(vnp->vnp_vp, &auio, 0, curproc->p_ucred); 7101549Srgrimes if (!error) { 7111549Srgrimes register int count = size - auio.uio_resid; 7121549Srgrimes 7131549Srgrimes if (count == 0) 7141549Srgrimes error = EINVAL; 7151549Srgrimes else if (count != PAGE_SIZE) 7161827Sdg bzero((caddr_t) kva + count, PAGE_SIZE - count); 7171549Srgrimes } 7181549Srgrimes vm_pager_unmap_page(kva); 7191549Srgrimes } 7201549Srgrimes pmap_clear_modify(VM_PAGE_TO_PHYS(m)); 7215455Sdg m->dirty = 0; 7224207Sdg return error ? VM_PAGER_ERROR : VM_PAGER_OK; 7231549Srgrimes} 7241549Srgrimes 7251549Srgrimes/* 7261549Srgrimes * generic vnode pager input routine 7271549Srgrimes */ 7281549Srgrimesint 7291549Srgrimesvnode_pager_input(vnp, m, count, reqpage) 7301549Srgrimes register vn_pager_t vnp; 7311549Srgrimes vm_page_t *m; 7325455Sdg int count, reqpage; 7331549Srgrimes{ 7345455Sdg int i; 7351541Srgrimes vm_offset_t kva, foff; 7365455Sdg int size, sizea; 7371549Srgrimes vm_object_t object; 7381549Srgrimes struct vnode *dp, *vp; 7395455Sdg int bsize; 7401541Srgrimes 7415455Sdg int first, last; 7426151Sdg int firstaddr; 7435455Sdg int block, offset; 7446151Sdg int runpg; 7456151Sdg int runend; 7461549Srgrimes 7471887Sdg struct buf *bp, *bpa; 7485455Sdg int counta; 7495455Sdg int s; 7505455Sdg int failflag; 7511549Srgrimes 7525455Sdg int error = 0; 7531549Srgrimes 7541827Sdg object = m[reqpage]->object; /* all vm_page_t items are in same 7551827Sdg * object */ 7561549Srgrimes 7571549Srgrimes vp = vnp->vnp_vp; 7581549Srgrimes bsize = 
vp->v_mount->mnt_stat.f_iosize; 7591549Srgrimes 7601549Srgrimes /* get the UNDERLYING device for the file with VOP_BMAP() */ 7611827Sdg 7621549Srgrimes /* 7631827Sdg * originally, we did not check for an error return value -- assuming 7641827Sdg * an fs always has a bmap entry point -- that assumption is wrong!!! 7651549Srgrimes */ 7665455Sdg foff = m[reqpage]->offset; 7671827Sdg 7681549Srgrimes /* 7691887Sdg * if we can't bmap, use old VOP code 7701549Srgrimes */ 7715455Sdg if (VOP_BMAP(vp, 0, &dp, 0, 0)) { 7721549Srgrimes for (i = 0; i < count; i++) { 7731549Srgrimes if (i != reqpage) { 7741549Srgrimes vnode_pager_freepage(m[i]); 7751549Srgrimes } 7761549Srgrimes } 7773612Sdg cnt.v_vnodein++; 7783612Sdg cnt.v_vnodepgsin++; 7791549Srgrimes return vnode_pager_input_old(vnp, m[reqpage]); 7801549Srgrimes 7811827Sdg /* 7821827Sdg * if the blocksize is smaller than a page size, then use 7831827Sdg * special small filesystem code. NFS sometimes has a small 7841827Sdg * blocksize, but it can handle large reads itself. 
7851827Sdg */ 7861827Sdg } else if ((PAGE_SIZE / bsize) > 1 && 7875455Sdg (vp->v_mount->mnt_stat.f_type != MOUNT_NFS)) { 7881827Sdg 7891549Srgrimes for (i = 0; i < count; i++) { 7901549Srgrimes if (i != reqpage) { 7911549Srgrimes vnode_pager_freepage(m[i]); 7921549Srgrimes } 7931549Srgrimes } 7943612Sdg cnt.v_vnodein++; 7953612Sdg cnt.v_vnodepgsin++; 7961549Srgrimes return vnode_pager_input_smlfs(vnp, m[reqpage]); 7971549Srgrimes } 7981549Srgrimes /* 7995455Sdg * if ANY DEV_BSIZE blocks are valid on a large filesystem block 8005455Sdg * then, the entire page is valid -- 8011549Srgrimes */ 8025455Sdg if (m[reqpage]->valid) { 8035455Sdg m[reqpage]->valid = VM_PAGE_BITS_ALL; 8045455Sdg for (i = 0; i < count; i++) { 8055455Sdg if (i != reqpage) 8065455Sdg vnode_pager_freepage(m[i]); 8071549Srgrimes } 8085455Sdg return VM_PAGER_OK; 8091549Srgrimes } 8105455Sdg /* 8115455Sdg * here on direct device I/O 8125455Sdg */ 8131549Srgrimes 8141827Sdg 8156151Sdg firstaddr = -1; 8161549Srgrimes /* 8176151Sdg * calculate the run that includes the required page 8181549Srgrimes */ 8196151Sdg for(first = 0, i = 0; i < count; i = runend) { 8206151Sdg firstaddr = vnode_pager_addr(vp, m[i]->offset, &runpg); 8216151Sdg if (firstaddr == -1) { 8226151Sdg if( i == reqpage && foff < vnp->vnp_size) { 8236151Sdg printf("vnode_pager_input: unexpected missing page: firstaddr: %d, foff: %d, vnp_size: %d\n", 8246151Sdg firstaddr, foff, vnp->vnp_size); 8256151Sdg panic("vnode_pager_input:..."); 8266151Sdg } 8271549Srgrimes vnode_pager_freepage(m[i]); 8286151Sdg runend = i + 1; 8296151Sdg first = runend; 8306151Sdg continue; 8311549Srgrimes } 8326151Sdg runend = i + runpg; 8336151Sdg if( runend <= reqpage) { 8346151Sdg int j; 8356151Sdg for(j = i; j < runend; j++) { 8366151Sdg vnode_pager_freepage(m[j]); 8376151Sdg } 8381549Srgrimes } else { 8396151Sdg if( runpg < (count - first)) { 8406151Sdg for(i=first + runpg; i < count; i++) 8416151Sdg vnode_pager_freepage(m[i]); 8426151Sdg count = first + 
runpg; 8436151Sdg } 8446151Sdg break; 8451549Srgrimes } 8466151Sdg first = runend; 8471549Srgrimes } 8481549Srgrimes 8491549Srgrimes /* 8501827Sdg * the first and last page have been calculated now, move input pages 8511827Sdg * to be zero based... 8521549Srgrimes */ 8531549Srgrimes if (first != 0) { 8541549Srgrimes for (i = first; i < count; i++) { 8551549Srgrimes m[i - first] = m[i]; 8561549Srgrimes } 8571549Srgrimes count -= first; 8581549Srgrimes reqpage -= first; 8591549Srgrimes } 8606151Sdg 8611549Srgrimes /* 8621549Srgrimes * calculate the file virtual address for the transfer 8631549Srgrimes */ 8645455Sdg foff = m[0]->offset; 8656151Sdg#if 0 8666151Sdg printf("foff: 0x%lx, firstaddr: 0x%lx\n", 8676151Sdg foff, firstaddr); 8686151Sdg DELAY(6000000); 8696151Sdg#endif 8701827Sdg 8711549Srgrimes /* 8721549Srgrimes * calculate the size of the transfer 8731549Srgrimes */ 8741549Srgrimes size = count * PAGE_SIZE; 8751549Srgrimes if ((foff + size) > vnp->vnp_size) 8761549Srgrimes size = vnp->vnp_size - foff; 8771549Srgrimes 8781549Srgrimes /* 8791549Srgrimes * round up physical size for real devices 8801549Srgrimes */ 8811827Sdg if (dp->v_type == VBLK || dp->v_type == VCHR) 8821549Srgrimes size = (size + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1); 8831549Srgrimes 8841887Sdg counta = 0; 8855455Sdg if (count * PAGE_SIZE > bsize) 8861887Sdg counta = (count - reqpage) - 1; 8871887Sdg bpa = 0; 8881887Sdg sizea = 0; 8895841Sdg bp = getpbuf(); 8905455Sdg if (counta) { 8915841Sdg bpa = (struct buf *) trypbuf(); 8925841Sdg if (bpa) { 8935841Sdg count -= counta; 8945841Sdg sizea = size - count * PAGE_SIZE; 8955841Sdg size = count * PAGE_SIZE; 8965841Sdg } 8971887Sdg } 8985455Sdg kva = (vm_offset_t) bp->b_data; 8991887Sdg 9001549Srgrimes /* 9011549Srgrimes * and map the pages to be read into the kva 9021549Srgrimes */ 9031887Sdg pmap_qenter(kva, m, count); 9041549Srgrimes 9051549Srgrimes /* build a minimal buffer header */ 9061549Srgrimes bp->b_flags = B_BUSY | B_READ | B_CALL; 
9071549Srgrimes bp->b_iodone = vnode_pager_iodone; 9081549Srgrimes /* B_PHYS is not set, but it is nice to fill this in */ 9091549Srgrimes bp->b_proc = curproc; 9101549Srgrimes bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred; 9111827Sdg if (bp->b_rcred != NOCRED) 9121549Srgrimes crhold(bp->b_rcred); 9131827Sdg if (bp->b_wcred != NOCRED) 9141549Srgrimes crhold(bp->b_wcred); 9156626Sdg bp->b_blkno = firstaddr; 9165455Sdg pbgetvp(dp, bp); 9171549Srgrimes bp->b_bcount = size; 9181549Srgrimes bp->b_bufsize = size; 9191549Srgrimes 9203612Sdg cnt.v_vnodein++; 9213612Sdg cnt.v_vnodepgsin += count; 9223612Sdg 9231549Srgrimes /* do the input */ 9241549Srgrimes VOP_STRATEGY(bp); 9253612Sdg 9265455Sdg if (counta) { 9275455Sdg for (i = 0; i < counta; i++) { 9285455Sdg vm_page_deactivate(m[count + i]); 9291887Sdg } 9305455Sdg pmap_qenter((vm_offset_t) bpa->b_data, &m[count], counta); 9311887Sdg ++m[count]->object->paging_in_progress; 9321887Sdg bpa->b_flags = B_BUSY | B_READ | B_CALL | B_ASYNC; 9331887Sdg bpa->b_iodone = vnode_pager_iodone; 9341887Sdg /* B_PHYS is not set, but it is nice to fill this in */ 9351887Sdg bpa->b_proc = curproc; 9361887Sdg bpa->b_rcred = bpa->b_wcred = bpa->b_proc->p_ucred; 9371887Sdg if (bpa->b_rcred != NOCRED) 9381887Sdg crhold(bpa->b_rcred); 9391887Sdg if (bpa->b_wcred != NOCRED) 9401887Sdg crhold(bpa->b_wcred); 9416626Sdg bpa->b_blkno = firstaddr + count * (PAGE_SIZE / DEV_BSIZE); 9425455Sdg pbgetvp(dp, bpa); 9431887Sdg bpa->b_bcount = sizea; 9445455Sdg bpa->b_bufsize = counta * PAGE_SIZE; 9451549Srgrimes 9463612Sdg cnt.v_vnodepgsin += counta; 9471887Sdg VOP_STRATEGY(bpa); 9481887Sdg } 9491549Srgrimes s = splbio(); 9501549Srgrimes /* we definitely need to be at splbio here */ 9511549Srgrimes 9521549Srgrimes while ((bp->b_flags & B_DONE) == 0) { 9531827Sdg tsleep((caddr_t) bp, PVM, "vnread", 0); 9541549Srgrimes } 9551549Srgrimes splx(s); 9561549Srgrimes if ((bp->b_flags & B_ERROR) != 0) 9571549Srgrimes error = EIO; 9581549Srgrimes 9591549Srgrimes 
if (!error) { 9601549Srgrimes if (size != count * PAGE_SIZE) 9611827Sdg bzero((caddr_t) kva + size, PAGE_SIZE * count - size); 9621549Srgrimes } 9635455Sdg pmap_qremove(kva, count); 9641549Srgrimes 9651549Srgrimes /* 9661549Srgrimes * free the buffer header back to the swap buffer pool 9671549Srgrimes */ 9681549Srgrimes relpbuf(bp); 9691549Srgrimes HOLDRELE(vp); 9701549Srgrimes 9711549Srgrimesfinishup: 9721549Srgrimes for (i = 0; i < count; i++) { 9732386Sdg pmap_clear_modify(VM_PAGE_TO_PHYS(m[i])); 9745455Sdg m[i]->dirty = 0; 9755455Sdg m[i]->valid = VM_PAGE_BITS_ALL; 9761549Srgrimes if (i != reqpage) { 9771827Sdg 9781549Srgrimes /* 9791827Sdg * whether or not to leave the page activated is up in 9801827Sdg * the air, but we should put the page on a page queue 9811827Sdg * somewhere. (it already is in the object). Result: 9821827Sdg * It appears that emperical results show that 9831827Sdg * deactivating pages is best. 9841549Srgrimes */ 9851827Sdg 9861549Srgrimes /* 9871827Sdg * just in case someone was asking for this page we 9881827Sdg * now tell them that it is ok to use 9891549Srgrimes */ 9901549Srgrimes if (!error) { 9915841Sdg vm_page_deactivate(m[i]); 9921549Srgrimes PAGE_WAKEUP(m[i]); 9931549Srgrimes } else { 9941549Srgrimes vnode_pager_freepage(m[i]); 9951549Srgrimes } 9961549Srgrimes } 9971549Srgrimes } 9981549Srgrimes if (error) { 9994207Sdg printf("vnode_pager_input: I/O read error\n"); 10001549Srgrimes } 10014207Sdg return (error ? 
VM_PAGER_ERROR : VM_PAGER_OK);
}

/*
 * old-style vnode pager output routine
 *
 * Write a single page back to its vnode with VOP_WRITE().  This is the
 * slow fallback path used by vnode_pager_output() when the filesystem
 * cannot translate the page's file offset into a disk block (no usable
 * VOP_BMAP).  Returns VM_PAGER_OK on success (or for pages entirely
 * beyond EOF) and VM_PAGER_ERROR on failure.
 */
int
vnode_pager_output_old(vnp, m)
	register vn_pager_t vnp;
	vm_page_t m;
{
	vm_offset_t kva, kva2;
	vm_offset_t size;
	struct iovec aiov;
	struct uio auio;
	struct vnode *vp;
	int error;

	vp = vnp->vnp_vp;

	/*
	 * Dont return failure if beyond current EOF placate the VM system.
	 */
	if (m->offset >= vnp->vnp_size) {
		return VM_PAGER_OK;
	} else {
		size = PAGE_SIZE;
		/* clip the transfer so it does not run past end of file */
		if (m->offset + size > vnp->vnp_size)
			size = vnp->vnp_size - m->offset;

		kva2 = kmem_alloc(pager_map, PAGE_SIZE);
		/*
		 * Allocate a kernel virtual address and initialize so that
		 * we can use VOP_WRITE routines.
		 *
		 * The page contents are copied into the scratch buffer
		 * (kva2) so the page itself can be unmapped, marked clean
		 * and woken up (PAGE_WAKEUP) before the potentially slow
		 * VOP_WRITE is issued.
		 */
		kva = vm_pager_map_page(m);
		bcopy((caddr_t) kva, (caddr_t) kva2, size);
		vm_pager_unmap_page(kva);
		pmap_clear_modify(VM_PAGE_TO_PHYS(m));
		PAGE_WAKEUP(m);

		/* build a uio describing a kernel-space write of the copy */
		aiov.iov_base = (caddr_t) kva2;
		aiov.iov_len = size;
		auio.uio_iov = &aiov;
		auio.uio_iovcnt = 1;
		auio.uio_offset = m->offset;
		auio.uio_segflg = UIO_SYSSPACE;
		auio.uio_rw = UIO_WRITE;
		auio.uio_resid = size;
		auio.uio_procp = (struct proc *) 0;

		/* uses current process credentials (see TODO at top of file) */
		error = VOP_WRITE(vp, &auio, 0, curproc->p_ucred);

		kmem_free_wakeup(pager_map, kva2, PAGE_SIZE);
		if (!error) {
			/* a zero-byte transfer is treated as an error */
			if ((size - auio.uio_resid) == 0) {
				error = EINVAL;
			}
		}
		return error ? VM_PAGER_ERROR : VM_PAGER_OK;
	}
}

/*
 * vnode pager output on a small-block file system
 *
 * Write out page m on a filesystem whose block size is smaller than
 * PAGE_SIZE.  Each filesystem block inside the page may map to a
 * different (or no) disk address, so blocks are written individually
 * and synchronously.  Blocks that are not both valid and dirty, or
 * that have no backing disk block (fileaddr == -1), are skipped.
 * Returns VM_PAGER_OK or VM_PAGER_ERROR.
 */
int
vnode_pager_output_smlfs(vnp, m)
	vn_pager_t vnp;
	vm_page_t m;
{
	int i;
	int s;
	struct vnode *dp, *vp;
	struct buf *bp;
	vm_offset_t kva;
	int fileaddr;
	vm_offset_t bsize;
	int error = 0;

	vp = vnp->vnp_vp;
	bsize = vp->v_mount->mnt_stat.f_iosize;

	/* get the underlying device vnode for the I/O */
	VOP_BMAP(vp, 0, &dp, 0, 0);
	kva = vm_pager_map_page(m);
	for (i = 0; !error && i < (PAGE_SIZE / bsize); i++) {

		/* only write blocks that are both valid and dirty */
		if ((vm_page_bits(m->offset + i * bsize, bsize) & m->valid & m->dirty) == 0)
			continue;
		/*
		 * calculate logical block and offset
		 */
		fileaddr = vnode_pager_addr(vp, m->offset + i * bsize, (int *)0);
		if (fileaddr != -1) {

			bp = getpbuf();

			/* build a minimal buffer header */
			bp->b_flags = B_BUSY | B_CALL | B_WRITE;
			bp->b_iodone = vnode_pager_iodone;
			bp->b_proc = curproc;
			bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
			if (bp->b_rcred != NOCRED)
				crhold(bp->b_rcred);
			if (bp->b_wcred != NOCRED)
				crhold(bp->b_wcred);
			/* point the buffer at this block's slice of the page */
			bp->b_un.b_addr = (caddr_t) kva + i * bsize;
			bp->b_blkno = fileaddr;
			pbgetvp(dp, bp);
			++dp->v_numoutput;
			/* for NFS */
			bp->b_dirtyoff = 0;
			bp->b_dirtyend = bsize;
			bp->b_bcount = bsize;
			bp->b_bufsize = bsize;

			/* do the output */
			VOP_STRATEGY(bp);

			/* we definitely need to be at splbio here */

			/* wait synchronously for this block's I/O to finish */
			s = splbio();
			while ((bp->b_flags & B_DONE) == 0) {
				tsleep((caddr_t) bp, PVM, "vnswrt", 0);
			}
			splx(s);
			if ((bp->b_flags & B_ERROR) != 0)
				error = EIO;

			vm_page_set_clean(m, i * bsize, bsize);
			/*
			 * free the buffer header back to the swap buffer pool
			 */
			relpbuf(bp);
			HOLDRELE(vp);
		}
	}
	vm_pager_unmap_page(kva);
	if (error)
		return VM_PAGER_ERROR;
	else
		return VM_PAGER_OK;
}

/*
 * generic vnode pager output routine
 *
 * Write the run of pages m[0..count-1] (all belonging to the same VM
 * object, at ascending file offsets) back to the backing vnode in a
 * single clustered I/O where possible.  Per-page status is returned in
 * rtvals[]; the function's return value reflects the overall result
 * (VM_PAGER_OK, VM_PAGER_ERROR, or VM_PAGER_FAIL if the filesystem has
 * been unmounted).
 *
 * Fallback paths:
 *   - no usable VOP_BMAP (or hole): vnode_pager_output_old() for m[0]
 *   - small-block non-NFS filesystem: vnode_pager_output_smlfs() per page
 */
int
vnode_pager_output(vnp, m, count, rtvals)
	vn_pager_t vnp;
	vm_page_t *m;
	int count;
	int *rtvals;
{
	int i, j;
	vm_offset_t kva, foff;
	int size;
	vm_object_t object;
	struct vnode *dp, *vp;
	struct buf *bp;
	vm_offset_t reqaddr;
	int bsize;
	int s;
	daddr_t block;
	struct timeval tv;
	int runpg;

	int error = 0;

retryoutput:
	object = m[0]->object;	/* all vm_page_t items are in same object */

	vp = vnp->vnp_vp;

	/*
	 * Make sure underlying filesystem is still mounted.
	 */
	if (vp->v_mount == NULL)
		return VM_PAGER_FAIL;

	bsize = vp->v_mount->mnt_stat.f_iosize;

	/* default every page to "try again" until proven otherwise */
	for (i = 0; i < count; i++)
		rtvals[i] = VM_PAGER_AGAIN;

	/*
	 * a negative offset marks meta-data; refuse to write it, but
	 * report success so the page is not re-queued
	 */
	if ((int) m[0]->offset < 0) {
		printf("vnode_pager_output: attempt to write meta-data!!! -- 0x%x\n", m[0]->offset);
		m[0]->dirty = 0;
		rtvals[0] = VM_PAGER_OK;
		return VM_PAGER_OK;
	}
	/*
	 * if the filesystem does not have a bmap, then use the old code
	 */
	if (VOP_BMAP(vp, (m[0]->offset / bsize), &dp, &block, 0) ||
	    (block == -1)) {

		rtvals[0] = vnode_pager_output_old(vnp, m[0]);

		m[0]->dirty = 0;
		cnt.v_vnodeout++;
		cnt.v_vnodepgsout++;
		return rtvals[0];
	}
	/* update the vnode's timestamps before writing */
	tv = time;
	VOP_UPDATE(vp, &tv, &tv, 0);

	/*
	 * if the filesystem has a small blocksize, then use the small block
	 * filesystem output code
	 */
	if ((bsize < PAGE_SIZE) &&
	    (vp->v_mount->mnt_stat.f_type != MOUNT_NFS)) {

		for (i = 0; i < count; i++) {
			rtvals[i] = vnode_pager_output_smlfs(vnp, m[i]);
			if (rtvals[i] == VM_PAGER_OK) {
				pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
			}
		}
		cnt.v_vnodeout++;
		cnt.v_vnodepgsout += count;
		return rtvals[0];
	}
	/* drop trailing pages that lie entirely beyond end of file */
	for (i = 0; i < count; i++) {
		foff = m[i]->offset;
		if (foff >= vnp->vnp_size) {
			for (j = i; j < count; j++)
				rtvals[j] = VM_PAGER_BAD;
			count = i;
			break;
		}
	}
	if (count == 0) {
		return rtvals[0];
	}
	/*
	 * find the disk address for the first page, and clip the cluster
	 * to the number of pages that are physically contiguous on disk
	 * (runpg) so a single strategy call can cover them all
	 */
	foff = m[0]->offset;
	reqaddr = vnode_pager_addr(vp, foff, &runpg);
	if( runpg < count)
		count = runpg;

	/*
	 * calculate the size of the transfer
	 */
	size = count * PAGE_SIZE;
	if ((foff + size) > vnp->vnp_size)
		size = vnp->vnp_size - foff;

	/*
	 * round up physical size for real devices
	 */
	if (dp->v_type == VBLK || dp->v_type == VCHR)
		size = (size + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1);

	bp = getpbuf();
	kva = (vm_offset_t) bp->b_data;
	/*
	 * and map the pages to be written into the kva
	 */
	pmap_qenter(kva, m, count);

	/* build a minimal buffer header */
	bp->b_flags = B_BUSY | B_WRITE | B_CALL;
	bp->b_iodone = vnode_pager_iodone;
	/* B_PHYS is not set, but it is nice to fill this in */
	bp->b_proc = curproc;
	bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;

	if (bp->b_rcred != NOCRED)
		crhold(bp->b_rcred);
	if (bp->b_wcred != NOCRED)
		crhold(bp->b_wcred);
	bp->b_blkno = reqaddr;
	pbgetvp(dp, bp);
	++dp->v_numoutput;

	/* for NFS */
	bp->b_dirtyoff = 0;
	bp->b_dirtyend = size;

	bp->b_bcount = size;
	bp->b_bufsize = size;

	cnt.v_vnodeout++;
	cnt.v_vnodepgsout += count;

	/* do the output */
	VOP_STRATEGY(bp);

	s = splbio();

	/* we definitely need to be at splbio here */

	/* wait synchronously for the write to complete */
	while ((bp->b_flags & B_DONE) == 0) {
		tsleep((caddr_t) bp, PVM, "vnwrite", 0);
	}
	splx(s);

	if ((bp->b_flags & B_ERROR) != 0)
		error = EIO;

	pmap_qremove(kva, count);

	/*
	 * free the buffer header back to the swap buffer pool
	 */
	relpbuf(bp);
	HOLDRELE(vp);

	if (!error) {
		/* success: mark every page clean */
		for (i = 0; i < count; i++) {
			pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
			m[i]->dirty = 0;
			rtvals[i] = VM_PAGER_OK;
		}
	} else if (count != 1) {
		/*
		 * the clustered write failed; retry with just the first
		 * page to isolate the error (remaining pages stay
		 * VM_PAGER_AGAIN)
		 */
		error = 0;
		count = 1;
		goto retryoutput;
	}
	if (error) {
		printf("vnode_pager_output: I/O write error\n");
	}
	return (error ? VM_PAGER_ERROR : VM_PAGER_OK);
}