/*
 * Copyright (c) 1990 University of Utah.
 * Copyright (c) 1991 The Regents of the University of California.
 * All rights reserved.
 * Copyright (c) 1993, 1994 John S. Dyson
 * Copyright (c) 1995, David Greenman
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)vnode_pager.c	7.5 (Berkeley) 4/20/91
 *	$Id: vnode_pager.c,v 1.105 1999/03/27 02:39:01 eivind Exp $
 */

/*
 * Page to/from files (vnodes).
 */

/*
 * TODO:
 *	Implement VOP_GETPAGES/PUTPAGES interface for filesystems.  Will
 *	greatly re-simplify the vnode_pager.
521549Srgrimes */ 531549Srgrimes 541541Srgrimes#include <sys/param.h> 551541Srgrimes#include <sys/systm.h> 561541Srgrimes#include <sys/proc.h> 571541Srgrimes#include <sys/vnode.h> 581541Srgrimes#include <sys/mount.h> 599507Sdg#include <sys/buf.h> 6012662Sdg#include <sys/vmmeter.h> 611541Srgrimes 621541Srgrimes#include <vm/vm.h> 6312662Sdg#include <vm/vm_prot.h> 6412662Sdg#include <vm/vm_object.h> 651541Srgrimes#include <vm/vm_page.h> 669507Sdg#include <vm/vm_pager.h> 6731853Sdyson#include <vm/vm_map.h> 681541Srgrimes#include <vm/vnode_pager.h> 6912662Sdg#include <vm/vm_extern.h> 701541Srgrimes 7112820Sphkstatic vm_offset_t vnode_pager_addr __P((struct vnode *vp, vm_ooffset_t address, 7211943Sbde int *run)); 7312820Sphkstatic void vnode_pager_iodone __P((struct buf *bp)); 7412820Sphkstatic int vnode_pager_input_smlfs __P((vm_object_t object, vm_page_t m)); 7512820Sphkstatic int vnode_pager_input_old __P((vm_object_t object, vm_page_t m)); 7612820Sphkstatic void vnode_pager_dealloc __P((vm_object_t)); 7712820Sphkstatic int vnode_pager_getpages __P((vm_object_t, vm_page_t *, int, int)); 7843129Sdillonstatic void vnode_pager_putpages __P((vm_object_t, vm_page_t *, int, boolean_t, int *)); 7912820Sphkstatic boolean_t vnode_pager_haspage __P((vm_object_t, vm_pindex_t, int *, int *)); 8011943Sbde 811541Srgrimesstruct pagerops vnodepagerops = { 829507Sdg NULL, 831541Srgrimes vnode_pager_alloc, 841541Srgrimes vnode_pager_dealloc, 859507Sdg vnode_pager_getpages, 869507Sdg vnode_pager_putpages, 879507Sdg vnode_pager_haspage, 889507Sdg NULL 891541Srgrimes}; 901541Srgrimes 9142957Sdillonint vnode_pbuf_freecnt = -1; /* start out unlimited */ 9210556Sdyson 9342957Sdillon 941541Srgrimes/* 951541Srgrimes * Allocate (or lookup) pager for a vnode. 961541Srgrimes * Handle is a vnode pointer. 
971541Srgrimes */ 989507Sdgvm_object_t 9940286Sdgvnode_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot, 10028751Sbde vm_ooffset_t offset) 1011541Srgrimes{ 1029456Sdg vm_object_t object; 1031541Srgrimes struct vnode *vp; 1041541Srgrimes 1051541Srgrimes /* 1061541Srgrimes * Pageout to vnode, no can do yet. 1071541Srgrimes */ 1081541Srgrimes if (handle == NULL) 1091827Sdg return (NULL); 1101541Srgrimes 11142957Sdillon /* 11242957Sdillon * XXX hack - This initialization should be put somewhere else. 11342957Sdillon */ 11442957Sdillon if (vnode_pbuf_freecnt < 0) { 11542957Sdillon vnode_pbuf_freecnt = nswbuf / 2 + 1; 11642957Sdillon } 11742957Sdillon 1189411Sdg vp = (struct vnode *) handle; 1199411Sdg 1201541Srgrimes /* 1219411Sdg * Prevent race condition when allocating the object. This 1229411Sdg * can happen with NFS vnodes since the nfsnode isn't locked. 1231541Srgrimes */ 1249411Sdg while (vp->v_flag & VOLOCK) { 1259411Sdg vp->v_flag |= VOWANT; 1269411Sdg tsleep(vp, PVM, "vnpobj", 0); 1279411Sdg } 1289411Sdg vp->v_flag |= VOLOCK; 1299411Sdg 1309411Sdg /* 1319411Sdg * If the object is being terminated, wait for it to 1329411Sdg * go away. 
1339411Sdg */ 13413490Sdyson while (((object = vp->v_object) != NULL) && 13513490Sdyson (object->flags & OBJ_DEAD)) { 1369356Sdg tsleep(object, PVM, "vadead", 0); 1379507Sdg } 1385455Sdg 13932071Sdyson if (vp->v_usecount == 0) 14032071Sdyson panic("vnode_pager_alloc: no vnode reference"); 14132071Sdyson 1429507Sdg if (object == NULL) { 1431541Srgrimes /* 1441541Srgrimes * And an object of the appropriate size 1451541Srgrimes */ 14640286Sdg object = vm_object_allocate(OBJT_VNODE, OFF_TO_IDX(round_page(size))); 14732286Sdyson object->flags = 0; 1481827Sdg 14940286Sdg object->un_pager.vnp.vnp_size = size; 1501549Srgrimes 1519507Sdg object->handle = handle; 1529507Sdg vp->v_object = object; 15332286Sdyson vp->v_usecount++; 1541541Srgrimes } else { 15532286Sdyson object->ref_count++; 15632286Sdyson vp->v_usecount++; 1571541Srgrimes } 1589411Sdg 1599411Sdg vp->v_flag &= ~VOLOCK; 1609411Sdg if (vp->v_flag & VOWANT) { 1619411Sdg vp->v_flag &= ~VOWANT; 1629411Sdg wakeup(vp); 1639411Sdg } 1649507Sdg return (object); 1651541Srgrimes} 1661541Srgrimes 16712820Sphkstatic void 1689507Sdgvnode_pager_dealloc(object) 1699507Sdg vm_object_t object; 1701541Srgrimes{ 1719507Sdg register struct vnode *vp = object->handle; 1721541Srgrimes 1739507Sdg if (vp == NULL) 1749507Sdg panic("vnode_pager_dealloc: pager already dealloced"); 1759507Sdg 17633817Sdyson vm_object_pip_wait(object, "vnpdea"); 1771541Srgrimes 1789507Sdg object->handle = NULL; 17933109Sdyson object->type = OBJT_DEAD; 1809507Sdg vp->v_object = NULL; 18133109Sdyson vp->v_flag &= ~(VTEXT | VOBJBUF); 1821549Srgrimes} 1831541Srgrimes 18412820Sphkstatic boolean_t 18512767Sdysonvnode_pager_haspage(object, pindex, before, after) 1869507Sdg vm_object_t object; 18712767Sdyson vm_pindex_t pindex; 1889507Sdg int *before; 1899507Sdg int *after; 1901541Srgrimes{ 1919507Sdg struct vnode *vp = object->handle; 1921541Srgrimes daddr_t bn; 19312423Sphk int err; 19410556Sdyson daddr_t reqblock; 19511701Sdyson int poff; 19611701Sdyson int 
bsize; 19712914Sdyson int pagesperblock, blocksperpage; 1981541Srgrimes 19932585Sdyson if ((vp == NULL) || (vp->v_flag & VDOOMED)) 20032585Sdyson return FALSE; 20132585Sdyson 2021541Srgrimes /* 2035455Sdg * If filesystem no longer mounted or offset beyond end of file we do 2045455Sdg * not have the page. 2051541Srgrimes */ 20612767Sdyson if ((vp->v_mount == NULL) || 20712767Sdyson (IDX_TO_OFF(pindex) >= object->un_pager.vnp.vnp_size)) 2084797Sdg return FALSE; 2091541Srgrimes 21011576Sdg bsize = vp->v_mount->mnt_stat.f_iosize; 21110556Sdyson pagesperblock = bsize / PAGE_SIZE; 21212914Sdyson blocksperpage = 0; 21312914Sdyson if (pagesperblock > 0) { 21412914Sdyson reqblock = pindex / pagesperblock; 21512914Sdyson } else { 21612914Sdyson blocksperpage = (PAGE_SIZE / bsize); 21712914Sdyson reqblock = pindex * blocksperpage; 21812914Sdyson } 21910556Sdyson err = VOP_BMAP(vp, reqblock, (struct vnode **) 0, &bn, 22010556Sdyson after, before); 2218876Srgrimes if (err) 2229507Sdg return TRUE; 22310702Sdyson if ( bn == -1) 22410576Sdyson return FALSE; 22512914Sdyson if (pagesperblock > 0) { 22612914Sdyson poff = pindex - (reqblock * pagesperblock); 22712914Sdyson if (before) { 22812914Sdyson *before *= pagesperblock; 22912914Sdyson *before += poff; 23010669Sdyson } 23112914Sdyson if (after) { 23212914Sdyson int numafter; 23312914Sdyson *after *= pagesperblock; 23412914Sdyson numafter = pagesperblock - (poff + 1); 23512914Sdyson if (IDX_TO_OFF(pindex + numafter) > object->un_pager.vnp.vnp_size) { 23612914Sdyson numafter = OFF_TO_IDX((object->un_pager.vnp.vnp_size - IDX_TO_OFF(pindex))); 23712914Sdyson } 23812914Sdyson *after += numafter; 23912914Sdyson } 24012914Sdyson } else { 24112914Sdyson if (before) { 24212914Sdyson *before /= blocksperpage; 24312914Sdyson } 24412914Sdyson 24512914Sdyson if (after) { 24612914Sdyson *after /= blocksperpage; 24712914Sdyson } 24810556Sdyson } 24910576Sdyson return TRUE; 2501541Srgrimes} 2511541Srgrimes 2521541Srgrimes/* 2531541Srgrimes * 
Lets the VM system know about a change in size for a file. 2549507Sdg * We adjust our own internal size and flush any cached pages in 2551541Srgrimes * the associated object that are affected by the size change. 2561541Srgrimes * 2571541Srgrimes * Note: this routine may be invoked as a result of a pager put 2581541Srgrimes * operation (possibly at object termination time), so we must be careful. 2591541Srgrimes */ 2601541Srgrimesvoid 2611541Srgrimesvnode_pager_setsize(vp, nsize) 2621541Srgrimes struct vnode *vp; 26312767Sdyson vm_ooffset_t nsize; 2641541Srgrimes{ 26538542Sluoqi vm_pindex_t nobjsize; 2669507Sdg vm_object_t object = vp->v_object; 2671541Srgrimes 2689507Sdg if (object == NULL) 2691541Srgrimes return; 2701827Sdg 2711541Srgrimes /* 2721541Srgrimes * Hasn't changed size 2731541Srgrimes */ 2749507Sdg if (nsize == object->un_pager.vnp.vnp_size) 2753374Sdg return; 2761827Sdg 27738542Sluoqi nobjsize = OFF_TO_IDX(nsize + PAGE_MASK); 27838542Sluoqi 2791541Srgrimes /* 2801827Sdg * File has shrunk. Toss any cached pages beyond the new EOF. 2811541Srgrimes */ 2829507Sdg if (nsize < object->un_pager.vnp.vnp_size) { 28338542Sluoqi vm_freeze_copyopts(object, OFF_TO_IDX(nsize), object->size); 28438542Sluoqi if (nobjsize < object->size) { 28538542Sluoqi vm_object_page_remove(object, nobjsize, object->size, 28638542Sluoqi FALSE); 2875455Sdg } 2881827Sdg /* 2891827Sdg * this gets rid of garbage at the end of a page that is now 2901827Sdg * only partially backed by the vnode... 
2911827Sdg */ 2921827Sdg if (nsize & PAGE_MASK) { 2931827Sdg vm_offset_t kva; 2941827Sdg vm_page_t m; 2951827Sdg 29612767Sdyson m = vm_page_lookup(object, OFF_TO_IDX(nsize)); 2971827Sdg if (m) { 2981827Sdg kva = vm_pager_map_page(m); 2991827Sdg bzero((caddr_t) kva + (nsize & PAGE_MASK), 30012767Sdyson (int) (round_page(nsize) - nsize)); 3011827Sdg vm_pager_unmap_page(kva); 3021827Sdg } 3031827Sdg } 3041541Srgrimes } 30512767Sdyson object->un_pager.vnp.vnp_size = nsize; 30638542Sluoqi object->size = nobjsize; 3071541Srgrimes} 3081541Srgrimes 3091541Srgrimesvoid 3101549Srgrimesvnode_pager_freepage(m) 3111549Srgrimes vm_page_t m; 3121541Srgrimes{ 3131549Srgrimes vm_page_free(m); 3141549Srgrimes} 3151549Srgrimes 3161549Srgrimes/* 3171549Srgrimes * calculate the linear (byte) disk address of specified virtual 3181549Srgrimes * file address 3191549Srgrimes */ 32012820Sphkstatic vm_offset_t 3216151Sdgvnode_pager_addr(vp, address, run) 3221549Srgrimes struct vnode *vp; 32312767Sdyson vm_ooffset_t address; 3246151Sdg int *run; 3251549Srgrimes{ 3265455Sdg int rtaddress; 3275455Sdg int bsize; 32812767Sdyson daddr_t block; 3291549Srgrimes struct vnode *rtvp; 3305455Sdg int err; 33112767Sdyson daddr_t vblock; 33212767Sdyson int voffset; 3331549Srgrimes 3345455Sdg if ((int) address < 0) 3355455Sdg return -1; 3365455Sdg 33711701Sdyson if (vp->v_mount == NULL) 33811701Sdyson return -1; 33911701Sdyson 3401549Srgrimes bsize = vp->v_mount->mnt_stat.f_iosize; 3411549Srgrimes vblock = address / bsize; 3421549Srgrimes voffset = address % bsize; 3431549Srgrimes 34410551Sdyson err = VOP_BMAP(vp, vblock, &rtvp, &block, run, NULL); 3451549Srgrimes 3466151Sdg if (err || (block == -1)) 3471549Srgrimes rtaddress = -1; 3486151Sdg else { 3496626Sdg rtaddress = block + voffset / DEV_BSIZE; 3506151Sdg if( run) { 3516151Sdg *run += 1; 3526151Sdg *run *= bsize/PAGE_SIZE; 3536151Sdg *run -= voffset/PAGE_SIZE; 3546151Sdg } 3556151Sdg } 3561549Srgrimes 3571549Srgrimes return rtaddress; 3581549Srgrimes} 
3591549Srgrimes 3601549Srgrimes/* 3611549Srgrimes * interrupt routine for I/O completion 3621549Srgrimes */ 36312820Sphkstatic void 3641549Srgrimesvnode_pager_iodone(bp) 3651549Srgrimes struct buf *bp; 3661549Srgrimes{ 3671549Srgrimes bp->b_flags |= B_DONE; 3689507Sdg wakeup(bp); 3691549Srgrimes} 3701549Srgrimes 3711549Srgrimes/* 3721549Srgrimes * small block file system vnode pager input 3731549Srgrimes */ 37412820Sphkstatic int 3759507Sdgvnode_pager_input_smlfs(object, m) 3769507Sdg vm_object_t object; 3771549Srgrimes vm_page_t m; 3781549Srgrimes{ 3795455Sdg int i; 3805455Sdg int s; 3811549Srgrimes struct vnode *dp, *vp; 3821549Srgrimes struct buf *bp; 3831549Srgrimes vm_offset_t kva; 3845455Sdg int fileaddr; 3851549Srgrimes vm_offset_t bsize; 3865455Sdg int error = 0; 3871549Srgrimes 3889507Sdg vp = object->handle; 38911701Sdyson if (vp->v_mount == NULL) 39011701Sdyson return VM_PAGER_BAD; 39111701Sdyson 3921549Srgrimes bsize = vp->v_mount->mnt_stat.f_iosize; 3931549Srgrimes 3947178Sdg 39510551Sdyson VOP_BMAP(vp, 0, &dp, 0, NULL, NULL); 3961549Srgrimes 3971549Srgrimes kva = vm_pager_map_page(m); 3981549Srgrimes 3991827Sdg for (i = 0; i < PAGE_SIZE / bsize; i++) { 4001827Sdg 40112767Sdyson if ((vm_page_bits(IDX_TO_OFF(m->pindex) + i * bsize, bsize) & m->valid)) 4025455Sdg continue; 4031549Srgrimes 40412767Sdyson fileaddr = vnode_pager_addr(vp, 40512767Sdyson IDX_TO_OFF(m->pindex) + i * bsize, (int *)0); 4061827Sdg if (fileaddr != -1) { 40742957Sdillon bp = getpbuf(&vnode_pbuf_freecnt); 4081549Srgrimes 4091827Sdg /* build a minimal buffer header */ 4101549Srgrimes bp->b_flags = B_BUSY | B_READ | B_CALL; 4111549Srgrimes bp->b_iodone = vnode_pager_iodone; 4121549Srgrimes bp->b_proc = curproc; 4131549Srgrimes bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred; 4141827Sdg if (bp->b_rcred != NOCRED) 4151549Srgrimes crhold(bp->b_rcred); 4161827Sdg if (bp->b_wcred != NOCRED) 4171549Srgrimes crhold(bp->b_wcred); 41831493Sphk bp->b_data = (caddr_t) kva + i * bsize; 4196626Sdg 
bp->b_blkno = fileaddr; 4205455Sdg pbgetvp(dp, bp); 4211549Srgrimes bp->b_bcount = bsize; 4221549Srgrimes bp->b_bufsize = bsize; 4231827Sdg 4241827Sdg /* do the input */ 42537384Sjulian VOP_STRATEGY(bp->b_vp, bp); 4261549Srgrimes 42733758Sdyson /* we definitely need to be at splvm here */ 4281549Srgrimes 42933758Sdyson s = splvm(); 4301549Srgrimes while ((bp->b_flags & B_DONE) == 0) { 4319356Sdg tsleep(bp, PVM, "vnsrd", 0); 4321549Srgrimes } 4331549Srgrimes splx(s); 4341549Srgrimes if ((bp->b_flags & B_ERROR) != 0) 4351549Srgrimes error = EIO; 4361549Srgrimes 4371827Sdg /* 4381827Sdg * free the buffer header back to the swap buffer pool 4391827Sdg */ 44042957Sdillon relpbuf(bp, &vnode_pbuf_freecnt); 4411827Sdg if (error) 4421549Srgrimes break; 4435455Sdg 44415583Sphk vm_page_set_validclean(m, (i * bsize) & PAGE_MASK, bsize); 4451549Srgrimes } else { 44615583Sphk vm_page_set_validclean(m, (i * bsize) & PAGE_MASK, bsize); 4471549Srgrimes bzero((caddr_t) kva + i * bsize, bsize); 4481549Srgrimes } 4491549Srgrimes } 4501549Srgrimes vm_pager_unmap_page(kva); 45117334Sdyson pmap_clear_modify(VM_PAGE_TO_PHYS(m)); 45238799Sdfr vm_page_flag_clear(m, PG_ZERO); 4531827Sdg if (error) { 4544207Sdg return VM_PAGER_ERROR; 4551549Srgrimes } 4561549Srgrimes return VM_PAGER_OK; 4571549Srgrimes 4581549Srgrimes} 4591549Srgrimes 4601549Srgrimes 4611549Srgrimes/* 4621549Srgrimes * old style vnode pager output routine 4631549Srgrimes */ 46412820Sphkstatic int 4659507Sdgvnode_pager_input_old(object, m) 4669507Sdg vm_object_t object; 4671549Srgrimes vm_page_t m; 4681549Srgrimes{ 4691541Srgrimes struct uio auio; 4701541Srgrimes struct iovec aiov; 4715455Sdg int error; 4725455Sdg int size; 4731549Srgrimes vm_offset_t kva; 4741549Srgrimes 4751549Srgrimes error = 0; 4761827Sdg 4771549Srgrimes /* 4781549Srgrimes * Return failure if beyond current EOF 4791549Srgrimes */ 48012767Sdyson if (IDX_TO_OFF(m->pindex) >= object->un_pager.vnp.vnp_size) { 4811549Srgrimes return VM_PAGER_BAD; 
4821549Srgrimes } else { 4831549Srgrimes size = PAGE_SIZE; 48412767Sdyson if (IDX_TO_OFF(m->pindex) + size > object->un_pager.vnp.vnp_size) 48512767Sdyson size = object->un_pager.vnp.vnp_size - IDX_TO_OFF(m->pindex); 4867178Sdg 4875455Sdg /* 4885455Sdg * Allocate a kernel virtual address and initialize so that 4895455Sdg * we can use VOP_READ/WRITE routines. 4905455Sdg */ 4911549Srgrimes kva = vm_pager_map_page(m); 4927178Sdg 4931827Sdg aiov.iov_base = (caddr_t) kva; 4941549Srgrimes aiov.iov_len = size; 4951549Srgrimes auio.uio_iov = &aiov; 4961549Srgrimes auio.uio_iovcnt = 1; 49712767Sdyson auio.uio_offset = IDX_TO_OFF(m->pindex); 4981549Srgrimes auio.uio_segflg = UIO_SYSSPACE; 4991549Srgrimes auio.uio_rw = UIO_READ; 5001549Srgrimes auio.uio_resid = size; 50141503Srvb auio.uio_procp = curproc; 5021549Srgrimes 5039507Sdg error = VOP_READ(object->handle, &auio, 0, curproc->p_ucred); 5041549Srgrimes if (!error) { 5051549Srgrimes register int count = size - auio.uio_resid; 5061549Srgrimes 5071549Srgrimes if (count == 0) 5081549Srgrimes error = EINVAL; 5091549Srgrimes else if (count != PAGE_SIZE) 5101827Sdg bzero((caddr_t) kva + count, PAGE_SIZE - count); 5111549Srgrimes } 5121549Srgrimes vm_pager_unmap_page(kva); 5131549Srgrimes } 51417334Sdyson pmap_clear_modify(VM_PAGE_TO_PHYS(m)); 5155455Sdg m->dirty = 0; 51638799Sdfr vm_page_flag_clear(m, PG_ZERO); 51739739Srvb if (!error) 51839739Srvb m->valid = VM_PAGE_BITS_ALL; 5194207Sdg return error ? VM_PAGER_ERROR : VM_PAGER_OK; 5201549Srgrimes} 5211549Srgrimes 5221549Srgrimes/* 5231549Srgrimes * generic vnode pager input routine 5241549Srgrimes */ 52510556Sdyson 52633847Smsmith/* 52733847Smsmith * EOPNOTSUPP is no longer legal. For local media VFS's that do not 52833847Smsmith * implement their own VOP_GETPAGES, their VOP_GETPAGES should call to 52933847Smsmith * vnode_pager_generic_getpages() to implement the previous behaviour. 
53033847Smsmith * 53133847Smsmith * All other FS's should use the bypass to get to the local media 53233847Smsmith * backing vp's VOP_GETPAGES. 53333847Smsmith */ 53412820Sphkstatic int 5359507Sdgvnode_pager_getpages(object, m, count, reqpage) 5369507Sdg vm_object_t object; 5371549Srgrimes vm_page_t *m; 5389507Sdg int count; 5399507Sdg int reqpage; 5401549Srgrimes{ 54110556Sdyson int rtval; 54210556Sdyson struct vnode *vp; 54334403Smsmith int bytes = count * PAGE_SIZE; 54432286Sdyson 54510556Sdyson vp = object->handle; 54633847Smsmith /* 54733847Smsmith * XXX temporary diagnostic message to help track stale FS code, 54833847Smsmith * Returning EOPNOTSUPP from here may make things unhappy. 54933847Smsmith */ 55034403Smsmith rtval = VOP_GETPAGES(vp, m, bytes, reqpage, 0); 55134403Smsmith if (rtval == EOPNOTSUPP) { 55234403Smsmith printf("vnode_pager: *** WARNING *** stale FS getpages\n"); 55334403Smsmith rtval = vnode_pager_generic_getpages( vp, m, bytes, reqpage); 55434403Smsmith } 55533847Smsmith return rtval; 55610556Sdyson} 55710556Sdyson 55833847Smsmith 55933847Smsmith/* 56033847Smsmith * This is now called from local media FS's to operate against their 56133847Smsmith * own vnodes if they fail to implement VOP_GETPAGES. 
56233847Smsmith */ 56333847Smsmithint 56433847Smsmithvnode_pager_generic_getpages(vp, m, bytecount, reqpage) 56533847Smsmith struct vnode *vp; 56610556Sdyson vm_page_t *m; 56733847Smsmith int bytecount; 56810556Sdyson int reqpage; 56910556Sdyson{ 57033847Smsmith vm_object_t object; 57112767Sdyson vm_offset_t kva; 57234206Sdyson off_t foff, tfoff, nextoff; 5739507Sdg int i, size, bsize, first, firstaddr; 57433847Smsmith struct vnode *dp; 5756151Sdg int runpg; 5766151Sdg int runend; 5777178Sdg struct buf *bp; 5785455Sdg int s; 57933847Smsmith int count; 5805455Sdg int error = 0; 5811549Srgrimes 58233847Smsmith object = vp->v_object; 58333847Smsmith count = bytecount / PAGE_SIZE; 58433847Smsmith 58511701Sdyson if (vp->v_mount == NULL) 58611701Sdyson return VM_PAGER_BAD; 58711701Sdyson 5881549Srgrimes bsize = vp->v_mount->mnt_stat.f_iosize; 5891549Srgrimes 5901549Srgrimes /* get the UNDERLYING device for the file with VOP_BMAP() */ 5911827Sdg 5921549Srgrimes /* 5931827Sdg * originally, we did not check for an error return value -- assuming 5941827Sdg * an fs always has a bmap entry point -- that assumption is wrong!!! 5951549Srgrimes */ 59612767Sdyson foff = IDX_TO_OFF(m[reqpage]->pindex); 5971827Sdg 5981549Srgrimes /* 5991887Sdg * if we can't bmap, use old VOP code 6001549Srgrimes */ 60110551Sdyson if (VOP_BMAP(vp, 0, &dp, 0, NULL, NULL)) { 6021549Srgrimes for (i = 0; i < count; i++) { 6031549Srgrimes if (i != reqpage) { 6041549Srgrimes vnode_pager_freepage(m[i]); 6051549Srgrimes } 6061549Srgrimes } 6073612Sdg cnt.v_vnodein++; 6083612Sdg cnt.v_vnodepgsin++; 6099507Sdg return vnode_pager_input_old(object, m[reqpage]); 6101549Srgrimes 6111827Sdg /* 6121827Sdg * if the blocksize is smaller than a page size, then use 6131827Sdg * special small filesystem code. NFS sometimes has a small 6141827Sdg * blocksize, but it can handle large reads itself. 
6151827Sdg */ 6161827Sdg } else if ((PAGE_SIZE / bsize) > 1 && 61738866Sbde (vp->v_mount->mnt_stat.f_type != nfs_mount_type)) { 6181549Srgrimes for (i = 0; i < count; i++) { 6191549Srgrimes if (i != reqpage) { 6201549Srgrimes vnode_pager_freepage(m[i]); 6211549Srgrimes } 6221549Srgrimes } 6233612Sdg cnt.v_vnodein++; 6243612Sdg cnt.v_vnodepgsin++; 6259507Sdg return vnode_pager_input_smlfs(object, m[reqpage]); 6261549Srgrimes } 62745347Sjulian 6281549Srgrimes /* 62945347Sjulian * If we have a completely valid page available to us, we can 63045347Sjulian * clean up and return. Otherwise we have to re-read the 63145347Sjulian * media. 6321549Srgrimes */ 63325930Sdfr 63445347Sjulian if (m[reqpage]->valid == VM_PAGE_BITS_ALL) { 6355455Sdg for (i = 0; i < count; i++) { 6365455Sdg if (i != reqpage) 6375455Sdg vnode_pager_freepage(m[i]); 6381549Srgrimes } 6395455Sdg return VM_PAGER_OK; 6401549Srgrimes } 64145347Sjulian m[reqpage]->valid = 0; 6427178Sdg 6435455Sdg /* 6445455Sdg * here on direct device I/O 6455455Sdg */ 6461549Srgrimes 6476151Sdg firstaddr = -1; 6481549Srgrimes /* 6496151Sdg * calculate the run that includes the required page 6501549Srgrimes */ 6516151Sdg for(first = 0, i = 0; i < count; i = runend) { 65212767Sdyson firstaddr = vnode_pager_addr(vp, 65312767Sdyson IDX_TO_OFF(m[i]->pindex), &runpg); 6546151Sdg if (firstaddr == -1) { 6559507Sdg if (i == reqpage && foff < object->un_pager.vnp.vnp_size) { 65637562Sbde /* XXX no %qd in kernel. 
*/ 65737562Sbde panic("vnode_pager_getpages: unexpected missing page: firstaddr: %d, foff: 0x%lx%08lx, vnp_size: 0x%lx%08lx", 65837562Sbde firstaddr, (u_long)(foff >> 32), 65937562Sbde (u_long)(u_int32_t)foff, 66037562Sbde (u_long)(u_int32_t) 66137562Sbde (object->un_pager.vnp.vnp_size >> 32), 66237562Sbde (u_long)(u_int32_t) 66337562Sbde object->un_pager.vnp.vnp_size); 6646151Sdg } 6651549Srgrimes vnode_pager_freepage(m[i]); 6666151Sdg runend = i + 1; 6676151Sdg first = runend; 6686151Sdg continue; 6691549Srgrimes } 6706151Sdg runend = i + runpg; 6719507Sdg if (runend <= reqpage) { 6726151Sdg int j; 6739507Sdg for (j = i; j < runend; j++) { 6746151Sdg vnode_pager_freepage(m[j]); 6756151Sdg } 6761549Srgrimes } else { 6779507Sdg if (runpg < (count - first)) { 6789507Sdg for (i = first + runpg; i < count; i++) 6796151Sdg vnode_pager_freepage(m[i]); 6806151Sdg count = first + runpg; 6816151Sdg } 6826151Sdg break; 6831549Srgrimes } 6846151Sdg first = runend; 6851549Srgrimes } 6861549Srgrimes 6871549Srgrimes /* 6881827Sdg * the first and last page have been calculated now, move input pages 6891827Sdg * to be zero based... 
6901549Srgrimes */ 6911549Srgrimes if (first != 0) { 6921549Srgrimes for (i = first; i < count; i++) { 6931549Srgrimes m[i - first] = m[i]; 6941549Srgrimes } 6951549Srgrimes count -= first; 6961549Srgrimes reqpage -= first; 6971549Srgrimes } 6986151Sdg 6991549Srgrimes /* 7001549Srgrimes * calculate the file virtual address for the transfer 7011549Srgrimes */ 70212767Sdyson foff = IDX_TO_OFF(m[0]->pindex); 7031827Sdg 7041549Srgrimes /* 7051549Srgrimes * calculate the size of the transfer 7061549Srgrimes */ 7071549Srgrimes size = count * PAGE_SIZE; 7089507Sdg if ((foff + size) > object->un_pager.vnp.vnp_size) 7099507Sdg size = object->un_pager.vnp.vnp_size - foff; 7101549Srgrimes 7111549Srgrimes /* 7121549Srgrimes * round up physical size for real devices 7131549Srgrimes */ 7141827Sdg if (dp->v_type == VBLK || dp->v_type == VCHR) 7151549Srgrimes size = (size + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1); 7161549Srgrimes 71742957Sdillon bp = getpbuf(&vnode_pbuf_freecnt); 7185455Sdg kva = (vm_offset_t) bp->b_data; 7191887Sdg 7201549Srgrimes /* 7211549Srgrimes * and map the pages to be read into the kva 7221549Srgrimes */ 7231887Sdg pmap_qenter(kva, m, count); 7241549Srgrimes 7251549Srgrimes /* build a minimal buffer header */ 7261549Srgrimes bp->b_flags = B_BUSY | B_READ | B_CALL; 7271549Srgrimes bp->b_iodone = vnode_pager_iodone; 7281549Srgrimes /* B_PHYS is not set, but it is nice to fill this in */ 7291549Srgrimes bp->b_proc = curproc; 7301549Srgrimes bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred; 7311827Sdg if (bp->b_rcred != NOCRED) 7321549Srgrimes crhold(bp->b_rcred); 7331827Sdg if (bp->b_wcred != NOCRED) 7341549Srgrimes crhold(bp->b_wcred); 7356626Sdg bp->b_blkno = firstaddr; 7365455Sdg pbgetvp(dp, bp); 7371549Srgrimes bp->b_bcount = size; 7381549Srgrimes bp->b_bufsize = size; 7391549Srgrimes 7403612Sdg cnt.v_vnodein++; 7413612Sdg cnt.v_vnodepgsin += count; 7423612Sdg 7431549Srgrimes /* do the input */ 74437384Sjulian VOP_STRATEGY(bp->b_vp, bp); 7453612Sdg 
74633758Sdyson s = splvm(); 74733758Sdyson /* we definitely need to be at splvm here */ 7481549Srgrimes 7491549Srgrimes while ((bp->b_flags & B_DONE) == 0) { 7509356Sdg tsleep(bp, PVM, "vnread", 0); 7511549Srgrimes } 7521549Srgrimes splx(s); 7531549Srgrimes if ((bp->b_flags & B_ERROR) != 0) 7541549Srgrimes error = EIO; 7551549Srgrimes 7561549Srgrimes if (!error) { 7571549Srgrimes if (size != count * PAGE_SIZE) 7581827Sdg bzero((caddr_t) kva + size, PAGE_SIZE * count - size); 7591549Srgrimes } 7605455Sdg pmap_qremove(kva, count); 7611549Srgrimes 7621549Srgrimes /* 7631549Srgrimes * free the buffer header back to the swap buffer pool 7641549Srgrimes */ 76542957Sdillon relpbuf(bp, &vnode_pbuf_freecnt); 7661549Srgrimes 76734206Sdyson for (i = 0, tfoff = foff; i < count; i++, tfoff = nextoff) { 76834206Sdyson vm_page_t mt; 76934206Sdyson 77034206Sdyson nextoff = tfoff + PAGE_SIZE; 77134206Sdyson mt = m[i]; 77234206Sdyson 77334206Sdyson if (nextoff <= size) { 77445347Sjulian /* 77545347Sjulian * Read filled up entire page. 77645347Sjulian */ 77734206Sdyson mt->valid = VM_PAGE_BITS_ALL; 77834206Sdyson mt->dirty = 0; 77934206Sdyson pmap_clear_modify(VM_PAGE_TO_PHYS(mt)); 78034206Sdyson } else { 78145347Sjulian /* 78245347Sjulian * Read did not fill up entire page. Since this 78345347Sjulian * is getpages, the page may be mapped, so we have 78445347Sjulian * to zero the invalid portions of the page even 78545347Sjulian * though we aren't setting them valid. 78645347Sjulian * 78745347Sjulian * Currently we do not set the entire page valid, 78845347Sjulian * we just try to clear the piece that we couldn't 78945347Sjulian * read. 
79045347Sjulian */ 79145347Sjulian vm_page_set_validclean(mt, 0, size - tfoff); 79245347Sjulian vm_page_zero_invalid(mt, FALSE); 79334206Sdyson } 79434206Sdyson 79538799Sdfr vm_page_flag_clear(mt, PG_ZERO); 7961549Srgrimes if (i != reqpage) { 7971827Sdg 7981549Srgrimes /* 7991827Sdg * whether or not to leave the page activated is up in 8001827Sdg * the air, but we should put the page on a page queue 8011827Sdg * somewhere. (it already is in the object). Result: 8021827Sdg * It appears that emperical results show that 8031827Sdg * deactivating pages is best. 8041549Srgrimes */ 8051827Sdg 8061549Srgrimes /* 8071827Sdg * just in case someone was asking for this page we 8081827Sdg * now tell them that it is ok to use 8091549Srgrimes */ 8101549Srgrimes if (!error) { 81134206Sdyson if (mt->flags & PG_WANTED) 81234206Sdyson vm_page_activate(mt); 81333109Sdyson else 81434206Sdyson vm_page_deactivate(mt); 81538799Sdfr vm_page_wakeup(mt); 8161549Srgrimes } else { 81734206Sdyson vnode_pager_freepage(mt); 8181549Srgrimes } 8191549Srgrimes } 8201549Srgrimes } 8211549Srgrimes if (error) { 8229507Sdg printf("vnode_pager_getpages: I/O read error\n"); 8231549Srgrimes } 8244207Sdg return (error ? VM_PAGER_ERROR : VM_PAGER_OK); 8251549Srgrimes} 8261549Srgrimes 82733847Smsmith/* 82833847Smsmith * EOPNOTSUPP is no longer legal. For local media VFS's that do not 82933847Smsmith * implement their own VOP_PUTPAGES, their VOP_PUTPAGES should call to 83033847Smsmith * vnode_pager_generic_putpages() to implement the previous behaviour. 83133847Smsmith * 83233847Smsmith * All other FS's should use the bypass to get to the local media 83333847Smsmith * backing vp's VOP_PUTPAGES. 
83433847Smsmith */ 83543129Sdillonstatic void 83610556Sdysonvnode_pager_putpages(object, m, count, sync, rtvals) 83710556Sdyson vm_object_t object; 83810556Sdyson vm_page_t *m; 83910556Sdyson int count; 84010556Sdyson boolean_t sync; 84110556Sdyson int *rtvals; 84210556Sdyson{ 84310556Sdyson int rtval; 84410556Sdyson struct vnode *vp; 84534403Smsmith int bytes = count * PAGE_SIZE; 84618973Sdyson 84744321Salc /* 84844321Salc * Force synchronous operation if we are extremely low on memory 84944321Salc * to prevent a low-memory deadlock. VOP operations often need to 85044321Salc * allocate more memory to initiate the I/O ( i.e. do a BMAP 85144321Salc * operation ). The swapper handles the case by limiting the amount 85244321Salc * of asynchronous I/O, but that sort of solution doesn't scale well 85344321Salc * for the vnode pager without a lot of work. 85444321Salc * 85544321Salc * Also, the backing vnode's iodone routine may not wake the pageout 85644321Salc * daemon up. This should be probably be addressed XXX. 85744321Salc */ 85844321Salc 85944321Salc if ((cnt.v_free_count + cnt.v_cache_count) < cnt.v_pageout_free_min) 86044321Salc sync |= OBJPC_SYNC; 86144321Salc 86244321Salc /* 86344321Salc * Call device-specific putpages function 86444321Salc */ 86544321Salc 86610556Sdyson vp = object->handle; 86734403Smsmith rtval = VOP_PUTPAGES(vp, m, bytes, sync, rtvals, 0); 86834403Smsmith if (rtval == EOPNOTSUPP) { 86934403Smsmith printf("vnode_pager: *** WARNING *** stale FS putpages\n"); 87034403Smsmith rtval = vnode_pager_generic_putpages( vp, m, bytes, sync, rtvals); 87134403Smsmith } 87210556Sdyson} 87310556Sdyson 87433847Smsmith 8751549Srgrimes/* 87633847Smsmith * This is now called from local media FS's to operate against their 87745057Seivind * own vnodes if they fail to implement VOP_PUTPAGES. 
8781549Srgrimes */ 87933847Smsmithint 88034206Sdysonvnode_pager_generic_putpages(vp, m, bytecount, flags, rtvals) 88133847Smsmith struct vnode *vp; 8821549Srgrimes vm_page_t *m; 88333847Smsmith int bytecount; 88434206Sdyson int flags; 8855455Sdg int *rtvals; 8861549Srgrimes{ 8877695Sdg int i; 88833847Smsmith vm_object_t object; 88933847Smsmith int count; 8901549Srgrimes 8917695Sdg int maxsize, ncount; 89212767Sdyson vm_ooffset_t poffset; 8937695Sdg struct uio auio; 8947695Sdg struct iovec aiov; 8957695Sdg int error; 89634206Sdyson int ioflags; 8971549Srgrimes 89833847Smsmith object = vp->v_object; 89933847Smsmith count = bytecount / PAGE_SIZE; 90033847Smsmith 9011827Sdg for (i = 0; i < count; i++) 9021549Srgrimes rtvals[i] = VM_PAGER_AGAIN; 9031549Srgrimes 90412767Sdyson if ((int) m[0]->pindex < 0) { 90534206Sdyson printf("vnode_pager_putpages: attempt to write meta-data!!! -- 0x%x(%x)\n", 90634206Sdyson m[0]->pindex, m[0]->dirty); 9077695Sdg rtvals[0] = VM_PAGER_BAD; 9087695Sdg return VM_PAGER_BAD; 9095455Sdg } 9107178Sdg 9117695Sdg maxsize = count * PAGE_SIZE; 9127695Sdg ncount = count; 9131549Srgrimes 91412767Sdyson poffset = IDX_TO_OFF(m[0]->pindex); 91512767Sdyson if (maxsize + poffset > object->un_pager.vnp.vnp_size) { 91612767Sdyson if (object->un_pager.vnp.vnp_size > poffset) 91712767Sdyson maxsize = object->un_pager.vnp.vnp_size - poffset; 9188585Sdg else 9198585Sdg maxsize = 0; 92015583Sphk ncount = btoc(maxsize); 9218585Sdg if (ncount < count) { 9228585Sdg for (i = ncount; i < count; i++) { 9237695Sdg rtvals[i] = VM_PAGER_BAD; 9241549Srgrimes } 9251549Srgrimes } 9261541Srgrimes } 9277695Sdg 92834206Sdyson ioflags = IO_VMIO; 92934206Sdyson ioflags |= (flags & (VM_PAGER_PUT_SYNC | VM_PAGER_PUT_INVAL)) ? IO_SYNC: 0; 93034206Sdyson ioflags |= (flags & VM_PAGER_PUT_INVAL) ? 
IO_INVAL: 0; 9311827Sdg 9327695Sdg aiov.iov_base = (caddr_t) 0; 9337695Sdg aiov.iov_len = maxsize; 9347695Sdg auio.uio_iov = &aiov; 9357695Sdg auio.uio_iovcnt = 1; 93612767Sdyson auio.uio_offset = poffset; 9377695Sdg auio.uio_segflg = UIO_NOCOPY; 9387695Sdg auio.uio_rw = UIO_WRITE; 9397695Sdg auio.uio_resid = maxsize; 9407695Sdg auio.uio_procp = (struct proc *) 0; 94134206Sdyson error = VOP_WRITE(vp, &auio, ioflags, curproc->p_ucred); 9423612Sdg cnt.v_vnodeout++; 9437695Sdg cnt.v_vnodepgsout += ncount; 9443612Sdg 9458585Sdg if (error) { 9469507Sdg printf("vnode_pager_putpages: I/O error %d\n", error); 9477695Sdg } 9488585Sdg if (auio.uio_resid) { 94937555Sbde printf("vnode_pager_putpages: residual I/O %d at %lu\n", 95037555Sbde auio.uio_resid, (u_long)m[0]->pindex); 9517695Sdg } 95233936Sdyson for (i = 0; i < ncount; i++) { 95333936Sdyson rtvals[i] = VM_PAGER_OK; 9547695Sdg } 9557695Sdg return rtvals[0]; 9567695Sdg} 9571549Srgrimes 9587695Sdgstruct vnode * 9599507Sdgvnode_pager_lock(object) 9609507Sdg vm_object_t object; 9619507Sdg{ 96222521Sdyson struct proc *p = curproc; /* XXX */ 96322521Sdyson 9649507Sdg for (; object != NULL; object = object->backing_object) { 9659507Sdg if (object->type != OBJT_VNODE) 9667695Sdg continue; 96732585Sdyson if (object->flags & OBJ_DEAD) 96832585Sdyson return NULL; 9691549Srgrimes 97032585Sdyson while (vget(object->handle, 97132585Sdyson LK_NOPAUSE | LK_SHARED | LK_RETRY | LK_CANRECURSE, p)) { 97234611Sdyson if ((object->flags & OBJ_DEAD) || (object->type != OBJT_VNODE)) 97334611Sdyson return NULL; 97432585Sdyson printf("vnode_pager_lock: retrying\n"); 97532585Sdyson } 9769507Sdg return object->handle; 9771549Srgrimes } 9789507Sdg return NULL; 9797695Sdg} 980