/*-
 * Copyright (c) 1990 University of Utah.
 * Copyright (c) 1991 The Regents of the University of California.
 * All rights reserved.
 * Copyright (c) 1993, 1994 John S. Dyson
 * Copyright (c) 1995, David Greenman
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 321541Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 331541Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 341541Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 351541Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 361541Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 371541Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 381541Srgrimes * SUCH DAMAGE. 391541Srgrimes * 401549Srgrimes * from: @(#)vnode_pager.c 7.5 (Berkeley) 4/20/91 411541Srgrimes */ 421541Srgrimes 431541Srgrimes/* 441541Srgrimes * Page to/from files (vnodes). 451541Srgrimes */ 461541Srgrimes 471549Srgrimes/* 481549Srgrimes * TODO: 499507Sdg * Implement VOP_GETPAGES/PUTPAGES interface for filesystems. Will 507695Sdg * greatly re-simplify the vnode_pager. 511549Srgrimes */ 521549Srgrimes 53116226Sobrien#include <sys/cdefs.h> 54116226Sobrien__FBSDID("$FreeBSD$"); 55116226Sobrien 561541Srgrimes#include <sys/param.h> 571541Srgrimes#include <sys/systm.h> 581541Srgrimes#include <sys/proc.h> 591541Srgrimes#include <sys/vnode.h> 601541Srgrimes#include <sys/mount.h> 6160041Sphk#include <sys/bio.h> 629507Sdg#include <sys/buf.h> 6312662Sdg#include <sys/vmmeter.h> 64140767Sphk#include <sys/limits.h> 6551340Sdillon#include <sys/conf.h> 66248084Sattilio#include <sys/rwlock.h> 67127926Salc#include <sys/sf_buf.h> 681541Srgrimes 69148875Sssouhlal#include <machine/atomic.h> 70148875Sssouhlal 711541Srgrimes#include <vm/vm.h> 72239065Skib#include <vm/vm_param.h> 7312662Sdg#include <vm/vm_object.h> 741541Srgrimes#include <vm/vm_page.h> 759507Sdg#include <vm/vm_pager.h> 7631853Sdyson#include <vm/vm_map.h> 771541Srgrimes#include <vm/vnode_pager.h> 7812662Sdg#include <vm/vm_extern.h> 791541Srgrimes 80163359Salcstatic int vnode_pager_addr(struct vnode *vp, 
vm_ooffset_t address, 81163359Salc daddr_t *rtaddress, int *run); 8292727Salfredstatic int vnode_pager_input_smlfs(vm_object_t object, vm_page_t m); 8392727Salfredstatic int vnode_pager_input_old(vm_object_t object, vm_page_t m); 8492727Salfredstatic void vnode_pager_dealloc(vm_object_t); 8592727Salfredstatic int vnode_pager_getpages(vm_object_t, vm_page_t *, int, int); 8692727Salfredstatic void vnode_pager_putpages(vm_object_t, vm_page_t *, int, boolean_t, int *); 8792727Salfredstatic boolean_t vnode_pager_haspage(vm_object_t, vm_pindex_t, int *, int *); 88194766Skibstatic vm_object_t vnode_pager_alloc(void *, vm_ooffset_t, vm_prot_t, 89194766Skib vm_ooffset_t, struct ucred *cred); 9011943Sbde 911541Srgrimesstruct pagerops vnodepagerops = { 92118466Sphk .pgo_alloc = vnode_pager_alloc, 93118466Sphk .pgo_dealloc = vnode_pager_dealloc, 94118466Sphk .pgo_getpages = vnode_pager_getpages, 95118466Sphk .pgo_putpages = vnode_pager_putpages, 96118466Sphk .pgo_haspage = vnode_pager_haspage, 971541Srgrimes}; 981541Srgrimes 9979127Sjhbint vnode_pbuf_freecnt; 10010556Sdyson 101140767Sphk/* Create the VM system backing object for this vnode */ 102140767Sphkint 103155177Syarvnode_create_vobject(struct vnode *vp, off_t isize, struct thread *td) 104140767Sphk{ 105140767Sphk vm_object_t object; 106140767Sphk vm_ooffset_t size = isize; 107140767Sphk struct vattr va; 108140767Sphk 109140767Sphk if (!vn_isdisk(vp, NULL) && vn_canvmio(vp) == FALSE) 110140767Sphk return (0); 111140767Sphk 112140767Sphk while ((object = vp->v_object) != NULL) { 113248084Sattilio VM_OBJECT_WLOCK(object); 114140767Sphk if (!(object->flags & OBJ_DEAD)) { 115248084Sattilio VM_OBJECT_WUNLOCK(object); 116140767Sphk return (0); 117140767Sphk } 118175294Sattilio VOP_UNLOCK(vp, 0); 119140767Sphk vm_object_set_flag(object, OBJ_DISCONNECTWNT); 120247346Sattilio VM_OBJECT_SLEEP(object, object, PDROP | PVM, "vodead", 0); 121175202Sattilio vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 122140767Sphk } 123140767Sphk 
124140767Sphk if (size == 0) { 125140767Sphk if (vn_isdisk(vp, NULL)) { 126140767Sphk size = IDX_TO_OFF(INT_MAX); 127140767Sphk } else { 128182371Sattilio if (VOP_GETATTR(vp, &va, td->td_ucred)) 129140767Sphk return (0); 130140767Sphk size = va.va_size; 131140767Sphk } 132140767Sphk } 133140767Sphk 134194766Skib object = vnode_pager_alloc(vp, size, 0, 0, td->td_ucred); 135140767Sphk /* 136140767Sphk * Dereference the reference we just created. This assumes 137140767Sphk * that the object is associated with the vp. 138140767Sphk */ 139248084Sattilio VM_OBJECT_WLOCK(object); 140140767Sphk object->ref_count--; 141248084Sattilio VM_OBJECT_WUNLOCK(object); 142140767Sphk vrele(vp); 143140767Sphk 144140767Sphk KASSERT(vp->v_object != NULL, ("vnode_create_vobject: NULL object")); 145140767Sphk 146140767Sphk return (0); 147140767Sphk} 148140767Sphk 149140929Sphkvoid 150140929Sphkvnode_destroy_vobject(struct vnode *vp) 151140929Sphk{ 152140929Sphk struct vm_object *obj; 153140929Sphk 154140929Sphk obj = vp->v_object; 155140929Sphk if (obj == NULL) 156140929Sphk return; 157171599Spjd ASSERT_VOP_ELOCKED(vp, "vnode_destroy_vobject"); 158248084Sattilio VM_OBJECT_WLOCK(obj); 159140929Sphk if (obj->ref_count == 0) { 160140929Sphk /* 161140929Sphk * don't double-terminate the object 162140929Sphk */ 163140929Sphk if ((obj->flags & OBJ_DEAD) == 0) 164140929Sphk vm_object_terminate(obj); 165140929Sphk else 166248084Sattilio VM_OBJECT_WUNLOCK(obj); 167140929Sphk } else { 168140929Sphk /* 169140929Sphk * Woe to the process that tries to page now :-). 170140929Sphk */ 171140929Sphk vm_pager_deallocate(obj); 172248084Sattilio VM_OBJECT_WUNLOCK(obj); 173140929Sphk } 174144610Sjeff vp->v_object = NULL; 175140929Sphk} 176140929Sphk 177140929Sphk 1781541Srgrimes/* 1791541Srgrimes * Allocate (or lookup) pager for a vnode. 1801541Srgrimes * Handle is a vnode pointer. 
18198604Salc * 18298604Salc * MPSAFE 1831541Srgrimes */ 1849507Sdgvm_object_t 18540286Sdgvnode_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot, 186194766Skib vm_ooffset_t offset, struct ucred *cred) 1871541Srgrimes{ 1889456Sdg vm_object_t object; 1891541Srgrimes struct vnode *vp; 1901541Srgrimes 1911541Srgrimes /* 1921541Srgrimes * Pageout to vnode, no can do yet. 1931541Srgrimes */ 1941541Srgrimes if (handle == NULL) 1951827Sdg return (NULL); 1961541Srgrimes 1979411Sdg vp = (struct vnode *) handle; 1989411Sdg 1991541Srgrimes /* 2009411Sdg * If the object is being terminated, wait for it to 2019411Sdg * go away. 2029411Sdg */ 203179159Supsretry: 204114074Salc while ((object = vp->v_object) != NULL) { 205248084Sattilio VM_OBJECT_WLOCK(object); 206181020Sjhb if ((object->flags & OBJ_DEAD) == 0) 207114074Salc break; 208137297Salc vm_object_set_flag(object, OBJ_DISCONNECTWNT); 209247346Sattilio VM_OBJECT_SLEEP(object, object, PDROP | PVM, "vadead", 0); 2109507Sdg } 2115455Sdg 212250026Skib KASSERT(vp->v_usecount != 0, ("vnode_pager_alloc: no vnode reference")); 21332071Sdyson 2149507Sdg if (object == NULL) { 2151541Srgrimes /* 216179159Sups * Add an object of the appropriate size 2171541Srgrimes */ 21840286Sdg object = vm_object_allocate(OBJT_VNODE, OFF_TO_IDX(round_page(size))); 2191827Sdg 22040286Sdg object->un_pager.vnp.vnp_size = size; 221232071Skib object->un_pager.vnp.writemappings = 0; 2221549Srgrimes 2239507Sdg object->handle = handle; 224179765Sups VI_LOCK(vp); 225179765Sups if (vp->v_object != NULL) { 226179159Sups /* 227179159Sups * Object has been created while we were sleeping 228179159Sups */ 229179765Sups VI_UNLOCK(vp); 230179159Sups vm_object_destroy(object); 231179159Sups goto retry; 232179159Sups } 2339507Sdg vp->v_object = object; 234179765Sups VI_UNLOCK(vp); 235179765Sups } else { 23632286Sdyson object->ref_count++; 237248084Sattilio VM_OBJECT_WUNLOCK(object); 238179765Sups } 239143559Sjeff vref(vp); 2409507Sdg return (object); 
2411541Srgrimes} 2421541Srgrimes 243114774Salc/* 244114774Salc * The object must be locked. 245114774Salc */ 24612820Sphkstatic void 2479507Sdgvnode_pager_dealloc(object) 2489507Sdg vm_object_t object; 2491541Srgrimes{ 250202529Skib struct vnode *vp; 251202529Skib int refs; 2521541Srgrimes 253202529Skib vp = object->handle; 2549507Sdg if (vp == NULL) 2559507Sdg panic("vnode_pager_dealloc: pager already dealloced"); 2569507Sdg 257248084Sattilio VM_OBJECT_ASSERT_WLOCKED(object); 25833817Sdyson vm_object_pip_wait(object, "vnpdea"); 259202529Skib refs = object->ref_count; 2601541Srgrimes 2619507Sdg object->handle = NULL; 26233109Sdyson object->type = OBJT_DEAD; 263137297Salc if (object->flags & OBJ_DISCONNECTWNT) { 264137297Salc vm_object_clear_flag(object, OBJ_DISCONNECTWNT); 265137297Salc wakeup(object); 266137297Salc } 267171599Spjd ASSERT_VOP_ELOCKED(vp, "vnode_pager_dealloc"); 268232071Skib if (object->un_pager.vnp.writemappings > 0) { 269232071Skib object->un_pager.vnp.writemappings = 0; 270242476Skib VOP_ADD_WRITECOUNT(vp, -1); 271232701Sjhb CTR3(KTR_VFS, "%s: vp %p v_writecount decreased to %d", 272232701Sjhb __func__, vp, vp->v_writecount); 273232071Skib } 2749507Sdg vp->v_object = NULL; 275241025Skib VOP_UNSET_TEXT(vp); 276248084Sattilio VM_OBJECT_WUNLOCK(object); 277202529Skib while (refs-- > 0) 278202529Skib vunref(vp); 279248084Sattilio VM_OBJECT_WLOCK(object); 2801549Srgrimes} 2811541Srgrimes 28212820Sphkstatic boolean_t 28312767Sdysonvnode_pager_haspage(object, pindex, before, after) 2849507Sdg vm_object_t object; 28512767Sdyson vm_pindex_t pindex; 2869507Sdg int *before; 2879507Sdg int *after; 2881541Srgrimes{ 2899507Sdg struct vnode *vp = object->handle; 29096572Sphk daddr_t bn; 29112423Sphk int err; 29210556Sdyson daddr_t reqblock; 29311701Sdyson int poff; 29411701Sdyson int bsize; 29512914Sdyson int pagesperblock, blocksperpage; 2961541Srgrimes 297248084Sattilio VM_OBJECT_ASSERT_WLOCKED(object); 29851340Sdillon /* 29951340Sdillon * If no vp or vp is 
doomed or marked transparent to VM, we do not 30051340Sdillon * have the page. 30151340Sdillon */ 302155384Sjeff if (vp == NULL || vp->v_iflag & VI_DOOMED) 30332585Sdyson return FALSE; 3041541Srgrimes /* 305155384Sjeff * If the offset is beyond end of file we do 3065455Sdg * not have the page. 3071541Srgrimes */ 308155384Sjeff if (IDX_TO_OFF(pindex) >= object->un_pager.vnp.vnp_size) 3094797Sdg return FALSE; 3101541Srgrimes 31111576Sdg bsize = vp->v_mount->mnt_stat.f_iosize; 31210556Sdyson pagesperblock = bsize / PAGE_SIZE; 31312914Sdyson blocksperpage = 0; 31412914Sdyson if (pagesperblock > 0) { 31512914Sdyson reqblock = pindex / pagesperblock; 31612914Sdyson } else { 31712914Sdyson blocksperpage = (PAGE_SIZE / bsize); 31812914Sdyson reqblock = pindex * blocksperpage; 31912914Sdyson } 320248084Sattilio VM_OBJECT_WUNLOCK(object); 321119045Sphk err = VOP_BMAP(vp, reqblock, NULL, &bn, after, before); 322248084Sattilio VM_OBJECT_WLOCK(object); 3238876Srgrimes if (err) 3249507Sdg return TRUE; 32592029Seivind if (bn == -1) 32610576Sdyson return FALSE; 32712914Sdyson if (pagesperblock > 0) { 32812914Sdyson poff = pindex - (reqblock * pagesperblock); 32912914Sdyson if (before) { 33012914Sdyson *before *= pagesperblock; 33112914Sdyson *before += poff; 33210669Sdyson } 33312914Sdyson if (after) { 33412914Sdyson int numafter; 33512914Sdyson *after *= pagesperblock; 33612914Sdyson numafter = pagesperblock - (poff + 1); 33799211Srobert if (IDX_TO_OFF(pindex + numafter) > 33899211Srobert object->un_pager.vnp.vnp_size) { 33999211Srobert numafter = 34099211Srobert OFF_TO_IDX(object->un_pager.vnp.vnp_size) - 34199211Srobert pindex; 34212914Sdyson } 34312914Sdyson *after += numafter; 34412914Sdyson } 34512914Sdyson } else { 34612914Sdyson if (before) { 34712914Sdyson *before /= blocksperpage; 34812914Sdyson } 34912914Sdyson 35012914Sdyson if (after) { 35112914Sdyson *after /= blocksperpage; 35212914Sdyson } 35310556Sdyson } 35410576Sdyson return TRUE; 3551541Srgrimes} 
3561541Srgrimes 3571541Srgrimes/* 3581541Srgrimes * Lets the VM system know about a change in size for a file. 3599507Sdg * We adjust our own internal size and flush any cached pages in 3601541Srgrimes * the associated object that are affected by the size change. 3611541Srgrimes * 3621541Srgrimes * Note: this routine may be invoked as a result of a pager put 3631541Srgrimes * operation (possibly at object termination time), so we must be careful. 3641541Srgrimes */ 3651541Srgrimesvoid 3661541Srgrimesvnode_pager_setsize(vp, nsize) 3671541Srgrimes struct vnode *vp; 36812767Sdyson vm_ooffset_t nsize; 3691541Srgrimes{ 370116167Salc vm_object_t object; 371116167Salc vm_page_t m; 37238542Sluoqi vm_pindex_t nobjsize; 3731541Srgrimes 374116167Salc if ((object = vp->v_object) == NULL) 3751541Srgrimes return; 376188386Skib/* ASSERT_VOP_ELOCKED(vp, "vnode_pager_setsize and not locked vnode"); */ 377248084Sattilio VM_OBJECT_WLOCK(object); 378250028Skib if (object->type == OBJT_DEAD) { 379250028Skib VM_OBJECT_WUNLOCK(object); 380250028Skib return; 381250028Skib } 382250028Skib KASSERT(object->type == OBJT_VNODE, 383250028Skib ("not vnode-backed object %p", object)); 384116167Salc if (nsize == object->un_pager.vnp.vnp_size) { 385116167Salc /* 386116167Salc * Hasn't changed size 387116167Salc */ 388248084Sattilio VM_OBJECT_WUNLOCK(object); 3893374Sdg return; 390116167Salc } 39138542Sluoqi nobjsize = OFF_TO_IDX(nsize + PAGE_MASK); 3929507Sdg if (nsize < object->un_pager.vnp.vnp_size) { 393116167Salc /* 394116167Salc * File has shrunk. Toss any cached pages beyond the new EOF. 395116167Salc */ 396116167Salc if (nobjsize < object->size) 39738542Sluoqi vm_object_page_remove(object, nobjsize, object->size, 398223677Salc 0); 3991827Sdg /* 4001827Sdg * this gets rid of garbage at the end of a page that is now 40187834Sdillon * only partially backed by the vnode. 
40287834Sdillon * 40387834Sdillon * XXX for some reason (I don't know yet), if we take a 40487834Sdillon * completely invalid page and mark it partially valid 40587834Sdillon * it can screw up NFS reads, so we don't allow the case. 4061827Sdg */ 407116167Salc if ((nsize & PAGE_MASK) && 408121230Salc (m = vm_page_lookup(object, OFF_TO_IDX(nsize))) != NULL && 409121230Salc m->valid != 0) { 410121230Salc int base = (int)nsize & PAGE_MASK; 411121230Salc int size = PAGE_SIZE - base; 41270374Sdillon 413121230Salc /* 414121230Salc * Clear out partial-page garbage in case 415121230Salc * the page has been mapped. 416121230Salc */ 417121230Salc pmap_zero_page_area(m, base, size); 41870374Sdillon 419121230Salc /* 420193303Salc * Update the valid bits to reflect the blocks that 421193303Salc * have been zeroed. Some of these valid bits may 422193303Salc * have already been set. 423193303Salc */ 424228156Skib vm_page_set_valid_range(m, base, size); 425193303Salc 426193303Salc /* 427193303Salc * Round "base" to the next block boundary so that the 428193303Salc * dirty bit for a partially zeroed block is not 429193303Salc * cleared. 430193303Salc */ 431193303Salc base = roundup2(base, DEV_BSIZE); 432193303Salc 433193303Salc /* 434193303Salc * Clear out partial-page dirty bits. 435121230Salc * 436121230Salc * note that we do not clear out the valid 437121230Salc * bits. This would prevent bogus_page 438121230Salc * replacement from working properly. 
439121230Salc */ 440193303Salc vm_page_clear_dirty(m, base, PAGE_SIZE - base); 441172875Salc } else if ((nsize & PAGE_MASK) && 442237172Sattilio vm_page_is_cached(object, OFF_TO_IDX(nsize))) { 443172875Salc vm_page_cache_free(object, OFF_TO_IDX(nsize), 444172875Salc nobjsize); 4451827Sdg } 4461541Srgrimes } 44712767Sdyson object->un_pager.vnp.vnp_size = nsize; 44838542Sluoqi object->size = nobjsize; 449248084Sattilio VM_OBJECT_WUNLOCK(object); 4501541Srgrimes} 4511541Srgrimes 4521549Srgrimes/* 4531549Srgrimes * calculate the linear (byte) disk address of specified virtual 4541549Srgrimes * file address 4551549Srgrimes */ 456163359Salcstatic int 457163359Salcvnode_pager_addr(struct vnode *vp, vm_ooffset_t address, daddr_t *rtaddress, 458163359Salc int *run) 4591549Srgrimes{ 4605455Sdg int bsize; 4615455Sdg int err; 46212767Sdyson daddr_t vblock; 463146340Sbz daddr_t voffset; 4641549Srgrimes 465138531Salc if (address < 0) 4665455Sdg return -1; 4675455Sdg 468155384Sjeff if (vp->v_iflag & VI_DOOMED) 46911701Sdyson return -1; 47011701Sdyson 4711549Srgrimes bsize = vp->v_mount->mnt_stat.f_iosize; 4721549Srgrimes vblock = address / bsize; 4731549Srgrimes voffset = address % bsize; 4741549Srgrimes 475163359Salc err = VOP_BMAP(vp, vblock, NULL, rtaddress, run, NULL); 476163359Salc if (err == 0) { 477163359Salc if (*rtaddress != -1) 478163359Salc *rtaddress += voffset / DEV_BSIZE; 47992029Seivind if (run) { 4806151Sdg *run += 1; 4816151Sdg *run *= bsize/PAGE_SIZE; 4826151Sdg *run -= voffset/PAGE_SIZE; 4836151Sdg } 4846151Sdg } 4851549Srgrimes 486163359Salc return (err); 4871549Srgrimes} 4881549Srgrimes 4891549Srgrimes/* 49096755Strhodes * small block filesystem vnode pager input 4911549Srgrimes */ 49212820Sphkstatic int 4939507Sdgvnode_pager_input_smlfs(object, m) 4949507Sdg vm_object_t object; 4951549Srgrimes vm_page_t m; 4961549Srgrimes{ 497137726Sphk struct vnode *vp; 498137726Sphk struct bufobj *bo; 4991549Srgrimes struct buf *bp; 500127926Salc struct sf_buf *sf; 
501146340Sbz daddr_t fileaddr; 5021549Srgrimes vm_offset_t bsize; 503227102Skib vm_page_bits_t bits; 504227102Skib int error, i; 5051549Srgrimes 506227102Skib error = 0; 5079507Sdg vp = object->handle; 508155384Sjeff if (vp->v_iflag & VI_DOOMED) 50911701Sdyson return VM_PAGER_BAD; 51011701Sdyson 5111549Srgrimes bsize = vp->v_mount->mnt_stat.f_iosize; 5121549Srgrimes 513137726Sphk VOP_BMAP(vp, 0, &bo, 0, NULL, NULL); 5141549Srgrimes 515127926Salc sf = sf_buf_alloc(m, 0); 5161549Srgrimes 5171827Sdg for (i = 0; i < PAGE_SIZE / bsize; i++) { 51886092Sdillon vm_ooffset_t address; 5191827Sdg 520191935Salc bits = vm_page_bits(i * bsize, bsize); 521191935Salc if (m->valid & bits) 5225455Sdg continue; 5231549Srgrimes 52486092Sdillon address = IDX_TO_OFF(m->pindex) + i * bsize; 52586092Sdillon if (address >= object->un_pager.vnp.vnp_size) { 52686092Sdillon fileaddr = -1; 52786092Sdillon } else { 528163359Salc error = vnode_pager_addr(vp, address, &fileaddr, NULL); 529163359Salc if (error) 530163359Salc break; 53186092Sdillon } 5321827Sdg if (fileaddr != -1) { 53342957Sdillon bp = getpbuf(&vnode_pbuf_freecnt); 5341549Srgrimes 5351827Sdg /* build a minimal buffer header */ 53658345Sphk bp->b_iocmd = BIO_READ; 537119092Sphk bp->b_iodone = bdone; 53884827Sjhb KASSERT(bp->b_rcred == NOCRED, ("leaking read ucred")); 53984827Sjhb KASSERT(bp->b_wcred == NOCRED, ("leaking write ucred")); 54091406Sjhb bp->b_rcred = crhold(curthread->td_ucred); 54191406Sjhb bp->b_wcred = crhold(curthread->td_ucred); 542127926Salc bp->b_data = (caddr_t)sf_buf_kva(sf) + i * bsize; 5436626Sdg bp->b_blkno = fileaddr; 544137726Sphk pbgetbo(bo, bp); 545233627Smckusick bp->b_vp = vp; 5461549Srgrimes bp->b_bcount = bsize; 5471549Srgrimes bp->b_bufsize = bsize; 54870374Sdillon bp->b_runningbufspace = bp->b_bufsize; 549189595Sjhb atomic_add_long(&runningbufspace, bp->b_runningbufspace); 5501827Sdg 5511827Sdg /* do the input */ 552121205Sphk bp->b_iooffset = dbtob(bp->b_blkno); 553136927Sphk bstrategy(bp); 
5541549Srgrimes 555119092Sphk bwait(bp, PVM, "vnsrd"); 556119092Sphk 55758934Sphk if ((bp->b_ioflags & BIO_ERROR) != 0) 5581549Srgrimes error = EIO; 5591549Srgrimes 5601827Sdg /* 5611827Sdg * free the buffer header back to the swap buffer pool 5621827Sdg */ 563233627Smckusick bp->b_vp = NULL; 564137726Sphk pbrelbo(bp); 56542957Sdillon relpbuf(bp, &vnode_pbuf_freecnt); 5661827Sdg if (error) 5671549Srgrimes break; 568191935Salc } else 569127926Salc bzero((caddr_t)sf_buf_kva(sf) + i * bsize, bsize); 570191935Salc KASSERT((m->dirty & bits) == 0, 571191935Salc ("vnode_pager_input_smlfs: page %p is dirty", m)); 572248084Sattilio VM_OBJECT_WLOCK(object); 573191935Salc m->valid |= bits; 574248084Sattilio VM_OBJECT_WUNLOCK(object); 5751549Srgrimes } 576127926Salc sf_buf_free(sf); 5771827Sdg if (error) { 5784207Sdg return VM_PAGER_ERROR; 5791549Srgrimes } 5801549Srgrimes return VM_PAGER_OK; 5811549Srgrimes} 5821549Srgrimes 5831549Srgrimes/* 584139296Sphk * old style vnode pager input routine 5851549Srgrimes */ 58612820Sphkstatic int 5879507Sdgvnode_pager_input_old(object, m) 5889507Sdg vm_object_t object; 5891549Srgrimes vm_page_t m; 5901549Srgrimes{ 5911541Srgrimes struct uio auio; 5921541Srgrimes struct iovec aiov; 5935455Sdg int error; 5945455Sdg int size; 595127926Salc struct sf_buf *sf; 59677398Sjhb struct vnode *vp; 5971549Srgrimes 598248084Sattilio VM_OBJECT_ASSERT_WLOCKED(object); 5991549Srgrimes error = 0; 6001827Sdg 6011549Srgrimes /* 6021549Srgrimes * Return failure if beyond current EOF 6031549Srgrimes */ 60412767Sdyson if (IDX_TO_OFF(m->pindex) >= object->un_pager.vnp.vnp_size) { 6051549Srgrimes return VM_PAGER_BAD; 6061549Srgrimes } else { 6071549Srgrimes size = PAGE_SIZE; 60812767Sdyson if (IDX_TO_OFF(m->pindex) + size > object->un_pager.vnp.vnp_size) 60912767Sdyson size = object->un_pager.vnp.vnp_size - IDX_TO_OFF(m->pindex); 610121495Salc vp = object->handle; 611248084Sattilio VM_OBJECT_WUNLOCK(object); 6127178Sdg 6135455Sdg /* 6145455Sdg * Allocate a kernel 
virtual address and initialize so that 6155455Sdg * we can use VOP_READ/WRITE routines. 6165455Sdg */ 617127926Salc sf = sf_buf_alloc(m, 0); 6187178Sdg 619127926Salc aiov.iov_base = (caddr_t)sf_buf_kva(sf); 6201549Srgrimes aiov.iov_len = size; 6211549Srgrimes auio.uio_iov = &aiov; 6221549Srgrimes auio.uio_iovcnt = 1; 62312767Sdyson auio.uio_offset = IDX_TO_OFF(m->pindex); 6241549Srgrimes auio.uio_segflg = UIO_SYSSPACE; 6251549Srgrimes auio.uio_rw = UIO_READ; 6261549Srgrimes auio.uio_resid = size; 62783366Sjulian auio.uio_td = curthread; 6281549Srgrimes 62991406Sjhb error = VOP_READ(vp, &auio, 0, curthread->td_ucred); 6301549Srgrimes if (!error) { 63179242Sdillon int count = size - auio.uio_resid; 6321549Srgrimes 6331549Srgrimes if (count == 0) 6341549Srgrimes error = EINVAL; 6351549Srgrimes else if (count != PAGE_SIZE) 636127926Salc bzero((caddr_t)sf_buf_kva(sf) + count, 637127926Salc PAGE_SIZE - count); 6381549Srgrimes } 639127926Salc sf_buf_free(sf); 640121230Salc 641248084Sattilio VM_OBJECT_WLOCK(object); 6421549Srgrimes } 643191935Salc KASSERT(m->dirty == 0, ("vnode_pager_input_old: page %p is dirty", m)); 64439739Srvb if (!error) 64539739Srvb m->valid = VM_PAGE_BITS_ALL; 6464207Sdg return error ? VM_PAGER_ERROR : VM_PAGER_OK; 6471549Srgrimes} 6481549Srgrimes 6491549Srgrimes/* 6501549Srgrimes * generic vnode pager input routine 6511549Srgrimes */ 65210556Sdyson 65333847Smsmith/* 65476827Salfred * Local media VFS's that do not implement their own VOP_GETPAGES 65599211Srobert * should have their VOP_GETPAGES call to vnode_pager_generic_getpages() 65699211Srobert * to implement the previous behaviour. 65733847Smsmith * 65833847Smsmith * All other FS's should use the bypass to get to the local media 65933847Smsmith * backing vp's VOP_GETPAGES. 
66033847Smsmith */ 66112820Sphkstatic int 6629507Sdgvnode_pager_getpages(object, m, count, reqpage) 6639507Sdg vm_object_t object; 6641549Srgrimes vm_page_t *m; 6659507Sdg int count; 6669507Sdg int reqpage; 6671549Srgrimes{ 66810556Sdyson int rtval; 66910556Sdyson struct vnode *vp; 67034403Smsmith int bytes = count * PAGE_SIZE; 67132286Sdyson 67210556Sdyson vp = object->handle; 673248084Sattilio VM_OBJECT_WUNLOCK(object); 67434403Smsmith rtval = VOP_GETPAGES(vp, m, bytes, reqpage, 0); 67576827Salfred KASSERT(rtval != EOPNOTSUPP, 67676827Salfred ("vnode_pager: FS getpages not implemented\n")); 677248084Sattilio VM_OBJECT_WLOCK(object); 67833847Smsmith return rtval; 67910556Sdyson} 68010556Sdyson 68133847Smsmith/* 68233847Smsmith * This is now called from local media FS's to operate against their 68333847Smsmith * own vnodes if they fail to implement VOP_GETPAGES. 68433847Smsmith */ 68533847Smsmithint 68633847Smsmithvnode_pager_generic_getpages(vp, m, bytecount, reqpage) 68733847Smsmith struct vnode *vp; 68810556Sdyson vm_page_t *m; 68933847Smsmith int bytecount; 69010556Sdyson int reqpage; 69110556Sdyson{ 69233847Smsmith vm_object_t object; 69312767Sdyson vm_offset_t kva; 69434206Sdyson off_t foff, tfoff, nextoff; 695146340Sbz int i, j, size, bsize, first; 696163140Salc daddr_t firstaddr, reqblock; 697137726Sphk struct bufobj *bo; 6986151Sdg int runpg; 6996151Sdg int runend; 7007178Sdg struct buf *bp; 701248512Skib struct mount *mp; 70233847Smsmith int count; 703163210Salc int error; 7041549Srgrimes 70533847Smsmith object = vp->v_object; 70633847Smsmith count = bytecount / PAGE_SIZE; 70733847Smsmith 708137726Sphk KASSERT(vp->v_type != VCHR && vp->v_type != VBLK, 709137726Sphk ("vnode_pager_generic_getpages does not support devices")); 710155384Sjeff if (vp->v_iflag & VI_DOOMED) 71111701Sdyson return VM_PAGER_BAD; 71211701Sdyson 7131549Srgrimes bsize = vp->v_mount->mnt_stat.f_iosize; 7141549Srgrimes 7151549Srgrimes /* get the UNDERLYING device for the file with 
VOP_BMAP() */ 7161827Sdg 7171549Srgrimes /* 7181827Sdg * originally, we did not check for an error return value -- assuming 7191827Sdg * an fs always has a bmap entry point -- that assumption is wrong!!! 7201549Srgrimes */ 72112767Sdyson foff = IDX_TO_OFF(m[reqpage]->pindex); 7221827Sdg 7231549Srgrimes /* 7241887Sdg * if we can't bmap, use old VOP code 7251549Srgrimes */ 726163210Salc error = VOP_BMAP(vp, foff / bsize, &bo, &reqblock, NULL, NULL); 727163210Salc if (error == EOPNOTSUPP) { 728248084Sattilio VM_OBJECT_WLOCK(object); 729207410Skmacy 730100832Salc for (i = 0; i < count; i++) 731207410Skmacy if (i != reqpage) { 732207410Skmacy vm_page_lock(m[i]); 73375692Salfred vm_page_free(m[i]); 734207410Skmacy vm_page_unlock(m[i]); 735207410Skmacy } 736170292Sattilio PCPU_INC(cnt.v_vnodein); 737170292Sattilio PCPU_INC(cnt.v_vnodepgsin); 738121495Salc error = vnode_pager_input_old(object, m[reqpage]); 739248084Sattilio VM_OBJECT_WUNLOCK(object); 740121495Salc return (error); 741163210Salc } else if (error != 0) { 742248084Sattilio VM_OBJECT_WLOCK(object); 743163210Salc for (i = 0; i < count; i++) 744207410Skmacy if (i != reqpage) { 745207410Skmacy vm_page_lock(m[i]); 746163210Salc vm_page_free(m[i]); 747207410Skmacy vm_page_unlock(m[i]); 748207410Skmacy } 749248084Sattilio VM_OBJECT_WUNLOCK(object); 750163210Salc return (VM_PAGER_ERROR); 7511549Srgrimes 7521827Sdg /* 7531827Sdg * if the blocksize is smaller than a page size, then use 7541827Sdg * special small filesystem code. NFS sometimes has a small 7551827Sdg * blocksize, but it can handle large reads itself. 
7561827Sdg */ 7571827Sdg } else if ((PAGE_SIZE / bsize) > 1 && 75838866Sbde (vp->v_mount->mnt_stat.f_type != nfs_mount_type)) { 759248084Sattilio VM_OBJECT_WLOCK(object); 760100832Salc for (i = 0; i < count; i++) 761207410Skmacy if (i != reqpage) { 762207410Skmacy vm_page_lock(m[i]); 76375692Salfred vm_page_free(m[i]); 764207410Skmacy vm_page_unlock(m[i]); 765207410Skmacy } 766248084Sattilio VM_OBJECT_WUNLOCK(object); 767170292Sattilio PCPU_INC(cnt.v_vnodein); 768170292Sattilio PCPU_INC(cnt.v_vnodepgsin); 7699507Sdg return vnode_pager_input_smlfs(object, m[reqpage]); 7701549Srgrimes } 77145347Sjulian 7721549Srgrimes /* 77345347Sjulian * If we have a completely valid page available to us, we can 77445347Sjulian * clean up and return. Otherwise we have to re-read the 77545347Sjulian * media. 7761549Srgrimes */ 777248084Sattilio VM_OBJECT_WLOCK(object); 77845347Sjulian if (m[reqpage]->valid == VM_PAGE_BITS_ALL) { 779100832Salc for (i = 0; i < count; i++) 780207410Skmacy if (i != reqpage) { 781207410Skmacy vm_page_lock(m[i]); 78275692Salfred vm_page_free(m[i]); 783207410Skmacy vm_page_unlock(m[i]); 784207410Skmacy } 785248084Sattilio VM_OBJECT_WUNLOCK(object); 7865455Sdg return VM_PAGER_OK; 787163140Salc } else if (reqblock == -1) { 788163140Salc pmap_zero_page(m[reqpage]); 789192010Salc KASSERT(m[reqpage]->dirty == 0, 790192010Salc ("vnode_pager_generic_getpages: page %p is dirty", m)); 791163140Salc m[reqpage]->valid = VM_PAGE_BITS_ALL; 792163140Salc for (i = 0; i < count; i++) 793207410Skmacy if (i != reqpage) { 794207410Skmacy vm_page_lock(m[i]); 795163140Salc vm_page_free(m[i]); 796207410Skmacy vm_page_unlock(m[i]); 797207410Skmacy } 798248084Sattilio VM_OBJECT_WUNLOCK(object); 799163140Salc return (VM_PAGER_OK); 8001549Srgrimes } 80145347Sjulian m[reqpage]->valid = 0; 802248084Sattilio VM_OBJECT_WUNLOCK(object); 8037178Sdg 8045455Sdg /* 8055455Sdg * here on direct device I/O 8065455Sdg */ 80792029Seivind firstaddr = -1; 8081549Srgrimes 8091549Srgrimes /* 
8106151Sdg * calculate the run that includes the required page 8111549Srgrimes */ 81292029Seivind for (first = 0, i = 0; i < count; i = runend) { 813163359Salc if (vnode_pager_addr(vp, IDX_TO_OFF(m[i]->pindex), &firstaddr, 814163359Salc &runpg) != 0) { 815248084Sattilio VM_OBJECT_WLOCK(object); 816163359Salc for (; i < count; i++) 817207410Skmacy if (i != reqpage) { 818207410Skmacy vm_page_lock(m[i]); 819163359Salc vm_page_free(m[i]); 820207410Skmacy vm_page_unlock(m[i]); 821207410Skmacy } 822248084Sattilio VM_OBJECT_WUNLOCK(object); 823163359Salc return (VM_PAGER_ERROR); 824163359Salc } 8256151Sdg if (firstaddr == -1) { 826248084Sattilio VM_OBJECT_WLOCK(object); 8279507Sdg if (i == reqpage && foff < object->un_pager.vnp.vnp_size) { 828146340Sbz panic("vnode_pager_getpages: unexpected missing page: firstaddr: %jd, foff: 0x%jx%08jx, vnp_size: 0x%jx%08jx", 829146340Sbz (intmax_t)firstaddr, (uintmax_t)(foff >> 32), 830106603Smux (uintmax_t)foff, 831106603Smux (uintmax_t) 832106603Smux (object->un_pager.vnp.vnp_size >> 32), 833106603Smux (uintmax_t)object->un_pager.vnp.vnp_size); 8346151Sdg } 835207410Skmacy vm_page_lock(m[i]); 83675692Salfred vm_page_free(m[i]); 837207410Skmacy vm_page_unlock(m[i]); 838248084Sattilio VM_OBJECT_WUNLOCK(object); 8396151Sdg runend = i + 1; 8406151Sdg first = runend; 8416151Sdg continue; 8421549Srgrimes } 8436151Sdg runend = i + runpg; 8449507Sdg if (runend <= reqpage) { 845248084Sattilio VM_OBJECT_WLOCK(object); 846207410Skmacy for (j = i; j < runend; j++) { 847207410Skmacy vm_page_lock(m[j]); 84875692Salfred vm_page_free(m[j]); 849207410Skmacy vm_page_unlock(m[j]); 850207410Skmacy } 851248084Sattilio VM_OBJECT_WUNLOCK(object); 8521549Srgrimes } else { 8539507Sdg if (runpg < (count - first)) { 854248084Sattilio VM_OBJECT_WLOCK(object); 855207410Skmacy for (i = first + runpg; i < count; i++) { 856207410Skmacy vm_page_lock(m[i]); 85775692Salfred vm_page_free(m[i]); 858207410Skmacy vm_page_unlock(m[i]); 859207410Skmacy } 860248084Sattilio 
VM_OBJECT_WUNLOCK(object); 8616151Sdg count = first + runpg; 8626151Sdg } 8636151Sdg break; 8641549Srgrimes } 8656151Sdg first = runend; 8661549Srgrimes } 8671549Srgrimes 8681549Srgrimes /* 8691827Sdg * the first and last page have been calculated now, move input pages 8701827Sdg * to be zero based... 8711549Srgrimes */ 8721549Srgrimes if (first != 0) { 873163361Salc m += first; 8741549Srgrimes count -= first; 8751549Srgrimes reqpage -= first; 8761549Srgrimes } 8776151Sdg 8781549Srgrimes /* 8791549Srgrimes * calculate the file virtual address for the transfer 8801549Srgrimes */ 88112767Sdyson foff = IDX_TO_OFF(m[0]->pindex); 8821827Sdg 8831549Srgrimes /* 8841549Srgrimes * calculate the size of the transfer 8851549Srgrimes */ 8861549Srgrimes size = count * PAGE_SIZE; 887134892Sphk KASSERT(count > 0, ("zero count")); 8889507Sdg if ((foff + size) > object->un_pager.vnp.vnp_size) 8899507Sdg size = object->un_pager.vnp.vnp_size - foff; 890134892Sphk KASSERT(size > 0, ("zero size")); 8911549Srgrimes 8921549Srgrimes /* 89351340Sdillon * round up physical size for real devices. 8941549Srgrimes */ 895137726Sphk if (1) { 896137726Sphk int secmask = bo->bo_bsize - 1; 897136977Sphk KASSERT(secmask < PAGE_SIZE && secmask > 0, 898136977Sphk ("vnode_pager_generic_getpages: sector size %d too large", 899136977Sphk secmask + 1)); 90051340Sdillon size = (size + secmask) & ~secmask; 90151340Sdillon } 9021549Srgrimes 90342957Sdillon bp = getpbuf(&vnode_pbuf_freecnt); 904248283Skib kva = (vm_offset_t)bp->b_data; 9051887Sdg 9061549Srgrimes /* 907248512Skib * and map the pages to be read into the kva, if the filesystem 908248512Skib * requires mapped buffers. 
9091549Srgrimes */ 910248512Skib mp = vp->v_mount; 911248512Skib if (mp != NULL && (mp->mnt_kern_flag & MNTK_UNMAPPED_BUFS) != 0 && 912248512Skib unmapped_buf_allowed) { 913248512Skib bp->b_data = unmapped_buf; 914248512Skib bp->b_kvabase = unmapped_buf; 915248512Skib bp->b_offset = 0; 916248512Skib bp->b_flags |= B_UNMAPPED; 917248512Skib bp->b_npages = count; 918248512Skib for (i = 0; i < count; i++) 919248512Skib bp->b_pages[i] = m[i]; 920248512Skib } else 921248512Skib pmap_qenter(kva, m, count); 9221549Srgrimes 9231549Srgrimes /* build a minimal buffer header */ 92458345Sphk bp->b_iocmd = BIO_READ; 925119092Sphk bp->b_iodone = bdone; 92684827Sjhb KASSERT(bp->b_rcred == NOCRED, ("leaking read ucred")); 92784827Sjhb KASSERT(bp->b_wcred == NOCRED, ("leaking write ucred")); 92891406Sjhb bp->b_rcred = crhold(curthread->td_ucred); 92991406Sjhb bp->b_wcred = crhold(curthread->td_ucred); 9306626Sdg bp->b_blkno = firstaddr; 931137726Sphk pbgetbo(bo, bp); 932233627Smckusick bp->b_vp = vp; 9331549Srgrimes bp->b_bcount = size; 9341549Srgrimes bp->b_bufsize = size; 93570374Sdillon bp->b_runningbufspace = bp->b_bufsize; 936189595Sjhb atomic_add_long(&runningbufspace, bp->b_runningbufspace); 9371549Srgrimes 938170292Sattilio PCPU_INC(cnt.v_vnodein); 939170292Sattilio PCPU_ADD(cnt.v_vnodepgsin, count); 9403612Sdg 9411549Srgrimes /* do the input */ 942121205Sphk bp->b_iooffset = dbtob(bp->b_blkno); 943136927Sphk bstrategy(bp); 9443612Sdg 945119092Sphk bwait(bp, PVM, "vnread"); 9461549Srgrimes 94758934Sphk if ((bp->b_ioflags & BIO_ERROR) != 0) 9481549Srgrimes error = EIO; 9491549Srgrimes 950248550Skib if (error == 0 && size != count * PAGE_SIZE) { 951248512Skib if ((bp->b_flags & B_UNMAPPED) != 0) { 952248512Skib bp->b_flags &= ~B_UNMAPPED; 953248512Skib pmap_qenter(kva, m, count); 954248512Skib } 955248512Skib bzero((caddr_t)kva + size, PAGE_SIZE * count - size); 9561549Srgrimes } 957248512Skib if ((bp->b_flags & B_UNMAPPED) == 0) 958248512Skib pmap_qremove(kva, count); 
959248512Skib if (mp != NULL && (mp->mnt_kern_flag & MNTK_UNMAPPED_BUFS) != 0) { 960248512Skib bp->b_data = (caddr_t)kva; 961248512Skib bp->b_kvabase = (caddr_t)kva; 962248512Skib bp->b_flags &= ~B_UNMAPPED; 963248512Skib for (i = 0; i < count; i++) 964248512Skib bp->b_pages[i] = NULL; 965248512Skib } 9661549Srgrimes 9671549Srgrimes /* 9681549Srgrimes * free the buffer header back to the swap buffer pool 9691549Srgrimes */ 970233627Smckusick bp->b_vp = NULL; 971137726Sphk pbrelbo(bp); 97242957Sdillon relpbuf(bp, &vnode_pbuf_freecnt); 9731549Srgrimes 974248084Sattilio VM_OBJECT_WLOCK(object); 97534206Sdyson for (i = 0, tfoff = foff; i < count; i++, tfoff = nextoff) { 97634206Sdyson vm_page_t mt; 97734206Sdyson 97834206Sdyson nextoff = tfoff + PAGE_SIZE; 97934206Sdyson mt = m[i]; 98034206Sdyson 98147239Sdt if (nextoff <= object->un_pager.vnp.vnp_size) { 98245347Sjulian /* 98345347Sjulian * Read filled up entire page. 98445347Sjulian */ 98534206Sdyson mt->valid = VM_PAGE_BITS_ALL; 986191478Salc KASSERT(mt->dirty == 0, 987191478Salc ("vnode_pager_generic_getpages: page %p is dirty", 988191478Salc mt)); 989191478Salc KASSERT(!pmap_page_is_mapped(mt), 990191478Salc ("vnode_pager_generic_getpages: page %p is mapped", 991191478Salc mt)); 99234206Sdyson } else { 99345347Sjulian /* 994192134Salc * Read did not fill up entire page. 99545347Sjulian * 99645347Sjulian * Currently we do not set the entire page valid, 99745347Sjulian * we just try to clear the piece that we couldn't 99845347Sjulian * read. 
99945347Sjulian */ 1000228156Skib vm_page_set_valid_range(mt, 0, 100147239Sdt object->un_pager.vnp.vnp_size - tfoff); 1002192134Salc KASSERT((mt->dirty & vm_page_bits(0, 1003192134Salc object->un_pager.vnp.vnp_size - tfoff)) == 0, 1004192134Salc ("vnode_pager_generic_getpages: page %p is dirty", 1005192134Salc mt)); 100634206Sdyson } 100734206Sdyson 1008239040Skib if (i != reqpage) 1009239246Skib vm_page_readahead_finish(mt); 10101549Srgrimes } 1011248084Sattilio VM_OBJECT_WUNLOCK(object); 10121549Srgrimes if (error) { 10139507Sdg printf("vnode_pager_getpages: I/O read error\n"); 10141549Srgrimes } 10154207Sdg return (error ? VM_PAGER_ERROR : VM_PAGER_OK); 10161549Srgrimes} 10171549Srgrimes 101833847Smsmith/* 101933847Smsmith * EOPNOTSUPP is no longer legal. For local media VFS's that do not 102033847Smsmith * implement their own VOP_PUTPAGES, their VOP_PUTPAGES should call to 102133847Smsmith * vnode_pager_generic_putpages() to implement the previous behaviour. 102233847Smsmith * 102333847Smsmith * All other FS's should use the bypass to get to the local media 102433847Smsmith * backing vp's VOP_PUTPAGES. 102533847Smsmith */ 102643129Sdillonstatic void 102710556Sdysonvnode_pager_putpages(object, m, count, sync, rtvals) 102810556Sdyson vm_object_t object; 102910556Sdyson vm_page_t *m; 103010556Sdyson int count; 103110556Sdyson boolean_t sync; 103210556Sdyson int *rtvals; 103310556Sdyson{ 103410556Sdyson int rtval; 103510556Sdyson struct vnode *vp; 103634403Smsmith int bytes = count * PAGE_SIZE; 103718973Sdyson 103844321Salc /* 103944321Salc * Force synchronous operation if we are extremely low on memory 104044321Salc * to prevent a low-memory deadlock. VOP operations often need to 104144321Salc * allocate more memory to initiate the I/O ( i.e. do a BMAP 104244321Salc * operation ). 
The swapper handles the case by limiting the amount 104344321Salc * of asynchronous I/O, but that sort of solution doesn't scale well 104444321Salc * for the vnode pager without a lot of work. 104544321Salc * 104644321Salc * Also, the backing vnode's iodone routine may not wake the pageout 104744321Salc * daemon up. This should be probably be addressed XXX. 104844321Salc */ 104944321Salc 1050170170Sattilio if ((cnt.v_free_count + cnt.v_cache_count) < cnt.v_pageout_free_min) 105144321Salc sync |= OBJPC_SYNC; 105244321Salc 105344321Salc /* 105444321Salc * Call device-specific putpages function 105544321Salc */ 105610556Sdyson vp = object->handle; 1057248084Sattilio VM_OBJECT_WUNLOCK(object); 105834403Smsmith rtval = VOP_PUTPAGES(vp, m, bytes, sync, rtvals, 0); 105976827Salfred KASSERT(rtval != EOPNOTSUPP, 106076827Salfred ("vnode_pager: stale FS putpages\n")); 1061248084Sattilio VM_OBJECT_WLOCK(object); 106210556Sdyson} 106310556Sdyson 106433847Smsmith 10651549Srgrimes/* 106633847Smsmith * This is now called from local media FS's to operate against their 106745057Seivind * own vnodes if they fail to implement VOP_PUTPAGES. 106870374Sdillon * 106970374Sdillon * This is typically called indirectly via the pageout daemon and 107070374Sdillon * clustering has already typically occured, so in general we ask the 107170374Sdillon * underlying filesystem to write the data out asynchronously rather 107270374Sdillon * then delayed. 
10731549Srgrimes */ 107433847Smsmithint 1075208574Salcvnode_pager_generic_putpages(struct vnode *vp, vm_page_t *ma, int bytecount, 1076208574Salc int flags, int *rtvals) 10771549Srgrimes{ 10787695Sdg int i; 107933847Smsmith vm_object_t object; 1080208574Salc vm_page_t m; 108133847Smsmith int count; 10821549Srgrimes 10837695Sdg int maxsize, ncount; 108412767Sdyson vm_ooffset_t poffset; 10857695Sdg struct uio auio; 10867695Sdg struct iovec aiov; 10877695Sdg int error; 108834206Sdyson int ioflags; 1089151951Sps int ppscheck = 0; 1090151951Sps static struct timeval lastfail; 1091151951Sps static int curfail; 10921549Srgrimes 109333847Smsmith object = vp->v_object; 109433847Smsmith count = bytecount / PAGE_SIZE; 109533847Smsmith 10961827Sdg for (i = 0; i < count; i++) 1097222586Skib rtvals[i] = VM_PAGER_ERROR; 10981549Srgrimes 1099208574Salc if ((int64_t)ma[0]->pindex < 0) { 1100119544Smarcel printf("vnode_pager_putpages: attempt to write meta-data!!! -- 0x%lx(%lx)\n", 1101208574Salc (long)ma[0]->pindex, (u_long)ma[0]->dirty); 11027695Sdg rtvals[0] = VM_PAGER_BAD; 11037695Sdg return VM_PAGER_BAD; 11045455Sdg } 11057178Sdg 11067695Sdg maxsize = count * PAGE_SIZE; 11077695Sdg ncount = count; 11081549Srgrimes 1109208574Salc poffset = IDX_TO_OFF(ma[0]->pindex); 111084854Sdillon 111184854Sdillon /* 111284854Sdillon * If the page-aligned write is larger then the actual file we 111384854Sdillon * have to invalidate pages occuring beyond the file EOF. However, 111484854Sdillon * there is an edge case where a file may not be page-aligned where 111584854Sdillon * the last page is partially invalid. In this case the filesystem 111684854Sdillon * may not properly clear the dirty bits for the entire page (which 111784854Sdillon * could be VM_PAGE_BITS_ALL due to the page having been mmap()d). 111884854Sdillon * With the page locked we are free to fix-up the dirty bits here. 
111987834Sdillon * 112087834Sdillon * We do not under any circumstances truncate the valid bits, as 112187834Sdillon * this will screw up bogus page replacement. 112284854Sdillon */ 1123248084Sattilio VM_OBJECT_WLOCK(object); 112412767Sdyson if (maxsize + poffset > object->un_pager.vnp.vnp_size) { 112584854Sdillon if (object->un_pager.vnp.vnp_size > poffset) { 112684854Sdillon int pgoff; 112784854Sdillon 112812767Sdyson maxsize = object->un_pager.vnp.vnp_size - poffset; 112984854Sdillon ncount = btoc(maxsize); 113084854Sdillon if ((pgoff = (int)maxsize & PAGE_MASK) != 0) { 1131208574Salc /* 1132208574Salc * If the object is locked and the following 1133208574Salc * conditions hold, then the page's dirty 1134208574Salc * field cannot be concurrently changed by a 1135208574Salc * pmap operation. 1136208574Salc */ 1137208574Salc m = ma[ncount - 1]; 1138254138Sattilio vm_page_assert_sbusied(m); 1139237168Salc KASSERT(!pmap_page_is_write_mapped(m), 1140208574Salc ("vnode_pager_generic_putpages: page %p is not read-only", m)); 1141208574Salc vm_page_clear_dirty(m, pgoff, PAGE_SIZE - 1142208574Salc pgoff); 114384854Sdillon } 114484854Sdillon } else { 11458585Sdg maxsize = 0; 114684854Sdillon ncount = 0; 114784854Sdillon } 11488585Sdg if (ncount < count) { 11498585Sdg for (i = ncount; i < count; i++) { 11507695Sdg rtvals[i] = VM_PAGER_BAD; 11511549Srgrimes } 11521549Srgrimes } 11531541Srgrimes } 1154248084Sattilio VM_OBJECT_WUNLOCK(object); 11557695Sdg 115670374Sdillon /* 1157226366Sjhb * pageouts are already clustered, use IO_ASYNC to force a bawrite() 115870374Sdillon * rather then a bdwrite() to prevent paging I/O from saturating 1159108358Sdillon * the buffer cache. Dummy-up the sequential heuristic to cause 1160108358Sdillon * large ranges to cluster. If neither IO_SYNC or IO_ASYNC is set, 1161108358Sdillon * the system decides how to cluster. 
116270374Sdillon */ 116334206Sdyson ioflags = IO_VMIO; 1164108358Sdillon if (flags & (VM_PAGER_PUT_SYNC | VM_PAGER_PUT_INVAL)) 1165108358Sdillon ioflags |= IO_SYNC; 1166108358Sdillon else if ((flags & VM_PAGER_CLUSTER_OK) == 0) 1167108358Sdillon ioflags |= IO_ASYNC; 116834206Sdyson ioflags |= (flags & VM_PAGER_PUT_INVAL) ? IO_INVAL: 0; 1169108358Sdillon ioflags |= IO_SEQMAX << IO_SEQSHIFT; 11701827Sdg 11717695Sdg aiov.iov_base = (caddr_t) 0; 11727695Sdg aiov.iov_len = maxsize; 11737695Sdg auio.uio_iov = &aiov; 11747695Sdg auio.uio_iovcnt = 1; 117512767Sdyson auio.uio_offset = poffset; 11767695Sdg auio.uio_segflg = UIO_NOCOPY; 11777695Sdg auio.uio_rw = UIO_WRITE; 11787695Sdg auio.uio_resid = maxsize; 117983366Sjulian auio.uio_td = (struct thread *) 0; 118091406Sjhb error = VOP_WRITE(vp, &auio, ioflags, curthread->td_ucred); 1181170292Sattilio PCPU_INC(cnt.v_vnodeout); 1182170292Sattilio PCPU_ADD(cnt.v_vnodepgsout, ncount); 11833612Sdg 11848585Sdg if (error) { 1185151951Sps if ((ppscheck = ppsratecheck(&lastfail, &curfail, 1))) 1186151951Sps printf("vnode_pager_putpages: I/O error %d\n", error); 11877695Sdg } 11888585Sdg if (auio.uio_resid) { 1189151951Sps if (ppscheck || ppsratecheck(&lastfail, &curfail, 1)) 1190194990Skib printf("vnode_pager_putpages: residual I/O %zd at %lu\n", 1191208574Salc auio.uio_resid, (u_long)ma[0]->pindex); 11927695Sdg } 119333936Sdyson for (i = 0; i < ncount; i++) { 119433936Sdyson rtvals[i] = VM_PAGER_OK; 11957695Sdg } 11967695Sdg return rtvals[0]; 11977695Sdg} 1198222586Skib 1199222586Skibvoid 1200222586Skibvnode_pager_undirty_pages(vm_page_t *ma, int *rtvals, int written) 1201222586Skib{ 1202222991Skib vm_object_t obj; 1203222586Skib int i, pos; 1204222586Skib 1205222991Skib if (written == 0) 1206222991Skib return; 1207222991Skib obj = ma[0]->object; 1208248084Sattilio VM_OBJECT_WLOCK(obj); 1209222586Skib for (i = 0, pos = 0; pos < written; i++, pos += PAGE_SIZE) { 1210222586Skib if (pos < trunc_page(written)) { 1211222586Skib 
rtvals[i] = VM_PAGER_OK; 1212222586Skib vm_page_undirty(ma[i]); 1213222586Skib } else { 1214222586Skib /* Partially written page. */ 1215222586Skib rtvals[i] = VM_PAGER_AGAIN; 1216222586Skib vm_page_clear_dirty(ma[i], 0, written & PAGE_MASK); 1217222586Skib } 1218222586Skib } 1219248084Sattilio VM_OBJECT_WUNLOCK(obj); 1220222586Skib} 1221232071Skib 1222232071Skibvoid 1223232071Skibvnode_pager_update_writecount(vm_object_t object, vm_offset_t start, 1224232071Skib vm_offset_t end) 1225232071Skib{ 1226232071Skib struct vnode *vp; 1227232071Skib vm_ooffset_t old_wm; 1228232071Skib 1229248084Sattilio VM_OBJECT_WLOCK(object); 1230232071Skib if (object->type != OBJT_VNODE) { 1231248084Sattilio VM_OBJECT_WUNLOCK(object); 1232232071Skib return; 1233232071Skib } 1234232071Skib old_wm = object->un_pager.vnp.writemappings; 1235232071Skib object->un_pager.vnp.writemappings += (vm_ooffset_t)end - start; 1236232071Skib vp = object->handle; 1237232071Skib if (old_wm == 0 && object->un_pager.vnp.writemappings != 0) { 1238232071Skib ASSERT_VOP_ELOCKED(vp, "v_writecount inc"); 1239242476Skib VOP_ADD_WRITECOUNT(vp, 1); 1240232701Sjhb CTR3(KTR_VFS, "%s: vp %p v_writecount increased to %d", 1241232701Sjhb __func__, vp, vp->v_writecount); 1242232071Skib } else if (old_wm != 0 && object->un_pager.vnp.writemappings == 0) { 1243232071Skib ASSERT_VOP_ELOCKED(vp, "v_writecount dec"); 1244242476Skib VOP_ADD_WRITECOUNT(vp, -1); 1245232701Sjhb CTR3(KTR_VFS, "%s: vp %p v_writecount decreased to %d", 1246232701Sjhb __func__, vp, vp->v_writecount); 1247232071Skib } 1248248084Sattilio VM_OBJECT_WUNLOCK(object); 1249232071Skib} 1250232071Skib 1251232071Skibvoid 1252232071Skibvnode_pager_release_writecount(vm_object_t object, vm_offset_t start, 1253232071Skib vm_offset_t end) 1254232071Skib{ 1255232071Skib struct vnode *vp; 1256232071Skib struct mount *mp; 1257232071Skib vm_offset_t inc; 1258232071Skib 1259248084Sattilio VM_OBJECT_WLOCK(object); 1260232071Skib 1261232071Skib /* 1262232071Skib * 
First, recheck the object type to account for the race when 1263232071Skib * the vnode is reclaimed. 1264232071Skib */ 1265232071Skib if (object->type != OBJT_VNODE) { 1266248084Sattilio VM_OBJECT_WUNLOCK(object); 1267232071Skib return; 1268232071Skib } 1269232071Skib 1270232071Skib /* 1271232071Skib * Optimize for the case when writemappings is not going to 1272232071Skib * zero. 1273232071Skib */ 1274232071Skib inc = end - start; 1275232071Skib if (object->un_pager.vnp.writemappings != inc) { 1276232071Skib object->un_pager.vnp.writemappings -= inc; 1277248084Sattilio VM_OBJECT_WUNLOCK(object); 1278232071Skib return; 1279232071Skib } 1280232071Skib 1281232071Skib vp = object->handle; 1282232071Skib vhold(vp); 1283248084Sattilio VM_OBJECT_WUNLOCK(object); 1284232071Skib mp = NULL; 1285232071Skib vn_start_write(vp, &mp, V_WAIT); 1286232071Skib vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1287232071Skib 1288232071Skib /* 1289232071Skib * Decrement the object's writemappings, by swapping the start 1290232071Skib * and end arguments for vnode_pager_update_writecount(). If 1291232071Skib * there was not a race with vnode reclaimation, then the 1292232071Skib * vnode's v_writecount is decremented. 1293232071Skib */ 1294232071Skib vnode_pager_update_writecount(object, end, start); 1295232071Skib VOP_UNLOCK(vp, 0); 1296232071Skib vdrop(vp); 1297232071Skib if (mp != NULL) 1298232071Skib vn_finished_write(mp); 1299232071Skib} 1300