vnode_pager.c revision 239246
/*-
 * Copyright (c) 1990 University of Utah.
 * Copyright (c) 1991 The Regents of the University of California.
 * All rights reserved.
 * Copyright (c) 1993, 1994 John S. Dyson
 * Copyright (c) 1995, David Greenman
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)vnode_pager.c	7.5 (Berkeley) 4/20/91
 */

/*
 * Page to/from files (vnodes).
 */

/*
 * TODO:
 *	Implement VOP_GETPAGES/PUTPAGES interface for filesystems.  Will
 *	greatly re-simplify the vnode_pager.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/vm/vnode_pager.c 239246 2012-08-14 11:45:47Z kib $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/vmmeter.h>
#include <sys/limits.h>
#include <sys/conf.h>
#include <sys/sf_buf.h>

#include <machine/atomic.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vm_map.h>
#include <vm/vnode_pager.h>
#include <vm/vm_extern.h>

static int vnode_pager_addr(struct vnode *vp, vm_ooffset_t address,
    daddr_t *rtaddress, int *run);
static int vnode_pager_input_smlfs(vm_object_t object, vm_page_t m);
static int vnode_pager_input_old(vm_object_t object, vm_page_t m);
static void vnode_pager_dealloc(vm_object_t);
static int vnode_pager_getpages(vm_object_t, vm_page_t *, int, int);
static void vnode_pager_putpages(vm_object_t, vm_page_t *, int, boolean_t, int *);
static boolean_t vnode_pager_haspage(vm_object_t, vm_pindex_t, int *, int *);
static vm_object_t vnode_pager_alloc(void *, vm_ooffset_t, vm_prot_t,
    vm_ooffset_t, struct ucred *cred);

struct pagerops vnodepagerops = {
	.pgo_alloc =	vnode_pager_alloc,
	.pgo_dealloc =	vnode_pager_dealloc,
	.pgo_getpages =	vnode_pager_getpages,
	.pgo_putpages =	vnode_pager_putpages,
	.pgo_haspage =	vnode_pager_haspage,
};

int vnode_pbuf_freecnt;

/* Create the VM system backing object for this vnode */
int
vnode_create_vobject(struct vnode *vp, off_t isize, struct thread *td)
{
	vm_object_t object;
	vm_ooffset_t size = isize;
	struct vattr va;

	if (!vn_isdisk(vp, NULL) && vn_canvmio(vp) == FALSE)
		return (0);

	while ((object = vp->v_object) != NULL) {
		VM_OBJECT_LOCK(object);
		if (!(object->flags & OBJ_DEAD)) {
			VM_OBJECT_UNLOCK(object);
			return (0);
		}
		VOP_UNLOCK(vp, 0);
		vm_object_set_flag(object, OBJ_DISCONNECTWNT);
		msleep(object, VM_OBJECT_MTX(object), PDROP | PVM, "vodead", 0);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	}

	if (size == 0) {
		if (vn_isdisk(vp, NULL)) {
			size = IDX_TO_OFF(INT_MAX);
		} else {
			if (VOP_GETATTR(vp, &va, td->td_ucred))
				return (0);
			size = va.va_size;
		}
	}

	object = vnode_pager_alloc(vp, size, 0, 0, td->td_ucred);
	/*
	 * Dereference the reference we just created.  This assumes
	 * that the object is associated with the vp.
	 */
	VM_OBJECT_LOCK(object);
	object->ref_count--;
	VM_OBJECT_UNLOCK(object);
	vrele(vp);

	KASSERT(vp->v_object != NULL, ("vnode_create_vobject: NULL object"));

	return (0);
}

void
vnode_destroy_vobject(struct vnode *vp)
{
	struct vm_object *obj;

	obj = vp->v_object;
	if (obj == NULL)
		return;
	ASSERT_VOP_ELOCKED(vp, "vnode_destroy_vobject");
	VM_OBJECT_LOCK(obj);
	if (obj->ref_count == 0) {
		/*
		 * vclean() may be called twice.  The first time
		 * removes the primary reference to the object,
		 * the second time goes one further and is a
		 * special-case to terminate the object.
		 *
		 * don't double-terminate the object
		 */
		if ((obj->flags & OBJ_DEAD) == 0)
			vm_object_terminate(obj);
		else
			VM_OBJECT_UNLOCK(obj);
	} else {
		/*
		 * Woe to the process that tries to page now :-).
		 */
		vm_pager_deallocate(obj);
		VM_OBJECT_UNLOCK(obj);
	}
	vp->v_object = NULL;
}
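
/*
 * For illustration only (not part of the original file): a filesystem
 * that wants paging to work on its vnodes typically calls
 * vnode_create_vobject() from its open path and relies on the reclaim
 * path to call vnode_destroy_vobject().  A minimal sketch, assuming a
 * hypothetical "myfs_open" method and a byte size the filesystem
 * already knows:
 *
 *	static int
 *	myfs_open(struct vop_open_args *ap)
 *	{
 *		struct vnode *vp = ap->a_vp;
 *		off_t filesize = ...;	(filesystem-supplied size)
 *
 *		vnode_create_vobject(vp, filesize, ap->a_td);
 *		return (0);
 *	}
 */
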
/*
 * Allocate (or lookup) pager for a vnode.
 * Handle is a vnode pointer.
 *
 * MPSAFE
 */
vm_object_t
vnode_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot,
    vm_ooffset_t offset, struct ucred *cred)
{
	vm_object_t object;
	struct vnode *vp;

	/*
	 * Pageout to vnode, no can do yet.
	 */
	if (handle == NULL)
		return (NULL);

	vp = (struct vnode *) handle;

	/*
	 * If the object is being terminated, wait for it to
	 * go away.
	 */
retry:
	while ((object = vp->v_object) != NULL) {
		VM_OBJECT_LOCK(object);
		if ((object->flags & OBJ_DEAD) == 0)
			break;
		vm_object_set_flag(object, OBJ_DISCONNECTWNT);
		msleep(object, VM_OBJECT_MTX(object), PDROP | PVM, "vadead", 0);
	}

	if (vp->v_usecount == 0)
		panic("vnode_pager_alloc: no vnode reference");

	if (object == NULL) {
		/*
		 * Add an object of the appropriate size
		 */
		object = vm_object_allocate(OBJT_VNODE, OFF_TO_IDX(round_page(size)));

		object->un_pager.vnp.vnp_size = size;
		object->un_pager.vnp.writemappings = 0;

		object->handle = handle;
		VI_LOCK(vp);
		if (vp->v_object != NULL) {
			/*
			 * Object has been created while we were sleeping
			 */
			VI_UNLOCK(vp);
			vm_object_destroy(object);
			goto retry;
		}
		vp->v_object = object;
		VI_UNLOCK(vp);
	} else {
		object->ref_count++;
		VM_OBJECT_UNLOCK(object);
	}
	vref(vp);
	return (object);
}
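
/*
 * For illustration: the allocation above sizes the object in pages but
 * keeps the exact byte length separately.  With 4 KB pages, a 10000-byte
 * file yields round_page(10000) = 12288 and OFF_TO_IDX(12288) = 3 pages
 * for the object size, while un_pager.vnp.vnp_size stays 10000.  Each
 * successful call also vref()s the vnode, so every object reference
 * handed out here is backed by a vnode reference that is dropped again
 * in vnode_pager_dealloc().
 */
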
/*
 * The object must be locked.
 */
static void
vnode_pager_dealloc(object)
	vm_object_t object;
{
	struct vnode *vp;
	int refs;

	vp = object->handle;
	if (vp == NULL)
		panic("vnode_pager_dealloc: pager already dealloced");

	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
	vm_object_pip_wait(object, "vnpdea");
	refs = object->ref_count;

	object->handle = NULL;
	object->type = OBJT_DEAD;
	if (object->flags & OBJ_DISCONNECTWNT) {
		vm_object_clear_flag(object, OBJ_DISCONNECTWNT);
		wakeup(object);
	}
	ASSERT_VOP_ELOCKED(vp, "vnode_pager_dealloc");
	if (object->un_pager.vnp.writemappings > 0) {
		object->un_pager.vnp.writemappings = 0;
		vp->v_writecount--;
		CTR3(KTR_VFS, "%s: vp %p v_writecount decreased to %d",
		    __func__, vp, vp->v_writecount);
	}
	vp->v_object = NULL;
	vp->v_vflag &= ~VV_TEXT;
	VM_OBJECT_UNLOCK(object);
	while (refs-- > 0)
		vunref(vp);
	VM_OBJECT_LOCK(object);
}

static boolean_t
vnode_pager_haspage(object, pindex, before, after)
	vm_object_t object;
	vm_pindex_t pindex;
	int *before;
	int *after;
{
	struct vnode *vp = object->handle;
	daddr_t bn;
	int err;
	daddr_t reqblock;
	int poff;
	int bsize;
	int pagesperblock, blocksperpage;
	int vfslocked;

	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
	/*
	 * If no vp or vp is doomed or marked transparent to VM, we do not
	 * have the page.
	 */
	if (vp == NULL || vp->v_iflag & VI_DOOMED)
		return FALSE;
	/*
	 * If the offset is beyond end of file we do
	 * not have the page.
	 */
	if (IDX_TO_OFF(pindex) >= object->un_pager.vnp.vnp_size)
		return FALSE;

	bsize = vp->v_mount->mnt_stat.f_iosize;
	pagesperblock = bsize / PAGE_SIZE;
	blocksperpage = 0;
	if (pagesperblock > 0) {
		reqblock = pindex / pagesperblock;
	} else {
		blocksperpage = (PAGE_SIZE / bsize);
		reqblock = pindex * blocksperpage;
	}
	VM_OBJECT_UNLOCK(object);
	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
	err = VOP_BMAP(vp, reqblock, NULL, &bn, after, before);
	VFS_UNLOCK_GIANT(vfslocked);
	VM_OBJECT_LOCK(object);
	if (err)
		return TRUE;
	if (bn == -1)
		return FALSE;
	if (pagesperblock > 0) {
		poff = pindex - (reqblock * pagesperblock);
		if (before) {
			*before *= pagesperblock;
			*before += poff;
		}
		if (after) {
			int numafter;
			*after *= pagesperblock;
			numafter = pagesperblock - (poff + 1);
			if (IDX_TO_OFF(pindex + numafter) >
			    object->un_pager.vnp.vnp_size) {
				numafter =
				    OFF_TO_IDX(object->un_pager.vnp.vnp_size) -
				    pindex;
			}
			*after += numafter;
		}
	} else {
		if (before) {
			*before /= blocksperpage;
		}

		if (after) {
			*after /= blocksperpage;
		}
	}
	return TRUE;
}
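
/*
 * For illustration: the block-to-page conversion above can be followed
 * with concrete numbers.  Assume PAGE_SIZE = 4096 and f_iosize = 16384,
 * so pagesperblock = 4.  For pindex = 10, reqblock = 2 and poff = 2.
 * If VOP_BMAP() reports one block of read-behind and one block of
 * read-ahead, the counts are rescaled to pages as
 *
 *	*before = 1 * 4 + 2 = 6
 *	*after  = 1 * 4 + (4 - 3) = 5
 *
 * with *after further clamped so that it never runs past vnp_size.
 */
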
/*
 * Lets the VM system know about a change in size for a file.
 * We adjust our own internal size and flush any cached pages in
 * the associated object that are affected by the size change.
 *
 * Note: this routine may be invoked as a result of a pager put
 * operation (possibly at object termination time), so we must be careful.
 */
void
vnode_pager_setsize(vp, nsize)
	struct vnode *vp;
	vm_ooffset_t nsize;
{
	vm_object_t object;
	vm_page_t m;
	vm_pindex_t nobjsize;

	if ((object = vp->v_object) == NULL)
		return;
/* 	ASSERT_VOP_ELOCKED(vp, "vnode_pager_setsize and not locked vnode"); */
	VM_OBJECT_LOCK(object);
	if (nsize == object->un_pager.vnp.vnp_size) {
		/*
		 * Hasn't changed size
		 */
		VM_OBJECT_UNLOCK(object);
		return;
	}
	nobjsize = OFF_TO_IDX(nsize + PAGE_MASK);
	if (nsize < object->un_pager.vnp.vnp_size) {
		/*
		 * File has shrunk.  Toss any cached pages beyond the new EOF.
		 */
		if (nobjsize < object->size)
			vm_object_page_remove(object, nobjsize, object->size,
			    0);
		/*
		 * this gets rid of garbage at the end of a page that is now
		 * only partially backed by the vnode.
		 *
		 * XXX for some reason (I don't know yet), if we take a
		 * completely invalid page and mark it partially valid
		 * it can screw up NFS reads, so we don't allow the case.
		 */
		if ((nsize & PAGE_MASK) &&
		    (m = vm_page_lookup(object, OFF_TO_IDX(nsize))) != NULL &&
		    m->valid != 0) {
			int base = (int)nsize & PAGE_MASK;
			int size = PAGE_SIZE - base;

			/*
			 * Clear out partial-page garbage in case
			 * the page has been mapped.
			 */
			pmap_zero_page_area(m, base, size);

			/*
			 * Update the valid bits to reflect the blocks that
			 * have been zeroed.  Some of these valid bits may
			 * have already been set.
			 */
			vm_page_set_valid_range(m, base, size);

			/*
			 * Round "base" to the next block boundary so that the
			 * dirty bit for a partially zeroed block is not
			 * cleared.
			 */
			base = roundup2(base, DEV_BSIZE);

			/*
			 * Clear out partial-page dirty bits.
			 *
			 * note that we do not clear out the valid
			 * bits.  This would prevent bogus_page
			 * replacement from working properly.
			 */
			vm_page_clear_dirty(m, base, PAGE_SIZE - base);
		} else if ((nsize & PAGE_MASK) &&
		    vm_page_is_cached(object, OFF_TO_IDX(nsize))) {
			vm_page_cache_free(object, OFF_TO_IDX(nsize),
			    nobjsize);
		}
	}
	object->un_pager.vnp.vnp_size = nsize;
	object->size = nobjsize;
	VM_OBJECT_UNLOCK(object);
}

/*
 * calculate the linear (byte) disk address of specified virtual
 * file address
 */
static int
vnode_pager_addr(struct vnode *vp, vm_ooffset_t address, daddr_t *rtaddress,
    int *run)
{
	int bsize;
	int err;
	daddr_t vblock;
	daddr_t voffset;

	if (address < 0)
		return -1;

	if (vp->v_iflag & VI_DOOMED)
		return -1;

	bsize = vp->v_mount->mnt_stat.f_iosize;
	vblock = address / bsize;
	voffset = address % bsize;

	err = VOP_BMAP(vp, vblock, NULL, rtaddress, run, NULL);
	if (err == 0) {
		if (*rtaddress != -1)
			*rtaddress += voffset / DEV_BSIZE;
		if (run) {
			*run += 1;
			*run *= bsize/PAGE_SIZE;
			*run -= voffset/PAGE_SIZE;
		}
	}

	return (err);
}
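
/*
 * For illustration: with f_iosize = 32768, DEV_BSIZE = 512 and
 * PAGE_SIZE = 4096, a request for byte address 100000 gives vblock = 3
 * and voffset = 1696.  VOP_BMAP() maps logical block 3 to a disk
 * address in DEV_BSIZE units plus a run length in filesystem blocks;
 * the code above then adds voffset / DEV_BSIZE = 3 sectors to that
 * address and rescales the run, so a run of 2 additional blocks becomes
 * (2 + 1) * 8 - 0 = 24 contiguous pages.
 */
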
/*
 * small block filesystem vnode pager input
 */
static int
vnode_pager_input_smlfs(object, m)
	vm_object_t object;
	vm_page_t m;
{
	struct vnode *vp;
	struct bufobj *bo;
	struct buf *bp;
	struct sf_buf *sf;
	daddr_t fileaddr;
	vm_offset_t bsize;
	vm_page_bits_t bits;
	int error, i;

	error = 0;
	vp = object->handle;
	if (vp->v_iflag & VI_DOOMED)
		return VM_PAGER_BAD;

	bsize = vp->v_mount->mnt_stat.f_iosize;

	VOP_BMAP(vp, 0, &bo, 0, NULL, NULL);

	sf = sf_buf_alloc(m, 0);

	for (i = 0; i < PAGE_SIZE / bsize; i++) {
		vm_ooffset_t address;

		bits = vm_page_bits(i * bsize, bsize);
		if (m->valid & bits)
			continue;

		address = IDX_TO_OFF(m->pindex) + i * bsize;
		if (address >= object->un_pager.vnp.vnp_size) {
			fileaddr = -1;
		} else {
			error = vnode_pager_addr(vp, address, &fileaddr, NULL);
			if (error)
				break;
		}
		if (fileaddr != -1) {
			bp = getpbuf(&vnode_pbuf_freecnt);

			/* build a minimal buffer header */
			bp->b_iocmd = BIO_READ;
			bp->b_iodone = bdone;
			KASSERT(bp->b_rcred == NOCRED, ("leaking read ucred"));
			KASSERT(bp->b_wcred == NOCRED, ("leaking write ucred"));
			bp->b_rcred = crhold(curthread->td_ucred);
			bp->b_wcred = crhold(curthread->td_ucred);
			bp->b_data = (caddr_t)sf_buf_kva(sf) + i * bsize;
			bp->b_blkno = fileaddr;
			pbgetbo(bo, bp);
			bp->b_vp = vp;
			bp->b_bcount = bsize;
			bp->b_bufsize = bsize;
			bp->b_runningbufspace = bp->b_bufsize;
			atomic_add_long(&runningbufspace, bp->b_runningbufspace);

			/* do the input */
			bp->b_iooffset = dbtob(bp->b_blkno);
			bstrategy(bp);

			bwait(bp, PVM, "vnsrd");

			if ((bp->b_ioflags & BIO_ERROR) != 0)
				error = EIO;

			/*
			 * free the buffer header back to the swap buffer pool
			 */
			bp->b_vp = NULL;
			pbrelbo(bp);
			relpbuf(bp, &vnode_pbuf_freecnt);
			if (error)
				break;
		} else
			bzero((caddr_t)sf_buf_kva(sf) + i * bsize, bsize);
		KASSERT((m->dirty & bits) == 0,
		    ("vnode_pager_input_smlfs: page %p is dirty", m));
		VM_OBJECT_LOCK(object);
		m->valid |= bits;
		VM_OBJECT_UNLOCK(object);
	}
	sf_buf_free(sf);
	if (error) {
		return VM_PAGER_ERROR;
	}
	return VM_PAGER_OK;
}
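
/*
 * For illustration: the valid-bit bookkeeping above works in DEV_BSIZE
 * chunks.  With PAGE_SIZE = 4096, bsize = 1024 and the usual DEV_BSIZE
 * of 512, the i = 1 iteration computes vm_page_bits(1024, 1024) = 0x0c,
 * i.e. only the third and fourth 512-byte chunks of the page become
 * valid once that 1 KB piece has been read in (or zeroed when it lies
 * beyond the file's backing blocks).
 */
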
/*
 * old style vnode pager input routine
 */
static int
vnode_pager_input_old(object, m)
	vm_object_t object;
	vm_page_t m;
{
	struct uio auio;
	struct iovec aiov;
	int error;
	int size;
	struct sf_buf *sf;
	struct vnode *vp;

	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
	error = 0;

	/*
	 * Return failure if beyond current EOF
	 */
	if (IDX_TO_OFF(m->pindex) >= object->un_pager.vnp.vnp_size) {
		return VM_PAGER_BAD;
	} else {
		size = PAGE_SIZE;
		if (IDX_TO_OFF(m->pindex) + size > object->un_pager.vnp.vnp_size)
			size = object->un_pager.vnp.vnp_size - IDX_TO_OFF(m->pindex);
		vp = object->handle;
		VM_OBJECT_UNLOCK(object);

		/*
		 * Allocate a kernel virtual address and initialize so that
		 * we can use VOP_READ/WRITE routines.
		 */
		sf = sf_buf_alloc(m, 0);

		aiov.iov_base = (caddr_t)sf_buf_kva(sf);
		aiov.iov_len = size;
		auio.uio_iov = &aiov;
		auio.uio_iovcnt = 1;
		auio.uio_offset = IDX_TO_OFF(m->pindex);
		auio.uio_segflg = UIO_SYSSPACE;
		auio.uio_rw = UIO_READ;
		auio.uio_resid = size;
		auio.uio_td = curthread;

		error = VOP_READ(vp, &auio, 0, curthread->td_ucred);
		if (!error) {
			int count = size - auio.uio_resid;

			if (count == 0)
				error = EINVAL;
			else if (count != PAGE_SIZE)
				bzero((caddr_t)sf_buf_kva(sf) + count,
				    PAGE_SIZE - count);
		}
		sf_buf_free(sf);

		VM_OBJECT_LOCK(object);
	}
	KASSERT(m->dirty == 0, ("vnode_pager_input_old: page %p is dirty", m));
	if (!error)
		m->valid = VM_PAGE_BITS_ALL;
	return error ? VM_PAGER_ERROR : VM_PAGER_OK;
}

/*
 * generic vnode pager input routine
 */

/*
 * Local media VFS's that do not implement their own VOP_GETPAGES
 * should have their VOP_GETPAGES call vnode_pager_generic_getpages()
 * to implement the previous behaviour.
 *
 * All other FS's should use the bypass to get to the local media
 * backing vp's VOP_GETPAGES.
 */
static int
vnode_pager_getpages(object, m, count, reqpage)
	vm_object_t object;
	vm_page_t *m;
	int count;
	int reqpage;
{
	int rtval;
	struct vnode *vp;
	int bytes = count * PAGE_SIZE;
	int vfslocked;

	vp = object->handle;
	VM_OBJECT_UNLOCK(object);
	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
	rtval = VOP_GETPAGES(vp, m, bytes, reqpage, 0);
	KASSERT(rtval != EOPNOTSUPP,
	    ("vnode_pager: FS getpages not implemented\n"));
	VFS_UNLOCK_GIANT(vfslocked);
	VM_OBJECT_LOCK(object);
	return rtval;
}
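
/*
 * For illustration: the bypass convention described above means that a
 * local-media filesystem without a smarter strategy can simply forward
 * its VOP_GETPAGES to the generic routine below.  A minimal sketch,
 * where "myfs_getpages" is a hypothetical method and the argument
 * structure follows the stock vnode_if conventions:
 *
 *	static int
 *	myfs_getpages(struct vop_getpages_args *ap)
 *	{
 *
 *		return (vnode_pager_generic_getpages(ap->a_vp, ap->a_m,
 *		    ap->a_count, ap->a_reqpage));
 *	}
 */
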
/*
 * This is now called from local media FS's to operate against their
 * own vnodes if they fail to implement VOP_GETPAGES.
 */
int
vnode_pager_generic_getpages(vp, m, bytecount, reqpage)
	struct vnode *vp;
	vm_page_t *m;
	int bytecount;
	int reqpage;
{
	vm_object_t object;
	vm_offset_t kva;
	off_t foff, tfoff, nextoff;
	int i, j, size, bsize, first;
	daddr_t firstaddr, reqblock;
	struct bufobj *bo;
	int runpg;
	int runend;
	struct buf *bp;
	int count;
	int error;

	object = vp->v_object;
	count = bytecount / PAGE_SIZE;

	KASSERT(vp->v_type != VCHR && vp->v_type != VBLK,
	    ("vnode_pager_generic_getpages does not support devices"));
	if (vp->v_iflag & VI_DOOMED)
		return VM_PAGER_BAD;

	bsize = vp->v_mount->mnt_stat.f_iosize;

	/* get the UNDERLYING device for the file with VOP_BMAP() */

	/*
	 * originally, we did not check for an error return value -- assuming
	 * an fs always has a bmap entry point -- that assumption is wrong!!!
	 */
	foff = IDX_TO_OFF(m[reqpage]->pindex);

	/*
	 * if we can't bmap, use old VOP code
	 */
	error = VOP_BMAP(vp, foff / bsize, &bo, &reqblock, NULL, NULL);
	if (error == EOPNOTSUPP) {
		VM_OBJECT_LOCK(object);

		for (i = 0; i < count; i++)
			if (i != reqpage) {
				vm_page_lock(m[i]);
				vm_page_free(m[i]);
				vm_page_unlock(m[i]);
			}
		PCPU_INC(cnt.v_vnodein);
		PCPU_INC(cnt.v_vnodepgsin);
		error = vnode_pager_input_old(object, m[reqpage]);
		VM_OBJECT_UNLOCK(object);
		return (error);
	} else if (error != 0) {
		VM_OBJECT_LOCK(object);
		for (i = 0; i < count; i++)
			if (i != reqpage) {
				vm_page_lock(m[i]);
				vm_page_free(m[i]);
				vm_page_unlock(m[i]);
			}
		VM_OBJECT_UNLOCK(object);
		return (VM_PAGER_ERROR);

	/*
	 * if the blocksize is smaller than a page size, then use
	 * special small filesystem code.  NFS sometimes has a small
	 * blocksize, but it can handle large reads itself.
	 */
	} else if ((PAGE_SIZE / bsize) > 1 &&
	    (vp->v_mount->mnt_stat.f_type != nfs_mount_type)) {
		VM_OBJECT_LOCK(object);
		for (i = 0; i < count; i++)
			if (i != reqpage) {
				vm_page_lock(m[i]);
				vm_page_free(m[i]);
				vm_page_unlock(m[i]);
			}
		VM_OBJECT_UNLOCK(object);
		PCPU_INC(cnt.v_vnodein);
		PCPU_INC(cnt.v_vnodepgsin);
		return vnode_pager_input_smlfs(object, m[reqpage]);
	}

	/*
	 * If we have a completely valid page available to us, we can
	 * clean up and return.  Otherwise we have to re-read the
	 * media.
	 */
	VM_OBJECT_LOCK(object);
	if (m[reqpage]->valid == VM_PAGE_BITS_ALL) {
		for (i = 0; i < count; i++)
			if (i != reqpage) {
				vm_page_lock(m[i]);
				vm_page_free(m[i]);
				vm_page_unlock(m[i]);
			}
		VM_OBJECT_UNLOCK(object);
		return VM_PAGER_OK;
	} else if (reqblock == -1) {
		pmap_zero_page(m[reqpage]);
		KASSERT(m[reqpage]->dirty == 0,
		    ("vnode_pager_generic_getpages: page %p is dirty", m));
		m[reqpage]->valid = VM_PAGE_BITS_ALL;
		for (i = 0; i < count; i++)
			if (i != reqpage) {
				vm_page_lock(m[i]);
				vm_page_free(m[i]);
				vm_page_unlock(m[i]);
			}
		VM_OBJECT_UNLOCK(object);
		return (VM_PAGER_OK);
	}
	m[reqpage]->valid = 0;
	VM_OBJECT_UNLOCK(object);

	/*
	 * here on direct device I/O
	 */
	firstaddr = -1;

	/*
	 * calculate the run that includes the required page
	 */
	for (first = 0, i = 0; i < count; i = runend) {
		if (vnode_pager_addr(vp, IDX_TO_OFF(m[i]->pindex), &firstaddr,
		    &runpg) != 0) {
			VM_OBJECT_LOCK(object);
			for (; i < count; i++)
				if (i != reqpage) {
					vm_page_lock(m[i]);
					vm_page_free(m[i]);
					vm_page_unlock(m[i]);
				}
			VM_OBJECT_UNLOCK(object);
			return (VM_PAGER_ERROR);
		}
		if (firstaddr == -1) {
			VM_OBJECT_LOCK(object);
			if (i == reqpage && foff < object->un_pager.vnp.vnp_size) {
				panic("vnode_pager_getpages: unexpected missing page: firstaddr: %jd, foff: 0x%jx%08jx, vnp_size: 0x%jx%08jx",
				    (intmax_t)firstaddr, (uintmax_t)(foff >> 32),
				    (uintmax_t)foff,
				    (uintmax_t)
				    (object->un_pager.vnp.vnp_size >> 32),
				    (uintmax_t)object->un_pager.vnp.vnp_size);
			}
			vm_page_lock(m[i]);
			vm_page_free(m[i]);
			vm_page_unlock(m[i]);
			VM_OBJECT_UNLOCK(object);
			runend = i + 1;
			first = runend;
			continue;
		}
		runend = i + runpg;
		if (runend <= reqpage) {
			VM_OBJECT_LOCK(object);
			for (j = i; j < runend; j++) {
				vm_page_lock(m[j]);
				vm_page_free(m[j]);
				vm_page_unlock(m[j]);
			}
			VM_OBJECT_UNLOCK(object);
		} else {
			if (runpg < (count - first)) {
				VM_OBJECT_LOCK(object);
				for (i = first + runpg; i < count; i++) {
					vm_page_lock(m[i]);
					vm_page_free(m[i]);
					vm_page_unlock(m[i]);
				}
				VM_OBJECT_UNLOCK(object);
				count = first + runpg;
			}
			break;
		}
		first = runend;
	}

	/*
	 * the first and last page have been calculated now, move input pages
	 * to be zero based...
	 */
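
	/*
	 * For illustration: at this point pages that cannot share one
	 * contiguous disk transfer with the requested page have been
	 * freed and [first, count) brackets the run holding reqpage.
	 * For example, with count = 4 and reqpage = 2, a page 0 that
	 * sits in its own single-block run is freed (first becomes 1);
	 * if page 1 then starts a three-page run covering pages 1-3,
	 * the loop exits with first = 1, and the rebasing below issues
	 * one I/O for the old pages 1-3 with reqpage renumbered to 1.
	 */
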
	if (first != 0) {
		m += first;
		count -= first;
		reqpage -= first;
	}

	/*
	 * calculate the file virtual address for the transfer
	 */
	foff = IDX_TO_OFF(m[0]->pindex);

	/*
	 * calculate the size of the transfer
	 */
	size = count * PAGE_SIZE;
	KASSERT(count > 0, ("zero count"));
	if ((foff + size) > object->un_pager.vnp.vnp_size)
		size = object->un_pager.vnp.vnp_size - foff;
	KASSERT(size > 0, ("zero size"));

	/*
	 * round up physical size for real devices.
	 */
	if (1) {
		int secmask = bo->bo_bsize - 1;
		KASSERT(secmask < PAGE_SIZE && secmask > 0,
		    ("vnode_pager_generic_getpages: sector size %d too large",
		    secmask + 1));
		size = (size + secmask) & ~secmask;
	}

	bp = getpbuf(&vnode_pbuf_freecnt);
	kva = (vm_offset_t) bp->b_data;

	/*
	 * and map the pages to be read into the kva
	 */
	pmap_qenter(kva, m, count);

	/* build a minimal buffer header */
	bp->b_iocmd = BIO_READ;
	bp->b_iodone = bdone;
	KASSERT(bp->b_rcred == NOCRED, ("leaking read ucred"));
	KASSERT(bp->b_wcred == NOCRED, ("leaking write ucred"));
	bp->b_rcred = crhold(curthread->td_ucred);
	bp->b_wcred = crhold(curthread->td_ucred);
	bp->b_blkno = firstaddr;
	pbgetbo(bo, bp);
	bp->b_vp = vp;
	bp->b_bcount = size;
	bp->b_bufsize = size;
	bp->b_runningbufspace = bp->b_bufsize;
	atomic_add_long(&runningbufspace, bp->b_runningbufspace);

	PCPU_INC(cnt.v_vnodein);
	PCPU_ADD(cnt.v_vnodepgsin, count);

	/* do the input */
	bp->b_iooffset = dbtob(bp->b_blkno);
	bstrategy(bp);

	bwait(bp, PVM, "vnread");

	if ((bp->b_ioflags & BIO_ERROR) != 0)
		error = EIO;

	if (!error) {
		if (size != count * PAGE_SIZE)
			bzero((caddr_t) kva + size, PAGE_SIZE * count - size);
	}
	pmap_qremove(kva, count);

	/*
	 * free the buffer header back to the swap buffer pool
	 */
	bp->b_vp = NULL;
	pbrelbo(bp);
	relpbuf(bp, &vnode_pbuf_freecnt);

	VM_OBJECT_LOCK(object);
	for (i = 0, tfoff = foff; i < count; i++, tfoff = nextoff) {
		vm_page_t mt;

		nextoff = tfoff + PAGE_SIZE;
		mt = m[i];

		if (nextoff <= object->un_pager.vnp.vnp_size) {
			/*
			 * Read filled up entire page.
			 */
			mt->valid = VM_PAGE_BITS_ALL;
			KASSERT(mt->dirty == 0,
			    ("vnode_pager_generic_getpages: page %p is dirty",
			    mt));
			KASSERT(!pmap_page_is_mapped(mt),
			    ("vnode_pager_generic_getpages: page %p is mapped",
			    mt));
		} else {
			/*
			 * Read did not fill up entire page.
			 *
			 * Currently we do not set the entire page valid,
			 * we just try to clear the piece that we couldn't
			 * read.
			 */
			vm_page_set_valid_range(mt, 0,
			    object->un_pager.vnp.vnp_size - tfoff);
			KASSERT((mt->dirty & vm_page_bits(0,
			    object->un_pager.vnp.vnp_size - tfoff)) == 0,
			    ("vnode_pager_generic_getpages: page %p is dirty",
			    mt));
		}

		if (i != reqpage)
			vm_page_readahead_finish(mt);
	}
	VM_OBJECT_UNLOCK(object);
	if (error) {
		printf("vnode_pager_getpages: I/O read error\n");
	}
	return (error ? VM_PAGER_ERROR : VM_PAGER_OK);
}

/*
 * EOPNOTSUPP is no longer legal.  For local media VFS's that do not
 * implement their own VOP_PUTPAGES, their VOP_PUTPAGES should call
 * vnode_pager_generic_putpages() to implement the previous behaviour.
 *
 * All other FS's should use the bypass to get to the local media
 * backing vp's VOP_PUTPAGES.
 */
static void
vnode_pager_putpages(object, m, count, sync, rtvals)
	vm_object_t object;
	vm_page_t *m;
	int count;
	boolean_t sync;
	int *rtvals;
{
	int rtval;
	struct vnode *vp;
	int bytes = count * PAGE_SIZE;

	/*
	 * Force synchronous operation if we are extremely low on memory
	 * to prevent a low-memory deadlock.  VOP operations often need to
	 * allocate more memory to initiate the I/O ( i.e. do a BMAP
	 * operation ).  The swapper handles the case by limiting the amount
	 * of asynchronous I/O, but that sort of solution doesn't scale well
	 * for the vnode pager without a lot of work.
	 *
	 * Also, the backing vnode's iodone routine may not wake the pageout
	 * daemon up.  This should probably be addressed.  XXX
	 */

	if ((cnt.v_free_count + cnt.v_cache_count) < cnt.v_pageout_free_min)
		sync |= OBJPC_SYNC;

	/*
	 * Call device-specific putpages function
	 */
	vp = object->handle;
	VM_OBJECT_UNLOCK(object);
	rtval = VOP_PUTPAGES(vp, m, bytes, sync, rtvals, 0);
	KASSERT(rtval != EOPNOTSUPP,
	    ("vnode_pager: stale FS putpages\n"));
	VM_OBJECT_LOCK(object);
}
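
/*
 * For illustration: as with getpages, a local-media filesystem normally
 * forwards its VOP_PUTPAGES to the generic routine below.  A minimal
 * sketch, where "myfs_putpages" is a hypothetical method and the
 * argument structure follows the stock vnode_if conventions:
 *
 *	static int
 *	myfs_putpages(struct vop_putpages_args *ap)
 *	{
 *
 *		return (vnode_pager_generic_putpages(ap->a_vp, ap->a_m,
 *		    ap->a_count, ap->a_sync, ap->a_rtvals));
 *	}
 */
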
/*
 * This is now called from local media FS's to operate against their
 * own vnodes if they fail to implement VOP_PUTPAGES.
 *
 * This is typically called indirectly via the pageout daemon and
 * clustering has already typically occurred, so in general we ask the
 * underlying filesystem to write the data out asynchronously rather
 * than delayed.
 */
int
vnode_pager_generic_putpages(struct vnode *vp, vm_page_t *ma, int bytecount,
    int flags, int *rtvals)
{
	int i;
	vm_object_t object;
	vm_page_t m;
	int count;

	int maxsize, ncount;
	vm_ooffset_t poffset;
	struct uio auio;
	struct iovec aiov;
	int error;
	int ioflags;
	int ppscheck = 0;
	static struct timeval lastfail;
	static int curfail;

	object = vp->v_object;
	count = bytecount / PAGE_SIZE;

	for (i = 0; i < count; i++)
		rtvals[i] = VM_PAGER_ERROR;

	if ((int64_t)ma[0]->pindex < 0) {
		printf("vnode_pager_putpages: attempt to write meta-data!!! -- 0x%lx(%lx)\n",
		    (long)ma[0]->pindex, (u_long)ma[0]->dirty);
		rtvals[0] = VM_PAGER_BAD;
		return VM_PAGER_BAD;
	}

	maxsize = count * PAGE_SIZE;
	ncount = count;

	poffset = IDX_TO_OFF(ma[0]->pindex);

	/*
	 * If the page-aligned write is larger than the actual file we
	 * have to invalidate pages occurring beyond the file EOF.  However,
	 * there is an edge case where a file may not be page-aligned where
	 * the last page is partially invalid.  In this case the filesystem
	 * may not properly clear the dirty bits for the entire page (which
	 * could be VM_PAGE_BITS_ALL due to the page having been mmap()d).
	 * With the page locked we are free to fix-up the dirty bits here.
	 *
	 * We do not under any circumstances truncate the valid bits, as
	 * this will screw up bogus page replacement.
	 */
	VM_OBJECT_LOCK(object);
	if (maxsize + poffset > object->un_pager.vnp.vnp_size) {
		if (object->un_pager.vnp.vnp_size > poffset) {
			int pgoff;

			maxsize = object->un_pager.vnp.vnp_size - poffset;
			ncount = btoc(maxsize);
			if ((pgoff = (int)maxsize & PAGE_MASK) != 0) {
				/*
				 * If the object is locked and the following
				 * conditions hold, then the page's dirty
				 * field cannot be concurrently changed by a
				 * pmap operation.
				 */
				m = ma[ncount - 1];
				KASSERT(m->busy > 0,
		("vnode_pager_generic_putpages: page %p is not busy", m));
				KASSERT(!pmap_page_is_write_mapped(m),
		("vnode_pager_generic_putpages: page %p is not read-only", m));
				vm_page_clear_dirty(m, pgoff, PAGE_SIZE -
				    pgoff);
			}
		} else {
			maxsize = 0;
			ncount = 0;
		}
		if (ncount < count) {
			for (i = ncount; i < count; i++) {
				rtvals[i] = VM_PAGER_BAD;
			}
		}
	}
	VM_OBJECT_UNLOCK(object);

	/*
	 * pageouts are already clustered, use IO_ASYNC to force a bawrite()
	 * rather than a bdwrite() to prevent paging I/O from saturating
	 * the buffer cache.  Dummy-up the sequential heuristic to cause
	 * large ranges to cluster.  If neither IO_SYNC nor IO_ASYNC is set,
	 * the system decides how to cluster.
	 */
	ioflags = IO_VMIO;
	if (flags & (VM_PAGER_PUT_SYNC | VM_PAGER_PUT_INVAL))
		ioflags |= IO_SYNC;
	else if ((flags & VM_PAGER_CLUSTER_OK) == 0)
		ioflags |= IO_ASYNC;
	ioflags |= (flags & VM_PAGER_PUT_INVAL) ? IO_INVAL: 0;
	ioflags |= IO_SEQMAX << IO_SEQSHIFT;

	aiov.iov_base = (caddr_t) 0;
	aiov.iov_len = maxsize;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_offset = poffset;
	auio.uio_segflg = UIO_NOCOPY;
	auio.uio_rw = UIO_WRITE;
	auio.uio_resid = maxsize;
	auio.uio_td = (struct thread *) 0;
	error = VOP_WRITE(vp, &auio, ioflags, curthread->td_ucred);
	PCPU_INC(cnt.v_vnodeout);
	PCPU_ADD(cnt.v_vnodepgsout, ncount);

	if (error) {
		if ((ppscheck = ppsratecheck(&lastfail, &curfail, 1)))
			printf("vnode_pager_putpages: I/O error %d\n", error);
	}
	if (auio.uio_resid) {
		if (ppscheck || ppsratecheck(&lastfail, &curfail, 1))
			printf("vnode_pager_putpages: residual I/O %zd at %lu\n",
			    auio.uio_resid, (u_long)ma[0]->pindex);
	}
	for (i = 0; i < ncount; i++) {
		rtvals[i] = VM_PAGER_OK;
	}
	return rtvals[0];
}
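
/*
 * For illustration: the EOF clamping above is easiest to follow with
 * numbers.  Assume PAGE_SIZE = 4096, vnp_size = 10000 and a two-page
 * pageout starting at poffset = 8192.  maxsize is trimmed from 8192 to
 * 10000 - 8192 = 1808, ncount = btoc(1808) = 1 and pgoff = 1808, so the
 * dirty bits past byte 1808 of the last valid page are cleared and
 * rtvals[1] is reported as VM_PAGER_BAD instead of being written.
 */
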
void
vnode_pager_undirty_pages(vm_page_t *ma, int *rtvals, int written)
{
	vm_object_t obj;
	int i, pos;

	if (written == 0)
		return;
	obj = ma[0]->object;
	VM_OBJECT_LOCK(obj);
	for (i = 0, pos = 0; pos < written; i++, pos += PAGE_SIZE) {
		if (pos < trunc_page(written)) {
			rtvals[i] = VM_PAGER_OK;
			vm_page_undirty(ma[i]);
		} else {
			/* Partially written page. */
			rtvals[i] = VM_PAGER_AGAIN;
			vm_page_clear_dirty(ma[i], 0, written & PAGE_MASK);
		}
	}
	VM_OBJECT_UNLOCK(obj);
}

void
vnode_pager_update_writecount(vm_object_t object, vm_offset_t start,
    vm_offset_t end)
{
	struct vnode *vp;
	vm_ooffset_t old_wm;

	VM_OBJECT_LOCK(object);
	if (object->type != OBJT_VNODE) {
		VM_OBJECT_UNLOCK(object);
		return;
	}
	old_wm = object->un_pager.vnp.writemappings;
	object->un_pager.vnp.writemappings += (vm_ooffset_t)end - start;
	vp = object->handle;
	if (old_wm == 0 && object->un_pager.vnp.writemappings != 0) {
		ASSERT_VOP_ELOCKED(vp, "v_writecount inc");
		vp->v_writecount++;
		CTR3(KTR_VFS, "%s: vp %p v_writecount increased to %d",
		    __func__, vp, vp->v_writecount);
	} else if (old_wm != 0 && object->un_pager.vnp.writemappings == 0) {
		ASSERT_VOP_ELOCKED(vp, "v_writecount dec");
		vp->v_writecount--;
		CTR3(KTR_VFS, "%s: vp %p v_writecount decreased to %d",
		    __func__, vp, vp->v_writecount);
	}
	VM_OBJECT_UNLOCK(object);
}

void
vnode_pager_release_writecount(vm_object_t object, vm_offset_t start,
    vm_offset_t end)
{
	struct vnode *vp;
	struct mount *mp;
	vm_offset_t inc;
	int vfslocked;

	VM_OBJECT_LOCK(object);

	/*
	 * First, recheck the object type to account for the race when
	 * the vnode is reclaimed.
	 */
	if (object->type != OBJT_VNODE) {
		VM_OBJECT_UNLOCK(object);
		return;
	}

	/*
	 * Optimize for the case when writemappings is not going to
	 * zero.
	 */
	inc = end - start;
	if (object->un_pager.vnp.writemappings != inc) {
		object->un_pager.vnp.writemappings -= inc;
		VM_OBJECT_UNLOCK(object);
		return;
	}

	vp = object->handle;
	vhold(vp);
	VM_OBJECT_UNLOCK(object);
	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
	mp = NULL;
	vn_start_write(vp, &mp, V_WAIT);
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);

	/*
	 * Decrement the object's writemappings, by swapping the start
	 * and end arguments for vnode_pager_update_writecount().  If
	 * there was not a race with vnode reclamation, then the
	 * vnode's v_writecount is decremented.
	 */
	vnode_pager_update_writecount(object, end, start);
	VOP_UNLOCK(vp, 0);
	vdrop(vp);
	if (mp != NULL)
		vn_finished_write(mp);
	VFS_UNLOCK_GIANT(vfslocked);
}
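
/*
 * For illustration: the two routines above are symmetric.  Creating a
 * writable mapping of 64 KB calls
 * vnode_pager_update_writecount(object, start, start + 65536), adding
 * 65536 to writemappings and bumping v_writecount on the 0 -> nonzero
 * transition.  Tearing the mapping down goes through
 * vnode_pager_release_writecount(), which relocks the vnode and then
 * performs the same update with the arguments swapped, subtracting
 * 65536 and dropping v_writecount on the nonzero -> 0 transition.
 */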