vnode_pager.c revision 222991
1139825Simp/*-
21541Srgrimes * Copyright (c) 1990 University of Utah.
31549Srgrimes * Copyright (c) 1991 The Regents of the University of California.
41549Srgrimes * All rights reserved.
59507Sdg * Copyright (c) 1993, 1994 John S. Dyson
69507Sdg * Copyright (c) 1995, David Greenman
71541Srgrimes *
81541Srgrimes * This code is derived from software contributed to Berkeley by
91541Srgrimes * the Systems Programming Group of the University of Utah Computer
101541Srgrimes * Science Department.
111541Srgrimes *
121541Srgrimes * Redistribution and use in source and binary forms, with or without
131541Srgrimes * modification, are permitted provided that the following conditions
141541Srgrimes * are met:
151541Srgrimes * 1. Redistributions of source code must retain the above copyright
161541Srgrimes *    notice, this list of conditions and the following disclaimer.
171541Srgrimes * 2. Redistributions in binary form must reproduce the above copyright
181541Srgrimes *    notice, this list of conditions and the following disclaimer in the
191541Srgrimes *    documentation and/or other materials provided with the distribution.
201541Srgrimes * 3. All advertising materials mentioning features or use of this software
2158705Scharnier *    must display the following acknowledgement:
221541Srgrimes *	This product includes software developed by the University of
231541Srgrimes *	California, Berkeley and its contributors.
241541Srgrimes * 4. Neither the name of the University nor the names of its contributors
251541Srgrimes *    may be used to endorse or promote products derived from this software
261541Srgrimes *    without specific prior written permission.
271541Srgrimes *
281541Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
291541Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
301541Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
311541Srgrimes * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
321541Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
331541Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
341541Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
351541Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
361541Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
371541Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
381541Srgrimes * SUCH DAMAGE.
391541Srgrimes *
401549Srgrimes *	from: @(#)vnode_pager.c	7.5 (Berkeley) 4/20/91
411541Srgrimes */
421541Srgrimes
431541Srgrimes/*
441541Srgrimes * Page to/from files (vnodes).
451541Srgrimes */
461541Srgrimes
/*
 * TODO:
 *	Implement VOP_GETPAGES/PUTPAGES interface for filesystems.  This
 *	will greatly simplify the vnode_pager.
 */
521549Srgrimes
53116226Sobrien#include <sys/cdefs.h>
54116226Sobrien__FBSDID("$FreeBSD: head/sys/vm/vnode_pager.c 222991 2011-06-11 20:13:28Z kib $");
55116226Sobrien
561541Srgrimes#include <sys/param.h>
571541Srgrimes#include <sys/systm.h>
581541Srgrimes#include <sys/proc.h>
591541Srgrimes#include <sys/vnode.h>
601541Srgrimes#include <sys/mount.h>
6160041Sphk#include <sys/bio.h>
629507Sdg#include <sys/buf.h>
6312662Sdg#include <sys/vmmeter.h>
64140767Sphk#include <sys/limits.h>
6551340Sdillon#include <sys/conf.h>
66127926Salc#include <sys/sf_buf.h>
671541Srgrimes
68148875Sssouhlal#include <machine/atomic.h>
69148875Sssouhlal
701541Srgrimes#include <vm/vm.h>
7112662Sdg#include <vm/vm_object.h>
721541Srgrimes#include <vm/vm_page.h>
739507Sdg#include <vm/vm_pager.h>
7431853Sdyson#include <vm/vm_map.h>
751541Srgrimes#include <vm/vnode_pager.h>
7612662Sdg#include <vm/vm_extern.h>
771541Srgrimes
78163359Salcstatic int vnode_pager_addr(struct vnode *vp, vm_ooffset_t address,
79163359Salc    daddr_t *rtaddress, int *run);
8092727Salfredstatic int vnode_pager_input_smlfs(vm_object_t object, vm_page_t m);
8192727Salfredstatic int vnode_pager_input_old(vm_object_t object, vm_page_t m);
8292727Salfredstatic void vnode_pager_dealloc(vm_object_t);
8392727Salfredstatic int vnode_pager_getpages(vm_object_t, vm_page_t *, int, int);
8492727Salfredstatic void vnode_pager_putpages(vm_object_t, vm_page_t *, int, boolean_t, int *);
8592727Salfredstatic boolean_t vnode_pager_haspage(vm_object_t, vm_pindex_t, int *, int *);
86194766Skibstatic vm_object_t vnode_pager_alloc(void *, vm_ooffset_t, vm_prot_t,
87194766Skib    vm_ooffset_t, struct ucred *cred);
8811943Sbde
891541Srgrimesstruct pagerops vnodepagerops = {
90118466Sphk	.pgo_alloc =	vnode_pager_alloc,
91118466Sphk	.pgo_dealloc =	vnode_pager_dealloc,
92118466Sphk	.pgo_getpages =	vnode_pager_getpages,
93118466Sphk	.pgo_putpages =	vnode_pager_putpages,
94118466Sphk	.pgo_haspage =	vnode_pager_haspage,
951541Srgrimes};
961541Srgrimes
/*
 * Counter handed to getpbuf()/relpbuf() whenever this file borrows a
 * physical buffer header for paging I/O.
 */
int vnode_pbuf_freecnt;
9810556Sdyson
99140767Sphk/* Create the VM system backing object for this vnode */
100140767Sphkint
101155177Syarvnode_create_vobject(struct vnode *vp, off_t isize, struct thread *td)
102140767Sphk{
103140767Sphk	vm_object_t object;
104140767Sphk	vm_ooffset_t size = isize;
105140767Sphk	struct vattr va;
106140767Sphk
107140767Sphk	if (!vn_isdisk(vp, NULL) && vn_canvmio(vp) == FALSE)
108140767Sphk		return (0);
109140767Sphk
110140767Sphk	while ((object = vp->v_object) != NULL) {
111140767Sphk		VM_OBJECT_LOCK(object);
112140767Sphk		if (!(object->flags & OBJ_DEAD)) {
113140767Sphk			VM_OBJECT_UNLOCK(object);
114140767Sphk			return (0);
115140767Sphk		}
116175294Sattilio		VOP_UNLOCK(vp, 0);
117140767Sphk		vm_object_set_flag(object, OBJ_DISCONNECTWNT);
118140767Sphk		msleep(object, VM_OBJECT_MTX(object), PDROP | PVM, "vodead", 0);
119175202Sattilio		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
120140767Sphk	}
121140767Sphk
122140767Sphk	if (size == 0) {
123140767Sphk		if (vn_isdisk(vp, NULL)) {
124140767Sphk			size = IDX_TO_OFF(INT_MAX);
125140767Sphk		} else {
126182371Sattilio			if (VOP_GETATTR(vp, &va, td->td_ucred))
127140767Sphk				return (0);
128140767Sphk			size = va.va_size;
129140767Sphk		}
130140767Sphk	}
131140767Sphk
132194766Skib	object = vnode_pager_alloc(vp, size, 0, 0, td->td_ucred);
133140767Sphk	/*
134140767Sphk	 * Dereference the reference we just created.  This assumes
135140767Sphk	 * that the object is associated with the vp.
136140767Sphk	 */
137140767Sphk	VM_OBJECT_LOCK(object);
138140767Sphk	object->ref_count--;
139140767Sphk	VM_OBJECT_UNLOCK(object);
140140767Sphk	vrele(vp);
141140767Sphk
142140767Sphk	KASSERT(vp->v_object != NULL, ("vnode_create_vobject: NULL object"));
143140767Sphk
144140767Sphk	return (0);
145140767Sphk}
146140767Sphk
147140929Sphkvoid
148140929Sphkvnode_destroy_vobject(struct vnode *vp)
149140929Sphk{
150140929Sphk	struct vm_object *obj;
151140929Sphk
152140929Sphk	obj = vp->v_object;
153140929Sphk	if (obj == NULL)
154140929Sphk		return;
155171599Spjd	ASSERT_VOP_ELOCKED(vp, "vnode_destroy_vobject");
156140929Sphk	VM_OBJECT_LOCK(obj);
157140929Sphk	if (obj->ref_count == 0) {
158140929Sphk		/*
159140929Sphk		 * vclean() may be called twice. The first time
160140929Sphk		 * removes the primary reference to the object,
161140929Sphk		 * the second time goes one further and is a
162140929Sphk		 * special-case to terminate the object.
163140929Sphk		 *
164140929Sphk		 * don't double-terminate the object
165140929Sphk		 */
166140929Sphk		if ((obj->flags & OBJ_DEAD) == 0)
167140929Sphk			vm_object_terminate(obj);
168140929Sphk		else
169140929Sphk			VM_OBJECT_UNLOCK(obj);
170140929Sphk	} else {
171140929Sphk		/*
172140929Sphk		 * Woe to the process that tries to page now :-).
173140929Sphk		 */
174140929Sphk		vm_pager_deallocate(obj);
175140929Sphk		VM_OBJECT_UNLOCK(obj);
176140929Sphk	}
177144610Sjeff	vp->v_object = NULL;
178140929Sphk}
179140929Sphk
180140929Sphk
1811541Srgrimes/*
1821541Srgrimes * Allocate (or lookup) pager for a vnode.
1831541Srgrimes * Handle is a vnode pointer.
18498604Salc *
18598604Salc * MPSAFE
1861541Srgrimes */
1879507Sdgvm_object_t
18840286Sdgvnode_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot,
189194766Skib    vm_ooffset_t offset, struct ucred *cred)
1901541Srgrimes{
1919456Sdg	vm_object_t object;
1921541Srgrimes	struct vnode *vp;
1931541Srgrimes
1941541Srgrimes	/*
1951541Srgrimes	 * Pageout to vnode, no can do yet.
1961541Srgrimes	 */
1971541Srgrimes	if (handle == NULL)
1981827Sdg		return (NULL);
1991541Srgrimes
2009411Sdg	vp = (struct vnode *) handle;
2019411Sdg
2021541Srgrimes	/*
2039411Sdg	 * If the object is being terminated, wait for it to
2049411Sdg	 * go away.
2059411Sdg	 */
206179159Supsretry:
207114074Salc	while ((object = vp->v_object) != NULL) {
208114074Salc		VM_OBJECT_LOCK(object);
209181020Sjhb		if ((object->flags & OBJ_DEAD) == 0)
210114074Salc			break;
211137297Salc		vm_object_set_flag(object, OBJ_DISCONNECTWNT);
212114074Salc		msleep(object, VM_OBJECT_MTX(object), PDROP | PVM, "vadead", 0);
2139507Sdg	}
2145455Sdg
21532071Sdyson	if (vp->v_usecount == 0)
21632071Sdyson		panic("vnode_pager_alloc: no vnode reference");
21732071Sdyson
2189507Sdg	if (object == NULL) {
2191541Srgrimes		/*
220179159Sups		 * Add an object of the appropriate size
2211541Srgrimes		 */
22240286Sdg		object = vm_object_allocate(OBJT_VNODE, OFF_TO_IDX(round_page(size)));
2231827Sdg
22440286Sdg		object->un_pager.vnp.vnp_size = size;
2251549Srgrimes
2269507Sdg		object->handle = handle;
227179765Sups		VI_LOCK(vp);
228179765Sups		if (vp->v_object != NULL) {
229179159Sups			/*
230179159Sups			 * Object has been created while we were sleeping
231179159Sups			 */
232179765Sups			VI_UNLOCK(vp);
233179159Sups			vm_object_destroy(object);
234179159Sups			goto retry;
235179159Sups		}
2369507Sdg		vp->v_object = object;
237179765Sups		VI_UNLOCK(vp);
238179765Sups	} else {
23932286Sdyson		object->ref_count++;
240179765Sups		VM_OBJECT_UNLOCK(object);
241179765Sups	}
242143559Sjeff	vref(vp);
2439507Sdg	return (object);
2441541Srgrimes}
2451541Srgrimes
246114774Salc/*
247114774Salc *	The object must be locked.
248114774Salc */
24912820Sphkstatic void
2509507Sdgvnode_pager_dealloc(object)
2519507Sdg	vm_object_t object;
2521541Srgrimes{
253202529Skib	struct vnode *vp;
254202529Skib	int refs;
2551541Srgrimes
256202529Skib	vp = object->handle;
2579507Sdg	if (vp == NULL)
2589507Sdg		panic("vnode_pager_dealloc: pager already dealloced");
2599507Sdg
260114774Salc	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
26133817Sdyson	vm_object_pip_wait(object, "vnpdea");
262202529Skib	refs = object->ref_count;
2631541Srgrimes
2649507Sdg	object->handle = NULL;
26533109Sdyson	object->type = OBJT_DEAD;
266137297Salc	if (object->flags & OBJ_DISCONNECTWNT) {
267137297Salc		vm_object_clear_flag(object, OBJ_DISCONNECTWNT);
268137297Salc		wakeup(object);
269137297Salc	}
270171599Spjd	ASSERT_VOP_ELOCKED(vp, "vnode_pager_dealloc");
2719507Sdg	vp->v_object = NULL;
272140734Sphk	vp->v_vflag &= ~VV_TEXT;
273202529Skib	while (refs-- > 0)
274202529Skib		vunref(vp);
2751549Srgrimes}
2761541Srgrimes
27712820Sphkstatic boolean_t
27812767Sdysonvnode_pager_haspage(object, pindex, before, after)
2799507Sdg	vm_object_t object;
28012767Sdyson	vm_pindex_t pindex;
2819507Sdg	int *before;
2829507Sdg	int *after;
2831541Srgrimes{
2849507Sdg	struct vnode *vp = object->handle;
28596572Sphk	daddr_t bn;
28612423Sphk	int err;
28710556Sdyson	daddr_t reqblock;
28811701Sdyson	int poff;
28911701Sdyson	int bsize;
29012914Sdyson	int pagesperblock, blocksperpage;
291140723Sjeff	int vfslocked;
2921541Srgrimes
293116695Salc	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
29451340Sdillon	/*
29551340Sdillon	 * If no vp or vp is doomed or marked transparent to VM, we do not
29651340Sdillon	 * have the page.
29751340Sdillon	 */
298155384Sjeff	if (vp == NULL || vp->v_iflag & VI_DOOMED)
29932585Sdyson		return FALSE;
3001541Srgrimes	/*
301155384Sjeff	 * If the offset is beyond end of file we do
3025455Sdg	 * not have the page.
3031541Srgrimes	 */
304155384Sjeff	if (IDX_TO_OFF(pindex) >= object->un_pager.vnp.vnp_size)
3054797Sdg		return FALSE;
3061541Srgrimes
30711576Sdg	bsize = vp->v_mount->mnt_stat.f_iosize;
30810556Sdyson	pagesperblock = bsize / PAGE_SIZE;
30912914Sdyson	blocksperpage = 0;
31012914Sdyson	if (pagesperblock > 0) {
31112914Sdyson		reqblock = pindex / pagesperblock;
31212914Sdyson	} else {
31312914Sdyson		blocksperpage = (PAGE_SIZE / bsize);
31412914Sdyson		reqblock = pindex * blocksperpage;
31512914Sdyson	}
316116695Salc	VM_OBJECT_UNLOCK(object);
317140723Sjeff	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
318119045Sphk	err = VOP_BMAP(vp, reqblock, NULL, &bn, after, before);
319140723Sjeff	VFS_UNLOCK_GIANT(vfslocked);
320116695Salc	VM_OBJECT_LOCK(object);
3218876Srgrimes	if (err)
3229507Sdg		return TRUE;
32392029Seivind	if (bn == -1)
32410576Sdyson		return FALSE;
32512914Sdyson	if (pagesperblock > 0) {
32612914Sdyson		poff = pindex - (reqblock * pagesperblock);
32712914Sdyson		if (before) {
32812914Sdyson			*before *= pagesperblock;
32912914Sdyson			*before += poff;
33010669Sdyson		}
33112914Sdyson		if (after) {
33212914Sdyson			int numafter;
33312914Sdyson			*after *= pagesperblock;
33412914Sdyson			numafter = pagesperblock - (poff + 1);
33599211Srobert			if (IDX_TO_OFF(pindex + numafter) >
33699211Srobert			    object->un_pager.vnp.vnp_size) {
33799211Srobert				numafter =
33899211Srobert		    		    OFF_TO_IDX(object->un_pager.vnp.vnp_size) -
33999211Srobert				    pindex;
34012914Sdyson			}
34112914Sdyson			*after += numafter;
34212914Sdyson		}
34312914Sdyson	} else {
34412914Sdyson		if (before) {
34512914Sdyson			*before /= blocksperpage;
34612914Sdyson		}
34712914Sdyson
34812914Sdyson		if (after) {
34912914Sdyson			*after /= blocksperpage;
35012914Sdyson		}
35110556Sdyson	}
35210576Sdyson	return TRUE;
3531541Srgrimes}
3541541Srgrimes
3551541Srgrimes/*
3561541Srgrimes * Lets the VM system know about a change in size for a file.
3579507Sdg * We adjust our own internal size and flush any cached pages in
3581541Srgrimes * the associated object that are affected by the size change.
3591541Srgrimes *
3601541Srgrimes * Note: this routine may be invoked as a result of a pager put
3611541Srgrimes * operation (possibly at object termination time), so we must be careful.
3621541Srgrimes */
3631541Srgrimesvoid
3641541Srgrimesvnode_pager_setsize(vp, nsize)
3651541Srgrimes	struct vnode *vp;
36612767Sdyson	vm_ooffset_t nsize;
3671541Srgrimes{
368116167Salc	vm_object_t object;
369116167Salc	vm_page_t m;
37038542Sluoqi	vm_pindex_t nobjsize;
3711541Srgrimes
372116167Salc	if ((object = vp->v_object) == NULL)
3731541Srgrimes		return;
374188386Skib/* 	ASSERT_VOP_ELOCKED(vp, "vnode_pager_setsize and not locked vnode"); */
375116167Salc	VM_OBJECT_LOCK(object);
376116167Salc	if (nsize == object->un_pager.vnp.vnp_size) {
377116167Salc		/*
378116167Salc		 * Hasn't changed size
379116167Salc		 */
380116167Salc		VM_OBJECT_UNLOCK(object);
3813374Sdg		return;
382116167Salc	}
38338542Sluoqi	nobjsize = OFF_TO_IDX(nsize + PAGE_MASK);
3849507Sdg	if (nsize < object->un_pager.vnp.vnp_size) {
385116167Salc		/*
386116167Salc		 * File has shrunk. Toss any cached pages beyond the new EOF.
387116167Salc		 */
388116167Salc		if (nobjsize < object->size)
38938542Sluoqi			vm_object_page_remove(object, nobjsize, object->size,
390116167Salc			    FALSE);
3911827Sdg		/*
3921827Sdg		 * this gets rid of garbage at the end of a page that is now
39387834Sdillon		 * only partially backed by the vnode.
39487834Sdillon		 *
39587834Sdillon		 * XXX for some reason (I don't know yet), if we take a
39687834Sdillon		 * completely invalid page and mark it partially valid
39787834Sdillon		 * it can screw up NFS reads, so we don't allow the case.
3981827Sdg		 */
399116167Salc		if ((nsize & PAGE_MASK) &&
400121230Salc		    (m = vm_page_lookup(object, OFF_TO_IDX(nsize))) != NULL &&
401121230Salc		    m->valid != 0) {
402121230Salc			int base = (int)nsize & PAGE_MASK;
403121230Salc			int size = PAGE_SIZE - base;
40470374Sdillon
405121230Salc			/*
406121230Salc			 * Clear out partial-page garbage in case
407121230Salc			 * the page has been mapped.
408121230Salc			 */
409121230Salc			pmap_zero_page_area(m, base, size);
41070374Sdillon
411121230Salc			/*
412193303Salc			 * Update the valid bits to reflect the blocks that
413193303Salc			 * have been zeroed.  Some of these valid bits may
414193303Salc			 * have already been set.
415193303Salc			 */
416193303Salc			vm_page_set_valid(m, base, size);
417193303Salc
418193303Salc			/*
419193303Salc			 * Round "base" to the next block boundary so that the
420193303Salc			 * dirty bit for a partially zeroed block is not
421193303Salc			 * cleared.
422193303Salc			 */
423193303Salc			base = roundup2(base, DEV_BSIZE);
424193303Salc
425193303Salc			/*
426193303Salc			 * Clear out partial-page dirty bits.
427121230Salc			 *
428121230Salc			 * note that we do not clear out the valid
429121230Salc			 * bits.  This would prevent bogus_page
430121230Salc			 * replacement from working properly.
431121230Salc			 */
432193303Salc			vm_page_clear_dirty(m, base, PAGE_SIZE - base);
433172875Salc		} else if ((nsize & PAGE_MASK) &&
434172875Salc		    __predict_false(object->cache != NULL)) {
435172875Salc			vm_page_cache_free(object, OFF_TO_IDX(nsize),
436172875Salc			    nobjsize);
4371827Sdg		}
4381541Srgrimes	}
43912767Sdyson	object->un_pager.vnp.vnp_size = nsize;
44038542Sluoqi	object->size = nobjsize;
441116167Salc	VM_OBJECT_UNLOCK(object);
4421541Srgrimes}
4431541Srgrimes
4441549Srgrimes/*
4451549Srgrimes * calculate the linear (byte) disk address of specified virtual
4461549Srgrimes * file address
4471549Srgrimes */
448163359Salcstatic int
449163359Salcvnode_pager_addr(struct vnode *vp, vm_ooffset_t address, daddr_t *rtaddress,
450163359Salc    int *run)
4511549Srgrimes{
4525455Sdg	int bsize;
4535455Sdg	int err;
45412767Sdyson	daddr_t vblock;
455146340Sbz	daddr_t voffset;
4561549Srgrimes
457138531Salc	if (address < 0)
4585455Sdg		return -1;
4595455Sdg
460155384Sjeff	if (vp->v_iflag & VI_DOOMED)
46111701Sdyson		return -1;
46211701Sdyson
4631549Srgrimes	bsize = vp->v_mount->mnt_stat.f_iosize;
4641549Srgrimes	vblock = address / bsize;
4651549Srgrimes	voffset = address % bsize;
4661549Srgrimes
467163359Salc	err = VOP_BMAP(vp, vblock, NULL, rtaddress, run, NULL);
468163359Salc	if (err == 0) {
469163359Salc		if (*rtaddress != -1)
470163359Salc			*rtaddress += voffset / DEV_BSIZE;
47192029Seivind		if (run) {
4726151Sdg			*run += 1;
4736151Sdg			*run *= bsize/PAGE_SIZE;
4746151Sdg			*run -= voffset/PAGE_SIZE;
4756151Sdg		}
4766151Sdg	}
4771549Srgrimes
478163359Salc	return (err);
4791549Srgrimes}
4801549Srgrimes
4811549Srgrimes/*
48296755Strhodes * small block filesystem vnode pager input
4831549Srgrimes */
48412820Sphkstatic int
4859507Sdgvnode_pager_input_smlfs(object, m)
4869507Sdg	vm_object_t object;
4871549Srgrimes	vm_page_t m;
4881549Srgrimes{
489191935Salc	int bits, i;
490137726Sphk	struct vnode *vp;
491137726Sphk	struct bufobj *bo;
4921549Srgrimes	struct buf *bp;
493127926Salc	struct sf_buf *sf;
494146340Sbz	daddr_t fileaddr;
4951549Srgrimes	vm_offset_t bsize;
4965455Sdg	int error = 0;
4971549Srgrimes
4989507Sdg	vp = object->handle;
499155384Sjeff	if (vp->v_iflag & VI_DOOMED)
50011701Sdyson		return VM_PAGER_BAD;
50111701Sdyson
5021549Srgrimes	bsize = vp->v_mount->mnt_stat.f_iosize;
5031549Srgrimes
504137726Sphk	VOP_BMAP(vp, 0, &bo, 0, NULL, NULL);
5051549Srgrimes
506127926Salc	sf = sf_buf_alloc(m, 0);
5071549Srgrimes
5081827Sdg	for (i = 0; i < PAGE_SIZE / bsize; i++) {
50986092Sdillon		vm_ooffset_t address;
5101827Sdg
511191935Salc		bits = vm_page_bits(i * bsize, bsize);
512191935Salc		if (m->valid & bits)
5135455Sdg			continue;
5141549Srgrimes
51586092Sdillon		address = IDX_TO_OFF(m->pindex) + i * bsize;
51686092Sdillon		if (address >= object->un_pager.vnp.vnp_size) {
51786092Sdillon			fileaddr = -1;
51886092Sdillon		} else {
519163359Salc			error = vnode_pager_addr(vp, address, &fileaddr, NULL);
520163359Salc			if (error)
521163359Salc				break;
52286092Sdillon		}
5231827Sdg		if (fileaddr != -1) {
52442957Sdillon			bp = getpbuf(&vnode_pbuf_freecnt);
5251549Srgrimes
5261827Sdg			/* build a minimal buffer header */
52758345Sphk			bp->b_iocmd = BIO_READ;
528119092Sphk			bp->b_iodone = bdone;
52984827Sjhb			KASSERT(bp->b_rcred == NOCRED, ("leaking read ucred"));
53084827Sjhb			KASSERT(bp->b_wcred == NOCRED, ("leaking write ucred"));
53191406Sjhb			bp->b_rcred = crhold(curthread->td_ucred);
53291406Sjhb			bp->b_wcred = crhold(curthread->td_ucred);
533127926Salc			bp->b_data = (caddr_t)sf_buf_kva(sf) + i * bsize;
5346626Sdg			bp->b_blkno = fileaddr;
535137726Sphk			pbgetbo(bo, bp);
5361549Srgrimes			bp->b_bcount = bsize;
5371549Srgrimes			bp->b_bufsize = bsize;
53870374Sdillon			bp->b_runningbufspace = bp->b_bufsize;
539189595Sjhb			atomic_add_long(&runningbufspace, bp->b_runningbufspace);
5401827Sdg
5411827Sdg			/* do the input */
542121205Sphk			bp->b_iooffset = dbtob(bp->b_blkno);
543136927Sphk			bstrategy(bp);
5441549Srgrimes
545119092Sphk			bwait(bp, PVM, "vnsrd");
546119092Sphk
54758934Sphk			if ((bp->b_ioflags & BIO_ERROR) != 0)
5481549Srgrimes				error = EIO;
5491549Srgrimes
5501827Sdg			/*
5511827Sdg			 * free the buffer header back to the swap buffer pool
5521827Sdg			 */
553137726Sphk			pbrelbo(bp);
55442957Sdillon			relpbuf(bp, &vnode_pbuf_freecnt);
5551827Sdg			if (error)
5561549Srgrimes				break;
557191935Salc		} else
558127926Salc			bzero((caddr_t)sf_buf_kva(sf) + i * bsize, bsize);
559191935Salc		KASSERT((m->dirty & bits) == 0,
560191935Salc		    ("vnode_pager_input_smlfs: page %p is dirty", m));
561191935Salc		VM_OBJECT_LOCK(object);
562191935Salc		m->valid |= bits;
563191935Salc		VM_OBJECT_UNLOCK(object);
5641549Srgrimes	}
565127926Salc	sf_buf_free(sf);
5661827Sdg	if (error) {
5674207Sdg		return VM_PAGER_ERROR;
5681549Srgrimes	}
5691549Srgrimes	return VM_PAGER_OK;
5701549Srgrimes}
5711549Srgrimes
5721549Srgrimes/*
573139296Sphk * old style vnode pager input routine
5741549Srgrimes */
57512820Sphkstatic int
5769507Sdgvnode_pager_input_old(object, m)
5779507Sdg	vm_object_t object;
5781549Srgrimes	vm_page_t m;
5791549Srgrimes{
5801541Srgrimes	struct uio auio;
5811541Srgrimes	struct iovec aiov;
5825455Sdg	int error;
5835455Sdg	int size;
584127926Salc	struct sf_buf *sf;
58577398Sjhb	struct vnode *vp;
5861549Srgrimes
587121495Salc	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
5881549Srgrimes	error = 0;
5891827Sdg
5901549Srgrimes	/*
5911549Srgrimes	 * Return failure if beyond current EOF
5921549Srgrimes	 */
59312767Sdyson	if (IDX_TO_OFF(m->pindex) >= object->un_pager.vnp.vnp_size) {
5941549Srgrimes		return VM_PAGER_BAD;
5951549Srgrimes	} else {
5961549Srgrimes		size = PAGE_SIZE;
59712767Sdyson		if (IDX_TO_OFF(m->pindex) + size > object->un_pager.vnp.vnp_size)
59812767Sdyson			size = object->un_pager.vnp.vnp_size - IDX_TO_OFF(m->pindex);
599121495Salc		vp = object->handle;
600121495Salc		VM_OBJECT_UNLOCK(object);
6017178Sdg
6025455Sdg		/*
6035455Sdg		 * Allocate a kernel virtual address and initialize so that
6045455Sdg		 * we can use VOP_READ/WRITE routines.
6055455Sdg		 */
606127926Salc		sf = sf_buf_alloc(m, 0);
6077178Sdg
608127926Salc		aiov.iov_base = (caddr_t)sf_buf_kva(sf);
6091549Srgrimes		aiov.iov_len = size;
6101549Srgrimes		auio.uio_iov = &aiov;
6111549Srgrimes		auio.uio_iovcnt = 1;
61212767Sdyson		auio.uio_offset = IDX_TO_OFF(m->pindex);
6131549Srgrimes		auio.uio_segflg = UIO_SYSSPACE;
6141549Srgrimes		auio.uio_rw = UIO_READ;
6151549Srgrimes		auio.uio_resid = size;
61683366Sjulian		auio.uio_td = curthread;
6171549Srgrimes
61891406Sjhb		error = VOP_READ(vp, &auio, 0, curthread->td_ucred);
6191549Srgrimes		if (!error) {
62079242Sdillon			int count = size - auio.uio_resid;
6211549Srgrimes
6221549Srgrimes			if (count == 0)
6231549Srgrimes				error = EINVAL;
6241549Srgrimes			else if (count != PAGE_SIZE)
625127926Salc				bzero((caddr_t)sf_buf_kva(sf) + count,
626127926Salc				    PAGE_SIZE - count);
6271549Srgrimes		}
628127926Salc		sf_buf_free(sf);
629121230Salc
630121230Salc		VM_OBJECT_LOCK(object);
6311549Srgrimes	}
632191935Salc	KASSERT(m->dirty == 0, ("vnode_pager_input_old: page %p is dirty", m));
63339739Srvb	if (!error)
63439739Srvb		m->valid = VM_PAGE_BITS_ALL;
6354207Sdg	return error ? VM_PAGER_ERROR : VM_PAGER_OK;
6361549Srgrimes}
6371549Srgrimes
6381549Srgrimes/*
6391549Srgrimes * generic vnode pager input routine
6401549Srgrimes */
64110556Sdyson
64233847Smsmith/*
64376827Salfred * Local media VFS's that do not implement their own VOP_GETPAGES
64499211Srobert * should have their VOP_GETPAGES call to vnode_pager_generic_getpages()
64599211Srobert * to implement the previous behaviour.
64633847Smsmith *
64733847Smsmith * All other FS's should use the bypass to get to the local media
64833847Smsmith * backing vp's VOP_GETPAGES.
64933847Smsmith */
65012820Sphkstatic int
6519507Sdgvnode_pager_getpages(object, m, count, reqpage)
6529507Sdg	vm_object_t object;
6531549Srgrimes	vm_page_t *m;
6549507Sdg	int count;
6559507Sdg	int reqpage;
6561549Srgrimes{
65710556Sdyson	int rtval;
65810556Sdyson	struct vnode *vp;
65934403Smsmith	int bytes = count * PAGE_SIZE;
660140723Sjeff	int vfslocked;
66132286Sdyson
66210556Sdyson	vp = object->handle;
663116279Salc	VM_OBJECT_UNLOCK(object);
664140723Sjeff	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
66534403Smsmith	rtval = VOP_GETPAGES(vp, m, bytes, reqpage, 0);
66676827Salfred	KASSERT(rtval != EOPNOTSUPP,
66776827Salfred	    ("vnode_pager: FS getpages not implemented\n"));
668140723Sjeff	VFS_UNLOCK_GIANT(vfslocked);
669116279Salc	VM_OBJECT_LOCK(object);
67033847Smsmith	return rtval;
67110556Sdyson}
67210556Sdyson
67333847Smsmith/*
67433847Smsmith * This is now called from local media FS's to operate against their
67533847Smsmith * own vnodes if they fail to implement VOP_GETPAGES.
67633847Smsmith */
67733847Smsmithint
67833847Smsmithvnode_pager_generic_getpages(vp, m, bytecount, reqpage)
67933847Smsmith	struct vnode *vp;
68010556Sdyson	vm_page_t *m;
68133847Smsmith	int bytecount;
68210556Sdyson	int reqpage;
68310556Sdyson{
68433847Smsmith	vm_object_t object;
68512767Sdyson	vm_offset_t kva;
68634206Sdyson	off_t foff, tfoff, nextoff;
687146340Sbz	int i, j, size, bsize, first;
688163140Salc	daddr_t firstaddr, reqblock;
689137726Sphk	struct bufobj *bo;
6906151Sdg	int runpg;
6916151Sdg	int runend;
6927178Sdg	struct buf *bp;
69333847Smsmith	int count;
694163210Salc	int error;
6951549Srgrimes
69633847Smsmith	object = vp->v_object;
69733847Smsmith	count = bytecount / PAGE_SIZE;
69833847Smsmith
699137726Sphk	KASSERT(vp->v_type != VCHR && vp->v_type != VBLK,
700137726Sphk	    ("vnode_pager_generic_getpages does not support devices"));
701155384Sjeff	if (vp->v_iflag & VI_DOOMED)
70211701Sdyson		return VM_PAGER_BAD;
70311701Sdyson
7041549Srgrimes	bsize = vp->v_mount->mnt_stat.f_iosize;
7051549Srgrimes
7061549Srgrimes	/* get the UNDERLYING device for the file with VOP_BMAP() */
7071827Sdg
7081549Srgrimes	/*
7091827Sdg	 * originally, we did not check for an error return value -- assuming
7101827Sdg	 * an fs always has a bmap entry point -- that assumption is wrong!!!
7111549Srgrimes	 */
71212767Sdyson	foff = IDX_TO_OFF(m[reqpage]->pindex);
7131827Sdg
7141549Srgrimes	/*
7151887Sdg	 * if we can't bmap, use old VOP code
7161549Srgrimes	 */
717163210Salc	error = VOP_BMAP(vp, foff / bsize, &bo, &reqblock, NULL, NULL);
718163210Salc	if (error == EOPNOTSUPP) {
719116512Salc		VM_OBJECT_LOCK(object);
720207410Skmacy
721100832Salc		for (i = 0; i < count; i++)
722207410Skmacy			if (i != reqpage) {
723207410Skmacy				vm_page_lock(m[i]);
72475692Salfred				vm_page_free(m[i]);
725207410Skmacy				vm_page_unlock(m[i]);
726207410Skmacy			}
727170292Sattilio		PCPU_INC(cnt.v_vnodein);
728170292Sattilio		PCPU_INC(cnt.v_vnodepgsin);
729121495Salc		error = vnode_pager_input_old(object, m[reqpage]);
730121495Salc		VM_OBJECT_UNLOCK(object);
731121495Salc		return (error);
732163210Salc	} else if (error != 0) {
733163210Salc		VM_OBJECT_LOCK(object);
734163210Salc		for (i = 0; i < count; i++)
735207410Skmacy			if (i != reqpage) {
736207410Skmacy				vm_page_lock(m[i]);
737163210Salc				vm_page_free(m[i]);
738207410Skmacy				vm_page_unlock(m[i]);
739207410Skmacy			}
740163210Salc		VM_OBJECT_UNLOCK(object);
741163210Salc		return (VM_PAGER_ERROR);
7421549Srgrimes
7431827Sdg		/*
7441827Sdg		 * if the blocksize is smaller than a page size, then use
7451827Sdg		 * special small filesystem code.  NFS sometimes has a small
7461827Sdg		 * blocksize, but it can handle large reads itself.
7471827Sdg		 */
7481827Sdg	} else if ((PAGE_SIZE / bsize) > 1 &&
74938866Sbde	    (vp->v_mount->mnt_stat.f_type != nfs_mount_type)) {
750116512Salc		VM_OBJECT_LOCK(object);
751100832Salc		for (i = 0; i < count; i++)
752207410Skmacy			if (i != reqpage) {
753207410Skmacy				vm_page_lock(m[i]);
75475692Salfred				vm_page_free(m[i]);
755207410Skmacy				vm_page_unlock(m[i]);
756207410Skmacy			}
757116512Salc		VM_OBJECT_UNLOCK(object);
758170292Sattilio		PCPU_INC(cnt.v_vnodein);
759170292Sattilio		PCPU_INC(cnt.v_vnodepgsin);
7609507Sdg		return vnode_pager_input_smlfs(object, m[reqpage]);
7611549Srgrimes	}
76245347Sjulian
7631549Srgrimes	/*
76445347Sjulian	 * If we have a completely valid page available to us, we can
76545347Sjulian	 * clean up and return.  Otherwise we have to re-read the
76645347Sjulian	 * media.
7671549Srgrimes	 */
768121227Salc	VM_OBJECT_LOCK(object);
76945347Sjulian	if (m[reqpage]->valid == VM_PAGE_BITS_ALL) {
770100832Salc		for (i = 0; i < count; i++)
771207410Skmacy			if (i != reqpage) {
772207410Skmacy				vm_page_lock(m[i]);
77375692Salfred				vm_page_free(m[i]);
774207410Skmacy				vm_page_unlock(m[i]);
775207410Skmacy			}
776116512Salc		VM_OBJECT_UNLOCK(object);
7775455Sdg		return VM_PAGER_OK;
778163140Salc	} else if (reqblock == -1) {
779163140Salc		pmap_zero_page(m[reqpage]);
780192010Salc		KASSERT(m[reqpage]->dirty == 0,
781192010Salc		    ("vnode_pager_generic_getpages: page %p is dirty", m));
782163140Salc		m[reqpage]->valid = VM_PAGE_BITS_ALL;
783163140Salc		for (i = 0; i < count; i++)
784207410Skmacy			if (i != reqpage) {
785207410Skmacy				vm_page_lock(m[i]);
786163140Salc				vm_page_free(m[i]);
787207410Skmacy				vm_page_unlock(m[i]);
788207410Skmacy			}
789163140Salc		VM_OBJECT_UNLOCK(object);
790163140Salc		return (VM_PAGER_OK);
7911549Srgrimes	}
79245347Sjulian	m[reqpage]->valid = 0;
793121227Salc	VM_OBJECT_UNLOCK(object);
7947178Sdg
7955455Sdg	/*
7965455Sdg	 * here on direct device I/O
7975455Sdg	 */
79892029Seivind	firstaddr = -1;
7991549Srgrimes
8001549Srgrimes	/*
8016151Sdg	 * calculate the run that includes the required page
8021549Srgrimes	 */
80392029Seivind	for (first = 0, i = 0; i < count; i = runend) {
804163359Salc		if (vnode_pager_addr(vp, IDX_TO_OFF(m[i]->pindex), &firstaddr,
805163359Salc		    &runpg) != 0) {
806163359Salc			VM_OBJECT_LOCK(object);
807163359Salc			for (; i < count; i++)
808207410Skmacy				if (i != reqpage) {
809207410Skmacy					vm_page_lock(m[i]);
810163359Salc					vm_page_free(m[i]);
811207410Skmacy					vm_page_unlock(m[i]);
812207410Skmacy				}
813163359Salc			VM_OBJECT_UNLOCK(object);
814163359Salc			return (VM_PAGER_ERROR);
815163359Salc		}
8166151Sdg		if (firstaddr == -1) {
817116512Salc			VM_OBJECT_LOCK(object);
8189507Sdg			if (i == reqpage && foff < object->un_pager.vnp.vnp_size) {
819146340Sbz				panic("vnode_pager_getpages: unexpected missing page: firstaddr: %jd, foff: 0x%jx%08jx, vnp_size: 0x%jx%08jx",
820146340Sbz				    (intmax_t)firstaddr, (uintmax_t)(foff >> 32),
821106603Smux				    (uintmax_t)foff,
822106603Smux				    (uintmax_t)
823106603Smux				    (object->un_pager.vnp.vnp_size >> 32),
824106603Smux				    (uintmax_t)object->un_pager.vnp.vnp_size);
8256151Sdg			}
826207410Skmacy			vm_page_lock(m[i]);
82775692Salfred			vm_page_free(m[i]);
828207410Skmacy			vm_page_unlock(m[i]);
829116512Salc			VM_OBJECT_UNLOCK(object);
8306151Sdg			runend = i + 1;
8316151Sdg			first = runend;
8326151Sdg			continue;
8331549Srgrimes		}
8346151Sdg		runend = i + runpg;
8359507Sdg		if (runend <= reqpage) {
836116512Salc			VM_OBJECT_LOCK(object);
837207410Skmacy			for (j = i; j < runend; j++) {
838207410Skmacy				vm_page_lock(m[j]);
83975692Salfred				vm_page_free(m[j]);
840207410Skmacy				vm_page_unlock(m[j]);
841207410Skmacy			}
842116512Salc			VM_OBJECT_UNLOCK(object);
8431549Srgrimes		} else {
8449507Sdg			if (runpg < (count - first)) {
845116512Salc				VM_OBJECT_LOCK(object);
846207410Skmacy				for (i = first + runpg; i < count; i++) {
847207410Skmacy					vm_page_lock(m[i]);
84875692Salfred					vm_page_free(m[i]);
849207410Skmacy					vm_page_unlock(m[i]);
850207410Skmacy				}
851116512Salc				VM_OBJECT_UNLOCK(object);
8526151Sdg				count = first + runpg;
8536151Sdg			}
8546151Sdg			break;
8551549Srgrimes		}
8566151Sdg		first = runend;
8571549Srgrimes	}
8581549Srgrimes
8591549Srgrimes	/*
8601827Sdg	 * the first and last page have been calculated now, move input pages
8611827Sdg	 * to be zero based...
8621549Srgrimes	 */
8631549Srgrimes	if (first != 0) {
864163361Salc		m += first;
8651549Srgrimes		count -= first;
8661549Srgrimes		reqpage -= first;
8671549Srgrimes	}
8686151Sdg
8691549Srgrimes	/*
8701549Srgrimes	 * calculate the file virtual address for the transfer
8711549Srgrimes	 */
87212767Sdyson	foff = IDX_TO_OFF(m[0]->pindex);
8731827Sdg
8741549Srgrimes	/*
8751549Srgrimes	 * calculate the size of the transfer
8761549Srgrimes	 */
8771549Srgrimes	size = count * PAGE_SIZE;
878134892Sphk	KASSERT(count > 0, ("zero count"));
8799507Sdg	if ((foff + size) > object->un_pager.vnp.vnp_size)
8809507Sdg		size = object->un_pager.vnp.vnp_size - foff;
881134892Sphk	KASSERT(size > 0, ("zero size"));
8821549Srgrimes
8831549Srgrimes	/*
88451340Sdillon	 * round up physical size for real devices.
8851549Srgrimes	 */
886137726Sphk	if (1) {
887137726Sphk		int secmask = bo->bo_bsize - 1;
888136977Sphk		KASSERT(secmask < PAGE_SIZE && secmask > 0,
889136977Sphk		    ("vnode_pager_generic_getpages: sector size %d too large",
890136977Sphk		    secmask + 1));
89151340Sdillon		size = (size + secmask) & ~secmask;
89251340Sdillon	}
8931549Srgrimes
89442957Sdillon	bp = getpbuf(&vnode_pbuf_freecnt);
8955455Sdg	kva = (vm_offset_t) bp->b_data;
8961887Sdg
8971549Srgrimes	/*
8981549Srgrimes	 * and map the pages to be read into the kva
8991549Srgrimes	 */
9001887Sdg	pmap_qenter(kva, m, count);
9011549Srgrimes
9021549Srgrimes	/* build a minimal buffer header */
90358345Sphk	bp->b_iocmd = BIO_READ;
904119092Sphk	bp->b_iodone = bdone;
90584827Sjhb	KASSERT(bp->b_rcred == NOCRED, ("leaking read ucred"));
90684827Sjhb	KASSERT(bp->b_wcred == NOCRED, ("leaking write ucred"));
90791406Sjhb	bp->b_rcred = crhold(curthread->td_ucred);
90891406Sjhb	bp->b_wcred = crhold(curthread->td_ucred);
9096626Sdg	bp->b_blkno = firstaddr;
910137726Sphk	pbgetbo(bo, bp);
9111549Srgrimes	bp->b_bcount = size;
9121549Srgrimes	bp->b_bufsize = size;
91370374Sdillon	bp->b_runningbufspace = bp->b_bufsize;
914189595Sjhb	atomic_add_long(&runningbufspace, bp->b_runningbufspace);
9151549Srgrimes
916170292Sattilio	PCPU_INC(cnt.v_vnodein);
917170292Sattilio	PCPU_ADD(cnt.v_vnodepgsin, count);
9183612Sdg
9191549Srgrimes	/* do the input */
920121205Sphk	bp->b_iooffset = dbtob(bp->b_blkno);
921136927Sphk	bstrategy(bp);
9223612Sdg
923119092Sphk	bwait(bp, PVM, "vnread");
9241549Srgrimes
92558934Sphk	if ((bp->b_ioflags & BIO_ERROR) != 0)
9261549Srgrimes		error = EIO;
9271549Srgrimes
9281549Srgrimes	if (!error) {
9291549Srgrimes		if (size != count * PAGE_SIZE)
9301827Sdg			bzero((caddr_t) kva + size, PAGE_SIZE * count - size);
9311549Srgrimes	}
9325455Sdg	pmap_qremove(kva, count);
9331549Srgrimes
9341549Srgrimes	/*
9351549Srgrimes	 * free the buffer header back to the swap buffer pool
9361549Srgrimes	 */
937137726Sphk	pbrelbo(bp);
93842957Sdillon	relpbuf(bp, &vnode_pbuf_freecnt);
9391549Srgrimes
940116512Salc	VM_OBJECT_LOCK(object);
94134206Sdyson	for (i = 0, tfoff = foff; i < count; i++, tfoff = nextoff) {
94234206Sdyson		vm_page_t mt;
94334206Sdyson
94434206Sdyson		nextoff = tfoff + PAGE_SIZE;
94534206Sdyson		mt = m[i];
94634206Sdyson
94747239Sdt		if (nextoff <= object->un_pager.vnp.vnp_size) {
94845347Sjulian			/*
94945347Sjulian			 * Read filled up entire page.
95045347Sjulian			 */
95134206Sdyson			mt->valid = VM_PAGE_BITS_ALL;
952191478Salc			KASSERT(mt->dirty == 0,
953191478Salc			    ("vnode_pager_generic_getpages: page %p is dirty",
954191478Salc			    mt));
955191478Salc			KASSERT(!pmap_page_is_mapped(mt),
956191478Salc			    ("vnode_pager_generic_getpages: page %p is mapped",
957191478Salc			    mt));
95834206Sdyson		} else {
95945347Sjulian			/*
960192134Salc			 * Read did not fill up entire page.
96145347Sjulian			 *
96245347Sjulian			 * Currently we do not set the entire page valid,
96345347Sjulian			 * we just try to clear the piece that we couldn't
96445347Sjulian			 * read.
96545347Sjulian			 */
966192134Salc			vm_page_set_valid(mt, 0,
96747239Sdt			    object->un_pager.vnp.vnp_size - tfoff);
968192134Salc			KASSERT((mt->dirty & vm_page_bits(0,
969192134Salc			    object->un_pager.vnp.vnp_size - tfoff)) == 0,
970192134Salc			    ("vnode_pager_generic_getpages: page %p is dirty",
971192134Salc			    mt));
97234206Sdyson		}
97334206Sdyson
9741549Srgrimes		if (i != reqpage) {
9751827Sdg
9761549Srgrimes			/*
9771827Sdg			 * whether or not to leave the page activated is up in
9781827Sdg			 * the air, but we should put the page on a page queue
9791827Sdg			 * somewhere. (it already is in the object). Result:
98058634Scharnier			 * It appears that empirical results show that
9811827Sdg			 * deactivating pages is best.
9821549Srgrimes			 */
9831827Sdg
9841549Srgrimes			/*
9851827Sdg			 * just in case someone was asking for this page we
9861827Sdg			 * now tell them that it is ok to use
9871549Srgrimes			 */
9881549Srgrimes			if (!error) {
989207746Salc				if (mt->oflags & VPO_WANTED) {
990207746Salc					vm_page_lock(mt);
99134206Sdyson					vm_page_activate(mt);
992207746Salc					vm_page_unlock(mt);
993207746Salc				} else {
994207746Salc					vm_page_lock(mt);
99534206Sdyson					vm_page_deactivate(mt);
996207746Salc					vm_page_unlock(mt);
997207746Salc				}
99838799Sdfr				vm_page_wakeup(mt);
9991549Srgrimes			} else {
1000207746Salc				vm_page_lock(mt);
100175692Salfred				vm_page_free(mt);
1002207746Salc				vm_page_unlock(mt);
10031549Srgrimes			}
10041549Srgrimes		}
10051549Srgrimes	}
1006116512Salc	VM_OBJECT_UNLOCK(object);
10071549Srgrimes	if (error) {
10089507Sdg		printf("vnode_pager_getpages: I/O read error\n");
10091549Srgrimes	}
10104207Sdg	return (error ? VM_PAGER_ERROR : VM_PAGER_OK);
10111549Srgrimes}
10121549Srgrimes
101333847Smsmith/*
101433847Smsmith * EOPNOTSUPP is no longer legal.  For local media VFS's that do not
101533847Smsmith * implement their own VOP_PUTPAGES, their VOP_PUTPAGES should call to
101633847Smsmith * vnode_pager_generic_putpages() to implement the previous behaviour.
101733847Smsmith *
101833847Smsmith * All other FS's should use the bypass to get to the local media
101933847Smsmith * backing vp's VOP_PUTPAGES.
102033847Smsmith */
102143129Sdillonstatic void
102210556Sdysonvnode_pager_putpages(object, m, count, sync, rtvals)
102310556Sdyson	vm_object_t object;
102410556Sdyson	vm_page_t *m;
102510556Sdyson	int count;
102610556Sdyson	boolean_t sync;
102710556Sdyson	int *rtvals;
102810556Sdyson{
102910556Sdyson	int rtval;
103010556Sdyson	struct vnode *vp;
103134403Smsmith	int bytes = count * PAGE_SIZE;
103218973Sdyson
103344321Salc	/*
103444321Salc	 * Force synchronous operation if we are extremely low on memory
103544321Salc	 * to prevent a low-memory deadlock.  VOP operations often need to
103644321Salc	 * allocate more memory to initiate the I/O ( i.e. do a BMAP
103744321Salc	 * operation ).  The swapper handles the case by limiting the amount
103844321Salc	 * of asynchronous I/O, but that sort of solution doesn't scale well
103944321Salc	 * for the vnode pager without a lot of work.
104044321Salc	 *
104144321Salc	 * Also, the backing vnode's iodone routine may not wake the pageout
104244321Salc	 * daemon up.  This should be probably be addressed XXX.
104344321Salc	 */
104444321Salc
1045170170Sattilio	if ((cnt.v_free_count + cnt.v_cache_count) < cnt.v_pageout_free_min)
104644321Salc		sync |= OBJPC_SYNC;
104744321Salc
104844321Salc	/*
104944321Salc	 * Call device-specific putpages function
105044321Salc	 */
105110556Sdyson	vp = object->handle;
1052121455Salc	VM_OBJECT_UNLOCK(object);
105334403Smsmith	rtval = VOP_PUTPAGES(vp, m, bytes, sync, rtvals, 0);
105476827Salfred	KASSERT(rtval != EOPNOTSUPP,
105576827Salfred	    ("vnode_pager: stale FS putpages\n"));
1056121455Salc	VM_OBJECT_LOCK(object);
105710556Sdyson}
105810556Sdyson
105933847Smsmith
10601549Srgrimes/*
106133847Smsmith * This is now called from local media FS's to operate against their
106245057Seivind * own vnodes if they fail to implement VOP_PUTPAGES.
106370374Sdillon *
106470374Sdillon * This is typically called indirectly via the pageout daemon and
106570374Sdillon * clustering has already typically occured, so in general we ask the
106670374Sdillon * underlying filesystem to write the data out asynchronously rather
106770374Sdillon * then delayed.
10681549Srgrimes */
106933847Smsmithint
1070208574Salcvnode_pager_generic_putpages(struct vnode *vp, vm_page_t *ma, int bytecount,
1071208574Salc    int flags, int *rtvals)
10721549Srgrimes{
10737695Sdg	int i;
107433847Smsmith	vm_object_t object;
1075208574Salc	vm_page_t m;
107633847Smsmith	int count;
10771549Srgrimes
10787695Sdg	int maxsize, ncount;
107912767Sdyson	vm_ooffset_t poffset;
10807695Sdg	struct uio auio;
10817695Sdg	struct iovec aiov;
10827695Sdg	int error;
108334206Sdyson	int ioflags;
1084151951Sps	int ppscheck = 0;
1085151951Sps	static struct timeval lastfail;
1086151951Sps	static int curfail;
10871549Srgrimes
108833847Smsmith	object = vp->v_object;
108933847Smsmith	count = bytecount / PAGE_SIZE;
109033847Smsmith
10911827Sdg	for (i = 0; i < count; i++)
1092222586Skib		rtvals[i] = VM_PAGER_ERROR;
10931549Srgrimes
1094208574Salc	if ((int64_t)ma[0]->pindex < 0) {
1095119544Smarcel		printf("vnode_pager_putpages: attempt to write meta-data!!! -- 0x%lx(%lx)\n",
1096208574Salc		    (long)ma[0]->pindex, (u_long)ma[0]->dirty);
10977695Sdg		rtvals[0] = VM_PAGER_BAD;
10987695Sdg		return VM_PAGER_BAD;
10995455Sdg	}
11007178Sdg
11017695Sdg	maxsize = count * PAGE_SIZE;
11027695Sdg	ncount = count;
11031549Srgrimes
1104208574Salc	poffset = IDX_TO_OFF(ma[0]->pindex);
110584854Sdillon
110684854Sdillon	/*
110784854Sdillon	 * If the page-aligned write is larger then the actual file we
110884854Sdillon	 * have to invalidate pages occuring beyond the file EOF.  However,
110984854Sdillon	 * there is an edge case where a file may not be page-aligned where
111084854Sdillon	 * the last page is partially invalid.  In this case the filesystem
111184854Sdillon	 * may not properly clear the dirty bits for the entire page (which
111284854Sdillon	 * could be VM_PAGE_BITS_ALL due to the page having been mmap()d).
111384854Sdillon	 * With the page locked we are free to fix-up the dirty bits here.
111487834Sdillon	 *
111587834Sdillon	 * We do not under any circumstances truncate the valid bits, as
111687834Sdillon	 * this will screw up bogus page replacement.
111784854Sdillon	 */
1118208574Salc	VM_OBJECT_LOCK(object);
111912767Sdyson	if (maxsize + poffset > object->un_pager.vnp.vnp_size) {
112084854Sdillon		if (object->un_pager.vnp.vnp_size > poffset) {
112184854Sdillon			int pgoff;
112284854Sdillon
112312767Sdyson			maxsize = object->un_pager.vnp.vnp_size - poffset;
112484854Sdillon			ncount = btoc(maxsize);
112584854Sdillon			if ((pgoff = (int)maxsize & PAGE_MASK) != 0) {
1126208574Salc				/*
1127208574Salc				 * If the object is locked and the following
1128208574Salc				 * conditions hold, then the page's dirty
1129208574Salc				 * field cannot be concurrently changed by a
1130208574Salc				 * pmap operation.
1131208574Salc				 */
1132208574Salc				m = ma[ncount - 1];
1133208574Salc				KASSERT(m->busy > 0,
1134208574Salc		("vnode_pager_generic_putpages: page %p is not busy", m));
1135208574Salc				KASSERT((m->flags & PG_WRITEABLE) == 0,
1136208574Salc		("vnode_pager_generic_putpages: page %p is not read-only", m));
1137208574Salc				vm_page_clear_dirty(m, pgoff, PAGE_SIZE -
1138208574Salc				    pgoff);
113984854Sdillon			}
114084854Sdillon		} else {
11418585Sdg			maxsize = 0;
114284854Sdillon			ncount = 0;
114384854Sdillon		}
11448585Sdg		if (ncount < count) {
11458585Sdg			for (i = ncount; i < count; i++) {
11467695Sdg				rtvals[i] = VM_PAGER_BAD;
11471549Srgrimes			}
11481549Srgrimes		}
11491541Srgrimes	}
1150208574Salc	VM_OBJECT_UNLOCK(object);
11517695Sdg
115270374Sdillon	/*
115370374Sdillon	 * pageouts are already clustered, use IO_ASYNC t o force a bawrite()
115470374Sdillon	 * rather then a bdwrite() to prevent paging I/O from saturating
1155108358Sdillon	 * the buffer cache.  Dummy-up the sequential heuristic to cause
1156108358Sdillon	 * large ranges to cluster.  If neither IO_SYNC or IO_ASYNC is set,
1157108358Sdillon	 * the system decides how to cluster.
115870374Sdillon	 */
115934206Sdyson	ioflags = IO_VMIO;
1160108358Sdillon	if (flags & (VM_PAGER_PUT_SYNC | VM_PAGER_PUT_INVAL))
1161108358Sdillon		ioflags |= IO_SYNC;
1162108358Sdillon	else if ((flags & VM_PAGER_CLUSTER_OK) == 0)
1163108358Sdillon		ioflags |= IO_ASYNC;
116434206Sdyson	ioflags |= (flags & VM_PAGER_PUT_INVAL) ? IO_INVAL: 0;
1165108358Sdillon	ioflags |= IO_SEQMAX << IO_SEQSHIFT;
11661827Sdg
11677695Sdg	aiov.iov_base = (caddr_t) 0;
11687695Sdg	aiov.iov_len = maxsize;
11697695Sdg	auio.uio_iov = &aiov;
11707695Sdg	auio.uio_iovcnt = 1;
117112767Sdyson	auio.uio_offset = poffset;
11727695Sdg	auio.uio_segflg = UIO_NOCOPY;
11737695Sdg	auio.uio_rw = UIO_WRITE;
11747695Sdg	auio.uio_resid = maxsize;
117583366Sjulian	auio.uio_td = (struct thread *) 0;
117691406Sjhb	error = VOP_WRITE(vp, &auio, ioflags, curthread->td_ucred);
1177170292Sattilio	PCPU_INC(cnt.v_vnodeout);
1178170292Sattilio	PCPU_ADD(cnt.v_vnodepgsout, ncount);
11793612Sdg
11808585Sdg	if (error) {
1181151951Sps		if ((ppscheck = ppsratecheck(&lastfail, &curfail, 1)))
1182151951Sps			printf("vnode_pager_putpages: I/O error %d\n", error);
11837695Sdg	}
11848585Sdg	if (auio.uio_resid) {
1185151951Sps		if (ppscheck || ppsratecheck(&lastfail, &curfail, 1))
1186194990Skib			printf("vnode_pager_putpages: residual I/O %zd at %lu\n",
1187208574Salc			    auio.uio_resid, (u_long)ma[0]->pindex);
11887695Sdg	}
118933936Sdyson	for (i = 0; i < ncount; i++) {
119033936Sdyson		rtvals[i] = VM_PAGER_OK;
11917695Sdg	}
11927695Sdg	return rtvals[0];
11937695Sdg}
1194222586Skib
1195222586Skibvoid
1196222586Skibvnode_pager_undirty_pages(vm_page_t *ma, int *rtvals, int written)
1197222586Skib{
1198222991Skib	vm_object_t obj;
1199222586Skib	int i, pos;
1200222586Skib
1201222991Skib	if (written == 0)
1202222991Skib		return;
1203222991Skib	obj = ma[0]->object;
1204222991Skib	VM_OBJECT_LOCK(obj);
1205222586Skib	for (i = 0, pos = 0; pos < written; i++, pos += PAGE_SIZE) {
1206222586Skib		if (pos < trunc_page(written)) {
1207222586Skib			rtvals[i] = VM_PAGER_OK;
1208222586Skib			vm_page_undirty(ma[i]);
1209222586Skib		} else {
1210222586Skib			/* Partially written page. */
1211222586Skib			rtvals[i] = VM_PAGER_AGAIN;
1212222586Skib			vm_page_clear_dirty(ma[i], 0, written & PAGE_MASK);
1213222586Skib		}
1214222586Skib	}
1215222991Skib	VM_OBJECT_UNLOCK(obj);
1216222586Skib}
1217