vnode_pager.c revision 179765
1139825Simp/*-
21541Srgrimes * Copyright (c) 1990 University of Utah.
31549Srgrimes * Copyright (c) 1991 The Regents of the University of California.
41549Srgrimes * All rights reserved.
59507Sdg * Copyright (c) 1993, 1994 John S. Dyson
69507Sdg * Copyright (c) 1995, David Greenman
71541Srgrimes *
81541Srgrimes * This code is derived from software contributed to Berkeley by
91541Srgrimes * the Systems Programming Group of the University of Utah Computer
101541Srgrimes * Science Department.
111541Srgrimes *
121541Srgrimes * Redistribution and use in source and binary forms, with or without
131541Srgrimes * modification, are permitted provided that the following conditions
141541Srgrimes * are met:
151541Srgrimes * 1. Redistributions of source code must retain the above copyright
161541Srgrimes *    notice, this list of conditions and the following disclaimer.
171541Srgrimes * 2. Redistributions in binary form must reproduce the above copyright
181541Srgrimes *    notice, this list of conditions and the following disclaimer in the
191541Srgrimes *    documentation and/or other materials provided with the distribution.
201541Srgrimes * 3. All advertising materials mentioning features or use of this software
2158705Scharnier *    must display the following acknowledgement:
221541Srgrimes *	This product includes software developed by the University of
231541Srgrimes *	California, Berkeley and its contributors.
241541Srgrimes * 4. Neither the name of the University nor the names of its contributors
251541Srgrimes *    may be used to endorse or promote products derived from this software
261541Srgrimes *    without specific prior written permission.
271541Srgrimes *
281541Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
291541Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
301541Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
311541Srgrimes * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
321541Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
331541Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
341541Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
351541Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
361541Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
371541Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
381541Srgrimes * SUCH DAMAGE.
391541Srgrimes *
401549Srgrimes *	from: @(#)vnode_pager.c	7.5 (Berkeley) 4/20/91
411541Srgrimes */
421541Srgrimes
431541Srgrimes/*
441541Srgrimes * Page to/from files (vnodes).
451541Srgrimes */
461541Srgrimes
471549Srgrimes/*
481549Srgrimes * TODO:
499507Sdg *	Implement VOP_GETPAGES/PUTPAGES interface for filesystems. Will
507695Sdg *	greatly re-simplify the vnode_pager.
511549Srgrimes */
521549Srgrimes
53116226Sobrien#include <sys/cdefs.h>
54116226Sobrien__FBSDID("$FreeBSD: head/sys/vm/vnode_pager.c 179765 2008-06-12 20:46:47Z ups $");
55116226Sobrien
561541Srgrimes#include <sys/param.h>
571541Srgrimes#include <sys/systm.h>
581541Srgrimes#include <sys/proc.h>
591541Srgrimes#include <sys/vnode.h>
601541Srgrimes#include <sys/mount.h>
6160041Sphk#include <sys/bio.h>
629507Sdg#include <sys/buf.h>
6312662Sdg#include <sys/vmmeter.h>
64140767Sphk#include <sys/limits.h>
6551340Sdillon#include <sys/conf.h>
66127926Salc#include <sys/sf_buf.h>
671541Srgrimes
68148875Sssouhlal#include <machine/atomic.h>
69148875Sssouhlal
701541Srgrimes#include <vm/vm.h>
7112662Sdg#include <vm/vm_object.h>
721541Srgrimes#include <vm/vm_page.h>
739507Sdg#include <vm/vm_pager.h>
7431853Sdyson#include <vm/vm_map.h>
751541Srgrimes#include <vm/vnode_pager.h>
7612662Sdg#include <vm/vm_extern.h>
771541Srgrimes
78163359Salcstatic int vnode_pager_addr(struct vnode *vp, vm_ooffset_t address,
79163359Salc    daddr_t *rtaddress, int *run);
8092727Salfredstatic int vnode_pager_input_smlfs(vm_object_t object, vm_page_t m);
8192727Salfredstatic int vnode_pager_input_old(vm_object_t object, vm_page_t m);
8292727Salfredstatic void vnode_pager_dealloc(vm_object_t);
8392727Salfredstatic int vnode_pager_getpages(vm_object_t, vm_page_t *, int, int);
8492727Salfredstatic void vnode_pager_putpages(vm_object_t, vm_page_t *, int, boolean_t, int *);
8592727Salfredstatic boolean_t vnode_pager_haspage(vm_object_t, vm_pindex_t, int *, int *);
86140767Sphkstatic vm_object_t vnode_pager_alloc(void *, vm_ooffset_t, vm_prot_t, vm_ooffset_t);
8711943Sbde
881541Srgrimesstruct pagerops vnodepagerops = {
89118466Sphk	.pgo_alloc =	vnode_pager_alloc,
90118466Sphk	.pgo_dealloc =	vnode_pager_dealloc,
91118466Sphk	.pgo_getpages =	vnode_pager_getpages,
92118466Sphk	.pgo_putpages =	vnode_pager_putpages,
93118466Sphk	.pgo_haspage =	vnode_pager_haspage,
941541Srgrimes};
951541Srgrimes
9679127Sjhbint vnode_pbuf_freecnt;
9710556Sdyson
98140767Sphk/* Create the VM system backing object for this vnode */
99140767Sphkint
100155177Syarvnode_create_vobject(struct vnode *vp, off_t isize, struct thread *td)
101140767Sphk{
102140767Sphk	vm_object_t object;
103140767Sphk	vm_ooffset_t size = isize;
104140767Sphk	struct vattr va;
105140767Sphk
106140767Sphk	if (!vn_isdisk(vp, NULL) && vn_canvmio(vp) == FALSE)
107140767Sphk		return (0);
108140767Sphk
109140767Sphk	while ((object = vp->v_object) != NULL) {
110140767Sphk		VM_OBJECT_LOCK(object);
111140767Sphk		if (!(object->flags & OBJ_DEAD)) {
112140767Sphk			VM_OBJECT_UNLOCK(object);
113140767Sphk			return (0);
114140767Sphk		}
115175294Sattilio		VOP_UNLOCK(vp, 0);
116140767Sphk		vm_object_set_flag(object, OBJ_DISCONNECTWNT);
117140767Sphk		msleep(object, VM_OBJECT_MTX(object), PDROP | PVM, "vodead", 0);
118175202Sattilio		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
119140767Sphk	}
120140767Sphk
121140767Sphk	if (size == 0) {
122140767Sphk		if (vn_isdisk(vp, NULL)) {
123140767Sphk			size = IDX_TO_OFF(INT_MAX);
124140767Sphk		} else {
125140767Sphk			if (VOP_GETATTR(vp, &va, td->td_ucred, td) != 0)
126140767Sphk				return (0);
127140767Sphk			size = va.va_size;
128140767Sphk		}
129140767Sphk	}
130140767Sphk
131140767Sphk	object = vnode_pager_alloc(vp, size, 0, 0);
132140767Sphk	/*
133140767Sphk	 * Dereference the reference we just created.  This assumes
134140767Sphk	 * that the object is associated with the vp.
135140767Sphk	 */
136140767Sphk	VM_OBJECT_LOCK(object);
137140767Sphk	object->ref_count--;
138140767Sphk	VM_OBJECT_UNLOCK(object);
139140767Sphk	vrele(vp);
140140767Sphk
141140767Sphk	KASSERT(vp->v_object != NULL, ("vnode_create_vobject: NULL object"));
142140767Sphk
143140767Sphk	return (0);
144140767Sphk}
145140767Sphk
146140929Sphkvoid
147140929Sphkvnode_destroy_vobject(struct vnode *vp)
148140929Sphk{
149140929Sphk	struct vm_object *obj;
150140929Sphk
151140929Sphk	obj = vp->v_object;
152140929Sphk	if (obj == NULL)
153140929Sphk		return;
154171599Spjd	ASSERT_VOP_ELOCKED(vp, "vnode_destroy_vobject");
155140929Sphk	VM_OBJECT_LOCK(obj);
156140929Sphk	if (obj->ref_count == 0) {
157140929Sphk		/*
158140929Sphk		 * vclean() may be called twice. The first time
159140929Sphk		 * removes the primary reference to the object,
160140929Sphk		 * the second time goes one further and is a
161140929Sphk		 * special-case to terminate the object.
162140929Sphk		 *
163140929Sphk		 * don't double-terminate the object
164140929Sphk		 */
165140929Sphk		if ((obj->flags & OBJ_DEAD) == 0)
166140929Sphk			vm_object_terminate(obj);
167140929Sphk		else
168140929Sphk			VM_OBJECT_UNLOCK(obj);
169140929Sphk	} else {
170140929Sphk		/*
171140929Sphk		 * Woe to the process that tries to page now :-).
172140929Sphk		 */
173140929Sphk		vm_pager_deallocate(obj);
174140929Sphk		VM_OBJECT_UNLOCK(obj);
175140929Sphk	}
176144610Sjeff	vp->v_object = NULL;
177140929Sphk}
178140929Sphk
179140929Sphk
1801541Srgrimes/*
1811541Srgrimes * Allocate (or lookup) pager for a vnode.
1821541Srgrimes * Handle is a vnode pointer.
18398604Salc *
18498604Salc * MPSAFE
1851541Srgrimes */
1869507Sdgvm_object_t
18740286Sdgvnode_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot,
18828751Sbde		  vm_ooffset_t offset)
1891541Srgrimes{
1909456Sdg	vm_object_t object;
1911541Srgrimes	struct vnode *vp;
1921541Srgrimes
1931541Srgrimes	/*
1941541Srgrimes	 * Pageout to vnode, no can do yet.
1951541Srgrimes	 */
1961541Srgrimes	if (handle == NULL)
1971827Sdg		return (NULL);
1981541Srgrimes
1999411Sdg	vp = (struct vnode *) handle;
2009411Sdg
2011541Srgrimes	/*
2029411Sdg	 * If the object is being terminated, wait for it to
2039411Sdg	 * go away.
2049411Sdg	 */
205179159Supsretry:
206114074Salc	while ((object = vp->v_object) != NULL) {
207114074Salc		VM_OBJECT_LOCK(object);
208179159Sups		if ((object->flags & OBJ_DEAD) == 0)
209114074Salc			break;
210137297Salc		vm_object_set_flag(object, OBJ_DISCONNECTWNT);
211114074Salc		msleep(object, VM_OBJECT_MTX(object), PDROP | PVM, "vadead", 0);
2129507Sdg	}
2135455Sdg
21432071Sdyson	if (vp->v_usecount == 0)
21532071Sdyson		panic("vnode_pager_alloc: no vnode reference");
21632071Sdyson
2179507Sdg	if (object == NULL) {
2181541Srgrimes		/*
219179159Sups		 * Add an object of the appropriate size
2201541Srgrimes		 */
22140286Sdg		object = vm_object_allocate(OBJT_VNODE, OFF_TO_IDX(round_page(size)));
2221827Sdg
22340286Sdg		object->un_pager.vnp.vnp_size = size;
2241549Srgrimes
2259507Sdg		object->handle = handle;
226145826Sjeff		if (VFS_NEEDSGIANT(vp->v_mount))
227145826Sjeff			vm_object_set_flag(object, OBJ_NEEDGIANT);
228179765Sups		VI_LOCK(vp);
229179765Sups		if (vp->v_object != NULL) {
230179159Sups			/*
231179159Sups			 * Object has been created while we were sleeping
232179159Sups			 */
233179765Sups			VI_UNLOCK(vp);
234179159Sups			vm_object_destroy(object);
235179159Sups			goto retry;
236179159Sups		}
2379507Sdg		vp->v_object = object;
238179765Sups		VI_UNLOCK(vp);
239179765Sups	} else {
24032286Sdyson		object->ref_count++;
241179765Sups		VM_OBJECT_UNLOCK(object);
242179765Sups	}
243143559Sjeff	vref(vp);
2449507Sdg	return (object);
2451541Srgrimes}
2461541Srgrimes
247114774Salc/*
248114774Salc *	The object must be locked.
249114774Salc */
25012820Sphkstatic void
2519507Sdgvnode_pager_dealloc(object)
2529507Sdg	vm_object_t object;
2531541Srgrimes{
25479242Sdillon	struct vnode *vp = object->handle;
2551541Srgrimes
2569507Sdg	if (vp == NULL)
2579507Sdg		panic("vnode_pager_dealloc: pager already dealloced");
2589507Sdg
259114774Salc	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
26033817Sdyson	vm_object_pip_wait(object, "vnpdea");
2611541Srgrimes
2629507Sdg	object->handle = NULL;
26333109Sdyson	object->type = OBJT_DEAD;
264137297Salc	if (object->flags & OBJ_DISCONNECTWNT) {
265137297Salc		vm_object_clear_flag(object, OBJ_DISCONNECTWNT);
266137297Salc		wakeup(object);
267137297Salc	}
268171599Spjd	ASSERT_VOP_ELOCKED(vp, "vnode_pager_dealloc");
2699507Sdg	vp->v_object = NULL;
270140734Sphk	vp->v_vflag &= ~VV_TEXT;
2711549Srgrimes}
2721541Srgrimes
27312820Sphkstatic boolean_t
27412767Sdysonvnode_pager_haspage(object, pindex, before, after)
2759507Sdg	vm_object_t object;
27612767Sdyson	vm_pindex_t pindex;
2779507Sdg	int *before;
2789507Sdg	int *after;
2791541Srgrimes{
2809507Sdg	struct vnode *vp = object->handle;
28196572Sphk	daddr_t bn;
28212423Sphk	int err;
28310556Sdyson	daddr_t reqblock;
28411701Sdyson	int poff;
28511701Sdyson	int bsize;
28612914Sdyson	int pagesperblock, blocksperpage;
287140723Sjeff	int vfslocked;
2881541Srgrimes
289116695Salc	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
29051340Sdillon	/*
29151340Sdillon	 * If no vp or vp is doomed or marked transparent to VM, we do not
29251340Sdillon	 * have the page.
29351340Sdillon	 */
294155384Sjeff	if (vp == NULL || vp->v_iflag & VI_DOOMED)
29532585Sdyson		return FALSE;
2961541Srgrimes	/*
297155384Sjeff	 * If the offset is beyond end of file we do
2985455Sdg	 * not have the page.
2991541Srgrimes	 */
300155384Sjeff	if (IDX_TO_OFF(pindex) >= object->un_pager.vnp.vnp_size)
3014797Sdg		return FALSE;
3021541Srgrimes
30311576Sdg	bsize = vp->v_mount->mnt_stat.f_iosize;
30410556Sdyson	pagesperblock = bsize / PAGE_SIZE;
30512914Sdyson	blocksperpage = 0;
30612914Sdyson	if (pagesperblock > 0) {
30712914Sdyson		reqblock = pindex / pagesperblock;
30812914Sdyson	} else {
30912914Sdyson		blocksperpage = (PAGE_SIZE / bsize);
31012914Sdyson		reqblock = pindex * blocksperpage;
31112914Sdyson	}
312116695Salc	VM_OBJECT_UNLOCK(object);
313140723Sjeff	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
314119045Sphk	err = VOP_BMAP(vp, reqblock, NULL, &bn, after, before);
315140723Sjeff	VFS_UNLOCK_GIANT(vfslocked);
316116695Salc	VM_OBJECT_LOCK(object);
3178876Srgrimes	if (err)
3189507Sdg		return TRUE;
31992029Seivind	if (bn == -1)
32010576Sdyson		return FALSE;
32112914Sdyson	if (pagesperblock > 0) {
32212914Sdyson		poff = pindex - (reqblock * pagesperblock);
32312914Sdyson		if (before) {
32412914Sdyson			*before *= pagesperblock;
32512914Sdyson			*before += poff;
32610669Sdyson		}
32712914Sdyson		if (after) {
32812914Sdyson			int numafter;
32912914Sdyson			*after *= pagesperblock;
33012914Sdyson			numafter = pagesperblock - (poff + 1);
33199211Srobert			if (IDX_TO_OFF(pindex + numafter) >
33299211Srobert			    object->un_pager.vnp.vnp_size) {
33399211Srobert				numafter =
33499211Srobert		    		    OFF_TO_IDX(object->un_pager.vnp.vnp_size) -
33599211Srobert				    pindex;
33612914Sdyson			}
33712914Sdyson			*after += numafter;
33812914Sdyson		}
33912914Sdyson	} else {
34012914Sdyson		if (before) {
34112914Sdyson			*before /= blocksperpage;
34212914Sdyson		}
34312914Sdyson
34412914Sdyson		if (after) {
34512914Sdyson			*after /= blocksperpage;
34612914Sdyson		}
34710556Sdyson	}
34810576Sdyson	return TRUE;
3491541Srgrimes}
3501541Srgrimes
3511541Srgrimes/*
3521541Srgrimes * Lets the VM system know about a change in size for a file.
3539507Sdg * We adjust our own internal size and flush any cached pages in
3541541Srgrimes * the associated object that are affected by the size change.
3551541Srgrimes *
3561541Srgrimes * Note: this routine may be invoked as a result of a pager put
3571541Srgrimes * operation (possibly at object termination time), so we must be careful.
3581541Srgrimes */
3591541Srgrimesvoid
3601541Srgrimesvnode_pager_setsize(vp, nsize)
3611541Srgrimes	struct vnode *vp;
36212767Sdyson	vm_ooffset_t nsize;
3631541Srgrimes{
364116167Salc	vm_object_t object;
365116167Salc	vm_page_t m;
36638542Sluoqi	vm_pindex_t nobjsize;
3671541Srgrimes
368116167Salc	if ((object = vp->v_object) == NULL)
3691541Srgrimes		return;
370116167Salc	VM_OBJECT_LOCK(object);
371116167Salc	if (nsize == object->un_pager.vnp.vnp_size) {
372116167Salc		/*
373116167Salc		 * Hasn't changed size
374116167Salc		 */
375116167Salc		VM_OBJECT_UNLOCK(object);
3763374Sdg		return;
377116167Salc	}
37838542Sluoqi	nobjsize = OFF_TO_IDX(nsize + PAGE_MASK);
3799507Sdg	if (nsize < object->un_pager.vnp.vnp_size) {
380116167Salc		/*
381116167Salc		 * File has shrunk. Toss any cached pages beyond the new EOF.
382116167Salc		 */
383116167Salc		if (nobjsize < object->size)
38438542Sluoqi			vm_object_page_remove(object, nobjsize, object->size,
385116167Salc			    FALSE);
3861827Sdg		/*
3871827Sdg		 * this gets rid of garbage at the end of a page that is now
38887834Sdillon		 * only partially backed by the vnode.
38987834Sdillon		 *
39087834Sdillon		 * XXX for some reason (I don't know yet), if we take a
39187834Sdillon		 * completely invalid page and mark it partially valid
39287834Sdillon		 * it can screw up NFS reads, so we don't allow the case.
3931827Sdg		 */
394116167Salc		if ((nsize & PAGE_MASK) &&
395121230Salc		    (m = vm_page_lookup(object, OFF_TO_IDX(nsize))) != NULL &&
396121230Salc		    m->valid != 0) {
397121230Salc			int base = (int)nsize & PAGE_MASK;
398121230Salc			int size = PAGE_SIZE - base;
39970374Sdillon
400121230Salc			/*
401121230Salc			 * Clear out partial-page garbage in case
402121230Salc			 * the page has been mapped.
403121230Salc			 */
404121230Salc			pmap_zero_page_area(m, base, size);
40570374Sdillon
406121230Salc			/*
407121230Salc			 * Clear out partial-page dirty bits.  This
408121230Salc			 * has the side effect of setting the valid
409121230Salc			 * bits, but that is ok.  There are a bunch
410121230Salc			 * of places in the VM system where we expected
411121230Salc			 * m->dirty == VM_PAGE_BITS_ALL.  The file EOF
412121230Salc			 * case is one of them.  If the page is still
413121230Salc			 * partially dirty, make it fully dirty.
414121230Salc			 *
415121230Salc			 * note that we do not clear out the valid
416121230Salc			 * bits.  This would prevent bogus_page
417121230Salc			 * replacement from working properly.
418121230Salc			 */
419173846Salc			vm_page_lock_queues();
420121230Salc			vm_page_set_validclean(m, base, size);
421121230Salc			if (m->dirty != 0)
422121230Salc				m->dirty = VM_PAGE_BITS_ALL;
423116167Salc			vm_page_unlock_queues();
424172875Salc		} else if ((nsize & PAGE_MASK) &&
425172875Salc		    __predict_false(object->cache != NULL)) {
426172875Salc			vm_page_cache_free(object, OFF_TO_IDX(nsize),
427172875Salc			    nobjsize);
4281827Sdg		}
4291541Srgrimes	}
43012767Sdyson	object->un_pager.vnp.vnp_size = nsize;
43138542Sluoqi	object->size = nobjsize;
432116167Salc	VM_OBJECT_UNLOCK(object);
4331541Srgrimes}
4341541Srgrimes
4351549Srgrimes/*
4361549Srgrimes * calculate the linear (byte) disk address of specified virtual
4371549Srgrimes * file address
4381549Srgrimes */
439163359Salcstatic int
440163359Salcvnode_pager_addr(struct vnode *vp, vm_ooffset_t address, daddr_t *rtaddress,
441163359Salc    int *run)
4421549Srgrimes{
4435455Sdg	int bsize;
4445455Sdg	int err;
44512767Sdyson	daddr_t vblock;
446146340Sbz	daddr_t voffset;
4471549Srgrimes
448138531Salc	if (address < 0)
4495455Sdg		return -1;
4505455Sdg
451155384Sjeff	if (vp->v_iflag & VI_DOOMED)
45211701Sdyson		return -1;
45311701Sdyson
4541549Srgrimes	bsize = vp->v_mount->mnt_stat.f_iosize;
4551549Srgrimes	vblock = address / bsize;
4561549Srgrimes	voffset = address % bsize;
4571549Srgrimes
458163359Salc	err = VOP_BMAP(vp, vblock, NULL, rtaddress, run, NULL);
459163359Salc	if (err == 0) {
460163359Salc		if (*rtaddress != -1)
461163359Salc			*rtaddress += voffset / DEV_BSIZE;
46292029Seivind		if (run) {
4636151Sdg			*run += 1;
4646151Sdg			*run *= bsize/PAGE_SIZE;
4656151Sdg			*run -= voffset/PAGE_SIZE;
4666151Sdg		}
4676151Sdg	}
4681549Srgrimes
469163359Salc	return (err);
4701549Srgrimes}
4711549Srgrimes
4721549Srgrimes/*
47396755Strhodes * small block filesystem vnode pager input
4741549Srgrimes */
47512820Sphkstatic int
4769507Sdgvnode_pager_input_smlfs(object, m)
4779507Sdg	vm_object_t object;
4781549Srgrimes	vm_page_t m;
4791549Srgrimes{
4805455Sdg	int i;
481137726Sphk	struct vnode *vp;
482137726Sphk	struct bufobj *bo;
4831549Srgrimes	struct buf *bp;
484127926Salc	struct sf_buf *sf;
485146340Sbz	daddr_t fileaddr;
4861549Srgrimes	vm_offset_t bsize;
4875455Sdg	int error = 0;
4881549Srgrimes
4899507Sdg	vp = object->handle;
490155384Sjeff	if (vp->v_iflag & VI_DOOMED)
49111701Sdyson		return VM_PAGER_BAD;
49211701Sdyson
4931549Srgrimes	bsize = vp->v_mount->mnt_stat.f_iosize;
4941549Srgrimes
495137726Sphk	VOP_BMAP(vp, 0, &bo, 0, NULL, NULL);
4961549Srgrimes
497127926Salc	sf = sf_buf_alloc(m, 0);
4981549Srgrimes
4991827Sdg	for (i = 0; i < PAGE_SIZE / bsize; i++) {
50086092Sdillon		vm_ooffset_t address;
5011827Sdg
50245561Sdt		if (vm_page_bits(i * bsize, bsize) & m->valid)
5035455Sdg			continue;
5041549Srgrimes
50586092Sdillon		address = IDX_TO_OFF(m->pindex) + i * bsize;
50686092Sdillon		if (address >= object->un_pager.vnp.vnp_size) {
50786092Sdillon			fileaddr = -1;
50886092Sdillon		} else {
509163359Salc			error = vnode_pager_addr(vp, address, &fileaddr, NULL);
510163359Salc			if (error)
511163359Salc				break;
51286092Sdillon		}
5131827Sdg		if (fileaddr != -1) {
51442957Sdillon			bp = getpbuf(&vnode_pbuf_freecnt);
5151549Srgrimes
5161827Sdg			/* build a minimal buffer header */
51758345Sphk			bp->b_iocmd = BIO_READ;
518119092Sphk			bp->b_iodone = bdone;
51984827Sjhb			KASSERT(bp->b_rcred == NOCRED, ("leaking read ucred"));
52084827Sjhb			KASSERT(bp->b_wcred == NOCRED, ("leaking write ucred"));
52191406Sjhb			bp->b_rcred = crhold(curthread->td_ucred);
52291406Sjhb			bp->b_wcred = crhold(curthread->td_ucred);
523127926Salc			bp->b_data = (caddr_t)sf_buf_kva(sf) + i * bsize;
5246626Sdg			bp->b_blkno = fileaddr;
525137726Sphk			pbgetbo(bo, bp);
5261549Srgrimes			bp->b_bcount = bsize;
5271549Srgrimes			bp->b_bufsize = bsize;
52870374Sdillon			bp->b_runningbufspace = bp->b_bufsize;
529148875Sssouhlal			atomic_add_int(&runningbufspace, bp->b_runningbufspace);
5301827Sdg
5311827Sdg			/* do the input */
532121205Sphk			bp->b_iooffset = dbtob(bp->b_blkno);
533136927Sphk			bstrategy(bp);
5341549Srgrimes
535119092Sphk			bwait(bp, PVM, "vnsrd");
536119092Sphk
53758934Sphk			if ((bp->b_ioflags & BIO_ERROR) != 0)
5381549Srgrimes				error = EIO;
5391549Srgrimes
5401827Sdg			/*
5411827Sdg			 * free the buffer header back to the swap buffer pool
5421827Sdg			 */
543137726Sphk			pbrelbo(bp);
54442957Sdillon			relpbuf(bp, &vnode_pbuf_freecnt);
5451827Sdg			if (error)
5461549Srgrimes				break;
5475455Sdg
548121264Salc			VM_OBJECT_LOCK(object);
549107189Salc			vm_page_lock_queues();
55015583Sphk			vm_page_set_validclean(m, (i * bsize) & PAGE_MASK, bsize);
551107189Salc			vm_page_unlock_queues();
552121264Salc			VM_OBJECT_UNLOCK(object);
5531549Srgrimes		} else {
554121264Salc			VM_OBJECT_LOCK(object);
555107189Salc			vm_page_lock_queues();
55615583Sphk			vm_page_set_validclean(m, (i * bsize) & PAGE_MASK, bsize);
557107189Salc			vm_page_unlock_queues();
558121264Salc			VM_OBJECT_UNLOCK(object);
559127926Salc			bzero((caddr_t)sf_buf_kva(sf) + i * bsize, bsize);
5601549Srgrimes		}
5611549Srgrimes	}
562127926Salc	sf_buf_free(sf);
563107347Salc	vm_page_lock_queues();
56460755Speter	pmap_clear_modify(m);
565107347Salc	vm_page_unlock_queues();
5661827Sdg	if (error) {
5674207Sdg		return VM_PAGER_ERROR;
5681549Srgrimes	}
5691549Srgrimes	return VM_PAGER_OK;
5701549Srgrimes
5711549Srgrimes}
5721549Srgrimes
5731549Srgrimes
5741549Srgrimes/*
575139296Sphk * old style vnode pager input routine
5761549Srgrimes */
57712820Sphkstatic int
5789507Sdgvnode_pager_input_old(object, m)
5799507Sdg	vm_object_t object;
5801549Srgrimes	vm_page_t m;
5811549Srgrimes{
5821541Srgrimes	struct uio auio;
5831541Srgrimes	struct iovec aiov;
5845455Sdg	int error;
5855455Sdg	int size;
586127926Salc	struct sf_buf *sf;
58777398Sjhb	struct vnode *vp;
5881549Srgrimes
589121495Salc	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
5901549Srgrimes	error = 0;
5911827Sdg
5921549Srgrimes	/*
5931549Srgrimes	 * Return failure if beyond current EOF
5941549Srgrimes	 */
59512767Sdyson	if (IDX_TO_OFF(m->pindex) >= object->un_pager.vnp.vnp_size) {
5961549Srgrimes		return VM_PAGER_BAD;
5971549Srgrimes	} else {
5981549Srgrimes		size = PAGE_SIZE;
59912767Sdyson		if (IDX_TO_OFF(m->pindex) + size > object->un_pager.vnp.vnp_size)
60012767Sdyson			size = object->un_pager.vnp.vnp_size - IDX_TO_OFF(m->pindex);
601121495Salc		vp = object->handle;
602121495Salc		VM_OBJECT_UNLOCK(object);
6037178Sdg
6045455Sdg		/*
6055455Sdg		 * Allocate a kernel virtual address and initialize so that
6065455Sdg		 * we can use VOP_READ/WRITE routines.
6075455Sdg		 */
608127926Salc		sf = sf_buf_alloc(m, 0);
6097178Sdg
610127926Salc		aiov.iov_base = (caddr_t)sf_buf_kva(sf);
6111549Srgrimes		aiov.iov_len = size;
6121549Srgrimes		auio.uio_iov = &aiov;
6131549Srgrimes		auio.uio_iovcnt = 1;
61412767Sdyson		auio.uio_offset = IDX_TO_OFF(m->pindex);
6151549Srgrimes		auio.uio_segflg = UIO_SYSSPACE;
6161549Srgrimes		auio.uio_rw = UIO_READ;
6171549Srgrimes		auio.uio_resid = size;
61883366Sjulian		auio.uio_td = curthread;
6191549Srgrimes
62091406Sjhb		error = VOP_READ(vp, &auio, 0, curthread->td_ucred);
6211549Srgrimes		if (!error) {
62279242Sdillon			int count = size - auio.uio_resid;
6231549Srgrimes
6241549Srgrimes			if (count == 0)
6251549Srgrimes				error = EINVAL;
6261549Srgrimes			else if (count != PAGE_SIZE)
627127926Salc				bzero((caddr_t)sf_buf_kva(sf) + count,
628127926Salc				    PAGE_SIZE - count);
6291549Srgrimes		}
630127926Salc		sf_buf_free(sf);
631121230Salc
632121230Salc		VM_OBJECT_LOCK(object);
6331549Srgrimes	}
634107347Salc	vm_page_lock_queues();
63560755Speter	pmap_clear_modify(m);
63649945Salc	vm_page_undirty(m);
637121230Salc	vm_page_unlock_queues();
63839739Srvb	if (!error)
63939739Srvb		m->valid = VM_PAGE_BITS_ALL;
6404207Sdg	return error ? VM_PAGER_ERROR : VM_PAGER_OK;
6411549Srgrimes}
6421549Srgrimes
6431549Srgrimes/*
6441549Srgrimes * generic vnode pager input routine
6451549Srgrimes */
64610556Sdyson
64733847Smsmith/*
64876827Salfred * Local media VFS's that do not implement their own VOP_GETPAGES
64999211Srobert * should have their VOP_GETPAGES call to vnode_pager_generic_getpages()
65099211Srobert * to implement the previous behaviour.
65133847Smsmith *
65233847Smsmith * All other FS's should use the bypass to get to the local media
65333847Smsmith * backing vp's VOP_GETPAGES.
65433847Smsmith */
65512820Sphkstatic int
6569507Sdgvnode_pager_getpages(object, m, count, reqpage)
6579507Sdg	vm_object_t object;
6581549Srgrimes	vm_page_t *m;
6599507Sdg	int count;
6609507Sdg	int reqpage;
6611549Srgrimes{
66210556Sdyson	int rtval;
66310556Sdyson	struct vnode *vp;
66434403Smsmith	int bytes = count * PAGE_SIZE;
665140723Sjeff	int vfslocked;
66632286Sdyson
66710556Sdyson	vp = object->handle;
668116279Salc	VM_OBJECT_UNLOCK(object);
669140723Sjeff	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
67034403Smsmith	rtval = VOP_GETPAGES(vp, m, bytes, reqpage, 0);
67176827Salfred	KASSERT(rtval != EOPNOTSUPP,
67276827Salfred	    ("vnode_pager: FS getpages not implemented\n"));
673140723Sjeff	VFS_UNLOCK_GIANT(vfslocked);
674116279Salc	VM_OBJECT_LOCK(object);
67533847Smsmith	return rtval;
67610556Sdyson}
67710556Sdyson
67833847Smsmith/*
67933847Smsmith * This is now called from local media FS's to operate against their
68033847Smsmith * own vnodes if they fail to implement VOP_GETPAGES.
68133847Smsmith */
68233847Smsmithint
68333847Smsmithvnode_pager_generic_getpages(vp, m, bytecount, reqpage)
68433847Smsmith	struct vnode *vp;
68510556Sdyson	vm_page_t *m;
68633847Smsmith	int bytecount;
68710556Sdyson	int reqpage;
68810556Sdyson{
68933847Smsmith	vm_object_t object;
69012767Sdyson	vm_offset_t kva;
69134206Sdyson	off_t foff, tfoff, nextoff;
692146340Sbz	int i, j, size, bsize, first;
693163140Salc	daddr_t firstaddr, reqblock;
694137726Sphk	struct bufobj *bo;
6956151Sdg	int runpg;
6966151Sdg	int runend;
6977178Sdg	struct buf *bp;
69833847Smsmith	int count;
699163210Salc	int error;
7001549Srgrimes
70133847Smsmith	object = vp->v_object;
70233847Smsmith	count = bytecount / PAGE_SIZE;
70333847Smsmith
704137726Sphk	KASSERT(vp->v_type != VCHR && vp->v_type != VBLK,
705137726Sphk	    ("vnode_pager_generic_getpages does not support devices"));
706155384Sjeff	if (vp->v_iflag & VI_DOOMED)
70711701Sdyson		return VM_PAGER_BAD;
70811701Sdyson
7091549Srgrimes	bsize = vp->v_mount->mnt_stat.f_iosize;
7101549Srgrimes
7111549Srgrimes	/* get the UNDERLYING device for the file with VOP_BMAP() */
7121827Sdg
7131549Srgrimes	/*
7141827Sdg	 * originally, we did not check for an error return value -- assuming
7151827Sdg	 * an fs always has a bmap entry point -- that assumption is wrong!!!
7161549Srgrimes	 */
71712767Sdyson	foff = IDX_TO_OFF(m[reqpage]->pindex);
7181827Sdg
7191549Srgrimes	/*
7201887Sdg	 * if we can't bmap, use old VOP code
7211549Srgrimes	 */
722163210Salc	error = VOP_BMAP(vp, foff / bsize, &bo, &reqblock, NULL, NULL);
723163210Salc	if (error == EOPNOTSUPP) {
724116512Salc		VM_OBJECT_LOCK(object);
725100832Salc		vm_page_lock_queues();
726100832Salc		for (i = 0; i < count; i++)
727100832Salc			if (i != reqpage)
72875692Salfred				vm_page_free(m[i]);
729100832Salc		vm_page_unlock_queues();
730170292Sattilio		PCPU_INC(cnt.v_vnodein);
731170292Sattilio		PCPU_INC(cnt.v_vnodepgsin);
732121495Salc		error = vnode_pager_input_old(object, m[reqpage]);
733121495Salc		VM_OBJECT_UNLOCK(object);
734121495Salc		return (error);
735163210Salc	} else if (error != 0) {
736163210Salc		VM_OBJECT_LOCK(object);
737163210Salc		vm_page_lock_queues();
738163210Salc		for (i = 0; i < count; i++)
739163210Salc			if (i != reqpage)
740163210Salc				vm_page_free(m[i]);
741163210Salc		vm_page_unlock_queues();
742163210Salc		VM_OBJECT_UNLOCK(object);
743163210Salc		return (VM_PAGER_ERROR);
7441549Srgrimes
7451827Sdg		/*
7461827Sdg		 * if the blocksize is smaller than a page size, then use
7471827Sdg		 * special small filesystem code.  NFS sometimes has a small
7481827Sdg		 * blocksize, but it can handle large reads itself.
7491827Sdg		 */
7501827Sdg	} else if ((PAGE_SIZE / bsize) > 1 &&
75138866Sbde	    (vp->v_mount->mnt_stat.f_type != nfs_mount_type)) {
752116512Salc		VM_OBJECT_LOCK(object);
753100832Salc		vm_page_lock_queues();
754100832Salc		for (i = 0; i < count; i++)
755100832Salc			if (i != reqpage)
75675692Salfred				vm_page_free(m[i]);
757100832Salc		vm_page_unlock_queues();
758116512Salc		VM_OBJECT_UNLOCK(object);
759170292Sattilio		PCPU_INC(cnt.v_vnodein);
760170292Sattilio		PCPU_INC(cnt.v_vnodepgsin);
7619507Sdg		return vnode_pager_input_smlfs(object, m[reqpage]);
7621549Srgrimes	}
76345347Sjulian
7641549Srgrimes	/*
76545347Sjulian	 * If we have a completely valid page available to us, we can
76645347Sjulian	 * clean up and return.  Otherwise we have to re-read the
76745347Sjulian	 * media.
7681549Srgrimes	 */
769121227Salc	VM_OBJECT_LOCK(object);
77045347Sjulian	if (m[reqpage]->valid == VM_PAGE_BITS_ALL) {
771100832Salc		vm_page_lock_queues();
772100832Salc		for (i = 0; i < count; i++)
7735455Sdg			if (i != reqpage)
77475692Salfred				vm_page_free(m[i]);
775100832Salc		vm_page_unlock_queues();
776116512Salc		VM_OBJECT_UNLOCK(object);
7775455Sdg		return VM_PAGER_OK;
778163140Salc	} else if (reqblock == -1) {
779163140Salc		pmap_zero_page(m[reqpage]);
780163140Salc		vm_page_undirty(m[reqpage]);
781163140Salc		m[reqpage]->valid = VM_PAGE_BITS_ALL;
782163140Salc		vm_page_lock_queues();
783163140Salc		for (i = 0; i < count; i++)
784163140Salc			if (i != reqpage)
785163140Salc				vm_page_free(m[i]);
786163140Salc		vm_page_unlock_queues();
787163140Salc		VM_OBJECT_UNLOCK(object);
788163140Salc		return (VM_PAGER_OK);
7891549Srgrimes	}
79045347Sjulian	m[reqpage]->valid = 0;
791121227Salc	VM_OBJECT_UNLOCK(object);
7927178Sdg
7935455Sdg	/*
7945455Sdg	 * here on direct device I/O
7955455Sdg	 */
79692029Seivind	firstaddr = -1;
7971549Srgrimes
7981549Srgrimes	/*
7996151Sdg	 * calculate the run that includes the required page
8001549Srgrimes	 */
80192029Seivind	for (first = 0, i = 0; i < count; i = runend) {
802163359Salc		if (vnode_pager_addr(vp, IDX_TO_OFF(m[i]->pindex), &firstaddr,
803163359Salc		    &runpg) != 0) {
804163359Salc			VM_OBJECT_LOCK(object);
805163359Salc			vm_page_lock_queues();
806163359Salc			for (; i < count; i++)
807163359Salc				if (i != reqpage)
808163359Salc					vm_page_free(m[i]);
809163359Salc			vm_page_unlock_queues();
810163359Salc			VM_OBJECT_UNLOCK(object);
811163359Salc			return (VM_PAGER_ERROR);
812163359Salc		}
8136151Sdg		if (firstaddr == -1) {
814116512Salc			VM_OBJECT_LOCK(object);
8159507Sdg			if (i == reqpage && foff < object->un_pager.vnp.vnp_size) {
816146340Sbz				panic("vnode_pager_getpages: unexpected missing page: firstaddr: %jd, foff: 0x%jx%08jx, vnp_size: 0x%jx%08jx",
817146340Sbz				    (intmax_t)firstaddr, (uintmax_t)(foff >> 32),
818106603Smux				    (uintmax_t)foff,
819106603Smux				    (uintmax_t)
820106603Smux				    (object->un_pager.vnp.vnp_size >> 32),
821106603Smux				    (uintmax_t)object->un_pager.vnp.vnp_size);
8226151Sdg			}
823100832Salc			vm_page_lock_queues();
82475692Salfred			vm_page_free(m[i]);
825100832Salc			vm_page_unlock_queues();
826116512Salc			VM_OBJECT_UNLOCK(object);
8276151Sdg			runend = i + 1;
8286151Sdg			first = runend;
8296151Sdg			continue;
8301549Srgrimes		}
8316151Sdg		runend = i + runpg;
8329507Sdg		if (runend <= reqpage) {
833116512Salc			VM_OBJECT_LOCK(object);
834100832Salc			vm_page_lock_queues();
835100832Salc			for (j = i; j < runend; j++)
83675692Salfred				vm_page_free(m[j]);
837100832Salc			vm_page_unlock_queues();
838116512Salc			VM_OBJECT_UNLOCK(object);
8391549Srgrimes		} else {
8409507Sdg			if (runpg < (count - first)) {
841116512Salc				VM_OBJECT_LOCK(object);
842100832Salc				vm_page_lock_queues();
8439507Sdg				for (i = first + runpg; i < count; i++)
84475692Salfred					vm_page_free(m[i]);
845100832Salc				vm_page_unlock_queues();
846116512Salc				VM_OBJECT_UNLOCK(object);
8476151Sdg				count = first + runpg;
8486151Sdg			}
8496151Sdg			break;
8501549Srgrimes		}
8516151Sdg		first = runend;
8521549Srgrimes	}
8531549Srgrimes
8541549Srgrimes	/*
8551827Sdg	 * the first and last page have been calculated now, move input pages
8561827Sdg	 * to be zero based...
8571549Srgrimes	 */
8581549Srgrimes	if (first != 0) {
859163361Salc		m += first;
8601549Srgrimes		count -= first;
8611549Srgrimes		reqpage -= first;
8621549Srgrimes	}
8636151Sdg
8641549Srgrimes	/*
8651549Srgrimes	 * calculate the file virtual address for the transfer
8661549Srgrimes	 */
86712767Sdyson	foff = IDX_TO_OFF(m[0]->pindex);
8681827Sdg
8691549Srgrimes	/*
8701549Srgrimes	 * calculate the size of the transfer
8711549Srgrimes	 */
8721549Srgrimes	size = count * PAGE_SIZE;
873134892Sphk	KASSERT(count > 0, ("zero count"));
8749507Sdg	if ((foff + size) > object->un_pager.vnp.vnp_size)
8759507Sdg		size = object->un_pager.vnp.vnp_size - foff;
876134892Sphk	KASSERT(size > 0, ("zero size"));
8771549Srgrimes
8781549Srgrimes	/*
87951340Sdillon	 * round up physical size for real devices.
8801549Srgrimes	 */
881137726Sphk	if (1) {
882137726Sphk		int secmask = bo->bo_bsize - 1;
883136977Sphk		KASSERT(secmask < PAGE_SIZE && secmask > 0,
884136977Sphk		    ("vnode_pager_generic_getpages: sector size %d too large",
885136977Sphk		    secmask + 1));
88651340Sdillon		size = (size + secmask) & ~secmask;
88751340Sdillon	}
8881549Srgrimes
88942957Sdillon	bp = getpbuf(&vnode_pbuf_freecnt);
8905455Sdg	kva = (vm_offset_t) bp->b_data;
8911887Sdg
8921549Srgrimes	/*
8931549Srgrimes	 * and map the pages to be read into the kva
8941549Srgrimes	 */
8951887Sdg	pmap_qenter(kva, m, count);
8961549Srgrimes
8971549Srgrimes	/* build a minimal buffer header */
89858345Sphk	bp->b_iocmd = BIO_READ;
899119092Sphk	bp->b_iodone = bdone;
90084827Sjhb	KASSERT(bp->b_rcred == NOCRED, ("leaking read ucred"));
90184827Sjhb	KASSERT(bp->b_wcred == NOCRED, ("leaking write ucred"));
90291406Sjhb	bp->b_rcred = crhold(curthread->td_ucred);
90391406Sjhb	bp->b_wcred = crhold(curthread->td_ucred);
9046626Sdg	bp->b_blkno = firstaddr;
905137726Sphk	pbgetbo(bo, bp);
9061549Srgrimes	bp->b_bcount = size;
9071549Srgrimes	bp->b_bufsize = size;
90870374Sdillon	bp->b_runningbufspace = bp->b_bufsize;
909148875Sssouhlal	atomic_add_int(&runningbufspace, bp->b_runningbufspace);
9101549Srgrimes
911170292Sattilio	PCPU_INC(cnt.v_vnodein);
912170292Sattilio	PCPU_ADD(cnt.v_vnodepgsin, count);
9133612Sdg
9141549Srgrimes	/* do the input */
915121205Sphk	bp->b_iooffset = dbtob(bp->b_blkno);
916136927Sphk	bstrategy(bp);
9173612Sdg
918119092Sphk	bwait(bp, PVM, "vnread");
9191549Srgrimes
92058934Sphk	if ((bp->b_ioflags & BIO_ERROR) != 0)
9211549Srgrimes		error = EIO;
9221549Srgrimes
9231549Srgrimes	if (!error) {
9241549Srgrimes		if (size != count * PAGE_SIZE)
9251827Sdg			bzero((caddr_t) kva + size, PAGE_SIZE * count - size);
9261549Srgrimes	}
9275455Sdg	pmap_qremove(kva, count);
9281549Srgrimes
9291549Srgrimes	/*
9301549Srgrimes	 * free the buffer header back to the swap buffer pool
9311549Srgrimes	 */
932137726Sphk	pbrelbo(bp);
93342957Sdillon	relpbuf(bp, &vnode_pbuf_freecnt);
9341549Srgrimes
935116512Salc	VM_OBJECT_LOCK(object);
936100736Salc	vm_page_lock_queues();
93734206Sdyson	for (i = 0, tfoff = foff; i < count; i++, tfoff = nextoff) {
93834206Sdyson		vm_page_t mt;
93934206Sdyson
94034206Sdyson		nextoff = tfoff + PAGE_SIZE;
94134206Sdyson		mt = m[i];
94234206Sdyson
94347239Sdt		if (nextoff <= object->un_pager.vnp.vnp_size) {
94445347Sjulian			/*
94545347Sjulian			 * Read filled up entire page.
94645347Sjulian			 */
94734206Sdyson			mt->valid = VM_PAGE_BITS_ALL;
94849945Salc			vm_page_undirty(mt);	/* should be an assert? XXX */
94960755Speter			pmap_clear_modify(mt);
95034206Sdyson		} else {
95145347Sjulian			/*
95245347Sjulian			 * Read did not fill up entire page.  Since this
95345347Sjulian			 * is getpages, the page may be mapped, so we have
95445347Sjulian			 * to zero the invalid portions of the page even
95545347Sjulian			 * though we aren't setting them valid.
95645347Sjulian			 *
95745347Sjulian			 * Currently we do not set the entire page valid,
95845347Sjulian			 * we just try to clear the piece that we couldn't
95945347Sjulian			 * read.
96045347Sjulian			 */
96147239Sdt			vm_page_set_validclean(mt, 0,
96247239Sdt			    object->un_pager.vnp.vnp_size - tfoff);
96346349Salc			/* handled by vm_fault now */
96446349Salc			/* vm_page_zero_invalid(mt, FALSE); */
96534206Sdyson		}
96634206Sdyson
9671549Srgrimes		if (i != reqpage) {
9681827Sdg
9691549Srgrimes			/*
9701827Sdg			 * whether or not to leave the page activated is up in
9711827Sdg			 * the air, but we should put the page on a page queue
9721827Sdg			 * somewhere. (it already is in the object). Result:
97358634Scharnier			 * It appears that empirical results show that
9741827Sdg			 * deactivating pages is best.
9751549Srgrimes			 */
9761827Sdg
9771549Srgrimes			/*
9781827Sdg			 * just in case someone was asking for this page we
9791827Sdg			 * now tell them that it is ok to use
9801549Srgrimes			 */
9811549Srgrimes			if (!error) {
982161125Salc				if (mt->oflags & VPO_WANTED)
98334206Sdyson					vm_page_activate(mt);
98433109Sdyson				else
98534206Sdyson					vm_page_deactivate(mt);
98638799Sdfr				vm_page_wakeup(mt);
9871549Srgrimes			} else {
98875692Salfred				vm_page_free(mt);
9891549Srgrimes			}
9901549Srgrimes		}
9911549Srgrimes	}
992100736Salc	vm_page_unlock_queues();
993116512Salc	VM_OBJECT_UNLOCK(object);
9941549Srgrimes	if (error) {
9959507Sdg		printf("vnode_pager_getpages: I/O read error\n");
9961549Srgrimes	}
9974207Sdg	return (error ? VM_PAGER_ERROR : VM_PAGER_OK);
9981549Srgrimes}
9991549Srgrimes
100033847Smsmith/*
100133847Smsmith * EOPNOTSUPP is no longer legal.  For local media VFS's that do not
100233847Smsmith * implement their own VOP_PUTPAGES, their VOP_PUTPAGES should call to
100333847Smsmith * vnode_pager_generic_putpages() to implement the previous behaviour.
100433847Smsmith *
100533847Smsmith * All other FS's should use the bypass to get to the local media
100633847Smsmith * backing vp's VOP_PUTPAGES.
100733847Smsmith */
100843129Sdillonstatic void
100910556Sdysonvnode_pager_putpages(object, m, count, sync, rtvals)
101010556Sdyson	vm_object_t object;
101110556Sdyson	vm_page_t *m;
101210556Sdyson	int count;
101310556Sdyson	boolean_t sync;
101410556Sdyson	int *rtvals;
101510556Sdyson{
101610556Sdyson	int rtval;
101710556Sdyson	struct vnode *vp;
101862976Smckusick	struct mount *mp;
101934403Smsmith	int bytes = count * PAGE_SIZE;
102018973Sdyson
102144321Salc	/*
102244321Salc	 * Force synchronous operation if we are extremely low on memory
102344321Salc	 * to prevent a low-memory deadlock.  VOP operations often need to
102444321Salc	 * allocate more memory to initiate the I/O ( i.e. do a BMAP
102544321Salc	 * operation ).  The swapper handles the case by limiting the amount
102644321Salc	 * of asynchronous I/O, but that sort of solution doesn't scale well
102744321Salc	 * for the vnode pager without a lot of work.
102844321Salc	 *
102944321Salc	 * Also, the backing vnode's iodone routine may not wake the pageout
103044321Salc	 * daemon up.  This should be probably be addressed XXX.
103144321Salc	 */
103244321Salc
1033170170Sattilio	if ((cnt.v_free_count + cnt.v_cache_count) < cnt.v_pageout_free_min)
103444321Salc		sync |= OBJPC_SYNC;
103544321Salc
103644321Salc	/*
103744321Salc	 * Call device-specific putpages function
103844321Salc	 */
103910556Sdyson	vp = object->handle;
1040121455Salc	VM_OBJECT_UNLOCK(object);
104162976Smckusick	if (vp->v_type != VREG)
104262976Smckusick		mp = NULL;
104334403Smsmith	rtval = VOP_PUTPAGES(vp, m, bytes, sync, rtvals, 0);
104476827Salfred	KASSERT(rtval != EOPNOTSUPP,
104576827Salfred	    ("vnode_pager: stale FS putpages\n"));
1046121455Salc	VM_OBJECT_LOCK(object);
104710556Sdyson}
104810556Sdyson
104933847Smsmith
10501549Srgrimes/*
105133847Smsmith * This is now called from local media FS's to operate against their
105245057Seivind * own vnodes if they fail to implement VOP_PUTPAGES.
105370374Sdillon *
105470374Sdillon * This is typically called indirectly via the pageout daemon and
105570374Sdillon * clustering has already typically occured, so in general we ask the
105670374Sdillon * underlying filesystem to write the data out asynchronously rather
105770374Sdillon * then delayed.
10581549Srgrimes */
105933847Smsmithint
106034206Sdysonvnode_pager_generic_putpages(vp, m, bytecount, flags, rtvals)
106133847Smsmith	struct vnode *vp;
10621549Srgrimes	vm_page_t *m;
106333847Smsmith	int bytecount;
106434206Sdyson	int flags;
10655455Sdg	int *rtvals;
10661549Srgrimes{
10677695Sdg	int i;
106833847Smsmith	vm_object_t object;
106933847Smsmith	int count;
10701549Srgrimes
10717695Sdg	int maxsize, ncount;
107212767Sdyson	vm_ooffset_t poffset;
10737695Sdg	struct uio auio;
10747695Sdg	struct iovec aiov;
10757695Sdg	int error;
107634206Sdyson	int ioflags;
1077151951Sps	int ppscheck = 0;
1078151951Sps	static struct timeval lastfail;
1079151951Sps	static int curfail;
10801549Srgrimes
108133847Smsmith	object = vp->v_object;
108233847Smsmith	count = bytecount / PAGE_SIZE;
108333847Smsmith
10841827Sdg	for (i = 0; i < count; i++)
10851549Srgrimes		rtvals[i] = VM_PAGER_AGAIN;
10861549Srgrimes
1087138406Salc	if ((int64_t)m[0]->pindex < 0) {
1088119544Smarcel		printf("vnode_pager_putpages: attempt to write meta-data!!! -- 0x%lx(%lx)\n",
1089119544Smarcel			(long)m[0]->pindex, (u_long)m[0]->dirty);
10907695Sdg		rtvals[0] = VM_PAGER_BAD;
10917695Sdg		return VM_PAGER_BAD;
10925455Sdg	}
10937178Sdg
10947695Sdg	maxsize = count * PAGE_SIZE;
10957695Sdg	ncount = count;
10961549Srgrimes
109712767Sdyson	poffset = IDX_TO_OFF(m[0]->pindex);
109884854Sdillon
109984854Sdillon	/*
110084854Sdillon	 * If the page-aligned write is larger then the actual file we
110184854Sdillon	 * have to invalidate pages occuring beyond the file EOF.  However,
110284854Sdillon	 * there is an edge case where a file may not be page-aligned where
110384854Sdillon	 * the last page is partially invalid.  In this case the filesystem
110484854Sdillon	 * may not properly clear the dirty bits for the entire page (which
110584854Sdillon	 * could be VM_PAGE_BITS_ALL due to the page having been mmap()d).
110684854Sdillon	 * With the page locked we are free to fix-up the dirty bits here.
110787834Sdillon	 *
110887834Sdillon	 * We do not under any circumstances truncate the valid bits, as
110987834Sdillon	 * this will screw up bogus page replacement.
111084854Sdillon	 */
111112767Sdyson	if (maxsize + poffset > object->un_pager.vnp.vnp_size) {
111284854Sdillon		if (object->un_pager.vnp.vnp_size > poffset) {
111384854Sdillon			int pgoff;
111484854Sdillon
111512767Sdyson			maxsize = object->un_pager.vnp.vnp_size - poffset;
111684854Sdillon			ncount = btoc(maxsize);
111784854Sdillon			if ((pgoff = (int)maxsize & PAGE_MASK) != 0) {
1118119370Salc				vm_page_lock_queues();
111984854Sdillon				vm_page_clear_dirty(m[ncount - 1], pgoff,
112084854Sdillon					PAGE_SIZE - pgoff);
1121119370Salc				vm_page_unlock_queues();
112284854Sdillon			}
112384854Sdillon		} else {
11248585Sdg			maxsize = 0;
112584854Sdillon			ncount = 0;
112684854Sdillon		}
11278585Sdg		if (ncount < count) {
11288585Sdg			for (i = ncount; i < count; i++) {
11297695Sdg				rtvals[i] = VM_PAGER_BAD;
11301549Srgrimes			}
11311549Srgrimes		}
11321541Srgrimes	}
11337695Sdg
113470374Sdillon	/*
113570374Sdillon	 * pageouts are already clustered, use IO_ASYNC t o force a bawrite()
113670374Sdillon	 * rather then a bdwrite() to prevent paging I/O from saturating
1137108358Sdillon	 * the buffer cache.  Dummy-up the sequential heuristic to cause
1138108358Sdillon	 * large ranges to cluster.  If neither IO_SYNC or IO_ASYNC is set,
1139108358Sdillon	 * the system decides how to cluster.
114070374Sdillon	 */
114134206Sdyson	ioflags = IO_VMIO;
1142108358Sdillon	if (flags & (VM_PAGER_PUT_SYNC | VM_PAGER_PUT_INVAL))
1143108358Sdillon		ioflags |= IO_SYNC;
1144108358Sdillon	else if ((flags & VM_PAGER_CLUSTER_OK) == 0)
1145108358Sdillon		ioflags |= IO_ASYNC;
114634206Sdyson	ioflags |= (flags & VM_PAGER_PUT_INVAL) ? IO_INVAL: 0;
1147108358Sdillon	ioflags |= IO_SEQMAX << IO_SEQSHIFT;
11481827Sdg
11497695Sdg	aiov.iov_base = (caddr_t) 0;
11507695Sdg	aiov.iov_len = maxsize;
11517695Sdg	auio.uio_iov = &aiov;
11527695Sdg	auio.uio_iovcnt = 1;
115312767Sdyson	auio.uio_offset = poffset;
11547695Sdg	auio.uio_segflg = UIO_NOCOPY;
11557695Sdg	auio.uio_rw = UIO_WRITE;
11567695Sdg	auio.uio_resid = maxsize;
115783366Sjulian	auio.uio_td = (struct thread *) 0;
115891406Sjhb	error = VOP_WRITE(vp, &auio, ioflags, curthread->td_ucred);
1159170292Sattilio	PCPU_INC(cnt.v_vnodeout);
1160170292Sattilio	PCPU_ADD(cnt.v_vnodepgsout, ncount);
11613612Sdg
11628585Sdg	if (error) {
1163151951Sps		if ((ppscheck = ppsratecheck(&lastfail, &curfail, 1)))
1164151951Sps			printf("vnode_pager_putpages: I/O error %d\n", error);
11657695Sdg	}
11668585Sdg	if (auio.uio_resid) {
1167151951Sps		if (ppscheck || ppsratecheck(&lastfail, &curfail, 1))
1168151951Sps			printf("vnode_pager_putpages: residual I/O %d at %lu\n",
1169151951Sps			    auio.uio_resid, (u_long)m[0]->pindex);
11707695Sdg	}
117133936Sdyson	for (i = 0; i < ncount; i++) {
117233936Sdyson		rtvals[i] = VM_PAGER_OK;
11737695Sdg	}
11747695Sdg	return rtvals[0];
11757695Sdg}
11761549Srgrimes
11777695Sdgstruct vnode *
1178120183Salcvnode_pager_lock(vm_object_t first_object)
11799507Sdg{
1180120183Salc	struct vnode *vp;
1181120183Salc	vm_object_t backing_object, object;
118222521Sdyson
1183120183Salc	VM_OBJECT_LOCK_ASSERT(first_object, MA_OWNED);
1184120183Salc	for (object = first_object; object != NULL; object = backing_object) {
1185120183Salc		if (object->type != OBJT_VNODE) {
1186120183Salc			if ((backing_object = object->backing_object) != NULL)
1187120183Salc				VM_OBJECT_LOCK(backing_object);
1188120183Salc			if (object != first_object)
1189120183Salc				VM_OBJECT_UNLOCK(object);
11907695Sdg			continue;
1191120183Salc		}
1192120183Salc	retry:
119377094Sjhb		if (object->flags & OBJ_DEAD) {
1194120183Salc			if (object != first_object)
1195120183Salc				VM_OBJECT_UNLOCK(object);
119632585Sdyson			return NULL;
119777094Sjhb		}
1198120183Salc		vp = object->handle;
1199120183Salc		VI_LOCK(vp);
1200120183Salc		VM_OBJECT_UNLOCK(object);
1201120183Salc		if (first_object != object)
1202120183Salc			VM_OBJECT_UNLOCK(first_object);
1203145826Sjeff		VFS_ASSERT_GIANT(vp->v_mount);
1204144367Sjeff		if (vget(vp, LK_CANRECURSE | LK_INTERLOCK |
1205120183Salc		    LK_RETRY | LK_SHARED, curthread)) {
1206120183Salc			VM_OBJECT_LOCK(first_object);
1207120183Salc			if (object != first_object)
1208120183Salc				VM_OBJECT_LOCK(object);
1209120183Salc			if (object->type != OBJT_VNODE) {
1210120183Salc				if (object != first_object)
1211120183Salc					VM_OBJECT_UNLOCK(object);
121234611Sdyson				return NULL;
1213120183Salc			}
121432585Sdyson			printf("vnode_pager_lock: retrying\n");
1215120183Salc			goto retry;
121632585Sdyson		}
1217120183Salc		VM_OBJECT_LOCK(first_object);
1218120183Salc		return (vp);
12191549Srgrimes	}
12209507Sdg	return NULL;
12217695Sdg}
1222