1139825Simp/*-
21541Srgrimes * Copyright (c) 1990 University of Utah.
31549Srgrimes * Copyright (c) 1991 The Regents of the University of California.
41549Srgrimes * All rights reserved.
59507Sdg * Copyright (c) 1993, 1994 John S. Dyson
69507Sdg * Copyright (c) 1995, David Greenman
71541Srgrimes *
81541Srgrimes * This code is derived from software contributed to Berkeley by
91541Srgrimes * the Systems Programming Group of the University of Utah Computer
101541Srgrimes * Science Department.
111541Srgrimes *
121541Srgrimes * Redistribution and use in source and binary forms, with or without
131541Srgrimes * modification, are permitted provided that the following conditions
141541Srgrimes * are met:
151541Srgrimes * 1. Redistributions of source code must retain the above copyright
161541Srgrimes *    notice, this list of conditions and the following disclaimer.
171541Srgrimes * 2. Redistributions in binary form must reproduce the above copyright
181541Srgrimes *    notice, this list of conditions and the following disclaimer in the
191541Srgrimes *    documentation and/or other materials provided with the distribution.
201541Srgrimes * 3. All advertising materials mentioning features or use of this software
2158705Scharnier *    must display the following acknowledgement:
221541Srgrimes *	This product includes software developed by the University of
231541Srgrimes *	California, Berkeley and its contributors.
241541Srgrimes * 4. Neither the name of the University nor the names of its contributors
251541Srgrimes *    may be used to endorse or promote products derived from this software
261541Srgrimes *    without specific prior written permission.
271541Srgrimes *
281541Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
291541Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
301541Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
311541Srgrimes * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
321541Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
331541Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
341541Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
351541Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
361541Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
371541Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
381541Srgrimes * SUCH DAMAGE.
391541Srgrimes *
401549Srgrimes *	from: @(#)vnode_pager.c	7.5 (Berkeley) 4/20/91
411541Srgrimes */
421541Srgrimes
431541Srgrimes/*
441541Srgrimes * Page to/from files (vnodes).
451541Srgrimes */
461541Srgrimes
471549Srgrimes/*
481549Srgrimes * TODO:
499507Sdg *	Implement VOP_GETPAGES/PUTPAGES interface for filesystems. Will
507695Sdg *	greatly re-simplify the vnode_pager.
511549Srgrimes */
521549Srgrimes
53116226Sobrien#include <sys/cdefs.h>
54116226Sobrien__FBSDID("$FreeBSD$");
55116226Sobrien
561541Srgrimes#include <sys/param.h>
571541Srgrimes#include <sys/systm.h>
581541Srgrimes#include <sys/proc.h>
591541Srgrimes#include <sys/vnode.h>
601541Srgrimes#include <sys/mount.h>
6160041Sphk#include <sys/bio.h>
629507Sdg#include <sys/buf.h>
6312662Sdg#include <sys/vmmeter.h>
64140767Sphk#include <sys/limits.h>
6551340Sdillon#include <sys/conf.h>
66248084Sattilio#include <sys/rwlock.h>
67127926Salc#include <sys/sf_buf.h>
681541Srgrimes
69148875Sssouhlal#include <machine/atomic.h>
70148875Sssouhlal
711541Srgrimes#include <vm/vm.h>
72239065Skib#include <vm/vm_param.h>
7312662Sdg#include <vm/vm_object.h>
741541Srgrimes#include <vm/vm_page.h>
759507Sdg#include <vm/vm_pager.h>
7631853Sdyson#include <vm/vm_map.h>
771541Srgrimes#include <vm/vnode_pager.h>
7812662Sdg#include <vm/vm_extern.h>
791541Srgrimes
80163359Salcstatic int vnode_pager_addr(struct vnode *vp, vm_ooffset_t address,
81163359Salc    daddr_t *rtaddress, int *run);
8292727Salfredstatic int vnode_pager_input_smlfs(vm_object_t object, vm_page_t m);
8392727Salfredstatic int vnode_pager_input_old(vm_object_t object, vm_page_t m);
8492727Salfredstatic void vnode_pager_dealloc(vm_object_t);
8592727Salfredstatic int vnode_pager_getpages(vm_object_t, vm_page_t *, int, int);
8692727Salfredstatic void vnode_pager_putpages(vm_object_t, vm_page_t *, int, boolean_t, int *);
8792727Salfredstatic boolean_t vnode_pager_haspage(vm_object_t, vm_pindex_t, int *, int *);
88194766Skibstatic vm_object_t vnode_pager_alloc(void *, vm_ooffset_t, vm_prot_t,
89194766Skib    vm_ooffset_t, struct ucred *cred);
9011943Sbde
911541Srgrimesstruct pagerops vnodepagerops = {
92118466Sphk	.pgo_alloc =	vnode_pager_alloc,
93118466Sphk	.pgo_dealloc =	vnode_pager_dealloc,
94118466Sphk	.pgo_getpages =	vnode_pager_getpages,
95118466Sphk	.pgo_putpages =	vnode_pager_putpages,
96118466Sphk	.pgo_haspage =	vnode_pager_haspage,
971541Srgrimes};
981541Srgrimes
9979127Sjhbint vnode_pbuf_freecnt;
10010556Sdyson
101140767Sphk/* Create the VM system backing object for this vnode */
102140767Sphkint
103155177Syarvnode_create_vobject(struct vnode *vp, off_t isize, struct thread *td)
104140767Sphk{
105140767Sphk	vm_object_t object;
106140767Sphk	vm_ooffset_t size = isize;
107140767Sphk	struct vattr va;
108140767Sphk
109140767Sphk	if (!vn_isdisk(vp, NULL) && vn_canvmio(vp) == FALSE)
110140767Sphk		return (0);
111140767Sphk
112140767Sphk	while ((object = vp->v_object) != NULL) {
113248084Sattilio		VM_OBJECT_WLOCK(object);
114140767Sphk		if (!(object->flags & OBJ_DEAD)) {
115248084Sattilio			VM_OBJECT_WUNLOCK(object);
116140767Sphk			return (0);
117140767Sphk		}
118175294Sattilio		VOP_UNLOCK(vp, 0);
119140767Sphk		vm_object_set_flag(object, OBJ_DISCONNECTWNT);
120247346Sattilio		VM_OBJECT_SLEEP(object, object, PDROP | PVM, "vodead", 0);
121175202Sattilio		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
122140767Sphk	}
123140767Sphk
124140767Sphk	if (size == 0) {
125140767Sphk		if (vn_isdisk(vp, NULL)) {
126140767Sphk			size = IDX_TO_OFF(INT_MAX);
127140767Sphk		} else {
128182371Sattilio			if (VOP_GETATTR(vp, &va, td->td_ucred))
129140767Sphk				return (0);
130140767Sphk			size = va.va_size;
131140767Sphk		}
132140767Sphk	}
133140767Sphk
134194766Skib	object = vnode_pager_alloc(vp, size, 0, 0, td->td_ucred);
135140767Sphk	/*
136140767Sphk	 * Dereference the reference we just created.  This assumes
137140767Sphk	 * that the object is associated with the vp.
138140767Sphk	 */
139248084Sattilio	VM_OBJECT_WLOCK(object);
140140767Sphk	object->ref_count--;
141248084Sattilio	VM_OBJECT_WUNLOCK(object);
142140767Sphk	vrele(vp);
143140767Sphk
144140767Sphk	KASSERT(vp->v_object != NULL, ("vnode_create_vobject: NULL object"));
145140767Sphk
146140767Sphk	return (0);
147140767Sphk}
148140767Sphk
149140929Sphkvoid
150140929Sphkvnode_destroy_vobject(struct vnode *vp)
151140929Sphk{
152140929Sphk	struct vm_object *obj;
153140929Sphk
154140929Sphk	obj = vp->v_object;
155140929Sphk	if (obj == NULL)
156140929Sphk		return;
157171599Spjd	ASSERT_VOP_ELOCKED(vp, "vnode_destroy_vobject");
158248084Sattilio	VM_OBJECT_WLOCK(obj);
159140929Sphk	if (obj->ref_count == 0) {
160140929Sphk		/*
161140929Sphk		 * don't double-terminate the object
162140929Sphk		 */
163140929Sphk		if ((obj->flags & OBJ_DEAD) == 0)
164140929Sphk			vm_object_terminate(obj);
165140929Sphk		else
166248084Sattilio			VM_OBJECT_WUNLOCK(obj);
167140929Sphk	} else {
168140929Sphk		/*
169140929Sphk		 * Woe to the process that tries to page now :-).
170140929Sphk		 */
171140929Sphk		vm_pager_deallocate(obj);
172248084Sattilio		VM_OBJECT_WUNLOCK(obj);
173140929Sphk	}
174144610Sjeff	vp->v_object = NULL;
175140929Sphk}
176140929Sphk
177140929Sphk
1781541Srgrimes/*
1791541Srgrimes * Allocate (or lookup) pager for a vnode.
1801541Srgrimes * Handle is a vnode pointer.
18198604Salc *
18298604Salc * MPSAFE
1831541Srgrimes */
1849507Sdgvm_object_t
18540286Sdgvnode_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot,
186194766Skib    vm_ooffset_t offset, struct ucred *cred)
1871541Srgrimes{
1889456Sdg	vm_object_t object;
1891541Srgrimes	struct vnode *vp;
1901541Srgrimes
1911541Srgrimes	/*
1921541Srgrimes	 * Pageout to vnode, no can do yet.
1931541Srgrimes	 */
1941541Srgrimes	if (handle == NULL)
1951827Sdg		return (NULL);
1961541Srgrimes
1979411Sdg	vp = (struct vnode *) handle;
1989411Sdg
1991541Srgrimes	/*
2009411Sdg	 * If the object is being terminated, wait for it to
2019411Sdg	 * go away.
2029411Sdg	 */
203179159Supsretry:
204114074Salc	while ((object = vp->v_object) != NULL) {
205248084Sattilio		VM_OBJECT_WLOCK(object);
206181020Sjhb		if ((object->flags & OBJ_DEAD) == 0)
207114074Salc			break;
208137297Salc		vm_object_set_flag(object, OBJ_DISCONNECTWNT);
209247346Sattilio		VM_OBJECT_SLEEP(object, object, PDROP | PVM, "vadead", 0);
2109507Sdg	}
2115455Sdg
212250026Skib	KASSERT(vp->v_usecount != 0, ("vnode_pager_alloc: no vnode reference"));
21332071Sdyson
2149507Sdg	if (object == NULL) {
2151541Srgrimes		/*
216179159Sups		 * Add an object of the appropriate size
2171541Srgrimes		 */
21840286Sdg		object = vm_object_allocate(OBJT_VNODE, OFF_TO_IDX(round_page(size)));
2191827Sdg
22040286Sdg		object->un_pager.vnp.vnp_size = size;
221232071Skib		object->un_pager.vnp.writemappings = 0;
2221549Srgrimes
2239507Sdg		object->handle = handle;
224179765Sups		VI_LOCK(vp);
225179765Sups		if (vp->v_object != NULL) {
226179159Sups			/*
227179159Sups			 * Object has been created while we were sleeping
228179159Sups			 */
229179765Sups			VI_UNLOCK(vp);
230179159Sups			vm_object_destroy(object);
231179159Sups			goto retry;
232179159Sups		}
2339507Sdg		vp->v_object = object;
234179765Sups		VI_UNLOCK(vp);
235179765Sups	} else {
23632286Sdyson		object->ref_count++;
237248084Sattilio		VM_OBJECT_WUNLOCK(object);
238179765Sups	}
239143559Sjeff	vref(vp);
2409507Sdg	return (object);
2411541Srgrimes}
2421541Srgrimes
243114774Salc/*
244114774Salc *	The object must be locked.
245114774Salc */
24612820Sphkstatic void
2479507Sdgvnode_pager_dealloc(object)
2489507Sdg	vm_object_t object;
2491541Srgrimes{
250202529Skib	struct vnode *vp;
251202529Skib	int refs;
2521541Srgrimes
253202529Skib	vp = object->handle;
2549507Sdg	if (vp == NULL)
2559507Sdg		panic("vnode_pager_dealloc: pager already dealloced");
2569507Sdg
257248084Sattilio	VM_OBJECT_ASSERT_WLOCKED(object);
25833817Sdyson	vm_object_pip_wait(object, "vnpdea");
259202529Skib	refs = object->ref_count;
2601541Srgrimes
2619507Sdg	object->handle = NULL;
26233109Sdyson	object->type = OBJT_DEAD;
263137297Salc	if (object->flags & OBJ_DISCONNECTWNT) {
264137297Salc		vm_object_clear_flag(object, OBJ_DISCONNECTWNT);
265137297Salc		wakeup(object);
266137297Salc	}
267171599Spjd	ASSERT_VOP_ELOCKED(vp, "vnode_pager_dealloc");
268232071Skib	if (object->un_pager.vnp.writemappings > 0) {
269232071Skib		object->un_pager.vnp.writemappings = 0;
270242476Skib		VOP_ADD_WRITECOUNT(vp, -1);
271232701Sjhb		CTR3(KTR_VFS, "%s: vp %p v_writecount decreased to %d",
272232701Sjhb		    __func__, vp, vp->v_writecount);
273232071Skib	}
2749507Sdg	vp->v_object = NULL;
275241025Skib	VOP_UNSET_TEXT(vp);
276248084Sattilio	VM_OBJECT_WUNLOCK(object);
277202529Skib	while (refs-- > 0)
278202529Skib		vunref(vp);
279248084Sattilio	VM_OBJECT_WLOCK(object);
2801549Srgrimes}
2811541Srgrimes
28212820Sphkstatic boolean_t
28312767Sdysonvnode_pager_haspage(object, pindex, before, after)
2849507Sdg	vm_object_t object;
28512767Sdyson	vm_pindex_t pindex;
2869507Sdg	int *before;
2879507Sdg	int *after;
2881541Srgrimes{
2899507Sdg	struct vnode *vp = object->handle;
29096572Sphk	daddr_t bn;
29112423Sphk	int err;
29210556Sdyson	daddr_t reqblock;
29311701Sdyson	int poff;
29411701Sdyson	int bsize;
29512914Sdyson	int pagesperblock, blocksperpage;
2961541Srgrimes
297248084Sattilio	VM_OBJECT_ASSERT_WLOCKED(object);
29851340Sdillon	/*
29951340Sdillon	 * If no vp or vp is doomed or marked transparent to VM, we do not
30051340Sdillon	 * have the page.
30151340Sdillon	 */
302155384Sjeff	if (vp == NULL || vp->v_iflag & VI_DOOMED)
30332585Sdyson		return FALSE;
3041541Srgrimes	/*
305155384Sjeff	 * If the offset is beyond end of file we do
3065455Sdg	 * not have the page.
3071541Srgrimes	 */
308155384Sjeff	if (IDX_TO_OFF(pindex) >= object->un_pager.vnp.vnp_size)
3094797Sdg		return FALSE;
3101541Srgrimes
31111576Sdg	bsize = vp->v_mount->mnt_stat.f_iosize;
31210556Sdyson	pagesperblock = bsize / PAGE_SIZE;
31312914Sdyson	blocksperpage = 0;
31412914Sdyson	if (pagesperblock > 0) {
31512914Sdyson		reqblock = pindex / pagesperblock;
31612914Sdyson	} else {
31712914Sdyson		blocksperpage = (PAGE_SIZE / bsize);
31812914Sdyson		reqblock = pindex * blocksperpage;
31912914Sdyson	}
320248084Sattilio	VM_OBJECT_WUNLOCK(object);
321119045Sphk	err = VOP_BMAP(vp, reqblock, NULL, &bn, after, before);
322248084Sattilio	VM_OBJECT_WLOCK(object);
3238876Srgrimes	if (err)
3249507Sdg		return TRUE;
32592029Seivind	if (bn == -1)
32610576Sdyson		return FALSE;
32712914Sdyson	if (pagesperblock > 0) {
32812914Sdyson		poff = pindex - (reqblock * pagesperblock);
32912914Sdyson		if (before) {
33012914Sdyson			*before *= pagesperblock;
33112914Sdyson			*before += poff;
33210669Sdyson		}
33312914Sdyson		if (after) {
33412914Sdyson			int numafter;
33512914Sdyson			*after *= pagesperblock;
33612914Sdyson			numafter = pagesperblock - (poff + 1);
33799211Srobert			if (IDX_TO_OFF(pindex + numafter) >
33899211Srobert			    object->un_pager.vnp.vnp_size) {
33999211Srobert				numafter =
34099211Srobert		    		    OFF_TO_IDX(object->un_pager.vnp.vnp_size) -
34199211Srobert				    pindex;
34212914Sdyson			}
34312914Sdyson			*after += numafter;
34412914Sdyson		}
34512914Sdyson	} else {
34612914Sdyson		if (before) {
34712914Sdyson			*before /= blocksperpage;
34812914Sdyson		}
34912914Sdyson
35012914Sdyson		if (after) {
35112914Sdyson			*after /= blocksperpage;
35212914Sdyson		}
35310556Sdyson	}
35410576Sdyson	return TRUE;
3551541Srgrimes}
3561541Srgrimes
3571541Srgrimes/*
3581541Srgrimes * Lets the VM system know about a change in size for a file.
3599507Sdg * We adjust our own internal size and flush any cached pages in
3601541Srgrimes * the associated object that are affected by the size change.
3611541Srgrimes *
3621541Srgrimes * Note: this routine may be invoked as a result of a pager put
3631541Srgrimes * operation (possibly at object termination time), so we must be careful.
3641541Srgrimes */
3651541Srgrimesvoid
3661541Srgrimesvnode_pager_setsize(vp, nsize)
3671541Srgrimes	struct vnode *vp;
36812767Sdyson	vm_ooffset_t nsize;
3691541Srgrimes{
370116167Salc	vm_object_t object;
371116167Salc	vm_page_t m;
37238542Sluoqi	vm_pindex_t nobjsize;
3731541Srgrimes
374116167Salc	if ((object = vp->v_object) == NULL)
3751541Srgrimes		return;
376188386Skib/* 	ASSERT_VOP_ELOCKED(vp, "vnode_pager_setsize and not locked vnode"); */
377248084Sattilio	VM_OBJECT_WLOCK(object);
378250028Skib	if (object->type == OBJT_DEAD) {
379250028Skib		VM_OBJECT_WUNLOCK(object);
380250028Skib		return;
381250028Skib	}
382250028Skib	KASSERT(object->type == OBJT_VNODE,
383250028Skib	    ("not vnode-backed object %p", object));
384116167Salc	if (nsize == object->un_pager.vnp.vnp_size) {
385116167Salc		/*
386116167Salc		 * Hasn't changed size
387116167Salc		 */
388248084Sattilio		VM_OBJECT_WUNLOCK(object);
3893374Sdg		return;
390116167Salc	}
39138542Sluoqi	nobjsize = OFF_TO_IDX(nsize + PAGE_MASK);
3929507Sdg	if (nsize < object->un_pager.vnp.vnp_size) {
393116167Salc		/*
394116167Salc		 * File has shrunk. Toss any cached pages beyond the new EOF.
395116167Salc		 */
396116167Salc		if (nobjsize < object->size)
39738542Sluoqi			vm_object_page_remove(object, nobjsize, object->size,
398223677Salc			    0);
3991827Sdg		/*
4001827Sdg		 * this gets rid of garbage at the end of a page that is now
40187834Sdillon		 * only partially backed by the vnode.
40287834Sdillon		 *
40387834Sdillon		 * XXX for some reason (I don't know yet), if we take a
40487834Sdillon		 * completely invalid page and mark it partially valid
40587834Sdillon		 * it can screw up NFS reads, so we don't allow the case.
4061827Sdg		 */
407116167Salc		if ((nsize & PAGE_MASK) &&
408121230Salc		    (m = vm_page_lookup(object, OFF_TO_IDX(nsize))) != NULL &&
409121230Salc		    m->valid != 0) {
410121230Salc			int base = (int)nsize & PAGE_MASK;
411121230Salc			int size = PAGE_SIZE - base;
41270374Sdillon
413121230Salc			/*
414121230Salc			 * Clear out partial-page garbage in case
415121230Salc			 * the page has been mapped.
416121230Salc			 */
417121230Salc			pmap_zero_page_area(m, base, size);
41870374Sdillon
419121230Salc			/*
420193303Salc			 * Update the valid bits to reflect the blocks that
421193303Salc			 * have been zeroed.  Some of these valid bits may
422193303Salc			 * have already been set.
423193303Salc			 */
424228156Skib			vm_page_set_valid_range(m, base, size);
425193303Salc
426193303Salc			/*
427193303Salc			 * Round "base" to the next block boundary so that the
428193303Salc			 * dirty bit for a partially zeroed block is not
429193303Salc			 * cleared.
430193303Salc			 */
431193303Salc			base = roundup2(base, DEV_BSIZE);
432193303Salc
433193303Salc			/*
434193303Salc			 * Clear out partial-page dirty bits.
435121230Salc			 *
436121230Salc			 * note that we do not clear out the valid
437121230Salc			 * bits.  This would prevent bogus_page
438121230Salc			 * replacement from working properly.
439121230Salc			 */
440193303Salc			vm_page_clear_dirty(m, base, PAGE_SIZE - base);
441172875Salc		} else if ((nsize & PAGE_MASK) &&
442237172Sattilio		    vm_page_is_cached(object, OFF_TO_IDX(nsize))) {
443172875Salc			vm_page_cache_free(object, OFF_TO_IDX(nsize),
444172875Salc			    nobjsize);
4451827Sdg		}
4461541Srgrimes	}
44712767Sdyson	object->un_pager.vnp.vnp_size = nsize;
44838542Sluoqi	object->size = nobjsize;
449248084Sattilio	VM_OBJECT_WUNLOCK(object);
4501541Srgrimes}
4511541Srgrimes
4521549Srgrimes/*
4531549Srgrimes * calculate the linear (byte) disk address of specified virtual
4541549Srgrimes * file address
4551549Srgrimes */
456163359Salcstatic int
457163359Salcvnode_pager_addr(struct vnode *vp, vm_ooffset_t address, daddr_t *rtaddress,
458163359Salc    int *run)
4591549Srgrimes{
4605455Sdg	int bsize;
4615455Sdg	int err;
46212767Sdyson	daddr_t vblock;
463146340Sbz	daddr_t voffset;
4641549Srgrimes
465138531Salc	if (address < 0)
4665455Sdg		return -1;
4675455Sdg
468155384Sjeff	if (vp->v_iflag & VI_DOOMED)
46911701Sdyson		return -1;
47011701Sdyson
4711549Srgrimes	bsize = vp->v_mount->mnt_stat.f_iosize;
4721549Srgrimes	vblock = address / bsize;
4731549Srgrimes	voffset = address % bsize;
4741549Srgrimes
475163359Salc	err = VOP_BMAP(vp, vblock, NULL, rtaddress, run, NULL);
476163359Salc	if (err == 0) {
477163359Salc		if (*rtaddress != -1)
478163359Salc			*rtaddress += voffset / DEV_BSIZE;
47992029Seivind		if (run) {
4806151Sdg			*run += 1;
4816151Sdg			*run *= bsize/PAGE_SIZE;
4826151Sdg			*run -= voffset/PAGE_SIZE;
4836151Sdg		}
4846151Sdg	}
4851549Srgrimes
486163359Salc	return (err);
4871549Srgrimes}
4881549Srgrimes
4891549Srgrimes/*
49096755Strhodes * small block filesystem vnode pager input
4911549Srgrimes */
49212820Sphkstatic int
4939507Sdgvnode_pager_input_smlfs(object, m)
4949507Sdg	vm_object_t object;
4951549Srgrimes	vm_page_t m;
4961549Srgrimes{
497137726Sphk	struct vnode *vp;
498137726Sphk	struct bufobj *bo;
4991549Srgrimes	struct buf *bp;
500127926Salc	struct sf_buf *sf;
501146340Sbz	daddr_t fileaddr;
5021549Srgrimes	vm_offset_t bsize;
503227102Skib	vm_page_bits_t bits;
504227102Skib	int error, i;
5051549Srgrimes
506227102Skib	error = 0;
5079507Sdg	vp = object->handle;
508155384Sjeff	if (vp->v_iflag & VI_DOOMED)
50911701Sdyson		return VM_PAGER_BAD;
51011701Sdyson
5111549Srgrimes	bsize = vp->v_mount->mnt_stat.f_iosize;
5121549Srgrimes
513137726Sphk	VOP_BMAP(vp, 0, &bo, 0, NULL, NULL);
5141549Srgrimes
515127926Salc	sf = sf_buf_alloc(m, 0);
5161549Srgrimes
5171827Sdg	for (i = 0; i < PAGE_SIZE / bsize; i++) {
51886092Sdillon		vm_ooffset_t address;
5191827Sdg
520191935Salc		bits = vm_page_bits(i * bsize, bsize);
521191935Salc		if (m->valid & bits)
5225455Sdg			continue;
5231549Srgrimes
52486092Sdillon		address = IDX_TO_OFF(m->pindex) + i * bsize;
52586092Sdillon		if (address >= object->un_pager.vnp.vnp_size) {
52686092Sdillon			fileaddr = -1;
52786092Sdillon		} else {
528163359Salc			error = vnode_pager_addr(vp, address, &fileaddr, NULL);
529163359Salc			if (error)
530163359Salc				break;
53186092Sdillon		}
5321827Sdg		if (fileaddr != -1) {
53342957Sdillon			bp = getpbuf(&vnode_pbuf_freecnt);
5341549Srgrimes
5351827Sdg			/* build a minimal buffer header */
53658345Sphk			bp->b_iocmd = BIO_READ;
537119092Sphk			bp->b_iodone = bdone;
53884827Sjhb			KASSERT(bp->b_rcred == NOCRED, ("leaking read ucred"));
53984827Sjhb			KASSERT(bp->b_wcred == NOCRED, ("leaking write ucred"));
54091406Sjhb			bp->b_rcred = crhold(curthread->td_ucred);
54191406Sjhb			bp->b_wcred = crhold(curthread->td_ucred);
542127926Salc			bp->b_data = (caddr_t)sf_buf_kva(sf) + i * bsize;
5436626Sdg			bp->b_blkno = fileaddr;
544137726Sphk			pbgetbo(bo, bp);
545233627Smckusick			bp->b_vp = vp;
5461549Srgrimes			bp->b_bcount = bsize;
5471549Srgrimes			bp->b_bufsize = bsize;
54870374Sdillon			bp->b_runningbufspace = bp->b_bufsize;
549189595Sjhb			atomic_add_long(&runningbufspace, bp->b_runningbufspace);
5501827Sdg
5511827Sdg			/* do the input */
552121205Sphk			bp->b_iooffset = dbtob(bp->b_blkno);
553136927Sphk			bstrategy(bp);
5541549Srgrimes
555119092Sphk			bwait(bp, PVM, "vnsrd");
556119092Sphk
55758934Sphk			if ((bp->b_ioflags & BIO_ERROR) != 0)
5581549Srgrimes				error = EIO;
5591549Srgrimes
5601827Sdg			/*
5611827Sdg			 * free the buffer header back to the swap buffer pool
5621827Sdg			 */
563233627Smckusick			bp->b_vp = NULL;
564137726Sphk			pbrelbo(bp);
56542957Sdillon			relpbuf(bp, &vnode_pbuf_freecnt);
5661827Sdg			if (error)
5671549Srgrimes				break;
568191935Salc		} else
569127926Salc			bzero((caddr_t)sf_buf_kva(sf) + i * bsize, bsize);
570191935Salc		KASSERT((m->dirty & bits) == 0,
571191935Salc		    ("vnode_pager_input_smlfs: page %p is dirty", m));
572248084Sattilio		VM_OBJECT_WLOCK(object);
573191935Salc		m->valid |= bits;
574248084Sattilio		VM_OBJECT_WUNLOCK(object);
5751549Srgrimes	}
576127926Salc	sf_buf_free(sf);
5771827Sdg	if (error) {
5784207Sdg		return VM_PAGER_ERROR;
5791549Srgrimes	}
5801549Srgrimes	return VM_PAGER_OK;
5811549Srgrimes}
5821549Srgrimes
5831549Srgrimes/*
584139296Sphk * old style vnode pager input routine
5851549Srgrimes */
58612820Sphkstatic int
5879507Sdgvnode_pager_input_old(object, m)
5889507Sdg	vm_object_t object;
5891549Srgrimes	vm_page_t m;
5901549Srgrimes{
5911541Srgrimes	struct uio auio;
5921541Srgrimes	struct iovec aiov;
5935455Sdg	int error;
5945455Sdg	int size;
595127926Salc	struct sf_buf *sf;
59677398Sjhb	struct vnode *vp;
5971549Srgrimes
598248084Sattilio	VM_OBJECT_ASSERT_WLOCKED(object);
5991549Srgrimes	error = 0;
6001827Sdg
6011549Srgrimes	/*
6021549Srgrimes	 * Return failure if beyond current EOF
6031549Srgrimes	 */
60412767Sdyson	if (IDX_TO_OFF(m->pindex) >= object->un_pager.vnp.vnp_size) {
6051549Srgrimes		return VM_PAGER_BAD;
6061549Srgrimes	} else {
6071549Srgrimes		size = PAGE_SIZE;
60812767Sdyson		if (IDX_TO_OFF(m->pindex) + size > object->un_pager.vnp.vnp_size)
60912767Sdyson			size = object->un_pager.vnp.vnp_size - IDX_TO_OFF(m->pindex);
610121495Salc		vp = object->handle;
611248084Sattilio		VM_OBJECT_WUNLOCK(object);
6127178Sdg
6135455Sdg		/*
6145455Sdg		 * Allocate a kernel virtual address and initialize so that
6155455Sdg		 * we can use VOP_READ/WRITE routines.
6165455Sdg		 */
617127926Salc		sf = sf_buf_alloc(m, 0);
6187178Sdg
619127926Salc		aiov.iov_base = (caddr_t)sf_buf_kva(sf);
6201549Srgrimes		aiov.iov_len = size;
6211549Srgrimes		auio.uio_iov = &aiov;
6221549Srgrimes		auio.uio_iovcnt = 1;
62312767Sdyson		auio.uio_offset = IDX_TO_OFF(m->pindex);
6241549Srgrimes		auio.uio_segflg = UIO_SYSSPACE;
6251549Srgrimes		auio.uio_rw = UIO_READ;
6261549Srgrimes		auio.uio_resid = size;
62783366Sjulian		auio.uio_td = curthread;
6281549Srgrimes
62991406Sjhb		error = VOP_READ(vp, &auio, 0, curthread->td_ucred);
6301549Srgrimes		if (!error) {
63179242Sdillon			int count = size - auio.uio_resid;
6321549Srgrimes
6331549Srgrimes			if (count == 0)
6341549Srgrimes				error = EINVAL;
6351549Srgrimes			else if (count != PAGE_SIZE)
636127926Salc				bzero((caddr_t)sf_buf_kva(sf) + count,
637127926Salc				    PAGE_SIZE - count);
6381549Srgrimes		}
639127926Salc		sf_buf_free(sf);
640121230Salc
641248084Sattilio		VM_OBJECT_WLOCK(object);
6421549Srgrimes	}
643191935Salc	KASSERT(m->dirty == 0, ("vnode_pager_input_old: page %p is dirty", m));
64439739Srvb	if (!error)
64539739Srvb		m->valid = VM_PAGE_BITS_ALL;
6464207Sdg	return error ? VM_PAGER_ERROR : VM_PAGER_OK;
6471549Srgrimes}
6481549Srgrimes
6491549Srgrimes/*
6501549Srgrimes * generic vnode pager input routine
6511549Srgrimes */
65210556Sdyson
65333847Smsmith/*
65476827Salfred * Local media VFS's that do not implement their own VOP_GETPAGES
65599211Srobert * should have their VOP_GETPAGES call to vnode_pager_generic_getpages()
65699211Srobert * to implement the previous behaviour.
65733847Smsmith *
65833847Smsmith * All other FS's should use the bypass to get to the local media
65933847Smsmith * backing vp's VOP_GETPAGES.
66033847Smsmith */
66112820Sphkstatic int
6629507Sdgvnode_pager_getpages(object, m, count, reqpage)
6639507Sdg	vm_object_t object;
6641549Srgrimes	vm_page_t *m;
6659507Sdg	int count;
6669507Sdg	int reqpage;
6671549Srgrimes{
66810556Sdyson	int rtval;
66910556Sdyson	struct vnode *vp;
67034403Smsmith	int bytes = count * PAGE_SIZE;
67132286Sdyson
67210556Sdyson	vp = object->handle;
673248084Sattilio	VM_OBJECT_WUNLOCK(object);
67434403Smsmith	rtval = VOP_GETPAGES(vp, m, bytes, reqpage, 0);
67576827Salfred	KASSERT(rtval != EOPNOTSUPP,
67676827Salfred	    ("vnode_pager: FS getpages not implemented\n"));
677248084Sattilio	VM_OBJECT_WLOCK(object);
67833847Smsmith	return rtval;
67910556Sdyson}
68010556Sdyson
68133847Smsmith/*
68233847Smsmith * This is now called from local media FS's to operate against their
68333847Smsmith * own vnodes if they fail to implement VOP_GETPAGES.
68433847Smsmith */
68533847Smsmithint
68633847Smsmithvnode_pager_generic_getpages(vp, m, bytecount, reqpage)
68733847Smsmith	struct vnode *vp;
68810556Sdyson	vm_page_t *m;
68933847Smsmith	int bytecount;
69010556Sdyson	int reqpage;
69110556Sdyson{
69233847Smsmith	vm_object_t object;
69312767Sdyson	vm_offset_t kva;
69434206Sdyson	off_t foff, tfoff, nextoff;
695146340Sbz	int i, j, size, bsize, first;
696163140Salc	daddr_t firstaddr, reqblock;
697137726Sphk	struct bufobj *bo;
6986151Sdg	int runpg;
6996151Sdg	int runend;
7007178Sdg	struct buf *bp;
701248512Skib	struct mount *mp;
70233847Smsmith	int count;
703163210Salc	int error;
7041549Srgrimes
70533847Smsmith	object = vp->v_object;
70633847Smsmith	count = bytecount / PAGE_SIZE;
70733847Smsmith
708137726Sphk	KASSERT(vp->v_type != VCHR && vp->v_type != VBLK,
709137726Sphk	    ("vnode_pager_generic_getpages does not support devices"));
710155384Sjeff	if (vp->v_iflag & VI_DOOMED)
71111701Sdyson		return VM_PAGER_BAD;
71211701Sdyson
7131549Srgrimes	bsize = vp->v_mount->mnt_stat.f_iosize;
7141549Srgrimes
7151549Srgrimes	/* get the UNDERLYING device for the file with VOP_BMAP() */
7161827Sdg
7171549Srgrimes	/*
7181827Sdg	 * originally, we did not check for an error return value -- assuming
7191827Sdg	 * an fs always has a bmap entry point -- that assumption is wrong!!!
7201549Srgrimes	 */
72112767Sdyson	foff = IDX_TO_OFF(m[reqpage]->pindex);
7221827Sdg
7231549Srgrimes	/*
7241887Sdg	 * if we can't bmap, use old VOP code
7251549Srgrimes	 */
726163210Salc	error = VOP_BMAP(vp, foff / bsize, &bo, &reqblock, NULL, NULL);
727163210Salc	if (error == EOPNOTSUPP) {
728248084Sattilio		VM_OBJECT_WLOCK(object);
729207410Skmacy
730100832Salc		for (i = 0; i < count; i++)
731207410Skmacy			if (i != reqpage) {
732207410Skmacy				vm_page_lock(m[i]);
73375692Salfred				vm_page_free(m[i]);
734207410Skmacy				vm_page_unlock(m[i]);
735207410Skmacy			}
736170292Sattilio		PCPU_INC(cnt.v_vnodein);
737170292Sattilio		PCPU_INC(cnt.v_vnodepgsin);
738121495Salc		error = vnode_pager_input_old(object, m[reqpage]);
739248084Sattilio		VM_OBJECT_WUNLOCK(object);
740121495Salc		return (error);
741163210Salc	} else if (error != 0) {
742248084Sattilio		VM_OBJECT_WLOCK(object);
743163210Salc		for (i = 0; i < count; i++)
744207410Skmacy			if (i != reqpage) {
745207410Skmacy				vm_page_lock(m[i]);
746163210Salc				vm_page_free(m[i]);
747207410Skmacy				vm_page_unlock(m[i]);
748207410Skmacy			}
749248084Sattilio		VM_OBJECT_WUNLOCK(object);
750163210Salc		return (VM_PAGER_ERROR);
7511549Srgrimes
7521827Sdg		/*
7531827Sdg		 * if the blocksize is smaller than a page size, then use
7541827Sdg		 * special small filesystem code.  NFS sometimes has a small
7551827Sdg		 * blocksize, but it can handle large reads itself.
7561827Sdg		 */
7571827Sdg	} else if ((PAGE_SIZE / bsize) > 1 &&
75838866Sbde	    (vp->v_mount->mnt_stat.f_type != nfs_mount_type)) {
759248084Sattilio		VM_OBJECT_WLOCK(object);
760100832Salc		for (i = 0; i < count; i++)
761207410Skmacy			if (i != reqpage) {
762207410Skmacy				vm_page_lock(m[i]);
76375692Salfred				vm_page_free(m[i]);
764207410Skmacy				vm_page_unlock(m[i]);
765207410Skmacy			}
766248084Sattilio		VM_OBJECT_WUNLOCK(object);
767170292Sattilio		PCPU_INC(cnt.v_vnodein);
768170292Sattilio		PCPU_INC(cnt.v_vnodepgsin);
7699507Sdg		return vnode_pager_input_smlfs(object, m[reqpage]);
7701549Srgrimes	}
77145347Sjulian
7721549Srgrimes	/*
77345347Sjulian	 * If we have a completely valid page available to us, we can
77445347Sjulian	 * clean up and return.  Otherwise we have to re-read the
77545347Sjulian	 * media.
7761549Srgrimes	 */
777248084Sattilio	VM_OBJECT_WLOCK(object);
77845347Sjulian	if (m[reqpage]->valid == VM_PAGE_BITS_ALL) {
779100832Salc		for (i = 0; i < count; i++)
780207410Skmacy			if (i != reqpage) {
781207410Skmacy				vm_page_lock(m[i]);
78275692Salfred				vm_page_free(m[i]);
783207410Skmacy				vm_page_unlock(m[i]);
784207410Skmacy			}
785248084Sattilio		VM_OBJECT_WUNLOCK(object);
7865455Sdg		return VM_PAGER_OK;
787163140Salc	} else if (reqblock == -1) {
788163140Salc		pmap_zero_page(m[reqpage]);
789192010Salc		KASSERT(m[reqpage]->dirty == 0,
790192010Salc		    ("vnode_pager_generic_getpages: page %p is dirty", m));
791163140Salc		m[reqpage]->valid = VM_PAGE_BITS_ALL;
792163140Salc		for (i = 0; i < count; i++)
793207410Skmacy			if (i != reqpage) {
794207410Skmacy				vm_page_lock(m[i]);
795163140Salc				vm_page_free(m[i]);
796207410Skmacy				vm_page_unlock(m[i]);
797207410Skmacy			}
798248084Sattilio		VM_OBJECT_WUNLOCK(object);
799163140Salc		return (VM_PAGER_OK);
8001549Srgrimes	}
80145347Sjulian	m[reqpage]->valid = 0;
802248084Sattilio	VM_OBJECT_WUNLOCK(object);
8037178Sdg
8045455Sdg	/*
8055455Sdg	 * here on direct device I/O
8065455Sdg	 */
80792029Seivind	firstaddr = -1;
8081549Srgrimes
8091549Srgrimes	/*
8106151Sdg	 * calculate the run that includes the required page
8111549Srgrimes	 */
81292029Seivind	for (first = 0, i = 0; i < count; i = runend) {
813163359Salc		if (vnode_pager_addr(vp, IDX_TO_OFF(m[i]->pindex), &firstaddr,
814163359Salc		    &runpg) != 0) {
815248084Sattilio			VM_OBJECT_WLOCK(object);
816163359Salc			for (; i < count; i++)
817207410Skmacy				if (i != reqpage) {
818207410Skmacy					vm_page_lock(m[i]);
819163359Salc					vm_page_free(m[i]);
820207410Skmacy					vm_page_unlock(m[i]);
821207410Skmacy				}
822248084Sattilio			VM_OBJECT_WUNLOCK(object);
823163359Salc			return (VM_PAGER_ERROR);
824163359Salc		}
8256151Sdg		if (firstaddr == -1) {
826248084Sattilio			VM_OBJECT_WLOCK(object);
8279507Sdg			if (i == reqpage && foff < object->un_pager.vnp.vnp_size) {
828146340Sbz				panic("vnode_pager_getpages: unexpected missing page: firstaddr: %jd, foff: 0x%jx%08jx, vnp_size: 0x%jx%08jx",
829146340Sbz				    (intmax_t)firstaddr, (uintmax_t)(foff >> 32),
830106603Smux				    (uintmax_t)foff,
831106603Smux				    (uintmax_t)
832106603Smux				    (object->un_pager.vnp.vnp_size >> 32),
833106603Smux				    (uintmax_t)object->un_pager.vnp.vnp_size);
8346151Sdg			}
835207410Skmacy			vm_page_lock(m[i]);
83675692Salfred			vm_page_free(m[i]);
837207410Skmacy			vm_page_unlock(m[i]);
838248084Sattilio			VM_OBJECT_WUNLOCK(object);
8396151Sdg			runend = i + 1;
8406151Sdg			first = runend;
8416151Sdg			continue;
8421549Srgrimes		}
8436151Sdg		runend = i + runpg;
8449507Sdg		if (runend <= reqpage) {
845248084Sattilio			VM_OBJECT_WLOCK(object);
846207410Skmacy			for (j = i; j < runend; j++) {
847207410Skmacy				vm_page_lock(m[j]);
84875692Salfred				vm_page_free(m[j]);
849207410Skmacy				vm_page_unlock(m[j]);
850207410Skmacy			}
851248084Sattilio			VM_OBJECT_WUNLOCK(object);
8521549Srgrimes		} else {
8539507Sdg			if (runpg < (count - first)) {
854248084Sattilio				VM_OBJECT_WLOCK(object);
855207410Skmacy				for (i = first + runpg; i < count; i++) {
856207410Skmacy					vm_page_lock(m[i]);
85775692Salfred					vm_page_free(m[i]);
858207410Skmacy					vm_page_unlock(m[i]);
859207410Skmacy				}
860248084Sattilio				VM_OBJECT_WUNLOCK(object);
8616151Sdg				count = first + runpg;
8626151Sdg			}
8636151Sdg			break;
8641549Srgrimes		}
8656151Sdg		first = runend;
8661549Srgrimes	}
8671549Srgrimes
8681549Srgrimes	/*
8691827Sdg	 * the first and last page have been calculated now, move input pages
8701827Sdg	 * to be zero based...
8711549Srgrimes	 */
8721549Srgrimes	if (first != 0) {
873163361Salc		m += first;
8741549Srgrimes		count -= first;
8751549Srgrimes		reqpage -= first;
8761549Srgrimes	}
8776151Sdg
8781549Srgrimes	/*
8791549Srgrimes	 * calculate the file virtual address for the transfer
8801549Srgrimes	 */
88112767Sdyson	foff = IDX_TO_OFF(m[0]->pindex);
8821827Sdg
8831549Srgrimes	/*
8841549Srgrimes	 * calculate the size of the transfer
8851549Srgrimes	 */
8861549Srgrimes	size = count * PAGE_SIZE;
887134892Sphk	KASSERT(count > 0, ("zero count"));
8889507Sdg	if ((foff + size) > object->un_pager.vnp.vnp_size)
8899507Sdg		size = object->un_pager.vnp.vnp_size - foff;
890134892Sphk	KASSERT(size > 0, ("zero size"));
8911549Srgrimes
8921549Srgrimes	/*
89351340Sdillon	 * round up physical size for real devices.
8941549Srgrimes	 */
895137726Sphk	if (1) {
896137726Sphk		int secmask = bo->bo_bsize - 1;
897136977Sphk		KASSERT(secmask < PAGE_SIZE && secmask > 0,
898136977Sphk		    ("vnode_pager_generic_getpages: sector size %d too large",
899136977Sphk		    secmask + 1));
90051340Sdillon		size = (size + secmask) & ~secmask;
90151340Sdillon	}
9021549Srgrimes
90342957Sdillon	bp = getpbuf(&vnode_pbuf_freecnt);
904248283Skib	kva = (vm_offset_t)bp->b_data;
9051887Sdg
9061549Srgrimes	/*
907248512Skib	 * and map the pages to be read into the kva, if the filesystem
908248512Skib	 * requires mapped buffers.
9091549Srgrimes	 */
910248512Skib	mp = vp->v_mount;
911248512Skib	if (mp != NULL && (mp->mnt_kern_flag & MNTK_UNMAPPED_BUFS) != 0 &&
912248512Skib	    unmapped_buf_allowed) {
913248512Skib		bp->b_data = unmapped_buf;
914248512Skib		bp->b_kvabase = unmapped_buf;
915248512Skib		bp->b_offset = 0;
916248512Skib		bp->b_flags |= B_UNMAPPED;
917248512Skib		bp->b_npages = count;
918248512Skib		for (i = 0; i < count; i++)
919248512Skib			bp->b_pages[i] = m[i];
920248512Skib	} else
921248512Skib		pmap_qenter(kva, m, count);
9221549Srgrimes
9231549Srgrimes	/* build a minimal buffer header */
92458345Sphk	bp->b_iocmd = BIO_READ;
925119092Sphk	bp->b_iodone = bdone;
92684827Sjhb	KASSERT(bp->b_rcred == NOCRED, ("leaking read ucred"));
92784827Sjhb	KASSERT(bp->b_wcred == NOCRED, ("leaking write ucred"));
92891406Sjhb	bp->b_rcred = crhold(curthread->td_ucred);
92991406Sjhb	bp->b_wcred = crhold(curthread->td_ucred);
9306626Sdg	bp->b_blkno = firstaddr;
931137726Sphk	pbgetbo(bo, bp);
932233627Smckusick	bp->b_vp = vp;
9331549Srgrimes	bp->b_bcount = size;
9341549Srgrimes	bp->b_bufsize = size;
93570374Sdillon	bp->b_runningbufspace = bp->b_bufsize;
936189595Sjhb	atomic_add_long(&runningbufspace, bp->b_runningbufspace);
9371549Srgrimes
938170292Sattilio	PCPU_INC(cnt.v_vnodein);
939170292Sattilio	PCPU_ADD(cnt.v_vnodepgsin, count);
9403612Sdg
9411549Srgrimes	/* do the input */
942121205Sphk	bp->b_iooffset = dbtob(bp->b_blkno);
943136927Sphk	bstrategy(bp);
9443612Sdg
945119092Sphk	bwait(bp, PVM, "vnread");
9461549Srgrimes
94758934Sphk	if ((bp->b_ioflags & BIO_ERROR) != 0)
9481549Srgrimes		error = EIO;
9491549Srgrimes
950248550Skib	if (error == 0 && size != count * PAGE_SIZE) {
951248512Skib		if ((bp->b_flags & B_UNMAPPED) != 0) {
952248512Skib			bp->b_flags &= ~B_UNMAPPED;
953248512Skib			pmap_qenter(kva, m, count);
954248512Skib		}
955248512Skib		bzero((caddr_t)kva + size, PAGE_SIZE * count - size);
9561549Srgrimes	}
957248512Skib	if ((bp->b_flags & B_UNMAPPED) == 0)
958248512Skib		pmap_qremove(kva, count);
959248512Skib	if (mp != NULL && (mp->mnt_kern_flag & MNTK_UNMAPPED_BUFS) != 0) {
960248512Skib		bp->b_data = (caddr_t)kva;
961248512Skib		bp->b_kvabase = (caddr_t)kva;
962248512Skib		bp->b_flags &= ~B_UNMAPPED;
963248512Skib		for (i = 0; i < count; i++)
964248512Skib			bp->b_pages[i] = NULL;
965248512Skib	}
9661549Srgrimes
9671549Srgrimes	/*
9681549Srgrimes	 * free the buffer header back to the swap buffer pool
9691549Srgrimes	 */
970233627Smckusick	bp->b_vp = NULL;
971137726Sphk	pbrelbo(bp);
97242957Sdillon	relpbuf(bp, &vnode_pbuf_freecnt);
9731549Srgrimes
974248084Sattilio	VM_OBJECT_WLOCK(object);
97534206Sdyson	for (i = 0, tfoff = foff; i < count; i++, tfoff = nextoff) {
97634206Sdyson		vm_page_t mt;
97734206Sdyson
97834206Sdyson		nextoff = tfoff + PAGE_SIZE;
97934206Sdyson		mt = m[i];
98034206Sdyson
98147239Sdt		if (nextoff <= object->un_pager.vnp.vnp_size) {
98245347Sjulian			/*
98345347Sjulian			 * Read filled up entire page.
98445347Sjulian			 */
98534206Sdyson			mt->valid = VM_PAGE_BITS_ALL;
986191478Salc			KASSERT(mt->dirty == 0,
987191478Salc			    ("vnode_pager_generic_getpages: page %p is dirty",
988191478Salc			    mt));
989191478Salc			KASSERT(!pmap_page_is_mapped(mt),
990191478Salc			    ("vnode_pager_generic_getpages: page %p is mapped",
991191478Salc			    mt));
99234206Sdyson		} else {
99345347Sjulian			/*
994192134Salc			 * Read did not fill up entire page.
99545347Sjulian			 *
99645347Sjulian			 * Currently we do not set the entire page valid,
99745347Sjulian			 * we just try to clear the piece that we couldn't
99845347Sjulian			 * read.
99945347Sjulian			 */
1000228156Skib			vm_page_set_valid_range(mt, 0,
100147239Sdt			    object->un_pager.vnp.vnp_size - tfoff);
1002192134Salc			KASSERT((mt->dirty & vm_page_bits(0,
1003192134Salc			    object->un_pager.vnp.vnp_size - tfoff)) == 0,
1004192134Salc			    ("vnode_pager_generic_getpages: page %p is dirty",
1005192134Salc			    mt));
100634206Sdyson		}
100734206Sdyson
1008239040Skib		if (i != reqpage)
1009239246Skib			vm_page_readahead_finish(mt);
10101549Srgrimes	}
1011248084Sattilio	VM_OBJECT_WUNLOCK(object);
10121549Srgrimes	if (error) {
10139507Sdg		printf("vnode_pager_getpages: I/O read error\n");
10141549Srgrimes	}
10154207Sdg	return (error ? VM_PAGER_ERROR : VM_PAGER_OK);
10161549Srgrimes}
10171549Srgrimes
101833847Smsmith/*
101933847Smsmith * EOPNOTSUPP is no longer legal.  For local media VFS's that do not
102033847Smsmith * implement their own VOP_PUTPAGES, their VOP_PUTPAGES should call to
102133847Smsmith * vnode_pager_generic_putpages() to implement the previous behaviour.
102233847Smsmith *
102333847Smsmith * All other FS's should use the bypass to get to the local media
102433847Smsmith * backing vp's VOP_PUTPAGES.
102533847Smsmith */
102643129Sdillonstatic void
102710556Sdysonvnode_pager_putpages(object, m, count, sync, rtvals)
102810556Sdyson	vm_object_t object;
102910556Sdyson	vm_page_t *m;
103010556Sdyson	int count;
103110556Sdyson	boolean_t sync;
103210556Sdyson	int *rtvals;
103310556Sdyson{
103410556Sdyson	int rtval;
103510556Sdyson	struct vnode *vp;
103634403Smsmith	int bytes = count * PAGE_SIZE;
103718973Sdyson
103844321Salc	/*
103944321Salc	 * Force synchronous operation if we are extremely low on memory
104044321Salc	 * to prevent a low-memory deadlock.  VOP operations often need to
104144321Salc	 * allocate more memory to initiate the I/O ( i.e. do a BMAP
104244321Salc	 * operation ).  The swapper handles the case by limiting the amount
104344321Salc	 * of asynchronous I/O, but that sort of solution doesn't scale well
104444321Salc	 * for the vnode pager without a lot of work.
104544321Salc	 *
104644321Salc	 * Also, the backing vnode's iodone routine may not wake the pageout
104744321Salc	 * daemon up.  This should be probably be addressed XXX.
104844321Salc	 */
104944321Salc
1050170170Sattilio	if ((cnt.v_free_count + cnt.v_cache_count) < cnt.v_pageout_free_min)
105144321Salc		sync |= OBJPC_SYNC;
105244321Salc
105344321Salc	/*
105444321Salc	 * Call device-specific putpages function
105544321Salc	 */
105610556Sdyson	vp = object->handle;
1057248084Sattilio	VM_OBJECT_WUNLOCK(object);
105834403Smsmith	rtval = VOP_PUTPAGES(vp, m, bytes, sync, rtvals, 0);
105976827Salfred	KASSERT(rtval != EOPNOTSUPP,
106076827Salfred	    ("vnode_pager: stale FS putpages\n"));
1061248084Sattilio	VM_OBJECT_WLOCK(object);
106210556Sdyson}
106310556Sdyson
106433847Smsmith
10651549Srgrimes/*
106633847Smsmith * This is now called from local media FS's to operate against their
106745057Seivind * own vnodes if they fail to implement VOP_PUTPAGES.
106870374Sdillon *
106970374Sdillon * This is typically called indirectly via the pageout daemon and
107070374Sdillon * clustering has already typically occured, so in general we ask the
107170374Sdillon * underlying filesystem to write the data out asynchronously rather
107270374Sdillon * then delayed.
10731549Srgrimes */
107433847Smsmithint
1075208574Salcvnode_pager_generic_putpages(struct vnode *vp, vm_page_t *ma, int bytecount,
1076208574Salc    int flags, int *rtvals)
10771549Srgrimes{
10787695Sdg	int i;
107933847Smsmith	vm_object_t object;
1080208574Salc	vm_page_t m;
108133847Smsmith	int count;
10821549Srgrimes
10837695Sdg	int maxsize, ncount;
108412767Sdyson	vm_ooffset_t poffset;
10857695Sdg	struct uio auio;
10867695Sdg	struct iovec aiov;
10877695Sdg	int error;
108834206Sdyson	int ioflags;
1089151951Sps	int ppscheck = 0;
1090151951Sps	static struct timeval lastfail;
1091151951Sps	static int curfail;
10921549Srgrimes
109333847Smsmith	object = vp->v_object;
109433847Smsmith	count = bytecount / PAGE_SIZE;
109533847Smsmith
10961827Sdg	for (i = 0; i < count; i++)
1097222586Skib		rtvals[i] = VM_PAGER_ERROR;
10981549Srgrimes
1099208574Salc	if ((int64_t)ma[0]->pindex < 0) {
1100119544Smarcel		printf("vnode_pager_putpages: attempt to write meta-data!!! -- 0x%lx(%lx)\n",
1101208574Salc		    (long)ma[0]->pindex, (u_long)ma[0]->dirty);
11027695Sdg		rtvals[0] = VM_PAGER_BAD;
11037695Sdg		return VM_PAGER_BAD;
11045455Sdg	}
11057178Sdg
11067695Sdg	maxsize = count * PAGE_SIZE;
11077695Sdg	ncount = count;
11081549Srgrimes
1109208574Salc	poffset = IDX_TO_OFF(ma[0]->pindex);
111084854Sdillon
111184854Sdillon	/*
111284854Sdillon	 * If the page-aligned write is larger then the actual file we
111384854Sdillon	 * have to invalidate pages occuring beyond the file EOF.  However,
111484854Sdillon	 * there is an edge case where a file may not be page-aligned where
111584854Sdillon	 * the last page is partially invalid.  In this case the filesystem
111684854Sdillon	 * may not properly clear the dirty bits for the entire page (which
111784854Sdillon	 * could be VM_PAGE_BITS_ALL due to the page having been mmap()d).
111884854Sdillon	 * With the page locked we are free to fix-up the dirty bits here.
111987834Sdillon	 *
112087834Sdillon	 * We do not under any circumstances truncate the valid bits, as
112187834Sdillon	 * this will screw up bogus page replacement.
112284854Sdillon	 */
1123248084Sattilio	VM_OBJECT_WLOCK(object);
112412767Sdyson	if (maxsize + poffset > object->un_pager.vnp.vnp_size) {
112584854Sdillon		if (object->un_pager.vnp.vnp_size > poffset) {
112684854Sdillon			int pgoff;
112784854Sdillon
112812767Sdyson			maxsize = object->un_pager.vnp.vnp_size - poffset;
112984854Sdillon			ncount = btoc(maxsize);
113084854Sdillon			if ((pgoff = (int)maxsize & PAGE_MASK) != 0) {
1131208574Salc				/*
1132208574Salc				 * If the object is locked and the following
1133208574Salc				 * conditions hold, then the page's dirty
1134208574Salc				 * field cannot be concurrently changed by a
1135208574Salc				 * pmap operation.
1136208574Salc				 */
1137208574Salc				m = ma[ncount - 1];
1138254138Sattilio				vm_page_assert_sbusied(m);
1139237168Salc				KASSERT(!pmap_page_is_write_mapped(m),
1140208574Salc		("vnode_pager_generic_putpages: page %p is not read-only", m));
1141208574Salc				vm_page_clear_dirty(m, pgoff, PAGE_SIZE -
1142208574Salc				    pgoff);
114384854Sdillon			}
114484854Sdillon		} else {
11458585Sdg			maxsize = 0;
114684854Sdillon			ncount = 0;
114784854Sdillon		}
11488585Sdg		if (ncount < count) {
11498585Sdg			for (i = ncount; i < count; i++) {
11507695Sdg				rtvals[i] = VM_PAGER_BAD;
11511549Srgrimes			}
11521549Srgrimes		}
11531541Srgrimes	}
1154248084Sattilio	VM_OBJECT_WUNLOCK(object);
11557695Sdg
115670374Sdillon	/*
1157226366Sjhb	 * pageouts are already clustered, use IO_ASYNC to force a bawrite()
115870374Sdillon	 * rather then a bdwrite() to prevent paging I/O from saturating
1159108358Sdillon	 * the buffer cache.  Dummy-up the sequential heuristic to cause
1160108358Sdillon	 * large ranges to cluster.  If neither IO_SYNC or IO_ASYNC is set,
1161108358Sdillon	 * the system decides how to cluster.
116270374Sdillon	 */
116334206Sdyson	ioflags = IO_VMIO;
1164108358Sdillon	if (flags & (VM_PAGER_PUT_SYNC | VM_PAGER_PUT_INVAL))
1165108358Sdillon		ioflags |= IO_SYNC;
1166108358Sdillon	else if ((flags & VM_PAGER_CLUSTER_OK) == 0)
1167108358Sdillon		ioflags |= IO_ASYNC;
116834206Sdyson	ioflags |= (flags & VM_PAGER_PUT_INVAL) ? IO_INVAL: 0;
1169108358Sdillon	ioflags |= IO_SEQMAX << IO_SEQSHIFT;
11701827Sdg
11717695Sdg	aiov.iov_base = (caddr_t) 0;
11727695Sdg	aiov.iov_len = maxsize;
11737695Sdg	auio.uio_iov = &aiov;
11747695Sdg	auio.uio_iovcnt = 1;
117512767Sdyson	auio.uio_offset = poffset;
11767695Sdg	auio.uio_segflg = UIO_NOCOPY;
11777695Sdg	auio.uio_rw = UIO_WRITE;
11787695Sdg	auio.uio_resid = maxsize;
117983366Sjulian	auio.uio_td = (struct thread *) 0;
118091406Sjhb	error = VOP_WRITE(vp, &auio, ioflags, curthread->td_ucred);
1181170292Sattilio	PCPU_INC(cnt.v_vnodeout);
1182170292Sattilio	PCPU_ADD(cnt.v_vnodepgsout, ncount);
11833612Sdg
11848585Sdg	if (error) {
1185151951Sps		if ((ppscheck = ppsratecheck(&lastfail, &curfail, 1)))
1186151951Sps			printf("vnode_pager_putpages: I/O error %d\n", error);
11877695Sdg	}
11888585Sdg	if (auio.uio_resid) {
1189151951Sps		if (ppscheck || ppsratecheck(&lastfail, &curfail, 1))
1190194990Skib			printf("vnode_pager_putpages: residual I/O %zd at %lu\n",
1191208574Salc			    auio.uio_resid, (u_long)ma[0]->pindex);
11927695Sdg	}
119333936Sdyson	for (i = 0; i < ncount; i++) {
119433936Sdyson		rtvals[i] = VM_PAGER_OK;
11957695Sdg	}
11967695Sdg	return rtvals[0];
11977695Sdg}
1198222586Skib
1199222586Skibvoid
1200222586Skibvnode_pager_undirty_pages(vm_page_t *ma, int *rtvals, int written)
1201222586Skib{
1202222991Skib	vm_object_t obj;
1203222586Skib	int i, pos;
1204222586Skib
1205222991Skib	if (written == 0)
1206222991Skib		return;
1207222991Skib	obj = ma[0]->object;
1208248084Sattilio	VM_OBJECT_WLOCK(obj);
1209222586Skib	for (i = 0, pos = 0; pos < written; i++, pos += PAGE_SIZE) {
1210222586Skib		if (pos < trunc_page(written)) {
1211222586Skib			rtvals[i] = VM_PAGER_OK;
1212222586Skib			vm_page_undirty(ma[i]);
1213222586Skib		} else {
1214222586Skib			/* Partially written page. */
1215222586Skib			rtvals[i] = VM_PAGER_AGAIN;
1216222586Skib			vm_page_clear_dirty(ma[i], 0, written & PAGE_MASK);
1217222586Skib		}
1218222586Skib	}
1219248084Sattilio	VM_OBJECT_WUNLOCK(obj);
1220222586Skib}
1221232071Skib
1222232071Skibvoid
1223232071Skibvnode_pager_update_writecount(vm_object_t object, vm_offset_t start,
1224232071Skib    vm_offset_t end)
1225232071Skib{
1226232071Skib	struct vnode *vp;
1227232071Skib	vm_ooffset_t old_wm;
1228232071Skib
1229248084Sattilio	VM_OBJECT_WLOCK(object);
1230232071Skib	if (object->type != OBJT_VNODE) {
1231248084Sattilio		VM_OBJECT_WUNLOCK(object);
1232232071Skib		return;
1233232071Skib	}
1234232071Skib	old_wm = object->un_pager.vnp.writemappings;
1235232071Skib	object->un_pager.vnp.writemappings += (vm_ooffset_t)end - start;
1236232071Skib	vp = object->handle;
1237232071Skib	if (old_wm == 0 && object->un_pager.vnp.writemappings != 0) {
1238232071Skib		ASSERT_VOP_ELOCKED(vp, "v_writecount inc");
1239242476Skib		VOP_ADD_WRITECOUNT(vp, 1);
1240232701Sjhb		CTR3(KTR_VFS, "%s: vp %p v_writecount increased to %d",
1241232701Sjhb		    __func__, vp, vp->v_writecount);
1242232071Skib	} else if (old_wm != 0 && object->un_pager.vnp.writemappings == 0) {
1243232071Skib		ASSERT_VOP_ELOCKED(vp, "v_writecount dec");
1244242476Skib		VOP_ADD_WRITECOUNT(vp, -1);
1245232701Sjhb		CTR3(KTR_VFS, "%s: vp %p v_writecount decreased to %d",
1246232701Sjhb		    __func__, vp, vp->v_writecount);
1247232071Skib	}
1248248084Sattilio	VM_OBJECT_WUNLOCK(object);
1249232071Skib}
1250232071Skib
1251232071Skibvoid
1252232071Skibvnode_pager_release_writecount(vm_object_t object, vm_offset_t start,
1253232071Skib    vm_offset_t end)
1254232071Skib{
1255232071Skib	struct vnode *vp;
1256232071Skib	struct mount *mp;
1257232071Skib	vm_offset_t inc;
1258232071Skib
1259248084Sattilio	VM_OBJECT_WLOCK(object);
1260232071Skib
1261232071Skib	/*
1262232071Skib	 * First, recheck the object type to account for the race when
1263232071Skib	 * the vnode is reclaimed.
1264232071Skib	 */
1265232071Skib	if (object->type != OBJT_VNODE) {
1266248084Sattilio		VM_OBJECT_WUNLOCK(object);
1267232071Skib		return;
1268232071Skib	}
1269232071Skib
1270232071Skib	/*
1271232071Skib	 * Optimize for the case when writemappings is not going to
1272232071Skib	 * zero.
1273232071Skib	 */
1274232071Skib	inc = end - start;
1275232071Skib	if (object->un_pager.vnp.writemappings != inc) {
1276232071Skib		object->un_pager.vnp.writemappings -= inc;
1277248084Sattilio		VM_OBJECT_WUNLOCK(object);
1278232071Skib		return;
1279232071Skib	}
1280232071Skib
1281232071Skib	vp = object->handle;
1282232071Skib	vhold(vp);
1283248084Sattilio	VM_OBJECT_WUNLOCK(object);
1284232071Skib	mp = NULL;
1285232071Skib	vn_start_write(vp, &mp, V_WAIT);
1286232071Skib	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1287232071Skib
1288232071Skib	/*
1289232071Skib	 * Decrement the object's writemappings, by swapping the start
1290232071Skib	 * and end arguments for vnode_pager_update_writecount().  If
1291232071Skib	 * there was not a race with vnode reclaimation, then the
1292232071Skib	 * vnode's v_writecount is decremented.
1293232071Skib	 */
1294232071Skib	vnode_pager_update_writecount(object, end, start);
1295232071Skib	VOP_UNLOCK(vp, 0);
1296232071Skib	vdrop(vp);
1297232071Skib	if (mp != NULL)
1298232071Skib		vn_finished_write(mp);
1299232071Skib}
1300