vnode_pager.c revision 116167
/*
 * Copyright (c) 1990 University of Utah.
 * Copyright (c) 1991 The Regents of the University of California.
 * All rights reserved.
 * Copyright (c) 1993, 1994 John S. Dyson
 * Copyright (c) 1995, David Greenman
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)vnode_pager.c	7.5 (Berkeley) 4/20/91
 * $FreeBSD: head/sys/vm/vnode_pager.c 116167 2003-06-10 20:28:41Z alc $
 */

/*
 * Page to/from files (vnodes).
 */

/*
 * TODO:
 *	Implement VOP_GETPAGES/PUTPAGES interface for filesystems. Will
 *	greatly re-simplify the vnode_pager.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/vmmeter.h>
#include <sys/conf.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vm_map.h>
#include <vm/vnode_pager.h>
#include <vm/vm_extern.h>

static void vnode_pager_init(void);
static vm_offset_t vnode_pager_addr(struct vnode *vp, vm_ooffset_t address,
					 int *run);
static void vnode_pager_iodone(struct buf *bp);
static int vnode_pager_input_smlfs(vm_object_t object, vm_page_t m);
static int vnode_pager_input_old(vm_object_t object, vm_page_t m);
static void vnode_pager_dealloc(vm_object_t);
static int vnode_pager_getpages(vm_object_t, vm_page_t *, int, int);
static void vnode_pager_putpages(vm_object_t, vm_page_t *, int, boolean_t, int *);
static boolean_t vnode_pager_haspage(vm_object_t, vm_pindex_t, int *, int *);

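/* Pager operations vector used for OBJT_VNODE objects. */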
struct pagerops vnodepagerops = {
	vnode_pager_init,
	vnode_pager_alloc,
	vnode_pager_dealloc,
	vnode_pager_getpages,
	vnode_pager_putpages,
	vnode_pager_haspage,
	NULL
};

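/* Number of physical buffers (pbufs) available to the vnode pager. */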
int vnode_pbuf_freecnt;

static void
vnode_pager_init(void)
{

	vnode_pbuf_freecnt = nswbuf / 2 + 1;
}

/*
 * Allocate (or lookup) pager for a vnode.
 * Handle is a vnode pointer.
 *
 * MPSAFE
 */
vm_object_t
vnode_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot,
		  vm_ooffset_t offset)
{
	vm_object_t object;
	struct vnode *vp;

	/*
	 * Pageout to vnode, no can do yet.
	 */
	if (handle == NULL)
		return (NULL);

	vp = (struct vnode *) handle;

	ASSERT_VOP_LOCKED(vp, "vnode_pager_alloc");

	mtx_lock(&Giant);
	/*
	 * Prevent race condition when allocating the object. This
	 * can happen with NFS vnodes since the nfsnode isn't locked.
	 */
	VI_LOCK(vp);
	while (vp->v_iflag & VI_OLOCK) {
		vp->v_iflag |= VI_OWANT;
		msleep(vp, VI_MTX(vp), PVM, "vnpobj", 0);
	}
	vp->v_iflag |= VI_OLOCK;
	VI_UNLOCK(vp);

	/*
	 * If the object is being terminated, wait for it to
	 * go away.
	 */
	while ((object = vp->v_object) != NULL) {
		VM_OBJECT_LOCK(object);
		if ((object->flags & OBJ_DEAD) == 0)
			break;
		msleep(object, VM_OBJECT_MTX(object), PDROP | PVM, "vadead", 0);
	}

	if (vp->v_usecount == 0)
		panic("vnode_pager_alloc: no vnode reference");

	if (object == NULL) {
		/*
		 * Add an object of the appropriate size
		 */
		object = vm_object_allocate(OBJT_VNODE, OFF_TO_IDX(round_page(size)));

		object->un_pager.vnp.vnp_size = size;

		object->handle = handle;
		vp->v_object = object;
	} else {
		object->ref_count++;
		VM_OBJECT_UNLOCK(object);
	}
	VI_LOCK(vp);
	vp->v_usecount++;
	vp->v_iflag &= ~VI_OLOCK;
	if (vp->v_iflag & VI_OWANT) {
		vp->v_iflag &= ~VI_OWANT;
		wakeup(vp);
	}
	VI_UNLOCK(vp);
	mtx_unlock(&Giant);
	return (object);
}

/*
 *	The object must be locked.
 */
static void
vnode_pager_dealloc(object)
	vm_object_t object;
{
	struct vnode *vp = object->handle;

	if (vp == NULL)
		panic("vnode_pager_dealloc: pager already dealloced");

	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
	vm_object_pip_wait(object, "vnpdea");

	object->handle = NULL;
	object->type = OBJT_DEAD;
	ASSERT_VOP_LOCKED(vp, "vnode_pager_dealloc");
	vp->v_object = NULL;
	vp->v_vflag &= ~(VV_TEXT | VV_OBJBUF);
}

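/*
 * Determine, via VOP_BMAP(), whether the backing store has a block
 * allocated for the page at "pindex".  The "before" and "after" run
 * lengths reported by VOP_BMAP() are converted from filesystem blocks
 * to pages before being returned to the caller.
 */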
static boolean_t
vnode_pager_haspage(object, pindex, before, after)
	vm_object_t object;
	vm_pindex_t pindex;
	int *before;
	int *after;
{
	struct vnode *vp = object->handle;
	daddr_t bn;
	int err;
	daddr_t reqblock;
	int poff;
	int bsize;
	int pagesperblock, blocksperpage;

	GIANT_REQUIRED;
	/*
	 * If no vp or vp is doomed or marked transparent to VM, we do not
	 * have the page.
	 */
	if (vp == NULL)
		return FALSE;

	VI_LOCK(vp);
	if (vp->v_iflag & VI_DOOMED) {
		VI_UNLOCK(vp);
		return FALSE;
	}
	VI_UNLOCK(vp);
	/*
	 * If filesystem no longer mounted or offset beyond end of file we do
	 * not have the page.
	 */
	if ((vp->v_mount == NULL) ||
	    (IDX_TO_OFF(pindex) >= object->un_pager.vnp.vnp_size))
		return FALSE;

	bsize = vp->v_mount->mnt_stat.f_iosize;
	pagesperblock = bsize / PAGE_SIZE;
	blocksperpage = 0;
	if (pagesperblock > 0) {
		reqblock = pindex / pagesperblock;
	} else {
		blocksperpage = (PAGE_SIZE / bsize);
		reqblock = pindex * blocksperpage;
	}
	err = VOP_BMAP(vp, reqblock, (struct vnode **) 0, &bn,
		after, before);
	if (err)
		return TRUE;
	if (bn == -1)
		return FALSE;
	if (pagesperblock > 0) {
		poff = pindex - (reqblock * pagesperblock);
		if (before) {
			*before *= pagesperblock;
			*before += poff;
		}
		if (after) {
			int numafter;
			*after *= pagesperblock;
			numafter = pagesperblock - (poff + 1);
			if (IDX_TO_OFF(pindex + numafter) >
			    object->un_pager.vnp.vnp_size) {
				numafter =
				    OFF_TO_IDX(object->un_pager.vnp.vnp_size) -
				    pindex;
			}
			*after += numafter;
		}
	} else {
		if (before) {
			*before /= blocksperpage;
		}

		if (after) {
			*after /= blocksperpage;
		}
	}
	return TRUE;
}

/*
 * Lets the VM system know about a change in size for a file.
 * We adjust our own internal size and flush any cached pages in
 * the associated object that are affected by the size change.
 *
 * Note: this routine may be invoked as a result of a pager put
 * operation (possibly at object termination time), so we must be careful.
 */
void
vnode_pager_setsize(vp, nsize)
	struct vnode *vp;
	vm_ooffset_t nsize;
{
	vm_object_t object;
	vm_page_t m;
	vm_pindex_t nobjsize;

	if ((object = vp->v_object) == NULL)
		return;
	VM_OBJECT_LOCK(object);
	if (nsize == object->un_pager.vnp.vnp_size) {
		/*
		 * Hasn't changed size
		 */
		VM_OBJECT_UNLOCK(object);
		return;
	}
	nobjsize = OFF_TO_IDX(nsize + PAGE_MASK);
	if (nsize < object->un_pager.vnp.vnp_size) {
		/*
		 * File has shrunk. Toss any cached pages beyond the new EOF.
		 */
		if (nobjsize < object->size)
			vm_object_page_remove(object, nobjsize, object->size,
			    FALSE);
		/*
		 * this gets rid of garbage at the end of a page that is now
		 * only partially backed by the vnode.
		 *
		 * XXX for some reason (I don't know yet), if we take a
		 * completely invalid page and mark it partially valid
		 * it can screw up NFS reads, so we don't allow the case.
		 */
		if ((nsize & PAGE_MASK) &&
		    (m = vm_page_lookup(object, OFF_TO_IDX(nsize))) != NULL) {
			vm_page_lock_queues();
			if (m->valid) {
				int base = (int)nsize & PAGE_MASK;
				int size = PAGE_SIZE - base;

				/*
				 * Clear out partial-page garbage in case
				 * the page has been mapped.
				 */
				pmap_zero_page_area(m, base, size);

				/*
				 * XXX work around SMP data integrity race
				 * by unmapping the page from user processes.
				 * The garbage we just cleared may be mapped
				 * to a user process running on another cpu
				 * and this code is not running through normal
				 * I/O channels which handle SMP issues for
				 * us, so unmap page to synchronize all cpus.
				 *
				 * XXX should vm_pager_unmap_page() have
				 * dealt with this?
				 */
				pmap_remove_all(m);

				/*
				 * Clear out partial-page dirty bits.  This
				 * has the side effect of setting the valid
				 * bits, but that is ok.  There are a bunch
				 * of places in the VM system where we expected
				 * m->dirty == VM_PAGE_BITS_ALL.  The file EOF
				 * case is one of them.  If the page is still
				 * partially dirty, make it fully dirty.
				 *
				 * note that we do not clear out the valid
				 * bits.  This would prevent bogus_page
				 * replacement from working properly.
				 */
				vm_page_set_validclean(m, base, size);
				if (m->dirty != 0)
					m->dirty = VM_PAGE_BITS_ALL;
			}
			vm_page_unlock_queues();
		}
	}
	object->un_pager.vnp.vnp_size = nsize;
	object->size = nobjsize;
	VM_OBJECT_UNLOCK(object);
}

/*
 * calculate the disk block address (in DEV_BSIZE units) of the
 * specified virtual file address
 */
static vm_offset_t
vnode_pager_addr(vp, address, run)
	struct vnode *vp;
	vm_ooffset_t address;
	int *run;
{
	int rtaddress;
	int bsize;
	daddr_t block;
	struct vnode *rtvp;
	int err;
	daddr_t vblock;
	int voffset;

	GIANT_REQUIRED;
	if ((int) address < 0)
		return -1;

	if (vp->v_mount == NULL)
		return -1;

	bsize = vp->v_mount->mnt_stat.f_iosize;
	vblock = address / bsize;
	voffset = address % bsize;

	err = VOP_BMAP(vp, vblock, &rtvp, &block, run, NULL);

	if (err || (block == -1))
		rtaddress = -1;
	else {
		rtaddress = block + voffset / DEV_BSIZE;
		if (run) {
			*run += 1;
			*run *= bsize/PAGE_SIZE;
			*run -= voffset/PAGE_SIZE;
		}
	}

	return rtaddress;
}

/*
 * interrupt routine for I/O completion
 */
static void
vnode_pager_iodone(bp)
	struct buf *bp;
{
	bp->b_flags |= B_DONE;
	wakeup(bp);
}

/*
 * small block filesystem vnode pager input
 */
static int
vnode_pager_input_smlfs(object, m)
	vm_object_t object;
	vm_page_t m;
{
	int i;
	int s;
	struct vnode *dp, *vp;
	struct buf *bp;
	vm_offset_t kva;
	int fileaddr;
	vm_offset_t bsize;
	int error = 0;

	GIANT_REQUIRED;

	vp = object->handle;
	if (vp->v_mount == NULL)
		return VM_PAGER_BAD;

	bsize = vp->v_mount->mnt_stat.f_iosize;

	VOP_BMAP(vp, 0, &dp, 0, NULL, NULL);

	kva = vm_pager_map_page(m);

	for (i = 0; i < PAGE_SIZE / bsize; i++) {
		vm_ooffset_t address;

		if (vm_page_bits(i * bsize, bsize) & m->valid)
			continue;

		address = IDX_TO_OFF(m->pindex) + i * bsize;
		if (address >= object->un_pager.vnp.vnp_size) {
			fileaddr = -1;
		} else {
			fileaddr = vnode_pager_addr(vp, address, NULL);
		}
		if (fileaddr != -1) {
			bp = getpbuf(&vnode_pbuf_freecnt);

			/* build a minimal buffer header */
			bp->b_iocmd = BIO_READ;
			bp->b_iodone = vnode_pager_iodone;
			KASSERT(bp->b_rcred == NOCRED, ("leaking read ucred"));
			KASSERT(bp->b_wcred == NOCRED, ("leaking write ucred"));
			bp->b_rcred = crhold(curthread->td_ucred);
			bp->b_wcred = crhold(curthread->td_ucred);
			bp->b_data = (caddr_t) kva + i * bsize;
			bp->b_blkno = fileaddr;
			pbgetvp(dp, bp);
			bp->b_bcount = bsize;
			bp->b_bufsize = bsize;
			bp->b_runningbufspace = bp->b_bufsize;
			runningbufspace += bp->b_runningbufspace;

			/* do the input */
			VOP_SPECSTRATEGY(bp->b_vp, bp);

			/* we definitely need to be at splvm here */

			s = splvm();
			while ((bp->b_flags & B_DONE) == 0) {
				tsleep(bp, PVM, "vnsrd", 0);
			}
			splx(s);
			if ((bp->b_ioflags & BIO_ERROR) != 0)
				error = EIO;

			/*
			 * free the buffer header back to the swap buffer pool
			 */
			relpbuf(bp, &vnode_pbuf_freecnt);
			if (error)
				break;

			vm_page_lock_queues();
			vm_page_set_validclean(m, (i * bsize) & PAGE_MASK, bsize);
			vm_page_unlock_queues();
		} else {
			vm_page_lock_queues();
			vm_page_set_validclean(m, (i * bsize) & PAGE_MASK, bsize);
			vm_page_unlock_queues();
			bzero((caddr_t) kva + i * bsize, bsize);
		}
	}
	vm_pager_unmap_page(kva);
	vm_page_lock_queues();
	pmap_clear_modify(m);
	vm_page_flag_clear(m, PG_ZERO);
	vm_page_unlock_queues();
	if (error) {
		return VM_PAGER_ERROR;
	}
	return VM_PAGER_OK;

}


/*
 * old style vnode pager input routine
 */
static int
vnode_pager_input_old(object, m)
	vm_object_t object;
	vm_page_t m;
{
	struct uio auio;
	struct iovec aiov;
	int error;
	int size;
	vm_offset_t kva;
	struct vnode *vp;

	GIANT_REQUIRED;
	error = 0;

	/*
	 * Return failure if beyond current EOF
	 */
	if (IDX_TO_OFF(m->pindex) >= object->un_pager.vnp.vnp_size) {
		return VM_PAGER_BAD;
	} else {
		size = PAGE_SIZE;
		if (IDX_TO_OFF(m->pindex) + size > object->un_pager.vnp.vnp_size)
			size = object->un_pager.vnp.vnp_size - IDX_TO_OFF(m->pindex);

		/*
		 * Allocate a kernel virtual address and initialize so that
		 * we can use VOP_READ/WRITE routines.
		 */
		kva = vm_pager_map_page(m);

		vp = object->handle;
		aiov.iov_base = (caddr_t) kva;
		aiov.iov_len = size;
		auio.uio_iov = &aiov;
		auio.uio_iovcnt = 1;
		auio.uio_offset = IDX_TO_OFF(m->pindex);
		auio.uio_segflg = UIO_SYSSPACE;
		auio.uio_rw = UIO_READ;
		auio.uio_resid = size;
		auio.uio_td = curthread;

		error = VOP_READ(vp, &auio, 0, curthread->td_ucred);
		if (!error) {
			int count = size - auio.uio_resid;

			if (count == 0)
				error = EINVAL;
			else if (count != PAGE_SIZE)
				bzero((caddr_t) kva + count, PAGE_SIZE - count);
		}
		vm_pager_unmap_page(kva);
	}
	vm_page_lock_queues();
	pmap_clear_modify(m);
	vm_page_undirty(m);
	vm_page_flag_clear(m, PG_ZERO);
	if (!error)
		m->valid = VM_PAGE_BITS_ALL;
	vm_page_unlock_queues();
	return error ? VM_PAGER_ERROR : VM_PAGER_OK;
}

/*
 * generic vnode pager input routine
 */

/*
 * Local media VFS's that do not implement their own VOP_GETPAGES
 * should have their VOP_GETPAGES call vnode_pager_generic_getpages()
 * to implement the previous behaviour.
 *
 * All other FS's should use the bypass to get to the local media
 * backing vp's VOP_GETPAGES.
 */
static int
vnode_pager_getpages(object, m, count, reqpage)
	vm_object_t object;
	vm_page_t *m;
	int count;
	int reqpage;
{
	int rtval;
	struct vnode *vp;
	int bytes = count * PAGE_SIZE;

	GIANT_REQUIRED;
	vp = object->handle;
	rtval = VOP_GETPAGES(vp, m, bytes, reqpage, 0);
	KASSERT(rtval != EOPNOTSUPP,
	    ("vnode_pager: FS getpages not implemented\n"));
	return rtval;
}

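/*
 * For illustration only: a local media filesystem's VOP_GETPAGES bypass
 * would typically be a one-line wrapper along the following lines.  The
 * "myfs_getpages" name is hypothetical and the vop_getpages_args fields
 * are assumed to follow the usual layout; this is a sketch, not part of
 * this file's interface.
 *
 *	static int
 *	myfs_getpages(struct vop_getpages_args *ap)
 *	{
 *
 *		return (vnode_pager_generic_getpages(ap->a_vp, ap->a_m,
 *		    ap->a_count, ap->a_reqpage));
 *	}
 */
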
/*
 * This is now called from local media FS's to operate against their
 * own vnodes if they fail to implement VOP_GETPAGES.
 */
int
vnode_pager_generic_getpages(vp, m, bytecount, reqpage)
	struct vnode *vp;
	vm_page_t *m;
	int bytecount;
	int reqpage;
{
	vm_object_t object;
	vm_offset_t kva;
	off_t foff, tfoff, nextoff;
	int i, j, size, bsize, first, firstaddr;
	struct vnode *dp;
	int runpg;
	int runend;
	struct buf *bp;
	int s;
	int count;
	int error = 0;

	GIANT_REQUIRED;
	object = vp->v_object;
	count = bytecount / PAGE_SIZE;

	if (vp->v_mount == NULL)
		return VM_PAGER_BAD;

	bsize = vp->v_mount->mnt_stat.f_iosize;

	/* get the UNDERLYING device for the file with VOP_BMAP() */

	/*
	 * originally, we did not check for an error return value -- assuming
	 * an fs always has a bmap entry point -- that assumption is wrong!!!
	 */
	foff = IDX_TO_OFF(m[reqpage]->pindex);

	/*
	 * if we can't bmap, use old VOP code
	 */
	if (VOP_BMAP(vp, 0, &dp, 0, NULL, NULL)) {
		vm_page_lock_queues();
		for (i = 0; i < count; i++)
			if (i != reqpage)
				vm_page_free(m[i]);
		vm_page_unlock_queues();
		cnt.v_vnodein++;
		cnt.v_vnodepgsin++;
		return vnode_pager_input_old(object, m[reqpage]);

		/*
		 * if the blocksize is smaller than a page size, then use
		 * special small filesystem code.  NFS sometimes has a small
		 * blocksize, but it can handle large reads itself.
		 */
	} else if ((PAGE_SIZE / bsize) > 1 &&
	    (vp->v_mount->mnt_stat.f_type != nfs_mount_type)) {
		vm_page_lock_queues();
		for (i = 0; i < count; i++)
			if (i != reqpage)
				vm_page_free(m[i]);
		vm_page_unlock_queues();
		cnt.v_vnodein++;
		cnt.v_vnodepgsin++;
		return vnode_pager_input_smlfs(object, m[reqpage]);
	}

	/*
	 * If we have a completely valid page available to us, we can
	 * clean up and return.  Otherwise we have to re-read the
	 * media.
	 */
	if (m[reqpage]->valid == VM_PAGE_BITS_ALL) {
		vm_page_lock_queues();
		for (i = 0; i < count; i++)
			if (i != reqpage)
				vm_page_free(m[i]);
		vm_page_unlock_queues();
		return VM_PAGER_OK;
	}
	m[reqpage]->valid = 0;

	/*
	 * here on direct device I/O
	 */
	firstaddr = -1;

	/*
	 * calculate the run that includes the required page
	 */
	for (first = 0, i = 0; i < count; i = runend) {
		firstaddr = vnode_pager_addr(vp,
			IDX_TO_OFF(m[i]->pindex), &runpg);
		if (firstaddr == -1) {
			if (i == reqpage && foff < object->un_pager.vnp.vnp_size) {
				panic("vnode_pager_getpages: unexpected missing page: firstaddr: %d, foff: 0x%jx%08jx, vnp_size: 0x%jx%08jx",
				    firstaddr, (uintmax_t)(foff >> 32),
				    (uintmax_t)foff,
				    (uintmax_t)
				    (object->un_pager.vnp.vnp_size >> 32),
				    (uintmax_t)object->un_pager.vnp.vnp_size);
			}
			vm_page_lock_queues();
			vm_page_free(m[i]);
			vm_page_unlock_queues();
			runend = i + 1;
			first = runend;
			continue;
		}
		runend = i + runpg;
		if (runend <= reqpage) {
			vm_page_lock_queues();
			for (j = i; j < runend; j++)
				vm_page_free(m[j]);
			vm_page_unlock_queues();
		} else {
			if (runpg < (count - first)) {
				vm_page_lock_queues();
				for (i = first + runpg; i < count; i++)
					vm_page_free(m[i]);
				vm_page_unlock_queues();
				count = first + runpg;
			}
			break;
		}
		first = runend;
	}

	/*
	 * the first and last page have been calculated now, move input pages
	 * to be zero based...
	 */
	if (first != 0) {
		for (i = first; i < count; i++) {
			m[i - first] = m[i];
		}
		count -= first;
		reqpage -= first;
	}

	/*
	 * calculate the file virtual address for the transfer
	 */
	foff = IDX_TO_OFF(m[0]->pindex);

	/*
	 * calculate the size of the transfer
	 */
	size = count * PAGE_SIZE;
	if ((foff + size) > object->un_pager.vnp.vnp_size)
		size = object->un_pager.vnp.vnp_size - foff;

	/*
	 * round up physical size for real devices.
	 */
	if (dp->v_type == VBLK || dp->v_type == VCHR) {
		int secmask = dp->v_rdev->si_bsize_phys - 1;
		KASSERT(secmask < PAGE_SIZE, ("vnode_pager_generic_getpages: sector size %d too large\n", secmask + 1));
		size = (size + secmask) & ~secmask;
	}

	bp = getpbuf(&vnode_pbuf_freecnt);
	kva = (vm_offset_t) bp->b_data;

	/*
	 * and map the pages to be read into the kva
	 */
	pmap_qenter(kva, m, count);

	/* build a minimal buffer header */
	bp->b_iocmd = BIO_READ;
	bp->b_iodone = vnode_pager_iodone;
	/* B_PHYS is not set, but it is nice to fill this in */
	KASSERT(bp->b_rcred == NOCRED, ("leaking read ucred"));
	KASSERT(bp->b_wcred == NOCRED, ("leaking write ucred"));
	bp->b_rcred = crhold(curthread->td_ucred);
	bp->b_wcred = crhold(curthread->td_ucred);
	bp->b_blkno = firstaddr;
	pbgetvp(dp, bp);
	bp->b_bcount = size;
	bp->b_bufsize = size;
	bp->b_runningbufspace = bp->b_bufsize;
	runningbufspace += bp->b_runningbufspace;

	cnt.v_vnodein++;
	cnt.v_vnodepgsin += count;

	/* do the input */
	if (dp->v_type == VCHR)
		VOP_SPECSTRATEGY(bp->b_vp, bp);
	else
		VOP_STRATEGY(bp->b_vp, bp);

	s = splvm();
	/* we definitely need to be at splvm here */

	while ((bp->b_flags & B_DONE) == 0) {
		tsleep(bp, PVM, "vnread", 0);
	}
	splx(s);
	if ((bp->b_ioflags & BIO_ERROR) != 0)
		error = EIO;

	if (!error) {
		if (size != count * PAGE_SIZE)
			bzero((caddr_t) kva + size, PAGE_SIZE * count - size);
	}
	pmap_qremove(kva, count);

	/*
	 * free the buffer header back to the swap buffer pool
	 */
	relpbuf(bp, &vnode_pbuf_freecnt);

	vm_page_lock_queues();
	for (i = 0, tfoff = foff; i < count; i++, tfoff = nextoff) {
		vm_page_t mt;

		nextoff = tfoff + PAGE_SIZE;
		mt = m[i];

		if (nextoff <= object->un_pager.vnp.vnp_size) {
			/*
			 * Read filled up entire page.
			 */
			mt->valid = VM_PAGE_BITS_ALL;
			vm_page_undirty(mt);	/* should be an assert? XXX */
			pmap_clear_modify(mt);
		} else {
			/*
			 * Read did not fill up entire page.  Since this
			 * is getpages, the page may be mapped, so we have
			 * to zero the invalid portions of the page even
			 * though we aren't setting them valid.
			 *
			 * Currently we do not set the entire page valid,
			 * we just try to clear the piece that we couldn't
			 * read.
			 */
			vm_page_set_validclean(mt, 0,
			    object->un_pager.vnp.vnp_size - tfoff);
			/* handled by vm_fault now */
			/* vm_page_zero_invalid(mt, FALSE); */
		}

		vm_page_flag_clear(mt, PG_ZERO);
		if (i != reqpage) {

			/*
			 * whether or not to leave the page activated is up in
			 * the air, but we should put the page on a page queue
			 * somewhere. (it already is in the object). Result:
			 * Empirical results show that deactivating pages
			 * is best.
			 */

			/*
			 * just in case someone was asking for this page we
			 * now tell them that it is ok to use
			 */
			if (!error) {
				if (mt->flags & PG_WANTED)
					vm_page_activate(mt);
				else
					vm_page_deactivate(mt);
				vm_page_wakeup(mt);
			} else {
				vm_page_free(mt);
			}
		}
	}
	vm_page_unlock_queues();
	if (error) {
		printf("vnode_pager_getpages: I/O read error\n");
	}
	return (error ? VM_PAGER_ERROR : VM_PAGER_OK);
}

/*
 * EOPNOTSUPP is no longer legal.  For local media VFS's that do not
 * implement their own VOP_PUTPAGES, their VOP_PUTPAGES should call
 * vnode_pager_generic_putpages() to implement the previous behaviour.
 *
 * All other FS's should use the bypass to get to the local media
 * backing vp's VOP_PUTPAGES.
 */
static void
vnode_pager_putpages(object, m, count, sync, rtvals)
	vm_object_t object;
	vm_page_t *m;
	int count;
	boolean_t sync;
	int *rtvals;
{
	int rtval;
	struct vnode *vp;
	struct mount *mp;
	int bytes = count * PAGE_SIZE;

	GIANT_REQUIRED;
	/*
	 * Force synchronous operation if we are extremely low on memory
	 * to prevent a low-memory deadlock.  VOP operations often need to
	 * allocate more memory to initiate the I/O ( i.e. do a BMAP
	 * operation ).  The swapper handles the case by limiting the amount
	 * of asynchronous I/O, but that sort of solution doesn't scale well
	 * for the vnode pager without a lot of work.
	 *
	 * Also, the backing vnode's iodone routine may not wake the pageout
	 * daemon up.  This should probably be addressed XXX.
	 */

	if ((cnt.v_free_count + cnt.v_cache_count) < cnt.v_pageout_free_min)
		sync |= OBJPC_SYNC;

	/*
	 * Call device-specific putpages function
	 */
	vp = object->handle;
	if (vp->v_type != VREG)
		mp = NULL;
	(void)vn_start_write(vp, &mp, V_WAIT);
	rtval = VOP_PUTPAGES(vp, m, bytes, sync, rtvals, 0);
	KASSERT(rtval != EOPNOTSUPP,
	    ("vnode_pager: stale FS putpages\n"));
	vn_finished_write(mp);
}

96333847Smsmith
9641549Srgrimes/*
96533847Smsmith * This is now called from local media FS's to operate against their
96645057Seivind * own vnodes if they fail to implement VOP_PUTPAGES.
96770374Sdillon *
96870374Sdillon * This is typically called indirectly via the pageout daemon and
96970374Sdillon * clustering has already typically occured, so in general we ask the
97070374Sdillon * underlying filesystem to write the data out asynchronously rather
97170374Sdillon * then delayed.
9721549Srgrimes */
97333847Smsmithint
97434206Sdysonvnode_pager_generic_putpages(vp, m, bytecount, flags, rtvals)
97533847Smsmith	struct vnode *vp;
9761549Srgrimes	vm_page_t *m;
97733847Smsmith	int bytecount;
97834206Sdyson	int flags;
9795455Sdg	int *rtvals;
9801549Srgrimes{
9817695Sdg	int i;
98233847Smsmith	vm_object_t object;
98333847Smsmith	int count;
9841549Srgrimes
9857695Sdg	int maxsize, ncount;
98612767Sdyson	vm_ooffset_t poffset;
9877695Sdg	struct uio auio;
9887695Sdg	struct iovec aiov;
9897695Sdg	int error;
99034206Sdyson	int ioflags;
9911549Srgrimes
99279224Sdillon	GIANT_REQUIRED;
99333847Smsmith	object = vp->v_object;
99433847Smsmith	count = bytecount / PAGE_SIZE;
99533847Smsmith
9961827Sdg	for (i = 0; i < count; i++)
9971549Srgrimes		rtvals[i] = VM_PAGER_AGAIN;
9981549Srgrimes
99912767Sdyson	if ((int) m[0]->pindex < 0) {
100048409Speter		printf("vnode_pager_putpages: attempt to write meta-data!!! -- 0x%lx(%x)\n",
100148409Speter			(long)m[0]->pindex, m[0]->dirty);
10027695Sdg		rtvals[0] = VM_PAGER_BAD;
10037695Sdg		return VM_PAGER_BAD;
10045455Sdg	}
10057178Sdg
10067695Sdg	maxsize = count * PAGE_SIZE;
10077695Sdg	ncount = count;
10081549Srgrimes
100912767Sdyson	poffset = IDX_TO_OFF(m[0]->pindex);
101084854Sdillon
101184854Sdillon	/*
101284854Sdillon	 * If the page-aligned write is larger then the actual file we
101384854Sdillon	 * have to invalidate pages occuring beyond the file EOF.  However,
101484854Sdillon	 * there is an edge case where a file may not be page-aligned where
101584854Sdillon	 * the last page is partially invalid.  In this case the filesystem
101684854Sdillon	 * may not properly clear the dirty bits for the entire page (which
101784854Sdillon	 * could be VM_PAGE_BITS_ALL due to the page having been mmap()d).
101884854Sdillon	 * With the page locked we are free to fix-up the dirty bits here.
101987834Sdillon	 *
102087834Sdillon	 * We do not under any circumstances truncate the valid bits, as
102187834Sdillon	 * this will screw up bogus page replacement.
102284854Sdillon	 */
102312767Sdyson	if (maxsize + poffset > object->un_pager.vnp.vnp_size) {
102484854Sdillon		if (object->un_pager.vnp.vnp_size > poffset) {
102584854Sdillon			int pgoff;
102684854Sdillon
102712767Sdyson			maxsize = object->un_pager.vnp.vnp_size - poffset;
102884854Sdillon			ncount = btoc(maxsize);
102984854Sdillon			if ((pgoff = (int)maxsize & PAGE_MASK) != 0) {
103084854Sdillon				vm_page_clear_dirty(m[ncount - 1], pgoff,
103184854Sdillon					PAGE_SIZE - pgoff);
103284854Sdillon			}
103384854Sdillon		} else {
10348585Sdg			maxsize = 0;
103584854Sdillon			ncount = 0;
103684854Sdillon		}
10378585Sdg		if (ncount < count) {
10388585Sdg			for (i = ncount; i < count; i++) {
10397695Sdg				rtvals[i] = VM_PAGER_BAD;
10401549Srgrimes			}
10411549Srgrimes		}
10421541Srgrimes	}
10437695Sdg
104470374Sdillon	/*
104570374Sdillon	 * pageouts are already clustered, use IO_ASYNC t o force a bawrite()
104670374Sdillon	 * rather then a bdwrite() to prevent paging I/O from saturating
1047108358Sdillon	 * the buffer cache.  Dummy-up the sequential heuristic to cause
1048108358Sdillon	 * large ranges to cluster.  If neither IO_SYNC or IO_ASYNC is set,
1049108358Sdillon	 * the system decides how to cluster.
105070374Sdillon	 */
105134206Sdyson	ioflags = IO_VMIO;
1052108358Sdillon	if (flags & (VM_PAGER_PUT_SYNC | VM_PAGER_PUT_INVAL))
1053108358Sdillon		ioflags |= IO_SYNC;
1054108358Sdillon	else if ((flags & VM_PAGER_CLUSTER_OK) == 0)
1055108358Sdillon		ioflags |= IO_ASYNC;
105634206Sdyson	ioflags |= (flags & VM_PAGER_PUT_INVAL) ? IO_INVAL: 0;
1057108358Sdillon	ioflags |= IO_SEQMAX << IO_SEQSHIFT;
10581827Sdg
10597695Sdg	aiov.iov_base = (caddr_t) 0;
10607695Sdg	aiov.iov_len = maxsize;
10617695Sdg	auio.uio_iov = &aiov;
10627695Sdg	auio.uio_iovcnt = 1;
106312767Sdyson	auio.uio_offset = poffset;
10647695Sdg	auio.uio_segflg = UIO_NOCOPY;
10657695Sdg	auio.uio_rw = UIO_WRITE;
10667695Sdg	auio.uio_resid = maxsize;
106783366Sjulian	auio.uio_td = (struct thread *) 0;
106891406Sjhb	error = VOP_WRITE(vp, &auio, ioflags, curthread->td_ucred);
10693612Sdg	cnt.v_vnodeout++;
10707695Sdg	cnt.v_vnodepgsout += ncount;
10713612Sdg
10728585Sdg	if (error) {
10739507Sdg		printf("vnode_pager_putpages: I/O error %d\n", error);
10747695Sdg	}
10758585Sdg	if (auio.uio_resid) {
107637555Sbde		printf("vnode_pager_putpages: residual I/O %d at %lu\n",
107737555Sbde		    auio.uio_resid, (u_long)m[0]->pindex);
10787695Sdg	}
107933936Sdyson	for (i = 0; i < ncount; i++) {
108033936Sdyson		rtvals[i] = VM_PAGER_OK;
10817695Sdg	}
10827695Sdg	return rtvals[0];
10837695Sdg}
10841549Srgrimes
10857695Sdgstruct vnode *
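/*
 * Walk the object's backing-object chain looking for a vnode-backed
 * object, acquire a shared lock on that object's vnode with vget(),
 * and return the vnode.  Returns NULL if no live OBJT_VNODE object
 * is found.
 */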
struct vnode *
vnode_pager_lock(object)
	vm_object_t object;
{
	struct thread *td = curthread;	/* XXX */

	GIANT_REQUIRED;

	for (; object != NULL; object = object->backing_object) {
		if (object->type != OBJT_VNODE)
			continue;
		if (object->flags & OBJ_DEAD) {
			return NULL;
		}

		/* XXX; If object->handle can change, we need to cache it. */
		while (vget(object->handle,
			LK_NOPAUSE | LK_SHARED | LK_RETRY | LK_CANRECURSE, td)){
			if ((object->flags & OBJ_DEAD) || (object->type != OBJT_VNODE))
				return NULL;
			printf("vnode_pager_lock: retrying\n");
		}
		return object->handle;
	}
	return NULL;
}