vnode_pager.c revision 12423
11541Srgrimes/*
21541Srgrimes * Copyright (c) 1990 University of Utah.
31549Srgrimes * Copyright (c) 1991 The Regents of the University of California.
41549Srgrimes * All rights reserved.
59507Sdg * Copyright (c) 1993, 1994 John S. Dyson
69507Sdg * Copyright (c) 1995, David Greenman
71541Srgrimes *
81541Srgrimes * This code is derived from software contributed to Berkeley by
91541Srgrimes * the Systems Programming Group of the University of Utah Computer
101541Srgrimes * Science Department.
111541Srgrimes *
121541Srgrimes * Redistribution and use in source and binary forms, with or without
131541Srgrimes * modification, are permitted provided that the following conditions
141541Srgrimes * are met:
151541Srgrimes * 1. Redistributions of source code must retain the above copyright
161541Srgrimes *    notice, this list of conditions and the following disclaimer.
171541Srgrimes * 2. Redistributions in binary form must reproduce the above copyright
181541Srgrimes *    notice, this list of conditions and the following disclaimer in the
191541Srgrimes *    documentation and/or other materials provided with the distribution.
201541Srgrimes * 3. All advertising materials mentioning features or use of this software
211541Srgrimes *    must display the following acknowledgement:
221541Srgrimes *	This product includes software developed by the University of
231541Srgrimes *	California, Berkeley and its contributors.
241541Srgrimes * 4. Neither the name of the University nor the names of its contributors
251541Srgrimes *    may be used to endorse or promote products derived from this software
261541Srgrimes *    without specific prior written permission.
271541Srgrimes *
281541Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
291541Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
301541Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
311541Srgrimes * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
321541Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
331541Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
341541Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
351541Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
361541Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
371541Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
381541Srgrimes * SUCH DAMAGE.
391541Srgrimes *
401549Srgrimes *	from: @(#)vnode_pager.c	7.5 (Berkeley) 4/20/91
4112423Sphk *	$Id: vnode_pager.c,v 1.52 1995/10/30 17:56:30 bde Exp $
421541Srgrimes */
431541Srgrimes
441541Srgrimes/*
451541Srgrimes * Page to/from files (vnodes).
461541Srgrimes */
471541Srgrimes
481549Srgrimes/*
491549Srgrimes * TODO:
509507Sdg *	Implement VOP_GETPAGES/PUTPAGES interface for filesystems. Will
517695Sdg *	greatly re-simplify the vnode_pager.
521549Srgrimes */
531549Srgrimes
541541Srgrimes#include <sys/param.h>
551541Srgrimes#include <sys/systm.h>
565455Sdg#include <sys/kernel.h>
571541Srgrimes#include <sys/proc.h>
581541Srgrimes#include <sys/malloc.h>
591541Srgrimes#include <sys/vnode.h>
601541Srgrimes#include <sys/uio.h>
611541Srgrimes#include <sys/mount.h>
629507Sdg#include <sys/buf.h>
631541Srgrimes
641541Srgrimes#include <vm/vm.h>
651541Srgrimes#include <vm/vm_page.h>
669507Sdg#include <vm/vm_pager.h>
671541Srgrimes#include <vm/vnode_pager.h>
681541Srgrimes
6911943Sbdeextern vm_offset_t vnode_pager_addr __P((struct vnode *vp, vm_offset_t address,
7011943Sbde					 int *run));
7111943Sbdeextern void vnode_pager_iodone __P((struct buf *bp));
7211943Sbdeextern int vnode_pager_input_smlfs __P((vm_object_t object, vm_page_t m));
7311943Sbdeextern int vnode_pager_input_old __P((vm_object_t object, vm_page_t m));
7411943Sbde
751541Srgrimesstruct pagerops vnodepagerops = {
769507Sdg	NULL,
771541Srgrimes	vnode_pager_alloc,
781541Srgrimes	vnode_pager_dealloc,
799507Sdg	vnode_pager_getpages,
809507Sdg	vnode_pager_putpages,
819507Sdg	vnode_pager_haspage,
829507Sdg	NULL
831541Srgrimes};
841541Srgrimes
8511943Sbdestatic int vnode_pager_leaf_getpages __P((vm_object_t object, vm_page_t *m,
8611943Sbde					  int count, int reqpage));
8711943Sbdestatic int vnode_pager_leaf_putpages __P((vm_object_t object, vm_page_t *m,
8811943Sbde					  int count, boolean_t sync,
8911943Sbde					  int *rtvals));
9010556Sdyson
911541Srgrimes/*
921541Srgrimes * Allocate (or lookup) pager for a vnode.
931541Srgrimes * Handle is a vnode pointer.
941541Srgrimes */
959507Sdgvm_object_t
961549Srgrimesvnode_pager_alloc(handle, size, prot, offset)
978416Sdg	void *handle;
981541Srgrimes	vm_size_t size;
991541Srgrimes	vm_prot_t prot;
1001549Srgrimes	vm_offset_t offset;
1011541Srgrimes{
1029456Sdg	vm_object_t object;
1031541Srgrimes	struct vnode *vp;
1041541Srgrimes
1051541Srgrimes	/*
1061541Srgrimes	 * Pageout to vnode, no can do yet.
1071541Srgrimes	 */
1081541Srgrimes	if (handle == NULL)
1091827Sdg		return (NULL);
1101541Srgrimes
1119411Sdg	vp = (struct vnode *) handle;
1129411Sdg
1131541Srgrimes	/*
1149411Sdg	 * Prevent race condition when allocating the object. This
1159411Sdg	 * can happen with NFS vnodes since the nfsnode isn't locked.
1161541Srgrimes	 */
1179411Sdg	while (vp->v_flag & VOLOCK) {
1189411Sdg		vp->v_flag |= VOWANT;
1199411Sdg		tsleep(vp, PVM, "vnpobj", 0);
1209411Sdg	}
1219411Sdg	vp->v_flag |= VOLOCK;
1229411Sdg
1239411Sdg	/*
1249411Sdg	 * If the object is being terminated, wait for it to
1259411Sdg	 * go away.
1269411Sdg	 */
1279507Sdg	while (((object = vp->v_object) != NULL) && (object->flags & OBJ_DEAD)) {
1289356Sdg		tsleep(object, PVM, "vadead", 0);
1299507Sdg	}
1305455Sdg
1319507Sdg	if (object == NULL) {
1321541Srgrimes		/*
1331541Srgrimes		 * And an object of the appropriate size
1341541Srgrimes		 */
1359507Sdg		object = vm_object_allocate(OBJT_VNODE, round_page(size));
1369456Sdg		object->flags = OBJ_CANPERSIST;
1371827Sdg
1381541Srgrimes		/*
1399507Sdg		 * Hold a reference to the vnode and initialize object data.
1401541Srgrimes		 */
1411541Srgrimes		VREF(vp);
1429507Sdg		object->un_pager.vnp.vnp_size = size;
1431549Srgrimes
1449507Sdg		object->handle = handle;
1459507Sdg		vp->v_object = object;
1461541Srgrimes	} else {
1471541Srgrimes		/*
1489507Sdg		 * vm_object_reference() will remove the object from the cache if
1499507Sdg		 * found and gain a reference to the object.
1501541Srgrimes		 */
1519507Sdg		vm_object_reference(object);
1521541Srgrimes	}
1539411Sdg
1549411Sdg	if (vp->v_type == VREG)
1557695Sdg		vp->v_flag |= VVMIO;
1569411Sdg
1579411Sdg	vp->v_flag &= ~VOLOCK;
1589411Sdg	if (vp->v_flag & VOWANT) {
1599411Sdg		vp->v_flag &= ~VOWANT;
1609411Sdg		wakeup(vp);
1619411Sdg	}
1629507Sdg	return (object);
1631541Srgrimes}
1641541Srgrimes
1651549Srgrimesvoid
1669507Sdgvnode_pager_dealloc(object)
1679507Sdg	vm_object_t object;
1681541Srgrimes{
1699507Sdg	register struct vnode *vp = object->handle;
1701541Srgrimes
1719507Sdg	if (vp == NULL)
1729507Sdg		panic("vnode_pager_dealloc: pager already dealloced");
1739507Sdg
1749507Sdg	if (object->paging_in_progress) {
1755455Sdg		int s = splbio();
1769507Sdg		while (object->paging_in_progress) {
1779507Sdg			object->flags |= OBJ_PIPWNT;
1789507Sdg			tsleep(object, PVM, "vnpdea", 0);
1795455Sdg		}
1805455Sdg		splx(s);
1811541Srgrimes	}
1821541Srgrimes
1839507Sdg	object->handle = NULL;
1841827Sdg
1859507Sdg	vp->v_object = NULL;
1869507Sdg	vp->v_flag &= ~(VTEXT | VVMIO);
1879507Sdg	vp->v_flag |= VAGE;
1889507Sdg	vrele(vp);
1891549Srgrimes}
1901541Srgrimes
1911549Srgrimesboolean_t
1929507Sdgvnode_pager_haspage(object, offset, before, after)
1939507Sdg	vm_object_t object;
1941541Srgrimes	vm_offset_t offset;
1959507Sdg	int *before;
1969507Sdg	int *after;
1971541Srgrimes{
1989507Sdg	struct vnode *vp = object->handle;
1991541Srgrimes	daddr_t bn;
20012423Sphk	int err;
20110556Sdyson	daddr_t reqblock;
20211701Sdyson	int poff;
20311701Sdyson	int bsize;
20411701Sdyson	int pagesperblock;
2051541Srgrimes
2061541Srgrimes	/*
2075455Sdg	 * If filesystem no longer mounted or offset beyond end of file we do
2085455Sdg	 * not have the page.
2091541Srgrimes	 */
2109507Sdg	if ((vp->v_mount == NULL) || (offset >= object->un_pager.vnp.vnp_size))
2114797Sdg		return FALSE;
2121541Srgrimes
21311576Sdg	bsize = vp->v_mount->mnt_stat.f_iosize;
21410556Sdyson	pagesperblock = bsize / PAGE_SIZE;
21510556Sdyson	reqblock = offset / bsize;
21610556Sdyson	err = VOP_BMAP(vp, reqblock, (struct vnode **) 0, &bn,
21710556Sdyson		after, before);
2188876Srgrimes	if (err)
2199507Sdg		return TRUE;
22010702Sdyson	if ( bn == -1)
22110576Sdyson		return FALSE;
22210556Sdyson	poff = (offset - (reqblock * bsize)) / PAGE_SIZE;
22310556Sdyson	if (before) {
22410556Sdyson		*before *= pagesperblock;
22510556Sdyson		*before += poff;
22610556Sdyson	}
22710556Sdyson	if (after) {
22810669Sdyson		int numafter;
22910556Sdyson		*after *= pagesperblock;
23010669Sdyson		numafter = pagesperblock - (poff + 1);
23110669Sdyson		if (offset + numafter * PAGE_SIZE > object->un_pager.vnp.vnp_size) {
23210669Sdyson			numafter = (object->un_pager.vnp.vnp_size - offset)/PAGE_SIZE;
23310669Sdyson		}
23410669Sdyson		*after += numafter;
23510556Sdyson	}
23610576Sdyson	return TRUE;
2371541Srgrimes}
2381541Srgrimes
2391541Srgrimes/*
2401541Srgrimes * Lets the VM system know about a change in size for a file.
2419507Sdg * We adjust our own internal size and flush any cached pages in
2421541Srgrimes * the associated object that are affected by the size change.
2431541Srgrimes *
2441541Srgrimes * Note: this routine may be invoked as a result of a pager put
2451541Srgrimes * operation (possibly at object termination time), so we must be careful.
2461541Srgrimes */
2471541Srgrimesvoid
2481541Srgrimesvnode_pager_setsize(vp, nsize)
2491541Srgrimes	struct vnode *vp;
2505455Sdg	u_long nsize;
2511541Srgrimes{
2529507Sdg	vm_object_t object = vp->v_object;
2531541Srgrimes
2549507Sdg	if (object == NULL)
2551541Srgrimes		return;
2561827Sdg
2571541Srgrimes	/*
2581541Srgrimes	 * Hasn't changed size
2591541Srgrimes	 */
2609507Sdg	if (nsize == object->un_pager.vnp.vnp_size)
2613374Sdg		return;
2621827Sdg
2631541Srgrimes	/*
2641827Sdg	 * File has shrunk. Toss any cached pages beyond the new EOF.
2651541Srgrimes	 */
2669507Sdg	if (nsize < object->un_pager.vnp.vnp_size) {
2679507Sdg		if (round_page((vm_offset_t) nsize) < object->un_pager.vnp.vnp_size) {
2685455Sdg			vm_object_page_remove(object,
2699507Sdg			    round_page((vm_offset_t) nsize), object->un_pager.vnp.vnp_size, FALSE);
2705455Sdg		}
2711827Sdg		/*
2721827Sdg		 * this gets rid of garbage at the end of a page that is now
2731827Sdg		 * only partially backed by the vnode...
2741827Sdg		 */
2751827Sdg		if (nsize & PAGE_MASK) {
2761827Sdg			vm_offset_t kva;
2771827Sdg			vm_page_t m;
2781827Sdg
2791827Sdg			m = vm_page_lookup(object, trunc_page((vm_offset_t) nsize));
2801827Sdg			if (m) {
2811827Sdg				kva = vm_pager_map_page(m);
2821827Sdg				bzero((caddr_t) kva + (nsize & PAGE_MASK),
2835455Sdg				    round_page(nsize) - nsize);
2841827Sdg				vm_pager_unmap_page(kva);
2851827Sdg			}
2861827Sdg		}
2871541Srgrimes	}
2889507Sdg	object->un_pager.vnp.vnp_size = (vm_offset_t) nsize;
2891827Sdg	object->size = round_page(nsize);
2901541Srgrimes}
2911541Srgrimes
2921541Srgrimesvoid
2931541Srgrimesvnode_pager_umount(mp)
2941541Srgrimes	register struct mount *mp;
2951541Srgrimes{
2969507Sdg	struct vnode *vp, *nvp;
2971541Srgrimes
2989507Sdgloop:
2999507Sdg	for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) {
3001541Srgrimes		/*
3019507Sdg		 * Vnode can be reclaimed by getnewvnode() while we
3029507Sdg		 * traverse the list.
3039507Sdg		 */
3049507Sdg		if (vp->v_mount != mp)
3059507Sdg			goto loop;
3069507Sdg
3079507Sdg		/*
3081827Sdg		 * Save the next pointer now since uncaching may terminate the
3099507Sdg		 * object and render vnode invalid
3101541Srgrimes		 */
3119507Sdg		nvp = vp->v_mntvnodes.le_next;
3129507Sdg
3139507Sdg		if (vp->v_object != NULL) {
3147162Sdg			VOP_LOCK(vp);
3159507Sdg			vnode_pager_uncache(vp);
3167162Sdg			VOP_UNLOCK(vp);
3177162Sdg		}
3181541Srgrimes	}
3191541Srgrimes}
3201541Srgrimes
3211541Srgrimes/*
3221541Srgrimes * Remove vnode associated object from the object cache.
3237162Sdg * This routine must be called with the vnode locked.
3241541Srgrimes *
3257162Sdg * XXX unlock the vnode.
3267162Sdg * We must do this since uncaching the object may result in its
3277162Sdg * destruction which may initiate paging activity which may necessitate
3287162Sdg * re-locking the vnode.
3291549Srgrimes */
3309507Sdgvoid
3311549Srgrimesvnode_pager_uncache(vp)
3329507Sdg	struct vnode *vp;
3331549Srgrimes{
3349507Sdg	vm_object_t object;
3351549Srgrimes
3361549Srgrimes	/*
3371549Srgrimes	 * Not a mapped vnode
3381549Srgrimes	 */
3399356Sdg	object = vp->v_object;
3405455Sdg	if (object == NULL)
3419507Sdg		return;
3425455Sdg
3439507Sdg	vm_object_reference(object);
3449507Sdg	VOP_UNLOCK(vp);
3459507Sdg	pager_cache(object, FALSE);
3469507Sdg	VOP_LOCK(vp);
3479507Sdg	return;
3481549Srgrimes}
3491541Srgrimes
3501541Srgrimes
3511549Srgrimesvoid
3521549Srgrimesvnode_pager_freepage(m)
3531549Srgrimes	vm_page_t m;
3541541Srgrimes{
3551549Srgrimes	PAGE_WAKEUP(m);
3561549Srgrimes	vm_page_free(m);
3571549Srgrimes}
3581549Srgrimes
3591549Srgrimes/*
3601549Srgrimes * calculate the linear (byte) disk address of specified virtual
3611549Srgrimes * file address
3621549Srgrimes */
3631549Srgrimesvm_offset_t
3646151Sdgvnode_pager_addr(vp, address, run)
3651549Srgrimes	struct vnode *vp;
3661549Srgrimes	vm_offset_t address;
3676151Sdg	int *run;
3681549Srgrimes{
3695455Sdg	int rtaddress;
3705455Sdg	int bsize;
3711549Srgrimes	vm_offset_t block;
3721549Srgrimes	struct vnode *rtvp;
3735455Sdg	int err;
3745455Sdg	int vblock, voffset;
3751549Srgrimes
3765455Sdg	if ((int) address < 0)
3775455Sdg		return -1;
3785455Sdg
37911701Sdyson	if (vp->v_mount == NULL)
38011701Sdyson		return -1;
38111701Sdyson
3821549Srgrimes	bsize = vp->v_mount->mnt_stat.f_iosize;
3831549Srgrimes	vblock = address / bsize;
3841549Srgrimes	voffset = address % bsize;
3851549Srgrimes
38610551Sdyson	err = VOP_BMAP(vp, vblock, &rtvp, &block, run, NULL);
3871549Srgrimes
3886151Sdg	if (err || (block == -1))
3891549Srgrimes		rtaddress = -1;
3906151Sdg	else {
3916626Sdg		rtaddress = block + voffset / DEV_BSIZE;
3926151Sdg		if( run) {
3936151Sdg			*run += 1;
3946151Sdg			*run *= bsize/PAGE_SIZE;
3956151Sdg			*run -= voffset/PAGE_SIZE;
3966151Sdg		}
3976151Sdg	}
3981549Srgrimes
3991549Srgrimes	return rtaddress;
4001549Srgrimes}
4011549Srgrimes
4021549Srgrimes/*
4031549Srgrimes * interrupt routine for I/O completion
4041549Srgrimes */
4051549Srgrimesvoid
4061549Srgrimesvnode_pager_iodone(bp)
4071549Srgrimes	struct buf *bp;
4081549Srgrimes{
4091549Srgrimes	bp->b_flags |= B_DONE;
4109507Sdg	wakeup(bp);
4111549Srgrimes}
4121549Srgrimes
4131549Srgrimes/*
4141549Srgrimes * small block file system vnode pager input
4151549Srgrimes */
4161549Srgrimesint
4179507Sdgvnode_pager_input_smlfs(object, m)
4189507Sdg	vm_object_t object;
4191549Srgrimes	vm_page_t m;
4201549Srgrimes{
4215455Sdg	int i;
4225455Sdg	int s;
4231549Srgrimes	struct vnode *dp, *vp;
4241549Srgrimes	struct buf *bp;
4251549Srgrimes	vm_offset_t kva;
4265455Sdg	int fileaddr;
4271549Srgrimes	vm_offset_t bsize;
4285455Sdg	int error = 0;
4291549Srgrimes
4309507Sdg	vp = object->handle;
43111701Sdyson	if (vp->v_mount == NULL)
43211701Sdyson		return VM_PAGER_BAD;
43311701Sdyson
4341549Srgrimes	bsize = vp->v_mount->mnt_stat.f_iosize;
4351549Srgrimes
4367178Sdg
43710551Sdyson	VOP_BMAP(vp, 0, &dp, 0, NULL, NULL);
4381549Srgrimes
4391549Srgrimes	kva = vm_pager_map_page(m);
4401549Srgrimes
4411827Sdg	for (i = 0; i < PAGE_SIZE / bsize; i++) {
4421827Sdg
4435455Sdg		if ((vm_page_bits(m->offset + i * bsize, bsize) & m->valid))
4445455Sdg			continue;
4451549Srgrimes
4466151Sdg		fileaddr = vnode_pager_addr(vp, m->offset + i * bsize, (int *)0);
4471827Sdg		if (fileaddr != -1) {
4481549Srgrimes			bp = getpbuf();
4491549Srgrimes
4501827Sdg			/* build a minimal buffer header */
4511549Srgrimes			bp->b_flags = B_BUSY | B_READ | B_CALL;
4521549Srgrimes			bp->b_iodone = vnode_pager_iodone;
4531549Srgrimes			bp->b_proc = curproc;
4541549Srgrimes			bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
4551827Sdg			if (bp->b_rcred != NOCRED)
4561549Srgrimes				crhold(bp->b_rcred);
4571827Sdg			if (bp->b_wcred != NOCRED)
4581549Srgrimes				crhold(bp->b_wcred);
4591549Srgrimes			bp->b_un.b_addr = (caddr_t) kva + i * bsize;
4606626Sdg			bp->b_blkno = fileaddr;
4615455Sdg			pbgetvp(dp, bp);
4621549Srgrimes			bp->b_bcount = bsize;
4631549Srgrimes			bp->b_bufsize = bsize;
4641827Sdg
4651827Sdg			/* do the input */
4661549Srgrimes			VOP_STRATEGY(bp);
4671549Srgrimes
4681827Sdg			/* we definitely need to be at splbio here */
4691549Srgrimes
4701549Srgrimes			s = splbio();
4711549Srgrimes			while ((bp->b_flags & B_DONE) == 0) {
4729356Sdg				tsleep(bp, PVM, "vnsrd", 0);
4731549Srgrimes			}
4741549Srgrimes			splx(s);
4751549Srgrimes			if ((bp->b_flags & B_ERROR) != 0)
4761549Srgrimes				error = EIO;
4771549Srgrimes
4781827Sdg			/*
4791827Sdg			 * free the buffer header back to the swap buffer pool
4801827Sdg			 */
4811549Srgrimes			relpbuf(bp);
4821827Sdg			if (error)
4831549Srgrimes				break;
4845455Sdg
48510556Sdyson			vm_page_set_validclean(m, (i * bsize) & (PAGE_SIZE-1), bsize);
4861549Srgrimes		} else {
48710669Sdyson			vm_page_set_validclean(m, (i * bsize) & (PAGE_SIZE-1), bsize);
4881549Srgrimes			bzero((caddr_t) kva + i * bsize, bsize);
4891549Srgrimes		}
4901549Srgrimes	}
4911549Srgrimes	vm_pager_unmap_page(kva);
4925455Sdg	pmap_clear_modify(VM_PAGE_TO_PHYS(m));
49310669Sdyson	m->flags &= ~PG_ZERO;
4941827Sdg	if (error) {
4954207Sdg		return VM_PAGER_ERROR;
4961549Srgrimes	}
4971549Srgrimes	return VM_PAGER_OK;
4981549Srgrimes
4991549Srgrimes}
5001549Srgrimes
5011549Srgrimes
5021549Srgrimes/*
5031549Srgrimes * old style vnode pager output routine
5041549Srgrimes */
5051549Srgrimesint
5069507Sdgvnode_pager_input_old(object, m)
5079507Sdg	vm_object_t object;
5081549Srgrimes	vm_page_t m;
5091549Srgrimes{
5101541Srgrimes	struct uio auio;
5111541Srgrimes	struct iovec aiov;
5125455Sdg	int error;
5135455Sdg	int size;
5141549Srgrimes	vm_offset_t kva;
5151549Srgrimes
5161549Srgrimes	error = 0;
5171827Sdg
5181549Srgrimes	/*
5191549Srgrimes	 * Return failure if beyond current EOF
5201549Srgrimes	 */
5219507Sdg	if (m->offset >= object->un_pager.vnp.vnp_size) {
5221549Srgrimes		return VM_PAGER_BAD;
5231549Srgrimes	} else {
5241549Srgrimes		size = PAGE_SIZE;
5259507Sdg		if (m->offset + size > object->un_pager.vnp.vnp_size)
5269507Sdg			size = object->un_pager.vnp.vnp_size - m->offset;
5277178Sdg
5285455Sdg		/*
5295455Sdg		 * Allocate a kernel virtual address and initialize so that
5305455Sdg		 * we can use VOP_READ/WRITE routines.
5315455Sdg		 */
5321549Srgrimes		kva = vm_pager_map_page(m);
5337178Sdg
5341827Sdg		aiov.iov_base = (caddr_t) kva;
5351549Srgrimes		aiov.iov_len = size;
5361549Srgrimes		auio.uio_iov = &aiov;
5371549Srgrimes		auio.uio_iovcnt = 1;
5385455Sdg		auio.uio_offset = m->offset;
5391549Srgrimes		auio.uio_segflg = UIO_SYSSPACE;
5401549Srgrimes		auio.uio_rw = UIO_READ;
5411549Srgrimes		auio.uio_resid = size;
5421827Sdg		auio.uio_procp = (struct proc *) 0;
5431549Srgrimes
5449507Sdg		error = VOP_READ(object->handle, &auio, 0, curproc->p_ucred);
5451549Srgrimes		if (!error) {
5461549Srgrimes			register int count = size - auio.uio_resid;
5471549Srgrimes
5481549Srgrimes			if (count == 0)
5491549Srgrimes				error = EINVAL;
5501549Srgrimes			else if (count != PAGE_SIZE)
5511827Sdg				bzero((caddr_t) kva + count, PAGE_SIZE - count);
5521549Srgrimes		}
5531549Srgrimes		vm_pager_unmap_page(kva);
5541549Srgrimes	}
5551549Srgrimes	pmap_clear_modify(VM_PAGE_TO_PHYS(m));
5565455Sdg	m->dirty = 0;
55710669Sdyson	m->flags &= ~PG_ZERO;
5584207Sdg	return error ? VM_PAGER_ERROR : VM_PAGER_OK;
5591549Srgrimes}
5601549Srgrimes
5611549Srgrimes/*
5621549Srgrimes * generic vnode pager input routine
5631549Srgrimes */
56410556Sdyson
5651549Srgrimesint
5669507Sdgvnode_pager_getpages(object, m, count, reqpage)
5679507Sdg	vm_object_t object;
5681549Srgrimes	vm_page_t *m;
5699507Sdg	int count;
5709507Sdg	int reqpage;
5711549Srgrimes{
57210556Sdyson	int rtval;
57310556Sdyson	struct vnode *vp;
57410556Sdyson	vp = object->handle;
57511701Sdyson	rtval = VOP_GETPAGES(vp, m, count*PAGE_SIZE, reqpage, 0);
57610556Sdyson	if (rtval == EOPNOTSUPP)
57711943Sbde		return vnode_pager_leaf_getpages(object, m, count, reqpage);
57810556Sdyson	else
57910556Sdyson		return rtval;
58010556Sdyson}
58110556Sdyson
58210556Sdysonstatic int
58310556Sdysonvnode_pager_leaf_getpages(object, m, count, reqpage)
58410556Sdyson	vm_object_t object;
58510556Sdyson	vm_page_t *m;
58610556Sdyson	int count;
58710556Sdyson	int reqpage;
58810556Sdyson{
5891541Srgrimes	vm_offset_t kva, foff;
5909507Sdg	int i, size, bsize, first, firstaddr;
5911549Srgrimes	struct vnode *dp, *vp;
5926151Sdg	int runpg;
5936151Sdg	int runend;
5947178Sdg	struct buf *bp;
5955455Sdg	int s;
5965455Sdg	int error = 0;
5971549Srgrimes
5989507Sdg	vp = object->handle;
59911701Sdyson	if (vp->v_mount == NULL)
60011701Sdyson		return VM_PAGER_BAD;
60111701Sdyson
6021549Srgrimes	bsize = vp->v_mount->mnt_stat.f_iosize;
6031549Srgrimes
6041549Srgrimes	/* get the UNDERLYING device for the file with VOP_BMAP() */
6051827Sdg
6061549Srgrimes	/*
6071827Sdg	 * originally, we did not check for an error return value -- assuming
6081827Sdg	 * an fs always has a bmap entry point -- that assumption is wrong!!!
6091549Srgrimes	 */
6105455Sdg	foff = m[reqpage]->offset;
6111827Sdg
6121549Srgrimes	/*
6131887Sdg	 * if we can't bmap, use old VOP code
6141549Srgrimes	 */
61510551Sdyson	if (VOP_BMAP(vp, 0, &dp, 0, NULL, NULL)) {
6161549Srgrimes		for (i = 0; i < count; i++) {
6171549Srgrimes			if (i != reqpage) {
6181549Srgrimes				vnode_pager_freepage(m[i]);
6191549Srgrimes			}
6201549Srgrimes		}
6213612Sdg		cnt.v_vnodein++;
6223612Sdg		cnt.v_vnodepgsin++;
6239507Sdg		return vnode_pager_input_old(object, m[reqpage]);
6241549Srgrimes
6251827Sdg		/*
6261827Sdg		 * if the blocksize is smaller than a page size, then use
6271827Sdg		 * special small filesystem code.  NFS sometimes has a small
6281827Sdg		 * blocksize, but it can handle large reads itself.
6291827Sdg		 */
6301827Sdg	} else if ((PAGE_SIZE / bsize) > 1 &&
6315455Sdg	    (vp->v_mount->mnt_stat.f_type != MOUNT_NFS)) {
6321827Sdg
6331549Srgrimes		for (i = 0; i < count; i++) {
6341549Srgrimes			if (i != reqpage) {
6351549Srgrimes				vnode_pager_freepage(m[i]);
6361549Srgrimes			}
6371549Srgrimes		}
6383612Sdg		cnt.v_vnodein++;
6393612Sdg		cnt.v_vnodepgsin++;
6409507Sdg		return vnode_pager_input_smlfs(object, m[reqpage]);
6411549Srgrimes	}
6421549Srgrimes	/*
6435455Sdg	 * if ANY DEV_BSIZE blocks are valid on a large filesystem block
6445455Sdg	 * then, the entire page is valid --
6451549Srgrimes	 */
6465455Sdg	if (m[reqpage]->valid) {
6475455Sdg		m[reqpage]->valid = VM_PAGE_BITS_ALL;
6485455Sdg		for (i = 0; i < count; i++) {
6495455Sdg			if (i != reqpage)
6505455Sdg				vnode_pager_freepage(m[i]);
6511549Srgrimes		}
6525455Sdg		return VM_PAGER_OK;
6531549Srgrimes	}
6547178Sdg
6555455Sdg	/*
6565455Sdg	 * here on direct device I/O
6575455Sdg	 */
6581549Srgrimes
6596151Sdg	firstaddr = -1;
6601549Srgrimes	/*
6616151Sdg	 * calculate the run that includes the required page
6621549Srgrimes	 */
6636151Sdg	for(first = 0, i = 0; i < count; i = runend) {
6646151Sdg		firstaddr = vnode_pager_addr(vp, m[i]->offset, &runpg);
6656151Sdg		if (firstaddr == -1) {
6669507Sdg			if (i == reqpage && foff < object->un_pager.vnp.vnp_size) {
6679507Sdg				panic("vnode_pager_putpages: unexpected missing page: firstaddr: %d, foff: %ld, vnp_size: %d",
6689507Sdg			   	 firstaddr, foff, object->un_pager.vnp.vnp_size);
6696151Sdg			}
6701549Srgrimes			vnode_pager_freepage(m[i]);
6716151Sdg			runend = i + 1;
6726151Sdg			first = runend;
6736151Sdg			continue;
6741549Srgrimes		}
6756151Sdg		runend = i + runpg;
6769507Sdg		if (runend <= reqpage) {
6776151Sdg			int j;
6789507Sdg			for (j = i; j < runend; j++) {
6796151Sdg				vnode_pager_freepage(m[j]);
6806151Sdg			}
6811549Srgrimes		} else {
6829507Sdg			if (runpg < (count - first)) {
6839507Sdg				for (i = first + runpg; i < count; i++)
6846151Sdg					vnode_pager_freepage(m[i]);
6856151Sdg				count = first + runpg;
6866151Sdg			}
6876151Sdg			break;
6881549Srgrimes		}
6896151Sdg		first = runend;
6901549Srgrimes	}
6911549Srgrimes
6921549Srgrimes	/*
6931827Sdg	 * the first and last page have been calculated now, move input pages
6941827Sdg	 * to be zero based...
6951549Srgrimes	 */
6961549Srgrimes	if (first != 0) {
6971549Srgrimes		for (i = first; i < count; i++) {
6981549Srgrimes			m[i - first] = m[i];
6991549Srgrimes		}
7001549Srgrimes		count -= first;
7011549Srgrimes		reqpage -= first;
7021549Srgrimes	}
7036151Sdg
7041549Srgrimes	/*
7051549Srgrimes	 * calculate the file virtual address for the transfer
7061549Srgrimes	 */
7075455Sdg	foff = m[0]->offset;
7081827Sdg
7091549Srgrimes	/*
7101549Srgrimes	 * calculate the size of the transfer
7111549Srgrimes	 */
7121549Srgrimes	size = count * PAGE_SIZE;
7139507Sdg	if ((foff + size) > object->un_pager.vnp.vnp_size)
7149507Sdg		size = object->un_pager.vnp.vnp_size - foff;
7151549Srgrimes
7161549Srgrimes	/*
7171549Srgrimes	 * round up physical size for real devices
7181549Srgrimes	 */
7191827Sdg	if (dp->v_type == VBLK || dp->v_type == VCHR)
7201549Srgrimes		size = (size + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1);
7211549Srgrimes
7225841Sdg	bp = getpbuf();
7235455Sdg	kva = (vm_offset_t) bp->b_data;
7241887Sdg
7251549Srgrimes	/*
7261549Srgrimes	 * and map the pages to be read into the kva
7271549Srgrimes	 */
7281887Sdg	pmap_qenter(kva, m, count);
7291549Srgrimes
7301549Srgrimes	/* build a minimal buffer header */
7311549Srgrimes	bp->b_flags = B_BUSY | B_READ | B_CALL;
7321549Srgrimes	bp->b_iodone = vnode_pager_iodone;
7331549Srgrimes	/* B_PHYS is not set, but it is nice to fill this in */
7341549Srgrimes	bp->b_proc = curproc;
7351549Srgrimes	bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
7361827Sdg	if (bp->b_rcred != NOCRED)
7371549Srgrimes		crhold(bp->b_rcred);
7381827Sdg	if (bp->b_wcred != NOCRED)
7391549Srgrimes		crhold(bp->b_wcred);
7406626Sdg	bp->b_blkno = firstaddr;
7415455Sdg	pbgetvp(dp, bp);
7421549Srgrimes	bp->b_bcount = size;
7431549Srgrimes	bp->b_bufsize = size;
7441549Srgrimes
7453612Sdg	cnt.v_vnodein++;
7463612Sdg	cnt.v_vnodepgsin += count;
7473612Sdg
7481549Srgrimes	/* do the input */
7491549Srgrimes	VOP_STRATEGY(bp);
7503612Sdg
7511549Srgrimes	s = splbio();
7521549Srgrimes	/* we definitely need to be at splbio here */
7531549Srgrimes
7541549Srgrimes	while ((bp->b_flags & B_DONE) == 0) {
7559356Sdg		tsleep(bp, PVM, "vnread", 0);
7561549Srgrimes	}
7571549Srgrimes	splx(s);
7581549Srgrimes	if ((bp->b_flags & B_ERROR) != 0)
7591549Srgrimes		error = EIO;
7601549Srgrimes
7611549Srgrimes	if (!error) {
7621549Srgrimes		if (size != count * PAGE_SIZE)
7631827Sdg			bzero((caddr_t) kva + size, PAGE_SIZE * count - size);
7641549Srgrimes	}
7655455Sdg	pmap_qremove(kva, count);
7661549Srgrimes
7671549Srgrimes	/*
7681549Srgrimes	 * free the buffer header back to the swap buffer pool
7691549Srgrimes	 */
7701549Srgrimes	relpbuf(bp);
7711549Srgrimes
7721549Srgrimes	for (i = 0; i < count; i++) {
7732386Sdg		pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
7745455Sdg		m[i]->dirty = 0;
7755455Sdg		m[i]->valid = VM_PAGE_BITS_ALL;
77610669Sdyson		m[i]->flags &= ~PG_ZERO;
7771549Srgrimes		if (i != reqpage) {
7781827Sdg
7791549Srgrimes			/*
7801827Sdg			 * whether or not to leave the page activated is up in
7811827Sdg			 * the air, but we should put the page on a page queue
7821827Sdg			 * somewhere. (it already is in the object). Result:
7831827Sdg			 * It appears that emperical results show that
7841827Sdg			 * deactivating pages is best.
7851549Srgrimes			 */
7861827Sdg
7871549Srgrimes			/*
7881827Sdg			 * just in case someone was asking for this page we
7891827Sdg			 * now tell them that it is ok to use
7901549Srgrimes			 */
7911549Srgrimes			if (!error) {
7925841Sdg				vm_page_deactivate(m[i]);
7931549Srgrimes				PAGE_WAKEUP(m[i]);
7941549Srgrimes			} else {
7951549Srgrimes				vnode_pager_freepage(m[i]);
7961549Srgrimes			}
7971549Srgrimes		}
7981549Srgrimes	}
7991549Srgrimes	if (error) {
8009507Sdg		printf("vnode_pager_getpages: I/O read error\n");
8011549Srgrimes	}
8024207Sdg	return (error ? VM_PAGER_ERROR : VM_PAGER_OK);
8031549Srgrimes}
8041549Srgrimes
80510556Sdysonint
80610556Sdysonvnode_pager_putpages(object, m, count, sync, rtvals)
80710556Sdyson	vm_object_t object;
80810556Sdyson	vm_page_t *m;
80910556Sdyson	int count;
81010556Sdyson	boolean_t sync;
81110556Sdyson	int *rtvals;
81210556Sdyson{
81310556Sdyson	int rtval;
81410556Sdyson	struct vnode *vp;
81510556Sdyson	vp = object->handle;
81611701Sdyson	rtval = VOP_PUTPAGES(vp, m, count*PAGE_SIZE, sync, rtvals, 0);
81710556Sdyson	if (rtval == EOPNOTSUPP)
81811943Sbde		return vnode_pager_leaf_putpages(object, m, count, sync, rtvals);
81910556Sdyson	else
82010556Sdyson		return rtval;
82110556Sdyson}
82210556Sdyson
8231549Srgrimes/*
8241549Srgrimes * generic vnode pager output routine
8251549Srgrimes */
82610556Sdysonstatic int
82710556Sdysonvnode_pager_leaf_putpages(object, m, count, sync, rtvals)
8289507Sdg	vm_object_t object;
8291549Srgrimes	vm_page_t *m;
8305455Sdg	int count;
8319507Sdg	boolean_t sync;
8325455Sdg	int *rtvals;
8331549Srgrimes{
8347695Sdg	int i;
8351549Srgrimes
8367695Sdg	struct vnode *vp;
8377695Sdg	int maxsize, ncount;
8387695Sdg	struct uio auio;
8397695Sdg	struct iovec aiov;
8407695Sdg	int error;
8411549Srgrimes
8429507Sdg	vp = object->handle;;
8431827Sdg	for (i = 0; i < count; i++)
8441549Srgrimes		rtvals[i] = VM_PAGER_AGAIN;
8451549Srgrimes
8465455Sdg	if ((int) m[0]->offset < 0) {
8479507Sdg		printf("vnode_pager_putpages: attempt to write meta-data!!! -- 0x%x(%x)\n", m[0]->offset, m[0]->dirty);
8487695Sdg		rtvals[0] = VM_PAGER_BAD;
8497695Sdg		return VM_PAGER_BAD;
8505455Sdg	}
8517178Sdg
8527695Sdg	maxsize = count * PAGE_SIZE;
8537695Sdg	ncount = count;
8541549Srgrimes
8559507Sdg	if (maxsize + m[0]->offset > object->un_pager.vnp.vnp_size) {
8569507Sdg		if (object->un_pager.vnp.vnp_size > m[0]->offset)
8579507Sdg			maxsize = object->un_pager.vnp.vnp_size - m[0]->offset;
8588585Sdg		else
8598585Sdg			maxsize = 0;
8607695Sdg		ncount = (maxsize + PAGE_SIZE - 1) / PAGE_SIZE;
8618585Sdg		if (ncount < count) {
8628585Sdg			for (i = ncount; i < count; i++) {
8637695Sdg				rtvals[i] = VM_PAGER_BAD;
8641549Srgrimes			}
8658585Sdg			if (ncount == 0) {
86612423Sphk				printf("vnode_pager_putpages: write past end of file: %ld, %ld\n",
86712423Sphk					m[0]->offset,
86812423Sphk					object->un_pager.vnp.vnp_size);
8697695Sdg				return rtvals[0];
8707695Sdg			}
8711549Srgrimes		}
8721541Srgrimes	}
8737695Sdg
8748585Sdg	for (i = 0; i < count; i++) {
8758585Sdg		m[i]->busy++;
8767695Sdg		m[i]->flags &= ~PG_BUSY;
8771549Srgrimes	}
8781827Sdg
8797695Sdg	aiov.iov_base = (caddr_t) 0;
8807695Sdg	aiov.iov_len = maxsize;
8817695Sdg	auio.uio_iov = &aiov;
8827695Sdg	auio.uio_iovcnt = 1;
8837695Sdg	auio.uio_offset = m[0]->offset;
8847695Sdg	auio.uio_segflg = UIO_NOCOPY;
8857695Sdg	auio.uio_rw = UIO_WRITE;
8867695Sdg	auio.uio_resid = maxsize;
8877695Sdg	auio.uio_procp = (struct proc *) 0;
8887695Sdg	error = VOP_WRITE(vp, &auio, IO_VMIO, curproc->p_ucred);
8893612Sdg	cnt.v_vnodeout++;
8907695Sdg	cnt.v_vnodepgsout += ncount;
8913612Sdg
8928585Sdg	if (error) {
8939507Sdg		printf("vnode_pager_putpages: I/O error %d\n", error);
8947695Sdg	}
8958585Sdg	if (auio.uio_resid) {
89612423Sphk		printf("vnode_pager_putpages: residual I/O %d at %ld\n",
89712423Sphk			auio.uio_resid, m[0]->offset);
8987695Sdg	}
8998585Sdg	for (i = 0; i < count; i++) {
9008585Sdg		m[i]->busy--;
9018585Sdg		if (i < ncount) {
9027695Sdg			rtvals[i] = VM_PAGER_OK;
9037695Sdg		}
9048585Sdg		if ((m[i]->busy == 0) && (m[i]->flags & PG_WANTED))
9059507Sdg			wakeup(m[i]);
9067695Sdg	}
9077695Sdg	return rtvals[0];
9087695Sdg}
9091549Srgrimes
9107695Sdgstruct vnode *
9119507Sdgvnode_pager_lock(object)
9129507Sdg	vm_object_t object;
9139507Sdg{
9149507Sdg	for (; object != NULL; object = object->backing_object) {
9159507Sdg		if (object->type != OBJT_VNODE)
9167695Sdg			continue;
9171549Srgrimes
9189507Sdg		VOP_LOCK(object->handle);
9199507Sdg		return object->handle;
9201549Srgrimes	}
9219507Sdg	return NULL;
9227695Sdg}
923