vnode_pager.c revision 9507
/*
 * Copyright (c) 1990 University of Utah.
 * Copyright (c) 1991 The Regents of the University of California.
 * All rights reserved.
 * Copyright (c) 1993, 1994 John S. Dyson
 * Copyright (c) 1995, David Greenman
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)vnode_pager.c	7.5 (Berkeley) 4/20/91
 *	$Id: vnode_pager.c,v 1.43 1995/07/09 06:58:03 davidg Exp $
 */

/*
 * Page to/from files (vnodes).
 */

/*
 * TODO:
 *	Implement VOP_GETPAGES/PUTPAGES interface for filesystems. Will
 *	greatly re-simplify the vnode_pager.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/vnode.h>
#include <sys/uio.h>
#include <sys/mount.h>
#include <sys/buf.h>

#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vnode_pager.h>

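/*
 * Pager operations vector for vnode-backed VM objects.  The NULL
 * entries are operations this pager does not implement; going by
 * position in struct pagerops, these would be the init hook and the
 * final (sync) hook in this era's interface, though the exact slot
 * names are an assumption here.
 */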
struct pagerops vnodepagerops = {
	NULL,
	vnode_pager_alloc,
	vnode_pager_dealloc,
	vnode_pager_getpages,
	vnode_pager_putpages,
	vnode_pager_haspage,
	NULL
};

/*
 * Allocate (or lookup) pager for a vnode.
 * Handle is a vnode pointer.
 */
vm_object_t
vnode_pager_alloc(handle, size, prot, offset)
	void *handle;
	vm_size_t size;
	vm_prot_t prot;
	vm_offset_t offset;
{
	vm_object_t object;
	struct vnode *vp;

	/*
	 * Pageout to vnode, no can do yet.
	 */
	if (handle == NULL)
		return (NULL);

	vp = (struct vnode *) handle;

	/*
	 * Prevent race condition when allocating the object. This
	 * can happen with NFS vnodes since the nfsnode isn't locked.
	 */
	while (vp->v_flag & VOLOCK) {
		vp->v_flag |= VOWANT;
		tsleep(vp, PVM, "vnpobj", 0);
	}
	vp->v_flag |= VOLOCK;

	/*
	 * If the object is being terminated, wait for it to
	 * go away.
	 */
	while (((object = vp->v_object) != NULL) && (object->flags & OBJ_DEAD)) {
		tsleep(object, PVM, "vadead", 0);
	}

	if (object == NULL) {
		/*
		 * Allocate an object of the appropriate size.
		 */
		object = vm_object_allocate(OBJT_VNODE, round_page(size));
		object->flags = OBJ_CANPERSIST;

		/*
		 * Hold a reference to the vnode and initialize object data.
		 */
		VREF(vp);
		object->un_pager.vnp.vnp_size = size;

		object->handle = handle;
		vp->v_object = object;
	} else {
		/*
		 * vm_object_reference() will remove the object from the cache if
		 * found and gain a reference to the object.
		 */
		vm_object_reference(object);
	}

	if (vp->v_type == VREG)
		vp->v_flag |= VVMIO;

	vp->v_flag &= ~VOLOCK;
	if (vp->v_flag & VOWANT) {
		vp->v_flag &= ~VOWANT;
		wakeup(vp);
	}
	return (object);
}

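/*
 * Tear down the pager/vnode association.  Any paging I/O still in
 * progress is waited out (at splbio) before the object and the vnode
 * are detached from each other and the vnode reference taken in
 * vnode_pager_alloc() is released.
 */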
void
vnode_pager_dealloc(object)
	vm_object_t object;
{
	register struct vnode *vp = object->handle;

	if (vp == NULL)
		panic("vnode_pager_dealloc: pager already dealloced");

	if (object->paging_in_progress) {
		int s = splbio();
		while (object->paging_in_progress) {
			object->flags |= OBJ_PIPWNT;
			tsleep(object, PVM, "vnpdea", 0);
		}
		splx(s);
	}

	object->handle = NULL;

	vp->v_object = NULL;
	vp->v_flag &= ~(VTEXT | VVMIO);
	vp->v_flag |= VAGE;
	vrele(vp);
}

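/*
 * Report whether the file has backing store for the page at "offset".
 * If "before"/"after" are non-NULL, also report how many contiguous
 * blocks precede/follow the requested block on disk.  Block numbers
 * here are in units of the filesystem block size (f_iosize), not
 * DEV_BSIZE.
 */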
boolean_t
vnode_pager_haspage(object, offset, before, after)
	vm_object_t object;
	vm_offset_t offset;
	int *before;
	int *after;
{
	struct vnode *vp = object->handle;
	daddr_t bn;
	int err, run;
	daddr_t startblock, reqblock;

	/*
	 * If filesystem no longer mounted or offset beyond end of file we do
	 * not have the page.
	 */
	if ((vp->v_mount == NULL) || (offset >= object->un_pager.vnp.vnp_size))
		return FALSE;

	startblock = reqblock = offset / vp->v_mount->mnt_stat.f_iosize;
	if (startblock > PFCLUSTER_BEHIND)
		startblock -= PFCLUSTER_BEHIND;
	else
		startblock = 0;

	if (before != NULL) {
		/*
		 * Loop looking for a contiguous chunk that includes the
		 * requested page.
		 */
		while (TRUE) {
			err = VOP_BMAP(vp, startblock, (struct vnode **) 0, &bn, &run);
			if (err || bn == -1) {
				if (startblock < reqblock) {
					startblock++;
					continue;
				}
				*before = 0;
				if (after != NULL)
					*after = 0;
				return err ? TRUE : FALSE;
			}
			if ((startblock + run) < reqblock) {
				startblock += run + 1;
				continue;
			}
			*before = reqblock - startblock;
			if (after != NULL)
				*after = run;
			return TRUE;
		}
	}

	err = VOP_BMAP(vp, reqblock, (struct vnode **) 0, &bn, after);
	if (err)
		return TRUE;
	return ((long) bn < 0 ? FALSE : TRUE);
}

/*
 * Lets the VM system know about a change in size for a file.
 * We adjust our own internal size and flush any cached pages in
 * the associated object that are affected by the size change.
 *
 * Note: this routine may be invoked as a result of a pager put
 * operation (possibly at object termination time), so we must be careful.
 */
void
vnode_pager_setsize(vp, nsize)
	struct vnode *vp;
	u_long nsize;
{
	vm_object_t object = vp->v_object;

	if (object == NULL)
		return;

	/*
	 * Hasn't changed size
	 */
	if (nsize == object->un_pager.vnp.vnp_size)
		return;

	/*
	 * File has shrunk. Toss any cached pages beyond the new EOF.
	 */
	if (nsize < object->un_pager.vnp.vnp_size) {
		if (round_page((vm_offset_t) nsize) < object->un_pager.vnp.vnp_size) {
			vm_object_page_remove(object,
			    round_page((vm_offset_t) nsize), object->un_pager.vnp.vnp_size, FALSE);
		}
		/*
		 * this gets rid of garbage at the end of a page that is now
		 * only partially backed by the vnode...
		 */
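		/*
		 * For example, with 4K pages a new size of 0x1234 leaves
		 * the page at offset 0x1000 valid only through byte 0x233;
		 * the bzero below clears bytes 0x234-0xfff of that page so
		 * stale data beyond the new EOF cannot reappear.
		 */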
		if (nsize & PAGE_MASK) {
			vm_offset_t kva;
			vm_page_t m;

			m = vm_page_lookup(object, trunc_page((vm_offset_t) nsize));
			if (m) {
				kva = vm_pager_map_page(m);
				bzero((caddr_t) kva + (nsize & PAGE_MASK),
				    round_page(nsize) - nsize);
				vm_pager_unmap_page(kva);
			}
		}
	}
	object->un_pager.vnp.vnp_size = (vm_offset_t) nsize;
	object->size = round_page(nsize);
}

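/*
 * Called at unmount time: walk every vnode belonging to the mount
 * point and uncache any VM object still attached to it.
 */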
void
vnode_pager_umount(mp)
	register struct mount *mp;
{
	struct vnode *vp, *nvp;

loop:
	for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) {
		/*
		 * Vnode can be reclaimed by getnewvnode() while we
		 * traverse the list.
		 */
		if (vp->v_mount != mp)
			goto loop;

		/*
		 * Save the next pointer now since uncaching may terminate the
		 * object and render the vnode invalid.
		 */
		nvp = vp->v_mntvnodes.le_next;

		if (vp->v_object != NULL) {
			VOP_LOCK(vp);
			vnode_pager_uncache(vp);
			VOP_UNLOCK(vp);
		}
	}
}

/*
 * Remove vnode associated object from the object cache.
 * This routine must be called with the vnode locked.
 *
 * XXX unlock the vnode.
 * We must do this since uncaching the object may result in its
 * destruction which may initiate paging activity which may necessitate
 * re-locking the vnode.
 */
void
vnode_pager_uncache(vp)
	struct vnode *vp;
{
	vm_object_t object;

	/*
	 * Not a mapped vnode
	 */
	object = vp->v_object;
	if (object == NULL)
		return;

	vm_object_reference(object);
	VOP_UNLOCK(vp);
	pager_cache(object, FALSE);
	VOP_LOCK(vp);
	return;
}

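/*
 * Wake up any sleepers on the page and free it.  Used to dispose of
 * pages handed to the pager that will not (or cannot) be filled.
 */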
void
vnode_pager_freepage(m)
	vm_page_t m;
{
	PAGE_WAKEUP(m);
	vm_page_free(m);
}

/*
 * calculate the disk (DEV_BSIZE-unit) block address of the specified
 * file byte address, and optionally the length of the contiguous run
 * that follows it
 */
vm_offset_t
vnode_pager_addr(vp, address, run)
	struct vnode *vp;
	vm_offset_t address;
	int *run;
{
	int rtaddress;
	int bsize;
	vm_offset_t block;
	struct vnode *rtvp;
	int err;
	int vblock, voffset;

	if ((int) address < 0)
		return -1;

	bsize = vp->v_mount->mnt_stat.f_iosize;
	vblock = address / bsize;
	voffset = address % bsize;

	err = VOP_BMAP(vp, vblock, &rtvp, &block, run);

	if (err || (block == -1))
		rtaddress = -1;
	else {
		rtaddress = block + voffset / DEV_BSIZE;
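		/*
		 * Convert the block run returned by VOP_BMAP (the number
		 * of contiguous filesystem blocks *after* the mapped one)
		 * into a count of contiguous pages starting at the given
		 * address.  For example, with an 8K bsize and 4K pages, a
		 * run of 1 extra block at voffset 4096 yields
		 * (1 + 1) * 2 - 1 = 3 contiguous pages.
		 */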
		if (run) {
			*run += 1;
			*run *= bsize / PAGE_SIZE;
			*run -= voffset / PAGE_SIZE;
		}
	}

	return rtaddress;
}

/*
 * interrupt routine for I/O completion
 */
void
vnode_pager_iodone(bp)
	struct buf *bp;
{
	bp->b_flags |= B_DONE;
	wakeup(bp);
}

/*
 * small block file system vnode pager input
 */
int
vnode_pager_input_smlfs(object, m)
	vm_object_t object;
	vm_page_t m;
{
	int i;
	int s;
	struct vnode *dp, *vp;
	struct buf *bp;
	vm_offset_t kva;
	int fileaddr;
	vm_offset_t bsize;
	int error = 0;

	vp = object->handle;
	bsize = vp->v_mount->mnt_stat.f_iosize;

	VOP_BMAP(vp, 0, &dp, 0, 0);

	kva = vm_pager_map_page(m);

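	/*
	 * Read the page one filesystem block at a time, skipping the
	 * pieces that are already valid.  Holes (fileaddr == -1) are
	 * zeroed instead of read.
	 */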
	for (i = 0; i < PAGE_SIZE / bsize; i++) {

		if ((vm_page_bits(m->offset + i * bsize, bsize) & m->valid))
			continue;

		fileaddr = vnode_pager_addr(vp, m->offset + i * bsize, (int *) 0);
		if (fileaddr != -1) {
			bp = getpbuf();

			/* build a minimal buffer header */
			bp->b_flags = B_BUSY | B_READ | B_CALL;
			bp->b_iodone = vnode_pager_iodone;
			bp->b_proc = curproc;
			bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
			if (bp->b_rcred != NOCRED)
				crhold(bp->b_rcred);
			if (bp->b_wcred != NOCRED)
				crhold(bp->b_wcred);
			bp->b_un.b_addr = (caddr_t) kva + i * bsize;
			bp->b_blkno = fileaddr;
			pbgetvp(dp, bp);
			bp->b_bcount = bsize;
			bp->b_bufsize = bsize;

			/* do the input */
			VOP_STRATEGY(bp);

			/* we definitely need to be at splbio here */
			s = splbio();
			while ((bp->b_flags & B_DONE) == 0) {
				tsleep(bp, PVM, "vnsrd", 0);
			}
			splx(s);
			if ((bp->b_flags & B_ERROR) != 0)
				error = EIO;

			/*
			 * free the buffer header back to the swap buffer pool
			 */
			relpbuf(bp);
			if (error)
				break;

			vm_page_set_clean(m, (i * bsize) & (PAGE_SIZE - 1), bsize);
			vm_page_set_valid(m, (i * bsize) & (PAGE_SIZE - 1), bsize);
		} else {
			vm_page_set_clean(m, (i * bsize) & (PAGE_SIZE - 1), bsize);
			bzero((caddr_t) kva + i * bsize, bsize);
		}
	}
	vm_pager_unmap_page(kva);
	pmap_clear_modify(VM_PAGE_TO_PHYS(m));
	if (error) {
		return VM_PAGER_ERROR;
	}
	return VM_PAGER_OK;
}

/*
 * old style vnode pager input routine
 */
int
vnode_pager_input_old(object, m)
	vm_object_t object;
	vm_page_t m;
{
	struct uio auio;
	struct iovec aiov;
	int error;
	int size;
	vm_offset_t kva;

	error = 0;

	/*
	 * Return failure if beyond current EOF
	 */
	if (m->offset >= object->un_pager.vnp.vnp_size) {
		return VM_PAGER_BAD;
	} else {
		size = PAGE_SIZE;
		if (m->offset + size > object->un_pager.vnp.vnp_size)
			size = object->un_pager.vnp.vnp_size - m->offset;

		/*
		 * Allocate a kernel virtual address and initialize so that
		 * we can use VOP_READ/WRITE routines.
		 */
		kva = vm_pager_map_page(m);

		aiov.iov_base = (caddr_t) kva;
		aiov.iov_len = size;
		auio.uio_iov = &aiov;
		auio.uio_iovcnt = 1;
		auio.uio_offset = m->offset;
		auio.uio_segflg = UIO_SYSSPACE;
		auio.uio_rw = UIO_READ;
		auio.uio_resid = size;
		auio.uio_procp = (struct proc *) 0;

		error = VOP_READ(object->handle, &auio, 0, curproc->p_ucred);
		if (!error) {
			register int count = size - auio.uio_resid;

			if (count == 0)
				error = EINVAL;
			else if (count != PAGE_SIZE)
				bzero((caddr_t) kva + count, PAGE_SIZE - count);
		}
		vm_pager_unmap_page(kva);
	}
	pmap_clear_modify(VM_PAGE_TO_PHYS(m));
	m->dirty = 0;
	return error ? VM_PAGER_ERROR : VM_PAGER_OK;
}

/*
 * generic vnode pager input routine
 */
int
vnode_pager_getpages(object, m, count, reqpage)
	vm_object_t object;
	vm_page_t *m;
	int count;
	int reqpage;
{
	vm_offset_t kva, foff;
	int i, size, bsize, first, firstaddr;
	struct vnode *dp, *vp;
	int runpg;
	int runend;
	struct buf *bp;
	int s;
	int error = 0;

	vp = object->handle;
	bsize = vp->v_mount->mnt_stat.f_iosize;

	/* get the UNDERLYING device for the file with VOP_BMAP() */

	/*
	 * originally, we did not check for an error return value -- assuming
	 * an fs always has a bmap entry point -- that assumption is wrong!!!
	 */
	foff = m[reqpage]->offset;

	/*
	 * if we can't bmap, use old VOP code
	 */
	if (VOP_BMAP(vp, 0, &dp, 0, 0)) {
		for (i = 0; i < count; i++) {
			if (i != reqpage) {
				vnode_pager_freepage(m[i]);
			}
		}
		cnt.v_vnodein++;
		cnt.v_vnodepgsin++;
		return vnode_pager_input_old(object, m[reqpage]);

		/*
		 * if the blocksize is smaller than a page size, then use
		 * special small filesystem code.  NFS sometimes has a small
		 * blocksize, but it can handle large reads itself.
		 */
	} else if ((PAGE_SIZE / bsize) > 1 &&
	    (vp->v_mount->mnt_stat.f_type != MOUNT_NFS)) {

		for (i = 0; i < count; i++) {
			if (i != reqpage) {
				vnode_pager_freepage(m[i]);
			}
		}
		cnt.v_vnodein++;
		cnt.v_vnodepgsin++;
		return vnode_pager_input_smlfs(object, m[reqpage]);
	}
	/*
	 * if ANY DEV_BSIZE blocks are valid on a large filesystem block,
	 * then the entire page is valid.
	 */
	if (m[reqpage]->valid) {
		m[reqpage]->valid = VM_PAGE_BITS_ALL;
		for (i = 0; i < count; i++) {
			if (i != reqpage)
				vnode_pager_freepage(m[i]);
		}
		return VM_PAGER_OK;
	}

	/*
	 * here on direct device I/O
	 */
	firstaddr = -1;
	/*
	 * calculate the run that includes the required page
	 */
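	/*
	 * Each pass maps m[i] with vnode_pager_addr().  Pages with no
	 * backing store, and leading pages that cannot be clustered with
	 * the requested page, are freed; pages past the end of the
	 * contiguous run are freed as well, leaving m[first..count-1] as
	 * a single physically contiguous transfer containing reqpage.
	 */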
	for (first = 0, i = 0; i < count; i = runend) {
		firstaddr = vnode_pager_addr(vp, m[i]->offset, &runpg);
		if (firstaddr == -1) {
			if (i == reqpage && foff < object->un_pager.vnp.vnp_size) {
				panic("vnode_pager_getpages: unexpected missing page: firstaddr: %d, foff: %ld, vnp_size: %d",
				    firstaddr, foff, object->un_pager.vnp.vnp_size);
			}
			vnode_pager_freepage(m[i]);
			runend = i + 1;
			first = runend;
			continue;
		}
		runend = i + runpg;
		if (runend <= reqpage) {
			int j;
			for (j = i; j < runend; j++) {
				vnode_pager_freepage(m[j]);
			}
		} else {
			if (runpg < (count - first)) {
				for (i = first + runpg; i < count; i++)
					vnode_pager_freepage(m[i]);
				count = first + runpg;
			}
			break;
		}
		first = runend;
	}

	/*
	 * the first and last page have been calculated now, move input pages
	 * to be zero based...
	 */
	if (first != 0) {
		for (i = first; i < count; i++) {
			m[i - first] = m[i];
		}
		count -= first;
		reqpage -= first;
	}

	/*
	 * calculate the file virtual address for the transfer
	 */
	foff = m[0]->offset;

	/*
	 * calculate the size of the transfer
	 */
	size = count * PAGE_SIZE;
	if ((foff + size) > object->un_pager.vnp.vnp_size)
		size = object->un_pager.vnp.vnp_size - foff;

	/*
	 * round up physical size for real devices
	 */
	if (dp->v_type == VBLK || dp->v_type == VCHR)
		size = (size + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1);

	bp = getpbuf();
	kva = (vm_offset_t) bp->b_data;

	/*
	 * and map the pages to be read into the kva
	 */
	pmap_qenter(kva, m, count);

	/* build a minimal buffer header */
	bp->b_flags = B_BUSY | B_READ | B_CALL;
	bp->b_iodone = vnode_pager_iodone;
	/* B_PHYS is not set, but it is nice to fill this in */
	bp->b_proc = curproc;
	bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
	if (bp->b_rcred != NOCRED)
		crhold(bp->b_rcred);
	if (bp->b_wcred != NOCRED)
		crhold(bp->b_wcred);
	bp->b_blkno = firstaddr;
	pbgetvp(dp, bp);
	bp->b_bcount = size;
	bp->b_bufsize = size;

	cnt.v_vnodein++;
	cnt.v_vnodepgsin += count;

	/* do the input */
	VOP_STRATEGY(bp);

	/* we definitely need to be at splbio here */
	s = splbio();
	while ((bp->b_flags & B_DONE) == 0) {
		tsleep(bp, PVM, "vnread", 0);
	}
	splx(s);
	if ((bp->b_flags & B_ERROR) != 0)
		error = EIO;

	if (!error) {
		if (size != count * PAGE_SIZE)
			bzero((caddr_t) kva + size, PAGE_SIZE * count - size);
	}
	pmap_qremove(kva, count);

	/*
	 * free the buffer header back to the swap buffer pool
	 */
	relpbuf(bp);

	for (i = 0; i < count; i++) {
		pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
		m[i]->dirty = 0;
		m[i]->valid = VM_PAGE_BITS_ALL;
		if (i != reqpage) {

			/*
			 * whether or not to leave the page activated is up in
			 * the air, but we should put the page on a page queue
			 * somewhere (it already is in the object).  Empirical
			 * results show that deactivating the pages is best.
			 */

			/*
			 * just in case someone was asking for this page we
			 * now tell them that it is ok to use
			 */
			if (!error) {
				vm_page_deactivate(m[i]);
				PAGE_WAKEUP(m[i]);
			} else {
				vnode_pager_freepage(m[i]);
			}
		}
	}
	if (error) {
		printf("vnode_pager_getpages: I/O read error\n");
	}
	return (error ? VM_PAGER_ERROR : VM_PAGER_OK);
}

/*
 * generic vnode pager output routine
 */
int
vnode_pager_putpages(object, m, count, sync, rtvals)
	vm_object_t object;
	vm_page_t *m;
	int count;
	boolean_t sync;
	int *rtvals;
{
	int i;
	struct vnode *vp;
	int maxsize, ncount;
	struct uio auio;
	struct iovec aiov;
	int error;

	vp = object->handle;
	for (i = 0; i < count; i++)
		rtvals[i] = VM_PAGER_AGAIN;

	if ((int) m[0]->offset < 0) {
		printf("vnode_pager_putpages: attempt to write meta-data!!! -- 0x%x(%x)\n", m[0]->offset, m[0]->dirty);
		rtvals[0] = VM_PAGER_BAD;
		return VM_PAGER_BAD;
	}

	maxsize = count * PAGE_SIZE;
	ncount = count;

	if (maxsize + m[0]->offset > object->un_pager.vnp.vnp_size) {
		if (object->un_pager.vnp.vnp_size > m[0]->offset)
			maxsize = object->un_pager.vnp.vnp_size - m[0]->offset;
		else
			maxsize = 0;
		ncount = (maxsize + PAGE_SIZE - 1) / PAGE_SIZE;
		if (ncount < count) {
			for (i = ncount; i < count; i++) {
				rtvals[i] = VM_PAGER_BAD;
			}
			if (ncount == 0) {
				printf("vnode_pager_putpages: write past end of file: %d, %d\n",
				    m[0]->offset, object->un_pager.vnp.vnp_size);
				return rtvals[0];
			}
		}
	}

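	/*
	 * Trade each page's PG_BUSY flag for a busy reference count:
	 * the pages stay pinned across the write, but (presumably) can
	 * still be found and operated on by the VOP_WRITE path, which
	 * would otherwise block on PG_BUSY.
	 */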
	for (i = 0; i < count; i++) {
		m[i]->busy++;
		m[i]->flags &= ~PG_BUSY;
	}

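	/*
	 * Issue the write with a UIO_NOCOPY uio: iov_base is left NULL
	 * and no data is copied through it; the IO_VMIO write path is
	 * expected to take the data directly from the VM pages backing
	 * this object/offset range.
	 */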
	aiov.iov_base = (caddr_t) 0;
	aiov.iov_len = maxsize;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_offset = m[0]->offset;
	auio.uio_segflg = UIO_NOCOPY;
	auio.uio_rw = UIO_WRITE;
	auio.uio_resid = maxsize;
	auio.uio_procp = (struct proc *) 0;
	error = VOP_WRITE(vp, &auio, IO_VMIO, curproc->p_ucred);
	cnt.v_vnodeout++;
	cnt.v_vnodepgsout += ncount;

	if (error) {
		printf("vnode_pager_putpages: I/O error %d\n", error);
	}
	if (auio.uio_resid) {
		printf("vnode_pager_putpages: residual I/O %d at %d\n", auio.uio_resid, m[0]->offset);
	}
	for (i = 0; i < count; i++) {
		m[i]->busy--;
		if (i < ncount) {
			rtvals[i] = VM_PAGER_OK;
		}
		if ((m[i]->busy == 0) && (m[i]->flags & PG_WANTED))
			wakeup(m[i]);
	}
	return rtvals[0];
}

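/*
 * Walk the backing-object chain looking for a vnode-backed object;
 * lock that object's vnode and return it, or return NULL if the chain
 * contains no vnode object.
 */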
struct vnode *
vnode_pager_lock(object)
	vm_object_t object;
{
	for (; object != NULL; object = object->backing_object) {
		if (object->type != OBJT_VNODE)
			continue;

		VOP_LOCK(object->handle);
		return object->handle;
	}
	return NULL;
}
8727695Sdg}
873