/*
 * Copyright (c) 1990 University of Utah.
 * Copyright (c) 1991 The Regents of the University of California.
 * All rights reserved.
 * Copyright (c) 1993,1994 John S. Dyson
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)vnode_pager.c	7.5 (Berkeley) 4/20/91
 *	$Id: vnode_pager.c,v 1.38 1995/05/10 18:56:09 davidg Exp $
 */

/*
 * Page to/from files (vnodes).
 *
 * TODO:
 *	pageouts
 *	fix credential use (uses current process credentials now)
 */

/*
 * 1) Supports multiple-block reads/writes
 * 2) Bypasses buffer cache for reads
 *
 * TODO:
 *	Implement a getpage/putpage interface for filesystems.  Should
 *	greatly simplify the vnode_pager.
 *
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/vnode.h>
#include <sys/uio.h>
#include <sys/mount.h>

#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/vnode_pager.h>

#include <sys/buf.h>
#include <miscfs/specfs/specdev.h>

int vnode_pager_putmulti();

void vnode_pager_init();
void vnode_pager_dealloc();
int vnode_pager_getpage();
int vnode_pager_getmulti();
int vnode_pager_putpage();
boolean_t vnode_pager_haspage();

struct pagerops vnodepagerops = {
	vnode_pager_init,
	vnode_pager_alloc,
	vnode_pager_dealloc,
	vnode_pager_getpage,
	vnode_pager_getmulti,
	vnode_pager_putpage,
	vnode_pager_putmulti,
	vnode_pager_haspage
};

static int vnode_pager_input(vn_pager_t vnp, vm_page_t * m, int count, int reqpage);
static int vnode_pager_output(vn_pager_t vnp, vm_page_t * m, int count, int *rtvals);

extern vm_map_t pager_map;

struct pagerlst vnode_pager_list;	/* list of managed vnodes */

#define MAXBP (PAGE_SIZE/DEV_BSIZE)

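/*
 * Initialize the vnode pager: set up the global list of managed vnode
 * pagers.  Invoked through the init entry of vnodepagerops.
 */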
void
vnode_pager_init()
{
	TAILQ_INIT(&vnode_pager_list);
}

/*
 * Allocate (or lookup) pager for a vnode.
 * Handle is a vnode pointer.
 */
vm_pager_t
vnode_pager_alloc(handle, size, prot, offset)
	void *handle;
	vm_size_t size;
	vm_prot_t prot;
	vm_offset_t offset;
{
	register vm_pager_t pager;
	register vn_pager_t vnp;
	vm_object_t object, tobject;
	struct vattr vattr;
	struct vnode *vp;
	struct proc *p = curproc;	/* XXX */
	int rtval;

	/*
	 * Pageout to vnode, no can do yet.
	 */
	if (handle == NULL)
		return (NULL);

	/*
	 * Vnodes keep a pointer to any associated pager, so there is no
	 * need to look it up with vm_pager_lookup().
	 */
	vp = (struct vnode *) handle;
	while ((object = (vm_object_t) vp->v_vmdata) &&
		(object->flags & OBJ_DEAD))
		tsleep((caddr_t) object, PVM, "vadead", 0);

	pager = NULL;
	if (object != NULL)
		pager = object->pager;
	if (pager == NULL) {

		/*
		 * Allocate pager structures
		 */
		pager = (vm_pager_t) malloc(sizeof *pager, M_VMPAGER, M_WAITOK);
		if (pager == NULL)
			return (NULL);
		vnp = (vn_pager_t) malloc(sizeof *vnp, M_VMPGDATA, M_WAITOK);
		if (vnp == NULL) {
			free((caddr_t) pager, M_VMPAGER);
			return (NULL);
		}
		/*
		 * And an object of the appropriate size
		 */
		if ((rtval = VOP_GETATTR(vp, &vattr, p->p_ucred, p)) == 0) {
			object = vm_object_allocate(round_page(vattr.va_size));
			object->flags = OBJ_CANPERSIST;
			vm_object_enter(object, pager);
			object->pager = pager;
		} else {
			free((caddr_t) vnp, M_VMPGDATA);
			free((caddr_t) pager, M_VMPAGER);
			return (NULL);
		}

		/*
		 * Hold a reference to the vnode and initialize pager data.
		 */
		VREF(vp);
		vnp->vnp_flags = 0;
		vnp->vnp_vp = vp;
		vnp->vnp_size = vattr.va_size;

		TAILQ_INSERT_TAIL(&vnode_pager_list, pager, pg_list);
		pager->pg_handle = handle;
		pager->pg_type = PG_VNODE;
		pager->pg_ops = &vnodepagerops;
		pager->pg_data = (caddr_t) vnp;
		vp->v_vmdata = (caddr_t) object;
	} else {

		/*
		 * vm_object_lookup() will remove the object from the cache if
		 * found and also gain a reference to the object.
		 */
		(void) vm_object_lookup(pager);
	}
	if (vp->v_type == VREG)
		vp->v_flag |= VVMIO;
	return (pager);
}

void
vnode_pager_dealloc(pager)
	vm_pager_t pager;
{
	register vn_pager_t vnp = (vn_pager_t) pager->pg_data;
	register struct vnode *vp;
	vm_object_t object;

	vp = vnp->vnp_vp;
	if (vp) {
		int s = splbio();

		object = (vm_object_t) vp->v_vmdata;
		if (object) {
			while (object->paging_in_progress) {
				object->flags |= OBJ_PIPWNT;
				tsleep(object, PVM, "vnpdea", 0);
			}
		}
		splx(s);

		vp->v_vmdata = NULL;
		vp->v_flag &= ~(VTEXT | VVMIO);
		vp->v_flag |= VAGE;
		vrele(vp);
	}
	TAILQ_REMOVE(&vnode_pager_list, pager, pg_list);
	free((caddr_t) vnp, M_VMPGDATA);
	free((caddr_t) pager, M_VMPAGER);
}

int
vnode_pager_getmulti(pager, m, count, reqpage, sync)
	vm_pager_t pager;
	vm_page_t *m;
	int count;
	int reqpage;
	boolean_t sync;
{

	return vnode_pager_input((vn_pager_t) pager->pg_data, m, count, reqpage);
}

int
vnode_pager_getpage(pager, m, sync)
	vm_pager_t pager;
	vm_page_t m;
	boolean_t sync;
{

	vm_page_t marray[1];

	if (pager == NULL)
		return FALSE;
	marray[0] = m;

	return vnode_pager_input((vn_pager_t) pager->pg_data, marray, 1, 0);
}

boolean_t
vnode_pager_putpage(pager, m, sync)
	vm_pager_t pager;
	vm_page_t m;
	boolean_t sync;
{
	vm_page_t marray[1];
	int rtvals[1];

	if (pager == NULL)
		return FALSE;
	marray[0] = m;
	vnode_pager_output((vn_pager_t) pager->pg_data, marray, 1, rtvals);
	return rtvals[0];
}

int
vnode_pager_putmulti(pager, m, c, sync, rtvals)
	vm_pager_t pager;
	vm_page_t *m;
	int c;
	boolean_t sync;
	int *rtvals;
{
	return vnode_pager_output((vn_pager_t) pager->pg_data, m, c, rtvals);
}

boolean_t
vnode_pager_haspage(pager, offset)
	vm_pager_t pager;
	vm_offset_t offset;
{
	register vn_pager_t vnp = (vn_pager_t) pager->pg_data;
	register struct vnode *vp = vnp->vnp_vp;
	daddr_t bn;
	int err;
	daddr_t block;

	/*
	 * If the filesystem is no longer mounted, or the offset is beyond
	 * the end of the file, we do not have the page.
	 */
	if ((vp->v_mount == NULL) || (offset >= vnp->vnp_size))
		return FALSE;

	block = offset / vp->v_mount->mnt_stat.f_iosize;
	if (incore(vp, block))
		return TRUE;

	/*
	 * Read the index to find the disk block to read from.  If there is no
	 * block, report that we don't have this data.
	 *
	 * Assumes that the vnode has the whole page or nothing.
	 */
	err = VOP_BMAP(vp, block, (struct vnode **) 0, &bn, 0);
	if (err)
		return (TRUE);
	return ((long) bn < 0 ? FALSE : TRUE);
}

/*
 * Lets the VM system know about a change in size for a file.
 * If this vnode is mapped into some address space (i.e. we have a pager
 * for it) we adjust our own internal size and flush any cached pages in
 * the associated object that are affected by the size change.
 *
 * Note: this routine may be invoked as a result of a pager put
 * operation (possibly at object termination time), so we must be careful.
 */
void
vnode_pager_setsize(vp, nsize)
	struct vnode *vp;
	u_long nsize;
{
	register vn_pager_t vnp;
	register vm_object_t object;
	vm_pager_t pager;

	/*
	 * Not a mapped vnode
	 */
	if (vp == NULL || vp->v_type != VREG || vp->v_vmdata == NULL)
		return;

	/*
	 * Hasn't changed size
	 */
	object = (vm_object_t) vp->v_vmdata;
	if (object == NULL)
		return;
	if ((pager = object->pager) == NULL)
		return;
	vnp = (vn_pager_t) pager->pg_data;
	if (nsize == vnp->vnp_size)
		return;

	/*
	 * File has shrunk. Toss any cached pages beyond the new EOF.
	 */
	if (nsize < vnp->vnp_size) {
		if (round_page((vm_offset_t) nsize) < vnp->vnp_size) {
			vm_object_lock(object);
			vm_object_page_remove(object,
			    round_page((vm_offset_t) nsize), vnp->vnp_size, FALSE);
			vm_object_unlock(object);
		}
		/*
		 * this gets rid of garbage at the end of a page that is now
		 * only partially backed by the vnode...
		 */
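		/*
		 * For illustration (assuming 4K pages): if nsize is 10000,
		 * the page at trunc_page(nsize) covers bytes 8192..12287.
		 * The bzero below clears bytes (nsize & PAGE_MASK) = 1808
		 * through 4095 of that page, i.e. round_page(nsize) - nsize
		 * = 2288 bytes, so stale data past the new EOF is not left
		 * visible to mappings of the page.
		 */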
		if (nsize & PAGE_MASK) {
			vm_offset_t kva;
			vm_page_t m;

			m = vm_page_lookup(object, trunc_page((vm_offset_t) nsize));
			if (m) {
				kva = vm_pager_map_page(m);
				bzero((caddr_t) kva + (nsize & PAGE_MASK),
				    round_page(nsize) - nsize);
				vm_pager_unmap_page(kva);
			}
		}
	}
	vnp->vnp_size = (vm_offset_t) nsize;
	object->size = round_page(nsize);
}

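/*
 * Flush vnode pagers at unmount time: uncache the object of every managed
 * vnode that belongs to the given mount point (or of every vnode, if mp is
 * NULL).  Each vnode is locked around the uncache, as vnode_pager_uncache()
 * requires.
 */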
void
vnode_pager_umount(mp)
	register struct mount *mp;
{
	register vm_pager_t pager, npager;
	struct vnode *vp;

	for (pager = vnode_pager_list.tqh_first; pager != NULL; pager = npager) {
		/*
		 * Save the next pointer now since uncaching may terminate the
		 * object and render the pager invalid
		 */
		npager = pager->pg_list.tqe_next;
		vp = ((vn_pager_t) pager->pg_data)->vnp_vp;
		if (mp == (struct mount *) 0 || vp->v_mount == mp) {
			VOP_LOCK(vp);
			(void) vnode_pager_uncache(vp);
			VOP_UNLOCK(vp);
		}
	}
}

/*
 * Remove the vnode-associated object from the object cache.
 * This routine must be called with the vnode locked.
 *
 * XXX: the vnode is unlocked (and relocked) around the uncache below.
 * We must do this since uncaching the object may result in its
 * destruction, which may initiate paging activity, which may necessitate
 * re-locking the vnode.
 */
boolean_t
vnode_pager_uncache(vp)
	register struct vnode *vp;
{
	register vm_object_t object;
	boolean_t uncached;
	vm_pager_t pager;

	/*
	 * Not a mapped vnode
	 */
	object = (vm_object_t) vp->v_vmdata;
	if (object == NULL)
		return (TRUE);

	pager = object->pager;
	if (pager == NULL)
		return (TRUE);

#ifdef DEBUG
	if (!VOP_ISLOCKED(vp)) {
		extern int (**nfsv2_vnodeop_p)();

		if (vp->v_op != nfsv2_vnodeop_p)
			panic("vnode_pager_uncache: vnode not locked!");
	}
#endif
	/*
	 * Must use vm_object_lookup() as it actually removes the object from
	 * the cache list.
	 */
	object = vm_object_lookup(pager);
	if (object) {
		uncached = (object->ref_count <= 1);
		VOP_UNLOCK(vp);
		pager_cache(object, FALSE);
		VOP_LOCK(vp);
	} else
		uncached = TRUE;
	return (uncached);
}

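/*
 * Helper to release a page that was allocated for a read but is no longer
 * needed: wake up any sleepers and free it back to the VM system.
 */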
void
vnode_pager_freepage(m)
	vm_page_t m;
{
	PAGE_WAKEUP(m);
	vm_page_free(m);
}

/*
 * calculate the disk block address (in DEV_BSIZE units) that backs the
 * specified virtual file address
 */
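/*
 * Worked example (illustrative numbers only): with an 8K filesystem block
 * size, 4K pages and a 512-byte DEV_BSIZE, address 12288 gives vblock = 1
 * and voffset = 4096.  If VOP_BMAP() maps vblock 1 to device block 1000
 * with 3 further contiguous filesystem blocks behind it (*run == 3), the
 * result is 1000 + 4096/512 = 1008, and *run becomes
 * (3 + 1) * (8192/4096) - 4096/4096 = 7 contiguous pages.
 */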
vm_offset_t
vnode_pager_addr(vp, address, run)
	struct vnode *vp;
	vm_offset_t address;
	int *run;
{
	int rtaddress;
	int bsize;
	vm_offset_t block;
	struct vnode *rtvp;
	int err;
	int vblock, voffset;

	if ((int) address < 0)
		return -1;

	bsize = vp->v_mount->mnt_stat.f_iosize;
	vblock = address / bsize;
	voffset = address % bsize;

	err = VOP_BMAP(vp, vblock, &rtvp, &block, run);

	if (err || (block == -1))
		rtaddress = -1;
	else {
		rtaddress = block + voffset / DEV_BSIZE;
		if (run) {
			*run += 1;
			*run *= bsize / PAGE_SIZE;
			*run -= voffset / PAGE_SIZE;
		}
	}

	return rtaddress;
}

/*
 * interrupt routine for I/O completion
 */
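/*
 * This is the b_iodone hook for the pager's buffers: it only marks the
 * buffer done and wakes whoever is sleeping on it in the read paths.
 */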
void
vnode_pager_iodone(bp)
	struct buf *bp;
{
	bp->b_flags |= B_DONE;
	wakeup((caddr_t) bp);
}

/*
 * small block file system vnode pager input
 */
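/*
 * Used when the filesystem block size is smaller than a page: each bsize
 * piece of the page that is not already valid is read with its own buffer
 * (or zero-filled if it has no backing disk block), and the valid/clean
 * bits are updated per piece.
 */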
int
vnode_pager_input_smlfs(vnp, m)
	vn_pager_t vnp;
	vm_page_t m;
{
	int i;
	int s;
	struct vnode *dp, *vp;
	struct buf *bp;
	vm_offset_t kva;
	int fileaddr;
	int block;
	vm_offset_t bsize;
	int error = 0;

	vp = vnp->vnp_vp;
	bsize = vp->v_mount->mnt_stat.f_iosize;

	VOP_BMAP(vp, 0, &dp, 0, 0);

	kva = vm_pager_map_page(m);

	for (i = 0; i < PAGE_SIZE / bsize; i++) {

		if ((vm_page_bits(m->offset + i * bsize, bsize) & m->valid))
			continue;

		fileaddr = vnode_pager_addr(vp, m->offset + i * bsize, (int *)0);
		if (fileaddr != -1) {
			bp = getpbuf();

			/* build a minimal buffer header */
			bp->b_flags = B_BUSY | B_READ | B_CALL;
			bp->b_iodone = vnode_pager_iodone;
			bp->b_proc = curproc;
			bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
			if (bp->b_rcred != NOCRED)
				crhold(bp->b_rcred);
			if (bp->b_wcred != NOCRED)
				crhold(bp->b_wcred);
			bp->b_un.b_addr = (caddr_t) kva + i * bsize;
			bp->b_blkno = fileaddr;
			pbgetvp(dp, bp);
			bp->b_bcount = bsize;
			bp->b_bufsize = bsize;

			/* do the input */
			VOP_STRATEGY(bp);

			/* we definitely need to be at splbio here */
			s = splbio();
			while ((bp->b_flags & B_DONE) == 0) {
				tsleep((caddr_t) bp, PVM, "vnsrd", 0);
			}
			splx(s);
			if ((bp->b_flags & B_ERROR) != 0)
				error = EIO;

			/*
			 * free the buffer header back to the swap buffer pool
			 */
			relpbuf(bp);
			if (error)
				break;

			vm_page_set_clean(m, (i * bsize) & (PAGE_SIZE-1), bsize);
			vm_page_set_valid(m, (i * bsize) & (PAGE_SIZE-1), bsize);
		} else {
			vm_page_set_clean(m, (i * bsize) & (PAGE_SIZE-1), bsize);
			bzero((caddr_t) kva + i * bsize, bsize);
		}
	}
	vm_pager_unmap_page(kva);
	pmap_clear_modify(VM_PAGE_TO_PHYS(m));
	if (error) {
		return VM_PAGER_ERROR;
	}
	return VM_PAGER_OK;

}

/*
 * old style vnode pager input routine
 */
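/*
 * Fallback path for filesystems without a usable VOP_BMAP(): map the page
 * into kernel space and read it with VOP_READ(), zero-filling whatever the
 * read does not cover.
 */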
int
vnode_pager_input_old(vnp, m)
	vn_pager_t vnp;
	vm_page_t m;
{
	struct uio auio;
	struct iovec aiov;
	int error;
	int size;
	vm_offset_t kva;

	error = 0;

	/*
	 * Return failure if beyond current EOF
	 */
	if (m->offset >= vnp->vnp_size) {
		return VM_PAGER_BAD;
	} else {
		size = PAGE_SIZE;
		if (m->offset + size > vnp->vnp_size)
			size = vnp->vnp_size - m->offset;

		/*
		 * Allocate a kernel virtual address and initialize so that
		 * we can use VOP_READ/WRITE routines.
		 */
		kva = vm_pager_map_page(m);

		aiov.iov_base = (caddr_t) kva;
		aiov.iov_len = size;
		auio.uio_iov = &aiov;
		auio.uio_iovcnt = 1;
		auio.uio_offset = m->offset;
		auio.uio_segflg = UIO_SYSSPACE;
		auio.uio_rw = UIO_READ;
		auio.uio_resid = size;
		auio.uio_procp = (struct proc *) 0;

		error = VOP_READ(vnp->vnp_vp, &auio, 0, curproc->p_ucred);
		if (!error) {
			register int count = size - auio.uio_resid;

			if (count == 0)
				error = EINVAL;
			else if (count != PAGE_SIZE)
				bzero((caddr_t) kva + count, PAGE_SIZE - count);
		}
		vm_pager_unmap_page(kva);
	}
	pmap_clear_modify(VM_PAGE_TO_PHYS(m));
	m->dirty = 0;
	return error ? VM_PAGER_ERROR : VM_PAGER_OK;
}

/*
 * generic vnode pager input routine
 */
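/*
 * Strategy: if the filesystem cannot VOP_BMAP(), fall back to
 * vnode_pager_input_old(); if its block size is smaller than a page (and
 * it is not NFS), use vnode_pager_input_smlfs(); otherwise map the pages,
 * find the contiguous run of disk blocks covering the requested page, and
 * issue a single clustered read straight to the underlying device.
 */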
int
vnode_pager_input(vnp, m, count, reqpage)
	register vn_pager_t vnp;
	vm_page_t *m;
	int count, reqpage;
{
	int i;
	vm_offset_t kva, foff;
	int size;
	vm_object_t object;
	struct vnode *dp, *vp;
	int bsize;

	int first, last;
	int firstaddr;
	int block, offset;
	int runpg;
	int runend;

	struct buf *bp;
	int s;
	int failflag;

	int error = 0;
	object = m[reqpage]->object;	/* all vm_page_t items are in same
					 * object */

	vp = vnp->vnp_vp;
	bsize = vp->v_mount->mnt_stat.f_iosize;

	/* get the UNDERLYING device for the file with VOP_BMAP() */

	/*
	 * originally, we did not check for an error return value -- assuming
	 * an fs always has a bmap entry point -- that assumption is wrong!!!
	 */
	foff = m[reqpage]->offset;

	/*
	 * if we can't bmap, use old VOP code
	 */
	if (VOP_BMAP(vp, 0, &dp, 0, 0)) {
		for (i = 0; i < count; i++) {
			if (i != reqpage) {
				vnode_pager_freepage(m[i]);
			}
		}
		cnt.v_vnodein++;
		cnt.v_vnodepgsin++;
		return vnode_pager_input_old(vnp, m[reqpage]);

		/*
		 * if the blocksize is smaller than a page size, then use
		 * special small filesystem code.  NFS sometimes has a small
		 * blocksize, but it can handle large reads itself.
		 */
	} else if ((PAGE_SIZE / bsize) > 1 &&
	    (vp->v_mount->mnt_stat.f_type != MOUNT_NFS)) {

		for (i = 0; i < count; i++) {
			if (i != reqpage) {
				vnode_pager_freepage(m[i]);
			}
		}
		cnt.v_vnodein++;
		cnt.v_vnodepgsin++;
		return vnode_pager_input_smlfs(vnp, m[reqpage]);
	}
	/*
	 * if ANY DEV_BSIZE blocks are valid on a large filesystem block,
	 * then the entire page is valid --
	 */
	if (m[reqpage]->valid) {
		m[reqpage]->valid = VM_PAGE_BITS_ALL;
		for (i = 0; i < count; i++) {
			if (i != reqpage)
				vnode_pager_freepage(m[i]);
		}
		return VM_PAGER_OK;
	}

	/*
	 * here on direct device I/O
	 */

	firstaddr = -1;
	/*
	 * calculate the run that includes the required page
	 */
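	/*
	 * Each pass resolves m[i] to a disk address and the number of
	 * contiguous pages (runpg) behind it.  Runs that end before the
	 * requested page are freed; the run containing the requested page
	 * is kept and any pages beyond it are trimmed, leaving [first,
	 * count) as the range actually read below.
	 */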
	for (first = 0, i = 0; i < count; i = runend) {
		firstaddr = vnode_pager_addr(vp, m[i]->offset, &runpg);
		if (firstaddr == -1) {
			if (i == reqpage && foff < vnp->vnp_size) {
				printf("vnode_pager_input: unexpected missing page: firstaddr: %d, foff: %d, vnp_size: %d\n",
				    firstaddr, foff, vnp->vnp_size);
				panic("vnode_pager_input:...");
			}
			vnode_pager_freepage(m[i]);
			runend = i + 1;
			first = runend;
			continue;
		}
		runend = i + runpg;
		if (runend <= reqpage) {
			int j;
			for (j = i; j < runend; j++) {
				vnode_pager_freepage(m[j]);
			}
		} else {
			if (runpg < (count - first)) {
				for (i = first + runpg; i < count; i++)
					vnode_pager_freepage(m[i]);
				count = first + runpg;
			}
			break;
		}
		first = runend;
	}

	/*
	 * the first and last page have been calculated now, move input pages
	 * to be zero based...
	 */
	if (first != 0) {
		for (i = first; i < count; i++) {
			m[i - first] = m[i];
		}
		count -= first;
		reqpage -= first;
	}

	/*
	 * calculate the file virtual address for the transfer
	 */
	foff = m[0]->offset;

	/*
	 * calculate the size of the transfer
	 */
	size = count * PAGE_SIZE;
	if ((foff + size) > vnp->vnp_size)
		size = vnp->vnp_size - foff;

	/*
	 * round up physical size for real devices
	 */
	if (dp->v_type == VBLK || dp->v_type == VCHR)
		size = (size + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1);

	bp = getpbuf();
	kva = (vm_offset_t) bp->b_data;

	/*
	 * and map the pages to be read into the kva
	 */
	pmap_qenter(kva, m, count);

	/* build a minimal buffer header */
	bp->b_flags = B_BUSY | B_READ | B_CALL;
	bp->b_iodone = vnode_pager_iodone;
	/* B_PHYS is not set, but it is nice to fill this in */
	bp->b_proc = curproc;
	bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
	if (bp->b_rcred != NOCRED)
		crhold(bp->b_rcred);
	if (bp->b_wcred != NOCRED)
		crhold(bp->b_wcred);
	bp->b_blkno = firstaddr;
	pbgetvp(dp, bp);
	bp->b_bcount = size;
	bp->b_bufsize = size;

	cnt.v_vnodein++;
	cnt.v_vnodepgsin += count;

	/* do the input */
	VOP_STRATEGY(bp);

	s = splbio();
	/* we definitely need to be at splbio here */

	while ((bp->b_flags & B_DONE) == 0) {
		tsleep((caddr_t) bp, PVM, "vnread", 0);
	}
	splx(s);
	if ((bp->b_flags & B_ERROR) != 0)
		error = EIO;

	if (!error) {
		if (size != count * PAGE_SIZE)
			bzero((caddr_t) kva + size, PAGE_SIZE * count - size);
	}
	pmap_qremove(kva, count);

	/*
	 * free the buffer header back to the swap buffer pool
	 */
	relpbuf(bp);

	for (i = 0; i < count; i++) {
		pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
		m[i]->dirty = 0;
		m[i]->valid = VM_PAGE_BITS_ALL;
		if (i != reqpage) {

			/*
			 * whether or not to leave the page activated is up in
			 * the air, but we should put the page on a page queue
			 * somewhere. (it already is in the object). Result:
			 * empirical results show that deactivating pages is
			 * best.
			 */

			/*
			 * just in case someone was asking for this page we
			 * now tell them that it is ok to use
			 */
			if (!error) {
				vm_page_deactivate(m[i]);
				PAGE_WAKEUP(m[i]);
			} else {
				vnode_pager_freepage(m[i]);
			}
		}
	}
	if (error) {
		printf("vnode_pager_input: I/O read error\n");
	}
	return (error ? VM_PAGER_ERROR : VM_PAGER_OK);
}

/*
 * generic vnode pager output routine
 */
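/*
 * The pages are handed to the filesystem via a UIO_NOCOPY VOP_WRITE()
 * (IO_VMIO), so no data is copied here; requests extending past the end of
 * the file are clamped to vnp_size and the excess pages are returned as
 * VM_PAGER_BAD.
 */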
int
vnode_pager_output(vnp, m, count, rtvals)
	vn_pager_t vnp;
	vm_page_t *m;
	int count;
	int *rtvals;
{
	int i;

	struct vnode *vp;
	int maxsize, ncount;
	struct uio auio;
	struct iovec aiov;
	int error;

	vp = vnp->vnp_vp;
	for (i = 0; i < count; i++)
		rtvals[i] = VM_PAGER_AGAIN;

	if ((int) m[0]->offset < 0) {
		printf("vnode_pager_output: attempt to write meta-data!!! -- 0x%x(%x)\n", m[0]->offset, m[0]->dirty);
		rtvals[0] = VM_PAGER_BAD;
		return VM_PAGER_BAD;
	}

	maxsize = count * PAGE_SIZE;
	ncount = count;

	if (maxsize + m[0]->offset > vnp->vnp_size) {
		if (vnp->vnp_size > m[0]->offset)
			maxsize = vnp->vnp_size - m[0]->offset;
		else
			maxsize = 0;
		ncount = (maxsize + PAGE_SIZE - 1) / PAGE_SIZE;
		if (ncount < count) {
			for (i = ncount; i < count; i++) {
				rtvals[i] = VM_PAGER_BAD;
			}
			if (ncount == 0) {
				printf("vnode_pager_output: write past end of file: %d, %d\n",
					m[0]->offset, vnp->vnp_size);
				return rtvals[0];
			}
		}
	}

	for (i = 0; i < count; i++) {
		m[i]->busy++;
		m[i]->flags &= ~PG_BUSY;
	}

	aiov.iov_base = (caddr_t) 0;
	aiov.iov_len = maxsize;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_offset = m[0]->offset;
	auio.uio_segflg = UIO_NOCOPY;
	auio.uio_rw = UIO_WRITE;
	auio.uio_resid = maxsize;
	auio.uio_procp = (struct proc *) 0;
	error = VOP_WRITE(vp, &auio, IO_VMIO, curproc->p_ucred);
	cnt.v_vnodeout++;
	cnt.v_vnodepgsout += ncount;

	if (error) {
		printf("vnode_pager_output: I/O error %d\n", error);
	}
	if (auio.uio_resid) {
		printf("vnode_pager_output: residual I/O %d at %d\n", auio.uio_resid, m[0]->offset);
	}
	for (i = 0; i < count; i++) {
		m[i]->busy--;
		if (i < ncount) {
			rtvals[i] = VM_PAGER_OK;
		}
		if ((m[i]->busy == 0) && (m[i]->flags & PG_WANTED))
			wakeup((caddr_t) m[i]);
	}
	return rtvals[0];
}

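/*
 * Find the vnode (if any) backing an object or one of its shadows, lock
 * it, and return it; vnode_pager_unlock() releases the lock.  Returns NULL
 * when no object in the shadow chain is vnode-backed.
 */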
struct vnode *
vnode_pager_lock(vm_object_t object)
{
	for (; object; object = object->shadow) {
		vn_pager_t vnp;

		if (!object->pager || (object->pager->pg_type != PG_VNODE))
			continue;

		vnp = (vn_pager_t) object->pager->pg_data;
		VOP_LOCK(vnp->vnp_vp);
		return vnp->vnp_vp;
	}
	return (struct vnode *) NULL;
}

void
vnode_pager_unlock(struct vnode *vp)
{
	VOP_UNLOCK(vp);
}