vnode_pager.c revision 6626
11541Srgrimes/*
21541Srgrimes * Copyright (c) 1990 University of Utah.
31549Srgrimes * Copyright (c) 1991 The Regents of the University of California.
41549Srgrimes * All rights reserved.
51549Srgrimes * Copyright (c) 1993,1994 John S. Dyson
61541Srgrimes *
71541Srgrimes * This code is derived from software contributed to Berkeley by
81541Srgrimes * the Systems Programming Group of the University of Utah Computer
91541Srgrimes * Science Department.
101541Srgrimes *
111541Srgrimes * Redistribution and use in source and binary forms, with or without
121541Srgrimes * modification, are permitted provided that the following conditions
131541Srgrimes * are met:
141541Srgrimes * 1. Redistributions of source code must retain the above copyright
151541Srgrimes *    notice, this list of conditions and the following disclaimer.
161541Srgrimes * 2. Redistributions in binary form must reproduce the above copyright
171541Srgrimes *    notice, this list of conditions and the following disclaimer in the
181541Srgrimes *    documentation and/or other materials provided with the distribution.
191541Srgrimes * 3. All advertising materials mentioning features or use of this software
201541Srgrimes *    must display the following acknowledgement:
211541Srgrimes *	This product includes software developed by the University of
221541Srgrimes *	California, Berkeley and its contributors.
231541Srgrimes * 4. Neither the name of the University nor the names of its contributors
241541Srgrimes *    may be used to endorse or promote products derived from this software
251541Srgrimes *    without specific prior written permission.
261541Srgrimes *
271541Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
281541Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
291541Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
301541Srgrimes * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
311541Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
321541Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
331541Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
341541Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
351541Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
361541Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
371541Srgrimes * SUCH DAMAGE.
381541Srgrimes *
391549Srgrimes *	from: @(#)vnode_pager.c	7.5 (Berkeley) 4/20/91
406626Sdg *	$Id: vnode_pager.c,v 1.24 1995/02/22 09:15:35 davidg Exp $
411541Srgrimes */
421541Srgrimes
431541Srgrimes/*
441541Srgrimes * Page to/from files (vnodes).
451541Srgrimes *
461541Srgrimes * TODO:
471541Srgrimes *	pageouts
481541Srgrimes *	fix credential use (uses current process credentials now)
491541Srgrimes */
501541Srgrimes
511549Srgrimes/*
521549Srgrimes * MODIFICATIONS:
531549Srgrimes * John S. Dyson  08 Dec 93
541549Srgrimes *
 * This file, in conjunction with some vm_fault mods, eliminates the
 * performance advantage of using the buffer cache and minimizes memory copies.
571549Srgrimes *
 * 1) Supports multi-block reads
591549Srgrimes * 2) Bypasses buffer cache for reads
601827Sdg *
611549Srgrimes * TODO:
621549Srgrimes *
631549Srgrimes * 1) Totally bypass buffer cache for reads
641549Srgrimes *    (Currently will still sometimes use buffer cache for reads)
651549Srgrimes * 2) Bypass buffer cache for writes
661549Srgrimes *    (Code does not support it, but mods are simple)
671549Srgrimes */
681549Srgrimes
691541Srgrimes#include <sys/param.h>
701541Srgrimes#include <sys/systm.h>
715455Sdg#include <sys/kernel.h>
721541Srgrimes#include <sys/proc.h>
731541Srgrimes#include <sys/malloc.h>
741541Srgrimes#include <sys/vnode.h>
751541Srgrimes#include <sys/uio.h>
761541Srgrimes#include <sys/mount.h>
771541Srgrimes
781541Srgrimes#include <vm/vm.h>
791541Srgrimes#include <vm/vm_page.h>
801541Srgrimes#include <vm/vnode_pager.h>
811541Srgrimes
821549Srgrimes#include <sys/buf.h>
831549Srgrimes#include <miscfs/specfs/specdev.h>
841541Srgrimes
855455Sdgint vnode_pager_putmulti();
861541Srgrimes
875455Sdgvoid vnode_pager_init();
881827Sdgvm_pager_t vnode_pager_alloc(caddr_t, vm_offset_t, vm_prot_t, vm_offset_t);
895455Sdgvoid vnode_pager_dealloc();
905455Sdgint vnode_pager_getpage();
915455Sdgint vnode_pager_getmulti();
925455Sdgint vnode_pager_putpage();
931827Sdgboolean_t vnode_pager_haspage();
941541Srgrimes
951541Srgrimesstruct pagerops vnodepagerops = {
961541Srgrimes	vnode_pager_init,
971541Srgrimes	vnode_pager_alloc,
981541Srgrimes	vnode_pager_dealloc,
991541Srgrimes	vnode_pager_getpage,
1001549Srgrimes	vnode_pager_getmulti,
1011541Srgrimes	vnode_pager_putpage,
1021549Srgrimes	vnode_pager_putmulti,
1031549Srgrimes	vnode_pager_haspage
1041541Srgrimes};
1051541Srgrimes
1061887Sdg
1071887Sdg
1081827Sdgstatic int vnode_pager_input(vn_pager_t vnp, vm_page_t * m, int count, int reqpage);
1091827Sdgstatic int vnode_pager_output(vn_pager_t vnp, vm_page_t * m, int count, int *rtvals);
1101549Srgrimes
1111549Srgrimesextern vm_map_t pager_map;
1121549Srgrimes
1131549Srgrimesstruct pagerlst vnode_pager_list;	/* list of managed vnodes */
1141549Srgrimes
1151549Srgrimes#define MAXBP (PAGE_SIZE/DEV_BSIZE);
1161549Srgrimes
1171549Srgrimesvoid
1181541Srgrimesvnode_pager_init()
1191541Srgrimes{
1201541Srgrimes	TAILQ_INIT(&vnode_pager_list);
1211541Srgrimes}
1221541Srgrimes
1231541Srgrimes/*
1241541Srgrimes * Allocate (or lookup) pager for a vnode.
1251541Srgrimes * Handle is a vnode pointer.
1261541Srgrimes */
1271549Srgrimesvm_pager_t
1281549Srgrimesvnode_pager_alloc(handle, size, prot, offset)
1291541Srgrimes	caddr_t handle;
1301541Srgrimes	vm_size_t size;
1311541Srgrimes	vm_prot_t prot;
1321549Srgrimes	vm_offset_t offset;
1331541Srgrimes{
1341541Srgrimes	register vm_pager_t pager;
1351541Srgrimes	register vn_pager_t vnp;
1365455Sdg	vm_object_t object, tobject;
1371541Srgrimes	struct vattr vattr;
1381541Srgrimes	struct vnode *vp;
1391541Srgrimes	struct proc *p = curproc;	/* XXX */
1405455Sdg	int rtval;
1411541Srgrimes
1421541Srgrimes	/*
1431541Srgrimes	 * Pageout to vnode, no can do yet.
1441541Srgrimes	 */
1451541Srgrimes	if (handle == NULL)
1461827Sdg		return (NULL);
1471541Srgrimes
1481541Srgrimes	/*
1491827Sdg	 * Vnodes keep a pointer to any associated pager so no need to lookup
1501827Sdg	 * with vm_pager_lookup.
1511541Srgrimes	 */
1521827Sdg	vp = (struct vnode *) handle;
1535455Sdg	while ((object = (vm_object_t) vp->v_vmdata) && (object->flags & OBJ_DEAD))
1545455Sdg		tsleep((caddr_t) object, PVM, "vadead", 0);
1555455Sdg
1563374Sdg	pager = NULL;
1575455Sdg	if (object != NULL)
1583374Sdg		pager = object->pager;
1591541Srgrimes	if (pager == NULL) {
1601827Sdg
1611541Srgrimes		/*
1621541Srgrimes		 * Allocate pager structures
1631541Srgrimes		 */
1641827Sdg		pager = (vm_pager_t) malloc(sizeof *pager, M_VMPAGER, M_WAITOK);
1651541Srgrimes		if (pager == NULL)
1661827Sdg			return (NULL);
1671827Sdg		vnp = (vn_pager_t) malloc(sizeof *vnp, M_VMPGDATA, M_WAITOK);
1681541Srgrimes		if (vnp == NULL) {
1691827Sdg			free((caddr_t) pager, M_VMPAGER);
1701827Sdg			return (NULL);
1711541Srgrimes		}
1721541Srgrimes		/*
1731541Srgrimes		 * And an object of the appropriate size
1741541Srgrimes		 */
1755455Sdg		if ((rtval = VOP_GETATTR(vp, &vattr, p->p_ucred, p)) == 0) {
1761541Srgrimes			object = vm_object_allocate(round_page(vattr.va_size));
1775519Sdg			object->flags &= ~OBJ_INTERNAL;
1786585Sdg			object->flags |= OBJ_CANPERSIST;
1791541Srgrimes			vm_object_enter(object, pager);
1806585Sdg			object->pager = pager;
1811541Srgrimes		} else {
1825455Sdg			printf("Error in getattr: %d\n", rtval);
1831827Sdg			free((caddr_t) vnp, M_VMPGDATA);
1841827Sdg			free((caddr_t) pager, M_VMPAGER);
1851827Sdg			return (NULL);
1861541Srgrimes		}
1871827Sdg
1881541Srgrimes		/*
1891541Srgrimes		 * Hold a reference to the vnode and initialize pager data.
1901541Srgrimes		 */
1911541Srgrimes		VREF(vp);
1921541Srgrimes		vnp->vnp_flags = 0;
1931541Srgrimes		vnp->vnp_vp = vp;
1941541Srgrimes		vnp->vnp_size = vattr.va_size;
1951549Srgrimes
1961541Srgrimes		TAILQ_INSERT_TAIL(&vnode_pager_list, pager, pg_list);
1971541Srgrimes		pager->pg_handle = handle;
1981541Srgrimes		pager->pg_type = PG_VNODE;
1991541Srgrimes		pager->pg_ops = &vnodepagerops;
2001827Sdg		pager->pg_data = (caddr_t) vnp;
2013374Sdg		vp->v_vmdata = (caddr_t) object;
2021541Srgrimes	} else {
2031827Sdg
2041541Srgrimes		/*
2051827Sdg		 * vm_object_lookup() will remove the object from the cache if
2061827Sdg		 * found and also gain a reference to the object.
2071541Srgrimes		 */
2083374Sdg		(void) vm_object_lookup(pager);
2091541Srgrimes	}
2101827Sdg	return (pager);
2111541Srgrimes}
2121541Srgrimes
2131549Srgrimesvoid
2141541Srgrimesvnode_pager_dealloc(pager)
2151541Srgrimes	vm_pager_t pager;
2161541Srgrimes{
2171827Sdg	register vn_pager_t vnp = (vn_pager_t) pager->pg_data;
2181541Srgrimes	register struct vnode *vp;
2195455Sdg	vm_object_t object;
2201541Srgrimes
2213449Sphk	vp = vnp->vnp_vp;
2223449Sphk	if (vp) {
2235455Sdg		int s = splbio();
2245455Sdg
2255455Sdg		object = (vm_object_t) vp->v_vmdata;
2265455Sdg		if (object) {
2275455Sdg			while (object->paging_in_progress) {
2286618Sdg				object->flags |= OBJ_PIPWNT;
2295455Sdg				tsleep(object, PVM, "vnpdea", 0);
2305455Sdg			}
2315455Sdg		}
2325455Sdg		splx(s);
2335455Sdg
2341541Srgrimes		vp->v_vmdata = NULL;
2355455Sdg		vp->v_flag &= ~(VTEXT | VVMIO);
2361541Srgrimes		vrele(vp);
2371541Srgrimes	}
2381541Srgrimes	TAILQ_REMOVE(&vnode_pager_list, pager, pg_list);
2391827Sdg	free((caddr_t) vnp, M_VMPGDATA);
2401827Sdg	free((caddr_t) pager, M_VMPAGER);
2411541Srgrimes}
2421541Srgrimes
2431549Srgrimesint
2441549Srgrimesvnode_pager_getmulti(pager, m, count, reqpage, sync)
2451541Srgrimes	vm_pager_t pager;
2461549Srgrimes	vm_page_t *m;
2475455Sdg	int count;
2485455Sdg	int reqpage;
2491541Srgrimes	boolean_t sync;
2501541Srgrimes{
2511827Sdg
2521549Srgrimes	return vnode_pager_input((vn_pager_t) pager->pg_data, m, count, reqpage);
2531549Srgrimes}
2541541Srgrimes
2551549Srgrimesint
2561549Srgrimesvnode_pager_getpage(pager, m, sync)
2571549Srgrimes	vm_pager_t pager;
2581549Srgrimes	vm_page_t m;
2591549Srgrimes	boolean_t sync;
2601549Srgrimes{
2611549Srgrimes
2621549Srgrimes	vm_page_t marray[1];
2631827Sdg
2641549Srgrimes	if (pager == NULL)
2651549Srgrimes		return FALSE;
2661549Srgrimes	marray[0] = m;
2671549Srgrimes
2681827Sdg	return vnode_pager_input((vn_pager_t) pager->pg_data, marray, 1, 0);
2691541Srgrimes}
2701541Srgrimes
2711549Srgrimesboolean_t
2721549Srgrimesvnode_pager_putpage(pager, m, sync)
2731541Srgrimes	vm_pager_t pager;
2741549Srgrimes	vm_page_t m;
2751541Srgrimes	boolean_t sync;
2761541Srgrimes{
2771549Srgrimes	vm_page_t marray[1];
2785455Sdg	int rtvals[1];
2791541Srgrimes
2801541Srgrimes	if (pager == NULL)
2811549Srgrimes		return FALSE;
2821549Srgrimes	marray[0] = m;
2831827Sdg	vnode_pager_output((vn_pager_t) pager->pg_data, marray, 1, rtvals);
2841549Srgrimes	return rtvals[0];
2851541Srgrimes}
2861541Srgrimes
2871549Srgrimesint
2881549Srgrimesvnode_pager_putmulti(pager, m, c, sync, rtvals)
2891549Srgrimes	vm_pager_t pager;
2901549Srgrimes	vm_page_t *m;
2915455Sdg	int c;
2921549Srgrimes	boolean_t sync;
2935455Sdg	int *rtvals;
2941549Srgrimes{
2951827Sdg	return vnode_pager_output((vn_pager_t) pager->pg_data, m, c, rtvals);
2961549Srgrimes}
2971549Srgrimes
2981549Srgrimes
2991549Srgrimesboolean_t
3001541Srgrimesvnode_pager_haspage(pager, offset)
3011541Srgrimes	vm_pager_t pager;
3021541Srgrimes	vm_offset_t offset;
3031541Srgrimes{
3041827Sdg	register vn_pager_t vnp = (vn_pager_t) pager->pg_data;
3054797Sdg	register struct vnode *vp = vnp->vnp_vp;
3061541Srgrimes	daddr_t bn;
3075455Sdg	int err;
3084446Sdg	daddr_t block;
3091541Srgrimes
3101541Srgrimes	/*
3115455Sdg	 * If filesystem no longer mounted or offset beyond end of file we do
3125455Sdg	 * not have the page.
3131541Srgrimes	 */
3144797Sdg	if ((vp->v_mount == NULL) || (offset >= vnp->vnp_size))
3154797Sdg		return FALSE;
3161541Srgrimes
3174797Sdg	block = offset / vp->v_mount->mnt_stat.f_iosize;
3184797Sdg	if (incore(vp, block))
3194446Sdg		return TRUE;
3201541Srgrimes	/*
3211827Sdg	 * Read the index to find the disk block to read from.  If there is no
3221827Sdg	 * block, report that we don't have this data.
3231827Sdg	 *
3241541Srgrimes	 * Assumes that the vnode has whole page or nothing.
3251541Srgrimes	 */
3264797Sdg	err = VOP_BMAP(vp, block, (struct vnode **) 0, &bn, 0);
3275455Sdg	if (err)
3281827Sdg		return (TRUE);
3291827Sdg	return ((long) bn < 0 ? FALSE : TRUE);
3301541Srgrimes}
3311541Srgrimes
3321541Srgrimes/*
3331541Srgrimes * Lets the VM system know about a change in size for a file.
3341541Srgrimes * If this vnode is mapped into some address space (i.e. we have a pager
3351541Srgrimes * for it) we adjust our own internal size and flush any cached pages in
3361541Srgrimes * the associated object that are affected by the size change.
3371541Srgrimes *
3381541Srgrimes * Note: this routine may be invoked as a result of a pager put
3391541Srgrimes * operation (possibly at object termination time), so we must be careful.
3401541Srgrimes */
3411541Srgrimesvoid
3421541Srgrimesvnode_pager_setsize(vp, nsize)
3431541Srgrimes	struct vnode *vp;
3445455Sdg	u_long nsize;
3451541Srgrimes{
3461541Srgrimes	register vn_pager_t vnp;
3471541Srgrimes	register vm_object_t object;
3481541Srgrimes	vm_pager_t pager;
3491541Srgrimes
3501541Srgrimes	/*
3511541Srgrimes	 * Not a mapped vnode
3521541Srgrimes	 */
3531541Srgrimes	if (vp == NULL || vp->v_type != VREG || vp->v_vmdata == NULL)
3541541Srgrimes		return;
3551827Sdg
3561541Srgrimes	/*
3571541Srgrimes	 * Hasn't changed size
3581541Srgrimes	 */
3593374Sdg	object = (vm_object_t) vp->v_vmdata;
3605455Sdg	if (object == NULL)
3613374Sdg		return;
3625455Sdg	if ((pager = object->pager) == NULL)
3633374Sdg		return;
3641827Sdg	vnp = (vn_pager_t) pager->pg_data;
3651541Srgrimes	if (nsize == vnp->vnp_size)
3661541Srgrimes		return;
3671827Sdg
3681541Srgrimes	/*
3691827Sdg	 * No object. This can happen during object termination since
3701827Sdg	 * vm_object_page_clean is called after the object has been removed
3711827Sdg	 * from the hash table, and clean may cause vnode write operations
3721827Sdg	 * which can wind up back here.
3731541Srgrimes	 */
3741541Srgrimes	object = vm_object_lookup(pager);
3751541Srgrimes	if (object == NULL)
3761541Srgrimes		return;
3771541Srgrimes
3781541Srgrimes	/*
3791827Sdg	 * File has shrunk. Toss any cached pages beyond the new EOF.
3801541Srgrimes	 */
3811827Sdg	if (nsize < vnp->vnp_size) {
3825455Sdg		if (round_page((vm_offset_t) nsize) < vnp->vnp_size) {
3835455Sdg			vm_object_lock(object);
3845455Sdg			vm_object_page_remove(object,
3855455Sdg			    round_page((vm_offset_t) nsize), vnp->vnp_size);
3865455Sdg			vm_object_unlock(object);
3875455Sdg		}
3881827Sdg		/*
3891827Sdg		 * this gets rid of garbage at the end of a page that is now
3901827Sdg		 * only partially backed by the vnode...
3911827Sdg		 */
3921827Sdg		if (nsize & PAGE_MASK) {
3931827Sdg			vm_offset_t kva;
3941827Sdg			vm_page_t m;
3951827Sdg
3961827Sdg			m = vm_page_lookup(object, trunc_page((vm_offset_t) nsize));
3971827Sdg			if (m) {
3981827Sdg				kva = vm_pager_map_page(m);
3991827Sdg				bzero((caddr_t) kva + (nsize & PAGE_MASK),
4005455Sdg				    round_page(nsize) - nsize);
4011827Sdg				vm_pager_unmap_page(kva);
4021827Sdg			}
4031827Sdg		}
4041541Srgrimes	}
4051827Sdg	vnp->vnp_size = (vm_offset_t) nsize;
4061827Sdg	object->size = round_page(nsize);
4071827Sdg
4081541Srgrimes	vm_object_deallocate(object);
4091541Srgrimes}
4101541Srgrimes
4111541Srgrimesvoid
4121541Srgrimesvnode_pager_umount(mp)
4131541Srgrimes	register struct mount *mp;
4141541Srgrimes{
4151541Srgrimes	register vm_pager_t pager, npager;
4161541Srgrimes	struct vnode *vp;
4171541Srgrimes
4181549Srgrimes	pager = vnode_pager_list.tqh_first;
4191827Sdg	while (pager) {
4201827Sdg
4211541Srgrimes		/*
4221827Sdg		 * Save the next pointer now since uncaching may terminate the
4231827Sdg		 * object and render pager invalid
4241541Srgrimes		 */
4251827Sdg		vp = ((vn_pager_t) pager->pg_data)->vnp_vp;
4261541Srgrimes		npager = pager->pg_list.tqe_next;
4271827Sdg		if (mp == (struct mount *) 0 || vp->v_mount == mp)
4281541Srgrimes			(void) vnode_pager_uncache(vp);
4291549Srgrimes		pager = npager;
4301541Srgrimes	}
4311541Srgrimes}
4321541Srgrimes
4331541Srgrimes/*
4341541Srgrimes * Remove vnode associated object from the object cache.
4351541Srgrimes *
4361549Srgrimes * Note: this routine may be invoked as a result of a pager put
4371549Srgrimes * operation (possibly at object termination time), so we must be careful.
4381549Srgrimes */
4391549Srgrimesboolean_t
4401549Srgrimesvnode_pager_uncache(vp)
4411549Srgrimes	register struct vnode *vp;
4421549Srgrimes{
4431549Srgrimes	register vm_object_t object;
4441549Srgrimes	boolean_t uncached, locked;
4451549Srgrimes	vm_pager_t pager;
4461549Srgrimes
4471549Srgrimes	/*
4481549Srgrimes	 * Not a mapped vnode
4491549Srgrimes	 */
4503374Sdg	object = (vm_object_t) vp->v_vmdata;
4515455Sdg	if (object == NULL)
4525455Sdg		return (TRUE);
4535455Sdg
4543374Sdg	pager = object->pager;
4551549Srgrimes	if (pager == NULL)
4561549Srgrimes		return (TRUE);
4571827Sdg
4581549Srgrimes	/*
4591827Sdg	 * Unlock the vnode if it is currently locked. We do this since
4601827Sdg	 * uncaching the object may result in its destruction which may
4611827Sdg	 * initiate paging activity which may necessitate locking the vnode.
4621549Srgrimes	 */
4631549Srgrimes	locked = VOP_ISLOCKED(vp);
4641549Srgrimes	if (locked)
4651549Srgrimes		VOP_UNLOCK(vp);
4661827Sdg
4671549Srgrimes	/*
4681827Sdg	 * Must use vm_object_lookup() as it actually removes the object from
4691827Sdg	 * the cache list.
4701549Srgrimes	 */
4711549Srgrimes	object = vm_object_lookup(pager);
4721549Srgrimes	if (object) {
4731549Srgrimes		uncached = (object->ref_count <= 1);
4741549Srgrimes		pager_cache(object, FALSE);
4751549Srgrimes	} else
4761549Srgrimes		uncached = TRUE;
4771549Srgrimes	if (locked)
4781549Srgrimes		VOP_LOCK(vp);
4791827Sdg	return (uncached);
4801549Srgrimes}
4811541Srgrimes
4821541Srgrimes
4831549Srgrimesvoid
4841549Srgrimesvnode_pager_freepage(m)
4851549Srgrimes	vm_page_t m;
4861541Srgrimes{
4871549Srgrimes	PAGE_WAKEUP(m);
4881549Srgrimes	vm_page_free(m);
4891549Srgrimes}
4901549Srgrimes
4911549Srgrimes/*
4921549Srgrimes * calculate the linear (byte) disk address of specified virtual
4931549Srgrimes * file address
4941549Srgrimes */
4951549Srgrimesvm_offset_t
4966151Sdgvnode_pager_addr(vp, address, run)
4971549Srgrimes	struct vnode *vp;
4981549Srgrimes	vm_offset_t address;
4996151Sdg	int *run;
5001549Srgrimes{
5015455Sdg	int rtaddress;
5025455Sdg	int bsize;
5031549Srgrimes	vm_offset_t block;
5041549Srgrimes	struct vnode *rtvp;
5055455Sdg	int err;
5065455Sdg	int vblock, voffset;
5071549Srgrimes
5085455Sdg	if ((int) address < 0)
5095455Sdg		return -1;
5105455Sdg
5111549Srgrimes	bsize = vp->v_mount->mnt_stat.f_iosize;
5121549Srgrimes	vblock = address / bsize;
5131549Srgrimes	voffset = address % bsize;
5141549Srgrimes
5156151Sdg	err = VOP_BMAP(vp, vblock, &rtvp, &block, run);
5161549Srgrimes
5176151Sdg	if (err || (block == -1))
5181549Srgrimes		rtaddress = -1;
5196151Sdg	else {
5206626Sdg		rtaddress = block + voffset / DEV_BSIZE;
5216151Sdg		if( run) {
5226151Sdg			*run += 1;
5236151Sdg			*run *= bsize/PAGE_SIZE;
5246151Sdg			*run -= voffset/PAGE_SIZE;
5256151Sdg		}
5266151Sdg	}
5271549Srgrimes
5281549Srgrimes	return rtaddress;
5291549Srgrimes}
5301549Srgrimes
5311549Srgrimes/*
5321549Srgrimes * interrupt routine for I/O completion
5331549Srgrimes */
5341549Srgrimesvoid
5351549Srgrimesvnode_pager_iodone(bp)
5361549Srgrimes	struct buf *bp;
5371549Srgrimes{
5381549Srgrimes	bp->b_flags |= B_DONE;
5391827Sdg	wakeup((caddr_t) bp);
5405455Sdg	if (bp->b_flags & B_ASYNC) {
5411887Sdg		vm_offset_t paddr;
5421887Sdg		vm_page_t m;
5431887Sdg		vm_object_t obj = 0;
5441887Sdg		int i;
5451887Sdg		int npages;
5461887Sdg
5471887Sdg		paddr = (vm_offset_t) bp->b_data;
5485455Sdg		if (bp->b_bufsize != bp->b_bcount)
5495455Sdg			bzero(bp->b_data + bp->b_bcount,
5505455Sdg			    bp->b_bufsize - bp->b_bcount);
5511887Sdg
5521887Sdg		npages = (bp->b_bufsize + PAGE_SIZE - 1) / PAGE_SIZE;
5535455Sdg		for (i = 0; i < npages; i++) {
5541887Sdg			m = PHYS_TO_VM_PAGE(pmap_kextract(paddr + i * PAGE_SIZE));
5551887Sdg			obj = m->object;
5565455Sdg			if (m) {
5575455Sdg				m->dirty = 0;
5585455Sdg				m->valid = VM_PAGE_BITS_ALL;
5595455Sdg				if (m->flags & PG_WANTED)
5605455Sdg					m->flags |= PG_REFERENCED;
5611887Sdg				PAGE_WAKEUP(m);
5621887Sdg			} else {
5631887Sdg				panic("vnode_pager_iodone: page is gone!!!");
5641887Sdg			}
5651887Sdg		}
5665455Sdg		pmap_qremove(paddr, npages);
5675455Sdg		if (obj) {
5681887Sdg			--obj->paging_in_progress;
5696618Sdg			if (obj->paging_in_progress == 0 &&
5706618Sdg			    (obj->flags & OBJ_PIPWNT)) {
5716618Sdg				obj->flags &= ~OBJ_PIPWNT;
5721887Sdg				wakeup((caddr_t) obj);
5736618Sdg			}
5741887Sdg		} else {
5751887Sdg			panic("vnode_pager_iodone: object is gone???");
5761887Sdg		}
5771887Sdg		relpbuf(bp);
5781887Sdg	}
5791549Srgrimes}
5801549Srgrimes
5811549Srgrimes/*
5821549Srgrimes * small block file system vnode pager input
5831549Srgrimes */
5841549Srgrimesint
5851549Srgrimesvnode_pager_input_smlfs(vnp, m)
5861549Srgrimes	vn_pager_t vnp;
5871549Srgrimes	vm_page_t m;
5881549Srgrimes{
5895455Sdg	int i;
5905455Sdg	int s;
5911549Srgrimes	struct vnode *dp, *vp;
5921549Srgrimes	struct buf *bp;
5931549Srgrimes	vm_offset_t kva;
5945455Sdg	int fileaddr;
5955455Sdg	int block;
5961549Srgrimes	vm_offset_t bsize;
5975455Sdg	int error = 0;
5981549Srgrimes
5991549Srgrimes	vp = vnp->vnp_vp;
6001549Srgrimes	bsize = vp->v_mount->mnt_stat.f_iosize;
6011549Srgrimes
6025455Sdg	VOP_BMAP(vp, 0, &dp, 0, 0);
6031549Srgrimes
6041549Srgrimes	kva = vm_pager_map_page(m);
6051549Srgrimes
6061827Sdg	for (i = 0; i < PAGE_SIZE / bsize; i++) {
6071827Sdg
6085455Sdg		if ((vm_page_bits(m->offset + i * bsize, bsize) & m->valid))
6095455Sdg			continue;
6101549Srgrimes
6116151Sdg		fileaddr = vnode_pager_addr(vp, m->offset + i * bsize, (int *)0);
6121827Sdg		if (fileaddr != -1) {
6131549Srgrimes			bp = getpbuf();
6141549Srgrimes
6151827Sdg			/* build a minimal buffer header */
6161549Srgrimes			bp->b_flags = B_BUSY | B_READ | B_CALL;
6171549Srgrimes			bp->b_iodone = vnode_pager_iodone;
6181549Srgrimes			bp->b_proc = curproc;
6191549Srgrimes			bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
6201827Sdg			if (bp->b_rcred != NOCRED)
6211549Srgrimes				crhold(bp->b_rcred);
6221827Sdg			if (bp->b_wcred != NOCRED)
6231549Srgrimes				crhold(bp->b_wcred);
6241549Srgrimes			bp->b_un.b_addr = (caddr_t) kva + i * bsize;
6256626Sdg			bp->b_blkno = fileaddr;
6265455Sdg			pbgetvp(dp, bp);
6271549Srgrimes			bp->b_bcount = bsize;
6281549Srgrimes			bp->b_bufsize = bsize;
6291827Sdg
6301827Sdg			/* do the input */
6311549Srgrimes			VOP_STRATEGY(bp);
6321549Srgrimes
6331827Sdg			/* we definitely need to be at splbio here */
6341549Srgrimes
6351549Srgrimes			s = splbio();
6361549Srgrimes			while ((bp->b_flags & B_DONE) == 0) {
6371827Sdg				tsleep((caddr_t) bp, PVM, "vnsrd", 0);
6381549Srgrimes			}
6391549Srgrimes			splx(s);
6401549Srgrimes			if ((bp->b_flags & B_ERROR) != 0)
6411549Srgrimes				error = EIO;
6421549Srgrimes
6431827Sdg			/*
6441827Sdg			 * free the buffer header back to the swap buffer pool
6451827Sdg			 */
6461549Srgrimes			relpbuf(bp);
6471549Srgrimes			HOLDRELE(vp);
6481827Sdg			if (error)
6491549Srgrimes				break;
6505455Sdg
6515455Sdg			vm_page_set_clean(m, i * bsize, bsize);
6525455Sdg			vm_page_set_valid(m, i * bsize, bsize);
6531549Srgrimes		} else {
6545455Sdg			vm_page_set_clean(m, i * bsize, bsize);
6551549Srgrimes			bzero((caddr_t) kva + i * bsize, bsize);
6561549Srgrimes		}
6571549Srgrimesnextblock:
6581549Srgrimes	}
6591549Srgrimes	vm_pager_unmap_page(kva);
6605455Sdg	pmap_clear_modify(VM_PAGE_TO_PHYS(m));
6611827Sdg	if (error) {
6624207Sdg		return VM_PAGER_ERROR;
6631549Srgrimes	}
6641549Srgrimes	return VM_PAGER_OK;
6651549Srgrimes
6661549Srgrimes}
6671549Srgrimes
6681549Srgrimes
6691549Srgrimes/*
6701549Srgrimes * old style vnode pager output routine
6711549Srgrimes */
6721549Srgrimesint
6731549Srgrimesvnode_pager_input_old(vnp, m)
6741549Srgrimes	vn_pager_t vnp;
6751549Srgrimes	vm_page_t m;
6761549Srgrimes{
6771541Srgrimes	struct uio auio;
6781541Srgrimes	struct iovec aiov;
6795455Sdg	int error;
6805455Sdg	int size;
6811549Srgrimes	vm_offset_t kva;
6821549Srgrimes
6831549Srgrimes	error = 0;
6841827Sdg
6851549Srgrimes	/*
6861549Srgrimes	 * Return failure if beyond current EOF
6871549Srgrimes	 */
6885455Sdg	if (m->offset >= vnp->vnp_size) {
6891549Srgrimes		return VM_PAGER_BAD;
6901549Srgrimes	} else {
6911549Srgrimes		size = PAGE_SIZE;
6925455Sdg		if (m->offset + size > vnp->vnp_size)
6935455Sdg			size = vnp->vnp_size - m->offset;
6945455Sdg		/*
6955455Sdg		 * Allocate a kernel virtual address and initialize so that
6965455Sdg		 * we can use VOP_READ/WRITE routines.
6975455Sdg		 */
6981549Srgrimes		kva = vm_pager_map_page(m);
6991827Sdg		aiov.iov_base = (caddr_t) kva;
7001549Srgrimes		aiov.iov_len = size;
7011549Srgrimes		auio.uio_iov = &aiov;
7021549Srgrimes		auio.uio_iovcnt = 1;
7035455Sdg		auio.uio_offset = m->offset;
7041549Srgrimes		auio.uio_segflg = UIO_SYSSPACE;
7051549Srgrimes		auio.uio_rw = UIO_READ;
7061549Srgrimes		auio.uio_resid = size;
7071827Sdg		auio.uio_procp = (struct proc *) 0;
7081549Srgrimes
7091549Srgrimes		error = VOP_READ(vnp->vnp_vp, &auio, 0, curproc->p_ucred);
7101549Srgrimes		if (!error) {
7111549Srgrimes			register int count = size - auio.uio_resid;
7121549Srgrimes
7131549Srgrimes			if (count == 0)
7141549Srgrimes				error = EINVAL;
7151549Srgrimes			else if (count != PAGE_SIZE)
7161827Sdg				bzero((caddr_t) kva + count, PAGE_SIZE - count);
7171549Srgrimes		}
7181549Srgrimes		vm_pager_unmap_page(kva);
7191549Srgrimes	}
7201549Srgrimes	pmap_clear_modify(VM_PAGE_TO_PHYS(m));
7215455Sdg	m->dirty = 0;
7224207Sdg	return error ? VM_PAGER_ERROR : VM_PAGER_OK;
7231549Srgrimes}
7241549Srgrimes
7251549Srgrimes/*
7261549Srgrimes * generic vnode pager input routine
7271549Srgrimes */
7281549Srgrimesint
7291549Srgrimesvnode_pager_input(vnp, m, count, reqpage)
7301549Srgrimes	register vn_pager_t vnp;
7311549Srgrimes	vm_page_t *m;
7325455Sdg	int count, reqpage;
7331549Srgrimes{
7345455Sdg	int i;
7351541Srgrimes	vm_offset_t kva, foff;
7365455Sdg	int size, sizea;
7371549Srgrimes	vm_object_t object;
7381549Srgrimes	struct vnode *dp, *vp;
7395455Sdg	int bsize;
7401541Srgrimes
7415455Sdg	int first, last;
7426151Sdg	int firstaddr;
7435455Sdg	int block, offset;
7446151Sdg	int runpg;
7456151Sdg	int runend;
7461549Srgrimes
7471887Sdg	struct buf *bp, *bpa;
7485455Sdg	int counta;
7495455Sdg	int s;
7505455Sdg	int failflag;
7511549Srgrimes
7525455Sdg	int error = 0;
7531549Srgrimes
7541827Sdg	object = m[reqpage]->object;	/* all vm_page_t items are in same
7551827Sdg					 * object */
7561549Srgrimes
7571549Srgrimes	vp = vnp->vnp_vp;
7581549Srgrimes	bsize = vp->v_mount->mnt_stat.f_iosize;
7591549Srgrimes
7601549Srgrimes	/* get the UNDERLYING device for the file with VOP_BMAP() */
7611827Sdg
7621549Srgrimes	/*
7631827Sdg	 * originally, we did not check for an error return value -- assuming
7641827Sdg	 * an fs always has a bmap entry point -- that assumption is wrong!!!
7651549Srgrimes	 */
7665455Sdg	foff = m[reqpage]->offset;
7671827Sdg
7681549Srgrimes	/*
7691887Sdg	 * if we can't bmap, use old VOP code
7701549Srgrimes	 */
7715455Sdg	if (VOP_BMAP(vp, 0, &dp, 0, 0)) {
7721549Srgrimes		for (i = 0; i < count; i++) {
7731549Srgrimes			if (i != reqpage) {
7741549Srgrimes				vnode_pager_freepage(m[i]);
7751549Srgrimes			}
7761549Srgrimes		}
7773612Sdg		cnt.v_vnodein++;
7783612Sdg		cnt.v_vnodepgsin++;
7791549Srgrimes		return vnode_pager_input_old(vnp, m[reqpage]);
7801549Srgrimes
7811827Sdg		/*
7821827Sdg		 * if the blocksize is smaller than a page size, then use
7831827Sdg		 * special small filesystem code.  NFS sometimes has a small
7841827Sdg		 * blocksize, but it can handle large reads itself.
7851827Sdg		 */
7861827Sdg	} else if ((PAGE_SIZE / bsize) > 1 &&
7875455Sdg	    (vp->v_mount->mnt_stat.f_type != MOUNT_NFS)) {
7881827Sdg
7891549Srgrimes		for (i = 0; i < count; i++) {
7901549Srgrimes			if (i != reqpage) {
7911549Srgrimes				vnode_pager_freepage(m[i]);
7921549Srgrimes			}
7931549Srgrimes		}
7943612Sdg		cnt.v_vnodein++;
7953612Sdg		cnt.v_vnodepgsin++;
7961549Srgrimes		return vnode_pager_input_smlfs(vnp, m[reqpage]);
7971549Srgrimes	}
7981549Srgrimes	/*
7995455Sdg	 * if ANY DEV_BSIZE blocks are valid on a large filesystem block
8005455Sdg	 * then, the entire page is valid --
8011549Srgrimes	 */
8025455Sdg	if (m[reqpage]->valid) {
8035455Sdg		m[reqpage]->valid = VM_PAGE_BITS_ALL;
8045455Sdg		for (i = 0; i < count; i++) {
8055455Sdg			if (i != reqpage)
8065455Sdg				vnode_pager_freepage(m[i]);
8071549Srgrimes		}
8085455Sdg		return VM_PAGER_OK;
8091549Srgrimes	}
8105455Sdg	/*
8115455Sdg	 * here on direct device I/O
8125455Sdg	 */
8131549Srgrimes
8141827Sdg
8156151Sdg	firstaddr = -1;
8161549Srgrimes	/*
8176151Sdg	 * calculate the run that includes the required page
8181549Srgrimes	 */
8196151Sdg	for(first = 0, i = 0; i < count; i = runend) {
8206151Sdg		firstaddr = vnode_pager_addr(vp, m[i]->offset, &runpg);
8216151Sdg		if (firstaddr == -1) {
8226151Sdg			if( i == reqpage && foff < vnp->vnp_size) {
8236151Sdg				printf("vnode_pager_input: unexpected missing page: firstaddr: %d, foff: %d, vnp_size: %d\n",
8246151Sdg			   	 firstaddr, foff, vnp->vnp_size);
8256151Sdg				panic("vnode_pager_input:...");
8266151Sdg			}
8271549Srgrimes			vnode_pager_freepage(m[i]);
8286151Sdg			runend = i + 1;
8296151Sdg			first = runend;
8306151Sdg			continue;
8311549Srgrimes		}
8326151Sdg		runend = i + runpg;
8336151Sdg		if( runend <= reqpage) {
8346151Sdg			int j;
8356151Sdg			for(j = i; j < runend; j++) {
8366151Sdg				vnode_pager_freepage(m[j]);
8376151Sdg			}
8381549Srgrimes		} else {
8396151Sdg			if( runpg < (count - first)) {
8406151Sdg				for(i=first + runpg; i < count; i++)
8416151Sdg					vnode_pager_freepage(m[i]);
8426151Sdg				count = first + runpg;
8436151Sdg			}
8446151Sdg			break;
8451549Srgrimes		}
8466151Sdg		first = runend;
8471549Srgrimes	}
8481549Srgrimes
8491549Srgrimes	/*
8501827Sdg	 * the first and last page have been calculated now, move input pages
8511827Sdg	 * to be zero based...
8521549Srgrimes	 */
8531549Srgrimes	if (first != 0) {
8541549Srgrimes		for (i = first; i < count; i++) {
8551549Srgrimes			m[i - first] = m[i];
8561549Srgrimes		}
8571549Srgrimes		count -= first;
8581549Srgrimes		reqpage -= first;
8591549Srgrimes	}
8606151Sdg
8611549Srgrimes	/*
8621549Srgrimes	 * calculate the file virtual address for the transfer
8631549Srgrimes	 */
8645455Sdg	foff = m[0]->offset;
8656151Sdg#if 0
8666151Sdg	printf("foff: 0x%lx, firstaddr: 0x%lx\n",
8676151Sdg		foff, firstaddr);
8686151Sdg	DELAY(6000000);
8696151Sdg#endif
8701827Sdg
8711549Srgrimes	/*
8721549Srgrimes	 * calculate the size of the transfer
8731549Srgrimes	 */
8741549Srgrimes	size = count * PAGE_SIZE;
8751549Srgrimes	if ((foff + size) > vnp->vnp_size)
8761549Srgrimes		size = vnp->vnp_size - foff;
8771549Srgrimes
8781549Srgrimes	/*
8791549Srgrimes	 * round up physical size for real devices
8801549Srgrimes	 */
8811827Sdg	if (dp->v_type == VBLK || dp->v_type == VCHR)
8821549Srgrimes		size = (size + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1);
8831549Srgrimes
8841887Sdg	counta = 0;
8855455Sdg	if (count * PAGE_SIZE > bsize)
8861887Sdg		counta = (count - reqpage) - 1;
8871887Sdg	bpa = 0;
8881887Sdg	sizea = 0;
8895841Sdg	bp = getpbuf();
8905455Sdg	if (counta) {
8915841Sdg		bpa = (struct buf *) trypbuf();
8925841Sdg		if (bpa) {
8935841Sdg			count -= counta;
8945841Sdg			sizea = size - count * PAGE_SIZE;
8955841Sdg			size = count * PAGE_SIZE;
8965841Sdg		}
8971887Sdg	}
8985455Sdg	kva = (vm_offset_t) bp->b_data;
8991887Sdg
9001549Srgrimes	/*
9011549Srgrimes	 * and map the pages to be read into the kva
9021549Srgrimes	 */
9031887Sdg	pmap_qenter(kva, m, count);
9041549Srgrimes
9051549Srgrimes	/* build a minimal buffer header */
9061549Srgrimes	bp->b_flags = B_BUSY | B_READ | B_CALL;
9071549Srgrimes	bp->b_iodone = vnode_pager_iodone;
9081549Srgrimes	/* B_PHYS is not set, but it is nice to fill this in */
9091549Srgrimes	bp->b_proc = curproc;
9101549Srgrimes	bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
9111827Sdg	if (bp->b_rcred != NOCRED)
9121549Srgrimes		crhold(bp->b_rcred);
9131827Sdg	if (bp->b_wcred != NOCRED)
9141549Srgrimes		crhold(bp->b_wcred);
9156626Sdg	bp->b_blkno = firstaddr;
9165455Sdg	pbgetvp(dp, bp);
9171549Srgrimes	bp->b_bcount = size;
9181549Srgrimes	bp->b_bufsize = size;
9191549Srgrimes
9203612Sdg	cnt.v_vnodein++;
9213612Sdg	cnt.v_vnodepgsin += count;
9223612Sdg
9231549Srgrimes	/* do the input */
9241549Srgrimes	VOP_STRATEGY(bp);
9253612Sdg
9265455Sdg	if (counta) {
9275455Sdg		for (i = 0; i < counta; i++) {
9285455Sdg			vm_page_deactivate(m[count + i]);
9291887Sdg		}
9305455Sdg		pmap_qenter((vm_offset_t) bpa->b_data, &m[count], counta);
9311887Sdg		++m[count]->object->paging_in_progress;
9321887Sdg		bpa->b_flags = B_BUSY | B_READ | B_CALL | B_ASYNC;
9331887Sdg		bpa->b_iodone = vnode_pager_iodone;
9341887Sdg		/* B_PHYS is not set, but it is nice to fill this in */
9351887Sdg		bpa->b_proc = curproc;
9361887Sdg		bpa->b_rcred = bpa->b_wcred = bpa->b_proc->p_ucred;
9371887Sdg		if (bpa->b_rcred != NOCRED)
9381887Sdg			crhold(bpa->b_rcred);
9391887Sdg		if (bpa->b_wcred != NOCRED)
9401887Sdg			crhold(bpa->b_wcred);
9416626Sdg		bpa->b_blkno = firstaddr + count * (PAGE_SIZE / DEV_BSIZE);
9425455Sdg		pbgetvp(dp, bpa);
9431887Sdg		bpa->b_bcount = sizea;
9445455Sdg		bpa->b_bufsize = counta * PAGE_SIZE;
9451549Srgrimes
9463612Sdg		cnt.v_vnodepgsin += counta;
9471887Sdg		VOP_STRATEGY(bpa);
9481887Sdg	}
9491549Srgrimes	s = splbio();
9501549Srgrimes	/* we definitely need to be at splbio here */
9511549Srgrimes
9521549Srgrimes	while ((bp->b_flags & B_DONE) == 0) {
9531827Sdg		tsleep((caddr_t) bp, PVM, "vnread", 0);
9541549Srgrimes	}
9551549Srgrimes	splx(s);
9561549Srgrimes	if ((bp->b_flags & B_ERROR) != 0)
9571549Srgrimes		error = EIO;
9581549Srgrimes
9591549Srgrimes	if (!error) {
9601549Srgrimes		if (size != count * PAGE_SIZE)
9611827Sdg			bzero((caddr_t) kva + size, PAGE_SIZE * count - size);
9621549Srgrimes	}
9635455Sdg	pmap_qremove(kva, count);
9641549Srgrimes
9651549Srgrimes	/*
9661549Srgrimes	 * free the buffer header back to the swap buffer pool
9671549Srgrimes	 */
9681549Srgrimes	relpbuf(bp);
9691549Srgrimes	HOLDRELE(vp);
9701549Srgrimes
9711549Srgrimesfinishup:
9721549Srgrimes	for (i = 0; i < count; i++) {
9732386Sdg		pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
9745455Sdg		m[i]->dirty = 0;
9755455Sdg		m[i]->valid = VM_PAGE_BITS_ALL;
9761549Srgrimes		if (i != reqpage) {
9771827Sdg
9781549Srgrimes			/*
9791827Sdg			 * whether or not to leave the page activated is up in
9801827Sdg			 * the air, but we should put the page on a page queue
9811827Sdg			 * somewhere. (it already is in the object). Result:
9821827Sdg			 * It appears that empirical results show that
9831827Sdg			 * deactivating pages is best.
9841549Srgrimes			 */
9851827Sdg
9861549Srgrimes			/*
9871827Sdg			 * just in case someone was asking for this page we
9881827Sdg			 * now tell them that it is ok to use
9891549Srgrimes			 */
9901549Srgrimes			if (!error) {
9915841Sdg				vm_page_deactivate(m[i]);
9921549Srgrimes				PAGE_WAKEUP(m[i]);
9931549Srgrimes			} else {
9941549Srgrimes				vnode_pager_freepage(m[i]);
9951549Srgrimes			}
9961549Srgrimes		}
9971549Srgrimes	}
9981549Srgrimes	if (error) {
9994207Sdg		printf("vnode_pager_input: I/O read error\n");
10001549Srgrimes	}
10014207Sdg	return (error ? VM_PAGER_ERROR : VM_PAGER_OK);
10021549Srgrimes}
10031549Srgrimes
10041549Srgrimes/*
10051549Srgrimes * old-style vnode pager output routine
10061549Srgrimes */
10071549Srgrimesint
10081549Srgrimesvnode_pager_output_old(vnp, m)
10091549Srgrimes	register vn_pager_t vnp;
10101541Srgrimes	vm_page_t m;
10111549Srgrimes{
10125455Sdg	vm_offset_t kva, kva2;
10131549Srgrimes	vm_offset_t size;
10141549Srgrimes	struct iovec aiov;
10151549Srgrimes	struct uio auio;
10161549Srgrimes	struct vnode *vp;
10175455Sdg	int error;
10181541Srgrimes
10191549Srgrimes	vp = vnp->vnp_vp;
10201827Sdg
10211541Srgrimes	/*
10225455Sdg	 * Dont return failure if beyond current EOF placate the VM system.
10231541Srgrimes	 */
10245455Sdg	if (m->offset >= vnp->vnp_size) {
10255455Sdg		return VM_PAGER_OK;
10261549Srgrimes	} else {
10271549Srgrimes		size = PAGE_SIZE;
10285455Sdg		if (m->offset + size > vnp->vnp_size)
10295455Sdg			size = vnp->vnp_size - m->offset;
10305455Sdg
10315455Sdg		kva2 = kmem_alloc(pager_map, PAGE_SIZE);
10325455Sdg		/*
10335455Sdg		 * Allocate a kernel virtual address and initialize so that
10345455Sdg		 * we can use VOP_WRITE routines.
10355455Sdg		 */
10361549Srgrimes		kva = vm_pager_map_page(m);
10375455Sdg		bcopy((caddr_t) kva, (caddr_t) kva2, size);
10385455Sdg		vm_pager_unmap_page(kva);
10395455Sdg		pmap_clear_modify(VM_PAGE_TO_PHYS(m));
10405455Sdg		PAGE_WAKEUP(m);
10415455Sdg
10425455Sdg		aiov.iov_base = (caddr_t) kva2;
10431549Srgrimes		aiov.iov_len = size;
10441549Srgrimes		auio.uio_iov = &aiov;
10451549Srgrimes		auio.uio_iovcnt = 1;
10465455Sdg		auio.uio_offset = m->offset;
10471549Srgrimes		auio.uio_segflg = UIO_SYSSPACE;
10481549Srgrimes		auio.uio_rw = UIO_WRITE;
10491549Srgrimes		auio.uio_resid = size;
10501827Sdg		auio.uio_procp = (struct proc *) 0;
10511549Srgrimes
10521549Srgrimes		error = VOP_WRITE(vp, &auio, 0, curproc->p_ucred);
10531549Srgrimes
10545455Sdg		kmem_free_wakeup(pager_map, kva2, PAGE_SIZE);
10551549Srgrimes		if (!error) {
10561549Srgrimes			if ((size - auio.uio_resid) == 0) {
10571549Srgrimes				error = EINVAL;
10581549Srgrimes			}
10591549Srgrimes		}
10605455Sdg		return error ? VM_PAGER_ERROR : VM_PAGER_OK;
10611549Srgrimes	}
10621549Srgrimes}
10631549Srgrimes
10641549Srgrimes/*
10651549Srgrimes * vnode pager output on a small-block file system
10661549Srgrimes */
10671549Srgrimesint
10681549Srgrimesvnode_pager_output_smlfs(vnp, m)
10691549Srgrimes	vn_pager_t vnp;
10701549Srgrimes	vm_page_t m;
10711549Srgrimes{
10725455Sdg	int i;
10735455Sdg	int s;
10741549Srgrimes	struct vnode *dp, *vp;
10751549Srgrimes	struct buf *bp;
10761549Srgrimes	vm_offset_t kva;
10775455Sdg	int fileaddr;
10781549Srgrimes	vm_offset_t bsize;
10795455Sdg	int error = 0;
10801549Srgrimes
10811549Srgrimes	vp = vnp->vnp_vp;
10821549Srgrimes	bsize = vp->v_mount->mnt_stat.f_iosize;
10831549Srgrimes
10845455Sdg	VOP_BMAP(vp, 0, &dp, 0, 0);
10851549Srgrimes	kva = vm_pager_map_page(m);
10861827Sdg	for (i = 0; !error && i < (PAGE_SIZE / bsize); i++) {
10871827Sdg
10885455Sdg		if ((vm_page_bits(m->offset + i * bsize, bsize) & m->valid & m->dirty) == 0)
10895455Sdg			continue;
10901827Sdg		/*
10911827Sdg		 * calculate logical block and offset
10921827Sdg		 */
10936151Sdg		fileaddr = vnode_pager_addr(vp, m->offset + i * bsize, (int *)0);
10941827Sdg		if (fileaddr != -1) {
10951549Srgrimes
10961549Srgrimes			bp = getpbuf();
10971549Srgrimes
10981827Sdg			/* build a minimal buffer header */
10991549Srgrimes			bp->b_flags = B_BUSY | B_CALL | B_WRITE;
11001549Srgrimes			bp->b_iodone = vnode_pager_iodone;
11011549Srgrimes			bp->b_proc = curproc;
11021549Srgrimes			bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
11031827Sdg			if (bp->b_rcred != NOCRED)
11041549Srgrimes				crhold(bp->b_rcred);
11051827Sdg			if (bp->b_wcred != NOCRED)
11061549Srgrimes				crhold(bp->b_wcred);
11071549Srgrimes			bp->b_un.b_addr = (caddr_t) kva + i * bsize;
11086626Sdg			bp->b_blkno = fileaddr;
11095455Sdg			pbgetvp(dp, bp);
11101549Srgrimes			++dp->v_numoutput;
11111827Sdg			/* for NFS */
11121549Srgrimes			bp->b_dirtyoff = 0;
11131549Srgrimes			bp->b_dirtyend = bsize;
11141549Srgrimes			bp->b_bcount = bsize;
11151549Srgrimes			bp->b_bufsize = bsize;
11161827Sdg
11171827Sdg			/* do the input */
11181549Srgrimes			VOP_STRATEGY(bp);
11191549Srgrimes
11201827Sdg			/* we definitely need to be at splbio here */
11211549Srgrimes
11221549Srgrimes			s = splbio();
11231549Srgrimes			while ((bp->b_flags & B_DONE) == 0) {
11241827Sdg				tsleep((caddr_t) bp, PVM, "vnswrt", 0);
11251549Srgrimes			}
11261549Srgrimes			splx(s);
11271549Srgrimes			if ((bp->b_flags & B_ERROR) != 0)
11281549Srgrimes				error = EIO;
11291549Srgrimes
11305455Sdg			vm_page_set_clean(m, i * bsize, bsize);
11311827Sdg			/*
11321827Sdg			 * free the buffer header back to the swap buffer pool
11331827Sdg			 */
11341549Srgrimes			relpbuf(bp);
11351549Srgrimes			HOLDRELE(vp);
11361827Sdg		}
11371541Srgrimes	}
11381549Srgrimes	vm_pager_unmap_page(kva);
11391827Sdg	if (error)
11404207Sdg		return VM_PAGER_ERROR;
11411541Srgrimes	else
11421549Srgrimes		return VM_PAGER_OK;
11431549Srgrimes}
11441549Srgrimes
11451549Srgrimes/*
11461549Srgrimes * generic vnode pager output routine
11471549Srgrimes */
11481549Srgrimesint
11491549Srgrimesvnode_pager_output(vnp, m, count, rtvals)
11501549Srgrimes	vn_pager_t vnp;
11511549Srgrimes	vm_page_t *m;
11525455Sdg	int count;
11535455Sdg	int *rtvals;
11541549Srgrimes{
11555455Sdg	int i, j;
11561549Srgrimes	vm_offset_t kva, foff;
11575455Sdg	int size;
11581549Srgrimes	vm_object_t object;
11591549Srgrimes	struct vnode *dp, *vp;
11601549Srgrimes	struct buf *bp;
11611549Srgrimes	vm_offset_t reqaddr;
11625455Sdg	int bsize;
11635455Sdg	int s;
11645455Sdg	daddr_t block;
11655455Sdg	struct timeval tv;
11666151Sdg	int runpg;
11671549Srgrimes
11685455Sdg	int error = 0;
11691549Srgrimes
11701549Srgrimesretryoutput:
11711549Srgrimes	object = m[0]->object;	/* all vm_page_t items are in same object */
11721549Srgrimes
11731549Srgrimes	vp = vnp->vnp_vp;
11744797Sdg
11754797Sdg	/*
11764797Sdg	 * Make sure underlying filesystem is still mounted.
11774797Sdg	 */
11784797Sdg	if (vp->v_mount == NULL)
11794797Sdg		return VM_PAGER_FAIL;
11804797Sdg
11811549Srgrimes	bsize = vp->v_mount->mnt_stat.f_iosize;
11821549Srgrimes
11831827Sdg	for (i = 0; i < count; i++)
11841549Srgrimes		rtvals[i] = VM_PAGER_AGAIN;
11851549Srgrimes
11865455Sdg	if ((int) m[0]->offset < 0) {
11875455Sdg		printf("vnode_pager_output: attempt to write meta-data!!! -- 0x%x\n", m[0]->offset);
11885455Sdg		m[0]->dirty = 0;
11895455Sdg		rtvals[0] = VM_PAGER_OK;
11905455Sdg		return VM_PAGER_OK;
11915455Sdg	}
11921549Srgrimes	/*
11931827Sdg	 * if the filesystem does not have a bmap, then use the old code
11941549Srgrimes	 */
11955455Sdg	if (VOP_BMAP(vp, (m[0]->offset / bsize), &dp, &block, 0) ||
11965455Sdg	    (block == -1)) {
11971549Srgrimes
11981549Srgrimes		rtvals[0] = vnode_pager_output_old(vnp, m[0]);
11991549Srgrimes
12005455Sdg		m[0]->dirty = 0;
12013612Sdg		cnt.v_vnodeout++;
12023612Sdg		cnt.v_vnodepgsout++;
12031549Srgrimes		return rtvals[0];
12041541Srgrimes	}
12055455Sdg	tv = time;
12065455Sdg	VOP_UPDATE(vp, &tv, &tv, 0);
12071541Srgrimes
12081549Srgrimes	/*
12091827Sdg	 * if the filesystem has a small blocksize, then use the small block
12101827Sdg	 * filesystem output code
12111549Srgrimes	 */
12121549Srgrimes	if ((bsize < PAGE_SIZE) &&
12131827Sdg	    (vp->v_mount->mnt_stat.f_type != MOUNT_NFS)) {
12141549Srgrimes
12151827Sdg		for (i = 0; i < count; i++) {
12161549Srgrimes			rtvals[i] = vnode_pager_output_smlfs(vnp, m[i]);
12171827Sdg			if (rtvals[i] == VM_PAGER_OK) {
12181549Srgrimes				pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
12191549Srgrimes			}
12201549Srgrimes		}
12213612Sdg		cnt.v_vnodeout++;
12223612Sdg		cnt.v_vnodepgsout += count;
12231549Srgrimes		return rtvals[0];
12241541Srgrimes	}
12251827Sdg	for (i = 0; i < count; i++) {
12265455Sdg		foff = m[i]->offset;
12271549Srgrimes		if (foff >= vnp->vnp_size) {
12281827Sdg			for (j = i; j < count; j++)
12291549Srgrimes				rtvals[j] = VM_PAGER_BAD;
12301549Srgrimes			count = i;
12311549Srgrimes			break;
12321549Srgrimes		}
12331549Srgrimes	}
12341549Srgrimes	if (count == 0) {
12351549Srgrimes		return rtvals[0];
12361549Srgrimes	}
12375455Sdg	foff = m[0]->offset;
12386151Sdg	reqaddr = vnode_pager_addr(vp, foff, &runpg);
12396151Sdg	if( runpg < count)
12406151Sdg		count = runpg;
12411827Sdg
12421549Srgrimes	/*
12431549Srgrimes	 * calculate the size of the transfer
12441549Srgrimes	 */
12451549Srgrimes	size = count * PAGE_SIZE;
12461549Srgrimes	if ((foff + size) > vnp->vnp_size)
12471549Srgrimes		size = vnp->vnp_size - foff;
12481549Srgrimes
12491549Srgrimes	/*
12501549Srgrimes	 * round up physical size for real devices
12511549Srgrimes	 */
12521827Sdg	if (dp->v_type == VBLK || dp->v_type == VCHR)
12531549Srgrimes		size = (size + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1);
12541549Srgrimes
12551887Sdg	bp = getpbuf();
12565455Sdg	kva = (vm_offset_t) bp->b_data;
12571549Srgrimes	/*
12581549Srgrimes	 * and map the pages to be read into the kva
12591549Srgrimes	 */
12601887Sdg	pmap_qenter(kva, m, count);
12611827Sdg
12621549Srgrimes	/* build a minimal buffer header */
12631549Srgrimes	bp->b_flags = B_BUSY | B_WRITE | B_CALL;
12641549Srgrimes	bp->b_iodone = vnode_pager_iodone;
12651549Srgrimes	/* B_PHYS is not set, but it is nice to fill this in */
12661549Srgrimes	bp->b_proc = curproc;
12671549Srgrimes	bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
12681549Srgrimes
12691827Sdg	if (bp->b_rcred != NOCRED)
12701549Srgrimes		crhold(bp->b_rcred);
12711827Sdg	if (bp->b_wcred != NOCRED)
12721549Srgrimes		crhold(bp->b_wcred);
12736626Sdg	bp->b_blkno = reqaddr;
12745455Sdg	pbgetvp(dp, bp);
12751549Srgrimes	++dp->v_numoutput;
12761827Sdg
12771549Srgrimes	/* for NFS */
12781549Srgrimes	bp->b_dirtyoff = 0;
12791549Srgrimes	bp->b_dirtyend = size;
12801549Srgrimes
12811549Srgrimes	bp->b_bcount = size;
12821549Srgrimes	bp->b_bufsize = size;
12831549Srgrimes
12843612Sdg	cnt.v_vnodeout++;
12853612Sdg	cnt.v_vnodepgsout += count;
12863612Sdg
12871549Srgrimes	/* do the output */
12881549Srgrimes	VOP_STRATEGY(bp);
12891549Srgrimes
12901549Srgrimes	s = splbio();
12911549Srgrimes
12921549Srgrimes	/* we definitely need to be at splbio here */
12931549Srgrimes
12941549Srgrimes	while ((bp->b_flags & B_DONE) == 0) {
12951827Sdg		tsleep((caddr_t) bp, PVM, "vnwrite", 0);
12961549Srgrimes	}
12971549Srgrimes	splx(s);
12981549Srgrimes
12991549Srgrimes	if ((bp->b_flags & B_ERROR) != 0)
13001549Srgrimes		error = EIO;
13011549Srgrimes
13025455Sdg	pmap_qremove(kva, count);
13031549Srgrimes
13041549Srgrimes	/*
13051549Srgrimes	 * free the buffer header back to the swap buffer pool
13061549Srgrimes	 */
13071549Srgrimes	relpbuf(bp);
13081549Srgrimes	HOLDRELE(vp);
13091549Srgrimes
13101827Sdg	if (!error) {
13111827Sdg		for (i = 0; i < count; i++) {
13121549Srgrimes			pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
13135455Sdg			m[i]->dirty = 0;
13141549Srgrimes			rtvals[i] = VM_PAGER_OK;
13151549Srgrimes		}
13161827Sdg	} else if (count != 1) {
13171549Srgrimes		error = 0;
13181549Srgrimes		count = 1;
13191549Srgrimes		goto retryoutput;
13201549Srgrimes	}
13211549Srgrimes	if (error) {
13224207Sdg		printf("vnode_pager_output: I/O write error\n");
13231549Srgrimes	}
13245455Sdg	return (error ? VM_PAGER_ERROR : VM_PAGER_OK);
13251541Srgrimes}
1326