vnode_pager.c revision 12767
11541Srgrimes/*
21541Srgrimes * Copyright (c) 1990 University of Utah.
31549Srgrimes * Copyright (c) 1991 The Regents of the University of California.
41549Srgrimes * All rights reserved.
59507Sdg * Copyright (c) 1993, 1994 John S. Dyson
69507Sdg * Copyright (c) 1995, David Greenman
71541Srgrimes *
81541Srgrimes * This code is derived from software contributed to Berkeley by
91541Srgrimes * the Systems Programming Group of the University of Utah Computer
101541Srgrimes * Science Department.
111541Srgrimes *
121541Srgrimes * Redistribution and use in source and binary forms, with or without
131541Srgrimes * modification, are permitted provided that the following conditions
141541Srgrimes * are met:
151541Srgrimes * 1. Redistributions of source code must retain the above copyright
161541Srgrimes *    notice, this list of conditions and the following disclaimer.
171541Srgrimes * 2. Redistributions in binary form must reproduce the above copyright
181541Srgrimes *    notice, this list of conditions and the following disclaimer in the
191541Srgrimes *    documentation and/or other materials provided with the distribution.
201541Srgrimes * 3. All advertising materials mentioning features or use of this software
211541Srgrimes *    must display the following acknowledgement:
221541Srgrimes *	This product includes software developed by the University of
231541Srgrimes *	California, Berkeley and its contributors.
241541Srgrimes * 4. Neither the name of the University nor the names of its contributors
251541Srgrimes *    may be used to endorse or promote products derived from this software
261541Srgrimes *    without specific prior written permission.
271541Srgrimes *
281541Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
291541Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
301541Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
311541Srgrimes * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
321541Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
331541Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
341541Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
351541Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
361541Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
371541Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
381541Srgrimes * SUCH DAMAGE.
391541Srgrimes *
401549Srgrimes *	from: @(#)vnode_pager.c	7.5 (Berkeley) 4/20/91
4112767Sdyson *	$Id: vnode_pager.c,v 1.54 1995/12/07 12:48:31 davidg Exp $
421541Srgrimes */
431541Srgrimes
441541Srgrimes/*
451541Srgrimes * Page to/from files (vnodes).
461541Srgrimes */
471541Srgrimes
481549Srgrimes/*
491549Srgrimes * TODO:
509507Sdg *	Implement VOP_GETPAGES/PUTPAGES interface for filesystems. Will
517695Sdg *	greatly re-simplify the vnode_pager.
521549Srgrimes */
531549Srgrimes
541541Srgrimes#include <sys/param.h>
551541Srgrimes#include <sys/systm.h>
565455Sdg#include <sys/kernel.h>
571541Srgrimes#include <sys/proc.h>
581541Srgrimes#include <sys/malloc.h>
591541Srgrimes#include <sys/vnode.h>
601541Srgrimes#include <sys/uio.h>
611541Srgrimes#include <sys/mount.h>
629507Sdg#include <sys/buf.h>
6312662Sdg#include <sys/vmmeter.h>
641541Srgrimes
651541Srgrimes#include <vm/vm.h>
6612662Sdg#include <vm/vm_param.h>
6712662Sdg#include <vm/vm_prot.h>
6812662Sdg#include <vm/vm_object.h>
691541Srgrimes#include <vm/vm_page.h>
709507Sdg#include <vm/vm_pager.h>
711541Srgrimes#include <vm/vnode_pager.h>
7212662Sdg#include <vm/vm_extern.h>
731541Srgrimes
7412767Sdysonextern vm_offset_t vnode_pager_addr __P((struct vnode *vp, vm_ooffset_t address,
7511943Sbde					 int *run));
7611943Sbdeextern void vnode_pager_iodone __P((struct buf *bp));
7711943Sbdeextern int vnode_pager_input_smlfs __P((vm_object_t object, vm_page_t m));
7811943Sbdeextern int vnode_pager_input_old __P((vm_object_t object, vm_page_t m));
7911943Sbde
/*
 * Pager operations vector for vnode-backed VM objects.  The NULL slots
 * are hooks this pager does not implement (their exact roles are defined
 * by struct pagerops elsewhere -- NOTE(review): confirm against
 * vm_pager.h before relying on slot meanings).
 */
struct pagerops vnodepagerops = {
	NULL,
	vnode_pager_alloc,
	vnode_pager_dealloc,
	vnode_pager_getpages,
	vnode_pager_putpages,
	vnode_pager_haspage,
	NULL
};

/*
 * Leaf (device-level) implementations of the read/write paths, used as
 * fallbacks when the filesystem answers EOPNOTSUPP to the
 * VOP_GETPAGES/VOP_PUTPAGES interface (see the dispatch wrappers below).
 */
static int vnode_pager_leaf_getpages __P((vm_object_t object, vm_page_t *m,
					  int count, int reqpage));
static int vnode_pager_leaf_putpages __P((vm_object_t object, vm_page_t *m,
					  int count, boolean_t sync,
					  int *rtvals));
9510556Sdyson
/*
 * Allocate (or lookup) pager for a vnode.
 * Handle is a vnode pointer.
 *
 * NOTE(review): "size" appears to be in pages here -- vnp_size is set to
 * size * PAGE_SIZE below -- confirm against callers.
 */
vm_object_t
vnode_pager_alloc(handle, size, prot, offset)
	void *handle;
	vm_size_t size;
	vm_prot_t prot;
	vm_ooffset_t offset;
{
	vm_object_t object;
	struct vnode *vp;

	/*
	 * Pageout to vnode, no can do yet.
	 */
	if (handle == NULL)
		return (NULL);

	vp = (struct vnode *) handle;

	/*
	 * Prevent race condition when allocating the object. This
	 * can happen with NFS vnodes since the nfsnode isn't locked.
	 * VOLOCK is a hand-rolled interlock on the vnode; VOWANT marks
	 * that someone is sleeping waiting for it.
	 */
	while (vp->v_flag & VOLOCK) {
		vp->v_flag |= VOWANT;
		tsleep(vp, PVM, "vnpobj", 0);
	}
	vp->v_flag |= VOLOCK;

	/*
	 * If the object is being terminated, wait for it to
	 * go away.
	 */
	while (((object = vp->v_object) != NULL) && (object->flags & OBJ_DEAD)) {
		tsleep(object, PVM, "vadead", 0);
	}

	if (object == NULL) {
		/*
		 * And an object of the appropriate size
		 */
		object = vm_object_allocate(OBJT_VNODE, size);
		object->flags = OBJ_CANPERSIST;

		/*
		 * Hold a reference to the vnode and initialize object data.
		 */
		VREF(vp);
		object->un_pager.vnp.vnp_size = (vm_ooffset_t) size * PAGE_SIZE;

		object->handle = handle;
		vp->v_object = object;
	} else {
		/*
		 * vm_object_reference() will remove the object from the cache if
		 * found and gain a reference to the object.
		 */
		vm_object_reference(object);
	}

	/* Regular files get the merged VM/buffer-cache treatment. */
	if (vp->v_type == VREG)
		vp->v_flag |= VVMIO;

	/* Drop the allocation interlock and wake any waiters. */
	vp->v_flag &= ~VOLOCK;
	if (vp->v_flag & VOWANT) {
		vp->v_flag &= ~VOWANT;
		wakeup(vp);
	}
	return (object);
}
1691541Srgrimes
/*
 * Tear down the pager association for "object": wait for in-flight pager
 * I/O to drain, sever the vnode<->object linkage, and release the vnode
 * reference taken in vnode_pager_alloc().
 */
void
vnode_pager_dealloc(object)
	vm_object_t object;
{
	register struct vnode *vp = object->handle;

	if (vp == NULL)
		panic("vnode_pager_dealloc: pager already dealloced");

	/*
	 * Sleep until all paging activity on the object completes; the
	 * I/O completion side checks OBJ_PIPWNT and wakes us.
	 */
	if (object->paging_in_progress) {
		int s = splbio();
		while (object->paging_in_progress) {
			object->flags |= OBJ_PIPWNT;
			tsleep(object, PVM, "vnpdea", 0);
		}
		splx(s);
	}

	object->handle = NULL;

	vp->v_object = NULL;
	vp->v_flag &= ~(VTEXT | VVMIO);
	/* VAGE: presumably hints that this vnode may be reused early --
	 * confirm against vnode reclamation code. */
	vp->v_flag |= VAGE;
	vrele(vp);
}
1951541Srgrimes
/*
 * Report whether the backing store holds the page at "pindex", and
 * optionally how many contiguous pages before/after it are also backed
 * (scaled up from the VOP_BMAP filesystem-block run counts).
 */
boolean_t
vnode_pager_haspage(object, pindex, before, after)
	vm_object_t object;
	vm_pindex_t pindex;
	int *before;	/* out: contiguous backed pages before, may be NULL */
	int *after;	/* out: contiguous backed pages after, may be NULL */
{
	struct vnode *vp = object->handle;
	daddr_t bn;
	int err;
	daddr_t reqblock;
	int poff;	/* page offset within the filesystem block */
	int bsize;
	int pagesperblock;

	/*
	 * If filesystem no longer mounted or offset beyond end of file we do
	 * not have the page.
	 */
	if ((vp->v_mount == NULL) ||
		(IDX_TO_OFF(pindex) >= object->un_pager.vnp.vnp_size))
		return FALSE;

	bsize = vp->v_mount->mnt_stat.f_iosize;
	pagesperblock = bsize / PAGE_SIZE;
	reqblock = pindex / pagesperblock;
	err = VOP_BMAP(vp, reqblock, (struct vnode **) 0, &bn,
		after, before);
	/* On a BMAP error, claim the page exists and let the actual read
	 * discover the real failure. */
	if (err)
		return TRUE;
	/* bn == -1 means a hole: no backing store for this block. */
	if ( bn == -1)
		return FALSE;
	poff = pindex - (reqblock * pagesperblock);
	if (before) {
		/* convert the block run to pages, including our partial block */
		*before *= pagesperblock;
		*before += poff;
	}
	if (after) {
		int numafter;
		*after *= pagesperblock;
		numafter = pagesperblock - (poff + 1);
		if (IDX_TO_OFF(pindex + numafter) > object->un_pager.vnp.vnp_size) {
			/* NOTE(review): this EOF clamp counts pages from
			 * pindex to EOF without excluding the requested page
			 * itself -- looks like it may overstate "after" by
			 * one page; confirm before changing. */
			numafter = OFF_TO_IDX((object->un_pager.vnp.vnp_size - IDX_TO_OFF(pindex)));
		}
		*after += numafter;
	}
	return TRUE;
}
2441541Srgrimes
/*
 * Lets the VM system know about a change in size for a file.
 * We adjust our own internal size and flush any cached pages in
 * the associated object that are affected by the size change.
 *
 * Note: this routine may be invoked as a result of a pager put
 * operation (possibly at object termination time), so we must be careful.
 */
void
vnode_pager_setsize(vp, nsize)
	struct vnode *vp;
	vm_ooffset_t nsize;	/* new file size in bytes */
{
	vm_object_t object = vp->v_object;

	/* No VM object attached to the vnode: nothing to update. */
	if (object == NULL)
		return;

	/*
	 * Hasn't changed size
	 */
	if (nsize == object->un_pager.vnp.vnp_size)
		return;

	/*
	 * File has shrunk. Toss any cached pages beyond the new EOF.
	 */
	if (nsize < object->un_pager.vnp.vnp_size) {
		vm_ooffset_t nsizerounded;
		nsizerounded = IDX_TO_OFF(OFF_TO_IDX(nsize + PAGE_SIZE - 1));
		if (nsizerounded < object->un_pager.vnp.vnp_size) {
			/* remove whole pages past the page-rounded new EOF */
			vm_object_page_remove(object,
				OFF_TO_IDX(nsize + PAGE_SIZE - 1),
				OFF_TO_IDX(object->un_pager.vnp.vnp_size),
				FALSE);
		}
		/*
		 * this gets rid of garbage at the end of a page that is now
		 * only partially backed by the vnode...
		 */
		if (nsize & PAGE_MASK) {
			vm_offset_t kva;
			vm_page_t m;

			m = vm_page_lookup(object, OFF_TO_IDX(nsize));
			if (m) {
				/* zero from the new EOF to the page's end */
				kva = vm_pager_map_page(m);
				bzero((caddr_t) kva + (nsize & PAGE_MASK),
				    (int) (round_page(nsize) - nsize));
				vm_pager_unmap_page(kva);
			}
		}
	}
	/* Record the new size, in bytes and in pages (rounded up). */
	object->un_pager.vnp.vnp_size = nsize;
	object->size = OFF_TO_IDX(nsize + PAGE_SIZE - 1);
}
3011541Srgrimes
/*
 * Unmount-time sweep: uncache the VM object of every vnode belonging to
 * mount point "mp".  Restarts the list walk whenever a vnode is found to
 * have migrated off the mount while we slept.
 */
void
vnode_pager_umount(mp)
	register struct mount *mp;
{
	struct vnode *vp, *nvp;

loop:
	for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) {
		/*
		 * Vnode can be reclaimed by getnewvnode() while we
		 * traverse the list.
		 */
		if (vp->v_mount != mp)
			goto loop;	/* list changed under us; restart */

		/*
		 * Save the next pointer now since uncaching may terminate the
		 * object and render vnode invalid
		 */
		nvp = vp->v_mntvnodes.le_next;

		if (vp->v_object != NULL) {
			VOP_LOCK(vp);
			vnode_pager_uncache(vp);
			VOP_UNLOCK(vp);
		}
	}
}
3301541Srgrimes
/*
 * Remove vnode associated object from the object cache.
 * This routine must be called with the vnode locked.
 *
 * XXX unlock the vnode.
 * We must do this since uncaching the object may result in its
 * destruction which may initiate paging activity which may necessitate
 * re-locking the vnode.
 */
void
vnode_pager_uncache(vp)
	struct vnode *vp;
{
	vm_object_t object;

	/*
	 * Not a mapped vnode
	 */
	object = vp->v_object;
	if (object == NULL)
		return;

	/* Hold a reference so the object survives the unlock window. */
	vm_object_reference(object);
	VOP_UNLOCK(vp);
	/* pager_cache(object, FALSE): presumably evicts the object from
	 * the persistent-object cache and drops our reference -- confirm
	 * against vm_pager.c. */
	pager_cache(object, FALSE);
	VOP_LOCK(vp);
	return;
}
3591541Srgrimes
3601541Srgrimes
/*
 * Dispose of a page the pager decided not to fill: wake any thread
 * sleeping on it, then free it back to the VM system.
 */
void
vnode_pager_freepage(m)
	vm_page_t m;
{
	PAGE_WAKEUP(m);
	vm_page_free(m);
}
3681549Srgrimes
3691549Srgrimes/*
3701549Srgrimes * calculate the linear (byte) disk address of specified virtual
3711549Srgrimes * file address
3721549Srgrimes */
3731549Srgrimesvm_offset_t
3746151Sdgvnode_pager_addr(vp, address, run)
3751549Srgrimes	struct vnode *vp;
37612767Sdyson	vm_ooffset_t address;
3776151Sdg	int *run;
3781549Srgrimes{
3795455Sdg	int rtaddress;
3805455Sdg	int bsize;
38112767Sdyson	daddr_t block;
3821549Srgrimes	struct vnode *rtvp;
3835455Sdg	int err;
38412767Sdyson	daddr_t vblock;
38512767Sdyson	int voffset;
3861549Srgrimes
3875455Sdg	if ((int) address < 0)
3885455Sdg		return -1;
3895455Sdg
39011701Sdyson	if (vp->v_mount == NULL)
39111701Sdyson		return -1;
39211701Sdyson
3931549Srgrimes	bsize = vp->v_mount->mnt_stat.f_iosize;
3941549Srgrimes	vblock = address / bsize;
3951549Srgrimes	voffset = address % bsize;
3961549Srgrimes
39710551Sdyson	err = VOP_BMAP(vp, vblock, &rtvp, &block, run, NULL);
3981549Srgrimes
3996151Sdg	if (err || (block == -1))
4001549Srgrimes		rtaddress = -1;
4016151Sdg	else {
4026626Sdg		rtaddress = block + voffset / DEV_BSIZE;
4036151Sdg		if( run) {
4046151Sdg			*run += 1;
4056151Sdg			*run *= bsize/PAGE_SIZE;
4066151Sdg			*run -= voffset/PAGE_SIZE;
4076151Sdg		}
4086151Sdg	}
4091549Srgrimes
4101549Srgrimes	return rtaddress;
4111549Srgrimes}
4121549Srgrimes
/*
 * interrupt routine for I/O completion: installed as the b_iodone
 * callback (B_CALL) on pager bufs; marks the buf done and wakes the
 * thread sleeping on it in the read loops below.
 */
void
vnode_pager_iodone(bp)
	struct buf *bp;
{
	bp->b_flags |= B_DONE;
	wakeup(bp);
}
4231549Srgrimes
/*
 * small block file system vnode pager input: fill page "m" one
 * filesystem block at a time, since several disk blocks make up a single
 * page.  Holes are zero-filled.  Returns a VM_PAGER_* status code.
 */
int
vnode_pager_input_smlfs(object, m)
	vm_object_t object;
	vm_page_t m;
{
	int i;
	int s;
	struct vnode *dp, *vp;
	struct buf *bp;
	vm_offset_t kva;
	int fileaddr;
	vm_offset_t bsize;
	int error = 0;

	vp = object->handle;
	if (vp->v_mount == NULL)
		return VM_PAGER_BAD;

	bsize = vp->v_mount->mnt_stat.f_iosize;


	/* get the underlying device vnode for the raw transfer */
	VOP_BMAP(vp, 0, &dp, 0, NULL, NULL);

	kva = vm_pager_map_page(m);

	for (i = 0; i < PAGE_SIZE / bsize; i++) {

		/* skip sub-block ranges already valid in the page */
		if ((vm_page_bits(IDX_TO_OFF(m->pindex) + i * bsize, bsize) & m->valid))
			continue;

		fileaddr = vnode_pager_addr(vp,
			IDX_TO_OFF(m->pindex) + i * bsize, (int *)0);
		if (fileaddr != -1) {
			bp = getpbuf();

			/* build a minimal buffer header */
			bp->b_flags = B_BUSY | B_READ | B_CALL;
			bp->b_iodone = vnode_pager_iodone;
			bp->b_proc = curproc;
			bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
			if (bp->b_rcred != NOCRED)
				crhold(bp->b_rcred);
			if (bp->b_wcred != NOCRED)
				crhold(bp->b_wcred);
			bp->b_un.b_addr = (caddr_t) kva + i * bsize;
			bp->b_blkno = fileaddr;
			pbgetvp(dp, bp);
			bp->b_bcount = bsize;
			bp->b_bufsize = bsize;

			/* do the input */
			VOP_STRATEGY(bp);

			/* we definitely need to be at splbio here */

			s = splbio();
			while ((bp->b_flags & B_DONE) == 0) {
				tsleep(bp, PVM, "vnsrd", 0);
			}
			splx(s);
			if ((bp->b_flags & B_ERROR) != 0)
				error = EIO;

			/*
			 * free the buffer header back to the swap buffer pool
			 */
			relpbuf(bp);
			if (error)
				break;

			vm_page_set_validclean(m, (i * bsize) & (PAGE_SIZE-1), bsize);
		} else {
			/* hole: mark the range valid/clean and zero-fill it */
			vm_page_set_validclean(m, (i * bsize) & (PAGE_SIZE-1), bsize);
			bzero((caddr_t) kva + i * bsize, bsize);
		}
	}
	vm_pager_unmap_page(kva);
	pmap_clear_modify(VM_PAGE_TO_PHYS(m));
	m->flags &= ~PG_ZERO;
	if (error) {
		return VM_PAGER_ERROR;
	}
	return VM_PAGER_OK;

}
5121549Srgrimes

/*
 * old style vnode pager input routine: read the page with VOP_READ
 * through a temporary kernel mapping.  (The previous header comment said
 * "output", but this is the read path.)  Used when the filesystem has no
 * usable VOP_BMAP.
 */
int
vnode_pager_input_old(object, m)
	vm_object_t object;
	vm_page_t m;
{
	struct uio auio;
	struct iovec aiov;
	int error;
	int size;
	vm_offset_t kva;

	error = 0;

	/*
	 * Return failure if beyond current EOF
	 */
	if (IDX_TO_OFF(m->pindex) >= object->un_pager.vnp.vnp_size) {
		return VM_PAGER_BAD;
	} else {
		size = PAGE_SIZE;
		/* clamp the read to EOF */
		if (IDX_TO_OFF(m->pindex) + size > object->un_pager.vnp.vnp_size)
			size = object->un_pager.vnp.vnp_size - IDX_TO_OFF(m->pindex);

		/*
		 * Allocate a kernel virtual address and initialize so that
		 * we can use VOP_READ/WRITE routines.
		 */
		kva = vm_pager_map_page(m);

		aiov.iov_base = (caddr_t) kva;
		aiov.iov_len = size;
		auio.uio_iov = &aiov;
		auio.uio_iovcnt = 1;
		auio.uio_offset = IDX_TO_OFF(m->pindex);
		auio.uio_segflg = UIO_SYSSPACE;
		auio.uio_rw = UIO_READ;
		auio.uio_resid = size;
		auio.uio_procp = (struct proc *) 0;

		error = VOP_READ(object->handle, &auio, 0, curproc->p_ucred);
		if (!error) {
			register int count = size - auio.uio_resid;

			if (count == 0)
				error = EINVAL;
			else if (count != PAGE_SIZE)
				/* short read: zero the rest of the page */
				bzero((caddr_t) kva + count, PAGE_SIZE - count);
		}
		vm_pager_unmap_page(kva);
	}
	pmap_clear_modify(VM_PAGE_TO_PHYS(m));
	m->dirty = 0;
	m->flags &= ~PG_ZERO;
	return error ? VM_PAGER_ERROR : VM_PAGER_OK;
}
5721549Srgrimes
5731549Srgrimes/*
5741549Srgrimes * generic vnode pager input routine
5751549Srgrimes */
57610556Sdyson
5771549Srgrimesint
5789507Sdgvnode_pager_getpages(object, m, count, reqpage)
5799507Sdg	vm_object_t object;
5801549Srgrimes	vm_page_t *m;
5819507Sdg	int count;
5829507Sdg	int reqpage;
5831549Srgrimes{
58410556Sdyson	int rtval;
58510556Sdyson	struct vnode *vp;
58610556Sdyson	vp = object->handle;
58711701Sdyson	rtval = VOP_GETPAGES(vp, m, count*PAGE_SIZE, reqpage, 0);
58810556Sdyson	if (rtval == EOPNOTSUPP)
58911943Sbde		return vnode_pager_leaf_getpages(object, m, count, reqpage);
59010556Sdyson	else
59110556Sdyson		return rtval;
59210556Sdyson}
59310556Sdyson
59410556Sdysonstatic int
59510556Sdysonvnode_pager_leaf_getpages(object, m, count, reqpage)
59610556Sdyson	vm_object_t object;
59710556Sdyson	vm_page_t *m;
59810556Sdyson	int count;
59910556Sdyson	int reqpage;
60010556Sdyson{
60112767Sdyson	vm_offset_t kva;
60212767Sdyson	off_t foff;
6039507Sdg	int i, size, bsize, first, firstaddr;
6041549Srgrimes	struct vnode *dp, *vp;
6056151Sdg	int runpg;
6066151Sdg	int runend;
6077178Sdg	struct buf *bp;
6085455Sdg	int s;
6095455Sdg	int error = 0;
6101549Srgrimes
6119507Sdg	vp = object->handle;
61211701Sdyson	if (vp->v_mount == NULL)
61311701Sdyson		return VM_PAGER_BAD;
61411701Sdyson
6151549Srgrimes	bsize = vp->v_mount->mnt_stat.f_iosize;
6161549Srgrimes
6171549Srgrimes	/* get the UNDERLYING device for the file with VOP_BMAP() */
6181827Sdg
6191549Srgrimes	/*
6201827Sdg	 * originally, we did not check for an error return value -- assuming
6211827Sdg	 * an fs always has a bmap entry point -- that assumption is wrong!!!
6221549Srgrimes	 */
62312767Sdyson	foff = IDX_TO_OFF(m[reqpage]->pindex);
6241827Sdg
6251549Srgrimes	/*
6261887Sdg	 * if we can't bmap, use old VOP code
6271549Srgrimes	 */
62810551Sdyson	if (VOP_BMAP(vp, 0, &dp, 0, NULL, NULL)) {
6291549Srgrimes		for (i = 0; i < count; i++) {
6301549Srgrimes			if (i != reqpage) {
6311549Srgrimes				vnode_pager_freepage(m[i]);
6321549Srgrimes			}
6331549Srgrimes		}
6343612Sdg		cnt.v_vnodein++;
6353612Sdg		cnt.v_vnodepgsin++;
6369507Sdg		return vnode_pager_input_old(object, m[reqpage]);
6371549Srgrimes
6381827Sdg		/*
6391827Sdg		 * if the blocksize is smaller than a page size, then use
6401827Sdg		 * special small filesystem code.  NFS sometimes has a small
6411827Sdg		 * blocksize, but it can handle large reads itself.
6421827Sdg		 */
6431827Sdg	} else if ((PAGE_SIZE / bsize) > 1 &&
6445455Sdg	    (vp->v_mount->mnt_stat.f_type != MOUNT_NFS)) {
6451827Sdg
6461549Srgrimes		for (i = 0; i < count; i++) {
6471549Srgrimes			if (i != reqpage) {
6481549Srgrimes				vnode_pager_freepage(m[i]);
6491549Srgrimes			}
6501549Srgrimes		}
6513612Sdg		cnt.v_vnodein++;
6523612Sdg		cnt.v_vnodepgsin++;
6539507Sdg		return vnode_pager_input_smlfs(object, m[reqpage]);
6541549Srgrimes	}
6551549Srgrimes	/*
6565455Sdg	 * if ANY DEV_BSIZE blocks are valid on a large filesystem block
6575455Sdg	 * then, the entire page is valid --
6581549Srgrimes	 */
6595455Sdg	if (m[reqpage]->valid) {
6605455Sdg		m[reqpage]->valid = VM_PAGE_BITS_ALL;
6615455Sdg		for (i = 0; i < count; i++) {
6625455Sdg			if (i != reqpage)
6635455Sdg				vnode_pager_freepage(m[i]);
6641549Srgrimes		}
6655455Sdg		return VM_PAGER_OK;
6661549Srgrimes	}
6677178Sdg
6685455Sdg	/*
6695455Sdg	 * here on direct device I/O
6705455Sdg	 */
6711549Srgrimes
6726151Sdg	firstaddr = -1;
6731549Srgrimes	/*
6746151Sdg	 * calculate the run that includes the required page
6751549Srgrimes	 */
6766151Sdg	for(first = 0, i = 0; i < count; i = runend) {
67712767Sdyson		firstaddr = vnode_pager_addr(vp,
67812767Sdyson			IDX_TO_OFF(m[i]->pindex), &runpg);
6796151Sdg		if (firstaddr == -1) {
6809507Sdg			if (i == reqpage && foff < object->un_pager.vnp.vnp_size) {
6819507Sdg				panic("vnode_pager_putpages: unexpected missing page: firstaddr: %d, foff: %ld, vnp_size: %d",
6829507Sdg			   	 firstaddr, foff, object->un_pager.vnp.vnp_size);
6836151Sdg			}
6841549Srgrimes			vnode_pager_freepage(m[i]);
6856151Sdg			runend = i + 1;
6866151Sdg			first = runend;
6876151Sdg			continue;
6881549Srgrimes		}
6896151Sdg		runend = i + runpg;
6909507Sdg		if (runend <= reqpage) {
6916151Sdg			int j;
6929507Sdg			for (j = i; j < runend; j++) {
6936151Sdg				vnode_pager_freepage(m[j]);
6946151Sdg			}
6951549Srgrimes		} else {
6969507Sdg			if (runpg < (count - first)) {
6979507Sdg				for (i = first + runpg; i < count; i++)
6986151Sdg					vnode_pager_freepage(m[i]);
6996151Sdg				count = first + runpg;
7006151Sdg			}
7016151Sdg			break;
7021549Srgrimes		}
7036151Sdg		first = runend;
7041549Srgrimes	}
7051549Srgrimes
7061549Srgrimes	/*
7071827Sdg	 * the first and last page have been calculated now, move input pages
7081827Sdg	 * to be zero based...
7091549Srgrimes	 */
7101549Srgrimes	if (first != 0) {
7111549Srgrimes		for (i = first; i < count; i++) {
7121549Srgrimes			m[i - first] = m[i];
7131549Srgrimes		}
7141549Srgrimes		count -= first;
7151549Srgrimes		reqpage -= first;
7161549Srgrimes	}
7176151Sdg
7181549Srgrimes	/*
7191549Srgrimes	 * calculate the file virtual address for the transfer
7201549Srgrimes	 */
72112767Sdyson	foff = IDX_TO_OFF(m[0]->pindex);
7221827Sdg
7231549Srgrimes	/*
7241549Srgrimes	 * calculate the size of the transfer
7251549Srgrimes	 */
7261549Srgrimes	size = count * PAGE_SIZE;
7279507Sdg	if ((foff + size) > object->un_pager.vnp.vnp_size)
7289507Sdg		size = object->un_pager.vnp.vnp_size - foff;
7291549Srgrimes
7301549Srgrimes	/*
7311549Srgrimes	 * round up physical size for real devices
7321549Srgrimes	 */
7331827Sdg	if (dp->v_type == VBLK || dp->v_type == VCHR)
7341549Srgrimes		size = (size + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1);
7351549Srgrimes
7365841Sdg	bp = getpbuf();
7375455Sdg	kva = (vm_offset_t) bp->b_data;
7381887Sdg
7391549Srgrimes	/*
7401549Srgrimes	 * and map the pages to be read into the kva
7411549Srgrimes	 */
7421887Sdg	pmap_qenter(kva, m, count);
7431549Srgrimes
7441549Srgrimes	/* build a minimal buffer header */
7451549Srgrimes	bp->b_flags = B_BUSY | B_READ | B_CALL;
7461549Srgrimes	bp->b_iodone = vnode_pager_iodone;
7471549Srgrimes	/* B_PHYS is not set, but it is nice to fill this in */
7481549Srgrimes	bp->b_proc = curproc;
7491549Srgrimes	bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
7501827Sdg	if (bp->b_rcred != NOCRED)
7511549Srgrimes		crhold(bp->b_rcred);
7521827Sdg	if (bp->b_wcred != NOCRED)
7531549Srgrimes		crhold(bp->b_wcred);
7546626Sdg	bp->b_blkno = firstaddr;
7555455Sdg	pbgetvp(dp, bp);
7561549Srgrimes	bp->b_bcount = size;
7571549Srgrimes	bp->b_bufsize = size;
7581549Srgrimes
7593612Sdg	cnt.v_vnodein++;
7603612Sdg	cnt.v_vnodepgsin += count;
7613612Sdg
7621549Srgrimes	/* do the input */
7631549Srgrimes	VOP_STRATEGY(bp);
7643612Sdg
7651549Srgrimes	s = splbio();
7661549Srgrimes	/* we definitely need to be at splbio here */
7671549Srgrimes
7681549Srgrimes	while ((bp->b_flags & B_DONE) == 0) {
7699356Sdg		tsleep(bp, PVM, "vnread", 0);
7701549Srgrimes	}
7711549Srgrimes	splx(s);
7721549Srgrimes	if ((bp->b_flags & B_ERROR) != 0)
7731549Srgrimes		error = EIO;
7741549Srgrimes
7751549Srgrimes	if (!error) {
7761549Srgrimes		if (size != count * PAGE_SIZE)
7771827Sdg			bzero((caddr_t) kva + size, PAGE_SIZE * count - size);
7781549Srgrimes	}
7795455Sdg	pmap_qremove(kva, count);
7801549Srgrimes
7811549Srgrimes	/*
7821549Srgrimes	 * free the buffer header back to the swap buffer pool
7831549Srgrimes	 */
7841549Srgrimes	relpbuf(bp);
7851549Srgrimes
7861549Srgrimes	for (i = 0; i < count; i++) {
7872386Sdg		pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
7885455Sdg		m[i]->dirty = 0;
7895455Sdg		m[i]->valid = VM_PAGE_BITS_ALL;
79010669Sdyson		m[i]->flags &= ~PG_ZERO;
7911549Srgrimes		if (i != reqpage) {
7921827Sdg
7931549Srgrimes			/*
7941827Sdg			 * whether or not to leave the page activated is up in
7951827Sdg			 * the air, but we should put the page on a page queue
7961827Sdg			 * somewhere. (it already is in the object). Result:
7971827Sdg			 * It appears that emperical results show that
7981827Sdg			 * deactivating pages is best.
7991549Srgrimes			 */
8001827Sdg
8011549Srgrimes			/*
8021827Sdg			 * just in case someone was asking for this page we
8031827Sdg			 * now tell them that it is ok to use
8041549Srgrimes			 */
8051549Srgrimes			if (!error) {
8065841Sdg				vm_page_deactivate(m[i]);
8071549Srgrimes				PAGE_WAKEUP(m[i]);
8081549Srgrimes			} else {
8091549Srgrimes				vnode_pager_freepage(m[i]);
8101549Srgrimes			}
8111549Srgrimes		}
8121549Srgrimes	}
8131549Srgrimes	if (error) {
8149507Sdg		printf("vnode_pager_getpages: I/O read error\n");
8151549Srgrimes	}
8164207Sdg	return (error ? VM_PAGER_ERROR : VM_PAGER_OK);
8171549Srgrimes}
8181549Srgrimes
81910556Sdysonint
82010556Sdysonvnode_pager_putpages(object, m, count, sync, rtvals)
82110556Sdyson	vm_object_t object;
82210556Sdyson	vm_page_t *m;
82310556Sdyson	int count;
82410556Sdyson	boolean_t sync;
82510556Sdyson	int *rtvals;
82610556Sdyson{
82710556Sdyson	int rtval;
82810556Sdyson	struct vnode *vp;
82910556Sdyson	vp = object->handle;
83011701Sdyson	rtval = VOP_PUTPAGES(vp, m, count*PAGE_SIZE, sync, rtvals, 0);
83110556Sdyson	if (rtval == EOPNOTSUPP)
83211943Sbde		return vnode_pager_leaf_putpages(object, m, count, sync, rtvals);
83310556Sdyson	else
83410556Sdyson		return rtval;
83510556Sdyson}
83610556Sdyson
8371549Srgrimes/*
8381549Srgrimes * generic vnode pager output routine
8391549Srgrimes */
84010556Sdysonstatic int
84110556Sdysonvnode_pager_leaf_putpages(object, m, count, sync, rtvals)
8429507Sdg	vm_object_t object;
8431549Srgrimes	vm_page_t *m;
8445455Sdg	int count;
8459507Sdg	boolean_t sync;
8465455Sdg	int *rtvals;
8471549Srgrimes{
8487695Sdg	int i;
8491549Srgrimes
8507695Sdg	struct vnode *vp;
8517695Sdg	int maxsize, ncount;
85212767Sdyson	vm_ooffset_t poffset;
8537695Sdg	struct uio auio;
8547695Sdg	struct iovec aiov;
8557695Sdg	int error;
8561549Srgrimes
8579507Sdg	vp = object->handle;;
8581827Sdg	for (i = 0; i < count; i++)
8591549Srgrimes		rtvals[i] = VM_PAGER_AGAIN;
8601549Srgrimes
86112767Sdyson	if ((int) m[0]->pindex < 0) {
86212767Sdyson		printf("vnode_pager_putpages: attempt to write meta-data!!! -- 0x%x(%x)\n", m[0]->pindex, m[0]->dirty);
8637695Sdg		rtvals[0] = VM_PAGER_BAD;
8647695Sdg		return VM_PAGER_BAD;
8655455Sdg	}
8667178Sdg
8677695Sdg	maxsize = count * PAGE_SIZE;
8687695Sdg	ncount = count;
8691549Srgrimes
87012767Sdyson	poffset = IDX_TO_OFF(m[0]->pindex);
87112767Sdyson	if (maxsize + poffset > object->un_pager.vnp.vnp_size) {
87212767Sdyson		if (object->un_pager.vnp.vnp_size > poffset)
87312767Sdyson			maxsize = object->un_pager.vnp.vnp_size - poffset;
8748585Sdg		else
8758585Sdg			maxsize = 0;
8767695Sdg		ncount = (maxsize + PAGE_SIZE - 1) / PAGE_SIZE;
8778585Sdg		if (ncount < count) {
8788585Sdg			for (i = ncount; i < count; i++) {
8797695Sdg				rtvals[i] = VM_PAGER_BAD;
8801549Srgrimes			}
88112767Sdyson#ifdef BOGUS
8828585Sdg			if (ncount == 0) {
88312767Sdyson				printf("vnode_pager_putpages: write past end of file: %d, %lu\n",
88412767Sdyson					poffset,
88512767Sdyson					(unsigned long) object->un_pager.vnp.vnp_size);
8867695Sdg				return rtvals[0];
8877695Sdg			}
88812767Sdyson#endif
8891549Srgrimes		}
8901541Srgrimes	}
8917695Sdg
8928585Sdg	for (i = 0; i < count; i++) {
8938585Sdg		m[i]->busy++;
8947695Sdg		m[i]->flags &= ~PG_BUSY;
8951549Srgrimes	}
8961827Sdg
8977695Sdg	aiov.iov_base = (caddr_t) 0;
8987695Sdg	aiov.iov_len = maxsize;
8997695Sdg	auio.uio_iov = &aiov;
9007695Sdg	auio.uio_iovcnt = 1;
90112767Sdyson	auio.uio_offset = poffset;
9027695Sdg	auio.uio_segflg = UIO_NOCOPY;
9037695Sdg	auio.uio_rw = UIO_WRITE;
9047695Sdg	auio.uio_resid = maxsize;
9057695Sdg	auio.uio_procp = (struct proc *) 0;
90612767Sdyson	error = VOP_WRITE(vp, &auio, IO_VMIO|(sync?IO_SYNC:0), curproc->p_ucred);
9073612Sdg	cnt.v_vnodeout++;
9087695Sdg	cnt.v_vnodepgsout += ncount;
9093612Sdg
9108585Sdg	if (error) {
9119507Sdg		printf("vnode_pager_putpages: I/O error %d\n", error);
9127695Sdg	}
9138585Sdg	if (auio.uio_resid) {
91412767Sdyson		printf("vnode_pager_putpages: residual I/O %d at %d\n",
91512767Sdyson			auio.uio_resid, m[0]->pindex);
9167695Sdg	}
9178585Sdg	for (i = 0; i < count; i++) {
9188585Sdg		m[i]->busy--;
9198585Sdg		if (i < ncount) {
9207695Sdg			rtvals[i] = VM_PAGER_OK;
9217695Sdg		}
9228585Sdg		if ((m[i]->busy == 0) && (m[i]->flags & PG_WANTED))
9239507Sdg			wakeup(m[i]);
9247695Sdg	}
9257695Sdg	return rtvals[0];
9267695Sdg}
9271549Srgrimes
9287695Sdgstruct vnode *
9299507Sdgvnode_pager_lock(object)
9309507Sdg	vm_object_t object;
9319507Sdg{
9329507Sdg	for (; object != NULL; object = object->backing_object) {
9339507Sdg		if (object->type != OBJT_VNODE)
9347695Sdg			continue;
9351549Srgrimes
9369507Sdg		VOP_LOCK(object->handle);
9379507Sdg		return object->handle;
9381549Srgrimes	}
9399507Sdg	return NULL;
9407695Sdg}
941