vnode_pager.c revision 31853
11541Srgrimes/*
21541Srgrimes * Copyright (c) 1990 University of Utah.
31549Srgrimes * Copyright (c) 1991 The Regents of the University of California.
41549Srgrimes * All rights reserved.
59507Sdg * Copyright (c) 1993, 1994 John S. Dyson
69507Sdg * Copyright (c) 1995, David Greenman
71541Srgrimes *
81541Srgrimes * This code is derived from software contributed to Berkeley by
91541Srgrimes * the Systems Programming Group of the University of Utah Computer
101541Srgrimes * Science Department.
111541Srgrimes *
121541Srgrimes * Redistribution and use in source and binary forms, with or without
131541Srgrimes * modification, are permitted provided that the following conditions
141541Srgrimes * are met:
151541Srgrimes * 1. Redistributions of source code must retain the above copyright
161541Srgrimes *    notice, this list of conditions and the following disclaimer.
171541Srgrimes * 2. Redistributions in binary form must reproduce the above copyright
181541Srgrimes *    notice, this list of conditions and the following disclaimer in the
191541Srgrimes *    documentation and/or other materials provided with the distribution.
201541Srgrimes * 3. All advertising materials mentioning features or use of this software
211541Srgrimes *    must display the following acknowledgement:
221541Srgrimes *	This product includes software developed by the University of
231541Srgrimes *	California, Berkeley and its contributors.
241541Srgrimes * 4. Neither the name of the University nor the names of its contributors
251541Srgrimes *    may be used to endorse or promote products derived from this software
261541Srgrimes *    without specific prior written permission.
271541Srgrimes *
281541Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
291541Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
301541Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
311541Srgrimes * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
321541Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
331541Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
341541Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
351541Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
361541Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
371541Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
381541Srgrimes * SUCH DAMAGE.
391541Srgrimes *
401549Srgrimes *	from: @(#)vnode_pager.c	7.5 (Berkeley) 4/20/91
4131853Sdyson *	$Id: vnode_pager.c,v 1.76 1997/12/02 21:07:20 phk Exp $
421541Srgrimes */
431541Srgrimes
441541Srgrimes/*
451541Srgrimes * Page to/from files (vnodes).
461541Srgrimes */
471541Srgrimes
481549Srgrimes/*
491549Srgrimes * TODO:
509507Sdg *	Implement VOP_GETPAGES/PUTPAGES interface for filesystems. Will
517695Sdg *	greatly re-simplify the vnode_pager.
521549Srgrimes */
531549Srgrimes
541541Srgrimes#include <sys/param.h>
551541Srgrimes#include <sys/systm.h>
561541Srgrimes#include <sys/proc.h>
571541Srgrimes#include <sys/vnode.h>
581541Srgrimes#include <sys/mount.h>
599507Sdg#include <sys/buf.h>
6012662Sdg#include <sys/vmmeter.h>
611541Srgrimes
621541Srgrimes#include <vm/vm.h>
6312662Sdg#include <vm/vm_prot.h>
6412662Sdg#include <vm/vm_object.h>
651541Srgrimes#include <vm/vm_page.h>
669507Sdg#include <vm/vm_pager.h>
6731853Sdyson#include <vm/vm_map.h>
681541Srgrimes#include <vm/vnode_pager.h>
6912662Sdg#include <vm/vm_extern.h>
701541Srgrimes
7112820Sphkstatic vm_offset_t vnode_pager_addr __P((struct vnode *vp, vm_ooffset_t address,
7211943Sbde					 int *run));
7312820Sphkstatic void vnode_pager_iodone __P((struct buf *bp));
7412820Sphkstatic int vnode_pager_input_smlfs __P((vm_object_t object, vm_page_t m));
7512820Sphkstatic int vnode_pager_input_old __P((vm_object_t object, vm_page_t m));
7612820Sphkstatic void vnode_pager_dealloc __P((vm_object_t));
7712820Sphkstatic int vnode_pager_getpages __P((vm_object_t, vm_page_t *, int, int));
7812820Sphkstatic int vnode_pager_putpages __P((vm_object_t, vm_page_t *, int, boolean_t, int *));
7912820Sphkstatic boolean_t vnode_pager_haspage __P((vm_object_t, vm_pindex_t, int *, int *));
8011943Sbde
/*
 * Pager operations vector for vnode-backed objects.  Slot order follows
 * struct pagerops; the two NULL slots are entry points this pager does
 * not implement (first is presumably pgo_init, last a per-pager hook --
 * NOTE(review): confirm against vm_pager.h).
 */
struct pagerops vnodepagerops = {
	NULL,			/* no pager-wide init needed */
	vnode_pager_alloc,	/* allocate/lookup object for a vnode */
	vnode_pager_dealloc,	/* tear down vnode<->object link */
	vnode_pager_getpages,	/* page in */
	vnode_pager_putpages,	/* page out */
	vnode_pager_haspage,	/* backing-store existence test */
	NULL
};
901541Srgrimes
9111943Sbdestatic int vnode_pager_leaf_getpages __P((vm_object_t object, vm_page_t *m,
9211943Sbde					  int count, int reqpage));
9311943Sbdestatic int vnode_pager_leaf_putpages __P((vm_object_t object, vm_page_t *m,
9411943Sbde					  int count, boolean_t sync,
9511943Sbde					  int *rtvals));
9610556Sdyson
971541Srgrimes/*
981541Srgrimes * Allocate (or lookup) pager for a vnode.
991541Srgrimes * Handle is a vnode pointer.
1001541Srgrimes */
vm_object_t
vnode_pager_alloc(void *handle, vm_size_t size, vm_prot_t prot,
		  vm_ooffset_t offset)
{
	vm_object_t object;
	struct vnode *vp;

	/*
	 * Pageout to vnode, no can do yet.
	 */
	if (handle == NULL)
		return (NULL);

	vp = (struct vnode *) handle;

	/*
	 * Prevent race condition when allocating the object. This
	 * can happen with NFS vnodes since the nfsnode isn't locked.
	 * VOLOCK/VOWANT implement a hand-rolled sleep lock on the vnode
	 * for the duration of object creation.
	 */
	while (vp->v_flag & VOLOCK) {
		vp->v_flag |= VOWANT;
		tsleep(vp, PVM, "vnpobj", 0);
	}
	vp->v_flag |= VOLOCK;

	/*
	 * If the object is being terminated, wait for it to
	 * go away.
	 */
	while (((object = vp->v_object) != NULL) &&
		(object->flags & OBJ_DEAD)) {
		tsleep(object, PVM, "vadead", 0);
	}

	if (object == NULL) {
		/*
		 * And an object of the appropriate size
		 */
		object = vm_object_allocate(OBJT_VNODE, size);
		/* note: assignment (not |=) deliberately clears other flags */
		if (vp->v_type == VREG)
			object->flags = OBJ_CANPERSIST;
		else
			object->flags = 0;

		if (vp->v_usecount == 0)
			panic("vnode_pager_alloc: no vnode reference");
		/*
		 * Hold a reference to the vnode and initialize object data.
		 * The reference is taken by bumping v_usecount directly;
		 * it is released via vrele() in vnode_pager_dealloc().
		 */
		vp->v_usecount++;
		object->un_pager.vnp.vnp_size = (vm_ooffset_t) size * PAGE_SIZE;

		object->handle = handle;
		vp->v_object = object;
	} else {
		/*
		 * vm_object_reference() will remove the object from the cache if
		 * found and gain a reference to the object.
		 */
		vm_object_reference(object);
	}

	if (vp->v_type == VREG)
		vp->v_flag |= VVMIO;

	/* Drop the hand-rolled lock and wake any waiters. */
	vp->v_flag &= ~VOLOCK;
	if (vp->v_flag & VOWANT) {
		vp->v_flag &= ~VOWANT;
		wakeup(vp);
	}
	return (object);
}
1731541Srgrimes
/*
 * Tear down the pager association for an object: wait for in-flight
 * paging I/O to drain, then break the vnode<->object link and drop the
 * vnode reference taken in vnode_pager_alloc().
 */
static void
vnode_pager_dealloc(object)
	vm_object_t object;
{
	register struct vnode *vp = object->handle;

	if (vp == NULL)
		panic("vnode_pager_dealloc: pager already dealloced");

	/*
	 * Paging completion runs at interrupt time, so the wait for
	 * paging_in_progress to reach zero must be done at splbio.
	 */
	if (object->paging_in_progress) {
		int s = splbio();
		while (object->paging_in_progress) {
			object->flags |= OBJ_PIPWNT;
			tsleep(object, PVM, "vnpdea", 0);
		}
		splx(s);
	}

	object->handle = NULL;

	vp->v_object = NULL;
	vp->v_flag &= ~(VTEXT | VVMIO);
	vrele(vp);
}
1981541Srgrimes
/*
 * Return whether backing store exists for page `pindex' of the object,
 * using VOP_BMAP().  Optionally reports how many contiguous pages before
 * and after it are also resident on disk, converting VOP_BMAP's
 * filesystem-block run counts into page counts.
 */
static boolean_t
vnode_pager_haspage(object, pindex, before, after)
	vm_object_t object;
	vm_pindex_t pindex;
	int *before;
	int *after;
{
	struct vnode *vp = object->handle;
	daddr_t bn;
	int err;
	daddr_t reqblock;
	int poff;
	int bsize;
	int pagesperblock, blocksperpage;

	/*
	 * If filesystem no longer mounted or offset beyond end of file we do
	 * not have the page.
	 */
	if ((vp->v_mount == NULL) ||
		(IDX_TO_OFF(pindex) >= object->un_pager.vnp.vnp_size))
		return FALSE;

	bsize = vp->v_mount->mnt_stat.f_iosize;
	pagesperblock = bsize / PAGE_SIZE;
	blocksperpage = 0;
	/*
	 * Two regimes: fs block >= page (pagesperblock > 0), or fs block
	 * smaller than a page (blocksperpage > 0).  reqblock is the fs
	 * block containing (or starting at) the requested page.
	 */
	if (pagesperblock > 0) {
		reqblock = pindex / pagesperblock;
	} else {
		blocksperpage = (PAGE_SIZE / bsize);
		reqblock = pindex * blocksperpage;
	}
	err = VOP_BMAP(vp, reqblock, (struct vnode **) 0, &bn,
		after, before);
	/* On VOP_BMAP error, conservatively claim the page exists. */
	if (err)
		return TRUE;
	if ( bn == -1)
		return FALSE;
	if (pagesperblock > 0) {
		/* poff: page offset of pindex within its fs block */
		poff = pindex - (reqblock * pagesperblock);
		if (before) {
			*before *= pagesperblock;
			*before += poff;
		}
		if (after) {
			int numafter;
			*after *= pagesperblock;
			numafter = pagesperblock - (poff + 1);
			/*
			 * Clamp the trailing run so it does not extend past
			 * EOF.  NOTE(review): the clamp recomputes numafter
			 * from vnp_size - offset(pindex), which appears to
			 * count the requested page itself as well -- confirm
			 * intended off-by-one behavior.
			 */
			if (IDX_TO_OFF(pindex + numafter) > object->un_pager.vnp.vnp_size) {
				numafter = OFF_TO_IDX((object->un_pager.vnp.vnp_size - IDX_TO_OFF(pindex)));
			}
			*after += numafter;
		}
	} else {
		/* small-block fs: convert block runs down to whole pages */
		if (before) {
			*before /= blocksperpage;
		}

		if (after) {
			*after /= blocksperpage;
		}
	}
	return TRUE;
}
2631541Srgrimes
2641541Srgrimes/*
2651541Srgrimes * Lets the VM system know about a change in size for a file.
2669507Sdg * We adjust our own internal size and flush any cached pages in
2671541Srgrimes * the associated object that are affected by the size change.
2681541Srgrimes *
2691541Srgrimes * Note: this routine may be invoked as a result of a pager put
2701541Srgrimes * operation (possibly at object termination time), so we must be careful.
2711541Srgrimes */
void
vnode_pager_setsize(vp, nsize)
	struct vnode *vp;
	vm_ooffset_t nsize;
{
	vm_object_t object = vp->v_object;

	/* Vnode has no VM object: nothing to adjust. */
	if (object == NULL)
		return;

	/*
	 * Hasn't changed size
	 */
	if (nsize == object->un_pager.vnp.vnp_size)
		return;

	/*
	 * File has shrunk. Toss any cached pages beyond the new EOF.
	 */
	if (nsize < object->un_pager.vnp.vnp_size) {
		vm_ooffset_t nsizerounded;
		nsizerounded = IDX_TO_OFF(OFF_TO_IDX(nsize + PAGE_MASK));
		/*
		 * Only remove pages if the shrink crosses a page boundary;
		 * pages wholly past the rounded-up new size are freed.
		 */
		if (nsizerounded < object->un_pager.vnp.vnp_size) {
			vm_pindex_t st, end;
			st = OFF_TO_IDX(nsize + PAGE_MASK);
			end = OFF_TO_IDX(object->un_pager.vnp.vnp_size);

			vm_freeze_copyopts(object, OFF_TO_IDX(nsize), object->size);
			vm_object_page_remove(object, st, end, FALSE);
		}
		/*
		 * this gets rid of garbage at the end of a page that is now
		 * only partially backed by the vnode...
		 */
		if (nsize & PAGE_MASK) {
			vm_offset_t kva;
			vm_page_t m;

			m = vm_page_lookup(object, OFF_TO_IDX(nsize));
			if (m) {
				/* zero from new EOF to the end of the page */
				kva = vm_pager_map_page(m);
				bzero((caddr_t) kva + (nsize & PAGE_MASK),
				    (int) (round_page(nsize) - nsize));
				vm_pager_unmap_page(kva);
			}
		}
	}
	/* Record the new size (bytes and rounded-up pages). */
	object->un_pager.vnp.vnp_size = nsize;
	object->size = OFF_TO_IDX(nsize + PAGE_MASK);
}
3221541Srgrimes
/*
 * Uncache every vnode-backed object on mount point `mp', typically at
 * unmount time.  Restarts the scan from the head whenever a vnode is
 * found to have migrated off the mount list underneath us.
 */
void
vnode_pager_umount(mp)
	register struct mount *mp;
{
	struct proc *p = curproc;	/* XXX */
	struct vnode *vp, *nvp;

loop:
	for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) {
		/*
		 * Vnode can be reclaimed by getnewvnode() while we
		 * traverse the list.
		 */
		if (vp->v_mount != mp)
			goto loop;

		/*
		 * Save the next pointer now since uncaching may terminate the
		 * object and render vnode invalid
		 */
		nvp = vp->v_mntvnodes.le_next;

		if (vp->v_object != NULL) {
			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
			vnode_pager_uncache(vp, p);
			VOP_UNLOCK(vp, 0, p);
		}
	}
}
3521541Srgrimes
3531541Srgrimes/*
3541541Srgrimes * Remove vnode associated object from the object cache.
3557162Sdg * This routine must be called with the vnode locked.
3561541Srgrimes *
3577162Sdg * XXX unlock the vnode.
3587162Sdg * We must do this since uncaching the object may result in its
3597162Sdg * destruction which may initiate paging activity which may necessitate
3607162Sdg * re-locking the vnode.
3611549Srgrimes */
void
vnode_pager_uncache(vp, p)
	struct vnode *vp;
	struct proc *p;
{
	vm_object_t object;

	/*
	 * Not a mapped vnode
	 */
	object = vp->v_object;
	if (object == NULL)
		return;

	/* Hold the object so it can't disappear while we work on it. */
	vm_object_reference(object);
	vm_freeze_copyopts(object, 0, object->size);

	/*
	 * XXX We really should handle locking on
	 * VBLK devices...
	 *
	 * The vnode must be unlocked around pager_cache(): uncaching may
	 * destroy the object, which can trigger paging activity that
	 * needs to re-lock the vnode (see block comment above).
	 */
	if (vp->v_type != VBLK)
		VOP_UNLOCK(vp, 0, p);
	pager_cache(object, FALSE);
	if (vp->v_type != VBLK)
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
	return;
}
3901541Srgrimes
3911541Srgrimes
/*
 * Release a page the pager no longer needs: wake any thread sleeping
 * on the page first, then return it to the free list.
 */
void
vnode_pager_freepage(m)
	vm_page_t m;
{
	PAGE_WAKEUP(m);
	vm_page_free(m);
}
3991549Srgrimes
4001549Srgrimes/*
4011549Srgrimes * calculate the linear (byte) disk address of specified virtual
4021549Srgrimes * file address
4031549Srgrimes */
40412820Sphkstatic vm_offset_t
4056151Sdgvnode_pager_addr(vp, address, run)
4061549Srgrimes	struct vnode *vp;
40712767Sdyson	vm_ooffset_t address;
4086151Sdg	int *run;
4091549Srgrimes{
4105455Sdg	int rtaddress;
4115455Sdg	int bsize;
41212767Sdyson	daddr_t block;
4131549Srgrimes	struct vnode *rtvp;
4145455Sdg	int err;
41512767Sdyson	daddr_t vblock;
41612767Sdyson	int voffset;
4171549Srgrimes
4185455Sdg	if ((int) address < 0)
4195455Sdg		return -1;
4205455Sdg
42111701Sdyson	if (vp->v_mount == NULL)
42211701Sdyson		return -1;
42311701Sdyson
4241549Srgrimes	bsize = vp->v_mount->mnt_stat.f_iosize;
4251549Srgrimes	vblock = address / bsize;
4261549Srgrimes	voffset = address % bsize;
4271549Srgrimes
42810551Sdyson	err = VOP_BMAP(vp, vblock, &rtvp, &block, run, NULL);
4291549Srgrimes
4306151Sdg	if (err || (block == -1))
4311549Srgrimes		rtaddress = -1;
4326151Sdg	else {
4336626Sdg		rtaddress = block + voffset / DEV_BSIZE;
4346151Sdg		if( run) {
4356151Sdg			*run += 1;
4366151Sdg			*run *= bsize/PAGE_SIZE;
4376151Sdg			*run -= voffset/PAGE_SIZE;
4386151Sdg		}
4396151Sdg	}
4401549Srgrimes
4411549Srgrimes	return rtaddress;
4421549Srgrimes}
4431549Srgrimes
4441549Srgrimes/*
4451549Srgrimes * interrupt routine for I/O completion
4461549Srgrimes */
static void
vnode_pager_iodone(bp)
	struct buf *bp;
{
	/*
	 * Runs at biodone time.  B_DONE must be set before the wakeup:
	 * the pager sleeps at splbio in a `while (!(b_flags & B_DONE))'
	 * loop, re-checking the flag after every wakeup.
	 */
	bp->b_flags |= B_DONE;
	wakeup(bp);
}
4541549Srgrimes
4551549Srgrimes/*
4561549Srgrimes * small block file system vnode pager input
4571549Srgrimes */
/*
 * Page-in path for filesystems whose block size is smaller than a page.
 * Reads each fs-sized chunk of the page with its own synchronous pbuf
 * I/O, zero-filling chunks that have no backing store (holes).
 */
static int
vnode_pager_input_smlfs(object, m)
	vm_object_t object;
	vm_page_t m;
{
	int i;
	int s;
	struct vnode *dp, *vp;
	struct buf *bp;
	vm_offset_t kva;
	int fileaddr;
	vm_offset_t bsize;
	int error = 0;

	vp = object->handle;
	if (vp->v_mount == NULL)
		return VM_PAGER_BAD;

	bsize = vp->v_mount->mnt_stat.f_iosize;


	/* resolve the underlying device vnode */
	VOP_BMAP(vp, 0, &dp, 0, NULL, NULL);

	kva = vm_pager_map_page(m);

	for (i = 0; i < PAGE_SIZE / bsize; i++) {

		/* skip chunks already valid in the page */
		if ((vm_page_bits(IDX_TO_OFF(m->pindex) + i * bsize, bsize) & m->valid))
			continue;

		fileaddr = vnode_pager_addr(vp,
			IDX_TO_OFF(m->pindex) + i * bsize, (int *)0);
		if (fileaddr != -1) {
			bp = getpbuf();

			/* build a minimal buffer header */
			bp->b_flags = B_BUSY | B_READ | B_CALL;
			bp->b_iodone = vnode_pager_iodone;
			bp->b_proc = curproc;
			bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
			if (bp->b_rcred != NOCRED)
				crhold(bp->b_rcred);
			if (bp->b_wcred != NOCRED)
				crhold(bp->b_wcred);
			bp->b_data = (caddr_t) kva + i * bsize;
			bp->b_blkno = fileaddr;
			pbgetvp(dp, bp);
			bp->b_bcount = bsize;
			bp->b_bufsize = bsize;

			/* do the input */
			VOP_STRATEGY(bp);

			/* we definitely need to be at splbio here */

			s = splbio();
			while ((bp->b_flags & B_DONE) == 0) {
				tsleep(bp, PVM, "vnsrd", 0);
			}
			splx(s);
			if ((bp->b_flags & B_ERROR) != 0)
				error = EIO;

			/*
			 * free the buffer header back to the swap buffer pool
			 */
			relpbuf(bp);
			if (error)
				break;

			vm_page_set_validclean(m, (i * bsize) & PAGE_MASK, bsize);
		} else {
			/* hole: mark valid and supply zeroes ourselves */
			vm_page_set_validclean(m, (i * bsize) & PAGE_MASK, bsize);
			bzero((caddr_t) kva + i * bsize, bsize);
		}
	}
	vm_pager_unmap_page(kva);
	pmap_clear_modify(VM_PAGE_TO_PHYS(m));
	m->flags &= ~PG_ZERO;
	if (error) {
		return VM_PAGER_ERROR;
	}
	return VM_PAGER_OK;

}
5431549Srgrimes
5441549Srgrimes
/*
 * old style vnode pager input routine
 */
/*
 * Fallback page-in path for filesystems without VOP_BMAP support:
 * reads the page through VOP_READ() into a temporary kernel mapping,
 * zero-filling whatever the read did not cover.
 */
static int
vnode_pager_input_old(object, m)
	vm_object_t object;
	vm_page_t m;
{
	struct uio auio;
	struct iovec aiov;
	int error;
	int size;
	vm_offset_t kva;

	error = 0;

	/*
	 * Return failure if beyond current EOF
	 */
	if (IDX_TO_OFF(m->pindex) >= object->un_pager.vnp.vnp_size) {
		return VM_PAGER_BAD;
	} else {
		size = PAGE_SIZE;
		/* clamp the read so it doesn't run past EOF */
		if (IDX_TO_OFF(m->pindex) + size > object->un_pager.vnp.vnp_size)
			size = object->un_pager.vnp.vnp_size - IDX_TO_OFF(m->pindex);

		/*
		 * Allocate a kernel virtual address and initialize so that
		 * we can use VOP_READ/WRITE routines.
		 */
		kva = vm_pager_map_page(m);

		aiov.iov_base = (caddr_t) kva;
		aiov.iov_len = size;
		auio.uio_iov = &aiov;
		auio.uio_iovcnt = 1;
		auio.uio_offset = IDX_TO_OFF(m->pindex);
		auio.uio_segflg = UIO_SYSSPACE;
		auio.uio_rw = UIO_READ;
		auio.uio_resid = size;
		auio.uio_procp = (struct proc *) 0;

		error = VOP_READ(object->handle, &auio, 0, curproc->p_ucred);
		if (!error) {
			register int count = size - auio.uio_resid;

			/* zero-length read is treated as an error; short
			 * read leaves the tail zero-filled */
			if (count == 0)
				error = EINVAL;
			else if (count != PAGE_SIZE)
				bzero((caddr_t) kva + count, PAGE_SIZE - count);
		}
		vm_pager_unmap_page(kva);
	}
	pmap_clear_modify(VM_PAGE_TO_PHYS(m));
	m->dirty = 0;
	m->flags &= ~PG_ZERO;
	return error ? VM_PAGER_ERROR : VM_PAGER_OK;
}
6031549Srgrimes
6041549Srgrimes/*
6051549Srgrimes * generic vnode pager input routine
6061549Srgrimes */
60710556Sdyson
60812820Sphkstatic int
6099507Sdgvnode_pager_getpages(object, m, count, reqpage)
6109507Sdg	vm_object_t object;
6111549Srgrimes	vm_page_t *m;
6129507Sdg	int count;
6139507Sdg	int reqpage;
6141549Srgrimes{
61510556Sdyson	int rtval;
61610556Sdyson	struct vnode *vp;
61718973Sdyson	if (object->flags & OBJ_VNODE_GONE)
61818973Sdyson		return VM_PAGER_ERROR;
61910556Sdyson	vp = object->handle;
62011701Sdyson	rtval = VOP_GETPAGES(vp, m, count*PAGE_SIZE, reqpage, 0);
62110556Sdyson	if (rtval == EOPNOTSUPP)
62211943Sbde		return vnode_pager_leaf_getpages(object, m, count, reqpage);
62310556Sdyson	else
62410556Sdyson		return rtval;
62510556Sdyson}
62610556Sdyson
62710556Sdysonstatic int
62810556Sdysonvnode_pager_leaf_getpages(object, m, count, reqpage)
62910556Sdyson	vm_object_t object;
63010556Sdyson	vm_page_t *m;
63110556Sdyson	int count;
63210556Sdyson	int reqpage;
63310556Sdyson{
63412767Sdyson	vm_offset_t kva;
63512767Sdyson	off_t foff;
6369507Sdg	int i, size, bsize, first, firstaddr;
6371549Srgrimes	struct vnode *dp, *vp;
6386151Sdg	int runpg;
6396151Sdg	int runend;
6407178Sdg	struct buf *bp;
6415455Sdg	int s;
6425455Sdg	int error = 0;
6431549Srgrimes
6449507Sdg	vp = object->handle;
64511701Sdyson	if (vp->v_mount == NULL)
64611701Sdyson		return VM_PAGER_BAD;
64711701Sdyson
6481549Srgrimes	bsize = vp->v_mount->mnt_stat.f_iosize;
6491549Srgrimes
6501549Srgrimes	/* get the UNDERLYING device for the file with VOP_BMAP() */
6511827Sdg
6521549Srgrimes	/*
6531827Sdg	 * originally, we did not check for an error return value -- assuming
6541827Sdg	 * an fs always has a bmap entry point -- that assumption is wrong!!!
6551549Srgrimes	 */
65612767Sdyson	foff = IDX_TO_OFF(m[reqpage]->pindex);
6571827Sdg
6581549Srgrimes	/*
6591887Sdg	 * if we can't bmap, use old VOP code
6601549Srgrimes	 */
66110551Sdyson	if (VOP_BMAP(vp, 0, &dp, 0, NULL, NULL)) {
6621549Srgrimes		for (i = 0; i < count; i++) {
6631549Srgrimes			if (i != reqpage) {
6641549Srgrimes				vnode_pager_freepage(m[i]);
6651549Srgrimes			}
6661549Srgrimes		}
6673612Sdg		cnt.v_vnodein++;
6683612Sdg		cnt.v_vnodepgsin++;
6699507Sdg		return vnode_pager_input_old(object, m[reqpage]);
6701549Srgrimes
6711827Sdg		/*
6721827Sdg		 * if the blocksize is smaller than a page size, then use
6731827Sdg		 * special small filesystem code.  NFS sometimes has a small
6741827Sdg		 * blocksize, but it can handle large reads itself.
6751827Sdg		 */
6761827Sdg	} else if ((PAGE_SIZE / bsize) > 1 &&
6775455Sdg	    (vp->v_mount->mnt_stat.f_type != MOUNT_NFS)) {
6781827Sdg
6791549Srgrimes		for (i = 0; i < count; i++) {
6801549Srgrimes			if (i != reqpage) {
6811549Srgrimes				vnode_pager_freepage(m[i]);
6821549Srgrimes			}
6831549Srgrimes		}
6843612Sdg		cnt.v_vnodein++;
6853612Sdg		cnt.v_vnodepgsin++;
6869507Sdg		return vnode_pager_input_smlfs(object, m[reqpage]);
6871549Srgrimes	}
6881549Srgrimes	/*
6895455Sdg	 * if ANY DEV_BSIZE blocks are valid on a large filesystem block
6905455Sdg	 * then, the entire page is valid --
69125930Sdfr	 * XXX no it isn't
6921549Srgrimes	 */
69325930Sdfr
69425930Sdfr	if (m[reqpage]->valid != VM_PAGE_BITS_ALL)
69525930Sdfr	    m[reqpage]->valid = 0;
69625930Sdfr
6975455Sdg	if (m[reqpage]->valid) {
6985455Sdg		m[reqpage]->valid = VM_PAGE_BITS_ALL;
6995455Sdg		for (i = 0; i < count; i++) {
7005455Sdg			if (i != reqpage)
7015455Sdg				vnode_pager_freepage(m[i]);
7021549Srgrimes		}
7035455Sdg		return VM_PAGER_OK;
7041549Srgrimes	}
7057178Sdg
7065455Sdg	/*
7075455Sdg	 * here on direct device I/O
7085455Sdg	 */
7091549Srgrimes
7106151Sdg	firstaddr = -1;
7111549Srgrimes	/*
7126151Sdg	 * calculate the run that includes the required page
7131549Srgrimes	 */
7146151Sdg	for(first = 0, i = 0; i < count; i = runend) {
71512767Sdyson		firstaddr = vnode_pager_addr(vp,
71612767Sdyson			IDX_TO_OFF(m[i]->pindex), &runpg);
7176151Sdg		if (firstaddr == -1) {
7189507Sdg			if (i == reqpage && foff < object->un_pager.vnp.vnp_size) {
7199507Sdg				panic("vnode_pager_putpages: unexpected missing page: firstaddr: %d, foff: %ld, vnp_size: %d",
7209507Sdg			   	 firstaddr, foff, object->un_pager.vnp.vnp_size);
7216151Sdg			}
7221549Srgrimes			vnode_pager_freepage(m[i]);
7236151Sdg			runend = i + 1;
7246151Sdg			first = runend;
7256151Sdg			continue;
7261549Srgrimes		}
7276151Sdg		runend = i + runpg;
7289507Sdg		if (runend <= reqpage) {
7296151Sdg			int j;
7309507Sdg			for (j = i; j < runend; j++) {
7316151Sdg				vnode_pager_freepage(m[j]);
7326151Sdg			}
7331549Srgrimes		} else {
7349507Sdg			if (runpg < (count - first)) {
7359507Sdg				for (i = first + runpg; i < count; i++)
7366151Sdg					vnode_pager_freepage(m[i]);
7376151Sdg				count = first + runpg;
7386151Sdg			}
7396151Sdg			break;
7401549Srgrimes		}
7416151Sdg		first = runend;
7421549Srgrimes	}
7431549Srgrimes
7441549Srgrimes	/*
7451827Sdg	 * the first and last page have been calculated now, move input pages
7461827Sdg	 * to be zero based...
7471549Srgrimes	 */
7481549Srgrimes	if (first != 0) {
7491549Srgrimes		for (i = first; i < count; i++) {
7501549Srgrimes			m[i - first] = m[i];
7511549Srgrimes		}
7521549Srgrimes		count -= first;
7531549Srgrimes		reqpage -= first;
7541549Srgrimes	}
7556151Sdg
7561549Srgrimes	/*
7571549Srgrimes	 * calculate the file virtual address for the transfer
7581549Srgrimes	 */
75912767Sdyson	foff = IDX_TO_OFF(m[0]->pindex);
7601827Sdg
7611549Srgrimes	/*
7621549Srgrimes	 * calculate the size of the transfer
7631549Srgrimes	 */
7641549Srgrimes	size = count * PAGE_SIZE;
7659507Sdg	if ((foff + size) > object->un_pager.vnp.vnp_size)
7669507Sdg		size = object->un_pager.vnp.vnp_size - foff;
7671549Srgrimes
7681549Srgrimes	/*
7691549Srgrimes	 * round up physical size for real devices
7701549Srgrimes	 */
7711827Sdg	if (dp->v_type == VBLK || dp->v_type == VCHR)
7721549Srgrimes		size = (size + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1);
7731549Srgrimes
7745841Sdg	bp = getpbuf();
7755455Sdg	kva = (vm_offset_t) bp->b_data;
7761887Sdg
7771549Srgrimes	/*
7781549Srgrimes	 * and map the pages to be read into the kva
7791549Srgrimes	 */
7801887Sdg	pmap_qenter(kva, m, count);
7811549Srgrimes
7821549Srgrimes	/* build a minimal buffer header */
7831549Srgrimes	bp->b_flags = B_BUSY | B_READ | B_CALL;
7841549Srgrimes	bp->b_iodone = vnode_pager_iodone;
7851549Srgrimes	/* B_PHYS is not set, but it is nice to fill this in */
7861549Srgrimes	bp->b_proc = curproc;
7871549Srgrimes	bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
7881827Sdg	if (bp->b_rcred != NOCRED)
7891549Srgrimes		crhold(bp->b_rcred);
7901827Sdg	if (bp->b_wcred != NOCRED)
7911549Srgrimes		crhold(bp->b_wcred);
7926626Sdg	bp->b_blkno = firstaddr;
7935455Sdg	pbgetvp(dp, bp);
7941549Srgrimes	bp->b_bcount = size;
7951549Srgrimes	bp->b_bufsize = size;
7961549Srgrimes
7973612Sdg	cnt.v_vnodein++;
7983612Sdg	cnt.v_vnodepgsin += count;
7993612Sdg
8001549Srgrimes	/* do the input */
8011549Srgrimes	VOP_STRATEGY(bp);
8023612Sdg
8031549Srgrimes	s = splbio();
8041549Srgrimes	/* we definitely need to be at splbio here */
8051549Srgrimes
8061549Srgrimes	while ((bp->b_flags & B_DONE) == 0) {
8079356Sdg		tsleep(bp, PVM, "vnread", 0);
8081549Srgrimes	}
8091549Srgrimes	splx(s);
8101549Srgrimes	if ((bp->b_flags & B_ERROR) != 0)
8111549Srgrimes		error = EIO;
8121549Srgrimes
8131549Srgrimes	if (!error) {
8141549Srgrimes		if (size != count * PAGE_SIZE)
8151827Sdg			bzero((caddr_t) kva + size, PAGE_SIZE * count - size);
8161549Srgrimes	}
8175455Sdg	pmap_qremove(kva, count);
8181549Srgrimes
8191549Srgrimes	/*
8201549Srgrimes	 * free the buffer header back to the swap buffer pool
8211549Srgrimes	 */
8221549Srgrimes	relpbuf(bp);
8231549Srgrimes
8241549Srgrimes	for (i = 0; i < count; i++) {
82517334Sdyson		pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
8265455Sdg		m[i]->dirty = 0;
8275455Sdg		m[i]->valid = VM_PAGE_BITS_ALL;
82810669Sdyson		m[i]->flags &= ~PG_ZERO;
8291549Srgrimes		if (i != reqpage) {
8301827Sdg
8311549Srgrimes			/*
8321827Sdg			 * whether or not to leave the page activated is up in
8331827Sdg			 * the air, but we should put the page on a page queue
8341827Sdg			 * somewhere. (it already is in the object). Result:
8351827Sdg			 * It appears that emperical results show that
8361827Sdg			 * deactivating pages is best.
8371549Srgrimes			 */
8381827Sdg
8391549Srgrimes			/*
8401827Sdg			 * just in case someone was asking for this page we
8411827Sdg			 * now tell them that it is ok to use
8421549Srgrimes			 */
8431549Srgrimes			if (!error) {
8445841Sdg				vm_page_deactivate(m[i]);
8451549Srgrimes				PAGE_WAKEUP(m[i]);
8461549Srgrimes			} else {
8471549Srgrimes				vnode_pager_freepage(m[i]);
8481549Srgrimes			}
8491549Srgrimes		}
8501549Srgrimes	}
8511549Srgrimes	if (error) {
8529507Sdg		printf("vnode_pager_getpages: I/O read error\n");
8531549Srgrimes	}
8544207Sdg	return (error ? VM_PAGER_ERROR : VM_PAGER_OK);
8551549Srgrimes}
8561549Srgrimes
85712820Sphkstatic int
85810556Sdysonvnode_pager_putpages(object, m, count, sync, rtvals)
85910556Sdyson	vm_object_t object;
86010556Sdyson	vm_page_t *m;
86110556Sdyson	int count;
86210556Sdyson	boolean_t sync;
86310556Sdyson	int *rtvals;
86410556Sdyson{
86510556Sdyson	int rtval;
86610556Sdyson	struct vnode *vp;
86718973Sdyson
86818973Sdyson	if (object->flags & OBJ_VNODE_GONE)
86918973Sdyson		return VM_PAGER_ERROR;
87018973Sdyson
87110556Sdyson	vp = object->handle;
87211701Sdyson	rtval = VOP_PUTPAGES(vp, m, count*PAGE_SIZE, sync, rtvals, 0);
87310556Sdyson	if (rtval == EOPNOTSUPP)
87411943Sbde		return vnode_pager_leaf_putpages(object, m, count, sync, rtvals);
87510556Sdyson	else
87610556Sdyson		return rtval;
87710556Sdyson}
87810556Sdyson
8791549Srgrimes/*
8801549Srgrimes * generic vnode pager output routine
8811549Srgrimes */
88210556Sdysonstatic int
88310556Sdysonvnode_pager_leaf_putpages(object, m, count, sync, rtvals)
8849507Sdg	vm_object_t object;
8851549Srgrimes	vm_page_t *m;
8865455Sdg	int count;
8879507Sdg	boolean_t sync;
8885455Sdg	int *rtvals;
8891549Srgrimes{
8907695Sdg	int i;
8911549Srgrimes
8927695Sdg	struct vnode *vp;
8937695Sdg	int maxsize, ncount;
89412767Sdyson	vm_ooffset_t poffset;
8957695Sdg	struct uio auio;
8967695Sdg	struct iovec aiov;
8977695Sdg	int error;
8981549Srgrimes
8999507Sdg	vp = object->handle;;
9001827Sdg	for (i = 0; i < count; i++)
9011549Srgrimes		rtvals[i] = VM_PAGER_AGAIN;
9021549Srgrimes
90312767Sdyson	if ((int) m[0]->pindex < 0) {
90412767Sdyson		printf("vnode_pager_putpages: attempt to write meta-data!!! -- 0x%x(%x)\n", m[0]->pindex, m[0]->dirty);
9057695Sdg		rtvals[0] = VM_PAGER_BAD;
9067695Sdg		return VM_PAGER_BAD;
9075455Sdg	}
9087178Sdg
9097695Sdg	maxsize = count * PAGE_SIZE;
9107695Sdg	ncount = count;
9111549Srgrimes
91212767Sdyson	poffset = IDX_TO_OFF(m[0]->pindex);
91312767Sdyson	if (maxsize + poffset > object->un_pager.vnp.vnp_size) {
91412767Sdyson		if (object->un_pager.vnp.vnp_size > poffset)
91512767Sdyson			maxsize = object->un_pager.vnp.vnp_size - poffset;
9168585Sdg		else
9178585Sdg			maxsize = 0;
91815583Sphk		ncount = btoc(maxsize);
9198585Sdg		if (ncount < count) {
9208585Sdg			for (i = ncount; i < count; i++) {
9217695Sdg				rtvals[i] = VM_PAGER_BAD;
9221549Srgrimes			}
92312767Sdyson#ifdef BOGUS
9248585Sdg			if (ncount == 0) {
92512767Sdyson				printf("vnode_pager_putpages: write past end of file: %d, %lu\n",
92612767Sdyson					poffset,
92712767Sdyson					(unsigned long) object->un_pager.vnp.vnp_size);
9287695Sdg				return rtvals[0];
9297695Sdg			}
93012767Sdyson#endif
9311549Srgrimes		}
9321541Srgrimes	}
9337695Sdg
9348585Sdg	for (i = 0; i < count; i++) {
9358585Sdg		m[i]->busy++;
9367695Sdg		m[i]->flags &= ~PG_BUSY;
9371549Srgrimes	}
9381827Sdg
9397695Sdg	aiov.iov_base = (caddr_t) 0;
9407695Sdg	aiov.iov_len = maxsize;
9417695Sdg	auio.uio_iov = &aiov;
9427695Sdg	auio.uio_iovcnt = 1;
94312767Sdyson	auio.uio_offset = poffset;
9447695Sdg	auio.uio_segflg = UIO_NOCOPY;
9457695Sdg	auio.uio_rw = UIO_WRITE;
9467695Sdg	auio.uio_resid = maxsize;
9477695Sdg	auio.uio_procp = (struct proc *) 0;
94812767Sdyson	error = VOP_WRITE(vp, &auio, IO_VMIO|(sync?IO_SYNC:0), curproc->p_ucred);
9493612Sdg	cnt.v_vnodeout++;
9507695Sdg	cnt.v_vnodepgsout += ncount;
9513612Sdg
9528585Sdg	if (error) {
9539507Sdg		printf("vnode_pager_putpages: I/O error %d\n", error);
9547695Sdg	}
9558585Sdg	if (auio.uio_resid) {
95612820Sphk		printf("vnode_pager_putpages: residual I/O %d at %ld\n",
95712767Sdyson			auio.uio_resid, m[0]->pindex);
9587695Sdg	}
9598585Sdg	for (i = 0; i < count; i++) {
9608585Sdg		m[i]->busy--;
9618585Sdg		if (i < ncount) {
9627695Sdg			rtvals[i] = VM_PAGER_OK;
9637695Sdg		}
9648585Sdg		if ((m[i]->busy == 0) && (m[i]->flags & PG_WANTED))
9659507Sdg			wakeup(m[i]);
9667695Sdg	}
9677695Sdg	return rtvals[0];
9687695Sdg}
9691549Srgrimes
9707695Sdgstruct vnode *
9719507Sdgvnode_pager_lock(object)
9729507Sdg	vm_object_t object;
9739507Sdg{
97422521Sdyson	struct proc *p = curproc;	/* XXX */
97522521Sdyson
9769507Sdg	for (; object != NULL; object = object->backing_object) {
9779507Sdg		if (object->type != OBJT_VNODE)
9787695Sdg			continue;
9791549Srgrimes
98030137Sdyson		vn_lock(object->handle,
98130137Sdyson			LK_NOPAUSE | LK_SHARED | LK_RETRY | LK_CANRECURSE, p);
9829507Sdg		return object->handle;
9831549Srgrimes	}
9849507Sdg	return NULL;
9857695Sdg}
986