vnode_pager.c revision 32071
/*
 * Copyright (c) 1990 University of Utah.
 * Copyright (c) 1991 The Regents of the University of California.
 * All rights reserved.
 * Copyright (c) 1993, 1994 John S. Dyson
 * Copyright (c) 1995, David Greenman
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)vnode_pager.c	7.5 (Berkeley) 4/20/91
 *	$Id: vnode_pager.c,v 1.77 1997/12/19 09:03:17 dyson Exp $
 */

/*
 * Page to/from files (vnodes).
 */

/*
 * TODO:
 *	Implement VOP_GETPAGES/PUTPAGES interface for filesystems. Will
 *	greatly simplify the vnode_pager.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/buf.h>
#include <sys/vmmeter.h>

#include <vm/vm.h>
#include <vm/vm_prot.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vm_map.h>
#include <vm/vnode_pager.h>
#include <vm/vm_extern.h>

static vm_offset_t vnode_pager_addr __P((struct vnode *vp, vm_ooffset_t address,
					 int *run));
static void vnode_pager_iodone __P((struct buf *bp));
static int vnode_pager_input_smlfs __P((vm_object_t object, vm_page_t m));
static int vnode_pager_input_old __P((vm_object_t object, vm_page_t m));
static void vnode_pager_dealloc __P((vm_object_t));
static int vnode_pager_getpages __P((vm_object_t, vm_page_t *, int, int));
static int vnode_pager_putpages __P((vm_object_t, vm_page_t *, int, boolean_t, int *));
static boolean_t vnode_pager_haspage __P((vm_object_t, vm_pindex_t, int *, int *));

struct pagerops vnodepagerops = {
	NULL,			/* pgo_init: no pager-wide initialization */
	vnode_pager_alloc,
	vnode_pager_dealloc,
	vnode_pager_getpages,
	vnode_pager_putpages,
	vnode_pager_haspage,
	NULL			/* pgo_pageunswapped: swap pager only */
};

static int vnode_pager_leaf_getpages __P((vm_object_t object, vm_page_t *m,
					  int count, int reqpage));
static int vnode_pager_leaf_putpages __P((vm_object_t object, vm_page_t *m,
					  int count, boolean_t sync,
					  int *rtvals));

/*
 * Allocate (or lookup) pager for a vnode.
 * Handle is a vnode pointer.
 */
vm_object_t
vnode_pager_alloc(void *handle, vm_size_t size, vm_prot_t prot,
		  vm_ooffset_t offset)
{
	vm_object_t object;
	struct vnode *vp;

	/*
	 * Pageout to vnode, no can do yet.
	 */
	if (handle == NULL)
		return (NULL);

	vp = (struct vnode *) handle;

	/*
	 * Prevent race condition when allocating the object. This
	 * can happen with NFS vnodes since the nfsnode isn't locked.
	 */
	while (vp->v_flag & VOLOCK) {
		vp->v_flag |= VOWANT;
		tsleep(vp, PVM, "vnpobj", 0);
	}
	vp->v_flag |= VOLOCK;

	/*
	 * If the object is being terminated, wait for it to
	 * go away.
	 */
	while (((object = vp->v_object) != NULL) &&
		(object->flags & OBJ_DEAD)) {
		tsleep(object, PVM, "vadead", 0);
	}

	if (vp->v_usecount == 0)
		panic("vnode_pager_alloc: no vnode reference");

	if (object == NULL) {
		/*
		 * Allocate an object of the appropriate size.
		 */
		object = vm_object_allocate(OBJT_VNODE, size);
		if (vp->v_type == VREG)
			object->flags = OBJ_CANPERSIST;
		else
			object->flags = 0;

		object->un_pager.vnp.vnp_size = (vm_ooffset_t) size * PAGE_SIZE;

		object->handle = handle;
		vp->v_object = object;
	} else {
		/*
		 * vm_object_reference() will remove the object from the cache if
		 * found and gain a reference to the object.
		 */
		vm_object_reference(object);
	}

	if (vp->v_type == VREG)
		vp->v_flag |= VVMIO;

	vp->v_flag &= ~VOLOCK;
	if (vp->v_flag & VOWANT) {
		vp->v_flag &= ~VOWANT;
		wakeup(vp);
	}
	return (object);
}

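/*
 * Detach a pager object from its vnode.  In-flight paging I/O is drained
 * first (via OBJ_PIPWNT) so the object is quiescent before the handle and
 * the vnode's v_object linkage are cleared.
 */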
static void
vnode_pager_dealloc(object)
	vm_object_t object;
{
	register struct vnode *vp = object->handle;

	if (vp == NULL)
		panic("vnode_pager_dealloc: pager already dealloced");

	if (object->paging_in_progress) {
		int s = splbio();
		while (object->paging_in_progress) {
			object->flags |= OBJ_PIPWNT;
			tsleep(object, PVM, "vnpdea", 0);
		}
		splx(s);
	}

	object->handle = NULL;

	vp->v_object = NULL;
	vp->v_flag &= ~(VTEXT | VVMIO);
}

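/*
 * Determine whether backing store exists for the page at 'pindex', and
 * report via *before/*after how many contiguous pages on either side are
 * also backed.  VOP_BMAP() answers in filesystem blocks, so the run
 * lengths are scaled between blocks and pages below.
 */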
static boolean_t
vnode_pager_haspage(object, pindex, before, after)
	vm_object_t object;
	vm_pindex_t pindex;
	int *before;
	int *after;
{
	struct vnode *vp = object->handle;
	daddr_t bn;
	int err;
	daddr_t reqblock;
	int poff;
	int bsize;
	int pagesperblock, blocksperpage;

	/*
	 * If filesystem no longer mounted or offset beyond end of file we do
	 * not have the page.
	 */
	if ((vp->v_mount == NULL) ||
		(IDX_TO_OFF(pindex) >= object->un_pager.vnp.vnp_size))
		return FALSE;

	bsize = vp->v_mount->mnt_stat.f_iosize;
	pagesperblock = bsize / PAGE_SIZE;
	blocksperpage = 0;
	if (pagesperblock > 0) {
		reqblock = pindex / pagesperblock;
	} else {
		blocksperpage = (PAGE_SIZE / bsize);
		reqblock = pindex * blocksperpage;
	}
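	/*
	 * Example: with 8K filesystem blocks and 4K pages,
	 * pagesperblock == 2, so page index 5 lives in block 2 and
	 * poff (computed below) is 1.
	 */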
	err = VOP_BMAP(vp, reqblock, (struct vnode **) 0, &bn,
		after, before);
	if (err)
		return TRUE;
	if (bn == -1)
		return FALSE;
	if (pagesperblock > 0) {
		poff = pindex - (reqblock * pagesperblock);
		if (before) {
			*before *= pagesperblock;
			*before += poff;
		}
		if (after) {
			int numafter;
			*after *= pagesperblock;
			numafter = pagesperblock - (poff + 1);
			if (IDX_TO_OFF(pindex + numafter) > object->un_pager.vnp.vnp_size) {
				numafter = OFF_TO_IDX((object->un_pager.vnp.vnp_size - IDX_TO_OFF(pindex)));
			}
			*after += numafter;
		}
	} else {
		if (before) {
			*before /= blocksperpage;
		}

		if (after) {
			*after /= blocksperpage;
		}
	}
	return TRUE;
}

/*
 * Lets the VM system know about a change in size for a file.
 * We adjust our own internal size and flush any cached pages in
 * the associated object that are affected by the size change.
 *
 * Note: this routine may be invoked as a result of a pager put
 * operation (possibly at object termination time), so we must be careful.
 */
void
vnode_pager_setsize(vp, nsize)
	struct vnode *vp;
	vm_ooffset_t nsize;
{
	vm_object_t object = vp->v_object;

	if (object == NULL)
		return;

	/*
	 * Hasn't changed size
	 */
	if (nsize == object->un_pager.vnp.vnp_size)
		return;

	/*
	 * File has shrunk. Toss any cached pages beyond the new EOF.
	 */
	if (nsize < object->un_pager.vnp.vnp_size) {
		vm_ooffset_t nsizerounded;
		nsizerounded = IDX_TO_OFF(OFF_TO_IDX(nsize + PAGE_MASK));
		if (nsizerounded < object->un_pager.vnp.vnp_size) {
			vm_pindex_t st, end;
			st = OFF_TO_IDX(nsize + PAGE_MASK);
			end = OFF_TO_IDX(object->un_pager.vnp.vnp_size);

			vm_freeze_copyopts(object, OFF_TO_IDX(nsize), object->size);
			vm_object_page_remove(object, st, end, FALSE);
		}
		/*
		 * this gets rid of garbage at the end of a page that is now
		 * only partially backed by the vnode...
		 */
		if (nsize & PAGE_MASK) {
			vm_offset_t kva;
			vm_page_t m;

			m = vm_page_lookup(object, OFF_TO_IDX(nsize));
			if (m) {
				kva = vm_pager_map_page(m);
				bzero((caddr_t) kva + (nsize & PAGE_MASK),
				    (int) (round_page(nsize) - nsize));
				vm_pager_unmap_page(kva);
			}
		}
	}
	object->un_pager.vnp.vnp_size = nsize;
	object->size = OFF_TO_IDX(nsize + PAGE_MASK);
}

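/*
 * Release a page that was grabbed for paging but will not be handed to
 * the requester: wake any waiters on it, then free it.
 */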
void
vnode_pager_freepage(m)
	vm_page_t m;
{
	PAGE_WAKEUP(m);
	vm_page_free(m);
}

/*
 * calculate the disk block address (in DEV_BSIZE units) that backs the
 * specified byte offset in the file
 */
static vm_offset_t
vnode_pager_addr(vp, address, run)
	struct vnode *vp;
	vm_ooffset_t address;
	int *run;
{
	int rtaddress;
	int bsize;
	daddr_t block;
	struct vnode *rtvp;
	int err;
	daddr_t vblock;
	int voffset;

	if ((int) address < 0)
		return -1;

	if (vp->v_mount == NULL)
		return -1;

	bsize = vp->v_mount->mnt_stat.f_iosize;
	vblock = address / bsize;
	voffset = address % bsize;

	err = VOP_BMAP(vp, vblock, &rtvp, &block, run, NULL);

	if (err || (block == -1))
		rtaddress = -1;
	else {
		rtaddress = block + voffset / DEV_BSIZE;
		if (run) {
			/*
			 * VOP_BMAP reports the number of contiguous blocks
			 * following vblock: count vblock itself, convert
			 * blocks to pages, then subtract the pages that
			 * precede voffset within the first block.
			 */
			*run += 1;
			*run *= bsize/PAGE_SIZE;
			*run -= voffset/PAGE_SIZE;
		}
	}

	return rtaddress;
}

/*
 * interrupt routine for I/O completion
 */
static void
vnode_pager_iodone(bp)
	struct buf *bp;
{
	bp->b_flags |= B_DONE;
	wakeup(bp);
}

/*
 * small block file system vnode pager input:
 * read the page one filesystem block at a time, for filesystems whose
 * block size is smaller than the page size.  Blocks with no backing
 * store are simply zero-filled.
 */
static int
vnode_pager_input_smlfs(object, m)
	vm_object_t object;
	vm_page_t m;
{
	int i;
	int s;
	struct vnode *dp, *vp;
	struct buf *bp;
	vm_offset_t kva;
	int fileaddr;
	vm_offset_t bsize;
	int error = 0;

	vp = object->handle;
	if (vp->v_mount == NULL)
		return VM_PAGER_BAD;

	bsize = vp->v_mount->mnt_stat.f_iosize;

	VOP_BMAP(vp, 0, &dp, 0, NULL, NULL);

	kva = vm_pager_map_page(m);

	for (i = 0; i < PAGE_SIZE / bsize; i++) {

		if ((vm_page_bits(IDX_TO_OFF(m->pindex) + i * bsize, bsize) & m->valid))
			continue;

		fileaddr = vnode_pager_addr(vp,
			IDX_TO_OFF(m->pindex) + i * bsize, (int *)0);
		if (fileaddr != -1) {
			bp = getpbuf();

			/* build a minimal buffer header */
			bp->b_flags = B_BUSY | B_READ | B_CALL;
			bp->b_iodone = vnode_pager_iodone;
			bp->b_proc = curproc;
			bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
			if (bp->b_rcred != NOCRED)
				crhold(bp->b_rcred);
			if (bp->b_wcred != NOCRED)
				crhold(bp->b_wcred);
			bp->b_data = (caddr_t) kva + i * bsize;
			bp->b_blkno = fileaddr;
			pbgetvp(dp, bp);
			bp->b_bcount = bsize;
			bp->b_bufsize = bsize;

			/* do the input */
			VOP_STRATEGY(bp);

			/* we definitely need to be at splbio here */
			s = splbio();
			while ((bp->b_flags & B_DONE) == 0) {
				tsleep(bp, PVM, "vnsrd", 0);
			}
			splx(s);
			if ((bp->b_flags & B_ERROR) != 0)
				error = EIO;

			/*
			 * free the buffer header back to the swap buffer pool
			 */
			relpbuf(bp);
			if (error)
				break;

			vm_page_set_validclean(m, (i * bsize) & PAGE_MASK, bsize);
		} else {
			vm_page_set_validclean(m, (i * bsize) & PAGE_MASK, bsize);
			bzero((caddr_t) kva + i * bsize, bsize);
		}
	}
	vm_pager_unmap_page(kva);
	pmap_clear_modify(VM_PAGE_TO_PHYS(m));
	m->flags &= ~PG_ZERO;
	if (error) {
		return VM_PAGER_ERROR;
	}
	return VM_PAGER_OK;
}

/*
 * old style vnode pager input routine, using VOP_READ
 */
static int
vnode_pager_input_old(object, m)
	vm_object_t object;
	vm_page_t m;
{
	struct uio auio;
	struct iovec aiov;
	int error;
	int size;
	vm_offset_t kva;

	error = 0;

	/*
	 * Return failure if beyond current EOF
	 */
	if (IDX_TO_OFF(m->pindex) >= object->un_pager.vnp.vnp_size) {
		return VM_PAGER_BAD;
	} else {
		size = PAGE_SIZE;
		if (IDX_TO_OFF(m->pindex) + size > object->un_pager.vnp.vnp_size)
			size = object->un_pager.vnp.vnp_size - IDX_TO_OFF(m->pindex);

		/*
		 * Allocate a kernel virtual address and initialize so that
		 * we can use VOP_READ/WRITE routines.
		 */
		kva = vm_pager_map_page(m);

		aiov.iov_base = (caddr_t) kva;
		aiov.iov_len = size;
		auio.uio_iov = &aiov;
		auio.uio_iovcnt = 1;
		auio.uio_offset = IDX_TO_OFF(m->pindex);
		auio.uio_segflg = UIO_SYSSPACE;
		auio.uio_rw = UIO_READ;
		auio.uio_resid = size;
		auio.uio_procp = (struct proc *) 0;

		error = VOP_READ(object->handle, &auio, 0, curproc->p_ucred);
		if (!error) {
			register int count = size - auio.uio_resid;

			if (count == 0)
				error = EINVAL;
			else if (count != PAGE_SIZE)
				bzero((caddr_t) kva + count, PAGE_SIZE - count);
		}
		vm_pager_unmap_page(kva);
	}
	pmap_clear_modify(VM_PAGE_TO_PHYS(m));
	m->dirty = 0;
	m->flags &= ~PG_ZERO;
	return error ? VM_PAGER_ERROR : VM_PAGER_OK;
}

/*
 * generic vnode pager input routine
 */

static int
vnode_pager_getpages(object, m, count, reqpage)
	vm_object_t object;
	vm_page_t *m;
	int count;
	int reqpage;
{
	int rtval;
	struct vnode *vp;

	if (object->flags & OBJ_VNODE_GONE)
		return VM_PAGER_ERROR;
	vp = object->handle;
	/*
	 * Let the filesystem try first; one without a native VOP_GETPAGES
	 * returns EOPNOTSUPP, selecting the generic leaf routine below.
	 */
	rtval = VOP_GETPAGES(vp, m, count*PAGE_SIZE, reqpage, 0);
	if (rtval == EOPNOTSUPP)
		return vnode_pager_leaf_getpages(object, m, count, reqpage);
	else
		return rtval;
}

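/*
 * Generic clustered read: map the request onto a contiguous run of disk
 * blocks with VOP_BMAP(), wire the pages into a pbuf, and issue a single
 * VOP_STRATEGY read for the whole run.  Falls back to the old VOP_READ
 * path when the filesystem cannot bmap, or to the small-block path when
 * the filesystem block size is smaller than a page.
 */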
static int
vnode_pager_leaf_getpages(object, m, count, reqpage)
	vm_object_t object;
	vm_page_t *m;
	int count;
	int reqpage;
{
	vm_offset_t kva;
	off_t foff;
	int i, size, bsize, first, firstaddr;
	struct vnode *dp, *vp;
	int runpg;
	int runend;
	struct buf *bp;
	int s;
	int error = 0;

	vp = object->handle;
	if (vp->v_mount == NULL)
		return VM_PAGER_BAD;

	bsize = vp->v_mount->mnt_stat.f_iosize;

	/* get the UNDERLYING device for the file with VOP_BMAP() */

	/*
	 * originally, we did not check for an error return value -- assuming
	 * an fs always has a bmap entry point -- that assumption is wrong!!!
	 */
	foff = IDX_TO_OFF(m[reqpage]->pindex);

	/*
	 * if we can't bmap, use old VOP code
	 */
	if (VOP_BMAP(vp, 0, &dp, 0, NULL, NULL)) {
		for (i = 0; i < count; i++) {
			if (i != reqpage) {
				vnode_pager_freepage(m[i]);
			}
		}
		cnt.v_vnodein++;
		cnt.v_vnodepgsin++;
		return vnode_pager_input_old(object, m[reqpage]);

		/*
		 * if the blocksize is smaller than a page size, then use
		 * special small filesystem code.  NFS sometimes has a small
		 * blocksize, but it can handle large reads itself.
		 */
	} else if ((PAGE_SIZE / bsize) > 1 &&
	    (vp->v_mount->mnt_stat.f_type != MOUNT_NFS)) {

		for (i = 0; i < count; i++) {
			if (i != reqpage) {
				vnode_pager_freepage(m[i]);
			}
		}
		cnt.v_vnodein++;
		cnt.v_vnodepgsin++;
		return vnode_pager_input_smlfs(object, m[reqpage]);
	}
	/*
	 * if ANY DEV_BSIZE blocks are valid on a large filesystem block
	 * then, the entire page is valid --
	 * XXX no it isn't
	 */

	if (m[reqpage]->valid != VM_PAGE_BITS_ALL)
	    m[reqpage]->valid = 0;

	if (m[reqpage]->valid) {
		m[reqpage]->valid = VM_PAGE_BITS_ALL;
		for (i = 0; i < count; i++) {
			if (i != reqpage)
				vnode_pager_freepage(m[i]);
		}
		return VM_PAGER_OK;
	}

	/*
	 * here on direct device I/O
	 */

	firstaddr = -1;
	/*
	 * calculate the run that includes the required page.  Runs that do
	 * not include it are freed as they are found; e.g. with count == 4
	 * and reqpage == 2, a run covering only pages 0-1 is discarded and
	 * scanning resumes at page 2.
	 */
	for (first = 0, i = 0; i < count; i = runend) {
		firstaddr = vnode_pager_addr(vp,
			IDX_TO_OFF(m[i]->pindex), &runpg);
		if (firstaddr == -1) {
			if (i == reqpage && foff < object->un_pager.vnp.vnp_size) {
				panic("vnode_pager_getpages: unexpected missing page: firstaddr: %d, foff: %ld, vnp_size: %ld",
				    firstaddr, (long)foff,
				    (long)object->un_pager.vnp.vnp_size);
			}
			vnode_pager_freepage(m[i]);
			runend = i + 1;
			first = runend;
			continue;
		}
		runend = i + runpg;
		if (runend <= reqpage) {
			int j;
			for (j = i; j < runend; j++) {
				vnode_pager_freepage(m[j]);
			}
		} else {
			if (runpg < (count - first)) {
				for (i = first + runpg; i < count; i++)
					vnode_pager_freepage(m[i]);
				count = first + runpg;
			}
			break;
		}
		first = runend;
	}

	/*
	 * the first and last page have been calculated now, move input pages
	 * to be zero based...
	 */
	if (first != 0) {
		for (i = first; i < count; i++) {
			m[i - first] = m[i];
		}
		count -= first;
		reqpage -= first;
	}

	/*
	 * calculate the file virtual address for the transfer
	 */
	foff = IDX_TO_OFF(m[0]->pindex);

	/*
	 * calculate the size of the transfer
	 */
	size = count * PAGE_SIZE;
	if ((foff + size) > object->un_pager.vnp.vnp_size)
		size = object->un_pager.vnp.vnp_size - foff;

	/*
	 * round up physical size for real devices
	 */
	if (dp->v_type == VBLK || dp->v_type == VCHR)
		size = (size + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1);

	bp = getpbuf();
	kva = (vm_offset_t) bp->b_data;

	/*
	 * and map the pages to be read into the kva
	 */
	pmap_qenter(kva, m, count);

	/* build a minimal buffer header */
	bp->b_flags = B_BUSY | B_READ | B_CALL;
	bp->b_iodone = vnode_pager_iodone;
	/* B_PHYS is not set, but it is nice to fill this in */
	bp->b_proc = curproc;
	bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
	if (bp->b_rcred != NOCRED)
		crhold(bp->b_rcred);
	if (bp->b_wcred != NOCRED)
		crhold(bp->b_wcred);
	bp->b_blkno = firstaddr;
	pbgetvp(dp, bp);
	bp->b_bcount = size;
	bp->b_bufsize = size;

	cnt.v_vnodein++;
	cnt.v_vnodepgsin += count;

	/* do the input */
	VOP_STRATEGY(bp);

	/* we definitely need to be at splbio here */
	s = splbio();
	while ((bp->b_flags & B_DONE) == 0) {
		tsleep(bp, PVM, "vnread", 0);
	}
	splx(s);
	if ((bp->b_flags & B_ERROR) != 0)
		error = EIO;

	if (!error) {
		if (size != count * PAGE_SIZE)
			bzero((caddr_t) kva + size, PAGE_SIZE * count - size);
	}
	pmap_qremove(kva, count);

	/*
	 * free the buffer header back to the swap buffer pool
	 */
	relpbuf(bp);

	for (i = 0; i < count; i++) {
		pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
		m[i]->dirty = 0;
		m[i]->valid = VM_PAGE_BITS_ALL;
		m[i]->flags &= ~PG_ZERO;
		if (i != reqpage) {

			/*
			 * whether or not to leave the page activated is up in
			 * the air, but we should put the page on a page queue
			 * somewhere (it already is in the object).  Empirical
			 * results show that deactivating the pages is best.
			 */

			/*
			 * just in case someone was asking for this page we
			 * now tell them that it is ok to use
			 */
			if (!error) {
				vm_page_deactivate(m[i]);
				PAGE_WAKEUP(m[i]);
			} else {
				vnode_pager_freepage(m[i]);
			}
		}
	}
	if (error) {
		printf("vnode_pager_getpages: I/O read error\n");
	}
	return (error ? VM_PAGER_ERROR : VM_PAGER_OK);
}

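/*
 * Let the filesystem try VOP_PUTPAGES first; one without a native
 * implementation returns EOPNOTSUPP, selecting the generic leaf routine
 * below, which pushes the pages out through VOP_WRITE with UIO_NOCOPY.
 */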
static int
vnode_pager_putpages(object, m, count, sync, rtvals)
	vm_object_t object;
	vm_page_t *m;
	int count;
	boolean_t sync;
	int *rtvals;
{
	int rtval;
	struct vnode *vp;

	if (object->flags & OBJ_VNODE_GONE)
		return VM_PAGER_ERROR;

	vp = object->handle;
	rtval = VOP_PUTPAGES(vp, m, count*PAGE_SIZE, sync, rtvals, 0);
	if (rtval == EOPNOTSUPP)
		return vnode_pager_leaf_putpages(object, m, count, sync, rtvals);
	else
		return rtval;
}

/*
 * generic vnode pager output routine
 */
static int
vnode_pager_leaf_putpages(object, m, count, sync, rtvals)
	vm_object_t object;
	vm_page_t *m;
	int count;
	boolean_t sync;
	int *rtvals;
{
	int i;

	struct vnode *vp;
	int maxsize, ncount;
	vm_ooffset_t poffset;
	struct uio auio;
	struct iovec aiov;
	int error;

	vp = object->handle;
	for (i = 0; i < count; i++)
		rtvals[i] = VM_PAGER_AGAIN;

	if ((int) m[0]->pindex < 0) {
		printf("vnode_pager_putpages: attempt to write meta-data!!! -- 0x%lx(%x)\n",
			(u_long)m[0]->pindex, m[0]->dirty);
		rtvals[0] = VM_PAGER_BAD;
		return VM_PAGER_BAD;
	}

	maxsize = count * PAGE_SIZE;
	ncount = count;

	poffset = IDX_TO_OFF(m[0]->pindex);
	if (maxsize + poffset > object->un_pager.vnp.vnp_size) {
		if (object->un_pager.vnp.vnp_size > poffset)
			maxsize = object->un_pager.vnp.vnp_size - poffset;
		else
			maxsize = 0;
		ncount = btoc(maxsize);
		if (ncount < count) {
			for (i = ncount; i < count; i++) {
				rtvals[i] = VM_PAGER_BAD;
			}
#ifdef BOGUS
			if (ncount == 0) {
				printf("vnode_pager_putpages: write past end of file: %d, %lu\n",
					poffset,
					(unsigned long) object->un_pager.vnp.vnp_size);
				return rtvals[0];
			}
#endif
		}
	}
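
	/*
	 * Soft-busy the pages and drop PG_BUSY: the VOP_WRITE below uses
	 * UIO_NOCOPY and looks the pages up again through the buffer
	 * cache, so it must be able to use them while the nonzero busy
	 * count still keeps them from being freed.
	 */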
	for (i = 0; i < count; i++) {
		m[i]->busy++;
		m[i]->flags &= ~PG_BUSY;
	}

	aiov.iov_base = (caddr_t) 0;
	aiov.iov_len = maxsize;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_offset = poffset;
	auio.uio_segflg = UIO_NOCOPY;
	auio.uio_rw = UIO_WRITE;
	auio.uio_resid = maxsize;
	auio.uio_procp = (struct proc *) 0;
	error = VOP_WRITE(vp, &auio, IO_VMIO|(sync?IO_SYNC:0), curproc->p_ucred);
	cnt.v_vnodeout++;
	cnt.v_vnodepgsout += ncount;

	if (error) {
		printf("vnode_pager_putpages: I/O error %d\n", error);
	}
	if (auio.uio_resid) {
		printf("vnode_pager_putpages: residual I/O %d at %ld\n",
			auio.uio_resid, (long)m[0]->pindex);
	}
	for (i = 0; i < count; i++) {
		m[i]->busy--;
		if (i < ncount) {
			rtvals[i] = VM_PAGER_OK;
		}
		if ((m[i]->busy == 0) && (m[i]->flags & PG_WANTED))
			wakeup(m[i]);
	}
	return rtvals[0];
}

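/*
 * Walk the backing-object chain and shared-lock the vnode of the first
 * vnode-backed object found; returns that vnode, or NULL if the chain
 * contains no vnode object.
 */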
struct vnode *
vnode_pager_lock(object)
	vm_object_t object;
{
	struct proc *p = curproc;	/* XXX */

	for (; object != NULL; object = object->backing_object) {
		if (object->type != OBJT_VNODE)
			continue;

		vn_lock(object->handle,
			LK_NOPAUSE | LK_SHARED | LK_RETRY | LK_CANRECURSE, p);
		return object->handle;
	}
	return NULL;
}