vnode_pager.c revision 33109
/*
 * Copyright (c) 1990 University of Utah.
 * Copyright (c) 1991 The Regents of the University of California.
 * All rights reserved.
 * Copyright (c) 1993, 1994 John S. Dyson
 * Copyright (c) 1995, David Greenman
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)vnode_pager.c	7.5 (Berkeley) 4/20/91
 *	$Id: vnode_pager.c,v 1.82 1998/02/04 22:34:03 eivind Exp $
 */

/*
 * Page to/from files (vnodes).
 */

/*
 * TODO:
 *	Implement VOP_GETPAGES/PUTPAGES interface for filesystems. Will
 *	greatly re-simplify the vnode_pager.
 */

#include "opt_diagnostic.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/buf.h>
#include <sys/vmmeter.h>

#include <vm/vm.h>
#include <vm/vm_prot.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vm_map.h>
#include <vm/vnode_pager.h>
#include <vm/vm_extern.h>

static vm_offset_t vnode_pager_addr __P((struct vnode *vp, vm_ooffset_t address,
					 int *run));
static void vnode_pager_iodone __P((struct buf *bp));
static int vnode_pager_input_smlfs __P((vm_object_t object, vm_page_t m));
static int vnode_pager_input_old __P((vm_object_t object, vm_page_t m));
static void vnode_pager_dealloc __P((vm_object_t));
static int vnode_pager_getpages __P((vm_object_t, vm_page_t *, int, int));
static int vnode_pager_putpages __P((vm_object_t, vm_page_t *, int, boolean_t, int *));
static boolean_t vnode_pager_haspage __P((vm_object_t, vm_pindex_t, int *, int *));

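/*
 * Pager operations vector through which the generic pager code calls
 * into the vnode pager; slots not needed by this pager are left NULL.
 */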
struct pagerops vnodepagerops = {
	NULL,
	vnode_pager_alloc,
	vnode_pager_dealloc,
	vnode_pager_getpages,
	vnode_pager_putpages,
	vnode_pager_haspage,
	NULL
};

static int vnode_pager_leaf_getpages __P((vm_object_t object, vm_page_t *m,
					  int count, int reqpage));
static int vnode_pager_leaf_putpages __P((vm_object_t object, vm_page_t *m,
					  int count, boolean_t sync,
					  int *rtvals));

/*
 * Allocate (or lookup) pager for a vnode.
 * Handle is a vnode pointer.
 */
vm_object_t
vnode_pager_alloc(void *handle, vm_size_t size, vm_prot_t prot,
		  vm_ooffset_t offset)
{
	vm_object_t object;
	struct vnode *vp;

	/*
	 * Pageout to vnode, no can do yet.
	 */
	if (handle == NULL)
		return (NULL);

	vp = (struct vnode *) handle;

	/*
	 * Prevent race condition when allocating the object. This
	 * can happen with NFS vnodes since the nfsnode isn't locked.
	 */
	while (vp->v_flag & VOLOCK) {
		vp->v_flag |= VOWANT;
		tsleep(vp, PVM, "vnpobj", 0);
	}
	vp->v_flag |= VOLOCK;

	/*
	 * If the object is being terminated, wait for it to
	 * go away.
	 */
	while (((object = vp->v_object) != NULL) &&
		(object->flags & OBJ_DEAD)) {
		tsleep(object, PVM, "vadead", 0);
	}

	if (vp->v_usecount == 0)
		panic("vnode_pager_alloc: no vnode reference");

	if (object == NULL) {
		/*
		 * Allocate an object of the appropriate size.
		 */
		object = vm_object_allocate(OBJT_VNODE, size);
		object->flags = 0;

		object->un_pager.vnp.vnp_size = (vm_ooffset_t) size * PAGE_SIZE;

		object->handle = handle;
		vp->v_object = object;
		vp->v_usecount++;
	} else {
		object->ref_count++;
		vp->v_usecount++;
	}

	vp->v_flag &= ~VOLOCK;
	if (vp->v_flag & VOWANT) {
		vp->v_flag &= ~VOWANT;
		wakeup(vp);
	}
	return (object);
}

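/*
 * Detach the VM object from its vnode, waiting for any paging activity
 * already in progress to drain first.
 */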
static void
vnode_pager_dealloc(object)
	vm_object_t object;
{
	register struct vnode *vp = object->handle;

	if (vp == NULL)
		panic("vnode_pager_dealloc: pager already dealloced");

	if (object->paging_in_progress) {
		int s = splbio();
		while (object->paging_in_progress) {
			object->flags |= OBJ_PIPWNT;
			tsleep(object, PVM, "vnpdea", 0);
		}
		splx(s);
	}

	object->handle = NULL;
	object->type = OBJT_DEAD;
	vp->v_object = NULL;
	vp->v_flag &= ~(VTEXT | VOBJBUF);
}

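/*
 * Report whether backing store exists for the given page and, optionally,
 * how many contiguous pages lie before and after it, based on what
 * VOP_BMAP() says about the underlying filesystem block.
 */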
static boolean_t
vnode_pager_haspage(object, pindex, before, after)
	vm_object_t object;
	vm_pindex_t pindex;
	int *before;
	int *after;
{
	struct vnode *vp = object->handle;
	daddr_t bn;
	int err;
	daddr_t reqblock;
	int poff;
	int bsize;
	int pagesperblock, blocksperpage;

	if ((vp == NULL) || (vp->v_flag & VDOOMED))
		return FALSE;

	/*
	 * If filesystem no longer mounted or offset beyond end of file we do
	 * not have the page.
	 */
	if ((vp->v_mount == NULL) ||
		(IDX_TO_OFF(pindex) >= object->un_pager.vnp.vnp_size))
		return FALSE;

	bsize = vp->v_mount->mnt_stat.f_iosize;
	pagesperblock = bsize / PAGE_SIZE;
	blocksperpage = 0;
	if (pagesperblock > 0) {
		reqblock = pindex / pagesperblock;
	} else {
		blocksperpage = (PAGE_SIZE / bsize);
		reqblock = pindex * blocksperpage;
	}
	err = VOP_BMAP(vp, reqblock, (struct vnode **) 0, &bn,
		after, before);
	if (err)
		return TRUE;
	if (bn == -1)
		return FALSE;
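	/*
	 * VOP_BMAP() reports the before/after runs in filesystem blocks;
	 * rescale them to pages, accounting for our offset within the
	 * block when the block is larger than a page.
	 */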
	if (pagesperblock > 0) {
		poff = pindex - (reqblock * pagesperblock);
		if (before) {
			*before *= pagesperblock;
			*before += poff;
		}
		if (after) {
			int numafter;
			*after *= pagesperblock;
			numafter = pagesperblock - (poff + 1);
			if (IDX_TO_OFF(pindex + numafter) > object->un_pager.vnp.vnp_size) {
				numafter = OFF_TO_IDX((object->un_pager.vnp.vnp_size - IDX_TO_OFF(pindex)));
			}
			*after += numafter;
		}
	} else {
		if (before) {
			*before /= blocksperpage;
		}

		if (after) {
			*after /= blocksperpage;
		}
	}
	return TRUE;
}

/*
 * Lets the VM system know about a change in size for a file.
 * We adjust our own internal size and flush any cached pages in
 * the associated object that are affected by the size change.
 *
 * Note: this routine may be invoked as a result of a pager put
 * operation (possibly at object termination time), so we must be careful.
 */
void
vnode_pager_setsize(vp, nsize)
	struct vnode *vp;
	vm_ooffset_t nsize;
{
	vm_object_t object = vp->v_object;

	if (object == NULL)
		return;

	/*
	 * Hasn't changed size
	 */
	if (nsize == object->un_pager.vnp.vnp_size)
		return;

	/*
	 * File has shrunk. Toss any cached pages beyond the new EOF.
	 */
	if (nsize < object->un_pager.vnp.vnp_size) {
		vm_ooffset_t nsizerounded;
		nsizerounded = IDX_TO_OFF(OFF_TO_IDX(nsize + PAGE_MASK));
		if (nsizerounded < object->un_pager.vnp.vnp_size) {
			vm_pindex_t st, end;
			st = OFF_TO_IDX(nsize + PAGE_MASK);
			end = OFF_TO_IDX(object->un_pager.vnp.vnp_size);

			vm_freeze_copyopts(object, OFF_TO_IDX(nsize), object->size);
			vm_object_page_remove(object, st, end, FALSE);
		}
		/*
		 * this gets rid of garbage at the end of a page that is now
		 * only partially backed by the vnode...
		 */
		if (nsize & PAGE_MASK) {
			vm_offset_t kva;
			vm_page_t m;

			m = vm_page_lookup(object, OFF_TO_IDX(nsize));
			if (m) {
				kva = vm_pager_map_page(m);
				bzero((caddr_t) kva + (nsize & PAGE_MASK),
				    (int) (round_page(nsize) - nsize));
				vm_pager_unmap_page(kva);
			}
		}
	}
	object->un_pager.vnp.vnp_size = nsize;
	object->size = OFF_TO_IDX(nsize + PAGE_MASK);
}

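/*
 * Free a page handed to the pager that is not going to be used.
 */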
void
vnode_pager_freepage(m)
	vm_page_t m;
{
	vm_page_free(m);
}

/*
 * calculate the linear (byte) disk address of specified virtual
 * file address
 */
static vm_offset_t
vnode_pager_addr(vp, address, run)
	struct vnode *vp;
	vm_ooffset_t address;
	int *run;
{
	int rtaddress;
	int bsize;
	daddr_t block;
	struct vnode *rtvp;
	int err;
	daddr_t vblock;
	int voffset;

	if ((int) address < 0)
		return -1;

	if (vp->v_mount == NULL)
		return -1;

	bsize = vp->v_mount->mnt_stat.f_iosize;
	vblock = address / bsize;
	voffset = address % bsize;

	err = VOP_BMAP(vp, vblock, &rtvp, &block, run, NULL);

	if (err || (block == -1))
		rtaddress = -1;
	else {
		rtaddress = block + voffset / DEV_BSIZE;
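		/*
		 * The run length from VOP_BMAP() is in filesystem blocks;
		 * convert it to pages and drop the portion of the first
		 * block that lies before the requested offset.
		 */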
		if (run) {
			*run += 1;
			*run *= bsize/PAGE_SIZE;
			*run -= voffset/PAGE_SIZE;
		}
	}

	return rtaddress;
}

/*
 * interrupt routine for I/O completion
 */
static void
vnode_pager_iodone(bp)
	struct buf *bp;
{
	bp->b_flags |= B_DONE;
	wakeup(bp);
}

/*
 * small block file system vnode pager input
 */
static int
vnode_pager_input_smlfs(object, m)
	vm_object_t object;
	vm_page_t m;
{
	int i;
	int s;
	struct vnode *dp, *vp;
	struct buf *bp;
	vm_offset_t kva;
	int fileaddr;
	vm_offset_t bsize;
	int error = 0;

	vp = object->handle;
	if (vp->v_mount == NULL)
		return VM_PAGER_BAD;

	bsize = vp->v_mount->mnt_stat.f_iosize;

	VOP_BMAP(vp, 0, &dp, 0, NULL, NULL);

	kva = vm_pager_map_page(m);

	for (i = 0; i < PAGE_SIZE / bsize; i++) {

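		/*
		 * Skip filesystem blocks within the page that are already
		 * valid.
		 */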
		if ((vm_page_bits(IDX_TO_OFF(m->pindex) + i * bsize, bsize) & m->valid))
			continue;

		fileaddr = vnode_pager_addr(vp,
			IDX_TO_OFF(m->pindex) + i * bsize, (int *)0);
		if (fileaddr != -1) {
			bp = getpbuf();

			/* build a minimal buffer header */
			bp->b_flags = B_BUSY | B_READ | B_CALL;
			bp->b_iodone = vnode_pager_iodone;
			bp->b_proc = curproc;
			bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
			if (bp->b_rcred != NOCRED)
				crhold(bp->b_rcred);
			if (bp->b_wcred != NOCRED)
				crhold(bp->b_wcred);
			bp->b_data = (caddr_t) kva + i * bsize;
			bp->b_blkno = fileaddr;
			pbgetvp(dp, bp);
			bp->b_bcount = bsize;
			bp->b_bufsize = bsize;

			/* do the input */
			VOP_STRATEGY(bp);

			/* we definitely need to be at splbio here */

			s = splbio();
			while ((bp->b_flags & B_DONE) == 0) {
				tsleep(bp, PVM, "vnsrd", 0);
			}
			splx(s);
			if ((bp->b_flags & B_ERROR) != 0)
				error = EIO;

			/*
			 * free the buffer header back to the swap buffer pool
			 */
			relpbuf(bp);
			if (error)
				break;

			vm_page_set_validclean(m, (i * bsize) & PAGE_MASK, bsize);
		} else {
			vm_page_set_validclean(m, (i * bsize) & PAGE_MASK, bsize);
			bzero((caddr_t) kva + i * bsize, bsize);
		}
	}
	vm_pager_unmap_page(kva);
	pmap_clear_modify(VM_PAGE_TO_PHYS(m));
	m->flags &= ~PG_ZERO;
	if (error) {
		return VM_PAGER_ERROR;
	}
	return VM_PAGER_OK;
}

/*
 * old style vnode pager input routine
 */
static int
vnode_pager_input_old(object, m)
	vm_object_t object;
	vm_page_t m;
{
	struct uio auio;
	struct iovec aiov;
	int error;
	int size;
	vm_offset_t kva;

	error = 0;

	/*
	 * Return failure if beyond current EOF
	 */
	if (IDX_TO_OFF(m->pindex) >= object->un_pager.vnp.vnp_size) {
		return VM_PAGER_BAD;
	} else {
		size = PAGE_SIZE;
		if (IDX_TO_OFF(m->pindex) + size > object->un_pager.vnp.vnp_size)
			size = object->un_pager.vnp.vnp_size - IDX_TO_OFF(m->pindex);

		/*
		 * Allocate a kernel virtual address and initialize so that
		 * we can use VOP_READ/WRITE routines.
		 */
		kva = vm_pager_map_page(m);

		aiov.iov_base = (caddr_t) kva;
		aiov.iov_len = size;
		auio.uio_iov = &aiov;
		auio.uio_iovcnt = 1;
		auio.uio_offset = IDX_TO_OFF(m->pindex);
		auio.uio_segflg = UIO_SYSSPACE;
		auio.uio_rw = UIO_READ;
		auio.uio_resid = size;
		auio.uio_procp = (struct proc *) 0;

		error = VOP_READ(object->handle, &auio, 0, curproc->p_ucred);
		if (!error) {
			register int count = size - auio.uio_resid;

			if (count == 0)
				error = EINVAL;
			else if (count != PAGE_SIZE)
				bzero((caddr_t) kva + count, PAGE_SIZE - count);
		}
		vm_pager_unmap_page(kva);
	}
	pmap_clear_modify(VM_PAGE_TO_PHYS(m));
	m->dirty = 0;
	m->flags &= ~PG_ZERO;
	return error ? VM_PAGER_ERROR : VM_PAGER_OK;
}

/*
 * generic vnode pager input routine
 */

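/*
 * Give the underlying filesystem's own VOP_GETPAGES implementation the
 * first shot; if it returns EOPNOTSUPP, fall back to the generic leaf
 * routine below, which does the clustering and buffer setup itself.
 */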
static int
vnode_pager_getpages(object, m, count, reqpage)
	vm_object_t object;
	vm_page_t *m;
	int count;
	int reqpage;
{
	int rtval;
	struct vnode *vp;

	vp = object->handle;
	rtval = VOP_GETPAGES(vp, m, count*PAGE_SIZE, reqpage, 0);
	if (rtval == EOPNOTSUPP)
		return vnode_pager_leaf_getpages(object, m, count, reqpage);
	else
		return rtval;
}

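/*
 * Generic getpages implementation: map the file offset to a device block
 * with VOP_BMAP(), trim the supplied page run to what is contiguous on
 * disk, and read it with a single strategy call.  Falls back to the old
 * VOP_READ path or the small-block path when bmap cannot be used.
 */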
static int
vnode_pager_leaf_getpages(object, m, count, reqpage)
	vm_object_t object;
	vm_page_t *m;
	int count;
	int reqpage;
{
	vm_offset_t kva;
	off_t foff;
	int i, size, bsize, first, firstaddr;
	struct vnode *dp, *vp;
	int runpg;
	int runend;
	struct buf *bp;
	int s;
	int error = 0;

	vp = object->handle;
	if (vp->v_mount == NULL)
		return VM_PAGER_BAD;

	bsize = vp->v_mount->mnt_stat.f_iosize;

	/* get the UNDERLYING device for the file with VOP_BMAP() */

	/*
	 * originally, we did not check for an error return value -- assuming
	 * an fs always has a bmap entry point -- that assumption is wrong!!!
	 */
	foff = IDX_TO_OFF(m[reqpage]->pindex);

	/*
	 * if we can't bmap, use old VOP code
	 */
	if (VOP_BMAP(vp, 0, &dp, 0, NULL, NULL)) {
		for (i = 0; i < count; i++) {
			if (i != reqpage) {
				vnode_pager_freepage(m[i]);
			}
		}
		cnt.v_vnodein++;
		cnt.v_vnodepgsin++;
		return vnode_pager_input_old(object, m[reqpage]);

		/*
		 * if the blocksize is smaller than a page size, then use
		 * special small filesystem code.  NFS sometimes has a small
		 * blocksize, but it can handle large reads itself.
		 */
	} else if ((PAGE_SIZE / bsize) > 1 &&
	    (vp->v_mount->mnt_stat.f_type != MOUNT_NFS)) {

		for (i = 0; i < count; i++) {
			if (i != reqpage) {
				vnode_pager_freepage(m[i]);
			}
		}
		cnt.v_vnodein++;
		cnt.v_vnodepgsin++;
		return vnode_pager_input_smlfs(object, m[reqpage]);
	}
	/*
	 * if ANY DEV_BSIZE blocks are valid on a large filesystem block
	 * then, the entire page is valid --
	 * XXX no it isn't
	 */

	if (m[reqpage]->valid != VM_PAGE_BITS_ALL)
	    m[reqpage]->valid = 0;

	if (m[reqpage]->valid) {
		m[reqpage]->valid = VM_PAGE_BITS_ALL;
		for (i = 0; i < count; i++) {
			if (i != reqpage)
				vnode_pager_freepage(m[i]);
		}
		return VM_PAGER_OK;
	}

	/*
	 * here on direct device I/O
	 */

	firstaddr = -1;
	/*
	 * calculate the run that includes the required page
	 */
	for (first = 0, i = 0; i < count; i = runend) {
		firstaddr = vnode_pager_addr(vp,
			IDX_TO_OFF(m[i]->pindex), &runpg);
		if (firstaddr == -1) {
			if (i == reqpage && foff < object->un_pager.vnp.vnp_size) {
				panic("vnode_pager_getpages: unexpected missing page: firstaddr: %d, foff: %ld, vnp_size: %ld",
				    firstaddr, (long)foff, (long)object->un_pager.vnp.vnp_size);
			}
			vnode_pager_freepage(m[i]);
			runend = i + 1;
			first = runend;
			continue;
		}
		runend = i + runpg;
		if (runend <= reqpage) {
			int j;
			for (j = i; j < runend; j++) {
				vnode_pager_freepage(m[j]);
			}
		} else {
			if (runpg < (count - first)) {
				for (i = first + runpg; i < count; i++)
					vnode_pager_freepage(m[i]);
				count = first + runpg;
			}
			break;
		}
		first = runend;
	}

	/*
	 * the first and last page have been calculated now, move input pages
	 * to be zero based...
	 */
	if (first != 0) {
		for (i = first; i < count; i++) {
			m[i - first] = m[i];
		}
		count -= first;
		reqpage -= first;
	}

	/*
	 * calculate the file virtual address for the transfer
	 */
	foff = IDX_TO_OFF(m[0]->pindex);

	/*
	 * calculate the size of the transfer
	 */
	size = count * PAGE_SIZE;
	if ((foff + size) > object->un_pager.vnp.vnp_size)
		size = object->un_pager.vnp.vnp_size - foff;

	/*
	 * round up physical size for real devices
	 */
	if (dp->v_type == VBLK || dp->v_type == VCHR)
		size = (size + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1);

	bp = getpbuf();
	kva = (vm_offset_t) bp->b_data;

	/*
	 * and map the pages to be read into the kva
	 */
	pmap_qenter(kva, m, count);

	/* build a minimal buffer header */
	bp->b_flags = B_BUSY | B_READ | B_CALL;
	bp->b_iodone = vnode_pager_iodone;
	/* B_PHYS is not set, but it is nice to fill this in */
	bp->b_proc = curproc;
	bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
	if (bp->b_rcred != NOCRED)
		crhold(bp->b_rcred);
	if (bp->b_wcred != NOCRED)
		crhold(bp->b_wcred);
	bp->b_blkno = firstaddr;
	pbgetvp(dp, bp);
	bp->b_bcount = size;
	bp->b_bufsize = size;

	cnt.v_vnodein++;
	cnt.v_vnodepgsin += count;

	/* do the input */
	VOP_STRATEGY(bp);

	s = splbio();
	/* we definitely need to be at splbio here */

	while ((bp->b_flags & B_DONE) == 0) {
		tsleep(bp, PVM, "vnread", 0);
	}
	splx(s);
	if ((bp->b_flags & B_ERROR) != 0)
		error = EIO;

	if (!error) {
		if (size != count * PAGE_SIZE)
			bzero((caddr_t) kva + size, PAGE_SIZE * count - size);
	}
	pmap_qremove(kva, count);

	/*
	 * free the buffer header back to the swap buffer pool
	 */
	relpbuf(bp);

	for (i = 0; i < count; i++) {
		pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
		m[i]->dirty = 0;
		m[i]->valid = VM_PAGE_BITS_ALL;
		m[i]->flags &= ~PG_ZERO;
		if (i != reqpage) {

			/*
			 * whether or not to leave the page activated is up in
			 * the air, but we should put the page on a page queue
			 * somewhere. (it already is in the object). Result:
			 * It appears that empirical results show that
			 * deactivating pages is best.
			 */

			/*
			 * just in case someone was asking for this page we
			 * now tell them that it is ok to use
			 */
			if (!error) {
				if (m[i]->flags & PG_WANTED)
					vm_page_activate(m[i]);
				else
					vm_page_deactivate(m[i]);
				PAGE_WAKEUP(m[i]);
			} else {
				vnode_pager_freepage(m[i]);
			}
		}
	}
	if (error) {
		printf("vnode_pager_getpages: I/O read error\n");
	}
	return (error ? VM_PAGER_ERROR : VM_PAGER_OK);
}

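/*
 * Pageout interface: give the filesystem's own VOP_PUTPAGES a chance
 * first, falling back to the generic leaf routine below when it returns
 * EOPNOTSUPP.
 */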
static int
vnode_pager_putpages(object, m, count, sync, rtvals)
	vm_object_t object;
	vm_page_t *m;
	int count;
	boolean_t sync;
	int *rtvals;
{
	int rtval;
	struct vnode *vp;

	vp = object->handle;
	rtval = VOP_PUTPAGES(vp, m, count*PAGE_SIZE, sync, rtvals, 0);
	if (rtval == EOPNOTSUPP)
		return vnode_pager_leaf_putpages(object, m, count, sync, rtvals);
	else
		return rtval;
}

/*
 * generic vnode pager output routine
 */
static int
vnode_pager_leaf_putpages(object, m, count, sync, rtvals)
	vm_object_t object;
	vm_page_t *m;
	int count;
	boolean_t sync;
	int *rtvals;
{
	int i;

	struct vnode *vp;
	int maxsize, ncount;
	vm_ooffset_t poffset;
	struct uio auio;
	struct iovec aiov;
	int error;

	vp = object->handle;
	for (i = 0; i < count; i++)
		rtvals[i] = VM_PAGER_AGAIN;

	if ((int) m[0]->pindex < 0) {
		printf("vnode_pager_putpages: attempt to write meta-data!!! -- 0x%x(%x)\n", m[0]->pindex, m[0]->dirty);
		rtvals[0] = VM_PAGER_BAD;
		return VM_PAGER_BAD;
	}

	maxsize = count * PAGE_SIZE;
	ncount = count;

	poffset = IDX_TO_OFF(m[0]->pindex);
	if (maxsize + poffset > object->un_pager.vnp.vnp_size) {
		if (object->un_pager.vnp.vnp_size > poffset)
			maxsize = object->un_pager.vnp.vnp_size - poffset;
		else
			maxsize = 0;
		ncount = btoc(maxsize);
		if (ncount < count) {
			for (i = ncount; i < count; i++) {
				rtvals[i] = VM_PAGER_BAD;
			}
#ifdef BOGUS
			if (ncount == 0) {
				printf("vnode_pager_putpages: write past end of file: %d, %lu\n",
					poffset,
					(unsigned long) object->un_pager.vnp.vnp_size);
				return rtvals[0];
			}
#endif
		}
	}

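	/*
	 * Bump the soft busy count and clear PG_BUSY on each page so the
	 * VOP_WRITE() below can reach the pages while the pageout is in
	 * progress.
	 */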
	for (i = 0; i < count; i++) {
		m[i]->busy++;
		m[i]->flags &= ~PG_BUSY;
	}

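	/*
	 * UIO_NOCOPY tells the filesystem to take the data straight from
	 * the VM pages already attached to the object rather than copying
	 * from iov_base (which is left NULL).
	 */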
	aiov.iov_base = (caddr_t) 0;
	aiov.iov_len = maxsize;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_offset = poffset;
	auio.uio_segflg = UIO_NOCOPY;
	auio.uio_rw = UIO_WRITE;
	auio.uio_resid = maxsize;
	auio.uio_procp = (struct proc *) 0;
	error = VOP_WRITE(vp, &auio, IO_VMIO|(sync?IO_SYNC:0), curproc->p_ucred);
	cnt.v_vnodeout++;
	cnt.v_vnodepgsout += ncount;

	if (error) {
		printf("vnode_pager_putpages: I/O error %d\n", error);
	}
	if (auio.uio_resid) {
		printf("vnode_pager_putpages: residual I/O %d at %ld\n",
			auio.uio_resid, m[0]->pindex);
	}
	for (i = 0; i < count; i++) {
		m[i]->busy--;
		if (i < ncount) {
			rtvals[i] = VM_PAGER_OK;
		}
		if ((m[i]->busy == 0) && (m[i]->flags & PG_WANTED)) {
			vm_page_activate(m[i]);
			wakeup(m[i]);
		}
	}
	return rtvals[0];
}

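/*
 * Walk the shadow chain looking for the vnode object backing this one
 * and return its vnode with a shared lock held; return NULL if there is
 * none or the object is being destroyed.
 */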
struct vnode *
vnode_pager_lock(object)
	vm_object_t object;
{
	struct proc *p = curproc;	/* XXX */

	for (; object != NULL; object = object->backing_object) {
		if (object->type != OBJT_VNODE)
			continue;
		if (object->flags & OBJ_DEAD)
			return NULL;

		while (vget(object->handle,
			LK_NOPAUSE | LK_SHARED | LK_RETRY | LK_CANRECURSE, p)) {
			printf("vnode_pager_lock: retrying\n");
		}
		return object->handle;
	}
	return NULL;
}