--- vnode_pager.c	(58934)
+++ vnode_pager.c	(60041)
1/*
2 * Copyright (c) 1990 University of Utah.
3 * Copyright (c) 1991 The Regents of the University of California.
4 * All rights reserved.
5 * Copyright (c) 1993, 1994 John S. Dyson
6 * Copyright (c) 1995, David Greenman
7 *
8 * This code is derived from software contributed to Berkeley by
9 * the Systems Programming Group of the University of Utah Computer
10 * Science Department.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. All advertising materials mentioning features or use of this software
21 * must display the following acknowledgement:
22 * This product includes software developed by the University of
23 * California, Berkeley and its contributors.
24 * 4. Neither the name of the University nor the names of its contributors
25 * may be used to endorse or promote products derived from this software
26 * without specific prior written permission.
27 *
28 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
29 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
32 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
33 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
34 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
35 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
37 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38 * SUCH DAMAGE.
39 *
40 * from: @(#)vnode_pager.c 7.5 (Berkeley) 4/20/91
- 41 * $FreeBSD: head/sys/vm/vnode_pager.c 58934 2000-04-02 15:24:56Z phk $
+ 41 * $FreeBSD: head/sys/vm/vnode_pager.c 60041 2000-05-05 09:59:14Z phk $
42 */
43
44/*
45 * Page to/from files (vnodes).
46 */
47
48/*
49 * TODO:
50 * Implement VOP_GETPAGES/PUTPAGES interface for filesystems. Will
51 * greatly re-simplify the vnode_pager.
52 */
53
54#include <sys/param.h>
55#include <sys/systm.h>
56#include <sys/proc.h>
57#include <sys/vnode.h>
58#include <sys/mount.h>
+ 59#include <sys/bio.h>
59#include <sys/buf.h>
60#include <sys/vmmeter.h>
61#include <sys/conf.h>
62
63#include <vm/vm.h>
64#include <vm/vm_object.h>
65#include <vm/vm_page.h>
66#include <vm/vm_pager.h>
67#include <vm/vm_map.h>
68#include <vm/vnode_pager.h>
69#include <vm/vm_extern.h>
70
71static vm_offset_t vnode_pager_addr __P((struct vnode *vp, vm_ooffset_t address,
72 int *run));
73static void vnode_pager_iodone __P((struct buf *bp));
74static int vnode_pager_input_smlfs __P((vm_object_t object, vm_page_t m));
75static int vnode_pager_input_old __P((vm_object_t object, vm_page_t m));
76static void vnode_pager_dealloc __P((vm_object_t));
77static int vnode_pager_getpages __P((vm_object_t, vm_page_t *, int, int));
78static void vnode_pager_putpages __P((vm_object_t, vm_page_t *, int, boolean_t, int *));
79static boolean_t vnode_pager_haspage __P((vm_object_t, vm_pindex_t, int *, int *));
80
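/*
 * The two NULL slots below are pgo_init and pgo_pageunswapped in
 * struct pagerops; the vnode pager needs neither hook.
 */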
81struct pagerops vnodepagerops = {
82 NULL,
83 vnode_pager_alloc,
84 vnode_pager_dealloc,
85 vnode_pager_getpages,
86 vnode_pager_putpages,
87 vnode_pager_haspage,
88 NULL
89};
90
91int vnode_pbuf_freecnt = -1; /* start out unlimited */
92
93
94/*
95 * Allocate (or lookup) pager for a vnode.
96 * Handle is a vnode pointer.
97 */
98vm_object_t
99vnode_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot,
100 vm_ooffset_t offset)
101{
102 vm_object_t object;
103 struct vnode *vp;
104
105 /*
106 * Pageout to vnode, no can do yet.
107 */
108 if (handle == NULL)
109 return (NULL);
110
111 /*
112 * XXX hack - This initialization should be put somewhere else.
113 */
114 if (vnode_pbuf_freecnt < 0) {
115 vnode_pbuf_freecnt = nswbuf / 2 + 1;
116 }
117
118 vp = (struct vnode *) handle;
119
120 /*
121 * Prevent race condition when allocating the object. This
122 * can happen with NFS vnodes since the nfsnode isn't locked.
123 */
124 while (vp->v_flag & VOLOCK) {
125 vp->v_flag |= VOWANT;
126 tsleep(vp, PVM, "vnpobj", 0);
127 }
128 vp->v_flag |= VOLOCK;
129
130 /*
131 * If the object is being terminated, wait for it to
132 * go away.
133 */
134 while (((object = vp->v_object) != NULL) &&
135 (object->flags & OBJ_DEAD)) {
136 tsleep(object, PVM, "vadead", 0);
137 }
138
139 if (vp->v_usecount == 0)
140 panic("vnode_pager_alloc: no vnode reference");
141
142 if (object == NULL) {
143 /*
144 * And an object of the appropriate size
145 */
146 object = vm_object_allocate(OBJT_VNODE, OFF_TO_IDX(round_page(size)));
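		/*
		 * e.g. with 4K pages a 5000-byte file rounds up to 8192
		 * bytes, so OFF_TO_IDX(round_page(size)) allocates a
		 * two-page object.
		 */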
147 object->flags = 0;
148
149 object->un_pager.vnp.vnp_size = size;
150
151 object->handle = handle;
152 vp->v_object = object;
153 vp->v_usecount++;
154 } else {
155 object->ref_count++;
156 vp->v_usecount++;
157 }
158
159 vp->v_flag &= ~VOLOCK;
160 if (vp->v_flag & VOWANT) {
161 vp->v_flag &= ~VOWANT;
162 wakeup(vp);
163 }
164 return (object);
165}
166
167static void
168vnode_pager_dealloc(object)
169 vm_object_t object;
170{
171 register struct vnode *vp = object->handle;
172
173 if (vp == NULL)
174 panic("vnode_pager_dealloc: pager already dealloced");
175
176 vm_object_pip_wait(object, "vnpdea");
177
178 object->handle = NULL;
179 object->type = OBJT_DEAD;
180 vp->v_object = NULL;
181 vp->v_flag &= ~(VTEXT | VOBJBUF);
182}
183
184static boolean_t
185vnode_pager_haspage(object, pindex, before, after)
186 vm_object_t object;
187 vm_pindex_t pindex;
188 int *before;
189 int *after;
190{
191 struct vnode *vp = object->handle;
192 daddr_t bn;
193 int err;
194 daddr_t reqblock;
195 int poff;
196 int bsize;
197 int pagesperblock, blocksperpage;
198
199 /*
200 * If no vp or vp is doomed or marked transparent to VM, we do not
201 * have the page.
202 */
203 if ((vp == NULL) || (vp->v_flag & VDOOMED))
204 return FALSE;
205
206 /*
207 * If filesystem no longer mounted or offset beyond end of file we do
208 * not have the page.
209 */
210 if ((vp->v_mount == NULL) ||
211 (IDX_TO_OFF(pindex) >= object->un_pager.vnp.vnp_size))
212 return FALSE;
213
214 bsize = vp->v_mount->mnt_stat.f_iosize;
215 pagesperblock = bsize / PAGE_SIZE;
216 blocksperpage = 0;
217 if (pagesperblock > 0) {
218 reqblock = pindex / pagesperblock;
219 } else {
220 blocksperpage = (PAGE_SIZE / bsize);
221 reqblock = pindex * blocksperpage;
222 }
223 err = VOP_BMAP(vp, reqblock, (struct vnode **) 0, &bn,
224 after, before);
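	/*
	 * Answer a VOP_BMAP failure conservatively: report the page as
	 * present so callers fall back to a real getpages attempt
	 * instead of assuming a hole.
	 */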
225 if (err)
226 return TRUE;
 227	if (bn == -1)
228 return FALSE;
229 if (pagesperblock > 0) {
230 poff = pindex - (reqblock * pagesperblock);
231 if (before) {
232 *before *= pagesperblock;
233 *before += poff;
234 }
235 if (after) {
236 int numafter;
237 *after *= pagesperblock;
238 numafter = pagesperblock - (poff + 1);
239 if (IDX_TO_OFF(pindex + numafter) > object->un_pager.vnp.vnp_size) {
240 numafter = OFF_TO_IDX((object->un_pager.vnp.vnp_size - IDX_TO_OFF(pindex)));
241 }
242 *after += numafter;
243 }
244 } else {
245 if (before) {
246 *before /= blocksperpage;
247 }
248
249 if (after) {
250 *after /= blocksperpage;
251 }
252 }
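	/*
	 * Worked example of the scaling above: bsize 8192 with 4K pages
	 * gives pagesperblock 2, so pindex 5 maps to reqblock 2 with
	 * poff 1.  VOP_BMAP reports *before and *after in fs blocks, so
	 * they are rescaled to pages relative to pindex, and the after
	 * count is clamped at EOF.
	 */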
253 return TRUE;
254}
255
256/*
257 * Lets the VM system know about a change in size for a file.
258 * We adjust our own internal size and flush any cached pages in
259 * the associated object that are affected by the size change.
260 *
261 * Note: this routine may be invoked as a result of a pager put
262 * operation (possibly at object termination time), so we must be careful.
263 */
264void
265vnode_pager_setsize(vp, nsize)
266 struct vnode *vp;
267 vm_ooffset_t nsize;
268{
269 vm_pindex_t nobjsize;
270 vm_object_t object = vp->v_object;
271
272 if (object == NULL)
273 return;
274
275 /*
276 * Hasn't changed size
277 */
278 if (nsize == object->un_pager.vnp.vnp_size)
279 return;
280
281 nobjsize = OFF_TO_IDX(nsize + PAGE_MASK);
282
283 /*
284 * File has shrunk. Toss any cached pages beyond the new EOF.
285 */
286 if (nsize < object->un_pager.vnp.vnp_size) {
287 vm_freeze_copyopts(object, OFF_TO_IDX(nsize), object->size);
288 if (nobjsize < object->size) {
289 vm_object_page_remove(object, nobjsize, object->size,
290 FALSE);
291 }
292 /*
293 * this gets rid of garbage at the end of a page that is now
294 * only partially backed by the vnode...
295 */
296 if (nsize & PAGE_MASK) {
297 vm_offset_t kva;
298 vm_page_t m;
299
300 m = vm_page_lookup(object, OFF_TO_IDX(nsize));
301 if (m) {
302 kva = vm_pager_map_page(m);
303 bzero((caddr_t) kva + (nsize & PAGE_MASK),
304 (int) (round_page(nsize) - nsize));
305 vm_pager_unmap_page(kva);
306 }
307 }
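		/*
		 * e.g. truncating to nsize 0x1234 with 4K pages zeroes file
		 * offsets 0x1234 through 0x1fff in the EOF page, so stale
		 * bytes past the new end cannot reappear through a mapping.
		 */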
308 }
309 object->un_pager.vnp.vnp_size = nsize;
310 object->size = nobjsize;
311}
312
313void
314vnode_pager_freepage(m)
315 vm_page_t m;
316{
317 vm_page_free(m);
318}
319
320/*
321 * calculate the linear (byte) disk address of specified virtual
322 * file address
323 */
324static vm_offset_t
325vnode_pager_addr(vp, address, run)
326 struct vnode *vp;
327 vm_ooffset_t address;
328 int *run;
329{
330 int rtaddress;
331 int bsize;
332 daddr_t block;
333 struct vnode *rtvp;
334 int err;
335 daddr_t vblock;
336 int voffset;
337
338 if ((int) address < 0)
339 return -1;
340
341 if (vp->v_mount == NULL)
342 return -1;
343
344 bsize = vp->v_mount->mnt_stat.f_iosize;
345 vblock = address / bsize;
346 voffset = address % bsize;
347
348 err = VOP_BMAP(vp, vblock, &rtvp, &block, run, NULL);
349
350 if (err || (block == -1))
351 rtaddress = -1;
352 else {
353 rtaddress = block + voffset / DEV_BSIZE;
 354	if (run) {
355 *run += 1;
356 *run *= bsize/PAGE_SIZE;
357 *run -= voffset/PAGE_SIZE;
358 }
359 }
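	/*
	 * VOP_BMAP returns *run as the count of contiguous fs blocks
	 * following the mapped one, so the math above turns it into
	 * whole blocks, scales blocks to pages, and discounts the pages
	 * that precede the requested address within the first block.
	 */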
360
361 return rtaddress;
362}
363
364/*
365 * interrupt routine for I/O completion
366 */
367static void
368vnode_pager_iodone(bp)
369 struct buf *bp;
370{
371 bp->b_flags |= B_DONE;
372 wakeup(bp);
373}
374
375/*
376 * small block file system vnode pager input
377 */
378static int
379vnode_pager_input_smlfs(object, m)
380 vm_object_t object;
381 vm_page_t m;
382{
383 int i;
384 int s;
385 struct vnode *dp, *vp;
386 struct buf *bp;
387 vm_offset_t kva;
388 int fileaddr;
389 vm_offset_t bsize;
390 int error = 0;
391
392 vp = object->handle;
393 if (vp->v_mount == NULL)
394 return VM_PAGER_BAD;
395
396 bsize = vp->v_mount->mnt_stat.f_iosize;
397
398
399 VOP_BMAP(vp, 0, &dp, 0, NULL, NULL);
400
401 kva = vm_pager_map_page(m);
402
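	/*
	 * Read the page one fs block at a time: e.g. a 512-byte block
	 * size on a 4K page means up to eight small transfers, skipping
	 * block-sized ranges that are already valid.
	 */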
403 for (i = 0; i < PAGE_SIZE / bsize; i++) {
404
405 if (vm_page_bits(i * bsize, bsize) & m->valid)
406 continue;
407
408 fileaddr = vnode_pager_addr(vp,
409 IDX_TO_OFF(m->pindex) + i * bsize, (int *)0);
410 if (fileaddr != -1) {
411 bp = getpbuf(&vnode_pbuf_freecnt);
412
413 /* build a minimal buffer header */
414 bp->b_iocmd = BIO_READ;
415 bp->b_iodone = vnode_pager_iodone;
416 bp->b_rcred = bp->b_wcred = curproc->p_ucred;
417 if (bp->b_rcred != NOCRED)
418 crhold(bp->b_rcred);
419 if (bp->b_wcred != NOCRED)
420 crhold(bp->b_wcred);
421 bp->b_data = (caddr_t) kva + i * bsize;
422 bp->b_blkno = fileaddr;
423 pbgetvp(dp, bp);
424 bp->b_bcount = bsize;
425 bp->b_bufsize = bsize;
426
427 /* do the input */
428 BUF_STRATEGY(bp);
429
430 /* we definitely need to be at splvm here */
431
432 s = splvm();
433 while ((bp->b_flags & B_DONE) == 0) {
434 tsleep(bp, PVM, "vnsrd", 0);
435 }
436 splx(s);
437 if ((bp->b_ioflags & BIO_ERROR) != 0)
438 error = EIO;
439
440 /*
441 * free the buffer header back to the swap buffer pool
442 */
443 relpbuf(bp, &vnode_pbuf_freecnt);
444 if (error)
445 break;
446
447 vm_page_set_validclean(m, (i * bsize) & PAGE_MASK, bsize);
448 } else {
449 vm_page_set_validclean(m, (i * bsize) & PAGE_MASK, bsize);
450 bzero((caddr_t) kva + i * bsize, bsize);
451 }
452 }
453 vm_pager_unmap_page(kva);
454 pmap_clear_modify(VM_PAGE_TO_PHYS(m));
455 vm_page_flag_clear(m, PG_ZERO);
456 if (error) {
457 return VM_PAGER_ERROR;
458 }
459 return VM_PAGER_OK;
460
461}
462
463
464/*
 466 * old style vnode pager input routine
466 */
467static int
468vnode_pager_input_old(object, m)
469 vm_object_t object;
470 vm_page_t m;
471{
472 struct uio auio;
473 struct iovec aiov;
474 int error;
475 int size;
476 vm_offset_t kva;
477
478 error = 0;
479
480 /*
481 * Return failure if beyond current EOF
482 */
483 if (IDX_TO_OFF(m->pindex) >= object->un_pager.vnp.vnp_size) {
484 return VM_PAGER_BAD;
485 } else {
486 size = PAGE_SIZE;
487 if (IDX_TO_OFF(m->pindex) + size > object->un_pager.vnp.vnp_size)
488 size = object->un_pager.vnp.vnp_size - IDX_TO_OFF(m->pindex);
489
490 /*
491 * Allocate a kernel virtual address and initialize so that
492 * we can use VOP_READ/WRITE routines.
493 */
494 kva = vm_pager_map_page(m);
495
496 aiov.iov_base = (caddr_t) kva;
497 aiov.iov_len = size;
498 auio.uio_iov = &aiov;
499 auio.uio_iovcnt = 1;
500 auio.uio_offset = IDX_TO_OFF(m->pindex);
501 auio.uio_segflg = UIO_SYSSPACE;
502 auio.uio_rw = UIO_READ;
503 auio.uio_resid = size;
504 auio.uio_procp = curproc;
505
506 error = VOP_READ(object->handle, &auio, 0, curproc->p_ucred);
507 if (!error) {
508 register int count = size - auio.uio_resid;
509
510 if (count == 0)
511 error = EINVAL;
512 else if (count != PAGE_SIZE)
513 bzero((caddr_t) kva + count, PAGE_SIZE - count);
514 }
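		/*
		 * A short read near EOF is expected; zeroing the tail keeps
		 * stale kernel memory from showing through the mapped page,
		 * while a zero-length read is treated as an error.
		 */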
515 vm_pager_unmap_page(kva);
516 }
517 pmap_clear_modify(VM_PAGE_TO_PHYS(m));
518 vm_page_undirty(m);
519 vm_page_flag_clear(m, PG_ZERO);
520 if (!error)
521 m->valid = VM_PAGE_BITS_ALL;
522 return error ? VM_PAGER_ERROR : VM_PAGER_OK;
523}
524
525/*
526 * generic vnode pager input routine
527 */
528
529/*
530 * EOPNOTSUPP is no longer legal. For local media VFS's that do not
531 * implement their own VOP_GETPAGES, their VOP_GETPAGES should call to
532 * vnode_pager_generic_getpages() to implement the previous behaviour.
533 *
534 * All other FS's should use the bypass to get to the local media
535 * backing vp's VOP_GETPAGES.
536 */
537static int
538vnode_pager_getpages(object, m, count, reqpage)
539 vm_object_t object;
540 vm_page_t *m;
541 int count;
542 int reqpage;
543{
544 int rtval;
545 struct vnode *vp;
546 int bytes = count * PAGE_SIZE;
547
548 vp = object->handle;
549 /*
 550	 * XXX temporary diagnostic message to help track stale FS code.
551 * Returning EOPNOTSUPP from here may make things unhappy.
552 */
553 rtval = VOP_GETPAGES(vp, m, bytes, reqpage, 0);
554 if (rtval == EOPNOTSUPP) {
555 printf("vnode_pager: *** WARNING *** stale FS getpages\n");
 556		rtval = vnode_pager_generic_getpages(vp, m, bytes, reqpage);
557 }
558 return rtval;
559}
560
561
562/*
563 * This is now called from local media FS's to operate against their
564 * own vnodes if they fail to implement VOP_GETPAGES.
565 */
566int
567vnode_pager_generic_getpages(vp, m, bytecount, reqpage)
568 struct vnode *vp;
569 vm_page_t *m;
570 int bytecount;
571 int reqpage;
572{
573 vm_object_t object;
574 vm_offset_t kva;
575 off_t foff, tfoff, nextoff;
576 int i, size, bsize, first, firstaddr;
577 struct vnode *dp;
578 int runpg;
579 int runend;
580 struct buf *bp;
581 int s;
582 int count;
583 int error = 0;
584
585 object = vp->v_object;
586 count = bytecount / PAGE_SIZE;
587
588 if (vp->v_mount == NULL)
589 return VM_PAGER_BAD;
590
591 bsize = vp->v_mount->mnt_stat.f_iosize;
592
593 /* get the UNDERLYING device for the file with VOP_BMAP() */
594
595 /*
596 * originally, we did not check for an error return value -- assuming
597 * an fs always has a bmap entry point -- that assumption is wrong!!!
598 */
599 foff = IDX_TO_OFF(m[reqpage]->pindex);
600
601 /*
602 * if we can't bmap, use old VOP code
603 */
604 if (VOP_BMAP(vp, 0, &dp, 0, NULL, NULL)) {
605 for (i = 0; i < count; i++) {
606 if (i != reqpage) {
607 vnode_pager_freepage(m[i]);
608 }
609 }
610 cnt.v_vnodein++;
611 cnt.v_vnodepgsin++;
612 return vnode_pager_input_old(object, m[reqpage]);
613
614 /*
615 * if the blocksize is smaller than a page size, then use
616 * special small filesystem code. NFS sometimes has a small
617 * blocksize, but it can handle large reads itself.
618 */
619 } else if ((PAGE_SIZE / bsize) > 1 &&
620 (vp->v_mount->mnt_stat.f_type != nfs_mount_type)) {
621 for (i = 0; i < count; i++) {
622 if (i != reqpage) {
623 vnode_pager_freepage(m[i]);
624 }
625 }
626 cnt.v_vnodein++;
627 cnt.v_vnodepgsin++;
628 return vnode_pager_input_smlfs(object, m[reqpage]);
629 }
630
631 /*
632 * If we have a completely valid page available to us, we can
633 * clean up and return. Otherwise we have to re-read the
634 * media.
635 */
636
637 if (m[reqpage]->valid == VM_PAGE_BITS_ALL) {
638 for (i = 0; i < count; i++) {
639 if (i != reqpage)
640 vnode_pager_freepage(m[i]);
641 }
642 return VM_PAGER_OK;
643 }
644 m[reqpage]->valid = 0;
645
646 /*
647 * here on direct device I/O
648 */
649
650 firstaddr = -1;
651 /*
652 * calculate the run that includes the required page
653 */
 654	for (first = 0, i = 0; i < count; i = runend) {
655 firstaddr = vnode_pager_addr(vp,
656 IDX_TO_OFF(m[i]->pindex), &runpg);
657 if (firstaddr == -1) {
658 if (i == reqpage && foff < object->un_pager.vnp.vnp_size) {
659 /* XXX no %qd in kernel. */
660 panic("vnode_pager_getpages: unexpected missing page: firstaddr: %d, foff: 0x%lx%08lx, vnp_size: 0x%lx%08lx",
661 firstaddr, (u_long)(foff >> 32),
662 (u_long)(u_int32_t)foff,
663 (u_long)(u_int32_t)
664 (object->un_pager.vnp.vnp_size >> 32),
665 (u_long)(u_int32_t)
666 object->un_pager.vnp.vnp_size);
667 }
668 vnode_pager_freepage(m[i]);
669 runend = i + 1;
670 first = runend;
671 continue;
672 }
673 runend = i + runpg;
674 if (runend <= reqpage) {
675 int j;
676 for (j = i; j < runend; j++) {
677 vnode_pager_freepage(m[j]);
678 }
679 } else {
680 if (runpg < (count - first)) {
681 for (i = first + runpg; i < count; i++)
682 vnode_pager_freepage(m[i]);
683 count = first + runpg;
684 }
685 break;
686 }
687 first = runend;
688 }
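	/*
	 * Example: with count 4 and reqpage 2, if the run containing the
	 * requested page starts at index 2, pages 0 and 1 are freed and
	 * first becomes 2, so the shift below reads a zero-based run
	 * with reqpage 0.
	 */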
689
690 /*
691 * the first and last page have been calculated now, move input pages
692 * to be zero based...
693 */
694 if (first != 0) {
695 for (i = first; i < count; i++) {
696 m[i - first] = m[i];
697 }
698 count -= first;
699 reqpage -= first;
700 }
701
702 /*
703 * calculate the file virtual address for the transfer
704 */
705 foff = IDX_TO_OFF(m[0]->pindex);
706
707 /*
708 * calculate the size of the transfer
709 */
710 size = count * PAGE_SIZE;
711 if ((foff + size) > object->un_pager.vnp.vnp_size)
712 size = object->un_pager.vnp.vnp_size - foff;
713
714 /*
715 * round up physical size for real devices.
716 */
717 if (dp->v_type == VBLK || dp->v_type == VCHR) {
718 int secmask = dp->v_rdev->si_bsize_phys - 1;
719 KASSERT(secmask < PAGE_SIZE, ("vnode_pager_generic_getpages: sector size %d too large\n", secmask + 1));
720 size = (size + secmask) & ~secmask;
721 }
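	/*
	 * e.g. a 512-byte-sector device gives secmask 0x1ff, so a
	 * 6000-byte transfer is rounded up to 6144 bytes to keep the
	 * request aligned to whole sectors.
	 */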
722
723 bp = getpbuf(&vnode_pbuf_freecnt);
724 kva = (vm_offset_t) bp->b_data;
725
726 /*
727 * and map the pages to be read into the kva
728 */
729 pmap_qenter(kva, m, count);
730
731 /* build a minimal buffer header */
732 bp->b_iocmd = BIO_READ;
733 bp->b_iodone = vnode_pager_iodone;
734 /* B_PHYS is not set, but it is nice to fill this in */
735 bp->b_rcred = bp->b_wcred = curproc->p_ucred;
736 if (bp->b_rcred != NOCRED)
737 crhold(bp->b_rcred);
738 if (bp->b_wcred != NOCRED)
739 crhold(bp->b_wcred);
740 bp->b_blkno = firstaddr;
741 pbgetvp(dp, bp);
742 bp->b_bcount = size;
743 bp->b_bufsize = size;
744
745 cnt.v_vnodein++;
746 cnt.v_vnodepgsin += count;
747
748 /* do the input */
749 BUF_STRATEGY(bp);
750
751 s = splvm();
752 /* we definitely need to be at splvm here */
753
754 while ((bp->b_flags & B_DONE) == 0) {
755 tsleep(bp, PVM, "vnread", 0);
756 }
757 splx(s);
758 if ((bp->b_ioflags & BIO_ERROR) != 0)
759 error = EIO;
760
761 if (!error) {
762 if (size != count * PAGE_SIZE)
763 bzero((caddr_t) kva + size, PAGE_SIZE * count - size);
764 }
765 pmap_qremove(kva, count);
766
767 /*
768 * free the buffer header back to the swap buffer pool
769 */
770 relpbuf(bp, &vnode_pbuf_freecnt);
771
772 for (i = 0, tfoff = foff; i < count; i++, tfoff = nextoff) {
773 vm_page_t mt;
774
775 nextoff = tfoff + PAGE_SIZE;
776 mt = m[i];
777
778 if (nextoff <= object->un_pager.vnp.vnp_size) {
779 /*
780 * Read filled up entire page.
781 */
782 mt->valid = VM_PAGE_BITS_ALL;
783 vm_page_undirty(mt); /* should be an assert? XXX */
784 pmap_clear_modify(VM_PAGE_TO_PHYS(mt));
785 } else {
786 /*
787 * Read did not fill up entire page. Since this
788 * is getpages, the page may be mapped, so we have
789 * to zero the invalid portions of the page even
790 * though we aren't setting them valid.
791 *
792 * Currently we do not set the entire page valid,
793 * we just try to clear the piece that we couldn't
794 * read.
795 */
796 vm_page_set_validclean(mt, 0,
797 object->un_pager.vnp.vnp_size - tfoff);
798 /* handled by vm_fault now */
799 /* vm_page_zero_invalid(mt, FALSE); */
800 }
801
802 vm_page_flag_clear(mt, PG_ZERO);
803 if (i != reqpage) {
804
805 /*
806 * whether or not to leave the page activated is up in
807 * the air, but we should put the page on a page queue
808 * somewhere. (it already is in the object). Result:
809 * It appears that empirical results show that
810 * deactivating pages is best.
811 */
812
813 /*
814 * just in case someone was asking for this page we
815 * now tell them that it is ok to use
816 */
817 if (!error) {
818 if (mt->flags & PG_WANTED)
819 vm_page_activate(mt);
820 else
821 vm_page_deactivate(mt);
822 vm_page_wakeup(mt);
823 } else {
824 vnode_pager_freepage(mt);
825 }
826 }
827 }
828 if (error) {
829 printf("vnode_pager_getpages: I/O read error\n");
830 }
831 return (error ? VM_PAGER_ERROR : VM_PAGER_OK);
832}
833
834/*
835 * EOPNOTSUPP is no longer legal. For local media VFS's that do not
836 * implement their own VOP_PUTPAGES, their VOP_PUTPAGES should call to
837 * vnode_pager_generic_putpages() to implement the previous behaviour.
838 *
839 * All other FS's should use the bypass to get to the local media
840 * backing vp's VOP_PUTPAGES.
841 */
842static void
843vnode_pager_putpages(object, m, count, sync, rtvals)
844 vm_object_t object;
845 vm_page_t *m;
846 int count;
847 boolean_t sync;
848 int *rtvals;
849{
850 int rtval;
851 struct vnode *vp;
852 int bytes = count * PAGE_SIZE;
853
854 /*
855 * Force synchronous operation if we are extremely low on memory
856 * to prevent a low-memory deadlock. VOP operations often need to
857 * allocate more memory to initiate the I/O ( i.e. do a BMAP
858 * operation ). The swapper handles the case by limiting the amount
859 * of asynchronous I/O, but that sort of solution doesn't scale well
860 * for the vnode pager without a lot of work.
861 *
862 * Also, the backing vnode's iodone routine may not wake the pageout
 863	 * daemon up. This should probably be addressed XXX.
864 */
865
866 if ((cnt.v_free_count + cnt.v_cache_count) < cnt.v_pageout_free_min)
867 sync |= OBJPC_SYNC;
868
869 /*
870 * Call device-specific putpages function
871 */
872
873 vp = object->handle;
874 rtval = VOP_PUTPAGES(vp, m, bytes, sync, rtvals, 0);
875 if (rtval == EOPNOTSUPP) {
876 printf("vnode_pager: *** WARNING *** stale FS putpages\n");
 877		rtval = vnode_pager_generic_putpages(vp, m, bytes, sync, rtvals);
878 }
879}
880
881
882/*
883 * This is now called from local media FS's to operate against their
884 * own vnodes if they fail to implement VOP_PUTPAGES.
885 */
886int
887vnode_pager_generic_putpages(vp, m, bytecount, flags, rtvals)
888 struct vnode *vp;
889 vm_page_t *m;
890 int bytecount;
891 int flags;
892 int *rtvals;
893{
894 int i;
895 vm_object_t object;
896 int count;
897
898 int maxsize, ncount;
899 vm_ooffset_t poffset;
900 struct uio auio;
901 struct iovec aiov;
902 int error;
903 int ioflags;
904
905 object = vp->v_object;
906 count = bytecount / PAGE_SIZE;
907
908 for (i = 0; i < count; i++)
909 rtvals[i] = VM_PAGER_AGAIN;
910
911 if ((int) m[0]->pindex < 0) {
912 printf("vnode_pager_putpages: attempt to write meta-data!!! -- 0x%lx(%x)\n",
913 (long)m[0]->pindex, m[0]->dirty);
914 rtvals[0] = VM_PAGER_BAD;
915 return VM_PAGER_BAD;
916 }
917
918 maxsize = count * PAGE_SIZE;
919 ncount = count;
920
921 poffset = IDX_TO_OFF(m[0]->pindex);
922 if (maxsize + poffset > object->un_pager.vnp.vnp_size) {
923 if (object->un_pager.vnp.vnp_size > poffset)
924 maxsize = object->un_pager.vnp.vnp_size - poffset;
925 else
926 maxsize = 0;
927 ncount = btoc(maxsize);
928 if (ncount < count) {
929 for (i = ncount; i < count; i++) {
930 rtvals[i] = VM_PAGER_BAD;
931 }
932 }
933 }
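	/*
	 * Example of the clamp above: vnp_size 10000 with poffset 8192
	 * and count 2 yields maxsize 1808; btoc() rounds that up to
	 * ncount 1, so the page lying wholly past EOF is marked
	 * VM_PAGER_BAD.
	 */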
934
935 ioflags = IO_VMIO;
936 ioflags |= (flags & (VM_PAGER_PUT_SYNC | VM_PAGER_PUT_INVAL)) ? IO_SYNC: 0;
937 ioflags |= (flags & VM_PAGER_PUT_INVAL) ? IO_INVAL: 0;
938
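	/*
	 * UIO_NOCOPY lets VOP_WRITE take the data straight from the VM
	 * pages already attached to the object, so the NULL iov_base is
	 * never dereferenced; only the offset and length matter here.
	 */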
939 aiov.iov_base = (caddr_t) 0;
940 aiov.iov_len = maxsize;
941 auio.uio_iov = &aiov;
942 auio.uio_iovcnt = 1;
943 auio.uio_offset = poffset;
944 auio.uio_segflg = UIO_NOCOPY;
945 auio.uio_rw = UIO_WRITE;
946 auio.uio_resid = maxsize;
947 auio.uio_procp = (struct proc *) 0;
948 error = VOP_WRITE(vp, &auio, ioflags, curproc->p_ucred);
949 cnt.v_vnodeout++;
950 cnt.v_vnodepgsout += ncount;
951
952 if (error) {
953 printf("vnode_pager_putpages: I/O error %d\n", error);
954 }
955 if (auio.uio_resid) {
956 printf("vnode_pager_putpages: residual I/O %d at %lu\n",
957 auio.uio_resid, (u_long)m[0]->pindex);
958 }
959 for (i = 0; i < ncount; i++) {
960 rtvals[i] = VM_PAGER_OK;
961 }
962 return rtvals[0];
963}
964
965struct vnode *
966vnode_pager_lock(object)
967 vm_object_t object;
968{
969 struct proc *p = curproc; /* XXX */
970
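	/*
	 * Walk the shadow (backing-object) chain, e.g. through
	 * copy-on-write layers created by fork(), to the underlying
	 * OBJT_VNODE object and return its vnode locked shared.
	 */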
971 for (; object != NULL; object = object->backing_object) {
972 if (object->type != OBJT_VNODE)
973 continue;
974 if (object->flags & OBJ_DEAD)
975 return NULL;
976
977 while (vget(object->handle,
978 LK_NOPAUSE | LK_SHARED | LK_RETRY | LK_CANRECURSE, p)) {
979 if ((object->flags & OBJ_DEAD) || (object->type != OBJT_VNODE))
980 return NULL;
981 printf("vnode_pager_lock: retrying\n");
982 }
983 return object->handle;
984 }
985 return NULL;
986}