vfs_bio.c revision 5759
1/*
2 * Copyright (c) 1994 John S. Dyson
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice immediately at the beginning of the file, without modification,
10 *    this list of conditions, and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 * 3. Absolutely no warranty of function or purpose is made by the author
15 *    John S. Dyson.
16 * 4. This work was done expressly for inclusion into FreeBSD.  Other use
17 *    is allowed if this notation is included.
18 * 5. Modifications may be freely made to this file if the above conditions
19 *    are met.
20 *
21 * $Id: vfs_bio.c,v 1.22 1995/01/20 20:11:31 wpaul Exp $
22 */
23
24/*
25 * this file contains a new buffer I/O scheme implementing a coherent
26 * VM object and buffer cache scheme.  Pains have been taken to make
27 * sure that the performance degradation associated with schemes such
28 * as this is not realized.
29 *
30 * Author:  John S. Dyson
31 * Significant help during the development and debugging phases
32 * has been provided by David Greenman, also of the FreeBSD core team.
33 */
34
35#define VMIO
36#include <sys/param.h>
37#include <sys/systm.h>
38#include <sys/kernel.h>
39#include <sys/proc.h>
40#include <sys/vnode.h>
41#include <vm/vm.h>
42#include <vm/vm_pageout.h>
43#include <vm/vm_page.h>
44#include <vm/vm_object.h>
45#include <sys/buf.h>
46#include <sys/mount.h>
47#include <sys/malloc.h>
48#include <sys/resourcevar.h>
49#include <sys/proc.h>
50
51#include <miscfs/specfs/specdev.h>
52
53struct buf *buf;		/* buffer header pool */
54int nbuf;			/* number of buffer headers calculated
55				 * elsewhere */
56struct swqueue bswlist;
57int nvmio, nlru;
58
59extern vm_map_t buffer_map, io_map, kernel_map, pager_map;
60
61void vm_hold_free_pages(struct buf * bp, vm_offset_t from, vm_offset_t to);
62void vm_hold_load_pages(struct buf * bp, vm_offset_t from, vm_offset_t to);
63void vfs_dirty_pages(struct buf * bp);
64void vfs_busy_pages(struct buf *, int clear_modify);
65
66int needsbuffer;
67
68/*
69 * Internal update daemon, process 3
70 *	The variable vfs_update_wakeup allows for internal syncs.
71 */
72int vfs_update_wakeup;
73
74
75/*
76 * buffers base kva
77 */
78caddr_t buffers_kva;
79
80/*
81 * bogus page -- for I/O to/from partially complete buffers
82 */
83vm_page_t bogus_page;
84vm_offset_t bogus_offset;
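/*
 * bogus_page is substituted into a buffer's page list by vfs_busy_pages()
 * for pages that already contain valid data, so that a device read cannot
 * overwrite them; biodone() and vfs_unbusy_pages() look the real pages up
 * again and put them back before the buffer is returned.
 */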
85
86/*
87 * Initialize buffer headers and related structures.
88 */
89void
90bufinit()
91{
92	struct buf *bp;
93	int i;
94
95	TAILQ_INIT(&bswlist);
96	LIST_INIT(&invalhash);
97
98	/* first, make a null hash table */
99	for (i = 0; i < BUFHSZ; i++)
100		LIST_INIT(&bufhashtbl[i]);
101
102	/* next, make a null set of free lists */
103	for (i = 0; i < BUFFER_QUEUES; i++)
104		TAILQ_INIT(&bufqueues[i]);
105
106	buffers_kva = (caddr_t) kmem_alloc_pageable(buffer_map, MAXBSIZE * nbuf);
107	/* finally, initialize each buffer header and stick on empty q */
108	for (i = 0; i < nbuf; i++) {
109		bp = &buf[i];
110		bzero(bp, sizeof *bp);
111		bp->b_flags = B_INVAL;	/* we're just an empty header */
112		bp->b_dev = NODEV;
113		bp->b_vp = NULL;
114		bp->b_rcred = NOCRED;
115		bp->b_wcred = NOCRED;
116		bp->b_qindex = QUEUE_EMPTY;
117		bp->b_vnbufs.le_next = NOLIST;
118		bp->b_data = buffers_kva + i * MAXBSIZE;
119		TAILQ_INSERT_TAIL(&bufqueues[QUEUE_EMPTY], bp, b_freelist);
120		LIST_INSERT_HEAD(&invalhash, bp, b_hash);
121	}
122
123	bogus_offset = kmem_alloc_pageable(kernel_map, PAGE_SIZE);
124	bogus_page = vm_page_alloc(kernel_object, bogus_offset - VM_MIN_KERNEL_ADDRESS, 0);
125
126}
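/*
 * Note that bufinit() only reserves pageable KVA: each header gets a fixed
 * MAXBSIZE window at buffers_kva + i * MAXBSIZE.  Physical pages are only
 * attached later, either by vm_hold_load_pages() (or a small malloc'ed
 * area) for non-VMIO buffers, or by mapping the vnode's VM object pages
 * with pmap_qenter() in allocbuf().
 */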
127
128/*
129 * remove the buffer from the appropriate free list
130 */
131void
132bremfree(struct buf * bp)
133{
134	int s = splbio();
135
136	if (bp->b_qindex != QUEUE_NONE) {
137		if (bp->b_qindex == QUEUE_LRU)
138			--nlru;
139		TAILQ_REMOVE(&bufqueues[bp->b_qindex], bp, b_freelist);
140		bp->b_qindex = QUEUE_NONE;
141	} else {
142		panic("bremfree: removing a buffer when not on a queue");
143	}
144	splx(s);
145}
146
147/*
148 * Get a buffer with the specified data.  Look in the cache first.
149 */
150int
151bread(struct vnode * vp, daddr_t blkno, int size, struct ucred * cred,
152    struct buf ** bpp)
153{
154	struct buf *bp;
155
156	bp = getblk(vp, blkno, size, 0, 0);
157	*bpp = bp;
158
159	/* if not found in cache, do some I/O */
160	if ((bp->b_flags & B_CACHE) == 0) {
161		if (curproc && curproc->p_stats)	/* count block I/O */
162			curproc->p_stats->p_ru.ru_inblock++;
163		bp->b_flags |= B_READ;
164		bp->b_flags &= ~(B_DONE | B_ERROR | B_INVAL);
165		if (bp->b_rcred == NOCRED) {
166			if (cred != NOCRED)
167				crhold(cred);
168			bp->b_rcred = cred;
169		}
170		vfs_busy_pages(bp, 0);
171		VOP_STRATEGY(bp);
172		return (biowait(bp));
173	} else if (bp->b_lblkno == bp->b_blkno) {
174		VOP_BMAP(vp, bp->b_lblkno, (struct vnode **) 0,
175		    &bp->b_blkno, (int *) 0);
176	}
177	return (0);
178}
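/*
 * Typical filesystem usage of bread() (an illustrative sketch only; "bsize"
 * stands for the filesystem block size and is not defined in this file):
 *
 *	struct buf *bp;
 *	int error;
 *
 *	error = bread(vp, lbn, bsize, NOCRED, &bp);
 *	if (error) {
 *		brelse(bp);
 *		return (error);
 *	}
 *	... inspect bp->b_data ...
 *	brelse(bp);
 */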
179
180/*
181 * Operates like bread, but also starts asynchronous I/O on
182 * read-ahead blocks.
183 */
184int
185breadn(struct vnode * vp, daddr_t blkno, int size,
186    daddr_t * rablkno, int *rabsize,
187    int cnt, struct ucred * cred, struct buf ** bpp)
188{
189	struct buf *bp, *rabp;
190	int i;
191	int rv = 0, readwait = 0;
192
193	*bpp = bp = getblk(vp, blkno, size, 0, 0);
194
195	/* if not found in cache, do some I/O */
196	if ((bp->b_flags & B_CACHE) == 0) {
197		if (curproc && curproc->p_stats)	/* count block I/O */
198			curproc->p_stats->p_ru.ru_inblock++;
199		bp->b_flags |= B_READ;
200		bp->b_flags &= ~(B_DONE | B_ERROR | B_INVAL);
201		if (bp->b_rcred == NOCRED) {
202			if (cred != NOCRED)
203				crhold(cred);
204			bp->b_rcred = cred;
205		}
206		vfs_busy_pages(bp, 0);
207		VOP_STRATEGY(bp);
208		++readwait;
209	} else if (bp->b_lblkno == bp->b_blkno) {
210		VOP_BMAP(vp, bp->b_lblkno, (struct vnode **) 0,
211		    &bp->b_blkno, (int *) 0);
212	}
213	for (i = 0; i < cnt; i++, rablkno++, rabsize++) {
214		if (inmem(vp, *rablkno))
215			continue;
216		rabp = getblk(vp, *rablkno, *rabsize, 0, 0);
217
218		if ((rabp->b_flags & B_CACHE) == 0) {
219			if (curproc && curproc->p_stats)
220				curproc->p_stats->p_ru.ru_inblock++;
221			rabp->b_flags |= B_READ | B_ASYNC;
222			rabp->b_flags &= ~(B_DONE | B_ERROR | B_INVAL);
223			if (rabp->b_rcred == NOCRED) {
224				if (cred != NOCRED)
225					crhold(cred);
226				rabp->b_rcred = cred;
227			}
228			vfs_busy_pages(rabp, 0);
229			VOP_STRATEGY(rabp);
230		} else {
231			brelse(rabp);
232		}
233	}
234
235	if (readwait) {
236		rv = biowait(bp);
237	}
238	return (rv);
239}
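/*
 * breadn() issues the read-ahead buffers with B_READ | B_ASYNC and never
 * waits for them; only the primary buffer is biowait()ed, and read-ahead
 * blocks already resident (inmem()) are skipped.
 */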
240
241/*
242 * this routine is used by filesystems to get at pages in the PG_CACHE
243 * queue.  Also, it is used to read pages that are currently being
244 * written out by the file i/o routines.
245 */
246int
247vfs_read_bypass(struct vnode * vp, struct uio * uio, int maxread, daddr_t lbn)
248{
249	vm_page_t m;
250	vm_offset_t kv;
251	int nread;
252	int error;
253	struct buf *bp, *bpa;
254	vm_object_t obj;
255	int off;
256	int nrest;
257	int flags;
258	int s;
259
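	/*
	 * XXX the unconditional return below disables this bypass path;
	 * everything after it is currently unreachable.
	 */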
260	return 0;
261	/*
262	 * don't use the bypass mechanism for non-vmio vnodes
263	 */
264	if ((vp->v_flag & VVMIO) == 0)
265		return 0;
266	/*
267	 * get the VM object (it has the pages)
268	 */
269	obj = (vm_object_t) vp->v_vmdata;
270	if (obj == NULL)
271		return 0;
272
273	/*
274	 * if there is a buffer that is not busy, it is faster to use it.
275	 * This way read-ahead, etc., work better.
276	 */
277
278	s = splbio();
279	if ((bp = incore(vp, lbn)) &&
280	    (((bp->b_flags & B_READ) && (bp->b_flags & B_BUSY))
281		|| (bp->b_flags & B_BUSY) == 0)) {
282		splx(s);
283		return 0;
284	}
285	splx(s);
286
287	/*
288	 * get a pbuf --> we just use the kva
289	 */
290	kv = kmem_alloc_wait(pager_map, PAGE_SIZE);
291	nread = 0;
292	error = 0;
293
294	while (!error && uio->uio_resid && maxread > 0) {
295		int po;
296		int count;
297		int s;
298
299relookup:
300		/*
301		 * lookup the page
302		 */
303		m = vm_page_lookup(obj, trunc_page(uio->uio_offset));
304		if (!m)
305			break;
306		/*
307		 * get the offset into the page, and the amount to read in the
308		 * page
309		 */
310		nrest = round_page(uio->uio_offset) - uio->uio_offset;
311		if (nrest > uio->uio_resid)
312			nrest = uio->uio_resid;
313
314		/*
315		 * check the valid bits for the page (DEV_BSIZE chunks)
316		 */
317		if (!vm_page_is_valid(m, uio->uio_offset, nrest))
318			break;
319
320		/*
321		 * if the page is busy, wait for it
322		 */
323		s = splhigh();
324		if (!m->valid || (m->flags & PG_BUSY)) {
325			m->flags |= PG_WANTED;
326			tsleep((caddr_t) m, PVM, "vnibyp", 0);
327			splx(s);
328			goto relookup;
329		}
330		/*
331		 * if the page is on the cache queue, remove it -- cache queue
332		 * pages should be freeable by vm_page_alloc anytime.
333		 */
334		if (m->flags & PG_CACHE) {
335			if (cnt.v_free_count + cnt.v_cache_count < cnt.v_free_reserved) {
336				VM_WAIT;
337				goto relookup;
338			}
339			vm_page_unqueue(m);
340		}
341		/*
342		 * add a buffer mapping (essentially wires the page too).
343		 */
344		m->bmapped++;
345		splx(s);
346
347		/*
348		 * enter it into the kva
349		 */
350		pmap_qenter(kv, &m, 1);
351
352		/*
353		 * do the copy
354		 */
355		po = uio->uio_offset & (PAGE_SIZE - 1);
356		count = PAGE_SIZE - po;
357		if (count > maxread)
358			count = maxread;
359		if (count > uio->uio_resid)
360			count = uio->uio_resid;
361
362		error = uiomove((caddr_t) kv + po, count, uio);
363		if (!error) {
364			nread += count;
365			maxread -= count;
366		}
367		/*
368		 * remove from kva
369		 */
370		pmap_qremove(kv, 1);
371		PAGE_WAKEUP(m);	/* XXX probably unnecessary */
372		/*
373		 * If the page was on the cache queue, then by definition
374		 * bmapped was 0. Thus the following case will also take care
375		 * of the page being removed from the cache queue above.
376		 * Also, it is possible that the page was already entered onto
377		 * another queue (or was already there), so we don't put it
378		 * onto the cache queue...
379		 */
380		m->bmapped--;
381		if (m->bmapped == 0 &&
382		    (m->flags & (PG_CACHE | PG_ACTIVE | PG_INACTIVE)) == 0 &&
383		    m->wire_count == 0) {
384			vm_page_test_dirty(m);
385
386			/*
387			 * make sure that the darned page is on a queue
388			 * somewhere...
389			 */
390			if ((m->dirty & m->valid) == 0) {
391				vm_page_cache(m);
392			} else if (m->hold_count == 0) {
393				vm_page_deactivate(m);
394			} else {
395				vm_page_activate(m);
396			}
397		}
398	}
399	/*
400	 * release our buffer(kva).
401	 */
402	kmem_free_wakeup(pager_map, kv, PAGE_SIZE);
403	return nread;
404}
405
406
407/*
408 * Write, release buffer on completion.  (Done by iodone
409 * if async.)
410 */
411int
412bwrite(struct buf * bp)
413{
414	int oldflags = bp->b_flags;
415
416	if (bp->b_flags & B_INVAL) {
417		brelse(bp);
418		return (0);
419	}
420	if (!(bp->b_flags & B_BUSY))
421		panic("bwrite: buffer is not busy???");
422
423	bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI);
424	bp->b_flags |= B_WRITEINPROG;
425
426	if (oldflags & B_ASYNC) {
427		if (oldflags & B_DELWRI) {
428			reassignbuf(bp, bp->b_vp);
429		} else if (curproc) {
430			++curproc->p_stats->p_ru.ru_oublock;
431		}
432	}
433	bp->b_vp->v_numoutput++;
434	vfs_busy_pages(bp, 1);
435	VOP_STRATEGY(bp);
436
437	if ((oldflags & B_ASYNC) == 0) {
438		int rtval = biowait(bp);
439
440		if (oldflags & B_DELWRI) {
441			reassignbuf(bp, bp->b_vp);
442		} else if (curproc) {
443			++curproc->p_stats->p_ru.ru_oublock;
444		}
445		brelse(bp);
446		return (rtval);
447	}
448	return (0);
449}
450
451int
452vn_bwrite(ap)
453	struct vop_bwrite_args *ap;
454{
455	return (bwrite(ap->a_bp));
456}
457
458/*
459 * Delayed write. (Buffer is marked dirty).
460 */
461void
462bdwrite(struct buf * bp)
463{
464
465	if ((bp->b_flags & B_BUSY) == 0) {
466		panic("bdwrite: buffer is not busy");
467	}
468	if (bp->b_flags & B_INVAL) {
469		brelse(bp);
470		return;
471	}
472	if (bp->b_flags & B_TAPE) {
473		bawrite(bp);
474		return;
475	}
476	bp->b_flags &= ~B_READ;
477	vfs_dirty_pages(bp);
478	if ((bp->b_flags & B_DELWRI) == 0) {
479		if (curproc)
480			++curproc->p_stats->p_ru.ru_oublock;
481		bp->b_flags |= B_DONE | B_DELWRI;
482		reassignbuf(bp, bp->b_vp);
483	}
484	brelse(bp);
485	return;
486}
487
488/*
489 * Asynchronous write.
490 * Start output on a buffer, but do not wait for it to complete.
491 * The buffer is released when the output completes.
492 */
493void
494bawrite(struct buf * bp)
495{
496	if (((bp->b_flags & B_DELWRI) == 0) && (bp->b_vp->v_numoutput > 24)) {
497		int s = splbio();
498
499		while (bp->b_vp->v_numoutput > 16) {
500			bp->b_vp->v_flag |= VBWAIT;
501			tsleep((caddr_t) &bp->b_vp->v_numoutput, PRIBIO, "bawnmo", 0);
502		}
503		splx(s);
504	}
505	bp->b_flags |= B_ASYNC;
506	(void) bwrite(bp);
507}
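/*
 * bawrite() throttles per-vnode asynchronous writes: if the buffer is not
 * already delayed-write and the vnode has more than 24 writes in flight,
 * the caller sleeps until the count drains to 16 or fewer before queueing
 * another one.
 */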
508
509/*
510 * Release a buffer.
511 */
512void
513brelse(struct buf * bp)
514{
515	int s;
516
517	if (bp->b_flags & B_CLUSTER) {
518		relpbuf(bp);
519		return;
520	}
521	/* anyone need a "free" block? */
522	s = splbio();
523
524	if (needsbuffer) {
525		needsbuffer = 0;
526		wakeup((caddr_t) &needsbuffer);
527	}
528	/* anyone need this block? */
529	if (bp->b_flags & B_WANTED) {
530		bp->b_flags &= ~(B_PDWANTED | B_WANTED | B_AGE);
531		wakeup((caddr_t) bp);
532	} else if (bp->b_flags & B_VMIO) {
533		bp->b_flags &= ~(B_WANTED | B_PDWANTED);
534		wakeup((caddr_t) bp);
535	}
536	if (bp->b_flags & B_LOCKED)
537		bp->b_flags &= ~B_ERROR;
538
539	if ((bp->b_flags & (B_NOCACHE | B_INVAL | B_ERROR)) ||
540	    (bp->b_bufsize <= 0)) {
541		bp->b_flags |= B_INVAL;
542		bp->b_flags &= ~(B_DELWRI | B_CACHE);
543		if (((bp->b_flags & B_VMIO) == 0) && bp->b_vp)
544			brelvp(bp);
545	}
546	if (bp->b_flags & B_VMIO) {
547		vm_offset_t foff;
548		vm_object_t obj;
549		int i, resid;
550		vm_page_t m;
551		int iototal = bp->b_bufsize;
552
553		foff = 0;
554		obj = 0;
555		if (bp->b_npages) {
556			if (bp->b_vp && bp->b_vp->v_mount) {
557				foff = bp->b_vp->v_mount->mnt_stat.f_iosize * bp->b_lblkno;
558			} else {
559				/*
560				 * vnode pointer has been ripped away --
561				 * probably file gone...
562				 */
563				foff = bp->b_pages[0]->offset;
564			}
565		}
566		for (i = 0; i < bp->b_npages; i++) {
567			m = bp->b_pages[i];
568			if (m == bogus_page) {
569				panic("brelse: bogus page found");
570			}
571			resid = (m->offset + PAGE_SIZE) - foff;
572			if (resid > iototal)
573				resid = iototal;
574			if (resid > 0) {
575				if (bp->b_flags & (B_ERROR | B_NOCACHE)) {
576					vm_page_set_invalid(m, foff, resid);
577				} else if ((bp->b_flags & B_DELWRI) == 0) {
578					vm_page_set_clean(m, foff, resid);
579					vm_page_set_valid(m, foff, resid);
580				}
581			} else {
582				vm_page_test_dirty(m);
583			}
584			if (bp->b_flags & B_INVAL) {
585				if (m->bmapped == 0) {
586					panic("brelse: bmapped is zero for page\n");
587				}
588				--m->bmapped;
589				if (m->bmapped == 0) {
590					PAGE_WAKEUP(m);
591					if ((m->dirty & m->valid) == 0)
592						vm_page_cache(m);
593				}
594			}
595			foff += resid;
596			iototal -= resid;
597		}
598
599		if (bp->b_flags & B_INVAL) {
600			pmap_qremove(trunc_page((vm_offset_t) bp->b_data), bp->b_npages);
601			bp->b_npages = 0;
602			bp->b_bufsize = 0;
603			bp->b_flags &= ~B_VMIO;
604			if (bp->b_vp)
605				brelvp(bp);
606			--nvmio;
607		}
608	}
609	if (bp->b_qindex != QUEUE_NONE)
610		panic("brelse: free buffer onto another queue???");
611
612	/* enqueue */
613	/* buffers with no memory */
614	if (bp->b_bufsize == 0) {
615		bp->b_qindex = QUEUE_EMPTY;
616		TAILQ_INSERT_TAIL(&bufqueues[QUEUE_EMPTY], bp, b_freelist);
617		LIST_REMOVE(bp, b_hash);
618		LIST_INSERT_HEAD(&invalhash, bp, b_hash);
619		bp->b_dev = NODEV;
620		/* buffers with junk contents */
621	} else if (bp->b_flags & (B_ERROR | B_INVAL | B_NOCACHE)) {
622		bp->b_qindex = QUEUE_AGE;
623		TAILQ_INSERT_HEAD(&bufqueues[QUEUE_AGE], bp, b_freelist);
624		LIST_REMOVE(bp, b_hash);
625		LIST_INSERT_HEAD(&invalhash, bp, b_hash);
626		bp->b_dev = NODEV;
627		/* buffers that are locked */
628	} else if (bp->b_flags & B_LOCKED) {
629		bp->b_qindex = QUEUE_LOCKED;
630		TAILQ_INSERT_TAIL(&bufqueues[QUEUE_LOCKED], bp, b_freelist);
631		/* buffers with stale but valid contents */
632	} else if (bp->b_flags & B_AGE) {
633		bp->b_qindex = QUEUE_AGE;
634		TAILQ_INSERT_TAIL(&bufqueues[QUEUE_AGE], bp, b_freelist);
635		/* buffers with valid and quite potentially reusable contents */
636	} else {
637		if (bp->b_flags & B_VMIO)
638			bp->b_qindex = QUEUE_VMIO;
639		else {
640			bp->b_qindex = QUEUE_LRU;
641			++nlru;
642		}
643		TAILQ_INSERT_TAIL(&bufqueues[bp->b_qindex], bp, b_freelist);
644	}
645
646	/* unlock */
647	bp->b_flags &= ~(B_PDWANTED | B_WANTED | B_BUSY | B_ASYNC | B_NOCACHE | B_AGE);
648	splx(s);
649}
650
651/*
652 * this routine implements clustered async writes for
653 * clearing out B_DELWRI buffers...
654 */
655void
656vfs_bio_awrite(struct buf * bp)
657{
658	int i;
659	daddr_t lblkno = bp->b_lblkno;
660	struct vnode *vp = bp->b_vp;
661	int s;
662	int ncl;
663	struct buf *bpa;
664
665	s = splbio();
666	if (vp->v_mount && (vp->v_flag & VVMIO) &&
667		(bp->b_flags & (B_CLUSTEROK|B_INVAL)) == B_CLUSTEROK) {
668		int size  = vp->v_mount->mnt_stat.f_iosize;
669		for (i = 1; i < MAXPHYS / size; i++) {
670			if ((bpa = incore(vp, lblkno + i)) &&
671			    ((bpa->b_flags & (B_BUSY | B_DELWRI | B_CLUSTEROK | B_INVAL)) == (B_DELWRI | B_CLUSTEROK)) &&
672			    (bpa->b_bufsize == size)) {
673				if ((bpa->b_blkno == bpa->b_lblkno) ||
674				    (bpa->b_blkno != bp->b_blkno + (i * size) / DEV_BSIZE))
675					break;
676			} else {
677				break;
678			}
679		}
680		ncl = i;
681		/*
682		 * this is a possible cluster write
683		 */
684		if (ncl != 1) {
685			cluster_wbuild(vp, NULL, size, lblkno, ncl, -1);
686			splx(s);
687			return;
688		}
689	}
690	/*
691	 * default (old) behavior, writing out only one block
692	 */
693	bremfree(bp);
694	bp->b_flags |= B_BUSY | B_ASYNC;
695	bwrite(bp);
696	splx(s);
697}
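/*
 * vfs_bio_awrite() looks for a cluster before writing: starting at the next
 * logical block it collects in-core buffers that are idle, delayed-write,
 * cluster-capable, of the same size, and physically contiguous on the
 * device.  A run longer than one block goes to cluster_wbuild(); otherwise
 * the single buffer is written with an async bwrite().
 */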
698
699int freebufspace;
700int allocbufspace;
701
702/*
703 * Find a buffer header which is available for use.
704 */
705struct buf *
706getnewbuf(int slpflag, int slptimeo, int doingvmio)
707{
708	struct buf *bp;
709	int s;
710	int firstbp = 1;
711
712	s = splbio();
713start:
714	/* can we constitute a new buffer? */
715	if ((bp = bufqueues[QUEUE_EMPTY].tqh_first)) {
716		if (bp->b_qindex != QUEUE_EMPTY)
717			panic("getnewbuf: inconsistent EMPTY queue");
718		bremfree(bp);
719		goto fillbuf;
720	}
721	/*
722	 * we keep the file I/O from hogging metadata I/O
723	 */
724	if ((bp = bufqueues[QUEUE_AGE].tqh_first)) {
725		if (bp->b_qindex != QUEUE_AGE)
726			panic("getnewbuf: inconsistent AGE queue");
727	} else if ((nvmio > (2 * nbuf / 3))
728	    && (bp = bufqueues[QUEUE_VMIO].tqh_first)) {
729		if (bp->b_qindex != QUEUE_VMIO)
730			panic("getnewbuf: inconsistent VMIO queue");
731	} else if ((!doingvmio || (nlru > (2 * nbuf / 3))) &&
732	    (bp = bufqueues[QUEUE_LRU].tqh_first)) {
733		if (bp->b_qindex != QUEUE_LRU)
734			panic("getnewbuf: inconsistent LRU queue");
735	}
736	if (!bp) {
737		if (doingvmio) {
738			if ((bp = bufqueues[QUEUE_VMIO].tqh_first)) {
739				if (bp->b_qindex != QUEUE_VMIO)
740					panic("getnewbuf: inconsistent VMIO queue");
741			} else if ((bp = bufqueues[QUEUE_LRU].tqh_first)) {
742				if (bp->b_qindex != QUEUE_LRU)
743					panic("getnewbuf: inconsistent LRU queue");
744			}
745		} else {
746			if ((bp = bufqueues[QUEUE_LRU].tqh_first)) {
747				if (bp->b_qindex != QUEUE_LRU)
748					panic("getnewbuf: inconsistent LRU queue");
749			} else if ((bp = bufqueues[QUEUE_VMIO].tqh_first)) {
750				if (bp->b_qindex != QUEUE_VMIO)
751					panic("getnewbuf: inconsistent VMIO queue");
752			}
753		}
754	}
755	if (!bp) {
756		/* wait for a free buffer of any kind */
757		needsbuffer = 1;
758		tsleep((caddr_t) &needsbuffer, PRIBIO | slpflag, "newbuf", slptimeo);
759		splx(s);
760		return (0);
761	}
762	/* if we are a delayed write, convert to an async write */
763	if ((bp->b_flags & (B_DELWRI | B_INVAL)) == B_DELWRI) {
764		vfs_bio_awrite(bp);
765		if (!slpflag && !slptimeo) {
766			splx(s);
767			return (0);
768		}
769		goto start;
770	}
771	bremfree(bp);
772
773	if (bp->b_flags & B_VMIO) {
774		bp->b_flags |= B_INVAL | B_BUSY;
775		brelse(bp);
776		bremfree(bp);
777	}
778	if (bp->b_vp)
779		brelvp(bp);
780
781	/* we are not free, nor do we contain interesting data */
782	if (bp->b_rcred != NOCRED)
783		crfree(bp->b_rcred);
784	if (bp->b_wcred != NOCRED)
785		crfree(bp->b_wcred);
786fillbuf:
787	bp->b_flags = B_BUSY;
788	LIST_REMOVE(bp, b_hash);
789	LIST_INSERT_HEAD(&invalhash, bp, b_hash);
790	splx(s);
791	if (bp->b_bufsize) {
792		allocbuf(bp, 0, 0);
793	}
794	bp->b_dev = NODEV;
795	bp->b_vp = NULL;
796	bp->b_blkno = bp->b_lblkno = 0;
797	bp->b_iodone = 0;
798	bp->b_error = 0;
799	bp->b_resid = 0;
800	bp->b_bcount = 0;
801	bp->b_npages = 0;
802	bp->b_wcred = bp->b_rcred = NOCRED;
803	bp->b_data = buffers_kva + (bp - buf) * MAXBSIZE;
804	bp->b_dirtyoff = bp->b_dirtyend = 0;
805	bp->b_validoff = bp->b_validend = 0;
806	return (bp);
807}
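/*
 * Victim selection above: empty headers are taken first, then the AGE
 * queue; after that the VMIO and LRU queues are raided, with the 2*nbuf/3
 * thresholds on nvmio and nlru apparently intended to keep either class of
 * buffer from monopolizing the pool.  A delayed-write victim is flushed
 * with vfs_bio_awrite() and the scan restarts rather than discarding dirty
 * data.
 */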
808
809/*
810 * Check to see if a block is currently memory resident.
811 */
812struct buf *
813incore(struct vnode * vp, daddr_t blkno)
814{
815	struct buf *bp;
816	struct bufhashhdr *bh;
817
818	int s = splbio();
819
820	bh = BUFHASH(vp, blkno);
821	bp = bh->lh_first;
822
823	/* Search hash chain */
824	while (bp) {
825		/* hit */
826		if (bp->b_lblkno == blkno && bp->b_vp == vp
827		    && (bp->b_flags & B_INVAL) == 0) {
828			splx(s);
829			return (bp);
830		}
831		bp = bp->b_hash.le_next;
832	}
833	splx(s);
834
835	return (0);
836}
837
838/*
839 * returns true if no I/O is needed to access the
840 * associated VM object.
841 */
842
843int
844inmem(struct vnode * vp, daddr_t blkno)
845{
846	vm_object_t obj;
847	vm_offset_t off, toff, tinc;
848	vm_page_t m;
849
850	if (incore(vp, blkno))
851		return 1;
852	if (vp->v_mount == 0)
853		return 0;
854	if (vp->v_vmdata == 0)
855		return 0;
856
857	obj = (vm_object_t) vp->v_vmdata;
858	tinc = PAGE_SIZE;
859	if (tinc > vp->v_mount->mnt_stat.f_iosize)
860		tinc = vp->v_mount->mnt_stat.f_iosize;
861	off = blkno * vp->v_mount->mnt_stat.f_iosize;
862
863	for (toff = 0; toff < vp->v_mount->mnt_stat.f_iosize; toff += tinc) {
864		int mask;
865
866		m = vm_page_lookup(obj, trunc_page(toff + off));
867		if (!m)
868			return 0;
869		if (vm_page_is_valid(m, toff + off, tinc) == 0)
870			return 0;
871	}
872	return 1;
873}
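/*
 * inmem() answers "is this block resident?" without requiring a buffer:
 * it probes the vnode's VM object directly, checking vm_page_is_valid()
 * across the whole filesystem block in chunks of min(PAGE_SIZE, f_iosize).
 */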
874
875/*
876 * Get a block given a specified block and offset into a file/device.
877 */
878struct buf *
879getblk(struct vnode * vp, daddr_t blkno, int size, int slpflag, int slptimeo)
880{
881	struct buf *bp;
882	int s;
883	struct bufhashhdr *bh;
884	vm_offset_t off;
885	int nleft;
886
887	s = splbio();
888loop:
889	if ((cnt.v_free_count + cnt.v_cache_count) <
890	    cnt.v_free_reserved + MAXBSIZE / PAGE_SIZE)
891		wakeup((caddr_t) &vm_pages_needed);
892	if ((bp = incore(vp, blkno))) {
893		if (bp->b_flags & B_BUSY) {
894			bp->b_flags |= B_WANTED;
895			if (curproc == pageproc) {
896				bp->b_flags |= B_PDWANTED;
897				wakeup((caddr_t) &cnt.v_free_count);
898			}
899			if (!tsleep((caddr_t) bp, PRIBIO | slpflag, "getblk", slptimeo))
900				goto loop;
901			splx(s);
902			return (struct buf *) NULL;
903		}
904		bp->b_flags |= B_BUSY | B_CACHE;
905		bremfree(bp);
906		/*
907		 * check for size inconsistencies
908		 */
909		if (bp->b_bcount != size) {
910#if defined(VFS_BIO_DEBUG)
911			printf("getblk: invalid buffer size: %ld\n", bp->b_bcount);
912#endif
913			bp->b_flags |= B_INVAL;
914			bwrite(bp);
915			goto loop;
916		}
917		splx(s);
918		return (bp);
919	} else {
920		vm_object_t obj;
921		int doingvmio;
922
923		if ((obj = (vm_object_t) vp->v_vmdata) &&
924		    (vp->v_flag & VVMIO) /* && (blkno >= 0) */ ) {
925			doingvmio = 1;
926		} else {
927			doingvmio = 0;
928		}
929		if ((bp = getnewbuf(slpflag, slptimeo, doingvmio)) == 0) {
930			if (slpflag || slptimeo)
931				return NULL;
932			goto loop;
933		}
934		if (incore(vp, blkno)) {
935			bp->b_flags |= B_INVAL;
936			brelse(bp);
937			goto loop;
938		}
939		bp->b_blkno = bp->b_lblkno = blkno;
940		bgetvp(vp, bp);
941		LIST_REMOVE(bp, b_hash);
942		bh = BUFHASH(vp, blkno);
943		LIST_INSERT_HEAD(bh, bp, b_hash);
944		if (doingvmio) {
945			bp->b_flags |= (B_VMIO | B_CACHE);
946#if defined(VFS_BIO_DEBUG)
947			if (vp->v_type != VREG)
948				printf("getblk: vmioing file type %d???\n", vp->v_type);
949#endif
950			++nvmio;
951		} else {
952			if (bp->b_flags & B_VMIO)
953				--nvmio;
954			bp->b_flags &= ~B_VMIO;
955		}
956		splx(s);
957		if (!allocbuf(bp, size, 1)) {
958			s = splbio();
959			goto loop;
960		}
961		return (bp);
962	}
963}
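/*
 * A buffer created here is VMIO only when the vnode has a VM object
 * (v_vmdata) and VVMIO set.  Such buffers are optimistically marked
 * B_CACHE; allocbuf() clears that flag again if any backing page turns out
 * not to be fully valid.
 */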
964
965/*
966 * Get an empty, disassociated buffer of given size.
967 */
968struct buf *
969geteblk(int size)
970{
971	struct buf *bp;
972
973	while ((bp = getnewbuf(0, 0, 0)) == 0);
974	allocbuf(bp, size, 0);
975	bp->b_flags |= B_INVAL;
976	return (bp);
977}
978
979/*
980 * Modify the length of a buffer's underlying buffer storage without
981 * destroying information (unless, of course, the buffer is shrinking).
982 */
983int
984allocbuf(struct buf * bp, int size, int vmio)
985{
986
987	int s;
988	int newbsize;
989	int i;
990
991	if ((bp->b_flags & B_VMIO) == 0) {
992		newbsize = round_page(size);
993		if (newbsize == bp->b_bufsize) {
994			bp->b_bcount = size;
995			return 1;
996		} else if (newbsize < bp->b_bufsize) {
997			if (bp->b_flags & B_MALLOC) {
998				bp->b_bcount = size;
999				return 1;
1000			}
1001			vm_hold_free_pages(
1002			    bp,
1003			    (vm_offset_t) bp->b_data + newbsize,
1004			    (vm_offset_t) bp->b_data + bp->b_bufsize);
1005		} else if (newbsize > bp->b_bufsize) {
1006			if (bp->b_flags & B_MALLOC) {
1007				vm_offset_t bufaddr;
1008
1009				bufaddr = (vm_offset_t) bp->b_data;
1010				bp->b_data = buffers_kva + (bp - buf) * MAXBSIZE;
1011				vm_hold_load_pages(
1012				    bp,
1013				    (vm_offset_t) bp->b_data,
1014				    (vm_offset_t) bp->b_data + newbsize);
1015				bcopy((caddr_t) bufaddr, bp->b_data, bp->b_bcount);
1016				free((caddr_t) bufaddr, M_TEMP);
1017			} else if ((newbsize <= PAGE_SIZE / 2) && (bp->b_bufsize == 0)) {
1018				bp->b_flags |= B_MALLOC;
1019				bp->b_data = malloc(newbsize, M_TEMP, M_WAITOK);
1020				bp->b_npages = 0;
1021			} else {
1022				vm_hold_load_pages(
1023				    bp,
1024				    (vm_offset_t) bp->b_data + bp->b_bufsize,
1025				    (vm_offset_t) bp->b_data + newbsize);
1026			}
1027		}
1028		/*
1029		 * adjust buffer cache's idea of memory allocated to buffer
1030		 * contents
1031		 */
1032		freebufspace -= newbsize - bp->b_bufsize;
1033		allocbufspace += newbsize - bp->b_bufsize;
1034	} else {
1035		vm_page_t m;
1036		int desiredpages;
1037
1038		newbsize = ((size + DEV_BSIZE - 1) / DEV_BSIZE) * DEV_BSIZE;
1039		desiredpages = round_page(newbsize) / PAGE_SIZE;
1040
1041		if (newbsize == bp->b_bufsize) {
1042			bp->b_bcount = size;
1043			return 1;
1044		} else if (newbsize < bp->b_bufsize) {
1045			if (desiredpages < bp->b_npages) {
1046				pmap_qremove((vm_offset_t) trunc_page(bp->b_data) +
1047				    desiredpages * PAGE_SIZE, (bp->b_npages - desiredpages));
1048				for (i = desiredpages; i < bp->b_npages; i++) {
1049					m = bp->b_pages[i];
1050					s = splhigh();
1051					if ((m->flags & PG_BUSY) || (m->busy != 0)) {
1052						m->flags |= PG_WANTED;
1053						tsleep(m, PVM, "biodep", 0);
1054					}
1055					splx(s);
1056
1057					if (m->bmapped == 0) {
1058						printf("allocbuf: bmapped is zero for page %d\n", i);
1059						panic("allocbuf: error");
1060					}
1061					--m->bmapped;
1062					if (m->bmapped == 0) {
1063						PAGE_WAKEUP(m);
1064						pmap_page_protect(VM_PAGE_TO_PHYS(m), VM_PROT_NONE);
1065						vm_page_free(m);
1066					}
1067					bp->b_pages[i] = NULL;
1068				}
1069				bp->b_npages = desiredpages;
1070			}
1071		} else {
1072			vm_object_t obj;
1073			vm_offset_t tinc, off, toff, objoff;
1074			int pageindex, curbpnpages;
1075			struct vnode *vp;
1076			int bsize;
1077
1078			vp = bp->b_vp;
1079			bsize = vp->v_mount->mnt_stat.f_iosize;
1080
1081			if (bp->b_npages < desiredpages) {
1082				obj = (vm_object_t) vp->v_vmdata;
1083				tinc = PAGE_SIZE;
1084				if (tinc > bsize)
1085					tinc = bsize;
1086				off = bp->b_lblkno * bsize;
1087				curbpnpages = bp->b_npages;
1088		doretry:
1089				for (toff = 0; toff < newbsize; toff += tinc) {
1090					int mask;
1091					int bytesinpage;
1092
1093					pageindex = toff / PAGE_SIZE;
1094					objoff = trunc_page(toff + off);
1095					if (pageindex < curbpnpages) {
1096						int pb;
1097
1098						m = bp->b_pages[pageindex];
1099						if (m->offset != objoff)
1100							panic("allocbuf: page changed offset??!!!?");
1101						bytesinpage = tinc;
1102						if (tinc > (newbsize - toff))
1103							bytesinpage = newbsize - toff;
1104						if (!vm_page_is_valid(m, toff + off, bytesinpage)) {
1105							bp->b_flags &= ~B_CACHE;
1106						}
1107						if ((m->flags & PG_ACTIVE) == 0)
1108							vm_page_activate(m);
1109						continue;
1110					}
1111					s = splhigh();
1112					m = vm_page_lookup(obj, objoff);
1113					if (!m) {
1114						splx(s);
1115						m = vm_page_alloc(obj, objoff, 0);
1116						if (!m) {
1117							int j;
1118
1119							for (j = bp->b_npages; j < pageindex; j++) {
1120								vm_page_t mt = bp->b_pages[j];
1121
1122								PAGE_WAKEUP(mt);
1123								if (!mt->valid) {
1124									vm_page_free(mt);
1125								}
1126							}
1127							VM_WAIT;
1128							if (vmio && (bp->b_flags & B_PDWANTED)) {
1129								--nvmio;
1130								bp->b_flags &= ~B_VMIO;
1131								bp->b_flags |= B_INVAL;
1132								brelse(bp);
1133								return 0;
1134							}
1135							curbpnpages = bp->b_npages;
1136							goto doretry;
1137						}
1138						m->valid = 0;
1139						vm_page_activate(m);
1140					} else if ((m->valid == 0) || (m->flags & PG_BUSY)) {
1141						int j;
1142						int bufferdestroyed = 0;
1143
1144						splx(s);
1145						for (j = bp->b_npages; j < pageindex; j++) {
1146							vm_page_t mt = bp->b_pages[j];
1147
1148							PAGE_WAKEUP(mt);
1149							if (mt->valid == 0) {
1150								vm_page_free(mt);
1151							}
1152						}
1153						if (vmio && (bp->b_flags & B_PDWANTED)) {
1154							--nvmio;
1155							bp->b_flags &= ~B_VMIO;
1156							bp->b_flags |= B_INVAL;
1157							brelse(bp);
1158							VM_WAIT;
1159							bufferdestroyed = 1;
1160						}
1161						s = splbio();
1162						if (m && (m->flags & PG_BUSY)) {
1163							m->flags |= PG_WANTED;
1164							tsleep(m, PRIBIO, "pgtblk", 0);
1165						} else
1166							if (m && m->valid == 0)
1167								vm_page_free(m);
1168						splx(s);
1169						if (bufferdestroyed)
1170							return 0;
1171						curbpnpages = bp->b_npages;
1172						goto doretry;
1173					} else {
1174						int pb;
1175
1176						if ((m->flags & PG_CACHE) &&
1177						    (cnt.v_free_count + cnt.v_cache_count) < cnt.v_free_reserved) {
1178							int j;
1179
1180							splx(s);
1181							for (j = bp->b_npages; j < pageindex; j++) {
1182								vm_page_t mt = bp->b_pages[j];
1183
1184								PAGE_WAKEUP(mt);
1185								if (mt->valid == 0) {
1186									vm_page_free(mt);
1187								}
1188							}
1189							VM_WAIT;
1190							if (vmio && (bp->b_flags & B_PDWANTED)) {
1191								--nvmio;
1192								bp->b_flags &= ~B_VMIO;
1193								bp->b_flags |= B_INVAL;
1194								brelse(bp);
1195								return 0;
1196							}
1197							curbpnpages = bp->b_npages;
1198							goto doretry;
1199						}
1200						bytesinpage = tinc;
1201						if (tinc > (newbsize - toff))
1202							bytesinpage = newbsize - toff;
1203						if (!vm_page_is_valid(m, toff + off, bytesinpage)) {
1204							bp->b_flags &= ~B_CACHE;
1205						}
1206						if ((m->flags & PG_ACTIVE) == 0)
1207							vm_page_activate(m);
1208						m->flags |= PG_BUSY;
1209						splx(s);
1210					}
1211					bp->b_pages[pageindex] = m;
1212					curbpnpages = pageindex + 1;
1213				}
1214				if (bsize >= PAGE_SIZE) {
1215					for (i = bp->b_npages; i < curbpnpages; i++) {
1216						m = bp->b_pages[i];
1217						if (m->valid == 0) {
1218							bp->b_flags &= ~B_CACHE;
1219						}
1220						m->bmapped++;
1221						PAGE_WAKEUP(m);
1222					}
1223				} else {
1224					if (!vm_page_is_valid(bp->b_pages[0], off, bsize))
1225						bp->b_flags &= ~B_CACHE;
1226					bp->b_pages[0]->bmapped++;
1227					PAGE_WAKEUP(bp->b_pages[0]);
1228				}
1229				bp->b_npages = curbpnpages;
1230				bp->b_data = buffers_kva + (bp - buf) * MAXBSIZE;
1231				pmap_qenter((vm_offset_t) bp->b_data, bp->b_pages, bp->b_npages);
1232				bp->b_data += off % PAGE_SIZE;
1233			}
1234		}
1235	}
1236	bp->b_bufsize = newbsize;
1237	bp->b_bcount = size;
1238	return 1;
1239}
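/*
 * allocbuf() thus has two sizing regimes: non-VMIO buffers grow and shrink
 * in whole pages through vm_hold_load_pages()/vm_hold_free_pages() (or live
 * in a malloc'ed area while no larger than half a page), while VMIO buffers
 * attach the vnode's own VM object pages and map them into the buffer's KVA
 * with pmap_qenter().
 */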
1240
1241/*
1242 * Wait for buffer I/O completion, returning error status.
1243 */
1244int
1245biowait(register struct buf * bp)
1246{
1247	int s;
1248
1249	s = splbio();
1250	while ((bp->b_flags & B_DONE) == 0)
1251		tsleep((caddr_t) bp, PRIBIO, "biowait", 0);
1252	if ((bp->b_flags & B_ERROR) || bp->b_error) {
1253		if ((bp->b_flags & B_INVAL) == 0) {
1254			bp->b_flags |= B_INVAL;
1255			bp->b_dev = NODEV;
1256			LIST_REMOVE(bp, b_hash);
1257			LIST_INSERT_HEAD(&invalhash, bp, b_hash);
1258			wakeup((caddr_t) bp);
1259		}
1260		if (!bp->b_error)
1261			bp->b_error = EIO;
1262		else
1263			bp->b_flags |= B_ERROR;
1264		splx(s);
1265		return (bp->b_error);
1266	} else {
1267		splx(s);
1268		return (0);
1269	}
1270}
1271
1272/*
1273 * Finish I/O on a buffer, calling an optional function.
1274 * This is usually called from interrupt level, so process blocking
1275 * is not *a good idea*.
1276 */
1277void
1278biodone(register struct buf * bp)
1279{
1280	int s;
1281
1282	s = splbio();
1283	if (bp->b_flags & B_DONE)
1284		printf("biodone: buffer already done\n");
1285	bp->b_flags |= B_DONE;
1286
1287	if ((bp->b_flags & B_READ) == 0) {
1288		vwakeup(bp);
1289	}
1290#ifdef BOUNCE_BUFFERS
1291	if (bp->b_flags & B_BOUNCE)
1292		vm_bounce_free(bp);
1293#endif
1294
1295	/* call optional completion function if requested */
1296	if (bp->b_flags & B_CALL) {
1297		bp->b_flags &= ~B_CALL;
1298		(*bp->b_iodone) (bp);
1299		splx(s);
1300		return;
1301	}
1302	if (bp->b_flags & B_VMIO) {
1303		int i, resid;
1304		vm_offset_t foff;
1305		vm_page_t m;
1306		vm_object_t obj;
1307		int iosize;
1308		struct vnode *vp = bp->b_vp;
1309
1310		foff = vp->v_mount->mnt_stat.f_iosize * bp->b_lblkno;
1311		obj = (vm_object_t) vp->v_vmdata;
1312		if (!obj) {
1313			splx(s); return;
1314		}
1315#if defined(VFS_BIO_DEBUG)
1316		if (obj->paging_in_progress < bp->b_npages) {
1317			printf("biodone: paging in progress(%d) < bp->b_npages(%d)\n",
1318			    obj->paging_in_progress, bp->b_npages);
1319		}
1320#endif
1321		iosize = bp->b_bufsize;
1322		for (i = 0; i < bp->b_npages; i++) {
1323			m = bp->b_pages[i];
1324			if (m == bogus_page) {
1325				m = vm_page_lookup(obj, foff);
1326				if (!m) {
1327#if defined(VFS_BIO_DEBUG)
1328					printf("biodone: page disappeared\n");
1329#endif
1330					--obj->paging_in_progress;
1331					continue;
1332				}
1333				bp->b_pages[i] = m;
1334				pmap_qenter(trunc_page(bp->b_data), bp->b_pages, bp->b_npages);
1335			}
1336#if defined(VFS_BIO_DEBUG)
1337			if (trunc_page(foff) != m->offset) {
1338				printf("biodone: foff(%d)/m->offset(%d) mismatch\n", foff, m->offset);
1339			}
1340#endif
1341			resid = (m->offset + PAGE_SIZE) - foff;
1342			if (resid > iosize)
1343				resid = iosize;
1344			if (resid > 0) {
1345				vm_page_set_valid(m, foff, resid);
1346				vm_page_set_clean(m, foff, resid);
1347			}
1348			if (m->busy == 0) {
1349				printf("biodone: page busy < 0, off: %d, foff: %d, resid: %d, index: %d\n",
1350				    m->offset, foff, resid, i);
1351				printf(" iosize: %d, lblkno: %d\n",
1352				    bp->b_vp->v_mount->mnt_stat.f_iosize, bp->b_lblkno);
1353				printf(" valid: 0x%x, dirty: 0x%x, mapped: %d\n",
1354				    m->valid, m->dirty, m->bmapped);
1355				panic("biodone: page busy < 0\n");
1356			}
1357			--m->busy;
1358			PAGE_WAKEUP(m);
1359			--obj->paging_in_progress;
1360			foff += resid;
1361			iosize -= resid;
1362		}
1363		if (obj && obj->paging_in_progress == 0)
1364			wakeup((caddr_t) obj);
1365	}
1366	/*
1367	 * For asynchronous completions, release the buffer now. The brelse
1368	 * checks for B_WANTED and will do the wakeup there if necessary - so
1369	 * no need to do a wakeup here in the async case.
1370	 */
1371
1372	if (bp->b_flags & B_ASYNC) {
1373		brelse(bp);
1374	} else {
1375		bp->b_flags &= ~(B_WANTED | B_PDWANTED);
1376		wakeup((caddr_t) bp);
1377	}
1378	splx(s);
1379}
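/*
 * For VMIO buffers, the completion path above swaps any bogus_page entries
 * back for the real pages, marks the transferred ranges valid and clean,
 * drops each page's busy count and the object's paging_in_progress, and
 * finally releases async buffers through brelse().
 */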
1380
1381int
1382count_lock_queue()
1383{
1384	int count;
1385	struct buf *bp;
1386
1387	count = 0;
1388	for (bp = bufqueues[QUEUE_LOCKED].tqh_first;
1389	    bp != NULL;
1390	    bp = bp->b_freelist.tqe_next)
1391		count++;
1392	return (count);
1393}
1394
1395int vfs_update_interval = 30;
1396
1397void
1398vfs_update()
1399{
1400	(void) spl0();
1401	while (1) {
1402		tsleep((caddr_t) &vfs_update_wakeup, PRIBIO, "update",
1403		    hz * vfs_update_interval);
1404		vfs_update_wakeup = 0;
1405		sync(curproc, NULL, NULL);
1406	}
1407}
1408
1409void
1410vfs_unbusy_pages(struct buf * bp)
1411{
1412	int i;
1413
1414	if (bp->b_flags & B_VMIO) {
1415		struct vnode *vp = bp->b_vp;
1416		vm_object_t obj = (vm_object_t) vp->v_vmdata;
1417		vm_offset_t foff;
1418
1419		foff = vp->v_mount->mnt_stat.f_iosize * bp->b_lblkno;
1420
1421		for (i = 0; i < bp->b_npages; i++) {
1422			vm_page_t m = bp->b_pages[i];
1423
1424			if (m == bogus_page) {
1425				m = vm_page_lookup(obj, foff);
1426				if (!m) {
1427					panic("vfs_unbusy_pages: page missing\n");
1428				}
1429				bp->b_pages[i] = m;
1430				pmap_qenter(trunc_page(bp->b_data), bp->b_pages, bp->b_npages);
1431			}
1432			--obj->paging_in_progress;
1433			--m->busy;
1434			PAGE_WAKEUP(m);
1435		}
1436		if (obj->paging_in_progress == 0)
1437			wakeup((caddr_t) obj);
1438	}
1439}
1440
1441void
1442vfs_busy_pages(struct buf * bp, int clear_modify)
1443{
1444	int i;
1445
1446	if (bp->b_flags & B_VMIO) {
1447		vm_object_t obj = (vm_object_t) bp->b_vp->v_vmdata;
1448		vm_offset_t foff = bp->b_vp->v_mount->mnt_stat.f_iosize * bp->b_lblkno;
1449		int iocount = bp->b_bufsize;
1450
1451		for (i = 0; i < bp->b_npages; i++) {
1452			vm_page_t m = bp->b_pages[i];
1453			int resid = (m->offset + PAGE_SIZE) - foff;
1454
1455			if (resid > iocount)
1456				resid = iocount;
1457			obj->paging_in_progress++;
1458			m->busy++;
1459			if (clear_modify) {
1460				vm_page_test_dirty(m);
1461				pmap_page_protect(VM_PAGE_TO_PHYS(m), VM_PROT_READ);
1462			} else if (bp->b_bcount >= PAGE_SIZE) {
1463				if (m->valid && (bp->b_flags & B_CACHE) == 0) {
1464					bp->b_pages[i] = bogus_page;
1465					pmap_qenter(trunc_page(bp->b_data), bp->b_pages, bp->b_npages);
1466				}
1467			}
1468			foff += resid;
1469			iocount -= resid;
1470		}
1471	}
1472}
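/*
 * For writes (clear_modify set) each page's dirty state is sampled with
 * vm_page_test_dirty() and the page is write-protected for the duration of
 * the transfer; for reads, pages that already hold valid data may be
 * swapped for bogus_page so the device cannot clobber them.
 */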
1473
1474void
1475vfs_dirty_pages(struct buf * bp)
1476{
1477	int i;
1478
1479	if (bp->b_flags & B_VMIO) {
1480		vm_offset_t foff = bp->b_vp->v_mount->mnt_stat.f_iosize * bp->b_lblkno;
1481		int iocount = bp->b_bufsize;
1482
1483		for (i = 0; i < bp->b_npages; i++) {
1484			vm_page_t m = bp->b_pages[i];
1485			int resid = (m->offset + PAGE_SIZE) - foff;
1486
1487			if (resid > iocount)
1488				resid = iocount;
1489			if (resid > 0) {
1490				vm_page_set_valid(m, foff, resid);
1491				vm_page_set_dirty(m, foff, resid);
1492			}
1493			PAGE_WAKEUP(m);
1494			foff += resid;
1495			iocount -= resid;
1496		}
1497	}
1498}
1499/*
1500 * these routines are not in the correct place (yet)
1501 * also they work *ONLY* for kernel_pmap!!!
1502 */
1503void
1504vm_hold_load_pages(struct buf * bp, vm_offset_t froma, vm_offset_t toa)
1505{
1506	vm_offset_t pg;
1507	vm_page_t p;
1508	vm_offset_t from = round_page(froma);
1509	vm_offset_t to = round_page(toa);
1510
1511tryagain0:
1512	if ((curproc != pageproc) && ((cnt.v_free_count + cnt.v_cache_count) <=
1513		cnt.v_free_reserved + (toa - froma) / PAGE_SIZE)) {
1514		VM_WAIT;
1515		goto tryagain0;
1516	}
1517	for (pg = from; pg < to; pg += PAGE_SIZE) {
1518
1519tryagain:
1520
1521		p = vm_page_alloc(kernel_object, pg - VM_MIN_KERNEL_ADDRESS, 0);
1522		if (!p) {
1523			VM_WAIT;
1524			goto tryagain;
1525		}
1526		vm_page_wire(p);
1527		pmap_kenter(pg, VM_PAGE_TO_PHYS(p));
1528		bp->b_pages[((caddr_t) pg - bp->b_data) / PAGE_SIZE] = p;
1529		PAGE_WAKEUP(p);
1530		bp->b_npages++;
1531	}
1532}
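/*
 * vm_hold_load_pages() and its counterpart below back a non-VMIO buffer's
 * fixed KVA window with freshly allocated, wired kernel_object pages via
 * pmap_kenter(); as the comment above warns, they assume the kernel pmap.
 */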
1533
1534void
1535vm_hold_free_pages(struct buf * bp, vm_offset_t froma, vm_offset_t toa)
1536{
1537	vm_offset_t pg;
1538	vm_page_t p;
1539	vm_offset_t from = round_page(froma);
1540	vm_offset_t to = round_page(toa);
1541
1542	for (pg = from; pg < to; pg += PAGE_SIZE) {
1543		p = bp->b_pages[((caddr_t) pg - bp->b_data) / PAGE_SIZE];
1544		bp->b_pages[((caddr_t) pg - bp->b_data) / PAGE_SIZE] = 0;
1545		pmap_kremove(pg);
1546		vm_page_free(p);
1547		--bp->b_npages;
1548	}
1549}
1550
1551void
1552bufstats()
1553{
1554}
1555