vm_page.c revision 33181
1/*
2 * Copyright (c) 1991 Regents of the University of California.
3 * All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * The Mach Operating System project at Carnegie-Mellon University.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 *    must display the following acknowledgement:
18 *	This product includes software developed by the University of
19 *	California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 *    may be used to endorse or promote products derived from this software
22 *    without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 *
36 *	from: @(#)vm_page.c	7.4 (Berkeley) 5/7/91
37 *	$Id: vm_page.c,v 1.92 1998/02/06 12:14:27 eivind Exp $
38 */
39
40/*
41 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
42 * All rights reserved.
43 *
44 * Authors: Avadis Tevanian, Jr., Michael Wayne Young
45 *
46 * Permission to use, copy, modify and distribute this software and
47 * its documentation is hereby granted, provided that both the copyright
48 * notice and this permission notice appear in all copies of the
49 * software, derivative works or modified versions, and any portions
50 * thereof, and that both notices appear in supporting documentation.
51 *
52 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
53 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
54 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
55 *
56 * Carnegie Mellon requests users of this software to return to
57 *
58 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
59 *  School of Computer Science
60 *  Carnegie Mellon University
61 *  Pittsburgh PA 15213-3890
62 *
63 * any improvements or extensions that they make and grant Carnegie the
64 * rights to redistribute these changes.
65 */
66
67/*
68 *	Resident memory management module.
69 */
70
71#include <sys/param.h>
72#include <sys/systm.h>
73#include <sys/malloc.h>
74#include <sys/proc.h>
75#include <sys/vmmeter.h>
76#include <sys/vnode.h>
77
78#include <vm/vm.h>
79#include <vm/vm_param.h>
80#include <vm/vm_prot.h>
81#include <sys/lock.h>
82#include <vm/vm_kern.h>
83#include <vm/vm_object.h>
84#include <vm/vm_page.h>
85#include <vm/vm_pageout.h>
86#include <vm/vm_extern.h>
87
88static void	vm_page_queue_init __P((void));
89static vm_page_t vm_page_select_free __P((vm_object_t object,
90			vm_pindex_t pindex, int prefqueue));
91
92/*
93 *	Associated with page of user-allocatable memory is a
94 *	page structure.
95 */
96
97static struct pglist *vm_page_buckets;	/* Array of buckets */
98static int vm_page_bucket_count;	/* How big is array? */
99static int vm_page_hash_mask;		/* Mask for hash function */
100static volatile int vm_page_bucket_generation;
101
102struct pglist vm_page_queue_free[PQ_L2_SIZE] = {0};
103struct pglist vm_page_queue_zero[PQ_L2_SIZE] = {0};
104struct pglist vm_page_queue_active = {0};
105struct pglist vm_page_queue_inactive = {0};
106struct pglist vm_page_queue_cache[PQ_L2_SIZE] = {0};
107
108static int no_queue=0;
109
110struct vpgqueues vm_page_queues[PQ_COUNT] = {0};
111static int pqcnt[PQ_COUNT] = {0};
112
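/*
 * vm_page_queue_init:
 *
 * Wire the statically allocated page queues into the vm_page_queues[]
 * table.  The free, zero and cache classes each consist of PQ_L2_SIZE
 * colored sub-queues; the sub-queues of a class share a single global
 * counter (cnt.v_free_count or cnt.v_cache_count) while each keeps its
 * own local length in pqcnt[].
 */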
113static void
114vm_page_queue_init(void) {
115	int i;
116
117	vm_page_queues[PQ_NONE].pl = NULL;
118	vm_page_queues[PQ_NONE].cnt = &no_queue;
119	for(i=0;i<PQ_L2_SIZE;i++) {
120		vm_page_queues[PQ_FREE+i].pl = &vm_page_queue_free[i];
121		vm_page_queues[PQ_FREE+i].cnt = &cnt.v_free_count;
122	}
123	for(i=0;i<PQ_L2_SIZE;i++) {
124		vm_page_queues[PQ_ZERO+i].pl = &vm_page_queue_zero[i];
125		vm_page_queues[PQ_ZERO+i].cnt = &cnt.v_free_count;
126	}
127	vm_page_queues[PQ_INACTIVE].pl = &vm_page_queue_inactive;
128	vm_page_queues[PQ_INACTIVE].cnt = &cnt.v_inactive_count;
129
130	vm_page_queues[PQ_ACTIVE].pl = &vm_page_queue_active;
131	vm_page_queues[PQ_ACTIVE].cnt = &cnt.v_active_count;
132	for(i=0;i<PQ_L2_SIZE;i++) {
133		vm_page_queues[PQ_CACHE+i].pl = &vm_page_queue_cache[i];
134		vm_page_queues[PQ_CACHE+i].cnt = &cnt.v_cache_count;
135	}
136	for(i=0;i<PQ_COUNT;i++) {
137		if (vm_page_queues[i].pl) {
138			TAILQ_INIT(vm_page_queues[i].pl);
139		} else if (i != 0) {
140			panic("vm_page_queue_init: queue %d is null", i);
141		}
142		vm_page_queues[i].lcnt = &pqcnt[i];
143	}
144}
145
146vm_page_t vm_page_array = 0;
147static int vm_page_array_size = 0;
148long first_page = 0;
149static long last_page;
150static vm_size_t page_mask;
151static int page_shift;
152int vm_page_zero_count = 0;
153
154/*
155 * map of contiguous valid DEV_BSIZE chunks in a page
156 * (this list is valid for page sizes upto 16*DEV_BSIZE)
157 */
158static u_short vm_page_dev_bsize_chunks[] = {
159	0x0, 0x1, 0x3, 0x7, 0xf, 0x1f, 0x3f, 0x7f, 0xff,
160	0x1ff, 0x3ff, 0x7ff, 0xfff, 0x1fff, 0x3fff, 0x7fff, 0xffff
161};
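/*
 * Illustrative reading of the table above (assuming DEV_BSIZE == 512):
 * entry 3 is 0x7, i.e. a run of three contiguous DEV_BSIZE chunks
 * starting at chunk 0 corresponds to bits 0-2 of a page's valid/dirty
 * bitmaps.
 */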
162
163static inline int vm_page_hash __P((vm_object_t object, vm_pindex_t pindex));
164static int vm_page_freechk_and_unqueue __P((vm_page_t m));
165static void vm_page_free_wakeup __P((void));
166
167/*
168 *	vm_set_page_size:
169 *
170 *	Sets the page size, perhaps based upon the memory
171 *	size.  Must be called before any use of page-size
172 *	dependent functions.
173 *
174 *	Sets page_shift and page_mask from cnt.v_page_size.
175 */
176void
177vm_set_page_size()
178{
179
180	if (cnt.v_page_size == 0)
181		cnt.v_page_size = DEFAULT_PAGE_SIZE;
182	page_mask = cnt.v_page_size - 1;
183	if ((page_mask & cnt.v_page_size) != 0)
184		panic("vm_set_page_size: page size not a power of two");
185	for (page_shift = 0;; page_shift++)
186		if ((1 << page_shift) == cnt.v_page_size)
187			break;
188}
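/*
 * Worked example (illustrative only): with cnt.v_page_size == 4096,
 * page_mask becomes 0xfff, (0xfff & 0x1000) == 0 so the check passes,
 * and the loop leaves page_shift == 12.  A bogus size such as 3072
 * would give page_mask == 0xbff, (0xbff & 0xc00) != 0, and the panic
 * would fire.
 */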
189
190/*
191 *	vm_page_startup:
192 *
193 *	Initializes the resident memory module.
194 *
195 *	Allocates memory for the page cells, and
196 *	for the object/offset-to-page hash table headers.
197 *	Each page cell is initialized and placed on the free list.
198 */
199
200vm_offset_t
201vm_page_startup(starta, enda, vaddr)
202	register vm_offset_t starta;
203	vm_offset_t enda;
204	register vm_offset_t vaddr;
205{
206	register vm_offset_t mapped;
207	register vm_page_t m;
208	register struct pglist *bucket;
209	vm_size_t npages, page_range;
210	register vm_offset_t new_start;
211	int i;
212	vm_offset_t pa;
213	int nblocks;
214	vm_offset_t first_managed_page;
215
216	/* the biggest memory array is the second group of pages */
217	vm_offset_t start;
218	vm_offset_t biggestone, biggestsize;
219
220	vm_offset_t total;
221
222	total = 0;
223	biggestsize = 0;
224	biggestone = 0;
225	nblocks = 0;
226	vaddr = round_page(vaddr);
227
228	for (i = 0; phys_avail[i + 1]; i += 2) {
229		phys_avail[i] = round_page(phys_avail[i]);
230		phys_avail[i + 1] = trunc_page(phys_avail[i + 1]);
231	}
232
233	for (i = 0; phys_avail[i + 1]; i += 2) {
234		int size = phys_avail[i + 1] - phys_avail[i];
235
236		if (size > biggestsize) {
237			biggestone = i;
238			biggestsize = size;
239		}
240		++nblocks;
241		total += size;
242	}
243
244	start = phys_avail[biggestone];
245
246	/*
247	 * Initialize the queue headers for the free queue, the active queue
248	 * and the inactive queue.
249	 */
250
251	vm_page_queue_init();
252
253	/*
254	 * Allocate (and initialize) the hash table buckets.
255	 *
256	 * The number of buckets MUST BE a power of 2, and the actual value is
257	 * the next power of 2 greater than the number of physical pages in
258	 * the system.
259	 *
260	 * Note: This computation can be tweaked if desired.
261	 */
262	vm_page_buckets = (struct pglist *) vaddr;
263	bucket = vm_page_buckets;
264	if (vm_page_bucket_count == 0) {
265		vm_page_bucket_count = 1;
266		while (vm_page_bucket_count < atop(total))
267			vm_page_bucket_count <<= 1;
268	}
269	vm_page_hash_mask = vm_page_bucket_count - 1;
270
271	/*
272	 * Validate these addresses.
273	 */
274
275	new_start = start + vm_page_bucket_count * sizeof(struct pglist);
276	new_start = round_page(new_start);
277	mapped = vaddr;
278	vaddr = pmap_map(mapped, start, new_start,
279	    VM_PROT_READ | VM_PROT_WRITE);
280	start = new_start;
281	bzero((caddr_t) mapped, vaddr - mapped);
282	mapped = vaddr;
283
284	for (i = 0; i < vm_page_bucket_count; i++) {
285		TAILQ_INIT(bucket);
286		bucket++;
287	}
288
289	/*
290	 * Validate these zone addresses.
291	 */
292
293	new_start = start + (vaddr - mapped);
294	pmap_map(mapped, start, new_start, VM_PROT_READ | VM_PROT_WRITE);
295	bzero((caddr_t) mapped, (vaddr - mapped));
296	start = round_page(new_start);
297
298	/*
299	 * Compute the number of pages of memory that will be available for
300	 * use (taking into account the overhead of a page structure per
301	 * page).
302	 */
303
304	first_page = phys_avail[0] / PAGE_SIZE;
305	last_page = phys_avail[(nblocks - 1) * 2 + 1] / PAGE_SIZE;
306
307	page_range = last_page - (phys_avail[0] / PAGE_SIZE);
308	npages = (total - (page_range * sizeof(struct vm_page)) -
309	    (start - phys_avail[biggestone])) / PAGE_SIZE;
310
311	/*
312	 * Initialize the mem entry structures now, and put them in the free
313	 * queue.
314	 */
315
316	vm_page_array = (vm_page_t) vaddr;
317	mapped = vaddr;
318
319	/*
320	 * Validate these addresses.
321	 */
322
323	new_start = round_page(start + page_range * sizeof(struct vm_page));
324	mapped = pmap_map(mapped, start, new_start,
325	    VM_PROT_READ | VM_PROT_WRITE);
326	start = new_start;
327
328	first_managed_page = start / PAGE_SIZE;
329
330	/*
331	 * Clear all of the page structures
332	 */
333	bzero((caddr_t) vm_page_array, page_range * sizeof(struct vm_page));
334	vm_page_array_size = page_range;
335
336	cnt.v_page_count = 0;
337	cnt.v_free_count = 0;
338	for (i = 0; phys_avail[i + 1] && npages > 0; i += 2) {
339		if (i == biggestone)
340			pa = ptoa(first_managed_page);
341		else
342			pa = phys_avail[i];
343		while (pa < phys_avail[i + 1] && npages-- > 0) {
344			++cnt.v_page_count;
345			++cnt.v_free_count;
346			m = PHYS_TO_VM_PAGE(pa);
347			m->phys_addr = pa;
348			m->flags = 0;
349			m->pc = (pa >> PAGE_SHIFT) & PQ_L2_MASK;
350			m->queue = PQ_FREE + m->pc;
351			TAILQ_INSERT_TAIL(vm_page_queues[m->queue].pl, m, pageq);
352			++(*vm_page_queues[m->queue].lcnt);
353			pa += PAGE_SIZE;
354		}
355	}
356	return (mapped);
357}
358
359/*
360 *	vm_page_hash:
361 *
362 *	Distributes the object/offset key pair among hash buckets.
363 *
364 *	NOTE:  This function depends on vm_page_bucket_count being a power of 2.
365 */
366static inline int
367vm_page_hash(object, pindex)
368	vm_object_t object;
369	vm_pindex_t pindex;
370{
371	return ((((unsigned) object) >> 5) + (pindex >> 1)) & vm_page_hash_mask;
372}
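/*
 * Sketch of the hash (descriptive note): the object pointer is shifted
 * right by 5, presumably to discard low bits shared by all vm_object
 * allocations, and pindex is shifted right by 1 so that adjacent pages
 * of an object tend to land in the same bucket; the sum is then masked
 * with vm_page_hash_mask (vm_page_bucket_count - 1).
 */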
373
374/*
375 *	vm_page_insert:		[ internal use only ]
376 *
377 *	Inserts the given mem entry into the object/offset hash
378 *	table and the object's list of backed pages.
379 *
380 *	The object and page must be locked, and the caller must be at splhigh.
381 */
382
383void
384vm_page_insert(m, object, pindex)
385	register vm_page_t m;
386	register vm_object_t object;
387	register vm_pindex_t pindex;
388{
389	register struct pglist *bucket;
390
391#if !defined(MAX_PERF)
392	if (m->flags & PG_TABLED)
393		panic("vm_page_insert: already inserted");
394#endif
395
396	/*
397	 * Record the object/offset pair in this page
398	 */
399
400	m->object = object;
401	m->pindex = pindex;
402
403	/*
404	 * Insert it into the object/offset hash table
405	 */
406
407	bucket = &vm_page_buckets[vm_page_hash(object, pindex)];
408	TAILQ_INSERT_TAIL(bucket, m, hashq);
409	vm_page_bucket_generation++;
410
411	/*
412	 * Now link into the object's list of backed pages.
413	 */
414
415	TAILQ_INSERT_TAIL(&object->memq, m, listq);
416	m->flags |= PG_TABLED;
417	m->object->page_hint = m;
418	m->object->generation++;
419
420	if (m->wire_count)
421		object->wire_count++;
422
423	if ((m->queue - m->pc) == PQ_CACHE)
424		object->cache_count++;
425
426	/*
427	 * And show that the object has one more resident page.
428	 */
429
430	object->resident_page_count++;
431}
432
433/*
434 *	vm_page_remove:		[ internal use only ]
435 *				NOTE: used by device pager as well -wfj
436 *
437 *	Removes the given mem entry from the object/offset-page
438 *	table and the object page list.
439 *
440 *	The object and page must be locked, and at splhigh.
441 */
442
443void
444vm_page_remove(m)
445	register vm_page_t m;
446{
447	register struct pglist *bucket;
448	vm_object_t object;
449
450	if (!(m->flags & PG_TABLED))
451		return;
452
453#if !defined(MAX_PERF)
454	if ((m->flags & PG_BUSY) == 0) {
455		panic("vm_page_remove: page not busy");
456	}
457#endif
458
459	m->flags &= ~PG_BUSY;
460	if (m->flags & PG_WANTED) {
461		m->flags &= ~PG_WANTED;
462		wakeup(m);
463	}
464
465	object = m->object;
466	if (object->page_hint == m)
467		object->page_hint = NULL;
468
469	if (m->wire_count)
470		object->wire_count--;
471
472	if ((m->queue - m->pc) == PQ_CACHE)
473		object->cache_count--;
474
475	/*
476	 * Remove from the object/offset hash table
477	 */
478
479	bucket = &vm_page_buckets[vm_page_hash(m->object, m->pindex)];
480	TAILQ_REMOVE(bucket, m, hashq);
481	vm_page_bucket_generation++;
482
483	/*
484	 * Now remove from the object's list of backed pages.
485	 */
486
487	TAILQ_REMOVE(&object->memq, m, listq);
488
489	/*
490	 * And show that the object has one fewer resident page.
491	 */
492
493	object->resident_page_count--;
494	object->generation++;
495	m->object = NULL;
496
497	m->flags &= ~PG_TABLED;
498}
499
500/*
501 *	vm_page_lookup:
502 *
503 *	Returns the page associated with the object/offset
504 *	pair specified; if none is found, NULL is returned.
505 *
506 *	The object must be locked.  No side effects.
507 */
508
509vm_page_t
510vm_page_lookup(object, pindex)
511	register vm_object_t object;
512	register vm_pindex_t pindex;
513{
514	register vm_page_t m;
515	register struct pglist *bucket;
516	int generation;
517	int s;
518
519	/*
520	 * Search the hash table for this object/offset pair
521	 */
522
523	if (object->page_hint && (object->page_hint->pindex == pindex) &&
524		(object->page_hint->object == object))
525		return object->page_hint;
526
527retry:
528	generation = vm_page_bucket_generation;
529	bucket = &vm_page_buckets[vm_page_hash(object, pindex)];
530	for (m = TAILQ_FIRST(bucket); m != NULL; m = TAILQ_NEXT(m,hashq)) {
531		if ((m->object == object) && (m->pindex == pindex)) {
532			if (vm_page_bucket_generation != generation)
533				goto retry;
534			m->object->page_hint = m;
535			return (m);
536		}
537	}
538	if (vm_page_bucket_generation != generation)
539		goto retry;
540	return (NULL);
541}
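/*
 * Note on the retry logic above: the bucket is walked without raising
 * the spl; vm_page_bucket_generation is sampled first and the walk is
 * restarted whenever an insert or remove bumped it, so a traversal that
 * raced with a hash-table update is never trusted.
 */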
542
543/*
544 *	vm_page_rename:
545 *
546 *	Move the given memory entry from its
547 *	current object to the specified target object/offset.
548 *
549 *	The object must be locked.
550 */
551void
552vm_page_rename(m, new_object, new_pindex)
553	register vm_page_t m;
554	register vm_object_t new_object;
555	vm_pindex_t new_pindex;
556{
557	int s;
558
559	s = splvm();
560	vm_page_remove(m);
561	vm_page_insert(m, new_object, new_pindex);
562	splx(s);
563}
564
565/*
566 * vm_page_unqueue without any wakeup
567 */
568void
569vm_page_unqueue_nowakeup(m)
570	vm_page_t m;
571{
572	int queue = m->queue;
573	struct vpgqueues *pq;
574	if (queue != PQ_NONE) {
575		pq = &vm_page_queues[queue];
576		m->queue = PQ_NONE;
577		TAILQ_REMOVE(pq->pl, m, pageq);
578		(*pq->cnt)--;
579		(*pq->lcnt)--;
580		if ((queue - m->pc) == PQ_CACHE) {
581			if (m->object)
582				m->object->cache_count--;
583		}
584	}
585}
586
587/*
588 * vm_page_unqueue must be called at splhigh();
589 */
590void
591vm_page_unqueue(m)
592	vm_page_t m;
593{
594	int queue = m->queue;
595	struct vpgqueues *pq;
596	if (queue != PQ_NONE) {
597		m->queue = PQ_NONE;
598		pq = &vm_page_queues[queue];
599		TAILQ_REMOVE(pq->pl, m, pageq);
600		(*pq->cnt)--;
601		(*pq->lcnt)--;
602		if ((queue - m->pc) == PQ_CACHE) {
603			if ((cnt.v_cache_count + cnt.v_free_count) <
604				(cnt.v_free_reserved + cnt.v_cache_min))
605				pagedaemon_wakeup();
606			if (m->object)
607				m->object->cache_count--;
608		}
609	}
610}
611
612/*
613 * Find a page on the specified queue with color optimization.
614 */
615vm_page_t
616vm_page_list_find(basequeue, index)
617	int basequeue, index;
618{
619#if PQ_L2_SIZE > 1
620
621	int i,j;
622	vm_page_t m;
623	int hindex;
624	struct vpgqueues *pq;
625
626	pq = &vm_page_queues[basequeue];
627
628	m = TAILQ_FIRST(pq[index].pl);
629	if (m)
630		return m;
631
632	for(j = 0; j < PQ_L1_SIZE; j++) {
633		int ij;
634		for(i = (PQ_L2_SIZE / 2) - PQ_L1_SIZE;
635			(ij = i + j) > 0;
636			i -= PQ_L1_SIZE) {
637
638			hindex = index + ij;
639			if (hindex >= PQ_L2_SIZE)
640				hindex -= PQ_L2_SIZE;
641			if ((m = TAILQ_FIRST(pq[hindex].pl)) != NULL)
642				return m;
643
644			hindex = index - ij;
645			if (hindex < 0)
646				hindex += PQ_L2_SIZE;
647			if ((m = TAILQ_FIRST(pq[hindex].pl)) != NULL)
648				return m;
649		}
650	}
651
652	hindex = index + PQ_L2_SIZE / 2;
653	if (hindex >= PQ_L2_SIZE)
654		hindex -= PQ_L2_SIZE;
655	m = TAILQ_FIRST(pq[hindex].pl);
656	if (m)
657		return m;
658
659	return NULL;
660#else
661	return TAILQ_FIRST(vm_page_queues[basequeue].pl);
662#endif
663
664}
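/*
 * Rough summary of the search above (only when PQ_L2_SIZE > 1): the
 * requested color is probed first, then colors offset by +/-ij from it,
 * sweeping the L2 set in PQ_L1_SIZE strides, and finally the color
 * PQ_L2_SIZE/2 away.  The precise probe order only affects cache-color
 * locality, not correctness.
 */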
665
666/*
667 * Find a page on the specified queue, deriving the preferred color from the object/pindex pair.
668 */
669vm_page_t
670vm_page_select(object, pindex, basequeue)
671	vm_object_t object;
672	vm_pindex_t pindex;
673	int basequeue;
674{
675
676#if PQ_L2_SIZE > 1
677	int index;
678	index = (pindex + object->pg_color) & PQ_L2_MASK;
679	return vm_page_list_find(basequeue, index);
680
681#else
682	return TAILQ_FIRST(vm_page_queues[basequeue].pl);
683#endif
684
685}
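/*
 * Example of the color computation (illustrative, assuming
 * PQ_L2_MASK == 15): an object with pg_color == 3 faulting on pindex 10
 * selects color (10 + 3) & 15 == 13, so consecutive pindexes of an
 * object map to consecutive cache colors.
 */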
686
687/*
688 * Find a free or zero page, with specified preference.
689 */
690static vm_page_t
691vm_page_select_free(object, pindex, prefqueue)
692	vm_object_t object;
693	vm_pindex_t pindex;
694	int prefqueue;
695{
696#if PQ_L2_SIZE > 1
697	int i,j;
698	int index, hindex;
699#endif
700	vm_page_t m, mh;
701	int oqueuediff;
702	struct vpgqueues *pq;
703
704	if (prefqueue == PQ_ZERO)
705		oqueuediff = PQ_FREE - PQ_ZERO;
706	else
707		oqueuediff = PQ_ZERO - PQ_FREE;
708
709	if ((mh = object->page_hint) != NULL) {
710		if (mh->pindex == (pindex - 1)) {
711			if ((mh->flags & PG_FICTITIOUS) == 0) {
712				if ((mh < &vm_page_array[cnt.v_page_count-1]) &&
713					(mh >= &vm_page_array[0])) {
714					int queue;
715					m = mh + 1;
716					if (VM_PAGE_TO_PHYS(m) == (VM_PAGE_TO_PHYS(mh) + PAGE_SIZE)) {
717						queue = m->queue - m->pc;
718						if (queue == PQ_FREE || queue == PQ_ZERO) {
719							return m;
720						}
721					}
722				}
723			}
724		}
725	}
726
727	pq = &vm_page_queues[prefqueue];
728
729#if PQ_L2_SIZE > 1
730
731	index = (pindex + object->pg_color) & PQ_L2_MASK;
732
733	if ((m = TAILQ_FIRST(pq[index].pl)) != NULL)
734		return m;
735	if ((m = TAILQ_FIRST(pq[index + oqueuediff].pl)) != NULL)
736		return m;
737
738	for(j = 0; j < PQ_L1_SIZE; j++) {
739		int ij;
740		for(i = (PQ_L2_SIZE / 2) - PQ_L1_SIZE;
741			(ij = i + j) >= 0;
742			i -= PQ_L1_SIZE) {
743
744			hindex = index + ij;
745			if (hindex >= PQ_L2_SIZE)
746				hindex -= PQ_L2_SIZE;
747			if ((m = TAILQ_FIRST(pq[hindex].pl)) != NULL)
748				return m;
749			if ((m = TAILQ_FIRST(pq[hindex + oqueuediff].pl)) != NULL)
750				return m;
751
752			hindex = index - ij;
753			if (hindex < 0)
754				hindex += PQ_L2_SIZE;
755			if ((m = TAILQ_FIRST(pq[hindex].pl)) != NULL)
756				return m;
757			if ((m = TAILQ_FIRST(pq[hindex + oqueuediff].pl)) != NULL)
758				return m;
759		}
760	}
761
762	hindex = index + PQ_L2_SIZE / 2;
763	if (hindex >= PQ_L2_SIZE)
764		hindex -= PQ_L2_SIZE;
765	if ((m = TAILQ_FIRST(pq[hindex].pl)) != NULL)
766		return m;
767	if ((m = TAILQ_FIRST(pq[hindex + oqueuediff].pl)) != NULL)
768		return m;
769
770#else
771	if ((m = TAILQ_FIRST(pq[0].pl)) != NULL)
772		return m;
773	else
774		return TAILQ_FIRST(pq[oqueuediff].pl);
775#endif
776
777	return NULL;
778}
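/*
 * Note on the page_hint fast path above: when the object's page_hint is
 * the page at (pindex - 1) and the physically following page is sitting
 * on a free or zero queue, that page is returned immediately, so
 * sequentially faulted objects tend to receive physically contiguous
 * pages.
 */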
779
780/*
781 *	vm_page_alloc:
782 *
783 *	Allocate and return a memory cell associated
784 *	with this VM object/offset pair.
785 *
786 *	page_req classes:
787 *	VM_ALLOC_NORMAL		normal process request
788 *	VM_ALLOC_SYSTEM		system *really* needs a page
789 *	VM_ALLOC_INTERRUPT	interrupt time request
790 *	VM_ALLOC_ZERO		zero page
791 *
792 *	Object must be locked.
793 */
794vm_page_t
795vm_page_alloc(object, pindex, page_req)
796	vm_object_t object;
797	vm_pindex_t pindex;
798	int page_req;
799{
800	register vm_page_t m;
801	struct vpgqueues *pq;
802	vm_object_t oldobject;
803	int queue, qtype;
804	int s;
805
806#ifdef DIAGNOSTIC
807	m = vm_page_lookup(object, pindex);
808	if (m)
809		panic("vm_page_alloc: page already allocated");
810#endif
811
812	if ((curproc == pageproc) && (page_req != VM_ALLOC_INTERRUPT)) {
813		page_req = VM_ALLOC_SYSTEM;
814	}
815
816	s = splvm();
817
818	switch (page_req) {
819
820	case VM_ALLOC_NORMAL:
821		if (cnt.v_free_count >= cnt.v_free_reserved) {
822			m = vm_page_select_free(object, pindex, PQ_FREE);
823#if defined(DIAGNOSTIC)
824			if (m == NULL)
825				panic("vm_page_alloc(NORMAL): missing page on free queue\n");
826#endif
827		} else {
828			m = vm_page_select(object, pindex, PQ_CACHE);
829			if (m == NULL) {
830				splx(s);
831#if defined(DIAGNOSTIC)
832				if (cnt.v_cache_count > 0)
833					printf("vm_page_alloc(NORMAL): missing pages on cache queue: %d\n", cnt.v_cache_count);
834#endif
835				vm_pageout_deficit++;
836				pagedaemon_wakeup();
837				return (NULL);
838			}
839		}
840		break;
841
842	case VM_ALLOC_ZERO:
843		if (cnt.v_free_count >= cnt.v_free_reserved) {
844			m = vm_page_select_free(object, pindex, PQ_ZERO);
845#if defined(DIAGNOSTIC)
846			if (m == NULL)
847				panic("vm_page_alloc(ZERO): missing page on free queue\n");
848#endif
849		} else {
850			m = vm_page_select(object, pindex, PQ_CACHE);
851			if (m == NULL) {
852				splx(s);
853#if defined(DIAGNOSTIC)
854				if (cnt.v_cache_count > 0)
855					printf("vm_page_alloc(ZERO): missing pages on cache queue: %d\n", cnt.v_cache_count);
856#endif
857				vm_pageout_deficit++;
858				pagedaemon_wakeup();
859				return (NULL);
860			}
861		}
862		break;
863
864	case VM_ALLOC_SYSTEM:
865		if ((cnt.v_free_count >= cnt.v_free_reserved) ||
866		    ((cnt.v_cache_count == 0) &&
867		    (cnt.v_free_count >= cnt.v_interrupt_free_min))) {
868			m = vm_page_select_free(object, pindex, PQ_FREE);
869#if defined(DIAGNOSTIC)
870			if (m == NULL)
871				panic("vm_page_alloc(SYSTEM): missing page on free queue\n");
872#endif
873		} else {
874			m = vm_page_select(object, pindex, PQ_CACHE);
875			if (m == NULL) {
876				splx(s);
877#if defined(DIAGNOSTIC)
878				if (cnt.v_cache_count > 0)
879					printf("vm_page_alloc(SYSTEM): missing pages on cache queue: %d\n", cnt.v_cache_count);
880#endif
881				vm_pageout_deficit++;
882				pagedaemon_wakeup();
883				return (NULL);
884			}
885		}
886		break;
887
888	case VM_ALLOC_INTERRUPT:
889		if (cnt.v_free_count > 0) {
890			m = vm_page_select_free(object, pindex, PQ_FREE);
891#if defined(DIAGNOSTIC)
892			if (m == NULL)
893				panic("vm_page_alloc(INTERRUPT): missing page on free queue\n");
894#endif
895		} else {
896			splx(s);
897			vm_pageout_deficit++;
898			pagedaemon_wakeup();
899			return (NULL);
900		}
901		break;
902
903	default:
904		m = NULL;
905#if !defined(MAX_PERF)
906		panic("vm_page_alloc: invalid allocation class");
907#endif
908	}
909
910	queue = m->queue;
911	qtype = queue - m->pc;
912	if (qtype == PQ_ZERO)
913		vm_page_zero_count--;
914	pq = &vm_page_queues[queue];
915	TAILQ_REMOVE(pq->pl, m, pageq);
916	(*pq->cnt)--;
917	(*pq->lcnt)--;
918	oldobject = NULL;
919	if (qtype == PQ_ZERO) {
920		m->flags = PG_ZERO|PG_BUSY;
921	} else if (qtype == PQ_CACHE) {
922		oldobject = m->object;
923		m->flags |= PG_BUSY;
924		vm_page_remove(m);
925		m->flags = PG_BUSY;
926	} else {
927		m->flags = PG_BUSY;
928	}
929	m->wire_count = 0;
930	m->hold_count = 0;
931	m->act_count = 0;
932	m->busy = 0;
933	m->valid = 0;
934	m->dirty = 0;
935	m->queue = PQ_NONE;
936
937	/* XXX before splx until vm_page_insert is safe */
938	vm_page_insert(m, object, pindex);
939
940	/*
941	 * Don't wake up too often - wake up the pageout daemon only when
942	 * we would be nearly out of memory.
943	 */
944	if (((cnt.v_free_count + cnt.v_cache_count) <
945		(cnt.v_free_reserved + cnt.v_cache_min)) ||
946			(cnt.v_free_count < cnt.v_pageout_free_min))
947		pagedaemon_wakeup();
948
949	if ((qtype == PQ_CACHE) &&
950		((page_req == VM_ALLOC_NORMAL) || (page_req == VM_ALLOC_ZERO)) &&
951		oldobject && (oldobject->type == OBJT_VNODE) &&
952		((oldobject->flags & OBJ_DEAD) == 0)) {
953		struct vnode *vp;
954		vp = (struct vnode *) oldobject->handle;
955		if (vp && VSHOULDFREE(vp)) {
956			if ((vp->v_flag & (VFREE|VTBFREE|VDOOMED)) == 0) {
957				TAILQ_INSERT_TAIL(&vnode_tobefree_list, vp, v_freelist);
958				vp->v_flag |= VTBFREE;
959			}
960		}
961	}
962	splx(s);
963
964	return (m);
965}
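/*
 * Typical caller pattern (illustrative sketch only, not compiled in;
 * the "retry" label is hypothetical):
 *
 *	retry:
 *		m = vm_page_alloc(object, pindex, VM_ALLOC_NORMAL);
 *		if (m == NULL) {
 *			VM_WAIT;
 *			goto retry;
 *		}
 *
 * The returned page is busy (PG_BUSY) with valid == 0; if it came off a
 * zero queue the PG_ZERO flag is also set.
 */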
966
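/*
 *	vm_wait:
 *
 *	Block until the pageout daemon has freed some pages.  When called
 *	from the pageout daemon itself, sleep on vm_pageout_pages_needed
 *	instead, so that vm_page_free_wakeup() can unblock it directly.
 */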
967void
968vm_wait()
969{
970	int s;
971
972	s = splvm();
973	if (curproc == pageproc) {
974		vm_pageout_pages_needed = 1;
975		tsleep(&vm_pageout_pages_needed, PSWP, "vmwait", 0);
976	} else {
977		if (!vm_pages_needed) {
978			vm_pages_needed++;
979			wakeup(&vm_pages_needed);
980		}
981		tsleep(&cnt.v_free_count, PVM, "vmwait", 0);
982	}
983	splx(s);
984}
985
986
987/*
988 *	vm_page_activate:
989 *
990 *	Put the specified page on the active list (if appropriate).
991 *
992 *	The page queues must be locked.
993 */
994void
995vm_page_activate(m)
996	register vm_page_t m;
997{
998	int s;
999	vm_page_t np;
1000	vm_object_t object;
1001
1002	s = splvm();
1003	if (m->queue != PQ_ACTIVE) {
1004		if ((m->queue - m->pc) == PQ_CACHE)
1005			cnt.v_reactivated++;
1006
1007		vm_page_unqueue(m);
1008
1009		if (m->wire_count == 0) {
1010			m->queue = PQ_ACTIVE;
1011			++(*vm_page_queues[PQ_ACTIVE].lcnt);
1012			TAILQ_INSERT_TAIL(&vm_page_queue_active, m, pageq);
1013			if (m->act_count < ACT_INIT)
1014				m->act_count = ACT_INIT;
1015			cnt.v_active_count++;
1016		}
1017	} else {
1018		if (m->act_count < ACT_INIT)
1019			m->act_count = ACT_INIT;
1020	}
1021
1022	object = m->object;
1023	TAILQ_REMOVE(&object->memq, m, listq);
1024	TAILQ_INSERT_TAIL(&object->memq, m, listq);
1025	object->generation++;
1026
1027	splx(s);
1028}
1029
1030/*
1031 * helper routine for vm_page_free and vm_page_free_zero
1032 */
1033static int
1034vm_page_freechk_and_unqueue(m)
1035	vm_page_t m;
1036{
1037	vm_object_t oldobject;
1038
1039	oldobject = m->object;
1040
1041#if !defined(MAX_PERF)
1042	if (m->busy || ((m->queue - m->pc) == PQ_FREE) ||
1043		(m->hold_count != 0)) {
1044		printf("vm_page_free: pindex(%ld), busy(%d), PG_BUSY(%d), hold(%d)\n",
1045			m->pindex, m->busy,
1046			(m->flags & PG_BUSY) ? 1 : 0, m->hold_count);
1047		if ((m->queue - m->pc) == PQ_FREE)
1048			panic("vm_page_free: freeing free page");
1049		else
1050			panic("vm_page_free: freeing busy page");
1051	}
1052#endif
1053
1054	vm_page_unqueue_nowakeup(m);
1055	vm_page_remove(m);
1056
1057	if ((m->flags & PG_FICTITIOUS) != 0) {
1058		return 0;
1059	}
1060
1061	if (m->wire_count != 0) {
1062#if !defined(MAX_PERF)
1063		if (m->wire_count > 1) {
1064			panic("vm_page_free: invalid wire count (%d), pindex: 0x%x",
1065				m->wire_count, m->pindex);
1066		}
1067#endif
1068		m->wire_count = 0;
1069		if (m->object)
1070			m->object->wire_count--;
1071		cnt.v_wire_count--;
1072	}
1073
1074	if (oldobject && (oldobject->type == OBJT_VNODE) &&
1075		((oldobject->flags & OBJ_DEAD) == 0)) {
1076		struct vnode *vp;
1077		vp = (struct vnode *) oldobject->handle;
1078		if (vp && VSHOULDFREE(vp)) {
1079			if ((vp->v_flag & (VTBFREE|VDOOMED|VFREE)) == 0) {
1080				TAILQ_INSERT_TAIL(&vnode_tobefree_list, vp, v_freelist);
1081				vp->v_flag |= VTBFREE;
1082			}
1083		}
1084	}
1085
1086	return 1;
1087}
1088
1089/*
1090 * helper routine for vm_page_free and vm_page_free_zero
1091 */
1092static __inline void
1093vm_page_free_wakeup()
1094{
1095
1096/*
1097 * If the pageout daemon needs pages, tell it that some are
1098 * now free.
1099 */
1100	if (vm_pageout_pages_needed) {
1101		wakeup(&vm_pageout_pages_needed);
1102		vm_pageout_pages_needed = 0;
1103	}
1104	/*
1105	 * Wake up processes that are waiting on memory if we hit a
1106	 * high water mark, and wake up the scheduler process if we have
1107	 * lots of memory; that process will swap processes back in.
1108	 */
1109	if (vm_pages_needed &&
1110		((cnt.v_free_count + cnt.v_cache_count) >= cnt.v_free_min)) {
1111		wakeup(&cnt.v_free_count);
1112		vm_pages_needed = 0;
1113	}
1114}
1115
1116/*
1117 *	vm_page_free:
1118 *
1119 *	Returns the given page to the free list,
1120 *	disassociating it from any VM object.
1121 *
1122 *	Object and page must be locked prior to entry.
1123 */
1124void
1125vm_page_free(m)
1126	register vm_page_t m;
1127{
1128	int s;
1129	struct vpgqueues *pq;
1130
1131	s = splvm();
1132
1133	cnt.v_tfree++;
1134
1135	if (!vm_page_freechk_and_unqueue(m)) {
1136		splx(s);
1137		return;
1138	}
1139
1140	m->queue = PQ_FREE + m->pc;
1141	pq = &vm_page_queues[m->queue];
1142	++(*pq->lcnt);
1143	++(*pq->cnt);
1144	/*
1145	 * If the pageout process is freeing the page, it is unlikely
1146	 * to still be in the CPU cache, so queue it at the tail.  A page
1147	 * being explicitly freed is more likely to still be (partially)
1148	 * cached, so queue it at the head for quick reuse.
1149	 */
1150	if (curproc == pageproc) {
1151		TAILQ_INSERT_TAIL(pq->pl, m, pageq);
1152	} else {
1153		TAILQ_INSERT_HEAD(pq->pl, m, pageq);
1154	}
1155
1156	vm_page_free_wakeup();
1157	splx(s);
1158}
1159
1160void
1161vm_page_free_zero(m)
1162	register vm_page_t m;
1163{
1164	int s;
1165	struct vpgqueues *pq;
1166
1167	s = splvm();
1168
1169	cnt.v_tfree++;
1170
1171	if (!vm_page_freechk_and_unqueue(m)) {
1172		splx(s);
1173		return;
1174	}
1175
1176	m->queue = PQ_ZERO + m->pc;
1177	pq = &vm_page_queues[m->queue];
1178	++(*pq->lcnt);
1179	++(*pq->cnt);
1180
1181	TAILQ_INSERT_HEAD(pq->pl, m, pageq);
1182	++vm_page_zero_count;
1183	vm_page_free_wakeup();
1184	splx(s);
1185}
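/*
 * Note: vm_page_free_zero differs from vm_page_free only in that the
 * caller guarantees the page is already zeroed, so it is queued on
 * PQ_ZERO and counted in vm_page_zero_count instead of going to PQ_FREE.
 */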
1186
1187/*
1188 *	vm_page_wire:
1189 *
1190 *	Mark this page as wired down by yet
1191 *	another map, removing it from paging queues
1192 *	as necessary.
1193 *
1194 *	The page queues must be locked.
1195 */
1196void
1197vm_page_wire(m)
1198	register vm_page_t m;
1199{
1200	int s;
1201
1202	if (m->wire_count == 0) {
1203		s = splvm();
1204		vm_page_unqueue(m);
1205		splx(s);
1206		cnt.v_wire_count++;
1207		if (m->object)
1208			m->object->wire_count++;
1209	}
1210	(*vm_page_queues[PQ_NONE].lcnt)++;
1211	m->wire_count++;
1212	m->flags |= PG_MAPPED;
1213}
1214
1215/*
1216 *	vm_page_unwire:
1217 *
1218 *	Release one wiring of this page, potentially
1219 *	enabling it to be paged again.
1220 *
1221 *	The page queues must be locked.
1222 */
1223void
1224vm_page_unwire(m)
1225	register vm_page_t m;
1226{
1227	int s;
1228
1229	s = splvm();
1230
1231	if (m->wire_count > 0) {
1232		m->wire_count--;
1233		if (m->wire_count == 0) {
1234			if (m->object)
1235				m->object->wire_count--;
1236			cnt.v_wire_count--;
1237			TAILQ_INSERT_TAIL(&vm_page_queue_active, m, pageq);
1238			m->queue = PQ_ACTIVE;
1239			(*vm_page_queues[PQ_ACTIVE].lcnt)++;
1240			cnt.v_active_count++;
1241		}
1242	} else {
1243#if !defined(MAX_PERF)
1244		panic("vm_page_unwire: invalid wire count: %d\n", m->wire_count);
1245#endif
1246	}
1247	splx(s);
1248}
1249
1250
1251/*
1252 *	vm_page_deactivate:
1253 *
1254 *	Returns the given page to the inactive list,
1255 *	indicating that no physical maps have access
1256 *	to this page.  [Used by the physical mapping system.]
1257 *
1258 *	The page queues must be locked.
1259 */
1260void
1261vm_page_deactivate(m)
1262	register vm_page_t m;
1263{
1264	int s;
1265
1266	/*
1267	 * Only move active pages -- ignore locked or already inactive ones.
1268	 *
1269	 * XXX: sometimes we get pages which aren't wired down or on any queue -
1270	 * we need to put them on the inactive queue also, otherwise we lose
1271	 * track of them. Paul Mackerras (paulus@cs.anu.edu.au) 9-Jan-93.
1272	 */
1273	if (m->queue == PQ_INACTIVE)
1274		return;
1275
1276	s = splvm();
1277	if (m->wire_count == 0 && m->hold_count == 0) {
1278		if ((m->queue - m->pc) == PQ_CACHE)
1279			cnt.v_reactivated++;
1280		vm_page_unqueue(m);
1281		TAILQ_INSERT_TAIL(&vm_page_queue_inactive, m, pageq);
1282		m->queue = PQ_INACTIVE;
1283		++(*vm_page_queues[PQ_INACTIVE].lcnt);
1284		cnt.v_inactive_count++;
1285	}
1286	splx(s);
1287}
1288
1289/*
1290 * vm_page_cache
1291 *
1292 * Put the specified page onto the page cache queue (if appropriate).
1293 */
1294void
1295vm_page_cache(m)
1296	register vm_page_t m;
1297{
1298	int s;
1299
1300#if !defined(MAX_PERF)
1301	if ((m->flags & PG_BUSY) || m->busy || m->wire_count) {
1302		printf("vm_page_cache: attempting to cache busy page\n");
1303		return;
1304	}
1305#endif
1306	if ((m->queue - m->pc) == PQ_CACHE)
1307		return;
1308
1309	vm_page_protect(m, VM_PROT_NONE);
1310#if !defined(MAX_PERF)
1311	if (m->dirty != 0) {
1312		panic("vm_page_cache: caching a dirty page, pindex: %d", m->pindex);
1313	}
1314#endif
1315	s = splvm();
1316	vm_page_unqueue_nowakeup(m);
1317	m->queue = PQ_CACHE + m->pc;
1318	(*vm_page_queues[m->queue].lcnt)++;
1319	TAILQ_INSERT_TAIL(vm_page_queues[m->queue].pl, m, pageq);
1320	cnt.v_cache_count++;
1321	m->object->cache_count++;
1322	vm_page_free_wakeup();
1323	splx(s);
1324}
1325
1326/*
1327 * Grab a page, waiting until we are woken up due to the page
1328 * changing state.  We keep waiting as long as the page remains
1329 * busy in the object.  If the page doesn't exist, allocate it.
1330 */
1331vm_page_t
1332vm_page_grab(object, pindex, allocflags)
1333	vm_object_t object;
1334	vm_pindex_t pindex;
1335	int allocflags;
1336{
1337
1338	vm_page_t m;
1339	int s, generation;
1340
1341retrylookup:
1342	if ((m = vm_page_lookup(object, pindex)) != NULL) {
1343		if (m->busy || (m->flags & PG_BUSY)) {
1344			generation = object->generation;
1345
1346			s = splvm();
1347			while ((object->generation == generation) &&
1348					(m->busy || (m->flags & PG_BUSY))) {
1349				m->flags |= PG_WANTED | PG_REFERENCED;
1350				tsleep(m, PVM, "pgrbwt", 0);
1351				if ((allocflags & VM_ALLOC_RETRY) == 0) {
1352					splx(s);
1353					return NULL;
1354				}
1355			}
1356			splx(s);
1357			goto retrylookup;
1358		} else {
1359			m->flags |= PG_BUSY;
1360			return m;
1361		}
1362	}
1363
1364	m = vm_page_alloc(object, pindex, allocflags & ~VM_ALLOC_RETRY);
1365	if (m == NULL) {
1366		VM_WAIT;
1367		if ((allocflags & VM_ALLOC_RETRY) == 0)
1368			return NULL;
1369		goto retrylookup;
1370	}
1371
1372	return m;
1373}
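/*
 * Typical use (illustrative sketch only, not compiled in):
 *
 *	m = vm_page_grab(object, pindex, VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
 *
 * With VM_ALLOC_RETRY the call keeps sleeping and retrying until it can
 * hand back a busied page; without it the routine may return NULL after
 * sleeping once, and the caller must handle that.
 */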
1374
1375/*
1376 * mapping function for valid bits or for dirty bits in
1377 * a page
1378 */
1379inline int
1380vm_page_bits(int base, int size)
1381{
1382	u_short chunk;
1383
1384	if ((base == 0) && (size >= PAGE_SIZE))
1385		return VM_PAGE_BITS_ALL;
1386	size = (size + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1);
1387	base = (base % PAGE_SIZE) / DEV_BSIZE;
1388	chunk = vm_page_dev_bsize_chunks[size / DEV_BSIZE];
1389	return (chunk << base) & VM_PAGE_BITS_ALL;
1390}
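/*
 * Worked examples (illustrative, assuming PAGE_SIZE == 4096 and
 * DEV_BSIZE == 512):
 *
 *	vm_page_bits(0, 512)    == 0x0001	(chunk 0 only)
 *	vm_page_bits(512, 1024) == 0x0006	(chunks 1 and 2)
 *	vm_page_bits(0, 4096)   == VM_PAGE_BITS_ALL
 */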
1391
1392/*
1393 * set a page valid and clean
1394 */
1395void
1396vm_page_set_validclean(m, base, size)
1397	vm_page_t m;
1398	int base;
1399	int size;
1400{
1401	int pagebits = vm_page_bits(base, size);
1402	m->valid |= pagebits;
1403	m->dirty &= ~pagebits;
1404	if( base == 0 && size == PAGE_SIZE)
1405		pmap_clear_modify(VM_PAGE_TO_PHYS(m));
1406}
1407
1408/*
1409 * set a page (partially) invalid
1410 */
1411void
1412vm_page_set_invalid(m, base, size)
1413	vm_page_t m;
1414	int base;
1415	int size;
1416{
1417	int bits;
1418
1419	m->valid &= ~(bits = vm_page_bits(base, size));
1420	if (m->valid == 0)
1421		m->dirty &= ~bits;
1422}
1423
1424/*
1425 * is (partial) page valid?
1426 */
1427int
1428vm_page_is_valid(m, base, size)
1429	vm_page_t m;
1430	int base;
1431	int size;
1432{
1433	int bits = vm_page_bits(base, size);
1434
1435	if (m->valid && ((m->valid & bits) == bits))
1436		return 1;
1437	else
1438		return 0;
1439}
1440
1441void
1442vm_page_test_dirty(m)
1443	vm_page_t m;
1444{
1445	if ((m->dirty != VM_PAGE_BITS_ALL) &&
1446	    pmap_is_modified(VM_PAGE_TO_PHYS(m))) {
1447		m->dirty = VM_PAGE_BITS_ALL;
1448	}
1449}
1450
1451/*
1452 * This interface is for merging with malloc() someday.
1453 * Even if we never implement compaction so that contiguous allocation
1454 * works after initialization time, malloc()'s data structures are good
1455 * for statistics and for allocations of less than a page.
1456 */
1457void *
1458contigmalloc1(size, type, flags, low, high, alignment, boundary, map)
1459	unsigned long size;	/* should be size_t here and for malloc() */
1460	struct malloc_type *type;
1461	int flags;
1462	unsigned long low;
1463	unsigned long high;
1464	unsigned long alignment;
1465	unsigned long boundary;
1466	vm_map_t map;
1467{
1468	int i, s, start;
1469	vm_offset_t addr, phys, tmp_addr;
1470	int pass;
1471	vm_page_t pga = vm_page_array;
1472
1473	size = round_page(size);
1474#if !defined(MAX_PERF)
1475	if (size == 0)
1476		panic("contigmalloc1: size must not be 0");
1477	if ((alignment & (alignment - 1)) != 0)
1478		panic("contigmalloc1: alignment must be a power of 2");
1479	if ((boundary & (boundary - 1)) != 0)
1480		panic("contigmalloc1: boundary must be a power of 2");
1481#endif
1482
1483	start = 0;
1484	for (pass = 0; pass <= 1; pass++) {
1485		s = splvm();
1486again:
1487		/*
1488		 * Find first page in array that is free, within range, aligned, and
1489		 * such that the boundary won't be crossed.
1490		 */
1491		for (i = start; i < cnt.v_page_count; i++) {
1492			int pqtype;
1493			phys = VM_PAGE_TO_PHYS(&pga[i]);
1494			pqtype = pga[i].queue - pga[i].pc;
1495			if (((pqtype == PQ_ZERO) || (pqtype == PQ_FREE) || (pqtype == PQ_CACHE)) &&
1496			    (phys >= low) && (phys < high) &&
1497			    ((phys & (alignment - 1)) == 0) &&
1498			    (((phys ^ (phys + size - 1)) & ~(boundary - 1)) == 0))
1499				break;
1500		}
1501
1502		/*
1503		 * If the above failed or we will exceed the upper bound, fail.
1504		 */
1505		if ((i == cnt.v_page_count) ||
1506			((VM_PAGE_TO_PHYS(&pga[i]) + size) > high)) {
1507			vm_page_t m, next;
1508
1509again1:
1510			for (m = TAILQ_FIRST(&vm_page_queue_inactive);
1511				m != NULL;
1512				m = next) {
1513
1514				if (m->queue != PQ_INACTIVE) {
1515					break;
1516				}
1517
1518				next = TAILQ_NEXT(m, pageq);
1519				if (m->flags & PG_BUSY) {
1520					m->flags |= PG_WANTED;
1521					tsleep(m, PVM, "vpctw0", 0);
1522					goto again1;
1523				}
1524				vm_page_test_dirty(m);
1525				if (m->dirty) {
1526					if (m->object->type == OBJT_VNODE) {
1527						vn_lock(m->object->handle, LK_EXCLUSIVE | LK_RETRY, curproc);
1528						vm_object_page_clean(m->object, 0, 0, TRUE);
1529						VOP_UNLOCK(m->object->handle, 0, curproc);
1530						goto again1;
1531					} else if (m->object->type == OBJT_SWAP ||
1532								m->object->type == OBJT_DEFAULT) {
1533						vm_page_protect(m, VM_PROT_NONE);
1534						vm_pageout_flush(&m, 1, 0);
1535						goto again1;
1536					}
1537				}
1538				if ((m->dirty == 0) && (m->busy == 0) && (m->hold_count == 0))
1539					vm_page_cache(m);
1540			}
1541
1542			for (m = TAILQ_FIRST(&vm_page_queue_active);
1543				m != NULL;
1544				m = next) {
1545
1546				if (m->queue != PQ_ACTIVE) {
1547					break;
1548				}
1549
1550				next = TAILQ_NEXT(m, pageq);
1551				if (m->flags & PG_BUSY) {
1552					m->flags |= PG_WANTED;
1553					tsleep(m, PVM, "vpctw1", 0);
1554					goto again1;
1555				}
1556				vm_page_test_dirty(m);
1557				if (m->dirty) {
1558					if (m->object->type == OBJT_VNODE) {
1559						vn_lock(m->object->handle, LK_EXCLUSIVE | LK_RETRY, curproc);
1560						vm_object_page_clean(m->object, 0, 0, TRUE);
1561						VOP_UNLOCK(m->object->handle, 0, curproc);
1562						goto again1;
1563					} else if (m->object->type == OBJT_SWAP ||
1564								m->object->type == OBJT_DEFAULT) {
1565						vm_page_protect(m, VM_PROT_NONE);
1566						vm_pageout_flush(&m, 1, 0);
1567						goto again1;
1568					}
1569				}
1570				if ((m->dirty == 0) && (m->busy == 0) && (m->hold_count == 0))
1571					vm_page_cache(m);
1572			}
1573
1574			splx(s);
1575			continue;
1576		}
1577		start = i;
1578
1579		/*
1580		 * Check successive pages for contiguous and free.
1581		 * Check that successive pages are contiguous and free.
1582		for (i = start + 1; i < (start + size / PAGE_SIZE); i++) {
1583			int pqtype;
1584			pqtype = pga[i].queue - pga[i].pc;
1585			if ((VM_PAGE_TO_PHYS(&pga[i]) !=
1586			    (VM_PAGE_TO_PHYS(&pga[i - 1]) + PAGE_SIZE)) ||
1587			    ((pqtype != PQ_ZERO) && (pqtype != PQ_FREE) && (pqtype != PQ_CACHE))) {
1588				start++;
1589				goto again;
1590			}
1591		}
1592
1593		for (i = start; i < (start + size / PAGE_SIZE); i++) {
1594			int pqtype;
1595			vm_page_t m = &pga[i];
1596
1597			pqtype = m->queue - m->pc;
1598			if (pqtype == PQ_CACHE) {
1599				m->flags |= PG_BUSY;
1600				vm_page_free(m);
1601			}
1602
1603			TAILQ_REMOVE(vm_page_queues[m->queue].pl, m, pageq);
1604			(*vm_page_queues[m->queue].lcnt)--;
1605			cnt.v_free_count--;
1606			m->valid = VM_PAGE_BITS_ALL;
1607			m->flags = 0;
1608			m->dirty = 0;
1609			m->wire_count = 0;
1610			m->busy = 0;
1611			m->queue = PQ_NONE;
1612			m->object = NULL;
1613			vm_page_wire(m);
1614		}
1615
1616		/*
1617		 * We've found a contiguous chunk that meets our requirements.
1618		 * Allocate kernel VM, unfree and assign the physical pages to it,
1619		 * and return the kernel VM pointer.
1620		 */
1621		tmp_addr = addr = kmem_alloc_pageable(map, size);
1622		if (addr == 0) {
1623			/*
1624			 * XXX We almost never run out of kernel virtual
1625			 * space, so we don't make the allocated memory
1626			 * above available.
1627			 */
1628			splx(s);
1629			return (NULL);
1630		}
1631
1632		for (i = start; i < (start + size / PAGE_SIZE); i++) {
1633			vm_page_t m = &pga[i];
1634			vm_page_insert(m, kernel_object,
1635				OFF_TO_IDX(tmp_addr - VM_MIN_KERNEL_ADDRESS));
1636			pmap_kenter(tmp_addr, VM_PAGE_TO_PHYS(m));
1637			tmp_addr += PAGE_SIZE;
1638		}
1639
1640		splx(s);
1641		return ((void *)addr);
1642	}
1643	return NULL;
1644}
1645
1646void *
1647contigmalloc(size, type, flags, low, high, alignment, boundary)
1648	unsigned long size;	/* should be size_t here and for malloc() */
1649	struct malloc_type *type;
1650	int flags;
1651	unsigned long low;
1652	unsigned long high;
1653	unsigned long alignment;
1654	unsigned long boundary;
1655{
1656	return contigmalloc1(size, type, flags, low, high, alignment, boundary,
1657			     kernel_map);
1658}
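/*
 * Example use (illustrative sketch only; the size and bounds are made
 * up): allocate 64KB of physically contiguous, 64KB-aligned memory
 * below 16MB, e.g. for an ISA DMA buffer:
 *
 *	buf = contigmalloc(65536, M_DEVBUF, M_NOWAIT,
 *	    0ul, 0xfffffful, 65536ul, 0ul);
 */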
1659
1660vm_offset_t
1661vm_page_alloc_contig(size, low, high, alignment)
1662	vm_offset_t size;
1663	vm_offset_t low;
1664	vm_offset_t high;
1665	vm_offset_t alignment;
1666{
1667	return ((vm_offset_t)contigmalloc1(size, M_DEVBUF, M_NOWAIT, low, high,
1668					  alignment, 0ul, kernel_map));
1669}
1670
1671#include "opt_ddb.h"
1672#ifdef DDB
1673#include <sys/kernel.h>
1674
1675#include <ddb/ddb.h>
1676
1677DB_SHOW_COMMAND(page, vm_page_print_page_info)
1678{
1679	db_printf("cnt.v_free_count: %d\n", cnt.v_free_count);
1680	db_printf("cnt.v_cache_count: %d\n", cnt.v_cache_count);
1681	db_printf("cnt.v_inactive_count: %d\n", cnt.v_inactive_count);
1682	db_printf("cnt.v_active_count: %d\n", cnt.v_active_count);
1683	db_printf("cnt.v_wire_count: %d\n", cnt.v_wire_count);
1684	db_printf("cnt.v_free_reserved: %d\n", cnt.v_free_reserved);
1685	db_printf("cnt.v_free_min: %d\n", cnt.v_free_min);
1686	db_printf("cnt.v_free_target: %d\n", cnt.v_free_target);
1687	db_printf("cnt.v_cache_min: %d\n", cnt.v_cache_min);
1688	db_printf("cnt.v_inactive_target: %d\n", cnt.v_inactive_target);
1689}
1690
1691DB_SHOW_COMMAND(pageq, vm_page_print_pageq_info)
1692{
1693	int i;
1694	db_printf("PQ_FREE:");
1695	for(i=0;i<PQ_L2_SIZE;i++) {
1696		db_printf(" %d", *vm_page_queues[PQ_FREE + i].lcnt);
1697	}
1698	db_printf("\n");
1699
1700	db_printf("PQ_CACHE:");
1701	for(i=0;i<PQ_L2_SIZE;i++) {
1702		db_printf(" %d", *vm_page_queues[PQ_CACHE + i].lcnt);
1703	}
1704	db_printf("\n");
1705
1706	db_printf("PQ_ZERO:");
1707	for(i=0;i<PQ_L2_SIZE;i++) {
1708		db_printf(" %d", *vm_page_queues[PQ_ZERO + i].lcnt);
1709	}
1710	db_printf("\n");
1711
1712	db_printf("PQ_ACTIVE: %d, PQ_INACTIVE: %d\n",
1713		*vm_page_queues[PQ_ACTIVE].lcnt,
1714		*vm_page_queues[PQ_INACTIVE].lcnt);
1715}
1716#endif /* DDB */
1717