vm_page.c revision 35612
1/*
2 * Copyright (c) 1991 Regents of the University of California.
3 * All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * The Mach Operating System project at Carnegie-Mellon University.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 *    must display the following acknowledgement:
18 *	This product includes software developed by the University of
19 *	California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 *    may be used to endorse or promote products derived from this software
22 *    without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 *
36 *	from: @(#)vm_page.c	7.4 (Berkeley) 5/7/91
37 *	$Id: vm_page.c,v 1.98 1998/04/15 17:47:38 bde Exp $
38 */
39
40/*
41 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
42 * All rights reserved.
43 *
44 * Authors: Avadis Tevanian, Jr., Michael Wayne Young
45 *
46 * Permission to use, copy, modify and distribute this software and
47 * its documentation is hereby granted, provided that both the copyright
48 * notice and this permission notice appear in all copies of the
49 * software, derivative works or modified versions, and any portions
50 * thereof, and that both notices appear in supporting documentation.
51 *
52 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
53 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
54 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
55 *
56 * Carnegie Mellon requests users of this software to return to
57 *
58 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
59 *  School of Computer Science
60 *  Carnegie Mellon University
61 *  Pittsburgh PA 15213-3890
62 *
63 * any improvements or extensions that they make and grant Carnegie the
64 * rights to redistribute these changes.
65 */
66
67/*
68 *	Resident memory management module.
69 */
70
71#include <sys/param.h>
72#include <sys/systm.h>
73#include <sys/malloc.h>
74#include <sys/proc.h>
75#include <sys/vmmeter.h>
76#include <sys/vnode.h>
77
78#include <vm/vm.h>
79#include <vm/vm_param.h>
80#include <vm/vm_prot.h>
81#include <sys/lock.h>
82#include <vm/vm_kern.h>
83#include <vm/vm_object.h>
84#include <vm/vm_page.h>
85#include <vm/vm_pageout.h>
86#include <vm/vm_extern.h>
87
88static void	vm_page_queue_init __P((void));
89static vm_page_t vm_page_select_free __P((vm_object_t object,
90			vm_pindex_t pindex, int prefqueue));
91static vm_page_t vm_page_select_cache __P((vm_object_t, vm_pindex_t));
92
93/*
94 *	Associated with each page of user-allocatable memory is a
95 *	page structure.
96 */
97
98static struct pglist *vm_page_buckets;	/* Array of buckets */
99static int vm_page_bucket_count;	/* How big is array? */
100static int vm_page_hash_mask;		/* Mask for hash function */
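/*
 * vm_page_bucket_generation is bumped on every hash chain update (see
 * vm_page_insert() and vm_page_remove()); vm_page_lookup() re-checks it
 * to detect a concurrent change and restart its bucket scan.
 */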
101static volatile int vm_page_bucket_generation;
102
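/*
 * The free, zero and cache queues are each split into PQ_L2_SIZE
 * per-color sub-queues so that allocations can prefer pages of a given
 * cache color; the active and inactive queues are single lists.
 */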
103struct pglist vm_page_queue_free[PQ_L2_SIZE] = {0};
104struct pglist vm_page_queue_zero[PQ_L2_SIZE] = {0};
105struct pglist vm_page_queue_active = {0};
106struct pglist vm_page_queue_inactive = {0};
107struct pglist vm_page_queue_cache[PQ_L2_SIZE] = {0};
108
109static int no_queue=0;
110
111struct vpgqueues vm_page_queues[PQ_COUNT] = {0};
112static int pqcnt[PQ_COUNT] = {0};
113
114static void
115vm_page_queue_init(void) {
116	int i;
117
118	vm_page_queues[PQ_NONE].pl = NULL;
119	vm_page_queues[PQ_NONE].cnt = &no_queue;
120	for(i=0;i<PQ_L2_SIZE;i++) {
121		vm_page_queues[PQ_FREE+i].pl = &vm_page_queue_free[i];
122		vm_page_queues[PQ_FREE+i].cnt = &cnt.v_free_count;
123	}
124	for(i=0;i<PQ_L2_SIZE;i++) {
125		vm_page_queues[PQ_ZERO+i].pl = &vm_page_queue_zero[i];
126		vm_page_queues[PQ_ZERO+i].cnt = &cnt.v_free_count;
127	}
128	vm_page_queues[PQ_INACTIVE].pl = &vm_page_queue_inactive;
129	vm_page_queues[PQ_INACTIVE].cnt = &cnt.v_inactive_count;
130
131	vm_page_queues[PQ_ACTIVE].pl = &vm_page_queue_active;
132	vm_page_queues[PQ_ACTIVE].cnt = &cnt.v_active_count;
133	for(i=0;i<PQ_L2_SIZE;i++) {
134		vm_page_queues[PQ_CACHE+i].pl = &vm_page_queue_cache[i];
135		vm_page_queues[PQ_CACHE+i].cnt = &cnt.v_cache_count;
136	}
137	for(i=0;i<PQ_COUNT;i++) {
138		if (vm_page_queues[i].pl) {
139			TAILQ_INIT(vm_page_queues[i].pl);
140		} else if (i != 0) {
141			panic("vm_page_queue_init: queue %d is null", i);
142		}
143		vm_page_queues[i].lcnt = &pqcnt[i];
144	}
145}
146
147vm_page_t vm_page_array = 0;
148static int vm_page_array_size = 0;
149long first_page = 0;
150static long last_page;
151static vm_size_t page_mask;
152static int page_shift;
153int vm_page_zero_count = 0;
154
155/*
156 * map of contiguous valid DEV_BSIZE chunks in a page
157 * (this list is valid for page sizes up to 16*DEV_BSIZE)
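 * Entry N has its low N bits set; vm_page_bits() shifts the entry into
 * place to build a valid/dirty mask covering N DEV_BSIZE chunks.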
158 */
159static u_short vm_page_dev_bsize_chunks[] = {
160	0x0, 0x1, 0x3, 0x7, 0xf, 0x1f, 0x3f, 0x7f, 0xff,
161	0x1ff, 0x3ff, 0x7ff, 0xfff, 0x1fff, 0x3fff, 0x7fff, 0xffff
162};
163
164static __inline int vm_page_hash __P((vm_object_t object, vm_pindex_t pindex));
165static int vm_page_freechk_and_unqueue __P((vm_page_t m));
166static void vm_page_free_wakeup __P((void));
167
168/*
169 *	vm_set_page_size:
170 *
171 *	Sets the page size, perhaps based upon the memory
172 *	size.  Must be called before any use of page-size
173 *	dependent functions.
174 *
175 *	Sets page_shift and page_mask from cnt.v_page_size.
176 */
177void
178vm_set_page_size()
179{
180
181	if (cnt.v_page_size == 0)
182		cnt.v_page_size = DEFAULT_PAGE_SIZE;
183	page_mask = cnt.v_page_size - 1;
184	if ((page_mask & cnt.v_page_size) != 0)
185		panic("vm_set_page_size: page size not a power of two");
186	for (page_shift = 0;; page_shift++)
187		if ((1 << page_shift) == cnt.v_page_size)
188			break;
189}
190
191/*
192 *	vm_page_startup:
193 *
194 *	Initializes the resident memory module.
195 *
196 *	Allocates memory for the page cells, and
197 *	for the object/offset-to-page hash table headers.
198 *	Each page cell is initialized and placed on the free list.
199 */
200
201vm_offset_t
202vm_page_startup(starta, enda, vaddr)
203	register vm_offset_t starta;
204	vm_offset_t enda;
205	register vm_offset_t vaddr;
206{
207	register vm_offset_t mapped;
208	register vm_page_t m;
209	register struct pglist *bucket;
210	vm_size_t npages, page_range;
211	register vm_offset_t new_start;
212	int i;
213	vm_offset_t pa;
214	int nblocks;
215	vm_offset_t first_managed_page;
216
217	/* the biggest memory array is the second group of pages */
218	vm_offset_t start;
219	vm_offset_t biggestone, biggestsize;
220
221	vm_offset_t total;
222
223	total = 0;
224	biggestsize = 0;
225	biggestone = 0;
226	nblocks = 0;
227	vaddr = round_page(vaddr);
228
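	/*
	 * phys_avail[] is a zero-terminated list of (start, end) physical
	 * address pairs describing the chunks of physical memory that are
	 * available for use; page-align each chunk.
	 */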
229	for (i = 0; phys_avail[i + 1]; i += 2) {
230		phys_avail[i] = round_page(phys_avail[i]);
231		phys_avail[i + 1] = trunc_page(phys_avail[i + 1]);
232	}
233
234	for (i = 0; phys_avail[i + 1]; i += 2) {
235		int size = phys_avail[i + 1] - phys_avail[i];
236
237		if (size > biggestsize) {
238			biggestone = i;
239			biggestsize = size;
240		}
241		++nblocks;
242		total += size;
243	}
244
245	start = phys_avail[biggestone];
246
247	/*
248	 * Initialize the queue headers for the free queue, the active queue
249	 * and the inactive queue.
250	 */
251
252	vm_page_queue_init();
253
254	/*
255	 * Allocate (and initialize) the hash table buckets.
256	 *
257	 * The number of buckets MUST BE a power of 2, and the actual value is
258	 * the next power of 2 greater than the number of physical pages in
259	 * the system.
260	 *
261	 * Note: This computation can be tweaked if desired.
262	 */
263	vm_page_buckets = (struct pglist *) vaddr;
264	bucket = vm_page_buckets;
265	if (vm_page_bucket_count == 0) {
266		vm_page_bucket_count = 1;
267		while (vm_page_bucket_count < atop(total))
268			vm_page_bucket_count <<= 1;
269	}
270	vm_page_hash_mask = vm_page_bucket_count - 1;
271
272	/*
273	 * Validate these addresses.
274	 */
275
276	new_start = start + vm_page_bucket_count * sizeof(struct pglist);
277	new_start = round_page(new_start);
278	mapped = vaddr;
279	vaddr = pmap_map(mapped, start, new_start,
280	    VM_PROT_READ | VM_PROT_WRITE);
281	start = new_start;
282	bzero((caddr_t) mapped, vaddr - mapped);
283	mapped = vaddr;
284
285	for (i = 0; i < vm_page_bucket_count; i++) {
286		TAILQ_INIT(bucket);
287		bucket++;
288	}
289
290	/*
291	 * Validate these zone addresses.
292	 */
293
294	new_start = start + (vaddr - mapped);
295	pmap_map(mapped, start, new_start, VM_PROT_READ | VM_PROT_WRITE);
296	bzero((caddr_t) mapped, (vaddr - mapped));
297	start = round_page(new_start);
298
299	/*
300	 * Compute the number of pages of memory that will be available for
301	 * use (taking into account the overhead of a page structure per
302	 * page).
303	 */
304
305	first_page = phys_avail[0] / PAGE_SIZE;
306	last_page = phys_avail[(nblocks - 1) * 2 + 1] / PAGE_SIZE;
307
308	page_range = last_page - (phys_avail[0] / PAGE_SIZE);
309	npages = (total - (page_range * sizeof(struct vm_page)) -
310	    (start - phys_avail[biggestone])) / PAGE_SIZE;
311
312	/*
313	 * Initialize the mem entry structures now, and put them in the free
314	 * queue.
315	 */
316
317	vm_page_array = (vm_page_t) vaddr;
318	mapped = vaddr;
319
320	/*
321	 * Validate these addresses.
322	 */
323
324	new_start = round_page(start + page_range * sizeof(struct vm_page));
325	mapped = pmap_map(mapped, start, new_start,
326	    VM_PROT_READ | VM_PROT_WRITE);
327	start = new_start;
328
329	first_managed_page = start / PAGE_SIZE;
330
331	/*
332	 * Clear all of the page structures
333	 */
334	bzero((caddr_t) vm_page_array, page_range * sizeof(struct vm_page));
335	vm_page_array_size = page_range;
336
337	cnt.v_page_count = 0;
338	cnt.v_free_count = 0;
339	for (i = 0; phys_avail[i + 1] && npages > 0; i += 2) {
340		if (i == biggestone)
341			pa = ptoa(first_managed_page);
342		else
343			pa = phys_avail[i];
344		while (pa < phys_avail[i + 1] && npages-- > 0) {
345			++cnt.v_page_count;
346			++cnt.v_free_count;
347			m = PHYS_TO_VM_PAGE(pa);
348			m->phys_addr = pa;
349			m->flags = 0;
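			/*
			 * The page's color is derived from its physical page
			 * number; it selects which of the PQ_L2_SIZE free,
			 * zero and cache sub-queues the page lives on.
			 */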
350			m->pc = (pa >> PAGE_SHIFT) & PQ_L2_MASK;
351			m->queue = PQ_FREE + m->pc;
352			TAILQ_INSERT_TAIL(vm_page_queues[m->queue].pl, m, pageq);
353			++(*vm_page_queues[m->queue].lcnt);
354			pa += PAGE_SIZE;
355		}
356	}
357	return (mapped);
358}
359
360/*
361 *	vm_page_hash:
362 *
363 *	Distributes the object/offset key pair among hash buckets.
364 *
365 *	NOTE:  This macro depends on vm_page_bucket_count being a power of 2.
366 */
367static __inline int
368vm_page_hash(object, pindex)
369	vm_object_t object;
370	vm_pindex_t pindex;
371{
372	return ((((unsigned) object) >> 5) + (pindex >> 1)) & vm_page_hash_mask;
373}
374
375/*
376 *	vm_page_insert:		[ internal use only ]
377 *
378 *	Inserts the given mem entry into the object/offset-page
379 *	table and the object page list.
380 *
381 *	The object and page must be locked, and must be splhigh.
382 */
383
384void
385vm_page_insert(m, object, pindex)
386	register vm_page_t m;
387	register vm_object_t object;
388	register vm_pindex_t pindex;
389{
390	register struct pglist *bucket;
391
392#if !defined(MAX_PERF)
393	if (m->flags & PG_TABLED)
394		panic("vm_page_insert: already inserted");
395#endif
396
397	/*
398	 * Record the object/offset pair in this page
399	 */
400
401	m->object = object;
402	m->pindex = pindex;
403
404	/*
405	 * Insert it into the object/offset hash table
406	 */
407
408	bucket = &vm_page_buckets[vm_page_hash(object, pindex)];
409	TAILQ_INSERT_TAIL(bucket, m, hashq);
410	vm_page_bucket_generation++;
411
412	/*
413	 * Now link into the object's list of backed pages.
414	 */
415
416	TAILQ_INSERT_TAIL(&object->memq, m, listq);
417	m->flags |= PG_TABLED;
418	m->object->page_hint = m;
419	m->object->generation++;
420
421	if (m->wire_count)
422		object->wire_count++;
423
424	if ((m->queue - m->pc) == PQ_CACHE)
425		object->cache_count++;
426
427	/*
428	 * And show that the object has one more resident page.
429	 */
430
431	object->resident_page_count++;
432}
433
434/*
435 *	vm_page_remove:		[ internal use only ]
436 *				NOTE: used by device pager as well -wfj
437 *
438 *	Removes the given mem entry from the object/offset-page
439 *	table and the object page list.
440 *
441 *	The object and page must be locked, and at splhigh.
442 */
443
444void
445vm_page_remove(m)
446	register vm_page_t m;
447{
448	register struct pglist *bucket;
449	vm_object_t object;
450
451	if (!(m->flags & PG_TABLED))
452		return;
453
454#if !defined(MAX_PERF)
455	if ((m->flags & PG_BUSY) == 0) {
456		panic("vm_page_remove: page not busy");
457	}
458#endif
459
460	m->flags &= ~PG_BUSY;
461	if (m->flags & PG_WANTED) {
462		m->flags &= ~PG_WANTED;
463		wakeup(m);
464	}
465
466	object = m->object;
467	if (object->page_hint == m)
468		object->page_hint = NULL;
469
470	if (m->wire_count)
471		object->wire_count--;
472
473	if ((m->queue - m->pc) == PQ_CACHE)
474		object->cache_count--;
475
476	/*
477	 * Remove from the object/offset hash table
478	 */
479
480	bucket = &vm_page_buckets[vm_page_hash(m->object, m->pindex)];
481	TAILQ_REMOVE(bucket, m, hashq);
482	vm_page_bucket_generation++;
483
484	/*
485	 * Now remove from the object's list of backed pages.
486	 */
487
488	TAILQ_REMOVE(&object->memq, m, listq);
489
490	/*
491	 * And show that the object has one fewer resident page.
492	 */
493
494	object->resident_page_count--;
495	object->generation++;
496	m->object = NULL;
497
498	m->flags &= ~PG_TABLED;
499}
500
501/*
502 *	vm_page_lookup:
503 *
504 *	Returns the page associated with the object/offset
505 *	pair specified; if none is found, NULL is returned.
506 *
507 *	The object must be locked.  No side effects.
508 */
509
510vm_page_t
511vm_page_lookup(object, pindex)
512	register vm_object_t object;
513	register vm_pindex_t pindex;
514{
515	register vm_page_t m;
516	register struct pglist *bucket;
517	int generation;
519
520	/*
521	 * Search the hash table for this object/offset pair
522	 */
523
524	if (object->page_hint && (object->page_hint->pindex == pindex) &&
525		(object->page_hint->object == object))
526		return object->page_hint;
527
528retry:
529	generation = vm_page_bucket_generation;
530	bucket = &vm_page_buckets[vm_page_hash(object, pindex)];
531	for (m = TAILQ_FIRST(bucket); m != NULL; m = TAILQ_NEXT(m,hashq)) {
532		if ((m->object == object) && (m->pindex == pindex)) {
533			if (vm_page_bucket_generation != generation)
534				goto retry;
535			m->object->page_hint = m;
536			return (m);
537		}
538	}
539	if (vm_page_bucket_generation != generation)
540		goto retry;
541	return (NULL);
542}
543
544/*
545 *	vm_page_rename:
546 *
547 *	Move the given memory entry from its
548 *	current object to the specified target object/offset.
549 *
550 *	The object must be locked.
551 */
552void
553vm_page_rename(m, new_object, new_pindex)
554	register vm_page_t m;
555	register vm_object_t new_object;
556	vm_pindex_t new_pindex;
557{
558	int s;
559
560	s = splvm();
561	vm_page_remove(m);
562	vm_page_insert(m, new_object, new_pindex);
563	splx(s);
564}
565
566/*
567 * vm_page_unqueue without any wakeup
568 */
569void
570vm_page_unqueue_nowakeup(m)
571	vm_page_t m;
572{
573	int queue = m->queue;
574	struct vpgqueues *pq;
575	if (queue != PQ_NONE) {
576		pq = &vm_page_queues[queue];
577		m->queue = PQ_NONE;
578		TAILQ_REMOVE(pq->pl, m, pageq);
579		(*pq->cnt)--;
580		(*pq->lcnt)--;
581		if ((queue - m->pc) == PQ_CACHE) {
582			if (m->object)
583				m->object->cache_count--;
584		}
585	}
586}
587
588/*
589 * vm_page_unqueue must be called at splhigh().
590 */
591void
592vm_page_unqueue(m)
593	vm_page_t m;
594{
595	int queue = m->queue;
596	struct vpgqueues *pq;
597	if (queue != PQ_NONE) {
598		m->queue = PQ_NONE;
599		pq = &vm_page_queues[queue];
600		TAILQ_REMOVE(pq->pl, m, pageq);
601		(*pq->cnt)--;
602		(*pq->lcnt)--;
603		if ((queue - m->pc) == PQ_CACHE) {
604			if ((cnt.v_cache_count + cnt.v_free_count) <
605				(cnt.v_free_reserved + cnt.v_cache_min))
606				pagedaemon_wakeup();
607			if (m->object)
608				m->object->cache_count--;
609		}
610	}
611}
612
613/*
614 * Find a page on the specified queue with color optimization.
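 *
 * The search starts at the requested color `index' and then, for each
 * L1 offset j, probes colors that differ from index by a multiple of
 * PQ_L1_SIZE plus j, on both sides of index.  Pages with the same L1
 * cache color as the request (j == 0) are thus preferred over pages
 * that differ in L1 color as well.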
615 */
616vm_page_t
617vm_page_list_find(basequeue, index)
618	int basequeue, index;
619{
620#if PQ_L2_SIZE > 1
621
622	int i,j;
623	vm_page_t m;
624	int hindex;
625	struct vpgqueues *pq;
626
627	pq = &vm_page_queues[basequeue];
628
629	m = TAILQ_FIRST(pq[index].pl);
630	if (m)
631		return m;
632
633	for(j = 0; j < PQ_L1_SIZE; j++) {
634		int ij;
635		for(i = (PQ_L2_SIZE / 2) - PQ_L1_SIZE;
636			(ij = i + j) > 0;
637			i -= PQ_L1_SIZE) {
638
639			hindex = index + ij;
640			if (hindex >= PQ_L2_SIZE)
641				hindex -= PQ_L2_SIZE;
642			if ((m = TAILQ_FIRST(pq[hindex].pl)) != NULL)
643				return m;
644
645			hindex = index - ij;
646			if (hindex < 0)
647				hindex += PQ_L2_SIZE;
648			if ((m = TAILQ_FIRST(pq[hindex].pl)) != NULL)
649				return m;
650		}
651	}
652
653	hindex = index + PQ_L2_SIZE / 2;
654	if (hindex >= PQ_L2_SIZE)
655		hindex -= PQ_L2_SIZE;
656	m = TAILQ_FIRST(pq[hindex].pl);
657	if (m)
658		return m;
659
660	return NULL;
661#else
662	return TAILQ_FIRST(vm_page_queues[basequeue].pl);
663#endif
664
665}
666
667/*
668 * Find a page on the specified queue with color optimization.
669 */
670vm_page_t
671vm_page_select(object, pindex, basequeue)
672	vm_object_t object;
673	vm_pindex_t pindex;
674	int basequeue;
675{
676
677#if PQ_L2_SIZE > 1
678	int index;
679	index = (pindex + object->pg_color) & PQ_L2_MASK;
680	return vm_page_list_find(basequeue, index);
681
682#else
683	return TAILQ_FIRST(vm_page_queues[basequeue].pl);
684#endif
685
686}
687
688/*
689 * Find a page on the cache queue with color optimization.  Pages that
690 * are found but are not usable (busy, held or wired) are deactivated,
691 * which keeps us from handing out potentially busy cached pages.
692 */
693vm_page_t
694vm_page_select_cache(object, pindex)
695	vm_object_t object;
696	vm_pindex_t pindex;
697{
698	vm_page_t m;
699
700	while (TRUE) {
701#if PQ_L2_SIZE > 1
702		int index;
703		index = (pindex + object->pg_color) & PQ_L2_MASK;
704		m = vm_page_list_find(PQ_CACHE, index);
705
706#else
707		m = TAILQ_FIRST(vm_page_queues[PQ_CACHE].pl);
708#endif
709		if (m && ((m->flags & PG_BUSY) || m->busy ||
710			       m->hold_count || m->wire_count)) {
711			vm_page_deactivate(m);
712			continue;
713		}
714		return m;
715	}
716}
717
718/*
719 * Find a free or zero page, with specified preference.
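 *
 * As a fast path, if the object's page_hint is the page at (pindex - 1),
 * the vm_page that physically follows it is checked first; returning it
 * tends to keep an object's pages physically contiguous when the object
 * is filled sequentially.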
720 */
721static vm_page_t
722vm_page_select_free(object, pindex, prefqueue)
723	vm_object_t object;
724	vm_pindex_t pindex;
725	int prefqueue;
726{
727#if PQ_L2_SIZE > 1
728	int i,j;
729	int index, hindex;
730#endif
731	vm_page_t m, mh;
732	int oqueuediff;
733	struct vpgqueues *pq;
734
735	if (prefqueue == PQ_ZERO)
736		oqueuediff = PQ_FREE - PQ_ZERO;
737	else
738		oqueuediff = PQ_ZERO - PQ_FREE;
739
740	if ((mh = object->page_hint) != NULL) {
741		if (mh->pindex == (pindex - 1)) {
742			if ((mh->flags & PG_FICTITIOUS) == 0) {
743				if ((mh < &vm_page_array[cnt.v_page_count-1]) &&
744					(mh >= &vm_page_array[0])) {
745					int queue;
746					m = mh + 1;
747					if (VM_PAGE_TO_PHYS(m) == (VM_PAGE_TO_PHYS(mh) + PAGE_SIZE)) {
748						queue = m->queue - m->pc;
749						if (queue == PQ_FREE || queue == PQ_ZERO) {
750							return m;
751						}
752					}
753				}
754			}
755		}
756	}
757
758	pq = &vm_page_queues[prefqueue];
759
760#if PQ_L2_SIZE > 1
761
762	index = (pindex + object->pg_color) & PQ_L2_MASK;
763
764	if ((m = TAILQ_FIRST(pq[index].pl)) != NULL)
765		return m;
766	if ((m = TAILQ_FIRST(pq[index + oqueuediff].pl)) != NULL)
767		return m;
768
769	for(j = 0; j < PQ_L1_SIZE; j++) {
770		int ij;
771		for(i = (PQ_L2_SIZE / 2) - PQ_L1_SIZE;
772			(ij = i + j) >= 0;
773			i -= PQ_L1_SIZE) {
774
775			hindex = index + ij;
776			if (hindex >= PQ_L2_SIZE)
777				hindex -= PQ_L2_SIZE;
778			if ((m = TAILQ_FIRST(pq[hindex].pl)) != NULL)
779				return m;
780			if ((m = TAILQ_FIRST(pq[hindex + oqueuediff].pl)) != NULL)
781				return m;
782
783			hindex = index - ij;
784			if (hindex < 0)
785				hindex += PQ_L2_SIZE;
786			if ((m = TAILQ_FIRST(pq[hindex].pl)) != NULL)
787				return m;
788			if ((m = TAILQ_FIRST(pq[hindex + oqueuediff].pl)) != NULL)
789				return m;
790		}
791	}
792
793	hindex = index + PQ_L2_SIZE / 2;
794	if (hindex >= PQ_L2_SIZE)
795		hindex -= PQ_L2_SIZE;
796	if ((m = TAILQ_FIRST(pq[hindex].pl)) != NULL)
797		return m;
798	if ((m = TAILQ_FIRST(pq[hindex + oqueuediff].pl)) != NULL)
799		return m;
800
801#else
802	if ((m = TAILQ_FIRST(pq[0].pl)) != NULL)
803		return m;
804	else
805		return TAILQ_FIRST(pq[oqueuediff].pl);
806#endif
807
808	return NULL;
809}
810
811/*
812 *	vm_page_alloc:
813 *
814 *	Allocate and return a memory cell associated
815 *	with this VM object/offset pair.
816 *
817 *	page_req classes:
818 *	VM_ALLOC_NORMAL		normal process request
819 *	VM_ALLOC_SYSTEM		system *really* needs a page
820 *	VM_ALLOC_INTERRUPT	interrupt time request
821 *	VM_ALLOC_ZERO		zero page
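 *
 *	NORMAL and ZERO requests fall back to reclaiming a cached page
 *	once the free count drops below v_free_reserved; SYSTEM requests
 *	may also dig into the reserve when the cache is empty, and
 *	INTERRUPT requests may consume the very last free pages.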
822 *
823 *	Object must be locked.
824 */
825vm_page_t
826vm_page_alloc(object, pindex, page_req)
827	vm_object_t object;
828	vm_pindex_t pindex;
829	int page_req;
830{
831	register vm_page_t m;
832	struct vpgqueues *pq;
833	vm_object_t oldobject;
834	int queue, qtype;
835	int s;
836
837#ifdef DIAGNOSTIC
838	m = vm_page_lookup(object, pindex);
839	if (m)
840		panic("vm_page_alloc: page already allocated");
841#endif
842
843	if ((curproc == pageproc) && (page_req != VM_ALLOC_INTERRUPT)) {
844		page_req = VM_ALLOC_SYSTEM;
845	}
846
847	s = splvm();
848
849	switch (page_req) {
850
851	case VM_ALLOC_NORMAL:
852		if (cnt.v_free_count >= cnt.v_free_reserved) {
853			m = vm_page_select_free(object, pindex, PQ_FREE);
854#if defined(DIAGNOSTIC)
855			if (m == NULL)
856				panic("vm_page_alloc(NORMAL): missing page on free queue\n");
857#endif
858		} else {
859			m = vm_page_select_cache(object, pindex);
860			if (m == NULL) {
861				splx(s);
862#if defined(DIAGNOSTIC)
863				if (cnt.v_cache_count > 0)
864					printf("vm_page_alloc(NORMAL): missing pages on cache queue: %d\n", cnt.v_cache_count);
865#endif
866				vm_pageout_deficit++;
867				pagedaemon_wakeup();
868				return (NULL);
869			}
870		}
871		break;
872
873	case VM_ALLOC_ZERO:
874		if (cnt.v_free_count >= cnt.v_free_reserved) {
875			m = vm_page_select_free(object, pindex, PQ_ZERO);
876#if defined(DIAGNOSTIC)
877			if (m == NULL)
878				panic("vm_page_alloc(ZERO): missing page on free queue\n");
879#endif
880		} else {
881			m = vm_page_select_cache(object, pindex);
882			if (m == NULL) {
883				splx(s);
884#if defined(DIAGNOSTIC)
885				if (cnt.v_cache_count > 0)
886					printf("vm_page_alloc(ZERO): missing pages on cache queue: %d\n", cnt.v_cache_count);
887#endif
888				vm_pageout_deficit++;
889				pagedaemon_wakeup();
890				return (NULL);
891			}
892		}
893		break;
894
895	case VM_ALLOC_SYSTEM:
896		if ((cnt.v_free_count >= cnt.v_free_reserved) ||
897		    ((cnt.v_cache_count == 0) &&
898		    (cnt.v_free_count >= cnt.v_interrupt_free_min))) {
899			m = vm_page_select_free(object, pindex, PQ_FREE);
900#if defined(DIAGNOSTIC)
901			if (m == NULL)
902				panic("vm_page_alloc(SYSTEM): missing page on free queue\n");
903#endif
904		} else {
905			m = vm_page_select_cache(object, pindex);
906			if (m == NULL) {
907				splx(s);
908#if defined(DIAGNOSTIC)
909				if (cnt.v_cache_count > 0)
910					printf("vm_page_alloc(SYSTEM): missing pages on cache queue: %d\n", cnt.v_cache_count);
911#endif
912				vm_pageout_deficit++;
913				pagedaemon_wakeup();
914				return (NULL);
915			}
916		}
917		break;
918
919	case VM_ALLOC_INTERRUPT:
920		if (cnt.v_free_count > 0) {
921			m = vm_page_select_free(object, pindex, PQ_FREE);
922#if defined(DIAGNOSTIC)
923			if (m == NULL)
924				panic("vm_page_alloc(INTERRUPT): missing page on free queue\n");
925#endif
926		} else {
927			splx(s);
928			vm_pageout_deficit++;
929			pagedaemon_wakeup();
930			return (NULL);
931		}
932		break;
933
934	default:
935		m = NULL;
936#if !defined(MAX_PERF)
937		panic("vm_page_alloc: invalid allocation class");
938#endif
939	}
940
941	queue = m->queue;
942	qtype = queue - m->pc;
943	if (qtype == PQ_ZERO)
944		vm_page_zero_count--;
945	pq = &vm_page_queues[queue];
946	TAILQ_REMOVE(pq->pl, m, pageq);
947	(*pq->cnt)--;
948	(*pq->lcnt)--;
949	oldobject = NULL;
950	if (qtype == PQ_ZERO) {
951		m->flags = PG_ZERO | PG_BUSY;
952	} else if (qtype == PQ_CACHE) {
953		oldobject = m->object;
954		m->flags |= PG_BUSY;
955		vm_page_remove(m);
956		m->flags = PG_BUSY;
957	} else {
958		m->flags = PG_BUSY;
959	}
960	m->wire_count = 0;
961	m->hold_count = 0;
962	m->act_count = 0;
963	m->busy = 0;
964	m->valid = 0;
965	m->dirty = 0;
966	m->queue = PQ_NONE;
967
968	/* XXX before splx until vm_page_insert is safe */
969	vm_page_insert(m, object, pindex);
970
971	/*
972	 * Don't wakeup too often - wakeup the pageout daemon when
973	 * we would be nearly out of memory.
974	 */
975	if (((cnt.v_free_count + cnt.v_cache_count) <
976		(cnt.v_free_reserved + cnt.v_cache_min)) ||
977			(cnt.v_free_count < cnt.v_pageout_free_min))
978		pagedaemon_wakeup();
979
980	if ((qtype == PQ_CACHE) &&
981		((page_req == VM_ALLOC_NORMAL) || (page_req == VM_ALLOC_ZERO)) &&
982		oldobject && (oldobject->type == OBJT_VNODE) &&
983		((oldobject->flags & OBJ_DEAD) == 0)) {
984		struct vnode *vp;
985		vp = (struct vnode *) oldobject->handle;
986		if (vp && VSHOULDFREE(vp)) {
987			if ((vp->v_flag & (VFREE|VTBFREE|VDOOMED)) == 0) {
988				TAILQ_INSERT_TAIL(&vnode_tobefree_list, vp, v_freelist);
989				vp->v_flag |= VTBFREE;
990			}
991		}
992	}
993	splx(s);
994
995	return (m);
996}
997
998void
999vm_wait()
1000{
1001	int s;
1002
1003	s = splvm();
1004	if (curproc == pageproc) {
1005		vm_pageout_pages_needed = 1;
1006		tsleep(&vm_pageout_pages_needed, PSWP, "vmwait", 0);
1007	} else {
1008		if (!vm_pages_needed) {
1009			vm_pages_needed++;
1010			wakeup(&vm_pages_needed);
1011		}
1012		tsleep(&cnt.v_free_count, PVM, "vmwait", 0);
1013	}
1014	splx(s);
1015}
1016
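/*
 * If the page is busy (either PG_BUSY is set or the caller-supplied
 * busy count is non-zero), note that we want it and sleep until we are
 * woken up.  Returns 1 if we slept, 0 if the page was not busy.
 */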
1017int
1018vm_page_sleep(vm_page_t m, char *msg, char *busy) {
1019	vm_object_t object = m->object;
1020	int slept = 0;
1021	if ((busy && *busy) || (m->flags & PG_BUSY)) {
1022		int s;
1023		s = splvm();
1024		if ((busy && *busy) || (m->flags & PG_BUSY)) {
1025			m->flags |= PG_WANTED;
1026			tsleep(m, PVM, msg, 0);
1027			slept = 1;
1028		}
1029		splx(s);
1030	}
1031	return slept;
1032}
1033
1034/*
1035 *	vm_page_activate:
1036 *
1037 *	Put the specified page on the active list (if appropriate).
1038 *
1039 *	The page queues must be locked.
1040 */
1041void
1042vm_page_activate(m)
1043	register vm_page_t m;
1044{
1045	int s;
1048
1049	s = splvm();
1050	if (m->queue != PQ_ACTIVE) {
1051		if ((m->queue - m->pc) == PQ_CACHE)
1052			cnt.v_reactivated++;
1053
1054		vm_page_unqueue(m);
1055
1056		if (m->wire_count == 0) {
1057			m->queue = PQ_ACTIVE;
1058			++(*vm_page_queues[PQ_ACTIVE].lcnt);
1059			TAILQ_INSERT_TAIL(&vm_page_queue_active, m, pageq);
1060			if (m->act_count < ACT_INIT)
1061				m->act_count = ACT_INIT;
1062			cnt.v_active_count++;
1063		}
1064	} else {
1065		if (m->act_count < ACT_INIT)
1066			m->act_count = ACT_INIT;
1067	}
1068
1069	splx(s);
1070}
1071
1072/*
1073 * helper routine for vm_page_free and vm_page_free_zero
1074 */
1075static int
1076vm_page_freechk_and_unqueue(m)
1077	vm_page_t m;
1078{
1079	vm_object_t oldobject;
1080
1081	oldobject = m->object;
1082
1083#if !defined(MAX_PERF)
1084	if (m->busy || ((m->queue - m->pc) == PQ_FREE) ||
1085		(m->hold_count != 0)) {
1086		printf("vm_page_free: pindex(%ld), busy(%d), PG_BUSY(%d), hold(%d)\n",
1087			m->pindex, m->busy,
1088			(m->flags & PG_BUSY) ? 1 : 0, m->hold_count);
1089		if ((m->queue - m->pc) == PQ_FREE)
1090			panic("vm_page_free: freeing free page");
1091		else
1092			panic("vm_page_free: freeing busy page");
1093	}
1094#endif
1095
1096	vm_page_unqueue_nowakeup(m);
1097	vm_page_remove(m);
1098
1099	if ((m->flags & PG_FICTITIOUS) != 0) {
1100		return 0;
1101	}
1102
1103	m->valid = 0;
1104
1105	if (m->wire_count != 0) {
1106#if !defined(MAX_PERF)
1107		if (m->wire_count > 1) {
1108			panic("vm_page_free: invalid wire count (%d), pindex: 0x%x",
1109				m->wire_count, m->pindex);
1110		}
1111#endif
1112		m->wire_count = 0;
1113		if (m->object)
1114			m->object->wire_count--;
1115		cnt.v_wire_count--;
1116	}
1117
1118	if (oldobject && (oldobject->type == OBJT_VNODE) &&
1119		((oldobject->flags & OBJ_DEAD) == 0)) {
1120		struct vnode *vp;
1121		vp = (struct vnode *) oldobject->handle;
1122		if (vp && VSHOULDFREE(vp)) {
1123			if ((vp->v_flag & (VTBFREE|VDOOMED|VFREE)) == 0) {
1124				TAILQ_INSERT_TAIL(&vnode_tobefree_list, vp, v_freelist);
1125				vp->v_flag |= VTBFREE;
1126			}
1127		}
1128	}
1129
1130	return 1;
1131}
1132
1133/*
1134 * helper routine for vm_page_free and vm_page_free_zero
1135 */
1136static __inline void
1137vm_page_free_wakeup()
1138{
1139
1140/*
1141 * if pageout daemon needs pages, then tell it that there are
1142 * some free.
1143 */
1144	if (vm_pageout_pages_needed) {
1145		wakeup(&vm_pageout_pages_needed);
1146		vm_pageout_pages_needed = 0;
1147	}
1148	/*
1149	 * Wake up processes that are waiting on memory if we hit a
1150	 * high water mark, and wake up the scheduler process if we have
1151	 * lots of memory; that process will swap processes back in.
1152	 */
1153	if (vm_pages_needed &&
1154		((cnt.v_free_count + cnt.v_cache_count) >= cnt.v_free_min)) {
1155		wakeup(&cnt.v_free_count);
1156		vm_pages_needed = 0;
1157	}
1158}
1159
1160/*
1161 *	vm_page_free:
1162 *
1163 *	Returns the given page to the free list,
1164 *	disassociating it from any VM object.
1165 *
1166 *	Object and page must be locked prior to entry.
1167 */
1168void
1169vm_page_free(m)
1170	register vm_page_t m;
1171{
1172	int s;
1173	struct vpgqueues *pq;
1174
1175	s = splvm();
1176
1177	cnt.v_tfree++;
1178
1179	if (!vm_page_freechk_and_unqueue(m)) {
1180		splx(s);
1181		return;
1182	}
1183
1184	m->queue = PQ_FREE + m->pc;
1185	pq = &vm_page_queues[m->queue];
1186	++(*pq->lcnt);
1187	++(*pq->cnt);
1188	/*
1189	 * If the pageout process is grabbing the page, it is likely
1190	 * that the page is NOT in the cache.  It is more likely that
1191	 * the page will be partially in the cache if it is being
1192	 * explicitly freed.
1193	 */
1194	if (curproc == pageproc) {
1195		TAILQ_INSERT_TAIL(pq->pl, m, pageq);
1196	} else {
1197		TAILQ_INSERT_HEAD(pq->pl, m, pageq);
1198	}
1199
1200	vm_page_free_wakeup();
1201	splx(s);
1202}
1203
1204void
1205vm_page_free_zero(m)
1206	register vm_page_t m;
1207{
1208	int s;
1209	struct vpgqueues *pq;
1210
1211	s = splvm();
1212
1213	cnt.v_tfree++;
1214
1215	if (!vm_page_freechk_and_unqueue(m)) {
1216		splx(s);
1217		return;
1218	}
1219
1220	m->queue = PQ_ZERO + m->pc;
1221	pq = &vm_page_queues[m->queue];
1222	++(*pq->lcnt);
1223	++(*pq->cnt);
1224
1225	TAILQ_INSERT_HEAD(pq->pl, m, pageq);
1226	++vm_page_zero_count;
1227	vm_page_free_wakeup();
1228	splx(s);
1229}
1230
1231/*
1232 *	vm_page_wire:
1233 *
1234 *	Mark this page as wired down by yet
1235 *	another map, removing it from paging queues
1236 *	as necessary.
1237 *
1238 *	The page queues must be locked.
1239 */
1240void
1241vm_page_wire(m)
1242	register vm_page_t m;
1243{
1244	int s;
1245
1246	if (m->wire_count == 0) {
1247		s = splvm();
1248		vm_page_unqueue(m);
1249		splx(s);
1250		cnt.v_wire_count++;
1251		if (m->object)
1252			m->object->wire_count++;
1253	}
1254	(*vm_page_queues[PQ_NONE].lcnt)++;
1255	m->wire_count++;
1256	m->flags |= PG_MAPPED;
1257}
1258
1259/*
1260 *	vm_page_unwire:
1261 *
1262 *	Release one wiring of this page, potentially
1263 *	enabling it to be paged again.
1264 *
1265 *	The page queues must be locked.
1266 */
1267void
1268vm_page_unwire(m)
1269	register vm_page_t m;
1270{
1271	int s;
1272
1273	s = splvm();
1274
1275	if (m->wire_count > 0) {
1276		m->wire_count--;
1277		if (m->wire_count == 0) {
1278			if (m->object)
1279				m->object->wire_count--;
1280			cnt.v_wire_count--;
1281			TAILQ_INSERT_TAIL(&vm_page_queue_active, m, pageq);
1282			m->queue = PQ_ACTIVE;
1283			(*vm_page_queues[PQ_ACTIVE].lcnt)++;
1284			cnt.v_active_count++;
1285		}
1286	} else {
1287#if !defined(MAX_PERF)
1288		panic("vm_page_unwire: invalid wire count: %d\n", m->wire_count);
1289#endif
1290	}
1291	splx(s);
1292}
1293
1294
1295/*
1296 *	vm_page_deactivate:
1297 *
1298 *	Returns the given page to the inactive list,
1299 *	indicating that no physical maps have access
1300 *	to this page.  [Used by the physical mapping system.]
1301 *
1302 *	The page queues must be locked.
1303 */
1304void
1305vm_page_deactivate(m)
1306	register vm_page_t m;
1307{
1308	int s;
1309
1310	/*
1311	 * Only move active pages -- ignore locked or already inactive ones.
1312	 *
1313	 * XXX: sometimes we get pages which aren't wired down or on any queue -
1314	 * we need to put them on the inactive queue also, otherwise we lose
1315	 * track of them. Paul Mackerras (paulus@cs.anu.edu.au) 9-Jan-93.
1316	 */
1317	if (m->queue == PQ_INACTIVE)
1318		return;
1319
1320	s = splvm();
1321	if (m->wire_count == 0) {
1322		if ((m->queue - m->pc) == PQ_CACHE)
1323			cnt.v_reactivated++;
1324		vm_page_unqueue(m);
1325		TAILQ_INSERT_TAIL(&vm_page_queue_inactive, m, pageq);
1326		m->queue = PQ_INACTIVE;
1327		++(*vm_page_queues[PQ_INACTIVE].lcnt);
1328		cnt.v_inactive_count++;
1329	}
1330	splx(s);
1331}
1332
1333/*
1334 * vm_page_cache
1335 *
1336 * Put the specified page onto the page cache queue (if appropriate).
1337 */
1338void
1339vm_page_cache(m)
1340	register vm_page_t m;
1341{
1342	int s;
1343
1344#if !defined(MAX_PERF)
1345	if ((m->flags & PG_BUSY) || m->busy || m->wire_count) {
1346		printf("vm_page_cache: attempting to cache busy page\n");
1347		return;
1348	}
1349#endif
1350	if ((m->queue - m->pc) == PQ_CACHE)
1351		return;
1352
1353	vm_page_protect(m, VM_PROT_NONE);
1354#if !defined(MAX_PERF)
1355	if (m->dirty != 0) {
1356		panic("vm_page_cache: caching a dirty page, pindex: %d", m->pindex);
1357	}
1358#endif
1359	s = splvm();
1360	vm_page_unqueue_nowakeup(m);
1361	m->queue = PQ_CACHE + m->pc;
1362	(*vm_page_queues[m->queue].lcnt)++;
1363	TAILQ_INSERT_TAIL(vm_page_queues[m->queue].pl, m, pageq);
1364	cnt.v_cache_count++;
1365	m->object->cache_count++;
1366	vm_page_free_wakeup();
1367	splx(s);
1368}
1369
1370/*
1371 * Grab a page, waiting until we are woken up because the page has
1372 * changed state.  We keep waiting as long as the page remains busy in
1373 * the object.  If the page doesn't exist, allocate it.
1374 */
1375vm_page_t
1376vm_page_grab(object, pindex, allocflags)
1377	vm_object_t object;
1378	vm_pindex_t pindex;
1379	int allocflags;
1380{
1381
1382	vm_page_t m;
1383	int s, generation;
1384
1385retrylookup:
1386	if ((m = vm_page_lookup(object, pindex)) != NULL) {
1387		if (m->busy || (m->flags & PG_BUSY)) {
1388			generation = object->generation;
1389
1390			s = splvm();
1391			while ((object->generation == generation) &&
1392					(m->busy || (m->flags & PG_BUSY))) {
1393				m->flags |= PG_WANTED | PG_REFERENCED;
1394				tsleep(m, PVM, "pgrbwt", 0);
1395				if ((allocflags & VM_ALLOC_RETRY) == 0) {
1396					splx(s);
1397					return NULL;
1398				}
1399			}
1400			splx(s);
1401			goto retrylookup;
1402		} else {
1403			m->flags |= PG_BUSY;
1404			return m;
1405		}
1406	}
1407
1408	m = vm_page_alloc(object, pindex, allocflags & ~VM_ALLOC_RETRY);
1409	if (m == NULL) {
1410		VM_WAIT;
1411		if ((allocflags & VM_ALLOC_RETRY) == 0)
1412			return NULL;
1413		goto retrylookup;
1414	}
1415
1416	return m;
1417}
1418
1419/*
1420 * mapping function for valid bits or for dirty bits in
1421 * a page
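 *
 * Returns a mask with one bit per DEV_BSIZE chunk covered by the byte
 * range [base, base + size); for example, with DEV_BSIZE of 512,
 * vm_page_bits(512, 1024) yields 0x6 (chunks 1 and 2).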
1422 */
1423__inline int
1424vm_page_bits(int base, int size)
1425{
1426	u_short chunk;
1427
1428	if ((base == 0) && (size >= PAGE_SIZE))
1429		return VM_PAGE_BITS_ALL;
1430
1431	size = (size + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1);
1432	base &= PAGE_MASK;
1433	if (size > PAGE_SIZE - base) {
1434		size = PAGE_SIZE - base;
1435	}
1436
1437	base = base / DEV_BSIZE;
1438	chunk = vm_page_dev_bsize_chunks[size / DEV_BSIZE];
1439	return (chunk << base) & VM_PAGE_BITS_ALL;
1440}
1441
1442/*
1443 * set a page valid and clean
1444 */
1445void
1446vm_page_set_validclean(m, base, size)
1447	vm_page_t m;
1448	int base;
1449	int size;
1450{
1451	int pagebits = vm_page_bits(base, size);
1452	m->valid |= pagebits;
1453	m->dirty &= ~pagebits;
1454	if (base == 0 && size == PAGE_SIZE)
1455		pmap_clear_modify(VM_PAGE_TO_PHYS(m));
1456}
1457
1458/*
1459 * set a page (partially) invalid
1460 */
1461void
1462vm_page_set_invalid(m, base, size)
1463	vm_page_t m;
1464	int base;
1465	int size;
1466{
1467	int bits;
1468
1469	m->valid &= ~(bits = vm_page_bits(base, size));
1470	if (m->valid == 0)
1471		m->dirty &= ~bits;
1472	m->object->generation++;
1473}
1474
1475/*
1476 * is (partial) page valid?
1477 */
1478int
1479vm_page_is_valid(m, base, size)
1480	vm_page_t m;
1481	int base;
1482	int size;
1483{
1484	int bits = vm_page_bits(base, size);
1485
1486	if (m->valid && ((m->valid & bits) == bits))
1487		return 1;
1488	else
1489		return 0;
1490}
1491
1492void
1493vm_page_test_dirty(m)
1494	vm_page_t m;
1495{
1496	if ((m->dirty != VM_PAGE_BITS_ALL) &&
1497	    pmap_is_modified(VM_PAGE_TO_PHYS(m))) {
1498		m->dirty = VM_PAGE_BITS_ALL;
1499	}
1500}
1501
1502/*
1503 * This interface is for merging with malloc() someday.
1504 * Even if we never implement compaction so that contiguous allocation
1505 * works after initialization time, malloc()'s data structures are good
1506 * for statistics and for allocations of less than a page.
1507 */
1508void *
1509contigmalloc1(size, type, flags, low, high, alignment, boundary, map)
1510	unsigned long size;	/* should be size_t here and for malloc() */
1511	struct malloc_type *type;
1512	int flags;
1513	unsigned long low;
1514	unsigned long high;
1515	unsigned long alignment;
1516	unsigned long boundary;
1517	vm_map_t map;
1518{
1519	int i, s, start;
1520	vm_offset_t addr, phys, tmp_addr;
1521	int pass;
1522	vm_page_t pga = vm_page_array;
1523
1524	size = round_page(size);
1525#if !defined(MAX_PERF)
1526	if (size == 0)
1527		panic("contigmalloc1: size must not be 0");
1528	if ((alignment & (alignment - 1)) != 0)
1529		panic("contigmalloc1: alignment must be a power of 2");
1530	if ((boundary & (boundary - 1)) != 0)
1531		panic("contigmalloc1: boundary must be a power of 2");
1532#endif
1533
1534	start = 0;
1535	for (pass = 0; pass <= 1; pass++) {
1536		s = splvm();
1537again:
1538		/*
1539		 * Find first page in array that is free, within range, aligned, and
1540		 * such that the boundary won't be crossed.
1541		 */
1542		for (i = start; i < cnt.v_page_count; i++) {
1543			int pqtype;
1544			phys = VM_PAGE_TO_PHYS(&pga[i]);
1545			pqtype = pga[i].queue - pga[i].pc;
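			/*
			 * A candidate run must start on a page that is free,
			 * zeroed or cached, lies in [low, high), is aligned as
			 * requested, and does not cross a boundary-aligned
			 * address: the XOR test checks that the first and last
			 * byte of the run fall in the same boundary-sized
			 * block.
			 */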
1546			if (((pqtype == PQ_ZERO) || (pqtype == PQ_FREE) || (pqtype == PQ_CACHE)) &&
1547			    (phys >= low) && (phys < high) &&
1548			    ((phys & (alignment - 1)) == 0) &&
1549			    (((phys ^ (phys + size - 1)) & ~(boundary - 1)) == 0))
1550				break;
1551		}
1552
1553		/*
1554		 * If the above failed or we will exceed the upper bound, fail.
1555		 */
1556		if ((i == cnt.v_page_count) ||
1557			((VM_PAGE_TO_PHYS(&pga[i]) + size) > high)) {
1558			vm_page_t m, next;
1559
1560again1:
1561			for (m = TAILQ_FIRST(&vm_page_queue_inactive);
1562				m != NULL;
1563				m = next) {
1564
1565				if (m->queue != PQ_INACTIVE) {
1566					break;
1567				}
1568
1569				next = TAILQ_NEXT(m, pageq);
1570				if (vm_page_sleep(m, "vpctw0", &m->busy))
1571					goto again1;
1572				vm_page_test_dirty(m);
1573				if (m->dirty) {
1574					if (m->object->type == OBJT_VNODE) {
1575						vn_lock(m->object->handle, LK_EXCLUSIVE | LK_RETRY, curproc);
1576						vm_object_page_clean(m->object, 0, 0, OBJPC_SYNC);
1577						VOP_UNLOCK(m->object->handle, 0, curproc);
1578						goto again1;
1579					} else if (m->object->type == OBJT_SWAP ||
1580								m->object->type == OBJT_DEFAULT) {
1581						vm_pageout_flush(&m, 1, 0);
1582						goto again1;
1583					}
1584				}
1585				if ((m->dirty == 0) && (m->busy == 0) && (m->hold_count == 0))
1586					vm_page_cache(m);
1587			}
1588
1589			for (m = TAILQ_FIRST(&vm_page_queue_active);
1590				m != NULL;
1591				m = next) {
1592
1593				if (m->queue != PQ_ACTIVE) {
1594					break;
1595				}
1596
1597				next = TAILQ_NEXT(m, pageq);
1598				if (vm_page_sleep(m, "vpctw1", &m->busy))
1599					goto again1;
1600				vm_page_test_dirty(m);
1601				if (m->dirty) {
1602					if (m->object->type == OBJT_VNODE) {
1603						vn_lock(m->object->handle, LK_EXCLUSIVE | LK_RETRY, curproc);
1604						vm_object_page_clean(m->object, 0, 0, OBJPC_SYNC);
1605						VOP_UNLOCK(m->object->handle, 0, curproc);
1606						goto again1;
1607					} else if (m->object->type == OBJT_SWAP ||
1608								m->object->type == OBJT_DEFAULT) {
1609						vm_pageout_flush(&m, 1, 0);
1610						goto again1;
1611					}
1612				}
1613				if ((m->dirty == 0) && (m->busy == 0) && (m->hold_count == 0))
1614					vm_page_cache(m);
1615			}
1616
1617			splx(s);
1618			continue;
1619		}
1620		start = i;
1621
1622		/*
1623		 * Check successive pages for contiguous and free.
1624		 */
1625		for (i = start + 1; i < (start + size / PAGE_SIZE); i++) {
1626			int pqtype;
1627			pqtype = pga[i].queue - pga[i].pc;
1628			if ((VM_PAGE_TO_PHYS(&pga[i]) !=
1629			    (VM_PAGE_TO_PHYS(&pga[i - 1]) + PAGE_SIZE)) ||
1630			    ((pqtype != PQ_ZERO) && (pqtype != PQ_FREE) && (pqtype != PQ_CACHE))) {
1631				start++;
1632				goto again;
1633			}
1634		}
1635
1636		for (i = start; i < (start + size / PAGE_SIZE); i++) {
1637			int pqtype;
1638			vm_page_t m = &pga[i];
1639
1640			pqtype = m->queue - m->pc;
1641			if (pqtype == PQ_CACHE) {
1642				m->flags |= PG_BUSY;
1643				vm_page_free(m);
1644			}
1645
1646			TAILQ_REMOVE(vm_page_queues[m->queue].pl, m, pageq);
1647			(*vm_page_queues[m->queue].lcnt)--;
1648			cnt.v_free_count--;
1649			m->valid = VM_PAGE_BITS_ALL;
1650			m->flags = 0;
1651			m->dirty = 0;
1652			m->wire_count = 0;
1653			m->busy = 0;
1654			m->queue = PQ_NONE;
1655			m->object = NULL;
1656			vm_page_wire(m);
1657		}
1658
1659		/*
1660		 * We've found a contiguous chunk that meets our requirements.
1661		 * Allocate kernel VM, unfree and assign the physical pages to it and
1662		 * return kernel VM pointer.
1663		 */
1664		tmp_addr = addr = kmem_alloc_pageable(map, size);
1665		if (addr == 0) {
1666			/*
1667			 * XXX We almost never run out of kernel virtual
1668			 * space, so we don't make the allocated memory
1669			 * above available.
1670			 */
1671			splx(s);
1672			return (NULL);
1673		}
1674
1675		for (i = start; i < (start + size / PAGE_SIZE); i++) {
1676			vm_page_t m = &pga[i];
1677			vm_page_insert(m, kernel_object,
1678				OFF_TO_IDX(tmp_addr - VM_MIN_KERNEL_ADDRESS));
1679			pmap_kenter(tmp_addr, VM_PAGE_TO_PHYS(m));
1680			tmp_addr += PAGE_SIZE;
1681		}
1682
1683		splx(s);
1684		return ((void *)addr);
1685	}
1686	return NULL;
1687}
1688
1689void *
1690contigmalloc(size, type, flags, low, high, alignment, boundary)
1691	unsigned long size;	/* should be size_t here and for malloc() */
1692	struct malloc_type *type;
1693	int flags;
1694	unsigned long low;
1695	unsigned long high;
1696	unsigned long alignment;
1697	unsigned long boundary;
1698{
1699	return contigmalloc1(size, type, flags, low, high, alignment, boundary,
1700			     kernel_map);
1701}
1702
1703vm_offset_t
1704vm_page_alloc_contig(size, low, high, alignment)
1705	vm_offset_t size;
1706	vm_offset_t low;
1707	vm_offset_t high;
1708	vm_offset_t alignment;
1709{
1710	return ((vm_offset_t)contigmalloc1(size, M_DEVBUF, M_NOWAIT, low, high,
1711					  alignment, 0ul, kernel_map));
1712}
1713
1714#include "opt_ddb.h"
1715#ifdef DDB
1716#include <sys/kernel.h>
1717
1718#include <ddb/ddb.h>
1719
1720DB_SHOW_COMMAND(page, vm_page_print_page_info)
1721{
1722	db_printf("cnt.v_free_count: %d\n", cnt.v_free_count);
1723	db_printf("cnt.v_cache_count: %d\n", cnt.v_cache_count);
1724	db_printf("cnt.v_inactive_count: %d\n", cnt.v_inactive_count);
1725	db_printf("cnt.v_active_count: %d\n", cnt.v_active_count);
1726	db_printf("cnt.v_wire_count: %d\n", cnt.v_wire_count);
1727	db_printf("cnt.v_free_reserved: %d\n", cnt.v_free_reserved);
1728	db_printf("cnt.v_free_min: %d\n", cnt.v_free_min);
1729	db_printf("cnt.v_free_target: %d\n", cnt.v_free_target);
1730	db_printf("cnt.v_cache_min: %d\n", cnt.v_cache_min);
1731	db_printf("cnt.v_inactive_target: %d\n", cnt.v_inactive_target);
1732}
1733
1734DB_SHOW_COMMAND(pageq, vm_page_print_pageq_info)
1735{
1736	int i;
1737	db_printf("PQ_FREE:");
1738	for(i=0;i<PQ_L2_SIZE;i++) {
1739		db_printf(" %d", *vm_page_queues[PQ_FREE + i].lcnt);
1740	}
1741	db_printf("\n");
1742
1743	db_printf("PQ_CACHE:");
1744	for(i=0;i<PQ_L2_SIZE;i++) {
1745		db_printf(" %d", *vm_page_queues[PQ_CACHE + i].lcnt);
1746	}
1747	db_printf("\n");
1748
1749	db_printf("PQ_ZERO:");
1750	for(i=0;i<PQ_L2_SIZE;i++) {
1751		db_printf(" %d", *vm_page_queues[PQ_ZERO + i].lcnt);
1752	}
1753	db_printf("\n");
1754
1755	db_printf("PQ_ACTIVE: %d, PQ_INACTIVE: %d\n",
1756		*vm_page_queues[PQ_ACTIVE].lcnt,
1757		*vm_page_queues[PQ_INACTIVE].lcnt);
1758}
1759#endif /* DDB */
1760