/*
 * Copyright (c) 1991 Regents of the University of California.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)vm_page.c	7.4 (Berkeley) 5/7/91
 *	$Id: vm_page.c,v 1.51 1996/05/18 03:37:57 dyson Exp $
 */

/*
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Authors: Avadis Tevanian, Jr., Michael Wayne Young
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/*
 *	Resident memory management module.
 */
#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/vmmeter.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_prot.h>
#include <vm/lock.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_pageout.h>
#include <vm/vm_extern.h>

#ifdef DDB
extern void	DDB_print_page_info __P((void));
#endif

/*
 *	Associated with each page of user-allocatable memory is a
 *	page structure.
 */

static struct pglist *vm_page_buckets;	/* Array of buckets */
static int vm_page_bucket_count;	/* How big is array? */
static int vm_page_hash_mask;		/* Mask for hash function */

struct pglist vm_page_queue_free;
struct pglist vm_page_queue_zero;
struct pglist vm_page_queue_active;
struct pglist vm_page_queue_inactive;
struct pglist vm_page_queue_cache;

int no_queue;

struct {
	struct pglist *pl;
	int	*cnt;
} vm_page_queues[PQ_CACHE+1] = {
	{NULL, &no_queue},
	{ &vm_page_queue_free, &cnt.v_free_count},
	{ &vm_page_queue_zero, &cnt.v_free_count},
	{ &vm_page_queue_inactive, &cnt.v_inactive_count},
	{ &vm_page_queue_active, &cnt.v_active_count},
	{ &vm_page_queue_cache, &cnt.v_cache_count}
};

vm_page_t vm_page_array;
static int vm_page_array_size;
long first_page;
static long last_page;
static vm_size_t page_mask;
static int page_shift;
int vm_page_zero_count;

/*
 * map of contiguous valid DEV_BSIZE chunks in a page
 * (this list is valid for page sizes up to 16*DEV_BSIZE)
 */
static u_short vm_page_dev_bsize_chunks[] = {
	0x0, 0x1, 0x3, 0x7, 0xf, 0x1f, 0x3f, 0x7f, 0xff,
	0x1ff, 0x3ff, 0x7ff, 0xfff, 0x1fff, 0x3fff, 0x7fff, 0xffff
};
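/*
 * A worked illustration (assuming the usual DEV_BSIZE of 512): entry N
 * of this table has its low N bits set, so a request covering two
 * DEV_BSIZE chunks selects vm_page_dev_bsize_chunks[2] == 0x3.  See
 * vm_page_bits() below for how the table is indexed.
 */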

static inline __pure int
		vm_page_hash __P((vm_object_t object, vm_pindex_t pindex))
		__pure2;

/*
 *	vm_set_page_size:
 *
 *	Sets the page size, perhaps based upon the memory
 *	size.  Must be called before any use of page-size
 *	dependent functions.
 *
 *	Sets page_shift and page_mask from cnt.v_page_size.
 */
void
vm_set_page_size()
{

	if (cnt.v_page_size == 0)
		cnt.v_page_size = DEFAULT_PAGE_SIZE;
	page_mask = cnt.v_page_size - 1;
	if ((page_mask & cnt.v_page_size) != 0)
		panic("vm_set_page_size: page size not a power of two");
	for (page_shift = 0;; page_shift++)
		if ((1 << page_shift) == cnt.v_page_size)
			break;
}
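/*
 * Example, assuming the common 4096 byte page size: the loop above
 * leaves page_shift == 12 and page_mask == 0xfff.
 */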

/*
 *	vm_page_startup:
 *
 *	Initializes the resident memory module.
 *
 *	Allocates memory for the page cells, and
 *	for the object/offset-to-page hash table headers.
 *	Each page cell is initialized and placed on the free list.
 */

vm_offset_t
vm_page_startup(starta, enda, vaddr)
	register vm_offset_t starta;
	vm_offset_t enda;
	register vm_offset_t vaddr;
{
	register vm_offset_t mapped;
	register vm_page_t m;
	register struct pglist *bucket;
	vm_size_t npages, page_range;
	register vm_offset_t new_start;
	int i;
	vm_offset_t pa;
	int nblocks;
	vm_offset_t first_managed_page;

	/* the biggest memory array is the second group of pages */
	vm_offset_t start;
	vm_offset_t biggestone, biggestsize;

	vm_offset_t total;

	total = 0;
	biggestsize = 0;
	biggestone = 0;
	nblocks = 0;
	vaddr = round_page(vaddr);

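	/*
	 * phys_avail[] is the machine-dependent, zero-terminated array of
	 * (start, end) physical address pairs describing usable RAM, e.g.
	 * something like { 0x1000, 0x9f000, 0x100000, 0x1000000, 0, 0 }
	 * on a machine with a hole below 1MB (illustrative values only).
	 */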
	for (i = 0; phys_avail[i + 1]; i += 2) {
		phys_avail[i] = round_page(phys_avail[i]);
		phys_avail[i + 1] = trunc_page(phys_avail[i + 1]);
	}

	for (i = 0; phys_avail[i + 1]; i += 2) {
		int size = phys_avail[i + 1] - phys_avail[i];

		if (size > biggestsize) {
			biggestone = i;
			biggestsize = size;
		}
		++nblocks;
		total += size;
	}

	start = phys_avail[biggestone];

	/*
	 * Initialize the queue headers for the free, zero, active,
	 * inactive and cache queues.
	 */

	TAILQ_INIT(&vm_page_queue_free);
	TAILQ_INIT(&vm_page_queue_zero);
	TAILQ_INIT(&vm_page_queue_active);
	TAILQ_INIT(&vm_page_queue_inactive);
	TAILQ_INIT(&vm_page_queue_cache);

	/*
	 * Allocate (and initialize) the hash table buckets.
	 *
	 * The number of buckets MUST BE a power of 2, and the actual value is
	 * the next power of 2 greater than the number of physical pages in
	 * the system.
	 *
	 * Note: This computation can be tweaked if desired.
	 */
	vm_page_buckets = (struct pglist *) vaddr;
	bucket = vm_page_buckets;
	if (vm_page_bucket_count == 0) {
		vm_page_bucket_count = 1;
		while (vm_page_bucket_count < atop(total))
			vm_page_bucket_count <<= 1;
	}
	vm_page_hash_mask = vm_page_bucket_count - 1;

	/*
	 * Validate these addresses.
	 */

	new_start = start + vm_page_bucket_count * sizeof(struct pglist);
	new_start = round_page(new_start);
	mapped = vaddr;
	vaddr = pmap_map(mapped, start, new_start,
	    VM_PROT_READ | VM_PROT_WRITE);
	start = new_start;
	bzero((caddr_t) mapped, vaddr - mapped);
	mapped = vaddr;

	for (i = 0; i < vm_page_bucket_count; i++) {
		TAILQ_INIT(bucket);
		bucket++;
	}

	/*
	 * round (or truncate) the addresses to our page size.
	 */

	/*
	 * Pre-allocate maps and map entries that cannot be dynamically
	 * allocated via malloc().  The maps include the kernel_map and
	 * kmem_map which must be initialized before malloc() will work
	 * (obviously).  Also could include pager maps which would be
	 * allocated before kmeminit.
	 *
	 * Allow some kernel map entries... this should be plenty since people
	 * shouldn't be cluttering up the kernel map (they should use their
	 * own maps).
	 */

	kentry_data_size = MAX_KMAP * sizeof(struct vm_map) +
	    MAX_KMAPENT * sizeof(struct vm_map_entry);
	kentry_data_size = round_page(kentry_data_size);
	kentry_data = (vm_offset_t) vaddr;
	vaddr += kentry_data_size;

	/*
	 * Validate these zone addresses.
	 */

	new_start = start + (vaddr - mapped);
	pmap_map(mapped, start, new_start, VM_PROT_READ | VM_PROT_WRITE);
	bzero((caddr_t) mapped, (vaddr - mapped));
	start = round_page(new_start);

	/*
	 * Compute the number of pages of memory that will be available for
	 * use (taking into account the overhead of a page structure per
	 * page).
	 */

	first_page = phys_avail[0] / PAGE_SIZE;
	last_page = phys_avail[(nblocks - 1) * 2 + 1] / PAGE_SIZE;

	page_range = last_page - (phys_avail[0] / PAGE_SIZE);
	npages = (total - (page_range * sizeof(struct vm_page)) -
	    (start - phys_avail[biggestone])) / PAGE_SIZE;

	/*
	 * Initialize the mem entry structures now, and put them in the free
	 * queue.
	 */

	vm_page_array = (vm_page_t) vaddr;
	mapped = vaddr;

	/*
	 * Validate these addresses.
	 */

	new_start = round_page(start + page_range * sizeof(struct vm_page));
	mapped = pmap_map(mapped, start, new_start,
	    VM_PROT_READ | VM_PROT_WRITE);
	start = new_start;

	first_managed_page = start / PAGE_SIZE;

	/*
	 * Clear all of the page structures
	 */
	bzero((caddr_t) vm_page_array, page_range * sizeof(struct vm_page));
	vm_page_array_size = page_range;

	cnt.v_page_count = 0;
	cnt.v_free_count = 0;
	for (i = 0; phys_avail[i + 1] && npages > 0; i += 2) {
		if (i == biggestone)
			pa = ptoa(first_managed_page);
		else
			pa = phys_avail[i];
		while (pa < phys_avail[i + 1] && npages-- > 0) {
			++cnt.v_page_count;
			++cnt.v_free_count;
			m = PHYS_TO_VM_PAGE(pa);
			m->queue = PQ_FREE;
			m->flags = 0;
			m->phys_addr = pa;
			TAILQ_INSERT_TAIL(&vm_page_queue_free, m, pageq);
			pa += PAGE_SIZE;
		}
	}

	return (mapped);
}

/*
 *	vm_page_hash:
 *
 *	Distributes the object/offset key pair among hash buckets.
 *
 *	NOTE:  This function depends on vm_page_bucket_count being a power of 2.
 */
static inline __pure int
vm_page_hash(object, pindex)
	vm_object_t object;
	vm_pindex_t pindex;
{
	return ((unsigned) object + pindex) & vm_page_hash_mask;
}
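/*
 * For example, with vm_page_bucket_count == 1024 the hash mask is 0x3ff,
 * so an object at 0xf0123400 with pindex 5 hashes to bucket
 * (0xf0123400 + 5) & 0x3ff == 5.  (Illustrative numbers only.)
 */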

/*
 *	vm_page_insert:		[ internal use only ]
 *
 *	Inserts the given mem entry into the object/offset-page
 *	table and object list.
 *
 *	The object and page must be locked, and the call must be made
 *	at splhigh.
 */

__inline void
vm_page_insert(m, object, pindex)
	register vm_page_t m;
	register vm_object_t object;
	register vm_pindex_t pindex;
{
	register struct pglist *bucket;

	if (m->flags & PG_TABLED)
		panic("vm_page_insert: already inserted");

	/*
	 * Record the object/offset pair in this page
	 */

	m->object = object;
	m->pindex = pindex;

	/*
	 * Insert it into the object/offset hash table
	 */

	bucket = &vm_page_buckets[vm_page_hash(object, pindex)];
	TAILQ_INSERT_TAIL(bucket, m, hashq);

	/*
	 * Now link into the object's list of backed pages.
	 */

	TAILQ_INSERT_TAIL(&object->memq, m, listq);
	m->flags |= PG_TABLED;

	/*
	 * And show that the object has one more resident page.
	 */

	object->resident_page_count++;
}

/*
 *	vm_page_remove:		[ internal use only ]
 *				NOTE: used by device pager as well -wfj
 *
 *	Removes the given mem entry from the object/offset-page
 *	table and the object page list.
 *
 *	The object and page must be locked, and at splhigh.
 */

__inline void
vm_page_remove(m)
	register vm_page_t m;
{
	register struct pglist *bucket;

	if (!(m->flags & PG_TABLED))
		return;

	/*
	 * Remove from the object/offset hash table
	 */

	bucket = &vm_page_buckets[vm_page_hash(m->object, m->pindex)];
	TAILQ_REMOVE(bucket, m, hashq);

	/*
	 * Now remove from the object's list of backed pages.
	 */

	TAILQ_REMOVE(&m->object->memq, m, listq);

	/*
	 * And show that the object has one fewer resident page.
	 */

	m->object->resident_page_count--;

	m->flags &= ~PG_TABLED;
}

/*
 *	vm_page_lookup:
 *
 *	Returns the page associated with the object/offset
 *	pair specified; if none is found, NULL is returned.
 *
 *	The object must be locked.  No side effects.
 */

vm_page_t
vm_page_lookup(object, pindex)
	register vm_object_t object;
	register vm_pindex_t pindex;
{
	register vm_page_t m;
	register struct pglist *bucket;
	int s;

	/*
	 * Search the hash table for this object/offset pair
	 */

	bucket = &vm_page_buckets[vm_page_hash(object, pindex)];

	s = splvm();
	for (m = TAILQ_FIRST(bucket); m != NULL; m = TAILQ_NEXT(m, hashq)) {
		if ((m->object == object) && (m->pindex == pindex)) {
			splx(s);
			return (m);
		}
	}
	splx(s);
	return (NULL);
}

/*
 *	vm_page_rename:
 *
 *	Move the given memory entry from its
 *	current object to the specified target object/offset.
 *
 *	The object must be locked.
 */
void
vm_page_rename(m, new_object, new_pindex)
	register vm_page_t m;
	register vm_object_t new_object;
	vm_pindex_t new_pindex;
{
	int s;

	s = splvm();
	vm_page_remove(m);
	vm_page_insert(m, new_object, new_pindex);
	splx(s);
}

/*
 * vm_page_unqueue must be called at splhigh().
 */
__inline void
vm_page_unqueue(vm_page_t m)
{
	int queue = m->queue;
	if (queue == PQ_NONE)
		return;
	m->queue = PQ_NONE;
	TAILQ_REMOVE(vm_page_queues[queue].pl, m, pageq);
	--(*vm_page_queues[queue].cnt);
	if (queue == PQ_CACHE) {
		if ((cnt.v_cache_count + cnt.v_free_count) <
			(cnt.v_free_reserved + cnt.v_cache_min))
			pagedaemon_wakeup();
	}
	return;
}

/*
 *	vm_page_alloc:
 *
 *	Allocate and return a memory cell associated
 *	with this VM object/offset pair.
 *
 *	page_req classes:
 *	VM_ALLOC_NORMAL		normal process request
 *	VM_ALLOC_SYSTEM		system *really* needs a page
 *	VM_ALLOC_INTERRUPT	interrupt time request
 *	VM_ALLOC_ZERO		zero page
 *
 *	Object must be locked.
 */
vm_page_t
vm_page_alloc(object, pindex, page_req)
	vm_object_t object;
	vm_pindex_t pindex;
	int page_req;
{
	register vm_page_t m;
	int queue;
	int s;

#ifdef DIAGNOSTIC
	m = vm_page_lookup(object, pindex);
	if (m)
		panic("vm_page_alloc: page already allocated");
#endif

	if ((curproc == pageproc) && (page_req != VM_ALLOC_INTERRUPT)) {
		page_req = VM_ALLOC_SYSTEM;
	}

	s = splvm();

	switch (page_req) {

	case VM_ALLOC_NORMAL:
		if (cnt.v_free_count >= cnt.v_free_reserved) {
			m = TAILQ_FIRST(&vm_page_queue_free);
			if (m == NULL) {
				--vm_page_zero_count;
				m = TAILQ_FIRST(&vm_page_queue_zero);
			}
		} else {
			m = TAILQ_FIRST(&vm_page_queue_cache);
			if (m == NULL) {
				splx(s);
				pagedaemon_wakeup();
				return (NULL);
			}
		}
		break;

	case VM_ALLOC_ZERO:
		if (cnt.v_free_count >= cnt.v_free_reserved) {
			m = TAILQ_FIRST(&vm_page_queue_zero);
			if (m) {
				--vm_page_zero_count;
			} else {
				m = TAILQ_FIRST(&vm_page_queue_free);
			}
		} else {
			m = TAILQ_FIRST(&vm_page_queue_cache);
			if (m == NULL) {
				splx(s);
				pagedaemon_wakeup();
				return (NULL);
			}
		}
		break;

	case VM_ALLOC_SYSTEM:
		if ((cnt.v_free_count >= cnt.v_free_reserved) ||
		    ((cnt.v_cache_count == 0) &&
		    (cnt.v_free_count >= cnt.v_interrupt_free_min))) {
				m = TAILQ_FIRST(&vm_page_queue_free);
				if (m == NULL) {
					--vm_page_zero_count;
					m = TAILQ_FIRST(&vm_page_queue_zero);
				}
		} else {
			m = TAILQ_FIRST(&vm_page_queue_cache);
			if (m == NULL) {
				splx(s);
				pagedaemon_wakeup();
				return (NULL);
			}
		}
		break;

	case VM_ALLOC_INTERRUPT:
		if (cnt.v_free_count > 0) {
			m = TAILQ_FIRST(&vm_page_queue_free);
			if (m == NULL) {
				--vm_page_zero_count;
				m = TAILQ_FIRST(&vm_page_queue_zero);
			}
		} else {
			splx(s);
			pagedaemon_wakeup();
			return (NULL);
		}
		break;

	default:
		panic("vm_page_alloc: invalid allocation class");
	}

	queue = m->queue;
	TAILQ_REMOVE(vm_page_queues[queue].pl, m, pageq);
	--(*vm_page_queues[queue].cnt);
	if (queue == PQ_ZERO) {
		m->flags = PG_ZERO|PG_BUSY;
	} else if (queue == PQ_CACHE) {
		vm_page_remove(m);
		m->flags = PG_BUSY;
	} else {
		m->flags = PG_BUSY;
	}
	m->wire_count = 0;
	m->act_count = 0;
	m->hold_count = 0;
	m->busy = 0;
	m->valid = 0;
	m->dirty = 0;
	m->queue = PQ_NONE;

	/* XXX before splx until vm_page_insert is safe */
	vm_page_insert(m, object, pindex);

	splx(s);

	/*
	 * Don't wake up too often - wake up the pageout daemon when
	 * we would be nearly out of memory.
	 */
	if (((cnt.v_free_count + cnt.v_cache_count) <
		(cnt.v_free_reserved + cnt.v_cache_min)) ||
			(cnt.v_free_count < cnt.v_pageout_free_min))
		pagedaemon_wakeup();

	return (m);
}
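/*
 * A sketch of the usual caller pattern (illustrative, not lifted from a
 * particular caller): allocate a page for (object, pindex) and, if the
 * system is short on memory, wait for the pagedaemon and retry:
 *
 *	vm_page_t m;
 *
 *	while ((m = vm_page_alloc(object, pindex, VM_ALLOC_NORMAL)) == NULL)
 *		VM_WAIT;
 *	... initialize the page, then PAGE_WAKEUP(m) when done ...
 */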

/*
 *	vm_page_activate:
 *
 *	Put the specified page on the active list (if appropriate).
 *
 *	The page queues must be locked.
 */
void
vm_page_activate(m)
	register vm_page_t m;
{
	int s;

	s = splvm();
	if (m->queue == PQ_ACTIVE)
		panic("vm_page_activate: already active");

	if (m->queue == PQ_CACHE)
		cnt.v_reactivated++;

	vm_page_unqueue(m);

	if (m->wire_count == 0) {
		if (m->act_count < 5)
			m->act_count = 5;
		TAILQ_INSERT_TAIL(&vm_page_queue_active, m, pageq);
		m->queue = PQ_ACTIVE;
		cnt.v_active_count++;
	}
	splx(s);
}

/*
 *	vm_page_free:
 *
 *	Returns the given page to the free list,
 *	disassociating it from any VM object.
 *
 *	Object and page must be locked prior to entry.
 */
void
vm_page_free(m)
	register vm_page_t m;
{
	int s;
	int flags = m->flags;

	s = splvm();
	if (m->busy || (flags & PG_BUSY) || (m->queue == PQ_FREE)) {
		printf("vm_page_free: pindex(%ld), busy(%d), PG_BUSY(%d)\n",
		    m->pindex, m->busy, (flags & PG_BUSY) ? 1 : 0);
		if (m->queue == PQ_FREE)
			panic("vm_page_free: freeing free page");
		else
			panic("vm_page_free: freeing busy page");
	}

	if (m->hold_count) {
		panic("freeing held page, count=%d, pindex=%d(0x%x)",
			m->hold_count, m->pindex, m->pindex);
	}

	vm_page_remove(m);
	vm_page_unqueue(m);

	if ((flags & PG_FICTITIOUS) == 0) {
		if (m->wire_count) {
			if (m->wire_count > 1) {
				printf("vm_page_free: wire count > 1 (%d)", m->wire_count);
				panic("vm_page_free: invalid wire count");
			}
			cnt.v_wire_count--;
			m->wire_count = 0;
		}
		m->queue = PQ_FREE;

		/*
		 * If the pageout process is grabbing the page, it is likely
		 * that the page is NOT in the cache.  It is more likely that
		 * the page will be partially in the cache if it is being
		 * explicitly freed.
		 */
		if (curproc == pageproc) {
			TAILQ_INSERT_TAIL(&vm_page_queue_free, m, pageq);
		} else {
			TAILQ_INSERT_HEAD(&vm_page_queue_free, m, pageq);
		}

		splx(s);
		/*
		 * If the pageout daemon needs pages, tell it that there are
		 * some free.
		 */
		if (vm_pageout_pages_needed) {
			wakeup(&vm_pageout_pages_needed);
			vm_pageout_pages_needed = 0;
		}

		cnt.v_free_count++;
		/*
		 * Wake up processes that are waiting on memory if we hit the
		 * high water mark, and wake up the scheduler process if we
		 * have lots of memory; it will swap in processes.
		 */
		if ((cnt.v_free_count + cnt.v_cache_count) == cnt.v_free_min) {
			wakeup(&cnt.v_free_count);
		}
	} else {
		splx(s);
	}
	cnt.v_tfree++;
}


/*
 *	vm_page_wire:
 *
 *	Mark this page as wired down by yet
 *	another map, removing it from paging queues
 *	as necessary.
 *
 *	The page queues must be locked.
 */
void
vm_page_wire(m)
	register vm_page_t m;
{
	int s;

	if (m->wire_count == 0) {
		s = splvm();
		vm_page_unqueue(m);
		splx(s);
		cnt.v_wire_count++;
	}
	m->wire_count++;
	m->flags |= PG_MAPPED;
}

/*
 *	vm_page_unwire:
 *
 *	Release one wiring of this page, potentially
 *	enabling it to be paged again.
 *
 *	The page queues must be locked.
 */
void
vm_page_unwire(m)
	register vm_page_t m;
{
	int s;

	s = splvm();

	if (m->wire_count > 0)
		m->wire_count--;

	if (m->wire_count == 0) {
		cnt.v_wire_count--;
		TAILQ_INSERT_TAIL(&vm_page_queue_active, m, pageq);
		m->queue = PQ_ACTIVE;
		if (m->act_count < 5)
			m->act_count = 5;
		cnt.v_active_count++;
	}
	splx(s);
}


/*
 *	vm_page_deactivate:
 *
 *	Returns the given page to the inactive list,
 *	indicating that no physical maps have access
 *	to this page.  [Used by the physical mapping system.]
 *
 *	The page queues must be locked.
 */
void
vm_page_deactivate(m)
	register vm_page_t m;
{
	int spl;

	/*
	 * Only move active pages -- ignore locked or already inactive ones.
	 *
	 * XXX: sometimes we get pages which aren't wired down or on any queue -
	 * we need to put them on the inactive queue also, otherwise we lose
	 * track of them. Paul Mackerras (paulus@cs.anu.edu.au) 9-Jan-93.
	 */
	if (m->queue == PQ_INACTIVE)
		return;

	spl = splvm();
	if (m->wire_count == 0 && m->hold_count == 0) {
		if (m->queue == PQ_CACHE)
			cnt.v_reactivated++;
		vm_page_unqueue(m);
		TAILQ_INSERT_TAIL(&vm_page_queue_inactive, m, pageq);
		m->queue = PQ_INACTIVE;
		cnt.v_inactive_count++;
	}
	splx(spl);
}

/*
 * vm_page_cache
 *
 * Put the specified page onto the page cache queue (if appropriate).
 */
void
vm_page_cache(m)
	register vm_page_t m;
{
	int s;

	if ((m->flags & PG_BUSY) || m->busy || m->wire_count) {
		printf("vm_page_cache: attempting to cache busy page\n");
		return;
	}
	if (m->queue == PQ_CACHE)
		return;

	vm_page_protect(m, VM_PROT_NONE);
	if (m->dirty != 0) {
		panic("vm_page_cache: caching a dirty page, pindex: %d", m->pindex);
	}
	s = splvm();
	vm_page_unqueue(m);
	TAILQ_INSERT_TAIL(&vm_page_queue_cache, m, pageq);
	m->queue = PQ_CACHE;
	cnt.v_cache_count++;
	if ((cnt.v_free_count + cnt.v_cache_count) == cnt.v_free_min) {
		wakeup(&cnt.v_free_count);
		wakeup(&proc0);
	}
	if (vm_pageout_pages_needed) {
		wakeup(&vm_pageout_pages_needed);
		vm_pageout_pages_needed = 0;
	}
	splx(s);
}


/*
 * mapping function for valid bits or for dirty bits in
 * a page
 */
inline int
vm_page_bits(int base, int size)
{
	u_short chunk;

	if ((base == 0) && (size >= PAGE_SIZE))
		return VM_PAGE_BITS_ALL;
	size = (size + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1);
	base = (base % PAGE_SIZE) / DEV_BSIZE;
	chunk = vm_page_dev_bsize_chunks[size / DEV_BSIZE];
	return (chunk << base) & VM_PAGE_BITS_ALL;
}
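/*
 * Worked example, assuming PAGE_SIZE 4096 and DEV_BSIZE 512:
 * vm_page_bits(512, 1024) rounds size to 1024, computes base = 1 and
 * chunk = vm_page_dev_bsize_chunks[2] == 0x3, and returns 0x3 << 1 ==
 * 0x6, i.e. the second and third 512-byte chunks of the page.
 * vm_page_bits(0, PAGE_SIZE) short-circuits to VM_PAGE_BITS_ALL.
 */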

/*
 * set a page valid and clean
 */
void
vm_page_set_validclean(m, base, size)
	vm_page_t m;
	int base;
	int size;
{
	int pagebits = vm_page_bits(base, size);
	m->valid |= pagebits;
	m->dirty &= ~pagebits;
	if (base == 0 && size == PAGE_SIZE)
		pmap_clear_modify(VM_PAGE_TO_PHYS(m));
}

/*
 * set a page (partially) invalid
 */
void
vm_page_set_invalid(m, base, size)
	vm_page_t m;
	int base;
	int size;
{
	int bits;

	m->valid &= ~(bits = vm_page_bits(base, size));
	if (m->valid == 0)
		m->dirty &= ~bits;
}

/*
 * is (partial) page valid?
 */
int
vm_page_is_valid(m, base, size)
	vm_page_t m;
	int base;
	int size;
{
	int bits = vm_page_bits(base, size);

	if (m->valid && ((m->valid & bits) == bits))
		return 1;
	else
		return 0;
}

void
vm_page_test_dirty(m)
	vm_page_t m;
{
	if ((m->dirty != VM_PAGE_BITS_ALL) &&
	    pmap_is_modified(VM_PAGE_TO_PHYS(m))) {
		m->dirty = VM_PAGE_BITS_ALL;
	}
}

/*
 * This interface is for merging with malloc() someday.
 * Even if we never implement compaction so that contiguous allocation
 * works after initialization time, malloc()'s data structures are good
 * for statistics and for allocations of less than a page.
 */
void *
contigmalloc(size, type, flags, low, high, alignment, boundary)
	unsigned long size;	/* should be size_t here and for malloc() */
	int type;
	int flags;
	unsigned long low;
	unsigned long high;
	unsigned long alignment;
	unsigned long boundary;
{
	int i, s, start;
	vm_offset_t addr, phys, tmp_addr;
	vm_page_t pga = vm_page_array;

	size = round_page(size);
	if (size == 0)
		panic("vm_page_alloc_contig: size must not be 0");
	if ((alignment & (alignment - 1)) != 0)
		panic("vm_page_alloc_contig: alignment must be a power of 2");
	if ((boundary & (boundary - 1)) != 0)
		panic("vm_page_alloc_contig: boundary must be a power of 2");

	start = 0;
	s = splvm();
again:
	/*
	 * Find first page in array that is free, within range, aligned, and
	 * such that the boundary won't be crossed.
	 */
	for (i = start; i < cnt.v_page_count; i++) {
		phys = VM_PAGE_TO_PHYS(&pga[i]);
		if ((pga[i].queue == PQ_FREE) &&
		    (phys >= low) && (phys < high) &&
		    ((phys & (alignment - 1)) == 0) &&
		    (((phys ^ (phys + size - 1)) & ~(boundary - 1)) == 0))
			break;
	}

	/*
	 * If the above failed or we will exceed the upper bound, fail.
	 */
	if ((i == cnt.v_page_count) ||
		((VM_PAGE_TO_PHYS(&pga[i]) + size) > high)) {
		splx(s);
		return (NULL);
	}
	start = i;

	/*
	 * Check that successive pages are contiguous and free.
	 */
	for (i = start + 1; i < (start + size / PAGE_SIZE); i++) {
		if ((VM_PAGE_TO_PHYS(&pga[i]) !=
		    (VM_PAGE_TO_PHYS(&pga[i - 1]) + PAGE_SIZE)) ||
		    (pga[i].queue != PQ_FREE)) {
			start++;
			goto again;
		}
	}

	/*
	 * We've found a contiguous chunk that meets our requirements.
	 * Allocate kernel VM, unfree and assign the physical pages to it,
	 * and return the kernel VM pointer.
	 */
	tmp_addr = addr = kmem_alloc_pageable(kernel_map, size);
	if (addr == 0) {
		splx(s);
		return (NULL);
	}

	for (i = start; i < (start + size / PAGE_SIZE); i++) {
		vm_page_t m = &pga[i];

		TAILQ_REMOVE(&vm_page_queue_free, m, pageq);
		cnt.v_free_count--;
		m->valid = VM_PAGE_BITS_ALL;
		m->flags = 0;
		m->dirty = 0;
		m->wire_count = 0;
		m->busy = 0;
		m->queue = PQ_NONE;
		vm_page_insert(m, kernel_object,
			OFF_TO_IDX(tmp_addr - VM_MIN_KERNEL_ADDRESS));
		vm_page_wire(m);
		pmap_kenter(tmp_addr, VM_PAGE_TO_PHYS(m));
		tmp_addr += PAGE_SIZE;
	}

	splx(s);
	return ((void *)addr);
}
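/*
 * Example use (a sketch; the device constraints are illustrative): an
 * ISA DMA buffer must sit below 16MB and must not cross a 64KB
 * boundary, so a driver might request
 *
 *	buf = contigmalloc(8192, M_DEVBUF, M_NOWAIT,
 *	    0ul, 0xfffffful, PAGE_SIZE, 0x10000ul);
 *
 * and check the result for NULL before using it.
 */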

vm_offset_t
vm_page_alloc_contig(size, low, high, alignment)
	vm_offset_t size;
	vm_offset_t low;
	vm_offset_t high;
	vm_offset_t alignment;
{
	return ((vm_offset_t)contigmalloc(size, M_DEVBUF, M_NOWAIT, low, high,
					  alignment, 0ul));
}
#ifdef DDB
void
DDB_print_page_info(void)
{
	printf("cnt.v_free_count: %d\n", cnt.v_free_count);
	printf("cnt.v_cache_count: %d\n", cnt.v_cache_count);
	printf("cnt.v_inactive_count: %d\n", cnt.v_inactive_count);
	printf("cnt.v_active_count: %d\n", cnt.v_active_count);
	printf("cnt.v_wire_count: %d\n", cnt.v_wire_count);
	printf("cnt.v_free_reserved: %d\n", cnt.v_free_reserved);
	printf("cnt.v_free_min: %d\n", cnt.v_free_min);
	printf("cnt.v_free_target: %d\n", cnt.v_free_target);
	printf("cnt.v_cache_min: %d\n", cnt.v_cache_min);
	printf("cnt.v_inactive_target: %d\n", cnt.v_inactive_target);
}
#endif
