vm_page.c revision 14865
/*
 * Copyright (c) 1991 Regents of the University of California.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)vm_page.c	7.4 (Berkeley) 5/7/91
 *	$Id: vm_page.c,v 1.49 1996/03/09 06:56:39 dyson Exp $
 */

/*
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Authors: Avadis Tevanian, Jr., Michael Wayne Young
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/*
 *	Resident memory management module.
 */
#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/vmmeter.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_prot.h>
#include <vm/lock.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_pageout.h>
#include <vm/vm_extern.h>

#ifdef DDB
extern void	DDB_print_page_info __P((void));
#endif

/*
 *	Associated with each page of user-allocatable memory is a
 *	page structure.
 */

static struct pglist *vm_page_buckets;	/* Array of buckets */
static int vm_page_bucket_count;	/* How big is array? */
static int vm_page_hash_mask;		/* Mask for hash function */

struct pglist vm_page_queue_free;
struct pglist vm_page_queue_zero;
struct pglist vm_page_queue_active;
struct pglist vm_page_queue_inactive;
struct pglist vm_page_queue_cache;

int no_queue;

struct {
	struct pglist *pl;
	int	*cnt;
} vm_page_queues[PQ_CACHE+1] = {
	{NULL, &no_queue},
	{ &vm_page_queue_free, &cnt.v_free_count},
	{ &vm_page_queue_zero, &cnt.v_free_count},
	{ &vm_page_queue_inactive, &cnt.v_inactive_count},
	{ &vm_page_queue_active, &cnt.v_active_count},
	{ &vm_page_queue_cache, &cnt.v_cache_count}
};
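
/*
 * The vm_page_queues[] table is indexed by a page's "queue" field, so the
 * entry order above must track the PQ_* constants (PQ_NONE first through
 * PQ_CACHE last).  The free and zero queues deliberately share
 * cnt.v_free_count: a pre-zeroed page is still a free page, it has merely
 * been cleared ahead of time.
 */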

vm_page_t vm_page_array;
static int vm_page_array_size;
long first_page;
static long last_page;
static vm_size_t page_mask;
static int page_shift;
int vm_page_zero_count;

/*
 * map of contiguous valid DEV_BSIZE chunks in a page
 * (this list is valid for page sizes up to 16*DEV_BSIZE)
 */
static u_short vm_page_dev_bsize_chunks[] = {
	0x0, 0x1, 0x3, 0x7, 0xf, 0x1f, 0x3f, 0x7f, 0xff,
	0x1ff, 0x3ff, 0x7ff, 0xfff, 0x1fff, 0x3fff, 0x7fff, 0xffff
};
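
/*
 * Entry N of the table above has its low N bits set, so indexing it by a
 * count of DEV_BSIZE chunks yields a mask covering that many chunks;
 * vm_page_bits() below then shifts the mask to the starting chunk.  For
 * example, with DEV_BSIZE == 512, three chunks give
 * vm_page_dev_bsize_chunks[3] == 0x7.
 */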

static inline __pure int
		vm_page_hash __P((vm_object_t object, vm_pindex_t pindex))
		__pure2;
static void	vm_page_unqueue __P((vm_page_t));

/*
 *	vm_set_page_size:
 *
 *	Sets the page size, perhaps based upon the memory
 *	size.  Must be called before any use of page-size
 *	dependent functions.
 *
 *	Sets page_shift and page_mask from cnt.v_page_size.
 */
void
vm_set_page_size()
{

	if (cnt.v_page_size == 0)
		cnt.v_page_size = DEFAULT_PAGE_SIZE;
	page_mask = cnt.v_page_size - 1;
	if ((page_mask & cnt.v_page_size) != 0)
		panic("vm_set_page_size: page size not a power of two");
	for (page_shift = 0;; page_shift++)
		if ((1 << page_shift) == cnt.v_page_size)
			break;
}

/*
 *	vm_page_startup:
 *
 *	Initializes the resident memory module.
 *
 *	Allocates memory for the page cells, and
 *	for the object/offset-to-page hash table headers.
 *	Each page cell is initialized and placed on the free list.
 */

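/*
 * The boot-time structures carved out below (the hash table buckets, the
 * static kernel map and map-entry pool, and the vm_page array itself) are
 * all allocated from the start of the largest chunk of physical memory
 * and mapped at successive virtual addresses; whatever physical memory
 * remains is then handed to the free queue.
 */
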
vm_offset_t
vm_page_startup(starta, enda, vaddr)
	register vm_offset_t starta;
	vm_offset_t enda;
	register vm_offset_t vaddr;
{
	register vm_offset_t mapped;
	register vm_page_t m;
	register struct pglist *bucket;
	vm_size_t npages, page_range;
	register vm_offset_t new_start;
	int i;
	vm_offset_t pa;
	int nblocks;
	vm_offset_t first_managed_page;

	/* the biggest memory array is the second group of pages */
	vm_offset_t start;
	vm_offset_t biggestone, biggestsize;

	vm_offset_t total;

	total = 0;
	biggestsize = 0;
	biggestone = 0;
	nblocks = 0;
	vaddr = round_page(vaddr);

	for (i = 0; phys_avail[i + 1]; i += 2) {
		phys_avail[i] = round_page(phys_avail[i]);
		phys_avail[i + 1] = trunc_page(phys_avail[i + 1]);
	}

	for (i = 0; phys_avail[i + 1]; i += 2) {
		int size = phys_avail[i + 1] - phys_avail[i];

		if (size > biggestsize) {
			biggestone = i;
			biggestsize = size;
		}
		++nblocks;
		total += size;
	}

	start = phys_avail[biggestone];

	/*
	 * Initialize the queue headers for the free queue, the active queue
	 * and the inactive queue.
	 */

	TAILQ_INIT(&vm_page_queue_free);
	TAILQ_INIT(&vm_page_queue_zero);
	TAILQ_INIT(&vm_page_queue_active);
	TAILQ_INIT(&vm_page_queue_inactive);
	TAILQ_INIT(&vm_page_queue_cache);

	/*
	 * Allocate (and initialize) the hash table buckets.
	 *
	 * The number of buckets MUST BE a power of 2, and the actual value is
	 * the next power of 2 greater than the number of physical pages in
	 * the system.
	 *
	 * Note: This computation can be tweaked if desired.
	 */
	vm_page_buckets = (struct pglist *) vaddr;
	bucket = vm_page_buckets;
	if (vm_page_bucket_count == 0) {
		vm_page_bucket_count = 2;
		while (vm_page_bucket_count < atop(total))
			vm_page_bucket_count <<= 1;
	}
	vm_page_hash_mask = vm_page_bucket_count - 1;

	/*
	 * Validate these addresses.
	 */

	new_start = start + vm_page_bucket_count * sizeof(struct pglist);
	new_start = round_page(new_start);
	mapped = vaddr;
	vaddr = pmap_map(mapped, start, new_start,
	    VM_PROT_READ | VM_PROT_WRITE);
	start = new_start;
	bzero((caddr_t) mapped, vaddr - mapped);
	mapped = vaddr;

	for (i = 0; i < vm_page_bucket_count; i++) {
		TAILQ_INIT(bucket);
		bucket++;
	}

	/*
	 * round (or truncate) the addresses to our page size.
	 */

	/*
	 * Pre-allocate maps and map entries that cannot be dynamically
	 * allocated via malloc().  The maps include the kernel_map and
	 * kmem_map which must be initialized before malloc() will work
	 * (obviously).  Also could include pager maps which would be
	 * allocated before kmeminit.
	 *
	 * Allow some kernel map entries... this should be plenty since people
	 * shouldn't be cluttering up the kernel map (they should use their
	 * own maps).
	 */

	kentry_data_size = MAX_KMAP * sizeof(struct vm_map) +
	    MAX_KMAPENT * sizeof(struct vm_map_entry);
	kentry_data_size = round_page(kentry_data_size);
	kentry_data = (vm_offset_t) vaddr;
	vaddr += kentry_data_size;

	/*
	 * Validate these zone addresses.
	 */

	new_start = start + (vaddr - mapped);
	pmap_map(mapped, start, new_start, VM_PROT_READ | VM_PROT_WRITE);
	bzero((caddr_t) mapped, (vaddr - mapped));
	start = round_page(new_start);

	/*
	 * Compute the number of pages of memory that will be available for
	 * use (taking into account the overhead of a page structure per
	 * page).
	 */

	first_page = phys_avail[0] / PAGE_SIZE;
	last_page = phys_avail[(nblocks - 1) * 2 + 1] / PAGE_SIZE;

	page_range = last_page - (phys_avail[0] / PAGE_SIZE);
	npages = (total - (page_range * sizeof(struct vm_page)) -
	    (start - phys_avail[biggestone])) / PAGE_SIZE;

	/*
	 * Initialize the mem entry structures now, and put them in the free
	 * queue.
	 */

	vm_page_array = (vm_page_t) vaddr;
	mapped = vaddr;

	/*
	 * Validate these addresses.
	 */

	new_start = round_page(start + page_range * sizeof(struct vm_page));
	mapped = pmap_map(mapped, start, new_start,
	    VM_PROT_READ | VM_PROT_WRITE);
	start = new_start;

	first_managed_page = start / PAGE_SIZE;

	/*
	 * Clear all of the page structures
	 */
	bzero((caddr_t) vm_page_array, page_range * sizeof(struct vm_page));
	vm_page_array_size = page_range;

	cnt.v_page_count = 0;
	cnt.v_free_count = 0;
	for (i = 0; phys_avail[i + 1] && npages > 0; i += 2) {
		if (i == biggestone)
			pa = ptoa(first_managed_page);
		else
			pa = phys_avail[i];
		while (pa < phys_avail[i + 1] && npages-- > 0) {
			++cnt.v_page_count;
			++cnt.v_free_count;
			m = PHYS_TO_VM_PAGE(pa);
			m->queue = PQ_FREE;
			m->flags = 0;
			m->phys_addr = pa;
			TAILQ_INSERT_TAIL(&vm_page_queue_free, m, pageq);
			pa += PAGE_SIZE;
		}
	}

	return (mapped);
}

/*
 *	vm_page_hash:
 *
 *	Distributes the object/offset key pair among hash buckets.
 *
 *	NOTE:  This macro depends on vm_page_bucket_count being a power of 2.
 */
static inline __pure int
vm_page_hash(object, pindex)
	vm_object_t object;
	vm_pindex_t pindex;
{
	return ((unsigned) object + pindex) & vm_page_hash_mask;
}

/*
 *	vm_page_insert:		[ internal use only ]
 *
 *	Inserts the given mem entry into the object/object-page
 *	table and object list.
 *
 *	The object and page must be locked, and must be splhigh.
 */

inline void
vm_page_insert(m, object, pindex)
	register vm_page_t m;
	register vm_object_t object;
	register vm_pindex_t pindex;
{
	register struct pglist *bucket;

	if (m->flags & PG_TABLED)
		panic("vm_page_insert: already inserted");

	/*
	 * Record the object/offset pair in this page
	 */

	m->object = object;
	m->pindex = pindex;

	/*
	 * Insert it into the object_object/offset hash table
	 */

	bucket = &vm_page_buckets[vm_page_hash(object, pindex)];
	TAILQ_INSERT_TAIL(bucket, m, hashq);

	/*
	 * Now link into the object's list of backed pages.
	 */

	TAILQ_INSERT_TAIL(&object->memq, m, listq);
	m->flags |= PG_TABLED;

	/*
	 * And show that the object has one more resident page.
	 */

	object->resident_page_count++;
}

/*
 *	vm_page_remove:		[ internal use only ]
 *				NOTE: used by device pager as well -wfj
 *
 *	Removes the given mem entry from the object/offset-page
 *	table and the object page list.
 *
 *	The object and page must be locked, and at splhigh.
 */

inline void
vm_page_remove(m)
	register vm_page_t m;
{
	register struct pglist *bucket;

	if (!(m->flags & PG_TABLED))
		return;

	/*
	 * Remove from the object_object/offset hash table
	 */

	bucket = &vm_page_buckets[vm_page_hash(m->object, m->pindex)];
	TAILQ_REMOVE(bucket, m, hashq);

	/*
	 * Now remove from the object's list of backed pages.
	 */

	TAILQ_REMOVE(&m->object->memq, m, listq);

	/*
	 * And show that the object has one fewer resident page.
	 */

	m->object->resident_page_count--;

	m->flags &= ~PG_TABLED;
}

/*
 *	vm_page_lookup:
 *
 *	Returns the page associated with the object/offset
 *	pair specified; if none is found, NULL is returned.
 *
 *	The object must be locked.  No side effects.
 */

vm_page_t
vm_page_lookup(object, pindex)
	register vm_object_t object;
	register vm_pindex_t pindex;
{
	register vm_page_t m;
	register struct pglist *bucket;
	int s;

	/*
	 * Search the hash table for this object/offset pair
	 */

	bucket = &vm_page_buckets[vm_page_hash(object, pindex)];

	s = splhigh();
	for (m = bucket->tqh_first; m != NULL; m = m->hashq.tqe_next) {
		if ((m->object == object) && (m->pindex == pindex)) {
			splx(s);
			return (m);
		}
	}

	splx(s);
	return (NULL);
}

/*
 *	vm_page_rename:
 *
 *	Move the given memory entry from its
 *	current object to the specified target object/offset.
 *
 *	The object must be locked.
 */
void
vm_page_rename(m, new_object, new_pindex)
	register vm_page_t m;
	register vm_object_t new_object;
	vm_pindex_t new_pindex;
{
	int s;

	s = splhigh();
	vm_page_remove(m);
	vm_page_insert(m, new_object, new_pindex);
	splx(s);
}

/*
 * vm_page_unqueue must be called at splhigh();
 */
static inline void
vm_page_unqueue(vm_page_t m)
{
	int queue = m->queue;
	if (queue == PQ_NONE)
		return;
	m->queue = PQ_NONE;
	TAILQ_REMOVE(vm_page_queues[queue].pl, m, pageq);
	--(*vm_page_queues[queue].cnt);
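	/*
	 * A page leaving the cache queue is no longer immediately
	 * reclaimable, so if the pool of free + cached pages has dropped
	 * below the reserved level, wake the pageout daemon to replenish it.
	 */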
	if (queue == PQ_CACHE) {
		if ((cnt.v_cache_count + cnt.v_free_count) <
			(cnt.v_free_reserved + cnt.v_cache_min))
			pagedaemon_wakeup();
	}
	return;
}

/*
 *	vm_page_alloc:
 *
 *	Allocate and return a memory cell associated
 *	with this VM object/offset pair.
 *
 *	page_req classes:
 *	VM_ALLOC_NORMAL		normal process request
 *	VM_ALLOC_SYSTEM		system *really* needs a page
 *	VM_ALLOC_INTERRUPT	interrupt time request
 *	VM_ALLOC_ZERO		zero page
 *
 *	Object must be locked.
 */
vm_page_t
vm_page_alloc(object, pindex, page_req)
	vm_object_t object;
	vm_pindex_t pindex;
	int page_req;
{
	register vm_page_t m;
	int queue;
	int s;

#ifdef DIAGNOSTIC
	m = vm_page_lookup(object, pindex);
	if (m)
		panic("vm_page_alloc: page already allocated");
#endif

	if ((curproc == pageproc) && (page_req != VM_ALLOC_INTERRUPT)) {
		page_req = VM_ALLOC_SYSTEM;
	}

	s = splhigh();

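	/*
	 * Allocation policy, from most to least restrictive: VM_ALLOC_NORMAL
	 * and VM_ALLOC_ZERO take from the free/zero queues only while the
	 * free count stays at or above v_free_reserved and otherwise fall
	 * back to reclaiming a cached page; VM_ALLOC_SYSTEM may also dip
	 * below v_free_reserved (down to v_interrupt_free_min) when no
	 * cached pages are available; VM_ALLOC_INTERRUPT may drain the free
	 * queues completely, since it cannot reclaim a cached page here.
	 */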
	switch (page_req) {

	case VM_ALLOC_NORMAL:
		if (cnt.v_free_count >= cnt.v_free_reserved) {
			m = vm_page_queue_free.tqh_first;
			if (m == NULL) {
				--vm_page_zero_count;
				m = vm_page_queue_zero.tqh_first;
			}
		} else {
			m = vm_page_queue_cache.tqh_first;
			if (m == NULL) {
				splx(s);
				pagedaemon_wakeup();
				return (NULL);
			}
		}
		break;

	case VM_ALLOC_ZERO:
		if (cnt.v_free_count >= cnt.v_free_reserved) {
			m = vm_page_queue_zero.tqh_first;
			if (m) {
				--vm_page_zero_count;
			} else {
				m = vm_page_queue_free.tqh_first;
			}
		} else {
			m = vm_page_queue_cache.tqh_first;
			if (m == NULL) {
				splx(s);
				pagedaemon_wakeup();
				return (NULL);
			}
		}
		break;

	case VM_ALLOC_SYSTEM:
		if ((cnt.v_free_count >= cnt.v_free_reserved) ||
		    ((cnt.v_cache_count == 0) &&
		    (cnt.v_free_count >= cnt.v_interrupt_free_min))) {
				m = vm_page_queue_free.tqh_first;
				if (m == NULL) {
					--vm_page_zero_count;
					m = vm_page_queue_zero.tqh_first;
				}
		} else {
			m = vm_page_queue_cache.tqh_first;
			if (m == NULL) {
				splx(s);
				pagedaemon_wakeup();
				return (NULL);
			}
		}
		break;

	case VM_ALLOC_INTERRUPT:
		if (cnt.v_free_count > 0) {
			m = vm_page_queue_free.tqh_first;
			if (m == NULL) {
				--vm_page_zero_count;
				m = vm_page_queue_zero.tqh_first;
			}
		} else {
			splx(s);
			pagedaemon_wakeup();
			return (NULL);
		}
		break;

	default:
		panic("vm_page_alloc: invalid allocation class");
	}

	queue = m->queue;
	TAILQ_REMOVE(vm_page_queues[queue].pl, m, pageq);
	--(*vm_page_queues[queue].cnt);
	if (queue == PQ_ZERO) {
		m->flags = PG_ZERO|PG_BUSY;
	} else if (queue == PQ_CACHE) {
		vm_page_remove(m);
		m->flags = PG_BUSY;
	} else {
		m->flags = PG_BUSY;
	}
	m->wire_count = 0;
	m->hold_count = 0;
	m->act_count = 0;
	m->busy = 0;
	m->valid = 0;
	m->dirty = 0;
	m->queue = PQ_NONE;

	/* XXX before splx until vm_page_insert is safe */
	vm_page_insert(m, object, pindex);

	splx(s);

	/*
	 * Don't wake up the pageout daemon too often -- only when we would
	 * be nearly out of memory.
	 */
	if (((cnt.v_free_count + cnt.v_cache_count) <
		(cnt.v_free_reserved + cnt.v_cache_min)) ||
			(cnt.v_free_count < cnt.v_pageout_free_min))
		pagedaemon_wakeup();

	return (m);
}

/*
 * This interface is for merging with malloc() someday.
 * Even if we never implement compaction so that contiguous allocation
 * works after initialization time, malloc()'s data structures are good
 * for statistics and for allocations of less than a page.
 */
void *
contigmalloc(size, type, flags, low, high, alignment, boundary)
	unsigned long size;	/* should be size_t here and for malloc() */
	int type;
	int flags;
	unsigned long low;
	unsigned long high;
	unsigned long alignment;
	unsigned long boundary;
{
	int i, s, start;
	vm_offset_t addr, phys, tmp_addr;
	vm_page_t pga = vm_page_array;

	size = round_page(size);
	if (size == 0)
		panic("vm_page_alloc_contig: size must not be 0");
	if ((alignment & (alignment - 1)) != 0)
		panic("vm_page_alloc_contig: alignment must be a power of 2");
	if ((boundary & (boundary - 1)) != 0)
		panic("vm_page_alloc_contig: boundary must be a power of 2");

	start = 0;
	s = splhigh();
again:
	/*
	 * Find first page in array that is free, within range, aligned, and
	 * such that the boundary won't be crossed.
	 */
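	/*
	 * The alignment test checks that the low bits of the physical
	 * address are clear; the boundary test XORs the first and last byte
	 * addresses of the region and verifies that they agree in all bits
	 * at or above the boundary size, i.e. that the region does not
	 * straddle a boundary-aligned address.
	 */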
	for (i = start; i < cnt.v_page_count; i++) {
		phys = VM_PAGE_TO_PHYS(&pga[i]);
		if ((pga[i].queue == PQ_FREE) &&
		    (phys >= low) && (phys < high) &&
		    ((phys & (alignment - 1)) == 0) &&
		    (((phys ^ (phys + size - 1)) & ~(boundary - 1)) == 0))
			break;
	}

	/*
	 * If the above failed or we will exceed the upper bound, fail.
	 */
	if ((i == cnt.v_page_count) ||
		((VM_PAGE_TO_PHYS(&pga[i]) + size) > high)) {
		splx(s);
		return (NULL);
	}
	start = i;

	/*
	 * Check successive pages for contiguous and free.
	 */
	for (i = start + 1; i < (start + size / PAGE_SIZE); i++) {
		if ((VM_PAGE_TO_PHYS(&pga[i]) !=
		    (VM_PAGE_TO_PHYS(&pga[i - 1]) + PAGE_SIZE)) ||
		    (pga[i].queue != PQ_FREE)) {
			start++;
			goto again;
		}
	}

	/*
	 * We've found a contiguous chunk that meets our requirements.
	 * Allocate kernel VM, unfree and assign the physical pages to it and
	 * return kernel VM pointer.
	 */
	tmp_addr = addr = kmem_alloc_pageable(kernel_map, size);
	if (addr == 0) {
		splx(s);
		return (NULL);
	}

	for (i = start; i < (start + size / PAGE_SIZE); i++) {
		vm_page_t m = &pga[i];

		TAILQ_REMOVE(&vm_page_queue_free, m, pageq);
		cnt.v_free_count--;
		m->valid = VM_PAGE_BITS_ALL;
		m->flags = 0;
		m->dirty = 0;
		m->wire_count = 0;
		m->act_count = 0;
		m->busy = 0;
		m->queue = PQ_NONE;
		vm_page_insert(m, kernel_object,
			OFF_TO_IDX(tmp_addr - VM_MIN_KERNEL_ADDRESS));
		vm_page_wire(m);
		pmap_kenter(tmp_addr, VM_PAGE_TO_PHYS(m));
		tmp_addr += PAGE_SIZE;
	}

	splx(s);
	return ((void *)addr);
}
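
/*
 * Example (hypothetical caller): a driver that needs 64K of physically
 * contiguous DMA memory below 16MB, 64K-aligned and not crossing a 64K
 * boundary, might request it as:
 *
 *	buf = contigmalloc(64 * 1024, M_DEVBUF, M_NOWAIT,
 *	    0ul, 0x1000000ul, 64 * 1024, 64 * 1024);
 */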

vm_offset_t
vm_page_alloc_contig(size, low, high, alignment)
	vm_offset_t size;
	vm_offset_t low;
	vm_offset_t high;
	vm_offset_t alignment;
{
	return ((vm_offset_t)contigmalloc(size, M_DEVBUF, M_NOWAIT, low, high,
					  alignment, 0ul));
}

/*
 *	vm_page_free:
 *
 *	Returns the given page to the free list,
 *	disassociating it from any VM object.
 *
 *	Object and page must be locked prior to entry.
 */
void
vm_page_free(m)
	register vm_page_t m;
{
	int s;
	int flags = m->flags;

	s = splhigh();
	if (m->busy || (flags & PG_BUSY) || (m->queue == PQ_FREE)) {
		printf("vm_page_free: pindex(%ld), busy(%d), PG_BUSY(%d)\n",
		    m->pindex, m->busy, (flags & PG_BUSY) ? 1 : 0);
		if (m->queue == PQ_FREE)
			panic("vm_page_free: freeing free page");
		else
			panic("vm_page_free: freeing busy page");
	}

	if (m->hold_count) {
		panic("freeing held page, count=%d", m->hold_count);
	}

	vm_page_remove(m);
	vm_page_unqueue(m);

	if ((flags & PG_FICTITIOUS) == 0) {
		if (m->wire_count) {
			if (m->wire_count > 1) {
				printf("vm_page_free: wire count > 1 (%d)", m->wire_count);
				panic("vm_page_free: invalid wire count");
			}
			cnt.v_wire_count--;
			m->wire_count = 0;
		}
		m->queue = PQ_FREE;
		TAILQ_INSERT_TAIL(&vm_page_queue_free, m, pageq);
		splx(s);
		/*
		 * if pageout daemon needs pages, then tell it that there are
		 * some free.
		 */
		if (vm_pageout_pages_needed) {
			wakeup(&vm_pageout_pages_needed);
			vm_pageout_pages_needed = 0;
		}

		cnt.v_free_count++;
		/*
		 * Wake up processes that are waiting on memory if we hit a
		 * high water mark, and wake up the scheduler process if we
		 * have lots of memory; it will swap processes back in.
		 */
		if ((cnt.v_free_count + cnt.v_cache_count) == cnt.v_free_min) {
			wakeup(&cnt.v_free_count);
			wakeup(&proc0);
		}
	} else {
		splx(s);
	}
	cnt.v_tfree++;
}


/*
 *	vm_page_wire:
 *
 *	Mark this page as wired down by yet
 *	another map, removing it from paging queues
 *	as necessary.
 *
 *	The page queues must be locked.
 */
void
vm_page_wire(m)
	register vm_page_t m;
{
	int s;

	if (m->wire_count == 0) {
		s = splhigh();
		vm_page_unqueue(m);
		splx(s);
		cnt.v_wire_count++;
	}
	m->wire_count++;
	m->flags |= PG_MAPPED;
}

/*
 *	vm_page_unwire:
 *
 *	Release one wiring of this page, potentially
 *	enabling it to be paged again.
 *
 *	The page queues must be locked.
 */
void
vm_page_unwire(m)
	register vm_page_t m;
{
	int s;

	s = splhigh();

	if (m->wire_count > 0)
		m->wire_count--;

	if (m->wire_count == 0) {
		cnt.v_wire_count--;
		TAILQ_INSERT_TAIL(&vm_page_queue_active, m, pageq);
		m->queue = PQ_ACTIVE;
		if (m->act_count < ACT_MAX)
			m->act_count += 1;
		cnt.v_active_count++;
	}
	splx(s);
}

/*
 *	vm_page_activate:
 *
 *	Put the specified page on the active list (if appropriate).
 *
 *	The page queues must be locked.
 */
void
vm_page_activate(m)
	register vm_page_t m;
{
	int s;

	s = splhigh();
	if (m->queue == PQ_ACTIVE)
		panic("vm_page_activate: already active");

	if (m->queue == PQ_CACHE)
		cnt.v_reactivated++;

	vm_page_unqueue(m);

	if (m->wire_count == 0) {
		TAILQ_INSERT_TAIL(&vm_page_queue_active, m, pageq);
		m->queue = PQ_ACTIVE;
		if (m->act_count < 5)
			m->act_count = 5;
		else if (m->act_count < ACT_MAX)
			m->act_count += 1;
		cnt.v_active_count++;
	}
	splx(s);
}

/*
 *	vm_page_deactivate:
 *
 *	Returns the given page to the inactive list,
 *	indicating that no physical maps have access
 *	to this page.  [Used by the physical mapping system.]
 *
 *	The page queues must be locked.
 */
void
vm_page_deactivate(m)
	register vm_page_t m;
{
	int spl;

	/*
	 * Only move active pages -- ignore locked or already inactive ones.
	 *
	 * XXX: sometimes we get pages which aren't wired down or on any queue -
	 * we need to put them on the inactive queue also, otherwise we lose
	 * track of them. Paul Mackerras (paulus@cs.anu.edu.au) 9-Jan-93.
	 */
	if (m->queue == PQ_INACTIVE)
		return;

	spl = splhigh();
	if (m->wire_count == 0 && m->hold_count == 0) {
		if (m->queue == PQ_CACHE)
			cnt.v_reactivated++;
		vm_page_unqueue(m);
		TAILQ_INSERT_TAIL(&vm_page_queue_inactive, m, pageq);
		m->queue = PQ_INACTIVE;
		cnt.v_inactive_count++;
		m->act_count = 0;
	}
	splx(spl);
}

/*
 * vm_page_cache
 *
 * Put the specified page onto the page cache queue (if appropriate).
 */
void
vm_page_cache(m)
	register vm_page_t m;
{
	int s;

	if ((m->flags & PG_BUSY) || m->busy || m->wire_count) {
		printf("vm_page_cache: attempting to cache busy page\n");
		return;
	}
	if (m->queue == PQ_CACHE)
		return;

	vm_page_protect(m, VM_PROT_NONE);
	s = splhigh();
	vm_page_unqueue(m);
	TAILQ_INSERT_TAIL(&vm_page_queue_cache, m, pageq);
	m->queue = PQ_CACHE;
	cnt.v_cache_count++;
	if ((cnt.v_free_count + cnt.v_cache_count) == cnt.v_free_min) {
		wakeup(&cnt.v_free_count);
		wakeup(&proc0);
	}
	if (vm_pageout_pages_needed) {
		wakeup(&vm_pageout_pages_needed);
		vm_pageout_pages_needed = 0;
	}
	splx(s);
}

/*
 *	vm_page_zero_fill:
 *
 *	Zero-fill the specified page.
 *	Written as a standard pagein routine, to
 *	be used by the zero-fill object.
 */
boolean_t
vm_page_zero_fill(m)
	vm_page_t m;
{
	pmap_zero_page(VM_PAGE_TO_PHYS(m));
	return (TRUE);
}

/*
 *	vm_page_copy:
 *
 *	Copy one page to another
 */
void
vm_page_copy(src_m, dest_m)
	vm_page_t src_m;
	vm_page_t dest_m;
{
	pmap_copy_page(VM_PAGE_TO_PHYS(src_m), VM_PAGE_TO_PHYS(dest_m));
	dest_m->valid = VM_PAGE_BITS_ALL;
}


/*
 * mapping function for valid bits or for dirty bits in
 * a page
 */
inline int
vm_page_bits(int base, int size)
{
	u_short chunk;

	if ((base == 0) && (size >= PAGE_SIZE))
		return VM_PAGE_BITS_ALL;
	size = (size + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1);
	base = (base % PAGE_SIZE) / DEV_BSIZE;
	chunk = vm_page_dev_bsize_chunks[size / DEV_BSIZE];
	return (chunk << base) & VM_PAGE_BITS_ALL;
}
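
/*
 * For example, assuming DEV_BSIZE == 512 and PAGE_SIZE == 4096,
 * vm_page_bits(512, 1024) rounds the size to two chunks, computes a
 * starting chunk of 1, and returns (0x3 << 1) == 0x06: bits 1 and 2,
 * covering byte offsets 512-1535 of the page.
 */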

/*
 * set a page valid and clean
 */
void
vm_page_set_validclean(m, base, size)
	vm_page_t m;
	int base;
	int size;
{
	int pagebits = vm_page_bits(base, size);
	m->valid |= pagebits;
	m->dirty &= ~pagebits;
	if (base == 0 && size == PAGE_SIZE)
		pmap_clear_modify(VM_PAGE_TO_PHYS(m));
}

/*
 * set a page (partially) invalid
 */
void
vm_page_set_invalid(m, base, size)
	vm_page_t m;
	int base;
	int size;
{
	int bits;

	m->valid &= ~(bits = vm_page_bits(base, size));
	if (m->valid == 0)
		m->dirty &= ~bits;
}

/*
 * is (partial) page valid?
 */
int
vm_page_is_valid(m, base, size)
	vm_page_t m;
	int base;
	int size;
{
	int bits = vm_page_bits(base, size);

	if (m->valid && ((m->valid & bits) == bits))
		return 1;
	else
		return 0;
}



void
vm_page_test_dirty(m)
	vm_page_t m;
{
	if ((m->dirty != VM_PAGE_BITS_ALL) &&
	    pmap_is_modified(VM_PAGE_TO_PHYS(m))) {
		m->dirty = VM_PAGE_BITS_ALL;
	}
}

#ifdef DDB
void
DDB_print_page_info(void)
{
	printf("cnt.v_free_count: %d\n", cnt.v_free_count);
	printf("cnt.v_cache_count: %d\n", cnt.v_cache_count);
	printf("cnt.v_inactive_count: %d\n", cnt.v_inactive_count);
	printf("cnt.v_active_count: %d\n", cnt.v_active_count);
	printf("cnt.v_wire_count: %d\n", cnt.v_wire_count);
	printf("cnt.v_free_reserved: %d\n", cnt.v_free_reserved);
	printf("cnt.v_free_min: %d\n", cnt.v_free_min);
	printf("cnt.v_free_target: %d\n", cnt.v_free_target);
	printf("cnt.v_cache_min: %d\n", cnt.v_cache_min);
	printf("cnt.v_inactive_target: %d\n", cnt.v_inactive_target);
}
#endif