vm_page.c revision 16562
1/*
2 * Copyright (c) 1991 Regents of the University of California.
3 * All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * The Mach Operating System project at Carnegie-Mellon University.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 *    must display the following acknowledgement:
18 *	This product includes software developed by the University of
19 *	California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 *    may be used to endorse or promote products derived from this software
22 *    without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 *
36 *	from: @(#)vm_page.c	7.4 (Berkeley) 5/7/91
37 *	$Id: vm_page.c,v 1.58 1996/06/17 03:35:37 dyson Exp $
38 */
39
40/*
41 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
42 * All rights reserved.
43 *
44 * Authors: Avadis Tevanian, Jr., Michael Wayne Young
45 *
46 * Permission to use, copy, modify and distribute this software and
47 * its documentation is hereby granted, provided that both the copyright
48 * notice and this permission notice appear in all copies of the
49 * software, derivative works or modified versions, and any portions
50 * thereof, and that both notices appear in supporting documentation.
51 *
52 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
53 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
54 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
55 *
56 * Carnegie Mellon requests users of this software to return to
57 *
58 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
59 *  School of Computer Science
60 *  Carnegie Mellon University
61 *  Pittsburgh PA 15213-3890
62 *
63 * any improvements or extensions that they make and grant Carnegie the
64 * rights to redistribute these changes.
65 */
66
67/*
68 *	Resident memory management module.
69 */
70#include "opt_ddb.h"
71
72#include <sys/param.h>
73#include <sys/systm.h>
74#include <sys/malloc.h>
75#include <sys/proc.h>
76#include <sys/queue.h>
77#include <sys/vmmeter.h>
78
79#include <vm/vm.h>
80#include <vm/vm_param.h>
81#include <vm/vm_prot.h>
82#include <vm/lock.h>
83#include <vm/vm_kern.h>
84#include <vm/vm_object.h>
85#include <vm/vm_page.h>
86#include <vm/vm_map.h>
87#include <vm/vm_pageout.h>
88#include <vm/vm_extern.h>
89
90#ifdef DDB
91extern void	DDB_print_page_info __P((void));
92#endif
93
94/*
95 *	Associated with each page of user-allocatable memory is a
96 *	page structure.
97 */
98
99static struct pglist *vm_page_buckets;	/* Array of buckets */
100static int vm_page_bucket_count;	/* How big is array? */
101static int vm_page_hash_mask;		/* Mask for hash function */
102
103struct pglist vm_page_queue_free;
104struct pglist vm_page_queue_zero;
105struct pglist vm_page_queue_active;
106struct pglist vm_page_queue_inactive;
107struct pglist vm_page_queue_cache;
108
109int no_queue;
110
111struct {
112	struct pglist *pl;
113	int	*cnt;
114} vm_page_queues[PQ_CACHE+1] = {
115	{NULL, &no_queue},
116	{ &vm_page_queue_free, &cnt.v_free_count},
117	{ &vm_page_queue_zero, &cnt.v_free_count},
118	{ &vm_page_queue_inactive, &cnt.v_inactive_count},
119	{ &vm_page_queue_active, &cnt.v_active_count},
120	{ &vm_page_queue_cache, &cnt.v_cache_count}
121};
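
/*
 * Illustrative sketch (not part of the original code): the parallel
 * array above makes queue manipulation table driven.  For a page m that
 * is on one of the queues (m->queue != PQ_NONE), the following removes
 * it and fixes up the matching counter, exactly as vm_page_unqueue()
 * does further down in this file:
 *
 *	TAILQ_REMOVE(vm_page_queues[m->queue].pl, m, pageq);
 *	--(*vm_page_queues[m->queue].cnt);
 */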
122
123vm_page_t vm_page_array;
124static int vm_page_array_size;
125long first_page;
126static long last_page;
127static vm_size_t page_mask;
128static int page_shift;
129int vm_page_zero_count;
130
131/*
132 * map of contiguous valid DEV_BSIZE chunks in a page
133 * (this list is valid for page sizes up to 16*DEV_BSIZE)
134 */
135static u_short vm_page_dev_bsize_chunks[] = {
136	0x0, 0x1, 0x3, 0x7, 0xf, 0x1f, 0x3f, 0x7f, 0xff,
137	0x1ff, 0x3ff, 0x7ff, 0xfff, 0x1fff, 0x3fff, 0x7fff, 0xffff
138};
139
140static inline __pure int
141		vm_page_hash __P((vm_object_t object, vm_pindex_t pindex))
142		__pure2;
143
144static int vm_page_freechk_and_unqueue __P((vm_page_t m));
145static void vm_page_free_wakeup __P((void));
146
147/*
148 *	vm_set_page_size:
149 *
150 *	Sets the page size, perhaps based upon the memory
151 *	size.  Must be called before any use of page-size
152 *	dependent functions.
153 *
154 *	Sets page_shift and page_mask from cnt.v_page_size.
155 */
156void
157vm_set_page_size()
158{
159
160	if (cnt.v_page_size == 0)
161		cnt.v_page_size = DEFAULT_PAGE_SIZE;
162	page_mask = cnt.v_page_size - 1;
163	if ((page_mask & cnt.v_page_size) != 0)
164		panic("vm_set_page_size: page size not a power of two");
165	for (page_shift = 0;; page_shift++)
166		if ((1 << page_shift) == cnt.v_page_size)
167			break;
168}
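
/*
 * Worked example (illustrative; assumes the usual i386 value
 * cnt.v_page_size == 4096): page_mask becomes 0xfff, the power-of-two
 * check (0xfff & 0x1000) == 0 passes, and the loop terminates with
 * page_shift == 12 because (1 << 12) == 4096.
 */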
169
170/*
171 *	vm_page_startup:
172 *
173 *	Initializes the resident memory module.
174 *
175 *	Allocates memory for the page cells, and
176 *	for the object/offset-to-page hash table headers.
177 *	Each page cell is initialized and placed on the free list.
178 */
179
180vm_offset_t
181vm_page_startup(starta, enda, vaddr)
182	register vm_offset_t starta;
183	vm_offset_t enda;
184	register vm_offset_t vaddr;
185{
186	register vm_offset_t mapped;
187	register vm_page_t m;
188	register struct pglist *bucket;
189	vm_size_t npages, page_range;
190	register vm_offset_t new_start;
191	int i;
192	vm_offset_t pa;
193	int nblocks;
194	vm_offset_t first_managed_page;
195
196	/* the biggest memory array is the second group of pages */
197	vm_offset_t start;
198	vm_offset_t biggestone, biggestsize;
199
200	vm_offset_t total;
201
202	total = 0;
203	biggestsize = 0;
204	biggestone = 0;
205	nblocks = 0;
206	vaddr = round_page(vaddr);
207
208	for (i = 0; phys_avail[i + 1]; i += 2) {
209		phys_avail[i] = round_page(phys_avail[i]);
210		phys_avail[i + 1] = trunc_page(phys_avail[i + 1]);
211	}
212
213	for (i = 0; phys_avail[i + 1]; i += 2) {
214		int size = phys_avail[i + 1] - phys_avail[i];
215
216		if (size > biggestsize) {
217			biggestone = i;
218			biggestsize = size;
219		}
220		++nblocks;
221		total += size;
222	}
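
	/*
	 * Illustrative note (the concrete numbers are hypothetical):
	 * phys_avail[] is a zero-terminated list of (start, end) physical
	 * address pairs, e.g. a machine with 640K of base memory and 15M
	 * of extended memory might hand in something like
	 *
	 *	{ 0x1000, 0xa0000, 0x100000, 0x1000000, 0, 0 }
	 *
	 * The loops above round each range to page boundaries, remember
	 * the largest range in biggestone, and total up the available
	 * memory.
	 */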
223
224	start = phys_avail[biggestone];
225
226	/*
227	 * Initialize the queue headers for the free, zero, active, inactive
228	 * and cache queues.
229	 */
230
231	TAILQ_INIT(&vm_page_queue_free);
232	TAILQ_INIT(&vm_page_queue_zero);
233	TAILQ_INIT(&vm_page_queue_active);
234	TAILQ_INIT(&vm_page_queue_inactive);
235	TAILQ_INIT(&vm_page_queue_cache);
236
237	/*
238	 * Allocate (and initialize) the hash table buckets.
239	 *
240	 * The number of buckets MUST BE a power of 2, and the actual value is
241	 * the next power of 2 greater than the number of physical pages in
242	 * the system.
243	 *
244	 * Note: This computation can be tweaked if desired.
245	 */
246	vm_page_buckets = (struct pglist *) vaddr;
247	bucket = vm_page_buckets;
248	if (vm_page_bucket_count == 0) {
249		vm_page_bucket_count = 1;
250		while (vm_page_bucket_count < atop(total))
251			vm_page_bucket_count <<= 1;
252	}
253	vm_page_hash_mask = vm_page_bucket_count - 1;
254
255	/*
256	 * Validate these addresses.
257	 */
258
259	new_start = start + vm_page_bucket_count * sizeof(struct pglist);
260	new_start = round_page(new_start);
261	mapped = vaddr;
262	vaddr = pmap_map(mapped, start, new_start,
263	    VM_PROT_READ | VM_PROT_WRITE);
264	start = new_start;
265	bzero((caddr_t) mapped, vaddr - mapped);
266	mapped = vaddr;
267
268	for (i = 0; i < vm_page_bucket_count; i++) {
269		TAILQ_INIT(bucket);
270		bucket++;
271	}
272
273	/*
274	 * round (or truncate) the addresses to our page size.
275	 */
276
277	/*
278	 * Pre-allocate maps and map entries that cannot be dynamically
279	 * allocated via malloc().  The maps include the kernel_map and
280	 * kmem_map which must be initialized before malloc() will work
281	 * (obviously).  Also could include pager maps which would be
282	 * allocated before kmeminit.
283	 *
284	 * Allow some kernel map entries... this should be plenty since people
285	 * shouldn't be cluttering up the kernel map (they should use their
286	 * own maps).
287	 */
288
289	kentry_data_size = MAX_KMAP * sizeof(struct vm_map) +
290	    MAX_KMAPENT * sizeof(struct vm_map_entry);
291	kentry_data_size = round_page(kentry_data_size);
292	kentry_data = (vm_offset_t) vaddr;
293	vaddr += kentry_data_size;
294
295	/*
296	 * Validate these zone addresses.
297	 */
298
299	new_start = start + (vaddr - mapped);
300	pmap_map(mapped, start, new_start, VM_PROT_READ | VM_PROT_WRITE);
301	bzero((caddr_t) mapped, (vaddr - mapped));
302	start = round_page(new_start);
303
304	/*
305	 * Compute the number of pages of memory that will be available for
306	 * use (taking into account the overhead of a page structure per
307	 * page).
308	 */
309
310	first_page = phys_avail[0] / PAGE_SIZE;
311	last_page = phys_avail[(nblocks - 1) * 2 + 1] / PAGE_SIZE;
312
313	page_range = last_page - (phys_avail[0] / PAGE_SIZE);
314	npages = (total - (page_range * sizeof(struct vm_page)) -
315	    (start - phys_avail[biggestone])) / PAGE_SIZE;
316
317	/*
318	 * Initialize the mem entry structures now, and put them in the free
319	 * queue.
320	 */
321
322	vm_page_array = (vm_page_t) vaddr;
323	mapped = vaddr;
324
325	/*
326	 * Validate these addresses.
327	 */
328
329	new_start = round_page(start + page_range * sizeof(struct vm_page));
330	mapped = pmap_map(mapped, start, new_start,
331	    VM_PROT_READ | VM_PROT_WRITE);
332	start = new_start;
333
334	first_managed_page = start / PAGE_SIZE;
335
336	/*
337	 * Clear all of the page structures
338	 */
339	bzero((caddr_t) vm_page_array, page_range * sizeof(struct vm_page));
340	vm_page_array_size = page_range;
341
342	cnt.v_page_count = 0;
343	cnt.v_free_count = 0;
344	for (i = 0; phys_avail[i + 1] && npages > 0; i += 2) {
345		if (i == biggestone)
346			pa = ptoa(first_managed_page);
347		else
348			pa = phys_avail[i];
349		while (pa < phys_avail[i + 1] && npages-- > 0) {
350			++cnt.v_page_count;
351			++cnt.v_free_count;
352			m = PHYS_TO_VM_PAGE(pa);
353			m->queue = PQ_FREE;
354			m->flags = 0;
355			m->phys_addr = pa;
356			TAILQ_INSERT_TAIL(&vm_page_queue_free, m, pageq);
357			pa += PAGE_SIZE;
358		}
359	}
360
361	return (mapped);
362}
363
364/*
365 *	vm_page_hash:
366 *
367 *	Distributes the object/offset key pair among hash buckets.
368 *
369 *	NOTE:  This function depends on vm_page_bucket_count being a power of 2.
370 */
371static inline __pure int
372vm_page_hash(object, pindex)
373	vm_object_t object;
374	vm_pindex_t pindex;
375{
376	return ((((unsigned) object) >> 5) + (pindex >> 1)) & vm_page_hash_mask;
377}
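
/*
 * Worked example (hypothetical values): with 1024 hash buckets the mask
 * is 0x3ff, so an object at kernel address 0xf0240000 with pindex 6
 * lands in bucket
 *
 *	((0xf0240000 >> 5) + (6 >> 1)) & 0x3ff
 *	    == (0x07812000 + 3) & 0x3ff
 *	    == 0x003
 *
 * The >> 5 drops low-order pointer bits that vary little between
 * objects, while pindex >> 1 spreads consecutive offsets within one
 * object across neighbouring buckets.
 */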
378
379/*
380 *	vm_page_insert:		[ internal use only ]
381 *
382 *	Inserts the given mem entry into the object/offset hash
383 *	table and the object's list of pages.
384 *
385 *	The object and page must be locked, and the caller must be at splhigh.
386 */
387
388__inline void
389vm_page_insert(m, object, pindex)
390	register vm_page_t m;
391	register vm_object_t object;
392	register vm_pindex_t pindex;
393{
394	register struct pglist *bucket;
395
396	if (m->flags & PG_TABLED)
397		panic("vm_page_insert: already inserted");
398
399	/*
400	 * Record the object/offset pair in this page
401	 */
402
403	m->object = object;
404	m->pindex = pindex;
405
406	/*
407	 * Insert it into the object/offset hash table
408	 */
409
410	bucket = &vm_page_buckets[vm_page_hash(object, pindex)];
411	TAILQ_INSERT_TAIL(bucket, m, hashq);
412
413	/*
414	 * Now link into the object's list of backed pages.
415	 */
416
417	TAILQ_INSERT_TAIL(&object->memq, m, listq);
418	m->flags |= PG_TABLED;
419
420	/*
421	 * And show that the object has one more resident page.
422	 */
423
424	object->resident_page_count++;
425}
426
427/*
428 *	vm_page_remove:		[ internal use only ]
429 *				NOTE: used by device pager as well -wfj
430 *
431 *	Removes the given mem entry from the object/offset-page
432 *	table and the object page list.
433 *
434 *	The object and page must be locked, and at splhigh.
435 */
436
437__inline void
438vm_page_remove(m)
439	register vm_page_t m;
440{
441	register struct pglist *bucket;
442
443	if (!(m->flags & PG_TABLED))
444		return;
445
446	/*
447	 * Remove from the object/offset hash table
448	 */
449
450	bucket = &vm_page_buckets[vm_page_hash(m->object, m->pindex)];
451	TAILQ_REMOVE(bucket, m, hashq);
452
453	/*
454	 * Now remove from the object's list of backed pages.
455	 */
456
457	TAILQ_REMOVE(&m->object->memq, m, listq);
458
459	/*
460	 * And show that the object has one fewer resident page.
461	 */
462
463	m->object->resident_page_count--;
464
465	m->flags &= ~PG_TABLED;
466}
467
468/*
469 *	vm_page_lookup:
470 *
471 *	Returns the page associated with the object/offset
472 *	pair specified; if none is found, NULL is returned.
473 *
474 *	The object must be locked.  No side effects.
475 */
476
477vm_page_t
478vm_page_lookup(object, pindex)
479	register vm_object_t object;
480	register vm_pindex_t pindex;
481{
482	register vm_page_t m;
483	register struct pglist *bucket;
484	int s;
485
486	/*
487	 * Search the hash table for this object/offset pair
488	 */
489
490	bucket = &vm_page_buckets[vm_page_hash(object, pindex)];
491
492	s = splvm();
493	for (m = TAILQ_FIRST(bucket); m != NULL; m = TAILQ_NEXT(m,hashq)) {
494		if ((m->object == object) && (m->pindex == pindex)) {
495			splx(s);
496			return (m);
497		}
498	}
499	splx(s);
500	return (NULL);
501}
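
/*
 * Illustrative caller pattern (a sketch, not taken from this file):
 * pagers and the fault code typically probe for a resident page first
 * and fall back to allocating one:
 *
 *	m = vm_page_lookup(object, pindex);
 *	if (m == NULL)
 *		m = vm_page_alloc(object, pindex, VM_ALLOC_NORMAL);
 *
 * vm_page_lookup() raises the spl around the hash chain walk itself,
 * so the caller only needs to hold the object locked.
 */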
502
503/*
504 *	vm_page_rename:
505 *
506 *	Move the given memory entry from its
507 *	current object to the specified target object/offset.
508 *
509 *	The object must be locked.
510 */
511void
512vm_page_rename(m, new_object, new_pindex)
513	register vm_page_t m;
514	register vm_object_t new_object;
515	vm_pindex_t new_pindex;
516{
517	int s;
518
519	s = splvm();
520	vm_page_remove(m);
521	vm_page_insert(m, new_object, new_pindex);
522	splx(s);
523}
524
525/*
526 * vm_page_unqueue without any wakeup
527 */
528__inline void
529vm_page_unqueue_nowakeup(m)
530	vm_page_t m;
531{
532	int queue = m->queue;
533	if (queue != PQ_NONE) {
534		m->queue = PQ_NONE;
535		TAILQ_REMOVE(vm_page_queues[queue].pl, m, pageq);
536		--(*vm_page_queues[queue].cnt);
537	}
538}
539
540
541/*
542 * vm_page_unqueue must be called at splvm() or higher.
543 */
544__inline void
545vm_page_unqueue(m)
546	vm_page_t m;
547{
548	int queue = m->queue;
549	if (queue != PQ_NONE) {
550		m->queue = PQ_NONE;
551		TAILQ_REMOVE(vm_page_queues[queue].pl, m, pageq);
552		--(*vm_page_queues[queue].cnt);
553		if (queue == PQ_CACHE) {
554			if ((cnt.v_cache_count + cnt.v_free_count) <
555				(cnt.v_free_reserved + cnt.v_cache_min))
556				pagedaemon_wakeup();
557		}
558	}
559}
560
561/*
562 *	vm_page_alloc:
563 *
564 *	Allocate and return a memory cell associated
565 *	with this VM object/offset pair.
566 *
567 *	page_req classes:
568 *	VM_ALLOC_NORMAL		normal process request
569 *	VM_ALLOC_SYSTEM		system *really* needs a page
570 *	VM_ALLOC_INTERRUPT	interrupt time request
571 *	VM_ALLOC_ZERO		zero page
572 *
573 *	Object must be locked.
574 */
575vm_page_t
576vm_page_alloc(object, pindex, page_req)
577	vm_object_t object;
578	vm_pindex_t pindex;
579	int page_req;
580{
581	register vm_page_t m;
582	int queue;
583	int s;
584
585#ifdef DIAGNOSTIC
586	m = vm_page_lookup(object, pindex);
587	if (m)
588		panic("vm_page_alloc: page already allocated");
589#endif
590
591	if ((curproc == pageproc) && (page_req != VM_ALLOC_INTERRUPT)) {
592		page_req = VM_ALLOC_SYSTEM;
593	}
594
595	s = splvm();
596
597	switch (page_req) {
598
599	case VM_ALLOC_NORMAL:
600		if (cnt.v_free_count >= cnt.v_free_reserved) {
601			m = TAILQ_FIRST(&vm_page_queue_free);
602			if (m == NULL) {
603				--vm_page_zero_count;
604				m = TAILQ_FIRST(&vm_page_queue_zero);
605			}
606		} else {
607			m = TAILQ_FIRST(&vm_page_queue_cache);
608			if (m == NULL) {
609				splx(s);
610#if defined(DIAGNOSTIC)
611				if (cnt.v_cache_count > 0)
612					printf("vm_page_alloc(NORMAL): missing pages on cache queue: %d\n", cnt.v_cache_count);
613#endif
614				pagedaemon_wakeup();
615				return (NULL);
616			}
617		}
618		break;
619
620	case VM_ALLOC_ZERO:
621		if (cnt.v_free_count >= cnt.v_free_reserved) {
622			m = TAILQ_FIRST(&vm_page_queue_zero);
623			if (m) {
624				--vm_page_zero_count;
625			} else {
626				m = TAILQ_FIRST(&vm_page_queue_free);
627			}
628		} else {
629			m = TAILQ_FIRST(&vm_page_queue_cache);
630			if (m == NULL) {
631				splx(s);
632#if defined(DIAGNOSTIC)
633				if (cnt.v_cache_count > 0)
634					printf("vm_page_alloc(ZERO): missing pages on cache queue: %d\n", cnt.v_cache_count);
635#endif
636				pagedaemon_wakeup();
637				return (NULL);
638			}
639		}
640		break;
641
642	case VM_ALLOC_SYSTEM:
643		if ((cnt.v_free_count >= cnt.v_free_reserved) ||
644		    ((cnt.v_cache_count == 0) &&
645		    (cnt.v_free_count >= cnt.v_interrupt_free_min))) {
646				m = TAILQ_FIRST(&vm_page_queue_free);
647				if (m == NULL) {
648					--vm_page_zero_count;
649					m = TAILQ_FIRST(&vm_page_queue_zero);
650				}
651		} else {
652			m = TAILQ_FIRST(&vm_page_queue_cache);
653			if (m == NULL) {
654				splx(s);
655#if defined(DIAGNOSTIC)
656				if (cnt.v_cache_count > 0)
657					printf("vm_page_alloc(SYSTEM): missing pages on cache queue: %d\n", cnt.v_cache_count);
658#endif
659				pagedaemon_wakeup();
660				return (NULL);
661			}
662		}
663		break;
664
665	case VM_ALLOC_INTERRUPT:
666		if (cnt.v_free_count > 0) {
667			m = TAILQ_FIRST(&vm_page_queue_free);
668			if (m == NULL) {
669				--vm_page_zero_count;
670				m = TAILQ_FIRST(&vm_page_queue_zero);
671			}
672		} else {
673			splx(s);
674			pagedaemon_wakeup();
675			return (NULL);
676		}
677		break;
678
679	default:
680		panic("vm_page_alloc: invalid allocation class");
681	}
682
683	queue = m->queue;
684	TAILQ_REMOVE(vm_page_queues[queue].pl, m, pageq);
685	--(*vm_page_queues[queue].cnt);
686	if (queue == PQ_ZERO) {
687		m->flags = PG_ZERO|PG_BUSY;
688	} else if (queue == PQ_CACHE) {
689		vm_page_remove(m);
690		m->flags = PG_BUSY;
691	} else {
692		m->flags = PG_BUSY;
693	}
694	m->wire_count = 0;
695	m->hold_count = 0;
696	m->busy = 0;
697	m->valid = 0;
698	m->dirty = 0;
699	m->queue = PQ_NONE;
700
701	/* XXX before splx until vm_page_insert is safe */
702	vm_page_insert(m, object, pindex);
703
704	splx(s);
705
706	/*
707	 * Don't wakeup too often - wakeup the pageout daemon when
708	 * we would be nearly out of memory.
709	 */
710	if (((cnt.v_free_count + cnt.v_cache_count) <
711		(cnt.v_free_reserved + cnt.v_cache_min)) ||
712			(cnt.v_free_count < cnt.v_pageout_free_min))
713		pagedaemon_wakeup();
714
715	return (m);
716}
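
/*
 * Illustrative usage sketch (an assumption, not part of this file):
 * callers that may sleep typically retry a failed allocation after
 * waiting for the pageout daemon, e.g. with the VM_WAIT macro from
 * vm_pageout.h:
 *
 *	do {
 *		m = vm_page_alloc(object, pindex, VM_ALLOC_NORMAL);
 *		if (m == NULL)
 *			VM_WAIT;
 *	} while (m == NULL);
 *
 * Interrupt-time callers (VM_ALLOC_INTERRUPT) cannot sleep and must be
 * prepared to handle a NULL return instead.
 */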
717
718/*
719 *	vm_page_activate:
720 *
721 *	Put the specified page on the active list (if appropriate).
722 *
723 *	The page queues must be locked.
724 */
725void
726vm_page_activate(m)
727	register vm_page_t m;
728{
729	int s;
730
731	s = splvm();
732	if (m->queue == PQ_ACTIVE)
733		panic("vm_page_activate: already active");
734
735	if (m->queue == PQ_CACHE)
736		cnt.v_reactivated++;
737
738	vm_page_unqueue(m);
739
740	if (m->wire_count == 0) {
741		TAILQ_INSERT_TAIL(&vm_page_queue_active, m, pageq);
742		m->queue = PQ_ACTIVE;
743		cnt.v_active_count++;
744	}
745	splx(s);
746}
747
748/*
749 * helper routine for vm_page_free and vm_page_free_zero
750 */
751static int
752vm_page_freechk_and_unqueue(m)
753	vm_page_t m;
754{
755	if (m->busy ||
756		(m->flags & PG_BUSY) ||
757		(m->queue == PQ_FREE) ||
758		(m->hold_count != 0)) {
759		printf("vm_page_free: pindex(%ld), busy(%d), PG_BUSY(%d), hold(%d)\n",
760			m->pindex, m->busy,
761			(m->flags & PG_BUSY) ? 1 : 0, m->hold_count);
762		if (m->queue == PQ_FREE)
763			panic("vm_page_free: freeing free page");
764		else
765			panic("vm_page_free: freeing busy page");
766	}
767
768	vm_page_remove(m);
769	vm_page_unqueue_nowakeup(m);
770	if ((m->flags & PG_FICTITIOUS) != 0) {
771		return 0;
772	}
773	if (m->wire_count != 0) {
774		if (m->wire_count > 1) {
775			panic("vm_page_free: invalid wire count (%d), pindex: 0x%x",
776				m->wire_count, m->pindex);
777		}
778		m->wire_count = 0;
779		cnt.v_wire_count--;
780	}
781
782	return 1;
783}
784
785/*
786 * helper routine for vm_page_free and vm_page_free_zero
787 */
788static __inline void
789vm_page_free_wakeup()
790{
791
792/*
793 * if pageout daemon needs pages, then tell it that there are
794 * some free.
795 */
796	if (vm_pageout_pages_needed) {
797		wakeup(&vm_pageout_pages_needed);
798		vm_pageout_pages_needed = 0;
799	}
800	/*
801	 * Wake up processes that are waiting on memory if we hit a
802	 * high water mark, and wake up the scheduler process if we have
803	 * lots of memory; the scheduler will then swap processes back in.
804	 */
805	if (vm_pages_needed &&
806		((cnt.v_free_count + cnt.v_cache_count) >= cnt.v_free_min)) {
807		wakeup(&cnt.v_free_count);
808		vm_pages_needed = 0;
809	}
810}
811
812/*
813 *	vm_page_free:
814 *
815 *	Returns the given page to the free list,
816 *	disassociating it with any VM object.
817 *
818 *	Object and page must be locked prior to entry.
819 */
820void
821vm_page_free(m)
822	register vm_page_t m;
823{
824	int s;
825
826	s = splvm();
827
828	cnt.v_tfree++;
829
830	if (!vm_page_freechk_and_unqueue(m)) {
831		splx(s);
832		return;
833	}
834
835	m->queue = PQ_FREE;
836
837	/*
838	 * If the pageout process is grabbing the page, it is likely
839	 * that the page is NOT in the cache.  It is more likely that
840	 * the page will be partially in the cache if it is being
841	 * explicitly freed.
842	 */
843	if (curproc == pageproc) {
844		TAILQ_INSERT_TAIL(&vm_page_queue_free, m, pageq);
845	} else {
846		TAILQ_INSERT_HEAD(&vm_page_queue_free, m, pageq);
847	}
848
849	cnt.v_free_count++;
850	vm_page_free_wakeup();
851	splx(s);
852}
853
854void
855vm_page_free_zero(m)
856	register vm_page_t m;
857{
858	int s;
859
860	s = splvm();
861
862	cnt.v_tfree++;
863
864	if (!vm_page_freechk_and_unqueue(m)) {
865		splx(s);
866		return;
867	}
868
869	m->queue = PQ_ZERO;
870
871	TAILQ_INSERT_HEAD(&vm_page_queue_zero, m, pageq);
872	++vm_page_zero_count;
873	cnt.v_free_count++;
874	vm_page_free_wakeup();
875	splx(s);
876}
877
878/*
879 *	vm_page_wire:
880 *
881 *	Mark this page as wired down by yet
882 *	another map, removing it from paging queues
883 *	as necessary.
884 *
885 *	The page queues must be locked.
886 */
887void
888vm_page_wire(m)
889	register vm_page_t m;
890{
891	int s;
892
893	if (m->wire_count == 0) {
894		s = splvm();
895		vm_page_unqueue(m);
896		splx(s);
897		cnt.v_wire_count++;
898	}
899	m->wire_count++;
900	m->flags |= PG_MAPPED;
901}
902
903/*
904 *	vm_page_unwire:
905 *
906 *	Release one wiring of this page, potentially
907 *	enabling it to be paged again.
908 *
909 *	The page queues must be locked.
910 */
911void
912vm_page_unwire(m)
913	register vm_page_t m;
914{
915	int s;
916
917	s = splvm();
918
919	if (m->wire_count > 0)
920		m->wire_count--;
921
922	if (m->wire_count == 0) {
923		cnt.v_wire_count--;
924		TAILQ_INSERT_TAIL(&vm_page_queue_active, m, pageq);
925		m->queue = PQ_ACTIVE;
926		cnt.v_active_count++;
927	}
928	splx(s);
929}
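
/*
 * Illustrative pairing (a sketch, not taken from this file): a page is
 * typically wired while its physical address is in use by a device or
 * otherwise must not be paged out, and unwired afterwards, at which
 * point vm_page_unwire() places it back on the active queue:
 *
 *	vm_page_wire(m);
 *	... use VM_PAGE_TO_PHYS(m) for the duration of the operation ...
 *	vm_page_unwire(m);
 */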
930
931
932/*
933 *	vm_page_deactivate:
934 *
935 *	Returns the given page to the inactive list,
936 *	indicating that no physical maps have access
937 *	to this page.  [Used by the physical mapping system.]
938 *
939 *	The page queues must be locked.
940 */
941void
942vm_page_deactivate(m)
943	register vm_page_t m;
944{
945	int s;
946
947	/*
948	 * Only move active pages -- ignore locked or already inactive ones.
949	 *
950	 * XXX: sometimes we get pages which aren't wired down or on any queue -
951	 * we need to put them on the inactive queue also, otherwise we lose
952	 * track of them. Paul Mackerras (paulus@cs.anu.edu.au) 9-Jan-93.
953	 */
954	if (m->queue == PQ_INACTIVE)
955		return;
956
957	s = splvm();
958	if (m->wire_count == 0 && m->hold_count == 0) {
959		if (m->queue == PQ_CACHE)
960			cnt.v_reactivated++;
961		vm_page_unqueue(m);
962		TAILQ_INSERT_TAIL(&vm_page_queue_inactive, m, pageq);
963		m->queue = PQ_INACTIVE;
964		cnt.v_inactive_count++;
965	}
966	splx(s);
967}
968
969/*
970 * vm_page_cache
971 *
972 * Put the specified page onto the page cache queue (if appropriate).
973 */
974void
975vm_page_cache(m)
976	register vm_page_t m;
977{
978	int s;
979
980	if ((m->flags & PG_BUSY) || m->busy || m->wire_count) {
981		printf("vm_page_cache: attempting to cache busy page\n");
982		return;
983	}
984	if (m->queue == PQ_CACHE)
985		return;
986
987	vm_page_protect(m, VM_PROT_NONE);
988	if (m->dirty != 0) {
989		panic("vm_page_cache: caching a dirty page, pindex: %d", m->pindex);
990	}
991	s = splvm();
992	vm_page_unqueue_nowakeup(m);
993	TAILQ_INSERT_TAIL(&vm_page_queue_cache, m, pageq);
994	m->queue = PQ_CACHE;
995	cnt.v_cache_count++;
996	vm_page_free_wakeup();
997	splx(s);
998}
999
1000
1001/*
1002 * mapping function for valid bits or for dirty bits in
1003 * a page
1004 */
1005inline int
1006vm_page_bits(int base, int size)
1007{
1008	u_short chunk;
1009
1010	if ((base == 0) && (size >= PAGE_SIZE))
1011		return VM_PAGE_BITS_ALL;
1012	size = (size + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1);
1013	base = (base % PAGE_SIZE) / DEV_BSIZE;
1014	chunk = vm_page_dev_bsize_chunks[size / DEV_BSIZE];
1015	return (chunk << base) & VM_PAGE_BITS_ALL;
1016}
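
/*
 * Worked examples (assuming the usual i386 values PAGE_SIZE == 4096 and
 * DEV_BSIZE == 512, i.e. eight chunks per page):
 *
 *	vm_page_bits(0, 4096)    == VM_PAGE_BITS_ALL	(whole page)
 *	vm_page_bits(0, 512)     == 0x01		(first chunk only)
 *	vm_page_bits(1024, 1024) == 0x0c		(chunks 2 and 3)
 *
 * Sizes are rounded up to a multiple of DEV_BSIZE before indexing the
 * vm_page_dev_bsize_chunks[] map above.
 */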
1017
1018/*
1019 * set a page valid and clean
1020 */
1021void
1022vm_page_set_validclean(m, base, size)
1023	vm_page_t m;
1024	int base;
1025	int size;
1026{
1027	int pagebits = vm_page_bits(base, size);
1028	m->valid |= pagebits;
1029	m->dirty &= ~pagebits;
1030	if (base == 0 && size == PAGE_SIZE)
1031		pmap_clear_modify(VM_PAGE_TO_PHYS(m));
1032}
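
/*
 * Illustrative use (a sketch): after a pager has read an entire page in
 * from backing store it can mark the page fully valid and clean with
 *
 *	vm_page_set_validclean(m, 0, PAGE_SIZE);
 *
 * which, because the whole page is covered, also clears the pmap
 * modify bit for the page.
 */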
1033
1034/*
1035 * set a page (partially) invalid
1036 */
1037void
1038vm_page_set_invalid(m, base, size)
1039	vm_page_t m;
1040	int base;
1041	int size;
1042{
1043	int bits;
1044
1045	m->valid &= ~(bits = vm_page_bits(base, size));
1046	if (m->valid == 0)
1047		m->dirty &= ~bits;
1048}
1049
1050/*
1051 * is (partial) page valid?
1052 */
1053int
1054vm_page_is_valid(m, base, size)
1055	vm_page_t m;
1056	int base;
1057	int size;
1058{
1059	int bits = vm_page_bits(base, size);
1060
1061	if (m->valid && ((m->valid & bits) == bits))
1062		return 1;
1063	else
1064		return 0;
1065}
1066
1067void
1068vm_page_test_dirty(m)
1069	vm_page_t m;
1070{
1071	if ((m->dirty != VM_PAGE_BITS_ALL) &&
1072	    pmap_is_modified(VM_PAGE_TO_PHYS(m))) {
1073		m->dirty = VM_PAGE_BITS_ALL;
1074	}
1075}
1076
1077/*
1078 * This interface is for merging with malloc() someday.
1079 * Even if we never implement compaction so that contiguous allocation
1080 * works after initialization time, malloc()'s data structures are good
1081 * for statistics and for allocations of less than a page.
1082 */
1083void *
1084contigmalloc(size, type, flags, low, high, alignment, boundary)
1085	unsigned long size;	/* should be size_t here and for malloc() */
1086	int type;
1087	int flags;
1088	unsigned long low;
1089	unsigned long high;
1090	unsigned long alignment;
1091	unsigned long boundary;
1092{
1093	int i, s, start;
1094	vm_offset_t addr, phys, tmp_addr;
1095	vm_page_t pga = vm_page_array;
1096
1097	size = round_page(size);
1098	if (size == 0)
1099		panic("vm_page_alloc_contig: size must not be 0");
1100	if ((alignment & (alignment - 1)) != 0)
1101		panic("vm_page_alloc_contig: alignment must be a power of 2");
1102	if ((boundary & (boundary - 1)) != 0)
1103		panic("vm_page_alloc_contig: boundary must be a power of 2");
1104
1105	start = 0;
1106	s = splvm();
1107again:
1108	/*
1109	 * Find first page in array that is free, within range, aligned, and
1110	 * such that the boundary won't be crossed.
1111	 */
1112	for (i = start; i < cnt.v_page_count; i++) {
1113		phys = VM_PAGE_TO_PHYS(&pga[i]);
1114		if ((pga[i].queue == PQ_FREE) &&
1115		    (phys >= low) && (phys < high) &&
1116		    ((phys & (alignment - 1)) == 0) &&
1117		    (((phys ^ (phys + size - 1)) & ~(boundary - 1)) == 0))
1118			break;
1119	}
1120
1121	/*
1122	 * If the above failed or we will exceed the upper bound, fail.
1123	 */
1124	if ((i == cnt.v_page_count) ||
1125		((VM_PAGE_TO_PHYS(&pga[i]) + size) > high)) {
1126		splx(s);
1127		return (NULL);
1128	}
1129	start = i;
1130
1131	/*
1132	 * Check successive pages for contiguous and free.
1133	 */
1134	for (i = start + 1; i < (start + size / PAGE_SIZE); i++) {
1135		if ((VM_PAGE_TO_PHYS(&pga[i]) !=
1136		    (VM_PAGE_TO_PHYS(&pga[i - 1]) + PAGE_SIZE)) ||
1137		    (pga[i].queue != PQ_FREE)) {
1138			start++;
1139			goto again;
1140		}
1141	}
1142
1143	/*
1144	 * We've found a contiguous chunk that meets our requirements.
1145	 * Allocate kernel VM, unfree and assign the physical pages to it and
1146	 * return kernel VM pointer.
1147	 */
1148	tmp_addr = addr = kmem_alloc_pageable(kernel_map, size);
1149	if (addr == 0) {
1150		splx(s);
1151		return (NULL);
1152	}
1153
1154	for (i = start; i < (start + size / PAGE_SIZE); i++) {
1155		vm_page_t m = &pga[i];
1156
1157		TAILQ_REMOVE(&vm_page_queue_free, m, pageq);
1158		cnt.v_free_count--;
1159		m->valid = VM_PAGE_BITS_ALL;
1160		m->flags = 0;
1161		m->dirty = 0;
1162		m->wire_count = 0;
1163		m->busy = 0;
1164		m->queue = PQ_NONE;
1165		vm_page_insert(m, kernel_object,
1166			OFF_TO_IDX(tmp_addr - VM_MIN_KERNEL_ADDRESS));
1167		vm_page_wire(m);
1168		pmap_kenter(tmp_addr, VM_PAGE_TO_PHYS(m));
1169		tmp_addr += PAGE_SIZE;
1170	}
1171
1172	splx(s);
1173	return ((void *)addr);
1174}
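
/*
 * Illustrative call (hypothetical driver values): a 16K ISA DMA buffer
 * that must lie below 16MB, be page aligned, and not cross a 64K
 * boundary could be requested with
 *
 *	buf = contigmalloc(16 * 1024, M_DEVBUF, M_NOWAIT,
 *	    0ul, 0x1000000ul, PAGE_SIZE, 0x10000ul);
 *
 * A NULL return means no suitable run of free pages was found; no
 * retry or sleeping is attempted here, so the caller must cope with
 * failure.
 */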
1175
1176vm_offset_t
1177vm_page_alloc_contig(size, low, high, alignment)
1178	vm_offset_t size;
1179	vm_offset_t low;
1180	vm_offset_t high;
1181	vm_offset_t alignment;
1182{
1183	return ((vm_offset_t)contigmalloc(size, M_DEVBUF, M_NOWAIT, low, high,
1184					  alignment, 0ul));
1185}
1186#ifdef DDB
1187void
1188DDB_print_page_info(void)
1189{
1190	printf("cnt.v_free_count: %d\n", cnt.v_free_count);
1191	printf("cnt.v_cache_count: %d\n", cnt.v_cache_count);
1192	printf("cnt.v_inactive_count: %d\n", cnt.v_inactive_count);
1193	printf("cnt.v_active_count: %d\n", cnt.v_active_count);
1194	printf("cnt.v_wire_count: %d\n", cnt.v_wire_count);
1195	printf("cnt.v_free_reserved: %d\n", cnt.v_free_reserved);
1196	printf("cnt.v_free_min: %d\n", cnt.v_free_min);
1197	printf("cnt.v_free_target: %d\n", cnt.v_free_target);
1198	printf("cnt.v_cache_min: %d\n", cnt.v_cache_min);
1199	printf("cnt.v_inactive_target: %d\n", cnt.v_inactive_target);
1200}
1201#endif
1202