/*
 * Copyright (c) 1991 Regents of the University of California.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)vm_page.c	7.4 (Berkeley) 5/7/91
 *	$Id: vm_page.c,v 1.93 1998/02/09 06:11:32 eivind Exp $
 */

/*
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Authors: Avadis Tevanian, Jr., Michael Wayne Young
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/*
 *	Resident memory management module.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/proc.h>
#include <sys/vmmeter.h>
#include <sys/vnode.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_prot.h>
#include <sys/lock.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_extern.h>

static void	vm_page_queue_init __P((void));
static vm_page_t vm_page_select_free __P((vm_object_t object,
			vm_pindex_t pindex, int prefqueue));

/*
 *	Associated with each page of user-allocatable memory is a
 *	page structure.
 */

static struct pglist *vm_page_buckets;	/* Array of buckets */
static int vm_page_bucket_count;	/* How big is array? */
static int vm_page_hash_mask;		/* Mask for hash function */
static volatile int vm_page_bucket_generation;

struct pglist vm_page_queue_free[PQ_L2_SIZE] = {0};
struct pglist vm_page_queue_zero[PQ_L2_SIZE] = {0};
struct pglist vm_page_queue_active = {0};
struct pglist vm_page_queue_inactive = {0};
struct pglist vm_page_queue_cache[PQ_L2_SIZE] = {0};

static int no_queue=0;

struct vpgqueues vm_page_queues[PQ_COUNT] = {0};
static int pqcnt[PQ_COUNT] = {0};

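/*
 * Note: vm_page_queues[] is indexed by the PQ_* constants.  PQ_FREE,
 * PQ_ZERO and PQ_CACHE each cover PQ_L2_SIZE consecutive entries, one per
 * page "color" (m->pc), all sharing one global counter (cnt.v_free_count
 * or cnt.v_cache_count); PQ_ACTIVE and PQ_INACTIVE are single queues.
 * pqcnt[] holds the per-queue lengths that the lcnt pointers reference.
 */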
static void
vm_page_queue_init(void) {
	int i;

	vm_page_queues[PQ_NONE].pl = NULL;
	vm_page_queues[PQ_NONE].cnt = &no_queue;
	for(i=0;i<PQ_L2_SIZE;i++) {
		vm_page_queues[PQ_FREE+i].pl = &vm_page_queue_free[i];
		vm_page_queues[PQ_FREE+i].cnt = &cnt.v_free_count;
	}
	for(i=0;i<PQ_L2_SIZE;i++) {
		vm_page_queues[PQ_ZERO+i].pl = &vm_page_queue_zero[i];
		vm_page_queues[PQ_ZERO+i].cnt = &cnt.v_free_count;
	}
	vm_page_queues[PQ_INACTIVE].pl = &vm_page_queue_inactive;
	vm_page_queues[PQ_INACTIVE].cnt = &cnt.v_inactive_count;

	vm_page_queues[PQ_ACTIVE].pl = &vm_page_queue_active;
	vm_page_queues[PQ_ACTIVE].cnt = &cnt.v_active_count;
	for(i=0;i<PQ_L2_SIZE;i++) {
		vm_page_queues[PQ_CACHE+i].pl = &vm_page_queue_cache[i];
		vm_page_queues[PQ_CACHE+i].cnt = &cnt.v_cache_count;
	}
	for(i=0;i<PQ_COUNT;i++) {
		if (vm_page_queues[i].pl) {
			TAILQ_INIT(vm_page_queues[i].pl);
		} else if (i != 0) {
			panic("vm_page_queue_init: queue %d is null", i);
		}
		vm_page_queues[i].lcnt = &pqcnt[i];
	}
}

vm_page_t vm_page_array = 0;
static int vm_page_array_size = 0;
long first_page = 0;
static long last_page;
static vm_size_t page_mask;
static int page_shift;
int vm_page_zero_count = 0;

/*
 * map of contiguous valid DEV_BSIZE chunks in a page
 * (this list is valid for page sizes up to 16*DEV_BSIZE)
 */
static u_short vm_page_dev_bsize_chunks[] = {
	0x0, 0x1, 0x3, 0x7, 0xf, 0x1f, 0x3f, 0x7f, 0xff,
	0x1ff, 0x3ff, 0x7ff, 0xfff, 0x1fff, 0x3fff, 0x7fff, 0xffff
};

static inline int vm_page_hash __P((vm_object_t object, vm_pindex_t pindex));
static int vm_page_freechk_and_unqueue __P((vm_page_t m));
static void vm_page_free_wakeup __P((void));

/*
 *	vm_set_page_size:
 *
 *	Sets the page size, perhaps based upon the memory
 *	size.  Must be called before any use of page-size
 *	dependent functions.
 *
 *	Sets page_shift and page_mask from cnt.v_page_size.
 */
void
vm_set_page_size()
{

	if (cnt.v_page_size == 0)
		cnt.v_page_size = DEFAULT_PAGE_SIZE;
	page_mask = cnt.v_page_size - 1;
	if ((page_mask & cnt.v_page_size) != 0)
		panic("vm_set_page_size: page size not a power of two");
	for (page_shift = 0;; page_shift++)
		if ((1 << page_shift) == cnt.v_page_size)
			break;
}

/*
 *	vm_page_startup:
 *
 *	Initializes the resident memory module.
 *
 *	Allocates memory for the page cells, and
 *	for the object/offset-to-page hash table headers.
 *	Each page cell is initialized and placed on the free list.
 */

vm_offset_t
vm_page_startup(starta, enda, vaddr)
	register vm_offset_t starta;
	vm_offset_t enda;
	register vm_offset_t vaddr;
{
	register vm_offset_t mapped;
	register vm_page_t m;
	register struct pglist *bucket;
	vm_size_t npages, page_range;
	register vm_offset_t new_start;
	int i;
	vm_offset_t pa;
	int nblocks;
	vm_offset_t first_managed_page;

	/* the biggest memory array is the second group of pages */
	vm_offset_t start;
	vm_offset_t biggestone, biggestsize;

	vm_offset_t total;

	total = 0;
	biggestsize = 0;
	biggestone = 0;
	nblocks = 0;
	vaddr = round_page(vaddr);

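	/*
	 * Note: phys_avail[] is the machine-dependent table of usable
	 * physical memory, laid out as (start, end) address pairs and
	 * terminated by a zero end entry, which is why the loops below
	 * step by two and stop when phys_avail[i + 1] is 0.
	 */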
	for (i = 0; phys_avail[i + 1]; i += 2) {
		phys_avail[i] = round_page(phys_avail[i]);
		phys_avail[i + 1] = trunc_page(phys_avail[i + 1]);
	}

	for (i = 0; phys_avail[i + 1]; i += 2) {
		int size = phys_avail[i + 1] - phys_avail[i];

		if (size > biggestsize) {
			biggestone = i;
			biggestsize = size;
		}
		++nblocks;
		total += size;
	}

	start = phys_avail[biggestone];

	/*
	 * Initialize the queue headers for the free queue, the active queue
	 * and the inactive queue.
	 */

	vm_page_queue_init();

	/*
	 * Allocate (and initialize) the hash table buckets.
	 *
	 * The number of buckets MUST BE a power of 2, and the actual value is
	 * the next power of 2 greater than the number of physical pages in
	 * the system.
	 *
	 * Note: This computation can be tweaked if desired.
	 */
	vm_page_buckets = (struct pglist *) vaddr;
	bucket = vm_page_buckets;
	if (vm_page_bucket_count == 0) {
		vm_page_bucket_count = 1;
		while (vm_page_bucket_count < atop(total))
			vm_page_bucket_count <<= 1;
	}
	vm_page_hash_mask = vm_page_bucket_count - 1;

	/*
	 * Validate these addresses.
	 */

	new_start = start + vm_page_bucket_count * sizeof(struct pglist);
	new_start = round_page(new_start);
	mapped = vaddr;
	vaddr = pmap_map(mapped, start, new_start,
	    VM_PROT_READ | VM_PROT_WRITE);
	start = new_start;
	bzero((caddr_t) mapped, vaddr - mapped);
	mapped = vaddr;

	for (i = 0; i < vm_page_bucket_count; i++) {
		TAILQ_INIT(bucket);
		bucket++;
	}

	/*
	 * Validate these zone addresses.
	 */

	new_start = start + (vaddr - mapped);
	pmap_map(mapped, start, new_start, VM_PROT_READ | VM_PROT_WRITE);
	bzero((caddr_t) mapped, (vaddr - mapped));
	start = round_page(new_start);

	/*
	 * Compute the number of pages of memory that will be available for
	 * use (taking into account the overhead of a page structure per
	 * page).
	 */

	first_page = phys_avail[0] / PAGE_SIZE;
	last_page = phys_avail[(nblocks - 1) * 2 + 1] / PAGE_SIZE;

	page_range = last_page - (phys_avail[0] / PAGE_SIZE);
	npages = (total - (page_range * sizeof(struct vm_page)) -
	    (start - phys_avail[biggestone])) / PAGE_SIZE;

	/*
	 * Initialize the mem entry structures now, and put them in the free
	 * queue.
	 */

	vm_page_array = (vm_page_t) vaddr;
	mapped = vaddr;

	/*
	 * Validate these addresses.
	 */

	new_start = round_page(start + page_range * sizeof(struct vm_page));
	mapped = pmap_map(mapped, start, new_start,
	    VM_PROT_READ | VM_PROT_WRITE);
	start = new_start;

	first_managed_page = start / PAGE_SIZE;

	/*
	 * Clear all of the page structures
	 */
	bzero((caddr_t) vm_page_array, page_range * sizeof(struct vm_page));
	vm_page_array_size = page_range;

	cnt.v_page_count = 0;
	cnt.v_free_count = 0;
	for (i = 0; phys_avail[i + 1] && npages > 0; i += 2) {
		if (i == biggestone)
			pa = ptoa(first_managed_page);
		else
			pa = phys_avail[i];
		while (pa < phys_avail[i + 1] && npages-- > 0) {
			++cnt.v_page_count;
			++cnt.v_free_count;
			m = PHYS_TO_VM_PAGE(pa);
			m->phys_addr = pa;
			m->flags = 0;
			m->pc = (pa >> PAGE_SHIFT) & PQ_L2_MASK;
			m->queue = PQ_FREE + m->pc;
			TAILQ_INSERT_TAIL(vm_page_queues[m->queue].pl, m, pageq);
			++(*vm_page_queues[m->queue].lcnt);
			pa += PAGE_SIZE;
		}
	}
	return (mapped);
}

/*
 *	vm_page_hash:
 *
 *	Distributes the object/offset key pair among hash buckets.
 *
 *	NOTE:  This macro depends on vm_page_bucket_count being a power of 2.
 */
static inline int
vm_page_hash(object, pindex)
	vm_object_t object;
	vm_pindex_t pindex;
{
	return ((((unsigned) object) >> 5) + (pindex >> 1)) & vm_page_hash_mask;
}

/*
 *	vm_page_insert:		[ internal use only ]
 *
 *	Inserts the given mem entry into the object/object-page
 *	table and object list.
 *
 *	The object and page must be locked, and must be splhigh.
 */

void
vm_page_insert(m, object, pindex)
	register vm_page_t m;
	register vm_object_t object;
	register vm_pindex_t pindex;
{
	register struct pglist *bucket;

#if !defined(MAX_PERF)
	if (m->flags & PG_TABLED)
		panic("vm_page_insert: already inserted");
#endif

	/*
	 * Record the object/offset pair in this page
	 */

	m->object = object;
	m->pindex = pindex;

	/*
	 * Insert it into the object_object/offset hash table
	 */

	bucket = &vm_page_buckets[vm_page_hash(object, pindex)];
	TAILQ_INSERT_TAIL(bucket, m, hashq);
	vm_page_bucket_generation++;

	/*
	 * Now link into the object's list of backed pages.
	 */

	TAILQ_INSERT_TAIL(&object->memq, m, listq);
	m->flags |= PG_TABLED;
	m->object->page_hint = m;
	m->object->generation++;

	if (m->wire_count)
		object->wire_count++;

	if ((m->queue - m->pc) == PQ_CACHE)
		object->cache_count++;

	/*
	 * And show that the object has one more resident page.
	 */

	object->resident_page_count++;
}

/*
 *	vm_page_remove:		[ internal use only ]
 *				NOTE: used by device pager as well -wfj
 *
 *	Removes the given mem entry from the object/offset-page
 *	table and the object page list.
 *
 *	The object and page must be locked, and at splhigh.
 */

void
vm_page_remove(m)
	register vm_page_t m;
{
	register struct pglist *bucket;
	vm_object_t object;

	if (!(m->flags & PG_TABLED))
		return;

#if !defined(MAX_PERF)
	if ((m->flags & PG_BUSY) == 0) {
		panic("vm_page_remove: page not busy");
	}
#endif

	m->flags &= ~PG_BUSY;
	if (m->flags & PG_WANTED) {
		m->flags &= ~PG_WANTED;
		wakeup(m);
	}

	object = m->object;
	if (object->page_hint == m)
		object->page_hint = NULL;

	if (m->wire_count)
		object->wire_count--;

	if ((m->queue - m->pc) == PQ_CACHE)
		object->cache_count--;

	/*
	 * Remove from the object_object/offset hash table
	 */

	bucket = &vm_page_buckets[vm_page_hash(m->object, m->pindex)];
	TAILQ_REMOVE(bucket, m, hashq);
	vm_page_bucket_generation++;

	/*
	 * Now remove from the object's list of backed pages.
	 */

	TAILQ_REMOVE(&object->memq, m, listq);

	/*
	 * And show that the object has one fewer resident page.
	 */

	object->resident_page_count--;
	object->generation++;
	m->object = NULL;

	m->flags &= ~PG_TABLED;
}

/*
 *	vm_page_lookup:
 *
 *	Returns the page associated with the object/offset
 *	pair specified; if none is found, NULL is returned.
 *
 *	The object must be locked.  No side effects.
 */

vm_page_t
vm_page_lookup(object, pindex)
	register vm_object_t object;
	register vm_pindex_t pindex;
{
	register vm_page_t m;
	register struct pglist *bucket;
	int generation;
	int s;

	/*
	 * Search the hash table for this object/offset pair
	 */

	if (object->page_hint && (object->page_hint->pindex == pindex) &&
		(object->page_hint->object == object))
		return object->page_hint;

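	/*
	 * Note: vm_page_bucket_generation is bumped by vm_page_insert() and
	 * vm_page_remove() whenever a hash bucket is modified.  The scan
	 * below snapshots the generation and restarts if it changed, so a
	 * lookup never trusts a bucket that was edited while being walked.
	 */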
retry:
	generation = vm_page_bucket_generation;
	bucket = &vm_page_buckets[vm_page_hash(object, pindex)];
	for (m = TAILQ_FIRST(bucket); m != NULL; m = TAILQ_NEXT(m,hashq)) {
		if ((m->object == object) && (m->pindex == pindex)) {
			if (vm_page_bucket_generation != generation)
				goto retry;
			m->object->page_hint = m;
			return (m);
		}
	}
	if (vm_page_bucket_generation != generation)
		goto retry;
	return (NULL);
}

/*
 *	vm_page_rename:
 *
 *	Move the given memory entry from its
 *	current object to the specified target object/offset.
 *
 *	The object must be locked.
 */
void
vm_page_rename(m, new_object, new_pindex)
	register vm_page_t m;
	register vm_object_t new_object;
	vm_pindex_t new_pindex;
{
	int s;

	s = splvm();
	vm_page_remove(m);
	vm_page_insert(m, new_object, new_pindex);
	splx(s);
}

/*
 * vm_page_unqueue without any wakeup
 */
void
vm_page_unqueue_nowakeup(m)
	vm_page_t m;
{
	int queue = m->queue;
	struct vpgqueues *pq;
	if (queue != PQ_NONE) {
		pq = &vm_page_queues[queue];
		m->queue = PQ_NONE;
		TAILQ_REMOVE(pq->pl, m, pageq);
		(*pq->cnt)--;
		(*pq->lcnt)--;
		if ((queue - m->pc) == PQ_CACHE) {
			if (m->object)
				m->object->cache_count--;
		}
	}
}

/*
 * vm_page_unqueue must be called at splhigh();
 */
void
vm_page_unqueue(m)
	vm_page_t m;
{
	int queue = m->queue;
	struct vpgqueues *pq;
	if (queue != PQ_NONE) {
		m->queue = PQ_NONE;
		pq = &vm_page_queues[queue];
		TAILQ_REMOVE(pq->pl, m, pageq);
		(*pq->cnt)--;
		(*pq->lcnt)--;
		if ((queue - m->pc) == PQ_CACHE) {
			if ((cnt.v_cache_count + cnt.v_free_count) <
				(cnt.v_free_reserved + cnt.v_cache_min))
				pagedaemon_wakeup();
			if (m->object)
				m->object->cache_count--;
		}
	}
}

/*
 * Find a page on the specified queue with color optimization.
 */
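/*
 * Note: when PQ_L2_SIZE > 1 the free/cache/zero lists are split into
 * per-color sub-queues.  The search below begins with the sub-queue for
 * the requested color (index) and, failing that, probes the other colors
 * at offsets +/- (i + j) from it, wrapping modulo PQ_L2_SIZE, before
 * finally trying the queue half way around.  With PQ_L2_SIZE == 1 this
 * collapses to a plain TAILQ_FIRST().
 */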
vm_page_t
vm_page_list_find(basequeue, index)
	int basequeue, index;
{
#if PQ_L2_SIZE > 1

	int i,j;
	vm_page_t m;
	int hindex;
	struct vpgqueues *pq;

	pq = &vm_page_queues[basequeue];

	m = TAILQ_FIRST(pq[index].pl);
	if (m)
		return m;

	for(j = 0; j < PQ_L1_SIZE; j++) {
		int ij;
		for(i = (PQ_L2_SIZE / 2) - PQ_L1_SIZE;
			(ij = i + j) > 0;
			i -= PQ_L1_SIZE) {

			hindex = index + ij;
			if (hindex >= PQ_L2_SIZE)
				hindex -= PQ_L2_SIZE;
			if (m = TAILQ_FIRST(pq[hindex].pl))
				return m;

			hindex = index - ij;
			if (hindex < 0)
				hindex += PQ_L2_SIZE;
			if (m = TAILQ_FIRST(pq[hindex].pl))
				return m;
		}
	}

	hindex = index + PQ_L2_SIZE / 2;
	if (hindex >= PQ_L2_SIZE)
		hindex -= PQ_L2_SIZE;
	m = TAILQ_FIRST(pq[hindex].pl);
	if (m)
		return m;

	return NULL;
#else
	return TAILQ_FIRST(vm_page_queues[basequeue].pl);
#endif

}

/*
 * Find a page on the specified queue with color optimization.
 */
vm_page_t
vm_page_select(object, pindex, basequeue)
	vm_object_t object;
	vm_pindex_t pindex;
	int basequeue;
{

#if PQ_L2_SIZE > 1
	int index;
	index = (pindex + object->pg_color) & PQ_L2_MASK;
	return vm_page_list_find(basequeue, index);

#else
	return TAILQ_FIRST(vm_page_queues[basequeue].pl);
#endif

}

/*
 * Find a free or zero page, with specified preference.
 */
static vm_page_t
vm_page_select_free(object, pindex, prefqueue)
	vm_object_t object;
	vm_pindex_t pindex;
	int prefqueue;
{
#if PQ_L2_SIZE > 1
	int i,j;
	int index, hindex;
#endif
	vm_page_t m, mh;
	int oqueuediff;
	struct vpgqueues *pq;

	if (prefqueue == PQ_ZERO)
		oqueuediff = PQ_FREE - PQ_ZERO;
	else
		oqueuediff = PQ_ZERO - PQ_FREE;

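	/*
	 * Note: if the object's page_hint is the page just before the one
	 * being allocated, the physically adjacent vm_page (mh + 1) is
	 * tried first; when it happens to be free or pre-zeroed this gives
	 * physically contiguous allocations for sequentially faulted pages
	 * without any queue search.
	 */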
	if (mh = object->page_hint) {
		 if (mh->pindex == (pindex - 1)) {
			if ((mh->flags & PG_FICTITIOUS) == 0) {
				if ((mh < &vm_page_array[cnt.v_page_count-1]) &&
					(mh >= &vm_page_array[0])) {
					int queue;
					m = mh + 1;
					if (VM_PAGE_TO_PHYS(m) == (VM_PAGE_TO_PHYS(mh) + PAGE_SIZE)) {
						queue = m->queue - m->pc;
						if (queue == PQ_FREE || queue == PQ_ZERO) {
							return m;
						}
					}
				}
			}
		}
	}

	pq = &vm_page_queues[prefqueue];

#if PQ_L2_SIZE > 1

	index = (pindex + object->pg_color) & PQ_L2_MASK;

	if (m = TAILQ_FIRST(pq[index].pl))
		return m;
	if (m = TAILQ_FIRST(pq[index + oqueuediff].pl))
		return m;

	for(j = 0; j < PQ_L1_SIZE; j++) {
		int ij;
		for(i = (PQ_L2_SIZE / 2) - PQ_L1_SIZE;
			(ij = i + j) >= 0;
			i -= PQ_L1_SIZE) {

			hindex = index + ij;
			if (hindex >= PQ_L2_SIZE)
				hindex -= PQ_L2_SIZE;
			if (m = TAILQ_FIRST(pq[hindex].pl))
				return m;
			if (m = TAILQ_FIRST(pq[hindex + oqueuediff].pl))
				return m;

			hindex = index - ij;
			if (hindex < 0)
				hindex += PQ_L2_SIZE;
			if (m = TAILQ_FIRST(pq[hindex].pl))
				return m;
			if (m = TAILQ_FIRST(pq[hindex + oqueuediff].pl))
				return m;
		}
	}

	hindex = index + PQ_L2_SIZE / 2;
	if (hindex >= PQ_L2_SIZE)
		hindex -= PQ_L2_SIZE;
	if (m = TAILQ_FIRST(pq[hindex].pl))
		return m;
	if (m = TAILQ_FIRST(pq[hindex+oqueuediff].pl))
		return m;

#else
	if (m = TAILQ_FIRST(pq[0].pl))
		return m;
	else
		return TAILQ_FIRST(pq[oqueuediff].pl);
#endif

	return NULL;
}

/*
 *	vm_page_alloc:
 *
 *	Allocate and return a memory cell associated
 *	with this VM object/offset pair.
 *
 *	page_req classes:
 *	VM_ALLOC_NORMAL		normal process request
 *	VM_ALLOC_SYSTEM		system *really* needs a page
 *	VM_ALLOC_INTERRUPT	interrupt time request
 *	VM_ALLOC_ZERO		zero page
 *
 *	Object must be locked.
 */
vm_page_t
vm_page_alloc(object, pindex, page_req)
	vm_object_t object;
	vm_pindex_t pindex;
	int page_req;
{
	register vm_page_t m;
	struct vpgqueues *pq;
	vm_object_t oldobject;
	int queue, qtype;
	int s;

#ifdef DIAGNOSTIC
	m = vm_page_lookup(object, pindex);
	if (m)
		panic("vm_page_alloc: page already allocated");
#endif

	if ((curproc == pageproc) && (page_req != VM_ALLOC_INTERRUPT)) {
		page_req = VM_ALLOC_SYSTEM;
	};

	s = splvm();

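	/*
	 * Note: each allocation class below first tries the free (or
	 * pre-zeroed) queues as long as the free page count stays above its
	 * threshold -- v_free_reserved for NORMAL/ZERO, a relaxed check for
	 * SYSTEM, and any free page at all for INTERRUPT.  NORMAL, ZERO and
	 * SYSTEM fall back to reclaiming a page from the cache queues;
	 * INTERRUPT never does, it simply fails and wakes the pagedaemon.
	 */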
	switch (page_req) {

	case VM_ALLOC_NORMAL:
		if (cnt.v_free_count >= cnt.v_free_reserved) {
			m = vm_page_select_free(object, pindex, PQ_FREE);
#if defined(DIAGNOSTIC)
			if (m == NULL)
				panic("vm_page_alloc(NORMAL): missing page on free queue\n");
#endif
		} else {
			m = vm_page_select(object, pindex, PQ_CACHE);
			if (m == NULL) {
				splx(s);
#if defined(DIAGNOSTIC)
				if (cnt.v_cache_count > 0)
					printf("vm_page_alloc(NORMAL): missing pages on cache queue: %d\n", cnt.v_cache_count);
#endif
				vm_pageout_deficit++;
				pagedaemon_wakeup();
				return (NULL);
			}
		}
		break;

	case VM_ALLOC_ZERO:
		if (cnt.v_free_count >= cnt.v_free_reserved) {
			m = vm_page_select_free(object, pindex, PQ_ZERO);
#if defined(DIAGNOSTIC)
			if (m == NULL)
				panic("vm_page_alloc(ZERO): missing page on free queue\n");
#endif
		} else {
			m = vm_page_select(object, pindex, PQ_CACHE);
			if (m == NULL) {
				splx(s);
#if defined(DIAGNOSTIC)
				if (cnt.v_cache_count > 0)
					printf("vm_page_alloc(ZERO): missing pages on cache queue: %d\n", cnt.v_cache_count);
#endif
				vm_pageout_deficit++;
				pagedaemon_wakeup();
				return (NULL);
			}
		}
		break;

	case VM_ALLOC_SYSTEM:
		if ((cnt.v_free_count >= cnt.v_free_reserved) ||
		    ((cnt.v_cache_count == 0) &&
		    (cnt.v_free_count >= cnt.v_interrupt_free_min))) {
			m = vm_page_select_free(object, pindex, PQ_FREE);
#if defined(DIAGNOSTIC)
			if (m == NULL)
				panic("vm_page_alloc(SYSTEM): missing page on free queue\n");
#endif
		} else {
			m = vm_page_select(object, pindex, PQ_CACHE);
			if (m == NULL) {
				splx(s);
#if defined(DIAGNOSTIC)
				if (cnt.v_cache_count > 0)
					printf("vm_page_alloc(SYSTEM): missing pages on cache queue: %d\n", cnt.v_cache_count);
#endif
				vm_pageout_deficit++;
				pagedaemon_wakeup();
				return (NULL);
			}
		}
		break;

	case VM_ALLOC_INTERRUPT:
		if (cnt.v_free_count > 0) {
			m = vm_page_select_free(object, pindex, PQ_FREE);
#if defined(DIAGNOSTIC)
			if (m == NULL)
				panic("vm_page_alloc(INTERRUPT): missing page on free queue\n");
#endif
		} else {
			splx(s);
			vm_pageout_deficit++;
			pagedaemon_wakeup();
			return (NULL);
		}
		break;

	default:
		m = NULL;
#if !defined(MAX_PERF)
		panic("vm_page_alloc: invalid allocation class");
#endif
	}

	queue = m->queue;
	qtype = queue - m->pc;
	if (qtype == PQ_ZERO)
		vm_page_zero_count--;
	pq = &vm_page_queues[queue];
	TAILQ_REMOVE(pq->pl, m, pageq);
	(*pq->cnt)--;
	(*pq->lcnt)--;
	oldobject = NULL;
	if (qtype == PQ_ZERO) {
		m->flags = PG_ZERO | PG_BUSY;
	} else if (qtype == PQ_CACHE) {
		oldobject = m->object;
		m->flags |= PG_BUSY;
		vm_page_remove(m);
		m->flags = PG_BUSY;
	} else {
		m->flags = PG_BUSY;
	}
	m->wire_count = 0;
	m->hold_count = 0;
	m->act_count = 0;
	m->busy = 0;
	m->valid = 0;
	m->dirty = 0;
	m->queue = PQ_NONE;

	/* XXX before splx until vm_page_insert is safe */
	vm_page_insert(m, object, pindex);

	/*
	 * Don't wakeup too often - wakeup the pageout daemon when
	 * we would be nearly out of memory.
	 */
	if (((cnt.v_free_count + cnt.v_cache_count) <
		(cnt.v_free_reserved + cnt.v_cache_min)) ||
			(cnt.v_free_count < cnt.v_pageout_free_min))
		pagedaemon_wakeup();

	if ((qtype == PQ_CACHE) &&
		((page_req == VM_ALLOC_NORMAL) || (page_req == VM_ALLOC_ZERO)) &&
		oldobject && (oldobject->type == OBJT_VNODE) &&
		((oldobject->flags & OBJ_DEAD) == 0)) {
		struct vnode *vp;
		vp = (struct vnode *) oldobject->handle;
		if (vp && VSHOULDFREE(vp)) {
			if ((vp->v_flag & (VFREE|VTBFREE|VDOOMED)) == 0) {
				TAILQ_INSERT_TAIL(&vnode_tobefree_list, vp, v_freelist);
				vp->v_flag |= VTBFREE;
			}
		}
	}
	splx(s);

	return (m);
}

void
vm_wait()
{
	int s;

	s = splvm();
	if (curproc == pageproc) {
		vm_pageout_pages_needed = 1;
		tsleep(&vm_pageout_pages_needed, PSWP, "vmwait", 0);
	} else {
		if (!vm_pages_needed) {
			vm_pages_needed++;
			wakeup(&vm_pages_needed);
		}
		tsleep(&cnt.v_free_count, PVM, "vmwait", 0);
	}
	splx(s);
}

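/*
 * Note: vm_page_sleep() blocks while the page is busy (or while *busy is
 * nonzero, if a busy pointer is supplied) and returns nonzero when the
 * caller cannot trust its previous lookup -- either the object's
 * generation changed while sleeping or the page is still busy -- in which
 * case the caller is expected to retry.
 */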
int
vm_page_sleep(vm_page_t m, char *msg, char *busy) {
	vm_object_t object = m->object;
	int generation = object->generation;
	if ((busy && *busy) || (m->flags & PG_BUSY)) {
		int s;
		s = splvm();
		if ((busy && *busy) || (m->flags & PG_BUSY)) {
			m->flags |= PG_WANTED;
			tsleep(m, PVM, msg, 800);
		}
		splx(s);
	}
	return ((generation != object->generation) || (busy && *busy) ||
		(m->flags & PG_BUSY));
}

/*
 *	vm_page_activate:
 *
 *	Put the specified page on the active list (if appropriate).
 *
 *	The page queues must be locked.
 */
void
vm_page_activate(m)
	register vm_page_t m;
{
	int s;
	vm_page_t np;
	vm_object_t object;

	s = splvm();
	if (m->queue != PQ_ACTIVE) {
		if ((m->queue - m->pc) == PQ_CACHE)
			cnt.v_reactivated++;

		vm_page_unqueue(m);

		if (m->wire_count == 0) {
			m->queue = PQ_ACTIVE;
			++(*vm_page_queues[PQ_ACTIVE].lcnt);
			TAILQ_INSERT_TAIL(&vm_page_queue_active, m, pageq);
			if (m->act_count < ACT_INIT)
				m->act_count = ACT_INIT;
			cnt.v_active_count++;
		}
	} else {
		if (m->act_count < ACT_INIT)
			m->act_count = ACT_INIT;
	}

	object = m->object;
	TAILQ_REMOVE(&object->memq, m, listq);
	TAILQ_INSERT_TAIL(&object->memq, m, listq);
	object->generation++;

	splx(s);
}

/*
 * helper routine for vm_page_free and vm_page_free_zero
 */
static int
vm_page_freechk_and_unqueue(m)
	vm_page_t m;
{
	vm_object_t oldobject;

	oldobject = m->object;

#if !defined(MAX_PERF)
	if (m->busy || ((m->queue - m->pc) == PQ_FREE) ||
		(m->hold_count != 0)) {
		printf("vm_page_free: pindex(%ld), busy(%d), PG_BUSY(%d), hold(%d)\n",
			m->pindex, m->busy,
			(m->flags & PG_BUSY) ? 1 : 0, m->hold_count);
		if ((m->queue - m->pc) == PQ_FREE)
			panic("vm_page_free: freeing free page");
		else
			panic("vm_page_free: freeing busy page");
	}
#endif

	vm_page_unqueue_nowakeup(m);
	vm_page_remove(m);

	if ((m->flags & PG_FICTITIOUS) != 0) {
		return 0;
	}

	m->valid = 0;

	if (m->wire_count != 0) {
#if !defined(MAX_PERF)
		if (m->wire_count > 1) {
			panic("vm_page_free: invalid wire count (%d), pindex: 0x%x",
				m->wire_count, m->pindex);
		}
#endif
		m->wire_count = 0;
		if (m->object)
			m->object->wire_count--;
		cnt.v_wire_count--;
	}

	if (oldobject && (oldobject->type == OBJT_VNODE) &&
		((oldobject->flags & OBJ_DEAD) == 0)) {
		struct vnode *vp;
		vp = (struct vnode *) oldobject->handle;
		if (vp && VSHOULDFREE(vp)) {
			if ((vp->v_flag & (VTBFREE|VDOOMED|VFREE)) == 0) {
				TAILQ_INSERT_TAIL(&vnode_tobefree_list, vp, v_freelist);
				vp->v_flag |= VTBFREE;
			}
		}
	}

	return 1;
}

/*
 * helper routine for vm_page_free and vm_page_free_zero
 */
static __inline void
vm_page_free_wakeup()
{

/*
 * if pageout daemon needs pages, then tell it that there are
 * some free.
 */
	if (vm_pageout_pages_needed) {
		wakeup(&vm_pageout_pages_needed);
		vm_pageout_pages_needed = 0;
	}
	/*
	 * wakeup processes that are waiting on memory if we hit a
	 * high water mark. And wakeup scheduler process if we have
	 * lots of memory. this process will swapin processes.
	 */
	if (vm_pages_needed &&
		((cnt.v_free_count + cnt.v_cache_count) >= cnt.v_free_min)) {
		wakeup(&cnt.v_free_count);
		vm_pages_needed = 0;
	}
}

/*
 *	vm_page_free:
 *
 *	Returns the given page to the free list,
 *	disassociating it with any VM object.
 *
 *	Object and page must be locked prior to entry.
 */
void
vm_page_free(m)
	register vm_page_t m;
{
	int s;
	struct vpgqueues *pq;

	s = splvm();

	cnt.v_tfree++;

	if (!vm_page_freechk_and_unqueue(m)) {
		splx(s);
		return;
	}

	m->queue = PQ_FREE + m->pc;
	pq = &vm_page_queues[m->queue];
	++(*pq->lcnt);
	++(*pq->cnt);
	/*
	 * If the pageout process is grabbing the page, it is likely
	 * that the page is NOT in the cache.  It is more likely that
	 * the page will be partially in the cache if it is being
	 * explicitly freed.
	 */
	if (curproc == pageproc) {
		TAILQ_INSERT_TAIL(pq->pl, m, pageq);
	} else {
		TAILQ_INSERT_HEAD(pq->pl, m, pageq);
	}

	vm_page_free_wakeup();
	splx(s);
}

void
vm_page_free_zero(m)
	register vm_page_t m;
{
	int s;
	struct vpgqueues *pq;

	s = splvm();

	cnt.v_tfree++;

	if (!vm_page_freechk_and_unqueue(m)) {
		splx(s);
		return;
	}

	m->queue = PQ_ZERO + m->pc;
	pq = &vm_page_queues[m->queue];
	++(*pq->lcnt);
	++(*pq->cnt);

	TAILQ_INSERT_HEAD(pq->pl, m, pageq);
	++vm_page_zero_count;
	vm_page_free_wakeup();
	splx(s);
}

/*
 *	vm_page_wire:
 *
 *	Mark this page as wired down by yet
 *	another map, removing it from paging queues
 *	as necessary.
 *
 *	The page queues must be locked.
 */
void
vm_page_wire(m)
	register vm_page_t m;
{
	int s;

	if (m->wire_count == 0) {
		s = splvm();
		vm_page_unqueue(m);
		splx(s);
		cnt.v_wire_count++;
		if (m->object)
			m->object->wire_count++;
	}
	(*vm_page_queues[PQ_NONE].lcnt)++;
	m->wire_count++;
	m->flags |= PG_MAPPED;
}

/*
 *	vm_page_unwire:
 *
 *	Release one wiring of this page, potentially
 *	enabling it to be paged again.
 *
 *	The page queues must be locked.
 */
void
vm_page_unwire(m)
	register vm_page_t m;
{
	int s;

	s = splvm();

	if (m->wire_count > 0) {
		m->wire_count--;
		if (m->wire_count == 0) {
			if (m->object)
				m->object->wire_count--;
			cnt.v_wire_count--;
			TAILQ_INSERT_TAIL(&vm_page_queue_active, m, pageq);
			m->queue = PQ_ACTIVE;
			(*vm_page_queues[PQ_ACTIVE].lcnt)++;
			cnt.v_active_count++;
		}
	} else {
#if !defined(MAX_PERF)
		panic("vm_page_unwire: invalid wire count: %d\n", m->wire_count);
#endif
	}
	splx(s);
}


/*
 *	vm_page_deactivate:
 *
 *	Returns the given page to the inactive list,
 *	indicating that no physical maps have access
 *	to this page.  [Used by the physical mapping system.]
 *
 *	The page queues must be locked.
 */
void
vm_page_deactivate(m)
	register vm_page_t m;
{
	int s;

	/*
	 * Only move active pages -- ignore locked or already inactive ones.
	 *
	 * XXX: sometimes we get pages which aren't wired down or on any queue -
	 * we need to put them on the inactive queue also, otherwise we lose
	 * track of them. Paul Mackerras (paulus@cs.anu.edu.au) 9-Jan-93.
	 */
	if (m->queue == PQ_INACTIVE)
		return;

	s = splvm();
	if (m->wire_count == 0 && m->hold_count == 0) {
		if ((m->queue - m->pc) == PQ_CACHE)
			cnt.v_reactivated++;
		vm_page_unqueue(m);
		TAILQ_INSERT_TAIL(&vm_page_queue_inactive, m, pageq);
		m->queue = PQ_INACTIVE;
		++(*vm_page_queues[PQ_INACTIVE].lcnt);
		cnt.v_inactive_count++;
	}
	splx(s);
}

/*
 * vm_page_cache
 *
 * Put the specified page onto the page cache queue (if appropriate).
 */
void
vm_page_cache(m)
	register vm_page_t m;
{
	int s;

#if !defined(MAX_PERF)
	if ((m->flags & PG_BUSY) || m->busy || m->wire_count) {
		printf("vm_page_cache: attempting to cache busy page\n");
		return;
	}
#endif
	if ((m->queue - m->pc) == PQ_CACHE)
		return;

	vm_page_protect(m, VM_PROT_NONE);
#if !defined(MAX_PERF)
	if (m->dirty != 0) {
		panic("vm_page_cache: caching a dirty page, pindex: %d", m->pindex);
	}
#endif
	s = splvm();
	vm_page_unqueue_nowakeup(m);
	m->queue = PQ_CACHE + m->pc;
	(*vm_page_queues[m->queue].lcnt)++;
	TAILQ_INSERT_TAIL(vm_page_queues[m->queue].pl, m, pageq);
	cnt.v_cache_count++;
	m->object->cache_count++;
	vm_page_free_wakeup();
	splx(s);
}

/*
 * Grab a page, waiting until we are woken up due to the page
 * changing state.  We keep on waiting as long as the page continues
 * to be in the object.  If the page doesn't exist, allocate it.
 */
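/*
 * Note: if VM_ALLOC_RETRY is not set in allocflags, vm_page_grab() gives
 * up and returns NULL after a single sleep on a busy page or a failed
 * allocation; with VM_ALLOC_RETRY it keeps re-looking the page up (or
 * re-allocating) until it can return one with PG_BUSY set.
 */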
vm_page_t
vm_page_grab(object, pindex, allocflags)
	vm_object_t object;
	vm_pindex_t pindex;
	int allocflags;
{

	vm_page_t m;
	int s, generation;

retrylookup:
	if ((m = vm_page_lookup(object, pindex)) != NULL) {
		if (m->busy || (m->flags & PG_BUSY)) {
			generation = object->generation;

			s = splvm();
			while ((object->generation == generation) &&
					(m->busy || (m->flags & PG_BUSY))) {
				m->flags |= PG_WANTED | PG_REFERENCED;
				tsleep(m, PVM, "pgrbwt", 0);
				if ((allocflags & VM_ALLOC_RETRY) == 0) {
					splx(s);
					return NULL;
				}
			}
			splx(s);
			goto retrylookup;
		} else {
			m->flags |= PG_BUSY;
			return m;
		}
	}

	m = vm_page_alloc(object, pindex, allocflags & ~VM_ALLOC_RETRY);
	if (m == NULL) {
		VM_WAIT;
		if ((allocflags & VM_ALLOC_RETRY) == 0)
			return NULL;
		goto retrylookup;
	}

	return m;
}

/*
 * mapping function for valid bits or for dirty bits in
 * a page
 */
inline int
vm_page_bits(int base, int size)
{
	u_short chunk;

	if ((base == 0) && (size >= PAGE_SIZE))
		return VM_PAGE_BITS_ALL;
	size = (size + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1);
	base = (base % PAGE_SIZE) / DEV_BSIZE;
	chunk = vm_page_dev_bsize_chunks[size / DEV_BSIZE];
	return (chunk << base) & VM_PAGE_BITS_ALL;
}
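
/*
 * Worked example (assuming the usual DEV_BSIZE of 512 and a 4096 byte
 * page): vm_page_bits(1024, 1024) rounds size up to two DEV_BSIZE chunks,
 * takes vm_page_dev_bsize_chunks[2] == 0x3 and shifts it by base
 * 1024/512 == 2, yielding 0x0c -- i.e. bits 2 and 3 of the per-page
 * valid/dirty bitmaps, one bit per DEV_BSIZE chunk.
 */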

/*
 * set a page valid and clean
 */
void
vm_page_set_validclean(m, base, size)
	vm_page_t m;
	int base;
	int size;
{
	int pagebits = vm_page_bits(base, size);
	m->valid |= pagebits;
	m->dirty &= ~pagebits;
	if( base == 0 && size == PAGE_SIZE)
		pmap_clear_modify(VM_PAGE_TO_PHYS(m));
}

/*
 * set a page (partially) invalid
 */
void
vm_page_set_invalid(m, base, size)
	vm_page_t m;
	int base;
	int size;
{
	int bits;

	m->valid &= ~(bits = vm_page_bits(base, size));
	if (m->valid == 0)
		m->dirty &= ~bits;
}

/*
 * is (partial) page valid?
 */
int
vm_page_is_valid(m, base, size)
	vm_page_t m;
	int base;
	int size;
{
	int bits = vm_page_bits(base, size);

	if (m->valid && ((m->valid & bits) == bits))
		return 1;
	else
		return 0;
}

void
vm_page_test_dirty(m)
	vm_page_t m;
{
	if ((m->dirty != VM_PAGE_BITS_ALL) &&
	    pmap_is_modified(VM_PAGE_TO_PHYS(m))) {
		m->dirty = VM_PAGE_BITS_ALL;
	}
}

/*
 * This interface is for merging with malloc() someday.
 * Even if we never implement compaction so that contiguous allocation
 * works after initialization time, malloc()'s data structures are good
 * for statistics and for allocations of less than a page.
 */
void *
contigmalloc1(size, type, flags, low, high, alignment, boundary, map)
	unsigned long size;	/* should be size_t here and for malloc() */
	struct malloc_type *type;
	int flags;
	unsigned long low;
	unsigned long high;
	unsigned long alignment;
	unsigned long boundary;
	vm_map_t map;
{
	int i, s, start;
	vm_offset_t addr, phys, tmp_addr;
	int pass;
	vm_page_t pga = vm_page_array;

	size = round_page(size);
#if !defined(MAX_PERF)
	if (size == 0)
		panic("contigmalloc1: size must not be 0");
	if ((alignment & (alignment - 1)) != 0)
		panic("contigmalloc1: alignment must be a power of 2");
	if ((boundary & (boundary - 1)) != 0)
		panic("contigmalloc1: boundary must be a power of 2");
#endif

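	/*
	 * Note: each pass scans vm_page_array for a run of size/PAGE_SIZE
	 * physically contiguous pages that are free, pre-zeroed or cached,
	 * lie within [low, high), start on the requested alignment and do
	 * not cross the requested boundary.  If no such run is found,
	 * inactive and active pages are cleaned or flushed and moved to the
	 * cache queue, and the scan is retried on the next pass.
	 */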
	start = 0;
	for (pass = 0; pass <= 1; pass++) {
		s = splvm();
again:
		/*
		 * Find first page in array that is free, within range, aligned, and
		 * such that the boundary won't be crossed.
		 */
		for (i = start; i < cnt.v_page_count; i++) {
			int pqtype;
			phys = VM_PAGE_TO_PHYS(&pga[i]);
			pqtype = pga[i].queue - pga[i].pc;
			if (((pqtype == PQ_ZERO) || (pqtype == PQ_FREE) || (pqtype == PQ_CACHE)) &&
			    (phys >= low) && (phys < high) &&
			    ((phys & (alignment - 1)) == 0) &&
			    (((phys ^ (phys + size - 1)) & ~(boundary - 1)) == 0))
				break;
		}

		/*
		 * If the above failed or we will exceed the upper bound, fail.
		 */
		if ((i == cnt.v_page_count) ||
			((VM_PAGE_TO_PHYS(&pga[i]) + size) > high)) {
			vm_page_t m, next;

again1:
			for (m = TAILQ_FIRST(&vm_page_queue_inactive);
				m != NULL;
				m = next) {

				if (m->queue != PQ_INACTIVE) {
					break;
				}

				next = TAILQ_NEXT(m, pageq);
				if (vm_page_sleep(m, "vpctw0", &m->busy))
					goto again1;
				vm_page_test_dirty(m);
				if (m->dirty) {
					if (m->object->type == OBJT_VNODE) {
						vn_lock(m->object->handle, LK_EXCLUSIVE | LK_RETRY, curproc);
						vm_object_page_clean(m->object, 0, 0, TRUE);
						VOP_UNLOCK(m->object->handle, 0, curproc);
						goto again1;
					} else if (m->object->type == OBJT_SWAP ||
								m->object->type == OBJT_DEFAULT) {
						m->flags |= PG_BUSY;
						vm_page_protect(m, VM_PROT_NONE);
						vm_pageout_flush(&m, 1, 0);
						goto again1;
					}
				}
				if ((m->dirty == 0) && (m->busy == 0) && (m->hold_count == 0))
					vm_page_cache(m);
			}

			for (m = TAILQ_FIRST(&vm_page_queue_active);
				m != NULL;
				m = next) {

				if (m->queue != PQ_ACTIVE) {
					break;
				}

				next = TAILQ_NEXT(m, pageq);
				if (vm_page_sleep(m, "vpctw1", &m->busy))
					goto again1;
				vm_page_test_dirty(m);
				if (m->dirty) {
					if (m->object->type == OBJT_VNODE) {
						vn_lock(m->object->handle, LK_EXCLUSIVE | LK_RETRY, curproc);
						vm_object_page_clean(m->object, 0, 0, TRUE);
						VOP_UNLOCK(m->object->handle, 0, curproc);
						goto again1;
					} else if (m->object->type == OBJT_SWAP ||
								m->object->type == OBJT_DEFAULT) {
						m->flags |= PG_BUSY;
						vm_page_protect(m, VM_PROT_NONE);
						vm_pageout_flush(&m, 1, 0);
						goto again1;
					}
				}
				if ((m->dirty == 0) && (m->busy == 0) && (m->hold_count == 0))
					vm_page_cache(m);
			}

			splx(s);
			continue;
		}
		start = i;

		/*
		 * Check successive pages for contiguous and free.
		 */
		for (i = start + 1; i < (start + size / PAGE_SIZE); i++) {
			int pqtype;
			pqtype = pga[i].queue - pga[i].pc;
			if ((VM_PAGE_TO_PHYS(&pga[i]) !=
			    (VM_PAGE_TO_PHYS(&pga[i - 1]) + PAGE_SIZE)) ||
			    ((pqtype != PQ_ZERO) && (pqtype != PQ_FREE) && (pqtype != PQ_CACHE))) {
				start++;
				goto again;
			}
		}

		for (i = start; i < (start + size / PAGE_SIZE); i++) {
			int pqtype;
			vm_page_t m = &pga[i];

			pqtype = m->queue - m->pc;
			if (pqtype == PQ_CACHE) {
				m->flags |= PG_BUSY;
				vm_page_free(m);
			}

			TAILQ_REMOVE(vm_page_queues[m->queue].pl, m, pageq);
			(*vm_page_queues[m->queue].lcnt)--;
			cnt.v_free_count--;
			m->valid = VM_PAGE_BITS_ALL;
			m->flags = 0;
			m->dirty = 0;
			m->wire_count = 0;
			m->busy = 0;
			m->queue = PQ_NONE;
			m->object = NULL;
			vm_page_wire(m);
		}

		/*
		 * We've found a contiguous chunk that meets our requirements.
		 * Allocate kernel VM, unfree and assign the physical pages to it and
		 * return the kernel VM pointer.
		 */
		tmp_addr = addr = kmem_alloc_pageable(map, size);
		if (addr == 0) {
			/*
			 * XXX We almost never run out of kernel virtual
			 * space, so we don't make the allocated memory
			 * above available.
			 */
			splx(s);
			return (NULL);
		}

		for (i = start; i < (start + size / PAGE_SIZE); i++) {
			vm_page_t m = &pga[i];
			vm_page_insert(m, kernel_object,
				OFF_TO_IDX(tmp_addr - VM_MIN_KERNEL_ADDRESS));
			pmap_kenter(tmp_addr, VM_PAGE_TO_PHYS(m));
			tmp_addr += PAGE_SIZE;
		}

		splx(s);
		return ((void *)addr);
	}
	return NULL;
}

void *
contigmalloc(size, type, flags, low, high, alignment, boundary)
	unsigned long size;	/* should be size_t here and for malloc() */
	struct malloc_type *type;
	int flags;
	unsigned long low;
	unsigned long high;
	unsigned long alignment;
	unsigned long boundary;
{
	return contigmalloc1(size, type, flags, low, high, alignment, boundary,
			     kernel_map);
}

vm_offset_t
vm_page_alloc_contig(size, low, high, alignment)
	vm_offset_t size;
	vm_offset_t low;
	vm_offset_t high;
	vm_offset_t alignment;
{
	return ((vm_offset_t)contigmalloc1(size, M_DEVBUF, M_NOWAIT, low, high,
					  alignment, 0ul, kernel_map));
}

#include "opt_ddb.h"
#ifdef DDB
#include <sys/kernel.h>

#include <ddb/ddb.h>

DB_SHOW_COMMAND(page, vm_page_print_page_info)
{
	db_printf("cnt.v_free_count: %d\n", cnt.v_free_count);
	db_printf("cnt.v_cache_count: %d\n", cnt.v_cache_count);
	db_printf("cnt.v_inactive_count: %d\n", cnt.v_inactive_count);
	db_printf("cnt.v_active_count: %d\n", cnt.v_active_count);
	db_printf("cnt.v_wire_count: %d\n", cnt.v_wire_count);
	db_printf("cnt.v_free_reserved: %d\n", cnt.v_free_reserved);
	db_printf("cnt.v_free_min: %d\n", cnt.v_free_min);
	db_printf("cnt.v_free_target: %d\n", cnt.v_free_target);
	db_printf("cnt.v_cache_min: %d\n", cnt.v_cache_min);
	db_printf("cnt.v_inactive_target: %d\n", cnt.v_inactive_target);
}

DB_SHOW_COMMAND(pageq, vm_page_print_pageq_info)
{
	int i;
	db_printf("PQ_FREE:");
	for(i=0;i<PQ_L2_SIZE;i++) {
		db_printf(" %d", *vm_page_queues[PQ_FREE + i].lcnt);
	}
	db_printf("\n");

	db_printf("PQ_CACHE:");
	for(i=0;i<PQ_L2_SIZE;i++) {
		db_printf(" %d", *vm_page_queues[PQ_CACHE + i].lcnt);
	}
	db_printf("\n");

	db_printf("PQ_ZERO:");
	for(i=0;i<PQ_L2_SIZE;i++) {
		db_printf(" %d", *vm_page_queues[PQ_ZERO + i].lcnt);
	}
	db_printf("\n");

	db_printf("PQ_ACTIVE: %d, PQ_INACTIVE: %d\n",
		*vm_page_queues[PQ_ACTIVE].lcnt,
		*vm_page_queues[PQ_INACTIVE].lcnt);
}
#endif /* DDB */