1/*
2 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*
29 * @OSF_COPYRIGHT@
30 */
31/*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
49 *  School of Computer Science
50 *  Carnegie Mellon University
51 *  Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56/*
57 */
58/*
59 *	File:	vm/vm_page.c
60 *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
61 *
62 *	Resident memory management module.
63 */
64
65#include <debug.h>
66#include <libkern/OSAtomic.h>
67
68#include <mach/clock_types.h>
69#include <mach/vm_prot.h>
70#include <mach/vm_statistics.h>
71#include <mach/sdt.h>
72#include <kern/counters.h>
73#include <kern/sched_prim.h>
74#include <kern/task.h>
75#include <kern/thread.h>
76#include <kern/zalloc.h>
77#include <kern/xpr.h>
78#include <vm/pmap.h>
79#include <vm/vm_init.h>
80#include <vm/vm_map.h>
81#include <vm/vm_page.h>
82#include <vm/vm_pageout.h>
83#include <vm/vm_kern.h>			/* kernel_memory_allocate() */
84#include <kern/misc_protos.h>
85#include <zone_debug.h>
86#include <vm/cpm.h>
87#include <ppc/mappings.h>		/* (BRINGUP) */
88#include <pexpert/pexpert.h>	/* (BRINGUP) */
89
90#include <vm/vm_protos.h>
91#include <vm/memory_object.h>
92#include <vm/vm_purgeable_internal.h>
93
94#if CONFIG_EMBEDDED
95#include <sys/kern_memorystatus.h>
96#endif
97
98int			speculative_age_index = 0;
99int			speculative_steal_index = 0;
100
101struct vm_speculative_age_q vm_page_queue_speculative[VM_PAGE_MAX_SPECULATIVE_AGE_Q + 1];
102
103
/*
 *	Associated with each page of user-allocatable memory is a
 *	page structure.
 */
108
109/*
110 *	These variables record the values returned by vm_page_bootstrap,
111 *	for debugging purposes.  The implementation of pmap_steal_memory
112 *	and pmap_startup here also uses them internally.
113 */
114
115vm_offset_t virtual_space_start;
116vm_offset_t virtual_space_end;
117int	vm_page_pages;
118
119/*
120 *	The vm_page_lookup() routine, which provides for fast
121 *	(virtual memory object, offset) to page lookup, employs
122 *	the following hash table.  The vm_page_{insert,remove}
123 *	routines install and remove associations in the table.
124 *	[This table is often called the virtual-to-physical,
125 *	or VP, table.]
126 */
127typedef struct {
128	vm_page_t	pages;
129#if	MACH_PAGE_HASH_STATS
130	int		cur_count;		/* current count */
131	int		hi_count;		/* high water mark */
132#endif /* MACH_PAGE_HASH_STATS */
133} vm_page_bucket_t;
134
135vm_page_bucket_t *vm_page_buckets;		/* Array of buckets */
136unsigned int	vm_page_bucket_count = 0;	/* How big is array? */
137unsigned int	vm_page_hash_mask;		/* Mask for hash function */
138unsigned int	vm_page_hash_shift;		/* Shift for hash function */
139uint32_t	vm_page_bucket_hash;		/* Basic bucket hash */
140decl_simple_lock_data(,vm_page_bucket_lock)
141
142
143#if	MACH_PAGE_HASH_STATS
/* This routine is only for debugging.  It is intended to be called by
 * hand by a developer using a kernel debugger.  This routine prints
 * out vm_page_hash table statistics to the kernel debug console.
 */
148void
149hash_debug(void)
150{
151	int	i;
152	int	numbuckets = 0;
153	int	highsum = 0;
154	int	maxdepth = 0;
155
156	for (i = 0; i < vm_page_bucket_count; i++) {
157		if (vm_page_buckets[i].hi_count) {
158			numbuckets++;
159			highsum += vm_page_buckets[i].hi_count;
160			if (vm_page_buckets[i].hi_count > maxdepth)
161				maxdepth = vm_page_buckets[i].hi_count;
162		}
163	}
164	printf("Total number of buckets: %d\n", vm_page_bucket_count);
165	printf("Number used buckets:     %d = %d%%\n",
166		numbuckets, 100*numbuckets/vm_page_bucket_count);
167	printf("Number unused buckets:   %d = %d%%\n",
168		vm_page_bucket_count - numbuckets,
169		100*(vm_page_bucket_count-numbuckets)/vm_page_bucket_count);
170	printf("Sum of bucket max depth: %d\n", highsum);
171	printf("Average bucket depth:    %d.%2d\n",
172		highsum/vm_page_bucket_count,
173		highsum%vm_page_bucket_count);
174	printf("Maximum bucket depth:    %d\n", maxdepth);
175}
176#endif /* MACH_PAGE_HASH_STATS */
177
178/*
179 *	The virtual page size is currently implemented as a runtime
180 *	variable, but is constant once initialized using vm_set_page_size.
181 *	This initialization must be done in the machine-dependent
182 *	bootstrap sequence, before calling other machine-independent
183 *	initializations.
184 *
185 *	All references to the virtual page size outside this
186 *	module must use the PAGE_SIZE, PAGE_MASK and PAGE_SHIFT
187 *	constants.
188 */
189vm_size_t	page_size  = PAGE_SIZE;
190vm_size_t	page_mask  = PAGE_MASK;
191int		page_shift = PAGE_SHIFT;
192
193/*
194 *	Resident page structures are initialized from
195 *	a template (see vm_page_alloc).
196 *
197 *	When adding a new field to the virtual memory
198 *	object structure, be sure to add initialization
199 *	(see vm_page_bootstrap).
200 */
201struct vm_page	vm_page_template;
202
203vm_page_t	vm_pages = VM_PAGE_NULL;
204unsigned int	vm_pages_count = 0;
205
206/*
207 *	Resident pages that represent real memory
208 *	are allocated from a set of free lists,
209 *	one per color.
210 */
211unsigned int	vm_colors;
212unsigned int    vm_color_mask;			/* mask is == (vm_colors-1) */
213unsigned int	vm_cache_geometry_colors = 0;	/* set by hw dependent code during startup */
214queue_head_t	vm_page_queue_free[MAX_COLORS];
215vm_page_t       vm_page_queue_fictitious;
216unsigned int	vm_page_free_wanted;
217unsigned int	vm_page_free_wanted_privileged;
218unsigned int	vm_page_free_count;
219unsigned int	vm_page_fictitious_count;
220
221unsigned int	vm_page_free_count_minimum;	/* debugging */
222
223/*
224 *	Occasionally, the virtual memory system uses
225 *	resident page structures that do not refer to
226 *	real pages, for example to leave a page with
227 *	important state information in the VP table.
228 *
229 *	These page structures are allocated the way
230 *	most other kernel structures are.
231 */
232zone_t	vm_page_zone;
233decl_mutex_data(,vm_page_alloc_lock)
234unsigned int io_throttle_zero_fill;
235
236/*
237 *	Fictitious pages don't have a physical address,
238 *	but we must initialize phys_page to something.
239 *	For debugging, this should be a strange value
240 *	that the pmap module can recognize in assertions.
241 */
242vm_offset_t vm_page_fictitious_addr = (vm_offset_t) -1;
243
244/*
245 *	Guard pages are not accessible so they don't
246 * 	need a physical address, but we need to enter
247 *	one in the pmap.
248 *	Let's make it recognizable and make sure that
249 *	we don't use a real physical page with that
250 *	physical address.
251 */
252vm_offset_t vm_page_guard_addr = (vm_offset_t) -2;
253
/*
 *	Resident page structures are also chained on
 *	queues that are used by the page replacement
 *	system (pageout daemon).  These queues are
 *	defined here, but are shared by the pageout
 *	module.  The inactive queue is broken into
 *	inactive and zf for convenience, as the
 *	pageout daemon often assigns a higher
 *	affinity to zf pages.
 */
264queue_head_t	vm_page_queue_active;
265queue_head_t	vm_page_queue_inactive;
266queue_head_t	vm_page_queue_zf;	/* inactive memory queue for zero fill */
267
268unsigned int	vm_page_active_count;
269unsigned int	vm_page_inactive_count;
270unsigned int	vm_page_throttled_count;
271unsigned int	vm_page_speculative_count;
272unsigned int	vm_page_wire_count;
273unsigned int	vm_page_gobble_count = 0;
274unsigned int	vm_page_wire_count_warning = 0;
275unsigned int	vm_page_gobble_count_warning = 0;
276
277unsigned int	vm_page_purgeable_count = 0; /* # of pages purgeable now */
278uint64_t	vm_page_purged_count = 0;    /* total count of purged pages */
279
280unsigned int	vm_page_speculative_recreated = 0;
281unsigned int	vm_page_speculative_created = 0;
282unsigned int	vm_page_speculative_used = 0;
283
284ppnum_t		vm_lopage_poolstart = 0;
285ppnum_t		vm_lopage_poolend = 0;
286int		vm_lopage_poolsize = 0;
287uint64_t	max_valid_dma_address = 0xffffffffffffffffULL;
288
289
290/*
291 *	Several page replacement parameters are also
292 *	shared with this module, so that page allocation
293 *	(done here in vm_page_alloc) can trigger the
294 *	pageout daemon.
295 */
296unsigned int	vm_page_free_target = 0;
297unsigned int	vm_page_free_min = 0;
298unsigned int	vm_page_inactive_target = 0;
299unsigned int	vm_page_inactive_min = 0;
300unsigned int	vm_page_free_reserved = 0;
301unsigned int	vm_page_zfill_throttle_count = 0;
302
303/*
304 *	The VM system has a couple of heuristics for deciding
305 *	that pages are "uninteresting" and should be placed
306 *	on the inactive queue as likely candidates for replacement.
307 *	These variables let the heuristics be controlled at run-time
308 *	to make experimentation easier.
309 */
310
311boolean_t vm_page_deactivate_hint = TRUE;
312
313/*
314 *	vm_set_page_size:
315 *
316 *	Sets the page size, perhaps based upon the memory
317 *	size.  Must be called before any use of page-size
318 *	dependent functions.
319 *
320 *	Sets page_shift and page_mask from page_size.
321 */
322void
323vm_set_page_size(void)
324{
325	page_mask = page_size - 1;
326
327	if ((page_mask & page_size) != 0)
328		panic("vm_set_page_size: page size not a power of two");
329
330	for (page_shift = 0; ; page_shift++)
331		if ((1U << page_shift) == page_size)
332			break;
333}
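/*
 *	Illustrative values for vm_set_page_size above: with the usual
 *	page_size of 4096 bytes, page_mask ends up as 0xfff and page_shift
 *	as 12, so trunc_page(addr) is simply (addr & ~0xfff).
 */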
334
335
/* Called once during startup, once the cache geometry is known.
 */
338static void
339vm_page_set_colors( void )
340{
341	unsigned int	n, override;
342
343	if ( PE_parse_boot_argn("colors", &override, sizeof (override)) )		/* colors specified as a boot-arg? */
344		n = override;
345	else if ( vm_cache_geometry_colors )			/* do we know what the cache geometry is? */
346		n = vm_cache_geometry_colors;
347	else	n = DEFAULT_COLORS;				/* use default if all else fails */
348
349	if ( n == 0 )
350		n = 1;
351	if ( n > MAX_COLORS )
352		n = MAX_COLORS;
353
354	/* the count must be a power of 2  */
355	if ( ( n & (n - 1)) !=0  )
356		panic("vm_page_set_colors");
357
358	vm_colors = n;
359	vm_color_mask = n - 1;
360}
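/*
 *	Example for vm_page_set_colors above (illustrative numbers): if the
 *	platform code reports vm_cache_geometry_colors == 8 and no "colors"
 *	boot-arg overrides it, then vm_colors == 8 and vm_color_mask == 7;
 *	a freed page later lands on vm_page_queue_free[phys_page & 7], so
 *	physically consecutive pages spread across all eight color queues.
 */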
361
362
363/*
364 *	vm_page_bootstrap:
365 *
366 *	Initializes the resident memory module.
367 *
368 *	Allocates memory for the page cells, and
369 *	for the object/offset-to-page hash table headers.
370 *	Each page cell is initialized and placed on the free list.
371 *	Returns the range of available kernel virtual memory.
372 */
373
374void
375vm_page_bootstrap(
376	vm_offset_t		*startp,
377	vm_offset_t		*endp)
378{
379	register vm_page_t	m;
380	unsigned int		i;
381	unsigned int		log1;
382	unsigned int		log2;
383	unsigned int		size;
384
385	/*
386	 *	Initialize the vm_page template.
387	 */
388
389	m = &vm_page_template;
390	m->object = VM_OBJECT_NULL;		/* reset later */
391	m->offset = (vm_object_offset_t) -1;	/* reset later */
392	m->wire_count = 0;
393
394	m->pageq.next = NULL;
395	m->pageq.prev = NULL;
396	m->listq.next = NULL;
397	m->listq.prev = NULL;
398
399	m->speculative = FALSE;
400	m->throttled = FALSE;
401	m->inactive = FALSE;
402	m->active = FALSE;
403	m->no_cache = FALSE;
404	m->laundry = FALSE;
405	m->free = FALSE;
406	m->pmapped = FALSE;
407	m->wpmapped = FALSE;
408	m->reference = FALSE;
409	m->pageout = FALSE;
410	m->dump_cleaning = FALSE;
411	m->list_req_pending = FALSE;
412
413	m->busy = TRUE;
414	m->wanted = FALSE;
415	m->tabled = FALSE;
416	m->fictitious = FALSE;
417	m->private = FALSE;
418	m->absent = FALSE;
419	m->error = FALSE;
420	m->dirty = FALSE;
421	m->cleaning = FALSE;
422	m->precious = FALSE;
423	m->clustered = FALSE;
424	m->unusual = FALSE;
425	m->restart = FALSE;
426	m->zero_fill = FALSE;
427	m->encrypted = FALSE;
428	m->encrypted_cleaning = FALSE;
429	m->deactivated = FALSE;
430
431	m->phys_page = 0;		/* reset later */
432
433	/*
434	 *	Initialize the page queues.
435	 */
436
437	mutex_init(&vm_page_queue_free_lock, 0);
438	mutex_init(&vm_page_queue_lock, 0);
439
440	mutex_init(&vm_purgeable_queue_lock, 0);
441
442	for (i = 0; i < PURGEABLE_Q_TYPE_MAX; i++) {
443		int group;
444
445		purgeable_queues[i].token_q_head = 0;
446		purgeable_queues[i].token_q_tail = 0;
447		for (group = 0; group < NUM_VOLATILE_GROUPS; group++)
448		        queue_init(&purgeable_queues[i].objq[group]);
449
450		purgeable_queues[i].type = i;
451		purgeable_queues[i].new_pages = 0;
452#if MACH_ASSERT
453		purgeable_queues[i].debug_count_tokens = 0;
454		purgeable_queues[i].debug_count_objects = 0;
455#endif
456	};
457
458	for (i = 0; i < MAX_COLORS; i++ )
459		queue_init(&vm_page_queue_free[i]);
460	queue_init(&vm_lopage_queue_free);
461	vm_page_queue_fictitious = VM_PAGE_NULL;
462	queue_init(&vm_page_queue_active);
463	queue_init(&vm_page_queue_inactive);
464	queue_init(&vm_page_queue_throttled);
465	queue_init(&vm_page_queue_zf);
466
467	for ( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ ) {
468	        queue_init(&vm_page_queue_speculative[i].age_q);
469
470		vm_page_queue_speculative[i].age_ts.tv_sec = 0;
471		vm_page_queue_speculative[i].age_ts.tv_nsec = 0;
472	}
473	vm_page_free_wanted = 0;
474	vm_page_free_wanted_privileged = 0;
475
476	vm_page_set_colors();
477
478
479	/*
480	 *	Steal memory for the map and zone subsystems.
481	 */
482
483	vm_map_steal_memory();
484	zone_steal_memory();
485
	/*
	 *	Allocate (and initialize) the virtual-to-physical
	 *	table hash buckets.
	 *
	 *	The number of buckets should be a power of two to
	 *	get a good hash function.  The following computation
	 *	chooses the first power of two that is no smaller
	 *	than the number of physical pages in the system.
	 */
495
496	simple_lock_init(&vm_page_bucket_lock, 0);
497
498	if (vm_page_bucket_count == 0) {
499		unsigned int npages = pmap_free_pages();
500
501		vm_page_bucket_count = 1;
502		while (vm_page_bucket_count < npages)
503			vm_page_bucket_count <<= 1;
504	}
505
506	vm_page_hash_mask = vm_page_bucket_count - 1;
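	/*
	 *	Illustrative numbers: if pmap_free_pages() reports 100,000 free
	 *	pages, the loop above settles on vm_page_bucket_count == 131072
	 *	(2^17), the first power of two at or above that count, and
	 *	vm_page_hash_mask == 0x1ffff.
	 */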
507
508	/*
509	 *	Calculate object shift value for hashing algorithm:
510	 *		O = log2(sizeof(struct vm_object))
511	 *		B = log2(vm_page_bucket_count)
512	 *	        hash shifts the object left by
513	 *		B/2 - O
514	 */
515	size = vm_page_bucket_count;
516	for (log1 = 0; size > 1; log1++)
517		size /= 2;
518	size = sizeof(struct vm_object);
519	for (log2 = 0; size > 1; log2++)
520		size /= 2;
521	vm_page_hash_shift = log1/2 - log2 + 1;
522
	vm_page_bucket_hash = 1 << ((log1 + 1) >> 1);		/* Get (ceiling of sqrt of table size) */
	vm_page_bucket_hash |= 1 << ((log1 + 1) >> 2);		/* Get (ceiling of fourth root of table size) */
	vm_page_bucket_hash |= 1;							/* Low bit must always be set to ensure a unique series */
526
527	if (vm_page_hash_mask & vm_page_bucket_count)
528		printf("vm_page_bootstrap: WARNING -- strange page hash\n");
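	/*
	 *	Continuing the illustrative 2^17-bucket example: log1 == 17 and,
	 *	assuming sizeof(struct vm_object) falls in the 256-byte range,
	 *	log2 == 8, so vm_page_hash_shift == 17/2 - 8 + 1 == 1 and
	 *	vm_page_bucket_hash == (1 << 9) | (1 << 4) | 1 == 0x211.
	 */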
529
530	vm_page_buckets = (vm_page_bucket_t *)
531		pmap_steal_memory(vm_page_bucket_count *
532				  sizeof(vm_page_bucket_t));
533
534	for (i = 0; i < vm_page_bucket_count; i++) {
535		register vm_page_bucket_t *bucket = &vm_page_buckets[i];
536
537		bucket->pages = VM_PAGE_NULL;
538#if     MACH_PAGE_HASH_STATS
539		bucket->cur_count = 0;
540		bucket->hi_count = 0;
541#endif /* MACH_PAGE_HASH_STATS */
542	}
543
544	/*
545	 *	Machine-dependent code allocates the resident page table.
546	 *	It uses vm_page_init to initialize the page frames.
547	 *	The code also returns to us the virtual space available
548	 *	to the kernel.  We don't trust the pmap module
549	 *	to get the alignment right.
550	 */
551
552	pmap_startup(&virtual_space_start, &virtual_space_end);
553	virtual_space_start = round_page(virtual_space_start);
554	virtual_space_end = trunc_page(virtual_space_end);
555
556	*startp = virtual_space_start;
557	*endp = virtual_space_end;
558
559	/*
560	 *	Compute the initial "wire" count.
561	 *	Up until now, the pages which have been set aside are not under
562	 *	the VM system's control, so although they aren't explicitly
563	 *	wired, they nonetheless can't be moved. At this moment,
564	 *	all VM managed pages are "free", courtesy of pmap_startup.
565	 */
566	vm_page_wire_count = atop_64(max_mem) - vm_page_free_count;	/* initial value */
567	vm_page_free_count_minimum = vm_page_free_count;
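	/*
	 *	Illustrative numbers: on a machine with 1GB of physical memory
	 *	(atop_64(max_mem) == 262144 pages), if pmap_startup released
	 *	250,000 pages to the free lists, the initial wire count works
	 *	out to 12,144 pages of early-boot and stolen memory.
	 */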
568
569	printf("vm_page_bootstrap: %d free pages and %d wired pages\n",
570	       vm_page_free_count, vm_page_wire_count);
571
572	simple_lock_init(&vm_paging_lock, 0);
573}
574
575#ifndef	MACHINE_PAGES
576/*
577 *	We implement pmap_steal_memory and pmap_startup with the help
578 *	of two simpler functions, pmap_virtual_space and pmap_next_page.
579 */
580
581void *
582pmap_steal_memory(
583	vm_size_t size)
584{
585	vm_offset_t addr, vaddr;
586	ppnum_t	phys_page;
587
	/*
	 *	Round the size up to a multiple of the pointer size.
	 */

	size = (size + sizeof (void *) - 1) &~ (sizeof (void *) - 1);
593
	/*
	 *	If this is the first call to pmap_steal_memory,
	 *	we have to initialize ourselves.
	 */
598
599	if (virtual_space_start == virtual_space_end) {
600		pmap_virtual_space(&virtual_space_start, &virtual_space_end);
601
602		/*
603		 *	The initial values must be aligned properly, and
604		 *	we don't trust the pmap module to do it right.
605		 */
606
607		virtual_space_start = round_page(virtual_space_start);
608		virtual_space_end = trunc_page(virtual_space_end);
609	}
610
611	/*
612	 *	Allocate virtual memory for this request.
613	 */
614
615	addr = virtual_space_start;
616	virtual_space_start += size;
617
618	kprintf("pmap_steal_memory: %08X - %08X; size=%08X\n", addr, virtual_space_start, size);	/* (TEST/DEBUG) */
619
620	/*
621	 *	Allocate and map physical pages to back new virtual pages.
622	 */
623
624	for (vaddr = round_page(addr);
625	     vaddr < addr + size;
626	     vaddr += PAGE_SIZE) {
627		if (!pmap_next_page(&phys_page))
628			panic("pmap_steal_memory");
629
630		/*
631		 *	XXX Logically, these mappings should be wired,
632		 *	but some pmap modules barf if they are.
633		 */
634
635		pmap_enter(kernel_pmap, vaddr, phys_page,
636			   VM_PROT_READ|VM_PROT_WRITE,
637				VM_WIMG_USE_DEFAULT, FALSE);
638		/*
639		 * Account for newly stolen memory
640		 */
641		vm_page_wire_count++;
642
643	}
644
645	return (void *) addr;
646}
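/*
 *	Scale of a typical early theft (illustrative): the vm_page_buckets
 *	array in the 2^17-bucket example above is 131072 entries of one
 *	pointer each (with MACH_PAGE_HASH_STATS off), i.e. roughly 512KB on
 *	a 32-bit kernel, all carved out here before zalloc() is usable.
 */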
647
648void
649pmap_startup(
650	vm_offset_t *startp,
651	vm_offset_t *endp)
652{
653	unsigned int i, npages, pages_initialized, fill, fillval;
654	ppnum_t		phys_page;
655	addr64_t	tmpaddr;
656	unsigned int	num_of_lopages = 0;
657	unsigned int	last_index;
658
659	/*
660	 *	We calculate how many page frames we will have
661	 *	and then allocate the page structures in one chunk.
662	 */
663
664	tmpaddr = (addr64_t)pmap_free_pages() * (addr64_t)PAGE_SIZE;	/* Get the amount of memory left */
665	tmpaddr = tmpaddr + (addr64_t)(round_page_32(virtual_space_start) - virtual_space_start);	/* Account for any slop */
	npages = (unsigned int)(tmpaddr / (addr64_t)(PAGE_SIZE + sizeof(*vm_pages)));	/* Compute how many pages fit once space for their vm_page_t structures is set aside */
667
668	vm_pages = (vm_page_t) pmap_steal_memory(npages * sizeof *vm_pages);
669
670	/*
671	 *	Initialize the page frames.
672	 */
673	for (i = 0, pages_initialized = 0; i < npages; i++) {
674		if (!pmap_next_page(&phys_page))
675			break;
676
677		vm_page_init(&vm_pages[i], phys_page);
678		vm_page_pages++;
679		pages_initialized++;
680	}
681	vm_pages_count = pages_initialized;
682
683	/*
684	 * Check if we want to initialize pages to a known value
685	 */
686	fill = 0;								/* Assume no fill */
687	if (PE_parse_boot_argn("fill", &fillval, sizeof (fillval))) fill = 1;			/* Set fill */
688
689
	/*
	 * if vm_lopage_poolsize is non-zero, then we need to reserve
	 * a pool of pages whose addresses are less than 4G... this pool
	 * is used by drivers whose hardware can't DMA beyond 32 bits...
	 *
	 * note that I'm assuming that the page list is ascending and
	 * ordered with respect to the physical address
	 */
698	for (i = 0, num_of_lopages = vm_lopage_poolsize; num_of_lopages && i < pages_initialized; num_of_lopages--, i++) {
699	        vm_page_t m;
700
701		m = &vm_pages[i];
702
703		if (m->phys_page >= (1 << (32 - PAGE_SHIFT)))
704		        panic("couldn't reserve the lopage pool: not enough lo pages\n");
705
706		if (m->phys_page < vm_lopage_poolend)
707		        panic("couldn't reserve the lopage pool: page list out of order\n");
708
709		vm_lopage_poolend = m->phys_page;
710
711		if (vm_lopage_poolstart == 0)
712		        vm_lopage_poolstart = m->phys_page;
713		else {
714		        if (m->phys_page < vm_lopage_poolstart)
715			        panic("couldn't reserve the lopage pool: page list out of order\n");
716		}
717
		if (fill)
		        fillPage(m->phys_page, fillval);		/* Fill the page with a known value if requested at boot */
720
721		vm_page_release(m);
722	}
723	last_index = i;
724
725	// -debug code remove
726	if (2 == vm_himemory_mode) {
727		// free low -> high so high is preferred
728		for (i = last_index + 1; i <= pages_initialized; i++) {
			if(fill) fillPage(vm_pages[i - 1].phys_page, fillval);		/* Fill the page with a known value if requested at boot */
730			vm_page_release(&vm_pages[i - 1]);
731		}
732	}
733	else
734	// debug code remove-
735
736	/*
737	 * Release pages in reverse order so that physical pages
738	 * initially get allocated in ascending addresses. This keeps
739	 * the devices (which must address physical memory) happy if
740	 * they require several consecutive pages.
741	 */
742	for (i = pages_initialized; i > last_index; i--) {
		if(fill) fillPage(vm_pages[i - 1].phys_page, fillval);		/* Fill the page with a known value if requested at boot */
744		vm_page_release(&vm_pages[i - 1]);
745	}
746
747#if 0
748	{
749		vm_page_t xx, xxo, xxl;
750		int i, j, k, l;
751
752		j = 0;													/* (BRINGUP) */
753		xxl = 0;
754
755		for( i = 0; i < vm_colors; i++ ) {
756			queue_iterate(&vm_page_queue_free[i],
757				      xx,
758				      vm_page_t,
759				      pageq) {	/* BRINGUP */
760				j++;												/* (BRINGUP) */
761				if(j > vm_page_free_count) {						/* (BRINGUP) */
762					panic("pmap_startup: too many pages, xx = %08X, xxl = %08X\n", xx, xxl);
763				}
764
765				l = vm_page_free_count - j;							/* (BRINGUP) */
766				k = 0;												/* (BRINGUP) */
767
768				if(((j - 1) & 0xFFFF) == 0) kprintf("checking number %d of %d\n", j, vm_page_free_count);
769
770				for(xxo = xx->pageq.next; xxo != &vm_page_queue_free[i]; xxo = xxo->pageq.next) {	/* (BRINGUP) */
771					k++;
772					if(k > l) panic("pmap_startup: too many in secondary check %d %d\n", k, l);
773					if((xx->phys_page & 0xFFFFFFFF) == (xxo->phys_page & 0xFFFFFFFF)) {	/* (BRINGUP) */
774						panic("pmap_startup: duplicate physaddr, xx = %08X, xxo = %08X\n", xx, xxo);
775					}
776				}
777
778				xxl = xx;
779			}
780		}
781
782		if(j != vm_page_free_count) {						/* (BRINGUP) */
783			panic("pmap_startup: vm_page_free_count does not match, calc =  %d, vm_page_free_count = %08X\n", j, vm_page_free_count);
784		}
785	}
786#endif
787
788
789	/*
790	 *	We have to re-align virtual_space_start,
791	 *	because pmap_steal_memory has been using it.
792	 */
793
794	virtual_space_start = round_page_32(virtual_space_start);
795
796	*startp = virtual_space_start;
797	*endp = virtual_space_end;
798}
799#endif	/* MACHINE_PAGES */
800
801/*
802 *	Routine:	vm_page_module_init
803 *	Purpose:
804 *		Second initialization pass, to be done after
805 *		the basic VM system is ready.
806 */
807void
808vm_page_module_init(void)
809{
810	vm_page_zone = zinit((vm_size_t) sizeof(struct vm_page),
811			     0, PAGE_SIZE, "vm pages");
812
813#if	ZONE_DEBUG
814	zone_debug_disable(vm_page_zone);
815#endif	/* ZONE_DEBUG */
816
817	zone_change(vm_page_zone, Z_EXPAND, FALSE);
818	zone_change(vm_page_zone, Z_EXHAUST, TRUE);
819	zone_change(vm_page_zone, Z_FOREIGN, TRUE);
820
821        /*
822         * Adjust zone statistics to account for the real pages allocated
823         * in vm_page_create(). [Q: is this really what we want?]
824         */
825        vm_page_zone->count += vm_page_pages;
826        vm_page_zone->cur_size += vm_page_pages * vm_page_zone->elem_size;
827
828	mutex_init(&vm_page_alloc_lock, 0);
829}
830
/*
 *	Routine:	vm_page_create
 *	Purpose:
 *		After the VM system is up, machine-dependent code
 *		may stumble across more physical memory, for example
 *		memory that it had been reserving for a frame buffer.
 *		vm_page_create turns this memory into available pages.
 */
839
840void
841vm_page_create(
842	ppnum_t start,
843	ppnum_t end)
844{
845	ppnum_t		phys_page;
846	vm_page_t 	m;
847
848	for (phys_page = start;
849	     phys_page < end;
850	     phys_page++) {
851		while ((m = (vm_page_t) vm_page_grab_fictitious())
852			== VM_PAGE_NULL)
853			vm_page_more_fictitious();
854
855		vm_page_init(m, phys_page);
856		vm_page_pages++;
857		vm_page_release(m);
858	}
859}
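/*
 *	Hypothetical usage of vm_page_create above (base and size are made
 *	up for illustration): machine-dependent code that reclaims a frame
 *	buffer spanning [base, base + size) could hand it back with
 *	vm_page_create((ppnum_t)atop_64(base), (ppnum_t)atop_64(base + size)).
 */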
860
861/*
862 *	vm_page_hash:
863 *
864 *	Distributes the object/offset key pair among hash buckets.
865 *
866 *	NOTE:	The bucket count must be a power of 2
867 */
868#define vm_page_hash(object, offset) (\
869	( (natural_t)((uint32_t)object * vm_page_bucket_hash) + ((uint32_t)atop_64(offset) ^ vm_page_bucket_hash))\
870	 & vm_page_hash_mask)
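/*
 *	Worked example with illustrative values: if vm_page_hash_mask is
 *	0x1ffff and vm_page_bucket_hash is 0x211, then with 4K pages a page
 *	at offset 0x5000 in some object hashes to
 *		((uint32_t)object * 0x211 + (5 ^ 0x211)) & 0x1ffff
 *	so pages of one object at different offsets scatter across buckets
 *	rather than piling onto a single chain.
 */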
871
872
873/*
874 *	vm_page_insert:		[ internal use only ]
875 *
876 *	Inserts the given mem entry into the object/object-page
877 *	table and object list.
878 *
879 *	The object must be locked.
880 */
881void
882vm_page_insert(
883	vm_page_t		mem,
884	vm_object_t		object,
885	vm_object_offset_t	offset)
886{
887	vm_page_insert_internal(mem, object, offset, FALSE);
888}
889
890
891void
892vm_page_insert_internal(
893	vm_page_t		mem,
894	vm_object_t		object,
895	vm_object_offset_t	offset,
896	boolean_t	queues_lock_held)
897{
898	register vm_page_bucket_t *bucket;
899
900        XPR(XPR_VM_PAGE,
901                "vm_page_insert, object 0x%X offset 0x%X page 0x%X\n",
902                (integer_t)object, (integer_t)offset, (integer_t)mem, 0,0);
903
904	VM_PAGE_CHECK(mem);
905
906	if (object == vm_submap_object) {
907		/* the vm_submap_object is only a placeholder for submaps */
908		panic("vm_page_insert(vm_submap_object,0x%llx)\n", offset);
909	}
910
911	vm_object_lock_assert_exclusive(object);
912#if DEBUG
913	if (mem->tabled || mem->object != VM_OBJECT_NULL)
914		panic("vm_page_insert: page %p for (obj=%p,off=0x%llx) "
915		      "already in (obj=%p,off=0x%llx)",
916		      mem, object, offset, mem->object, mem->offset);
917#endif
918	assert(!object->internal || offset < object->size);
919
920	/* only insert "pageout" pages into "pageout" objects,
921	 * and normal pages into normal objects */
922	assert(object->pageout == mem->pageout);
923
924	assert(vm_page_lookup(object, offset) == VM_PAGE_NULL);
925
926	/*
927	 *	Record the object/offset pair in this page
928	 */
929
930	mem->object = object;
931	mem->offset = offset;
932
	/*
	 *	Insert it into the object/offset hash table
	 */
936
937	bucket = &vm_page_buckets[vm_page_hash(object, offset)];
938	simple_lock(&vm_page_bucket_lock);
939	mem->next = bucket->pages;
940	bucket->pages = mem;
941#if     MACH_PAGE_HASH_STATS
942	if (++bucket->cur_count > bucket->hi_count)
943		bucket->hi_count = bucket->cur_count;
944#endif /* MACH_PAGE_HASH_STATS */
945	simple_unlock(&vm_page_bucket_lock);
946
947	/*
948	 *	Now link into the object's list of backed pages.
949	 */
950
951	VM_PAGE_INSERT(mem, object);
952	mem->tabled = TRUE;
953
954	/*
955	 *	Show that the object has one more resident page.
956	 */
957
958	object->resident_page_count++;
959
960	if (object->purgable == VM_PURGABLE_VOLATILE) {
961		if (queues_lock_held == FALSE)
962			vm_page_lockspin_queues();
963
964		vm_page_purgeable_count++;
965
966		if (queues_lock_held == FALSE)
967			vm_page_unlock_queues();
968	} else if (object->purgable == VM_PURGABLE_EMPTY &&
969		   mem->throttled) {
970		if (queues_lock_held == FALSE)
971			vm_page_lock_queues();
972		vm_page_deactivate(mem);
973		if (queues_lock_held == FALSE)
974			vm_page_unlock_queues();
975	}
976}
977
978/*
979 *	vm_page_replace:
980 *
981 *	Exactly like vm_page_insert, except that we first
982 *	remove any existing page at the given offset in object.
983 *
984 *	The object and page queues must be locked.
985 */
986
987void
988vm_page_replace(
989	register vm_page_t		mem,
990	register vm_object_t		object,
991	register vm_object_offset_t	offset)
992{
993	vm_page_bucket_t *bucket;
994	vm_page_t	 found_m = VM_PAGE_NULL;
995
996	VM_PAGE_CHECK(mem);
997	vm_object_lock_assert_exclusive(object);
998#if DEBUG
999	_mutex_assert(&vm_page_queue_lock, MA_OWNED);
1000
1001	if (mem->tabled || mem->object != VM_OBJECT_NULL)
1002		panic("vm_page_replace: page %p for (obj=%p,off=0x%llx) "
1003		      "already in (obj=%p,off=0x%llx)",
1004		      mem, object, offset, mem->object, mem->offset);
1005#endif
1006	/*
1007	 *	Record the object/offset pair in this page
1008	 */
1009
1010	mem->object = object;
1011	mem->offset = offset;
1012
	/*
	 *	Insert it into the object/offset hash table,
	 *	replacing any page that might have been there.
	 */
1017
1018	bucket = &vm_page_buckets[vm_page_hash(object, offset)];
1019	simple_lock(&vm_page_bucket_lock);
1020
1021	if (bucket->pages) {
1022		vm_page_t *mp = &bucket->pages;
1023		register vm_page_t m = *mp;
1024
1025		do {
1026			if (m->object == object && m->offset == offset) {
1027				/*
1028				 * Remove old page from hash list
1029				 */
1030				*mp = m->next;
1031
1032				found_m = m;
1033				break;
1034			}
1035			mp = &m->next;
1036		} while ((m = *mp));
1037
1038		mem->next = bucket->pages;
1039	} else {
1040		mem->next = VM_PAGE_NULL;
1041	}
1042	/*
1043	 * insert new page at head of hash list
1044	 */
1045	bucket->pages = mem;
1046
1047	simple_unlock(&vm_page_bucket_lock);
1048
1049	if (found_m) {
1050	        /*
1051		 * there was already a page at the specified
1052		 * offset for this object... remove it from
1053		 * the object and free it back to the free list
1054		 */
1055		VM_PAGE_REMOVE(found_m);
1056		found_m->tabled = FALSE;
1057
1058		found_m->object = VM_OBJECT_NULL;
1059		found_m->offset = (vm_object_offset_t) -1;
1060		object->resident_page_count--;
1061
1062		if (object->purgable == VM_PURGABLE_VOLATILE) {
1063		        assert(vm_page_purgeable_count > 0);
1064			vm_page_purgeable_count--;
1065		}
1066
1067		/*
1068		 * Return page to the free list.
1069		 * Note the page is not tabled now
1070		 */
1071		vm_page_free(found_m);
1072	}
1073	/*
1074	 *	Now link into the object's list of backed pages.
1075	 */
1076
1077	VM_PAGE_INSERT(mem, object);
1078	mem->tabled = TRUE;
1079
1080	/*
1081	 *	And show that the object has one more resident
1082	 *	page.
1083	 */
1084
1085	object->resident_page_count++;
1086
1087	if (object->purgable == VM_PURGABLE_VOLATILE) {
1088		vm_page_purgeable_count++;
1089	} else if (object->purgable == VM_PURGABLE_EMPTY) {
1090		if (mem->throttled) {
1091			vm_page_deactivate(mem);
1092		}
1093	}
1094}
1095
1096/*
1097 *	vm_page_remove:		[ internal use only ]
1098 *
1099 *	Removes the given mem entry from the object/offset-page
1100 *	table and the object page list.
1101 *
1102 *	The object and page queues must be locked.
1103 */
1104
1105void
1106vm_page_remove(
1107	register vm_page_t	mem)
1108{
1109	register vm_page_bucket_t	*bucket;
1110	register vm_page_t	this;
1111
1112        XPR(XPR_VM_PAGE,
1113                "vm_page_remove, object 0x%X offset 0x%X page 0x%X\n",
1114                (integer_t)mem->object, (integer_t)mem->offset,
1115		(integer_t)mem, 0,0);
1116#if DEBUG
1117	_mutex_assert(&vm_page_queue_lock, MA_OWNED);
1118#endif
1119	vm_object_lock_assert_exclusive(mem->object);
1120	assert(mem->tabled);
1121	assert(!mem->cleaning);
1122	VM_PAGE_CHECK(mem);
1123
1124
	/*
	 *	Remove from the object/offset hash table
	 */
1128
1129	bucket = &vm_page_buckets[vm_page_hash(mem->object, mem->offset)];
1130	simple_lock(&vm_page_bucket_lock);
1131	if ((this = bucket->pages) == mem) {
1132		/* optimize for common case */
1133
1134		bucket->pages = mem->next;
1135	} else {
1136		register vm_page_t	*prev;
1137
1138		for (prev = &this->next;
1139		     (this = *prev) != mem;
1140		     prev = &this->next)
1141			continue;
1142		*prev = this->next;
1143	}
1144#if     MACH_PAGE_HASH_STATS
1145	bucket->cur_count--;
1146#endif /* MACH_PAGE_HASH_STATS */
1147	simple_unlock(&vm_page_bucket_lock);
1148
1149	/*
1150	 *	Now remove from the object's list of backed pages.
1151	 */
1152
1153	VM_PAGE_REMOVE(mem);
1154
1155	/*
1156	 *	And show that the object has one fewer resident
1157	 *	page.
1158	 */
1159
1160	mem->object->resident_page_count--;
1161
1162	if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
1163		assert(vm_page_purgeable_count > 0);
1164		vm_page_purgeable_count--;
1165	}
1166	mem->tabled = FALSE;
1167	mem->object = VM_OBJECT_NULL;
1168	mem->offset = (vm_object_offset_t) -1;
1169}
1170
1171/*
1172 *	vm_page_lookup:
1173 *
1174 *	Returns the page associated with the object/offset
1175 *	pair specified; if none is found, VM_PAGE_NULL is returned.
1176 *
1177 *	The object must be locked.  No side effects.
1178 */
1179
1180unsigned long vm_page_lookup_hint = 0;
1181unsigned long vm_page_lookup_hint_next = 0;
1182unsigned long vm_page_lookup_hint_prev = 0;
1183unsigned long vm_page_lookup_hint_miss = 0;
1184unsigned long vm_page_lookup_bucket_NULL = 0;
1185unsigned long vm_page_lookup_miss = 0;
1186
1187
1188vm_page_t
1189vm_page_lookup(
1190	register vm_object_t		object,
1191	register vm_object_offset_t	offset)
1192{
1193	register vm_page_t	mem;
1194	register vm_page_bucket_t *bucket;
1195	queue_entry_t		qe;
1196
1197	vm_object_lock_assert_held(object);
1198	mem = object->memq_hint;
1199
1200	if (mem != VM_PAGE_NULL) {
1201		assert(mem->object == object);
1202
1203		if (mem->offset == offset) {
1204			vm_page_lookup_hint++;
1205			return mem;
1206		}
1207		qe = queue_next(&mem->listq);
1208
1209		if (! queue_end(&object->memq, qe)) {
1210			vm_page_t	next_page;
1211
1212			next_page = (vm_page_t) qe;
1213			assert(next_page->object == object);
1214
1215			if (next_page->offset == offset) {
1216				vm_page_lookup_hint_next++;
1217				object->memq_hint = next_page; /* new hint */
1218				return next_page;
1219			}
1220		}
1221		qe = queue_prev(&mem->listq);
1222
1223		if (! queue_end(&object->memq, qe)) {
1224			vm_page_t prev_page;
1225
1226			prev_page = (vm_page_t) qe;
1227			assert(prev_page->object == object);
1228
1229			if (prev_page->offset == offset) {
1230				vm_page_lookup_hint_prev++;
1231				object->memq_hint = prev_page; /* new hint */
1232				return prev_page;
1233			}
1234		}
1235	}
1236	/*
1237	 * Search the hash table for this object/offset pair
1238	 */
1239	bucket = &vm_page_buckets[vm_page_hash(object, offset)];
1240
	/*
	 * since we hold the object lock, we are guaranteed that no
	 * new pages can be inserted into this object... this in turn
	 * guarantees that the page we're looking for can't exist
	 * if the bucket it hashes to is currently NULL even when looked
	 * at outside the scope of the hash bucket lock... this is a
	 * really cheap optimization to avoid taking the lock
	 */
1249	if (bucket->pages == VM_PAGE_NULL) {
1250	        vm_page_lookup_bucket_NULL++;
1251
1252	        return (VM_PAGE_NULL);
1253	}
1254	simple_lock(&vm_page_bucket_lock);
1255
1256	for (mem = bucket->pages; mem != VM_PAGE_NULL; mem = mem->next) {
1257		VM_PAGE_CHECK(mem);
1258		if ((mem->object == object) && (mem->offset == offset))
1259			break;
1260	}
1261	simple_unlock(&vm_page_bucket_lock);
1262
1263	if (mem != VM_PAGE_NULL) {
1264		if (object->memq_hint != VM_PAGE_NULL) {
1265			vm_page_lookup_hint_miss++;
1266		}
1267		assert(mem->object == object);
1268		object->memq_hint = mem;
1269	} else
1270	        vm_page_lookup_miss++;
1271
1272	return(mem);
1273}
1274
1275
1276/*
1277 *	vm_page_rename:
1278 *
1279 *	Move the given memory entry from its
1280 *	current object to the specified target object/offset.
1281 *
1282 *	The object must be locked.
1283 */
1284void
1285vm_page_rename(
1286	register vm_page_t		mem,
1287	register vm_object_t		new_object,
1288	vm_object_offset_t		new_offset,
1289	boolean_t			encrypted_ok)
1290{
1291	assert(mem->object != new_object);
1292
1293	/*
1294	 * ENCRYPTED SWAP:
1295	 * The encryption key is based on the page's memory object
1296	 * (aka "pager") and paging offset.  Moving the page to
1297	 * another VM object changes its "pager" and "paging_offset"
1298	 * so it has to be decrypted first, or we would lose the key.
1299	 *
1300	 * One exception is VM object collapsing, where we transfer pages
1301	 * from one backing object to its parent object.  This operation also
1302	 * transfers the paging information, so the <pager,paging_offset> info
1303	 * should remain consistent.  The caller (vm_object_do_collapse())
1304	 * sets "encrypted_ok" in this case.
1305	 */
1306	if (!encrypted_ok && mem->encrypted) {
1307		panic("vm_page_rename: page %p is encrypted\n", mem);
1308	}
1309
1310	/*
1311	 *	Changes to mem->object require the page lock because
1312	 *	the pageout daemon uses that lock to get the object.
1313	 */
1314
1315        XPR(XPR_VM_PAGE,
1316                "vm_page_rename, new object 0x%X, offset 0x%X page 0x%X\n",
1317                (integer_t)new_object, (integer_t)new_offset,
1318		(integer_t)mem, 0,0);
1319
1320	vm_page_lockspin_queues();
1321    	vm_page_remove(mem);
1322	vm_page_insert(mem, new_object, new_offset);
1323	vm_page_unlock_queues();
1324}
1325
1326/*
1327 *	vm_page_init:
1328 *
1329 *	Initialize the fields in a new page.
1330 *	This takes a structure with random values and initializes it
1331 *	so that it can be given to vm_page_release or vm_page_insert.
1332 */
1333void
1334vm_page_init(
1335	vm_page_t	mem,
1336	ppnum_t	phys_page)
1337{
1338	assert(phys_page);
1339	*mem = vm_page_template;
1340	mem->phys_page = phys_page;
1341}
1342
1343/*
1344 *	vm_page_grab_fictitious:
1345 *
1346 *	Remove a fictitious page from the free list.
1347 *	Returns VM_PAGE_NULL if there are no free pages.
1348 */
1349int	c_vm_page_grab_fictitious = 0;
1350int	c_vm_page_release_fictitious = 0;
1351int	c_vm_page_more_fictitious = 0;
1352
1353extern vm_page_t vm_page_grab_fictitious_common(vm_offset_t phys_addr);
1354
1355vm_page_t
1356vm_page_grab_fictitious_common(
1357	vm_offset_t phys_addr)
1358{
1359	register vm_page_t m;
1360
1361	m = (vm_page_t)zget(vm_page_zone);
1362	if (m) {
1363		vm_page_init(m, phys_addr);
1364		m->fictitious = TRUE;
1365	}
1366
1367	c_vm_page_grab_fictitious++;
1368	return m;
1369}
1370
1371vm_page_t
1372vm_page_grab_fictitious(void)
1373{
1374	return vm_page_grab_fictitious_common(vm_page_fictitious_addr);
1375}
1376
1377vm_page_t
1378vm_page_grab_guard(void)
1379{
1380	return vm_page_grab_fictitious_common(vm_page_guard_addr);
1381}
1382
1383/*
1384 *	vm_page_release_fictitious:
1385 *
1386 *	Release a fictitious page to the free list.
1387 */
1388
1389void
1390vm_page_release_fictitious(
1391	register vm_page_t m)
1392{
1393	assert(!m->free);
1394	assert(m->busy);
1395	assert(m->fictitious);
1396	assert(m->phys_page == vm_page_fictitious_addr ||
1397	       m->phys_page == vm_page_guard_addr);
1398
1399	c_vm_page_release_fictitious++;
1400#if DEBUG
1401	if (m->free)
1402		panic("vm_page_release_fictitious");
1403#endif
1404	m->free = TRUE;
1405	zfree(vm_page_zone, m);
1406}
1407
1408/*
1409 *	vm_page_more_fictitious:
1410 *
1411 *	Add more fictitious pages to the free list.
1412 *	Allowed to block. This routine is way intimate
1413 *	with the zones code, for several reasons:
1414 *	1. we need to carve some page structures out of physical
1415 *	   memory before zones work, so they _cannot_ come from
1416 *	   the zone_map.
1417 *	2. the zone needs to be collectable in order to prevent
1418 *	   growth without bound. These structures are used by
1419 *	   the device pager (by the hundreds and thousands), as
1420 *	   private pages for pageout, and as blocking pages for
1421 *	   pagein. Temporary bursts in demand should not result in
1422 *	   permanent allocation of a resource.
1423 *	3. To smooth allocation humps, we allocate single pages
1424 *	   with kernel_memory_allocate(), and cram them into the
1425 *	   zone. This also allows us to initialize the vm_page_t's
1426 *	   on the way into the zone, so that zget() always returns
1427 *	   an initialized structure. The zone free element pointer
1428 *	   and the free page pointer are both the first item in the
1429 *	   vm_page_t.
1430 *	4. By having the pages in the zone pre-initialized, we need
1431 *	   not keep 2 levels of lists. The garbage collector simply
1432 *	   scans our list, and reduces physical memory usage as it
1433 *	   sees fit.
1434 */
1435
1436void vm_page_more_fictitious(void)
1437{
1438	register vm_page_t m;
1439	vm_offset_t addr;
1440	kern_return_t retval;
1441	int i;
1442
1443	c_vm_page_more_fictitious++;
1444
1445	/*
1446	 * Allocate a single page from the zone_map. Do not wait if no physical
1447	 * pages are immediately available, and do not zero the space. We need
1448	 * our own blocking lock here to prevent having multiple,
1449	 * simultaneous requests from piling up on the zone_map lock. Exactly
1450	 * one (of our) threads should be potentially waiting on the map lock.
1451	 * If winner is not vm-privileged, then the page allocation will fail,
1452	 * and it will temporarily block here in the vm_page_wait().
1453	 */
1454	mutex_lock(&vm_page_alloc_lock);
1455	/*
1456	 * If another thread allocated space, just bail out now.
1457	 */
1458	if (zone_free_count(vm_page_zone) > 5) {
1459		/*
1460		 * The number "5" is a small number that is larger than the
1461		 * number of fictitious pages that any single caller will
1462		 * attempt to allocate. Otherwise, a thread will attempt to
1463		 * acquire a fictitious page (vm_page_grab_fictitious), fail,
1464		 * release all of the resources and locks already acquired,
1465		 * and then call this routine. This routine finds the pages
1466		 * that the caller released, so fails to allocate new space.
1467		 * The process repeats infinitely. The largest known number
1468		 * of fictitious pages required in this manner is 2. 5 is
1469		 * simply a somewhat larger number.
1470		 */
1471		mutex_unlock(&vm_page_alloc_lock);
1472		return;
1473	}
1474
1475	retval = kernel_memory_allocate(zone_map,
1476					&addr, PAGE_SIZE, VM_PROT_ALL,
1477					KMA_KOBJECT|KMA_NOPAGEWAIT);
1478	if (retval != KERN_SUCCESS) {
1479		/*
1480		 * No page was available. Tell the pageout daemon, drop the
1481		 * lock to give another thread a chance at it, and
1482		 * wait for the pageout daemon to make progress.
1483		 */
1484		mutex_unlock(&vm_page_alloc_lock);
1485		vm_page_wait(THREAD_UNINT);
1486		return;
1487	}
1488	/*
1489	 * Initialize as many vm_page_t's as will fit on this page. This
1490	 * depends on the zone code disturbing ONLY the first item of
1491	 * each zone element.
1492	 */
1493	m = (vm_page_t)addr;
1494	for (i = PAGE_SIZE/sizeof(struct vm_page); i > 0; i--) {
1495		vm_page_init(m, vm_page_fictitious_addr);
1496		m->fictitious = TRUE;
1497		m++;
1498	}
1499	zcram(vm_page_zone, (void *) addr, PAGE_SIZE);
1500	mutex_unlock(&vm_page_alloc_lock);
1501}
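/*
 *	Rough sizing (assumed figure): if sizeof(struct vm_page) is on the
 *	order of 80 bytes, each PAGE_SIZE (4K) allocation above yields about
 *	50 pre-initialized fictitious page structures for the zone, which
 *	comfortably covers the worst observed demand of 2 noted above.
 */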
1502
1503
1504/*
1505 *	vm_pool_low():
1506 *
1507 *	Return true if it is not likely that a non-vm_privileged thread
1508 *	can get memory without blocking.  Advisory only, since the
1509 *	situation may change under us.
1510 */
1511int
1512vm_pool_low(void)
1513{
1514	/* No locking, at worst we will fib. */
1515	return( vm_page_free_count < vm_page_free_reserved );
1516}
1517
1518
1519
1520/*
1521 * this is an interface to support bring-up of drivers
1522 * on platforms with physical memory > 4G...
1523 */
1524int		vm_himemory_mode = 0;
1525
1526
1527/*
1528 * this interface exists to support hardware controllers
1529 * incapable of generating DMAs with more than 32 bits
1530 * of address on platforms with physical memory > 4G...
1531 */
1532unsigned int	vm_lopage_free_count = 0;
1533unsigned int	vm_lopage_max_count = 0;
1534queue_head_t	vm_lopage_queue_free;
1535
1536vm_page_t
1537vm_page_grablo(void)
1538{
1539	register vm_page_t	mem;
1540	unsigned int vm_lopage_alloc_count;
1541
1542	if (vm_lopage_poolsize == 0)
1543	        return (vm_page_grab());
1544
1545	mutex_lock(&vm_page_queue_free_lock);
1546
1547	if (! queue_empty(&vm_lopage_queue_free)) {
1548		queue_remove_first(&vm_lopage_queue_free,
1549				   mem,
1550				   vm_page_t,
1551				   pageq);
1552		assert(mem->free);
1553		assert(mem->busy);
1554		assert(!mem->pmapped);
1555		assert(!mem->wpmapped);
1556
1557		mem->pageq.next = NULL;
1558		mem->pageq.prev = NULL;
1559		mem->free = FALSE;
1560
1561		vm_lopage_free_count--;
1562		vm_lopage_alloc_count = (vm_lopage_poolend - vm_lopage_poolstart) - vm_lopage_free_count;
1563		if (vm_lopage_alloc_count > vm_lopage_max_count)
1564			vm_lopage_max_count = vm_lopage_alloc_count;
1565	} else {
1566		mem = VM_PAGE_NULL;
1567	}
1568	mutex_unlock(&vm_page_queue_free_lock);
1569
1570	return (mem);
1571}
1572
1573
/*
 *	vm_page_grab:
 *
 *	first try to grab a page from the per-cpu free list...
 *	this must be done while preemption is disabled... if
 *	a page is available, we're done...
 *	if no page is available, grab the vm_page_queue_free_lock
 *	and see if the current number of free pages would allow us
 *	to grab at least 1... if not, return VM_PAGE_NULL as before...
 *	if there are pages available, disable preemption and
 *	recheck the state of the per-cpu free list... we could
 *	have been preempted and moved to a different cpu, or
 *	some other thread could have re-filled it... if still
 *	empty, figure out how many pages we can steal from the
 *	global free queue and move to the per-cpu queue...
 *	return one of these pages when done... only wake up the
 *	pageout_scan thread if we moved pages from the global
 *	list... no need for the wakeup if we've satisfied the
 *	request from the per-cpu queue.
 */
1594
1595#define COLOR_GROUPS_TO_STEAL	4
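/*
 *	With COLOR_GROUPS_TO_STEAL == 4 and, say, vm_colors == 8, a refill
 *	that isn't pinched by vm_page_free_reserved moves up to 32 pages
 *	onto the per-cpu free list in one pass through the loop below.
 */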
1596
1597
1598vm_page_t
1599vm_page_grab( void )
1600{
1601	vm_page_t	mem;
1602
1603
1604	disable_preemption();
1605
1606	if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
1607return_page_from_cpu_list:
1608	        PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
1609	        PROCESSOR_DATA(current_processor(), free_pages) = mem->pageq.next;
1610		mem->pageq.next = NULL;
1611
1612	        enable_preemption();
1613
1614		assert(mem->listq.next == NULL && mem->listq.prev == NULL);
1615		assert(mem->tabled == FALSE);
1616		assert(mem->object == VM_OBJECT_NULL);
1617		assert(!mem->laundry);
1618		assert(!mem->free);
1619		assert(pmap_verify_free(mem->phys_page));
1620		assert(mem->busy);
1621		assert(!mem->encrypted);
1622		assert(!mem->pmapped);
1623		assert(!mem->wpmapped);
1624
1625		return mem;
1626	}
1627	enable_preemption();
1628
1629
1630	mutex_lock(&vm_page_queue_free_lock);
1631
1632	/*
1633	 *	Optionally produce warnings if the wire or gobble
1634	 *	counts exceed some threshold.
1635	 */
1636	if (vm_page_wire_count_warning > 0
1637	    && vm_page_wire_count >= vm_page_wire_count_warning) {
1638		printf("mk: vm_page_grab(): high wired page count of %d\n",
1639			vm_page_wire_count);
1640		assert(vm_page_wire_count < vm_page_wire_count_warning);
1641	}
1642	if (vm_page_gobble_count_warning > 0
1643	    && vm_page_gobble_count >= vm_page_gobble_count_warning) {
1644		printf("mk: vm_page_grab(): high gobbled page count of %d\n",
1645			vm_page_gobble_count);
1646		assert(vm_page_gobble_count < vm_page_gobble_count_warning);
1647	}
1648
1649	/*
1650	 *	Only let privileged threads (involved in pageout)
1651	 *	dip into the reserved pool.
1652	 */
1653	if ((vm_page_free_count < vm_page_free_reserved) &&
1654	    !(current_thread()->options & TH_OPT_VMPRIV)) {
1655		mutex_unlock(&vm_page_queue_free_lock);
1656		mem = VM_PAGE_NULL;
1657	}
1658	else {
1659	       vm_page_t	head;
1660	       vm_page_t	tail;
1661	       unsigned int	pages_to_steal;
1662	       unsigned int	color;
1663
1664	       while ( vm_page_free_count == 0 ) {
1665
1666			mutex_unlock(&vm_page_queue_free_lock);
1667			/*
1668			 * must be a privileged thread to be
1669			 * in this state since a non-privileged
1670			 * thread would have bailed if we were
1671			 * under the vm_page_free_reserved mark
1672			 */
1673			VM_PAGE_WAIT();
1674			mutex_lock(&vm_page_queue_free_lock);
1675		}
1676
1677		disable_preemption();
1678
1679		if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
1680			mutex_unlock(&vm_page_queue_free_lock);
1681
1682		        /*
1683			 * we got preempted and moved to another processor
1684			 * or we got preempted and someone else ran and filled the cache
1685			 */
1686			goto return_page_from_cpu_list;
1687		}
1688		if (vm_page_free_count <= vm_page_free_reserved)
1689		        pages_to_steal = 1;
1690		else {
1691		        pages_to_steal = COLOR_GROUPS_TO_STEAL * vm_colors;
1692
1693			if (pages_to_steal > (vm_page_free_count - vm_page_free_reserved))
1694			        pages_to_steal = (vm_page_free_count - vm_page_free_reserved);
1695		}
1696		color = PROCESSOR_DATA(current_processor(), start_color);
1697		head = tail = NULL;
1698
1699		while (pages_to_steal--) {
1700		        if (--vm_page_free_count < vm_page_free_count_minimum)
1701			        vm_page_free_count_minimum = vm_page_free_count;
1702
1703			while (queue_empty(&vm_page_queue_free[color]))
1704			        color = (color + 1) & vm_color_mask;
1705
1706			queue_remove_first(&vm_page_queue_free[color],
1707					   mem,
1708					   vm_page_t,
1709					   pageq);
1710			mem->pageq.next = NULL;
1711			mem->pageq.prev = NULL;
1712
1713			color = (color + 1) & vm_color_mask;
1714
1715			if (head == NULL)
1716				head = mem;
1717			else
1718			        tail->pageq.next = (queue_t)mem;
1719		        tail = mem;
1720
1721			mem->pageq.prev = NULL;
1722			assert(mem->listq.next == NULL && mem->listq.prev == NULL);
1723			assert(mem->tabled == FALSE);
1724			assert(mem->object == VM_OBJECT_NULL);
1725			assert(!mem->laundry);
1726			assert(mem->free);
1727			mem->free = FALSE;
1728
1729			assert(pmap_verify_free(mem->phys_page));
1730			assert(mem->busy);
1731			assert(!mem->free);
1732			assert(!mem->encrypted);
1733			assert(!mem->pmapped);
1734			assert(!mem->wpmapped);
1735		}
1736		PROCESSOR_DATA(current_processor(), free_pages) = head->pageq.next;
1737		PROCESSOR_DATA(current_processor(), start_color) = color;
1738
1739		/*
1740		 * satisfy this request
1741		 */
1742	        PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
1743		mem = head;
1744		mem->pageq.next = NULL;
1745
1746		mutex_unlock(&vm_page_queue_free_lock);
1747
1748		enable_preemption();
1749	}
1750	/*
1751	 *	Decide if we should poke the pageout daemon.
1752	 *	We do this if the free count is less than the low
1753	 *	water mark, or if the free count is less than the high
1754	 *	water mark (but above the low water mark) and the inactive
1755	 *	count is less than its target.
1756	 *
1757	 *	We don't have the counts locked ... if they change a little,
1758	 *	it doesn't really matter.
1759	 */
1760	if ((vm_page_free_count < vm_page_free_min) ||
1761	    ((vm_page_free_count < vm_page_free_target) &&
1762	     ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
1763	        thread_wakeup((event_t) &vm_page_free_wanted);
1764
1765#if CONFIG_EMBEDDED
1766	{
1767	int 	percent_avail;
1768
1769	/*
1770	 * Decide if we need to poke the memorystatus notification thread.
1771	 */
1772	percent_avail =
1773		(vm_page_active_count + vm_page_inactive_count +
1774		 vm_page_speculative_count + vm_page_free_count +
1775		 (IP_VALID(memory_manager_default)?0:vm_page_purgeable_count) ) * 100 /
1776		atop_64(max_mem);
1777	if (percent_avail <= (kern_memorystatus_level - 5)) {
1778		kern_memorystatus_level = percent_avail;
1779		thread_wakeup((event_t)&kern_memorystatus_wakeup);
1780	}
1781	}
1782#endif
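	/*
	 *	Illustrative numbers for the calculation above: with 1GB of
	 *	physical memory, atop_64(max_mem) == 262144 pages; if 26,000
	 *	pages are counted as available, percent_avail works out to 9,
	 *	and the memorystatus thread is woken only when that has dropped
	 *	at least 5 points below the last recorded level.
	 */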
1783
1784//	dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 4);	/* (TEST/DEBUG) */
1785
1786	return mem;
1787}
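
/*
 *	Sketch of the usual caller pattern for vm_page_grab (kept under
 *	"#if 0", in the same spirit as the bring-up blocks elsewhere in this
 *	file; the helper name is made up): grab a page, and if the pool is
 *	empty, block in vm_page_wait() and retry.
 */
#if 0
static vm_page_t
example_grab_blocking(void)
{
	vm_page_t	m;

	while ((m = vm_page_grab()) == VM_PAGE_NULL) {
		if (!vm_page_wait(THREAD_UNINT))	/* interrupted out of the wait */
			return VM_PAGE_NULL;
	}
	return m;					/* page comes back busy and not yet tabled */
}
#endif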
1788
1789/*
1790 *	vm_page_release:
1791 *
1792 *	Return a page to the free list.
1793 */
1794
1795void
1796vm_page_release(
1797	register vm_page_t	mem)
1798{
1799	unsigned int	color;
1800#if 0
1801	unsigned int pindex;
1802	phys_entry *physent;
1803
1804	physent = mapping_phys_lookup(mem->phys_page, &pindex);		/* (BRINGUP) */
1805	if(physent->ppLink & ppN) {											/* (BRINGUP) */
1806		panic("vm_page_release: already released - %08X %08X\n", mem, mem->phys_page);
1807	}
1808	physent->ppLink = physent->ppLink | ppN;							/* (BRINGUP) */
1809#endif
1810	assert(!mem->private && !mem->fictitious);
1811
1812//	dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 5);	/* (TEST/DEBUG) */
1813
1814	mutex_lock(&vm_page_queue_free_lock);
1815#if DEBUG
1816	if (mem->free)
1817		panic("vm_page_release");
1818#endif
1819	mem->free = TRUE;
1820
1821	assert(mem->busy);
1822	assert(!mem->laundry);
1823	assert(mem->object == VM_OBJECT_NULL);
1824	assert(mem->pageq.next == NULL &&
1825	       mem->pageq.prev == NULL);
1826	assert(mem->listq.next == NULL &&
1827	       mem->listq.prev == NULL);
1828
1829	if (mem->phys_page <= vm_lopage_poolend && mem->phys_page >= vm_lopage_poolstart) {
1830	        /*
1831		 * this exists to support hardware controllers
1832		 * incapable of generating DMAs with more than 32 bits
1833		 * of address on platforms with physical memory > 4G...
1834		 */
1835		queue_enter_first(&vm_lopage_queue_free,
1836				  mem,
1837				  vm_page_t,
1838				  pageq);
1839		vm_lopage_free_count++;
1840	} else {
1841	        color = mem->phys_page & vm_color_mask;
1842		queue_enter_first(&vm_page_queue_free[color],
1843				  mem,
1844				  vm_page_t,
1845				  pageq);
1846		vm_page_free_count++;
1847		/*
1848		 *	Check if we should wake up someone waiting for page.
1849		 *	But don't bother waking them unless they can allocate.
1850		 *
		 *	We wake up only one thread, to prevent starvation.
		 *	Because the scheduling system handles wait queues FIFO,
		 *	if we wake up all waiting threads, one greedy thread
		 *	can starve multiple niceguy threads.  When the threads
		 *	all wake up, the greedy thread runs first, grabs the page,
		 *	and waits for another page.  It will be the first to run
		 *	when the next page is freed.
1858		 *
1859		 *	However, there is a slight danger here.
1860		 *	The thread we wake might not use the free page.
1861		 *	Then the other threads could wait indefinitely
1862		 *	while the page goes unused.  To forestall this,
1863		 *	the pageout daemon will keep making free pages
1864		 *	as long as vm_page_free_wanted is non-zero.
1865		 */
1866
1867		if ((vm_page_free_wanted_privileged > 0) && vm_page_free_count) {
1868		        vm_page_free_wanted_privileged--;
1869			thread_wakeup_one((event_t) &vm_page_free_wanted_privileged);
1870		} else if ((vm_page_free_wanted > 0) &&
1871			   (vm_page_free_count >= vm_page_free_reserved)) {
1872		        vm_page_free_wanted--;
1873			thread_wakeup_one((event_t) &vm_page_free_count);
1874		}
1875	}
1876	mutex_unlock(&vm_page_queue_free_lock);
1877
1878#if CONFIG_EMBEDDED
1879	{
1880	int	percent_avail;
1881
1882	/*
1883	 * Decide if we need to poke the memorystatus notification thread.
1884	 * Locking is not a big issue, as only a single thread delivers these.
1885	 */
1886	percent_avail =
1887		(vm_page_active_count + vm_page_inactive_count +
1888		 vm_page_speculative_count + vm_page_free_count +
1889		 (IP_VALID(memory_manager_default)?0:vm_page_purgeable_count)  ) * 100 /
1890		atop_64(max_mem);
1891	if (percent_avail >= (kern_memorystatus_level + 5)) {
1892		kern_memorystatus_level = percent_avail;
1893		thread_wakeup((event_t)&kern_memorystatus_wakeup);
1894	}
1895	}
1896#endif
1897}
1898
1899/*
1900 *	vm_page_wait:
1901 *
1902 *	Wait for a page to become available.
1903 *	If there are plenty of free pages, then we don't sleep.
1904 *
1905 *	Returns:
1906 *		TRUE:  There may be another page, try again
1907 *		FALSE: We were interrupted out of our wait, don't try again
1908 */
1909
1910boolean_t
1911vm_page_wait(
1912	int	interruptible )
1913{
1914	/*
1915	 *	We can't use vm_page_free_reserved to make this
1916	 *	determination.  Consider: some thread might
1917	 *	need to allocate two pages.  The first allocation
1918	 *	succeeds, the second fails.  After the first page is freed,
1919	 *	a call to vm_page_wait must really block.
1920	 */
1921	kern_return_t	wait_result;
1922	int          	need_wakeup = 0;
1923	int		is_privileged = current_thread()->options & TH_OPT_VMPRIV;
1924
1925	mutex_lock(&vm_page_queue_free_lock);
1926
1927	if (is_privileged && vm_page_free_count) {
1928		mutex_unlock(&vm_page_queue_free_lock);
1929		return TRUE;
1930	}
1931	if (vm_page_free_count < vm_page_free_target) {
1932
1933	        if (is_privileged) {
1934		        if (vm_page_free_wanted_privileged++ == 0)
1935			        need_wakeup = 1;
1936			wait_result = assert_wait((event_t)&vm_page_free_wanted_privileged, interruptible);
1937		} else {
1938		        if (vm_page_free_wanted++ == 0)
1939			        need_wakeup = 1;
1940			wait_result = assert_wait((event_t)&vm_page_free_count, interruptible);
1941		}
1942		mutex_unlock(&vm_page_queue_free_lock);
1943		counter(c_vm_page_wait_block++);
1944
1945		if (need_wakeup)
1946			thread_wakeup((event_t)&vm_page_free_wanted);
1947
1948		if (wait_result == THREAD_WAITING)
1949			wait_result = thread_block(THREAD_CONTINUE_NULL);
1950
1951		return(wait_result == THREAD_AWAKENED);
1952	} else {
1953		mutex_unlock(&vm_page_queue_free_lock);
1954		return TRUE;
1955	}
1956}
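
/*
 *	Illustrative usage sketch (not compiled; it mirrors the pattern
 *	used by vm_page_part_zero_fill later in this file): callers that
 *	must have a page typically loop on vm_page_grab()/vm_page_wait():
 *
 *		while ((m = vm_page_grab()) == VM_PAGE_NULL)
 *			vm_page_wait(THREAD_UNINT);
 *
 *	An interruptible caller passes THREAD_INTERRUPTIBLE instead and
 *	abandons the allocation if vm_page_wait() returns FALSE.
 */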
1957
1958/*
1959 *	vm_page_alloc:
1960 *
1961 *	Allocate and return a memory cell associated
1962 *	with this VM object/offset pair.
1963 *
1964 *	Object must be locked.
1965 */
1966
1967vm_page_t
1968vm_page_alloc(
1969	vm_object_t		object,
1970	vm_object_offset_t	offset)
1971{
1972	register vm_page_t	mem;
1973
1974	vm_object_lock_assert_exclusive(object);
1975	mem = vm_page_grab();
1976	if (mem == VM_PAGE_NULL)
1977		return VM_PAGE_NULL;
1978
1979	vm_page_insert(mem, object, offset);
1980
1981	return(mem);
1982}
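
/*
 *	Illustrative caller sketch (not compiled): vm_page_alloc() requires
 *	the object to be locked exclusively and returns VM_PAGE_NULL when
 *	no page can be grabbed, so a typical caller drops the lock, waits,
 *	and retries.  "object", "offset" and "m" are placeholder names.
 *
 *		vm_object_lock(object);
 *		while ((m = vm_page_alloc(object, offset)) == VM_PAGE_NULL) {
 *			vm_object_unlock(object);
 *			vm_page_wait(THREAD_UNINT);
 *			vm_object_lock(object);
 *		}
 *		...
 *		vm_object_unlock(object);
 */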
1983
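/*
 *	vm_page_alloclo:
 *
 *	Like vm_page_alloc, but obtains the page from the reserved
 *	low-memory pool via vm_page_grablo(), on behalf of hardware
 *	that cannot generate DMA addresses wider than 32 bits (see the
 *	comment in vm_page_release above).
 *
 *	Object must be locked.
 */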
1984vm_page_t
1985vm_page_alloclo(
1986	vm_object_t		object,
1987	vm_object_offset_t	offset)
1988{
1989	register vm_page_t	mem;
1990
1991	vm_object_lock_assert_exclusive(object);
1992	mem = vm_page_grablo();
1993	if (mem == VM_PAGE_NULL)
1994		return VM_PAGE_NULL;
1995
1996	vm_page_insert(mem, object, offset);
1997
1998	return(mem);
1999}
2000
2001
2002/*
2003 *	vm_page_alloc_guard:
2004 *
2005 * 	Allocate a fictitious page which will be used
2006 *	as a guard page.  The page will be inserted into
2007 *	the object and returned to the caller.
2008 */
2009
2010vm_page_t
2011vm_page_alloc_guard(
2012	vm_object_t		object,
2013	vm_object_offset_t	offset)
2014{
2015	register vm_page_t	mem;
2016
2017	vm_object_lock_assert_exclusive(object);
2018	mem = vm_page_grab_guard();
2019	if (mem == VM_PAGE_NULL)
2020		return VM_PAGE_NULL;
2021
2022	vm_page_insert(mem, object, offset);
2023
2024	return(mem);
2025}
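
/*
 *	Note: a guard page is a fictitious page whose phys_page is
 *	vm_page_guard_addr.  Judging from the assertions elsewhere in this
 *	file, such pages never appear on the active, inactive, throttled
 *	or speculative queues (vm_page_activate, vm_page_deactivate and
 *	vm_page_speculate all assert phys_page != vm_page_guard_addr) and
 *	are released through vm_page_release_fictitious() rather than the
 *	real free list.
 */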
2026
2027
2028counter(unsigned int c_laundry_pages_freed = 0;)
2029
2030boolean_t	vm_page_free_verify = TRUE;
2031/*
2032 *	vm_page_free:
2033 *
2034 *	Returns the given page to the free list,
2035 *	disassociating it from any VM object.
2036 *
2037 *	Object and page queues must be locked prior to entry.
2038 */
2039void
2040vm_page_free_prepare(
2041	register vm_page_t	mem)
2042{
2043	VM_PAGE_CHECK(mem);
2044	assert(!mem->free);
2045	assert(!mem->cleaning);
2046	assert(!mem->pageout);
2047
2048#if DEBUG
2049	if (vm_page_free_verify && !mem->fictitious && !mem->private) {
2050		assert(pmap_verify_free(mem->phys_page));
2051	}
2052	if (mem->object)
2053	        vm_object_lock_assert_exclusive(mem->object);
2054	_mutex_assert(&vm_page_queue_lock, MA_OWNED);
2055
2056	if (mem->free)
2057	       panic("vm_page_free: freeing page on free list\n");
2058#endif
2059
2060	if (mem->laundry) {
2061		/*
2062		 * We may have to free a page while it's being laundered
2063		 * if we lost its pager (due to a forced unmount, for example).
2064		 * We need to call vm_pageout_throttle_up() before removing
2065		 * the page from its VM object, so that we can find out on
2066		 * which pageout queue the page is.
2067		 */
2068		vm_pageout_throttle_up(mem);
2069		counter(++c_laundry_pages_freed);
2070	}
2071
2072	if (mem->tabled)
2073		vm_page_remove(mem);	/* clears tabled, object, offset */
2074
2075	VM_PAGE_QUEUES_REMOVE(mem);	/* clears active/inactive/throttled/speculative */
2076
2077	if (mem->wire_count) {
2078		if (!mem->private && !mem->fictitious)
2079			vm_page_wire_count--;
2080		mem->wire_count = 0;
2081		assert(!mem->gobbled);
2082	} else if (mem->gobbled) {
2083		if (!mem->private && !mem->fictitious)
2084			vm_page_wire_count--;
2085		vm_page_gobble_count--;
2086	}
2087	mem->gobbled = FALSE;
2088
2089	PAGE_WAKEUP(mem);	/* clears wanted */
2090
2091	/* Some of these may be unnecessary */
2092	mem->busy = TRUE;
2093	mem->absent = FALSE;
2094	mem->error = FALSE;
2095	mem->dirty = FALSE;
2096	mem->precious = FALSE;
2097	mem->reference = FALSE;
2098	mem->encrypted = FALSE;
2099	mem->encrypted_cleaning = FALSE;
2100	mem->deactivated = FALSE;
2101	mem->pmapped = FALSE;
2102	mem->wpmapped = FALSE;
2103
2104	if (mem->private) {
2105		mem->private = FALSE;
2106		mem->fictitious = TRUE;
2107		mem->phys_page = vm_page_fictitious_addr;
2108	}
2109	if (!mem->fictitious) {
2110		if (mem->zero_fill == TRUE) {
2111			mem->zero_fill = FALSE;
2112		        OSAddAtomic(-1, (SInt32 *)&vm_zf_count);
2113		}
2114		vm_page_init(mem, mem->phys_page);
2115	}
2116}
2117
2118void
2119vm_page_free(
2120	vm_page_t	mem)
2121{
2122	vm_page_free_prepare(mem);
2123	if (mem->fictitious) {
2124		vm_page_release_fictitious(mem);
2125	} else {
2126		vm_page_release(mem);
2127	}
2128}
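
/*
 *	Illustrative caller sketch (not compiled): the page queues lock is
 *	held around vm_page_free() (see the scratch-page cleanup in
 *	vm_page_part_zero_fill below), and if the page is still tabled in
 *	an object that object must be locked exclusively as well (see the
 *	DEBUG assertions in vm_page_free_prepare).  "object" is a
 *	placeholder for the page's object.
 *
 *		vm_object_lock(object);
 *		vm_page_lock_queues();
 *		vm_page_free(m);
 *		vm_page_unlock_queues();
 *		vm_object_unlock(object);
 */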
2129
2130/*
2131 * Free a list of pages.  The list can be up to several hundred pages,
2132 * as blocked up by vm_pageout_scan().
2133 * The big win is not having to take the page q and free list locks once
2134 * per page.  We sort the incoming pages into n lists, one for
2135 * each color.
2136 *
2137 * The page queues must be locked, and are kept locked.
2138 */
2139void
2140vm_page_free_list(
2141	vm_page_t	mem)
2142{
2143        vm_page_t	nxt;
2144	int		pg_count = 0;
2145	int		color;
2146	int		inuse_list_head = -1;
2147
2148	queue_head_t	free_list[MAX_COLORS];
2149	int		inuse[MAX_COLORS];
2150
2151	for (color = 0; color < (signed) vm_colors; color++) {
2152		queue_init(&free_list[color]);
2153	}
2154
2155#if DEBUG
2156	_mutex_assert(&vm_page_queue_lock, MA_OWNED);
2157#endif
2158	while (mem) {
2159#if DEBUG
2160		if (mem->tabled || mem->object)
2161		        panic("vm_page_free_list: freeing tabled page\n");
2162		if (mem->inactive || mem->active || mem->throttled || mem->free)
2163		        panic("vm_page_free_list: freeing page on list\n");
2164		if (vm_page_free_verify && !mem->fictitious && !mem->private) {
2165			assert(pmap_verify_free(mem->phys_page));
2166		}
2167#endif
2168		assert(mem->pageq.prev == NULL);
2169		assert(mem->busy);
2170		assert(!mem->free);
2171		nxt = (vm_page_t)(mem->pageq.next);
2172
2173		if (!mem->fictitious) {
2174			if (mem->phys_page <= vm_lopage_poolend && mem->phys_page >= vm_lopage_poolstart) {
2175				mem->pageq.next = NULL;
2176				vm_page_release(mem);
2177			} else {
2178				mem->free = TRUE;
2179
2180				color = mem->phys_page & vm_color_mask;
2181				if (queue_empty(&free_list[color])) {
2182					inuse[color] = inuse_list_head;
2183					inuse_list_head = color;
2184				}
2185				queue_enter_first(&free_list[color],
2186						  mem,
2187						  vm_page_t,
2188						  pageq);
2189				pg_count++;
2190			}
2191		} else {
2192			assert(mem->phys_page == vm_page_fictitious_addr ||
2193			       mem->phys_page == vm_page_guard_addr);
2194		        vm_page_release_fictitious(mem);
2195		}
2196		mem = nxt;
2197	}
2198	if (pg_count) {
2199	        unsigned int	avail_free_count;
2200
2201	        mutex_lock(&vm_page_queue_free_lock);
2202
2203		color = inuse_list_head;
2204
2205		while( color != -1 ) {
2206			vm_page_t first, last;
2207			vm_page_t first_free;
2208
2209			first = (vm_page_t) queue_first(&free_list[color]);
2210			last = (vm_page_t) queue_last(&free_list[color]);
2211			first_free = (vm_page_t) queue_first(&vm_page_queue_free[color]);
2212
2213			if (queue_empty(&vm_page_queue_free[color])) {
2214				queue_last(&vm_page_queue_free[color]) =
2215					(queue_entry_t) last;
2216			} else {
2217				queue_prev(&first_free->pageq) =
2218					(queue_entry_t) last;
2219			}
2220			queue_first(&vm_page_queue_free[color]) =
2221				(queue_entry_t) first;
2222			queue_prev(&first->pageq) =
2223				(queue_entry_t) &vm_page_queue_free[color];
2224			queue_next(&last->pageq) =
2225				(queue_entry_t) first_free;
2226			color = inuse[color];
2227		}
2228
2229		vm_page_free_count += pg_count;
2230		avail_free_count = vm_page_free_count;
2231
2232		while ((vm_page_free_wanted_privileged > 0) && avail_free_count) {
2233		        vm_page_free_wanted_privileged--;
2234			avail_free_count--;
2235
2236			thread_wakeup_one((event_t) &vm_page_free_wanted_privileged);
2237		}
2238
2239		if ((vm_page_free_wanted > 0) &&
2240		    (avail_free_count >= vm_page_free_reserved)) {
2241		        unsigned int  available_pages;
2242
2243			if (avail_free_count >= vm_page_free_reserved) {
2244				available_pages = (avail_free_count - vm_page_free_reserved);
2245			} else {
2246				available_pages = 0;
2247			}
2248
2249			if (available_pages >= vm_page_free_wanted) {
2250			        vm_page_free_wanted = 0;
2251				thread_wakeup((event_t) &vm_page_free_count);
2252			} else {
2253			        while (available_pages--) {
2254				        vm_page_free_wanted--;
2255					thread_wakeup_one((event_t) &vm_page_free_count);
2256				}
2257			}
2258		}
2259		mutex_unlock(&vm_page_queue_free_lock);
2260
2261#if CONFIG_EMBEDDED
2262		{
2263		int percent_avail;
2264
2265		/*
2266		 * Decide if we need to poke the memorystatus notification thread.
2267		 */
2268		percent_avail =
2269			(vm_page_active_count + vm_page_inactive_count +
2270			 vm_page_speculative_count + vm_page_free_count +
2271			 (IP_VALID(memory_manager_default)?0:vm_page_purgeable_count)  ) * 100 /
2272			atop_64(max_mem);
2273		if (percent_avail >= (kern_memorystatus_level + 5)) {
2274			kern_memorystatus_level = percent_avail;
2275			thread_wakeup((event_t)&kern_memorystatus_wakeup);
2276		}
2277		}
2278#endif
2279	}
2280}
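
/*
 *	Illustrative caller sketch (not compiled): pages handed to
 *	vm_page_free_list() must already be busy, off their objects and
 *	off the paging queues (see the DEBUG checks above).  The caller
 *	chains them through pageq.next with pageq.prev NULL, terminates
 *	the chain with NULL, and calls in with the page queues locked.
 *	"local_freeq" is a placeholder name for the private list head.
 *
 *		m->pageq.next = (queue_entry_t) local_freeq;
 *		m->pageq.prev = NULL;
 *		local_freeq = m;
 *		...
 *		vm_page_free_list(local_freeq);
 */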
2281
2282
2283/*
2284 *	vm_page_wire:
2285 *
2286 *	Mark this page as wired down by yet
2287 *	another map, removing it from paging queues
2288 *	as necessary.
2289 *
2290 *	The page's object and the page queues must be locked.
2291 */
2292void
2293vm_page_wire(
2294	register vm_page_t	mem)
2295{
2296
2297//	dbgLog(current_thread(), mem->offset, mem->object, 1);	/* (TEST/DEBUG) */
2298
2299	VM_PAGE_CHECK(mem);
2300#if DEBUG
2301	if (mem->object)
2302	        vm_object_lock_assert_exclusive(mem->object);
2303	_mutex_assert(&vm_page_queue_lock, MA_OWNED);
2304#endif
2305	if (mem->wire_count == 0) {
2306		VM_PAGE_QUEUES_REMOVE(mem);
2307		if (!mem->private && !mem->fictitious && !mem->gobbled)
2308			vm_page_wire_count++;
2309		if (mem->gobbled)
2310			vm_page_gobble_count--;
2311		mem->gobbled = FALSE;
2312		if (mem->zero_fill == TRUE) {
2313			mem->zero_fill = FALSE;
2314		        OSAddAtomic(-1, (SInt32 *)&vm_zf_count);
2315		}
2316#if CONFIG_EMBEDDED
2317		{
2318		int 	percent_avail;
2319
2320		/*
2321		 * Decide if we need to poke the memorystatus notification thread.
2322		 */
2323		percent_avail =
2324			(vm_page_active_count + vm_page_inactive_count +
2325			 vm_page_speculative_count + vm_page_free_count +
2326			 (IP_VALID(memory_manager_default)?0:vm_page_purgeable_count) ) * 100 /
2327			atop_64(max_mem);
2328		if (percent_avail <= (kern_memorystatus_level - 5)) {
2329			kern_memorystatus_level = percent_avail;
2330			thread_wakeup((event_t)&kern_memorystatus_wakeup);
2331		}
2332		}
2333#endif
2334		/*
2335		 * ENCRYPTED SWAP:
2336		 * The page could be encrypted, but
2337		 * we don't have to decrypt it here
2338		 * because we don't guarantee that the
2339		 * data is actually valid at this point.
2340		 * The page will get decrypted in
2341		 * vm_fault_wire() if needed.
2342		 */
2343	}
2344	assert(!mem->gobbled);
2345	mem->wire_count++;
2346}
2347
2348/*
2349 *      vm_page_gobble:
2350 *
2351 *      Mark this page as consumed by the vm/ipc/xmm subsystems.
2352 *
2353 *      Called only for freshly vm_page_grab()ed pages - w/ nothing locked.
2354 */
2355void
2356vm_page_gobble(
2357        register vm_page_t      mem)
2358{
2359        vm_page_lockspin_queues();
2360        VM_PAGE_CHECK(mem);
2361
2362	assert(!mem->gobbled);
2363	assert(mem->wire_count == 0);
2364
2365        if (!mem->gobbled && mem->wire_count == 0) {
2366                if (!mem->private && !mem->fictitious)
2367                        vm_page_wire_count++;
2368        }
2369	vm_page_gobble_count++;
2370        mem->gobbled = TRUE;
2371        vm_page_unlock_queues();
2372}
2373
2374/*
2375 *	vm_page_unwire:
2376 *
2377 *	Release one wiring of this page, potentially
2378 *	enabling it to be paged again.
2379 *
2380 *	The page's object and the page queues must be locked.
2381 */
2382void
2383vm_page_unwire(
2384	register vm_page_t	mem)
2385{
2386
2387//	dbgLog(current_thread(), mem->offset, mem->object, 0);	/* (TEST/DEBUG) */
2388
2389	VM_PAGE_CHECK(mem);
2390	assert(mem->wire_count > 0);
2391#if DEBUG
2392	if (mem->object)
2393	        vm_object_lock_assert_exclusive(mem->object);
2394	_mutex_assert(&vm_page_queue_lock, MA_OWNED);
2395#endif
2396	if (--mem->wire_count == 0) {
2397		assert(!mem->private && !mem->fictitious);
2398		vm_page_wire_count--;
2399		assert(!mem->laundry);
2400		assert(mem->object != kernel_object);
2401		assert(mem->pageq.next == NULL && mem->pageq.prev == NULL);
2402		if (mem->object->purgable == VM_PURGABLE_EMPTY) {
2403			vm_page_deactivate(mem);
2404		} else {
2405			vm_page_activate(mem);
2406		}
2407#if CONFIG_EMBEDDED
2408		{
2409		int 	percent_avail;
2410
2411		/*
2412		 * Decide if we need to poke the memorystatus notification thread.
2413		 */
2414		percent_avail =
2415			(vm_page_active_count + vm_page_inactive_count +
2416			 vm_page_speculative_count + vm_page_free_count +
2417			 (IP_VALID(memory_manager_default)?0:vm_page_purgeable_count) ) * 100 /
2418			atop_64(max_mem);
2419		if (percent_avail >= (kern_memorystatus_level + 5)) {
2420			kern_memorystatus_level = percent_avail;
2421			thread_wakeup((event_t)&kern_memorystatus_wakeup);
2422		}
2423		}
2424#endif
2425	}
2426}
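
/*
 *	Illustrative sketch (not compiled): wiring is a counted operation;
 *	each vm_page_wire() is balanced by a vm_page_unwire(), both made
 *	with the page's object and the page queues locked.  The object
 *	lock is assumed to be held across the elided regions below.
 *
 *		vm_page_lock_queues();
 *		vm_page_wire(m);		wire_count 0 -> 1, off the paging queues
 *		vm_page_unlock_queues();
 *		...
 *		vm_page_lock_queues();
 *		vm_page_unwire(m);		wire_count 1 -> 0, back on a paging queue
 *		vm_page_unlock_queues();
 */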
2427
2428
2429/*
2430 *	vm_page_deactivate:
2431 *
2432 *	Returns the given page to the inactive list,
2433 *	indicating that no physical maps have access
2434 *	to this page.  [Used by the physical mapping system.]
2435 *
2436 *	The page queues must be locked.
2437 */
2438void
2439vm_page_deactivate(
2440	register vm_page_t	m)
2441{
2442        boolean_t rapid_age = FALSE;
2443
2444	VM_PAGE_CHECK(m);
2445	assert(m->object != kernel_object);
2446	assert(m->phys_page != vm_page_guard_addr);
2447
2448//	dbgLog(m->phys_page, vm_page_free_count, vm_page_wire_count, 6);	/* (TEST/DEBUG) */
2449#if DEBUG
2450	_mutex_assert(&vm_page_queue_lock, MA_OWNED);
2451#endif
2452	/*
2453	 *	This page is no longer very interesting.  If it was
2454	 *	interesting (active or inactive/referenced), then we
2455	 *	clear the reference bit and (re)enter it in the
2456	 *	inactive queue.  Note wired pages should not have
2457	 *	their reference bit cleared.
2458	 */
2459	if (m->gobbled) {		/* can this happen? */
2460		assert(m->wire_count == 0);
2461
2462		if (!m->private && !m->fictitious)
2463			vm_page_wire_count--;
2464		vm_page_gobble_count--;
2465		m->gobbled = FALSE;
2466	}
2467	if (m->private || (m->wire_count != 0))
2468		return;
2469
2470	if (m->active && m->deactivated == TRUE) {
2471	        if (!pmap_is_referenced(m->phys_page))
2472		        rapid_age = TRUE;
2473	}
2474	if (rapid_age == FALSE && !m->fictitious && !m->absent)
2475		pmap_clear_reference(m->phys_page);
2476
2477	m->reference = FALSE;
2478	m->deactivated = FALSE;
2479	m->no_cache = FALSE;
2480
2481	if (!m->inactive) {
2482		VM_PAGE_QUEUES_REMOVE(m);
2483
2484		assert(!m->laundry);
2485		assert(m->pageq.next == NULL && m->pageq.prev == NULL);
2486
2487		if (!IP_VALID(memory_manager_default) &&
2488			m->dirty && m->object->internal &&
2489			(m->object->purgable == VM_PURGABLE_DENY ||
2490			 m->object->purgable == VM_PURGABLE_NONVOLATILE ||
2491			 m->object->purgable == VM_PURGABLE_VOLATILE )) {
2492			queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
2493			m->throttled = TRUE;
2494			vm_page_throttled_count++;
2495		} else {
2496			if (rapid_age == TRUE ||
2497			    (!m->fictitious && m->object->named && m->object->ref_count == 1)) {
2498			        vm_page_speculate(m, FALSE);
2499				vm_page_speculative_recreated++;
2500				return;
2501			} else {
2502				if (m->zero_fill) {
2503					queue_enter(&vm_page_queue_zf, m, vm_page_t, pageq);
2504					vm_zf_queue_count++;
2505				} else {
2506					queue_enter(&vm_page_queue_inactive, m, vm_page_t, pageq);
2507				}
2508			}
2509			m->inactive = TRUE;
2510			if (!m->fictitious) {
2511			        vm_page_inactive_count++;
2512				token_new_pagecount++;
2513			}
2514		}
2515	}
2516}
2517
2518/*
2519 *	vm_page_activate:
2520 *
2521 *	Put the specified page on the active list (if appropriate).
2522 *
2523 *	The page queues must be locked.
2524 */
2525
2526void
2527vm_page_activate(
2528	register vm_page_t	m)
2529{
2530	VM_PAGE_CHECK(m);
2531#ifdef	FIXME_4778297
2532	assert(m->object != kernel_object);
2533#endif
2534	assert(m->phys_page != vm_page_guard_addr);
2535#if DEBUG
2536	_mutex_assert(&vm_page_queue_lock, MA_OWNED);
2537#endif
2538	if (m->gobbled) {
2539		assert(m->wire_count == 0);
2540		if (!m->private && !m->fictitious)
2541			vm_page_wire_count--;
2542		vm_page_gobble_count--;
2543		m->gobbled = FALSE;
2544	}
2545	if (m->private)
2546		return;
2547
2548#if DEBUG
2549	if (m->active)
2550	        panic("vm_page_activate: already active");
2551#endif
2552
2553	if (m->speculative) {
2554		DTRACE_VM2(pgrec, int, 1, (uint64_t *), NULL);
2555		DTRACE_VM2(pgfrec, int, 1, (uint64_t *), NULL);
2556	}
2557
2558	VM_PAGE_QUEUES_REMOVE(m);
2559
2560	if (m->wire_count == 0) {
2561		assert(!m->laundry);
2562		assert(m->pageq.next == NULL && m->pageq.prev == NULL);
2563		if (!IP_VALID(memory_manager_default) &&
2564			!m->fictitious && m->dirty && m->object->internal &&
2565			(m->object->purgable == VM_PURGABLE_DENY ||
2566			 m->object->purgable == VM_PURGABLE_NONVOLATILE ||
2567			 m->object->purgable == VM_PURGABLE_VOLATILE )) {
2568			queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
2569			m->throttled = TRUE;
2570			vm_page_throttled_count++;
2571		} else {
2572			queue_enter(&vm_page_queue_active, m, vm_page_t, pageq);
2573			m->active = TRUE;
2574			if (!m->fictitious)
2575				vm_page_active_count++;
2576		}
2577		m->reference = TRUE;
2578		m->no_cache = FALSE;
2579	}
2580}
2581
2582
2583/*
2584 *      vm_page_speculate:
2585 *
2586 *      Put the specified page on the speculative list (if appropriate).
2587 *
2588 *      The page queues must be locked.
2589 */
2590void
2591vm_page_speculate(
2592	vm_page_t	m,
2593	boolean_t	new)
2594{
2595        struct vm_speculative_age_q	*aq;
2596
2597	VM_PAGE_CHECK(m);
2598	assert(m->object != kernel_object);
2599	assert(!m->speculative && !m->active && !m->inactive && !m->throttled);
2600	assert(m->phys_page != vm_page_guard_addr);
2601	assert(m->pageq.next == NULL && m->pageq.prev == NULL);
2602#if DEBUG
2603	_mutex_assert(&vm_page_queue_lock, MA_OWNED);
2604#endif
2605	if (m->wire_count == 0) {
2606	        mach_timespec_t		ts;
2607
2608		clock_get_system_nanotime(&ts.tv_sec, (unsigned *)&ts.tv_nsec);
2609
2610		if (vm_page_speculative_count == 0) {
2611
2612			speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
2613			speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
2614
2615			aq = &vm_page_queue_speculative[speculative_age_index];
2616
2617		        /*
2618			 * set the timer to begin a new group
2619			 */
2620			aq->age_ts.tv_sec = VM_PAGE_SPECULATIVE_Q_AGE_MS / 1000;
2621			aq->age_ts.tv_nsec = (VM_PAGE_SPECULATIVE_Q_AGE_MS % 1000) * 1000 * NSEC_PER_USEC;
2622
2623			ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
2624		} else {
2625			aq = &vm_page_queue_speculative[speculative_age_index];
2626
2627			if (CMP_MACH_TIMESPEC(&ts, &aq->age_ts) >= 0) {
2628
2629			        speculative_age_index++;
2630
2631				if (speculative_age_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
2632				        speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
2633				if (speculative_age_index == speculative_steal_index) {
2634				        speculative_steal_index = speculative_age_index + 1;
2635
2636					if (speculative_steal_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
2637					        speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
2638				}
2639				aq = &vm_page_queue_speculative[speculative_age_index];
2640
2641				if (!queue_empty(&aq->age_q))
2642				        vm_page_speculate_ageit(aq);
2643
2644				aq->age_ts.tv_sec = VM_PAGE_SPECULATIVE_Q_AGE_MS / 1000;
2645				aq->age_ts.tv_nsec = (VM_PAGE_SPECULATIVE_Q_AGE_MS % 1000) * 1000 * NSEC_PER_USEC;
2646
2647				ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
2648			}
2649		}
2650		enqueue_tail(&aq->age_q, &m->pageq);
2651		m->speculative = TRUE;
2652		vm_page_speculative_count++;
2653
2654		if (new == TRUE) {
2655		        m->object->pages_created++;
2656			vm_page_speculative_created++;
2657		}
2658	}
2659}
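
/*
 *	Worked example of the age-bin timestamp math above (the constant's
 *	real value lives in vm_page.h; 500ms is used here purely for
 *	illustration): with VM_PAGE_SPECULATIVE_Q_AGE_MS == 500,
 *	age_ts.tv_sec becomes 500 / 1000 == 0 and age_ts.tv_nsec becomes
 *	(500 % 1000) * 1000 * NSEC_PER_USEC == 500000000ns, i.e. half a
 *	second.  ADD_MACH_TIMESPEC() then adds the current time, so new
 *	speculative pages land in the current bin until that deadline
 *	passes, at which point vm_page_speculate() advances
 *	speculative_age_index and, if the bin it is about to reuse still
 *	holds pages, folds them into the AGED queue via
 *	vm_page_speculate_ageit() before reopening it.
 */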
2660
2661
2662/*
2663 * move pages from the specified aging bin to
2664 * the speculative bin that pageout_scan claims from
2665 *
2666 *      The page queues must be locked.
2667 */
2668void
2669vm_page_speculate_ageit(struct vm_speculative_age_q *aq)
2670{
2671        struct vm_speculative_age_q	*sq;
2672	vm_page_t	t;
2673
2674	sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];
2675
2676	if (queue_empty(&sq->age_q)) {
2677	        sq->age_q.next = aq->age_q.next;
2678		sq->age_q.prev = aq->age_q.prev;
2679
2680		t = (vm_page_t)sq->age_q.next;
2681		t->pageq.prev = &sq->age_q;
2682
2683		t = (vm_page_t)sq->age_q.prev;
2684		t->pageq.next = &sq->age_q;
2685	} else {
2686	        t = (vm_page_t)sq->age_q.prev;
2687		t->pageq.next = aq->age_q.next;
2688
2689		t = (vm_page_t)aq->age_q.next;
2690		t->pageq.prev = sq->age_q.prev;
2691
2692		t = (vm_page_t)aq->age_q.prev;
2693		t->pageq.next = &sq->age_q;
2694
2695		sq->age_q.prev = aq->age_q.prev;
2696	}
2697	queue_init(&aq->age_q);
2698}
2699
2700
2701void
2702vm_page_lru(
2703	vm_page_t	m)
2704{
2705	VM_PAGE_CHECK(m);
2706	assert(m->object != kernel_object);
2707	assert(m->phys_page != vm_page_guard_addr);
2708
2709#if DEBUG
2710	_mutex_assert(&vm_page_queue_lock, MA_OWNED);
2711#endif
2712	if (m->active || m->reference)
2713		return;
2714
2715	if (m->private || (m->wire_count != 0))
2716		return;
2717
2718	m->no_cache = FALSE;
2719
2720	VM_PAGE_QUEUES_REMOVE(m);
2721
2722	assert(!m->laundry);
2723	assert(m->pageq.next == NULL && m->pageq.prev == NULL);
2724
2725	queue_enter(&vm_page_queue_inactive, m, vm_page_t, pageq);
2726	m->inactive = TRUE;
2727
2728        vm_page_inactive_count++;
2729	token_new_pagecount++;
2730}
2731
2732
2733/*
2734 *	vm_page_part_zero_fill:
2735 *
2736 *	Zero-fill a part of the page.
2737 */
2738void
2739vm_page_part_zero_fill(
2740	vm_page_t	m,
2741	vm_offset_t	m_pa,
2742	vm_size_t	len)
2743{
2744	vm_page_t	tmp;
2745
2746	VM_PAGE_CHECK(m);
2747#ifdef PMAP_ZERO_PART_PAGE_IMPLEMENTED
2748	pmap_zero_part_page(m->phys_page, m_pa, len);
2749#else
2750	while (1) {
2751       		tmp = vm_page_grab();
2752		if (tmp == VM_PAGE_NULL) {
2753			vm_page_wait(THREAD_UNINT);
2754			continue;
2755		}
2756		break;
2757	}
2758	vm_page_zero_fill(tmp);
2759	if(m_pa != 0) {
2760		vm_page_part_copy(m, 0, tmp, 0, m_pa);
2761	}
2762	if((m_pa + len) <  PAGE_SIZE) {
2763		vm_page_part_copy(m, m_pa + len, tmp,
2764				m_pa + len, PAGE_SIZE - (m_pa + len));
2765	}
2766	vm_page_copy(tmp,m);
2767	vm_page_lock_queues();
2768	vm_page_free(tmp);
2769	vm_page_unlock_queues();
2770#endif
2771
2772}
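
/*
 *	Descriptive note on the fallback path above (the numbers are an
 *	illustrative example assuming 4K pages): a scratch page is grabbed
 *	and fully zeroed, the bytes outside [m_pa, m_pa + len) are copied
 *	back from the original, and the result is copied over the original
 *	before the scratch page is freed.  For m_pa == 0x200 and
 *	len == 0x100, bytes 0x000-0x1ff and 0x300-0xfff keep their old
 *	contents while 0x200-0x2ff end up zeroed.
 */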
2773
2774/*
2775 *	vm_page_zero_fill:
2776 *
2777 *	Zero-fill the specified page.
2778 */
2779void
2780vm_page_zero_fill(
2781	vm_page_t	m)
2782{
2783        XPR(XPR_VM_PAGE,
2784                "vm_page_zero_fill, object 0x%X offset 0x%X page 0x%X\n",
2785                (integer_t)m->object, (integer_t)m->offset, (integer_t)m, 0,0);
2786
2787	VM_PAGE_CHECK(m);
2788
2789//	dbgTrace(0xAEAEAEAE, m->phys_page, 0);		/* (BRINGUP) */
2790	pmap_zero_page(m->phys_page);
2791}
2792
2793/*
2794 *	vm_page_part_copy:
2795 *
2796 *	copy part of one page to another
2797 */
2798
2799void
2800vm_page_part_copy(
2801	vm_page_t	src_m,
2802	vm_offset_t	src_pa,
2803	vm_page_t	dst_m,
2804	vm_offset_t	dst_pa,
2805	vm_size_t	len)
2806{
2807	VM_PAGE_CHECK(src_m);
2808	VM_PAGE_CHECK(dst_m);
2809
2810	pmap_copy_part_page(src_m->phys_page, src_pa,
2811			dst_m->phys_page, dst_pa, len);
2812}
2813
2814/*
2815 *	vm_page_copy:
2816 *
2817 *	Copy one page to another
2818 *
2819 * ENCRYPTED SWAP:
2820 * The source page should not be encrypted.  The caller should
2821 * make sure the page is decrypted first, if necessary.
2822 */
2823
2824int vm_page_copy_cs_validations = 0;
2825int vm_page_copy_cs_tainted = 0;
2826
2827void
2828vm_page_copy(
2829	vm_page_t	src_m,
2830	vm_page_t	dest_m)
2831{
2832        XPR(XPR_VM_PAGE,
2833        "vm_page_copy, object 0x%X offset 0x%X to object 0x%X offset 0x%X\n",
2834        (integer_t)src_m->object, src_m->offset,
2835	(integer_t)dest_m->object, dest_m->offset,
2836	0);
2837
2838	VM_PAGE_CHECK(src_m);
2839	VM_PAGE_CHECK(dest_m);
2840
2841	/*
2842	 * ENCRYPTED SWAP:
2843	 * The source page should not be encrypted at this point.
2844	 * The destination page will therefore not contain encrypted
2845	 * data after the copy.
2846	 */
2847	if (src_m->encrypted) {
2848		panic("vm_page_copy: source page %p is encrypted\n", src_m);
2849	}
2850	dest_m->encrypted = FALSE;
2851
2852	if (src_m->object != VM_OBJECT_NULL &&
2853	    src_m->object->code_signed) {
2854		/*
2855		 * We're copying a page from a code-signed object.
2856		 * Whoever ends up mapping the copy page might care about
2857		 * the original page's integrity, so let's validate the
2858		 * source page now.
2859		 */
2860		vm_page_copy_cs_validations++;
2861		vm_page_validate_cs(src_m);
2862	}
2863	/*
2864	 * Propagate the code-signing bits to the copy page.
2865	 */
2866	dest_m->cs_validated = src_m->cs_validated;
2867	dest_m->cs_tainted = src_m->cs_tainted;
2868	if (dest_m->cs_tainted) {
2869		assert(dest_m->cs_validated);
2870		vm_page_copy_cs_tainted++;
2871	}
2872
2873	pmap_copy_page(src_m->phys_page, dest_m->phys_page);
2874}
2875
2876#if MACH_ASSERT
2877/*
2878 *	Check that the list of pages is ordered by
2879 *	ascending physical address and has no holes.
2880 */
2881static int
2882vm_page_verify_contiguous(
2883	vm_page_t	pages,
2884	unsigned int	npages)
2885{
2886	register vm_page_t	m;
2887	unsigned int		page_count;
2888	vm_offset_t		prev_addr;
2889
2890	prev_addr = pages->phys_page;
2891	page_count = 1;
2892	for (m = NEXT_PAGE(pages); m != VM_PAGE_NULL; m = NEXT_PAGE(m)) {
2893		if (m->phys_page != prev_addr + 1) {
2894			printf("m %p prev_addr 0x%x, current addr 0x%x\n",
2895			       m, prev_addr, m->phys_page);
2896			printf("pages %p page_count %d\n", pages, page_count);
2897			panic("vm_page_verify_contiguous:  not contiguous!");
2898		}
2899		prev_addr = m->phys_page;
2900		++page_count;
2901	}
2902	if (page_count != npages) {
2903		printf("pages %p actual count 0x%x but requested 0x%x\n",
2904		       pages, page_count, npages);
2905		panic("vm_page_verify_contiguous:  count error");
2906	}
2907	return 1;
2908}
2909#endif	/* MACH_ASSERT */
2910
2911
2912#if MACH_ASSERT
2913/*
2914 *	Check the free lists for proper length etc.
2915 */
2916static void
2917vm_page_verify_free_lists( void )
2918{
2919	unsigned int	color, npages;
2920	vm_page_t	m;
2921	vm_page_t	prev_m;
2922
2923	npages = 0;
2924
2925	mutex_lock(&vm_page_queue_free_lock);
2926
2927	for( color = 0; color < vm_colors; color++ ) {
2928		prev_m = (vm_page_t) &vm_page_queue_free[color];
2929		queue_iterate(&vm_page_queue_free[color],
2930			      m,
2931			      vm_page_t,
2932			      pageq) {
2933			if ((vm_page_t) m->pageq.prev != prev_m)
2934				panic("vm_page_verify_free_lists: corrupted prev ptr");
2935			if ( ! m->free )
2936				panic("vm_page_verify_free_lists: not free");
2937			if ( ! m->busy )
2938				panic("vm_page_verify_free_lists: not busy");
2939			if ( (m->phys_page & vm_color_mask) != color)
2940				panic("vm_page_verify_free_lists: wrong color");
2941			++npages;
2942			prev_m = m;
2943		}
2944	}
2945	if (npages != vm_page_free_count)
2946		panic("vm_page_verify_free_lists:  npages %u free_count %d",
2947		      npages, vm_page_free_count);
2948
2949	mutex_unlock(&vm_page_queue_free_lock);
2950}
2951#endif	/* MACH_ASSERT */
2952
2953
2954
2955/*
2956 *	CONTIGUOUS PAGE ALLOCATION
2957 *	Additional levels of effort:
2958 *		+ consider pages that are currently 'pmapped'
2959 *		    this could be expensive since we'd have
2960 * 		    to ask the pmap layer about their state
2961 *		+ consider dirty pages
2962 * 		    either clean them or
2963 *		    copy them to other locations...
2964 *
2965 *	Find a region large enough to contain at least n pages
2966 *	of contiguous physical memory.
2967 *
2968 *	This is done by traversing the vm_page_t array in a linear fashion
2969 *	we assume that the vm_page_t array has the available physical pages in an
2970 *	ordered, ascending list... this is currently true of all our implementations
2971 * 	and must remain so... there can be 'holes' in the array...  we also can
2972 *	no longer tolerate the vm_page_t's in the list being 'freed' and reclaimed
2973 * 	which used to happen via 'vm_page_convert'... that function was no longer
2974 * 	being called and was removed...
2975 *
2976 *	The basic flow consists of stabilizing some of the interesting state of
2977 *	a vm_page_t behind the vm_page_queue and vm_page_free locks... we start our
2978 *	sweep at the beginning of the array looking for pages that meet our criteria
2979 *	for a 'stealable' page... currently we are pretty conservative... if the page
2980 *	meets these criteria and is physically contiguous to the previous page in the 'run'
2981 * 	we keep developing it.  If we hit a page that doesn't fit, we reset our state
2982 *	and start to develop a new run... if at this point we've already considered
2983 * 	at least MAX_CONSIDERED_BEFORE_YIELD pages, we'll drop the 2 locks we hold,
2984 *	and mutex_pause (which will yield the processor), to keep the latency low w/r
2985 *	to other threads trying to acquire free pages (or move pages from q to q),
2986 *	and then continue from the spot we left off... we only make 1 pass through the
2987 *	array.  Once we have a 'run' that is long enough, we'll go into the loop
2988 * 	which steals the pages from the queues they're currently on... pages on the free
2989 *	queue can be stolen directly... pages that are on any of the other queues
2990 *	must be removed from the object they are tabled on... this requires taking the
2991 * 	object lock... we do this as a 'try' to prevent deadlocks... if the 'try' fails
2992 *	or if the state of the page behind the vm_object lock is no longer viable, we'll
2993 *	dump the pages we've currently stolen back to the free list, and pick up our
2994 *	scan from the point where we aborted the 'current' run.
2995 *
2996 *
2997 *	Requirements:
2998 *		- neither vm_page_queue nor vm_free_list lock can be held on entry
2999 *
3000 *	Returns a pointer to a list of gobbled/wired pages or VM_PAGE_NULL.
3001 *
3002 * Algorithm:
3003 */
3004
3005#define	MAX_CONSIDERED_BEFORE_YIELD	1000
3006
3007
3008#define RESET_STATE_OF_RUN()	\
3009	MACRO_BEGIN		\
3010	prevcontaddr = -2;	\
3011	free_considered = 0;	\
3012	substitute_needed = 0;	\
3013	npages = 0;		\
3014	MACRO_END
3015
3016
3017static vm_page_t
3018vm_page_find_contiguous(
3019	unsigned int	contig_pages,
3020	ppnum_t		max_pnum,
3021	boolean_t	wire)
3022{
3023	vm_page_t	m = NULL;
3024	ppnum_t		prevcontaddr;
3025	unsigned int	npages, considered;
3026	unsigned int	page_idx, start_idx;
3027	int		free_considered, free_available;
3028	int		substitute_needed;
3029#if DEBUG
3030	uint32_t	tv_start_sec, tv_start_usec, tv_end_sec, tv_end_usec;
3031#endif
3032#if MACH_ASSERT
3033	int		yielded = 0;
3034	int		dumped_run = 0;
3035	int		stolen_pages = 0;
3036#endif
3037
3038	if (contig_pages == 0)
3039		return VM_PAGE_NULL;
3040
3041#if MACH_ASSERT
3042	vm_page_verify_free_lists();
3043#endif
3044#if DEBUG
3045	clock_get_system_microtime(&tv_start_sec, &tv_start_usec);
3046#endif
3047	vm_page_lock_queues();
3048	mutex_lock(&vm_page_queue_free_lock);
3049
3050	RESET_STATE_OF_RUN();
3051
3052	considered = 0;
3053	free_available = vm_page_free_count - vm_page_free_reserved;
3054
3055	for (page_idx = 0, start_idx = 0;
3056	     npages < contig_pages && page_idx < vm_pages_count;
3057	     page_idx++) {
3058retry:
3059		m = &vm_pages[page_idx];
3060
3061		if (max_pnum && m->phys_page > max_pnum) {
3062			/* no more low pages... */
3063			break;
3064		}
3065		if (m->phys_page <= vm_lopage_poolend &&
3066		    m->phys_page >= vm_lopage_poolstart) {
3067			/*
3068			 * don't want to take pages from our
3069			 * reserved pool of low memory
3070			 * so don't consider it which
3071			 * means starting a new run
3072			 */
3073			RESET_STATE_OF_RUN();
3074
3075		} else if (m->wire_count || m->gobbled ||
3076			   m->encrypted || m->encrypted_cleaning || m->cs_validated || m->cs_tainted ||
3077			   m->error || m->absent || m->pageout_queue || m->laundry || m->wanted || m->precious ||
3078			   m->cleaning || m->overwriting || m->restart || m->unusual || m->list_req_pending) {
3079			/*
3080			 * page is in a transient state
3081			 * or a state we don't want to deal
3082			 * with, so don't consider it which
3083			 * means starting a new run
3084			 */
3085			RESET_STATE_OF_RUN();
3086
3087		} else if (!m->free && !m->active && !m->inactive && !m->speculative && !m->throttled) {
3088			/*
3089			 * page needs to be on one of our queues
3090			 * in order for it to be stable behind the
3091			 * locks we hold at this point...
3092			 * if not, don't consider it which
3093			 * means starting a new run
3094			 */
3095			RESET_STATE_OF_RUN();
3096
3097		} else if (!m->free && (!m->tabled || m->busy)) {
3098			/*
3099			 * pages on the free list are always 'busy'
3100			 * so we couldn't test for 'busy' in the check
3101			 * for the transient states... pages that are
3102			 * 'free' are never 'tabled', so we also couldn't
3103			 * test for 'tabled'.  So we check here to make
3104			 * sure that a non-free page is not busy and is
3105			 * tabled on an object...
3106			 * if not, don't consider it which
3107			 * means starting a new run
3108			 */
3109			RESET_STATE_OF_RUN();
3110
3111		} else {
3112			if (m->phys_page != prevcontaddr + 1) {
3113				npages = 1;
3114				start_idx = page_idx;
3115			} else {
3116				npages++;
3117			}
3118			prevcontaddr = m->phys_page;
3119
3120			if (m->pmapped || m->dirty)
3121				substitute_needed++;
3122
3123			if (m->free) {
3124				free_considered++;
3125			}
3126			if ((free_considered + substitute_needed) > free_available) {
3127				/*
3128				 * if we let this run continue
3129				 * we will end up dropping the vm_page_free_count
3130				 * below the reserve limit... we need to abort
3131				 * this run, but we can at least re-consider this
3132				 * page... thus the jump back to 'retry'
3133				 */
3134				RESET_STATE_OF_RUN();
3135
3136				if (free_available && considered <= MAX_CONSIDERED_BEFORE_YIELD) {
3137					considered++;
3138					goto retry;
3139				}
3140				/*
3141				 * free_available == 0
3142				 * so can't consider any free pages... if
3143				 * we went to retry in this case, we'd
3144				 * get stuck looking at the same page
3145				 * w/o making any forward progress
3146				 * we also want to take this path if we've already
3147				 * reached our limit that controls the lock latency
3148				 */
3149			}
3150		}
3151		if (considered > MAX_CONSIDERED_BEFORE_YIELD && npages <= 1) {
3152
3153			mutex_unlock(&vm_page_queue_free_lock);
3154			vm_page_unlock_queues();
3155
3156			mutex_pause(0);
3157
3158			vm_page_lock_queues();
3159			mutex_lock(&vm_page_queue_free_lock);
3160
3161			RESET_STATE_OF_RUN();
3162			/*
3163			 * reset our free page limit since we
3164			 * dropped the lock protecting the vm_page_free_queue
3165			 */
3166			free_available = vm_page_free_count - vm_page_free_reserved;
3167			considered = 0;
3168#if MACH_ASSERT
3169			yielded++;
3170#endif
3171			goto retry;
3172		}
3173		considered++;
3174	}
3175	m = VM_PAGE_NULL;
3176
3177	if (npages != contig_pages)
3178		mutex_unlock(&vm_page_queue_free_lock);
3179	else {
3180		vm_page_t	m1;
3181		vm_page_t	m2;
3182		unsigned int	cur_idx;
3183		unsigned int	tmp_start_idx;
3184		vm_object_t	locked_object = VM_OBJECT_NULL;
3185		boolean_t	abort_run = FALSE;
3186
3187		tmp_start_idx = start_idx;
3188
3189		/*
3190		 * first pass through to pull the free pages
3191		 * off of the free queue so that in case we
3192		 * need substitute pages, we won't grab any
3193		 * of the free pages in the run... we'll clear
3194		 * the 'free' bit in the 2nd pass, and even in
3195		 * an abort_run case, we'll collect all of the
3196		 * free pages in this run and return them to the free list
3197		 */
3198		while (start_idx < page_idx) {
3199
3200			m1 = &vm_pages[start_idx++];
3201
3202			if (m1->free) {
3203				unsigned int color;
3204
3205				color = m1->phys_page & vm_color_mask;
3206				queue_remove(&vm_page_queue_free[color],
3207					     m1,
3208					     vm_page_t,
3209					     pageq);
3210
3211				vm_page_free_count--;
3212			}
3213		}
3214		/*
3215		 * adjust global freelist counts
3216		 */
3217		if (vm_page_free_count < vm_page_free_count_minimum)
3218			vm_page_free_count_minimum = vm_page_free_count;
3219
3220		/*
3221		 * we can drop the free queue lock at this point since
3222		 * we've pulled any 'free' candidates off of the list
3223		 * we need it dropped so that we can do a vm_page_grab
3224		 * when substituting for pmapped/dirty pages
3225		 */
3226		mutex_unlock(&vm_page_queue_free_lock);
3227
3228		start_idx = tmp_start_idx;
3229		cur_idx = page_idx - 1;
3230
3231		while (start_idx++ < page_idx) {
3232			/*
3233			 * must go through the list from back to front
3234			 * so that the page list is created in the
3235			 * correct order - low -> high phys addresses
3236			 */
3237			m1 = &vm_pages[cur_idx--];
3238
3239			if (m1->free) {
3240				/*
3241				 * pages have already been removed from
3242				 * the free list in the 1st pass
3243				 */
3244				assert(m1->free);
3245				assert(m1->busy);
3246				assert(!m1->wanted);
3247				assert(!m1->laundry);
3248				m1->free = FALSE;
3249
3250			} else {
3251				vm_object_t object;
3252
3253				if (abort_run == TRUE)
3254					continue;
3255
3256				object = m1->object;
3257
3258				if (object != locked_object) {
3259					if (locked_object) {
3260						vm_object_unlock(locked_object);
3261						locked_object = VM_OBJECT_NULL;
3262					}
3263					if (vm_object_lock_try(object))
3264						locked_object = object;
3265				}
3266				if (locked_object == VM_OBJECT_NULL ||
3267				    (m1->wire_count || m1->gobbled ||
3268				     m1->encrypted || m1->encrypted_cleaning || m1->cs_validated || m1->cs_tainted ||
3269				     m1->error || m1->absent || m1->pageout_queue || m1->laundry || m1->wanted || m1->precious ||
3270				     m1->cleaning || m1->overwriting || m1->restart || m1->unusual || m1->list_req_pending || m1->busy)) {
3271
3272					if (locked_object) {
3273						vm_object_unlock(locked_object);
3274						locked_object = VM_OBJECT_NULL;
3275					}
3276					tmp_start_idx = cur_idx;
3277					abort_run = TRUE;
3278					continue;
3279				}
3280				if (m1->pmapped || m1->dirty) {
3281					int refmod;
3282					vm_object_offset_t offset;
3283
3284					m2 = vm_page_grab();
3285
3286					if (m2 == VM_PAGE_NULL) {
3287						if (locked_object) {
3288							vm_object_unlock(locked_object);
3289							locked_object = VM_OBJECT_NULL;
3290						}
3291						tmp_start_idx = cur_idx;
3292						abort_run = TRUE;
3293						continue;
3294					}
3295					if (m1->pmapped)
3296						refmod = pmap_disconnect(m1->phys_page);
3297					else
3298						refmod = 0;
3299					vm_page_copy(m1, m2);
3300
3301					m2->reference = m1->reference;
3302					m2->dirty     = m1->dirty;
3303
3304					if (refmod & VM_MEM_REFERENCED)
3305						m2->reference = TRUE;
3306					if (refmod & VM_MEM_MODIFIED)
3307						m2->dirty = TRUE;
3308					offset = m1->offset;
3309
3310					/*
3311					 * completely cleans up the state
3312					 * of the page so that it is ready
3313					 * to be put onto the free list, or
3314					 * for this purpose it looks like it
3315					 * just came off of the free list
3316					 */
3317					vm_page_free_prepare(m1);
3318
3319					/*
3320					 * make sure we clear the ref/mod state
3321					 * from the pmap layer... else we risk
3322					 * inheriting state from the last time
3323					 * this page was used...
3324					 */
3325					pmap_clear_refmod(m2->phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
3326					/*
3327					 * now put the substitute page on the object
3328					 */
3329					vm_page_insert_internal(m2, locked_object, offset, TRUE);
3330
3331					if (m2->reference)
3332						vm_page_activate(m2);
3333					else
3334						vm_page_deactivate(m2);
3335
3336					PAGE_WAKEUP_DONE(m2);
3337
3338				} else {
3339					/*
3340					 * completely cleans up the state
3341					 * of the page so that it is ready
3342					 * to be put onto the free list, or
3343					 * for this purpose it looks like it
3344					 * just came off of the free list
3345					 */
3346					vm_page_free_prepare(m1);
3347				}
3348#if MACH_ASSERT
3349				stolen_pages++;
3350#endif
3351			}
3352			m1->pageq.next = (queue_entry_t) m;
3353			m1->pageq.prev = NULL;
3354			m = m1;
3355		}
3356		if (locked_object) {
3357			vm_object_unlock(locked_object);
3358			locked_object = VM_OBJECT_NULL;
3359		}
3360
3361		if (abort_run == TRUE) {
3362			if (m != VM_PAGE_NULL) {
3363				vm_page_free_list(m);
3364			}
3365#if MACH_ASSERT
3366			dumped_run++;
3367#endif
3368			/*
3369			 * want the index of the last
3370			 * page in this run that was
3371			 * successfully 'stolen', so back
3372			 * it up 1 for the auto-decrement on use
3373			 * and 1 more to bump back over this page
3374			 */
3375			page_idx = tmp_start_idx + 2;
3376
3377			if (page_idx >= vm_pages_count)
3378				goto done_scanning;
3379
3380			mutex_lock(&vm_page_queue_free_lock);
3381
3382			RESET_STATE_OF_RUN();
3383
3384			/*
3385			 * reset our free page limit since we
3386			 * dropped the lock protecting the vm_page_free_queue
3387			 */
3388			free_available = vm_page_free_count - vm_page_free_reserved;
3389
3390			goto retry;
3391		}
3392
3393		for (m1 = m; m1 != VM_PAGE_NULL; m1 = NEXT_PAGE(m1)) {
3394
3395			if (wire == TRUE)
3396				m1->wire_count++;
3397			else
3398				m1->gobbled = TRUE;
3399		}
3400		if (wire == FALSE)
3401			vm_page_gobble_count += npages;
3402
3403		/*
3404		 * gobbled pages are also counted as wired pages
3405		 */
3406		vm_page_wire_count += npages;
3407
3408 		assert(vm_page_verify_contiguous(m, npages));
3409	}
3410done_scanning:
3411	vm_page_unlock_queues();
3412
3413#if DEBUG
3414	clock_get_system_microtime(&tv_end_sec, &tv_end_usec);
3415
3416	tv_end_sec -= tv_start_sec;
3417	if (tv_end_usec < tv_start_usec) {
3418		tv_end_sec--;
3419		tv_end_usec += 1000000;
3420	}
3421	tv_end_usec -= tv_start_usec;
3422	if (tv_end_usec >= 1000000) {
3423		tv_end_sec++;
3424		tv_end_usec -= 1000000;
3425	}
3426	printf("vm_page_find_contiguous(num=%d,low=%d): found %d pages in %d.%06ds...  scanned %d pages...  yielded %d times...  dumped run %d times... stole %d pages\n",
3427	       contig_pages, max_pnum, npages, tv_end_sec, tv_end_usec, page_idx, yielded, dumped_run, stolen_pages);
3428
3429#endif
3430#if MACH_ASSERT
3431	vm_page_verify_free_lists();
3432#endif
3433	return m;
3434}
3435
3436/*
3437 *	Allocate a list of contiguous, wired pages.
3438 */
3439kern_return_t
3440cpm_allocate(
3441	vm_size_t	size,
3442	vm_page_t	*list,
3443	ppnum_t		max_pnum,
3444	boolean_t	wire)
3445{
3446	vm_page_t		pages;
3447	unsigned int		npages;
3448
3449	if (size % page_size != 0)
3450		return KERN_INVALID_ARGUMENT;
3451
3452	npages = size / page_size;
3453
3454	/*
3455	 *	Obtain a pointer to a subset of the free
3456	 *	list large enough to satisfy the request;
3457	 *	the region will be physically contiguous.
3458	 */
3459	pages = vm_page_find_contiguous(npages, max_pnum, wire);
3460
3461	if (pages == VM_PAGE_NULL)
3462		return KERN_NO_SPACE;
3463	/*
3464	 * determine need for wakeups
3465	 */
3466	if ((vm_page_free_count < vm_page_free_min) ||
3467	    ((vm_page_free_count < vm_page_free_target) &&
3468	     ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
3469		thread_wakeup((event_t) &vm_page_free_wanted);
3470
3471#if CONFIG_EMBEDDED
3472	{
3473	int			percent_avail;
3474
3475	/*
3476	 * Decide if we need to poke the memorystatus notification thread.
3477	 */
3478	percent_avail =
3479		(vm_page_active_count + vm_page_inactive_count +
3480		 vm_page_speculative_count + vm_page_free_count +
3481		 (IP_VALID(memory_manager_default)?0:vm_page_purgeable_count)  ) * 100 /
3482		atop_64(max_mem);
3483	if (percent_avail <= (kern_memorystatus_level - 5)) {
3484		kern_memorystatus_level = percent_avail;
3485		thread_wakeup((event_t)&kern_memorystatus_wakeup);
3486	}
3487	}
3488#endif
3489	/*
3490	 *	The CPM pages should now be available and
3491	 *	ordered by ascending physical address.
3492	 */
3493	assert(vm_page_verify_contiguous(pages, npages));
3494
3495	*list = pages;
3496	return KERN_SUCCESS;
3497}
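
/*
 *	Illustrative caller sketch (not compiled): "npages" is a
 *	placeholder count, and passing 0 for max_pnum places no upper
 *	bound on the physical page numbers considered.
 *
 *		kern_return_t	kr;
 *		vm_page_t	pages;
 *
 *		kr = cpm_allocate(npages * PAGE_SIZE, &pages, 0, TRUE);
 *		if (kr != KERN_SUCCESS)
 *			return kr;
 *
 *	On success, "pages" is a list of npages wired pages, physically
 *	contiguous and ordered by ascending physical address, linked
 *	through NEXT_PAGE().
 */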
3498
3499
3500#include <mach_vm_debug.h>
3501#if	MACH_VM_DEBUG
3502
3503#include <mach_debug/hash_info.h>
3504#include <vm/vm_debug.h>
3505
3506/*
3507 *	Routine:	vm_page_info
3508 *	Purpose:
3509 *		Return information about the global VP table.
3510 *		Fills the buffer with as much information as possible
3511 *		and returns the desired size of the buffer.
3512 *	Conditions:
3513 *		Nothing locked.  The caller should provide
3514 *		possibly-pageable memory.
3515 */
3516
3517unsigned int
3518vm_page_info(
3519	hash_info_bucket_t *info,
3520	unsigned int count)
3521{
3522	unsigned int i;
3523
3524	if (vm_page_bucket_count < count)
3525		count = vm_page_bucket_count;
3526
3527	for (i = 0; i < count; i++) {
3528		vm_page_bucket_t *bucket = &vm_page_buckets[i];
3529		unsigned int bucket_count = 0;
3530		vm_page_t m;
3531
3532		simple_lock(&vm_page_bucket_lock);
3533		for (m = bucket->pages; m != VM_PAGE_NULL; m = m->next)
3534			bucket_count++;
3535		simple_unlock(&vm_page_bucket_lock);
3536
3537		/* don't touch pageable memory while holding locks */
3538		info[i].hib_count = bucket_count;
3539	}
3540
3541	return vm_page_bucket_count;
3542}
3543#endif	/* MACH_VM_DEBUG */
3544
3545#include <mach_kdb.h>
3546#if	MACH_KDB
3547
3548#include <ddb/db_output.h>
3549#include <vm/vm_print.h>
3550#define	printf	kdbprintf
3551
3552/*
3553 *	Routine:	vm_page_print [exported]
3554 */
3555void
3556vm_page_print(
3557	db_addr_t	db_addr)
3558{
3559	vm_page_t	p;
3560
3561	p = (vm_page_t) (long) db_addr;
3562
3563	iprintf("page 0x%x\n", p);
3564
3565	db_indent += 2;
3566
3567	iprintf("object=0x%x", p->object);
3568	printf(", offset=0x%x", p->offset);
3569	printf(", wire_count=%d", p->wire_count);
3570
3571	iprintf("%sinactive, %sactive, %sthrottled, %sgobbled, %slaundry, %sfree, %sref, %sencrypted\n",
3572		(p->inactive ? "" : "!"),
3573		(p->active ? "" : "!"),
3574		(p->throttled ? "" : "!"),
3575		(p->gobbled ? "" : "!"),
3576		(p->laundry ? "" : "!"),
3577		(p->free ? "" : "!"),
3578		(p->reference ? "" : "!"),
3579		(p->encrypted ? "" : "!"));
3580	iprintf("%sbusy, %swanted, %stabled, %sfictitious, %sprivate, %sprecious\n",
3581		(p->busy ? "" : "!"),
3582		(p->wanted ? "" : "!"),
3583		(p->tabled ? "" : "!"),
3584		(p->fictitious ? "" : "!"),
3585		(p->private ? "" : "!"),
3586		(p->precious ? "" : "!"));
3587	iprintf("%sabsent, %serror, %sdirty, %scleaning, %spageout, %sclustered\n",
3588		(p->absent ? "" : "!"),
3589		(p->error ? "" : "!"),
3590		(p->dirty ? "" : "!"),
3591		(p->cleaning ? "" : "!"),
3592		(p->pageout ? "" : "!"),
3593		(p->clustered ? "" : "!"));
3594	iprintf("%soverwriting, %srestart, %sunusual\n",
3595		(p->overwriting ? "" : "!"),
3596		(p->restart ? "" : "!"),
3597		(p->unusual ? "" : "!"));
3598
3599	iprintf("phys_page=0x%x", p->phys_page);
3600
3601	db_indent -= 2;
3602}
3603#endif	/* MACH_KDB */
3604