1/*
2 * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*
29 * @OSF_COPYRIGHT@
30 */
31/*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
49 *  School of Computer Science
50 *  Carnegie Mellon University
51 *  Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56/*
57 */
58/*
59 *	File:	vm/vm_page.c
60 *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
61 *
62 *	Resident memory management module.
63 */
64
65#include <debug.h>
66#include <libkern/OSAtomic.h>
67
68#include <mach/clock_types.h>
69#include <mach/vm_prot.h>
70#include <mach/vm_statistics.h>
71#include <mach/sdt.h>
72#include <kern/counters.h>
73#include <kern/sched_prim.h>
74#include <kern/task.h>
75#include <kern/thread.h>
76#include <kern/kalloc.h>
77#include <kern/zalloc.h>
78#include <kern/xpr.h>
79#include <vm/pmap.h>
80#include <vm/vm_init.h>
81#include <vm/vm_map.h>
82#include <vm/vm_page.h>
83#include <vm/vm_pageout.h>
84#include <vm/vm_kern.h>			/* kernel_memory_allocate() */
85#include <kern/misc_protos.h>
86#include <zone_debug.h>
87#include <vm/cpm.h>
88#include <pexpert/pexpert.h>
89
90#include <vm/vm_protos.h>
91#include <vm/memory_object.h>
92#include <vm/vm_purgeable_internal.h>
93
94#include <IOKit/IOHibernatePrivate.h>
95
96#include <sys/kdebug.h>
97
98#if defined(__arm__)
99#include <arm/mp.h>
100#endif
101
102boolean_t	hibernate_cleaning_in_progress = FALSE;
103boolean_t	vm_page_free_verify = TRUE;
104
105uint32_t	vm_lopage_free_count = 0;
106uint32_t	vm_lopage_free_limit = 0;
107uint32_t	vm_lopage_lowater    = 0;
108boolean_t	vm_lopage_refill = FALSE;
109boolean_t	vm_lopage_needed = FALSE;
110
111lck_mtx_ext_t	vm_page_queue_lock_ext;
112lck_mtx_ext_t	vm_page_queue_free_lock_ext;
113lck_mtx_ext_t	vm_purgeable_queue_lock_ext;
114
115int		speculative_age_index = 0;
116int		speculative_steal_index = 0;
117struct vm_speculative_age_q vm_page_queue_speculative[VM_PAGE_MAX_SPECULATIVE_AGE_Q + 1];
118
119
120__private_extern__ void		vm_page_init_lck_grp(void);
121
122static void		vm_page_free_prepare(vm_page_t	page);
123static vm_page_t	vm_page_grab_fictitious_common(ppnum_t phys_addr);
124
125
126
127
/*
 *	Associated with each page of user-allocatable memory is a
 *	page structure.
 */
132
133/*
134 *	These variables record the values returned by vm_page_bootstrap,
135 *	for debugging purposes.  The implementation of pmap_steal_memory
136 *	and pmap_startup here also uses them internally.
137 */
138
139vm_offset_t virtual_space_start;
140vm_offset_t virtual_space_end;
141uint32_t	vm_page_pages;
142
143/*
144 *	The vm_page_lookup() routine, which provides for fast
145 *	(virtual memory object, offset) to page lookup, employs
146 *	the following hash table.  The vm_page_{insert,remove}
147 *	routines install and remove associations in the table.
148 *	[This table is often called the virtual-to-physical,
149 *	or VP, table.]
150 */
151typedef struct {
152	vm_page_t	pages;
153#if	MACH_PAGE_HASH_STATS
154	int		cur_count;		/* current count */
155	int		hi_count;		/* high water mark */
156#endif /* MACH_PAGE_HASH_STATS */
157} vm_page_bucket_t;
158
159
160#define BUCKETS_PER_LOCK	16
161
162vm_page_bucket_t *vm_page_buckets;		/* Array of buckets */
163unsigned int	vm_page_bucket_count = 0;	/* How big is array? */
164unsigned int	vm_page_hash_mask;		/* Mask for hash function */
165unsigned int	vm_page_hash_shift;		/* Shift for hash function */
166uint32_t	vm_page_bucket_hash;		/* Basic bucket hash */
167unsigned int	vm_page_bucket_lock_count = 0;		/* How big is array of locks? */
168
169lck_spin_t	*vm_page_bucket_locks;
170
171
172#if	MACH_PAGE_HASH_STATS
173/* This routine is only for debug.  It is intended to be called by
174 * hand by a developer using a kernel debugger.  This routine prints
175 * out vm_page_hash table statistics to the kernel debug console.
176 */
177void
178hash_debug(void)
179{
180	int	i;
181	int	numbuckets = 0;
182	int	highsum = 0;
183	int	maxdepth = 0;
184
185	for (i = 0; i < vm_page_bucket_count; i++) {
186		if (vm_page_buckets[i].hi_count) {
187			numbuckets++;
188			highsum += vm_page_buckets[i].hi_count;
189			if (vm_page_buckets[i].hi_count > maxdepth)
190				maxdepth = vm_page_buckets[i].hi_count;
191		}
192	}
193	printf("Total number of buckets: %d\n", vm_page_bucket_count);
194	printf("Number used buckets:     %d = %d%%\n",
195		numbuckets, 100*numbuckets/vm_page_bucket_count);
196	printf("Number unused buckets:   %d = %d%%\n",
197		vm_page_bucket_count - numbuckets,
198		100*(vm_page_bucket_count-numbuckets)/vm_page_bucket_count);
199	printf("Sum of bucket max depth: %d\n", highsum);
200	printf("Average bucket depth:    %d.%2d\n",
201		highsum/vm_page_bucket_count,
202		highsum%vm_page_bucket_count);
203	printf("Maximum bucket depth:    %d\n", maxdepth);
204}
205#endif /* MACH_PAGE_HASH_STATS */
206
207/*
208 *	The virtual page size is currently implemented as a runtime
209 *	variable, but is constant once initialized using vm_set_page_size.
210 *	This initialization must be done in the machine-dependent
211 *	bootstrap sequence, before calling other machine-independent
212 *	initializations.
213 *
214 *	All references to the virtual page size outside this
215 *	module must use the PAGE_SIZE, PAGE_MASK and PAGE_SHIFT
216 *	constants.
217 */
218vm_size_t	page_size  = PAGE_SIZE;
219vm_size_t	page_mask  = PAGE_MASK;
220int		page_shift = PAGE_SHIFT;
221
222/*
223 *	Resident page structures are initialized from
224 *	a template (see vm_page_alloc).
225 *
226 *	When adding a new field to the virtual memory
227 *	object structure, be sure to add initialization
228 *	(see vm_page_bootstrap).
229 */
230struct vm_page	vm_page_template;
231
232vm_page_t	vm_pages = VM_PAGE_NULL;
233unsigned int	vm_pages_count = 0;
234ppnum_t		vm_page_lowest = 0;
235
236/*
237 *	Resident pages that represent real memory
238 *	are allocated from a set of free lists,
239 *	one per color.
240 */
241unsigned int	vm_colors;
242unsigned int    vm_color_mask;			/* mask is == (vm_colors-1) */
243unsigned int	vm_cache_geometry_colors = 0;	/* set by hw dependent code during startup */
244queue_head_t	vm_page_queue_free[MAX_COLORS];
245unsigned int	vm_page_free_wanted;
246unsigned int	vm_page_free_wanted_privileged;
247unsigned int	vm_page_free_count;
248unsigned int	vm_page_fictitious_count;
249
250unsigned int	vm_page_free_count_minimum;	/* debugging */
251
252/*
253 *	Occasionally, the virtual memory system uses
254 *	resident page structures that do not refer to
255 *	real pages, for example to leave a page with
256 *	important state information in the VP table.
257 *
258 *	These page structures are allocated the way
259 *	most other kernel structures are.
260 */
261zone_t	vm_page_zone;
262vm_locks_array_t vm_page_locks;
263decl_lck_mtx_data(,vm_page_alloc_lock)
264lck_mtx_ext_t vm_page_alloc_lock_ext;
265
266unsigned int io_throttle_zero_fill;
267
268unsigned int	vm_page_local_q_count = 0;
269unsigned int	vm_page_local_q_soft_limit = 250;
270unsigned int	vm_page_local_q_hard_limit = 500;
271struct vplq     *vm_page_local_q = NULL;
272
273/* N.B. Guard and fictitious pages must not
274 * be assigned a zero phys_page value.
275 */
276/*
277 *	Fictitious pages don't have a physical address,
278 *	but we must initialize phys_page to something.
279 *	For debugging, this should be a strange value
280 *	that the pmap module can recognize in assertions.
281 */
282ppnum_t vm_page_fictitious_addr = (ppnum_t) -1;
283
284/*
285 *	Guard pages are not accessible so they don't
286 * 	need a physical address, but we need to enter
287 *	one in the pmap.
288 *	Let's make it recognizable and make sure that
289 *	we don't use a real physical page with that
290 *	physical address.
291 */
292ppnum_t vm_page_guard_addr = (ppnum_t) -2;
293
294/*
295 *	Resident page structures are also chained on
296 *	queues that are used by the page replacement
297 *	system (pageout daemon).  These queues are
298 *	defined here, but are shared by the pageout
 *	module.  The inactive queue is broken into
 *	inactive and zf for convenience, as the
 *	pageout daemon often assigns a higher
 *	affinity to zf pages.
303 */
304queue_head_t	vm_page_queue_active;
305queue_head_t	vm_page_queue_inactive;
306queue_head_t	vm_page_queue_anonymous;	/* inactive memory queue for anonymous pages */
307queue_head_t	vm_page_queue_throttled;
308
309unsigned int	vm_page_active_count;
310unsigned int	vm_page_inactive_count;
311unsigned int	vm_page_anonymous_count;
312unsigned int	vm_page_throttled_count;
313unsigned int	vm_page_speculative_count;
314unsigned int	vm_page_wire_count = 0;
315unsigned int	vm_page_wire_count_initial;
316unsigned int	vm_page_gobble_count = 0;
317unsigned int	vm_page_wire_count_warning = 0;
318unsigned int	vm_page_gobble_count_warning = 0;
319
320unsigned int	vm_page_purgeable_count = 0; /* # of pages purgeable now */
321unsigned int	vm_page_purgeable_wired_count = 0; /* # of purgeable pages that are wired now */
322uint64_t	vm_page_purged_count = 0;    /* total count of purged pages */
323
324#if DEVELOPMENT || DEBUG
325unsigned int	vm_page_speculative_recreated = 0;
326unsigned int	vm_page_speculative_created = 0;
327unsigned int	vm_page_speculative_used = 0;
328#endif
329
330queue_head_t    vm_page_queue_cleaned;
331
332unsigned int	vm_page_cleaned_count = 0;
333unsigned int	vm_pageout_enqueued_cleaned = 0;
334
335uint64_t	max_valid_dma_address = 0xffffffffffffffffULL;
336ppnum_t		max_valid_low_ppnum = 0xffffffff;
337
338
339/*
340 *	Several page replacement parameters are also
341 *	shared with this module, so that page allocation
342 *	(done here in vm_page_alloc) can trigger the
343 *	pageout daemon.
344 */
345unsigned int	vm_page_free_target = 0;
346unsigned int	vm_page_free_min = 0;
347unsigned int	vm_page_throttle_limit = 0;
348uint32_t	vm_page_creation_throttle = 0;
349unsigned int	vm_page_inactive_target = 0;
350unsigned int   vm_page_anonymous_min = 0;
351unsigned int	vm_page_inactive_min = 0;
352unsigned int	vm_page_free_reserved = 0;
353unsigned int	vm_page_throttle_count = 0;
354
355
356/*
357 *	The VM system has a couple of heuristics for deciding
358 *	that pages are "uninteresting" and should be placed
359 *	on the inactive queue as likely candidates for replacement.
360 *	These variables let the heuristics be controlled at run-time
361 *	to make experimentation easier.
362 */
363
364boolean_t vm_page_deactivate_hint = TRUE;
365
366struct vm_page_stats_reusable vm_page_stats_reusable;
367
368/*
369 *	vm_set_page_size:
370 *
371 *	Sets the page size, perhaps based upon the memory
372 *	size.  Must be called before any use of page-size
373 *	dependent functions.
374 *
375 *	Sets page_shift and page_mask from page_size.
376 */
377void
378vm_set_page_size(void)
379{
380	page_mask = page_size - 1;
381
382	if ((page_mask & page_size) != 0)
383		panic("vm_set_page_size: page size not a power of two");
384
385	for (page_shift = 0; ; page_shift++)
386		if ((1U << page_shift) == page_size)
387			break;
388}
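#if 0
/*
 * Illustrative sketch only (hypothetical, not compiled): once
 * vm_set_page_size() has run, page-aligned arithmetic is typically done
 * with the page_mask/page_shift pair.  With a 4K page size, page_mask is
 * 0xFFF and page_shift is 12.
 */
static void
vm_page_size_sketch(void)
{
	vm_offset_t	addr = 0x12345;
	vm_offset_t	page_base  = addr & ~page_mask;			/* 0x12000 */
	vm_offset_t	page_round = (addr + page_mask) & ~page_mask;	/* 0x13000 */
	ppnum_t		page_num   = (ppnum_t)(addr >> page_shift);	/* 0x12    */
}
#endif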
389
390
/* Called once during startup, once the cache geometry is known.
 */
393static void
394vm_page_set_colors( void )
395{
396	unsigned int	n, override;
397
398	if ( PE_parse_boot_argn("colors", &override, sizeof (override)) )		/* colors specified as a boot-arg? */
399		n = override;
400	else if ( vm_cache_geometry_colors )			/* do we know what the cache geometry is? */
401		n = vm_cache_geometry_colors;
402	else	n = DEFAULT_COLORS;				/* use default if all else fails */
403
404	if ( n == 0 )
405		n = 1;
406	if ( n > MAX_COLORS )
407		n = MAX_COLORS;
408
409	/* the count must be a power of 2  */
410	if ( ( n & (n - 1)) != 0  )
411		panic("vm_page_set_colors");
412
413	vm_colors = n;
414	vm_color_mask = n - 1;
415}
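#if 0
/*
 * Illustrative sketch only (hypothetical, not compiled): because vm_colors
 * is a power of two, a free page's color -- and hence the free queue it
 * belongs on -- can be derived from its physical page number with a simple
 * mask, so consecutive physical pages cycle through the color queues.
 */
static void
vm_page_color_sketch(vm_page_t m)
{
	unsigned int	color;

	color = m->phys_page & vm_color_mask;		/* 0 .. vm_colors-1 */
	queue_enter(&vm_page_queue_free[color], m, vm_page_t, pageq);
}
#endif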
416
417
418lck_grp_t		vm_page_lck_grp_free;
419lck_grp_t		vm_page_lck_grp_queue;
420lck_grp_t		vm_page_lck_grp_local;
421lck_grp_t		vm_page_lck_grp_purge;
422lck_grp_t		vm_page_lck_grp_alloc;
423lck_grp_t		vm_page_lck_grp_bucket;
424lck_grp_attr_t		vm_page_lck_grp_attr;
425lck_attr_t		vm_page_lck_attr;
426
427
428__private_extern__ void
429vm_page_init_lck_grp(void)
430{
431	/*
	 * initialize the vm_page lock world
433	 */
434	lck_grp_attr_setdefault(&vm_page_lck_grp_attr);
435	lck_grp_init(&vm_page_lck_grp_free, "vm_page_free", &vm_page_lck_grp_attr);
436	lck_grp_init(&vm_page_lck_grp_queue, "vm_page_queue", &vm_page_lck_grp_attr);
437	lck_grp_init(&vm_page_lck_grp_local, "vm_page_queue_local", &vm_page_lck_grp_attr);
438	lck_grp_init(&vm_page_lck_grp_purge, "vm_page_purge", &vm_page_lck_grp_attr);
439	lck_grp_init(&vm_page_lck_grp_alloc, "vm_page_alloc", &vm_page_lck_grp_attr);
440	lck_grp_init(&vm_page_lck_grp_bucket, "vm_page_bucket", &vm_page_lck_grp_attr);
441	lck_attr_setdefault(&vm_page_lck_attr);
442	lck_mtx_init_ext(&vm_page_alloc_lock, &vm_page_alloc_lock_ext, &vm_page_lck_grp_alloc, &vm_page_lck_attr);
443}
444
445void
446vm_page_init_local_q()
447{
448	unsigned int		num_cpus;
449	unsigned int		i;
450	struct vplq     	*t_local_q;
451
452	num_cpus = ml_get_max_cpus();
453
454	/*
455	 * no point in this for a uni-processor system
456	 */
457	if (num_cpus >= 2) {
458		t_local_q = (struct vplq *)kalloc(num_cpus * sizeof(struct vplq));
459
460		for (i = 0; i < num_cpus; i++) {
461			struct vpl	*lq;
462
463			lq = &t_local_q[i].vpl_un.vpl;
464			VPL_LOCK_INIT(lq, &vm_page_lck_grp_local, &vm_page_lck_attr);
465			queue_init(&lq->vpl_queue);
466			lq->vpl_count = 0;
467		}
468		vm_page_local_q_count = num_cpus;
469
470		vm_page_local_q = (struct vplq *)t_local_q;
471	}
472}
473
474
475/*
476 *	vm_page_bootstrap:
477 *
478 *	Initializes the resident memory module.
479 *
480 *	Allocates memory for the page cells, and
481 *	for the object/offset-to-page hash table headers.
482 *	Each page cell is initialized and placed on the free list.
483 *	Returns the range of available kernel virtual memory.
484 */
485
486void
487vm_page_bootstrap(
488	vm_offset_t		*startp,
489	vm_offset_t		*endp)
490{
491	register vm_page_t	m;
492	unsigned int		i;
493	unsigned int		log1;
494	unsigned int		log2;
495	unsigned int		size;
496
497	/*
498	 *	Initialize the vm_page template.
499	 */
500
501	m = &vm_page_template;
502	bzero(m, sizeof (*m));
503
504	m->pageq.next = NULL;
505	m->pageq.prev = NULL;
506	m->listq.next = NULL;
507	m->listq.prev = NULL;
508	m->next = VM_PAGE_NULL;
509
510	m->object = VM_OBJECT_NULL;		/* reset later */
511	m->offset = (vm_object_offset_t) -1;	/* reset later */
512
513	m->wire_count = 0;
514	m->local = FALSE;
515	m->inactive = FALSE;
516	m->active = FALSE;
517	m->pageout_queue = FALSE;
518	m->speculative = FALSE;
519	m->laundry = FALSE;
520	m->free = FALSE;
521	m->reference = FALSE;
522	m->gobbled = FALSE;
523	m->private = FALSE;
524	m->throttled = FALSE;
525	m->__unused_pageq_bits = 0;
526
527	m->phys_page = 0;		/* reset later */
528
529	m->busy = TRUE;
530	m->wanted = FALSE;
531	m->tabled = FALSE;
532	m->fictitious = FALSE;
533	m->pmapped = FALSE;
534	m->wpmapped = FALSE;
535	m->pageout = FALSE;
536	m->absent = FALSE;
537	m->error = FALSE;
538	m->dirty = FALSE;
539	m->cleaning = FALSE;
540	m->precious = FALSE;
541	m->clustered = FALSE;
542	m->overwriting = FALSE;
543	m->restart = FALSE;
544	m->unusual = FALSE;
545	m->encrypted = FALSE;
546	m->encrypted_cleaning = FALSE;
547	m->cs_validated = FALSE;
548	m->cs_tainted = FALSE;
549	m->no_cache = FALSE;
550	m->reusable = FALSE;
551	m->slid = FALSE;
552	m->was_dirty = FALSE;
553	m->__unused_object_bits = 0;
554
555
556	/*
557	 *	Initialize the page queues.
558	 */
559	vm_page_init_lck_grp();
560
561	lck_mtx_init_ext(&vm_page_queue_free_lock, &vm_page_queue_free_lock_ext, &vm_page_lck_grp_free, &vm_page_lck_attr);
562	lck_mtx_init_ext(&vm_page_queue_lock, &vm_page_queue_lock_ext, &vm_page_lck_grp_queue, &vm_page_lck_attr);
563	lck_mtx_init_ext(&vm_purgeable_queue_lock, &vm_purgeable_queue_lock_ext, &vm_page_lck_grp_purge, &vm_page_lck_attr);
564
565	for (i = 0; i < PURGEABLE_Q_TYPE_MAX; i++) {
566		int group;
567
568		purgeable_queues[i].token_q_head = 0;
569		purgeable_queues[i].token_q_tail = 0;
570		for (group = 0; group < NUM_VOLATILE_GROUPS; group++)
571		        queue_init(&purgeable_queues[i].objq[group]);
572
573		purgeable_queues[i].type = i;
574		purgeable_queues[i].new_pages = 0;
575#if MACH_ASSERT
576		purgeable_queues[i].debug_count_tokens = 0;
577		purgeable_queues[i].debug_count_objects = 0;
578#endif
579	};
580
581	for (i = 0; i < MAX_COLORS; i++ )
582		queue_init(&vm_page_queue_free[i]);
583
584	queue_init(&vm_lopage_queue_free);
585	queue_init(&vm_page_queue_active);
586	queue_init(&vm_page_queue_inactive);
587	queue_init(&vm_page_queue_cleaned);
588	queue_init(&vm_page_queue_throttled);
589	queue_init(&vm_page_queue_anonymous);
590
591	for ( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ ) {
592	        queue_init(&vm_page_queue_speculative[i].age_q);
593
594		vm_page_queue_speculative[i].age_ts.tv_sec = 0;
595		vm_page_queue_speculative[i].age_ts.tv_nsec = 0;
596	}
597	vm_page_free_wanted = 0;
598	vm_page_free_wanted_privileged = 0;
599
600	vm_page_set_colors();
601
602	/*
603	 *	Steal memory for the map and zone subsystems.
604	 */
605	zone_steal_memory();
606	vm_map_steal_memory();
607
608	/*
609	 *	Allocate (and initialize) the virtual-to-physical
610	 *	table hash buckets.
611	 *
612	 *	The number of buckets should be a power of two to
613	 *	get a good hash function.  The following computation
614	 *	chooses the first power of two that is greater
615	 *	than the number of physical pages in the system.
616	 */
617
618	if (vm_page_bucket_count == 0) {
619		unsigned int npages = pmap_free_pages();
620
621		vm_page_bucket_count = 1;
622		while (vm_page_bucket_count < npages)
623			vm_page_bucket_count <<= 1;
624	}
625	vm_page_bucket_lock_count = (vm_page_bucket_count + BUCKETS_PER_LOCK - 1) / BUCKETS_PER_LOCK;
626
627	vm_page_hash_mask = vm_page_bucket_count - 1;
628
629	/*
630	 *	Calculate object shift value for hashing algorithm:
631	 *		O = log2(sizeof(struct vm_object))
632	 *		B = log2(vm_page_bucket_count)
633	 *	        hash shifts the object left by
634	 *		B/2 - O
635	 */
636	size = vm_page_bucket_count;
637	for (log1 = 0; size > 1; log1++)
638		size /= 2;
639	size = sizeof(struct vm_object);
640	for (log2 = 0; size > 1; log2++)
641		size /= 2;
642	vm_page_hash_shift = log1/2 - log2 + 1;
643
	vm_page_bucket_hash = 1 << ((log1 + 1) >> 1);		/* Get (ceiling of sqrt of table size) */
	vm_page_bucket_hash |= 1 << ((log1 + 1) >> 2);		/* Get (ceiling of fourth root of table size) */
	vm_page_bucket_hash |= 1;							/* Set the low bit - it must always be 1 to ensure a unique series */
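	/*
	 * Worked example (illustrative): if vm_page_bucket_count were 2^20
	 * (log1 == 20) and sizeof(struct vm_object) were 256 (log2 == 8),
	 * then vm_page_hash_shift == 20/2 - 8 + 1 == 3 and
	 * vm_page_bucket_hash == (1 << 10) | (1 << 5) | 1 == 0x421.
	 */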
647
648	if (vm_page_hash_mask & vm_page_bucket_count)
649		printf("vm_page_bootstrap: WARNING -- strange page hash\n");
650
651	vm_page_buckets = (vm_page_bucket_t *)
652		pmap_steal_memory(vm_page_bucket_count *
653				  sizeof(vm_page_bucket_t));
654
655	vm_page_bucket_locks = (lck_spin_t *)
656		pmap_steal_memory(vm_page_bucket_lock_count *
657				  sizeof(lck_spin_t));
658
659
660	for (i = 0; i < vm_page_bucket_count; i++) {
661		register vm_page_bucket_t *bucket = &vm_page_buckets[i];
662
663		bucket->pages = VM_PAGE_NULL;
664#if     MACH_PAGE_HASH_STATS
665		bucket->cur_count = 0;
666		bucket->hi_count = 0;
667#endif /* MACH_PAGE_HASH_STATS */
668	}
669
670	for (i = 0; i < vm_page_bucket_lock_count; i++)
671	        lck_spin_init(&vm_page_bucket_locks[i], &vm_page_lck_grp_bucket, &vm_page_lck_attr);
672
673	/*
674	 *	Machine-dependent code allocates the resident page table.
675	 *	It uses vm_page_init to initialize the page frames.
676	 *	The code also returns to us the virtual space available
677	 *	to the kernel.  We don't trust the pmap module
678	 *	to get the alignment right.
679	 */
680
681
682	pmap_startup(&virtual_space_start, &virtual_space_end);
683	virtual_space_start = round_page(virtual_space_start);
684	virtual_space_end = trunc_page(virtual_space_end);
685
686	*startp = virtual_space_start;
687	*endp = virtual_space_end;
688
689	/*
690	 *	Compute the initial "wire" count.
691	 *	Up until now, the pages which have been set aside are not under
692	 *	the VM system's control, so although they aren't explicitly
693	 *	wired, they nonetheless can't be moved. At this moment,
694	 *	all VM managed pages are "free", courtesy of pmap_startup.
695	 */
696	assert((unsigned int) atop_64(max_mem) == atop_64(max_mem));
697	vm_page_wire_count = ((unsigned int) atop_64(max_mem)) - vm_page_free_count - vm_lopage_free_count;	/* initial value */
698	vm_page_wire_count_initial = vm_page_wire_count;
699	vm_page_free_count_minimum = vm_page_free_count;
700
701	printf("vm_page_bootstrap: %d free pages and %d wired pages\n",
702	       vm_page_free_count, vm_page_wire_count);
703
704	simple_lock_init(&vm_paging_lock, 0);
705}
706
707#ifndef	MACHINE_PAGES
708/*
709 *	We implement pmap_steal_memory and pmap_startup with the help
710 *	of two simpler functions, pmap_virtual_space and pmap_next_page.
711 */
712
713void *
714pmap_steal_memory(
715	vm_size_t size)
716{
717	vm_offset_t addr, vaddr;
718	ppnum_t	phys_page;
719
720	/*
	 *	Round the size up to a multiple of the pointer size.
722	 */
723
724	size = (size + sizeof (void *) - 1) &~ (sizeof (void *) - 1);
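	/*
	 * e.g. on LP64 (sizeof (void *) == 8), a 10-byte request is rounded
	 * up to 16 bytes; requests already pointer-aligned are unchanged.
	 */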
725
726	/*
727	 *	If this is the first call to pmap_steal_memory,
	 *	we have to initialize ourselves.
729	 */
730
731	if (virtual_space_start == virtual_space_end) {
732		pmap_virtual_space(&virtual_space_start, &virtual_space_end);
733
734		/*
735		 *	The initial values must be aligned properly, and
736		 *	we don't trust the pmap module to do it right.
737		 */
738
739		virtual_space_start = round_page(virtual_space_start);
740		virtual_space_end = trunc_page(virtual_space_end);
741	}
742
743	/*
744	 *	Allocate virtual memory for this request.
745	 */
746
747	addr = virtual_space_start;
748	virtual_space_start += size;
749
750	kprintf("pmap_steal_memory: %08lX - %08lX; size=%08lX\n", (long)addr, (long)virtual_space_start, (long)size);	/* (TEST/DEBUG) */
751
752	/*
753	 *	Allocate and map physical pages to back new virtual pages.
754	 */
755
756	for (vaddr = round_page(addr);
757	     vaddr < addr + size;
758	     vaddr += PAGE_SIZE) {
759
760		if (!pmap_next_page_hi(&phys_page))
761			panic("pmap_steal_memory");
762
763		/*
764		 *	XXX Logically, these mappings should be wired,
765		 *	but some pmap modules barf if they are.
766		 */
767#if defined(__LP64__)
768		pmap_pre_expand(kernel_pmap, vaddr);
769#endif
770
771		pmap_enter(kernel_pmap, vaddr, phys_page,
772			   VM_PROT_READ|VM_PROT_WRITE, VM_PROT_NONE,
773				VM_WIMG_USE_DEFAULT, FALSE);
774		/*
775		 * Account for newly stolen memory
776		 */
777		vm_page_wire_count++;
778
779	}
780
781	return (void *) addr;
782}
783
784void
785pmap_startup(
786	vm_offset_t *startp,
787	vm_offset_t *endp)
788{
789	unsigned int i, npages, pages_initialized, fill, fillval;
790	ppnum_t		phys_page;
791	addr64_t	tmpaddr;
792
793	/*
794	 *	We calculate how many page frames we will have
795	 *	and then allocate the page structures in one chunk.
796	 */
797
798	tmpaddr = (addr64_t)pmap_free_pages() * (addr64_t)PAGE_SIZE;	/* Get the amount of memory left */
799	tmpaddr = tmpaddr + (addr64_t)(round_page(virtual_space_start) - virtual_space_start);	/* Account for any slop */
	npages = (unsigned int)(tmpaddr / (addr64_t)(PAGE_SIZE + sizeof(*vm_pages)));	/* Figure how many pages fit, leaving room for each page's vm_page structure */
801
802	vm_pages = (vm_page_t) pmap_steal_memory(npages * sizeof *vm_pages);
803
804	/*
805	 *	Initialize the page frames.
806	 */
807	for (i = 0, pages_initialized = 0; i < npages; i++) {
808		if (!pmap_next_page(&phys_page))
809			break;
810		if (pages_initialized == 0 || phys_page < vm_page_lowest)
811			vm_page_lowest = phys_page;
812
813		vm_page_init(&vm_pages[i], phys_page, FALSE);
814		vm_page_pages++;
815		pages_initialized++;
816	}
817	vm_pages_count = pages_initialized;
818
819	/*
820	 * Check if we want to initialize pages to a known value
821	 */
822	fill = 0;								/* Assume no fill */
823	if (PE_parse_boot_argn("fill", &fillval, sizeof (fillval))) fill = 1;			/* Set fill */
824#if	DEBUG
825	/* This slows down booting the DEBUG kernel, particularly on
826	 * large memory systems, but is worthwhile in deterministically
827	 * trapping uninitialized memory usage.
828	 */
829	if (fill == 0) {
830		fill = 1;
831		fillval = 0xDEB8F177;
832	}
833#endif
834
835	if (fill)
836		kprintf("Filling vm_pages with pattern: 0x%x\n", fillval);
837	// -debug code remove
838	if (2 == vm_himemory_mode) {
839		// free low -> high so high is preferred
840		for (i = 1; i <= pages_initialized; i++) {
			if(fill) fillPage(vm_pages[i - 1].phys_page, fillval);		/* Fill the page with a known value if requested at boot */
842			vm_page_release(&vm_pages[i - 1]);
843		}
844	}
845	else
846	// debug code remove-
847
848	/*
849	 * Release pages in reverse order so that physical pages
850	 * initially get allocated in ascending addresses. This keeps
851	 * the devices (which must address physical memory) happy if
852	 * they require several consecutive pages.
853	 */
854	for (i = pages_initialized; i > 0; i--) {
		if(fill) fillPage(vm_pages[i - 1].phys_page, fillval);		/* Fill the page with a known value if requested at boot */
856		vm_page_release(&vm_pages[i - 1]);
857	}
858
859#if 0
860	{
861		vm_page_t xx, xxo, xxl;
862		int i, j, k, l;
863
864		j = 0;													/* (BRINGUP) */
865		xxl = 0;
866
867		for( i = 0; i < vm_colors; i++ ) {
868			queue_iterate(&vm_page_queue_free[i],
869				      xx,
870				      vm_page_t,
871				      pageq) {	/* BRINGUP */
872				j++;												/* (BRINGUP) */
873				if(j > vm_page_free_count) {						/* (BRINGUP) */
874					panic("pmap_startup: too many pages, xx = %08X, xxl = %08X\n", xx, xxl);
875				}
876
877				l = vm_page_free_count - j;							/* (BRINGUP) */
878				k = 0;												/* (BRINGUP) */
879
880				if(((j - 1) & 0xFFFF) == 0) kprintf("checking number %d of %d\n", j, vm_page_free_count);
881
882				for(xxo = xx->pageq.next; xxo != &vm_page_queue_free[i]; xxo = xxo->pageq.next) {	/* (BRINGUP) */
883					k++;
884					if(k > l) panic("pmap_startup: too many in secondary check %d %d\n", k, l);
885					if((xx->phys_page & 0xFFFFFFFF) == (xxo->phys_page & 0xFFFFFFFF)) {	/* (BRINGUP) */
886						panic("pmap_startup: duplicate physaddr, xx = %08X, xxo = %08X\n", xx, xxo);
887					}
888				}
889
890				xxl = xx;
891			}
892		}
893
894		if(j != vm_page_free_count) {						/* (BRINGUP) */
895			panic("pmap_startup: vm_page_free_count does not match, calc =  %d, vm_page_free_count = %08X\n", j, vm_page_free_count);
896		}
897	}
898#endif
899
900
901	/*
902	 *	We have to re-align virtual_space_start,
903	 *	because pmap_steal_memory has been using it.
904	 */
905
906	virtual_space_start = round_page(virtual_space_start);
907
908	*startp = virtual_space_start;
909	*endp = virtual_space_end;
910}
911#endif	/* MACHINE_PAGES */
912
913/*
914 *	Routine:	vm_page_module_init
915 *	Purpose:
916 *		Second initialization pass, to be done after
917 *		the basic VM system is ready.
918 */
919void
920vm_page_module_init(void)
921{
922	vm_page_zone = zinit((vm_size_t) sizeof(struct vm_page),
923			     0, PAGE_SIZE, "vm pages");
924
925#if	ZONE_DEBUG
926	zone_debug_disable(vm_page_zone);
927#endif	/* ZONE_DEBUG */
928
929	zone_change(vm_page_zone, Z_CALLERACCT, FALSE);
930	zone_change(vm_page_zone, Z_EXPAND, FALSE);
931	zone_change(vm_page_zone, Z_EXHAUST, TRUE);
932	zone_change(vm_page_zone, Z_FOREIGN, TRUE);
933	zone_change(vm_page_zone, Z_GZALLOC_EXEMPT, TRUE);
934        /*
935         * Adjust zone statistics to account for the real pages allocated
936         * in vm_page_create(). [Q: is this really what we want?]
937         */
938        vm_page_zone->count += vm_page_pages;
939        vm_page_zone->sum_count += vm_page_pages;
940        vm_page_zone->cur_size += vm_page_pages * vm_page_zone->elem_size;
941}
942
943/*
944 *	Routine:	vm_page_create
945 *	Purpose:
946 *		After the VM system is up, machine-dependent code
947 *		may stumble across more physical memory.  For example,
948 *		memory that it was reserving for a frame buffer.
949 *		vm_page_create turns this memory into available pages.
950 */
951
952void
953vm_page_create(
954	ppnum_t start,
955	ppnum_t end)
956{
957	ppnum_t		phys_page;
958	vm_page_t 	m;
959
960	for (phys_page = start;
961	     phys_page < end;
962	     phys_page++) {
963		while ((m = (vm_page_t) vm_page_grab_fictitious_common(phys_page))
964			== VM_PAGE_NULL)
965			vm_page_more_fictitious();
966
967		m->fictitious = FALSE;
968		pmap_clear_noencrypt(phys_page);
969
970		vm_page_pages++;
971		vm_page_release(m);
972	}
973}
974
975/*
976 *	vm_page_hash:
977 *
978 *	Distributes the object/offset key pair among hash buckets.
979 *
980 *	NOTE:	The bucket count must be a power of 2
981 */
982#define vm_page_hash(object, offset) (\
983	( (natural_t)((uintptr_t)object * vm_page_bucket_hash) + ((uint32_t)atop_64(offset) ^ vm_page_bucket_hash))\
984	 & vm_page_hash_mask)
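#if 0
/*
 * Illustrative sketch only (hypothetical, not compiled): the lookup and
 * insert paths below go from an (object, offset) pair to a hash bucket and
 * to the spin lock that guards it; BUCKETS_PER_LOCK consecutive buckets
 * share a single lock.
 */
static void
vm_page_hash_sketch(vm_object_t object, vm_object_offset_t offset)
{
	int			hash_id;
	vm_page_bucket_t	*bucket;
	lck_spin_t		*bucket_lock;

	hash_id = vm_page_hash(object, offset);
	bucket = &vm_page_buckets[hash_id];
	bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];

	lck_spin_lock(bucket_lock);
	/* ... walk bucket->pages looking for (object, offset) ... */
	lck_spin_unlock(bucket_lock);
}
#endif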
985
986
987/*
988 *	vm_page_insert:		[ internal use only ]
989 *
990 *	Inserts the given mem entry into the object/object-page
991 *	table and object list.
992 *
993 *	The object must be locked.
994 */
995void
996vm_page_insert(
997	vm_page_t		mem,
998	vm_object_t		object,
999	vm_object_offset_t	offset)
1000{
1001	vm_page_insert_internal(mem, object, offset, FALSE, TRUE, FALSE);
1002}
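#if 0
/*
 * Illustrative caller sketch (hypothetical, not compiled): the object must
 * be locked exclusively across the insertion; the page queues lock is not
 * needed for the simple vm_page_insert() path.
 */
static void
vm_page_insert_sketch(vm_page_t mem, vm_object_t object, vm_object_offset_t offset)
{
	vm_object_lock(object);
	vm_page_insert(mem, object, offset);
	vm_object_unlock(object);
}
#endif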
1003
1004void
1005vm_page_insert_internal(
1006	vm_page_t		mem,
1007	vm_object_t		object,
1008	vm_object_offset_t	offset,
1009	boolean_t		queues_lock_held,
1010	boolean_t		insert_in_hash,
1011	boolean_t		batch_pmap_op)
1012{
1013	vm_page_bucket_t *bucket;
1014	lck_spin_t	*bucket_lock;
1015	int	hash_id;
1016
1017        XPR(XPR_VM_PAGE,
1018                "vm_page_insert, object 0x%X offset 0x%X page 0x%X\n",
1019                object, offset, mem, 0,0);
1020#if 0
1021	/*
1022	 * we may not hold the page queue lock
1023	 * so this check isn't safe to make
1024	 */
1025	VM_PAGE_CHECK(mem);
1026#endif
1027
1028	if (object == vm_submap_object) {
1029		/* the vm_submap_object is only a placeholder for submaps */
1030		panic("vm_page_insert(vm_submap_object,0x%llx)\n", offset);
1031	}
1032
1033	vm_object_lock_assert_exclusive(object);
1034#if DEBUG
1035	lck_mtx_assert(&vm_page_queue_lock,
1036		       queues_lock_held ? LCK_MTX_ASSERT_OWNED
1037		       			: LCK_MTX_ASSERT_NOTOWNED);
1038#endif	/* DEBUG */
1039
1040	if (insert_in_hash == TRUE) {
1041#if DEBUG
1042		if (mem->tabled || mem->object != VM_OBJECT_NULL)
1043			panic("vm_page_insert: page %p for (obj=%p,off=0x%llx) "
1044			      "already in (obj=%p,off=0x%llx)",
1045			      mem, object, offset, mem->object, mem->offset);
1046#endif
1047		assert(!object->internal || offset < object->vo_size);
1048
1049		/* only insert "pageout" pages into "pageout" objects,
1050		 * and normal pages into normal objects */
1051		assert(object->pageout == mem->pageout);
1052
1053		assert(vm_page_lookup(object, offset) == VM_PAGE_NULL);
1054
1055		/*
1056		 *	Record the object/offset pair in this page
1057		 */
1058
1059		mem->object = object;
1060		mem->offset = offset;
1061
1062		/*
1063		 *	Insert it into the object_object/offset hash table
1064		 */
1065		hash_id = vm_page_hash(object, offset);
1066		bucket = &vm_page_buckets[hash_id];
1067		bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1068
1069		lck_spin_lock(bucket_lock);
1070
1071		mem->next = bucket->pages;
1072		bucket->pages = mem;
1073#if     MACH_PAGE_HASH_STATS
1074		if (++bucket->cur_count > bucket->hi_count)
1075			bucket->hi_count = bucket->cur_count;
1076#endif /* MACH_PAGE_HASH_STATS */
1077
1078		lck_spin_unlock(bucket_lock);
1079	}
1080
1081	{
1082		unsigned int    cache_attr;
1083
1084		cache_attr = object->wimg_bits & VM_WIMG_MASK;
1085
1086		if (cache_attr != VM_WIMG_USE_DEFAULT) {
1087			PMAP_SET_CACHE_ATTR(mem, object, cache_attr, batch_pmap_op);
1088		}
1089	}
1090	/*
1091	 *	Now link into the object's list of backed pages.
1092	 */
1093
1094	VM_PAGE_INSERT(mem, object);
1095	mem->tabled = TRUE;
1096
1097	/*
1098	 *	Show that the object has one more resident page.
1099	 */
1100
1101	object->resident_page_count++;
1102	if (VM_PAGE_WIRED(mem)) {
1103		object->wired_page_count++;
1104	}
1105	assert(object->resident_page_count >= object->wired_page_count);
1106
1107	assert(!mem->reusable);
1108
1109	if (object->purgable == VM_PURGABLE_VOLATILE) {
1110		if (VM_PAGE_WIRED(mem)) {
1111			OSAddAtomic(1, &vm_page_purgeable_wired_count);
1112		} else {
1113			OSAddAtomic(1, &vm_page_purgeable_count);
1114		}
1115	} else if (object->purgable == VM_PURGABLE_EMPTY &&
1116		   mem->throttled) {
1117		/*
1118		 * This page belongs to a purged VM object but hasn't
1119		 * been purged (because it was "busy").
1120		 * It's in the "throttled" queue and hence not
1121		 * visible to vm_pageout_scan().  Move it to a pageable
1122		 * queue, so that it can eventually be reclaimed, instead
1123		 * of lingering in the "empty" object.
1124		 */
1125		if (queues_lock_held == FALSE)
1126			vm_page_lockspin_queues();
1127		vm_page_deactivate(mem);
1128		if (queues_lock_held == FALSE)
1129			vm_page_unlock_queues();
1130	}
1131}
1132
1133/*
1134 *	vm_page_replace:
1135 *
1136 *	Exactly like vm_page_insert, except that we first
1137 *	remove any existing page at the given offset in object.
1138 *
1139 *	The object must be locked.
1140 */
1141void
1142vm_page_replace(
1143	register vm_page_t		mem,
1144	register vm_object_t		object,
1145	register vm_object_offset_t	offset)
1146{
1147	vm_page_bucket_t *bucket;
1148	vm_page_t	 found_m = VM_PAGE_NULL;
1149	lck_spin_t	*bucket_lock;
1150	int		hash_id;
1151
1152#if 0
1153	/*
1154	 * we don't hold the page queue lock
1155	 * so this check isn't safe to make
1156	 */
1157	VM_PAGE_CHECK(mem);
1158#endif
1159	vm_object_lock_assert_exclusive(object);
1160#if DEBUG
1161	if (mem->tabled || mem->object != VM_OBJECT_NULL)
1162		panic("vm_page_replace: page %p for (obj=%p,off=0x%llx) "
1163		      "already in (obj=%p,off=0x%llx)",
1164		      mem, object, offset, mem->object, mem->offset);
1165	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_NOTOWNED);
1166#endif
1167	/*
1168	 *	Record the object/offset pair in this page
1169	 */
1170
1171	mem->object = object;
1172	mem->offset = offset;
1173
1174	/*
1175	 *	Insert it into the object_object/offset hash table,
1176	 *	replacing any page that might have been there.
1177	 */
1178
1179	hash_id = vm_page_hash(object, offset);
1180	bucket = &vm_page_buckets[hash_id];
1181	bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1182
1183	lck_spin_lock(bucket_lock);
1184
1185	if (bucket->pages) {
1186		vm_page_t *mp = &bucket->pages;
1187		vm_page_t m = *mp;
1188
1189		do {
1190			if (m->object == object && m->offset == offset) {
1191				/*
1192				 * Remove old page from hash list
1193				 */
1194				*mp = m->next;
1195
1196				found_m = m;
1197				break;
1198			}
1199			mp = &m->next;
1200		} while ((m = *mp));
1201
1202		mem->next = bucket->pages;
1203	} else {
1204		mem->next = VM_PAGE_NULL;
1205	}
1206	/*
1207	 * insert new page at head of hash list
1208	 */
1209	bucket->pages = mem;
1210
1211	lck_spin_unlock(bucket_lock);
1212
1213	if (found_m) {
1214	        /*
1215		 * there was already a page at the specified
1216		 * offset for this object... remove it from
1217		 * the object and free it back to the free list
1218		 */
1219		vm_page_free_unlocked(found_m, FALSE);
1220	}
1221	vm_page_insert_internal(mem, object, offset, FALSE, FALSE, FALSE);
1222}
1223
1224/*
1225 *	vm_page_remove:		[ internal use only ]
1226 *
1227 *	Removes the given mem entry from the object/offset-page
1228 *	table and the object page list.
1229 *
1230 *	The object must be locked.
1231 */
1232
1233void
1234vm_page_remove(
1235	vm_page_t	mem,
1236	boolean_t	remove_from_hash)
1237{
1238	vm_page_bucket_t *bucket;
1239	vm_page_t	this;
1240	lck_spin_t	*bucket_lock;
1241	int		hash_id;
1242
1243        XPR(XPR_VM_PAGE,
1244                "vm_page_remove, object 0x%X offset 0x%X page 0x%X\n",
1245                mem->object, mem->offset,
1246		mem, 0,0);
1247
1248	vm_object_lock_assert_exclusive(mem->object);
1249	assert(mem->tabled);
1250	assert(!mem->cleaning);
1251	assert(!mem->laundry);
1252#if 0
1253	/*
1254	 * we don't hold the page queue lock
1255	 * so this check isn't safe to make
1256	 */
1257	VM_PAGE_CHECK(mem);
1258#endif
1259	if (remove_from_hash == TRUE) {
1260		/*
1261		 *	Remove from the object_object/offset hash table
1262		 */
1263		hash_id = vm_page_hash(mem->object, mem->offset);
1264		bucket = &vm_page_buckets[hash_id];
1265		bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1266
1267		lck_spin_lock(bucket_lock);
1268
1269		if ((this = bucket->pages) == mem) {
1270			/* optimize for common case */
1271
1272			bucket->pages = mem->next;
1273		} else {
1274			vm_page_t	*prev;
1275
1276			for (prev = &this->next;
1277			     (this = *prev) != mem;
1278			     prev = &this->next)
1279				continue;
1280			*prev = this->next;
1281		}
1282#if     MACH_PAGE_HASH_STATS
1283		bucket->cur_count--;
1284#endif /* MACH_PAGE_HASH_STATS */
1285
1286		lck_spin_unlock(bucket_lock);
1287	}
1288	/*
1289	 *	Now remove from the object's list of backed pages.
1290	 */
1291
1292	VM_PAGE_REMOVE(mem);
1293
1294	/*
1295	 *	And show that the object has one fewer resident
1296	 *	page.
1297	 */
1298
1299	assert(mem->object->resident_page_count > 0);
1300	mem->object->resident_page_count--;
1301
1302	if (!mem->object->internal && (mem->object->objq.next || mem->object->objq.prev)) {
1303		if (mem->object->resident_page_count == 0)
1304			vm_object_cache_remove(mem->object);
1305	}
1306
1307	if (VM_PAGE_WIRED(mem)) {
1308		assert(mem->object->wired_page_count > 0);
1309		mem->object->wired_page_count--;
1310	}
1311	assert(mem->object->resident_page_count >=
1312	       mem->object->wired_page_count);
1313	if (mem->reusable) {
1314		assert(mem->object->reusable_page_count > 0);
1315		mem->object->reusable_page_count--;
1316		assert(mem->object->reusable_page_count <=
1317		       mem->object->resident_page_count);
1318		mem->reusable = FALSE;
1319		OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
1320		vm_page_stats_reusable.reused_remove++;
1321	} else if (mem->object->all_reusable) {
1322		OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
1323		vm_page_stats_reusable.reused_remove++;
1324	}
1325
1326	if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
1327		if (VM_PAGE_WIRED(mem)) {
1328			assert(vm_page_purgeable_wired_count > 0);
1329			OSAddAtomic(-1, &vm_page_purgeable_wired_count);
1330		} else {
1331			assert(vm_page_purgeable_count > 0);
1332			OSAddAtomic(-1, &vm_page_purgeable_count);
1333		}
1334	}
1335	if (mem->object->set_cache_attr == TRUE)
1336		pmap_set_cache_attributes(mem->phys_page, 0);
1337
1338	mem->tabled = FALSE;
1339	mem->object = VM_OBJECT_NULL;
1340	mem->offset = (vm_object_offset_t) -1;
1341}
1342
1343
1344/*
1345 *	vm_page_lookup:
1346 *
1347 *	Returns the page associated with the object/offset
1348 *	pair specified; if none is found, VM_PAGE_NULL is returned.
1349 *
1350 *	The object must be locked.  No side effects.
1351 */
1352
1353unsigned long vm_page_lookup_hint = 0;
1354unsigned long vm_page_lookup_hint_next = 0;
1355unsigned long vm_page_lookup_hint_prev = 0;
1356unsigned long vm_page_lookup_hint_miss = 0;
1357unsigned long vm_page_lookup_bucket_NULL = 0;
1358unsigned long vm_page_lookup_miss = 0;
1359
1360
1361vm_page_t
1362vm_page_lookup(
1363	vm_object_t		object,
1364	vm_object_offset_t	offset)
1365{
1366	vm_page_t	mem;
1367	vm_page_bucket_t *bucket;
1368	queue_entry_t	qe;
1369	lck_spin_t	*bucket_lock;
1370	int		hash_id;
1371
1372	vm_object_lock_assert_held(object);
1373	mem = object->memq_hint;
1374
1375	if (mem != VM_PAGE_NULL) {
1376		assert(mem->object == object);
1377
1378		if (mem->offset == offset) {
1379			vm_page_lookup_hint++;
1380			return mem;
1381		}
1382		qe = queue_next(&mem->listq);
1383
1384		if (! queue_end(&object->memq, qe)) {
1385			vm_page_t	next_page;
1386
1387			next_page = (vm_page_t) qe;
1388			assert(next_page->object == object);
1389
1390			if (next_page->offset == offset) {
1391				vm_page_lookup_hint_next++;
1392				object->memq_hint = next_page; /* new hint */
1393				return next_page;
1394			}
1395		}
1396		qe = queue_prev(&mem->listq);
1397
1398		if (! queue_end(&object->memq, qe)) {
1399			vm_page_t prev_page;
1400
1401			prev_page = (vm_page_t) qe;
1402			assert(prev_page->object == object);
1403
1404			if (prev_page->offset == offset) {
1405				vm_page_lookup_hint_prev++;
1406				object->memq_hint = prev_page; /* new hint */
1407				return prev_page;
1408			}
1409		}
1410	}
1411	/*
1412	 * Search the hash table for this object/offset pair
1413	 */
1414	hash_id = vm_page_hash(object, offset);
1415	bucket = &vm_page_buckets[hash_id];
1416
1417	/*
1418	 * since we hold the object lock, we are guaranteed that no
1419	 * new pages can be inserted into this object... this in turn
	 * guarantees that the page we're looking for can't exist
	 * if the bucket it hashes to is currently NULL even when looked
	 * at outside the scope of the hash bucket lock... this is a
	 * really cheap optimization to avoid taking the lock
1424	 */
1425	if (bucket->pages == VM_PAGE_NULL) {
1426	        vm_page_lookup_bucket_NULL++;
1427
1428	        return (VM_PAGE_NULL);
1429	}
1430	bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1431
1432	lck_spin_lock(bucket_lock);
1433
1434	for (mem = bucket->pages; mem != VM_PAGE_NULL; mem = mem->next) {
1435#if 0
1436		/*
1437		 * we don't hold the page queue lock
1438		 * so this check isn't safe to make
1439		 */
1440		VM_PAGE_CHECK(mem);
1441#endif
1442		if ((mem->object == object) && (mem->offset == offset))
1443			break;
1444	}
1445	lck_spin_unlock(bucket_lock);
1446
1447	if (mem != VM_PAGE_NULL) {
1448		if (object->memq_hint != VM_PAGE_NULL) {
1449			vm_page_lookup_hint_miss++;
1450		}
1451		assert(mem->object == object);
1452		object->memq_hint = mem;
1453	} else
1454	        vm_page_lookup_miss++;
1455
1456	return(mem);
1457}
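#if 0
/*
 * Illustrative caller sketch (hypothetical, not compiled): the object lock
 * (shared or exclusive) must be held across the lookup, and the result is
 * only stable for as long as that lock is held.
 */
static void
vm_page_lookup_sketch(vm_object_t object, vm_object_offset_t offset)
{
	vm_page_t	m;

	vm_object_lock(object);
	m = vm_page_lookup(object, offset);
	if (m != VM_PAGE_NULL) {
		/* ... operate on the resident page while the object stays locked ... */
	}
	vm_object_unlock(object);
}
#endif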
1458
1459
1460/*
1461 *	vm_page_rename:
1462 *
1463 *	Move the given memory entry from its
1464 *	current object to the specified target object/offset.
1465 *
1466 *	The object must be locked.
1467 */
1468void
1469vm_page_rename(
1470	register vm_page_t		mem,
1471	register vm_object_t		new_object,
1472	vm_object_offset_t		new_offset,
1473	boolean_t			encrypted_ok)
1474{
1475	assert(mem->object != new_object);
1476
1477	/*
1478	 * ENCRYPTED SWAP:
1479	 * The encryption key is based on the page's memory object
1480	 * (aka "pager") and paging offset.  Moving the page to
1481	 * another VM object changes its "pager" and "paging_offset"
1482	 * so it has to be decrypted first, or we would lose the key.
1483	 *
1484	 * One exception is VM object collapsing, where we transfer pages
1485	 * from one backing object to its parent object.  This operation also
1486	 * transfers the paging information, so the <pager,paging_offset> info
1487	 * should remain consistent.  The caller (vm_object_do_collapse())
1488	 * sets "encrypted_ok" in this case.
1489	 */
1490	if (!encrypted_ok && mem->encrypted) {
1491		panic("vm_page_rename: page %p is encrypted\n", mem);
1492	}
1493
1494        XPR(XPR_VM_PAGE,
1495                "vm_page_rename, new object 0x%X, offset 0x%X page 0x%X\n",
1496                new_object, new_offset,
1497		mem, 0,0);
1498
1499	/*
1500	 *	Changes to mem->object require the page lock because
1501	 *	the pageout daemon uses that lock to get the object.
1502	 */
1503	vm_page_lockspin_queues();
1504
1505    	vm_page_remove(mem, TRUE);
1506	vm_page_insert_internal(mem, new_object, new_offset, TRUE, TRUE, FALSE);
1507
1508	vm_page_unlock_queues();
1509}
1510
1511/*
1512 *	vm_page_init:
1513 *
1514 *	Initialize the fields in a new page.
1515 *	This takes a structure with random values and initializes it
1516 *	so that it can be given to vm_page_release or vm_page_insert.
1517 */
1518void
1519vm_page_init(
1520	vm_page_t	mem,
1521	ppnum_t		phys_page,
1522	boolean_t	lopage)
1523{
1524	assert(phys_page);
1525
1526#if	DEBUG
1527	if ((phys_page != vm_page_fictitious_addr) && (phys_page != vm_page_guard_addr)) {
1528		if (!(pmap_valid_page(phys_page))) {
1529			panic("vm_page_init: non-DRAM phys_page 0x%x\n", phys_page);
1530		}
1531	}
1532#endif
1533	*mem = vm_page_template;
1534	mem->phys_page = phys_page;
1535#if 0
1536	/*
1537	 * we're leaving this turned off for now... currently pages
1538	 * come off the free list and are either immediately dirtied/referenced
1539	 * due to zero-fill or COW faults, or are used to read or write files...
1540	 * in the file I/O case, the UPL mechanism takes care of clearing
1541	 * the state of the HW ref/mod bits in a somewhat fragile way.
1542	 * Since we may change the way this works in the future (to toughen it up),
1543	 * I'm leaving this as a reminder of where these bits could get cleared
1544	 */
1545
1546	/*
1547	 * make sure both the h/w referenced and modified bits are
1548	 * clear at this point... we are especially dependent on
1549	 * not finding a 'stale' h/w modified in a number of spots
1550	 * once this page goes back into use
1551	 */
1552	pmap_clear_refmod(phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
1553#endif
1554	mem->lopage = lopage;
1555}
1556
1557/*
1558 *	vm_page_grab_fictitious:
1559 *
1560 *	Remove a fictitious page from the free list.
1561 *	Returns VM_PAGE_NULL if there are no free pages.
1562 */
1563int	c_vm_page_grab_fictitious = 0;
1564int	c_vm_page_grab_fictitious_failed = 0;
1565int	c_vm_page_release_fictitious = 0;
1566int	c_vm_page_more_fictitious = 0;
1567
1568vm_page_t
1569vm_page_grab_fictitious_common(
1570	ppnum_t phys_addr)
1571{
1572	vm_page_t	m;
1573
1574	if ((m = (vm_page_t)zget(vm_page_zone))) {
1575
1576		vm_page_init(m, phys_addr, FALSE);
1577		m->fictitious = TRUE;
1578
1579		c_vm_page_grab_fictitious++;
1580	} else
1581		c_vm_page_grab_fictitious_failed++;
1582
1583	return m;
1584}
1585
1586vm_page_t
1587vm_page_grab_fictitious(void)
1588{
1589	return vm_page_grab_fictitious_common(vm_page_fictitious_addr);
1590}
1591
1592vm_page_t
1593vm_page_grab_guard(void)
1594{
1595	return vm_page_grab_fictitious_common(vm_page_guard_addr);
1596}
1597
1598
1599/*
1600 *	vm_page_release_fictitious:
1601 *
1602 *	Release a fictitious page to the zone pool
1603 */
1604void
1605vm_page_release_fictitious(
1606	vm_page_t m)
1607{
1608	assert(!m->free);
1609	assert(m->fictitious);
1610	assert(m->phys_page == vm_page_fictitious_addr ||
1611	       m->phys_page == vm_page_guard_addr);
1612
1613	c_vm_page_release_fictitious++;
1614
1615	zfree(vm_page_zone, m);
1616}
1617
1618/*
1619 *	vm_page_more_fictitious:
1620 *
1621 *	Add more fictitious pages to the zone.
1622 *	Allowed to block. This routine is way intimate
1623 *	with the zones code, for several reasons:
1624 *	1. we need to carve some page structures out of physical
1625 *	   memory before zones work, so they _cannot_ come from
1626 *	   the zone_map.
1627 *	2. the zone needs to be collectable in order to prevent
1628 *	   growth without bound. These structures are used by
1629 *	   the device pager (by the hundreds and thousands), as
1630 *	   private pages for pageout, and as blocking pages for
1631 *	   pagein. Temporary bursts in demand should not result in
1632 *	   permanent allocation of a resource.
1633 *	3. To smooth allocation humps, we allocate single pages
1634 *	   with kernel_memory_allocate(), and cram them into the
1635 *	   zone.
1636 */
1637
1638void vm_page_more_fictitious(void)
1639{
1640	vm_offset_t	addr;
1641	kern_return_t	retval;
1642
1643	c_vm_page_more_fictitious++;
1644
1645	/*
1646	 * Allocate a single page from the zone_map. Do not wait if no physical
1647	 * pages are immediately available, and do not zero the space. We need
1648	 * our own blocking lock here to prevent having multiple,
1649	 * simultaneous requests from piling up on the zone_map lock. Exactly
1650	 * one (of our) threads should be potentially waiting on the map lock.
1651	 * If winner is not vm-privileged, then the page allocation will fail,
1652	 * and it will temporarily block here in the vm_page_wait().
1653	 */
1654	lck_mtx_lock(&vm_page_alloc_lock);
1655	/*
1656	 * If another thread allocated space, just bail out now.
1657	 */
1658	if (zone_free_count(vm_page_zone) > 5) {
1659		/*
1660		 * The number "5" is a small number that is larger than the
1661		 * number of fictitious pages that any single caller will
1662		 * attempt to allocate. Otherwise, a thread will attempt to
1663		 * acquire a fictitious page (vm_page_grab_fictitious), fail,
1664		 * release all of the resources and locks already acquired,
1665		 * and then call this routine. This routine finds the pages
		 * that the caller released, and so fails to allocate new space.
1667		 * The process repeats infinitely. The largest known number
1668		 * of fictitious pages required in this manner is 2. 5 is
1669		 * simply a somewhat larger number.
1670		 */
1671		lck_mtx_unlock(&vm_page_alloc_lock);
1672		return;
1673	}
1674
1675	retval = kernel_memory_allocate(zone_map,
1676					&addr, PAGE_SIZE, VM_PROT_ALL,
1677					KMA_KOBJECT|KMA_NOPAGEWAIT);
1678	if (retval != KERN_SUCCESS) {
1679		/*
1680		 * No page was available. Drop the
1681		 * lock to give another thread a chance at it, and
1682		 * wait for the pageout daemon to make progress.
1683		 */
1684		lck_mtx_unlock(&vm_page_alloc_lock);
1685		vm_page_wait(THREAD_UNINT);
1686		return;
1687	}
1688	zcram(vm_page_zone, addr, PAGE_SIZE);
1689
1690	lck_mtx_unlock(&vm_page_alloc_lock);
1691}
1692
1693
1694/*
1695 *	vm_pool_low():
1696 *
1697 *	Return true if it is not likely that a non-vm_privileged thread
1698 *	can get memory without blocking.  Advisory only, since the
1699 *	situation may change under us.
1700 */
1701int
1702vm_pool_low(void)
1703{
1704	/* No locking, at worst we will fib. */
1705	return( vm_page_free_count <= vm_page_free_reserved );
1706}
1707
1708
1709
1710/*
1711 * this is an interface to support bring-up of drivers
1712 * on platforms with physical memory > 4G...
1713 */
1714int		vm_himemory_mode = 0;
1715
1716
1717/*
1718 * this interface exists to support hardware controllers
1719 * incapable of generating DMAs with more than 32 bits
1720 * of address on platforms with physical memory > 4G...
1721 */
1722unsigned int	vm_lopages_allocated_q = 0;
1723unsigned int	vm_lopages_allocated_cpm_success = 0;
1724unsigned int	vm_lopages_allocated_cpm_failed = 0;
1725queue_head_t	vm_lopage_queue_free;
1726
1727vm_page_t
1728vm_page_grablo(void)
1729{
1730	vm_page_t	mem;
1731
1732	if (vm_lopage_needed == FALSE)
1733	        return (vm_page_grab());
1734
1735	lck_mtx_lock_spin(&vm_page_queue_free_lock);
1736
1737        if ( !queue_empty(&vm_lopage_queue_free)) {
1738                queue_remove_first(&vm_lopage_queue_free,
1739                                   mem,
1740                                   vm_page_t,
1741                                   pageq);
1742		assert(vm_lopage_free_count);
1743
1744                vm_lopage_free_count--;
1745		vm_lopages_allocated_q++;
1746
1747		if (vm_lopage_free_count < vm_lopage_lowater)
1748			vm_lopage_refill = TRUE;
1749
1750		lck_mtx_unlock(&vm_page_queue_free_lock);
1751	} else {
1752		lck_mtx_unlock(&vm_page_queue_free_lock);
1753
1754		if (cpm_allocate(PAGE_SIZE, &mem, atop(0xffffffff), 0, FALSE, KMA_LOMEM) != KERN_SUCCESS) {
1755
1756			lck_mtx_lock_spin(&vm_page_queue_free_lock);
1757			vm_lopages_allocated_cpm_failed++;
1758			lck_mtx_unlock(&vm_page_queue_free_lock);
1759
1760			return (VM_PAGE_NULL);
1761		}
1762		mem->busy = TRUE;
1763
1764		vm_page_lockspin_queues();
1765
1766		mem->gobbled = FALSE;
1767		vm_page_gobble_count--;
1768		vm_page_wire_count--;
1769
1770		vm_lopages_allocated_cpm_success++;
1771		vm_page_unlock_queues();
1772	}
1773	assert(mem->busy);
1774	assert(!mem->free);
1775	assert(!mem->pmapped);
1776	assert(!mem->wpmapped);
1777	assert(!pmap_is_noencrypt(mem->phys_page));
1778
1779	mem->pageq.next = NULL;
1780	mem->pageq.prev = NULL;
1781
1782	return (mem);
1783}
1784
1785
1786/*
1787 *	vm_page_grab:
1788 *
1789 *	first try to grab a page from the per-cpu free list...
1790 *	this must be done while pre-emption is disabled... if
1791 * 	a page is available, we're done...
1792 *	if no page is available, grab the vm_page_queue_free_lock
1793 *	and see if current number of free pages would allow us
1794 * 	to grab at least 1... if not, return VM_PAGE_NULL as before...
1795 *	if there are pages available, disable preemption and
1796 * 	recheck the state of the per-cpu free list... we could
1797 *	have been preempted and moved to a different cpu, or
1798 * 	some other thread could have re-filled it... if still
1799 *	empty, figure out how many pages we can steal from the
1800 *	global free queue and move to the per-cpu queue...
 *	return 1 of these pages when done... only wake up the
1802 * 	pageout_scan thread if we moved pages from the global
1803 *	list... no need for the wakeup if we've satisfied the
1804 *	request from the per-cpu queue.
1805 */
1806
1807#define COLOR_GROUPS_TO_STEAL	4
1808
1809
1810vm_page_t
1811vm_page_grab( void )
1812{
1813	vm_page_t	mem;
1814
1815
1816	disable_preemption();
1817
1818	if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
1819return_page_from_cpu_list:
1820	        PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
1821	        PROCESSOR_DATA(current_processor(), free_pages) = mem->pageq.next;
1822		mem->pageq.next = NULL;
1823
1824	        enable_preemption();
1825
1826		assert(mem->listq.next == NULL && mem->listq.prev == NULL);
1827		assert(mem->tabled == FALSE);
1828		assert(mem->object == VM_OBJECT_NULL);
1829		assert(!mem->laundry);
1830		assert(!mem->free);
1831		assert(pmap_verify_free(mem->phys_page));
1832		assert(mem->busy);
1833		assert(!mem->encrypted);
1834		assert(!mem->pmapped);
1835		assert(!mem->wpmapped);
1836		assert(!mem->active);
1837		assert(!mem->inactive);
1838		assert(!mem->throttled);
1839		assert(!mem->speculative);
1840		assert(!pmap_is_noencrypt(mem->phys_page));
1841
1842		return mem;
1843	}
1844	enable_preemption();
1845
1846
1847	/*
1848	 *	Optionally produce warnings if the wire or gobble
1849	 *	counts exceed some threshold.
1850	 */
1851	if (vm_page_wire_count_warning > 0
1852	    && vm_page_wire_count >= vm_page_wire_count_warning) {
1853		printf("mk: vm_page_grab(): high wired page count of %d\n",
1854			vm_page_wire_count);
1855		assert(vm_page_wire_count < vm_page_wire_count_warning);
1856	}
1857	if (vm_page_gobble_count_warning > 0
1858	    && vm_page_gobble_count >= vm_page_gobble_count_warning) {
1859		printf("mk: vm_page_grab(): high gobbled page count of %d\n",
1860			vm_page_gobble_count);
1861		assert(vm_page_gobble_count < vm_page_gobble_count_warning);
1862	}
1863
1864	lck_mtx_lock_spin(&vm_page_queue_free_lock);
1865
1866	/*
1867	 *	Only let privileged threads (involved in pageout)
1868	 *	dip into the reserved pool.
1869	 */
1870	if ((vm_page_free_count < vm_page_free_reserved) &&
1871	    !(current_thread()->options & TH_OPT_VMPRIV)) {
1872		lck_mtx_unlock(&vm_page_queue_free_lock);
1873		mem = VM_PAGE_NULL;
1874	}
1875	else {
1876	       vm_page_t	head;
1877	       vm_page_t	tail;
1878	       unsigned int	pages_to_steal;
1879	       unsigned int	color;
1880
1881	       while ( vm_page_free_count == 0 ) {
1882
1883			lck_mtx_unlock(&vm_page_queue_free_lock);
1884			/*
1885			 * must be a privileged thread to be
1886			 * in this state since a non-privileged
1887			 * thread would have bailed if we were
1888			 * under the vm_page_free_reserved mark
1889			 */
1890			VM_PAGE_WAIT();
1891			lck_mtx_lock_spin(&vm_page_queue_free_lock);
1892		}
1893
1894		disable_preemption();
1895
1896		if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
1897			lck_mtx_unlock(&vm_page_queue_free_lock);
1898
1899		        /*
1900			 * we got preempted and moved to another processor
1901			 * or we got preempted and someone else ran and filled the cache
1902			 */
1903			goto return_page_from_cpu_list;
1904		}
1905		if (vm_page_free_count <= vm_page_free_reserved)
1906		        pages_to_steal = 1;
1907		else {
1908		        pages_to_steal = COLOR_GROUPS_TO_STEAL * vm_colors;
1909
1910			if (pages_to_steal > (vm_page_free_count - vm_page_free_reserved))
1911			        pages_to_steal = (vm_page_free_count - vm_page_free_reserved);
1912		}
1913		color = PROCESSOR_DATA(current_processor(), start_color);
1914		head = tail = NULL;
1915
1916		while (pages_to_steal--) {
1917		        if (--vm_page_free_count < vm_page_free_count_minimum)
1918			        vm_page_free_count_minimum = vm_page_free_count;
1919
1920			while (queue_empty(&vm_page_queue_free[color]))
1921			        color = (color + 1) & vm_color_mask;
1922
1923			queue_remove_first(&vm_page_queue_free[color],
1924					   mem,
1925					   vm_page_t,
1926					   pageq);
1927			mem->pageq.next = NULL;
1928			mem->pageq.prev = NULL;
1929
1930			assert(!mem->active);
1931			assert(!mem->inactive);
1932			assert(!mem->throttled);
1933			assert(!mem->speculative);
1934
1935			color = (color + 1) & vm_color_mask;
1936
1937			if (head == NULL)
1938				head = mem;
1939			else
1940			        tail->pageq.next = (queue_t)mem;
1941		        tail = mem;
1942
1943			mem->pageq.prev = NULL;
1944			assert(mem->listq.next == NULL && mem->listq.prev == NULL);
1945			assert(mem->tabled == FALSE);
1946			assert(mem->object == VM_OBJECT_NULL);
1947			assert(!mem->laundry);
1948			assert(mem->free);
1949			mem->free = FALSE;
1950
1951			assert(pmap_verify_free(mem->phys_page));
1952			assert(mem->busy);
1953			assert(!mem->free);
1954			assert(!mem->encrypted);
1955			assert(!mem->pmapped);
1956			assert(!mem->wpmapped);
1957			assert(!pmap_is_noencrypt(mem->phys_page));
1958		}
1959		PROCESSOR_DATA(current_processor(), free_pages) = head->pageq.next;
1960		PROCESSOR_DATA(current_processor(), start_color) = color;
1961
1962		/*
1963		 * satisfy this request
1964		 */
1965	        PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
1966		mem = head;
1967		mem->pageq.next = NULL;
1968
1969		lck_mtx_unlock(&vm_page_queue_free_lock);
1970
1971		enable_preemption();
1972	}
1973	/*
1974	 *	Decide if we should poke the pageout daemon.
1975	 *	We do this if the free count is less than the low
1976	 *	water mark, or if the free count is less than the high
1977	 *	water mark (but above the low water mark) and the inactive
1978	 *	count is less than its target.
1979	 *
1980	 *	We don't have the counts locked ... if they change a little,
1981	 *	it doesn't really matter.
1982	 */
1983	if ((vm_page_free_count < vm_page_free_min) ||
1984	     ((vm_page_free_count < vm_page_free_target) &&
1985	      ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
1986	         thread_wakeup((event_t) &vm_page_free_wanted);
1987
1988	VM_CHECK_MEMORYSTATUS;
1989
1990//	dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 4);	/* (TEST/DEBUG) */
1991
1992	return mem;
1993}
1994
1995/*
1996 *	vm_page_release:
1997 *
1998 *	Return a page to the free list.
1999 */
2000
2001void
2002vm_page_release(
2003	register vm_page_t	mem)
2004{
2005	unsigned int	color;
2006	int	need_wakeup = 0;
2007	int	need_priv_wakeup = 0;
2008
2009
2010	assert(!mem->private && !mem->fictitious);
2011	if (vm_page_free_verify) {
2012		assert(pmap_verify_free(mem->phys_page));
2013	}
2014//	dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 5);	/* (TEST/DEBUG) */
2015
2016	pmap_clear_noencrypt(mem->phys_page);
2017
2018	lck_mtx_lock_spin(&vm_page_queue_free_lock);
2019#if DEBUG
2020	if (mem->free)
2021		panic("vm_page_release");
2022#endif
2023
2024	assert(mem->busy);
2025	assert(!mem->laundry);
2026	assert(mem->object == VM_OBJECT_NULL);
2027	assert(mem->pageq.next == NULL &&
2028	       mem->pageq.prev == NULL);
2029	assert(mem->listq.next == NULL &&
2030	       mem->listq.prev == NULL);
2031
2032	if ((mem->lopage == TRUE || vm_lopage_refill == TRUE) &&
2033	    vm_lopage_free_count < vm_lopage_free_limit &&
2034	    mem->phys_page < max_valid_low_ppnum) {
2035	        /*
2036		 * this exists to support hardware controllers
2037		 * incapable of generating DMAs with more than 32 bits
2038		 * of address on platforms with physical memory > 4G...
2039		 */
2040		queue_enter_first(&vm_lopage_queue_free,
2041				  mem,
2042				  vm_page_t,
2043				  pageq);
2044		vm_lopage_free_count++;
2045
2046		if (vm_lopage_free_count >= vm_lopage_free_limit)
2047			vm_lopage_refill = FALSE;
2048
2049		mem->lopage = TRUE;
2050	} else {
2051		mem->lopage = FALSE;
2052		mem->free = TRUE;
2053
2054	        color = mem->phys_page & vm_color_mask;
2055		queue_enter_first(&vm_page_queue_free[color],
2056				  mem,
2057				  vm_page_t,
2058				  pageq);
2059		vm_page_free_count++;
2060		/*
2061		 *	Check if we should wake up someone waiting for page.
2062		 *	But don't bother waking them unless they can allocate.
2063		 *
2064		 *	We wakeup only one thread, to prevent starvation.
2065		 *	Because the scheduling system handles wait queues FIFO,
2066		 *	if we wakeup all waiting threads, one greedy thread
2067		 *	can starve multiple niceguy threads.  When the threads
2068		 *	all wakeup, the greedy threads runs first, grabs the page,
2069		 *	and waits for another page.  It will be the first to run
2070		 *	when the next page is freed.
2071		 *
2072		 *	However, there is a slight danger here.
2073		 *	The thread we wake might not use the free page.
2074		 *	Then the other threads could wait indefinitely
2075		 *	while the page goes unused.  To forestall this,
2076		 *	the pageout daemon will keep making free pages
2077		 *	as long as vm_page_free_wanted is non-zero.
2078		 */
2079
2080		assert(vm_page_free_count > 0);
2081		if (vm_page_free_wanted_privileged > 0) {
2082		        vm_page_free_wanted_privileged--;
2083			need_priv_wakeup = 1;
2084		} else if (vm_page_free_wanted > 0 &&
2085			   vm_page_free_count > vm_page_free_reserved) {
2086		        vm_page_free_wanted--;
2087			need_wakeup = 1;
2088		}
2089	}
2090	lck_mtx_unlock(&vm_page_queue_free_lock);
2091
2092	if (need_priv_wakeup)
2093		thread_wakeup_one((event_t) &vm_page_free_wanted_privileged);
2094	else if (need_wakeup)
2095		thread_wakeup_one((event_t) &vm_page_free_count);
2096
2097	VM_CHECK_MEMORYSTATUS;
2098}
2099
2100/*
2101 *	vm_page_wait:
2102 *
2103 *	Wait for a page to become available.
2104 *	If there are plenty of free pages, then we don't sleep.
2105 *
2106 *	Returns:
2107 *		TRUE:  There may be another page, try again
2108 *		FALSE: We were interrupted out of our wait, don't try again
2109 */
2110
2111boolean_t
2112vm_page_wait(
2113	int	interruptible )
2114{
2115	/*
2116	 *	We can't use vm_page_free_reserved to make this
2117	 *	determination.  Consider: some thread might
2118	 *	need to allocate two pages.  The first allocation
2119	 *	succeeds, the second fails.  After the first page is freed,
2120	 *	a call to vm_page_wait must really block.
2121	 */
2122	kern_return_t	wait_result;
2123	int          	need_wakeup = 0;
2124	int		is_privileged = current_thread()->options & TH_OPT_VMPRIV;
2125
2126	lck_mtx_lock_spin(&vm_page_queue_free_lock);
2127
2128	if (is_privileged && vm_page_free_count) {
2129		lck_mtx_unlock(&vm_page_queue_free_lock);
2130		return TRUE;
2131	}
2132	if (vm_page_free_count < vm_page_free_target) {
2133
2134	        if (is_privileged) {
2135		        if (vm_page_free_wanted_privileged++ == 0)
2136			        need_wakeup = 1;
2137			wait_result = assert_wait((event_t)&vm_page_free_wanted_privileged, interruptible);
2138		} else {
2139		        if (vm_page_free_wanted++ == 0)
2140			        need_wakeup = 1;
2141			wait_result = assert_wait((event_t)&vm_page_free_count, interruptible);
2142		}
2143		lck_mtx_unlock(&vm_page_queue_free_lock);
2144		counter(c_vm_page_wait_block++);
2145
2146		if (need_wakeup)
2147			thread_wakeup((event_t)&vm_page_free_wanted);
2148
2149		if (wait_result == THREAD_WAITING)
2150			wait_result = thread_block(THREAD_CONTINUE_NULL);
2151
2152		return(wait_result == THREAD_AWAKENED);
2153	} else {
2154		lck_mtx_unlock(&vm_page_queue_free_lock);
2155		return TRUE;
2156	}
2157}
2158
2159/*
2160 *	vm_page_alloc:
2161 *
2162 *	Allocate and return a memory cell associated
2163 *	with this VM object/offset pair.
2164 *
2165 *	Object must be locked.
2166 */
2167
2168vm_page_t
2169vm_page_alloc(
2170	vm_object_t		object,
2171	vm_object_offset_t	offset)
2172{
2173	register vm_page_t	mem;
2174
2175	vm_object_lock_assert_exclusive(object);
2176	mem = vm_page_grab();
2177	if (mem == VM_PAGE_NULL)
2178		return VM_PAGE_NULL;
2179
2180	vm_page_insert(mem, object, offset);
2181
2182	return(mem);
2183}
2184
2185vm_page_t
2186vm_page_alloclo(
2187	vm_object_t		object,
2188	vm_object_offset_t	offset)
2189{
2190	register vm_page_t	mem;
2191
2192	vm_object_lock_assert_exclusive(object);
2193	mem = vm_page_grablo();
2194	if (mem == VM_PAGE_NULL)
2195		return VM_PAGE_NULL;
2196
2197	vm_page_insert(mem, object, offset);
2198
2199	return(mem);
2200}
2201
2202
2203/*
2204 *	vm_page_alloc_guard:
2205 *
2206 * 	Allocate a fictitious page which will be used
2207 *	as a guard page.  The page will be inserted into
2208 *	the object and returned to the caller.
2209 */
2210
2211vm_page_t
2212vm_page_alloc_guard(
2213	vm_object_t		object,
2214	vm_object_offset_t	offset)
2215{
2216	register vm_page_t	mem;
2217
2218	vm_object_lock_assert_exclusive(object);
2219	mem = vm_page_grab_guard();
2220	if (mem == VM_PAGE_NULL)
2221		return VM_PAGE_NULL;
2222
2223	vm_page_insert(mem, object, offset);
2224
2225	return(mem);
2226}
2227
2228
2229counter(unsigned int c_laundry_pages_freed = 0;)
2230
2231/*
2232 *	vm_page_free_prepare:
2233 *
2234 *	Removes page from any queue it may be on
2235 *	and disassociates it from its VM object.
2236 *
2237 *	Object and page queues must be locked prior to entry.
2238 */
2239static void
2240vm_page_free_prepare(
2241	vm_page_t	mem)
2242{
2243	vm_page_free_prepare_queues(mem);
2244	vm_page_free_prepare_object(mem, TRUE);
2245}
2246
2247
2248void
2249vm_page_free_prepare_queues(
2250	vm_page_t	mem)
2251{
2252	VM_PAGE_CHECK(mem);
2253	assert(!mem->free);
2254	assert(!mem->cleaning);
2255#if DEBUG
2256	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2257	if (mem->free)
2258		panic("vm_page_free: freeing page on free list\n");
2259#endif
2260	if (mem->object) {
2261		vm_object_lock_assert_exclusive(mem->object);
2262	}
2263	if (mem->laundry) {
2264		/*
2265		 * We may have to free a page while it's being laundered
2266		 * if we lost its pager (due to a forced unmount, for example).
2267		 * We need to call vm_pageout_steal_laundry() before removing
2268		 * the page from its VM object, so that we can remove it
2269		 * from its pageout queue and adjust the laundry accounting
2270		 */
2271		vm_pageout_steal_laundry(mem, TRUE);
2272		counter(++c_laundry_pages_freed);
2273	}
2274
2275	VM_PAGE_QUEUES_REMOVE(mem);	/* clears local/active/inactive/throttled/speculative */
2276
2277	if (VM_PAGE_WIRED(mem)) {
2278		if (mem->object) {
2279			assert(mem->object->wired_page_count > 0);
2280			mem->object->wired_page_count--;
2281			assert(mem->object->resident_page_count >=
2282			       mem->object->wired_page_count);
2283
2284			if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
2285				OSAddAtomic(+1, &vm_page_purgeable_count);
2286				assert(vm_page_purgeable_wired_count > 0);
2287				OSAddAtomic(-1, &vm_page_purgeable_wired_count);
2288			}
2289		}
2290		if (!mem->private && !mem->fictitious)
2291			vm_page_wire_count--;
2292		mem->wire_count = 0;
2293		assert(!mem->gobbled);
2294	} else if (mem->gobbled) {
2295		if (!mem->private && !mem->fictitious)
2296			vm_page_wire_count--;
2297		vm_page_gobble_count--;
2298	}
2299}
2300
2301
2302void
2303vm_page_free_prepare_object(
2304	vm_page_t	mem,
2305	boolean_t	remove_from_hash)
2306{
2307	if (mem->tabled)
2308		vm_page_remove(mem, remove_from_hash);	/* clears tabled, object, offset */
2309
2310	PAGE_WAKEUP(mem);		/* clears wanted */
2311
2312	if (mem->private) {
2313		mem->private = FALSE;
2314		mem->fictitious = TRUE;
2315		mem->phys_page = vm_page_fictitious_addr;
2316	}
2317	if ( !mem->fictitious) {
2318		vm_page_init(mem, mem->phys_page, mem->lopage);
2319	}
2320}
2321
2322
2323/*
2324 *	vm_page_free:
2325 *
2326 *	Returns the given page to the free list,
2327 *	disassociating it with any VM object.
2328 *
2329 *	Object and page queues must be locked prior to entry.
2330 */
2331void
2332vm_page_free(
2333	vm_page_t	mem)
2334{
2335	vm_page_free_prepare(mem);
2336
2337	if (mem->fictitious) {
2338		vm_page_release_fictitious(mem);
2339	} else {
2340		vm_page_release(mem);
2341	}
2342}
2343
2344
2345void
2346vm_page_free_unlocked(
2347	vm_page_t	mem,
2348	boolean_t	remove_from_hash)
2349{
2350	vm_page_lockspin_queues();
2351	vm_page_free_prepare_queues(mem);
2352	vm_page_unlock_queues();
2353
2354	vm_page_free_prepare_object(mem, remove_from_hash);
2355
2356	if (mem->fictitious) {
2357		vm_page_release_fictitious(mem);
2358	} else {
2359		vm_page_release(mem);
2360	}
2361}
2362
2363
2364/*
2365 * Free a list of pages.  The list can be up to several hundred pages,
2366 * as blocked up by vm_pageout_scan().
2367 * The big win is not having to take the free list lock once
2368 * per page.
2369 */
2370void
2371vm_page_free_list(
2372	vm_page_t	freeq,
2373	boolean_t	prepare_object)
2374{
2375        vm_page_t	mem;
2376        vm_page_t	nxt;
2377	vm_page_t	local_freeq;
2378	int		pg_count;
2379
2380	while (freeq) {
2381
2382		pg_count = 0;
2383		local_freeq = VM_PAGE_NULL;
2384		mem = freeq;
2385
2386		/*
2387		 * break up the processing into smaller chunks so
2388		 * that we can 'pipeline' the pages onto the
2389		 * free list w/o introducing too much
2390		 * contention on the global free queue lock
2391		 */
2392		while (mem && pg_count < 64) {
2393
2394			assert(!mem->inactive);
2395			assert(!mem->active);
2396			assert(!mem->throttled);
2397			assert(!mem->free);
2398			assert(!mem->speculative);
2399			assert(!VM_PAGE_WIRED(mem));
2400			assert(mem->pageq.prev == NULL);
2401
2402			nxt = (vm_page_t)(mem->pageq.next);
2403
2404			if (vm_page_free_verify && !mem->fictitious && !mem->private) {
2405				assert(pmap_verify_free(mem->phys_page));
2406			}
2407			if (prepare_object == TRUE)
2408				vm_page_free_prepare_object(mem, TRUE);
2409
2410			if (!mem->fictitious) {
2411				assert(mem->busy);
2412
2413				if ((mem->lopage == TRUE || vm_lopage_refill == TRUE) &&
2414				    vm_lopage_free_count < vm_lopage_free_limit &&
2415				    mem->phys_page < max_valid_low_ppnum) {
2416					mem->pageq.next = NULL;
2417					vm_page_release(mem);
2418				} else {
2419					/*
2420					 * IMPORTANT: we can't set the page "free" here
2421					 * because that would make the page eligible for
2422					 * a physically-contiguous allocation (see
2423					 * vm_page_find_contiguous()) right away (we don't
2424					 * hold the vm_page_queue_free lock).  That would
2425					 * cause trouble because the page is not actually
2426					 * in the free queue yet...
2427					 */
2428					mem->pageq.next = (queue_entry_t)local_freeq;
2429					local_freeq = mem;
2430					pg_count++;
2431
2432					pmap_clear_noencrypt(mem->phys_page);
2433				}
2434			} else {
2435				assert(mem->phys_page == vm_page_fictitious_addr ||
2436				       mem->phys_page == vm_page_guard_addr);
2437				vm_page_release_fictitious(mem);
2438			}
2439			mem = nxt;
2440		}
2441		freeq = mem;
2442
2443		if ( (mem = local_freeq) ) {
2444			unsigned int	avail_free_count;
2445			unsigned int	need_wakeup = 0;
2446			unsigned int	need_priv_wakeup = 0;
2447
2448			lck_mtx_lock_spin(&vm_page_queue_free_lock);
2449
2450			while (mem) {
2451				int	color;
2452
2453				nxt = (vm_page_t)(mem->pageq.next);
2454
2455				assert(!mem->free);
2456				assert(mem->busy);
2457				mem->free = TRUE;
2458
2459				color = mem->phys_page & vm_color_mask;
2460				queue_enter_first(&vm_page_queue_free[color],
2461						  mem,
2462						  vm_page_t,
2463						  pageq);
2464				mem = nxt;
2465			}
2466			vm_page_free_count += pg_count;
2467			avail_free_count = vm_page_free_count;
2468
2469			if (vm_page_free_wanted_privileged > 0 && avail_free_count > 0) {
2470
2471				if (avail_free_count < vm_page_free_wanted_privileged) {
2472					need_priv_wakeup = avail_free_count;
2473					vm_page_free_wanted_privileged -= avail_free_count;
2474					avail_free_count = 0;
2475				} else {
2476					need_priv_wakeup = vm_page_free_wanted_privileged;
2477					vm_page_free_wanted_privileged = 0;
					avail_free_count -= need_priv_wakeup;
2479				}
2480			}
2481			if (vm_page_free_wanted > 0 && avail_free_count > vm_page_free_reserved) {
2482				unsigned int  available_pages;
2483
2484				available_pages = avail_free_count - vm_page_free_reserved;
2485
2486				if (available_pages >= vm_page_free_wanted) {
2487					need_wakeup = vm_page_free_wanted;
2488					vm_page_free_wanted = 0;
2489				} else {
2490					need_wakeup = available_pages;
2491					vm_page_free_wanted -= available_pages;
2492				}
2493			}
2494			lck_mtx_unlock(&vm_page_queue_free_lock);
2495
2496			if (need_priv_wakeup != 0) {
2497				/*
2498				 * There shouldn't be that many VM-privileged threads,
2499				 * so let's wake them all up, even if we don't quite
2500				 * have enough pages to satisfy them all.
2501				 */
2502				thread_wakeup((event_t)&vm_page_free_wanted_privileged);
2503			}
2504			if (need_wakeup != 0 && vm_page_free_wanted == 0) {
2505				/*
2506				 * We don't expect to have any more waiters
2507				 * after this, so let's wake them all up at
2508				 * once.
2509				 */
2510				thread_wakeup((event_t) &vm_page_free_count);
2511			} else for (; need_wakeup != 0; need_wakeup--) {
2512				/*
2513				 * Wake up one waiter per page we just released.
2514				 */
2515				thread_wakeup_one((event_t) &vm_page_free_count);
2516			}
2517
2518			VM_CHECK_MEMORYSTATUS;
2519		}
2520	}
2521}
2522
2523
2524/*
2525 *	vm_page_wire:
2526 *
2527 *	Mark this page as wired down by yet
2528 *	another map, removing it from paging queues
2529 *	as necessary.
2530 *
2531 *	The page's object and the page queues must be locked.
2532 */
2533void
2534vm_page_wire(
2535	register vm_page_t	mem)
2536{
2537
2538//	dbgLog(current_thread(), mem->offset, mem->object, 1);	/* (TEST/DEBUG) */
2539
2540	VM_PAGE_CHECK(mem);
2541	if (mem->object) {
2542		vm_object_lock_assert_exclusive(mem->object);
2543	} else {
2544		/*
2545		 * In theory, the page should be in an object before it
2546		 * gets wired, since we need to hold the object lock
2547		 * to update some fields in the page structure.
2548		 * However, some code (i386 pmap, for example) might want
2549		 * to wire a page before it gets inserted into an object.
2550		 * That's somewhat OK, as long as nobody else can get to
2551		 * that page and update it at the same time.
2552		 */
2553	}
2554#if DEBUG
2555	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2556#endif
2557	if ( !VM_PAGE_WIRED(mem)) {
2558
2559		if (mem->pageout_queue) {
2560			mem->pageout = FALSE;
2561			vm_pageout_throttle_up(mem);
2562		}
2563		VM_PAGE_QUEUES_REMOVE(mem);
2564
2565		if (mem->object) {
2566			mem->object->wired_page_count++;
2567			assert(mem->object->resident_page_count >=
2568			       mem->object->wired_page_count);
2569			if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
2570				assert(vm_page_purgeable_count > 0);
2571				OSAddAtomic(-1, &vm_page_purgeable_count);
2572				OSAddAtomic(1, &vm_page_purgeable_wired_count);
2573			}
2574			if (mem->object->all_reusable) {
2575				/*
2576				 * Wired pages are not counted as "re-usable"
2577				 * in "all_reusable" VM objects, so nothing
2578				 * to do here.
2579				 */
2580			} else if (mem->reusable) {
2581				/*
2582				 * This page is not "re-usable" when it's
2583				 * wired, so adjust its state and the
2584				 * accounting.
2585				 */
2586				vm_object_reuse_pages(mem->object,
2587						      mem->offset,
2588						      mem->offset+PAGE_SIZE_64,
2589						      FALSE);
2590			}
2591		}
2592		assert(!mem->reusable);
2593
2594		if (!mem->private && !mem->fictitious && !mem->gobbled)
2595			vm_page_wire_count++;
2596		if (mem->gobbled)
2597			vm_page_gobble_count--;
2598		mem->gobbled = FALSE;
2599
2600		VM_CHECK_MEMORYSTATUS;
2601
2602		/*
2603		 * ENCRYPTED SWAP:
		 * The page could be encrypted, but
		 * we don't have to decrypt it here
2606		 * because we don't guarantee that the
2607		 * data is actually valid at this point.
2608		 * The page will get decrypted in
2609		 * vm_fault_wire() if needed.
2610		 */
2611	}
2612	assert(!mem->gobbled);
2613	mem->wire_count++;
2614	VM_PAGE_CHECK(mem);
2615}
2616
2617/*
2618 *      vm_page_gobble:
2619 *
2620 *      Mark this page as consumed by the vm/ipc/xmm subsystems.
2621 *
2622 *      Called only for freshly vm_page_grab()ed pages - w/ nothing locked.
2623 */
2624void
2625vm_page_gobble(
2626        register vm_page_t      mem)
2627{
2628        vm_page_lockspin_queues();
2629        VM_PAGE_CHECK(mem);
2630
2631	assert(!mem->gobbled);
2632	assert( !VM_PAGE_WIRED(mem));
2633
2634        if (!mem->gobbled && !VM_PAGE_WIRED(mem)) {
2635                if (!mem->private && !mem->fictitious)
2636                        vm_page_wire_count++;
2637        }
2638	vm_page_gobble_count++;
2639        mem->gobbled = TRUE;
2640        vm_page_unlock_queues();
2641}
2642
2643/*
2644 *	vm_page_unwire:
2645 *
2646 *	Release one wiring of this page, potentially
2647 *	enabling it to be paged again.
2648 *
2649 *	The page's object and the page queues must be locked.
2650 */
2651void
2652vm_page_unwire(
2653	vm_page_t	mem,
2654	boolean_t	queueit)
2655{
2656
2657//	dbgLog(current_thread(), mem->offset, mem->object, 0);	/* (TEST/DEBUG) */
2658
2659	VM_PAGE_CHECK(mem);
2660	assert(VM_PAGE_WIRED(mem));
2661	assert(mem->object != VM_OBJECT_NULL);
2662#if DEBUG
2663	vm_object_lock_assert_exclusive(mem->object);
2664	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2665#endif
2666	if (--mem->wire_count == 0) {
2667		assert(!mem->private && !mem->fictitious);
2668		vm_page_wire_count--;
2669		assert(mem->object->wired_page_count > 0);
2670		mem->object->wired_page_count--;
2671		assert(mem->object->resident_page_count >=
2672		       mem->object->wired_page_count);
2673		if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
2674			OSAddAtomic(+1, &vm_page_purgeable_count);
2675			assert(vm_page_purgeable_wired_count > 0);
2676			OSAddAtomic(-1, &vm_page_purgeable_wired_count);
2677		}
2678		assert(!mem->laundry);
2679		assert(mem->object != kernel_object);
2680		assert(mem->pageq.next == NULL && mem->pageq.prev == NULL);
2681
2682		if (queueit == TRUE) {
2683			if (mem->object->purgable == VM_PURGABLE_EMPTY) {
2684				vm_page_deactivate(mem);
2685			} else {
2686				vm_page_activate(mem);
2687			}
2688		}
2689
2690		VM_CHECK_MEMORYSTATUS;
2691
2692	}
2693	VM_PAGE_CHECK(mem);
2694}
2695
2696/*
2697 *	vm_page_deactivate:
2698 *
2699 *	Returns the given page to the inactive list,
2700 *	indicating that no physical maps have access
2701 *	to this page.  [Used by the physical mapping system.]
2702 *
2703 *	The page queues must be locked.
2704 */
2705void
2706vm_page_deactivate(
2707	vm_page_t	m)
2708{
2709	vm_page_deactivate_internal(m, TRUE);
2710}
2711
2712
2713void
2714vm_page_deactivate_internal(
2715	vm_page_t	m,
2716	boolean_t	clear_hw_reference)
2717{
2718
2719	VM_PAGE_CHECK(m);
2720	assert(m->object != kernel_object);
2721	assert(m->phys_page != vm_page_guard_addr);
2722
2723//	dbgLog(m->phys_page, vm_page_free_count, vm_page_wire_count, 6);	/* (TEST/DEBUG) */
2724#if DEBUG
2725	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2726#endif
2727	/*
2728	 *	This page is no longer very interesting.  If it was
2729	 *	interesting (active or inactive/referenced), then we
2730	 *	clear the reference bit and (re)enter it in the
2731	 *	inactive queue.  Note wired pages should not have
2732	 *	their reference bit cleared.
2733	 */
2734	assert ( !(m->absent && !m->unusual));
2735
2736	if (m->gobbled) {		/* can this happen? */
2737		assert( !VM_PAGE_WIRED(m));
2738
2739		if (!m->private && !m->fictitious)
2740			vm_page_wire_count--;
2741		vm_page_gobble_count--;
2742		m->gobbled = FALSE;
2743	}
2744	/*
2745	 * if this page is currently on the pageout queue, we can't do the
2746	 * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
2747	 * and we can't remove it manually since we would need the object lock
2748	 * (which is not required here) to decrement the activity_in_progress
2749	 * reference which is held on the object while the page is in the pageout queue...
2750	 * just let the normal laundry processing proceed
2751	 */
2752	if (m->pageout_queue || m->private || m->fictitious || (VM_PAGE_WIRED(m)))
2753		return;
2754
2755	if (!m->absent && clear_hw_reference == TRUE)
2756		pmap_clear_reference(m->phys_page);
2757
2758	m->reference = FALSE;
2759	m->no_cache = FALSE;
2760
2761	if (!m->inactive) {
2762		VM_PAGE_QUEUES_REMOVE(m);
2763
2764		if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) &&
2765		    m->dirty && m->object->internal &&
2766		    (m->object->purgable == VM_PURGABLE_DENY ||
2767		     m->object->purgable == VM_PURGABLE_NONVOLATILE ||
2768		     m->object->purgable == VM_PURGABLE_VOLATILE)) {
2769			queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
2770			m->throttled = TRUE;
2771			vm_page_throttled_count++;
2772		} else {
2773			if (m->object->named && m->object->ref_count == 1) {
2774			        vm_page_speculate(m, FALSE);
2775#if DEVELOPMENT || DEBUG
2776				vm_page_speculative_recreated++;
2777#endif
2778			} else {
2779				VM_PAGE_ENQUEUE_INACTIVE(m, FALSE);
2780			}
2781		}
2782	}
2783}
2784
2785/*
2786 * vm_page_enqueue_cleaned
2787 *
2788 * Put the page on the cleaned queue, mark it cleaned, etc.
2789 * Being on the cleaned queue (and having m->clean_queue set)
2790 * does ** NOT ** guarantee that the page is clean!
2791 *
2792 * Call with the queues lock held.
2793 */
2794
2795void vm_page_enqueue_cleaned(vm_page_t m)
2796{
2797	assert(m->phys_page != vm_page_guard_addr);
2798#if DEBUG
2799	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2800#endif
2801	assert( !(m->absent && !m->unusual));
2802
2803	if (m->gobbled) {
2804		assert( !VM_PAGE_WIRED(m));
2805		if (!m->private && !m->fictitious)
2806			vm_page_wire_count--;
2807		vm_page_gobble_count--;
2808		m->gobbled = FALSE;
2809	}
2810	/*
2811	 * if this page is currently on the pageout queue, we can't do the
2812	 * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
2813	 * and we can't remove it manually since we would need the object lock
2814	 * (which is not required here) to decrement the activity_in_progress
2815	 * reference which is held on the object while the page is in the pageout queue...
2816	 * just let the normal laundry processing proceed
2817	 */
2818	if (m->clean_queue || m->pageout_queue || m->private || m->fictitious)
2819		return;
2820
2821	VM_PAGE_QUEUES_REMOVE(m);
2822
2823	queue_enter(&vm_page_queue_cleaned, m, vm_page_t, pageq);
2824	m->clean_queue = TRUE;
2825	vm_page_cleaned_count++;
2826
2827	m->inactive = TRUE;
2828	vm_page_inactive_count++;
2829
2830	vm_pageout_enqueued_cleaned++;
2831}
2832
2833/*
2834 *	vm_page_activate:
2835 *
2836 *	Put the specified page on the active list (if appropriate).
2837 *
2838 *	The page queues must be locked.
2839 */
2840
2841void
2842vm_page_activate(
2843	register vm_page_t	m)
2844{
2845	VM_PAGE_CHECK(m);
2846#ifdef	FIXME_4778297
2847	assert(m->object != kernel_object);
2848#endif
2849	assert(m->phys_page != vm_page_guard_addr);
2850#if DEBUG
2851	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2852#endif
2853	assert( !(m->absent && !m->unusual));
2854
2855	if (m->gobbled) {
2856		assert( !VM_PAGE_WIRED(m));
2857		if (!m->private && !m->fictitious)
2858			vm_page_wire_count--;
2859		vm_page_gobble_count--;
2860		m->gobbled = FALSE;
2861	}
2862	/*
2863	 * if this page is currently on the pageout queue, we can't do the
2864	 * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
2865	 * and we can't remove it manually since we would need the object lock
2866	 * (which is not required here) to decrement the activity_in_progress
2867	 * reference which is held on the object while the page is in the pageout queue...
2868	 * just let the normal laundry processing proceed
2869	 */
2870	if (m->pageout_queue || m->private || m->fictitious)
2871		return;
2872
2873#if DEBUG
2874	if (m->active)
2875	        panic("vm_page_activate: already active");
2876#endif
2877
2878	if (m->speculative) {
2879		DTRACE_VM2(pgrec, int, 1, (uint64_t *), NULL);
2880		DTRACE_VM2(pgfrec, int, 1, (uint64_t *), NULL);
2881	}
2882
2883	VM_PAGE_QUEUES_REMOVE(m);
2884
2885	if ( !VM_PAGE_WIRED(m)) {
2886
2887		if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) &&
2888		    m->dirty && m->object->internal &&
2889		    (m->object->purgable == VM_PURGABLE_DENY ||
2890		     m->object->purgable == VM_PURGABLE_NONVOLATILE ||
2891		     m->object->purgable == VM_PURGABLE_VOLATILE)) {
2892			queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
2893			m->throttled = TRUE;
2894			vm_page_throttled_count++;
2895		} else {
2896			queue_enter(&vm_page_queue_active, m, vm_page_t, pageq);
2897			m->active = TRUE;
2898			vm_page_active_count++;
2899		}
2900		m->reference = TRUE;
2901		m->no_cache = FALSE;
2902	}
2903	VM_PAGE_CHECK(m);
2904}
2905
2906
2907/*
2908 *      vm_page_speculate:
2909 *
2910 *      Put the specified page on the speculative list (if appropriate).
2911 *
2912 *      The page queues must be locked.
2913 */
2914void
2915vm_page_speculate(
2916	vm_page_t	m,
2917	boolean_t	new)
2918{
2919        struct vm_speculative_age_q	*aq;
2920
2921	VM_PAGE_CHECK(m);
2922	assert(m->object != kernel_object);
2923	assert(m->phys_page != vm_page_guard_addr);
2924#if DEBUG
2925	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2926#endif
2927	assert( !(m->absent && !m->unusual));
2928
2929	/*
2930	 * if this page is currently on the pageout queue, we can't do the
2931	 * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
2932	 * and we can't remove it manually since we would need the object lock
2933	 * (which is not required here) to decrement the activity_in_progress
2934	 * reference which is held on the object while the page is in the pageout queue...
2935	 * just let the normal laundry processing proceed
2936	 */
2937	if (m->pageout_queue || m->private || m->fictitious)
2938		return;
2939
2940	VM_PAGE_QUEUES_REMOVE(m);
2941
2942	if ( !VM_PAGE_WIRED(m)) {
2943	        mach_timespec_t		ts;
2944		clock_sec_t sec;
2945		clock_nsec_t nsec;
2946
2947	        clock_get_system_nanotime(&sec, &nsec);
2948		ts.tv_sec = (unsigned int) sec;
2949		ts.tv_nsec = nsec;
2950
2951		if (vm_page_speculative_count == 0) {
2952
2953			speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
2954			speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
2955
2956			aq = &vm_page_queue_speculative[speculative_age_index];
2957
2958		        /*
2959			 * set the timer to begin a new group
2960			 */
2961			aq->age_ts.tv_sec = vm_page_speculative_q_age_ms / 1000;
2962			aq->age_ts.tv_nsec = (vm_page_speculative_q_age_ms % 1000) * 1000 * NSEC_PER_USEC;
2963
2964			ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
2965		} else {
2966			aq = &vm_page_queue_speculative[speculative_age_index];
2967
2968			if (CMP_MACH_TIMESPEC(&ts, &aq->age_ts) >= 0) {
2969
2970			        speculative_age_index++;
2971
2972				if (speculative_age_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
2973				        speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
2974				if (speculative_age_index == speculative_steal_index) {
2975				        speculative_steal_index = speculative_age_index + 1;
2976
2977					if (speculative_steal_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
2978					        speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
2979				}
2980				aq = &vm_page_queue_speculative[speculative_age_index];
2981
2982				if (!queue_empty(&aq->age_q))
2983				        vm_page_speculate_ageit(aq);
2984
2985				aq->age_ts.tv_sec = vm_page_speculative_q_age_ms / 1000;
2986				aq->age_ts.tv_nsec = (vm_page_speculative_q_age_ms % 1000) * 1000 * NSEC_PER_USEC;
2987
2988				ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
2989			}
2990		}
2991		enqueue_tail(&aq->age_q, &m->pageq);
2992		m->speculative = TRUE;
2993		vm_page_speculative_count++;
2994
2995		if (new == TRUE) {
2996			vm_object_lock_assert_exclusive(m->object);
2997
2998		        m->object->pages_created++;
2999#if DEVELOPMENT || DEBUG
3000			vm_page_speculative_created++;
3001#endif
3002		}
3003	}
3004	VM_PAGE_CHECK(m);
3005}
3006
3007
3008/*
3009 * move pages from the specified aging bin to
3010 * the speculative bin that pageout_scan claims from
3011 *
3012 *      The page queues must be locked.
3013 */
3014void
3015vm_page_speculate_ageit(struct vm_speculative_age_q *aq)
3016{
3017        struct vm_speculative_age_q	*sq;
3018	vm_page_t	t;
3019
3020	sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];
3021
3022	if (queue_empty(&sq->age_q)) {
3023	        sq->age_q.next = aq->age_q.next;
3024		sq->age_q.prev = aq->age_q.prev;
3025
3026		t = (vm_page_t)sq->age_q.next;
3027		t->pageq.prev = &sq->age_q;
3028
3029		t = (vm_page_t)sq->age_q.prev;
3030		t->pageq.next = &sq->age_q;
3031	} else {
3032	        t = (vm_page_t)sq->age_q.prev;
3033		t->pageq.next = aq->age_q.next;
3034
3035		t = (vm_page_t)aq->age_q.next;
3036		t->pageq.prev = sq->age_q.prev;
3037
3038		t = (vm_page_t)aq->age_q.prev;
3039		t->pageq.next = &sq->age_q;
3040
3041		sq->age_q.prev = aq->age_q.prev;
3042	}
3043	queue_init(&aq->age_q);
3044}
3045
3046
3047void
3048vm_page_lru(
3049	vm_page_t	m)
3050{
3051	VM_PAGE_CHECK(m);
3052	assert(m->object != kernel_object);
3053	assert(m->phys_page != vm_page_guard_addr);
3054
3055#if DEBUG
3056	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3057#endif
3058	/*
3059	 * if this page is currently on the pageout queue, we can't do the
3060	 * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
3061	 * and we can't remove it manually since we would need the object lock
3062	 * (which is not required here) to decrement the activity_in_progress
3063	 * reference which is held on the object while the page is in the pageout queue...
3064	 * just let the normal laundry processing proceed
3065	 */
3066	if (m->pageout_queue || m->private || (VM_PAGE_WIRED(m)))
3067		return;
3068
3069	m->no_cache = FALSE;
3070
3071	VM_PAGE_QUEUES_REMOVE(m);
3072
3073	VM_PAGE_ENQUEUE_INACTIVE(m, FALSE);
3074}
3075
3076
3077void
3078vm_page_reactivate_all_throttled(void)
3079{
3080	vm_page_t	first_throttled, last_throttled;
3081	vm_page_t	first_active;
3082	vm_page_t	m;
3083	int		extra_active_count;
3084
3085	if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default))
3086		return;
3087
3088	extra_active_count = 0;
3089	vm_page_lock_queues();
3090	if (! queue_empty(&vm_page_queue_throttled)) {
3091		/*
3092		 * Switch "throttled" pages to "active".
3093		 */
3094		queue_iterate(&vm_page_queue_throttled, m, vm_page_t, pageq) {
3095			VM_PAGE_CHECK(m);
3096			assert(m->throttled);
3097			assert(!m->active);
3098			assert(!m->inactive);
3099			assert(!m->speculative);
3100			assert(!VM_PAGE_WIRED(m));
3101
3102			extra_active_count++;
3103
3104			m->throttled = FALSE;
3105			m->active = TRUE;
3106			VM_PAGE_CHECK(m);
3107		}
3108
3109		/*
		 * Transfer the entire throttled queue to the regular LRU page queues.
3111		 * We insert it at the head of the active queue, so that these pages
3112		 * get re-evaluated by the LRU algorithm first, since they've been
3113		 * completely out of it until now.
3114		 */
3115		first_throttled = (vm_page_t) queue_first(&vm_page_queue_throttled);
3116		last_throttled = (vm_page_t) queue_last(&vm_page_queue_throttled);
3117		first_active = (vm_page_t) queue_first(&vm_page_queue_active);
3118		if (queue_empty(&vm_page_queue_active)) {
3119			queue_last(&vm_page_queue_active) = (queue_entry_t) last_throttled;
3120		} else {
3121			queue_prev(&first_active->pageq) = (queue_entry_t) last_throttled;
3122		}
3123		queue_first(&vm_page_queue_active) = (queue_entry_t) first_throttled;
3124		queue_prev(&first_throttled->pageq) = (queue_entry_t) &vm_page_queue_active;
3125		queue_next(&last_throttled->pageq) = (queue_entry_t) first_active;
3126
3127#if DEBUG
3128		printf("reactivated %d throttled pages\n", vm_page_throttled_count);
3129#endif
3130		queue_init(&vm_page_queue_throttled);
3131		/*
3132		 * Adjust the global page counts.
3133		 */
3134		vm_page_active_count += extra_active_count;
3135		vm_page_throttled_count = 0;
3136	}
3137	assert(vm_page_throttled_count == 0);
3138	assert(queue_empty(&vm_page_queue_throttled));
3139	vm_page_unlock_queues();
3140}
3141
3142
3143/*
3144 * move pages from the indicated local queue to the global active queue
 * it's ok to fail if we're below the hard limit and force == FALSE
3146 * the nolocks == TRUE case is to allow this function to be run on
3147 * the hibernate path
3148 */
3149
3150void
3151vm_page_reactivate_local(uint32_t lid, boolean_t force, boolean_t nolocks)
3152{
3153	struct vpl	*lq;
3154	vm_page_t	first_local, last_local;
3155	vm_page_t	first_active;
3156	vm_page_t	m;
3157	uint32_t	count = 0;
3158
3159	if (vm_page_local_q == NULL)
3160		return;
3161
3162	lq = &vm_page_local_q[lid].vpl_un.vpl;
3163
3164	if (nolocks == FALSE) {
3165		if (lq->vpl_count < vm_page_local_q_hard_limit && force == FALSE) {
3166			if ( !vm_page_trylockspin_queues())
3167				return;
3168		} else
3169			vm_page_lockspin_queues();
3170
3171		VPL_LOCK(&lq->vpl_lock);
3172	}
3173	if (lq->vpl_count) {
3174		/*
3175		 * Switch "local" pages to "active".
3176		 */
3177		assert(!queue_empty(&lq->vpl_queue));
3178
3179		queue_iterate(&lq->vpl_queue, m, vm_page_t, pageq) {
3180			VM_PAGE_CHECK(m);
3181			assert(m->local);
3182			assert(!m->active);
3183			assert(!m->inactive);
3184			assert(!m->speculative);
3185			assert(!VM_PAGE_WIRED(m));
3186			assert(!m->throttled);
3187			assert(!m->fictitious);
3188
3189			if (m->local_id != lid)
3190				panic("vm_page_reactivate_local: found vm_page_t(%p) with wrong cpuid", m);
3191
3192			m->local_id = 0;
3193			m->local = FALSE;
3194			m->active = TRUE;
3195			VM_PAGE_CHECK(m);
3196
3197			count++;
3198		}
3199		if (count != lq->vpl_count)
3200			panic("vm_page_reactivate_local: count = %d, vm_page_local_count = %d\n", count, lq->vpl_count);
3201
3202		/*
		 * Transfer the entire local queue to the regular LRU page queues.
3204		 */
3205		first_local = (vm_page_t) queue_first(&lq->vpl_queue);
3206		last_local = (vm_page_t) queue_last(&lq->vpl_queue);
3207		first_active = (vm_page_t) queue_first(&vm_page_queue_active);
3208
3209		if (queue_empty(&vm_page_queue_active)) {
3210			queue_last(&vm_page_queue_active) = (queue_entry_t) last_local;
3211		} else {
3212			queue_prev(&first_active->pageq) = (queue_entry_t) last_local;
3213		}
3214		queue_first(&vm_page_queue_active) = (queue_entry_t) first_local;
3215		queue_prev(&first_local->pageq) = (queue_entry_t) &vm_page_queue_active;
3216		queue_next(&last_local->pageq) = (queue_entry_t) first_active;
3217
3218		queue_init(&lq->vpl_queue);
3219		/*
3220		 * Adjust the global page counts.
3221		 */
3222		vm_page_active_count += lq->vpl_count;
3223		lq->vpl_count = 0;
3224	}
3225	assert(queue_empty(&lq->vpl_queue));
3226
3227	if (nolocks == FALSE) {
3228		VPL_UNLOCK(&lq->vpl_lock);
3229		vm_page_unlock_queues();
3230	}
3231}
3232
3233/*
3234 *	vm_page_part_zero_fill:
3235 *
3236 *	Zero-fill a part of the page.
3237 */
3238void
3239vm_page_part_zero_fill(
3240	vm_page_t	m,
3241	vm_offset_t	m_pa,
3242	vm_size_t	len)
3243{
3244	vm_page_t	tmp;
3245
3246#if 0
3247	/*
3248	 * we don't hold the page queue lock
3249	 * so this check isn't safe to make
3250	 */
3251	VM_PAGE_CHECK(m);
3252#endif
3253
3254#ifdef PMAP_ZERO_PART_PAGE_IMPLEMENTED
3255	pmap_zero_part_page(m->phys_page, m_pa, len);
3256#else
3257	while (1) {
3258       		tmp = vm_page_grab();
3259		if (tmp == VM_PAGE_NULL) {
3260			vm_page_wait(THREAD_UNINT);
3261			continue;
3262		}
3263		break;
3264	}
3265	vm_page_zero_fill(tmp);
3266	if(m_pa != 0) {
3267		vm_page_part_copy(m, 0, tmp, 0, m_pa);
3268	}
3269	if((m_pa + len) <  PAGE_SIZE) {
3270		vm_page_part_copy(m, m_pa + len, tmp,
3271				m_pa + len, PAGE_SIZE - (m_pa + len));
3272	}
3273	vm_page_copy(tmp,m);
3274	VM_PAGE_FREE(tmp);
3275#endif
3276
3277}
3278
3279/*
3280 *	vm_page_zero_fill:
3281 *
3282 *	Zero-fill the specified page.
3283 */
3284void
3285vm_page_zero_fill(
3286	vm_page_t	m)
3287{
3288        XPR(XPR_VM_PAGE,
3289                "vm_page_zero_fill, object 0x%X offset 0x%X page 0x%X\n",
3290                m->object, m->offset, m, 0,0);
3291#if 0
3292	/*
3293	 * we don't hold the page queue lock
3294	 * so this check isn't safe to make
3295	 */
3296	VM_PAGE_CHECK(m);
3297#endif
3298
3299//	dbgTrace(0xAEAEAEAE, m->phys_page, 0);		/* (BRINGUP) */
3300	pmap_zero_page(m->phys_page);
3301}
3302
3303/*
3304 *	vm_page_part_copy:
3305 *
3306 *	copy part of one page to another
3307 */
3308
3309void
3310vm_page_part_copy(
3311	vm_page_t	src_m,
3312	vm_offset_t	src_pa,
3313	vm_page_t	dst_m,
3314	vm_offset_t	dst_pa,
3315	vm_size_t	len)
3316{
3317#if 0
3318	/*
3319	 * we don't hold the page queue lock
3320	 * so this check isn't safe to make
3321	 */
3322	VM_PAGE_CHECK(src_m);
3323	VM_PAGE_CHECK(dst_m);
3324#endif
3325	pmap_copy_part_page(src_m->phys_page, src_pa,
3326			dst_m->phys_page, dst_pa, len);
3327}
3328
3329/*
3330 *	vm_page_copy:
3331 *
3332 *	Copy one page to another
3333 *
3334 * ENCRYPTED SWAP:
3335 * The source page should not be encrypted.  The caller should
3336 * make sure the page is decrypted first, if necessary.
3337 */
3338
3339int vm_page_copy_cs_validations = 0;
3340int vm_page_copy_cs_tainted = 0;
3341
3342void
3343vm_page_copy(
3344	vm_page_t	src_m,
3345	vm_page_t	dest_m)
3346{
3347        XPR(XPR_VM_PAGE,
3348        "vm_page_copy, object 0x%X offset 0x%X to object 0x%X offset 0x%X\n",
3349        src_m->object, src_m->offset,
3350	dest_m->object, dest_m->offset,
3351	0);
3352#if 0
3353	/*
3354	 * we don't hold the page queue lock
3355	 * so this check isn't safe to make
3356	 */
3357	VM_PAGE_CHECK(src_m);
3358	VM_PAGE_CHECK(dest_m);
3359#endif
3360	vm_object_lock_assert_held(src_m->object);
3361
3362	/*
3363	 * ENCRYPTED SWAP:
3364	 * The source page should not be encrypted at this point.
3365	 * The destination page will therefore not contain encrypted
3366	 * data after the copy.
3367	 */
3368	if (src_m->encrypted) {
3369		panic("vm_page_copy: source page %p is encrypted\n", src_m);
3370	}
3371	dest_m->encrypted = FALSE;
3372
3373	if (src_m->object != VM_OBJECT_NULL &&
3374	    src_m->object->code_signed) {
3375		/*
3376		 * We're copying a page from a code-signed object.
3377		 * Whoever ends up mapping the copy page might care about
3378		 * the original page's integrity, so let's validate the
3379		 * source page now.
3380		 */
3381		vm_page_copy_cs_validations++;
3382		vm_page_validate_cs(src_m);
3383	}
3384
3385	if (vm_page_is_slideable(src_m)) {
3386		boolean_t was_busy = src_m->busy;
3387		src_m->busy = TRUE;
3388		(void) vm_page_slide(src_m, 0);
3389		assert(src_m->busy);
3390		if (!was_busy) {
3391			PAGE_WAKEUP_DONE(src_m);
3392		}
3393	}
3394
3395	/*
3396	 * Propagate the cs_tainted bit to the copy page. Do not propagate
3397	 * the cs_validated bit.
3398	 */
3399	dest_m->cs_tainted = src_m->cs_tainted;
3400	if (dest_m->cs_tainted) {
3401		vm_page_copy_cs_tainted++;
3402	}
3403	dest_m->slid = src_m->slid;
3404	dest_m->error = src_m->error; /* sliding src_m might have failed... */
3405	pmap_copy_page(src_m->phys_page, dest_m->phys_page);
3406}
3407
3408#if MACH_ASSERT
3409static void
3410_vm_page_print(
3411	vm_page_t	p)
3412{
3413	printf("vm_page %p: \n", p);
3414	printf("  pageq: next=%p prev=%p\n", p->pageq.next, p->pageq.prev);
3415	printf("  listq: next=%p prev=%p\n", p->listq.next, p->listq.prev);
3416	printf("  next=%p\n", p->next);
3417	printf("  object=%p offset=0x%llx\n", p->object, p->offset);
3418	printf("  wire_count=%u\n", p->wire_count);
3419
3420	printf("  %slocal, %sinactive, %sactive, %spageout_queue, %sspeculative, %slaundry\n",
3421	       (p->local ? "" : "!"),
3422	       (p->inactive ? "" : "!"),
3423	       (p->active ? "" : "!"),
3424	       (p->pageout_queue ? "" : "!"),
3425	       (p->speculative ? "" : "!"),
3426	       (p->laundry ? "" : "!"));
3427	printf("  %sfree, %sref, %sgobbled, %sprivate, %sthrottled\n",
3428	       (p->free ? "" : "!"),
3429	       (p->reference ? "" : "!"),
3430	       (p->gobbled ? "" : "!"),
3431	       (p->private ? "" : "!"),
3432	       (p->throttled ? "" : "!"));
3433	printf("  %sbusy, %swanted, %stabled, %sfictitious, %spmapped, %swpmapped\n",
3434		(p->busy ? "" : "!"),
3435		(p->wanted ? "" : "!"),
3436		(p->tabled ? "" : "!"),
3437		(p->fictitious ? "" : "!"),
3438		(p->pmapped ? "" : "!"),
3439		(p->wpmapped ? "" : "!"));
3440	printf("  %spageout, %sabsent, %serror, %sdirty, %scleaning, %sprecious, %sclustered\n",
3441	       (p->pageout ? "" : "!"),
3442	       (p->absent ? "" : "!"),
3443	       (p->error ? "" : "!"),
3444	       (p->dirty ? "" : "!"),
3445	       (p->cleaning ? "" : "!"),
3446	       (p->precious ? "" : "!"),
3447	       (p->clustered ? "" : "!"));
3448	printf("  %soverwriting, %srestart, %sunusual, %sencrypted, %sencrypted_cleaning\n",
3449	       (p->overwriting ? "" : "!"),
3450	       (p->restart ? "" : "!"),
3451	       (p->unusual ? "" : "!"),
3452	       (p->encrypted ? "" : "!"),
3453	       (p->encrypted_cleaning ? "" : "!"));
3454	printf("  %scs_validated, %scs_tainted, %sno_cache\n",
3455	       (p->cs_validated ? "" : "!"),
3456	       (p->cs_tainted ? "" : "!"),
3457	       (p->no_cache ? "" : "!"));
3458
3459	printf("phys_page=0x%x\n", p->phys_page);
3460}
3461
3462/*
3463 *	Check that the list of pages is ordered by
3464 *	ascending physical address and has no holes.
3465 */
3466static int
3467vm_page_verify_contiguous(
3468	vm_page_t	pages,
3469	unsigned int	npages)
3470{
3471	register vm_page_t	m;
3472	unsigned int		page_count;
3473	vm_offset_t		prev_addr;
3474
3475	prev_addr = pages->phys_page;
3476	page_count = 1;
3477	for (m = NEXT_PAGE(pages); m != VM_PAGE_NULL; m = NEXT_PAGE(m)) {
3478		if (m->phys_page != prev_addr + 1) {
3479			printf("m %p prev_addr 0x%lx, current addr 0x%x\n",
3480			       m, (long)prev_addr, m->phys_page);
3481			printf("pages %p page_count %d npages %d\n", pages, page_count, npages);
3482			panic("vm_page_verify_contiguous:  not contiguous!");
3483		}
3484		prev_addr = m->phys_page;
3485		++page_count;
3486	}
3487	if (page_count != npages) {
3488		printf("pages %p actual count 0x%x but requested 0x%x\n",
3489		       pages, page_count, npages);
3490		panic("vm_page_verify_contiguous:  count error");
3491	}
3492	return 1;
3493}
3494
3495
3496/*
3497 *	Check the free lists for proper length etc.
3498 */
3499static unsigned int
3500vm_page_verify_free_list(
3501	queue_head_t	*vm_page_queue,
3502	unsigned int	color,
3503	vm_page_t	look_for_page,
3504	boolean_t	expect_page)
3505{
3506	unsigned int 	npages;
3507	vm_page_t	m;
3508	vm_page_t	prev_m;
3509	boolean_t	found_page;
3510
3511	found_page = FALSE;
3512	npages = 0;
3513	prev_m = (vm_page_t) vm_page_queue;
3514	queue_iterate(vm_page_queue,
3515		      m,
3516		      vm_page_t,
3517		      pageq) {
3518
3519		if (m == look_for_page) {
3520			found_page = TRUE;
3521		}
3522		if ((vm_page_t) m->pageq.prev != prev_m)
3523			panic("vm_page_verify_free_list(color=%u, npages=%u): page %p corrupted prev ptr %p instead of %p\n",
3524			      color, npages, m, m->pageq.prev, prev_m);
3525		if ( ! m->busy )
3526			panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not busy\n",
3527			      color, npages, m);
3528		if (color != (unsigned int) -1) {
3529			if ((m->phys_page & vm_color_mask) != color)
3530				panic("vm_page_verify_free_list(color=%u, npages=%u): page %p wrong color %u instead of %u\n",
3531				      color, npages, m, m->phys_page & vm_color_mask, color);
3532			if ( ! m->free )
3533				panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not free\n",
3534				      color, npages, m);
3535		}
3536		++npages;
3537		prev_m = m;
3538	}
3539	if (look_for_page != VM_PAGE_NULL) {
3540		unsigned int other_color;
3541
3542		if (expect_page && !found_page) {
3543			printf("vm_page_verify_free_list(color=%u, npages=%u): page %p not found phys=%u\n",
3544			       color, npages, look_for_page, look_for_page->phys_page);
3545			_vm_page_print(look_for_page);
3546			for (other_color = 0;
3547			     other_color < vm_colors;
3548			     other_color++) {
3549				if (other_color == color)
3550					continue;
3551				vm_page_verify_free_list(&vm_page_queue_free[other_color],
3552							 other_color, look_for_page, FALSE);
3553			}
3554			if (color == (unsigned int) -1) {
3555				vm_page_verify_free_list(&vm_lopage_queue_free,
3556							 (unsigned int) -1, look_for_page, FALSE);
3557			}
3558			panic("vm_page_verify_free_list(color=%u)\n", color);
3559		}
3560		if (!expect_page && found_page) {
3561			printf("vm_page_verify_free_list(color=%u, npages=%u): page %p found phys=%u\n",
3562			       color, npages, look_for_page, look_for_page->phys_page);
3563		}
3564	}
3565	return npages;
3566}
3567
3568static boolean_t vm_page_verify_free_lists_enabled = FALSE;
3569static void
3570vm_page_verify_free_lists( void )
3571{
3572	unsigned int	color, npages, nlopages;
3573
3574	if (! vm_page_verify_free_lists_enabled)
3575		return;
3576
3577	npages = 0;
3578
3579	lck_mtx_lock(&vm_page_queue_free_lock);
3580
3581	for( color = 0; color < vm_colors; color++ ) {
3582		npages += vm_page_verify_free_list(&vm_page_queue_free[color],
3583						   color, VM_PAGE_NULL, FALSE);
3584	}
3585	nlopages = vm_page_verify_free_list(&vm_lopage_queue_free,
3586					    (unsigned int) -1,
3587					    VM_PAGE_NULL, FALSE);
3588	if (npages != vm_page_free_count || nlopages != vm_lopage_free_count)
3589		panic("vm_page_verify_free_lists:  "
3590		      "npages %u free_count %d nlopages %u lo_free_count %u",
3591		      npages, vm_page_free_count, nlopages, vm_lopage_free_count);
3592
3593	lck_mtx_unlock(&vm_page_queue_free_lock);
3594}
3595
3596void
3597vm_page_queues_assert(
3598	vm_page_t	mem,
3599	int		val)
3600{
3601#if DEBUG
3602	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3603#endif
3604	if (mem->free + mem->active + mem->inactive + mem->speculative +
3605	    mem->throttled + mem->pageout_queue > (val)) {
3606		_vm_page_print(mem);
3607		panic("vm_page_queues_assert(%p, %d)\n", mem, val);
3608	}
3609	if (VM_PAGE_WIRED(mem)) {
3610		assert(!mem->active);
3611		assert(!mem->inactive);
3612		assert(!mem->speculative);
3613		assert(!mem->throttled);
3614		assert(!mem->pageout_queue);
3615	}
3616}
3617#endif	/* MACH_ASSERT */
3618
3619
3620/*
3621 *	CONTIGUOUS PAGE ALLOCATION
3622 *
3623 *	Find a region large enough to contain at least n pages
3624 *	of contiguous physical memory.
3625 *
3626 *	This is done by traversing the vm_page_t array in a linear fashion
3627 *	we assume that the vm_page_t array has the avaiable physical pages in an
3628 *	ordered, ascending list... this is currently true of all our implementations
3629 * 	and must remain so... there can be 'holes' in the array...  we also can
3630 *	no longer tolerate the vm_page_t's in the list being 'freed' and reclaimed
3631 * 	which use to happen via 'vm_page_convert'... that function was no longer
3632 * 	being called and was removed...
3633 *
3634 *	The basic flow consists of stabilizing some of the interesting state of
3635 *	a vm_page_t behind the vm_page_queue and vm_page_free locks... we start our
3636 *	sweep at the beginning of the array looking for pages that meet our criterea
3637 *	for a 'stealable' page... currently we are pretty conservative... if the page
3638 *	meets this criterea and is physically contiguous to the previous page in the 'run'
3639 * 	we keep developing it.  If we hit a page that doesn't fit, we reset our state
3640 *	and start to develop a new run... if at this point we've already considered
3641 * 	at least MAX_CONSIDERED_BEFORE_YIELD pages, we'll drop the 2 locks we hold,
3642 *	and mutex_pause (which will yield the processor), to keep the latency low w/r
3643 *	to other threads trying to acquire free pages (or move pages from q to q),
3644 *	and then continue from the spot we left off... we only make 1 pass through the
3645 *	array.  Once we have a 'run' that is long enough, we'll go into the loop which
3646 * 	which steals the pages from the queues they're currently on... pages on the free
3647 *	queue can be stolen directly... pages that are on any of the other queues
3648 *	must be removed from the object they are tabled on... this requires taking the
3649 * 	object lock... we do this as a 'try' to prevent deadlocks... if the 'try' fails
3650 *	or if the state of the page behind the vm_object lock is no longer viable, we'll
3651 *	dump the pages we've currently stolen back to the free list, and pick up our
3652 *	scan from the point where we aborted the 'current' run.
3653 *
3654 *
3655 *	Requirements:
3656 *		- neither vm_page_queue nor vm_free_list lock can be held on entry
3657 *
3658 *	Returns a pointer to a list of gobbled/wired pages or VM_PAGE_NULL.
3659 *
3660 * Algorithm:
3661 */
3662
3663#define	MAX_CONSIDERED_BEFORE_YIELD	1000
3664
3665
3666#define RESET_STATE_OF_RUN()	\
3667	MACRO_BEGIN		\
3668	prevcontaddr = -2;	\
3669	start_pnum = -1;	\
3670	free_considered = 0;	\
3671	substitute_needed = 0;	\
3672	npages = 0;		\
3673	MACRO_END
3674
3675/*
3676 * Can we steal in-use (i.e. not free) pages when searching for
 * physically-contiguous pages?
3678 */
3679#define VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL 1
3680
3681static unsigned int vm_page_find_contiguous_last_idx = 0,  vm_page_lomem_find_contiguous_last_idx = 0;
3682#if DEBUG
3683int vm_page_find_contig_debug = 0;
3684#endif
3685
3686static vm_page_t
3687vm_page_find_contiguous(
3688	unsigned int	contig_pages,
3689	ppnum_t		max_pnum,
3690	ppnum_t     pnum_mask,
3691	boolean_t	wire,
3692	int		flags)
3693{
3694	vm_page_t	m = NULL;
3695	ppnum_t		prevcontaddr;
3696	ppnum_t		start_pnum;
3697	unsigned int	npages, considered, scanned;
3698	unsigned int	page_idx, start_idx, last_idx, orig_last_idx;
3699	unsigned int	idx_last_contig_page_found = 0;
3700	int		free_considered, free_available;
3701	int		substitute_needed;
3702	boolean_t	wrapped;
3703#if DEBUG
3704	clock_sec_t	tv_start_sec, tv_end_sec;
3705	clock_usec_t	tv_start_usec, tv_end_usec;
3706#endif
3707#if MACH_ASSERT
3708	int		yielded = 0;
3709	int		dumped_run = 0;
3710	int		stolen_pages = 0;
3711#endif
3712
3713	if (contig_pages == 0)
3714		return VM_PAGE_NULL;
3715
3716#if MACH_ASSERT
3717	vm_page_verify_free_lists();
3718#endif
3719#if DEBUG
3720	clock_get_system_microtime(&tv_start_sec, &tv_start_usec);
3721#endif
3722	vm_page_lock_queues();
3723	lck_mtx_lock(&vm_page_queue_free_lock);
3724
3725	RESET_STATE_OF_RUN();
3726
3727	scanned = 0;
3728	considered = 0;
3729	free_available = vm_page_free_count - vm_page_free_reserved;
3730
3731	wrapped = FALSE;
3732
3733	if(flags & KMA_LOMEM)
3734		idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx;
3735	else
3736		idx_last_contig_page_found =  vm_page_find_contiguous_last_idx;
3737
3738	orig_last_idx = idx_last_contig_page_found;
3739	last_idx = orig_last_idx;
3740
3741	for (page_idx = last_idx, start_idx = last_idx;
3742	     npages < contig_pages && page_idx < vm_pages_count;
3743	     page_idx++) {
3744retry:
3745		if (wrapped &&
3746		    npages == 0 &&
3747		    page_idx >= orig_last_idx) {
3748			/*
3749			 * We're back where we started and we haven't
3750			 * found any suitable contiguous range.  Let's
3751			 * give up.
3752			 */
3753			break;
3754		}
3755		scanned++;
3756		m = &vm_pages[page_idx];
3757
3758		assert(!m->fictitious);
3759		assert(!m->private);
3760
3761		if (max_pnum && m->phys_page > max_pnum) {
3762			/* no more low pages... */
3763			break;
3764		}
3765		if (!npages && ((m->phys_page & pnum_mask) != 0)) {
3766			/*
3767			 * not aligned
3768			 */
3769			RESET_STATE_OF_RUN();
3770
3771		} else if (VM_PAGE_WIRED(m) || m->gobbled ||
3772			   m->encrypted || m->encrypted_cleaning || m->cs_validated || m->cs_tainted ||
3773			   m->error || m->absent || m->pageout_queue || m->laundry || m->wanted || m->precious ||
3774			   m->cleaning || m->overwriting || m->restart || m->unusual || m->pageout) {
3775			/*
3776			 * page is in a transient state
3777			 * or a state we don't want to deal
3778			 * with, so don't consider it which
3779			 * means starting a new run
3780			 */
3781			RESET_STATE_OF_RUN();
3782
3783		} else if (!m->free && !m->active && !m->inactive && !m->speculative && !m->throttled) {
3784			/*
3785			 * page needs to be on one of our queues
3786			 * in order for it to be stable behind the
3787			 * locks we hold at this point...
3788			 * if not, don't consider it which
3789			 * means starting a new run
3790			 */
3791			RESET_STATE_OF_RUN();
3792
3793		} else if (!m->free && (!m->tabled || m->busy)) {
3794			/*
3795			 * pages on the free list are always 'busy'
3796			 * so we couldn't test for 'busy' in the check
3797			 * for the transient states... pages that are
3798			 * 'free' are never 'tabled', so we also couldn't
3799			 * test for 'tabled'.  So we check here to make
3800			 * sure that a non-free page is not busy and is
3801			 * tabled on an object...
3802			 * if not, don't consider it which
3803			 * means starting a new run
3804			 */
3805			RESET_STATE_OF_RUN();
3806
3807		} else {
3808			if (m->phys_page != prevcontaddr + 1) {
3809				if ((m->phys_page & pnum_mask) != 0) {
3810					RESET_STATE_OF_RUN();
3811					goto did_consider;
3812				} else {
3813					npages = 1;
3814					start_idx = page_idx;
3815					start_pnum = m->phys_page;
3816				}
3817			} else {
3818				npages++;
3819			}
3820			prevcontaddr = m->phys_page;
3821
3822			VM_PAGE_CHECK(m);
3823			if (m->free) {
3824				free_considered++;
3825			} else {
3826				/*
3827				 * This page is not free.
3828				 * If we can't steal used pages,
3829				 * we have to give up this run
3830				 * and keep looking.
3831				 * Otherwise, we might need to
3832				 * move the contents of this page
3833				 * into a substitute page.
3834				 */
3835#if VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
3836				if (m->pmapped || m->dirty) {
3837					substitute_needed++;
3838				}
3839#else
3840				RESET_STATE_OF_RUN();
3841#endif
3842			}
3843
3844			if ((free_considered + substitute_needed) > free_available) {
3845				/*
3846				 * if we let this run continue
3847				 * we will end up dropping the vm_page_free_count
3848				 * below the reserve limit... we need to abort
3849				 * this run, but we can at least re-consider this
3850				 * page... thus the jump back to 'retry'
3851				 */
3852				RESET_STATE_OF_RUN();
3853
3854				if (free_available && considered <= MAX_CONSIDERED_BEFORE_YIELD) {
3855					considered++;
3856					goto retry;
3857				}
3858				/*
3859				 * free_available == 0
3860				 * so can't consider any free pages... if
3861				 * we went to retry in this case, we'd
3862				 * get stuck looking at the same page
3863				 * w/o making any forward progress
3864				 * we also want to take this path if we've already
3865				 * reached our limit that controls the lock latency
3866				 */
3867			}
3868		}
3869did_consider:
3870		if (considered > MAX_CONSIDERED_BEFORE_YIELD && npages <= 1) {
3871
3872			lck_mtx_unlock(&vm_page_queue_free_lock);
3873			vm_page_unlock_queues();
3874
3875			mutex_pause(0);
3876
3877			vm_page_lock_queues();
3878			lck_mtx_lock(&vm_page_queue_free_lock);
3879
3880			RESET_STATE_OF_RUN();
3881			/*
3882			 * reset our free page limit since we
3883			 * dropped the lock protecting the vm_page_free_queue
3884			 */
3885			free_available = vm_page_free_count - vm_page_free_reserved;
3886			considered = 0;
3887#if MACH_ASSERT
3888			yielded++;
3889#endif
3890			goto retry;
3891		}
3892		considered++;
3893	}
3894	m = VM_PAGE_NULL;
3895
3896	if (npages != contig_pages) {
3897		if (!wrapped) {
3898			/*
3899			 * We didn't find a contiguous range but we didn't
3900			 * start from the very first page.
3901			 * Start again from the very first page.
3902			 */
3903			RESET_STATE_OF_RUN();
3904			if( flags & KMA_LOMEM)
3905				idx_last_contig_page_found  = vm_page_lomem_find_contiguous_last_idx = 0;
3906			else
3907				idx_last_contig_page_found = vm_page_find_contiguous_last_idx = 0;
3908			last_idx = 0;
3909			page_idx = last_idx;
3910			wrapped = TRUE;
3911			goto retry;
3912		}
3913		lck_mtx_unlock(&vm_page_queue_free_lock);
3914	} else {
3915		vm_page_t	m1;
3916		vm_page_t	m2;
3917		unsigned int	cur_idx;
3918		unsigned int	tmp_start_idx;
3919		vm_object_t	locked_object = VM_OBJECT_NULL;
3920		boolean_t	abort_run = FALSE;
3921
3922		assert(page_idx - start_idx == contig_pages);
3923
3924		tmp_start_idx = start_idx;
3925
3926		/*
3927		 * first pass through to pull the free pages
3928		 * off of the free queue so that in case we
3929		 * need substitute pages, we won't grab any
3930		 * of the free pages in the run... we'll clear
3931		 * the 'free' bit in the 2nd pass, and even in
3932		 * an abort_run case, we'll collect all of the
3933		 * free pages in this run and return them to the free list
3934		 */
3935		while (start_idx < page_idx) {
3936
3937			m1 = &vm_pages[start_idx++];
3938
3939#if !VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
3940			assert(m1->free);
3941#endif
3942
3943			if (m1->free) {
3944				unsigned int color;
3945
3946				color = m1->phys_page & vm_color_mask;
3947#if MACH_ASSERT
3948				vm_page_verify_free_list(&vm_page_queue_free[color], color, m1, TRUE);
3949#endif
3950				queue_remove(&vm_page_queue_free[color],
3951					     m1,
3952					     vm_page_t,
3953					     pageq);
3954				m1->pageq.next = NULL;
3955				m1->pageq.prev = NULL;
3956#if MACH_ASSERT
3957				vm_page_verify_free_list(&vm_page_queue_free[color], color, VM_PAGE_NULL, FALSE);
3958#endif
3959				/*
3960				 * Clear the "free" bit so that this page
3961				 * does not get considered for another
3962				 * concurrent physically-contiguous allocation.
3963				 */
3964				m1->free = FALSE;
3965				assert(m1->busy);
3966
3967				vm_page_free_count--;
3968			}
3969		}
3970		/*
3971		 * adjust global freelist counts
3972		 */
3973		if (vm_page_free_count < vm_page_free_count_minimum)
3974			vm_page_free_count_minimum = vm_page_free_count;
3975
3976		if( flags & KMA_LOMEM)
3977			vm_page_lomem_find_contiguous_last_idx = page_idx;
3978		else
3979			vm_page_find_contiguous_last_idx = page_idx;
3980
3981		/*
3982		 * we can drop the free queue lock at this point since
3983		 * we've pulled any 'free' candidates off of the list
3984		 * we need it dropped so that we can do a vm_page_grab
3985		 * when substituting for pmapped/dirty pages
3986		 */
3987		lck_mtx_unlock(&vm_page_queue_free_lock);
3988
3989		start_idx = tmp_start_idx;
3990		cur_idx = page_idx - 1;
3991
3992		while (start_idx++ < page_idx) {
3993			/*
3994			 * must go through the list from back to front
3995			 * so that the page list is created in the
3996			 * correct order - low -> high phys addresses
3997			 */
3998			m1 = &vm_pages[cur_idx--];
3999
4000			assert(!m1->free);
4001			if (m1->object == VM_OBJECT_NULL) {
4002				/*
4003				 * page has already been removed from
4004				 * the free list in the 1st pass
4005				 */
4006				assert(m1->offset == (vm_object_offset_t) -1);
4007				assert(m1->busy);
4008				assert(!m1->wanted);
4009				assert(!m1->laundry);
4010			} else {
4011				vm_object_t object;
4012
4013				if (abort_run == TRUE)
4014					continue;
4015
4016				object = m1->object;
4017
4018				if (object != locked_object) {
4019					if (locked_object) {
4020						vm_object_unlock(locked_object);
4021						locked_object = VM_OBJECT_NULL;
4022					}
4023					if (vm_object_lock_try(object))
4024						locked_object = object;
4025				}
4026				if (locked_object == VM_OBJECT_NULL ||
4027				    (VM_PAGE_WIRED(m1) || m1->gobbled ||
4028				     m1->encrypted || m1->encrypted_cleaning || m1->cs_validated || m1->cs_tainted ||
4029				     m1->error || m1->absent || m1->pageout_queue || m1->laundry || m1->wanted || m1->precious ||
4030				     m1->cleaning || m1->overwriting || m1->restart || m1->unusual || m1->busy)) {
4031
4032					if (locked_object) {
4033						vm_object_unlock(locked_object);
4034						locked_object = VM_OBJECT_NULL;
4035					}
4036					tmp_start_idx = cur_idx;
4037					abort_run = TRUE;
4038					continue;
4039				}
4040				if (m1->pmapped || m1->dirty) {
4041					int refmod;
4042					vm_object_offset_t offset;
4043
4044					m2 = vm_page_grab();
4045
4046					if (m2 == VM_PAGE_NULL) {
4047						if (locked_object) {
4048							vm_object_unlock(locked_object);
4049							locked_object = VM_OBJECT_NULL;
4050						}
4051						tmp_start_idx = cur_idx;
4052						abort_run = TRUE;
4053						continue;
4054					}
4055					if (m1->pmapped)
4056						refmod = pmap_disconnect(m1->phys_page);
4057					else
4058						refmod = 0;
4059					vm_page_copy(m1, m2);
4060
4061					m2->reference = m1->reference;
4062					m2->dirty     = m1->dirty;
4063
4064					if (refmod & VM_MEM_REFERENCED)
4065						m2->reference = TRUE;
4066					if (refmod & VM_MEM_MODIFIED) {
4067						SET_PAGE_DIRTY(m2, TRUE);
4068					}
4069					offset = m1->offset;
4070
4071					/*
4072					 * completely cleans up the state
4073					 * of the page so that it is ready
4074					 * to be put onto the free list, or,
4075					 * for our purposes here, so that it looks
4076					 * as if it just came off of the free list
4077					 */
4078					vm_page_free_prepare(m1);
4079
4080					/*
4081					 * make sure we clear the ref/mod state
4082					 * from the pmap layer... else we risk
4083					 * inheriting state from the last time
4084					 * this page was used...
4085					 */
4086					pmap_clear_refmod(m2->phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
4087					/*
4088					 * now put the substitute page on the object
4089					 */
4090					vm_page_insert_internal(m2, locked_object, offset, TRUE, TRUE, FALSE);
4091
4092					if (m2->reference)
4093						vm_page_activate(m2);
4094					else
4095						vm_page_deactivate(m2);
4096
4097					PAGE_WAKEUP_DONE(m2);
4098
4099				} else {
4100					/*
4101					 * completely cleans up the state
4102					 * of the page so that it is ready
4103					 * to be put onto the free list, or,
4104					 * for our purposes here, so that it looks
4105					 * as if it just came off of the free list
4106					 */
4107					vm_page_free_prepare(m1);
4108				}
4109#if MACH_ASSERT
4110				stolen_pages++;
4111#endif
4112			}
4113			m1->pageq.next = (queue_entry_t) m;
4114			m1->pageq.prev = NULL;
4115			m = m1;
4116		}
4117		if (locked_object) {
4118			vm_object_unlock(locked_object);
4119			locked_object = VM_OBJECT_NULL;
4120		}
4121
4122		if (abort_run == TRUE) {
4123			if (m != VM_PAGE_NULL) {
4124				vm_page_free_list(m, FALSE);
4125			}
4126#if MACH_ASSERT
4127			dumped_run++;
4128#endif
4129			/*
4130			 * want the index of the last
4131			 * page in this run that was
4132			 * successfully 'stolen', so back
4133			 * it up 1 for the auto-decrement on use
4134			 * and 1 more to bump back over this page
4135			 */
4136			page_idx = tmp_start_idx + 2;
4137			if (page_idx >= vm_pages_count) {
4138				if (wrapped)
4139					goto done_scanning;
4140				page_idx = last_idx = 0;
4141				wrapped = TRUE;
4142			}
4143			abort_run = FALSE;
4144
4145			/*
4146			 * We didn't find a contiguous range but we didn't
4147			 * start from the very first page.
4148			 * Start again from the very first page.
4149			 */
4150			RESET_STATE_OF_RUN();
4151
4152			if( flags & KMA_LOMEM)
4153				idx_last_contig_page_found  = vm_page_lomem_find_contiguous_last_idx = page_idx;
4154			else
4155				idx_last_contig_page_found = vm_page_find_contiguous_last_idx = page_idx;
4156
4157			last_idx = page_idx;
4158
4159			lck_mtx_lock(&vm_page_queue_free_lock);
4160			/*
4161			* reset our free page limit since we
4162			* dropped the lock protecting the vm_page_free_queue
4163			*/
4164			free_available = vm_page_free_count - vm_page_free_reserved;
4165			goto retry;
4166		}
4167
4168		for (m1 = m; m1 != VM_PAGE_NULL; m1 = NEXT_PAGE(m1)) {
4169
4170			if (wire == TRUE)
4171				m1->wire_count++;
4172			else
4173				m1->gobbled = TRUE;
4174		}
4175		if (wire == FALSE)
4176			vm_page_gobble_count += npages;
4177
4178		/*
4179		 * gobbled pages are also counted as wired pages
4180		 */
4181		vm_page_wire_count += npages;
4182
4183 		assert(vm_page_verify_contiguous(m, npages));
4184	}
4185done_scanning:
4186	vm_page_unlock_queues();
4187
4188#if DEBUG
4189	clock_get_system_microtime(&tv_end_sec, &tv_end_usec);
4190
4191	tv_end_sec -= tv_start_sec;
4192	if (tv_end_usec < tv_start_usec) {
4193		tv_end_sec--;
4194		tv_end_usec += 1000000;
4195	}
4196	tv_end_usec -= tv_start_usec;
4197	if (tv_end_usec >= 1000000) {
4198		tv_end_sec++;
4199		tv_end_usec -= 1000000;
4200	}
4201	if (vm_page_find_contig_debug) {
4202		printf("%s(num=%d,low=%d): found %d pages at 0x%llx in %ld.%06ds...  started at %d... scanned %d pages...  yielded %d times...  dumped run %d times... stole %d pages\n",
4203	       __func__, contig_pages, max_pnum, npages, (vm_object_offset_t)start_pnum << PAGE_SHIFT,
4204	       (long)tv_end_sec, tv_end_usec, orig_last_idx,
4205	       scanned, yielded, dumped_run, stolen_pages);
4206	}
4207
4208#endif
4209#if MACH_ASSERT
4210	vm_page_verify_free_lists();
4211#endif
4212	return m;
4213}
4214
4215/*
4216 *	Allocate a list of contiguous, wired pages.
4217 */
4218kern_return_t
4219cpm_allocate(
4220	vm_size_t	size,
4221	vm_page_t	*list,
4222	ppnum_t		max_pnum,
4223	ppnum_t		pnum_mask,
4224	boolean_t	wire,
4225	int		flags)
4226{
4227	vm_page_t		pages;
4228	unsigned int		npages;
4229
4230	if (size % PAGE_SIZE != 0)
4231		return KERN_INVALID_ARGUMENT;
4232
4233	npages = (unsigned int) (size / PAGE_SIZE);
4234	if (npages != size / PAGE_SIZE) {
4235		/* 32-bit overflow */
4236		return KERN_INVALID_ARGUMENT;
4237	}
4238
4239	/*
4240	 *	Obtain a pointer to a subset of the free
4241	 *	list large enough to satisfy the request;
4242	 *	the region will be physically contiguous.
4243	 */
4244	pages = vm_page_find_contiguous(npages, max_pnum, pnum_mask, wire, flags);
4245
4246	if (pages == VM_PAGE_NULL)
4247		return KERN_NO_SPACE;
4248	/*
4249	 * determine need for wakeups
4250	 */
4251	if ((vm_page_free_count < vm_page_free_min) ||
4252	     ((vm_page_free_count < vm_page_free_target) &&
4253	      ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
4254	         thread_wakeup((event_t) &vm_page_free_wanted);
4255
4256	VM_CHECK_MEMORYSTATUS;
4257
4258	/*
4259	 *	The CPM pages should now be available and
4260	 *	ordered by ascending physical address.
4261	 */
4262	assert(vm_page_verify_contiguous(pages, npages));
4263
4264	*list = pages;
4265	return KERN_SUCCESS;
4266}
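
/*
 * Illustrative sketch only (not part of the build): a hypothetical caller of
 * cpm_allocate() asking for a small physically contiguous, wired region and
 * walking the returned list.  The pages come back chained through pageq.next
 * (see vm_page_get_next() below), in ascending physical address order.
 * Passing 0 for max_pnum and pnum_mask means no physical address limit and
 * no alignment constraint.
 */
#if 0
static kern_return_t
cpm_allocate_example(void)
{
	vm_page_t	pages;
	vm_page_t	m;
	kern_return_t	kr;

	kr = cpm_allocate((vm_size_t)(16 * PAGE_SIZE),	/* size: must be a multiple of PAGE_SIZE */
			  &pages,			/* out: list of vm_page_t */
			  0,				/* max_pnum: no upper bound on the physical page number */
			  0,				/* pnum_mask: no alignment requirement */
			  TRUE,				/* wire the pages */
			  0);				/* flags */
	if (kr != KERN_SUCCESS)
		return (kr);

	for (m = pages; m != VM_PAGE_NULL; m = vm_page_get_next(m)) {
		/* consecutive physical page numbers, by construction */
		(void) vm_page_get_phys_page(m);
	}
	return (KERN_SUCCESS);
}
#endif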
4267
4268
4269unsigned int vm_max_delayed_work_limit = DEFAULT_DELAYED_WORK_LIMIT;
4270
4271/*
4272 * when working on a 'run' of pages, it is necessary to hold
4273 * the vm_page_queue_lock (a hot global lock) for certain operations
4274 * on the page... however, the majority of the work can be done
4275 * while merely holding the object lock... in fact there are certain
4276 * collections of pages that don't require any work brokered by the
4277 * vm_page_queue_lock... to mitigate the time spent behind the global
4278 * lock, go to a 2 pass algorithm... collect pages up to DELAYED_WORK_LIMIT
4279 * while doing all of the work that doesn't require the vm_page_queue_lock...
4280 * then call vm_page_do_delayed_work to acquire the vm_page_queue_lock and do the
4281 * necessary work for each page... we will grab the busy bit on the page
4282 * if it's not already held so that vm_page_do_delayed_work can drop the object lock
4283 * if it can't immediately take the vm_page_queue_lock in order to compete
4284 * for the locks in the same order that vm_pageout_scan takes them.
4285 * the operation names are modeled after the names of the routines that
4286 * need to be called in order to make the changes very obvious in the
4287 * original loop
4288 */
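
/*
 * Illustrative sketch only (not part of the build): the general shape of a
 * caller of vm_page_do_delayed_work().  Per-page work that needs only the
 * object lock is done while the batch is collected; the queue-level
 * operations are recorded in dw_mask and then applied in a single trip
 * through the vm_page_queue_lock.  'collect_next_page' is a hypothetical
 * stand-in for the caller's own page iteration.
 */
#if 0
static void
delayed_work_example(vm_object_t object)
{
	struct vm_page_delayed_work	dw_array[DEFAULT_DELAYED_WORK_LIMIT];
	struct vm_page_delayed_work	*dwp;
	int				dw_count;
	int				dw_limit;
	vm_page_t			m;

	dwp = &dw_array[0];
	dw_count = 0;
	dw_limit = vm_max_delayed_work_limit;

	vm_object_lock(object);

	while ((m = collect_next_page(object)) != VM_PAGE_NULL) {	/* hypothetical iterator */
		/*
		 * object-lock-only work on 'm' goes here... then record
		 * the operation(s) to perform once the queue lock is held
		 * (assuming the page is in a state where activation is legal)
		 */
		dwp->dw_m = m;
		dwp->dw_mask = DW_vm_page_activate;
		dwp++;
		dw_count++;

		if (dw_count >= dw_limit) {
			vm_page_do_delayed_work(object, &dw_array[0], dw_count);
			dwp = &dw_array[0];
			dw_count = 0;
		}
	}
	if (dw_count)
		vm_page_do_delayed_work(object, &dw_array[0], dw_count);

	vm_object_unlock(object);
}
#endif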
4289
4290void
4291vm_page_do_delayed_work(
4292	vm_object_t 	object,
4293	struct vm_page_delayed_work *dwp,
4294	int		dw_count)
4295{
4296	int		j;
4297	vm_page_t	m;
4298        vm_page_t       local_free_q = VM_PAGE_NULL;
4299
4300	/*
4301	 * pageout_scan takes the vm_page_lock_queues first
4302	 * then tries for the object lock... to avoid what
4303	 * is effectively a lock inversion, we'll go to the
4304	 * trouble of taking them in that same order... otherwise
4305	 * if this object contains the majority of the pages resident
4306	 * in the UBC (or a small set of large objects actively being
4307	 * worked on contain the majority of the pages), we could
4308	 * cause the pageout_scan thread to 'starve' in its attempt
4309	 * to find pages to move to the free queue, since it has to
4310	 * successfully acquire the object lock of any candidate page
4311	 * before it can steal/clean it.
4312	 */
4313	if (!vm_page_trylockspin_queues()) {
4314		vm_object_unlock(object);
4315
4316		vm_page_lockspin_queues();
4317
4318		for (j = 0; ; j++) {
4319			if (!vm_object_lock_avoid(object) &&
4320			    _vm_object_lock_try(object))
4321				break;
4322			vm_page_unlock_queues();
4323			mutex_pause(j);
4324			vm_page_lockspin_queues();
4325		}
4326	}
4327	for (j = 0; j < dw_count; j++, dwp++) {
4328
4329		m = dwp->dw_m;
4330
4331		if (dwp->dw_mask & DW_vm_pageout_throttle_up)
4332			vm_pageout_throttle_up(m);
4333
4334		if (dwp->dw_mask & DW_vm_page_wire)
4335			vm_page_wire(m);
4336		else if (dwp->dw_mask & DW_vm_page_unwire) {
4337			boolean_t	queueit;
4338
4339			queueit = (dwp->dw_mask & DW_vm_page_free) ? FALSE : TRUE;
4340
4341			vm_page_unwire(m, queueit);
4342		}
4343		if (dwp->dw_mask & DW_vm_page_free) {
4344			vm_page_free_prepare_queues(m);
4345
4346			assert(m->pageq.next == NULL && m->pageq.prev == NULL);
4347			/*
4348			 * Add this page to our list of reclaimed pages,
4349			 * to be freed later.
4350			 */
4351			m->pageq.next = (queue_entry_t) local_free_q;
4352			local_free_q = m;
4353		} else {
4354			if (dwp->dw_mask & DW_vm_page_deactivate_internal)
4355				vm_page_deactivate_internal(m, FALSE);
4356			else if (dwp->dw_mask & DW_vm_page_activate) {
4357				if (m->active == FALSE) {
4358					vm_page_activate(m);
4359				}
4360			}
4361			else if (dwp->dw_mask & DW_vm_page_speculate)
4362				vm_page_speculate(m, TRUE);
4363			else if (dwp->dw_mask & DW_enqueue_cleaned) {
4364				/*
4365				 * if we didn't hold the object lock and did this,
4366				 * we might disconnect the page, then someone might
4367				 * soft fault it back in, then we would put it on the
4368				 * cleaned queue, and so we would have a referenced (maybe even dirty)
4369				 * page on that queue, which we don't want
4370				 */
4371				int refmod_state = pmap_disconnect(m->phys_page);
4372
4373				if ((refmod_state & VM_MEM_REFERENCED)) {
4374					/*
4375					 * this page has been touched since it got cleaned; let's activate it
4376					 * if it hasn't already been
4377					 */
4378					vm_pageout_enqueued_cleaned++;
4379					vm_pageout_cleaned_reactivated++;
4380					vm_pageout_cleaned_commit_reactivated++;
4381
4382					if (m->active == FALSE)
4383						vm_page_activate(m);
4384				} else {
4385					m->reference = FALSE;
4386					vm_page_enqueue_cleaned(m);
4387				}
4388			}
4389			else if (dwp->dw_mask & DW_vm_page_lru)
4390				vm_page_lru(m);
4391			else if (dwp->dw_mask & DW_VM_PAGE_QUEUES_REMOVE) {
4392				if ( !m->pageout_queue)
4393					VM_PAGE_QUEUES_REMOVE(m);
4394			}
4395			if (dwp->dw_mask & DW_set_reference)
4396				m->reference = TRUE;
4397			else if (dwp->dw_mask & DW_clear_reference)
4398				m->reference = FALSE;
4399
4400			if (dwp->dw_mask & DW_move_page) {
4401				if ( !m->pageout_queue) {
4402					VM_PAGE_QUEUES_REMOVE(m);
4403
4404					assert(m->object != kernel_object);
4405
4406					VM_PAGE_ENQUEUE_INACTIVE(m, FALSE);
4407				}
4408			}
4409			if (dwp->dw_mask & DW_clear_busy)
4410				m->busy = FALSE;
4411
4412			if (dwp->dw_mask & DW_PAGE_WAKEUP)
4413				PAGE_WAKEUP(m);
4414		}
4415	}
4416	vm_page_unlock_queues();
4417
4418	if (local_free_q)
4419		vm_page_free_list(local_free_q, TRUE);
4420
4421	VM_CHECK_MEMORYSTATUS;
4422
4423}
4424
4425kern_return_t
4426vm_page_alloc_list(
4427	int	page_count,
4428	int	flags,
4429	vm_page_t *list)
4430{
4431	vm_page_t	lo_page_list = VM_PAGE_NULL;
4432	vm_page_t	mem;
4433	int		i;
4434
4435	if ( !(flags & KMA_LOMEM))
4436		panic("vm_page_alloc_list: called w/o KMA_LOMEM");
4437
4438	for (i = 0; i < page_count; i++) {
4439
4440		mem = vm_page_grablo();
4441
4442		if (mem == VM_PAGE_NULL) {
4443			if (lo_page_list)
4444				vm_page_free_list(lo_page_list, FALSE);
4445
4446			*list = VM_PAGE_NULL;
4447
4448			return (KERN_RESOURCE_SHORTAGE);
4449		}
4450		mem->pageq.next = (queue_entry_t) lo_page_list;
4451		lo_page_list = mem;
4452	}
4453	*list = lo_page_list;
4454
4455	return (KERN_SUCCESS);
4456}
4457
4458void
4459vm_page_set_offset(vm_page_t page, vm_object_offset_t offset)
4460{
4461	page->offset = offset;
4462}
4463
4464vm_page_t
4465vm_page_get_next(vm_page_t page)
4466{
4467	return ((vm_page_t) page->pageq.next);
4468}
4469
4470vm_object_offset_t
4471vm_page_get_offset(vm_page_t page)
4472{
4473	return (page->offset);
4474}
4475
4476ppnum_t
4477vm_page_get_phys_page(vm_page_t page)
4478{
4479	return (page->phys_page);
4480}
4481
4482
4483/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
4484
4485#if HIBERNATION
4486
4487static vm_page_t hibernate_gobble_queue;
4488
4489extern boolean_t (* volatile consider_buffer_cache_collect)(int);
4490
4491static int  hibernate_drain_pageout_queue(struct vm_pageout_queue *);
4492static int  hibernate_flush_dirty_pages(void);
4493static int  hibernate_flush_queue(queue_head_t *, int);
4494
4495void hibernate_flush_wait(void);
4496void hibernate_mark_in_progress(void);
4497void hibernate_clear_in_progress(void);
4498
4499
4500struct hibernate_statistics {
4501	int hibernate_considered;
4502	int hibernate_reentered_on_q;
4503	int hibernate_found_dirty;
4504	int hibernate_skipped_cleaning;
4505	int hibernate_skipped_transient;
4506	int hibernate_skipped_precious;
4507	int hibernate_queue_nolock;
4508	int hibernate_queue_paused;
4509	int hibernate_throttled;
4510	int hibernate_throttle_timeout;
4511	int hibernate_drained;
4512	int hibernate_drain_timeout;
4513	int cd_lock_failed;
4514	int cd_found_precious;
4515	int cd_found_wired;
4516	int cd_found_busy;
4517	int cd_found_unusual;
4518	int cd_found_cleaning;
4519	int cd_found_laundry;
4520	int cd_found_dirty;
4521	int cd_local_free;
4522	int cd_total_free;
4523	int cd_vm_page_wire_count;
4524	int cd_pages;
4525	int cd_discarded;
4526	int cd_count_wire;
4527} hibernate_stats;
4528
4529
4530
4531static int
4532hibernate_drain_pageout_queue(struct vm_pageout_queue *q)
4533{
4534	wait_result_t	wait_result;
4535
4536	vm_page_lock_queues();
4537
4538	while (q->pgo_laundry) {
4539
4540		q->pgo_draining = TRUE;
4541
4542		assert_wait_timeout((event_t) (&q->pgo_laundry+1), THREAD_INTERRUPTIBLE, 5000, 1000*NSEC_PER_USEC);
4543
4544		vm_page_unlock_queues();
4545
4546		wait_result = thread_block(THREAD_CONTINUE_NULL);
4547
4548		if (wait_result == THREAD_TIMED_OUT) {
4549			hibernate_stats.hibernate_drain_timeout++;
4550			return (1);
4551		}
4552		vm_page_lock_queues();
4553
4554		hibernate_stats.hibernate_drained++;
4555	}
4556	vm_page_unlock_queues();
4557
4558	return (0);
4559}
4560
4561
4562static int
4563hibernate_flush_queue(queue_head_t *q, int qcount)
4564{
4565	vm_page_t	m;
4566	vm_object_t	l_object = NULL;
4567	vm_object_t	m_object = NULL;
4568	int		refmod_state = 0;
4569	int		try_failed_count = 0;
4570	int		retval = 0;
4571	int		current_run = 0;
4572	struct	vm_pageout_queue *iq;
4573	struct	vm_pageout_queue *eq;
4574	struct	vm_pageout_queue *tq;
4575
4576	hibernate_cleaning_in_progress = TRUE;
4577
4578	KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_START, q, qcount, 0, 0, 0);
4579
4580	iq = &vm_pageout_queue_internal;
4581	eq = &vm_pageout_queue_external;
4582
4583	vm_page_lock_queues();
4584
4585	while (qcount && !queue_empty(q)) {
4586
4587		if (current_run++ == 1000) {
4588			if (hibernate_should_abort()) {
4589				retval = 1;
4590				break;
4591			}
4592			current_run = 0;
4593		}
4594
4595		m = (vm_page_t) queue_first(q);
4596		m_object = m->object;
4597
4598		/*
4599		 * check to see if we currently are working
4600		 * with the same object... if so, we've
4601		 * already got the lock
4602		 */
4603		if (m_object != l_object) {
4604		        /*
4605			 * the object associated with the candidate page is
4606			 * different from the one we were just working
4607			 * with... dump the lock if we still own it
4608			 */
4609		        if (l_object != NULL) {
4610			        vm_object_unlock(l_object);
4611				l_object = NULL;
4612			}
4613			/*
4614			 * Try to lock object; since we've already got the
4615			 * page queues lock, we can only 'try' for this one.
4616			 * if the 'try' fails, we need to do a mutex_pause
4617			 * to allow the owner of the object lock a chance to
4618			 * run...
4619			 */
4620			if ( !vm_object_lock_try_scan(m_object)) {
4621
4622				if (try_failed_count > 20) {
4623					hibernate_stats.hibernate_queue_nolock++;
4624
4625					goto reenter_pg_on_q;
4626				}
4627				vm_pageout_scan_wants_object = m_object;
4628
4629				vm_page_unlock_queues();
4630				mutex_pause(try_failed_count++);
4631				vm_page_lock_queues();
4632
4633				hibernate_stats.hibernate_queue_paused++;
4634				continue;
4635			} else {
4636				l_object = m_object;
4637				vm_pageout_scan_wants_object = VM_OBJECT_NULL;
4638			}
4639		}
4640		if ( !m_object->alive || m->encrypted_cleaning || m->cleaning || m->laundry || m->busy || m->absent || m->error) {
4641			/*
4642			 * page is not to be cleaned
4643			 * put it back on the head of its queue
4644			 */
4645			if (m->cleaning)
4646				hibernate_stats.hibernate_skipped_cleaning++;
4647			else
4648				hibernate_stats.hibernate_skipped_transient++;
4649
4650			goto reenter_pg_on_q;
4651		}
4652		if ( !m_object->pager_initialized && m_object->pager_created)
4653			goto reenter_pg_on_q;
4654
4655		if (m_object->copy == VM_OBJECT_NULL) {
4656			if (m_object->purgable == VM_PURGABLE_VOLATILE || m_object->purgable == VM_PURGABLE_EMPTY) {
4657				/*
4658				 * let the normal hibernate image path
4659				 * deal with these
4660				 */
4661				goto reenter_pg_on_q;
4662			}
4663		}
4664		if ( !m->dirty && m->pmapped) {
4665		        refmod_state = pmap_get_refmod(m->phys_page);
4666
4667			if ((refmod_state & VM_MEM_MODIFIED)) {
4668				SET_PAGE_DIRTY(m, FALSE);
4669			}
4670		} else
4671			refmod_state = 0;
4672
4673		if ( !m->dirty) {
4674			/*
4675			 * page is not to be cleaned
4676			 * put it back on the head of its queue
4677			 */
4678			if (m->precious)
4679				hibernate_stats.hibernate_skipped_precious++;
4680
4681			goto reenter_pg_on_q;
4682		}
4683		tq = NULL;
4684
4685		if (m_object->internal) {
4686			if (VM_PAGE_Q_THROTTLED(iq))
4687				tq = iq;
4688		} else if (VM_PAGE_Q_THROTTLED(eq))
4689			tq = eq;
4690
4691		if (tq != NULL) {
4692			wait_result_t	wait_result;
4693			int		wait_count = 5;
4694
4695		        if (l_object != NULL) {
4696			        vm_object_unlock(l_object);
4697				l_object = NULL;
4698			}
4699			vm_pageout_scan_wants_object = VM_OBJECT_NULL;
4700
4701			tq->pgo_throttled = TRUE;
4702
4703			while (retval == 0) {
4704
4705				assert_wait_timeout((event_t) &tq->pgo_laundry, THREAD_INTERRUPTIBLE, 1000, 1000*NSEC_PER_USEC);
4706
4707				vm_page_unlock_queues();
4708
4709				wait_result = thread_block(THREAD_CONTINUE_NULL);
4710
4711				vm_page_lock_queues();
4712
4713				if (hibernate_should_abort())
4714					retval = 1;
4715
4716				if (wait_result != THREAD_TIMED_OUT)
4717					break;
4718
4719				if (--wait_count == 0) {
4720					hibernate_stats.hibernate_throttle_timeout++;
4721					retval = 1;
4722				}
4723			}
4724			if (retval)
4725				break;
4726
4727			hibernate_stats.hibernate_throttled++;
4728
4729			continue;
4730		}
4731		/*
4732		 * we've already factored out pages in the laundry which
4733		 * means this page can't be on the pageout queue so it's
4734		 * safe to do the VM_PAGE_QUEUES_REMOVE
4735		 */
4736                assert(!m->pageout_queue);
4737
4738		VM_PAGE_QUEUES_REMOVE(m);
4739
4740		vm_pageout_cluster(m, FALSE);
4741
4742		hibernate_stats.hibernate_found_dirty++;
4743
4744		goto next_pg;
4745
4746reenter_pg_on_q:
4747		queue_remove(q, m, vm_page_t, pageq);
4748		queue_enter(q, m, vm_page_t, pageq);
4749
4750		hibernate_stats.hibernate_reentered_on_q++;
4751next_pg:
4752		hibernate_stats.hibernate_considered++;
4753
4754		qcount--;
4755		try_failed_count = 0;
4756	}
4757	if (l_object != NULL) {
4758		vm_object_unlock(l_object);
4759		l_object = NULL;
4760	}
4761	vm_pageout_scan_wants_object = VM_OBJECT_NULL;
4762
4763	vm_page_unlock_queues();
4764
4765	KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_END, hibernate_stats.hibernate_found_dirty, retval, 0, 0, 0);
4766
4767	hibernate_cleaning_in_progress = FALSE;
4768
4769	return (retval);
4770}
4771
4772
4773static int
4774hibernate_flush_dirty_pages(void)
4775{
4776	struct vm_speculative_age_q	*aq;
4777	uint32_t	i;
4778
4779	bzero(&hibernate_stats, sizeof(struct hibernate_statistics));
4780
4781	if (vm_page_local_q) {
4782		for (i = 0; i < vm_page_local_q_count; i++)
4783			vm_page_reactivate_local(i, TRUE, FALSE);
4784	}
4785
4786	for (i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++) {
4787		int		qcount;
4788		vm_page_t	m;
4789
4790		aq = &vm_page_queue_speculative[i];
4791
4792		if (queue_empty(&aq->age_q))
4793			continue;
4794		qcount = 0;
4795
4796		vm_page_lockspin_queues();
4797
4798		queue_iterate(&aq->age_q,
4799			      m,
4800			      vm_page_t,
4801			      pageq)
4802		{
4803			qcount++;
4804		}
4805		vm_page_unlock_queues();
4806
4807		if (qcount) {
4808			if (hibernate_flush_queue(&aq->age_q, qcount))
4809				return (1);
4810		}
4811	}
4812	if (hibernate_flush_queue(&vm_page_queue_active, vm_page_active_count))
4813		return (1);
4814	if (hibernate_flush_queue(&vm_page_queue_inactive, vm_page_inactive_count - vm_page_anonymous_count - vm_page_cleaned_count))
4815		return (1);
4816	if (hibernate_flush_queue(&vm_page_queue_anonymous, vm_page_anonymous_count))
4817		return (1);
4818	if (hibernate_flush_queue(&vm_page_queue_cleaned, vm_page_cleaned_count))
4819		return (1);
4820
4821	if (hibernate_drain_pageout_queue(&vm_pageout_queue_internal))
4822		return (1);
4823	return (hibernate_drain_pageout_queue(&vm_pageout_queue_external));
4824}
4825
4826
4827extern void IOSleep(unsigned int);
4828extern int sync_internal(void);
4829
4830int
4831hibernate_flush_memory(void)
4832{
4833	int	retval;
4834
4835	KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_START, vm_page_free_count, 0, 0, 0, 0);
4836
4837	IOSleep(2 * 1000);
4838
4839	KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_NONE, vm_page_free_count, 0, 0, 0, 0);
4840
4841	if ((retval = hibernate_flush_dirty_pages()) == 0) {
4842		if (consider_buffer_cache_collect != NULL) {
4843
4844			KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_START, vm_page_wire_count, 0, 0, 0, 0);
4845
4846			sync_internal();
4847			(void)(*consider_buffer_cache_collect)(1);
4848			consider_zone_gc(TRUE);
4849
4850			KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_END, vm_page_wire_count, 0, 0, 0, 0);
4851		}
4852	}
4853	KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_END, vm_page_free_count, hibernate_stats.hibernate_found_dirty, retval, 0, 0);
4854
4855    HIBPRINT("hibernate_flush_memory() considered(%d) reentered_on_q(%d) found_dirty(%d)\n",
4856                hibernate_stats.hibernate_considered,
4857                hibernate_stats.hibernate_reentered_on_q,
4858                hibernate_stats.hibernate_found_dirty);
4859    HIBPRINT("   skipped_cleaning(%d) skipped_transient(%d) skipped_precious(%d) queue_nolock(%d)\n",
4860                hibernate_stats.hibernate_skipped_cleaning,
4861                hibernate_stats.hibernate_skipped_transient,
4862                hibernate_stats.hibernate_skipped_precious,
4863                hibernate_stats.hibernate_queue_nolock);
4864    HIBPRINT("   queue_paused(%d) throttled(%d) throttle_timeout(%d) drained(%d) drain_timeout(%d)\n",
4865                hibernate_stats.hibernate_queue_paused,
4866                hibernate_stats.hibernate_throttled,
4867                hibernate_stats.hibernate_throttle_timeout,
4868                hibernate_stats.hibernate_drained,
4869                hibernate_stats.hibernate_drain_timeout);
4870
4871	return (retval);
4872}
4873
4874
4875static void
4876hibernate_page_list_zero(hibernate_page_list_t *list)
4877{
4878    uint32_t             bank;
4879    hibernate_bitmap_t * bitmap;
4880
4881    bitmap = &list->bank_bitmap[0];
4882    for (bank = 0; bank < list->bank_count; bank++)
4883    {
4884        uint32_t last_bit;
4885
4886	bzero((void *) &bitmap->bitmap[0], bitmap->bitmapwords << 2);
4887        // set out-of-bound bits at end of bitmap.
4888        last_bit = ((bitmap->last_page - bitmap->first_page + 1) & 31);
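        // e.g. a bank spanning 70 pages leaves last_bit == 6, so the low 26 bits
        // of the final word (0xFFFFFFFF >> 6 == 0x03FFFFFF) are the out-of-bound bits that get set.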
4889	if (last_bit)
4890	    bitmap->bitmap[bitmap->bitmapwords - 1] = (0xFFFFFFFF >> last_bit);
4891
4892	bitmap = (hibernate_bitmap_t *) &bitmap->bitmap[bitmap->bitmapwords];
4893    }
4894}
4895
4896void
4897hibernate_gobble_pages(uint32_t gobble_count, uint32_t free_page_time)
4898{
4899    uint32_t i;
4900    vm_page_t m;
4901    uint64_t start, end, timeout, nsec;
4902    clock_interval_to_deadline(free_page_time, 1000 * 1000 /*ms*/, &timeout);
4903    clock_get_uptime(&start);
4904
4905    for (i = 0; i < gobble_count; i++)
4906    {
4907	while (VM_PAGE_NULL == (m = vm_page_grab()))
4908	{
4909	    clock_get_uptime(&end);
4910	    if (end >= timeout)
4911		break;
4912	    VM_PAGE_WAIT();
4913	}
4914	if (!m)
4915	    break;
4916	m->busy = FALSE;
4917	vm_page_gobble(m);
4918
4919	m->pageq.next = (queue_entry_t) hibernate_gobble_queue;
4920	hibernate_gobble_queue = m;
4921    }
4922
4923    clock_get_uptime(&end);
4924    absolutetime_to_nanoseconds(end - start, &nsec);
4925    HIBLOG("Gobbled %d pages, time: %qd ms\n", i, nsec / 1000000ULL);
4926}
4927
4928void
4929hibernate_free_gobble_pages(void)
4930{
4931    vm_page_t m, next;
4932    uint32_t  count = 0;
4933
4934    m = (vm_page_t) hibernate_gobble_queue;
4935    while(m)
4936    {
4937        next = (vm_page_t) m->pageq.next;
4938        vm_page_free(m);
4939        count++;
4940        m = next;
4941    }
4942    hibernate_gobble_queue = VM_PAGE_NULL;
4943
4944    if (count)
4945        HIBLOG("Freed %d pages\n", count);
4946}
4947
4948static boolean_t
4949hibernate_consider_discard(vm_page_t m, boolean_t preflight)
4950{
4951    vm_object_t object = NULL;
4952    int                  refmod_state;
4953    boolean_t            discard = FALSE;
4954
4955    do
4956    {
4957        if (m->private)
4958            panic("hibernate_consider_discard: private");
4959
4960        if (!vm_object_lock_try(m->object)) {
4961	    if (!preflight) hibernate_stats.cd_lock_failed++;
4962            break;
4963	}
4964        object = m->object;
4965
4966	if (VM_PAGE_WIRED(m)) {
4967	    if (!preflight) hibernate_stats.cd_found_wired++;
4968            break;
4969	}
4970        if (m->precious) {
4971	    if (!preflight) hibernate_stats.cd_found_precious++;
4972            break;
4973	}
4974        if (m->busy || !object->alive) {
4975           /*
4976            *	Somebody is playing with this page.
4977            */
4978	    if (!preflight) hibernate_stats.cd_found_busy++;
4979            break;
4980	}
4981        if (m->absent || m->unusual || m->error) {
4982           /*
4983            * If it's unusual in any way, ignore it
4984            */
4985	    if (!preflight) hibernate_stats.cd_found_unusual++;
4986            break;
4987	}
4988        if (m->cleaning) {
4989	    if (!preflight) hibernate_stats.cd_found_cleaning++;
4990            break;
4991	}
4992	if (m->laundry) {
4993	    if (!preflight) hibernate_stats.cd_found_laundry++;
4994            break;
4995	}
4996        if (!m->dirty)
4997        {
4998            refmod_state = pmap_get_refmod(m->phys_page);
4999
5000            if (refmod_state & VM_MEM_REFERENCED)
5001                m->reference = TRUE;
5002            if (refmod_state & VM_MEM_MODIFIED) {
5003              	SET_PAGE_DIRTY(m, FALSE);
5004	    }
5005        }
5006
5007        /*
5008         * If it's clean or purgeable we can discard the page on wakeup.
5009         */
5010        discard = (!m->dirty)
5011		    || (VM_PURGABLE_VOLATILE == object->purgable)
5012		    || (VM_PURGABLE_EMPTY    == object->purgable);
5013
5014	if (discard == FALSE) {
5015	    if (!preflight) hibernate_stats.cd_found_dirty++;
5016	}
5017    }
5018    while (FALSE);
5019
5020    if (object)
5021        vm_object_unlock(object);
5022
5023    return (discard);
5024}
5025
5026
5027static void
5028hibernate_discard_page(vm_page_t m)
5029{
5030    if (m->absent || m->unusual || m->error)
5031       /*
5032        * If it's unusual in any way, ignore it
5033        */
5034        return;
5035
5036#if DEBUG
5037    vm_object_t object = m->object;
5038    if (!vm_object_lock_try(m->object))
5039	panic("hibernate_discard_page(%p) !vm_object_lock_try", m);
5040#else
5041    /* No need to lock page queue for token delete, hibernate_vm_unlock()
5042       makes sure these locks are uncontended before sleep */
5043#endif	/* !DEBUG */
5044
5045    if (m->pmapped == TRUE)
5046    {
5047        __unused int refmod_state = pmap_disconnect(m->phys_page);
5048    }
5049
5050    if (m->laundry)
5051        panic("hibernate_discard_page(%p) laundry", m);
5052    if (m->private)
5053        panic("hibernate_discard_page(%p) private", m);
5054    if (m->fictitious)
5055        panic("hibernate_discard_page(%p) fictitious", m);
5056
5057    if (VM_PURGABLE_VOLATILE == m->object->purgable)
5058    {
5059	/* object should be on a queue */
5060        assert((m->object->objq.next != NULL) && (m->object->objq.prev != NULL));
5061        purgeable_q_t old_queue = vm_purgeable_object_remove(m->object);
5062        assert(old_queue);
5063        vm_purgeable_token_delete_first(old_queue);
5064        m->object->purgable = VM_PURGABLE_EMPTY;
5065    }
5066
5067    vm_page_free(m);
5068
5069#if DEBUG
5070    vm_object_unlock(object);
5071#endif	/* DEBUG */
5072}
5073
5074/*
5075 Grab locks for hibernate_page_list_setall()
5076*/
5077void
5078hibernate_vm_lock_queues(void)
5079{
5080    vm_page_lock_queues();
5081    lck_mtx_lock(&vm_page_queue_free_lock);
5082
5083    if (vm_page_local_q) {
5084	uint32_t  i;
5085	for (i = 0; i < vm_page_local_q_count; i++) {
5086	    struct vpl	*lq;
5087	    lq = &vm_page_local_q[i].vpl_un.vpl;
5088	    VPL_LOCK(&lq->vpl_lock);
5089	}
5090    }
5091}
5092
5093void
5094hibernate_vm_unlock_queues(void)
5095{
5096    if (vm_page_local_q) {
5097	uint32_t  i;
5098	for (i = 0; i < vm_page_local_q_count; i++) {
5099	    struct vpl	*lq;
5100	    lq = &vm_page_local_q[i].vpl_un.vpl;
5101	    VPL_UNLOCK(&lq->vpl_lock);
5102	}
5103    }
5104    lck_mtx_unlock(&vm_page_queue_free_lock);
5105    vm_page_unlock_queues();
5106}
5107
5108/*
5109 A zero bit in the bitmaps => the page needs to be saved.  All pages default to being saved;
5110 pages known to the VM not to need saving are then subtracted.
5111 Wired pages to be saved are present in page_list_wired, pageable pages in page_list.
5112*/
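
/*
 * Illustrative sketch only (not part of the build): how a consumer would
 * interpret a bit under the convention above, using the
 * hibernate_page_bittst() accessor that the discard path below relies on.
 * The helper name is hypothetical.
 */
#if 0
static boolean_t
hibernate_page_needs_saving(hibernate_page_list_t *page_list, ppnum_t pnum)
{
	/* a set bit means the page does not have to be preserved in the image */
	return (hibernate_page_bittst(page_list, pnum) ? FALSE : TRUE);
}
#endif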
5113
5114void
5115hibernate_page_list_setall(hibernate_page_list_t * page_list,
5116			   hibernate_page_list_t * page_list_wired,
5117			   hibernate_page_list_t * page_list_pal,
5118			   boolean_t preflight,
5119			   uint32_t * pagesOut)
5120{
5121    uint64_t start, end, nsec;
5122    vm_page_t m;
5123    uint32_t pages = page_list->page_count;
5124    uint32_t count_zf = 0, count_throttled = 0;
5125    uint32_t count_inactive = 0, count_active = 0, count_speculative = 0, count_cleaned = 0;
5126    uint32_t count_wire = pages;
5127    uint32_t count_discard_active    = 0;
5128    uint32_t count_discard_inactive  = 0;
5129    uint32_t count_discard_cleaned   = 0;
5130    uint32_t count_discard_purgeable = 0;
5131    uint32_t count_discard_speculative = 0;
5132    uint32_t i;
5133    uint32_t             bank;
5134    hibernate_bitmap_t * bitmap;
5135    hibernate_bitmap_t * bitmap_wired;
5136
5137    HIBLOG("hibernate_page_list_setall(preflight %d) start %p, %p\n", preflight, page_list, page_list_wired);
5138
5139    if (preflight) {
5140        page_list       = NULL;
5141        page_list_wired = NULL;
5142        page_list_pal   = NULL;
5143    }
5144
5145#if DEBUG
5146        vm_page_lock_queues();
5147	if (vm_page_local_q) {
5148	    for (i = 0; i < vm_page_local_q_count; i++) {
5149		struct vpl	*lq;
5150		lq = &vm_page_local_q[i].vpl_un.vpl;
5151		VPL_LOCK(&lq->vpl_lock);
5152	    }
5153	}
5154#endif	/* DEBUG */
5155
5156
5157    KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_START, count_wire, 0, 0, 0, 0);
5158
5159    clock_get_uptime(&start);
5160
5161    if (!preflight) {
5162	hibernate_page_list_zero(page_list);
5163	hibernate_page_list_zero(page_list_wired);
5164	hibernate_page_list_zero(page_list_pal);
5165
5166	hibernate_stats.cd_vm_page_wire_count = vm_page_wire_count;
5167	hibernate_stats.cd_pages = pages;
5168    }
5169
5170    if (vm_page_local_q) {
5171	    for (i = 0; i < vm_page_local_q_count; i++)
5172		    vm_page_reactivate_local(i, TRUE, !preflight);
5173    }
5174
5175    if (preflight) {
5176	vm_page_lock_queues();
5177	lck_mtx_lock(&vm_page_queue_free_lock);
5178    }
5179
5180    m = (vm_page_t) hibernate_gobble_queue;
5181    while(m)
5182    {
5183	pages--;
5184	count_wire--;
5185	if (!preflight) {
5186	    hibernate_page_bitset(page_list,       TRUE, m->phys_page);
5187	    hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5188	}
5189	m = (vm_page_t) m->pageq.next;
5190    }
5191
5192    if (!preflight) for( i = 0; i < real_ncpus; i++ )
5193    {
5194	if (cpu_data_ptr[i] && cpu_data_ptr[i]->cpu_processor)
5195	{
5196	    for (m = PROCESSOR_DATA(cpu_data_ptr[i]->cpu_processor, free_pages); m; m = (vm_page_t)m->pageq.next)
5197	    {
5198		pages--;
5199		count_wire--;
5200		hibernate_page_bitset(page_list,       TRUE, m->phys_page);
5201		hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5202
5203		hibernate_stats.cd_local_free++;
5204		hibernate_stats.cd_total_free++;
5205	    }
5206	}
5207    }
5208
5209    for( i = 0; i < vm_colors; i++ )
5210    {
5211	queue_iterate(&vm_page_queue_free[i],
5212		      m,
5213		      vm_page_t,
5214		      pageq)
5215	{
5216	    pages--;
5217	    count_wire--;
5218	    if (!preflight) {
5219		hibernate_page_bitset(page_list,       TRUE, m->phys_page);
5220		hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5221
5222		hibernate_stats.cd_total_free++;
5223	    }
5224	}
5225    }
5226
5227    queue_iterate(&vm_lopage_queue_free,
5228		  m,
5229		  vm_page_t,
5230		  pageq)
5231    {
5232	pages--;
5233	count_wire--;
5234	if (!preflight) {
5235	    hibernate_page_bitset(page_list,       TRUE, m->phys_page);
5236	    hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5237
5238	    hibernate_stats.cd_total_free++;
5239	}
5240    }
5241
5242    queue_iterate( &vm_page_queue_throttled,
5243                    m,
5244                    vm_page_t,
5245                    pageq )
5246    {
5247        if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5248         && hibernate_consider_discard(m, preflight))
5249        {
5250            if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
5251            count_discard_inactive++;
5252        }
5253        else
5254            count_throttled++;
5255	count_wire--;
5256	if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5257    }
5258
5259    queue_iterate( &vm_page_queue_anonymous,
5260                    m,
5261                    vm_page_t,
5262                   pageq )
5263    {
5264        if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5265         && hibernate_consider_discard(m, preflight))
5266        {
5267            if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
5268	    if (m->dirty)
5269		count_discard_purgeable++;
5270	    else
5271		count_discard_inactive++;
5272        }
5273        else
5274            count_zf++;
5275	count_wire--;
5276	if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5277    }
5278
5279    queue_iterate( &vm_page_queue_inactive,
5280                    m,
5281                    vm_page_t,
5282                    pageq )
5283    {
5284        if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5285         && hibernate_consider_discard(m, preflight))
5286        {
5287            if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
5288	    if (m->dirty)
5289		count_discard_purgeable++;
5290	    else
5291		count_discard_inactive++;
5292        }
5293        else
5294            count_inactive++;
5295	count_wire--;
5296	if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5297    }
5298
5299    queue_iterate( &vm_page_queue_cleaned,
5300                    m,
5301                    vm_page_t,
5302                    pageq )
5303    {
5304        if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5305         && hibernate_consider_discard(m, preflight))
5306        {
5307            if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
5308	    if (m->dirty)
5309		count_discard_purgeable++;
5310	    else
5311		count_discard_cleaned++;
5312        }
5313        else
5314            count_cleaned++;
5315	count_wire--;
5316	if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5317    }
5318
5319    for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
5320    {
5321       queue_iterate(&vm_page_queue_speculative[i].age_q,
5322                     m,
5323                     vm_page_t,
5324                     pageq)
5325       {
5326           if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5327            && hibernate_consider_discard(m, preflight))
5328           {
5329               if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
5330               count_discard_speculative++;
5331           }
5332           else
5333               count_speculative++;
5334           count_wire--;
5335           if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5336       }
5337    }
5338
5339    queue_iterate( &vm_page_queue_active,
5340                    m,
5341                    vm_page_t,
5342                    pageq )
5343    {
5344        if ((kIOHibernateModeDiscardCleanActive & gIOHibernateMode)
5345         && hibernate_consider_discard(m, preflight))
5346        {
5347            if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
5348	    if (m->dirty)
5349		count_discard_purgeable++;
5350	    else
5351		count_discard_active++;
5352        }
5353        else
5354            count_active++;
5355	count_wire--;
5356	if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5357    }
5358
5359    if (!preflight) {
5360	// pull wired from hibernate_bitmap
5361	bitmap = &page_list->bank_bitmap[0];
5362	bitmap_wired = &page_list_wired->bank_bitmap[0];
5363	for (bank = 0; bank < page_list->bank_count; bank++)
5364	{
5365	    for (i = 0; i < bitmap->bitmapwords; i++)
5366		bitmap->bitmap[i] = bitmap->bitmap[i] | ~bitmap_wired->bitmap[i];
5367	    bitmap       = (hibernate_bitmap_t *) &bitmap->bitmap      [bitmap->bitmapwords];
5368	    bitmap_wired = (hibernate_bitmap_t *) &bitmap_wired->bitmap[bitmap_wired->bitmapwords];
5369	}
5370    }
5371
5372    // machine dependent adjustments
5373    hibernate_page_list_setall_machine(page_list, page_list_wired, preflight, &pages);
5374
5375    if (!preflight) {
5376	hibernate_stats.cd_count_wire = count_wire;
5377	hibernate_stats.cd_discarded = count_discard_active + count_discard_inactive + count_discard_purgeable + count_discard_speculative + count_discard_cleaned;
5378    }
5379
5380    clock_get_uptime(&end);
5381    absolutetime_to_nanoseconds(end - start, &nsec);
5382    HIBLOG("hibernate_page_list_setall time: %qd ms\n", nsec / 1000000ULL);
5383
5384    HIBLOG("pages %d, wire %d, act %d, inact %d, cleaned %d spec %d, zf %d, throt %d, could discard act %d inact %d purgeable %d spec %d cleaned %d\n",
5385	        pages, count_wire, count_active, count_inactive, count_cleaned, count_speculative, count_zf, count_throttled,
5386	        count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative, count_discard_cleaned);
5387
5388    *pagesOut = pages - count_discard_active - count_discard_inactive - count_discard_purgeable - count_discard_speculative - count_discard_cleaned;
5389
5390#if DEBUG
5391	if (vm_page_local_q) {
5392	    for (i = 0; i < vm_page_local_q_count; i++) {
5393		struct vpl	*lq;
5394		lq = &vm_page_local_q[i].vpl_un.vpl;
5395		VPL_UNLOCK(&lq->vpl_lock);
5396	    }
5397	}
5398        vm_page_unlock_queues();
5399#endif	/* DEBUG */
5400
5401    if (preflight) {
5402	lck_mtx_unlock(&vm_page_queue_free_lock);
5403	vm_page_unlock_queues();
5404    }
5405
5406    KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_END, count_wire, *pagesOut, 0, 0, 0);
5407}
5408
5409void
5410hibernate_page_list_discard(hibernate_page_list_t * page_list)
5411{
5412    uint64_t  start, end, nsec;
5413    vm_page_t m;
5414    vm_page_t next;
5415    uint32_t  i;
5416    uint32_t  count_discard_active    = 0;
5417    uint32_t  count_discard_inactive  = 0;
5418    uint32_t  count_discard_purgeable = 0;
5419    uint32_t  count_discard_cleaned   = 0;
5420    uint32_t  count_discard_speculative = 0;
5421
5422#if DEBUG
5423        vm_page_lock_queues();
5424	if (vm_page_local_q) {
5425	    for (i = 0; i < vm_page_local_q_count; i++) {
5426		struct vpl	*lq;
5427		lq = &vm_page_local_q[i].vpl_un.vpl;
5428		VPL_LOCK(&lq->vpl_lock);
5429	    }
5430	}
5431#endif	/* DEBUG */
5432
5433    clock_get_uptime(&start);
5434
5435    m = (vm_page_t) queue_first(&vm_page_queue_anonymous);
5436    while (m && !queue_end(&vm_page_queue_anonymous, (queue_entry_t)m))
5437    {
5438        next = (vm_page_t) m->pageq.next;
5439        if (hibernate_page_bittst(page_list, m->phys_page))
5440        {
5441	    if (m->dirty)
5442		count_discard_purgeable++;
5443	    else
5444		count_discard_inactive++;
5445            hibernate_discard_page(m);
5446        }
5447        m = next;
5448    }
5449
5450    for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
5451    {
5452       m = (vm_page_t) queue_first(&vm_page_queue_speculative[i].age_q);
5453       while (m && !queue_end(&vm_page_queue_speculative[i].age_q, (queue_entry_t)m))
5454       {
5455           next = (vm_page_t) m->pageq.next;
5456           if (hibernate_page_bittst(page_list, m->phys_page))
5457           {
5458               count_discard_speculative++;
5459               hibernate_discard_page(m);
5460           }
5461           m = next;
5462       }
5463    }
5464
5465    m = (vm_page_t) queue_first(&vm_page_queue_inactive);
5466    while (m && !queue_end(&vm_page_queue_inactive, (queue_entry_t)m))
5467    {
5468        next = (vm_page_t) m->pageq.next;
5469        if (hibernate_page_bittst(page_list, m->phys_page))
5470        {
5471	    if (m->dirty)
5472		count_discard_purgeable++;
5473	    else
5474		count_discard_inactive++;
5475            hibernate_discard_page(m);
5476        }
5477        m = next;
5478    }
5479
5480    m = (vm_page_t) queue_first(&vm_page_queue_active);
5481    while (m && !queue_end(&vm_page_queue_active, (queue_entry_t)m))
5482    {
5483        next = (vm_page_t) m->pageq.next;
5484        if (hibernate_page_bittst(page_list, m->phys_page))
5485        {
5486	    if (m->dirty)
5487		count_discard_purgeable++;
5488	    else
5489		count_discard_active++;
5490            hibernate_discard_page(m);
5491        }
5492        m = next;
5493    }
5494
5495    m = (vm_page_t) queue_first(&vm_page_queue_cleaned);
5496    while (m && !queue_end(&vm_page_queue_cleaned, (queue_entry_t)m))
5497    {
5498        next = (vm_page_t) m->pageq.next;
5499        if (hibernate_page_bittst(page_list, m->phys_page))
5500        {
5501	    if (m->dirty)
5502		count_discard_purgeable++;
5503	    else
5504		count_discard_cleaned++;
5505            hibernate_discard_page(m);
5506        }
5507        m = next;
5508    }
5509
5510#if DEBUG
5511	if (vm_page_local_q) {
5512	    for (i = 0; i < vm_page_local_q_count; i++) {
5513		struct vpl	*lq;
5514		lq = &vm_page_local_q[i].vpl_un.vpl;
5515		VPL_UNLOCK(&lq->vpl_lock);
5516	    }
5517	}
5518        vm_page_unlock_queues();
5519#endif	/* DEBUG */
5520
5521    clock_get_uptime(&end);
5522    absolutetime_to_nanoseconds(end - start, &nsec);
5523    HIBLOG("hibernate_page_list_discard time: %qd ms, discarded act %d inact %d purgeable %d spec %d cleaned %d\n",
5524                nsec / 1000000ULL,
5525	        count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative, count_discard_cleaned);
5526}
5527
5528#endif /* HIBERNATION */
5529
5530/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
5531
5532#include <mach_vm_debug.h>
5533#if	MACH_VM_DEBUG
5534
5535#include <mach_debug/hash_info.h>
5536#include <vm/vm_debug.h>
5537
5538/*
5539 *	Routine:	vm_page_info
5540 *	Purpose:
5541 *		Return information about the global VP table.
5542 *		Fills the buffer with as much information as possible
5543 *		and returns the desired size of the buffer.
5544 *	Conditions:
5545 *		Nothing locked.  The caller should provide
5546 *		possibly-pageable memory.
5547 */
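
/*
 * Illustrative sketch only (not part of the build): the calling convention
 * described above.  The caller supplies a buffer of 'count' entries; the
 * return value is the total number of hash buckets, so a larger buffer is
 * needed whenever the return value exceeds 'count'.  The buffer size used
 * here is hypothetical.
 */
#if 0
static void
vm_page_info_example(void)
{
	hash_info_bucket_t	info[64];	/* hypothetical buffer size */
	unsigned int		desired;

	desired = vm_page_info(info, 64);
	if (desired > 64) {
		/*
		 * only the first 64 buckets were reported...
		 * a buffer of 'desired' entries would cover them all
		 */
	}
}
#endif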
5548
5549unsigned int
5550vm_page_info(
5551	hash_info_bucket_t *info,
5552	unsigned int count)
5553{
5554	unsigned int i;
5555	lck_spin_t	*bucket_lock;
5556
5557	if (vm_page_bucket_count < count)
5558		count = vm_page_bucket_count;
5559
5560	for (i = 0; i < count; i++) {
5561		vm_page_bucket_t *bucket = &vm_page_buckets[i];
5562		unsigned int bucket_count = 0;
5563		vm_page_t m;
5564
5565		bucket_lock = &vm_page_bucket_locks[i / BUCKETS_PER_LOCK];
5566		lck_spin_lock(bucket_lock);
5567
5568		for (m = bucket->pages; m != VM_PAGE_NULL; m = m->next)
5569			bucket_count++;
5570
5571		lck_spin_unlock(bucket_lock);
5572
5573		/* don't touch pageable memory while holding locks */
5574		info[i].hib_count = bucket_count;
5575	}
5576
5577	return vm_page_bucket_count;
5578}
5579#endif	/* MACH_VM_DEBUG */
5580